WIP: image processing library (or libraries?) #12

Draft
schrottkatze wants to merge 15 commits from schrottkatze/iowo:proc-libs into main
10 changed files with 362 additions and 341 deletions
Showing only changes of commit 98f6af78be - Show all commits

View file

@ -56,7 +56,6 @@ fn main() {
mod dev {
use clap::Subcommand;
use prowocessing::experimental::trait_based::DataType;
#[derive(Subcommand)]
pub(crate) enum DevCommands {
@ -81,7 +80,7 @@ mod dev {
println!("Lwr: {}", lwr.run(test_str.clone()));
}
DevCommands::Add { num0, num1 } => {
use prowocessing::experimental::trait_based::PipelineBuilder;
use prowocessing::experimental::trait_based::pipeline::PipelineBuilder;
let pipe = PipelineBuilder::new().add(1).stringify().build();
println!(

View file

@ -1,339 +1,4 @@
use self::{
numops::{Add, Stringify, Subtract},
strops::{Concatenate, Lower, Upper},
};
/// A single processing step that can be chained into a pipeline.
trait PipelineElement {
/// Returns the function pointer that turns this element's inputs into its outputs.
fn runner(&self) -> fn(&Inputs) -> Outputs;
/// Describes the data types this element takes in and hands out.
fn signature(&self) -> ElementIo;
}
/// Input and output type signature of a pipeline element.
struct ElementIo {
/// Data types the element takes in, listed positionally.
pub inputs: Vec<DataType>,
/// Data types the element hands out, listed positionally.
pub outputs: Vec<DataType>,
}
// TODO:
// - Bind additional inputs if instruction has more than one and is passed without any additional
// - allow binding to pointers to other pipelines?
// - allow referencing earlier data
/// Collects pipeline elements one by one before they are turned into a `Pipeline`.
pub struct PipelineBuilder {
/// Elements in the order they were inserted.
elements: Vec<Box<dyn PipelineElement>>,
}
/// An executable chain of element runners.
pub struct Pipeline {
/// Runner functions, called front to back by `run`.
runners: Vec<fn(&Inputs) -> Outputs>,
}
impl Pipeline {
pub fn run(&self, inputs: Inputs) -> Outputs {
let mut out: Outputs = inputs.into();
for runner in &self.runners {
out = runner(&(&out).into());
}
out
}
}
// NOTE(review): the runners visible in this file are plain `fn` pointers and never read the
// value wrapped by `Concatenate`, `Add` or `Subtract` — confirm how `sec` is meant to reach them.
impl PipelineBuilder {
    /// Creates a builder that holds no elements yet.
    pub fn new() -> Self {
        let elements = Vec::new();
        Self { elements }
    }

    /// Appends `el` to the end of the chain and hands the builder back.
    ///
    /// # Panics
    ///
    /// Panics when an element is already present and its first output type differs from the
    /// first input type of `el`; the indexing also panics if either of those lists is empty.
    fn insert<T: PipelineElement + 'static>(mut self, el: T) -> Self {
        if let Some(tail) = self.elements.last() {
            let produced = tail.signature().outputs[0];
            let expected = el.signature().inputs[0];
            assert_eq!(produced, expected);
        }
        let boxed: Box<dyn PipelineElement> = Box::new(el);
        self.elements.push(boxed);
        self
    }

    /// Appends a `Concatenate` element wrapping `sec`.
    #[must_use]
    pub fn concatenate(self, sec: String) -> Self {
        let element = Concatenate(sec);
        self.insert(element)
    }

    /// Appends an `Upper` element.
    #[must_use]
    pub fn upper(self) -> Self {
        let element = Upper;
        self.insert(element)
    }

    /// Appends a `Lower` element.
    #[must_use]
    pub fn lower(self) -> Self {
        let element = Lower;
        self.insert(element)
    }

    /// Appends an `Add` element wrapping `sec`.
    #[must_use]
    #[allow(
        clippy::should_implement_trait,
        reason = "is not equivalent to addition"
    )]
    pub fn add(self, sec: i32) -> Self {
        let element = Add(sec);
        self.insert(element)
    }

    /// Appends a `Subtract` element wrapping `sec`.
    #[must_use]
    pub fn subtract(self, sec: i32) -> Self {
        let element = Subtract(sec);
        self.insert(element)
    }

    /// Appends a `Stringify` element.
    #[must_use]
    pub fn stringify(self) -> Self {
        let element = Stringify;
        self.insert(element)
    }

    /// Collects every element's runner, in insertion order, into a `Pipeline`.
    pub fn build(&self) -> Pipeline {
        let runners = self.elements.iter().map(|el| el.runner()).collect();
        Pipeline { runners }
    }
}
impl Default for PipelineBuilder {
fn default() -> Self {
Self::new()
}
}
/// Borrowed value travelling through a pipeline.
#[derive(Clone, Copy)]
pub enum Data<'a> {
    /// Borrowed string slice.
    String(&'a str),
    /// 32-bit signed integer.
    Int(i32),
}

impl Data<'_> {
    /// Copies this value into its owned counterpart; the string case allocates.
    pub fn to_owned_data(&self) -> OwnedData {
        match *self {
            Self::String(text) => OwnedData::from(String::from(text)),
            Self::Int(number) => OwnedData::from(number),
        }
    }
}

impl<'a> From<&'a str> for Data<'a> {
    /// Wraps a string slice.
    fn from(value: &'a str) -> Self {
        Data::String(value)
    }
}

impl From<i32> for Data<'_> {
    /// Wraps an integer.
    fn from(value: i32) -> Self {
        Data::Int(value)
    }
}

impl<'a> From<&'a OwnedData> for Data<'a> {
    /// Borrows an owned value without copying its string contents.
    fn from(value: &'a OwnedData) -> Self {
        match *value {
            OwnedData::String(ref text) => Data::String(text.as_str()),
            OwnedData::Int(number) => Data::Int(number),
        }
    }
}
/// Owned counterpart of `Data`.
#[derive(Clone, Debug)]
pub enum OwnedData {
    /// Owned string.
    String(String),
    /// 32-bit signed integer.
    Int(i32),
}

impl From<String> for OwnedData {
    /// Wraps an owned string.
    fn from(value: String) -> Self {
        OwnedData::String(value)
    }
}

impl From<i32> for OwnedData {
    /// Wraps an integer.
    fn from(value: i32) -> Self {
        OwnedData::Int(value)
    }
}
/// Tag naming the kind of value listed in an element's signature.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DataType {
/// String data.
String,
/// Integer data.
Int,
}
pub struct Inputs<'a>(Vec<Data<'a>>);
impl<'a> Inputs<'a> {
fn inner(&self) -> Vec<Data<'a>> {
self.0.clone()
}
}
impl<'a> From<Vec<Data<'a>>> for Inputs<'a> {
fn from(value: Vec<Data<'a>>) -> Self {
Self(value)
}
}
impl<'a, T: Into<Data<'a>>> From<T> for Inputs<'a> {
fn from(value: T) -> Self {
Self(vec![value.into()])
}
}
impl<'a> From<&'a Outputs> for Inputs<'a> {
fn from(value: &'a Outputs) -> Self {
Self(value.0.iter().map(std::convert::Into::into).collect())
}
}
/// Owned values returned by an element's runner.
pub struct Outputs(Vec<OwnedData>);

impl Outputs {
    /// Consumes the wrapper and returns the owned values.
    pub fn into_inner(self) -> Vec<OwnedData> {
        let Outputs(values) = self;
        values
    }
}

impl From<Vec<OwnedData>> for Outputs {
    /// Wraps an existing list of owned values as-is.
    fn from(value: Vec<OwnedData>) -> Self {
        Outputs(value)
    }
}

impl<T: Into<OwnedData>> From<T> for Outputs {
    /// Wraps a single value in a one-element list.
    fn from(value: T) -> Self {
        let single: OwnedData = value.into();
        Outputs(vec![single])
    }
}

impl From<Inputs<'_>> for Outputs {
    /// Converts every borrowed input into its owned form.
    fn from(value: Inputs) -> Self {
        let owned: Vec<OwnedData> = value
            .0
            .into_iter()
            .map(|item| item.to_owned_data())
            .collect();
        Outputs(owned)
    }
}
/// String-based pipeline elements.
mod strops {
    use super::{Data, DataType, ElementIo, Inputs, Outputs, PipelineElement};

    /// Element whose runner joins its first two string inputs.
    ///
    /// NOTE(review): the wrapped `String` is never read by the runner — confirm how it is meant
    /// to be supplied as the second input.
    pub struct Concatenate(pub String);

    impl PipelineElement for Concatenate {
        /// Returns a function that concatenates the first two inputs when both are strings and
        /// panics with "Invalid data passed" otherwise.
        fn runner(&self) -> fn(&Inputs) -> Outputs {
            |args| match args.inner()[..] {
                [Data::String(head), Data::String(tail), ..] => {
                    Outputs::from(format!("{head}{tail}"))
                }
                _ => panic!("Invalid data passed"),
            }
        }

        /// Declares two string inputs and one string output.
        fn signature(&self) -> ElementIo {
            let inputs = vec![DataType::String, DataType::String];
            let outputs = vec![DataType::String];
            ElementIo { inputs, outputs }
        }
    }

    /// Element whose runner upper-cases its first string input.
    pub struct Upper;

    impl PipelineElement for Upper {
        /// Returns a function that upper-cases the first input when it is a string and panics
        /// with "Invalid data passed" otherwise.
        fn runner(&self) -> fn(&Inputs) -> Outputs {
            |args| match args.inner()[..] {
                [Data::String(text), ..] => Outputs::from(text.to_uppercase()),
                _ => panic!("Invalid data passed"),
            }
        }

        /// Declares one string input and one string output.
        fn signature(&self) -> ElementIo {
            let inputs = vec![DataType::String];
            let outputs = vec![DataType::String];
            ElementIo { inputs, outputs }
        }
    }

    /// Element whose runner lower-cases its first string input.
    pub struct Lower;

    impl PipelineElement for Lower {
        /// Returns a function that lower-cases the first input when it is a string and panics
        /// with "Invalid data passed" otherwise.
        fn runner(&self) -> fn(&Inputs) -> Outputs {
            |args| match args.inner()[..] {
                [Data::String(text), ..] => Outputs::from(text.to_lowercase()),
                _ => panic!("Invalid data passed"),
            }
        }

        /// Declares one string input and one string output.
        fn signature(&self) -> ElementIo {
            let inputs = vec![DataType::String];
            let outputs = vec![DataType::String];
            ElementIo { inputs, outputs }
        }
    }
}
/// Integer-based pipeline elements.
mod numops {
    // `panic!` comes from the prelude, so the former `use core::panic;` was redundant.
    use super::{Data, DataType, ElementIo, Inputs, Outputs, PipelineElement};

    /// Element whose runner sums its first two integer inputs.
    ///
    /// NOTE(review): the wrapped `i32` is never read by the runner — confirm how it is meant to
    /// be supplied as the second input.
    pub struct Add(pub i32);
    impl PipelineElement for Add {
        /// Returns a function that adds the first two inputs when both are integers and panics
        /// with "Invalid data passed" otherwise.
        fn runner(&self) -> fn(&Inputs) -> Outputs {
            |input| {
                if let [Data::Int(i0), Data::Int(i1), ..] = input.inner()[..] {
                    (i0 + i1).into()
                } else {
                    panic!("Invalid data passed")
                }
            }
        }
        /// Declares two integer inputs and one integer output.
        fn signature(&self) -> ElementIo {
            ElementIo {
                inputs: vec![DataType::Int, DataType::Int],
                outputs: vec![DataType::Int],
            }
        }
    }

    /// Element whose runner subtracts its second integer input from its first.
    ///
    /// NOTE(review): the wrapped `i32` is never read by the runner — confirm how it is meant to
    /// be supplied as the second input.
    pub struct Subtract(pub i32);
    impl PipelineElement for Subtract {
        /// Returns a function that computes `first - second` when both leading inputs are
        /// integers and panics with "Invalid data passed" otherwise.
        fn runner(&self) -> fn(&Inputs) -> Outputs {
            |input| {
                if let [Data::Int(i0), Data::Int(i1), ..] = input.inner()[..] {
                    // Previously `i0 + i1`, an unchanged copy of `Add`'s body.
                    (i0 - i1).into()
                } else {
                    panic!("Invalid data passed")
                }
            }
        }
        /// Declares two integer inputs and one integer output.
        fn signature(&self) -> ElementIo {
            ElementIo {
                inputs: vec![DataType::Int, DataType::Int],
                outputs: vec![DataType::Int],
            }
        }
    }

    /// Element whose runner renders its first integer input as a decimal string.
    pub struct Stringify;
    impl PipelineElement for Stringify {
        /// Returns a function that stringifies the first input when it is an integer and panics
        /// with "Invalid data passed" otherwise.
        fn runner(&self) -> fn(&Inputs) -> Outputs {
            |input| {
                if let [Data::Int(int), ..] = input.inner()[..] {
                    int.to_string().into()
                } else {
                    panic!("Invalid data passed")
                }
            }
        }
        /// Declares one integer input and one string output.
        fn signature(&self) -> ElementIo {
            ElementIo {
                inputs: vec![DataType::Int],
                outputs: vec![DataType::String],
            }
        }
    }
}
pub mod data;
pub mod element;
pub mod ops;
pub mod pipeline;

View file

@ -0,0 +1,2 @@
pub mod io;
pub mod raw;

View file

@ -0,0 +1,51 @@
use super::raw::{Data, OwnedData};
/// Borrowed values handed to an element's runner.
pub struct Inputs<'a>(Vec<Data<'a>>);
impl<'a> Inputs<'a> {
/// Returns a copy of the wrapped values.
pub(crate) fn inner(&self) -> Vec<Data<'a>> {
    self.0.iter().copied().collect()
}
schrottkatze marked this conversation as resolved

Quite ambiguous doc-comment also regarding the rather lengthy doc-comment on the type itself.
How about removing this method altogether and making the content of Inputs directly public,
given that one's free to convert from/to it already?

Quite ambiguous doc-comment also regarding the rather lengthy doc-comment on the type itself. How about removing this method altogether and making the content of `Inputs` directly public, given that one's free to convert from/to it already?
}
impl<'a> From<Vec<Data<'a>>> for Inputs<'a> {
fn from(value: Vec<Data<'a>>) -> Self {
Self(value)
}
}
impl<'a, T: Into<Data<'a>>> From<T> for Inputs<'a> {
fn from(value: T) -> Self {
Self(vec![value.into()])
}
}
impl<'a> From<&'a Outputs> for Inputs<'a> {
fn from(value: &'a Outputs) -> Self {
Self(value.0.iter().map(std::convert::Into::into).collect())
}
}
/// Owned values returned by an element's runner.
pub struct Outputs(Vec<OwnedData>);
impl Outputs {
pub fn into_inner(self) -> Vec<OwnedData> {
schrottkatze marked this conversation as resolved

Unnecessary full method path, consider just using From::from or Into::into instead.

Unnecessary full method path, consider just using `From::from` or `Into::into` instead.
Review

ah yes, rust-analyzer loves completing full paths lol

ah yes, rust-analyzer loves completing full paths lol
self.0
}
}
impl From<Vec<OwnedData>> for Outputs {
fn from(value: Vec<OwnedData>) -> Self {
Self(value)
}
}
impl<T: Into<OwnedData>> From<T> for Outputs {
schrottkatze marked this conversation as resolved

Wait, why can Outputs be consumed for its inner content while Inputs can't?

Wait, why can `Outputs` be consumed for its inner content while `Inputs` can't?
Review

Inputs only contains a Vec of Data which either contains a string slice or an integer, which are really cheap to clone. OwnedData would be much heavier to clone in this case.

(I'm currently not happy how the IO of instructions works anyway, planning on reworking that to be more sensible, clear and flexible))

`Inputs` only contains a `Vec` of `Data` which either contains a string slice or an integer, which are really cheap to clone. `OwnedData` would be much heavier to clone in this case. (I'm currently not happy how the IO of instructions works anyway, planning on reworking that to be more sensible, clear and flexible))
fn from(value: T) -> Self {
Self(vec![value.into()])
}
}
impl From<Inputs<'_>> for Outputs {
fn from(value: Inputs) -> Self {
Self(
value
.0
.into_iter()
.map(|i: Data<'_>| Data::to_owned_data(&i))
.collect(),
)
}
}

View file

@ -0,0 +1,48 @@
/// Borrowed value travelling through a pipeline.
#[derive(Clone, Copy)]
pub enum Data<'a> {
    /// Borrowed string slice.
    String(&'a str),
    /// 32-bit signed integer.
    Int(i32),
}

impl Data<'_> {
    /// Copies this value into its owned counterpart; the string case allocates.
    pub fn to_owned_data(&self) -> OwnedData {
        match *self {
            Self::String(text) => OwnedData::from(String::from(text)),
            Self::Int(number) => OwnedData::from(number),
        }
    }
}

impl<'a> From<&'a str> for Data<'a> {
    /// Wraps a string slice.
    fn from(value: &'a str) -> Self {
        Data::String(value)
    }
}

impl From<i32> for Data<'_> {
    /// Wraps an integer.
    fn from(value: i32) -> Self {
        Data::Int(value)
    }
}

impl<'a> From<&'a OwnedData> for Data<'a> {
    /// Borrows an owned value without copying its string contents.
    fn from(value: &'a OwnedData) -> Self {
        match *value {
            OwnedData::String(ref text) => Data::String(text.as_str()),
            OwnedData::Int(number) => Data::Int(number),
        }
    }
}
/// Owned counterpart of `Data`.
#[derive(Clone, Debug)]
pub enum OwnedData {
    /// Owned string.
    String(String),
    /// 32-bit signed integer.
    Int(i32),
}

impl From<String> for OwnedData {
    /// Wraps an owned string.
    fn from(value: String) -> Self {
        OwnedData::String(value)
    }
}

impl From<i32> for OwnedData {
    /// Wraps an integer.
    fn from(value: i32) -> Self {
        OwnedData::Int(value)
    }
}

View file

@ -0,0 +1,19 @@
use crate::experimental::trait_based::data::io::Inputs;
use super::data::io::Outputs;
/// A single processing step that can be chained into a pipeline.
pub(crate) trait PipelineElement {
/// Returns the function pointer that turns this element's inputs into its outputs.
fn runner(&self) -> fn(&Inputs) -> Outputs;
/// Describes the data types this element takes in and hands out.
fn signature(&self) -> ElementIo;
}
/// Input and output type signature of a pipeline element.
pub(crate) struct ElementIo {
/// Data types the element takes in, listed positionally.
pub inputs: Vec<DataType>,
/// Data types the element hands out, listed positionally.
pub outputs: Vec<DataType>,
}
/// Tag naming the kind of value listed in an element's signature.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DataType {
/// String data.
String,
/// Integer data.
Int,
}

View file

@ -0,0 +1,7 @@
mod num;
mod str;
/// Glob re-exports of everything in the `num` and `str` submodules, visible crate-wide only.
pub mod prelude {
pub(crate) use super::num::*;
pub(crate) use super::str::*;
}

View file

@ -0,0 +1,69 @@
use core::panic;
use crate::experimental::trait_based::{
data::{
io::{Inputs, Outputs},
raw::Data,
},
element::{DataType, ElementIo, PipelineElement},
};
pub struct Add(pub i32);
impl PipelineElement for Add {
fn runner(&self) -> fn(&Inputs) -> Outputs {
|input| {
if let [Data::Int(i0), Data::Int(i1), ..] = input.inner()[..] {
(i0 + i1).into()
} else {
panic!("Invalid data passed")
}
}
}
fn signature(&self) -> ElementIo {
ElementIo {
inputs: vec![DataType::Int, DataType::Int],
outputs: vec![DataType::Int],
}
}
}
/// Element whose runner subtracts its second integer input from its first.
///
/// NOTE(review): the wrapped `i32` is never read by the runner — confirm how it is meant to be
/// supplied as the second input.
pub struct Subtract(pub i32);

impl PipelineElement for Subtract {
    /// Returns a function that computes `first - second` when both leading inputs are integers
    /// and panics with "Invalid data passed" otherwise.
    fn runner(&self) -> fn(&Inputs) -> Outputs {
        |input| {
            if let [Data::Int(i0), Data::Int(i1), ..] = input.inner()[..] {
                // Previously `i0 + i1`, an unchanged copy of `Add`'s body.
                (i0 - i1).into()
            } else {
                panic!("Invalid data passed")
            }
        }
    }

    /// Declares two integer inputs and one integer output.
    fn signature(&self) -> ElementIo {
        ElementIo {
            inputs: vec![DataType::Int, DataType::Int],
            outputs: vec![DataType::Int],
        }
    }
}
pub struct Stringify;
impl PipelineElement for Stringify {
fn runner(&self) -> fn(&Inputs) -> Outputs {
|input| {
if let [Data::Int(int), ..] = input.inner()[..] {
int.to_string().into()
} else {
panic!("Invalid data passed")
}
}
}
fn signature(&self) -> ElementIo {
ElementIo {
inputs: vec![DataType::Int],
outputs: vec![DataType::String],
}
}
}

View file

@ -0,0 +1,67 @@
use crate::experimental::trait_based::{
data::{
io::{Inputs, Outputs},
raw::Data,
},
element::{DataType, ElementIo, PipelineElement},
};
pub struct Concatenate(pub String);
impl PipelineElement for Concatenate {
fn runner(&self) -> fn(&Inputs) -> Outputs {
|input| {
if let [Data::String(s0), Data::String(s1), ..] = input.inner()[..] {
format!("{s0}{s1}").into()
} else {
panic!("Invalid data passed")
}
}
}
fn signature(&self) -> ElementIo {
ElementIo {
inputs: vec![DataType::String, DataType::String],
outputs: vec![DataType::String],
}
}
}
pub struct Upper;
impl PipelineElement for Upper {
fn runner(&self) -> fn(&Inputs) -> Outputs {
|input| {
if let [Data::String(s), ..] = input.inner()[..] {
s.to_uppercase().into()
} else {
panic!("Invalid data passed")
}
}
}
fn signature(&self) -> ElementIo {
ElementIo {
inputs: vec![DataType::String],
outputs: vec![DataType::String],
}
}
}
pub struct Lower;
impl PipelineElement for Lower {
fn runner(&self) -> fn(&Inputs) -> Outputs {
|input| {
if let [Data::String(s), ..] = input.inner()[..] {
s.to_lowercase().into()
} else {
panic!("Invalid data passed")
}
}
}
fn signature(&self) -> ElementIo {
ElementIo {
inputs: vec![DataType::String],
outputs: vec![DataType::String],
}
}
}

View file

@ -0,0 +1,94 @@
use super::data::io::{Inputs, Outputs};
use super::element::PipelineElement;
use super::ops::prelude::*;
// TODO:
// - Bind additional inputs if instruction has more than one and is passed without any additional
// - allow binding to pointers to other pipelines?
// - allow referencing earlier data
pub struct PipelineBuilder {
elements: Vec<Box<dyn PipelineElement>>,
multisamplednight marked this conversation as resolved

Those TODO:s seem like they should belong in an issue, so one can

  • discuss on them
  • selectively mark them as done/undone
  • edit them easily
  • link them from a respective PR
Those `TODO:`s seem like they should belong in an issue, so one can - discuss on them - selectively mark them as done/undone - edit them easily - link them from a respective PR
Review

this is heavily WIP, remember, these are experiments. There will not be a separate PR until this is not only out of experimental state, but a functioning image processing library that has all that fixed.

this is heavily WIP, remember, these are experiments. There will not be a separate PR until this is not only out of experimental state, but a functioning image processing library that has all that fixed.

So you'd rather do all the work of finding all TODO:s and converting them to issues (and then creating a PR removing them) after "finishing" prowocessing?

So you'd rather do all the work of finding all `TODO:`s and converting them to issues (and then creating a PR removing them) after "finishing" `prowocessing`?

Also, that just addresses one point, while the other 3 are still standing.

Also, that just addresses one point, while the other 3 are still standing.
Review

The intention is to perfect the api beforehand, and tbh, you'll be the only other person reviewing stuff anyway. And wdym "editing them easily", tbh i find editing text in my editor much, much easier than working with a forgejo web ui...

and if they're done, i delete them.

The intention is to perfect the api beforehand, and tbh, you'll be the only other person reviewing stuff anyway. And wdym "editing them easily", tbh i find editing text in my editor much, *much* easier than working with a forgejo web ui... and if they're done, i delete them.

Sounds fine to me.

Sounds fine to me.
}
// NOTE(review): the runners visible on this page are plain `fn` pointers and never read the
// value wrapped by `Concatenate`, `Add` or `Subtract` — confirm how `sec` is meant to reach them.
impl PipelineBuilder {
    /// Creates a builder that holds no elements yet.
    pub fn new() -> Self {
        let elements = Vec::new();
        Self { elements }
    }

    /// Appends `el` to the end of the chain and hands the builder back.
    ///
    /// # Panics
    ///
    /// Panics when an element is already present and its first output type differs from the
    /// first input type of `el`; the indexing also panics if either of those lists is empty.
    fn insert<T: PipelineElement + 'static>(mut self, el: T) -> Self {
        if let Some(tail) = self.elements.last() {
            let produced = tail.signature().outputs[0];
            let expected = el.signature().inputs[0];
            assert_eq!(produced, expected);
        }
        let boxed: Box<dyn PipelineElement> = Box::new(el);
        self.elements.push(boxed);
        self
    }

    /// Appends a `Concatenate` element wrapping `sec`.
    #[must_use]
    pub fn concatenate(self, sec: String) -> Self {
        let element = Concatenate(sec);
        self.insert(element)
    }

    /// Appends an `Upper` element.
    #[must_use]
    pub fn upper(self) -> Self {
        let element = Upper;
        self.insert(element)
    }

    /// Appends a `Lower` element.
    #[must_use]
    pub fn lower(self) -> Self {
        let element = Lower;
        self.insert(element)
    }

    /// Appends an `Add` element wrapping `sec`.
    #[must_use]
    #[allow(
        clippy::should_implement_trait,
        reason = "is not equivalent to addition"
    )]
    pub fn add(self, sec: i32) -> Self {
        let element = Add(sec);
        self.insert(element)
    }

    /// Appends a `Subtract` element wrapping `sec`.
    #[must_use]
    pub fn subtract(self, sec: i32) -> Self {
        let element = Subtract(sec);
        self.insert(element)
    }

    /// Appends a `Stringify` element.
    #[must_use]
    pub fn stringify(self) -> Self {
        let element = Stringify;
        self.insert(element)
    }

    /// Collects every element's runner, in insertion order, into a `Pipeline`.
    pub fn build(&self) -> Pipeline {
        let runners = self.elements.iter().map(|el| el.runner()).collect();
        Pipeline { runners }
    }
}
impl Default for PipelineBuilder {
fn default() -> Self {
Self::new()
}
}
/// An executable chain of element runners.
pub struct Pipeline {
/// Runner functions, called front to back by `run`.
runners: Vec<fn(&Inputs) -> Outputs>,
}
impl Pipeline {
pub fn run(&self, inputs: Inputs) -> Outputs {
let mut out: Outputs = inputs.into();
for runner in &self.runners {
out = runner(&(&out).into());
}
out
}

Also regarding the enum-based arch: Why the indirection of fn(&Inputs) -> Outputs? Why does Pipeline not hold Box<dyn Element> as well?

Also regarding the enum-based arch: Why the indirection of `fn(&Inputs) -> Outputs`? Why does `Pipeline` not hold `Box<dyn Element>` as well?
Review

Unless I misunderstood Rust's dyn, this avoids the overhead of dynamic dispatch by saving static function pointers, which can just be called on the fly

Unless I misunderstood Rust's `dyn`, this avoids the overhead of dynamic dispatch by saving static function pointers, which can just be called on the fly

They do save 1 indirection, but that is 1 indirection which hasn't even been benchmarked yet against

  1. worse debuggability
    • when Debugging Pipeline if it were to implement Debug, all one sees at the moment is a bunch of addresses of a function in hexadecimal, e.g. 0x000056380fa85320.
  2. when writing fn runner() as opposed to a direct fn eval(&Inputs) -> Outputs or the like
    1. an extra indent for the actual logic
    2. extra noise for defining the return type and the returned closure

Sidenote: If you care this much about indirection, &Inputs is actually &Vec<&Data>, which are 2 indirections already before the element can access any contained data.

They do save 1 indirection, but that is 1 indirection which hasn't even been benchmarked yet against 1. worse debuggability - when `Debug`ging `Pipeline` if it were to implement `Debug`, all one sees at the moment is a bunch of addresses of a function in hexadecimal, e.g. `0x000056380fa85320`. 2. when writing `fn runner()` as opposed to a direct `fn eval(&Inputs) -> Outputs` or the like 1. an extra indent for the actual logic 2. extra noise for defining the return type and the returned closure Sidenote: If you care this much about indirection, `&Inputs` is actually `&Vec<&Data>`, which are 2 indirections already before the element can access any contained data.
Review
  1. worse debuggability

i have an idea what one could do for that, I'll be implementing that soon-ish when i have the energy

  1. when writing fn runner() as opposed to a direct fn eval(&Inputs) -> Outputs or the like
    1. an extra indent for the actual logic
    2. extra noise for defining the return type and the returned closure

the return types are defined as opaque types deliberately, since during actual execution the pipeline does not know those.
the only thing the pipeline is supposed to do, is to execute the runners in order and get data where it's supposed to go (which is, once again, an unsolved problem currently)

Sidenote: If you care this much about indirection, &Inputs is actually &Vec<&Data>, which are 2 indirections already before the element can access any contained data.

yes, but I don't think that's avoidable.

> 1. worse debuggability i have an idea what one could do for that, I'll be implementing that soon-ish when i have the energy > 1. when writing fn runner() as opposed to a direct fn eval(&Inputs) -> Outputs or the like > 1. an extra indent for the actual logic > 2. extra noise for defining the return type and the returned closure the return types are defined as opaque types deliberately, since during actual execution the pipeline does not know those. the only thing the pipeline is supposed to do, is to execute the runners in order and get data where it's supposed to go (which is, once again, an unsolved problem currently) > Sidenote: If you care this much about indirection, &Inputs is actually &Vec<&Data>, which are 2 indirections already before the element can access any contained data. yes, but I don't think that's avoidable.

i have an idea what one could do for that, I'll be implementing that soon-ish when i have the energy

Does this idea imply a map of fn pointer to debug info? If so, I'm not sure if the size increase outweighs the 1 (still unbenchmarked) indirection.

the return types are defined as opaque types deliberately, since during actual execution the pipeline does not know those.
the only thing the pipeline is supposed to do, is to execute the runners in order and get data where it's supposed to go (which is, once again, an unsolved problem currently)

That does not address the points you quote at all. At no point I questioned the fn pointers being opaque. You just re-stated what the pipeline should do, which I already know, without addressing what I listed.

> i have an idea what one could do for that, I'll be implementing that soon-ish when i have the energy Does this idea imply a map of fn pointer to debug info? If so, I'm not sure if the size increase outweighs the 1 (still unbenchmarked) indirection. > the return types are defined as opaque types deliberately, since during actual execution the pipeline does not know those. > the only thing the pipeline is supposed to do, is to execute the runners in order and get data where it's supposed to go (which is, once again, an unsolved problem currently) That does not address the points you quote at all. At no point I questioned the fn pointers being opaque. You just re-stated what the pipeline should do, which I already know, without addressing what I listed.

yes, but I don't think that's avoidable.

It is. One could clone the vec on every instruction call.

EDIT: Thinking about it, actually the vec will need to be created for every instruction call anyway, since in a graph, multiple instructions may be source for an instruction.

> yes, but I don't think that's avoidable. It is. One could clone the vec on every instruction call. EDIT: Thinking about it, actually the vec will need to be created for every instruction call anyway, since in a graph, multiple instructions may be source for an instruction.
}