iowo/crates/ir/src/lib.rs

use std::{
    collections::{BTreeMap, BTreeSet},
    ops::RangeInclusive,
};

use instruction::SocketCount;
use serde::{Deserialize, Serialize};

pub mod id;
pub mod instruction;
pub mod semi_human;

/// Map type used by the graph IR. Alias for [`std::collections::BTreeMap`].
pub type Map<K, V> = std::collections::BTreeMap<K, V>;
/// Set type used by the graph IR. Alias for [`std::collections::BTreeSet`].
pub type Set<V> = std::collections::BTreeSet<V>;

/// Parses a human-readable RON representation into a fully typed [`GraphIr`].
///
/// The source is first deserialized as a [`semi_human::GraphIr`]
/// and then converted into the machine-oriented [`GraphIr`].
/// See [`semi_human::GraphIr`] and the test files in the repo at `testfiles/`
/// for what the RON is expected to look like.
///
/// Writing out a [`GraphIr`] in full by hand is not the intended workflow.
/// That's something for the machines to do.
///
/// # Errors
///
/// Returns an error if the parsed source is not a valid human-readable graph IR.
pub fn from_ron(source: &str) -> ron::error::SpannedResult<GraphIr> {
    ron::from_str::<semi_human::GraphIr>(source).map(Into::into)
}

/// The toplevel representation of a whole pipeline.
///
/// Pipelines may not be fully linear. They may branch out and recombine later on.
/// As such, the representation for them which is currently used is a
/// [**D**irected **A**cyclic **G**raph](https://en.wikipedia.org/wiki/Directed_acyclic_graph).
///
/// For those who are already familiar with graphs,
/// a DAG is a graph with two additional properties:
///
/// - It is **directed**: Edges have a direction they point to.
///   In this case, edges point from the outputs of streamers to inputs of consumers.
/// - It is **acyclic**: Those directed edges may not form loops.
///   In other words, if one follows edges only in their direction, it must be impossible
///   to come back to an already visited node.
///
/// Here, if an edge points from _A_ to _B_ (`A --> B`),
/// then _A_ is called a **dependency** of _B_,
/// and _B_ is called a **dependent** of _A_.
///
/// The DAG also enables another neat operation:
/// [Topological sorting](https://en.wikipedia.org/wiki/Topological_sorting).
/// This allows putting the entire graph into a linear list,
/// where it's guaranteed that once a vertex is visited,
/// all dependencies of it will have been visited already as well.
///
/// The specific representation used here is a bit more complicated,
/// since **instructions** aren't directly connected to one another,
/// but their **sockets** are instead.
///
/// So the vertices of the DAG are the **sockets**
/// (which are either [`id::Input`] or [`id::Output`] depending on the direction),
/// and each **socket** in turn belongs to an **instruction**.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct GraphIr {
    /// "Backbone" storage of all **instruction** IDs to
    /// what **kind of instruction** they are.
    instructions: Map<id::Instruction, instruction::Kind>,

    /// How the data flows forward. **Dependencies** map to **dependents** here.
    ///
    /// One output may feed any number of inputs, hence the [`Set`].
    edges: Map<id::Output, Set<id::Input>>,
    /// How the data flows backward. **Dependents** map to **dependencies** here.
    ///
    /// Each input is fed by at most one output, hence a single value per key.
    rev_edges: Map<id::Input, id::Output>,
}

impl GraphIr {
    // TODO: this function, but actually the whole module, screams for tests
    /// Returns the instruction corresponding to the given ID,
    /// along with what each of its sockets is connected to.
    /// Returns [`None`] if there is no such instruction in this graph IR.
    ///
    /// # Caveats
    ///
    /// The lookup is done purely by the value of the ID.
    /// Nothing ties an ID to the [`GraphIr`] it was created for,
    /// so an ID taken from a different [`GraphIr`] which happens to be equal to
    /// one in this graph resolves to the instruction in *this* graph.
    ///
    /// Theoretically this could be fixed easily at the expense of some memory
    /// by just incrementing and storing some global counter,
    /// however, at the moment there's no compelling reason
    /// to actually have multiple [`GraphIr`]s at one point in time.
    /// Open an issue if that poses a problem for you.
    #[must_use]
    pub fn resolve<'ir>(&'ir self, id: &id::Instruction) -> Option<Instruction<'ir>> {
        // re-borrow the ID from the map so the result only borrows from `self`
        let (id, kind) = self.instructions.get_key_value(id)?;

        // just try each slot and see if it's connected
        // very crude, but it works for a proof of concept
        let SocketCount { inputs, outputs } = kind.socket_count();
        let socket = |idx: usize| id::Socket {
            belongs_to: id.clone(),
            // impossible since the length is limited to a u16 already
            #[allow(clippy::cast_possible_truncation)]
            idx: id::SocketIdx(idx as u16),
        };

        // one slot per socket, `None` where nothing is connected
        let input_count: usize = inputs.into();
        let inputs_from = (0..input_count)
            .map(|idx| self.rev_edges.get(&id::Input(socket(idx))))
            .collect();

        let output_count: usize = outputs.into();
        let outputs_to = (0..output_count)
            .map(|idx| self.edges.get(&id::Output(socket(idx))))
            .collect();

        Some(Instruction {
            id,
            kind,
            inputs_from,
            outputs_to,
        })
    }

    /// Returns the instruction this input belongs to.
    /// Returns [`None`] if that instruction is not part of this graph IR.
    ///
    /// The same caveats as for [`GraphIr::resolve`] apply.
    #[must_use]
    pub fn owner_of_input<'ir>(&'ir self, input: &id::Input) -> Option<Instruction<'ir>> {
        self.resolve(&input.socket().belongs_to)
    }

    /// Returns the instruction this output belongs to.
    /// Returns [`None`] if that instruction is not part of this graph IR.
    ///
    /// The same caveats as for [`GraphIr::resolve`] apply.
    #[must_use]
    pub fn owner_of_output<'ir>(&'ir self, output: &id::Output) -> Option<Instruction<'ir>> {
        self.resolve(&output.socket().belongs_to)
    }

    /// Returns all instructions in an order in which every instruction
    /// comes after all instructions it depends on.
    ///
    /// Instructions without any dependencies come first.
    /// The order is deterministic for a given graph.
    ///
    /// Edges which refer to an instruction unknown to this graph are ignored.
    /// If the graph contains a cycle despite being supposed to be acyclic,
    /// the instructions on the cycle and everything depending on them
    /// never become ready and are hence left out of the result.
    #[must_use]
    pub fn topological_sort(&self) -> Vec<Instruction> {
        // Kahn's algorithm on instruction level:
        // every entry in `rev_edges` is one edge from the owner of the output (dependency)
        // to the owner of the input (dependent)

        // how many not yet visited incoming edges each instruction has
        let mut pending: BTreeMap<&id::Instruction, usize> =
            self.instructions.keys().map(|id| (id, 0)).collect();
        // which instructions to notify once an instruction has been visited,
        // containing one entry per edge
        let mut dependents: BTreeMap<&id::Instruction, Vec<&id::Instruction>> = BTreeMap::new();

        for (input, output) in &self.rev_edges {
            // look the owners up in `instructions` to get IDs which borrow from `self`
            let dependent = self.instructions.get_key_value(&input.socket().belongs_to);
            let dependency = self.instructions.get_key_value(&output.socket().belongs_to);

            if let (Some((dependent, _)), Some((dependency, _))) = (dependent, dependency) {
                *pending.entry(dependent).or_insert(0) += 1;
                dependents.entry(dependency).or_default().push(dependent);
            }
        }

        // `order` doubles as the work queue:
        // everything before `cursor` is visited, everything after is ready to be visited
        let mut order: Vec<&id::Instruction> = pending
            .iter()
            .filter_map(|(id, count)| (*count == 0).then_some(*id))
            .collect();
        let mut cursor = 0;

        while let Some(&current) = order.get(cursor) {
            cursor += 1;

            let Some(successors) = dependents.get(current) else {
                continue;
            };
            for &successor in successors {
                if let Some(count) = pending.get_mut(successor) {
                    // cannot underflow: every edge incremented this count exactly once above
                    *count -= 1;
                    if *count == 0 {
                        order.push(successor);
                    }
                }
            }
        }

        // all IDs in `order` are keys of `instructions`, so none get filtered out here
        order
            .into_iter()
            .filter_map(|id| self.resolve(id))
            .collect()
    }
}

/// A full instruction in context, with its inputs and outputs.
///
/// Everything in here is borrowed from the [`GraphIr`] the instruction was resolved from,
/// see [`GraphIr::resolve`].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Instruction<'ir> {
    /// The ID this instruction is stored under in the [`GraphIr`].
    pub id: &'ir id::Instruction,
    /// What kind of instruction this is.
    pub kind: &'ir instruction::Kind,

    // can't have these two public since then a user might corrupt their length:
    // [`GraphIr::resolve`] creates exactly one entry per socket
    // use the accessors of the same name for reading them instead
    inputs_from: Vec<Option<&'ir id::Output>>,
    outputs_to: Vec<Option<&'ir BTreeSet<id::Input>>>,
}

impl<'ir> Instruction<'ir> {
    /// The outputs this instruction receives its inputs from, one entry per input socket.
    ///
    /// [`None`] means that this input is unfilled,
    /// and must be filled before the instruction can be run.
    #[must_use]
    pub fn inputs_from(&self) -> &[Option<&'ir id::Output>] {
        self.inputs_from.as_slice()
    }

    /// The inputs this instruction sends its outputs to, one entry per output socket.
    ///
    /// [`None`] means that this output is unused.
    #[must_use]
    pub fn outputs_to(&self) -> &[Option<&'ir BTreeSet<id::Input>>] {
        self.outputs_to.as_slice()
    }
}

/// Some part referred to in source code.
///
/// Both ends are inclusive byte positions.
/// Can be created from a [`std::ops::RangeInclusive`] using [`From`].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Span {
    // would love to use an actual [`std::ops::RangeInclusive`], but those don't implement
    // `PartialOrd` and `Ord` unfortunately, both of which are derived above
    /// At which byte this span starts, inclusively.
    pub from: usize,
    /// At which byte this span ends, inclusively.
    pub to: usize,
}

impl From<RangeInclusive<usize>> for Span {
    /// Turns the start and end of the range into the start and end of the span.
    ///
    /// Both bounds are taken over as they are, without checking that start <= end.
    fn from(range: RangeInclusive<usize>) -> Self {
        let (from, to) = range.into_inner();
        Self { from, to }
    }
}
feat(ir): implement resolve functionality (untested) 2024-01-18 23:45:01 +00:00			`use std::{`
			`collections::{BTreeMap, BTreeSet},`
			`ops::RangeInclusive,`
			`};`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00
feat(ir): implement resolve functionality (untested) 2024-01-18 23:45:01 +00:00			`use instruction::SocketCount;`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`use serde::{Deserialize, Serialize};`

chore: extract id stuff into its own file 2024-01-18 21:09:11 +00:00			`pub mod id;`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`pub mod instruction;`
docs: ensure docs are well-linked, fed and washed 2024-01-18 21:24:03 +00:00			`pub mod semi_human;`
chore: put semi human graph ir into its own module 2024-01-18 20:54:41 +00:00
feat(ir): implement resolve functionality (untested) 2024-01-18 23:45:01 +00:00			`pub type Map<K, V> = std::collections::BTreeMap<K, V>;`
			`pub type Set<V> = std::collections::BTreeSet<V>;`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`/// Gives you a super well typed graph IR for a given human-readable repr.`
			`///`
docs: ensure docs are well-linked, fed and washed 2024-01-18 21:24:03 +00:00			/// Look at [`semi_human::GraphIr`] and the test files in the repo at `testfiles/`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`/// to see what the RON should look like.`
			/// No, we don't want you to write out [`GraphIr`] in full by hand.
			`/// That's something for the machines to do.`
			`///`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`/// # Errors`
			`///`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`/// Returns an error if the parsed source is not a valid human-readable graph IR.`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`pub fn from_ron(source: &str) -> ron::error::SpannedResult<GraphIr> {`
chore: put semi human graph ir into its own module 2024-01-18 20:54:41 +00:00			`let human_repr: semi_human::GraphIr = ron::from_str(source)?;`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`Ok(human_repr.into())`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`}`

			`/// The toplevel representation of a whole pipeline.`
			`///`
			`/// Pipelines may not be fully linear. They may branch out and recombine later on.`
			`/// As such, the representation for them which is currently used is a`
docs: ensure docs are well-linked, fed and washed 2024-01-18 21:24:03 +00:00			`/// [Directed Acyclic Graph](https://en.wikipedia.org/wiki/Directed_acyclic_graph).`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`///`
			`/// For those who are already familiar with graphs, a DAG is one, except that:`
			`///`
			`/// - It is directed: Edges have a direction they point to.`
			`/// In this case, edges point from the outputs of streamers to inputs of consumers.`
			`/// - It is acyclic: Those directed edges may not form loops.`
			`/// In other words, if one follows edges only in their direction, it must be impossible`
			`/// to come back to an already visited node.`
			`///`
			/// Here, if an edge points from _A_ to _B_ (`A --> B`),
			`/// then _A_ is called a dependency of _B_,`
			`/// and _B_ is called a dependent of _A_.`
			`///`
			`/// The DAG also enables another neat operation:`
			`/// [Topological sorting](https://en.wikipedia.org/wiki/Topological_sorting).`
			`/// This allows to put the entire graph into a linear list,`
			`/// where it's guaranteed that once a vertex is visited,`
			`/// all dependencies of it will have been visited already as well.`
			`///`
			`/// The representation used here in specific is a bit more complicated,`
			`/// since instructions directly aren't just connected to one another,`
			`/// but their sockets are instead.`
			`///`
			`/// So the vertices of the DAG are the sockets`
			/// (which are either [`id::Input`] or [`id::Output`] depending on the direction),
			`/// and each socket in turn belongs to an instruction.`
			`#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]`
			`pub struct GraphIr {`
			`/// "Backbone" storage of all instruction IDs to`
			`/// what kind of instruction they are.`
			`instructions: Map<id::Instruction, instruction::Kind>,`

			`/// How the data flows forward. Dependencies map to dependents here.`
			`edges: Map<id::Output, Set<id::Input>>,`
			`/// How the data flows backward. Dependents map to dependencies here.`
feat(ir): implement resolve functionality (untested) 2024-01-18 23:45:01 +00:00			`rev_edges: Map<id::Input, id::Output>,`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`}`

feat(ir): implement resolve functionality (untested) 2024-01-18 23:45:01 +00:00			`impl GraphIr {`
			`// TODO: this function, but actually the whole module, screams for tests`
			`/// Returns the instruction corresponding to the given ID.`
			/// Returns [`None`] if there is no such instruction in this graph IR.
			`///`
			`/// Theoretically this could be fixed easily at the expense of some memory`
			`/// by just incrementing and storing some global counter,`
			`/// however, at the moment there's no compelling reason`
			/// to actually have multiple [`GraphIr`]s at one point in time.
			`/// Open an issue if that poses a problem for you.`
			`#[must_use]`
			`pub fn resolve<'ir>(&'ir self, id: &id::Instruction) -> Option<Instruction<'ir>> {`
			`let (id, kind) = self.instructions.get_key_value(id)?;`

			`// just try each slot and see if it's connected`
			`// very crude, but it works for a proof of concept`
			`let SocketCount { inputs, outputs } = kind.socket_count();`
			`let socket = \|id: &id::Instruction, idx\| id::Socket {`
			`belongs_to: id.clone(),`
			`// impossible since the length is limited to a u16 already`
			`#[allow(clippy::cast_possible_truncation)]`
			`idx: id::SocketIdx(idx as u16),`
			`};`

			`let mut inputs_from = vec![None; inputs.into()];`
			`for (idx, slot) in inputs_from.iter_mut().enumerate() {`
			`let input = id::Input(socket(id, idx));`
			`*slot = self.rev_edges.get(&input);`
			`}`

			`let mut outputs_to = vec![None; outputs.into()];`
			`for (idx, slot) in outputs_to.iter_mut().enumerate() {`
			`let output = id::Output(socket(id, idx));`
			`*slot = self.edges.get(&output);`
			`}`

			`Some(Instruction {`
			`id,`
			`kind,`
			`inputs_from,`
			`outputs_to,`
			`})`
			`}`

			`/// Returns the instruction this input belongs to.`
			`///`
			/// The same caveats as for [`GraphIr::resolve`] apply.
			`#[must_use]`
			`pub fn owner_of_input<'ir>(&'ir self, input: &id::Input) -> Option<Instruction<'ir>> {`
			`self.resolve(&input.socket().belongs_to)`
			`}`

			`/// Returns the instruction this output belongs to.`
			`///`
			/// The same caveats as for [`GraphIr::resolve`] apply.
			`#[must_use]`
			`pub fn owner_of_output<'ir>(&'ir self, output: &id::Output) -> Option<Instruction<'ir>> {`
			`self.resolve(&output.socket().belongs_to)`
			`}`

			`#[must_use]`
			`pub fn topological_sort(&self) -> Vec<Instruction> {`
			`// count how many incoming edges each vertex has`
			`// chances are the BTreeMap is overkill`
			`let incoming_counts: BTreeMap<_, _> = self`
			`.rev_edges`
			`.iter()`
			`.map(\|(input, _)\| (self.owner_of_input(input), 1))`
			`.collect();`

			`todo!()`
			`}`
			`}`

			`/// A full instruction in context, with its inputs and outputs.`
			`#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]`
			`pub struct Instruction<'ir> {`
			`pub id: &'ir id::Instruction,`
			`pub kind: &'ir instruction::Kind,`

			`// can't have these two public since then a user might corrupt their length`
			`inputs_from: Vec<Option<&'ir id::Output>>,`
			`outputs_to: Vec<Option<&'ir BTreeSet<id::Input>>>,`
			`}`

			`impl<'ir> Instruction<'ir> {`
			`/// Where this instruction gets its inputs from.`
			`///`
			/// [`None`] means that this input is unfilled,
			`/// and must be filled before the instruction can be ran.`
			`#[must_use]`
			`pub fn inputs_from(&self) -> &[Option<&'ir id::Output>] {`
			`&self.inputs_from`
			`}`

			/// To whom outputs are sent. [`None`] means that this output is unused.
			`#[must_use]`
			`pub fn outputs_to(&self) -> &[Option<&'ir BTreeSet<id::Input>>] {`
			`&self.outputs_to`
			`}`
			`}`

			`/// Some part referred to in source code.`
			`#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`pub struct Span {`
feat(ir): implement resolve functionality (untested) 2024-01-18 23:45:01 +00:00			// would love to use an actual [`std::ops::RangeInclusive`], but those don't implement
			// `PartialOrd` and `Ord` unfortunately
			`/// At which byte this span starts, inclusively.`
			`pub from: usize,`
			`/// At which byte this span ends, inclusively.`
			`pub to: usize,`
			`}`

			`impl From<RangeInclusive<usize>> for Span {`
			`fn from(range: RangeInclusive<usize>) -> Self {`
			`Self {`
			`from: *range.start(),`
			`to: *range.end(),`
			`}`
			`}`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`}`