iowo/crates/ir/src/lib.rs

use std::ops::RangeInclusive;

use serde::{Deserialize, Serialize};

pub mod instruction;

pub type Map<K, V> = ahash::AHashMap<K, V>;
pub type Set<V> = ahash::AHashSet<V>;

/// Gives you a super well typed graph IR for a given human-readable repr.
///
/// Look at [`SemiHumanGraphIr`] and the test files in the repo at `testfiles/`
/// to see what the RON should look like.
/// No, we don't want you to write out [`GraphIr`] in full by hand.
/// That's something for the machines to do.
///
/// # Errors
///
/// Returns an error if the parsed source is not a valid human-readable graph IR.
pub fn from_ron(source: &str) -> ron::error::SpannedResult<GraphIr> {
    let human_repr: SemiHumanGraphIr = ron::from_str(source)?;
    Ok(human_repr.into())
}

/// The toplevel representation of a whole pipeline.
///
/// Pipelines may not be fully linear. They may branch out and recombine later on.
/// As such, the representation currently used for them is a
/// [**D**irected **A**cyclic **G**raph](https://en.wikipedia.org/wiki/Directed_acyclic_graph).
///
/// For those who are already familiar with graphs, a DAG is one, except that:
///
/// - It is **directed**: Edges have a direction they point to.
///   In this case, edges point from the outputs of streamers to inputs of consumers.
/// - It is **acyclic**: Those directed edges may not form loops.
///   In other words, if one follows edges only in their direction, it must be impossible
///   to come back to an already visited node.
///
/// Here, if an edge points from _A_ to _B_ (`A --> B`),
/// then _A_ is called a **dependency** of _B_,
/// and _B_ is called a **dependent** of _A_.
///
/// The DAG also enables another neat operation:
/// [Topological sorting](https://en.wikipedia.org/wiki/Topological_sorting).
/// This allows putting the entire graph into a linear list,
/// where it's guaranteed that once a vertex is visited,
/// all dependencies of it will have been visited already as well.
///
/// The representation used here specifically is a bit more complicated,
/// since **instructions** aren't connected to one another directly,
/// but their **sockets** are instead.
///
/// So the vertices of the DAG are the **sockets**
/// (which are either [`id::Input`] or [`id::Output`] depending on the direction),
/// and each **socket** in turn belongs to an **instruction**.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct GraphIr {
    /// "Backbone" storage of all **instruction** IDs to
    /// what **kind of instruction** they are.
    instructions: Map<id::Instruction, instruction::Kind>,

    /// How the data flows forward. **Dependencies** map to **dependents** here.
    edges: Map<id::Output, Set<id::Input>>,
    /// How the data flows backward. **Dependents** map to **dependencies** here.
    ///
    /// When converted from a [`SemiHumanGraphIr`], this holds the same connections
    /// as `edges`, just keyed by the receiving side.
    rev_edges: Map<id::Input, Set<id::Output>>,
}

/// Typed identifiers for **instructions** and the **sockets** on them.
pub mod id {
    use serde::{Deserialize, Serialize};

    use crate::Span;

    /// One specific instruction, and where it is found in code.
    ///
    /// It does **not** contain what kind of instruction this is.
    /// Refer to [`crate::instruction::Kind`] for this instead.
    #[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
    pub struct Instruction(pub(super) Span);

    impl Instruction {
        /// Where this instruction is written down.
        #[must_use]
        pub fn span(&self) -> &Span {
            &self.0
        }
    }

    /// On an **instruction**, accepts incoming data.
    ///
    /// An **instruction** cannot run if any of these are not connected.
    #[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
    pub struct Input(pub(super) Socket);

    /// On an **instruction**, returns outgoing data to be fed to [`Input`]s.
    ///
    /// In contrast to [`Input`]s, [`Output`]s may be used or unused.
    #[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
    pub struct Output(pub(super) Socket);

    /// An unspecified socket on a specific **instruction**,
    /// and where it is on that **instruction**.
    ///
    /// "Unspecified" means it carries no direction on its own;
    /// wrap it in [`Input`] or [`Output`] to give it one.
    #[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
    pub struct Socket {
        /// The **instruction** this socket is on.
        pub belongs_to: Instruction,
        /// Where on that **instruction** this socket is.
        pub idx: SocketIdx,
    }

    /// Where a [`Socket`] is on an **instruction**.
    #[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
    pub struct SocketIdx(pub u16);
}

/// A location in source code, stored as an inclusive range of positions.
///
/// When converting from a [`SemiHumanGraphIr`], the plain instruction number
/// is used for both ends of the range instead of a proper span.
///
/// NOTE(review): whether the positions are byte or character offsets
/// is not visible from this file — confirm before relying on either.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct Span {
    /// First and last position covered, both inclusive.
    range: RangeInclusive<usize>,
}

// TODO: this desperately belongs into its own file, also id should get its own file
/// Semi-human-{read,writ}able [`GraphIr`] with far less useful types.
///
/// This is what [`from_ron`] deserializes before converting it into a [`GraphIr`].
///
/// **Do not use this if you want to programmatically construct IR.**
/// Instead, directly use [`GraphIr`].
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct SemiHumanGraphIr {
    /// See [`GraphIr::instructions`], just that a simple number is used for the ID instead
    /// of a proper span.
    instructions: Map<usize, instruction::Kind>,
    /// See [`GraphIr::edges`]. RON wants you to type the set as a list.
    ///
    /// Only the forward direction is written down here;
    /// the backward mapping is derived when converting into a [`GraphIr`].
    edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,
}

/// A socket as written in the semi-human representation, without a direction.
///
/// Whether it ends up as an [`id::Input`] or an [`id::Output`] depends on
/// whether it is used as a value or as a key in the `edges` of a [`SemiHumanGraphIr`].
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct SemiHumanSocket {
    /// ID of the instruction this socket is on.
    on: usize,
    /// Index of this socket on that instruction.
    idx: u16,
}

impl From<SemiHumanSocket> for id::Socket {
    fn from(source: SemiHumanSocket) -> Self {
        Self {
            belongs_to: (id::Instruction(Span {
                range: source.on..=source.on,
            })),
            idx: id::SocketIdx(source.idx),
        }
    }
}

impl From<SemiHumanGraphIr> for GraphIr {
    fn from(source: SemiHumanGraphIr) -> Self {
        let edges = source.edges.clone();
        Self {
            instructions: source
                .instructions
                .into_iter()
                .map(|(id, kind)| (id::Instruction(Span { range: id..=id }), kind))
                .collect(),
            edges: type_edges(source.edges),
            // same as above, but also reverse the mapping
            rev_edges: reverse_and_type_edges(edges),
        }
    }
}

fn type_edges(
    edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,
) -> Map<id::Output, Set<id::Input>> {
    edges
        .into_iter()
        .map(|(output, inputs)| {
            let output = id::Output(output.into());
            let inputs = inputs.into_iter().map(Into::into).map(id::Input).collect();
            (output, inputs)
        })
        .collect()
}

/// Gives the loosely typed forward edges their proper types, and flips their direction.
///
/// For every `output -> inputs` entry, each of the inputs is mapped
/// to the set of all outputs which point at it.
fn reverse_and_type_edges(
    edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,
) -> Map<id::Input, Set<id::Output>> {
    let mut reversed: Map<id::Input, Set<id::Output>> = Map::new();

    for (from, to) in edges {
        let dependency = id::Output(from.into());

        for socket in to {
            reversed
                .entry(id::Input(socket.into()))
                .or_default()
                .insert(dependency.clone());
        }
    }

    reversed
}
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`use std::ops::RangeInclusive;`

			`use serde::{Deserialize, Serialize};`

			`pub mod instruction;`

			`pub type Map<K, V> = ahash::AHashMap<K, V>;`
			`pub type Set<V> = ahash::AHashSet<V>;`

feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`/// Gives you a super well typed graph IR for a given human-readable repr.`
			`///`
			/// Look at [`SemiHumanGraphIr`] and the test files in the repo at `testfiles/`
			`/// to see what the RON should look like.`
			/// No, we don't want you to write out [`GraphIr`] in full by hand.
			`/// That's something for the machines to do.`
			`///`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`/// # Errors`
			`///`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`/// Returns an error if the parsed source is not a valid human-readable graph IR.`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`pub fn from_ron(source: &str) -> ron::error::SpannedResult<GraphIr> {`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`let human_repr: SemiHumanGraphIr = ron::from_str(source)?;`
			`Ok(human_repr.into())`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`}`

			`/// The toplevel representation of a whole pipeline.`
			`///`
			`/// Pipelines may not be fully linear. They may branch out and recombine later on.`
			`/// As such, the representation for them which is currently used is a`
			`/// [Directed Acyclic Graph](https://en.wikipedia.org/wiki/Directed_acyclic_graph)`
			`/// .`
			`///`
			`/// For those who are already familiar with graphs, a DAG is one, except that:`
			`///`
			`/// - It is directed: Edges have a direction they point to.`
			`/// In this case, edges point from the outputs of streamers to inputs of consumers.`
			`/// - It is acyclic: Those directed edges may not form loops.`
			`/// In other words, if one follows edges only in their direction, it must be impossible`
			`/// to come back to an already visited node.`
			`///`
			/// Here, if an edge points from _A_ to _B_ (`A --> B`),
			`/// then _A_ is called a dependency of _B_,`
			`/// and _B_ is called a dependent of _A_.`
			`///`
			`/// The DAG also enables another neat operation:`
			`/// [Topological sorting](https://en.wikipedia.org/wiki/Topological_sorting).`
			`/// This allows to put the entire graph into a linear list,`
			`/// where it's guaranteed that once a vertex is visited,`
			`/// all dependencies of it will have been visited already as well.`
			`///`
			`/// The representation used here in specific is a bit more complicated,`
			`/// since instructions directly aren't just connected to one another,`
			`/// but their sockets are instead.`
			`///`
			`/// So the vertices of the DAG are the sockets`
			/// (which are either [`id::Input`] or [`id::Output`] depending on the direction),
			`/// and each socket in turn belongs to an instruction.`
			`#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]`
			`pub struct GraphIr {`
			`/// "Backbone" storage of all instruction IDs to`
			`/// what kind of instruction they are.`
			`instructions: Map<id::Instruction, instruction::Kind>,`

			`/// How the data flows forward. Dependencies map to dependents here.`
			`edges: Map<id::Output, Set<id::Input>>,`
			`/// How the data flows backward. Dependents map to dependencies here.`
			`rev_edges: Map<id::Input, Set<id::Output>>,`
			`}`

			`pub mod id {`
			`use serde::{Deserialize, Serialize};`

			`use crate::Span;`

			`/// One specific instruction, and where it is found in code.`
			`///`
			`/// It does not contain what kind of instruction this is.`
			/// Refer to [`crate::instruction::Kind`] for this instead.
			`#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`pub struct Instruction(pub(super) Span);`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00
			`impl Instruction {`
			`/// Where this instruction is written down.`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`#[must_use]`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`pub fn span(&self) -> &Span {`
			`&self.0`
			`}`
			`}`

			`/// On an instruction, accepts incoming data.`
			`///`
			`/// An instruction cannot run if any of these are not connected.`
			`#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`pub struct Input(pub(super) Socket);`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00
			/// On an instruction, returns outgoing data to be fed to [`Input`]s.
			`///`
			/// In contrast to [`Input`]s, [`Output`]s may be used or unused.
			`#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`pub struct Output(pub(super) Socket);`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00
			`/// An unspecified socket on a specific instruction,`
			`/// and where it is on that instruction.`
			`#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]`
			`pub struct Socket {`
			`pub belongs_to: Instruction,`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00			`pub idx: SocketIdx,`
feat(ir): replace Rpl with GraphIr Semi-broken as atm the CLI just does nothing except printing the parsed IR, instead of actually executing it. 2024-01-12 16:23:17 +00:00			`}`

			/// Where a [`Socket`] is on an instruction.
			`#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]`
			`pub struct SocketIdx(pub u16);`
			`}`

			`#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]`
			`pub struct Span {`
			`range: RangeInclusive<usize>,`
			`}`
feat: add semi-human-readable-and-writable-ir 2024-01-18 20:39:19 +00:00
			`// TODO: this desperately belongs into its own file, also id should get its own file`
			/// Semi-human-{read,writ}able [`GraphIr`] with far less useful types.
			`///`
			`/// Do not use this if you want to programatically construct IR.`
			/// Instead, directly use [`GraphIr`].
			`#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]`
			`pub struct SemiHumanGraphIr {`
			/// See [`GraphIr::instructions`], just that a simple number is used for the ID instead
			`/// of a proper span.`
			`instructions: Map<usize, instruction::Kind>,`
			/// See [`GraphIr::edges`]. RON wants you to type the set as a list.
			`edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,`
			`}`

			`#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]`
			`pub struct SemiHumanSocket {`
			`/// ID of the instruction this socket is on.`
			`on: usize,`
			`idx: u16,`
			`}`

			`impl From<SemiHumanSocket> for id::Socket {`
			`fn from(source: SemiHumanSocket) -> Self {`
			`Self {`
			`belongs_to: (id::Instruction(Span {`
			`range: source.on..=source.on,`
			`})),`
			`idx: id::SocketIdx(source.idx),`
			`}`
			`}`
			`}`

			`impl From<SemiHumanGraphIr> for GraphIr {`
			`fn from(source: SemiHumanGraphIr) -> Self {`
			`let edges = source.edges.clone();`
			`Self {`
			`instructions: source`
			`.instructions`
			`.into_iter()`
			`.map(\|(id, kind)\| (id::Instruction(Span { range: id..=id }), kind))`
			`.collect(),`
			`edges: type_edges(source.edges),`
			`// same as above, but also reverse the mapping`
			`rev_edges: reverse_and_type_edges(edges),`
			`}`
			`}`
			`}`

			`fn type_edges(`
			`edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,`
			`) -> Map<id::Output, Set<id::Input>> {`
			`edges`
			`.into_iter()`
			`.map(\|(output, inputs)\| {`
			`let output = id::Output(output.into());`
			`let inputs = inputs.into_iter().map(Into::into).map(id::Input).collect();`
			`(output, inputs)`
			`})`
			`.collect()`
			`}`

			`fn reverse_and_type_edges(`
			`edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,`
			`) -> Map<id::Input, Set<id::Output>> {`
			`edges`
			`.into_iter()`
			`.fold(Map::new(), \|mut rev_edges, (output, inputs)\| {`
			`let output = id::Output(output.into());`

			`for input in inputs {`
			`let input = id::Input(input.into());`
			`rev_edges.entry(input).or_default().insert(output.clone());`
			`}`

			`rev_edges`
			`})`
			`}`