iowo/crates/ir/src/lib.rs

196 lines
6.9 KiB
Rust
Raw Normal View History

use std::ops::RangeInclusive;
use serde::{Deserialize, Serialize};
pub mod instruction;
pub type Map<K, V> = ahash::AHashMap<K, V>;
pub type Set<V> = ahash::AHashSet<V>;
/// Gives you a super well typed graph IR for a given human-readable repr.
///
/// Look at [`SemiHumanGraphIr`] and the test files in the repo at `testfiles/`
/// to see what the RON should look like.
/// No, we don't want you to write out [`GraphIr`] in full by hand.
/// That's something for the machines to do.
///
/// # Errors
///
/// Returns an error if the parsed source is not a valid human-readable graph IR.
pub fn from_ron(source: &str) -> ron::error::SpannedResult<GraphIr> {
let human_repr: SemiHumanGraphIr = ron::from_str(source)?;
Ok(human_repr.into())
}
/// The toplevel representation of a whole pipeline.
///
/// Pipelines may not be fully linear. They may branch out and recombine later on.
/// As such, the representation currently used for them is a
/// [**D**irected **A**cyclic **G**raph](https://en.wikipedia.org/wiki/Directed_acyclic_graph).
///
/// For those who are already familiar with graphs, a DAG is one, except that:
///
/// - It is **directed**: Edges have a direction they point to.
///   In this case, edges point from the outputs of streamers to inputs of consumers.
/// - It is **acyclic**: Those directed edges may not form loops.
///   In other words, if one follows edges only in their direction, it must be impossible
///   to come back to an already visited node.
///
/// Here, if an edge points from _A_ to _B_ (`A --> B`),
/// then _A_ is called a **dependency** of _B_,
/// and _B_ is called a **dependent** of _A_.
///
/// The DAG also enables another neat operation:
/// [Topological sorting](https://en.wikipedia.org/wiki/Topological_sorting).
/// This makes it possible to put the entire graph into a linear list,
/// where it's guaranteed that once a vertex is visited,
/// all dependencies of it will have been visited already as well.
///
/// The representation used here specifically is a bit more complicated,
/// since **instructions** aren't connected to one another directly,
/// but their **sockets** are instead.
///
/// So the vertices of the DAG are the **sockets**
/// (which are either [`id::Input`] or [`id::Output`] depending on the direction),
/// and each **socket** in turn belongs to an **instruction**.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct GraphIr {
/// "Backbone" storage of all **instruction** IDs to
/// what **kind of instruction** they are.
instructions: Map<id::Instruction, instruction::Kind>,
/// How the data flows forward. **Dependencies** map to **dependents** here:
/// each output socket maps to the set of input sockets it feeds.
edges: Map<id::Output, Set<id::Input>>,
/// How the data flows backward. **Dependents** map to **dependencies** here:
/// each input socket maps to the set of output sockets feeding it.
rev_edges: Map<id::Input, Set<id::Output>>,
}
/// Typed identifiers for the parts of a [`GraphIr`](crate::GraphIr):
/// instructions and the sockets on them.
pub mod id {
use serde::{Deserialize, Serialize};
use crate::Span;
/// One specific instruction, and where it is found in code.
///
/// It does **not** contain what kind of instruction this is.
/// Refer to [`crate::instruction::Kind`] for this instead.
///
/// The wrapped [`Span`] is only visible to the parent module,
/// so code outside of it can read it through [`Instruction::span`] but not construct one directly.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct Instruction(pub(super) Span);
impl Instruction {
/// Where this instruction is written down.
#[must_use]
pub fn span(&self) -> &Span {
&self.0
}
}
/// On an **instruction**, accepts incoming data.
///
/// An **instruction** cannot run if any of these are not connected.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct Input(pub(super) Socket);
/// On an **instruction**, returns outgoing data to be fed to [`Input`]s.
///
/// In contrast to [`Input`]s, [`Output`]s may be used or unused.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct Output(pub(super) Socket);
/// An unspecified socket on a specific **instruction**,
/// and where it is on that **instruction**.
///
/// "Unspecified" means the direction is not recorded here;
/// wrap it in [`Input`] or [`Output`] to give it one.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct Socket {
/// The **instruction** this socket is on.
pub belongs_to: Instruction,
/// Where on that **instruction** this socket is.
pub idx: SocketIdx,
}
/// Where a [`Socket`] is on an **instruction**.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct SocketIdx(pub u16);
}
/// A location in code, stored as an inclusive range.
///
/// Used by [`id::Instruction`] to record where an instruction is written down.
///
/// NOTE(review): the unit of the range bounds (bytes, characters, ...) is not
/// visible in this file — confirm against the code producing spans.
/// The conversions from the semi-human representation in this file
/// reuse the numeric instruction ID as a one-element range `id..=id`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct Span {
/// First and last position covered, both inclusive.
range: RangeInclusive<usize>,
}
// TODO: this desperately belongs into its own file, also id should get its own file
/// Semi-human-{read,writ}able [`GraphIr`] with far less useful types.
///
/// **Do not use this if you want to programmatically construct IR.**
/// Instead, directly use [`GraphIr`].
///
/// Can be turned into a [`GraphIr`] through its `From` implementation.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct SemiHumanGraphIr {
/// See [`GraphIr::instructions`], just that a simple number is used for the ID instead
/// of a proper span.
instructions: Map<usize, instruction::Kind>,
/// See [`GraphIr::edges`]. RON wants you to type the set as a list.
///
/// Keys are treated as outputs and the members of each set as inputs
/// when converting to [`GraphIr`].
edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,
}
/// Semi-human-{read,writ}able counterpart of [`id::Socket`].
///
/// Like [`id::Socket`], it does not record whether it is an input or an output;
/// that follows from where it appears in [`SemiHumanGraphIr`]'s edges.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct SemiHumanSocket {
/// ID of the instruction this socket is on.
on: usize,
/// Where on that instruction this socket is. Becomes the [`id::SocketIdx`].
idx: u16,
}
impl From<SemiHumanSocket> for id::Socket {
fn from(source: SemiHumanSocket) -> Self {
Self {
belongs_to: (id::Instruction(Span {
range: source.on..=source.on,
})),
idx: id::SocketIdx(source.idx),
}
}
}
impl From<SemiHumanGraphIr> for GraphIr {
fn from(source: SemiHumanGraphIr) -> Self {
let edges = source.edges.clone();
Self {
instructions: source
.instructions
.into_iter()
.map(|(id, kind)| (id::Instruction(Span { range: id..=id }), kind))
.collect(),
edges: type_edges(source.edges),
// same as above, but also reverse the mapping
rev_edges: reverse_and_type_edges(edges),
}
}
}
fn type_edges(
edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,
) -> Map<id::Output, Set<id::Input>> {
edges
.into_iter()
.map(|(output, inputs)| {
let output = id::Output(output.into());
let inputs = inputs.into_iter().map(Into::into).map(id::Input).collect();
(output, inputs)
})
.collect()
}
fn reverse_and_type_edges(
edges: Map<SemiHumanSocket, Set<SemiHumanSocket>>,
) -> Map<id::Input, Set<id::Output>> {
edges
.into_iter()
.fold(Map::new(), |mut rev_edges, (output, inputs)| {
let output = id::Output(output.into());
for input in inputs {
let input = id::Input(input.into());
rev_edges.entry(input).or_default().insert(output.clone());
}
rev_edges
})
}