use std::{
    collections::{BTreeMap, BTreeSet},
    ops::RangeInclusive,
};

use instruction::SocketCount;
use serde::{Deserialize, Serialize};

pub mod id;
pub mod instruction;
pub mod semi_human;

/// The map type used across the graph IR, an alias for [`std::collections::BTreeMap`].
pub type Map<K, V> = std::collections::BTreeMap<K, V>;
/// The set type used across the graph IR, an alias for [`std::collections::BTreeSet`].
pub type Set<T> = std::collections::BTreeSet<T>;

/// Gives you a super well typed graph IR for a given human-readable repr.
///
/// Look at [`semi_human::GraphIr`] and the test files in the repo at `testfiles/`
/// to see what the RON should look like.
/// No, we don't want you to write out [`GraphIr`] in full by hand.
/// That's something for the machines to do.
///
/// The source is first parsed into a [`semi_human::GraphIr`],
/// which is then converted into the full [`GraphIr`].
///
/// # Errors
///
/// Returns an error if the parsed source is not a valid human-readable graph IR.
pub fn from_ron(source: &str) -> ron::error::SpannedResult<GraphIr> {
    let human_repr: semi_human::GraphIr = ron::from_str(source)?;
    Ok(human_repr.into())
}

/// The toplevel representation of a whole pipeline.
///
/// Pipelines may not be fully linear. They may branch out and recombine later on.
/// As such, the representation for them which is currently used is a
/// [**D**irected **A**cyclic **G**raph](https://en.wikipedia.org/wiki/Directed_acyclic_graph).
///
/// For those who are already familiar with graphs, a DAG is one, except that:
///
/// - It is **directed**: Edges have a direction they point to.
///   In this case, edges point from the outputs of streamers to inputs of consumers.
/// - It is **acyclic**: Those directed edges may not form loops.
///   In other words, if one follows edges only in their direction, it must be impossible
///   to come back to an already visited node.
///
/// Here, if an edge points from _A_ to _B_ (`A --> B`),
/// then _A_ is called a **dependency** of _B_,
/// and _B_ is called a **dependent** of _A_.
///
/// The DAG also enables another neat operation:
/// [Topological sorting](https://en.wikipedia.org/wiki/Topological_sorting).
/// This allows to put the entire graph into a linear list, /// where it's guaranteed that once a vertex is visited, /// all dependencies of it will have been visited already as well. /// /// The representation used here in specific is a bit more complicated, /// since **instructions** directly aren't just connected to one another, /// but their **sockets** are instead. /// /// So the vertices of the DAG are the **sockets** /// (which are either [`id::Input`] or [`id::Output`] depending on the direction), /// and each **socket** in turn belongs to an **instruction**. #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct GraphIr { /// "Backbone" storage of all **instruction** IDs to /// what **kind of instruction** they are. instructions: Map, /// How the data flows forward. **Dependencies** map to **dependents** here. edges: Map>, /// How the data flows backward. **Dependents** map to **dependencies** here. rev_edges: Map, } impl GraphIr { // TODO: this function, but actually the whole module, screams for tests /// Returns the instruction corresponding to the given ID. /// Returns [`None`] if there is no such instruction in this graph IR. /// /// Theoretically this could be fixed easily at the expense of some memory /// by just incrementing and storing some global counter, /// however, at the moment there's no compelling reason /// to actually have multiple [`GraphIr`]s at one point in time. /// Open an issue if that poses a problem for you. 
#[must_use] pub fn resolve<'ir>(&'ir self, id: &id::Instruction) -> Option> { let (id, kind) = self.instructions.get_key_value(id)?; // just try each slot and see if it's connected // very crude, but it works for a proof of concept let SocketCount { inputs, outputs } = kind.socket_count(); let socket = |id: &id::Instruction, idx| id::Socket { belongs_to: id.clone(), // impossible since the length is limited to a u16 already #[allow(clippy::cast_possible_truncation)] idx: id::SocketIdx(idx as u16), }; let mut inputs_from = vec![None; inputs.into()]; for (idx, slot) in inputs_from.iter_mut().enumerate() { let input = id::Input(socket(id, idx)); *slot = self.rev_edges.get(&input); } let mut outputs_to = vec![None; outputs.into()]; for (idx, slot) in outputs_to.iter_mut().enumerate() { let output = id::Output(socket(id, idx)); *slot = self.edges.get(&output); } Some(Instruction { id, kind, inputs_from, outputs_to, }) } /// Returns the instruction this input belongs to. /// /// The same caveats as for [`GraphIr::resolve`] apply. #[must_use] pub fn owner_of_input<'ir>(&'ir self, input: &id::Input) -> Option> { self.resolve(&input.socket().belongs_to) } /// Returns the instruction this output belongs to. /// /// The same caveats as for [`GraphIr::resolve`] apply. #[must_use] pub fn owner_of_output<'ir>(&'ir self, output: &id::Output) -> Option> { self.resolve(&output.socket().belongs_to) } #[must_use] pub fn topological_sort(&self) -> Vec { // count how many incoming edges each vertex has // chances are the BTreeMap is overkill let incoming_counts: BTreeMap<_, _> = self .rev_edges .iter() .map(|(input, _)| (self.owner_of_input(input), 1)) .collect(); todo!() } } /// A full instruction in context, with its inputs and outputs. 
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Instruction<'ir> { pub id: &'ir id::Instruction, pub kind: &'ir instruction::Kind, // can't have these two public since then a user might corrupt their length inputs_from: Vec>, outputs_to: Vec>>, } impl<'ir> Instruction<'ir> { /// Where this instruction gets its inputs from. /// /// [`None`] means that this input is unfilled, /// and must be filled before the instruction can be ran. #[must_use] pub fn inputs_from(&self) -> &[Option<&'ir id::Output>] { &self.inputs_from } /// To whom outputs are sent. [`None`] means that this output is unused. #[must_use] pub fn outputs_to(&self) -> &[Option<&'ir BTreeSet>] { &self.outputs_to } } /// Some part referred to in source code. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] pub struct Span { // would love to use an actual [`std::ops::RangeInclusive`], but those don't implement // `PartialOrd` and `Ord` unfortunately /// At which byte this span starts, inclusively. pub from: usize, /// At which byte this span ends, inclusively. pub to: usize, } impl From> for Span { fn from(range: RangeInclusive) -> Self { Self { from: *range.start(), to: *range.end(), } } }