iowo/crates/ir/src/lib.rs

195 lines
7.1 KiB
Rust
Raw Normal View History

use std::{
collections::{BTreeMap, BTreeSet},
ops::RangeInclusive,
};
use instruction::SocketCount;
use serde::{Deserialize, Serialize};
pub mod id;
pub mod instruction;
pub mod semi_human;
/// Map type used by the graph IR in this file.
///
/// Plain alias for [`std::collections::BTreeMap`], so keys need to be [`Ord`]
/// and iteration happens in key order.
pub type Map<K, V> = std::collections::BTreeMap<K, V>;
/// Set type used by the graph IR in this file.
///
/// Plain alias for [`std::collections::BTreeSet`], so values need to be [`Ord`]
/// and iteration happens in sorted order.
pub type Set<V> = std::collections::BTreeSet<V>;
/// Parses a graph IR from its human-readable RON representation.
///
/// The source is first deserialized as a [`semi_human::GraphIr`]
/// and then converted into the fully typed [`GraphIr`].
///
/// Look at [`semi_human::GraphIr`] and the test files in the repo at `testfiles/`
/// to see what the RON should look like.
/// Writing out a [`GraphIr`] in full by hand is not what this is for,
/// that is a job for the machines.
///
/// # Errors
///
/// Returns an error if `source` cannot be deserialized as a [`semi_human::GraphIr`].
pub fn from_ron(source: &str) -> ron::error::SpannedResult<GraphIr> {
    ron::from_str::<semi_human::GraphIr>(source).map(Into::into)
}
/// The toplevel representation of a whole pipeline.
///
/// Pipelines may not be fully linear. They may branch out and recombine later on.
/// As such, the representation which is currently used for them is a
/// [**D**irected **A**cyclic **G**raph](https://en.wikipedia.org/wiki/Directed_acyclic_graph).
///
/// For those who are already familiar with graphs, a DAG is one, except that:
///
/// - It is **directed**: Edges have a direction they point to.
///   In this case, edges point from the outputs of streamers to inputs of consumers.
/// - It is **acyclic**: Those directed edges may not form loops.
///   In other words, if one follows edges only in their direction, it must be impossible
///   to come back to an already visited node.
///
/// Here, if an edge points from _A_ to _B_ (`A --> B`),
/// then _A_ is called a **dependency** of _B_,
/// and _B_ is called a **dependent** of _A_.
///
/// The DAG also enables another neat operation:
/// [Topological sorting](https://en.wikipedia.org/wiki/Topological_sorting).
/// This makes it possible to put the entire graph into a linear list,
/// where it's guaranteed that once a vertex is visited,
/// all of its dependencies will have been visited already as well.
///
/// The representation used here specifically is a bit more complicated,
/// since **instructions** aren't directly connected to one another,
/// but their **sockets** are instead.
///
/// So the vertices of the DAG are the **sockets**
/// (which are either [`id::Input`] or [`id::Output`] depending on the direction),
/// and each **socket** in turn belongs to an **instruction**.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct GraphIr {
/// "Backbone" storage of all **instruction** IDs to
/// what **kind of instruction** they are.
instructions: Map<id::Instruction, instruction::Kind>,
/// How the data flows forward. **Dependencies** map to **dependents** here:
/// each output socket maps to the set of input sockets it feeds.
edges: Map<id::Output, Set<id::Input>>,
/// How the data flows backward. **Dependents** map to **dependencies** here:
/// each input socket maps to the single output socket it is fed from.
rev_edges: Map<id::Input, id::Output>,
}
impl GraphIr {
    // TODO: this function, but actually the whole module, screams for tests
    /// Returns the instruction corresponding to the given ID.
    /// Returns [`None`] if there is no such instruction in this graph IR.
    ///
    /// The returned [`Instruction`] carries one slot per input and per output socket
    /// of its [`instruction::Kind`], each filled in from the stored edges.
    ///
    /// # Caveats
    ///
    /// The lookup goes purely by the value of `id`.
    /// Nothing in this function checks which [`GraphIr`] an ID was created for,
    /// so an ID taken from another graph presumably resolves to
    /// whichever instruction happens to carry the same ID in this one.
    ///
    /// Theoretically this could be fixed easily at the expense of some memory
    /// by just incrementing and storing some global counter,
    /// however, at the moment there's no compelling reason
    /// to actually have multiple [`GraphIr`]s at one point in time.
    /// Open an issue if that poses a problem for you.
    #[must_use]
    pub fn resolve<'ir>(&'ir self, id: &id::Instruction) -> Option<Instruction<'ir>> {
        // use the key stored in the map so the result borrows from `self`, not from the argument
        let (stored_id, kind) = self.instructions.get_key_value(id)?;

        // just try each slot and see if it's connected
        // very crude, but it works for a proof of concept
        let SocketCount { inputs, outputs } = kind.socket_count();
        let input_count: usize = inputs.into();
        let output_count: usize = outputs.into();

        let socket = |idx: usize| id::Socket {
            belongs_to: stored_id.clone(),
            // impossible since the length is limited to a u16 already
            #[allow(clippy::cast_possible_truncation)]
            idx: id::SocketIdx(idx as u16),
        };

        let inputs_from: Vec<_> = (0..input_count)
            .map(|idx| {
                let input = id::Input(socket(idx));
                self.rev_edges.get(&input)
            })
            .collect();
        let outputs_to: Vec<_> = (0..output_count)
            .map(|idx| {
                let output = id::Output(socket(idx));
                self.edges.get(&output)
            })
            .collect();

        Some(Instruction {
            id: stored_id,
            kind,
            inputs_from,
            outputs_to,
        })
    }

    /// Returns the instruction this input belongs to.
    /// Returns [`None`] if the owning instruction is not part of this graph IR.
    ///
    /// The same caveats as for [`GraphIr::resolve`] apply.
    #[must_use]
    pub fn owner_of_input<'ir>(&'ir self, input: &id::Input) -> Option<Instruction<'ir>> {
        self.resolve(&input.socket().belongs_to)
    }

    /// Returns the instruction this output belongs to.
    /// Returns [`None`] if the owning instruction is not part of this graph IR.
    ///
    /// The same caveats as for [`GraphIr::resolve`] apply.
    #[must_use]
    pub fn owner_of_output<'ir>(&'ir self, output: &id::Output) -> Option<Instruction<'ir>> {
        self.resolve(&output.socket().belongs_to)
    }

    /// Returns all instructions of this graph in a topological order,
    /// meaning every instruction appears only after all of its **dependencies**.
    ///
    /// The relative order of instructions which do not depend on one another is unspecified.
    ///
    /// # Panics
    ///
    /// Panics if not every instruction could be put into the order.
    /// That happens if the edges form a cycle,
    /// or if the forward and backward edges disagree with one another.
    #[must_use]
    pub fn topological_sort(&self) -> Vec<Instruction> {
        // This is Kahn's algorithm, working on whole instructions rather than single sockets.

        // count how many incoming edges each instruction still waits for
        // chances are the BTreeMap is overkill
        let mut pending_inputs: BTreeMap<id::Instruction, usize> = BTreeMap::new();
        for input in self.rev_edges.keys() {
            *pending_inputs
                .entry(input.socket().belongs_to.clone())
                .or_insert(0) += 1;
        }

        // instructions without any connected input don't wait for anything, so start with them
        let mut ready: Vec<id::Instruction> = self
            .instructions
            .keys()
            .filter(|candidate| !pending_inputs.contains_key(*candidate))
            .cloned()
            .collect();

        let mut order = Vec::with_capacity(self.instructions.len());
        while let Some(next_id) = ready.pop() {
            let instruction = match self.resolve(&next_id) {
                Some(instruction) => instruction,
                // an edge pointed at an instruction this graph doesn't know, nothing to sort
                None => continue,
            };

            // now that this instruction is placed, let all of its dependents know
            for dependents in instruction.outputs_to().iter().flatten() {
                for input in dependents.iter() {
                    let owner = input.socket().belongs_to.clone();
                    let now_ready = match pending_inputs.get_mut(&owner) {
                        Some(count) => {
                            *count -= 1;
                            *count == 0
                        }
                        // forward edge without a matching backward edge, nothing to count down
                        None => false,
                    };
                    if now_ready {
                        // dropping the entry guarantees the dependent is queued only once
                        pending_inputs.remove(&owner);
                        ready.push(owner);
                    }
                }
            }

            order.push(instruction);
        }

        assert_eq!(
            order.len(),
            self.instructions.len(),
            "topological sort must cover all instructions, do the edges form a cycle?"
        );
        order
    }
}
/// A full instruction in context, with its inputs and outputs.
///
/// Built by [`GraphIr::resolve`]; all references borrow from the graph it was resolved in.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Instruction<'ir> {
/// The ID under which this instruction is stored in the graph.
pub id: &'ir id::Instruction,
/// What kind of instruction this is.
pub kind: &'ir instruction::Kind,
// can't have these two public since then a user might corrupt their length
// (read access goes through the accessor methods of the same names instead)
inputs_from: Vec<Option<&'ir id::Output>>,
outputs_to: Vec<Option<&'ir BTreeSet<id::Input>>>,
}
impl<'ir> Instruction<'ir> {
    /// The sources this instruction receives its inputs from, one entry per input slot.
    ///
    /// An entry of [`None`] marks an input which is still unfilled.
    /// It must be filled before the instruction can be run.
    #[must_use]
    pub fn inputs_from(&self) -> &[Option<&'ir id::Output>] {
        self.inputs_from.as_slice()
    }

    /// The receivers of this instruction's outputs, one entry per output slot.
    ///
    /// An entry of [`None`] marks an output which is unused.
    #[must_use]
    pub fn outputs_to(&self) -> &[Option<&'ir BTreeSet<id::Input>>] {
        self.outputs_to.as_slice()
    }
}
/// Some part referred to in source code, given as an inclusive range of byte positions.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Span {
// would love to use an actual [`std::ops::RangeInclusive`], but those don't implement
// `PartialOrd` and `Ord` unfortunately, both of which are derived above
/// At which byte this span starts, inclusively.
pub from: usize,
/// At which byte this span ends, inclusively.
pub to: usize,
}
/// Turns an inclusive range into a [`Span`] covering the same bounds.
impl From<RangeInclusive<usize>> for Span {
    fn from(range: RangeInclusive<usize>) -> Self {
        // both bounds are inclusive on either side, so they carry over unchanged
        let (from, to) = range.into_inner();
        Self { from, to }
    }
}