init i guess

This commit is contained in:
Schrottkatze 2025-05-12 15:50:27 +02:00
parent c90532830e
commit 01de2f385a
Signed by: schrottkatze
SSH key fingerprint: SHA256:FPOYVeBy3QP20FEM42uWF1Wa/Qhlk+L3S2+Wuau/Auo
21 changed files with 1315 additions and 0 deletions

View file

@ -0,0 +1,12 @@
[package]
name = "lopal_core"
version = "0.1.0"
edition = "2021"
[dependencies]
rowan = "0.15.15"
drop_bomb = "0.1.5"
enumset = "1.1.3"
[lints]
workspace = true

View file

@ -0,0 +1,8 @@
#![feature(iter_collect_into)]
pub mod parser;
pub use parser::{
error::SyntaxError,
marker::{CompletedMarker, Marker},
Parser, SyntaxElement,
};

View file

@ -0,0 +1,253 @@
use std::{cell::Cell, fmt, marker::PhantomData, mem};
use enumset::{EnumSet, EnumSetType};
use rowan::{GreenNode, GreenNodeBuilder};
use crate::parser::event::NodeKind;
use self::{event::Event, input::Input, marker::Marker};
pub use {error::SyntaxError, output::ParserOutput};
pub mod error;
mod event;
mod input;
pub mod marker;
pub mod output;
/// this is used to define some required SyntaxKinds like an EOF token or an error token
pub trait SyntaxElement
where
Self: EnumSetType
+ Into<rowan::SyntaxKind>
+ From<rowan::SyntaxKind>
+ fmt::Debug
+ Clone
+ PartialEq
+ Eq,
{
/// EOF value. This will be used by the rest of the parser library to represent an EOF.
const SYNTAX_EOF: Self;
/// Error value. This will be used as a placeholder for associated respective errors.
const SYNTAX_ERROR: Self;
const SYNTAX_ROOT: Self;
}
pub struct Parser<'src, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
input: Input<'src, SyntaxKind>,
pos: usize,
events: Vec<Event<SyntaxKind, SyntaxErr>>,
step_limit: u32,
steps: Cell<u32>,
}
impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>
Parser<'src, SyntaxKind, SyntaxErr>
{
/// eat all meaningless tokens at the end of the file.
pub fn eat_succeeding_meaningless(&mut self) {
self.push_ev(Event::Eat {
count: self.input.meaningless_tail_len(),
});
}
/// Get token from current position of the parser.
pub fn current(&self) -> SyntaxKind {
self.step();
self.input.kind(self.pos)
}
pub fn start(&mut self, name: &str) -> Marker {
let pos = self.events.len();
self.push_ev(Event::tombstone());
Marker::new(pos, name)
}
/// Eat next token if it's of kind `kind` and return `true`.
/// Otherwise, `false`.
pub fn eat(&mut self, kind: SyntaxKind) -> bool {
if !self.at(kind) {
return false;
}
self.do_bump();
true
}
pub fn do_bump(&mut self) {
self.push_ev(Event::Eat {
count: self.input.preceding_meaningless(self.pos),
});
self.pos += 1;
}
/// Check if the token at the current parser position is of `kind`
pub fn at(&self, kind: SyntaxKind) -> bool {
self.nth_at(0, kind)
}
/// Check if the token that is `n` ahead is of `kind`
pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
self.nth(n) == kind
}
pub fn nth(&self, n: usize) -> SyntaxKind {
self.step();
self.input.kind(self.pos + n)
}
fn push_ev(&mut self, event: Event<SyntaxKind, SyntaxErr>) {
self.events.push(event);
}
fn step(&self) {
let steps = self.steps.get();
assert!(steps <= self.step_limit, "the parser seems stuck.");
self.steps.set(steps + 1);
}
pub fn finish(self) -> ParserOutput<SyntaxKind, SyntaxErr> {
let Self {
input,
pos,
mut events,
step_limit,
steps,
} = self;
let (mut raw_toks, meaningless_tokens) = input.dissolve();
let mut builder = GreenNodeBuilder::new();
// TODO: document what the hell a forward parent is
let mut fw_parents = Vec::new();
let mut errors: Vec<SyntaxErr> = Vec::new();
raw_toks.reverse();
// always have an implicit root node to avoid [`GreenNodeBuilder::finish()`] panicking due to multiple root elements.
builder.start_node(SyntaxKind::SYNTAX_ROOT.into());
for i in 0..events.len() {
match mem::replace(&mut events[i], Event::tombstone()) {
Event::Start {
kind,
forward_parent,
} => {
if kind == NodeKind::Tombstone && forward_parent.is_none() {
continue;
}
// resolving forward parents
// temporarily jump around with the parser index and replace them with tombstones
fw_parents.push(kind);
let mut idx = i;
let mut fp = forward_parent;
while let Some(fwd) = fp {
idx += fwd as usize;
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Event::Start {
kind,
forward_parent,
} => {
fw_parents.push(kind);
forward_parent
}
_ => unreachable!(),
}
}
// clear semantically meaningless tokens before the new tree node for aesthetic reasons
while raw_toks
.last()
.is_some_and(|v| meaningless_tokens.contains(v.0))
{
// update first next Eat event
match events.iter_mut().find(|ev| matches!(ev, Event::Eat { .. })) {
Some(Event::Eat { count }) => *count -= 1,
_ => unreachable!(),
}
// put whitespace into lst
let (tok, text) = raw_toks.pop().unwrap();
builder.token(tok.into(), text);
}
// insert forward parents into the tree in correct order
for kind in fw_parents.drain(..).rev() {
match kind {
NodeKind::Syntax(kind) => builder.start_node(kind.into()),
NodeKind::Error(err) => {
errors.push(err);
builder.start_node(SyntaxKind::SYNTAX_ERROR.into())
}
_ => {}
}
}
}
Event::Finish => builder.finish_node(),
Event::Eat { count } => (0..count).for_each(|_| {
let (tok, text) = raw_toks.pop().unwrap();
builder.token(tok.into(), text);
}),
}
}
// finish SYNTAX_ROOT
builder.finish_node();
ParserOutput {
green_node: builder.finish(),
errors,
_syntax_kind: PhantomData::<SyntaxKind>,
}
}
}
pub struct ParserBuilder<
'src,
SyntaxKind: SyntaxElement,
// SyntaxErr: SyntaxError,
> {
raw_toks: Vec<(SyntaxKind, &'src str)>,
meaningless_token_kinds: EnumSet<SyntaxKind>,
step_limit: u32,
}
impl<'src, SyntaxKind: SyntaxElement> ParserBuilder<'src, SyntaxKind> {
pub fn new(raw_toks: Vec<(SyntaxKind, &'src str)>) -> Self {
Self {
raw_toks,
meaningless_token_kinds: EnumSet::new(),
step_limit: 4096,
}
}
/// Sets the parser step limit.
/// Defaults to 4096
pub fn step_limit(mut self, new: u32) -> Self {
self.step_limit = new;
self
}
pub fn add_meaningless(mut self, kind: SyntaxKind) -> Self {
self.meaningless_token_kinds.insert(kind);
self
}
pub fn add_meaningless_many(mut self, kind: Vec<SyntaxKind>) -> Self {
self.meaningless_token_kinds
.insert_all(kind.into_iter().collect());
self
}
pub fn build<SyntaxErr: SyntaxError>(self) -> Parser<'src, SyntaxKind, SyntaxErr> {
let Self {
raw_toks,
meaningless_token_kinds,
step_limit,
} = self;
Parser {
input: Input::new(raw_toks, Some(meaningless_token_kinds)),
pos: 0,
events: Vec::new(),
step_limit,
steps: Cell::new(0),
}
}
}

View file

@ -0,0 +1,9 @@
use std::fmt;
/// A marker trait... for now!
// TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible
pub trait SyntaxError
where
Self: fmt::Debug + Clone + PartialEq + Eq,
{
}

View file

@ -0,0 +1,42 @@
use enumset::EnumSetType;
use super::{error::SyntaxError, SyntaxElement};
pub enum Event<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
Start {
kind: NodeKind<SyntaxKind, SyntaxErr>,
forward_parent: Option<usize>,
},
Finish,
Eat {
count: usize,
},
}
impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> Event<SyntaxKind, SyntaxErr> {
pub fn tombstone() -> Self {
Self::Start {
kind: NodeKind::Tombstone,
forward_parent: None,
}
}
}
#[derive(Clone, PartialEq, Eq)]
pub enum NodeKind<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
Tombstone,
Syntax(SyntaxKind),
Error(SyntaxErr),
}
impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> NodeKind<SyntaxKind, SyntaxErr> {
pub fn is_tombstone(&self) -> bool {
matches!(self, Self::Tombstone)
}
pub fn is_syntax(&self) -> bool {
matches!(self, Self::Syntax(_))
}
pub fn is_error(&self) -> bool {
matches!(self, Self::Error(_))
}
}

View file

@ -0,0 +1,67 @@
use enumset::{EnumSet, EnumSetType};
use super::SyntaxElement;
pub struct Input<'src, SyntaxKind: SyntaxElement> {
raw: Vec<(SyntaxKind, &'src str)>,
// enumset of meaningless tokens
semantically_meaningless: EnumSet<SyntaxKind>,
// indices of non-meaningless tokens
meaningful_toks: Vec<usize>,
}
impl<'src, SyntaxKind: SyntaxElement> Input<'src, SyntaxKind> {
pub fn new(
raw_toks: Vec<(SyntaxKind, &'src str)>,
meaningless: Option<EnumSet<SyntaxKind>>,
) -> Self {
let mut meaningful_toks = Vec::new();
if let Some(meaningless) = meaningless {
let meaningful_toks = raw_toks
.iter()
.enumerate()
.filter_map(|(i, tok)| (!meaningless.contains(tok.0)).then_some(i))
.collect_into(&mut meaningful_toks);
}
Self {
raw: raw_toks,
semantically_meaningless: meaningless.unwrap_or_default(),
meaningful_toks,
}
}
pub fn kind(&self, idx: usize) -> SyntaxKind {
let Some(meaningful_idx) = self.meaningful_toks.get(idx) else {
return SyntaxKind::SYNTAX_EOF;
};
self.raw.get(*meaningful_idx).unwrap().0
}
pub fn preceding_meaningless(&self, idx: usize) -> usize {
assert!(self.meaningful_toks.len() > idx);
if idx == 0 {
// maybe should be `self.meaningful_toks[idx]` instead??
1
} else {
self.meaningful_toks[idx] - self.meaningful_toks[idx - 1]
}
}
/// get the count of meaningless tokens at the end of the file.
pub fn meaningless_tail_len(&self) -> usize {
self.raw.len() - (self.meaningful_toks.last().unwrap() + 1)
}
pub fn dissolve(self) -> (Vec<(SyntaxKind, &'src str)>, EnumSet<SyntaxKind>) {
let Self {
raw,
semantically_meaningless,
..
} = self;
(raw, semantically_meaningless)
}
}

View file

@ -0,0 +1,97 @@
use drop_bomb::DropBomb;
use rowan::SyntaxKind;
use super::{
error::SyntaxError,
event::{Event, NodeKind},
Parser, SyntaxElement,
};
pub struct Marker {
pos: usize,
bomb: DropBomb,
}
impl Marker {
pub(super) fn new(pos: usize, name: &str) -> Self {
Self {
pos,
bomb: DropBomb::new(format!("Marker {name} must be completed or abandoned.")),
}
}
fn close_node<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
mut self,
p: &mut Parser<SyntaxKind, SyntaxErr>,
kind: NodeKind<SyntaxKind, SyntaxErr>,
) -> CompletedMarker<SyntaxKind, SyntaxErr> {
self.bomb.defuse();
match &mut p.events[self.pos] {
Event::Start { kind: slot, .. } => *slot = kind.clone(),
_ => unreachable!(),
}
p.push_ev(Event::Finish);
CompletedMarker {
pos: self.pos,
kind,
}
}
pub fn complete<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
self,
p: &mut Parser<SyntaxKind, SyntaxErr>,
kind: SyntaxKind,
) -> CompletedMarker<SyntaxKind, SyntaxErr> {
self.close_node(p, NodeKind::Syntax(kind))
}
pub fn error<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
self,
p: &mut Parser<SyntaxKind, SyntaxErr>,
kind: SyntaxErr,
) -> CompletedMarker<SyntaxKind, SyntaxErr> {
self.close_node(p, NodeKind::Error(kind))
}
pub fn abandon<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
mut self,
p: &mut Parser<SyntaxKind, SyntaxErr>,
) {
self.bomb.defuse();
// clean up empty tombstone event from marker
if self.pos == p.events.len() - 1 {
match p.events.pop() {
Some(Event::Start {
kind: NodeKind::Tombstone,
forward_parent: None,
}) => (),
_ => unreachable!(),
}
}
}
}
pub struct CompletedMarker<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
pos: usize,
kind: NodeKind<SyntaxKind, SyntaxErr>,
}
impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> CompletedMarker<SyntaxKind, SyntaxErr> {
pub fn precede(self, p: &mut Parser<SyntaxKind, SyntaxErr>, name: &str) -> Marker {
let new_pos = p.start(name);
match &mut p.events[self.pos] {
Event::Start { forward_parent, .. } => {
// point forward parent of the node this marker completed to the new node
// will later be used to make the new node a parent of the current node.
*forward_parent = Some(new_pos.pos - self.pos)
}
_ => unreachable!(),
}
new_pos
}
}

View file

@ -0,0 +1,73 @@
use std::{fmt, marker::PhantomData};
use rowan::{GreenNode, GreenNodeData, GreenTokenData, NodeOrToken};
use crate::{SyntaxElement, SyntaxError};
pub struct ParserOutput<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
pub green_node: GreenNode,
pub errors: Vec<SyntaxErr>,
pub(super) _syntax_kind: PhantomData<SyntaxKind>,
}
impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> std::fmt::Debug
for ParserOutput<SyntaxKind, SyntaxErr>
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut errs: Vec<&SyntaxErr> = self.errors.iter().collect();
errs.reverse();
debug_print_output::<SyntaxKind, SyntaxErr>(
NodeOrToken::Node(&self.green_node),
f,
0,
&mut errs,
)
}
}
fn debug_print_output<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
f: &mut std::fmt::Formatter<'_>,
lvl: i32,
errs: &mut Vec<&SyntaxErr>,
) -> std::fmt::Result {
if f.alternate() {
for _ in 0..lvl {
f.write_str(" ")?;
}
}
let maybe_newline = if f.alternate() { "\n" } else { " " };
match node {
NodeOrToken::Node(n) => {
let kind: SyntaxKind = node.kind().into();
if kind != SyntaxKind::SYNTAX_ERROR {
write!(f, "{:?} {{{maybe_newline}", kind)?;
} else {
let err = errs
.pop()
.expect("all error syntax nodes should correspond to an error");
write!(f, "{:?}: {err:?} {{{maybe_newline}", kind)?;
}
for c in n.children() {
debug_print_output::<SyntaxKind, SyntaxErr>(c, f, lvl + 1, errs)?;
}
if f.alternate() {
for _ in 0..lvl {
f.write_str(" ")?;
}
}
write!(f, "}}{maybe_newline}")
}
NodeOrToken::Token(t) => {
write!(
f,
"{:?} {:?};{maybe_newline}",
Into::<SyntaxKind>::into(t.kind()),
t.text()
)
}
}
}