use std::{cell::Cell, fmt, marker::PhantomData, mem};

use enumset::{EnumSet, EnumSetType};
use rowan::{GreenNode, GreenNodeBuilder};

use crate::parser::event::NodeKind;

use self::{event::Event, input::Input, marker::Marker};
pub use self::{error::SyntaxError, output::ParserOutput};

pub mod error;
mod event;
mod input;
pub mod marker;
pub mod output;

/// Defines the special `SyntaxKind`s the parser needs, such as an EOF token or an error token.
pub trait SyntaxElement
where
    Self: EnumSetType
        + Into<rowan::SyntaxKind>
        + From<rowan::SyntaxKind>
        + fmt::Debug
        + Clone
        + PartialEq
        + Eq,
{
    /// EOF value. This will be used by the rest of the parser library to represent an EOF.
    const SYNTAX_EOF: Self;
    /// Error value. This is used as a placeholder kind for nodes that carry an associated error.
    const SYNTAX_ERROR: Self;
    /// Root value. Every finished tree is wrapped in an implicit node of this kind.
    const SYNTAX_ROOT: Self;
}

pub struct Parser<'src, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
    input: Input<'src, SyntaxKind>,
    pos: usize,
    events: Vec<Event<SyntaxKind, SyntaxErr>>,
    step_limit: u32,
    steps: Cell<u32>,
}

impl<'src, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> Parser<'src, SyntaxKind, SyntaxErr> {
    /// Eat all meaningless tokens at the end of the file.
    pub fn eat_succeeding_meaningless(&mut self) {
        self.push_ev(Event::Eat {
            count: self.input.meaningless_tail_len(),
        });
    }

    /// Get the token kind at the current position of the parser.
    pub fn current(&self) -> SyntaxKind {
        self.step();
        self.input.kind(self.pos)
    }

    /// Start a new node at the current position and return a [`Marker`] for it.
    pub fn start(&mut self, name: &str) -> Marker {
        let pos = self.events.len();
        self.push_ev(Event::tombstone());
        Marker::new(pos, name)
    }

    /// Eat the next token if it is of kind `kind` and return `true`.
    /// Otherwise, return `false`.
    pub fn eat(&mut self, kind: SyntaxKind) -> bool {
        if !self.at(kind) {
            return false;
        }
        self.do_bump();
        true
    }

    /// Unconditionally advance the parser by one token.
    pub fn do_bump(&mut self) {
        self.push_ev(Event::Eat {
            count: self.input.preceding_meaningless(self.pos),
        });
        self.pos += 1;
    }

    /// Check if the token at the current parser position is of `kind`.
    pub fn at(&self, kind: SyntaxKind) -> bool {
        self.nth_at(0, kind)
    }

    /// Check if the token that is `n` ahead is of `kind`.
    pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
        self.nth(n) == kind
    }

    /// Get the kind of the token that is `n` ahead.
    pub fn nth(&self, n: usize) -> SyntaxKind {
        self.step();
        self.input.kind(self.pos + n)
    }

    fn push_ev(&mut self, event: Event<SyntaxKind, SyntaxErr>) {
        self.events.push(event);
    }

    /// Count a parser step and panic if the step limit is exceeded.
    fn step(&self) {
        let steps = self.steps.get();
        assert!(steps <= self.step_limit, "the parser seems stuck.");
        self.steps.set(steps + 1);
    }

    /// Consume the parser and build the green tree from the recorded events.
    pub fn finish(self) -> ParserOutput<SyntaxKind, SyntaxErr> {
        let Self {
            input,
            pos,
            mut events,
            step_limit,
            steps,
        } = self;
        let (mut raw_toks, meaningless_tokens) = input.dissolve();
        let mut builder = GreenNodeBuilder::new();
        // A "forward parent" is a node whose `Start` event appears *later* in the event
        // stream but which must become an ancestor of the current node in the finished
        // tree; `forward_parent` stores its position as a relative offset. Chains of such
        // parents are collected into `fw_parents` and opened outermost-first below.
        let mut fw_parents = Vec::new();
        let mut errors: Vec<SyntaxErr> = Vec::new();
        raw_toks.reverse();
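        // Illustrative sketch of the event stream (hypothetical kinds, not data from this
        // crate): a token stream like `[(Word, "a"), (Whitespace, " ")]` parsed into a
        // single node might have been recorded as
        //
        //     Start { kind: Syntax(SomeNode), forward_parent: None }
        //     Eat   { count: 1 }   // the `Word` token
        //     Finish
        //     Eat   { count: 1 }   // trailing meaningless whitespace
        //
        // The loop below replays these events against the `GreenNodeBuilder`, resolving
        // forward parents and attaching meaningless tokens along the way.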
        // always have an implicit root node to avoid [`GreenNodeBuilder::finish()`]
        // panicking due to multiple root elements.
        builder.start_node(SyntaxKind::SYNTAX_ROOT.into());

        for i in 0..events.len() {
            match mem::replace(&mut events[i], Event::tombstone()) {
                Event::Start {
                    kind,
                    forward_parent,
                } => {
                    if kind == NodeKind::Tombstone && forward_parent.is_none() {
                        continue;
                    }

                    // resolve forward parents: walk the chain of `forward_parent` offsets,
                    // collecting their kinds and replacing the visited events with tombstones
                    fw_parents.push(kind);
                    let mut idx = i;
                    let mut fp = forward_parent;
                    while let Some(fwd) = fp {
                        idx += fwd as usize;
                        fp = match mem::replace(&mut events[idx], Event::tombstone()) {
                            Event::Start {
                                kind,
                                forward_parent,
                            } => {
                                fw_parents.push(kind);
                                forward_parent
                            }
                            _ => unreachable!(),
                        }
                    }

                    // emit semantically meaningless tokens (e.g. whitespace) before opening
                    // the new node, so that they stay outside of it
                    while raw_toks
                        .last()
                        .is_some_and(|v| meaningless_tokens.contains(v.0))
                    {
                        // the next `Eat` event now covers one token fewer
                        match events.iter_mut().find(|ev| matches!(ev, Event::Eat { .. })) {
                            Some(Event::Eat { count }) => *count -= 1,
                            _ => unreachable!(),
                        }

                        // attach the meaningless token to the current node
                        let (tok, text) = raw_toks.pop().unwrap();
                        builder.token(tok.into(), text);
                    }

                    // open the collected forward parents in the correct order,
                    // outermost (farthest forward parent) first
                    for kind in fw_parents.drain(..).rev() {
                        match kind {
                            NodeKind::Syntax(kind) => builder.start_node(kind.into()),
                            NodeKind::Error(err) => {
                                errors.push(err);
                                builder.start_node(SyntaxKind::SYNTAX_ERROR.into())
                            }
                            _ => {}
                        }
                    }
                }
                Event::Finish => builder.finish_node(),
                Event::Eat { count } => (0..count).for_each(|_| {
                    let (tok, text) = raw_toks.pop().unwrap();
                    builder.token(tok.into(), text);
                }),
            }
        }

        // finish the implicit SYNTAX_ROOT node
        builder.finish_node();

        ParserOutput {
            green_node: builder.finish(),
            errors,
            _syntax_kind: PhantomData::<SyntaxKind>,
        }
    }
}

pub struct ParserBuilder<
    'src,
    SyntaxKind: SyntaxElement,
    // SyntaxErr: SyntaxError,
> {
    raw_toks: Vec<(SyntaxKind, &'src str)>,
    meaningless_token_kinds: EnumSet<SyntaxKind>,
    step_limit: u32,
}

impl<'src, SyntaxKind: SyntaxElement> ParserBuilder<'src, SyntaxKind> {
    pub fn new(raw_toks: Vec<(SyntaxKind, &'src str)>) -> Self {
        Self {
            raw_toks,
            meaningless_token_kinds: EnumSet::new(),
            step_limit: 4096,
        }
    }

    /// Sets the parser step limit.
    /// Defaults to 4096.
    pub fn step_limit(mut self, new: u32) -> Self {
        self.step_limit = new;
        self
    }

    /// Mark a token kind as semantically meaningless (e.g. whitespace).
    pub fn add_meaningless(mut self, kind: SyntaxKind) -> Self {
        self.meaningless_token_kinds.insert(kind);
        self
    }

    /// Mark several token kinds as semantically meaningless.
    pub fn add_meaningless_many(mut self, kinds: Vec<SyntaxKind>) -> Self {
        self.meaningless_token_kinds
            .insert_all(kinds.into_iter().collect());
        self
    }

    /// Build a [`Parser`] from this configuration.
    pub fn build<SyntaxErr: SyntaxError>(self) -> Parser<'src, SyntaxKind, SyntaxErr> {
        let Self {
            raw_toks,
            meaningless_token_kinds,
            step_limit,
        } = self;
        Parser {
            input: Input::new(raw_toks, Some(meaningless_token_kinds)),
            pos: 0,
            events: Vec::new(),
            step_limit,
            steps: Cell::new(0),
        }
    }
}
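// Usage sketch (illustrative only): a consumer defines its own token kind enum plus an
// error type implementing `SyntaxError` (whose required items live in the `error` module
// and are not shown here), as well as `From`/`Into` conversions between the enum and
// `rowan::SyntaxKind`. The names `MyKind`, `MyError`, and the token list below are
// hypothetical.
//
//     #[derive(enumset::EnumSetType, Debug)]
//     enum MyKind { Whitespace, Word, Eof, Error, Root }
//
//     impl SyntaxElement for MyKind {
//         const SYNTAX_EOF: Self = Self::Eof;
//         const SYNTAX_ERROR: Self = Self::Error;
//         const SYNTAX_ROOT: Self = Self::Root;
//     }
//
//     let mut parser: Parser<'_, MyKind, MyError> =
//         ParserBuilder::new(vec![(MyKind::Word, "hello"), (MyKind::Whitespace, " ")])
//             .add_meaningless(MyKind::Whitespace)
//             .step_limit(1024)
//             .build();
//
//     // ... drive the grammar with `start`, `at`, `eat`, `do_bump`, and markers ...
//     parser.eat_succeeding_meaningless();
//     let output = parser.finish();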