use std::{cell::Cell, fmt, marker::PhantomData, mem};

use enumset::{EnumSet, EnumSetType};
use rowan::{GreenNode, GreenNodeBuilder};

use crate::parser::event::NodeKind;

use self::{event::Event, input::Input, marker::Marker};
pub use self::{error::SyntaxError, output::ParserOutput};

pub mod error;
mod event;
mod input;
pub mod marker;
pub mod output;

/// Defines the special `SyntaxKind` values the parser requires, such as the EOF,
/// error, and root kinds.
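///
/// # Example
///
/// A minimal sketch of an implementing kind enum. `TestKind` and the `From`
/// conversions below are illustrative assumptions, not part of this crate; a
/// real implementation would map every raw [`rowan::SyntaxKind`] back to a
/// variant.
///
/// ```ignore
/// use enumset::EnumSetType;
///
/// #[derive(EnumSetType, Debug)]
/// enum TestKind {
///     Root,
///     Eof,
///     Error,
///     Whitespace,
///     Ident,
/// }
///
/// impl From<TestKind> for rowan::SyntaxKind {
///     fn from(kind: TestKind) -> Self {
///         rowan::SyntaxKind(kind as u16)
///     }
/// }
///
/// impl From<rowan::SyntaxKind> for TestKind {
///     fn from(raw: rowan::SyntaxKind) -> Self {
///         match raw.0 {
///             0 => TestKind::Root,
///             1 => TestKind::Eof,
///             2 => TestKind::Error,
///             3 => TestKind::Whitespace,
///             _ => TestKind::Ident,
///         }
///     }
/// }
///
/// impl SyntaxElement for TestKind {
///     const SYNTAX_EOF: Self = TestKind::Eof;
///     const SYNTAX_ERROR: Self = TestKind::Error;
///     const SYNTAX_ROOT: Self = TestKind::Root;
/// }
/// ```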
pub trait SyntaxElement
where
    Self: EnumSetType
        + Into<rowan::SyntaxKind>
        + From<rowan::SyntaxKind>
        + fmt::Debug
        + Clone
        + PartialEq
        + Eq,
{
    /// EOF value. This will be used by the rest of the parser library to represent an EOF.
    const SYNTAX_EOF: Self;
    /// Error value. Used as the kind of error nodes in the tree; the associated
    /// error values are collected separately in the parser output.
    const SYNTAX_ERROR: Self;
    /// Root value. Every tree the parser produces is wrapped in a single node of this kind.
    const SYNTAX_ROOT: Self;
}

/// An event-based parser over a list of `(SyntaxKind, &str)` tokens.
///
/// The parser records events while consuming the input; [`Parser::finish`] then
/// replays them into a `rowan` green tree.
pub struct Parser<'src, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
    input: Input<'src, SyntaxKind>,
    pos: usize,
    events: Vec<Event<SyntaxKind, SyntaxErr>>,
    /// maximum number of steps (lookaheads) before the parser is considered stuck
    step_limit: u32,
    steps: Cell<u32>,
}

impl<'src, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>
    Parser<'src, SyntaxKind, SyntaxErr>
{
    /// Eat all meaningless tokens at the end of the file.
    pub fn eat_succeeding_meaningless(&mut self) {
        self.push_ev(Event::Eat {
            count: self.input.meaningless_tail_len(),
        });
    }

    /// Get the kind of the token at the current parser position.
    pub fn current(&self) -> SyntaxKind {
        self.step();
        self.input.kind(self.pos)
    }

    /// Start a new node, pushing a placeholder event and returning a [`Marker`] for it.
    pub fn start(&mut self, name: &str) -> Marker {
        let pos = self.events.len();
        self.push_ev(Event::tombstone());
        Marker::new(pos, name)
    }

    /// Eat the next token if it is of kind `kind` and return `true`.
    /// Otherwise, return `false`.
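    ///
    /// # Example
    ///
    /// A small illustrative sketch; `MyKind` stands in for a concrete
    /// [`SyntaxElement`] implementation:
    ///
    /// ```ignore
    /// if !parser.eat(MyKind::Comma) {
    ///     // the expected token is missing: report an error or recover here
    /// }
    /// ```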
    pub fn eat(&mut self, kind: SyntaxKind) -> bool {
        if !self.at(kind) {
            return false;
        }

        self.do_bump();
        true
    }

    /// Advance the parser position, recording an eat event for the consumed tokens.
    pub fn do_bump(&mut self) {
        self.push_ev(Event::Eat {
            count: self.input.preceding_meaningless(self.pos),
        });
        self.pos += 1;
    }

    /// Check if the token at the current parser position is of `kind`.
    pub fn at(&self, kind: SyntaxKind) -> bool {
        self.nth_at(0, kind)
    }

    /// Check if the token that is `n` ahead is of `kind`.
    pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
        self.nth(n) == kind
    }

    /// Get the kind of the token `n` positions ahead of the current one.
    pub fn nth(&self, n: usize) -> SyntaxKind {
        self.step();
        self.input.kind(self.pos + n)
    }

    fn push_ev(&mut self, event: Event<SyntaxKind, SyntaxErr>) {
        self.events.push(event);
    }

    /// Count a parser step. Panics if the configured step limit is exceeded,
    /// which usually means the parser is stuck in a loop.
    fn step(&self) {
        let steps = self.steps.get();
        assert!(steps <= self.step_limit, "the parser seems stuck.");
        self.steps.set(steps + 1);
    }

    pub fn finish(self) -> ParserOutput<SyntaxKind, SyntaxErr> {
        let Self {
            input, mut events, ..
        } = self;
        let (mut raw_toks, meaningless_tokens) = input.dissolve();
        let mut builder = GreenNodeBuilder::new();
        // a `forward_parent` on a `Start` event is a relative offset to a later
        // `Start` event that should become the parent of the earlier node. the
        // chain is collected below and entered into the tree outermost-first,
        // which lets the parser wrap already-emitted nodes after the fact.
        let mut fw_parents = Vec::new();
        let mut errors: Vec<SyntaxErr> = Vec::new();
        raw_toks.reverse();

        // always have an implicit root node to avoid [`GreenNodeBuilder::finish()`] panicking due to multiple root elements.
        builder.start_node(SyntaxKind::SYNTAX_ROOT.into());

        for i in 0..events.len() {
            match mem::replace(&mut events[i], Event::tombstone()) {
                Event::Start {
                    kind,
                    forward_parent,
                } => {
                    if kind == NodeKind::Tombstone && forward_parent.is_none() {
                        continue;
                    }

                    // resolve forward parents: follow the chain of offsets,
                    // collecting the node kinds and replacing the visited events
                    // with tombstones so they are not processed twice
                    fw_parents.push(kind);
                    let mut idx = i;
                    let mut fp = forward_parent;
                    while let Some(fwd) = fp {
                        idx += fwd as usize;
                        fp = match mem::replace(&mut events[idx], Event::tombstone()) {
                            Event::Start {
                                kind,
                                forward_parent,
                            } => {
                                fw_parents.push(kind);
                                forward_parent
                            }
                            _ => unreachable!(),
                        }
                    }

                    // attach semantically meaningless tokens (e.g. whitespace) before
                    // starting the new tree node, purely for a nicer tree structure
                    while raw_toks
                        .last()
                        .is_some_and(|v| meaningless_tokens.contains(v.0))
                    {
                        // the token is emitted here instead, so remove it from the
                        // count of the next `Eat` event
                        match events.iter_mut().find(|ev| matches!(ev, Event::Eat { .. })) {
                            Some(Event::Eat { count }) => *count -= 1,
                            _ => unreachable!(),
                        }

                        // put the meaningless token into the tree
                        let (tok, text) = raw_toks.pop().unwrap();
                        builder.token(tok.into(), text);
                    }

                    // insert forward parents into the tree, outermost first
                    for kind in fw_parents.drain(..).rev() {
                        match kind {
                            NodeKind::Syntax(kind) => builder.start_node(kind.into()),
                            NodeKind::Error(err) => {
                                errors.push(err);
                                builder.start_node(SyntaxKind::SYNTAX_ERROR.into())
                            }
                            _ => {}
                        }
                    }
                }
                Event::Finish => builder.finish_node(),
                Event::Eat { count } => (0..count).for_each(|_| {
                    let (tok, text) = raw_toks.pop().unwrap();
                    builder.token(tok.into(), text);
                }),
            }
        }

        // finish SYNTAX_ROOT
        builder.finish_node();

        ParserOutput {
            green_node: builder.finish(),
            errors,
            _syntax_kind: PhantomData::<SyntaxKind>,
        }
    }
}

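/// Builds a [`Parser`].
///
/// # Example
///
/// A rough usage sketch. `MyKind` and `MyError` are assumed stand-ins for a
/// consumer's [`SyntaxElement`] and [`SyntaxError`] implementations, and the
/// token list would normally come from a lexer:
///
/// ```ignore
/// let raw_toks = vec![(MyKind::Ident, "foo"), (MyKind::Whitespace, " ")];
///
/// let mut parser: Parser<MyKind, MyError> = ParserBuilder::new(raw_toks)
///     .add_meaningless(MyKind::Whitespace)
///     .step_limit(4096)
///     .build();
///
/// assert!(parser.at(MyKind::Ident));
/// parser.eat(MyKind::Ident);
/// parser.eat_succeeding_meaningless();
///
/// let output = parser.finish();
/// ```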
pub struct ParserBuilder<
    'src,
    SyntaxKind: SyntaxElement,
    // SyntaxErr: SyntaxError,
> {
    raw_toks: Vec<(SyntaxKind, &'src str)>,
    meaningless_token_kinds: EnumSet<SyntaxKind>,
    step_limit: u32,
}

impl<'src, SyntaxKind: SyntaxElement> ParserBuilder<'src, SyntaxKind> {
    /// Creates a new builder from lexed `(kind, text)` tokens.
    pub fn new(raw_toks: Vec<(SyntaxKind, &'src str)>) -> Self {
        Self {
            raw_toks,
            meaningless_token_kinds: EnumSet::new(),
            step_limit: 4096,
        }
    }

    /// Sets the parser step limit.
    /// Defaults to 4096.
    pub fn step_limit(mut self, new: u32) -> Self {
        self.step_limit = new;
        self
    }

    /// Marks a token kind as semantically meaningless (e.g. whitespace).
    pub fn add_meaningless(mut self, kind: SyntaxKind) -> Self {
        self.meaningless_token_kinds.insert(kind);
        self
    }

    /// Marks several token kinds as semantically meaningless at once.
    pub fn add_meaningless_many(mut self, kinds: Vec<SyntaxKind>) -> Self {
        self.meaningless_token_kinds
            .insert_all(kinds.into_iter().collect());
        self
    }

    pub fn build<SyntaxErr: SyntaxError>(self) -> Parser<'src, SyntaxKind, SyntaxErr> {
        let Self {
            raw_toks,
            meaningless_token_kinds,
            step_limit,
        } = self;
        Parser {
            input: Input::new(raw_toks, Some(meaningless_token_kinds)),
            pos: 0,
            events: Vec::new(),
            step_limit,
            steps: Cell::new(0),
        }
    }
}