From fed8cf2466c651ed5114e3a81a15b4b7b8930806 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Mon, 21 Oct 2024 15:15:06 +0200 Subject: [PATCH 1/3] pawarser: require/derive `PartialEq` + `Eq` for NodeKind and its contents --- crates/json-pawarser/src/syntax_error.rs | 2 +- crates/pawarser/src/parser.rs | 2 +- crates/pawarser/src/parser/error.rs | 2 +- crates/pawarser/src/parser/event.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/json-pawarser/src/syntax_error.rs b/crates/json-pawarser/src/syntax_error.rs index 6ff9067..84377d7 100644 --- a/crates/json-pawarser/src/syntax_error.rs +++ b/crates/json-pawarser/src/syntax_error.rs @@ -1,6 +1,6 @@ use crate::syntax_kind::SyntaxKind; -#[derive(Clone)] +#[derive(Clone, PartialEq, Eq)] pub enum SyntaxError { DisallowedKeyType(SyntaxKind), MemberMissingValue, diff --git a/crates/pawarser/src/parser.rs b/crates/pawarser/src/parser.rs index 91d44b8..bc4724f 100644 --- a/crates/pawarser/src/parser.rs +++ b/crates/pawarser/src/parser.rs @@ -13,7 +13,7 @@ pub mod marker; /// this is used to define some required SyntaxKinds like an EOF token or an error token pub trait SyntaxElement where - Self: EnumSetType + Into + Clone, + Self: EnumSetType + Into + Clone + PartialEq + Eq, { /// EOF value. This will be used by the rest of the parser library to represent an EOF. const EOF: Self; diff --git a/crates/pawarser/src/parser/error.rs b/crates/pawarser/src/parser/error.rs index ba52ff0..07c033d 100644 --- a/crates/pawarser/src/parser/error.rs +++ b/crates/pawarser/src/parser/error.rs @@ -2,6 +2,6 @@ // TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible pub trait SyntaxError where - Self: Clone, + Self: Clone + PartialEq + Eq, { } diff --git a/crates/pawarser/src/parser/event.rs b/crates/pawarser/src/parser/event.rs index 3cd0ef5..1b71d8e 100644 --- a/crates/pawarser/src/parser/event.rs +++ b/crates/pawarser/src/parser/event.rs @@ -22,7 +22,7 @@ impl Event { Tombstone, Syntax(SyntaxKind), From 9b1f6a1dc11b55b161e8d6c2d7cd32d393d28815 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Mon, 21 Oct 2024 15:15:40 +0200 Subject: [PATCH 2/3] pawarser: Implement `CompletedMarker::precede` --- crates/pawarser/src/parser/marker.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/pawarser/src/parser/marker.rs b/crates/pawarser/src/parser/marker.rs index 2d3fc5a..d03e358 100644 --- a/crates/pawarser/src/parser/marker.rs +++ b/crates/pawarser/src/parser/marker.rs @@ -69,6 +69,17 @@ pub struct CompletedMarker { impl CompletedMarker { pub fn precede(self, p: &mut Parser, name: &str) -> Marker { - todo!() + let new_pos = p.start(name); + + match &mut p.events[self.pos] { + Event::Start { forward_parent, .. } => { + // point forward parent of the node this marker completed to the new node + // will later be used to make the new node a parent of the current node. + *forward_parent = Some(new_pos.pos - self.pos) + } + _ => unreachable!(), + } + + new_pos } } From ac75978c01b22264889d06f7e54633055dc2ff36 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Mon, 21 Oct 2024 15:16:36 +0200 Subject: [PATCH 3/3] pawarser: Implement `Parser::finish` --- crates/pawarser/src/parser.rs | 89 ++++++++++++++++++++++++++++- crates/pawarser/src/parser/input.rs | 9 +++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/crates/pawarser/src/parser.rs b/crates/pawarser/src/parser.rs index bc4724f..ccbb5b2 100644 --- a/crates/pawarser/src/parser.rs +++ b/crates/pawarser/src/parser.rs @@ -1,6 +1,9 @@ -use std::cell::Cell; +use std::{cell::Cell, marker::PhantomData, mem}; use enumset::{EnumSet, EnumSetType}; +use rowan::{GreenNode, GreenNodeBuilder}; + +use crate::parser::event::NodeKind; use self::{event::Event, input::Input, marker::Marker}; pub use error::SyntaxError; @@ -93,6 +96,90 @@ impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> assert!(steps <= self.step_limit, "the parser seems stuck."); self.steps.set(steps + 1); } + + pub fn finish( + Self { + input, + pos, + mut events, + step_limit, + steps, + }: Self, + ) -> ParserOutput { + let (mut raw_toks, meaningless_tokens) = input.dissolve(); + let mut builder = GreenNodeBuilder::new(); + // TODO: document what the hell a forward parent is + let mut fw_parents = Vec::new(); + let mut errors: Vec = Vec::new(); + raw_toks.reverse(); + + for i in 0..events.len() { + match mem::replace(&mut events[i], Event::tombstone()) { + Event::Start { + kind, + forward_parent, + } => { + if kind == NodeKind::Tombstone && forward_parent.is_none() { + continue; + } + + // resolving forward parents + // temporarily jump around with the parser index and replace them with tombstones + fw_parents.push(kind); + let mut idx = i; + let mut fp = forward_parent; + while let Some(fwd) = fp { + idx += fwd as usize; + fp = match mem::replace(&mut events[idx], Event::tombstone()) { + Event::Start { + kind, + forward_parent, + } => { + fw_parents.push(kind); + forward_parent + } + _ => unreachable!(), + } + } + + // clear semantically meaningless tokens before the new tree node for aesthetic reasons + while raw_toks + .last() + .is_some_and(|v| meaningless_tokens.contains(v.0)) + { + // update first next Eat event + match events.iter_mut().find(|ev| matches!(ev, Event::Eat { .. })) { + Some(Event::Eat { count }) => *count -= 1, + _ => unreachable!(), + } + + // put whitespace into lst + let (tok, text) = raw_toks.pop().unwrap(); + builder.token(tok.into(), text); + } + + // insert forward parents into the tree in correct order + for kind in fw_parents.drain(..).rev() { + match kind { + NodeKind::Syntax(kind) => builder.start_node(kind.into()), + NodeKind::Error(err) => { + errors.push(err); + builder.start_node(SyntaxKind::ERROR.into()) + } + _ => {} + } + } + } + Event::Finish => builder.finish_node(), + Event::Eat { count } => (0..count).for_each(|_| { + let (tok, text) = raw_toks.pop().unwrap(); + builder.token(tok.into(), text); + }), + } + } + + todo!() + } } pub struct ParserBuilder< diff --git a/crates/pawarser/src/parser/input.rs b/crates/pawarser/src/parser/input.rs index 0a5ff60..a20d73d 100644 --- a/crates/pawarser/src/parser/input.rs +++ b/crates/pawarser/src/parser/input.rs @@ -55,4 +55,13 @@ impl<'src, SyntaxKind: SyntaxElement> Input<'src, SyntaxKind> { pub fn meaningless_tail_len(&self) -> usize { self.raw.len() - (self.meaningful_toks.last().unwrap() + 1) } + + pub fn dissolve(self) -> (Vec<(SyntaxKind, &'src str)>, EnumSet) { + let Self { + raw, + semantically_meaningless, + .. + } = self; + (raw, semantically_meaningless) + } }