forked from katzen-cafe/iowo
pawarser(setup): continue working on the bare basics
This commit is contained in:
parent
34ddaacb58
commit
21bcf62ea5
5 changed files with 196 additions and 26 deletions
|
@ -2,18 +2,25 @@ use std::cell::Cell;
|
||||||
|
|
||||||
use enumset::{EnumSet, EnumSetType};
|
use enumset::{EnumSet, EnumSetType};
|
||||||
|
|
||||||
use self::{error::SyntaxError, event::Event, input::Input};
|
use self::{error::SyntaxError, event::Event, input::Input, marker::Marker};
|
||||||
|
|
||||||
mod error;
|
mod error;
|
||||||
mod event;
|
mod event;
|
||||||
pub mod input;
|
mod input;
|
||||||
|
mod marker;
|
||||||
|
|
||||||
pub struct Parser<
|
/// This is used to define some required `SyntaxKind`s, like an EOF token or an error token.
|
||||||
'src,
|
pub trait SyntaxElement
|
||||||
'toks,
|
where
|
||||||
SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>,
|
Self: EnumSetType + Into<rowan::SyntaxKind> + Clone,
|
||||||
SyntaxErr: SyntaxError,
|
{
|
||||||
> {
|
/// EOF value. This will be used by the rest of the parser library to represent an EOF.
|
||||||
|
const EOF: Self;
|
||||||
|
/// Error value. This will be used as a placeholder for the respective associated errors.
|
||||||
|
const ERROR: Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Parser<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
|
||||||
input: Input<'src, 'toks, SyntaxKind>,
|
input: Input<'src, 'toks, SyntaxKind>,
|
||||||
pos: usize,
|
pos: usize,
|
||||||
events: Vec<Event<SyntaxKind, SyntaxErr>>,
|
events: Vec<Event<SyntaxKind, SyntaxErr>>,
|
||||||
|
@ -21,10 +28,76 @@ pub struct Parser<
|
||||||
steps: Cell<u32>,
|
steps: Cell<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>
|
||||||
|
Parser<'src, 'toks, SyntaxKind, SyntaxErr>
|
||||||
|
{
|
||||||
|
/// eat all meaningless tokens at the end of the file.
|
||||||
|
pub fn eat_succeeding_meaningless(&mut self) {
|
||||||
|
self.push_ev(Event::Eat {
|
||||||
|
count: self.input.meaningless_tail_len(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get token from current position of the parser.
|
||||||
|
pub fn current(&self) -> SyntaxKind {
|
||||||
|
self.step();
|
||||||
|
self.input.kind(self.pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn start(&mut self, name: &str) -> Marker {
|
||||||
|
let pos = self.events.len();
|
||||||
|
self.push_ev(Event::tombstone());
|
||||||
|
Marker::new(pos, name)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Eat next token if it's of kind `kind` and return `true`.
|
||||||
|
/// Otherwise, `false`.
|
||||||
|
pub fn eat(&mut self, kind: SyntaxKind) -> bool {
|
||||||
|
if !self.at(kind) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.do_bump();
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn do_bump(&mut self) {
|
||||||
|
self.push_ev(Event::Eat {
|
||||||
|
count: self.input.preceding_meaningless(self.pos),
|
||||||
|
});
|
||||||
|
self.pos += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the token at the current parser position is of `kind`
|
||||||
|
pub fn at(&self, kind: SyntaxKind) -> bool {
|
||||||
|
self.nth_at(0, kind)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the token that is `n` ahead is of `kind`
|
||||||
|
pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
|
||||||
|
self.nth(n) == kind
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn nth(&self, n: usize) -> SyntaxKind {
|
||||||
|
self.step();
|
||||||
|
self.input.kind(self.pos + n)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn push_ev(&mut self, event: Event<SyntaxKind, SyntaxErr>) {
|
||||||
|
self.events.push(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn step(&self) {
|
||||||
|
let steps = self.steps.get();
|
||||||
|
assert!(steps <= self.step_limit, "the parser seems stuck.");
|
||||||
|
self.steps.set(steps + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct ParserBuilder<
|
pub struct ParserBuilder<
|
||||||
'src,
|
'src,
|
||||||
'toks,
|
'toks,
|
||||||
SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>,
|
SyntaxKind: SyntaxElement,
|
||||||
// SyntaxErr: SyntaxError,
|
// SyntaxErr: SyntaxError,
|
||||||
> {
|
> {
|
||||||
raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
|
raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
|
||||||
|
@ -32,9 +105,7 @@ pub struct ParserBuilder<
|
||||||
step_limit: u32,
|
step_limit: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
|
impl<'src, 'toks, SyntaxKind: SyntaxElement> ParserBuilder<'src, 'toks, SyntaxKind> {
|
||||||
ParserBuilder<'src, 'toks, SyntaxKind>
|
|
||||||
{
|
|
||||||
pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
|
pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
raw_toks,
|
raw_toks,
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
/// A marker trait... for now!
|
/// A marker trait... for now!
|
||||||
// TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible
|
// TODO: constrain that conversion to `NodeKind::Error` is enforced to be possible
|
||||||
pub trait SyntaxError {}
|
pub trait SyntaxError: Clone {}
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use enumset::EnumSetType;
|
use enumset::EnumSetType;
|
||||||
|
|
||||||
use super::error::SyntaxError;
|
use super::{error::SyntaxError, SyntaxElement};
|
||||||
|
|
||||||
pub enum Event<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError> {
|
pub enum Event<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
|
||||||
Start {
|
Start {
|
||||||
kind: NodeKind<SyntaxKind, SyntaxErr>,
|
kind: NodeKind<SyntaxKind, SyntaxErr>,
|
||||||
forward_parent: Option<usize>,
|
forward_parent: Option<usize>,
|
||||||
|
@ -13,9 +13,7 @@ pub enum Event<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: Syn
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError>
|
impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> Event<SyntaxKind, SyntaxErr> {
|
||||||
Event<SyntaxKind, SyntaxErr>
|
|
||||||
{
|
|
||||||
pub fn tombstone() -> Self {
|
pub fn tombstone() -> Self {
|
||||||
Self::Start {
|
Self::Start {
|
||||||
kind: NodeKind::Tombstone,
|
kind: NodeKind::Tombstone,
|
||||||
|
@ -24,15 +22,14 @@ impl<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum NodeKind<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError> {
|
#[derive(Clone)]
|
||||||
|
pub enum NodeKind<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
|
||||||
Tombstone,
|
Tombstone,
|
||||||
Syntax(SyntaxKind),
|
Syntax(SyntaxKind),
|
||||||
Error(SyntaxErr),
|
Error(SyntaxErr),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>, SyntaxErr: SyntaxError>
|
impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> NodeKind<SyntaxKind, SyntaxErr> {
|
||||||
NodeKind<SyntaxKind, SyntaxErr>
|
|
||||||
{
|
|
||||||
pub fn is_tombstone(&self) -> bool {
|
pub fn is_tombstone(&self) -> bool {
|
||||||
matches!(self, Self::Tombstone)
|
matches!(self, Self::Tombstone)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
use enumset::{EnumSet, EnumSetType};
|
use enumset::{EnumSet, EnumSetType};
|
||||||
|
|
||||||
pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>> {
|
use super::SyntaxElement;
|
||||||
|
|
||||||
|
pub struct Input<'src, 'toks, SyntaxKind: SyntaxElement> {
|
||||||
raw: &'toks Vec<(SyntaxKind, &'src str)>,
|
raw: &'toks Vec<(SyntaxKind, &'src str)>,
|
||||||
// enumset of meaningless tokens
|
// enumset of meaningless tokens
|
||||||
semantically_meaningless: EnumSet<SyntaxKind>,
|
semantically_meaningless: EnumSet<SyntaxKind>,
|
||||||
|
@ -8,9 +10,7 @@ pub struct Input<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
|
||||||
meaningful_toks: Vec<usize>,
|
meaningful_toks: Vec<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
|
impl<'src, 'toks, SyntaxKind: SyntaxElement> Input<'src, 'toks, SyntaxKind> {
|
||||||
Input<'src, 'toks, SyntaxKind>
|
|
||||||
{
|
|
||||||
pub fn new(
|
pub fn new(
|
||||||
raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
|
raw_toks: &'toks Vec<(SyntaxKind, &'src str)>,
|
||||||
meaningless: Option<EnumSet<SyntaxKind>>,
|
meaningless: Option<EnumSet<SyntaxKind>>,
|
||||||
|
@ -31,4 +31,28 @@ impl<'src, 'toks, SyntaxKind: EnumSetType + Into<rowan::SyntaxKind>>
|
||||||
meaningful_toks,
|
meaningful_toks,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn kind(&self, idx: usize) -> SyntaxKind {
|
||||||
|
let Some(meaningful_idx) = self.meaningful_toks.get(idx) else {
|
||||||
|
return SyntaxKind::EOF;
|
||||||
|
};
|
||||||
|
|
||||||
|
self.raw.get(*meaningful_idx).unwrap().0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn preceding_meaningless(&self, idx: usize) -> usize {
|
||||||
|
assert!(self.meaningful_toks.len() > idx);
|
||||||
|
|
||||||
|
if idx == 0 {
|
||||||
|
// maybe should be `self.meaningful_toks[idx]` instead??
|
||||||
|
1
|
||||||
|
} else {
|
||||||
|
self.meaningful_toks[idx] - self.meaningful_toks[idx - 1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// get the count of meaningless tokens at the end of the file.
|
||||||
|
pub fn meaningless_tail_len(&self) -> usize {
|
||||||
|
self.raw.len() - (self.meaningful_toks.last().unwrap() + 1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
74
crates/pawarser/src/parser/marker.rs
Normal file
74
crates/pawarser/src/parser/marker.rs
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
use drop_bomb::DropBomb;
|
||||||
|
use rowan::SyntaxKind;
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
error::SyntaxError,
|
||||||
|
event::{Event, NodeKind},
|
||||||
|
Parser, SyntaxElement,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub struct Marker {
|
||||||
|
pos: usize,
|
||||||
|
bomb: DropBomb,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Marker {
|
||||||
|
pub(super) fn new(pos: usize, name: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
pos,
|
||||||
|
bomb: DropBomb::new(format!("Marker {name} must be completed or abandoned.")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn close_node<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
|
||||||
|
mut self,
|
||||||
|
p: &mut Parser<SyntaxKind, SyntaxErr>,
|
||||||
|
kind: NodeKind<SyntaxKind, SyntaxErr>,
|
||||||
|
) -> CompletedMarker<SyntaxKind, SyntaxErr> {
|
||||||
|
self.bomb.defuse();
|
||||||
|
|
||||||
|
match &mut p.events[self.pos] {
|
||||||
|
Event::Start { kind: slot, .. } => *slot = kind.clone(),
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
|
||||||
|
p.push_ev(Event::Finish);
|
||||||
|
CompletedMarker {
|
||||||
|
pos: self.pos,
|
||||||
|
kind,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn complete<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
|
||||||
|
self,
|
||||||
|
p: &mut Parser<SyntaxKind, SyntaxErr>,
|
||||||
|
kind: SyntaxKind,
|
||||||
|
) -> CompletedMarker<SyntaxKind, SyntaxErr> {
|
||||||
|
self.close_node(p, NodeKind::Syntax(kind))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn error<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
|
||||||
|
self,
|
||||||
|
p: &mut Parser<SyntaxKind, SyntaxErr>,
|
||||||
|
kind: SyntaxErr,
|
||||||
|
) -> CompletedMarker<SyntaxKind, SyntaxErr> {
|
||||||
|
self.close_node(p, NodeKind::Error(kind))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn abandon<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>(
|
||||||
|
mut self,
|
||||||
|
p: &mut Parser<SyntaxKind, SyntaxErr>,
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct CompletedMarker<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> {
|
||||||
|
pos: usize,
|
||||||
|
kind: NodeKind<SyntaxKind, SyntaxErr>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError> CompletedMarker<SyntaxKind, SyntaxErr> {
|
||||||
|
pub fn precede(self, p: &mut Parser<SyntaxKind, SyntaxErr>, name: &str) -> Marker {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue