lang: parsing to events now
This commit is contained in:
parent
f7b61f9e0e
commit
1711d17fa6
8 changed files with 176 additions and 383 deletions
|
@ -7,7 +7,7 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
logos = "0.14"
|
||||
chumsky = {version= "1.0.0-alpha.7", features=["label"]}
|
||||
chumsky = {version= "1.0.0-alpha.7", features=["label", "extension"]}
|
||||
petgraph = { workspace = true}
|
||||
indexmap = "2.2.6"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#![feature(type_alias_impl_trait)]
|
||||
pub mod err_reporting;
|
||||
pub mod parser;
|
||||
pub mod tokens;
|
||||
|
|
|
@ -5,7 +5,7 @@ use lang::{
|
|||
err_reporting::ErrorCollector,
|
||||
parser::ast::lossless::{
|
||||
lex,
|
||||
parser::{self, parser_to_events::to_events},
|
||||
parser::{self, parse},
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -20,7 +20,7 @@ fn main() {
|
|||
let n = args.file.clone();
|
||||
let f = fs::read_to_string(n.clone()).expect("failed to read file");
|
||||
println!("toks: {:?}", lex::lex(&f));
|
||||
println!("evs: {:?}", to_events(&f));
|
||||
println!("parse res: {:?}", parse(&f));
|
||||
|
||||
// let parse_res = parser::parse(&f);
|
||||
// println!("parse: {:?}", parse_res);
|
||||
|
|
|
@ -10,7 +10,6 @@ pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
|||
r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice()))
|
||||
}
|
||||
|
||||
r.reverse();
|
||||
r
|
||||
}
|
||||
|
||||
|
|
|
@ -9,24 +9,29 @@ use crate::parser::{
|
|||
Span,
|
||||
};
|
||||
|
||||
use self::parser_to_events::{to_events, Event};
|
||||
|
||||
use super::lex::{self, SyntaxKind};
|
||||
|
||||
pub mod parser_to_events {
|
||||
use chumsky::Parser;
|
||||
use chumsky::prelude::*;
|
||||
|
||||
use crate::parser::ast::lossless::lex::SyntaxKind::{self, *};
|
||||
use crate::parser::ast::lossless::lex::{
|
||||
self,
|
||||
SyntaxKind::{self, *},
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(super) enum Event {
|
||||
StartNode,
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum Event {
|
||||
StartNode(SyntaxKind),
|
||||
StartErr(SyntaxError),
|
||||
EatToken,
|
||||
FinishNode,
|
||||
FinishErr,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum SyntaxError {
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum SyntaxError {
|
||||
Expected(SyntaxKind),
|
||||
AttrExpectedValue,
|
||||
/// guessed if there's a newline and attr on next line without comma
|
||||
|
@ -34,14 +39,107 @@ pub mod parser_to_events {
|
|||
ExpectedCommaBetweenAttrs,
|
||||
}
|
||||
|
||||
pub fn to_events(src: &str) -> Vec<Event> {
|
||||
let mut tokens = lex::lex(src);
|
||||
parser().parse(tokens)
|
||||
pub fn to_events(tokens: &[(SyntaxKind, &str)]) -> Vec<Event> {
|
||||
let mut only_toks: Vec<SyntaxKind> = tokens.iter().map(|(t, _)| *t).collect();
|
||||
let res = parser().parse(&only_toks);
|
||||
res.unwrap()
|
||||
}
|
||||
|
||||
pub fn parser() -> impl Parser<'static, SyntaxKind, Vec<Event>> {
|
||||
let whitespace = just(WHITESPACE).or(NEWLINE).repeated().collect::<Vec<_>>();
|
||||
whitespace
|
||||
macro_rules! padded {
|
||||
($parser:expr) => {{
|
||||
let ws = one_of([WHITESPACE, NEWLINE])
|
||||
.to(Event::EatToken)
|
||||
.repeated()
|
||||
.collect::<Vec<Event>>();
|
||||
ws.then($parser)
|
||||
.then(ws)
|
||||
.map(|((mut before, mut c), mut after)| {
|
||||
before.append(&mut c);
|
||||
before.append(&mut after);
|
||||
before
|
||||
})
|
||||
}};
|
||||
}
|
||||
|
||||
pub fn parser<'toks>() -> impl Parser<'toks, &'toks [SyntaxKind], Vec<Event>> {
|
||||
let ws = one_of([WHITESPACE, NEWLINE])
|
||||
.to(Event::EatToken)
|
||||
.repeated()
|
||||
.collect::<Vec<Event>>();
|
||||
// let ws_without_newlines = just(WHITESPACE)
|
||||
// .to(Event::EatToken)
|
||||
// .repeated()
|
||||
// .collect::<Vec<Event>>();
|
||||
let parenthesized = |c| {
|
||||
just(L_PAREN)
|
||||
.to(vec![Event::EatToken])
|
||||
.then(c)
|
||||
.then(just(R_PAREN).to(vec![Event::EatToken]))
|
||||
.map(|((mut before, mut c), mut after)| {
|
||||
before.append(&mut c);
|
||||
before.append(&mut after);
|
||||
before
|
||||
})
|
||||
};
|
||||
|
||||
let expr = recursive(|expr| {
|
||||
let lit = one_of([INT_NUM, FLOAT_NUM, STRING]).to(vec![
|
||||
Event::StartNode(EXPR),
|
||||
Event::EatToken,
|
||||
Event::FinishNode,
|
||||
]);
|
||||
|
||||
let atom = lit.clone().or(parenthesized(expr));
|
||||
|
||||
let ident = just(IDENT).to(vec![Event::EatToken]);
|
||||
let instr_name = ident
|
||||
.clone()
|
||||
.map(|mut v| {
|
||||
v.insert(0, Event::StartNode(INSTR_NAME));
|
||||
v
|
||||
})
|
||||
.foldl(
|
||||
ws.then(ident).repeated(),
|
||||
|mut ident, (mut ws, mut next)| {
|
||||
ident.append(&mut ws);
|
||||
ident.append(&mut next);
|
||||
ident
|
||||
},
|
||||
)
|
||||
.map(|mut v| {
|
||||
v.push(Event::FinishNode);
|
||||
v
|
||||
});
|
||||
let instr = padded!(instr_name)
|
||||
.then(
|
||||
atom.clone()
|
||||
.map(|mut v| {
|
||||
v.insert(0, Event::StartNode(INSTR_PARAMS));
|
||||
v
|
||||
})
|
||||
.foldl(
|
||||
ws.then(atom.clone()).repeated(),
|
||||
|mut cur, (mut ws, mut next)| {
|
||||
cur.append(&mut ws);
|
||||
cur.append(&mut next);
|
||||
cur
|
||||
},
|
||||
)
|
||||
.map(|mut v| {
|
||||
v.push(Event::FinishNode);
|
||||
v
|
||||
}),
|
||||
)
|
||||
.map(|(mut name, mut params)| {
|
||||
name.insert(0, Event::StartNode(INSTR));
|
||||
name.append(&mut params);
|
||||
name.push(Event::FinishNode);
|
||||
name
|
||||
});
|
||||
padded!(instr.or(lit).or(atom))
|
||||
});
|
||||
expr
|
||||
// .map(|(lit, mut ev)| lit.append(&mut ev));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -99,7 +197,7 @@ enum SyntaxError {
|
|||
}
|
||||
|
||||
pub fn parse(src: &str) -> Parse {
|
||||
let mut tokens = lex::lex(src);
|
||||
let tokens = lex::lex(src);
|
||||
Parser {
|
||||
tokens,
|
||||
builder: GreenNodeBuilder::new(),
|
||||
|
@ -110,12 +208,19 @@ pub fn parse(src: &str) -> Parse {
|
|||
|
||||
impl Parser<'_> {
|
||||
fn parse(mut self) -> Parse {
|
||||
self.start_node(ROOT);
|
||||
let evs = to_events(&self.tokens);
|
||||
self.builder.start_node(ROOT.into());
|
||||
|
||||
match self.expr(None) {
|
||||
expr::ExprRes::Ok => (),
|
||||
expr::ExprRes::Eof => (),
|
||||
expr::ExprRes::NoExpr => todo!(),
|
||||
self.tokens.reverse();
|
||||
|
||||
for ev in evs {
|
||||
match ev {
|
||||
Event::StartNode(kind) => self.builder.start_node(kind.into()),
|
||||
Event::StartErr(SyntaxError) => todo!(),
|
||||
Event::EatToken => self.bump(),
|
||||
Event::FinishNode => self.builder.finish_node(),
|
||||
Event::FinishErr => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
self.builder.finish_node();
|
||||
|
@ -124,13 +229,6 @@ impl Parser<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
fn start_node(&mut self, kind: SyntaxKind) {
|
||||
self.builder.start_node(kind.into());
|
||||
}
|
||||
fn finish_node(&mut self) {
|
||||
self.builder.finish_node();
|
||||
}
|
||||
|
||||
/// Advance one token, adding it to the current branch of the tree builder.
|
||||
fn bump(&mut self) {
|
||||
let (kind, text) = self.tokens.pop().unwrap();
|
||||
|
@ -143,7 +241,7 @@ impl Parser<'_> {
|
|||
}
|
||||
fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) {
|
||||
self.builder.start_node_at(checkpoint, PARSE_ERR.into());
|
||||
self.finish_node();
|
||||
self.builder.finish_node();
|
||||
self.errors.push(err);
|
||||
}
|
||||
fn expected(&mut self, expected: SyntaxKind) {
|
||||
|
@ -158,313 +256,4 @@ impl Parser<'_> {
|
|||
.get(self.tokens.len() - 2)
|
||||
.map(|(kind, _)| *kind)
|
||||
}
|
||||
fn skip_ws(&mut self) {
|
||||
while self.current() == Some(WHITESPACE) || self.current() == Some(NEWLINE) {
|
||||
self.bump()
|
||||
}
|
||||
}
|
||||
fn skip_ws_without_newlines(&mut self) {
|
||||
while self.current() == Some(WHITESPACE) {
|
||||
self.bump()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod expr {
|
||||
use rowan::Checkpoint;
|
||||
|
||||
use super::{attrset::AttrsetRes, instr::NodeRes, Parser};
|
||||
use crate::parser::{ast::lossless::lex::SyntaxKind::*, Span};
|
||||
impl Parser<'_> {
|
||||
pub(super) fn expr(&mut self, start: Option<Checkpoint>) -> ExprRes {
|
||||
self.skip_ws();
|
||||
let start = start.unwrap_or_else(|| self.builder.checkpoint());
|
||||
match self.current() {
|
||||
Some(IDENT) => {
|
||||
let expr_res = match self.instr() {
|
||||
NodeRes::Ok => ExprRes::Ok,
|
||||
NodeRes::Eof => ExprRes::Eof,
|
||||
};
|
||||
self.builder.start_node_at(start, EXPR.into());
|
||||
self.finish_node();
|
||||
expr_res
|
||||
}
|
||||
Some(_) => self.atom(Some(start)),
|
||||
None => ExprRes::Eof,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn atom(&mut self, start: Option<Checkpoint>) -> ExprRes {
|
||||
self.skip_ws();
|
||||
let start = start.unwrap_or_else(|| self.builder.checkpoint());
|
||||
match self.current() {
|
||||
Some(INT_NUM | FLOAT_NUM | STRING) => {
|
||||
self.bump();
|
||||
self.builder.start_node_at(start, EXPR.into());
|
||||
self.finish_node();
|
||||
ExprRes::Ok
|
||||
}
|
||||
Some(L_CURLY) => match self.attrset(start) {
|
||||
AttrsetRes::Ok => ExprRes::Ok,
|
||||
AttrsetRes::Eof => ExprRes::Eof,
|
||||
},
|
||||
Some(L_PAREN) => {
|
||||
self.builder.start_node_at(start, PARENTHESIZED_EXPR.into());
|
||||
self.bump();
|
||||
self.expr(None);
|
||||
self.skip_ws();
|
||||
match self.current() {
|
||||
Some(R_PAREN) => ExprRes::Ok,
|
||||
Some(_) => todo!(),
|
||||
None => ExprRes::Eof,
|
||||
}
|
||||
}
|
||||
Some(_) => ExprRes::NoExpr,
|
||||
None => ExprRes::Eof,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub enum ExprRes {
|
||||
Ok,
|
||||
Eof,
|
||||
/// isnt an expression
|
||||
NoExpr,
|
||||
}
|
||||
}
|
||||
|
||||
mod attrset {
|
||||
use chumsky::container::Container;
|
||||
use rowan::Checkpoint;
|
||||
|
||||
use super::{expr::ExprRes, instr::NodeRes, Parser};
|
||||
use crate::parser::{
|
||||
ast::lossless::{lex::SyntaxKind::*, parser::SyntaxError},
|
||||
Span,
|
||||
};
|
||||
impl Parser<'_> {
|
||||
pub(super) fn attrset(&mut self, checkpoint: Checkpoint) -> AttrsetRes {
|
||||
assert_eq!(self.current(), Some(L_CURLY));
|
||||
self.bump();
|
||||
self.skip_ws();
|
||||
match self.current() {
|
||||
Some(R_CURLY) => {
|
||||
self.builder.start_node_at(checkpoint, ATTR_SET.into());
|
||||
self.bump();
|
||||
self.finish_node();
|
||||
AttrsetRes::Ok
|
||||
}
|
||||
Some(_) => {
|
||||
self.builder.start_node_at(checkpoint, ATTR_SET.into());
|
||||
let res = match self.attrs() {
|
||||
AttrRes::Eof => AttrsetRes::Eof,
|
||||
AttrRes::RCurly | AttrRes::Ok => {
|
||||
println!("curr: {:?}", self.current());
|
||||
AttrsetRes::Ok
|
||||
}
|
||||
};
|
||||
self.finish_node();
|
||||
res
|
||||
}
|
||||
None => AttrsetRes::Eof,
|
||||
}
|
||||
// self.start_node(ATTR);
|
||||
}
|
||||
|
||||
fn attrs(&mut self) -> AttrRes {
|
||||
let mut res = AttrRes::Ok;
|
||||
|
||||
while res == AttrRes::Ok {
|
||||
println!("it: {:?}", self.tokens.last());
|
||||
match self.attr() {
|
||||
AttrRes::Ok => {
|
||||
self.skip_ws_without_newlines();
|
||||
println!(
|
||||
"a: {:?}, {:?}",
|
||||
self.tokens.last(),
|
||||
self.tokens.get(self.tokens.len() - 2)
|
||||
);
|
||||
println!("errs: {:?}", self.errors);
|
||||
res = AttrRes::Ok;
|
||||
let checkpoint_previous_end = self.builder.checkpoint();
|
||||
res = match self.current() {
|
||||
Some(COMMA) => {
|
||||
self.bump();
|
||||
AttrRes::Ok
|
||||
}
|
||||
Some(R_CURLY) => {
|
||||
self.bump();
|
||||
res = AttrRes::Ok;
|
||||
break;
|
||||
}
|
||||
Some(NEWLINE) => {
|
||||
self.skip_ws();
|
||||
println!(
|
||||
"b: {:?}, {:?}",
|
||||
self.tokens.last(),
|
||||
self.tokens.get(self.tokens.len() - 2)
|
||||
);
|
||||
match self.current() {
|
||||
Some(COMMA) => {
|
||||
self.bump();
|
||||
AttrRes::Ok
|
||||
}
|
||||
Some(R_CURLY) => {
|
||||
self.bump();
|
||||
res = AttrRes::Ok;
|
||||
break;
|
||||
}
|
||||
Some(IDENT) => {
|
||||
println!("wtf");
|
||||
self.syntax_err_by_checkpoint(
|
||||
checkpoint_previous_end,
|
||||
SyntaxError::ExpectedCommaBetweenAttrs,
|
||||
);
|
||||
// self.syntax_err(SyntaxError::ExpectedCommaBetweenAttrs);
|
||||
AttrRes::Ok
|
||||
}
|
||||
Some(_) => {
|
||||
self.bump();
|
||||
AttrRes::Ok
|
||||
}
|
||||
None => {
|
||||
res = AttrRes::Eof;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(_) => {
|
||||
self.bump();
|
||||
println!(
|
||||
"c: {:?}, {:?}",
|
||||
self.tokens.last(),
|
||||
self.tokens.get(self.tokens.len() - 2)
|
||||
);
|
||||
AttrRes::Ok
|
||||
}
|
||||
None => {
|
||||
res = AttrRes::Eof;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
AttrRes::Eof => {
|
||||
res = AttrRes::Eof;
|
||||
break;
|
||||
}
|
||||
AttrRes::RCurly => {
|
||||
res = AttrRes::RCurly;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
println!("toks_left: {:?}", self.tokens);
|
||||
res
|
||||
}
|
||||
|
||||
fn attr(&mut self) -> AttrRes {
|
||||
self.skip_ws();
|
||||
self.start_node(ATTR);
|
||||
self.start_node(ATTR_NAME);
|
||||
match self.current() {
|
||||
Some(IDENT) => self.bump(),
|
||||
Some(R_CURLY) => return AttrRes::Ok,
|
||||
Some(_) => self.expected(IDENT),
|
||||
None => return AttrRes::Eof,
|
||||
}
|
||||
self.finish_node();
|
||||
self.skip_ws();
|
||||
match self.current() {
|
||||
Some(COLON) => self.bump(),
|
||||
Some(R_CURLY) => {
|
||||
self.expected(COLON);
|
||||
return AttrRes::RCurly;
|
||||
}
|
||||
Some(_) => self.expected(COLON),
|
||||
None => return AttrRes::Eof,
|
||||
}
|
||||
self.skip_ws();
|
||||
self.start_node(ATTR_VALUE);
|
||||
match self.expr(None) {
|
||||
ExprRes::Ok => self.bump(),
|
||||
ExprRes::Eof => return AttrRes::Eof,
|
||||
ExprRes::NoExpr => match self.current() {
|
||||
Some(COMMA) => self.syntax_err(SyntaxError::AttrExpectedValue),
|
||||
Some(R_CURLY) => {
|
||||
self.syntax_err(SyntaxError::AttrExpectedValue);
|
||||
return AttrRes::RCurly;
|
||||
}
|
||||
Some(_) => self.expected(EXPR),
|
||||
None => unreachable!(),
|
||||
},
|
||||
}
|
||||
self.finish_node();
|
||||
self.finish_node();
|
||||
AttrRes::Ok
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
pub enum AttrsetRes {
|
||||
Ok,
|
||||
Eof,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
enum AttrRes {
|
||||
Ok,
|
||||
Eof,
|
||||
RCurly,
|
||||
}
|
||||
}
|
||||
|
||||
mod instr {
|
||||
use super::Parser;
|
||||
use crate::parser::{
|
||||
ast::lossless::{lex::SyntaxKind::*, parser::expr::ExprRes},
|
||||
Span,
|
||||
};
|
||||
|
||||
impl Parser<'_> {
|
||||
pub(super) fn instr(&mut self) -> NodeRes {
|
||||
assert_eq!(self.current(), Some(IDENT));
|
||||
self.skip_ws();
|
||||
self.start_node(INSTR);
|
||||
self.instr_name();
|
||||
|
||||
// used to count positionals
|
||||
let mut i = 0;
|
||||
let params_checkpoint = self.builder.checkpoint();
|
||||
loop {
|
||||
match self.expr(None) {
|
||||
ExprRes::Ok => {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
ExprRes::NoExpr | ExprRes::Eof => break,
|
||||
}
|
||||
}
|
||||
if i >= 1 {
|
||||
self.builder
|
||||
.start_node_at(params_checkpoint, INSTR_PARAMS.into());
|
||||
self.finish_node();
|
||||
}
|
||||
self.finish_node();
|
||||
NodeRes::Ok
|
||||
}
|
||||
|
||||
fn instr_name(&mut self) {
|
||||
self.start_node(INSTR_NAME);
|
||||
while self.current() == Some(IDENT) {
|
||||
self.bump();
|
||||
self.skip_ws_without_newlines();
|
||||
}
|
||||
self.finish_node();
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) enum NodeRes {
|
||||
Ok,
|
||||
Eof,
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue