lang: rewrite parser
This commit is contained in:
parent
6d8b79e8f7
commit
381ab45edc
25 changed files with 524 additions and 1161 deletions
|
@ -1,24 +0,0 @@
|
|||
use std::collections::{BTreeMap, HashMap};
|
||||
|
||||
use indexmap::IndexMap;
|
||||
|
||||
use super::Spanned;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct File<'src> {
|
||||
pub decls: IndexMap<Spanned<&'src str>, raw_ast::RawExpression<'src>>,
|
||||
}
|
||||
|
||||
pub mod raw_ast;
|
||||
|
||||
/// A literal value appearing in the source text.
#[derive(Debug, PartialEq)]
pub enum Lit<'src> {
    // TODO: more bigger better number types
    /// Integer literal.
    Int(i64),
    /// Floating point literal.
    Float(f64),
    /// String literal, borrowed from the source.
    String(&'src str),
}
|
||||
|
||||
pub mod lossless;
|
||||
|
||||
pub mod ast_tree;
|
|
@ -1,31 +0,0 @@
|
|||
use ego_tree::Tree;
|
||||
|
||||
use crate::parser::Spanned;
|
||||
|
||||
use super::{File, Lit};
|
||||
|
||||
pub struct Ast<'src> {
|
||||
tree: Tree<AstNode<'src>>,
|
||||
}
|
||||
|
||||
struct AstNode<'src> {
|
||||
kind: NodeKind<'src>,
|
||||
}
|
||||
|
||||
enum NodeKind<'src> {
|
||||
Decl,
|
||||
Ident(&'src str),
|
||||
Instr,
|
||||
Expr,
|
||||
MappingPipe,
|
||||
NullPipe,
|
||||
MultiPipe,
|
||||
Var(&'src str),
|
||||
InputVar(&'src str),
|
||||
AttrSet,
|
||||
Attr,
|
||||
Lit(Lit<'src>),
|
||||
Matrix,
|
||||
Dimensions(u16, u16),
|
||||
MatrixRow,
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
use self::lex::SyntaxKind;
|
||||
|
||||
pub mod parser;
|
||||
|
||||
pub mod lex;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
enum Lang {}
|
||||
impl rowan::Language for Lang {
|
||||
type Kind = SyntaxKind;
|
||||
#[allow(unsafe_code)]
|
||||
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= SyntaxKind::ROOT as u16);
|
||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
}
|
||||
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
|
||||
kind.into()
|
||||
}
|
||||
}
|
|
@ -1,290 +0,0 @@
|
|||
use std::borrow::Borrow;
|
||||
|
||||
use rowan::{
|
||||
Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken,
|
||||
};
|
||||
|
||||
use crate::parser::{
|
||||
ast::lossless::{lex::SyntaxKind::*, Lang},
|
||||
Span,
|
||||
};
|
||||
|
||||
use self::parser_to_events::{to_events, Event};
|
||||
|
||||
use super::lex::{self, SyntaxKind};
|
||||
|
||||
pub mod parser_to_events {
|
||||
use chumsky::prelude::*;
|
||||
|
||||
use crate::parser::ast::lossless::lex::{
|
||||
self,
|
||||
SyntaxKind::{self, *},
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum Event {
|
||||
StartNode(SyntaxKind),
|
||||
StartErr(SyntaxError),
|
||||
EatToken,
|
||||
FinishNode,
|
||||
FinishErr,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum SyntaxError {
|
||||
Expected(SyntaxKind),
|
||||
AttrExpectedValue,
|
||||
/// guessed if there's a newline and attr on next line without comma
|
||||
/// should then suggest comma after attr
|
||||
ExpectedCommaBetweenAttrs,
|
||||
}
|
||||
|
||||
pub fn to_events(tokens: &[(SyntaxKind, &str)]) -> Vec<Event> {
|
||||
let only_toks: Vec<SyntaxKind> = tokens.iter().map(|(t, _)| *t).collect();
|
||||
let res = parser().parse(&only_toks);
|
||||
res.unwrap()
|
||||
}
|
||||
|
||||
/// Wraps a `Vec<Event>`-producing parser so that any run of `WHITESPACE` /
/// `NEWLINE` tokens before and after it is consumed as `Event::EatToken`s.
///
/// The resulting event list is: leading trivia, inner events, trailing trivia.
macro_rules! padded {
    ($parser:expr) => {{
        let trivia = one_of([WHITESPACE, NEWLINE])
            .to(Event::EatToken)
            .repeated()
            .collect::<Vec<Event>>();
        trivia
            .then($parser)
            .then(trivia)
            .map(|((mut events, mut inner), mut trailing)| {
                events.append(&mut inner);
                events.append(&mut trailing);
                events
            })
    }};
}
|
||||
/// Wraps a `Vec<Event>`-producing parser in `L_PAREN` ... `R_PAREN` and emits
/// the whole thing as a `PARENTHESIZED_EXPR` node (both parens are eaten
/// inside the node).
macro_rules! parenthesized {
    ($parser:expr) => {
        just(L_PAREN)
            .to(vec![Event::EatToken])
            .then($parser)
            .then(just(R_PAREN).to(vec![Event::EatToken]))
            .map(|((mut events, mut inner), mut closing)| {
                // open the node in front of the already recorded `(`
                events.insert(0, Event::StartNode(PARENTHESIZED_EXPR));
                events.append(&mut inner);
                events.append(&mut closing);
                events.push(Event::FinishNode);
                events
            })
    };
}
|
||||
|
||||
pub fn parser<'toks>() -> impl Parser<'toks, &'toks [SyntaxKind], Vec<Event>> {
|
||||
let ws = one_of([WHITESPACE, NEWLINE])
|
||||
.to(Event::EatToken)
|
||||
.repeated()
|
||||
.collect::<Vec<Event>>();
|
||||
let ident = just(IDENT).to(vec![Event::EatToken]);
|
||||
|
||||
let expr = recursive(|expr| {
|
||||
let lit = one_of([INT_NUM, FLOAT_NUM, STRING]).to(vec![
|
||||
Event::StartNode(EXPR),
|
||||
Event::EatToken,
|
||||
Event::FinishNode,
|
||||
]);
|
||||
let attrset = just(L_CURLY)
|
||||
.then(
|
||||
padded!(just(IDENT).to(vec![
|
||||
Event::StartNode(ATTR),
|
||||
Event::StartNode(ATTR_NAME),
|
||||
Event::EatToken,
|
||||
Event::FinishNode
|
||||
]))
|
||||
.then(just(COLON))
|
||||
.then(padded!(expr.clone().map(|mut exp: Vec<Event>| {
|
||||
exp.insert(0, Event::StartNode(ATTR_VALUE));
|
||||
exp.push(Event::FinishNode);
|
||||
exp.push(Event::FinishNode);
|
||||
exp
|
||||
})))
|
||||
.map(|((mut name, _), mut value)| {
|
||||
// colon
|
||||
name.push(Event::EatToken);
|
||||
name.append(&mut value);
|
||||
name
|
||||
}),
|
||||
)
|
||||
.then(just(R_CURLY))
|
||||
.map(|((_, mut attrs), _)| {
|
||||
attrs.insert(0, Event::StartNode(ATTR_SET));
|
||||
attrs.insert(0, Event::EatToken);
|
||||
attrs.push(Event::EatToken);
|
||||
attrs.push(Event::FinishNode);
|
||||
attrs
|
||||
});
|
||||
|
||||
let atom = lit.clone().or(attrset).or(parenthesized!(expr));
|
||||
|
||||
let instr_name = ident
|
||||
.clone()
|
||||
.map(|mut v| {
|
||||
v.insert(0, Event::StartNode(INSTR_NAME));
|
||||
v
|
||||
})
|
||||
.foldl(
|
||||
ws.then(ident).repeated(),
|
||||
|mut ident, (mut ws, mut next)| {
|
||||
ident.append(&mut ws);
|
||||
ident.append(&mut next);
|
||||
ident
|
||||
},
|
||||
)
|
||||
.map(|mut v| {
|
||||
v.push(Event::FinishNode);
|
||||
v
|
||||
});
|
||||
let instr = padded!(instr_name)
|
||||
.then(
|
||||
atom.clone()
|
||||
.map(|mut v| {
|
||||
v.insert(0, Event::StartNode(INSTR_PARAMS));
|
||||
v
|
||||
})
|
||||
.foldl(
|
||||
ws.then(atom.clone()).repeated(),
|
||||
|mut cur, (mut ws, mut next)| {
|
||||
cur.append(&mut ws);
|
||||
cur.append(&mut next);
|
||||
cur
|
||||
},
|
||||
)
|
||||
.map(|mut v| {
|
||||
v.push(Event::FinishNode);
|
||||
v
|
||||
}),
|
||||
)
|
||||
.map(|(mut name, mut params)| {
|
||||
name.insert(0, Event::StartNode(INSTR));
|
||||
name.append(&mut params);
|
||||
name.push(Event::FinishNode);
|
||||
name
|
||||
});
|
||||
padded!(instr.or(lit).or(atom))
|
||||
});
|
||||
expr
|
||||
// .map(|(lit, mut ev)| lit.append(&mut ev));
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
pub struct Parse {
|
||||
pub green_node: GreenNode,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Parse {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
debug_print_green_node(NodeOrToken::Node(self.green_node.borrow()), f, 0)
|
||||
}
|
||||
}
|
||||
|
||||
fn debug_print_green_node(
|
||||
node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
lvl: i32,
|
||||
) -> std::fmt::Result {
|
||||
for _ in 0..lvl {
|
||||
f.write_str(" ")?;
|
||||
}
|
||||
|
||||
match node {
|
||||
NodeOrToken::Node(n) => {
|
||||
writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind()));
|
||||
for c in n.children() {
|
||||
debug_print_green_node(c, f, lvl + 1)?;
|
||||
}
|
||||
for _ in 0..lvl {
|
||||
f.write_str(" ")?;
|
||||
}
|
||||
f.write_str("}\n")
|
||||
}
|
||||
NodeOrToken::Token(t) => {
|
||||
writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Parser<'src> {
|
||||
tokens: Vec<(SyntaxKind, &'src str)>,
|
||||
builder: GreenNodeBuilder<'src>,
|
||||
errors: Vec<SyntaxError>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum SyntaxError {
|
||||
Expected(SyntaxKind),
|
||||
AttrExpectedValue,
|
||||
/// guessed if there's a newline and attr on next line without comma
|
||||
/// should then suggest comma after attr
|
||||
ExpectedCommaBetweenAttrs,
|
||||
}
|
||||
|
||||
pub fn parse(src: &str) -> Parse {
|
||||
let tokens = lex::lex(src);
|
||||
Parser {
|
||||
tokens,
|
||||
builder: GreenNodeBuilder::new(),
|
||||
errors: Vec::new(),
|
||||
}
|
||||
.parse()
|
||||
}
|
||||
|
||||
impl Parser<'_> {
|
||||
fn parse(mut self) -> Parse {
|
||||
let evs = to_events(&self.tokens);
|
||||
self.builder.start_node(ROOT.into());
|
||||
println!("evs: {evs:?}");
|
||||
|
||||
self.tokens.reverse();
|
||||
|
||||
for ev in evs {
|
||||
match ev {
|
||||
Event::StartNode(kind) => self.builder.start_node(kind.into()),
|
||||
Event::StartErr(SyntaxError) => todo!(),
|
||||
Event::EatToken => self.bump(),
|
||||
Event::FinishNode => self.builder.finish_node(),
|
||||
Event::FinishErr => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
self.builder.finish_node();
|
||||
Parse {
|
||||
green_node: self.builder.finish(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance one token, adding it to the current branch of the tree builder.
|
||||
fn bump(&mut self) {
|
||||
let (kind, text) = self.tokens.pop().unwrap();
|
||||
self.builder.token(kind.into(), text);
|
||||
}
|
||||
fn syntax_err(&mut self, err: SyntaxError) {
|
||||
let (_, text) = self.tokens.pop().unwrap();
|
||||
self.builder.token(PARSE_ERR.into(), text);
|
||||
self.errors.push(err);
|
||||
}
|
||||
fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) {
|
||||
self.builder.start_node_at(checkpoint, PARSE_ERR.into());
|
||||
self.builder.finish_node();
|
||||
self.errors.push(err);
|
||||
}
|
||||
fn expected(&mut self, expected: SyntaxKind) {
|
||||
self.syntax_err(SyntaxError::Expected(expected))
|
||||
}
|
||||
/// Peek at the first unprocessed token
|
||||
fn current(&self) -> Option<SyntaxKind> {
|
||||
self.tokens.last().map(|(kind, _)| *kind)
|
||||
}
|
||||
fn next(&self) -> Option<SyntaxKind> {
|
||||
self.tokens
|
||||
.get(self.tokens.len() - 2)
|
||||
.map(|(kind, _)| *kind)
|
||||
}
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
use indexmap::IndexMap;
|
||||
|
||||
use super::super::Spanned;
|
||||
|
||||
use super::super::Span;
|
||||
use super::Lit;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct RawExpression<'src> {
|
||||
pub expr: Box<RawExpr<'src>>,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl<'src> RawExpression<'src> {
|
||||
pub fn new(expr: RawExpr<'src>, span: Span) -> Self {
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum RawExpr<'src> {
|
||||
Node(
|
||||
Vec<Spanned<&'src str>>,
|
||||
Option<Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>>,
|
||||
),
|
||||
SimplePipe(RawExpression<'src>, RawExpression<'src>),
|
||||
// NamingPipe(
|
||||
// Box<Expression<'src>>,
|
||||
// (Vec<Spanned<&'src str>>, Vec<Spanned<&'src str>>),
|
||||
// Box<Expression<'src>>,
|
||||
// ),
|
||||
MappingPipe(RawExpression<'src>, RawExpression<'src>),
|
||||
NullPipe(RawExpression<'src>, RawExpression<'src>),
|
||||
MultiPipe(IndexMap<Spanned<&'src str>, RawExpression<'src>>),
|
||||
// LetIn(
|
||||
// IndexMap<Spanned<&'src str>, Box<Expression<'src>>>,
|
||||
// Box<Expression<'src>>,
|
||||
// ),
|
||||
// $
|
||||
Var(&'src str),
|
||||
// @
|
||||
InputVar(&'src str),
|
||||
AttrSet(Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>),
|
||||
Lit(Lit<'src>),
|
||||
Matrix(Spanned<(u16, u16)>, Vec<RawExpression<'src>>),
|
||||
List(Vec<RawExpression<'src>>),
|
||||
}
|
6
crates/lang/src/parser/error.rs
Normal file
6
crates/lang/src/parser/error.rs
Normal file
|
@ -0,0 +1,6 @@
|
|||
use crate::parser::syntax_kind::SyntaxKind;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SyntaxError {
|
||||
Expected(Vec<SyntaxKind>),
|
||||
}
|
23
crates/lang/src/parser/events.rs
Normal file
23
crates/lang/src/parser/events.rs
Normal file
|
@ -0,0 +1,23 @@
|
|||
use crate::parser::syntax_kind::SyntaxKind;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Event {
|
||||
Start {
|
||||
kind: SyntaxKind,
|
||||
forward_parent: Option<usize>,
|
||||
},
|
||||
Finish,
|
||||
Eat {
|
||||
count: usize,
|
||||
},
|
||||
Error,
|
||||
}
|
||||
|
||||
impl Event {
|
||||
pub(crate) fn tombstone() -> Self {
|
||||
Self::Start {
|
||||
kind: SyntaxKind::TOMBSTONE,
|
||||
forward_parent: None,
|
||||
}
|
||||
}
|
||||
}
|
14
crates/lang/src/parser/grammar.rs
Normal file
14
crates/lang/src/parser/grammar.rs
Normal file
|
@ -0,0 +1,14 @@
|
|||
use crate::parser::syntax_kind::SyntaxKind::*;
|
||||
|
||||
use super::Parser;
|
||||
|
||||
mod expression;
|
||||
|
||||
pub fn source_file(p: &mut Parser) {
|
||||
let root = p.start();
|
||||
|
||||
expression::expression(p);
|
||||
p.eat_succeeding_ws();
|
||||
|
||||
root.complete(p, ROOT);
|
||||
}
|
14
crates/lang/src/parser/grammar/expression.rs
Normal file
14
crates/lang/src/parser/grammar/expression.rs
Normal file
|
@ -0,0 +1,14 @@
|
|||
use crate::parser::{syntax_kind::SyntaxKind::*, Parser};
|
||||
|
||||
use self::{instruction::instr, lit::literal};
|
||||
|
||||
mod instruction;
|
||||
mod lit;
|
||||
|
||||
pub fn expression(p: &mut Parser) {
|
||||
let expr = p.start();
|
||||
|
||||
instr(p);
|
||||
|
||||
expr.complete(p, EXPR);
|
||||
}
|
30
crates/lang/src/parser/grammar/expression/instruction.rs
Normal file
30
crates/lang/src/parser/grammar/expression/instruction.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
use crate::parser::{syntax_kind::SyntaxKind::*, Parser};
|
||||
|
||||
use super::lit::literal;
|
||||
|
||||
pub fn instr(p: &mut Parser) {
|
||||
let instr = p.start();
|
||||
|
||||
instr_name(p);
|
||||
instr_params(p);
|
||||
|
||||
instr.complete(p, INSTR);
|
||||
}
|
||||
|
||||
fn instr_name(p: &mut Parser) {
|
||||
let instr_name = p.start();
|
||||
|
||||
while p.at(IDENT) {
|
||||
p.do_bump();
|
||||
}
|
||||
|
||||
instr_name.complete(p, INSTR_NAME);
|
||||
}
|
||||
|
||||
fn instr_params(p: &mut Parser) {
|
||||
if let Some(start) = literal(p) {
|
||||
while literal(p).is_some() {}
|
||||
|
||||
start.precede(p).complete(p, INSTR_PARAMS);
|
||||
}
|
||||
}
|
20
crates/lang/src/parser/grammar/expression/lit.rs
Normal file
20
crates/lang/src/parser/grammar/expression/lit.rs
Normal file
|
@ -0,0 +1,20 @@
|
|||
use enumset::enum_set;
|
||||
|
||||
use crate::parser::{
|
||||
syntax_kind::{SyntaxKind::*, TokenSet},
|
||||
CompletedMarker, Parser,
|
||||
};
|
||||
|
||||
/// Token kinds that start (and make up) a literal: integers, floats and strings.
const LIT_TOKENS: TokenSet = enum_set!(INT_NUM | FLOAT_NUM | STRING);
|
||||
|
||||
pub fn literal(p: &mut Parser) -> Option<CompletedMarker> {
|
||||
if !LIT_TOKENS.contains(p.current()) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let lit = p.start();
|
||||
|
||||
p.do_bump();
|
||||
|
||||
Some(lit.complete(p, LITERAL))
|
||||
}
|
61
crates/lang/src/parser/input.rs
Normal file
61
crates/lang/src/parser/input.rs
Normal file
|
@ -0,0 +1,61 @@
|
|||
use crate::parser::syntax_kind::SyntaxKind;
|
||||
|
||||
pub struct Input<'src, 'toks> {
|
||||
raw: &'toks Vec<(SyntaxKind, &'src str)>,
|
||||
/// indices of the "meaningful" tokens (not whitespace etc)
|
||||
/// includes newlines because those might indeed help with finding errors
|
||||
meaningful: Vec<usize>,
|
||||
/// indices of newlines for the purpose of easily querying them
|
||||
/// can be helpful with missing commas etc
|
||||
newlines: Vec<usize>,
|
||||
}
|
||||
|
||||
impl<'src, 'toks> Input<'src, 'toks> {
|
||||
pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
|
||||
let meaningful = raw_toks
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, tok)| match tok.0 {
|
||||
SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => None,
|
||||
_ => Some(i),
|
||||
})
|
||||
.collect();
|
||||
let newlines = raw_toks
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, tok)| match tok.0 {
|
||||
SyntaxKind::NEWLINE => Some(i),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
raw: raw_toks,
|
||||
meaningful,
|
||||
newlines,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::unwrap_used, reason = "meaningful indices cannot be invalid")]
|
||||
pub(crate) fn kind(&self, idx: usize) -> SyntaxKind {
|
||||
let Some(meaningful_idx) = self.meaningful.get(idx) else {
|
||||
return SyntaxKind::EOF;
|
||||
};
|
||||
|
||||
self.raw.get(*meaningful_idx).unwrap().0
|
||||
}
|
||||
|
||||
pub(crate) fn preceding_meaningless(&self, idx: usize) -> usize {
|
||||
assert!(self.meaningful.len() > idx);
|
||||
|
||||
if idx == 0 {
|
||||
1
|
||||
} else {
|
||||
self.meaningful[idx] - self.meaningful[idx - 1]
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn meaningless_tail_len(&self) -> usize {
|
||||
self.raw.len() - (self.meaningful.last().unwrap() + 1)
|
||||
}
|
||||
}
|
113
crates/lang/src/parser/output.rs
Normal file
113
crates/lang/src/parser/output.rs
Normal file
|
@ -0,0 +1,113 @@
|
|||
use rowan::{GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken};
|
||||
use std::mem;
|
||||
|
||||
use crate::parser::syntax_kind::{Lang, SyntaxKind};
|
||||
|
||||
use super::{error::SyntaxError, events::Event};
|
||||
|
||||
pub struct Output {
|
||||
pub green_node: GreenNode,
|
||||
pub errors: Vec<SyntaxError>,
|
||||
}
|
||||
impl std::fmt::Debug for Output {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
debug_print_green_node(NodeOrToken::Node(&self.green_node), f, 0)
|
||||
}
|
||||
}
|
||||
|
||||
fn debug_print_green_node(
|
||||
node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
lvl: i32,
|
||||
) -> std::fmt::Result {
|
||||
for _ in 0..lvl {
|
||||
f.write_str(" ")?;
|
||||
}
|
||||
|
||||
match node {
|
||||
NodeOrToken::Node(n) => {
|
||||
writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind()))?;
|
||||
for c in n.children() {
|
||||
debug_print_green_node(c, f, lvl + 1)?;
|
||||
}
|
||||
for _ in 0..lvl {
|
||||
f.write_str(" ")?;
|
||||
}
|
||||
f.write_str("}\n")
|
||||
}
|
||||
NodeOrToken::Token(t) => {
|
||||
writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Output {
|
||||
pub fn from_parser_output(
|
||||
mut raw_toks: Vec<(SyntaxKind, &str)>,
|
||||
(mut events, errs): (Vec<Event>, Vec<SyntaxError>),
|
||||
) -> Self {
|
||||
let mut builder = GreenNodeBuilder::new();
|
||||
let mut fw_parents = Vec::new();
|
||||
raw_toks.reverse();
|
||||
|
||||
for i in 0..events.len() {
|
||||
match mem::replace(&mut events[i], Event::tombstone()) {
|
||||
Event::Start {
|
||||
kind,
|
||||
forward_parent,
|
||||
} => {
|
||||
if kind == SyntaxKind::TOMBSTONE && forward_parent.is_none() {
|
||||
continue;
|
||||
}
|
||||
|
||||
fw_parents.push(kind);
|
||||
let mut idx = i;
|
||||
let mut fp = forward_parent;
|
||||
while let Some(fwd) = fp {
|
||||
idx += fwd as usize;
|
||||
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
|
||||
Event::Start {
|
||||
kind,
|
||||
forward_parent,
|
||||
} => {
|
||||
fw_parents.push(kind);
|
||||
forward_parent
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
// remove whitespace bc it's ugly
|
||||
while let Some((SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE, _)) =
|
||||
raw_toks.last()
|
||||
{
|
||||
match events.iter_mut().find(|ev| matches!(ev, Event::Eat { .. })) {
|
||||
Some(Event::Eat { count }) => *count -= 1,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
let (tok, text): (SyntaxKind, &str) = raw_toks.pop().unwrap();
|
||||
builder.token(tok.into(), text);
|
||||
}
|
||||
|
||||
for kind in fw_parents.drain(..).rev() {
|
||||
if kind != SyntaxKind::TOMBSTONE {
|
||||
builder.start_node(kind.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
Event::Finish => builder.finish_node(),
|
||||
Event::Eat { count } => (0..count).for_each(|_| {
|
||||
let (tok, text): (SyntaxKind, &str) = raw_toks.pop().unwrap();
|
||||
builder.token(tok.into(), text);
|
||||
}),
|
||||
Event::Error => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
Self {
|
||||
green_node: builder.finish(),
|
||||
errors: errs,
|
||||
}
|
||||
}
|
||||
}
|
6
crates/lang/src/parser/parser.rs
Normal file
6
crates/lang/src/parser/parser.rs
Normal file
|
@ -0,0 +1,6 @@
|
|||
//! The parser architecture is *heavily* inspired (and partially copied and adapted) from the amazing rust-analyzer
|
||||
use drop_bomb::DropBomb;
|
||||
|
||||
use self::{error::SyntaxError, events::Event, input::Input};
|
||||
|
||||
use super::syntax_kind::SyntaxKind;
|
|
@ -1,7 +1,6 @@
|
|||
use enumset::EnumSet;
|
||||
use logos::Logos;
|
||||
|
||||
use crate::parser::Span;
|
||||
|
||||
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
||||
let mut lex = SyntaxKind::lexer(src);
|
||||
let mut r = Vec::new();
|
||||
|
@ -13,8 +12,9 @@ pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
|||
r
|
||||
}
|
||||
|
||||
#[derive(Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
|
||||
#[derive(enumset::EnumSetType, Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
|
||||
#[repr(u16)]
|
||||
#[enumset(no_super_impls)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum SyntaxKind {
|
||||
#[token("def")]
|
||||
|
@ -39,6 +39,7 @@ pub enum SyntaxKind {
|
|||
MAT_BODY,
|
||||
PARENTHESIZED_EXPR,
|
||||
EXPR,
|
||||
LITERAL,
|
||||
#[token("(")]
|
||||
L_PAREN,
|
||||
#[token(")")]
|
||||
|
@ -109,9 +110,29 @@ pub enum SyntaxKind {
|
|||
PARSE_ERR,
|
||||
LEX_ERR,
|
||||
ROOT,
|
||||
EOF,
|
||||
TOMBSTONE,
|
||||
ERROR,
|
||||
}
|
||||
|
||||
/// A set of [`SyntaxKind`]s, backed by an [`EnumSet`].
pub type TokenSet = EnumSet<SyntaxKind>;
|
||||
|
||||
impl From<SyntaxKind> for rowan::SyntaxKind {
    /// Wraps the `u16` discriminant of `kind` in rowan's raw syntax kind.
    fn from(kind: SyntaxKind) -> Self {
        let raw = kind as u16;
        rowan::SyntaxKind(raw)
    }
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Lang {}
|
||||
impl rowan::Language for Lang {
|
||||
type Kind = SyntaxKind;
|
||||
#[allow(unsafe_code)]
|
||||
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= SyntaxKind::ROOT as u16);
|
||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
}
|
||||
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
|
||||
kind.into()
|
||||
}
|
||||
}
|
|
@ -1,143 +1 @@
|
|||
use crate::parser::ast::File;
|
||||
use crate::parser::parse;
|
||||
use crate::tokens::Token;
|
||||
use chumsky::input::Stream;
|
||||
use chumsky::prelude::*;
|
||||
use indexmap::IndexMap;
|
||||
use logos::Logos;
|
||||
|
||||
// #[test]
|
||||
// fn test_parse_node_with_params() {
|
||||
// const INPUT: &str = "meow [ hello: $foo, world: @bar]";
|
||||
// assert_eq!(
|
||||
// parse(INPUT).unwrap(),
|
||||
// File {
|
||||
// decls: IndexMap::from_iter([(
|
||||
// ("main", (0..0).into()),
|
||||
// (
|
||||
// Expr::Node(
|
||||
// ("meow", (0..4).into()),
|
||||
// Some((
|
||||
// IndexMap::from_iter([
|
||||
// (
|
||||
// ("hello", (7..12).into()),
|
||||
// Expr::Var(("foo", (14..18).into()))
|
||||
// ),
|
||||
// (
|
||||
// ("world", (20..25).into()),
|
||||
// Expr::InputVar(("bar", (27..31).into()))
|
||||
// )
|
||||
// ]),
|
||||
// (5..32).into()
|
||||
// ))
|
||||
// ),
|
||||
// (0..32).into()
|
||||
// )
|
||||
// )])
|
||||
// }
|
||||
// );
|
||||
// }
|
||||
|
||||
// fn test_parse_multiple_top_level_complex() {
|
||||
// const INPUT: &str = r"def main = meow
|
||||
// | uwu
|
||||
// [ foo: @bar
|
||||
// , hello: world @| test [ more: params ] | yay
|
||||
// ]
|
||||
// !| awa
|
||||
// @| nya
|
||||
// | rawr;
|
||||
|
||||
// def test = meow
|
||||
// [ hello: $foo
|
||||
// , world: @bar
|
||||
// ];
|
||||
// ";
|
||||
// assert_eq!(
|
||||
// parse(INPUT).unwrap(),
|
||||
// File {
|
||||
// decls: IndexMap::from_iter([
|
||||
// (
|
||||
// ("main", (4..8).into()),
|
||||
// (
|
||||
// Expr::SimplePipe(
|
||||
// Box::new(Expr::Node(("meow", (11..15).into()), None)),
|
||||
// Box::new(Expr::NullPipe(
|
||||
// Box::new(Expr::Node(
|
||||
// ("uwu", (20..23).into()),
|
||||
// Some((
|
||||
// IndexMap::from_iter([
|
||||
// (
|
||||
// ("foo", (29..32).into()),
|
||||
// Expr::InputVar(("bar", (34..38).into()))
|
||||
// ),
|
||||
// (
|
||||
// ("hello", (44..49).into()),
|
||||
// Expr::MappingPipe(
|
||||
// Box::new(Expr::Node(
|
||||
// ("world", (51..56).into()),
|
||||
// None
|
||||
// )),
|
||||
// Box::new(Expr::SimplePipe(
|
||||
// Box::new(Expr::Node(
|
||||
// ("test", (60..64).into()),
|
||||
// Some((
|
||||
// IndexMap::from_iter([(
|
||||
// ("more", (67..71).into()),
|
||||
// Expr::Node(
|
||||
// ("params", (73..79).into()),
|
||||
// None
|
||||
// )
|
||||
// )]),
|
||||
// (65..81).into()
|
||||
// ))
|
||||
// )),
|
||||
// Box::new(Expr::Node(
|
||||
// ("yay", (84..87).into()),
|
||||
// None
|
||||
// ))
|
||||
// ))
|
||||
// )
|
||||
// )
|
||||
// ]),
|
||||
// (27..92).into()
|
||||
// ))
|
||||
// )),
|
||||
// Box::new(Expr::MappingPipe(
|
||||
// Box::new(Expr::Node(("awa", (97..100).into()), None)),
|
||||
// Box::new(Expr::SimplePipe(
|
||||
// Box::new(Expr::Node(("nya", (106..109).into()), None)),
|
||||
// Box::new(Expr::Node(("rawr", (114..118).into()), None))
|
||||
// ))
|
||||
// ))
|
||||
// ))
|
||||
// ),
|
||||
// (11..118).into()
|
||||
// ),
|
||||
// ),
|
||||
// (
|
||||
// ("test", (125..129).into()),
|
||||
// (
|
||||
// Expr::Node(
|
||||
// ("meow", (132..136).into()),
|
||||
// Some((
|
||||
// IndexMap::from_iter([
|
||||
// (
|
||||
// ("hello", (141..146).into()),
|
||||
// Expr::Var(("foo", (148..152).into()))
|
||||
// ),
|
||||
// (
|
||||
// ("world", (156..161).into()),
|
||||
// Expr::InputVar(("bar", (163..167).into()))
|
||||
// )
|
||||
// ]),
|
||||
// (139..171).into()
|
||||
// ))
|
||||
// ),
|
||||
// (132..171).into()
|
||||
// )
|
||||
// )
|
||||
// ])
|
||||
// }
|
||||
// );
|
||||
// }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue