forked from katzen-cafe/iowo
470 lines
15 KiB
Rust
470 lines
15 KiB
Rust
use std::borrow::Borrow;
|
|
|
|
use rowan::{
|
|
Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken,
|
|
};
|
|
|
|
use crate::parser::{
|
|
ast::lossless::{lex::SyntaxKind::*, Lang},
|
|
Span,
|
|
};
|
|
|
|
use super::lex::{self, SyntaxKind};
|
|
|
|
pub mod parser_to_events {
|
|
use chumsky::Parser;
|
|
|
|
use crate::parser::ast::lossless::lex::SyntaxKind::{self, *};
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
pub(super) enum Event {
|
|
StartNode,
|
|
StartErr(SyntaxError),
|
|
EatToken,
|
|
FinishNode,
|
|
FinishErr,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
enum SyntaxError {
|
|
Expected(SyntaxKind),
|
|
AttrExpectedValue,
|
|
/// guessed if there's a newline and attr on next line without comma
|
|
/// should then suggest comma after attr
|
|
ExpectedCommaBetweenAttrs,
|
|
}
|
|
|
|
pub fn to_events(src: &str) -> Vec<Event> {
|
|
let mut tokens = lex::lex(src);
|
|
parser().parse(tokens)
|
|
}
|
|
|
|
pub fn parser() -> impl Parser<'static, SyntaxKind, Vec<Event>> {
|
|
let whitespace = just(WHITESPACE).or(NEWLINE).repeated().collect::<Vec<_>>();
|
|
whitespace
|
|
}
|
|
}
|
|
|
|
#[derive(PartialEq, Eq)]
|
|
pub struct Parse {
|
|
pub green_node: GreenNode,
|
|
}
|
|
|
|
impl std::fmt::Debug for Parse {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
debug_print_green_node(NodeOrToken::Node(self.green_node.borrow()), f, 0)
|
|
}
|
|
}
|
|
|
|
fn debug_print_green_node(
|
|
node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
|
|
f: &mut std::fmt::Formatter<'_>,
|
|
lvl: i32,
|
|
) -> std::fmt::Result {
|
|
for _ in 0..lvl {
|
|
f.write_str(" ")?;
|
|
}
|
|
|
|
match node {
|
|
NodeOrToken::Node(n) => {
|
|
writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind()));
|
|
for c in n.children() {
|
|
debug_print_green_node(c, f, lvl + 1)?;
|
|
}
|
|
for _ in 0..lvl {
|
|
f.write_str(" ")?;
|
|
}
|
|
f.write_str("}\n")
|
|
}
|
|
NodeOrToken::Token(t) => {
|
|
writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct Parser<'src> {
|
|
tokens: Vec<(SyntaxKind, &'src str)>,
|
|
builder: GreenNodeBuilder<'src>,
|
|
errors: Vec<SyntaxError>,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
enum SyntaxError {
|
|
Expected(SyntaxKind),
|
|
AttrExpectedValue,
|
|
/// guessed if there's a newline and attr on next line without comma
|
|
/// should then suggest comma after attr
|
|
ExpectedCommaBetweenAttrs,
|
|
}
|
|
|
|
pub fn parse(src: &str) -> Parse {
|
|
let mut tokens = lex::lex(src);
|
|
Parser {
|
|
tokens,
|
|
builder: GreenNodeBuilder::new(),
|
|
errors: Vec::new(),
|
|
}
|
|
.parse()
|
|
}
|
|
|
|
impl Parser<'_> {
|
|
fn parse(mut self) -> Parse {
|
|
self.start_node(ROOT);
|
|
|
|
match self.expr(None) {
|
|
expr::ExprRes::Ok => (),
|
|
expr::ExprRes::Eof => (),
|
|
expr::ExprRes::NoExpr => todo!(),
|
|
}
|
|
|
|
self.builder.finish_node();
|
|
Parse {
|
|
green_node: self.builder.finish(),
|
|
}
|
|
}
|
|
|
|
fn start_node(&mut self, kind: SyntaxKind) {
|
|
self.builder.start_node(kind.into());
|
|
}
|
|
fn finish_node(&mut self) {
|
|
self.builder.finish_node();
|
|
}
|
|
|
|
/// Advance one token, adding it to the current branch of the tree builder.
|
|
fn bump(&mut self) {
|
|
let (kind, text) = self.tokens.pop().unwrap();
|
|
self.builder.token(kind.into(), text);
|
|
}
|
|
fn syntax_err(&mut self, err: SyntaxError) {
|
|
let (_, text) = self.tokens.pop().unwrap();
|
|
self.builder.token(PARSE_ERR.into(), text);
|
|
self.errors.push(err);
|
|
}
|
|
fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) {
|
|
self.builder.start_node_at(checkpoint, PARSE_ERR.into());
|
|
self.finish_node();
|
|
self.errors.push(err);
|
|
}
|
|
fn expected(&mut self, expected: SyntaxKind) {
|
|
self.syntax_err(SyntaxError::Expected(expected))
|
|
}
|
|
/// Peek at the first unprocessed token
|
|
fn current(&self) -> Option<SyntaxKind> {
|
|
self.tokens.last().map(|(kind, _)| *kind)
|
|
}
|
|
fn next(&self) -> Option<SyntaxKind> {
|
|
self.tokens
|
|
.get(self.tokens.len() - 2)
|
|
.map(|(kind, _)| *kind)
|
|
}
|
|
fn skip_ws(&mut self) {
|
|
while self.current() == Some(WHITESPACE) || self.current() == Some(NEWLINE) {
|
|
self.bump()
|
|
}
|
|
}
|
|
fn skip_ws_without_newlines(&mut self) {
|
|
while self.current() == Some(WHITESPACE) {
|
|
self.bump()
|
|
}
|
|
}
|
|
}
|
|
|
|
mod expr {
|
|
use rowan::Checkpoint;
|
|
|
|
use super::{attrset::AttrsetRes, instr::NodeRes, Parser};
|
|
use crate::parser::{ast::lossless::lex::SyntaxKind::*, Span};
|
|
impl Parser<'_> {
|
|
pub(super) fn expr(&mut self, start: Option<Checkpoint>) -> ExprRes {
|
|
self.skip_ws();
|
|
let start = start.unwrap_or_else(|| self.builder.checkpoint());
|
|
match self.current() {
|
|
Some(IDENT) => {
|
|
let expr_res = match self.instr() {
|
|
NodeRes::Ok => ExprRes::Ok,
|
|
NodeRes::Eof => ExprRes::Eof,
|
|
};
|
|
self.builder.start_node_at(start, EXPR.into());
|
|
self.finish_node();
|
|
expr_res
|
|
}
|
|
Some(_) => self.atom(Some(start)),
|
|
None => ExprRes::Eof,
|
|
}
|
|
}
|
|
|
|
pub(super) fn atom(&mut self, start: Option<Checkpoint>) -> ExprRes {
|
|
self.skip_ws();
|
|
let start = start.unwrap_or_else(|| self.builder.checkpoint());
|
|
match self.current() {
|
|
Some(INT_NUM | FLOAT_NUM | STRING) => {
|
|
self.bump();
|
|
self.builder.start_node_at(start, EXPR.into());
|
|
self.finish_node();
|
|
ExprRes::Ok
|
|
}
|
|
Some(L_CURLY) => match self.attrset(start) {
|
|
AttrsetRes::Ok => ExprRes::Ok,
|
|
AttrsetRes::Eof => ExprRes::Eof,
|
|
},
|
|
Some(L_PAREN) => {
|
|
self.builder.start_node_at(start, PARENTHESIZED_EXPR.into());
|
|
self.bump();
|
|
self.expr(None);
|
|
self.skip_ws();
|
|
match self.current() {
|
|
Some(R_PAREN) => ExprRes::Ok,
|
|
Some(_) => todo!(),
|
|
None => ExprRes::Eof,
|
|
}
|
|
}
|
|
Some(_) => ExprRes::NoExpr,
|
|
None => ExprRes::Eof,
|
|
}
|
|
}
|
|
}
|
|
|
|
pub enum ExprRes {
|
|
Ok,
|
|
Eof,
|
|
/// isnt an expression
|
|
NoExpr,
|
|
}
|
|
}
|
|
|
|
mod attrset {
|
|
use chumsky::container::Container;
|
|
use rowan::Checkpoint;
|
|
|
|
use super::{expr::ExprRes, instr::NodeRes, Parser};
|
|
use crate::parser::{
|
|
ast::lossless::{lex::SyntaxKind::*, parser::SyntaxError},
|
|
Span,
|
|
};
|
|
impl Parser<'_> {
|
|
pub(super) fn attrset(&mut self, checkpoint: Checkpoint) -> AttrsetRes {
|
|
assert_eq!(self.current(), Some(L_CURLY));
|
|
self.bump();
|
|
self.skip_ws();
|
|
match self.current() {
|
|
Some(R_CURLY) => {
|
|
self.builder.start_node_at(checkpoint, ATTR_SET.into());
|
|
self.bump();
|
|
self.finish_node();
|
|
AttrsetRes::Ok
|
|
}
|
|
Some(_) => {
|
|
self.builder.start_node_at(checkpoint, ATTR_SET.into());
|
|
let res = match self.attrs() {
|
|
AttrRes::Eof => AttrsetRes::Eof,
|
|
AttrRes::RCurly | AttrRes::Ok => {
|
|
println!("curr: {:?}", self.current());
|
|
AttrsetRes::Ok
|
|
}
|
|
};
|
|
self.finish_node();
|
|
res
|
|
}
|
|
None => AttrsetRes::Eof,
|
|
}
|
|
// self.start_node(ATTR);
|
|
}
|
|
|
|
fn attrs(&mut self) -> AttrRes {
|
|
let mut res = AttrRes::Ok;
|
|
|
|
while res == AttrRes::Ok {
|
|
println!("it: {:?}", self.tokens.last());
|
|
match self.attr() {
|
|
AttrRes::Ok => {
|
|
self.skip_ws_without_newlines();
|
|
println!(
|
|
"a: {:?}, {:?}",
|
|
self.tokens.last(),
|
|
self.tokens.get(self.tokens.len() - 2)
|
|
);
|
|
println!("errs: {:?}", self.errors);
|
|
res = AttrRes::Ok;
|
|
let checkpoint_previous_end = self.builder.checkpoint();
|
|
res = match self.current() {
|
|
Some(COMMA) => {
|
|
self.bump();
|
|
AttrRes::Ok
|
|
}
|
|
Some(R_CURLY) => {
|
|
self.bump();
|
|
res = AttrRes::Ok;
|
|
break;
|
|
}
|
|
Some(NEWLINE) => {
|
|
self.skip_ws();
|
|
println!(
|
|
"b: {:?}, {:?}",
|
|
self.tokens.last(),
|
|
self.tokens.get(self.tokens.len() - 2)
|
|
);
|
|
match self.current() {
|
|
Some(COMMA) => {
|
|
self.bump();
|
|
AttrRes::Ok
|
|
}
|
|
Some(R_CURLY) => {
|
|
self.bump();
|
|
res = AttrRes::Ok;
|
|
break;
|
|
}
|
|
Some(IDENT) => {
|
|
println!("wtf");
|
|
self.syntax_err_by_checkpoint(
|
|
checkpoint_previous_end,
|
|
SyntaxError::ExpectedCommaBetweenAttrs,
|
|
);
|
|
// self.syntax_err(SyntaxError::ExpectedCommaBetweenAttrs);
|
|
AttrRes::Ok
|
|
}
|
|
Some(_) => {
|
|
self.bump();
|
|
AttrRes::Ok
|
|
}
|
|
None => {
|
|
res = AttrRes::Eof;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
Some(_) => {
|
|
self.bump();
|
|
println!(
|
|
"c: {:?}, {:?}",
|
|
self.tokens.last(),
|
|
self.tokens.get(self.tokens.len() - 2)
|
|
);
|
|
AttrRes::Ok
|
|
}
|
|
None => {
|
|
res = AttrRes::Eof;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
AttrRes::Eof => {
|
|
res = AttrRes::Eof;
|
|
break;
|
|
}
|
|
AttrRes::RCurly => {
|
|
res = AttrRes::RCurly;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
println!("toks_left: {:?}", self.tokens);
|
|
res
|
|
}
|
|
|
|
fn attr(&mut self) -> AttrRes {
|
|
self.skip_ws();
|
|
self.start_node(ATTR);
|
|
self.start_node(ATTR_NAME);
|
|
match self.current() {
|
|
Some(IDENT) => self.bump(),
|
|
Some(R_CURLY) => return AttrRes::Ok,
|
|
Some(_) => self.expected(IDENT),
|
|
None => return AttrRes::Eof,
|
|
}
|
|
self.finish_node();
|
|
self.skip_ws();
|
|
match self.current() {
|
|
Some(COLON) => self.bump(),
|
|
Some(R_CURLY) => {
|
|
self.expected(COLON);
|
|
return AttrRes::RCurly;
|
|
}
|
|
Some(_) => self.expected(COLON),
|
|
None => return AttrRes::Eof,
|
|
}
|
|
self.skip_ws();
|
|
self.start_node(ATTR_VALUE);
|
|
match self.expr(None) {
|
|
ExprRes::Ok => self.bump(),
|
|
ExprRes::Eof => return AttrRes::Eof,
|
|
ExprRes::NoExpr => match self.current() {
|
|
Some(COMMA) => self.syntax_err(SyntaxError::AttrExpectedValue),
|
|
Some(R_CURLY) => {
|
|
self.syntax_err(SyntaxError::AttrExpectedValue);
|
|
return AttrRes::RCurly;
|
|
}
|
|
Some(_) => self.expected(EXPR),
|
|
None => unreachable!(),
|
|
},
|
|
}
|
|
self.finish_node();
|
|
self.finish_node();
|
|
AttrRes::Ok
|
|
}
|
|
}
|
|
|
|
#[derive(PartialEq, Eq)]
|
|
pub enum AttrsetRes {
|
|
Ok,
|
|
Eof,
|
|
}
|
|
|
|
#[derive(PartialEq, Eq)]
|
|
enum AttrRes {
|
|
Ok,
|
|
Eof,
|
|
RCurly,
|
|
}
|
|
}
|
|
|
|
mod instr {
|
|
use super::Parser;
|
|
use crate::parser::{
|
|
ast::lossless::{lex::SyntaxKind::*, parser::expr::ExprRes},
|
|
Span,
|
|
};
|
|
|
|
impl Parser<'_> {
|
|
pub(super) fn instr(&mut self) -> NodeRes {
|
|
assert_eq!(self.current(), Some(IDENT));
|
|
self.skip_ws();
|
|
self.start_node(INSTR);
|
|
self.instr_name();
|
|
|
|
// used to count positionals
|
|
let mut i = 0;
|
|
let params_checkpoint = self.builder.checkpoint();
|
|
loop {
|
|
match self.expr(None) {
|
|
ExprRes::Ok => {
|
|
i += 1;
|
|
continue;
|
|
}
|
|
ExprRes::NoExpr | ExprRes::Eof => break,
|
|
}
|
|
}
|
|
if i >= 1 {
|
|
self.builder
|
|
.start_node_at(params_checkpoint, INSTR_PARAMS.into());
|
|
self.finish_node();
|
|
}
|
|
self.finish_node();
|
|
NodeRes::Ok
|
|
}
|
|
|
|
fn instr_name(&mut self) {
|
|
self.start_node(INSTR_NAME);
|
|
while self.current() == Some(IDENT) {
|
|
self.bump();
|
|
self.skip_ws_without_newlines();
|
|
}
|
|
self.finish_node();
|
|
}
|
|
}
|
|
|
|
pub(super) enum NodeRes {
|
|
Ok,
|
|
Eof,
|
|
}
|
|
}
|