use std::borrow::Borrow; use rowan::{ Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken, }; use crate::parser::{ ast::lossless::{lex::SyntaxKind::*, Lang}, Span, }; use super::lex::{self, SyntaxKind}; mod parsers { use rowan::GreenNode; use crate::parser::ast::lossless::lex::SyntaxKind; use super::SyntaxError; struct ParseResult { green_node: GreenNode, errors: Vec, } trait Parser { fn parse<'src>(input: &[(SyntaxKind, &'src str)]) -> ParseResult; } } #[derive(PartialEq, Eq)] pub struct Parse { pub green_node: GreenNode, } impl std::fmt::Debug for Parse { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { debug_print_green_node(NodeOrToken::Node(self.green_node.borrow()), f, 0) } } fn debug_print_green_node( node: NodeOrToken<&GreenNodeData, &GreenTokenData>, f: &mut std::fmt::Formatter<'_>, lvl: i32, ) -> std::fmt::Result { for _ in 0..lvl { f.write_str(" ")?; } match node { NodeOrToken::Node(n) => { writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind())); for c in n.children() { debug_print_green_node(c, f, lvl + 1)?; } for _ in 0..lvl { f.write_str(" ")?; } f.write_str("}\n") } NodeOrToken::Token(t) => { writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text()) } } } #[derive(Debug)] struct Parser<'src> { tokens: Vec<(SyntaxKind, &'src str)>, builder: GreenNodeBuilder<'src>, errors: Vec, } #[derive(Debug, PartialEq, Eq)] enum SyntaxError { Expected(SyntaxKind), AttrExpectedValue, /// guessed if there's a newline and attr on next line without comma /// should then suggest comma after attr ExpectedCommaBetweenAttrs, } pub fn parse(src: &str) -> Parse { let mut tokens = lex::lex(src); Parser { tokens, builder: GreenNodeBuilder::new(), errors: Vec::new(), } .parse() } impl Parser<'_> { fn parse(mut self) -> Parse { self.start_node(ROOT); match self.expr(None) { expr::ExprRes::Ok => (), expr::ExprRes::Eof => (), expr::ExprRes::NoExpr => todo!(), } self.builder.finish_node(); Parse { green_node: self.builder.finish(), } } fn start_node(&mut self, kind: SyntaxKind) { self.builder.start_node(kind.into()); } fn finish_node(&mut self) { self.builder.finish_node(); } /// Advance one token, adding it to the current branch of the tree builder. fn bump(&mut self) { let (kind, text) = self.tokens.pop().unwrap(); self.builder.token(kind.into(), text); } fn syntax_err(&mut self, err: SyntaxError) { let (_, text) = self.tokens.pop().unwrap(); self.builder.token(PARSE_ERR.into(), text); self.errors.push(err); } fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) { self.builder.start_node_at(checkpoint, PARSE_ERR.into()); self.finish_node(); self.errors.push(err); } fn expected(&mut self, expected: SyntaxKind) { self.syntax_err(SyntaxError::Expected(expected)) } /// Peek at the first unprocessed token fn current(&self) -> Option { self.tokens.last().map(|(kind, _)| *kind) } fn next(&self) -> Option { self.tokens .get(self.tokens.len() - 2) .map(|(kind, _)| *kind) } fn skip_ws(&mut self) { while self.current() == Some(WHITESPACE) || self.current() == Some(NEWLINE) { self.bump() } } fn skip_ws_without_newlines(&mut self) { while self.current() == Some(WHITESPACE) { self.bump() } } } mod expr { use rowan::Checkpoint; use super::{attrset::AttrsetRes, instr::NodeRes, Parser}; use crate::parser::{ast::lossless::lex::SyntaxKind::*, Span}; impl Parser<'_> { pub(super) fn expr(&mut self, start: Option) -> ExprRes { self.skip_ws(); let start = start.unwrap_or_else(|| self.builder.checkpoint()); match self.current() { Some(IDENT) => { let expr_res = match self.instr() { NodeRes::Ok => ExprRes::Ok, NodeRes::Eof => ExprRes::Eof, }; self.builder.start_node_at(start, EXPR.into()); self.finish_node(); expr_res } Some(_) => self.atom(Some(start)), None => ExprRes::Eof, } } pub(super) fn atom(&mut self, start: Option) -> ExprRes { self.skip_ws(); let start = start.unwrap_or_else(|| self.builder.checkpoint()); match self.current() { Some(INT_NUM | FLOAT_NUM | STRING) => { self.bump(); self.builder.start_node_at(start, EXPR.into()); self.finish_node(); ExprRes::Ok } Some(L_CURLY) => match self.attrset(start) { AttrsetRes::Ok => ExprRes::Ok, AttrsetRes::Eof => ExprRes::Eof, }, Some(L_PAREN) => { self.builder.start_node_at(start, PARENTHESIZED_EXPR.into()); self.bump(); self.expr(None); self.skip_ws(); match self.current() { Some(R_PAREN) => ExprRes::Ok, Some(_) => todo!(), None => ExprRes::Eof, } } Some(_) => ExprRes::NoExpr, None => ExprRes::Eof, } } } pub enum ExprRes { Ok, Eof, /// isnt an expression NoExpr, } } mod attrset { use chumsky::container::Container; use rowan::Checkpoint; use super::{expr::ExprRes, instr::NodeRes, Parser}; use crate::parser::{ ast::lossless::{lex::SyntaxKind::*, parser::SyntaxError}, Span, }; impl Parser<'_> { pub(super) fn attrset(&mut self, checkpoint: Checkpoint) -> AttrsetRes { assert_eq!(self.current(), Some(L_CURLY)); self.bump(); self.skip_ws(); match self.current() { Some(R_CURLY) => { self.builder.start_node_at(checkpoint, ATTR_SET.into()); self.bump(); self.finish_node(); AttrsetRes::Ok } Some(_) => { self.builder.start_node_at(checkpoint, ATTR_SET.into()); let res = match self.attrs() { AttrRes::Eof => AttrsetRes::Eof, AttrRes::RCurly | AttrRes::Ok => { println!("curr: {:?}", self.current()); AttrsetRes::Ok } }; self.finish_node(); res } None => AttrsetRes::Eof, } // self.start_node(ATTR); } fn attrs(&mut self) -> AttrRes { let mut res = AttrRes::Ok; while res == AttrRes::Ok { println!("it: {:?}", self.tokens.last()); match self.attr() { AttrRes::Ok => { self.skip_ws_without_newlines(); println!( "a: {:?}, {:?}", self.tokens.last(), self.tokens.get(self.tokens.len() - 2) ); println!("errs: {:?}", self.errors); res = AttrRes::Ok; let checkpoint_previous_end = self.builder.checkpoint(); res = match self.current() { Some(COMMA) => { self.bump(); AttrRes::Ok } Some(R_CURLY) => { self.bump(); res = AttrRes::Ok; break; } Some(NEWLINE) => { self.skip_ws(); println!( "b: {:?}, {:?}", self.tokens.last(), self.tokens.get(self.tokens.len() - 2) ); match self.current() { Some(COMMA) => { self.bump(); AttrRes::Ok } Some(R_CURLY) => { self.bump(); res = AttrRes::Ok; break; } Some(IDENT) => { println!("wtf"); self.syntax_err_by_checkpoint( checkpoint_previous_end, SyntaxError::ExpectedCommaBetweenAttrs, ); // self.syntax_err(SyntaxError::ExpectedCommaBetweenAttrs); AttrRes::Ok } Some(_) => { self.bump(); AttrRes::Ok } None => { res = AttrRes::Eof; break; } } } Some(_) => { self.bump(); println!( "c: {:?}, {:?}", self.tokens.last(), self.tokens.get(self.tokens.len() - 2) ); AttrRes::Ok } None => { res = AttrRes::Eof; break; } } } AttrRes::Eof => { res = AttrRes::Eof; break; } AttrRes::RCurly => { res = AttrRes::RCurly; break; } } } println!("toks_left: {:?}", self.tokens); res } fn attr(&mut self) -> AttrRes { self.skip_ws(); self.start_node(ATTR); self.start_node(ATTR_NAME); match self.current() { Some(IDENT) => self.bump(), Some(R_CURLY) => return AttrRes::Ok, Some(_) => self.expected(IDENT), None => return AttrRes::Eof, } self.finish_node(); self.skip_ws(); match self.current() { Some(COLON) => self.bump(), Some(R_CURLY) => { self.expected(COLON); return AttrRes::RCurly; } Some(_) => self.expected(COLON), None => return AttrRes::Eof, } self.skip_ws(); self.start_node(ATTR_VALUE); match self.expr(None) { ExprRes::Ok => self.bump(), ExprRes::Eof => return AttrRes::Eof, ExprRes::NoExpr => match self.current() { Some(COMMA) => self.syntax_err(SyntaxError::AttrExpectedValue), Some(R_CURLY) => { self.syntax_err(SyntaxError::AttrExpectedValue); return AttrRes::RCurly; } Some(_) => self.expected(EXPR), None => unreachable!(), }, } self.finish_node(); self.finish_node(); AttrRes::Ok } } #[derive(PartialEq, Eq)] pub enum AttrsetRes { Ok, Eof, } #[derive(PartialEq, Eq)] enum AttrRes { Ok, Eof, RCurly, } } mod instr { use super::Parser; use crate::parser::{ ast::lossless::{lex::SyntaxKind::*, parser::expr::ExprRes}, Span, }; impl Parser<'_> { pub(super) fn instr(&mut self) -> NodeRes { assert_eq!(self.current(), Some(IDENT)); self.skip_ws(); self.start_node(INSTR); self.instr_name(); // used to count positionals let mut i = 0; let params_checkpoint = self.builder.checkpoint(); loop { match self.expr(None) { ExprRes::Ok => { i += 1; continue; } ExprRes::NoExpr | ExprRes::Eof => break, } } if i >= 1 { self.builder .start_node_at(params_checkpoint, INSTR_PARAMS.into()); self.finish_node(); } self.finish_node(); NodeRes::Ok } fn instr_name(&mut self) { self.start_node(INSTR_NAME); while self.current() == Some(IDENT) { self.bump(); self.skip_ws_without_newlines(); } self.finish_node(); } } pub(super) enum NodeRes { Ok, Eof, } }