From 9da157ff4a3b843a2ed107bac074f0d61cfdfa97 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Thu, 11 Apr 2024 03:23:03 +0200 Subject: [PATCH] lang: massive amounts of parser and ast pain --- Cargo.lock | 52 ++- crates/lang/Cargo.toml | 4 +- crates/lang/src/main.rs | 37 +- crates/lang/src/parser.rs | 117 +++-- crates/lang/src/parser/ast.rs | 47 +- crates/lang/src/parser/ast/ast_tree.rs | 31 ++ crates/lang/src/parser/ast/lossless.rs | 19 + crates/lang/src/parser/ast/lossless/lex.rs | 118 +++++ crates/lang/src/parser/ast/lossless/parser.rs | 437 ++++++++++++++++++ crates/lang/src/parser/ast/raw_ast.rs | 50 ++ crates/lang/src/parser/tests.rs | 2 +- crates/lang/src/tokens.rs | 15 +- crates/lang/src/tokens/tests.rs | 14 +- flake.lock | 30 +- flake.nix | 86 ++-- testfiles/test.owo | 11 +- 16 files changed, 900 insertions(+), 170 deletions(-) create mode 100644 crates/lang/src/parser/ast/ast_tree.rs create mode 100644 crates/lang/src/parser/ast/lossless.rs create mode 100644 crates/lang/src/parser/ast/lossless/lex.rs create mode 100644 crates/lang/src/parser/ast/lossless/parser.rs create mode 100644 crates/lang/src/parser/ast/raw_ast.rs diff --git a/Cargo.lock b/Cargo.lock index e762ead..a25751d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -174,9 +174,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chumsky" -version = "1.0.0-alpha.6" +version = "1.0.0-alpha.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9c28d4e5dd9a9262a38b231153591da6ce1471b818233f4727985d3dd0ed93c" +checksum = "c7b80276986f86789dc56ca6542d53bba9cda3c66091ebbe7bd96fc1bdf20f1f" dependencies = [ "hashbrown", "regex-automata", @@ -237,6 +237,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "countme" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" + [[package]] name = "crc32fast" version = "1.3.2" @@ -322,6 +328,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "ego-tree" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" + [[package]] name = "either" version = "1.9.0" @@ -506,9 +518,11 @@ dependencies = [ "ariadne", "chumsky", "clap", + "ego-tree", "indexmap", "logos", "petgraph", + "rowan", ] [[package]] @@ -589,6 +603,15 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -863,6 +886,25 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "rowan" +version = "0.15.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a58fa8a7ccff2aec4f39cc45bf5f985cec7125ab271cf681c279fd00192b49" +dependencies = [ + "countme", + "hashbrown", + "memoffset", + "rustc-hash", + "text-size", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "ryu" version = "1.0.16" @@ -973,6 +1015,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "text-size" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" + [[package]] name = "thiserror" version = "1.0.55" diff --git a/crates/lang/Cargo.toml b/crates/lang/Cargo.toml index 1cd8654..8c1d3db 100644 --- a/crates/lang/Cargo.toml +++ b/crates/lang/Cargo.toml @@ -7,11 +7,13 @@ edition = "2021" [dependencies] logos = "0.14" -chumsky = {version= "1.0.0-alpha.6", features=["label"]} +chumsky = {version= "1.0.0-alpha.7", features=["label"]} petgraph = { workspace = true} indexmap = "2.2.6" clap = { version = "4", features = ["derive"] } ariadne = "0.4.0" +ego-tree = "0.6.2" +rowan = "0.15.15" [lints] workspace = true diff --git a/crates/lang/src/main.rs b/crates/lang/src/main.rs index 3e70973..16ec4e4 100644 --- a/crates/lang/src/main.rs +++ b/crates/lang/src/main.rs @@ -1,7 +1,10 @@ use std::{fs, path::PathBuf}; use clap::Parser; -use lang::{err_reporting::ErrorCollector, parser::parse}; +use lang::{ + err_reporting::ErrorCollector, + parser::ast::lossless::{lex, parser}, +}; #[derive(Parser)] struct Args { @@ -13,20 +16,24 @@ fn main() { let args = Args::parse(); let n = args.file.clone(); let f = fs::read_to_string(n.clone()).expect("failed to read file"); - let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]); + println!("toks: {:?}", lex::lex(&f)); + let parse_res = parser::parse(&f); + println!("parse: {:?}", parse_res); + // dbg!(lex::lex(&f)); + // let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]); - println!("file: {f}\n"); - let parse_res = parse(&f); - err_collector.insert_many( - args.file.to_str().unwrap(), - lang::err_reporting::Stage::Parse, - parse_res - .errors() - .into_iter() - .map(|e| e.to_owned()) - .collect::>(), - ); + // println!("file: {f}\n"); + // let parse_res = parse(&f); + // err_collector.insert_many( + // args.file.to_str().unwrap(), + // lang::err_reporting::Stage::Parse, + // parse_res + // .errors() + // .into_iter() + // .map(|e| e.to_owned()) + // .collect::>(), + // ); - err_collector.report_raw(); - println!("res: {:?}", parse_res); + // err_collector.report_raw(); + // println!("res: {:?}", parse_res); } diff --git a/crates/lang/src/parser.rs b/crates/lang/src/parser.rs index 7c0e406..b1ee34b 100644 --- a/crates/lang/src/parser.rs +++ b/crates/lang/src/parser.rs @@ -1,5 +1,3 @@ -use std::ops::Range; - use chumsky::{ error::Rich, input::{Stream, ValueInput}, @@ -10,19 +8,22 @@ use chumsky::{ IterParser, }; use indexmap::IndexMap; -use logos::{Logos, Source}; +use logos::Logos; use crate::tokens::Token; pub mod ast; #[cfg(test)] mod tests; -use self::ast::{Expr, Expression, File}; +use self::ast::{ + raw_ast::{RawExpr, RawExpression}, + File, +}; pub type Span = SimpleSpan; pub type Spanned = (T, Span); -pub fn parse<'src>(src: &'src str) -> ParseResult, Rich<'_, Token<'_>>> { +pub fn parse(src: &str) -> ParseResult, Rich<'_, Token<'_>>> { let toks: Vec<_> = Token::lexer(src) .spanned() .map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s))) @@ -35,22 +36,39 @@ pub(crate) fn parser< 'src: 'tokens, I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, >() -> impl Parser<'tokens, I, File<'src>, extra::Err, Span>>> { - let word = select! { Token::Word(word) => word }; + let word = select! { Token::Word(word) = e => (word, e.span())}; let expr = recursive(|expr| { let lit = select! { - Token::Int(i) = e => Expression::new(Expr::Lit(ast::Lit::Int(i.parse().unwrap())), e.span()), - Token::Float(f) = e => Expression::new(Expr::Lit(ast::Lit::Float(f.parse().unwrap())), e.span()), + Token::Int(i) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Int(i.parse().expect("TODO: handle better"))), e.span()), + Token::Float(f) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Float(f.parse().expect("TODO: handle better"))), e.span()), + Token::String(s) = e => RawExpression::new(RawExpr::Lit(ast::Lit::String(s.strip_prefix('"').expect("a").strip_suffix('"').expect("b"))), e.span()) }; + let mat = just(Token::Mat) + .ignore_then(select! { Token::Dimensions(dimensions) = e => (dimensions, e.span())}) + .then( + lit.separated_by(just(Token::Comma)) + .collect::>() + .separated_by(just(Token::Semicolon)) + .collect::>() + .delimited_by(just(Token::BracketOpen), just(Token::BracketClose)), + ) + .map_with(|(dimensions, data), e| { + // TODO: Validation and proper error handling/reporting + // (validation = validating the matrix dimensions) + RawExpression::new( + RawExpr::Matrix(dimensions, data.into_iter().flatten().collect()), + e.span(), + ) + }); let var = select! { - Token::VarIdent(name) => (Expr::Var as fn(_) -> _, name), - Token::InputIdent(name) => (Expr::InputVar as fn(_) -> _, name) + Token::VarIdent(name) => (RawExpr::Var as fn(_) -> _, name), + Token::InputIdent(name) => (RawExpr::InputVar as fn(_) -> _, name) } - .map_with(|(item_type, name), extra| Expression::new(item_type(name), extra.span())) + .map_with(|(item_type, name), extra| RawExpression::new(item_type(name), extra.span())) .labelled("variable"); let attrset = word - .map_with(|n, e| (n, e.span())) .labelled("attr name") .then_ignore(just(Token::Colon)) .then(expr) @@ -63,57 +81,72 @@ pub(crate) fn parser< .labelled("attrset"); let node = word - .map_with(|v, e| (v, e.span())) + .repeated() + .collect() .then(attrset.clone().or_not()) .map_with(|(name, params), extra| { - Expression::new(Expr::Node(name, params), extra.span()) + RawExpression::new(RawExpr::Node(name, params), extra.span()) }) - .or(var) - .or(attrset - .map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span()))) + // .or(var) + // .or(attrset + // .map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span()))) + // .or(lit) + // .or(mat) .labelled("node"); + let atom = var + .or(lit) + .or(mat) + .or(attrset.map_with(|attrset, extra| { + RawExpression::new(RawExpr::AttrSet(attrset), extra.span()) + })) + .or(node.clone()); + #[allow(clippy::let_and_return)] - let pipeline = node + let pipeline = atom .clone() .then(choice(( - just(Token::Pipe).to(Expr::SimplePipe as fn(_, _) -> _), - just(Token::MappingPipe).to(Expr::MappingPipe as fn(_, _) -> _), - just(Token::NullPipe).to(Expr::NullPipe as fn(_, _) -> _), + just(Token::Pipe).to(RawExpr::SimplePipe as fn(_, _) -> _), + just(Token::MappingPipe).to(RawExpr::MappingPipe as fn(_, _) -> _), + just(Token::NullPipe).to(RawExpr::NullPipe as fn(_, _) -> _), ))) .repeated() - .foldr_with(node, |(curr, pipe), next, extra| { - Expression::new(pipe(Box::new(curr), Box::new(next)), extra.span()) + .foldr_with(atom, |(curr, pipe), next, extra| { + RawExpression::new(pipe(curr, next), extra.span()) }); pipeline }); - let decl = just(Token::Def).ignore_then( - word.map_with(|n, e| (n, e.span())) - .then_ignore(just(Token::Equals)) - .then(expr.clone().map(|expr| expr)) - .then_ignore(just(Token::SemiColon)), - ); + let decls = just(Token::Def) + .ignore_then( + word.then_ignore(just(Token::Equals)) + .then(expr.clone().map(|expr| expr)) + .then_ignore(just(Token::Semicolon)), + ) + .repeated() + .collect::>() + .map(|decls| File { + decls: IndexMap::from_iter(decls), + }); - expr.map(|expr| File { + let single_expr = expr.map(|expr| File { decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]), - }) - .or(decl.repeated().collect::>().map(|decls| File { - decls: IndexMap::from_iter(decls), - })) + }); + + just(Token::Def).rewind().ignore_then(decls).or(single_expr) + // single_expr.or(decls) + + // expr.map(|expr| File { + // decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]), + // }) + // .or(decl.repeated().collect::>().map(|decls| File { + // decls: IndexMap::from_iter(decls), + // })) } pub mod asg { use petgraph::graph::DiGraph; use super::Spanned; - - pub struct Asg<'src> { - graph: DiGraph, String>, - } - - enum AsgNode<'src> { - Node(Spanned<&'src str>), - } } diff --git a/crates/lang/src/parser/ast.rs b/crates/lang/src/parser/ast.rs index d21ad49..d45ef66 100644 --- a/crates/lang/src/parser/ast.rs +++ b/crates/lang/src/parser/ast.rs @@ -2,51 +2,14 @@ use std::collections::{BTreeMap, HashMap}; use indexmap::IndexMap; -use super::{Span, Spanned}; +use super::Spanned; #[derive(Debug, PartialEq)] pub struct File<'src> { - pub decls: IndexMap, Expression<'src>>, + pub decls: IndexMap, raw_ast::RawExpression<'src>>, } -#[derive(Debug, PartialEq)] -pub struct Expression<'src> { - pub expr: Expr<'src>, - pub span: Span, -} - -impl<'src> Expression<'src> { - pub fn new(expr: Expr<'src>, span: Span) -> Self { - Self { expr, span } - } -} - -#[derive(Debug, PartialEq)] -pub enum Expr<'src> { - Node( - Spanned<&'src str>, - Option, Expression<'src>>>>, - ), - SimplePipe(Box>, Box>), - // NamingPipe( - // Box>, - // (Vec>, Vec>), - // Box>, - // ), - MappingPipe(Box>, Box>), - NullPipe(Box>, Box>), - MultiPipe(IndexMap, Expression<'src>>), - // LetIn( - // IndexMap, Box>>, - // Box>, - // ), - // $ - Var(&'src str), - // @ - InputVar(&'src str), - AttrSet(Spanned, Expression<'src>>>), - Lit(Lit<'src>), -} +pub mod raw_ast; #[derive(Debug, PartialEq)] pub enum Lit<'src> { @@ -55,3 +18,7 @@ pub enum Lit<'src> { Float(f64), String(&'src str), } + +pub mod lossless; + +pub mod ast_tree; diff --git a/crates/lang/src/parser/ast/ast_tree.rs b/crates/lang/src/parser/ast/ast_tree.rs new file mode 100644 index 0000000..46db4e9 --- /dev/null +++ b/crates/lang/src/parser/ast/ast_tree.rs @@ -0,0 +1,31 @@ +use ego_tree::Tree; + +use crate::parser::Spanned; + +use super::{File, Lit}; + +pub struct Ast<'src> { + tree: Tree>, +} + +struct AstNode<'src> { + kind: NodeKind<'src>, +} + +enum NodeKind<'src> { + Decl, + Ident(&'src str), + Instr, + Expr, + MappingPipe, + NullPipe, + MultiPipe, + Var(&'src str), + InputVar(&'src str), + AttrSet, + Attr, + Lit(Lit<'src>), + Matrix, + Dimensions(u16, u16), + MatrixRow, +} diff --git a/crates/lang/src/parser/ast/lossless.rs b/crates/lang/src/parser/ast/lossless.rs new file mode 100644 index 0000000..0047441 --- /dev/null +++ b/crates/lang/src/parser/ast/lossless.rs @@ -0,0 +1,19 @@ +use self::lex::SyntaxKind; + +pub mod parser; + +pub mod lex; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum Lang {} +impl rowan::Language for Lang { + type Kind = SyntaxKind; + #[allow(unsafe_code)] + fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { + assert!(raw.0 <= SyntaxKind::ROOT as u16); + unsafe { std::mem::transmute::(raw.0) } + } + fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { + kind.into() + } +} diff --git a/crates/lang/src/parser/ast/lossless/lex.rs b/crates/lang/src/parser/ast/lossless/lex.rs new file mode 100644 index 0000000..c25608a --- /dev/null +++ b/crates/lang/src/parser/ast/lossless/lex.rs @@ -0,0 +1,118 @@ +use logos::Logos; + +use crate::parser::Span; + +pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> { + let mut lex = SyntaxKind::lexer(src); + let mut r = Vec::new(); + + while let Some(tok_res) = lex.next() { + r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice())) + } + + r.reverse(); + r +} + +#[derive(Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] +#[repr(u16)] +#[allow(non_camel_case_types)] +pub enum SyntaxKind { + #[token("def")] + DEF_KW = 0, + #[token("let")] + LET_KW, + #[token("in")] + IN_KW, + #[token("mat")] + MAT_KW, + #[regex("[\\d]+x[\\d]+")] + PAT_DIMENSIONS, + #[regex("[\\d]+")] + INT_NUM, + #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)")] + FLOAT_NUM, + #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)] + STRING, + MATRIX, + DECL, + LIST, + MAT_BODY, + PARENTHESIZED_EXPR, + EXPR, + #[token("(")] + L_PAREN, + #[token(")")] + R_PAREN, + #[token("{")] + L_CURLY, + #[token("}")] + R_CURLY, + #[token("[")] + L_BRACK, + #[token("]")] + R_BRACK, + #[token("<")] + L_ANGLE, + #[token(">")] + R_ANGLE, + #[token("+")] + PLUS, + #[token("-")] + MINUS, + #[token("*")] + STAR, + #[token("/")] + SLASH, + #[token("%")] + PERCENT, + #[token("^")] + CARET, + INSTR, + INSTR_NAME, + INSTR_PARAMS, + ATTR_SET, + ATTR, + ATTR_NAME, + ATTR_VALUE, + #[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*")] + IDENT, + #[regex("\\$[a-zA-Z0-9_\\-]+")] + VAR, + #[regex("\\@[a-zA-Z0-9_\\-]+")] + INPUT_VAR, + #[token("$")] + DOLLAR, + #[token("@")] + AT, + #[token(",")] + COMMA, + #[token("|")] + PIPE, + #[token("@|")] + MAPPING_PIPE, + #[token("!|")] + NULL_PIPE, + #[token("=")] + EQ, + #[token(":")] + COLON, + #[token(";")] + SEMICOLON, + #[token(".")] + DOT, + #[token("!")] + BANG, + #[regex("[ \\t\\f]+")] + WHITESPACE, + #[token("\n")] + NEWLINE, + PARSE_ERR, + LEX_ERR, + ROOT, +} +impl From for rowan::SyntaxKind { + fn from(kind: SyntaxKind) -> Self { + Self(kind as u16) + } +} diff --git a/crates/lang/src/parser/ast/lossless/parser.rs b/crates/lang/src/parser/ast/lossless/parser.rs new file mode 100644 index 0000000..a3c30ca --- /dev/null +++ b/crates/lang/src/parser/ast/lossless/parser.rs @@ -0,0 +1,437 @@ +use std::borrow::Borrow; + +use chumsky::container::Container; +use rowan::{ + Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken, +}; + +use crate::parser::{ + ast::lossless::{lex::SyntaxKind::*, Lang}, + Span, +}; + +use super::lex::{self, SyntaxKind}; + +#[derive(PartialEq, Eq)] +pub struct Parse { + pub green_node: GreenNode, +} + +impl std::fmt::Debug for Parse { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug_print_green_node(NodeOrToken::Node(self.green_node.borrow()), f, 0) + } +} + +fn debug_print_green_node( + node: NodeOrToken<&GreenNodeData, &GreenTokenData>, + f: &mut std::fmt::Formatter<'_>, + lvl: i32, +) -> std::fmt::Result { + for _ in 0..lvl { + f.write_str(" ")?; + } + + match node { + NodeOrToken::Node(n) => { + writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind())); + for c in n.children() { + debug_print_green_node(c, f, lvl + 1)?; + } + for _ in 0..lvl { + f.write_str(" ")?; + } + f.write_str("}\n") + } + NodeOrToken::Token(t) => { + writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text()) + } + } +} + +#[derive(Debug)] +struct Parser<'src> { + tokens: Vec<(SyntaxKind, &'src str)>, + builder: GreenNodeBuilder<'src>, + errors: Vec, +} + +#[derive(Debug, PartialEq, Eq)] +enum SyntaxError { + Expected(SyntaxKind), + AttrExpectedValue, + /// guessed if there's a newline and attr on next line without comma + /// should then suggest comma after attr + ExpectedCommaBetweenAttrs, +} + +pub fn parse(src: &str) -> Parse { + let mut tokens = lex::lex(src); + Parser { + tokens, + builder: GreenNodeBuilder::new(), + errors: Vec::new(), + } + .parse() +} + +impl Parser<'_> { + fn parse(mut self) -> Parse { + self.start_node(ROOT); + + match self.expr(None) { + expr::ExprRes::Ok => (), + expr::ExprRes::Eof => (), + expr::ExprRes::NoExpr => todo!(), + } + + self.builder.finish_node(); + Parse { + green_node: self.builder.finish(), + } + } + + fn start_node(&mut self, kind: SyntaxKind) { + self.builder.start_node(kind.into()); + } + fn finish_node(&mut self) { + self.builder.finish_node(); + } + + /// Advance one token, adding it to the current branch of the tree builder. + fn bump(&mut self) { + let (kind, text) = self.tokens.pop().unwrap(); + self.builder.token(kind.into(), text); + } + fn syntax_err(&mut self, err: SyntaxError) { + let (_, text) = self.tokens.pop().unwrap(); + self.builder.token(PARSE_ERR.into(), text); + self.errors.push(err); + } + fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) { + self.builder.start_node_at(checkpoint, PARSE_ERR.into()); + self.finish_node(); + self.errors.push(err); + } + fn expected(&mut self, expected: SyntaxKind) { + self.syntax_err(SyntaxError::Expected(expected)) + } + /// Peek at the first unprocessed token + fn current(&self) -> Option { + self.tokens.last().map(|(kind, _)| *kind) + } + fn next(&self) -> Option { + self.tokens + .get(self.tokens.len() - 2) + .map(|(kind, _)| *kind) + } + fn skip_ws(&mut self) { + while self.current() == Some(WHITESPACE) || self.current() == Some(NEWLINE) { + self.bump() + } + } + fn skip_ws_without_newlines(&mut self) { + while self.current() == Some(WHITESPACE) { + self.bump() + } + } +} + +mod expr { + use rowan::Checkpoint; + + use super::{attrset::AttrsetRes, instr::NodeRes, Parser}; + use crate::parser::{ast::lossless::lex::SyntaxKind::*, Span}; + impl Parser<'_> { + pub(super) fn expr(&mut self, start: Option) -> ExprRes { + self.skip_ws(); + let start = start.unwrap_or_else(|| self.builder.checkpoint()); + match self.current() { + Some(IDENT) => { + let expr_res = match self.instr() { + NodeRes::Ok => ExprRes::Ok, + NodeRes::Eof => ExprRes::Eof, + }; + self.builder.start_node_at(start, EXPR.into()); + self.finish_node(); + expr_res + } + Some(_) => self.atom(Some(start)), + None => ExprRes::Eof, + } + } + + pub(super) fn atom(&mut self, start: Option) -> ExprRes { + self.skip_ws(); + let start = start.unwrap_or_else(|| self.builder.checkpoint()); + match self.current() { + Some(INT_NUM | FLOAT_NUM | STRING) => { + self.bump(); + self.builder.start_node_at(start, EXPR.into()); + self.finish_node(); + ExprRes::Ok + } + Some(L_CURLY) => match self.attrset(start) { + AttrsetRes::Ok => ExprRes::Ok, + AttrsetRes::Eof => ExprRes::Eof, + }, + Some(L_PAREN) => { + self.builder.start_node_at(start, PARENTHESIZED_EXPR.into()); + self.bump(); + self.expr(None); + self.skip_ws(); + match self.current() { + Some(R_PAREN) => ExprRes::Ok, + Some(_) => todo!(), + None => ExprRes::Eof, + } + } + Some(_) => ExprRes::NoExpr, + None => ExprRes::Eof, + } + } + } + + pub enum ExprRes { + Ok, + Eof, + /// isnt an expression + NoExpr, + } +} + +mod attrset { + use chumsky::container::Container; + use rowan::Checkpoint; + + use super::{expr::ExprRes, instr::NodeRes, Parser}; + use crate::parser::{ + ast::lossless::{lex::SyntaxKind::*, parser::SyntaxError}, + Span, + }; + impl Parser<'_> { + pub(super) fn attrset(&mut self, checkpoint: Checkpoint) -> AttrsetRes { + assert_eq!(self.current(), Some(L_CURLY)); + self.bump(); + self.skip_ws(); + match self.current() { + Some(R_CURLY) => { + self.builder.start_node_at(checkpoint, ATTR_SET.into()); + self.bump(); + self.finish_node(); + AttrsetRes::Ok + } + Some(_) => { + self.builder.start_node_at(checkpoint, ATTR_SET.into()); + let res = match self.attrs() { + AttrRes::Eof => AttrsetRes::Eof, + AttrRes::RCurly | AttrRes::Ok => { + println!("curr: {:?}", self.current()); + AttrsetRes::Ok + } + }; + self.finish_node(); + res + } + None => AttrsetRes::Eof, + } + // self.start_node(ATTR); + } + + fn attrs(&mut self) -> AttrRes { + let mut res = AttrRes::Ok; + + while res == AttrRes::Ok { + println!("it: {:?}", self.tokens.last()); + match self.attr() { + AttrRes::Ok => { + self.skip_ws_without_newlines(); + println!( + "a: {:?}, {:?}", + self.tokens.last(), + self.tokens.get(self.tokens.len() - 2) + ); + println!("errs: {:?}", self.errors); + res = AttrRes::Ok; + let checkpoint_previous_end = self.builder.checkpoint(); + res = match self.current() { + Some(COMMA) => { + self.bump(); + AttrRes::Ok + } + Some(R_CURLY) => { + self.bump(); + res = AttrRes::Ok; + break; + } + Some(NEWLINE) => { + self.skip_ws(); + println!( + "b: {:?}, {:?}", + self.tokens.last(), + self.tokens.get(self.tokens.len() - 2) + ); + match self.current() { + Some(COMMA) => { + self.bump(); + AttrRes::Ok + } + Some(R_CURLY) => { + self.bump(); + res = AttrRes::Ok; + break; + } + Some(IDENT) => { + println!("wtf"); + self.syntax_err_by_checkpoint( + checkpoint_previous_end, + SyntaxError::ExpectedCommaBetweenAttrs, + ); + // self.syntax_err(SyntaxError::ExpectedCommaBetweenAttrs); + AttrRes::Ok + } + Some(_) => { + self.bump(); + AttrRes::Ok + } + None => { + res = AttrRes::Eof; + break; + } + } + } + Some(_) => { + self.bump(); + println!( + "c: {:?}, {:?}", + self.tokens.last(), + self.tokens.get(self.tokens.len() - 2) + ); + AttrRes::Ok + } + None => { + res = AttrRes::Eof; + break; + } + } + } + AttrRes::Eof => { + res = AttrRes::Eof; + break; + } + AttrRes::RCurly => { + res = AttrRes::RCurly; + break; + } + } + } + println!("toks_left: {:?}", self.tokens); + res + } + + fn attr(&mut self) -> AttrRes { + self.skip_ws(); + self.start_node(ATTR); + self.start_node(ATTR_NAME); + match self.current() { + Some(IDENT) => self.bump(), + Some(R_CURLY) => return AttrRes::Ok, + Some(_) => self.expected(IDENT), + None => return AttrRes::Eof, + } + self.finish_node(); + self.skip_ws(); + match self.current() { + Some(COLON) => self.bump(), + Some(R_CURLY) => { + self.expected(COLON); + return AttrRes::RCurly; + } + Some(_) => self.expected(COLON), + None => return AttrRes::Eof, + } + self.skip_ws(); + self.start_node(ATTR_VALUE); + match self.expr(None) { + ExprRes::Ok => self.bump(), + ExprRes::Eof => return AttrRes::Eof, + ExprRes::NoExpr => match self.current() { + Some(COMMA) => self.syntax_err(SyntaxError::AttrExpectedValue), + Some(R_CURLY) => { + self.syntax_err(SyntaxError::AttrExpectedValue); + return AttrRes::RCurly; + } + Some(_) => self.expected(EXPR), + None => unreachable!(), + }, + } + self.finish_node(); + self.finish_node(); + AttrRes::Ok + } + } + + #[derive(PartialEq, Eq)] + pub enum AttrsetRes { + Ok, + Eof, + } + + #[derive(PartialEq, Eq)] + enum AttrRes { + Ok, + Eof, + RCurly, + } +} + +mod instr { + use super::Parser; + use crate::parser::{ + ast::lossless::{lex::SyntaxKind::*, parser::expr::ExprRes}, + Span, + }; + + impl Parser<'_> { + pub(super) fn instr(&mut self) -> NodeRes { + assert_eq!(self.current(), Some(IDENT)); + self.skip_ws(); + self.start_node(INSTR); + self.instr_name(); + + // used to count positionals + let mut i = 0; + let params_checkpoint = self.builder.checkpoint(); + loop { + match self.expr(None) { + ExprRes::Ok => { + i += 1; + continue; + } + ExprRes::NoExpr | ExprRes::Eof => break, + } + } + if i >= 1 { + self.builder + .start_node_at(params_checkpoint, INSTR_PARAMS.into()); + self.finish_node(); + } + self.finish_node(); + NodeRes::Ok + } + + fn instr_name(&mut self) { + self.start_node(INSTR_NAME); + while self.current() == Some(IDENT) { + self.bump(); + self.skip_ws_without_newlines(); + } + self.finish_node(); + } + } + + pub(super) enum NodeRes { + Ok, + Eof, + } +} diff --git a/crates/lang/src/parser/ast/raw_ast.rs b/crates/lang/src/parser/ast/raw_ast.rs new file mode 100644 index 0000000..a0ec749 --- /dev/null +++ b/crates/lang/src/parser/ast/raw_ast.rs @@ -0,0 +1,50 @@ +use indexmap::IndexMap; + +use super::super::Spanned; + +use super::super::Span; +use super::Lit; + +#[derive(Debug, PartialEq)] +pub struct RawExpression<'src> { + pub expr: Box>, + pub span: Span, +} + +impl<'src> RawExpression<'src> { + pub fn new(expr: RawExpr<'src>, span: Span) -> Self { + Self { + expr: Box::new(expr), + span, + } + } +} + +#[derive(Debug, PartialEq)] +pub enum RawExpr<'src> { + Node( + Vec>, + Option, RawExpression<'src>>>>, + ), + SimplePipe(RawExpression<'src>, RawExpression<'src>), + // NamingPipe( + // Box>, + // (Vec>, Vec>), + // Box>, + // ), + MappingPipe(RawExpression<'src>, RawExpression<'src>), + NullPipe(RawExpression<'src>, RawExpression<'src>), + MultiPipe(IndexMap, RawExpression<'src>>), + // LetIn( + // IndexMap, Box>>, + // Box>, + // ), + // $ + Var(&'src str), + // @ + InputVar(&'src str), + AttrSet(Spanned, RawExpression<'src>>>), + Lit(Lit<'src>), + Matrix(Spanned<(u16, u16)>, Vec>), + List(Vec>), +} diff --git a/crates/lang/src/parser/tests.rs b/crates/lang/src/parser/tests.rs index 2bd2779..5bd221f 100644 --- a/crates/lang/src/parser/tests.rs +++ b/crates/lang/src/parser/tests.rs @@ -1,4 +1,4 @@ -use crate::parser::ast::{Expr, File}; +use crate::parser::ast::File; use crate::parser::parse; use crate::tokens::Token; use chumsky::input::Stream; diff --git a/crates/lang/src/tokens.rs b/crates/lang/src/tokens.rs index 6caa57c..3314916 100644 --- a/crates/lang/src/tokens.rs +++ b/crates/lang/src/tokens.rs @@ -14,6 +14,14 @@ pub enum Token<'a> { Let, #[token("in")] In, + #[token("mat")] + Mat, + #[regex("[\\d]+x[\\d]+", |lex| { + let (x, y) = lex.slice().split_once('x').expect("shouldn't fail to split"); + // TODO: handle overflows etc + (x.parse().expect("should only match valid u16s"), y.parse().expect("should only match valid u16s")) + })] + Dimensions((u16, u16)), #[regex("[\\d]+", |lex| lex.slice())] Int(&'a str), #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)", |lex| lex.slice())] @@ -30,12 +38,11 @@ pub enum Token<'a> { Mult, #[token("/")] Div, - #[regex("[a-zA-Z_]+[a-zA-Z0-9_\\-]*", |lex| lex.slice())] + // TODO: figure out how to allow numbers in words? + #[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*", |lex| lex.slice().trim())] Word(&'a str), #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] VarIdent(&'a str), - #[token("@..")] - InputSpread, #[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] InputIdent(&'a str), #[token(",")] @@ -55,7 +62,7 @@ pub enum Token<'a> { #[token(":")] Colon, #[token(";")] - SemiColon, + Semicolon, #[token("[")] BracketOpen, #[token("]")] diff --git a/crates/lang/src/tokens/tests.rs b/crates/lang/src/tokens/tests.rs index 2b0454f..3b35ace 100644 --- a/crates/lang/src/tokens/tests.rs +++ b/crates/lang/src/tokens/tests.rs @@ -34,7 +34,7 @@ lexer_test! { lexer_test! { test_lex_subgroup, - "subgroup(first, second) = a | b { in1: $first } | c { in1: $second }", + "subgroup(first, second) = a | b [ $first ] | c [ $second ]", [ Token::Word("subgroup"), Token::ParenOpen, @@ -46,18 +46,14 @@ lexer_test! { Token::Word("a"), Token::Pipe, Token::Word("b"), - Token::BraceOpen, - Token::Word("in1"), - Token::Colon, + Token::BracketOpen, Token::VarIdent("first"), - Token::BraceClose, + Token::BracketClose, Token::Pipe, Token::Word("c"), - Token::BraceOpen, - Token::Word("in1"), - Token::Colon, + Token::BracketOpen, Token::VarIdent("second"), - Token::BraceClose + Token::BracketClose ] } diff --git a/flake.lock b/flake.lock index 43f154e..7c3a06d 100644 --- a/flake.lock +++ b/flake.lock @@ -33,11 +33,11 @@ "pre-commit-hooks": "pre-commit-hooks_2" }, "locked": { - "lastModified": 1712579011, - "narHash": "sha256-trHgFNW8CW85c1OuAPBI+OGous53KkVhMemvcq7syDo=", + "lastModified": 1712724616, + "narHash": "sha256-qs9uEbrOpp6oXcDOp5cpilyU52t78ZpEPATtaHRVLIU=", "owner": "cachix", "repo": "devenv", - "rev": "a71323c618664a6b7a39bc183b0ce22ac8511cf9", + "rev": "d1a11d14dbe96a03c7f9068e4d3af05f283734e0", "type": "github" }, "original": { @@ -83,11 +83,11 @@ "rust-analyzer-src": "rust-analyzer-src" }, "locked": { - "lastModified": 1712384501, - "narHash": "sha256-AZmYmEnc1ZkSlxUJVUtGh9VFAqWPr+xtNIiBqD2eKfc=", + "lastModified": 1712730246, + "narHash": "sha256-iB8bFj+07RHpmt+XuGGvYQk2Iwm12u6+DklGq/+Tg5s=", "owner": "nix-community", "repo": "fenix", - "rev": "99c6241db5ca5363c05c8f4acbdf3a4e8fc42844", + "rev": "d402ae4a5e5676722290470f61a5e8e3155b5487", "type": "github" }, "original": { @@ -447,11 +447,11 @@ }, "nixpkgs_3": { "locked": { - "lastModified": 1712163089, - "narHash": "sha256-Um+8kTIrC19vD4/lUCN9/cU9kcOsD1O1m+axJqQPyMM=", + "lastModified": 1712608508, + "narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=", "owner": "nixos", "repo": "nixpkgs", - "rev": "fd281bd6b7d3e32ddfa399853946f782553163b5", + "rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6", "type": "github" }, "original": { @@ -463,11 +463,11 @@ }, "nixpkgs_4": { "locked": { - "lastModified": 1712439257, - "narHash": "sha256-aSpiNepFOMk9932HOax0XwNxbA38GOUVOiXfUVPOrck=", + "lastModified": 1712608508, + "narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "ff0dbd94265ac470dda06a657d5fe49de93b4599", + "rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6", "type": "github" }, "original": { @@ -567,11 +567,11 @@ "rust-analyzer-src": { "flake": false, "locked": { - "lastModified": 1712156296, - "narHash": "sha256-St7ZQrkrr5lmQX9wC1ZJAFxL8W7alswnyZk9d1se3Us=", + "lastModified": 1712663608, + "narHash": "sha256-tN9ZL6kGppmHg84lxlpAlaN+kXWNctKK7Yitq/iXDEw=", "owner": "rust-lang", "repo": "rust-analyzer", - "rev": "8e581ac348e223488622f4d3003cb2bd412bf27e", + "rev": "a5feb4f05f09adca661c869b1bf2324898cbaa43", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index da80ec2..9d43d6e 100644 --- a/flake.nix +++ b/flake.nix @@ -11,41 +11,59 @@ extra-substituters = "https://devenv.cachix.org"; }; - outputs = { self, nixpkgs, devenv, systems, ... } @ inputs: - let - forEachSystem = nixpkgs.lib.genAttrs (import systems); - in - { - devShells = forEachSystem - (system: - let - pkgs = nixpkgs.legacyPackages.${system}; - in - { - default = devenv.lib.mkShell { - inherit inputs pkgs; - modules = [ - ({pkgs, config, ...}: { - languages.rust = { - enable = true; - channel = "nightly"; - }; + outputs = { + self, + nixpkgs, + devenv, + systems, + ... + } @ inputs: let + forEachSystem = nixpkgs.lib.genAttrs (import systems); + in { + devShells = + forEachSystem + (system: let + pkgs = nixpkgs.legacyPackages.${system}; + in { + default = devenv.lib.mkShell { + inherit inputs pkgs; + modules = [ + ({ + pkgs, + config, + ... + }: { + languages.rust = { + enable = true; + channel = "nightly"; + components = [ + "rustc" + "cargo" + "clippy" + "rustfmt" + "rust-src" + ]; + }; - pre-commit.hooks = { - clippy.enable = true; - rustfmt.enable = true; - }; + pre-commit.hooks = { + clippy.enable = true; + rustfmt.enable = true; + }; - packages = with pkgs; [ - just nushell - ripgrep - typst typst-lsp - mold - cargo-nextest cargo-watch - ]; - }) + packages = with pkgs; [ + just + nushell + ripgrep + typst + typst-lsp + mold + cargo-nextest + cargo-watch + rust-analyzer ]; - }; - }); - }; + }) + ]; + }; + }); + }; } diff --git a/testfiles/test.owo b/testfiles/test.owo index 809a6e1..3662b45 100644 --- a/testfiles/test.owo +++ b/testfiles/test.owo @@ -1,7 +1,4 @@ -def blend1 = [ - open "test.png", - open "test2.png" - ] - | blend multiply 0.6 - -def blend2 = open "test.png" | blend multiply 0.6 [ open test2.png ] +meow mew meow 5 3.14 "uwu" { + meow: test 24 + another: hi "hello", +} "awa"