From 381ab45edcf496ec1ed6b564602e41a0a52ecf66 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Wed, 24 Apr 2024 11:07:38 +0200 Subject: [PATCH] lang: rewrite parser --- Cargo.lock | 215 +++++-------- crates/lang/Cargo.toml | 3 +- crates/lang/src/err_reporting.rs | 88 ------ crates/lang/src/lib.rs | 4 +- crates/lang/src/main.rs | 24 +- crates/lang/src/parser.rs | 257 ++++++++-------- crates/lang/src/parser/ast.rs | 24 -- crates/lang/src/parser/ast/ast_tree.rs | 31 -- crates/lang/src/parser/ast/lossless.rs | 19 -- crates/lang/src/parser/ast/lossless/parser.rs | 290 ------------------ crates/lang/src/parser/ast/raw_ast.rs | 50 --- crates/lang/src/parser/error.rs | 6 + crates/lang/src/parser/events.rs | 23 ++ crates/lang/src/parser/grammar.rs | 14 + crates/lang/src/parser/grammar/expression.rs | 14 + .../parser/grammar/expression/instruction.rs | 30 ++ .../lang/src/parser/grammar/expression/lit.rs | 20 ++ crates/lang/src/parser/input.rs | 61 ++++ crates/lang/src/parser/output.rs | 113 +++++++ crates/lang/src/parser/parser.rs | 6 + .../{ast/lossless/lex.rs => syntax_kind.rs} | 27 +- crates/lang/src/parser/tests.rs | 142 --------- crates/lang/src/tokens.rs | 81 ----- crates/lang/src/tokens/tests.rs | 135 -------- testfiles/test.owo | 8 +- 25 files changed, 524 insertions(+), 1161 deletions(-) delete mode 100644 crates/lang/src/err_reporting.rs delete mode 100644 crates/lang/src/parser/ast.rs delete mode 100644 crates/lang/src/parser/ast/ast_tree.rs delete mode 100644 crates/lang/src/parser/ast/lossless.rs delete mode 100644 crates/lang/src/parser/ast/lossless/parser.rs delete mode 100644 crates/lang/src/parser/ast/raw_ast.rs create mode 100644 crates/lang/src/parser/error.rs create mode 100644 crates/lang/src/parser/events.rs create mode 100644 crates/lang/src/parser/grammar.rs create mode 100644 crates/lang/src/parser/grammar/expression.rs create mode 100644 crates/lang/src/parser/grammar/expression/instruction.rs create mode 100644 crates/lang/src/parser/grammar/expression/lit.rs create mode 100644 crates/lang/src/parser/input.rs create mode 100644 crates/lang/src/parser/output.rs create mode 100644 crates/lang/src/parser/parser.rs rename crates/lang/src/parser/{ast/lossless/lex.rs => syntax_kind.rs} (74%) delete mode 100644 crates/lang/src/tokens.rs delete mode 100644 crates/lang/src/tokens/tests.rs diff --git a/Cargo.lock b/Cargo.lock index a25751d..d759c0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,33 +8,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - [[package]] name = "anstream" version = "0.6.5" @@ -160,31 +133,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" -[[package]] -name = "cc" -version = "1.0.90" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" - [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chumsky" -version = "1.0.0-alpha.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7b80276986f86789dc56ca6542d53bba9cda3c66091ebbe7bd96fc1bdf20f1f" -dependencies = [ - "hashbrown", - "regex-automata", - "serde", - "stacker", - "unicode-ident", -] - [[package]] name = "clap" version = "4.4.12" @@ -298,6 +252,40 @@ dependencies = [ "phf", ] +[[package]] +name = "darling" +version = "0.20.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "deranged" version = "0.3.11" @@ -328,6 +316,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "drop_bomb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bda8e21c04aca2ae33ffc2fd8c23134f3cac46db123ba97bd9d3f3b8a4a85e1" + [[package]] name = "ego-tree" version = "0.6.2" @@ -340,6 +334,27 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +[[package]] +name = "enumset" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226c0da7462c13fb57e5cc9e0dc8f0635e7d27f276a3a7fd30054647f669007d" +dependencies = [ + "enumset_derive", +] + +[[package]] +name = "enumset_derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08b6c6ab82d70f08844964ba10c7babb716de2ecaeab9be5717918a5177d3af" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -447,10 +462,6 @@ name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" -dependencies = [ - "ahash", - "allocator-api2", -] [[package]] name = "heck" @@ -458,6 +469,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "image" version = "0.24.7" @@ -516,9 +533,10 @@ name = "lang" version = "0.1.0" dependencies = [ "ariadne", - "chumsky", "clap", + "drop_bomb", "ego-tree", + "enumset", "indexmap", "logos", "petgraph", @@ -584,7 +602,7 @@ dependencies = [ "lazy_static", "proc-macro2", "quote", - "regex-syntax 0.8.2", + "regex-syntax", "syn", ] @@ -661,12 +679,6 @@ dependencies = [ "libc", ] -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - [[package]] name = "option-ext" version = "0.2.0" @@ -759,15 +771,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "psm" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" -dependencies = [ - "cc", -] - [[package]] name = "qoi" version = "0.4.1" @@ -851,23 +854,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "regex-automata" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.7.5", -] - -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" - [[package]] name = "regex-syntax" version = "0.8.2" @@ -975,19 +961,6 @@ dependencies = [ "lock_api", ] -[[package]] -name = "stacker" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "winapi", -] - [[package]] name = "strsim" version = "0.10.0" @@ -1090,12 +1063,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1108,28 +1075,6 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb" -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.48.0" @@ -1268,26 +1213,6 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" -[[package]] -name = "zerocopy" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "zune-inflate" version = "0.2.54" diff --git a/crates/lang/Cargo.toml b/crates/lang/Cargo.toml index 3e03209..fba2bcf 100644 --- a/crates/lang/Cargo.toml +++ b/crates/lang/Cargo.toml @@ -7,13 +7,14 @@ edition = "2021" [dependencies] logos = "0.14" -chumsky = {version= "1.0.0-alpha.7", features=["label", "extension"]} petgraph = { workspace = true} indexmap = "2.2.6" clap = { version = "4", features = ["derive"] } ariadne = "0.4.0" ego-tree = "0.6.2" rowan = "0.15.15" +drop_bomb = "0.1.5" +enumset = "1.1.3" [lints] workspace = true diff --git a/crates/lang/src/err_reporting.rs b/crates/lang/src/err_reporting.rs deleted file mode 100644 index ebf7866..0000000 --- a/crates/lang/src/err_reporting.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::{collections::HashMap, fs}; - -use ariadne::{sources, Label, Report, Source}; -use chumsky::{ - error::{self, Rich}, - ParseResult, -}; -use indexmap::IndexMap; - -use crate::{ - parser::{ast::File, Span}, - tokens::Token, -}; - -#[derive(Debug, PartialEq, Eq, Hash)] -pub enum Stage { - Lex, - Parse, -} - -impl Stage { - fn variants() -> [Stage; 2] { - [Stage::Lex, Stage::Parse] - } -} - -pub struct ErrorCollector<'filename, 'tokens, 'src> { - files: HashMap<&'filename str, &'src str>, - raw_errors: IndexMap<(&'filename str, Stage), Vec, Span>>>, -} - -impl<'filename, 'tokens, 'src> ErrorCollector<'filename, 'tokens, 'src> { - pub fn new(files: Vec<(&'filename str, &'src str)>) -> Self { - Self { - files: HashMap::from_iter(files.clone()), - raw_errors: files - .iter() - .flat_map(|(name, _)| Stage::variants().map(|s| (name, s))) - .map(|(name, stage)| ((*name, stage), Vec::new())) - .collect(), - } - } - - pub fn insert_many( - &mut self, - file: &'filename str, - curr_stage: Stage, - mut errs: Vec, Span>>, - ) { - let err_vec = self - .raw_errors - .get_mut(&(file, curr_stage)) - .expect("filename should exist"); - err_vec.append(&mut errs); - } - - pub fn analyze_and_report(self) { - let ErrorCollector { files, raw_errors } = self; - todo!() - } - - pub fn report_raw(self) { - let ErrorCollector { files, raw_errors } = self; - - for ((file, stage), errs) in raw_errors.into_iter() { - for err in errs { - eprintln!("e: {err:?}"); - Report::build(ariadne::ReportKind::Error, file, err.span().start) - .with_message(format!("error at stage {stage:?}, {:?}", err.reason())) - .with_label( - Label::new((file, err.span().into_range())).with_message(format!( - "found: {:?}", - err.found().expect("errors should have a reason") - )), - ) - .with_help(format!( - "expected: {:?}", - err.expected().collect::>() - )) - .finish() - .print((file, Source::from(files[file]))); - } - } - } -} - -#[derive(Debug, PartialEq, Eq)] -struct Loc<'filename>(&'filename str, Span); diff --git a/crates/lang/src/lib.rs b/crates/lang/src/lib.rs index ebf22d3..1b08789 100644 --- a/crates/lang/src/lib.rs +++ b/crates/lang/src/lib.rs @@ -1,4 +1,2 @@ -#![feature(type_alias_impl_trait)] -pub mod err_reporting; +#![feature(type_alias_impl_trait, lint_reasons)] pub mod parser; -pub mod tokens; diff --git a/crates/lang/src/main.rs b/crates/lang/src/main.rs index 346862e..6b1caa0 100644 --- a/crates/lang/src/main.rs +++ b/crates/lang/src/main.rs @@ -1,12 +1,9 @@ +use clap::Parser; use std::{fs, path::PathBuf}; -use clap::Parser; -use lang::{ - err_reporting::ErrorCollector, - parser::ast::lossless::{ - lex, - parser::{self, parse}, - }, +use lang::parser::{ + parser::{self, grammar, input, output::Output}, + syntax_kind, }; #[derive(Parser)] @@ -19,8 +16,17 @@ fn main() { let args = Args::parse(); let n = args.file.clone(); let f = fs::read_to_string(n.clone()).expect("failed to read file"); - println!("toks: {:?}", lex::lex(&f)); - println!("parse res: {:?}", parse(&f)); + + let toks = dbg!(syntax_kind::lex(&f)); + let input = input::Input::new(&toks); + let mut parser = parser::Parser::new(input); + + grammar::source_file(&mut parser); + + let p_out = dbg!(parser.finish()); + let o = Output::from_parser_output(toks, p_out); + + println!("Out: {:?}", o); // let parse_res = parser::parse(&f); // println!("parse: {:?}", parse_res); diff --git a/crates/lang/src/parser.rs b/crates/lang/src/parser.rs index b1ee34b..e850ab0 100644 --- a/crates/lang/src/parser.rs +++ b/crates/lang/src/parser.rs @@ -1,152 +1,143 @@ -use chumsky::{ - error::Rich, - input::{Stream, ValueInput}, - prelude::*, - primitive::just, - recursive::recursive, - span::SimpleSpan, - IterParser, -}; -use indexmap::IndexMap; -use logos::Logos; +use drop_bomb::DropBomb; -use crate::tokens::Token; +use self::{error::SyntaxError, events::Event, input::Input, syntax_kind::SyntaxKind}; -pub mod ast; +pub mod syntax_kind; #[cfg(test)] mod tests; -use self::ast::{ - raw_ast::{RawExpr, RawExpression}, - File, -}; -pub type Span = SimpleSpan; -pub type Spanned = (T, Span); +pub mod error; +pub mod events; +pub mod grammar; +pub mod input; +pub mod output; -pub fn parse(src: &str) -> ParseResult, Rich<'_, Token<'_>>> { - let toks: Vec<_> = Token::lexer(src) - .spanned() - .map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s))) - .collect(); - let tok_stream = Stream::from_iter(toks).spanned((src.len()..src.len()).into()); - parser().parse(tok_stream) +pub struct Parser<'src, 'toks> { + input: Input<'src, 'toks>, + pos: usize, + events: Vec, + errors: Vec, } -pub(crate) fn parser< - 'tokens, - 'src: 'tokens, - I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, ->() -> impl Parser<'tokens, I, File<'src>, extra::Err, Span>>> { - let word = select! { Token::Word(word) = e => (word, e.span())}; - let expr = recursive(|expr| { - let lit = select! { - Token::Int(i) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Int(i.parse().expect("TODO: handle better"))), e.span()), - Token::Float(f) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Float(f.parse().expect("TODO: handle better"))), e.span()), - Token::String(s) = e => RawExpression::new(RawExpr::Lit(ast::Lit::String(s.strip_prefix('"').expect("a").strip_suffix('"').expect("b"))), e.span()) - }; - let mat = just(Token::Mat) - .ignore_then(select! { Token::Dimensions(dimensions) = e => (dimensions, e.span())}) - .then( - lit.separated_by(just(Token::Comma)) - .collect::>() - .separated_by(just(Token::Semicolon)) - .collect::>() - .delimited_by(just(Token::BracketOpen), just(Token::BracketClose)), - ) - .map_with(|(dimensions, data), e| { - // TODO: Validation and proper error handling/reporting - // (validation = validating the matrix dimensions) - RawExpression::new( - RawExpr::Matrix(dimensions, data.into_iter().flatten().collect()), - e.span(), - ) - }); - let var = select! { - Token::VarIdent(name) => (RawExpr::Var as fn(_) -> _, name), - Token::InputIdent(name) => (RawExpr::InputVar as fn(_) -> _, name) +impl<'src, 'toks> Parser<'src, 'toks> { + pub fn new(input: Input<'src, 'toks>) -> Self { + Self { + input, + pos: 0, + events: Vec::new(), + errors: Vec::new(), } - .map_with(|(item_type, name), extra| RawExpression::new(item_type(name), extra.span())) - .labelled("variable"); + } - let attrset = word - .labelled("attr name") - .then_ignore(just(Token::Colon)) - .then(expr) - .labelled("attr body") - .separated_by(just(Token::Comma)) - .collect::>() - .map(IndexMap::from_iter) - .delimited_by(just(Token::BraceOpen), just(Token::BraceClose)) - .map_with(|v, e| (v, e.span())) - .labelled("attrset"); + pub fn finish(self) -> (Vec, Vec) { + (self.events, self.errors) + } - let node = word - .repeated() - .collect() - .then(attrset.clone().or_not()) - .map_with(|(name, params), extra| { - RawExpression::new(RawExpr::Node(name, params), extra.span()) - }) - // .or(var) - // .or(attrset - // .map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span()))) - // .or(lit) - // .or(mat) - .labelled("node"); + pub(crate) fn nth(&self, n: usize) -> SyntaxKind { + self.input.kind(self.pos + n) + } - let atom = var - .or(lit) - .or(mat) - .or(attrset.map_with(|attrset, extra| { - RawExpression::new(RawExpr::AttrSet(attrset), extra.span()) - })) - .or(node.clone()); - - #[allow(clippy::let_and_return)] - let pipeline = atom - .clone() - .then(choice(( - just(Token::Pipe).to(RawExpr::SimplePipe as fn(_, _) -> _), - just(Token::MappingPipe).to(RawExpr::MappingPipe as fn(_, _) -> _), - just(Token::NullPipe).to(RawExpr::NullPipe as fn(_, _) -> _), - ))) - .repeated() - .foldr_with(atom, |(curr, pipe), next, extra| { - RawExpression::new(pipe(curr, next), extra.span()) - }); - - pipeline - }); - - let decls = just(Token::Def) - .ignore_then( - word.then_ignore(just(Token::Equals)) - .then(expr.clone().map(|expr| expr)) - .then_ignore(just(Token::Semicolon)), - ) - .repeated() - .collect::>() - .map(|decls| File { - decls: IndexMap::from_iter(decls), + pub fn eat_succeeding_ws(&mut self) { + self.push_ev(Event::Eat { + count: self.input.meaningless_tail_len(), }); + } - let single_expr = expr.map(|expr| File { - decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]), - }); + pub(crate) fn current(&self) -> SyntaxKind { + self.input.kind(self.pos) + } - just(Token::Def).rewind().ignore_then(decls).or(single_expr) - // single_expr.or(decls) + pub(crate) fn start(&mut self) -> Marker { + let pos = self.events.len(); + self.push_ev(Event::tombstone()); + Marker::new(pos) + } - // expr.map(|expr| File { - // decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]), - // }) - // .or(decl.repeated().collect::>().map(|decls| File { - // decls: IndexMap::from_iter(decls), - // })) + pub(crate) fn at(&self, kind: SyntaxKind) -> bool { + self.nth_at(0, kind) + } + + pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + + self.do_bump(); + true + } + + pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { + self.nth(n) == kind + } + + fn do_bump(&mut self) { + self.push_ev(Event::Eat { + count: self.input.preceding_meaningless(self.pos), + }); + self.pos += 1; + } + + fn push_ev(&mut self, event: Event) { + self.events.push(event) + } } -pub mod asg { - use petgraph::graph::DiGraph; - - use super::Spanned; +pub(crate) struct Marker { + pos: usize, + bomb: DropBomb, +} + +impl Marker { + pub(crate) fn new(pos: usize) -> Self { + Self { + pos, + bomb: DropBomb::new("Marker must be completed or abandoned"), + } + } + pub(crate) fn complete(mut self, p: &mut Parser<'_, '_>, kind: SyntaxKind) -> CompletedMarker { + self.bomb.defuse(); + match &mut p.events[self.pos] { + Event::Start { kind: slot, .. } => *slot = kind, + _ => unreachable!(), + } + p.push_ev(Event::Finish); + + CompletedMarker { + pos: self.pos, + kind, + } + } + + pub(crate) fn abandon(mut self, p: &mut Parser<'_, '_>) { + self.bomb.defuse(); + if self.pos == p.events.len() - 1 { + match p.events.pop() { + Some(Event::Start { + kind: SyntaxKind::TOMBSTONE, + forward_parent: None, + }) => (), + _ => unreachable!(), + } + } + } +} + +pub(crate) struct CompletedMarker { + pos: usize, + kind: SyntaxKind, +} + +impl CompletedMarker { + pub(crate) fn precede(self, p: &mut Parser<'_, '_>) -> Marker { + let new_pos = p.start(); + + match &mut p.events[self.pos] { + Event::Start { forward_parent, .. } => { + *forward_parent = Some(new_pos.pos - self.pos); + } + _ => unreachable!(), + } + + new_pos + } } diff --git a/crates/lang/src/parser/ast.rs b/crates/lang/src/parser/ast.rs deleted file mode 100644 index d45ef66..0000000 --- a/crates/lang/src/parser/ast.rs +++ /dev/null @@ -1,24 +0,0 @@ -use std::collections::{BTreeMap, HashMap}; - -use indexmap::IndexMap; - -use super::Spanned; - -#[derive(Debug, PartialEq)] -pub struct File<'src> { - pub decls: IndexMap, raw_ast::RawExpression<'src>>, -} - -pub mod raw_ast; - -#[derive(Debug, PartialEq)] -pub enum Lit<'src> { - // TODO: more bigger better number types - Int(i64), - Float(f64), - String(&'src str), -} - -pub mod lossless; - -pub mod ast_tree; diff --git a/crates/lang/src/parser/ast/ast_tree.rs b/crates/lang/src/parser/ast/ast_tree.rs deleted file mode 100644 index 46db4e9..0000000 --- a/crates/lang/src/parser/ast/ast_tree.rs +++ /dev/null @@ -1,31 +0,0 @@ -use ego_tree::Tree; - -use crate::parser::Spanned; - -use super::{File, Lit}; - -pub struct Ast<'src> { - tree: Tree>, -} - -struct AstNode<'src> { - kind: NodeKind<'src>, -} - -enum NodeKind<'src> { - Decl, - Ident(&'src str), - Instr, - Expr, - MappingPipe, - NullPipe, - MultiPipe, - Var(&'src str), - InputVar(&'src str), - AttrSet, - Attr, - Lit(Lit<'src>), - Matrix, - Dimensions(u16, u16), - MatrixRow, -} diff --git a/crates/lang/src/parser/ast/lossless.rs b/crates/lang/src/parser/ast/lossless.rs deleted file mode 100644 index 0047441..0000000 --- a/crates/lang/src/parser/ast/lossless.rs +++ /dev/null @@ -1,19 +0,0 @@ -use self::lex::SyntaxKind; - -pub mod parser; - -pub mod lex; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -enum Lang {} -impl rowan::Language for Lang { - type Kind = SyntaxKind; - #[allow(unsafe_code)] - fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { - assert!(raw.0 <= SyntaxKind::ROOT as u16); - unsafe { std::mem::transmute::(raw.0) } - } - fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { - kind.into() - } -} diff --git a/crates/lang/src/parser/ast/lossless/parser.rs b/crates/lang/src/parser/ast/lossless/parser.rs deleted file mode 100644 index 3a9c11d..0000000 --- a/crates/lang/src/parser/ast/lossless/parser.rs +++ /dev/null @@ -1,290 +0,0 @@ -use std::borrow::Borrow; - -use rowan::{ - Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken, -}; - -use crate::parser::{ - ast::lossless::{lex::SyntaxKind::*, Lang}, - Span, -}; - -use self::parser_to_events::{to_events, Event}; - -use super::lex::{self, SyntaxKind}; - -pub mod parser_to_events { - use chumsky::prelude::*; - - use crate::parser::ast::lossless::lex::{ - self, - SyntaxKind::{self, *}, - }; - - #[derive(Debug, PartialEq, Eq, Clone, Copy)] - pub enum Event { - StartNode(SyntaxKind), - StartErr(SyntaxError), - EatToken, - FinishNode, - FinishErr, - } - - #[derive(Debug, PartialEq, Eq, Clone, Copy)] - pub enum SyntaxError { - Expected(SyntaxKind), - AttrExpectedValue, - /// guessed if there's a newline and attr on next line without comma - /// should then suggest comma after attr - ExpectedCommaBetweenAttrs, - } - - pub fn to_events(tokens: &[(SyntaxKind, &str)]) -> Vec { - let only_toks: Vec = tokens.iter().map(|(t, _)| *t).collect(); - let res = parser().parse(&only_toks); - res.unwrap() - } - - macro_rules! padded { - ($parser:expr) => {{ - let ws = one_of([WHITESPACE, NEWLINE]) - .to(Event::EatToken) - .repeated() - .collect::>(); - ws.then($parser) - .then(ws) - .map(|((mut before, mut c), mut after)| { - before.append(&mut c); - before.append(&mut after); - before - }) - }}; - } - macro_rules! parenthesized { - ($parser:expr) => { - just(L_PAREN) - .to(vec![Event::EatToken]) - .then($parser) - .then(just(R_PAREN).to(vec![Event::EatToken])) - .map(|((mut before, mut c), mut after)| { - before.insert(0, Event::StartNode(PARENTHESIZED_EXPR)); - before.append(&mut c); - before.append(&mut after); - before.push(Event::FinishNode); - before - }) - }; - } - - pub fn parser<'toks>() -> impl Parser<'toks, &'toks [SyntaxKind], Vec> { - let ws = one_of([WHITESPACE, NEWLINE]) - .to(Event::EatToken) - .repeated() - .collect::>(); - let ident = just(IDENT).to(vec![Event::EatToken]); - - let expr = recursive(|expr| { - let lit = one_of([INT_NUM, FLOAT_NUM, STRING]).to(vec![ - Event::StartNode(EXPR), - Event::EatToken, - Event::FinishNode, - ]); - let attrset = just(L_CURLY) - .then( - padded!(just(IDENT).to(vec![ - Event::StartNode(ATTR), - Event::StartNode(ATTR_NAME), - Event::EatToken, - Event::FinishNode - ])) - .then(just(COLON)) - .then(padded!(expr.clone().map(|mut exp: Vec| { - exp.insert(0, Event::StartNode(ATTR_VALUE)); - exp.push(Event::FinishNode); - exp.push(Event::FinishNode); - exp - }))) - .map(|((mut name, _), mut value)| { - // colon - name.push(Event::EatToken); - name.append(&mut value); - name - }), - ) - .then(just(R_CURLY)) - .map(|((_, mut attrs), _)| { - attrs.insert(0, Event::StartNode(ATTR_SET)); - attrs.insert(0, Event::EatToken); - attrs.push(Event::EatToken); - attrs.push(Event::FinishNode); - attrs - }); - - let atom = lit.clone().or(attrset).or(parenthesized!(expr)); - - let instr_name = ident - .clone() - .map(|mut v| { - v.insert(0, Event::StartNode(INSTR_NAME)); - v - }) - .foldl( - ws.then(ident).repeated(), - |mut ident, (mut ws, mut next)| { - ident.append(&mut ws); - ident.append(&mut next); - ident - }, - ) - .map(|mut v| { - v.push(Event::FinishNode); - v - }); - let instr = padded!(instr_name) - .then( - atom.clone() - .map(|mut v| { - v.insert(0, Event::StartNode(INSTR_PARAMS)); - v - }) - .foldl( - ws.then(atom.clone()).repeated(), - |mut cur, (mut ws, mut next)| { - cur.append(&mut ws); - cur.append(&mut next); - cur - }, - ) - .map(|mut v| { - v.push(Event::FinishNode); - v - }), - ) - .map(|(mut name, mut params)| { - name.insert(0, Event::StartNode(INSTR)); - name.append(&mut params); - name.push(Event::FinishNode); - name - }); - padded!(instr.or(lit).or(atom)) - }); - expr - // .map(|(lit, mut ev)| lit.append(&mut ev)); - } -} - -#[derive(PartialEq, Eq)] -pub struct Parse { - pub green_node: GreenNode, -} - -impl std::fmt::Debug for Parse { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - debug_print_green_node(NodeOrToken::Node(self.green_node.borrow()), f, 0) - } -} - -fn debug_print_green_node( - node: NodeOrToken<&GreenNodeData, &GreenTokenData>, - f: &mut std::fmt::Formatter<'_>, - lvl: i32, -) -> std::fmt::Result { - for _ in 0..lvl { - f.write_str(" ")?; - } - - match node { - NodeOrToken::Node(n) => { - writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind())); - for c in n.children() { - debug_print_green_node(c, f, lvl + 1)?; - } - for _ in 0..lvl { - f.write_str(" ")?; - } - f.write_str("}\n") - } - NodeOrToken::Token(t) => { - writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text()) - } - } -} - -#[derive(Debug)] -struct Parser<'src> { - tokens: Vec<(SyntaxKind, &'src str)>, - builder: GreenNodeBuilder<'src>, - errors: Vec, -} - -#[derive(Debug, PartialEq, Eq)] -enum SyntaxError { - Expected(SyntaxKind), - AttrExpectedValue, - /// guessed if there's a newline and attr on next line without comma - /// should then suggest comma after attr - ExpectedCommaBetweenAttrs, -} - -pub fn parse(src: &str) -> Parse { - let tokens = lex::lex(src); - Parser { - tokens, - builder: GreenNodeBuilder::new(), - errors: Vec::new(), - } - .parse() -} - -impl Parser<'_> { - fn parse(mut self) -> Parse { - let evs = to_events(&self.tokens); - self.builder.start_node(ROOT.into()); - println!("evs: {evs:?}"); - - self.tokens.reverse(); - - for ev in evs { - match ev { - Event::StartNode(kind) => self.builder.start_node(kind.into()), - Event::StartErr(SyntaxError) => todo!(), - Event::EatToken => self.bump(), - Event::FinishNode => self.builder.finish_node(), - Event::FinishErr => todo!(), - } - } - - self.builder.finish_node(); - Parse { - green_node: self.builder.finish(), - } - } - - /// Advance one token, adding it to the current branch of the tree builder. - fn bump(&mut self) { - let (kind, text) = self.tokens.pop().unwrap(); - self.builder.token(kind.into(), text); - } - fn syntax_err(&mut self, err: SyntaxError) { - let (_, text) = self.tokens.pop().unwrap(); - self.builder.token(PARSE_ERR.into(), text); - self.errors.push(err); - } - fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) { - self.builder.start_node_at(checkpoint, PARSE_ERR.into()); - self.builder.finish_node(); - self.errors.push(err); - } - fn expected(&mut self, expected: SyntaxKind) { - self.syntax_err(SyntaxError::Expected(expected)) - } - /// Peek at the first unprocessed token - fn current(&self) -> Option { - self.tokens.last().map(|(kind, _)| *kind) - } - fn next(&self) -> Option { - self.tokens - .get(self.tokens.len() - 2) - .map(|(kind, _)| *kind) - } -} diff --git a/crates/lang/src/parser/ast/raw_ast.rs b/crates/lang/src/parser/ast/raw_ast.rs deleted file mode 100644 index a0ec749..0000000 --- a/crates/lang/src/parser/ast/raw_ast.rs +++ /dev/null @@ -1,50 +0,0 @@ -use indexmap::IndexMap; - -use super::super::Spanned; - -use super::super::Span; -use super::Lit; - -#[derive(Debug, PartialEq)] -pub struct RawExpression<'src> { - pub expr: Box>, - pub span: Span, -} - -impl<'src> RawExpression<'src> { - pub fn new(expr: RawExpr<'src>, span: Span) -> Self { - Self { - expr: Box::new(expr), - span, - } - } -} - -#[derive(Debug, PartialEq)] -pub enum RawExpr<'src> { - Node( - Vec>, - Option, RawExpression<'src>>>>, - ), - SimplePipe(RawExpression<'src>, RawExpression<'src>), - // NamingPipe( - // Box>, - // (Vec>, Vec>), - // Box>, - // ), - MappingPipe(RawExpression<'src>, RawExpression<'src>), - NullPipe(RawExpression<'src>, RawExpression<'src>), - MultiPipe(IndexMap, RawExpression<'src>>), - // LetIn( - // IndexMap, Box>>, - // Box>, - // ), - // $ - Var(&'src str), - // @ - InputVar(&'src str), - AttrSet(Spanned, RawExpression<'src>>>), - Lit(Lit<'src>), - Matrix(Spanned<(u16, u16)>, Vec>), - List(Vec>), -} diff --git a/crates/lang/src/parser/error.rs b/crates/lang/src/parser/error.rs new file mode 100644 index 0000000..698ecaf --- /dev/null +++ b/crates/lang/src/parser/error.rs @@ -0,0 +1,6 @@ +use crate::parser::syntax_kind::SyntaxKind; + +#[derive(Debug)] +pub enum SyntaxError { + Expected(Vec), +} diff --git a/crates/lang/src/parser/events.rs b/crates/lang/src/parser/events.rs new file mode 100644 index 0000000..6fc5b2c --- /dev/null +++ b/crates/lang/src/parser/events.rs @@ -0,0 +1,23 @@ +use crate::parser::syntax_kind::SyntaxKind; + +#[derive(Debug)] +pub enum Event { + Start { + kind: SyntaxKind, + forward_parent: Option, + }, + Finish, + Eat { + count: usize, + }, + Error, +} + +impl Event { + pub(crate) fn tombstone() -> Self { + Self::Start { + kind: SyntaxKind::TOMBSTONE, + forward_parent: None, + } + } +} diff --git a/crates/lang/src/parser/grammar.rs b/crates/lang/src/parser/grammar.rs new file mode 100644 index 0000000..f301d4e --- /dev/null +++ b/crates/lang/src/parser/grammar.rs @@ -0,0 +1,14 @@ +use crate::parser::syntax_kind::SyntaxKind::*; + +use super::Parser; + +mod expression; + +pub fn source_file(p: &mut Parser) { + let root = p.start(); + + expression::expression(p); + p.eat_succeeding_ws(); + + root.complete(p, ROOT); +} diff --git a/crates/lang/src/parser/grammar/expression.rs b/crates/lang/src/parser/grammar/expression.rs new file mode 100644 index 0000000..18323a5 --- /dev/null +++ b/crates/lang/src/parser/grammar/expression.rs @@ -0,0 +1,14 @@ +use crate::parser::{syntax_kind::SyntaxKind::*, Parser}; + +use self::{instruction::instr, lit::literal}; + +mod instruction; +mod lit; + +pub fn expression(p: &mut Parser) { + let expr = p.start(); + + instr(p); + + expr.complete(p, EXPR); +} diff --git a/crates/lang/src/parser/grammar/expression/instruction.rs b/crates/lang/src/parser/grammar/expression/instruction.rs new file mode 100644 index 0000000..136f4ed --- /dev/null +++ b/crates/lang/src/parser/grammar/expression/instruction.rs @@ -0,0 +1,30 @@ +use crate::parser::{syntax_kind::SyntaxKind::*, Parser}; + +use super::lit::literal; + +pub fn instr(p: &mut Parser) { + let instr = p.start(); + + instr_name(p); + instr_params(p); + + instr.complete(p, INSTR); +} + +fn instr_name(p: &mut Parser) { + let instr_name = p.start(); + + while p.at(IDENT) { + p.do_bump(); + } + + instr_name.complete(p, INSTR_NAME); +} + +fn instr_params(p: &mut Parser) { + if let Some(start) = literal(p) { + while literal(p).is_some() {} + + start.precede(p).complete(p, INSTR_PARAMS); + } +} diff --git a/crates/lang/src/parser/grammar/expression/lit.rs b/crates/lang/src/parser/grammar/expression/lit.rs new file mode 100644 index 0000000..bb48360 --- /dev/null +++ b/crates/lang/src/parser/grammar/expression/lit.rs @@ -0,0 +1,20 @@ +use enumset::enum_set; + +use crate::parser::{ + syntax_kind::{SyntaxKind::*, TokenSet}, + CompletedMarker, Parser, +}; + +const LIT_TOKENS: TokenSet = enum_set!(INT_NUM | FLOAT_NUM | STRING); + +pub fn literal(p: &mut Parser) -> Option { + if !LIT_TOKENS.contains(p.current()) { + return None; + } + + let lit = p.start(); + + p.do_bump(); + + Some(lit.complete(p, LITERAL)) +} diff --git a/crates/lang/src/parser/input.rs b/crates/lang/src/parser/input.rs new file mode 100644 index 0000000..c61fc87 --- /dev/null +++ b/crates/lang/src/parser/input.rs @@ -0,0 +1,61 @@ +use crate::parser::syntax_kind::SyntaxKind; + +pub struct Input<'src, 'toks> { + raw: &'toks Vec<(SyntaxKind, &'src str)>, + /// indices of the "meaningful" tokens (not whitespace etc) + /// includes newlines because those might indeed help with finding errors + meaningful: Vec, + /// indices of newlines for the purpose of easily querying them + /// can be helpful with missing commas etc + newlines: Vec, +} + +impl<'src, 'toks> Input<'src, 'toks> { + pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self { + let meaningful = raw_toks + .iter() + .enumerate() + .filter_map(|(i, tok)| match tok.0 { + SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => None, + _ => Some(i), + }) + .collect(); + let newlines = raw_toks + .iter() + .enumerate() + .filter_map(|(i, tok)| match tok.0 { + SyntaxKind::NEWLINE => Some(i), + _ => None, + }) + .collect(); + + Self { + raw: raw_toks, + meaningful, + newlines, + } + } + + #[allow(clippy::unwrap_used, reason = "meaningful indices cannot be invalid")] + pub(crate) fn kind(&self, idx: usize) -> SyntaxKind { + let Some(meaningful_idx) = self.meaningful.get(idx) else { + return SyntaxKind::EOF; + }; + + self.raw.get(*meaningful_idx).unwrap().0 + } + + pub(crate) fn preceding_meaningless(&self, idx: usize) -> usize { + assert!(self.meaningful.len() > idx); + + if idx == 0 { + 1 + } else { + self.meaningful[idx] - self.meaningful[idx - 1] + } + } + + pub(crate) fn meaningless_tail_len(&self) -> usize { + self.raw.len() - (self.meaningful.last().unwrap() + 1) + } +} diff --git a/crates/lang/src/parser/output.rs b/crates/lang/src/parser/output.rs new file mode 100644 index 0000000..75019bb --- /dev/null +++ b/crates/lang/src/parser/output.rs @@ -0,0 +1,113 @@ +use rowan::{GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken}; +use std::mem; + +use crate::parser::syntax_kind::{Lang, SyntaxKind}; + +use super::{error::SyntaxError, events::Event}; + +pub struct Output { + pub green_node: GreenNode, + pub errors: Vec, +} +impl std::fmt::Debug for Output { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + debug_print_green_node(NodeOrToken::Node(&self.green_node), f, 0) + } +} + +fn debug_print_green_node( + node: NodeOrToken<&GreenNodeData, &GreenTokenData>, + f: &mut std::fmt::Formatter<'_>, + lvl: i32, +) -> std::fmt::Result { + for _ in 0..lvl { + f.write_str(" ")?; + } + + match node { + NodeOrToken::Node(n) => { + writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind()))?; + for c in n.children() { + debug_print_green_node(c, f, lvl + 1)?; + } + for _ in 0..lvl { + f.write_str(" ")?; + } + f.write_str("}\n") + } + NodeOrToken::Token(t) => { + writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text()) + } + } +} + +impl Output { + pub fn from_parser_output( + mut raw_toks: Vec<(SyntaxKind, &str)>, + (mut events, errs): (Vec, Vec), + ) -> Self { + let mut builder = GreenNodeBuilder::new(); + let mut fw_parents = Vec::new(); + raw_toks.reverse(); + + for i in 0..events.len() { + match mem::replace(&mut events[i], Event::tombstone()) { + Event::Start { + kind, + forward_parent, + } => { + if kind == SyntaxKind::TOMBSTONE && forward_parent.is_none() { + continue; + } + + fw_parents.push(kind); + let mut idx = i; + let mut fp = forward_parent; + while let Some(fwd) = fp { + idx += fwd as usize; + fp = match mem::replace(&mut events[idx], Event::tombstone()) { + Event::Start { + kind, + forward_parent, + } => { + fw_parents.push(kind); + forward_parent + } + _ => unreachable!(), + } + } + + // remove whitespace bc it's ugly + while let Some((SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE, _)) = + raw_toks.last() + { + match events.iter_mut().find(|ev| matches!(ev, Event::Eat { .. })) { + Some(Event::Eat { count }) => *count -= 1, + _ => unreachable!(), + } + + let (tok, text): (SyntaxKind, &str) = raw_toks.pop().unwrap(); + builder.token(tok.into(), text); + } + + for kind in fw_parents.drain(..).rev() { + if kind != SyntaxKind::TOMBSTONE { + builder.start_node(kind.into()); + } + } + } + Event::Finish => builder.finish_node(), + Event::Eat { count } => (0..count).for_each(|_| { + let (tok, text): (SyntaxKind, &str) = raw_toks.pop().unwrap(); + builder.token(tok.into(), text); + }), + Event::Error => todo!(), + } + } + + Self { + green_node: builder.finish(), + errors: errs, + } + } +} diff --git a/crates/lang/src/parser/parser.rs b/crates/lang/src/parser/parser.rs new file mode 100644 index 0000000..9ee53da --- /dev/null +++ b/crates/lang/src/parser/parser.rs @@ -0,0 +1,6 @@ +//! The parser architecture is *heavily* inspired (and partially copied and adapted) from the amazing rust-analyzer +use drop_bomb::DropBomb; + +use self::{error::SyntaxError, events::Event, input::Input}; + +use super::syntax_kind::SyntaxKind; diff --git a/crates/lang/src/parser/ast/lossless/lex.rs b/crates/lang/src/parser/syntax_kind.rs similarity index 74% rename from crates/lang/src/parser/ast/lossless/lex.rs rename to crates/lang/src/parser/syntax_kind.rs index e2a867b..e37254f 100644 --- a/crates/lang/src/parser/ast/lossless/lex.rs +++ b/crates/lang/src/parser/syntax_kind.rs @@ -1,7 +1,6 @@ +use enumset::EnumSet; use logos::Logos; -use crate::parser::Span; - pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> { let mut lex = SyntaxKind::lexer(src); let mut r = Vec::new(); @@ -13,8 +12,9 @@ pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> { r } -#[derive(Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] +#[derive(enumset::EnumSetType, Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] #[repr(u16)] +#[enumset(no_super_impls)] #[allow(non_camel_case_types)] pub enum SyntaxKind { #[token("def")] @@ -39,6 +39,7 @@ pub enum SyntaxKind { MAT_BODY, PARENTHESIZED_EXPR, EXPR, + LITERAL, #[token("(")] L_PAREN, #[token(")")] @@ -109,9 +110,29 @@ pub enum SyntaxKind { PARSE_ERR, LEX_ERR, ROOT, + EOF, + TOMBSTONE, + ERROR, } + +pub type TokenSet = EnumSet; + impl From for rowan::SyntaxKind { fn from(kind: SyntaxKind) -> Self { Self(kind as u16) } } + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Lang {} +impl rowan::Language for Lang { + type Kind = SyntaxKind; + #[allow(unsafe_code)] + fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { + assert!(raw.0 <= SyntaxKind::ROOT as u16); + unsafe { std::mem::transmute::(raw.0) } + } + fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { + kind.into() + } +} diff --git a/crates/lang/src/parser/tests.rs b/crates/lang/src/parser/tests.rs index 5bd221f..8b13789 100644 --- a/crates/lang/src/parser/tests.rs +++ b/crates/lang/src/parser/tests.rs @@ -1,143 +1 @@ -use crate::parser::ast::File; -use crate::parser::parse; -use crate::tokens::Token; -use chumsky::input::Stream; -use chumsky::prelude::*; -use indexmap::IndexMap; -use logos::Logos; -// #[test] -// fn test_parse_node_with_params() { -// const INPUT: &str = "meow [ hello: $foo, world: @bar]"; -// assert_eq!( -// parse(INPUT).unwrap(), -// File { -// decls: IndexMap::from_iter([( -// ("main", (0..0).into()), -// ( -// Expr::Node( -// ("meow", (0..4).into()), -// Some(( -// IndexMap::from_iter([ -// ( -// ("hello", (7..12).into()), -// Expr::Var(("foo", (14..18).into())) -// ), -// ( -// ("world", (20..25).into()), -// Expr::InputVar(("bar", (27..31).into())) -// ) -// ]), -// (5..32).into() -// )) -// ), -// (0..32).into() -// ) -// )]) -// } -// ); -// } - -// fn test_parse_multiple_top_level_complex() { -// const INPUT: &str = r"def main = meow -// | uwu -// [ foo: @bar -// , hello: world @| test [ more: params ] | yay -// ] -// !| awa -// @| nya -// | rawr; - -// def test = meow -// [ hello: $foo -// , world: @bar -// ]; -// "; -// assert_eq!( -// parse(INPUT).unwrap(), -// File { -// decls: IndexMap::from_iter([ -// ( -// ("main", (4..8).into()), -// ( -// Expr::SimplePipe( -// Box::new(Expr::Node(("meow", (11..15).into()), None)), -// Box::new(Expr::NullPipe( -// Box::new(Expr::Node( -// ("uwu", (20..23).into()), -// Some(( -// IndexMap::from_iter([ -// ( -// ("foo", (29..32).into()), -// Expr::InputVar(("bar", (34..38).into())) -// ), -// ( -// ("hello", (44..49).into()), -// Expr::MappingPipe( -// Box::new(Expr::Node( -// ("world", (51..56).into()), -// None -// )), -// Box::new(Expr::SimplePipe( -// Box::new(Expr::Node( -// ("test", (60..64).into()), -// Some(( -// IndexMap::from_iter([( -// ("more", (67..71).into()), -// Expr::Node( -// ("params", (73..79).into()), -// None -// ) -// )]), -// (65..81).into() -// )) -// )), -// Box::new(Expr::Node( -// ("yay", (84..87).into()), -// None -// )) -// )) -// ) -// ) -// ]), -// (27..92).into() -// )) -// )), -// Box::new(Expr::MappingPipe( -// Box::new(Expr::Node(("awa", (97..100).into()), None)), -// Box::new(Expr::SimplePipe( -// Box::new(Expr::Node(("nya", (106..109).into()), None)), -// Box::new(Expr::Node(("rawr", (114..118).into()), None)) -// )) -// )) -// )) -// ), -// (11..118).into() -// ), -// ), -// ( -// ("test", (125..129).into()), -// ( -// Expr::Node( -// ("meow", (132..136).into()), -// Some(( -// IndexMap::from_iter([ -// ( -// ("hello", (141..146).into()), -// Expr::Var(("foo", (148..152).into())) -// ), -// ( -// ("world", (156..161).into()), -// Expr::InputVar(("bar", (163..167).into())) -// ) -// ]), -// (139..171).into() -// )) -// ), -// (132..171).into() -// ) -// ) -// ]) -// } -// ); -// } diff --git a/crates/lang/src/tokens.rs b/crates/lang/src/tokens.rs deleted file mode 100644 index 3314916..0000000 --- a/crates/lang/src/tokens.rs +++ /dev/null @@ -1,81 +0,0 @@ -use logos::Logos; - -#[derive(Logos, Debug, PartialEq, Eq, Clone)] -#[logos(skip r"[ \t\n\f]+")] -pub enum Token<'a> { - // hack! - // this isn't actually supposed to be in the language. - // i just can't figure out how to automatically choose between a top level declaration - // or a top level expression - // so a declaration needs the keyword def until i can figure this out - #[token("def")] - Def, - #[token("let")] - Let, - #[token("in")] - In, - #[token("mat")] - Mat, - #[regex("[\\d]+x[\\d]+", |lex| { - let (x, y) = lex.slice().split_once('x').expect("shouldn't fail to split"); - // TODO: handle overflows etc - (x.parse().expect("should only match valid u16s"), y.parse().expect("should only match valid u16s")) - })] - Dimensions((u16, u16)), - #[regex("[\\d]+", |lex| lex.slice())] - Int(&'a str), - #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)", |lex| lex.slice())] - Float(&'a str), - // TODO: more bigger better more complex string lexing - // TODO: templating? - #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, |lex| lex.slice())] - String(&'a str), - #[token("+")] - Plus, - #[token("-")] - Minus, - #[token("*")] - Mult, - #[token("/")] - Div, - // TODO: figure out how to allow numbers in words? - #[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*", |lex| lex.slice().trim())] - Word(&'a str), - #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] - VarIdent(&'a str), - #[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] - InputIdent(&'a str), - #[token(",")] - Comma, - #[token("|")] - Pipe, - #[token("@|")] - MappingPipe, - #[token("!|")] - NullPipe, - #[token("@")] - At, - #[token(">")] - GreaterThan, - #[token("=")] - Equals, - #[token(":")] - Colon, - #[token(";")] - Semicolon, - #[token("[")] - BracketOpen, - #[token("]")] - BracketClose, - #[token("(")] - ParenOpen, - #[token(")")] - ParenClose, - #[token("{")] - BraceOpen, - #[token("}")] - BraceClose, -} - -#[cfg(test)] -mod tests; diff --git a/crates/lang/src/tokens/tests.rs b/crates/lang/src/tokens/tests.rs deleted file mode 100644 index 3b35ace..0000000 --- a/crates/lang/src/tokens/tests.rs +++ /dev/null @@ -1,135 +0,0 @@ -use logos::Logos; - -use super::Token; - -/// generates tests for the lexer to avoid writing boilerplate -macro_rules! lexer_test { - ($name:ident, $input:literal, $out:expr) => { - #[test] - fn $name() { - let lex = Token::lexer($input); - let toks = lex.map(|tok| tok.unwrap()).collect::>(); - assert_eq!(toks, $out); - } - }; -} - -lexer_test! { - test_lex_simple_pipeline, - "streamer | processor | sink", - [ - Token::Word("streamer"), - Token::Pipe, - Token::Word("processor"), - Token::Pipe, - Token::Word("sink") - ] -} - -lexer_test! { - test_lex_var_ident, - "$identifier", - [ Token::VarIdent("identifier") ] -} - -lexer_test! { - test_lex_subgroup, - "subgroup(first, second) = a | b [ $first ] | c [ $second ]", - [ - Token::Word("subgroup"), - Token::ParenOpen, - Token::Word("first"), - Token::Comma, - Token::Word("second"), - Token::ParenClose, - Token::Equals, - Token::Word("a"), - Token::Pipe, - Token::Word("b"), - Token::BracketOpen, - Token::VarIdent("first"), - Token::BracketClose, - Token::Pipe, - Token::Word("c"), - Token::BracketOpen, - Token::VarIdent("second"), - Token::BracketClose - ] -} - -lexer_test! { - text_lex_crossing_pipeline_reordering, - "a >first, second|second, first> c", - [ - Token::Word("a"), - Token::GreaterThan, - Token::Word("first"), - Token::Comma, - Token::Word("second"), - Token::Pipe, - Token::Word("second"), - Token::Comma, - Token::Word("first"), - Token::GreaterThan, - Token::Word("c") - ] -} - -lexer_test! { - test_lex_crossing_input_args, - "a >second| c { second: @first }", - [ - Token::Word("a"), - Token::GreaterThan, - Token::Word("second"), - Token::Pipe, - Token::Word("c"), - Token::BraceOpen, - Token::Word("second"), - Token::Colon, - Token::InputIdent("first"), - Token::BraceClose - ] -} - -lexer_test! { - test_lex_map_io_named, - "a @| c", - [ - Token::Word("a"), - Token::MappingPipe, - Token::Word("c") - ] -} - -lexer_test! { - test_lex_int_literal, - "42", - [ - Token::Int("42") - ] -} - -lexer_test! { - test_lex_float_literal_0, - "1.5", - [ - Token::Float("1.5") - ] -} - -lexer_test! { - test_lex_float_literal_1, - "42.", - [ - Token::Float("42.") - ] -} - -lexer_test! { - test_lex_float_literal_2, - ".42", - [ - Token::Float(".42") - ] -} diff --git a/testfiles/test.owo b/testfiles/test.owo index dc95a8d..ab4456d 100644 --- a/testfiles/test.owo +++ b/testfiles/test.owo @@ -1,7 +1 @@ -hello world test - 42 - (another command 3.14 "meow") - "uwu" - { - some: attrs 42 (meow gay 1) - } +hello world test 1.5 42 69 "gay"