diff --git a/crates/lang/Cargo.toml b/crates/lang/Cargo.toml index 3e03209..8c1d3db 100644 --- a/crates/lang/Cargo.toml +++ b/crates/lang/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [dependencies] logos = "0.14" -chumsky = {version= "1.0.0-alpha.7", features=["label", "extension"]} +chumsky = {version= "1.0.0-alpha.7", features=["label"]} petgraph = { workspace = true} indexmap = "2.2.6" clap = { version = "4", features = ["derive"] } diff --git a/crates/lang/src/lib.rs b/crates/lang/src/lib.rs index ebf22d3..94a1430 100644 --- a/crates/lang/src/lib.rs +++ b/crates/lang/src/lib.rs @@ -1,4 +1,3 @@ -#![feature(type_alias_impl_trait)] pub mod err_reporting; pub mod parser; pub mod tokens; diff --git a/crates/lang/src/main.rs b/crates/lang/src/main.rs index 346862e..16ec4e4 100644 --- a/crates/lang/src/main.rs +++ b/crates/lang/src/main.rs @@ -3,10 +3,7 @@ use std::{fs, path::PathBuf}; use clap::Parser; use lang::{ err_reporting::ErrorCollector, - parser::ast::lossless::{ - lex, - parser::{self, parse}, - }, + parser::ast::lossless::{lex, parser}, }; #[derive(Parser)] @@ -20,11 +17,8 @@ fn main() { let n = args.file.clone(); let f = fs::read_to_string(n.clone()).expect("failed to read file"); println!("toks: {:?}", lex::lex(&f)); - println!("parse res: {:?}", parse(&f)); - - // let parse_res = parser::parse(&f); - // println!("parse: {:?}", parse_res); - + let parse_res = parser::parse(&f); + println!("parse: {:?}", parse_res); // dbg!(lex::lex(&f)); // let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]); diff --git a/crates/lang/src/parser/ast/lossless/lex.rs b/crates/lang/src/parser/ast/lossless/lex.rs index e2a867b..c25608a 100644 --- a/crates/lang/src/parser/ast/lossless/lex.rs +++ b/crates/lang/src/parser/ast/lossless/lex.rs @@ -10,6 +10,7 @@ pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> { r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice())) } + r.reverse(); r } diff --git a/crates/lang/src/parser/ast/lossless/parser.rs b/crates/lang/src/parser/ast/lossless/parser.rs index adcb08e..95bb7d7 100644 --- a/crates/lang/src/parser/ast/lossless/parser.rs +++ b/crates/lang/src/parser/ast/lossless/parser.rs @@ -9,137 +9,22 @@ use crate::parser::{ Span, }; -use self::parser_to_events::{to_events, Event}; - use super::lex::{self, SyntaxKind}; -pub mod parser_to_events { - use chumsky::prelude::*; +mod parsers { + use rowan::GreenNode; - use crate::parser::ast::lossless::lex::{ - self, - SyntaxKind::{self, *}, - }; + use crate::parser::ast::lossless::lex::SyntaxKind; - #[derive(Debug, PartialEq, Eq, Clone, Copy)] - pub enum Event { - StartNode(SyntaxKind), - StartErr(SyntaxError), - EatToken, - FinishNode, - FinishErr, + use super::SyntaxError; + + struct ParseResult { + green_node: GreenNode, + errors: Vec, } - #[derive(Debug, PartialEq, Eq, Clone, Copy)] - pub enum SyntaxError { - Expected(SyntaxKind), - AttrExpectedValue, - /// guessed if there's a newline and attr on next line without comma - /// should then suggest comma after attr - ExpectedCommaBetweenAttrs, - } - - pub fn to_events(tokens: &[(SyntaxKind, &str)]) -> Vec { - let mut only_toks: Vec = tokens.iter().map(|(t, _)| *t).collect(); - let res = parser().parse(&only_toks); - res.unwrap() - } - - macro_rules! padded { - ($parser:expr) => {{ - let ws = one_of([WHITESPACE, NEWLINE]) - .to(Event::EatToken) - .repeated() - .collect::>(); - ws.then($parser) - .then(ws) - .map(|((mut before, mut c), mut after)| { - before.append(&mut c); - before.append(&mut after); - before - }) - }}; - } - - pub fn parser<'toks>() -> impl Parser<'toks, &'toks [SyntaxKind], Vec> { - let ws = one_of([WHITESPACE, NEWLINE]) - .to(Event::EatToken) - .repeated() - .collect::>(); - // let ws_without_newlines = just(WHITESPACE) - // .to(Event::EatToken) - // .repeated() - // .collect::>(); - let parenthesized = |c| { - just(L_PAREN) - .to(vec![Event::EatToken]) - .then(c) - .then(just(R_PAREN).to(vec![Event::EatToken])) - .map(|((mut before, mut c), mut after)| { - before.append(&mut c); - before.append(&mut after); - before - }) - }; - - let expr = recursive(|expr| { - let lit = one_of([INT_NUM, FLOAT_NUM, STRING]).to(vec![ - Event::StartNode(EXPR), - Event::EatToken, - Event::FinishNode, - ]); - - let atom = lit.clone().or(parenthesized(expr)); - - let ident = just(IDENT).to(vec![Event::EatToken]); - let instr_name = ident - .clone() - .map(|mut v| { - v.insert(0, Event::StartNode(INSTR_NAME)); - v - }) - .foldl( - ws.then(ident).repeated(), - |mut ident, (mut ws, mut next)| { - ident.append(&mut ws); - ident.append(&mut next); - ident - }, - ) - .map(|mut v| { - v.push(Event::FinishNode); - v - }); - let instr = padded!(instr_name) - .then( - atom.clone() - .map(|mut v| { - v.insert(0, Event::StartNode(INSTR_PARAMS)); - v - }) - .foldl( - ws.then(atom.clone()).repeated(), - |mut cur, (mut ws, mut next)| { - cur.append(&mut ws); - cur.append(&mut next); - cur - }, - ) - .map(|mut v| { - v.push(Event::FinishNode); - v - }), - ) - .map(|(mut name, mut params)| { - name.insert(0, Event::StartNode(INSTR)); - name.append(&mut params); - name.push(Event::FinishNode); - name - }); - padded!(instr.or(lit).or(atom)) - }); - expr - // .map(|(lit, mut ev)| lit.append(&mut ev)); + trait Parser { + fn parse<'src>(input: &[(SyntaxKind, &'src str)]) -> ParseResult; } } @@ -197,7 +82,7 @@ enum SyntaxError { } pub fn parse(src: &str) -> Parse { - let tokens = lex::lex(src); + let mut tokens = lex::lex(src); Parser { tokens, builder: GreenNodeBuilder::new(), @@ -208,19 +93,12 @@ pub fn parse(src: &str) -> Parse { impl Parser<'_> { fn parse(mut self) -> Parse { - let evs = to_events(&self.tokens); - self.builder.start_node(ROOT.into()); + self.start_node(ROOT); - self.tokens.reverse(); - - for ev in evs { - match ev { - Event::StartNode(kind) => self.builder.start_node(kind.into()), - Event::StartErr(SyntaxError) => todo!(), - Event::EatToken => self.bump(), - Event::FinishNode => self.builder.finish_node(), - Event::FinishErr => todo!(), - } + match self.expr(None) { + expr::ExprRes::Ok => (), + expr::ExprRes::Eof => (), + expr::ExprRes::NoExpr => todo!(), } self.builder.finish_node(); @@ -229,6 +107,13 @@ impl Parser<'_> { } } + fn start_node(&mut self, kind: SyntaxKind) { + self.builder.start_node(kind.into()); + } + fn finish_node(&mut self) { + self.builder.finish_node(); + } + /// Advance one token, adding it to the current branch of the tree builder. fn bump(&mut self) { let (kind, text) = self.tokens.pop().unwrap(); @@ -241,7 +126,7 @@ impl Parser<'_> { } fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) { self.builder.start_node_at(checkpoint, PARSE_ERR.into()); - self.builder.finish_node(); + self.finish_node(); self.errors.push(err); } fn expected(&mut self, expected: SyntaxKind) { @@ -256,4 +141,313 @@ impl Parser<'_> { .get(self.tokens.len() - 2) .map(|(kind, _)| *kind) } + fn skip_ws(&mut self) { + while self.current() == Some(WHITESPACE) || self.current() == Some(NEWLINE) { + self.bump() + } + } + fn skip_ws_without_newlines(&mut self) { + while self.current() == Some(WHITESPACE) { + self.bump() + } + } +} + +mod expr { + use rowan::Checkpoint; + + use super::{attrset::AttrsetRes, instr::NodeRes, Parser}; + use crate::parser::{ast::lossless::lex::SyntaxKind::*, Span}; + impl Parser<'_> { + pub(super) fn expr(&mut self, start: Option) -> ExprRes { + self.skip_ws(); + let start = start.unwrap_or_else(|| self.builder.checkpoint()); + match self.current() { + Some(IDENT) => { + let expr_res = match self.instr() { + NodeRes::Ok => ExprRes::Ok, + NodeRes::Eof => ExprRes::Eof, + }; + self.builder.start_node_at(start, EXPR.into()); + self.finish_node(); + expr_res + } + Some(_) => self.atom(Some(start)), + None => ExprRes::Eof, + } + } + + pub(super) fn atom(&mut self, start: Option) -> ExprRes { + self.skip_ws(); + let start = start.unwrap_or_else(|| self.builder.checkpoint()); + match self.current() { + Some(INT_NUM | FLOAT_NUM | STRING) => { + self.bump(); + self.builder.start_node_at(start, EXPR.into()); + self.finish_node(); + ExprRes::Ok + } + Some(L_CURLY) => match self.attrset(start) { + AttrsetRes::Ok => ExprRes::Ok, + AttrsetRes::Eof => ExprRes::Eof, + }, + Some(L_PAREN) => { + self.builder.start_node_at(start, PARENTHESIZED_EXPR.into()); + self.bump(); + self.expr(None); + self.skip_ws(); + match self.current() { + Some(R_PAREN) => ExprRes::Ok, + Some(_) => todo!(), + None => ExprRes::Eof, + } + } + Some(_) => ExprRes::NoExpr, + None => ExprRes::Eof, + } + } + } + + pub enum ExprRes { + Ok, + Eof, + /// isnt an expression + NoExpr, + } +} + +mod attrset { + use chumsky::container::Container; + use rowan::Checkpoint; + + use super::{expr::ExprRes, instr::NodeRes, Parser}; + use crate::parser::{ + ast::lossless::{lex::SyntaxKind::*, parser::SyntaxError}, + Span, + }; + impl Parser<'_> { + pub(super) fn attrset(&mut self, checkpoint: Checkpoint) -> AttrsetRes { + assert_eq!(self.current(), Some(L_CURLY)); + self.bump(); + self.skip_ws(); + match self.current() { + Some(R_CURLY) => { + self.builder.start_node_at(checkpoint, ATTR_SET.into()); + self.bump(); + self.finish_node(); + AttrsetRes::Ok + } + Some(_) => { + self.builder.start_node_at(checkpoint, ATTR_SET.into()); + let res = match self.attrs() { + AttrRes::Eof => AttrsetRes::Eof, + AttrRes::RCurly | AttrRes::Ok => { + println!("curr: {:?}", self.current()); + AttrsetRes::Ok + } + }; + self.finish_node(); + res + } + None => AttrsetRes::Eof, + } + // self.start_node(ATTR); + } + + fn attrs(&mut self) -> AttrRes { + let mut res = AttrRes::Ok; + + while res == AttrRes::Ok { + println!("it: {:?}", self.tokens.last()); + match self.attr() { + AttrRes::Ok => { + self.skip_ws_without_newlines(); + println!( + "a: {:?}, {:?}", + self.tokens.last(), + self.tokens.get(self.tokens.len() - 2) + ); + println!("errs: {:?}", self.errors); + res = AttrRes::Ok; + let checkpoint_previous_end = self.builder.checkpoint(); + res = match self.current() { + Some(COMMA) => { + self.bump(); + AttrRes::Ok + } + Some(R_CURLY) => { + self.bump(); + res = AttrRes::Ok; + break; + } + Some(NEWLINE) => { + self.skip_ws(); + println!( + "b: {:?}, {:?}", + self.tokens.last(), + self.tokens.get(self.tokens.len() - 2) + ); + match self.current() { + Some(COMMA) => { + self.bump(); + AttrRes::Ok + } + Some(R_CURLY) => { + self.bump(); + res = AttrRes::Ok; + break; + } + Some(IDENT) => { + println!("wtf"); + self.syntax_err_by_checkpoint( + checkpoint_previous_end, + SyntaxError::ExpectedCommaBetweenAttrs, + ); + // self.syntax_err(SyntaxError::ExpectedCommaBetweenAttrs); + AttrRes::Ok + } + Some(_) => { + self.bump(); + AttrRes::Ok + } + None => { + res = AttrRes::Eof; + break; + } + } + } + Some(_) => { + self.bump(); + println!( + "c: {:?}, {:?}", + self.tokens.last(), + self.tokens.get(self.tokens.len() - 2) + ); + AttrRes::Ok + } + None => { + res = AttrRes::Eof; + break; + } + } + } + AttrRes::Eof => { + res = AttrRes::Eof; + break; + } + AttrRes::RCurly => { + res = AttrRes::RCurly; + break; + } + } + } + println!("toks_left: {:?}", self.tokens); + res + } + + fn attr(&mut self) -> AttrRes { + self.skip_ws(); + self.start_node(ATTR); + self.start_node(ATTR_NAME); + match self.current() { + Some(IDENT) => self.bump(), + Some(R_CURLY) => return AttrRes::Ok, + Some(_) => self.expected(IDENT), + None => return AttrRes::Eof, + } + self.finish_node(); + self.skip_ws(); + match self.current() { + Some(COLON) => self.bump(), + Some(R_CURLY) => { + self.expected(COLON); + return AttrRes::RCurly; + } + Some(_) => self.expected(COLON), + None => return AttrRes::Eof, + } + self.skip_ws(); + self.start_node(ATTR_VALUE); + match self.expr(None) { + ExprRes::Ok => self.bump(), + ExprRes::Eof => return AttrRes::Eof, + ExprRes::NoExpr => match self.current() { + Some(COMMA) => self.syntax_err(SyntaxError::AttrExpectedValue), + Some(R_CURLY) => { + self.syntax_err(SyntaxError::AttrExpectedValue); + return AttrRes::RCurly; + } + Some(_) => self.expected(EXPR), + None => unreachable!(), + }, + } + self.finish_node(); + self.finish_node(); + AttrRes::Ok + } + } + + #[derive(PartialEq, Eq)] + pub enum AttrsetRes { + Ok, + Eof, + } + + #[derive(PartialEq, Eq)] + enum AttrRes { + Ok, + Eof, + RCurly, + } +} + +mod instr { + use super::Parser; + use crate::parser::{ + ast::lossless::{lex::SyntaxKind::*, parser::expr::ExprRes}, + Span, + }; + + impl Parser<'_> { + pub(super) fn instr(&mut self) -> NodeRes { + assert_eq!(self.current(), Some(IDENT)); + self.skip_ws(); + self.start_node(INSTR); + self.instr_name(); + + // used to count positionals + let mut i = 0; + let params_checkpoint = self.builder.checkpoint(); + loop { + match self.expr(None) { + ExprRes::Ok => { + i += 1; + continue; + } + ExprRes::NoExpr | ExprRes::Eof => break, + } + } + if i >= 1 { + self.builder + .start_node_at(params_checkpoint, INSTR_PARAMS.into()); + self.finish_node(); + } + self.finish_node(); + NodeRes::Ok + } + + fn instr_name(&mut self) { + self.start_node(INSTR_NAME); + while self.current() == Some(IDENT) { + self.bump(); + self.skip_ws_without_newlines(); + } + self.finish_node(); + } + } + + pub(super) enum NodeRes { + Ok, + Eof, + } } diff --git a/flake.lock b/flake.lock index 9719001..7c3a06d 100644 --- a/flake.lock +++ b/flake.lock @@ -11,11 +11,11 @@ "pre-commit-hooks": "pre-commit-hooks" }, "locked": { - "lastModified": 1712055811, - "narHash": "sha256-7FcfMm5A/f02yyzuavJe06zLa9hcMHsagE28ADcmQvk=", + "lastModified": 1710475558, + "narHash": "sha256-egKrPCKjy/cE+NqCj4hg2fNX/NwLCf0bRDInraYXDgs=", "owner": "cachix", "repo": "cachix", - "rev": "02e38da89851ec7fec3356a5c04bc8349cae0e30", + "rev": "661bbb7f8b55722a0406456b15267b5426a3bda6", "type": "github" }, "original": { @@ -33,11 +33,11 @@ "pre-commit-hooks": "pre-commit-hooks_2" }, "locked": { - "lastModified": 1712925466, - "narHash": "sha256-MJ6VxGNu/ftbn8SErJjBz80FUNXkZfcObHg/JP7wwAc=", + "lastModified": 1712724616, + "narHash": "sha256-qs9uEbrOpp6oXcDOp5cpilyU52t78ZpEPATtaHRVLIU=", "owner": "cachix", "repo": "devenv", - "rev": "1af93652caf48bfeef6ba7d1cf59fc66e506e5c2", + "rev": "d1a11d14dbe96a03c7f9068e4d3af05f283734e0", "type": "github" }, "original": { @@ -83,11 +83,11 @@ "rust-analyzer-src": "rust-analyzer-src" }, "locked": { - "lastModified": 1712903033, - "narHash": "sha256-KcvsEm0h1mIwBHFAzWFBjGihnbf2fxpAaXOdVbUfAI4=", + "lastModified": 1712730246, + "narHash": "sha256-iB8bFj+07RHpmt+XuGGvYQk2Iwm12u6+DklGq/+Tg5s=", "owner": "nix-community", "repo": "fenix", - "rev": "c739f83545e625227f4d0af7fe2a71e69931fa4c", + "rev": "d402ae4a5e5676722290470f61a5e8e3155b5487", "type": "github" }, "original": { @@ -335,11 +335,11 @@ "nixpkgs-regression": "nixpkgs-regression_2" }, "locked": { - "lastModified": 1712911606, - "narHash": "sha256-BGvBhepCufsjcUkXnEEXhEVjwdJAwPglCC2+bInc794=", + "lastModified": 1710500156, + "narHash": "sha256-zvCqeUO2GLOm7jnU23G4EzTZR7eylcJN+HJ5svjmubI=", "owner": "domenkozar", "repo": "nix", - "rev": "b24a9318ea3f3600c1e24b4a00691ee912d4de12", + "rev": "c5bbf14ecbd692eeabf4184cc8d50f79c2446549", "type": "github" }, "original": { @@ -431,11 +431,11 @@ }, "nixpkgs_2": { "locked": { - "lastModified": 1710796454, - "narHash": "sha256-lQlICw60RhH8sHTDD/tJiiJrlAfNn8FDI9c+7G2F0SE=", + "lastModified": 1710236354, + "narHash": "sha256-vWrciFdq49vve43g4pbi7NjmL4cwG1ifXnQx+dU3T5E=", "owner": "cachix", "repo": "devenv-nixpkgs", - "rev": "06fb0f1c643aee3ae6838dda3b37ef0abc3c763b", + "rev": "829e73affeadfb4198a7105cbe3a03153d13edc9", "type": "github" }, "original": { @@ -447,11 +447,11 @@ }, "nixpkgs_3": { "locked": { - "lastModified": 1712791164, - "narHash": "sha256-3sbWO1mbpWsLepZGbWaMovSO7ndZeFqDSdX0hZ9nVyw=", + "lastModified": 1712608508, + "narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=", "owner": "nixos", "repo": "nixpkgs", - "rev": "1042fd8b148a9105f3c0aca3a6177fd1d9360ba5", + "rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6", "type": "github" }, "original": { @@ -463,11 +463,11 @@ }, "nixpkgs_4": { "locked": { - "lastModified": 1712791164, - "narHash": "sha256-3sbWO1mbpWsLepZGbWaMovSO7ndZeFqDSdX0hZ9nVyw=", + "lastModified": 1712608508, + "narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "1042fd8b148a9105f3c0aca3a6177fd1d9360ba5", + "rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6", "type": "github" }, "original": { @@ -543,11 +543,11 @@ "nixpkgs-stable": "nixpkgs-stable_2" }, "locked": { - "lastModified": 1712897695, - "narHash": "sha256-nMirxrGteNAl9sWiOhoN5tIHyjBbVi5e2tgZUgZlK3Y=", + "lastModified": 1712055707, + "narHash": "sha256-4XLvuSIDZJGS17xEwSrNuJLL7UjDYKGJSbK1WWX2AK8=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "40e6053ecb65fcbf12863338a6dcefb3f55f1bf8", + "rev": "e35aed5fda3cc79f88ed7f1795021e559582093a", "type": "github" }, "original": { @@ -567,11 +567,11 @@ "rust-analyzer-src": { "flake": false, "locked": { - "lastModified": 1712818880, - "narHash": "sha256-VDxsvgj/bNypHq48tQWtc3VRbWvzlFjzKf9ZZIVO10Y=", + "lastModified": 1712663608, + "narHash": "sha256-tN9ZL6kGppmHg84lxlpAlaN+kXWNctKK7Yitq/iXDEw=", "owner": "rust-lang", "repo": "rust-analyzer", - "rev": "657b33b0cb9bd49085202e91ad5b4676532c9140", + "rev": "a5feb4f05f09adca661c869b1bf2324898cbaa43", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index caad7d6..3c8d815 100644 --- a/flake.nix +++ b/flake.nix @@ -15,7 +15,6 @@ self, nixpkgs, devenv, - fenix, systems, ... } @ inputs: let @@ -25,11 +24,6 @@ forEachSystem (system: let pkgs = nixpkgs.legacyPackages.${system}; - toolchain = with fenix.packages.${system}; - combine [ - default.toolchain - rust-analyzer - ]; in { default = devenv.lib.mkShell { inherit inputs pkgs; @@ -39,18 +33,17 @@ config, ... }: { - # languages.rust = { - # enable = true; - # channel = "nightly"; - # components = [ - # "rustc" - # "cargo" - # "clippy" - # "rustfmt" - # "rust-src" - # "rust-analyzer" - # ]; - # }; + languages.rust = { + enable = true; + channel = "nightly"; + components = [ + "rustc" + "cargo" + "clippy" + "rustfmt" + "rust-src" + ]; + }; pre-commit.hooks = { clippy.enable = false; @@ -66,7 +59,7 @@ mold cargo-nextest cargo-watch - toolchain + rust-analyzer ]; }) ]; diff --git a/testfiles/test.owo b/testfiles/test.owo index 78cbbda..3662b45 100644 --- a/testfiles/test.owo +++ b/testfiles/test.owo @@ -1 +1,4 @@ -hello world test 42 3.14 "uwu" +meow mew meow 5 3.14 "uwu" { + meow: test 24 + another: hi "hello", +} "awa"