lang: parsing to events now

This commit is contained in:
Schrottkatze 2024-04-12 20:55:55 +02:00
parent f7b61f9e0e
commit 1711d17fa6
Signed by: schrottkatze
SSH key fingerprint: SHA256:hXb3t1vINBFCiDCmhRABHX5ocdbLiKyCdKI4HK2Rbbc
8 changed files with 176 additions and 383 deletions

View file

@ -7,7 +7,7 @@ edition = "2021"
[dependencies]
logos = "0.14"
chumsky = {version= "1.0.0-alpha.7", features=["label"]}
chumsky = {version= "1.0.0-alpha.7", features=["label", "extension"]}
petgraph = { workspace = true}
indexmap = "2.2.6"
clap = { version = "4", features = ["derive"] }

View file

@ -1,3 +1,4 @@
#![feature(type_alias_impl_trait)]
pub mod err_reporting;
pub mod parser;
pub mod tokens;

View file

@ -5,7 +5,7 @@ use lang::{
err_reporting::ErrorCollector,
parser::ast::lossless::{
lex,
parser::{self, parser_to_events::to_events},
parser::{self, parse},
},
};
@ -20,7 +20,7 @@ fn main() {
let n = args.file.clone();
let f = fs::read_to_string(n.clone()).expect("failed to read file");
println!("toks: {:?}", lex::lex(&f));
println!("evs: {:?}", to_events(&f));
println!("parse res: {:?}", parse(&f));
// let parse_res = parser::parse(&f);
// println!("parse: {:?}", parse_res);

View file

@ -10,7 +10,6 @@ pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice()))
}
r.reverse();
r
}

View file

@ -9,24 +9,29 @@ use crate::parser::{
Span,
};
use self::parser_to_events::{to_events, Event};
use super::lex::{self, SyntaxKind};
pub mod parser_to_events {
use chumsky::Parser;
use chumsky::prelude::*;
use crate::parser::ast::lossless::lex::SyntaxKind::{self, *};
use crate::parser::ast::lossless::lex::{
self,
SyntaxKind::{self, *},
};
#[derive(Debug, PartialEq, Eq)]
pub(super) enum Event {
StartNode,
/// One step of the flat event stream produced by the token-kind parser in this module.
///
/// Events carry no token text; the consumer is expected to pair `EatToken` events with its own
/// token list.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Event {
/// Open a new node of the given kind.
StartNode(SyntaxKind),
/// Open an error node for the given syntax error.
/// NOTE(review): nothing in the visible parser emits this yet — confirm intended use.
StartErr(SyntaxError),
/// Consume exactly one token.
EatToken,
/// Close the node opened by the matching `StartNode`.
FinishNode,
/// Close the error node opened by the matching `StartErr`.
/// NOTE(review): nothing in the visible parser emits this yet — confirm intended use.
FinishErr,
}
#[derive(Debug, PartialEq, Eq)]
enum SyntaxError {
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum SyntaxError {
Expected(SyntaxKind),
AttrExpectedValue,
/// guessed if there's a newline and attr on next line without comma
@ -34,14 +39,107 @@ pub mod parser_to_events {
ExpectedCommaBetweenAttrs,
}
pub fn to_events(src: &str) -> Vec<Event> {
let mut tokens = lex::lex(src);
parser().parse(tokens)
/// Feeds the kinds of `tokens` through [`parser`] and returns the events it produced.
///
/// Only the `SyntaxKind` half of every `(kind, text)` pair is looked at; the text is ignored.
///
/// # Panics
///
/// Panics when the parse result cannot be unwrapped.
pub fn to_events(tokens: &[(SyntaxKind, &str)]) -> Vec<Event> {
    let kinds: Vec<SyntaxKind> = tokens.iter().map(|&(kind, _)| kind).collect();
    // Keep the parse result in its own binding and unwrap it afterwards, so the parser value
    // built by `parser()` is gone before `kinds` goes out of scope.
    let outcome = parser().parse(&kinds);
    outcome.unwrap()
}
pub fn parser() -> impl Parser<'static, SyntaxKind, Vec<Event>> {
let whitespace = just(WHITESPACE).or(NEWLINE).repeated().collect::<Vec<_>>();
whitespace
/// Wraps `$parser` so that any run of `WHITESPACE` / `NEWLINE` tokens is accepted before and
/// after it.
///
/// Every whitespace token becomes one `Event::EatToken`. The resulting parser yields a single
/// `Vec<Event>`: the leading whitespace events, then the events of `$parser`, then the trailing
/// whitespace events.
macro_rules! padded {
    ($parser:expr) => {{
        let trivia = one_of([WHITESPACE, NEWLINE])
            .to(Event::EatToken)
            .repeated()
            .collect::<Vec<Event>>();
        trivia
            .then($parser)
            .then(trivia)
            .map(|((leading, inner), trailing)| {
                let mut events: Vec<Event> = leading;
                events.extend(inner);
                events.extend(trailing);
                events
            })
    }};
}
/// Builds the parser that turns a slice of token kinds into a flat `Vec<Event>`.
///
/// The grammar handled here is a single expression, which is one of:
/// - an instruction: a name made of one or more `IDENT`s followed by one or more atoms,
/// - a literal (`INT_NUM`, `FLOAT_NUM` or `STRING`),
/// - a parenthesized expression.
///
/// Whitespace and newline tokens are turned into `Event::EatToken` wherever they are accepted.
pub fn parser<'toks>() -> impl Parser<'toks, &'toks [SyntaxKind], Vec<Event>> {
// Zero or more whitespace/newline tokens, one `EatToken` per token.
let ws = one_of([WHITESPACE, NEWLINE])
.to(Event::EatToken)
.repeated()
.collect::<Vec<Event>>();
// let ws_without_newlines = just(WHITESPACE)
// .to(Event::EatToken)
// .repeated()
// .collect::<Vec<Event>>();
// `L_PAREN c R_PAREN`: each parenthesis contributes one `EatToken`, with the events of `c`
// in between. No dedicated node is opened around the parentheses here.
let parenthesized = |c| {
just(L_PAREN)
.to(vec![Event::EatToken])
.then(c)
.then(just(R_PAREN).to(vec![Event::EatToken]))
.map(|((mut before, mut c), mut after)| {
before.append(&mut c);
before.append(&mut after);
before
})
};
let expr = recursive(|expr| {
// A single literal token wrapped in an `EXPR` node.
let lit = one_of([INT_NUM, FLOAT_NUM, STRING]).to(vec![
Event::StartNode(EXPR),
Event::EatToken,
Event::FinishNode,
]);
// An atom is a literal or a parenthesized (recursive) expression.
let atom = lit.clone().or(parenthesized(expr));
let ident = just(IDENT).to(vec![Event::EatToken]);
// Instruction name: `IDENT (ws IDENT)*`, wrapped in an `INSTR_NAME` node.
let instr_name = ident
.clone()
.map(|mut v| {
v.insert(0, Event::StartNode(INSTR_NAME));
v
})
.foldl(
ws.then(ident).repeated(),
|mut ident, (mut ws, mut next)| {
ident.append(&mut ws);
ident.append(&mut next);
ident
},
)
.map(|mut v| {
v.push(Event::FinishNode);
v
});
// Instruction: padded name followed by `atom (ws atom)*` wrapped in `INSTR_PARAMS`;
// the whole thing is wrapped in an `INSTR` node.
// NOTE(review): at least one atom is required here, so a name without parameters does not
// match this branch — confirm that is intended.
let instr = padded!(instr_name)
.then(
atom.clone()
.map(|mut v| {
v.insert(0, Event::StartNode(INSTR_PARAMS));
v
})
.foldl(
ws.then(atom.clone()).repeated(),
|mut cur, (mut ws, mut next)| {
cur.append(&mut ws);
cur.append(&mut next);
cur
},
)
.map(|mut v| {
v.push(Event::FinishNode);
v
}),
)
.map(|(mut name, mut params)| {
// `name` starts with the leading whitespace events added by `padded!`, so the `INSTR`
// node is opened before that whitespace.
name.insert(0, Event::StartNode(INSTR));
name.append(&mut params);
name.push(Event::FinishNode);
name
});
// NOTE(review): `atom` already tries `lit` first, so the `.or(lit)` alternative looks
// redundant — confirm before removing.
padded!(instr.or(lit).or(atom))
});
expr
// .map(|(lit, mut ev)| lit.append(&mut ev));
}
}
@ -99,7 +197,7 @@ enum SyntaxError {
}
pub fn parse(src: &str) -> Parse {
let mut tokens = lex::lex(src);
let tokens = lex::lex(src);
Parser {
tokens,
builder: GreenNodeBuilder::new(),
@ -110,12 +208,19 @@ pub fn parse(src: &str) -> Parse {
impl Parser<'_> {
fn parse(mut self) -> Parse {
self.start_node(ROOT);
let evs = to_events(&self.tokens);
self.builder.start_node(ROOT.into());
match self.expr(None) {
expr::ExprRes::Ok => (),
expr::ExprRes::Eof => (),
expr::ExprRes::NoExpr => todo!(),
self.tokens.reverse();
for ev in evs {
match ev {
Event::StartNode(kind) => self.builder.start_node(kind.into()),
Event::StartErr(SyntaxError) => todo!(),
Event::EatToken => self.bump(),
Event::FinishNode => self.builder.finish_node(),
Event::FinishErr => todo!(),
}
}
self.builder.finish_node();
@ -124,13 +229,6 @@ impl Parser<'_> {
}
}
/// Starts a new node of `kind` on the tree builder.
fn start_node(&mut self, kind: SyntaxKind) {
self.builder.start_node(kind.into());
}
/// Finishes the node most recently started on the tree builder.
fn finish_node(&mut self) {
self.builder.finish_node();
}
/// Advance one token, adding it to the current branch of the tree builder.
fn bump(&mut self) {
let (kind, text) = self.tokens.pop().unwrap();
@ -143,7 +241,7 @@ impl Parser<'_> {
}
fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) {
self.builder.start_node_at(checkpoint, PARSE_ERR.into());
self.finish_node();
self.builder.finish_node();
self.errors.push(err);
}
fn expected(&mut self, expected: SyntaxKind) {
@ -158,313 +256,4 @@ impl Parser<'_> {
.get(self.tokens.len() - 2)
.map(|(kind, _)| *kind)
}
/// Bumps tokens for as long as the current token is `WHITESPACE` or `NEWLINE`.
fn skip_ws(&mut self) {
    loop {
        let cur = self.current();
        if cur != Some(WHITESPACE) && cur != Some(NEWLINE) {
            break;
        }
        self.bump();
    }
}
/// Bumps tokens for as long as the current token is `WHITESPACE`; newlines are left in place.
fn skip_ws_without_newlines(&mut self) {
    loop {
        if self.current() != Some(WHITESPACE) {
            return;
        }
        self.bump();
    }
}
}
/// Hand-written parsing of expressions and atoms on top of the tree builder.
mod expr {
use rowan::Checkpoint;
use super::{attrset::AttrsetRes, instr::NodeRes, Parser};
use crate::parser::{ast::lossless::lex::SyntaxKind::*, Span};
impl Parser<'_> {
/// Parses one expression.
///
/// `start` is the checkpoint at which the wrapping node is opened; when it is `None`, a
/// fresh checkpoint is taken after leading whitespace has been skipped.
///
/// An `IDENT` starts an instruction, which is then wrapped in an `EXPR` node; any other
/// token is handed to [`Parser::atom`]. Returns `ExprRes::Eof` when no token is left.
pub(super) fn expr(&mut self, start: Option<Checkpoint>) -> ExprRes {
self.skip_ws();
let start = start.unwrap_or_else(|| self.builder.checkpoint());
match self.current() {
Some(IDENT) => {
let expr_res = match self.instr() {
NodeRes::Ok => ExprRes::Ok,
NodeRes::Eof => ExprRes::Eof,
};
// Wrap everything parsed since `start` in an `EXPR` node, regardless of the result.
self.builder.start_node_at(start, EXPR.into());
self.finish_node();
expr_res
}
Some(_) => self.atom(Some(start)),
None => ExprRes::Eof,
}
}
/// Parses one atom: a literal, an attribute set (`{ ... }`) or a parenthesized expression.
///
/// `start` has the same meaning as in [`Parser::expr`]. Returns `ExprRes::NoExpr` when the
/// current token cannot start an atom, and `ExprRes::Eof` when no token is left.
pub(super) fn atom(&mut self, start: Option<Checkpoint>) -> ExprRes {
self.skip_ws();
let start = start.unwrap_or_else(|| self.builder.checkpoint());
match self.current() {
Some(INT_NUM | FLOAT_NUM | STRING) => {
// Consume the literal, then wrap it in an `EXPR` node starting at `start`.
self.bump();
self.builder.start_node_at(start, EXPR.into());
self.finish_node();
ExprRes::Ok
}
Some(L_CURLY) => match self.attrset(start) {
AttrsetRes::Ok => ExprRes::Ok,
AttrsetRes::Eof => ExprRes::Eof,
},
Some(L_PAREN) => {
// NOTE(review): this branch opens `PARENTHESIZED_EXPR` but neither bumps the
// `R_PAREN` nor calls `finish_node` on any path below — confirm whether that is
// handled elsewhere. The result of the inner `expr` call is also discarded.
self.builder.start_node_at(start, PARENTHESIZED_EXPR.into());
self.bump();
self.expr(None);
self.skip_ws();
match self.current() {
Some(R_PAREN) => ExprRes::Ok,
Some(_) => todo!(),
None => ExprRes::Eof,
}
}
Some(_) => ExprRes::NoExpr,
None => ExprRes::Eof,
}
}
}
/// Outcome of trying to parse an expression or atom.
pub enum ExprRes {
/// An expression was parsed.
Ok,
/// The token stream ended.
Eof,
/// The current token does not start an expression.
NoExpr,
}
}
/// Hand-written parsing of attribute sets (`{ name: value, ... }`).
mod attrset {
use chumsky::container::Container;
use rowan::Checkpoint;
use super::{expr::ExprRes, instr::NodeRes, Parser};
use crate::parser::{
ast::lossless::{lex::SyntaxKind::*, parser::SyntaxError},
Span,
};
impl Parser<'_> {
/// Parses an attribute set whose opening `L_CURLY` is the current token.
///
/// The `ATTR_SET` node is opened at `checkpoint`. Returns `AttrsetRes::Eof` when the tokens
/// run out, otherwise `AttrsetRes::Ok`.
///
/// # Panics
///
/// Panics if the current token is not `L_CURLY`.
pub(super) fn attrset(&mut self, checkpoint: Checkpoint) -> AttrsetRes {
assert_eq!(self.current(), Some(L_CURLY));
self.bump();
self.skip_ws();
match self.current() {
// Empty set: `{ }`.
Some(R_CURLY) => {
self.builder.start_node_at(checkpoint, ATTR_SET.into());
self.bump();
self.finish_node();
AttrsetRes::Ok
}
Some(_) => {
self.builder.start_node_at(checkpoint, ATTR_SET.into());
let res = match self.attrs() {
AttrRes::Eof => AttrsetRes::Eof,
AttrRes::RCurly | AttrRes::Ok => {
// Debug output.
println!("curr: {:?}", self.current());
AttrsetRes::Ok
}
};
self.finish_node();
res
}
// NOTE(review): no `ATTR_SET` node is opened on this path even though `L_CURLY` was
// already bumped — confirm this is intended.
None => AttrsetRes::Eof,
}
// self.start_node(ATTR);
}
/// Parses attributes one after another until the set is closed or the tokens run out.
///
/// After each attribute the separator is inspected: a `COMMA` continues the loop, an
/// `R_CURLY` ends it, and a `NEWLINE` followed by an `IDENT` is recorded as
/// `SyntaxError::ExpectedCommaBetweenAttrs`. The `println!` calls are debug output.
fn attrs(&mut self) -> AttrRes {
let mut res = AttrRes::Ok;
while res == AttrRes::Ok {
println!("it: {:?}", self.tokens.last());
match self.attr() {
AttrRes::Ok => {
self.skip_ws_without_newlines();
println!(
"a: {:?}, {:?}",
self.tokens.last(),
self.tokens.get(self.tokens.len() - 2)
);
println!("errs: {:?}", self.errors);
res = AttrRes::Ok;
// Checkpoint taken right after the attribute; used to place the error node when a
// comma turns out to be missing.
let checkpoint_previous_end = self.builder.checkpoint();
res = match self.current() {
Some(COMMA) => {
self.bump();
AttrRes::Ok
}
Some(R_CURLY) => {
self.bump();
res = AttrRes::Ok;
break;
}
Some(NEWLINE) => {
self.skip_ws();
println!(
"b: {:?}, {:?}",
self.tokens.last(),
self.tokens.get(self.tokens.len() - 2)
);
match self.current() {
Some(COMMA) => {
self.bump();
AttrRes::Ok
}
Some(R_CURLY) => {
self.bump();
res = AttrRes::Ok;
break;
}
// A new attribute name right after a newline: the comma was left out.
Some(IDENT) => {
println!("wtf");
self.syntax_err_by_checkpoint(
checkpoint_previous_end,
SyntaxError::ExpectedCommaBetweenAttrs,
);
// self.syntax_err(SyntaxError::ExpectedCommaBetweenAttrs);
AttrRes::Ok
}
// NOTE(review): any other token is consumed without recording an error.
Some(_) => {
self.bump();
AttrRes::Ok
}
None => {
res = AttrRes::Eof;
break;
}
}
}
// NOTE(review): any other token is consumed without recording an error.
Some(_) => {
self.bump();
println!(
"c: {:?}, {:?}",
self.tokens.last(),
self.tokens.get(self.tokens.len() - 2)
);
AttrRes::Ok
}
None => {
res = AttrRes::Eof;
break;
}
}
}
AttrRes::Eof => {
res = AttrRes::Eof;
break;
}
AttrRes::RCurly => {
res = AttrRes::RCurly;
break;
}
}
}
println!("toks_left: {:?}", self.tokens);
res
}
/// Parses a single `name: value` attribute into an `ATTR` node containing an `ATTR_NAME`
/// and an `ATTR_VALUE` node.
///
/// NOTE(review): every early `return` below leaves the nodes started so far unfinished —
/// confirm whether callers compensate for that.
fn attr(&mut self) -> AttrRes {
self.skip_ws();
self.start_node(ATTR);
self.start_node(ATTR_NAME);
match self.current() {
Some(IDENT) => self.bump(),
Some(R_CURLY) => return AttrRes::Ok,
Some(_) => self.expected(IDENT),
None => return AttrRes::Eof,
}
// Closes `ATTR_NAME`.
self.finish_node();
self.skip_ws();
match self.current() {
Some(COLON) => self.bump(),
Some(R_CURLY) => {
self.expected(COLON);
return AttrRes::RCurly;
}
Some(_) => self.expected(COLON),
None => return AttrRes::Eof,
}
self.skip_ws();
self.start_node(ATTR_VALUE);
match self.expr(None) {
// NOTE(review): one more token is bumped after a successfully parsed value — confirm
// which token this is meant to consume.
ExprRes::Ok => self.bump(),
ExprRes::Eof => return AttrRes::Eof,
ExprRes::NoExpr => match self.current() {
Some(COMMA) => self.syntax_err(SyntaxError::AttrExpectedValue),
Some(R_CURLY) => {
self.syntax_err(SyntaxError::AttrExpectedValue);
return AttrRes::RCurly;
}
Some(_) => self.expected(EXPR),
None => unreachable!(),
},
}
// Close `ATTR_VALUE`, then `ATTR`.
self.finish_node();
self.finish_node();
AttrRes::Ok
}
}
/// Outcome of parsing a whole attribute set.
#[derive(PartialEq, Eq)]
pub enum AttrsetRes {
/// The set was parsed.
Ok,
/// The token stream ended.
Eof,
}
/// Outcome of parsing one attribute or a run of attributes.
#[derive(PartialEq, Eq)]
enum AttrRes {
/// Parsing can continue.
Ok,
/// The token stream ended.
Eof,
/// A closing `R_CURLY` was encountered where more of an attribute was expected.
RCurly,
}
}
/// Hand-written parsing of instructions: a name followed by positional parameters.
mod instr {
use super::Parser;
use crate::parser::{
ast::lossless::{lex::SyntaxKind::*, parser::expr::ExprRes},
Span,
};
impl Parser<'_> {
/// Parses an instruction into an `INSTR` node: the name, then as many expressions as can
/// be parsed. The expressions are wrapped in an `INSTR_PARAMS` node only when at least one
/// was parsed.
///
/// Always returns `NodeRes::Ok`.
///
/// # Panics
///
/// Panics if the current token is not `IDENT`.
pub(super) fn instr(&mut self) -> NodeRes {
assert_eq!(self.current(), Some(IDENT));
self.skip_ws();
self.start_node(INSTR);
self.instr_name();
// used to count positionals
let mut i = 0;
// Taken before the first parameter so `INSTR_PARAMS` can be opened retroactively.
let params_checkpoint = self.builder.checkpoint();
loop {
match self.expr(None) {
ExprRes::Ok => {
i += 1;
continue;
}
ExprRes::NoExpr | ExprRes::Eof => break,
}
}
if i >= 1 {
self.builder
.start_node_at(params_checkpoint, INSTR_PARAMS.into());
self.finish_node();
}
// Closes `INSTR`.
self.finish_node();
NodeRes::Ok
}
/// Parses the instruction name: consecutive `IDENT` tokens separated only by whitespace
/// (not newlines), wrapped in an `INSTR_NAME` node.
fn instr_name(&mut self) {
self.start_node(INSTR_NAME);
while self.current() == Some(IDENT) {
self.bump();
self.skip_ws_without_newlines();
}
self.finish_node();
}
}
/// Outcome of parsing a node.
pub(super) enum NodeRes {
/// The node was parsed.
Ok,
/// The token stream ended.
/// NOTE(review): not produced by any code visible in this module.
Eof,
}
}

View file

@ -11,11 +11,11 @@
"pre-commit-hooks": "pre-commit-hooks"
},
"locked": {
"lastModified": 1710475558,
"narHash": "sha256-egKrPCKjy/cE+NqCj4hg2fNX/NwLCf0bRDInraYXDgs=",
"lastModified": 1712055811,
"narHash": "sha256-7FcfMm5A/f02yyzuavJe06zLa9hcMHsagE28ADcmQvk=",
"owner": "cachix",
"repo": "cachix",
"rev": "661bbb7f8b55722a0406456b15267b5426a3bda6",
"rev": "02e38da89851ec7fec3356a5c04bc8349cae0e30",
"type": "github"
},
"original": {
@ -33,11 +33,11 @@
"pre-commit-hooks": "pre-commit-hooks_2"
},
"locked": {
"lastModified": 1712724616,
"narHash": "sha256-qs9uEbrOpp6oXcDOp5cpilyU52t78ZpEPATtaHRVLIU=",
"lastModified": 1712925466,
"narHash": "sha256-MJ6VxGNu/ftbn8SErJjBz80FUNXkZfcObHg/JP7wwAc=",
"owner": "cachix",
"repo": "devenv",
"rev": "d1a11d14dbe96a03c7f9068e4d3af05f283734e0",
"rev": "1af93652caf48bfeef6ba7d1cf59fc66e506e5c2",
"type": "github"
},
"original": {
@ -83,11 +83,11 @@
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1712730246,
"narHash": "sha256-iB8bFj+07RHpmt+XuGGvYQk2Iwm12u6+DklGq/+Tg5s=",
"lastModified": 1712903033,
"narHash": "sha256-KcvsEm0h1mIwBHFAzWFBjGihnbf2fxpAaXOdVbUfAI4=",
"owner": "nix-community",
"repo": "fenix",
"rev": "d402ae4a5e5676722290470f61a5e8e3155b5487",
"rev": "c739f83545e625227f4d0af7fe2a71e69931fa4c",
"type": "github"
},
"original": {
@ -335,11 +335,11 @@
"nixpkgs-regression": "nixpkgs-regression_2"
},
"locked": {
"lastModified": 1710500156,
"narHash": "sha256-zvCqeUO2GLOm7jnU23G4EzTZR7eylcJN+HJ5svjmubI=",
"lastModified": 1712911606,
"narHash": "sha256-BGvBhepCufsjcUkXnEEXhEVjwdJAwPglCC2+bInc794=",
"owner": "domenkozar",
"repo": "nix",
"rev": "c5bbf14ecbd692eeabf4184cc8d50f79c2446549",
"rev": "b24a9318ea3f3600c1e24b4a00691ee912d4de12",
"type": "github"
},
"original": {
@ -431,11 +431,11 @@
},
"nixpkgs_2": {
"locked": {
"lastModified": 1710236354,
"narHash": "sha256-vWrciFdq49vve43g4pbi7NjmL4cwG1ifXnQx+dU3T5E=",
"lastModified": 1710796454,
"narHash": "sha256-lQlICw60RhH8sHTDD/tJiiJrlAfNn8FDI9c+7G2F0SE=",
"owner": "cachix",
"repo": "devenv-nixpkgs",
"rev": "829e73affeadfb4198a7105cbe3a03153d13edc9",
"rev": "06fb0f1c643aee3ae6838dda3b37ef0abc3c763b",
"type": "github"
},
"original": {
@ -447,11 +447,11 @@
},
"nixpkgs_3": {
"locked": {
"lastModified": 1712608508,
"narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=",
"lastModified": 1712791164,
"narHash": "sha256-3sbWO1mbpWsLepZGbWaMovSO7ndZeFqDSdX0hZ9nVyw=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6",
"rev": "1042fd8b148a9105f3c0aca3a6177fd1d9360ba5",
"type": "github"
},
"original": {
@ -463,11 +463,11 @@
},
"nixpkgs_4": {
"locked": {
"lastModified": 1712608508,
"narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=",
"lastModified": 1712791164,
"narHash": "sha256-3sbWO1mbpWsLepZGbWaMovSO7ndZeFqDSdX0hZ9nVyw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6",
"rev": "1042fd8b148a9105f3c0aca3a6177fd1d9360ba5",
"type": "github"
},
"original": {
@ -543,11 +543,11 @@
"nixpkgs-stable": "nixpkgs-stable_2"
},
"locked": {
"lastModified": 1712055707,
"narHash": "sha256-4XLvuSIDZJGS17xEwSrNuJLL7UjDYKGJSbK1WWX2AK8=",
"lastModified": 1712897695,
"narHash": "sha256-nMirxrGteNAl9sWiOhoN5tIHyjBbVi5e2tgZUgZlK3Y=",
"owner": "cachix",
"repo": "pre-commit-hooks.nix",
"rev": "e35aed5fda3cc79f88ed7f1795021e559582093a",
"rev": "40e6053ecb65fcbf12863338a6dcefb3f55f1bf8",
"type": "github"
},
"original": {
@ -567,11 +567,11 @@
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1712663608,
"narHash": "sha256-tN9ZL6kGppmHg84lxlpAlaN+kXWNctKK7Yitq/iXDEw=",
"lastModified": 1712818880,
"narHash": "sha256-VDxsvgj/bNypHq48tQWtc3VRbWvzlFjzKf9ZZIVO10Y=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "a5feb4f05f09adca661c869b1bf2324898cbaa43",
"rev": "657b33b0cb9bd49085202e91ad5b4676532c9140",
"type": "github"
},
"original": {

View file

@ -15,6 +15,7 @@
self,
nixpkgs,
devenv,
fenix,
systems,
...
} @ inputs: let
@ -24,6 +25,11 @@
forEachSystem
(system: let
pkgs = nixpkgs.legacyPackages.${system};
toolchain = with fenix.packages.${system};
combine [
default.toolchain
rust-analyzer
];
in {
default = devenv.lib.mkShell {
inherit inputs pkgs;
@ -33,17 +39,18 @@
config,
...
}: {
languages.rust = {
enable = true;
channel = "nightly";
components = [
"rustc"
"cargo"
"clippy"
"rustfmt"
"rust-src"
];
};
# languages.rust = {
# enable = true;
# channel = "nightly";
# components = [
# "rustc"
# "cargo"
# "clippy"
# "rustfmt"
# "rust-src"
# "rust-analyzer"
# ];
# };
pre-commit.hooks = {
clippy.enable = false;
@ -59,7 +66,7 @@
mold
cargo-nextest
cargo-watch
rust-analyzer
toolchain
];
})
];

View file

@ -1,4 +1 @@
meow mew meow 5 3.14 "uwu" {
meow: test 24
another: hi "hello",
} "awa"
hello world test 42 3.14 "uwu"