lang: rewrite parser

This commit is contained in:
Schrottkatze 2024-04-24 11:07:38 +02:00
parent 6d8b79e8f7
commit 381ab45edc
Signed by: schrottkatze
SSH key fingerprint: SHA256:hXb3t1vINBFCiDCmhRABHX5ocdbLiKyCdKI4HK2Rbbc
25 changed files with 524 additions and 1161 deletions

215
Cargo.lock generated
View file

@ -8,33 +8,6 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "0.6.5" version = "0.6.5"
@ -160,31 +133,12 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.0.90"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "1.0.0-alpha.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7b80276986f86789dc56ca6542d53bba9cda3c66091ebbe7bd96fc1bdf20f1f"
dependencies = [
"hashbrown",
"regex-automata",
"serde",
"stacker",
"unicode-ident",
]
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.4.12" version = "4.4.12"
@ -298,6 +252,40 @@ dependencies = [
"phf", "phf",
] ]
[[package]]
name = "darling"
version = "0.20.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.20.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]] [[package]]
name = "deranged" name = "deranged"
version = "0.3.11" version = "0.3.11"
@ -328,6 +316,12 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "drop_bomb"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bda8e21c04aca2ae33ffc2fd8c23134f3cac46db123ba97bd9d3f3b8a4a85e1"
[[package]] [[package]]
name = "ego-tree" name = "ego-tree"
version = "0.6.2" version = "0.6.2"
@ -340,6 +334,27 @@ version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "enumset"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "226c0da7462c13fb57e5cc9e0dc8f0635e7d27f276a3a7fd30054647f669007d"
dependencies = [
"enumset_derive",
]
[[package]]
name = "enumset_derive"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08b6c6ab82d70f08844964ba10c7babb716de2ecaeab9be5717918a5177d3af"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "equivalent" name = "equivalent"
version = "1.0.1" version = "1.0.1"
@ -447,10 +462,6 @@ name = "hashbrown"
version = "0.14.3" version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]] [[package]]
name = "heck" name = "heck"
@ -458,6 +469,12 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]] [[package]]
name = "image" name = "image"
version = "0.24.7" version = "0.24.7"
@ -516,9 +533,10 @@ name = "lang"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"ariadne", "ariadne",
"chumsky",
"clap", "clap",
"drop_bomb",
"ego-tree", "ego-tree",
"enumset",
"indexmap", "indexmap",
"logos", "logos",
"petgraph", "petgraph",
@ -584,7 +602,7 @@ dependencies = [
"lazy_static", "lazy_static",
"proc-macro2", "proc-macro2",
"quote", "quote",
"regex-syntax 0.8.2", "regex-syntax",
"syn", "syn",
] ]
@ -661,12 +679,6 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]] [[package]]
name = "option-ext" name = "option-ext"
version = "0.2.0" version = "0.2.0"
@ -759,15 +771,6 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]] [[package]]
name = "qoi" name = "qoi"
version = "0.4.1" version = "0.4.1"
@ -851,23 +854,6 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "regex-automata"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.7.5",
]
[[package]]
name = "regex-syntax"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.8.2" version = "0.8.2"
@ -975,19 +961,6 @@ dependencies = [
"lock_api", "lock_api",
] ]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"
@ -1090,12 +1063,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]] [[package]]
name = "wasi" name = "wasi"
version = "0.11.0+wasi-snapshot-preview1" version = "0.11.0+wasi-snapshot-preview1"
@ -1108,28 +1075,6 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb" checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"
@ -1268,26 +1213,6 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
[[package]]
name = "zerocopy"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "zune-inflate" name = "zune-inflate"
version = "0.2.54" version = "0.2.54"

View file

@ -7,13 +7,14 @@ edition = "2021"
[dependencies] [dependencies]
logos = "0.14" logos = "0.14"
chumsky = {version= "1.0.0-alpha.7", features=["label", "extension"]}
petgraph = { workspace = true} petgraph = { workspace = true}
indexmap = "2.2.6" indexmap = "2.2.6"
clap = { version = "4", features = ["derive"] } clap = { version = "4", features = ["derive"] }
ariadne = "0.4.0" ariadne = "0.4.0"
ego-tree = "0.6.2" ego-tree = "0.6.2"
rowan = "0.15.15" rowan = "0.15.15"
drop_bomb = "0.1.5"
enumset = "1.1.3"
[lints] [lints]
workspace = true workspace = true

View file

@ -1,88 +0,0 @@
use std::{collections::HashMap, fs};
use ariadne::{sources, Label, Report, Source};
use chumsky::{
error::{self, Rich},
ParseResult,
};
use indexmap::IndexMap;
use crate::{
parser::{ast::File, Span},
tokens::Token,
};
/// Stage an error is filed under (used as part of the error map key).
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum Stage {
    /// The lexing stage.
    Lex,
    /// The parsing stage.
    Parse,
}

impl Stage {
    /// Returns every stage, in declaration order.
    fn variants() -> [Self; 2] {
        [Self::Lex, Self::Parse]
    }
}
/// Collects errors per source file and per [`Stage`] so they can be reported together.
pub struct ErrorCollector<'filename, 'tokens, 'src> {
/// Source text of every known file, keyed by file name.
files: HashMap<&'filename str, &'src str>,
/// Errors gathered so far, keyed by `(file name, stage)`; `IndexMap` keeps insertion order.
raw_errors: IndexMap<(&'filename str, Stage), Vec<error::Rich<'tokens, Token<'src>, Span>>>,
}
impl<'filename, 'tokens, 'src> ErrorCollector<'filename, 'tokens, 'src> {
pub fn new(files: Vec<(&'filename str, &'src str)>) -> Self {
Self {
files: HashMap::from_iter(files.clone()),
raw_errors: files
.iter()
.flat_map(|(name, _)| Stage::variants().map(|s| (name, s)))
.map(|(name, stage)| ((*name, stage), Vec::new()))
.collect(),
}
}
pub fn insert_many(
&mut self,
file: &'filename str,
curr_stage: Stage,
mut errs: Vec<error::Rich<'tokens, Token<'src>, Span>>,
) {
let err_vec = self
.raw_errors
.get_mut(&(file, curr_stage))
.expect("filename should exist");
err_vec.append(&mut errs);
}
pub fn analyze_and_report(self) {
let ErrorCollector { files, raw_errors } = self;
todo!()
}
pub fn report_raw(self) {
let ErrorCollector { files, raw_errors } = self;
for ((file, stage), errs) in raw_errors.into_iter() {
for err in errs {
eprintln!("e: {err:?}");
Report::build(ariadne::ReportKind::Error, file, err.span().start)
.with_message(format!("error at stage {stage:?}, {:?}", err.reason()))
.with_label(
Label::new((file, err.span().into_range())).with_message(format!(
"found: {:?}",
err.found().expect("errors should have a reason")
)),
)
.with_help(format!(
"expected: {:?}",
err.expected().collect::<Vec<_>>()
))
.finish()
.print((file, Source::from(files[file])));
}
}
}
}
/// A file name paired with a span (presumably a span inside that file — TODO confirm).
#[derive(Debug, PartialEq, Eq)]
struct Loc<'filename>(&'filename str, Span);

View file

@ -1,4 +1,2 @@
#![feature(type_alias_impl_trait)] #![feature(type_alias_impl_trait, lint_reasons)]
pub mod err_reporting;
pub mod parser; pub mod parser;
pub mod tokens;

View file

@ -1,12 +1,9 @@
use clap::Parser;
use std::{fs, path::PathBuf}; use std::{fs, path::PathBuf};
use clap::Parser; use lang::parser::{
use lang::{ parser::{self, grammar, input, output::Output},
err_reporting::ErrorCollector, syntax_kind,
parser::ast::lossless::{
lex,
parser::{self, parse},
},
}; };
#[derive(Parser)] #[derive(Parser)]
@ -19,8 +16,17 @@ fn main() {
let args = Args::parse(); let args = Args::parse();
let n = args.file.clone(); let n = args.file.clone();
let f = fs::read_to_string(n.clone()).expect("failed to read file"); let f = fs::read_to_string(n.clone()).expect("failed to read file");
println!("toks: {:?}", lex::lex(&f));
println!("parse res: {:?}", parse(&f)); let toks = dbg!(syntax_kind::lex(&f));
let input = input::Input::new(&toks);
let mut parser = parser::Parser::new(input);
grammar::source_file(&mut parser);
let p_out = dbg!(parser.finish());
let o = Output::from_parser_output(toks, p_out);
println!("Out: {:?}", o);
// let parse_res = parser::parse(&f); // let parse_res = parser::parse(&f);
// println!("parse: {:?}", parse_res); // println!("parse: {:?}", parse_res);

View file

@ -1,152 +1,143 @@
use chumsky::{ use drop_bomb::DropBomb;
error::Rich,
input::{Stream, ValueInput},
prelude::*,
primitive::just,
recursive::recursive,
span::SimpleSpan,
IterParser,
};
use indexmap::IndexMap;
use logos::Logos;
use crate::tokens::Token; use self::{error::SyntaxError, events::Event, input::Input, syntax_kind::SyntaxKind};
pub mod ast; pub mod syntax_kind;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
use self::ast::{
raw_ast::{RawExpr, RawExpression},
File,
};
pub type Span = SimpleSpan; pub mod error;
pub type Spanned<T> = (T, Span); pub mod events;
pub mod grammar;
pub mod input;
pub mod output;
pub fn parse(src: &str) -> ParseResult<File<'_>, Rich<'_, Token<'_>>> { pub struct Parser<'src, 'toks> {
let toks: Vec<_> = Token::lexer(src) input: Input<'src, 'toks>,
.spanned() pos: usize,
.map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s))) events: Vec<Event>,
.collect(); errors: Vec<SyntaxError>,
let tok_stream = Stream::from_iter(toks).spanned((src.len()..src.len()).into());
parser().parse(tok_stream)
} }
pub(crate) fn parser<
'tokens,
'src: 'tokens,
I: ValueInput<'tokens, Token = Token<'src>, Span = Span>,
>() -> impl Parser<'tokens, I, File<'src>, extra::Err<Rich<'tokens, Token<'src>, Span>>> {
let word = select! { Token::Word(word) = e => (word, e.span())};
let expr = recursive(|expr| { impl<'src, 'toks> Parser<'src, 'toks> {
let lit = select! { pub fn new(input: Input<'src, 'toks>) -> Self {
Token::Int(i) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Int(i.parse().expect("TODO: handle better"))), e.span()), Self {
Token::Float(f) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Float(f.parse().expect("TODO: handle better"))), e.span()), input,
Token::String(s) = e => RawExpression::new(RawExpr::Lit(ast::Lit::String(s.strip_prefix('"').expect("a").strip_suffix('"').expect("b"))), e.span()) pos: 0,
}; events: Vec::new(),
let mat = just(Token::Mat) errors: Vec::new(),
.ignore_then(select! { Token::Dimensions(dimensions) = e => (dimensions, e.span())}) }
.then(
lit.separated_by(just(Token::Comma))
.collect::<Vec<_>>()
.separated_by(just(Token::Semicolon))
.collect::<Vec<_>>()
.delimited_by(just(Token::BracketOpen), just(Token::BracketClose)),
)
.map_with(|(dimensions, data), e| {
// TODO: Validation and proper error handling/reporting
// (validation = validating the matrix dimensions)
RawExpression::new(
RawExpr::Matrix(dimensions, data.into_iter().flatten().collect()),
e.span(),
)
});
let var = select! {
Token::VarIdent(name) => (RawExpr::Var as fn(_) -> _, name),
Token::InputIdent(name) => (RawExpr::InputVar as fn(_) -> _, name)
} }
.map_with(|(item_type, name), extra| RawExpression::new(item_type(name), extra.span()))
.labelled("variable");
let attrset = word pub fn finish(self) -> (Vec<Event>, Vec<SyntaxError>) {
.labelled("attr name") (self.events, self.errors)
.then_ignore(just(Token::Colon)) }
.then(expr)
.labelled("attr body")
.separated_by(just(Token::Comma))
.collect::<Vec<_>>()
.map(IndexMap::from_iter)
.delimited_by(just(Token::BraceOpen), just(Token::BraceClose))
.map_with(|v, e| (v, e.span()))
.labelled("attrset");
let node = word pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
.repeated() self.input.kind(self.pos + n)
.collect() }
.then(attrset.clone().or_not())
.map_with(|(name, params), extra| {
RawExpression::new(RawExpr::Node(name, params), extra.span())
})
// .or(var)
// .or(attrset
// .map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span())))
// .or(lit)
// .or(mat)
.labelled("node");
let atom = var pub fn eat_succeeding_ws(&mut self) {
.or(lit) self.push_ev(Event::Eat {
.or(mat) count: self.input.meaningless_tail_len(),
.or(attrset.map_with(|attrset, extra| {
RawExpression::new(RawExpr::AttrSet(attrset), extra.span())
}))
.or(node.clone());
#[allow(clippy::let_and_return)]
let pipeline = atom
.clone()
.then(choice((
just(Token::Pipe).to(RawExpr::SimplePipe as fn(_, _) -> _),
just(Token::MappingPipe).to(RawExpr::MappingPipe as fn(_, _) -> _),
just(Token::NullPipe).to(RawExpr::NullPipe as fn(_, _) -> _),
)))
.repeated()
.foldr_with(atom, |(curr, pipe), next, extra| {
RawExpression::new(pipe(curr, next), extra.span())
}); });
}
pipeline pub(crate) fn current(&self) -> SyntaxKind {
self.input.kind(self.pos)
}
pub(crate) fn start(&mut self) -> Marker {
let pos = self.events.len();
self.push_ev(Event::tombstone());
Marker::new(pos)
}
pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
self.nth_at(0, kind)
}
pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
if !self.at(kind) {
return false;
}
self.do_bump();
true
}
pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
self.nth(n) == kind
}
fn do_bump(&mut self) {
self.push_ev(Event::Eat {
count: self.input.preceding_meaningless(self.pos),
}); });
self.pos += 1;
}
let decls = just(Token::Def) fn push_ev(&mut self, event: Event) {
.ignore_then( self.events.push(event)
word.then_ignore(just(Token::Equals)) }
.then(expr.clone().map(|expr| expr))
.then_ignore(just(Token::Semicolon)),
)
.repeated()
.collect::<Vec<_>>()
.map(|decls| File {
decls: IndexMap::from_iter(decls),
});
let single_expr = expr.map(|expr| File {
decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
});
just(Token::Def).rewind().ignore_then(decls).or(single_expr)
// single_expr.or(decls)
// expr.map(|expr| File {
// decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
// })
// .or(decl.repeated().collect::<Vec<_>>().map(|decls| File {
// decls: IndexMap::from_iter(decls),
// }))
} }
pub mod asg { pub(crate) struct Marker {
use petgraph::graph::DiGraph; pos: usize,
bomb: DropBomb,
use super::Spanned; }
impl Marker {
    /// Creates a marker for the event stored at index `pos`.
    ///
    /// The marker carries a drop bomb that is only defused by [`Marker::complete`]
    /// or [`Marker::abandon`].
    pub(crate) fn new(pos: usize) -> Self {
        let bomb = DropBomb::new("Marker must be completed or abandoned");
        Self { pos, bomb }
    }

    /// Finishes the node: writes `kind` into the marker's `Start` event and pushes
    /// the matching `Finish` event.
    pub(crate) fn complete(mut self, p: &mut Parser<'_, '_>, kind: SyntaxKind) -> CompletedMarker {
        self.bomb.defuse();

        // A marker always points at a `Start` event.
        let Event::Start { kind: slot, .. } = &mut p.events[self.pos] else {
            unreachable!()
        };
        *slot = kind;
        p.push_ev(Event::Finish);

        let pos = self.pos;
        CompletedMarker { pos, kind }
    }

    /// Gives up on the node. If the marker's event is the last one, it is removed again;
    /// otherwise it is left in place.
    pub(crate) fn abandon(mut self, p: &mut Parser<'_, '_>) {
        self.bomb.defuse();

        if self.pos != p.events.len() - 1 {
            return;
        }

        // The trailing event has to be the untouched tombstone this marker pushed.
        let popped = p.events.pop();
        let is_tombstone = matches!(
            popped,
            Some(Event::Start {
                kind: SyntaxKind::TOMBSTONE,
                forward_parent: None,
            })
        );
        if !is_tombstone {
            unreachable!()
        }
    }
}
/// Handle for a node that has already been completed (returned by `Marker::complete`).
pub(crate) struct CompletedMarker {
/// Index of the node's `Event::Start` in the parser's event list.
pos: usize,
/// Kind the node was completed with.
kind: SyntaxKind,
}
impl CompletedMarker {
pub(crate) fn precede(self, p: &mut Parser<'_, '_>) -> Marker {
let new_pos = p.start();
match &mut p.events[self.pos] {
Event::Start { forward_parent, .. } => {
*forward_parent = Some(new_pos.pos - self.pos);
}
_ => unreachable!(),
}
new_pos
}
} }

View file

@ -1,24 +0,0 @@
use std::collections::{BTreeMap, HashMap};
use indexmap::IndexMap;
use super::Spanned;
/// The declarations of one parsed source file.
#[derive(Debug, PartialEq)]
pub struct File<'src> {
/// Maps each spanned declaration name to its expression; `IndexMap` keeps insertion order.
pub decls: IndexMap<Spanned<&'src str>, raw_ast::RawExpression<'src>>,
}
pub mod raw_ast;
/// A literal value.
#[derive(Debug, PartialEq)]
pub enum Lit<'src> {
// TODO: support more (and larger) number types
/// Integer literal.
Int(i64),
/// Floating point literal.
Float(f64),
/// String literal, borrowed for the `'src` lifetime.
String(&'src str),
}
pub mod lossless;
pub mod ast_tree;

View file

@ -1,31 +0,0 @@
use ego_tree::Tree;
use crate::parser::Spanned;
use super::{File, Lit};
/// AST stored as an `ego_tree::Tree` of `AstNode`s.
pub struct Ast<'src> {
/// The underlying tree.
tree: Tree<AstNode<'src>>,
}
/// A single node of the `Ast` tree; it currently stores nothing but its kind.
struct AstNode<'src> {
/// What this node represents.
kind: NodeKind<'src>,
}
/// Kind of an `AstNode`.
///
/// Most variants are plain tags; a few carry a payload (a `'src` string slice,
/// a `Lit`, or a pair of `u16` dimensions).
enum NodeKind<'src> {
Decl,
Ident(&'src str),
Instr,
Expr,
MappingPipe,
NullPipe,
MultiPipe,
Var(&'src str),
InputVar(&'src str),
AttrSet,
Attr,
Lit(Lit<'src>),
Matrix,
Dimensions(u16, u16),
MatrixRow,
}

View file

@ -1,19 +0,0 @@
use self::lex::SyntaxKind;
pub mod parser;
pub mod lex;
/// Uninhabited marker type that carries the `rowan::Language` implementation for `SyntaxKind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum Lang {}
// Converts between rowan's raw `u16` kinds and this crate's `SyntaxKind`.
impl rowan::Language for Lang {
type Kind = SyntaxKind;
// `unsafe` is required for the transmute below.
#[allow(unsafe_code)]
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
// Reject raw values above `ROOT` before reinterpreting them.
assert!(raw.0 <= SyntaxKind::ROOT as u16);
// SAFETY (assumption — TODO confirm against the definition of `SyntaxKind`): this is only
// sound if `SyntaxKind` is `#[repr(u16)]`, its discriminants are contiguous starting at 0,
// and `ROOT` is the largest one, so that the assert above excludes every invalid value.
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
kind.into()
}
}

View file

@ -1,290 +0,0 @@
use std::borrow::Borrow;
use rowan::{
Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken,
};
use crate::parser::{
ast::lossless::{lex::SyntaxKind::*, Lang},
Span,
};
use self::parser_to_events::{to_events, Event};
use super::lex::{self, SyntaxKind};
/// Chumsky-based parser that turns a slice of `SyntaxKind`s into a flat list of tree-building
/// `Event`s.
pub mod parser_to_events {
use chumsky::prelude::*;
use crate::parser::ast::lossless::lex::{
self,
SyntaxKind::{self, *},
};
/// One step of building the syntax tree.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Event {
/// Open a node of the given kind.
StartNode(SyntaxKind),
/// Open an error node.
StartErr(SyntaxError),
/// Consume one token.
EatToken,
/// Close the node opened last.
FinishNode,
/// Close the error node opened last.
FinishErr,
}
/// Syntax errors this parser can describe.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum SyntaxError {
Expected(SyntaxKind),
AttrExpectedValue,
/// Guessed when there is a newline and the next attribute follows on the next line without
/// a comma; the report should then suggest adding a comma after the attribute.
ExpectedCommaBetweenAttrs,
}
/// Runs `parser()` over the kinds of `tokens` (the token texts are ignored) and returns the
/// resulting events.
///
/// NOTE(review): the result is unwrapped, so this presumably panics when parsing fails —
/// confirm against chumsky's `ParseResult::unwrap`.
pub fn to_events(tokens: &[(SyntaxKind, &str)]) -> Vec<Event> {
let only_toks: Vec<SyntaxKind> = tokens.iter().map(|(t, _)| *t).collect();
let res = parser().parse(&only_toks);
res.unwrap()
}
/// Wraps `$parser` so that `WHITESPACE`/`NEWLINE` tokens before and after it are eaten too;
/// the events come out in the order leading trivia, inner parser, trailing trivia.
macro_rules! padded {
($parser:expr) => {{
let ws = one_of([WHITESPACE, NEWLINE])
.to(Event::EatToken)
.repeated()
.collect::<Vec<Event>>();
ws.then($parser)
.then(ws)
.map(|((mut before, mut c), mut after)| {
before.append(&mut c);
before.append(&mut after);
before
})
}};
}
/// Parses `L_PAREN $parser R_PAREN` and wraps all resulting events in a
/// `PARENTHESIZED_EXPR` node.
macro_rules! parenthesized {
($parser:expr) => {
just(L_PAREN)
.to(vec![Event::EatToken])
.then($parser)
.then(just(R_PAREN).to(vec![Event::EatToken]))
.map(|((mut before, mut c), mut after)| {
before.insert(0, Event::StartNode(PARENTHESIZED_EXPR));
before.append(&mut c);
before.append(&mut after);
before.push(Event::FinishNode);
before
})
};
}
/// Builds the expression parser; its output is the event list for one (padded) expression.
pub fn parser<'toks>() -> impl Parser<'toks, &'toks [SyntaxKind], Vec<Event>> {
let ws = one_of([WHITESPACE, NEWLINE])
.to(Event::EatToken)
.repeated()
.collect::<Vec<Event>>();
let ident = just(IDENT).to(vec![Event::EatToken]);
let expr = recursive(|expr| {
// A literal is a single token wrapped in its own `EXPR` node.
let lit = one_of([INT_NUM, FLOAT_NUM, STRING]).to(vec![
Event::StartNode(EXPR),
Event::EatToken,
Event::FinishNode,
]);
// `{ name: value }`; as written this accepts exactly one attribute.
let attrset = just(L_CURLY)
.then(
padded!(just(IDENT).to(vec![
Event::StartNode(ATTR),
Event::StartNode(ATTR_NAME),
Event::EatToken,
Event::FinishNode
]))
.then(just(COLON))
.then(padded!(expr.clone().map(|mut exp: Vec<Event>| {
exp.insert(0, Event::StartNode(ATTR_VALUE));
// The first `FinishNode` closes `ATTR_VALUE`, the second closes `ATTR`.
exp.push(Event::FinishNode);
exp.push(Event::FinishNode);
exp
})))
.map(|((mut name, _), mut value)| {
// colon
name.push(Event::EatToken);
name.append(&mut value);
name
}),
)
.then(just(R_CURLY))
.map(|((_, mut attrs), _)| {
attrs.insert(0, Event::StartNode(ATTR_SET));
attrs.insert(0, Event::EatToken);
attrs.push(Event::EatToken);
attrs.push(Event::FinishNode);
attrs
});
let atom = lit.clone().or(attrset).or(parenthesized!(expr));
// One or more identifiers (separated by trivia) inside a single `INSTR_NAME` node.
let instr_name = ident
.clone()
.map(|mut v| {
v.insert(0, Event::StartNode(INSTR_NAME));
v
})
.foldl(
ws.then(ident).repeated(),
|mut ident, (mut ws, mut next)| {
ident.append(&mut ws);
ident.append(&mut next);
ident
},
)
.map(|mut v| {
v.push(Event::FinishNode);
v
});
// Instruction name followed by one or more atoms inside an `INSTR_PARAMS` node.
let instr = padded!(instr_name)
.then(
atom.clone()
.map(|mut v| {
v.insert(0, Event::StartNode(INSTR_PARAMS));
v
})
.foldl(
ws.then(atom.clone()).repeated(),
|mut cur, (mut ws, mut next)| {
cur.append(&mut ws);
cur.append(&mut next);
cur
},
)
.map(|mut v| {
v.push(Event::FinishNode);
v
}),
)
.map(|(mut name, mut params)| {
name.insert(0, Event::StartNode(INSTR));
name.append(&mut params);
name.push(Event::FinishNode);
name
});
padded!(instr.or(lit).or(atom))
});
expr
// .map(|(lit, mut ev)| lit.append(&mut ev));
}
}
/// Result of `parse`: the root green node of the syntax tree that was built.
#[derive(PartialEq, Eq)]
pub struct Parse {
/// Root of the green tree.
pub green_node: GreenNode,
}
impl std::fmt::Debug for Parse {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
debug_print_green_node(NodeOrToken::Node(self.green_node.borrow()), f, 0)
}
}
/// Recursively writes a green node or token to `f`.
///
/// Every line is prefixed with one `" "` per nesting level `lvl`. Nodes are printed as
/// `KIND {`, followed by their children one level deeper and a closing `}`; tokens are
/// printed as `KIND "text";`.
///
/// # Errors
///
/// Returns the first error reported by the formatter.
fn debug_print_green_node(
    node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
    f: &mut std::fmt::Formatter<'_>,
    lvl: i32,
) -> std::fmt::Result {
    for _ in 0..lvl {
        f.write_str(" ")?;
    }

    match node {
        NodeOrToken::Node(n) => {
            // Propagate a failed write instead of silently dropping it.
            writeln!(f, "{:?} {{", Lang::kind_from_raw(n.kind()))?;
            for c in n.children() {
                debug_print_green_node(c, f, lvl + 1)?;
            }
            for _ in 0..lvl {
                f.write_str(" ")?;
            }
            f.write_str("}\n")
        }
        NodeOrToken::Token(t) => {
            writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
        }
    }
}
/// Builds a green tree from lexed tokens by replaying parser events.
#[derive(Debug)]
struct Parser<'src> {
/// `(kind, text)` pairs from the lexer; `parse` reverses this list and pops tokens off its end.
tokens: Vec<(SyntaxKind, &'src str)>,
/// Builder the green tree is assembled in.
builder: GreenNodeBuilder<'src>,
/// Syntax errors recorded so far.
errors: Vec<SyntaxError>,
}
/// Syntax errors recorded while building the tree.
#[derive(Debug, PartialEq, Eq)]
enum SyntaxError {
/// A token of the given kind was expected.
Expected(SyntaxKind),
AttrExpectedValue,
/// Guessed when there is a newline and the next attribute follows on the next line without
/// a comma; the report should then suggest adding a comma after the attribute.
ExpectedCommaBetweenAttrs,
}
/// Lexes `src` and builds its green tree.
pub fn parse(src: &str) -> Parse {
    let parser = Parser {
        tokens: lex::lex(src),
        builder: GreenNodeBuilder::new(),
        errors: Vec::new(),
    };
    parser.parse()
}
impl Parser<'_> {
    /// Converts the tokens to events and replays them into the tree builder, with
    /// everything wrapped in a `ROOT` node.
    ///
    /// Error events are not supported yet and hit `todo!()`.
    fn parse(mut self) -> Parse {
        let evs = to_events(&self.tokens);
        self.builder.start_node(ROOT.into());
        println!("evs: {evs:?}");

        // Reversed so that popping from the end yields the tokens in source order.
        self.tokens.reverse();

        for ev in evs {
            match ev {
                Event::StartNode(kind) => self.builder.start_node(kind.into()),
                // The payload is ignored for now; `_` avoids binding a variable that
                // happens to be named like the `SyntaxError` type.
                Event::StartErr(_) => todo!(),
                Event::EatToken => self.bump(),
                Event::FinishNode => self.builder.finish_node(),
                Event::FinishErr => todo!(),
            }
        }

        self.builder.finish_node();
        Parse {
            green_node: self.builder.finish(),
        }
    }

    /// Advance one token, adding it to the current branch of the tree builder.
    ///
    /// # Panics
    ///
    /// Panics if there is no token left.
    fn bump(&mut self) {
        let (kind, text) = self.tokens.pop().unwrap();
        self.builder.token(kind.into(), text);
    }

    /// Consumes the current token as a `PARSE_ERR` token and records `err`.
    ///
    /// # Panics
    ///
    /// Panics if there is no token left.
    fn syntax_err(&mut self, err: SyntaxError) {
        let (_, text) = self.tokens.pop().unwrap();
        self.builder.token(PARSE_ERR.into(), text);
        self.errors.push(err);
    }

    /// Wraps everything added since `checkpoint` in a `PARSE_ERR` node and records `err`.
    fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) {
        self.builder.start_node_at(checkpoint, PARSE_ERR.into());
        self.builder.finish_node();
        self.errors.push(err);
    }

    /// Shorthand for a [`SyntaxError::Expected`] error on the current token.
    fn expected(&mut self, expected: SyntaxKind) {
        self.syntax_err(SyntaxError::Expected(expected))
    }

    /// Peek at the first unprocessed token
    fn current(&self) -> Option<SyntaxKind> {
        self.tokens.last().map(|(kind, _)| *kind)
    }

    /// Peeks at the token after the current one; `None` when fewer than two tokens remain.
    fn next(&self) -> Option<SyntaxKind> {
        // `len() - 2` would underflow with fewer than two tokens left.
        let idx = self.tokens.len().checked_sub(2)?;
        self.tokens.get(idx).map(|(kind, _)| *kind)
    }
}

View file

@ -1,50 +0,0 @@
use indexmap::IndexMap;
use super::super::Spanned;
use super::super::Span;
use super::Lit;
/// A boxed `RawExpr` together with its span.
#[derive(Debug, PartialEq)]
pub struct RawExpression<'src> {
/// The expression itself; boxed because `RawExpr` contains `RawExpression`s in turn.
pub expr: Box<RawExpr<'src>>,
/// Span attached to this expression.
pub span: Span,
}
impl<'src> RawExpression<'src> {
    /// Boxes `expr` and pairs it with `span`.
    pub fn new(expr: RawExpr<'src>, span: Span) -> Self {
        let expr = Box::new(expr);
        Self { expr, span }
    }
}
/// The different shapes a raw (unanalyzed) expression can take.
#[derive(Debug, PartialEq)]
pub enum RawExpr<'src> {
/// A list of spanned words plus an optional spanned attribute set.
Node(
Vec<Spanned<&'src str>>,
Option<Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>>,
),
/// Two expressions joined by a simple pipe.
SimplePipe(RawExpression<'src>, RawExpression<'src>),
// NamingPipe(
// Box<Expression<'src>>,
// (Vec<Spanned<&'src str>>, Vec<Spanned<&'src str>>),
// Box<Expression<'src>>,
// ),
/// Two expressions joined by a mapping pipe.
MappingPipe(RawExpression<'src>, RawExpression<'src>),
/// Two expressions joined by a null pipe.
NullPipe(RawExpression<'src>, RawExpression<'src>),
MultiPipe(IndexMap<Spanned<&'src str>, RawExpression<'src>>),
// LetIn(
// IndexMap<Spanned<&'src str>, Box<Expression<'src>>>,
// Box<Expression<'src>>,
// ),
// variable, written with a `$` sigil
Var(&'src str),
// input variable, written with an `@` sigil
InputVar(&'src str),
/// A spanned attribute set: spanned names mapped to expressions.
AttrSet(Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>),
/// A literal value.
Lit(Lit<'src>),
/// Spanned `(u16, u16)` dimensions plus the matrix's expressions as one flat list.
Matrix(Spanned<(u16, u16)>, Vec<RawExpression<'src>>),
/// A list of expressions.
List(Vec<RawExpression<'src>>),
}

View file

@ -0,0 +1,6 @@
use crate::parser::syntax_kind::SyntaxKind;
/// Errors the parser can record.
#[derive(Debug)]
pub enum SyntaxError {
/// Carries a list of syntax kinds (presumably the kinds that would have been accepted —
/// TODO confirm at the use sites).
Expected(Vec<SyntaxKind>),
}

View file

@ -0,0 +1,23 @@
use crate::parser::syntax_kind::SyntaxKind;
/// One entry of the flat event list the parser produces.
#[derive(Debug)]
pub enum Event {
/// Start of a node.
Start {
/// Kind of the node; `TOMBSTONE` for a placeholder (see `Event::tombstone`).
kind: SyntaxKind,
/// Distance, in events, from this event to a later `Start` event
/// (presumably the one that becomes this node's parent — TODO confirm).
forward_parent: Option<usize>,
},
/// End of the node started last.
Finish,
/// Consume `count` tokens.
Eat {
count: usize,
},
Error,
}
impl Event {
    /// Creates a placeholder `Start` event: kind `TOMBSTONE` and no forward parent.
    pub(crate) fn tombstone() -> Self {
        let kind = SyntaxKind::TOMBSTONE;
        Self::Start {
            kind,
            forward_parent: None,
        }
    }
}

View file

@ -0,0 +1,14 @@
use crate::parser::syntax_kind::SyntaxKind::*;
use super::Parser;
mod expression;
/// Parses a whole source file: a single expression followed by trailing whitespace,
/// all wrapped in a `ROOT` node.
pub fn source_file(p: &mut Parser) {
    let file_marker = p.start();

    expression::expression(p);
    p.eat_succeeding_ws();

    file_marker.complete(p, ROOT);
}

View file

@ -0,0 +1,14 @@
use crate::parser::{syntax_kind::SyntaxKind::*, Parser};
use self::{instruction::instr, lit::literal};
mod instruction;
mod lit;
/// Parses an expression, which for now is always an instruction, into an `EXPR` node.
pub fn expression(p: &mut Parser) {
    let marker = p.start();

    instr(p);

    marker.complete(p, EXPR);
}

View file

@ -0,0 +1,30 @@
use crate::parser::{syntax_kind::SyntaxKind::*, Parser};
use super::lit::literal;
/// Parses an instruction (its name, then its parameters) into an `INSTR` node.
pub fn instr(p: &mut Parser) {
    let marker = p.start();

    instr_name(p);
    instr_params(p);

    marker.complete(p, INSTR);
}
/// Consumes a run of `IDENT` tokens (possibly empty) into an `INSTR_NAME` node.
fn instr_name(p: &mut Parser) {
    let marker = p.start();

    // `eat` bumps exactly when the parser is at an `IDENT`, so this consumes the whole run.
    while p.eat(IDENT) {}

    marker.complete(p, INSTR_NAME);
}
/// Parses as many literals as follow; if there is at least one, wraps them in an
/// `INSTR_PARAMS` node that starts before the first literal.
fn instr_params(p: &mut Parser) {
    let Some(first) = literal(p) else {
        return;
    };

    loop {
        if literal(p).is_none() {
            break;
        }
    }

    first.precede(p).complete(p, INSTR_PARAMS);
}

View file

@ -0,0 +1,20 @@
use enumset::enum_set;
use crate::parser::{
syntax_kind::{SyntaxKind::*, TokenSet},
CompletedMarker, Parser,
};
/// Token kinds that `literal` accepts as a literal.
const LIT_TOKENS: TokenSet = enum_set!(INT_NUM | FLOAT_NUM | STRING);
/// Parses a single literal token into a `LITERAL` node.
///
/// Returns `None`, without consuming anything, when the current token is not in `LIT_TOKENS`.
pub fn literal(p: &mut Parser) -> Option<CompletedMarker> {
    if LIT_TOKENS.contains(p.current()) {
        let marker = p.start();
        p.do_bump();
        Some(marker.complete(p, LITERAL))
    } else {
        None
    }
}

View file

@ -0,0 +1,61 @@
use crate::parser::syntax_kind::SyntaxKind;
/// Token input for the parser: the raw token list plus index tables into it.
pub struct Input<'src, 'toks> {
/// All tokens as `(kind, text)` pairs, trivia included.
raw: &'toks Vec<(SyntaxKind, &'src str)>,
/// Indices (into `raw`) of the "meaningful" tokens, i.e. everything that is neither
/// `WHITESPACE` nor `NEWLINE`.
/// NOTE(review): an earlier version of this comment said newlines are included here,
/// but `Input::new` filters them out and records them in `newlines` instead.
meaningful: Vec<usize>,
/// Indices (into `raw`) of the newline tokens, kept separately so they are easy to query;
/// this can help with diagnosing missing commas etc.
newlines: Vec<usize>,
}
impl<'src, 'toks> Input<'src, 'toks> {
pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
let meaningful = raw_toks
.iter()
.enumerate()
.filter_map(|(i, tok)| match tok.0 {
SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => None,
_ => Some(i),
})
.collect();
let newlines = raw_toks
.iter()
.enumerate()
.filter_map(|(i, tok)| match tok.0 {
SyntaxKind::NEWLINE => Some(i),
_ => None,
})
.collect();
Self {
raw: raw_toks,
meaningful,
newlines,
}
}
#[allow(clippy::unwrap_used, reason = "meaningful indices cannot be invalid")]
pub(crate) fn kind(&self, idx: usize) -> SyntaxKind {
let Some(meaningful_idx) = self.meaningful.get(idx) else {
return SyntaxKind::EOF;
};
self.raw.get(*meaningful_idx).unwrap().0
}
pub(crate) fn preceding_meaningless(&self, idx: usize) -> usize {
assert!(self.meaningful.len() > idx);
if idx == 0 {
1
} else {
self.meaningful[idx] - self.meaningful[idx - 1]
}
}
pub(crate) fn meaningless_tail_len(&self) -> usize {
self.raw.len() - (self.meaningful.last().unwrap() + 1)
}
}

View file

@ -0,0 +1,113 @@
use rowan::{GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken};
use std::mem;
use crate::parser::syntax_kind::{Lang, SyntaxKind};
use super::{error::SyntaxError, events::Event};
/// Final result of a parse: the green tree plus every syntax error that was
/// collected on the way.
pub struct Output {
    /// Root of the rowan green tree.
    pub green_node: GreenNode,
    /// Syntax errors reported by the parser.
    pub errors: Vec<SyntaxError>,
}

/// Prints the green tree as an indented listing; `errors` is not part of the
/// debug output.
impl std::fmt::Debug for Output {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The root starts at nesting level 0.
        debug_print_green_node(NodeOrToken::Node(&self.green_node), f, 0)
    }
}
/// Recursively writes `node` to `f`.
///
/// Tokens are printed as `KIND "text";`, nodes as `KIND {` … `}` with their
/// children one nesting level deeper. Every line is prefixed with one padding
/// string per nesting level (`lvl`).
fn debug_print_green_node(
    node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
    f: &mut std::fmt::Formatter<'_>,
    lvl: i32,
) -> std::fmt::Result {
    /// Writes the padding for nesting level `lvl`.
    fn pad(f: &mut std::fmt::Formatter<'_>, lvl: i32) -> std::fmt::Result {
        (0..lvl).try_for_each(|_| f.write_str(" "))
    }

    pad(f, lvl)?;

    match node {
        NodeOrToken::Token(t) => {
            writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
        }
        NodeOrToken::Node(n) => {
            writeln!(f, "{:?} {{", Lang::kind_from_raw(n.kind()))?;
            for child in n.children() {
                debug_print_green_node(child, f, lvl + 1)?;
            }
            // The closing brace lines up with the line that opened the node.
            pad(f, lvl)?;
            writeln!(f, "}}")
        }
    }
}
impl Output {
/// Replays the parser's event stream against the raw tokens and builds the
/// green tree.
///
/// * `raw_toks` - every lexed token (trivia included) in source order.
/// * `(events, errs)` - the events recorded by the parser and the syntax
///   errors it collected; the errors are passed through unchanged.
///
/// # Panics
///
/// Panics when an `Event::Error` is encountered (not implemented yet), when
/// more tokens are requested than `raw_toks` holds, or when one of the
/// `unreachable!()` invariants below is violated.
pub fn from_parser_output(
mut raw_toks: Vec<(SyntaxKind, &str)>,
(mut events, errs): (Vec<Event>, Vec<SyntaxError>),
) -> Self {
let mut builder = GreenNodeBuilder::new();
let mut fw_parents = Vec::new();
// Reversed so that `pop()` hands out the tokens in source order.
raw_toks.reverse();
for i in 0..events.len() {
// Take the event out of the list and leave a tombstone behind, so every
// event is handled at most once.
match mem::replace(&mut events[i], Event::tombstone()) {
Event::Start {
kind,
forward_parent,
} => {
// A tombstone without forward parent carries no information.
if kind == SyntaxKind::TOMBSTONE && forward_parent.is_none() {
continue;
}
// Collect this node's kind followed by the kinds along its
// `forward_parent` chain. Each offset is relative to the event it is
// stored on; visited events are replaced by tombstones.
fw_parents.push(kind);
let mut idx = i;
let mut fp = forward_parent;
while let Some(fwd) = fp {
idx += fwd as usize;
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Event::Start {
kind,
forward_parent,
} => {
fw_parents.push(kind);
forward_parent
}
// A forward parent offset has to point at a `Start` event.
_ => unreachable!(),
}
}
// Emit pending whitespace/newline tokens *before* the node(s) are opened,
// so the trivia does not become the first child of the new node.
while let Some((SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE, _)) =
raw_toks.last()
{
// Take the token out of the first `Eat` event that is still in the list
// (already handled events are tombstones by now).
// NOTE(review): this assumes that `Eat` counted the trivia emitted here
// — confirm against the code that produces `Eat` events.
match events.iter_mut().find(|ev| matches!(ev, Event::Eat { .. })) {
Some(Event::Eat { count }) => *count -= 1,
_ => unreachable!(),
}
let (tok, text): (SyntaxKind, &str) = raw_toks.pop().unwrap();
builder.token(tok.into(), text);
}
// The chain was collected starting at this node, so iterating in reverse
// opens the node at the far end of the chain first and this node last.
for kind in fw_parents.drain(..).rev() {
if kind != SyntaxKind::TOMBSTONE {
builder.start_node(kind.into());
}
}
}
Event::Finish => builder.finish_node(),
// Move `count` tokens from the token list into the current node.
Event::Eat { count } => (0..count).for_each(|_| {
let (tok, text): (SyntaxKind, &str) = raw_toks.pop().unwrap();
builder.token(tok.into(), text);
}),
Event::Error => todo!(),
}
}
Self {
green_node: builder.finish(),
errors: errs,
}
}
}

View file

@ -0,0 +1,6 @@
//! The parser architecture is *heavily* inspired by (and partially copied and adapted from) the amazing rust-analyzer
use drop_bomb::DropBomb;
use self::{error::SyntaxError, events::Event, input::Input};
use super::syntax_kind::SyntaxKind;

View file

@ -1,7 +1,6 @@
use enumset::EnumSet;
use logos::Logos; use logos::Logos;
use crate::parser::Span;
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> { pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
let mut lex = SyntaxKind::lexer(src); let mut lex = SyntaxKind::lexer(src);
let mut r = Vec::new(); let mut r = Vec::new();
@ -13,8 +12,9 @@ pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
r r
} }
#[derive(Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] #[derive(enumset::EnumSetType, Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
#[repr(u16)] #[repr(u16)]
#[enumset(no_super_impls)]
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
pub enum SyntaxKind { pub enum SyntaxKind {
#[token("def")] #[token("def")]
@ -39,6 +39,7 @@ pub enum SyntaxKind {
MAT_BODY, MAT_BODY,
PARENTHESIZED_EXPR, PARENTHESIZED_EXPR,
EXPR, EXPR,
LITERAL,
#[token("(")] #[token("(")]
L_PAREN, L_PAREN,
#[token(")")] #[token(")")]
@ -109,9 +110,29 @@ pub enum SyntaxKind {
PARSE_ERR, PARSE_ERR,
LEX_ERR, LEX_ERR,
ROOT, ROOT,
EOF,
TOMBSTONE,
ERROR,
} }
pub type TokenSet = EnumSet<SyntaxKind>;
impl From<SyntaxKind> for rowan::SyntaxKind { impl From<SyntaxKind> for rowan::SyntaxKind {
fn from(kind: SyntaxKind) -> Self { fn from(kind: SyntaxKind) -> Self {
Self(kind as u16) Self(kind as u16)
} }
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Lang {}
impl rowan::Language for Lang {
type Kind = SyntaxKind;
#[allow(unsafe_code)]
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
assert!(raw.0 <= SyntaxKind::ROOT as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
kind.into()
}
}

View file

@ -1,143 +1 @@
use crate::parser::ast::File;
use crate::parser::parse;
use crate::tokens::Token;
use chumsky::input::Stream;
use chumsky::prelude::*;
use indexmap::IndexMap;
use logos::Logos;
// #[test]
// fn test_parse_node_with_params() {
// const INPUT: &str = "meow [ hello: $foo, world: @bar]";
// assert_eq!(
// parse(INPUT).unwrap(),
// File {
// decls: IndexMap::from_iter([(
// ("main", (0..0).into()),
// (
// Expr::Node(
// ("meow", (0..4).into()),
// Some((
// IndexMap::from_iter([
// (
// ("hello", (7..12).into()),
// Expr::Var(("foo", (14..18).into()))
// ),
// (
// ("world", (20..25).into()),
// Expr::InputVar(("bar", (27..31).into()))
// )
// ]),
// (5..32).into()
// ))
// ),
// (0..32).into()
// )
// )])
// }
// );
// }
// fn test_parse_multiple_top_level_complex() {
// const INPUT: &str = r"def main = meow
// | uwu
// [ foo: @bar
// , hello: world @| test [ more: params ] | yay
// ]
// !| awa
// @| nya
// | rawr;
// def test = meow
// [ hello: $foo
// , world: @bar
// ];
// ";
// assert_eq!(
// parse(INPUT).unwrap(),
// File {
// decls: IndexMap::from_iter([
// (
// ("main", (4..8).into()),
// (
// Expr::SimplePipe(
// Box::new(Expr::Node(("meow", (11..15).into()), None)),
// Box::new(Expr::NullPipe(
// Box::new(Expr::Node(
// ("uwu", (20..23).into()),
// Some((
// IndexMap::from_iter([
// (
// ("foo", (29..32).into()),
// Expr::InputVar(("bar", (34..38).into()))
// ),
// (
// ("hello", (44..49).into()),
// Expr::MappingPipe(
// Box::new(Expr::Node(
// ("world", (51..56).into()),
// None
// )),
// Box::new(Expr::SimplePipe(
// Box::new(Expr::Node(
// ("test", (60..64).into()),
// Some((
// IndexMap::from_iter([(
// ("more", (67..71).into()),
// Expr::Node(
// ("params", (73..79).into()),
// None
// )
// )]),
// (65..81).into()
// ))
// )),
// Box::new(Expr::Node(
// ("yay", (84..87).into()),
// None
// ))
// ))
// )
// )
// ]),
// (27..92).into()
// ))
// )),
// Box::new(Expr::MappingPipe(
// Box::new(Expr::Node(("awa", (97..100).into()), None)),
// Box::new(Expr::SimplePipe(
// Box::new(Expr::Node(("nya", (106..109).into()), None)),
// Box::new(Expr::Node(("rawr", (114..118).into()), None))
// ))
// ))
// ))
// ),
// (11..118).into()
// ),
// ),
// (
// ("test", (125..129).into()),
// (
// Expr::Node(
// ("meow", (132..136).into()),
// Some((
// IndexMap::from_iter([
// (
// ("hello", (141..146).into()),
// Expr::Var(("foo", (148..152).into()))
// ),
// (
// ("world", (156..161).into()),
// Expr::InputVar(("bar", (163..167).into()))
// )
// ]),
// (139..171).into()
// ))
// ),
// (132..171).into()
// )
// )
// ])
// }
// );
// }

View file

@ -1,81 +0,0 @@
use logos::Logos;
/// Tokens produced by the lexer.
///
/// Whitespace (`[ \t\n\f]+`) is skipped and never shows up as a token.
/// Variants with a `&'a str` payload borrow their text from the lexed source.
#[derive(Logos, Debug, PartialEq, Eq, Clone)]
#[logos(skip r"[ \t\n\f]+")]
pub enum Token<'a> {
    // hack!
    // this isn't actually supposed to be in the language.
    // i just can't figure out how to automatically choose between a top level declaration
    // or a top level expression
    // so a declaration needs the keyword def until i can figure this out
    #[token("def")]
    Def,
    #[token("let")]
    Let,
    #[token("in")]
    In,
    #[token("mat")]
    Mat,
    /// Dimensions written as `<width>x<height>`, e.g. `16x9`.
    ///
    /// Both numbers have to fit into a `u16`; otherwise the callback returns
    /// `None`, which logos reports as a lex error instead of panicking.
    #[regex("[\\d]+x[\\d]+", |lex| {
        // The regex guarantees that there is an `x` to split at.
        let (x, y) = lex.slice().split_once('x').expect("shouldn't fail to split");
        // The regex only guarantees digits, not that they fit into a `u16`.
        x.parse::<u16>().ok().zip(y.parse::<u16>().ok())
    })]
    Dimensions((u16, u16)),
    /// Unsigned run of digits, kept as text.
    #[regex("[\\d]+", |lex| lex.slice())]
    Int(&'a str),
    /// Optionally signed number with a decimal point, kept as text.
    #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)", |lex| lex.slice())]
    Float(&'a str),
    // TODO: more bigger better more complex string lexing
    // TODO: templating?
    /// String literal; the payload still includes the surrounding quotes.
    #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, |lex| lex.slice())]
    String(&'a str),
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Mult,
    #[token("/")]
    Div,
    // TODO: figure out how to allow numbers in words?
    #[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*", |lex| lex.slice().trim())]
    Word(&'a str),
    /// `$name`; the payload is the name without the leading `$`.
    #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
    VarIdent(&'a str),
    /// `@name`; the payload is the name without the leading `@`.
    #[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
    InputIdent(&'a str),
    #[token(",")]
    Comma,
    #[token("|")]
    Pipe,
    #[token("@|")]
    MappingPipe,
    #[token("!|")]
    NullPipe,
    #[token("@")]
    At,
    #[token(">")]
    GreaterThan,
    #[token("=")]
    Equals,
    #[token(":")]
    Colon,
    #[token(";")]
    Semicolon,
    #[token("[")]
    BracketOpen,
    #[token("]")]
    BracketClose,
    #[token("(")]
    ParenOpen,
    #[token(")")]
    ParenClose,
    #[token("{")]
    BraceOpen,
    #[token("}")]
    BraceClose,
}
#[cfg(test)]
mod tests;

View file

@ -1,135 +0,0 @@
use logos::Logos;
use super::Token;
/// Generates a lexer test, to avoid writing the same boilerplate per case.
///
/// `lexer_test! { name, "input", [expected tokens] }` expands to a `#[test]`
/// function `name` that lexes `input`, unwraps every token (so a lex error
/// fails the test) and compares the collected tokens with the expectation.
macro_rules! lexer_test {
    ($test_name:ident, $src:literal, $expected:expr) => {
        #[test]
        fn $test_name() {
            let toks: Vec<_> = Token::lexer($src).map(|tok| tok.unwrap()).collect();
            assert_eq!(toks, $expected);
        }
    };
}
// Words joined by simple pipes.
lexer_test! {
    test_lex_simple_pipeline,
    "streamer | processor | sink",
    [
        Token::Word("streamer"),
        Token::Pipe,
        Token::Word("processor"),
        Token::Pipe,
        Token::Word("sink"),
    ]
}

// The `$` sigil is stripped from variable identifiers.
lexer_test! {
    test_lex_var_ident,
    "$identifier",
    [Token::VarIdent("identifier")]
}

// A subgroup definition with parameters, bracketed arguments and pipes.
lexer_test! {
    test_lex_subgroup,
    "subgroup(first, second) = a | b [ $first ] | c [ $second ]",
    [
        Token::Word("subgroup"),
        Token::ParenOpen,
        Token::Word("first"),
        Token::Comma,
        Token::Word("second"),
        Token::ParenClose,
        Token::Equals,
        Token::Word("a"),
        Token::Pipe,
        Token::Word("b"),
        Token::BracketOpen,
        Token::VarIdent("first"),
        Token::BracketClose,
        Token::Pipe,
        Token::Word("c"),
        Token::BracketOpen,
        Token::VarIdent("second"),
        Token::BracketClose,
    ]
}
// A crossing pipeline that reorders its named outputs.
// (The test name used to start with `text_` — a typo for the `test_` prefix
// every other lexer test uses.)
lexer_test! {
    test_lex_crossing_pipeline_reordering,
    "a >first, second|second, first> c",
    [
        Token::Word("a"),
        Token::GreaterThan,
        Token::Word("first"),
        Token::Comma,
        Token::Word("second"),
        Token::Pipe,
        Token::Word("second"),
        Token::Comma,
        Token::Word("first"),
        Token::GreaterThan,
        Token::Word("c"),
    ]
}
// Crossing pipe followed by a braced argument that uses an input identifier.
lexer_test! {
    test_lex_crossing_input_args,
    "a >second| c { second: @first }",
    [
        Token::Word("a"),
        Token::GreaterThan,
        Token::Word("second"),
        Token::Pipe,
        Token::Word("c"),
        Token::BraceOpen,
        Token::Word("second"),
        Token::Colon,
        Token::InputIdent("first"),
        Token::BraceClose,
    ]
}

// `@|` is lexed as one mapping pipe token.
lexer_test! {
    test_lex_map_io_named,
    "a @| c",
    [
        Token::Word("a"),
        Token::MappingPipe,
        Token::Word("c"),
    ]
}

// Plain integer.
lexer_test! {
    test_lex_int_literal,
    "42",
    [Token::Int("42")]
}

// Float with digits on both sides of the point.
lexer_test! {
    test_lex_float_literal_0,
    "1.5",
    [Token::Float("1.5")]
}

// Float without fractional digits.
lexer_test! {
    test_lex_float_literal_1,
    "42.",
    [Token::Float("42.")]
}

// Float without integer digits.
lexer_test! {
    test_lex_float_literal_2,
    ".42",
    [Token::Float(".42")]
}

View file

@ -1,7 +1 @@
hello world test hello world test 1.5 42 69 "gay"
42
(another command 3.14 "meow")
"uwu"
{
some: attrs 42 (meow gay 1)
}