From ca84af4e1bd6b22e7e32c3f3864c4f2618c94a15 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Wed, 3 Apr 2024 00:08:00 +0200 Subject: [PATCH] lang: basic parser --- Cargo.lock | 153 ++++++++++++++++++++++++++++++++- Cargo.toml | 1 + crates/lang/Cargo.toml | 4 + crates/lang/src/lib.rs | 4 + crates/lang/src/main.rs | 17 ++++ crates/lang/src/parser.rs | 134 +++++++++++++++++++++++++++++ crates/lang/src/parser/ast.rs | 36 ++++++++ crates/lang/src/tokens.rs | 11 ++- crates/svg-filters/Cargo.toml | 2 +- crates/svg-filters/src/main.rs | 29 +------ testfiles/test.owo | 4 + 11 files changed, 362 insertions(+), 33 deletions(-) create mode 100644 crates/lang/src/main.rs create mode 100644 crates/lang/src/parser.rs create mode 100644 crates/lang/src/parser/ast.rs create mode 100644 testfiles/test.owo diff --git a/Cargo.lock b/Cargo.lock index a8b1f50..bc8692d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,33 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "anstream" version = "0.6.5" @@ -133,12 +160,31 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "cc" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "1.0.0-alpha.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9c28d4e5dd9a9262a38b231153591da6ce1471b818233f4727985d3dd0ed93c" +dependencies = [ + "hashbrown", + "regex-automata", + "serde", + "stacker", + "unicode-ident", +] + [[package]] name = "clap" version = "4.4.12" @@ -389,6 +435,10 @@ name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "heck" @@ -417,9 +467,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.5" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown", @@ -453,7 +503,11 @@ dependencies = [ name = "lang" version = "0.1.0" dependencies = [ + "chumsky", + "clap", + "indexmap", "logos", + "petgraph", ] [[package]] @@ -515,7 +569,7 @@ dependencies = [ "lazy_static", "proc-macro2", "quote", - "regex-syntax", + "regex-syntax 0.8.2", "syn", ] @@ -583,6 +637,12 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + [[package]] name = "option-ext" version = "0.2.0" @@ -675,6 +735,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "qoi" version = "0.4.1" @@ -758,6 +827,23 @@ dependencies = [ "thiserror", ] +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -846,6 +932,19 @@ dependencies = [ "lock_api", ] +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "strsim" version = "0.10.0" @@ -942,6 +1041,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -954,6 +1059,28 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.48.0" @@ -1092,6 +1219,26 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zune-inflate" version = "0.2.54" diff --git a/Cargo.toml b/Cargo.toml index cbd514c..2d4a618 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ resolver = "2" [workspace.dependencies] clap = { version = "4", features = ["derive"] } serde = { version = "1.0", features = ["derive"] } +petgraph = "0.6.4" # to enable all the lints below, this must be present in a workspace member's Cargo.toml: # [lints] diff --git a/crates/lang/Cargo.toml b/crates/lang/Cargo.toml index 1182341..8126ffa 100644 --- a/crates/lang/Cargo.toml +++ b/crates/lang/Cargo.toml @@ -7,6 +7,10 @@ edition = "2021" [dependencies] logos = "0.14" +chumsky = "1.0.0-alpha.6" +petgraph = { workspace = true} +indexmap = "2.2.6" +clap = { version = "4", features = ["derive"] } [lints] workspace = true diff --git a/crates/lang/src/lib.rs b/crates/lang/src/lib.rs index 5c76635..6878991 100644 --- a/crates/lang/src/lib.rs +++ b/crates/lang/src/lib.rs @@ -1 +1,5 @@ +pub mod parser; pub mod tokens; +pub mod err_reporting { + pub struct GlobalReporter {} +} diff --git a/crates/lang/src/main.rs b/crates/lang/src/main.rs new file mode 100644 index 0000000..3fef5ac --- /dev/null +++ b/crates/lang/src/main.rs @@ -0,0 +1,17 @@ +use std::{fs, path::PathBuf}; + +use clap::Parser; +use lang::parser::parse; + +#[derive(Parser)] +struct Args { + file: PathBuf, +} + +fn main() { + let args = Args::parse(); + let f = fs::read_to_string(args.file).expect("failed to read file"); + + println!("file: {f}\n"); + println!("parsed: {:?}", parse(&f)) +} diff --git a/crates/lang/src/parser.rs b/crates/lang/src/parser.rs new file mode 100644 index 0000000..b1034fa --- /dev/null +++ b/crates/lang/src/parser.rs @@ -0,0 +1,134 @@ +use std::ops::Range; + +use chumsky::{ + error::Rich, + extra, + input::{Stream, ValueInput}, + prelude::*, + primitive::just, + recursive::recursive, + select, + span::SimpleSpan, + IterParser, Parser, +}; +use indexmap::IndexMap; +use logos::{Logos, Source}; + +use crate::tokens::Token; + +pub mod ast; +use self::ast::{Expr, File}; + +type Span = SimpleSpan; +type Spanned = (T, Span); + +pub fn parse<'src>(src: &'src str) -> ParseResult, Rich<'_, Token<'_>>> { + let toks: Vec<_> = Token::lexer(src) + .spanned() + .into_iter() + .map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s))) + .collect(); + let tok_stream = Stream::from_iter(toks).spanned((src.len()..src.len()).into()); + expr_parser().parse(tok_stream) +} + +fn expr_parser<'tokens, 'src: 'tokens, I: ValueInput<'tokens, Token = Token<'src>, Span = Span>>( +) -> impl Parser<'tokens, I, File<'src>, extra::Err, Span>>> { + let word = select! { Token::Word(word) => word }; + + let expr = recursive(|expr| { + let var = select! { + Token::VarIdent(name) => (Expr::Var as fn(_) -> _, name), + Token::InputIdent(name) => (Expr::InputVar as fn(_) -> _, name) + } + .map_with(|(item_type, name), extra| item_type((name, extra.span()))); + + let attrset = word + .map_with(|n, e| (n, e.span())) + .then_ignore(just(Token::Colon)) + .then(expr) + .separated_by(just(Token::Comma)) + .collect::>() + .map(IndexMap::from_iter) + .delimited_by(just(Token::BracketOpen), just(Token::BracketClose)) + .map_with(|v, e| (v, e.span())); + + let node = word + .map_with(|v, e| (v, e.span())) + .then(attrset.clone().or_not()) + .map(|(name, params)| Expr::Node(name, params)) + .or(var) + .or(attrset.map(Expr::AttrSet)); + + let pipeline = node + .clone() + .then(choice(( + just(Token::Pipe).to(Expr::SimplePipe as fn(_, _) -> _), + just(Token::MappingPipe).to(Expr::MappingPipe as fn(_, _) -> _), + just(Token::NullPipe).to(Expr::NullPipe as fn(_, _) -> _), + ))) + .repeated() + .foldr(node, |(curr, pipe), next| { + pipe(Box::new(curr), Box::new(next)) + }); + + pipeline + }); + + let decl = just(Token::Def).ignore_then( + word.map_with(|n, e| (n, e.span())) + .then_ignore(just(Token::Equals)) + .then(expr.clone().map_with(|expr, extra| (expr, extra.span()))) + .then_ignore(just(Token::SemiColon)), + ); + + expr.map_with(|expr, extra| File { + decls: IndexMap::from_iter([(("main", (0..0).into()), (expr, extra.span()))]), + }) + .or(decl.repeated().collect::>().map(|decls| File { + decls: IndexMap::from_iter(decls), + })) +} + +#[cfg(test)] +mod tests { + use crate::parser::ast::{Expr, File}; + use crate::parser::parse; + use crate::tokens::Token; + use chumsky::input::Stream; + use chumsky::prelude::*; + use indexmap::IndexMap; + use logos::Logos; + + #[test] + fn test_parse_node_with_params() { + const INPUT: &str = "meow [ hello: $foo, world: @bar]"; + assert_eq!( + parse(INPUT).unwrap(), + File { + decls: IndexMap::from_iter([( + ("main", (0..0).into()), + ( + Expr::Node( + ("meow", (0..4).into()), + Some(( + IndexMap::from_iter([ + ( + ("hello", (7..12).into()), + Expr::Var(("foo", (14..18).into())) + ), + ( + ("world", (20..25).into()), + Expr::InputVar(("bar", (27..31).into())) + ) + ]), + (5..32).into() + )) + ), + (0..0).into() + ) + )]) + } + ); + } +} diff --git a/crates/lang/src/parser/ast.rs b/crates/lang/src/parser/ast.rs new file mode 100644 index 0000000..a276a75 --- /dev/null +++ b/crates/lang/src/parser/ast.rs @@ -0,0 +1,36 @@ +use std::collections::{BTreeMap, HashMap}; + +use indexmap::IndexMap; + +use super::Spanned; + +#[derive(Debug, PartialEq, Eq)] +pub struct File<'src> { + pub decls: IndexMap, Spanned>>, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Expr<'src> { + Node( + Spanned<&'src str>, + Option, Expr<'src>>>>, + ), + SimplePipe(Box>, Box>), + NamingPipe( + Box>, + (Vec>, Vec>), + Box>, + ), + MappingPipe(Box>, Box>), + NullPipe(Box>, Box>), + MultiPipe(IndexMap, Expr<'src>>), + LetIn( + IndexMap, Box>>, + Box>, + ), + // $ + Var(Spanned<&'src str>), + // @ + InputVar(Spanned<&'src str>), + AttrSet(Spanned, Expr<'src>>>), +} diff --git a/crates/lang/src/tokens.rs b/crates/lang/src/tokens.rs index e21b961..40280e7 100644 --- a/crates/lang/src/tokens.rs +++ b/crates/lang/src/tokens.rs @@ -1,8 +1,15 @@ use logos::Logos; -#[derive(Logos, Debug, PartialEq, Eq)] +#[derive(Logos, Debug, PartialEq, Eq, Clone)] #[logos(skip r"[ \t\n\f]+")] pub enum Token<'a> { + // hack! + // this isn't actually supposed to be in the language. + // i just can't figure out how to automatically choose between a top level declaration + // or a top level expression + // so a declaration needs the keyword def until i can figure this out + #[token("def")] + Def, #[regex("[a-zA-Z0-9_\\-]+", |lex| lex.slice())] Word(&'a str), #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] @@ -27,6 +34,8 @@ pub enum Token<'a> { Equals, #[token(":")] Colon, + #[token(";")] + SemiColon, #[token("[")] BracketOpen, #[token("]")] diff --git a/crates/svg-filters/Cargo.toml b/crates/svg-filters/Cargo.toml index fbe3aaf..7f41e66 100644 --- a/crates/svg-filters/Cargo.toml +++ b/crates/svg-filters/Cargo.toml @@ -8,7 +8,7 @@ edition = "2021" [dependencies] csscolorparser = "0.6.2" indexmap = "2.2.5" -petgraph = "0.6.4" +petgraph = { workspace = true } quick-xml = { version = "0.31.0", features = ["serialize"] } [lints] diff --git a/crates/svg-filters/src/main.rs b/crates/svg-filters/src/main.rs index 2fc8df3..5ce0102 100644 --- a/crates/svg-filters/src/main.rs +++ b/crates/svg-filters/src/main.rs @@ -1,35 +1,8 @@ -use std::hint::black_box; - -use svg_filters::{ - codegen::SvgDocument, - types::{ - graph::edge::Edge, - nodes::{ - primitives::{ - blend::BlendMode, - color_matrix::ColorMatrixType, - turbulence::{NoiseType, StitchTiles, Turbulence}, - FePrimitive, - }, - standard_input::StandardInput, - }, - }, - Node, -}; +use svg_filters::codegen::SvgDocument; fn main() { let mut doc = SvgDocument::new(); - let noise = doc.create_filter("noise"); - - noise.add_node(Node::simple(FePrimitive::Turbulence(Turbulence { - base_frequency: (0.2, 0.2), - num_octaves: 1, - seed: 2, - stitch_tiles: StitchTiles::NoStitch, - noise_type: NoiseType::FractalNoise, - }))); - eprintln!("{}", doc.generate_svg_pretty()); println!("{}", doc.generate_svg()); } diff --git a/testfiles/test.owo b/testfiles/test.owo new file mode 100644 index 0000000..d9d05da --- /dev/null +++ b/testfiles/test.owo @@ -0,0 +1,4 @@ +def main = meow | test; +def test = meow [ hello: $foo, world: @bar]; + +def uwu = owo;