diff --git a/Cargo.lock b/Cargo.lock index 7447d0d..5cd0ff9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,12 @@ version = "0.21.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bit_field" version = "0.10.2" @@ -321,6 +327,12 @@ dependencies = [ "spin", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "getrandom" version = "0.2.12" @@ -400,6 +412,19 @@ dependencies = [ "rayon", ] +[[package]] +name = "lang" +version = "0.1.0" +dependencies = [ + "logos", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "lebe" version = "0.5.2" @@ -433,6 +458,39 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "logos" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a" 
+dependencies = [ + "logos-codegen", +] + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -580,6 +638,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + [[package]] name = "ron" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index 6d7f53d..82e4afd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,8 @@ members = [ "crates/app", "crates/eval", - "crates/ir", + "crates/ir", + "crates/lang", ] resolver = "2" diff --git a/crates/lang/Cargo.toml b/crates/lang/Cargo.toml new file mode 100644 index 0000000..1182341 --- /dev/null +++ b/crates/lang/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "lang" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +logos = "0.14" + +[lints] +workspace = true diff --git a/crates/lang/src/lib.rs b/crates/lang/src/lib.rs new file mode 100644 index 0000000..5c76635 --- /dev/null +++ b/crates/lang/src/lib.rs @@ -0,0 +1 @@ +pub mod tokens; diff --git a/crates/lang/src/tokens.rs b/crates/lang/src/tokens.rs new file mode 100644 index 0000000..e21b961 --- /dev/null +++ b/crates/lang/src/tokens.rs @@ -0,0 +1,45 @@ +use logos::Logos; + +#[derive(Logos, Debug, PartialEq, Eq)] +#[logos(skip r"[ \t\n\f]+")] +pub enum Token<'a> { + #[regex("[a-zA-Z0-9_\\-]+", |lex| lex.slice())] + Word(&'a str), + #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] + VarIdent(&'a str), + #[token("@..")] + InputSpread, + #[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] + InputIdent(&'a str), + #[token(",")] + Comma, + #[token("|")] + Pipe, + #[token("@|")] + MappingPipe, + #[token("!|")] + NullPipe, + #[token("@")] + At, + #[token(">")] + GreaterThan, + #[token("=")] + Equals, + #[token(":")] + Colon, + #[token("[")] + BracketOpen, + #[token("]")] + 
BracketClose, + #[token("(")] + ParenOpen, + #[token(")")] + ParenClose, + #[token("{")] + BraceOpen, + #[token("}")] + BraceClose, +} + +#[cfg(test)] +mod tests; diff --git a/crates/lang/src/tokens/tests.rs b/crates/lang/src/tokens/tests.rs new file mode 100644 index 0000000..fe4270e --- /dev/null +++ b/crates/lang/src/tokens/tests.rs @@ -0,0 +1,107 @@ +use logos::Logos; + +use super::Token; + +/// generates tests for the lexer to avoid writing boilerplate +macro_rules! lexer_test { + ($name:ident, $input:literal, $out:expr) => { + #[test] + fn $name() { + let lex = Token::lexer($input); + let toks = lex.map(|tok| tok.unwrap()).collect::<Vec<_>>(); + assert_eq!(toks, $out); + } + }; +} + +lexer_test! { + test_lex_simple_pipeline, + "streamer | processor | sink", + [ + Token::Word("streamer"), + Token::Pipe, + Token::Word("processor"), + Token::Pipe, + Token::Word("sink") + ] +} + +lexer_test! { + test_lex_var_ident, + "$identifier", + [ Token::VarIdent("identifier") ] +} + +lexer_test! { + test_lex_subgroup, + "subgroup(first, second) = a | b { 1: $first } | c { 1: $second }", + [ + Token::Word("subgroup"), + Token::ParenOpen, + Token::Word("first"), + Token::Comma, + Token::Word("second"), + Token::ParenClose, + Token::Equals, + Token::Word("a"), + Token::Pipe, + Token::Word("b"), + Token::BraceOpen, + Token::Word("1"), + Token::Colon, + Token::VarIdent("first"), + Token::BraceClose, + Token::Pipe, + Token::Word("c"), + Token::BraceOpen, + Token::Word("1"), + Token::Colon, + Token::VarIdent("second"), + Token::BraceClose + ] +} + +lexer_test! { + test_lex_crossing_pipeline_reordering, + "a >first, second|second, first> c", + [ + Token::Word("a"), + Token::GreaterThan, + Token::Word("first"), + Token::Comma, + Token::Word("second"), + Token::Pipe, + Token::Word("second"), + Token::Comma, + Token::Word("first"), + Token::GreaterThan, + Token::Word("c") + ] +} + +lexer_test! 
{ + test_lex_crossing_input_args, + "a >second| c { second: @first }", + [ + Token::Word("a"), + Token::GreaterThan, + Token::Word("second"), + Token::Pipe, + Token::Word("c"), + Token::BraceOpen, + Token::Word("second"), + Token::Colon, + Token::InputIdent("first"), + Token::BraceClose + ] +} + +lexer_test! { + test_lex_map_io_named, + "a @| c", + [ + Token::Word("a"), + Token::MappingPipe, + Token::Word("c") + ] +}