lang: lexer

This commit is contained in:
Schrottkatze 2024-03-08 12:34:09 +01:00
parent d79383a7df
commit 98850ee1e9
Signed by: schrottkatze
SSH key fingerprint: SHA256:hXb3t1vINBFCiDCmhRABHX5ocdbLiKyCdKI4HK2Rbbc
6 changed files with 231 additions and 1 deletions

64
Cargo.lock generated
View file

@ -94,6 +94,12 @@ version = "0.21.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9"
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "bit_field"
version = "0.10.2"
@ -321,6 +327,12 @@ dependencies = [
"spin",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.2.12"
@ -400,6 +412,19 @@ dependencies = [
"rayon",
]
[[package]]
name = "lang"
version = "0.1.0"
dependencies = [
"logos",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lebe"
version = "0.5.2"
@ -433,6 +458,39 @@ dependencies = [
"scopeguard",
]
[[package]]
name = "logos"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a"
dependencies = [
"beef",
"fnv",
"lazy_static",
"proc-macro2",
"quote",
"regex-syntax",
"syn",
]
[[package]]
name = "logos-derive"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a"
dependencies = [
"logos-codegen",
]
[[package]]
name = "miniz_oxide"
version = "0.7.1"
@ -580,6 +638,12 @@ dependencies = [
"thiserror",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "ron"
version = "0.8.1"

View file

@ -3,6 +3,7 @@ members = [
"crates/app",
"crates/eval",
"crates/ir",
"crates/lang",
]
resolver = "2"

12
crates/lang/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "lang"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
logos = "0.14"
[lints]
workspace = true

1
crates/lang/src/lib.rs Normal file
View file

@ -0,0 +1 @@
/// Token definitions for the language's lexer.
pub mod tokens;

45
crates/lang/src/tokens.rs Normal file
View file

@ -0,0 +1,45 @@
use logos::Logos;
/// A single lexical token of the language.
///
/// The lexer is generated by the [`Logos`] derive. Runs of spaces, tabs,
/// newlines and form feeds between tokens are skipped and never produce a
/// token. Variants carrying a `&'a str` borrow directly from the input, so
/// the type is cheap to copy.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq)]
#[logos(skip r"[ \t\n\f]+")]
pub enum Token<'a> {
    /// One or more ASCII letters, digits, `_` or `-`.
    ///
    /// Purely numeric input such as `1` is also lexed as a `Word`.
    #[regex("[a-zA-Z0-9_\\-]+", |lex| lex.slice())]
    Word(&'a str),
    /// `$` followed by a word; the payload excludes the leading `$`.
    // The slice at `[1..]` is safe: the pattern guarantees a one-byte `$` prefix.
    #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
    VarIdent(&'a str),
    /// The literal `@..`.
    #[token("@..")]
    InputSpread,
    /// `@` followed by a word; the payload excludes the leading `@`.
    // The slice at `[1..]` is safe: the pattern guarantees a one-byte `@` prefix.
    #[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
    InputIdent(&'a str),
    /// The literal `,`.
    #[token(",")]
    Comma,
    /// The literal `|`.
    #[token("|")]
    Pipe,
    /// The literal `@|`.
    #[token("@|")]
    MappingPipe,
    /// The literal `!|`.
    #[token("!|")]
    NullPipe,
    /// A lone `@` that is not part of `@..`, `@|` or an input identifier.
    #[token("@")]
    At,
    /// The literal `>`.
    #[token(">")]
    GreaterThan,
    /// The literal `=`.
    #[token("=")]
    Equals,
    /// The literal `:`.
    #[token(":")]
    Colon,
    /// The literal `[`.
    #[token("[")]
    BracketOpen,
    /// The literal `]`.
    #[token("]")]
    BracketClose,
    /// The literal `(`.
    #[token("(")]
    ParenOpen,
    /// The literal `)`.
    #[token(")")]
    ParenClose,
    /// The literal `{`.
    #[token("{")]
    BraceOpen,
    /// The literal `}`.
    #[token("}")]
    BraceClose,
}
// Lexer unit tests live in the `tests` submodule and are only compiled for test builds.
#[cfg(test)]
mod tests;

View file

@ -0,0 +1,107 @@
use logos::Logos;
use super::Token;
/// Generates a `#[test]` function named `$name` that lexes `$input` and
/// compares the produced tokens against `$out`, so individual lexer tests
/// do not have to repeat that boilerplate.
///
/// The generated test panics (and therefore fails) if the lexer yields an
/// error for any part of the input.
macro_rules! lexer_test {
    ($name:ident, $input:literal, $out:expr) => {
        #[test]
        fn $name() {
            let tokens: Vec<_> = Token::lexer($input)
                .map(|result| result.unwrap())
                .collect();
            assert_eq!(tokens, $out);
        }
    };
}
// Words separated by `|` lex to alternating `Word` and `Pipe` tokens;
// the surrounding whitespace produces no tokens.
lexer_test! {
test_lex_simple_pipeline,
"streamer | processor | sink",
[
Token::Word("streamer"),
Token::Pipe,
Token::Word("processor"),
Token::Pipe,
Token::Word("sink")
]
}
// A `$`-prefixed identifier lexes to `VarIdent` with the `$` stripped from the payload.
lexer_test! {
test_lex_var_ident,
"$identifier",
[ Token::VarIdent("identifier") ]
}
// Exercises parentheses, commas, `=`, braces, `:` and `$` identifiers in one input.
// Note that the numeric keys (`1`) are expected to lex as `Word`.
lexer_test! {
test_lex_subgroup,
"subgroup(first, second) = a | b { 1: $first } | c { 1: $second }",
[
Token::Word("subgroup"),
Token::ParenOpen,
Token::Word("first"),
Token::Comma,
Token::Word("second"),
Token::ParenClose,
Token::Equals,
Token::Word("a"),
Token::Pipe,
Token::Word("b"),
Token::BraceOpen,
Token::Word("1"),
Token::Colon,
Token::VarIdent("first"),
Token::BraceClose,
Token::Pipe,
Token::Word("c"),
Token::BraceOpen,
Token::Word("1"),
Token::Colon,
Token::VarIdent("second"),
Token::BraceClose
]
}
// `>` and `|` adjacent to words (no separating whitespace) must still lex as
// separate `GreaterThan` / `Pipe` tokens around the comma-separated names.
lexer_test! {
    test_lex_crossing_pipeline_reordering,
    "a >first, second|second, first> c",
    [
        Token::Word("a"),
        Token::GreaterThan,
        Token::Word("first"),
        Token::Comma,
        Token::Word("second"),
        Token::Pipe,
        Token::Word("second"),
        Token::Comma,
        Token::Word("first"),
        Token::GreaterThan,
        Token::Word("c")
    ]
}
// An `@`-prefixed identifier inside braces lexes to `InputIdent` with the `@` stripped.
lexer_test! {
test_lex_crossing_input_args,
"a >second| c { second: @first }",
[
Token::Word("a"),
Token::GreaterThan,
Token::Word("second"),
Token::Pipe,
Token::Word("c"),
Token::BraceOpen,
Token::Word("second"),
Token::Colon,
Token::InputIdent("first"),
Token::BraceClose
]
}
// `@|` must lex as a single `MappingPipe` token rather than `At` followed by `Pipe`.
lexer_test! {
test_lex_map_io_named,
"a @| c",
[
Token::Word("a"),
Token::MappingPipe,
Token::Word("c")
]
}