lang: lexer

This commit is contained in:
Schrottkatze 2024-03-08 12:34:09 +01:00
parent d79383a7df
commit 98850ee1e9
Signed by: schrottkatze
SSH key fingerprint: SHA256:hXb3t1vINBFCiDCmhRABHX5ocdbLiKyCdKI4HK2Rbbc
6 changed files with 231 additions and 1 deletions

12
crates/lang/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "lang"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
logos = "0.14"
[lints]
workspace = true

1
crates/lang/src/lib.rs Normal file
View file

@ -0,0 +1 @@
pub mod tokens;

45
crates/lang/src/tokens.rs Normal file
View file

@ -0,0 +1,45 @@
use logos::Logos;
#[derive(Logos, Debug, PartialEq, Eq)]
#[logos(skip r"[ \t\n\f]+")]
pub enum Token<'a> {
#[regex("[a-zA-Z0-9_\\-]+", |lex| lex.slice())]
Word(&'a str),
#[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
VarIdent(&'a str),
#[token("@..")]
InputSpread,
#[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
InputIdent(&'a str),
#[token(",")]
Comma,
#[token("|")]
Pipe,
#[token("@|")]
MappingPipe,
#[token("!|")]
NullPipe,
#[token("@")]
At,
#[token(">")]
GreaterThan,
#[token("=")]
Equals,
#[token(":")]
Colon,
#[token("[")]
BracketOpen,
#[token("]")]
BracketClose,
#[token("(")]
ParenOpen,
#[token(")")]
ParenClose,
#[token("{")]
BraceOpen,
#[token("}")]
BraceClose,
}
#[cfg(test)]
mod tests;

View file

@ -0,0 +1,107 @@
use logos::Logos;
use super::Token;
/// generates tests for the lexer to avoid writing boilerplate
macro_rules! lexer_test {
($name:ident, $input:literal, $out:expr) => {
#[test]
fn $name() {
let lex = Token::lexer($input);
let toks = lex.map(|tok| tok.unwrap()).collect::<Vec<_>>();
assert_eq!(toks, $out);
}
};
}
lexer_test! {
test_lex_simple_pipeline,
"streamer | processor | sink",
[
Token::Word("streamer"),
Token::Pipe,
Token::Word("processor"),
Token::Pipe,
Token::Word("sink")
]
}
lexer_test! {
test_lex_var_ident,
"$identifier",
[ Token::VarIdent("identifier") ]
}
lexer_test! {
test_lex_subgroup,
"subgroup(first, second) = a | b { 1: $first } | c { 1: $second }",
[
Token::Word("subgroup"),
Token::ParenOpen,
Token::Word("first"),
Token::Comma,
Token::Word("second"),
Token::ParenClose,
Token::Equals,
Token::Word("a"),
Token::Pipe,
Token::Word("b"),
Token::BraceOpen,
Token::Word("1"),
Token::Colon,
Token::VarIdent("first"),
Token::BraceClose,
Token::Pipe,
Token::Word("c"),
Token::BraceOpen,
Token::Word("1"),
Token::Colon,
Token::VarIdent("second"),
Token::BraceClose
]
}
lexer_test! {
text_lex_crossing_pipeline_reordering,
"a >first, second|second, first> c",
[
Token::Word("a"),
Token::GreaterThan,
Token::Word("first"),
Token::Comma,
Token::Word("second"),
Token::Pipe,
Token::Word("second"),
Token::Comma,
Token::Word("first"),
Token::GreaterThan,
Token::Word("c")
]
}
lexer_test! {
test_lex_crossing_input_args,
"a >second| c { second: @first }",
[
Token::Word("a"),
Token::GreaterThan,
Token::Word("second"),
Token::Pipe,
Token::Word("c"),
Token::BraceOpen,
Token::Word("second"),
Token::Colon,
Token::InputIdent("first"),
Token::BraceClose
]
}
lexer_test! {
test_lex_map_io_named,
"a @| c",
[
Token::Word("a"),
Token::MappingPipe,
Token::Word("c")
]
}