lang: lexer
parent d79383a7df
commit 98850ee1e9
6 changed files with 231 additions and 1 deletion
crates/lang/Cargo.toml (Normal file, 12 additions)
@@ -0,0 +1,12 @@
[package]
name = "lang"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
logos = "0.14"

[lints]
workspace = true
crates/lang/src/lib.rs (Normal file, 1 addition)
@@ -0,0 +1 @@
pub mod tokens;
crates/lang/src/tokens.rs (Normal file, 45 additions)
@@ -0,0 +1,45 @@
use logos::Logos;

#[derive(Logos, Debug, PartialEq, Eq)]
#[logos(skip r"[ \t\n\f]+")]
pub enum Token<'a> {
    #[regex("[a-zA-Z0-9_\\-]+", |lex| lex.slice())]
    Word(&'a str),
    #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
    VarIdent(&'a str),
    #[token("@..")]
    InputSpread,
    #[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
    InputIdent(&'a str),
    #[token(",")]
    Comma,
    #[token("|")]
    Pipe,
    #[token("@|")]
    MappingPipe,
    #[token("!|")]
    NullPipe,
    #[token("@")]
    At,
    #[token(">")]
    GreaterThan,
    #[token("=")]
    Equals,
    #[token(":")]
    Colon,
    #[token("[")]
    BracketOpen,
    #[token("]")]
    BracketClose,
    #[token("(")]
    ParenOpen,
    #[token(")")]
    ParenClose,
    #[token("{")]
    BraceOpen,
    #[token("}")]
    BraceClose,
}

#[cfg(test)]
mod tests;
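As a quick smoke test of the token definitions above, here is a minimal sketch of driving the lexer by hand (the `main` function and sample string are illustrative, assuming the crate is consumed as `lang`; in logos 0.14 the lexer iterator yields `Result<Token, ()>`):

use logos::Logos;

use lang::tokens::Token;

fn main() {
    // Each iteration yields Result<Token, ()>; slice() is the matched text.
    let mut lex = Token::lexer("a !| b @| $sink");
    while let Some(tok) = lex.next() {
        println!("{:?} -> {:?}", lex.slice(), tok);
    }
}

This should print Word("a"), NullPipe, Word("b"), MappingPipe, and VarIdent("sink") alongside their source slices.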
crates/lang/src/tokens/tests.rs (Normal file, 107 additions)
@@ -0,0 +1,107 @@
use logos::Logos;

use super::Token;

/// generates tests for the lexer to avoid writing boilerplate
macro_rules! lexer_test {
    ($name:ident, $input:literal, $out:expr) => {
        #[test]
        fn $name() {
            let lex = Token::lexer($input);
            let toks = lex.map(|tok| tok.unwrap()).collect::<Vec<_>>();
            assert_eq!(toks, $out);
        }
    };
}

lexer_test! {
    test_lex_simple_pipeline,
    "streamer | processor | sink",
    [
        Token::Word("streamer"),
        Token::Pipe,
        Token::Word("processor"),
        Token::Pipe,
        Token::Word("sink")
    ]
}

lexer_test! {
    test_lex_var_ident,
    "$identifier",
    [ Token::VarIdent("identifier") ]
}

lexer_test! {
    test_lex_subgroup,
    "subgroup(first, second) = a | b { 1: $first } | c { 1: $second }",
    [
        Token::Word("subgroup"),
        Token::ParenOpen,
        Token::Word("first"),
        Token::Comma,
        Token::Word("second"),
        Token::ParenClose,
        Token::Equals,
        Token::Word("a"),
        Token::Pipe,
        Token::Word("b"),
        Token::BraceOpen,
        Token::Word("1"),
        Token::Colon,
        Token::VarIdent("first"),
        Token::BraceClose,
        Token::Pipe,
        Token::Word("c"),
        Token::BraceOpen,
        Token::Word("1"),
        Token::Colon,
        Token::VarIdent("second"),
        Token::BraceClose
    ]
}

lexer_test! {
    test_lex_crossing_pipeline_reordering,
    "a >first, second|second, first> c",
    [
        Token::Word("a"),
        Token::GreaterThan,
        Token::Word("first"),
        Token::Comma,
        Token::Word("second"),
        Token::Pipe,
        Token::Word("second"),
        Token::Comma,
        Token::Word("first"),
        Token::GreaterThan,
        Token::Word("c")
    ]
}

lexer_test! {
    test_lex_crossing_input_args,
    "a >second| c { second: @first }",
    [
        Token::Word("a"),
        Token::GreaterThan,
        Token::Word("second"),
        Token::Pipe,
        Token::Word("c"),
        Token::BraceOpen,
        Token::Word("second"),
        Token::Colon,
        Token::InputIdent("first"),
        Token::BraceClose
    ]
}

lexer_test! {
    test_lex_map_io_named,
    "a @| c",
    [
        Token::Word("a"),
        Token::MappingPipe,
        Token::Word("c")
    ]
}
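For reference, each `lexer_test!` invocation expands to a plain `#[test]` function; the `test_lex_var_ident` case above, for example, becomes roughly:

#[test]
fn test_lex_var_ident() {
    // Expanded form of the macro: lex the input, unwrap every token,
    // and compare the collected sequence against the expected array.
    let lex = Token::lexer("$identifier");
    let toks = lex.map(|tok| tok.unwrap()).collect::<Vec<_>>();
    assert_eq!(toks, [Token::VarIdent("identifier")]);
}

so a failing case panics either on `unwrap` (a lexing error) or on the token-sequence assertion.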