lang: lexer
This commit is contained in:
parent
d79383a7df
commit
98850ee1e9
6 changed files with 231 additions and 1 deletions
64
Cargo.lock
generated
64
Cargo.lock
generated
|
@ -94,6 +94,12 @@ version = "0.21.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9"
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
|
||||
|
||||
[[package]]
|
||||
name = "bit_field"
|
||||
version = "0.10.2"
|
||||
|
@ -321,6 +327,12 @@ dependencies = [
|
|||
"spin",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.12"
|
||||
|
@ -400,6 +412,19 @@ dependencies = [
|
|||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lang"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"logos",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "lebe"
|
||||
version = "0.5.2"
|
||||
|
@ -433,6 +458,39 @@ dependencies = [
|
|||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8"
|
||||
dependencies = [
|
||||
"logos-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-codegen"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a"
|
||||
dependencies = [
|
||||
"beef",
|
||||
"fnv",
|
||||
"lazy_static",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex-syntax",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-derive"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a"
|
||||
dependencies = [
|
||||
"logos-codegen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.1"
|
||||
|
@ -580,6 +638,12 @@ dependencies = [
|
|||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
|
||||
|
||||
[[package]]
|
||||
name = "ron"
|
||||
version = "0.8.1"
|
||||
|
|
|
@ -3,6 +3,7 @@ members = [
|
|||
"crates/app",
|
||||
"crates/eval",
|
||||
"crates/ir",
|
||||
"crates/lang",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
|
|
12
crates/lang/Cargo.toml
Normal file
12
crates/lang/Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "lang"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
logos = "0.14"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
1
crates/lang/src/lib.rs
Normal file
1
crates/lang/src/lib.rs
Normal file
|
@ -0,0 +1 @@
|
|||
pub mod tokens;
|
45
crates/lang/src/tokens.rs
Normal file
45
crates/lang/src/tokens.rs
Normal file
|
@ -0,0 +1,45 @@
|
|||
use logos::Logos;
|
||||
|
||||
/// Lexical tokens produced by the logos-generated lexer.
///
/// Variants carrying `&'a str` hold a slice of the matched input text, as
/// returned by `lex.slice()` in their callbacks.
///
/// Runs of spaces, tabs, newlines and form feeds are skipped between tokens.
/// NOTE(review): `\r` is not in the skip set, so CRLF input presumably yields
/// a lex error; confirm whether that is intended.
#[derive(Logos, Debug, PartialEq, Eq)]
|
||||
#[logos(skip r"[ \t\n\f]+")]
|
||||
pub enum Token<'a> {
|
||||
/// One or more ASCII letters, digits, `_` or `-`; the payload is the whole
/// match. Purely numeric text such as `1` also lexes as a `Word`.
#[regex("[a-zA-Z0-9_\\-]+", |lex| lex.slice())]
|
||||
Word(&'a str),
|
||||
/// `$` followed by one or more word characters; the payload is the match
/// with the leading `$` stripped (`[1..]`).
#[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
|
||||
VarIdent(&'a str),
|
||||
/// The literal `@..`.
#[token("@..")]
|
||||
InputSpread,
|
||||
/// `@` followed by one or more word characters; the payload is the match
/// with the leading `@` stripped (`[1..]`).
#[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
|
||||
InputIdent(&'a str),
|
||||
/// The literal `,`.
#[token(",")]
|
||||
Comma,
|
||||
/// The literal `|`.
#[token("|")]
|
||||
Pipe,
|
||||
/// The literal `@|`.
#[token("@|")]
|
||||
MappingPipe,
|
||||
/// The literal `!|`.
#[token("!|")]
|
||||
NullPipe,
|
||||
/// A lone `@` (one not continued as `@..`, `@|` or an `@name` identifier).
#[token("@")]
|
||||
At,
|
||||
/// The literal `>`.
#[token(">")]
|
||||
GreaterThan,
|
||||
/// The literal `=`.
#[token("=")]
|
||||
Equals,
|
||||
/// The literal `:`.
#[token(":")]
|
||||
Colon,
|
||||
/// The literal `[`.
#[token("[")]
|
||||
BracketOpen,
|
||||
/// The literal `]`.
#[token("]")]
|
||||
BracketClose,
|
||||
/// The literal `(`.
#[token("(")]
|
||||
ParenOpen,
|
||||
/// The literal `)`.
#[token(")")]
|
||||
ParenClose,
|
||||
/// The literal `{`.
#[token("{")]
|
||||
BraceOpen,
|
||||
/// The literal `}`.
#[token("}")]
|
||||
BraceClose,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
107
crates/lang/src/tokens/tests.rs
Normal file
107
crates/lang/src/tokens/tests.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
use logos::Logos;
|
||||
|
||||
use super::Token;
|
||||
|
||||
/// Generates a `#[test]` function for the lexer to avoid writing boilerplate.
///
/// Arguments, in order:
/// - `$name`: identifier used as the name of the generated test function.
/// - `$input`: string literal handed to `Token::lexer`.
/// - `$out`: expression the collected tokens are compared against with
///   `assert_eq!`.
///
/// Every lexed item is `unwrap()`ed, so a lex error in `$input` panics and
/// fails the test before the comparison runs.
|
||||
macro_rules! lexer_test {
|
||||
($name:ident, $input:literal, $out:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let lex = Token::lexer($input);
|
||||
// Panics on the first lex error; otherwise gathers every token in order.
let toks = lex.map(|tok| tok.unwrap()).collect::<Vec<_>>();
|
||||
assert_eq!(toks, $out);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Words separated by `|` lex to alternating `Word` and `Pipe` tokens; the
// surrounding spaces produce no tokens.
lexer_test! {
|
||||
test_lex_simple_pipeline,
|
||||
"streamer | processor | sink",
|
||||
[
|
||||
Token::Word("streamer"),
|
||||
Token::Pipe,
|
||||
Token::Word("processor"),
|
||||
Token::Pipe,
|
||||
Token::Word("sink")
|
||||
]
|
||||
}
|
||||
|
||||
// A `$`-prefixed name lexes to a single `VarIdent` whose payload omits the `$`.
lexer_test! {
|
||||
test_lex_var_ident,
|
||||
"$identifier",
|
||||
[ Token::VarIdent("identifier") ]
|
||||
}
|
||||
|
||||
// Exercises parentheses, commas, `=`, braces, `:` and `$` identifiers in one
// input; note that the bare `1` keys lex as `Word("1")`.
lexer_test! {
|
||||
test_lex_subgroup,
|
||||
"subgroup(first, second) = a | b { 1: $first } | c { 1: $second }",
|
||||
[
|
||||
Token::Word("subgroup"),
|
||||
Token::ParenOpen,
|
||||
Token::Word("first"),
|
||||
Token::Comma,
|
||||
Token::Word("second"),
|
||||
Token::ParenClose,
|
||||
Token::Equals,
|
||||
Token::Word("a"),
|
||||
Token::Pipe,
|
||||
Token::Word("b"),
|
||||
Token::BraceOpen,
|
||||
Token::Word("1"),
|
||||
Token::Colon,
|
||||
Token::VarIdent("first"),
|
||||
Token::BraceClose,
|
||||
Token::Pipe,
|
||||
Token::Word("c"),
|
||||
Token::BraceOpen,
|
||||
Token::Word("1"),
|
||||
Token::Colon,
|
||||
Token::VarIdent("second"),
|
||||
Token::BraceClose
|
||||
]
|
||||
}
|
||||
|
||||
// `>` and `|` directly adjacent to words (no separating whitespace) still lex
// as their own tokens on either side of the comma-separated name lists.
lexer_test! {
|
||||
test_lex_crossing_pipeline_reordering,
|
||||
"a >first, second|second, first> c",
|
||||
[
|
||||
Token::Word("a"),
|
||||
Token::GreaterThan,
|
||||
Token::Word("first"),
|
||||
Token::Comma,
|
||||
Token::Word("second"),
|
||||
Token::Pipe,
|
||||
Token::Word("second"),
|
||||
Token::Comma,
|
||||
Token::Word("first"),
|
||||
Token::GreaterThan,
|
||||
Token::Word("c")
|
||||
]
|
||||
}
|
||||
|
||||
// An `@`-prefixed name inside braces lexes to `InputIdent` with the `@`
// stripped from its payload.
lexer_test! {
|
||||
test_lex_crossing_input_args,
|
||||
"a >second| c { second: @first }",
|
||||
[
|
||||
Token::Word("a"),
|
||||
Token::GreaterThan,
|
||||
Token::Word("second"),
|
||||
Token::Pipe,
|
||||
Token::Word("c"),
|
||||
Token::BraceOpen,
|
||||
Token::Word("second"),
|
||||
Token::Colon,
|
||||
Token::InputIdent("first"),
|
||||
Token::BraceClose
|
||||
]
|
||||
}
|
||||
|
||||
// `@|` lexes as a single `MappingPipe` token rather than `At` followed by `Pipe`.
lexer_test! {
|
||||
test_lex_map_io_named,
|
||||
"a @| c",
|
||||
[
|
||||
Token::Word("a"),
|
||||
Token::MappingPipe,
|
||||
Token::Word("c")
|
||||
]
|
||||
}
|
Loading…
Reference in a new issue