2024-04-24 11:07:38 +02:00
|
|
|
use enumset::EnumSet;
|
2024-04-11 03:23:03 +02:00
|
|
|
use logos::Logos;
|
|
|
|
|
|
|
|
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
|
|
|
let mut lex = SyntaxKind::lexer(src);
|
|
|
|
let mut r = Vec::new();
|
|
|
|
|
|
|
|
while let Some(tok_res) = lex.next() {
|
|
|
|
r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice()))
|
|
|
|
}
|
|
|
|
|
|
|
|
r
|
|
|
|
}
|
|
|
|
|
2024-04-24 11:07:38 +02:00
|
|
|
#[derive(enumset::EnumSetType, Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
|
2024-04-11 03:23:03 +02:00
|
|
|
#[repr(u16)]
|
2024-04-24 11:07:38 +02:00
|
|
|
#[enumset(no_super_impls)]
|
2024-04-11 03:23:03 +02:00
|
|
|
#[allow(non_camel_case_types)]
|
|
|
|
pub enum SyntaxKind {
|
|
|
|
#[token("def")]
|
|
|
|
DEF_KW = 0,
|
2024-05-04 21:44:02 +02:00
|
|
|
DEF,
|
|
|
|
DEF_NAME,
|
|
|
|
DEF_BODY,
|
2024-04-11 03:23:03 +02:00
|
|
|
#[token("let")]
|
|
|
|
LET_KW,
|
|
|
|
#[token("in")]
|
|
|
|
IN_KW,
|
2024-05-04 21:44:02 +02:00
|
|
|
LET_IN,
|
|
|
|
#[token("mod")]
|
|
|
|
MOD_KW,
|
|
|
|
MODULE,
|
2024-06-03 11:22:36 +02:00
|
|
|
MODULE_NAME,
|
2024-05-04 21:44:02 +02:00
|
|
|
MODULE_BODY,
|
|
|
|
#[token("use")]
|
|
|
|
USE_KW,
|
|
|
|
USE_PAT,
|
2024-04-11 03:23:03 +02:00
|
|
|
#[regex("[\\d]+")]
|
|
|
|
INT_NUM,
|
|
|
|
#[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)")]
|
|
|
|
FLOAT_NUM,
|
|
|
|
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)]
|
|
|
|
STRING,
|
|
|
|
MATRIX,
|
2024-04-24 19:37:52 +02:00
|
|
|
MAT_ROW,
|
2024-04-30 09:45:36 +02:00
|
|
|
VEC,
|
2024-04-11 03:23:03 +02:00
|
|
|
LIST,
|
2024-04-30 09:45:36 +02:00
|
|
|
// either of a vec, a matrix or a list
|
|
|
|
COLLECTION_ITEM,
|
|
|
|
DECL,
|
2024-04-11 03:23:03 +02:00
|
|
|
PARENTHESIZED_EXPR,
|
|
|
|
EXPR,
|
2024-04-24 11:07:38 +02:00
|
|
|
LITERAL,
|
2024-04-11 03:23:03 +02:00
|
|
|
#[token("(")]
|
|
|
|
L_PAREN,
|
|
|
|
#[token(")")]
|
|
|
|
R_PAREN,
|
|
|
|
#[token("{")]
|
2024-04-24 19:37:52 +02:00
|
|
|
L_BRACE,
|
2024-04-11 03:23:03 +02:00
|
|
|
#[token("}")]
|
2024-04-24 19:37:52 +02:00
|
|
|
R_BRACE,
|
2024-04-11 03:23:03 +02:00
|
|
|
#[token("[")]
|
|
|
|
L_BRACK,
|
|
|
|
#[token("]")]
|
|
|
|
R_BRACK,
|
|
|
|
#[token("<")]
|
|
|
|
L_ANGLE,
|
|
|
|
#[token(">")]
|
|
|
|
R_ANGLE,
|
|
|
|
#[token("+")]
|
|
|
|
PLUS,
|
|
|
|
#[token("-")]
|
|
|
|
MINUS,
|
|
|
|
#[token("*")]
|
|
|
|
STAR,
|
|
|
|
#[token("/")]
|
|
|
|
SLASH,
|
|
|
|
#[token("%")]
|
|
|
|
PERCENT,
|
|
|
|
#[token("^")]
|
|
|
|
CARET,
|
|
|
|
INSTR,
|
|
|
|
INSTR_NAME,
|
|
|
|
INSTR_PARAMS,
|
|
|
|
ATTR_SET,
|
|
|
|
ATTR,
|
|
|
|
ATTR_NAME,
|
|
|
|
ATTR_VALUE,
|
|
|
|
#[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*")]
|
|
|
|
IDENT,
|
|
|
|
#[regex("\\$[a-zA-Z0-9_\\-]+")]
|
|
|
|
VAR,
|
|
|
|
#[regex("\\@[a-zA-Z0-9_\\-]+")]
|
|
|
|
INPUT_VAR,
|
|
|
|
#[token("$")]
|
|
|
|
DOLLAR,
|
|
|
|
#[token("@")]
|
|
|
|
AT,
|
|
|
|
#[token(",")]
|
|
|
|
COMMA,
|
|
|
|
#[token("|")]
|
|
|
|
PIPE,
|
|
|
|
#[token("@|")]
|
|
|
|
MAPPING_PIPE,
|
|
|
|
#[token("!|")]
|
|
|
|
NULL_PIPE,
|
2024-04-30 12:21:06 +02:00
|
|
|
PIPELINE,
|
2024-04-11 03:23:03 +02:00
|
|
|
#[token("=")]
|
|
|
|
EQ,
|
|
|
|
#[token(":")]
|
|
|
|
COLON,
|
|
|
|
#[token(";")]
|
|
|
|
SEMICOLON,
|
|
|
|
#[token(".")]
|
|
|
|
DOT,
|
|
|
|
#[token("!")]
|
|
|
|
BANG,
|
|
|
|
#[regex("[ \\t\\f]+")]
|
|
|
|
WHITESPACE,
|
|
|
|
#[token("\n")]
|
|
|
|
NEWLINE,
|
|
|
|
PARSE_ERR,
|
|
|
|
LEX_ERR,
|
|
|
|
ROOT,
|
2024-04-24 11:07:38 +02:00
|
|
|
EOF,
|
|
|
|
TOMBSTONE,
|
|
|
|
ERROR,
|
2024-04-11 03:23:03 +02:00
|
|
|
}
|
2024-04-24 11:07:38 +02:00
|
|
|
|
|
|
|
/// A set of [`SyntaxKind`] values, stored as an `enumset::EnumSet`.
pub type TokenSet = EnumSet<SyntaxKind>;
|
|
|
|
|
2024-04-11 03:23:03 +02:00
|
|
|
/// Converts a [`SyntaxKind`] into rowan's raw kind by wrapping the variant's
/// `u16` discriminant.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `#[repr(u16)]`, so this cast is lossless.
        let discriminant = kind as u16;
        rowan::SyntaxKind(discriminant)
    }
}
|
2024-04-24 11:07:38 +02:00
|
|
|
|
|
|
|
/// Uninhabited marker type: it has no variants, so no value of it can exist.
///
/// It is only used at the type level, as the type that implements
/// `rowan::Language` for this crate's [`SyntaxKind`].
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Lang {}
|
|
|
|
impl rowan::Language for Lang {
|
|
|
|
type Kind = SyntaxKind;
|
|
|
|
#[allow(unsafe_code)]
|
|
|
|
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
|
|
|
|
assert!(raw.0 <= SyntaxKind::ROOT as u16);
|
|
|
|
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
|
|
|
}
|
|
|
|
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
|
|
|
|
kind.into()
|
|
|
|
}
|
|
|
|
}
|