// iowo/crates/lang/src/lst_parser/syntax_kind.rs
use enumset::EnumSet;
use logos::Logos;
/// Tokenizes `src` into `(kind, text)` pairs, in source order.
///
/// Each pair holds the [`SyntaxKind`] the lexer produced together with the
/// slice of `src` that the token covers. Input the lexer reports as an error
/// is not dropped: it is emitted with the kind [`SyntaxKind::LEX_ERR`].
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
    let mut lexer = SyntaxKind::lexer(src);

    // The lexer has to stay accessible after every `next()` call so that the
    // matched slice can be read, hence `from_fn` instead of a plain `for`.
    std::iter::from_fn(|| {
        let kind = lexer.next()?.unwrap_or(SyntaxKind::LEX_ERR);
        Some((kind, lexer.slice()))
    })
    .collect()
}
2024-04-24 11:07:38 +02:00
#[derive(enumset::EnumSetType, Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
#[repr(u16)]
2024-04-24 11:07:38 +02:00
#[enumset(no_super_impls)]
#[allow(non_camel_case_types)]
pub enum SyntaxKind {
#[token("def")]
DEF_KW = 0,
2024-05-04 21:44:02 +02:00
DEF,
DEF_NAME,
DEF_BODY,
#[token("let")]
LET_KW,
#[token("in")]
IN_KW,
2024-05-04 21:44:02 +02:00
LET_IN,
#[token("mod")]
MOD_KW,
MODULE,
2024-06-03 11:22:36 +02:00
MODULE_NAME,
2024-05-04 21:44:02 +02:00
MODULE_BODY,
#[token("use")]
USE_KW,
USE_PAT,
#[regex("[\\d]+")]
INT_NUM,
#[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)")]
FLOAT_NUM,
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)]
STRING,
MATRIX,
2024-04-24 19:37:52 +02:00
MAT_ROW,
2024-04-30 09:45:36 +02:00
VEC,
LIST,
2024-04-30 09:45:36 +02:00
// either of a vec, a matrix or a list
COLLECTION_ITEM,
DECL,
PARENTHESIZED_EXPR,
EXPR,
2024-04-24 11:07:38 +02:00
LITERAL,
#[token("(")]
L_PAREN,
#[token(")")]
R_PAREN,
#[token("{")]
2024-04-24 19:37:52 +02:00
L_BRACE,
#[token("}")]
2024-04-24 19:37:52 +02:00
R_BRACE,
#[token("[")]
L_BRACK,
#[token("]")]
R_BRACK,
#[token("<")]
L_ANGLE,
#[token(">")]
R_ANGLE,
#[token("+")]
PLUS,
#[token("-")]
MINUS,
#[token("*")]
STAR,
#[token("/")]
SLASH,
#[token("%")]
PERCENT,
#[token("^")]
CARET,
INSTR,
INSTR_NAME,
INSTR_PARAMS,
ATTR_SET,
ATTR,
ATTR_NAME,
ATTR_VALUE,
#[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*")]
IDENT,
#[regex("\\$[a-zA-Z0-9_\\-]+")]
VAR,
#[regex("\\@[a-zA-Z0-9_\\-]+")]
INPUT_VAR,
#[token("$")]
DOLLAR,
#[token("@")]
AT,
#[token(",")]
COMMA,
#[token("|")]
PIPE,
#[token("@|")]
MAPPING_PIPE,
#[token("!|")]
NULL_PIPE,
PIPELINE,
#[token("=")]
EQ,
#[token(":")]
COLON,
#[token(";")]
SEMICOLON,
#[token(".")]
DOT,
#[token("!")]
BANG,
#[regex("[ \\t\\f]+")]
WHITESPACE,
#[token("\n")]
NEWLINE,
PARSE_ERR,
LEX_ERR,
ROOT,
2024-04-24 11:07:38 +02:00
EOF,
TOMBSTONE,
ERROR,
}
2024-04-24 11:07:38 +02:00
/// A set of [`SyntaxKind`] values, stored as an [`EnumSet`].
pub type TokenSet = EnumSet<SyntaxKind>;
/// Converts a [`SyntaxKind`] into the raw kind type used by `rowan`.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `#[repr(u16)]`, so the cast yields its discriminant.
        let raw: u16 = kind as u16;
        rowan::SyntaxKind(raw)
    }
}
2024-04-24 11:07:38 +02:00
/// Marker type that carries the `rowan::Language` implementation for this
/// language. It has no variants, so no value of it can ever be constructed;
/// it is only used at the type level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Lang {}
impl rowan::Language for Lang {
type Kind = SyntaxKind;
#[allow(unsafe_code)]
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
assert!(raw.0 <= SyntaxKind::ROOT as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
kind.into()
}
}