iowo/crates/lang/src/parser/ast/lossless/lex.rs

118 lines
2.1 KiB
Rust
Raw Normal View History

use logos::Logos;
use crate::parser::Span;
/// Tokenizes `src` into `(kind, text)` pairs, in source order.
///
/// Every pair holds the [`SyntaxKind`] chosen by the lexer together with the
/// exact slice of `src` that the token covers. Input the lexer fails to match
/// does not stop lexing: it is emitted as [`SyntaxKind::LEX_ERR`] alongside
/// the offending slice, and lexing continues with the rest of the input.
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
    let mut tokens = SyntaxKind::lexer(src);

    // `slice()` must be read right after `next()`, while the lexer still
    // points at the token that was just produced.
    std::iter::from_fn(|| {
        let kind = tokens.next()?.unwrap_or(SyntaxKind::LEX_ERR);
        Some((kind, tokens.slice()))
    })
    .collect()
}
/// Every kind of token and syntax node known to the lossless syntax tree.
///
/// Variants carrying a `#[token]` or `#[regex]` attribute are produced by the
/// derived lexer. Variants without one are never produced by the lexer itself;
/// they are presumably assigned to composite tree nodes by the parser
/// (NOTE(review): confirm against the parser).
///
/// The enum is `#[repr(u16)]` and starts at `0`, so the declaration order
/// defines the numeric value of each kind. Do not reorder variants without
/// checking every place that relies on those values.
#[derive(Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
#[repr(u16)]
// Variants are deliberately written in SCREAMING_SNAKE_CASE.
#[allow(non_camel_case_types)]
pub enum SyntaxKind {
    /// The `def` keyword.
    #[token("def")]
    DEF_KW = 0,
    /// The `let` keyword.
    #[token("let")]
    LET_KW,
    /// The `in` keyword.
    #[token("in")]
    IN_KW,
    /// The `mat` keyword.
    #[token("mat")]
    MAT_KW,
    /// A dimensions pattern: digits, a literal `x`, digits (e.g. `3x4`).
    #[regex("[\\d]+x[\\d]+")]
    PAT_DIMENSIONS,
    /// An unsigned run of digits.
    #[regex("[\\d]+")]
    INT_NUM,
    /// A number containing a decimal point, with an optional leading `+`/`-`
    /// and at least one digit on either side of the point.
    #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)")]
    FLOAT_NUM,
    /// A double-quoted string literal.
    ///
    /// Inside the quotes, a backslash must be followed by one of
    /// `"`, `\`, `b`, `n`, `f`, `r`, `t`, or by `u` and exactly four hex
    /// digits. The `u` alternative is grouped under the backslash so that
    /// `\uXXXX` escapes are accepted.
    #[regex(r#""([^"\\]|\\(["\\bnfrt]|u[a-fA-F0-9]{4}))*""#)]
    STRING,
    /// No lexer pattern; presumably a matrix node (TODO confirm).
    MATRIX,
    /// No lexer pattern; presumably a declaration node (TODO confirm).
    DECL,
    /// No lexer pattern; presumably a list node (TODO confirm).
    LIST,
    /// No lexer pattern; presumably the body of a matrix (TODO confirm).
    MAT_BODY,
    /// No lexer pattern; presumably an expression wrapped in parentheses
    /// (TODO confirm).
    PARENTHESIZED_EXPR,
    /// No lexer pattern; presumably an expression node (TODO confirm).
    EXPR,
    /// `(`
    #[token("(")]
    L_PAREN,
    /// `)`
    #[token(")")]
    R_PAREN,
    /// `{`
    #[token("{")]
    L_CURLY,
    /// `}`
    #[token("}")]
    R_CURLY,
    /// `[`
    #[token("[")]
    L_BRACK,
    /// `]`
    #[token("]")]
    R_BRACK,
    /// `<`
    #[token("<")]
    L_ANGLE,
    /// `>`
    #[token(">")]
    R_ANGLE,
    /// `+`
    #[token("+")]
    PLUS,
    /// `-`
    #[token("-")]
    MINUS,
    /// `*`
    #[token("*")]
    STAR,
    /// `/`
    #[token("/")]
    SLASH,
    /// `%`
    #[token("%")]
    PERCENT,
    /// `^`
    #[token("^")]
    CARET,
    /// No lexer pattern; presumably an instruction node (TODO confirm).
    INSTR,
    /// No lexer pattern; presumably the name part of an instruction
    /// (TODO confirm).
    INSTR_NAME,
    /// No lexer pattern; presumably the parameters of an instruction
    /// (TODO confirm).
    INSTR_PARAMS,
    /// No lexer pattern; presumably an attribute set node (TODO confirm).
    ATTR_SET,
    /// No lexer pattern; presumably a single attribute (TODO confirm).
    ATTR,
    /// No lexer pattern; presumably the name of an attribute (TODO confirm).
    ATTR_NAME,
    /// No lexer pattern; presumably the value of an attribute (TODO confirm).
    ATTR_VALUE,
    /// An identifier: starts with an ASCII letter or `_`, followed by any
    /// mix of letters, `_`, `-` and digits.
    #[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*")]
    IDENT,
    /// `$` immediately followed by one or more of ASCII letters, digits,
    /// `_` or `-`.
    #[regex("\\$[a-zA-Z0-9_\\-]+")]
    VAR,
    /// `@` immediately followed by one or more of ASCII letters, digits,
    /// `_` or `-`.
    #[regex("\\@[a-zA-Z0-9_\\-]+")]
    INPUT_VAR,
    /// A lone `$`.
    #[token("$")]
    DOLLAR,
    /// A lone `@`.
    #[token("@")]
    AT,
    /// `,`
    #[token(",")]
    COMMA,
    /// `|`
    #[token("|")]
    PIPE,
    /// `@|`
    #[token("@|")]
    MAPPING_PIPE,
    /// `!|`
    #[token("!|")]
    NULL_PIPE,
    /// `=`
    #[token("=")]
    EQ,
    /// `:`
    #[token(":")]
    COLON,
    /// `;`
    #[token(";")]
    SEMICOLON,
    /// `.`
    #[token(".")]
    DOT,
    /// `!`
    #[token("!")]
    BANG,
    /// One or more spaces, tabs or form feeds (line breaks are separate).
    #[regex("[ \\t\\f]+")]
    WHITESPACE,
    /// A single `\n`.
    #[token("\n")]
    NEWLINE,
    /// No lexer pattern; presumably marks a parser error (TODO confirm).
    PARSE_ERR,
    /// No lexer pattern; `lex` substitutes this kind for input the lexer
    /// fails to match.
    LEX_ERR,
    /// No lexer pattern; presumably the root node of the tree (TODO confirm).
    ROOT,
}
/// Converts a [`SyntaxKind`] into rowan's raw kind wrapper.
impl From<SyntaxKind> for rowan::SyntaxKind {
    /// Wraps the numeric discriminant of `kind` in a `rowan::SyntaxKind`.
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `#[repr(u16)]`, so this cast yields its discriminant
        // without truncation.
        let raw = kind as u16;
        rowan::SyntaxKind(raw)
    }
}