forked from katzen-cafe/iowo
112 lines
2.6 KiB
Rust
112 lines
2.6 KiB
Rust
use logos::Logos;
|
|
|
|
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
|
let mut lex = SyntaxKind::lexer(src);
|
|
let mut r = Vec::new();
|
|
|
|
while let Some(tok_res) = lex.next() {
|
|
r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice()))
|
|
}
|
|
|
|
r
|
|
}
|
|
|
|
#[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)]
|
|
#[repr(u16)]
|
|
#[enumset(no_super_impls)]
|
|
#[allow(non_camel_case_types)]
|
|
pub enum SyntaxKind {
|
|
OBJECT,
|
|
MEMBER,
|
|
MEMBER_NAME,
|
|
MEMBER_VALUE,
|
|
|
|
ARRAY,
|
|
ELEMENT,
|
|
|
|
// Tokens
|
|
// Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html)
|
|
#[token("true")]
|
|
#[token("false")]
|
|
BOOL,
|
|
#[token("{")]
|
|
BRACE_OPEN,
|
|
#[token("}")]
|
|
BRACE_CLOSE,
|
|
#[token("[")]
|
|
BRACKET_OPEN,
|
|
#[token("]")]
|
|
BRACKET_CLOSE,
|
|
#[token(":")]
|
|
COLON,
|
|
#[token(",")]
|
|
COMMA,
|
|
#[token("null")]
|
|
NULL,
|
|
#[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")]
|
|
NUMBER,
|
|
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)]
|
|
STRING,
|
|
|
|
// Whitespace tokens
|
|
#[regex("[ \\t\\f]+")]
|
|
WHITESPACE,
|
|
#[token("\n")]
|
|
NEWLINE,
|
|
|
|
// Error SyntaxKinds
|
|
LEX_ERR,
|
|
PARSE_ERR,
|
|
|
|
// Meta SyntaxKinds
|
|
EOF,
|
|
}
|
|
|
|
impl pawarser::parser::SyntaxElement for SyntaxKind {
|
|
const EOF: Self = Self::EOF;
|
|
|
|
const ERROR: Self = Self::PARSE_ERR;
|
|
}
|
|
|
|
impl From<SyntaxKind> for rowan::SyntaxKind {
    /// Wraps the `u16` discriminant of `kind` in a `rowan::SyntaxKind`.
    fn from(kind: SyntaxKind) -> Self {
        let discriminant = kind as u16;
        rowan::SyntaxKind(discriminant)
    }
}
|
|
|
|
impl From<rowan::SyntaxKind> for SyntaxKind {
|
|
fn from(raw: rowan::SyntaxKind) -> Self {
|
|
assert!(raw.0 <= SyntaxKind::EOF as u16);
|
|
#[allow(unsafe_code, reason = "The transmute is necessary here")]
|
|
unsafe {
|
|
std::mem::transmute::<u16, SyntaxKind>(raw.0)
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::syntax_kind::{lex, SyntaxKind};

    /// Lexes a small flat object and checks every produced token, whitespace included.
    #[test]
    fn simple_object() {
        let input: &str = r#"{"hello_world": "meow", "some_num":7.42}"#;

        let actual = dbg!(lex(input));

        let expected: Vec<(SyntaxKind, &str)> = vec![
            (SyntaxKind::BRACE_OPEN, "{"),
            (SyntaxKind::STRING, "\"hello_world\""),
            (SyntaxKind::COLON, ":"),
            (SyntaxKind::WHITESPACE, " "),
            (SyntaxKind::STRING, "\"meow\""),
            (SyntaxKind::COMMA, ","),
            (SyntaxKind::WHITESPACE, " "),
            (SyntaxKind::STRING, "\"some_num\""),
            (SyntaxKind::COLON, ":"),
            (SyntaxKind::NUMBER, "7.42"),
            (SyntaxKind::BRACE_CLOSE, "}"),
        ];

        assert_eq!(actual, expected);
    }
}
|