use logos::Logos; pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> { let mut lex = SyntaxKind::lexer(src); let mut r = Vec::new(); while let Some(tok_res) = lex.next() { r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice())) } r } #[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)] #[repr(u16)] #[enumset(no_super_impls)] #[allow(non_camel_case_types)] pub enum SyntaxKind { OBJECT, MEMBER, MEMBER_NAME, MEMBER_VALUE, ARRAY, ELEMENT, // Tokens // Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html) #[token("true")] #[token("false")] BOOL, #[token("{")] BRACE_OPEN, #[token("}")] BRACE_CLOSE, #[token("[")] BRACKET_OPEN, #[token("]")] BRACKET_CLOSE, #[token(":")] COLON, #[token(",")] COMMA, #[token("null")] NULL, #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")] NUMBER, #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)] STRING, // Whitespace tokens #[regex("[ \\t\\f]+")] WHITESPACE, #[token("\n")] NEWLINE, // Error SyntaxKinds LEX_ERR, PARSE_ERR, // Meta SyntaxKinds EOF, } impl pawarser::parser::SyntaxElement for SyntaxKind { const EOF: Self = Self::EOF; const ERROR: Self = Self::PARSE_ERR; } impl From for rowan::SyntaxKind { fn from(kind: SyntaxKind) -> Self { Self(kind as u16) } } impl From for SyntaxKind { fn from(raw: rowan::SyntaxKind) -> Self { assert!(raw.0 <= SyntaxKind::EOF as u16); #[allow(unsafe_code, reason = "The transmute is necessary here")] unsafe { std::mem::transmute::(raw.0) } } } #[cfg(test)] mod tests { use crate::syntax_kind::{lex, SyntaxKind}; #[test] fn simple_object() { const TEST_DATA: &str = r#"{"hello_world": "meow", "some_num":7.42}"#; assert_eq!( dbg!(lex(TEST_DATA)), vec![ (SyntaxKind::BRACE_OPEN, "{"), (SyntaxKind::STRING, "\"hello_world\""), (SyntaxKind::COLON, ":"), (SyntaxKind::WHITESPACE, " "), (SyntaxKind::STRING, "\"meow\""), (SyntaxKind::COMMA, ","), (SyntaxKind::WHITESPACE, " "), (SyntaxKind::STRING, "\"some_num\""), (SyntaxKind::COLON, ":"), (SyntaxKind::NUMBER, "7.42"), (SyntaxKind::BRACE_CLOSE, "}") ] ); } }