init i guess

This commit is contained in:
Schrottkatze 2025-05-12 15:50:27 +02:00
parent c90532830e
commit 01de2f385a
Signed by: schrottkatze
SSH key fingerprint: SHA256:FPOYVeBy3QP20FEM42uWF1Wa/Qhlk+L3S2+Wuau/Auo
21 changed files with 1315 additions and 0 deletions

View file

@ -0,0 +1,117 @@
use logos::Logos;
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
let mut lex = SyntaxKind::lexer(src);
let mut r = Vec::new();
while let Some(tok_res) = lex.next() {
r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice()))
}
r
}
#[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)]
#[repr(u16)]
#[enumset(no_super_impls)]
#[allow(non_camel_case_types)]
pub enum SyntaxKind {
OBJECT,
MEMBER,
MEMBER_NAME,
MEMBER_VALUE,
ARRAY,
ELEMENT,
// SyntaxKinds for future json5/etc support
TRAILING_COMMA,
// Tokens
// Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html)
#[token("true")]
#[token("false")]
BOOL,
#[token("{")]
BRACE_OPEN,
#[token("}")]
BRACE_CLOSE,
#[token("[")]
BRACKET_OPEN,
#[token("]")]
BRACKET_CLOSE,
#[token(":")]
COLON,
#[token(",")]
COMMA,
#[token("null")]
NULL,
#[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")]
NUMBER,
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)]
STRING,
// Whitespace tokens
#[regex("[ \\t\\f]+")]
WHITESPACE,
#[token("\n")]
NEWLINE,
// Error SyntaxKinds
LEX_ERR,
PARSE_ERR,
// Meta SyntaxKinds
ROOT,
EOF,
}
impl lopal_core::parser::SyntaxElement for SyntaxKind {
const SYNTAX_EOF: Self = Self::EOF;
const SYNTAX_ERROR: Self = Self::PARSE_ERR;
const SYNTAX_ROOT: Self = Self::ROOT;
}
impl From<SyntaxKind> for rowan::SyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
}
}
impl From<rowan::SyntaxKind> for SyntaxKind {
fn from(raw: rowan::SyntaxKind) -> Self {
assert!(raw.0 <= SyntaxKind::EOF as u16);
#[allow(unsafe_code, reason = "The transmute is necessary here")]
unsafe {
std::mem::transmute::<u16, SyntaxKind>(raw.0)
}
}
}
#[cfg(test)]
mod tests {
use crate::syntax_kind::{lex, SyntaxKind};
#[test]
fn simple_object() {
const TEST_DATA: &str = r#"{"hello_world": "meow", "some_num":7.42}"#;
assert_eq!(
dbg!(lex(TEST_DATA)),
vec![
(SyntaxKind::BRACE_OPEN, "{"),
(SyntaxKind::STRING, "\"hello_world\""),
(SyntaxKind::COLON, ":"),
(SyntaxKind::WHITESPACE, " "),
(SyntaxKind::STRING, "\"meow\""),
(SyntaxKind::COMMA, ","),
(SyntaxKind::WHITESPACE, " "),
(SyntaxKind::STRING, "\"some_num\""),
(SyntaxKind::COLON, ":"),
(SyntaxKind::NUMBER, "7.42"),
(SyntaxKind::BRACE_CLOSE, "}")
]
);
}
}