init i guess

This commit is contained in:
Schrottkatze 2025-05-12 15:50:27 +02:00
parent c90532830e
commit 01de2f385a
Signed by: schrottkatze
SSH key fingerprint: SHA256:FPOYVeBy3QP20FEM42uWF1Wa/Qhlk+L3S2+Wuau/Auo
21 changed files with 1315 additions and 0 deletions

View file

@ -0,0 +1,13 @@
[package]
name = "lopal_json"
version = "0.1.0"
edition = "2021"
[dependencies]
logos = "0.14.2"
enumset = "1.1.3"
rowan = "0.15.15"
lopal_core = { path = "../lopal_core" }
[lints]
workspace = true

View file

@ -0,0 +1,78 @@
use array::array;
use enumset::{enum_set, EnumSet};
use lopal_core::parser::ParserBuilder;
use crate::{
syntax_error::SyntaxError,
syntax_kind::{lex, SyntaxKind},
};
use self::object::object;
mod array;
mod object;
/// `lopal_core` parser specialised to this crate's `SyntaxKind` and `SyntaxError`.
pub(crate) type Parser<'src> = lopal_core::Parser<'src, SyntaxKind, SyntaxError>;
/// `lopal_core` completed marker specialised to this crate's `SyntaxKind` and `SyntaxError`.
pub(crate) type CompletedMarker = lopal_core::CompletedMarker<SyntaxKind, SyntaxError>;
/// Token kinds that `value` accepts as a complete value on their own:
/// booleans, `null`, numbers and strings.
const BASIC_VALUE_TOKENS: EnumSet<SyntaxKind> =
enum_set!(SyntaxKind::BOOL | SyntaxKind::NULL | SyntaxKind::NUMBER | SyntaxKind::STRING);
/// Parses a single JSON value at the parser's current position.
///
/// A token from `BASIC_VALUE_TOKENS` is bumped directly; otherwise an object
/// is attempted first and, if that yields nothing, an array.
///
/// Returns `true` when a value (or an error node produced by the object/array
/// parsers) was produced, `false` when none of the alternatives matched.
pub fn value(p: &mut Parser) -> bool {
    // Literal tokens are values all by themselves.
    if BASIC_VALUE_TOKENS.contains(p.current()) {
        p.do_bump();
        return true;
    }

    // Short-circuiting keeps the original order: the array parser only runs
    // when the object parser returned `None`.
    object(p).is_some() || array(p).is_some()
}
#[cfg(test)]
mod tests {
    // `gen_checks!` expands to the absolute path
    // `crate::grammar::test_utils::check_parser`, so only the macro itself
    // and the function under test have to be in scope here.
    use super::{test_utils::gen_checks, value};

    /// Each literal kind is accepted by `value` and ends up as a single token
    /// directly under `ROOT`.
    #[test]
    fn value_lit() {
        gen_checks! {value;
            r#""helo world""# => r#"ROOT { STRING "\"helo world\""; }"#,
            "42" => r#"ROOT { NUMBER "42"; }"#,
            "null" => r#"ROOT { NULL "null"; }"#,
            "true" => r#"ROOT { BOOL "true"; }"#,
            "false" => r#"ROOT { BOOL "false"; }"#
        };
    }
}
#[cfg(test)]
mod test_utils {
    use lopal_core::parser::ParserBuilder;

    use super::Parser;
    use crate::syntax_kind::{lex, SyntaxKind};

    // Expands `input => expected` pairs into one `check_parser` call each,
    // running `$fn_to_test` as the grammar entry point and discarding its
    // return value.
    macro_rules! gen_checks {
        ($fn_to_test:ident; $($in:literal => $out:literal),+) => {
            $(
                crate::grammar::test_utils::check_parser($in, |p| { $fn_to_test(p); }, $out);
            )+
        }
    }
    pub(super) use gen_checks;

    /// Lexes `input`, runs `parser_fn` on a parser that has `WHITESPACE` and
    /// `NEWLINE` registered via `add_meaningless`, and asserts that the debug
    /// rendering of the finished parse (trailing whitespace trimmed) equals
    /// `expected_output`.
    pub(super) fn check_parser(input: &str, parser_fn: fn(&mut Parser), expected_output: &str) {
        let mut parser: Parser = ParserBuilder::new(lex(input))
            .add_meaningless(SyntaxKind::WHITESPACE)
            .add_meaningless(SyntaxKind::NEWLINE)
            .build();

        parser_fn(&mut parser);

        let rendered = format!("{:?}", parser.finish());
        assert_eq!(rendered.trim_end(), expected_output);
    }
}

View file

@ -0,0 +1,52 @@
use crate::{syntax_error::SyntaxError, syntax_kind::SyntaxKind};
use super::{value, CompletedMarker, Parser};
/// Parses a JSON array: `[`, comma separated values, `]`.
///
/// Returns `None` (after abandoning the array marker) when the current token
/// is not `[`. Otherwise always returns `Some`: an `ARRAY` node when the
/// closing `]` was eaten, or an `UnclosedArray` error node when it was not.
///
/// Every successfully parsed value is wrapped in an `ELEMENT` node. A comma
/// that is not followed by a value is wrapped in a `TRAILING_COMMA` node.
pub(super) fn array(p: &mut Parser) -> Option<CompletedMarker> {
    let array_start = p.start("array");
    if !p.eat(SyntaxKind::BRACKET_OPEN) {
        array_start.abandon(p);
        return None;
    }

    // The first element is optional (`[]`). Only emit an `ELEMENT` node when
    // a value was actually parsed, mirroring what the loop below does for
    // every following element; otherwise an empty `ELEMENT` would be created.
    let el = p.start("arr_el");
    if value(p) {
        el.complete(p, SyntaxKind::ELEMENT);
    } else {
        el.abandon(p);
    }

    while p.at(SyntaxKind::COMMA) {
        // Started before the comma so that the comma can be wrapped
        // retroactively if it turns out to be a trailing one.
        let potential_trailing_comma = p.start("potential_trailing_comma");
        p.eat(SyntaxKind::COMMA);

        let maybe_el = p.start("arr_el");
        if !value(p) {
            maybe_el.abandon(p);
            potential_trailing_comma.complete(p, SyntaxKind::TRAILING_COMMA);
        } else {
            maybe_el.complete(p, SyntaxKind::ELEMENT);
            potential_trailing_comma.abandon(p);
        }
    }

    Some(if !p.eat(SyntaxKind::BRACKET_CLOSE) {
        array_start.error(p, SyntaxError::UnclosedArray)
    } else {
        array_start.complete(p, SyntaxKind::ARRAY)
    })
}
#[cfg(test)]
mod tests {
use crate::grammar::{array::array, test_utils::gen_checks};
// Exercises `array` on well-formed input, a trailing comma, and several
// inputs that are missing the closing bracket.
#[test]
fn array_basic() {
gen_checks! {array;
// well-formed array
r#"[1,2,3]"# => r#"ROOT { ARRAY { BRACKET_OPEN "["; ELEMENT { NUMBER "1"; } COMMA ","; ELEMENT { NUMBER "2"; } COMMA ","; ELEMENT { NUMBER "3"; } BRACKET_CLOSE "]"; } }"#,
// trailing comma is wrapped in a TRAILING_COMMA node, the array is still an ARRAY
r#"[1,2,]"# => r#"ROOT { ARRAY { BRACKET_OPEN "["; ELEMENT { NUMBER "1"; } COMMA ","; ELEMENT { NUMBER "2"; } TRAILING_COMMA { COMMA ","; } BRACKET_CLOSE "]"; } }"#,
// missing `]` turns the whole array into an UnclosedArray error node
r#"[1,2"# => r#"ROOT { PARSE_ERR: UnclosedArray { BRACKET_OPEN "["; ELEMENT { NUMBER "1"; } COMMA ","; ELEMENT { NUMBER "2"; } } }"#,
// missing `]` after a trailing comma
r#"[1,2,"# => r#"ROOT { PARSE_ERR: UnclosedArray { BRACKET_OPEN "["; ELEMENT { NUMBER "1"; } COMMA ","; ELEMENT { NUMBER "2"; } TRAILING_COMMA { COMMA ","; } } }"#,
// unclosed object nested inside an unclosed array
r#"[{"hello":"world""# => r#"ROOT { PARSE_ERR: UnclosedArray { BRACKET_OPEN "["; ELEMENT { PARSE_ERR: UnclosedObject { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"hello\""; } COLON ":"; MEMBER_VALUE { STRING "\"world\""; } } } } } }"#
}
}
}

View file

@ -0,0 +1,92 @@
use crate::{grammar::value, syntax_error::SyntaxError, syntax_kind::SyntaxKind};
use super::{CompletedMarker, Parser, BASIC_VALUE_TOKENS};
pub(super) fn object(p: &mut Parser) -> Option<CompletedMarker> {
let obj_start = p.start("object");
if !p.eat(SyntaxKind::BRACE_OPEN) {
obj_start.abandon(p);
return None;
}
member(p);
while p.at(SyntaxKind::COMMA) {
// not always an error, later configurable
let potential_trailing_comma = p.start("potential_trailing_comma");
p.eat(SyntaxKind::COMMA);
if member(p).is_none() {
potential_trailing_comma.complete(p, SyntaxKind::TRAILING_COMMA);
} else {
potential_trailing_comma.abandon(p);
}
}
Some(if p.eat(SyntaxKind::BRACE_CLOSE) {
obj_start.complete(p, SyntaxKind::OBJECT)
} else {
obj_start.error(p, SyntaxError::UnclosedObject)
})
}
fn member(p: &mut Parser) -> Option<CompletedMarker> {
let member_start = p.start("member");
if p.at(SyntaxKind::BRACE_CLOSE) {
member_start.abandon(p);
return None;
} else if p.at(SyntaxKind::STRING) {
let member_name_start = p.start("member_name");
p.eat(SyntaxKind::STRING);
member_name_start.complete(p, SyntaxKind::MEMBER_NAME);
} else {
return todo!("handle other tokens: {:?}", p.current());
}
if !p.eat(SyntaxKind::COLON) {
todo!("handle wrong tokens")
}
let member_value_start = p.start("member_value_start");
if value(p) {
member_value_start.complete(p, SyntaxKind::MEMBER_VALUE);
Some(member_start.complete(p, SyntaxKind::MEMBER))
} else {
member_value_start.abandon(p);
let e = member_start.error(p, SyntaxError::MemberMissingValue);
Some(
e.precede(p, "member but failed already")
.complete(p, SyntaxKind::MEMBER),
)
}
}
#[cfg(test)]
mod tests {
use crate::grammar::{
object::{member, object},
test_utils::gen_checks,
};
// Exercises `object` on well-formed input, a missing closing brace,
// a missing member value and a trailing comma.
#[test]
fn object_basic() {
gen_checks! {object;
// well-formed objects; the whitespace after the colon stays inside MEMBER
r#"{"a": "b"}"# => r#"ROOT { OBJECT { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { STRING "\"b\""; } } BRACE_CLOSE "}"; } }"#,
r#"{"a": 42}"# => r#"ROOT { OBJECT { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { NUMBER "42"; } } BRACE_CLOSE "}"; } }"#,
// missing `}` turns the object into an UnclosedObject error node
r#"{"a": "b""# => r#"ROOT { PARSE_ERR: UnclosedObject { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { STRING "\"b\""; } } } }"#,
// missing value: MemberMissingValue error nested inside the MEMBER node
r#"{"a": }"# => r#"ROOT { OBJECT { BRACE_OPEN "{"; MEMBER { PARSE_ERR: MemberMissingValue { MEMBER_NAME { STRING "\"a\""; } COLON ":"; } } WHITESPACE " "; BRACE_CLOSE "}"; } }"#,
// missing value and missing `}` at the same time
r#"{"a":"# => r#"ROOT { PARSE_ERR: UnclosedObject { BRACE_OPEN "{"; MEMBER { PARSE_ERR: MemberMissingValue { MEMBER_NAME { STRING "\"a\""; } COLON ":"; } } } }"#,
// trailing comma is wrapped in a TRAILING_COMMA node
r#"{"a":true,}"# => r#"ROOT { OBJECT { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; MEMBER_VALUE { BOOL "true"; } } TRAILING_COMMA { COMMA ","; } BRACE_CLOSE "}"; } }"#
}
}
// Exercises `member` directly, without the surrounding braces.
#[test]
fn member_basic() {
gen_checks! {member;
r#""a": "b""# => r#"ROOT { MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { STRING "\"b\""; } } }"#,
r#""a": 42"# => r#"ROOT { MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { NUMBER "42"; } } }"#,
// missing value after the colon
r#""a":"# => r#"ROOT { MEMBER { PARSE_ERR: MemberMissingValue { MEMBER_NAME { STRING "\"a\""; } COLON ":"; } } }"#
}
}
}

View file

@ -0,0 +1,3 @@
mod grammar;
mod syntax_error;
mod syntax_kind;

View file

@ -0,0 +1,11 @@
use crate::syntax_kind::SyntaxKind;
/// Errors the JSON grammar can attach to the syntax tree while parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxError {
/// An object was opened with `{` but its closing `}` was not found.
UnclosedObject,
/// An array was opened with `[` but its closing `]` was not found.
UnclosedArray,
/// NOTE(review): not produced by the grammar yet; presumably meant for a
/// member name token that is not a string - confirm.
DisallowedKeyType(SyntaxKind),
/// A member had a name and a colon but no value followed.
MemberMissingValue,
/// NOTE(review): not produced by the grammar yet; presumably meant for
/// rejecting trailing commas once that becomes configurable - confirm.
UnexpectedTrailingComma,
}
// Marker impl so `lopal_core` accepts this type as the parser's error type.
impl lopal_core::parser::SyntaxError for SyntaxError {}

View file

@ -0,0 +1,117 @@
use logos::Logos;
/// Splits `src` into `(kind, text)` pairs using the logos lexer derived for
/// `SyntaxKind`.
///
/// Input the lexer reports as an error is kept in the output as a token of
/// kind `SyntaxKind::LEX_ERR` together with the slice the lexer was at.
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
    let mut lexer = SyntaxKind::lexer(src);
    let mut tokens = Vec::new();
    loop {
        let kind = match lexer.next() {
            Some(Ok(kind)) => kind,
            Some(Err(_)) => SyntaxKind::LEX_ERR,
            None => break,
        };
        // `slice()` refers to the token that `next()` just produced.
        tokens.push((kind, lexer.slice()));
    }
    tokens
}
/// Every kind of node and token that can appear in a JSON syntax tree.
///
/// The enum is `#[repr(u16)]` without explicit discriminants, and `EOF` must
/// stay the last variant: the conversion from `rowan::SyntaxKind` checks the
/// raw value against `EOF` before transmuting it back into this type.
#[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)]
#[repr(u16)]
#[enumset(no_super_impls)]
#[allow(non_camel_case_types)]
pub enum SyntaxKind {
    /// A complete `{ ... }` object.
    OBJECT,
    /// One `"name": value` entry of an object.
    MEMBER,
    /// The name part of a member.
    MEMBER_NAME,
    /// The value part of a member.
    MEMBER_VALUE,
    /// A complete `[ ... ]` array.
    ARRAY,
    /// One value inside an array.
    ELEMENT,

    // SyntaxKinds for future json5/etc support
    /// A comma that is not followed by another member/element.
    TRAILING_COMMA,

    // Tokens
    // Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html)
    #[token("true")]
    #[token("false")]
    BOOL,
    #[token("{")]
    BRACE_OPEN,
    #[token("}")]
    BRACE_CLOSE,
    #[token("[")]
    BRACKET_OPEN,
    #[token("]")]
    BRACKET_CLOSE,
    #[token(":")]
    COLON,
    #[token(",")]
    COMMA,
    #[token("null")]
    NULL,
    #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")]
    NUMBER,
    // A string body is any run of: a character other than `"` and `\`, a
    // simple escape (`\"`, `\\`, `\/`, `\b`, `\n`, `\f`, `\r`, `\t`), or a
    // `\uXXXX` escape. The backslash in front of `u` is required: without it
    // the alternative only matches a literal `uXXXX` and every string
    // containing a unicode escape fails to lex.
    #[regex(r#""([^"\\]|\\["\\bnfrt/]|\\u[a-fA-F0-9]{4})*""#)]
    STRING,

    // Whitespace tokens
    #[regex("[ \\t\\f]+")]
    WHITESPACE,
    #[token("\n")]
    NEWLINE,

    // Error SyntaxKinds
    /// Input the lexer could not turn into any token.
    LEX_ERR,
    /// Node kind used for parse errors.
    PARSE_ERR,

    // Meta SyntaxKinds
    /// Root node of the tree.
    ROOT,
    /// End of input; must remain the last variant.
    EOF,
}
// Tells `lopal_core` which of this crate's kinds fill its three special
// roles: end of input, error node and root node.
impl lopal_core::parser::SyntaxElement for SyntaxKind {
const SYNTAX_EOF: Self = Self::EOF;
const SYNTAX_ERROR: Self = Self::PARSE_ERR;
const SYNTAX_ROOT: Self = Self::ROOT;
}
impl From<SyntaxKind> for rowan::SyntaxKind {
    /// Wraps the kind's `u16` discriminant in rowan's raw kind type.
    fn from(kind: SyntaxKind) -> Self {
        let raw = kind as u16;
        rowan::SyntaxKind(raw)
    }
}
impl From<rowan::SyntaxKind> for SyntaxKind {
/// Converts rowan's raw kind back into a `SyntaxKind`.
///
/// # Panics
///
/// Panics if the raw value is greater than the discriminant of
/// `SyntaxKind::EOF`.
fn from(raw: rowan::SyntaxKind) -> Self {
assert!(raw.0 <= SyntaxKind::EOF as u16);
// SAFETY: `SyntaxKind` is `#[repr(u16)]` with no explicit discriminants
// and `EOF` is its last variant, so every value in `0..=EOF` is a valid
// discriminant. The assert above rejects everything outside that range.
#[allow(unsafe_code, reason = "The transmute is necessary here")]
unsafe {
std::mem::transmute::<u16, SyntaxKind>(raw.0)
}
}
}
#[cfg(test)]
mod tests {
    use crate::syntax_kind::{lex, SyntaxKind};

    /// Lexes a small object and checks every produced token, including the
    /// whitespace tokens between them.
    #[test]
    fn simple_object() {
        const TEST_DATA: &str = r#"{"hello_world": "meow", "some_num":7.42}"#;
        // No `dbg!` around the lexer output: `assert_eq!` already prints both
        // sides when the comparison fails.
        assert_eq!(
            lex(TEST_DATA),
            vec![
                (SyntaxKind::BRACE_OPEN, "{"),
                (SyntaxKind::STRING, "\"hello_world\""),
                (SyntaxKind::COLON, ":"),
                (SyntaxKind::WHITESPACE, " "),
                (SyntaxKind::STRING, "\"meow\""),
                (SyntaxKind::COMMA, ","),
                (SyntaxKind::WHITESPACE, " "),
                (SyntaxKind::STRING, "\"some_num\""),
                (SyntaxKind::COLON, ":"),
                (SyntaxKind::NUMBER, "7.42"),
                (SyntaxKind::BRACE_CLOSE, "}")
            ]
        );
    }
}