init i guess
This commit is contained in:
parent
c90532830e
commit
01de2f385a
21 changed files with 1315 additions and 0 deletions
13
crates/lopal_json/Cargo.toml
Normal file
13
crates/lopal_json/Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "lopal_json"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
logos = "0.14.2"
|
||||
enumset = "1.1.3"
|
||||
rowan = "0.15.15"
|
||||
lopal_core = { path = "../lopal_core" }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
78
crates/lopal_json/src/grammar.rs
Normal file
78
crates/lopal_json/src/grammar.rs
Normal file
|
@ -0,0 +1,78 @@
|
|||
use array::array;
|
||||
use enumset::{enum_set, EnumSet};
|
||||
use lopal_core::parser::ParserBuilder;
|
||||
|
||||
use crate::{
|
||||
syntax_error::SyntaxError,
|
||||
syntax_kind::{lex, SyntaxKind},
|
||||
};
|
||||
|
||||
use self::object::object;
|
||||
|
||||
mod array;
|
||||
mod object;
|
||||
|
||||
pub(crate) type Parser<'src> = lopal_core::Parser<'src, SyntaxKind, SyntaxError>;
|
||||
pub(crate) type CompletedMarker = lopal_core::CompletedMarker<SyntaxKind, SyntaxError>;
|
||||
|
||||
const BASIC_VALUE_TOKENS: EnumSet<SyntaxKind> =
|
||||
enum_set!(SyntaxKind::BOOL | SyntaxKind::NULL | SyntaxKind::NUMBER | SyntaxKind::STRING);
|
||||
|
||||
pub fn value(p: &mut Parser) -> bool {
|
||||
if BASIC_VALUE_TOKENS.contains(p.current()) {
|
||||
p.do_bump();
|
||||
return true;
|
||||
} else {
|
||||
object(p).or_else(|| array(p)).is_some()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{
|
||||
test_utils::{check_parser, gen_checks},
|
||||
value,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn value_lit() {
|
||||
gen_checks! {value;
|
||||
r#""helo world""# => r#"ROOT { STRING "\"helo world\""; }"#,
|
||||
"42" => r#"ROOT { NUMBER "42"; }"#,
|
||||
"null" => r#"ROOT { NULL "null"; }"#,
|
||||
"true" => r#"ROOT { BOOL "true"; }"#,
|
||||
"false" => r#"ROOT { BOOL "false"; }"#
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_utils {
|
||||
use lopal_core::parser::ParserBuilder;
|
||||
|
||||
use crate::syntax_kind::{lex, SyntaxKind};
|
||||
|
||||
use super::Parser;
|
||||
|
||||
macro_rules! gen_checks {
|
||||
($fn_to_test:ident; $($in:literal => $out:literal),+) => {
|
||||
$(crate::grammar::test_utils::check_parser($in, |p| { $fn_to_test(p); }, $out);)+
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) use gen_checks;
|
||||
|
||||
pub(super) fn check_parser(input: &str, parser_fn: fn(&mut Parser), expected_output: &str) {
|
||||
let toks = lex(input);
|
||||
let mut p: Parser = ParserBuilder::new(toks)
|
||||
.add_meaningless(SyntaxKind::WHITESPACE)
|
||||
.add_meaningless(SyntaxKind::NEWLINE)
|
||||
.build();
|
||||
|
||||
parser_fn(&mut p);
|
||||
|
||||
let out = p.finish();
|
||||
|
||||
assert_eq!(format!("{out:?}").trim_end(), expected_output);
|
||||
}
|
||||
}
|
52
crates/lopal_json/src/grammar/array.rs
Normal file
52
crates/lopal_json/src/grammar/array.rs
Normal file
|
@ -0,0 +1,52 @@
|
|||
use crate::{syntax_error::SyntaxError, syntax_kind::SyntaxKind};
|
||||
|
||||
use super::{value, CompletedMarker, Parser};
|
||||
|
||||
pub(super) fn array(p: &mut Parser) -> Option<CompletedMarker> {
|
||||
let array_start = p.start("array");
|
||||
|
||||
if !p.eat(SyntaxKind::BRACKET_OPEN) {
|
||||
array_start.abandon(p);
|
||||
return None;
|
||||
}
|
||||
|
||||
let el = p.start("arr_el");
|
||||
value(p);
|
||||
el.complete(p, SyntaxKind::ELEMENT);
|
||||
|
||||
while p.at(SyntaxKind::COMMA) {
|
||||
let potential_trailing_comma = p.start("potential_trailing_comma");
|
||||
|
||||
p.eat(SyntaxKind::COMMA);
|
||||
let maybe_el = p.start("arr_el");
|
||||
if !value(p) {
|
||||
maybe_el.abandon(p);
|
||||
potential_trailing_comma.complete(p, SyntaxKind::TRAILING_COMMA);
|
||||
} else {
|
||||
maybe_el.complete(p, SyntaxKind::ELEMENT);
|
||||
potential_trailing_comma.abandon(p);
|
||||
}
|
||||
}
|
||||
|
||||
Some(if !p.eat(SyntaxKind::BRACKET_CLOSE) {
|
||||
array_start.error(p, SyntaxError::UnclosedArray)
|
||||
} else {
|
||||
array_start.complete(p, SyntaxKind::ARRAY)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::grammar::{array::array, test_utils::gen_checks};
|
||||
|
||||
#[test]
|
||||
fn array_basic() {
|
||||
gen_checks! {array;
|
||||
r#"[1,2,3]"# => r#"ROOT { ARRAY { BRACKET_OPEN "["; ELEMENT { NUMBER "1"; } COMMA ","; ELEMENT { NUMBER "2"; } COMMA ","; ELEMENT { NUMBER "3"; } BRACKET_CLOSE "]"; } }"#,
|
||||
r#"[1,2,]"# => r#"ROOT { ARRAY { BRACKET_OPEN "["; ELEMENT { NUMBER "1"; } COMMA ","; ELEMENT { NUMBER "2"; } TRAILING_COMMA { COMMA ","; } BRACKET_CLOSE "]"; } }"#,
|
||||
r#"[1,2"# => r#"ROOT { PARSE_ERR: UnclosedArray { BRACKET_OPEN "["; ELEMENT { NUMBER "1"; } COMMA ","; ELEMENT { NUMBER "2"; } } }"#,
|
||||
r#"[1,2,"# => r#"ROOT { PARSE_ERR: UnclosedArray { BRACKET_OPEN "["; ELEMENT { NUMBER "1"; } COMMA ","; ELEMENT { NUMBER "2"; } TRAILING_COMMA { COMMA ","; } } }"#,
|
||||
r#"[{"hello":"world""# => r#"ROOT { PARSE_ERR: UnclosedArray { BRACKET_OPEN "["; ELEMENT { PARSE_ERR: UnclosedObject { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"hello\""; } COLON ":"; MEMBER_VALUE { STRING "\"world\""; } } } } } }"#
|
||||
}
|
||||
}
|
||||
}
|
92
crates/lopal_json/src/grammar/object.rs
Normal file
92
crates/lopal_json/src/grammar/object.rs
Normal file
|
@ -0,0 +1,92 @@
|
|||
use crate::{grammar::value, syntax_error::SyntaxError, syntax_kind::SyntaxKind};
|
||||
|
||||
use super::{CompletedMarker, Parser, BASIC_VALUE_TOKENS};
|
||||
|
||||
pub(super) fn object(p: &mut Parser) -> Option<CompletedMarker> {
|
||||
let obj_start = p.start("object");
|
||||
|
||||
if !p.eat(SyntaxKind::BRACE_OPEN) {
|
||||
obj_start.abandon(p);
|
||||
return None;
|
||||
}
|
||||
|
||||
member(p);
|
||||
while p.at(SyntaxKind::COMMA) {
|
||||
// not always an error, later configurable
|
||||
let potential_trailing_comma = p.start("potential_trailing_comma");
|
||||
p.eat(SyntaxKind::COMMA);
|
||||
|
||||
if member(p).is_none() {
|
||||
potential_trailing_comma.complete(p, SyntaxKind::TRAILING_COMMA);
|
||||
} else {
|
||||
potential_trailing_comma.abandon(p);
|
||||
}
|
||||
}
|
||||
|
||||
Some(if p.eat(SyntaxKind::BRACE_CLOSE) {
|
||||
obj_start.complete(p, SyntaxKind::OBJECT)
|
||||
} else {
|
||||
obj_start.error(p, SyntaxError::UnclosedObject)
|
||||
})
|
||||
}
|
||||
|
||||
fn member(p: &mut Parser) -> Option<CompletedMarker> {
|
||||
let member_start = p.start("member");
|
||||
|
||||
if p.at(SyntaxKind::BRACE_CLOSE) {
|
||||
member_start.abandon(p);
|
||||
return None;
|
||||
} else if p.at(SyntaxKind::STRING) {
|
||||
let member_name_start = p.start("member_name");
|
||||
p.eat(SyntaxKind::STRING);
|
||||
member_name_start.complete(p, SyntaxKind::MEMBER_NAME);
|
||||
} else {
|
||||
return todo!("handle other tokens: {:?}", p.current());
|
||||
}
|
||||
|
||||
if !p.eat(SyntaxKind::COLON) {
|
||||
todo!("handle wrong tokens")
|
||||
}
|
||||
|
||||
let member_value_start = p.start("member_value_start");
|
||||
if value(p) {
|
||||
member_value_start.complete(p, SyntaxKind::MEMBER_VALUE);
|
||||
Some(member_start.complete(p, SyntaxKind::MEMBER))
|
||||
} else {
|
||||
member_value_start.abandon(p);
|
||||
let e = member_start.error(p, SyntaxError::MemberMissingValue);
|
||||
Some(
|
||||
e.precede(p, "member but failed already")
|
||||
.complete(p, SyntaxKind::MEMBER),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::grammar::{
|
||||
object::{member, object},
|
||||
test_utils::gen_checks,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn object_basic() {
|
||||
gen_checks! {object;
|
||||
r#"{"a": "b"}"# => r#"ROOT { OBJECT { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { STRING "\"b\""; } } BRACE_CLOSE "}"; } }"#,
|
||||
r#"{"a": 42}"# => r#"ROOT { OBJECT { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { NUMBER "42"; } } BRACE_CLOSE "}"; } }"#,
|
||||
r#"{"a": "b""# => r#"ROOT { PARSE_ERR: UnclosedObject { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { STRING "\"b\""; } } } }"#,
|
||||
r#"{"a": }"# => r#"ROOT { OBJECT { BRACE_OPEN "{"; MEMBER { PARSE_ERR: MemberMissingValue { MEMBER_NAME { STRING "\"a\""; } COLON ":"; } } WHITESPACE " "; BRACE_CLOSE "}"; } }"#,
|
||||
r#"{"a":"# => r#"ROOT { PARSE_ERR: UnclosedObject { BRACE_OPEN "{"; MEMBER { PARSE_ERR: MemberMissingValue { MEMBER_NAME { STRING "\"a\""; } COLON ":"; } } } }"#,
|
||||
r#"{"a":true,}"# => r#"ROOT { OBJECT { BRACE_OPEN "{"; MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; MEMBER_VALUE { BOOL "true"; } } TRAILING_COMMA { COMMA ","; } BRACE_CLOSE "}"; } }"#
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn member_basic() {
|
||||
gen_checks! {member;
|
||||
r#""a": "b""# => r#"ROOT { MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { STRING "\"b\""; } } }"#,
|
||||
r#""a": 42"# => r#"ROOT { MEMBER { MEMBER_NAME { STRING "\"a\""; } COLON ":"; WHITESPACE " "; MEMBER_VALUE { NUMBER "42"; } } }"#,
|
||||
r#""a":"# => r#"ROOT { MEMBER { PARSE_ERR: MemberMissingValue { MEMBER_NAME { STRING "\"a\""; } COLON ":"; } } }"#
|
||||
}
|
||||
}
|
||||
}
|
3
crates/lopal_json/src/lib.rs
Normal file
3
crates/lopal_json/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
mod grammar;
|
||||
mod syntax_error;
|
||||
mod syntax_kind;
|
11
crates/lopal_json/src/syntax_error.rs
Normal file
11
crates/lopal_json/src/syntax_error.rs
Normal file
|
@ -0,0 +1,11 @@
|
|||
use crate::syntax_kind::SyntaxKind;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum SyntaxError {
|
||||
UnclosedObject,
|
||||
UnclosedArray,
|
||||
DisallowedKeyType(SyntaxKind),
|
||||
MemberMissingValue,
|
||||
UnexpectedTrailingComma,
|
||||
}
|
||||
impl lopal_core::parser::SyntaxError for SyntaxError {}
|
117
crates/lopal_json/src/syntax_kind.rs
Normal file
117
crates/lopal_json/src/syntax_kind.rs
Normal file
|
@ -0,0 +1,117 @@
|
|||
use logos::Logos;
|
||||
|
||||
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
||||
let mut lex = SyntaxKind::lexer(src);
|
||||
let mut r = Vec::new();
|
||||
|
||||
while let Some(tok_res) = lex.next() {
|
||||
r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice()))
|
||||
}
|
||||
|
||||
r
|
||||
}
|
||||
|
||||
#[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)]
|
||||
#[repr(u16)]
|
||||
#[enumset(no_super_impls)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum SyntaxKind {
|
||||
OBJECT,
|
||||
MEMBER,
|
||||
MEMBER_NAME,
|
||||
MEMBER_VALUE,
|
||||
|
||||
ARRAY,
|
||||
ELEMENT,
|
||||
|
||||
// SyntaxKinds for future json5/etc support
|
||||
TRAILING_COMMA,
|
||||
|
||||
// Tokens
|
||||
// Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html)
|
||||
#[token("true")]
|
||||
#[token("false")]
|
||||
BOOL,
|
||||
#[token("{")]
|
||||
BRACE_OPEN,
|
||||
#[token("}")]
|
||||
BRACE_CLOSE,
|
||||
#[token("[")]
|
||||
BRACKET_OPEN,
|
||||
#[token("]")]
|
||||
BRACKET_CLOSE,
|
||||
#[token(":")]
|
||||
COLON,
|
||||
#[token(",")]
|
||||
COMMA,
|
||||
#[token("null")]
|
||||
NULL,
|
||||
#[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")]
|
||||
NUMBER,
|
||||
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)]
|
||||
STRING,
|
||||
|
||||
// Whitespace tokens
|
||||
#[regex("[ \\t\\f]+")]
|
||||
WHITESPACE,
|
||||
#[token("\n")]
|
||||
NEWLINE,
|
||||
|
||||
// Error SyntaxKinds
|
||||
LEX_ERR,
|
||||
PARSE_ERR,
|
||||
|
||||
// Meta SyntaxKinds
|
||||
ROOT,
|
||||
EOF,
|
||||
}
|
||||
|
||||
impl lopal_core::parser::SyntaxElement for SyntaxKind {
|
||||
const SYNTAX_EOF: Self = Self::EOF;
|
||||
|
||||
const SYNTAX_ERROR: Self = Self::PARSE_ERR;
|
||||
const SYNTAX_ROOT: Self = Self::ROOT;
|
||||
}
|
||||
|
||||
impl From<SyntaxKind> for rowan::SyntaxKind {
|
||||
fn from(kind: SyntaxKind) -> Self {
|
||||
Self(kind as u16)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rowan::SyntaxKind> for SyntaxKind {
|
||||
fn from(raw: rowan::SyntaxKind) -> Self {
|
||||
assert!(raw.0 <= SyntaxKind::EOF as u16);
|
||||
#[allow(unsafe_code, reason = "The transmute is necessary here")]
|
||||
unsafe {
|
||||
std::mem::transmute::<u16, SyntaxKind>(raw.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::syntax_kind::{lex, SyntaxKind};
|
||||
|
||||
#[test]
|
||||
fn simple_object() {
|
||||
const TEST_DATA: &str = r#"{"hello_world": "meow", "some_num":7.42}"#;
|
||||
|
||||
assert_eq!(
|
||||
dbg!(lex(TEST_DATA)),
|
||||
vec![
|
||||
(SyntaxKind::BRACE_OPEN, "{"),
|
||||
(SyntaxKind::STRING, "\"hello_world\""),
|
||||
(SyntaxKind::COLON, ":"),
|
||||
(SyntaxKind::WHITESPACE, " "),
|
||||
(SyntaxKind::STRING, "\"meow\""),
|
||||
(SyntaxKind::COMMA, ","),
|
||||
(SyntaxKind::WHITESPACE, " "),
|
||||
(SyntaxKind::STRING, "\"some_num\""),
|
||||
(SyntaxKind::COLON, ":"),
|
||||
(SyntaxKind::NUMBER, "7.42"),
|
||||
(SyntaxKind::BRACE_CLOSE, "}")
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue