json-pawarser: init

This commit is contained in:
Schrottkatze 2024-10-18 14:05:27 +02:00
parent 21bcf62ea5
commit becc4b4041
Signed by: schrottkatze
SSH key fingerprint: SHA256:hXb3t1vINBFCiDCmhRABHX5ocdbLiKyCdKI4HK2Rbbc
9 changed files with 224 additions and 11 deletions

22
Cargo.lock generated
View file

@ -804,6 +804,16 @@ dependencies = [
"rayon", "rayon",
] ]
[[package]]
name = "json-pawarser"
version = "0.1.0"
dependencies = [
"enumset",
"logos",
"pawarser",
"rowan",
]
[[package]] [[package]]
name = "lang" name = "lang"
version = "0.1.0" version = "0.1.0"
@ -881,18 +891,18 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]] [[package]]
name = "logos" name = "logos"
version = "0.14.0" version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8" checksum = "1c6b6e02facda28ca5fb8dbe4b152496ba3b1bd5a4b40bb2b1b2d8ad74e0f39b"
dependencies = [ dependencies = [
"logos-derive", "logos-derive",
] ]
[[package]] [[package]]
name = "logos-codegen" name = "logos-codegen"
version = "0.14.0" version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a" checksum = "b32eb6b5f26efacd015b000bfc562186472cd9b34bdba3f6b264e2a052676d10"
dependencies = [ dependencies = [
"beef", "beef",
"fnv", "fnv",
@ -905,9 +915,9 @@ dependencies = [
[[package]] [[package]]
name = "logos-derive" name = "logos-derive"
version = "0.14.0" version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a" checksum = "3e5d0c5463c911ef55624739fc353238b4e310f0144be1f875dc42fec6bfd5ec"
dependencies = [ dependencies = [
"logos-codegen", "logos-codegen",
] ]

View file

@ -6,7 +6,9 @@ members = [
"crates/lang", "crates/lang",
"crates/svg-filters", "crates/svg-filters",
"crates/prowocessing", "crates/prowocessing",
"crates/executor-poc", "crates/pawarser", "crates/executor-poc",
"crates/pawarser",
"crates/json-pawarser",
] ]
resolver = "2" resolver = "2"

View file

@ -0,0 +1,13 @@
[package]
name = "json-pawarser"
version = "0.1.0"
edition = "2021"
[dependencies]
logos = "0.14.2"
enumset = "1.1.3"
rowan = "0.15.15"
pawarser = { path = "../pawarser" }
[lints]
workspace = true

View file

@ -0,0 +1,67 @@
use enumset::{enum_set, EnumSet};
use crate::{syntax_error::SyntaxError, syntax_kind::SyntaxKind};
use self::object::object;
/// `pawarser`'s generic parser, specialised to this crate's `SyntaxKind` and `SyntaxError`.
type Parser<'src, 'toks> = pawarser::Parser<'src, 'toks, SyntaxKind, SyntaxError>;
/// `pawarser`'s completed marker, specialised to this crate's `SyntaxKind` and `SyntaxError`.
type CompletedMarker = pawarser::CompletedMarker<SyntaxKind, SyntaxError>;
/// Token kinds that `value` consumes directly with a single bump:
/// booleans, `null`, numbers and strings.
const BASIC_VALUE_TOKENS: EnumSet<SyntaxKind> =
enum_set!(SyntaxKind::BOOL | SyntaxKind::NULL | SyntaxKind::NUMBER | SyntaxKind::STRING);
/// Parses one JSON value at the parser's current position.
///
/// If the current token is one of `BASIC_VALUE_TOKENS` it is bumped and the
/// function returns `true`. Otherwise parsing is delegated to `object`, and
/// the result is `true` exactly when `object` produced a completed marker.
pub fn value(p: &mut Parser) -> bool {
    if !BASIC_VALUE_TOKENS.contains(p.current()) {
        // Not a scalar token: the only other form tried here is an object.
        return object(p).is_some();
    }

    p.do_bump();
    true
}
mod object {
use crate::{grammar::value, syntax_error::SyntaxError, syntax_kind::SyntaxKind};
use super::{CompletedMarker, Parser, BASIC_VALUE_TOKENS};
pub(super) fn object(p: &mut Parser) -> Option<CompletedMarker> {
let obj_start = p.start("object");
if !p.at(SyntaxKind::BRACE_OPEN) {
obj_start.abandon(p);
return None;
}
todo!()
}
fn member(p: &mut Parser) -> Option<CompletedMarker> {
let member_start = p.start("member");
if p.at(SyntaxKind::BRACE_CLOSE) {
member_start.abandon(p);
return None;
} else if p.at(SyntaxKind::STRING) {
let member_name_start = p.start("member_name");
p.eat(SyntaxKind::STRING);
member_name_start.complete(p, SyntaxKind::MEMBER_NAME);
} else {
return todo!("handle other tokens");
}
if !p.eat(SyntaxKind::COLON) {
todo!("handle wrong tokens")
}
if value(p) {
Some(member_start.complete(p, SyntaxKind::MEMBER))
} else {
let e = member_start.error(p, SyntaxError::MemberMissingValue);
Some(
e.precede(p, "member but failed already")
.complete(p, SyntaxKind::MEMBER),
)
}
}
}
// Empty placeholder module; it contains no items yet.
// NOTE(review): presumably reserved for array parsing — confirm.
mod array {}

View file

@ -0,0 +1,3 @@
mod grammar;
mod syntax_error;
mod syntax_kind;

View file

@ -0,0 +1,8 @@
use crate::syntax_kind::SyntaxKind;
#[derive(Clone)]
pub enum SyntaxError {
DisallowedKeyType(SyntaxKind),
MemberMissingValue,
}
// Empty impl: opts this crate's error enum into `pawarser`'s `SyntaxError`
// trait so it can be used as the parser's error type parameter.
impl pawarser::parser::SyntaxError for SyntaxError {}

View file

@ -0,0 +1,103 @@
use logos::Logos;
/// Tokenises `src` into `(kind, slice)` pairs in source order.
///
/// Every token the lexer yields is kept, including whitespace and newlines.
/// Input the lexer reports as an error is emitted as `SyntaxKind::LEX_ERR`
/// together with the slice the lexer was on at that point.
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
    let mut lexer = SyntaxKind::lexer(src);

    std::iter::from_fn(|| {
        // `slice()` must be read after `next()`: it refers to the token just produced.
        let kind = lexer.next()?.unwrap_or(SyntaxKind::LEX_ERR);
        Some((kind, lexer.slice()))
    })
    .collect()
}
/// Every kind of token and tree node in the JSON syntax tree.
///
/// The enum is `#[repr(u16)]` because it is converted to `rowan::SyntaxKind`
/// with an `as u16` cast; variant order therefore determines the raw values
/// and must not be changed casually.
#[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)]
#[repr(u16)]
#[enumset(no_super_impls)]
#[allow(non_camel_case_types)]
pub enum SyntaxKind {
    // Error SyntaxKinds
    /// Input the lexer could not match against any token rule.
    LEX_ERR,
    /// Error kind handed to `pawarser` as `SyntaxElement::ERROR`.
    PARSE_ERR,

    // Meta SyntaxKinds
    TOMBSTONE,
    /// End-of-input kind handed to `pawarser` as `SyntaxElement::EOF`.
    EOF,
    OBJECT,
    MEMBER,
    MEMBER_NAME,
    MEMBER_VALUE,
    ARRAY,
    ELEMENT,

    // Tokens
    // Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html)
    #[token("true")]
    #[token("false")]
    BOOL,
    #[token("{")]
    BRACE_OPEN,
    #[token("}")]
    BRACE_CLOSE,
    #[token("[")]
    BRACKET_OPEN,
    #[token("]")]
    BRACKET_CLOSE,
    #[token(":")]
    COLON,
    #[token(",")]
    COMMA,
    #[token("null")]
    NULL,
    #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")]
    NUMBER,
    // A string body is any run of: a character other than `"` or `\`, or a
    // backslash followed by one of `" \ / b n f r t` or by `u` plus four hex
    // digits. The escape alternatives are grouped behind the backslash so
    // that `\uXXXX` and `\/` are accepted.
    #[regex(r#""([^"\\]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#)]
    STRING,

    // Whitespace tokens
    #[regex("[ \\t\\f]+")]
    WHITESPACE,
    #[token("\n")]
    NEWLINE,
}
// Tells `pawarser` which of this crate's kinds to use for the special kinds
// its `SyntaxElement` trait requires.
impl pawarser::parser::SyntaxElement for SyntaxKind {
// Kind used for end of input.
const EOF: Self = Self::EOF;
// Kind used for parse errors (distinct from `LEX_ERR`, which `lex` emits).
const ERROR: Self = Self::PARSE_ERR;
}
/// Converts this crate's `SyntaxKind` into rowan's raw kind wrapper.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `#[repr(u16)]`, so the cast yields its discriminant unchanged.
        let raw = kind as u16;
        rowan::SyntaxKind(raw)
    }
}
#[cfg(test)]
mod tests {
    use crate::syntax_kind::{lex, SyntaxKind};

    /// Lexes a small single-line object and checks every `(kind, slice)` pair,
    /// including the whitespace tokens between members.
    #[test]
    fn simple_object() {
        const TEST_DATA: &str = r#"{"hello_world": "meow", "some_num":7.42}"#;

        // No `dbg!` wrapper: `assert_eq!` already prints both sides on failure.
        assert_eq!(
            lex(TEST_DATA),
            vec![
                (SyntaxKind::BRACE_OPEN, "{"),
                (SyntaxKind::STRING, "\"hello_world\""),
                (SyntaxKind::COLON, ":"),
                (SyntaxKind::WHITESPACE, " "),
                (SyntaxKind::STRING, "\"meow\""),
                (SyntaxKind::COMMA, ","),
                (SyntaxKind::WHITESPACE, " "),
                (SyntaxKind::STRING, "\"some_num\""),
                (SyntaxKind::COLON, ":"),
                (SyntaxKind::NUMBER, "7.42"),
                (SyntaxKind::BRACE_CLOSE, "}")
            ]
        );
    }
}

View file

@ -1,2 +1,8 @@
#![feature(iter_collect_into)] #![feature(iter_collect_into)]
pub mod parser; pub mod parser;
pub use parser::{
error::SyntaxError,
marker::{CompletedMarker, Marker},
Parser, SyntaxElement,
};

View file

@ -2,12 +2,13 @@ use std::cell::Cell;
use enumset::{EnumSet, EnumSetType}; use enumset::{EnumSet, EnumSetType};
use self::{error::SyntaxError, event::Event, input::Input, marker::Marker}; use self::{event::Event, input::Input, marker::Marker};
pub use error::SyntaxError;
mod error; pub mod error;
mod event; mod event;
mod input; mod input;
mod marker; pub mod marker;
/// this is used to define some required SyntaxKinds like an EOF token or an error token /// this is used to define some required SyntaxKinds like an EOF token or an error token
pub trait SyntaxElement pub trait SyntaxElement
@ -61,7 +62,7 @@ impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>
true true
} }
fn do_bump(&mut self) { pub fn do_bump(&mut self) {
self.push_ev(Event::Eat { self.push_ev(Event::Eat {
count: self.input.preceding_meaningless(self.pos), count: self.input.preceding_meaningless(self.pos),
}); });