forked from katzen-cafe/iowo
json-pawarser: init
This commit is contained in:
parent
21bcf62ea5
commit
becc4b4041
9 changed files with 224 additions and 11 deletions
22
Cargo.lock
generated
22
Cargo.lock
generated
|
@ -804,6 +804,16 @@ dependencies = [
|
||||||
"rayon",
|
"rayon",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "json-pawarser"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"enumset",
|
||||||
|
"logos",
|
||||||
|
"pawarser",
|
||||||
|
"rowan",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lang"
|
name = "lang"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
@ -881,18 +891,18 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "logos"
|
name = "logos"
|
||||||
version = "0.14.0"
|
version = "0.14.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8"
|
checksum = "1c6b6e02facda28ca5fb8dbe4b152496ba3b1bd5a4b40bb2b1b2d8ad74e0f39b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"logos-derive",
|
"logos-derive",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "logos-codegen"
|
name = "logos-codegen"
|
||||||
version = "0.14.0"
|
version = "0.14.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a"
|
checksum = "b32eb6b5f26efacd015b000bfc562186472cd9b34bdba3f6b264e2a052676d10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"beef",
|
"beef",
|
||||||
"fnv",
|
"fnv",
|
||||||
|
@ -905,9 +915,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "logos-derive"
|
name = "logos-derive"
|
||||||
version = "0.14.0"
|
version = "0.14.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a"
|
checksum = "3e5d0c5463c911ef55624739fc353238b4e310f0144be1f875dc42fec6bfd5ec"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"logos-codegen",
|
"logos-codegen",
|
||||||
]
|
]
|
||||||
|
|
|
@ -6,7 +6,9 @@ members = [
|
||||||
"crates/lang",
|
"crates/lang",
|
||||||
"crates/svg-filters",
|
"crates/svg-filters",
|
||||||
"crates/prowocessing",
|
"crates/prowocessing",
|
||||||
"crates/executor-poc", "crates/pawarser",
|
"crates/executor-poc",
|
||||||
|
"crates/pawarser",
|
||||||
|
"crates/json-pawarser",
|
||||||
]
|
]
|
||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
||||||
|
|
13
crates/json-pawarser/Cargo.toml
Normal file
13
crates/json-pawarser/Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
[package]
|
||||||
|
name = "json-pawarser"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
logos = "0.14.2"
|
||||||
|
enumset = "1.1.3"
|
||||||
|
rowan = "0.15.15"
|
||||||
|
pawarser = { path = "../pawarser" }
|
||||||
|
|
||||||
|
[lints]
|
||||||
|
workspace = true
|
67
crates/json-pawarser/src/grammar.rs
Normal file
67
crates/json-pawarser/src/grammar.rs
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
use enumset::{enum_set, EnumSet};
|
||||||
|
|
||||||
|
use crate::{syntax_error::SyntaxError, syntax_kind::SyntaxKind};
|
||||||
|
|
||||||
|
use self::object::object;
|
||||||
|
|
||||||
|
type Parser<'src, 'toks> = pawarser::Parser<'src, 'toks, SyntaxKind, SyntaxError>;
|
||||||
|
type CompletedMarker = pawarser::CompletedMarker<SyntaxKind, SyntaxError>;
|
||||||
|
|
||||||
|
const BASIC_VALUE_TOKENS: EnumSet<SyntaxKind> =
|
||||||
|
enum_set!(SyntaxKind::BOOL | SyntaxKind::NULL | SyntaxKind::NUMBER | SyntaxKind::STRING);
|
||||||
|
|
||||||
|
pub fn value(p: &mut Parser) -> bool {
|
||||||
|
if BASIC_VALUE_TOKENS.contains(p.current()) {
|
||||||
|
p.do_bump();
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
object(p).is_some()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mod object {
|
||||||
|
use crate::{grammar::value, syntax_error::SyntaxError, syntax_kind::SyntaxKind};
|
||||||
|
|
||||||
|
use super::{CompletedMarker, Parser, BASIC_VALUE_TOKENS};
|
||||||
|
|
||||||
|
pub(super) fn object(p: &mut Parser) -> Option<CompletedMarker> {
|
||||||
|
let obj_start = p.start("object");
|
||||||
|
|
||||||
|
if !p.at(SyntaxKind::BRACE_OPEN) {
|
||||||
|
obj_start.abandon(p);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn member(p: &mut Parser) -> Option<CompletedMarker> {
|
||||||
|
let member_start = p.start("member");
|
||||||
|
|
||||||
|
if p.at(SyntaxKind::BRACE_CLOSE) {
|
||||||
|
member_start.abandon(p);
|
||||||
|
return None;
|
||||||
|
} else if p.at(SyntaxKind::STRING) {
|
||||||
|
let member_name_start = p.start("member_name");
|
||||||
|
p.eat(SyntaxKind::STRING);
|
||||||
|
member_name_start.complete(p, SyntaxKind::MEMBER_NAME);
|
||||||
|
} else {
|
||||||
|
return todo!("handle other tokens");
|
||||||
|
}
|
||||||
|
|
||||||
|
if !p.eat(SyntaxKind::COLON) {
|
||||||
|
todo!("handle wrong tokens")
|
||||||
|
}
|
||||||
|
|
||||||
|
if value(p) {
|
||||||
|
Some(member_start.complete(p, SyntaxKind::MEMBER))
|
||||||
|
} else {
|
||||||
|
let e = member_start.error(p, SyntaxError::MemberMissingValue);
|
||||||
|
Some(
|
||||||
|
e.precede(p, "member but failed already")
|
||||||
|
.complete(p, SyntaxKind::MEMBER),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Placeholder module: it is still empty, no array grammar has been written yet.
mod array {}
|
3
crates/json-pawarser/src/lib.rs
Normal file
3
crates/json-pawarser/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
mod grammar;
|
||||||
|
mod syntax_error;
|
||||||
|
mod syntax_kind;
|
8
crates/json-pawarser/src/syntax_error.rs
Normal file
8
crates/json-pawarser/src/syntax_error.rs
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
use crate::syntax_kind::SyntaxKind;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum SyntaxError {
|
||||||
|
DisallowedKeyType(SyntaxKind),
|
||||||
|
MemberMissingValue,
|
||||||
|
}
|
||||||
|
impl pawarser::parser::SyntaxError for SyntaxError {}
|
103
crates/json-pawarser/src/syntax_kind.rs
Normal file
103
crates/json-pawarser/src/syntax_kind.rs
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
use logos::Logos;
|
||||||
|
|
||||||
|
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
||||||
|
let mut lex = SyntaxKind::lexer(src);
|
||||||
|
let mut r = Vec::new();
|
||||||
|
|
||||||
|
while let Some(tok_res) = lex.next() {
|
||||||
|
r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice()))
|
||||||
|
}
|
||||||
|
|
||||||
|
r
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)]
|
||||||
|
#[repr(u16)]
|
||||||
|
#[enumset(no_super_impls)]
|
||||||
|
#[allow(non_camel_case_types)]
|
||||||
|
pub enum SyntaxKind {
|
||||||
|
// Error SyntaxKinds
|
||||||
|
LEX_ERR,
|
||||||
|
PARSE_ERR,
|
||||||
|
|
||||||
|
// Meta SyntaxKinds
|
||||||
|
TOMBSTONE,
|
||||||
|
EOF,
|
||||||
|
|
||||||
|
OBJECT,
|
||||||
|
MEMBER,
|
||||||
|
MEMBER_NAME,
|
||||||
|
MEMBER_VALUE,
|
||||||
|
|
||||||
|
ARRAY,
|
||||||
|
ELEMENT,
|
||||||
|
|
||||||
|
// Tokens
|
||||||
|
// Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html)
|
||||||
|
#[token("true")]
|
||||||
|
#[token("false")]
|
||||||
|
BOOL,
|
||||||
|
#[token("{")]
|
||||||
|
BRACE_OPEN,
|
||||||
|
#[token("}")]
|
||||||
|
BRACE_CLOSE,
|
||||||
|
#[token("[")]
|
||||||
|
BRACKET_OPEN,
|
||||||
|
#[token("]")]
|
||||||
|
BRACKET_CLOSE,
|
||||||
|
#[token(":")]
|
||||||
|
COLON,
|
||||||
|
#[token(",")]
|
||||||
|
COMMA,
|
||||||
|
#[token("null")]
|
||||||
|
NULL,
|
||||||
|
#[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")]
|
||||||
|
NUMBER,
|
||||||
|
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)]
|
||||||
|
STRING,
|
||||||
|
|
||||||
|
// Whitespace tokens
|
||||||
|
#[regex("[ \\t\\f]+")]
|
||||||
|
WHITESPACE,
|
||||||
|
#[token("\n")]
|
||||||
|
NEWLINE,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl pawarser::parser::SyntaxElement for SyntaxKind {
|
||||||
|
const EOF: Self = Self::EOF;
|
||||||
|
|
||||||
|
const ERROR: Self = Self::PARSE_ERR;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<SyntaxKind> for rowan::SyntaxKind {
    /// Converts our kind into rowan's raw kind by using its discriminant.
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `#[repr(u16)]`, so the cast cannot lose information.
        let raw = kind as u16;
        rowan::SyntaxKind(raw)
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::syntax_kind::{lex, SyntaxKind};

    /// Lexes a small flat object and checks every token, whitespace included.
    #[test]
    fn simple_object() {
        const TEST_DATA: &str = r#"{"hello_world": "meow", "some_num":7.42}"#;

        let expected = vec![
            (SyntaxKind::BRACE_OPEN, "{"),
            (SyntaxKind::STRING, "\"hello_world\""),
            (SyntaxKind::COLON, ":"),
            (SyntaxKind::WHITESPACE, " "),
            (SyntaxKind::STRING, "\"meow\""),
            (SyntaxKind::COMMA, ","),
            (SyntaxKind::WHITESPACE, " "),
            (SyntaxKind::STRING, "\"some_num\""),
            (SyntaxKind::COLON, ":"),
            (SyntaxKind::NUMBER, "7.42"),
            (SyntaxKind::BRACE_CLOSE, "}"),
        ];

        let actual = dbg!(lex(TEST_DATA));

        assert_eq!(actual, expected);
    }
}
|
|
@ -1,2 +1,8 @@
|
||||||
#![feature(iter_collect_into)]
|
#![feature(iter_collect_into)]
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
|
|
||||||
|
pub use parser::{
|
||||||
|
error::SyntaxError,
|
||||||
|
marker::{CompletedMarker, Marker},
|
||||||
|
Parser, SyntaxElement,
|
||||||
|
};
|
||||||
|
|
|
@ -2,12 +2,13 @@ use std::cell::Cell;
|
||||||
|
|
||||||
use enumset::{EnumSet, EnumSetType};
|
use enumset::{EnumSet, EnumSetType};
|
||||||
|
|
||||||
use self::{error::SyntaxError, event::Event, input::Input, marker::Marker};
|
use self::{event::Event, input::Input, marker::Marker};
|
||||||
|
pub use error::SyntaxError;
|
||||||
|
|
||||||
mod error;
|
pub mod error;
|
||||||
mod event;
|
mod event;
|
||||||
mod input;
|
mod input;
|
||||||
mod marker;
|
pub mod marker;
|
||||||
|
|
||||||
/// this is used to define some required SyntaxKinds like an EOF token or an error token
|
/// this is used to define some required SyntaxKinds like an EOF token or an error token
|
||||||
pub trait SyntaxElement
|
pub trait SyntaxElement
|
||||||
|
@ -61,7 +62,7 @@ impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
fn do_bump(&mut self) {
|
pub fn do_bump(&mut self) {
|
||||||
self.push_ev(Event::Eat {
|
self.push_ev(Event::Eat {
|
||||||
count: self.input.preceding_meaningless(self.pos),
|
count: self.input.preceding_meaningless(self.pos),
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in a new issue