json-pawarser: init
This commit is contained in:
parent
21bcf62ea5
commit
becc4b4041
9 changed files with 224 additions and 11 deletions
22
Cargo.lock
generated
22
Cargo.lock
generated
|
@ -804,6 +804,16 @@ dependencies = [
|
|||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json-pawarser"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"enumset",
|
||||
"logos",
|
||||
"pawarser",
|
||||
"rowan",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lang"
|
||||
version = "0.1.0"
|
||||
|
@ -881,18 +891,18 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
|
|||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.14.0"
|
||||
version = "0.14.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "161971eb88a0da7ae0c333e1063467c5b5727e7fb6b710b8db4814eade3a42e8"
|
||||
checksum = "1c6b6e02facda28ca5fb8dbe4b152496ba3b1bd5a4b40bb2b1b2d8ad74e0f39b"
|
||||
dependencies = [
|
||||
"logos-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-codegen"
|
||||
version = "0.14.0"
|
||||
version = "0.14.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e31badd9de5131fdf4921f6473d457e3dd85b11b7f091ceb50e4df7c3eeb12a"
|
||||
checksum = "b32eb6b5f26efacd015b000bfc562186472cd9b34bdba3f6b264e2a052676d10"
|
||||
dependencies = [
|
||||
"beef",
|
||||
"fnv",
|
||||
|
@ -905,9 +915,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "logos-derive"
|
||||
version = "0.14.0"
|
||||
version = "0.14.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c2a69b3eb68d5bd595107c9ee58d7e07fe2bb5e360cc85b0f084dedac80de0a"
|
||||
checksum = "3e5d0c5463c911ef55624739fc353238b4e310f0144be1f875dc42fec6bfd5ec"
|
||||
dependencies = [
|
||||
"logos-codegen",
|
||||
]
|
||||
|
|
|
@ -6,7 +6,9 @@ members = [
|
|||
"crates/lang",
|
||||
"crates/svg-filters",
|
||||
"crates/prowocessing",
|
||||
"crates/executor-poc", "crates/pawarser",
|
||||
"crates/executor-poc",
|
||||
"crates/pawarser",
|
||||
"crates/json-pawarser",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
|
|
13
crates/json-pawarser/Cargo.toml
Normal file
13
crates/json-pawarser/Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "json-pawarser"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
logos = "0.14.2"
|
||||
enumset = "1.1.3"
|
||||
rowan = "0.15.15"
|
||||
pawarser = { path = "../pawarser" }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
67
crates/json-pawarser/src/grammar.rs
Normal file
67
crates/json-pawarser/src/grammar.rs
Normal file
|
@ -0,0 +1,67 @@
|
|||
use enumset::{enum_set, EnumSet};
|
||||
|
||||
use crate::{syntax_error::SyntaxError, syntax_kind::SyntaxKind};
|
||||
|
||||
use self::object::object;
|
||||
|
||||
/// pawarser's generic parser, specialized to this crate's `SyntaxKind` and `SyntaxError`.
type Parser<'src, 'toks> = pawarser::Parser<'src, 'toks, SyntaxKind, SyntaxError>;
|
||||
/// pawarser's completed-marker type, specialized to this crate's `SyntaxKind` and `SyntaxError`.
type CompletedMarker = pawarser::CompletedMarker<SyntaxKind, SyntaxError>;
|
||||
|
||||
const BASIC_VALUE_TOKENS: EnumSet<SyntaxKind> =
|
||||
enum_set!(SyntaxKind::BOOL | SyntaxKind::NULL | SyntaxKind::NUMBER | SyntaxKind::STRING);
|
||||
|
||||
pub fn value(p: &mut Parser) -> bool {
|
||||
if BASIC_VALUE_TOKENS.contains(p.current()) {
|
||||
p.do_bump();
|
||||
return true;
|
||||
} else {
|
||||
object(p).is_some()
|
||||
}
|
||||
}
|
||||
|
||||
mod object {
|
||||
use crate::{grammar::value, syntax_error::SyntaxError, syntax_kind::SyntaxKind};
|
||||
|
||||
use super::{CompletedMarker, Parser, BASIC_VALUE_TOKENS};
|
||||
|
||||
pub(super) fn object(p: &mut Parser) -> Option<CompletedMarker> {
|
||||
let obj_start = p.start("object");
|
||||
|
||||
if !p.at(SyntaxKind::BRACE_OPEN) {
|
||||
obj_start.abandon(p);
|
||||
return None;
|
||||
}
|
||||
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn member(p: &mut Parser) -> Option<CompletedMarker> {
|
||||
let member_start = p.start("member");
|
||||
|
||||
if p.at(SyntaxKind::BRACE_CLOSE) {
|
||||
member_start.abandon(p);
|
||||
return None;
|
||||
} else if p.at(SyntaxKind::STRING) {
|
||||
let member_name_start = p.start("member_name");
|
||||
p.eat(SyntaxKind::STRING);
|
||||
member_name_start.complete(p, SyntaxKind::MEMBER_NAME);
|
||||
} else {
|
||||
return todo!("handle other tokens");
|
||||
}
|
||||
|
||||
if !p.eat(SyntaxKind::COLON) {
|
||||
todo!("handle wrong tokens")
|
||||
}
|
||||
|
||||
if value(p) {
|
||||
Some(member_start.complete(p, SyntaxKind::MEMBER))
|
||||
} else {
|
||||
let e = member_start.error(p, SyntaxError::MemberMissingValue);
|
||||
Some(
|
||||
e.precede(p, "member but failed already")
|
||||
.complete(p, SyntaxKind::MEMBER),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
mod array {}
|
3
crates/json-pawarser/src/lib.rs
Normal file
3
crates/json-pawarser/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
mod grammar;
|
||||
mod syntax_error;
|
||||
mod syntax_kind;
|
8
crates/json-pawarser/src/syntax_error.rs
Normal file
8
crates/json-pawarser/src/syntax_error.rs
Normal file
|
@ -0,0 +1,8 @@
|
|||
use crate::syntax_kind::SyntaxKind;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum SyntaxError {
|
||||
DisallowedKeyType(SyntaxKind),
|
||||
MemberMissingValue,
|
||||
}
|
||||
// Empty impl: opts this crate's error enum into pawarser's `SyntaxError` trait so it can be
// used as the parser's error type.
impl pawarser::parser::SyntaxError for SyntaxError {}
|
103
crates/json-pawarser/src/syntax_kind.rs
Normal file
103
crates/json-pawarser/src/syntax_kind.rs
Normal file
|
@ -0,0 +1,103 @@
|
|||
use logos::Logos;
|
||||
|
||||
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
||||
let mut lex = SyntaxKind::lexer(src);
|
||||
let mut r = Vec::new();
|
||||
|
||||
while let Some(tok_res) = lex.next() {
|
||||
r.push((tok_res.unwrap_or(SyntaxKind::LEX_ERR), lex.slice()))
|
||||
}
|
||||
|
||||
r
|
||||
}
|
||||
|
||||
#[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)]
|
||||
#[repr(u16)]
|
||||
#[enumset(no_super_impls)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum SyntaxKind {
|
||||
// Error SyntaxKinds
|
||||
LEX_ERR,
|
||||
PARSE_ERR,
|
||||
|
||||
// Meta SyntaxKinds
|
||||
TOMBSTONE,
|
||||
EOF,
|
||||
|
||||
OBJECT,
|
||||
MEMBER,
|
||||
MEMBER_NAME,
|
||||
MEMBER_VALUE,
|
||||
|
||||
ARRAY,
|
||||
ELEMENT,
|
||||
|
||||
// Tokens
|
||||
// Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html)
|
||||
#[token("true")]
|
||||
#[token("false")]
|
||||
BOOL,
|
||||
#[token("{")]
|
||||
BRACE_OPEN,
|
||||
#[token("}")]
|
||||
BRACE_CLOSE,
|
||||
#[token("[")]
|
||||
BRACKET_OPEN,
|
||||
#[token("]")]
|
||||
BRACKET_CLOSE,
|
||||
#[token(":")]
|
||||
COLON,
|
||||
#[token(",")]
|
||||
COMMA,
|
||||
#[token("null")]
|
||||
NULL,
|
||||
#[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")]
|
||||
NUMBER,
|
||||
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#)]
|
||||
STRING,
|
||||
|
||||
// Whitespace tokens
|
||||
#[regex("[ \\t\\f]+")]
|
||||
WHITESPACE,
|
||||
#[token("\n")]
|
||||
NEWLINE,
|
||||
}
|
||||
|
||||
impl pawarser::parser::SyntaxElement for SyntaxKind {
|
||||
const EOF: Self = Self::EOF;
|
||||
|
||||
const ERROR: Self = Self::PARSE_ERR;
|
||||
}
|
||||
|
||||
/// Converts a JSON syntax kind into rowan's raw kind by taking its `u16` discriminant.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `repr(u16)`, so the cast yields the variant's discriminant.
        let raw = kind as u16;
        rowan::SyntaxKind(raw)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{lex, SyntaxKind};

    /// Lexes a small flat object and checks every token, whitespace included.
    #[test]
    fn simple_object() {
        const TEST_DATA: &str = r#"{"hello_world": "meow", "some_num":7.42}"#;

        // `assert_eq!` prints both sides on failure, so the leftover `dbg!` was dropped.
        assert_eq!(
            lex(TEST_DATA),
            vec![
                (SyntaxKind::BRACE_OPEN, "{"),
                (SyntaxKind::STRING, "\"hello_world\""),
                (SyntaxKind::COLON, ":"),
                (SyntaxKind::WHITESPACE, " "),
                (SyntaxKind::STRING, "\"meow\""),
                (SyntaxKind::COMMA, ","),
                (SyntaxKind::WHITESPACE, " "),
                (SyntaxKind::STRING, "\"some_num\""),
                (SyntaxKind::COLON, ":"),
                (SyntaxKind::NUMBER, "7.42"),
                (SyntaxKind::BRACE_CLOSE, "}")
            ]
        );
    }
}
|
|
@ -1,2 +1,8 @@
|
|||
#![feature(iter_collect_into)]
|
||||
pub mod parser;
|
||||
|
||||
pub use parser::{
|
||||
error::SyntaxError,
|
||||
marker::{CompletedMarker, Marker},
|
||||
Parser, SyntaxElement,
|
||||
};
|
||||
|
|
|
@ -2,12 +2,13 @@ use std::cell::Cell;
|
|||
|
||||
use enumset::{EnumSet, EnumSetType};
|
||||
|
||||
use self::{error::SyntaxError, event::Event, input::Input, marker::Marker};
|
||||
use self::{event::Event, input::Input, marker::Marker};
|
||||
pub use error::SyntaxError;
|
||||
|
||||
mod error;
|
||||
pub mod error;
|
||||
mod event;
|
||||
mod input;
|
||||
mod marker;
|
||||
pub mod marker;
|
||||
|
||||
/// this is used to define some required SyntaxKinds like an EOF token or an error token
|
||||
pub trait SyntaxElement
|
||||
|
@ -61,7 +62,7 @@ impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>
|
|||
true
|
||||
}
|
||||
|
||||
fn do_bump(&mut self) {
|
||||
pub fn do_bump(&mut self) {
|
||||
self.push_ev(Event::Eat {
|
||||
count: self.input.preceding_meaningless(self.pos),
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue