json-pawarser: init

This commit is contained in:
Schrottkatze 2024-10-18 14:05:27 +02:00
parent 21bcf62ea5
commit becc4b4041
Signed by: schrottkatze
SSH key fingerprint: SHA256:hXb3t1vINBFCiDCmhRABHX5ocdbLiKyCdKI4HK2Rbbc
9 changed files with 224 additions and 11 deletions

View file

@ -0,0 +1,13 @@
[package]
name = "json-pawarser"
version = "0.1.0"
edition = "2021"
[dependencies]
logos = "0.14.2"
enumset = "1.1.3"
rowan = "0.15.15"
pawarser = { path = "../pawarser" }
[lints]
workspace = true

View file

@ -0,0 +1,67 @@
use enumset::{enum_set, EnumSet};
use crate::{syntax_error::SyntaxError, syntax_kind::SyntaxKind};
use self::object::object;
/// `pawarser::Parser` specialised to this crate's `SyntaxKind` and `SyntaxError`.
type Parser<'src, 'toks> = pawarser::Parser<'src, 'toks, SyntaxKind, SyntaxError>;
/// `pawarser::CompletedMarker` specialised to this crate's `SyntaxKind` and `SyntaxError`.
type CompletedMarker = pawarser::CompletedMarker<SyntaxKind, SyntaxError>;
/// Token kinds that `value` accepts as a complete value on their own
/// (booleans, `null`, numbers and strings).
const BASIC_VALUE_TOKENS: EnumSet<SyntaxKind> =
enum_set!(SyntaxKind::BOOL | SyntaxKind::NULL | SyntaxKind::NUMBER | SyntaxKind::STRING);
/// Parses one JSON value at the parser's current position.
///
/// A token contained in `BASIC_VALUE_TOKENS` is consumed directly. Anything else
/// is handed to the `object` rule. Returns `true` when a basic token was bumped
/// or when `object` produced a completed marker, and `false` otherwise.
pub fn value(p: &mut Parser) -> bool {
    // Not a single-token value: the only other rule attempted here is `object`.
    if !BASIC_VALUE_TOKENS.contains(p.current()) {
        return object(p).is_some();
    }

    p.do_bump();
    true
}
mod object {
use crate::{grammar::value, syntax_error::SyntaxError, syntax_kind::SyntaxKind};
use super::{CompletedMarker, Parser, BASIC_VALUE_TOKENS};
pub(super) fn object(p: &mut Parser) -> Option<CompletedMarker> {
let obj_start = p.start("object");
if !p.at(SyntaxKind::BRACE_OPEN) {
obj_start.abandon(p);
return None;
}
todo!()
}
fn member(p: &mut Parser) -> Option<CompletedMarker> {
let member_start = p.start("member");
if p.at(SyntaxKind::BRACE_CLOSE) {
member_start.abandon(p);
return None;
} else if p.at(SyntaxKind::STRING) {
let member_name_start = p.start("member_name");
p.eat(SyntaxKind::STRING);
member_name_start.complete(p, SyntaxKind::MEMBER_NAME);
} else {
return todo!("handle other tokens");
}
if !p.eat(SyntaxKind::COLON) {
todo!("handle wrong tokens")
}
if value(p) {
Some(member_start.complete(p, SyntaxKind::MEMBER))
} else {
let e = member_start.error(p, SyntaxError::MemberMissingValue);
Some(
e.precede(p, "member but failed already")
.complete(p, SyntaxKind::MEMBER),
)
}
}
}
// Empty module: no array grammar rules are defined here.
mod array {}

View file

@ -0,0 +1,3 @@
mod grammar;
mod syntax_error;
mod syntax_kind;

View file

@ -0,0 +1,8 @@
use crate::syntax_kind::SyntaxKind;
/// Errors reported by the JSON grammar while parsing.
///
/// `Debug`, `PartialEq` and `Eq` are derived in addition to `Clone` so errors can
/// be printed in diagnostics and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxError {
    /// Carries the offending [`SyntaxKind`].
    // NOTE(review): presumably reported when an object key is not a string;
    // nothing in the visible grammar constructs this variant yet, so confirm.
    DisallowedKeyType(SyntaxKind),
    /// Reported by the `member` grammar rule when the value after the `:` fails to parse.
    MemberMissingValue,
}

// Opts this type into pawarser's error trait; no trait items are overridden.
impl pawarser::parser::SyntaxError for SyntaxError {}

View file

@ -0,0 +1,103 @@
use logos::Logos;
/// Splits `src` into `(kind, text)` pairs in source order.
///
/// Every token the lexer yields is kept. Input the lexer rejects is reported as
/// `SyntaxKind::LEX_ERR` together with the offending slice, so no text is dropped.
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
    SyntaxKind::lexer(src)
        .spanned()
        .map(|(result, span)| (result.unwrap_or(SyntaxKind::LEX_ERR), &src[span]))
        .collect()
}
/// Every token and node kind used by the JSON lexer and grammar.
///
/// The enum is `#[repr(u16)]` and is cast to `u16` when converted into a
/// `rowan::SyntaxKind`, so reordering variants changes those raw values.
#[derive(enumset::EnumSetType, Debug, Logos, PartialEq, Eq, Clone, Copy, Hash)]
#[repr(u16)]
#[enumset(no_super_impls)]
// Variant names are SCREAMING_SNAKE_CASE, which the default lint would reject.
#[allow(non_camel_case_types)]
pub enum SyntaxKind {
    // Error SyntaxKinds
    /// Substituted by `lex` for any input the lexer rejects.
    LEX_ERR,
    /// Used as the `ERROR` kind in the `SyntaxElement` impl.
    PARSE_ERR,

    // Meta SyntaxKinds
    TOMBSTONE,
    /// Used as the `EOF` kind in the `SyntaxElement` impl.
    EOF,
    OBJECT,
    MEMBER,
    MEMBER_NAME,
    MEMBER_VALUE,

    ARRAY,
    ELEMENT,

    // Tokens
    // Regexes adapted from [the logos handbook](https://logos.maciej.codes/examples/json_borrowed.html)
    /// The literals `true` and `false`.
    #[token("true")]
    #[token("false")]
    BOOL,
    #[token("{")]
    BRACE_OPEN,
    #[token("}")]
    BRACE_CLOSE,
    #[token("[")]
    BRACKET_OPEN,
    #[token("]")]
    BRACKET_CLOSE,
    #[token(":")]
    COLON,
    #[token(",")]
    COMMA,
    #[token("null")]
    NULL,
    /// Optional sign, integer part without leading zeros, optional fraction and exponent.
    #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?")]
    NUMBER,
    /// A double-quoted string, quotes included.
    ///
    /// The body is any character other than `"` and `\`, or a backslash escape.
    /// The escape group is nested under the backslash so that `\uXXXX` (four hex
    /// digits) and `\/` are accepted alongside `\" \\ \b \n \f \r \t`.
    #[regex(r#""([^"\\]|\\(["\\/bnfrt]|u[a-fA-F0-9]{4}))*""#)]
    STRING,

    // Whitespace tokens
    // NOTE(review): a `\r` matches neither WHITESPACE nor NEWLINE, so `lex` reports
    // it as LEX_ERR on CRLF input - confirm whether that is intended.
    /// A run of spaces, tabs or form feeds.
    #[regex("[ \\t\\f]+")]
    WHITESPACE,
    /// A single line feed.
    #[token("\n")]
    NEWLINE,
}
// Tells pawarser which of this crate's kinds to use for its required
// end-of-file and error kinds.
impl pawarser::parser::SyntaxElement for SyntaxKind {
// End-of-file kind: the `EOF` variant.
const EOF: Self = Self::EOF;
// Error kind: the `PARSE_ERR` variant.
const ERROR: Self = Self::PARSE_ERR;
}
/// Converts a [`SyntaxKind`] into rowan's raw kind wrapper.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        // The enum is `#[repr(u16)]`, so the cast yields the variant's discriminant.
        let raw = kind as u16;
        rowan::SyntaxKind(raw)
    }
}
#[cfg(test)]
mod tests {
    use crate::syntax_kind::{lex, SyntaxKind};

    /// Lexes a small object and checks every produced token, whitespace included.
    #[test]
    fn simple_object() {
        const TEST_DATA: &str = r#"{"hello_world": "meow", "some_num":7.42}"#;

        let actual = dbg!(lex(TEST_DATA));
        let expected = vec![
            (SyntaxKind::BRACE_OPEN, "{"),
            (SyntaxKind::STRING, "\"hello_world\""),
            (SyntaxKind::COLON, ":"),
            (SyntaxKind::WHITESPACE, " "),
            (SyntaxKind::STRING, "\"meow\""),
            (SyntaxKind::COMMA, ","),
            (SyntaxKind::WHITESPACE, " "),
            (SyntaxKind::STRING, "\"some_num\""),
            (SyntaxKind::COLON, ":"),
            (SyntaxKind::NUMBER, "7.42"),
            (SyntaxKind::BRACE_CLOSE, "}"),
        ];

        assert_eq!(actual, expected);
    }
}

View file

@ -1,2 +1,8 @@
#![feature(iter_collect_into)]
pub mod parser;
pub use parser::{
error::SyntaxError,
marker::{CompletedMarker, Marker},
Parser, SyntaxElement,
};

View file

@ -2,12 +2,13 @@ use std::cell::Cell;
use enumset::{EnumSet, EnumSetType};
use self::{error::SyntaxError, event::Event, input::Input, marker::Marker};
use self::{event::Event, input::Input, marker::Marker};
pub use error::SyntaxError;
mod error;
pub mod error;
mod event;
mod input;
mod marker;
pub mod marker;
/// this is used to define some required SyntaxKinds like an EOF token or an error token
pub trait SyntaxElement
@ -61,7 +62,7 @@ impl<'src, 'toks, SyntaxKind: SyntaxElement, SyntaxErr: SyntaxError>
true
}
fn do_bump(&mut self) {
pub fn do_bump(&mut self) {
self.push_ev(Event::Eat {
count: self.input.preceding_meaningless(self.pos),
});