forked from katzen-cafe/iowo
lang: rewrite parser
This commit is contained in:
parent
6d8b79e8f7
commit
381ab45edc
25 changed files with 524 additions and 1161 deletions
215
Cargo.lock
generated
215
Cargo.lock
generated
|
@ -8,33 +8,6 @@ version = "1.0.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "ahash"
|
|
||||||
version = "0.8.11"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"once_cell",
|
|
||||||
"version_check",
|
|
||||||
"zerocopy",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "aho-corasick"
|
|
||||||
version = "1.1.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
|
||||||
dependencies = [
|
|
||||||
"memchr",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "allocator-api2"
|
|
||||||
version = "0.2.16"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anstream"
|
name = "anstream"
|
||||||
version = "0.6.5"
|
version = "0.6.5"
|
||||||
|
@ -160,31 +133,12 @@ version = "1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cc"
|
|
||||||
version = "1.0.90"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "chumsky"
|
|
||||||
version = "1.0.0-alpha.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c7b80276986f86789dc56ca6542d53bba9cda3c66091ebbe7bd96fc1bdf20f1f"
|
|
||||||
dependencies = [
|
|
||||||
"hashbrown",
|
|
||||||
"regex-automata",
|
|
||||||
"serde",
|
|
||||||
"stacker",
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.4.12"
|
version = "4.4.12"
|
||||||
|
@ -298,6 +252,40 @@ dependencies = [
|
||||||
"phf",
|
"phf",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "darling"
|
||||||
|
version = "0.20.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391"
|
||||||
|
dependencies = [
|
||||||
|
"darling_core",
|
||||||
|
"darling_macro",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "darling_core"
|
||||||
|
version = "0.20.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f"
|
||||||
|
dependencies = [
|
||||||
|
"fnv",
|
||||||
|
"ident_case",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "darling_macro"
|
||||||
|
version = "0.20.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f"
|
||||||
|
dependencies = [
|
||||||
|
"darling_core",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "deranged"
|
name = "deranged"
|
||||||
version = "0.3.11"
|
version = "0.3.11"
|
||||||
|
@ -328,6 +316,12 @@ dependencies = [
|
||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "drop_bomb"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9bda8e21c04aca2ae33ffc2fd8c23134f3cac46db123ba97bd9d3f3b8a4a85e1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ego-tree"
|
name = "ego-tree"
|
||||||
version = "0.6.2"
|
version = "0.6.2"
|
||||||
|
@ -340,6 +334,27 @@ version = "1.9.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "enumset"
|
||||||
|
version = "1.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "226c0da7462c13fb57e5cc9e0dc8f0635e7d27f276a3a7fd30054647f669007d"
|
||||||
|
dependencies = [
|
||||||
|
"enumset_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "enumset_derive"
|
||||||
|
version = "0.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e08b6c6ab82d70f08844964ba10c7babb716de2ecaeab9be5717918a5177d3af"
|
||||||
|
dependencies = [
|
||||||
|
"darling",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "equivalent"
|
name = "equivalent"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
|
@ -447,10 +462,6 @@ name = "hashbrown"
|
||||||
version = "0.14.3"
|
version = "0.14.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
||||||
dependencies = [
|
|
||||||
"ahash",
|
|
||||||
"allocator-api2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
|
@ -458,6 +469,12 @@ version = "0.4.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ident_case"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "image"
|
name = "image"
|
||||||
version = "0.24.7"
|
version = "0.24.7"
|
||||||
|
@ -516,9 +533,10 @@ name = "lang"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ariadne",
|
"ariadne",
|
||||||
"chumsky",
|
|
||||||
"clap",
|
"clap",
|
||||||
|
"drop_bomb",
|
||||||
"ego-tree",
|
"ego-tree",
|
||||||
|
"enumset",
|
||||||
"indexmap",
|
"indexmap",
|
||||||
"logos",
|
"logos",
|
||||||
"petgraph",
|
"petgraph",
|
||||||
|
@ -584,7 +602,7 @@ dependencies = [
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"regex-syntax 0.8.2",
|
"regex-syntax",
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -661,12 +679,6 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "once_cell"
|
|
||||||
version = "1.19.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "option-ext"
|
name = "option-ext"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
|
@ -759,15 +771,6 @@ dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "psm"
|
|
||||||
version = "0.1.21"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "qoi"
|
name = "qoi"
|
||||||
version = "0.4.1"
|
version = "0.4.1"
|
||||||
|
@ -851,23 +854,6 @@ dependencies = [
|
||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "regex-automata"
|
|
||||||
version = "0.3.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
|
|
||||||
dependencies = [
|
|
||||||
"aho-corasick",
|
|
||||||
"memchr",
|
|
||||||
"regex-syntax 0.7.5",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "regex-syntax"
|
|
||||||
version = "0.7.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex-syntax"
|
name = "regex-syntax"
|
||||||
version = "0.8.2"
|
version = "0.8.2"
|
||||||
|
@ -975,19 +961,6 @@ dependencies = [
|
||||||
"lock_api",
|
"lock_api",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "stacker"
|
|
||||||
version = "0.1.15"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"cfg-if",
|
|
||||||
"libc",
|
|
||||||
"psm",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
@ -1090,12 +1063,6 @@ version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "version_check"
|
|
||||||
version = "0.9.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasi"
|
name = "wasi"
|
||||||
version = "0.11.0+wasi-snapshot-preview1"
|
version = "0.11.0+wasi-snapshot-preview1"
|
||||||
|
@ -1108,28 +1075,6 @@ version = "0.1.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb"
|
checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi"
|
|
||||||
version = "0.3.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
|
||||||
dependencies = [
|
|
||||||
"winapi-i686-pc-windows-gnu",
|
|
||||||
"winapi-x86_64-pc-windows-gnu",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi-i686-pc-windows-gnu"
|
|
||||||
version = "0.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi-x86_64-pc-windows-gnu"
|
|
||||||
version = "0.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
|
@ -1268,26 +1213,6 @@ version = "0.5.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
|
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "zerocopy"
|
|
||||||
version = "0.7.32"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
|
|
||||||
dependencies = [
|
|
||||||
"zerocopy-derive",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "zerocopy-derive"
|
|
||||||
version = "0.7.32"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zune-inflate"
|
name = "zune-inflate"
|
||||||
version = "0.2.54"
|
version = "0.2.54"
|
||||||
|
|
|
@ -7,13 +7,14 @@ edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
logos = "0.14"
|
logos = "0.14"
|
||||||
chumsky = {version= "1.0.0-alpha.7", features=["label", "extension"]}
|
|
||||||
petgraph = { workspace = true}
|
petgraph = { workspace = true}
|
||||||
indexmap = "2.2.6"
|
indexmap = "2.2.6"
|
||||||
clap = { version = "4", features = ["derive"] }
|
clap = { version = "4", features = ["derive"] }
|
||||||
ariadne = "0.4.0"
|
ariadne = "0.4.0"
|
||||||
ego-tree = "0.6.2"
|
ego-tree = "0.6.2"
|
||||||
rowan = "0.15.15"
|
rowan = "0.15.15"
|
||||||
|
drop_bomb = "0.1.5"
|
||||||
|
enumset = "1.1.3"
|
||||||
|
|
||||||
[lints]
|
[lints]
|
||||||
workspace = true
|
workspace = true
|
||||||
|
|
|
@ -1,88 +0,0 @@
|
||||||
use std::{collections::HashMap, fs};
|
|
||||||
|
|
||||||
use ariadne::{sources, Label, Report, Source};
|
|
||||||
use chumsky::{
|
|
||||||
error::{self, Rich},
|
|
||||||
ParseResult,
|
|
||||||
};
|
|
||||||
use indexmap::IndexMap;
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
parser::{ast::File, Span},
|
|
||||||
tokens::Token,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Hash)]
|
|
||||||
pub enum Stage {
|
|
||||||
Lex,
|
|
||||||
Parse,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Stage {
|
|
||||||
fn variants() -> [Stage; 2] {
|
|
||||||
[Stage::Lex, Stage::Parse]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct ErrorCollector<'filename, 'tokens, 'src> {
|
|
||||||
files: HashMap<&'filename str, &'src str>,
|
|
||||||
raw_errors: IndexMap<(&'filename str, Stage), Vec<error::Rich<'tokens, Token<'src>, Span>>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'filename, 'tokens, 'src> ErrorCollector<'filename, 'tokens, 'src> {
|
|
||||||
pub fn new(files: Vec<(&'filename str, &'src str)>) -> Self {
|
|
||||||
Self {
|
|
||||||
files: HashMap::from_iter(files.clone()),
|
|
||||||
raw_errors: files
|
|
||||||
.iter()
|
|
||||||
.flat_map(|(name, _)| Stage::variants().map(|s| (name, s)))
|
|
||||||
.map(|(name, stage)| ((*name, stage), Vec::new()))
|
|
||||||
.collect(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert_many(
|
|
||||||
&mut self,
|
|
||||||
file: &'filename str,
|
|
||||||
curr_stage: Stage,
|
|
||||||
mut errs: Vec<error::Rich<'tokens, Token<'src>, Span>>,
|
|
||||||
) {
|
|
||||||
let err_vec = self
|
|
||||||
.raw_errors
|
|
||||||
.get_mut(&(file, curr_stage))
|
|
||||||
.expect("filename should exist");
|
|
||||||
err_vec.append(&mut errs);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn analyze_and_report(self) {
|
|
||||||
let ErrorCollector { files, raw_errors } = self;
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn report_raw(self) {
|
|
||||||
let ErrorCollector { files, raw_errors } = self;
|
|
||||||
|
|
||||||
for ((file, stage), errs) in raw_errors.into_iter() {
|
|
||||||
for err in errs {
|
|
||||||
eprintln!("e: {err:?}");
|
|
||||||
Report::build(ariadne::ReportKind::Error, file, err.span().start)
|
|
||||||
.with_message(format!("error at stage {stage:?}, {:?}", err.reason()))
|
|
||||||
.with_label(
|
|
||||||
Label::new((file, err.span().into_range())).with_message(format!(
|
|
||||||
"found: {:?}",
|
|
||||||
err.found().expect("errors should have a reason")
|
|
||||||
)),
|
|
||||||
)
|
|
||||||
.with_help(format!(
|
|
||||||
"expected: {:?}",
|
|
||||||
err.expected().collect::<Vec<_>>()
|
|
||||||
))
|
|
||||||
.finish()
|
|
||||||
.print((file, Source::from(files[file])));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
struct Loc<'filename>(&'filename str, Span);
|
|
|
@ -1,4 +1,2 @@
|
||||||
#![feature(type_alias_impl_trait)]
|
#![feature(type_alias_impl_trait, lint_reasons)]
|
||||||
pub mod err_reporting;
|
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod tokens;
|
|
||||||
|
|
|
@ -1,12 +1,9 @@
|
||||||
|
use clap::Parser;
|
||||||
use std::{fs, path::PathBuf};
|
use std::{fs, path::PathBuf};
|
||||||
|
|
||||||
use clap::Parser;
|
use lang::parser::{
|
||||||
use lang::{
|
parser::{self, grammar, input, output::Output},
|
||||||
err_reporting::ErrorCollector,
|
syntax_kind,
|
||||||
parser::ast::lossless::{
|
|
||||||
lex,
|
|
||||||
parser::{self, parse},
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
|
@ -19,8 +16,17 @@ fn main() {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
let n = args.file.clone();
|
let n = args.file.clone();
|
||||||
let f = fs::read_to_string(n.clone()).expect("failed to read file");
|
let f = fs::read_to_string(n.clone()).expect("failed to read file");
|
||||||
println!("toks: {:?}", lex::lex(&f));
|
|
||||||
println!("parse res: {:?}", parse(&f));
|
let toks = dbg!(syntax_kind::lex(&f));
|
||||||
|
let input = input::Input::new(&toks);
|
||||||
|
let mut parser = parser::Parser::new(input);
|
||||||
|
|
||||||
|
grammar::source_file(&mut parser);
|
||||||
|
|
||||||
|
let p_out = dbg!(parser.finish());
|
||||||
|
let o = Output::from_parser_output(toks, p_out);
|
||||||
|
|
||||||
|
println!("Out: {:?}", o);
|
||||||
|
|
||||||
// let parse_res = parser::parse(&f);
|
// let parse_res = parser::parse(&f);
|
||||||
// println!("parse: {:?}", parse_res);
|
// println!("parse: {:?}", parse_res);
|
||||||
|
|
|
@ -1,152 +1,143 @@
|
||||||
use chumsky::{
|
use drop_bomb::DropBomb;
|
||||||
error::Rich,
|
|
||||||
input::{Stream, ValueInput},
|
|
||||||
prelude::*,
|
|
||||||
primitive::just,
|
|
||||||
recursive::recursive,
|
|
||||||
span::SimpleSpan,
|
|
||||||
IterParser,
|
|
||||||
};
|
|
||||||
use indexmap::IndexMap;
|
|
||||||
use logos::Logos;
|
|
||||||
|
|
||||||
use crate::tokens::Token;
|
use self::{error::SyntaxError, events::Event, input::Input, syntax_kind::SyntaxKind};
|
||||||
|
|
||||||
pub mod ast;
|
pub mod syntax_kind;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
use self::ast::{
|
|
||||||
raw_ast::{RawExpr, RawExpression},
|
|
||||||
File,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub type Span = SimpleSpan;
|
pub mod error;
|
||||||
pub type Spanned<T> = (T, Span);
|
pub mod events;
|
||||||
|
pub mod grammar;
|
||||||
|
pub mod input;
|
||||||
|
pub mod output;
|
||||||
|
|
||||||
pub fn parse(src: &str) -> ParseResult<File<'_>, Rich<'_, Token<'_>>> {
|
pub struct Parser<'src, 'toks> {
|
||||||
let toks: Vec<_> = Token::lexer(src)
|
input: Input<'src, 'toks>,
|
||||||
.spanned()
|
pos: usize,
|
||||||
.map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s)))
|
events: Vec<Event>,
|
||||||
.collect();
|
errors: Vec<SyntaxError>,
|
||||||
let tok_stream = Stream::from_iter(toks).spanned((src.len()..src.len()).into());
|
|
||||||
parser().parse(tok_stream)
|
|
||||||
}
|
}
|
||||||
pub(crate) fn parser<
|
|
||||||
'tokens,
|
|
||||||
'src: 'tokens,
|
|
||||||
I: ValueInput<'tokens, Token = Token<'src>, Span = Span>,
|
|
||||||
>() -> impl Parser<'tokens, I, File<'src>, extra::Err<Rich<'tokens, Token<'src>, Span>>> {
|
|
||||||
let word = select! { Token::Word(word) = e => (word, e.span())};
|
|
||||||
|
|
||||||
let expr = recursive(|expr| {
|
impl<'src, 'toks> Parser<'src, 'toks> {
|
||||||
let lit = select! {
|
pub fn new(input: Input<'src, 'toks>) -> Self {
|
||||||
Token::Int(i) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Int(i.parse().expect("TODO: handle better"))), e.span()),
|
Self {
|
||||||
Token::Float(f) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Float(f.parse().expect("TODO: handle better"))), e.span()),
|
input,
|
||||||
Token::String(s) = e => RawExpression::new(RawExpr::Lit(ast::Lit::String(s.strip_prefix('"').expect("a").strip_suffix('"').expect("b"))), e.span())
|
pos: 0,
|
||||||
};
|
events: Vec::new(),
|
||||||
let mat = just(Token::Mat)
|
errors: Vec::new(),
|
||||||
.ignore_then(select! { Token::Dimensions(dimensions) = e => (dimensions, e.span())})
|
}
|
||||||
.then(
|
|
||||||
lit.separated_by(just(Token::Comma))
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.separated_by(just(Token::Semicolon))
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.delimited_by(just(Token::BracketOpen), just(Token::BracketClose)),
|
|
||||||
)
|
|
||||||
.map_with(|(dimensions, data), e| {
|
|
||||||
// TODO: Validation and proper error handling/reporting
|
|
||||||
// (validation = validating the matrix dimensions)
|
|
||||||
RawExpression::new(
|
|
||||||
RawExpr::Matrix(dimensions, data.into_iter().flatten().collect()),
|
|
||||||
e.span(),
|
|
||||||
)
|
|
||||||
});
|
|
||||||
let var = select! {
|
|
||||||
Token::VarIdent(name) => (RawExpr::Var as fn(_) -> _, name),
|
|
||||||
Token::InputIdent(name) => (RawExpr::InputVar as fn(_) -> _, name)
|
|
||||||
}
|
}
|
||||||
.map_with(|(item_type, name), extra| RawExpression::new(item_type(name), extra.span()))
|
|
||||||
.labelled("variable");
|
|
||||||
|
|
||||||
let attrset = word
|
pub fn finish(self) -> (Vec<Event>, Vec<SyntaxError>) {
|
||||||
.labelled("attr name")
|
(self.events, self.errors)
|
||||||
.then_ignore(just(Token::Colon))
|
}
|
||||||
.then(expr)
|
|
||||||
.labelled("attr body")
|
|
||||||
.separated_by(just(Token::Comma))
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.map(IndexMap::from_iter)
|
|
||||||
.delimited_by(just(Token::BraceOpen), just(Token::BraceClose))
|
|
||||||
.map_with(|v, e| (v, e.span()))
|
|
||||||
.labelled("attrset");
|
|
||||||
|
|
||||||
let node = word
|
pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
|
||||||
.repeated()
|
self.input.kind(self.pos + n)
|
||||||
.collect()
|
}
|
||||||
.then(attrset.clone().or_not())
|
|
||||||
.map_with(|(name, params), extra| {
|
|
||||||
RawExpression::new(RawExpr::Node(name, params), extra.span())
|
|
||||||
})
|
|
||||||
// .or(var)
|
|
||||||
// .or(attrset
|
|
||||||
// .map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span())))
|
|
||||||
// .or(lit)
|
|
||||||
// .or(mat)
|
|
||||||
.labelled("node");
|
|
||||||
|
|
||||||
let atom = var
|
pub fn eat_succeeding_ws(&mut self) {
|
||||||
.or(lit)
|
self.push_ev(Event::Eat {
|
||||||
.or(mat)
|
count: self.input.meaningless_tail_len(),
|
||||||
.or(attrset.map_with(|attrset, extra| {
|
|
||||||
RawExpression::new(RawExpr::AttrSet(attrset), extra.span())
|
|
||||||
}))
|
|
||||||
.or(node.clone());
|
|
||||||
|
|
||||||
#[allow(clippy::let_and_return)]
|
|
||||||
let pipeline = atom
|
|
||||||
.clone()
|
|
||||||
.then(choice((
|
|
||||||
just(Token::Pipe).to(RawExpr::SimplePipe as fn(_, _) -> _),
|
|
||||||
just(Token::MappingPipe).to(RawExpr::MappingPipe as fn(_, _) -> _),
|
|
||||||
just(Token::NullPipe).to(RawExpr::NullPipe as fn(_, _) -> _),
|
|
||||||
)))
|
|
||||||
.repeated()
|
|
||||||
.foldr_with(atom, |(curr, pipe), next, extra| {
|
|
||||||
RawExpression::new(pipe(curr, next), extra.span())
|
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
pipeline
|
pub(crate) fn current(&self) -> SyntaxKind {
|
||||||
|
self.input.kind(self.pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn start(&mut self) -> Marker {
|
||||||
|
let pos = self.events.len();
|
||||||
|
self.push_ev(Event::tombstone());
|
||||||
|
Marker::new(pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
|
||||||
|
self.nth_at(0, kind)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
|
||||||
|
if !self.at(kind) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.do_bump();
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
|
||||||
|
self.nth(n) == kind
|
||||||
|
}
|
||||||
|
|
||||||
|
fn do_bump(&mut self) {
|
||||||
|
self.push_ev(Event::Eat {
|
||||||
|
count: self.input.preceding_meaningless(self.pos),
|
||||||
});
|
});
|
||||||
|
self.pos += 1;
|
||||||
|
}
|
||||||
|
|
||||||
let decls = just(Token::Def)
|
fn push_ev(&mut self, event: Event) {
|
||||||
.ignore_then(
|
self.events.push(event)
|
||||||
word.then_ignore(just(Token::Equals))
|
}
|
||||||
.then(expr.clone().map(|expr| expr))
|
|
||||||
.then_ignore(just(Token::Semicolon)),
|
|
||||||
)
|
|
||||||
.repeated()
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.map(|decls| File {
|
|
||||||
decls: IndexMap::from_iter(decls),
|
|
||||||
});
|
|
||||||
|
|
||||||
let single_expr = expr.map(|expr| File {
|
|
||||||
decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
|
|
||||||
});
|
|
||||||
|
|
||||||
just(Token::Def).rewind().ignore_then(decls).or(single_expr)
|
|
||||||
// single_expr.or(decls)
|
|
||||||
|
|
||||||
// expr.map(|expr| File {
|
|
||||||
// decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
|
|
||||||
// })
|
|
||||||
// .or(decl.repeated().collect::<Vec<_>>().map(|decls| File {
|
|
||||||
// decls: IndexMap::from_iter(decls),
|
|
||||||
// }))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod asg {
|
pub(crate) struct Marker {
|
||||||
use petgraph::graph::DiGraph;
|
pos: usize,
|
||||||
|
bomb: DropBomb,
|
||||||
use super::Spanned;
|
}
|
||||||
|
|
||||||
|
impl Marker {
|
||||||
|
pub(crate) fn new(pos: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
pos,
|
||||||
|
bomb: DropBomb::new("Marker must be completed or abandoned"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub(crate) fn complete(mut self, p: &mut Parser<'_, '_>, kind: SyntaxKind) -> CompletedMarker {
|
||||||
|
self.bomb.defuse();
|
||||||
|
match &mut p.events[self.pos] {
|
||||||
|
Event::Start { kind: slot, .. } => *slot = kind,
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
p.push_ev(Event::Finish);
|
||||||
|
|
||||||
|
CompletedMarker {
|
||||||
|
pos: self.pos,
|
||||||
|
kind,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn abandon(mut self, p: &mut Parser<'_, '_>) {
|
||||||
|
self.bomb.defuse();
|
||||||
|
if self.pos == p.events.len() - 1 {
|
||||||
|
match p.events.pop() {
|
||||||
|
Some(Event::Start {
|
||||||
|
kind: SyntaxKind::TOMBSTONE,
|
||||||
|
forward_parent: None,
|
||||||
|
}) => (),
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct CompletedMarker {
|
||||||
|
pos: usize,
|
||||||
|
kind: SyntaxKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CompletedMarker {
|
||||||
|
pub(crate) fn precede(self, p: &mut Parser<'_, '_>) -> Marker {
|
||||||
|
let new_pos = p.start();
|
||||||
|
|
||||||
|
match &mut p.events[self.pos] {
|
||||||
|
Event::Start { forward_parent, .. } => {
|
||||||
|
*forward_parent = Some(new_pos.pos - self.pos);
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
|
||||||
|
new_pos
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,24 +0,0 @@
|
||||||
use std::collections::{BTreeMap, HashMap};
|
|
||||||
|
|
||||||
use indexmap::IndexMap;
|
|
||||||
|
|
||||||
use super::Spanned;
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub struct File<'src> {
|
|
||||||
pub decls: IndexMap<Spanned<&'src str>, raw_ast::RawExpression<'src>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub mod raw_ast;
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub enum Lit<'src> {
|
|
||||||
// TODO: more bigger better number types
|
|
||||||
Int(i64),
|
|
||||||
Float(f64),
|
|
||||||
String(&'src str),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub mod lossless;
|
|
||||||
|
|
||||||
pub mod ast_tree;
|
|
|
@ -1,31 +0,0 @@
|
||||||
use ego_tree::Tree;
|
|
||||||
|
|
||||||
use crate::parser::Spanned;
|
|
||||||
|
|
||||||
use super::{File, Lit};
|
|
||||||
|
|
||||||
pub struct Ast<'src> {
|
|
||||||
tree: Tree<AstNode<'src>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
struct AstNode<'src> {
|
|
||||||
kind: NodeKind<'src>,
|
|
||||||
}
|
|
||||||
|
|
||||||
enum NodeKind<'src> {
|
|
||||||
Decl,
|
|
||||||
Ident(&'src str),
|
|
||||||
Instr,
|
|
||||||
Expr,
|
|
||||||
MappingPipe,
|
|
||||||
NullPipe,
|
|
||||||
MultiPipe,
|
|
||||||
Var(&'src str),
|
|
||||||
InputVar(&'src str),
|
|
||||||
AttrSet,
|
|
||||||
Attr,
|
|
||||||
Lit(Lit<'src>),
|
|
||||||
Matrix,
|
|
||||||
Dimensions(u16, u16),
|
|
||||||
MatrixRow,
|
|
||||||
}
|
|
|
@ -1,19 +0,0 @@
|
||||||
use self::lex::SyntaxKind;
|
|
||||||
|
|
||||||
pub mod parser;
|
|
||||||
|
|
||||||
pub mod lex;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
||||||
enum Lang {}
|
|
||||||
impl rowan::Language for Lang {
|
|
||||||
type Kind = SyntaxKind;
|
|
||||||
#[allow(unsafe_code)]
|
|
||||||
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
|
|
||||||
assert!(raw.0 <= SyntaxKind::ROOT as u16);
|
|
||||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
|
||||||
}
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
|
|
||||||
kind.into()
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,290 +0,0 @@
|
||||||
use std::borrow::Borrow;
|
|
||||||
|
|
||||||
use rowan::{
|
|
||||||
Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::parser::{
|
|
||||||
ast::lossless::{lex::SyntaxKind::*, Lang},
|
|
||||||
Span,
|
|
||||||
};
|
|
||||||
|
|
||||||
use self::parser_to_events::{to_events, Event};
|
|
||||||
|
|
||||||
use super::lex::{self, SyntaxKind};
|
|
||||||
|
|
||||||
pub mod parser_to_events {
|
|
||||||
use chumsky::prelude::*;
|
|
||||||
|
|
||||||
use crate::parser::ast::lossless::lex::{
|
|
||||||
self,
|
|
||||||
SyntaxKind::{self, *},
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
|
||||||
pub enum Event {
|
|
||||||
StartNode(SyntaxKind),
|
|
||||||
StartErr(SyntaxError),
|
|
||||||
EatToken,
|
|
||||||
FinishNode,
|
|
||||||
FinishErr,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
|
||||||
pub enum SyntaxError {
|
|
||||||
Expected(SyntaxKind),
|
|
||||||
AttrExpectedValue,
|
|
||||||
/// guessed if there's a newline and attr on next line without comma
|
|
||||||
/// should then suggest comma after attr
|
|
||||||
ExpectedCommaBetweenAttrs,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_events(tokens: &[(SyntaxKind, &str)]) -> Vec<Event> {
|
|
||||||
let only_toks: Vec<SyntaxKind> = tokens.iter().map(|(t, _)| *t).collect();
|
|
||||||
let res = parser().parse(&only_toks);
|
|
||||||
res.unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! padded {
|
|
||||||
($parser:expr) => {{
|
|
||||||
let ws = one_of([WHITESPACE, NEWLINE])
|
|
||||||
.to(Event::EatToken)
|
|
||||||
.repeated()
|
|
||||||
.collect::<Vec<Event>>();
|
|
||||||
ws.then($parser)
|
|
||||||
.then(ws)
|
|
||||||
.map(|((mut before, mut c), mut after)| {
|
|
||||||
before.append(&mut c);
|
|
||||||
before.append(&mut after);
|
|
||||||
before
|
|
||||||
})
|
|
||||||
}};
|
|
||||||
}
|
|
||||||
macro_rules! parenthesized {
|
|
||||||
($parser:expr) => {
|
|
||||||
just(L_PAREN)
|
|
||||||
.to(vec![Event::EatToken])
|
|
||||||
.then($parser)
|
|
||||||
.then(just(R_PAREN).to(vec![Event::EatToken]))
|
|
||||||
.map(|((mut before, mut c), mut after)| {
|
|
||||||
before.insert(0, Event::StartNode(PARENTHESIZED_EXPR));
|
|
||||||
before.append(&mut c);
|
|
||||||
before.append(&mut after);
|
|
||||||
before.push(Event::FinishNode);
|
|
||||||
before
|
|
||||||
})
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parser<'toks>() -> impl Parser<'toks, &'toks [SyntaxKind], Vec<Event>> {
|
|
||||||
let ws = one_of([WHITESPACE, NEWLINE])
|
|
||||||
.to(Event::EatToken)
|
|
||||||
.repeated()
|
|
||||||
.collect::<Vec<Event>>();
|
|
||||||
let ident = just(IDENT).to(vec![Event::EatToken]);
|
|
||||||
|
|
||||||
let expr = recursive(|expr| {
|
|
||||||
let lit = one_of([INT_NUM, FLOAT_NUM, STRING]).to(vec![
|
|
||||||
Event::StartNode(EXPR),
|
|
||||||
Event::EatToken,
|
|
||||||
Event::FinishNode,
|
|
||||||
]);
|
|
||||||
let attrset = just(L_CURLY)
|
|
||||||
.then(
|
|
||||||
padded!(just(IDENT).to(vec![
|
|
||||||
Event::StartNode(ATTR),
|
|
||||||
Event::StartNode(ATTR_NAME),
|
|
||||||
Event::EatToken,
|
|
||||||
Event::FinishNode
|
|
||||||
]))
|
|
||||||
.then(just(COLON))
|
|
||||||
.then(padded!(expr.clone().map(|mut exp: Vec<Event>| {
|
|
||||||
exp.insert(0, Event::StartNode(ATTR_VALUE));
|
|
||||||
exp.push(Event::FinishNode);
|
|
||||||
exp.push(Event::FinishNode);
|
|
||||||
exp
|
|
||||||
})))
|
|
||||||
.map(|((mut name, _), mut value)| {
|
|
||||||
// colon
|
|
||||||
name.push(Event::EatToken);
|
|
||||||
name.append(&mut value);
|
|
||||||
name
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
.then(just(R_CURLY))
|
|
||||||
.map(|((_, mut attrs), _)| {
|
|
||||||
attrs.insert(0, Event::StartNode(ATTR_SET));
|
|
||||||
attrs.insert(0, Event::EatToken);
|
|
||||||
attrs.push(Event::EatToken);
|
|
||||||
attrs.push(Event::FinishNode);
|
|
||||||
attrs
|
|
||||||
});
|
|
||||||
|
|
||||||
let atom = lit.clone().or(attrset).or(parenthesized!(expr));
|
|
||||||
|
|
||||||
let instr_name = ident
|
|
||||||
.clone()
|
|
||||||
.map(|mut v| {
|
|
||||||
v.insert(0, Event::StartNode(INSTR_NAME));
|
|
||||||
v
|
|
||||||
})
|
|
||||||
.foldl(
|
|
||||||
ws.then(ident).repeated(),
|
|
||||||
|mut ident, (mut ws, mut next)| {
|
|
||||||
ident.append(&mut ws);
|
|
||||||
ident.append(&mut next);
|
|
||||||
ident
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.map(|mut v| {
|
|
||||||
v.push(Event::FinishNode);
|
|
||||||
v
|
|
||||||
});
|
|
||||||
let instr = padded!(instr_name)
|
|
||||||
.then(
|
|
||||||
atom.clone()
|
|
||||||
.map(|mut v| {
|
|
||||||
v.insert(0, Event::StartNode(INSTR_PARAMS));
|
|
||||||
v
|
|
||||||
})
|
|
||||||
.foldl(
|
|
||||||
ws.then(atom.clone()).repeated(),
|
|
||||||
|mut cur, (mut ws, mut next)| {
|
|
||||||
cur.append(&mut ws);
|
|
||||||
cur.append(&mut next);
|
|
||||||
cur
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.map(|mut v| {
|
|
||||||
v.push(Event::FinishNode);
|
|
||||||
v
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
.map(|(mut name, mut params)| {
|
|
||||||
name.insert(0, Event::StartNode(INSTR));
|
|
||||||
name.append(&mut params);
|
|
||||||
name.push(Event::FinishNode);
|
|
||||||
name
|
|
||||||
});
|
|
||||||
padded!(instr.or(lit).or(atom))
|
|
||||||
});
|
|
||||||
expr
|
|
||||||
// .map(|(lit, mut ev)| lit.append(&mut ev));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(PartialEq, Eq)]
|
|
||||||
pub struct Parse {
|
|
||||||
pub green_node: GreenNode,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Debug for Parse {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
debug_print_green_node(NodeOrToken::Node(self.green_node.borrow()), f, 0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn debug_print_green_node(
|
|
||||||
node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
|
|
||||||
f: &mut std::fmt::Formatter<'_>,
|
|
||||||
lvl: i32,
|
|
||||||
) -> std::fmt::Result {
|
|
||||||
for _ in 0..lvl {
|
|
||||||
f.write_str(" ")?;
|
|
||||||
}
|
|
||||||
|
|
||||||
match node {
|
|
||||||
NodeOrToken::Node(n) => {
|
|
||||||
writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind()));
|
|
||||||
for c in n.children() {
|
|
||||||
debug_print_green_node(c, f, lvl + 1)?;
|
|
||||||
}
|
|
||||||
for _ in 0..lvl {
|
|
||||||
f.write_str(" ")?;
|
|
||||||
}
|
|
||||||
f.write_str("}\n")
|
|
||||||
}
|
|
||||||
NodeOrToken::Token(t) => {
|
|
||||||
writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct Parser<'src> {
|
|
||||||
tokens: Vec<(SyntaxKind, &'src str)>,
|
|
||||||
builder: GreenNodeBuilder<'src>,
|
|
||||||
errors: Vec<SyntaxError>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
enum SyntaxError {
|
|
||||||
Expected(SyntaxKind),
|
|
||||||
AttrExpectedValue,
|
|
||||||
/// guessed if there's a newline and attr on next line without comma
|
|
||||||
/// should then suggest comma after attr
|
|
||||||
ExpectedCommaBetweenAttrs,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse(src: &str) -> Parse {
|
|
||||||
let tokens = lex::lex(src);
|
|
||||||
Parser {
|
|
||||||
tokens,
|
|
||||||
builder: GreenNodeBuilder::new(),
|
|
||||||
errors: Vec::new(),
|
|
||||||
}
|
|
||||||
.parse()
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Parser<'_> {
|
|
||||||
fn parse(mut self) -> Parse {
|
|
||||||
let evs = to_events(&self.tokens);
|
|
||||||
self.builder.start_node(ROOT.into());
|
|
||||||
println!("evs: {evs:?}");
|
|
||||||
|
|
||||||
self.tokens.reverse();
|
|
||||||
|
|
||||||
for ev in evs {
|
|
||||||
match ev {
|
|
||||||
Event::StartNode(kind) => self.builder.start_node(kind.into()),
|
|
||||||
Event::StartErr(SyntaxError) => todo!(),
|
|
||||||
Event::EatToken => self.bump(),
|
|
||||||
Event::FinishNode => self.builder.finish_node(),
|
|
||||||
Event::FinishErr => todo!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.builder.finish_node();
|
|
||||||
Parse {
|
|
||||||
green_node: self.builder.finish(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Advance one token, adding it to the current branch of the tree builder.
|
|
||||||
fn bump(&mut self) {
|
|
||||||
let (kind, text) = self.tokens.pop().unwrap();
|
|
||||||
self.builder.token(kind.into(), text);
|
|
||||||
}
|
|
||||||
fn syntax_err(&mut self, err: SyntaxError) {
|
|
||||||
let (_, text) = self.tokens.pop().unwrap();
|
|
||||||
self.builder.token(PARSE_ERR.into(), text);
|
|
||||||
self.errors.push(err);
|
|
||||||
}
|
|
||||||
fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) {
|
|
||||||
self.builder.start_node_at(checkpoint, PARSE_ERR.into());
|
|
||||||
self.builder.finish_node();
|
|
||||||
self.errors.push(err);
|
|
||||||
}
|
|
||||||
fn expected(&mut self, expected: SyntaxKind) {
|
|
||||||
self.syntax_err(SyntaxError::Expected(expected))
|
|
||||||
}
|
|
||||||
/// Peek at the first unprocessed token
|
|
||||||
fn current(&self) -> Option<SyntaxKind> {
|
|
||||||
self.tokens.last().map(|(kind, _)| *kind)
|
|
||||||
}
|
|
||||||
fn next(&self) -> Option<SyntaxKind> {
|
|
||||||
self.tokens
|
|
||||||
.get(self.tokens.len() - 2)
|
|
||||||
.map(|(kind, _)| *kind)
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,50 +0,0 @@
|
||||||
use indexmap::IndexMap;
|
|
||||||
|
|
||||||
use super::super::Spanned;
|
|
||||||
|
|
||||||
use super::super::Span;
|
|
||||||
use super::Lit;
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub struct RawExpression<'src> {
|
|
||||||
pub expr: Box<RawExpr<'src>>,
|
|
||||||
pub span: Span,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'src> RawExpression<'src> {
|
|
||||||
pub fn new(expr: RawExpr<'src>, span: Span) -> Self {
|
|
||||||
Self {
|
|
||||||
expr: Box::new(expr),
|
|
||||||
span,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub enum RawExpr<'src> {
|
|
||||||
Node(
|
|
||||||
Vec<Spanned<&'src str>>,
|
|
||||||
Option<Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>>,
|
|
||||||
),
|
|
||||||
SimplePipe(RawExpression<'src>, RawExpression<'src>),
|
|
||||||
// NamingPipe(
|
|
||||||
// Box<Expression<'src>>,
|
|
||||||
// (Vec<Spanned<&'src str>>, Vec<Spanned<&'src str>>),
|
|
||||||
// Box<Expression<'src>>,
|
|
||||||
// ),
|
|
||||||
MappingPipe(RawExpression<'src>, RawExpression<'src>),
|
|
||||||
NullPipe(RawExpression<'src>, RawExpression<'src>),
|
|
||||||
MultiPipe(IndexMap<Spanned<&'src str>, RawExpression<'src>>),
|
|
||||||
// LetIn(
|
|
||||||
// IndexMap<Spanned<&'src str>, Box<Expression<'src>>>,
|
|
||||||
// Box<Expression<'src>>,
|
|
||||||
// ),
|
|
||||||
// $
|
|
||||||
Var(&'src str),
|
|
||||||
// @
|
|
||||||
InputVar(&'src str),
|
|
||||||
AttrSet(Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>),
|
|
||||||
Lit(Lit<'src>),
|
|
||||||
Matrix(Spanned<(u16, u16)>, Vec<RawExpression<'src>>),
|
|
||||||
List(Vec<RawExpression<'src>>),
|
|
||||||
}
|
|
6
crates/lang/src/parser/error.rs
Normal file
6
crates/lang/src/parser/error.rs
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
use crate::parser::syntax_kind::SyntaxKind;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum SyntaxError {
|
||||||
|
Expected(Vec<SyntaxKind>),
|
||||||
|
}
|
23
crates/lang/src/parser/events.rs
Normal file
23
crates/lang/src/parser/events.rs
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
use crate::parser::syntax_kind::SyntaxKind;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Event {
|
||||||
|
Start {
|
||||||
|
kind: SyntaxKind,
|
||||||
|
forward_parent: Option<usize>,
|
||||||
|
},
|
||||||
|
Finish,
|
||||||
|
Eat {
|
||||||
|
count: usize,
|
||||||
|
},
|
||||||
|
Error,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Event {
|
||||||
|
pub(crate) fn tombstone() -> Self {
|
||||||
|
Self::Start {
|
||||||
|
kind: SyntaxKind::TOMBSTONE,
|
||||||
|
forward_parent: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
14
crates/lang/src/parser/grammar.rs
Normal file
14
crates/lang/src/parser/grammar.rs
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
use crate::parser::syntax_kind::SyntaxKind::*;
|
||||||
|
|
||||||
|
use super::Parser;
|
||||||
|
|
||||||
|
mod expression;
|
||||||
|
|
||||||
|
pub fn source_file(p: &mut Parser) {
|
||||||
|
let root = p.start();
|
||||||
|
|
||||||
|
expression::expression(p);
|
||||||
|
p.eat_succeeding_ws();
|
||||||
|
|
||||||
|
root.complete(p, ROOT);
|
||||||
|
}
|
14
crates/lang/src/parser/grammar/expression.rs
Normal file
14
crates/lang/src/parser/grammar/expression.rs
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
use crate::parser::{syntax_kind::SyntaxKind::*, Parser};
|
||||||
|
|
||||||
|
use self::{instruction::instr, lit::literal};
|
||||||
|
|
||||||
|
mod instruction;
|
||||||
|
mod lit;
|
||||||
|
|
||||||
|
pub fn expression(p: &mut Parser) {
|
||||||
|
let expr = p.start();
|
||||||
|
|
||||||
|
instr(p);
|
||||||
|
|
||||||
|
expr.complete(p, EXPR);
|
||||||
|
}
|
30
crates/lang/src/parser/grammar/expression/instruction.rs
Normal file
30
crates/lang/src/parser/grammar/expression/instruction.rs
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
use crate::parser::{syntax_kind::SyntaxKind::*, Parser};
|
||||||
|
|
||||||
|
use super::lit::literal;
|
||||||
|
|
||||||
|
pub fn instr(p: &mut Parser) {
|
||||||
|
let instr = p.start();
|
||||||
|
|
||||||
|
instr_name(p);
|
||||||
|
instr_params(p);
|
||||||
|
|
||||||
|
instr.complete(p, INSTR);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn instr_name(p: &mut Parser) {
|
||||||
|
let instr_name = p.start();
|
||||||
|
|
||||||
|
while p.at(IDENT) {
|
||||||
|
p.do_bump();
|
||||||
|
}
|
||||||
|
|
||||||
|
instr_name.complete(p, INSTR_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn instr_params(p: &mut Parser) {
|
||||||
|
if let Some(start) = literal(p) {
|
||||||
|
while literal(p).is_some() {}
|
||||||
|
|
||||||
|
start.precede(p).complete(p, INSTR_PARAMS);
|
||||||
|
}
|
||||||
|
}
|
20
crates/lang/src/parser/grammar/expression/lit.rs
Normal file
20
crates/lang/src/parser/grammar/expression/lit.rs
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
use enumset::enum_set;
|
||||||
|
|
||||||
|
use crate::parser::{
|
||||||
|
syntax_kind::{SyntaxKind::*, TokenSet},
|
||||||
|
CompletedMarker, Parser,
|
||||||
|
};
|
||||||
|
|
||||||
|
const LIT_TOKENS: TokenSet = enum_set!(INT_NUM | FLOAT_NUM | STRING);
|
||||||
|
|
||||||
|
pub fn literal(p: &mut Parser) -> Option<CompletedMarker> {
|
||||||
|
if !LIT_TOKENS.contains(p.current()) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let lit = p.start();
|
||||||
|
|
||||||
|
p.do_bump();
|
||||||
|
|
||||||
|
Some(lit.complete(p, LITERAL))
|
||||||
|
}
|
61
crates/lang/src/parser/input.rs
Normal file
61
crates/lang/src/parser/input.rs
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
use crate::parser::syntax_kind::SyntaxKind;
|
||||||
|
|
||||||
|
pub struct Input<'src, 'toks> {
|
||||||
|
raw: &'toks Vec<(SyntaxKind, &'src str)>,
|
||||||
|
/// indices of the "meaningful" tokens (not whitespace etc)
|
||||||
|
/// includes newlines because those might indeed help with finding errors
|
||||||
|
meaningful: Vec<usize>,
|
||||||
|
/// indices of newlines for the purpose of easily querying them
|
||||||
|
/// can be helpful with missing commas etc
|
||||||
|
newlines: Vec<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'toks> Input<'src, 'toks> {
|
||||||
|
pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
|
||||||
|
let meaningful = raw_toks
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter_map(|(i, tok)| match tok.0 {
|
||||||
|
SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => None,
|
||||||
|
_ => Some(i),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let newlines = raw_toks
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter_map(|(i, tok)| match tok.0 {
|
||||||
|
SyntaxKind::NEWLINE => Some(i),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Self {
|
||||||
|
raw: raw_toks,
|
||||||
|
meaningful,
|
||||||
|
newlines,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::unwrap_used, reason = "meaningful indices cannot be invalid")]
|
||||||
|
pub(crate) fn kind(&self, idx: usize) -> SyntaxKind {
|
||||||
|
let Some(meaningful_idx) = self.meaningful.get(idx) else {
|
||||||
|
return SyntaxKind::EOF;
|
||||||
|
};
|
||||||
|
|
||||||
|
self.raw.get(*meaningful_idx).unwrap().0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn preceding_meaningless(&self, idx: usize) -> usize {
|
||||||
|
assert!(self.meaningful.len() > idx);
|
||||||
|
|
||||||
|
if idx == 0 {
|
||||||
|
1
|
||||||
|
} else {
|
||||||
|
self.meaningful[idx] - self.meaningful[idx - 1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn meaningless_tail_len(&self) -> usize {
|
||||||
|
self.raw.len() - (self.meaningful.last().unwrap() + 1)
|
||||||
|
}
|
||||||
|
}
|
113
crates/lang/src/parser/output.rs
Normal file
113
crates/lang/src/parser/output.rs
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
use rowan::{GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken};
|
||||||
|
use std::mem;
|
||||||
|
|
||||||
|
use crate::parser::syntax_kind::{Lang, SyntaxKind};
|
||||||
|
|
||||||
|
use super::{error::SyntaxError, events::Event};
|
||||||
|
|
||||||
|
pub struct Output {
|
||||||
|
pub green_node: GreenNode,
|
||||||
|
pub errors: Vec<SyntaxError>,
|
||||||
|
}
|
||||||
|
impl std::fmt::Debug for Output {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
debug_print_green_node(NodeOrToken::Node(&self.green_node), f, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn debug_print_green_node(
|
||||||
|
node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
|
||||||
|
f: &mut std::fmt::Formatter<'_>,
|
||||||
|
lvl: i32,
|
||||||
|
) -> std::fmt::Result {
|
||||||
|
for _ in 0..lvl {
|
||||||
|
f.write_str(" ")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
match node {
|
||||||
|
NodeOrToken::Node(n) => {
|
||||||
|
writeln!(f, "{:?} {{", Lang::kind_from_raw(node.kind()))?;
|
||||||
|
for c in n.children() {
|
||||||
|
debug_print_green_node(c, f, lvl + 1)?;
|
||||||
|
}
|
||||||
|
for _ in 0..lvl {
|
||||||
|
f.write_str(" ")?;
|
||||||
|
}
|
||||||
|
f.write_str("}\n")
|
||||||
|
}
|
||||||
|
NodeOrToken::Token(t) => {
|
||||||
|
writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Output {
|
||||||
|
pub fn from_parser_output(
|
||||||
|
mut raw_toks: Vec<(SyntaxKind, &str)>,
|
||||||
|
(mut events, errs): (Vec<Event>, Vec<SyntaxError>),
|
||||||
|
) -> Self {
|
||||||
|
let mut builder = GreenNodeBuilder::new();
|
||||||
|
let mut fw_parents = Vec::new();
|
||||||
|
raw_toks.reverse();
|
||||||
|
|
||||||
|
for i in 0..events.len() {
|
||||||
|
match mem::replace(&mut events[i], Event::tombstone()) {
|
||||||
|
Event::Start {
|
||||||
|
kind,
|
||||||
|
forward_parent,
|
||||||
|
} => {
|
||||||
|
if kind == SyntaxKind::TOMBSTONE && forward_parent.is_none() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
fw_parents.push(kind);
|
||||||
|
let mut idx = i;
|
||||||
|
let mut fp = forward_parent;
|
||||||
|
while let Some(fwd) = fp {
|
||||||
|
idx += fwd as usize;
|
||||||
|
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
|
||||||
|
Event::Start {
|
||||||
|
kind,
|
||||||
|
forward_parent,
|
||||||
|
} => {
|
||||||
|
fw_parents.push(kind);
|
||||||
|
forward_parent
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove whitespace bc it's ugly
|
||||||
|
while let Some((SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE, _)) =
|
||||||
|
raw_toks.last()
|
||||||
|
{
|
||||||
|
match events.iter_mut().find(|ev| matches!(ev, Event::Eat { .. })) {
|
||||||
|
Some(Event::Eat { count }) => *count -= 1,
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
|
||||||
|
let (tok, text): (SyntaxKind, &str) = raw_toks.pop().unwrap();
|
||||||
|
builder.token(tok.into(), text);
|
||||||
|
}
|
||||||
|
|
||||||
|
for kind in fw_parents.drain(..).rev() {
|
||||||
|
if kind != SyntaxKind::TOMBSTONE {
|
||||||
|
builder.start_node(kind.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Event::Finish => builder.finish_node(),
|
||||||
|
Event::Eat { count } => (0..count).for_each(|_| {
|
||||||
|
let (tok, text): (SyntaxKind, &str) = raw_toks.pop().unwrap();
|
||||||
|
builder.token(tok.into(), text);
|
||||||
|
}),
|
||||||
|
Event::Error => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Self {
|
||||||
|
green_node: builder.finish(),
|
||||||
|
errors: errs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
6
crates/lang/src/parser/parser.rs
Normal file
6
crates/lang/src/parser/parser.rs
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
//! The parser architecture is *heavily* inspired by (and partially copied and adapted from) the amazing rust-analyzer.
|
||||||
|
use drop_bomb::DropBomb;
|
||||||
|
|
||||||
|
use self::{error::SyntaxError, events::Event, input::Input};
|
||||||
|
|
||||||
|
use super::syntax_kind::SyntaxKind;
|
|
@ -1,7 +1,6 @@
|
||||||
|
use enumset::EnumSet;
|
||||||
use logos::Logos;
|
use logos::Logos;
|
||||||
|
|
||||||
use crate::parser::Span;
|
|
||||||
|
|
||||||
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
||||||
let mut lex = SyntaxKind::lexer(src);
|
let mut lex = SyntaxKind::lexer(src);
|
||||||
let mut r = Vec::new();
|
let mut r = Vec::new();
|
||||||
|
@ -13,8 +12,9 @@ pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
|
||||||
r
|
r
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
|
#[derive(enumset::EnumSetType, Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
|
||||||
#[repr(u16)]
|
#[repr(u16)]
|
||||||
|
#[enumset(no_super_impls)]
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
pub enum SyntaxKind {
|
pub enum SyntaxKind {
|
||||||
#[token("def")]
|
#[token("def")]
|
||||||
|
@ -39,6 +39,7 @@ pub enum SyntaxKind {
|
||||||
MAT_BODY,
|
MAT_BODY,
|
||||||
PARENTHESIZED_EXPR,
|
PARENTHESIZED_EXPR,
|
||||||
EXPR,
|
EXPR,
|
||||||
|
LITERAL,
|
||||||
#[token("(")]
|
#[token("(")]
|
||||||
L_PAREN,
|
L_PAREN,
|
||||||
#[token(")")]
|
#[token(")")]
|
||||||
|
@ -109,9 +110,29 @@ pub enum SyntaxKind {
|
||||||
PARSE_ERR,
|
PARSE_ERR,
|
||||||
LEX_ERR,
|
LEX_ERR,
|
||||||
ROOT,
|
ROOT,
|
||||||
|
EOF,
|
||||||
|
TOMBSTONE,
|
||||||
|
ERROR,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub type TokenSet = EnumSet<SyntaxKind>;
|
||||||
|
|
||||||
impl From<SyntaxKind> for rowan::SyntaxKind {
|
impl From<SyntaxKind> for rowan::SyntaxKind {
|
||||||
fn from(kind: SyntaxKind) -> Self {
|
fn from(kind: SyntaxKind) -> Self {
|
||||||
Self(kind as u16)
|
Self(kind as u16)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub enum Lang {}
|
||||||
|
impl rowan::Language for Lang {
|
||||||
|
type Kind = SyntaxKind;
|
||||||
|
#[allow(unsafe_code)]
|
||||||
|
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
|
||||||
|
assert!(raw.0 <= SyntaxKind::ROOT as u16);
|
||||||
|
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||||
|
}
|
||||||
|
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
|
||||||
|
kind.into()
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,143 +1 @@
|
||||||
use crate::parser::ast::File;
|
|
||||||
use crate::parser::parse;
|
|
||||||
use crate::tokens::Token;
|
|
||||||
use chumsky::input::Stream;
|
|
||||||
use chumsky::prelude::*;
|
|
||||||
use indexmap::IndexMap;
|
|
||||||
use logos::Logos;
|
|
||||||
|
|
||||||
// #[test]
|
|
||||||
// fn test_parse_node_with_params() {
|
|
||||||
// const INPUT: &str = "meow [ hello: $foo, world: @bar]";
|
|
||||||
// assert_eq!(
|
|
||||||
// parse(INPUT).unwrap(),
|
|
||||||
// File {
|
|
||||||
// decls: IndexMap::from_iter([(
|
|
||||||
// ("main", (0..0).into()),
|
|
||||||
// (
|
|
||||||
// Expr::Node(
|
|
||||||
// ("meow", (0..4).into()),
|
|
||||||
// Some((
|
|
||||||
// IndexMap::from_iter([
|
|
||||||
// (
|
|
||||||
// ("hello", (7..12).into()),
|
|
||||||
// Expr::Var(("foo", (14..18).into()))
|
|
||||||
// ),
|
|
||||||
// (
|
|
||||||
// ("world", (20..25).into()),
|
|
||||||
// Expr::InputVar(("bar", (27..31).into()))
|
|
||||||
// )
|
|
||||||
// ]),
|
|
||||||
// (5..32).into()
|
|
||||||
// ))
|
|
||||||
// ),
|
|
||||||
// (0..32).into()
|
|
||||||
// )
|
|
||||||
// )])
|
|
||||||
// }
|
|
||||||
// );
|
|
||||||
// }
|
|
||||||
|
|
||||||
// fn test_parse_multiple_top_level_complex() {
|
|
||||||
// const INPUT: &str = r"def main = meow
|
|
||||||
// | uwu
|
|
||||||
// [ foo: @bar
|
|
||||||
// , hello: world @| test [ more: params ] | yay
|
|
||||||
// ]
|
|
||||||
// !| awa
|
|
||||||
// @| nya
|
|
||||||
// | rawr;
|
|
||||||
|
|
||||||
// def test = meow
|
|
||||||
// [ hello: $foo
|
|
||||||
// , world: @bar
|
|
||||||
// ];
|
|
||||||
// ";
|
|
||||||
// assert_eq!(
|
|
||||||
// parse(INPUT).unwrap(),
|
|
||||||
// File {
|
|
||||||
// decls: IndexMap::from_iter([
|
|
||||||
// (
|
|
||||||
// ("main", (4..8).into()),
|
|
||||||
// (
|
|
||||||
// Expr::SimplePipe(
|
|
||||||
// Box::new(Expr::Node(("meow", (11..15).into()), None)),
|
|
||||||
// Box::new(Expr::NullPipe(
|
|
||||||
// Box::new(Expr::Node(
|
|
||||||
// ("uwu", (20..23).into()),
|
|
||||||
// Some((
|
|
||||||
// IndexMap::from_iter([
|
|
||||||
// (
|
|
||||||
// ("foo", (29..32).into()),
|
|
||||||
// Expr::InputVar(("bar", (34..38).into()))
|
|
||||||
// ),
|
|
||||||
// (
|
|
||||||
// ("hello", (44..49).into()),
|
|
||||||
// Expr::MappingPipe(
|
|
||||||
// Box::new(Expr::Node(
|
|
||||||
// ("world", (51..56).into()),
|
|
||||||
// None
|
|
||||||
// )),
|
|
||||||
// Box::new(Expr::SimplePipe(
|
|
||||||
// Box::new(Expr::Node(
|
|
||||||
// ("test", (60..64).into()),
|
|
||||||
// Some((
|
|
||||||
// IndexMap::from_iter([(
|
|
||||||
// ("more", (67..71).into()),
|
|
||||||
// Expr::Node(
|
|
||||||
// ("params", (73..79).into()),
|
|
||||||
// None
|
|
||||||
// )
|
|
||||||
// )]),
|
|
||||||
// (65..81).into()
|
|
||||||
// ))
|
|
||||||
// )),
|
|
||||||
// Box::new(Expr::Node(
|
|
||||||
// ("yay", (84..87).into()),
|
|
||||||
// None
|
|
||||||
// ))
|
|
||||||
// ))
|
|
||||||
// )
|
|
||||||
// )
|
|
||||||
// ]),
|
|
||||||
// (27..92).into()
|
|
||||||
// ))
|
|
||||||
// )),
|
|
||||||
// Box::new(Expr::MappingPipe(
|
|
||||||
// Box::new(Expr::Node(("awa", (97..100).into()), None)),
|
|
||||||
// Box::new(Expr::SimplePipe(
|
|
||||||
// Box::new(Expr::Node(("nya", (106..109).into()), None)),
|
|
||||||
// Box::new(Expr::Node(("rawr", (114..118).into()), None))
|
|
||||||
// ))
|
|
||||||
// ))
|
|
||||||
// ))
|
|
||||||
// ),
|
|
||||||
// (11..118).into()
|
|
||||||
// ),
|
|
||||||
// ),
|
|
||||||
// (
|
|
||||||
// ("test", (125..129).into()),
|
|
||||||
// (
|
|
||||||
// Expr::Node(
|
|
||||||
// ("meow", (132..136).into()),
|
|
||||||
// Some((
|
|
||||||
// IndexMap::from_iter([
|
|
||||||
// (
|
|
||||||
// ("hello", (141..146).into()),
|
|
||||||
// Expr::Var(("foo", (148..152).into()))
|
|
||||||
// ),
|
|
||||||
// (
|
|
||||||
// ("world", (156..161).into()),
|
|
||||||
// Expr::InputVar(("bar", (163..167).into()))
|
|
||||||
// )
|
|
||||||
// ]),
|
|
||||||
// (139..171).into()
|
|
||||||
// ))
|
|
||||||
// ),
|
|
||||||
// (132..171).into()
|
|
||||||
// )
|
|
||||||
// )
|
|
||||||
// ])
|
|
||||||
// }
|
|
||||||
// );
|
|
||||||
// }
|
|
||||||
|
|
|
@ -1,81 +0,0 @@
|
||||||
use logos::Logos;
|
|
||||||
|
|
||||||
#[derive(Logos, Debug, PartialEq, Eq, Clone)]
|
|
||||||
#[logos(skip r"[ \t\n\f]+")]
|
|
||||||
pub enum Token<'a> {
|
|
||||||
// hack!
|
|
||||||
// this isn't actually supposed to be in the language.
|
|
||||||
// i just can't figure out how to automatically choose between a top level declaration
|
|
||||||
// or a top level expression
|
|
||||||
// so a declaration needs the keyword def until i can figure this out
|
|
||||||
#[token("def")]
|
|
||||||
Def,
|
|
||||||
#[token("let")]
|
|
||||||
Let,
|
|
||||||
#[token("in")]
|
|
||||||
In,
|
|
||||||
#[token("mat")]
|
|
||||||
Mat,
|
|
||||||
#[regex("[\\d]+x[\\d]+", |lex| {
|
|
||||||
let (x, y) = lex.slice().split_once('x').expect("shouldn't fail to split");
|
|
||||||
// TODO: handle overflows etc
|
|
||||||
(x.parse().expect("should only match valid u16s"), y.parse().expect("should only match valid u16s"))
|
|
||||||
})]
|
|
||||||
Dimensions((u16, u16)),
|
|
||||||
#[regex("[\\d]+", |lex| lex.slice())]
|
|
||||||
Int(&'a str),
|
|
||||||
#[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)", |lex| lex.slice())]
|
|
||||||
Float(&'a str),
|
|
||||||
// TODO: more bigger better more complex string lexing
|
|
||||||
// TODO: templating?
|
|
||||||
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, |lex| lex.slice())]
|
|
||||||
String(&'a str),
|
|
||||||
#[token("+")]
|
|
||||||
Plus,
|
|
||||||
#[token("-")]
|
|
||||||
Minus,
|
|
||||||
#[token("*")]
|
|
||||||
Mult,
|
|
||||||
#[token("/")]
|
|
||||||
Div,
|
|
||||||
// TODO: figure out how to allow numbers in words?
|
|
||||||
#[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*", |lex| lex.slice().trim())]
|
|
||||||
Word(&'a str),
|
|
||||||
#[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
|
|
||||||
VarIdent(&'a str),
|
|
||||||
#[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
|
|
||||||
InputIdent(&'a str),
|
|
||||||
#[token(",")]
|
|
||||||
Comma,
|
|
||||||
#[token("|")]
|
|
||||||
Pipe,
|
|
||||||
#[token("@|")]
|
|
||||||
MappingPipe,
|
|
||||||
#[token("!|")]
|
|
||||||
NullPipe,
|
|
||||||
#[token("@")]
|
|
||||||
At,
|
|
||||||
#[token(">")]
|
|
||||||
GreaterThan,
|
|
||||||
#[token("=")]
|
|
||||||
Equals,
|
|
||||||
#[token(":")]
|
|
||||||
Colon,
|
|
||||||
#[token(";")]
|
|
||||||
Semicolon,
|
|
||||||
#[token("[")]
|
|
||||||
BracketOpen,
|
|
||||||
#[token("]")]
|
|
||||||
BracketClose,
|
|
||||||
#[token("(")]
|
|
||||||
ParenOpen,
|
|
||||||
#[token(")")]
|
|
||||||
ParenClose,
|
|
||||||
#[token("{")]
|
|
||||||
BraceOpen,
|
|
||||||
#[token("}")]
|
|
||||||
BraceClose,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests;
|
|
|
@ -1,135 +0,0 @@
|
||||||
use logos::Logos;
|
|
||||||
|
|
||||||
use super::Token;
|
|
||||||
|
|
||||||
/// generates tests for the lexer to avoid writing boilerplate
|
|
||||||
macro_rules! lexer_test {
|
|
||||||
($name:ident, $input:literal, $out:expr) => {
|
|
||||||
#[test]
|
|
||||||
fn $name() {
|
|
||||||
let lex = Token::lexer($input);
|
|
||||||
let toks = lex.map(|tok| tok.unwrap()).collect::<Vec<_>>();
|
|
||||||
assert_eq!(toks, $out);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_simple_pipeline,
|
|
||||||
"streamer | processor | sink",
|
|
||||||
[
|
|
||||||
Token::Word("streamer"),
|
|
||||||
Token::Pipe,
|
|
||||||
Token::Word("processor"),
|
|
||||||
Token::Pipe,
|
|
||||||
Token::Word("sink")
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_var_ident,
|
|
||||||
"$identifier",
|
|
||||||
[ Token::VarIdent("identifier") ]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_subgroup,
|
|
||||||
"subgroup(first, second) = a | b [ $first ] | c [ $second ]",
|
|
||||||
[
|
|
||||||
Token::Word("subgroup"),
|
|
||||||
Token::ParenOpen,
|
|
||||||
Token::Word("first"),
|
|
||||||
Token::Comma,
|
|
||||||
Token::Word("second"),
|
|
||||||
Token::ParenClose,
|
|
||||||
Token::Equals,
|
|
||||||
Token::Word("a"),
|
|
||||||
Token::Pipe,
|
|
||||||
Token::Word("b"),
|
|
||||||
Token::BracketOpen,
|
|
||||||
Token::VarIdent("first"),
|
|
||||||
Token::BracketClose,
|
|
||||||
Token::Pipe,
|
|
||||||
Token::Word("c"),
|
|
||||||
Token::BracketOpen,
|
|
||||||
Token::VarIdent("second"),
|
|
||||||
Token::BracketClose
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
text_lex_crossing_pipeline_reordering,
|
|
||||||
"a >first, second|second, first> c",
|
|
||||||
[
|
|
||||||
Token::Word("a"),
|
|
||||||
Token::GreaterThan,
|
|
||||||
Token::Word("first"),
|
|
||||||
Token::Comma,
|
|
||||||
Token::Word("second"),
|
|
||||||
Token::Pipe,
|
|
||||||
Token::Word("second"),
|
|
||||||
Token::Comma,
|
|
||||||
Token::Word("first"),
|
|
||||||
Token::GreaterThan,
|
|
||||||
Token::Word("c")
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_crossing_input_args,
|
|
||||||
"a >second| c { second: @first }",
|
|
||||||
[
|
|
||||||
Token::Word("a"),
|
|
||||||
Token::GreaterThan,
|
|
||||||
Token::Word("second"),
|
|
||||||
Token::Pipe,
|
|
||||||
Token::Word("c"),
|
|
||||||
Token::BraceOpen,
|
|
||||||
Token::Word("second"),
|
|
||||||
Token::Colon,
|
|
||||||
Token::InputIdent("first"),
|
|
||||||
Token::BraceClose
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_map_io_named,
|
|
||||||
"a @| c",
|
|
||||||
[
|
|
||||||
Token::Word("a"),
|
|
||||||
Token::MappingPipe,
|
|
||||||
Token::Word("c")
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_int_literal,
|
|
||||||
"42",
|
|
||||||
[
|
|
||||||
Token::Int("42")
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_float_literal_0,
|
|
||||||
"1.5",
|
|
||||||
[
|
|
||||||
Token::Float("1.5")
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_float_literal_1,
|
|
||||||
"42.",
|
|
||||||
[
|
|
||||||
Token::Float("42.")
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
lexer_test! {
|
|
||||||
test_lex_float_literal_2,
|
|
||||||
".42",
|
|
||||||
[
|
|
||||||
Token::Float(".42")
|
|
||||||
]
|
|
||||||
}
|
|
|
@ -1,7 +1 @@
|
||||||
hello world test
|
hello world test 1.5 42 69 "gay"
|
||||||
42
|
|
||||||
(another command 3.14 "meow")
|
|
||||||
"uwu"
|
|
||||||
{
|
|
||||||
some: attrs 42 (meow gay 1)
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue