lang: massive amounts of parser and ast pain

This commit is contained in:
Schrottkatze 2024-04-11 03:23:03 +02:00
parent 881a987b2f
commit 9da157ff4a
Signed by: schrottkatze
SSH key fingerprint: SHA256:hXb3t1vINBFCiDCmhRABHX5ocdbLiKyCdKI4HK2Rbbc
16 changed files with 900 additions and 170 deletions

52
Cargo.lock generated
View file

@ -174,9 +174,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "chumsky" name = "chumsky"
version = "1.0.0-alpha.6" version = "1.0.0-alpha.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9c28d4e5dd9a9262a38b231153591da6ce1471b818233f4727985d3dd0ed93c" checksum = "c7b80276986f86789dc56ca6542d53bba9cda3c66091ebbe7bd96fc1bdf20f1f"
dependencies = [ dependencies = [
"hashbrown", "hashbrown",
"regex-automata", "regex-automata",
@ -237,6 +237,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "countme"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636"
[[package]] [[package]]
name = "crc32fast" name = "crc32fast"
version = "1.3.2" version = "1.3.2"
@ -322,6 +328,12 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "ego-tree"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
[[package]] [[package]]
name = "either" name = "either"
version = "1.9.0" version = "1.9.0"
@ -506,9 +518,11 @@ dependencies = [
"ariadne", "ariadne",
"chumsky", "chumsky",
"clap", "clap",
"ego-tree",
"indexmap", "indexmap",
"logos", "logos",
"petgraph", "petgraph",
"rowan",
] ]
[[package]] [[package]]
@ -589,6 +603,15 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.7.1" version = "0.7.1"
@ -863,6 +886,25 @@ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]]
name = "rowan"
version = "0.15.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a58fa8a7ccff2aec4f39cc45bf5f985cec7125ab271cf681c279fd00192b49"
dependencies = [
"countme",
"hashbrown",
"memoffset",
"rustc-hash",
"text-size",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.16" version = "1.0.16"
@ -973,6 +1015,12 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "text-size"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233"
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.55" version = "1.0.55"

View file

@ -7,11 +7,13 @@ edition = "2021"
[dependencies] [dependencies]
logos = "0.14" logos = "0.14"
chumsky = {version= "1.0.0-alpha.6", features=["label"]} chumsky = {version= "1.0.0-alpha.7", features=["label"]}
petgraph = { workspace = true} petgraph = { workspace = true}
indexmap = "2.2.6" indexmap = "2.2.6"
clap = { version = "4", features = ["derive"] } clap = { version = "4", features = ["derive"] }
ariadne = "0.4.0" ariadne = "0.4.0"
ego-tree = "0.6.2"
rowan = "0.15.15"
[lints] [lints]
workspace = true workspace = true

View file

@ -1,7 +1,10 @@
use std::{fs, path::PathBuf}; use std::{fs, path::PathBuf};
use clap::Parser; use clap::Parser;
use lang::{err_reporting::ErrorCollector, parser::parse}; use lang::{
err_reporting::ErrorCollector,
parser::ast::lossless::{lex, parser},
};
#[derive(Parser)] #[derive(Parser)]
struct Args { struct Args {
@ -13,20 +16,24 @@ fn main() {
let args = Args::parse(); let args = Args::parse();
let n = args.file.clone(); let n = args.file.clone();
let f = fs::read_to_string(n.clone()).expect("failed to read file"); let f = fs::read_to_string(n.clone()).expect("failed to read file");
let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]); println!("toks: {:?}", lex::lex(&f));
let parse_res = parser::parse(&f);
println!("parse: {:?}", parse_res);
// dbg!(lex::lex(&f));
// let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]);
println!("file: {f}\n"); // println!("file: {f}\n");
let parse_res = parse(&f); // let parse_res = parse(&f);
err_collector.insert_many( // err_collector.insert_many(
args.file.to_str().unwrap(), // args.file.to_str().unwrap(),
lang::err_reporting::Stage::Parse, // lang::err_reporting::Stage::Parse,
parse_res // parse_res
.errors() // .errors()
.into_iter() // .into_iter()
.map(|e| e.to_owned()) // .map(|e| e.to_owned())
.collect::<Vec<_>>(), // .collect::<Vec<_>>(),
); // );
err_collector.report_raw(); // err_collector.report_raw();
println!("res: {:?}", parse_res); // println!("res: {:?}", parse_res);
} }

View file

@ -1,5 +1,3 @@
use std::ops::Range;
use chumsky::{ use chumsky::{
error::Rich, error::Rich,
input::{Stream, ValueInput}, input::{Stream, ValueInput},
@ -10,19 +8,22 @@ use chumsky::{
IterParser, IterParser,
}; };
use indexmap::IndexMap; use indexmap::IndexMap;
use logos::{Logos, Source}; use logos::Logos;
use crate::tokens::Token; use crate::tokens::Token;
pub mod ast; pub mod ast;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
use self::ast::{Expr, Expression, File}; use self::ast::{
raw_ast::{RawExpr, RawExpression},
File,
};
pub type Span = SimpleSpan; pub type Span = SimpleSpan;
pub type Spanned<T> = (T, Span); pub type Spanned<T> = (T, Span);
pub fn parse<'src>(src: &'src str) -> ParseResult<File<'_>, Rich<'_, Token<'_>>> { pub fn parse(src: &str) -> ParseResult<File<'_>, Rich<'_, Token<'_>>> {
let toks: Vec<_> = Token::lexer(src) let toks: Vec<_> = Token::lexer(src)
.spanned() .spanned()
.map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s))) .map(|(t, s)| (t.expect("TODO: add lexer error(s)"), Span::from(s)))
@ -35,22 +36,39 @@ pub(crate) fn parser<
'src: 'tokens, 'src: 'tokens,
I: ValueInput<'tokens, Token = Token<'src>, Span = Span>, I: ValueInput<'tokens, Token = Token<'src>, Span = Span>,
>() -> impl Parser<'tokens, I, File<'src>, extra::Err<Rich<'tokens, Token<'src>, Span>>> { >() -> impl Parser<'tokens, I, File<'src>, extra::Err<Rich<'tokens, Token<'src>, Span>>> {
let word = select! { Token::Word(word) => word }; let word = select! { Token::Word(word) = e => (word, e.span())};
let expr = recursive(|expr| { let expr = recursive(|expr| {
let lit = select! { let lit = select! {
Token::Int(i) = e => Expression::new(Expr::Lit(ast::Lit::Int(i.parse().unwrap())), e.span()), Token::Int(i) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Int(i.parse().expect("TODO: handle better"))), e.span()),
Token::Float(f) = e => Expression::new(Expr::Lit(ast::Lit::Float(f.parse().unwrap())), e.span()), Token::Float(f) = e => RawExpression::new(RawExpr::Lit(ast::Lit::Float(f.parse().expect("TODO: handle better"))), e.span()),
Token::String(s) = e => RawExpression::new(RawExpr::Lit(ast::Lit::String(s.strip_prefix('"').expect("a").strip_suffix('"').expect("b"))), e.span())
}; };
let mat = just(Token::Mat)
.ignore_then(select! { Token::Dimensions(dimensions) = e => (dimensions, e.span())})
.then(
lit.separated_by(just(Token::Comma))
.collect::<Vec<_>>()
.separated_by(just(Token::Semicolon))
.collect::<Vec<_>>()
.delimited_by(just(Token::BracketOpen), just(Token::BracketClose)),
)
.map_with(|(dimensions, data), e| {
// TODO: Validation and proper error handling/reporting
// (validation = validating the matrix dimensions)
RawExpression::new(
RawExpr::Matrix(dimensions, data.into_iter().flatten().collect()),
e.span(),
)
});
let var = select! { let var = select! {
Token::VarIdent(name) => (Expr::Var as fn(_) -> _, name), Token::VarIdent(name) => (RawExpr::Var as fn(_) -> _, name),
Token::InputIdent(name) => (Expr::InputVar as fn(_) -> _, name) Token::InputIdent(name) => (RawExpr::InputVar as fn(_) -> _, name)
} }
.map_with(|(item_type, name), extra| Expression::new(item_type(name), extra.span())) .map_with(|(item_type, name), extra| RawExpression::new(item_type(name), extra.span()))
.labelled("variable"); .labelled("variable");
let attrset = word let attrset = word
.map_with(|n, e| (n, e.span()))
.labelled("attr name") .labelled("attr name")
.then_ignore(just(Token::Colon)) .then_ignore(just(Token::Colon))
.then(expr) .then(expr)
@ -63,57 +81,72 @@ pub(crate) fn parser<
.labelled("attrset"); .labelled("attrset");
let node = word let node = word
.map_with(|v, e| (v, e.span())) .repeated()
.collect()
.then(attrset.clone().or_not()) .then(attrset.clone().or_not())
.map_with(|(name, params), extra| { .map_with(|(name, params), extra| {
Expression::new(Expr::Node(name, params), extra.span()) RawExpression::new(RawExpr::Node(name, params), extra.span())
}) })
.or(var) // .or(var)
.or(attrset // .or(attrset
.map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span()))) // .map_with(|attrset, extra| Expression::new(Expr::AttrSet(attrset), extra.span())))
// .or(lit)
// .or(mat)
.labelled("node"); .labelled("node");
let atom = var
.or(lit)
.or(mat)
.or(attrset.map_with(|attrset, extra| {
RawExpression::new(RawExpr::AttrSet(attrset), extra.span())
}))
.or(node.clone());
#[allow(clippy::let_and_return)] #[allow(clippy::let_and_return)]
let pipeline = node let pipeline = atom
.clone() .clone()
.then(choice(( .then(choice((
just(Token::Pipe).to(Expr::SimplePipe as fn(_, _) -> _), just(Token::Pipe).to(RawExpr::SimplePipe as fn(_, _) -> _),
just(Token::MappingPipe).to(Expr::MappingPipe as fn(_, _) -> _), just(Token::MappingPipe).to(RawExpr::MappingPipe as fn(_, _) -> _),
just(Token::NullPipe).to(Expr::NullPipe as fn(_, _) -> _), just(Token::NullPipe).to(RawExpr::NullPipe as fn(_, _) -> _),
))) )))
.repeated() .repeated()
.foldr_with(node, |(curr, pipe), next, extra| { .foldr_with(atom, |(curr, pipe), next, extra| {
Expression::new(pipe(Box::new(curr), Box::new(next)), extra.span()) RawExpression::new(pipe(curr, next), extra.span())
}); });
pipeline pipeline
}); });
let decl = just(Token::Def).ignore_then( let decls = just(Token::Def)
word.map_with(|n, e| (n, e.span())) .ignore_then(
.then_ignore(just(Token::Equals)) word.then_ignore(just(Token::Equals))
.then(expr.clone().map(|expr| expr)) .then(expr.clone().map(|expr| expr))
.then_ignore(just(Token::SemiColon)), .then_ignore(just(Token::Semicolon)),
); )
.repeated()
expr.map(|expr| File { .collect::<Vec<_>>()
decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]), .map(|decls| File {
})
.or(decl.repeated().collect::<Vec<_>>().map(|decls| File {
decls: IndexMap::from_iter(decls), decls: IndexMap::from_iter(decls),
})) });
let single_expr = expr.map(|expr| File {
decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
});
just(Token::Def).rewind().ignore_then(decls).or(single_expr)
// single_expr.or(decls)
// expr.map(|expr| File {
// decls: IndexMap::from_iter([(("main", (0..0).into()), expr)]),
// })
// .or(decl.repeated().collect::<Vec<_>>().map(|decls| File {
// decls: IndexMap::from_iter(decls),
// }))
} }
pub mod asg { pub mod asg {
use petgraph::graph::DiGraph; use petgraph::graph::DiGraph;
use super::Spanned; use super::Spanned;
pub struct Asg<'src> {
graph: DiGraph<AsgNode<'src>, String>,
}
enum AsgNode<'src> {
Node(Spanned<&'src str>),
}
} }

View file

@ -2,51 +2,14 @@ use std::collections::{BTreeMap, HashMap};
use indexmap::IndexMap; use indexmap::IndexMap;
use super::{Span, Spanned}; use super::Spanned;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct File<'src> { pub struct File<'src> {
pub decls: IndexMap<Spanned<&'src str>, Expression<'src>>, pub decls: IndexMap<Spanned<&'src str>, raw_ast::RawExpression<'src>>,
} }
#[derive(Debug, PartialEq)] pub mod raw_ast;
pub struct Expression<'src> {
pub expr: Expr<'src>,
pub span: Span,
}
impl<'src> Expression<'src> {
pub fn new(expr: Expr<'src>, span: Span) -> Self {
Self { expr, span }
}
}
#[derive(Debug, PartialEq)]
pub enum Expr<'src> {
Node(
Spanned<&'src str>,
Option<Spanned<IndexMap<Spanned<&'src str>, Expression<'src>>>>,
),
SimplePipe(Box<Expression<'src>>, Box<Expression<'src>>),
// NamingPipe(
// Box<Expression<'src>>,
// (Vec<Spanned<&'src str>>, Vec<Spanned<&'src str>>),
// Box<Expression<'src>>,
// ),
MappingPipe(Box<Expression<'src>>, Box<Expression<'src>>),
NullPipe(Box<Expression<'src>>, Box<Expression<'src>>),
MultiPipe(IndexMap<Spanned<&'src str>, Expression<'src>>),
// LetIn(
// IndexMap<Spanned<&'src str>, Box<Expression<'src>>>,
// Box<Expression<'src>>,
// ),
// $
Var(&'src str),
// @
InputVar(&'src str),
AttrSet(Spanned<IndexMap<Spanned<&'src str>, Expression<'src>>>),
Lit(Lit<'src>),
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Lit<'src> { pub enum Lit<'src> {
@ -55,3 +18,7 @@ pub enum Lit<'src> {
Float(f64), Float(f64),
String(&'src str), String(&'src str),
} }
pub mod lossless;
pub mod ast_tree;

View file

@ -0,0 +1,31 @@
use ego_tree::Tree;
use crate::parser::Spanned;
use super::{File, Lit};
/// Tree-shaped AST stored in an [`ego_tree::Tree`] of [`AstNode`]s.
///
/// `'src` is the lifetime of the string data borrowed by the node kinds.
pub struct Ast<'src> {
    /// Backing tree storage; private to this module.
    tree: Tree<AstNode<'src>>,
}
/// A single node of the [`Ast`] tree; currently it only carries its [`NodeKind`].
struct AstNode<'src> {
    /// What this node represents.
    kind: NodeKind<'src>,
}
/// The kinds of node an [`AstNode`] can be.
///
/// Variants with a payload store their data inline; the other variants carry
/// no data of their own (presumably their content lives in child nodes of the
/// tree — TODO confirm once the tree is actually built).
///
/// NOTE(review): there are `MappingPipe`, `NullPipe` and `MultiPipe` variants
/// but no counterpart to `RawExpr::SimplePipe` — confirm whether that is
/// intentional.
enum NodeKind<'src> {
    Decl,
    /// An identifier, borrowed as a string slice.
    Ident(&'src str),
    Instr,
    Expr,
    MappingPipe,
    NullPipe,
    MultiPipe,
    /// A variable name, borrowed as a string slice.
    Var(&'src str),
    /// An input variable name, borrowed as a string slice.
    InputVar(&'src str),
    AttrSet,
    Attr,
    /// A literal value.
    Lit(Lit<'src>),
    Matrix,
    /// A pair of `u16` matrix dimensions.
    Dimensions(u16, u16),
    MatrixRow,
}

View file

@ -0,0 +1,19 @@
use self::lex::SyntaxKind;
pub mod parser;
pub mod lex;
/// Uninhabited marker type that carries the [`rowan::Language`] implementation
/// for this language; it is never instantiated.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum Lang {}
/// Maps between rowan's raw `u16` kinds and this crate's [`SyntaxKind`].
impl rowan::Language for Lang {
    type Kind = SyntaxKind;

    /// Converts a raw rowan kind back into a [`SyntaxKind`].
    ///
    /// # Panics
    ///
    /// Panics if the raw value is larger than the discriminant of
    /// `SyntaxKind::ROOT`.
    // `unsafe` is needed for the transmute below; see the SAFETY comment.
    #[allow(unsafe_code)]
    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
        assert!(raw.0 <= SyntaxKind::ROOT as u16);
        // SAFETY: `SyntaxKind` is `#[repr(u16)]`, its first variant is pinned to 0
        // and the remaining variants use implicit consecutive discriminants with
        // `ROOT` declared last, so every value in `0..=ROOT` (checked by the
        // assertion above) is a valid `SyntaxKind`.
        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
    }

    /// Converts a [`SyntaxKind`] into rowan's raw kind (its `u16` discriminant).
    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
        rowan::SyntaxKind::from(kind)
    }
}

View file

@ -0,0 +1,118 @@
use logos::Logos;
use crate::parser::Span;
/// Lexes `src` into `(kind, text)` pairs.
///
/// Input the lexer cannot recognise is reported as `SyntaxKind::LEX_ERR`
/// instead of aborting. The result is returned in *reverse* source order, so
/// the last element of the vector is the first token of the input.
pub fn lex(src: &str) -> Vec<(SyntaxKind, &str)> {
    let mut tokens: Vec<_> = SyntaxKind::lexer(src)
        .spanned()
        .map(|(kind, span)| (kind.unwrap_or(SyntaxKind::LEX_ERR), &src[span]))
        .collect();

    tokens.reverse();
    tokens
}
/// Every kind of token and node that can appear in the lossless syntax tree.
///
/// Variants annotated with `#[token]` / `#[regex]` are produced by the logos
/// lexer. Variants without such an attribute are never produced by the lexer
/// itself; they are used as node kinds and error markers by the code that
/// builds the tree.
///
/// The declaration order is significant: the enum is `#[repr(u16)]`, starts
/// at `0` and must keep `ROOT` as its last variant, because
/// `Lang::kind_from_raw` validates raw values against `ROOT` before
/// transmuting them back into a `SyntaxKind`.
#[derive(Logos, Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
#[repr(u16)]
#[allow(non_camel_case_types)]
pub enum SyntaxKind {
    #[token("def")]
    DEF_KW = 0,
    #[token("let")]
    LET_KW,
    #[token("in")]
    IN_KW,
    #[token("mat")]
    MAT_KW,
    #[regex("[\\d]+x[\\d]+")]
    PAT_DIMENSIONS,
    #[regex("[\\d]+")]
    INT_NUM,
    #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)")]
    FLOAT_NUM,
    // The `\uXXXX` alternative needs its own leading backslash: without it the
    // alternative only matched a literal `u` plus four hex digits (already
    // covered by `[^"\\]`), so a string containing a unicode escape could
    // never lex as STRING.
    #[regex(r#""([^"\\]|\\["\\bnfrt]|\\u[a-fA-F0-9]{4})*""#)]
    STRING,
    MATRIX,
    DECL,
    LIST,
    MAT_BODY,
    PARENTHESIZED_EXPR,
    EXPR,
    #[token("(")]
    L_PAREN,
    #[token(")")]
    R_PAREN,
    #[token("{")]
    L_CURLY,
    #[token("}")]
    R_CURLY,
    #[token("[")]
    L_BRACK,
    #[token("]")]
    R_BRACK,
    #[token("<")]
    L_ANGLE,
    #[token(">")]
    R_ANGLE,
    #[token("+")]
    PLUS,
    #[token("-")]
    MINUS,
    #[token("*")]
    STAR,
    #[token("/")]
    SLASH,
    #[token("%")]
    PERCENT,
    #[token("^")]
    CARET,
    INSTR,
    INSTR_NAME,
    INSTR_PARAMS,
    ATTR_SET,
    ATTR,
    ATTR_NAME,
    ATTR_VALUE,
    #[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*")]
    IDENT,
    #[regex("\\$[a-zA-Z0-9_\\-]+")]
    VAR,
    #[regex("\\@[a-zA-Z0-9_\\-]+")]
    INPUT_VAR,
    #[token("$")]
    DOLLAR,
    #[token("@")]
    AT,
    #[token(",")]
    COMMA,
    #[token("|")]
    PIPE,
    #[token("@|")]
    MAPPING_PIPE,
    #[token("!|")]
    NULL_PIPE,
    #[token("=")]
    EQ,
    #[token(":")]
    COLON,
    #[token(";")]
    SEMICOLON,
    #[token(".")]
    DOT,
    #[token("!")]
    BANG,
    #[regex("[ \\t\\f]+")]
    WHITESPACE,
    #[token("\n")]
    NEWLINE,
    /// Marks tokens/nodes the parser rejected.
    PARSE_ERR,
    /// Substituted by `lex` for input the lexer could not recognise.
    LEX_ERR,
    /// Root node of the tree. Must stay the last variant (see the type docs).
    ROOT,
}
/// A [`SyntaxKind`] converts to rowan's raw kind by taking its `u16` discriminant.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        let discriminant = kind as u16;
        Self(discriminant)
    }
}

View file

@ -0,0 +1,437 @@
use std::borrow::Borrow;
use chumsky::container::Container;
use rowan::{
Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken,
};
use crate::parser::{
ast::lossless::{lex::SyntaxKind::*, Lang},
Span,
};
use super::lex::{self, SyntaxKind};
#[derive(PartialEq, Eq)]
pub struct Parse {
pub green_node: GreenNode,
}
impl std::fmt::Debug for Parse {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
debug_print_green_node(NodeOrToken::Node(self.green_node.borrow()), f, 0)
}
}
/// Recursively writes `node` to `f` as an indented tree dump.
///
/// Nodes are written as `KIND {`, followed by their children one level deeper
/// and a closing `}`; tokens are written as `KIND "text";`. `lvl` is the
/// current nesting depth and controls how many indentation units precede the
/// line.
///
/// Any formatter error is propagated to the caller.
fn debug_print_green_node(
    node: NodeOrToken<&GreenNodeData, &GreenTokenData>,
    f: &mut std::fmt::Formatter<'_>,
    lvl: i32,
) -> std::fmt::Result {
    for _ in 0..lvl {
        f.write_str(" ")?;
    }

    match node {
        NodeOrToken::Node(n) => {
            // The result of this write used to be dropped, silently swallowing
            // formatter errors for the node header.
            writeln!(f, "{:?} {{", Lang::kind_from_raw(n.kind()))?;
            for c in n.children() {
                debug_print_green_node(c, f, lvl + 1)?;
            }
            for _ in 0..lvl {
                f.write_str(" ")?;
            }
            f.write_str("}\n")
        }
        NodeOrToken::Token(t) => {
            writeln!(f, "{:?} {:?};", Lang::kind_from_raw(t.kind()), t.text())
        }
    }
}
/// Hand-written parser state that builds a rowan green tree from lexed tokens.
#[derive(Debug)]
struct Parser<'src> {
    /// Remaining tokens in reverse source order: the next token to process is
    /// the *last* element, so advancing is a `pop()`.
    tokens: Vec<(SyntaxKind, &'src str)>,
    /// Builder the tokens and nodes are fed into.
    builder: GreenNodeBuilder<'src>,
    /// Syntax errors recorded so far.
    errors: Vec<SyntaxError>,
}
/// Errors the parser can record while building the tree.
#[derive(Debug, PartialEq, Eq)]
enum SyntaxError {
    /// A token or node of the given kind was expected at this position.
    Expected(SyntaxKind),
    /// An attribute had a name and colon but no value expression.
    AttrExpectedValue,
    /// guessed if there's a newline and attr on next line without comma
    /// should then suggest comma after attr
    ExpectedCommaBetweenAttrs,
}
/// Lexes `src` and parses the resulting tokens into a [`Parse`].
///
/// The parser starts with a fresh green-node builder and an empty error list.
pub fn parse(src: &str) -> Parse {
    let parser = Parser {
        tokens: lex::lex(src),
        builder: GreenNodeBuilder::new(),
        errors: Vec::new(),
    };

    parser.parse()
}
impl Parser<'_> {
fn parse(mut self) -> Parse {
self.start_node(ROOT);
match self.expr(None) {
expr::ExprRes::Ok => (),
expr::ExprRes::Eof => (),
expr::ExprRes::NoExpr => todo!(),
}
self.builder.finish_node();
Parse {
green_node: self.builder.finish(),
}
}
fn start_node(&mut self, kind: SyntaxKind) {
self.builder.start_node(kind.into());
}
fn finish_node(&mut self) {
self.builder.finish_node();
}
/// Advance one token, adding it to the current branch of the tree builder.
fn bump(&mut self) {
let (kind, text) = self.tokens.pop().unwrap();
self.builder.token(kind.into(), text);
}
fn syntax_err(&mut self, err: SyntaxError) {
let (_, text) = self.tokens.pop().unwrap();
self.builder.token(PARSE_ERR.into(), text);
self.errors.push(err);
}
fn syntax_err_by_checkpoint(&mut self, checkpoint: Checkpoint, err: SyntaxError) {
self.builder.start_node_at(checkpoint, PARSE_ERR.into());
self.finish_node();
self.errors.push(err);
}
fn expected(&mut self, expected: SyntaxKind) {
self.syntax_err(SyntaxError::Expected(expected))
}
/// Peek at the first unprocessed token
fn current(&self) -> Option<SyntaxKind> {
self.tokens.last().map(|(kind, _)| *kind)
}
fn next(&self) -> Option<SyntaxKind> {
self.tokens
.get(self.tokens.len() - 2)
.map(|(kind, _)| *kind)
}
fn skip_ws(&mut self) {
while self.current() == Some(WHITESPACE) || self.current() == Some(NEWLINE) {
self.bump()
}
}
fn skip_ws_without_newlines(&mut self) {
while self.current() == Some(WHITESPACE) {
self.bump()
}
}
}
/// Expression and atom parsing.
mod expr {
    use rowan::Checkpoint;

    use super::{attrset::AttrsetRes, instr::NodeRes, Parser};
    use crate::parser::{ast::lossless::lex::SyntaxKind::*, Span};

    impl Parser<'_> {
        /// Parses one expression.
        ///
        /// Leading whitespace/newlines are consumed first. `start` is the
        /// checkpoint at which the wrapping node is inserted; when `None`, a
        /// checkpoint is taken after the whitespace. An `IDENT` starts an
        /// instruction (wrapped in an `EXPR` node); anything else is handed
        /// to [`Parser::atom`].
        pub(super) fn expr(&mut self, start: Option<Checkpoint>) -> ExprRes {
            self.skip_ws();
            let start = start.unwrap_or_else(|| self.builder.checkpoint());
            match self.current() {
                Some(IDENT) => {
                    let expr_res = match self.instr() {
                        NodeRes::Ok => ExprRes::Ok,
                        NodeRes::Eof => ExprRes::Eof,
                    };
                    self.builder.start_node_at(start, EXPR.into());
                    self.finish_node();
                    expr_res
                }
                Some(_) => self.atom(Some(start)),
                None => ExprRes::Eof,
            }
        }

        /// Parses a literal, an attribute set or a parenthesized expression.
        ///
        /// Returns [`ExprRes::NoExpr`] without consuming the current token
        /// when it cannot start any of those.
        pub(super) fn atom(&mut self, start: Option<Checkpoint>) -> ExprRes {
            self.skip_ws();
            let start = start.unwrap_or_else(|| self.builder.checkpoint());
            match self.current() {
                Some(INT_NUM | FLOAT_NUM | STRING) => {
                    self.bump();
                    self.builder.start_node_at(start, EXPR.into());
                    self.finish_node();
                    ExprRes::Ok
                }
                Some(L_CURLY) => match self.attrset(start) {
                    AttrsetRes::Ok => ExprRes::Ok,
                    AttrsetRes::Eof => ExprRes::Eof,
                },
                Some(L_PAREN) => {
                    self.builder.start_node_at(start, PARENTHESIZED_EXPR.into());
                    self.bump();
                    self.expr(None);
                    self.skip_ws();
                    let res = match self.current() {
                        Some(R_PAREN) => {
                            // The closing paren belongs to the parenthesized node.
                            self.bump();
                            ExprRes::Ok
                        }
                        Some(_) => todo!(),
                        None => ExprRes::Eof,
                    };
                    // Every opened node must be closed again, otherwise the
                    // builder is left unbalanced and `finish()` panics.
                    self.finish_node();
                    res
                }
                Some(_) => ExprRes::NoExpr,
                None => ExprRes::Eof,
            }
        }
    }

    /// Outcome of trying to parse an expression.
    pub enum ExprRes {
        /// An expression was parsed.
        Ok,
        /// The token stream ended.
        Eof,
        /// isnt an expression
        NoExpr,
    }
}
/// Attribute set (`{ name: value, ... }`) parsing.
mod attrset {
    use chumsky::container::Container;
    use rowan::Checkpoint;

    use super::{expr::ExprRes, instr::NodeRes, Parser};
    use crate::parser::{
        ast::lossless::{lex::SyntaxKind::*, parser::SyntaxError},
        Span,
    };

    impl Parser<'_> {
        /// Parses an attribute set whose `{` is the current token.
        ///
        /// The resulting `ATTR_SET` node is inserted at `checkpoint`.
        ///
        /// # Panics
        ///
        /// Panics if the current token is not `L_CURLY`.
        pub(super) fn attrset(&mut self, checkpoint: Checkpoint) -> AttrsetRes {
            assert_eq!(self.current(), Some(L_CURLY));
            self.bump();
            self.skip_ws();
            match self.current() {
                Some(R_CURLY) => {
                    self.builder.start_node_at(checkpoint, ATTR_SET.into());
                    self.bump();
                    self.finish_node();
                    AttrsetRes::Ok
                }
                Some(_) => {
                    self.builder.start_node_at(checkpoint, ATTR_SET.into());
                    let res = match self.attrs() {
                        AttrRes::Eof => AttrsetRes::Eof,
                        AttrRes::RCurly | AttrRes::Ok => {
                            println!("curr: {:?}", self.current());
                            AttrsetRes::Ok
                        }
                    };
                    self.finish_node();
                    res
                }
                None => AttrsetRes::Eof,
            }
        }

        /// Parses attributes until the closing `}` or the end of input.
        ///
        /// NOTE: the debug prints peek at the second pending token with
        /// `iter().rev().nth(1)`; the previous `len() - 2` index underflowed
        /// (and panicked) when fewer than two tokens were left.
        fn attrs(&mut self) -> AttrRes {
            let mut res = AttrRes::Ok;

            while res == AttrRes::Ok {
                println!("it: {:?}", self.tokens.last());
                match self.attr() {
                    AttrRes::Ok => {
                        self.skip_ws_without_newlines();
                        println!(
                            "a: {:?}, {:?}",
                            self.tokens.last(),
                            self.tokens.iter().rev().nth(1)
                        );
                        println!("errs: {:?}", self.errors);
                        // Remembered so a missing-comma error can wrap the
                        // whitespace between two attributes.
                        let checkpoint_previous_end = self.builder.checkpoint();
                        res = match self.current() {
                            Some(COMMA) => {
                                self.bump();
                                AttrRes::Ok
                            }
                            Some(R_CURLY) => {
                                self.bump();
                                res = AttrRes::Ok;
                                break;
                            }
                            Some(NEWLINE) => {
                                self.skip_ws();
                                println!(
                                    "b: {:?}, {:?}",
                                    self.tokens.last(),
                                    self.tokens.iter().rev().nth(1)
                                );
                                match self.current() {
                                    Some(COMMA) => {
                                        self.bump();
                                        AttrRes::Ok
                                    }
                                    Some(R_CURLY) => {
                                        self.bump();
                                        res = AttrRes::Ok;
                                        break;
                                    }
                                    Some(IDENT) => {
                                        println!("wtf");
                                        self.syntax_err_by_checkpoint(
                                            checkpoint_previous_end,
                                            SyntaxError::ExpectedCommaBetweenAttrs,
                                        );
                                        AttrRes::Ok
                                    }
                                    Some(_) => {
                                        self.bump();
                                        AttrRes::Ok
                                    }
                                    None => {
                                        res = AttrRes::Eof;
                                        break;
                                    }
                                }
                            }
                            Some(_) => {
                                self.bump();
                                println!(
                                    "c: {:?}, {:?}",
                                    self.tokens.last(),
                                    self.tokens.iter().rev().nth(1)
                                );
                                AttrRes::Ok
                            }
                            None => {
                                res = AttrRes::Eof;
                                break;
                            }
                        }
                    }
                    AttrRes::Eof => {
                        res = AttrRes::Eof;
                        break;
                    }
                    AttrRes::RCurly => {
                        res = AttrRes::RCurly;
                        break;
                    }
                }
            }
            println!("toks_left: {:?}", self.tokens);
            res
        }

        /// Parses a single `name: value` attribute into an `ATTR` node.
        ///
        /// Returns `Ok` without consuming anything when the next token is the
        /// closing `}`, `RCurly` when the `}` was consumed while reporting an
        /// error, and `Eof` when the input ended.
        ///
        /// Every return path closes the nodes it opened; the previous early
        /// returns left `ATTR` / `ATTR_NAME` / `ATTR_VALUE` open, which
        /// unbalanced the tree builder.
        fn attr(&mut self) -> AttrRes {
            self.skip_ws();
            // Decide whether there is an attribute at all before opening nodes,
            // so that a `}` or the end of input does not leave empty, unclosed
            // ATTR / ATTR_NAME nodes behind.
            match self.current() {
                Some(R_CURLY) => return AttrRes::Ok,
                None => return AttrRes::Eof,
                Some(_) => {}
            }

            self.start_node(ATTR);
            self.start_node(ATTR_NAME);
            if self.current() == Some(IDENT) {
                self.bump();
            } else {
                self.expected(IDENT);
            }
            self.finish_node();

            self.skip_ws();
            match self.current() {
                Some(COLON) => self.bump(),
                Some(R_CURLY) => {
                    self.expected(COLON);
                    self.finish_node(); // ATTR
                    return AttrRes::RCurly;
                }
                Some(_) => self.expected(COLON),
                None => {
                    self.finish_node(); // ATTR
                    return AttrRes::Eof;
                }
            }

            self.skip_ws();
            self.start_node(ATTR_VALUE);
            match self.expr(None) {
                ExprRes::Ok => {
                    // The token following the value is pulled into ATTR_VALUE,
                    // as before; guard against the input ending right after
                    // the value, which used to panic in `bump()`.
                    if self.current().is_some() {
                        self.bump();
                    }
                }
                ExprRes::Eof => {
                    self.finish_node(); // ATTR_VALUE
                    self.finish_node(); // ATTR
                    return AttrRes::Eof;
                }
                ExprRes::NoExpr => match self.current() {
                    Some(COMMA) => self.syntax_err(SyntaxError::AttrExpectedValue),
                    Some(R_CURLY) => {
                        self.syntax_err(SyntaxError::AttrExpectedValue);
                        self.finish_node(); // ATTR_VALUE
                        self.finish_node(); // ATTR
                        return AttrRes::RCurly;
                    }
                    Some(_) => self.expected(EXPR),
                    // `NoExpr` is only reported while a token is pending.
                    None => unreachable!(),
                },
            }
            self.finish_node();
            self.finish_node();
            AttrRes::Ok
        }
    }

    /// Outcome of parsing a whole attribute set.
    #[derive(PartialEq, Eq)]
    pub enum AttrsetRes {
        Ok,
        Eof,
    }

    /// Outcome of parsing one attribute or the attribute list.
    #[derive(PartialEq, Eq)]
    enum AttrRes {
        Ok,
        Eof,
        RCurly,
    }
}
/// Instruction parsing: a name made of identifiers followed by positional
/// parameter expressions.
mod instr {
    use super::Parser;
    use crate::parser::{
        ast::lossless::{lex::SyntaxKind::*, parser::expr::ExprRes},
        Span,
    };

    impl Parser<'_> {
        /// Parses an instruction into an `INSTR` node.
        ///
        /// The name is parsed first, then expressions are consumed for as long
        /// as they parse successfully. If at least one was parsed, they are
        /// wrapped in an `INSTR_PARAMS` node. Always returns `NodeRes::Ok`.
        ///
        /// # Panics
        ///
        /// Panics if the current token is not `IDENT`.
        pub(super) fn instr(&mut self) -> NodeRes {
            assert_eq!(self.current(), Some(IDENT));
            self.skip_ws();
            self.start_node(INSTR);
            self.instr_name();

            // Everything parsed after this point counts as a positional parameter.
            let params_checkpoint = self.builder.checkpoint();
            let mut positional_count = 0;
            while let ExprRes::Ok = self.expr(None) {
                positional_count += 1;
            }

            if positional_count >= 1 {
                self.builder
                    .start_node_at(params_checkpoint, INSTR_PARAMS.into());
                self.finish_node();
            }
            self.finish_node();
            NodeRes::Ok
        }

        /// Collects consecutive identifiers (separated by non-newline
        /// whitespace) into an `INSTR_NAME` node.
        fn instr_name(&mut self) {
            self.start_node(INSTR_NAME);
            while let Some(IDENT) = self.current() {
                self.bump();
                self.skip_ws_without_newlines();
            }
            self.finish_node();
        }
    }

    /// Outcome of parsing an instruction.
    pub(super) enum NodeRes {
        Ok,
        Eof,
    }
}

View file

@ -0,0 +1,50 @@
use indexmap::IndexMap;
use super::super::Spanned;
use super::super::Span;
use super::Lit;
/// An expression paired with the source span it was parsed from.
///
/// The expression is boxed, which lets [`RawExpr`] variants contain
/// `RawExpression`s directly.
#[derive(Debug, PartialEq)]
pub struct RawExpression<'src> {
    pub expr: Box<RawExpr<'src>>,
    pub span: Span,
}

impl<'src> RawExpression<'src> {
    /// Boxes `expr` and stores it together with `span`.
    pub fn new(expr: RawExpr<'src>, span: Span) -> Self {
        let expr = Box::new(expr);
        Self { expr, span }
    }
}
/// The expression forms of the raw AST.
#[derive(Debug, PartialEq)]
pub enum RawExpr<'src> {
    /// A node: its spanned name words and an optional spanned attribute set.
    Node(
        Vec<Spanned<&'src str>>,
        Option<Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>>,
    ),
    /// Two expressions joined by a simple pipe.
    SimplePipe(RawExpression<'src>, RawExpression<'src>),
    // NamingPipe(
    // Box<Expression<'src>>,
    // (Vec<Spanned<&'src str>>, Vec<Spanned<&'src str>>),
    // Box<Expression<'src>>,
    // ),
    /// Two expressions joined by a mapping pipe.
    MappingPipe(RawExpression<'src>, RawExpression<'src>),
    /// Two expressions joined by a null pipe.
    NullPipe(RawExpression<'src>, RawExpression<'src>),
    /// A map from spanned names to expressions.
    MultiPipe(IndexMap<Spanned<&'src str>, RawExpression<'src>>),
    // LetIn(
    // IndexMap<Spanned<&'src str>, Box<Expression<'src>>>,
    // Box<Expression<'src>>,
    // ),
    /// A variable (written with a `$` prefix).
    Var(&'src str),
    /// An input variable (written with a `@` prefix).
    InputVar(&'src str),
    /// A spanned attribute set mapping spanned names to expressions.
    AttrSet(Spanned<IndexMap<Spanned<&'src str>, RawExpression<'src>>>),
    /// A literal value.
    Lit(Lit<'src>),
    /// A matrix: its spanned `(u16, u16)` dimensions and its element expressions.
    Matrix(Spanned<(u16, u16)>, Vec<RawExpression<'src>>),
    /// A list of expressions.
    List(Vec<RawExpression<'src>>),
}

View file

@ -1,4 +1,4 @@
use crate::parser::ast::{Expr, File}; use crate::parser::ast::File;
use crate::parser::parse; use crate::parser::parse;
use crate::tokens::Token; use crate::tokens::Token;
use chumsky::input::Stream; use chumsky::input::Stream;

View file

@ -14,6 +14,14 @@ pub enum Token<'a> {
Let, Let,
#[token("in")] #[token("in")]
In, In,
#[token("mat")]
Mat,
#[regex("[\\d]+x[\\d]+", |lex| {
let (x, y) = lex.slice().split_once('x').expect("shouldn't fail to split");
// TODO: handle overflows etc
(x.parse().expect("should only match valid u16s"), y.parse().expect("should only match valid u16s"))
})]
Dimensions((u16, u16)),
#[regex("[\\d]+", |lex| lex.slice())] #[regex("[\\d]+", |lex| lex.slice())]
Int(&'a str), Int(&'a str),
#[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)", |lex| lex.slice())] #[regex("[+-]?([\\d]+\\.[\\d]*|[\\d]*\\.[\\d]+)", |lex| lex.slice())]
@ -30,12 +38,11 @@ pub enum Token<'a> {
Mult, Mult,
#[token("/")] #[token("/")]
Div, Div,
#[regex("[a-zA-Z_]+[a-zA-Z0-9_\\-]*", |lex| lex.slice())] // TODO: figure out how to allow numbers in words?
#[regex("[a-zA-Z_]+[a-zA-Z_\\-\\d]*", |lex| lex.slice().trim())]
Word(&'a str), Word(&'a str),
#[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] #[regex("\\$[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
VarIdent(&'a str), VarIdent(&'a str),
#[token("@..")]
InputSpread,
#[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])] #[regex("\\@[a-zA-Z0-9_\\-]+", |lex| &lex.slice()[1..])]
InputIdent(&'a str), InputIdent(&'a str),
#[token(",")] #[token(",")]
@ -55,7 +62,7 @@ pub enum Token<'a> {
#[token(":")] #[token(":")]
Colon, Colon,
#[token(";")] #[token(";")]
SemiColon, Semicolon,
#[token("[")] #[token("[")]
BracketOpen, BracketOpen,
#[token("]")] #[token("]")]

View file

@ -34,7 +34,7 @@ lexer_test! {
lexer_test! { lexer_test! {
test_lex_subgroup, test_lex_subgroup,
"subgroup(first, second) = a | b { in1: $first } | c { in1: $second }", "subgroup(first, second) = a | b [ $first ] | c [ $second ]",
[ [
Token::Word("subgroup"), Token::Word("subgroup"),
Token::ParenOpen, Token::ParenOpen,
@ -46,18 +46,14 @@ lexer_test! {
Token::Word("a"), Token::Word("a"),
Token::Pipe, Token::Pipe,
Token::Word("b"), Token::Word("b"),
Token::BraceOpen, Token::BracketOpen,
Token::Word("in1"),
Token::Colon,
Token::VarIdent("first"), Token::VarIdent("first"),
Token::BraceClose, Token::BracketClose,
Token::Pipe, Token::Pipe,
Token::Word("c"), Token::Word("c"),
Token::BraceOpen, Token::BracketOpen,
Token::Word("in1"),
Token::Colon,
Token::VarIdent("second"), Token::VarIdent("second"),
Token::BraceClose Token::BracketClose
] ]
} }

View file

@ -33,11 +33,11 @@
"pre-commit-hooks": "pre-commit-hooks_2" "pre-commit-hooks": "pre-commit-hooks_2"
}, },
"locked": { "locked": {
"lastModified": 1712579011, "lastModified": 1712724616,
"narHash": "sha256-trHgFNW8CW85c1OuAPBI+OGous53KkVhMemvcq7syDo=", "narHash": "sha256-qs9uEbrOpp6oXcDOp5cpilyU52t78ZpEPATtaHRVLIU=",
"owner": "cachix", "owner": "cachix",
"repo": "devenv", "repo": "devenv",
"rev": "a71323c618664a6b7a39bc183b0ce22ac8511cf9", "rev": "d1a11d14dbe96a03c7f9068e4d3af05f283734e0",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -83,11 +83,11 @@
"rust-analyzer-src": "rust-analyzer-src" "rust-analyzer-src": "rust-analyzer-src"
}, },
"locked": { "locked": {
"lastModified": 1712384501, "lastModified": 1712730246,
"narHash": "sha256-AZmYmEnc1ZkSlxUJVUtGh9VFAqWPr+xtNIiBqD2eKfc=", "narHash": "sha256-iB8bFj+07RHpmt+XuGGvYQk2Iwm12u6+DklGq/+Tg5s=",
"owner": "nix-community", "owner": "nix-community",
"repo": "fenix", "repo": "fenix",
"rev": "99c6241db5ca5363c05c8f4acbdf3a4e8fc42844", "rev": "d402ae4a5e5676722290470f61a5e8e3155b5487",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -447,11 +447,11 @@
}, },
"nixpkgs_3": { "nixpkgs_3": {
"locked": { "locked": {
"lastModified": 1712163089, "lastModified": 1712608508,
"narHash": "sha256-Um+8kTIrC19vD4/lUCN9/cU9kcOsD1O1m+axJqQPyMM=", "narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=",
"owner": "nixos", "owner": "nixos",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "fd281bd6b7d3e32ddfa399853946f782553163b5", "rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -463,11 +463,11 @@
}, },
"nixpkgs_4": { "nixpkgs_4": {
"locked": { "locked": {
"lastModified": 1712439257, "lastModified": 1712608508,
"narHash": "sha256-aSpiNepFOMk9932HOax0XwNxbA38GOUVOiXfUVPOrck=", "narHash": "sha256-vMZ5603yU0wxgyQeHJryOI+O61yrX2AHwY6LOFyV1gM=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "ff0dbd94265ac470dda06a657d5fe49de93b4599", "rev": "4cba8b53da471aea2ab2b0c1f30a81e7c451f4b6",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -567,11 +567,11 @@
"rust-analyzer-src": { "rust-analyzer-src": {
"flake": false, "flake": false,
"locked": { "locked": {
"lastModified": 1712156296, "lastModified": 1712663608,
"narHash": "sha256-St7ZQrkrr5lmQX9wC1ZJAFxL8W7alswnyZk9d1se3Us=", "narHash": "sha256-tN9ZL6kGppmHg84lxlpAlaN+kXWNctKK7Yitq/iXDEw=",
"owner": "rust-lang", "owner": "rust-lang",
"repo": "rust-analyzer", "repo": "rust-analyzer",
"rev": "8e581ac348e223488622f4d3003cb2bd412bf27e", "rev": "a5feb4f05f09adca661c869b1bf2324898cbaa43",
"type": "github" "type": "github"
}, },
"original": { "original": {

View file

@ -11,24 +11,38 @@
extra-substituters = "https://devenv.cachix.org"; extra-substituters = "https://devenv.cachix.org";
}; };
outputs = { self, nixpkgs, devenv, systems, ... } @ inputs: outputs = {
let self,
nixpkgs,
devenv,
systems,
...
} @ inputs: let
forEachSystem = nixpkgs.lib.genAttrs (import systems); forEachSystem = nixpkgs.lib.genAttrs (import systems);
in in {
{ devShells =
devShells = forEachSystem forEachSystem
(system: (system: let
let
pkgs = nixpkgs.legacyPackages.${system}; pkgs = nixpkgs.legacyPackages.${system};
in in {
{
default = devenv.lib.mkShell { default = devenv.lib.mkShell {
inherit inputs pkgs; inherit inputs pkgs;
modules = [ modules = [
({pkgs, config, ...}: { ({
pkgs,
config,
...
}: {
languages.rust = { languages.rust = {
enable = true; enable = true;
channel = "nightly"; channel = "nightly";
components = [
"rustc"
"cargo"
"clippy"
"rustfmt"
"rust-src"
];
}; };
pre-commit.hooks = { pre-commit.hooks = {
@ -37,11 +51,15 @@
}; };
packages = with pkgs; [ packages = with pkgs; [
just nushell just
nushell
ripgrep ripgrep
typst typst-lsp typst
typst-lsp
mold mold
cargo-nextest cargo-watch cargo-nextest
cargo-watch
rust-analyzer
]; ];
}) })
]; ];

View file

@ -1,7 +1,4 @@
def blend1 = [ meow mew meow 5 3.14 "uwu" {
open "test.png", meow: test 24
open "test2.png" another: hi "hello",
] } "awa"
| blend multiply 0.6
def blend2 = open "test.png" | blend multiply 0.6 [ open test2.png ]