From d6bc644fb6fd6340c731045286ecb09258f9a3b9 Mon Sep 17 00:00:00 2001 From: Schrottkatze Date: Wed, 5 Jun 2024 18:00:14 +0200 Subject: [PATCH] lang: basic ast work --- crates/lang/src/ast.rs | 68 ++++++ crates/lang/src/ast/error.rs | 30 +++ crates/lang/src/ast/mod_tree.rs | 248 ++++++++++++++++++++++ crates/lang/src/ast/nodes.rs | 77 +++++++ crates/lang/src/ast/source_file.rs | 80 +++++++ crates/lang/src/lib.rs | 22 ++ crates/lang/src/lst_parser/output.rs | 14 +- crates/lang/src/lst_parser/syntax_kind.rs | 16 -- crates/lang/src/main.rs | 42 ++-- testfiles/hello.owo | 1 + testfiles/hello/meow.owo | 2 + testfiles/test.owo | 17 +- testfiles/uwu.owo | 1 + 13 files changed, 560 insertions(+), 58 deletions(-) create mode 100644 crates/lang/src/ast.rs create mode 100644 crates/lang/src/ast/error.rs create mode 100644 crates/lang/src/ast/mod_tree.rs create mode 100644 crates/lang/src/ast/nodes.rs create mode 100644 crates/lang/src/ast/source_file.rs create mode 100644 testfiles/hello.owo create mode 100644 testfiles/hello/meow.owo create mode 100644 testfiles/uwu.owo diff --git a/crates/lang/src/ast.rs b/crates/lang/src/ast.rs new file mode 100644 index 0000000..1375694 --- /dev/null +++ b/crates/lang/src/ast.rs @@ -0,0 +1,68 @@ +use std::{collections::HashMap, path::PathBuf, sync::Arc}; + +use rowan::ast::{AstNode, AstPtr}; + +use self::{ + error::{Error, WorldCreationError}, + mod_tree::{Module, ModuleTree}, + source_file::SourceFile, +}; + +mod error; +mod mod_tree; +mod nodes; +mod source_file; + +struct Loc { + file: PathBuf, + syntax_el: AstPtr, +} + +impl Loc { + pub fn new(file: PathBuf, syntax_el: &T) -> Self { + Self { + file, + syntax_el: AstPtr::new(syntax_el), + } + } +} + +pub struct World { + entry_point: PathBuf, + files: HashMap, + errors: Vec, + module_tree: ModuleTree, +} + +impl World { + pub fn new(entry_point: PathBuf) -> Result { + let entry_point = entry_point.canonicalize().unwrap(); + let source = match std::fs::read_to_string(&entry_point) { + Ok(f) => f, + Err(e) => return Err(WorldCreationError::FailedToOpenEntryPoint(entry_point, e)), + }; + + let (src, mut errors) = SourceFile::parse_from(entry_point.clone(), source); + + let (module_tree, mut files, new_errors) = ModuleTree::parse_from_main(&entry_point, &src); + errors.extend(new_errors); + module_tree.print_tree(&src.tree()); + dbg!(&errors); + + let _ = files.insert(entry_point.clone(), src); + + Ok(Self { + files, + entry_point, + errors, + module_tree, + }) + } + + // fn parse_mod_tree(files: &mut HashMap) +} + +// struct SourceFile { +// tree: SyntaxNode, +// items: Vec, +// } diff --git a/crates/lang/src/ast/error.rs b/crates/lang/src/ast/error.rs new file mode 100644 index 0000000..352e831 --- /dev/null +++ b/crates/lang/src/ast/error.rs @@ -0,0 +1,30 @@ +use std::{fmt::Display, path::PathBuf}; + +use crate::lst_parser::error::SyntaxError; + +#[derive(Debug)] +pub enum Error { + Syntax(PathBuf, SyntaxError), + FailedToOpenFileMod(PathBuf, std::io::Error), +} + +impl Error { + pub fn from_syntax(file: PathBuf, e: SyntaxError) -> Self { + Self::Syntax(file, e) + } +} + +#[derive(Debug)] +pub enum WorldCreationError { + FailedToOpenEntryPoint(PathBuf, std::io::Error), +} + +impl Display for WorldCreationError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + WorldCreationError::FailedToOpenEntryPoint(entry_path, e) => { + write!(f, "failed to open entry_point '{entry_path:?}': {e}") + } + } + } +} diff --git a/crates/lang/src/ast/mod_tree.rs b/crates/lang/src/ast/mod_tree.rs new file mode 100644 index 0000000..035faec --- /dev/null +++ b/crates/lang/src/ast/mod_tree.rs @@ -0,0 +1,248 @@ +use std::{ + collections::{HashMap, HashSet}, + path::PathBuf, + sync::Arc, +}; + +use rowan::ast::{AstNode, AstPtr}; + +use crate::{lst_parser::syntax_kind::SyntaxKind, SyntaxNode}; + +use super::{ + error::Error, + nodes::{self, Mod, ModBody, ModName, Root}, + source_file::SourceFile, + Loc, +}; + +pub struct ModuleTree { + modules: HashMap>, +} + +impl ModuleTree { + pub fn parse_from_main( + path: &PathBuf, + main_file: &SourceFile, + ) -> (Self, HashMap, Vec) { + let tree = main_file.tree(); + let mut files = HashMap::new(); + let mut errors = Vec::new(); + + let entry_path = path.parent().unwrap().to_owned(); + let modules = main_file + .top_level_modules() + .into_iter() + .filter_map(|m| { + let module = + Module::parse_mod(m.to_node(tree.syntax()), Vec::new(), &path, &entry_path); + match module { + Ok(module) => { + files.extend(module.1); + errors.extend(module.2); + Some((module.0.name(), Arc::new(module.0))) + } + Err(err) => { + errors.push(err); + None + } + } + }) + .collect::>>(); + + (Self { modules }, files, errors) + } + + pub fn print_tree(&self, lst: &Root) { + let name = "main"; + print_tree(&name, &self.modules, 0) + } +} + +pub struct Module { + path: Vec, + name: String, + kind: ModuleKind, + children: HashMap>, + parent: Option>, +} + +impl Module { + fn parse_mod( + module: Mod, + cur_path: Vec, + cur_file: &PathBuf, + entry_path: &PathBuf, + ) -> Result<(Self, HashMap, Vec), Error> { + dbg!(cur_file); + dbg!(entry_path); + let children = module + .syntax() + .children() + // .map(|n| n.kind()) + .collect::>(); + + if children.len() == 1 { + let name = &children[0]; + assert_eq!(name.kind(), SyntaxKind::MODULE_NAME); + return Self::parse_file_mod( + name.text().to_string(), + Loc::new(cur_file.clone(), &module), + cur_path, + entry_path, + ); + } else if children.len() == 2 { + let name = &children[0]; + assert_eq!(name.kind(), SyntaxKind::MODULE_NAME); + let body = &children[1]; + assert_eq!(body.kind(), SyntaxKind::MODULE_BODY); + return Ok(Self::parse_inline_mod( + module, cur_path, cur_file, entry_path, + )); + } + todo!() + } + + fn parse_file_mod( + name: String, + decl: Loc, + mut cur_path: Vec, + entry_path: &PathBuf, + ) -> Result<(Self, HashMap, Vec), Error> { + let mut mod_file_path = entry_path.to_owned(); + + for el in &cur_path { + mod_file_path.push(format!("{el}/")); + } + mod_file_path.push(format!("{name}.owo")); + + let mut files = HashMap::new(); + let mut errors = Vec::new(); + + let source = match std::fs::read_to_string(dbg!(&mod_file_path)) { + Ok(f) => f, + Err(e) => return Err(Error::FailedToOpenFileMod(mod_file_path, e)), + }; + + let (source_file, file_errors) = SourceFile::parse_from(mod_file_path.clone(), source); + errors.extend(file_errors); + + let tree = source_file.tree(); + let old_path = cur_path.clone(); + cur_path.push(name.clone()); + let children = source_file + .top_level_modules() + .into_iter() + .filter_map(|m| { + let module = Module::parse_mod( + m.to_node(tree.syntax()), + cur_path.clone(), + &mod_file_path, + &entry_path, + ); + match module { + Ok(module) => { + files.extend(module.1); + errors.extend(module.2); + Some((module.0.name(), Arc::new(module.0))) + } + Err(err) => { + errors.push(err); + None + } + } + }) + .collect::>>(); + + files.insert(mod_file_path.clone(), source_file); + + Ok(( + Self { + path: old_path, + name, + kind: ModuleKind::File { + declaration: decl, + file_id: mod_file_path, + }, + children, + parent: None, + }, + files, + errors, + )) + } + + fn parse_inline_mod( + module: Mod, + mut cur_path: Vec, + cur_file: &PathBuf, + entry_path: &PathBuf, + ) -> (Self, HashMap, Vec) { + let mut children = module.syntax().children().collect::>(); + + let body = ModBody::cast(children.pop().unwrap()).unwrap(); + let name = ModName::cast(children.pop().unwrap()).unwrap(); + + let mut files = HashMap::new(); + let mut errors = Vec::new(); + + let old_path = cur_path.clone(); + cur_path.push(name.syntax().to_string()); + let children = body + .syntax() + .children() + .filter_map(|node| Mod::cast(node)) + .filter_map(|m| { + let m = Self::parse_mod(m, cur_path.clone(), cur_file, entry_path); + match m { + Ok(module) => { + files.extend(module.1); + errors.extend(module.2); + Some((module.0.name(), Arc::new(module.0))) + } + Err(err) => { + errors.push(err); + None + } + } + }) + .collect::>>(); + + ( + Self { + name: name.syntax().text().to_string(), + kind: ModuleKind::Inline(Loc::new(cur_file.to_owned(), &module)), + children, + parent: None, + path: old_path, + }, + files, + errors, + ) + } + + pub fn name(&self) -> String { + // self.name.to_node(lst.syntax()).syntax().text().to_string() + self.name.clone() + } +} + +fn print_tree(name: &str, children: &HashMap>, level: u32) { + const INDENT_STR: &str = " "; + + for _ in 0..level { + print!("{}", INDENT_STR); + } + print!("{name}\n"); + + for (name, module) in children { + print_tree(name, &module.children, level + 1); + } +} + +enum ModuleKind { + Inline(Loc), + File { + declaration: Loc, + file_id: PathBuf, + }, +} diff --git a/crates/lang/src/ast/nodes.rs b/crates/lang/src/ast/nodes.rs new file mode 100644 index 0000000..e59c0f3 --- /dev/null +++ b/crates/lang/src/ast/nodes.rs @@ -0,0 +1,77 @@ +use crate::lst_parser::syntax_kind::SyntaxKind::*; +use crate::SyntaxNode; +use rowan::Language; +macro_rules! ast_nodes { + ($($ast:ident, $kind:ident);+) => { + $( + #[derive(PartialEq, Eq, Hash)] + #[repr(transparent)] + pub struct $ast(SyntaxNode); + impl rowan::ast::AstNode for $ast { + type Language = crate::Lang; + + fn can_cast(kind: ::Kind) -> bool { + kind == $kind + } + + fn cast(node: SyntaxNode) -> Option { + if node.kind() == $kind { + Some(Self(node)) + } else { + None + } + } + + fn syntax(&self) -> &SyntaxNode { + &self.0 + } + } + )+ + }; +} +ast_nodes!( + Def, DEF; + DefName, DEF_NAME; + DefBody, DEF_BODY; + + Mod, MODULE; + ModName, MODULE_NAME; + ModBody, MODULE_BODY; + + Use, USE; + UsePat, USE_PAT; + PatItem, PAT_ITEM; + PatGlob, PAT_GLOB; + PatGroup, PAT_GROUP; + + Literal, LITERAL; + IntLit, INT_NUM; + FloatLit, FLOAT_NUM; + StringLit, STRING; + + Matrix, MATRIX; + MatrixRow, MAT_ROW; + Vector, VEC; + List, LIST; + CollectionItem, COLLECTION_ITEM; + + ParenthesizedExpr, PARENTHESIZED_EXPR; + Expression, EXPR; + + Pipeline, PIPELINE; + + Instruction, INSTR; + InstructionName, INSTR_NAME; + InstructionParams, INSTR_PARAMS; + + AttributeSet, ATTR_SET; + Attribute, ATTR; + AttributeName, ATTR_NAME; + AttributeValue, ATTR_VALUE; + + ParseError, PARSE_ERR; + LexError, LEX_ERR; + + Root, ROOT; + Eof, EOF +); diff --git a/crates/lang/src/ast/source_file.rs b/crates/lang/src/ast/source_file.rs new file mode 100644 index 0000000..e6e33be --- /dev/null +++ b/crates/lang/src/ast/source_file.rs @@ -0,0 +1,80 @@ +use std::path::PathBuf; + +use rowan::ast::{AstNode, AstPtr}; + +use crate::lst_parser::{ + grammar::source_file, + input::Input, + output::Output, + syntax_kind::{self, SyntaxKind}, + Parser, +}; + +use super::{ + error::Error, + nodes::{Def, Mod, Root, Use}, +}; + +pub struct SourceFile { + lst: Root, + modules: Vec>, + defs: Vec>, + uses: Vec>, +} + +impl SourceFile { + pub fn parse_from(path: PathBuf, source_text: String) -> (Self, Vec) { + let toks = syntax_kind::lex(&source_text); + let input = Input::new(&toks); + let mut parser = Parser::new(input); + + source_file(&mut parser); + + let parser_out = parser.finish(); + let out = Output::from_parser_output(toks, parser_out); + + let lst = out.syntax(); + + let mut modules = Vec::new(); + let mut defs = Vec::new(); + let mut uses = Vec::new(); + + for c in lst.children() { + match c.kind() { + SyntaxKind::MODULE => modules.push(AstPtr::new(&Mod::cast(c).unwrap())), + SyntaxKind::DEF => defs.push(AstPtr::new(&Def::cast(c).unwrap())), + SyntaxKind::USE => uses.push(AstPtr::new(&Use::cast(c).unwrap())), + _ => {} + } + } + + ( + Self { + lst: Root::cast(lst).unwrap(), + modules, + defs, + uses, + }, + out.errors() + .into_iter() + .map(|err| Error::from_syntax(path.clone(), err)) + .collect(), + ) + } + + pub fn tree(&self) -> &Root { + &self.lst + } + + pub fn top_level_modules(&self) -> Vec> { + self.modules.clone() + } + + pub fn top_level_defs(&self) -> Vec> { + self.defs.clone() + } + + pub fn top_level_uses(&self) -> Vec> { + self.uses.clone() + } +} diff --git a/crates/lang/src/lib.rs b/crates/lang/src/lib.rs index e552496..d700d45 100644 --- a/crates/lang/src/lib.rs +++ b/crates/lang/src/lib.rs @@ -1,2 +1,24 @@ #![feature(type_alias_impl_trait, lint_reasons)] + +use crate::lst_parser::syntax_kind::SyntaxKind; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Lang {} +impl rowan::Language for Lang { + type Kind = SyntaxKind; + #[allow(unsafe_code)] + fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { + assert!(raw.0 <= SyntaxKind::ROOT as u16); + unsafe { std::mem::transmute::(raw.0) } + } + fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { + kind.into() + } +} + +pub type SyntaxNode = rowan::SyntaxNode; +pub type SyntaxToken = rowan::SyntaxNode; +pub type SyntaxElement = rowan::NodeOrToken; + +pub mod ast; pub mod lst_parser; diff --git a/crates/lang/src/lst_parser/output.rs b/crates/lang/src/lst_parser/output.rs index 49d29c6..af483ab 100644 --- a/crates/lang/src/lst_parser/output.rs +++ b/crates/lang/src/lst_parser/output.rs @@ -3,9 +3,9 @@ use owo_colors::{unset_override, OwoColorize}; use rowan::{GreenNode, GreenNodeBuilder, GreenNodeData, GreenTokenData, Language, NodeOrToken}; use std::mem; -use crate::lst_parser::{ - input::MEANINGLESS_TOKS, - syntax_kind::{Lang, SyntaxKind}, +use crate::{ + lst_parser::{input::MEANINGLESS_TOKS, syntax_kind::SyntaxKind}, + Lang, SyntaxNode, }; use super::{ @@ -201,4 +201,12 @@ impl Output { errors, } } + + pub fn syntax(&self) -> SyntaxNode { + SyntaxNode::new_root(self.green_node.clone()) + } + + pub fn errors(&self) -> Vec { + self.errors.clone() + } } diff --git a/crates/lang/src/lst_parser/syntax_kind.rs b/crates/lang/src/lst_parser/syntax_kind.rs index 87230e7..5cb7fb1 100644 --- a/crates/lang/src/lst_parser/syntax_kind.rs +++ b/crates/lang/src/lst_parser/syntax_kind.rs @@ -53,7 +53,6 @@ pub enum SyntaxKind { LIST, // either of a vec, a matrix or a list COLLECTION_ITEM, - DECL, PARENTHESIZED_EXPR, EXPR, LITERAL, @@ -130,7 +129,6 @@ pub enum SyntaxKind { ROOT, EOF, TOMBSTONE, - ERROR, } pub type TokenSet = EnumSet; @@ -140,17 +138,3 @@ impl From for rowan::SyntaxKind { Self(kind as u16) } } - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Lang {} -impl rowan::Language for Lang { - type Kind = SyntaxKind; - #[allow(unsafe_code)] - fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { - assert!(raw.0 <= SyntaxKind::ROOT as u16); - unsafe { std::mem::transmute::(raw.0) } - } - fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { - kind.into() - } -} diff --git a/crates/lang/src/main.rs b/crates/lang/src/main.rs index 2531709..1dbd527 100644 --- a/crates/lang/src/main.rs +++ b/crates/lang/src/main.rs @@ -1,7 +1,10 @@ use clap::Parser; use std::{fs, path::PathBuf}; -use lang::lst_parser::{self, grammar, input, output::Output, syntax_kind}; +use lang::{ + ast::World, + lst_parser::{self, grammar, input, output::Output, syntax_kind}, +}; #[derive(Parser)] struct Args { @@ -12,37 +15,18 @@ struct Args { fn main() { let args = Args::parse(); let n = args.file.clone(); - let f = fs::read_to_string(n.clone()).expect("failed to read file"); + // let f = fs::read_to_string(n.clone()).expect("failed to read file"); - let toks = dbg!(syntax_kind::lex(&f)); - let input = input::Input::new(&toks); - let mut parser = lst_parser::Parser::new(input); + // let toks = dbg!(syntax_kind::lex(&f)); + // let input = input::Input::new(&toks); + // let mut parser = lst_parser::Parser::new(input); - grammar::source_file(&mut parser); + // grammar::source_file(&mut parser); - let p_out = dbg!(parser.finish()); - let o = Output::from_parser_output(toks, p_out); + // let p_out = dbg!(parser.finish()); + // let o = Output::from_parser_output(toks, p_out); - println!("{}", o.debug_colored()); + // println!("{}", o.debug_colored()); - // let parse_res = parser::parse(&f); - // println!("parse: {:?}", parse_res); - - // dbg!(lex::lex(&f)); - // let mut err_collector = ErrorCollector::new(vec![(n.to_str().unwrap(), &f)]); - - // println!("file: {f}\n"); - // let parse_res = parse(&f); - // err_collector.insert_many( - // args.file.to_str().unwrap(), - // lang::err_reporting::Stage::Parse, - // parse_res - // .errors() - // .into_iter() - // .map(|e| e.to_owned()) - // .collect::>(), - // ); - - // err_collector.report_raw(); - // println!("res: {:?}", parse_res); + World::new(n); } diff --git a/testfiles/hello.owo b/testfiles/hello.owo new file mode 100644 index 0000000..d72460b --- /dev/null +++ b/testfiles/hello.owo @@ -0,0 +1 @@ +mod meow; diff --git a/testfiles/hello/meow.owo b/testfiles/hello/meow.owo new file mode 100644 index 0000000..643c13a --- /dev/null +++ b/testfiles/hello/meow.owo @@ -0,0 +1,2 @@ +mod mrawr {} +mod mrow {} diff --git a/testfiles/test.owo b/testfiles/test.owo index 4f06140..13bb5e7 100644 --- a/testfiles/test.owo +++ b/testfiles/test.owo @@ -1,11 +1,8 @@ -use hello::meow; - -def hello_world = meow [ 1 2 ]; - -def test - -mod hello { - use gay:;uwu_meow::*; - def meow = uwu; +mod hello; +mod world { + mod meow {} + mod uwu { + mod test {} + mod meow {} + } } - diff --git a/testfiles/uwu.owo b/testfiles/uwu.owo new file mode 100644 index 0000000..1a6b5d1 --- /dev/null +++ b/testfiles/uwu.owo @@ -0,0 +1 @@ +mod meow {}