From 500eb972bdeaab9a15a31536b5f3593bee2e38c7 Mon Sep 17 00:00:00 2001 From: Gabriel <68819302+obsidianical@users.noreply.github.com> Date: Mon, 20 Dec 2021 14:34:21 +0100 Subject: [PATCH] basic tokenization WORKSSSSSSSS BABYYYY --- src/main.rs | 4 +-- src/parsing/lexer.rs | 57 +++++++++++++++++-------------------- src/parsing/preprocessor.rs | 4 +-- src/parsing/token.rs | 3 +- src/shell.rs | 6 +++- 5 files changed, 36 insertions(+), 38 deletions(-) diff --git a/src/main.rs b/src/main.rs index 8731b4e..46bb143 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,8 @@ use std::env; use std::fs; -mod shell; -mod parsing; +pub mod shell; +pub mod parsing; use parsing::preprocessor::preprocess; diff --git a/src/parsing/lexer.rs b/src/parsing/lexer.rs index 260b4ba..78ae7da 100644 --- a/src/parsing/lexer.rs +++ b/src/parsing/lexer.rs @@ -1,6 +1,7 @@ -use preprocessor::preprocess; +use super::preprocessor::preprocess; +use super::token::Token; -struct Lexer { +pub struct Lexer { code: String, pos: usize, current: Option<char>, @@ -9,9 +10,9 @@ struct Lexer { impl Lexer { pub fn new(code: String) -> Lexer { Lexer { - code, - pos: 1, - current: None, + code: code.clone(), + pos: 0, + current: code.chars().nth(0), } } @@ -26,35 +27,29 @@ impl Lexer { pub fn tokenize(&mut self) { let mut tokens: Vec<Token> = vec![]; - self.code = preprocess(self.code); + self.code = preprocess(self.code.clone()); loop { - //let token = match self.current { - //Some('+') => Token::ADD, - //Some('-') => Token::SUBTRACT, - //Some('*') => Token::MULTIPLY, - //Some('/') => Token::DIVIDE, - //Some('(') => Token::LBRACK, - //Some(')') => Token::RBRACK, - //Some(' ') => continue, - //Some('\n') => continue, - //None => break, - //Some(_) => continue, - //}; - - tokens.push(match self.current { - Some('+') => Token::ADD, - Some('-') => Token::SUBTRACT, - Some('*') => Token::MULTIPLY, - Some('/') => Token::DIVIDE, - Some('(') => Token::LBRACK, - Some(')') => Token::RBRACK, - Some(' ') => continue, - Some('\n') => 
continue, + let token: Option<Token> = match self.current { + Some('+') => Some(Token::ADD), + Some('-') => Some(Token::SUBTRACT), + Some('*') => Some(Token::MULTIPLY), + Some('/') => Some(Token::DIVIDE), + Some('(') => Some(Token::LBRACK), + Some(')') => Some(Token::RBRACK), + Some(' ') => None, + Some('\n') => None, None => break, - Some(_) => continue, - }); - self.next(); + Some(_) => None, + }; + + match token { + Some(token) => { + tokens.push(token); + self.next(); + }, + None => self.next(), + } } println!("{:?}", tokens); diff --git a/src/parsing/preprocessor.rs b/src/parsing/preprocessor.rs index 4f57c18..0241f57 100644 --- a/src/parsing/preprocessor.rs +++ b/src/parsing/preprocessor.rs @@ -1,14 +1,14 @@ use lazy_static::lazy_static; use fancy_regex::Regex; -pub fn preprocess(raw_code: String) { +pub fn preprocess(raw_code: String) -> String { lazy_static! { static ref COMMENT_RE: Regex = Regex::new(r"(\]\].*?\[\[|&&.*\n|;.*\n|``(.|\n)*?´´)").unwrap(); } let code = COMMENT_RE.replace_all(&raw_code, ""); let code = remove_empty_lines(code.to_string()); - println!("{}", &code) + code } pub fn remove_empty_lines(text: String) -> String { diff --git a/src/parsing/token.rs b/src/parsing/token.rs index cd6f344..9622421 100644 --- a/src/parsing/token.rs +++ b/src/parsing/token.rs @@ -1,3 +1,4 @@ +#[derive(Debug)] pub enum Token { INT, FLOAT, @@ -9,5 +10,3 @@ pub enum Token { RBRACK, } -impl Token {} - diff --git a/src/shell.rs b/src/shell.rs index f918835..e2d566f 100644 --- a/src/shell.rs +++ b/src/shell.rs @@ -1,12 +1,16 @@ use std::io; +use super::parsing::lexer::Lexer; + pub fn run_shell() { 'shell: loop { let string = get_line(); if string.eq("q\n") || string.eq("quit\n") { break 'shell; } - println!("{}", string); + let mut lexer = Lexer::new(string); + lexer.tokenize(); + //println!("{}", string); } }