basic tokenization WORKSSSSSSSS BABYYYY

2021-12-20 14:34:21 +01:00 · 2021-12-20 14:34:21 +01:00 · 500eb972bd
commit 500eb972bd
parent d5da7b0f26
5 changed files with 36 additions and 38 deletions
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,8 +1,8 @@
 use std::env;
 use std::fs;
-mod shell;
+pub mod shell;
-mod parsing;
+pub mod parsing;
 use parsing::preprocessor::preprocess;
--- a/src/parsing/lexer.rs
+++ b/src/parsing/lexer.rs
@@ -1,6 +1,7 @@
-use preprocessor::preprocess;
+use super::preprocessor::preprocess;
 use super::token::Token;
-struct Lexer {
+pub struct Lexer {
    code: String,
    pos: usize,
    current: Option<char>,
@@ -9,9 +10,9 @@ struct Lexer {
 impl Lexer {
    pub fn new(code: String) -> Lexer {
        Lexer {
-            code,
+            code: code.clone(),
-            pos: 1,
+            pos: 0,
-            current: None,
+            current: code.chars().nth(0),
        }
    }
@@ -26,35 +27,29 @@ impl Lexer {
    pub fn tokenize(&mut self) {
        let mut tokens: Vec<Token> = vec![];
-        self.code = preprocess(self.code);
+        self.code = preprocess(self.code.clone());
        loop {
-            //let token = match self.current {
+            let token: Option<Token> = match self.current {
-                //Some('+') => Token::ADD,
+                Some('+') => Some(Token::ADD),
-                //Some('-') => Token::SUBTRACT,
+                Some('-') => Some(Token::SUBTRACT),
-                //Some('*') => Token::MULTIPLY,
+                Some('*') => Some(Token::MULTIPLY),
-                //Some('/') => Token::DIVIDE,
+                Some('/') => Some(Token::DIVIDE),
-                //Some('(') => Token::LBRACK,
+                Some('(') => Some(Token::LBRACK),
-                //Some(')') => Token::RBRACK,
+                Some(')') => Some(Token::RBRACK),
-                //Some(' ') => continue,
+                Some(' ') => None,
-                //Some('\n') => continue,
+                Some('\n') => None,
                //None => break,
                //Some(_) => continue,
            //};
            tokens.push(match self.current {
                Some('+') => Token::ADD,
                Some('-') => Token::SUBTRACT,
                Some('*') => Token::MULTIPLY,
                Some('/') => Token::DIVIDE,
                Some('(') => Token::LBRACK,
                Some(')') => Token::RBRACK,
                Some(' ') => continue,
                Some('\n') => continue,
                None => break,
-                Some(_) => continue,
+                Some(_) => None,
-            });
+            };
-            self.next();
+
            match token {
                Some(token) => {
                    tokens.push(token);
                    self.next();
                },
                None => self.next(),
            }
        }
        println!("{:?}", tokens);
--- a/src/parsing/preprocessor.rs
+++ b/src/parsing/preprocessor.rs
@@ -1,14 +1,14 @@
 use lazy_static::lazy_static;
 use fancy_regex::Regex;
-pub fn preprocess(raw_code: String) {
+pub fn preprocess(raw_code: String) -> String {
    lazy_static! {
        static ref COMMENT_RE: Regex = Regex::new(r"(\]\].*?\[\[|&&.*\n|;.*\n|``(.|\n)*?´´)").unwrap();
    }
    let code = COMMENT_RE.replace_all(&raw_code, "");
    let code = remove_empty_lines(code.to_string());
-    println!("{}", &code)
+    code
 }
 pub fn remove_empty_lines(text: String) -> String {
--- a/src/parsing/token.rs
+++ b/src/parsing/token.rs
@@ -1,3 +1,4 @@
 #[derive(Debug)]
 pub enum Token {
    INT,
    FLOAT,
@@ -9,5 +10,3 @@ pub enum Token {
    RBRACK,
 }
 impl Token {}
--- a/src/shell.rs
+++ b/src/shell.rs
@@ -1,12 +1,16 @@
 use std::io;
 use super::parsing::lexer::Lexer;
 pub fn run_shell() {
    'shell: loop {
        let string = get_line();
        if string.eq("q\n") || string.eq("quit\n") {
            break 'shell;
        }
-        println!("{}", string);
+        let mut lexer = Lexer::new(string);
        lexer.tokenize();
        //println!("{}", string);
    }
 }