basic tokenization WORKSSSSSSSS BABYYYY

2021-12-20 14:34:21 +01:00 · 2021-12-20 14:34:21 +01:00 · 500eb972bd
commit 500eb972bd
parent d5da7b0f26
5 changed files with 36 additions and 38 deletions
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,8 +1,8 @@
 use std::env;
 use std::fs;

-mod shell;
-mod parsing;
+pub mod shell;
+pub mod parsing;

 use parsing::preprocessor::preprocess;

--- a/src/parsing/lexer.rs
+++ b/src/parsing/lexer.rs
@@ -1,6 +1,7 @@
-use preprocessor::preprocess;
+use super::preprocessor::preprocess;
+use super::token::Token;

-struct Lexer {
+pub struct Lexer {
    code: String,
    pos: usize,
    current: Option<char>,
@@ -9,9 +10,9 @@ struct Lexer {
 impl Lexer {
    pub fn new(code: String) -> Lexer {
        Lexer {
-            code,
-            pos: 1,
-            current: None,
+            code: code.clone(),
+            pos: 0,
+            current: code.chars().nth(0),
        }
    }

@@ -26,35 +27,29 @@ impl Lexer {

    pub fn tokenize(&mut self) {
        let mut tokens: Vec<Token> = vec![];
-        self.code = preprocess(self.code);
+        self.code = preprocess(self.code.clone());
        
        loop {
-            //let token = match self.current {
-                //Some('+') => Token::ADD,
-                //Some('-') => Token::SUBTRACT,
-                //Some('*') => Token::MULTIPLY,
-                //Some('/') => Token::DIVIDE,
-                //Some('(') => Token::LBRACK,
-                //Some(')') => Token::RBRACK,
-                //Some(' ') => continue,
-                //Some('\n') => continue,
-                //None => break,
-                //Some(_) => continue,
-            //};
-
-            tokens.push(match self.current {
-                Some('+') => Token::ADD,
-                Some('-') => Token::SUBTRACT,
-                Some('*') => Token::MULTIPLY,
-                Some('/') => Token::DIVIDE,
-                Some('(') => Token::LBRACK,
-                Some(')') => Token::RBRACK,
-                Some(' ') => continue,
-                Some('\n') => continue,
+            let token: Option<Token> = match self.current {
+                Some('+') => Some(Token::ADD),
+                Some('-') => Some(Token::SUBTRACT),
+                Some('*') => Some(Token::MULTIPLY),
+                Some('/') => Some(Token::DIVIDE),
+                Some('(') => Some(Token::LBRACK),
+                Some(')') => Some(Token::RBRACK),
+                Some(' ') => None,
+                Some('\n') => None,
                None => break,
-                Some(_) => continue,
-            });
-            self.next();
+                Some(_) => None,
+            };
+
+            match token {
+                Some(token) => {
+                    tokens.push(token);
+                    self.next();
+                },
+                None => self.next(),
+            }
        }

        println!("{:?}", tokens);
--- a/src/parsing/preprocessor.rs
+++ b/src/parsing/preprocessor.rs
@@ -1,14 +1,14 @@
 use lazy_static::lazy_static;
 use fancy_regex::Regex;

-pub fn preprocess(raw_code: String) {
+pub fn preprocess(raw_code: String) -> String {
    lazy_static! {
        static ref COMMENT_RE: Regex = Regex::new(r"(\]\].*?\[\[|&&.*\n|;.*\n|``(.|\n)*?´´)").unwrap();
    }

    let code = COMMENT_RE.replace_all(&raw_code, "");
    let code = remove_empty_lines(code.to_string());
-    println!("{}", &code)
+    code
 }

 pub fn remove_empty_lines(text: String) -> String {
--- a/src/parsing/token.rs
+++ b/src/parsing/token.rs
@@ -1,3 +1,4 @@
+#[derive(Debug)]
 pub enum Token {
    INT,
    FLOAT,
@@ -9,5 +10,3 @@ pub enum Token {
    RBRACK,
 }

-impl Token {}
-
--- a/src/shell.rs
+++ b/src/shell.rs
@@ -1,12 +1,16 @@
 use std::io;

+use super::parsing::lexer::Lexer;
+
 pub fn run_shell() {
    'shell: loop {
        let string = get_line();
        if string.eq("q\n") || string.eq("quit\n") {
            break 'shell;
        }
-        println!("{}", string);
+        let mut lexer = Lexer::new(string);
+        lexer.tokenize();
+        //println!("{}", string);
    }
 }