From 500eb972bdeaab9a15a31536b5f3593bee2e38c7 Mon Sep 17 00:00:00 2001
From: Gabriel <68819302+obsidianical@users.noreply.github.com>
Date: Mon, 20 Dec 2021 14:34:21 +0100
Subject: [PATCH] basic tokenization WORKSSSSSSSS BABYYYY

---
 src/main.rs                 |  4 +--
 src/parsing/lexer.rs        | 57 +++++++++++++++++--------------------
 src/parsing/preprocessor.rs |  4 +--
 src/parsing/token.rs        |  3 +-
 src/shell.rs                |  6 +++-
 5 files changed, 36 insertions(+), 38 deletions(-)
diff --git a/src/main.rs b/src/main.rs
index 8731b4e..46bb143 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,8 +1,8 @@
 use std::env;
 use std::fs;
 
-mod shell;
-mod parsing;
+pub mod shell;
+pub mod parsing;
 
 use parsing::preprocessor::preprocess;
 
diff --git a/src/parsing/lexer.rs b/src/parsing/lexer.rs
index 260b4ba..78ae7da 100644
--- a/src/parsing/lexer.rs
+++ b/src/parsing/lexer.rs
@@ -1,6 +1,7 @@
-use preprocessor::preprocess;
+use super::preprocessor::preprocess;
+use super::token::Token;
 
-struct Lexer {
+pub struct Lexer {
     code: String,
     pos: usize,
     current: Option<char>,
@@ -9,9 +10,9 @@ struct Lexer {
 impl Lexer {
     pub fn new(code: String) -> Lexer {
         Lexer {
-            code,
-            pos: 1,
-            current: None,
+            current: code.chars().next(), // borrow ends here, so `code` can be moved below without a clone
+            pos: 0, // index of the character held in `current`
+            code,
         }
     }
 
@@ -26,35 +27,29 @@ impl Lexer {
 
     pub fn tokenize(&mut self) {
         let mut tokens: Vec<Token> = vec![];
-        self.code = preprocess(self.code);
+        self.code = preprocess(self.code.clone());
         
         loop {
-            //let token = match self.current {
-                //Some('+') => Token::ADD,
-                //Some('-') => Token::SUBTRACT,
-                //Some('*') => Token::MULTIPLY,
-                //Some('/') => Token::DIVIDE,
-                //Some('(') => Token::LBRACK,
-                //Some(')') => Token::RBRACK,
-                //Some(' ') => continue,
-                //Some('\n') => continue,
-                //None => break,
-                //Some(_) => continue,
-            //};
-
-            tokens.push(match self.current {
-                Some('+') => Token::ADD,
-                Some('-') => Token::SUBTRACT,
-                Some('*') => Token::MULTIPLY,
-                Some('/') => Token::DIVIDE,
-                Some('(') => Token::LBRACK,
-                Some(')') => Token::RBRACK,
-                Some(' ') => continue,
-                Some('\n') => continue,
+            let token: Option<Token> = match self.current {
+                Some('+') => Some(Token::ADD),
+                Some('-') => Some(Token::SUBTRACT),
+                Some('*') => Some(Token::MULTIPLY),
+                Some('/') => Some(Token::DIVIDE),
+                Some('(') => Some(Token::LBRACK),
+                Some(')') => Some(Token::RBRACK),
+                Some(' ') => None,
+                Some('\n') => None,
                 None => break,
-                Some(_) => continue,
-            });
-            self.next();
+                Some(_) => None,
+            };
+
+            match token {
+                Some(token) => {
+                    tokens.push(token);
+                    self.next();
+                },
+                None => self.next(),
+            }
         }
 
         println!("{:?}", tokens);
diff --git a/src/parsing/preprocessor.rs b/src/parsing/preprocessor.rs
index 4f57c18..0241f57 100644
--- a/src/parsing/preprocessor.rs
+++ b/src/parsing/preprocessor.rs
@@ -1,14 +1,14 @@
 use lazy_static::lazy_static;
 use fancy_regex::Regex;
 
-pub fn preprocess(raw_code: String) {
+pub fn preprocess(raw_code: String) -> String {
     lazy_static! {
         static ref COMMENT_RE: Regex = Regex::new(r"(\]\].*?\[\[|&&.*\n|;.*\n|``(.|\n)*?´´)").unwrap();
     }
 
     let code = COMMENT_RE.replace_all(&raw_code, "");
     let code = remove_empty_lines(code.to_string());
-    println!("{}", &code)
+    code // hand the text (COMMENT_RE matches removed, then passed through `remove_empty_lines`) back to the caller
 }
 
 pub fn remove_empty_lines(text: String) -> String {
diff --git a/src/parsing/token.rs b/src/parsing/token.rs
index cd6f344..9622421 100644
--- a/src/parsing/token.rs
+++ b/src/parsing/token.rs
@@ -1,3 +1,4 @@
+#[derive(Debug)]
 pub enum Token {
     INT,
     FLOAT,
@@ -9,5 +10,3 @@ pub enum Token {
     RBRACK,
 }
 
-impl Token {}
-
diff --git a/src/shell.rs b/src/shell.rs
index f918835..e2d566f 100644
--- a/src/shell.rs
+++ b/src/shell.rs
@@ -1,12 +1,16 @@
 use std::io;
 
+use super::parsing::lexer::Lexer;
+
 pub fn run_shell() {
     'shell: loop {
         let string = get_line();
         if string.eq("q\n") || string.eq("quit\n") {
             break 'shell;
         }
-        println!("{}", string);
+        let mut lexer = Lexer::new(string); // `string` is moved into the lexer here
+        lexer.tokenize();
+        // `tokenize` prints the resulting tokens itself, so the input line is not echoed here.
     }
 }