basic tokenization WORKSSSSSSSS BABYYYY
This commit is contained in:
parent
d5da7b0f26
commit
500eb972bd
5 changed files with 36 additions and 38 deletions
|
@ -1,8 +1,8 @@
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
|
||||||
mod shell;
|
pub mod shell;
|
||||||
mod parsing;
|
pub mod parsing;
|
||||||
|
|
||||||
use parsing::preprocessor::preprocess;
|
use parsing::preprocessor::preprocess;
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
use preprocessor::preprocess;
|
use super::preprocessor::preprocess;
|
||||||
|
use super::token::Token;
|
||||||
|
|
||||||
struct Lexer {
|
pub struct Lexer {
|
||||||
code: String,
|
code: String,
|
||||||
pos: usize,
|
pos: usize,
|
||||||
current: Option<char>,
|
current: Option<char>,
|
||||||
|
@ -9,9 +10,9 @@ struct Lexer {
|
||||||
impl Lexer {
|
impl Lexer {
|
||||||
pub fn new(code: String) -> Lexer {
|
pub fn new(code: String) -> Lexer {
|
||||||
Lexer {
|
Lexer {
|
||||||
code,
|
code: code.clone(),
|
||||||
pos: 1,
|
pos: 0,
|
||||||
current: None,
|
current: code.chars().nth(0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,35 +27,29 @@ impl Lexer {
|
||||||
|
|
||||||
pub fn tokenize(&mut self) {
|
pub fn tokenize(&mut self) {
|
||||||
let mut tokens: Vec<Token> = vec![];
|
let mut tokens: Vec<Token> = vec![];
|
||||||
self.code = preprocess(self.code);
|
self.code = preprocess(self.code.clone());
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
//let token = match self.current {
|
let token: Option<Token> = match self.current {
|
||||||
//Some('+') => Token::ADD,
|
Some('+') => Some(Token::ADD),
|
||||||
//Some('-') => Token::SUBTRACT,
|
Some('-') => Some(Token::SUBTRACT),
|
||||||
//Some('*') => Token::MULTIPLY,
|
Some('*') => Some(Token::MULTIPLY),
|
||||||
//Some('/') => Token::DIVIDE,
|
Some('/') => Some(Token::DIVIDE),
|
||||||
//Some('(') => Token::LBRACK,
|
Some('(') => Some(Token::LBRACK),
|
||||||
//Some(')') => Token::RBRACK,
|
Some(')') => Some(Token::RBRACK),
|
||||||
//Some(' ') => continue,
|
Some(' ') => None,
|
||||||
//Some('\n') => continue,
|
Some('\n') => None,
|
||||||
//None => break,
|
|
||||||
//Some(_) => continue,
|
|
||||||
//};
|
|
||||||
|
|
||||||
tokens.push(match self.current {
|
|
||||||
Some('+') => Token::ADD,
|
|
||||||
Some('-') => Token::SUBTRACT,
|
|
||||||
Some('*') => Token::MULTIPLY,
|
|
||||||
Some('/') => Token::DIVIDE,
|
|
||||||
Some('(') => Token::LBRACK,
|
|
||||||
Some(')') => Token::RBRACK,
|
|
||||||
Some(' ') => continue,
|
|
||||||
Some('\n') => continue,
|
|
||||||
None => break,
|
None => break,
|
||||||
Some(_) => continue,
|
Some(_) => None,
|
||||||
});
|
};
|
||||||
self.next();
|
|
||||||
|
match token {
|
||||||
|
Some(token) => {
|
||||||
|
tokens.push(token);
|
||||||
|
self.next();
|
||||||
|
},
|
||||||
|
None => self.next(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("{:?}", tokens);
|
println!("{:?}", tokens);
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use fancy_regex::Regex;
|
use fancy_regex::Regex;
|
||||||
|
|
||||||
pub fn preprocess(raw_code: String) {
|
pub fn preprocess(raw_code: String) -> String {
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref COMMENT_RE: Regex = Regex::new(r"(\]\].*?\[\[|&&.*\n|;.*\n|``(.|\n)*?´´)").unwrap();
|
static ref COMMENT_RE: Regex = Regex::new(r"(\]\].*?\[\[|&&.*\n|;.*\n|``(.|\n)*?´´)").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
let code = COMMENT_RE.replace_all(&raw_code, "");
|
let code = COMMENT_RE.replace_all(&raw_code, "");
|
||||||
let code = remove_empty_lines(code.to_string());
|
let code = remove_empty_lines(code.to_string());
|
||||||
println!("{}", &code)
|
code
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn remove_empty_lines(text: String) -> String {
|
pub fn remove_empty_lines(text: String) -> String {
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
#[derive(Debug)]
|
||||||
pub enum Token {
|
pub enum Token {
|
||||||
INT,
|
INT,
|
||||||
FLOAT,
|
FLOAT,
|
||||||
|
@ -9,5 +10,3 @@ pub enum Token {
|
||||||
RBRACK,
|
RBRACK,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Token {}
|
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,16 @@
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
|
use super::parsing::lexer::Lexer;
|
||||||
|
|
||||||
pub fn run_shell() {
|
pub fn run_shell() {
|
||||||
'shell: loop {
|
'shell: loop {
|
||||||
let string = get_line();
|
let string = get_line();
|
||||||
if string.eq("q\n") || string.eq("quit\n") {
|
if string.eq("q\n") || string.eq("quit\n") {
|
||||||
break 'shell;
|
break 'shell;
|
||||||
}
|
}
|
||||||
println!("{}", string);
|
let mut lexer = Lexer::new(string);
|
||||||
|
lexer.tokenize();
|
||||||
|
//println!("{}", string);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue