start with bare basics

This commit is contained in:
Schrottkatze 2023-11-15 10:55:14 +01:00
commit b71b7f309b
10 changed files with 712 additions and 0 deletions

58
src/lexer.rs Normal file
View file

@ -0,0 +1,58 @@
use logos::Logos;
/// Lexical tokens of the pipeline language, produced by the `logos` derive.
///
/// Runs of whitespace between tokens are skipped and never surface as tokens.
#[derive(Logos, Debug, PartialEq)]
#[logos(skip r"[\s]+")]
pub enum Token<'a> {
    /// A run of word characters, borrowed from the input.
    #[regex("[\\w]+", |lex| lex.slice())]
    Word(&'a str),
    /// A run of decimal digits parsed as `i64`.
    ///
    /// The explicit priority is presumably there to disambiguate from `Word`, whose pattern
    /// also matches digit runs. A literal that does not fit into an `i64` makes the callback
    /// return `None`, which surfaces as a lexer error instead of a panic.
    #[regex("[\\d]+", priority = 2, callback = |lex| lex.slice().parse::<i64>().ok())]
    IntLiteral(i64),
    /// Digits, a literal dot, then digits, parsed as `f64`.
    ///
    /// The dot is escaped so that only `.` separates the two digit runs; an unescaped `.`
    /// would match any character (e.g. `1 2`) and make the `f64` parse fail.
    #[regex("[\\d]+\\.[\\d]+", |lex| lex.slice().parse::<f64>().ok())]
    FloatLiteral(f64),
    /// A double-quoted string. The stored value is the raw slice, i.e. it still contains the
    /// surrounding quotes and any escape sequences exactly as written.
    ///
    /// Accepted escapes are `\"`, `\\`, `\b`, `\n`, `\f`, `\r`, `\t` and `\uXXXX`.
    #[regex(r#""([^"\\]|\\(["\\bnfrt]|u[a-fA-F0-9]{4}))*""#, |lex| lex.slice().to_owned())]
    StringLiteral(String),
    /// The keyword `def`.
    #[token("def")]
    Define,
    /// The keyword `type`.
    #[token("type")]
    Type,
    /// `->`
    #[token("->")]
    RightArrow,
    /// `|`
    #[token("|")]
    Pipe,
    /// `[`
    #[token("[")]
    BracketOpening,
    /// `]`
    #[token("]")]
    BracketClosing,
    /// `(`
    #[token("(")]
    ParensOpening,
    /// `)`
    #[token(")")]
    ParensClosing,
    /// `{`
    #[token("{")]
    BraceOpening,
    /// `}`
    #[token("}")]
    BraceClosing,
    /// `+`
    #[token("+")]
    Plus,
    /// `-`
    #[token("-")]
    Minus,
    /// `*`
    #[token("*")]
    Multiply,
    /// `/`
    #[token("/")]
    Divide,
    /// `%`
    #[token("%")]
    Percent,
    /// `&`
    #[token("&")]
    Ampersand,
    /// `:`
    #[token(":")]
    Colon,
    /// `;`
    #[token(";")]
    Semicolon,
    /// `.`
    #[token(".")]
    Dot,
    /// `,`
    #[token(",")]
    Comma,
    /// `!`
    #[token("!")]
    ExclaimationMark,
    /// `?`
    #[token("?")]
    QuestionMark,
}

16
src/main.rs Normal file
View file

@ -0,0 +1,16 @@
use lexer::Token;
use logos::Lexer;
use logos::Logos;
use syntax::parse;
use utils::ws;
use winnow::prelude::*;
use winnow::Parser;
mod lexer;
mod syntax;
mod utils;
/// Entry point: parses a hard-coded sample pipeline and prints the parse result with `dbg!`.
fn main() {
// Sample input: three commands joined by `|`, two of them with a quoted path argument.
let input = "load \"./image.png\" | invert | save \"./image_processed.jpg\"";
dbg!(parse(input));
}

98
src/syntax.rs Normal file
View file

@ -0,0 +1,98 @@
use std::mem;
use logos::Logos;
use logos::Span;
use crate::lexer::Token;
/// One element of a parsed pipeline: its kind together with the source span it covers.
#[derive(Debug, Clone, PartialEq)]
pub struct PipelineElement {
// What this element is (a pipe separator or a command).
kind: PipelineElementKind,
// Range in the input string covered by this element, as reported by the lexer.
span: Span,
}
/// The two kinds of element `parse` produces.
#[derive(Debug, Clone, PartialEq)]
pub enum PipelineElementKind {
/// A `|` separator between commands.
Pipe,
/// A command: the consecutive parts collected between pipes (or input boundaries).
Command(Vec<CommandPart>),
}
/// A single part of a command (a word or a literal) together with its source span.
#[derive(Debug, Clone, PartialEq)]
pub struct CommandPart {
// The parsed value of this part.
kind: CommandPartKind,
// Range in the input string of the token this part was built from.
span: Span,
}
/// The value carried by a `CommandPart`; each variant mirrors one lexer token kind.
#[derive(Debug, Clone, PartialEq)]
pub enum CommandPartKind {
/// Built from `Token::Word`; the word is copied into an owned `String`.
Word(String),
/// Built from `Token::IntLiteral`.
Integer(i64),
/// Built from `Token::FloatLiteral`.
Float(f64),
/// Built from `Token::StringLiteral`; holds whatever string the lexer produced for it.
String(String),
}
/// Appends the parts collected in `pending` to `out` as a single command element.
///
/// Does nothing when `pending` is empty. Otherwise `pending` is left empty and the command's
/// span runs from the start of its first part to the end of its last part.
fn flush_command(pending: &mut Vec<CommandPart>, out: &mut Vec<PipelineElement>) {
    let (start, end) = match (pending.first(), pending.last()) {
        (Some(first), Some(last)) => (first.span.start, last.span.end),
        _ => return,
    };
    out.push(PipelineElement {
        kind: PipelineElementKind::Command(mem::take(pending)),
        span: start..end,
    });
}

/// Lexes `input` and groups the resulting tokens into a flat list of pipeline elements.
///
/// Words and int/float/string literals are collected into the current command. Every `|`
/// token closes the current command (if it has any parts) and is itself emitted as a
/// [`PipelineElementKind::Pipe`] element. A trailing command is emitted at end of input.
///
/// All other tokens, as well as spans the lexer could not tokenize, are skipped silently;
/// the return type has no channel for reporting them.
pub fn parse(input: &str) -> Vec<PipelineElement> {
    let mut elements = Vec::new();
    let mut partial_command: Vec<CommandPart> = Vec::new();

    for (tok, span) in Token::lexer(input).spanned() {
        let kind = match tok {
            Ok(Token::Pipe) => {
                // The command must be pushed before the pipe that terminates it.
                flush_command(&mut partial_command, &mut elements);
                elements.push(PipelineElement {
                    kind: PipelineElementKind::Pipe,
                    span,
                });
                continue;
            }
            Ok(Token::Word(word)) => CommandPartKind::Word(word.to_owned()),
            Ok(Token::IntLiteral(int)) => CommandPartKind::Integer(int),
            Ok(Token::FloatLiteral(float)) => CommandPartKind::Float(float),
            Ok(Token::StringLiteral(string)) => CommandPartKind::String(string),
            // Tokens with no meaning inside a command yet, and lexer errors, are ignored.
            Ok(_) | Err(_) => continue,
        };
        partial_command.push(CommandPart { kind, span });
    }

    // Input that does not end in a pipe leaves one last command pending.
    flush_command(&mut partial_command, &mut elements);
    elements
}
/// Checks the overall shape of a simple three-command pipeline.
///
/// The paths are unquoted, so they are lexed as separate word/punctuation tokens; only the
/// sequence of commands and pipes is asserted here, not the contents of each command.
#[test]
fn test_simple_parse_pipeline() {
    let test_pipeline = "load ./image.png | invert | save ./image_processed.jpg";
    let elements = parse(test_pipeline);

    // Three commands separated by two pipes.
    assert_eq!(elements.len(), 5);
    for (idx, element) in elements.iter().enumerate() {
        let is_pipe = matches!(element.kind, PipelineElementKind::Pipe);
        assert_eq!(
            is_pipe,
            idx % 2 == 1,
            "unexpected element kind at index {}",
            idx
        );
    }
}

13
src/utils.rs Normal file
View file

@ -0,0 +1,13 @@
use winnow::ascii::space0;
// from https://docs.rs/winnow/latest/winnow/_topic/language/index.html#whitespace
use winnow::prelude::*;
use winnow::{ascii::multispace0, combinator::delimited, error::ParserError};
/// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and
/// trailing whitespace, returning the output of `inner`.
///
/// NOTE(review): the surrounding parsers are `space0`, not the `multispace0` used in the winnow
/// example this was adapted from. Per winnow's documentation `space0` matches only spaces and
/// tabs, so line endings around `inner` are presumably left unconsumed — confirm this is intended.
pub fn ws<'a, F, O, E: ParserError<&'a str>>(inner: F) -> impl Parser<&'a str, O, E>
where
F: Parser<&'a str, O, E>,
{
// `delimited` runs all three parsers in order and keeps only the middle one's output.
delimited(space0, inner, space0)
}