iowo/crates/lang/src/lst_parser/input.rs

71 lines
2 KiB
Rust
Raw Normal View History

2024-05-04 22:35:18 +02:00
use enumset::enum_set;
use crate::lst_parser::syntax_kind::SyntaxKind;
2024-04-24 11:07:38 +02:00
2024-05-04 22:35:18 +02:00
use super::syntax_kind::TokenSet;
2024-04-24 11:07:38 +02:00
/// View over a borrowed list of `(SyntaxKind, &str)` tokens together with
/// precomputed index tables into that list.
pub struct Input<'src, 'toks> {
/// the full token list as passed to `Input::new`, meaningless tokens included
raw: &'toks Vec<(SyntaxKind, &'src str)>,
/// indices into `raw` of the "meaningful" tokens, i.e. every token whose kind
/// is not in `MEANINGLESS_TOKS` (so neither whitespace nor newlines are listed here)
meaningful: Vec<usize>,
/// indices into `raw` of newline tokens, for the purpose of easily querying them
/// can be helpful with missing commas etc
newlines: Vec<usize>,
}
2024-05-04 22:35:18 +02:00
/// Token kinds that `Input::new` leaves out of the `meaningful` index table:
/// whitespace and newlines.
pub const MEANINGLESS_TOKS: TokenSet = enum_set!(SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE);
2024-04-24 11:07:38 +02:00
impl<'src, 'toks> Input<'src, 'toks> {
pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
let meaningful = raw_toks
.iter()
.enumerate()
2024-05-04 22:35:18 +02:00
.filter_map(|(i, tok)| {
if MEANINGLESS_TOKS.contains(tok.0) {
None
} else {
Some(i)
}
2024-04-24 11:07:38 +02:00
})
.collect();
let newlines = raw_toks
.iter()
.enumerate()
.filter_map(|(i, tok)| match tok.0 {
SyntaxKind::NEWLINE => Some(i),
_ => None,
})
.collect();
Self {
raw: raw_toks,
meaningful,
newlines,
}
}
#[allow(clippy::unwrap_used, reason = "meaningful indices cannot be invalid")]
pub(crate) fn kind(&self, idx: usize) -> SyntaxKind {
let Some(meaningful_idx) = self.meaningful.get(idx) else {
return SyntaxKind::EOF;
};
self.raw.get(*meaningful_idx).unwrap().0
}
pub(crate) fn preceding_meaningless(&self, idx: usize) -> usize {
assert!(self.meaningful.len() > idx);
if idx == 0 {
1
} else {
self.meaningful[idx] - self.meaningful[idx - 1]
}
}
pub(crate) fn meaningless_tail_len(&self) -> usize {
self.raw.len() - (self.meaningful.last().unwrap() + 1)
}
}