use enumset::enum_set;

use crate::lst_parser::syntax_kind::SyntaxKind;

use super::syntax_kind::TokenSet;
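/// Parser input over a pre-lexed token stream of `(SyntaxKind, &str)` pairs.
/// Pre-computes which token indices are meaningful (non-trivia) and where the
/// newlines are, so the parser can skip trivia and query newline positions cheaply.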
pub struct Input<'src, 'toks> {
    raw: &'toks Vec<(SyntaxKind, &'src str)>,
    /// indices of the "meaningful" tokens (not whitespace etc)
    /// includes newlines because those might indeed help with finding errors
    meaningful: Vec<usize>,
    /// indices of newlines for the purpose of easily querying them
    /// can be helpful with missing commas etc
    newlines: Vec<usize>,
}

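/// Trivia tokens the parser skips over; these are excluded from `meaningful`.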
pub const MEANINGLESS_TOKS: TokenSet = enum_set!(SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE);

impl<'src, 'toks> Input<'src, 'toks> {
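    /// Builds the input from the raw lexer output, collecting the indices of
    /// meaningful tokens and of newlines in one pass each.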
    pub fn new(raw_toks: &'toks Vec<(SyntaxKind, &'src str)>) -> Self {
        let meaningful = raw_toks
            .iter()
            .enumerate()
            .filter_map(|(i, tok)| {
                if MEANINGLESS_TOKS.contains(tok.0) {
                    None
                } else {
                    Some(i)
                }
            })
            .collect();
        let newlines = raw_toks
            .iter()
            .enumerate()
            .filter_map(|(i, tok)| match tok.0 {
                SyntaxKind::NEWLINE => Some(i),
                _ => None,
            })
            .collect();

        Self {
            raw: raw_toks,
            meaningful,
            newlines,
        }
    }

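    /// Kind of the `idx`-th meaningful token, or `EOF` once `idx` runs past
    /// the end of the meaningful tokens.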
    #[allow(clippy::unwrap_used, reason = "meaningful indices cannot be invalid")]
    pub(crate) fn kind(&self, idx: usize) -> SyntaxKind {
        let Some(meaningful_idx) = self.meaningful.get(idx) else {
            return SyntaxKind::EOF;
        };

        self.raw.get(*meaningful_idx).unwrap().0
    }

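    /// How many raw tokens lie between meaningful token `idx - 1` and
    /// meaningful token `idx` (the token itself plus any skipped trivia);
    /// `1` for the first meaningful token.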
    pub(crate) fn preceding_meaningless(&self, idx: usize) -> usize {
        assert!(self.meaningful.len() > idx);

        if idx == 0 {
            1
        } else {
            self.meaningful[idx] - self.meaningful[idx - 1]
        }
    }

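    /// Number of trailing raw tokens after the last meaningful token
    /// (e.g. trailing whitespace and newlines at the end of the input).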
    pub(crate) fn meaningless_tail_len(&self) -> usize {
        self.raw.len() - (self.meaningful.last().unwrap() + 1)
    }
}