Replace the hand-rolled prefix tree (`mod trie`) with `LexemeIterator`.

The private `mod trie` (Node/Tree, build_tree, search_tree and its test) is
removed. In its place, `LexemeIterator` wraps a char iterator, inserts every
`Token::lexemes()` entry into a `trie::Tree`, and yields `Token`s: each
`next()` first skips whitespace, then returns whatever `Tree::get_closest`
matches on the remaining input.

NOTE(review): this patch was recovered from a copy in which all line breaks
were collapsed and every `<...>` generic argument list was dropped. The line
structure below was rebuilt to agree with the hunk header (120 old lines,
80 new lines). All generic argument lists are reconstructions; those on the
removed side follow from how the fields are used, but the type arguments of
`Tree` and `OnceAndIter` and the spelling of the `Option<...>` return types
are not determined by the visible code -- confirm against the `trie` module
before applying.

diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs
index b422eb9..4ca2263 100644
--- a/crates/lexer/src/lib.rs
+++ b/crates/lexer/src/lib.rs
@@ -445,120 +445,80 @@ impl LexemeParser {
     }
 }
 
-// but what if...? prefix-tree!
-mod trie {
-    use super::Token;
-    use std::collections::BTreeMap;
+use trie::{OnceAndIter, Tree};
 
-    #[derive(Debug, Default)]
-    struct Node {
-        keys: Vec<char>,
-        values: Vec<Option<Token>>,
-        edges: Vec<Option<Box<Node>>>,
-    }
+pub struct LexemeIterator<I: Iterator<Item = char>> {
+    trie: Tree<char, Token>,
+    iter: OnceAndIter<I, char>,
+}
 
-    #[derive(Debug)]
-    struct Tree {
-        root: Option<Box<Node>>,
-    }
+impl<I: Iterator<Item = char>> LexemeIterator<I> {
+    pub fn new(iter: I) -> Self {
+        let mut trie = Tree::new();
 
-    fn build_tree() -> Tree {
-        let lexemes = Token::lexemes()
-            .iter()
-            .map(|(tok, _)| tok.clone())
-            .collect::<Vec<_>>();
-
-        let mut tree = Tree {
-            root: Some(Box::new(Node::default())),
-        };
-
-        for tok in lexemes {
-            let lexeme = tok.lexeme().unwrap();
-            let mut current = &mut Box::new(Node::default());
-            let mut next = &mut tree.root;
-            let mut p = 0;
-            for c in lexeme.chars() {
-                current = next.get_or_insert(Box::new(Node::default()));
-                p = match current.keys.binary_search(&c) {
-                    Ok(p) => p,
-                    Err(p) => {
-                        current.keys.insert(p, c);
-                        current.values.insert(p, None);
-                        current.edges.insert(p, None);
-                        p
-                    }
-                };
-
-                next = current.edges.get_mut(p).unwrap();
-            }
-            current.values[p] = Some(tok);
+        for (token, token_str) in Token::lexemes() {
+            trie.insert(token_str.chars(), *token);
         }
 
-        tree
+        Self {
+            trie,
+            iter: iter.into(),
+        }
     }
 
-    fn search_tree(tree: &Tree, mut tokens: impl Iterator<Item = char>) -> Option<Token> {
-        let mut current = tree.root.as_ref().unwrap();
-        let mut p = None;
+    fn parse(&mut self) -> Option<Token> {
+        match self.trie.get_closest(&mut self.iter) {
+            Some((Some(key), token)) => {
+                // skip the peeked item
+                self.iter.set_once(key);
+                Some(*token)
+            }
+            Some((None, token)) => Some(*token),
+            None => None,
+        }
+    }
 
+    fn skip_whitespaces(&mut self) {
         loop {
-            let Some(ch) = tokens.next() else {
-                break;
-            };
+            let Some(c) = self.iter.next() else { break };
 
-            if crate::is_things::is_whitespace(ch) {
+            if is_things::is_whitespace(c) {
+                continue;
+            } else {
+                self.iter.set_once(c);
                 break;
             }
-
-            let n = match current.keys.binary_search(&ch) {
-                Ok(p) => p,
-                Err(p) => {
-                    eprintln!("No match for {ch} in {:?} (p={p})", current.keys);
-                    return None;
-                }
-            };
-
-            current = match current.edges.get(n) {
-                Some(Some(node)) => node,
-                _ => {
-                    p = Some(n);
-                    break;
-                }
-            };
-        }
-
-        eprintln!("current: {:?}", current);
-        current.values.get(p?).copied().flatten()
-    }
-
-    #[cfg(test)]
-    mod tree_tests {
-        use super::*;
-
-        #[test]
-        fn test_tree() {
-            use werkzeug::iter::AdvanceWhile;
-            let tree = build_tree();
-            eprintln!("Tree: {tree:?}");
-            let mut tokens = "fn let void+++(++bool)".chars();
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::Fn));
-            tokens.advance_while(|&c| crate::is_things::is_whitespace(c));
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::Let));
-            tokens.advance_while(|&c| crate::is_things::is_whitespace(c));
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::Void));
-            tokens.advance_while(|&c| crate::is_things::is_whitespace(c));
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::PlusPlus));
-            tokens.advance_while(|&c| crate::is_things::is_whitespace(c));
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::Plus));
-            tokens.advance_while(|&c| crate::is_things::is_whitespace(c));
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::OpenParens));
-            tokens.advance_while(|&c| crate::is_things::is_whitespace(c));
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::PlusPlus));
-            tokens.advance_while(|&c| crate::is_things::is_whitespace(c));
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::Bool));
-            tokens.advance_while(|&c| crate::is_things::is_whitespace(c));
-            assert_eq!(search_tree(&tree, &mut tokens), Some(Token::CloseParens));
-            assert_eq!(search_tree(&tree, &mut tokens), None);
         }
     }
 }
+
+impl<I: Iterator<Item = char>> Iterator for LexemeIterator<I> {
+    type Item = Token;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // skip whitespace
+        self.skip_whitespaces();
+
+        self.parse()
+    }
+}
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_iterator() {
+        let mut tokens = "fn let void+++(++bool)".chars();
+        let mut lexer = LexemeIterator::new(&mut tokens);
+        assert_eq!(lexer.next(), Some(Token::Fn));
+        assert_eq!(lexer.next(), Some(Token::Let));
+        assert_eq!(lexer.next(), Some(Token::Void));
+        assert_eq!(lexer.next(), Some(Token::PlusPlus));
+        assert_eq!(lexer.next(), Some(Token::Plus));
+        assert_eq!(lexer.next(), Some(Token::OpenParens));
+        assert_eq!(lexer.next(), Some(Token::PlusPlus));
+        assert_eq!(lexer.next(), Some(Token::Bool));
+        assert_eq!(lexer.next(), Some(Token::CloseParens));
+        assert_eq!(lexer.next(), None);
+    }
+}