From 3bbf9d67eca98656bb04e4d46e022e34e9712ae8 Mon Sep 17 00:00:00 2001 From: Janis Date: Thu, 7 Aug 2025 03:21:27 +0200 Subject: [PATCH] tree lexer --- crates/lexer/src/lib.rs | 98 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs index 8b53c6c..0ab28c5 100644 --- a/crates/lexer/src/lib.rs +++ b/crates/lexer/src/lib.rs @@ -89,7 +89,7 @@ macro_rules! tokens { }) => { #[allow(dead_code)] - #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] + #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] $vis enum $ty_name { $($name, )* @@ -442,3 +442,99 @@ impl LexemeParser { self.lexemes.candidates().last().copied() } } + +// but what if...? tree! +mod tree { + use super::Token; + + #[derive(Debug, Default)] + struct Node { + keys: Vec, + values: Vec>, + edges: Vec>>, + } + + #[derive(Debug)] + struct Tree { + root: Option>, + } + + fn build_tree() -> Tree { + let lexemes = Token::lexemes() + .iter() + .map(|(tok, _)| tok.clone()) + .collect::>(); + + let mut tree = Tree { + root: Some(Box::new(Node::default())), + }; + + for tok in lexemes { + let lexeme = tok.lexeme().unwrap(); + let mut current = &mut Box::new(Node::default()); + let mut next = &mut tree.root; + let mut p = 0; + for c in lexeme.chars() { + current = next.get_or_insert(Box::new(Node::default())); + p = match current.keys.binary_search(&c) { + Ok(p) => p, + Err(p) => { + current.keys.insert(p, c); + current.values.insert(p, None); + current.edges.insert(p, None); + p + } + }; + + next = current.edges.get_mut(p).unwrap(); + } + current.values[p] = Some(tok); + } + + tree + } + + fn search_tree(tree: &Tree, mut tokens: impl Iterator) -> Option { + let mut current = tree.root.as_ref().unwrap(); + let mut p = 0; + + loop { + let Some(ch) = tokens.next() else { + break; + }; + + if crate::is_things::is_whitespace(ch) { + break; + } + + p = match current.keys.binary_search(&ch) { + Ok(p) => p, + Err(p) => { + eprintln!("No match for {ch} in {:?} (p={p})", current.keys); + return None; + } + }; + + current = match current.edges.get(p) { + Some(Some(node)) => node, + _ => break, + }; + } + + eprintln!("current: {:?}", current); + current.values.get(p).copied().flatten() + } + + #[cfg(test)] + mod tree_tests { + use super::*; + + #[test] + fn test_tree() { + let tree = build_tree(); + eprintln!("Tree: {tree:?}"); + let tokens = "fn let void"; + assert_eq!(search_tree(&tree, tokens.chars()), Some(Token::Fn)); + } + } +}