tree lexer

This commit is contained in:
Janis 2025-08-07 03:21:27 +02:00
parent bc0acf7e19
commit 3bbf9d67ec

View file

@ -89,7 +89,7 @@ macro_rules! tokens {
}) => {
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
$vis enum $ty_name {
$($name,
)*
@ -442,3 +442,99 @@ impl LexemeParser {
self.lexemes.candidates().last().copied()
}
}
// But what if lexemes were matched with a prefix tree (trie) instead?
mod tree {
use super::Token;
/// One node of the lexeme prefix tree, stored as three parallel vectors that
/// share the same index: position `i` describes the character `keys[i]`.
#[derive(Debug, Default)]
struct Node {
/// Characters accepted at this node. `build_tree` inserts each new key at the
/// position reported by `binary_search`, which keeps this vector sorted.
keys: Vec<char>,
/// `values[i]` holds the token whose lexeme ends with `keys[i]` at this node,
/// or `None` when that prefix is not a complete lexeme.
values: Vec<Option<Token>>,
/// `edges[i]` holds the child node for lexemes that continue past `keys[i]`,
/// or `None` when no lexeme continues.
edges: Vec<Option<Box<Node>>>,
}
/// Prefix tree over token lexemes.
#[derive(Debug)]
struct Tree {
/// Node consulted for the first character of a lexeme; `build_tree` always
/// initialises it to `Some`.
root: Option<Box<Node>>,
}
/// Builds the prefix tree containing every token yielded by `Token::lexemes()`.
///
/// Each lexeme is inserted character by character. At every level the
/// character is located with a binary search over the node's sorted `keys`;
/// a missing character is inserted at the reported position in all three
/// parallel vectors so they stay aligned. The token is stored in the `values`
/// slot of the lexeme's final character.
///
/// A token whose lexeme is empty contributes nothing to the tree.
///
/// # Panics
///
/// Panics if a token yielded by `Token::lexemes()` returns `None` from
/// `lexeme()`.
fn build_tree() -> Tree {
    let mut tree = Tree {
        root: Some(Box::default()),
    };

    for tok in Token::lexemes().iter().map(|(tok, _)| tok.clone()) {
        let lexeme = tok
            .lexeme()
            .expect("Token::lexemes() yielded a token without a lexeme");

        // `link` is the slot holding the node for the next character; child
        // nodes are allocated lazily, only when a lexeme actually reaches them.
        let mut link = &mut tree.root;
        let mut chars = lexeme.chars().peekable();
        while let Some(c) = chars.next() {
            let node = link.get_or_insert_with(Box::default);
            let p = match node.keys.binary_search(&c) {
                Ok(p) => p,
                Err(p) => {
                    node.keys.insert(p, c);
                    node.values.insert(p, None);
                    node.edges.insert(p, None);
                    p
                }
            };
            // The final character of the lexeme owns the token.
            if chars.peek().is_none() {
                node.values[p] = Some(tok);
            }
            link = &mut node.edges[p];
        }
    }

    tree
}
/// Looks up the token spelled by the leading characters of `tokens`.
///
/// Characters are consumed until the input ends or a whitespace character
/// (per `crate::is_things::is_whitespace`) is read. For each character:
///
/// * if the current node has no key for it, the search fails with `None`;
/// * otherwise the token stored for the prefix consumed so far is remembered,
///   and the search descends into the child node. If there is no child the
///   search stops there and any remaining characters are not examined.
///
/// Returns the token stored for the last matched character, or `None` when
/// that prefix is not a complete lexeme, when no character was consumed, or
/// when the tree has no root.
fn search_tree(tree: &Tree, tokens: impl Iterator<Item = char>) -> Option<Token> {
    let mut node = tree.root.as_deref()?;
    // Token belonging to the prefix matched so far. It is read from the node
    // that owns the matched key *before* descending, so the key index is never
    // applied to a different node.
    let mut matched = None;

    for ch in tokens {
        if crate::is_things::is_whitespace(ch) {
            break;
        }
        let Ok(p) = node.keys.binary_search(&ch) else {
            return None;
        };
        matched = node.values.get(p).copied().flatten();
        match node.edges.get(p).and_then(|edge| edge.as_deref()) {
            Some(child) => node = child,
            None => break,
        }
    }

    matched
}
#[cfg(test)]
mod tree_tests {
    use super::*;

    /// The first whitespace-delimited word of the input must resolve to its token.
    #[test]
    fn test_tree() {
        let tree = build_tree();
        eprintln!("Tree: {tree:?}");

        let found = search_tree(&tree, "fn let void".chars());
        assert_eq!(found, Some(Token::Fn));
    }
}
}