more refactoring

janis 2025-09-27 18:20:52 +02:00
parent 6e0fed0962
commit 2790bc561f
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8


@@ -297,21 +297,29 @@ impl Token {
}
}
use std::ops::Range;
use trie::Tree;
pub struct TokenItem<'a> {
pub token: Token,
pub lexeme: &'a str,
pub offset: u32,
}
#[derive(Debug, Clone, Copy)]
struct CountingIterator<I: Iterator> {
struct CharCountingIterator<I: Iterator> {
iter: I,
count: usize,
}
impl<I: Iterator> From<I> for CountingIterator<I> {
impl<I: Iterator> From<I> for CharCountingIterator<I> {
fn from(iter: I) -> Self {
Self { iter, count: 0 }
}
}
impl<I: Iterator<Item = char>> Iterator for CountingIterator<I> {
impl<I: Iterator<Item = char>> Iterator for CharCountingIterator<I> {
type Item = I::Item;
fn next(&mut self) -> Option<Self::Item> {
@@ -319,13 +327,13 @@ impl<I: Iterator<Item = char>> Iterator for CountingIterator<I> {
}
}
impl<I: Iterator> CountingIterator<I> {
impl<I: Iterator> CharCountingIterator<I> {
pub(crate) fn offset(&self) -> usize {
self.count
}
}
impl<I: Iterator> core::ops::Deref for CountingIterator<I> {
impl<I: Iterator> core::ops::Deref for CharCountingIterator<I> {
type Target = I;
fn deref(&self) -> &Self::Target {
@@ -333,13 +341,13 @@ impl<I: Iterator> core::ops::Deref for CountingIterator<I> {
}
}
impl<I: Iterator> core::ops::DerefMut for CountingIterator<I> {
impl<I: Iterator> core::ops::DerefMut for CharCountingIterator<I> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.iter
}
}
type Source<'a> = CountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
type Source<'a> = CharCountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
pub struct TokenIterator<'a> {
trie: Tree<char, Token>,
@@ -363,11 +371,11 @@ impl<'a> TokenIterator<'a> {
}
fn peekable_source(&self) -> Source<'a> {
CountingIterator::from(self.source[self.offset..].chars().peekable())
CharCountingIterator::from(self.source[self.offset..].chars().peekable())
}
fn parse(&mut self) -> Option<Token> {
let mut iter = CountingIterator::from(self.source[self.offset..].chars());
let mut iter = CharCountingIterator::from(self.source[self.offset..].chars());
match self.trie.get_closest(&mut iter) {
Some(token) => {
@@ -407,12 +415,8 @@ impl<'a> TokenIterator<'a> {
}
count
}
}
impl<'a> Iterator for TokenIterator<'a> {
type Item = (Token, &'a str);
fn next(&mut self) -> Option<Self::Item> {
fn next_token(&mut self) -> Option<(Token, Range<usize>)> {
// skip whitespace
self.skip_whitespaces();
@@ -425,19 +429,19 @@ impl<'a> Iterator for TokenIterator<'a> {
let token = complex_tokens::parse_constant(&mut source).ok()?;
self.offset += source.offset();
Some((token, &self.source[start..self.offset]))
Some(token)
}
Some('.') if cursor.next().map_or(false, is_things::is_digit) => {
let token = complex_tokens::parse_constant(&mut source).ok()?;
self.offset += source.offset();
Some((token, &self.source[start..self.offset]))
Some(token)
}
Some('\'' | '"') => {
let token = complex_tokens::parse_string_or_char_constant(&mut source).ok()?;
self.offset += source.offset();
Some((token, &self.source[start..self.offset]))
Some(token)
}
Some('`') => {
// raw identifier
@@ -445,10 +449,10 @@ impl<'a> Iterator for TokenIterator<'a> {
self.skip_while(|c| is_things::is_id_continue(c));
if self.peekable_source().next() == Some('`') {
self.skip(1);
Some((Token::Ident, &self.source[start..self.offset]))
Some(Token::Ident)
} else {
// unterminated raw identifier
Some((Token::ParseError, &self.source[start..self.offset]))
Some(Token::ParseError)
}
}
// `//`-style comments or doc-comments
@@ -465,9 +469,9 @@ impl<'a> Iterator for TokenIterator<'a> {
}) {
Some(tok) => {
if tok.maybe_ident() && self.skip_while(|c| is_things::is_id_continue(c)) > 0 {
Some((Token::Ident, &self.source[start..self.offset]))
Some(Token::Ident)
} else {
Some((tok, &self.source[start..self.offset]))
Some(tok)
}
}
None => {
@@ -478,15 +482,49 @@ impl<'a> Iterator for TokenIterator<'a> {
{
self.skip(1);
self.skip_while(|c| is_things::is_id_continue(c));
Some((Token::Ident, &self.source[start..self.offset]))
Some(Token::Ident)
} else {
None
}
}
},
};
}?;
token
Some((token, start..self.offset))
}
fn next_token_item(&mut self) -> Option<TokenItem<'a>> {
let (token, range) = self.next_token()?;
let lexeme = &self.source[range.clone()];
Some(TokenItem {
token,
lexeme,
offset: range.start as u32,
})
}
pub fn into_token_items(self) -> TokenItemIterator<'a> {
TokenItemIterator { inner: self }
}
}
impl<'a> Iterator for TokenIterator<'a> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.next_token().map(|(token, _)| token)
}
}
pub struct TokenItemIterator<'a> {
inner: TokenIterator<'a>,
}
impl<'a> Iterator for TokenItemIterator<'a> {
type Item = TokenItem<'a>;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next_token_item()
}
}
@@ -500,27 +538,27 @@ mod tests {
fn test_iterator() {
let tokens = "fn let void+(+bool)";
let mut lexer = TokenIterator::new(&tokens);
assert_eq!(lexer.next(), Some((Token::Fn, "fn")));
assert_eq!(lexer.next(), Some((Token::Let, "let")));
assert_eq!(lexer.next(), Some((Token::Void, "void")));
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
assert_eq!(lexer.next(), Some((Token::OpenParens, "(")));
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
assert_eq!(lexer.next(), Some((Token::Bool, "bool")));
assert_eq!(lexer.next(), Some((Token::CloseParens, ")")));
assert_eq!(lexer.next(), Some(Token::Fn));
assert_eq!(lexer.next(), Some(Token::Let));
assert_eq!(lexer.next(), Some(Token::Void));
assert_eq!(lexer.next(), Some(Token::Plus));
assert_eq!(lexer.next(), Some(Token::OpenParens));
assert_eq!(lexer.next(), Some(Token::Plus));
assert_eq!(lexer.next(), Some(Token::Bool));
assert_eq!(lexer.next(), Some(Token::CloseParens));
assert_eq!(lexer.next(), None);
}
#[test]
fn idents() {
let lexer = TokenIterator::new("a a1 a_ a-b _a _1 _- -a -1 -_ `123");
assert!(lexer.map(|(tok, _)| tok).all(|tok| tok == Token::Ident));
let mut lexer = TokenIterator::new("a a1 a_ a-b _a _1 _- -a -1 -_ `123");
assert!(lexer.all(|tok| tok == Token::Ident));
}
#[test]
fn ident_minus_ambiguity() {
let lexer = TokenIterator::new("a-a a- - a -a --a");
let tokens = lexer.map(|(tok, _)| tok).collect::<Vec<_>>();
let tokens = lexer.collect::<Vec<_>>();
assert_eq!(
tokens,
vec![
@@ -537,27 +575,30 @@ mod tests {
#[test]
fn complex_iterator() {
let tokens = "fn my-function(x: i32, y: f32) -> f32 { return x + y; }";
let mut lexer = TokenIterator::new(&tokens);
assert_eq!(lexer.next(), Some((Token::Fn, "fn")));
assert_eq!(lexer.next(), Some((Token::Ident, "my-function")));
assert_eq!(lexer.next(), Some((Token::OpenParens, "(")));
assert_eq!(lexer.next(), Some((Token::Ident, "x")));
assert_eq!(lexer.next(), Some((Token::Colon, ":")));
assert_eq!(lexer.next(), Some((Token::I32, "i32")));
assert_eq!(lexer.next(), Some((Token::Comma, ",")));
assert_eq!(lexer.next(), Some((Token::Ident, "y")));
assert_eq!(lexer.next(), Some((Token::Colon, ":")));
assert_eq!(lexer.next(), Some((Token::F32, "f32")));
assert_eq!(lexer.next(), Some((Token::CloseParens, ")")));
assert_eq!(lexer.next(), Some((Token::MinusGreater, "->")));
assert_eq!(lexer.next(), Some((Token::F32, "f32")));
assert_eq!(lexer.next(), Some((Token::OpenBrace, "{")));
assert_eq!(lexer.next(), Some((Token::Return, "return")));
assert_eq!(lexer.next(), Some((Token::Ident, "x")));
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
assert_eq!(lexer.next(), Some((Token::Ident, "y")));
assert_eq!(lexer.next(), Some((Token::Semi, ";")));
assert_eq!(lexer.next(), Some((Token::CloseBrace, "}")));
assert_eq!(lexer.next(), None);
let lexer = TokenIterator::new(&tokens);
let mut items = lexer
.into_token_items()
.map(|item| (item.token, item.lexeme));
assert_eq!(items.next(), Some((Token::Fn, "fn")));
assert_eq!(items.next(), Some((Token::Ident, "my-function")));
assert_eq!(items.next(), Some((Token::OpenParens, "(")));
assert_eq!(items.next(), Some((Token::Ident, "x")));
assert_eq!(items.next(), Some((Token::Colon, ":")));
assert_eq!(items.next(), Some((Token::I32, "i32")));
assert_eq!(items.next(), Some((Token::Comma, ",")));
assert_eq!(items.next(), Some((Token::Ident, "y")));
assert_eq!(items.next(), Some((Token::Colon, ":")));
assert_eq!(items.next(), Some((Token::F32, "f32")));
assert_eq!(items.next(), Some((Token::CloseParens, ")")));
assert_eq!(items.next(), Some((Token::MinusGreater, "->")));
assert_eq!(items.next(), Some((Token::F32, "f32")));
assert_eq!(items.next(), Some((Token::OpenBrace, "{")));
assert_eq!(items.next(), Some((Token::Return, "return")));
assert_eq!(items.next(), Some((Token::Ident, "x")));
assert_eq!(items.next(), Some((Token::Plus, "+")));
assert_eq!(items.next(), Some((Token::Ident, "y")));
assert_eq!(items.next(), Some((Token::Semi, ";")));
assert_eq!(items.next(), Some((Token::CloseBrace, "}")));
assert_eq!(items.next(), None);
}
}
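A minimal usage sketch (not part of the commit), assuming the TokenIterator, TokenItem, TokenItemIterator, and into_token_items definitions from the diff above: iterating the lexer directly now yields only Token values, while into_token_items wraps it so that the lexeme slice and its byte offset are also available. The demo function name and the source string are illustrative only.

// Illustrative sketch; relies on the types defined in this file.
fn demo() {
    let source = "fn my-function(x: i32) -> i32 { return x; }";

    // Token-only pass: TokenIterator itself implements Iterator<Item = Token>.
    let kinds: Vec<Token> = TokenIterator::new(source).collect();
    assert_eq!(kinds.first(), Some(&Token::Fn));

    // Lexeme-aware pass: into_token_items yields TokenItem { token, lexeme, offset }.
    for item in TokenIterator::new(source).into_token_items() {
        println!("{:?} {:?} at byte {}", item.token, item.lexeme, item.offset);
    }
}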