more refactoring
This commit is contained in:
parent
6e0fed0962
commit
2790bc561f
|
@ -297,21 +297,29 @@ impl Token {
|
|||
}
|
||||
}
|
||||
|
||||
use std::ops::Range;
|
||||
|
||||
use trie::Tree;
|
||||
|
||||
pub struct TokenItem<'a> {
|
||||
pub token: Token,
|
||||
pub lexeme: &'a str,
|
||||
pub offset: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct CountingIterator<I: Iterator> {
|
||||
struct CharCountingIterator<I: Iterator> {
|
||||
iter: I,
|
||||
count: usize,
|
||||
}
|
||||
|
||||
impl<I: Iterator> From<I> for CountingIterator<I> {
|
||||
impl<I: Iterator> From<I> for CharCountingIterator<I> {
|
||||
fn from(iter: I) -> Self {
|
||||
Self { iter, count: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = char>> Iterator for CountingIterator<I> {
|
||||
impl<I: Iterator<Item = char>> Iterator for CharCountingIterator<I> {
|
||||
type Item = I::Item;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
@ -319,13 +327,13 @@ impl<I: Iterator<Item = char>> Iterator for CountingIterator<I> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator> CountingIterator<I> {
|
||||
impl<I: Iterator> CharCountingIterator<I> {
|
||||
pub(crate) fn offset(&self) -> usize {
|
||||
self.count
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator> core::ops::Deref for CountingIterator<I> {
|
||||
impl<I: Iterator> core::ops::Deref for CharCountingIterator<I> {
|
||||
type Target = I;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
|
@ -333,13 +341,13 @@ impl<I: Iterator> core::ops::Deref for CountingIterator<I> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator> core::ops::DerefMut for CountingIterator<I> {
|
||||
impl<I: Iterator> core::ops::DerefMut for CharCountingIterator<I> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.iter
|
||||
}
|
||||
}
|
||||
|
||||
type Source<'a> = CountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
|
||||
type Source<'a> = CharCountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
|
||||
|
||||
pub struct TokenIterator<'a> {
|
||||
trie: Tree<char, Token>,
|
||||
|
@ -363,11 +371,11 @@ impl<'a> TokenIterator<'a> {
|
|||
}
|
||||
|
||||
fn peekable_source(&self) -> Source<'a> {
|
||||
CountingIterator::from(self.source[self.offset..].chars().peekable())
|
||||
CharCountingIterator::from(self.source[self.offset..].chars().peekable())
|
||||
}
|
||||
|
||||
fn parse(&mut self) -> Option<Token> {
|
||||
let mut iter = CountingIterator::from(self.source[self.offset..].chars());
|
||||
let mut iter = CharCountingIterator::from(self.source[self.offset..].chars());
|
||||
|
||||
match self.trie.get_closest(&mut iter) {
|
||||
Some(token) => {
|
||||
|
@ -407,12 +415,8 @@ impl<'a> TokenIterator<'a> {
|
|||
}
|
||||
count
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TokenIterator<'a> {
|
||||
type Item = (Token, &'a str);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
fn next_token(&mut self) -> Option<(Token, Range<usize>)> {
|
||||
// skip whitespace
|
||||
self.skip_whitespaces();
|
||||
|
||||
|
@ -425,19 +429,19 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|||
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
||||
self.offset += source.offset();
|
||||
|
||||
Some((token, &self.source[start..self.offset]))
|
||||
Some(token)
|
||||
}
|
||||
Some('.') if cursor.next().map_or(false, is_things::is_digit) => {
|
||||
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
||||
self.offset += source.offset();
|
||||
|
||||
Some((token, &self.source[start..self.offset]))
|
||||
Some(token)
|
||||
}
|
||||
Some('\'' | '"') => {
|
||||
let token = complex_tokens::parse_string_or_char_constant(&mut source).ok()?;
|
||||
self.offset += source.offset();
|
||||
|
||||
Some((token, &self.source[start..self.offset]))
|
||||
Some(token)
|
||||
}
|
||||
Some('`') => {
|
||||
// raw identifier
|
||||
|
@ -445,10 +449,10 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|||
self.skip_while(|c| is_things::is_id_continue(c));
|
||||
if self.peekable_source().next() == Some('`') {
|
||||
self.skip(1);
|
||||
Some((Token::Ident, &self.source[start..self.offset]))
|
||||
Some(Token::Ident)
|
||||
} else {
|
||||
// unterminated raw identifier
|
||||
Some((Token::ParseError, &self.source[start..self.offset]))
|
||||
Some(Token::ParseError)
|
||||
}
|
||||
}
|
||||
// `//`-style comments or doc-comments
|
||||
|
@ -465,9 +469,9 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|||
}) {
|
||||
Some(tok) => {
|
||||
if tok.maybe_ident() && self.skip_while(|c| is_things::is_id_continue(c)) > 0 {
|
||||
Some((Token::Ident, &self.source[start..self.offset]))
|
||||
Some(Token::Ident)
|
||||
} else {
|
||||
Some((tok, &self.source[start..self.offset]))
|
||||
Some(tok)
|
||||
}
|
||||
}
|
||||
None => {
|
||||
|
@ -478,15 +482,49 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|||
{
|
||||
self.skip(1);
|
||||
self.skip_while(|c| is_things::is_id_continue(c));
|
||||
Some((Token::Ident, &self.source[start..self.offset]))
|
||||
Some(Token::Ident)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
},
|
||||
};
|
||||
}?;
|
||||
|
||||
token
|
||||
Some((token, start..self.offset))
|
||||
}
|
||||
|
||||
fn next_token_item(&mut self) -> Option<TokenItem<'a>> {
|
||||
let (token, range) = self.next_token()?;
|
||||
let lexeme = &self.source[range.clone()];
|
||||
Some(TokenItem {
|
||||
token,
|
||||
lexeme,
|
||||
offset: range.start as u32,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn into_token_items(self) -> TokenItemIterator<'a> {
|
||||
TokenItemIterator { inner: self }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TokenIterator<'a> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.next_token().map(|(token, _)| token)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TokenItemIterator<'a> {
|
||||
inner: TokenIterator<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TokenItemIterator<'a> {
|
||||
type Item = TokenItem<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.inner.next_token_item()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -500,27 +538,27 @@ mod tests {
|
|||
fn test_iterator() {
|
||||
let tokens = "fn let void+(+bool)";
|
||||
let mut lexer = TokenIterator::new(&tokens);
|
||||
assert_eq!(lexer.next(), Some((Token::Fn, "fn")));
|
||||
assert_eq!(lexer.next(), Some((Token::Let, "let")));
|
||||
assert_eq!(lexer.next(), Some((Token::Void, "void")));
|
||||
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
|
||||
assert_eq!(lexer.next(), Some((Token::OpenParens, "(")));
|
||||
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
|
||||
assert_eq!(lexer.next(), Some((Token::Bool, "bool")));
|
||||
assert_eq!(lexer.next(), Some((Token::CloseParens, ")")));
|
||||
assert_eq!(lexer.next(), Some(Token::Fn));
|
||||
assert_eq!(lexer.next(), Some(Token::Let));
|
||||
assert_eq!(lexer.next(), Some(Token::Void));
|
||||
assert_eq!(lexer.next(), Some(Token::Plus));
|
||||
assert_eq!(lexer.next(), Some(Token::OpenParens));
|
||||
assert_eq!(lexer.next(), Some(Token::Plus));
|
||||
assert_eq!(lexer.next(), Some(Token::Bool));
|
||||
assert_eq!(lexer.next(), Some(Token::CloseParens));
|
||||
assert_eq!(lexer.next(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn idents() {
|
||||
let lexer = TokenIterator::new("a a1 a_ a-b _a _1 _- -a -1 -_ `123");
|
||||
assert!(lexer.map(|(tok, _)| tok).all(|tok| tok == Token::Ident));
|
||||
let mut lexer = TokenIterator::new("a a1 a_ a-b _a _1 _- -a -1 -_ `123");
|
||||
assert!(lexer.all(|tok| tok == Token::Ident));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ident_minus_ambiguity() {
|
||||
let lexer = TokenIterator::new("a-a a- - a -a --a");
|
||||
let tokens = lexer.map(|(tok, _)| tok).collect::<Vec<_>>();
|
||||
let tokens = lexer.collect::<Vec<_>>();
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
|
@ -537,27 +575,30 @@ mod tests {
|
|||
#[test]
|
||||
fn complex_iterator() {
|
||||
let tokens = "fn my-function(x: i32, y: f32) -> f32 { return x + y; }";
|
||||
let mut lexer = TokenIterator::new(&tokens);
|
||||
assert_eq!(lexer.next(), Some((Token::Fn, "fn")));
|
||||
assert_eq!(lexer.next(), Some((Token::Ident, "my-function")));
|
||||
assert_eq!(lexer.next(), Some((Token::OpenParens, "(")));
|
||||
assert_eq!(lexer.next(), Some((Token::Ident, "x")));
|
||||
assert_eq!(lexer.next(), Some((Token::Colon, ":")));
|
||||
assert_eq!(lexer.next(), Some((Token::I32, "i32")));
|
||||
assert_eq!(lexer.next(), Some((Token::Comma, ",")));
|
||||
assert_eq!(lexer.next(), Some((Token::Ident, "y")));
|
||||
assert_eq!(lexer.next(), Some((Token::Colon, ":")));
|
||||
assert_eq!(lexer.next(), Some((Token::F32, "f32")));
|
||||
assert_eq!(lexer.next(), Some((Token::CloseParens, ")")));
|
||||
assert_eq!(lexer.next(), Some((Token::MinusGreater, "->")));
|
||||
assert_eq!(lexer.next(), Some((Token::F32, "f32")));
|
||||
assert_eq!(lexer.next(), Some((Token::OpenBrace, "{")));
|
||||
assert_eq!(lexer.next(), Some((Token::Return, "return")));
|
||||
assert_eq!(lexer.next(), Some((Token::Ident, "x")));
|
||||
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
|
||||
assert_eq!(lexer.next(), Some((Token::Ident, "y")));
|
||||
assert_eq!(lexer.next(), Some((Token::Semi, ";")));
|
||||
assert_eq!(lexer.next(), Some((Token::CloseBrace, "}")));
|
||||
assert_eq!(lexer.next(), None);
|
||||
let lexer = TokenIterator::new(&tokens);
|
||||
let mut items = lexer
|
||||
.into_token_items()
|
||||
.map(|item| (item.token, item.lexeme));
|
||||
assert_eq!(items.next(), Some((Token::Fn, "fn")));
|
||||
assert_eq!(items.next(), Some((Token::Ident, "my-function")));
|
||||
assert_eq!(items.next(), Some((Token::OpenParens, "(")));
|
||||
assert_eq!(items.next(), Some((Token::Ident, "x")));
|
||||
assert_eq!(items.next(), Some((Token::Colon, ":")));
|
||||
assert_eq!(items.next(), Some((Token::I32, "i32")));
|
||||
assert_eq!(items.next(), Some((Token::Comma, ",")));
|
||||
assert_eq!(items.next(), Some((Token::Ident, "y")));
|
||||
assert_eq!(items.next(), Some((Token::Colon, ":")));
|
||||
assert_eq!(items.next(), Some((Token::F32, "f32")));
|
||||
assert_eq!(items.next(), Some((Token::CloseParens, ")")));
|
||||
assert_eq!(items.next(), Some((Token::MinusGreater, "->")));
|
||||
assert_eq!(items.next(), Some((Token::F32, "f32")));
|
||||
assert_eq!(items.next(), Some((Token::OpenBrace, "{")));
|
||||
assert_eq!(items.next(), Some((Token::Return, "return")));
|
||||
assert_eq!(items.next(), Some((Token::Ident, "x")));
|
||||
assert_eq!(items.next(), Some((Token::Plus, "+")));
|
||||
assert_eq!(items.next(), Some((Token::Ident, "y")));
|
||||
assert_eq!(items.next(), Some((Token::Semi, ";")));
|
||||
assert_eq!(items.next(), Some((Token::CloseBrace, "}")));
|
||||
assert_eq!(items.next(), None);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue