more refactoring
This commit is contained in:
parent
6e0fed0962
commit
2790bc561f
|
@ -297,21 +297,29 @@ impl Token {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
use trie::Tree;
|
use trie::Tree;
|
||||||
|
|
||||||
|
/// A lexed token bundled with the text it covers and where that text starts.
///
/// Built by `TokenIterator::next_token_item` from the `(Token, Range<usize>)`
/// pair that `next_token` returns.
pub struct TokenItem<'a> {
|
||||||
|
/// Kind of token that was recognised.
pub token: Token,
|
||||||
|
/// Slice of the lexer's source text selected by the token's range.
pub lexeme: &'a str,
|
||||||
|
/// Start of the token's range in the source, cast down to `u32`.
pub offset: u32,
|
||||||
|
}
|
||||||
|
|
||||||
// Iterator wrapper that carries a `count` next to the wrapped iterator.
// This change renames it from `CountingIterator` to `CharCountingIterator`;
// the fields are untouched.
// `From` starts `count` at 0 and `offset()` reads it back. The `next` body
// that presumably advances it is outside the visible hunk — confirm there
// whether it counts chars or bytes.
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
struct CountingIterator<I: Iterator> {
|
struct CharCountingIterator<I: Iterator> {
|
||||||
iter: I,
|
// The wrapped iterator.
iter: I,
|
||||||
count: usize,
|
// Value reported by `offset()`.
count: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Conversion from any iterator into the counting wrapper.
impl<I: Iterator> From<I> for CountingIterator<I> {
|
impl<I: Iterator> From<I> for CharCountingIterator<I> {
|
||||||
fn from(iter: I) -> Self {
|
/// Wraps `iter` with the count initialised to zero.
fn from(iter: I) -> Self {
|
||||||
Self { iter, count: 0 }
|
Self { iter, count: 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Iterator<Item = char>> Iterator for CountingIterator<I> {
|
impl<I: Iterator<Item = char>> Iterator for CharCountingIterator<I> {
|
||||||
type Item = I::Item;
|
type Item = I::Item;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
@ -319,13 +327,13 @@ impl<I: Iterator<Item = char>> Iterator for CountingIterator<I> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Crate-internal accessor for the wrapper's counter.
impl<I: Iterator> CountingIterator<I> {
|
impl<I: Iterator> CharCountingIterator<I> {
|
||||||
pub(crate) fn offset(&self) -> usize {
|
/// Returns the current value of `count`.
pub(crate) fn offset(&self) -> usize {
|
||||||
self.count
|
self.count
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Iterator> core::ops::Deref for CountingIterator<I> {
|
impl<I: Iterator> core::ops::Deref for CharCountingIterator<I> {
|
||||||
type Target = I;
|
type Target = I;
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
fn deref(&self) -> &Self::Target {
|
||||||
|
@ -333,13 +341,13 @@ impl<I: Iterator> core::ops::Deref for CountingIterator<I> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mutable dereference to the wrapped iterator, so its own `&mut self`
// methods can be called through the wrapper.
impl<I: Iterator> core::ops::DerefMut for CountingIterator<I> {
|
impl<I: Iterator> core::ops::DerefMut for CharCountingIterator<I> {
|
||||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
/// Returns a mutable reference to the inner iterator `iter`.
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||||
&mut self.iter
|
&mut self.iter
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Character stream type used by the lexer: the chars of a `&'a str`, made
// peekable and wrapped in the counting iterator.
type Source<'a> = CountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
|
type Source<'a> = CharCountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
|
||||||
|
|
||||||
pub struct TokenIterator<'a> {
|
pub struct TokenIterator<'a> {
|
||||||
trie: Tree<char, Token>,
|
trie: Tree<char, Token>,
|
||||||
|
@ -363,11 +371,11 @@ impl<'a> TokenIterator<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds a fresh `Source` over the not-yet-consumed part of the input.
// Takes `&self`, so `self.offset` is not changed by this call.
fn peekable_source(&self) -> Source<'a> {
|
/// Returns a counting, peekable char iterator over `self.source` starting
/// at `self.offset`; its count starts at zero.
fn peekable_source(&self) -> Source<'a> {
|
||||||
CountingIterator::from(self.source[self.offset..].chars().peekable())
|
CharCountingIterator::from(self.source[self.offset..].chars().peekable())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse(&mut self) -> Option<Token> {
|
fn parse(&mut self) -> Option<Token> {
|
||||||
let mut iter = CountingIterator::from(self.source[self.offset..].chars());
|
let mut iter = CharCountingIterator::from(self.source[self.offset..].chars());
|
||||||
|
|
||||||
match self.trie.get_closest(&mut iter) {
|
match self.trie.get_closest(&mut iter) {
|
||||||
Some(token) => {
|
Some(token) => {
|
||||||
|
@ -407,12 +415,8 @@ impl<'a> TokenIterator<'a> {
|
||||||
}
|
}
|
||||||
count
|
count
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Iterator for TokenIterator<'a> {
|
fn next_token(&mut self) -> Option<(Token, Range<usize>)> {
|
||||||
type Item = (Token, &'a str);
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
// skip whitespace
|
// skip whitespace
|
||||||
self.skip_whitespaces();
|
self.skip_whitespaces();
|
||||||
|
|
||||||
|
@ -425,19 +429,19 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||||
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
||||||
self.offset += source.offset();
|
self.offset += source.offset();
|
||||||
|
|
||||||
Some((token, &self.source[start..self.offset]))
|
Some(token)
|
||||||
}
|
}
|
||||||
Some('.') if cursor.next().map_or(false, is_things::is_digit) => {
|
Some('.') if cursor.next().map_or(false, is_things::is_digit) => {
|
||||||
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
||||||
self.offset += source.offset();
|
self.offset += source.offset();
|
||||||
|
|
||||||
Some((token, &self.source[start..self.offset]))
|
Some(token)
|
||||||
}
|
}
|
||||||
Some('\'' | '"') => {
|
Some('\'' | '"') => {
|
||||||
let token = complex_tokens::parse_string_or_char_constant(&mut source).ok()?;
|
let token = complex_tokens::parse_string_or_char_constant(&mut source).ok()?;
|
||||||
self.offset += source.offset();
|
self.offset += source.offset();
|
||||||
|
|
||||||
Some((token, &self.source[start..self.offset]))
|
Some(token)
|
||||||
}
|
}
|
||||||
Some('`') => {
|
Some('`') => {
|
||||||
// raw identifier
|
// raw identifier
|
||||||
|
@ -445,10 +449,10 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||||
self.skip_while(|c| is_things::is_id_continue(c));
|
self.skip_while(|c| is_things::is_id_continue(c));
|
||||||
if self.peekable_source().next() == Some('`') {
|
if self.peekable_source().next() == Some('`') {
|
||||||
self.skip(1);
|
self.skip(1);
|
||||||
Some((Token::Ident, &self.source[start..self.offset]))
|
Some(Token::Ident)
|
||||||
} else {
|
} else {
|
||||||
// unterminated raw identifier
|
// unterminated raw identifier
|
||||||
Some((Token::ParseError, &self.source[start..self.offset]))
|
Some(Token::ParseError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// `//`-style comments or doc-comments
|
// `//`-style comments or doc-comments
|
||||||
|
@ -465,9 +469,9 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||||
}) {
|
}) {
|
||||||
Some(tok) => {
|
Some(tok) => {
|
||||||
if tok.maybe_ident() && self.skip_while(|c| is_things::is_id_continue(c)) > 0 {
|
if tok.maybe_ident() && self.skip_while(|c| is_things::is_id_continue(c)) > 0 {
|
||||||
Some((Token::Ident, &self.source[start..self.offset]))
|
Some(Token::Ident)
|
||||||
} else {
|
} else {
|
||||||
Some((tok, &self.source[start..self.offset]))
|
Some(tok)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
@ -478,15 +482,49 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||||
{
|
{
|
||||||
self.skip(1);
|
self.skip(1);
|
||||||
self.skip_while(|c| is_things::is_id_continue(c));
|
self.skip_while(|c| is_things::is_id_continue(c));
|
||||||
Some((Token::Ident, &self.source[start..self.offset]))
|
Some(Token::Ident)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
};
|
}?;
|
||||||
|
|
||||||
token
|
Some((token, start..self.offset))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lexes the next token and packages it as a `TokenItem`.
///
/// Returns `None` whenever `next_token` returns `None`.
fn next_token_item(&mut self) -> Option<TokenItem<'a>> {
|
||||||
|
let (token, range) = self.next_token()?;
|
||||||
|
// `range` is cloned because it is read again below for `offset`.
let lexeme = &self.source[range.clone()];
|
||||||
|
Some(TokenItem {
|
||||||
|
token,
|
||||||
|
lexeme,
|
||||||
|
// NOTE(review): `as u32` wraps silently if `range.start` exceeds
// `u32::MAX` — confirm inputs that large are not expected.
offset: range.start as u32,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consumes the lexer and wraps it in a `TokenItemIterator`, which yields
/// `TokenItem`s instead of bare `Token`s.
pub fn into_token_items(self) -> TokenItemIterator<'a> {
|
||||||
|
TokenItemIterator { inner: self }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterating a `TokenIterator` yields only the token kinds; the source
/// range that `next_token` also returns is dropped.
impl<'a> Iterator for TokenIterator<'a> {
|
||||||
|
type Item = Token;
|
||||||
|
|
||||||
|
/// Returns the next token, or `None` when `next_token` returns `None`.
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.next_token().map(|(token, _)| token)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterator adapter that owns a `TokenIterator` and yields `TokenItem`s.
///
/// Created by `TokenIterator::into_token_items`.
pub struct TokenItemIterator<'a> {
|
||||||
|
/// The lexer that produces the tokens.
inner: TokenIterator<'a>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Yields one `TokenItem` per token by forwarding to the wrapped lexer.
impl<'a> Iterator for TokenItemIterator<'a> {
|
||||||
|
type Item = TokenItem<'a>;
|
||||||
|
|
||||||
|
/// Delegates to `TokenIterator::next_token_item` on the inner lexer.
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.inner.next_token_item()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -500,27 +538,27 @@ mod tests {
|
||||||
// Lexes "fn let void+(+bool)" and checks the tokens come out in order,
// followed by `None`. The old side compared `(Token, &str)` pairs; the new
// side compares bare `Token`s.
fn test_iterator() {
|
fn test_iterator() {
|
||||||
let tokens = "fn let void+(+bool)";
|
let tokens = "fn let void+(+bool)";
|
||||||
let mut lexer = TokenIterator::new(&tokens);
|
let mut lexer = TokenIterator::new(&tokens);
|
||||||
assert_eq!(lexer.next(), Some((Token::Fn, "fn")));
|
assert_eq!(lexer.next(), Some(Token::Fn));
|
||||||
assert_eq!(lexer.next(), Some((Token::Let, "let")));
|
assert_eq!(lexer.next(), Some(Token::Let));
|
||||||
assert_eq!(lexer.next(), Some((Token::Void, "void")));
|
assert_eq!(lexer.next(), Some(Token::Void));
|
||||||
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
|
assert_eq!(lexer.next(), Some(Token::Plus));
|
||||||
assert_eq!(lexer.next(), Some((Token::OpenParens, "(")));
|
assert_eq!(lexer.next(), Some(Token::OpenParens));
|
||||||
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
|
assert_eq!(lexer.next(), Some(Token::Plus));
|
||||||
assert_eq!(lexer.next(), Some((Token::Bool, "bool")));
|
assert_eq!(lexer.next(), Some(Token::Bool));
|
||||||
assert_eq!(lexer.next(), Some((Token::CloseParens, ")")));
|
assert_eq!(lexer.next(), Some(Token::CloseParens));
|
||||||
assert_eq!(lexer.next(), None);
|
assert_eq!(lexer.next(), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Asserts that every token lexed from the identifier-like input is
// `Token::Ident`.
// NOTE(review): the input ends in "`123" with no closing backtick, and the
// visible raw-identifier branch returns `Token::ParseError` in that case.
// Part of that branch is hidden, so confirm this assertion really holds.
#[test]
|
#[test]
|
||||||
fn idents() {
|
fn idents() {
|
||||||
let lexer = TokenIterator::new("a a1 a_ a-b _a _1 _- -a -1 -_ `123");
|
// `mut` is needed on the new side because `Iterator::all` takes `&mut self`.
let mut lexer = TokenIterator::new("a a1 a_ a-b _a _1 _- -a -1 -_ `123");
|
||||||
assert!(lexer.map(|(tok, _)| tok).all(|tok| tok == Token::Ident));
|
assert!(lexer.all(|tok| tok == Token::Ident));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn ident_minus_ambiguity() {
|
fn ident_minus_ambiguity() {
|
||||||
let lexer = TokenIterator::new("a-a a- - a -a --a");
|
let lexer = TokenIterator::new("a-a a- - a -a --a");
|
||||||
let tokens = lexer.map(|(tok, _)| tok).collect::<Vec<_>>();
|
let tokens = lexer.collect::<Vec<_>>();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tokens,
|
tokens,
|
||||||
vec![
|
vec![
|
||||||
|
@ -537,27 +575,30 @@ mod tests {
|
||||||
// Lexes a small function definition and checks each token with its lexeme.
// The old side read `(Token, &str)` pairs straight from the lexer. The new
// side goes through `into_token_items()` and maps each `TokenItem` back to a
// `(token, lexeme)` pair, so the expected values are unchanged.
#[test]
|
#[test]
|
||||||
fn complex_iterator() {
|
fn complex_iterator() {
|
||||||
let tokens = "fn my-function(x: i32, y: f32) -> f32 { return x + y; }";
|
let tokens = "fn my-function(x: i32, y: f32) -> f32 { return x + y; }";
|
||||||
let mut lexer = TokenIterator::new(&tokens);
|
let lexer = TokenIterator::new(&tokens);
|
||||||
assert_eq!(lexer.next(), Some((Token::Fn, "fn")));
|
// Consume the lexer and keep only the fields the assertions compare.
let mut items = lexer
|
||||||
assert_eq!(lexer.next(), Some((Token::Ident, "my-function")));
|
.into_token_items()
|
||||||
assert_eq!(lexer.next(), Some((Token::OpenParens, "(")));
|
.map(|item| (item.token, item.lexeme));
|
||||||
assert_eq!(lexer.next(), Some((Token::Ident, "x")));
|
assert_eq!(items.next(), Some((Token::Fn, "fn")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Colon, ":")));
|
assert_eq!(items.next(), Some((Token::Ident, "my-function")));
|
||||||
assert_eq!(lexer.next(), Some((Token::I32, "i32")));
|
assert_eq!(items.next(), Some((Token::OpenParens, "(")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Comma, ",")));
|
assert_eq!(items.next(), Some((Token::Ident, "x")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Ident, "y")));
|
assert_eq!(items.next(), Some((Token::Colon, ":")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Colon, ":")));
|
assert_eq!(items.next(), Some((Token::I32, "i32")));
|
||||||
assert_eq!(lexer.next(), Some((Token::F32, "f32")));
|
assert_eq!(items.next(), Some((Token::Comma, ",")));
|
||||||
assert_eq!(lexer.next(), Some((Token::CloseParens, ")")));
|
assert_eq!(items.next(), Some((Token::Ident, "y")));
|
||||||
assert_eq!(lexer.next(), Some((Token::MinusGreater, "->")));
|
assert_eq!(items.next(), Some((Token::Colon, ":")));
|
||||||
assert_eq!(lexer.next(), Some((Token::F32, "f32")));
|
assert_eq!(items.next(), Some((Token::F32, "f32")));
|
||||||
assert_eq!(lexer.next(), Some((Token::OpenBrace, "{")));
|
assert_eq!(items.next(), Some((Token::CloseParens, ")")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Return, "return")));
|
assert_eq!(items.next(), Some((Token::MinusGreater, "->")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Ident, "x")));
|
assert_eq!(items.next(), Some((Token::F32, "f32")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
|
assert_eq!(items.next(), Some((Token::OpenBrace, "{")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Ident, "y")));
|
assert_eq!(items.next(), Some((Token::Return, "return")));
|
||||||
assert_eq!(lexer.next(), Some((Token::Semi, ";")));
|
assert_eq!(items.next(), Some((Token::Ident, "x")));
|
||||||
assert_eq!(lexer.next(), Some((Token::CloseBrace, "}")));
|
assert_eq!(items.next(), Some((Token::Plus, "+")));
|
||||||
assert_eq!(lexer.next(), None);
|
assert_eq!(items.next(), Some((Token::Ident, "y")));
|
||||||
|
assert_eq!(items.next(), Some((Token::Semi, ";")));
|
||||||
|
assert_eq!(items.next(), Some((Token::CloseBrace, "}")));
|
||||||
|
assert_eq!(items.next(), None);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue