|
|
|
|
@ -58,12 +58,10 @@ mod is_things {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns `true` if `ch` is an ASCII decimal digit (`'0'..='9'`).
///
/// Non-ASCII digits (for example Arabic-Indic numerals) are not accepted.
// No `#[expect(dead_code)]` here: the lexer calls this function when deciding
// whether a leading `.` starts a numeric constant, so the expectation would
// be unfulfilled.
pub fn is_digit(ch: char) -> bool {
    ch.is_ascii_digit()
}
|
|
|
|
|
|
|
|
|
|
/// Returns `true` if `ch` is a binary digit, i.e. exactly `'0'` or `'1'`.
#[expect(dead_code)]
pub fn is_bin_digit(ch: char) -> bool {
    matches!(ch, '0' | '1')
}
|
|
|
|
|
@ -73,12 +71,10 @@ mod is_things {
|
|
|
|
|
('1'..='9').contains(&ch)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns `true` if `ch` is an octal digit (`'0'..='7'`).
#[expect(dead_code)]
pub fn is_oct_digit(ch: char) -> bool {
    matches!(ch, '0'..='7')
}
|
|
|
|
|
|
|
|
|
|
/// Returns `true` if `ch` is an ASCII hexadecimal digit: `'0'..='9'`,
/// `'a'..='f'` or `'A'..='F'`.
#[expect(dead_code)]
pub fn is_hex_digit(ch: char) -> bool {
    ch.is_ascii_hexdigit()
}
|
|
|
|
|
@ -188,6 +184,16 @@ tokens!(pub Token: {
|
|
|
|
|
F64 => "f64",
|
|
|
|
|
ISize => "isize",
|
|
|
|
|
USize => "usize",
|
|
|
|
|
U1 => "u1",
|
|
|
|
|
U8 => "u8",
|
|
|
|
|
U16 => "u16",
|
|
|
|
|
U32 => "u32",
|
|
|
|
|
U64 => "u64",
|
|
|
|
|
I1 => "i1",
|
|
|
|
|
I8 => "i8",
|
|
|
|
|
I16 => "i16",
|
|
|
|
|
I32 => "i32",
|
|
|
|
|
I64 => "i64",
|
|
|
|
|
Const => "const",
|
|
|
|
|
Volatile => "volatile",
|
|
|
|
|
Noalias => "noalias",
|
|
|
|
|
@ -211,9 +217,9 @@ tokens!(pub Token: {
|
|
|
|
|
Bang => "!",
|
|
|
|
|
Tilde => "~",
|
|
|
|
|
Plus => "+",
|
|
|
|
|
PlusPlus => "++",
|
|
|
|
|
// PlusPlus => "++",
|
|
|
|
|
Minus => "-",
|
|
|
|
|
MinusMinus => "--",
|
|
|
|
|
// MinusMinus => "--",
|
|
|
|
|
Star => "*",
|
|
|
|
|
Slash => "/",
|
|
|
|
|
Percent => "%",
|
|
|
|
|
@ -291,185 +297,29 @@ impl Token {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// A list of lexemes used by the `LexemeParser`.
|
|
|
|
|
/// `lexemes` contains every token that has a defined lexeme, such as `fn`, `f32`, `const`, etc.
|
|
|
|
|
/// The `LexemeList` keeps track of two offsets into the `lexemes` array,
|
|
|
|
|
/// splitting it into three windows:
|
|
|
|
|
/// - [0, start_candidates) - tokens that are still being considered for parsing
|
|
|
|
|
/// - [start_candidates, end_candidates) - the tokens which this lexeme matches
|
|
|
|
|
/// - [end_candidates, len) - tokens that have been filtered out and are no longer considered
|
|
|
|
|
/// On each iteration of the parsing loop, the remaining tokens are matched
|
|
|
|
|
/// against the next character and, if they match completely, are swapped into
|
|
|
|
|
/// the candidates window, or swapped to the end if they don't.
|
|
|
|
|
struct LexemeList {
|
|
|
|
|
lexemes: Box<[Token]>,
|
|
|
|
|
start_candidates: usize,
|
|
|
|
|
end_candidates: usize,
|
|
|
|
|
filtered: Vec<(usize, FilterResult)>,
|
|
|
|
|
}
|
|
|
|
|
use std::ops::Range;
|
|
|
|
|
|
|
|
|
|
/// Verdict recorded by `LexemeList::step` for a token that leaves the
/// "remaining" window.
enum FilterResult {
    /// The token did not match the current character and is filtered out.
    Remove,
    /// The token's lexeme has been matched in full.
    Candidate,
}
|
|
|
|
|
|
|
|
|
|
impl LexemeList {
|
|
|
|
|
fn new() -> Self {
|
|
|
|
|
let lexemes = Token::lexemes()
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|(tok, _)| tok.clone())
|
|
|
|
|
.collect::<Box<_>>();
|
|
|
|
|
|
|
|
|
|
Self {
|
|
|
|
|
start_candidates: lexemes.len(),
|
|
|
|
|
end_candidates: lexemes.len(),
|
|
|
|
|
lexemes,
|
|
|
|
|
filtered: Vec::new(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn clear(&mut self) {
|
|
|
|
|
self.start_candidates = self.lexemes.len();
|
|
|
|
|
self.end_candidates = self.lexemes.len();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn remaining(&self) -> &[Token] {
|
|
|
|
|
&self.lexemes[0..self.start_candidates]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn candidates(&self) -> &[Token] {
|
|
|
|
|
&self.lexemes[self.start_candidates..self.end_candidates]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn step(&mut self, ch: char, pos: usize) {
|
|
|
|
|
// smartly reuse allocation for `filtered`
|
|
|
|
|
// truly one of the premature optimizations.
|
|
|
|
|
// but it just feels good, innit?
|
|
|
|
|
let mut filtered = core::mem::take(&mut self.filtered);
|
|
|
|
|
|
|
|
|
|
self.remaining()
|
|
|
|
|
.iter()
|
|
|
|
|
.enumerate()
|
|
|
|
|
.filter_map(|(i, tok)| {
|
|
|
|
|
let bytes = tok.lexeme().unwrap().as_bytes();
|
|
|
|
|
// SAFETY: all tokens in `self.remaining()` are lexical tokens, and
|
|
|
|
|
// they are all valid ascii
|
|
|
|
|
let c = unsafe {
|
|
|
|
|
// TODO: maybe keep a list of `Char<'_>`s around in order to
|
|
|
|
|
// support fully utf8 tokens?
|
|
|
|
|
char::from_u32_unchecked(bytes[pos] as u32)
|
|
|
|
|
};
|
|
|
|
|
match c == ch {
|
|
|
|
|
false => Some((i, FilterResult::Remove)),
|
|
|
|
|
true if bytes.len() <= pos + 1 => Some((i, FilterResult::Candidate)),
|
|
|
|
|
true => None,
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.collect_into(&mut filtered);
|
|
|
|
|
|
|
|
|
|
// iterate in reverse order so that we can safely swap elements
|
|
|
|
|
// drain here so that we can possibly reuse the `filtered` Vec allcoation
|
|
|
|
|
filtered.drain(..).rev().for_each(|(i, f)| {
|
|
|
|
|
match f {
|
|
|
|
|
// for candidates, swap the candidate with the last remaining
|
|
|
|
|
// token, then dec `start_candidates`
|
|
|
|
|
FilterResult::Candidate => {
|
|
|
|
|
// SAFETY: we know that `i` and `self.start_candidates - 1`
|
|
|
|
|
// are both valid indices: `self.start_candidates` starts at
|
|
|
|
|
// the end and each time it is decremented, one more element
|
|
|
|
|
// is removed from the front, so that as long as an element
|
|
|
|
|
// is remaining, `self.start_candidates` is always greater
|
|
|
|
|
// than 0.
|
|
|
|
|
// the order of the remaining elements is not meaningfully
|
|
|
|
|
// impacted because we only ever swap with elements after
|
|
|
|
|
// `i`, and `i` is the greatest index we will touch.
|
|
|
|
|
unsafe {
|
|
|
|
|
self.lexemes.swap_unchecked(i, self.start_candidates - 1);
|
|
|
|
|
self.start_candidates = self.start_candidates.saturating_sub(1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// for removes, swap the last candidate with the last remainign
|
|
|
|
|
// token, then swap the remove with the last candidate, then dec
|
|
|
|
|
// `end_candidates` and `start_candidates`
|
|
|
|
|
FilterResult::Remove => {
|
|
|
|
|
unsafe {
|
|
|
|
|
// in the case that `start_candidates` ==
|
|
|
|
|
// `end_candidates`, no swap happens and that's fine.
|
|
|
|
|
// remove this: v
|
|
|
|
|
// [a,b,c][d,e,f][g,h,i]
|
|
|
|
|
// swap these: ^ ^
|
|
|
|
|
// [a,b,f][d,e,c][g,h,i]
|
|
|
|
|
// swap these: ^ ^
|
|
|
|
|
// [a,c,f][d,e,b][g,h,i]
|
|
|
|
|
// decrement both counters:
|
|
|
|
|
// [a,c][f,d,e][b,g,h,i]
|
|
|
|
|
self.lexemes
|
|
|
|
|
.swap_unchecked(self.start_candidates - 1, self.end_candidates - 1);
|
|
|
|
|
self.lexemes.swap_unchecked(i, self.end_candidates - 1);
|
|
|
|
|
self.start_candidates = self.start_candidates.saturating_sub(1);
|
|
|
|
|
self.end_candidates = self.end_candidates.saturating_sub(1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// replace `filtered`
|
|
|
|
|
self.filtered = filtered;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Helper type for parsing tokens that have a defined lexeme, such as `fn`,
|
|
|
|
|
/// `f32`, `const`, etc. Tokens with variable lexemes, such as primitive
|
|
|
|
|
/// integral types, constants or identifiers are not parsed by this.
|
|
|
|
|
pub struct LexemeParser {
|
|
|
|
|
lexemes: LexemeList,
|
|
|
|
|
len: usize,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl LexemeParser {
|
|
|
|
|
pub fn new() -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
lexemes: LexemeList::new(),
|
|
|
|
|
len: 0,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn parse(&mut self, mut tokens: impl Iterator<Item = char>) -> Option<Token> {
|
|
|
|
|
self.lexemes.clear();
|
|
|
|
|
loop {
|
|
|
|
|
let Some(ch) = tokens.next() else {
|
|
|
|
|
break;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if crate::is_things::is_whitespace(ch) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.lexemes.step(ch, self.len);
|
|
|
|
|
if self.lexemes.remaining().is_empty() {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
self.lexemes.candidates().last().copied()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
use itertools::Itertools;
|
|
|
|
|
use trie::Tree;
|
|
|
|
|
|
|
|
|
|
pub struct TokenItem<'a> {
|
|
|
|
|
pub token: Token,
|
|
|
|
|
pub lexeme: &'a str,
|
|
|
|
|
pub offset: u32,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Copy)]
|
|
|
|
|
struct CountingIterator<I: Iterator> {
|
|
|
|
|
struct CharCountingIterator<I: Iterator> {
|
|
|
|
|
iter: I,
|
|
|
|
|
count: usize,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<I: Iterator> From<I> for CountingIterator<I> {
|
|
|
|
|
impl<I: Iterator> From<I> for CharCountingIterator<I> {
|
|
|
|
|
fn from(iter: I) -> Self {
|
|
|
|
|
Self { iter, count: 0 }
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<I: Iterator<Item = char>> Iterator for CountingIterator<I> {
|
|
|
|
|
impl<I: Iterator<Item = char>> Iterator for CharCountingIterator<I> {
|
|
|
|
|
type Item = I::Item;
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
|
@ -477,13 +327,13 @@ impl<I: Iterator<Item = char>> Iterator for CountingIterator<I> {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<I: Iterator> CountingIterator<I> {
|
|
|
|
|
impl<I: Iterator> CharCountingIterator<I> {
|
|
|
|
|
pub(crate) fn offset(&self) -> usize {
|
|
|
|
|
self.count
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<I: Iterator> core::ops::Deref for CountingIterator<I> {
|
|
|
|
|
impl<I: Iterator> core::ops::Deref for CharCountingIterator<I> {
|
|
|
|
|
type Target = I;
|
|
|
|
|
|
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
|
|
|
@ -491,13 +341,13 @@ impl<I: Iterator> core::ops::Deref for CountingIterator<I> {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<I: Iterator> core::ops::DerefMut for CountingIterator<I> {
|
|
|
|
|
impl<I: Iterator> core::ops::DerefMut for CharCountingIterator<I> {
|
|
|
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
|
|
|
&mut self.iter
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type Source<'a> = CountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
|
|
|
|
|
type Source<'a> = CharCountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
|
|
|
|
|
|
|
|
|
|
pub struct TokenIterator<'a> {
|
|
|
|
|
trie: Tree<char, Token>,
|
|
|
|
|
@ -521,11 +371,11 @@ impl<'a> TokenIterator<'a> {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn peekable_source(&self) -> Source<'a> {
|
|
|
|
|
CountingIterator::from(self.source[self.offset..].chars().peekable())
|
|
|
|
|
CharCountingIterator::from(self.source[self.offset..].chars().peekable())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse(&mut self) -> Option<Token> {
|
|
|
|
|
let mut iter = CountingIterator::from(self.source[self.offset..].chars());
|
|
|
|
|
let mut iter = CharCountingIterator::from(self.source[self.offset..].chars());
|
|
|
|
|
|
|
|
|
|
match self.trie.get_closest(&mut iter) {
|
|
|
|
|
Some(token) => {
|
|
|
|
|
@ -565,12 +415,8 @@ impl<'a> TokenIterator<'a> {
|
|
|
|
|
}
|
|
|
|
|
count
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a> Iterator for TokenIterator<'a> {
|
|
|
|
|
type Item = (Token, &'a str);
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
|
fn next_token(&mut self) -> Option<(Token, Range<usize>)> {
|
|
|
|
|
// skip whitespace
|
|
|
|
|
self.skip_whitespaces();
|
|
|
|
|
|
|
|
|
|
@ -583,36 +429,49 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|
|
|
|
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
|
|
|
|
self.offset += source.offset();
|
|
|
|
|
|
|
|
|
|
Some((token, &self.source[start..self.offset]))
|
|
|
|
|
Some(token)
|
|
|
|
|
}
|
|
|
|
|
Some('.') if cursor.next().map_or(false, is_things::is_digit) => {
|
|
|
|
|
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
|
|
|
|
self.offset += source.offset();
|
|
|
|
|
|
|
|
|
|
Some((token, &self.source[start..self.offset]))
|
|
|
|
|
Some(token)
|
|
|
|
|
}
|
|
|
|
|
Some('\'' | '"') => {
|
|
|
|
|
let token = complex_tokens::parse_string_or_char_constant(&mut source).ok()?;
|
|
|
|
|
self.offset += source.offset();
|
|
|
|
|
|
|
|
|
|
Some((token, &self.source[start..self.offset]))
|
|
|
|
|
Some(token)
|
|
|
|
|
}
|
|
|
|
|
Some('`') => {
|
|
|
|
|
// raw identifier
|
|
|
|
|
self.skip(1);
|
|
|
|
|
self.skip_while(|c| is_things::is_id_continue(c));
|
|
|
|
|
if self.peekable_source().next() == Some('`') {
|
|
|
|
|
self.skip(1);
|
|
|
|
|
Some(Token::Ident)
|
|
|
|
|
} else {
|
|
|
|
|
// unterminated raw identifier
|
|
|
|
|
Some(Token::ParseError)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// `//`-style comments or doc-comments
|
|
|
|
|
_ => match self.parse().map(|tok| match tok {
|
|
|
|
|
Token::SlashSlash => {
|
|
|
|
|
self.skip_while(|c| c == '\n');
|
|
|
|
|
(Token::Comment)
|
|
|
|
|
Token::Comment
|
|
|
|
|
}
|
|
|
|
|
Token::SlashSlashSlash => {
|
|
|
|
|
self.skip_while(|c| c == '\n');
|
|
|
|
|
(Token::DocComment)
|
|
|
|
|
Token::DocComment
|
|
|
|
|
}
|
|
|
|
|
_ => tok,
|
|
|
|
|
}) {
|
|
|
|
|
Some(tok) => {
|
|
|
|
|
if tok.maybe_ident() && self.skip_while(|c| is_things::is_id_continue(c)) > 0 {
|
|
|
|
|
Some((Token::Ident, &self.source[start..self.offset]))
|
|
|
|
|
Some(Token::Ident)
|
|
|
|
|
} else {
|
|
|
|
|
Some((tok, &self.source[start..self.offset]))
|
|
|
|
|
Some(tok)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
None => {
|
|
|
|
|
@ -623,15 +482,49 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|
|
|
|
{
|
|
|
|
|
self.skip(1);
|
|
|
|
|
self.skip_while(|c| is_things::is_id_continue(c));
|
|
|
|
|
Some((Token::Ident, &self.source[start..self.offset]))
|
|
|
|
|
Some(Token::Ident)
|
|
|
|
|
} else {
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
}?;
|
|
|
|
|
|
|
|
|
|
token
|
|
|
|
|
Some((token, start..self.offset))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn next_token_item(&mut self) -> Option<TokenItem<'a>> {
|
|
|
|
|
let (token, range) = self.next_token()?;
|
|
|
|
|
let lexeme = &self.source[range.clone()];
|
|
|
|
|
Some(TokenItem {
|
|
|
|
|
token,
|
|
|
|
|
lexeme,
|
|
|
|
|
offset: range.start as u32,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn into_token_items(self) -> TokenItemIterator<'a> {
|
|
|
|
|
TokenItemIterator { inner: self }
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a> Iterator for TokenIterator<'a> {
|
|
|
|
|
type Item = Token;
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
|
self.next_token().map(|(token, _)| token)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub struct TokenItemIterator<'a> {
|
|
|
|
|
inner: TokenIterator<'a>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a> Iterator for TokenItemIterator<'a> {
|
|
|
|
|
type Item = TokenItem<'a>;
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
|
self.inner.next_token_item()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@ -643,44 +536,69 @@ mod tests {
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_iterator() {
|
|
|
|
|
let tokens = "fn let void+++(++bool)";
|
|
|
|
|
let tokens = "fn let void+(+bool)";
|
|
|
|
|
let mut lexer = TokenIterator::new(&tokens);
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Fn, "fn")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Let, "let")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Void, "void")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::PlusPlus, "++")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::OpenParens, "(")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::PlusPlus, "++")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Bool, "bool")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::CloseParens, ")")));
|
|
|
|
|
assert_eq!(lexer.next(), Some(Token::Fn));
|
|
|
|
|
assert_eq!(lexer.next(), Some(Token::Let));
|
|
|
|
|
assert_eq!(lexer.next(), Some(Token::Void));
|
|
|
|
|
assert_eq!(lexer.next(), Some(Token::Plus));
|
|
|
|
|
assert_eq!(lexer.next(), Some(Token::OpenParens));
|
|
|
|
|
assert_eq!(lexer.next(), Some(Token::Plus));
|
|
|
|
|
assert_eq!(lexer.next(), Some(Token::Bool));
|
|
|
|
|
assert_eq!(lexer.next(), Some(Token::CloseParens));
|
|
|
|
|
assert_eq!(lexer.next(), None);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Every whitespace-separated word in the input must lex as an identifier.
#[test]
fn idents() {
    let source = "a a1 a_ a-b _a _1 _- -a -1 -_ `123";
    let mut lexer = TokenIterator::new(source);
    let only_idents = lexer.all(|tok| tok == Token::Ident);
    assert!(only_idents);
}
|
|
|
|
|
|
|
|
|
|
/// A `-` glued to identifier characters is part of the identifier; only a
/// free-standing `-` lexes as `Token::Minus`.
#[test]
fn ident_minus_ambiguity() {
    let tokens: Vec<_> = TokenIterator::new("a-a a- - a -a --a").collect();
    let expected = vec![
        Token::Ident, // a-a
        Token::Ident, // a-
        Token::Minus, // -
        Token::Ident, // a
        Token::Ident, // -a
        Token::Ident, // --a
    ];
    assert_eq!(tokens, expected);
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn complex_iterator() {
|
|
|
|
|
let tokens = "fn my-function(x: i32, y: f32) -> f32 { return x + y; }";
|
|
|
|
|
let mut lexer = TokenIterator::new(&tokens);
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Fn, "fn")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Ident, "my-function")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::OpenParens, "(")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Ident, "x")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Colon, ":")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Ident, "i32")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Comma, ",")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Ident, "y")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Colon, ":")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::F32, "f32")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::CloseParens, ")")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::MinusGreater, "->")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::F32, "f32")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::OpenBrace, "{")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Return, "return")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Ident, "x")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Plus, "+")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Ident, "y")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::Semi, ";")));
|
|
|
|
|
assert_eq!(lexer.next(), Some((Token::CloseBrace, "}")));
|
|
|
|
|
assert_eq!(lexer.next(), None);
|
|
|
|
|
let lexer = TokenIterator::new(&tokens);
|
|
|
|
|
let mut items = lexer
|
|
|
|
|
.into_token_items()
|
|
|
|
|
.map(|item| (item.token, item.lexeme));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Fn, "fn")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Ident, "my-function")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::OpenParens, "(")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Ident, "x")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Colon, ":")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::I32, "i32")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Comma, ",")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Ident, "y")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Colon, ":")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::F32, "f32")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::CloseParens, ")")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::MinusGreater, "->")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::F32, "f32")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::OpenBrace, "{")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Return, "return")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Ident, "x")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Plus, "+")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Ident, "y")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::Semi, ";")));
|
|
|
|
|
assert_eq!(items.next(), Some((Token::CloseBrace, "}")));
|
|
|
|
|
assert_eq!(items.next(), None);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|