// (extraction artifact, preserved as a comment: "836 lines | 23 KiB | Rust", repeated twice)
#![feature(slice_swap_unchecked, iter_collect_into, push_mut)]
|
|
|
|
mod is_things {
|
|
/// True if `c` is considered a whitespace according to Rust language definition.
|
|
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
|
|
/// for definitions of these classes.
|
|
pub fn is_whitespace(c: char) -> bool {
|
|
// This is Pattern_White_Space.
|
|
//
|
|
// Note that this set is stable (ie, it doesn't change with different
|
|
// Unicode versions), so it's ok to just hard-code the values.
|
|
|
|
matches!(
|
|
c,
|
|
// Usual ASCII suspects
|
|
'\u{0009}' // \t
|
|
| '\u{000A}' // \n
|
|
| '\u{000B}' // vertical tab
|
|
| '\u{000C}' // form feed
|
|
| '\u{000D}' // \r
|
|
| '\u{0020}' // space
|
|
|
|
// NEXT LINE from latin1
|
|
| '\u{0085}'
|
|
|
|
// Bidi markers
|
|
| '\u{200E}' // LEFT-TO-RIGHT MARK
|
|
| '\u{200F}' // RIGHT-TO-LEFT MARK
|
|
|
|
// Dedicated whitespace characters from Unicode
|
|
| '\u{2028}' // LINE SEPARATOR
|
|
| '\u{2029}' // PARAGRAPH SEPARATOR
|
|
)
|
|
}
|
|
|
|
/// True if `c` is valid as a first character of an identifier.
|
|
/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
|
|
/// a formal definition of valid identifier name.
|
|
pub fn is_id_start(c: char) -> bool {
|
|
// This is XID_Start OR '_' (which formally is not a XID_Start).
|
|
c == '_' || c == '-' || unicode_xid::UnicodeXID::is_xid_start(c)
|
|
}
|
|
|
|
/// True if `c` is valid as a non-first character of an identifier.
|
|
/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
|
|
/// a formal definition of valid identifier name.
|
|
pub fn is_id_continue(c: char) -> bool {
|
|
unicode_xid::UnicodeXID::is_xid_continue(c) || c == '-'
|
|
}
|
|
|
|
/// The passed string is lexically an identifier.
|
|
pub fn is_ident(string: &str) -> bool {
|
|
let mut chars = string.chars();
|
|
if let Some(start) = chars.next() {
|
|
is_id_start(start) && chars.all(is_id_continue)
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
pub fn is_digit(ch: char) -> bool {
|
|
('0'..='9').contains(&ch)
|
|
}
|
|
|
|
pub fn is_bin_digit(ch: char) -> bool {
|
|
ch == '0' || ch == '1'
|
|
}
|
|
|
|
#[expect(dead_code)]
|
|
pub fn is_nonzero_digit(ch: char) -> bool {
|
|
('1'..='9').contains(&ch)
|
|
}
|
|
|
|
pub fn is_oct_digit(ch: char) -> bool {
|
|
('0'..='7').contains(&ch)
|
|
}
|
|
|
|
pub fn is_hex_digit(ch: char) -> bool {
|
|
('0'..='9').contains(&ch) || ('a'..='f').contains(&ch) || ('A'..='F').contains(&ch)
|
|
}
|
|
}
|
|
|
|
/// Generates a token-kind enum `$ty_name` together with a `Display` impl and
/// lexeme-lookup helpers.
///
/// The first brace group (`$name2`) lists marker tokens with no fixed lexeme
/// (identifiers, constants, errors, ...); the second maps each remaining token
/// to its fixed source lexeme string.
macro_rules! tokens {
    ($vis:vis $ty_name:ident:
    {
        $($name2:ident),*
    },
    {
        $($name:ident => $lexeme:literal),*
    }) => {

        #[allow(dead_code)]
        #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
        $vis enum $ty_name {
            $($name,
            )*
            $($name2,)*
        }

        impl ::core::fmt::Display for $ty_name {
            fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
                match self {
                    // fixed-lexeme tokens print verbatim; markers print as `<Name>`
                    $(Self::$name => write!(f, "{}", $lexeme),)*
                    $(Self::$name2 => write!(f, "<{}>", stringify!($name2))),*
                }
            }
        }

        #[allow(dead_code)]
        impl $ty_name {
            /// The fixed lexeme of this token, or `None` for marker tokens.
            $vis fn lexeme(&self) -> Option<&'static str> {
                match self {
                    $(Self::$name => Some($lexeme),)*
                    $(Self::$name2 => None),*
                }
            }

            /// returns the number of chars in this lexeme (0 for marker tokens)
            $vis fn lexeme_len(&self) -> usize {
                self.lexeme().map(|lexeme|lexeme.chars().count()).unwrap_or(0)
            }

            /// returns the number of UTF-8 *bytes* in this lexeme
            /// (0 for marker tokens)
            $vis fn lexeme_len_utf8(&self) -> usize {
                self.lexeme().map(|lexeme|lexeme.len()).unwrap_or(0)
            }

            /// True if this token's fixed lexeme is itself lexically an
            /// identifier (i.e. a keyword that a longer identifier could
            /// extend).
            $vis fn maybe_ident(&self) -> bool {
                self.lexeme().map(|lexeme| crate::is_things::is_ident(lexeme)).unwrap_or(false)
            }

            /// All `(token, lexeme)` pairs that have a fixed lexeme.
            $vis fn lexemes() -> &'static [(Self, &'static str)] {
                &[
                    $((Self::$name, $lexeme)),*
                ]
            }
        }
    };
}
|
|
|
|
tokens!(pub Token: {
    Eof,
    ParseError,
    // Marker Token for any Comment
    Comment,
    DocComment,
    // Marker Tokens for literal constants
    CharConstant,
    IntegerConstant,
    IntegerHexConstant,
    IntegerBinConstant,
    IntegerOctConstant,
    FloatingConstant,
    FloatingExpConstant,
    DotFloatingConstant,
    DotFloatingExpConstant,
    StringConstant,
    Ident
},
// Lexical Tokens:
{
    // Comment introducers (the comment body is consumed separately)
    SlashSlash => "//",
    SlashSlashSlash => "///",
    // SlashStar => "/*",
    // SlashStarStar => "/**",
    //StarSlash => "*/",
    // Punctuation:
    OpenParens => "(",
    CloseParens => ")",
    OpenBrace => "{",
    CloseBrace => "}",
    OpenSquareBracket => "[",
    CloseSquareBracket => "]",
    Semi => ";",
    Comma => ",",
    Elipsis3 => "...",
    Elipsis2 => "..",
    Colon => ":",
    Equal => "=",
    // Keywords:
    Void => "void",
    Bool => "bool",
    F32 => "f32",
    F64 => "f64",
    ISize => "isize",
    USize => "usize",
    U1 => "u1",
    U8 => "u8",
    U16 => "u16",
    U32 => "u32",
    U64 => "u64",
    I1 => "i1",
    I8 => "i8",
    I16 => "i16",
    I32 => "i32",
    I64 => "i64",
    Const => "const",
    Mutable => "mut",
    Volatile => "volatile",
    Noalias => "noalias",
    Fn => "fn",
    Let => "let",
    Var => "var",
    If => "if",
    As => "as",
    Else => "else",
    Return => "return",
    Struct => "struct",
    Type => "type",
    Union => "union",
    Enum => "enum",
    Packed => "packed",
    Extern => "extern",
    Pub => "pub",
    Module => "mod",
    // Operators
    Dot => ".",
    MinusGreater => "->",
    Bang => "!",
    Tilde => "~",
    Plus => "+",
    // PlusPlus => "++",
    Minus => "-",
    // MinusMinus => "--",
    Star => "*",
    Slash => "/",
    Percent => "%",
    Less => "<",
    Greater => ">",
    LessEqual => "<=",
    GreaterEqual => ">=",
    EqualEqual => "==",
    BangEqual => "!=",
    PipePipe => "||",
    AmpersandAmpersand => "&&",
    Ampersand => "&",
    Caret => "^",
    Pipe => "|",
    LessLess => "<<",
    GreaterGreater => ">>",
    Question => "?",
    // Compound assignment operators
    PlusEqual => "+=",
    MinusEqual => "-=",
    StarEqual => "*=",
    SlashEqual => "/=",
    PercentEqual => "%=",
    AmpersandEqual => "&=",
    PipeEqual => "|=",
    CaretEqual => "^=",
    LessLessEqual => "<<=",
    GreaterGreaterEqual => ">>="
});
|
|
|
|
impl Token {
|
|
pub fn is_assignment_op(self) -> bool {
|
|
match self {
|
|
Token::PlusEqual
|
|
| Token::MinusEqual
|
|
| Token::StarEqual
|
|
| Token::SlashEqual
|
|
| Token::PercentEqual
|
|
| Token::PipeEqual
|
|
| Token::CaretEqual
|
|
| Token::AmpersandEqual
|
|
| Token::LessLessEqual
|
|
| Token::GreaterGreaterEqual
|
|
| Token::Equal => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
pub fn is_unary_op(self) -> bool {
|
|
match self {
|
|
Token::Plus | Token::Minus | Token::Star | Token::Ampersand | Token::Bang => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
pub fn is_binary_op(self) -> bool {
|
|
match self {
|
|
Token::Star
|
|
| Token::Slash
|
|
| Token::Percent
|
|
| Token::Pipe
|
|
| Token::Ampersand
|
|
| Token::Caret
|
|
| Token::Plus
|
|
| Token::Minus
|
|
| Token::PipePipe
|
|
| Token::AmpersandAmpersand
|
|
| Token::BangEqual
|
|
| Token::EqualEqual
|
|
| Token::Less
|
|
| Token::Greater
|
|
| Token::LessEqual
|
|
| Token::GreaterEqual
|
|
| Token::LessLess
|
|
| Token::GreaterGreater => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
use std::{
|
|
collections::VecDeque,
|
|
marker::PhantomData,
|
|
ops::{Deref, DerefMut, Range},
|
|
};
|
|
|
|
use trie::Tree;
|
|
|
|
/// A lexed token together with the source text it was lexed from.
#[derive(Debug, Clone)]
pub struct TokenItem<'a> {
    /// The kind of token.
    pub token: Token,
    /// The exact source slice this token covers.
    pub lexeme: &'a str,
    /// Byte offset of `lexeme` within the source string.
    pub offset: u32,
}
|
|
|
|
/// Iterator adapter that tracks the total UTF-8 byte length of the `char`s it
/// has yielded so far (i.e. the byte offset reached in the underlying text).
#[derive(Debug, Clone, Copy)]
struct CharCountingIterator<I: Iterator> {
    // the wrapped char iterator
    iter: I,
    // bytes (not chars) yielded so far
    count: usize,
}
|
|
|
|
impl<I: Iterator> From<I> for CharCountingIterator<I> {
|
|
fn from(iter: I) -> Self {
|
|
Self { iter, count: 0 }
|
|
}
|
|
}
|
|
|
|
impl<I: Iterator<Item = char>> Iterator for CharCountingIterator<I> {
|
|
type Item = I::Item;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.iter.next().inspect(|c| self.count += c.len_utf8())
|
|
}
|
|
}
|
|
|
|
impl<I: Iterator> CharCountingIterator<I> {
    /// The number of bytes yielded so far — i.e. the byte offset this iterator
    /// has reached in the underlying text.
    pub(crate) fn offset(&self) -> usize {
        self.count
    }
}
|
|
|
|
// Pass through to the wrapped iterator so callers can use its inherent
// methods (e.g. `peek()` when `I` is a `Peekable`).
impl<I: Iterator> core::ops::Deref for CharCountingIterator<I> {
    type Target = I;

    fn deref(&self) -> &Self::Target {
        &self.iter
    }
}
|
|
|
|
// Mutable pass-through counterpart of the `Deref` impl above. NOTE(review):
// chars consumed through this `&mut I` bypass the byte counter — confirm
// callers only use it for peeking.
impl<I: Iterator> core::ops::DerefMut for CharCountingIterator<I> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.iter
    }
}
|
|
|
|
/// The char source handed to the complex-token parsers: a byte-counting,
/// peekable iterator over the unconsumed tail of the input.
type Source<'a> = CharCountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
|
|
|
|
/// A lexer over a source string, yielding [`Token`]s on iteration.
pub struct TokenIterator<'a> {
    // prefix trie mapping fixed lexemes (keywords/operators) to their tokens
    trie: Tree<char, Token>,
    // the full source text
    source: &'a str,
    // current byte offset into `source`
    offset: usize,
}
|
|
|
|
impl<'a> TokenIterator<'a> {
|
|
pub fn new(source: &'a str) -> Self {
|
|
let mut trie = Tree::new();
|
|
|
|
for (token, token_str) in Token::lexemes() {
|
|
trie.insert(token_str.chars(), *token);
|
|
}
|
|
|
|
Self {
|
|
trie,
|
|
source,
|
|
offset: 0,
|
|
}
|
|
}
|
|
|
|
fn peekable_source(&self) -> Source<'a> {
|
|
CharCountingIterator::from(self.source[self.offset..].chars().peekable())
|
|
}
|
|
|
|
fn parse(&mut self) -> Option<Token> {
|
|
let mut iter = CharCountingIterator::from(self.source[self.offset..].chars());
|
|
|
|
match self.trie.get_closest(&mut iter) {
|
|
Some(token) => {
|
|
// skip the peeked item
|
|
self.offset += token.lexeme_len();
|
|
Some(*token)
|
|
}
|
|
None => None,
|
|
}
|
|
}
|
|
|
|
fn skip_whitespaces(&mut self) -> usize {
|
|
self.skip_while(is_things::is_whitespace)
|
|
}
|
|
|
|
fn skip(&mut self, mut n: usize) -> usize {
|
|
self.skip_while(|_| {
|
|
n -= 1;
|
|
n > 0
|
|
})
|
|
}
|
|
|
|
fn skip_while(&mut self, mut pred: impl FnMut(char) -> bool) -> usize {
|
|
let mut count = 0;
|
|
loop {
|
|
let Some(c) = self.source[self.offset..].chars().next() else {
|
|
break;
|
|
};
|
|
|
|
if pred(c) {
|
|
self.offset += c.len_utf8();
|
|
count += c.len_utf8();
|
|
continue;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
count
|
|
}
|
|
|
|
fn next_token(&mut self) -> Option<(Token, Range<usize>)> {
|
|
// skip whitespace
|
|
self.skip_whitespaces();
|
|
|
|
let start = self.offset;
|
|
|
|
let mut source = self.peekable_source();
|
|
let mut cursor = self.peekable_source();
|
|
let token = match cursor.next() {
|
|
Some('0'..='9') => {
|
|
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
|
self.offset += source.offset();
|
|
|
|
Some(token)
|
|
}
|
|
Some('.') if cursor.next().map_or(false, is_things::is_digit) => {
|
|
let token = complex_tokens::parse_constant(&mut source).ok()?;
|
|
self.offset += source.offset();
|
|
|
|
Some(token)
|
|
}
|
|
Some('\'' | '"') => {
|
|
let token = complex_tokens::parse_string_or_char_constant(&mut source).ok()?;
|
|
self.offset += source.offset();
|
|
|
|
Some(token)
|
|
}
|
|
Some('`') => {
|
|
// raw identifier
|
|
self.skip(1);
|
|
self.skip_while(|c| is_things::is_id_continue(c));
|
|
if self.peekable_source().next() == Some('`') {
|
|
self.skip(1);
|
|
Some(Token::Ident)
|
|
} else {
|
|
// unterminated raw identifier
|
|
Some(Token::ParseError)
|
|
}
|
|
}
|
|
// `//`-style comments or doc-comments
|
|
_ => match self.parse().map(|tok| match tok {
|
|
Token::SlashSlash => {
|
|
self.skip_while(|c| c == '\n');
|
|
Token::Comment
|
|
}
|
|
Token::SlashSlashSlash => {
|
|
self.skip_while(|c| c == '\n');
|
|
Token::DocComment
|
|
}
|
|
_ => tok,
|
|
}) {
|
|
Some(tok) => {
|
|
if tok.maybe_ident() && self.skip_while(|c| is_things::is_id_continue(c)) > 0 {
|
|
Some(Token::Ident)
|
|
} else {
|
|
Some(tok)
|
|
}
|
|
}
|
|
None => {
|
|
if self
|
|
.peekable_source()
|
|
.next()
|
|
.map_or(false, |c| is_things::is_id_start(c))
|
|
{
|
|
self.skip(1);
|
|
self.skip_while(|c| is_things::is_id_continue(c));
|
|
Some(Token::Ident)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
},
|
|
}?;
|
|
|
|
Some((token, start..self.offset))
|
|
}
|
|
|
|
fn next_token_item(&mut self) -> Option<TokenItem<'a>> {
|
|
let (token, range) = self.next_token()?;
|
|
let lexeme = &self.source[range.clone()];
|
|
Some(TokenItem {
|
|
token,
|
|
lexeme,
|
|
offset: range.start as u32,
|
|
})
|
|
}
|
|
|
|
pub fn into_token_items(self) -> TokenItemIterator<'a> {
|
|
TokenItemIterator { inner: self }
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for TokenIterator<'a> {
|
|
type Item = Token;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.next_token().map(|(token, _)| token)
|
|
}
|
|
}
|
|
|
|
/// Iterator wrapper around [`TokenIterator`] that yields [`TokenItem`]s
/// (token + lexeme + offset) instead of bare [`Token`]s.
pub struct TokenItemIterator<'a> {
    inner: TokenIterator<'a>,
}
|
|
|
|
impl<'a> Iterator for TokenItemIterator<'a> {
    type Item = TokenItem<'a>;

    // delegate to the wrapped lexer's item-producing method
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next_token_item()
    }
}
|
|
|
|
/// Type-state marker: the iterator peeks items without consuming them.
pub struct Peeking;
/// Type-state marker: the iterator consumes items.
pub struct Consuming;
/// Marker trait over the two reborrow modes; sealed so no further modes can
/// be added outside this module.
pub trait ReborrowMode: sealed::Sealed {}
impl ReborrowMode for Peeking {}
impl ReborrowMode for Consuming {}

// Standard sealed-trait pattern: `Sealed` is private, so only the two marker
// types above can ever implement `ReborrowMode`.
mod sealed {
    pub trait Sealed {}
    impl Sealed for super::Peeking {}
    impl Sealed for super::Consuming {}
}
|
|
|
|
/// A deque that is either owned or mutably borrowed, letting a
/// [`ReborrowingIterator`]'s cache be shared across reborrows.
enum Queue<'a, T> {
    Owned(VecDeque<T>),
    Borrowed(&'a mut VecDeque<T>),
}
|
|
|
|
impl<'a, T> Queue<'a, T> {
|
|
fn borrowed(&'_ mut self) -> Queue<'_, T> {
|
|
match self {
|
|
Queue::Owned(v) => Queue::Borrowed(v),
|
|
Queue::Borrowed(v) => Queue::Borrowed(v),
|
|
}
|
|
}
|
|
}
|
|
|
|
// Both variants expose the same deque read-only.
impl<T> Deref for Queue<'_, T> {
    type Target = VecDeque<T>;

    fn deref(&self) -> &Self::Target {
        match self {
            Queue::Owned(v) => v,
            Queue::Borrowed(v) => v,
        }
    }
}
|
|
|
|
// Mutable counterpart of the `Deref` impl above.
impl<T> DerefMut for Queue<'_, T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        match self {
            Queue::Owned(v) => v,
            Queue::Borrowed(v) => v,
        }
    }
}
|
|
|
|
/// An iterator adapter over `&mut I` that buffers items in a cache so they can
/// be peeked ahead of consumption, and that can be reborrowed in either
/// [`Peeking`] or [`Consuming`] mode.
pub struct ReborrowingIterator<'a, 'b, I, T, Marker>
where
    I: Iterator<Item = T>,
{
    // the underlying iterator
    iter: &'a mut I,
    // items pulled from `iter` but not yet consumed
    cache: Queue<'b, T>,
    // index into `cache` of the next item to peek (used in Peeking mode)
    peeking_cursor: usize,
    // zero-sized mode marker (`Peeking` or `Consuming`)
    _marker: PhantomData<Marker>,
}
|
|
|
|
/// A [`ReborrowingIterator`] fixed to peeking mode.
pub type ReborrowingPeekingIterator<'a, 'b, I, T> = ReborrowingIterator<'a, 'b, I, T, Peeking>;
/// A [`ReborrowingIterator`] fixed to consuming mode.
pub type ReborrowingConsumingIterator<'a, 'b, I, T> = ReborrowingIterator<'a, 'b, I, T, Consuming>;
|
|
|
|
impl<'a, 'b, I, T, Marker> ReborrowingIterator<'a, 'b, I, T, Marker>
where
    I: Iterator<Item = T>,
{
    /// Wraps `iter` with a fresh, owned cache and the peek cursor at 0.
    pub fn new(iter: &'a mut I) -> Self {
        Self {
            iter,
            cache: Queue::Owned(VecDeque::new()),
            peeking_cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Converts into peeking mode, transferring the cache; the peek cursor is
    /// reset to 0, so previously peeked items become visible again.
    pub fn reborrow_peeking(self) -> ReborrowingIterator<'a, 'b, I, T, Peeking> {
        ReborrowingIterator {
            iter: self.iter,
            cache: self.cache,
            peeking_cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Converts into consuming mode, transferring the cache; the cursor reset
    /// means cached (previously peeked) items will be consumed first.
    pub fn reborrow_consuming(self) -> ReborrowingIterator<'a, 'b, I, T, Consuming> {
        ReborrowingIterator {
            iter: self.iter,
            cache: self.cache,
            peeking_cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Temporarily borrows as a peeking-mode iterator sharing this cache; the
    /// borrow starts with its own cursor at 0.
    pub fn borrow_peeking(&'_ mut self) -> ReborrowingIterator<'_, '_, I, T, Peeking> {
        ReborrowingIterator {
            iter: self.iter,
            cache: self.cache.borrowed(),
            peeking_cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Temporarily borrows as a consuming-mode iterator sharing this cache.
    pub fn borrow_consuming(&'_ mut self) -> ReborrowingIterator<'_, '_, I, T, Consuming> {
        ReborrowingIterator {
            iter: self.iter,
            cache: self.cache.borrowed(),
            peeking_cursor: 0,
            _marker: PhantomData,
        }
    }
}
|
|
|
|
impl<'a, 'b, I, T> ReborrowingIterator<'a, 'b, I, T, Consuming>
|
|
where
|
|
I: Iterator<Item = T>,
|
|
{
|
|
pub fn expect_one_of<Ts: IntoIterator<Item = T>>(&mut self, candidates: Ts) -> Option<T>
|
|
where
|
|
T: Eq,
|
|
{
|
|
let mut candidates = candidates.into_iter();
|
|
|
|
let token = self.next()?;
|
|
if candidates.any(|cand| cand == token) {
|
|
Some(token)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a, 'b, I, T> Iterator for ReborrowingIterator<'a, 'b, I, T, Consuming>
|
|
where
|
|
I: Iterator<Item = T>,
|
|
{
|
|
type Item = T;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.cache.pop_front().or_else(|| self.iter.next())
|
|
}
|
|
}
|
|
|
|
impl<'a, 'b, I, T> Iterator for ReborrowingIterator<'a, 'b, I, T, Peeking>
where
    I: Iterator<Item = T>,
    T: Copy,
{
    type Item = T;

    // In peeking mode, `next` copies the item at the peek cursor without
    // removing it from the cache; only the cursor advances.
    fn next(&mut self) -> Option<Self::Item> {
        self.peek_next().copied()
    }
}
|
|
|
|
impl<'a, 'b, I, T> ReborrowingIterator<'a, 'b, I, T, Peeking>
|
|
where
|
|
I: Iterator<Item = T>,
|
|
{
|
|
pub fn peek_next(&mut self) -> Option<&T> {
|
|
if self.peeking_cursor >= self.cache.len() {
|
|
if let Some(item) = self.iter.next() {
|
|
self.peeking_cursor += 1;
|
|
Some(self.cache.push_back_mut(item))
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
let item = self.cache.get(self.peeking_cursor)?;
|
|
self.peeking_cursor += 1;
|
|
Some(item)
|
|
}
|
|
}
|
|
|
|
pub fn drain_peeked(&mut self) -> impl Iterator<Item = T> + '_ {
|
|
let drained = self.cache.drain(0..self.peeking_cursor);
|
|
self.peeking_cursor = 0;
|
|
drained
|
|
}
|
|
|
|
pub fn skip(&mut self, n: usize) {
|
|
let cached = self.cache.len() - self.peeking_cursor;
|
|
self.peeking_cursor = self.peeking_cursor.saturating_add(n);
|
|
if n > cached {
|
|
// need to pull from the underlying iterator
|
|
let surplus = n - cached;
|
|
self.cache.extend(self.iter.take(surplus));
|
|
self.peeking_cursor += n;
|
|
}
|
|
}
|
|
|
|
pub fn borrow_consuming_at_cursor(
|
|
&'_ mut self,
|
|
) -> ReborrowingIterator<'_, '_, I, T, Consuming> {
|
|
_ = self.drain_peeked();
|
|
ReborrowingIterator {
|
|
iter: self.iter,
|
|
cache: self.cache.borrowed(),
|
|
peeking_cursor: 0,
|
|
_marker: PhantomData,
|
|
}
|
|
}
|
|
|
|
pub fn reborrow_consuming_at_cursor(mut self) -> ReborrowingIterator<'a, 'b, I, T, Consuming> {
|
|
_ = self.drain_peeked();
|
|
ReborrowingIterator {
|
|
iter: self.iter,
|
|
cache: self.cache,
|
|
peeking_cursor: 0,
|
|
_marker: PhantomData,
|
|
}
|
|
}
|
|
|
|
pub fn peek_one_of<Ts: IntoIterator<Item = T>>(&mut self, candidates: Ts) -> Option<&T>
|
|
where
|
|
T: Eq,
|
|
{
|
|
let mut candidates = candidates.into_iter();
|
|
|
|
let token = self.peek_next()?;
|
|
if candidates.any(|cand| &cand == token) {
|
|
Some(token)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
mod complex_tokens;
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Keywords, operators and punctuation, with and without separating spaces.
    #[test]
    fn test_iterator() {
        let tokens = "fn let void+(+bool)";
        let mut lexer = TokenIterator::new(&tokens);
        assert_eq!(lexer.next(), Some(Token::Fn));
        assert_eq!(lexer.next(), Some(Token::Let));
        assert_eq!(lexer.next(), Some(Token::Void));
        assert_eq!(lexer.next(), Some(Token::Plus));
        assert_eq!(lexer.next(), Some(Token::OpenParens));
        assert_eq!(lexer.next(), Some(Token::Plus));
        assert_eq!(lexer.next(), Some(Token::Bool));
        assert_eq!(lexer.next(), Some(Token::CloseParens));
        assert_eq!(lexer.next(), None);
    }

    // Identifier forms, including kebab-case and `-`/`_`-initial names.
    #[test]
    fn idents() {
        // NOTE(review): the trailing "`123" raw identifier has no closing
        // backtick, and `next_token` maps unterminated raw identifiers to
        // `Token::ParseError` — it is unclear how this input can satisfy the
        // all-Ident assertion; confirm the intended semantics of `` ` `` at
        // end of input.
        let mut lexer = TokenIterator::new("a a1 a_ a-b _a _1 _- -a -1 -_ `123");
        assert!(lexer.all(|tok| tok == Token::Ident));
    }

    // `-` is an identifier char here, so spacing decides whether it lexes as
    // part of a name or as the Minus operator.
    #[test]
    fn ident_minus_ambiguity() {
        let lexer = TokenIterator::new("a-a a- - a -a --a");
        let tokens = lexer.collect::<Vec<_>>();
        assert_eq!(
            tokens,
            vec![
                Token::Ident,
                Token::Ident,
                Token::Minus,
                Token::Ident,
                Token::Ident,
                Token::Ident
            ]
        );
    }

    // Full function signature, checking both token kinds and lexeme slices.
    #[test]
    fn complex_iterator() {
        let tokens = "fn my-function(x: i32, y: f32) -> f32 { return x + y; }";
        let lexer = TokenIterator::new(&tokens);
        let mut items = lexer
            .into_token_items()
            .map(|item| (item.token, item.lexeme));
        assert_eq!(items.next(), Some((Token::Fn, "fn")));
        assert_eq!(items.next(), Some((Token::Ident, "my-function")));
        assert_eq!(items.next(), Some((Token::OpenParens, "(")));
        assert_eq!(items.next(), Some((Token::Ident, "x")));
        assert_eq!(items.next(), Some((Token::Colon, ":")));
        assert_eq!(items.next(), Some((Token::I32, "i32")));
        assert_eq!(items.next(), Some((Token::Comma, ",")));
        assert_eq!(items.next(), Some((Token::Ident, "y")));
        assert_eq!(items.next(), Some((Token::Colon, ":")));
        assert_eq!(items.next(), Some((Token::F32, "f32")));
        assert_eq!(items.next(), Some((Token::CloseParens, ")")));
        assert_eq!(items.next(), Some((Token::MinusGreater, "->")));
        assert_eq!(items.next(), Some((Token::F32, "f32")));
        assert_eq!(items.next(), Some((Token::OpenBrace, "{")));
        assert_eq!(items.next(), Some((Token::Return, "return")));
        assert_eq!(items.next(), Some((Token::Ident, "x")));
        assert_eq!(items.next(), Some((Token::Plus, "+")));
        assert_eq!(items.next(), Some((Token::Ident, "y")));
        assert_eq!(items.next(), Some((Token::Semi, ";")));
        assert_eq!(items.next(), Some((Token::CloseBrace, "}")));
        assert_eq!(items.next(), None);
    }
}
|