From 260150de15b70d17975df47462af3b528fa53422 Mon Sep 17 00:00:00 2001 From: janis Date: Sat, 27 Sep 2025 22:16:04 +0200 Subject: [PATCH] more iterator stuff --- Cargo.toml | 4 +- crates/lexer/src/lib.rs | 178 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 178 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f96d813..bc5564b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ resolver = "3" members = [ "crates/lexer" -] +, "crates/parser"] [package] name = "compiler" @@ -34,4 +34,4 @@ thiserror = "1.0.63" itertools = "0.13.0" werkzeug = { path = "../../rust/werkzeug" } -trie = { path = "../../rust/trie" } \ No newline at end of file +trie = { path = "../../rust/trie" } diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs index 12df766..7232c62 100644 --- a/crates/lexer/src/lib.rs +++ b/crates/lexer/src/lib.rs @@ -1,4 +1,4 @@ -#![feature(slice_swap_unchecked, iter_collect_into)] +#![feature(slice_swap_unchecked, iter_collect_into, push_mut)] mod is_things { /// True if `c` is considered a whitespace according to Rust language definition. 
// Reconstructed from the `crates/lexer/src/lib.rs` hunks of this patch:
//   * `@@ -297,7 +297,11 @@` replaces `use std::ops::Range;` with the grouped import
//     below (the existing `use trie::Tree;` line that follows it is untouched);
//   * `@@ -528,6 +532,176 @@` adds the definitions below, after
//     `impl<'a> Iterator for TokenItemIterator<'a>` and before `mod complex_tokens;`.
use std::{
    collections::VecDeque,
    marker::PhantomData,
    ops::{Deref, DerefMut, Range},
};

/// Mode marker: a [`ReborrowingIterator`] in this mode exposes the look-ahead
/// API (`peek` / `skip`) and keeps every item it has seen in the buffer.
pub struct Peeking;

/// Mode marker: a [`ReborrowingIterator`] in this mode implements [`Iterator`]
/// and hands items out by value.
pub struct Consuming;

/// The set of mode markers. Sealed: only [`Peeking`] and [`Consuming`]
/// implement it, because the `Sealed` supertrait lives in a private module.
///
/// Note that the `Marker` parameter of [`ReborrowingIterator`] is not bounded
/// by this trait.
pub trait ReborrowMode: sealed::Sealed {}
impl ReborrowMode for Peeking {}
impl ReborrowMode for Consuming {}

mod sealed {
    pub trait Sealed {}
    impl Sealed for super::Peeking {}
    impl Sealed for super::Consuming {}
}

/// Look-ahead buffer that is either owned by an iterator or mutably borrowed
/// from the iterator it was derived from.
enum Queue<'a, T> {
    Owned(VecDeque<T>),
    Borrowed(&'a mut VecDeque<T>),
}

impl<T> Queue<'_, T> {
    /// Returns a `Borrowed` queue pointing at the same underlying `VecDeque`,
    /// valid for as long as `self` is mutably borrowed.
    fn borrowed(&mut self) -> Queue<'_, T> {
        match self {
            Queue::Owned(buf) => Queue::Borrowed(buf),
            Queue::Borrowed(buf) => Queue::Borrowed(&mut **buf),
        }
    }
}

impl<T> Deref for Queue<'_, T> {
    type Target = VecDeque<T>;

    fn deref(&self) -> &Self::Target {
        match self {
            Queue::Owned(buf) => buf,
            Queue::Borrowed(buf) => &**buf,
        }
    }
}

impl<T> DerefMut for Queue<'_, T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        match self {
            Queue::Owned(buf) => buf,
            Queue::Borrowed(buf) => &mut **buf,
        }
    }
}

/// An adaptor over `&mut I` that buffers items in a queue so they can be
/// looked at (`Peeking` mode) before being handed out (`Consuming` mode).
///
/// The `borrow_*` methods create a short-lived child that shares both the
/// underlying iterator and the buffer; the `reborrow_*` methods convert `self`
/// into the other mode. Every child / converted iterator starts with its
/// cursor at the front of the buffer.
pub struct ReborrowingIterator<'a, 'b, I, T, Marker>
where
    I: Iterator<Item = T>,
{
    /// The underlying item source.
    iter: &'a mut I,
    /// Items already pulled from `iter` but not yet consumed.
    cache: Queue<'b, T>,
    /// Number of buffered items (counted from the front) already peeked.
    cursor: usize,
    _marker: PhantomData<Marker>,
}

impl<'a, 'b, I, T, Marker> ReborrowingIterator<'a, 'b, I, T, Marker>
where
    I: Iterator<Item = T>,
{
    /// Wraps `iter` with an empty, owned buffer and the cursor at the front.
    pub fn new(iter: &'a mut I) -> Self {
        Self {
            iter,
            cache: Queue::Owned(VecDeque::new()),
            cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Converts `self` into a peeking iterator over the same source and
    /// buffer, with the cursor rewound to the front of the buffer.
    pub fn reborrow_peeking(self) -> ReborrowingIterator<'a, 'b, I, T, Peeking> {
        ReborrowingIterator {
            iter: self.iter,
            cache: self.cache,
            cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Converts `self` into a consuming iterator over the same source and
    /// buffer. Buffered items (peeked or not) are yielded first.
    pub fn reborrow_consuming(self) -> ReborrowingIterator<'a, 'b, I, T, Consuming> {
        ReborrowingIterator {
            iter: self.iter,
            cache: self.cache,
            cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Creates a child peeking iterator that shares this iterator's source and
    /// buffer and starts at the front of the buffer. `self`'s own cursor is
    /// left untouched.
    pub fn borrow_peeking(&'_ mut self) -> ReborrowingIterator<'_, '_, I, T, Peeking> {
        ReborrowingIterator {
            iter: &mut *self.iter,
            cache: self.cache.borrowed(),
            cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Creates a child consuming iterator that shares this iterator's source
    /// and buffer. It yields buffered items from the front first.
    pub fn borrow_consuming(&'_ mut self) -> ReborrowingIterator<'_, '_, I, T, Consuming> {
        ReborrowingIterator {
            iter: &mut *self.iter,
            cache: self.cache.borrowed(),
            cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Drops every item in front of the cursor, then creates a child consuming
    /// iterator that continues from there.
    ///
    /// `self`'s cursor is reset to the (new) front of the buffer, so it stays
    /// consistent with the shortened buffer once the child is dropped.
    pub fn borrow_consuming_at_cursor(
        &'_ mut self,
    ) -> ReborrowingIterator<'_, '_, I, T, Consuming> {
        self.discard_peeked();
        ReborrowingIterator {
            iter: &mut *self.iter,
            cache: self.cache.borrowed(),
            cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Drops every item in front of the cursor, then converts `self` into a
    /// consuming iterator that continues from there.
    pub fn reborrow_consuming_at_cursor(mut self) -> ReborrowingIterator<'a, 'b, I, T, Consuming> {
        self.discard_peeked();
        ReborrowingIterator {
            iter: self.iter,
            cache: self.cache,
            cursor: 0,
            _marker: PhantomData,
        }
    }

    /// Removes the already-peeked prefix from the buffer and resets the cursor to 0.
    fn discard_peeked(&mut self) {
        // Clamp so a cursor that has fallen out of sync with a shared buffer
        // cannot make `drain` panic on an out-of-range end index.
        let peeked = std::mem::take(&mut self.cursor).min(self.cache.len());
        drop(self.cache.drain(..peeked));
    }
}

impl<'a, 'b, I, T> Iterator for ReborrowingIterator<'a, 'b, I, T, Consuming>
where
    I: Iterator<Item = T>,
{
    type Item = T;

    /// Yields buffered items front-to-back, then falls through to the
    /// underlying iterator.
    fn next(&mut self) -> Option<T> {
        self.cache.pop_front().or_else(|| self.iter.next())
    }
}

impl<'a, 'b, I, T> ReborrowingIterator<'a, 'b, I, T, Peeking>
where
    I: Iterator<Item = T>,
{
    /// Returns a reference to the item at the cursor and moves the cursor past
    /// it, so successive calls walk forward through the stream.
    ///
    /// When the cursor is at the end of the buffer, one item is pulled from
    /// the underlying iterator and buffered. Returns `None` (leaving the
    /// cursor where it is) when the underlying iterator has no more items.
    pub fn peek(&mut self) -> Option<&T> {
        if self.cursor >= self.cache.len() {
            let item = self.iter.next()?;
            // Stable equivalent of `push_back_mut`: push, then borrow the back.
            self.cache.push_back(item);
            // The new item is the last one, so "past it" is the buffer length.
            self.cursor = self.cache.len();
            self.cache.back()
        } else {
            let item = self.cache.get(self.cursor)?;
            self.cursor += 1;
            Some(item)
        }
    }

    /// Moves the cursor forward by `n` items without returning them.
    ///
    /// Items not yet buffered are pulled from the underlying iterator and
    /// buffered, so they remain available to a later consuming iterator. If
    /// fewer than `n` items remain, the cursor stops at the end of the buffer.
    pub fn skip(&mut self, n: usize) {
        let cursor = self.cursor.min(self.cache.len());
        let cached = self.cache.len() - cursor;
        if n > cached {
            // need to pull from the underlying iterator
            let surplus = n - cached;
            self.cache.extend(self.iter.by_ref().take(surplus));
        }
        // Never leave the cursor past the end: the source may have run dry.
        self.cursor = cursor.saturating_add(n).min(self.cache.len());
    }
}