more useless token stuff that I didn't really want anyway

This commit is contained in:
janis 2025-09-30 16:44:42 +02:00
parent df2bb54272
commit 357590ec07
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
3 changed files with 213 additions and 285 deletions

View file

@ -9,4 +9,6 @@ werkzeug = { workspace = true }
thiserror = { workspace = true }
itertools = { workspace = true }
trie = { workspace = true }
unicode-xid = { workspace = true }
unicode-xid = { workspace = true }
variadics_please = "1.1.0"

View file

@ -307,7 +307,7 @@ use std::{
use trie::Tree;
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Copy)]
pub struct TokenItem<'a> {
pub token: Token,
pub lexeme: &'a str,
@ -644,23 +644,207 @@ where
}
}
impl<'a, 'b, I, T> ReborrowingIterator<'a, 'b, I, T, Consuming>
/// A parser fragment that tries to consume a run of tokens from an iterator.
///
/// Implementors choose what a successful attempt produces (`Product`) and how
/// an unsuccessful attempt is reported (`Error`).
pub trait TokenConsumer<'a> {
/// Value returned by a successful `try_consume_tokens` call.
type Product;
/// Value returned when `try_consume_tokens` does not succeed.
type Error;
/// Attempts to consume tokens from `iter`.
///
/// `iter` is required to be `Clone`, which allows an implementation to try
/// a match on a copy before touching the caller's iterator.
// NOTE(review): the trait itself does not say whether `iter` must be left
// untouched on `Err` -- confirm the intended contract.
fn try_consume_tokens<I: Iterator<Item = TokenItem<'a>> + Clone>(
&mut self,
iter: &mut I,
) -> Result<Self::Product, Self::Error>;
}
/// Consumer that matches one fixed [`TokenSequence`] and produces `T::default()`.
///
/// The first field is the expected sequence; the `PhantomData` only carries the
/// product type.
struct SimpleTokenConsumer<S, T: Default = ()>(S, PhantomData<T>);

impl<'a, S, T> TokenConsumer<'a> for SimpleTokenConsumer<S, T>
where
    S: TokenSequence,
    T: Default,
{
    type Product = T;
    type Error = ();

    /// Consumes the expected sequence from `iter` if the upcoming tokens match it
    /// completely.
    ///
    /// On success `iter` is advanced by exactly `tokens().len()` items and
    /// `T::default()` is returned. On failure (mismatch or input ending before the
    /// sequence does) `iter` is left untouched and `Err(())` is returned.
    fn try_consume_tokens<I: Iterator<Item = TokenItem<'a>> + Clone>(
        &mut self,
        iter: &mut I,
    ) -> Result<Self::Product, Self::Error> {
        // Work on a copy so the caller's iterator only moves on a full match.
        let mut lookahead = iter.clone();

        // Drive the loop from the expected tokens: exactly one item is pulled per
        // expected token. (Zipping `lookahead` first would pull one extra item
        // before noticing the expected tokens ran out, and would also treat a
        // prematurely exhausted input as a match.)
        let matched = self.0.tokens().iter().all(|expected| {
            lookahead
                .next()
                .is_some_and(|item| item.token == *expected)
        });

        if matched {
            *iter = lookahead;
            Ok(T::default())
        } else {
            Err(())
        }
    }
}
/// Consumer that tries every sequence of a [`TokenSequenceList`] in order and
/// yields the items of the first one that matches.
struct TokenSequenceListConsumer<L: TokenSequenceList> {
    list: L,
}

impl<'a, L: TokenSequenceList> TokenConsumer<'a> for TokenSequenceListConsumer<L> {
    type Product = Vec<TokenItem<'a>>;
    type Error = ();

    /// Tries each sequence from `iter_sequences()` against the upcoming tokens.
    ///
    /// For the first sequence that matches completely, `iter` is advanced by
    /// exactly that sequence's length and the matched items are returned. If no
    /// sequence matches, `iter` is left untouched and `Err(())` is returned.
    fn try_consume_tokens<I: Iterator<Item = TokenItem<'a>> + Clone>(
        &mut self,
        iter: &mut I,
    ) -> Result<Self::Product, Self::Error> {
        for seq in self.list.iter_sequences() {
            // Each candidate starts from a fresh copy of the caller's position.
            let mut lookahead = iter.clone();
            let mut matched = Vec::with_capacity(seq.len());

            // Pull exactly one item per expected token and keep only items that
            // matched. Zipping the item stream first would pull (and record) one
            // item beyond the sequence, and would accept truncated input.
            let is_match = seq.iter().all(|&expected| match lookahead.next() {
                Some(item) if item.token == expected => {
                    matched.push(item);
                    true
                }
                _ => false,
            });

            if is_match {
                *iter = lookahead;
                return Ok(matched);
            }
        }
        Err(())
    }
}
/// Pairs an inner iterator with a buffer of items of the same type.
///
/// `iter` is the wrapped iterator and `yielded` is a `Vec` of its item type.
struct StealingIterator<T, I>
where
    I: Iterator<Item = T>,
{
    /// The wrapped iterator.
    pub iter: I,
    /// Buffer holding items of the iterator's item type.
    pub yielded: Vec<T>,
}
// Iterator implementation for `StealingIterator`: items are forwarded from the
// wrapped iterator, and a clone of every forwarded item is appended to
// `yielded` (hence the `T: Clone` bound).
impl<I, T> Iterator for StealingIterator<T, I>
where
T: Clone,
I: Iterator<Item = T>,
{
// NOTE(review): the `expect_one_of` signature below sits inside this
// `impl Iterator` block ahead of `type Item` and has no body of its own; it
// looks like residue of a removed method -- confirm against the real file.
pub fn expect_one_of<Ts: IntoIterator<Item = T>>(&mut self, candidates: Ts) -> Option<T>
where
T: Eq,
{
type Item = T;
// Pulls the next item from the inner iterator; when there is one, records a
// clone in `self.yielded` and hands the original to the caller.
fn next(&mut self) -> Option<Self::Item> {
self.iter.next().map(move |item| {
self.yielded.push(item.clone());
item
})
}
}
/// Anything that can be viewed as an ordered slice of [`Token`]s.
pub trait TokenSequence {
    /// Borrows the tokens of this sequence, in order.
    fn tokens(&self) -> &[Token];
}

/// A single token is a sequence of length one.
impl TokenSequence for Token {
    fn tokens(&self) -> &[Token] {
        core::slice::from_ref(self)
    }
}

/// An unsized token slice is its own sequence.
impl TokenSequence for [Token] {
    fn tokens(&self) -> &[Token] {
        &self[..]
    }
}

/// A borrowed token slice exposes the slice it points to.
impl TokenSequence for &[Token] {
    fn tokens(&self) -> &[Token] {
        *self
    }
}

/// A fixed-size token array exposes all of its elements.
impl<const N: usize> TokenSequence for [Token; N] {
    fn tokens(&self) -> &[Token] {
        self.as_slice()
    }
}
/// A collection of [`TokenSequence`]s that can be visited one by one.
pub trait TokenSequenceList {
/// Calls `f` with each sequence in the list.
fn for_each(&mut self, f: impl FnMut(&dyn TokenSequence));
/// Returns an iterator over the token slices of the sequences in the list.
fn iter_sequences(&self) -> impl Iterator<Item = &[Token]>;
/// Applies `pred` to sequences of the list and returns a `Some` result.
// NOTE(review): the implementations visible in this file stop at the first
// sequence for which `pred` returns `Some`; the trait signature alone does
// not enforce that order -- confirm.
fn first<T>(&mut self, pred: impl FnMut(&dyn TokenSequence) -> Option<T>) -> Option<T>;
}
/// Every single [`TokenSequence`] is also a list containing just itself.
impl<T: TokenSequence> TokenSequenceList for T {
    /// Invokes `f` once, with `self` as the only sequence.
    fn for_each(&mut self, mut f: impl FnMut(&dyn TokenSequence)) {
        f(&*self)
    }

    /// Yields exactly one slice: this sequence's tokens.
    fn iter_sequences(&self) -> impl Iterator<Item = &[Token]> {
        Some(self.tokens()).into_iter()
    }

    /// Returns whatever `pred` returns for `self`, the only candidate.
    fn first<U>(&mut self, mut pred: impl FnMut(&dyn TokenSequence) -> Option<U>) -> Option<U> {
        pred(&*self)
    }
}
// Implements `TokenSequenceList` for one tuple type whose elements are all
// `TokenSequenceList`s themselves.
//
// Input: a comma-separated list of `(index, TypeParam)` pairs, one per tuple
// element; `$is` is used as the tuple field index and `$ts` as the generic
// parameter name.
macro_rules! impl_token_sequence_list {
($(($is:tt, $ts:ident)),*) => {
impl<$($ts,)*> $crate::TokenSequenceList for ($($ts,)*) where
$($ts: $crate::TokenSequenceList,)* {
// Visits the elements in tuple order, forwarding the same callback.
fn for_each(&mut self, mut f: impl FnMut(&dyn $crate::TokenSequence)) {
$(self.$is.for_each(&mut f);)*
}
// Chains the elements' sequence iterators in tuple order.
fn iter_sequences(&self) -> impl Iterator<Item = &[Token]> {
std::iter::empty()
$(.chain(self.$is.iter_sequences()))*
}
// Asks each element in tuple order and returns the first `Some`.
fn first<U>(&mut self, mut pred: impl FnMut(&dyn $crate::TokenSequence) -> Option<U>) -> Option<U> {
$(
if let Some(res) = self.$is.first(&mut pred) {
return Some(res);
}
)*
None
}
}
};
}
// Generates the impls via `variadics_please`; the arguments `1, 15`
// presumably select tuple arities 1 through 15 -- verify against the crate docs.
variadics_please::all_tuples_enumerated!(impl_token_sequence_list, 1, 15, T);
impl<'a, 'b, I> ReborrowingIterator<'a, 'b, I, TokenItem<'a>, Consuming>
where
I: Iterator<Item = TokenItem<'a>>,
{
// Takes the next item via `self.next()` and returns it when its token equals
// one of `candidates`; returns `None` when there is no next item or when no
// candidate matches.
// NOTE(review): on a non-matching token the item has already been taken with
// `self.next()` and is not handed back here -- confirm that is intended.
// NOTE(review): the body contains two variants of the same three statements
// (one comparing `cand == token`, one comparing `cand == item.token`); the
// first looks like residue of an older version -- confirm against the real file.
pub fn expect_one_of<Ts: IntoIterator<Item = Token>>(
&mut self,
candidates: Ts,
) -> Option<TokenItem<'a>> {
let mut candidates = candidates.into_iter();
let token = self.next()?;
if candidates.any(|cand| cand == token) {
Some(token)
let item = self.next()?;
if candidates.any(|cand| cand == item.token) {
Some(item)
} else {
None
}
}
/// Consumes the tokens of `sequence` if the upcoming tokens match it in full.
///
/// Returns the drained items when every expected token was matched by a peeked
/// item, and `None` on the first mismatch or when the input ends before the
/// sequence does.
pub fn expect_sequence<S: TokenSequence + ?Sized>(
    &mut self,
    sequence: &S,
) -> Option<Vec<TokenItem<'a>>> {
    let mut peeking = self.borrow_peeking();

    // Pull one peeked item per expected token. Driving the loop from the
    // expected tokens (rather than `zip` + `all`) makes running out of input a
    // mismatch instead of a silent match on a shorter prefix.
    let matches = sequence
        .tokens()
        .iter()
        .all(|&expected| peeking.next().is_some_and(|item| item.token == expected));

    if matches {
        // Bind the result first so the drain adaptor is dropped before `peeking`.
        let consumed = peeking.drain_peeked().collect();
        Some(consumed)
    } else {
        None
    }
}
/// Runs `expect_sequence` over the sequences of `list` via `list.first`.
///
/// The result of `list.first` (the matched items, if any) is dropped; this
/// function returns nothing.
pub fn expect_sequence_list<L: TokenSequenceList>(&mut self, mut list: L) {
    let _matched = list.first(|sequence| self.expect_sequence(sequence));
}
}
impl<'a, 'b, I, T> Iterator for ReborrowingIterator<'a, 'b, I, T, Consuming>
@ -743,16 +927,21 @@ where
_marker: PhantomData,
}
}
}
pub fn peek_one_of<Ts: IntoIterator<Item = T>>(&mut self, candidates: Ts) -> Option<&T>
where
T: Eq,
{
impl<'a, 'b, I> ReborrowingIterator<'a, 'b, I, TokenItem<'a>, Peeking>
where
I: Iterator<Item = TokenItem<'a>>,
{
pub fn peek_one_of<Ts: IntoIterator<Item = Token>>(
&mut self,
candidates: Ts,
) -> Option<TokenItem<'a>> {
let mut candidates = candidates.into_iter();
let token = self.peek_next()?;
if candidates.any(|cand| &cand == token) {
Some(token)
let item = self.peek_next()?;
if candidates.any(|cand| cand == item.token) {
Some(*item)
} else {
None
}

View file

@ -1,11 +1,12 @@
use internment::Intern;
use lexer::{
Consuming, ReborrowingConsumingIterator, ReborrowingIterator, ReborrowingPeekingIterator,
Token, TokenItem, TokenItemIterator,
Token, TokenConsumer, TokenItem, TokenItemIterator,
};
use thiserror::Error;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Type {
pub enum InnerType {
Top,
Bottom,
Unit,
@ -39,6 +40,8 @@ pub enum Type {
},
}
/// Alias for an [`InnerType`] wrapped in `internment::Intern`.
type Type = internment::Intern<InnerType>;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FloatType {
F32,
@ -273,272 +276,6 @@ impl Ast {
}
}
/// State shared by the parsers: the AST under construction, the consuming
/// token source, and a buffer of token items.
struct ParseCtx<'a> {
    ast: Ast,
    source: ReborrowingIterator<'a, 'a, TokenItemIterator<'a>, TokenItem<'a>, Consuming>,
    peeked: Vec<TokenItem<'a>>,
}

impl<'a> ParseCtx<'a> {
    /// Builds a context around `ast`, wrapping `source` in a consuming
    /// [`ReborrowingIterator`] and starting with an empty `peeked` buffer.
    fn new(ast: Ast, source: &'a mut TokenItemIterator<'a>) -> Self {
        let source = ReborrowingIterator::new(source);
        let peeked = Vec::new();
        ParseCtx {
            ast,
            source,
            peeked,
        }
    }

    /// Parse the entire source into an AST, returning an `AstNode::File` node.
    ///
    /// Not implemented yet.
    fn parse_file(&mut self) -> Result<AstNode, ParseError> {
        todo!()
    }
}
/// Parser for file-level declarations, operating on a borrowed [`ParseCtx`].
struct FileParser<'a> {
    ctx: &'a mut ParseCtx<'a>,
}

impl<'a> FileParser<'a> {
    /// Creates a file parser that borrows `ctx`.
    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
        FileParser { ctx }
    }

    /// Peeks at the next token and dispatches on the kind of global declaration
    /// it introduces.
    ///
    /// Every recognised branch is still a `todo!()`; any other token yields
    /// `ParseError::UnexpectedToken`. A failed peek propagates the error from
    /// `try_peek_next`.
    pub fn parse_global_decl(&mut self) -> Result<AstNode, ParseError> {
        let mut lookahead = self.ctx.source.borrow_peeking();
        let upcoming = lookahead.try_peek_next()?;

        match upcoming.token {
            // function
            Token::Fn => todo!("impl function parsing"),
            // comment
            Token::SlashSlash | Token::SlashSlashSlash => todo!("impl comment parsing"),
            // constant
            Token::Const => todo!("impl constant parsing"),
            // type alias
            Token::Type => todo!("impl type parsing"),
            // module
            Token::Module => todo!("impl module parsing"),
            _ => Err(ParseError::UnexpectedToken(upcoming.token)),
        }
    }
}
/// Parser for type expressions, operating on a borrowed [`ParseCtx`].
struct TypeParser<'a> {
    ctx: &'a mut ParseCtx<'a>,
}

impl<'a> TypeParser<'a> {
    /// Creates a type parser that borrows `ctx`.
    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
        Self { ctx }
    }

    /// Parses any type expression. Not implemented yet.
    fn parse_type(&mut self) -> Result<Type, ParseError> {
        todo!()
    }

    /// Parses a primitive type: `!`, `()`, `bool`, the sized integer and float
    /// types, `usize`/`isize`, or a `*mut`/`*const` pointer.
    ///
    /// # Errors
    ///
    /// Returns `ParseError::NotAType` when the next token does not start a
    /// primitive type, or when `*` is not followed by `mut`/`const`. Errors from
    /// `try_peek_next` and `parse_type` are propagated.
    fn parse_primitive_type(&mut self) -> Result<Type, ParseError> {
        let mut peeking = self.ctx.source.borrow_peeking();
        let next = peeking.try_peek_next()?.token;

        let ty = match next {
            // NOTE(review): `!` is mapped to `Type::Top` here although the enum
            // also has `Bottom` -- confirm which one is intended.
            Token::Bang => Type::Top,
            // Unit type: `(` immediately followed by `)`.
            Token::OpenParens if peeking.try_peek_next()?.token == Token::CloseParens => Type::Unit,
            Token::Bool => Type::Bool,
            Token::I8 => Type::Int { signed: true, bits: 8 },
            Token::I16 => Type::Int { signed: true, bits: 16 },
            Token::I32 => Type::Int { signed: true, bits: 32 },
            Token::I64 => Type::Int { signed: true, bits: 64 },
            Token::U8 => Type::Int { signed: false, bits: 8 },
            Token::U16 => Type::Int { signed: false, bits: 16 },
            Token::U32 => Type::Int { signed: false, bits: 32 },
            Token::U64 => Type::Int { signed: false, bits: 64 },
            Token::F32 => Type::Float {
                float_type: FloatType::F32,
            },
            Token::F64 => Type::Float {
                float_type: FloatType::F64,
            },
            // TODO: Detect pointer size
            Token::USize => Type::Int { signed: false, bits: 64 },
            // TODO: Detect pointer size
            Token::ISize => Type::Int { signed: true, bits: 64 },
            Token::Star => {
                // Pointer type: `*` must be followed by `mut` or `const`.
                let _const_or_mut = peeking
                    .peek_one_of([Token::Mutable, Token::Const].iter().copied())
                    .ok_or(ParseError::NotAType)?;
                peeking.drain_peeked();
                // NOTE(review): `self.parse_type()` is called while `peeking`
                // (borrowed from `self.ctx.source`) is still used below --
                // confirm this borrow is acceptable.
                Type::Pointer {
                    pointee: Box::new(self.parse_type()?),
                }
            }
            _ => return Err(ParseError::NotAType),
        };

        // Kept for its effect on the token source; the returned value is unused.
        let _cursor = peeking.reborrow_consuming_at_cursor();
        Ok(ty)
    }

    /// Parses an array type. Not implemented yet.
    fn parse_array_type(&mut self) -> Result<Type, ParseError> {
        todo!()
    }

    /// Parses a function type. Not implemented yet.
    fn parse_function_type(&mut self) -> Result<Type, ParseError> {
        todo!()
    }

    /// Parses a tuple type. Not implemented yet.
    fn parse_tuple_type(&mut self) -> Result<Type, ParseError> {
        todo!()
    }
}
/// Parser for function declarations, operating on a borrowed [`ParseCtx`].
///
/// All parsing methods are still `todo!()` stubs.
struct FunctionParser<'a> {
ctx: &'a mut ParseCtx<'a>,
}
impl<'a> FunctionParser<'a> {
/// Creates a function parser that borrows `ctx`.
pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
Self { ctx }
}
/// Parses a function prototype. Not implemented yet.
fn parse_function_proto(&mut self) -> Result<AstNode, ParseError> {
todo!()
}
/// Parses a parameter list. Not implemented yet.
fn parse_parameter_list(&mut self) -> Result<AstNode, ParseError> {
todo!()
}
/// Parses a single parameter. Not implemented yet.
fn parse_parameter(&mut self) -> Result<AstNode, ParseError> {
todo!()
}
/// Parses a function body. Not implemented yet.
fn parse_function_body(&mut self) -> Result<AstNode, ParseError> {
todo!()
}
}
/// Iterating a `FileParser` yields `Result<AstNode, ParseError>` items.
///
/// `next` is not implemented yet and panics via `todo!()`.
impl<'a> Iterator for FileParser<'a> {
type Item = Result<AstNode, ParseError>;
fn next(&mut self) -> Option<Self::Item> {
todo!()
}
}
/// Extension trait offering a `Result`-returning peek.
trait TryReborrowingPeekingExt<T> {
/// Peeks at the next item, returning a `ParseError` when the peek fails.
fn try_peek_next(&mut self) -> Result<&T, ParseError>;
}
/// Extension trait offering a `Result`-returning `next`.
trait TryReborrowingConsumingExt<T> {
/// Takes the next item, returning a `ParseError` when there is none.
fn try_next(&mut self) -> Result<T, ParseError>;
}
impl<'a, I, T> TryReborrowingPeekingExt<T> for ReborrowingPeekingIterator<'a, 'a, I, T>
where
    I: Iterator<Item = T>,
{
    /// Peeks at the next item, turning a `None` from `peek_next` into
    /// `ParseError::EOF`.
    fn try_peek_next(&mut self) -> Result<&T, ParseError> {
        match self.peek_next() {
            Some(item) => Ok(item),
            None => Err(ParseError::EOF),
        }
    }
}
impl<'a, I, T> TryReborrowingConsumingExt<T> for ReborrowingConsumingIterator<'a, 'a, I, T>
where
    I: Iterator<Item = T>,
{
    /// Takes the next item, turning a `None` from `next` into
    /// `ParseError::EOF`.
    fn try_next(&mut self) -> Result<T, ParseError> {
        match self.next() {
            Some(item) => Ok(item),
            None => Err(ParseError::EOF),
        }
    }
}
#[cfg(test)]
mod tests {
use crate::AstNode;