From 357590ec07f1fbafc6658573f9d45bb8bb214493 Mon Sep 17 00:00:00 2001
From: janis
Date: Tue, 30 Sep 2025 16:44:42 +0200
Subject: [PATCH] lexer: add token-sequence matching; parser: intern types

Add TokenConsumer, TokenSequence and TokenSequenceList traits to the lexer,
specialise the reborrowing iterators for TokenItem (expect_one_of,
expect_sequence, expect_sequence_list, peek_one_of), make TokenItem Copy,
intern the parser's Type behind internment::Intern, and drop the unused
ParseCtx/FileParser/TypeParser/FunctionParser scaffolding.
---
 crates/lexer/Cargo.toml  |   4 +-
 crates/lexer/src/lib.rs  | 221 ++++++++++++++++++++++++++++---
 crates/parser/src/lib.rs | 273 +--------------------------------------
 3 files changed, 213 insertions(+), 285 deletions(-)

diff --git a/crates/lexer/Cargo.toml b/crates/lexer/Cargo.toml
index 8084c67..55a88e3 100644
--- a/crates/lexer/Cargo.toml
+++ b/crates/lexer/Cargo.toml
@@ -9,4 +9,6 @@ werkzeug = { workspace = true }
 thiserror = { workspace = true }
 itertools = { workspace = true }
 trie = { workspace = true }
-unicode-xid = { workspace = true }
\ No newline at end of file
+unicode-xid = { workspace = true }
+
+variadics_please = "1.1.0"
diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs
index 46df06e..dfcbb9b 100644
--- a/crates/lexer/src/lib.rs
+++ b/crates/lexer/src/lib.rs
@@ -307,7 +307,7 @@ use std::{
 
 use trie::Tree;
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Copy)]
 pub struct TokenItem<'a> {
     pub token: Token,
     pub lexeme: &'a str,
@@ -644,23 +644,207 @@ where
     }
 }
 
-impl<'a, 'b, I, T> ReborrowingIterator<'a, 'b, I, T, Consuming>
+pub trait TokenConsumer<'a> {
+    type Product;
+    type Error;
+    fn try_consume_tokens<I: Iterator<Item = TokenItem<'a>> + Clone>(
+        &mut self,
+        iter: &mut I,
+    ) -> Result<Self::Product, Self::Error>;
+}
+
+struct SimpleTokenConsumer<S, T>(S, PhantomData<T>);
+
+impl<'a, S, T> TokenConsumer<'a> for SimpleTokenConsumer<S, T>
 where
+    S: TokenSequence,
+    T: Default,
+{
+    type Product = T;
+    type Error = ();
+
+    fn try_consume_tokens<I: Iterator<Item = TokenItem<'a>> + Clone>(
+        &mut self,
+        iter: &mut I,
+    ) -> Result<Self::Product, Self::Error> {
+        let ref mut iter2 = iter.clone();
+        if iter2
+            .zip(self.0.tokens().iter().copied())
+            .all(|(item, expected)| item.token == expected)
+        {
+            core::mem::swap(iter, iter2);
+            Ok(T::default())
+        } else {
+            Err(())
+        }
+    }
+}
+
+struct TokenSequenceListConsumer<L> {
+    list: L,
+}
+
+impl<'a, L: TokenSequenceList> TokenConsumer<'a> for TokenSequenceListConsumer<L> {
+    type Product = Vec<TokenItem<'a>>;
+    type Error = ();
+
+    fn try_consume_tokens<I: Iterator<Item = TokenItem<'a>> + Clone>(
+        &mut self,
+        iter: &mut I,
+    ) -> Result<Self::Product, Self::Error> {
+        let sequences = self.list.iter_sequences();
+
+        for seq in sequences {
+            let mut iter2 = StealingIterator {
+                iter: iter.clone(),
+                yielded: Vec::new(),
+            };
+            if (&mut iter2)
+                .zip(seq.iter().copied())
+                .all(|(item, expected)| item.token == expected)
+            {
+                core::mem::swap(iter, &mut iter2.iter);
+                return Ok(iter2.yielded);
+            }
+        }
+        Err(())
+    }
+}
+
+struct StealingIterator<T, I: Iterator<Item = T>> {
+    pub iter: I,
+    pub yielded: Vec<T>,
+}
+
+impl<T, I> Iterator for StealingIterator<T, I>
+where
+    T: Clone,
     I: Iterator<Item = T>,
 {
-    pub fn expect_one_of<Ts: IntoIterator<Item = T>>(&mut self, candidates: Ts) -> Option<T>
-    where
-        T: Eq,
-    {
+    type Item = T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next().map(move |item| {
+            self.yielded.push(item.clone());
+            item
+        })
+    }
+}
+
+pub trait TokenSequence {
+    fn tokens(&'_ self) -> &'_ [Token];
+}
+
+impl TokenSequence for Token {
+    fn tokens(&'_ self) -> &'_ [Token] {
+        std::slice::from_ref(self)
+    }
+}
+impl TokenSequence for [Token] {
+    fn tokens(&'_ self) -> &'_ [Token] {
+        self
+    }
+}
+impl TokenSequence for &[Token] {
+    fn tokens(&'_ self) -> &'_ [Token] {
+        self
+    }
+}
+
+impl<const N: usize> TokenSequence for [Token; N] {
+    fn tokens(&'_ self) -> &'_ [Token] {
+        self
+    }
+}
+
+pub trait TokenSequenceList {
+    fn for_each(&mut self, f: impl FnMut(&dyn TokenSequence));
+    fn iter_sequences(&self) -> impl Iterator<Item = &[Token]>;
+    fn first<R>(&mut self, pred: impl FnMut(&dyn TokenSequence) -> Option<R>) -> Option<R>;
+}
+impl<T: TokenSequence> TokenSequenceList for T {
+    fn for_each(&mut self, mut f: impl FnMut(&dyn TokenSequence)) {
+        f(self);
+    }
+
+    fn iter_sequences(&self) -> impl Iterator<Item = &[Token]> {
+        std::iter::once(self.tokens())
+    }
+
+    fn first<R>(&mut self, mut pred: impl FnMut(&dyn TokenSequence) -> Option<R>) -> Option<R> {
+        pred(self)
+    }
+}
+
+macro_rules! impl_token_sequence_list {
+
+    ($(($is:tt, $ts:ident)),*) => {
+        impl<$($ts,)*> $crate::TokenSequenceList for ($($ts,)*) where
+            $($ts: $crate::TokenSequenceList,)* {
+            fn for_each(&mut self, mut f: impl FnMut(&dyn $crate::TokenSequence)) {
+                $(self.$is.for_each(&mut f);)*
+            }
+
+            fn iter_sequences(&self) -> impl Iterator<Item = &[$crate::Token]> {
+                std::iter::empty()
+                $(.chain(self.$is.iter_sequences()))*
+            }
+
+            fn first<R>(&mut self, mut pred: impl FnMut(&dyn $crate::TokenSequence) -> Option<R>) -> Option<R> {
+                $(
+                    if let Some(res) = self.$is.first(&mut pred) {
+                        return Some(res);
+                    }
+                )*
+                None
+            }
+        }
+    };
+}
+
+variadics_please::all_tuples_enumerated!(impl_token_sequence_list, 1, 15, T);
+
+impl<'a, 'b, I> ReborrowingIterator<'a, 'b, I, TokenItem<'a>, Consuming>
+where
+    I: Iterator<Item = TokenItem<'a>>,
+{
+    pub fn expect_one_of<Ts: IntoIterator<Item = Token>>(
+        &mut self,
+        candidates: Ts,
+    ) -> Option<TokenItem<'a>> {
         let mut candidates = candidates.into_iter();
-        let token = self.next()?;
-        if candidates.any(|cand| cand == token) {
-            Some(token)
+        let item = self.next()?;
+        if candidates.any(|cand| cand == item.token) {
+            Some(item)
         } else {
             None
         }
     }
+
+    pub fn expect_sequence<S: TokenSequence + ?Sized>(
+        &mut self,
+        sequence: &S,
+    ) -> Option<Vec<TokenItem<'a>>> {
+        let ref mut peeking = self.borrow_peeking();
+
+        // check that the next tokens match the expected sequence
+        let matches = sequence
+            .tokens()
+            .into_iter()
+            .copied()
+            .zip(peeking.map(|item| item.token))
+            .all(|(a, b)| a == b);
+        if matches {
+            Some(peeking.drain_peeked().collect())
+        } else {
+            None
+        }
+    }
+
+    pub fn expect_sequence_list<L: TokenSequenceList>(&mut self, mut list: L) {
+        list.first(|s| self.expect_sequence(s));
+    }
 }
 
 impl<'a, 'b, I, T> Iterator for ReborrowingIterator<'a, 'b, I, T, Consuming>
@@ -743,16 +927,21 @@ where
             _marker: PhantomData,
         }
     }
+}
 
-    pub fn peek_one_of<Ts: IntoIterator<Item = T>>(&mut self, candidates: Ts) -> Option<&T>
-    where
-        T: Eq,
-    {
+impl<'a, 'b, I> ReborrowingIterator<'a, 'b, I, TokenItem<'a>, Peeking>
+where
+    I: Iterator<Item = TokenItem<'a>>,
+{
+    pub fn peek_one_of<Ts: IntoIterator<Item = Token>>(
+        &mut self,
+        candidates: Ts,
+    ) -> Option<TokenItem<'a>> {
         let mut candidates = candidates.into_iter();
-        let token = self.peek_next()?;
-        if candidates.any(|cand| &cand == token) {
-            Some(token)
+        let item = self.peek_next()?;
+        if candidates.any(|cand| cand == item.token) {
+            Some(*item)
         } else {
             None
         }
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 6527292..e6ef52b 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -1,11 +1,12 @@
+use internment::Intern;
 use lexer::{
     Consuming, ReborrowingConsumingIterator, ReborrowingIterator, ReborrowingPeekingIterator,
-    Token, TokenItem, TokenItemIterator,
+    Token, TokenConsumer, TokenItem, TokenItemIterator,
 };
 use thiserror::Error;
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum Type {
+pub enum InnerType {
     Top,
     Bottom,
     Unit,
@@ -39,6 +40,8 @@ pub enum Type {
     },
 }
 
+type Type = internment::Intern<InnerType>;
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum FloatType {
     F32,
@@ -273,272 +276,6 @@ impl Ast {
     }
 }
 
-struct ParseCtx<'a> {
-    ast: Ast,
-    source: ReborrowingIterator<'a, 'a, TokenItemIterator<'a>, TokenItem<'a>, Consuming>,
-    peeked: Vec<TokenItem<'a>>,
-}
-
-impl<'a> ParseCtx<'a> {
-    fn new(ast: Ast, source: &'a mut TokenItemIterator<'a>) -> Self {
-        Self {
-            ast,
-            source: ReborrowingIterator::new(source),
-            peeked: Vec::new(),
-        }
-    }
-
-    /// Parse the entire source into an AST, returning an `AstNode::File` node.
-    fn parse_file(&mut self) -> Result<AstNode, ParseError> {
-        todo!()
-    }
-}
-
-struct FileParser<'a> {
-    ctx: &'a mut ParseCtx<'a>,
-}
-
-impl<'a> FileParser<'a> {
-    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
-        Self { ctx }
-    }
-
-    pub fn parse_global_decl(&mut self) -> Result<AstNode, ParseError> {
-        let mut peeking = self.ctx.source.borrow_peeking();
-        let next = peeking.try_peek_next()?;
-        match next.token {
-            Token::Fn => {
-                // function
-                todo!("impl function parsing")
-            }
-            Token::SlashSlash | Token::SlashSlashSlash => {
-                // comment
-                todo!("impl comment parsing")
-            }
-            Token::Const => {
-                // constant
-                todo!("impl constant parsing")
-            }
-            Token::Type => {
-                // type alias
-                todo!("impl type parsing")
-            }
-            Token::Module => {
-                // module
-                todo!("impl module parsing")
-            }
-            _ => Err(ParseError::UnexpectedToken(next.token)),
-        }
-    }
-}
-
-struct TypeParser<'a> {
-    ctx: &'a mut ParseCtx<'a>,
-}
-
-impl<'a> TypeParser<'a> {
-    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
-        Self { ctx }
-    }
-
-    fn parse_type(&mut self) -> Result<Type, ParseError> {
-        todo!()
-    }
-
-    fn parse_primitive_type(&mut self) -> Result<Type, ParseError> {
-        let mut peeking = self.ctx.source.borrow_peeking();
-        let next = peeking.try_peek_next()?.token;
-        let ty = match next {
-            Token::Bang => {
-                // Top type
-                Some(Type::Top)
-            }
-            Token::OpenParens if peeking.try_peek_next()?.token == Token::CloseParens => {
-                // Unit type
-                Some(Type::Unit)
-            }
-            Token::Bool => {
-                // Bool type
-                Some(Type::Bool)
-            }
-
-            Token::I8 => {
-                // i8 type
-                Some(Type::Int {
-                    signed: true,
-                    bits: 8,
-                })
-            }
-            Token::I16 => {
-                // i16 type
-                Some(Type::Int {
-                    signed: true,
-                    bits: 16,
-                })
-            }
-            Token::I32 => {
-                // i32 type
-                Some(Type::Int {
-                    signed: true,
-                    bits: 32,
-                })
-            }
-            Token::I64 => {
-                // i64 type
-                Some(Type::Int {
-                    signed: true,
-                    bits: 64,
-                })
-            }
-            Token::U8 => {
-                // u8 type
-                Some(Type::Int {
-                    signed: false,
-                    bits: 8,
-                })
-            }
-            Token::U16 => {
-                // u16 type
-                Some(Type::Int {
-                    signed: false,
-                    bits: 16,
-                })
-            }
-            Token::U32 => {
-                // u32 type
-                Some(Type::Int {
-                    signed: false,
-                    bits: 32,
-                })
-            }
-            Token::U64 => {
-                // u64 type
-                Some(Type::Int {
-                    signed: false,
-                    bits: 64,
-                })
-            }
-            Token::F32 => {
-                // f32 type
-                Some(Type::Float {
-                    float_type: FloatType::F32,
-                })
-            }
-            Token::F64 => {
-                // f64 type
-                Some(Type::Float {
-                    float_type: FloatType::F64,
-                })
-            }
-            Token::USize => {
-                // usize type
-                Some(Type::Int {
-                    signed: false,
-                    bits: 64,
-                }) // TODO: Detect pointer size
-            }
-            Token::ISize => {
-                // isize type
-                Some(Type::Int {
-                    signed: true,
-                    bits: 64,
-                }) // TODO: Detect pointer size
-            }
-            Token::Star => {
-                // Pointer type
-                let _const_or_mut = peeking
-                    .peek_one_of([Token::Mutable, Token::Const].iter().copied())
-                    .ok_or(ParseError::NotAType)?;
-                peeking.drain_peeked();
-                Some(Type::Pointer {
-                    pointee: Box::new(self.parse_type()?),
-                })
-            }
-            _ => None,
-        };
-
-        if let Some(ty) = ty {
-            let cursor = peeking.reborrow_consuming_at_cursor();
-
-            Some(ty)
-        } else {
-            Err(ParseError::NotAType)
-        }
-    }
-
-    fn parse_array_type(&mut self) -> Result<Type, ParseError> {
-        todo!()
-    }
-
-    fn parse_function_type(&mut self) -> Result<Type, ParseError> {
-        todo!()
-    }
-
-    fn parse_tuple_type(&mut self) -> Result<Type, ParseError> {
-        todo!()
-    }
-}
-
-struct FunctionParser<'a> {
-    ctx: &'a mut ParseCtx<'a>,
-}
-
-impl<'a> FunctionParser<'a> {
-    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
-        Self { ctx }
-    }
-
-    fn parse_function_proto(&mut self) -> Result<AstNode, ParseError> {
-        todo!()
-    }
-
-    fn parse_parameter_list(&mut self) -> Result<AstNode, ParseError> {
-        todo!()
-    }
-
-    fn parse_parameter(&mut self) -> Result<AstNode, ParseError> {
-        todo!()
-    }
-
-    fn parse_function_body(&mut self) -> Result<AstNode, ParseError> {
-        todo!()
-    }
-}
-
-impl<'a> Iterator for FileParser<'a> {
-    type Item = Result<AstNode, ParseError>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        todo!()
-    }
-}
-
-trait TryReborrowingPeekingExt<T> {
-    fn try_peek_next(&mut self) -> Result<&T, ParseError>;
-}
-
-trait TryReborrowingConsumingExt<T> {
-    fn try_next(&mut self) -> Result<T, ParseError>;
-}
-
-impl<'a, I, T> TryReborrowingPeekingExt<T> for ReborrowingPeekingIterator<'a, 'a, I, T>
-where
-    I: Iterator<Item = T>,
-{
-    fn try_peek_next(&mut self) -> Result<&T, ParseError> {
-        self.peek_next().ok_or(ParseError::EOF)
-    }
-}
-
-impl<'a, I, T> TryReborrowingConsumingExt<T> for ReborrowingConsumingIterator<'a, 'a, I, T>
-where
-    I: Iterator<Item = T>,
-{
-    fn try_next(&mut self) -> Result<T, ParseError> {
-        self.next().ok_or(ParseError::EOF)
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use crate::AstNode;
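
Usage sketch (not part of the patch): a minimal example of how the TokenItem-specialised helpers added in crates/lexer/src/lib.rs might be driven. `TokenItemIterator::new` is an assumption about the existing lexer API; `ReborrowingIterator::new`, `expect_one_of`, `expect_sequence` and the `TokenSequence` impls are the items appearing in the diff above.

// Sketch only; assumes TokenItemIterator::new(&str) exists.
use lexer::{ReborrowingIterator, Token, TokenItemIterator};

fn sketch(source: &str) {
    // assumed constructor for the lexer's token iterator
    let mut tokens = TokenItemIterator::new(source);
    let mut consuming = ReborrowingIterator::new(&mut tokens);

    // consume a single token if it is one of the given candidates
    if let Some(item) = consuming.expect_one_of([Token::Fn, Token::Const]) {
        let _ = (item.token, item.lexeme);
    }

    // consume a fixed token sequence; the matched TokenItems are returned
    // only when the whole sequence is present
    if let Some(items) = consuming.expect_sequence(&[Token::OpenParens, Token::CloseParens]) {
        assert_eq!(items.len(), 2);
    }
}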