From df2bb54272021f5cbee24a13ad35a09cd7881966 Mon Sep 17 00:00:00 2001
From: janis
Date: Mon, 29 Sep 2025 15:56:13 +0200
Subject: [PATCH] parser crate

---
 crates/parser/Cargo.toml |  14 +
 crates/parser/src/lib.rs | 604 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 618 insertions(+)
 create mode 100644 crates/parser/Cargo.toml
 create mode 100644 crates/parser/src/lib.rs

diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml
new file mode 100644
index 0000000..4db7793
--- /dev/null
+++ b/crates/parser/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "parser"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+
+tracing = { workspace = true }
+werkzeug = { workspace = true }
+thiserror = { workspace = true }
+itertools = { workspace = true }
+internment = "0.8.6"
+
+lexer = { path = "../lexer", version = "0.1.0" }
\ No newline at end of file
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
new file mode 100644
index 0000000..6527292
--- /dev/null
+++ b/crates/parser/src/lib.rs
@@ -0,0 +1,604 @@
+//! AST data model and parser scaffolding built on top of the `lexer` crate.
+
+use lexer::{
+    Consuming, ReborrowingConsumingIterator, ReborrowingIterator, ReborrowingPeekingIterator,
+    Token, TokenItem, TokenItemIterator,
+};
+use thiserror::Error;
+
+/// A type of the source language, as returned by the type parser.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum Type {
+    Top,
+    Bottom,
+    Unit,
+    Bool,
+    /// Integer type with explicit signedness and bit width.
+    Int {
+        signed: bool,
+        bits: u8,
+    },
+    Float {
+        float_type: FloatType,
+    },
+    Pointer {
+        pointee: Box<Type>,
+    },
+    Array {
+        element: Box<Type>,
+        size: usize,
+    },
+    Function {
+        return_type: Box<Type>,
+        parameter_types: Vec<Type>,
+    },
+    Tuple {
+        elements: Vec<Type>,
+    },
+    TypeUnion {
+        types: Vec<Type>,
+    },
+    TypeIntersection {
+        types: Vec<Type>,
+    },
+}
+
+/// Width of a floating-point type.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum FloatType {
+    F32,
+    F64,
+}
+
+/// A literal value carried by [`AstNode::Constant`].
+#[derive(Debug, Clone)]
+pub enum Value {
+    Bool(bool),
+    Int(i64),
+    UInt(u64),
+    Float(f64),
+    String(String),
+}
+
+/// The kind of jump represented by [`AstNode::ControlFlow`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum ControlFlowKind {
+    Return,
+    Break,
+    Continue,
+}
+
+/// Handle by which AST nodes refer to one another.
+/// Presumably an index into the node list of [`Ast`]; confirm once nodes are inserted.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct Index(u32);
+
+/// A single node of the syntax tree; child nodes are referenced by [`Index`].
+#[derive(Debug)]
+pub enum AstNode {
+    Root {
+        files: Vec<Index>,
+    },
+    File {
+        decls: Vec<Index>,
+    },
+    FunctionProto {
+        name: String,
+        return_type: Type,
+        parameter_list: Index,
+    },
+    ParameterList {
+        parameters: Vec<Index>,
+    },
+    Parameter {
+        name: String,
+        param_type: Type,
+    },
+    FunctionDecl {
+        proto: Index,
+        body: Index,
+    },
+    Block {
+        statements: Vec<Index>,
+        expr: Option<Index>,
+    },
+    Constant {
+        ty: Type,
+        value: Value,
+    },
+    ExpressionStatement {
+        expr: Index,
+    },
+    ControlFlow {
+        kind: ControlFlowKind,
+        expr: Option<Index>,
+    },
+    VarDecl {
+        mutable: bool,
+        name: String,
+        var_type: Type,
+    },
+    Assignment {
+        dest: Index,
+        expr: Index,
+    },
+    GlobalDecl {
+        name: String,
+        var_type: Type,
+        value: Index,
+    },
+    StructDecl {
+        name: String,
+        fields: Vec<Index>,
+    },
+    FieldDecl {
+        name: String,
+        field_type: Type,
+    },
+    FieldAccess {
+        expr: Index,
+        field: String,
+    },
+    UnresolvedDeclRef {
+        name: String,
+    },
+    DeclRef {
+        decl: Index,
+    },
+    TypeDeclRef {
+        ty: Index,
+    },
+    ExplicitCast {
+        expr: Index,
+        ty: Type,
+    },
+    Deref {
+        expr: Index,
+    },
+    AddressOf {
+        expr: Index,
+    },
+    PlaceToValue {
+        expr: Index,
+    },
+    ValueToPlace {
+        expr: Index,
+    },
+    CallExpr {
+        callee: Index,
+        arguments: Vec<Index>,
+    },
+    Argument {
+        expr: Index,
+    },
+    Not(Index),
+    Negate(Index),
+    Multiply {
+        left: Index,
+        right: Index,
+    },
+    Divide {
+        left: Index,
+        right: Index,
+    },
+    Modulus {
+        left: Index,
+        right: Index,
+    },
+    Add {
+        left: Index,
+        right: Index,
+    },
+    Subtract {
+        left: Index,
+        right: Index,
+    },
+    BitOr {
+        left: Index,
+        right: Index,
+    },
+    BitAnd {
+        left: Index,
+        right: Index,
+    },
+    BitXor {
+        left: Index,
+        right: Index,
+    },
+    LogicalOr {
+        left: Index,
+        right: Index,
+    },
+    LogicalAnd {
+        left: Index,
+        right: Index,
+    },
+    Eq {
+        left: Index,
+        right: Index,
+    },
+    NotEq {
+        left: Index,
+        right: Index,
+    },
+    Less {
+        left: Index,
+        right: Index,
+    },
+    LessEq {
+        left: Index,
+        right: Index,
+    },
+    Greater {
+        left: Index,
+        right: Index,
+    },
+    GreaterEq {
+        left: Index,
+        right: Index,
+    },
+    ShiftLeft {
+        left: Index,
+        right: Index,
+    },
+    ShiftRight {
+        left: Index,
+        right: Index,
+    },
+    Subscript {
+        expr: Index,
+        index: Index,
+    },
+    If {
+        condition: Index,
+        then: Index,
+        r#else: Option<Index>,
+    },
+    Else {
+        expr: Index,
+    },
+    Error {
+        err: Box<ParseError>,
+    },
+}
+
+/// Errors produced while parsing.
+#[derive(Debug, Error)]
+pub enum ParseError {
+    #[error("End of file.")]
+    EOF,
+    #[error("Unexpected token: {0:?}")]
+    UnexpectedToken(Token),
+    #[error("Not a type.")]
+    NotAType,
+}
+
+/// Flat storage for the nodes of a syntax tree.
+#[derive(Default, Debug)]
+pub struct Ast {
+    nodes: Vec<AstNode>,
+}
+
+impl Ast {
+    /// Creates an empty AST.
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+/// Parsing state shared by the individual parsers: the AST and the token source.
+struct ParseCtx<'a> {
+    ast: Ast,
+    source: ReborrowingIterator<'a, 'a, TokenItemIterator<'a>, TokenItem<'a>, Consuming>,
+    peeked: Vec<TokenItem<'a>>,
+}
+
+impl<'a> ParseCtx<'a> {
+    /// Wraps `source` in a [`ReborrowingIterator`]; no tokens are peeked initially.
+    fn new(ast: Ast, source: &'a mut TokenItemIterator<'a>) -> Self {
+        Self {
+            ast,
+            source: ReborrowingIterator::new(source),
+            peeked: Vec::new(),
+        }
+    }
+
+    /// Parse the entire source into an AST, returning an `AstNode::File` node.
+    fn parse_file(&mut self) -> Result<Index, ParseError> {
+        todo!()
+    }
+}
+
+/// Parser for the global declarations of a file.
+struct FileParser<'a> {
+    ctx: &'a mut ParseCtx<'a>,
+}
+
+impl<'a> FileParser<'a> {
+    /// Creates a file parser that works on `ctx`.
+    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
+        Self { ctx }
+    }
+
+    /// Peeks the next token and dispatches on the global declaration it introduces.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParseError::EOF`] if no token is left and
+    /// [`ParseError::UnexpectedToken`] if the token cannot start a declaration.
+    pub fn parse_global_decl(&mut self) -> Result<Index, ParseError> {
+        let mut peeking = self.ctx.source.borrow_peeking();
+        let next = peeking.try_peek_next()?;
+        match next.token {
+            Token::Fn => {
+                // function
+                todo!("impl function parsing")
+            }
+            Token::SlashSlash | Token::SlashSlashSlash => {
+                // comment
+                todo!("impl comment parsing")
+            }
+            Token::Const => {
+                // constant
+                todo!("impl constant parsing")
+            }
+            Token::Type => {
+                // type alias
+                todo!("impl type parsing")
+            }
+            Token::Module => {
+                // module
+                todo!("impl module parsing")
+            }
+            _ => Err(ParseError::UnexpectedToken(next.token)),
+        }
+    }
+}
+
+/// Parser for type expressions.
+struct TypeParser<'a> {
+    ctx: &'a mut ParseCtx<'a>,
+}
+
+impl<'a> TypeParser<'a> {
+    /// Creates a type parser that works on `ctx`.
+    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
+        Self { ctx }
+    }
+
+    /// Parses any type expression. Not implemented yet.
+    fn parse_type(&mut self) -> Result<Type, ParseError> {
+        todo!()
+    }
+
+    /// Parses a primitive type from the upcoming tokens: `Bang`, an
+    /// `OpenParens`/`CloseParens` pair (unit), `Bool`, one of the sized integer or
+    /// float tokens, `USize`/`ISize`, or `Star` followed by `Mutable` or `Const`
+    /// (pointer).
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParseError::NotAType`] if the tokens do not form such a type and
+    /// [`ParseError::EOF`] if the token stream ends first.
+    fn parse_primitive_type(&mut self) -> Result<Type, ParseError> {
+        let mut peeking = self.ctx.source.borrow_peeking();
+        let next = peeking.try_peek_next()?.token;
+        let ty = match next {
+            Token::Bang => {
+                // Top type
+                // NOTE(review): `Type` also has `Bottom`; confirm `Bang` is meant to map to `Top`.
+                Some(Type::Top)
+            }
+            Token::OpenParens if peeking.try_peek_next()?.token == Token::CloseParens => {
+                // Unit type
+                Some(Type::Unit)
+            }
+            Token::Bool => {
+                // Bool type
+                Some(Type::Bool)
+            }
+
+            Token::I8 => {
+                // i8 type
+                Some(Type::Int {
+                    signed: true,
+                    bits: 8,
+                })
+            }
+            Token::I16 => {
+                // i16 type
+                Some(Type::Int {
+                    signed: true,
+                    bits: 16,
+                })
+            }
+            Token::I32 => {
+                // i32 type
+                Some(Type::Int {
+                    signed: true,
+                    bits: 32,
+                })
+            }
+            Token::I64 => {
+                // i64 type
+                Some(Type::Int {
+                    signed: true,
+                    bits: 64,
+                })
+            }
+            Token::U8 => {
+                // u8 type
+                Some(Type::Int {
+                    signed: false,
+                    bits: 8,
+                })
+            }
+            Token::U16 => {
+                // u16 type
+                Some(Type::Int {
+                    signed: false,
+                    bits: 16,
+                })
+            }
+            Token::U32 => {
+                // u32 type
+                Some(Type::Int {
+                    signed: false,
+                    bits: 32,
+                })
+            }
+            Token::U64 => {
+                // u64 type
+                Some(Type::Int {
+                    signed: false,
+                    bits: 64,
+                })
+            }
+            Token::F32 => {
+                // f32 type
+                Some(Type::Float {
+                    float_type: FloatType::F32,
+                })
+            }
+            Token::F64 => {
+                // f64 type
+                Some(Type::Float {
+                    float_type: FloatType::F64,
+                })
+            }
+            Token::USize => {
+                // usize type
+                Some(Type::Int {
+                    signed: false,
+                    bits: 64,
+                }) // TODO: Detect pointer size
+            }
+            Token::ISize => {
+                // isize type
+                Some(Type::Int {
+                    signed: true,
+                    bits: 64,
+                }) // TODO: Detect pointer size
+            }
+            Token::Star => {
+                // Pointer type
+                let _const_or_mut = peeking
+                    .peek_one_of([Token::Mutable, Token::Const].iter().copied())
+                    .ok_or(ParseError::NotAType)?;
+                peeking.drain_peeked();
+                // NOTE(review): `peeking` still borrows `self.ctx.source` here and is used
+                // again below, so this `self.parse_type()` call may not pass the borrow checker.
+                Some(Type::Pointer {
+                    pointee: Box::new(self.parse_type()?),
+                })
+            }
+            _ => None,
+        };
+
+        if let Some(ty) = ty {
+            // The result is currently unused; the underscore silences the unused-variable warning.
+            let _cursor = peeking.reborrow_consuming_at_cursor();
+
+            Ok(ty)
+        } else {
+            Err(ParseError::NotAType)
+        }
+    }
+
+    /// Parses an array type. Not implemented yet.
+    fn parse_array_type(&mut self) -> Result<Type, ParseError> {
+        todo!()
+    }
+
+    /// Parses a function type. Not implemented yet.
+    fn parse_function_type(&mut self) -> Result<Type, ParseError> {
+        todo!()
+    }
+
+    /// Parses a tuple type. Not implemented yet.
+    fn parse_tuple_type(&mut self) -> Result<Type, ParseError> {
+        todo!()
+    }
+}
+
+/// Parser for function declarations.
+struct FunctionParser<'a> {
+    ctx: &'a mut ParseCtx<'a>,
+}
+
+impl<'a> FunctionParser<'a> {
+    /// Creates a function parser that works on `ctx`.
+    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
+        Self { ctx }
+    }
+
+    /// Parses a function prototype. Not implemented yet.
+    fn parse_function_proto(&mut self) -> Result<Index, ParseError> {
+        todo!()
+    }
+
+    /// Parses a parameter list. Not implemented yet.
+    fn parse_parameter_list(&mut self) -> Result<Index, ParseError> {
+        todo!()
+    }
+
+    /// Parses a single parameter. Not implemented yet.
+    fn parse_parameter(&mut self) -> Result<Index, ParseError> {
+        todo!()
+    }
+
+    /// Parses a function body. Not implemented yet.
+    fn parse_function_body(&mut self) -> Result<Index, ParseError> {
+        todo!()
+    }
+}
+
+// Iteration is declared but `next` is still a stub.
+impl<'a> Iterator for FileParser<'a> {
+    type Item = Result<Index, ParseError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        todo!()
+    }
+}
+
+/// Fallible peeking: maps "no more items" to [`ParseError::EOF`].
+trait TryReborrowingPeekingExt<T> {
+    /// Peeks the next item, or fails with [`ParseError::EOF`].
+    fn try_peek_next(&mut self) -> Result<&T, ParseError>;
+}
+
+/// Fallible consumption: maps "no more items" to [`ParseError::EOF`].
+trait TryReborrowingConsumingExt<T> {
+    /// Takes the next item, or fails with [`ParseError::EOF`].
+    fn try_next(&mut self) -> Result<T, ParseError>;
+}
+
+impl<'a, I, T> TryReborrowingPeekingExt<T> for ReborrowingPeekingIterator<'a, 'a, I, T>
+where
+    I: Iterator<Item = T>,
+{
+    fn try_peek_next(&mut self) -> Result<&T, ParseError> {
+        self.peek_next().ok_or(ParseError::EOF)
+    }
+}
+
+impl<'a, I, T> TryReborrowingConsumingExt<T> for ReborrowingConsumingIterator<'a, 'a, I, T>
+where
+    I: Iterator<Item = T>,
+{
+    fn try_next(&mut self) -> Result<T, ParseError> {
+        self.next().ok_or(ParseError::EOF)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::AstNode;
+
+    /// Prints the in-memory size of [`AstNode`] to stderr.
+    #[test]
+    fn print_ast_node_size() {
+        eprintln!("Size of AstNode: {}", std::mem::size_of::<AstNode>());
+    }
+}