From 1bde8f3ccd49bf513f642e15bdc44987bb854aef Mon Sep 17 00:00:00 2001 From: Janis Date: Thu, 6 Mar 2025 22:36:53 +0100 Subject: [PATCH] new trait for ast for getting nice nodes --- Cargo.toml | 1 + src/ast2/intern.rs | 7 + src/ast2/ir.rs | 20 +- src/ast2/mod.rs | 5 +- src/ast2/tag.rs | 631 ++++++++++++++++++++++++++++++++++++++++++++ src/ast2/visitor.rs | 26 +- src/lib.rs | 4 +- 7 files changed, 681 insertions(+), 13 deletions(-) create mode 100644 src/ast2/tag.rs diff --git a/Cargo.toml b/Cargo.toml index 7ad3168..ba55114 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ log = "0.4.22" num-bigint = "0.4.6" num-traits = "0.2.19" ordered-float = "4.2.2" +paste = "1.0.15" petgraph = "0.6.5" thiserror = "1.0.63" unicode-xid = "0.2.4" diff --git a/src/ast2/intern.rs b/src/ast2/intern.rs index 6c3c2d5..1c8d27e 100644 --- a/src/ast2/intern.rs +++ b/src/ast2/intern.rs @@ -1382,6 +1382,13 @@ impl InternPool { } } + pub fn try_get_return_type(&self, func: Index) -> Option { + match self.get_key(func) { + Key::FunctionType { return_type, .. 
} => Some(return_type), + _ => None, + } + } + pub fn get_pointer_type(&mut self, pointee: Index, flags: Option) -> Index { let key = Key::PointerType { pointee, diff --git a/src/ast2/ir.rs b/src/ast2/ir.rs index 6f87d6b..4ed220a 100644 --- a/src/ast2/ir.rs +++ b/src/ast2/ir.rs @@ -835,7 +835,7 @@ impl<'a> AstVisitorTrait<&'a mut super::Ast> for IrBuilder { idx: super::Index, ) -> Result<(), Self::Error> { let data = ast.expect_node_data_for_tag(idx, Tag::FunctionDecl); - let (proto, block) = data.as_two_indices(); + let (_proto, _block) = data.as_two_indices(); // visit proto @@ -890,19 +890,23 @@ impl<'a> AstVisitorTrait<&'a mut Ast> for PlacenessSolver { fn visit_parameter( &mut self, - ast: &'a mut Ast, + _ast: &'a mut Ast, idx: Index, ) -> Result { Ok(PlaceOrValue::Value(idx)) } - fn visit_var_decl(&mut self, ast: &'a mut Ast, idx: Index) -> Result { + fn visit_var_decl( + &mut self, + _ast: &'a mut Ast, + idx: Index, + ) -> Result { Ok(PlaceOrValue::Place(idx)) } fn visit_global_decl( &mut self, - ast: &'a mut Ast, + _ast: &'a mut Ast, idx: Index, ) -> Result { Ok(PlaceOrValue::Place(idx)) @@ -910,7 +914,7 @@ impl<'a> AstVisitorTrait<&'a mut Ast> for PlacenessSolver { fn visit_address_of_expr( &mut self, - ast: &'a mut Ast, + _ast: &'a mut Ast, idx: Index, ) -> Result { Ok(PlaceOrValue::Value(idx)) @@ -933,7 +937,7 @@ impl<'a> AstVisitorTrait<&'a mut Ast> for PlacenessSolver { let data = ast.expect_node_data_for_tag(idx, Tag::FunctionDecl); let (_, body) = data.as_two_indices(); - let body = self.visit_block_maybe_trailing_as_value(ast, body)?; + let _body = self.visit_block_maybe_trailing_as_value(ast, body)?; Ok(PlaceOrValue::Value(idx)) } @@ -1015,6 +1019,9 @@ impl<'a> AstVisitorTrait<&'a mut Ast> for PlacenessSolver { let bodies = unsafe { Index::from_slice_unchecked(&ast.extra[extra..][..2]) }; let &[a, b] = bodies else { unreachable!() }; + let cond = self.visit_any(ast, cond)?; + let cond = ast.convert_to_value_expr(cond); + let a = self.visit_any(ast, 
a)?; let b = self.visit_any(ast, b)?; @@ -1025,6 +1032,7 @@ impl<'a> AstVisitorTrait<&'a mut Ast> for PlacenessSolver { } .map(|a| a.as_u32()); + ast.datas[idx] = Data::index_and_extra_offset(cond, extra as u32); ast.extra[extra..][..2].copy_from_slice(&bodies); Ok(a.with_index(idx)) diff --git a/src/ast2/mod.rs b/src/ast2/mod.rs index cc81b49..ef08414 100644 --- a/src/ast2/mod.rs +++ b/src/ast2/mod.rs @@ -17,6 +17,7 @@ pub mod debug; pub mod intern; pub mod ir; pub mod parser; +pub mod tag; pub mod visitor; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -63,7 +64,7 @@ pub enum Tag { StructDeclInterned, /// `data` is an index to a type, and an intern to a name FieldDecl, - /// `data` is an index to a VarDecl, GlobalDecl or FunctionDecl, and an opaque DeclKind + /// `data` is an index to a Parameter, VarDecl, GlobalDecl or FunctionDecl, and an opaque DeclKind DeclRef, /// `data` is an inlined key into the symbol table (scope: index, name: intern) DeclRefUnresolved, @@ -180,7 +181,7 @@ impl Tag { } #[derive(Debug, Clone, Copy, thiserror::Error, PartialEq, Eq)] -enum ParseError { +pub enum ParseError { #[error("Unexpected end of token iter.")] UnexpectedEndOfTokens, #[error("Expected Token {0}.")] diff --git a/src/ast2/tag.rs b/src/ast2/tag.rs new file mode 100644 index 0000000..7e5a454 --- /dev/null +++ b/src/ast2/tag.rs @@ -0,0 +1,631 @@ +use super::{ + intern::{Index as Interned, PointerFlags, StructFlags}, + visitor::AstExt, + Ast, Index, ParseError, Tag, +}; + +pub trait AstNodeExt { + fn get_ast_node(&self, idx: Index) -> AstNode; +} + +impl AstNodeExt for &mut Ast { + fn get_ast_node(&self, idx: Index) -> AstNode { + ::get_ast_node(self, idx) + } +} + +impl AstNodeExt for &Ast { + fn get_ast_node(&self, idx: Index) -> AstNode { + ::get_ast_node(self, idx) + } +} + +impl AstNodeExt for Ast { + fn get_ast_node(&self, idx: Index) -> AstNode { + let (tag, data) = self.get_node_tag_and_data(idx); + + match tag { + Tag::Root => { + unreachable!() + } + 
Tag::File => { + let (a, b) = data.as_extra_range(); + let decls = unsafe { Index::from_slice_unchecked(&self.extra[a..b]).to_vec() }; + + AstNode::File { decls } + } + Tag::FunctionProto => { + let (name, extra) = data.as_intern_and_extra_offset(); + + let (return_type, parameter_list) = ( + Index::from_u32(self.extra[extra]).unwrap(), + Index::from_u32(self.extra[extra + 1]).unwrap(), + ); + + AstNode::FunctionProto { + name, + return_type, + parameter_list, + } + } + Tag::FunctionProtoInterned => { + let (name, ty) = data.as_two_interns(); + AstNode::FunctionProtoInterned { name, ty } + } + Tag::FunctionDecl => { + let (proto, body) = data.as_two_indices(); + + AstNode::FunctionDecl { proto, body } + } + Tag::ParameterList => { + let (a, b) = data.as_extra_range(); + let params = unsafe { Index::from_slice_unchecked(&self.extra[a..b]).to_vec() }; + + AstNode::ParameterList { params } + } + Tag::Parameter => { + let (ty, name) = data.as_index_intern(); + + AstNode::Parameter { ty, name } + } + Tag::Block => { + let (a, b) = data.as_extra_range(); + let statements = unsafe { Index::from_slice_unchecked(&self.extra[a..b]).to_vec() }; + + AstNode::Block { + statements, + expr: None, + } + } + Tag::BlockTrailingExpr => { + let (a, b) = data.as_extra_range(); + let (expr, statements) = unsafe { + Index::from_slice_unchecked(&self.extra[a..b]) + .split_last() + .unwrap() + }; + + AstNode::Block { + statements: statements.to_vec(), + expr: Some(*expr), + } + } + Tag::Constant => { + let (ty, value) = data.as_index_intern(); + AstNode::Constant { ty, value } + } + Tag::ExprStmt => AstNode::ExprStmt { + expr: data.as_index(), + }, + Tag::ReturnStmt => AstNode::ReturnStmt, + Tag::ReturnExprStmt => AstNode::ReturnExprStmt { + expr: data.as_index(), + }, + Tag::VarDecl => { + let (a, _) = data.as_extra_range(); + let name = Interned::from_u32(self.extra[a]); + let ty = Index::from_u32(self.extra[a + 1]).unwrap(); + + AstNode::VarDecl { name, ty } + } + Tag::MutVarDecl => { + 
let (a, _) = data.as_extra_range(); + let name = Interned::from_u32(self.extra[a]); + let ty = Index::from_u32(self.extra[a + 1]).unwrap(); + + AstNode::MutVarDecl { name, ty } + } + Tag::VarDeclAssignment => { + let (a, b) = data.as_extra_range(); + let extra = &self.extra[a..b]; + let name = Interned::from_u32(*extra.get(0).unwrap()); + let expr = Index::from_u32(*extra.get(1).unwrap()).unwrap(); + let ty = extra.get(2).map(|&inner| Index::from_u32(inner).unwrap()); + + AstNode::VarDeclAssignment { name, expr, ty } + } + Tag::MutVarDeclAssignment => { + let (a, b) = data.as_extra_range(); + let extra = &self.extra[a..b]; + let name = Interned::from_u32(*extra.get(0).unwrap()); + let expr = Index::from_u32(*extra.get(1).unwrap()).unwrap(); + let ty = extra.get(2).map(|&inner| Index::from_u32(inner).unwrap()); + + AstNode::MutVarDeclAssignment { name, expr, ty } + } + Tag::GlobalDecl => { + let (name, offset) = data.as_intern_and_extra_offset(); + let ty = Index::from_u32(self.extra[offset]).unwrap(); + let expr = Index::from_u32(self.extra[offset + 1]).unwrap(); + + AstNode::GlobalDecl { name, expr, ty } + } + Tag::StructDecl => { + let (name, offset) = data.as_intern_and_extra_offset(); + let flags = StructFlags::unpack(self.extra[offset]); + + let types = (offset + 1)..(offset + 1 + flags.num_fields as usize); + let names = (offset + 1 + flags.num_fields as usize) + ..(offset + 1 + flags.num_fields as usize * 2); + + let field_types = + unsafe { Index::from_slice_unchecked(&self.extra[types]).to_vec() }; + + let field_names = self.extra[names] + .iter() + .map(|&i| Interned::from_u32(i)) + .collect(); + + AstNode::StructDecl { + name, + flags, + field_names, + field_types, + } + } + Tag::StructDeclInterned => { + let (name, ty) = data.as_two_interns(); + + AstNode::StructDeclInterned { name, ty } + } + Tag::FieldDecl => { + let (ty, name) = data.as_index_intern(); + + AstNode::FieldDecl { name, ty } + } + Tag::DeclRef => AstNode::DeclRef { + decl: 
data.as_index(), + }, + Tag::DeclRefUnresolved => { + let (scope, name) = data.as_index_intern(); + AstNode::DeclRefUnresolved { scope, name } + } + Tag::InternedType => AstNode::InternedType { + intern: data.as_intern(), + }, + Tag::TypeDeclRef => AstNode::TypeDeclRef { + decl: data.as_index(), + }, + Tag::TypeDeclRefUnresolved => { + let (scope, name) = data.as_index_intern(); + AstNode::TypeDeclRefUnresolved { scope, name } + } + Tag::PointerType => { + let (ty, flags) = data.as_index_and_opaque(); + let flags = PointerFlags::unpack(flags as u8); + AstNode::PointerType { ty, flags } + } + Tag::ArrayType => { + let (length, pointer) = data.as_two_indices(); + + AstNode::ArrayType { length, pointer } + } + Tag::CallExpr => { + let (func, argument_list) = data.as_two_indices(); + AstNode::CallExpr { + func, + argument_list, + } + } + Tag::FieldAccess => { + let (expr, field_name) = data.as_index_intern(); + AstNode::FieldAccess { field_name, expr } + } + Tag::ArgumentList => { + let (a, b) = data.as_extra_range(); + let arguments = unsafe { Index::from_slice_unchecked(&self.extra[a..b]).to_vec() }; + + AstNode::ArgumentList { arguments } + } + Tag::Argument => AstNode::Argument { + expr: data.as_index(), + name: None, + }, + Tag::NamedArgument => { + let (expr, name) = data.as_index_intern(); + AstNode::Argument { + expr, + name: Some(name), + } + } + Tag::ExplicitCast => { + let (expr, ty) = data.as_two_indices(); + + AstNode::ExplicitCast { expr, ty } + } + Tag::Deref => AstNode::Deref { + expr: data.as_index(), + }, + Tag::AddressOf => AstNode::AddressOf { + expr: data.as_index(), + }, + Tag::Not => AstNode::Not { + expr: data.as_index(), + }, + Tag::Negate => AstNode::Negate { + expr: data.as_index(), + }, + Tag::PlaceToValueConversion => AstNode::PlaceToValueConversion { + expr: data.as_index(), + }, + Tag::ValueToPlaceConversion => AstNode::ValueToPlaceConversion { + expr: data.as_index(), + }, + Tag::Or => { + let (lhs, rhs) = data.as_two_indices(); + 
AstNode::Or { lhs, rhs } + } + Tag::And => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::And { lhs, rhs } + } + Tag::BitOr => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::BitOr { lhs, rhs } + } + Tag::BitXOr => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::BitXOr { lhs, rhs } + } + Tag::BitAnd => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::BitAnd { lhs, rhs } + } + Tag::Eq => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Eq { lhs, rhs } + } + Tag::NEq => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::NEq { lhs, rhs } + } + Tag::Lt => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Lt { lhs, rhs } + } + Tag::Gt => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Gt { lhs, rhs } + } + Tag::Le => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Le { lhs, rhs } + } + Tag::Ge => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Ge { lhs, rhs } + } + Tag::Shl => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Shl { lhs, rhs } + } + Tag::Shr => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Shr { lhs, rhs } + } + Tag::Add => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Add { lhs, rhs } + } + Tag::Sub => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Sub { lhs, rhs } + } + Tag::Mul => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Mul { lhs, rhs } + } + Tag::Div => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Div { lhs, rhs } + } + Tag::Rem => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Rem { lhs, rhs } + } + Tag::Assign => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::Assign { lhs, rhs } + } + Tag::SubscriptExpr => { + let (lhs, rhs) = data.as_two_indices(); + AstNode::SubscriptExpr { lhs, rhs } + } + Tag::IfExpr => { + let (cond, body) = data.as_two_indices(); + AstNode::IfExpr { cond, body } + } + Tag::IfElseExpr => { + let (cond, extra) = data.as_index_and_extra_offset(); + let [a, b] = self.extra[extra..][..2] else { + 
unreachable!() + }; + + AstNode::IfElseExpr { + cond, + a: Index::from_u32(a).unwrap(), + b: Index::from_u32(b).unwrap(), + } + } + Tag::Error => AstNode::Error { + err: data.as_error(), + }, + Tag::Undefined => AstNode::Undefined, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AstNode { + /// pseudo tag, contains a range from a..b into extra of all files. + Root { + files: Vec, + }, + /// `data` is a range from a..b into extra of all global nodes. + File { + decls: Vec, + }, + /// `data` is an intern to a name, and an index into extra of [index: return_type, index: ParameterList] + FunctionProto { + name: Interned, + return_type: Index, + parameter_list: Index, + }, + /// `data` is an intern to a name, and an intern to the function type + FunctionProtoInterned { + name: Interned, + ty: Interned, + }, + /// `data` is an index to a FunctionProto and an index to a Block + FunctionDecl { + proto: Index, + body: Index, + }, + /// `data` is a range from a..b into extra of indices to parameters + ParameterList { + params: Vec, + }, + /// `data` is an index to a type, and an intern to a name + Parameter { + ty: Index, + name: Interned, + }, + /// `data` is range from a..b into `extra` of indices to statements + Block { + statements: Vec, + expr: Option, + }, + /// `data` is an index to a type, and an intern to a value + Constant { + ty: Index, + value: Interned, + }, + /// `data` is an index to an expression + ExprStmt { + expr: Index, + }, + /// `data` is none + ReturnStmt, + /// `data` is an index to an expr + ReturnExprStmt { + expr: Index, + }, + /// `data` is a range from a..b into `extra` of `[name: intern, type: index]` + VarDecl { + name: Interned, + ty: Index, + }, + /// `data` is a range from a..b into `extra` of `[name: intern, type: index]` + MutVarDecl { + name: Interned, + ty: Index, + }, + /// `data` is a range from a..b into `extra` of `[name: intern, expr: index, type?: index]` + VarDeclAssignment { + name: Interned, + expr: Index, + ty: 
Option, + }, + /// `data` is a range from a..b into `extra` of `[name: intern, expr: index, type?: index]` + MutVarDeclAssignment { + name: Interned, + expr: Index, + ty: Option, + }, + /// `data` is an intern to a name, and an offset into `extra` of `[type: index, expr: index]` + GlobalDecl { + name: Interned, + expr: Index, + ty: Index, + }, + /// `data` is an intern to a name, and an offset into extra of `[flags, type0 ,..., typeN ,name0 ,..., nameN]` + StructDecl { + name: Interned, + flags: StructFlags, + field_names: Vec, + field_types: Vec, + }, + /// `data` is an intern to a name, and an intern to the type of the struct + StructDeclInterned { + name: Interned, + ty: Interned, + }, + /// `data` is an index to a type, and an intern to a name + FieldDecl { + name: Interned, + ty: Index, + }, + /// `data` is an index to a Parameter, VarDecl, GlobalDecl or FunctionDecl, and an opaque DeclKind + DeclRef { + decl: Index, + }, + /// `data` is an inlined key into the symbol table (scope: index, name: intern) + DeclRefUnresolved { + scope: Index, + name: Interned, + }, + /// `data` is an intern of a type + InternedType { + intern: Interned, + }, + /// `data` is an index to a StructDecl + TypeDeclRef { + decl: Index, + }, + /// `data` is an inlined key into the symbol table (scope: index, name: intern) + TypeDeclRefUnresolved { + scope: Index, + name: Interned, + }, + /// `data` is an index to a Type and u32 PointerFlags (extra offset) + PointerType { + ty: Index, + flags: PointerFlags, + }, + /// `data` is an index to a length expression, and an underlying pointer type + ArrayType { + length: Index, + pointer: Index, + }, + /// `data` is an index to an expr and an index to an ArgumentList + CallExpr { + func: Index, + argument_list: Index, + }, + /// `data` is an index to an expr and an intern to a field name + FieldAccess { + field_name: Interned, + expr: Index, + }, + /// `data` is a range from a..b into extra of indices to arguments + ArgumentList { + arguments: 
Vec, + }, + /// `data` is an index to an expression + Argument { + expr: Index, + name: Option, + }, + /// `data` is an index to lhs, and an index to the type + ExplicitCast { + expr: Index, + ty: Index, + }, + /// `data` is a single index to an expr + Deref { + expr: Index, + }, + AddressOf { + expr: Index, + }, + Not { + expr: Index, + }, + Negate { + expr: Index, + }, + PlaceToValueConversion { + expr: Index, + }, + ValueToPlaceConversion { + expr: Index, + }, + /// data is two indices for `lhs` and `rhs` + Or { + lhs: Index, + rhs: Index, + }, + And { + lhs: Index, + rhs: Index, + }, + BitOr { + lhs: Index, + rhs: Index, + }, + BitXOr { + lhs: Index, + rhs: Index, + }, + BitAnd { + lhs: Index, + rhs: Index, + }, + Eq { + lhs: Index, + rhs: Index, + }, + NEq { + lhs: Index, + rhs: Index, + }, + Lt { + lhs: Index, + rhs: Index, + }, + Gt { + lhs: Index, + rhs: Index, + }, + Le { + lhs: Index, + rhs: Index, + }, + Ge { + lhs: Index, + rhs: Index, + }, + Shl { + lhs: Index, + rhs: Index, + }, + Shr { + lhs: Index, + rhs: Index, + }, + Add { + lhs: Index, + rhs: Index, + }, + Sub { + lhs: Index, + rhs: Index, + }, + Mul { + lhs: Index, + rhs: Index, + }, + Div { + lhs: Index, + rhs: Index, + }, + Rem { + lhs: Index, + rhs: Index, + }, + Assign { + lhs: Index, + rhs: Index, + }, + SubscriptExpr { + lhs: Index, + rhs: Index, + }, + IfExpr { + cond: Index, + body: Index, + }, + /// `data` is an index to an expression and an index into extra for [if, else] + IfElseExpr { + cond: Index, + a: Index, + b: Index, + }, + // TODO: + /// `data` is a ParseError + Error { + err: ParseError, + }, + /// placeholder tag for reserved indices/nodes, `data` is none + Undefined, +} diff --git a/src/ast2/visitor.rs b/src/ast2/visitor.rs index f3a37b2..3e048b1 100644 --- a/src/ast2/visitor.rs +++ b/src/ast2/visitor.rs @@ -1,4 +1,6 @@ -use super::*; +use crate::{ast2::tag::AstNode, variant}; + +use super::{tag::AstNodeExt, *}; pub trait AstExt { fn get_node_children(&self, node: Index) -> 
Vec; @@ -252,7 +254,27 @@ impl Ast { } } -pub trait AstVisitorTrait { +macro_rules! tag_visit_fn { + ($($tag:tt {$($field_name:ident : $field_ty:ty),* $(,)?}),* $(,)?) => { + $( + paste::paste! { + + fn [](&mut self, ast: Ast, idx: Index) -> Result { + variant!(ast.get_ast_node(idx) => AstNode::$tag { $($field_name),* }); + self.[](ast, idx, $($field_name),*) + } + fn [](&mut self, ast: Ast, idx: Index, $($field_name: $field_ty),*) -> Result { + _ = (ast, idx, $($field_name),*); + Err(Self::UNIMPL) + } + } + )* + }; +} + +use intern::Index as Interned; + +pub trait AstVisitorTrait { type Error; type Value; const UNIMPL: Self::Error; diff --git a/src/lib.rs b/src/lib.rs index e20e10a..f283566 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,6 @@ #![feature( - extract_if, iter_advance_by, box_into_inner, - hash_extract_if, bigint_helper_methods, map_try_insert, iter_intersperse, @@ -29,7 +27,7 @@ pub mod symbol_table; pub mod tokens; pub mod triples; -mod utils; +pub mod utils; use utils::unit; pub fn tokenize<'a>(