From 888517f2ea7a393b7c838962b2feb3d9726f7891 Mon Sep 17 00:00:00 2001 From: Janis Date: Sun, 15 Sep 2024 00:38:50 +0200 Subject: [PATCH] soooo many things, I need to commit more often also, types exist in the ast, but are interned after parsing refs and typerefs are resolved true and false in the intern pool, need to add them to primaryexpr structs in the internpool take a decl to differentiate struct types of the same name field access postfix expr? also nesting postfix expr (e.g. var[1][2]) arrays can only be specified with integral constants as length fixes: sinttype and uinttype store bits inline in item.index correctly mut/var decl assignment? have correct doc strings let/var symbol is inserted after the assignment expr, so that the expr can still use now-shadowed variables structs can have fields of type pointer-to-self (ast) lots of wrong-cases in node children getting --- src/ast2/mod.rs | 1261 +++++++++++++++++++++++++++++++++------- src/lib.rs | 1 + src/symbol_table.rs | 35 ++ tests/legal/array.sea | 4 + tests/legal/struct.sea | 6 +- 5 files changed, 1092 insertions(+), 215 deletions(-) create mode 100644 tests/legal/array.sea diff --git a/src/ast2/mod.rs b/src/ast2/mod.rs index 32fc9e0..7f4533b 100644 --- a/src/ast2/mod.rs +++ b/src/ast2/mod.rs @@ -1,15 +1,20 @@ #![allow(dead_code)] use std::{ + collections::BTreeMap, fmt::{Debug, Display}, num::NonZero, }; +use intern::{InternPool, PointerFlags, StructFlags}; +use num_bigint::BigInt; + use crate::{lexer::SourceLocation, tokens::Token, writeln_indented}; pub mod intern { use std::{ collections::BTreeMap, + fmt::Display, hash::{Hash, Hasher}, }; @@ -52,6 +57,8 @@ pub mod intern { String, SIntSmall, UIntSmall, + TrueValue, + FalseValue, UInt64, SInt64, F32, @@ -126,11 +133,15 @@ pub mod intern { parameters: Vec, }, StructType { + decl: super::Index, name: Index, packed: bool, c_like: bool, + /// vec of (Name, Type) fields: Vec<(Index, Index)>, }, + TrueValue, + FalseValue, } impl Hash for Key<'_> { @@ 
-155,16 +166,12 @@ pub mod intern { flags, length, } => (*pointee, *flags, *length).hash(state), - Key::StructType { - name, - packed, - c_like, - fields, - } => (*name, *packed, *c_like, fields).hash(state), + Key::StructType { name, decl, .. } => (*name, *decl).hash(state), Key::FunctionType { return_type, parameters, } => (return_type, parameters).hash(state), + Key::TrueValue | Key::FalseValue => {} } } } @@ -186,10 +193,10 @@ pub mod intern { } } - fn pack(self) -> u8 { + pub fn pack(self) -> u8 { (self.volatile as u8) << 0 | (self.is_const as u8) << 1 | (self.noalias as u8) << 2 } - fn unpack(packed: u8) -> Self { + pub fn unpack(packed: u8) -> Self { Self { volatile: packed & (1 << 0) != 0, is_const: packed & (1 << 1) != 0, @@ -199,10 +206,10 @@ pub mod intern { } #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] - struct StructFlags { - packed: bool, - c_like: bool, - num_fields: u32, + pub struct StructFlags { + pub packed: bool, + pub c_like: bool, + pub num_fields: u32, } impl StructFlags { @@ -215,11 +222,11 @@ pub mod intern { num_fields, } } - fn pack(self) -> u32 { + pub fn pack(self) -> u32 { assert!(self.num_fields < (1 << 30)); (self.packed as u32) << 31 | (self.c_like as u32) << 30 | self.num_fields & Self::MASK } - fn unpack(packed: u32) -> Self { + pub fn unpack(packed: u32) -> Self { Self { packed: packed & (1 << 31) != 0, c_like: packed & (1 << 30) != 0, @@ -264,7 +271,13 @@ pub mod intern { #[repr(transparent)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - pub struct Index(u32); + pub struct Index(pub u32); + + impl Display for Index { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "#{}", self.0) + } + } impl Index { pub fn into_u32(self) -> u32 { @@ -321,7 +334,7 @@ pub mod intern { } } - const STATIC_KEYS: [Key; 19] = [ + const STATIC_KEYS: [Key; 21] = [ Key::SimpleType { ty: SimpleType::Bool, }, @@ -355,79 +368,87 @@ pub mod intern { Key::UIntType { bits: 32 }, 
Key::SIntType { bits: 64 }, Key::UIntType { bits: 64 }, + Key::TrueValue, + Key::FalseValue, ]; impl InternPool { pub fn get_void_type(&self) -> Index { - self.get_assume_present(Key::SimpleType { + self.get_assume_present(&Key::SimpleType { ty: SimpleType::Void, }) } pub fn get_bool_type(&self) -> Index { - self.get_assume_present(Key::SimpleType { + self.get_assume_present(&Key::SimpleType { ty: SimpleType::Bool, }) } + pub fn get_true_value(&self) -> Index { + self.get_assume_present(&Key::TrueValue) + } + pub fn get_false_value(&self) -> Index { + self.get_assume_present(&Key::FalseValue) + } pub fn get_f32_type(&self) -> Index { - self.get_assume_present(Key::SimpleType { + self.get_assume_present(&Key::SimpleType { ty: SimpleType::F32, }) } pub fn get_f64_type(&self) -> Index { - self.get_assume_present(Key::SimpleType { + self.get_assume_present(&Key::SimpleType { ty: SimpleType::F64, }) } pub fn get_comptime_int_type(&self) -> Index { - self.get_assume_present(Key::SimpleType { + self.get_assume_present(&Key::SimpleType { ty: SimpleType::ComptimeInt, }) } pub fn get_usize_type(&self) -> Index { - self.get_assume_present(Key::SimpleType { + self.get_assume_present(&Key::SimpleType { ty: SimpleType::USize, }) } pub fn get_isize_type(&self) -> Index { - self.get_assume_present(Key::SimpleType { + self.get_assume_present(&Key::SimpleType { ty: SimpleType::ISize, }) } pub fn get_u0_type(&self) -> Index { - self.get_assume_present(Key::UIntType { bits: 0 }) + self.get_assume_present(&Key::UIntType { bits: 0 }) } pub fn get_i0_type(&self) -> Index { - self.get_assume_present(Key::SIntType { bits: 0 }) + self.get_assume_present(&Key::SIntType { bits: 0 }) } pub fn get_u1_type(&self) -> Index { - self.get_assume_present(Key::UIntType { bits: 1 }) + self.get_assume_present(&Key::UIntType { bits: 1 }) } pub fn get_i1_type(&self) -> Index { - self.get_assume_present(Key::SIntType { bits: 1 }) + self.get_assume_present(&Key::SIntType { bits: 1 }) } pub fn 
get_u8_type(&self) -> Index { - self.get_assume_present(Key::UIntType { bits: 8 }) + self.get_assume_present(&Key::UIntType { bits: 8 }) } pub fn get_i8_type(&self) -> Index { - self.get_assume_present(Key::SIntType { bits: 8 }) + self.get_assume_present(&Key::SIntType { bits: 8 }) } pub fn get_u16_type(&self) -> Index { - self.get_assume_present(Key::UIntType { bits: 16 }) + self.get_assume_present(&Key::UIntType { bits: 16 }) } pub fn get_i16_type(&self) -> Index { - self.get_assume_present(Key::SIntType { bits: 16 }) + self.get_assume_present(&Key::SIntType { bits: 16 }) } pub fn get_u32_type(&self) -> Index { - self.get_assume_present(Key::UIntType { bits: 32 }) + self.get_assume_present(&Key::UIntType { bits: 32 }) } pub fn get_i32_type(&self) -> Index { - self.get_assume_present(Key::SIntType { bits: 32 }) + self.get_assume_present(&Key::SIntType { bits: 32 }) } pub fn get_u64_type(&self) -> Index { - self.get_assume_present(Key::UIntType { bits: 64 }) + self.get_assume_present(&Key::UIntType { bits: 64 }) } pub fn get_i64_type(&self) -> Index { - self.get_assume_present(Key::SIntType { bits: 64 }) + self.get_assume_present(&Key::SIntType { bits: 64 }) } } @@ -596,8 +617,8 @@ pub mod intern { _ = self.extend_words(words); self.create_item(Tag::NegativeInt, i) } - Key::UIntType { bits } => self.create_item(Tag::SIntSmall, bits as u32), - Key::SIntType { bits } => self.create_item(Tag::SIntSmall, bits as u32), + Key::UIntType { bits } => self.create_item(Tag::UIntType, bits as u32), + Key::SIntType { bits } => self.create_item(Tag::SIntType, bits as u32), Key::SimpleType { ty } => self.create_item(Tag::SimpleType, ty as u8 as u32), Key::PointerType { pointee, flags } => { let flags = flags.pack(); @@ -611,22 +632,26 @@ pub mod intern { } => { let flags = flags.pack(); let i = self.extend_words([pointee.0, flags as u32, length]); - self.create_item(Tag::PointerType, i) + self.create_item(Tag::ArrayType, i) } Key::StructType { name, + decl, packed, c_like, 
fields, } => { let flags = StructFlags::new(packed, c_like, fields.len() as u32).pack(); - let i = self.extend_words([name.into_u32(), flags]); - self.extend_words( - fields - .into_iter() - .map(|(n, t)| [n.into_u32(), t.into_u32()]) - .flatten(), - ); + let i = self.extend_words([name.into_u32(), decl.into_u32(), flags, u32::MAX]); + if !fields.is_empty() { + let fields_offset = self.extend_words( + fields + .into_iter() + .map(|(n, t)| [n.into_u32(), t.into_u32()]) + .flatten(), + ); + self.words[i as usize + 3] = fields_offset; + } self.create_item(Tag::StructType, i) } Key::FunctionType { @@ -639,10 +664,13 @@ pub mod intern { ); let start = self.push_word(info.pack()); + self.extend_words([return_type.into_u32()]); _ = self.extend_words(parameters.into_iter().map(|i| i.0)); self.create_item(Tag::FunctionType, start) } + Key::TrueValue => self.create_item(Tag::TrueValue, 0), + Key::FalseValue => self.create_item(Tag::FalseValue, 0), } } @@ -724,14 +752,12 @@ pub mod intern { let bigint = BigInt::from_biguint(Sign::Plus, data); Key::PositiveInt { bigint } } - Tag::SIntType => { - let bits = self.words[item.idx()] as u16; - Key::SIntType { bits } - } - Tag::UIntType => { - let bits = self.words[item.idx()] as u16; - Key::SIntType { bits } - } + Tag::SIntType => Key::SIntType { + bits: item.index as u16, + }, + Tag::UIntType => Key::UIntType { + bits: item.index as u16, + }, Tag::SimpleType => { let ty = item.idx() as u8; @@ -758,19 +784,25 @@ pub mod intern { } Tag::StructType => { let name = Index(self.words[item.idx()]); - let flags = StructFlags::unpack(self.words[item.idx() + 1]); - let start = item.idx() + 2; - let end = start + flags.num_fields as usize * 2; + let decl = super::Index::new(self.words[item.idx() + 1]); + let flags = StructFlags::unpack(self.words[item.idx() + 2]); + let fields = if flags.num_fields != 0 { + let fields_offset = self.words[item.idx() + 3] as usize; + let fields_end = fields_offset + flags.num_fields as usize * 2; - let fields 
= self.words[start..end] - .iter() - .cloned() - .array_chunks::<2>() - .map(|[n, t]| (Index(n), Index(t))) - .collect::>(); + self.words[fields_offset..fields_end] + .iter() + .cloned() + .array_chunks::<2>() + .map(|[n, t]| (Index(n), Index(t))) + .collect::>() + } else { + vec![] + }; Key::StructType { name, + decl, packed: flags.packed, c_like: flags.c_like, fields, @@ -787,15 +819,15 @@ pub mod intern { .map(|&i| Index(i)) .collect::>(); ( - self.get_assume_present(Key::SimpleType { + self.get_assume_present(&Key::SimpleType { ty: SimpleType::Void, }), params, ) } else { + let return_type = Index(self.words[item.idx() + 1]); let start = item.idx() + 2; let end = start + len as usize; - let return_type = Index(self.words[item.idx() + 1]); let params = self.words[start..end] .iter() .map(|&i| Index(i)) @@ -808,18 +840,21 @@ pub mod intern { parameters, } } + Tag::TrueValue => Key::TrueValue, + Tag::FalseValue => Key::FalseValue, } } - pub fn get_assume_present(&self, key: Key) -> Index { + pub fn try_get_index(&self, key: &Key) -> Option { let mut hasher = std::hash::DefaultHasher::new(); key.hash(&mut hasher); let digest = hasher.finish(); - if let Some(&idx) = self.hashed.get(&digest) { - idx - } else { - panic!("key {key:?} not present in pool.") - } + self.hashed.get(&digest).cloned() + } + + pub fn get_assume_present(&self, key: &Key) -> Index { + self.try_get_index(&key) + .expect(&format!("key {key:?} not present in pool.")) } pub fn get_int_type(&mut self, signed: bool, bits: u16) -> Index { @@ -834,10 +869,16 @@ pub mod intern { pub fn get_string_index(&mut self, str: &str) -> Index { self.get_or_insert(Key::String { str }) } + pub fn try_get_string_index(&self, str: &str) -> Option { + self.try_get_index(&Key::String { str }) + } pub fn get_simple_type(&mut self, ty: SimpleType) -> Index { self.get_or_insert(Key::SimpleType { ty }) } + pub fn try_get_simple_type(&self, ty: SimpleType) -> Option { + self.try_get_index(&Key::SimpleType { ty }) + } pub fn 
get_function_type>( &mut self, @@ -850,6 +891,17 @@ pub mod intern { }) } + pub fn try_get_function_type>( + &self, + return_type: Index, + parameters: P, + ) -> Option { + self.try_get_index(&Key::FunctionType { + return_type, + parameters: parameters.into_iter().collect(), + }) + } + pub fn get_pointer_type(&mut self, pointee: Index, flags: Option) -> Index { let key = Key::PointerType { pointee, @@ -857,22 +909,69 @@ pub mod intern { }; self.get_or_insert(key) } + pub fn try_get_pointer_type( + &self, + pointee: Index, + flags: Option, + ) -> Option { + self.try_get_index( + &(Key::PointerType { + pointee, + flags: flags.unwrap_or_default(), + }), + ) + } - pub fn get_struct_type( + pub fn insert_or_replace_struct_type>( &mut self, name: Index, + decl: super::Index, packed: bool, c_like: bool, - fields: Vec<(Index, Index)>, + fields: I, ) -> Index { let key = Key::StructType { name, + decl, packed, c_like, - fields, + fields: vec![], + }; + if let Some(i) = self.try_get_index(&key).and_then(|i| self.get_item(i)) { + let fields_offset = self.extend_words( + fields + .into_iter() + .map(|(n, t)| [n.into_u32(), t.into_u32()]) + .flatten(), + ); + self.words[i.idx() + 3] = fields_offset; + let fields_end = self.words.len() as u32; + let num_fields = (fields_end - fields_offset) / 2; + let flags = StructFlags::new(packed, c_like, num_fields).pack(); + self.words[i.idx() + 2] = flags; + } + self.get_or_insert(key) + } + + pub fn get_struct_type(&mut self, name: Index, decl: super::Index) -> Index { + let key = Key::StructType { + name, + decl, + packed: false, + c_like: false, + fields: vec![], }; self.get_or_insert(key) } + pub fn try_get_struct_type(&self, name: Index, decl: super::Index) -> Option { + self.try_get_index(&Key::StructType { + name, + decl, + packed: false, + c_like: false, + fields: vec![], + }) + } pub fn get_array_type( &mut self, @@ -887,6 +986,18 @@ pub mod intern { }; self.get_or_insert(key) } + pub fn try_get_array_type( + &self, + pointee: 
Index, + flags: Option, + length: u32, + ) -> Option { + self.try_get_index(&Key::ArrayType { + pointee, + flags: flags.unwrap_or_default(), + length, + }) + } pub fn get_str(&self, index: Index) -> &str { let key = self.get_key(index); @@ -915,19 +1026,19 @@ enum Tag { Root, /// `data` is a range from a..b into extra of all global nodes. File, - /// `data` is an intern to a name, and an index into extra of [intern: return_type, index: ParameterList] + /// `data` is an intern to a name, and an index into extra of [index: return_type, index: ParameterList] FunctionProto, /// `data` is an index to a FunctionProto and an index to a Block FunctionDecl, /// `data` is a range from a..b into extra of indices to parameters ParameterList, - /// `data` is an intern to a name, and an intern to a type + /// `data` is an index to a type, and an intern to a name Parameter, /// `data` is range from a..b into `extra` of indices to statements Block, /// `data` is range from a..b into `extra` of indices to statements, where the last one is an expression BlockTrailingExpr, - /// `data` is an intern to a value, intern to a type + /// `data` is an index to a type, and an intern to a value Constant, /// `data` is an index to an expression ExprStmt, @@ -935,31 +1046,45 @@ enum Tag { ReturnStmt, /// `data` is an index to an expr ReturnExprStmt, - /// `data` is a range from a..b into `extra` of an intern to a name and an optional intern to a type + /// `data` is a range from a..b into `extra` of `[name: intern, type: index]` VarDecl, - /// `data` is a range from a..b into `extra` of an intern to a name and an optional intern to a type + /// `data` is a range from a..b into `extra` of `[name: intern, type: index]` MutVarDecl, - /// `data` is a range from a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type + /// `data` is a range from a..b into `extra` of `[name: intern, expr: index, type?: index]` VarDeclAssignment, - /// `data` is a range from 
a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type + /// `data` is a range from a..b into `extra` of `[name: intern, expr: index, type?: index]` MutVarDeclAssignment, - /// `data` is an intern to a name, and an offset into `extra` of [type: intern, expr: index] + /// `data` is an intern to a name, and an offset into `extra` of `[type: index, expr: index]` GlobalDecl, - /// `data` is an intern to a struct type + /// `data` is an intern to a name, and an offset into extra of `[flags, type0 ,..., typeN ,name0 ,..., nameN]` StructDecl, + /// `data` is an index to a type, and an intern to a name + FieldDecl, /// `data` is an index to a VarDecl, GlobalDecl or FunctionDecl DeclRef, /// `data` is an inlined key into the symbol table (scope: index, name: intern) DeclRefUnresolved, + /// `data` is an intern of a type + InternedType, + /// `data` is an index to a StructDecl + TypeDeclRef, + /// `data` is an inlined key into the symbol table (scope: index, name: intern) + TypeDeclRefUnresolved, + /// `data` is an index to a Type and u32 PointerFlags + PointerType, + /// `data` is an index to a length expression, and an underlying pointer type + ArrayType, /// `data` is an index to an expr and an index to an ArgumentList CallExpr, + /// `data` is an index to an expr and an intern to a field name + FieldAccess, /// `data` is a range from a..b into extra of indices to arguments ArgumentList, /// `data` is an index to an expression Argument, /// `data` is an index to an expression, and an intern to a name NamedArgument, - /// `data` is an index to lhs, and an intern to the type + /// `data` is an index to lhs, and an index to the type ExplicitCast, /// `data` is a single index to an expr Deref, @@ -1018,6 +1143,8 @@ enum ParseError { #[error("Dummy Message.")] ExpectedPrimaryExpression, #[error("Dummy Message.")] + ExpectedConstantLiteral, + #[error("Dummy Message.")] ExpectedExpression, #[error("Dummy Message.")] 
ExpectedPostfixExpression, @@ -1045,7 +1172,7 @@ enum ParseError { ErrorNode(Index), } -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] pub struct Index(NonZero); @@ -1157,7 +1284,7 @@ impl ExpandedData { impl From<(Tag, Data)> for ExpandedData { fn from((tag, data): (Tag, Data)) -> Self { match tag { - Tag::FunctionProto => Self::from_intern_and_extra_offset(data), + Tag::FunctionProto => Self::from_index_and_extra_offset(data), Tag::ParameterList => Self::from_extra_range(data), Tag::Root => Self::from_extra_range(data), Tag::File => Self::from_extra_range(data), @@ -1168,7 +1295,7 @@ impl From<(Tag, Data)> for ExpandedData { | Tag::MutVarDeclAssignment | Tag::BlockTrailingExpr | Tag::Block => Self::from_extra_range(data), - Tag::Constant | Tag::Parameter => Self::from_two_interns(data), + Tag::FieldDecl | Tag::Constant | Tag::Parameter => Self::from_index_intern(data), Tag::Or | Tag::And | Tag::BitOr @@ -1191,21 +1318,25 @@ impl From<(Tag, Data)> for ExpandedData { | Tag::IfExpr | Tag::SubscriptExpr | Tag::CallExpr + | Tag::ArrayType | Tag::FunctionDecl => Self::from_two_indices(data), Tag::ReturnExprStmt | Tag::DeclRef + | Tag::TypeDeclRef | Tag::Argument | Tag::Deref | Tag::AddressOf | Tag::Not | Tag::Negate | Tag::ExprStmt => Self::from_index(data), - Tag::DeclRefUnresolved | Tag::NamedArgument | Tag::ExplicitCast => { - Self::from_index_intern(data) - } + Tag::FieldAccess + | Tag::DeclRefUnresolved + | Tag::TypeDeclRefUnresolved + | Tag::NamedArgument + | Tag::ExplicitCast => Self::from_index_intern(data), Tag::GlobalDecl => Self::from_intern_and_extra_offset(data), - Tag::StructDecl => Self::from_intern(data), - Tag::IfElseExpr => Self::from_index_and_extra_offset(data), + Tag::InternedType | Tag::StructDecl => Self::from_intern(data), + Tag::PointerType | Tag::IfElseExpr => Self::from_index_and_extra_offset(data), Tag::Error => Self::from_error(data), 
Tag::ReturnStmt | Tag::Undefined => Self::from_none(data), } @@ -1388,7 +1519,7 @@ impl Ast { fn push_global_decl( &mut self, ident: intern::Index, - ty: intern::Index, + ty: Index, expr: Index, loc: SourceLocation, ) -> Index { @@ -1430,7 +1561,7 @@ impl Ast { &mut self, is_let: bool, name: intern::Index, - ty: Option, + ty: Option, assignment: Option, loc: SourceLocation, ) -> Index { @@ -1454,16 +1585,10 @@ impl Ast { i } - fn push_struct_decl(&mut self, struct_type: intern::Index, loc: SourceLocation) -> Index { - let i = self.reserve_node(); - self.set_tag_data_source_loc(i, Tag::StructDecl, Data::intern(struct_type), loc); - i - } - fn push_fn_proto( &mut self, ident: intern::Index, - return_type: intern::Index, + return_type: Index, parameter_list: Index, loc: SourceLocation, ) -> Index { @@ -1558,14 +1683,9 @@ impl Ast { i } - fn push_parameter( - &mut self, - name: intern::Index, - ty: intern::Index, - loc: SourceLocation, - ) -> Index { + fn push_parameter(&mut self, name: intern::Index, ty: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); - self.set_tag_data_source_loc(i, Tag::Parameter, Data::two_interns(name, ty), loc); + self.set_tag_data_source_loc(i, Tag::Parameter, Data::index_and_intern(ty, name), loc); i } @@ -1608,9 +1728,9 @@ impl Ast { i } - fn push_cast(&mut self, lhs: Index, ty: intern::Index, loc: SourceLocation) -> Index { + fn push_cast(&mut self, lhs: Index, ty: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); - self.set_tag_data_source_loc(i, Tag::ExplicitCast, Data::index_and_intern(lhs, ty), loc); + self.set_tag_data_source_loc(i, Tag::ExplicitCast, Data::two_indices(lhs, ty), loc); i } @@ -1670,6 +1790,107 @@ impl Ast { self.datas[i.index()] = Data::index(decl); } + fn push_struct_decl>( + &mut self, + name: intern::Index, + flags: StructFlags, + fields: I, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + let (offset, _) = self.extend_extra([flags.pack()]); + let (names, 
types) = fields + .into_iter() + .map(|(name, ty)| (name.into_u32(), ty.into_u32())) + .unzip::<_, _, Vec<_>, Vec<_>>(); + self.extend_extra(types); + self.extend_extra(names); + self.set_tag_data_source_loc( + i, + Tag::StructDecl, + Data::intern_and_extra_offset(name, offset), + loc, + ); + i + } + + fn push_field_decl(&mut self, name: intern::Index, ty: Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::FieldDecl, Data::index_and_intern(ty, name), loc); + + i + } + + fn push_field_access( + &mut self, + expr: Index, + name: intern::Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::FieldAccess, Data::index_and_intern(expr, name), loc); + + i + } + + fn push_interend_type(&mut self, ty: intern::Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::InternedType, Data::intern(ty), loc); + + i + } + + fn push_array_type( + &mut self, + length_expr: Index, + pointer_ty: Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc( + i, + Tag::ArrayType, + Data::two_indices(length_expr, pointer_ty), + loc, + ); + + i + } + + fn push_pointer_type(&mut self, ty: Index, flags: PointerFlags, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc( + i, + Tag::PointerType, + Data::index_and_extra_offset(ty, flags.pack() as u32), + loc, + ); + + i + } + + fn push_type_ref_unresolved( + &mut self, + scope: Index, + ident: intern::Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc( + i, + Tag::TypeDeclRefUnresolved, + Data::index_and_intern(scope, ident), + loc, + ); + + i + } + + fn resolve_type_ref(&mut self, i: Index, decl: Index) { + self.tags[i.index()] = Tag::TypeDeclRef; + self.datas[i.index()] = Data::index(decl); + } + fn push_expr_stmt(&mut self, expr: 
Index) -> Index { let i = self.reserve_node(); let loc = self.get_loc(expr); @@ -1678,14 +1899,9 @@ impl Ast { i } - fn push_constant( - &mut self, - value: intern::Index, - ty: intern::Index, - loc: SourceLocation, - ) -> Index { + fn push_constant(&mut self, value: intern::Index, ty: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); - self.set_tag_data_source_loc(i, Tag::Constant, Data::two_interns(value, ty), loc); + self.set_tag_data_source_loc(i, Tag::Constant, Data::index_and_intern(ty, value), loc); i } @@ -1721,7 +1937,180 @@ impl Display for Children { } } +type TypeCache = BTreeMap; + impl Ast { + fn get_type_of_node( + &self, + ip: &InternPool, + cache: &mut TypeCache, + index: Index, + ) -> intern::Index { + if let Some(ty) = cache.get(&index) { + return *ty; + } + + let void = ip.get_void_type(); + let tag = self.tags[index.index()]; + let data = self.datas[index.index()]; + + let ty = match tag { + Tag::ArgumentList + | Tag::ExprStmt + | Tag::ReturnExprStmt + | Tag::Block + | Tag::ParameterList + | Tag::File => void, + Tag::VarDeclAssignment | Tag::MutVarDeclAssignment => { + let (_, b) = data.as_extra_range(); + self.get_type_of_node(ip, cache, Index::new(self.extra[b - 1])) + } + Tag::VarDecl | Tag::MutVarDecl => { + let (a, b) = data.as_extra_range(); + self.get_type_of_node(ip, cache, Index::new(self.extra[a + 1])) + } + Tag::GlobalDecl => { + let (_, a) = data.as_intern_and_extra_offset(); + self.get_type_of_node(ip, cache, Index::new(self.extra[a])) + } + Tag::FunctionDecl => self.get_type_of_node(ip, cache, data.as_two_indices().0), + Tag::FunctionProto => { + let (_, i) = data.as_intern_and_extra_offset(); + let return_type = { self.datas[self.extra[i] as usize].as_intern() }; + let parameters = { + let (a, b) = self.datas[self.extra[i + 1] as usize].as_extra_range(); + self.extra[a..b].iter().map(|&i| { + // i is index to a parameter, a parameter is (index, intern) + let ty = self.datas[i as usize].as_index_intern().0; + 
self.datas[ty.index()].as_intern() + }) + }; + + ip.try_get_function_type(return_type, parameters).unwrap() + } + Tag::BlockTrailingExpr => { + let (a, b) = data.as_extra_range(); + self.get_type_of_node(ip, cache, Index::new(self.extra[b - 1])) + } + Tag::CallExpr => { + let (expr, _args) = data.as_two_indices(); + let fn_ty = self.get_type_of_node(ip, cache, expr); + if let intern::Key::FunctionType { return_type, .. } = ip.get_key(fn_ty) { + return_type + } else { + eprintln!("lhs of call expr is not a function!"); + void + } + } + Tag::Argument => self.get_type_of_node(ip, cache, data.as_index()), + Tag::NamedArgument => { + let (a, _) = data.as_index_intern(); + self.get_type_of_node(ip, cache, a) + } + Tag::ExplicitCast => { + let (_, a) = data.as_two_indices(); + self.get_type_of_node(ip, cache, a) + } + Tag::FieldAccess => { + let (ty_expr, name) = data.as_index_intern(); + let ty = self.get_type_of_node(ip, cache, ty_expr); + match ip.get_key(ty) { + intern::Key::PointerType { pointee, .. } + if let intern::Key::StructType { fields, .. } = ip.get_key(pointee) => + { + fields + .iter() + .cloned() + .find(|(n, _)| n == &name) + .map(|(_, t)| t) + .unwrap_or(void) + } + intern::Key::StructType { fields, .. } => fields + .iter() + .cloned() + .find(|(n, _)| n == &name) + .map(|(_, t)| t) + .unwrap_or(void), + _ => { + unimplemented!() + } + } + } + Tag::Deref => { + let ty = self.get_type_of_node(ip, cache, data.as_index()); + if let intern::Key::PointerType { pointee, .. } = ip.get_key(ty) { + pointee + } else { + eprintln!("lhs of deref is not a pointer!"); + void + } + } + Tag::SubscriptExpr => { + let ty = self.get_type_of_node(ip, cache, data.as_two_indices().0); + match ip.get_key(ty) { + intern::Key::PointerType { pointee, .. } + | intern::Key::ArrayType { pointee, .. 
} => pointee, + _ => { + eprintln!("lhs of subscript is not an array or pointer!"); + void + } + } + } + Tag::AddressOf => { + let ty = self.get_type_of_node(ip, cache, data.as_index()); + // TODO: find out of the expression is const, volatile for flags + ip.try_get_pointer_type(ty, None).unwrap() + } + Tag::Not | Tag::Negate => self.get_type_of_node(ip, cache, data.as_index()), + Tag::Or + | Tag::And + | Tag::BitOr + | Tag::BitXOr + | Tag::BitAnd + | Tag::Eq + | Tag::NEq + | Tag::Lt + | Tag::Gt + | Tag::Le + | Tag::Ge + | Tag::Shl + | Tag::Shr + | Tag::Add + | Tag::Sub + | Tag::Mul + | Tag::Div + | Tag::Rem => self.get_type_of_node(ip, cache, data.as_two_indices().0), + Tag::IfExpr => ip.get_bool_type(), // really? + Tag::IfElseExpr => { + let (_, b) = data.as_index_and_extra_offset(); + let if_ = Index::new(self.extra[b]); + self.get_type_of_node(ip, cache, if_) + } + Tag::Constant | Tag::Parameter => { + self.get_type_of_node(ip, cache, data.as_index_intern().0) + } + Tag::DeclRef => self.get_type_of_node(ip, cache, data.as_index()), + Tag::StructDecl => { + let (name, _) = data.as_intern_and_extra_offset(); + ip.try_get_struct_type(name, index).unwrap() + } + Tag::Assign + | Tag::Root + | Tag::DeclRefUnresolved + | Tag::Error + | Tag::Undefined + | Tag::ReturnStmt => void, + Tag::FieldDecl => self.get_type_of_node(ip, cache, data.as_index_intern().0), + Tag::InternedType => data.as_intern(), + Tag::TypeDeclRef | Tag::TypeDeclRefUnresolved | Tag::PointerType | Tag::ArrayType => { + unreachable!() + } + }; + + cache.insert(index, ty); + ty + } + fn get_node_children(&self, index: Index) -> Vec { let tag = self.tags[index.index()]; let data = self.datas[index.index()]; @@ -1733,7 +2122,10 @@ impl Ast { } Tag::FunctionProto => { let (_, i) = data.as_intern_and_extra_offset(); - vec![Index::new(self.extra[i + 1])] + self.extra[i..=i + 1] + .iter() + .map(|&i| Index::new(i)) + .collect() } Tag::FunctionDecl => { let (a, b) = data.as_two_indices(); @@ -1752,16 
+2144,20 @@ impl Ast { vec![a] } Tag::VarDeclAssignment | Tag::MutVarDeclAssignment => { - let (a, _) = data.as_extra_range(); - let expr = Index::new(self.extra[a + 1]); - vec![expr] + let (a, b) = data.as_extra_range(); + self.extra[a + 1..b] + .iter() + .map(|&i| Index::new(i)) + .collect() } Tag::GlobalDecl => { let (_, offset) = data.as_intern_and_extra_offset(); - let expr = Index::new(self.extra[offset + 1]); - vec![expr] + self.extra[offset..=offset + 1] + .iter() + .map(|&i| Index::new(i)) + .collect() } - Tag::CallExpr => { + Tag::CallExpr | Tag::ExplicitCast => { let (a, b) = data.as_two_indices(); vec![a, b] } @@ -1773,11 +2169,7 @@ impl Ast { let a = data.as_index(); vec![a] } - Tag::NamedArgument => { - let (a, _) = data.as_index_intern(); - vec![a] - } - Tag::ExplicitCast => { + Tag::FieldDecl | Tag::FieldAccess | Tag::NamedArgument => { let (a, _) = data.as_index_intern(); vec![a] } @@ -1805,6 +2197,7 @@ impl Ast { | Tag::Rem | Tag::Assign | Tag::SubscriptExpr + | Tag::ArrayType | Tag::IfExpr => { let (a, b) = data.as_two_indices(); vec![a, b] @@ -1815,14 +2208,251 @@ impl Ast { let else_ = Index::new(self.extra[b + 1]); vec![a, if_, else_] } - Tag::StructDecl + Tag::PointerType => { + let (a, _) = data.as_index_and_extra_offset(); + vec![a] + } + Tag::StructDecl => { + let (a, offset) = data.as_intern_and_extra_offset(); + let flags = StructFlags::unpack(self.extra[offset]); + self.extra[offset + 1..(offset + 1 + flags.num_fields as usize)] + .iter() + .map(|&i| Index::new(i)) + .collect() + } + Tag::InternedType + | Tag::Root + | Tag::TypeDeclRefUnresolved + | Tag::DeclRefUnresolved + | Tag::Error + | Tag::Undefined + | Tag::TypeDeclRef | Tag::DeclRef - | Tag::Parameter - | Tag::Constant - | Tag::ReturnStmt - | Tag::VarDecl - | Tag::MutVarDecl => vec![], - _ => vec![], + | Tag::ReturnStmt => vec![], + Tag::Parameter | Tag::Constant => { + let (a, _) = data.as_index_intern(); + vec![a] + } + Tag::VarDecl | Tag::MutVarDecl => { + let (a, _) = 
data.as_extra_range(); + + vec![Index::new(self.extra[a + 1])] + } + } + } + + fn comptime_value_of_node( + &self, + ip: &InternPool, + pointer_bits: u16, + cache: &mut TypeCache, + index: Index, + ) -> crate::comptime::ComptimeNumber { + let tag = self.tags[index.index()]; + let data = self.datas[index.index()]; + + match tag { + Tag::Root => todo!(), + Tag::File => todo!(), + Tag::FunctionProto => todo!(), + Tag::FunctionDecl => todo!(), + Tag::ParameterList => todo!(), + Tag::Parameter => todo!(), + Tag::Block => todo!(), + Tag::BlockTrailingExpr => todo!(), + Tag::Constant => { + let (ty, value) = data.as_index_intern(); + let ty = self.get_type_of_node(ip, cache, ty); + interned_type_and_value_to_comptime_number(ip, pointer_bits, ty, value) + } + Tag::ExprStmt => todo!(), + Tag::ReturnStmt => todo!(), + Tag::ReturnExprStmt => todo!(), + Tag::VarDecl => todo!(), + Tag::MutVarDecl => todo!(), + Tag::VarDeclAssignment => todo!(), + Tag::MutVarDeclAssignment => todo!(), + Tag::GlobalDecl => todo!(), + Tag::StructDecl => todo!(), + Tag::FieldDecl => todo!(), + Tag::DeclRef => todo!(), + Tag::DeclRefUnresolved => todo!(), + Tag::InternedType => todo!(), + Tag::TypeDeclRef => todo!(), + Tag::TypeDeclRefUnresolved => todo!(), + Tag::PointerType => todo!(), + Tag::ArrayType => todo!(), + Tag::CallExpr => todo!(), + Tag::FieldAccess => todo!(), + Tag::ArgumentList => todo!(), + Tag::Argument => todo!(), + Tag::NamedArgument => todo!(), + Tag::ExplicitCast => todo!(), + Tag::Deref => todo!(), + Tag::AddressOf => todo!(), + Tag::Not => todo!(), + Tag::Negate => todo!(), + Tag::Or => todo!(), + Tag::And => todo!(), + Tag::BitOr => todo!(), + Tag::BitXOr => todo!(), + Tag::BitAnd => todo!(), + Tag::Eq => todo!(), + Tag::NEq => todo!(), + Tag::Lt => todo!(), + Tag::Gt => todo!(), + Tag::Le => todo!(), + Tag::Ge => todo!(), + Tag::Shl => todo!(), + Tag::Shr => todo!(), + Tag::Add => todo!(), + Tag::Sub => todo!(), + Tag::Mul => todo!(), + Tag::Div => todo!(), + Tag::Rem => 
todo!(), + Tag::Assign => todo!(), + Tag::SubscriptExpr => todo!(), + Tag::IfExpr => todo!(), + Tag::IfElseExpr => todo!(), + Tag::Error => todo!(), + Tag::Undefined => todo!(), + } + } +} + +fn interned_type_and_value_to_comptime_number( + ip: &InternPool, + pointer_bits: u16, + ty: intern::Index, + val: intern::Index, +) -> crate::comptime::ComptimeNumber { + use crate::ast::IntegralType; + use crate::comptime::*; + + let ty_key = ip.get_key(ty); + match ty_key { + intern::Key::SIntType { bits } | intern::Key::UIntType { bits } => { + let ty = IntegralType::new(false, bits); + match ip.get_key(val) { + intern::Key::SIntSmall { bits } => ComptimeNumber::Integral(ComptimeInt::Native { + bits: bits as _, + ty, + }), + intern::Key::UIntSmall { bits } => ComptimeNumber::Integral(ComptimeInt::Native { + bits: bits as _, + ty, + }), + intern::Key::SInt64 { bits } => ComptimeNumber::Integral(ComptimeInt::Native { + bits: bits as _, + ty, + }), + intern::Key::UInt64 { bits } => ComptimeNumber::Integral(ComptimeInt::Native { + bits: bits as _, + ty, + }), + intern::Key::PositiveInt { bigint } => { + ComptimeNumber::Integral(ComptimeInt::BigInt { bits: bigint, ty }) + } + intern::Key::NegativeInt { bigint } => { + ComptimeNumber::Integral(ComptimeInt::BigInt { bits: bigint, ty }) + } + _ => { + unreachable!() + } + } + } + intern::Key::SimpleType { ty } => match ty { + intern::SimpleType::F32 => match ip.get_key(val) { + intern::Key::F32 { bits } => { + ComptimeNumber::Floating(ComptimeFloat::Binary32(bits)) + } + _ => { + unreachable!() + } + }, + intern::SimpleType::F64 => match ip.get_key(val) { + intern::Key::F64 { bits } => { + ComptimeNumber::Floating(ComptimeFloat::Binary64(bits)) + } + _ => { + unreachable!() + } + }, + intern::SimpleType::Bool => match ip.get_key(val) { + intern::Key::TrueValue => ComptimeNumber::Bool(true), + intern::Key::FalseValue => ComptimeNumber::Bool(false), + _ => unreachable!(), + }, + intern::SimpleType::Void => todo!(), + 
intern::SimpleType::USize | intern::SimpleType::ISize => { + let ty = IntegralType::new( + matches!( + ty_key, + intern::Key::SimpleType { + ty: intern::SimpleType::ISize + } + ), + pointer_bits, + ); + + match ip.get_key(val) { + intern::Key::SIntSmall { bits } => { + ComptimeNumber::Integral(ComptimeInt::Native { + bits: bits as _, + ty, + }) + } + intern::Key::UIntSmall { bits } => { + ComptimeNumber::Integral(ComptimeInt::Native { + bits: bits as _, + ty, + }) + } + intern::Key::SInt64 { bits } => ComptimeNumber::Integral(ComptimeInt::Native { + bits: bits as _, + ty, + }), + intern::Key::UInt64 { bits } => ComptimeNumber::Integral(ComptimeInt::Native { + bits: bits as _, + ty, + }), + intern::Key::PositiveInt { bigint } => { + ComptimeNumber::Integral(ComptimeInt::BigInt { bits: bigint, ty }) + } + intern::Key::NegativeInt { bigint } => { + ComptimeNumber::Integral(ComptimeInt::BigInt { bits: bigint, ty }) + } + _ => { + unreachable!() + } + } + } + intern::SimpleType::ComptimeInt => { + let bigint = match ip.get_key(val) { + intern::Key::SIntSmall { bits } => { + BigInt::from_signed_bytes_le(&bits.to_le_bytes()) + } + intern::Key::UIntSmall { bits } => { + BigInt::from_signed_bytes_le(&bits.to_le_bytes()) + } + intern::Key::SInt64 { bits } => { + BigInt::from_signed_bytes_le(&bits.to_le_bytes()) + } + intern::Key::UInt64 { bits } => { + BigInt::from_signed_bytes_le(&bits.to_le_bytes()) + } + intern::Key::PositiveInt { bigint } | intern::Key::NegativeInt { bigint } => { + bigint + } + _ => { + unreachable!() + } + }; + ComptimeNumber::Integral(ComptimeInt::Comptime(bigint)) + } + }, + _ => { + unreachable!() } } } @@ -1831,15 +2461,23 @@ pub struct AstRenderer<'a> { ast: &'a Ast, #[allow(dead_code)] syms: &'a crate::symbol_table::syms2::Symbols, + ip: &'a InternPool, scopes: Vec, + cache: TypeCache, } impl<'a> AstRenderer<'a> { - pub fn new(ast: &'a Ast, syms: &'a crate::symbol_table::syms2::Symbols) -> Self { + pub fn new( + ast: &'a Ast, + ip: &'a 
InternPool, + syms: &'a crate::symbol_table::syms2::Symbols, + ) -> Self { Self { ast, syms, + ip, scopes: Vec::new(), + cache: TypeCache::new(), } } @@ -1859,7 +2497,8 @@ impl<'a> AstRenderer<'a> { } let children = Children(self.ast.get_node_children(node)); - writeln_indented!(indent, w, "{node} = ({loc}) {tag:?} {}", children)?; + let ty = self.ast.get_type_of_node(self.ip, &mut self.cache, node); + writeln_indented!(indent, w, "{node} ({ty}) = ({loc}) {tag:?} {}", children)?; for child in children.0 { self.render_node(w, indent + 1, child)?; @@ -1895,6 +2534,7 @@ pub mod ast_gen { lexer::{Radix, TokenItem, TokenIterator}, symbol_table::syms2::SymbolKind, tokens::PRECEDENCE_MAP, + variant, }; use super::*; @@ -1934,7 +2574,128 @@ pub mod ast_gen { } pub fn display(&self) -> AstRenderer<'_> { - AstRenderer::new(&self.ast, &self.syms) + AstRenderer::new(&self.ast, &self.intern, &self.syms) + } + + pub fn fold_and_typecheck(&mut self) {} + + pub fn intern_types(&mut self) { + let mut nodes = self + .ast + .get_root_file_indices() + .map(|i| A::PushChildren(i)) + .collect::>(); + + enum A { + PushChildren(Index), + PopSelf(Index), + } + + while let Some(node) = nodes.pop() { + match node { + A::PushChildren(i) => { + nodes.push(A::PopSelf(i)); + nodes.extend( + self.ast + .get_node_children(i) + .into_iter() + .map(|i| A::PushChildren(i)), + ); + } + A::PopSelf(i) => { + let tag = self.ast.tags[i.index()]; + let data = self.ast.datas[i.index()]; + match tag { + Tag::ArrayType => { + let (length, pointee) = data.as_two_indices(); + let pointee = self.ast.datas[pointee.index()].as_intern(); + variant!( self.intern.get_key(pointee) => intern::Key::PointerType { pointee, flags }); + + let length = { + let value = self.ast.datas[length.index()].as_index_intern().1; + + match self.intern.get_key(value) { + intern::Key::SIntSmall { bits } => bits as u32, + intern::Key::UIntSmall { bits } => bits as u32, + intern::Key::SInt64 { bits } => bits as u32, + intern::Key::UInt64 { 
bits } => bits as u32, + intern::Key::NegativeInt { bigint } + | intern::Key::PositiveInt { bigint } => { + bigint.iter_u32_digits().next().unwrap_or(0) + } + _ => 0, + } + }; + + let ty = self.intern.get_array_type(pointee, Some(flags), length); + self.ast.tags[i.index()] = Tag::InternedType; + self.ast.datas[i.index()] = Data::intern(ty); + } + Tag::PointerType => { + let (pointee, flags) = data.as_index_and_extra_offset(); + let pointee = self.ast.datas[pointee.index()].as_intern(); + let ty = self.intern.get_pointer_type( + pointee, + Some(PointerFlags::unpack(flags as u8)), + ); + self.ast.tags[i.index()] = Tag::InternedType; + self.ast.datas[i.index()] = Data::intern(ty); + } + Tag::TypeDeclRef => { + let decl = data.as_index(); + let (name, _) = + self.ast.datas[decl.index()].as_intern_and_extra_offset(); + + let ty = self.intern.get_struct_type(name, decl); + self.ast.tags[i.index()] = Tag::InternedType; + self.ast.datas[i.index()] = Data::intern(ty); + } + Tag::FunctionProto => { + let (_, i) = data.as_intern_and_extra_offset(); + let return_type = self.ast.get_type_of_node( + &self.intern, + &mut TypeCache::new(), + Index::new(self.ast.extra[i]), + ); + let parameters = { + let (a, b) = self.ast.datas[self.ast.extra[i + 1] as usize] + .as_extra_range(); + self.ast.extra[a..b].iter().map(|&i| { + // i is index to a parameter, a parameter is (index, intern) + let ty = self.ast.datas[i as usize].as_index_intern().0; + self.ast.datas[ty.index()].as_intern() + }) + }; + + self.intern.get_function_type(return_type, parameters); + } + Tag::StructDecl => { + let (name, offset) = data.as_intern_and_extra_offset(); + let flags = StructFlags::unpack(self.ast.extra[offset]); + + let types = (offset + 1)..(offset + 1 + flags.num_fields as usize); + let names = (offset + 1 + flags.num_fields as usize) + ..(offset + 1 + flags.num_fields as usize * 2); + + let types = self.ast.extra[types] + .iter() + .map(|&i| Index::new(i)) + .map(|i| 
self.ast.datas[i.index()].as_intern()); + let names = self.ast.extra[names].iter().map(|&i| intern::Index(i)); + + self.intern.insert_or_replace_struct_type( + name, + i, + flags.packed, + flags.c_like, + names.zip(types), + ); + } + _ => {} + } + } + } + } } pub fn resolve_decl_refs(&mut self) { @@ -1946,6 +2707,27 @@ pub mod ast_gen { Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { scopes.push(node); } + _ => {} + } + + let children = self.ast.get_node_children(node); + nodes.extend(children.into_iter().rev()); + + match self.ast.tags[node.index()] { + Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { + scopes.pop(); + } + Tag::TypeDeclRefUnresolved => { + let (scope, name) = self.ast.datas[node.index()].as_index_intern(); + // look in my_scope + if let Some(decl) = self.syms.find_type_symbol( + scope, + name, + self.ast.source_locs[node.index()], + ) { + self.ast.resolve_type_ref(node, decl) + }; + } Tag::DeclRefUnresolved => { let (scope, name) = self.ast.datas[node.index()].as_index_intern(); @@ -1959,15 +2741,6 @@ pub mod ast_gen { } _ => {} } - - nodes.extend(self.ast.get_node_children(node)); - - match self.ast.tags[node.index()] { - Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { - scopes.pop(); - } - _ => {} - } } } @@ -1988,7 +2761,8 @@ pub mod ast_gen { Ok(name) } - fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> ParseResult { + fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); tokens.eat_token(Token::Star).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Star), loc: tokens.current_source_location(), @@ -2002,65 +2776,65 @@ pub mod ast_gen { let pointee = self.parse_type(tokens)?; Ok(self - .intern - .get_pointer_type(pointee, Some(PointerFlags::new(cnst, vol, noalias)))) + .ast + .push_pointer_type(pointee, PointerFlags::new(cnst, vol, noalias), loc)) } /// [LENGTH]const? volatile? noalias? 
TYPE - fn parse_array_type(&mut self, tokens: &mut TokenIterator) -> ParseResult { - let start = tokens.eat_token(Token::OpenSquareBracket).unwrap(); - let length = match self.parse_expr(tokens) { - Ok(i) => { - _ = tokens - .eat_token(Token::CloseSquareBracket) - .ok_or(ErrorInfo { - error: ParseError::ExpectedTypeName, - loc: tokens.current_source_location(), - })?; - - i - } - Err(err) => { - tokens.advance_past_end_of_bracketed().ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::CloseSquareBracket), + fn parse_array_type(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let length_expr = self.parse_bracketed(tokens, |this, tokens| { + let next = tokens.peek_token().ok_or(ErrorInfo { + error: ParseError::UnexpectedEndOfTokens, + loc: tokens.current_source_location(), + })?; + match next.token() { + Token::IntegerBinConstant + | Token::IntegerHexConstant + | Token::IntegerOctConstant + | Token::IntegerConstant => { + _ = tokens.next(); + Ok(this.parse_integral_constant(&next, next.source_location())) + } + _ => Err(ErrorInfo { + error: ParseError::ExpectedConstantLiteral, loc: tokens.current_source_location(), - })?; - self.push_error(err.error, err.loc) + }), } - }; + })?; let &[cnst, vol, noalias] = &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3] else { unreachable!() }; - let pointee = self.parse_type(tokens)?; - Ok(self.intern.get_array_type( - pointee, - Some(PointerFlags::new(cnst, vol, noalias)), - 0, // length, TODO: evaluate this tree branch for an u32 - )) + let pointee = self.parse_type(tokens)?; + let pointer = + self.ast + .push_pointer_type(pointee, PointerFlags::new(cnst, vol, noalias), loc); + + Ok(self.ast.push_array_type(length_expr, pointer, loc)) } fn parse_simple_type(&mut self, token: Token) -> Option { match token { - Token::Void => Some(self.intern.get_assume_present(intern::Key::SimpleType { + Token::Void => 
Some(self.intern.get_assume_present(&intern::Key::SimpleType { ty: SimpleType::Void, })), - Token::Bool => Some(self.intern.get_assume_present(intern::Key::SimpleType { + Token::Bool => Some(self.intern.get_assume_present(&intern::Key::SimpleType { ty: SimpleType::Bool, })), - Token::F32 => Some(self.intern.get_assume_present(intern::Key::SimpleType { + Token::F32 => Some(self.intern.get_assume_present(&intern::Key::SimpleType { ty: SimpleType::F32, })), - Token::F64 => Some(self.intern.get_assume_present(intern::Key::SimpleType { + Token::F64 => Some(self.intern.get_assume_present(&intern::Key::SimpleType { ty: SimpleType::F64, })), - Token::USize => Some(self.intern.get_assume_present(intern::Key::SimpleType { + Token::USize => Some(self.intern.get_assume_present(&intern::Key::SimpleType { ty: SimpleType::USize, })), - Token::ISize => Some(self.intern.get_assume_present(intern::Key::SimpleType { + Token::ISize => Some(self.intern.get_assume_present(&intern::Key::SimpleType { ty: SimpleType::ISize, })), _ => None, @@ -2118,7 +2892,7 @@ pub mod ast_gen { Ok(Some(self.intern.get_int_type(signed, bits))) } - fn try_parse_integral_constant( + fn parse_integral_constant_inner( &mut self, item: &TokenItem, ) -> (intern::Index, intern::Index) { @@ -2165,10 +2939,17 @@ pub mod ast_gen { } }; - (interned, ty.unwrap_or(self.intern.get_comptime_int_type())) + let ty = ty.unwrap_or(self.intern.get_comptime_int_type()); + (interned, ty) } - fn parse_floating_constant(&mut self, item: &TokenItem) -> (intern::Index, intern::Index) { + fn parse_integral_constant(&mut self, item: &TokenItem, loc: SourceLocation) -> Index { + let (interned, ty) = self.parse_integral_constant_inner(item); + let ty = self.ast.push_interend_type(ty, loc); + return self.ast.push_constant(interned, ty, loc); + } + + fn parse_floating_constant(&mut self, item: &TokenItem, loc: SourceLocation) -> Index { let lexeme = item.lexeme(); let lexeme = lexeme .strip_suffix("f32") @@ -2190,7 +2971,8 @@ pub mod 
ast_gen { }) }; - (bits, lexeme.1) + let ty = self.ast.push_interend_type(lexeme.1, loc); + return self.ast.push_constant(bits, ty, loc); } /// TYPE <- @@ -2199,7 +2981,8 @@ pub mod ast_gen { /// SIMPLE_TYPE /// [ TYPE ; CONSTANT_EXPR ] /// INTEGRAL_TYPE // u[0..65535] | i[0..65535] - fn parse_type(&mut self, tokens: &mut TokenIterator) -> ParseResult { + fn parse_type(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); match tokens .peek_token() .ok_or(ErrorInfo { @@ -2218,12 +3001,16 @@ pub mod ast_gen { error, loc: token.source_location(), })? { - Some(int) => Ok(int), + Some(int) => Ok(self.ast.push_interend_type(int, loc)), None => { let name = self.intern.get_or_insert(intern::Key::String { str: token.lexeme(), }); - Ok(name) + // TODO: this will cause issues with redefinitions of types with the same name + // and actually, make type into a proper node of the ast + Ok(self + .ast + .push_type_ref_unresolved(self.current_scope(), name, loc)) } } } @@ -2232,9 +3019,9 @@ pub mod ast_gen { error: ParseError::ExpectedTypeName, loc: tokens.current_source_location(), })?; - _ = tokens.next(); - Ok(ty) + + Ok(self.ast.push_interend_type(ty, loc)) } } } @@ -2333,7 +3120,10 @@ pub mod ast_gen { let return_type = if let Some(_) = tokens.eat_token(Token::MinusGreater) { self.parse_type(tokens)? } else { - self.intern.get_void_type() + self.ast.push_interend_type( + self.intern.get_void_type(), + tokens.current_source_location(), + ) }; return Ok(self.ast.push_fn_proto(ident, return_type, parameters, loc)); @@ -2434,7 +3224,6 @@ pub mod ast_gen { let is_let = let_or_var.token() == Token::Let; let name = self.parse_ident(tokens)?; - let name_loc = let_or_var.source_location(); let ty = if tokens.eat_token(Token::Colon).is_some() { Some(self.parse_type(tokens)?) 
@@ -2452,7 +3241,7 @@ pub mod ast_gen { self.syms.insert_symbol( self.current_scope(), name, - SymbolKind::Local(name_loc), + SymbolKind::Local(tokens.current_source_location()), decl, ); @@ -2468,6 +3257,9 @@ pub mod ast_gen { let mut statements = Vec::new(); let trailing = loop { + if tokens.is_next_token(Token::CloseBrace) { + break None; + } let next = tokens.peek_token().ok_or(ErrorInfo { error: ParseError::UnexpectedEndOfTokens, loc: tokens.current_source_location(), @@ -2477,9 +3269,6 @@ pub mod ast_gen { statements.push(decl); } else { match next.token() { - Token::CloseBrace => { - break None; - } Token::Return => { statements.push(self.parse_return_stmt(tokens)?); } @@ -2641,16 +3430,14 @@ pub mod ast_gen { | Token::IntegerOctConstant | Token::IntegerConstant => { _ = tokens.next(); - let (value, ty) = self.try_parse_integral_constant(&next); - return Ok(self.ast.push_constant(value, ty, loc)); + return Ok(self.parse_integral_constant(&next, next.source_location())); } Token::FloatingConstant | Token::FloatingExpConstant | Token::DotFloatingConstant | Token::DotFloatingExpConstant => { _ = tokens.next(); - let (value, ty) = self.parse_floating_constant(&next); - return Ok(self.ast.push_constant(value, ty, loc)); + return Ok(self.parse_floating_constant(&next, next.source_location())); } Token::OpenParens => { @@ -2687,10 +3474,22 @@ pub mod ast_gen { /// PRIMARY_EXPR ( ) /// PRIMARY_EXPR ( ARGUMENT_LIST ) /// PRIMARY_EXPR [ EXPR ] + /// POSTFIX_EXPR . IDENTIFIER fn parse_postfix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { - let lhs = self.parse_primary_expr(tokens)?; + let mut lhs = self.parse_primary_expr(tokens)?; + while let Some(postfix) = self.try_parse_postfix_expr_inner(tokens, lhs)? 
{ + lhs = postfix; + } - if let Some(next) = tokens.peek_token() { + Ok(lhs) + } + + fn try_parse_postfix_expr_inner( + &mut self, + tokens: &mut TokenIterator, + lhs: Index, + ) -> ParseResult> { + let lhs = if let Some(next) = tokens.peek_token() { let loc = next.source_location(); match next.token() { Token::OpenParens => { @@ -2702,19 +3501,29 @@ pub mod ast_gen { } })?; - return Ok(self.ast.push_call_expr(lhs, arguments, loc)); + Some(self.ast.push_call_expr(lhs, arguments, loc)) } Token::OpenSquareBracket => { let subscript = self.parse_bracketed(tokens, |this, tokens| this.parse_expr(tokens))?; - return Ok(self - .ast - .push_binary(Tag::SubscriptExpr, lhs, subscript, loc)); + Some( + self.ast + .push_binary(Tag::SubscriptExpr, lhs, subscript, loc), + ) } - _ => {} + Token::Dot if tokens.is_next_token2(Token::Ident) => { + _ = tokens.next(); + let loc = tokens.current_source_location(); + let name = self.parse_ident(tokens)?; + + Some(self.ast.push_field_access(lhs, name, loc)) + } + _ => None, } - } + } else { + None + }; Ok(lhs) } @@ -3000,7 +3809,7 @@ pub mod ast_gen { }); }; - match next.token() { + let decl = match next.token() { Token::Struct => self.parse_struct_decl(tokens, name, c_like, packed, loc), Token::Union => { unimplemented!() @@ -3032,7 +3841,12 @@ pub mod ast_gen { } } } - } + }?; + + self.syms + .insert_symbol(self.current_scope(), name, SymbolKind::Type, decl); + + Ok(decl) } /// SUMTYPE_DECL <- @@ -3082,8 +3896,8 @@ pub mod ast_gen { let decl = self.parse_braced(tokens, |this, tokens| { this.parse_struct_fields(tokens).map(|fields| { _ = tokens.eat_token(Token::Comma); - let struct_type = this.intern.get_struct_type(name, packed, c_like, fields); - this.ast.push_struct_decl(struct_type, loc) + let flags = StructFlags::new(packed, c_like, fields.len() as u32); + this.ast.push_struct_decl(name, flags, fields, loc) }) })?; @@ -3233,7 +4047,7 @@ pub mod ast_gen { fn parse_struct_fields( &mut self, tokens: &mut TokenIterator, - ) -> 
ParseResult> { + ) -> ParseResult> { let mut fields = Vec::new(); loop { fields.push(self.parse_struct_field(tokens)?); @@ -3256,7 +4070,7 @@ pub mod ast_gen { fn parse_struct_field( &mut self, tokens: &mut TokenIterator, - ) -> ParseResult<(intern::Index, intern::Index)> { + ) -> ParseResult<(intern::Index, Index)> { let name = self.parse_ident(tokens)?; let Some(_) = tokens.eat_token(Token::Colon) else { return Err(ErrorInfo { @@ -3327,7 +4141,10 @@ pub mod ast_gen { pub fn parse(&mut self, mut tokens: TokenIterator) { let file = self.parse_file(&mut tokens); self.ast.set_root([file]); + eprintln!("resolving decls:"); self.resolve_decl_refs(); + eprintln!("interning types:"); + self.intern_types(); } fn push_scope(&mut self, ast: Index, name: intern::Index) { @@ -3395,3 +4212,19 @@ pub mod ast_gen { } } } + +pub mod ir_gen { + use intern::InternPool; + + use super::*; + use crate::{symbol_table::syms2::Symbols, triples::*}; + + struct IRGen { + ast: Ast, + syms: Symbols, + intern: InternPool, + ir: IR, + } + + impl IRGen {} +} diff --git a/src/lib.rs b/src/lib.rs index a7fe9c6..ddf2975 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ iter_intersperse, iter_array_chunks, int_roundings, + if_let_guard, debug_closure_helpers )] #![allow(unused_macros)] diff --git a/src/symbol_table.rs b/src/symbol_table.rs index b8bca7a..b690c4c 100644 --- a/src/symbol_table.rs +++ b/src/symbol_table.rs @@ -430,6 +430,7 @@ pub mod syms2 { Const, Function, Type, + __TypeScope, Scope, ParentScope, Local(SourceLocation), @@ -547,6 +548,40 @@ pub mod syms2 { } } + pub fn find_type_symbol( + &self, + scope: AstIndex, + name: InternIndex, + loc: SourceLocation, + ) -> Option { + use SymbolKind::*; + let range = self.inner.range( + Key::Symbol { + scope, + name, + kind: __First, + }..=Key::Symbol { + scope, + name, + kind: __TypeScope, + }, + ); + + if let Some((_, payload)) = range.rev().next() { + Some(payload.as_ast()) + } else { + if let Some(parent) = 
self.inner.get(&Key::Symbol { + scope, + name: InternIndex::invalid(), + kind: ParentScope, + }) { + self.find_symbol(parent.as_ast(), name, loc) + } else { + None + } + } + } + pub fn insert_symbol( &mut self, scope: AstIndex, diff --git a/tests/legal/array.sea b/tests/legal/array.sea new file mode 100644 index 0000000..4143eb4 --- /dev/null +++ b/tests/legal/array.sea @@ -0,0 +1,4 @@ +fn main() { + var arr: [4] u8; + arr[0] = 1; +} \ No newline at end of file diff --git a/tests/legal/struct.sea b/tests/legal/struct.sea index 18af87d..1de34b6 100644 --- a/tests/legal/struct.sea +++ b/tests/legal/struct.sea @@ -1,8 +1,12 @@ type MyStruct = struct { i: i32, b: bool, + next: *MyStruct, } fn square_if_true(arg: MyStruct) -> i32 { - 0 + if (arg.b) + arg.i * arg.i + else + 0 } \ No newline at end of file