#![allow(dead_code)] use std::{ collections::BTreeMap, fmt::{Debug, Display}, num::NonZero, }; use intern::{InternPool, PointerFlags, StructFlags}; use num_bigint::BigInt; use crate::{lexer::SourceLocation, tokens::Token, writeln_indented}; pub mod intern { use std::{ collections::BTreeMap, fmt::Display, hash::{Hash, Hasher}, }; use num_bigint::{BigInt, BigUint, Sign}; use crate::{ common::{from_lo_hi_dwords, into_lo_hi_dwords}, variant, }; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[repr(u8)] pub enum SimpleType { F32 = 0, F64, Bool, Void, USize, ISize, ComptimeInt, } impl From for SimpleType { fn from(value: u8) -> Self { match value { 0 => Self::F32, 1 => Self::F64, 2 => Self::Bool, 3 => Self::Void, 4 => Self::USize, 5 => Self::ISize, 6 => Self::ComptimeInt, _ => panic!("{value} is not a simple type"), } } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Tag { String, SIntSmall, UIntSmall, TrueValue, FalseValue, UInt64, SInt64, F32, F64, PositiveInt, NegativeInt, UIntType, SIntType, SimpleType, PointerType, ArrayType, FunctionType, StructType, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] struct Item { tag: Tag, index: u32, } #[derive(Debug, Clone, PartialEq)] #[non_exhaustive] pub enum Key<'a> { String { str: &'a str, }, SIntSmall { bits: i32, }, UIntSmall { bits: u32, }, SInt64 { bits: i64, }, UInt64 { bits: u64, }, F32 { bits: f32, }, F64 { bits: f64, }, PositiveInt { bigint: BigInt, }, NegativeInt { bigint: BigInt, }, UIntType { bits: u16, }, SIntType { bits: u16, }, SimpleType { ty: SimpleType, }, PointerType { pointee: Index, flags: PointerFlags, }, ArrayType { pointee: Index, flags: PointerFlags, length: u32, }, FunctionType { return_type: Index, parameters: Vec, }, StructType { decl: super::Index, name: Index, packed: bool, c_like: bool, /// vec of (Name, Type) fields: Vec<(Index, Index)>, }, TrueValue, FalseValue, } impl Hash for Key<'_> { fn hash(&self, state: &mut H) { core::mem::discriminant(self).hash(state); match self 
{ Key::String { str } => str.hash(state), Key::SIntSmall { bits } => bits.hash(state), Key::UIntSmall { bits } => bits.hash(state), Key::SInt64 { bits } => bits.hash(state), Key::UInt64 { bits } => bits.hash(state), Key::F32 { bits } => ordered_float::OrderedFloat(*bits).hash(state), Key::F64 { bits } => ordered_float::OrderedFloat(*bits).hash(state), Key::PositiveInt { bigint } => bigint.hash(state), Key::NegativeInt { bigint } => bigint.hash(state), Key::UIntType { bits } => bits.hash(state), Key::SIntType { bits } => bits.hash(state), Key::SimpleType { ty } => ty.hash(state), Key::PointerType { pointee, flags } => (pointee, flags).hash(state), Key::ArrayType { pointee, flags, length, } => (*pointee, *flags, *length).hash(state), Key::StructType { name, decl, .. } => (*name, *decl).hash(state), Key::FunctionType { return_type, parameters, } => (return_type, parameters).hash(state), Key::TrueValue | Key::FalseValue => {} } } } // #[repr(packed)] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct PointerFlags { pub volatile: bool, pub is_const: bool, pub noalias: bool, } impl PointerFlags { pub fn new(is_const: bool, volatile: bool, noalias: bool) -> Self { Self { is_const, volatile, noalias, } } pub fn pack(self) -> u8 { (self.volatile as u8) << 0 | (self.is_const as u8) << 1 | (self.noalias as u8) << 2 } pub fn unpack(packed: u8) -> Self { Self { volatile: packed & (1 << 0) != 0, is_const: packed & (1 << 1) != 0, noalias: packed & (1 << 2) != 0, } } } #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct StructFlags { pub packed: bool, pub c_like: bool, pub num_fields: u32, } impl StructFlags { const MASK: u32 = (1u32 << 30) - 1; pub fn new(packed: bool, c_like: bool, num_fields: u32) -> Self { assert!(num_fields < (1 << 30)); Self { packed, c_like, num_fields, } } pub fn pack(self) -> u32 { assert!(self.num_fields < (1 << 30)); (self.packed as u32) << 31 | (self.c_like as u32) << 30 | self.num_fields & Self::MASK } pub fn 
unpack(packed: u32) -> Self { Self { packed: packed & (1 << 31) != 0, c_like: packed & (1 << 30) != 0, num_fields: packed & Self::MASK, } } } #[derive(Debug, Clone, Copy)] struct FunctionInfo { void_return: bool, num_params: u32, } impl FunctionInfo { fn new(void_return: bool, num_params: u32) -> Self { Self { void_return, num_params, } } const MASK: u32 = 1u32 << (u32::BITS - 1); fn pack(self) -> u32 { (self.void_return as u32 * Self::MASK) | self.num_params & !Self::MASK } fn unpack(packed: u32) -> Self { Self { void_return: packed & Self::MASK != 0, num_params: packed & !Self::MASK, } } fn len(self) -> u32 { self.void_return as u32 + self.num_params } } impl Item { fn idx(self) -> usize { self.index as usize } } #[repr(transparent)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Index(pub u32); impl Display for Index { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "#{}", self.0) } } impl Index { pub fn into_u32(self) -> u32 { unsafe { core::mem::transmute(self) } } pub fn as_u32(&self) -> &u32 { unsafe { core::mem::transmute(self) } } fn index(&self) -> usize { self.0 as usize } pub fn is_valid(&self) -> bool { self.0 != u32::MAX } pub fn invalid() -> Self { Self(u32::MAX) } } pub struct InternPool { tags: Vec, indices: Vec, // strings: Vec, words: Vec, hashed: BTreeMap, } impl std::fmt::Debug for InternPool { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("InternPool") .field_with("keys", |f| { let mut list = f.debug_list(); let keys = (0..self.indices.len()) .map(|i| Index(i as u32)) .map(|idx| (idx, self.get_key(idx))); for (idx, key) in keys { list.entry_with(|f| write!(f, "{}: {key:?}", idx.0)); } list.finish() }) .field_with("hashed", |f| { let mut list = f.debug_list(); for (hash, idx) in self.hashed.iter() { list.entry_with(|f| write!(f, "{hash}: {}", idx.0)); } list.finish() }) .finish_non_exhaustive() } } const STATIC_KEYS: [Key; 21] = [ 
Key::SimpleType { ty: SimpleType::Bool, }, Key::SimpleType { ty: SimpleType::F32, }, Key::SimpleType { ty: SimpleType::F64, }, Key::SimpleType { ty: SimpleType::USize, }, Key::SimpleType { ty: SimpleType::ISize, }, Key::SimpleType { ty: SimpleType::Void, }, Key::SimpleType { ty: SimpleType::ComptimeInt, }, Key::SIntType { bits: 1 }, Key::UIntType { bits: 1 }, Key::SIntType { bits: 0 }, Key::UIntType { bits: 0 }, Key::SIntType { bits: 8 }, Key::UIntType { bits: 8 }, Key::SIntType { bits: 16 }, Key::UIntType { bits: 16 }, Key::SIntType { bits: 32 }, Key::UIntType { bits: 32 }, Key::SIntType { bits: 64 }, Key::UIntType { bits: 64 }, Key::TrueValue, Key::FalseValue, ]; impl InternPool { pub fn get_void_type(&self) -> Index { self.get_assume_present(&Key::SimpleType { ty: SimpleType::Void, }) } pub fn get_bool_type(&self) -> Index { self.get_assume_present(&Key::SimpleType { ty: SimpleType::Bool, }) } pub fn get_true_value(&self) -> Index { self.get_assume_present(&Key::TrueValue) } pub fn get_false_value(&self) -> Index { self.get_assume_present(&Key::FalseValue) } pub fn get_f32_type(&self) -> Index { self.get_assume_present(&Key::SimpleType { ty: SimpleType::F32, }) } pub fn get_f64_type(&self) -> Index { self.get_assume_present(&Key::SimpleType { ty: SimpleType::F64, }) } pub fn get_comptime_int_type(&self) -> Index { self.get_assume_present(&Key::SimpleType { ty: SimpleType::ComptimeInt, }) } pub fn get_usize_type(&self) -> Index { self.get_assume_present(&Key::SimpleType { ty: SimpleType::USize, }) } pub fn get_isize_type(&self) -> Index { self.get_assume_present(&Key::SimpleType { ty: SimpleType::ISize, }) } pub fn get_u0_type(&self) -> Index { self.get_assume_present(&Key::UIntType { bits: 0 }) } pub fn get_i0_type(&self) -> Index { self.get_assume_present(&Key::SIntType { bits: 0 }) } pub fn get_u1_type(&self) -> Index { self.get_assume_present(&Key::UIntType { bits: 1 }) } pub fn get_i1_type(&self) -> Index { self.get_assume_present(&Key::SIntType { bits: 1 }) 
} pub fn get_u8_type(&self) -> Index { self.get_assume_present(&Key::UIntType { bits: 8 }) } pub fn get_i8_type(&self) -> Index { self.get_assume_present(&Key::SIntType { bits: 8 }) } pub fn get_u16_type(&self) -> Index { self.get_assume_present(&Key::UIntType { bits: 16 }) } pub fn get_i16_type(&self) -> Index { self.get_assume_present(&Key::SIntType { bits: 16 }) } pub fn get_u32_type(&self) -> Index { self.get_assume_present(&Key::UIntType { bits: 32 }) } pub fn get_i32_type(&self) -> Index { self.get_assume_present(&Key::SIntType { bits: 32 }) } pub fn get_u64_type(&self) -> Index { self.get_assume_present(&Key::UIntType { bits: 64 }) } pub fn get_i64_type(&self) -> Index { self.get_assume_present(&Key::SIntType { bits: 64 }) } } #[derive(Debug, Clone, Copy)] pub struct TypeInfo { pub bitsize: u32, pub bitalign: u32, } impl InternPool { pub fn size_of_type(&self, index: Index, ptr_size: TypeInfo) -> TypeInfo { match self.get_key(index) { Key::UIntType { bits } => { let bits = bits as u32; TypeInfo { bitsize: bits, bitalign: bits.next_multiple_of(8).next_power_of_two(), } } Key::SIntType { bits } => { let bits = bits as u32; TypeInfo { bitsize: bits, bitalign: bits.next_multiple_of(8).next_power_of_two(), } } Key::SimpleType { ty } => match ty { SimpleType::F32 => TypeInfo { bitsize: 32, bitalign: 32, }, SimpleType::F64 => TypeInfo { bitsize: 64, bitalign: 64, }, SimpleType::Bool => TypeInfo { bitsize: 1, bitalign: 1, }, SimpleType::Void => TypeInfo { bitsize: 0, bitalign: 0, }, SimpleType::USize => ptr_size, SimpleType::ISize => ptr_size, SimpleType::ComptimeInt => panic!("comptime int is unsized"), }, Key::PointerType { .. } => ptr_size, Key::ArrayType { pointee, length, .. } => { let element_size = self.size_of_type(pointee, ptr_size); let bitsize = element_size.bitalign * length; TypeInfo { bitsize, ..element_size } } Key::FunctionType { .. } => ptr_size, Key::StructType { packed, fields, .. 
} => {
                    // TODO: c-like layout
                    // NOTE(review): padding is applied only when `packed` is set, which looks
                    // inverted for a packed layout, and `next_multiple_of` panics when a
                    // field reports an alignment of 0 (e.g. `void`) — confirm the intended
                    // layout rules before relying on this for packed structs.
                    let (size, align) =
                        fields.iter().fold((0, 0), |(size, align), (_name, ty)| {
                            let field_size = self.size_of_type(*ty, ptr_size);
                            let size = size + field_size.bitsize;
                            let size = if packed {
                                size.next_multiple_of(field_size.bitalign)
                            } else {
                                size
                            };
                            let align = align.max(field_size.bitalign);
                            (size, align)
                        });
                    TypeInfo {
                        bitsize: size,
                        bitalign: align,
                    }
                }
                _ => {
                    panic!("index was not a type")
                }
            }
        }
    }

    impl InternPool {
        /// Creates a pool that already contains every key in `STATIC_KEYS`.
        pub fn create() -> Self {
            let mut this = Self {
                tags: Vec::new(),
                indices: Vec::new(),
                strings: Vec::new(),
                words: Vec::new(),
                hashed: BTreeMap::new(),
            };
            this.extend_keys(STATIC_KEYS);
            this
        }

        /// Inserts every key of `keys` unconditionally and records its hash digest.
        fn extend_keys<'a, K: IntoIterator<Item = Key<'a>>>(&mut self, keys: K) {
            for k in keys.into_iter() {
                let mut hasher = std::hash::DefaultHasher::new();
                k.hash(&mut hasher);
                let digest = hasher.finish();

                let i = self.insert(k);
                self.hashed.insert(digest, i);
            }
        }

        /// Number of interned items.
        ///
        /// # Panics
        /// Panics if the pool holds more than `u32::MAX` items.
        fn len(&self) -> u32 {
            // The message is built lazily: this runs on every `get_item`/`create_item`,
            // so an eager `format!` would allocate on each lookup.
            u32::try_from(self.tags.len()).unwrap_or_else(|err| {
                panic!("more than {} items in internpool!: {err:?}", u32::MAX)
            })
        }

        /// Returns the index of `key`, interning it first if it is not present yet.
        ///
        /// NOTE(review): presence is decided by the 64-bit hash digest alone; two distinct
        /// keys with colliding digests would be treated as the same entry.
        pub fn get_or_insert(&mut self, key: Key) -> Index {
            let mut hasher = std::hash::DefaultHasher::new();
            key.hash(&mut hasher);
            let digest = hasher.finish();

            if let Some(&idx) = self.hashed.get(&digest) {
                idx
            } else {
                let i = self.insert(key);
                self.hashed.insert(digest, i);
                i
            }
        }

        /// Encodes `key` into the pool's storage and returns the index of the new item.
        ///
        /// Small payloads are stored inline in the item's `index` word; everything else is
        /// written to `words` (or `strings`) and the item stores the offset of its first word.
        /// This does not touch `hashed`; callers are responsible for recording the digest.
        fn insert(&mut self, key: Key) -> Index {
            match key {
                Key::String { str } => {
                    let len = str.len() as u32;
                    let start = self.extend_strings(str);
                    let words_idx = self.extend_words([start, len]);
                    self.create_item(Tag::String, words_idx)
                }
                Key::SIntSmall { bits } => self.create_item(Tag::SIntSmall, bits as u32),
                Key::UIntSmall { bits } => self.create_item(Tag::UIntSmall, bits),
                // Floats are stored as their raw bit pattern: `get_key` reconstructs them
                // with `from_bits`, so a numeric `as` cast here would not round-trip.
                Key::F32 { bits } => self.create_item(Tag::F32, bits.to_bits()),
                Key::F64 { bits } => {
                    let (lo, hi) = into_lo_hi_dwords(bits.to_bits());
                    let words_idx = self.extend_words([lo, hi]);
                    self.create_item(Tag::F64, words_idx)
                }
                Key::SInt64 { bits } => {
                    let (lo, hi) = into_lo_hi_dwords(bits as u64);
                    let i = self.extend_words([lo, hi]);
                    self.create_item(Tag::SInt64, i)
                }
                Key::UInt64 { bits } => {
                    let (lo, hi) = into_lo_hi_dwords(bits);
                    let i = self.extend_words([lo, hi]);
                    self.create_item(Tag::UInt64, i)
                }
                Key::PositiveInt { bigint } => {
                    // layout: [digit_count, digit0, digit1, ...]
                    let (_, words) = bigint.to_u32_digits();
                    let i = self.push_word(words.len() as u32);
                    _ = self.extend_words(words);
                    self.create_item(Tag::PositiveInt, i)
                }
                Key::NegativeInt { bigint } => {
                    // same layout as `PositiveInt`; the sign is carried by the tag.
                    let (_, words) = bigint.to_u32_digits();
                    let i = self.push_word(words.len() as u32);
                    _ = self.extend_words(words);
                    self.create_item(Tag::NegativeInt, i)
                }
                Key::UIntType { bits } => self.create_item(Tag::UIntType, bits as u32),
                Key::SIntType { bits } => self.create_item(Tag::SIntType, bits as u32),
                Key::SimpleType { ty } => self.create_item(Tag::SimpleType, ty as u8 as u32),
                Key::PointerType { pointee, flags } => {
                    let flags = flags.pack();
                    let i = self.extend_words([pointee.0, flags as u32]);
                    self.create_item(Tag::PointerType, i)
                }
                Key::ArrayType {
                    pointee,
                    flags,
                    length,
                } => {
                    let flags = flags.pack();
                    let i = self.extend_words([pointee.0, flags as u32, length]);
                    self.create_item(Tag::ArrayType, i)
                }
                Key::StructType {
                    name,
                    decl,
                    packed,
                    c_like,
                    fields,
                } => {
                    // layout: [name, decl, flags, fields_offset]; the last word stays
                    // `u32::MAX` until fields are attached.
                    let flags = StructFlags::new(packed, c_like, fields.len() as u32).pack();
                    let i =
                        self.extend_words([name.into_u32(), decl.into_u32(), flags, u32::MAX]);
                    if !fields.is_empty() {
                        let fields_offset = self.extend_words(
                            fields
                                .into_iter()
                                .flat_map(|(n, t)| [n.into_u32(), t.into_u32()]),
                        );
                        self.words[i as usize + 3] = fields_offset;
                    }
                    self.create_item(Tag::StructType, i)
                }
                Key::FunctionType {
                    return_type,
                    parameters,
                } => {
                    // layout: [info, return_type, param0, param1, ...]
                    let info = FunctionInfo::new(
                        return_type == self.get_simple_type(SimpleType::Void),
                        parameters.len() as u32,
                    );
                    let start = self.push_word(info.pack());
                    self.extend_words([return_type.into_u32()]);
                    _ = self.extend_words(parameters.into_iter().map(|i| i.0));
                    self.create_item(Tag::FunctionType, start)
                }
                Key::TrueValue => self.create_item(Tag::TrueValue, 0),
                Key::FalseValue => self.create_item(Tag::FalseValue, 0),
            }
        }

        /// Appends the bytes of `b` to `strings` and returns the offset they start at.
        fn extend_strings<B: AsRef<[u8]>>(&mut self, b: B) -> u32 {
            let idx = self.strings.len() as u32;
            self.strings.extend(b.as_ref());
            idx
        }

        /// Appends all words of `i` to `words` and returns the offset of the first one.
        fn extend_words<I: IntoIterator<Item = u32>>(&mut self, i: I) -> u32 {
            let idx = self.words.len() as u32;
            self.words.extend(i);
            idx
        }

        /// Appends a single word to `words` and returns its offset.
        fn push_word(&mut self, word: u32) -> u32 {
            let idx = self.words.len() as u32;
            self.words.push(word);
            idx
        }

        /// Registers a new item with the given tag and payload word, returning its index.
        fn create_item(&mut self, tag: Tag, index: u32) -> Index {
            let len = self.len();
            self.tags.push(tag);
            self.indices.push(index);
            Index(len)
        }

        /// Decodes the item at `index` back into a [`Key`].
        ///
        /// # Panics
        /// Panics if `index` is out of bounds for this pool.
        pub fn get_key(&self, index: Index) -> Key {
            let item = self
                .get_item(index)
                .unwrap_or_else(|| panic!("index {index} is out of bounds of the intern pool"));
            match item.tag {
                Tag::String => {
                    let start = self.words[item.idx()];
                    let len = self.words[item.idx() + 1];
                    // SAFETY: the only writer of `strings` visible here is `insert`, which
                    // copies the bytes of a `&str` and records exactly this (start, len).
                    let str = unsafe {
                        core::str::from_utf8_unchecked(
                            &self.strings[start as usize..][..len as usize],
                        )
                    };
                    Key::String { str }
                }
                Tag::UIntSmall => Key::UIntSmall { bits: item.index },
                Tag::SIntSmall => Key::SIntSmall {
                    bits: item.index as i32,
                },
                Tag::F32 => Key::F32 {
                    bits: f32::from_bits(item.index),
                },
                Tag::F64 => {
                    let idx = item.idx();
                    let bits = from_lo_hi_dwords(self.words[idx], self.words[idx + 1]);
                    Key::F64 {
                        bits: f64::from_bits(bits),
                    }
                }
                Tag::SInt64 => {
                    let bits =
                        from_lo_hi_dwords(self.words[item.idx()], self.words[item.idx() + 1])
                            as i64;
                    Key::SInt64 { bits }
                }
                Tag::UInt64 => {
                    let bits =
                        from_lo_hi_dwords(self.words[item.idx()], self.words[item.idx() + 1]);
                    Key::UInt64 { bits }
                }
                Tag::NegativeInt => {
                    let len = self.words[item.idx()];
                    let start = item.idx() + 1;
                    let end = start + len as usize;
                    let data = BigUint::from_slice(&self.words[start..end]);
                    let bigint = BigInt::from_biguint(Sign::Minus, data);
                    Key::NegativeInt { bigint }
                }
                Tag::PositiveInt => {
                    let len = self.words[item.idx()];
                    let start = item.idx() + 1;
                    let end = start + len as usize;
                    let data = BigUint::from_slice(&self.words[start..end]);
                    let bigint = BigInt::from_biguint(Sign::Plus, data);
                    Key::PositiveInt { bigint }
                }
                Tag::SIntType => Key::SIntType {
                    bits: item.index as u16,
                },
                Tag::UIntType => Key::UIntType {
                    bits: item.index as u16,
                },
                Tag::SimpleType => Key::SimpleType {
                    // checked conversion instead of transmuting an arbitrary byte
                    ty: SimpleType::from(item.index as u8),
                },
                Tag::PointerType => {
                    let pointee = Index(self.words[item.idx()]);
                    let flags = PointerFlags::unpack(self.words[item.idx() + 1] as u8);
                    Key::PointerType { pointee, flags }
                }
                Tag::ArrayType => {
                    let pointee = Index(self.words[item.idx()]);
                    let flags = PointerFlags::unpack(self.words[item.idx() + 1] as u8);
                    let length = self.words[item.idx() + 2];
                    Key::ArrayType {
                        pointee,
                        flags,
                        length,
                    }
                }
                Tag::StructType => {
                    let name = Index(self.words[item.idx()]);
                    let decl = super::Index::new(self.words[item.idx() + 1]);
                    let flags = StructFlags::unpack(self.words[item.idx() + 2]);
                    let fields = if flags.num_fields != 0 {
                        let fields_offset = self.words[item.idx() + 3] as usize;
                        let fields_end = fields_offset + flags.num_fields as usize * 2;
                        // fields are stored as consecutive (name, type) word pairs
                        self.words[fields_offset..fields_end]
                            .chunks_exact(2)
                            .map(|pair| (Index(pair[0]), Index(pair[1])))
                            .collect::<Vec<_>>()
                    } else {
                        vec![]
                    };
                    Key::StructType {
                        name,
                        decl,
                        packed: flags.packed,
                        c_like: flags.c_like,
                        fields,
                    }
                }
                Tag::FunctionType => {
                    let info = FunctionInfo::unpack(self.words[item.idx()]);
                    // `insert` always writes the return type directly after the info word,
                    // also for `void` functions, so the parameters always start two words
                    // in and there are exactly `num_params` of them.
                    let return_type = Index(self.words[item.idx() + 1]);
                    let start = item.idx() + 2;
                    let end = start + info.num_params as usize;
                    let parameters = self.words[start..end]
                        .iter()
                        .map(|&i| Index(i))
                        .collect::<Vec<_>>();
                    Key::FunctionType {
                        return_type,
                        parameters,
                    }
                }
                Tag::TrueValue => Key::TrueValue,
                Tag::FalseValue => Key::FalseValue,
            }
        }

        /// Looks up `key` by its hash digest without inserting it.
        pub fn try_get_index(&self, key: &Key) -> Option<Index> {
            let mut hasher =
std::hash::DefaultHasher::new();
            key.hash(&mut hasher);
            let digest = hasher.finish();

            self.hashed.get(&digest).cloned()
        }

        /// Returns the index of `key`.
        ///
        /// # Panics
        /// Panics if `key` has not been interned.
        pub fn get_assume_present(&self, key: &Key) -> Index {
            // The message is formatted lazily so that successful lookups do not pay for
            // a `Debug` rendering of the key.
            self.try_get_index(key)
                .unwrap_or_else(|| panic!("key {key:?} not present in pool."))
        }

        /// Interns the signed or unsigned integer type with the given bit width.
        pub fn get_int_type(&mut self, signed: bool, bits: u16) -> Index {
            let key = if signed {
                Key::SIntType { bits }
            } else {
                Key::UIntType { bits }
            };

            self.get_or_insert(key)
        }

        /// Interns `str` and returns its index.
        pub fn get_string_index(&mut self, str: &str) -> Index {
            self.get_or_insert(Key::String { str })
        }

        /// Returns the index of `str` if it has already been interned.
        pub fn try_get_string_index(&self, str: &str) -> Option<Index> {
            self.try_get_index(&Key::String { str })
        }

        /// Interns the simple type `ty`.
        pub fn get_simple_type(&mut self, ty: SimpleType) -> Index {
            self.get_or_insert(Key::SimpleType { ty })
        }

        /// Returns the index of the simple type `ty` if it is present.
        pub fn try_get_simple_type(&self, ty: SimpleType) -> Option<Index> {
            self.try_get_index(&Key::SimpleType { ty })
        }

        /// Interns the function type `(parameters...) -> return_type`.
        pub fn get_function_type<P: IntoIterator<Item = Index>>(
            &mut self,
            return_type: Index,
            parameters: P,
        ) -> Index {
            self.get_or_insert(Key::FunctionType {
                return_type,
                parameters: parameters.into_iter().collect(),
            })
        }

        /// Returns the index of the function type if it is present.
        pub fn try_get_function_type<P: IntoIterator<Item = Index>>(
            &self,
            return_type: Index,
            parameters: P,
        ) -> Option<Index> {
            self.try_get_index(&Key::FunctionType {
                return_type,
                parameters: parameters.into_iter().collect(),
            })
        }

        /// Interns a pointer to `pointee`; `None` flags mean `PointerFlags::default()`.
        pub fn get_pointer_type(&mut self, pointee: Index, flags: Option<PointerFlags>) -> Index {
            let key = Key::PointerType {
                pointee,
                flags: flags.unwrap_or_default(),
            };
            self.get_or_insert(key)
        }

        /// Returns the index of the pointer type if it is present.
        pub fn try_get_pointer_type(
            &self,
            pointee: Index,
            flags: Option<PointerFlags>,
        ) -> Option<Index> {
            self.try_get_index(&Key::PointerType {
                pointee,
                flags: flags.unwrap_or_default(),
            })
        }

        /// Interns the struct type identified by `(name, decl)` with the given fields, or,
        /// if it is already interned, replaces its flags and field list in place.
        ///
        /// The hash of a struct key only covers `name` and `decl`, so a probe key with an
        /// empty field list finds a previously interned (e.g. forward-declared) entry.
        pub fn insert_or_replace_struct_type<I: IntoIterator<Item = (Index, Index)>>(
            &mut self,
            name: Index,
            decl: super::Index,
            packed: bool,
            c_like: bool,
            fields: I,
        ) -> Index {
            let probe = Key::StructType {
                name,
                decl,
                packed,
                c_like,
                fields: vec![],
            };

            // Not interned yet: insert it together with its fields. Previously the
            // field iterator was dropped here and the struct ended up with no fields.
            let Some(index) = self.try_get_index(&probe) else {
                return self.get_or_insert(Key::StructType {
                    name,
                    decl,
                    packed,
                    c_like,
                    fields: fields.into_iter().collect(),
                });
            };

            if let Some(item) = self.get_item(index) {
                // Append the new field pairs and repoint the existing header at them;
                // the old field words (if any) are left behind unused.
                let fields_offset = self.extend_words(
                    fields
                        .into_iter()
                        .flat_map(|(n, t)| [n.into_u32(), t.into_u32()]),
                );
                self.words[item.idx() + 3] = fields_offset;

                let fields_end = self.words.len() as u32;
                let num_fields = (fields_end - fields_offset) / 2;
                self.words[item.idx() + 2] =
                    StructFlags::new(packed, c_like, num_fields).pack();
            }

            index
        }

        /// Interns a field-less struct type for `(name, decl)`, or returns the existing one.
        pub fn get_struct_type(&mut self, name: Index, decl: super::Index) -> Index {
            let key = Key::StructType {
                name,
                decl,
                packed: false,
                c_like: false,
                fields: vec![],
            };
            self.get_or_insert(key)
        }

        /// Returns the index of the struct type for `(name, decl)` if it is present.
        pub fn try_get_struct_type(&self, name: Index, decl: super::Index) -> Option<Index> {
            self.try_get_index(&Key::StructType {
                name,
                decl,
                packed: false,
                c_like: false,
                fields: vec![],
            })
        }

        /// Interns an array type of `length` elements of `pointee`.
        pub fn get_array_type(
            &mut self,
            pointee: Index,
            flags: Option<PointerFlags>,
            length: u32,
        ) -> Index {
            let key = Key::ArrayType {
                pointee,
                flags: flags.unwrap_or_default(),
                length,
            };
            self.get_or_insert(key)
        }

        /// Returns the index of the array type if it is present.
        pub fn try_get_array_type(
            &self,
            pointee: Index,
            flags: Option<PointerFlags>,
            length: u32,
        ) -> Option<Index> {
            self.try_get_index(&Key::ArrayType {
                pointee,
                flags: flags.unwrap_or_default(),
                length,
            })
        }

        /// Returns the string stored at `index`.
        ///
        /// # Panics
        /// Panics if `index` is out of bounds or does not refer to a string.
        pub fn get_str(&self, index: Index) -> &str {
            let Key::String { str } = self.get_key(index) else {
                panic!("index {index} does not refer to an interned string");
            };
            str
        }

        /// Returns `index` back if it refers to an existing item.
        fn check_bounds(&self, index: Index) -> Option<Index> {
            (index.0 < self.len()).then_some(index)
        }

        /// Fetches the raw (tag, payload word) pair stored at `index`, if in bounds.
        fn get_item(&self, index: Index) -> Option<Item> {
            self.check_bounds(index).map(|i| Item {
                tag: self.tags[i.index()],
                index: self.indices[i.index()],
            })
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Tag {
    /// pseudo tag, contains a range from a..b into extra of all files.
    Root,
    /// `data` is a range from a..b into extra of all global nodes.
File, /// `data` is an intern to a name, and an index into extra of [index: return_type, index: ParameterList] FunctionProto, /// `data` is an index to a FunctionProto and an index to a Block FunctionDecl, /// `data` is a range from a..b into extra of indices to parameters ParameterList, /// `data` is an index to a type, and an intern to a name Parameter, /// `data` is range from a..b into `extra` of indices to statements Block, /// `data` is range from a..b into `extra` of indices to statements, where the last one is an expression BlockTrailingExpr, /// `data` is an index to a type, and an intern to a value Constant, /// `data` is an index to an expression ExprStmt, /// `data` is none ReturnStmt, /// `data` is an index to an expr ReturnExprStmt, /// `data` is a range from a..b into `extra` of `[name: intern, type: index]` VarDecl, /// `data` is a range from a..b into `extra` of `[name: intern, type: index]` MutVarDecl, /// `data` is a range from a..b into `extra` of `[name: intern, expr: index, type?: index]` VarDeclAssignment, /// `data` is a range from a..b into `extra` of `[name: intern, expr: index, type?: index]` MutVarDeclAssignment, /// `data` is an intern to a name, and an offset into `extra` of `[type: index, expr: index]` GlobalDecl, /// `data` is an intern to a name, and an offset into extra of `[flags, type0 ,..., typeN ,name0 ,..., nameN]` StructDecl, /// `data` is an index to a type, and an intern to a name FieldDecl, /// `data` is an index to a VarDecl, GlobalDecl or FunctionDecl DeclRef, /// `data` is an inlined key into the symbol table (scope: index, name: intern) DeclRefUnresolved, /// `data` is an intern of a type InternedType, /// `data` is an index to a StructDecl TypeDeclRef, /// `data` is an inlined key into the symbol table (scope: index, name: intern) TypeDeclRefUnresolved, /// `data` is an index to a Type and u32 PointerFlags PointerType, /// `data` is an index to a length expression, and an underlying pointer type ArrayType, /// `data` 
is an index to an expr and an index to an ArgumentList CallExpr, /// `data` is an index to an expr and an intern to a field name FieldAccess, /// `data` is a range from a..b into extra of indices to arguments ArgumentList, /// `data` is an index to an expression Argument, /// `data` is an index to an expression, and an intern to a name NamedArgument, /// `data` is an index to lhs, and an index to the type ExplicitCast, /// `data` is a single index to an expr Deref, AddressOf, Not, Negate, /// data is two indices for `lhs` and `rhs` Or, And, BitOr, BitXOr, BitAnd, Eq, NEq, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Rem, Assign, SubscriptExpr, IfExpr, /// `data` is an index to an expression and an index into extra for [if, else] IfElseExpr, // TODO: /// `data` is a ParseError Error, /// placeholder tag for reserved indices/nodes, `data` is none Undefined, } #[derive(Debug, Clone, Copy, thiserror::Error, PartialEq, Eq)] enum ParseError { #[error("Unexpected end of token iter.")] UnexpectedEndOfTokens, #[error("Expected Token {0}.")] ExpectedToken(Token), #[error("Expected Token {0}, but other token was found.")] ExpectedTokenNotFound(Token), #[error("Expected either a function declaration or a global variable.")] UnexpectedTokenAtFileScope, #[error("Expected Ident.")] ExpectedIdent, #[error("Integral types may not be wider than 65535 bits.")] IntegralTypeTooWide, #[error("Expected typename.")] ExpectedTypeName, #[error("Dummy Message.")] ExpectedFunctionPrototype, #[error("Dummy Message.")] ExpectedPrimaryExpression, #[error("Dummy Message.")] ExpectedConstantLiteral, #[error("Dummy Message.")] ExpectedExpression, #[error("Dummy Message.")] ExpectedPostfixExpression, #[error("Dummy Message.")] ExpectedPrefixExpression, #[error("Dummy Message.")] ExpectedArgumentList, #[error("Dummy Message.")] ExpectedStatement, #[error("Dummy Message.")] UnmatchedParens(u32), #[error("Dummy Message.")] ExpectedTypeDeclaration, #[error("Dummy Message.")] UnexpectedTypeAttributes, 
#[error("Dummy Message.")] UnmatchedSquareBracket(u32), #[error("Dummy Message.")] ExpectedEndOfBlock, #[error("Dummy Message.")] UnmatchedBrace(u32), #[error("Dummy Message.")] UnmatchedDelimiter(u32), #[error("Error in child node {0:?}.")] ErrorNode(Index), } #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] pub struct Index(NonZero); impl Display for Index { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "%{}", self.0.get()) } } impl Index { pub fn new(i: u32) -> Index { Self(NonZero::::new(i).unwrap()) } pub fn as_u32(&self) -> &u32 { unsafe { core::mem::transmute(self) } } pub fn into_u32(self) -> u32 { unsafe { core::mem::transmute(self) } } fn index(self) -> usize { self.0.get() as usize } } #[repr(packed)] #[derive(Clone, Copy)] struct Node { /// defines the type of the node in the tree tag: Tag, data: Data, } #[derive(Clone, Copy)] union Data { none: (), error: ParseError, index: Index, two_indices: (Index, Index), range: (Index, Index), extra_range: (u32, u32), intern: intern::Index, index_intern: (Index, intern::Index), two_interns: (intern::Index, intern::Index), intern_and_extra_offset: (intern::Index, u32), index_and_extra_offset: (Index, u32), } #[derive(Debug)] #[allow(dead_code)] enum ExpandedData { None, Error(ParseError), Index(Index), TwoIndices(Index, Index), Range(Index, Index), ExtraRange(usize, usize), Intern(intern::Index), IndexIntern(Index, intern::Index), TwoInterns(intern::Index, intern::Index), InternAndExtraOffset(intern::Index, usize), IndexAndExtraOffset(Index, usize), } impl ExpandedData { fn from_none(data: Data) -> Self { Self::None } fn from_error(data: Data) -> Self { Self::Error(data.as_error()) } fn from_index(data: Data) -> Self { Self::Index(data.as_index()) } fn from_two_indices(data: Data) -> Self { let data = data.as_two_indices(); Self::TwoIndices(data.0, data.1) } fn from_range(data: Data) -> Self { let data = data.as_index_range(); 
Self::Range(data.0, data.1)
    }
    fn from_extra_range(data: Data) -> Self {
        let data = data.as_extra_range();
        Self::ExtraRange(data.0, data.1)
    }
    fn from_intern(data: Data) -> Self {
        let data = data.as_intern();
        Self::Intern(data)
    }
    fn from_index_intern(data: Data) -> Self {
        let data = data.as_index_intern();
        Self::IndexIntern(data.0, data.1)
    }
    fn from_two_interns(data: Data) -> Self {
        let data = data.as_two_interns();
        Self::TwoInterns(data.0, data.1)
    }
    fn from_intern_and_extra_offset(data: Data) -> Self {
        let data = data.as_intern_and_extra_offset();
        Self::InternAndExtraOffset(data.0, data.1)
    }
    fn from_index_and_extra_offset(data: Data) -> Self {
        let data = data.as_index_and_extra_offset();
        Self::IndexAndExtraOffset(data.0, data.1)
    }
}

/// Decodes the untyped `Data` of a node into its tagged, printable form.
///
/// The arm chosen for each tag must match the `Data` constructor used when the node was
/// written, because the union is read without any runtime check.
impl From<(Tag, Data)> for ExpandedData {
    fn from((tag, data): (Tag, Data)) -> Self {
        match tag {
            // `push_fn_proto` and `push_global_decl` both store
            // `Data::intern_and_extra_offset(name, offset)`. Decoding a prototype as
            // `(Index, offset)` would reinterpret an intern index (which may be 0) as a
            // `NonZero` node index.
            Tag::FunctionProto | Tag::GlobalDecl => Self::from_intern_and_extra_offset(data),
            Tag::ParameterList => Self::from_extra_range(data),
            Tag::Root => Self::from_extra_range(data),
            Tag::File => Self::from_extra_range(data),
            Tag::ArgumentList
            | Tag::VarDecl
            | Tag::MutVarDecl
            | Tag::VarDeclAssignment
            | Tag::MutVarDeclAssignment
            | Tag::BlockTrailingExpr
            | Tag::Block => Self::from_extra_range(data),
            Tag::FieldDecl | Tag::Constant | Tag::Parameter => Self::from_index_intern(data),
            Tag::Or
            | Tag::And
            | Tag::BitOr
            | Tag::BitXOr
            | Tag::BitAnd
            | Tag::Eq
            | Tag::NEq
            | Tag::Lt
            | Tag::Gt
            | Tag::Le
            | Tag::Ge
            | Tag::Shl
            | Tag::Shr
            | Tag::Add
            | Tag::Sub
            | Tag::Mul
            | Tag::Div
            | Tag::Rem
            | Tag::Assign
            | Tag::IfExpr
            | Tag::SubscriptExpr
            | Tag::CallExpr
            | Tag::ArrayType
            | Tag::FunctionDecl => Self::from_two_indices(data),
            Tag::ReturnExprStmt
            | Tag::DeclRef
            | Tag::TypeDeclRef
            | Tag::Argument
            | Tag::Deref
            | Tag::AddressOf
            | Tag::Not
            | Tag::Negate
            | Tag::ExprStmt => Self::from_index(data),
            // NOTE(review): the `Tag::ExplicitCast` docs describe two node indices
            // (lhs, type), yet it is decoded as (index, intern) here — confirm against
            // the code that pushes cast nodes.
            Tag::FieldAccess
            | Tag::DeclRefUnresolved
            | Tag::TypeDeclRefUnresolved
            | Tag::NamedArgument
            | Tag::ExplicitCast => Self::from_index_intern(data),
            // NOTE(review): the `Tag::StructDecl` docs mention a name plus an offset into
            // `extra`; only the name is decoded here — confirm against the code that
            // pushes struct declarations.
            Tag::InternedType | Tag::StructDecl => Self::from_intern(data),
            Tag::PointerType | Tag::IfElseExpr => Self::from_index_and_extra_offset(data),
            Tag::Error => Self::from_error(data),
            Tag::ReturnStmt | Tag::Undefined => Self::from_none(data),
        }
    }
}

/// Untyped accessors for the union.
///
/// SAFETY (applies to every accessor below): nothing in these methods checks which
/// variant was written; the caller has to pick the accessor that matches the constructor
/// used for the node, normally by dispatching on the node's `Tag`.
impl Data {
    fn as_error(self) -> ParseError {
        unsafe { self.error }
    }
    fn as_index(self) -> Index {
        unsafe { self.index }
    }
    fn as_two_indices(self) -> (Index, Index) {
        unsafe { self.two_indices }
    }
    fn as_index_range(self) -> (Index, Index) {
        unsafe { self.range }
    }
    fn as_extra_range(self) -> (usize, usize) {
        let (a, b) = unsafe { self.extra_range };
        (a as usize, b as usize)
    }
    fn as_intern(self) -> intern::Index {
        unsafe { self.intern }
    }
    fn as_two_interns(self) -> (intern::Index, intern::Index) {
        unsafe { self.two_interns }
    }
    fn as_index_intern(self) -> (Index, intern::Index) {
        unsafe { self.index_intern }
    }
    fn as_index_and_extra_offset(self) -> (Index, usize) {
        let (i, e) = unsafe { self.index_and_extra_offset };
        (i, e as usize)
    }
    fn as_intern_and_extra_offset(self) -> (intern::Index, usize) {
        let (i, e) = unsafe { self.intern_and_extra_offset };
        (i, e as usize)
    }
}

/// Constructors, one per union variant.
impl Data {
    fn none() -> Self {
        Self { none: () }
    }
    fn error(error: ParseError) -> Self {
        Self { error }
    }
    fn index(index: Index) -> Self {
        Self { index }
    }
    fn two_indices(a: Index, b: Index) -> Self {
        Self {
            two_indices: (a, b),
        }
    }
    fn two_interns(a: intern::Index, b: intern::Index) -> Self {
        Self {
            two_interns: (a, b),
        }
    }
    fn range_of_indices(a: Index, b: Index) -> Self {
        Self { range: (a, b) }
    }
    fn extra_range(a: u32, b: u32) -> Self {
        Self {
            extra_range: (a, b),
        }
    }
    fn intern(intern: intern::Index) -> Self {
        Self { intern }
    }
    fn index_and_intern(index: Index, intern: intern::Index) -> Self {
        Self {
            index_intern: (index, intern),
        }
    }
    fn intern_and_extra_offset(intern: intern::Index, offset: u32) -> Self {
        Self {
            intern_and_extra_offset: (intern, offset),
        }
    }
    fn index_and_extra_offset(index: Index, offset: u32) -> Self
{
        Self {
            index_and_extra_offset: (index, offset),
        }
    }
}

/// Flat syntax tree stored as parallel per-node arrays.
///
/// Every node owns one slot in `tags`, `datas` and `source_locs` (see
/// `reserve_node`). `extra` is a shared buffer of `u32` words that node
/// payloads refer to either by a start offset or by a `(start, end)` range.
pub struct Ast {
    tags: Vec,
    datas: Vec,
    extra: Vec,
    source_locs: Vec,
}

// Debug output: one `(index, tag, expanded data, location)` entry per node,
// followed by the raw `extra` buffer.
impl Debug for Ast {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Ast")
            .field_with("nodes", |f| {
                let mut list = f.debug_list();
                // Adapter that prints a location through its `Display` impl, in parentheses.
                struct LocDisplay(SourceLocation);
                impl Debug for LocDisplay {
                    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                        write!(f, "({})", self.0)
                    }
                }
                let entries = self
                    .tags
                    .iter()
                    .cloned()
                    .zip(self.datas.iter().cloned())
                    .zip(self.source_locs.iter().cloned())
                    .enumerate()
                    .map(|(i, ((tag, data), loc))| {
                        (i, tag, ExpandedData::from((tag, data)), LocDisplay(loc))
                    });
                list.entries(entries).finish()
            })
            .field("extra", &self.extra)
            .finish()
    }
}

// Node builders. `push_*` reserves a fresh node and fills it; `set_*` fills a
// node that was reserved earlier.
impl Ast {
    /// Creates a tree that holds only the root node in slot 0, with an empty extra range.
    fn new() -> Ast {
        Self {
            tags: vec![Tag::Root],
            datas: vec![Data::extra_range(0, 0)],
            extra: vec![],
            source_locs: vec![SourceLocation::new(0, 0)],
        }
    }

    /// Appends a placeholder node (`Tag::Undefined`, no data, invalid location)
    /// to all three per-node arrays and returns its index.
    fn reserve_node(&mut self) -> Index {
        // SAFETY: relies on `tags` being non-empty, which holds for trees built
        // by `Ast::new` (the root occupies slot 0) since nothing here removes nodes.
        // NOTE(review): `as u32` would wrap to 0 at 2^32 nodes — confirm this is unreachable.
        let i = unsafe { Index(NonZero::new_unchecked(self.tags.len() as u32)) };
        self.tags.push(Tag::Undefined);
        self.datas.push(Data::none());
        self.source_locs.push(SourceLocation::invalid());

        i
    }

    /// Returns the source location recorded for `index`.
    fn get_loc(&self, index: Index) -> SourceLocation {
        self.source_locs[index.index()]
    }

    /// Pushes a `Tag::Error` node carrying `error`.
    fn push_error(&mut self, error: ParseError, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::Error, Data::error(error), loc);

        i
    }

    /// Turns node `i` into a `File` node whose payload is the extra range holding `decls`.
    fn set_file>(&mut self, i: Index, decls: I, loc: SourceLocation) {
        let (extra_start, extra_end) = self.extend_extra_by_indices(decls);
        self.set_tag_data_source_loc(i, Tag::File, Data::extra_range(extra_start, extra_end), loc);
    }

    /// Reserves a node and makes it a `File` node listing `decls`.
    fn push_file>(&mut self, decls: I, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_file(i, decls, loc);
        i
    }

    /// Rewrites slot 0 as a `Root` node listing `decls`; the root's source
    /// location is left as it was.
    fn set_root>(&mut self, decls: I) {
        let (extra_start, extra_end) = self.extend_extra_by_indices(decls);
        self.tags[0] = Tag::Root;
        self.datas[0] = Data::extra_range(extra_start, extra_end);
    }

    /// Iterates over the node indices stored in the root node's extra range.
    fn get_root_file_indices<'a>(&'a self) -> impl Iterator + 'a {
        let (a, b) = self.datas[0].as_extra_range();
        self.extra[a..b].iter().cloned().map(|i| Index::new(i))
    }

    /// Pushes a `GlobalDecl`; extra layout at the stored offset is `[ty, expr]`.
    fn push_global_decl(
        &mut self,
        ident: intern::Index,
        ty: Index,
        expr: Index,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        let (extra_start, _) = self.extend_extra([ty.into_u32(), expr.into_u32()]);
        self.set_tag_data_source_loc(
            i,
            Tag::GlobalDecl,
            Data::intern_and_extra_offset(ident, extra_start),
            loc,
        );

        i
    }

    /// Turns node `i` into a `FunctionDecl` with payload `(proto, body)`.
    fn set_fn_decl(&mut self, i: Index, proto: Index, body: Index, loc: SourceLocation) {
        self.set_tag_data_source_loc(i, Tag::FunctionDecl, Data::two_indices(proto, body), loc);
    }

    /// Reserves a node and makes it a `FunctionDecl`.
    fn push_fn_decl(&mut self, proto: Index, body: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_fn_decl(i, proto, body, loc);
        i
    }

    /// Pushes `ReturnExprStmt` when an expression is given, `ReturnStmt` otherwise.
    fn push_ret(&mut self, expr: Option, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        match expr {
            Some(expr) => {
                self.set_tag_data_source_loc(i, Tag::ReturnExprStmt, Data::index(expr), loc)
            }
            None => self.set_tag_data_source_loc(i, Tag::ReturnStmt, Data::none(), loc),
        }

        i
    }

    /// Pushes a variable declaration.
    ///
    /// Extra layout is `[name, assignment?, ty?]`: the optional entries are
    /// only written when present, so the range length varies from 1 to 3.
    /// The tag encodes `is_let` and whether an assignment exists.
    fn push_var_decl(
        &mut self,
        is_let: bool,
        name: intern::Index,
        ty: Option,
        assignment: Option,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();

        let start = self.extra.len() as u32;
        self.extra.push(name.into_u32());
        _ = self.extend_extra(assignment.map(|i| i.into_u32()));
        _ = self.extend_extra(ty.map(|i| i.into_u32()));
        let end = self.extra.len() as u32;

        let tag = match (is_let, assignment.is_some()) {
            (true, false) => Tag::VarDecl,
            (true, true) => Tag::VarDeclAssignment,
            (false, false) => Tag::MutVarDecl,
            (false, true) => Tag::MutVarDeclAssignment,
        };

        self.set_tag_data_source_loc(i, tag, Data::extra_range(start, end), loc);

        i
    }

    /// Pushes a `FunctionProto`; extra layout at the stored offset is
    /// `[return_type, parameter_list]`.
    fn push_fn_proto(
        &mut self,
        ident: intern::Index,
        return_type: Index,
        parameter_list: Index,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        let (extra_start, _) =
            self.extend_extra([return_type.into_u32(), parameter_list.into_u32()]);
        self.set_tag_data_source_loc(
            i,
            Tag::FunctionProto,
            Data::intern_and_extra_offset(ident, extra_start),
            loc,
        );

        i
    }

    /// Turns node `i` into a block. The trailing expression, if any, is stored
    /// as the last entry of the extra range and selects `BlockTrailingExpr`.
    fn set_block>(
        &mut self,
        i: Index,
        statements: I,
        trailing: Option,
        loc: SourceLocation,
    ) {
        let (extra_start, extra_end) =
            self.extend_extra_by_indices(statements.into_iter().chain(trailing.into_iter()));
        if trailing.is_some() {
            self.set_tag_data_source_loc(
                i,
                Tag::BlockTrailingExpr,
                Data::extra_range(extra_start, extra_end),
                loc,
            );
        } else {
            self.set_tag_data_source_loc(
                i,
                Tag::Block,
                Data::extra_range(extra_start, extra_end),
                loc,
            );
        }
    }

    /// Reserves a node and makes it a block (see `set_block`).
    fn push_block>(
        &mut self,
        statements: I,
        trailing: Option,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        self.set_block(i, statements, trailing, loc);
        i
    }

    /// Pushes a `ParameterList` whose extra range holds `parameters`.
    fn push_parameter_list>(
        &mut self,
        parameters: I,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        let (extra_start, extra_end) = self.extend_extra_by_indices(parameters);
        self.set_tag_data_source_loc(
            i,
            Tag::ParameterList,
            Data::extra_range(extra_start, extra_end),
            loc,
        );

        i
    }

    /// Pushes a positional `Argument` wrapping `expr`.
    fn push_argument(&mut self, expr: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::Argument, Data::index(expr), loc);

        i
    }

    /// Pushes a `NamedArgument` with payload `(expr, name)`.
    fn push_named_argument(
        &mut self,
        name: intern::Index,
        expr: Index,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(
            i,
            Tag::NamedArgument,
            Data::index_and_intern(expr, name),
            loc,
        );

        i
    }

    /// Pushes a `Parameter` with payload `(ty, name)`.
    fn push_parameter(&mut self, name: intern::Index, ty: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::Parameter, Data::index_and_intern(ty, name), loc);

        i
    }

    /// Pushes an `ArgumentList` whose extra range holds `args`.
    fn push_argument_list>(
        &mut self,
        args: I,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        let (extra_start, extra_end) = self.extend_extra_by_indices(args);
        self.set_tag_data_source_loc(
            i,
            Tag::ArgumentList,
            Data::extra_range(extra_start, extra_end),
            loc,
        );

        i
    }

    /// Pushes a single-operand node with the caller-supplied `tag`.
    fn push_unary(&mut self, tag: Tag, lhs: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, tag, Data::index(lhs), loc);

        i
    }

    /// Pushes a two-operand node with the caller-supplied `tag`.
    fn push_binary(&mut self, tag: Tag, lhs: Index, rhs: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, tag, Data::two_indices(lhs, rhs), loc);

        i
    }

    /// Pushes an `Assign` node with payload `(lhs, rhs)`.
    fn push_assign(&mut self, lhs: Index, rhs: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::Assign, Data::two_indices(lhs, rhs), loc);

        i
    }

    /// Pushes an `ExplicitCast` node with payload `(lhs, ty)`.
    fn push_cast(&mut self, lhs: Index, ty: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::ExplicitCast, Data::two_indices(lhs, ty), loc);

        i
    }

    /// Pushes an `IfExpr` (no else branch) with payload `(cond, body)`.
    fn push_if(&mut self, cond: Index, body: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::IfExpr, Data::two_indices(cond, body), loc);

        i
    }

    /// Pushes an `IfElseExpr`; extra layout at the stored offset is `[body, other]`.
    fn push_if_else(
        &mut self,
        cond: Index,
        body: Index,
        other: Index,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        let (extra_start, _) = self.extend_extra_by_indices([body, other]);
        self.set_tag_data_source_loc(
            i,
            Tag::IfElseExpr,
            Data::index_and_extra_offset(cond, extra_start),
            loc,
        );

        i
    }

    /// Pushes a `CallExpr` with payload `(lhs, args)`.
    fn push_call_expr(&mut self, lhs: Index, args: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::CallExpr, Data::two_indices(lhs, args), loc);

        i
    }

    /// Pushes a `DeclRefUnresolved` that remembers the scope node and identifier.
    fn push_decl_ref_unresolved(
        &mut self,
        scope: Index,
        ident: intern::Index,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(
            i,
            Tag::DeclRefUnresolved,
            Data::index_and_intern(scope, ident),
            loc,
        );

        i
    }

    /// Rewrites node `i` in place as a `DeclRef` to `decl`; the location is kept.
    fn resolve_decl_ref(&mut self, i: Index, decl: Index) {
        self.tags[i.index()] = Tag::DeclRef;
        self.datas[i.index()] = Data::index(decl);
    }

    /// Pushes a `StructDecl`.
    ///
    /// Extra layout at the stored offset is `[packed flags, types.., names..]`:
    /// all field types are written before all field names.
    fn push_struct_decl>(
        &mut self,
        name: intern::Index,
        flags: StructFlags,
        fields: I,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        let (offset, _) = self.extend_extra([flags.pack()]);
        let (names, types) = fields
            .into_iter()
            .map(|(name, ty)| (name.into_u32(), ty.into_u32()))
            .unzip::<_, _, Vec<_>, Vec<_>>();
        self.extend_extra(types);
        self.extend_extra(names);
        self.set_tag_data_source_loc(
            i,
            Tag::StructDecl,
            Data::intern_and_extra_offset(name, offset),
            loc,
        );
        i
    }

    /// Pushes a `FieldDecl` with payload `(ty, name)`.
    fn push_field_decl(&mut self, name: intern::Index, ty: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::FieldDecl, Data::index_and_intern(ty, name), loc);

        i
    }

    /// Pushes a `FieldAccess` with payload `(expr, name)`.
    fn push_field_access(
        &mut self,
        expr: Index,
        name: intern::Index,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::FieldAccess, Data::index_and_intern(expr, name), loc);

        i
    }

    /// Pushes an `InternedType` node that wraps an already interned type.
    fn push_interend_type(&mut self, ty: intern::Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::InternedType, Data::intern(ty), loc);

        i
    }

    /// Pushes an `ArrayType` with payload `(length_expr, pointer_ty)`.
    fn push_array_type(
        &mut self,
        length_expr: Index,
        pointer_ty: Index,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(
            i,
            Tag::ArrayType,
            Data::two_indices(length_expr, pointer_ty),
            loc,
        );

        i
    }

    /// Pushes a `PointerType`. The second payload word is the packed pointer
    /// flags themselves, not an offset into `extra`.
    fn push_pointer_type(&mut self, ty: Index, flags: PointerFlags, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(
            i,
            Tag::PointerType,
            Data::index_and_extra_offset(ty, flags.pack() as u32),
            loc,
        );

        i
    }

    /// Pushes a `TypeDeclRefUnresolved` that remembers the scope node and identifier.
    fn push_type_ref_unresolved(
        &mut self,
        scope: Index,
        ident: intern::Index,
        loc: SourceLocation,
    ) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(
            i,
            Tag::TypeDeclRefUnresolved,
            Data::index_and_intern(scope, ident),
            loc,
        );

        i
    }

    /// Rewrites node `i` in place as a `TypeDeclRef` to `decl`; the location is kept.
    fn resolve_type_ref(&mut self, i: Index, decl: Index) {
        self.tags[i.index()] = Tag::TypeDeclRef;
        self.datas[i.index()] = Data::index(decl);
    }

    /// Pushes an `ExprStmt` wrapping `expr`, reusing the expression's source location.
    fn push_expr_stmt(&mut self, expr: Index) -> Index {
        let i = self.reserve_node();
        let loc = self.get_loc(expr);
        self.set_tag_data_source_loc(i, Tag::ExprStmt,
Data::index(expr), loc);

        i
    }

    /// Pushes a `Constant` with payload `(ty, value)`.
    fn push_constant(&mut self, value: intern::Index, ty: Index, loc: SourceLocation) -> Index {
        let i = self.reserve_node();
        self.set_tag_data_source_loc(i, Tag::Constant, Data::index_and_intern(ty, value), loc);

        i
    }

    /// Appends the raw values of `indices` to `extra`; returns the `(start, end)` range written.
    fn extend_extra_by_indices>(&mut self, indices: I) -> (u32, u32) {
        self.extend_extra(indices.into_iter().map(|i| i.0.get()))
    }

    /// Appends `words` to `extra`; returns the half-open `(start, end)` range written.
    fn extend_extra>(&mut self, words: I) -> (u32, u32) {
        let i = self.extra.len() as u32;
        self.extra.extend(words);

        (i, self.extra.len() as u32)
    }

    /// Overwrites tag, payload and source location of an already reserved node.
    fn set_tag_data_source_loc(&mut self, index: Index, tag: Tag, data: Data, loc: SourceLocation) {
        self.tags[index.index()] = tag;
        self.datas[index.index()] = data;
        self.source_locs[index.index()] = loc;
    }
}

/// List of child nodes, displayed as `[a, b, c]`.
struct Children(Vec);

impl Display for Children {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[")?;
        // Split off the last element so that no trailing separator is written.
        if let Some((last, rest)) = self.0.split_last() {
            for i in rest {
                write!(f, "{i}, ")?;
            }
            write!(f, "{last}")?;
        }
        write!(f, "]")
    }
}

/// Memo table used by `Ast::get_type_of_node` (looked up by node index).
type TypeCache = BTreeMap;

// Read-only queries over the tree.
impl Ast {
    /// Returns the interned type of node `index`, memoised in `cache`.
    ///
    /// Statement-like and unresolved nodes evaluate to the pool's void type.
    /// Panics (`unreachable!`) for `TypeDeclRef`, `TypeDeclRefUnresolved`,
    /// `PointerType` and `ArrayType` nodes, and via `unwrap`/`unimplemented!`
    /// where noted below.
    fn get_type_of_node(
        &self,
        ip: &InternPool,
        cache: &mut TypeCache,
        index: Index,
    ) -> intern::Index {
        if let Some(ty) = cache.get(&index) {
            return *ty;
        }

        let void = ip.get_void_type();
        let tag = self.tags[index.index()];
        let data = self.datas[index.index()];

        let ty = match tag {
            Tag::ArgumentList
            | Tag::ExprStmt
            | Tag::ReturnExprStmt
            | Tag::Block
            | Tag::ParameterList
            | Tag::File => void,
            Tag::VarDeclAssignment | Tag::MutVarDeclAssignment => {
                // Last extra word: the declared type when one was written,
                // otherwise the assignment expression (see `push_var_decl`).
                let (_, b) = data.as_extra_range();
                self.get_type_of_node(ip, cache, Index::new(self.extra[b - 1]))
            }
            Tag::VarDecl | Tag::MutVarDecl => {
                // NOTE(review): `a + 1` is only inside this node's range when a
                // type was written by `push_var_decl` — confirm the parser
                // always supplies one for declarations without an assignment.
                let (a, b) = data.as_extra_range();
                self.get_type_of_node(ip, cache, Index::new(self.extra[a + 1]))
            }
            Tag::GlobalDecl => {
                // First extra word is the declared type.
                let (_, a) = data.as_intern_and_extra_offset();
                self.get_type_of_node(ip, cache, Index::new(self.extra[a]))
            }
            Tag::FunctionDecl => self.get_type_of_node(ip, cache, data.as_two_indices().0),
            Tag::FunctionProto => {
                let (_, i) = data.as_intern_and_extra_offset();
                // Reads the return-type node's payload directly as an interned type.
                let return_type = { self.datas[self.extra[i] as usize].as_intern() };
                let parameters = {
                    let (a, b) = self.datas[self.extra[i + 1] as usize].as_extra_range();
                    self.extra[a..b].iter().map(|&i| {
                        // `i` is the index of a parameter node; its payload is (type index, intern)
                        let ty = self.datas[i as usize].as_index_intern().0;
                        self.datas[ty.index()].as_intern()
                    })
                };

                // Panics if the function type has not been interned yet.
                ip.try_get_function_type(return_type, parameters).unwrap()
            }
            Tag::BlockTrailingExpr => {
                // The trailing expression is the last entry of the block's range.
                let (a, b) = data.as_extra_range();
                self.get_type_of_node(ip, cache, Index::new(self.extra[b - 1]))
            }
            Tag::CallExpr => {
                let (expr, _args) = data.as_two_indices();
                let fn_ty = self.get_type_of_node(ip, cache, expr);
                if let intern::Key::FunctionType { return_type, .. } = ip.get_key(fn_ty) {
                    return_type
                } else {
                    eprintln!("lhs of call expr is not a function!");
                    void
                }
            }
            Tag::Argument => self.get_type_of_node(ip, cache, data.as_index()),
            Tag::NamedArgument => {
                let (a, _) = data.as_index_intern();
                self.get_type_of_node(ip, cache, a)
            }
            Tag::ExplicitCast => {
                // A cast has the type of its target-type operand.
                let (_, a) = data.as_two_indices();
                self.get_type_of_node(ip, cache, a)
            }
            Tag::FieldAccess => {
                let (ty_expr, name) = data.as_index_intern();
                let ty = self.get_type_of_node(ip, cache, ty_expr);
                // Looks the field up on a struct, or on the struct behind a
                // pointer; an unknown field name yields void.
                match ip.get_key(ty) {
                    intern::Key::PointerType { pointee, .. }
                        if let intern::Key::StructType { fields, .. } = ip.get_key(pointee) =>
                    {
                        fields
                            .iter()
                            .cloned()
                            .find(|(n, _)| n == &name)
                            .map(|(_, t)| t)
                            .unwrap_or(void)
                    }
                    intern::Key::StructType { fields, .. } => fields
                        .iter()
                        .cloned()
                        .find(|(n, _)| n == &name)
                        .map(|(_, t)| t)
                        .unwrap_or(void),
                    _ => {
                        unimplemented!()
                    }
                }
            }
            Tag::Deref => {
                let ty = self.get_type_of_node(ip, cache, data.as_index());
                if let intern::Key::PointerType { pointee, .. } = ip.get_key(ty) {
                    pointee
                } else {
                    eprintln!("lhs of deref is not a pointer!");
                    void
                }
            }
            Tag::SubscriptExpr => {
                let ty = self.get_type_of_node(ip, cache, data.as_two_indices().0);
                match ip.get_key(ty) {
                    intern::Key::PointerType { pointee, .. }
                    | intern::Key::ArrayType { pointee, .. } => pointee,
                    _ => {
                        eprintln!("lhs of subscript is not an array or pointer!");
                        void
                    }
                }
            }
            Tag::AddressOf => {
                let ty = self.get_type_of_node(ip, cache, data.as_index());
                // TODO: find out if the expression is const or volatile to fill in the flags
                ip.try_get_pointer_type(ty, None).unwrap()
            }
            Tag::Not | Tag::Negate => self.get_type_of_node(ip, cache, data.as_index()),
            // Binary operators take the type of their left operand.
            // NOTE(review): this includes the comparison operators — confirm
            // they are not meant to be bool.
            Tag::Or
            | Tag::And
            | Tag::BitOr
            | Tag::BitXOr
            | Tag::BitAnd
            | Tag::Eq
            | Tag::NEq
            | Tag::Lt
            | Tag::Gt
            | Tag::Le
            | Tag::Ge
            | Tag::Shl
            | Tag::Shr
            | Tag::Add
            | Tag::Sub
            | Tag::Mul
            | Tag::Div
            | Tag::Rem => self.get_type_of_node(ip, cache, data.as_two_indices().0),
            Tag::IfExpr => ip.get_bool_type(), // really?
            Tag::IfElseExpr => {
                // Typed after the first branch only; the else branch is not consulted.
                let (_, b) = data.as_index_and_extra_offset();
                let if_ = Index::new(self.extra[b]);
                self.get_type_of_node(ip, cache, if_)
            }
            Tag::Constant | Tag::Parameter => {
                self.get_type_of_node(ip, cache, data.as_index_intern().0)
            }
            Tag::DeclRef => self.get_type_of_node(ip, cache, data.as_index()),
            Tag::StructDecl => {
                let (name, _) = data.as_intern_and_extra_offset();
                // Panics if the struct type has not been interned yet.
                ip.try_get_struct_type(name, index).unwrap()
            }
            Tag::Assign
            | Tag::Root
            | Tag::DeclRefUnresolved
            | Tag::Error
            | Tag::Undefined
            | Tag::ReturnStmt => void,
            Tag::FieldDecl => self.get_type_of_node(ip, cache, data.as_index_intern().0),
            Tag::InternedType => data.as_intern(),
            Tag::TypeDeclRef | Tag::TypeDeclRefUnresolved | Tag::PointerType | Tag::ArrayType => {
                unreachable!()
            }
        };

        cache.insert(index, ty);
        ty
    }

    /// Returns the child node indices of `index`, decoded from its payload
    /// according to its tag. Leaf and unresolved nodes yield an empty list.
    fn get_node_children(&self, index: Index) -> Vec {
        let tag = self.tags[index.index()];
        let data = self.datas[index.index()];

        match tag {
            Tag::File => {
                let (a, b) = data.as_extra_range();
                self.extra[a..b].iter().map(|&i| Index::new(i)).collect()
            }
            Tag::FunctionProto => {
                // [return_type, parameter_list]
                let (_, i) = data.as_intern_and_extra_offset();
                self.extra[i..=i + 1]
                    .iter()
                    .map(|&i| Index::new(i))
                    .collect()
            }
            Tag::FunctionDecl => {
                let (a, b) = data.as_two_indices();
                vec![a, b]
            }
            Tag::ParameterList => {
                let (a, b) = data.as_extra_range();
                self.extra[a..b].iter().map(|&i| Index::new(i)).collect()
            }
            Tag::Block | Tag::BlockTrailingExpr => {
                let (a, b) = data.as_extra_range();
                self.extra[a..b].iter().map(|&i| Index::new(i)).collect()
            }
            Tag::ExprStmt | Tag::ReturnExprStmt => {
                let a = data.as_index();
                vec![a]
            }
            Tag::VarDeclAssignment | Tag::MutVarDeclAssignment => {
                // Skip the first word: it is the interned name, not a node.
                let (a, b) = data.as_extra_range();
                self.extra[a + 1..b]
                    .iter()
                    .map(|&i| Index::new(i))
                    .collect()
            }
            Tag::GlobalDecl => {
                // [ty, expr]
                let (_, offset) = data.as_intern_and_extra_offset();
                self.extra[offset..=offset + 1]
                    .iter()
                    .map(|&i| Index::new(i))
                    .collect()
            }
            Tag::CallExpr | Tag::ExplicitCast => {
                let (a, b) = data.as_two_indices();
                vec![a, b]
            }
            Tag::ArgumentList => {
                let (a, b) = data.as_extra_range();
                self.extra[a..b].iter().map(|&i| Index::new(i)).collect()
            }
            Tag::Argument => {
                let a = data.as_index();
                vec![a]
            }
            Tag::FieldDecl | Tag::FieldAccess | Tag::NamedArgument => {
                let (a, _) = data.as_index_intern();
                vec![a]
            }
            Tag::Deref | Tag::AddressOf | Tag::Not | Tag::Negate => {
                let a = data.as_index();
                vec![a]
            }
            Tag::Or
            | Tag::And
            | Tag::BitOr
            | Tag::BitXOr
            | Tag::BitAnd
            | Tag::Eq
            | Tag::NEq
            | Tag::Lt
            | Tag::Gt
            | Tag::Le
            | Tag::Ge
            | Tag::Shl
            | Tag::Shr
            | Tag::Add
            | Tag::Sub
            | Tag::Mul
            | Tag::Div
            | Tag::Rem
            | Tag::Assign
            | Tag::SubscriptExpr
            | Tag::ArrayType
            | Tag::IfExpr => {
                let (a, b) = data.as_two_indices();
                vec![a, b]
            }
            Tag::IfElseExpr => {
                // condition, then the two branches stored in `extra`
                let (a, b) = data.as_index_and_extra_offset();
                let if_ = Index::new(self.extra[b]);
                let else_ = Index::new(self.extra[b + 1]);
                vec![a, if_, else_]
            }
            Tag::PointerType => {
                // The second payload word holds packed flags, not a child.
                let (a, _) = data.as_index_and_extra_offset();
                vec![a]
            }
            Tag::StructDecl => {
                // Children are the field type nodes, which follow the flags word;
                // the field names stored after them are not nodes.
                let (a, offset) = data.as_intern_and_extra_offset();
                let flags = StructFlags::unpack(self.extra[offset]);
                self.extra[offset + 1..(offset + 1 + flags.num_fields as usize)]
                    .iter()
                    .map(|&i| Index::new(i))
                    .collect()
            }
            Tag::InternedType
            | Tag::Root
            | Tag::TypeDeclRefUnresolved
            | Tag::DeclRefUnresolved
            | Tag::Error
            | Tag::Undefined
            | Tag::TypeDeclRef
            | Tag::DeclRef
            | Tag::ReturnStmt => vec![],
            Tag::Parameter | Tag::Constant => {
                let (a, _) = data.as_index_intern();
                vec![a]
            }
            Tag::VarDecl | Tag::MutVarDecl => {
                // NOTE(review): same `a + 1` assumption as in `get_type_of_node`.
                let (a, _) = data.as_extra_range();
                vec![Index::new(self.extra[a + 1])]
            }
        }
    }

    /// Evaluates node `index` to a compile-time number.
    ///
    /// Only `Tag::Constant` is implemented; every other tag hits `todo!()`.
    fn comptime_value_of_node(
        &self,
        ip: &InternPool,
        pointer_bits: u16,
        cache: &mut TypeCache,
        index: Index,
    ) -> crate::comptime::ComptimeNumber {
        let tag = self.tags[index.index()];
        let data = self.datas[index.index()];

        match tag {
            Tag::Root => todo!(),
            Tag::File => todo!(),
            Tag::FunctionProto => todo!(),
            Tag::FunctionDecl => todo!(),
            Tag::ParameterList => todo!(),
            Tag::Parameter => todo!(),
            Tag::Block => todo!(),
            Tag::BlockTrailingExpr => todo!(),
            Tag::Constant => {
                let (ty, value) = data.as_index_intern();
                let ty = self.get_type_of_node(ip, cache, ty);
                interned_type_and_value_to_comptime_number(ip, pointer_bits, ty, value)
            }
            Tag::ExprStmt => todo!(),
            Tag::ReturnStmt => todo!(),
            Tag::ReturnExprStmt => todo!(),
            Tag::VarDecl => todo!(),
            Tag::MutVarDecl => todo!(),
            Tag::VarDeclAssignment => todo!(),
            Tag::MutVarDeclAssignment => todo!(),
            Tag::GlobalDecl => todo!(),
            Tag::StructDecl => todo!(),
            Tag::FieldDecl => todo!(),
            Tag::DeclRef => todo!(),
            Tag::DeclRefUnresolved => todo!(),
            Tag::InternedType => todo!(),
            Tag::TypeDeclRef => todo!(),
            Tag::TypeDeclRefUnresolved => todo!(),
            Tag::PointerType => todo!(),
            Tag::ArrayType => todo!(),
            Tag::CallExpr => todo!(),
            Tag::FieldAccess => todo!(),
            Tag::ArgumentList => todo!(),
            Tag::Argument => todo!(),
            Tag::NamedArgument => todo!(),
            Tag::ExplicitCast => todo!(),
            Tag::Deref => todo!(),
            Tag::AddressOf => todo!(),
            Tag::Not => todo!(),
            Tag::Negate => todo!(),
            Tag::Or => todo!(),
            Tag::And => todo!(),
            Tag::BitOr => todo!(),
            Tag::BitXOr => todo!(),
            Tag::BitAnd => todo!(),
            Tag::Eq => todo!(),
            Tag::NEq => todo!(),
            Tag::Lt => todo!(),
            Tag::Gt => todo!(),
            Tag::Le => todo!(),
            Tag::Ge => todo!(),
            Tag::Shl => todo!(),
            Tag::Shr => todo!(),
            Tag::Add => todo!(),
            Tag::Sub => todo!(),
            Tag::Mul => todo!(),
            Tag::Div => todo!(),
Tag::Rem => todo!(),
            Tag::Assign => todo!(),
            Tag::SubscriptExpr => todo!(),
            Tag::IfExpr => todo!(),
            Tag::IfElseExpr => todo!(),
            Tag::Error => todo!(),
            Tag::Undefined => todo!(),
        }
    }
}

/// Converts an interned `(type, value)` pair into a compile-time number.
///
/// * sized integer types and `usize`/`isize` produce `ComptimeNumber::Integral`
///   carrying an `IntegralType`; `pointer_bits` is the width used for
///   `usize`/`isize`,
/// * `f32`/`f64` produce `ComptimeNumber::Floating`,
/// * `bool` produces `ComptimeNumber::Bool`,
/// * the comptime-int type produces an arbitrary-precision integer.
///
/// # Panics
///
/// Hits `unreachable!()` when the value key does not fit the type key or the
/// type is not one of the above, and `todo!()` for `void`.
fn interned_type_and_value_to_comptime_number(
    ip: &InternPool,
    pointer_bits: u16,
    ty: intern::Index,
    val: intern::Index,
) -> crate::comptime::ComptimeNumber {
    use crate::ast::IntegralType;
    use crate::comptime::*;

    let ty_key = ip.get_key(ty);
    match ty_key {
        intern::Key::SIntType { bits } | intern::Key::UIntType { bits } => {
            // The signedness comes from which key matched. It used to be
            // hard-coded to `false`, which turned every signed integer type
            // into an unsigned one.
            let signed = matches!(ty_key, intern::Key::SIntType { .. });
            interned_integer_to_comptime_number(ip, val, IntegralType::new(signed, bits))
        }
        intern::Key::SimpleType { ty } => match ty {
            intern::SimpleType::F32 => match ip.get_key(val) {
                intern::Key::F32 { bits } => {
                    ComptimeNumber::Floating(ComptimeFloat::Binary32(bits))
                }
                _ => {
                    unreachable!()
                }
            },
            intern::SimpleType::F64 => match ip.get_key(val) {
                intern::Key::F64 { bits } => {
                    ComptimeNumber::Floating(ComptimeFloat::Binary64(bits))
                }
                _ => {
                    unreachable!()
                }
            },
            intern::SimpleType::Bool => match ip.get_key(val) {
                intern::Key::TrueValue => ComptimeNumber::Bool(true),
                intern::Key::FalseValue => ComptimeNumber::Bool(false),
                _ => unreachable!(),
            },
            intern::SimpleType::Void => todo!(),
            intern::SimpleType::USize | intern::SimpleType::ISize => {
                let signed = matches!(ty, intern::SimpleType::ISize);
                interned_integer_to_comptime_number(
                    ip,
                    val,
                    IntegralType::new(signed, pointer_bits),
                )
            }
            intern::SimpleType::ComptimeInt => {
                // `BigInt::from` keeps the sign of signed inputs and treats
                // unsigned inputs as non-negative. The previous
                // `from_signed_bytes_le` decoding read unsigned values with the
                // top bit set as negative two's-complement numbers.
                let bigint = match ip.get_key(val) {
                    intern::Key::SIntSmall { bits } => BigInt::from(bits),
                    intern::Key::UIntSmall { bits } => BigInt::from(bits),
                    intern::Key::SInt64 { bits } => BigInt::from(bits),
                    intern::Key::UInt64 { bits } => BigInt::from(bits),
                    intern::Key::PositiveInt { bigint } | intern::Key::NegativeInt { bigint } => {
                        bigint
                    }
                    _ => {
                        unreachable!()
                    }
                };
                ComptimeNumber::Integral(ComptimeInt::Comptime(bigint))
            }
        },
        _ => {
            unreachable!()
        }
    }
}

/// Wraps the interned integer value `val` as an integral compile-time number of type `ty`.
///
/// Machine-sized keys become `ComptimeInt::Native` (cast to the field's
/// width), big-integer keys become `ComptimeInt::BigInt`. Any other key is a
/// caller bug and hits `unreachable!()`.
fn interned_integer_to_comptime_number(
    ip: &InternPool,
    val: intern::Index,
    ty: crate::ast::IntegralType,
) -> crate::comptime::ComptimeNumber {
    use crate::comptime::*;

    match ip.get_key(val) {
        intern::Key::SIntSmall { bits } => ComptimeNumber::Integral(ComptimeInt::Native {
            bits: bits as _,
            ty,
        }),
        intern::Key::UIntSmall { bits } => ComptimeNumber::Integral(ComptimeInt::Native {
            bits: bits as _,
            ty,
        }),
        intern::Key::SInt64 { bits } => ComptimeNumber::Integral(ComptimeInt::Native {
            bits: bits as _,
            ty,
        }),
        intern::Key::UInt64 { bits } => ComptimeNumber::Integral(ComptimeInt::Native {
            bits: bits as _,
            ty,
        }),
        intern::Key::PositiveInt { bigint } | intern::Key::NegativeInt { bigint } => {
            ComptimeNumber::Integral(ComptimeInt::BigInt { bits: bigint, ty })
        }
        _ => {
            unreachable!()
        }
    }
}

/// Pretty-printer that walks an `Ast` and writes one indented line per node.
pub struct AstRenderer<'a> {
    ast: &'a Ast,
    #[allow(dead_code)]
    syms: &'a crate::symbol_table::syms2::Symbols,
    ip: &'a InternPool,
    // Stack of the scope-introducing nodes currently being rendered.
    scopes: Vec,
    // Type memo shared by all `get_type_of_node` calls made while rendering.
    cache: TypeCache,
}

impl<'a> AstRenderer<'a> {
    /// Creates a renderer with an empty scope stack and an empty type cache.
    pub fn new(
        ast: &'a Ast,
        ip: &'a InternPool,
        syms: &'a crate::symbol_table::syms2::Symbols,
    ) -> Self {
        Self {
            ast,
            syms,
            ip,
            scopes: Vec::new(),
            cache: TypeCache::new(),
        }
    }

    /// Writes `node` at depth `indent`, then its children one level deeper.
    fn render_node(
        &mut self,
        w: &mut W,
        indent: u32,
        node: Index,
    ) -> core::fmt::Result {
        let tag = self.ast.tags[node.index()];
        let loc = self.ast.source_locs[node.index()];
        match tag {
            Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr =>
{
                // These node kinds open a scope for the duration of their subtree.
                self.scopes.push(node);
            }
            _ => {}
        }

        let children = Children(self.ast.get_node_children(node));
        let ty = self.ast.get_type_of_node(self.ip, &mut self.cache, node);
        writeln_indented!(indent, w, "{node} ({ty}) = ({loc}) {tag:?} {}", children)?;

        for child in children.0 {
            self.render_node(w, indent + 1, child)?;
        }

        match tag {
            Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => {
                // Leave the scope that was entered above.
                self.scopes.pop();
            }
            _ => {}
        }

        Ok(())
    }

    /// Renders every file listed under the root node, starting at indent 0.
    fn render(&mut self, w: &mut W) -> core::fmt::Result {
        for file in self.ast.get_root_file_indices() {
            self.render_node(w, 0, file)?;
        }

        Ok(())
    }
}

pub mod ast_gen {

    use intern::{PointerFlags, SimpleType};
    use itertools::Itertools;
    use num_bigint::{BigInt, BigUint};

    use crate::{
        common::from_lo_hi_dwords,
        comptime,
        lexer::{Radix, TokenItem, TokenIterator},
        symbol_table::syms2::SymbolKind,
        tokens::PRECEDENCE_MAP,
        variant,
    };

    use super::*;

    /// A parse error together with the source location it was reported at.
    #[derive(Debug)]
    pub struct ErrorInfo {
        error: ParseError,
        loc: SourceLocation,
    }

    /// Parser state: the tree under construction, the intern pool, the symbol
    /// table, the stack of enclosing scope nodes and the collected errors.
    #[derive(Debug)]
    pub struct Parser {
        pub ast: Ast,
        pub intern: intern::InternPool,
        pub syms: crate::symbol_table::syms2::Symbols,
        scopes: Vec,
        pub errors: Vec,
    }

    type ParseResult = core::result::Result;

    // Displaying a parser renders its tree through `AstRenderer`.
    impl Display for Parser {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            self.display().render(f)
        }
    }

    impl Parser {
        /// Creates a parser with a fresh tree, intern pool and symbol table.
        pub fn new() -> Parser {
            Self {
                ast: Ast::new(),
                intern: intern::InternPool::create(),
                syms: crate::symbol_table::syms2::Symbols::new(),
                scopes: Vec::new(),
                errors: Vec::new(),
            }
        }

        /// Returns a renderer borrowing this parser's tree, pool and symbols.
        pub fn display(&self) -> AstRenderer<'_> {
            AstRenderer::new(&self.ast, &self.intern, &self.syms)
        }

        /// Placeholder; currently does nothing.
        pub fn fold_and_typecheck(&mut self) {}

        /// Walks the tree and replaces type-describing nodes with interned types.
        ///
        /// The traversal is post-order: a node is handled (`PopSelf`) only after
        /// all of its children have been handled. `ArrayType`, `PointerType` and
        /// `TypeDeclRef` nodes are rewritten in place into `InternedType`;
        /// `FunctionProto` and `StructDecl` register their types in the pool.
        pub fn intern_types(&mut self) {
            let mut nodes = self
                .ast
                .get_root_file_indices()
                .map(|i| A::PushChildren(i))
                .collect::>();

            // Work item for the explicit stack below.
            enum A {
                // Expand the node: schedule it for processing, then its children.
                PushChildren(Index),
                // Process the node itself; popped after its children.
                PopSelf(Index),
            }

            while let Some(node) = nodes.pop() {
                match node {
                    A::PushChildren(i) => {
                        // Pushed first so that it is popped after every child.
                        nodes.push(A::PopSelf(i));
                        nodes.extend(
                            self.ast
                                .get_node_children(i)
                                .into_iter()
                                .map(|i| A::PushChildren(i)),
                        );
                    }
                    A::PopSelf(i) => {
                        let tag = self.ast.tags[i.index()];
                        let data = self.ast.datas[i.index()];
                        match tag {
                            Tag::ArrayType => {
                                let (length, pointee) = data.as_two_indices();
                                // The element is stored as a pointer-type node (see
                                // `parse_array_type`), already interned by this walk;
                                // unpack it to get the real pointee and flags.
                                let pointee = self.ast.datas[pointee.index()].as_intern();
                                variant!( self.intern.get_key(pointee) => intern::Key::PointerType { pointee, flags });

                                // NOTE(review): every branch narrows the length to u32
                                // (big integers keep only their lowest digit, other
                                // keys become 0) — confirm truncation is acceptable.
                                let length = {
                                    let value = self.ast.datas[length.index()].as_index_intern().1;

                                    match self.intern.get_key(value) {
                                        intern::Key::SIntSmall { bits } => bits as u32,
                                        intern::Key::UIntSmall { bits } => bits as u32,
                                        intern::Key::SInt64 { bits } => bits as u32,
                                        intern::Key::UInt64 { bits } => bits as u32,
                                        intern::Key::NegativeInt { bigint }
                                        | intern::Key::PositiveInt { bigint } => {
                                            bigint.iter_u32_digits().next().unwrap_or(0)
                                        }
                                        _ => 0,
                                    }
                                };

                                let ty = self.intern.get_array_type(pointee, Some(flags), length);
                                self.ast.tags[i.index()] = Tag::InternedType;
                                self.ast.datas[i.index()] = Data::intern(ty);
                            }
                            Tag::PointerType => {
                                // The second payload word holds the packed pointer flags.
                                let (pointee, flags) = data.as_index_and_extra_offset();
                                let pointee = self.ast.datas[pointee.index()].as_intern();
                                let ty = self.intern.get_pointer_type(
                                    pointee,
                                    Some(PointerFlags::unpack(flags as u8)),
                                );
                                self.ast.tags[i.index()] = Tag::InternedType;
                                self.ast.datas[i.index()] = Data::intern(ty);
                            }
                            Tag::TypeDeclRef => {
                                let decl = data.as_index();
                                let (name, _) =
                                    self.ast.datas[decl.index()].as_intern_and_extra_offset();

                                let ty = self.intern.get_struct_type(name, decl);
                                self.ast.tags[i.index()] = Tag::InternedType;
                                self.ast.datas[i.index()] = Data::intern(ty);
                            }
                            Tag::FunctionProto => {
                                // `i` is shadowed here: from now on it is the extra offset.
                                let (_, i) = data.as_intern_and_extra_offset();
                                let return_type = self.ast.get_type_of_node(
                                    &self.intern,
                                    &mut TypeCache::new(),
                                    Index::new(self.ast.extra[i]),
                                );
                                let parameters = {
                                    let (a, b) = self.ast.datas[self.ast.extra[i + 1] as usize]
                                        .as_extra_range();
                                    self.ast.extra[a..b].iter().map(|&i| {
                                        // `i` is the index of a parameter node; its payload is (type index, intern)
                                        let ty = self.ast.datas[i as usize].as_index_intern().0;
                                        self.ast.datas[ty.index()].as_intern()
                                    })
                                };

                                // Called for its interning side effect; the result is unused.
                                self.intern.get_function_type(return_type, parameters);
                            }
                            Tag::StructDecl => {
                                let (name, offset) = data.as_intern_and_extra_offset();
                                let flags = StructFlags::unpack(self.ast.extra[offset]);

                                // Layout after the flags word: all field types, then all field names.
                                let types = (offset + 1)..(offset + 1 + flags.num_fields as usize);
                                let names = (offset + 1 + flags.num_fields as usize)
                                    ..(offset + 1 + flags.num_fields as usize * 2);

                                let types = self.ast.extra[types]
                                    .iter()
                                    .map(|&i| Index::new(i))
                                    .map(|i| self.ast.datas[i.index()].as_intern());
                                let names = self.ast.extra[names].iter().map(|&i| intern::Index(i));

                                self.intern.insert_or_replace_struct_type(
                                    name,
                                    i,
                                    flags.packed,
                                    flags.c_like,
                                    names.zip(types),
                                );
                            }
                            _ => {}
                        }
                    }
                }
            }
        }

        /// Resolves `DeclRefUnresolved` / `TypeDeclRefUnresolved` nodes through
        /// the symbol table, using the scope stored in each node. References
        /// that cannot be found are left unresolved.
        pub fn resolve_decl_refs(&mut self) {
            let mut nodes = self.ast.get_root_file_indices().collect::>();
            // NOTE(review): this local stack is pushed and popped within the same
            // iteration and never read; lookups use the scope stored in the node.
            let mut scopes = Vec::new();

            while let Some(node) = nodes.pop() {
                match self.ast.tags[node.index()] {
                    Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => {
                        scopes.push(node);
                    }
                    _ => {}
                }

                // Reversed so that the first child ends up on top of the stack.
                let children = self.ast.get_node_children(node);
                nodes.extend(children.into_iter().rev());

                match self.ast.tags[node.index()] {
                    Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => {
                        scopes.pop();
                    }
                    Tag::TypeDeclRefUnresolved => {
                        let (scope, name) = self.ast.datas[node.index()].as_index_intern();
                        // look the name up from the scope recorded at parse time
                        if let Some(decl) = self.syms.find_type_symbol(
                            scope,
                            name,
                            self.ast.source_locs[node.index()],
                        ) {
                            self.ast.resolve_type_ref(node, decl)
                        };
                    }
                    Tag::DeclRefUnresolved => {
                        let (scope, name) = self.ast.datas[node.index()].as_index_intern();

                        // look the name up from the scope recorded at parse time
                        if let Some(decl) =
                            self.syms
                                .find_symbol(scope, name, self.ast.source_locs[node.index()])
                        {
                            self.ast.resolve_decl_ref(node, decl)
                        };
                    }
                    _ => {}
                }
            }
        }

        /// Returns the innermost scope node; panics if the scope stack is empty.
        fn current_scope(&self) -> Index {
            self.scopes.last().cloned().unwrap()
        }

        /// Consumes an identifier token and interns its lexeme as a string.
        fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result {
            let ident = tokens.expect_token(Token::Ident).map_err(|_| ErrorInfo {
                error: ParseError::ExpectedIdent,
                loc: tokens.current_source_location(),
            })?;

            let name = self.intern.get_or_insert(intern::Key::String {
                str: ident.lexeme(),
            });

            Ok(name)
        }

        /// Parses `*`, then any of `const` / `volatile` / `noalias`, then the pointee type.
        fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> ParseResult {
            let loc = tokens.current_source_location();
            tokens.eat_token(Token::Star).ok_or(ErrorInfo {
                error: ParseError::ExpectedToken(Token::Star),
                loc: tokens.current_source_location(),
            })?;

            // One presence flag per qualifier, in the order they were requested.
            let &[cnst, vol, noalias] =
                &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3]
            else {
                unreachable!()
            };
            let pointee = self.parse_type(tokens)?;

            Ok(self
                .ast
                .push_pointer_type(pointee, PointerFlags::new(cnst, vol, noalias), loc))
        }

        /// [LENGTH]const? volatile? noalias? TYPE
        ///
        /// LENGTH must be an integer literal. The element type is wrapped in a
        /// pointer-type node carrying the qualifiers, and the array node refers
        /// to that pointer node.
        fn parse_array_type(&mut self, tokens: &mut TokenIterator) -> ParseResult {
            let loc = tokens.current_source_location();
            let length_expr = self.parse_bracketed(tokens, |this, tokens| {
                let next = tokens.peek_token().ok_or(ErrorInfo {
                    error: ParseError::UnexpectedEndOfTokens,
                    loc: tokens.current_source_location(),
                })?;
                match next.token() {
                    Token::IntegerBinConstant
                    | Token::IntegerHexConstant
                    | Token::IntegerOctConstant
                    | Token::IntegerConstant => {
                        _ = tokens.next();
                        Ok(this.parse_integral_constant(&next, next.source_location()))
                    }
                    _ => Err(ErrorInfo {
                        error: ParseError::ExpectedConstantLiteral,
                        loc: tokens.current_source_location(),
                    }),
                }
            })?;

            let &[cnst, vol, noalias] =
                &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3]
            else {
                unreachable!()
            };

            let pointee = self.parse_type(tokens)?;
            let pointer =
                self.ast
                    .push_pointer_type(pointee, PointerFlags::new(cnst, vol, noalias), loc);

            Ok(self.ast.push_array_type(length_expr, pointer, loc))
        }

        /// Maps a builtin type keyword to its interned simple type, or `None`
        /// when `token` is not such a keyword.
        fn parse_simple_type(&mut self, token: Token) -> Option {
            match token {
                Token::Void => Some(self.intern.get_assume_present(&intern::Key::SimpleType {
                    ty: SimpleType::Void,
                })),
                Token::Bool => Some(self.intern.get_assume_present(&intern::Key::SimpleType {
                    ty: SimpleType::Bool,
                })),
                Token::F32 =>
Some(self.intern.get_assume_present(&intern::Key::SimpleType {
                    ty: SimpleType::F32,
                })),
                Token::F64 => Some(self.intern.get_assume_present(&intern::Key::SimpleType {
                    ty: SimpleType::F64,
                })),
                Token::USize => Some(self.intern.get_assume_present(&intern::Key::SimpleType {
                    ty: SimpleType::USize,
                })),
                Token::ISize => Some(self.intern.get_assume_present(&intern::Key::SimpleType {
                    ty: SimpleType::ISize,
                })),
                _ => None,
            }
        }

        /// Tries to read `typename` as an integral type name: `u` or `i`
        /// followed only by decimal digits (e.g. `u8`, `i32`).
        ///
        /// Returns `Ok(None)` when the name does not have that shape, the
        /// interned integer type when it does, and
        /// `Err(ParseError::IntegralTypeTooWide)` when the width does not fit
        /// in a `u16`.
        fn try_parse_integral_type(
            &mut self,
            typename: &str,
        ) -> Result, ParseError> {
            let mut iter = typename.chars().peekable();
            let signed = match iter.next() {
                Some('u') => false,
                Some('i') => true,
                _ => {
                    return Ok(None);
                }
            };

            // need at least one digit for an integral type
            if iter.peek().map(|&c| crate::common::is_digit(c)) != Some(true) {
                return Ok(None);
            }

            // nothing but digits may follow the prefix
            if iter
                .clone()
                .skip_while(|&c| crate::common::is_digit(c))
                .next()
                .is_some()
            {
                return Ok(None);
            }

            // Accumulate the decimal width with overflow checks.
            let mut bits = 0u16;
            loop {
                let Some(digit) = iter.next().map(|c| c as u8 - b'0') else {
                    break;
                };

                match bits
                    .checked_mul(10)
                    .and_then(|bits| bits.checked_add(digit as u16))
                {
                    Some(val) => {
                        bits = val;
                    }
                    None => {
                        // this IS an integral type, but it is bigger than u/i65535
                        return Err(ParseError::IntegralTypeTooWide);
                    }
                }
            }

            Ok(Some(self.intern.get_int_type(signed, bits)))
        }

        /// Parses an integer literal token into `(interned value, interned type)`.
        ///
        /// For non-decimal radices the first two characters of the lexeme are
        /// skipped, and `_` separators are ignored. A trailing `u…`/`i…` suffix
        /// selects the type; without one the comptime-int type is used.
        ///
        /// The value is interned in the smallest representation that holds it:
        /// at most one 32-bit digit gives `UIntSmall`, exactly two give
        /// `UInt64`, and anything longer gives `PositiveInt`.
        fn parse_integral_constant_inner(
            &mut self,
            item: &TokenItem,
        ) -> (intern::Index, intern::Index) {
            let radix = Radix::from_token(item.token()).unwrap();

            let mut chars = item.lexeme().char_indices();
            match radix {
                Radix::Dec => {}
                _ => {
                    // skip the two-character radix prefix
                    _ = chars.advance_by(2);
                }
            }

            let digits = chars
                .take_while_ref(|&(_, c)| radix.is_digit()(c) || c == '_')
                .filter(|&(_, c)| c != '_')
                .map(|(_, c)| c)
                .collect::>();

            let value = comptime::bigint::parse_bigint(digits.into_iter(), radix);

            // `take_while_ref` left `chars` at the first non-digit, i.e. the suffix.
            let ty = match chars.clone().next() {
                Some((i, 'u')) | Some((i, 'i')) => self
                    .try_parse_integral_type(&item.lexeme()[i..])
                    .expect("invalid integral type??"),
                _ => None,
            };

            // The arms used to be the exclusive patterns `..1` and `..2`, so a
            // one-digit value was never stored as `UIntSmall` and a two-digit
            // value never as `UInt64`.
            let interned = match value.len() {
                0 | 1 => {
                    let bits = value.first().copied().unwrap_or(0);
                    self.intern.get_or_insert(intern::Key::UIntSmall { bits })
                }
                2 => {
                    let lo = value.first().copied().unwrap_or(0);
                    let hi = value.get(1).copied().unwrap_or(0);
                    let bits = from_lo_hi_dwords(lo, hi);
                    self.intern.get_or_insert(intern::Key::UInt64 { bits })
                }
                _ => {
                    let bigint = BigInt::from_biguint(num_bigint::Sign::Plus, BigUint::new(value));
                    self.intern
                        .get_or_insert(intern::Key::PositiveInt { bigint })
                }
            };

            let ty = ty.unwrap_or(self.intern.get_comptime_int_type());
            (interned, ty)
        }

        /// Parses an integer literal and pushes it as a `Constant` node whose
        /// type operand is a fresh `InternedType` node.
        fn parse_integral_constant(&mut self, item: &TokenItem, loc: SourceLocation) -> Index {
            let (interned, ty) = self.parse_integral_constant_inner(item);
            let ty = self.ast.push_interend_type(ty, loc);
            self.ast.push_constant(interned, ty, loc)
        }

        /// Parses a floating-point literal and pushes it as a `Constant` node.
        ///
        /// An `f32` or `f64` suffix selects the type; without a suffix the
        /// literal is an `f64`. Panics if the remaining text does not parse as
        /// a float.
        fn parse_floating_constant(&mut self, item: &TokenItem, loc: SourceLocation) -> Index {
            let lexeme = item.lexeme();
            // (digits without suffix, interned float type)
            let lexeme = lexeme
                .strip_suffix("f32")
                .map(|l| (l, self.intern.get_f32_type()))
                .unwrap_or(
                    lexeme
                        .strip_suffix("f64")
                        .map(|l| (l, self.intern.get_f64_type()))
                        .unwrap_or((lexeme, self.intern.get_f64_type())),
                );

            let bits = if lexeme.1 == self.intern.get_f32_type() {
                self.intern.get_or_insert(intern::Key::F32 {
                    bits: lexeme.0.parse::().unwrap(),
                })
            } else {
                self.intern.get_or_insert(intern::Key::F64 {
                    bits: lexeme.0.parse::().unwrap(),
                })
            };

            let ty = self.ast.push_interend_type(lexeme.1, loc);
            self.ast.push_constant(bits, ty, loc)
        }

        /// TYPE <-
        ///     * const? volatile? noalias? TYPE
        ///     [ INTEGER_LITERAL ] const? volatile? noalias? TYPE
        ///     INTEGRAL_TYPE  // u[0..65535] | i[0..65535]
        ///     IDENTIFIER
        ///     SIMPLE_TYPE
        fn parse_type(&mut self, tokens: &mut TokenIterator) -> ParseResult {
            let loc = tokens.current_source_location();
            match tokens
                .peek_token()
                .ok_or(ErrorInfo {
                    error: ParseError::ExpectedTypeName,
                    loc: tokens.current_source_location(),
                })?
.token() { Token::Star => self.parse_pointer(tokens), Token::OpenSquareBracket => self.parse_array_type(tokens), Token::Ident => { let token = tokens.next().unwrap(); match self .try_parse_integral_type(token.lexeme()) .map_err(|error| ErrorInfo { error, loc: token.source_location(), })? { Some(int) => Ok(self.ast.push_interend_type(int, loc)), None => { let name = self.intern.get_or_insert(intern::Key::String { str: token.lexeme(), }); // TODO: this will cause issues with redefinitions of types with the same name // and actually, make type into a proper node of the ast Ok(self .ast .push_type_ref_unresolved(self.current_scope(), name, loc)) } } } token => { let ty = self.parse_simple_type(token).ok_or(ErrorInfo { error: ParseError::ExpectedTypeName, loc: tokens.current_source_location(), })?; _ = tokens.next(); Ok(self.ast.push_interend_type(ty, loc)) } } } /// GLOBAL_DECL <- /// const IDENTIFIER: TYPENAME = EXPR; fn parse_const_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { let err = 'blk: { let loc = tokens.current_source_location(); let Some(_) = tokens.eat_token(Token::Const) else { break 'blk ErrorInfo { error: ParseError::ExpectedToken(Token::Const), loc, }; }; let ident = match self.parse_ident(tokens) { Ok(i) => i, Err(err) => { break 'blk err; } }; let Some(_) = tokens.eat_token(Token::Colon) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(Token::Colon), loc, }); }; let typename = match self.parse_type(tokens) { Ok(i) => i, Err(err) => { break 'blk err; } }; let Some(_) = tokens.eat_token(Token::Equal) else { break 'blk ErrorInfo { error: ParseError::ExpectedToken(Token::Equal), loc: tokens.current_source_location(), }; }; let expr = match self.parse_expr(tokens) { Ok(i) => i, Err(err) => { break 'blk err; } }; let Some(_) = tokens.eat_token(Token::Semi) else { break 'blk ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), }; }; let decl = self.ast.push_global_decl(ident, typename, 
expr, loc); self.syms .insert_symbol(self.current_scope(), ident, SymbolKind::Const, decl); return Ok(decl); }; tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(self.ast.push_error(err.error, err.loc)) } /// FUNCTION_PROTO <- /// fn IDENTIFIER () /// fn IDENTIFIER () -> TYPENAME /// fn IDENTIFIER ( PARAMETER_LIST ,? ) /// fn IDENTIFIER ( PARAMETER_LIST ,? ) -> TYPENAME fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let _ = tokens.eat_token(Token::Fn).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Fn), loc, })?; let ident = self.parse_ident(tokens)?; let parameters = self.parse_parenthesised(tokens, |this, tokens| { if tokens.is_next_token(Token::CloseParens) { Ok(this.ast.push_parameter_list([], loc)) } else { this.parse_parameter_list(tokens) } })?; let return_type = if let Some(_) = tokens.eat_token(Token::MinusGreater) { self.parse_type(tokens)? } else { self.ast.push_interend_type( self.intern.get_void_type(), tokens.current_source_location(), ) }; return Ok(self.ast.push_fn_proto(ident, return_type, parameters, loc)); } fn parse_fn_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let func = self.ast.reserve_node(); self.push_scope(func, intern::Index::invalid()); let proto = self.parse_fn_proto(tokens).map_err(|e| { self.pop_scope(); e })?; let body = self.parse_block(tokens).map_err(|e| { self.pop_scope(); e })?; self.pop_scope(); self.ast.set_fn_decl(func, proto, body, loc); Ok(func) } /// FUNCTION_DECL <- /// FUNCTION_PROTO BLOCK fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Index { match self.parse_fn_inner(tokens) { Ok(i) => i, Err(err) => { self.find_next_fn_or_const(tokens); self.push_error(err.error, err.loc) } } } /// RETURN_STATEMENT <- /// return EXPRESSION? 
; fn parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> ParseResult { // SAFETY: function invariance let ret = tokens.next().unwrap(); let loc = ret.source_location(); let expr = if tokens.eat_token(Token::Semi).is_some() { self.ast.push_ret(None, loc) } else { match self.parse_expr(tokens) { Ok(i) => { tokens.eat_token(Token::Semi).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; self.ast.push_ret(Some(i), loc) } Err(err) => { tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; self.push_error(err.error, err.loc) } } }; Ok(expr) } /// VAR_DECL <- /// (let | var) IDENTIFIER (: TYPENAME)? ; /// (let | var) IDENTIFIER (: TYPENAME)? = EXPRESSION ; fn parse_var_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { match self.parse_var_decl_inner(tokens) { Ok(i) => { _ = tokens.eat_token(Token::Semi).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(i) } Err(err) => { tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(self.push_error(err.error, err.loc)) } } } fn parse_var_decl_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { // SAFETY: function invariance let let_or_var = tokens.next().unwrap(); let loc = let_or_var.source_location(); let is_let = let_or_var.token() == Token::Let; let name = self.parse_ident(tokens)?; let ty = if tokens.eat_token(Token::Colon).is_some() { Some(self.parse_type(tokens)?) } else { None }; let assignment = if tokens.eat_token(Token::Equal).is_some() { Some(self.parse_expr(tokens)?) 
} else { None }; let decl = self.ast.push_var_decl(is_let, name, ty, assignment, loc); self.syms.insert_symbol( self.current_scope(), name, SymbolKind::Local(tokens.current_source_location()), decl, ); Ok(decl) } fn parse_block_inner( &mut self, block: Index, tokens: &mut TokenIterator, ) -> ParseResult { let loc = tokens.current_source_location(); let mut statements = Vec::new(); let trailing = loop { if tokens.is_next_token(Token::CloseBrace) { break None; } let next = tokens.peek_token().ok_or(ErrorInfo { error: ParseError::UnexpectedEndOfTokens, loc: tokens.current_source_location(), })?; if let Some(decl) = self.parse_constant_decls(tokens)? { statements.push(decl); } else { match next.token() { Token::Return => { statements.push(self.parse_return_stmt(tokens)?); } Token::Var | Token::Let => { statements.push(self.parse_var_decl(tokens)?); } _ => { if self.is_statement(tokens) { // expr -> statements let expr = self .parse_with_trailing_semi(tokens, |this, tokens| { this.parse_expr(tokens) })?; statements.push(expr); } else { // expr -> trailing let expr = self.parse_expr(tokens)?; if !tokens.is_next_token(Token::CloseBrace) { statements.push(self.push_error( ParseError::ExpectedEndOfBlock, tokens.current_source_location(), )); } else { break Some(expr); } } } } } }; self.ast.set_block(block, statements, trailing, loc); Ok(block) } /// BLOCK <- /// { STATEMENT* EXPRESSION? 
} fn parse_block(&mut self, tokens: &mut TokenIterator) -> ParseResult { let block = self.parse_braced(tokens, |this, tokens| { let block = this.ast.reserve_node(); this.push_scope(block, intern::Index::invalid()); let block_result = this.parse_block_inner(block, tokens); this.pop_scope(); block_result })?; Ok(block) } /// PARAMETER_LIST <- /// PARAMETER /// PARAMETER_LIST , ARGUMENT fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let mut params = Vec::new(); loop { params.push(self.parse_parameter(tokens)?); if !tokens.is_next_token(Token::Comma) { break; } if tokens.is_next_token2(Token::CloseParens) { break; } // skip comma _ = tokens.next(); } return Ok(self.ast.push_parameter_list(params, loc)); } /// PARAMETER <- /// IDENT : TYPENAME fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let name = self.parse_ident(tokens)?; let Some(_) = tokens.eat_token(Token::Colon) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(Token::Colon), loc, }); }; let ty = self.parse_type(tokens)?; let param = self.ast.push_parameter(name, ty, loc); self.syms .insert_symbol(self.current_scope(), name, SymbolKind::Local(loc), param); return Ok(param); } /// ARGUMENT <- /// IDENT : EXPR /// EXPR fn parse_argument(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let name = if tokens.is_next_token2(Token::Colon) && tokens.is_next_token(Token::Ident) { let name = self.parse_ident(tokens)?; // we checked `is_next_token2` _ = tokens.eat_token(Token::Colon).unwrap(); Some(name) } else { None }; let expr = self.parse_expr(tokens)?; let i = match name { Some(name) => self.ast.push_named_argument(name, expr, loc), None => self.ast.push_argument(expr, loc), }; Ok(i) } /// ARGUMENT_LIST <- /// ARGUMENT /// ARGUMENT_LIST , ARGUMENT fn parse_argument_list(&mut self, tokens: &mut TokenIterator) 
-> ParseResult { let loc = tokens.current_source_location(); let mut args = Vec::new(); loop { args.push(self.parse_argument(tokens)?); if !tokens.is_next_token(Token::Comma) { break; } if tokens.is_next_token2(Token::CloseParens) { break; } // skip comma _ = tokens.next(); } return Ok(self.ast.push_argument_list(args, loc)); } /// PRIMARY_EXPR <- /// IDENTIFIER /// INTEGER_CONSTANT /// FLOATING_CONSTANT /// ( EXPRESSION ) /// BLOCK fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let Some(next) = tokens.peek_token() else { return Err(ErrorInfo { error: ParseError::ExpectedPrimaryExpression, loc, }); }; match next.token() { Token::IntegerBinConstant | Token::IntegerHexConstant | Token::IntegerOctConstant | Token::IntegerConstant => { _ = tokens.next(); return Ok(self.parse_integral_constant(&next, next.source_location())); } Token::FloatingConstant | Token::FloatingExpConstant | Token::DotFloatingConstant | Token::DotFloatingExpConstant => { _ = tokens.next(); return Ok(self.parse_floating_constant(&next, next.source_location())); } Token::OpenParens => { let expr = self.parse_parenthesised(tokens, |this, tokens| this.parse_expr(tokens))?; return Ok(expr); } Token::OpenBrace => { return self.parse_block(tokens); } Token::Ident => { _ = tokens.next(); let ident = next.lexeme(); let ident = self .intern .get_or_insert(intern::Key::String { str: ident }); return Ok(self .ast .push_decl_ref_unresolved(self.current_scope(), ident, loc)); } // TODO: eventually handle paths _ => { return Err(ErrorInfo { error: ParseError::ExpectedPrimaryExpression, loc, }); } } } /// POSTFIX_EXPR <- /// PRIMARY_EXPR /// PRIMARY_EXPR ( ) /// PRIMARY_EXPR ( ARGUMENT_LIST ) /// PRIMARY_EXPR [ EXPR ] /// POSTFIX_EXPR . 
IDENTIFIER fn parse_postfix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let mut lhs = self.parse_primary_expr(tokens)?; while let Some(postfix) = self.try_parse_postfix_expr_inner(tokens, lhs)? { lhs = postfix; } Ok(lhs) } fn try_parse_postfix_expr_inner( &mut self, tokens: &mut TokenIterator, lhs: Index, ) -> ParseResult> { let lhs = if let Some(next) = tokens.peek_token() { let loc = next.source_location(); match next.token() { Token::OpenParens => { let arguments = self.parse_parenthesised(tokens, |this, tokens| { if tokens.is_next_token(Token::CloseParens) { Ok(this.ast.push_argument_list([], loc)) } else { this.parse_argument_list(tokens) } })?; Some(self.ast.push_call_expr(lhs, arguments, loc)) } Token::OpenSquareBracket => { let subscript = self.parse_bracketed(tokens, |this, tokens| this.parse_expr(tokens))?; Some( self.ast .push_binary(Tag::SubscriptExpr, lhs, subscript, loc), ) } Token::Dot if tokens.is_next_token2(Token::Ident) => { _ = tokens.next(); let loc = tokens.current_source_location(); let name = self.parse_ident(tokens)?; Some(self.ast.push_field_access(lhs, name, loc)) } _ => None, } } else { None }; Ok(lhs) } fn push_error(&mut self, error: ParseError, loc: SourceLocation) -> Index { self.errors.push(ErrorInfo { error, loc }); self.ast.push_error(error, loc) } /// PREFIX_EXPR <- /// POSTFIX_EXPR /// ! 
POSTFIX_EXPR /// - POSTFIX_EXPR /// & POSTFIX_EXPR /// * POSTFIX_EXPR fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let next = tokens.peek_token().ok_or(ErrorInfo { error: ParseError::ExpectedPrefixExpression, loc: tokens.current_source_location(), })?; let loc = next.source_location(); let expr = match next.token() { Token::Bang => { _ = tokens.next(); let lhs = self.parse_postfix_expr(tokens)?; self.ast.push_unary(Tag::Not, lhs, loc) } Token::Minus => { _ = tokens.next(); let lhs = self.parse_postfix_expr(tokens)?; self.ast.push_unary(Tag::Negate, lhs, loc) } Token::Ampersand => { _ = tokens.next(); let lhs = self.parse_postfix_expr(tokens)?; self.ast.push_unary(Tag::AddressOf, lhs, loc) } Token::Star => { _ = tokens.next(); let lhs = self.parse_postfix_expr(tokens)?; self.ast.push_unary(Tag::Deref, lhs, loc) } _ => self.parse_postfix_expr(tokens)?, }; Ok(expr) } /// AS_EXPR <- /// PREFIX_EXPR /// PREFIX_EXPR as TYPENAME fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let expr = self.parse_prefix_expr(tokens)?; if tokens.eat_token(Token::As).is_some() { let typename = self.parse_type(tokens)?; return Ok(self.ast.push_cast(expr, typename, loc)); } else { return Ok(expr); } } /// BINARY_EXPR <- /// AS_EXPR /// AS_EXPR * EXPRESSION /// AS_EXPR / EXPRESSION /// AS_EXPR % EXPRESSION /// AS_EXPR + EXPRESSION /// AS_EXPR - EXPRESSION /// AS_EXPR << EXPRESSION /// AS_EXPR >> EXPRESSION /// AS_EXPR < EXPRESSION /// AS_EXPR > EXPRESSION /// AS_EXPR <= EXPRESSION /// AS_EXPR >= EXPRESSION /// AS_EXPR == EXPRESSION /// AS_EXPR != EXPRESSION /// AS_EXPR & EXPRESSION /// AS_EXPR ^ EXPRESSION /// AS_EXPR | EXPRESSION /// AS_EXPR && EXPRESSION /// AS_EXPR || EXPRESSION fn parse_binary_expr( &mut self, tokens: &mut TokenIterator, precedence: u32, ) -> ParseResult { let mut node = self.parse_as_expr(tokens)?; loop { let Some(tok) = tokens.peek_token() else { break; }; let loc = 
tok.source_location(); let Some(prec) = PRECEDENCE_MAP.get(&tok.token()).cloned() else { break; }; if prec < precedence { break; } // SAFETY: we peeked `tok` let tok = tokens.next().unwrap(); let lhs = node; let rhs = self.parse_binary_expr(tokens, prec + 1)?; let tag = match tok.token() { Token::PipePipe => Tag::Or, Token::AmpersandAmpersand => Tag::And, Token::Pipe => Tag::BitOr, Token::Caret => Tag::BitXOr, Token::Ampersand => Tag::BitAnd, Token::BangEqual => Tag::NEq, Token::EqualEqual => Tag::Eq, Token::LessEqual => Tag::Le, Token::GreaterEqual => Tag::Ge, Token::Less => Tag::Lt, Token::Greater => Tag::Gt, Token::GreaterGreater => Tag::Shr, Token::LessLess => Tag::Shl, Token::Plus => Tag::Add, Token::Minus => Tag::Sub, Token::Percent => Tag::Rem, Token::Star => Tag::Mul, Token::Slash => Tag::Div, _ => unreachable!(), }; node = self.ast.push_binary(tag, lhs, rhs, loc); } Ok(node) } /// ASSIGNMENT_EXPR <- /// BINARY_EXPRESSION /// BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION /// ASSIGNMENT_OP <- /// = += -= *= /= %= ... 
fn parse_assignment_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let lhs = self.parse_binary_expr(tokens, 0)?; if tokens .peek_token() .map(|itm| itm.token().is_assignment_op()) == Some(true) { // SAFETY: we peeked let op = tokens.next().unwrap(); let loc = op.source_location(); let rhs = self.parse_expr(tokens)?; let rhs = if op.token() == Token::Equal { rhs } else { let tag = match op.token() { Token::PlusEqual => Tag::Add, Token::MinusEqual => Tag::Sub, Token::StarEqual => Tag::Mul, Token::SlashEqual => Tag::Sub, Token::PercentEqual => Tag::Rem, Token::PipeEqual => Tag::BitOr, Token::CaretEqual => Tag::BitXOr, Token::AmpersandEqual => Tag::BitAnd, Token::LessLessEqual => Tag::Shl, Token::GreaterGreaterEqual => Tag::Shr, _ => { unreachable!() } }; self.ast.push_binary(tag, lhs, rhs, loc) }; Ok(self.ast.push_assign(lhs, rhs, loc)) } else { Ok(lhs) } } /// ELSE_EXPR <- /// 'else' (IF_EXPR | EXPR_OR_STATEMENT_OR_BLOCK) fn parse_else_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { // SAFETY: function invariance let _else_ = tokens.eat_token(Token::Else).unwrap(); if tokens.is_next_token(Token::If) { self.parse_if_expr(tokens) } else { self.parse_expr_or_block_as_block(tokens) } } /// IF_EXPR <- /// 'if' ( EXPR ) EXPR_OR_STATEMENT_OR_BLOCK ELSE_EXPR? 
fn parse_if_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { // SAFETY: function invariance let iff = tokens.eat_token(Token::If).unwrap(); let loc = iff.source_location(); let cond = self.parse_parenthesised(tokens, |this, tokens| this.parse_expr(tokens))?; let body = self.parse_expr_or_block_as_block(tokens)?; if tokens.is_next_token(Token::Else) { let else_expr = self.parse_else_expr(tokens)?; Ok(self.ast.push_if_else(cond, body, else_expr, loc)) } else { Ok(self.ast.push_if(cond, body, loc)) } } fn parse_expr_or_block_as_block( &mut self, tokens: &mut TokenIterator, ) -> ParseResult { let Some(next) = tokens.peek_token() else { return Err(ErrorInfo { error: ParseError::ExpectedExpression, loc: tokens.current_source_location(), }); }; match next.token() { Token::OpenBrace => self.parse_block(tokens), _ => { let loc = tokens.current_source_location(); let expr = self.parse_expr(tokens)?; Ok(self.ast.push_block([], Some(expr), loc)) } } } fn parse_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let Some(next) = tokens.peek_token() else { return Err(ErrorInfo { error: ParseError::ExpectedExpression, loc, }); }; match next.token() { Token::If => self.parse_if_expr(tokens), _ => self.parse_assignment_expr(tokens), } } /// TYPE_DECL <- /// type IDENTIFIER = TYPE_UNION ; /// type IDENTIFIER = '(' (TYPE,)* ')' ; /// type IDENTIFIER = extern? union { (IDENTIFIER: TYPE,)* } /// type IDENTIFIER = extern? packed? enum { (IDENTIFIER (= EXPRESSION),)* } /// type IDENTIFIER = extern? packed? 
struct { (IDENTIFIER: TYPE,)* } fn parse_type_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { _ = tokens.eat_token(Token::Type).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Type), loc: tokens.current_source_location(), }); let name = self.parse_ident(tokens)?; let loc = tokens.current_source_location(); _ = tokens.eat_token(Token::Equal).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Equal), loc: tokens.current_source_location(), }); let (has_attributes, c_like, packed) = { let vec = tokens.eat_all_zero_or_once(&[Token::Extern, Token::Packed]); (vec[0] || vec[1], vec[0], vec[1]) }; let Some(next) = tokens.peek_token() else { return Err(ErrorInfo { error: ParseError::ExpectedTypeDeclaration, loc: tokens.current_source_location(), }); }; let decl = match next.token() { Token::Struct => self.parse_struct_decl(tokens, name, c_like, packed, loc), Token::Union => { unimplemented!() } Token::Enum => { unimplemented!() } _ => { if has_attributes { return Err(ErrorInfo { error: ParseError::UnexpectedTypeAttributes, loc: tokens.current_source_location(), }); } match next.token() { Token::OpenParens => { // tuple unimplemented!() } Token::Ident => { // sumtype unimplemented!() } _ => { return Err(ErrorInfo { error: ParseError::ExpectedTypeDeclaration, loc: tokens.current_source_location(), }); } } } }?; self.syms .insert_symbol(self.current_scope(), name, SymbolKind::Type, decl); Ok(decl) } /// SUMTYPE_DECL <- /// type IDENTIFIER = TYPE_UNION /// TYPE_UNION <- /// TYPE (| TYPE_UNION)? /// IDENTIFIER: TYPE (| TYPE_UNION)? fn parse_sumtype_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { todo!() } /// TUPLE_DECL <- /// type IDENTIFIER = (TYPE,* ) fn parse_tuple_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { todo!() } /// UNION_DECL <- /// type IDENTIFIER = union { IDENTIFIER: TYPE,* } fn parse_union_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { todo!() } /// ENUM_DECL <- /// type IDENTIFIER = packed? 
enum { IDENTIFIER (= EXPRESSION),* } fn parse_enum_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { todo!() } /// STRUCT_DECL <- /// type IDENTIFIER = extern? packed? struct { STRUCT_FIELD,* } fn parse_struct_decl( &mut self, tokens: &mut TokenIterator, name: intern::Index, c_like: bool, packed: bool, loc: SourceLocation, ) -> ParseResult { // SAFETY: function invariance _ = tokens.eat_token(Token::Struct).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Struct), loc: tokens.current_source_location(), })?; let decl = self.parse_braced(tokens, |this, tokens| { this.parse_struct_fields(tokens).map(|fields| { _ = tokens.eat_token(Token::Comma); let flags = StructFlags::new(packed, c_like, fields.len() as u32); this.ast.push_struct_decl(name, flags, fields, loc) }) })?; Ok(decl) } fn parse_with_trailing_semi( &mut self, tokens: &mut TokenIterator, parse: F, ) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { match parse(self, tokens) { Ok(i) => { _ = tokens.eat_token(Token::Semi).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(i) } Err(err) => { tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(self.push_error(err.error, err.loc)) } } } fn parse_inner( &mut self, tokens: &mut TokenIterator, open: Token, close: Token, parse: F, on_err: E, ) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, E: FnOnce(&mut Self, &mut TokenIterator, ErrorInfo, TokenItem) -> ParseResult, { let Some(start) = tokens.eat_token(open) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(open), loc: tokens.current_source_location(), }); }; match parse(self, tokens) { Ok(i) => { _ = tokens.eat_token(close).ok_or(ErrorInfo { error: match open { Token::OpenBrace => ParseError::UnmatchedBrace(start.token_pos().start), Token::OpenParens => { 
ParseError::UnmatchedParens(start.token_pos().start) } Token::OpenSquareBracket => { ParseError::UnmatchedSquareBracket(start.token_pos().start) } _ => ParseError::UnmatchedDelimiter(start.token_pos().start), }, loc: tokens.current_source_location(), })?; Ok(i) } Err(e) => on_err(self, tokens, e, start), } } fn parse_inner2( &mut self, tokens: &mut TokenIterator, open: Token, close: Token, parse: F, ) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { self.parse_inner(tokens, open, close, parse, |this, tokens, err, start| { match close { Token::CloseBrace => { tokens.advance_past_end_of_braced().ok_or(ErrorInfo { error: ParseError::UnmatchedBrace(start.token_pos().start), loc: tokens.current_source_location(), })?; } Token::CloseParens => { tokens.advance_past_end_of_parens().ok_or(ErrorInfo { error: ParseError::UnmatchedParens(start.token_pos().start), loc: tokens.current_source_location(), })?; } Token::CloseSquareBracket => { tokens.advance_past_end_of_bracketed().ok_or(ErrorInfo { error: ParseError::UnmatchedSquareBracket(start.token_pos().start), loc: tokens.current_source_location(), })?; } Token::Semi => { tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; } _ => unimplemented!(), } Ok(this.push_error(err.error, err.loc)) }) } fn parse_bracketed(&mut self, tokens: &mut TokenIterator, parse: F) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { self.parse_inner2( tokens, Token::OpenSquareBracket, Token::CloseSquareBracket, parse, ) } fn parse_braced(&mut self, tokens: &mut TokenIterator, parse: F) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { self.parse_inner2(tokens, Token::OpenBrace, Token::CloseBrace, parse) } fn parse_parenthesised( &mut self, tokens: &mut TokenIterator, parse: F, ) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { 
self.parse_inner2(tokens, Token::OpenParens, Token::CloseParens, parse) } fn parse_struct_fields( &mut self, tokens: &mut TokenIterator, ) -> ParseResult> { let mut fields = Vec::new(); loop { fields.push(self.parse_struct_field(tokens)?); if !tokens.is_next_token(Token::Comma) { break; } if tokens.is_next_token2(Token::CloseBrace) { break; } // skip comma _ = tokens.next(); } Ok(fields) } /// STRUCT_FIELD <- /// IDENTIFIER: TYPE fn parse_struct_field( &mut self, tokens: &mut TokenIterator, ) -> ParseResult<(intern::Index, Index)> { let name = self.parse_ident(tokens)?; let Some(_) = tokens.eat_token(Token::Colon) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(Token::Colon), loc: tokens.current_source_location(), }); }; let ty = self.parse_type(tokens)?; return Ok((name, ty)); } /// CONSTANT_DECL <- /// FUNCTION_DECL /// GLOBAL_DECL /// STRUCT_DECL fn parse_constant_decls( &mut self, tokens: &mut TokenIterator, ) -> ParseResult> { let next = tokens.peek_token().ok_or(ErrorInfo { error: ParseError::UnexpectedEndOfTokens, loc: tokens.current_source_location(), })?; match next.token() { Token::Fn => Ok(Some(self.parse_fn_decl(tokens))), Token::Const => self.parse_const_decl(tokens).map(|i| Some(i)), Token::Type => self.parse_type_decl(tokens).map(|i| Some(i)), _ => Ok(None), } } /// FILE <- /// (FUNCTION_DECL | GLOBAL_DECL)* fn parse_file(&mut self, tokens: &mut TokenIterator) -> Index { let start = tokens.current_source_location(); let mut decls = Vec::new(); let file = self.ast.reserve_node(); self.push_scope(file, intern::Index::invalid()); while let Some(next) = tokens.peek_token() { let loc = next.source_location(); let decl = match self.parse_constant_decls(tokens).and_then(|i| match i { Some(i) => Ok(i), None => { let error = ParseError::UnexpectedTokenAtFileScope; let node = self.push_error(error, loc); self.find_next_fn_or_const(tokens); Ok(node) } }) { Ok(i) => i, Err(err) => self.push_error(err.error, err.loc), }; decls.push(decl); } 
self.pop_scope(); self.ast.set_file(file, decls, start); file } /// FILE <- /// (FUNCTION_DECL | GLOBAL_DECL)* pub fn parse(&mut self, mut tokens: TokenIterator) { let file = self.parse_file(&mut tokens); self.ast.set_root([file]); eprintln!("resolving decls:"); self.resolve_decl_refs(); eprintln!("interning types:"); self.intern_types(); } fn push_scope(&mut self, ast: Index, name: intern::Index) { let parent = self.scopes.last().cloned(); self.scopes.push(ast); if let Some(parent) = parent { self.syms.insert_symbol( ast, intern::Index::invalid(), SymbolKind::ParentScope, parent, ); } self.syms.insert_scope(name, ast); } fn pop_scope(&mut self) { self.scopes.pop(); } fn is_statement(&self, tokens: &mut TokenIterator) -> bool { let mut tokens = tokens.clone(); let mut braces = 0; let mut parens = 0; let mut brackets = 0; while let Some(itm) = tokens.next() { match itm.token() { Token::OpenBrace => { braces += 1; } Token::CloseBrace => { braces -= 1; } Token::OpenParens => { parens += 1; } Token::CloseParens => { parens -= 1; } Token::OpenSquareBracket => { brackets += 1; } Token::CloseSquareBracket => { brackets -= 1; } Token::Semi => { if braces == 0 && parens == 0 && brackets == 0 { return true; } } _ => {} } if braces < 0 || parens < 0 || brackets < 0 { break; } } false } fn find_next_fn_or_const(&mut self, tokens: &mut TokenIterator) -> Option<()> { tokens .advance_until_before_one_of(&[Token::Const, Token::Fn, Token::Type]) .map(|_| ()) } } } pub mod ir_gen { use intern::InternPool; use super::*; use crate::{symbol_table::syms2::Symbols, triples::*}; struct IRGen { ast: Ast, syms: Symbols, intern: InternPool, ir: IR, } impl IRGen {} }