#![allow(dead_code)]

use std::{
    fmt::{Debug, Display},
    num::NonZero,
};

use crate::{lexer::SourceLocation, tokens::Token, writeln_indented};

pub mod intern {
    use std::{
        collections::BTreeMap,
        hash::{Hash, Hasher},
    };

    use num_bigint::{BigInt, BigUint, Sign};

    use crate::{
        common::{from_lo_hi_dwords, into_lo_hi_dwords},
        variant,
    };

    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    #[repr(u8)]
    pub enum SimpleType {
        F32 = 0,
        F64,
        Bool,
        Void,
        USize,
        ISize,
        ComptimeInt,
    }

    impl From<u8> for SimpleType {
        fn from(value: u8) -> Self {
            match value {
                0 => Self::F32,
                1 => Self::F64,
                2 => Self::Bool,
                3 => Self::Void,
                4 => Self::USize,
                5 => Self::ISize,
                6 => Self::ComptimeInt,
                _ => panic!("{value} is not a simple type"),
            }
        }
    }

    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub enum Tag {
        String,
        SIntSmall,
        UIntSmall,
        UInt64,
        SInt64,
        F32,
        F64,
        PositiveInt,
        NegativeInt,
        UIntType,
        SIntType,
        SimpleType,
        PointerType,
        ArrayType,
        FunctionType,
        StructType,
    }

    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    struct Item {
        tag: Tag,
        index: u32,
    }

    #[derive(Debug, Clone, PartialEq)]
    #[non_exhaustive]
    pub enum Key<'a> {
        String { str: &'a str },
        SIntSmall { bits: i32 },
        UIntSmall { bits: u32 },
        SInt64 { bits: i64 },
        UInt64 { bits: u64 },
        F32 { bits: f32 },
        F64 { bits: f64 },
        PositiveInt { bigint: BigInt },
        NegativeInt { bigint: BigInt },
        UIntType { bits: u16 },
        SIntType { bits: u16 },
        SimpleType { ty: SimpleType },
        PointerType { pointee: Index, flags: PointerFlags },
        ArrayType { pointee: Index, flags: PointerFlags, length: u32 },
        FunctionType { return_type: Index, parameters: Vec<Index> },
        StructType { name: Index, packed: bool, c_like: bool, fields: Vec<(Index, Index)> },
    }

    impl Hash for Key<'_> {
        fn hash<H: Hasher>(&self, state: &mut H) {
            core::mem::discriminant(self).hash(state);
            match self {
                Key::String { str } => str.hash(state),
                Key::SIntSmall { bits } => bits.hash(state),
                Key::UIntSmall { bits } => bits.hash(state),
                Key::SInt64 { bits } => bits.hash(state),
                Key::UInt64 { bits } => bits.hash(state),
                Key::F32 { bits } => ordered_float::OrderedFloat(*bits).hash(state),
                Key::F64 { bits } => ordered_float::OrderedFloat(*bits).hash(state),
                Key::PositiveInt { bigint } => bigint.hash(state),
                Key::NegativeInt { bigint } => bigint.hash(state),
                Key::UIntType { bits } => bits.hash(state),
                Key::SIntType { bits } => bits.hash(state),
                Key::SimpleType { ty } => ty.hash(state),
                Key::PointerType { pointee, flags } => (pointee, flags).hash(state),
                Key::ArrayType { pointee, flags, length } => {
                    (*pointee, *flags, *length).hash(state)
                }
                Key::StructType { name, packed, c_like, fields } => {
                    (*name, *packed, *c_like, fields).hash(state)
                }
                Key::FunctionType { return_type, parameters } => {
                    (return_type, parameters).hash(state)
                }
            }
        }
    }

    // #[repr(packed)]
    #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct PointerFlags {
        pub volatile: bool,
        pub is_const: bool,
        pub noalias: bool,
    }

    impl PointerFlags {
        pub fn new(is_const: bool, volatile: bool, noalias: bool) -> Self {
            Self {
                is_const,
                volatile,
                noalias,
            }
        }

        fn pack(self) -> u8 {
            (self.volatile as u8) << 0 | (self.is_const as u8) << 1 | (self.noalias as u8) << 2
        }

        fn unpack(packed: u8) -> Self {
            Self {
                volatile: packed & (1 << 0) != 0,
                is_const: packed & (1 << 1) != 0,
                noalias: packed & (1 << 2) != 0,
            }
        }
    }

    #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
    struct StructFlags {
        packed: bool,
        c_like: bool,
        num_fields: u32,
    }

    impl StructFlags {
        const MASK: u32 = (1u32 << 30) - 1;

        pub fn new(packed: bool, c_like: bool, num_fields: u32) -> Self {
            assert!(num_fields < (1 << 30));
            Self {
                packed,
                c_like,
                num_fields,
            }
        }

        fn pack(self) -> u32 {
            assert!(self.num_fields < (1 << 30));
            (self.packed as u32) << 31 | (self.c_like as u32) << 30 | self.num_fields & Self::MASK
        }

        fn unpack(packed: u32) -> Self {
            Self {
                packed: packed & (1 << 31) != 0,
                c_like: packed & (1 << 30) != 0,
                num_fields: packed & Self::MASK,
            }
        }
    }

    #[derive(Debug, Clone, Copy)]
    struct FunctionInfo {
        void_return: bool,
        num_params: u32,
    }

    impl FunctionInfo {
        fn new(void_return: bool, num_params: u32) -> Self {
            Self {
                void_return,
                num_params,
            }
        }

        const MASK: u32 = 1u32 << (u32::BITS - 1);

        fn pack(self) -> u32 {
            (self.void_return as u32 * Self::MASK) | self.num_params & !Self::MASK
        }

        fn unpack(packed: u32) -> Self {
            Self {
                void_return: packed & Self::MASK != 0,
                num_params: packed & !Self::MASK,
            }
        }

        fn len(self) -> u32 {
            self.void_return as u32 + self.num_params
        }
    }

    impl Item {
        fn idx(self) -> usize {
            self.index as usize
        }
    }

    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Index(u32);

    impl Index {
        pub fn into_u32(self) -> u32 {
            unsafe { core::mem::transmute(self) }
        }
        pub fn as_u32(&self) -> &u32 {
            unsafe { core::mem::transmute(self) }
        }
        fn index(&self) -> usize {
            self.0 as usize
        }
        pub fn is_valid(&self) -> bool {
            self.0 != u32::MAX
        }
        pub fn invalid() -> Self {
            Self(u32::MAX)
        }
    }

    pub struct InternPool {
        tags: Vec<Tag>,
        indices: Vec<u32>,
        strings: Vec<u8>,
        words: Vec<u32>,
        hashed: BTreeMap<u64, Index>,
    }

    impl std::fmt::Debug for InternPool {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.debug_struct("InternPool")
                .field_with("keys", |f| {
                    let mut list = f.debug_list();
                    let keys = (0..self.indices.len())
                        .map(|i| Index(i as u32))
                        .map(|idx| (idx, self.get_key(idx)));
                    for (idx, key) in keys {
                        list.entry_with(|f| write!(f, "{}: {key:?}", idx.0));
                    }
                    list.finish()
                })
                .field_with("hashed", |f| {
                    let mut list = f.debug_list();
                    for (hash, idx) in self.hashed.iter() {
                        list.entry_with(|f| write!(f, "{hash}: {}", idx.0));
                    }
                    list.finish()
                })
                .finish_non_exhaustive()
        }
    }

    const STATIC_KEYS: [Key; 19] = [
        Key::SimpleType { ty: SimpleType::Bool },
        Key::SimpleType { ty: SimpleType::F32 },
        Key::SimpleType { ty: SimpleType::F64 },
        Key::SimpleType { ty: SimpleType::USize },
        Key::SimpleType { ty: SimpleType::ISize },
        Key::SimpleType { ty: SimpleType::Void },
        Key::SimpleType { ty: SimpleType::ComptimeInt },
        Key::SIntType { bits: 1 },
        Key::UIntType { bits: 1 },
        Key::SIntType { bits: 0 },
        Key::UIntType { bits: 0 },
        Key::SIntType { bits: 8 },
        Key::UIntType { bits: 8 },
        Key::SIntType { bits: 16 },
        Key::UIntType { bits: 16 },
        Key::SIntType { bits: 32 },
        Key::UIntType { bits: 32 },
        Key::SIntType { bits: 64 },
        Key::UIntType { bits: 64 },
    ];

    impl InternPool {
        // These getters assume the corresponding keys were interned from STATIC_KEYS
        // when the pool was built by `create()`.
        pub fn get_void_type(&self) -> Index {
            self.get_assume_present(Key::SimpleType { ty: SimpleType::Void })
        }
        pub fn get_bool_type(&self) -> Index {
            self.get_assume_present(Key::SimpleType { ty: SimpleType::Bool })
        }
        pub fn get_f32_type(&self) -> Index {
            self.get_assume_present(Key::SimpleType { ty: SimpleType::F32 })
        }
        pub fn get_f64_type(&self) -> Index {
            self.get_assume_present(Key::SimpleType { ty: SimpleType::F64 })
        }
        pub fn get_comptime_int_type(&self) -> Index {
            self.get_assume_present(Key::SimpleType { ty: SimpleType::ComptimeInt })
        }
        pub fn get_usize_type(&self) -> Index {
            self.get_assume_present(Key::SimpleType { ty: SimpleType::USize })
        }
        pub fn get_isize_type(&self) -> Index {
            self.get_assume_present(Key::SimpleType { ty: SimpleType::ISize })
        }
        pub fn get_u0_type(&self) -> Index {
self.get_assume_present(Key::UIntType { bits: 0 }) } pub fn get_i0_type(&self) -> Index { self.get_assume_present(Key::SIntType { bits: 0 }) } pub fn get_u1_type(&self) -> Index { self.get_assume_present(Key::UIntType { bits: 1 }) } pub fn get_i1_type(&self) -> Index { self.get_assume_present(Key::SIntType { bits: 1 }) } pub fn get_u8_type(&self) -> Index { self.get_assume_present(Key::UIntType { bits: 8 }) } pub fn get_i8_type(&self) -> Index { self.get_assume_present(Key::SIntType { bits: 8 }) } pub fn get_u16_type(&self) -> Index { self.get_assume_present(Key::UIntType { bits: 16 }) } pub fn get_i16_type(&self) -> Index { self.get_assume_present(Key::SIntType { bits: 16 }) } pub fn get_u32_type(&self) -> Index { self.get_assume_present(Key::UIntType { bits: 32 }) } pub fn get_i32_type(&self) -> Index { self.get_assume_present(Key::SIntType { bits: 32 }) } pub fn get_u64_type(&self) -> Index { self.get_assume_present(Key::UIntType { bits: 64 }) } pub fn get_i64_type(&self) -> Index { self.get_assume_present(Key::SIntType { bits: 64 }) } } #[derive(Debug, Clone, Copy)] pub struct TypeInfo { pub bitsize: u32, pub bitalign: u32, } impl InternPool { pub fn size_of_type(&self, index: Index, ptr_size: TypeInfo) -> TypeInfo { match self.get_key(index) { Key::UIntType { bits } => { let bits = bits as u32; TypeInfo { bitsize: bits, bitalign: bits.next_multiple_of(8).next_power_of_two(), } } Key::SIntType { bits } => { let bits = bits as u32; TypeInfo { bitsize: bits, bitalign: bits.next_multiple_of(8).next_power_of_two(), } } Key::SimpleType { ty } => match ty { SimpleType::F32 => TypeInfo { bitsize: 32, bitalign: 32, }, SimpleType::F64 => TypeInfo { bitsize: 64, bitalign: 64, }, SimpleType::Bool => TypeInfo { bitsize: 1, bitalign: 1, }, SimpleType::Void => TypeInfo { bitsize: 0, bitalign: 0, }, SimpleType::USize => ptr_size, SimpleType::ISize => ptr_size, SimpleType::ComptimeInt => panic!("comptime int is unsized"), }, Key::PointerType { .. } => ptr_size, Key::ArrayType { pointee, length, .. } => { let element_size = self.size_of_type(pointee, ptr_size); let bitsize = element_size.bitalign * length; TypeInfo { bitsize, ..element_size } } Key::FunctionType { .. } => ptr_size, Key::StructType { packed, fields, .. 
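                // Illustrative note (not in the original source): with the per-field
                // alignment handling in the arm below, a non-packed struct of (u8, u32)
                // fields works out to bitsize 64 (8 bits + 24 bits padding + 32 bits)
                // with bitalign 32, while the packed variant is bitsize 40 because the
                // padding step is skipped.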
                } => {
                    // TODO: c-like layout
                    let (size, align) = fields.iter().fold((0, 0), |(size, align), (_name, ty)| {
                        let field_size = self.size_of_type(*ty, ptr_size);
                        // unless the struct is packed, each field starts at an offset
                        // rounded up to that field's alignment
                        let offset = if packed {
                            size
                        } else {
                            size.next_multiple_of(field_size.bitalign)
                        };
                        let size = offset + field_size.bitsize;
                        let align = align.max(field_size.bitalign);
                        (size, align)
                    });
                    TypeInfo {
                        bitsize: size,
                        bitalign: align,
                    }
                }
                _ => {
                    panic!("index was not a type")
                }
            }
        }
    }

    impl InternPool {
        pub fn create() -> Self {
            let mut this = Self {
                tags: Vec::new(),
                indices: Vec::new(),
                strings: Vec::new(),
                words: Vec::new(),
                hashed: BTreeMap::new(),
            };

            this.extend_keys(STATIC_KEYS);

            this
        }

        fn extend_keys<'a, K: IntoIterator<Item = Key<'a>>>(&mut self, keys: K) {
            for k in keys.into_iter() {
                let mut hasher = std::hash::DefaultHasher::new();
                k.hash(&mut hasher);
                let digest = hasher.finish();
                let i = self.insert(k);
                self.hashed.insert(digest, i);
            }
        }

        fn len(&self) -> u32 {
            u32::try_from(self.tags.len())
                .expect(&format!("more than {} items in internpool!", u32::MAX))
        }

        pub fn get_or_insert(&mut self, key: Key) -> Index {
            let mut hasher = std::hash::DefaultHasher::new();
            key.hash(&mut hasher);
            let digest = hasher.finish();
            if let Some(&idx) = self.hashed.get(&digest) {
                idx
            } else {
                let i = self.insert(key);
                self.hashed.insert(digest, i);
                i
            }
        }

        fn insert(&mut self, key: Key) -> Index {
            match key {
                Key::String { str } => {
                    let len = str.len() as u32;
                    let start = self.extend_strings(str);
                    let words_idx = self.extend_words([start, len]);
                    self.create_item(Tag::String, words_idx)
                }
                Key::SIntSmall { bits } => self.create_item(Tag::SIntSmall, bits as u32),
                Key::UIntSmall { bits } => self.create_item(Tag::UIntSmall, bits as u32),
                Key::F32 { bits } => self.create_item(Tag::F32, bits.to_bits()),
                Key::F64 { bits } => {
                    let (lo, hi) = into_lo_hi_dwords(bits.to_bits());
                    let words_idx = self.extend_words([lo, hi]);
                    self.create_item(Tag::F64, words_idx)
                }
                Key::SInt64 { bits } => {
                    let (lo, hi) = into_lo_hi_dwords(bits as u64);
                    let i = self.extend_words([lo, hi]);
                    self.create_item(Tag::SInt64, i)
                }
                Key::UInt64 { bits } => {
                    let (lo, hi) = into_lo_hi_dwords(bits as u64);
                    let i = self.extend_words([lo, hi]);
                    self.create_item(Tag::UInt64, i)
                }
                Key::PositiveInt { bigint } => {
                    let (_, words) = bigint.to_u32_digits();
                    let i = self.push_word(words.len() as u32);
                    _ = self.extend_words(words);
                    self.create_item(Tag::PositiveInt, i)
                }
                Key::NegativeInt { bigint } => {
                    let (_, words) = bigint.to_u32_digits();
                    let i = self.push_word(words.len() as u32);
                    _ = self.extend_words(words);
                    self.create_item(Tag::NegativeInt, i)
                }
                Key::UIntType { bits } => self.create_item(Tag::UIntType, bits as u32),
                Key::SIntType { bits } => self.create_item(Tag::SIntType, bits as u32),
                Key::SimpleType { ty } => self.create_item(Tag::SimpleType, ty as u8 as u32),
                Key::PointerType { pointee, flags } => {
                    let flags = flags.pack();
                    let i = self.extend_words([pointee.0, flags as u32]);
                    self.create_item(Tag::PointerType, i)
                }
                Key::ArrayType {
                    pointee,
                    flags,
                    length,
                } => {
                    let flags = flags.pack();
                    let i = self.extend_words([pointee.0, flags as u32, length]);
                    self.create_item(Tag::ArrayType, i)
                }
                Key::StructType {
                    name,
                    packed,
                    c_like,
                    fields,
                } => {
                    let flags = StructFlags::new(packed, c_like, fields.len() as u32).pack();
                    let i = self.extend_words([name.into_u32(), flags]);
                    self.extend_words(
                        fields
                            .into_iter()
                            .map(|(n, t)| [n.into_u32(), t.into_u32()])
                            .flatten(),
                    );
                    self.create_item(Tag::StructType, i)
                }
                Key::FunctionType {
                    return_type,
                    parameters,
                } => {
                    let info = FunctionInfo::new(
                        return_type == self.get_simple_type(SimpleType::Void),
                        parameters.len() as u32,
                    );
                    let start = self.push_word(info.pack());
                    // a non-void return type is stored right after the info word,
                    // followed by the parameter type indices
                    if !info.void_return {
                        _ = self.push_word(return_type.into_u32());
                    }
                    _ = self.extend_words(parameters.into_iter().map(|i| i.0));
                    self.create_item(Tag::FunctionType, start)
                }
            }
        }

        fn extend_strings<B: AsRef<[u8]>>(&mut self, b: B) -> u32 {
            let idx = self.strings.len() as u32;
            self.strings.extend(b.as_ref());
            idx
        }

        fn extend_words<I: IntoIterator<Item = u32>>(&mut self, i: I) -> u32 {
            let idx = self.words.len() as u32;
            self.words.extend(i);
            idx
        }

        fn push_word(&mut self, word: u32) -> u32 {
            let idx = self.words.len() as u32;
            self.words.push(word);
            idx
        }

        fn create_item(&mut self, tag: Tag, index: u32) -> Index {
            let len = self.len();
            self.tags.push(tag);
            self.indices.push(index);
            Index(len)
        }

        pub fn get_key(&self, index: Index) -> Key {
            let item = self.get_item(index).unwrap();
            match item.tag {
                Tag::String => {
                    let start = self.words[item.idx()];
                    let len = self.words[item.idx() + 1];
                    let str = unsafe {
                        core::str::from_utf8_unchecked(
                            &self.strings[start as usize..][..len as usize],
                        )
                    };
                    Key::String { str }
                }
                Tag::UIntSmall => Key::UIntSmall {
                    bits: item.index as u32,
                },
                Tag::SIntSmall => Key::SIntSmall {
                    bits: item.index as i32,
                },
                Tag::F32 => Key::F32 {
                    bits: f32::from_le_bytes(item.index.to_le_bytes()),
                },
                Tag::F64 => {
                    let idx = item.idx();
                    let bits = from_lo_hi_dwords(self.words[idx], self.words[idx + 1]);
                    Key::F64 {
                        bits: f64::from_le_bytes(bits.to_le_bytes()),
                    }
                }
                Tag::SInt64 => {
                    let bits =
                        from_lo_hi_dwords(self.words[item.idx()], self.words[item.idx() + 1])
                            as i64;
                    Key::SInt64 { bits }
                }
                Tag::UInt64 => {
                    let bits =
                        from_lo_hi_dwords(self.words[item.idx()], self.words[item.idx() + 1]);
                    Key::UInt64 { bits }
                }
                Tag::NegativeInt => {
                    let len = self.words[item.idx()];
                    let start = item.idx() + 1;
                    let end = start + len as usize;
                    let data = BigUint::from_slice(&self.words[start..end]);
                    let bigint = BigInt::from_biguint(Sign::Minus, data);
                    Key::NegativeInt { bigint }
                }
                Tag::PositiveInt => {
                    let len = self.words[item.idx()];
                    let start = item.idx() + 1;
                    let end = start + len as usize;
                    let data = BigUint::from_slice(&self.words[start..end]);
                    let bigint = BigInt::from_biguint(Sign::Plus, data);
                    Key::PositiveInt { bigint }
                }
                Tag::SIntType => {
                    let bits = item.index as u16;
                    Key::SIntType { bits }
                }
                Tag::UIntType => {
                    let bits = item.index as u16;
                    Key::UIntType { bits }
                }
                Tag::SimpleType => {
                    let ty = item.idx() as u8;
                    Key::SimpleType {
                        ty: unsafe { core::mem::transmute::<u8, SimpleType>(ty) },
                    }
                }
                Tag::PointerType => {
                    let pointee = Index(self.words[item.idx()]);
                    let flags = PointerFlags::unpack(self.words[item.idx() + 1] as u8);
                    Key::PointerType { pointee, flags }
                }
                Tag::ArrayType => {
                    let pointee = Index(self.words[item.idx()]);
                    let flags = PointerFlags::unpack(self.words[item.idx() + 1] as u8);
                    let length = self.words[item.idx() + 2];
                    Key::ArrayType {
                        pointee,
                        flags,
                        length,
                    }
                }
                Tag::StructType => {
                    let name = Index(self.words[item.idx()]);
                    let flags = StructFlags::unpack(self.words[item.idx() + 1]);
                    let start = item.idx() + 2;
                    let end = start + flags.num_fields as usize * 2;
                    let fields = self.words[start..end]
                        .iter()
                        .cloned()
                        .array_chunks::<2>()
                        .map(|[n, t]| (Index(n), Index(t)))
                        .collect::<Vec<_>>();
                    Key::StructType {
                        name,
                        packed: flags.packed,
                        c_like: flags.c_like,
                        fields,
                    }
                }
                Tag::FunctionType => {
                    let info = FunctionInfo::unpack(self.words[item.idx()]);
                    let len = info.len();
                    let (return_type, parameters) = if info.void_return {
                        let start = item.idx() + 1;
                        let end = start + info.num_params as usize;
                        let params = self.words[start..end]
                            .iter()
                            .map(|&i| Index(i))
                            .collect::<Vec<_>>();
                        (
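                            // Layout sketch (assumed here, matching `insert` above): a
                            // FunctionType item points at [packed FunctionInfo, return type
                            // (omitted when void), parameter type indices...] in `words`;
                            // the void_return flag decides which shape is read back.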
self.get_assume_present(Key::SimpleType { ty: SimpleType::Void, }), params, ) } else { let start = item.idx() + 2; let end = start + len as usize; let return_type = Index(self.words[item.idx() + 1]); let params = self.words[start..end] .iter() .map(|&i| Index(i)) .collect::>(); (return_type, params) }; Key::FunctionType { return_type, parameters, } } } } pub fn get_assume_present(&self, key: Key) -> Index { let mut hasher = std::hash::DefaultHasher::new(); key.hash(&mut hasher); let digest = hasher.finish(); if let Some(&idx) = self.hashed.get(&digest) { idx } else { panic!("key {key:?} not present in pool.") } } pub fn get_int_type(&mut self, signed: bool, bits: u16) -> Index { let key = match signed { true => Key::SIntType { bits }, false => Key::UIntType { bits }, }; self.get_or_insert(key) } pub fn get_string_index(&mut self, str: &str) -> Index { self.get_or_insert(Key::String { str }) } pub fn get_simple_type(&mut self, ty: SimpleType) -> Index { self.get_or_insert(Key::SimpleType { ty }) } pub fn get_function_type>( &mut self, return_type: Index, parameters: P, ) -> Index { self.get_or_insert(Key::FunctionType { return_type, parameters: parameters.into_iter().collect(), }) } pub fn get_pointer_type(&mut self, pointee: Index, flags: Option) -> Index { let key = Key::PointerType { pointee, flags: flags.unwrap_or_default(), }; self.get_or_insert(key) } pub fn get_struct_type( &mut self, name: Index, packed: bool, c_like: bool, fields: Vec<(Index, Index)>, ) -> Index { let key = Key::StructType { name, packed, c_like, fields, }; self.get_or_insert(key) } pub fn get_array_type( &mut self, pointee: Index, flags: Option, length: u32, ) -> Index { let key = Key::ArrayType { pointee, flags: flags.unwrap_or_default(), length, }; self.get_or_insert(key) } pub fn get_str(&self, index: Index) -> &str { let key = self.get_key(index); assert!(matches!(key, Key::String { .. })); variant!(key => Key::String { str }); str } fn check_bounds(&self, index: Index) -> Option { (index.0 < self.len()).then_some(index) } fn get_item(&self, index: Index) -> Option { self.check_bounds(index).map(|i| Item { tag: self.tags[i.index()], index: self.indices[i.index()], }) } } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum Tag { /// pseudo tag, contains a range from a..b into extra of all files. Root, /// `data` is a range from a..b into extra of all global nodes. 
File, /// `data` is an intern to a name, and an index into extra of [intern: return_type, index: ParameterList] FunctionProto, /// `data` is an index to a FunctionProto and an index to a Block FunctionDecl, /// `data` is a range from a..b into extra of indices to parameters ParameterList, /// `data` is an intern to a name, and an intern to a type Parameter, /// `data` is range from a..b into `extra` of indices to statements Block, /// `data` is range from a..b into `extra` of indices to statements, where the last one is an expression BlockTrailingExpr, /// `data` is an intern to a value, intern to a type Constant, /// `data` is an index to an expression ExprStmt, /// `data` is none ReturnStmt, /// `data` is an index to an expr ReturnExprStmt, /// `data` is a range from a..b into `extra` of an intern to a name and an optional intern to a type VarDecl, /// `data` is a range from a..b into `extra` of an intern to a name and an optional intern to a type MutVarDecl, /// `data` is a range from a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type VarDeclAssignment, /// `data` is a range from a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type MutVarDeclAssignment, /// `data` is an intern to a name, and an offset into `extra` of [type: intern, expr: index] GlobalDecl, /// `data` is an intern to a struct type StructDecl, /// `data` is an index to a VarDecl, GlobalDecl or FunctionDecl DeclRef, /// `data` is an inlined key into the symbol table (scope: index, name: intern) DeclRefUnresolved, /// `data` is an index to an expr and an index to an ArgumentList CallExpr, /// `data` is a range from a..b into extra of indices to arguments ArgumentList, /// `data` is an index to an expression Argument, /// `data` is an index to an expression, and an intern to a name NamedArgument, /// `data` is an index to lhs, and an intern to the type ExplicitCast, /// `data` is a single index to an expr Deref, AddressOf, Not, Negate, /// data is two indices for `lhs` and `rhs` Or, And, BitOr, BitXOr, BitAnd, Eq, NEq, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Rem, Assign, SubscriptExpr, IfExpr, /// `data` is an index to an expression and an index into extra for [if, else] IfElseExpr, // TODO: /// `data` is a ParseError Error, /// placeholder tag for reserved indices/nodes, `data` is none Undefined, } #[derive(Debug, Clone, Copy, thiserror::Error, PartialEq, Eq)] enum ParseError { #[error("Unexpected end of token iter.")] UnexpectedEndOfTokens, #[error("Expected Token {0}.")] ExpectedToken(Token), #[error("Expected Token {0}, but other token was found.")] ExpectedTokenNotFound(Token), #[error("Expected either a function declaration or a global variable.")] UnexpectedTokenAtFileScope, #[error("Expected Ident.")] ExpectedIdent, #[error("Integral types may not be wider than 65535 bits.")] IntegralTypeTooWide, #[error("Expected typename.")] ExpectedTypeName, #[error("Dummy Message.")] ExpectedFunctionPrototype, #[error("Dummy Message.")] ExpectedPrimaryExpression, #[error("Dummy Message.")] ExpectedExpression, #[error("Dummy Message.")] ExpectedPostfixExpression, #[error("Dummy Message.")] ExpectedPrefixExpression, #[error("Dummy Message.")] ExpectedArgumentList, #[error("Dummy Message.")] ExpectedStatement, #[error("Dummy Message.")] UnmatchedParens(u32), #[error("Dummy Message.")] ExpectedTypeDeclaration, #[error("Dummy Message.")] UnexpectedTypeAttributes, #[error("Dummy Message.")] UnmatchedSquareBracket(u32), #[error("Dummy 
Message.")] ExpectedEndOfBlock, #[error("Dummy Message.")] UnmatchedBrace(u32), #[error("Dummy Message.")] UnmatchedDelimiter(u32), #[error("Error in child node {0:?}.")] ErrorNode(Index), } #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub struct Index(NonZero); impl Display for Index { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "%{}", self.0.get()) } } impl Index { pub fn new(i: u32) -> Index { Self(NonZero::::new(i).unwrap()) } pub fn as_u32(&self) -> &u32 { unsafe { core::mem::transmute(self) } } pub fn into_u32(self) -> u32 { unsafe { core::mem::transmute(self) } } fn index(self) -> usize { self.0.get() as usize } } #[repr(packed)] #[derive(Clone, Copy)] struct Node { /// defines the type of the node in the tree tag: Tag, data: Data, } #[derive(Clone, Copy)] union Data { none: (), error: ParseError, index: Index, two_indices: (Index, Index), range: (Index, Index), extra_range: (u32, u32), intern: intern::Index, index_intern: (Index, intern::Index), two_interns: (intern::Index, intern::Index), intern_and_extra_offset: (intern::Index, u32), index_and_extra_offset: (Index, u32), } #[derive(Debug)] #[allow(dead_code)] enum ExpandedData { None, Error(ParseError), Index(Index), TwoIndices(Index, Index), Range(Index, Index), ExtraRange(usize, usize), Intern(intern::Index), IndexIntern(Index, intern::Index), TwoInterns(intern::Index, intern::Index), InternAndExtraOffset(intern::Index, usize), IndexAndExtraOffset(Index, usize), } impl ExpandedData { fn from_none(data: Data) -> Self { Self::None } fn from_error(data: Data) -> Self { Self::Error(data.as_error()) } fn from_index(data: Data) -> Self { Self::Index(data.as_index()) } fn from_two_indices(data: Data) -> Self { let data = data.as_two_indices(); Self::TwoIndices(data.0, data.1) } fn from_range(data: Data) -> Self { let data = data.as_index_range(); Self::Range(data.0, data.1) } fn from_extra_range(data: Data) -> Self { let data = data.as_extra_range(); Self::ExtraRange(data.0, data.1) } fn from_intern(data: Data) -> Self { let data = data.as_intern(); Self::Intern(data) } fn from_index_intern(data: Data) -> Self { let data = data.as_index_intern(); Self::IndexIntern(data.0, data.1) } fn from_two_interns(data: Data) -> Self { let data = data.as_two_interns(); Self::TwoInterns(data.0, data.1) } fn from_intern_and_extra_offset(data: Data) -> Self { let data = data.as_intern_and_extra_offset(); Self::InternAndExtraOffset(data.0, data.1) } fn from_index_and_extra_offset(data: Data) -> Self { let data = data.as_index_and_extra_offset(); Self::IndexAndExtraOffset(data.0, data.1) } } impl From<(Tag, Data)> for ExpandedData { fn from((tag, data): (Tag, Data)) -> Self { match tag { Tag::FunctionProto => Self::from_intern_and_extra_offset(data), Tag::ParameterList => Self::from_extra_range(data), Tag::Root => Self::from_extra_range(data), Tag::File => Self::from_extra_range(data), Tag::ArgumentList | Tag::VarDecl | Tag::MutVarDecl | Tag::VarDeclAssignment | Tag::MutVarDeclAssignment | Tag::BlockTrailingExpr | Tag::Block => Self::from_extra_range(data), Tag::Constant | Tag::Parameter => Self::from_two_interns(data), Tag::Or | Tag::And | Tag::BitOr | Tag::BitXOr | Tag::BitAnd | Tag::Eq | Tag::NEq | Tag::Lt | Tag::Gt | Tag::Le | Tag::Ge | Tag::Shl | Tag::Shr | Tag::Add | Tag::Sub | Tag::Mul | Tag::Div | Tag::Rem | Tag::Assign | Tag::IfExpr | Tag::SubscriptExpr | Tag::CallExpr | Tag::FunctionDecl => Self::from_two_indices(data), Tag::ReturnExprStmt | Tag::DeclRef | Tag::Argument 
| Tag::Deref | Tag::AddressOf | Tag::Not | Tag::Negate | Tag::ExprStmt => Self::from_index(data), Tag::DeclRefUnresolved | Tag::NamedArgument | Tag::ExplicitCast => { Self::from_index_intern(data) } Tag::GlobalDecl => Self::from_intern_and_extra_offset(data), Tag::StructDecl => Self::from_intern(data), Tag::IfElseExpr => Self::from_index_and_extra_offset(data), Tag::Error => Self::from_error(data), Tag::ReturnStmt | Tag::Undefined => Self::from_none(data), } } } impl Data { fn as_error(self) -> ParseError { unsafe { self.error } } fn as_index(self) -> Index { unsafe { self.index } } fn as_two_indices(self) -> (Index, Index) { unsafe { self.two_indices } } fn as_index_range(self) -> (Index, Index) { unsafe { self.range } } fn as_extra_range(self) -> (usize, usize) { let (a, b) = unsafe { self.extra_range }; (a as usize, b as usize) } fn as_intern(self) -> intern::Index { unsafe { self.intern } } fn as_two_interns(self) -> (intern::Index, intern::Index) { unsafe { self.two_interns } } fn as_index_intern(self) -> (Index, intern::Index) { unsafe { self.index_intern } } fn as_index_and_extra_offset(self) -> (Index, usize) { let (i, e) = unsafe { self.index_and_extra_offset }; (i, e as usize) } fn as_intern_and_extra_offset(self) -> (intern::Index, usize) { let (i, e) = unsafe { self.intern_and_extra_offset }; (i, e as usize) } } impl Data { fn none() -> Self { Self { none: () } } fn error(error: ParseError) -> Self { Self { error } } fn index(index: Index) -> Self { Self { index } } fn two_indices(a: Index, b: Index) -> Self { Self { two_indices: (a, b), } } fn two_interns(a: intern::Index, b: intern::Index) -> Self { Self { two_interns: (a, b), } } fn range_of_indices(a: Index, b: Index) -> Self { Self { range: (a, b) } } fn extra_range(a: u32, b: u32) -> Self { Self { extra_range: (a, b), } } fn intern(intern: intern::Index) -> Self { Self { intern } } fn index_and_intern(index: Index, intern: intern::Index) -> Self { Self { index_intern: (index, intern), } } fn intern_and_extra_offset(intern: intern::Index, offset: u32) -> Self { Self { intern_and_extra_offset: (intern, offset), } } fn index_and_extra_offset(index: Index, offset: u32) -> Self { Self { index_and_extra_offset: (index, offset), } } } pub struct Ast { tags: Vec, datas: Vec, extra: Vec, source_locs: Vec, } impl Debug for Ast { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Ast") .field_with("nodes", |f| { let mut list = f.debug_list(); struct LocDisplay(SourceLocation); impl Debug for LocDisplay { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "({})", self.0) } } let entries = self .tags .iter() .cloned() .zip(self.datas.iter().cloned()) .zip(self.source_locs.iter().cloned()) .enumerate() .map(|(i, ((tag, data), loc))| { (i, tag, ExpandedData::from((tag, data)), LocDisplay(loc)) }); list.entries(entries).finish() }) .field("extra", &self.extra) .finish() } } impl Ast { fn new() -> Ast { Self { tags: vec![Tag::Root], datas: vec![Data::extra_range(0, 0)], extra: vec![], source_locs: vec![SourceLocation::new(0, 0)], } } fn reserve_node(&mut self) -> Index { let i = unsafe { Index(NonZero::new_unchecked(self.tags.len() as u32)) }; self.tags.push(Tag::Undefined); self.datas.push(Data::none()); self.source_locs.push(SourceLocation::invalid()); i } fn get_loc(&self, index: Index) -> SourceLocation { self.source_locs[index.index()] } fn push_error(&mut self, error: ParseError, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, 
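    // Note: parse failures are materialized as ordinary nodes tagged Tag::Error, so the
    // rest of the tree can still be built, rendered and resolved around them.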
Tag::Error, Data::error(error), loc); i } fn set_file>(&mut self, i: Index, decls: I, loc: SourceLocation) { let (extra_start, extra_end) = self.extend_extra_by_indices(decls); self.set_tag_data_source_loc(i, Tag::File, Data::extra_range(extra_start, extra_end), loc); } fn push_file>(&mut self, decls: I, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_file(i, decls, loc); i } fn set_root>(&mut self, decls: I) { let (extra_start, extra_end) = self.extend_extra_by_indices(decls); self.tags[0] = Tag::Root; self.datas[0] = Data::extra_range(extra_start, extra_end); } fn get_root_file_indices<'a>(&'a self) -> impl Iterator + 'a { let (a, b) = self.datas[0].as_extra_range(); self.extra[a..b].iter().cloned().map(|i| Index::new(i)) } fn push_global_decl( &mut self, ident: intern::Index, ty: intern::Index, expr: Index, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); let (extra_start, _) = self.extend_extra([ty.into_u32(), expr.into_u32()]); self.set_tag_data_source_loc( i, Tag::GlobalDecl, Data::intern_and_extra_offset(ident, extra_start), loc, ); i } fn set_fn_decl(&mut self, i: Index, proto: Index, body: Index, loc: SourceLocation) { self.set_tag_data_source_loc(i, Tag::FunctionDecl, Data::two_indices(proto, body), loc); } fn push_fn_decl(&mut self, proto: Index, body: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_fn_decl(i, proto, body, loc); i } fn push_ret(&mut self, expr: Option, loc: SourceLocation) -> Index { let i = self.reserve_node(); match expr { Some(expr) => { self.set_tag_data_source_loc(i, Tag::ReturnExprStmt, Data::index(expr), loc) } None => self.set_tag_data_source_loc(i, Tag::ReturnStmt, Data::none(), loc), } i } fn push_var_decl( &mut self, is_let: bool, name: intern::Index, ty: Option, assignment: Option, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); let start = self.extra.len() as u32; self.extra.push(name.into_u32()); _ = self.extend_extra(assignment.map(|i| i.into_u32())); _ = self.extend_extra(ty.map(|i| i.into_u32())); let end = self.extra.len() as u32; let tag = match (is_let, assignment.is_some()) { (true, false) => Tag::VarDecl, (true, true) => Tag::VarDeclAssignment, (false, false) => Tag::MutVarDecl, (false, true) => Tag::MutVarDeclAssignment, }; self.set_tag_data_source_loc(i, tag, Data::extra_range(start, end), loc); i } fn push_struct_decl(&mut self, struct_type: intern::Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, Tag::StructDecl, Data::intern(struct_type), loc); i } fn push_fn_proto( &mut self, ident: intern::Index, return_type: intern::Index, parameter_list: Index, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); let (extra_start, _) = self.extend_extra([return_type.into_u32(), parameter_list.into_u32()]); self.set_tag_data_source_loc( i, Tag::FunctionProto, Data::intern_and_extra_offset(ident, extra_start), loc, ); i } fn set_block>( &mut self, i: Index, statements: I, trailing: Option, loc: SourceLocation, ) { let (extra_start, extra_end) = self.extend_extra_by_indices(statements.into_iter().chain(trailing.into_iter())); if trailing.is_some() { self.set_tag_data_source_loc( i, Tag::BlockTrailingExpr, Data::extra_range(extra_start, extra_end), loc, ); } else { self.set_tag_data_source_loc( i, Tag::Block, Data::extra_range(extra_start, extra_end), loc, ); } } fn push_block>( &mut self, statements: I, trailing: Option, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); self.set_block(i, statements, 
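    // A trailing expression, when present, is appended to the same `extra` range as the
    // statements; only the tag (Block vs BlockTrailingExpr) records the difference.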
trailing, loc); i } fn push_parameter_list>( &mut self, parameters: I, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); let (extra_start, extra_end) = self.extend_extra_by_indices(parameters); self.set_tag_data_source_loc( i, Tag::ParameterList, Data::extra_range(extra_start, extra_end), loc, ); i } fn push_argument(&mut self, expr: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, Tag::Argument, Data::index(expr), loc); i } fn push_named_argument( &mut self, name: intern::Index, expr: Index, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc( i, Tag::NamedArgument, Data::index_and_intern(expr, name), loc, ); i } fn push_parameter( &mut self, name: intern::Index, ty: intern::Index, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, Tag::Parameter, Data::two_interns(name, ty), loc); i } fn push_argument_list>( &mut self, args: I, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); let (extra_start, extra_end) = self.extend_extra_by_indices(args); self.set_tag_data_source_loc( i, Tag::ArgumentList, Data::extra_range(extra_start, extra_end), loc, ); i } fn push_unary(&mut self, tag: Tag, lhs: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, tag, Data::index(lhs), loc); i } fn push_binary(&mut self, tag: Tag, lhs: Index, rhs: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, tag, Data::two_indices(lhs, rhs), loc); i } fn push_assign(&mut self, lhs: Index, rhs: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, Tag::Assign, Data::two_indices(lhs, rhs), loc); i } fn push_cast(&mut self, lhs: Index, ty: intern::Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, Tag::ExplicitCast, Data::index_and_intern(lhs, ty), loc); i } fn push_if(&mut self, cond: Index, body: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, Tag::IfExpr, Data::two_indices(cond, body), loc); i } fn push_if_else( &mut self, cond: Index, body: Index, other: Index, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); let (extra_start, _) = self.extend_extra_by_indices([body, other]); self.set_tag_data_source_loc( i, Tag::IfElseExpr, Data::index_and_extra_offset(cond, extra_start), loc, ); i } fn push_call_expr(&mut self, lhs: Index, args: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, Tag::CallExpr, Data::two_indices(lhs, args), loc); i } fn push_decl_ref_unresolved( &mut self, scope: Index, ident: intern::Index, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc( i, Tag::DeclRefUnresolved, Data::index_and_intern(scope, ident), loc, ); i } fn resolve_decl_ref(&mut self, i: Index, decl: Index) { self.tags[i.index()] = Tag::DeclRef; self.datas[i.index()] = Data::index(decl); } fn push_expr_stmt(&mut self, expr: Index) -> Index { let i = self.reserve_node(); let loc = self.get_loc(expr); self.set_tag_data_source_loc(i, Tag::ExprStmt, Data::index(expr), loc); i } fn push_constant( &mut self, value: intern::Index, ty: intern::Index, loc: SourceLocation, ) -> Index { let i = self.reserve_node(); self.set_tag_data_source_loc(i, Tag::Constant, Data::two_interns(value, ty), loc); i } fn extend_extra_by_indices>(&mut self, indices: I) -> 
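    // `extra` is a flat u32 side table: variable-length node payloads are copied into it
    // and nodes refer to them either by a (start, end) range or by a single base offset.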
(u32, u32) { self.extend_extra(indices.into_iter().map(|i| i.0.get())) } fn extend_extra>(&mut self, words: I) -> (u32, u32) { let i = self.extra.len() as u32; self.extra.extend(words); (i, self.extra.len() as u32) } fn set_tag_data_source_loc(&mut self, index: Index, tag: Tag, data: Data, loc: SourceLocation) { self.tags[index.index()] = tag; self.datas[index.index()] = data; self.source_locs[index.index()] = loc; } } struct Children(Vec); impl Display for Children { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "[")?; if let Some((last, rest)) = self.0.split_last() { for i in rest { write!(f, "{i}, ")?; } write!(f, "{last}")?; } write!(f, "]") } } impl Ast { fn get_node_children(&self, index: Index) -> Vec { let tag = self.tags[index.index()]; let data = self.datas[index.index()]; match tag { Tag::File => { let (a, b) = data.as_extra_range(); self.extra[a..b].iter().map(|&i| Index::new(i)).collect() } Tag::FunctionProto => { let (_, i) = data.as_intern_and_extra_offset(); vec![Index::new(self.extra[i + 1])] } Tag::FunctionDecl => { let (a, b) = data.as_two_indices(); vec![a, b] } Tag::ParameterList => { let (a, b) = data.as_extra_range(); self.extra[a..b].iter().map(|&i| Index::new(i)).collect() } Tag::Block | Tag::BlockTrailingExpr => { let (a, b) = data.as_extra_range(); self.extra[a..b].iter().map(|&i| Index::new(i)).collect() } Tag::ExprStmt | Tag::ReturnExprStmt => { let a = data.as_index(); vec![a] } Tag::VarDeclAssignment | Tag::MutVarDeclAssignment => { let (a, _) = data.as_extra_range(); let expr = Index::new(self.extra[a + 1]); vec![expr] } Tag::GlobalDecl => { let (_, offset) = data.as_intern_and_extra_offset(); let expr = Index::new(self.extra[offset + 1]); vec![expr] } Tag::CallExpr => { let (a, b) = data.as_two_indices(); vec![a, b] } Tag::ArgumentList => { let (a, b) = data.as_extra_range(); self.extra[a..b].iter().map(|&i| Index::new(i)).collect() } Tag::Argument => { let a = data.as_index(); vec![a] } Tag::NamedArgument => { let (a, _) = data.as_index_intern(); vec![a] } Tag::ExplicitCast => { let (a, _) = data.as_index_intern(); vec![a] } Tag::Deref | Tag::AddressOf | Tag::Not | Tag::Negate => { let a = data.as_index(); vec![a] } Tag::Or | Tag::And | Tag::BitOr | Tag::BitXOr | Tag::BitAnd | Tag::Eq | Tag::NEq | Tag::Lt | Tag::Gt | Tag::Le | Tag::Ge | Tag::Shl | Tag::Shr | Tag::Add | Tag::Sub | Tag::Mul | Tag::Div | Tag::Rem | Tag::Assign | Tag::SubscriptExpr | Tag::IfExpr => { let (a, b) = data.as_two_indices(); vec![a, b] } Tag::IfElseExpr => { let (a, b) = data.as_index_and_extra_offset(); let if_ = Index::new(self.extra[b]); let else_ = Index::new(self.extra[b + 1]); vec![a, if_, else_] } Tag::StructDecl | Tag::DeclRef | Tag::Parameter | Tag::Constant | Tag::ReturnStmt | Tag::VarDecl | Tag::MutVarDecl => vec![], _ => vec![], } } } pub struct AstRenderer<'a> { ast: &'a Ast, #[allow(dead_code)] syms: &'a crate::symbol_table::syms2::Symbols, scopes: Vec, } impl<'a> AstRenderer<'a> { pub fn new(ast: &'a Ast, syms: &'a crate::symbol_table::syms2::Symbols) -> Self { Self { ast, syms, scopes: Vec::new(), } } fn render_node( &mut self, w: &mut W, indent: u32, node: Index, ) -> core::fmt::Result { let tag = self.ast.tags[node.index()]; let loc = self.ast.source_locs[node.index()]; match tag { Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { self.scopes.push(node); } _ => {} } let children = Children(self.ast.get_node_children(node)); writeln_indented!(indent, w, "{node} = ({loc}) {tag:?} {}", children)?; for child in 
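        // Children are rendered depth-first with one extra level of indentation; the
        // scope nodes (File, FunctionDecl, Block*) pushed above are popped again below.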
children.0 { self.render_node(w, indent + 1, child)?; } match tag { Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { self.scopes.pop(); } _ => {} } Ok(()) } fn render(&mut self, w: &mut W) -> core::fmt::Result { for file in self.ast.get_root_file_indices() { self.render_node(w, 0, file)?; } Ok(()) } } pub mod ast_gen { use intern::{PointerFlags, SimpleType}; use itertools::Itertools; use num_bigint::{BigInt, BigUint}; use crate::{ common::from_lo_hi_dwords, comptime, lexer::{Radix, TokenItem, TokenIterator}, symbol_table::syms2::SymbolKind, tokens::PRECEDENCE_MAP, }; use super::*; #[derive(Debug)] pub struct ErrorInfo { error: ParseError, loc: SourceLocation, } #[derive(Debug)] pub struct Parser { pub ast: Ast, pub intern: intern::InternPool, pub syms: crate::symbol_table::syms2::Symbols, scopes: Vec, pub errors: Vec, } type ParseResult = core::result::Result; impl Display for Parser { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.display().render(f) } } impl Parser { pub fn new() -> Parser { Self { ast: Ast::new(), intern: intern::InternPool::create(), syms: crate::symbol_table::syms2::Symbols::new(), scopes: Vec::new(), errors: Vec::new(), } } pub fn display(&self) -> AstRenderer<'_> { AstRenderer::new(&self.ast, &self.syms) } pub fn resolve_decl_refs(&mut self) { let mut nodes = self.ast.get_root_file_indices().collect::>(); let mut scopes = Vec::new(); while let Some(node) = nodes.pop() { match self.ast.tags[node.index()] { Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { scopes.push(node); } Tag::DeclRefUnresolved => { let (scope, name) = self.ast.datas[node.index()].as_index_intern(); // look in my_scope if let Some(decl) = self.syms .find_symbol(scope, name, self.ast.source_locs[node.index()]) { self.ast.resolve_decl_ref(node, decl) }; } _ => {} } nodes.extend(self.ast.get_node_children(node)); match self.ast.tags[node.index()] { Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { scopes.pop(); } _ => {} } } } fn current_scope(&self) -> Index { self.scopes.last().cloned().unwrap() } fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result { let ident = tokens.expect_token(Token::Ident).map_err(|_| ErrorInfo { error: ParseError::ExpectedIdent, loc: tokens.current_source_location(), })?; let name = self.intern.get_or_insert(intern::Key::String { str: ident.lexeme(), }); Ok(name) } fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> ParseResult { tokens.eat_token(Token::Star).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Star), loc: tokens.current_source_location(), })?; let &[cnst, vol, noalias] = &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3] else { unreachable!() }; let pointee = self.parse_type(tokens)?; Ok(self .intern .get_pointer_type(pointee, Some(PointerFlags::new(cnst, vol, noalias)))) } /// [LENGTH]const? volatile? noalias? 
TYPE fn parse_array_type(&mut self, tokens: &mut TokenIterator) -> ParseResult { let start = tokens.eat_token(Token::OpenSquareBracket).unwrap(); let length = match self.parse_expr(tokens) { Ok(i) => { _ = tokens .eat_token(Token::CloseSquareBracket) .ok_or(ErrorInfo { error: ParseError::ExpectedTypeName, loc: tokens.current_source_location(), })?; i } Err(err) => { tokens.advance_past_end_of_bracketed().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::CloseSquareBracket), loc: tokens.current_source_location(), })?; self.push_error(err.error, err.loc) } }; let &[cnst, vol, noalias] = &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3] else { unreachable!() }; let pointee = self.parse_type(tokens)?; Ok(self.intern.get_array_type( pointee, Some(PointerFlags::new(cnst, vol, noalias)), 0, // length, TODO: evaluate this tree branch for an u32 )) } fn parse_simple_type(&mut self, token: Token) -> Option { match token { Token::Void => Some(self.intern.get_assume_present(intern::Key::SimpleType { ty: SimpleType::Void, })), Token::Bool => Some(self.intern.get_assume_present(intern::Key::SimpleType { ty: SimpleType::Bool, })), Token::F32 => Some(self.intern.get_assume_present(intern::Key::SimpleType { ty: SimpleType::F32, })), Token::F64 => Some(self.intern.get_assume_present(intern::Key::SimpleType { ty: SimpleType::F64, })), Token::USize => Some(self.intern.get_assume_present(intern::Key::SimpleType { ty: SimpleType::USize, })), Token::ISize => Some(self.intern.get_assume_present(intern::Key::SimpleType { ty: SimpleType::ISize, })), _ => None, } } fn try_parse_integral_type( &mut self, typename: &str, ) -> Result, ParseError> { let mut iter = typename.chars().peekable(); let signed = match iter.next() { Some('u') => false, Some('i') => true, _ => { return Ok(None); } }; // need 1 digit for an integral type if iter.peek().map(|&c| crate::common::is_digit(c)) != Some(true) { return Ok(None); } // need no nondigits after digits if iter .clone() .skip_while(|&c| crate::common::is_digit(c)) .next() .is_some() { return Ok(None); } let mut bits = 0u16; loop { let Some(digit) = iter.next().map(|c| c as u8 - b'0') else { break; }; match bits .checked_mul(10) .and_then(|bits| bits.checked_add(digit as u16)) { Some(val) => { bits = val; } None => { // this IS an integral type, but it is bigger than u/i65535 return Err(ParseError::IntegralTypeTooWide); } } } Ok(Some(self.intern.get_int_type(signed, bits))) } fn try_parse_integral_constant( &mut self, item: &TokenItem, ) -> (intern::Index, intern::Index) { let radix = Radix::from_token(item.token()).unwrap(); let mut chars = item.lexeme().char_indices(); match radix { Radix::Dec => {} _ => { _ = chars.advance_by(2); } } let digits = chars .take_while_ref(|&(_, c)| radix.is_digit()(c) || c == '_') .filter(|&(_, c)| c != '_') .map(|(_, c)| c) .collect::>(); let value = comptime::bigint::parse_bigint(digits.into_iter(), radix); let ty = match chars.clone().next() { Some((i, 'u')) | Some((i, 'i')) => self .try_parse_integral_type(&item.lexeme()[i..]) .expect("invalid integral type??"), _ => None, }; let interned = match value.len() { ..1 => { let bits = value.get(0).cloned().unwrap_or(0); self.intern.get_or_insert(intern::Key::UIntSmall { bits }) } ..2 => { let lo = value.get(0).cloned().unwrap_or(0); let hi = value.get(1).cloned().unwrap_or(0); let bits = from_lo_hi_dwords(lo, hi); self.intern.get_or_insert(intern::Key::UInt64 { bits }) } _ => { let bigint = BigInt::from_biguint(num_bigint::Sign::Plus, 
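        // Interning strategy for integer literals, as implemented above: one u32 digit is
        // stored as UIntSmall, two digits as UInt64, anything larger as a positive bigint;
        // a `u`/`i` suffix selects the integral type, otherwise the constant stays comptime_int.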
BigUint::new(value)); self.intern .get_or_insert(intern::Key::PositiveInt { bigint }) } }; (interned, ty.unwrap_or(self.intern.get_comptime_int_type())) } fn parse_floating_constant(&mut self, item: &TokenItem) -> (intern::Index, intern::Index) { let lexeme = item.lexeme(); let lexeme = lexeme .strip_suffix("f32") .map(|l| (l, self.intern.get_f32_type())) .unwrap_or( lexeme .strip_suffix("f64") .map(|l| (l, self.intern.get_f64_type())) .unwrap_or((lexeme, self.intern.get_f64_type())), ); let bits = if lexeme.1 == self.intern.get_f32_type() { self.intern.get_or_insert(intern::Key::F32 { bits: lexeme.0.parse::().unwrap(), }) } else { self.intern.get_or_insert(intern::Key::F64 { bits: lexeme.0.parse::().unwrap(), }) }; (bits, lexeme.1) } /// TYPE <- /// * TYPE /// IDENTIFIER /// SIMPLE_TYPE /// [ TYPE ; CONSTANT_EXPR ] /// INTEGRAL_TYPE // u[0..65535] | i[0..65535] fn parse_type(&mut self, tokens: &mut TokenIterator) -> ParseResult { match tokens .peek_token() .ok_or(ErrorInfo { error: ParseError::ExpectedTypeName, loc: tokens.current_source_location(), })? .token() { Token::Star => self.parse_pointer(tokens), Token::OpenSquareBracket => self.parse_array_type(tokens), Token::Ident => { let token = tokens.next().unwrap(); match self .try_parse_integral_type(token.lexeme()) .map_err(|error| ErrorInfo { error, loc: token.source_location(), })? { Some(int) => Ok(int), None => { let name = self.intern.get_or_insert(intern::Key::String { str: token.lexeme(), }); Ok(name) } } } token => { let ty = self.parse_simple_type(token).ok_or(ErrorInfo { error: ParseError::ExpectedTypeName, loc: tokens.current_source_location(), })?; _ = tokens.next(); Ok(ty) } } } /// GLOBAL_DECL <- /// const IDENTIFIER: TYPENAME = EXPR; fn parse_const_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { let err = 'blk: { let loc = tokens.current_source_location(); let Some(_) = tokens.eat_token(Token::Const) else { break 'blk ErrorInfo { error: ParseError::ExpectedToken(Token::Const), loc, }; }; let ident = match self.parse_ident(tokens) { Ok(i) => i, Err(err) => { break 'blk err; } }; let Some(_) = tokens.eat_token(Token::Colon) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(Token::Colon), loc, }); }; let typename = match self.parse_type(tokens) { Ok(i) => i, Err(err) => { break 'blk err; } }; let Some(_) = tokens.eat_token(Token::Equal) else { break 'blk ErrorInfo { error: ParseError::ExpectedToken(Token::Equal), loc: tokens.current_source_location(), }; }; let expr = match self.parse_expr(tokens) { Ok(i) => i, Err(err) => { break 'blk err; } }; let Some(_) = tokens.eat_token(Token::Semi) else { break 'blk ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), }; }; let decl = self.ast.push_global_decl(ident, typename, expr, loc); self.syms .insert_symbol(self.current_scope(), ident, SymbolKind::Const, decl); return Ok(decl); }; tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(self.ast.push_error(err.error, err.loc)) } /// FUNCTION_PROTO <- /// fn IDENTIFIER () /// fn IDENTIFIER () -> TYPENAME /// fn IDENTIFIER ( PARAMETER_LIST ,? ) /// fn IDENTIFIER ( PARAMETER_LIST ,? 
) -> TYPENAME fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let _ = tokens.eat_token(Token::Fn).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Fn), loc, })?; let ident = self.parse_ident(tokens)?; let parameters = self.parse_parenthesised(tokens, |this, tokens| { if tokens.is_next_token(Token::CloseParens) { Ok(this.ast.push_parameter_list([], loc)) } else { this.parse_parameter_list(tokens) } })?; let return_type = if let Some(_) = tokens.eat_token(Token::MinusGreater) { self.parse_type(tokens)? } else { self.intern.get_void_type() }; return Ok(self.ast.push_fn_proto(ident, return_type, parameters, loc)); } fn parse_fn_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let func = self.ast.reserve_node(); self.push_scope(func, intern::Index::invalid()); let proto = self.parse_fn_proto(tokens).map_err(|e| { self.pop_scope(); e })?; let body = self.parse_block(tokens).map_err(|e| { self.pop_scope(); e })?; self.pop_scope(); self.ast.set_fn_decl(func, proto, body, loc); Ok(func) } /// FUNCTION_DECL <- /// FUNCTION_PROTO BLOCK fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Index { match self.parse_fn_inner(tokens) { Ok(i) => i, Err(err) => { self.find_next_fn_or_const(tokens); self.push_error(err.error, err.loc) } } } /// RETURN_STATEMENT <- /// return EXPRESSION? ; fn parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> ParseResult { // SAFETY: function invariance let ret = tokens.next().unwrap(); let loc = ret.source_location(); let expr = if tokens.eat_token(Token::Semi).is_some() { self.ast.push_ret(None, loc) } else { match self.parse_expr(tokens) { Ok(i) => { tokens.eat_token(Token::Semi).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; self.ast.push_ret(Some(i), loc) } Err(err) => { tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; self.push_error(err.error, err.loc) } } }; Ok(expr) } /// VAR_DECL <- /// (let | var) IDENTIFIER (: TYPENAME)? ; /// (let | var) IDENTIFIER (: TYPENAME)? = EXPRESSION ; fn parse_var_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { match self.parse_var_decl_inner(tokens) { Ok(i) => { _ = tokens.eat_token(Token::Semi).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(i) } Err(err) => { tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(self.push_error(err.error, err.loc)) } } } fn parse_var_decl_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { // SAFETY: function invariance let let_or_var = tokens.next().unwrap(); let loc = let_or_var.source_location(); let is_let = let_or_var.token() == Token::Let; let name = self.parse_ident(tokens)?; let name_loc = let_or_var.source_location(); let ty = if tokens.eat_token(Token::Colon).is_some() { Some(self.parse_type(tokens)?) } else { None }; let assignment = if tokens.eat_token(Token::Equal).is_some() { Some(self.parse_expr(tokens)?) 
} else { None }; let decl = self.ast.push_var_decl(is_let, name, ty, assignment, loc); self.syms.insert_symbol( self.current_scope(), name, SymbolKind::Local(name_loc), decl, ); Ok(decl) } fn parse_block_inner( &mut self, block: Index, tokens: &mut TokenIterator, ) -> ParseResult { let loc = tokens.current_source_location(); let mut statements = Vec::new(); let trailing = loop { let next = tokens.peek_token().ok_or(ErrorInfo { error: ParseError::UnexpectedEndOfTokens, loc: tokens.current_source_location(), })?; if let Some(decl) = self.parse_constant_decls(tokens)? { statements.push(decl); } else { match next.token() { Token::CloseBrace => { break None; } Token::Return => { statements.push(self.parse_return_stmt(tokens)?); } Token::Var | Token::Let => { statements.push(self.parse_var_decl(tokens)?); } _ => { if self.is_statement(tokens) { // expr -> statements let expr = self .parse_with_trailing_semi(tokens, |this, tokens| { this.parse_expr(tokens) })?; statements.push(expr); } else { // expr -> trailing let expr = self.parse_expr(tokens)?; if !tokens.is_next_token(Token::CloseBrace) { statements.push(self.push_error( ParseError::ExpectedEndOfBlock, tokens.current_source_location(), )); } else { break Some(expr); } } } } } }; self.ast.set_block(block, statements, trailing, loc); Ok(block) } /// BLOCK <- /// { STATEMENT* EXPRESSION? } fn parse_block(&mut self, tokens: &mut TokenIterator) -> ParseResult { let block = self.parse_braced(tokens, |this, tokens| { let block = this.ast.reserve_node(); this.push_scope(block, intern::Index::invalid()); let block_result = this.parse_block_inner(block, tokens); this.pop_scope(); block_result })?; Ok(block) } /// PARAMETER_LIST <- /// PARAMETER /// PARAMETER_LIST , ARGUMENT fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let mut params = Vec::new(); loop { params.push(self.parse_parameter(tokens)?); if !tokens.is_next_token(Token::Comma) { break; } if tokens.is_next_token2(Token::CloseParens) { break; } // skip comma _ = tokens.next(); } return Ok(self.ast.push_parameter_list(params, loc)); } /// PARAMETER <- /// IDENT : TYPENAME fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let name = self.parse_ident(tokens)?; let Some(_) = tokens.eat_token(Token::Colon) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(Token::Colon), loc, }); }; let ty = self.parse_type(tokens)?; let param = self.ast.push_parameter(name, ty, loc); self.syms .insert_symbol(self.current_scope(), name, SymbolKind::Local(loc), param); return Ok(param); } /// ARGUMENT <- /// IDENT : EXPR /// EXPR fn parse_argument(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let name = if tokens.is_next_token2(Token::Colon) && tokens.is_next_token(Token::Ident) { let name = self.parse_ident(tokens)?; // we checked `is_next_token2` _ = tokens.eat_token(Token::Colon).unwrap(); Some(name) } else { None }; let expr = self.parse_expr(tokens)?; let i = match name { Some(name) => self.ast.push_named_argument(name, expr, loc), None => self.ast.push_argument(expr, loc), }; Ok(i) } /// ARGUMENT_LIST <- /// ARGUMENT /// ARGUMENT_LIST , ARGUMENT fn parse_argument_list(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let mut args = Vec::new(); loop { args.push(self.parse_argument(tokens)?); if !tokens.is_next_token(Token::Comma) { break; } if 
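            // The extra lookahead below appears to allow a trailing comma: stop as soon as
            // the token after the comma is already the closing parenthesis.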
tokens.is_next_token2(Token::CloseParens) { break; } // skip comma _ = tokens.next(); } return Ok(self.ast.push_argument_list(args, loc)); } /// PRIMARY_EXPR <- /// IDENTIFIER /// INTEGER_CONSTANT /// FLOATING_CONSTANT /// ( EXPRESSION ) /// BLOCK fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let Some(next) = tokens.peek_token() else { return Err(ErrorInfo { error: ParseError::ExpectedPrimaryExpression, loc, }); }; match next.token() { Token::IntegerBinConstant | Token::IntegerHexConstant | Token::IntegerOctConstant | Token::IntegerConstant => { _ = tokens.next(); let (value, ty) = self.try_parse_integral_constant(&next); return Ok(self.ast.push_constant(value, ty, loc)); } Token::FloatingConstant | Token::FloatingExpConstant | Token::DotFloatingConstant | Token::DotFloatingExpConstant => { _ = tokens.next(); let (value, ty) = self.parse_floating_constant(&next); return Ok(self.ast.push_constant(value, ty, loc)); } Token::OpenParens => { let expr = self.parse_parenthesised(tokens, |this, tokens| this.parse_expr(tokens))?; return Ok(expr); } Token::OpenBrace => { return self.parse_block(tokens); } Token::Ident => { _ = tokens.next(); let ident = next.lexeme(); let ident = self .intern .get_or_insert(intern::Key::String { str: ident }); return Ok(self .ast .push_decl_ref_unresolved(self.current_scope(), ident, loc)); } // TODO: eventually handle paths _ => { return Err(ErrorInfo { error: ParseError::ExpectedPrimaryExpression, loc, }); } } } /// POSTFIX_EXPR <- /// PRIMARY_EXPR /// PRIMARY_EXPR ( ) /// PRIMARY_EXPR ( ARGUMENT_LIST ) /// PRIMARY_EXPR [ EXPR ] fn parse_postfix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let lhs = self.parse_primary_expr(tokens)?; if let Some(next) = tokens.peek_token() { let loc = next.source_location(); match next.token() { Token::OpenParens => { let arguments = self.parse_parenthesised(tokens, |this, tokens| { if tokens.is_next_token(Token::CloseParens) { Ok(this.ast.push_argument_list([], loc)) } else { this.parse_argument_list(tokens) } })?; return Ok(self.ast.push_call_expr(lhs, arguments, loc)); } Token::OpenSquareBracket => { let subscript = self.parse_bracketed(tokens, |this, tokens| this.parse_expr(tokens))?; return Ok(self .ast .push_binary(Tag::SubscriptExpr, lhs, subscript, loc)); } _ => {} } } Ok(lhs) } fn push_error(&mut self, error: ParseError, loc: SourceLocation) -> Index { self.errors.push(ErrorInfo { error, loc }); self.ast.push_error(error, loc) } /// PREFIX_EXPR <- /// POSTFIX_EXPR /// ! 
POSTFIX_EXPR /// - POSTFIX_EXPR /// & POSTFIX_EXPR /// * POSTFIX_EXPR fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let next = tokens.peek_token().ok_or(ErrorInfo { error: ParseError::ExpectedPrefixExpression, loc: tokens.current_source_location(), })?; let loc = next.source_location(); let expr = match next.token() { Token::Bang => { _ = tokens.next(); let lhs = self.parse_postfix_expr(tokens)?; self.ast.push_unary(Tag::Not, lhs, loc) } Token::Minus => { _ = tokens.next(); let lhs = self.parse_postfix_expr(tokens)?; self.ast.push_unary(Tag::Negate, lhs, loc) } Token::Ampersand => { _ = tokens.next(); let lhs = self.parse_postfix_expr(tokens)?; self.ast.push_unary(Tag::AddressOf, lhs, loc) } Token::Star => { _ = tokens.next(); let lhs = self.parse_postfix_expr(tokens)?; self.ast.push_unary(Tag::Deref, lhs, loc) } _ => self.parse_postfix_expr(tokens)?, }; Ok(expr) } /// AS_EXPR <- /// PREFIX_EXPR /// PREFIX_EXPR as TYPENAME fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); let expr = self.parse_prefix_expr(tokens)?; if tokens.eat_token(Token::As).is_some() { let typename = self.parse_type(tokens)?; return Ok(self.ast.push_cast(expr, typename, loc)); } else { return Ok(expr); } } /// BINARY_EXPR <- /// AS_EXPR /// AS_EXPR * EXPRESSION /// AS_EXPR / EXPRESSION /// AS_EXPR % EXPRESSION /// AS_EXPR + EXPRESSION /// AS_EXPR - EXPRESSION /// AS_EXPR << EXPRESSION /// AS_EXPR >> EXPRESSION /// AS_EXPR < EXPRESSION /// AS_EXPR > EXPRESSION /// AS_EXPR <= EXPRESSION /// AS_EXPR >= EXPRESSION /// AS_EXPR == EXPRESSION /// AS_EXPR != EXPRESSION /// AS_EXPR & EXPRESSION /// AS_EXPR ^ EXPRESSION /// AS_EXPR | EXPRESSION /// AS_EXPR && EXPRESSION /// AS_EXPR || EXPRESSION fn parse_binary_expr( &mut self, tokens: &mut TokenIterator, precedence: u32, ) -> ParseResult { let mut node = self.parse_as_expr(tokens)?; loop { let Some(tok) = tokens.peek_token() else { break; }; let loc = tok.source_location(); let Some(prec) = PRECEDENCE_MAP.get(&tok.token()).cloned() else { break; }; if prec < precedence { break; } // SAFETY: we peeked `tok` let tok = tokens.next().unwrap(); let lhs = node; let rhs = self.parse_binary_expr(tokens, prec + 1)?; let tag = match tok.token() { Token::PipePipe => Tag::Or, Token::AmpersandAmpersand => Tag::And, Token::Pipe => Tag::BitOr, Token::Caret => Tag::BitXOr, Token::Ampersand => Tag::BitAnd, Token::BangEqual => Tag::NEq, Token::EqualEqual => Tag::Eq, Token::LessEqual => Tag::Le, Token::GreaterEqual => Tag::Ge, Token::Less => Tag::Lt, Token::Greater => Tag::Gt, Token::GreaterGreater => Tag::Shr, Token::LessLess => Tag::Shl, Token::Plus => Tag::Add, Token::Minus => Tag::Sub, Token::Percent => Tag::Rem, Token::Star => Tag::Mul, Token::Slash => Tag::Div, _ => unreachable!(), }; node = self.ast.push_binary(tag, lhs, rhs, loc); } Ok(node) } /// ASSIGNMENT_EXPR <- /// BINARY_EXPRESSION /// BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION /// ASSIGNMENT_OP <- /// = += -= *= /= %= ... 
    fn parse_assignment_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        let lhs = self.parse_binary_expr(tokens, 0)?;
        if tokens
            .peek_token()
            .map(|itm| itm.token().is_assignment_op())
            == Some(true)
        {
            // SAFETY: we peeked
            let op = tokens.next().unwrap();
            let loc = op.source_location();
            let rhs = self.parse_expr(tokens)?;
            let rhs = if op.token() == Token::Equal {
                rhs
            } else {
                let tag = match op.token() {
                    Token::PlusEqual => Tag::Add,
                    Token::MinusEqual => Tag::Sub,
                    Token::StarEqual => Tag::Mul,
                    Token::SlashEqual => Tag::Div,
                    Token::PercentEqual => Tag::Rem,
                    Token::PipeEqual => Tag::BitOr,
                    Token::CaretEqual => Tag::BitXOr,
                    Token::AmpersandEqual => Tag::BitAnd,
                    Token::LessLessEqual => Tag::Shl,
                    Token::GreaterGreaterEqual => Tag::Shr,
                    _ => {
                        unreachable!()
                    }
                };
                self.ast.push_binary(tag, lhs, rhs, loc)
            };
            Ok(self.ast.push_assign(lhs, rhs, loc))
        } else {
            Ok(lhs)
        }
    }

    /// ELSE_EXPR <-
    /// 'else' (IF_EXPR | EXPR_OR_STATEMENT_OR_BLOCK)
    fn parse_else_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        // SAFETY: function invariance
        let _else_ = tokens.eat_token(Token::Else).unwrap();
        if tokens.is_next_token(Token::If) {
            self.parse_if_expr(tokens)
        } else {
            self.parse_expr_or_block_as_block(tokens)
        }
    }

    /// IF_EXPR <-
    /// 'if' ( EXPR ) EXPR_OR_STATEMENT_OR_BLOCK ELSE_EXPR?
    fn parse_if_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        // SAFETY: function invariance
        let iff = tokens.eat_token(Token::If).unwrap();
        let loc = iff.source_location();
        let cond = self.parse_parenthesised(tokens, |this, tokens| this.parse_expr(tokens))?;
        let body = self.parse_expr_or_block_as_block(tokens)?;
        if tokens.is_next_token(Token::Else) {
            let else_expr = self.parse_else_expr(tokens)?;
            Ok(self.ast.push_if_else(cond, body, else_expr, loc))
        } else {
            Ok(self.ast.push_if(cond, body, loc))
        }
    }

    fn parse_expr_or_block_as_block(
        &mut self,
        tokens: &mut TokenIterator,
    ) -> ParseResult {
        let Some(next) = tokens.peek_token() else {
            return Err(ErrorInfo {
                error: ParseError::ExpectedExpression,
                loc: tokens.current_source_location(),
            });
        };
        match next.token() {
            Token::OpenBrace => self.parse_block(tokens),
            _ => {
                let loc = tokens.current_source_location();
                let expr = self.parse_expr(tokens)?;
                Ok(self.ast.push_block([], Some(expr), loc))
            }
        }
    }

    fn parse_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        let loc = tokens.current_source_location();
        let Some(next) = tokens.peek_token() else {
            return Err(ErrorInfo {
                error: ParseError::ExpectedExpression,
                loc,
            });
        };
        match next.token() {
            Token::If => self.parse_if_expr(tokens),
            _ => self.parse_assignment_expr(tokens),
        }
    }

    /// TYPE_DECL <-
    /// type IDENTIFIER = TYPE_UNION ;
    /// type IDENTIFIER = '(' (TYPE,)* ')' ;
    /// type IDENTIFIER = extern? union { (IDENTIFIER: TYPE,)* }
    /// type IDENTIFIER = extern? packed? enum { (IDENTIFIER (= EXPRESSION),)* }
    /// type IDENTIFIER = extern? packed? struct { (IDENTIFIER: TYPE,)* }
    fn parse_type_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        _ = tokens.eat_token(Token::Type).ok_or(ErrorInfo {
            error: ParseError::ExpectedToken(Token::Type),
            loc: tokens.current_source_location(),
        })?;
        let name = self.parse_ident(tokens)?;
        let loc = tokens.current_source_location();
        _ = tokens.eat_token(Token::Equal).ok_or(ErrorInfo {
            error: ParseError::ExpectedToken(Token::Equal),
            loc: tokens.current_source_location(),
        })?;
        let (has_attributes, c_like, packed) = {
            let vec = tokens.eat_all_zero_or_once(&[Token::Extern, Token::Packed]);
            (vec[0] || vec[1], vec[0], vec[1])
        };
        let Some(next) = tokens.peek_token() else {
            return Err(ErrorInfo {
                error: ParseError::ExpectedTypeDeclaration,
                loc: tokens.current_source_location(),
            });
        };
        match next.token() {
            Token::Struct => self.parse_struct_decl(tokens, name, c_like, packed, loc),
            Token::Union => {
                unimplemented!()
            }
            Token::Enum => {
                unimplemented!()
            }
            _ => {
                if has_attributes {
                    return Err(ErrorInfo {
                        error: ParseError::UnexpectedTypeAttributes,
                        loc: tokens.current_source_location(),
                    });
                }
                match next.token() {
                    Token::OpenParens => {
                        // tuple
                        unimplemented!()
                    }
                    Token::Ident => {
                        // sumtype
                        unimplemented!()
                    }
                    _ => {
                        return Err(ErrorInfo {
                            error: ParseError::ExpectedTypeDeclaration,
                            loc: tokens.current_source_location(),
                        });
                    }
                }
            }
        }
    }

    /// SUMTYPE_DECL <-
    /// type IDENTIFIER = TYPE_UNION
    /// TYPE_UNION <-
    /// TYPE (| TYPE_UNION)?
    /// IDENTIFIER: TYPE (| TYPE_UNION)?
    fn parse_sumtype_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        todo!()
    }

    /// TUPLE_DECL <-
    /// type IDENTIFIER = (TYPE,* )
    fn parse_tuple_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        todo!()
    }

    /// UNION_DECL <-
    /// type IDENTIFIER = union { IDENTIFIER: TYPE,* }
    fn parse_union_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        todo!()
    }

    /// ENUM_DECL <-
    /// type IDENTIFIER = packed? enum { IDENTIFIER (= EXPRESSION),* }
    fn parse_enum_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult {
        todo!()
    }

    /// STRUCT_DECL <-
    /// type IDENTIFIER = extern? packed?
struct { STRUCT_FIELD,* } fn parse_struct_decl( &mut self, tokens: &mut TokenIterator, name: intern::Index, c_like: bool, packed: bool, loc: SourceLocation, ) -> ParseResult { // SAFETY: function invariance _ = tokens.eat_token(Token::Struct).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Struct), loc: tokens.current_source_location(), })?; let decl = self.parse_braced(tokens, |this, tokens| { this.parse_struct_fields(tokens).map(|fields| { _ = tokens.eat_token(Token::Comma); let struct_type = this.intern.get_struct_type(name, packed, c_like, fields); this.ast.push_struct_decl(struct_type, loc) }) })?; Ok(decl) } fn parse_with_trailing_semi( &mut self, tokens: &mut TokenIterator, parse: F, ) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { match parse(self, tokens) { Ok(i) => { _ = tokens.eat_token(Token::Semi).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(i) } Err(err) => { tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; Ok(self.push_error(err.error, err.loc)) } } } fn parse_inner( &mut self, tokens: &mut TokenIterator, open: Token, close: Token, parse: F, on_err: E, ) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, E: FnOnce(&mut Self, &mut TokenIterator, ErrorInfo, TokenItem) -> ParseResult, { let Some(start) = tokens.eat_token(open) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(open), loc: tokens.current_source_location(), }); }; match parse(self, tokens) { Ok(i) => { _ = tokens.eat_token(close).ok_or(ErrorInfo { error: match open { Token::OpenBrace => ParseError::UnmatchedBrace(start.token_pos().start), Token::OpenParens => { ParseError::UnmatchedParens(start.token_pos().start) } Token::OpenSquareBracket => { ParseError::UnmatchedSquareBracket(start.token_pos().start) } _ => ParseError::UnmatchedDelimiter(start.token_pos().start), }, loc: tokens.current_source_location(), })?; Ok(i) } Err(e) => on_err(self, tokens, e, start), } } fn parse_inner2( &mut self, tokens: &mut TokenIterator, open: Token, close: Token, parse: F, ) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { self.parse_inner(tokens, open, close, parse, |this, tokens, err, start| { match close { Token::CloseBrace => { tokens.advance_past_end_of_braced().ok_or(ErrorInfo { error: ParseError::UnmatchedBrace(start.token_pos().start), loc: tokens.current_source_location(), })?; } Token::CloseParens => { tokens.advance_past_end_of_parens().ok_or(ErrorInfo { error: ParseError::UnmatchedParens(start.token_pos().start), loc: tokens.current_source_location(), })?; } Token::CloseSquareBracket => { tokens.advance_past_end_of_bracketed().ok_or(ErrorInfo { error: ParseError::UnmatchedSquareBracket(start.token_pos().start), loc: tokens.current_source_location(), })?; } Token::Semi => { tokens.advance_past_semi().ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), loc: tokens.current_source_location(), })?; } _ => unimplemented!(), } Ok(this.push_error(err.error, err.loc)) }) } fn parse_bracketed(&mut self, tokens: &mut TokenIterator, parse: F) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { self.parse_inner2( tokens, Token::OpenSquareBracket, Token::CloseSquareBracket, parse, ) } fn parse_braced(&mut self, tokens: &mut TokenIterator, parse: F) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { 
self.parse_inner2(tokens, Token::OpenBrace, Token::CloseBrace, parse) } fn parse_parenthesised( &mut self, tokens: &mut TokenIterator, parse: F, ) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, { self.parse_inner2(tokens, Token::OpenParens, Token::CloseParens, parse) } fn parse_struct_fields( &mut self, tokens: &mut TokenIterator, ) -> ParseResult> { let mut fields = Vec::new(); loop { fields.push(self.parse_struct_field(tokens)?); if !tokens.is_next_token(Token::Comma) { break; } if tokens.is_next_token2(Token::CloseBrace) { break; } // skip comma _ = tokens.next(); } Ok(fields) } /// STRUCT_FIELD <- /// IDENTIFIER: TYPE fn parse_struct_field( &mut self, tokens: &mut TokenIterator, ) -> ParseResult<(intern::Index, intern::Index)> { let name = self.parse_ident(tokens)?; let Some(_) = tokens.eat_token(Token::Colon) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(Token::Colon), loc: tokens.current_source_location(), }); }; let ty = self.parse_type(tokens)?; return Ok((name, ty)); } /// CONSTANT_DECL <- /// FUNCTION_DECL /// GLOBAL_DECL /// STRUCT_DECL fn parse_constant_decls( &mut self, tokens: &mut TokenIterator, ) -> ParseResult> { let next = tokens.peek_token().ok_or(ErrorInfo { error: ParseError::UnexpectedEndOfTokens, loc: tokens.current_source_location(), })?; match next.token() { Token::Fn => Ok(Some(self.parse_fn_decl(tokens))), Token::Const => self.parse_const_decl(tokens).map(|i| Some(i)), Token::Type => self.parse_type_decl(tokens).map(|i| Some(i)), _ => Ok(None), } } /// FILE <- /// (FUNCTION_DECL | GLOBAL_DECL)* fn parse_file(&mut self, tokens: &mut TokenIterator) -> Index { let start = tokens.current_source_location(); let mut decls = Vec::new(); let file = self.ast.reserve_node(); self.push_scope(file, intern::Index::invalid()); while let Some(next) = tokens.peek_token() { let loc = next.source_location(); let decl = match self.parse_constant_decls(tokens).and_then(|i| match i { Some(i) => Ok(i), None => { let error = ParseError::UnexpectedTokenAtFileScope; let node = self.push_error(error, loc); self.find_next_fn_or_const(tokens); Ok(node) } }) { Ok(i) => i, Err(err) => self.push_error(err.error, err.loc), }; decls.push(decl); } self.pop_scope(); self.ast.set_file(file, decls, start); file } /// FILE <- /// (FUNCTION_DECL | GLOBAL_DECL)* pub fn parse(&mut self, mut tokens: TokenIterator) { let file = self.parse_file(&mut tokens); self.ast.set_root([file]); self.resolve_decl_refs(); } fn push_scope(&mut self, ast: Index, name: intern::Index) { let parent = self.scopes.last().cloned(); self.scopes.push(ast); if let Some(parent) = parent { self.syms.insert_symbol( ast, intern::Index::invalid(), SymbolKind::ParentScope, parent, ); } self.syms.insert_scope(name, ast); } fn pop_scope(&mut self) { self.scopes.pop(); } fn is_statement(&self, tokens: &mut TokenIterator) -> bool { let mut tokens = tokens.clone(); let mut braces = 0; let mut parens = 0; let mut brackets = 0; while let Some(itm) = tokens.next() { match itm.token() { Token::OpenBrace => { braces += 1; } Token::CloseBrace => { braces -= 1; } Token::OpenParens => { parens += 1; } Token::CloseParens => { parens -= 1; } Token::OpenSquareBracket => { brackets += 1; } Token::CloseSquareBracket => { brackets -= 1; } Token::Semi => { if braces == 0 && parens == 0 && brackets == 0 { return true; } } _ => {} } if braces < 0 || parens < 0 || brackets < 0 { break; } } false } fn find_next_fn_or_const(&mut self, tokens: &mut TokenIterator) -> Option<()> { tokens 
.advance_until_before_one_of(&[Token::Const, Token::Fn, Token::Type]) .map(|_| ()) } } }
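
// Illustration only (not part of the parser): a minimal, self-contained sketch of the
// precedence-climbing loop that `parse_binary_expr` uses above -- fold operators whose
// precedence is at least the current minimum, and recurse on the right-hand side with
// `prec + 1` so equal-precedence operators associate to the left. Every name in this
// module (`Tok`, `Expr`, `climb`, the precedences) is invented for the example.
#[cfg(test)]
mod precedence_climbing_sketch {
    /// Toy operator tokens for the sketch.
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum Tok {
        Num(i64),
        Plus,
        Star,
    }

    #[derive(Debug, PartialEq)]
    enum Expr {
        Num(i64),
        Add(Box<Expr>, Box<Expr>),
        Mul(Box<Expr>, Box<Expr>),
    }

    /// Binding power of a binary operator, if the token is one.
    fn precedence(t: Tok) -> Option<u32> {
        match t {
            Tok::Plus => Some(10),
            Tok::Star => Some(20),
            _ => None,
        }
    }

    /// Parse a primary expression (here: only integer literals).
    fn primary(toks: &mut std::iter::Peekable<std::slice::Iter<'_, Tok>>) -> Expr {
        match toks.next() {
            Some(Tok::Num(n)) => Expr::Num(*n),
            other => panic!("expected number, found {other:?}"),
        }
    }

    /// Precedence climbing: keep consuming operators whose precedence is at least
    /// `min_prec`; parse the right-hand side at `prec + 1` for left-associativity.
    fn climb(
        toks: &mut std::iter::Peekable<std::slice::Iter<'_, Tok>>,
        min_prec: u32,
    ) -> Expr {
        let mut node = primary(toks);
        while let Some(&&op) = toks.peek() {
            let Some(prec) = precedence(op) else { break };
            if prec < min_prec {
                break;
            }
            toks.next();
            let rhs = climb(toks, prec + 1);
            node = match op {
                Tok::Plus => Expr::Add(Box::new(node), Box::new(rhs)),
                Tok::Star => Expr::Mul(Box::new(node), Box::new(rhs)),
                _ => unreachable!(),
            };
        }
        node
    }

    #[test]
    fn binds_mul_tighter_than_add() {
        // 1 + 2 * 3  ==>  Add(1, Mul(2, 3))
        let toks = [Tok::Num(1), Tok::Plus, Tok::Num(2), Tok::Star, Tok::Num(3)];
        let expr = climb(&mut toks.iter().peekable(), 0);
        assert_eq!(
            expr,
            Expr::Add(
                Box::new(Expr::Num(1)),
                Box::new(Expr::Mul(Box::new(Expr::Num(2)), Box::new(Expr::Num(3))))
            )
        );
    }
}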
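
// Illustration only: the lookahead behind `is_statement` above, reduced to a free
// function over a hypothetical token slice (square brackets omitted for brevity). It
// scans forward on a copy of the stream for a `;` at bracket depth zero to decide
// whether the upcoming expression is a statement or the block's trailing expression.
#[cfg(test)]
mod statement_lookahead_sketch {
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum Tok {
        OpenBrace,
        CloseBrace,
        OpenParen,
        CloseParen,
        Semi,
        Other,
    }

    /// Returns true if a `;` appears before the enclosing block closes, ignoring
    /// semicolons nested inside `{}` or `()`.
    fn is_statement(toks: &[Tok]) -> bool {
        let mut braces: i32 = 0;
        let mut parens: i32 = 0;
        for &t in toks {
            match t {
                Tok::OpenBrace => braces += 1,
                Tok::CloseBrace => braces -= 1,
                Tok::OpenParen => parens += 1,
                Tok::CloseParen => parens -= 1,
                Tok::Semi if braces == 0 && parens == 0 => return true,
                _ => {}
            }
            // Depth went negative: the current block ended without a top-level `;`,
            // so what we peeked at is a trailing expression.
            if braces < 0 || parens < 0 {
                break;
            }
        }
        false
    }

    #[test]
    fn trailing_expression_vs_statement() {
        // `f(a); }` is a statement; `f(a) }` is the block's trailing expression.
        assert!(is_statement(&[
            Tok::Other,
            Tok::OpenParen,
            Tok::Other,
            Tok::CloseParen,
            Tok::Semi,
            Tok::CloseBrace
        ]));
        assert!(!is_statement(&[
            Tok::Other,
            Tok::OpenParen,
            Tok::Other,
            Tok::CloseParen,
            Tok::CloseBrace
        ]));
    }
}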
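
// Illustration only: the panic-mode recovery pattern used by `parse_with_trailing_semi`
// and `parse_inner2` above -- on a failed parse, record the error, skip ahead to the
// synchronization point (here, past the next `;`), and keep parsing instead of
// aborting. The toy grammar and all names here are invented for the example.
#[cfg(test)]
mod error_recovery_sketch {
    /// Parse a sequence of toy statements of the form `IDENT ;`. On a malformed
    /// statement, push an error, advance past the next `;`, and continue.
    fn parse_stmts<'a>(toks: &[&'a str]) -> (Vec<&'a str>, Vec<String>) {
        let mut ok = Vec::new();
        let mut errors = Vec::new();
        let mut i = 0;
        while i < toks.len() {
            let ident = toks[i];
            if ident.chars().all(|c| c.is_ascii_alphabetic()) && toks.get(i + 1) == Some(&";") {
                ok.push(ident);
                i += 2;
            } else {
                errors.push(format!("bad statement at token {i}"));
                // Synchronize: skip everything up to and including the next `;`.
                while i < toks.len() && toks[i] != ";" {
                    i += 1;
                }
                i += 1;
            }
        }
        (ok, errors)
    }

    #[test]
    fn recovers_after_a_bad_statement() {
        // The malformed `1 2 ;` is reported once and skipped; `a` and `b` still parse.
        let (ok, errors) = parse_stmts(&["a", ";", "1", "2", ";", "b", ";"]);
        assert_eq!(ok, vec!["a", "b"]);
        assert_eq!(errors.len(), 1);
    }
}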
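
// Illustration only: the comma-separated-list loop shared by `parse_parameter_list`,
// `parse_argument_list`, and `parse_struct_fields` above -- parse an element, stop
// unless a comma follows, and also stop if that comma is a trailing comma directly
// before the closing delimiter. This sketch consumes the trailing comma itself; the
// real parser handles it at the call site (e.g. `parse_struct_decl` eats a trailing
// `Token::Comma` after `parse_struct_fields`). All names here are invented.
#[cfg(test)]
mod comma_list_sketch {
    /// Collect comma-separated integers terminated by `")"`, allowing an optional
    /// trailing comma, and return them with the index of the closing token.
    fn parse_int_list(toks: &[&str]) -> (Vec<i64>, usize) {
        let mut items = Vec::new();
        let mut i = 0;
        loop {
            items.push(toks[i].parse::<i64>().expect("expected an integer"));
            i += 1;
            // No comma: the list ends here.
            if toks.get(i) != Some(&",") {
                break;
            }
            // Trailing comma: `, )` also ends the list.
            if toks.get(i + 1) == Some(&")") {
                i += 1;
                break;
            }
            // Skip the separator and parse the next element.
            i += 1;
        }
        (items, i)
    }

    #[test]
    fn accepts_trailing_comma() {
        let toks = ["1", ",", "2", ",", ")"];
        let (items, end) = parse_int_list(&toks);
        assert_eq!(items, vec![1, 2]);
        assert_eq!(toks[end], ")");
    }
}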