From 45653380cf2776aba9856ea45b3fc0d763318055 Mon Sep 17 00:00:00 2001 From: Janis Date: Mon, 9 Sep 2024 15:01:53 +0200 Subject: [PATCH] new ast parser --- Cargo.toml | 1 + src/ast2/mod.rs | 2638 ++++++++++++++++++++++++++++++++++++++++++ src/bin/tokenizer.rs | 2 +- src/common.rs | 7 + src/comptime.rs | 32 +- src/lexer.rs | 240 +++- src/lib.rs | 1 + src/parser.rs | 25 +- src/tokens.rs | 37 + 9 files changed, 2939 insertions(+), 44 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f262906..7ad3168 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ itertools = "0.13.0" log = "0.4.22" num-bigint = "0.4.6" num-traits = "0.2.19" +ordered-float = "4.2.2" petgraph = "0.6.5" thiserror = "1.0.63" unicode-xid = "0.2.4" diff --git a/src/ast2/mod.rs b/src/ast2/mod.rs index e69de29..8538ce0 100644 --- a/src/ast2/mod.rs +++ b/src/ast2/mod.rs @@ -0,0 +1,2638 @@ +use std::num::NonZero; + +use crate::{lexer::SourceLocation, tokens::Token}; + +pub mod intern { + use std::{ + collections::BTreeMap, + hash::{Hash, Hasher}, + }; + + use itertools::Itertools; + use num_bigint::{BigInt, BigUint, Sign}; + + use crate::{ + common::{from_lo_hi_dwords, into_lo_hi_dwords}, + variant, + }; + + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + #[repr(u8)] + pub enum SimpleType { + F32, + F64, + Bool, + Void, + USize, + ISize, + ComptimeInt, + } + + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub enum Tag { + String, + SIntSmall, + UIntSmall, + UInt64, + SInt64, + F32, + F64, + PositiveInt, + NegativeInt, + UIntType, + SIntType, + SimpleType, + PointerType, + ArrayType, + FunctionType, + StructType, + } + + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + struct Item { + tag: Tag, + index: u32, + } + + #[derive(Debug, Clone, PartialEq)] + #[non_exhaustive] + pub enum Key<'a> { + String { + str: &'a str, + }, + SIntSmall { + bits: i32, + }, + UIntSmall { + bits: u32, + }, + SInt64 { + bits: i64, + }, + UInt64 { + bits: u64, + }, + F32 { + bits: f32, + }, + F64 { + bits: f64, + }, + PositiveInt { + bigint: BigInt, + }, + NegativeInt { + bigint: BigInt, + }, + UIntType { + bits: u16, + }, + SIntType { + bits: u16, + }, + SimpleType { + ty: SimpleType, + }, + PointerType { + pointee: Index, + flags: PointerFlags, + }, + ArrayType { + pointee: Index, + flags: PointerFlags, + length: u32, + }, + FunctionType { + return_type: Index, + parameters: Vec, + }, + StructType { + name: Index, + packed: bool, + c_like: bool, + fields: Vec<(Index, Index)>, + }, + } + + impl Hash for Key<'_> { + fn hash(&self, state: &mut H) { + core::mem::discriminant(self).hash(state); + match self { + Key::String { str } => str.hash(state), + Key::SIntSmall { bits } => bits.hash(state), + Key::UIntSmall { bits } => bits.hash(state), + Key::SInt64 { bits } => bits.hash(state), + Key::UInt64 { bits } => bits.hash(state), + Key::F32 { bits } => ordered_float::OrderedFloat(*bits).hash(state), + Key::F64 { bits } => ordered_float::OrderedFloat(*bits).hash(state), + Key::PositiveInt { bigint } => bigint.hash(state), + Key::NegativeInt { bigint } => bigint.hash(state), + Key::UIntType { bits } => bits.hash(state), + Key::SIntType { bits } => bits.hash(state), + Key::SimpleType { ty } => ty.hash(state), + Key::PointerType { pointee, flags } => (pointee, flags).hash(state), + Key::ArrayType { + pointee, + flags, + length, + } => (pointee, flags, length).hash(state), + Key::FunctionType { + return_type, + parameters, + } => (return_type, parameters).hash(state), + } + } + } + + // #[repr(packed)] + #[derive(Debug, Default, Clone, 
Copy, PartialEq, Eq, Hash)] + pub struct PointerFlags { + pub volatile: bool, + pub is_const: bool, + pub noalias: bool, + } + + impl PointerFlags { + pub fn new(is_const: bool, volatile: bool, noalias: bool) -> Self { + Self { + is_const, + volatile, + noalias, + } + } + + fn pack(self) -> u8 { + (self.volatile as u8) << 0 | (self.is_const as u8) << 1 | (self.noalias as u8) << 2 + } + fn unpack(packed: u8) -> Self { + Self { + volatile: packed & (1 << 0) != 0, + is_const: packed & (1 << 1) != 0, + noalias: packed & (1 << 2) != 0, + } + } + } + + #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] + struct StructFlags { + packed: bool, + c_like: bool, + num_fields: u32, + } + + impl StructFlags { + const MASK: u32 = (1u32 << 30) - 1; + pub fn new(packed: bool, c_like: bool, num_fields: u32) -> Self { + assert!(num_fields < (1 << 30)); + Self { + packed, + c_like, + num_fields, + } + } + fn pack(self) -> u32 { + assert!(self.num_fields < (1 << 30)); + (self.packed as u32) << 31 | (self.c_like as u32) << 30 | self.num_fields & Self::MASK + } + fn unpack(packed: u32) -> Self { + Self { + packed: packed & (1 << 31) != 0, + c_like: packed & (1 << 30) != 0, + num_fields: packed & Self::MASK, + } + } + } + + #[derive(Debug, Clone, Copy)] + struct FunctionInfo { + void_return: bool, + num_params: u32, + } + impl FunctionInfo { + fn new(void_return: bool, num_params: u32) -> Self { + Self { + void_return, + num_params, + } + } + + const MASK: u32 = 1u32 << (u32::BITS - 1); + fn pack(self) -> u32 { + (self.void_return as u32 * Self::MASK) | self.num_params & !Self::MASK + } + fn unpack(packed: u32) -> Self { + Self { + void_return: packed & Self::MASK != 0, + num_params: packed & !Self::MASK, + } + } + fn len(self) -> u32 { + self.void_return as u32 + self.num_params + } + } + + impl Item { + fn idx(self) -> usize { + self.index as usize + } + } + + #[repr(transparent)] + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct Index(u32); + + impl Index { + pub fn into_u32(self) -> u32 { + unsafe { core::mem::transmute(self) } + } + pub fn as_u32(&self) -> &u32 { + unsafe { core::mem::transmute(self) } + } + fn index(&self) -> usize { + self.0 as usize + } + } + + pub struct InternPool { + tags: Vec, + indices: Vec, + // + strings: Vec, + words: Vec, + hashed: BTreeMap, + } + + const STATIC_KEYS: [Key; 19] = [ + Key::SimpleType { + ty: SimpleType::Bool, + }, + Key::SimpleType { + ty: SimpleType::F32, + }, + Key::SimpleType { + ty: SimpleType::F64, + }, + Key::SimpleType { + ty: SimpleType::USize, + }, + Key::SimpleType { + ty: SimpleType::ISize, + }, + Key::SimpleType { + ty: SimpleType::Void, + }, + Key::SimpleType { + ty: SimpleType::ComptimeInt, + }, + Key::SIntType { bits: 1 }, + Key::UIntType { bits: 1 }, + Key::SIntType { bits: 0 }, + Key::UIntType { bits: 0 }, + Key::SIntType { bits: 8 }, + Key::UIntType { bits: 8 }, + Key::SIntType { bits: 16 }, + Key::UIntType { bits: 16 }, + Key::SIntType { bits: 32 }, + Key::UIntType { bits: 32 }, + Key::SIntType { bits: 64 }, + Key::UIntType { bits: 64 }, + ]; + + impl InternPool { + pub fn get_void_type(&self) -> Index { + self.get_assume_present(Key::SimpleType { + ty: SimpleType::Void, + }) + } + pub fn get_bool_type(&self) -> Index { + self.get_assume_present(Key::SimpleType { + ty: SimpleType::Bool, + }) + } + pub fn get_f32_type(&self) -> Index { + self.get_assume_present(Key::SimpleType { + ty: SimpleType::F32, + }) + } + pub fn get_f64_type(&self) -> Index { + self.get_assume_present(Key::SimpleType { + ty: 
SimpleType::F64, + }) + } + pub fn get_comptime_int_type(&self) -> Index { + self.get_assume_present(Key::SimpleType { + ty: SimpleType::ComptimeInt, + }) + } + pub fn get_usize_type(&self) -> Index { + self.get_assume_present(Key::SimpleType { + ty: SimpleType::USize, + }) + } + pub fn get_isize_type(&self) -> Index { + self.get_assume_present(Key::SimpleType { + ty: SimpleType::ISize, + }) + } + pub fn get_u0_type(&self) -> Index { + self.get_assume_present(Key::UIntType { bits: 0 }) + } + pub fn get_i0_type(&self) -> Index { + self.get_assume_present(Key::SIntType { bits: 0 }) + } + pub fn get_u1_type(&self) -> Index { + self.get_assume_present(Key::UIntType { bits: 1 }) + } + pub fn get_i1_type(&self) -> Index { + self.get_assume_present(Key::SIntType { bits: 1 }) + } + pub fn get_u8_type(&self) -> Index { + self.get_assume_present(Key::UIntType { bits: 8 }) + } + pub fn get_i8_type(&self) -> Index { + self.get_assume_present(Key::SIntType { bits: 8 }) + } + pub fn get_u16_type(&self) -> Index { + self.get_assume_present(Key::UIntType { bits: 16 }) + } + pub fn get_i16_type(&self) -> Index { + self.get_assume_present(Key::SIntType { bits: 16 }) + } + pub fn get_u32_type(&self) -> Index { + self.get_assume_present(Key::UIntType { bits: 32 }) + } + pub fn get_i32_type(&self) -> Index { + self.get_assume_present(Key::SIntType { bits: 32 }) + } + pub fn get_u64_type(&self) -> Index { + self.get_assume_present(Key::UIntType { bits: 64 }) + } + pub fn get_i64_type(&self) -> Index { + self.get_assume_present(Key::SIntType { bits: 64 }) + } + } + + #[derive(Debug, Clone, Copy)] + pub struct TypeInfo { + pub bitsize: u32, + pub bitalign: u32, + } + + impl InternPool { + pub fn size_of_type(&self, index: Index, ptr_size: TypeInfo) -> TypeInfo { + match self.get_key(index) { + Key::UIntType { bits } => { + let bits = bits as u32; + TypeInfo { + bitsize: bits, + bitalign: bits.next_multiple_of(8).next_power_of_two(), + } + } + Key::SIntType { bits } => { + let bits = bits as u32; + TypeInfo { + bitsize: bits, + bitalign: bits.next_multiple_of(8).next_power_of_two(), + } + } + Key::SimpleType { ty } => match ty { + SimpleType::F32 => TypeInfo { + bitsize: 32, + bitalign: 32, + }, + SimpleType::F64 => TypeInfo { + bitsize: 64, + bitalign: 64, + }, + SimpleType::Bool => TypeInfo { + bitsize: 1, + bitalign: 1, + }, + SimpleType::Void => TypeInfo { + bitsize: 0, + bitalign: 0, + }, + SimpleType::USize => ptr_size, + SimpleType::ISize => ptr_size, + SimpleType::ComptimeInt => panic!("comptime int is unsized"), + }, + Key::PointerType { .. } => ptr_size, + Key::ArrayType { + pointee, length, .. + } => { + let element_size = self.size_of_type(pointee, ptr_size); + let bitsize = element_size.bitalign * length; + TypeInfo { + bitsize, + ..element_size + } + } + Key::FunctionType { .. } => ptr_size, + Key::StructType { packed, fields, .. 
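+ // Struct size: fold over the fields, accumulating their bit sizes and
+ // taking the largest field alignment as the struct's overall alignment.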
} => { + // TODO: c-like layout + let (size, align) = fields.iter().fold((0, 0), |(size, align), (_name, ty)| { + let field_size = self.size_of_type(*ty, ptr_size); + let size = size + field_size.bitsize; + + let size = if packed { + size.next_multiple_of(field_size.bitalign) + } else { + size + }; + let align = align.max(field_size.bitalign); + (size, align) + }); + + TypeInfo { + bitsize: size, + bitalign: align, + } + } + _ => { + panic!("index was not a type") + } + } + } + } + + impl InternPool { + pub fn create() -> Self { + let mut this = Self { + tags: Vec::new(), + indices: Vec::new(), + strings: Vec::new(), + words: Vec::new(), + hashed: BTreeMap::new(), + }; + + this.extend_keys(STATIC_KEYS); + + this + } + + fn extend_keys<'a, K: IntoIterator>>(&mut self, keys: K) { + for k in keys.into_iter() { + self.insert(k); + } + } + + fn len(&self) -> u32 { + u32::try_from(self.tags.len()) + .expect(&format!("more than {} items in internpool!", u32::MAX)) + } + + pub fn get_or_insert(&mut self, key: Key) -> Index { + let mut hasher = std::hash::DefaultHasher::new(); + key.hash(&mut hasher); + let digest = hasher.finish(); + if let Some(&idx) = self.hashed.get(&digest) { + idx + } else { + self.insert(key) + } + } + + fn insert(&mut self, key: Key) -> Index { + match key { + Key::String { str } => { + let len = str.len() as u32; + let start = self.extend_strings(str); + + let words_idx = self.extend_words([start, len]); + self.create_item(Tag::String, words_idx) + } + Key::SIntSmall { bits } => self.create_item(Tag::SIntSmall, bits as u32), + Key::UIntSmall { bits } => self.create_item(Tag::UIntSmall, bits as u32), + Key::F32 { bits } => self.create_item(Tag::F32, bits as u32), + Key::F64 { bits } => { + let (lo, hi) = into_lo_hi_dwords(bits as u64); + let words_idx = self.extend_words([lo, hi]); + self.create_item(Tag::F64, words_idx) + } + Key::SInt64 { bits } => { + let (lo, hi) = into_lo_hi_dwords(bits as u64); + let i = self.extend_words([lo, hi]); + self.create_item(Tag::SInt64, i) + } + Key::UInt64 { bits } => { + let (lo, hi) = into_lo_hi_dwords(bits as u64); + let i = self.extend_words([lo, hi]); + self.create_item(Tag::UInt64, i) + } + Key::PositiveInt { bigint } => { + let (_, words) = bigint.to_u32_digits(); + let i = self.push_word(words.len() as u32); + _ = self.extend_words(words); + self.create_item(Tag::PositiveInt, i) + } + Key::NegativeInt { bigint } => { + let (_, words) = bigint.to_u32_digits(); + let i = self.push_word(words.len() as u32); + _ = self.extend_words(words); + self.create_item(Tag::NegativeInt, i) + } + Key::UIntType { bits } => self.create_item(Tag::SIntSmall, bits as u32), + Key::SIntType { bits } => self.create_item(Tag::SIntSmall, bits as u32), + Key::SimpleType { ty } => self.create_item(Tag::SimpleType, ty as u8 as u32), + Key::PointerType { pointee, flags } => { + let flags = flags.pack(); + let i = self.extend_words([pointee.0, flags as u32]); + self.create_item(Tag::PointerType, i) + } + Key::ArrayType { + pointee, + flags, + length, + } => { + let flags = flags.pack(); + let i = self.extend_words([pointee.0, flags as u32, length]); + self.create_item(Tag::PointerType, i) + } + Key::StructType { + name, + packed, + c_like, + fields, + } => { + let flags = StructFlags::new(packed, c_like, fields.len() as u32).pack(); + let i = self.push_word(name.into_u32()); + let i = self.push_word(flags); + self.extend_words( + fields + .into_iter() + .map(|(n, t)| [n.into_u32(), t.into_u32()]) + .flatten(), + ); + self.create_item(Tag::StructType, i) + } + 
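+ // A function type is stored as one packed FunctionInfo word (void-return
+ // flag in the most significant bit, parameter count in the remaining bits),
+ // followed by the interned parameter type indices.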
Key::FunctionType { + return_type, + parameters, + } => { + let info = FunctionInfo::new( + return_type == self.get_simple_type(SimpleType::Void), + parameters.len() as u32, + ); + + let start = self.push_word(info.pack()); + _ = self.extend_words(parameters.into_iter().map(|i| i.0)); + + self.create_item(Tag::FunctionType, start) + } + } + } + + fn extend_strings>(&mut self, b: B) -> u32 { + let idx = self.strings.len() as u32; + self.strings.extend(b.as_ref()); + idx + } + fn extend_words>(&mut self, i: I) -> u32 { + let idx = self.words.len() as u32; + self.words.extend(i); + idx + } + fn push_word(&mut self, word: u32) -> u32 { + let idx = self.words.len() as u32; + self.words.push(word); + idx + } + + fn create_item(&mut self, tag: Tag, index: u32) -> Index { + let len = self.len(); + self.tags.push(tag); + self.indices.push(index); + Index(len) + } + + pub fn get_key(&self, index: Index) -> Key { + let item = self.get_item(index).unwrap(); + match item.tag { + Tag::String => { + let start = self.words[item.idx()]; + let len = self.words[item.idx() + 1]; + let str = unsafe { + core::str::from_utf8_unchecked( + &self.strings[start as usize..][..len as usize], + ) + }; + Key::String { str } + } + Tag::UIntSmall => Key::UIntSmall { + bits: item.index as u32, + }, + Tag::SIntSmall => Key::SIntSmall { + bits: item.index as i32, + }, + Tag::F32 => Key::F32 { + bits: f32::from_le_bytes(item.index.to_le_bytes()), + }, + Tag::F64 => { + let idx = item.idx(); + let bits = from_lo_hi_dwords(self.words[idx], self.words[idx + 1]); + Key::F64 { + bits: f64::from_le_bytes(bits.to_le_bytes()), + } + } + Tag::SInt64 => { + let bits = from_lo_hi_dwords(self.words[item.idx()], self.words[item.idx() + 1]) + as i64; + Key::SInt64 { bits } + } + Tag::UInt64 => { + let bits = + from_lo_hi_dwords(self.words[item.idx()], self.words[item.idx() + 1]); + Key::UInt64 { bits } + } + Tag::NegativeInt => { + let len = self.words[item.idx()]; + let start = item.idx() + 1; + let end = start + len as usize; + let data = BigUint::from_slice(&self.words[start..end]); + let bigint = BigInt::from_biguint(Sign::Minus, data); + Key::NegativeInt { bigint } + } + Tag::PositiveInt => { + let len = self.words[item.idx()]; + let start = item.idx() + 1; + let end = start + len as usize; + let data = BigUint::from_slice(&self.words[start..end]); + let bigint = BigInt::from_biguint(Sign::Plus, data); + Key::PositiveInt { bigint } + } + Tag::SIntType => { + let bits = self.words[item.idx()] as u16; + Key::SIntType { bits } + } + Tag::UIntType => { + let bits = self.words[item.idx()] as u16; + Key::SIntType { bits } + } + Tag::SimpleType => { + let ty = self.words[item.idx()] as u8; + + Key::SimpleType { + ty: unsafe { core::mem::transmute::(ty) }, + } + } + Tag::PointerType => { + let pointee = Index(self.words[item.idx()]); + let flags = PointerFlags::unpack(self.words[item.idx() + 1] as u8); + + Key::PointerType { pointee, flags } + } + Tag::ArrayType => { + let pointee = Index(self.words[item.idx()]); + let flags = PointerFlags::unpack(self.words[item.idx() + 1] as u8); + let length = self.words[item.idx() + 2]; + + Key::ArrayType { + pointee, + flags, + length, + } + } + Tag::StructType => { + let name = Index(self.words[item.idx()]); + let flags = StructFlags::unpack(self.words[item.idx() + 1]); + let start = item.idx() + 2; + let end = start + flags.num_fields as usize * 2; + + let fields = self.words[start..end] + .iter() + .cloned() + .array_chunks::<2>() + .map(|[n, t]| (Index(n), Index(t))) + .collect::>(); + + 
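+ // Stored layout read above: the name intern, a packed StructFlags word,
+ // then `num_fields` (field name, field type) index pairs.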
Key::StructType { + name, + packed: flags.packed, + c_like: flags.c_like, + fields, + } + } + Tag::FunctionType => { + let info = FunctionInfo::unpack(self.words[item.idx()]); + let len = info.len(); + let (return_type, parameters) = if info.void_return { + let start = item.idx() + 1; + let end = start + len as usize; + let params = self.words[start..end] + .iter() + .map(|&i| Index(i)) + .collect::>(); + ( + self.get_assume_present(Key::SimpleType { + ty: SimpleType::Void, + }), + params, + ) + } else { + let start = item.idx() + 2; + let end = start + len as usize; + let return_type = Index(self.words[item.idx() + 1]); + let params = self.words[start..end] + .iter() + .map(|&i| Index(i)) + .collect::>(); + (return_type, params) + }; + + Key::FunctionType { + return_type, + parameters, + } + } + } + } + + pub fn get_assume_present(&self, key: Key) -> Index { + let mut hasher = std::hash::DefaultHasher::new(); + key.hash(&mut hasher); + let digest = hasher.finish(); + if let Some(&idx) = self.hashed.get(&digest) { + idx + } else { + panic!("key {key:?} not present in pool.") + } + } + + pub fn get_int_type(&mut self, signed: bool, bits: u16) -> Index { + let key = match signed { + true => Key::SIntType { bits }, + false => Key::UIntType { bits }, + }; + + self.get_or_insert(key) + } + + pub fn get_string_index(&mut self, str: &str) -> Index { + self.get_or_insert(Key::String { str }) + } + + pub fn get_simple_type(&mut self, ty: SimpleType) -> Index { + self.get_or_insert(Key::SimpleType { ty }) + } + + pub fn get_function_type>( + &mut self, + return_type: Index, + parameters: P, + ) -> Index { + self.get_or_insert(Key::FunctionType { + return_type, + parameters: parameters.into_iter().collect(), + }) + } + + pub fn get_pointer_type(&mut self, pointee: Index, flags: Option) -> Index { + let key = Key::PointerType { + pointee, + flags: flags.unwrap_or_default(), + }; + self.get_or_insert(key) + } + + pub fn get_struct_type( + &mut self, + name: Index, + packed: bool, + c_like: bool, + fields: Vec<(Index, Index)>, + ) -> Index { + let key = Key::StructType { + name, + packed, + c_like, + fields, + }; + self.get_or_insert(key) + } + + pub fn get_array_type( + &mut self, + pointee: Index, + flags: Option, + length: u32, + ) -> Index { + let key = Key::ArrayType { + pointee, + flags: flags.unwrap_or_default(), + length, + }; + self.get_or_insert(key) + } + + pub fn get_str(&self, index: Index) -> &str { + let key = self.get_key(index); + assert!(matches!(key, Key::String { .. 
})); + variant!(key => Key::String { str }); + + str + } + + fn check_bounds(&self, index: Index) -> Option { + (index.0 < self.len()).then_some(index) + } + + fn get_item(&self, index: Index) -> Option { + self.check_bounds(index).map(|i| Item { + tag: self.tags[i.index()], + index: self.indices[i.index()], + }) + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum Tag { + /// pseudo tag + Root, + /// `data` is a range from a..b into extra of all global nodes + File, + /// `data` is an intern to a name, and an index into extra of [intern: return_type, index: ParameterList] + FunctionProto, + /// `data` is an index to a FunctionProto and an index to a Block + FunctionDecl, + /// `data` is a range from a..b into extra of indices to parameters + ParameterList, + /// `data` is an intern to a name, and an intern to a type + Parameter, + /// `data` is range from a..b into `extra` of indices to statements + Block, + /// `data` is range from a..b into `extra` of indices to statements, where the last one is an expression + BlockTrailingExpr, + /// `data` is an intern to a value, intern to a type + Constant, + /// `data` is an index to an expression + ExprStmt, + /// `data` is none + ReturnStmt, + /// `data` is an index to an expr + ReturnValueStmt, + /// `data` is a range from a..b into `extra` of an intern to a name and an optional intern to a type + VarDecl, + /// `data` is a range from a..b into `extra` of an intern to a name and an optional intern to a type + MutVarDecl, + /// `data` is a range from a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type + VarDeclAssignment, + /// `data` is a range from a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type + MutVarDeclAssignment, + /// `data` is an index to an expression and an intern to a name + GlobalDecl, + /// `data` is an intern to a struct type + StructDecl, + /// `data` is an index to a VarDecl, GlobalDecl or FunctionDecl + DeclRef, + /// `data` is an index to an expr and an index to an ArgumentList + CallExpr, + /// `data` is a range from a..b into extra of indices to arguments + ArgumentList, + /// `data` is an index to an expression + Argument, + /// `data` is an index to an expression, and an intern to a name + NamedArgument, + /// `data` is an index to lhs, and an intern to the type + ExplicitCast, + /// `data` is a single index to an expr + Deref, + AddressOf, + Not, + Negate, + /// data is two indices for `lhs` and `rhs` + Or, + And, + BitOr, + BitXOr, + BitAnd, + Eq, + NEq, + Lt, + Gt, + Le, + Ge, + Shl, + Shr, + Add, + Sub, + Mul, + Div, + Rem, + Assign, + SubscriptExpr, + IfExpr, + /// `data` is an index to an expression and an index into extra for [if, else] + IfElseExpr, + // TODO: + /// `data` is a ParseError + Error, + /// placeholder tag for reserved indices/nodes, `data` is none + Undefined, +} + +#[derive(Debug, Clone, Copy, thiserror::Error, PartialEq, Eq)] +enum ParseError { + #[error("Unexpected end of token iter.")] + UnexpectedEndOfTokens, + #[error("Expected Token {0}.")] + ExpectedToken(Token), + #[error("Expected Token {0}, but other token was found.")] + ExpectedTokenNotFound(Token), + #[error("Expected either a function declaration or a global variable.")] + UnexpectedTokenAtFileScope, + #[error("Expected Ident.")] + ExpectedIdent, + #[error("Integral types may not be wider than 65535 bits.")] + IntegralTypeTooWide, + #[error("Expected typename.")] + ExpectedTypeName, + #[error("Dummy Message.")] + 
ExpectedFunctionPrototype, + #[error("Dummy Message.")] + ExpectedPrimaryExpression, + #[error("Dummy Message.")] + ExpectedExpression, + #[error("Dummy Message.")] + ExpectedPostfixExpression, + #[error("Dummy Message.")] + ExpectedPrefixExpression, + #[error("Dummy Message.")] + ExpectedStatement, + #[error("Dummy Message.")] + UnmatchedParens(u32), + #[error("Dummy Message.")] + UnmatchedSquareBracket(u32), + #[error("Dummy Message.")] + ExpectedEndOfBlock, + #[error("Dummy Message.")] + UnmatchedBrace(u32), + #[error("Error in child node {0:?}.")] + ErrorNode(Index), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Index(NonZero); + +impl Index { + pub fn as_u32(&self) -> &u32 { + unsafe { core::mem::transmute(self) } + } + pub fn into_u32(self) -> u32 { + unsafe { core::mem::transmute(self) } + } + fn index(self) -> usize { + self.0.get() as usize + } +} + +#[repr(packed)] +#[derive(Clone, Copy)] +struct Node { + /// defines the type of the node in the tree + tag: Tag, + data: Data, +} + +#[derive(Clone, Copy)] +union Data { + none: (), + error: ParseError, + index: Index, + two_indices: (Index, Index), + range: (Index, Index), + extra_range: (u32, u32), + intern: intern::Index, + index_intern: (Index, intern::Index), + two_interns: (intern::Index, intern::Index), + intern_and_extra_offset: (intern::Index, u32), + index_and_extra_offset: (Index, u32), +} + +impl Data { + fn none() -> Self { + Self { none: () } + } + fn error(error: ParseError) -> Self { + Self { error } + } + fn index(index: Index) -> Self { + Self { index } + } + fn two_indices(a: Index, b: Index) -> Self { + Self { + two_indices: (a, b), + } + } + fn two_interns(a: intern::Index, b: intern::Index) -> Self { + Self { + two_interns: (a, b), + } + } + fn range_of_indices(a: Index, b: Index) -> Self { + Self { range: (a, b) } + } + fn extra_range(a: u32, b: u32) -> Self { + Self { + extra_range: (a, b), + } + } + fn intern(intern: intern::Index) -> Self { + Self { intern } + } + fn index_and_intern(index: Index, intern: intern::Index) -> Self { + Self { + index_intern: (index, intern), + } + } + fn intern_and_extra_offset(intern: intern::Index, offset: u32) -> Self { + Self { + intern_and_extra_offset: (intern, offset), + } + } + fn index_and_extra_offset(index: Index, offset: u32) -> Self { + Self { + index_and_extra_offset: (index, offset), + } + } +} + +struct Ast { + tags: Vec, + datas: Vec, + extra: Vec, + source_locs: Vec, +} + +impl Ast { + fn reserve_node(&mut self) -> Index { + let i = unsafe { Index(NonZero::new_unchecked(self.tags.len() as u32)) }; + self.tags.push(Tag::Undefined); + self.datas.push(Data::none()); + self.source_locs.push(SourceLocation::invalid()); + + i + } + + fn get_loc(&self, index: Index) -> SourceLocation { + self.source_locs[index.index()] + } + + fn push_error(&mut self, error: ParseError, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::Error, Data::error(error), loc); + + i + } + + fn push_file>(&mut self, decls: I, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + let (extra_start, extra_end) = self.extend_extra_by_indices(decls); + self.set_tag_data_source_loc(i, Tag::File, Data::extra_range(extra_start, extra_end), loc); + + i + } + + fn push_global_decl( + &mut self, + ident: intern::Index, + ty: intern::Index, + expr: Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + let (extra_start, _) = self.extend_extra([ty.into_u32(), 
expr.into_u32()]); + self.set_tag_data_source_loc( + i, + Tag::GlobalDecl, + Data::intern_and_extra_offset(ident, extra_start), + loc, + ); + + i + } + + fn push_fn_decl(&mut self, proto: Index, body: Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::FunctionDecl, Data::two_indices(proto, body), loc); + + i + } + + fn push_ret(&mut self, expr: Option, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + match expr { + Some(expr) => { + self.set_tag_data_source_loc(i, Tag::ReturnValueStmt, Data::index(expr), loc) + } + None => self.set_tag_data_source_loc(i, Tag::ReturnStmt, Data::none(), loc), + } + + i + } + + fn push_var_decl( + &mut self, + is_let: bool, + name: intern::Index, + ty: Option, + assignment: Option, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + + let start = self.extra.len() as u32; + self.extra.push(name.into_u32()); + _ = self.extend_extra(assignment.map(|i| i.into_u32())); + let (_, end) = self.extend_extra(ty.map(|i| i.into_u32())); + + let tag = match (is_let, assignment.is_some()) { + (true, false) => Tag::VarDecl, + (true, true) => Tag::VarDeclAssignment, + (false, false) => Tag::MutVarDecl, + (false, true) => Tag::MutVarDeclAssignment, + }; + + self.set_tag_data_source_loc(i, tag, Data::extra_range(start, end), loc); + + i + } + + fn push_struct_decl(&mut self, struct_type: intern::Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::StructDecl, Data::intern(struct_type), loc); + i + } + + fn push_fn_proto( + &mut self, + ident: intern::Index, + return_type: intern::Index, + parameter_list: Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + let (extra_start, _) = + self.extend_extra([return_type.into_u32(), parameter_list.into_u32()]); + self.set_tag_data_source_loc( + i, + Tag::FunctionProto, + Data::intern_and_extra_offset(ident, extra_start), + loc, + ); + + i + } + + fn push_block>( + &mut self, + statements: I, + trailing: Option, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + let (extra_start, extra_end) = + self.extend_extra_by_indices(statements.into_iter().chain(trailing.into_iter())); + if trailing.is_some() { + self.set_tag_data_source_loc( + i, + Tag::BlockTrailingExpr, + Data::extra_range(extra_start, extra_end), + loc, + ); + } else { + self.set_tag_data_source_loc( + i, + Tag::Block, + Data::extra_range(extra_start, extra_end), + loc, + ); + } + + i + } + + fn push_parameter_list>( + &mut self, + parameters: I, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + let (extra_start, extra_end) = self.extend_extra_by_indices(parameters); + self.set_tag_data_source_loc( + i, + Tag::ParameterList, + Data::extra_range(extra_start, extra_end), + loc, + ); + + i + } + + fn push_argument(&mut self, expr: Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::Argument, Data::index(expr), loc); + + i + } + + fn push_named_argument( + &mut self, + name: intern::Index, + expr: Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc( + i, + Tag::NamedArgument, + Data::index_and_intern(expr, name), + loc, + ); + + i + } + + fn push_parameter( + &mut self, + name: intern::Index, + ty: intern::Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::Parameter, Data::two_interns(name, ty), 
loc); + + i + } + + fn push_argument_list>( + &mut self, + args: I, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + let (extra_start, extra_end) = self.extend_extra_by_indices(args); + self.set_tag_data_source_loc( + i, + Tag::ArgumentList, + Data::extra_range(extra_start, extra_end), + loc, + ); + + i + } + + fn push_unary(&mut self, tag: Tag, lhs: Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, tag, Data::index(lhs), loc); + + i + } + + fn push_binary(&mut self, tag: Tag, lhs: Index, rhs: Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, tag, Data::two_indices(lhs, rhs), loc); + + i + } + + fn push_assign(&mut self, lhs: Index, rhs: Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::Assign, Data::two_indices(lhs, rhs), loc); + + i + } + + fn push_cast(&mut self, lhs: Index, ty: intern::Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::ExplicitCast, Data::index_and_intern(lhs, ty), loc); + + i + } + + fn push_if(&mut self, cond: Index, body: Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::IfExpr, Data::two_indices(cond, body), loc); + + i + } + + fn push_if_else( + &mut self, + cond: Index, + body: Index, + other: Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + let (extra_start, _) = self.extend_extra_by_indices([body, other]); + self.set_tag_data_source_loc( + i, + Tag::IfElseExpr, + Data::index_and_extra_offset(cond, extra_start), + loc, + ); + + i + } + + fn push_call_expr(&mut self, lhs: Index, args: Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::CallExpr, Data::two_indices(lhs, args), loc); + + i + } + + fn push_decl_ref(&mut self, ident: intern::Index, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::DeclRef, Data::intern(ident), loc); + + i + } + + fn push_expr_stmt(&mut self, expr: Index) -> Index { + let i = self.reserve_node(); + let loc = self.get_loc(expr); + self.set_tag_data_source_loc(i, Tag::ExprStmt, Data::index(expr), loc); + + i + } + + fn push_constant( + &mut self, + value: intern::Index, + ty: intern::Index, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + self.set_tag_data_source_loc(i, Tag::Constant, Data::two_interns(value, ty), loc); + + i + } + + fn extend_extra_by_indices>(&mut self, indices: I) -> (u32, u32) { + self.extend_extra(indices.into_iter().map(|i| i.0.get())) + } + fn extend_extra>(&mut self, words: I) -> (u32, u32) { + let i = self.extra.len() as u32; + self.extra.extend(words); + + (i, self.extra.len() as u32) + } + fn set_tag_data_source_loc(&mut self, index: Index, tag: Tag, data: Data, loc: SourceLocation) { + self.tags[index.index()] = tag; + self.datas[index.index()] = data; + self.source_locs[index.index()] = loc; + } +} + +mod ast_gen { + + use intern::{PointerFlags, SimpleType}; + use itertools::Itertools; + use num_bigint::{BigInt, BigUint}; + + use crate::{ + common::{from_lo_hi_dwords, NextIf}, + comptime, + lexer::{Radix, TokenItem, TokenIterator}, + tokens::PRECEDENCE_MAP, + }; + + use super::*; + + struct ErrorInfo { + error: ParseError, + loc: SourceLocation, + } + + struct Parser { + ast: Ast, + intern: intern::InternPool, + scope: Vec, + errors: Vec, + } + + type 
ParseResult = core::result::Result; + + impl Parser { + fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result { + let ident = tokens.expect_token(Token::Ident).map_err(|_| ErrorInfo { + error: ParseError::ExpectedIdent, + loc: tokens.current_source_location(), + })?; + + let name = self.intern.get_or_insert(intern::Key::String { + str: ident.lexeme(), + }); + + Ok(name) + } + + fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> ParseResult { + tokens.eat_token(Token::Star).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Star), + loc: tokens.current_source_location(), + })?; + + let &[cnst, vol, noalias] = + &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3] + else { + unreachable!() + }; + let pointee = self.parse_type(tokens)?; + + Ok(self + .intern + .get_pointer_type(pointee, Some(PointerFlags::new(cnst, vol, noalias)))) + } + + /// [LENGTH]const? volatile? noalias? TYPE + fn parse_array_type(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let start = tokens.eat_token(Token::OpenSquareBracket).unwrap(); + let length = match self.parse_expr(tokens) { + Ok(i) => { + _ = tokens + .eat_token(Token::CloseSquareBracket) + .ok_or(ErrorInfo { + error: ParseError::ExpectedTypeName, + loc: tokens.current_source_location(), + })?; + + i + } + Err(err) => { + tokens.advance_past_end_of_bracketed().ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::CloseSquareBracket), + loc: tokens.current_source_location(), + })?; + self.push_error(err.error, err.loc) + } + }; + + let &[cnst, vol, noalias] = + &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3] + else { + unreachable!() + }; + let pointee = self.parse_type(tokens)?; + + Ok(self.intern.get_array_type( + pointee, + Some(PointerFlags::new(cnst, vol, noalias)), + 0, // length, TODO: evaluate this tree branch for an u32 + )) + } + + fn parse_simple_type(&mut self, token: Token) -> Option { + match token { + Token::Void => Some(self.intern.get_assume_present(intern::Key::SimpleType { + ty: SimpleType::Void, + })), + Token::Bool => Some(self.intern.get_assume_present(intern::Key::SimpleType { + ty: SimpleType::Bool, + })), + Token::F32 => Some(self.intern.get_assume_present(intern::Key::SimpleType { + ty: SimpleType::F32, + })), + Token::F64 => Some(self.intern.get_assume_present(intern::Key::SimpleType { + ty: SimpleType::F64, + })), + Token::USize => Some(self.intern.get_assume_present(intern::Key::SimpleType { + ty: SimpleType::USize, + })), + Token::ISize => Some(self.intern.get_assume_present(intern::Key::SimpleType { + ty: SimpleType::ISize, + })), + _ => None, + } + } + + fn try_parse_integral_type( + &mut self, + typename: &str, + ) -> Result, ParseError> { + let mut iter = typename.chars().peekable(); + let signed = match iter.next() { + Some('u') => false, + Some('i') => true, + _ => { + return Ok(None); + } + }; + + // need 1 digit for an integral type + if iter.peek().map(|&c| crate::common::is_digit(c)) != Some(true) { + return Ok(None); + } + + // need no nondigits after digits + if iter + .clone() + .skip_while(|&c| crate::common::is_digit(c)) + .next() + .is_some() + { + return Ok(None); + } + + let mut bits = 0u16; + loop { + let Some(digit) = iter.next().map(|c| c as u8 - b'0') else { + break; + }; + + match bits + .checked_mul(10) + .and_then(|bits| bits.checked_add(digit as u16)) + { + Some(val) => { + bits = val; + } + None => { + // this IS an integral type, but it is bigger than u/i65535 + return 
Err(ParseError::IntegralTypeTooWide); + } + } + } + + Ok(Some(self.intern.get_int_type(signed, bits))) + } + + fn try_parse_integral_constant( + &mut self, + item: &TokenItem, + ) -> (intern::Index, intern::Index) { + let radix = Radix::from_token(item.token()).unwrap(); + + let mut chars = item.lexeme().char_indices(); + match radix { + Radix::Dec => {} + _ => { + _ = chars.advance_by(2); + } + } + + let digits = chars + .take_while_ref(|&(_, c)| radix.is_digit()(c) || c == '_') + .filter(|&(_, c)| c != '_') + .map(|(_, c)| c) + .collect::>(); + + let value = comptime::bigint::parse_bigint(digits.into_iter(), radix); + + let ty = match chars.clone().next() { + Some((i, 'u')) | Some((i, 'i')) => self + .try_parse_integral_type(&item.lexeme()[i..]) + .expect("invalid integral type??"), + _ => None, + }; + + let interned = match value.len() { + ..1 => { + let bits = value.get(0).cloned().unwrap_or(0); + self.intern.get_or_insert(intern::Key::UIntSmall { bits }) + } + ..2 => { + let lo = value.get(0).cloned().unwrap_or(0); + let hi = value.get(1).cloned().unwrap_or(0); + let bits = from_lo_hi_dwords(lo, hi); + self.intern.get_or_insert(intern::Key::UInt64 { bits }) + } + _ => { + let bigint = BigInt::from_biguint(num_bigint::Sign::Plus, BigUint::new(value)); + self.intern + .get_or_insert(intern::Key::PositiveInt { bigint }) + } + }; + + (interned, ty.unwrap_or(self.intern.get_comptime_int_type())) + } + + fn parse_floating_constant(&mut self, item: &TokenItem) -> (intern::Index, intern::Index) { + let lexeme = item.lexeme(); + let lexeme = lexeme + .strip_suffix("f32") + .map(|l| (l, self.intern.get_f32_type())) + .unwrap_or( + lexeme + .strip_suffix("f64") + .map(|l| (l, self.intern.get_f64_type())) + .unwrap_or((lexeme, self.intern.get_f64_type())), + ); + + let bits = if lexeme.1 == self.intern.get_f32_type() { + self.intern.get_or_insert(intern::Key::F32 { + bits: lexeme.0.parse::().unwrap(), + }) + } else { + self.intern.get_or_insert(intern::Key::F64 { + bits: lexeme.0.parse::().unwrap(), + }) + }; + + (bits, lexeme.1) + } + + /// TYPE <- + /// * TYPE + /// IDENTIFIER + /// SIMPLE_TYPE + /// [ TYPE ; CONSTANT_EXPR ] + /// INTEGRAL_TYPE // u[0..65535] | i[0..65535] + fn parse_type(&mut self, tokens: &mut TokenIterator) -> ParseResult { + match tokens + .peek_token() + .ok_or(ErrorInfo { + error: ParseError::ExpectedTypeName, + loc: tokens.current_source_location(), + })? + .token() + { + Token::Star => self.parse_pointer(tokens), + Token::OpenSquareBracket => self.parse_array_type(tokens), + Token::Ident => { + let token = tokens.next().unwrap(); + match self + .try_parse_integral_type(token.lexeme()) + .map_err(|error| ErrorInfo { + error, + loc: token.source_location(), + })? 
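+ // An identifier of the form `u<bits>` / `i<bits>` denotes an integral
+ // type; any other identifier is interned and returned as a named type.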
{ + Some(int) => Ok(int), + None => { + let name = self.intern.get_or_insert(intern::Key::String { + str: token.lexeme(), + }); + Ok(name) + } + } + } + token => self.parse_simple_type(token).ok_or(ErrorInfo { + error: ParseError::ExpectedTypeName, + loc: tokens.current_source_location(), + }), + } + } + + /// GLOBAL_DECL <- + /// const IDENTIFIER: TYPENAME = EXPR; + fn parse_const_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let err = 'blk: { + let loc = tokens.current_source_location(); + let Some(_) = tokens.eat_token(Token::Const) else { + break 'blk ErrorInfo { + error: ParseError::ExpectedToken(Token::Const), + loc, + }; + }; + + let ident = match self.parse_ident(tokens) { + Ok(i) => i, + Err(err) => { + break 'blk err; + } + }; + + let typename = match self.parse_type(tokens) { + Ok(i) => i, + Err(err) => { + break 'blk err; + } + }; + + let Some(_) = tokens.eat_token(Token::Equal) else { + break 'blk ErrorInfo { + error: ParseError::ExpectedToken(Token::Equal), + loc: tokens.current_source_location(), + }; + }; + + let expr = match self.parse_expr(tokens) { + Ok(i) => i, + Err(err) => { + break 'blk err; + } + }; + + return Ok(self.ast.push_global_decl(ident, typename, expr, loc)); + }; + + tokens.advance_past_semi().ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Semi), + loc: tokens.current_source_location(), + })?; + + Ok(self.ast.push_error(err.error, err.loc)) + } + + /// FUNCTION_PROTO <- + /// fn IDENTIFIER () + /// fn IDENTIFIER () -> TYPENAME + /// fn IDENTIFIER ( PARAMETER_LIST ,? ) + /// fn IDENTIFIER ( PARAMETER_LIST ,? ) -> TYPENAME + fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let _ = tokens.eat_token(Token::Fn).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Fn), + loc, + })?; + + let ident = self.parse_ident(tokens)?; + + let parameters = self.parse_parameter_list(tokens)?; + + let return_type = if let Some(_) = tokens.eat_token(Token::MinusGreater) { + self.parse_type(tokens)? + } else { + self.intern.get_void_type() + }; + + return Ok(self.ast.push_fn_proto(ident, return_type, parameters, loc)); + } + + fn parse_fn_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + + let proto = self.parse_fn_proto(tokens)?; + + let body = self.parse_block(tokens)?; + + Ok(self.ast.push_fn_decl(proto, body, loc)) + } + + /// FUNCTION_DECL <- + /// FUNCTION_PROTO BLOCK + fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Index { + match self.parse_fn_inner(tokens) { + Ok(i) => i, + Err(err) => { + self.find_next_fn_or_const(tokens); + self.push_error(err.error, err.loc) + } + } + } + + /// RETURN_STATEMENT <- + /// return EXPRESSION? 
; + fn parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> ParseResult { + // SAFETY: function invariance + let ret = tokens.next().unwrap(); + let loc = ret.source_location(); + + let expr = if tokens.eat_token(Token::Semi).is_some() { + self.ast.push_ret(None, loc) + } else { + match self.parse_expr(tokens) { + Ok(i) => { + tokens.eat_token(Token::Semi).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Semi), + loc: tokens.current_source_location(), + })?; + self.ast.push_ret(Some(i), loc) + } + Err(err) => { + tokens.advance_past_semi().ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Semi), + loc: tokens.current_source_location(), + })?; + self.push_error(err.error, err.loc) + } + } + }; + + Ok(expr) + } + + /// VAR_DECL <- + /// (let | var) IDENTIFIER (: TYPENAME)? ; + /// (let | var) IDENTIFIER (: TYPENAME)? = EXPRESSION ; + fn parse_var_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + match self.parse_var_decl_inner(tokens) { + Ok(i) => { + _ = tokens.eat_token(Token::Semi).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Semi), + loc: tokens.current_source_location(), + })?; + Ok(i) + } + Err(err) => { + tokens.advance_past_semi().ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Semi), + loc: tokens.current_source_location(), + })?; + Ok(self.push_error(err.error, err.loc)) + } + } + } + + fn parse_var_decl_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { + // SAFETY: function invariance + let let_or_var = tokens.next().unwrap(); + let loc = let_or_var.source_location(); + + let is_let = let_or_var.token() == Token::Let; + + let name = self.parse_ident(tokens)?; + + let ty = if tokens.eat_token(Token::Colon).is_some() { + Some(self.parse_type(tokens)?) + } else { + None + }; + + let assignment = if tokens.eat_token(Token::Equal).is_some() { + Some(self.parse_expr(tokens)?) 
+ } else { + None + }; + + Ok(self.ast.push_var_decl(is_let, name, ty, assignment, loc)) + } + + fn parse_block_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let mut statements = Vec::new(); + + let trailing = loop { + let next = tokens.peek_token().ok_or(ErrorInfo { + error: ParseError::UnexpectedEndOfTokens, + loc: tokens.current_source_location(), + })?; + + match next.token() { + Token::CloseBrace => { + break None; + } + Token::Return => { + statements.push(self.parse_return_stmt(tokens)?); + } + Token::Var | Token::Let => { + statements.push(self.parse_var_decl(tokens)?); + } + Token::Const => { + statements.push(self.parse_const_decl(tokens)?); + } + Token::Fn => { + statements.push(self.parse_fn_decl(tokens)); + } + _ => { + if self.is_statement(tokens) { + // expr -> statements + let expr = match self.parse_expr(tokens) { + Ok(i) => { + _ = tokens.eat_token(Token::Semi).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Semi), + loc: tokens.current_source_location(), + })?; + + i + } + Err(err) => { + tokens.advance_past_semi().ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Semi), + loc: tokens.current_source_location(), + })?; + self.push_error(err.error, err.loc) + } + }; + + statements.push(expr); + } else { + // expr -> trailing + let expr = match self.parse_expr(tokens) { + Ok(i) => { + if !tokens.is_next_token(Token::CloseBrace) { + return Err(ErrorInfo { + error: ParseError::ExpectedEndOfBlock, + loc: tokens.current_source_location(), + }); + } + + i + } + Err(err) => { + tokens.advance_past_end_of_braced().ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::CloseBrace), + loc: tokens.current_source_location(), + })?; + self.push_error(err.error, err.loc) + } + }; + break Some(expr); + } + } + } + }; + + Ok(self.ast.push_block(statements, trailing, loc)) + } + + /// BLOCK <- + /// { STATEMENT* EXPRESSION? 
} + fn parse_block(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + + let open_brace = tokens.eat_token(Token::OpenBrace).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::OpenBrace), + loc, + })?; + + let block = match self.parse_block_inner(tokens) { + Ok(i) => { + self.scope.pop(); + + if !tokens.is_next_token(Token::CloseBrace) { + return Err(ErrorInfo { + error: ParseError::UnmatchedBrace(open_brace.token_pos().start), + loc: tokens.current_source_location(), + }); + } + + i + } + Err(err) => { + tokens.advance_past_end_of_braced().ok_or(ErrorInfo { + error: ParseError::UnmatchedBrace(open_brace.token_pos().start), + loc: tokens.current_source_location(), + }); + self.push_error(err.error, err.loc) + } + }; + + Ok(block) + } + + /// PARAMETER_LIST <- + /// PARAMETER + /// PARAMETER_LIST , ARGUMENT + fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let mut params = Vec::new(); + loop { + params.push(self.parse_parameter(tokens)?); + + if !tokens.is_next_token(Token::Comma) { + break; + } + if tokens.is_next_token2(Token::CloseParens) { + break; + } + // skip comma + _ = tokens.next(); + } + return Ok(self.ast.push_parameter_list(params, loc)); + } + + /// PARAMETER <- + /// IDENT : TYPENAME + fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let name = self.parse_ident(tokens)?; + let ty = self.parse_type(tokens)?; + + return Ok(self.ast.push_parameter(name, ty, loc)); + } + + /// ARGUMENT <- + /// IDENT : EXPR + /// EXPR + fn parse_argument(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let name = if tokens.is_next_token2(Token::Colon) && tokens.is_next_token(Token::Ident) + { + let name = self.parse_ident(tokens)?; + // we checked `is_next_token2` + _ = tokens.eat_token(Token::Colon).unwrap(); + Some(name) + } else { + None + }; + let expr = self.parse_expr(tokens)?; + + let i = match name { + Some(name) => self.ast.push_named_argument(name, expr, loc), + None => self.ast.push_argument(expr, loc), + }; + + Ok(i) + } + + /// ARGUMENT_LIST <- + /// ARGUMENT + /// ARGUMENT_LIST , ARGUMENT + fn parse_argument_list(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let mut args = Vec::new(); + loop { + args.push(self.parse_argument(tokens)?); + + if !tokens.is_next_token(Token::Comma) { + break; + } + if tokens.is_next_token2(Token::CloseParens) { + break; + } + // skip comma + _ = tokens.next(); + } + return Ok(self.ast.push_argument_list(args, loc)); + } + + /// PRIMARY_EXPR <- + /// IDENTIFIER + /// INTEGER_CONSTANT + /// FLOATING_CONSTANT + /// ( EXPRESSION ) + /// BLOCK + fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + + let Some(next) = tokens.peek_token() else { + return Err(ErrorInfo { + error: ParseError::ExpectedPrimaryExpression, + loc, + }); + }; + + match next.token() { + Token::Ident => { + _ = tokens.next(); + let ident = next.lexeme(); + let ident = self + .intern + .get_or_insert(intern::Key::String { str: ident }); + return Ok(self.ast.push_decl_ref(ident, loc)); + } + Token::IntegerBinConstant + | Token::IntegerHexConstant + | Token::IntegerOctConstant + | Token::IntegerConstant => { + let (value, ty) = self.try_parse_integral_constant(&next); + return 
Ok(self.ast.push_constant(value, ty, loc)); + } + Token::FloatingConstant + | Token::FloatingExpConstant + | Token::DotFloatingConstant + | Token::DotFloatingExpConstant => { + let (value, ty) = self.parse_floating_constant(&next); + return Ok(self.ast.push_constant(value, ty, loc)); + } + + Token::OpenParens => { + _ = tokens.next(); + + let expr = match self.parse_expr(tokens) { + Ok(i) => { + let Some(_) = tokens.eat_token(Token::CloseParens) else { + return Err(ErrorInfo { + error: ParseError::UnmatchedParens(next.token_pos().start), + loc, + }); + }; + + i + } + Err(err) => { + tokens.advance_past_end_of_parens().ok_or(ErrorInfo { + error: ParseError::UnmatchedParens(next.token_pos().start), + loc: tokens.current_source_location(), + })?; + self.push_error(err.error, err.loc) + } + }; + + return Ok(expr); + } + Token::OpenBrace => { + return self.parse_block(tokens); + } + _ => { + return Err(ErrorInfo { + error: ParseError::ExpectedPrimaryExpression, + loc, + }); + } + } + } + + /// POSTFIX_EXPR <- + /// PRIMARY_EXPR + /// PRIMARY_EXPR ( ) + /// PRIMARY_EXPR ( ARGUMENT_LIST ) + /// PRIMARY_EXPR [ EXPR ] + fn parse_postfix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let lhs = self.parse_primary_expr(tokens)?; + + if let Some(next) = tokens.peek_token() { + let loc = next.source_location(); + match next.token() { + Token::OpenParens => { + let rhs = if tokens.is_next_token(Token::CloseParens) { + self.ast.push_argument_list([], loc) + } else { + match self.parse_argument_list(tokens) { + Ok(i) => { + _ = tokens.eat_token(Token::Comma); + + let Some(_) = tokens.eat_token(Token::CloseParens) else { + let loc = tokens.current_source_location(); + return Err(ErrorInfo { + error: ParseError::UnmatchedParens( + next.token_pos().start, + ), + loc, + }); + }; + + i + } + Err(err) => { + tokens.advance_past_end_of_parens().ok_or(ErrorInfo { + error: ParseError::UnmatchedParens(next.token_pos().start), + loc: tokens.current_source_location(), + })?; + + self.push_error(err.error, err.loc) + } + } + }; + + return Ok(self.ast.push_call_expr(lhs, rhs, loc)); + } + Token::OpenSquareBracket => { + let subscript = match self.parse_expr(tokens) { + Ok(i) => i, + Err(err) => { + tokens.advance_past_end_of_bracketed().ok_or(ErrorInfo { + error: ParseError::UnmatchedSquareBracket( + next.token_pos().start, + ), + loc: tokens.current_source_location(), + })?; + self.push_error(err.error, err.loc) + } + }; + + return Ok(self + .ast + .push_binary(Tag::SubscriptExpr, lhs, subscript, loc)); + } + _ => {} + } + } + + Ok(lhs) + } + + fn push_error(&mut self, error: ParseError, loc: SourceLocation) -> Index { + self.errors.push(ErrorInfo { error, loc }); + self.ast.push_error(error, loc) + } + + /// PREFIX_EXPR <- + /// POSTFIX_EXPR + /// ! 
POSTFIX_EXPR + /// - POSTFIX_EXPR + /// & POSTFIX_EXPR + /// * POSTFIX_EXPR + fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let next = tokens.peek_token().ok_or(ErrorInfo { + error: ParseError::ExpectedPrefixExpression, + loc: tokens.current_source_location(), + })?; + + let loc = next.source_location(); + + let expr = match next.token() { + Token::Bang => { + _ = tokens.next(); + let lhs = self.parse_postfix_expr(tokens)?; + self.ast.push_unary(Tag::Not, lhs, loc) + } + Token::Minus => { + _ = tokens.next(); + let lhs = self.parse_postfix_expr(tokens)?; + self.ast.push_unary(Tag::Negate, lhs, loc) + } + Token::Ampersand => { + _ = tokens.next(); + let lhs = self.parse_postfix_expr(tokens)?; + self.ast.push_unary(Tag::AddressOf, lhs, loc) + } + Token::Star => { + _ = tokens.next(); + let lhs = self.parse_postfix_expr(tokens)?; + self.ast.push_unary(Tag::Deref, lhs, loc) + } + _ => self.parse_postfix_expr(tokens)?, + }; + + Ok(expr) + } + + /// AS_EXPR <- + /// PREFIX_EXPR + /// PREFIX_EXPR as TYPENAME + fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let expr = self.parse_prefix_expr(tokens)?; + + if tokens.eat_token(Token::As).is_some() { + let typename = self.parse_type(tokens)?; + + return Ok(self.ast.push_cast(expr, typename, loc)); + } else { + return Ok(expr); + } + } + + /// BINARY_EXPR <- + /// AS_EXPR + /// AS_EXPR * EXPRESSION + /// AS_EXPR / EXPRESSION + /// AS_EXPR % EXPRESSION + /// AS_EXPR + EXPRESSION + /// AS_EXPR - EXPRESSION + /// AS_EXPR << EXPRESSION + /// AS_EXPR >> EXPRESSION + /// AS_EXPR < EXPRESSION + /// AS_EXPR > EXPRESSION + /// AS_EXPR <= EXPRESSION + /// AS_EXPR >= EXPRESSION + /// AS_EXPR == EXPRESSION + /// AS_EXPR != EXPRESSION + /// AS_EXPR & EXPRESSION + /// AS_EXPR ^ EXPRESSION + /// AS_EXPR | EXPRESSION + /// AS_EXPR && EXPRESSION + /// AS_EXPR || EXPRESSION + fn parse_binary_expr( + &mut self, + tokens: &mut TokenIterator, + precedence: u32, + ) -> ParseResult { + let mut node = self.parse_as_expr(tokens)?; + + loop { + let Some(tok) = tokens.peek_token() else { + break; + }; + let loc = tok.source_location(); + let Some(prec) = PRECEDENCE_MAP.get(&tok.token()).cloned() else { + break; + }; + + if prec < precedence { + break; + } + + // SAFETY: we peeked `tok` + let tok = tokens.next().unwrap(); + + let lhs = node; + let rhs = self.parse_binary_expr(tokens, prec + 1)?; + + let tag = match tok.token() { + Token::PipePipe => Tag::Or, + Token::AmpersandAmpersand => Tag::And, + Token::Pipe => Tag::BitOr, + Token::Caret => Tag::BitXOr, + Token::Ampersand => Tag::BitAnd, + Token::BangEqual => Tag::NEq, + Token::EqualEqual => Tag::Eq, + Token::LessEqual => Tag::Le, + Token::GreaterEqual => Tag::Ge, + Token::Less => Tag::Lt, + Token::Greater => Tag::Gt, + Token::GreaterGreater => Tag::Shr, + Token::LessLess => Tag::Shl, + Token::Plus => Tag::Add, + Token::Minus => Tag::Sub, + Token::Percent => Tag::Rem, + Token::Star => Tag::Mul, + Token::Slash => Tag::Div, + _ => unreachable!(), + }; + + node = self.ast.push_binary(tag, lhs, rhs, loc); + } + + Ok(node) + } + + /// ASSIGNMENT_EXPR <- + /// BINARY_EXPRESSION + /// BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION + /// ASSIGNMENT_OP <- + /// = += -= *= /= %= ... 
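+ // Compound assignments (`+=`, `-=`, ...) are desugared into a plain
+ // assignment whose right-hand side is the matching binary operation.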
+ fn parse_assignment_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let lhs = self.parse_binary_expr(tokens, 0)?; + + if tokens + .peek_token() + .map(|itm| itm.token().is_assignment_op()) + == Some(true) + { + // SAFETY: we peeked + let op = tokens.next().unwrap(); + let loc = op.source_location(); + let rhs = self.parse_expr(tokens)?; + + let rhs = if op.token() == Token::Equal { + rhs + } else { + let tag = match op.token() { + Token::PlusEqual => Tag::Add, + Token::MinusEqual => Tag::Sub, + Token::StarEqual => Tag::Mul, + Token::SlashEqual => Tag::Sub, + Token::PercentEqual => Tag::Rem, + Token::PipeEqual => Tag::BitOr, + Token::CaretEqual => Tag::BitXOr, + Token::AmpersandEqual => Tag::BitAnd, + Token::LessLessEqual => Tag::Shl, + Token::GreaterGreaterEqual => Tag::Shr, + _ => { + unreachable!() + } + }; + self.ast.push_binary(tag, lhs, rhs, loc) + }; + + Ok(self.ast.push_assign(lhs, rhs, loc)) + } else { + Ok(lhs) + } + } + + /// ELSE_EXPR <- + /// 'else' (IF_EXPR | EXPR_OR_STATEMENT_OR_BLOCK) + fn parse_else_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { + // SAFETY: function invariance + let _else_ = tokens.eat_token(Token::Else).unwrap(); + + if tokens.is_next_token(Token::If) { + self.parse_if_expr(tokens) + } else { + self.parse_expr_or_block_as_block(tokens) + } + } + + /// IF_EXPR <- + /// 'if' ( EXPR ) EXPR_OR_STATEMENT_OR_BLOCK ELSE_EXPR? + fn parse_if_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { + // SAFETY: function invariance + let iff = tokens.eat_token(Token::If).unwrap(); + let loc = iff.source_location(); + + let open_parens = tokens.eat_token(Token::OpenParens).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::OpenParens), + loc: tokens.current_source_location(), + })?; + + let cond = match self.parse_expr(tokens) { + Ok(i) => i, + Err(err) => { + tokens.advance_past_end_of_parens().ok_or(ErrorInfo { + error: ParseError::UnmatchedParens(open_parens.token_pos().start), + loc: tokens.current_source_location(), + })?; + self.push_error(err.error, err.loc) + } + }; + + let body = self.parse_expr_or_block_as_block(tokens)?; + + if tokens.is_next_token(Token::Else) { + let else_expr = self.parse_expr(tokens)?; + Ok(self.ast.push_if_else(cond, body, else_expr, loc)) + } else { + Ok(self.ast.push_if(cond, body, loc)) + } + } + + fn parse_expr_or_block_as_block( + &mut self, + tokens: &mut TokenIterator, + ) -> ParseResult { + let Some(next) = tokens.peek_token() else { + return Err(ErrorInfo { + error: ParseError::ExpectedExpression, + loc: tokens.current_source_location(), + }); + }; + + match next.token() { + Token::OpenBrace => self.parse_block(tokens), + _ => { + let loc = tokens.current_source_location(); + let expr = self.parse_expr(tokens)?; + Ok(self.ast.push_block([], Some(expr), loc)) + } + } + } + + fn parse_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let loc = tokens.current_source_location(); + let Some(next) = tokens.peek_token() else { + return Err(ErrorInfo { + error: ParseError::ExpectedExpression, + loc, + }); + }; + + match next.token() { + Token::If => self.parse_if_expr(tokens), + _ => self.parse_assignment_expr(tokens), + } + } + + fn parse_statement(&mut self, tokens: &mut TokenIterator) -> ParseResult { + let expr = self.parse_expr(tokens)?; + + let stmt = match tokens.eat_token(Token::Semi) { + Some(_) => self.ast.push_expr_stmt(expr), + None => self.push_error(ParseError::ExpectedStatement, self.ast.get_loc(expr)), + }; + + Ok(stmt) + } + + /// SUMTYPE_DECL <- + /// type 
IDENTIFIER = TYPE_UNION + /// TYPE_UNION <- + /// TYPE (| TYPE_UNION)? + /// IDENTIFIER: TYPE (| TYPE_UNION)? + fn parse_sumtype_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + todo!() + } + + /// TUPLE_DECL <- + /// type IDENTIFIER = (TYPE,* ) + fn parse_tuple_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + todo!() + } + + /// UNION_DECL <- + /// type IDENTIFIER = union { IDENTIFIER: TYPE,* } + fn parse_union_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + todo!() + } + + /// ENUM_DECL <- + /// type IDENTIFIER = packed? enum { IDENTIFIER (= EXPRESSION),* } + fn parse_enum_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + todo!() + } + + /// STRUCT_DECL <- + /// type IDENTIFIER = extern? packed? struct { STRUCT_FIELD,* } + fn parse_struct_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + // SAFETY: function invariance + let start = tokens.eat_token(Token::Type).unwrap(); + let loc = start.source_location(); + let name = self.parse_ident(tokens)?; + + _ = tokens.eat_token(Token::Equal).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Equal), + loc: tokens.current_source_location(), + })?; + + let flags = tokens.eat_all_zero_or_once(&[Token::Packed, Token::Extern]); + let (packed, c_like) = (flags[0], flags[1]); + + _ = tokens.eat_token(Token::Struct).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Struct), + loc: tokens.current_source_location(), + })?; + _ = tokens.eat_token(Token::OpenBrace).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::OpenBrace), + loc: tokens.current_source_location(), + })?; + + match self.parse_struct_fields(tokens) { + Ok(fields) => { + _ = tokens.eat_token(Token::Comma); + + _ = tokens.eat_token(Token::CloseBrace).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::CloseBrace), + loc: tokens.current_source_location(), + })?; + let struct_type = self.intern.get_struct_type(name, packed, c_like, fields); + return Ok(self.ast.push_struct_decl(struct_type, loc)); + } + Err(err) => { + tokens.advance_past_end_of_braced().ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::CloseBrace), + loc: tokens.current_source_location(), + })?; + return Ok(self.push_error(err.error, err.loc)); + } + } + } + + fn parse_struct_fields( + &mut self, + tokens: &mut TokenIterator, + ) -> ParseResult> { + let mut fields = Vec::new(); + loop { + fields.push(self.parse_struct_field(tokens)?); + + if !tokens.is_next_token(Token::Comma) { + break; + } + if tokens.is_next_token2(Token::CloseBrace) { + break; + } + // skip comma + _ = tokens.next(); + } + + Ok(fields) + } + + /// STRUCT_FIELD <- + /// IDENTIFIER: TYPE + fn parse_struct_field( + &mut self, + tokens: &mut TokenIterator, + ) -> ParseResult<(intern::Index, intern::Index)> { + let name = self.parse_ident(tokens)?; + let ty = self.parse_type(tokens)?; + + return Ok((name, ty)); + } + + /// FILE <- + /// (FUNCTION_DECL | GLOBAL_DECL)* + fn parse_file(&mut self, tokens: &mut TokenIterator) -> Index { + let start = tokens.current_source_location(); + let mut decls = Vec::new(); + + while let Some(next) = tokens.peek_token() { + match next.token() { + Token::Fn => {} + Token::Const => {} + _ => { + // error node: + let error = ParseError::UnexpectedTokenAtFileScope; + let node = self.push_error(error, next.source_location()); + decls.push(node); + + self.find_next_fn_or_const(tokens); + } + } + } + + self.ast.push_file(decls, start) + } + + fn is_statement(&self, tokens: &mut TokenIterator) -> bool { + let mut tokens 
= tokens.clone(); + let mut braces = 0; + let mut parens = 0; + let mut brackets = 0; + while let Some(itm) = tokens.next() { + match itm.token() { + Token::OpenBrace => { + braces += 1; + } + Token::CloseBrace => { + braces -= 1; + } + Token::OpenParens => { + parens += 1; + } + Token::CloseParens => { + parens -= 1; + } + Token::OpenSquareBracket => { + brackets += 1; + } + Token::CloseSquareBracket => { + brackets -= 1; + } + Token::Semi => { + if braces == 0 && parens == 0 && brackets == 0 { + return true; + } + } + _ => {} + } + if braces < 0 || parens < 0 || brackets < 0 { + break; + } + } + false + } + + fn find_next_fn_or_const(&mut self, tokens: &mut TokenIterator) -> Option<()> { + tokens + .advance_until_before_one_of(&[Token::Const, Token::Fn]) + .map(|_| ()) + } + } +} diff --git a/src/bin/tokenizer.rs b/src/bin/tokenizer.rs index a43b028..f5819b4 100644 --- a/src/bin/tokenizer.rs +++ b/src/bin/tokenizer.rs @@ -26,7 +26,7 @@ fn main() { match tokens { Ok(tokens) => { for tok in tokens.iter() { - println!("{}@[{}]", tok.token(), tok.source_location().start); + println!("{}@[{}]", tok.token(), tok.source_location()); } } Err((tokens, errors)) => { diff --git a/src/common.rs b/src/common.rs index fc99533..7d68574 100644 --- a/src/common.rs +++ b/src/common.rs @@ -167,3 +167,10 @@ macro_rules! variant { let $pattern = $value else { unreachable!() }; }; } + +pub fn from_lo_hi_dwords(lo: u32, hi: u32) -> u64 { + lo as u64 | (hi as u64) << 32 +} +pub fn into_lo_hi_dwords(qword: u64) -> (u32, u32) { + (qword as u32, (qword >> 32) as u32) +} diff --git a/src/comptime.rs b/src/comptime.rs index 51419cb..0ec886f 100644 --- a/src/comptime.rs +++ b/src/comptime.rs @@ -19,7 +19,7 @@ pub mod bigint { Self(vec![v]) } pub fn from_u64(v: u64) -> BigInt { - let (lo, hi) = into_lo_hi(v); + let (lo, hi) = into_lo_hi_dwords(v); Self(vec![lo, hi]) } @@ -120,7 +120,7 @@ pub mod bigint { impl PartialEq for BigInt { fn eq(&self, other: &u64) -> bool { - let (lo, hi) = into_lo_hi(*other); + let (lo, hi) = into_lo_hi_dwords(*other); cmp_bigint(&self.0, &[lo, hi]) == Ordering::Equal } } @@ -133,7 +133,7 @@ pub mod bigint { impl PartialOrd for BigInt { fn partial_cmp(&self, other: &u64) -> Option { - let (lo, hi) = into_lo_hi(*other); + let (lo, hi) = into_lo_hi_dwords(*other); Some(cmp_bigint(&self.0, &[lo, hi])) } } @@ -217,7 +217,7 @@ pub mod bigint { impl AddAssign for BigInt { fn add_assign(&mut self, rhs: u64) { - let (lo, hi) = into_lo_hi(rhs); + let (lo, hi) = into_lo_hi_dwords(rhs); if hi == 0 { *self += lo; } else { @@ -278,7 +278,7 @@ pub mod bigint { rhs.0.push(0); } - let (lo, hi) = into_lo_hi(self); + let (lo, hi) = into_lo_hi_dwords(self); sub_bigint_in_right(&[lo, hi], &mut rhs.0); rhs.normalised() @@ -302,7 +302,7 @@ pub mod bigint { impl SubAssign for BigInt { fn sub_assign(&mut self, rhs: u64) { - let (lo, hi) = into_lo_hi(rhs); + let (lo, hi) = into_lo_hi_dwords(rhs); while self.num_digits() < 2 { self.0.push(0); } @@ -332,7 +332,7 @@ pub mod bigint { type Output = Self; fn mul(self, rhs: u64) -> Self::Output { - let (lo, hi) = into_lo_hi(rhs); + let (lo, hi) = into_lo_hi_dwords(rhs); BigInt(mul_bigint(&self.0, &[lo, hi])) } } @@ -357,7 +357,7 @@ pub mod bigint { type Output = Self; fn div(self, rhs: u64) -> Self::Output { - let (lo, hi) = into_lo_hi(rhs); + let (lo, hi) = into_lo_hi_dwords(rhs); div_rem_bigint(self, BigInt([lo, hi].to_vec())).0 } } @@ -382,7 +382,7 @@ pub mod bigint { type Output = Self; fn rem(self, rhs: u64) -> Self::Output { - let (lo, hi) = into_lo_hi(rhs); + let 
(lo, hi) = into_lo_hi_dwords(rhs); div_rem_bigint(self, BigInt([lo, hi].to_vec())).1 } } @@ -717,12 +717,7 @@ pub mod bigint { (divident.normalised(), rem) } - fn from_lo_hi(lo: u32, hi: u32) -> u64 { - lo as u64 | (hi as u64) << 32 - } - fn into_lo_hi(qword: u64) -> (u32, u32) { - (qword as u32, (qword >> 32) as u32) - } + use crate::common::{from_lo_hi_dwords, into_lo_hi_dwords}; // from rust num_bigint /// Subtract a multiple. @@ -740,10 +735,11 @@ pub mod bigint { // sum >= -(big_digit::MAX * big_digit::MAX) - big_digit::MAX // sum <= big_digit::MAX // Offsetting sum by (big_digit::MAX << big_digit::BITS) puts it in DoubleBigDigit range. - let offset_sum = from_lo_hi(u32::MAX, *x) - u32::MAX as u64 + offset_carry as u64 + let offset_sum = from_lo_hi_dwords(u32::MAX, *x) - u32::MAX as u64 + + offset_carry as u64 - *y as u64 * c as u64; - let (new_x, new_offset_carry) = into_lo_hi(offset_sum); + let (new_x, new_offset_carry) = into_lo_hi_dwords(offset_sum); offset_carry = new_offset_carry; *x = new_x; } @@ -793,7 +789,7 @@ pub mod bigint { // q0 is too large if: // [a2,a1,a0] < q0 * [b1,b0] // (r << BITS) + a2 < q0 * b1 - while r <= u32::MAX as u64 && from_lo_hi(r as u32, a2) < q0 as u64 * b1 as u64 { + while r <= u32::MAX as u64 && from_lo_hi_dwords(r as u32, a2) < q0 as u64 * b1 as u64 { q0 -= 1; r += b0 as u64; } diff --git a/src/lexer.rs b/src/lexer.rs index 8680105..90fe1a9 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -67,6 +67,21 @@ impl<'a> Chars<'a> { self.offset } + pub fn offset_to_source_location(&self, offset: u32) -> SourceLocation { + let (start_l, start_c) = { + let range = self.get_from_to(0, offset); + range.chars().fold((1u32, 0u32), |(line, col), c| { + if c == '\n' { + (line + 1, 0) + } else { + (line, col + 1) + } + }) + }; + + SourceLocation::new(start_l, start_c) + } + pub fn get_source_span(&self, start: u32, end: u32) -> std::ops::Range { let (start_l, start_c) = { let range = self.get_from_to(0, start); @@ -153,7 +168,218 @@ pub struct TokenIterator<'a> { offset: usize, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum DelimitorCondition { + BelowZero, + MinusOne, + Zero, + One, + AboveZero, +} + +impl DelimitorCondition { + fn from_i32(i: i32) -> Self { + match i { + 0 => Self::Zero, + 1 => Self::One, + -1 => Self::MinusOne, + 1.. 
=> Self::AboveZero, + ..-1 => Self::BelowZero, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct DelimitorConditions { + braces: DelimitorCondition, + parens: DelimitorCondition, + brackets: DelimitorCondition, + semis: Option, +} +impl DelimitorConditions { + fn is_invalid(&self, cond: &Self) -> bool { + self.braces < cond.braces + || self.parens < cond.parens + || self.brackets < cond.brackets + || self.semis > cond.semis + } + + fn from_i32s(braces: i32, parens: i32, brackets: i32, semis: Option) -> Self { + Self { + braces: DelimitorCondition::from_i32(braces), + parens: DelimitorCondition::from_i32(parens), + brackets: DelimitorCondition::from_i32(brackets), + semis, + } + } +} + impl<'a> TokenIterator<'a> { + pub fn advance_until_before_token(&mut self, token: Token) -> Option { + while let Some(next) = self.peek_token() { + if next.token() == token { + return Some(token); + } + _ = self.next(); + } + None + } + + pub fn advance_until_before_one_of(&mut self, tokens: &[Token]) -> Option { + while let Some(next) = self.peek_token() { + if tokens.contains(&next.token()) { + return Some(next.token()); + } + _ = self.next(); + } + None + } + + pub fn advance_past_end_of_braced(&mut self) -> Option<()> { + use DelimitorCondition::*; + self.advance_past_condition(DelimitorConditions { + braces: MinusOne, + parens: Zero, + brackets: Zero, + semis: None, + }) + } + pub fn advance_past_end_of_bracketed(&mut self) -> Option<()> { + use DelimitorCondition::*; + self.advance_past_condition(DelimitorConditions { + braces: Zero, + parens: Zero, + brackets: MinusOne, + semis: None, + }) + } + + pub fn advance_past_semi(&mut self) -> Option<()> { + use DelimitorCondition::*; + self.advance_past_condition(DelimitorConditions { + braces: Zero, + parens: Zero, + brackets: Zero, + semis: Some(1), + }) + } + + pub fn advance_past_end_of_parens(&mut self) -> Option<()> { + use DelimitorCondition::*; + self.advance_past_condition(DelimitorConditions { + braces: Zero, + parens: MinusOne, + brackets: Zero, + semis: None, + }) + } + + pub fn advance_until_start_of_braced(&mut self) -> Option<()> { + use DelimitorCondition::*; + self.advance_until_condition(DelimitorConditions { + braces: One, + parens: Zero, + brackets: Zero, + semis: None, + }) + } + + pub fn advance_until_start_of_parens(&mut self) -> Option<()> { + use DelimitorCondition::*; + self.advance_until_condition(DelimitorConditions { + braces: Zero, + parens: One, + brackets: Zero, + semis: None, + }) + } + pub fn advance_until_end_of_parens(&mut self) -> Option<()> { + use DelimitorCondition::*; + self.advance_until_condition(DelimitorConditions { + braces: Zero, + parens: MinusOne, + brackets: Zero, + semis: None, + }) + } + + fn advance_until_condition(&mut self, cond: DelimitorConditions) -> Option<()> { + self.advance_past_condition(cond)?; + _ = self.offset.saturating_sub(1); + Some(()) + } + + fn advance_past_condition(&mut self, cond: DelimitorConditions) -> Option<()> { + let mut braces = 0; + let mut parens = 0; + let mut brackets = 0; + let mut semis = 0; + while let Some(next) = self.next() { + match next.token() { + Token::OpenBrace => { + braces += 1; + } + Token::CloseBrace => { + braces -= 1; + } + Token::OpenParens => { + parens += 1; + } + Token::CloseParens => { + parens -= 1; + } + Token::OpenSquareBracket => { + brackets += 1; + } + Token::CloseSquareBracket => { + brackets -= 1; + } + Token::Semi => { + semis += 1; + } + _ => { /* nada */ } + } + + let current = + DelimitorConditions::from_i32s(braces, 
parens, brackets, cond.semis.map(|_| semis)); + if cond == current { + return Some(()); + } + + if current.is_invalid(&cond) { + return None; + } + } + + None + } + + pub fn eat_all_zero_or_once(&mut self, tokens: &[Token]) -> Vec { + let mut occurences = vec![0u32; tokens.len()]; + + while occurences.iter().all(|&i| i <= 1) { + let Some(next) = self.peek_token() else { + break; + }; + if let Some(pos) = tokens.iter().position(|&t| t == next.token()) { + occurences[pos] += 1; + } + } + + occurences.into_iter().map(|i| i >= 1).collect() + } + + pub fn current_source_location(&self) -> SourceLocation { + self.clone() + .next() + .map(|i| i.source_location()) + .unwrap_or_else(|| { + self.tokenizer + .source + .offset_to_source_location(self.tokenizer.source.num_bytes() as u32) + }) + } + pub fn expect_token(&mut self, token: Token) -> crate::parser::Result> { self.next_if(|item| item.token() == token) .ok_or(crate::parser::Error::ExpectedTokenNotFound(token)) @@ -209,6 +435,9 @@ impl Display for SourceLocation { } impl SourceLocation { + pub fn invalid() -> Self { + Self::new(u32::MAX, u32::MAX) + } pub fn new(line: u32, column: u32) -> Self { Self { line, column } } @@ -244,6 +473,9 @@ impl SourceLocation { } impl<'a> TokenItem<'a> { + pub fn token_pos(&self) -> TokenPos { + self.inner + } pub fn token(&self) -> Token { self.inner.token } @@ -254,11 +486,17 @@ impl<'a> TokenItem<'a> { .get_from_to(self.inner.start, self.inner.end) } - pub fn source_location(&self) -> std::ops::Range { + pub fn source_location_range(&self) -> std::ops::Range { self.tokenizer .source .get_source_span(self.inner.start, self.inner.end) } + + pub fn source_location(&self) -> SourceLocation { + self.tokenizer + .source + .offset_to_source_location(self.inner.start) + } } impl<'a> Iterator for TokenIterator<'a> { diff --git a/src/lib.rs b/src/lib.rs index 3019af3..a0a5b30 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ bigint_helper_methods, map_try_insert, iter_intersperse, + iter_array_chunks, int_roundings )] #![allow(unused_macros)] diff --git a/src/parser.rs b/src/parser.rs index f0e285e..a824ece 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -11,7 +11,7 @@ use crate::{ lexer::{Radix, TokenIterator}, string_table::{ImmOrIndex, Index, StringTable}, symbol_table::{SymbolKind, SymbolTable}, - tokens::Token, + tokens::{Token, PRECEDENCE_MAP}, }; #[derive(Debug, thiserror::Error)] @@ -2462,26 +2462,3 @@ impl Tree { } } } - -static PRECEDENCE_MAP: std::sync::LazyLock> = std::sync::LazyLock::new(|| { - HashMap::from([ - (Token::PipePipe, 10), - (Token::AmpersandAmpersand, 20), - (Token::Pipe, 30), - (Token::Caret, 40), - (Token::Ampersand, 50), - (Token::BangEqual, 60), - (Token::EqualEqual, 60), - (Token::LessEqual, 70), - (Token::GreaterEqual, 70), - (Token::Less, 70), - (Token::Greater, 70), - (Token::GreaterGreater, 80), - (Token::LessLess, 80), - (Token::Plus, 90), - (Token::Minus, 90), - (Token::Percent, 100), - (Token::Star, 100), - (Token::Slash, 100), - ]) -}); diff --git a/src/tokens.rs b/src/tokens.rs index 9a3f0c0..7fc58ff 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + macro_rules! 
tokens { ($vis:vis $ty_name:ident: { @@ -96,7 +98,11 @@ tokens!(pub Token: { Bool => "bool", F32 => "f32", F64 => "f64", + ISize => "isize", + USize => "usize", Const => "const", + Volatile => "volatile", + Noalias => "noalias", Fn => "fn", Let => "let", Var => "var", @@ -104,6 +110,13 @@ tokens!(pub Token: { As => "as", Else => "else", Return => "return", + Struct => "struct", + Type => "type", + Union => "union", + Enum => "enum", + Packed => "packed", + Extern => "extern", + Pub => "pub", // Operators Dot => ".", MinusGreater => "->", @@ -294,3 +307,27 @@ impl TokenPos { Self { token, start, end } } } + +pub static PRECEDENCE_MAP: std::sync::LazyLock> = + std::sync::LazyLock::new(|| { + HashMap::from([ + (Token::PipePipe, 10), + (Token::AmpersandAmpersand, 20), + (Token::Pipe, 30), + (Token::Caret, 40), + (Token::Ampersand, 50), + (Token::BangEqual, 60), + (Token::EqualEqual, 60), + (Token::LessEqual, 70), + (Token::GreaterEqual, 70), + (Token::Less, 70), + (Token::Greater, 70), + (Token::GreaterGreater, 80), + (Token::LessLess, 80), + (Token::Plus, 90), + (Token::Minus, 90), + (Token::Percent, 100), + (Token::Star, 100), + (Token::Slash, 100), + ]) + });
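
Note on the expression grammar: parse_binary_expr is a precedence-climbing loop over PRECEDENCE_MAP, recursing into the right-hand side with `prec + 1` so that operators of equal precedence associate to the left. Below is a minimal, self-contained sketch of that scheme; the Tok enum, the two-entry precedence table, and the evaluation of nodes into i64 are simplified stand-ins for illustration, not the crate's Token, Tag, or Ast types.

// Minimal precedence-climbing sketch (simplified stand-ins, not the crate's types).
// Parse a primary, then fold in operators whose precedence is >= the current
// minimum, recursing with `prec + 1` for the right-hand side.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Tok {
    Num(i64),
    Plus,
    Star,
}

fn prec(t: Tok) -> Option<u32> {
    match t {
        Tok::Plus => Some(90),
        Tok::Star => Some(100),
        _ => None,
    }
}

fn parse_primary(toks: &mut std::iter::Peekable<std::vec::IntoIter<Tok>>) -> i64 {
    match toks.next() {
        Some(Tok::Num(n)) => n,
        other => panic!("expected number, got {:?}", other),
    }
}

fn parse_binary(toks: &mut std::iter::Peekable<std::vec::IntoIter<Tok>>, min_prec: u32) -> i64 {
    let mut lhs = parse_primary(toks);
    while let Some(&t) = toks.peek() {
        let Some(p) = prec(t) else { break };
        if p < min_prec {
            break;
        }
        toks.next();
        // `p + 1` keeps an equal-precedence operator on the right from binding
        // into this rhs, which yields left associativity.
        let rhs = parse_binary(toks, p + 1);
        lhs = match t {
            Tok::Plus => lhs + rhs,
            Tok::Star => lhs * rhs,
            _ => unreachable!(),
        };
    }
    lhs
}

fn main() {
    // 1 + 2 * 3 groups as 1 + (2 * 3) because Star (100) outranks Plus (90),
    // mirroring the relative values in the PRECEDENCE_MAP added to src/tokens.rs.
    let toks = vec![Tok::Num(1), Tok::Plus, Tok::Num(2), Tok::Star, Tok::Num(3)];
    assert_eq!(parse_binary(&mut toks.into_iter().peekable(), 0), 7);
}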
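
Note on error recovery: the advance_past_end_of_parens / advance_past_end_of_braced helpers let the parser resynchronize, e.g. parse_if_expr skips to the parenthesis matching the already-consumed `(` when the condition fails to parse. A standalone sketch of the underlying counting idea, using plain chars instead of the lexer's TokenIterator and DelimitorConditions; the function name and inputs here are illustrative only.

// Walk the input keeping a running open/close count; stop just past the close
// delimiter that takes the count to -1, i.e. the one matching an open that was
// consumed before recovery started.
fn advance_past_closing_paren(src: &mut impl Iterator<Item = char>) -> Option<()> {
    let mut parens: i32 = 0;
    while let Some(c) = src.next() {
        match c {
            '(' => parens += 1,
            ')' => parens -= 1,
            _ => {}
        }
        if parens == -1 {
            // consumed the ')' that closes the paren we were already inside
            return Some(());
        }
    }
    None // ran out of input: unmatched parenthesis
}

fn main() {
    // pretend we already consumed "if (" and then hit a parse error in the condition
    let mut rest = "1 +) { body }".chars();
    assert_eq!(advance_past_closing_paren(&mut rest), Some(()));
    // iteration resumes right after the ')', at the block
    assert_eq!(rest.collect::<String>(), " { body }");
}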
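
Note on the new src/common.rs helpers: from_lo_hi_dwords / into_lo_hi_dwords replace the private from_lo_hi / into_lo_hi in comptime::bigint. A quick round-trip check of the dword split, with the function bodies copied from this patch:

fn from_lo_hi_dwords(lo: u32, hi: u32) -> u64 {
    lo as u64 | (hi as u64) << 32
}
fn into_lo_hi_dwords(qword: u64) -> (u32, u32) {
    (qword as u32, (qword >> 32) as u32)
}

fn main() {
    let q = 0x1122_3344_5566_7788u64;
    // low half first, high half second
    let (lo, hi) = into_lo_hi_dwords(q);
    assert_eq!((lo, hi), (0x5566_7788, 0x1122_3344));
    assert_eq!(from_lo_hi_dwords(lo, hi), q);
}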
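
Note on source locations: offset_to_source_location folds over the source prefix up to a byte offset, bumping the line on '\n' and resetting the column, seeded with (1, 0), so lines are 1-based and columns 0-based. A small sketch of the same fold; the free-standing helper name is illustrative, not part of the patch.

// Map a byte offset to (line, column) by scanning the prefix, as the lexer's
// offset_to_source_location does over its character buffer.
fn offset_to_line_col(src: &str, offset: usize) -> (u32, u32) {
    src[..offset].chars().fold((1u32, 0u32), |(line, col), c| {
        if c == '\n' {
            (line + 1, 0)
        } else {
            (line, col + 1)
        }
    })
}

fn main() {
    let src = "let a = 1;\nlet b = 2;";
    // offset of the 'b' on the second line
    let off = src.find('b').unwrap();
    assert_eq!(offset_to_line_col(src, off), (2, 4));
}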