SeaLang/src/ast2/mod.rs

3398 lines
112 KiB
Rust

#![allow(dead_code)]
use std::{
fmt::{Debug, Display},
num::NonZero,
};
use crate::{lexer::SourceLocation, tokens::Token, writeln_indented};
pub mod intern {
use std::{
collections::BTreeMap,
hash::{Hash, Hasher},
};
use num_bigint::{BigInt, BigUint, Sign};
use crate::{
common::{from_lo_hi_dwords, into_lo_hi_dwords},
variant,
};
/// Built-in types that take no parameters.
///
/// The enum is `repr(u8)` with discriminants starting at 0, so a variant can
/// be stored as a single byte and recovered through `From<u8>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum SimpleType {
    F32 = 0,
    F64,
    Bool,
    Void,
    USize,
    ISize,
    ComptimeInt,
}

impl From<u8> for SimpleType {
    /// Maps a discriminant back to its variant.
    ///
    /// # Panics
    ///
    /// Panics if `value` is not the discriminant of any variant.
    fn from(value: u8) -> Self {
        // Listed in declaration order, so the position equals the discriminant.
        const BY_DISCRIMINANT: [SimpleType; 7] = [
            SimpleType::F32,
            SimpleType::F64,
            SimpleType::Bool,
            SimpleType::Void,
            SimpleType::USize,
            SimpleType::ISize,
            SimpleType::ComptimeInt,
        ];
        match BY_DISCRIMINANT.get(usize::from(value)) {
            Some(&ty) => ty,
            None => panic!("{value} is not a simple type"),
        }
    }
}
/// Kind of an interned item.
///
/// Every item in the pool is stored as a `Tag` plus a `u32` payload; the tag
/// decides how that payload is decoded again when the key is reconstructed.
/// There is one tag per `Key` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Tag {
String,
SIntSmall,
UIntSmall,
UInt64,
SInt64,
F32,
F64,
PositiveInt,
NegativeInt,
UIntType,
SIntType,
SimpleType,
PointerType,
ArrayType,
FunctionType,
StructType,
}
/// One entry of the pool: the tag together with its raw payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct Item {
// Selects how `index` has to be interpreted.
tag: Tag,
// Raw payload: depending on the tag this is either the value itself or an
// offset into the pool's `words` buffer.
index: u32,
}
/// Fully expanded description of an internable value or type.
///
/// A `Key` is what callers hand to the pool to obtain an `Index`, and what
/// the pool reconstructs from an `Index`. The lifetime `'a` is the lifetime
/// of the string slice carried by `Key::String`.
///
/// Only `PartialEq` is derived (not `Eq`) because the float variants hold
/// `f32`/`f64`.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum Key<'a> {
/// A string.
String {
str: &'a str,
},
/// A signed integer value held in 32 bits.
SIntSmall {
bits: i32,
},
/// An unsigned integer value held in 32 bits.
UIntSmall {
bits: u32,
},
/// A signed integer value held in 64 bits.
SInt64 {
bits: i64,
},
/// An unsigned integer value held in 64 bits.
UInt64 {
bits: u64,
},
/// A 32-bit float value.
F32 {
bits: f32,
},
/// A 64-bit float value.
F64 {
bits: f64,
},
/// An arbitrary-precision integer; the magnitude is what gets stored and
/// the sign is implied by the variant.
PositiveInt {
bigint: BigInt,
},
/// An arbitrary-precision integer; the magnitude is what gets stored and
/// the sign is implied by the variant.
NegativeInt {
bigint: BigInt,
},
/// Unsigned integer type that is `bits` wide.
UIntType {
bits: u16,
},
/// Signed integer type that is `bits` wide.
SIntType {
bits: u16,
},
/// One of the parameterless built-in types.
SimpleType {
ty: SimpleType,
},
/// Pointer to the interned type `pointee`.
PointerType {
pointee: Index,
flags: PointerFlags,
},
/// Array of `length` elements of the interned type `pointee`.
ArrayType {
pointee: Index,
flags: PointerFlags,
length: u32,
},
/// Function type; both the return type and the parameters are interned types.
FunctionType {
return_type: Index,
parameters: Vec<Index>,
},
/// Struct type; `fields` is a list of index pairs, presumably
/// (interned field name, interned field type) — TODO confirm against callers.
StructType {
name: Index,
packed: bool,
c_like: bool,
fields: Vec<(Index, Index)>,
},
}
impl Hash for Key<'_> {
    /// Feeds the variant discriminant followed by every field, in declaration
    /// order, into `state`.
    ///
    /// Floats do not implement `Hash`, so they are hashed through
    /// `ordered_float::OrderedFloat`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        core::mem::discriminant(self).hash(state);
        match self {
            Key::String { str } => str.hash(state),
            Key::SIntSmall { bits } => bits.hash(state),
            Key::UIntSmall { bits } => bits.hash(state),
            Key::SInt64 { bits } => bits.hash(state),
            Key::UInt64 { bits } => bits.hash(state),
            Key::F32 { bits } => ordered_float::OrderedFloat(*bits).hash(state),
            Key::F64 { bits } => ordered_float::OrderedFloat(*bits).hash(state),
            Key::PositiveInt { bigint } | Key::NegativeInt { bigint } => bigint.hash(state),
            Key::UIntType { bits } | Key::SIntType { bits } => bits.hash(state),
            Key::SimpleType { ty } => ty.hash(state),
            Key::PointerType { pointee, flags } => {
                pointee.hash(state);
                flags.hash(state);
            }
            Key::ArrayType {
                pointee,
                flags,
                length,
            } => {
                pointee.hash(state);
                flags.hash(state);
                length.hash(state);
            }
            Key::StructType {
                name,
                packed,
                c_like,
                fields,
            } => {
                name.hash(state);
                packed.hash(state);
                c_like.hash(state);
                fields.hash(state);
            }
            Key::FunctionType {
                return_type,
                parameters,
            } => {
                return_type.hash(state);
                parameters.hash(state);
            }
        }
    }
}
// #[repr(packed)]
/// Qualifiers attached to pointer and array types.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PointerFlags {
    pub volatile: bool,
    pub is_const: bool,
    pub noalias: bool,
}

impl PointerFlags {
    /// Builds a flag set. Note the argument order: `is_const` comes first.
    pub fn new(is_const: bool, volatile: bool, noalias: bool) -> Self {
        Self {
            volatile,
            is_const,
            noalias,
        }
    }

    /// Encodes the flags into one byte: bit 0 = volatile, bit 1 = const,
    /// bit 2 = noalias.
    fn pack(self) -> u8 {
        u8::from(self.volatile) | (u8::from(self.is_const) << 1) | (u8::from(self.noalias) << 2)
    }

    /// Decodes a byte produced by `pack`; bits above bit 2 are ignored.
    fn unpack(packed: u8) -> Self {
        let bit = |n: u8| (packed >> n) & 1 == 1;
        Self {
            volatile: bit(0),
            is_const: bit(1),
            noalias: bit(2),
        }
    }
}
/// Header word of a stored struct type: two flags plus the field count.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
struct StructFlags {
    packed: bool,
    c_like: bool,
    num_fields: u32,
}

impl StructFlags {
    /// The low 30 bits, which hold the field count.
    const MASK: u32 = (1u32 << 30) - 1;

    /// Builds the header.
    ///
    /// # Panics
    ///
    /// Panics if `num_fields` does not fit into 30 bits.
    pub fn new(packed: bool, c_like: bool, num_fields: u32) -> Self {
        assert!(num_fields < (1 << 30));
        Self {
            num_fields,
            c_like,
            packed,
        }
    }

    /// Encodes into one word: bit 31 = packed, bit 30 = c_like, low 30 bits =
    /// field count.
    fn pack(self) -> u32 {
        assert!(self.num_fields < (1 << 30));
        let mut word = self.num_fields & Self::MASK;
        if self.c_like {
            word |= 1 << 30;
        }
        if self.packed {
            word |= 1 << 31;
        }
        word
    }

    /// Decodes a word produced by `pack`.
    fn unpack(packed: u32) -> Self {
        let flag = |bit: u32| (packed >> bit) & 1 == 1;
        Self {
            packed: flag(31),
            c_like: flag(30),
            num_fields: packed & Self::MASK,
        }
    }
}
/// Header word of a stored function type: one flag plus the parameter count.
#[derive(Debug, Clone, Copy)]
struct FunctionInfo {
    void_return: bool,
    num_params: u32,
}

impl FunctionInfo {
    /// The topmost bit, which holds the `void_return` flag.
    const MASK: u32 = 1u32 << (u32::BITS - 1);

    fn new(void_return: bool, num_params: u32) -> Self {
        Self {
            num_params,
            void_return,
        }
    }

    /// Encodes into one word: top bit = `void_return`, remaining bits =
    /// parameter count (a count that needs the top bit is silently truncated).
    fn pack(self) -> u32 {
        let flag = if self.void_return { Self::MASK } else { 0 };
        flag | (self.num_params & !Self::MASK)
    }

    /// Decodes a word produced by `pack`.
    fn unpack(packed: u32) -> Self {
        let num_params = packed & !Self::MASK;
        let void_return = (packed & Self::MASK) != 0;
        Self {
            void_return,
            num_params,
        }
    }

    /// Parameter count plus one if `void_return` is set.
    fn len(self) -> u32 {
        u32::from(self.void_return) + self.num_params
    }
}
impl Item {
    /// The payload widened to `usize`, ready to be used as a slice index.
    fn idx(self) -> usize {
        let Self { index, .. } = self;
        index as usize
    }
}
/// Handle to an item stored in an `InternPool`.
///
/// `u32::MAX` is reserved as the "invalid" sentinel, see [`Index::invalid`].
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Index(u32);

impl Index {
    /// Returns the raw value of the handle.
    pub fn into_u32(self) -> u32 {
        // Plain field access; no `unsafe` is needed for a newtype.
        self.0
    }

    /// Borrows the raw value of the handle.
    pub fn as_u32(&self) -> &u32 {
        &self.0
    }

    /// The raw value widened to `usize` for slice indexing.
    fn index(&self) -> usize {
        self.0 as usize
    }

    /// Returns `false` only for the sentinel produced by [`Index::invalid`].
    pub fn is_valid(&self) -> bool {
        self.0 != u32::MAX
    }

    /// Returns the sentinel handle that refers to no item.
    pub fn invalid() -> Self {
        Self(u32::MAX)
    }
}
/// Deduplicating store for strings, constant values and types.
///
/// Items are addressed by `Index`. `tags` and `indices` are parallel
/// vectors: entry `i` of both together forms item `i`.
pub struct InternPool {
// Tag of every item.
tags: Vec<Tag>,
// Raw payload of every item (inline value or offset into `words`).
indices: Vec<u32>,
// Backing storage referenced by the payloads:
// raw bytes of all interned strings, appended back to back.
strings: Vec<u8>,
// Variable-length payload data, addressed by word offset.
words: Vec<u32>,
// Maps the hash digest of a key to the index of the item created for it.
// NOTE(review): lookups compare the 64-bit digest only, so two different
// keys with colliding digests would resolve to the same item.
hashed: BTreeMap<u64, Index>,
}
impl std::fmt::Debug for InternPool {
    /// Prints every item as `index: key` followed by the digest table as
    /// `digest: index`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("InternPool");
        out.field_with("keys", |f| {
            let mut list = f.debug_list();
            for raw in 0..self.indices.len() {
                let idx = Index(raw as u32);
                let key = self.get_key(idx);
                list.entry_with(|f| write!(f, "{}: {key:?}", idx.0));
            }
            list.finish()
        });
        out.field_with("hashed", |f| {
            let mut list = f.debug_list();
            for (hash, idx) in &self.hashed {
                list.entry_with(|f| write!(f, "{hash}: {}", idx.0));
            }
            list.finish()
        });
        out.finish_non_exhaustive()
    }
}
/// Keys that `InternPool::create` inserts into every new pool, in this order.
/// The `get_*_type` accessors rely on these being present.
const STATIC_KEYS: [Key; 19] = [
Key::SimpleType {
ty: SimpleType::Bool,
},
Key::SimpleType {
ty: SimpleType::F32,
},
Key::SimpleType {
ty: SimpleType::F64,
},
Key::SimpleType {
ty: SimpleType::USize,
},
Key::SimpleType {
ty: SimpleType::ISize,
},
Key::SimpleType {
ty: SimpleType::Void,
},
Key::SimpleType {
ty: SimpleType::ComptimeInt,
},
// Integer types, signed then unsigned for each of the widths 1, 0, 8, 16, 32, 64.
Key::SIntType { bits: 1 },
Key::UIntType { bits: 1 },
Key::SIntType { bits: 0 },
Key::UIntType { bits: 0 },
Key::SIntType { bits: 8 },
Key::UIntType { bits: 8 },
Key::SIntType { bits: 16 },
Key::UIntType { bits: 16 },
Key::SIntType { bits: 32 },
Key::UIntType { bits: 32 },
Key::SIntType { bits: 64 },
Key::UIntType { bits: 64 },
];
impl InternPool {
pub fn get_void_type(&self) -> Index {
self.get_assume_present(Key::SimpleType {
ty: SimpleType::Void,
})
}
pub fn get_bool_type(&self) -> Index {
self.get_assume_present(Key::SimpleType {
ty: SimpleType::Bool,
})
}
pub fn get_f32_type(&self) -> Index {
self.get_assume_present(Key::SimpleType {
ty: SimpleType::F32,
})
}
pub fn get_f64_type(&self) -> Index {
self.get_assume_present(Key::SimpleType {
ty: SimpleType::F64,
})
}
pub fn get_comptime_int_type(&self) -> Index {
self.get_assume_present(Key::SimpleType {
ty: SimpleType::ComptimeInt,
})
}
pub fn get_usize_type(&self) -> Index {
self.get_assume_present(Key::SimpleType {
ty: SimpleType::USize,
})
}
pub fn get_isize_type(&self) -> Index {
self.get_assume_present(Key::SimpleType {
ty: SimpleType::ISize,
})
}
pub fn get_u0_type(&self) -> Index {
self.get_assume_present(Key::UIntType { bits: 0 })
}
pub fn get_i0_type(&self) -> Index {
self.get_assume_present(Key::SIntType { bits: 0 })
}
pub fn get_u1_type(&self) -> Index {
self.get_assume_present(Key::UIntType { bits: 1 })
}
pub fn get_i1_type(&self) -> Index {
self.get_assume_present(Key::SIntType { bits: 1 })
}
pub fn get_u8_type(&self) -> Index {
self.get_assume_present(Key::UIntType { bits: 8 })
}
pub fn get_i8_type(&self) -> Index {
self.get_assume_present(Key::SIntType { bits: 8 })
}
pub fn get_u16_type(&self) -> Index {
self.get_assume_present(Key::UIntType { bits: 16 })
}
pub fn get_i16_type(&self) -> Index {
self.get_assume_present(Key::SIntType { bits: 16 })
}
pub fn get_u32_type(&self) -> Index {
self.get_assume_present(Key::UIntType { bits: 32 })
}
pub fn get_i32_type(&self) -> Index {
self.get_assume_present(Key::SIntType { bits: 32 })
}
pub fn get_u64_type(&self) -> Index {
self.get_assume_present(Key::UIntType { bits: 64 })
}
pub fn get_i64_type(&self) -> Index {
self.get_assume_present(Key::SIntType { bits: 64 })
}
}
/// Size and alignment of a type, both measured in bits.
#[derive(Debug, Clone, Copy)]
pub struct TypeInfo {
// Number of bits the type occupies.
pub bitsize: u32,
// Required alignment in bits.
pub bitalign: u32,
}
impl InternPool {
    /// Computes the size and alignment (in bits) of the type behind `index`.
    ///
    /// `ptr_size` is the layout of a pointer on the target; it is used for
    /// pointers, function types, `usize` and `isize`.
    ///
    /// Layout rules:
    /// * integers occupy exactly `bits` bits and are aligned to the next
    ///   power of two of their byte-rounded width,
    /// * array elements are spaced by the element size rounded up to the
    ///   element alignment,
    /// * struct fields are laid out in declaration order; unless the struct
    ///   is `packed`, every field starts at a multiple of its alignment and
    ///   the total size is rounded up to the struct alignment.
    ///
    /// # Panics
    ///
    /// Panics if `index` refers to a value rather than a type, or to
    /// `comptime_int`, which has no size.
    pub fn size_of_type(&self, index: Index, ptr_size: TypeInfo) -> TypeInfo {
        /// Rounds `bits` up to a multiple of `align`. An alignment of zero
        /// (as reported for `void`) requests no padding; this also avoids the
        /// division by zero inside `next_multiple_of`.
        fn align_up(bits: u32, align: u32) -> u32 {
            if align == 0 {
                bits
            } else {
                bits.next_multiple_of(align)
            }
        }

        match self.get_key(index) {
            Key::UIntType { bits } | Key::SIntType { bits } => {
                let bits = u32::from(bits);
                TypeInfo {
                    bitsize: bits,
                    bitalign: bits.next_multiple_of(8).next_power_of_two(),
                }
            }
            Key::SimpleType { ty } => match ty {
                SimpleType::F32 => TypeInfo {
                    bitsize: 32,
                    bitalign: 32,
                },
                SimpleType::F64 => TypeInfo {
                    bitsize: 64,
                    bitalign: 64,
                },
                SimpleType::Bool => TypeInfo {
                    bitsize: 1,
                    bitalign: 1,
                },
                SimpleType::Void => TypeInfo {
                    bitsize: 0,
                    bitalign: 0,
                },
                SimpleType::USize | SimpleType::ISize => ptr_size,
                SimpleType::ComptimeInt => panic!("comptime int is unsized"),
            },
            Key::PointerType { .. } | Key::FunctionType { .. } => ptr_size,
            Key::ArrayType {
                pointee, length, ..
            } => {
                let element = self.size_of_type(pointee, ptr_size);
                // Elements are spaced by their padded size, not by their
                // alignment: an element larger than its alignment must not
                // overlap its neighbour.
                let stride = align_up(element.bitsize, element.bitalign);
                TypeInfo {
                    bitsize: stride * length,
                    bitalign: element.bitalign,
                }
            }
            Key::StructType { packed, fields, .. } => {
                // TODO: c-like layout
                let (size, align) =
                    fields
                        .iter()
                        .fold((0u32, 0u32), |(size, align), (_name, ty)| {
                            let field = self.size_of_type(*ty, ptr_size);
                            // Packed structs place fields back to back; all
                            // others pad up to the field's alignment first.
                            let offset = if packed {
                                size
                            } else {
                                align_up(size, field.bitalign)
                            };
                            (offset + field.bitsize, align.max(field.bitalign))
                        });
                TypeInfo {
                    // Tail padding keeps consecutive instances aligned.
                    bitsize: if packed { size } else { align_up(size, align) },
                    bitalign: align,
                }
            }
            _ => {
                panic!("index was not a type")
            }
        }
    }
}
impl InternPool {
/// Creates a pool that already contains every key of `STATIC_KEYS`.
pub fn create() -> Self {
    let mut pool = Self {
        hashed: BTreeMap::new(),
        words: Vec::new(),
        strings: Vec::new(),
        indices: Vec::new(),
        tags: Vec::new(),
    };
    pool.extend_keys(STATIC_KEYS);
    pool
}
/// Inserts every key unconditionally and records its digest.
///
/// No lookup is performed first, so a key that is already present gets a
/// second item and the digest table is redirected to the new one.
fn extend_keys<'a, K: IntoIterator<Item = Key<'a>>>(&mut self, keys: K) {
    keys.into_iter().for_each(|key| {
        let digest = {
            let mut state = std::hash::DefaultHasher::new();
            key.hash(&mut state);
            state.finish()
        };
        let index = self.insert(key);
        self.hashed.insert(digest, index);
    });
}
/// Number of items in the pool.
///
/// # Panics
///
/// Panics if the pool holds more items than fit into a `u32`.
fn len(&self) -> u32 {
    // Build the message lazily: this runs for every created item, and
    // `expect(&format!(..))` would allocate the string even on success.
    u32::try_from(self.tags.len())
        .unwrap_or_else(|e| panic!("more than {} items in internpool!: {e:?}", u32::MAX))
}
/// Returns the index registered for `key`, inserting the key first if its
/// digest is not known yet.
pub fn get_or_insert(&mut self, key: Key) -> Index {
    let digest = {
        let mut state = std::hash::DefaultHasher::new();
        key.hash(&mut state);
        state.finish()
    };
    if let Some(existing) = self.hashed.get(&digest).copied() {
        return existing;
    }
    let created = self.insert(key);
    self.hashed.insert(digest, created);
    created
}
/// Serialises `key` into the pool's buffers and returns the new item's index.
///
/// This always creates a new item; deduplication is the caller's job.
///
/// Storage per kind:
/// * small integers, `f32` and simple types: the value is the payload itself
///   (floats as their IEEE-754 bit pattern),
/// * strings: `[start, len]` in `words`, pointing into `strings`,
/// * 64-bit values: `[lo, hi]` in `words` (floats as their bit pattern),
/// * big integers: `[len, digits..]` in `words`,
/// * integer types: `[bits]` in `words`,
/// * pointers: `[pointee, flags]`, arrays: `[pointee, flags, length]`,
/// * structs: `[name, flags, (field_a, field_b)..]`,
/// * functions: `[info, return_type?, params..]`, where the return type is
///   only present when it is not `void`.
fn insert(&mut self, key: Key) -> Index {
    match key {
        Key::String { str } => {
            let len = str.len() as u32;
            let start = self.extend_strings(str);
            let words_idx = self.extend_words([start, len]);
            self.create_item(Tag::String, words_idx)
        }
        Key::SIntSmall { bits } => self.create_item(Tag::SIntSmall, bits as u32),
        Key::UIntSmall { bits } => self.create_item(Tag::UIntSmall, bits),
        // Floats are stored by bit pattern; a numeric `as` cast would
        // truncate the value and not survive the round trip.
        Key::F32 { bits } => self.create_item(Tag::F32, bits.to_bits()),
        Key::F64 { bits } => {
            let (lo, hi) = into_lo_hi_dwords(bits.to_bits());
            let words_idx = self.extend_words([lo, hi]);
            self.create_item(Tag::F64, words_idx)
        }
        Key::SInt64 { bits } => {
            let (lo, hi) = into_lo_hi_dwords(bits as u64);
            let i = self.extend_words([lo, hi]);
            self.create_item(Tag::SInt64, i)
        }
        Key::UInt64 { bits } => {
            let (lo, hi) = into_lo_hi_dwords(bits);
            let i = self.extend_words([lo, hi]);
            self.create_item(Tag::UInt64, i)
        }
        Key::PositiveInt { bigint } => {
            let (_, words) = bigint.to_u32_digits();
            let i = self.push_word(words.len() as u32);
            _ = self.extend_words(words);
            self.create_item(Tag::PositiveInt, i)
        }
        Key::NegativeInt { bigint } => {
            let (_, words) = bigint.to_u32_digits();
            let i = self.push_word(words.len() as u32);
            _ = self.extend_words(words);
            self.create_item(Tag::NegativeInt, i)
        }
        // Integer types get their own tags and keep the width in `words`,
        // which is where `get_key` reads it back from.
        Key::UIntType { bits } => {
            let i = self.push_word(u32::from(bits));
            self.create_item(Tag::UIntType, i)
        }
        Key::SIntType { bits } => {
            let i = self.push_word(u32::from(bits));
            self.create_item(Tag::SIntType, i)
        }
        Key::SimpleType { ty } => self.create_item(Tag::SimpleType, ty as u8 as u32),
        Key::PointerType { pointee, flags } => {
            let flags = flags.pack();
            let i = self.extend_words([pointee.0, flags as u32]);
            self.create_item(Tag::PointerType, i)
        }
        Key::ArrayType {
            pointee,
            flags,
            length,
        } => {
            let flags = flags.pack();
            let i = self.extend_words([pointee.0, flags as u32, length]);
            self.create_item(Tag::ArrayType, i)
        }
        Key::StructType {
            name,
            packed,
            c_like,
            fields,
        } => {
            let flags = StructFlags::new(packed, c_like, fields.len() as u32).pack();
            let i = self.extend_words([name.into_u32(), flags]);
            _ = self.extend_words(
                fields
                    .into_iter()
                    .flat_map(|(n, t)| [n.into_u32(), t.into_u32()]),
            );
            self.create_item(Tag::StructType, i)
        }
        Key::FunctionType {
            return_type,
            parameters,
        } => {
            // Resolve `void` before writing anything: the lookup may itself
            // insert into the pool.
            let void_type = self.get_simple_type(SimpleType::Void);
            let is_void = return_type == void_type;
            let info = FunctionInfo::new(is_void, parameters.len() as u32);
            let start = self.push_word(info.pack());
            if !is_void {
                // A non-void return type follows the info word.
                self.push_word(return_type.into_u32());
            }
            _ = self.extend_words(parameters.into_iter().map(|i| i.0));
            self.create_item(Tag::FunctionType, start)
        }
    }
}
/// Appends `b` to the string buffer and returns the offset of its first byte.
fn extend_strings<B: AsRef<[u8]>>(&mut self, b: B) -> u32 {
    let offset = self.strings.len() as u32;
    let bytes = b.as_ref();
    self.strings.extend(bytes);
    offset
}
/// Appends all words of `i` and returns the offset of the first one.
fn extend_words<I: IntoIterator<Item = u32>>(&mut self, i: I) -> u32 {
    let offset = self.words.len() as u32;
    for word in i {
        self.words.push(word);
    }
    offset
}
/// Appends a single word and returns its offset.
fn push_word(&mut self, word: u32) -> u32 {
    let offset = self.words.len();
    self.words.push(word);
    offset as u32
}
/// Appends a new item made of `tag` and the payload `index`, returning the
/// handle of the slot it occupies.
fn create_item(&mut self, tag: Tag, index: u32) -> Index {
    // The handle is the position the item is about to take.
    let slot = Index(self.len());
    self.indices.push(index);
    self.tags.push(tag);
    slot
}
pub fn get_key(&self, index: Index) -> Key {
let item = self.get_item(index).unwrap();
match item.tag {
Tag::String => {
let start = self.words[item.idx()];
let len = self.words[item.idx() + 1];
let str = unsafe {
core::str::from_utf8_unchecked(
&self.strings[start as usize..][..len as usize],
)
};
Key::String { str }
}
Tag::UIntSmall => Key::UIntSmall {
bits: item.index as u32,
},
Tag::SIntSmall => Key::SIntSmall {
bits: item.index as i32,
},
Tag::F32 => Key::F32 {
bits: f32::from_le_bytes(item.index.to_le_bytes()),
},
Tag::F64 => {
let idx = item.idx();
let bits = from_lo_hi_dwords(self.words[idx], self.words[idx + 1]);
Key::F64 {
bits: f64::from_le_bytes(bits.to_le_bytes()),
}
}
Tag::SInt64 => {
let bits = from_lo_hi_dwords(self.words[item.idx()], self.words[item.idx() + 1])
as i64;
Key::SInt64 { bits }
}
Tag::UInt64 => {
let bits =
from_lo_hi_dwords(self.words[item.idx()], self.words[item.idx() + 1]);
Key::UInt64 { bits }
}
Tag::NegativeInt => {
let len = self.words[item.idx()];
let start = item.idx() + 1;
let end = start + len as usize;
let data = BigUint::from_slice(&self.words[start..end]);
let bigint = BigInt::from_biguint(Sign::Minus, data);
Key::NegativeInt { bigint }
}
Tag::PositiveInt => {
let len = self.words[item.idx()];
let start = item.idx() + 1;
let end = start + len as usize;
let data = BigUint::from_slice(&self.words[start..end]);
let bigint = BigInt::from_biguint(Sign::Plus, data);
Key::PositiveInt { bigint }
}
Tag::SIntType => {
let bits = self.words[item.idx()] as u16;
Key::SIntType { bits }
}
Tag::UIntType => {
let bits = self.words[item.idx()] as u16;
Key::SIntType { bits }
}
Tag::SimpleType => {
let ty = item.idx() as u8;
Key::SimpleType {
ty: unsafe { core::mem::transmute::<u8, SimpleType>(ty) },
}
}
Tag::PointerType => {
let pointee = Index(self.words[item.idx()]);
let flags = PointerFlags::unpack(self.words[item.idx() + 1] as u8);
Key::PointerType { pointee, flags }
}
Tag::ArrayType => {
let pointee = Index(self.words[item.idx()]);
let flags = PointerFlags::unpack(self.words[item.idx() + 1] as u8);
let length = self.words[item.idx() + 2];
Key::ArrayType {
pointee,
flags,
length,
}
}
Tag::StructType => {
let name = Index(self.words[item.idx()]);
let flags = StructFlags::unpack(self.words[item.idx() + 1]);
let start = item.idx() + 2;
let end = start + flags.num_fields as usize * 2;
let fields = self.words[start..end]
.iter()
.cloned()
.array_chunks::<2>()
.map(|[n, t]| (Index(n), Index(t)))
.collect::<Vec<_>>();
Key::StructType {
name,
packed: flags.packed,
c_like: flags.c_like,
fields,
}
}
Tag::FunctionType => {
let info = FunctionInfo::unpack(self.words[item.idx()]);
let len = info.len();
let (return_type, parameters) = if info.void_return {
let start = item.idx() + 1;
let end = start + len as usize;
let params = self.words[start..end]
.iter()
.map(|&i| Index(i))
.collect::<Vec<_>>();
(
self.get_assume_present(Key::SimpleType {
ty: SimpleType::Void,
}),
params,
)
} else {
let start = item.idx() + 2;
let end = start + len as usize;
let return_type = Index(self.words[item.idx() + 1]);
let params = self.words[start..end]
.iter()
.map(|&i| Index(i))
.collect::<Vec<_>>();
(return_type, params)
};
Key::FunctionType {
return_type,
parameters,
}
}
}
}
/// Looks up the index of a key that must already be in the pool.
///
/// # Panics
///
/// Panics if no item is registered under the key's digest.
pub fn get_assume_present(&self, key: Key) -> Index {
    let digest = {
        let mut state = std::hash::DefaultHasher::new();
        key.hash(&mut state);
        state.finish()
    };
    let Some(&found) = self.hashed.get(&digest) else {
        panic!("key {key:?} not present in pool.")
    };
    found
}
/// Interns the integer type with the given signedness and bit width.
pub fn get_int_type(&mut self, signed: bool, bits: u16) -> Index {
    let key = if signed {
        Key::SIntType { bits }
    } else {
        Key::UIntType { bits }
    };
    self.get_or_insert(key)
}
/// Interns `str` and returns its index.
pub fn get_string_index(&mut self, str: &str) -> Index {
    let key = Key::String { str };
    self.get_or_insert(key)
}
/// Interns the given parameterless built-in type.
pub fn get_simple_type(&mut self, ty: SimpleType) -> Index {
    let key = Key::SimpleType { ty };
    self.get_or_insert(key)
}
/// Interns the function type with the given return type and parameter types.
pub fn get_function_type<P: IntoIterator<Item = Index>>(
    &mut self,
    return_type: Index,
    parameters: P,
) -> Index {
    let parameters: Vec<Index> = parameters.into_iter().collect();
    let key = Key::FunctionType {
        return_type,
        parameters,
    };
    self.get_or_insert(key)
}
/// Interns a pointer to `pointee`; `None` for `flags` means no qualifiers.
pub fn get_pointer_type(&mut self, pointee: Index, flags: Option<PointerFlags>) -> Index {
    let flags = flags.unwrap_or_default();
    self.get_or_insert(Key::PointerType { pointee, flags })
}
/// Interns the struct type described by the arguments.
pub fn get_struct_type(
    &mut self,
    name: Index,
    packed: bool,
    c_like: bool,
    fields: Vec<(Index, Index)>,
) -> Index {
    self.get_or_insert(Key::StructType {
        name,
        packed,
        c_like,
        fields,
    })
}
/// Interns an array of `length` elements of `pointee`; `None` for `flags`
/// means no qualifiers.
pub fn get_array_type(
    &mut self,
    pointee: Index,
    flags: Option<PointerFlags>,
    length: u32,
) -> Index {
    let flags = flags.unwrap_or_default();
    self.get_or_insert(Key::ArrayType {
        pointee,
        flags,
        length,
    })
}
/// Returns the string stored at `index`.
///
/// # Panics
///
/// Panics if the item at `index` is not a string.
pub fn get_str(&self, index: Index) -> &str {
    let key = self.get_key(index);
    assert!(matches!(key, Key::String { .. }));
    // The assertion above leaves the string variant as the only possibility.
    let Key::String { str } = key else {
        unreachable!()
    };
    str
}
/// Returns `index` back if it refers to an existing item, `None` otherwise.
fn check_bounds(&self, index: Index) -> Option<Index> {
    if index.0 < self.len() {
        Some(index)
    } else {
        None
    }
}
/// Fetches tag and payload of the item at `index`, or `None` if the index is
/// out of bounds.
fn get_item(&self, index: Index) -> Option<Item> {
    let slot = self.check_bounds(index)?.index();
    Some(Item {
        tag: self.tags[slot],
        index: self.indices[slot],
    })
}
}
}
/// Kind of an AST node.
///
/// The tag decides which field of the node's `Data` union is valid; the
/// per-variant notes below describe that payload. "extra" refers to the
/// `extra` vector of the `Ast`, "intern" to an index into the intern pool.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Tag {
/// pseudo tag, `data` is a range from a..b into extra of all files.
Root,
/// `data` is a range from a..b into extra of all global nodes.
File,
/// `data` is an intern to a name, and an offset into extra of [intern: return_type, index: ParameterList]
FunctionProto,
/// `data` is an index to a FunctionProto and an index to a Block
FunctionDecl,
/// `data` is a range from a..b into extra of indices to parameters
ParameterList,
/// `data` is an intern to a name, and an intern to a type
Parameter,
/// `data` is a range from a..b into `extra` of indices to statements
Block,
/// `data` is a range from a..b into `extra` of indices to statements, where the last one is an expression
BlockTrailingExpr,
/// `data` is an intern to a value, and an intern to a type
Constant,
/// `data` is an index to an expression
ExprStmt,
/// `data` is none
ReturnStmt,
/// `data` is an index to an expr
ReturnExprStmt,
/// `data` is a range from a..b into `extra` of an intern to a name and an optional intern to a type
VarDecl,
/// `data` is a range from a..b into `extra` of an intern to a name and an optional intern to a type
MutVarDecl,
/// `data` is a range from a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type
VarDeclAssignment,
/// `data` is a range from a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type
MutVarDeclAssignment,
/// `data` is an intern to a name, and an offset into `extra` of [type: intern, expr: index]
GlobalDecl,
/// `data` is an intern to a struct type
StructDecl,
/// `data` is an index to a VarDecl, GlobalDecl or FunctionDecl
DeclRef,
/// `data` is an inlined key into the symbol table (scope: index, name: intern)
DeclRefUnresolved,
/// `data` is an index to an expr and an index to an ArgumentList
CallExpr,
/// `data` is a range from a..b into extra of indices to arguments
ArgumentList,
/// `data` is an index to an expression
Argument,
/// `data` is an index to an expression, and an intern to a name
NamedArgument,
/// `data` is an index to lhs, and an intern to the type
ExplicitCast,
/// `data` is a single index to an expr (applies to all unary operators: `Deref` through `Negate`)
Deref,
AddressOf,
Not,
Negate,
/// `data` is two indices for `lhs` and `rhs` (applies to all binary operators: `Or` through `Assign`)
Or,
And,
BitOr,
BitXOr,
BitAnd,
Eq,
NEq,
Lt,
Gt,
Le,
Ge,
Shl,
Shr,
Add,
Sub,
Mul,
Div,
Rem,
Assign,
/// `data` is two indices (presumably the subscripted expr and the index expr — TODO confirm)
SubscriptExpr,
/// `data` is two indices (presumably the condition and the body — TODO confirm)
IfExpr,
/// `data` is an index to an expression and an offset into extra for [if, else]
IfElseExpr,
// TODO:
/// `data` is a ParseError
Error,
/// placeholder tag for reserved indices/nodes, `data` is none
Undefined,
}
/// Errors that can be recorded while parsing.
///
/// The type is `Copy` so that it can be stored inline in a node's `Data`.
/// NOTE(review): many variants still use the placeholder text
/// "Dummy Message." as their display message.
#[derive(Debug, Clone, Copy, thiserror::Error, PartialEq, Eq)]
enum ParseError {
#[error("Unexpected end of token iter.")]
UnexpectedEndOfTokens,
#[error("Expected Token {0}.")]
ExpectedToken(Token),
#[error("Expected Token {0}, but other token was found.")]
ExpectedTokenNotFound(Token),
#[error("Expected either a function declaration or a global variable.")]
UnexpectedTokenAtFileScope,
#[error("Expected Ident.")]
ExpectedIdent,
#[error("Integral types may not be wider than 65535 bits.")]
IntegralTypeTooWide,
#[error("Expected typename.")]
ExpectedTypeName,
#[error("Dummy Message.")]
ExpectedFunctionPrototype,
#[error("Dummy Message.")]
ExpectedPrimaryExpression,
#[error("Dummy Message.")]
ExpectedExpression,
#[error("Dummy Message.")]
ExpectedPostfixExpression,
#[error("Dummy Message.")]
ExpectedPrefixExpression,
#[error("Dummy Message.")]
ExpectedArgumentList,
#[error("Dummy Message.")]
ExpectedStatement,
// The `u32` payload of the `Unmatched*` variants is not described in this
// file section; presumably a token/source position — TODO confirm.
#[error("Dummy Message.")]
UnmatchedParens(u32),
#[error("Dummy Message.")]
ExpectedTypeDeclaration,
#[error("Dummy Message.")]
UnexpectedTypeAttributes,
#[error("Dummy Message.")]
UnmatchedSquareBracket(u32),
#[error("Dummy Message.")]
ExpectedEndOfBlock,
#[error("Dummy Message.")]
UnmatchedBrace(u32),
#[error("Dummy Message.")]
UnmatchedDelimiter(u32),
/// Points at the child node that carries the actual error.
#[error("Error in child node {0:?}.")]
ErrorNode(Index),
}
/// Handle to a node of the `Ast`.
///
/// The value is never zero, which lets `Option<Index>` be as small as
/// `Index` itself. It is displayed as `%<n>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
pub struct Index(NonZero<u32>);

impl Display for Index {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "%{}", self.0.get())
    }
}

impl Index {
    /// Wraps a raw node number.
    ///
    /// # Panics
    ///
    /// Panics if `i` is zero.
    pub fn new(i: u32) -> Index {
        Self(NonZero::<u32>::new(i).unwrap())
    }

    /// Borrows the raw node number.
    pub fn as_u32(&self) -> &u32 {
        // SAFETY: `Index` is `repr(transparent)` over `NonZero<u32>`, which is
        // documented to have the same layout as `u32`; every non-zero bit
        // pattern is a valid `u32`, and the shared reference cannot be used
        // to write a zero.
        unsafe { core::mem::transmute::<&Index, &u32>(self) }
    }

    /// Returns the raw node number.
    pub fn into_u32(self) -> u32 {
        // The safe accessor does the job; no transmute required.
        self.0.get()
    }

    /// The raw node number widened to `usize` for slice indexing.
    fn index(self) -> usize {
        self.0.get() as usize
    }
}
/// A node as a single value: its tag together with the tag-dependent payload.
/// (`Ast` itself keeps tags and payloads in separate vectors.)
#[repr(packed)]
#[derive(Clone, Copy)]
struct Node {
/// defines the type of the node in the tree
tag: Tag,
/// payload; which union field is valid depends on `tag`
data: Data,
}
/// Untagged payload of an AST node.
///
/// The union does not record which field was written; the node's `Tag`
/// determines that (the mapping is spelled out in
/// `impl From<(Tag, Data)> for ExpandedData`). Reading any other field is
/// undefined behaviour.
#[derive(Clone, Copy)]
union Data {
// No payload.
none: (),
// A parse error stored inline.
error: ParseError,
// One node index.
index: Index,
// Two node indices.
two_indices: (Index, Index),
// A pair of node indices forming a range.
range: (Index, Index),
// A range a..b into the `extra` vector.
extra_range: (u32, u32),
// One intern-pool index.
intern: intern::Index,
// A node index and an intern-pool index.
index_intern: (Index, intern::Index),
// Two intern-pool indices.
two_interns: (intern::Index, intern::Index),
// An intern-pool index and an offset into the `extra` vector.
intern_and_extra_offset: (intern::Index, u32),
// A node index and an offset into the `extra` vector.
index_and_extra_offset: (Index, u32),
}
/// Tagged counterpart of `Data`, with one variant per union field.
///
/// Used to print node payloads; offsets and ranges into `extra` are widened
/// to `usize`.
#[derive(Debug)]
#[allow(dead_code)]
enum ExpandedData {
None,
Error(ParseError),
Index(Index),
TwoIndices(Index, Index),
Range(Index, Index),
ExtraRange(usize, usize),
Intern(intern::Index),
IndexIntern(Index, intern::Index),
TwoInterns(intern::Index, intern::Index),
InternAndExtraOffset(intern::Index, usize),
IndexAndExtraOffset(Index, usize),
}
/// One constructor per `Data` field. Each reads the field of the same name,
/// so the caller has to pick the constructor that matches the node's tag.
impl ExpandedData {
    fn from_none(data: Data) -> Self {
        // Nothing to read for payload-free nodes.
        let _ = data;
        Self::None
    }

    fn from_error(data: Data) -> Self {
        let error = data.as_error();
        Self::Error(error)
    }

    fn from_index(data: Data) -> Self {
        let index = data.as_index();
        Self::Index(index)
    }

    fn from_two_indices(data: Data) -> Self {
        let (first, second) = data.as_two_indices();
        Self::TwoIndices(first, second)
    }

    fn from_range(data: Data) -> Self {
        let (start, end) = data.as_index_range();
        Self::Range(start, end)
    }

    fn from_extra_range(data: Data) -> Self {
        let (start, end) = data.as_extra_range();
        Self::ExtraRange(start, end)
    }

    fn from_intern(data: Data) -> Self {
        Self::Intern(data.as_intern())
    }

    fn from_index_intern(data: Data) -> Self {
        let (index, intern) = data.as_index_intern();
        Self::IndexIntern(index, intern)
    }

    fn from_two_interns(data: Data) -> Self {
        let (first, second) = data.as_two_interns();
        Self::TwoInterns(first, second)
    }

    fn from_intern_and_extra_offset(data: Data) -> Self {
        let (intern, offset) = data.as_intern_and_extra_offset();
        Self::InternAndExtraOffset(intern, offset)
    }

    fn from_index_and_extra_offset(data: Data) -> Self {
        let (index, offset) = data.as_index_and_extra_offset();
        Self::IndexAndExtraOffset(index, offset)
    }
}
impl From<(Tag, Data)> for ExpandedData {
fn from((tag, data): (Tag, Data)) -> Self {
match tag {
Tag::FunctionProto => Self::from_intern_and_extra_offset(data),
Tag::ParameterList => Self::from_extra_range(data),
Tag::Root => Self::from_extra_range(data),
Tag::File => Self::from_extra_range(data),
Tag::ArgumentList
| Tag::VarDecl
| Tag::MutVarDecl
| Tag::VarDeclAssignment
| Tag::MutVarDeclAssignment
| Tag::BlockTrailingExpr
| Tag::Block => Self::from_extra_range(data),
Tag::Constant | Tag::Parameter => Self::from_two_interns(data),
Tag::Or
| Tag::And
| Tag::BitOr
| Tag::BitXOr
| Tag::BitAnd
| Tag::Eq
| Tag::NEq
| Tag::Lt
| Tag::Gt
| Tag::Le
| Tag::Ge
| Tag::Shl
| Tag::Shr
| Tag::Add
| Tag::Sub
| Tag::Mul
| Tag::Div
| Tag::Rem
| Tag::Assign
| Tag::IfExpr
| Tag::SubscriptExpr
| Tag::CallExpr
| Tag::FunctionDecl => Self::from_two_indices(data),
Tag::ReturnExprStmt
| Tag::DeclRef
| Tag::Argument
| Tag::Deref
| Tag::AddressOf
| Tag::Not
| Tag::Negate
| Tag::ExprStmt => Self::from_index(data),
Tag::DeclRefUnresolved | Tag::NamedArgument | Tag::ExplicitCast => {
Self::from_index_intern(data)
}
Tag::GlobalDecl => Self::from_intern_and_extra_offset(data),
Tag::StructDecl => Self::from_intern(data),
Tag::IfElseExpr => Self::from_index_and_extra_offset(data),
Tag::Error => Self::from_error(data),
Tag::ReturnStmt | Tag::Undefined => Self::from_none(data),
}
}
}
/// Field accessors.
///
/// The union carries no discriminant, so every accessor relies on the caller
/// asking only for the field that was stored (as selected by the node's tag).
impl Data {
    fn as_error(self) -> ParseError {
        // SAFETY: relies on the caller contract stated on this impl block.
        unsafe { self.error }
    }

    fn as_index(self) -> Index {
        // SAFETY: relies on the caller contract stated on this impl block.
        unsafe { self.index }
    }

    fn as_two_indices(self) -> (Index, Index) {
        // SAFETY: relies on the caller contract stated on this impl block.
        unsafe { self.two_indices }
    }

    fn as_index_range(self) -> (Index, Index) {
        // SAFETY: relies on the caller contract stated on this impl block.
        unsafe { self.range }
    }

    /// The range widened to `usize`, ready for slicing `extra`.
    fn as_extra_range(self) -> (usize, usize) {
        // SAFETY: relies on the caller contract stated on this impl block.
        let (start, end) = unsafe { self.extra_range };
        (start as usize, end as usize)
    }

    fn as_intern(self) -> intern::Index {
        // SAFETY: relies on the caller contract stated on this impl block.
        unsafe { self.intern }
    }

    fn as_two_interns(self) -> (intern::Index, intern::Index) {
        // SAFETY: relies on the caller contract stated on this impl block.
        unsafe { self.two_interns }
    }

    fn as_index_intern(self) -> (Index, intern::Index) {
        // SAFETY: relies on the caller contract stated on this impl block.
        unsafe { self.index_intern }
    }

    /// The offset is widened to `usize`.
    fn as_index_and_extra_offset(self) -> (Index, usize) {
        // SAFETY: relies on the caller contract stated on this impl block.
        let (index, offset) = unsafe { self.index_and_extra_offset };
        (index, offset as usize)
    }

    /// The offset is widened to `usize`.
    fn as_intern_and_extra_offset(self) -> (intern::Index, usize) {
        // SAFETY: relies on the caller contract stated on this impl block.
        let (intern, offset) = unsafe { self.intern_and_extra_offset };
        (intern, offset as usize)
    }
}
impl Data {
fn none() -> Self {
Self { none: () }
}
fn error(error: ParseError) -> Self {
Self { error }
}
fn index(index: Index) -> Self {
Self { index }
}
fn two_indices(a: Index, b: Index) -> Self {
Self {
two_indices: (a, b),
}
}
fn two_interns(a: intern::Index, b: intern::Index) -> Self {
Self {
two_interns: (a, b),
}
}
fn range_of_indices(a: Index, b: Index) -> Self {
Self { range: (a, b) }
}
fn extra_range(a: u32, b: u32) -> Self {
Self {
extra_range: (a, b),
}
}
fn intern(intern: intern::Index) -> Self {
Self { intern }
}
fn index_and_intern(index: Index, intern: intern::Index) -> Self {
Self {
index_intern: (index, intern),
}
}
fn intern_and_extra_offset(intern: intern::Index, offset: u32) -> Self {
Self {
intern_and_extra_offset: (intern, offset),
}
}
fn index_and_extra_offset(index: Index, offset: u32) -> Self {
Self {
index_and_extra_offset: (index, offset),
}
}
}
/// The syntax tree, stored as parallel vectors.
///
/// Entry `i` of `tags`, `datas` and `source_locs` together describes node
/// `i`. Node 0 is the root, so every other node has a non-zero index.
pub struct Ast {
// Kind of every node.
tags: Vec<Tag>,
// Payload of every node; interpreted according to the node's tag.
datas: Vec<Data>,
// Side storage for payloads that do not fit into `Data`; nodes refer to it
// by range or offset.
extra: Vec<u32>,
// Source location of every node.
source_locs: Vec<SourceLocation>,
}
impl Debug for Ast {
    /// Prints every node as `(index, tag, decoded payload, (location))`,
    /// followed by the raw `extra` vector.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Renders a location through its `Display` impl, in parentheses.
        struct LocDisplay(SourceLocation);

        impl Debug for LocDisplay {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                write!(f, "({})", self.0)
            }
        }

        let mut out = f.debug_struct("Ast");
        out.field_with("nodes", |f| {
            let nodes = self.tags.iter().zip(&self.datas).zip(&self.source_locs);
            let entries = nodes.enumerate().map(|(i, ((&tag, &data), &loc))| {
                let expanded = ExpandedData::from((tag, data));
                (i, tag, expanded, LocDisplay(loc))
            });
            f.debug_list().entries(entries).finish()
        });
        out.field("extra", &self.extra);
        out.finish()
    }
}
impl Ast {
fn new() -> Ast {
Self {
tags: vec![Tag::Root],
datas: vec![Data::extra_range(0, 0)],
extra: vec![],
source_locs: vec![SourceLocation::new(0, 0)],
}
}
fn reserve_node(&mut self) -> Index {
let i = unsafe { Index(NonZero::new_unchecked(self.tags.len() as u32)) };
self.tags.push(Tag::Undefined);
self.datas.push(Data::none());
self.source_locs.push(SourceLocation::invalid());
i
}
/// Returns the source location recorded for node `index`.
fn get_loc(&self, index: Index) -> SourceLocation {
    let slot = index.index();
    self.source_locs[slot]
}
fn push_error(&mut self, error: ParseError, loc: SourceLocation) -> Index {
let i = self.reserve_node();
self.set_tag_data_source_loc(i, Tag::Error, Data::error(error), loc);
i
}
/// Turns the reserved node `i` into a file node whose declarations are
/// `decls`; the declaration indices are appended to `extra`.
fn set_file<I: IntoIterator<Item = Index>>(&mut self, i: Index, decls: I, loc: SourceLocation) {
    let (begin, end) = self.extend_extra_by_indices(decls);
    let payload = Data::extra_range(begin, end);
    self.set_tag_data_source_loc(i, Tag::File, payload, loc);
}
/// Appends a file node with the given declarations and returns its index.
fn push_file<I: IntoIterator<Item = Index>>(&mut self, decls: I, loc: SourceLocation) -> Index {
    let node = self.reserve_node();
    self.set_file(node, decls, loc);
    node
}
/// Points the root node (node 0) at `decls`, whose indices are appended to
/// `extra`.
fn set_root<I: IntoIterator<Item = Index>>(&mut self, decls: I) {
    let (begin, end) = self.extend_extra_by_indices(decls);
    let payload = Data::extra_range(begin, end);
    self.tags[0] = Tag::Root;
    self.datas[0] = payload;
}
/// Iterates over the node indices listed in the root node's range of `extra`.
fn get_root_file_indices<'a>(&'a self) -> impl Iterator<Item = Index> + 'a {
    let (begin, end) = self.datas[0].as_extra_range();
    let raw_indices = &self.extra[begin..end];
    raw_indices.iter().copied().map(Index::new)
}
/// Appends a global declaration node and returns its index.
///
/// `[ty, expr]` is written to `extra`; the node stores the name together
/// with the offset of that pair.
fn push_global_decl(
    &mut self,
    ident: intern::Index,
    ty: intern::Index,
    expr: Index,
    loc: SourceLocation,
) -> Index {
    let node = self.reserve_node();
    let (offset, _) = self.extend_extra([ty.into_u32(), expr.into_u32()]);
    let payload = Data::intern_and_extra_offset(ident, offset);
    self.set_tag_data_source_loc(node, Tag::GlobalDecl, payload, loc);
    node
}
/// Turns the reserved node `i` into a function declaration made of `proto`
/// and `body`.
fn set_fn_decl(&mut self, i: Index, proto: Index, body: Index, loc: SourceLocation) {
    let payload = Data::two_indices(proto, body);
    self.set_tag_data_source_loc(i, Tag::FunctionDecl, payload, loc);
}
/// Appends a function declaration node and returns its index.
fn push_fn_decl(&mut self, proto: Index, body: Index, loc: SourceLocation) -> Index {
    let node = self.reserve_node();
    self.set_fn_decl(node, proto, body, loc);
    node
}
/// Appends a return statement and returns its index.
///
/// With an expression the node is a `ReturnExprStmt` pointing at it,
/// otherwise a payload-free `ReturnStmt`.
fn push_ret(&mut self, expr: Option<Index>, loc: SourceLocation) -> Index {
    let node = self.reserve_node();
    let (tag, payload) = if let Some(value) = expr {
        (Tag::ReturnExprStmt, Data::index(value))
    } else {
        (Tag::ReturnStmt, Data::none())
    };
    self.set_tag_data_source_loc(node, tag, payload, loc);
    node
}
/// Reserves a node for a local variable declaration and returns its index.
///
/// The words appended to `extra` are, in order: the name, the initializer node
/// (only if `assignment` is `Some`), and the type (only if `ty` is `Some`).
/// The node data is the range covering those words. The tag is chosen from
/// `is_let` and from whether an initializer is present.
fn push_var_decl(
    &mut self,
    is_let: bool,
    name: intern::Index,
    ty: Option<intern::Index>,
    assignment: Option<Index>,
    loc: SourceLocation,
) -> Index {
    let decl = self.reserve_node();
    let has_init = assignment.is_some();

    let (start, _) = self.extend_extra([name.into_u32()]);
    _ = self.extend_extra(assignment.map(|init| init.into_u32()));
    let (_, end) = self.extend_extra(ty.map(|ty| ty.into_u32()));

    let tag = if is_let {
        if has_init {
            Tag::VarDeclAssignment
        } else {
            Tag::VarDecl
        }
    } else if has_init {
        Tag::MutVarDeclAssignment
    } else {
        Tag::MutVarDecl
    };
    self.set_tag_data_source_loc(decl, tag, Data::extra_range(start, end), loc);
    decl
}
/// Reserves a `Tag::StructDecl` node whose data is the interned `struct_type`,
/// and returns its index.
fn push_struct_decl(&mut self, struct_type: intern::Index, loc: SourceLocation) -> Index {
    let decl = self.reserve_node();
    let data = Data::intern(struct_type);
    self.set_tag_data_source_loc(decl, Tag::StructDecl, data, loc);
    decl
}
/// Reserves a `Tag::FunctionProto` node and returns its index.
///
/// `return_type` and `parameter_list` are appended to `extra` (in that order);
/// the node data stores `ident` together with the offset of that pair.
fn push_fn_proto(
    &mut self,
    ident: intern::Index,
    return_type: intern::Index,
    parameter_list: Index,
    loc: SourceLocation,
) -> Index {
    let proto = self.reserve_node();
    let payload = [return_type.into_u32(), parameter_list.into_u32()];
    let (offset, _) = self.extend_extra(payload);
    let data = Data::intern_and_extra_offset(ident, offset);
    self.set_tag_data_source_loc(proto, Tag::FunctionProto, data, loc);
    proto
}
/// Overwrites node `i` as a block.
///
/// `statements` followed by `trailing` (if any) are appended to `extra`, and the
/// node data is the range they occupy. The tag is `Tag::BlockTrailingExpr` when
/// `trailing` is `Some`, otherwise `Tag::Block`.
fn set_block<I>(
    &mut self,
    i: Index,
    statements: I,
    trailing: Option<Index>,
    loc: SourceLocation,
) where
    I: IntoIterator<Item = Index>,
{
    let tag = if trailing.is_some() {
        Tag::BlockTrailingExpr
    } else {
        Tag::Block
    };
    let children = statements.into_iter().chain(trailing);
    let (start, end) = self.extend_extra_by_indices(children);
    self.set_tag_data_source_loc(i, tag, Data::extra_range(start, end), loc);
}
/// Reserves a new node, fills it in as a block via `set_block`, and returns its index.
fn push_block<I>(
    &mut self,
    statements: I,
    trailing: Option<Index>,
    loc: SourceLocation,
) -> Index
where
    I: IntoIterator<Item = Index>,
{
    let block = self.reserve_node();
    self.set_block(block, statements, trailing, loc);
    block
}
/// Reserves a `Tag::ParameterList` node whose data is the `extra` range holding
/// `parameters`, and returns its index.
fn push_parameter_list<I>(&mut self, parameters: I, loc: SourceLocation) -> Index
where
    I: IntoIterator<Item = Index>,
{
    let list = self.reserve_node();
    let (start, end) = self.extend_extra_by_indices(parameters);
    let data = Data::extra_range(start, end);
    self.set_tag_data_source_loc(list, Tag::ParameterList, data, loc);
    list
}
/// Reserves a `Tag::Argument` node wrapping `expr`, and returns its index.
fn push_argument(&mut self, expr: Index, loc: SourceLocation) -> Index {
    let argument = self.reserve_node();
    let data = Data::index(expr);
    self.set_tag_data_source_loc(argument, Tag::Argument, data, loc);
    argument
}
/// Reserves a `Tag::NamedArgument` node storing `expr` together with the interned
/// `name`, and returns its index.
fn push_named_argument(
    &mut self,
    name: intern::Index,
    expr: Index,
    loc: SourceLocation,
) -> Index {
    let argument = self.reserve_node();
    let data = Data::index_and_intern(expr, name);
    self.set_tag_data_source_loc(argument, Tag::NamedArgument, data, loc);
    argument
}
/// Reserves a `Tag::Parameter` node storing the interned `name` and `ty`,
/// and returns its index.
fn push_parameter(
    &mut self,
    name: intern::Index,
    ty: intern::Index,
    loc: SourceLocation,
) -> Index {
    let parameter = self.reserve_node();
    let data = Data::two_interns(name, ty);
    self.set_tag_data_source_loc(parameter, Tag::Parameter, data, loc);
    parameter
}
/// Reserves a `Tag::ArgumentList` node whose data is the `extra` range holding
/// `args`, and returns its index.
fn push_argument_list<I>(&mut self, args: I, loc: SourceLocation) -> Index
where
    I: IntoIterator<Item = Index>,
{
    let list = self.reserve_node();
    let (start, end) = self.extend_extra_by_indices(args);
    let data = Data::extra_range(start, end);
    self.set_tag_data_source_loc(list, Tag::ArgumentList, data, loc);
    list
}
/// Reserves a node with the given `tag` whose data is the single operand `lhs`,
/// and returns its index.
fn push_unary(&mut self, tag: Tag, lhs: Index, loc: SourceLocation) -> Index {
    let node = self.reserve_node();
    let operand = Data::index(lhs);
    self.set_tag_data_source_loc(node, tag, operand, loc);
    node
}
/// Reserves a node with the given `tag` whose data is the operand pair `(lhs, rhs)`,
/// and returns its index.
fn push_binary(&mut self, tag: Tag, lhs: Index, rhs: Index, loc: SourceLocation) -> Index {
    let node = self.reserve_node();
    let operands = Data::two_indices(lhs, rhs);
    self.set_tag_data_source_loc(node, tag, operands, loc);
    node
}
/// Reserves a `Tag::Assign` node storing the pair `(lhs, rhs)`, and returns its index.
fn push_assign(&mut self, lhs: Index, rhs: Index, loc: SourceLocation) -> Index {
    // An assignment is stored exactly like any other two-operand node.
    self.push_binary(Tag::Assign, lhs, rhs, loc)
}
/// Reserves a `Tag::ExplicitCast` node storing the operand `lhs` and the interned
/// target type `ty`, and returns its index.
fn push_cast(&mut self, lhs: Index, ty: intern::Index, loc: SourceLocation) -> Index {
    let cast = self.reserve_node();
    let data = Data::index_and_intern(lhs, ty);
    self.set_tag_data_source_loc(cast, Tag::ExplicitCast, data, loc);
    cast
}
/// Reserves a `Tag::IfExpr` node storing the pair `(cond, body)`, and returns its index.
fn push_if(&mut self, cond: Index, body: Index, loc: SourceLocation) -> Index {
    let node = self.reserve_node();
    let data = Data::two_indices(cond, body);
    self.set_tag_data_source_loc(node, Tag::IfExpr, data, loc);
    node
}
/// Reserves a `Tag::IfElseExpr` node and returns its index.
///
/// `body` and `other` are appended to `extra` (in that order); the node data
/// stores `cond` together with the offset of that pair.
fn push_if_else(
    &mut self,
    cond: Index,
    body: Index,
    other: Index,
    loc: SourceLocation,
) -> Index {
    let node = self.reserve_node();
    let (branches_at, _) = self.extend_extra_by_indices([body, other]);
    let data = Data::index_and_extra_offset(cond, branches_at);
    self.set_tag_data_source_loc(node, Tag::IfElseExpr, data, loc);
    node
}
/// Reserves a `Tag::CallExpr` node storing the callee `lhs` and the argument-list
/// node `args`, and returns its index.
fn push_call_expr(&mut self, lhs: Index, args: Index, loc: SourceLocation) -> Index {
    let call = self.reserve_node();
    let data = Data::two_indices(lhs, args);
    self.set_tag_data_source_loc(call, Tag::CallExpr, data, loc);
    call
}
fn push_decl_ref_unresolved(
&mut self,
scope: Index,
ident: intern::Index,
loc: SourceLocation,
) -> Index {
let i = self.reserve_node();
self.set_tag_data_source_loc(
i,
Tag::DeclRefUnresolved,
Data::index_and_intern(scope, ident),
loc,
);
i
}
/// Rewrites node `i` in place into a `Tag::DeclRef` pointing at `decl`.
/// The node's source location is left untouched.
fn resolve_decl_ref(&mut self, i: Index, decl: Index) {
    let slot = i.index();
    self.tags[slot] = Tag::DeclRef;
    self.datas[slot] = Data::index(decl);
}
/// Reserves a `Tag::ExprStmt` node wrapping `expr`, and returns its index.
/// The statement takes the source location that `get_loc` reports for `expr`.
fn push_expr_stmt(&mut self, expr: Index) -> Index {
    let stmt = self.reserve_node();
    let expr_loc = self.get_loc(expr);
    self.set_tag_data_source_loc(stmt, Tag::ExprStmt, Data::index(expr), expr_loc);
    stmt
}
/// Reserves a `Tag::Constant` node storing the interned `value` and its interned
/// type `ty`, and returns its index.
fn push_constant(
    &mut self,
    value: intern::Index,
    ty: intern::Index,
    loc: SourceLocation,
) -> Index {
    let constant = self.reserve_node();
    let data = Data::two_interns(value, ty);
    self.set_tag_data_source_loc(constant, Tag::Constant, data, loc);
    constant
}
/// Appends the raw value of every node index in `indices` to `extra`.
///
/// Returns the same `(start, end)` pair as `extend_extra`.
fn extend_extra_by_indices<I>(&mut self, indices: I) -> (u32, u32)
where
    I: IntoIterator<Item = Index>,
{
    let words = indices.into_iter().map(|index| index.0.get());
    self.extend_extra(words)
}
/// Appends `words` to `extra`.
///
/// Returns `(start, end)`: the length of `extra` before and after the append,
/// i.e. the half-open range now occupied by `words`.
fn extend_extra<I>(&mut self, words: I) -> (u32, u32)
where
    I: IntoIterator<Item = u32>,
{
    let start = self.extra.len() as u32;
    self.extra.extend(words);
    let end = self.extra.len() as u32;
    (start, end)
}
/// Overwrites the tag, data and source location stored for node `index`.
fn set_tag_data_source_loc(&mut self, index: Index, tag: Tag, data: Data, loc: SourceLocation) {
    let slot = index.index();
    self.tags[slot] = tag;
    self.datas[slot] = data;
    self.source_locs[slot] = loc;
}
}
/// A list of child node indices, displayed as `[a, b, c]`.
struct Children(Vec<Index>);

impl Display for Children {
    /// Writes the indices separated by `", "` and wrapped in square brackets;
    /// an empty list is written as `[]`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[")?;
        for (position, child) in self.0.iter().enumerate() {
            if position > 0 {
                write!(f, ", ")?;
            }
            write!(f, "{child}")?;
        }
        write!(f, "]")
    }
}
impl Ast {
fn get_node_children(&self, index: Index) -> Vec<Index> {
let tag = self.tags[index.index()];
let data = self.datas[index.index()];
match tag {
Tag::File => {
let (a, b) = data.as_extra_range();
self.extra[a..b].iter().map(|&i| Index::new(i)).collect()
}
Tag::FunctionProto => {
let (_, i) = data.as_intern_and_extra_offset();
vec![Index::new(self.extra[i + 1])]
}
Tag::FunctionDecl => {
let (a, b) = data.as_two_indices();
vec![a, b]
}
Tag::ParameterList => {
let (a, b) = data.as_extra_range();
self.extra[a..b].iter().map(|&i| Index::new(i)).collect()
}
Tag::Block | Tag::BlockTrailingExpr => {
let (a, b) = data.as_extra_range();
self.extra[a..b].iter().map(|&i| Index::new(i)).collect()
}
Tag::ExprStmt | Tag::ReturnExprStmt => {
let a = data.as_index();
vec![a]
}
Tag::VarDeclAssignment | Tag::MutVarDeclAssignment => {
let (a, _) = data.as_extra_range();
let expr = Index::new(self.extra[a + 1]);
vec![expr]
}
Tag::GlobalDecl => {
let (_, offset) = data.as_intern_and_extra_offset();
let expr = Index::new(self.extra[offset + 1]);
vec![expr]
}
Tag::CallExpr => {
let (a, b) = data.as_two_indices();
vec![a, b]
}
Tag::ArgumentList => {
let (a, b) = data.as_extra_range();
self.extra[a..b].iter().map(|&i| Index::new(i)).collect()
}
Tag::Argument => {
let a = data.as_index();
vec![a]
}
Tag::NamedArgument => {
let (a, _) = data.as_index_intern();
vec![a]
}
Tag::ExplicitCast => {
let (a, _) = data.as_index_intern();
vec![a]
}
Tag::Deref | Tag::AddressOf | Tag::Not | Tag::Negate => {
let a = data.as_index();
vec![a]
}
Tag::Or
| Tag::And
| Tag::BitOr
| Tag::BitXOr
| Tag::BitAnd
| Tag::Eq
| Tag::NEq
| Tag::Lt
| Tag::Gt
| Tag::Le
| Tag::Ge
| Tag::Shl
| Tag::Shr
| Tag::Add
| Tag::Sub
| Tag::Mul
| Tag::Div
| Tag::Rem
| Tag::Assign
| Tag::SubscriptExpr
| Tag::IfExpr => {
let (a, b) = data.as_two_indices();
vec![a, b]
}
Tag::IfElseExpr => {
let (a, b) = data.as_index_and_extra_offset();
let if_ = Index::new(self.extra[b]);
let else_ = Index::new(self.extra[b + 1]);
vec![a, if_, else_]
}
Tag::StructDecl
| Tag::DeclRef
| Tag::Parameter
| Tag::Constant
| Tag::ReturnStmt
| Tag::VarDecl
| Tag::MutVarDecl => vec![],
_ => vec![],
}
}
}
/// Writes an [`Ast`] as an indented textual tree, starting from the files listed
/// under the root node.
pub struct AstRenderer<'a> {
/// The tree being rendered.
ast: &'a Ast,
/// Symbol table passed in alongside the tree; stored but not read by the
/// rendering methods visible in this impl.
#[allow(dead_code)]
syms: &'a crate::symbol_table::syms2::Symbols,
/// Stack of scope-opening nodes (file, function declaration, block) on the path
/// to the node currently being rendered.
scopes: Vec<Index>,
}
impl<'a> AstRenderer<'a> {
    /// Creates a renderer over `ast` with an empty scope stack.
    pub fn new(ast: &'a Ast, syms: &'a crate::symbol_table::syms2::Symbols) -> Self {
        let scopes = Vec::new();
        Self { ast, syms, scopes }
    }

    /// Writes `node` at depth `indent`, then every child at `indent + 1`.
    ///
    /// Scope-opening nodes (file, function declaration, block) are kept on
    /// `self.scopes` while their subtree is being written.
    fn render_node<W: core::fmt::Write>(
        &mut self,
        w: &mut W,
        indent: u32,
        node: Index,
    ) -> core::fmt::Result {
        let tag = self.ast.tags[node.index()];
        let loc = self.ast.source_locs[node.index()];
        let opens_scope = matches!(
            tag,
            Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr
        );

        if opens_scope {
            self.scopes.push(node);
        }

        let children = Children(self.ast.get_node_children(node));
        writeln_indented!(indent, w, "{node} = ({loc}) {tag:?} {}", children)?;
        for child in children.0 {
            self.render_node(w, indent + 1, child)?;
        }

        if opens_scope {
            self.scopes.pop();
        }
        Ok(())
    }

    /// Writes every file listed under the root node, each starting at depth 0.
    fn render<W: core::fmt::Write>(&mut self, w: &mut W) -> core::fmt::Result {
        for root_file in self.ast.get_root_file_indices() {
            self.render_node(w, 0, root_file)?;
        }
        Ok(())
    }
}
pub mod ast_gen {
use intern::{PointerFlags, SimpleType};
use itertools::Itertools;
use num_bigint::{BigInt, BigUint};
use crate::{
common::from_lo_hi_dwords,
comptime,
lexer::{Radix, TokenItem, TokenIterator},
symbol_table::syms2::SymbolKind,
tokens::PRECEDENCE_MAP,
};
use super::*;
/// A parse error together with the source location it was reported at.
#[derive(Debug)]
pub struct ErrorInfo {
/// What went wrong.
error: ParseError,
/// Where in the source the error was detected.
loc: SourceLocation,
}
/// Parser state: the tree being built plus the tables filled in while parsing.
#[derive(Debug)]
pub struct Parser {
/// The syntax tree under construction.
pub ast: Ast,
/// Pool in which identifiers, types and constant values are interned.
pub intern: intern::InternPool,
/// Symbols declared so far, keyed by the scope node they were declared in.
pub syms: crate::symbol_table::syms2::Symbols,
/// Stack of currently open scope nodes; the last entry is the current scope.
scopes: Vec<Index>,
/// Errors recorded through `push_error` while parsing.
pub errors: Vec<ErrorInfo>,
}
/// Result of a parse step: the parsed value, or the [`ErrorInfo`] describing the failure.
type ParseResult<T> = core::result::Result<T, ErrorInfo>;
impl Display for Parser {
    /// Formats the parser by rendering its tree through an [`AstRenderer`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut renderer = self.display();
        renderer.render(f)
    }
}
impl Parser {
pub fn new() -> Parser {
Self {
ast: Ast::new(),
intern: intern::InternPool::create(),
syms: crate::symbol_table::syms2::Symbols::new(),
scopes: Vec::new(),
errors: Vec::new(),
}
}
pub fn display(&self) -> AstRenderer<'_> {
AstRenderer::new(&self.ast, &self.syms)
}
pub fn resolve_decl_refs(&mut self) {
let mut nodes = self.ast.get_root_file_indices().collect::<Vec<_>>();
let mut scopes = Vec::new();
while let Some(node) = nodes.pop() {
match self.ast.tags[node.index()] {
Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => {
scopes.push(node);
}
Tag::DeclRefUnresolved => {
let (scope, name) = self.ast.datas[node.index()].as_index_intern();
// look in my_scope
if let Some(decl) =
self.syms
.find_symbol(scope, name, self.ast.source_locs[node.index()])
{
self.ast.resolve_decl_ref(node, decl)
};
}
_ => {}
}
nodes.extend(self.ast.get_node_children(node));
match self.ast.tags[node.index()] {
Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => {
scopes.pop();
}
_ => {}
}
}
}
/// Returns the innermost open scope node.
///
/// # Panics
/// Panics if no scope is open.
fn current_scope(&self) -> Index {
    let innermost = self.scopes.last();
    *innermost.unwrap()
}
/// Expects an identifier token and returns its interned name.
///
/// # Errors
/// Returns `ParseError::ExpectedIdent` at the current source location when the
/// next token is not an identifier.
fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result<intern::Index, ErrorInfo> {
    let ident = match tokens.expect_token(Token::Ident) {
        Ok(item) => item,
        Err(_) => {
            return Err(ErrorInfo {
                error: ParseError::ExpectedIdent,
                loc: tokens.current_source_location(),
            });
        }
    };
    let key = intern::Key::String {
        str: ident.lexeme(),
    };
    Ok(self.intern.get_or_insert(key))
}
/// POINTER_TYPE <-
///     * (const | volatile | noalias)* TYPE
///
/// Returns the interned pointer type for the parsed pointee and qualifier flags.
///
/// # Errors
/// Returns `ExpectedToken(Star)` when the next token is not `*`, and propagates
/// any error from parsing the pointee type.
fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> ParseResult<intern::Index> {
    if tokens.eat_token(Token::Star).is_none() {
        return Err(ErrorInfo {
            error: ParseError::ExpectedToken(Token::Star),
            loc: tokens.current_source_location(),
        });
    }
    // One flag per qualifier, in the order they are listed here.
    let &[cnst, vol, noalias] =
        &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3]
    else {
        unreachable!()
    };
    let pointee = self.parse_type(tokens)?;
    let flags = PointerFlags::new(cnst, vol, noalias);
    Ok(self.intern.get_pointer_type(pointee, Some(flags)))
}
/// [LENGTH]const? volatile? noalias? TYPE
fn parse_array_type(&mut self, tokens: &mut TokenIterator) -> ParseResult<intern::Index> {
let start = tokens.eat_token(Token::OpenSquareBracket).unwrap();
let length = match self.parse_expr(tokens) {
Ok(i) => {
_ = tokens
.eat_token(Token::CloseSquareBracket)
.ok_or(ErrorInfo {
error: ParseError::ExpectedTypeName,
loc: tokens.current_source_location(),
})?;
i
}
Err(err) => {
tokens.advance_past_end_of_bracketed().ok_or(ErrorInfo {
error: ParseError::ExpectedToken(Token::CloseSquareBracket),
loc: tokens.current_source_location(),
})?;
self.push_error(err.error, err.loc)
}
};
let &[cnst, vol, noalias] =
&tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3]
else {
unreachable!()
};
let pointee = self.parse_type(tokens)?;
Ok(self.intern.get_array_type(
pointee,
Some(PointerFlags::new(cnst, vol, noalias)),
0, // length, TODO: evaluate this tree branch for an u32
))
}
/// Maps a built-in type keyword token to its interned simple type.
///
/// Returns `None` for any token that is not `void`, `bool`, `f32`, `f64`,
/// `usize` or `isize`.
fn parse_simple_type(&mut self, token: Token) -> Option<intern::Index> {
    let ty = match token {
        Token::Void => SimpleType::Void,
        Token::Bool => SimpleType::Bool,
        Token::F32 => SimpleType::F32,
        Token::F64 => SimpleType::F64,
        Token::USize => SimpleType::USize,
        Token::ISize => SimpleType::ISize,
        _ => return None,
    };
    Some(
        self.intern
            .get_assume_present(intern::Key::SimpleType { ty }),
    )
}
/// Tries to read `typename` as an integral type name: `u` or `i` followed by one
/// or more digits and nothing else.
///
/// Returns `Ok(None)` when `typename` does not have that shape, and
/// `Ok(Some(ty))` with the interned integer type otherwise.
///
/// # Errors
/// Returns `ParseError::IntegralTypeTooWide` when the bit width does not fit in
/// a `u16`.
fn try_parse_integral_type(
    &mut self,
    typename: &str,
) -> Result<Option<intern::Index>, ParseError> {
    let mut chars = typename.chars();
    let signed = match chars.next() {
        Some('u') => false,
        Some('i') => true,
        _ => return Ok(None),
    };

    // Everything after the sign letter must be digits, and there must be at
    // least one of them.
    let width = chars.as_str();
    if width.is_empty() || !width.chars().all(|c| crate::common::is_digit(c)) {
        return Ok(None);
    }

    let mut bits = 0u16;
    for c in width.chars() {
        let digit = c as u8 - b'0';
        // this IS an integral type, but it may be bigger than u/i65535
        bits = bits
            .checked_mul(10)
            .and_then(|bits| bits.checked_add(digit as u16))
            .ok_or(ParseError::IntegralTypeTooWide)?;
    }
    Ok(Some(self.intern.get_int_type(signed, bits)))
}
/// Interns the value of an integer literal token and determines its type.
///
/// Returns `(value, type)`. The type is the one named by a `u<N>` / `i<N>` suffix
/// on the literal when present, otherwise the comptime integer type.
///
/// The value is interned in the smallest representation that holds it:
/// at most one 32-bit word as `UIntSmall`, exactly two words as `UInt64`,
/// anything larger as a positive big integer.
///
/// # Panics
/// Panics if `item` is not a token that `Radix::from_token` recognises, or if
/// the literal's type suffix names an integral type that is too wide.
fn try_parse_integral_constant(
    &mut self,
    item: &TokenItem,
) -> (intern::Index, intern::Index) {
    let radix = Radix::from_token(item.token()).unwrap();
    let mut chars = item.lexeme().char_indices();
    match radix {
        Radix::Dec => {}
        _ => {
            // presumably skips a two-character radix prefix such as `0x` — TODO confirm
            _ = chars.advance_by(2);
        }
    }
    // Digits of the literal with `_` separators removed; `chars` is left at the
    // first character that is neither a digit nor `_`.
    let digits = chars
        .take_while_ref(|&(_, c)| radix.is_digit()(c) || c == '_')
        .filter(|&(_, c)| c != '_')
        .map(|(_, c)| c)
        .collect::<Vec<_>>();
    let value = comptime::bigint::parse_bigint(digits.into_iter(), radix);
    let ty = match chars.clone().next() {
        Some((i, 'u')) | Some((i, 'i')) => self
            .try_parse_integral_type(&item.lexeme()[i..])
            .expect("invalid integral type??"),
        _ => None,
    };
    // Pick the representation by word count. The bounds are inclusive: a
    // one-word value is small, a two-word value is a u64.
    let interned = match value.len() {
        0 | 1 => {
            let bits = value.first().copied().unwrap_or(0);
            self.intern.get_or_insert(intern::Key::UIntSmall { bits })
        }
        2 => {
            let bits = from_lo_hi_dwords(value[0], value[1]);
            self.intern.get_or_insert(intern::Key::UInt64 { bits })
        }
        _ => {
            let bigint = BigInt::from_biguint(num_bigint::Sign::Plus, BigUint::new(value));
            self.intern
                .get_or_insert(intern::Key::PositiveInt { bigint })
        }
    };
    (interned, ty.unwrap_or(self.intern.get_comptime_int_type()))
}
/// Interns the value of a floating-point literal token and determines its type.
///
/// A trailing `f32` or `f64` suffix selects the type and is stripped before the
/// number is parsed; without a suffix the literal is an `f64`.
/// Returns `(value, type)`.
///
/// # Panics
/// Panics if the remaining text cannot be parsed as a float of the chosen type.
fn parse_floating_constant(&mut self, item: &TokenItem) -> (intern::Index, intern::Index) {
    let raw = item.lexeme();
    let f32_ty = self.intern.get_f32_type();
    let f64_ty = self.intern.get_f64_type();

    let (text, ty) = if let Some(stripped) = raw.strip_suffix("f32") {
        (stripped, f32_ty)
    } else if let Some(stripped) = raw.strip_suffix("f64") {
        (stripped, f64_ty)
    } else {
        (raw, f64_ty)
    };

    let value = if ty == f32_ty {
        let bits = text.parse::<f32>().unwrap();
        self.intern.get_or_insert(intern::Key::F32 { bits })
    } else {
        let bits = text.parse::<f64>().unwrap();
        self.intern.get_or_insert(intern::Key::F64 { bits })
    };
    (value, ty)
}
/// TYPE <-
///     * (const | volatile | noalias)* TYPE
///     [ EXPR ] (const | volatile | noalias)* TYPE
///     INTEGRAL_TYPE // u[0..65535] | i[0..65535]
///     IDENTIFIER
///     SIMPLE_TYPE
///
/// An identifier that is not an integral type name is returned as its interned
/// name string.
///
/// # Errors
/// Returns `ExpectedTypeName` when there is no next token or it cannot start a
/// type, and propagates errors from the pointer, array and integral-type parsers.
fn parse_type(&mut self, tokens: &mut TokenIterator) -> ParseResult<intern::Index> {
    let Some(next) = tokens.peek_token() else {
        return Err(ErrorInfo {
            error: ParseError::ExpectedTypeName,
            loc: tokens.current_source_location(),
        });
    };
    match next.token() {
        Token::Star => self.parse_pointer(tokens),
        Token::OpenSquareBracket => self.parse_array_type(tokens),
        Token::Ident => {
            let token = tokens.next().unwrap();
            let integral = self
                .try_parse_integral_type(token.lexeme())
                .map_err(|error| ErrorInfo {
                    error,
                    loc: token.source_location(),
                })?;
            if let Some(int) = integral {
                return Ok(int);
            }
            // not `u<N>` / `i<N>`: keep the name itself
            Ok(self.intern.get_or_insert(intern::Key::String {
                str: token.lexeme(),
            }))
        }
        keyword => {
            let Some(ty) = self.parse_simple_type(keyword) else {
                return Err(ErrorInfo {
                    error: ParseError::ExpectedTypeName,
                    loc: tokens.current_source_location(),
                });
            };
            // consume the keyword only once it is known to be a type
            _ = tokens.next();
            Ok(ty)
        }
    }
}
/// GLOBAL_DECL <-
///     const IDENTIFIER: TYPENAME = EXPR;
///
/// On success the declaration node is returned and registered as a `Const`
/// symbol in the current scope.
///
/// On a syntax error anywhere inside the declaration, the parser recovers via
/// `advance_past_semi` and returns an error node in place of the declaration,
/// so the caller can continue with the next item.
///
/// # Errors
/// Returns `ExpectedToken(Semi)` only when that recovery itself fails.
fn parse_const_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let err = 'blk: {
        let loc = tokens.current_source_location();
        let Some(_) = tokens.eat_token(Token::Const) else {
            break 'blk ErrorInfo {
                error: ParseError::ExpectedToken(Token::Const),
                loc,
            };
        };
        let ident = match self.parse_ident(tokens) {
            Ok(i) => i,
            Err(err) => {
                break 'blk err;
            }
        };
        // A missing `:` goes through the same recovery path as every other
        // syntax error in this declaration instead of aborting the caller.
        let Some(_) = tokens.eat_token(Token::Colon) else {
            break 'blk ErrorInfo {
                error: ParseError::ExpectedToken(Token::Colon),
                loc: tokens.current_source_location(),
            };
        };
        let typename = match self.parse_type(tokens) {
            Ok(i) => i,
            Err(err) => {
                break 'blk err;
            }
        };
        let Some(_) = tokens.eat_token(Token::Equal) else {
            break 'blk ErrorInfo {
                error: ParseError::ExpectedToken(Token::Equal),
                loc: tokens.current_source_location(),
            };
        };
        let expr = match self.parse_expr(tokens) {
            Ok(i) => i,
            Err(err) => {
                break 'blk err;
            }
        };
        let Some(_) = tokens.eat_token(Token::Semi) else {
            break 'blk ErrorInfo {
                error: ParseError::ExpectedToken(Token::Semi),
                loc: tokens.current_source_location(),
            };
        };
        let decl = self.ast.push_global_decl(ident, typename, expr, loc);
        self.syms
            .insert_symbol(self.current_scope(), ident, SymbolKind::Const, decl);
        return Ok(decl);
    };
    // Error recovery: resynchronise, then represent the broken declaration as
    // an error node.
    tokens.advance_past_semi().ok_or(ErrorInfo {
        error: ParseError::ExpectedToken(Token::Semi),
        loc: tokens.current_source_location(),
    })?;
    Ok(self.ast.push_error(err.error, err.loc))
}
/// FUNCTION_PROTO <-
///     fn IDENTIFIER ()
///     fn IDENTIFIER () -> TYPENAME
///     fn IDENTIFIER ( PARAMETER_LIST ,? )
///     fn IDENTIFIER ( PARAMETER_LIST ,? ) -> TYPENAME
///
/// Without a `-> TYPENAME` part the return type is the void type.
///
/// # Errors
/// Returns `ExpectedToken(Fn)` when the next token is not `fn`, and propagates
/// errors from parsing the name, the parameter list and the return type.
fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    if tokens.eat_token(Token::Fn).is_none() {
        return Err(ErrorInfo {
            error: ParseError::ExpectedToken(Token::Fn),
            loc,
        });
    }
    let ident = self.parse_ident(tokens)?;
    let parameters = self.parse_parenthesised(tokens, |this, tokens| {
        if tokens.is_next_token(Token::CloseParens) {
            // `()`: an empty parameter list located at the start of the prototype
            Ok(this.ast.push_parameter_list([], loc))
        } else {
            this.parse_parameter_list(tokens)
        }
    })?;
    let has_return_type = tokens.eat_token(Token::MinusGreater).is_some();
    let return_type = if has_return_type {
        self.parse_type(tokens)?
    } else {
        self.intern.get_void_type()
    };
    Ok(self.ast.push_fn_proto(ident, return_type, parameters, loc))
}
/// Parses a function prototype followed by its body block.
///
/// The function node is reserved first and pushed as a scope, so that the
/// prototype and body are parsed with it as the enclosing scope. The scope is
/// popped again on every exit path.
fn parse_fn_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    let func = self.ast.reserve_node();
    self.push_scope(func, intern::Index::invalid());

    let proto = match self.parse_fn_proto(tokens) {
        Ok(proto) => proto,
        Err(err) => {
            self.pop_scope();
            return Err(err);
        }
    };
    let body = match self.parse_block(tokens) {
        Ok(body) => body,
        Err(err) => {
            self.pop_scope();
            return Err(err);
        }
    };

    self.pop_scope();
    self.ast.set_fn_decl(func, proto, body, loc);
    Ok(func)
}
/// FUNCTION_DECL <-
///     FUNCTION_PROTO BLOCK
///
/// Never fails: when the function cannot be parsed, `find_next_fn_or_const` is
/// called to resynchronise, the error is recorded, and an error node is returned
/// in place of the declaration.
fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Index {
    let err = match self.parse_fn_inner(tokens) {
        Ok(decl) => return decl,
        Err(err) => err,
    };
    self.find_next_fn_or_const(tokens);
    self.push_error(err.error, err.loc)
}
/// RETURN_STATEMENT <-
///     return EXPRESSION? ;
///
/// The first token is consumed unconditionally as the `return` keyword; the
/// caller is expected to have checked it.
///
/// When the expression fails to parse, the parser recovers via
/// `advance_past_semi`, records the error and returns an error node instead.
///
/// # Errors
/// Returns `ExpectedToken(Semi)` when the terminating `;` is missing or when
/// recovery fails.
///
/// # Panics
/// Panics if there is no token left to consume.
fn parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let keyword = tokens.next().unwrap();
    let loc = keyword.source_location();

    // bare `return;`
    if tokens.eat_token(Token::Semi).is_some() {
        return Ok(self.ast.push_ret(None, loc));
    }

    let value = match self.parse_expr(tokens) {
        Ok(value) => value,
        Err(err) => {
            tokens.advance_past_semi().ok_or(ErrorInfo {
                error: ParseError::ExpectedToken(Token::Semi),
                loc: tokens.current_source_location(),
            })?;
            return Ok(self.push_error(err.error, err.loc));
        }
    };
    tokens.eat_token(Token::Semi).ok_or(ErrorInfo {
        error: ParseError::ExpectedToken(Token::Semi),
        loc: tokens.current_source_location(),
    })?;
    Ok(self.ast.push_ret(Some(value), loc))
}
/// VAR_DECL <-
///     (let | var) IDENTIFIER (: TYPENAME)? ;
///     (let | var) IDENTIFIER (: TYPENAME)? = EXPRESSION ;
///
/// When the declaration itself fails to parse, the parser recovers via
/// `advance_past_semi`, records the error and returns an error node instead.
///
/// # Errors
/// Returns `ExpectedToken(Semi)` when the terminating `;` is missing or when
/// recovery fails.
fn parse_var_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let decl = match self.parse_var_decl_inner(tokens) {
        Ok(decl) => decl,
        Err(err) => {
            tokens.advance_past_semi().ok_or(ErrorInfo {
                error: ParseError::ExpectedToken(Token::Semi),
                loc: tokens.current_source_location(),
            })?;
            return Ok(self.push_error(err.error, err.loc));
        }
    };
    _ = tokens.eat_token(Token::Semi).ok_or(ErrorInfo {
        error: ParseError::ExpectedToken(Token::Semi),
        loc: tokens.current_source_location(),
    })?;
    Ok(decl)
}
/// Parses a variable declaration up to, but not including, the terminating `;`.
///
/// The first token is consumed unconditionally and decides between `let`
/// (`Token::Let`) and anything else (treated as `var`); the caller is expected
/// to have checked it. The new declaration is registered as a local symbol in
/// the current scope.
///
/// # Panics
/// Panics if there is no token left to consume.
fn parse_var_decl_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let keyword = tokens.next().unwrap();
    let loc = keyword.source_location();
    let is_let = keyword.token() == Token::Let;

    let name = self.parse_ident(tokens)?;
    // NOTE(review): the symbol's location is the keyword's location, not the
    // identifier's — confirm this is intended.
    let name_loc = keyword.source_location();

    let has_type = tokens.eat_token(Token::Colon).is_some();
    let ty = if has_type {
        Some(self.parse_type(tokens)?)
    } else {
        None
    };

    let has_init = tokens.eat_token(Token::Equal).is_some();
    let assignment = if has_init {
        Some(self.parse_expr(tokens)?)
    } else {
        None
    };

    let decl = self.ast.push_var_decl(is_let, name, ty, assignment, loc);
    let scope = self.current_scope();
    self.syms
        .insert_symbol(scope, name, SymbolKind::Local(name_loc), decl);
    Ok(decl)
}
/// Parses the contents of a block (everything between the braces) into the
/// already reserved node `block`.
///
/// Statements are collected until a `}` is peeked (which is not consumed here)
/// or until an expression that is directly followed by `}` is parsed, which
/// becomes the block's trailing expression.
///
/// Returns `block` after filling it in via `set_block`. Fails with
/// `UnexpectedEndOfTokens` when the tokens run out before the block ends, and
/// propagates errors from the statement and expression parsers.
fn parse_block_inner(
&mut self,
block: Index,
tokens: &mut TokenIterator,
) -> ParseResult<Index> {
let loc = tokens.current_source_location();
let mut statements = Vec::new();
// the loop's break value is the optional trailing expression
let trailing = loop {
// peeked before trying constant declarations; only used if none was parsed
let next = tokens.peek_token().ok_or(ErrorInfo {
error: ParseError::UnexpectedEndOfTokens,
loc: tokens.current_source_location(),
})?;
// declarations recognised by `parse_constant_decls` take priority over statements
if let Some(decl) = self.parse_constant_decls(tokens)? {
statements.push(decl);
} else {
match next.token() {
Token::CloseBrace => {
// end of block without a trailing expression
break None;
}
Token::Return => {
statements.push(self.parse_return_stmt(tokens)?);
}
Token::Var | Token::Let => {
statements.push(self.parse_var_decl(tokens)?);
}
_ => {
if self.is_statement(tokens) {
// expr -> statements
let expr = self
.parse_with_trailing_semi(tokens, |this, tokens| {
this.parse_expr(tokens)
})?;
statements.push(expr);
} else {
// expr -> trailing
let expr = self.parse_expr(tokens)?;
if !tokens.is_next_token(Token::CloseBrace) {
// NOTE(review): the parsed `expr` is not added to `statements` on this
// path; only the error node is — confirm this is intended.
statements.push(self.push_error(
ParseError::ExpectedEndOfBlock,
tokens.current_source_location(),
));
} else {
break Some(expr);
}
}
}
}
}
};
self.ast.set_block(block, statements, trailing, loc);
Ok(block)
}
/// BLOCK <-
///     { STATEMENT* EXPRESSION? }
///
/// Reserves the block node, makes it the current scope while its contents are
/// parsed, and pops the scope again whether or not parsing succeeded.
fn parse_block(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let block = self.parse_braced(tokens, |this, tokens| {
        let node = this.ast.reserve_node();
        this.push_scope(node, intern::Index::invalid());
        let parsed = this.parse_block_inner(node, tokens);
        this.pop_scope();
        parsed
    })?;
    Ok(block)
}
/// PARAMETER_LIST <-
///     PARAMETER
///     PARAMETER_LIST , PARAMETER
///
/// Stops after a parameter that is not followed by `,`, or that is followed by
/// `,` and then `)`; in the latter case the comma is left unconsumed.
fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    let mut params = Vec::new();
    loop {
        params.push(self.parse_parameter(tokens)?);
        let another_follows = tokens.is_next_token(Token::Comma)
            && !tokens.is_next_token2(Token::CloseParens);
        if !another_follows {
            break;
        }
        // skip comma
        _ = tokens.next();
    }
    Ok(self.ast.push_parameter_list(params, loc))
}
/// PARAMETER <-
///     IDENT : TYPENAME
///
/// The parameter is registered as a local symbol in the current scope.
///
/// # Errors
/// Returns `ExpectedToken(Colon)`, located at the start of the parameter, when
/// the `:` is missing; propagates errors from parsing the name and the type.
fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    let name = self.parse_ident(tokens)?;
    if tokens.eat_token(Token::Colon).is_none() {
        return Err(ErrorInfo {
            error: ParseError::ExpectedToken(Token::Colon),
            loc,
        });
    }
    let ty = self.parse_type(tokens)?;
    let param = self.ast.push_parameter(name, ty, loc);
    let scope = self.current_scope();
    self.syms
        .insert_symbol(scope, name, SymbolKind::Local(loc), param);
    Ok(param)
}
/// ARGUMENT <-
///     IDENT : EXPR
///     EXPR
///
/// The named form is chosen when the next two tokens are an identifier
/// followed by `:`.
fn parse_argument(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    let is_named = tokens.is_next_token2(Token::Colon) && tokens.is_next_token(Token::Ident);
    let name = if is_named {
        let name = self.parse_ident(tokens)?;
        // we checked `is_next_token2`
        _ = tokens.eat_token(Token::Colon).unwrap();
        Some(name)
    } else {
        None
    };
    let expr = self.parse_expr(tokens)?;
    Ok(match name {
        Some(name) => self.ast.push_named_argument(name, expr, loc),
        None => self.ast.push_argument(expr, loc),
    })
}
/// ARGUMENT_LIST <-
///     ARGUMENT
///     ARGUMENT_LIST , ARGUMENT
///
/// Stops after an argument that is not followed by `,`, or that is followed by
/// `,` and then `)`; in the latter case the comma is left unconsumed.
fn parse_argument_list(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    let mut args = Vec::new();
    loop {
        args.push(self.parse_argument(tokens)?);
        let another_follows = tokens.is_next_token(Token::Comma)
            && !tokens.is_next_token2(Token::CloseParens);
        if !another_follows {
            break;
        }
        // skip comma
        _ = tokens.next();
    }
    Ok(self.ast.push_argument_list(args, loc))
}
/// PRIMARY_EXPR <-
///     IDENTIFIER
///     INTEGER_CONSTANT
///     FLOATING_CONSTANT
///     ( EXPRESSION )
///     BLOCK
///
/// An identifier becomes an unresolved declaration reference that remembers the
/// scope it was seen in.
///
/// # Errors
/// Returns `ExpectedPrimaryExpression` when there is no next token or it cannot
/// start a primary expression.
fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    let Some(next) = tokens.peek_token() else {
        return Err(ErrorInfo {
            error: ParseError::ExpectedPrimaryExpression,
            loc,
        });
    };
    match next.token() {
        Token::IntegerBinConstant
        | Token::IntegerHexConstant
        | Token::IntegerOctConstant
        | Token::IntegerConstant => {
            _ = tokens.next();
            let (value, ty) = self.try_parse_integral_constant(&next);
            Ok(self.ast.push_constant(value, ty, loc))
        }
        Token::FloatingConstant
        | Token::FloatingExpConstant
        | Token::DotFloatingConstant
        | Token::DotFloatingExpConstant => {
            _ = tokens.next();
            let (value, ty) = self.parse_floating_constant(&next);
            Ok(self.ast.push_constant(value, ty, loc))
        }
        Token::OpenParens => {
            let inner =
                self.parse_parenthesised(tokens, |this, tokens| this.parse_expr(tokens))?;
            Ok(inner)
        }
        Token::OpenBrace => self.parse_block(tokens),
        Token::Ident => {
            _ = tokens.next();
            let name = self.intern.get_or_insert(intern::Key::String {
                str: next.lexeme(),
            });
            let scope = self.current_scope();
            Ok(self.ast.push_decl_ref_unresolved(scope, name, loc))
        }
        // TODO: eventually handle paths
        _ => Err(ErrorInfo {
            error: ParseError::ExpectedPrimaryExpression,
            loc,
        }),
    }
}
/// POSTFIX_EXPR <-
///     PRIMARY_EXPR
///     PRIMARY_EXPR ( )
///     PRIMARY_EXPR ( ARGUMENT_LIST )
///     PRIMARY_EXPR [ EXPR ]
///
/// At most one postfix operation is applied to the primary expression.
fn parse_postfix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let lhs = self.parse_primary_expr(tokens)?;
    let Some(next) = tokens.peek_token() else {
        return Ok(lhs);
    };
    let loc = next.source_location();
    match next.token() {
        Token::OpenParens => {
            let arguments = self.parse_parenthesised(tokens, |this, tokens| {
                if tokens.is_next_token(Token::CloseParens) {
                    // `()`: an empty argument list located at the opening parenthesis
                    Ok(this.ast.push_argument_list([], loc))
                } else {
                    this.parse_argument_list(tokens)
                }
            })?;
            Ok(self.ast.push_call_expr(lhs, arguments, loc))
        }
        Token::OpenSquareBracket => {
            let subscript =
                self.parse_bracketed(tokens, |this, tokens| this.parse_expr(tokens))?;
            Ok(self
                .ast
                .push_binary(Tag::SubscriptExpr, lhs, subscript, loc))
        }
        _ => Ok(lhs),
    }
}
fn push_error(&mut self, error: ParseError, loc: SourceLocation) -> Index {
self.errors.push(ErrorInfo { error, loc });
self.ast.push_error(error, loc)
}
/// PREFIX_EXPR <-
///     POSTFIX_EXPR
///     ! POSTFIX_EXPR
///     - POSTFIX_EXPR
///     & POSTFIX_EXPR
///     * POSTFIX_EXPR
///
/// # Errors
/// Returns `ExpectedPrefixExpression` when there is no next token; propagates
/// errors from parsing the operand.
fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let Some(next) = tokens.peek_token() else {
        return Err(ErrorInfo {
            error: ParseError::ExpectedPrefixExpression,
            loc: tokens.current_source_location(),
        });
    };
    let loc = next.source_location();
    let operator = match next.token() {
        Token::Bang => Some(Tag::Not),
        Token::Minus => Some(Tag::Negate),
        Token::Ampersand => Some(Tag::AddressOf),
        Token::Star => Some(Tag::Deref),
        _ => None,
    };
    let Some(tag) = operator else {
        // no prefix operator: plain postfix expression
        return self.parse_postfix_expr(tokens);
    };
    // consume the operator, then parse its operand
    _ = tokens.next();
    let operand = self.parse_postfix_expr(tokens)?;
    Ok(self.ast.push_unary(tag, operand, loc))
}
/// AS_EXPR <-
///     PREFIX_EXPR
///     PREFIX_EXPR as TYPENAME
///
/// A cast node is located at the start of the expression being cast.
fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    let expr = self.parse_prefix_expr(tokens)?;
    if tokens.eat_token(Token::As).is_none() {
        return Ok(expr);
    }
    let typename = self.parse_type(tokens)?;
    Ok(self.ast.push_cast(expr, typename, loc))
}
/// BINARY_EXPR <-
///     AS_EXPR
///     AS_EXPR * EXPRESSION
///     AS_EXPR / EXPRESSION
///     AS_EXPR % EXPRESSION
///     AS_EXPR + EXPRESSION
///     AS_EXPR - EXPRESSION
///     AS_EXPR << EXPRESSION
///     AS_EXPR >> EXPRESSION
///     AS_EXPR < EXPRESSION
///     AS_EXPR > EXPRESSION
///     AS_EXPR <= EXPRESSION
///     AS_EXPR >= EXPRESSION
///     AS_EXPR == EXPRESSION
///     AS_EXPR != EXPRESSION
///     AS_EXPR & EXPRESSION
///     AS_EXPR ^ EXPRESSION
///     AS_EXPR | EXPRESSION
///     AS_EXPR && EXPRESSION
///     AS_EXPR || EXPRESSION
///
/// Operators whose entry in `PRECEDENCE_MAP` is below `precedence` are left for
/// the caller. The right-hand side is parsed with `prec + 1`, so operators of
/// equal precedence group to the left.
fn parse_binary_expr(
    &mut self,
    tokens: &mut TokenIterator,
    precedence: u32,
) -> ParseResult<Index> {
    let mut lhs = self.parse_as_expr(tokens)?;
    while let Some(peeked) = tokens.peek_token() {
        let loc = peeked.source_location();
        // stop at anything that is not a binary operator of sufficient precedence
        let Some(prec) = PRECEDENCE_MAP.get(&peeked.token()).cloned() else {
            break;
        };
        if prec < precedence {
            break;
        }
        // SAFETY: we peeked this token
        let operator = tokens.next().unwrap();
        let rhs = self.parse_binary_expr(tokens, prec + 1)?;
        let tag = match operator.token() {
            Token::PipePipe => Tag::Or,
            Token::AmpersandAmpersand => Tag::And,
            Token::Pipe => Tag::BitOr,
            Token::Caret => Tag::BitXOr,
            Token::Ampersand => Tag::BitAnd,
            Token::BangEqual => Tag::NEq,
            Token::EqualEqual => Tag::Eq,
            Token::LessEqual => Tag::Le,
            Token::GreaterEqual => Tag::Ge,
            Token::Less => Tag::Lt,
            Token::Greater => Tag::Gt,
            Token::GreaterGreater => Tag::Shr,
            Token::LessLess => Tag::Shl,
            Token::Plus => Tag::Add,
            Token::Minus => Tag::Sub,
            Token::Percent => Tag::Rem,
            Token::Star => Tag::Mul,
            Token::Slash => Tag::Div,
            _ => unreachable!(),
        };
        lhs = self.ast.push_binary(tag, lhs, rhs, loc);
    }
    Ok(lhs)
}
/// ASSIGNMENT_EXPR <-
///     BINARY_EXPRESSION
///     BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION
/// ASSIGNMENT_OP <-
///     = += -= *= /= %= ...
///
/// A compound assignment `a OP= b` is lowered to `a = a OP b`: a binary node
/// for the operation is built first and then assigned to the left-hand side.
/// Both nodes are located at the assignment operator.
fn parse_assignment_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let lhs = self.parse_binary_expr(tokens, 0)?;
    let is_assignment = tokens
        .peek_token()
        .map(|itm| itm.token().is_assignment_op())
        == Some(true);
    if !is_assignment {
        return Ok(lhs);
    }
    // SAFETY: we peeked
    let op = tokens.next().unwrap();
    let loc = op.source_location();
    let rhs = self.parse_expr(tokens)?;
    let rhs = if op.token() == Token::Equal {
        rhs
    } else {
        let tag = match op.token() {
            Token::PlusEqual => Tag::Add,
            Token::MinusEqual => Tag::Sub,
            Token::StarEqual => Tag::Mul,
            // `/=` divides; it must not share the subtraction tag
            Token::SlashEqual => Tag::Div,
            Token::PercentEqual => Tag::Rem,
            Token::PipeEqual => Tag::BitOr,
            Token::CaretEqual => Tag::BitXOr,
            Token::AmpersandEqual => Tag::BitAnd,
            Token::LessLessEqual => Tag::Shl,
            Token::GreaterGreaterEqual => Tag::Shr,
            _ => {
                unreachable!()
            }
        };
        self.ast.push_binary(tag, lhs, rhs, loc)
    };
    Ok(self.ast.push_assign(lhs, rhs, loc))
}
/// ELSE_EXPR <-
///     'else' (IF_EXPR | EXPR_OR_STATEMENT_OR_BLOCK)
///
/// Consumes the `else` keyword and parses what follows: a chained `if`
/// expression when the next token is `if`, otherwise an expression or block
/// wrapped as a block.
///
/// # Panics
///
/// Panics if the next token is not `else`.
fn parse_else_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    // Callers are expected to have checked for the `else` keyword already.
    let _keyword = tokens.eat_token(Token::Else).unwrap();

    if !tokens.is_next_token(Token::If) {
        return self.parse_expr_or_block_as_block(tokens);
    }
    self.parse_if_expr(tokens)
}
/// IF_EXPR <-
///     'if' ( EXPR ) EXPR_OR_STATEMENT_OR_BLOCK ELSE_EXPR?
///
/// Parses the parenthesised condition, the body, and an optional `else`
/// branch. Pushes an if-else node when an `else` follows the body and a
/// plain if node otherwise, both located at the `if` keyword.
///
/// # Panics
///
/// Panics if the next token is not `if`.
fn parse_if_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    // Callers are expected to have checked for the `if` keyword already.
    let keyword = tokens.eat_token(Token::If).unwrap();
    let loc = keyword.source_location();

    let cond = self.parse_parenthesised(tokens, |parser, toks| parser.parse_expr(toks))?;
    let body = self.parse_expr_or_block_as_block(tokens)?;

    let node = if tokens.is_next_token(Token::Else) {
        let alternative = self.parse_else_expr(tokens)?;
        self.ast.push_if_else(cond, body, alternative, loc)
    } else {
        self.ast.push_if(cond, body, loc)
    };
    Ok(node)
}
/// Parses either a braced block or a single expression, always yielding a
/// block node.
///
/// A leading `{` is handed to `parse_block`. Anything else is parsed as an
/// expression and wrapped in a block that has no statements and the
/// expression as its trailing value.
///
/// Returns `ParseError::ExpectedExpression` when no tokens are left.
fn parse_expr_or_block_as_block(
    &mut self,
    tokens: &mut TokenIterator,
) -> ParseResult<Index> {
    let Some(next) = tokens.peek_token() else {
        let loc = tokens.current_source_location();
        return Err(ErrorInfo {
            error: ParseError::ExpectedExpression,
            loc,
        });
    };

    if matches!(next.token(), Token::OpenBrace) {
        return self.parse_block(tokens);
    }

    // Bare expression: the synthetic block starts where the expression starts.
    let loc = tokens.current_source_location();
    let expr = self.parse_expr(tokens)?;
    Ok(self.ast.push_block([], Some(expr), loc))
}
/// Parses an expression: an `if` expression when the next token is `if`,
/// otherwise an assignment expression.
///
/// Returns `ParseError::ExpectedExpression` at the current location when no
/// tokens are left.
fn parse_expr(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    let loc = tokens.current_source_location();
    let next = tokens.peek_token().ok_or(ErrorInfo {
        error: ParseError::ExpectedExpression,
        loc,
    })?;

    if next.token() == Token::If {
        self.parse_if_expr(tokens)
    } else {
        self.parse_assignment_expr(tokens)
    }
}
/// TYPE_DECL <-
///     type IDENTIFIER = TYPE_UNION ;
///     type IDENTIFIER = '(' (TYPE,)* ')' ;
///     type IDENTIFIER = extern? union { (IDENTIFIER: TYPE,)* }
///     type IDENTIFIER = extern? packed? enum { (IDENTIFIER (= EXPRESSION),)* }
///     type IDENTIFIER = extern? packed? struct { (IDENTIFIER: TYPE,)* }
///
/// Parses the `type NAME =` prefix and the optional `extern` / `packed`
/// attributes, then dispatches on the kind of declaration that follows.
/// Only struct declarations are implemented so far.
///
/// # Errors
///
/// * `ExpectedToken` when the `type` keyword or the `=` is missing.
/// * `ExpectedTypeDeclaration` when the tokens end after the attributes or
///   the next token cannot start a type declaration.
/// * `UnexpectedTypeAttributes` when `extern` / `packed` precede something
///   other than `struct`, `union` or `enum`.
///
/// # Panics
///
/// Panics with `unimplemented!()` for union, enum, tuple and sum type
/// declarations.
fn parse_type_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
    // These errors used to be built and then discarded; propagate them so a
    // missing `type` or `=` is actually reported.
    _ = tokens.eat_token(Token::Type).ok_or(ErrorInfo {
        error: ParseError::ExpectedToken(Token::Type),
        loc: tokens.current_source_location(),
    })?;
    let name = self.parse_ident(tokens)?;
    let loc = tokens.current_source_location();
    _ = tokens.eat_token(Token::Equal).ok_or(ErrorInfo {
        error: ParseError::ExpectedToken(Token::Equal),
        loc: tokens.current_source_location(),
    })?;

    // One flag per requested attribute, in the order they were requested.
    let (c_like, packed) = {
        let seen = tokens.eat_all_zero_or_once(&[Token::Extern, Token::Packed]);
        (seen[0], seen[1])
    };
    let has_attributes = c_like || packed;

    let Some(next) = tokens.peek_token() else {
        return Err(ErrorInfo {
            error: ParseError::ExpectedTypeDeclaration,
            loc: tokens.current_source_location(),
        });
    };

    match next.token() {
        Token::Struct => self.parse_struct_decl(tokens, name, c_like, packed, loc),
        Token::Union => {
            unimplemented!()
        }
        Token::Enum => {
            unimplemented!()
        }
        // Everything below cannot carry `extern` / `packed`.
        _ if has_attributes => Err(ErrorInfo {
            error: ParseError::UnexpectedTypeAttributes,
            loc: tokens.current_source_location(),
        }),
        Token::OpenParens => {
            // tuple
            unimplemented!()
        }
        Token::Ident => {
            // sumtype
            unimplemented!()
        }
        _ => Err(ErrorInfo {
            error: ParseError::ExpectedTypeDeclaration,
            loc: tokens.current_source_location(),
        }),
    }
}
/// SUMTYPE_DECL <-
///     type IDENTIFIER = TYPE_UNION
/// TYPE_UNION <-
///     TYPE (| TYPE_UNION)?
///     IDENTIFIER: TYPE (| TYPE_UNION)?
///
/// Not implemented yet: calling this panics via `todo!()`.
fn parse_sumtype_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
todo!()
}
/// TUPLE_DECL <-
///     type IDENTIFIER = (TYPE,* )
///
/// Not implemented yet: calling this panics via `todo!()`.
fn parse_tuple_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
todo!()
}
/// UNION_DECL <-
///     type IDENTIFIER = union { IDENTIFIER: TYPE,* }
///
/// Not implemented yet: calling this panics via `todo!()`.
fn parse_union_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
todo!()
}
/// ENUM_DECL <-
///     type IDENTIFIER = packed? enum { IDENTIFIER (= EXPRESSION),* }
///
/// Not implemented yet: calling this panics via `todo!()`.
fn parse_enum_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult<Index> {
todo!()
}
/// STRUCT_DECL <-
///     type IDENTIFIER = extern? packed? struct { STRUCT_FIELD,* }
///
/// Parses the `struct { ... }` part of a type declaration. `name`, `c_like`
/// and `packed` come from the already-parsed prefix; `loc` is the location
/// recorded on the resulting declaration node.
///
/// The field list is interned as a struct type and a struct declaration node
/// referring to it is pushed.
///
/// Returns `ExpectedToken(Token::Struct)` when the `struct` keyword is
/// missing.
fn parse_struct_decl(
    &mut self,
    tokens: &mut TokenIterator,
    name: intern::Index,
    c_like: bool,
    packed: bool,
    loc: SourceLocation,
) -> ParseResult<Index> {
    if tokens.eat_token(Token::Struct).is_none() {
        return Err(ErrorInfo {
            error: ParseError::ExpectedToken(Token::Struct),
            loc: tokens.current_source_location(),
        });
    }

    self.parse_braced(tokens, |this, tokens| {
        let fields = this.parse_struct_fields(tokens)?;
        // A trailing comma after the last field is optional.
        _ = tokens.eat_token(Token::Comma);
        let struct_type = this.intern.get_struct_type(name, packed, c_like, fields);
        Ok(this.ast.push_struct_decl(struct_type, loc))
    })
}
/// Runs `parse` and then requires a terminating `;`.
///
/// * If `parse` succeeds, the `;` is consumed and the parsed node returned;
///   a missing `;` yields `ExpectedToken(Token::Semi)`.
/// * If `parse` fails, the tokens are advanced past the next `;` and the
///   failure is recorded as an error node, which is returned as `Ok`. If no
///   `;` can be found, `ExpectedToken(Token::Semi)` is returned instead.
fn parse_with_trailing_semi<F>(
    &mut self,
    tokens: &mut TokenIterator,
    parse: F,
) -> ParseResult<Index>
where
    F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult<Index>,
{
    let failure = match parse(self, tokens) {
        Ok(node) => {
            if tokens.eat_token(Token::Semi).is_none() {
                return Err(ErrorInfo {
                    error: ParseError::ExpectedToken(Token::Semi),
                    loc: tokens.current_source_location(),
                });
            }
            return Ok(node);
        }
        Err(failure) => failure,
    };

    // Recovery: resynchronise after the statement terminator.
    if tokens.advance_past_semi().is_none() {
        return Err(ErrorInfo {
            error: ParseError::ExpectedToken(Token::Semi),
            loc: tokens.current_source_location(),
        });
    }
    Ok(self.push_error(failure.error, failure.loc))
}
/// Parses `open <parse> close`, delegating error recovery to `on_err`.
///
/// * `open` must be the next token, otherwise `ExpectedToken(open)` is
///   returned.
/// * If `parse` succeeds, `close` must follow; when it does not, an
///   "unmatched delimiter" error carrying the start position of the opening
///   token is returned. The variant is chosen from `open`.
/// * If `parse` fails, `on_err` is called with the error and the opening
///   token item and its result is returned.
fn parse_inner<F, E>(
    &mut self,
    tokens: &mut TokenIterator,
    open: Token,
    close: Token,
    parse: F,
    on_err: E,
) -> ParseResult<Index>
where
    F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult<Index>,
    E: FnOnce(&mut Self, &mut TokenIterator, ErrorInfo, TokenItem) -> ParseResult<Index>,
{
    let start = match tokens.eat_token(open) {
        Some(item) => item,
        None => {
            return Err(ErrorInfo {
                error: ParseError::ExpectedToken(open),
                loc: tokens.current_source_location(),
            });
        }
    };

    let node = match parse(self, tokens) {
        Ok(node) => node,
        Err(err) => return on_err(self, tokens, err, start),
    };

    if tokens.eat_token(close).is_some() {
        return Ok(node);
    }

    // The closing delimiter is missing: point back at the opening one.
    let opened_at = start.token_pos().start;
    let error = match open {
        Token::OpenBrace => ParseError::UnmatchedBrace(opened_at),
        Token::OpenParens => ParseError::UnmatchedParens(opened_at),
        Token::OpenSquareBracket => ParseError::UnmatchedSquareBracket(opened_at),
        _ => ParseError::UnmatchedDelimiter(opened_at),
    };
    Err(ErrorInfo {
        error,
        loc: tokens.current_source_location(),
    })
}
/// Like `parse_inner`, with a built-in recovery strategy.
///
/// When `parse` fails, the tokens are advanced past the end of the
/// delimited region selected by `close` (`}`, `)`, `]` or `;`) and the
/// failure is recorded as an error node, which is returned as `Ok`. If
/// skipping fails, the matching "unmatched" error (or
/// `ExpectedToken(Token::Semi)` for `;`) is returned.
///
/// # Panics
///
/// The recovery path panics with `unimplemented!()` for any other `close`
/// token.
fn parse_inner2<F>(
    &mut self,
    tokens: &mut TokenIterator,
    open: Token,
    close: Token,
    parse: F,
) -> ParseResult<Index>
where
    F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult<Index>,
{
    self.parse_inner(tokens, open, close, parse, |this, tokens, err, start| {
        // First try to skip to the end of the delimited region.
        let skipped = match close {
            Token::CloseBrace => tokens.advance_past_end_of_braced().is_some(),
            Token::CloseParens => tokens.advance_past_end_of_parens().is_some(),
            Token::CloseSquareBracket => tokens.advance_past_end_of_bracketed().is_some(),
            Token::Semi => tokens.advance_past_semi().is_some(),
            _ => unimplemented!(),
        };
        if skipped {
            return Ok(this.push_error(err.error, err.loc));
        }

        // Skipping failed: report the delimiter that was never closed.
        let error = match close {
            Token::CloseBrace => ParseError::UnmatchedBrace(start.token_pos().start),
            Token::CloseParens => ParseError::UnmatchedParens(start.token_pos().start),
            Token::CloseSquareBracket => {
                ParseError::UnmatchedSquareBracket(start.token_pos().start)
            }
            Token::Semi => ParseError::ExpectedToken(Token::Semi),
            // Any other token already hit `unimplemented!()` above.
            _ => unreachable!(),
        };
        Err(ErrorInfo {
            error,
            loc: tokens.current_source_location(),
        })
    })
}
/// Parses `[ <parse> ]` through `parse_inner2`, using its error recovery.
fn parse_bracketed<F>(&mut self, tokens: &mut TokenIterator, parse: F) -> ParseResult<Index>
where
    F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult<Index>,
{
    let (open, close) = (Token::OpenSquareBracket, Token::CloseSquareBracket);
    self.parse_inner2(tokens, open, close, parse)
}
/// Parses `{ <parse> }` through `parse_inner2`, using its error recovery.
fn parse_braced<F>(&mut self, tokens: &mut TokenIterator, parse: F) -> ParseResult<Index>
where
    F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult<Index>,
{
    let (open, close) = (Token::OpenBrace, Token::CloseBrace);
    self.parse_inner2(tokens, open, close, parse)
}
/// Parses `( <parse> )` through `parse_inner2`, using its error recovery.
fn parse_parenthesised<F>(
    &mut self,
    tokens: &mut TokenIterator,
    parse: F,
) -> ParseResult<Index>
where
    F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult<Index>,
{
    let (open, close) = (Token::OpenParens, Token::CloseParens);
    self.parse_inner2(tokens, open, close, parse)
}
/// Parses one or more comma-separated struct fields.
///
/// Returns the `(name, type)` pairs in source order. At least one field is
/// required; the first field error is propagated.
fn parse_struct_fields(
    &mut self,
    tokens: &mut TokenIterator,
) -> ParseResult<Vec<(intern::Index, intern::Index)>> {
    let mut fields = vec![self.parse_struct_field(tokens)?];

    // Continue only while a comma separates two fields. A comma followed by
    // `}` is left unconsumed (presumably `is_next_token2` looks one token
    // past the next one -- confirm against `TokenIterator`).
    while tokens.is_next_token(Token::Comma) && !tokens.is_next_token2(Token::CloseBrace) {
        // skip comma
        _ = tokens.next();
        fields.push(self.parse_struct_field(tokens)?);
    }

    Ok(fields)
}
/// STRUCT_FIELD <-
///     IDENTIFIER: TYPE
///
/// Returns the field's name and type as a pair. Yields
/// `ExpectedToken(Token::Colon)` when the `:` after the name is missing.
fn parse_struct_field(
    &mut self,
    tokens: &mut TokenIterator,
) -> ParseResult<(intern::Index, intern::Index)> {
    let name = self.parse_ident(tokens)?;

    if tokens.eat_token(Token::Colon).is_none() {
        return Err(ErrorInfo {
            error: ParseError::ExpectedToken(Token::Colon),
            loc: tokens.current_source_location(),
        });
    }

    let ty = self.parse_type(tokens)?;
    Ok((name, ty))
}
/// CONSTANT_DECL <-
///     FUNCTION_DECL
///     GLOBAL_DECL
///     STRUCT_DECL
///
/// Dispatches on the next token: `fn`, `const` and `type` start a
/// declaration whose node is returned as `Some`. Any other token yields
/// `Ok(None)` without consuming anything.
///
/// Returns `UnexpectedEndOfTokens` when no tokens are left.
fn parse_constant_decls(
    &mut self,
    tokens: &mut TokenIterator,
) -> ParseResult<Option<Index>> {
    let Some(next) = tokens.peek_token() else {
        return Err(ErrorInfo {
            error: ParseError::UnexpectedEndOfTokens,
            loc: tokens.current_source_location(),
        });
    };

    let decl = match next.token() {
        Token::Fn => Some(self.parse_fn_decl(tokens)),
        Token::Const => Some(self.parse_const_decl(tokens)?),
        Token::Type => Some(self.parse_type_decl(tokens)?),
        _ => None,
    };
    Ok(decl)
}
/// FILE <-
///     (FUNCTION_DECL | GLOBAL_DECL)*
///
/// Reserves a node for the file, opens a scope for it and parses
/// declarations until the tokens run out. Failures do not abort parsing:
/// each one is recorded as an error node in the declaration list. On a token
/// that cannot start a declaration, the tokens are also skipped ahead via
/// `find_next_fn_or_const`.
///
/// Returns the index of the file node.
fn parse_file(&mut self, tokens: &mut TokenIterator) -> Index {
    let start = tokens.current_source_location();
    let file = self.ast.reserve_node();
    self.push_scope(file, intern::Index::invalid());

    let mut decls = Vec::new();
    while let Some(next) = tokens.peek_token() {
        let loc = next.source_location();
        let decl = match self.parse_constant_decls(tokens) {
            Ok(Some(decl)) => decl,
            Ok(None) => {
                // Not the start of any declaration: record it and resynchronise.
                let node = self.push_error(ParseError::UnexpectedTokenAtFileScope, loc);
                self.find_next_fn_or_const(tokens);
                node
            }
            Err(err) => self.push_error(err.error, err.loc),
        };
        decls.push(decl);
    }

    self.pop_scope();
    self.ast.set_file(file, decls, start);
    file
}
/// FILE <-
///     (FUNCTION_DECL | GLOBAL_DECL)*
///
/// Entry point: parses `tokens` as a single file, installs that file as the
/// only element of the AST root and then runs `resolve_decl_refs`.
pub fn parse(&mut self, mut tokens: TokenIterator) {
let file = self.parse_file(&mut tokens);
// The root is set to exactly this one file.
self.ast.set_root([file]);
self.resolve_decl_refs();
}
/// Makes `ast` the innermost scope and registers it under `name`.
///
/// If another scope was active before the push, it is recorded in the symbol
/// table as a `SymbolKind::ParentScope` symbol of the new scope, stored under
/// the invalid intern index.
fn push_scope(&mut self, ast: Index, name: intern::Index) {
    // Look up the enclosing scope before the new one goes on the stack.
    let enclosing = self.scopes.last().copied();
    self.scopes.push(ast);

    if let Some(parent) = enclosing {
        let unnamed = intern::Index::invalid();
        self.syms
            .insert_symbol(ast, unnamed, SymbolKind::ParentScope, parent);
    }

    self.syms.insert_scope(name, ast);
}
/// Removes the innermost scope from the scope stack. Does nothing when the
/// stack is already empty; the popped value is discarded.
fn pop_scope(&mut self) {
self.scopes.pop();
}
/// Looks ahead to decide whether the upcoming tokens form a statement.
///
/// Scans a clone of `tokens`, so the caller's position is not moved, and
/// tracks the nesting depth of braces, parentheses and square brackets.
/// Returns `true` as soon as a `;` is seen while all three depths are zero.
/// Returns `false` when a closing delimiter drives any depth negative or the
/// tokens run out first.
fn is_statement(&self, tokens: &mut TokenIterator) -> bool {
    let mut lookahead = tokens.clone();
    let (mut braces, mut parens, mut brackets) = (0, 0, 0);

    while let Some(item) = lookahead.next() {
        match item.token() {
            Token::OpenBrace => braces += 1,
            Token::CloseBrace => braces -= 1,
            Token::OpenParens => parens += 1,
            Token::CloseParens => parens -= 1,
            Token::OpenSquareBracket => brackets += 1,
            Token::CloseSquareBracket => brackets -= 1,
            Token::Semi if braces == 0 && parens == 0 && brackets == 0 => return true,
            _ => {}
        }

        // A closer without a matching opener ends the region being scanned.
        if braces < 0 || parens < 0 || brackets < 0 {
            return false;
        }
    }

    false
}
/// Advances `tokens` until just before the next `const`, `fn` or `type`
/// token, as implemented by `advance_until_before_one_of`.
///
/// Returns `Some(())` when that helper returns `Some`, otherwise `None`.
fn find_next_fn_or_const(&mut self, tokens: &mut TokenIterator) -> Option<()> {
    let decl_starts = [Token::Const, Token::Fn, Token::Type];
    tokens.advance_until_before_one_of(&decl_starts)?;
    Some(())
}
}
}