From ddd65e8c4d11c15bd644ea7ed7bdbd0c0c178e55 Mon Sep 17 00:00:00 2001 From: Janis Date: Fri, 16 Aug 2024 22:38:13 +0200 Subject: [PATCH] has string_table now :^) --- src/ast.rs | 132 +++++++++++++++++++--- src/lexer.rs | 266 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 8 +- src/parser.rs | 126 ++++++++++++--------- src/string_table.rs | 9 +- src/symbol_table.rs | 6 +- src/triples.rs | 207 ++++++++++++++++++++++------------ 7 files changed, 613 insertions(+), 141 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index bc41166..dc21424 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,5 +1,9 @@ use std::num::NonZero; +use itertools::Itertools; + +use crate::string_table::{self, ImmOrIndex}; + pub type Node = NonZero; #[derive(Debug, Clone, PartialEq, Eq)] @@ -40,16 +44,20 @@ pub enum Tag { body: Node, }, Ident { - name: String, + name: string_table::Index, }, IntegralConstant { - bits: u64, - ty: IntegralType, + bits: string_table::Index, + ty: Option, }, FloatingConstant { bits: u64, ty: FloatingType, }, + Constant { + bytes: ImmOrIndex, + ty: Type, + }, Block { /// ReturnStmt | ExprStmt | VarDecl statements: Vec, @@ -231,11 +239,12 @@ impl IntegralType { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Eq, Hash)] pub enum Type { Any, Void, Bool, + ComptimeNumber, Integer(IntegralType), Floating(FloatingType), Pointer { @@ -248,7 +257,86 @@ pub enum Type { }, } +impl core::fmt::Display for Type { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Type::Any => f.write_str("?"), + Type::Void => f.write_str("void"), + Type::Bool => f.write_str("bool"), + Type::ComptimeNumber => f.write_str("comptime_number"), + Type::Integer(t) => t.fmt(f), + Type::Floating(t) => t.fmt(f), + Type::Pointer { constness, pointee } => { + write!(f, "*{}{}", if *constness { "const " } else { "" }, pointee) + } + Type::Fn { + parameter_types, + return_type, + } => { + write!(f, "fn (")?; + for param in parameter_types.iter().map(|p| Some(p)).intersperse(None) { + match param { + Some(param) => param.fmt(f)?, + None => write!(f, ", ")?, + } + } + + write!(f, ") -> {}", return_type) + } + } + } +} + +impl PartialEq for Type { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::ComptimeNumber, Self::Integer(_)) => true, + (Self::Integer(_), Self::ComptimeNumber) => true, + (Self::ComptimeNumber, Self::Floating(_)) => true, + (Self::Floating(_), Self::ComptimeNumber) => true, + (Self::Integer(l0), Self::Integer(r0)) => l0 == r0, + (Self::Floating(l0), Self::Floating(r0)) => l0 == r0, + ( + Self::Pointer { + constness: l_constness, + pointee: l_pointee, + }, + Self::Pointer { + constness: r_constness, + pointee: r_pointee, + }, + ) => l_constness == r_constness && l_pointee == r_pointee, + ( + Self::Fn { + parameter_types: l_parameter_types, + return_type: l_return_type, + }, + Self::Fn { + parameter_types: r_parameter_types, + return_type: r_return_type, + }, + ) => l_parameter_types == r_parameter_types && l_return_type == r_return_type, + _ => core::mem::discriminant(self) == core::mem::discriminant(other), + } + } +} + impl Type { + pub fn equal_type(&self, rhs: &Self) -> Option { + match (self, rhs) { + (Self::ComptimeNumber, Self::Floating(_)) + | (Self::ComptimeNumber, Self::Integer(_)) => Some(rhs.clone()), + (Self::Integer(_), Self::ComptimeNumber) + | (Self::Floating(_), Self::ComptimeNumber) => Some(self.clone()), + _ => { + if self.eq(rhs) { + Some(self.clone()) + } else { + None + } + } + } + } pub fn void() -> Type { Self::Void } @@ -256,7 +344,7 @@ impl Type { Self::Void } pub fn any() -> Type { - Self::Void + Self::Any } pub fn into_ptr(self) -> Type { Self::Pointer { @@ -274,52 +362,67 @@ impl Type { pub fn can_negate(&self) -> bool { match self { - Type::Bool | Type::Integer(_) => true, + Type::ComptimeNumber | Type::Bool | Type::Integer(_) => true, _ => false, } } pub fn can_bitxor_and_or(&self) -> bool { match self { - Type::Bool | Type::Integer(_) => true, + Type::ComptimeNumber | Type::Bool | Type::Integer(_) => true, _ => false, } } pub fn can_add_sub(&self) -> bool { match self { - Type::Pointer { .. } | Type::Floating(_) | Type::Integer(_) => true, + Type::ComptimeNumber | Type::Pointer { .. } | Type::Floating(_) | Type::Integer(_) => { + true + } _ => false, } } pub fn can_shift(&self) -> bool { match self { - Type::Integer(_) => true, + Type::ComptimeNumber | Type::Integer(_) => true, _ => false, } } pub fn can_eq(&self) -> bool { match self { - Type::Pointer { .. } | Type::Bool | Type::Floating(_) | Type::Integer(_) => true, + Type::ComptimeNumber + | Type::Pointer { .. } + | Type::Bool + | Type::Floating(_) + | Type::Integer(_) => true, _ => false, } } pub fn can_cmp(&self) -> bool { match self { - Type::Pointer { .. } | Type::Floating(_) | Type::Integer(_) => true, + Type::ComptimeNumber | Type::Pointer { .. } | Type::Floating(_) | Type::Integer(_) => { + true + } _ => false, } } pub fn can_mul_div_rem(&self) -> bool { match self { - Type::Floating(_) | Type::Integer(_) => true, + Type::ComptimeNumber | Type::Floating(_) | Type::Integer(_) => true, _ => false, } } pub fn is_integer(&self) -> bool { match self { - Type::Integer(_) => true, + Type::ComptimeNumber | Type::Integer(_) => true, + _ => false, + } + } + + pub fn is_float(&self) -> bool { + match self { + Type::ComptimeNumber | Type::Floating(_) => true, _ => false, } } @@ -336,6 +439,7 @@ impl Type { match self { Type::Any => 0, Type::Void => 0, + Type::ComptimeNumber => 0, Type::Bool => 1, Type::Integer(t) => t.bits.div_ceil(8) as u32, Type::Floating(t) => match t { @@ -351,7 +455,7 @@ impl Type { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PrimitiveType { FloatingType(FloatingType), - IntegralType(Node), + IntegralType(IntegralType), Bool, Void, } diff --git a/src/lexer.rs b/src/lexer.rs index 64fe000..25f9acc 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -582,6 +582,250 @@ fn try_parse_integral_type(source: &mut Chars) -> Result> { Ok(Some(())) } +pub mod bigint { + + use super::Radix; + pub struct BigInt(Vec); + + impl BigInt { + pub fn parse_digits>(text: C, radix: Radix) -> BigInt { + parse_bigint(text.into_iter(), radix) + } + + pub fn bit_width(&self) -> u32 { + let mut bits = self.0.len() as u32; + + for d in self.0.iter().rev() { + if *d == 0 { + bits -= u32::BITS; + } else { + bits -= d.leading_zeros(); + break; + } + } + + bits + } + + pub fn from_bytes_le(bytes: &[u8]) -> BigInt { + let data = bytes + .chunks(4) + .map(|chunk| { + let mut int = [0u8; 4]; + int[..chunk.len()].copy_from_slice(chunk); + u32::from_le_bytes(int) + }) + .collect::>(); + + BigInt(data) + } + + pub fn into_bytes_le(&self) -> Vec { + let mut bytes = Vec::::new(); + + for d in &self.0[..] { + bytes.extend(&d.to_le_bytes()); + } + + let count = bytes.iter().rev().take_while(|&&b| b == 0).count(); + bytes.truncate(bytes.len() - count); + + bytes + } + } + + impl core::ops::Add for BigInt { + type Output = Self; + + fn add(mut self, mut rhs: Self) -> Self::Output { + let (mut digits, carry) = if self.0.len() > rhs.0.len() { + let c = add_bigint(&mut self.0, &rhs.0); + (self.0, c) + } else { + let c = add_bigint(&mut rhs.0, &self.0); + (rhs.0, c) + }; + + if carry { + digits.push(u32::from(carry)); + } + + BigInt(digits) + } + } + + impl core::ops::Sub for BigInt { + type Output = Self; + + fn sub(mut self, rhs: Self) -> Self::Output { + if self.0.len() < rhs.0.len() { + println!("extending self by {} zeroes", rhs.0.len() - self.0.len()); + self.0 + .extend(core::iter::repeat(0).take(rhs.0.len() - self.0.len())); + println!("self: {self:?}"); + } + sub_bigint(&mut self.0, &rhs.0); + + self + } + } + + impl core::fmt::Debug for BigInt { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut list = f.debug_list(); + list.entries(self.0.iter()).finish() + } + } + + #[allow(unused)] + /// lhs must be bigger than rhs + fn sub_bigint(lhs: &mut [u32], rhs: &[u32]) { + let len = lhs.len().min(rhs.len()); + let (l_lo, l_hi) = lhs.split_at_mut(len); + let (r_lo, r_hi) = rhs.split_at(len); + + println!("lhs: {{ lo: {l_lo:?}, hi: {l_hi:?} }}"); + println!("rhs: {{ lo: {r_lo:?}, hi: {r_hi:?} }}"); + + let mut borrow = false; + for (lhs, rhs) in l_lo.iter_mut().zip(r_lo) { + (*lhs, borrow) = lhs.borrowing_sub(*rhs, borrow); + } + + if borrow { + for lhs in l_hi { + (*lhs, borrow) = lhs.borrowing_sub(0, borrow); + } + } + + if borrow || !r_hi.iter().all(|&v| v == 0) { + panic!("sub failed: borrow: {borrow}"); + } + } + + /// lhs must be bigger than rhs + fn add_bigint(lhs: &mut [u32], rhs: &[u32]) -> bool { + let (l_lo, l_hi) = lhs.split_at_mut(rhs.len()); + + let mut carry = false; + for (lhs, rhs) in l_lo.iter_mut().zip(rhs) { + (*lhs, carry) = lhs.carrying_add(*rhs, carry); + } + + if carry { + for d in l_hi.iter_mut() { + (*d, carry) = d.carrying_add(0, carry); + if !carry { + break; + } + } + } + + carry + } + + fn parse_bigint(text: impl Iterator, radix: Radix) -> BigInt { + let digits = text + .filter_map(|c| match c { + '_' => None, + c => Some(radix.map_digit(c)), + }) + .collect::>(); + + let (max, power) = { + let radix = radix.radix() as u64; + let mut power = 1; + let mut base = radix; + while let Some(b) = base.checked_mul(radix) { + if b > u32::MAX as u64 { + break; + } + base = b; + power += 1; + } + (base, power) + }; + let radix = radix.radix() as u32; + + let r = digits.len() % power; + let i = if r == 0 { power } else { r }; + let (head, tail) = digits.split_at(i); + + let first = head + .iter() + .fold(0, |acc, &digit| acc * radix + digit as u32); + let mut data = vec![first]; + + for chunk in tail.chunks(power) { + if data.last() != Some(&0) { + data.push(0); + } + let mut carry = 0u64; + for digit in data.iter_mut() { + carry += *digit as u64 * max as u64; + *digit = carry as u32; + carry >>= u32::BITS; + } + assert!(carry == 0); + let next = chunk + .iter() + .fold(0, |acc, &digit| acc * radix + digit as u32); + + let (res, mut carry) = data[0].carrying_add(next, false); + data[0] = res; + if carry { + for digit in data[1..].iter_mut() { + (*digit, carry) = digit.carrying_add(0, carry); + if !carry { + break; + } + } + } + } + BigInt(data) + } + + #[cfg(test)] + mod tests { + use super::*; + + #[test] + fn parse() { + let bigint = super::parse_bigint("2_cafe_babe_dead_beef".chars(), Radix::Hex); + println!("{:#x?}", bigint); + let bigint = super::parse_bigint("f".chars(), Radix::Hex); + println!("{:#x?}", bigint); + } + #[test] + fn add() { + let a = super::parse_bigint("2_0000_0000_0000_0000".chars(), Radix::Hex); + println!("{:#x?}", a); + let b = super::parse_bigint("cafebabe".chars(), Radix::Hex); + println!("{:#x?}", b); + let sum = a + b; + println!("{:#x?}", sum); + } + #[test] + fn sub() { + let a = super::parse_bigint("2_0000_0000_0000_0000".chars(), Radix::Hex); + println!("{:#x?}", a); + let b = super::parse_bigint("ffff_ffff".chars(), Radix::Hex); + println!("{:#x?}", b); + let sum = a - b; + println!("{:#x?}", sum); + } + #[test] + fn overflowing_sub() { + let a = super::parse_bigint("2_0000_0000_0000_0000".chars(), Radix::Hex); + println!("{:#x?}", a); + let b = super::parse_bigint("ffff_ffff".chars(), Radix::Hex); + println!("{:#x?}", b); + let sum = b - a; + println!("{:#x?}", sum); + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Radix { Hex, @@ -638,6 +882,28 @@ impl Radix { _ => None, } } + pub fn map_digit(self, c: char) -> u8 { + match self { + Radix::Hex => match c { + '0'..='9' => c as u8 - b'0', + 'a'..='f' => 10 + c as u8 - b'a', + 'A'..='F' => 10 + c as u8 - b'A', + _ => unreachable!(), + }, + Radix::Bin => match c { + '0'..='1' => c as u8 - b'0', + _ => unreachable!(), + }, + Radix::Dec => match c { + '0'..='9' => c as u8 - b'0', + _ => unreachable!(), + }, + Radix::Oct => match c { + '0'..='7' => c as u8 - b'0', + _ => unreachable!(), + }, + } + } pub fn folding_method(self) -> fn(u64, char) -> u64 { match self { Radix::Hex => { diff --git a/src/lib.rs b/src/lib.rs index 4cd5083..b6d5e3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,10 @@ -#![feature(extract_if, iter_advance_by, box_into_inner, hash_extract_if)] +#![feature( + extract_if, + iter_advance_by, + box_into_inner, + hash_extract_if, + bigint_helper_methods +)] #![allow(unused_macros)] pub mod ast; diff --git a/src/parser.rs b/src/parser.rs index 3e14dfa..0750104 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,7 +5,8 @@ use itertools::Itertools; use crate::{ ast::{FloatingType, IntegralType, LetOrVar, Node, PrimitiveType, Tag, Type}, common::NextIf, - lexer::{Radix, TokenIterator}, + lexer::{bigint::BigInt, Radix, TokenIterator}, + string_table::{Index, StringTable}, symbol_table::{SymbolKind, SymbolTable}, tokens::Token, }; @@ -47,12 +48,6 @@ impl Nodes { inner: vec![Tag::Root], } } - pub fn get_ident_str(&self, node: Node) -> Option<&str> { - match &self.inner[node.get() as usize] { - Tag::Ident { name } => Some(name.as_str()), - _ => None, - } - } fn len(&self) -> u32 { self.inner.len() as u32 } @@ -87,6 +82,7 @@ impl Nodes { pub struct Tree { pub nodes: Nodes, pub st: SymbolTable, + pub strings: StringTable, pub global_decls: Vec, } @@ -125,6 +121,7 @@ impl Tree { Self { nodes: Nodes::new(), st: SymbolTable::new(), + strings: StringTable::new(), global_decls: Vec::new(), } } @@ -205,22 +202,25 @@ impl Tree { IntegralType { signed, bits } } - fn parse_integral_constant(token: Token, lexeme: &str) -> (u64, IntegralType) { + fn parse_integral_constant(token: Token, lexeme: &str) -> (BigInt, Option) { let radix = Radix::from_token(token).unwrap(); // TODO: figure out how to do this safely for bigger types, whether to // wrap, saturate, or else. let iter = &mut lexeme.char_indices(); - let value = iter + let digits = iter .take_while_ref(|&(_, c)| radix.is_digit()(c) || c == '_') .filter(|&(_, c)| c != '_') - .fold(0u64, |acc, (_, c)| radix.folding_method()(acc, c)); + .map(|(_, c)| c) + .collect::>(); + + let value = crate::lexer::bigint::BigInt::parse_digits(digits, radix); let ty = match iter.clone().next() { Some((_, 'u')) | Some((_, 'i')) => { - Self::parse_integral_type(&lexeme[iter.next().unwrap().0..]) + Some(Self::parse_integral_type(&lexeme[iter.next().unwrap().0..])) } - _ => IntegralType::u32(), + _ => None, }; (value, ty) @@ -254,10 +254,18 @@ impl Tree { } fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result { - let name = tokens.expect_token(Token::Ident)?.lexeme().to_owned(); + let ident = tokens.expect_token(Token::Ident)?; + let name = self.strings.insert(ident.lexeme().as_bytes()); Ok(self.nodes.push_tag(Tag::Ident { name })) } + fn ident_index(&self, node: Node) -> Index { + match &self.nodes[node] { + Tag::Ident { name } => *name, + _ => Index::new(0, 0), + } + } + pub fn parse_primitive_type(&mut self, tokens: &mut TokenIterator) -> Result { let token = tokens.next().ok_or(Error::UnexpectedEndOfTokens)?; let prim = match token.token() { @@ -288,9 +296,10 @@ impl Tree { let token = tokens.next().unwrap(); match Self::try_parse_integral_type(token.lexeme())? { Some(int) => Ok(self.nodes.push_tag(Tag::IntegralType(int))), - None => Ok(self.nodes.push_tag(Tag::Ident { - name: token.lexeme().to_owned(), - })), + None => { + let name = self.strings.insert(token.lexeme().as_bytes()); + Ok(self.nodes.push_tag(Tag::Ident { name })) + } } } _ => self.parse_primitive_type(tokens), @@ -317,7 +326,7 @@ impl Tree { None }; - let name_str = self.nodes.get_ident_str(name).unwrap().to_owned(); + let name_str = self.strings.get_str(self.ident_index(name)).to_owned(); let node = { let node = self.nodes.reserve_node(); self.st.insert_symbol(&name_str, node, SymbolKind::Var); @@ -361,7 +370,7 @@ impl Tree { None }; - let name_str = self.nodes.get_ident_str(name).unwrap().to_owned(); + let name_str = self.get_ident_str(name).unwrap().to_owned(); let node = { let node = match self.st.find_root_symbol(&name_str) { Some(r) => r.node(), @@ -406,7 +415,7 @@ impl Tree { let param = self.nodes.reserve_node(); self.st.insert_symbol( - self.nodes.get_ident_str(name).unwrap(), + &self.get_ident_str(name).unwrap().to_owned(), param, SymbolKind::Var, ); @@ -479,13 +488,13 @@ impl Tree { let decl = match self .st - .find_orderless_symbol(self.nodes.get_ident_str(name).unwrap()) + .find_orderless_symbol(self.get_ident_str(name).unwrap()) { Some(record) => record.node(), None => { let decl = self.nodes.reserve_node(); self.st - .insert_orderless_symbol(self.nodes.get_ident_str(name).unwrap(), decl); + .insert_orderless_symbol(&self.get_ident_str(name).unwrap().to_owned(), decl); decl } }; @@ -813,7 +822,16 @@ impl Tree { | Token::IntegerConstant => { _ = tokens.next(); let (bits, ty) = Self::parse_integral_constant(token.token(), token.lexeme()); - Ok(self.nodes.push_tag(Tag::IntegralConstant { bits, ty })) + let index = self.strings.insert(bits.into_bytes_le()); + let ty = match ty { + Some(int) => Type::Integer(int), + None => Type::ComptimeNumber, + }; + + Ok(self.nodes.push_tag(Tag::Constant { + bytes: crate::string_table::ImmOrIndex::Index(index), + ty, + })) } Token::FloatingConstant | Token::FloatingExpConstant @@ -822,7 +840,10 @@ impl Tree { _ = tokens.next(); let (bits, ty) = Self::parse_floating_constant(token.token(), token.lexeme()); - Ok(self.nodes.push_tag(Tag::FloatingConstant { bits, ty })) + Ok(self.nodes.push_tag(Tag::Constant { + bytes: crate::string_table::ImmOrIndex::U64(bits), + ty: Type::Floating(ty), + })) } Token::OpenParens => { _ = tokens.next(); @@ -869,10 +890,17 @@ impl Tree { self.parse_program(&mut tokens) } + pub fn get_ident_str(&self, node: Node) -> Option<&str> { + match &self.nodes[node] { + Tag::Ident { name } => Some(self.strings.get_str(*name)), + _ => None, + } + } + fn get_typename_str(&self, node: Node) -> Option { match self.nodes.get_node(node) { Tag::IntegralType(i) => Some(i.to_string()), - Tag::Ident { name } => Some(name.clone()), + Tag::Ident { name } => Some(self.strings.get_str(*name).to_owned()), Tag::Pointer { pointee } => self.get_typename_str(*pointee), Tag::PrimitiveType(prim) => Some(prim.to_string()), _ => None, @@ -897,11 +925,7 @@ impl Tree { self.render_node(writer, parameters, indent)?; } write_indented!(indent, writer, "%{} = function_proto: {{", node.get())?; - write!( - writer, - "name: \"{}\"", - self.nodes.get_ident_str(name).unwrap() - )?; + write!(writer, "name: \"{}\"", self.get_ident_str(name).unwrap())?; if let Some(parameters) = parameters { write!(writer, ", parameters: %{}", parameters.get())?; } @@ -921,7 +945,7 @@ impl Tree { writer, "%{} = {}: {},", node.get(), - self.nodes.get_ident_str(name).unwrap(), + self.get_ident_str(name).unwrap(), self.get_typename_str(ty).unwrap() ) } @@ -949,26 +973,12 @@ impl Tree { writeln_indented!(indent, writer, "}}") } Tag::Ident { name } => { - writeln_indented!(indent, writer, "%{} = identifier(\"{name}\")", node.get()) - } - Tag::IntegralConstant { bits, ty } => { writeln_indented!( indent, writer, - "%{} = {}({})", + "%{} = identifier(\"{}\")", node.get(), - ty.to_string(), - bits - ) - } - Tag::FloatingConstant { bits, ty } => { - writeln_indented!( - indent, - writer, - "%{} = {}({})", - node.get(), - ty.to_string(), - bits + self.strings.get_str(name) ) } Tag::Block { @@ -1023,7 +1033,7 @@ impl Tree { "mut" } }, - self.nodes.get_ident_str(name).unwrap() + self.get_ident_str(name).unwrap() )?; if let Some(ty) = explicit_type { write!(writer, ", ty: {}", self.get_typename_str(ty).unwrap())?; @@ -1043,7 +1053,7 @@ impl Tree { writer, "%{} = global_decl(name: \"{}\"", node.get(), - self.nodes.get_ident_str(name).unwrap() + self.get_ident_str(name).unwrap() )?; if let Some(ty) = explicit_type { write!(writer, ", ty: {}", self.get_typename_str(ty).unwrap())?; @@ -1349,6 +1359,20 @@ impl Tree { )) ) } + Tag::Constant { bytes, ty } => { + let bytes = match bytes { + crate::string_table::ImmOrIndex::U64(i) => &i.to_le_bytes()[..], + crate::string_table::ImmOrIndex::U32(i) => &i.to_le_bytes()[..], + crate::string_table::ImmOrIndex::Index(idx) => self.strings.get_bytes(idx), + }; + writeln_indented!( + indent, + writer, + "%{} = constant{{ ty: {}, bytes: {bytes:?}}}", + node.get(), + ty + ) + } _ => unreachable!(), } } @@ -1390,15 +1414,14 @@ impl Tree { constness: false, pointee: Box::new(self.type_of_node(*pointee)), }, + Tag::Constant { ty, .. } => ty.clone(), Tag::IntegralType(t) => Type::Integer(*t), Tag::PrimitiveType(t) => match t { PrimitiveType::FloatingType(t) => Type::Floating(*t), - PrimitiveType::IntegralType(t) => self.type_of_node(*t), + PrimitiveType::IntegralType(t) => Type::Integer(*t), PrimitiveType::Bool => Type::bool(), PrimitiveType::Void => Type::void(), }, - Tag::IntegralConstant { ty, .. } => Type::Integer(*ty), - Tag::FloatingConstant { ty, .. } => Type::Floating(*ty), Tag::Block { trailing_expr, .. } => trailing_expr .map(|n| self.type_of_node(n)) .unwrap_or(Type::void()), @@ -1458,6 +1481,7 @@ impl Tree { Tag::Le { .. } => Type::bool(), Tag::Ge { .. } => Type::bool(), Tag::DeclRef(decl) => self.type_of_node(*decl), + Tag::GlobalRef(decl) => self.type_of_node(*decl), _ => Type::void(), } } diff --git a/src/string_table.rs b/src/string_table.rs index 587d7a9..12356d8 100644 --- a/src/string_table.rs +++ b/src/string_table.rs @@ -1,11 +1,18 @@ use std::{collections::BTreeMap, hash::Hasher}; -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Index { pub start: u32, pub end: u32, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ImmOrIndex { + U64(u64), + U32(u32), + Index(Index), +} + impl Index { pub fn new(start: u32, end: u32) -> Self { Self { start, end } diff --git a/src/symbol_table.rs b/src/symbol_table.rs index 2b78bfc..ce515fd 100644 --- a/src/symbol_table.rs +++ b/src/symbol_table.rs @@ -35,13 +35,13 @@ impl SymbolPath { for node in self.0.iter().skip(1).rev() { match tree.nodes.get_node(node.unwrap()) { Tag::VarDecl { name, .. } => { - _ = write!(&mut buf, "V{}::", tree.nodes.get_ident_str(*name).unwrap()); + _ = write!(&mut buf, "V{}::", tree.get_ident_str(*name).unwrap()); } Tag::GlobalDecl { name, .. } => { - _ = write!(&mut buf, "G{}::", tree.nodes.get_ident_str(*name).unwrap()); + _ = write!(&mut buf, "G{}::", tree.get_ident_str(*name).unwrap()); } Tag::FunctionProto { name, .. } => { - _ = write!(&mut buf, "F{}::", tree.nodes.get_ident_str(*name).unwrap()); + _ = write!(&mut buf, "F{}::", tree.get_ident_str(*name).unwrap()); } _ => {} } diff --git a/src/triples.rs b/src/triples.rs index 3df77d5..10b6ece 100644 --- a/src/triples.rs +++ b/src/triples.rs @@ -5,6 +5,7 @@ use std::collections::{hash_map::Entry, HashMap}; use crate::{ ast::{FloatingType, IntegralType, Node as AstNode, Tag, Type}, parser::Tree, + string_table::{ImmOrIndex, Index as StringsIndex}, writeln_indented, }; @@ -33,27 +34,26 @@ enum Inst { Negate { lhs: Node }, ReturnValue { lhs: Node }, Return, + ExplicitCast { node: Node, ty: Type }, Alloc { size: u32, align: u32 }, AddressOf(Node), Load { source: Node }, Store { dest: Node, source: Node }, } -enum Value { - Int { kind: IntegralType, bits: u64 }, - Float { kind: FloatingType, bits: u64 }, +struct Value { + explicit_type: Option, + bytes: ImmOrIndex, } impl core::fmt::Display for Value { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Value::Int { kind, bits } => { - write!(f, "{kind} {}", bits) - } - Value::Float { kind, bits } => { - write!(f, "{kind} {{{}}}", bits) - } - } + write!( + f, + "{} {:?}", + self.explicit_type.as_ref().unwrap_or(&Type::any()), + self.bytes + ) } } @@ -64,6 +64,14 @@ struct IRBuilder<'tree, 'ir> { lookup: HashMap, } +impl core::ops::Index for IR { + type Output = Inst; + + fn index(&self, index: Node) -> &Self::Output { + &self.nodes[index as usize] + } +} + impl<'tree, 'ir> IRBuilder<'tree, 'ir> { fn new(ir: &'ir mut IR, tree: &'tree mut Tree) -> Self { Self { @@ -122,7 +130,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { parameters, name, .. } => { let label = self.ir.push(Inst::Label( - self.tree.nodes.get_ident_str(*name).unwrap().to_string(), + self.tree.get_ident_str(*name).unwrap().to_string(), )); parameters.map(|p| self.visit(p)); @@ -172,7 +180,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { Tag::GlobalDecl { name, .. } => { let ty = self.tree.type_of_node(node); let _label = self.ir.push(Inst::Label( - self.tree.nodes.get_ident_str(*name).unwrap().to_string(), + self.tree.get_ident_str(*name).unwrap().to_string(), )); let alloca = self.ir.push(Inst::Alloc { size: ty.size_of(), @@ -194,111 +202,131 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { let lhs = self.visit(*lhs); self.ir.push(Inst::Load { source: lhs }) } - Tag::IntegralConstant { bits, ty } => self.ir.push(Inst::Constant(Value::Int { - kind: *ty, - bits: *bits, - })), - Tag::FloatingConstant { bits, ty } => self.ir.push(Inst::Constant(Value::Float { - kind: *ty, - bits: *bits, - })), Tag::Assign { lhs, rhs } => { + let dest = self.visit(*lhs); + let source = self.visit(*rhs); + self.type_check(*lhs, *rhs); - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); - self.ir.push(Inst::Store { - dest: lhs, - source: rhs, - }) + self.ir.push(Inst::Store { dest, source }) } - Tag::Add { lhs, rhs } => { - let ty = self.type_check(*lhs, *rhs); + Tag::Add { + lhs: lhs0, + rhs: rhs0, + } => { + let lhs = self.visit(*lhs0); + let rhs = self.visit(*rhs0); + let ty = self.type_check(*lhs0, *rhs0); if !ty.can_add_sub() { eprintln!("add is not available for type {ty:?}"); } - - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); self.ir.push(Inst::Add { lhs, rhs }) } - Tag::Sub { lhs, rhs } => { - let ty = self.type_check(*lhs, *rhs); + Tag::Sub { + lhs: left, + rhs: right, + } => { + let lhs = self.visit(*left); + let rhs = self.visit(*right); + + let ty = self.type_check(*left, *right); if !ty.can_add_sub() { eprintln!("sub is not available for type {ty:?}"); } - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); self.ir.push(Inst::Sub { lhs, rhs }) } - Tag::Mul { lhs, rhs } => { - let ty = self.type_check(*lhs, *rhs); + Tag::Mul { + lhs: left, + rhs: right, + } => { + let lhs = self.visit(*left); + let rhs = self.visit(*right); + + let ty = self.type_check(*left, *right); if !ty.can_mul_div_rem() { eprintln!("mul is not available for type {ty:?}"); } - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); self.ir.push(Inst::Mul { lhs, rhs }) } - Tag::Div { lhs, rhs } => { - let ty = self.type_check(*lhs, *rhs); + Tag::Div { + lhs: left, + rhs: right, + } => { + let lhs = self.visit(*left); + let rhs = self.visit(*right); + + let ty = self.type_check(*left, *right); if !ty.can_mul_div_rem() { eprintln!("div is not available for type {ty:?}"); } - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); self.ir.push(Inst::Div { lhs, rhs }) } - Tag::Rem { lhs, rhs } => { - let ty = self.type_check(*lhs, *rhs); + Tag::Rem { + lhs: left, + rhs: right, + } => { + let lhs = self.visit(*left); + let rhs = self.visit(*right); + + let ty = self.type_check(*left, *right); if !ty.can_mul_div_rem() { eprintln!("rem is not available for type {ty:?}"); } - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); self.ir.push(Inst::Rem { lhs, rhs }) } - Tag::BitAnd { lhs, rhs } => { - let ty = self.type_check(*lhs, *rhs); + // bitwise + Tag::BitAnd { + lhs: left, + rhs: right, + } => { + let lhs = self.visit(*left); + let rhs = self.visit(*right); + + let ty = self.type_check(*left, *right); if !ty.can_bitxor_and_or() { eprintln!("bitand is not available for type {ty:?}"); } - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); self.ir.push(Inst::BitAnd { lhs, rhs }) } - Tag::BitOr { lhs, rhs } => { - let ty = self.type_check(*lhs, *rhs); + Tag::BitOr { + lhs: left, + rhs: right, + } => { + let lhs = self.visit(*left); + let rhs = self.visit(*right); + + let ty = self.type_check(*left, *right); if !ty.can_bitxor_and_or() { eprintln!("bitor is not available for type {ty:?}"); } - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); self.ir.push(Inst::BitOr { lhs, rhs }) } - Tag::BitXOr { lhs, rhs } => { - let ty = self.type_check(*lhs, *rhs); + Tag::BitXOr { + lhs: left, + rhs: right, + } => { + let lhs = self.visit(*left); + let rhs = self.visit(*right); + + let ty = self.type_check(*left, *right); if !ty.can_bitxor_and_or() { eprintln!("bitxor is not available for type {ty:?}"); } - let lhs = self.visit(*lhs); - let rhs = self.visit(*rhs); self.ir.push(Inst::BitXOr { lhs, rhs }) } - Tag::Negate { lhs } => { - let ty = self.tree.type_of_node(*lhs); + Tag::Negate { lhs: left } => { + let lhs = self.visit(*left); + let ty = self.tree.type_of_node(*left); if !ty.can_negate() { eprintln!("negation is not available for type {ty:?}"); } - - let lhs = self.visit(*lhs); self.ir.push(Inst::Negate { lhs }) } Tag::DeclRef(decl) => match self.lookup.get_mut(decl) { @@ -320,6 +348,18 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { let lhs = self.visit(*lhs); self.ir.push(Inst::AddressOf(lhs)) } + Tag::Constant { bytes, ty } => { + let bytes = match ty { + Type::ComptimeNumber | Type::Floating(_) | Type::Integer(_) => Value { + explicit_type: Some(ty.clone()), + bytes: *bytes, + }, + _ => { + unimplemented!() + } + }; + self.ir.push(Inst::Constant(bytes)) + } _ => { dbg!(&self.tree.nodes[node]); todo!() @@ -327,13 +367,35 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { } } - fn type_check(&self, lhs: AstNode, rhs: AstNode) -> Type { - let t_lhs = self.tree.type_of_node(lhs); - let t_rhs = self.tree.type_of_node(rhs); - if t_lhs != t_rhs { - eprintln!("incompatible types {t_lhs:?} and {t_rhs:?}!"); + fn type_check(&mut self, lhs: AstNode, rhs: AstNode) -> Type { + let left_t = match self.type_map.entry(lhs.clone()) { + Entry::Occupied(o) => o.get().clone(), + Entry::Vacant(v) => v.insert(self.tree.type_of_node(lhs)).clone(), + }; + let right_t = match self.type_map.entry(rhs.clone()) { + Entry::Occupied(o) => o.get().clone(), + Entry::Vacant(v) => v.insert(self.tree.type_of_node(rhs)).clone(), + }; + match left_t.equal_type(&right_t) { + Some(t) => { + if left_t == Type::ComptimeNumber { + self.type_map.insert(lhs, t.clone()); + } + if right_t == Type::ComptimeNumber { + self.type_map.insert(rhs, t.clone()); + } + + t + } + None => { + eprintln!( + "incompatible types %{}: {left_t:?} and %{}: {right_t:?}!", + lhs.get(), + rhs.get() + ); + Type::void() + } } - t_lhs } } @@ -446,6 +508,9 @@ impl IR { ast_node.get() )?; } + Inst::ExplicitCast { node: lhs, ty } => { + writeln_indented!(indent, w, "%{} = explicit_cast %{} to {}", node, lhs, ty)?; + } } Ok(()) } @@ -468,7 +533,7 @@ mod tests { fn ir() { let src = " fn main() -> u32 { - let a: u32 = 0 + 3; + let a: u32 = 0 + 3u32; let ptr_a = &a; return *ptr_a * global; }