From 3bb4ba79bcd0d5ed42a52a6a6228edaffe057593 Mon Sep 17 00:00:00 2001 From: Janis Date: Wed, 14 Aug 2024 14:57:23 +0200 Subject: [PATCH] move symboltable into parser and tree, away from triples gen --- src/parser.rs | 90 ++++++++++++++- src/symbol_table.rs | 8 +- src/triples.rs | 263 +++++--------------------------------------- 3 files changed, 124 insertions(+), 237 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index e97eed1..47fc7de 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use itertools::Itertools; use crate::{ - ast::{FloatingType, IntegralType, LetOrVar, Node, PrimitiveType, Tag}, + ast::{FloatingType, IntegralType, LetOrVar, Node, PrimitiveType, Tag, Type}, common::NextIf, lexer::{Radix, TokenIterator}, symbol_table::{SymbolKind, SymbolTable}, @@ -86,7 +86,7 @@ impl Nodes { #[derive(Debug)] pub struct Tree { pub nodes: Nodes, - st: SymbolTable, + pub st: SymbolTable, pub global_decls: Vec, } @@ -1287,6 +1287,92 @@ impl Tree { Ok(()) } + + pub fn type_of_node(&self, node: Node) -> crate::ast::Type { + match self.nodes.get_node(node) { + Tag::FunctionDecl { proto, .. } => self.type_of_node(*proto), + Tag::FunctionProto { + parameters, + return_type, + .. + } => { + let return_type = self.type_of_node(*return_type); + let parameter_types = parameters + .map(|p| match self.nodes.get_node(p) { + Tag::ParameterList { parameters } => parameters + .iter() + .map(|p| self.type_of_node(*p)) + .collect::>(), + _ => panic!("parameters is not a parameterlist!"), + }) + .unwrap_or(Vec::new()); + + crate::ast::Type::Fn { + parameter_types, + return_type: Box::new(return_type), + } + } + Tag::Parameter { ty, .. } => self.type_of_node(*ty), + Tag::Pointer { pointee } => Type::Pointer { + constness: false, + pointee: Box::new(self.type_of_node(*pointee)), + }, + Tag::IntegralType(t) => Type::Integer(*t), + Tag::PrimitiveType(t) => match t { + PrimitiveType::FloatingType(t) => Type::Floating(*t), + PrimitiveType::IntegralType(t) => self.type_of_node(*t), + PrimitiveType::Bool => Type::bool(), + PrimitiveType::Void => Type::void(), + }, + Tag::IntegralConstant { ty, .. } => Type::Integer(*ty), + Tag::FloatingConstant { ty, .. } => Type::Floating(*ty), + Tag::Block { trailing_expr, .. } => trailing_expr + .map(|n| self.type_of_node(n)) + .unwrap_or(Type::void()), + Tag::VarDecl { + explicit_type, + assignment, // this is a Tag::Assign + .. + } => { + let lhs = explicit_type.map(|n| self.type_of_node(n)); + let rhs = assignment.map(|n| match self.nodes.get_node(n) { + Tag::Assign { rhs, .. } => self.type_of_node(*rhs), + _ => unreachable!(), + }); + if lhs.as_ref().zip(rhs.as_ref()).map(|(l, r)| l != r) == Some(true) { + eprintln!("vardecl: incompatible types {lhs:?} and {rhs:?}."); + } + lhs.or(rhs) + .expect("Type could not be automatically deduced.") + } + Tag::CallExpr { lhs, .. } => self.type_of_node(*lhs), + Tag::ExplicitCast { typename, .. } => self.type_of_node(*typename), + Tag::Deref { lhs } => self.type_of_node(*lhs).remove_ptr().unwrap(), + Tag::Ref { lhs } => self.type_of_node(*lhs).into_ptr(), + Tag::Not { lhs } => self.type_of_node(*lhs), + Tag::Negate { lhs } => self.type_of_node(*lhs), + Tag::Or { lhs, .. } => self.type_of_node(*lhs), + Tag::And { lhs, .. } => self.type_of_node(*lhs), + Tag::BitOr { lhs, .. } => self.type_of_node(*lhs), + Tag::BitAnd { lhs, .. } => self.type_of_node(*lhs), + Tag::BitXOr { lhs, .. } => self.type_of_node(*lhs), + Tag::Shl { lhs, .. } => self.type_of_node(*lhs), + Tag::Shr { lhs, .. } => self.type_of_node(*lhs), + Tag::Add { lhs, .. } => self.type_of_node(*lhs), + Tag::Sub { lhs, .. } => self.type_of_node(*lhs), + Tag::Mul { lhs, .. } => self.type_of_node(*lhs), + Tag::Rem { lhs, .. } => self.type_of_node(*lhs), + Tag::Div { lhs, .. } => self.type_of_node(*lhs), + Tag::Eq { .. } => Type::bool(), + Tag::NEq { .. } => Type::bool(), + Tag::Lt { .. } => Type::bool(), + Tag::Gt { .. } => Type::bool(), + Tag::Le { .. } => Type::bool(), + Tag::Ge { .. } => Type::bool(), + Tag::DeclRef(decl) => self.type_of_node(*decl), + _ => Type::void(), + } + } } static PRECEDENCE_MAP: std::sync::LazyLock> = std::sync::LazyLock::new(|| { diff --git a/src/symbol_table.rs b/src/symbol_table.rs index 9c433cd..74fcf42 100644 --- a/src/symbol_table.rs +++ b/src/symbol_table.rs @@ -30,15 +30,19 @@ pub struct SymbolTable { // parameters, so any `x` may be redefined. ordered_identifiers: Vec, orderless_identifiers: HashMap, - parent: Option>, children: BTreeMap, SymbolTable>, scope: Option, + parent: Option>, } impl SymbolTable { pub fn new() -> SymbolTable { Self { - ..Default::default() + ordered_identifiers: Vec::new(), + orderless_identifiers: HashMap::new(), + children: BTreeMap::new(), + scope: None, + parent: None, } } diff --git a/src/triples.rs b/src/triples.rs index 39b2361..f793a58 100644 --- a/src/triples.rs +++ b/src/triples.rs @@ -3,209 +3,11 @@ use std::collections::HashMap; use crate::{ - ast::{FloatingType, IntegralType, Node as AstNode, PrimitiveType, Tag, Type}, + ast::{FloatingType, IntegralType, Node as AstNode, Tag, Type}, parser::Tree, writeln_indented, }; -struct SymbolRecord { - name: String, - decl: AstNode, - ty: Type, -} - -struct SymbolTable<'a> { - tree: &'a Tree, - identifiers: Vec, - parent: Option>>, -} - -impl<'a> SymbolTable<'a> { - pub fn root(tree: &'a Tree) -> Self { - Self { - tree, - identifiers: Vec::new(), - parent: None, - } - } - - fn insert_symbol_with_name(&mut self, ast_node: AstNode, name: String) { - let ty = self.type_of_node(ast_node); - self.identifiers.push(SymbolRecord { - name, - decl: ast_node, - ty, - }); - } - - pub fn insert_return_symbol(&mut self, ast_node: AstNode) { - self.insert_symbol_with_name(ast_node, "return".to_string()) - } - - pub fn insert_symbol(&mut self, ast_node: AstNode) { - let name = match self.tree.nodes.get_node(ast_node) { - Tag::VarDecl { name, .. } => self.tree.nodes.get_ident_str(*name).unwrap().to_string(), - Tag::Parameter { name, .. } => { - self.tree.nodes.get_ident_str(*name).unwrap().to_string() - } - Tag::FunctionProto { name, .. } => { - self.tree.nodes.get_ident_str(*name).unwrap().to_string() - } - _ => { - panic!("ast_node wasn't any kind of decl!"); - } - }; - - self.insert_symbol_with_name(ast_node, name) - } - - pub fn find_symbol_name(&self, name: &str) -> Option<&SymbolRecord> { - self.identifiers - .iter() - .find(|r| r.name.as_str() == name) - .or_else(|| { - self.parent - .as_ref() - .and_then(|parent| parent.find_symbol_name(name)) - }) - } - - pub fn find_symbol(&self, ident_node: AstNode) -> Option<&SymbolRecord> { - self.identifiers - .iter() - .find(|r| Some(r.name.as_str()) == self.tree.nodes.get_ident_str(ident_node)) - .or_else(|| { - self.parent - .as_ref() - .and_then(|parent| parent.find_symbol(ident_node)) - }) - } - pub fn into_child_in_place(&mut self) { - let mut parent = Self { - parent: None, - tree: self.tree, - identifiers: Vec::new(), - }; - core::mem::swap(self, &mut parent); - self.parent = Some(Box::new(parent)); - } - - pub fn into_child(self) -> SymbolTable<'a> { - Self { - identifiers: Vec::new(), - tree: self.tree, - parent: Some(Box::new(self)), - } - } - - pub fn parent_mut(&mut self) -> Option<&mut SymbolTable<'a>> { - self.parent.as_mut().map(|parent| parent.as_mut()) - } - - pub fn into_parent_in_place(&mut self) -> Option> { - if let Some(child) = self.parent.take() { - let mut child = Box::into_inner(child); - core::mem::swap(self, &mut child); - Some(child) - } else { - None - } - } - - pub fn into_parent(self) -> Option> { - self.parent.map(|parent| Box::into_inner(parent)) - } - - fn type_of_node(&self, node: AstNode) -> crate::ast::Type { - match self.tree.nodes.get_node(node) { - Tag::Ident { name } => self - .find_symbol(node) - .map(|r| r.ty.clone()) - .expect(&format!("identifier '{name}' not found in SymbolTable!")), - Tag::FunctionDecl { proto, .. } => self.type_of_node(*proto), - Tag::FunctionProto { - parameters, - return_type, - .. - } => { - let return_type = self.type_of_node(*return_type); - let parameter_types = parameters - .map(|p| match self.tree.nodes.get_node(p) { - Tag::ParameterList { parameters } => parameters - .iter() - .map(|p| self.type_of_node(*p)) - .collect::>(), - _ => panic!("parameters is not a parameterlist!"), - }) - .unwrap_or(Vec::new()); - - crate::ast::Type::Fn { - parameter_types, - return_type: Box::new(return_type), - } - } - Tag::Parameter { ty, .. } => self.type_of_node(*ty), - Tag::Pointer { pointee } => Type::Pointer { - constness: false, - pointee: Box::new(self.type_of_node(*pointee)), - }, - Tag::IntegralType(t) => Type::Integer(*t), - Tag::PrimitiveType(t) => match t { - PrimitiveType::FloatingType(t) => Type::Floating(*t), - PrimitiveType::IntegralType(t) => self.type_of_node(*t), - PrimitiveType::Bool => Type::bool(), - PrimitiveType::Void => Type::void(), - }, - Tag::IntegralConstant { ty, .. } => Type::Integer(*ty), - Tag::FloatingConstant { ty, .. } => Type::Floating(*ty), - Tag::Block { trailing_expr, .. } => trailing_expr - .map(|n| self.type_of_node(n)) - .unwrap_or(Type::void()), - Tag::VarDecl { - explicit_type, - assignment, // this is a Tag::Assign - .. - } => { - let lhs = explicit_type.map(|n| self.type_of_node(n)); - let rhs = assignment.map(|n| match self.tree.nodes.get_node(n) { - Tag::Assign { rhs, .. } => self.type_of_node(*rhs), - _ => unreachable!(), - }); - if lhs.as_ref().zip(rhs.as_ref()).map(|(l, r)| l != r) == Some(true) { - eprintln!("vardecl: incompatible types {lhs:?} and {rhs:?}."); - } - lhs.or(rhs) - .expect("Type could not be automatically deduced.") - } - Tag::CallExpr { lhs, .. } => self.type_of_node(*lhs), - Tag::ExplicitCast { typename, .. } => self.type_of_node(*typename), - Tag::Deref { lhs } => self.type_of_node(*lhs).remove_ptr().unwrap(), - Tag::Ref { lhs } => self.type_of_node(*lhs).into_ptr(), - Tag::Not { lhs } => self.type_of_node(*lhs), - Tag::Negate { lhs } => self.type_of_node(*lhs), - Tag::Or { lhs, .. } => self.type_of_node(*lhs), - Tag::And { lhs, .. } => self.type_of_node(*lhs), - Tag::BitOr { lhs, .. } => self.type_of_node(*lhs), - Tag::BitAnd { lhs, .. } => self.type_of_node(*lhs), - Tag::BitXOr { lhs, .. } => self.type_of_node(*lhs), - Tag::Shl { lhs, .. } => self.type_of_node(*lhs), - Tag::Shr { lhs, .. } => self.type_of_node(*lhs), - Tag::Add { lhs, .. } => self.type_of_node(*lhs), - Tag::Sub { lhs, .. } => self.type_of_node(*lhs), - Tag::Mul { lhs, .. } => self.type_of_node(*lhs), - Tag::Rem { lhs, .. } => self.type_of_node(*lhs), - Tag::Div { lhs, .. } => self.type_of_node(*lhs), - Tag::Eq { .. } => Type::bool(), - Tag::NEq { .. } => Type::bool(), - Tag::Lt { .. } => Type::bool(), - Tag::Gt { .. } => Type::bool(), - Tag::Le { .. } => Type::bool(), - Tag::Ge { .. } => Type::bool(), - _ => Type::void(), - } - } -} - type Node = u32; enum Inst { @@ -249,47 +51,51 @@ impl core::fmt::Display for Value { struct IRBuilder<'tree, 'ir> { ir: &'ir mut IR, - st: SymbolTable<'tree>, + tree: &'tree mut Tree, + type_map: HashMap, lookup: HashMap, } impl<'tree, 'ir> IRBuilder<'tree, 'ir> { - fn new(ir: &'ir mut IR, st: SymbolTable<'tree>) -> Self { + fn new(ir: &'ir mut IR, tree: &'tree mut Tree) -> Self { Self { ir, - st, + tree, + type_map: HashMap::new(), lookup: HashMap::new(), } } fn visit(&mut self, node: AstNode) -> Node { - match &self.st.tree.nodes[node] { + match &self.tree.nodes[node].clone() { Tag::FunctionDecl { proto, body } => { self.visit(*proto); - self.st.into_child_in_place(); + self.tree.st.into_child(node); let value = self.visit(*body); // TODO: return value of body expression let node = if value != !0 { - self.type_check(self.st.find_symbol_name("return").unwrap().decl, *body); + let return_type = { + match self.tree.nodes.get_node(*proto) { + Tag::FunctionProto { return_type, .. } => *return_type, + _ => unreachable!(), + } + }; + self.type_check(return_type, *body); self.ir.push(Inst::ReturnValue { lhs: value }) } else { !0 }; - self.st.into_parent_in_place(); + self.tree.st.into_parent(); node } Tag::FunctionProto { - parameters, - return_type, - name, + parameters, name, .. } => { - self.st.insert_symbol(node); - self.st.insert_return_symbol(*return_type); parameters.map(|p| self.visit(p)); let label = self.ir.push(Inst::Label( - self.st.tree.nodes.get_ident_str(*name).unwrap().to_string(), + self.tree.nodes.get_ident_str(*name).unwrap().to_string(), )); self.lookup.insert(node, label); @@ -303,7 +109,6 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { !0 } Tag::Parameter { .. } => { - self.st.insert_symbol(node); let param = self.ir.push(Inst::Parameter); self.lookup.insert(node, param); @@ -324,12 +129,11 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { } } Tag::VarDecl { .. } => { - let ty = self.st.type_of_node(node); + let ty = self.tree.type_of_node(node); let alloca = self.ir.push(Inst::Alloc { size: ty.size_of(), align: ty.align_of(), }); - self.st.insert_symbol(node); self.lookup.insert(node, alloca); alloca } @@ -445,7 +249,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { self.ir.push(Inst::BitXOr { lhs, rhs }) } Tag::Negate { lhs } => { - let ty = self.st.type_of_node(*lhs); + let ty = self.tree.type_of_node(*lhs); if !ty.can_negate() { eprintln!("negation is not available for type {ty:?}"); } @@ -453,29 +257,21 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { let lhs = self.visit(*lhs); self.ir.push(Inst::Negate { lhs }) } - Tag::Ident { name } => { - let decl = self - .st - .find_symbol(node) - .expect(&format!("symbol '{name}' not found in SymbolMap!")) - .decl; - - *self.lookup.get(&decl).unwrap() - } + Tag::DeclRef(decl) => *self.lookup.get(decl).expect("declref not in lookup map"), Tag::Ref { lhs } => { let lhs = self.visit(*lhs); self.ir.push(Inst::AddressOf(lhs)) } _ => { - dbg!(&self.st.tree.nodes[node]); + dbg!(&self.tree.nodes[node]); todo!() } } } fn type_check(&self, lhs: AstNode, rhs: AstNode) -> Type { - let t_lhs = self.st.type_of_node(lhs); - let t_rhs = self.st.type_of_node(rhs); + let t_lhs = self.tree.type_of_node(lhs); + let t_rhs = self.tree.type_of_node(rhs); if t_lhs != t_rhs { eprintln!("incompatible types {t_lhs:?} and {t_rhs:?}!"); } @@ -498,9 +294,8 @@ impl IR { node } - pub fn build(&mut self, tree: &Tree, ast_node: crate::ast::Node) { - let st = SymbolTable::root(tree); - let mut builder = IRBuilder::new(self, st); + pub fn build(&mut self, tree: &mut Tree, ast_node: crate::ast::Node) { + let mut builder = IRBuilder::new(self, tree); builder.visit(ast_node); } @@ -590,8 +385,9 @@ mod tests { fn main() -> u32 { let a: u32 = 0 + 3; let ptr_a = &a; -return *ptr_a * 2; +return *ptr_a * global; } +let global: u32 = 42; "; let tokens = Tokenizer::new(src.as_bytes()).unwrap(); @@ -603,7 +399,8 @@ return *ptr_a * 2; println!("{buf}"); let mut ir = IR::new(); - ir.build(&tree, *tree.global_decls.first().unwrap()); + let decl = *tree.global_decls.first().unwrap(); + ir.build(&mut tree, decl); let mut buf = String::new(); ir.render(&mut buf).unwrap(); println!("{buf}");