From 5c57dfb904c612ee4de28a1966141c0bad1b1161 Mon Sep 17 00:00:00 2001
From: Janis
Date: Fri, 16 Aug 2024 16:56:11 +0200
Subject: [PATCH] trying to refactor a bit:

- working on a string/bytes table
- no longer directly referencing ir outside of current function; the goal here
  is to end up only generating ir for 1 function, then link together in
  assembling step. this involves adding a GlobalRef() ast-node and ExternRef()
  ir instruction
---
NOTE(review): this copy of the patch had been flattened onto a few lines and
appears to have lost the author e-mail and most angle-bracket generic
arguments (e.g. `children: BTreeMap>,`, `-> NonNull`). Line breaks and
indentation were re-created; generic arguments were restored only on added
lines where the patch itself pins the type. Everything else is reproduced as
found and must be checked against the original commit before applying.

Changes relative to the original patch (hunk line counts are unchanged):
- symbol_table.rs, symbol_path(): the ancestor walk never advanced past
  `table`, so it never terminated for a symbol found in a non-root scope.
- symbol_table.rs, SymbolPath::mangle(): with the walk fixed, `.skip(1).rev()`
  would yield the root table's `None` scope first and `unwrap()` it; the path
  is now read outermost-first and scope-less entries are skipped
  (presumed intent -- confirm the expected mangling).
- string_table.rs, insert(): a hash hit is confirmed by comparing the bytes,
  so two inputs with colliding hashes no longer share one index.

 src/ast.rs          |  1 +
 src/lib.rs          |  1 +
 src/parser.rs       | 35 +++++++++++++----
 src/string_table.rs | 68 ++++++++++++++++++++++++++++++++
 src/symbol_table.rs | 94 +++++++++++++++++++++++++++++++++++++++++----
 src/triples.rs      | 13 ++++++-
 6 files changed, 196 insertions(+), 16 deletions(-)
 create mode 100644 src/string_table.rs

diff --git a/src/ast.rs b/src/ast.rs
index dbda4cf..bc41166 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -77,6 +77,7 @@ pub enum Tag {
         assignment: Node,
     },
     DeclRef(Node),
+    GlobalRef(Node),
     CallExpr {
         /// Ident | Expr
         lhs: Node,
diff --git a/src/lib.rs b/src/lib.rs
index cbf66d6..4cd5083 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,6 +6,7 @@ pub mod codegen;
 pub mod common;
 pub mod lexer;
 pub mod parser;
+pub mod string_table;
 pub mod symbol_table;
 pub mod tokens;
 pub mod triples;
diff --git a/src/parser.rs b/src/parser.rs
index f64e237..3e14dfa 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -793,15 +793,19 @@ impl Tree {
             Token::Ident => {
                 let ident = tokens.expect_token(Token::Ident)?;
 
-                let decl = match self.st.find_symbol(ident.lexeme()) {
-                    Some(rec) => rec.node(),
-                    None => self
+                let name = ident.lexeme();
+                if let Some(record) = self.st.find_ordered_symbol(name) {
+                    Ok(self.nodes.push_tag(Tag::DeclRef(record.node())))
+                } else if let Some(record) = self.st.find_orderless_symbol(name) {
+                    Ok(self.nodes.push_tag(Tag::GlobalRef(record.node())))
+                } else {
+                    let node = self
                         .st
-                        .insert_orderless_symbol(ident.lexeme(), self.nodes.reserve_node())
-                        .node(),
-                };
+                        .insert_orderless_symbol(name, self.nodes.reserve_node())
+                        .node();
 
-                Ok(self.nodes.push_tag(Tag::DeclRef(decl)))
+                    Ok(self.nodes.push_tag(Tag::GlobalRef(node)))
+                }
             }
             Token::IntegerBinConstant
             | Token::IntegerHexConstant
@@ -1328,6 +1332,23 @@ impl Tree {
                         ))
                 )
             }
+            Tag::GlobalRef(decl) => {
+                writeln_indented!(
+                    indent,
+                    writer,
+                    "%{} = global_ref(%{}, name: {})",
+                    node.get(),
+                    decl.get(),
+                    self.st
+                        .symbol_path(decl)
+                        .map(|p| p.mangle(self))
+                        .unwrap_or(format!(
+                            "SymbolTable entry not found?, %{}, %{}",
+                            node.get(),
+                            decl.get()
+                        ))
+                )
+            }
             _ => unreachable!(),
         }
     }
diff --git a/src/string_table.rs b/src/string_table.rs
new file mode 100644
index 0000000..587d7a9
--- /dev/null
+++ b/src/string_table.rs
@@ -0,0 +1,68 @@
+use std::{collections::BTreeMap, hash::Hasher};
+
+#[derive(Debug, Clone, Copy)]
+pub struct Index {
+    pub start: u32,
+    pub end: u32,
+}
+
+impl Index {
+    pub fn new(start: u32, end: u32) -> Self {
+        Self { start, end }
+    }
+}
+
+#[derive(Debug)]
+pub struct StringTable {
+    bytes: Vec<u8>,
+    indices: BTreeMap<u64, Index>,
+}
+
+impl core::ops::Index<Index> for StringTable {
+    type Output = [u8];
+
+    fn index(&self, idx: Index) -> &Self::Output {
+        &self.bytes[idx.start as usize..idx.end as usize]
+    }
+}
+
+impl StringTable {
+    pub fn new() -> Self {
+        Self {
+            bytes: Vec::new(),
+            indices: BTreeMap::new(),
+        }
+    }
+
+    pub fn get_str(&self, idx: Index) -> &str {
+        unsafe { core::str::from_utf8_unchecked(&self[idx]) }
+    }
+
+    pub fn get_bytes(&self, idx: Index) -> &[u8] {
+        &self[idx]
+    }
+
+    pub fn insert<B: AsRef<[u8]>>(&mut self, bytes: B) -> Index {
+        let bytes = bytes.as_ref();
+        let hash = {
+            let mut hasher = std::hash::DefaultHasher::new();
+            hasher.write(bytes);
+            hasher.finish()
+        };
+
+        match self.indices.get(&hash).copied() {
+            // A 64-bit hash can collide; only reuse an entry whose bytes really match.
+            Some(idx) if &self[idx] == bytes => idx,
+            _ => self.insert_inner(hash, bytes),
+        }
+    }
+
+    fn insert_inner(&mut self, hash: u64, bytes: &[u8]) -> Index {
+        let start = self.bytes.len();
+        self.bytes.extend(bytes);
+        let end = self.bytes.len();
+        let index = Index::new(start as u32, end as u32);
+        self.indices.insert(hash, index);
+        index
+    }
+}
diff --git a/src/symbol_table.rs b/src/symbol_table.rs
index 8335b6b..2b78bfc 100644
--- a/src/symbol_table.rs
+++ b/src/symbol_table.rs
@@ -3,7 +3,10 @@ use std::{
     ptr::NonNull,
 };
 
-use crate::ast::Node as AstNode;
+use crate::{
+    ast::{Node as AstNode, Tag},
+    parser::Tree,
+};
 
 #[derive(Debug)]
 pub struct SymbolRecord {
@@ -21,6 +24,33 @@ impl SymbolRecord {
     }
 }
 
+#[allow(unused)]
+pub struct SymbolPath(Vec<Option<AstNode>>, String);
+
+impl SymbolPath {
+    pub fn mangle(&self, tree: &Tree) -> String {
+        use core::fmt::Write;
+        let mut buf = String::new();
+
+        for node in self.0.iter().rev().flatten() {
+            match tree.nodes.get_node(*node) {
+                Tag::VarDecl { name, .. } => {
+                    _ = write!(&mut buf, "V{}::", tree.nodes.get_ident_str(*name).unwrap());
+                }
+                Tag::GlobalDecl { name, .. } => {
+                    _ = write!(&mut buf, "G{}::", tree.nodes.get_ident_str(*name).unwrap());
+                }
+                Tag::FunctionProto { name, .. } => {
+                    _ = write!(&mut buf, "F{}::", tree.nodes.get_ident_str(*name).unwrap());
+                }
+                _ => {}
+            }
+        }
+        _ = write!(&mut buf, "{}", self.1);
+        buf
+    }
+}
+
 pub enum SymbolKind {
     Var, // or Let
     Orderless,
@@ -31,33 +61,38 @@ struct InnerSymbolTable {
     ordered_identifiers: Vec,
     orderless_identifiers: HashMap,
     children: BTreeMap>,
+    scope: Option<AstNode>,
     parent: Option>,
 }
 impl InnerSymbolTable {
     fn new() -> NonNull {
         Self::new_with(Self::new_inner)
     }
+
     fn new_with(gen: G) -> NonNull
     where
         G: FnOnce() -> Self,
     {
         NonNull::new(Box::leak(Box::new(gen())) as *mut _).unwrap()
     }
+
     fn new_inner() -> InnerSymbolTable {
         Self {
             parent: None,
             ordered_identifiers: Vec::new(),
             orderless_identifiers: HashMap::new(),
             children: BTreeMap::new(),
+            scope: None,
         }
     }
 
-    fn make_child(&self) -> NonNull {
+    fn make_child(&self, scope: AstNode) -> NonNull {
         Self::new_with(|| Self {
             parent: NonNull::new(self.as_ptr()),
             ordered_identifiers: Vec::new(),
             orderless_identifiers: HashMap::new(),
             children: BTreeMap::new(),
+            scope: Some(scope),
         })
     }
 
@@ -124,7 +159,7 @@ impl InnerSymbolTable {
         F: FnOnce() -> (AstNode, SymbolKind),
     {
         let this = self as *mut Self;
-        if let Some(record) = unsafe { &*this }.find_symbol(name) {
+        if let Some(record) = unsafe { &*this }.find_any_symbol(name) {
             record
         } else {
             let (node, kind) = cb();
@@ -145,12 +180,19 @@ impl InnerSymbolTable {
             .or_else(|| self.parent_ref().and_then(|p| p.find_symbol_by_decl(decl)))
     }
 
-    fn find_symbol(&self, name: &str) -> Option<&SymbolRecord> {
+    fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> {
         self.ordered_identifiers
             .iter()
             .find(|r| r.name.as_str() == name)
             .or_else(|| self.orderless_identifiers.get(name))
-            .or_else(|| self.parent_ref().and_then(|p| p.find_symbol(name)))
+            .or_else(|| self.parent_ref().and_then(|p| p.find_any_symbol(name)))
+    }
+
+    fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> {
+        self.ordered_identifiers
+            .iter()
+            .find(|r| r.name.as_str() == name)
+            .or_else(|| self.parent_ref().and_then(|p| p.find_ordered_symbol(name)))
     }
 
     fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> {
@@ -229,7 +271,7 @@ impl SymbolTableWrapper {
         let child = if let Some(child) = self.current().children.get(&scope) {
             *child
         } else {
-            let child = self.current().make_child();
+            let child = self.current().make_child(scope);
             self.current_mut().children.insert(scope, child);
             child
         };
@@ -271,14 +313,50 @@ impl SymbolTableWrapper {
         self.current().find_symbol_by_decl(decl)
     }
 
-    pub fn find_symbol(&self, name: &str) -> Option<&SymbolRecord> {
-        self.current().find_symbol(name)
+    pub fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> {
+        self.current().find_any_symbol(name)
+    }
+
+    pub fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> {
+        self.current().find_ordered_symbol(name)
     }
 
     pub fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> {
         self.current().find_orderless_symbol(name)
     }
 
+    pub fn symbol_path(&self, decl: AstNode) -> Option<SymbolPath> {
+        let mut table = self.current();
+        loop {
+            if let Some(record) = table
+                .ordered_identifiers
+                .iter()
+                .find(|r| r.node() == decl)
+                .or_else(|| {
+                    table
+                        .orderless_identifiers
+                        .iter()
+                        .find(|(_, v)| v.decl == decl)
+                        .map(|(_, v)| v)
+                })
+            {
+                // Collect the scope of every enclosing table, innermost first.
+                let path = core::iter::successors(table.parent_ref(), |t| t.parent_ref())
+                    .map(|t| t.scope)
+                    .collect();
+
+                return Some(SymbolPath(path, record.name.clone()));
+            };
+
+            let Some(parent) = table.parent_ref() else {
+                break;
+            };
+            table = parent;
+        }
+
+        None
+    }
+
     pub fn extend_orderless(&mut self, iter: I)
     where
         I: IntoIterator,
diff --git a/src/triples.rs b/src/triples.rs
index 02152e2..3df77d5 100644
--- a/src/triples.rs
+++ b/src/triples.rs
@@ -19,6 +19,7 @@ enum Inst {
     Label(String),
     Constant(Value),
     UnresolvedRef,
+    ExternRef(AstNode),
     Ref(Node),
     Parameter { size: u32, align: u32 },
     Add { lhs: Node, rhs: Node },
@@ -135,7 +136,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> {
                 }
                 !0
             }
-            Tag::Parameter { name, ty } => {
+            Tag::Parameter { ty, .. } => {
                 let ty = self.tree.type_of_node(*ty);
                 let param = self.ir.push(Inst::Parameter {
                     size: ty.size_of(),
@@ -314,6 +315,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> {
                     unresolved
                 }
             },
+            Tag::GlobalRef(decl) => self.ir.push(Inst::ExternRef(*decl)),
             Tag::Ref { lhs } => {
                 let lhs = self.visit(*lhs);
                 self.ir.push(Inst::AddressOf(lhs))
@@ -435,6 +437,15 @@ impl IR {
             Inst::Store { dest, source } => {
                 writeln_indented!(indent, w, "%{} = store ptr %{dest} from %{source}", node)?;
             }
+            Inst::ExternRef(ast_node) => {
+                writeln_indented!(
+                    indent,
+                    w,
+                    "%{} = extern reference ast-node %{}",
+                    node,
+                    ast_node.get()
+                )?;
+            }
         }
         Ok(())
     }