trying to refactor a bit:

- working on a string/bytes table
- no longer directly referencing ir outside of the current function; the goal here
is to end up only generating ir for 1 function, then linking them together in the
assembling step. this involves adding a GlobalRef() ast-node and an ExternRef() ir instruction
This commit is contained in:
Janis 2024-08-16 16:56:11 +02:00
parent 2689179ed4
commit 5c57dfb904
6 changed files with 196 additions and 16 deletions

View file

@ -77,6 +77,7 @@ pub enum Tag {
assignment: Node, assignment: Node,
}, },
DeclRef(Node), DeclRef(Node),
GlobalRef(Node),
CallExpr { CallExpr {
/// Ident | Expr /// Ident | Expr
lhs: Node, lhs: Node,

View file

@ -6,6 +6,7 @@ pub mod codegen;
pub mod common; pub mod common;
pub mod lexer; pub mod lexer;
pub mod parser; pub mod parser;
pub mod string_table;
pub mod symbol_table; pub mod symbol_table;
pub mod tokens; pub mod tokens;
pub mod triples; pub mod triples;

View file

@ -793,15 +793,19 @@ impl Tree {
Token::Ident => { Token::Ident => {
let ident = tokens.expect_token(Token::Ident)?; let ident = tokens.expect_token(Token::Ident)?;
let decl = match self.st.find_symbol(ident.lexeme()) { let name = ident.lexeme();
Some(rec) => rec.node(), if let Some(record) = self.st.find_ordered_symbol(name) {
None => self Ok(self.nodes.push_tag(Tag::DeclRef(record.node())))
} else if let Some(record) = self.st.find_orderless_symbol(name) {
Ok(self.nodes.push_tag(Tag::GlobalRef(record.node())))
} else {
let node = self
.st .st
.insert_orderless_symbol(ident.lexeme(), self.nodes.reserve_node()) .insert_orderless_symbol(name, self.nodes.reserve_node())
.node(), .node();
};
Ok(self.nodes.push_tag(Tag::DeclRef(decl))) Ok(self.nodes.push_tag(Tag::GlobalRef(node)))
}
} }
Token::IntegerBinConstant Token::IntegerBinConstant
| Token::IntegerHexConstant | Token::IntegerHexConstant
@ -1328,6 +1332,23 @@ impl Tree {
)) ))
) )
} }
Tag::GlobalRef(decl) => {
writeln_indented!(
indent,
writer,
"%{} = global_ref(%{}, name: {})",
node.get(),
decl.get(),
self.st
.symbol_path(decl)
.map(|p| p.mangle(self))
.unwrap_or(format!(
"SymbolTable entry not found?, %{}, %{}",
node.get(),
decl.get()
))
)
}
_ => unreachable!(), _ => unreachable!(),
} }
} }

68
src/string_table.rs Normal file
View file

@ -0,0 +1,68 @@
use std::{collections::BTreeMap, hash::Hasher};
/// A half-open byte range `start..end` into the buffer of a [`StringTable`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Index {
    /// Offset of the first byte of the entry.
    pub start: u32,
    /// Offset one past the last byte of the entry.
    pub end: u32,
}

impl Index {
    /// Creates an index covering the bytes `start..end`.
    pub fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }
}

/// An append-only byte buffer that hands out [`Index`] ranges and reuses the
/// existing range when identical bytes are inserted again.
#[derive(Debug, Default)]
pub struct StringTable {
    /// All inserted entries, stored back to back.
    bytes: Vec<u8>,
    /// Maps the hash of an entry's bytes to the range it occupies in `bytes`.
    indices: BTreeMap<u64, Index>,
}

impl core::ops::Index<Index> for StringTable {
    type Output = [u8];

    /// Returns the bytes covered by `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` does not lie within the table's buffer.
    fn index(&self, idx: Index) -> &Self::Output {
        &self.bytes[idx.start as usize..idx.end as usize]
    }
}

impl StringTable {
    /// Creates an empty table.
    pub fn new() -> Self {
        Self {
            bytes: Vec::new(),
            indices: BTreeMap::new(),
        }
    }

    /// Returns the entry at `idx` as a string slice.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds or if the referenced bytes are not
    /// valid UTF-8. `insert` accepts arbitrary bytes, so the contents have to
    /// be validated here instead of being trusted blindly.
    pub fn get_str(&self, idx: Index) -> &str {
        core::str::from_utf8(&self[idx]).expect("string table entry is not valid UTF-8")
    }

    /// Returns the raw bytes of the entry at `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds.
    pub fn get_bytes(&self, idx: Index) -> &[u8] {
        &self[idx]
    }

    /// Inserts `bytes` and returns the range they occupy.
    ///
    /// If identical bytes were inserted before, the previously returned range
    /// is handed out again and the buffer is left untouched.
    ///
    /// # Panics
    ///
    /// Panics if the table would grow beyond `u32::MAX` bytes.
    pub fn insert<B: AsRef<[u8]>>(&mut self, bytes: B) -> Index {
        let bytes = bytes.as_ref();
        let hash = {
            let mut hasher = std::hash::DefaultHasher::new();
            hasher.write(bytes);
            hasher.finish()
        };

        match self.indices.get(&hash).copied() {
            // Only reuse the entry when the contents really are identical.
            Some(idx) if &self[idx] == bytes => idx,
            // Hash collision with different contents: store the bytes without
            // touching the map, so the earlier entry keeps being deduplicated
            // and the caller still receives a range holding the right bytes.
            Some(_) => self.append(bytes),
            None => self.insert_inner(hash, bytes),
        }
    }

    /// Appends `bytes` and records the resulting range under `hash`.
    fn insert_inner(&mut self, hash: u64, bytes: &[u8]) -> Index {
        let index = self.append(bytes);
        self.indices.insert(hash, index);
        index
    }

    /// Appends `bytes` to the buffer and returns the range they occupy.
    fn append(&mut self, bytes: &[u8]) -> Index {
        let start = self.bytes.len();
        let end = start + bytes.len();
        // `start <= end`, so checking `end` covers both offsets. The check
        // happens before the buffer is modified.
        let end = u32::try_from(end).expect("string table exceeds u32::MAX bytes");
        let index = Index::new(start as u32, end);
        self.bytes.extend_from_slice(bytes);
        index
    }
}

View file

@ -3,7 +3,10 @@ use std::{
ptr::NonNull, ptr::NonNull,
}; };
use crate::ast::Node as AstNode; use crate::{
ast::{Node as AstNode, Tag},
parser::Tree,
};
#[derive(Debug)] #[derive(Debug)]
pub struct SymbolRecord { pub struct SymbolRecord {
@ -21,6 +24,33 @@ impl SymbolRecord {
} }
} }
/// The chain of scope nodes leading to a symbol, together with the symbol's name.
///
/// The first field holds one optional scope node per symbol table on the way
/// up; a table without a scope node contributes `None`.
#[allow(unused)]
pub struct SymbolPath(Vec<Option<AstNode>>, String);

impl SymbolPath {
    /// Builds a `::`-separated name for this path.
    ///
    /// Scope entries are visited in reverse order, with the first entry left
    /// out, and each `VarDecl`, `GlobalDecl` or `FunctionProto` scope
    /// contributes a `V`, `G` or `F` prefixed segment. The symbol's own name
    /// is appended last.
    ///
    /// # Panics
    ///
    /// Panics if a scope node's name cannot be resolved to an identifier string.
    pub fn mangle(&self, tree: &Tree) -> String {
        use core::fmt::Write;

        let mut buf = String::new();
        // NOTE(review): the first entry is skipped as in the original code;
        // confirm which scope that is meant to exclude.
        for node in self.0.iter().skip(1).rev() {
            // A table without a scope node (the root table is created with
            // `scope: None`) has nothing to contribute, so skip it rather
            // than panicking on `unwrap`.
            let Some(node) = *node else {
                continue;
            };
            match tree.nodes.get_node(node) {
                Tag::VarDecl { name, .. } => {
                    _ = write!(&mut buf, "V{}::", tree.nodes.get_ident_str(*name).unwrap());
                }
                Tag::GlobalDecl { name, .. } => {
                    _ = write!(&mut buf, "G{}::", tree.nodes.get_ident_str(*name).unwrap());
                }
                Tag::FunctionProto { name, .. } => {
                    _ = write!(&mut buf, "F{}::", tree.nodes.get_ident_str(*name).unwrap());
                }
                // Other scope kinds do not add a segment.
                _ => {}
            }
        }
        _ = write!(&mut buf, "{}", self.1);
        buf
    }
}
pub enum SymbolKind { pub enum SymbolKind {
Var, // or Let Var, // or Let
Orderless, Orderless,
@ -31,33 +61,38 @@ struct InnerSymbolTable {
ordered_identifiers: Vec<SymbolRecord>, ordered_identifiers: Vec<SymbolRecord>,
orderless_identifiers: HashMap<String, SymbolRecord>, orderless_identifiers: HashMap<String, SymbolRecord>,
children: BTreeMap<AstNode, NonNull<InnerSymbolTable>>, children: BTreeMap<AstNode, NonNull<InnerSymbolTable>>,
scope: Option<AstNode>,
parent: Option<NonNull<InnerSymbolTable>>, parent: Option<NonNull<InnerSymbolTable>>,
} }
impl InnerSymbolTable { impl InnerSymbolTable {
fn new() -> NonNull<InnerSymbolTable> { fn new() -> NonNull<InnerSymbolTable> {
Self::new_with(Self::new_inner) Self::new_with(Self::new_inner)
} }
fn new_with<G>(gen: G) -> NonNull<InnerSymbolTable> fn new_with<G>(gen: G) -> NonNull<InnerSymbolTable>
where where
G: FnOnce() -> Self, G: FnOnce() -> Self,
{ {
NonNull::new(Box::leak(Box::new(gen())) as *mut _).unwrap() NonNull::new(Box::leak(Box::new(gen())) as *mut _).unwrap()
} }
fn new_inner() -> InnerSymbolTable { fn new_inner() -> InnerSymbolTable {
Self { Self {
parent: None, parent: None,
ordered_identifiers: Vec::new(), ordered_identifiers: Vec::new(),
orderless_identifiers: HashMap::new(), orderless_identifiers: HashMap::new(),
children: BTreeMap::new(), children: BTreeMap::new(),
scope: None,
} }
} }
fn make_child(&self) -> NonNull<InnerSymbolTable> { fn make_child(&self, scope: AstNode) -> NonNull<InnerSymbolTable> {
Self::new_with(|| Self { Self::new_with(|| Self {
parent: NonNull::new(self.as_ptr()), parent: NonNull::new(self.as_ptr()),
ordered_identifiers: Vec::new(), ordered_identifiers: Vec::new(),
orderless_identifiers: HashMap::new(), orderless_identifiers: HashMap::new(),
children: BTreeMap::new(), children: BTreeMap::new(),
scope: Some(scope),
}) })
} }
@ -124,7 +159,7 @@ impl InnerSymbolTable {
F: FnOnce() -> (AstNode, SymbolKind), F: FnOnce() -> (AstNode, SymbolKind),
{ {
let this = self as *mut Self; let this = self as *mut Self;
if let Some(record) = unsafe { &*this }.find_symbol(name) { if let Some(record) = unsafe { &*this }.find_any_symbol(name) {
record record
} else { } else {
let (node, kind) = cb(); let (node, kind) = cb();
@ -145,12 +180,19 @@ impl InnerSymbolTable {
.or_else(|| self.parent_ref().and_then(|p| p.find_symbol_by_decl(decl))) .or_else(|| self.parent_ref().and_then(|p| p.find_symbol_by_decl(decl)))
} }
fn find_symbol(&self, name: &str) -> Option<&SymbolRecord> { fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> {
self.ordered_identifiers self.ordered_identifiers
.iter() .iter()
.find(|r| r.name.as_str() == name) .find(|r| r.name.as_str() == name)
.or_else(|| self.orderless_identifiers.get(name)) .or_else(|| self.orderless_identifiers.get(name))
.or_else(|| self.parent_ref().and_then(|p| p.find_symbol(name))) .or_else(|| self.parent_ref().and_then(|p| p.find_any_symbol(name)))
}
fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> {
self.ordered_identifiers
.iter()
.find(|r| r.name.as_str() == name)
.or_else(|| self.parent_ref().and_then(|p| p.find_ordered_symbol(name)))
} }
fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> { fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> {
@ -229,7 +271,7 @@ impl SymbolTableWrapper {
let child = if let Some(child) = self.current().children.get(&scope) { let child = if let Some(child) = self.current().children.get(&scope) {
*child *child
} else { } else {
let child = self.current().make_child(); let child = self.current().make_child(scope);
self.current_mut().children.insert(scope, child); self.current_mut().children.insert(scope, child);
child child
}; };
@ -271,14 +313,50 @@ impl SymbolTableWrapper {
self.current().find_symbol_by_decl(decl) self.current().find_symbol_by_decl(decl)
} }
pub fn find_symbol(&self, name: &str) -> Option<&SymbolRecord> { pub fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> {
self.current().find_symbol(name) self.current().find_any_symbol(name)
}
pub fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> {
self.current().find_ordered_symbol(name)
} }
pub fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> { pub fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> {
self.current().find_orderless_symbol(name) self.current().find_orderless_symbol(name)
} }
/// Looks up the symbol declared by `decl` and returns its [`SymbolPath`].
///
/// The search starts at the current table and moves outwards through the
/// parent tables, checking ordered identifiers before orderless ones in each
/// table. On a match, the path collects the `scope` of every ancestor of the
/// matching table, nearest ancestor first.
///
/// Returns `None` if no table on the way to the root declares `decl`.
pub fn symbol_path(&self, decl: AstNode) -> Option<SymbolPath> {
    let mut table = self.current();
    loop {
        let record = table
            .ordered_identifiers
            .iter()
            .find(|r| r.node() == decl)
            .or_else(|| {
                table
                    .orderless_identifiers
                    .values()
                    .find(|v| v.decl == decl)
            });

        if let Some(record) = record {
            let mut path = Vec::new();
            // Walk up with a separate cursor. The previous loop re-read the
            // same parent on every iteration and therefore never terminated
            // once the matching table had a parent.
            let mut cursor = table;
            while let Some(parent) = cursor.parent_ref() {
                path.push(parent.scope);
                cursor = parent;
            }
            return Some(SymbolPath(path, record.name.clone()));
        }

        // Not declared here: continue in the enclosing table, or give up at the root.
        table = table.parent_ref()?;
    }
}
pub fn extend_orderless<I>(&mut self, iter: I) pub fn extend_orderless<I>(&mut self, iter: I)
where where
I: IntoIterator<Item = (String, SymbolRecord)>, I: IntoIterator<Item = (String, SymbolRecord)>,

View file

@ -19,6 +19,7 @@ enum Inst {
Label(String), Label(String),
Constant(Value), Constant(Value),
UnresolvedRef, UnresolvedRef,
ExternRef(AstNode),
Ref(Node), Ref(Node),
Parameter { size: u32, align: u32 }, Parameter { size: u32, align: u32 },
Add { lhs: Node, rhs: Node }, Add { lhs: Node, rhs: Node },
@ -135,7 +136,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> {
} }
!0 !0
} }
Tag::Parameter { name, ty } => { Tag::Parameter { ty, .. } => {
let ty = self.tree.type_of_node(*ty); let ty = self.tree.type_of_node(*ty);
let param = self.ir.push(Inst::Parameter { let param = self.ir.push(Inst::Parameter {
size: ty.size_of(), size: ty.size_of(),
@ -314,6 +315,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> {
unresolved unresolved
} }
}, },
Tag::GlobalRef(decl) => self.ir.push(Inst::ExternRef(*decl)),
Tag::Ref { lhs } => { Tag::Ref { lhs } => {
let lhs = self.visit(*lhs); let lhs = self.visit(*lhs);
self.ir.push(Inst::AddressOf(lhs)) self.ir.push(Inst::AddressOf(lhs))
@ -435,6 +437,15 @@ impl IR {
Inst::Store { dest, source } => { Inst::Store { dest, source } => {
writeln_indented!(indent, w, "%{} = store ptr %{dest} from %{source}", node)?; writeln_indented!(indent, w, "%{} = store ptr %{dest} from %{source}", node)?;
} }
Inst::ExternRef(ast_node) => {
writeln_indented!(
indent,
w,
"%{} = extern reference ast-node %{}",
node,
ast_node.get()
)?;
}
} }
Ok(()) Ok(())
} }