use std::{ collections::{BTreeMap, HashMap}, ptr::NonNull, }; use crate::{ ast::{Node as AstNode, Tag}, parser::Tree, }; #[derive(Debug)] pub struct SymbolRecord { name: String, decl: AstNode, } impl SymbolRecord { pub fn node(&self) -> AstNode { self.decl } pub fn name(&self) -> &str { &self.name } } #[allow(unused)] pub struct SymbolPath(Vec>, String); impl SymbolPath { pub fn mangle(&self, tree: &Tree) -> String { use core::fmt::Write; let mut buf = String::new(); for node in self.0.iter().skip(1).rev() { match tree.nodes.get_node(node.unwrap()) { Tag::VarDecl { name, .. } => { _ = write!(&mut buf, "V{}::", tree.get_ident_str(*name).unwrap()); } Tag::GlobalDecl { name, .. } => { _ = write!(&mut buf, "G{}::", tree.get_ident_str(*name).unwrap()); } Tag::FunctionProto { name, .. } => { _ = write!(&mut buf, "F{}::", tree.get_ident_str(*name).unwrap()); } _ => {} } } _ = write!(&mut buf, "{}", self.1); buf } } pub enum SymbolKind { Var, // or Let Orderless, } #[derive(Debug, Default)] struct InnerSymbolTable { ordered_identifiers: Vec, orderless_identifiers: HashMap, children: BTreeMap>, scope: Option, parent: Option>, } impl InnerSymbolTable { fn new() -> NonNull { Self::new_with(Self::new_inner) } fn new_with(gen: G) -> NonNull where G: FnOnce() -> Self, { NonNull::new(Box::leak(Box::new(gen())) as *mut _).unwrap() } fn new_inner() -> InnerSymbolTable { Self { parent: None, ordered_identifiers: Vec::new(), orderless_identifiers: HashMap::new(), children: BTreeMap::new(), scope: None, } } fn make_child(&self, scope: AstNode) -> NonNull { Self::new_with(|| Self { parent: NonNull::new(self.as_ptr()), ordered_identifiers: Vec::new(), orderless_identifiers: HashMap::new(), children: BTreeMap::new(), scope: Some(scope), }) } fn parent(&self) -> Option> { self.parent } fn parent_ref(&self) -> Option<&InnerSymbolTable> { unsafe { self.parent.map(|p| p.as_ref()) } } fn parent_mut(&mut self) -> Option<&mut InnerSymbolTable> { unsafe { self.parent.map(|mut p| p.as_mut()) } } fn as_ptr(&self) -> *mut Self { self as *const _ as *mut _ } fn root(&self) -> NonNull { self.parent() .map(|p| unsafe { p.as_ref().root() }) .unwrap_or(NonNull::new(self.as_ptr()).unwrap()) } } impl Drop for InnerSymbolTable { fn drop(&mut self) { for child in self.children.values() { unsafe { _ = Box::from_raw(child.as_ptr()); } } } } impl InnerSymbolTable { fn insert_symbol(&mut self, name: &str, node: AstNode, kind: SymbolKind) -> &SymbolRecord { match kind { SymbolKind::Var => { self.ordered_identifiers.push(SymbolRecord { name: name.to_owned(), decl: node, }); self.ordered_identifiers.last().unwrap() } _ => self.insert_orderless_symbol(name, node), } } fn insert_orderless_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord { self.orderless_identifiers.insert( name.to_owned(), SymbolRecord { name: name.to_owned(), decl: node, }, ); self.orderless_identifiers.get(name).unwrap() } fn find_symbol_or_insert_with<'a, F>(&'a mut self, name: &str, cb: F) -> &'a SymbolRecord where F: FnOnce() -> (AstNode, SymbolKind), { let this = self as *mut Self; if let Some(record) = unsafe { &*this }.find_any_symbol(name) { record } else { let (node, kind) = cb(); self.insert_symbol(name, node, kind) } } fn find_symbol_by_decl(&self, decl: AstNode) -> Option<&SymbolRecord> { self.ordered_identifiers .iter() .find(|r| r.decl == decl) .or_else(|| { self.orderless_identifiers .iter() .find(|(_, v)| v.decl == decl) .map(|(_, v)| v) }) .or_else(|| self.parent_ref().and_then(|p| p.find_symbol_by_decl(decl))) } fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> { self.ordered_identifiers .iter() .find(|r| r.name.as_str() == name) .or_else(|| self.orderless_identifiers.get(name)) .or_else(|| self.parent_ref().and_then(|p| p.find_any_symbol(name))) } fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> { self.ordered_identifiers .iter() .find(|r| r.name.as_str() == name) .or_else(|| self.parent_ref().and_then(|p| p.find_ordered_symbol(name))) } fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> { self.orderless_identifiers.get(name).or_else(|| { self.parent_ref() .and_then(|p| p.find_orderless_symbol(name)) }) } fn extend_orderless(&mut self, iter: I) where I: IntoIterator, { self.orderless_identifiers.extend(iter) } fn extract_orderless_if( &mut self, pred: F, ) -> std::collections::hash_map::ExtractIf where F: FnMut(&String, &mut SymbolRecord) -> bool, { self.orderless_identifiers.extract_if(pred) } } #[derive(Debug)] pub struct SymbolTableWrapper { current: NonNull, } impl Drop for SymbolTableWrapper { fn drop(&mut self) { unsafe { _ = Box::from_raw(self.current.as_ref().root().as_ptr()); } } } impl SymbolTableWrapper { pub fn new() -> SymbolTableWrapper { Self { current: InnerSymbolTable::new(), } } fn current(&self) -> &InnerSymbolTable { unsafe { self.current.as_ref() } } fn current_mut(&mut self) -> &mut InnerSymbolTable { unsafe { self.current.as_mut() } } #[allow(dead_code)] fn root_ref(&self) -> &InnerSymbolTable { unsafe { self.current().root().as_ref() } } fn root_mut(&mut self) -> &mut InnerSymbolTable { unsafe { self.current_mut().root().as_mut() } } #[allow(dead_code)] fn parent_ref(&self) -> Option<&InnerSymbolTable> { self.current().parent_ref() } #[allow(dead_code)] fn parent_mut(&mut self) -> Option<&mut InnerSymbolTable> { self.current_mut().parent_mut() } pub fn into_child(&mut self, scope: AstNode) { let child = if let Some(child) = self.current().children.get(&scope) { *child } else { let child = self.current().make_child(scope); self.current_mut().children.insert(scope, child); child }; self.current = child; } pub fn into_parent(&mut self) { if let Some(parent) = self.current().parent() { self.current = parent; } } } impl SymbolTableWrapper { pub fn insert_symbol(&mut self, name: &str, node: AstNode, kind: SymbolKind) -> &SymbolRecord { self.current_mut().insert_symbol(name, node, kind) } pub fn find_root_symbol(&mut self, name: &str) -> Option<&SymbolRecord> { self.root_mut().find_orderless_symbol(name) } pub fn insert_root_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord { self.root_mut().insert_orderless_symbol(name, node) } pub fn insert_orderless_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord { self.current_mut().insert_orderless_symbol(name, node) } pub fn find_symbol_or_insert_with<'a, F>(&'a mut self, name: &str, cb: F) -> &'a SymbolRecord where F: FnOnce() -> (AstNode, SymbolKind), { self.current_mut().find_symbol_or_insert_with(name, cb) } pub fn find_symbol_by_decl(&self, decl: AstNode) -> Option<&SymbolRecord> { self.current().find_symbol_by_decl(decl) } pub fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> { self.current().find_any_symbol(name) } pub fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> { self.current().find_ordered_symbol(name) } pub fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> { self.current().find_orderless_symbol(name) } pub fn symbol_path(&self, decl: AstNode) -> Option { let mut table = self.current(); loop { if let Some(record) = table .ordered_identifiers .iter() .find(|r| r.node() == decl) .or_else(|| { table .orderless_identifiers .iter() .find(|(_, v)| v.decl == decl) .map(|(_, v)| v) }) { let mut path = Vec::new(); while let Some(parent) = table.parent_ref() { path.push(parent.scope); } return Some(SymbolPath(path, record.name.clone())); }; let Some(parent) = table.parent_ref() else { break; }; table = parent; } None } pub fn extend_orderless(&mut self, iter: I) where I: IntoIterator, { self.current_mut().extend_orderless(iter) } pub fn extract_orderless_if( &mut self, pred: F, ) -> std::collections::hash_map::ExtractIf where F: FnMut(&String, &mut SymbolRecord) -> bool, { self.current_mut().extract_orderless_if(pred) } } pub type SymbolTable = SymbolTableWrapper; pub mod syms2 { /*! Coming from the ast, we have a `DeclRef` with an interned identifier `ident` and want to find the symbol it refers to. To help, we have a struct keeping track of all accessible scopes. Now, we want to look through any accessible scope `s` for a symbol with the name `ident`. Thus: `Symbol {scope: `s`, name: `ident`, ..}`. We might also know the type of the symbol we are looking for, if we want to permit fields/variables and methods/functions sharing names. Since I want to allow variable shadowing for local variables, some strategy to differentiate between shadowed variables must be employed: - keys of type SymbolKind::Local might point to a list of values with source locations - keys might contain source locations. Any symbol pointed at from within the ast must again point at an ast object. Thus: `Key` -> `AstIndex` Exception: `Key::ScopeByIndex` -> `InternIndex` */ use std::collections::BTreeMap; use std::fmt::Debug; use crate::ast2::intern::Index as InternIndex; use crate::ast2::Index as AstIndex; use crate::lexer::SourceLocation; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum Key { ScopeByName { name: InternIndex, }, /// not all scopes have a name, as some are anonymous blocks or otherwise nameless ScopeByIndex { ast: AstIndex, }, Symbol { scope: AstIndex, name: InternIndex, kind: SymbolKind, }, } #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum SymbolKind { __First, Const, Function, Type, __TypeScope, Scope, ParentScope, Local(SourceLocation), __Last, } #[derive(Clone, Copy)] pub union Payload { ast_index: AstIndex, intern_index: InternIndex, } #[derive(Debug)] #[allow(dead_code)] enum ExpandedPayload { Ast(AstIndex), Intern(InternIndex), } impl Payload { fn new_ast(ast: AstIndex) -> Payload { Self { ast_index: ast } } fn new_intern(intern: InternIndex) -> Payload { Self { intern_index: intern, } } fn as_ast(&self) -> AstIndex { unsafe { self.ast_index } } fn as_intern(&self) -> InternIndex { unsafe { self.intern_index } } } pub struct Symbols { inner: BTreeMap, } impl Debug for Symbols { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "Symbols [")?; if f.alternate() { writeln!(f, "")?; } let entries = self.inner.iter().map(|(key, val)| { let payload = match key { Key::ScopeByIndex { .. } => ExpandedPayload::Intern(val.as_intern()), _ => ExpandedPayload::Ast(val.as_ast()), }; (*key, payload) }); f.debug_list().entries(entries).finish()?; write!(f, "]")?; if f.alternate() { writeln!(f, "")?; } Ok(()) } } // checks for each scope in scopes_in_tree Symbol { scope, kind: SymbolKind::Local, 0}..Symbol { scope, kind: SymbolKind::Scope, u32::MAX} struct SymbolTreePos { scopes_in_scope: Vec, } impl Symbols { pub fn new() -> Symbols { Self { inner: BTreeMap::new(), } } pub fn insert_scope(&mut self, name: InternIndex, ast: AstIndex) { self.inner .insert(Key::ScopeByIndex { ast }, Payload::new_intern(name)); self.inner .insert(Key::ScopeByName { name }, Payload::new_ast(ast)); } pub fn find_symbol( &self, scope: AstIndex, name: InternIndex, loc: SourceLocation, ) -> Option { use SymbolKind::*; let range = self.inner.range( Key::Symbol { scope, name, kind: __First, }..=Key::Symbol { scope, name, kind: Local(loc), }, ); if let Some((_, payload)) = range.rev().next() { Some(payload.as_ast()) } else { if let Some(parent) = self.inner.get(&Key::Symbol { scope, name: InternIndex::invalid(), kind: ParentScope, }) { self.find_symbol(parent.as_ast(), name, loc) } else { None } } } pub fn find_type_symbol( &self, scope: AstIndex, name: InternIndex, loc: SourceLocation, ) -> Option { use SymbolKind::*; let range = self.inner.range( Key::Symbol { scope, name, kind: __First, }..=Key::Symbol { scope, name, kind: __TypeScope, }, ); if let Some((_, payload)) = range.rev().next() { Some(payload.as_ast()) } else { if let Some(parent) = self.inner.get(&Key::Symbol { scope, name: InternIndex::invalid(), kind: ParentScope, }) { self.find_symbol(parent.as_ast(), name, loc) } else { None } } } pub fn insert_symbol( &mut self, scope: AstIndex, name: InternIndex, kind: SymbolKind, ast: AstIndex, ) { self.inner .insert(Key::Symbol { scope, name, kind }, Payload::new_ast(ast)); } } }