SeaLang/src/symbol_table.rs

632 lines
18 KiB
Rust

use std::{
collections::{BTreeMap, HashMap},
ptr::NonNull,
};
use crate::{
ast::{Node as AstNode, Tag},
parser::Tree,
};
/// A named symbol together with the AST node that declares it.
#[derive(Debug)]
pub struct SymbolRecord {
    /// Identifier the symbol was registered under.
    name: String,
    /// AST node recorded as the symbol's declaration.
    decl: AstNode,
}

impl SymbolRecord {
    /// Returns the AST node stored as this symbol's declaration.
    pub fn node(&self) -> AstNode {
        let Self { decl, .. } = self;
        *decl
    }

    /// Returns the identifier this symbol was registered under.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }
}
/// Scopes enclosing a symbol (`.0`) plus the symbol's own name (`.1`).
///
/// Scope entries are optional because a symbol table may have no scope node:
/// the root table is created with `scope: None`.
#[allow(unused)]
pub struct SymbolPath(Vec<Option<AstNode>>, String);

impl SymbolPath {
    /// Builds a mangled name for the symbol.
    ///
    /// The recorded scopes are visited in reverse storage order, leaving out
    /// the first stored entry. Each scope node contributes a prefix depending
    /// on its tag:
    ///
    /// * `Tag::VarDecl`       -> `V<name>::`
    /// * `Tag::GlobalDecl`    -> `G<name>::`
    /// * `Tag::FunctionProto` -> `F<name>::`
    ///
    /// Any other tag contributes nothing. The symbol's own name is appended
    /// last.
    ///
    /// Entries without a scope node (`None`) are skipped instead of being
    /// unwrapped, so a path that contains the root table's scope no longer
    /// panics.
    ///
    /// # Panics
    ///
    /// Panics if `tree.get_ident_str` returns `None` for the name of one of
    /// the matched declarations.
    pub fn mangle(&self, tree: &Tree) -> String {
        use core::fmt::Write;

        let mut buf = String::new();
        // NOTE(review): the first stored entry is deliberately left out by the
        // original code; presumably it is the scope closest to the symbol —
        // confirm against `SymbolTableWrapper::symbol_path`.
        for scope in self.0.iter().skip(1).rev() {
            // A table without a scope node (e.g. the root) has nothing to add.
            let Some(node) = *scope else {
                continue;
            };
            match tree.nodes.get_node(node) {
                Tag::VarDecl { name, .. } => {
                    // Writing into a `String` cannot fail, so the result is discarded.
                    _ = write!(&mut buf, "V{}::", tree.get_ident_str(*name).unwrap());
                }
                Tag::GlobalDecl { name, .. } => {
                    _ = write!(&mut buf, "G{}::", tree.get_ident_str(*name).unwrap());
                }
                Tag::FunctionProto { name, .. } => {
                    _ = write!(&mut buf, "F{}::", tree.get_ident_str(*name).unwrap());
                }
                _ => {}
            }
        }
        _ = write!(&mut buf, "{}", self.1);
        buf
    }
}
/// Selects how a symbol is stored in a symbol table.
///
/// The common value traits are derived so the kind can be printed, copied and
/// compared by callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SymbolKind {
    /// Stored in the table's insertion-ordered list of identifiers.
    Var, // or Let
    /// Stored in the table's by-name map; insertion order is not kept.
    Orderless,
}
/// Symbols of a single scope, linked to the enclosing and nested scopes
/// through raw pointers.
///
/// Tables are heap-allocated by [`InnerSymbolTable::new_with`]; a table frees
/// its children in its `Drop` implementation.
#[derive(Debug, Default)]
struct InnerSymbolTable {
    /// Symbols inserted as `SymbolKind::Var`, in insertion order.
    ordered_identifiers: Vec<SymbolRecord>,
    /// All other symbols, keyed by name.
    orderless_identifiers: HashMap<String, SymbolRecord>,
    /// Nested tables, keyed by the AST node that was passed to `make_child`.
    children: BTreeMap<AstNode, NonNull<InnerSymbolTable>>,
    /// AST node this table was created for; `None` for a table built by `new_inner`.
    scope: Option<AstNode>,
    /// Enclosing table; `None` for a table built by `new_inner`.
    parent: Option<NonNull<InnerSymbolTable>>,
}

impl InnerSymbolTable {
    /// Heap-allocates an empty table that has neither parent nor scope.
    fn new() -> NonNull<InnerSymbolTable> {
        Self::new_with(Self::new_inner)
    }

    /// Heap-allocates the table produced by `r#gen` and hands back the raw,
    /// non-null pointer to it. The allocation is not freed automatically.
    fn new_with<G>(r#gen: G) -> NonNull<InnerSymbolTable>
    where
        G: FnOnce() -> Self,
    {
        let table = Box::new(r#gen());
        Box::into_non_null(table)
    }

    /// Builds an empty table value with no parent and no scope.
    fn new_inner() -> InnerSymbolTable {
        // Every field's `Default` is exactly the empty/`None` value wanted here.
        Self::default()
    }

    /// Heap-allocates an empty table for `scope` whose parent is `self`.
    ///
    /// The new table is NOT registered in `self.children`; the caller does that.
    fn make_child(&self, scope: AstNode) -> NonNull<InnerSymbolTable> {
        let parent = NonNull::new(self.as_ptr());
        Self::new_with(move || Self {
            ordered_identifiers: Vec::new(),
            orderless_identifiers: HashMap::new(),
            children: BTreeMap::new(),
            scope: Some(scope),
            parent,
        })
    }

    /// Raw pointer to the enclosing table, if any.
    fn parent(&self) -> Option<NonNull<InnerSymbolTable>> {
        self.parent
    }

    /// Shared reference to the enclosing table, if any.
    fn parent_ref(&self) -> Option<&InnerSymbolTable> {
        let parent = self.parent?;
        // SAFETY: NOTE(review) relies on the parent allocation still being
        // alive; presumably guaranteed because children are only freed by
        // their parent's `Drop` — confirm.
        Some(unsafe { parent.as_ref() })
    }

    /// Exclusive reference to the enclosing table, if any.
    fn parent_mut(&mut self) -> Option<&mut InnerSymbolTable> {
        let mut parent = self.parent?;
        // SAFETY: NOTE(review) same liveness assumption as `parent_ref`;
        // additionally assumes no other reference to the parent is in use.
        Some(unsafe { parent.as_mut() })
    }

    /// Address of this table as a mutable raw pointer.
    fn as_ptr(&self) -> *mut Self {
        let shared: *const Self = self;
        shared.cast_mut()
    }

    /// Pointer to the outermost ancestor of this table (the table itself when
    /// it has no parent).
    fn root(&self) -> NonNull<InnerSymbolTable> {
        let mut table = self;
        while let Some(enclosing) = table.parent_ref() {
            table = enclosing;
        }
        // `as_ptr` is derived from a reference, so it is never null.
        NonNull::new(table.as_ptr()).unwrap()
    }
}
impl Drop for InnerSymbolTable {
    /// Frees every table registered in `children`; each child's own `drop`
    /// then frees that child's children in turn.
    fn drop(&mut self) {
        self.children.values().for_each(|child| {
            // SAFETY: NOTE(review) assumes every pointer in `children` came
            // from `Box::into_non_null` (see `new_with`) and is freed nowhere
            // else — confirm.
            let owned = unsafe { Box::from_raw(child.as_ptr()) };
            drop(owned);
        });
    }
}
impl InnerSymbolTable {
    /// Stores `name` as declared by `node` in this table and returns the
    /// stored record.
    ///
    /// `SymbolKind::Var` appends to the ordered list; any other kind is
    /// delegated to [`Self::insert_orderless_symbol`].
    fn insert_symbol(&mut self, name: &str, node: AstNode, kind: SymbolKind) -> &SymbolRecord {
        if !matches!(kind, SymbolKind::Var) {
            return self.insert_orderless_symbol(name, node);
        }
        let record = SymbolRecord {
            name: name.to_owned(),
            decl: node,
        };
        self.ordered_identifiers.push(record);
        // Just pushed, so the list cannot be empty.
        self.ordered_identifiers.last().unwrap()
    }

    /// Stores `name` in the by-name map, replacing any record already stored
    /// under that name in this table, and returns the stored record.
    fn insert_orderless_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord {
        let record = SymbolRecord {
            name: name.to_owned(),
            decl: node,
        };
        self.orderless_identifiers.insert(name.to_owned(), record);
        // The key was inserted on the previous line, so indexing cannot fail.
        &self.orderless_identifiers[name]
    }

    /// Returns the record found by [`Self::find_any_symbol`]; if there is
    /// none, calls `cb` and inserts the symbol it describes into this table.
    fn find_symbol_or_insert_with<'a, F>(&'a mut self, name: &str, cb: F) -> &'a SymbolRecord
    where
        F: FnOnce() -> (AstNode, SymbolKind),
    {
        let this = self as *mut Self;
        // SAFETY: `this` was just created from `self`, so it is valid. The raw
        // pointer detour only detaches the lookup's borrow from `self` so the
        // insertion below is accepted; the shared reference is not used once
        // the lookup has returned `None`.
        let existing = unsafe { &*this }.find_any_symbol(name);
        match existing {
            Some(record) => record,
            None => {
                let (node, kind) = cb();
                self.insert_symbol(name, node, kind)
            }
        }
    }

    /// Finds the record whose declaration node equals `decl`, searching this
    /// table (ordered list first, then the by-name map) and then the parents.
    fn find_symbol_by_decl(&self, decl: AstNode) -> Option<&SymbolRecord> {
        let local = self
            .ordered_identifiers
            .iter()
            .chain(self.orderless_identifiers.values())
            .find(|record| record.decl == decl);
        match local {
            Some(record) => Some(record),
            None => self.parent_ref()?.find_symbol_by_decl(decl),
        }
    }

    /// Finds a record named `name`, searching each table from this one
    /// outwards; within a table the ordered list is consulted before the
    /// by-name map.
    ///
    /// NOTE(review): within the ordered list the FIRST inserted record with a
    /// matching name wins — confirm this is intended if names can repeat.
    fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        let mut scope = Some(self);
        while let Some(table) = scope {
            let hit = table
                .ordered_identifiers
                .iter()
                .find(|record| record.name == name)
                .or_else(|| table.orderless_identifiers.get(name));
            if hit.is_some() {
                return hit;
            }
            scope = table.parent_ref();
        }
        None
    }

    /// Like [`Self::find_any_symbol`], but only looks at the ordered lists.
    fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        let local = self
            .ordered_identifiers
            .iter()
            .find(|record| record.name == name);
        if local.is_some() {
            return local;
        }
        self.parent_ref()?.find_ordered_symbol(name)
    }

    /// Like [`Self::find_any_symbol`], but only looks at the by-name maps.
    fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        match self.orderless_identifiers.get(name) {
            found @ Some(_) => found,
            None => self.parent_ref()?.find_orderless_symbol(name),
        }
    }

    /// Adds every `(name, record)` pair of `iter` to this table's by-name map.
    fn extend_orderless<I>(&mut self, iter: I)
    where
        I: IntoIterator<Item = (String, SymbolRecord)>,
    {
        Extend::extend(&mut self.orderless_identifiers, iter)
    }

    /// Returns the by-name map's `extract_if` iterator for `pred`.
    fn extract_orderless_if<F>(
        &mut self,
        pred: F,
    ) -> std::collections::hash_map::ExtractIf<String, SymbolRecord, F>
    where
        F: FnMut(&String, &mut SymbolRecord) -> bool,
    {
        let by_name = &mut self.orderless_identifiers;
        by_name.extract_if(pred)
    }
}
/// Cursor into a tree of symbol tables; dropping it frees the whole tree.
#[derive(Debug)]
pub struct SymbolTableWrapper {
    /// Table that lookups and insertions currently operate on.
    current: NonNull<InnerSymbolTable>,
}

impl Drop for SymbolTableWrapper {
    /// Frees the root table; the root's own `Drop` then frees its children.
    fn drop(&mut self) {
        // SAFETY: NOTE(review) assumes `current` points at a live table and
        // that the root was allocated through `Box` (see
        // `InnerSymbolTable::new_with`) and is not freed elsewhere — confirm.
        unsafe {
            let root = self.current.as_ref().root();
            drop(Box::from_raw(root.as_ptr()));
        }
    }
}
impl SymbolTableWrapper {
    /// Creates a wrapper positioned on a freshly allocated, empty root table.
    pub fn new() -> SymbolTableWrapper {
        let current = InnerSymbolTable::new();
        Self { current }
    }

    /// Shared access to the table the cursor is positioned on.
    fn current(&self) -> &InnerSymbolTable {
        // SAFETY: NOTE(review) assumes `current` always points at a live
        // table owned by this wrapper's tree — confirm.
        unsafe { &*self.current.as_ptr() }
    }

    /// Exclusive access to the table the cursor is positioned on.
    fn current_mut(&mut self) -> &mut InnerSymbolTable {
        // SAFETY: NOTE(review) same liveness assumption as `current`.
        unsafe { &mut *self.current.as_ptr() }
    }

    /// Shared access to the outermost table.
    #[allow(dead_code)]
    fn root_ref(&self) -> &InnerSymbolTable {
        let root = self.current().root();
        // SAFETY: NOTE(review) assumes the root table is alive for as long as
        // the wrapper is — confirm.
        unsafe { &*root.as_ptr() }
    }

    /// Exclusive access to the outermost table.
    fn root_mut(&mut self) -> &mut InnerSymbolTable {
        let root = self.current_mut().root();
        // SAFETY: NOTE(review) same assumption as `root_ref`.
        unsafe { &mut *root.as_ptr() }
    }

    /// Shared access to the parent of the current table, if it has one.
    #[allow(dead_code)]
    fn parent_ref(&self) -> Option<&InnerSymbolTable> {
        InnerSymbolTable::parent_ref(self.current())
    }

    /// Exclusive access to the parent of the current table, if it has one.
    #[allow(dead_code)]
    fn parent_mut(&mut self) -> Option<&mut InnerSymbolTable> {
        InnerSymbolTable::parent_mut(self.current_mut())
    }

    /// Moves the cursor into the child table registered for `scope`,
    /// creating and registering that child first if it does not exist yet.
    pub fn into_child(&mut self, scope: AstNode) {
        let existing = self.current().children.get(&scope).copied();
        let child = match existing {
            Some(child) => child,
            None => {
                let created = self.current().make_child(scope);
                self.current_mut().children.insert(scope, created);
                created
            }
        };
        self.current = child;
    }

    /// Moves the cursor to the parent table; does nothing when the current
    /// table has no parent.
    pub fn into_parent(&mut self) {
        let Some(parent) = self.current().parent() else {
            return;
        };
        self.current = parent;
    }
}
impl SymbolTableWrapper {
    /// Inserts `name`, declared by `node`, into the current table and returns
    /// the stored record. `kind` selects ordered or by-name storage.
    pub fn insert_symbol(&mut self, name: &str, node: AstNode, kind: SymbolKind) -> &SymbolRecord {
        self.current_mut().insert_symbol(name, node, kind)
    }

    /// Looks `name` up among the by-name symbols of the root table.
    pub fn find_root_symbol(&mut self, name: &str) -> Option<&SymbolRecord> {
        self.root_mut().find_orderless_symbol(name)
    }

    /// Inserts `name` into the by-name symbols of the root table.
    pub fn insert_root_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord {
        self.root_mut().insert_orderless_symbol(name, node)
    }

    /// Inserts `name` into the by-name symbols of the current table.
    pub fn insert_orderless_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord {
        self.current_mut().insert_orderless_symbol(name, node)
    }

    /// Returns the symbol named `name` visible from the current table; if
    /// there is none, calls `cb` and inserts the symbol it describes into the
    /// current table.
    pub fn find_symbol_or_insert_with<'a, F>(&'a mut self, name: &str, cb: F) -> &'a SymbolRecord
    where
        F: FnOnce() -> (AstNode, SymbolKind),
    {
        self.current_mut().find_symbol_or_insert_with(name, cb)
    }

    /// Finds the symbol whose declaration node is `decl`, searching the
    /// current table and then its ancestors.
    pub fn find_symbol_by_decl(&self, decl: AstNode) -> Option<&SymbolRecord> {
        self.current().find_symbol_by_decl(decl)
    }

    /// Finds a symbol named `name` of any kind, searching the current table
    /// and then its ancestors.
    pub fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.current().find_any_symbol(name)
    }

    /// Finds an ordered symbol named `name`, searching the current table and
    /// then its ancestors.
    pub fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.current().find_ordered_symbol(name)
    }

    /// Finds a by-name symbol named `name`, searching the current table and
    /// then its ancestors.
    pub fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.current().find_orderless_symbol(name)
    }

    /// Builds the [`SymbolPath`] of the symbol declared by `decl`.
    ///
    /// Starting at the current table and moving outwards, the first table
    /// holding a record for `decl` is taken as the owner. The path then lists
    /// the `scope` of every ancestor of that owner, nearest ancestor first,
    /// followed by the symbol's name. Returns `None` when no table on the way
    /// to the root declares `decl`.
    ///
    /// The ancestor walk uses its own cursor. The previous loop re-tested the
    /// owner's parent on every iteration without ever advancing, so it never
    /// terminated (growing the path without bound) whenever the owner had a
    /// parent.
    pub fn symbol_path(&self, decl: AstNode) -> Option<SymbolPath> {
        let mut table = self.current();
        loop {
            let owned = table
                .ordered_identifiers
                .iter()
                .chain(table.orderless_identifiers.values())
                .find(|record| record.decl == decl);

            if let Some(record) = owned {
                let mut path = Vec::new();
                let mut ancestor = table.parent_ref();
                while let Some(enclosing) = ancestor {
                    path.push(enclosing.scope);
                    ancestor = enclosing.parent_ref();
                }
                return Some(SymbolPath(path, record.name.clone()));
            }

            // Not declared here: continue with the enclosing table, or give
            // up once the root has been searched.
            table = table.parent_ref()?;
        }
    }

    /// Adds every `(name, record)` pair of `iter` to the by-name symbols of
    /// the current table.
    pub fn extend_orderless<I>(&mut self, iter: I)
    where
        I: IntoIterator<Item = (String, SymbolRecord)>,
    {
        self.current_mut().extend_orderless(iter)
    }

    /// Returns the `extract_if` iterator over the by-name symbols of the
    /// current table for `pred`.
    pub fn extract_orderless_if<F>(
        &mut self,
        pred: F,
    ) -> std::collections::hash_map::ExtractIf<String, SymbolRecord, F>
    where
        F: FnMut(&String, &mut SymbolRecord) -> bool,
    {
        self.current_mut().extract_orderless_if(pred)
    }
}
/// Alias that exposes `SymbolTableWrapper` under the name `SymbolTable`.
pub type SymbolTable = SymbolTableWrapper;
pub mod syms2 {
/*!
Coming from the AST, we have a `DeclRef` with an interned identifier `ident`
and want to find the symbol it refers to.

To help, we have a struct keeping track of all accessible scopes. We then
look through every accessible scope `s` for a symbol with the name `ident`.

Thus: `Symbol { scope: s, name: ident, .. }`.

We might also know the type of the symbol we are looking for, if we want to
permit fields/variables and methods/functions to share names.

Since I want to allow shadowing of local variables, some strategy to
differentiate between shadowed variables must be employed. Two options:

- keys of kind `SymbolKind::Local` might point to a list of values with
  source locations, or
- keys might themselves contain source locations.

Any symbol pointed at from within the AST must again point at an AST object.

Thus: `Key` -> `AstIndex`

Exception: `Key::ScopeByIndex` -> `InternIndex`
*/
use std::collections::BTreeMap;
use std::fmt::Debug;
use crate::ast2::Index as AstIndex;
use crate::ast2::intern::Index as InternIndex;
use crate::lexer::SourceLocation;
/// Key type of the map inside `Symbols`.
///
/// The derived ordering (variant order first, then the fields in declaration
/// order) is what the range queries in `Symbols::find_symbol` and
/// `Symbols::find_type_symbol` rely on, so variants and fields must not be
/// reordered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Key {
    /// A scope addressed by its interned name.
    ScopeByName {
        name: InternIndex,
    },
    /// not all scopes have a name, as some are anonymous blocks or otherwise nameless
    ScopeByIndex {
        ast: AstIndex,
    },
    /// A symbol `name` of the given `kind` inside `scope`.
    Symbol {
        scope: AstIndex,
        name: InternIndex,
        kind: SymbolKind,
    },
}

impl Key {
    /// Returns the symbol kind of a `Key::Symbol`, and `None` for scope keys.
    pub fn kind(&self) -> Option<SymbolKind> {
        if let Key::Symbol { kind, .. } = *self {
            Some(kind)
        } else {
            None
        }
    }
}
/// Kind of a local declaration, with a fixed `u32` representation.
///
/// The common value traits are derived so the kind can be printed, copied and
/// compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum DeclKind {
    /// A local variable; encoded as `1`.
    Local = 1,
    /// A function parameter; encoded as `2` (the discriminant following `Local`).
    Parameter,
}

impl DeclKind {
    /// Decodes a `DeclKind` from its `u32` discriminant.
    ///
    /// Returns `None` for any value that is not a discriminant of the enum.
    pub fn from_u32(v: u32) -> Option<Self> {
        // Derive the accepted values from the enum itself so this decoder
        // cannot drift from the declared discriminants.
        const LOCAL: u32 = DeclKind::Local as u32;
        const PARAMETER: u32 = DeclKind::Parameter as u32;

        match v {
            LOCAL => Some(Self::Local),
            PARAMETER => Some(Self::Parameter),
            _ => None,
        }
    }
}
impl From<SymbolKind> for Option<DeclKind> {
    /// Maps the two declaration-like symbol kinds to their `DeclKind`,
    /// dropping the attached source location; every other kind maps to `None`.
    fn from(value: SymbolKind) -> Self {
        match value {
            SymbolKind::Local(_) => Some(DeclKind::Local),
            SymbolKind::Parameter(_) => Some(DeclKind::Parameter),
            SymbolKind::__First
            | SymbolKind::Const
            | SymbolKind::Function
            | SymbolKind::Type
            | SymbolKind::__TypeScope
            | SymbolKind::Scope
            | SymbolKind::ParentScope
            | SymbolKind::__Last => None,
        }
    }
}
/// Kind of a symbol stored under a `Key::Symbol`.
///
/// The variant order is significant: the derived `Ord` follows it, and the
/// lookups in `Symbols` select kinds with key ranges
/// (`__First ..= Local(loc)` in `find_symbol`,
/// `__First ..= __TypeScope` in `find_type_symbol`).
/// Do not reorder variants without revisiting those ranges.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SymbolKind {
/// Smallest kind; used as the lower bound of lookup ranges.
__First,
Const,
Function,
Type,
/// Upper bound of the range searched by `Symbols::find_type_symbol`, so that
/// lookup only sees the kinds declared above this one.
__TypeScope,
Scope,
/// Kind of the key (with `InternIndex::invalid()` as name) under which the
/// lookups fetch the parent of a scope.
ParentScope,
/// Carries a source location, which takes part in the ordering.
Parameter(SourceLocation),
/// Carries a source location; `Symbols::find_symbol` uses `Local(loc)` as its
/// upper bound, so locals ordered after `loc` fall outside the searched range.
Local(SourceLocation),
/// NOTE(review): not referenced in the visible code; presumably the upper
/// sentinel matching `__First` — confirm.
__Last,
}
/// Untagged value stored in `Symbols` for a `Key`.
///
/// Which field is meaningful is not recorded here; it follows from the key the
/// payload is stored under (see `Symbols::insert_scope` and
/// `Symbols::insert_symbol`).
#[derive(Clone, Copy)]
pub union Payload {
    ast_index: AstIndex,
    intern_index: InternIndex,
}

/// Tagged counterpart of `Payload`, built only to print a payload via `Debug`.
#[derive(Debug)]
// NOTE(review): presumably needed because the fields are only read through
// the derived `Debug` impl — confirm.
#[allow(dead_code)]
enum ExpandedPayload {
    Ast(AstIndex),
    Intern(InternIndex),
}

impl Payload {
    /// Wraps an AST index.
    fn new_ast(ast: AstIndex) -> Payload {
        Payload { ast_index: ast }
    }

    /// Wraps an intern index.
    fn new_intern(intern: InternIndex) -> Payload {
        Payload {
            intern_index: intern,
        }
    }

    /// Reads the payload as an AST index.
    fn as_ast(&self) -> AstIndex {
        // SAFETY: NOTE(review) the caller must know (from the key) that this
        // payload was created by `new_ast`; otherwise this reinterprets the
        // bytes of an `InternIndex` — confirm that is acceptable for these
        // index types.
        let index = unsafe { self.ast_index };
        index
    }

    /// Reads the payload as an intern index.
    fn as_intern(&self) -> InternIndex {
        // SAFETY: NOTE(review) mirror image of `as_ast`: only meaningful for
        // payloads created by `new_intern`.
        let index = unsafe { self.intern_index };
        index
    }
}
/// Flat, ordered symbol table: one map from `Key` to `Payload`.
pub struct Symbols {
    inner: BTreeMap<Key, Payload>,
}

impl Debug for Symbols {
    /// Prints every entry as a `(key, payload)` pair, decoding the payload
    /// according to its key: intern index for `Key::ScopeByIndex`, AST index
    /// for every other key.
    ///
    /// NOTE(review): the literal `Symbols [` / `]` wrapper is written around a
    /// `debug_list`, which prints its own brackets, so the output contains
    /// doubled brackets — kept as is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let pretty = f.alternate();

        f.write_str("Symbols [")?;
        if pretty {
            f.write_str("\n")?;
        }

        let entries = self.inner.iter().map(|(&key, payload)| {
            let expanded = match key {
                Key::ScopeByIndex { .. } => ExpandedPayload::Intern(payload.as_intern()),
                Key::ScopeByName { .. } | Key::Symbol { .. } => {
                    ExpandedPayload::Ast(payload.as_ast())
                }
            };
            (key, expanded)
        });
        f.debug_list().entries(entries).finish()?;

        f.write_str("]")?;
        if pretty {
            f.write_str("\n")?;
        }
        Ok(())
    }
}
// Original note: checks, for each scope in `scopes_in_tree`, the key range
// `Symbol { scope, kind: SymbolKind::Local, 0 }..Symbol { scope, kind: SymbolKind::Scope, u32::MAX }`.
// NOTE(review): this struct is not referenced anywhere in the visible code,
// and its only field is named `scopes_in_scope` rather than `scopes_in_tree`;
// presumably it is meant to track the scopes reachable from a position —
// confirm intent or remove.
struct SymbolTreePos {
scopes_in_scope: Vec<AstIndex>,
}
impl Symbols {
pub fn new() -> Symbols {
Self {
inner: BTreeMap::new(),
}
}
pub fn insert_scope(&mut self, name: InternIndex, ast: AstIndex) {
self.inner
.insert(Key::ScopeByIndex { ast }, Payload::new_intern(name));
self.inner
.insert(Key::ScopeByName { name }, Payload::new_ast(ast));
}
pub fn find_symbol(
&self,
scope: AstIndex,
name: InternIndex,
loc: SourceLocation,
) -> Option<(Key, AstIndex)> {
use SymbolKind::*;
let range = self.inner.range(
Key::Symbol {
scope,
name,
kind: __First,
}..=Key::Symbol {
scope,
name,
kind: Local(loc),
},
);
if let Some((key, payload)) = range.rev().next() {
Some((*key, payload.as_ast()))
} else {
if let Some(parent) = self.inner.get(&Key::Symbol {
scope,
name: InternIndex::invalid(),
kind: ParentScope,
}) {
self.find_symbol(parent.as_ast(), name, loc)
} else {
None
}
}
}
pub fn find_type_symbol(
&self,
scope: AstIndex,
name: InternIndex,
loc: SourceLocation,
) -> Option<(Key, AstIndex)> {
use SymbolKind::*;
let range = self.inner.range(
Key::Symbol {
scope,
name,
kind: __First,
}..=Key::Symbol {
scope,
name,
kind: __TypeScope,
},
);
if let Some((key, payload)) = range.rev().next() {
Some((*key, payload.as_ast()))
} else {
if let Some(parent) = self.inner.get(&Key::Symbol {
scope,
name: InternIndex::invalid(),
kind: ParentScope,
}) {
self.find_type_symbol(parent.as_ast(), name, loc)
} else {
None
}
}
}
pub fn insert_symbol(
&mut self,
scope: AstIndex,
name: InternIndex,
kind: SymbolKind,
ast: AstIndex,
) {
self.inner
.insert(Key::Symbol { scope, name, kind }, Payload::new_ast(ast));
}
}
}