SeaLang/src/symbol_table.rs
Janis 888517f2ea soooo many things, I need to commit more often
also, types exist in the ast, but are interned after parsing
refs and typerefs are resolved
true and false in the intern pool, need to add them to primaryexpr
structs in the internpool take a decl to differentiate struct types of the same
name
field access postfix expr? also nesting postfix expr (e.g. var[1][2])
arrays can only be specified with integral constants as length

fixes:
sinttype and uinttype store bits inline in item.index correctly
mut/var decl assignment? have correct doc strings
let/var symbol is inserted after the assignment expr, so that the expr can still
use
now-shadowed variables
structs can have fields of type pointer-to-self (ast)
lots of wrong-cases in node children getting
2024-09-15 00:38:50 +02:00

597 lines
17 KiB
Rust

use std::{
collections::{BTreeMap, HashMap},
ptr::NonNull,
};
use crate::{
ast::{Node as AstNode, Tag},
parser::Tree,
};
/// A single named entry of a symbol table, tying an identifier to its declaration.
#[derive(Debug)]
pub struct SymbolRecord {
/// Identifier text under which the symbol was registered.
name: String,
/// AST node of the declaration that introduced the symbol.
decl: AstNode,
}
impl SymbolRecord {
    /// Returns the AST node of the declaration this record refers to.
    pub fn node(&self) -> AstNode {
        let Self { decl, .. } = self;
        *decl
    }

    /// Returns the identifier text of the symbol.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }
}
/// A symbol name together with the scope nodes collected for it by
/// `SymbolTableWrapper::symbol_path`.
///
/// Field `0` is the list of scope nodes (`None` for a table that has no scope
/// node), field `1` is the symbol's name.
#[allow(unused)]
pub struct SymbolPath(Vec<Option<AstNode>>, String);
impl SymbolPath {
    /// Renders the path as a mangled name.
    ///
    /// Every visited scope node contributes a `<letter><identifier>::` prefix
    /// (`V` for variable declarations, `G` for global declarations, `F` for
    /// function prototypes); scope nodes with any other tag contribute nothing.
    /// The symbol's own name is appended last.
    ///
    /// Path entries without a scope node (`None`) are skipped instead of being
    /// unwrapped, so a path that contains a scopeless table no longer panics.
    ///
    /// # Panics
    ///
    /// Panics if `tree.get_ident_str` has no string for one of the scope names.
    pub fn mangle(&self, tree: &Tree) -> String {
        use core::fmt::Write;

        let mut buf = String::new();

        // NOTE(review): the first path entry is skipped and the remainder is
        // walked back-to-front — confirm this is the intended order.
        for node in self.0.iter().skip(1).rev().flatten() {
            // Writing into a `String` cannot fail, so the results are discarded.
            match tree.nodes.get_node(*node) {
                Tag::VarDecl { name, .. } => {
                    _ = write!(&mut buf, "V{}::", tree.get_ident_str(*name).unwrap());
                }
                Tag::GlobalDecl { name, .. } => {
                    _ = write!(&mut buf, "G{}::", tree.get_ident_str(*name).unwrap());
                }
                Tag::FunctionProto { name, .. } => {
                    _ = write!(&mut buf, "F{}::", tree.get_ident_str(*name).unwrap());
                }
                _ => {}
            }
        }

        buf.push_str(&self.1);
        buf
    }
}
/// Selects which collection of a symbol table an identifier is stored in.
pub enum SymbolKind {
/// Stored in the insertion-ordered list of the table.
Var, // or Let
/// Stored in the name-keyed map of the table.
Orderless,
}
/// The symbols of one scope, linked to its parent and child scopes through raw pointers.
#[derive(Debug, Default)]
struct InnerSymbolTable {
/// Records kept in insertion order (`SymbolKind::Var`).
ordered_identifiers: Vec<SymbolRecord>,
/// Records keyed by name; inserting an existing name replaces its record.
orderless_identifiers: HashMap<String, SymbolRecord>,
/// Heap-allocated child tables, keyed by the AST node that opens the child scope.
children: BTreeMap<AstNode, NonNull<InnerSymbolTable>>,
/// AST node this table belongs to; `None` for a table created without a parent.
scope: Option<AstNode>,
/// Enclosing table, or `None` for the root table.
parent: Option<NonNull<InnerSymbolTable>>,
}
impl InnerSymbolTable {
    /// Heap-allocates an empty table without parent or scope node.
    fn new() -> NonNull<InnerSymbolTable> {
        Self::new_with(Self::new_inner)
    }

    /// Heap-allocates the table produced by `build` and leaks it, handing the
    /// allocation back as a raw non-null pointer.
    fn new_with<G>(build: G) -> NonNull<InnerSymbolTable>
    where
        G: FnOnce() -> Self,
    {
        let leaked: &mut InnerSymbolTable = Box::leak(Box::new(build()));
        NonNull::from(leaked)
    }

    /// Creates an empty table value: no symbols, no children, no parent, no scope.
    fn new_inner() -> InnerSymbolTable {
        Self {
            ordered_identifiers: Vec::new(),
            orderless_identifiers: HashMap::new(),
            children: BTreeMap::new(),
            scope: None,
            parent: None,
        }
    }

    /// Heap-allocates an empty table for `scope` whose parent is `self`.
    ///
    /// The child is not registered in `self.children` here.
    fn make_child(&self, scope: AstNode) -> NonNull<InnerSymbolTable> {
        let parent = NonNull::new(self.as_ptr());
        Self::new_with(move || {
            let mut child = Self::new_inner();
            child.parent = parent;
            child.scope = Some(scope);
            child
        })
    }

    /// Returns the raw pointer to the parent table, if any.
    fn parent(&self) -> Option<NonNull<InnerSymbolTable>> {
        self.parent
    }

    /// Borrows the parent table, if any.
    fn parent_ref(&self) -> Option<&InnerSymbolTable> {
        // SAFETY (review): relies on the parent allocation outliving this
        // table — confirm against how the tree is built and dropped.
        self.parent.map(|up| unsafe { &*up.as_ptr() })
    }

    /// Mutably borrows the parent table, if any.
    fn parent_mut(&mut self) -> Option<&mut InnerSymbolTable> {
        // SAFETY (review): same assumption as `parent_ref`; additionally no
        // other reference to the parent may be live — confirm at call sites.
        self.parent.map(|up| unsafe { &mut *up.as_ptr() })
    }

    /// Returns the address of `self` as a mutable raw pointer.
    fn as_ptr(&self) -> *mut Self {
        let shared: *const Self = self;
        shared as *mut Self
    }

    /// Follows the parent links up to the table that has no parent and returns
    /// a pointer to it.
    fn root(&self) -> NonNull<InnerSymbolTable> {
        let mut cursor: &InnerSymbolTable = self;
        while let Some(up) = cursor.parent_ref() {
            cursor = up;
        }
        NonNull::from(cursor)
    }
}
impl Drop for InnerSymbolTable {
    /// Frees every child table registered in `children`; each child's own
    /// `drop` then frees its children in turn.
    fn drop(&mut self) {
        self.children.values().for_each(|child| {
            // SAFETY (review): assumes every pointer stored in `children` came
            // from the leaked `Box` created in `new_with` and is freed only
            // here — confirm no other owner exists.
            drop(unsafe { Box::from_raw(child.as_ptr()) });
        });
    }
}
impl InnerSymbolTable {
    /// Registers `name` as declared by `node` and returns the stored record.
    ///
    /// `SymbolKind::Var` appends to the ordered list (earlier records with the
    /// same name are kept); `SymbolKind::Orderless` stores the record in the
    /// name-keyed map, replacing any previous record of that name.
    fn insert_symbol(&mut self, name: &str, node: AstNode, kind: SymbolKind) -> &SymbolRecord {
        match kind {
            SymbolKind::Var => {
                self.ordered_identifiers.push(SymbolRecord {
                    name: name.to_owned(),
                    decl: node,
                });
                self.ordered_identifiers
                    .last()
                    .expect("a record was pushed just above")
            }
            SymbolKind::Orderless => self.insert_orderless_symbol(name, node),
        }
    }

    /// Stores `name` in the name-keyed map of this table, replacing any record
    /// already stored under that name, and returns the stored record.
    fn insert_orderless_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord {
        use std::collections::hash_map::Entry;

        let record = SymbolRecord {
            name: name.to_owned(),
            decl: node,
        };
        // One hash lookup instead of insert-then-get, and no `unwrap`.
        match self.orderless_identifiers.entry(name.to_owned()) {
            Entry::Occupied(mut slot) => {
                slot.insert(record);
                slot.into_mut()
            }
            Entry::Vacant(slot) => slot.insert(record),
        }
    }

    /// Returns the record `find_any_symbol` yields for `name`; if there is
    /// none, inserts the `(node, kind)` produced by `cb` into this table and
    /// returns the new record. `cb` is only called when nothing was found.
    fn find_symbol_or_insert_with<'a, F>(&'a mut self, name: &str, cb: F) -> &'a SymbolRecord
    where
        F: FnOnce() -> (AstNode, SymbolKind),
    {
        let this = self as *mut Self;
        // SAFETY: `this` was just derived from `self` and is only read through.
        // The shared borrow is either returned (and `self` is not touched
        // again) or has ended before `self` is mutated in the `else` branch.
        // The detour through a raw pointer is needed because the borrow
        // checker rejects this conditional return of a borrow.
        if let Some(record) = unsafe { &*this }.find_any_symbol(name) {
            record
        } else {
            let (node, kind) = cb();
            self.insert_symbol(name, node, kind)
        }
    }

    /// Finds the record whose declaration node is `decl`, searching this table
    /// (ordered list first, then the name-keyed map) and then its ancestors.
    fn find_symbol_by_decl(&self, decl: AstNode) -> Option<&SymbolRecord> {
        self.ordered_identifiers
            .iter()
            .chain(self.orderless_identifiers.values())
            .find(|r| r.decl == decl)
            .or_else(|| self.parent_ref().and_then(|p| p.find_symbol_by_decl(decl)))
    }

    /// Finds a record named `name`: ordered list first, then the name-keyed
    /// map, then the ancestors.
    ///
    /// The ordered list is searched newest-first, so a name that was declared
    /// several times in this scope resolves to its most recent declaration.
    fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.ordered_identifiers
            .iter()
            .rev()
            .find(|r| r.name.as_str() == name)
            .or_else(|| self.orderless_identifiers.get(name))
            .or_else(|| self.parent_ref().and_then(|p| p.find_any_symbol(name)))
    }

    /// Finds a record named `name` in the ordered lists of this table and its
    /// ancestors; within one table the most recent declaration wins.
    fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.ordered_identifiers
            .iter()
            .rev()
            .find(|r| r.name.as_str() == name)
            .or_else(|| self.parent_ref().and_then(|p| p.find_ordered_symbol(name)))
    }

    /// Finds a record named `name` in the name-keyed maps of this table and
    /// its ancestors.
    fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.orderless_identifiers.get(name).or_else(|| {
            self.parent_ref()
                .and_then(|p| p.find_orderless_symbol(name))
        })
    }

    /// Adds all `(name, record)` pairs of `iter` to the name-keyed map of this
    /// table, replacing records that share a name.
    fn extend_orderless<I>(&mut self, iter: I)
    where
        I: IntoIterator<Item = (String, SymbolRecord)>,
    {
        self.orderless_identifiers.extend(iter)
    }

    /// Returns the iterator produced by `HashMap::extract_if` on the
    /// name-keyed map of this table for the predicate `pred`.
    fn extract_orderless_if<F>(
        &mut self,
        pred: F,
    ) -> std::collections::hash_map::ExtractIf<String, SymbolRecord, F>
    where
        F: FnMut(&String, &mut SymbolRecord) -> bool,
    {
        self.orderless_identifiers.extract_if(pred)
    }
}
/// Cursor into a tree of symbol tables; dropping it frees the tree from the root down.
#[derive(Debug)]
pub struct SymbolTableWrapper {
/// Table that lookups and insertions currently operate on.
current: NonNull<InnerSymbolTable>,
}
impl Drop for SymbolTableWrapper {
    /// Frees the root table; the root's own `drop` releases the tables below it.
    fn drop(&mut self) {
        // SAFETY (review): assumes `current` still points at a live table of
        // the tree and that the root was allocated through a leaked `Box`
        // that nothing else frees — confirm.
        let root = unsafe { self.current.as_ref() }.root();
        drop(unsafe { Box::from_raw(root.as_ptr()) });
    }
}
impl SymbolTableWrapper {
pub fn new() -> SymbolTableWrapper {
Self {
current: InnerSymbolTable::new(),
}
}
fn current(&self) -> &InnerSymbolTable {
unsafe { self.current.as_ref() }
}
fn current_mut(&mut self) -> &mut InnerSymbolTable {
unsafe { self.current.as_mut() }
}
#[allow(dead_code)]
fn root_ref(&self) -> &InnerSymbolTable {
unsafe { self.current().root().as_ref() }
}
fn root_mut(&mut self) -> &mut InnerSymbolTable {
unsafe { self.current_mut().root().as_mut() }
}
#[allow(dead_code)]
fn parent_ref(&self) -> Option<&InnerSymbolTable> {
self.current().parent_ref()
}
#[allow(dead_code)]
fn parent_mut(&mut self) -> Option<&mut InnerSymbolTable> {
self.current_mut().parent_mut()
}
pub fn into_child(&mut self, scope: AstNode) {
let child = if let Some(child) = self.current().children.get(&scope) {
*child
} else {
let child = self.current().make_child(scope);
self.current_mut().children.insert(scope, child);
child
};
self.current = child;
}
pub fn into_parent(&mut self) {
if let Some(parent) = self.current().parent() {
self.current = parent;
}
}
}
impl SymbolTableWrapper {
    /// Inserts `name` into the current table; see `InnerSymbolTable::insert_symbol`.
    pub fn insert_symbol(&mut self, name: &str, node: AstNode, kind: SymbolKind) -> &SymbolRecord {
        self.current_mut().insert_symbol(name, node, kind)
    }

    /// Looks `name` up among the orderless symbols of the root table.
    pub fn find_root_symbol(&mut self, name: &str) -> Option<&SymbolRecord> {
        self.root_mut().find_orderless_symbol(name)
    }

    /// Inserts `name` as an orderless symbol of the root table.
    pub fn insert_root_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord {
        self.root_mut().insert_orderless_symbol(name, node)
    }

    /// Inserts `name` as an orderless symbol of the current table.
    pub fn insert_orderless_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord {
        self.current_mut().insert_orderless_symbol(name, node)
    }

    /// Returns the symbol named `name` visible from the current table, or
    /// inserts the `(node, kind)` produced by `cb` into the current table.
    pub fn find_symbol_or_insert_with<'a, F>(&'a mut self, name: &str, cb: F) -> &'a SymbolRecord
    where
        F: FnOnce() -> (AstNode, SymbolKind),
    {
        self.current_mut().find_symbol_or_insert_with(name, cb)
    }

    /// Finds the symbol declared by `decl`, starting at the current table and
    /// continuing through its ancestors.
    pub fn find_symbol_by_decl(&self, decl: AstNode) -> Option<&SymbolRecord> {
        self.current().find_symbol_by_decl(decl)
    }

    /// Finds any symbol named `name`, starting at the current table.
    pub fn find_any_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.current().find_any_symbol(name)
    }

    /// Finds an ordered symbol named `name`, starting at the current table.
    pub fn find_ordered_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.current().find_ordered_symbol(name)
    }

    /// Finds an orderless symbol named `name`, starting at the current table.
    pub fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> {
        self.current().find_orderless_symbol(name)
    }

    /// Locates the symbol declared by `decl` and returns its name together
    /// with the scope nodes of the tables enclosing the table it was found in.
    ///
    /// The search starts at the current table and moves towards the root. The
    /// returned path lists the scope of each ancestor of the defining table,
    /// nearest ancestor first; the last entry belongs to the root table, whose
    /// scope is `None`. A symbol found in the root table has an empty path.
    ///
    /// Returns `None` if no visible table declares `decl`.
    pub fn symbol_path(&self, decl: AstNode) -> Option<SymbolPath> {
        let mut table = self.current();
        loop {
            let found = table
                .ordered_identifiers
                .iter()
                .chain(table.orderless_identifiers.values())
                .find(|r| r.decl == decl);

            if let Some(record) = found {
                let mut path = Vec::new();
                // Walk a separate cursor upwards. The previous loop tested
                // `table.parent_ref()` without ever advancing, so it never
                // terminated for a table that has a parent.
                let mut cursor = table;
                while let Some(parent) = cursor.parent_ref() {
                    path.push(parent.scope);
                    cursor = parent;
                }
                return Some(SymbolPath(path, record.name.clone()));
            }

            // Not declared here: continue in the enclosing table, or give up
            // once the root has been searched.
            table = table.parent_ref()?;
        }
    }

    /// Adds all `(name, record)` pairs of `iter` to the orderless symbols of
    /// the current table.
    pub fn extend_orderless<I>(&mut self, iter: I)
    where
        I: IntoIterator<Item = (String, SymbolRecord)>,
    {
        self.current_mut().extend_orderless(iter)
    }

    /// Returns the `HashMap::extract_if` iterator over the orderless symbols
    /// of the current table for the predicate `pred`.
    pub fn extract_orderless_if<F>(
        &mut self,
        pred: F,
    ) -> std::collections::hash_map::ExtractIf<String, SymbolRecord, F>
    where
        F: FnMut(&String, &mut SymbolRecord) -> bool,
    {
        self.current_mut().extract_orderless_if(pred)
    }
}
/// Alias under which `SymbolTableWrapper` is exposed as the symbol table type.
pub type SymbolTable = SymbolTableWrapper;
pub mod syms2 {
/*!
Coming from the ast, we have a `DeclRef` with an interned identifier `ident`
and want to find the symbol it refers to.
To help, we have a struct keeping track of all accessible scopes. Now,
we want to look through any accessible scope `s` for a symbol with the
name `ident`.
Thus: `Symbol { scope: s, name: ident, .. }`.
We might also know the type of the symbol we are looking for, if we want to
permit fields/variables and methods/functions sharing names.
Since I want to allow variable shadowing for local variables, some strategy to differentiate between shadowed variables must be employed:
- keys of type SymbolKind::Local might point to a list of values with source locations
- keys might contain source locations.
Any symbol pointed at from within the ast must again point at an ast
object.
Thus: `Key` -> `AstIndex`
Exception: `Key::ScopeByIndex` -> `InternIndex`
*/
use std::collections::BTreeMap;
use std::fmt::Debug;
use crate::ast2::intern::Index as InternIndex;
use crate::ast2::Index as AstIndex;
use crate::lexer::SourceLocation;
/// Key of the symbol map.
///
/// The derived `Ord` compares the variant first and then the fields in
/// declaration order, so all `Symbol` keys of one `scope` and `name` are
/// adjacent and sorted by `kind`; the range lookups in `Symbols` depend on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Key {
/// Looks a scope up by its name; the payload is the scope's AST index.
ScopeByName {
name: InternIndex,
},
/// Looks a scope up by its AST index; the payload is the scope's interned name.
/// not all scopes have a name, as some are anonymous blocks or otherwise nameless
ScopeByIndex {
ast: AstIndex,
},
/// A symbol called `name` of the given `kind`, declared in `scope`; the
/// payload is an AST index.
Symbol {
scope: AstIndex,
name: InternIndex,
kind: SymbolKind,
},
}
/// Kind of a symbol stored under `Key::Symbol`.
///
/// The declaration order of the variants is significant: the derived `Ord`
/// follows it, and the lookups in `Symbols` use ranges over this ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SymbolKind {
/// Sentinel that sorts before every other kind; used as a lower range bound.
__First,
Const,
Function,
Type,
/// Sentinel used as the upper range bound of type lookups.
__TypeScope,
Scope,
/// Entry linking a scope to its parent scope; looked up with an invalid name index.
ParentScope,
/// A local symbol; locals of the same name are ordered by their source location.
Local(SourceLocation),
/// Sentinel that sorts after every other kind.
__Last,
}
/// Untagged value stored in the symbol map; which field is valid is decided by
/// the key the payload is stored under.
#[derive(Clone, Copy)]
pub union Payload {
/// Valid when the payload refers to an AST object.
ast_index: AstIndex,
/// Valid when the payload refers to an interned value.
intern_index: InternIndex,
}
/// Tagged form of `Payload`, built only for `Debug` output of `Symbols`.
#[derive(Debug)]
#[allow(dead_code)]
enum ExpandedPayload {
/// Payload interpreted as an AST index.
Ast(AstIndex),
/// Payload interpreted as an intern index.
Intern(InternIndex),
}
impl Payload {
fn new_ast(ast: AstIndex) -> Payload {
Self { ast_index: ast }
}
fn new_intern(intern: InternIndex) -> Payload {
Self {
intern_index: intern,
}
}
fn as_ast(&self) -> AstIndex {
unsafe { self.ast_index }
}
fn as_intern(&self) -> InternIndex {
unsafe { self.intern_index }
}
}
/// Symbol store backed by a single ordered map from `Key` to `Payload`.
pub struct Symbols {
/// All scope and symbol entries, sorted by `Key`.
inner: BTreeMap<Key, Payload>,
}
impl Debug for Symbols {
    /// Writes `Symbols [`, the debug list of all `(key, payload)` entries and
    /// a closing `]`; in alternate (`{:#?}`) mode a newline follows the opening
    /// and the closing text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("Symbols [")?;
        if f.alternate() {
            f.write_str("\n")?;
        }

        let mut list = f.debug_list();
        for (key, val) in &self.inner {
            // The key decides how the untagged payload has to be read.
            let payload = if matches!(key, Key::ScopeByIndex { .. }) {
                ExpandedPayload::Intern(val.as_intern())
            } else {
                ExpandedPayload::Ast(val.as_ast())
            };
            list.entry(&(*key, payload));
        }
        list.finish()?;

        f.write_str("]")?;
        if f.alternate() {
            f.write_str("\n")?;
        }
        Ok(())
    }
}
// checks for each scope in scopes_in_tree Symbol { scope, kind: SymbolKind::Local, 0}..Symbol { scope, kind: SymbolKind::Scope, u32::MAX}
/// List of scope AST indices. Not referenced by the code visible in this
/// module; the comment above presumably sketches its planned use.
struct SymbolTreePos {
/// Scopes to be searched.
scopes_in_scope: Vec<AstIndex>,
}
impl Symbols {
pub fn new() -> Symbols {
Self {
inner: BTreeMap::new(),
}
}
pub fn insert_scope(&mut self, name: InternIndex, ast: AstIndex) {
self.inner
.insert(Key::ScopeByIndex { ast }, Payload::new_intern(name));
self.inner
.insert(Key::ScopeByName { name }, Payload::new_ast(ast));
}
pub fn find_symbol(
&self,
scope: AstIndex,
name: InternIndex,
loc: SourceLocation,
) -> Option<AstIndex> {
use SymbolKind::*;
let range = self.inner.range(
Key::Symbol {
scope,
name,
kind: __First,
}..=Key::Symbol {
scope,
name,
kind: Local(loc),
},
);
if let Some((_, payload)) = range.rev().next() {
Some(payload.as_ast())
} else {
if let Some(parent) = self.inner.get(&Key::Symbol {
scope,
name: InternIndex::invalid(),
kind: ParentScope,
}) {
self.find_symbol(parent.as_ast(), name, loc)
} else {
None
}
}
}
pub fn find_type_symbol(
&self,
scope: AstIndex,
name: InternIndex,
loc: SourceLocation,
) -> Option<AstIndex> {
use SymbolKind::*;
let range = self.inner.range(
Key::Symbol {
scope,
name,
kind: __First,
}..=Key::Symbol {
scope,
name,
kind: __TypeScope,
},
);
if let Some((_, payload)) = range.rev().next() {
Some(payload.as_ast())
} else {
if let Some(parent) = self.inner.get(&Key::Symbol {
scope,
name: InternIndex::invalid(),
kind: ParentScope,
}) {
self.find_symbol(parent.as_ast(), name, loc)
} else {
None
}
}
}
pub fn insert_symbol(
&mut self,
scope: AstIndex,
name: InternIndex,
kind: SymbolKind,
ast: AstIndex,
) {
self.inner
.insert(Key::Symbol { scope, name, kind }, Payload::new_ast(ast));
}
}
}