cooooool stuff for the new ast generator and intern pool and symbol table

basically, btrees are awesome.
This commit is contained in:
Janis 2024-09-13 19:07:42 +02:00
parent 6f3a7dc56c
commit 5c38969ef8
5 changed files with 803 additions and 73 deletions

File diff suppressed because it is too large Load diff

View file

@ -18,6 +18,7 @@ fn main() {
) )
.subcommands([ .subcommands([
Command::new("ast").about("output AST."), Command::new("ast").about("output AST."),
Command::new("ast2").about("output AST."),
Command::new("mir").about("output machine-level intermediate representation."), Command::new("mir").about("output machine-level intermediate representation."),
Command::new("ir").about("output intermediate representation."), Command::new("ir").about("output intermediate representation."),
Command::new("asm").about("output x86-64 assembly (intel syntax)."), Command::new("asm").about("output x86-64 assembly (intel syntax)."),
@ -47,6 +48,12 @@ fn main() {
tree.render(&mut buf).unwrap(); tree.render(&mut buf).unwrap();
println!("AST:\n{buf}"); println!("AST:\n{buf}");
} }
"ast2" => {
let mut tree2 = compiler::ast2::ast_gen::Parser::new();
tree2.parse(tokens.iter());
eprintln!("{tree2:#?}");
println!("AST (new):\n{tree2}");
}
"ir" => { "ir" => {
let mut ir = IR::new(); let mut ir = IR::new();
let builder = ir.build(&mut tree); let builder = ir.build(&mut tree);

View file

@ -1,3 +1,4 @@
use std::fmt::Debug;
use std::fmt::Display; use std::fmt::Display;
use crate::tokens::Token; use crate::tokens::Token;
@ -363,6 +364,8 @@ impl<'a> TokenIterator<'a> {
}; };
if let Some(pos) = tokens.iter().position(|&t| t == next.token()) { if let Some(pos) = tokens.iter().position(|&t| t == next.token()) {
occurences[pos] += 1; occurences[pos] += 1;
} else {
break;
} }
} }
@ -416,12 +419,20 @@ impl<'a> TokenIterator<'a> {
} }
} }
#[derive(Debug)]
pub struct TokenItem<'a> { pub struct TokenItem<'a> {
tokenizer: &'a Tokenizer<'a>, tokenizer: &'a Tokenizer<'a>,
inner: TokenPos, inner: TokenPos,
} }
// Hand-written `Debug`: prints the token's lexeme and its `inner` position.
// The `tokenizer` back-reference is not printed, which is why the output is
// finished as non-exhaustive (`..`).
impl Debug for TokenItem<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("TokenItem");
        out.field("lexeme", &self.lexeme());
        out.field("inner", &self.inner);
        out.finish_non_exhaustive()
    }
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct SourceLocation { pub struct SourceLocation {
pub line: u32, pub line: u32,
@ -430,7 +441,7 @@ pub struct SourceLocation {
impl Display for SourceLocation { impl Display for SourceLocation {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "l:{},c:{}", self.line, self.column) write!(f, "{}:{}", self.line, self.column)
} }
} }

View file

@ -7,7 +7,8 @@
map_try_insert, map_try_insert,
iter_intersperse, iter_intersperse,
iter_array_chunks, iter_array_chunks,
int_roundings int_roundings,
debug_closure_helpers
)] )]
#![allow(unused_macros)] #![allow(unused_macros)]

View file

@ -376,3 +376,186 @@ impl SymbolTableWrapper {
} }
pub type SymbolTable = SymbolTableWrapper; pub type SymbolTable = SymbolTableWrapper;
pub mod syms2 {
/*!
Coming from the ast, we have a `DeclRef` with an interned identifier `ident`
and want to find the symbol it refers to.
To help, we have a struct keeping track of all accessible scopes. Now,
we want to look through any accessible scope `s` for a symbol with the
name `ident`.
Thus: `Symbol { scope: s, name: ident, .. }`.
We might also know the type of the symbol we are looking for, if we want to
permit fields/variables and methods/functions sharing names.
Since I want to allow variable shadowing for local variables, some strategy to differentiate between shadowed variables must be employed:
- keys of type SymbolKind::Local might point to a list of values with source locations
- keys might contain source locations.
Any symbol pointed at from within the ast must again point at an ast
object.
Thus: `Key` -> `AstIndex`
Exception: `Key::ScopeByIndex` -> `InternIndex`
*/
use std::collections::BTreeMap;
use std::fmt::Debug;
use crate::ast2::intern::Index as InternIndex;
use crate::ast2::Index as AstIndex;
use crate::lexer::SourceLocation;
/// Lookup key of the symbol table's `BTreeMap`.
///
/// The derived `Ord` compares the variant first (in declaration order) and
/// then the fields in declaration order. As a result, all `Symbol` keys that
/// share a `scope` and `name` are adjacent in the map and sorted by `kind`.
/// `Symbols::find_symbol` relies on this for its range query, so variants and
/// fields must not be reordered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Key {
/// Finds a scope by its interned name; `Symbols::insert_scope` stores the
/// scope's `AstIndex` under this key.
ScopeByName {
name: InternIndex,
},
/// Finds a scope by its ast node. Not all scopes have a name, as some are
/// anonymous blocks or otherwise nameless. `Symbols::insert_scope` stores an
/// `InternIndex` (the scope's name) under this key, not an `AstIndex`.
ScopeByIndex {
ast: AstIndex,
},
/// A symbol `name` of the given `kind`, declared in the scope whose ast node
/// is `scope`. The stored payload is an `AstIndex`.
Symbol {
scope: AstIndex,
name: InternIndex,
kind: SymbolKind,
},
}
/// Kind of a symbol; part of `Key::Symbol`.
///
/// The derived `Ord` follows declaration order, and `Symbols::find_symbol`
/// queries the inclusive range `__First ..= Local(loc)`. Variant order is
/// therefore significant: every kind declared before `Local` is included in
/// that range, and `Local` entries sort last among the real kinds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SymbolKind {
/// Sentinel that sorts before every other kind; used as the lower bound of
/// range queries. Not meant to be stored.
__First,
// Kinds of named declarations -- presumably constants, functions, types and
// nested scopes respectively (inferred from the names; confirm at the
// insertion sites).
Const,
Function,
Type,
Scope,
/// Link from a scope to its parent. `Symbols::find_symbol` looks this up
/// with `name` set to `InternIndex::invalid()` and reads the payload as the
/// parent's `AstIndex`.
ParentScope,
/// A local variable declared at the given source location. Locals sharing
/// a name are ordered by location, so a range query bounded by
/// `Local(loc)` ends at the latest declaration located at or before `loc`
/// (this is what permits shadowing).
Local(SourceLocation),
/// Sentinel that sorts after every other kind. Not used by the code visible
/// in this module.
__Last,
}
/// Untagged value stored in the symbol table.
///
/// The union carries no discriminant: which field is meaningful is decided by
/// the `Key` the payload is stored under. In this module, `Key::ScopeByIndex`
/// entries are written with `intern_index` and every other key with
/// `ast_index`.
#[derive(Clone, Copy)]
pub union Payload {
ast_index: AstIndex,
intern_index: InternIndex,
}
/// Tagged view of a `Payload`, built by the `Debug` impl of `Symbols` so the
/// active union field can be printed.
#[derive(Debug)]
// NOTE(review): the variant fields are only read through the derived `Debug`,
// which is presumably why the dead-code lint has to be silenced -- confirm.
#[allow(dead_code)]
enum ExpandedPayload {
Ast(AstIndex),
Intern(InternIndex),
}
impl Payload {
fn new_ast(ast: AstIndex) -> Payload {
Self { ast_index: ast }
}
fn new_intern(intern: InternIndex) -> Payload {
Self {
intern_index: intern,
}
}
fn as_ast(&self) -> AstIndex {
unsafe { self.ast_index }
}
fn as_intern(&self) -> InternIndex {
unsafe { self.intern_index }
}
}
/// Symbol table: a single ordered map from `Key` to an untagged `Payload`.
///
/// The payload stored under `Key::ScopeByIndex` is an `InternIndex`; under all
/// other keys it is an `AstIndex`.
pub struct Symbols {
inner: BTreeMap<Key, Payload>,
}
impl Debug for Symbols {
    /// Prints every `(key, payload)` entry of the table in key order.
    ///
    /// The untagged payload is expanded according to its key:
    /// `Key::ScopeByIndex` entries hold an intern index, all others an ast
    /// index.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let pretty = f.alternate();

        // NOTE(review): `debug_list` emits its own `[` and `]`, so the
        // output has doubled brackets ("Symbols [[..]]"). Kept as-is here to
        // preserve the existing output.
        f.write_str("Symbols [")?;
        if pretty {
            f.write_str("\n")?;
        }

        let mut list = f.debug_list();
        for (key, val) in &self.inner {
            let payload = if matches!(key, Key::ScopeByIndex { .. }) {
                ExpandedPayload::Intern(val.as_intern())
            } else {
                ExpandedPayload::Ast(val.as_ast())
            };
            list.entry(&(*key, payload));
        }
        list.finish()?;

        f.write_str("]")?;
        if pretty {
            f.write_str("\n")?;
        }
        Ok(())
    }
}
// Holds a list of scopes (`scopes_in_scope`), identified by their ast nodes.
// NOTE(review): not referenced by any code visible in this module. The
// original note suggests the intent is that, for each listed scope, a lookup
// scans that scope's `Key::Symbol` entries across the whole `SymbolKind`
// range -- confirm once this is wired up.
struct SymbolTreePos {
scopes_in_scope: Vec<AstIndex>,
}
impl Symbols {
pub fn new() -> Symbols {
Self {
inner: BTreeMap::new(),
}
}
pub fn insert_scope(&mut self, name: InternIndex, ast: AstIndex) {
self.inner
.insert(Key::ScopeByIndex { ast }, Payload::new_intern(name));
self.inner
.insert(Key::ScopeByName { name }, Payload::new_ast(ast));
}
pub fn find_symbol(
&self,
scope: AstIndex,
name: InternIndex,
loc: SourceLocation,
) -> Option<AstIndex> {
use SymbolKind::*;
let range = self.inner.range(
Key::Symbol {
scope,
name,
kind: __First,
}..=Key::Symbol {
scope,
name,
kind: Local(loc),
},
);
if let Some((_, payload)) = range.rev().next() {
Some(payload.as_ast())
} else {
if let Some(parent) = self.inner.get(&Key::Symbol {
scope,
name: InternIndex::invalid(),
kind: ParentScope,
}) {
self.find_symbol(parent.as_ast(), name, loc)
} else {
None
}
}
}
pub fn insert_symbol(
&mut self,
scope: AstIndex,
name: InternIndex,
kind: SymbolKind,
ast: AstIndex,
) {
self.inner
.insert(Key::Symbol { scope, name, kind }, Payload::new_ast(ast));
}
}
}