cooooool stuff for the new ast generator and intern pool and symbol table
basically, btrees are awesome.
This commit is contained in:
parent
6f3a7dc56c
commit
5c38969ef8
668
src/ast2/mod.rs
668
src/ast2/mod.rs
File diff suppressed because it is too large
Load diff
|
@ -18,6 +18,7 @@ fn main() {
|
|||
)
|
||||
.subcommands([
|
||||
Command::new("ast").about("output AST."),
|
||||
Command::new("ast2").about("output AST."),
|
||||
Command::new("mir").about("output machine-level intermediate representation."),
|
||||
Command::new("ir").about("output intermediate representation."),
|
||||
Command::new("asm").about("output x86-64 assembly (intel syntax)."),
|
||||
|
@ -47,6 +48,12 @@ fn main() {
|
|||
tree.render(&mut buf).unwrap();
|
||||
println!("AST:\n{buf}");
|
||||
}
|
||||
"ast2" => {
|
||||
let mut tree2 = compiler::ast2::ast_gen::Parser::new();
|
||||
tree2.parse(tokens.iter());
|
||||
eprintln!("{tree2:#?}");
|
||||
println!("AST (new):\n{tree2}");
|
||||
}
|
||||
"ir" => {
|
||||
let mut ir = IR::new();
|
||||
let builder = ir.build(&mut tree);
|
||||
|
|
15
src/lexer.rs
15
src/lexer.rs
|
@ -1,3 +1,4 @@
|
|||
use std::fmt::Debug;
|
||||
use std::fmt::Display;
|
||||
|
||||
use crate::tokens::Token;
|
||||
|
@ -363,6 +364,8 @@ impl<'a> TokenIterator<'a> {
|
|||
};
|
||||
if let Some(pos) = tokens.iter().position(|&t| t == next.token()) {
|
||||
occurences[pos] += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -416,12 +419,20 @@ impl<'a> TokenIterator<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TokenItem<'a> {
|
||||
tokenizer: &'a Tokenizer<'a>,
|
||||
inner: TokenPos,
|
||||
}
|
||||
|
||||
impl<'a> Debug for TokenItem<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("TokenItem")
|
||||
.field("lexeme", &self.lexeme())
|
||||
.field("inner", &self.inner)
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
|
||||
pub struct SourceLocation {
|
||||
pub line: u32,
|
||||
|
@ -430,7 +441,7 @@ pub struct SourceLocation {
|
|||
|
||||
impl Display for SourceLocation {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "l:{},c:{}", self.line, self.column)
|
||||
write!(f, "{}:{}", self.line, self.column)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,7 +7,8 @@
|
|||
map_try_insert,
|
||||
iter_intersperse,
|
||||
iter_array_chunks,
|
||||
int_roundings
|
||||
int_roundings,
|
||||
debug_closure_helpers
|
||||
)]
|
||||
#![allow(unused_macros)]
|
||||
|
||||
|
|
|
@ -376,3 +376,186 @@ impl SymbolTableWrapper {
|
|||
}
|
||||
|
||||
pub type SymbolTable = SymbolTableWrapper;
|
||||
|
||||
pub mod syms2 {
|
||||
/*!
|
||||
Coming from the ast, we have a `DeclRef` with an interned identifier `ident`
|
||||
and want to find the symbol it refers to.
|
||||
|
||||
To help, we have a struct keeping track of all accessible scopes. Now,
|
||||
we want to look through any accessible scope `s` for a symbol with the
|
||||
|
||||
name `ident`.
|
||||
Thus: `Symbol { scope: s, name: ident, .. }`.
|
||||
|
||||
We might also know the type of the symbol we are looking for, if we want to
|
||||
permit fields/variables and methods/functions sharing names.
|
||||
|
||||
Since I want to allow variable shadowing for local variables, some strategy to differentiate between shadowed variables must be employed:
|
||||
- keys of type SymbolKind::Local might point to a list of values with source locations
|
||||
- keys might contain source locations.
|
||||
|
||||
Any symbol pointed at from within the ast must again point at an ast
|
||||
object.
|
||||
Thus: `Key` -> `AstIndex`
|
||||
Exception: `Key::ScopeByIndex` -> `InternIndex`
|
||||
*/
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::Debug;
|
||||
|
||||
use crate::ast2::intern::Index as InternIndex;
|
||||
use crate::ast2::Index as AstIndex;
|
||||
use crate::lexer::SourceLocation;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Key {
|
||||
ScopeByName {
|
||||
name: InternIndex,
|
||||
},
|
||||
/// not all scopes have a name, as some are anonymous blocks or otherwise nameless
|
||||
ScopeByIndex {
|
||||
ast: AstIndex,
|
||||
},
|
||||
Symbol {
|
||||
scope: AstIndex,
|
||||
name: InternIndex,
|
||||
kind: SymbolKind,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum SymbolKind {
|
||||
__First,
|
||||
Const,
|
||||
Function,
|
||||
Type,
|
||||
Scope,
|
||||
ParentScope,
|
||||
Local(SourceLocation),
|
||||
__Last,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub union Payload {
|
||||
ast_index: AstIndex,
|
||||
intern_index: InternIndex,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)]
|
||||
enum ExpandedPayload {
|
||||
Ast(AstIndex),
|
||||
Intern(InternIndex),
|
||||
}
|
||||
|
||||
impl Payload {
|
||||
fn new_ast(ast: AstIndex) -> Payload {
|
||||
Self { ast_index: ast }
|
||||
}
|
||||
fn new_intern(intern: InternIndex) -> Payload {
|
||||
Self {
|
||||
intern_index: intern,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_ast(&self) -> AstIndex {
|
||||
unsafe { self.ast_index }
|
||||
}
|
||||
fn as_intern(&self) -> InternIndex {
|
||||
unsafe { self.intern_index }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Symbols {
|
||||
inner: BTreeMap<Key, Payload>,
|
||||
}
|
||||
|
||||
impl Debug for Symbols {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Symbols [")?;
|
||||
if f.alternate() {
|
||||
writeln!(f, "")?;
|
||||
}
|
||||
let entries = self.inner.iter().map(|(key, val)| {
|
||||
let payload = match key {
|
||||
Key::ScopeByIndex { .. } => ExpandedPayload::Intern(val.as_intern()),
|
||||
_ => ExpandedPayload::Ast(val.as_ast()),
|
||||
};
|
||||
|
||||
(*key, payload)
|
||||
});
|
||||
f.debug_list().entries(entries).finish()?;
|
||||
write!(f, "]")?;
|
||||
if f.alternate() {
|
||||
writeln!(f, "")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// checks for each scope in scopes_in_tree Symbol { scope, kind: SymbolKind::Local, 0}..Symbol { scope, kind: SymbolKind::Scope, u32::MAX}
|
||||
struct SymbolTreePos {
|
||||
scopes_in_scope: Vec<AstIndex>,
|
||||
}
|
||||
|
||||
impl Symbols {
|
||||
pub fn new() -> Symbols {
|
||||
Self {
|
||||
inner: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
pub fn insert_scope(&mut self, name: InternIndex, ast: AstIndex) {
|
||||
self.inner
|
||||
.insert(Key::ScopeByIndex { ast }, Payload::new_intern(name));
|
||||
self.inner
|
||||
.insert(Key::ScopeByName { name }, Payload::new_ast(ast));
|
||||
}
|
||||
|
||||
pub fn find_symbol(
|
||||
&self,
|
||||
scope: AstIndex,
|
||||
name: InternIndex,
|
||||
loc: SourceLocation,
|
||||
) -> Option<AstIndex> {
|
||||
use SymbolKind::*;
|
||||
let range = self.inner.range(
|
||||
Key::Symbol {
|
||||
scope,
|
||||
name,
|
||||
kind: __First,
|
||||
}..=Key::Symbol {
|
||||
scope,
|
||||
name,
|
||||
kind: Local(loc),
|
||||
},
|
||||
);
|
||||
|
||||
if let Some((_, payload)) = range.rev().next() {
|
||||
Some(payload.as_ast())
|
||||
} else {
|
||||
if let Some(parent) = self.inner.get(&Key::Symbol {
|
||||
scope,
|
||||
name: InternIndex::invalid(),
|
||||
kind: ParentScope,
|
||||
}) {
|
||||
self.find_symbol(parent.as_ast(), name, loc)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert_symbol(
|
||||
&mut self,
|
||||
scope: AstIndex,
|
||||
name: InternIndex,
|
||||
kind: SymbolKind,
|
||||
ast: AstIndex,
|
||||
) {
|
||||
self.inner
|
||||
.insert(Key::Symbol { scope, name, kind }, Payload::new_ast(ast));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue