cooooool stuff for the new ast generator and intern pool and symbol table
basically, btrees are awesome.
This commit is contained in:
parent
6f3a7dc56c
commit
5c38969ef8
668
src/ast2/mod.rs
668
src/ast2/mod.rs
File diff suppressed because it is too large
Load diff
|
@ -18,6 +18,7 @@ fn main() {
|
||||||
)
|
)
|
||||||
.subcommands([
|
.subcommands([
|
||||||
Command::new("ast").about("output AST."),
|
Command::new("ast").about("output AST."),
|
||||||
|
Command::new("ast2").about("output AST."),
|
||||||
Command::new("mir").about("output machine-level intermediate representation."),
|
Command::new("mir").about("output machine-level intermediate representation."),
|
||||||
Command::new("ir").about("output intermediate representation."),
|
Command::new("ir").about("output intermediate representation."),
|
||||||
Command::new("asm").about("output x86-64 assembly (intel syntax)."),
|
Command::new("asm").about("output x86-64 assembly (intel syntax)."),
|
||||||
|
@ -47,6 +48,12 @@ fn main() {
|
||||||
tree.render(&mut buf).unwrap();
|
tree.render(&mut buf).unwrap();
|
||||||
println!("AST:\n{buf}");
|
println!("AST:\n{buf}");
|
||||||
}
|
}
|
||||||
|
"ast2" => {
|
||||||
|
let mut tree2 = compiler::ast2::ast_gen::Parser::new();
|
||||||
|
tree2.parse(tokens.iter());
|
||||||
|
eprintln!("{tree2:#?}");
|
||||||
|
println!("AST (new):\n{tree2}");
|
||||||
|
}
|
||||||
"ir" => {
|
"ir" => {
|
||||||
let mut ir = IR::new();
|
let mut ir = IR::new();
|
||||||
let builder = ir.build(&mut tree);
|
let builder = ir.build(&mut tree);
|
||||||
|
|
15
src/lexer.rs
15
src/lexer.rs
|
@ -1,3 +1,4 @@
|
||||||
|
use std::fmt::Debug;
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
|
|
||||||
use crate::tokens::Token;
|
use crate::tokens::Token;
|
||||||
|
@ -363,6 +364,8 @@ impl<'a> TokenIterator<'a> {
|
||||||
};
|
};
|
||||||
if let Some(pos) = tokens.iter().position(|&t| t == next.token()) {
|
if let Some(pos) = tokens.iter().position(|&t| t == next.token()) {
|
||||||
occurences[pos] += 1;
|
occurences[pos] += 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -416,12 +419,20 @@ impl<'a> TokenIterator<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct TokenItem<'a> {
|
pub struct TokenItem<'a> {
|
||||||
tokenizer: &'a Tokenizer<'a>,
|
tokenizer: &'a Tokenizer<'a>,
|
||||||
inner: TokenPos,
|
inner: TokenPos,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> Debug for TokenItem<'a> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("TokenItem")
|
||||||
|
.field("lexeme", &self.lexeme())
|
||||||
|
.field("inner", &self.inner)
|
||||||
|
.finish_non_exhaustive()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
|
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
|
||||||
pub struct SourceLocation {
|
pub struct SourceLocation {
|
||||||
pub line: u32,
|
pub line: u32,
|
||||||
|
@ -430,7 +441,7 @@ pub struct SourceLocation {
|
||||||
|
|
||||||
impl Display for SourceLocation {
|
impl Display for SourceLocation {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
write!(f, "l:{},c:{}", self.line, self.column)
|
write!(f, "{}:{}", self.line, self.column)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,8 @@
|
||||||
map_try_insert,
|
map_try_insert,
|
||||||
iter_intersperse,
|
iter_intersperse,
|
||||||
iter_array_chunks,
|
iter_array_chunks,
|
||||||
int_roundings
|
int_roundings,
|
||||||
|
debug_closure_helpers
|
||||||
)]
|
)]
|
||||||
#![allow(unused_macros)]
|
#![allow(unused_macros)]
|
||||||
|
|
||||||
|
|
|
@ -376,3 +376,186 @@ impl SymbolTableWrapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type SymbolTable = SymbolTableWrapper;
|
pub type SymbolTable = SymbolTableWrapper;
|
||||||
|
|
||||||
|
pub mod syms2 {
|
||||||
|
/*!
Coming from the ast, we have a `DeclRef` with an interned identifier `ident`
and want to find the symbol it refers to.

To help, we have a struct keeping track of all accessible scopes. Now,
we want to look through any accessible scope `s` for a symbol with the
name `ident`.
Thus: `Symbol { scope: s, name: ident, .. }`.

We might also know the type of the symbol we are looking for, if we want to
permit fields/variables and methods/functions sharing names.

Since I want to allow variable shadowing for local variables, some strategy
to differentiate between shadowed variables must be employed:
- keys of type `SymbolKind::Local` might point to a list of values with
  source locations
- keys might contain source locations.

Any symbol pointed at from within the ast must again point at an ast
object.
Thus: `Key` -> `AstIndex`
Exception: `Key::ScopeByIndex` -> `InternIndex`
*/
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::fmt::Debug;
|
||||||
|
|
||||||
|
use crate::ast2::intern::Index as InternIndex;
|
||||||
|
use crate::ast2::Index as AstIndex;
|
||||||
|
use crate::lexer::SourceLocation;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
|
pub enum Key {
|
||||||
|
ScopeByName {
|
||||||
|
name: InternIndex,
|
||||||
|
},
|
||||||
|
/// not all scopes have a name, as some are anonymous blocks or otherwise nameless
|
||||||
|
ScopeByIndex {
|
||||||
|
ast: AstIndex,
|
||||||
|
},
|
||||||
|
Symbol {
|
||||||
|
scope: AstIndex,
|
||||||
|
name: InternIndex,
|
||||||
|
kind: SymbolKind,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
|
pub enum SymbolKind {
|
||||||
|
__First,
|
||||||
|
Const,
|
||||||
|
Function,
|
||||||
|
Type,
|
||||||
|
Scope,
|
||||||
|
ParentScope,
|
||||||
|
Local(SourceLocation),
|
||||||
|
__Last,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub union Payload {
|
||||||
|
ast_index: AstIndex,
|
||||||
|
intern_index: InternIndex,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
enum ExpandedPayload {
|
||||||
|
Ast(AstIndex),
|
||||||
|
Intern(InternIndex),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Payload {
|
||||||
|
fn new_ast(ast: AstIndex) -> Payload {
|
||||||
|
Self { ast_index: ast }
|
||||||
|
}
|
||||||
|
fn new_intern(intern: InternIndex) -> Payload {
|
||||||
|
Self {
|
||||||
|
intern_index: intern,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_ast(&self) -> AstIndex {
|
||||||
|
unsafe { self.ast_index }
|
||||||
|
}
|
||||||
|
fn as_intern(&self) -> InternIndex {
|
||||||
|
unsafe { self.intern_index }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Symbols {
|
||||||
|
inner: BTreeMap<Key, Payload>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for Symbols {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Symbols [")?;
|
||||||
|
if f.alternate() {
|
||||||
|
writeln!(f, "")?;
|
||||||
|
}
|
||||||
|
let entries = self.inner.iter().map(|(key, val)| {
|
||||||
|
let payload = match key {
|
||||||
|
Key::ScopeByIndex { .. } => ExpandedPayload::Intern(val.as_intern()),
|
||||||
|
_ => ExpandedPayload::Ast(val.as_ast()),
|
||||||
|
};
|
||||||
|
|
||||||
|
(*key, payload)
|
||||||
|
});
|
||||||
|
f.debug_list().entries(entries).finish()?;
|
||||||
|
write!(f, "]")?;
|
||||||
|
if f.alternate() {
|
||||||
|
writeln!(f, "")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// checks for each scope in scopes_in_tree Symbol { scope, kind: SymbolKind::Local, 0}..Symbol { scope, kind: SymbolKind::Scope, u32::MAX}
|
||||||
|
struct SymbolTreePos {
|
||||||
|
scopes_in_scope: Vec<AstIndex>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Symbols {
|
||||||
|
pub fn new() -> Symbols {
|
||||||
|
Self {
|
||||||
|
inner: BTreeMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn insert_scope(&mut self, name: InternIndex, ast: AstIndex) {
|
||||||
|
self.inner
|
||||||
|
.insert(Key::ScopeByIndex { ast }, Payload::new_intern(name));
|
||||||
|
self.inner
|
||||||
|
.insert(Key::ScopeByName { name }, Payload::new_ast(ast));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn find_symbol(
|
||||||
|
&self,
|
||||||
|
scope: AstIndex,
|
||||||
|
name: InternIndex,
|
||||||
|
loc: SourceLocation,
|
||||||
|
) -> Option<AstIndex> {
|
||||||
|
use SymbolKind::*;
|
||||||
|
let range = self.inner.range(
|
||||||
|
Key::Symbol {
|
||||||
|
scope,
|
||||||
|
name,
|
||||||
|
kind: __First,
|
||||||
|
}..=Key::Symbol {
|
||||||
|
scope,
|
||||||
|
name,
|
||||||
|
kind: Local(loc),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Some((_, payload)) = range.rev().next() {
|
||||||
|
Some(payload.as_ast())
|
||||||
|
} else {
|
||||||
|
if let Some(parent) = self.inner.get(&Key::Symbol {
|
||||||
|
scope,
|
||||||
|
name: InternIndex::invalid(),
|
||||||
|
kind: ParentScope,
|
||||||
|
}) {
|
||||||
|
self.find_symbol(parent.as_ast(), name, loc)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_symbol(
|
||||||
|
&mut self,
|
||||||
|
scope: AstIndex,
|
||||||
|
name: InternIndex,
|
||||||
|
kind: SymbolKind,
|
||||||
|
ast: AstIndex,
|
||||||
|
) {
|
||||||
|
self.inner
|
||||||
|
.insert(Key::Symbol { scope, name, kind }, Payload::new_ast(ast));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue