Compare commits

...

6 commits

Author SHA1 Message Date
janis 51ef019fd1
var decls 2025-10-14 15:10:03 +02:00
janis 0ee6bbad61
port symbol table from old crate 2025-10-14 13:52:39 +02:00
janis 0468f1fab3
remove eprintlns from lexer 2025-10-14 13:22:54 +02:00
janis a2632ca06e
function parsing and stuff.. 2025-10-13 23:37:06 +02:00
janis c7d5e4e6c0
parsing with indirect recursion 2025-10-13 23:08:15 +02:00
janis 2771593605
pretty printing ast, parsing functions 2025-10-13 22:02:22 +02:00
5 changed files with 1160 additions and 242 deletions

View file

@ -380,21 +380,14 @@ pub(crate) fn parse_comment<'a>(source: &'a mut Source) -> Result<bool> {
}
let doc = source.next_if_eq(&'/').is_some();
eprintln!("doc comment: {doc}");
loop {
// take until new line
source
.take_while_inclusive(|&c| c != '\n')
.inspect(|c| eprintln!("skipping comment char: {c}"))
.for_each(drop);
source.take_while_inclusive(|&c| c != '\n').for_each(drop);
let mut copy = source.clone();
// skip whitespaces after new line to find continuation of comment
(&mut copy)
.take_while_ref(|&c| {
eprintln!("Skipping whitespace: {c}");
is_things::is_whitespace(c) && c != '\n'
})
.take_while_ref(|&c| is_things::is_whitespace(c) && c != '\n')
.for_each(drop);
if (copy.next() == Some('/')) && (copy.next() == Some('/')) {

View file

@ -496,8 +496,6 @@ impl<'a> TokenIterator<'a> {
Some('/') if self.follows("//") => {
let doc = complex_tokens::parse_comment(&mut source).ok()?;
self.offset += source.offset();
eprintln!("next: {:?}", source.next());
eprintln!("rest: {:?}", &self.source[self.offset..]);
let lexeme = &self.source[start..self.offset];
if doc {

File diff suppressed because it is too large Load diff

398
crates/parser/src/pretty.rs Normal file
View file

@ -0,0 +1,398 @@
use crate::{Ast, AstNode, Index};
/// Renders an `Ast` as a tree of text lines, one line per printed node.
///
/// Lines are accumulated by `push_line` and written out by `print`.
pub struct PrettyPrint {
/// Finished output lines, in the order they are printed.
lines: Vec<String>,
/// One entry per currently open nesting level; `push_line` reads (and updates)
/// these to decide which prefix fragments go in front of the next line.
indents: Vec<Indent>,
}
// Prefix fragments that `push_line` concatenates in front of every line, one
// fragment per nesting level.
// NOTE(review): the first four literals are empty in this view; they presumably
// held box-drawing glyphs that were lost in extraction — confirm against the
// repository before relying on these values.

/// Emitted for an enclosing level that is still `Indent::Vertical`.
const VERTICAL: &str = "";
/// Not referenced by any code visible in this file.
const HORIZONTAL: &str = "";
/// Emitted directly in front of a line whose own level is `Vertical` or `Empty`.
const CONNECTOR: &str = "";
/// Emitted once for a level that is `Indent::End`.
const END_CONNECTOR: &str = "";
/// Emitted for an enclosing level that is `Indent::Empty`.
const EMPTY: &str = " ";
/// State of one nesting level on `PrettyPrint::indents`.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum Indent {
/// More entries follow on this level: rendered as `VERTICAL` for an enclosing
/// level and as `CONNECTOR` for the line's own level.
Vertical,
/// The next line is the last entry of this level: rendered as `END_CONNECTOR`
/// exactly once, after which `push_line` turns the level into `Empty`.
End,
/// The level has been closed: rendered as `EMPTY` for an enclosing level and
/// as `CONNECTOR` for the line's own level.
Empty,
}
impl PrettyPrint {
pub fn new() -> Self {
Self {
lines: Vec::new(),
indents: Vec::new(),
}
}
fn push_line(&mut self, line: String) {
let (last, rest) = self
.indents
.split_last_mut()
.map_or::<(Option<&mut Indent>, &mut [Indent]), _>((None, &mut []), |(last, rest)| {
(Some(last), rest)
});
let rest = rest.iter_mut().map(|indent| match *indent {
Indent::Vertical => VERTICAL,
Indent::End => {
*indent = Indent::Empty;
END_CONNECTOR
}
Indent::Empty => EMPTY,
});
let last = if let Some(last) = last {
match last {
Indent::Vertical => CONNECTOR,
Indent::End => {
*last = Indent::Empty;
END_CONNECTOR
}
Indent::Empty => CONNECTOR,
}
} else {
""
};
self.lines.push(
rest.chain(std::iter::once(last))
.chain(std::iter::once(line.as_str()))
.collect::<String>(),
);
}
/// Renders `ast` and writes the result to stdout, one line per printed node.
///
/// The last element of `ast.nodes` is used as the root of the tree. An AST
/// without any nodes prints nothing.
///
/// # Panics
///
/// Panics if the position of the root node does not fit into the `u32` carried
/// by `Index`.
pub fn print(mut self, ast: &Ast) {
    // An empty AST has no root to start from; there is simply nothing to print.
    let root = match ast.nodes.len().checked_sub(1) {
        Some(root) => root,
        None => return,
    };
    // Refuse to wrap around silently: a truncated index would print the wrong subtree.
    let root = u32::try_from(root).expect("AST has more nodes than an `Index` can address");

    self.stuff(ast, Index(root));
    for line in self.lines {
        println!("{}", line);
    }
}
/// Runs `f` for every item of `items` one nesting level deeper than the current one.
///
/// All items but the final one are visited while the new level is
/// `Indent::Vertical`; the level is switched to `Indent::End` right before the
/// final item. When `items` is empty no level is opened and `f` is never called.
fn with_indent<I>(
    &mut self,
    mut items: impl DoubleEndedIterator<Item = I>,
    mut f: impl FnMut(&mut Self, I),
) {
    // Take the final item off the back first so it can be treated specially.
    let final_item = match items.next_back() {
        Some(item) => item,
        None => return,
    };

    self.indents.push(Indent::Vertical);
    items.for_each(|item| f(self, item));

    *self.indents.last_mut().unwrap() = Indent::End;
    f(self, final_item);

    self.indents.pop();
}
/// Appends the rendering of `node`, and recursively of everything below it, to
/// `self.lines`.
///
/// Each arm pushes a header line for the node and then visits its children with
/// additional entries on `self.indents`; the depth of `self.indents` is the same
/// on return as it was on entry.
///
/// Node kinds whose children are not printed yet (`ControlFlow`, `StructDecl`,
/// `FieldDecl`, `FieldAccess`) are rendered as a single marked line instead of
/// aborting the whole dump.
///
/// # Panics
///
/// Panics if `node` does not refer to an element of `ast.nodes`.
fn stuff(&mut self, ast: &Ast, node: Index) {
    let node = ast
        .nodes
        .get(node.0 as usize)
        .expect("pretty printer was given an index that is not in `ast.nodes`");
    match node {
        AstNode::Root { files } => {
            self.push_line(format!(
                "{} {{num_files: {}}}",
                node_name(node),
                files.len()
            ));
            self.with_indent(files.iter(), |this, idx| {
                this.stuff(ast, *idx);
            });
        }
        AstNode::File { decls } => {
            self.push_line(format!(
                "{} {{num_decls: {}}}",
                node_name(node),
                decls.len()
            ));
            self.with_indent(decls.iter(), |this, idx| {
                this.stuff(ast, *idx);
            });
        }
        AstNode::ParameterList(parameters) => {
            self.push_line(format!(
                "{}[{}]",
                node_name(node),
                parameters.parameters.len()
            ));
            self.with_indent(parameters.parameters.iter(), |this, idx| {
                this.stuff(ast, *idx);
            });
        }
        AstNode::Parameter(parameter) => {
            self.push_line(format!(
                "{} [{}: {}]",
                node_name(node),
                parameter.name,
                parameter.param_type
            ));
        }
        AstNode::FunctionDecl(function_decl) => {
            self.push_line(format!("{} {}", node_name(node), function_decl.name));
            // Visibility, parameters and return type are siblings; the body is the last entry.
            self.indents.push(Indent::Vertical);
            self.push_line(format!("VISIBILITY: {:?}", function_decl.visibility));
            self.stuff(ast, function_decl.parameter_list);
            self.push_line(format!("RETURN_TYPE: {}", function_decl.return_type));
            *self.indents.last_mut().unwrap() = Indent::End;
            self.stuff(ast, function_decl.body);
            self.indents.pop();
        }
        AstNode::Block { statements, expr } => {
            self.push_line(node_name(node).to_string());
            // The optional trailing expression is printed after the statements.
            self.with_indent(statements.iter().chain(expr), |this, idx| {
                this.stuff(ast, *idx);
            });
        }
        AstNode::Constant { ty, value } => {
            self.push_line(format!("{} [{} := {:?}]", node_name(node), ty, value));
        }
        AstNode::NoopExpr => {
            self.push_line(node_name(node).to_string());
        }
        // TODO: print the details of these nodes. Until then emit a marked
        // placeholder line so that dumping an AST containing them does not panic.
        AstNode::ControlFlow { .. }
        | AstNode::StructDecl { .. }
        | AstNode::FieldDecl { .. }
        | AstNode::FieldAccess { .. } => {
            self.push_line(format!("{} <details not printed>", node_name(node)));
        }
        AstNode::VarDecl {
            mutable,
            name,
            var_type,
        } => {
            let mutability = if *mutable { " mut" } else { "" };
            // A declaration without a type annotation is shown as `?`.
            let ty = var_type.map_or_else(|| "?".to_string(), |ty| ty.to_string());
            self.push_line(format!(
                "{} [{} {}: {}]",
                node_name(node),
                name,
                mutability,
                ty
            ));
        }
        AstNode::Assignment { dest, expr } => {
            self.push_line(node_name(node).to_string());
            self.indents.push(Indent::Vertical);
            self.push_line("DEST".to_string());
            self.with_indent(core::iter::once(*dest), |this, idx| {
                this.stuff(ast, idx);
            });
            *self.indents.last_mut().unwrap() = Indent::End;
            self.push_line("EXPR".to_string());
            self.with_indent(core::iter::once(*expr), |this, idx| {
                this.stuff(ast, idx);
            });
            self.indents.pop();
        }
        AstNode::GlobalDecl {
            name,
            var_type,
            expr,
        } => {
            self.push_line(format!("{} [{}: {}]", node_name(node), name, var_type));
            self.with_indent(core::iter::once(*expr), |this, idx| {
                this.stuff(ast, idx);
            });
        }
        AstNode::UnresolvedDeclRef { name } => {
            self.push_line(format!("{} \"{}\"", node_name(node), name));
        }
        AstNode::DeclRef { decl } => {
            self.push_line(format!("{} @{}", node_name(node), decl.0));
        }
        AstNode::TypeDeclRef { ty } => {
            self.push_line(format!("{} @{:?}", node_name(node), ty));
        }
        AstNode::ExplicitCast { expr, ty } => {
            self.push_line(format!("{} {}", node_name(node), ty));
            self.indents.push(Indent::End);
            self.stuff(ast, *expr);
            self.indents.pop();
        }
        // Nodes that wrap exactly one child and carry no data of their own.
        AstNode::Stmt { expr }
        | AstNode::Else { expr }
        | AstNode::Not(expr)
        | AstNode::Negate(expr)
        | AstNode::Deref { expr }
        | AstNode::AddressOf { expr }
        | AstNode::ValueToPlace { expr }
        | AstNode::PlaceToValue { expr } => {
            self.push_line(node_name(node).to_string());
            self.indents.push(Indent::End);
            self.stuff(ast, *expr);
            self.indents.pop();
        }
        AstNode::CallExpr { callee, arguments } => {
            self.push_line(format!(
                "{} {{num_args: {}}}",
                node_name(node),
                arguments.len()
            ));
            self.indents.push(Indent::Vertical);
            self.stuff(ast, *callee);
            // NOTE(review): unlike `Subscript`, no header line is pushed before the
            // arguments, so they are nested one level deeper without a label.
            *self.indents.last_mut().unwrap() = Indent::End;
            self.with_indent(arguments.iter(), |this, arg| {
                this.stuff(ast, *arg);
            });
            self.indents.pop();
        }
        AstNode::Argument { expr } => {
            // Arguments are transparent: only the wrapped expression is printed.
            self.stuff(ast, *expr);
        }
        AstNode::Multiply { left, right }
        | AstNode::Divide { left, right }
        | AstNode::Modulus { left, right }
        | AstNode::Add { left, right }
        | AstNode::Subtract { left, right }
        | AstNode::BitOr { left, right }
        | AstNode::BitAnd { left, right }
        | AstNode::BitXor { left, right }
        | AstNode::LogicalOr { left, right }
        | AstNode::LogicalAnd { left, right }
        | AstNode::Eq { left, right }
        | AstNode::NotEq { left, right }
        | AstNode::Less { left, right }
        | AstNode::LessEq { left, right }
        | AstNode::Greater { left, right }
        | AstNode::GreaterEq { left, right }
        | AstNode::ShiftLeft { left, right }
        | AstNode::ShiftRight { left, right } => {
            self.push_line(node_name(node).to_string());
            self.with_indent([*left, *right].into_iter(), |this, idx| {
                this.stuff(ast, idx);
            });
        }
        AstNode::Subscript { expr, index } => {
            self.push_line(node_name(node).to_string());
            self.indents.push(Indent::Vertical);
            self.stuff(ast, *expr);
            *self.indents.last_mut().unwrap() = Indent::End;
            self.push_line("INDEX".to_string());
            self.with_indent([*index].into_iter(), |this, idx| {
                this.stuff(ast, idx);
            });
            self.indents.pop();
        }
        AstNode::If {
            condition,
            then,
            r#else,
        } => {
            self.push_line(node_name(node).to_string());
            self.indents.push(Indent::Vertical);
            self.push_line("COND".to_string());
            self.with_indent([*condition].into_iter(), |this, idx| {
                this.stuff(ast, idx);
            });
            self.push_line("THEN".to_string());
            if let Some(r#else) = r#else {
                self.with_indent(core::iter::once(*then), |this, idx| {
                    this.stuff(ast, idx);
                });
                // The `ELSE` label is the last entry of the `if` node.
                *self.indents.last_mut().unwrap() = Indent::End;
                self.push_line("ELSE".to_string());
                self.with_indent(core::iter::once(*r#else), |this, idx| {
                    this.stuff(ast, idx);
                });
            } else {
                *self.indents.last_mut().unwrap() = Indent::End;
                self.with_indent(core::iter::once(*then), |this, idx| {
                    this.stuff(ast, idx);
                });
            }
            self.indents.pop();
        }
        AstNode::Comment { text } => {
            self.push_line(format!("{} \"{}\"", node_name(node), text));
        }
        AstNode::Attributes { attrs } => {
            self.push_line(format!(
                "{} {{num_attrs: {}}}",
                node_name(node),
                attrs.len()
            ));
            self.with_indent(attrs.iter(), |this, &attr| {
                this.stuff(ast, attr);
            });
        }
        AstNode::Doc { text } => {
            self.push_line(format!("{} \"{}\"", node_name(node), text));
        }
        AstNode::Error { err } => {
            self.push_line(format!("{} \"{}\"", node_name(node), err));
        }
    }
}
}
/// Returns the fixed upper-case label under which `node` appears in the printed tree.
///
/// Only the variant is inspected; any payload of the node is ignored.
fn node_name(node: &AstNode) -> &'static str {
    match node {
        // Containers and top-level structure.
        AstNode::Root { .. } => "ROOT",
        AstNode::File { .. } => "FILE",
        AstNode::Block { .. } => "BLOCK",
        AstNode::Stmt { .. } => "STMT",

        // Declarations.
        AstNode::FunctionDecl(..) => "FN",
        AstNode::ParameterList(..) => "PARAMS",
        AstNode::Parameter(..) => "PARAM",
        AstNode::VarDecl { .. } => "VAR_DECL",
        AstNode::GlobalDecl { .. } => "GLOBAL_DECL",
        AstNode::StructDecl { .. } => "STRUCT",
        AstNode::FieldDecl { .. } => "FIELD",

        // References to declarations and types.
        AstNode::UnresolvedDeclRef { .. } => "UNRESOLVED_DECL_REF",
        AstNode::DeclRef { .. } => "DECL_REF",
        AstNode::TypeDeclRef { .. } => "TYPE_REF",

        // Leaves and control flow.
        AstNode::Constant { .. } => "CONSTANT",
        AstNode::NoopExpr => "NOOP",
        AstNode::ControlFlow { .. } => "CONTROL_FLOW",
        AstNode::If { .. } => "IF",
        AstNode::Else { .. } => "ELSE",

        // Assignment, access and calls.
        AstNode::Assignment { .. } => "ASSIGN",
        AstNode::FieldAccess { .. } => "FIELD_ACCESS",
        AstNode::Subscript { .. } => "SUBSCRIPT",
        AstNode::CallExpr { .. } => "CALL",
        AstNode::Argument { .. } => "ARG",

        // Conversions and single-operand operators.
        AstNode::ExplicitCast { .. } => "AS",
        AstNode::Deref { .. } => "DEREF",
        AstNode::AddressOf { .. } => "ADDR_OF",
        AstNode::PlaceToValue { .. } => "INTO_VALUE",
        AstNode::ValueToPlace { .. } => "INTO_PLACE",
        AstNode::Not(..) => "NOT",
        AstNode::Negate(..) => "NEGATE",

        // Arithmetic.
        AstNode::Multiply { .. } => "MUL",
        AstNode::Divide { .. } => "DIV",
        AstNode::Modulus { .. } => "REM",
        AstNode::Add { .. } => "ADD",
        AstNode::Subtract { .. } => "SUB",

        // Bitwise, shifts and logic.
        AstNode::BitOr { .. } => "BIT_OR",
        AstNode::BitAnd { .. } => "BIT_AND",
        AstNode::BitXor { .. } => "BIT_XOR",
        AstNode::ShiftLeft { .. } => "SHL",
        AstNode::ShiftRight { .. } => "SHR",
        AstNode::LogicalOr { .. } => "OR",
        AstNode::LogicalAnd { .. } => "AND",

        // Comparisons.
        AstNode::Eq { .. } => "EQ",
        AstNode::NotEq { .. } => "NEQ",
        AstNode::Less { .. } => "LT",
        AstNode::LessEq { .. } => "LEQ",
        AstNode::Greater { .. } => "GT",
        AstNode::GreaterEq { .. } => "GEQ",

        // Trivia, metadata and errors.
        AstNode::Comment { .. } => "COMMENT",
        AstNode::Doc { .. } => "DOCS",
        AstNode::Attributes { .. } => "META",
        AstNode::Error { .. } => "ERR",
    }
}

View file

@ -0,0 +1,178 @@
//! Coming from the ast, we have a `DeclRef` with an interned identifier `ident`
//! and want to find the symbol it refers to.
//!
//! To help, we have a struct keeping track of all accessible scopes. Now, we
//! want to look through any accessible scope `s` for a symbol with the name
//! `ident`. Thus: `Symbol { scope: s, name: ident, .. }`.
//!
//! We might also know the type of the symbol we are looking for, if we want
//! to permit fields/variables and methods/functions sharing names.
//!
//! Since I want to allow variable shadowing for local variables, some strategy
//! to differentiate between shadowed variables must be employed:
//! - keys of type SymbolKind::Local might point to a list of values with source locations
//! - keys might contain source locations.
//!
//! Any symbol pointed at from within the ast must again point at an ast
//! object.
//! Thus: `Key` -> `Payload::Ast(Index)`
//! Exception: `Key::ScopeByIndex` -> `Payload::Name(Intern<str>)`
use core::fmt::Debug;
use std::collections::BTreeMap;
use internment::Intern;
use crate::Index;
/// Location value carried by `SymbolKind::Parameter` and `SymbolKind::Local`.
///
/// Spans are totally ordered, which `Symbols::find_symbol` uses to bound a lookup.
// NOTE(review): what the wrapped `u32` counts (byte offset, token index, ...) is
// not visible in this file — confirm before documenting it further.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Span(u32);
/// A value of type `T` paired with a `Span`; only the value is publicly accessible.
///
/// Not used by any code visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Spanned<T>(pub T, Span);
/// Category of a symbol; the last component of `Key::Symbol`.
///
/// The derived ordering follows the declaration order of the variants, and the
/// lookups in `Symbols` depend on it: `find_symbol` scans the key range
/// `__First..=Local(loc)` and `find_type_symbol` scans `__First..=__TypeScope`,
/// each returning the greatest key in that range. Reordering the variants
/// therefore changes what those lookups find.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SymbolKind {
/// Smallest kind; used only as the lower bound of range queries.
__First,
Const,
Function,
Type,
/// Upper bound of the range scanned by `find_type_symbol`.
__TypeScope,
Scope,
/// Kind of the entry created by `Key::parent`, which links a scope to its parent.
ParentScope,
/// Parameters sort below every `Local`, so a local of the same name wins a lookup.
Parameter(Span),
/// `Local(loc)` is the upper bound of the range scanned by `find_symbol`, so
/// locals with a span greater than `loc` are not found.
Local(Span),
/// Largest kind; not referenced by any code visible in this file.
__Last,
}
/// Key of the ordered map inside `Symbols`.
///
/// The derived ordering compares the variant first and then the fields in
/// declaration order, so for `Key::Symbol` all entries of one `scope` and `name`
/// are adjacent and sorted by `kind`; the range queries in `Symbols` rely on that.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Key {
/// Looks up a scope by its name; `Symbols::insert_scope` stores the scope's
/// AST index under this key.
ScopeByName {
name: Intern<str>,
},
/// Looks up a scope by its AST index; `Symbols::insert_scope` stores the
/// scope's name under this key.
/// Not all scopes have a name, as some are anonymous blocks or otherwise nameless.
ScopeByIndex {
ast: Index,
},
/// A named symbol of a given kind inside the scope identified by `scope`.
Symbol {
scope: Index,
name: Intern<str>,
kind: SymbolKind,
},
}
impl Key {
pub fn parent(scope: Index) -> Key {
Key::Symbol {
scope,
name: Intern::from(""),
kind: SymbolKind::ParentScope,
}
}
}
/// Value stored in `Symbols` for a `Key`.
#[derive(Clone, Copy, Debug)]
pub enum Payload {
/// Index of an AST node; stored for `Key::ScopeByName` and `Key::Symbol` entries.
Ast(Index),
/// An interned name; stored for `Key::ScopeByIndex` entries.
Name(Intern<str>),
}
/// Symbol table: an ordered map from `Key` to `Payload`.
///
/// The map is ordered so that symbol lookups can be expressed as range queries
/// over keys (see `find_symbol` and `find_type_symbol`).
pub struct Symbols {
/// All scope and symbol entries, sorted by `Key`.
inner: BTreeMap<Key, Payload>,
}
impl Debug for Symbols {
    /// Formats the table as `Symbols [(key, payload), ...]`, in key order.
    ///
    /// `debug_list` already emits the surrounding brackets and handles the
    /// multi-line layout of `{:#?}`, so only the type name is written by hand.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("Symbols ")?;
        f.debug_list().entries(self.inner.iter()).finish()
    }
}
/// A list of scope indices.
///
/// Not referenced by any code visible in this file.
// Original note (it refers to the field as `scopes_in_tree`): for each scope in
// the list, check the keys from
// `Symbol { scope, kind: SymbolKind::Local, 0 }` to
// `Symbol { scope, kind: SymbolKind::Scope, u32::MAX }`.
// NOTE(review): presumably a sketch of a planned lookup cursor — confirm whether
// this type is still needed.
struct SymbolTreePos {
scopes_in_scope: Vec<Index>,
}
impl Symbols {
pub fn new() -> Symbols {
Self {
inner: BTreeMap::new(),
}
}
/// Registers the scope `ast` under `name` in both directions: index to name
/// and name to index. Existing entries for either key are overwritten.
pub fn insert_scope(&mut self, name: Intern<str>, ast: Index) {
    let index_to_name = (Key::ScopeByIndex { ast }, Payload::Name(name));
    let name_to_index = (Key::ScopeByName { name }, Payload::Ast(ast));

    for (key, payload) in [index_to_name, name_to_index] {
        self.inner.insert(key, payload);
    }
}
pub fn find_symbol(&self, scope: Index, name: Intern<str>, loc: Span) -> Option<(Key, Index)> {
use SymbolKind::*;
let range = self.inner.range(
Key::Symbol {
scope,
name,
kind: __First,
}..=Key::Symbol {
scope,
name,
kind: Local(loc),
},
);
if let Some((&key, &Payload::Ast(index))) = range.rev().next() {
Some((key, index))
} else {
if let Some(&Payload::Ast(parent)) = self.inner.get(&Key::parent(scope)) {
self.find_symbol(parent, name, loc)
} else {
None
}
}
}
pub fn find_type_symbol(
&self,
scope: Index,
name: Intern<str>,
loc: Span,
) -> Option<(Key, Index)> {
use SymbolKind::*;
let range = self.inner.range(
Key::Symbol {
scope,
name,
kind: __First,
}..=Key::Symbol {
scope,
name,
kind: __TypeScope,
},
);
if let Some((&key, &Payload::Ast(index))) = range.rev().next() {
Some((key, index))
} else {
if let Some(&Payload::Ast(parent)) = self.inner.get(&Key::parent(scope)) {
self.find_type_symbol(parent, name, loc)
} else {
None
}
}
}
/// Records that the symbol `name` of the given `kind` in `scope` is declared by
/// the AST node `ast`, replacing any entry already stored for the same key.
pub fn insert_symbol(&mut self, scope: Index, name: Intern<str>, kind: SymbolKind, ast: Index) {
    let key = Key::Symbol { scope, name, kind };
    let payload = Payload::Ast(ast);
    self.inner.insert(key, payload);
}
}