pretty printing ast, parsing functions

This commit is contained in:
janis 2025-10-13 22:02:22 +02:00
parent b0b87c68f2
commit 2771593605
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
2 changed files with 603 additions and 29 deletions

View file

@ -1,4 +1,4 @@
use std::{hash::Hash, ops::Range, sync::Arc};
use std::{fmt::Display, hash::Hash, ops::Range, sync::Arc};
use chumsky::{
IterParser, Parser,
@ -63,6 +63,86 @@ pub enum InnerType {
type Type = internment::Intern<InnerType>;
/// Renders a type in a source-like notation, e.g. `bool`, `*T`, `[T; N]`,
/// `fn(A, B) -> R`, `(A, B)`, `A | B` or `A ^ B`.
impl Display for InnerType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Writes every item, placing `sep` between neighbouring items.
        // Nothing is written for an empty sequence.
        fn join<T: Display>(
            f: &mut std::fmt::Formatter<'_>,
            items: impl IntoIterator<Item = T>,
            sep: &str,
        ) -> std::fmt::Result {
            let mut items = items.into_iter();
            if let Some(first) = items.next() {
                write!(f, "{}", first)?;
                for item in items {
                    write!(f, "{}{}", sep, item)?;
                }
            }
            Ok(())
        }

        match self {
            // NOTE(review): `Top` and `Bottom` render as empty strings —
            // confirm this is intended and not a lost glyph.
            InnerType::Top => f.write_str(""),
            InnerType::Bottom => f.write_str(""),
            InnerType::Unit => f.write_str("()"),
            InnerType::Bool => f.write_str("bool"),
            InnerType::AnyInt => f.write_str("comptime_int"),
            InnerType::AnyUInt => f.write_str("comptime_uint"),
            InnerType::Str => f.write_str("str"),
            InnerType::Int { signed, size } => {
                // Signedness prefix followed by the width (`isize`/`usize`
                // for pointer-sized integers).
                f.write_str(if *signed { "i" } else { "u" })?;
                match size {
                    IntSize::Bits(bits) => write!(f, "{}", bits),
                    IntSize::Pointer => f.write_str("size"),
                }
            }
            InnerType::Float { float_type } => f.write_str(match float_type {
                FloatType::F32 => "f32",
                FloatType::F64 => "f64",
            }),
            InnerType::Pointer { pointee } => write!(f, "*{}", pointee),
            InnerType::Array { element, size } => write!(f, "[{}; {}]", element, size),
            InnerType::Function {
                return_type,
                parameter_types,
            } => {
                f.write_str("fn(")?;
                join(f, parameter_types.iter(), ", ")?;
                write!(f, ") -> {}", return_type)
            }
            InnerType::Tuple { elements } => {
                f.write_str("(")?;
                join(f, elements.iter(), ", ")?;
                f.write_str(")")
            }
            InnerType::TypeUnion { types } => join(f, types.iter(), " | "),
            InnerType::TypeIntersection { types } => join(f, types.iter(), " ^ "),
        }
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FloatType {
F32,
@ -108,8 +188,18 @@ pub enum ControlFlowKind {
Continue,
}
impl ControlFlowKind {
fn as_str(&self) -> &'static str {
match self {
ControlFlowKind::Return => "RETURN",
ControlFlowKind::Break => "BREAK",
ControlFlowKind::Continue => "CONTINUE",
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Index(u32);
pub struct Index(pub u32);
impl Index {
pub fn as_value(self) -> PlaceOrValue {
@ -128,14 +218,7 @@ pub enum AstNode {
File {
decls: Vec<Index>,
},
FunctionProto {
name: String,
return_type: Type,
parameter_list: Index,
},
ParameterList {
parameters: Vec<Index>,
},
ParameterList(ParameterList),
Parameter(Parameter),
FunctionDecl(FunctionDecl),
Block {
@ -166,7 +249,7 @@ pub enum AstNode {
GlobalDecl {
name: String,
var_type: Type,
value: Index,
expr: Index,
},
StructDecl {
name: String,
@ -325,6 +408,12 @@ impl PlaceOrValue {
PlaceOrValue::Value(i) => i,
}
}
/// Returns a value of the same kind (place or value) as `self`, but
/// referring to `index` instead of the index `self` carried.
pub fn with_index(self, index: Index) -> Self {
    if matches!(self, PlaceOrValue::Place(_)) {
        PlaceOrValue::Place(index)
    } else {
        PlaceOrValue::Value(index)
    }
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
@ -349,6 +438,14 @@ pub struct Ast {
nodes: Vec<AstNode>,
}
/// Allows looking up a node with `ast[index]`.
impl core::ops::Index<Index> for Ast {
    type Output = AstNode;

    /// # Panics
    ///
    /// Panics with "Invalid AST index" when `index` does not refer to a
    /// stored node.
    fn index(&self, index: Index) -> &Self::Output {
        let slot = index.0 as usize;
        self.nodes.get(slot).expect("Invalid AST index")
    }
}
impl Ast {
pub fn new() -> Self {
Self::default()
@ -366,7 +463,7 @@ pub struct FunctionDecl {
name: String,
visibility: Visibility,
return_type: Type,
parameter_list: ParameterList,
parameter_list: Index,
body: Index,
}
@ -413,7 +510,7 @@ fn new_token_input<'a>(input: &'a str) -> TokenInput<'a> {
IterInput::new(spanned_input, num_bytes..num_bytes)
}
fn type_parser<'a, E>() -> impl Parser<'a, TokenInput<'a>, Type, E>
fn type_parser<'a, E>() -> impl Parser<'a, TokenInput<'a>, Type, E> + Clone
where
E: chumsky::extra::ParserExtra<'a, TokenInput<'a>, Error = EmptyErr> + 'a,
{
@ -466,7 +563,7 @@ where
})
}
fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra> {
fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra> + Clone {
choice((just(Token::Pub).to(Visibility::Public),))
.or_not()
.map(|v| v.unwrap_or(Visibility::Private))
@ -495,7 +592,10 @@ fn func_parser<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
.collect::<Vec<_>>()
.delimited_by(just(Token::OpenParens), just(Token::CloseParens))
.labelled("function parameters")
.map(|params| ParameterList { parameters: params });
.map_with(|params, e: &mut E| {
e.state()
.push(AstNode::ParameterList(ParameterList { parameters: params }))
});
visibility()
.then_ignore(just(Token::Fn))
@ -523,17 +623,19 @@ fn func_parser<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
type ParserExtra = chumsky::extra::Full<EmptyErr, SimpleState<Ast>, ()>;
fn block<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone {
just(Token::OpenBrace)
.ignored()
.then_ignore(just(Token::CloseBrace))
.map_with(|_, e: &mut MapExtra<'_, '_, _, ParserExtra>| {
e.state()
.push(AstNode::Block {
statements: vec![],
expr: None,
})
.as_value()
// TODO: add statements and expr and map placeness by expr
expr()
.then_ignore(just(Token::Semi))
.map(PlaceOrValue::index)
.repeated()
.collect::<Vec<_>>()
.then(expr().or_not())
.delimited_by(just(Token::OpenBrace), just(Token::CloseBrace))
.map_with(|(statements, expr), e: &mut E| {
expr.unwrap_or(PlaceOrValue::Value(Index(u32::MAX)))
.with_index(e.state().push(AstNode::Block {
statements,
expr: expr.map(PlaceOrValue::index),
}))
})
}
@ -592,7 +694,7 @@ fn simple_expr<'a, 'b>(
))
}
fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> {
fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone {
let assignment = choice((
just(Token::Equal),
just(Token::PlusEqual),
@ -866,13 +968,107 @@ fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> {
})
}
/// Parses one or more consecutive doc comments.
///
/// Every doc comment is stored as an `AstNode::Doc`; the resulting indices
/// are then collected into a single `AstNode::Attributes` node whose index is
/// the parser's output.
fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let doc_comment = select! { Token::DocComment(doc) => doc };
    let doc_node = doc_comment.map_with(|doc, extra: &mut E| {
        let node = AstNode::Doc {
            text: doc.to_string(),
        };
        extra.state().push(node)
    });

    doc_node
        .repeated()
        .at_least(1)
        .collect::<Vec<_>>()
        .map_with(|attrs, extra: &mut E| {
            let node = AstNode::Attributes { attrs };
            extra.state().push(node)
        })
}
/// Parses a function declaration and stores it as an `AstNode::FunctionDecl`.
///
/// The accepted token sequence is: optional attributes, optional visibility,
/// the `Fn` token, an identifier, a parenthesised comma-separated parameter
/// list (trailing comma allowed), an optional `MinusGreater` return type and
/// a block body. A missing return type defaults to `InnerType::Unit`.
///
/// Each parameter becomes an `AstNode::Parameter`, and the parameters are
/// grouped under one `AstNode::ParameterList` node.
fn function_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let name = select! {Token::Ident(ident) => ident};
    let mut_marker = select! {Token::Mutable => ()}.or_not();

    let return_type = just(Token::MinusGreater)
        .ignore_then(type_parser::<ParserExtra>())
        .or_not();

    // `[mut] name: Type`
    let parameter = mut_marker
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>())
        .map_with(|((mutable, name), param_type), extra| {
            let parameter = Parameter {
                mutable: mutable.is_some(),
                name: name.to_string(),
                param_type,
            };
            extra.state().push(AstNode::Parameter(parameter))
        });

    let parameter_list = parameter
        .separated_by(just(Token::Comma))
        .allow_trailing()
        .collect::<Vec<_>>()
        .delimited_by(just(Token::OpenParens), just(Token::CloseParens))
        .labelled("function parameters")
        .map_with(|parameters, extra: &mut E| {
            let list = ParameterList { parameters };
            extra.state().push(AstNode::ParameterList(list))
        });

    attrs()
        .or_not()
        .then(visibility())
        .then_ignore(just(Token::Fn))
        .then(name)
        .then(parameter_list)
        .then(return_type)
        .then(block())
        .map_with(
            |(((((attrs, vis), name), parameter_list), return_type), body), extra| {
                let decl = FunctionDecl {
                    attrs,
                    name: name.to_string(),
                    visibility: vis,
                    return_type: return_type.unwrap_or_else(|| Intern::new(InnerType::Unit)),
                    parameter_list,
                    body: body.index(),
                };
                extra.state().push(AstNode::FunctionDecl(decl))
            },
        )
}
/// Parses a global declaration and stores it as an `AstNode::GlobalDecl`.
///
/// The accepted token sequence is: optional attributes, optional visibility,
/// the `Let` token, an identifier, `Colon`, a type, `Equal`, an expression
/// and `Semi`.
///
/// The parsed attributes and visibility are discarded; only the name, the
/// type and the initializer expression are recorded in the node.
fn global_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let name = select! {Token::Ident(ident) => ident};

    let header = attrs()
        .or_not()
        .then(visibility())
        .then_ignore(just(Token::Let));

    let typed_name = header
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>());

    typed_name
        .then_ignore(just(Token::Equal))
        .then(expr())
        .then_ignore(just(Token::Semi))
        .map_with(|((((_attrs, _vis), name), var_type), value), extra| {
            let decl = AstNode::GlobalDecl {
                name: name.to_string(),
                var_type,
                expr: value.index(),
            };
            extra.state().push(decl)
        })
}
/// Parses any number of function or global declarations and stores their
/// indices, in source order, in a single `AstNode::File` node.
fn file<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let declaration = choice((function_decl(), global_decl()));
    let declarations = declaration.repeated().collect::<Vec<_>>();

    declarations.map_with(|decls, extra: &mut E| {
        let node = AstNode::File { decls };
        extra.state().push(node)
    })
}
mod constants;
mod pretty;
#[cfg(test)]
mod tests {
use chumsky::{Parser, extra::SimpleState};
use crate::{Ast, AstNode, new_token_input, type_parser};
use crate::{Ast, AstNode, new_token_input, pretty, type_parser};
#[test]
fn print_ast_node_size() {
@ -937,11 +1133,26 @@ mod tests {
let print_ast = |tokens| {
let mut state = SimpleState(Ast::new());
let out = crate::expr().parse_with_state(tokens, &mut state).unwrap();
eprintln!("{:?}", state.0);
let ast = state.0;
let mut pretty = pretty::PrettyPrint::new();
pretty.print(&ast);
};
print_ast(new_token_input("()"));
print_ast(new_token_input("!() as i32"));
print_ast(new_token_input("1 << 2 & 3"));
print_ast(new_token_input(
r#"
fn my_function(a: i32, b: *const u8) -> i32 {
let x: i32;
x = a + 1;
if (x < *b as i32) {
return x;
} else {
return 10;
}
}
"#,
));
}
}

363
crates/parser/src/pretty.rs Normal file
View file

@ -0,0 +1,363 @@
use crate::{Ast, AstNode, Index};
/// Renders an [`Ast`] as an indented tree, one line per rendered node.
///
/// Lines are buffered while the tree is walked and written to stdout by
/// `print`.
pub struct PrettyPrint {
    /// Finished output lines, in the order they were pushed.
    lines: Vec<String>,
    /// Indentation state, one entry per nesting level of the node that is
    /// currently being rendered (outermost level first).
    indents: Vec<Indent>,
}
// Glyphs used to draw the tree. The per-level glyphs (`VERTICAL`, `CONNECTOR`,
// `END_CONNECTOR`, `EMPTY`) must all have the same display width, otherwise
// nodes on the same nesting level end up in different columns.

/// Drawn for an ancestor level that still has children to come.
const VERTICAL: &str = "│";
/// Horizontal stroke; not used by the printer itself yet.
const HORIZONTAL: &str = "─";
/// Drawn in front of a child that has following siblings.
const CONNECTOR: &str = "├";
/// Drawn in front of the last child of a node.
const END_CONNECTOR: &str = "└";
/// Drawn for an ancestor level whose last child has already been started.
const EMPTY: &str = " ";
/// State of one nesting level while a line is being rendered.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum Indent {
    /// More children follow on this level: rendered as `VERTICAL` for an
    /// ancestor level and as `CONNECTOR` for the innermost level.
    Vertical,
    /// The next line on this level starts the last child: rendered as
    /// `END_CONNECTOR` once, after which the level becomes `Empty`.
    End,
    /// The last child on this level has already been started: rendered as
    /// `EMPTY` for an ancestor level and as `CONNECTOR` for the innermost level.
    Empty,
}
impl PrettyPrint {
/// Creates a printer with no buffered lines and no active indentation.
pub fn new() -> Self {
    let lines = Vec::new();
    let indents = Vec::new();
    Self { lines, indents }
}
fn push_line(&mut self, line: String) {
let (last, rest) = self
.indents
.split_last_mut()
.map_or::<(Option<&mut Indent>, &mut [Indent]), _>((None, &mut []), |(last, rest)| {
(Some(last), rest)
});
let rest = rest.iter_mut().map(|indent| match *indent {
Indent::Vertical => VERTICAL,
Indent::End => {
*indent = Indent::Empty;
END_CONNECTOR
}
Indent::Empty => EMPTY,
});
let last = if let Some(last) = last {
match last {
Indent::Vertical => CONNECTOR,
Indent::End => {
*last = Indent::Empty;
END_CONNECTOR
}
Indent::Empty => CONNECTOR,
}
} else {
""
};
self.lines.push(
rest.chain(std::iter::once(last))
.chain(std::iter::once(line.as_str()))
.collect::<String>(),
);
}
/// Renders `ast` as a tree and writes it to stdout, one line per rendered node.
///
/// The node stored last in `ast` is used as the root of the tree. An empty
/// AST produces no output.
pub fn print(mut self, ast: &Ast) {
    // An empty AST has no root node, so there is nothing to render.
    if let Some(root) = ast.nodes.len().checked_sub(1) {
        // `Index` wraps a `u32`, so the position of the last stored node is
        // expected to fit; the cast mirrors how indices are represented.
        self.stuff(ast, Index(root as u32));
    }

    for line in &self.lines {
        println!("{}", line);
    }
}
/// Runs `f` for every item in `items` on a new nesting level.
///
/// All items but the last are visited with the level in the `Vertical`
/// state; the last one is visited with the level in the `End` state. The
/// level is removed again afterwards. Nothing happens when `items` is empty.
fn with_indent<I>(
    &mut self,
    mut items: impl DoubleEndedIterator<Item = I>,
    mut f: impl FnMut(&mut Self, I),
) {
    let final_item = match items.next_back() {
        Some(item) => item,
        None => return,
    };

    self.indents.push(Indent::Vertical);
    items.for_each(|item| f(self, item));

    *self.indents.last_mut().unwrap() = Indent::End;
    f(self, final_item);

    self.indents.pop();
}
/// Renders `child` as the only (and therefore last) child of the line that
/// was pushed most recently.
fn only_child(&mut self, ast: &Ast, child: Index) {
    self.indents.push(Indent::End);
    self.stuff(ast, child);
    self.indents.pop();
}

/// Renders `node` and, recursively, its children into the line buffer.
///
/// # Panics
///
/// Panics if `node` or any child index does not refer to a node of `ast`,
/// and for node kinds whose rendering is not implemented yet (see the
/// `todo!` arm below).
fn stuff(&mut self, ast: &Ast, node: Index) {
    let node = &ast[node];
    match node {
        AstNode::Root { files } => {
            self.push_line(format!(
                "{} {{num_files: {}}}",
                node_name(node),
                files.len()
            ));
            self.with_indent(files.iter(), |this, idx| {
                this.stuff(ast, *idx);
            });
        }
        AstNode::File { decls } => {
            self.push_line(format!(
                "{} {{num_decls: {}}}",
                node_name(node),
                decls.len()
            ));
            self.with_indent(decls.iter(), |this, idx| {
                this.stuff(ast, *idx);
            });
        }
        AstNode::ParameterList(parameters) => {
            self.push_line(format!(
                "{}[{}]",
                node_name(node),
                parameters.parameters.len()
            ));
            self.with_indent(parameters.parameters.iter(), |this, idx| {
                this.stuff(ast, *idx);
            });
        }
        AstNode::Parameter(parameter) => {
            self.push_line(format!(
                "{} [{}: {}]",
                node_name(node),
                parameter.name,
                parameter.param_type
            ));
        }
        AstNode::FunctionDecl(function_decl) => {
            self.push_line(format!("{} {}", node_name(node), function_decl.name));
            self.indents.push(Indent::Vertical);
            self.push_line(format!("VISIBILITY: {:?}", function_decl.visibility));
            self.stuff(ast, function_decl.parameter_list);
            self.push_line(format!("RETURN_TYPE: {}", function_decl.return_type));
            // The body is the last child of the function.
            *self.indents.last_mut().unwrap() = Indent::End;
            self.stuff(ast, function_decl.body);
            self.indents.pop();
        }
        AstNode::Block { statements, expr } => {
            self.push_line(node_name(node).to_string());
            // The optional trailing expression is rendered after the statements.
            self.with_indent(statements.iter().chain(expr), |this, idx| {
                this.stuff(ast, *idx);
            });
        }
        AstNode::Constant { ty, value } => {
            self.push_line(format!("{} [{} := {:?}]", node_name(node), ty, value));
        }
        AstNode::NoopExpr => {
            self.push_line(node_name(node).to_string());
        }
        AstNode::Stmt { expr } => {
            self.push_line(node_name(node).to_string());
            self.only_child(ast, *expr);
        }
        AstNode::ControlFlow { .. }
        | AstNode::VarDecl { .. }
        | AstNode::Assignment { .. }
        | AstNode::GlobalDecl { .. }
        | AstNode::StructDecl { .. }
        | AstNode::FieldDecl { .. }
        | AstNode::FieldAccess { .. } => {
            todo!("pretty-printing of {} nodes", node_name(node))
        }
        AstNode::UnresolvedDeclRef { name } => {
            self.push_line(format!("{} \"{}\"", node_name(node), name));
        }
        AstNode::DeclRef { decl } => {
            self.push_line(format!("{} @{}", node_name(node), decl.0));
        }
        AstNode::TypeDeclRef { ty } => {
            self.push_line(format!("{} @{:?}", node_name(node), ty));
        }
        AstNode::ExplicitCast { expr, ty } => {
            self.push_line(format!("{} {}", node_name(node), ty));
            self.only_child(ast, *expr);
        }
        AstNode::Not(expr)
        | AstNode::Negate(expr)
        | AstNode::Deref { expr }
        | AstNode::AddressOf { expr }
        | AstNode::ValueToPlace { expr }
        | AstNode::PlaceToValue { expr } => {
            self.push_line(node_name(node).to_string());
            self.only_child(ast, *expr);
        }
        AstNode::CallExpr { callee, arguments } => {
            self.push_line(format!(
                "{} {{num_args: {}}}",
                node_name(node),
                arguments.len()
            ));
            if arguments.is_empty() {
                // No argument group follows, so the callee is the last child
                // and must be closed with an end connector.
                self.only_child(ast, *callee);
            } else {
                self.indents.push(Indent::Vertical);
                self.stuff(ast, *callee);
                // The argument group hangs off the end connector of this level.
                *self.indents.last_mut().unwrap() = Indent::End;
                self.with_indent(arguments.iter(), |this, arg| {
                    this.stuff(ast, *arg);
                });
                self.indents.pop();
            }
        }
        AstNode::Argument { expr } => {
            // Arguments get no line of their own; only the wrapped expression is shown.
            self.stuff(ast, *expr);
        }
        AstNode::Multiply { left, right }
        | AstNode::Divide { left, right }
        | AstNode::Modulus { left, right }
        | AstNode::Add { left, right }
        | AstNode::Subtract { left, right }
        | AstNode::BitOr { left, right }
        | AstNode::BitAnd { left, right }
        | AstNode::BitXor { left, right }
        | AstNode::LogicalOr { left, right }
        | AstNode::LogicalAnd { left, right }
        | AstNode::Eq { left, right }
        | AstNode::NotEq { left, right }
        | AstNode::Less { left, right }
        | AstNode::LessEq { left, right }
        | AstNode::Greater { left, right }
        | AstNode::GreaterEq { left, right }
        | AstNode::ShiftLeft { left, right }
        | AstNode::ShiftRight { left, right } => {
            self.push_line(node_name(node).to_string());
            self.with_indent([*left, *right].into_iter(), |this, idx| {
                this.stuff(ast, idx);
            });
        }
        AstNode::Subscript { expr, index } => {
            self.push_line(node_name(node).to_string());
            self.indents.push(Indent::Vertical);
            self.stuff(ast, *expr);
            *self.indents.last_mut().unwrap() = Indent::End;
            self.push_line("INDEX".to_string());
            self.with_indent([*index].into_iter(), |this, idx| {
                this.stuff(ast, idx);
            });
            self.indents.pop();
        }
        AstNode::If {
            condition,
            then,
            r#else,
        } => {
            self.push_line(node_name(node).to_string());
            self.indents.push(Indent::Vertical);
            self.push_line("COND".to_string());
            self.with_indent([*condition].into_iter(), |this, idx| {
                this.stuff(ast, idx);
            });
            // Whichever branch comes last is drawn with the end connector.
            if let Some(r#else) = r#else {
                self.stuff(ast, *then);
                *self.indents.last_mut().unwrap() = Indent::End;
                self.stuff(ast, *r#else);
            } else {
                *self.indents.last_mut().unwrap() = Indent::End;
                self.stuff(ast, *then);
            }
            self.indents.pop();
        }
        AstNode::Else { expr } => {
            self.push_line(node_name(node).to_string());
            self.only_child(ast, *expr);
        }
        AstNode::Comment { text } => {
            self.push_line(format!("{} \"{}\"", node_name(node), text));
        }
        AstNode::Attributes { attrs } => {
            self.push_line(format!(
                "{} {{num_attrs: {}}}",
                node_name(node),
                attrs.len()
            ));
            self.with_indent(attrs.iter(), |this, &attr| {
                this.stuff(ast, attr);
            });
        }
        AstNode::Doc { text } => {
            self.push_line(format!("{} \"{}\"", node_name(node), text));
        }
        AstNode::Error { err } => {
            self.push_line(format!("{} \"{}\"", node_name(node), err));
        }
    }
}
}
/// Returns the fixed upper-case label under which `node`'s kind is shown in
/// the pretty-printed tree.
fn node_name(node: &AstNode) -> &'static str {
    match node {
        AstNode::Root { .. } => "ROOT",
        AstNode::File { .. } => "FILE",
        AstNode::ParameterList(_) => "PARAMS",
        AstNode::Parameter(_) => "PARAM",
        AstNode::FunctionDecl(_) => "FN",
        AstNode::Block { .. } => "BLOCK",
        AstNode::Constant { .. } => "CONSTANT",
        AstNode::NoopExpr => "NOOP",
        AstNode::Stmt { .. } => "STMT",
        AstNode::ControlFlow { .. } => "CONTROL_FLOW",
        AstNode::VarDecl { .. } => "VAR_DECL",
        AstNode::Assignment { .. } => "ASSIGN",
        AstNode::GlobalDecl { .. } => "GLOBAL_DECL",
        AstNode::StructDecl { .. } => "STRUCT",
        AstNode::FieldDecl { .. } => "FIELD",
        AstNode::FieldAccess { .. } => "FIELD_ACCESS",
        AstNode::UnresolvedDeclRef { .. } => "UNRESOLVED_DECL_REF",
        AstNode::DeclRef { .. } => "DECL_REF",
        AstNode::TypeDeclRef { .. } => "TYPE_REF",
        AstNode::ExplicitCast { .. } => "AS",
        AstNode::Deref { .. } => "DEREF",
        AstNode::AddressOf { .. } => "ADDR_OF",
        // The label names the result of the conversion: a place-to-value
        // conversion yields a value, a value-to-place conversion yields a place.
        AstNode::PlaceToValue { .. } => "INTO_VALUE",
        AstNode::ValueToPlace { .. } => "INTO_PLACE",
        AstNode::CallExpr { .. } => "CALL",
        AstNode::Argument { .. } => "ARG",
        AstNode::Not(_) => "NOT",
        AstNode::Negate(_) => "NEGATE",
        AstNode::Multiply { .. } => "MUL",
        AstNode::Divide { .. } => "DIV",
        AstNode::Modulus { .. } => "REM",
        AstNode::Add { .. } => "ADD",
        AstNode::Subtract { .. } => "SUB",
        AstNode::BitOr { .. } => "BIT_OR",
        AstNode::BitAnd { .. } => "BIT_AND",
        AstNode::BitXor { .. } => "BIT_XOR",
        AstNode::LogicalOr { .. } => "OR",
        AstNode::LogicalAnd { .. } => "AND",
        AstNode::Eq { .. } => "EQ",
        AstNode::NotEq { .. } => "NEQ",
        AstNode::Less { .. } => "LT",
        AstNode::LessEq { .. } => "LEQ",
        AstNode::Greater { .. } => "GT",
        AstNode::GreaterEq { .. } => "GEQ",
        AstNode::ShiftLeft { .. } => "SHL",
        AstNode::ShiftRight { .. } => "SHR",
        AstNode::Subscript { .. } => "SUBSCRIPT",
        AstNode::If { .. } => "IF",
        AstNode::Else { .. } => "ELSE",
        AstNode::Comment { .. } => "COMMENT",
        AstNode::Attributes { .. } => "META",
        AstNode::Doc { .. } => "DOCS",
        AstNode::Error { .. } => "ERR",
    }
}