From 27715936052ae4906f236cbba73e48b7590e2349 Mon Sep 17 00:00:00 2001 From: janis Date: Mon, 13 Oct 2025 22:02:22 +0200 Subject: [PATCH] pretty printing ast, parsing functions --- crates/parser/src/lib.rs | 269 +++++++++++++++++++++++--- crates/parser/src/pretty.rs | 363 ++++++++++++++++++++++++++++++++++++ 2 files changed, 603 insertions(+), 29 deletions(-) create mode 100644 crates/parser/src/pretty.rs diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index d4a61aa..eec01de 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -1,4 +1,4 @@ -use std::{hash::Hash, ops::Range, sync::Arc}; +use std::{fmt::Display, hash::Hash, ops::Range, sync::Arc}; use chumsky::{ IterParser, Parser, @@ -63,6 +63,86 @@ pub enum InnerType { type Type = internment::Intern; +impl Display for InnerType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = match self { + InnerType::Top => "⊤", + InnerType::Bottom => "⊥", + InnerType::Unit => "()", + InnerType::Bool => "bool", + InnerType::AnyInt => "comptime_int", + InnerType::AnyUInt => "comptime_uint", + InnerType::Str => "str", + InnerType::Int { signed, size } => { + return write!( + f, + "{}{}", + if *signed { "i" } else { "u" }, + match size { + IntSize::Bits(bits) => bits.to_string(), + IntSize::Pointer => "size".to_string(), + } + ); + } + InnerType::Float { float_type } => match float_type { + FloatType::F32 => "f32", + FloatType::F64 => "f64", + }, + InnerType::Pointer { pointee } => { + return write!(f, "*{}", pointee); + } + InnerType::Array { element, size } => { + return write!(f, "[{}; {}]", element, size); + } + InnerType::Function { + return_type, + parameter_types, + } => { + write!(f, "fn(")?; + + if let Some((last, rest)) = parameter_types.split_last() { + for param in rest { + write!(f, "{}, ", param)?; + } + write!(f, "{}", last)?; + } + return write!(f, ") -> {}", return_type); + } + InnerType::Tuple { elements } => { + write!(f, "(")?; + + if let 
Some((last, rest)) = elements.split_last() { + for elem in rest { + write!(f, "{}, ", elem)?; + } + write!(f, "{}", last)?; + } + return write!(f, ")"); + } + InnerType::TypeUnion { types } => { + if let Some((last, rest)) = types.split_last() { + for ty in rest { + write!(f, "{} | ", ty)?; + } + write!(f, "{}", last)?; + } + return Ok(()); + } + InnerType::TypeIntersection { types } => { + if let Some((last, rest)) = types.split_last() { + for ty in rest { + write!(f, "{} ^ ", ty)?; + } + write!(f, "{}", last)?; + } + return Ok(()); + } + }; + + write!(f, "{name}") + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum FloatType { F32, @@ -108,8 +188,18 @@ pub enum ControlFlowKind { Continue, } +impl ControlFlowKind { + fn as_str(&self) -> &'static str { + match self { + ControlFlowKind::Return => "RETURN", + ControlFlowKind::Break => "BREAK", + ControlFlowKind::Continue => "CONTINUE", + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct Index(u32); +pub struct Index(pub u32); impl Index { pub fn as_value(self) -> PlaceOrValue { @@ -128,14 +218,7 @@ pub enum AstNode { File { decls: Vec, }, - FunctionProto { - name: String, - return_type: Type, - parameter_list: Index, - }, - ParameterList { - parameters: Vec, - }, + ParameterList(ParameterList), Parameter(Parameter), FunctionDecl(FunctionDecl), Block { @@ -166,7 +249,7 @@ pub enum AstNode { GlobalDecl { name: String, var_type: Type, - value: Index, + expr: Index, }, StructDecl { name: String, @@ -325,6 +408,12 @@ impl PlaceOrValue { PlaceOrValue::Value(i) => i, } } + pub fn with_index(self, index: Index) -> Self { + match self { + PlaceOrValue::Place(_) => PlaceOrValue::Place(index), + PlaceOrValue::Value(_) => PlaceOrValue::Value(index), + } + } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] @@ -349,6 +438,14 @@ pub struct Ast { nodes: Vec, } +impl core::ops::Index for Ast { + type Output = AstNode; + + fn index(&self, index: Index) -> &Self::Output { + 
self.nodes.get(index.0 as usize).expect("Invalid AST index") + } +} + impl Ast { pub fn new() -> Self { Self::default() @@ -366,7 +463,7 @@ pub struct FunctionDecl { name: String, visibility: Visibility, return_type: Type, - parameter_list: ParameterList, + parameter_list: Index, body: Index, } @@ -413,7 +510,7 @@ fn new_token_input<'a>(input: &'a str) -> TokenInput<'a> { IterInput::new(spanned_input, num_bytes..num_bytes) } -fn type_parser<'a, E>() -> impl Parser<'a, TokenInput<'a>, Type, E> +fn type_parser<'a, E>() -> impl Parser<'a, TokenInput<'a>, Type, E> + Clone where E: chumsky::extra::ParserExtra<'a, TokenInput<'a>, Error = EmptyErr> + 'a, { @@ -466,7 +563,7 @@ where }) } -fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra> { +fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra> + Clone { choice((just(Token::Pub).to(Visibility::Public),)) .or_not() .map(|v| v.unwrap_or(Visibility::Private)) @@ -495,7 +592,10 @@ fn func_parser<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> { .collect::>() .delimited_by(just(Token::OpenParens), just(Token::CloseParens)) .labelled("function parameters") - .map(|params| ParameterList { parameters: params }); + .map_with(|params, e: &mut E| { + e.state() + .push(AstNode::ParameterList(ParameterList { parameters: params })) + }); visibility() .then_ignore(just(Token::Fn)) @@ -523,17 +623,19 @@ fn func_parser<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> { type ParserExtra = chumsky::extra::Full, ()>; fn block<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone { - just(Token::OpenBrace) - .ignored() - .then_ignore(just(Token::CloseBrace)) - .map_with(|_, e: &mut MapExtra<'_, '_, _, ParserExtra>| { - e.state() - .push(AstNode::Block { - statements: vec![], - expr: None, - }) - .as_value() - // TODO: add statements and expr and map placeness by expr + expr() + .then_ignore(just(Token::Semi)) + .map(PlaceOrValue::index) + 
.repeated() + .collect::>() + .then(expr().or_not()) + .delimited_by(just(Token::OpenBrace), just(Token::CloseBrace)) + .map_with(|(statements, expr), e: &mut E| { + expr.unwrap_or(PlaceOrValue::Value(Index(u32::MAX))) + .with_index(e.state().push(AstNode::Block { + statements, + expr: expr.map(PlaceOrValue::index), + })) }) } @@ -592,7 +694,7 @@ fn simple_expr<'a, 'b>( )) } -fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> { +fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone { let assignment = choice(( just(Token::Equal), just(Token::PlusEqual), @@ -866,13 +968,107 @@ fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> { }) } +fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { + let docs = select! { Token::DocComment(doc) => doc }.map_with(|doc, e: &mut E| { + e.state().push(AstNode::Doc { + text: doc.to_string(), + }) + }); + + docs.repeated() + .at_least(1) + .collect::>() + .map_with(|attrs, e: &mut E| e.state().push(AstNode::Attributes { attrs })) +} + +fn function_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { + let ident = select! {Token::Ident(ident) => ident}; + + let param = select! 
{Token::Mutable => ()} + .or_not() + .then(ident) + .then_ignore(just(Token::Colon)) + .then(type_parser::()) + .map_with(|((mutable, name), param_type), e| { + e.state().push(AstNode::Parameter(Parameter { + mutable: mutable.is_some(), + name: name.to_string(), + param_type, + })) + }); + + let params = param + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect::>() + .delimited_by(just(Token::OpenParens), just(Token::CloseParens)) + .labelled("function parameters") + .map_with(|params, e: &mut E| { + e.state() + .push(AstNode::ParameterList(ParameterList { parameters: params })) + }); + + let ret_type = just(Token::MinusGreater) + .ignore_then(type_parser::()) + .or_not(); + + attrs() + .or_not() + .then(visibility()) + .then_ignore(just(Token::Fn)) + .then(ident) + .then(params) + .then(ret_type) + .then(block()) + .map_with(|(((((attrs, vis), ident), params), ret), body), e| { + e.state().push(AstNode::FunctionDecl(FunctionDecl { + attrs, + name: ident.to_string(), + visibility: vis, + return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)), + parameter_list: params, + body: body.index(), + })) + }) +} + +fn global_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { + let ident = select! 
{Token::Ident(ident) => ident}; + + attrs() + .or_not() + .then(visibility()) + .then_ignore(just(Token::Let)) + .then(ident) + .then_ignore(just(Token::Colon)) + .then(type_parser::()) + .then_ignore(just(Token::Equal)) + .then(expr()) + .then_ignore(just(Token::Semi)) + .map_with(|((((_attrs, _vis), name), var_type), value), e| { + e.state().push(AstNode::GlobalDecl { + name: name.to_string(), + var_type, + expr: value.index(), + }) + }) +} + +fn file<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { + choice((function_decl(), global_decl())) + .repeated() + .collect::>() + .map_with(|decls, e: &mut E| e.state().push(AstNode::File { decls })) +} + mod constants; +mod pretty; #[cfg(test)] mod tests { use chumsky::{Parser, extra::SimpleState}; - use crate::{Ast, AstNode, new_token_input, type_parser}; + use crate::{Ast, AstNode, new_token_input, pretty, type_parser}; #[test] fn print_ast_node_size() { @@ -937,11 +1133,26 @@ mod tests { let print_ast = |tokens| { let mut state = SimpleState(Ast::new()); let out = crate::expr().parse_with_state(tokens, &mut state).unwrap(); - eprintln!("{:?}", state.0); + let ast = state.0; + let mut pretty = pretty::PrettyPrint::new(); + pretty.print(&ast); }; print_ast(new_token_input("()")); print_ast(new_token_input("!() as i32")); print_ast(new_token_input("1 << 2 & 3")); + print_ast(new_token_input( + r#" +fn my_function(a: i32, b: *const u8) -> i32 { + let x: i32; + x = a + 1; + if (x < *b as i32) { + return x; + } else { + return 10; + } +} +"#, + )); } } diff --git a/crates/parser/src/pretty.rs b/crates/parser/src/pretty.rs new file mode 100644 index 0000000..e862aba --- /dev/null +++ b/crates/parser/src/pretty.rs @@ -0,0 +1,363 @@ +use crate::{Ast, AstNode, Index}; + +pub struct PrettyPrint { + lines: Vec, + indents: Vec, +} + +const VERTICAL: &str = "│"; +const HORIZONTAL: &str = "─"; +const CONNECTOR: &str = "├"; +const END_CONNECTOR: &str = "└"; +const EMPTY: &str = " "; + +#[derive(PartialEq, Eq, 
Clone, Copy, Debug)] +enum Indent { + Vertical, + End, + Empty, +} + +impl PrettyPrint { + pub fn new() -> Self { + Self { + lines: Vec::new(), + indents: Vec::new(), + } + } + + fn push_line(&mut self, line: String) { + let (last, rest) = self + .indents + .split_last_mut() + .map_or::<(Option<&mut Indent>, &mut [Indent]), _>((None, &mut []), |(last, rest)| { + (Some(last), rest) + }); + let rest = rest.iter_mut().map(|indent| match *indent { + Indent::Vertical => VERTICAL, + Indent::End => { + *indent = Indent::Empty; + END_CONNECTOR + } + Indent::Empty => EMPTY, + }); + let last = if let Some(last) = last { + match last { + Indent::Vertical => CONNECTOR, + Indent::End => { + *last = Indent::Empty; + END_CONNECTOR + } + Indent::Empty => CONNECTOR, + } + } else { + "" + }; + + self.lines.push( + rest.chain(std::iter::once(last)) + .chain(std::iter::once(line.as_str())) + .collect::(), + ); + } + + pub fn print(mut self, ast: &Ast) { + let root = ast.nodes.len().checked_sub(1).unwrap(); + self.stuff(ast, Index(root as u32)); + for line in self.lines { + println!("{}", line); + } + } + + fn with_indent( + &mut self, + mut items: impl DoubleEndedIterator, + mut f: impl FnMut(&mut Self, I), + ) { + if let Some(last) = (&mut items).next_back() { + self.indents.push(Indent::Vertical); + for item in items { + f(self, item); + } + *self.indents.last_mut().unwrap() = Indent::End; + f(self, last); + self.indents.pop(); + } + } + + fn stuff(&mut self, ast: &Ast, node: Index) { + let mut node = ast.nodes.get(node.0 as usize).unwrap(); + + match node { + AstNode::Root { files } => { + self.push_line(format!( + "{} {{num_files: {}}}", + node_name(node), + files.len() + )); + + self.with_indent(files.iter(), |this, idx| { + this.stuff(ast, *idx); + }); + } + AstNode::File { decls } => { + self.push_line(format!( + "{} {{num_decls: {}}}", + node_name(node), + decls.len() + )); + + self.with_indent(decls.iter(), |this, idx| { + this.stuff(ast, *idx); + }); + } + 
AstNode::ParameterList(parameters) => { + self.push_line(format!( + "{}[{}]", + node_name(node), + parameters.parameters.len() + )); + + self.with_indent(parameters.parameters.iter(), |this, idx| { + this.stuff(ast, *idx); + }); + } + AstNode::Parameter(parameter) => { + self.push_line(format!( + "{} [{}: {}]", + node_name(node), + parameter.name, + parameter.param_type + )); + } + AstNode::FunctionDecl(function_decl) => { + self.push_line(format!("{} {}", node_name(node), function_decl.name,)); + self.indents.push(Indent::Vertical); + self.push_line(format!("VISIBILITY: {:?}", function_decl.visibility)); + self.stuff(ast, function_decl.parameter_list); + self.push_line(format!("RETURN_TYPE: {}", function_decl.return_type)); + *self.indents.last_mut().unwrap() = Indent::End; + self.stuff(ast, function_decl.body); + self.indents.pop(); + } + AstNode::Block { statements, expr } => { + self.push_line(format!("{}", node_name(node),)); + + self.with_indent(statements.iter().chain(expr), |this, idx| { + this.stuff(ast, *idx); + }); + } + AstNode::Constant { ty, value } => { + self.push_line(format!("{} [{} := {:?}]", node_name(node), ty, value)); + } + AstNode::NoopExpr => { + self.push_line(format!("{}", node_name(node),)); + } + AstNode::Stmt { expr } => { + self.push_line(format!("{}", node_name(node),)); + self.indents.push(Indent::End); + self.stuff(ast, *expr); + self.indents.pop(); + } + AstNode::ControlFlow { kind, expr } => todo!(), + AstNode::VarDecl { + mutable, + name, + var_type, + } => todo!(), + AstNode::Assignment { dest, expr } => todo!(), + AstNode::GlobalDecl { + name, + var_type, + expr: value, + } => todo!(), + AstNode::StructDecl { name, fields } => todo!(), + AstNode::FieldDecl { name, field_type } => todo!(), + AstNode::FieldAccess { expr, field } => todo!(), + AstNode::UnresolvedDeclRef { name } => { + self.push_line(format!("{} \"{}\"", node_name(node), name,)); + } + AstNode::DeclRef { decl } => { + self.push_line(format!("{} @{}", 
node_name(node), decl.0,)); + } + AstNode::TypeDeclRef { ty } => { + self.push_line(format!("{} @{:?}", node_name(node), ty,)); + } + AstNode::ExplicitCast { expr, ty } => { + self.push_line(format!("{} {}", node_name(node), ty,)); + self.indents.push(Indent::End); + self.stuff(ast, *expr); + self.indents.pop(); + } + AstNode::Not(expr) + | AstNode::Negate(expr) + | AstNode::Deref { expr } + | AstNode::AddressOf { expr } + | AstNode::ValueToPlace { expr } + | AstNode::PlaceToValue { expr } => { + self.push_line(format!("{}", node_name(node),)); + self.indents.push(Indent::End); + self.stuff(ast, *expr); + self.indents.pop(); + } + AstNode::CallExpr { callee, arguments } => { + self.push_line(format!( + "{} {{num_args: {}}}", + node_name(node), + arguments.len() + )); + self.indents.push(Indent::Vertical); + self.stuff(ast, *callee); + *self.indents.last_mut().unwrap() = Indent::End; + self.with_indent(arguments.iter(), |this, arg| { + this.stuff(ast, *arg); + }); + self.indents.pop(); + } + AstNode::Argument { expr } => { + self.stuff(ast, *expr); + } + AstNode::Multiply { left, right } + | AstNode::Divide { left, right } + | AstNode::Modulus { left, right } + | AstNode::Add { left, right } + | AstNode::Subtract { left, right } + | AstNode::BitOr { left, right } + | AstNode::BitAnd { left, right } + | AstNode::BitXor { left, right } + | AstNode::LogicalOr { left, right } + | AstNode::LogicalAnd { left, right } + | AstNode::Eq { left, right } + | AstNode::NotEq { left, right } + | AstNode::Less { left, right } + | AstNode::LessEq { left, right } + | AstNode::Greater { left, right } + | AstNode::GreaterEq { left, right } + | AstNode::ShiftLeft { left, right } + | AstNode::ShiftRight { left, right } => { + self.push_line(format!("{}", node_name(node),)); + self.with_indent([*left, *right].into_iter(), |this, idx| { + this.stuff(ast, idx); + }); + } + AstNode::Subscript { expr, index } => { + self.push_line(format!("{}", node_name(node),)); + 
self.indents.push(Indent::Vertical); + self.stuff(ast, *expr); + *self.indents.last_mut().unwrap() = Indent::End; + + self.push_line("INDEX".to_string()); + self.with_indent([*index].into_iter(), |this, idx| { + this.stuff(ast, idx); + }); + + self.indents.pop(); + } + AstNode::If { + condition, + then, + r#else, + } => { + self.push_line(format!("{}", node_name(node),)); + self.indents.push(Indent::Vertical); + self.push_line("COND".to_string()); + self.with_indent([*condition].into_iter(), |this, idx| { + this.stuff(ast, idx); + }); + if let Some(r#else) = r#else { + self.stuff(ast, *then); + *self.indents.last_mut().unwrap() = Indent::End; + self.stuff(ast, *r#else); + } else { + *self.indents.last_mut().unwrap() = Indent::End; + self.stuff(ast, *then); + } + self.indents.pop(); + } + AstNode::Else { expr } => { + self.push_line(format!("{}", node_name(node),)); + self.indents.push(Indent::End); + self.stuff(ast, *expr); + self.indents.pop(); + } + AstNode::Comment { text } => { + self.push_line(format!("{} \"{}\"", node_name(node), text,)); + } + AstNode::Attributes { attrs } => { + self.push_line(format!( + "{} {{num_attrs: {}}}", + node_name(node), + attrs.len() + )); + self.with_indent(attrs.iter(), |this, &attr| { + this.stuff(ast, attr); + }); + } + AstNode::Doc { text } => { + self.push_line(format!("{} \"{}\"", node_name(node), text,)); + } + + AstNode::Error { err } => { + self.push_line(format!("{} \"{}\"", node_name(node), err,)); + } + } + } +} + +fn node_name(node: &AstNode) -> &'static str { + match node { + AstNode::Root { .. } => "ROOT", + AstNode::File { .. } => "FILE", + AstNode::ParameterList(_) => "PARAMS", + AstNode::Parameter(_) => "PARAM", + AstNode::FunctionDecl(_) => "FN", + AstNode::Block { .. } => "BLOCK", + AstNode::Constant { .. } => "CONSTANT", + AstNode::NoopExpr => "NOOP", + AstNode::Stmt { .. } => "STMT", + AstNode::ControlFlow { .. } => "CONTROL_FLOW", + AstNode::VarDecl { .. } => "VAR_DECL", + AstNode::Assignment { .. 
} => "ASSIGN", + AstNode::GlobalDecl { .. } => "GLOBAL_DECL", + AstNode::StructDecl { .. } => "STRUCT", + AstNode::FieldDecl { .. } => "FIELD", + AstNode::FieldAccess { .. } => "FIELD_ACCESS", + AstNode::UnresolvedDeclRef { .. } => "UNRESOLVED_DECL_REF", + AstNode::DeclRef { .. } => "DECL_REF", + AstNode::TypeDeclRef { .. } => "TYPE_REF", + AstNode::ExplicitCast { .. } => "AS", + AstNode::Deref { .. } => "DEREF", + AstNode::AddressOf { .. } => "ADDR_OF", + AstNode::PlaceToValue { .. } => "INTO_VALUE", /* label names the result of the conversion: place -> value */ + AstNode::ValueToPlace { .. } => "INTO_PLACE", /* label names the result of the conversion: value -> place */ + AstNode::CallExpr { .. } => "CALL", + AstNode::Argument { .. } => "ARG", + AstNode::Not(_) => "NOT", + AstNode::Negate(_) => "NEGATE", + AstNode::Multiply { .. } => "MUL", + AstNode::Divide { .. } => "DIV", + AstNode::Modulus { .. } => "REM", + AstNode::Add { .. } => "ADD", + AstNode::Subtract { .. } => "SUB", + AstNode::BitOr { .. } => "BIT_OR", + AstNode::BitAnd { .. } => "BIT_AND", + AstNode::BitXor { .. } => "BIT_XOR", + AstNode::LogicalOr { .. } => "OR", + AstNode::LogicalAnd { .. } => "AND", + AstNode::Eq { .. } => "EQ", + AstNode::NotEq { .. } => "NEQ", + AstNode::Less { .. } => "LT", + AstNode::LessEq { .. } => "LEQ", + AstNode::Greater { .. } => "GT", + AstNode::GreaterEq { .. } => "GEQ", + AstNode::ShiftLeft { .. } => "SHL", + AstNode::ShiftRight { .. } => "SHR", + AstNode::Subscript { .. } => "SUBSCRIPT", + AstNode::If { .. } => "IF", + AstNode::Else { .. } => "ELSE", + AstNode::Comment { .. } => "COMMENT", + AstNode::Attributes { .. } => "META", + AstNode::Doc { .. } => "DOCS", + AstNode::Error { .. } => "ERR", + } +}