From c7d5e4e6c02752e46aeffbd23ac6a6e39c2d4eba Mon Sep 17 00:00:00 2001 From: janis Date: Mon, 13 Oct 2025 23:08:15 +0200 Subject: [PATCH] parsing with indirect recursion --- crates/parser/src/lib.rs | 516 ++++++++++++++++++------------------ crates/parser/src/pretty.rs | 15 +- 2 files changed, 270 insertions(+), 261 deletions(-) diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index eec01de..cc2fde3 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -7,7 +7,7 @@ use chumsky::{ input::{IterInput, MapExtra}, pratt::{infix, left, postfix, prefix, right}, prelude::{Recursive, choice, just, recursive}, - recursive::Direct, + recursive::{Direct, Indirect}, select, text, }; use internment::Intern; @@ -569,195 +569,235 @@ fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra> .map(|v| v.unwrap_or(Visibility::Private)) } -fn func_parser<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> { - let ident = select! {Token::Ident(ident) => ident}; - - let param = just(Token::Mutable) - .to(()) - .or_not() - .then(ident) - .then_ignore(just(Token::Colon)) - .then(type_parser::()) - .map_with(|((mutable, name), param_type), e| { - e.state().push(AstNode::Parameter(Parameter { - mutable: mutable.is_some(), - name: name.to_string(), - param_type, - })) - }); - - let params = param - .separated_by(just(Token::Comma)) - .allow_trailing() - .collect::>() - .delimited_by(just(Token::OpenParens), just(Token::CloseParens)) - .labelled("function parameters") - .map_with(|params, e: &mut E| { - e.state() - .push(AstNode::ParameterList(ParameterList { parameters: params })) - }); - - visibility() - .then_ignore(just(Token::Fn)) - .then(ident) - .then(params) - // optional return type - .then( - just(Token::MinusGreater) - .ignore_then(type_parser()) - .or_not(), - ) - .then(block()) - .map_with(|((((vis, ident), params), ret), body), e| { - e.state().push(AstNode::FunctionDecl(FunctionDecl { - attrs: None, - name: 
ident.to_string(), - visibility: vis, - return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)), - parameter_list: params, - body: body.index(), - })) - }) -} - type ParserExtra = chumsky::extra::Full, ()>; -fn block<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone { - expr() - .then_ignore(just(Token::Semi)) - .map(PlaceOrValue::index) - .repeated() - .collect::>() - .then(expr().or_not()) - .delimited_by(just(Token::OpenBrace), just(Token::CloseBrace)) - .map_with(|(statements, expr), e: &mut E| { - expr.unwrap_or(PlaceOrValue::Value(Index(u32::MAX))) - .with_index(e.state().push(AstNode::Block { - statements, - expr: expr.map(PlaceOrValue::index), +struct ParserCtx<'src, 'b> { + expr: Recursive, PlaceOrValue, ParserExtra>>, + function: Recursive, Index, ParserExtra>>, +} + +impl<'src, 'b> ParserCtx<'src, 'b> +where + 'src: 'b, +{ + fn new() -> Self { + let mut this = Self { + expr: Recursive::declare(), + function: Recursive::declare(), + }; + + let function = this.create_function_decl(); + this.function.define(function); + let expr = this.create_expr(); + this.expr.define(expr); + + this + } + + pub fn expr(&self) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone { + self.expr.clone() + } + + pub fn function(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone { + self.function.clone() + } + + fn stmt(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone { + choice(( + self.function.clone(), + self.expr + .clone() + .then_ignore(just(Token::Semi)) + .map(PlaceOrValue::index), + )) + } + + fn simple_expr( + &self, + ) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone + use<'src, 'b> + { + let ident = select! {Token::Ident(ident) => ident}.map_with( + |ident, e: &mut MapExtra, ParserExtra>| { + e.state() + .push(AstNode::UnresolvedDeclRef { + name: ident.to_string(), + }) + .as_place() + }, + ); + + let constant = select! 
{ + Token::FloatingConstant(lexeme)| + Token::DotFloatingConstant(lexeme)| + Token::FloatingExpConstant(lexeme)| + Token::DotFloatingExpConstant(lexeme) => { + constants::parse_floating_constant(lexeme) + }, + Token::IntegerConstant(lexeme) => { + constants::parse_integer_constant(lexeme, Radix::Dec) + }, + tok @ Token::IntegerHexConstant(lexeme)| + tok @ Token::IntegerOctConstant(lexeme)| + tok @ Token::IntegerBinConstant(lexeme) => { + let radix = Radix::from_token(tok).unwrap(); + constants::parse_integer_constant(&lexeme[2..], radix) + }, + } + .map_with(|(value, ty), e: &mut E| { + e.state().push(AstNode::Constant { ty, value }).as_value() + }); + + choice(( + unit().map(PlaceOrValue::Value), + ident, + constant, + self.expr + .clone() + .delimited_by(just(Token::OpenParens), just(Token::CloseParens)), + self.block(), + )) + } + + fn create_function_decl( + &self, + ) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone + use<'src, 'b> { + let ident = select! {Token::Ident(ident) => ident}; + + let param = select! 
{Token::Mutable => ()} + .or_not() + .then(ident) + .then_ignore(just(Token::Colon)) + .then(type_parser::()) + .map_with(|((mutable, name), param_type), e| { + e.state().push(AstNode::Parameter(Parameter { + mutable: mutable.is_some(), + name: name.to_string(), + param_type, })) - }) -} + }); -fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { - just(Token::OpenParens) - .ignored() - .ignore_then(just(Token::CloseParens)) - .map_with(|_, e: &mut MapExtra, ParserExtra>| { - e.state().push(AstNode::Constant { - ty: Intern::new(InnerType::Unit), - value: Intern::new(Value::Unit), - }) - }) -} + let params = param + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect::>() + .delimited_by(just(Token::OpenParens), just(Token::CloseParens)) + .labelled("function parameters") + .map_with(|params, e: &mut E| { + e.state() + .push(AstNode::ParameterList(ParameterList { parameters: params })) + }); -type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>; + let ret_type = just(Token::MinusGreater) + .ignore_then(type_parser::()) + .or_not(); -fn simple_expr<'a, 'b>( - expr: Recursive, PlaceOrValue, ParserExtra>>, -) -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone { - let ident = select! {Token::Ident(ident) => ident}.map_with( - |ident, e: &mut MapExtra, ParserExtra>| { - e.state() - .push(AstNode::UnresolvedDeclRef { + attrs() + .or_not() + .then(visibility()) + .then_ignore(just(Token::Fn)) + .then(ident) + .then(params) + .then(ret_type) + .then(self.block()) + .map_with(|(((((attrs, vis), ident), params), ret), body), e| { + e.state().push(AstNode::FunctionDecl(FunctionDecl { + attrs, name: ident.to_string(), - }) - .as_place() - }, - ); - - let constant = select! 
{ - Token::FloatingConstant(lexeme)| - Token::DotFloatingConstant(lexeme)| - Token::FloatingExpConstant(lexeme)| - Token::DotFloatingExpConstant(lexeme) => { - constants::parse_floating_constant(lexeme) - }, - Token::IntegerConstant(lexeme) => { - constants::parse_integer_constant(lexeme, Radix::Dec) - }, - tok @ Token::IntegerHexConstant(lexeme)| - tok @ Token::IntegerOctConstant(lexeme)| - tok @ Token::IntegerBinConstant(lexeme) => { - let radix = Radix::from_token(tok).unwrap(); - constants::parse_integer_constant(&lexeme[2..], radix) - }, + visibility: vis, + return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)), + parameter_list: params, + body: body.index(), + })) + }) } - .map_with(|(value, ty), e: &mut E| e.state().push(AstNode::Constant { ty, value }).as_value()); - choice(( - unit().map(PlaceOrValue::Value), - ident, - constant, - expr.delimited_by(just(Token::OpenParens), just(Token::CloseParens)), - block(), - )) -} + fn block( + &self, + ) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone + use<'src, 'b> + { + self.expr + .clone() + .then_ignore(just(Token::Semi)) + .map(PlaceOrValue::index) + .repeated() + .collect::>() + .then(self.expr.clone().or_not()) + .delimited_by(just(Token::OpenBrace), just(Token::CloseBrace)) + .map_with(|(statements, expr), e: &mut E| { + expr.unwrap_or(PlaceOrValue::Value(Index(u32::MAX))) + .with_index(e.state().push(AstNode::Block { + statements, + expr: expr.map(PlaceOrValue::index), + })) + }) + } -fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone { - let assignment = choice(( - just(Token::Equal), - just(Token::PlusEqual), - just(Token::MinusEqual), - just(Token::StarEqual), - just(Token::SlashEqual), - just(Token::PercentEqual), - just(Token::AmpersandEqual), - just(Token::PipeEqual), - just(Token::CaretEqual), - just(Token::LessLessEqual), - just(Token::GreaterGreaterEqual), - )); + fn create_expr( + &'_ self, + ) -> Box, PlaceOrValue, ParserExtra> 
+ Clone + use<'src, 'b>> + { + let assignment = choice(( + just(Token::Equal), + just(Token::PlusEqual), + just(Token::MinusEqual), + just(Token::StarEqual), + just(Token::SlashEqual), + just(Token::PercentEqual), + just(Token::AmpersandEqual), + just(Token::PipeEqual), + just(Token::CaretEqual), + just(Token::LessLessEqual), + just(Token::GreaterGreaterEqual), + )); - let logical_or = just(Token::PipePipe); - let logical_and = just(Token::AmpersandAmpersand); + let logical_or = just(Token::PipePipe); + let logical_and = just(Token::AmpersandAmpersand); - let or = just(Token::Pipe); - let xor = just(Token::Caret); - let and = just(Token::Ampersand); + let or = just(Token::Pipe); + let xor = just(Token::Caret); + let and = just(Token::Ampersand); - let equality = choice((just(Token::BangEqual), just(Token::EqualEqual))); + let equality = choice((just(Token::BangEqual), just(Token::EqualEqual))); - let relational = choice(( - just(Token::LessEqual), - just(Token::Less), - just(Token::GreaterEqual), - just(Token::Greater), - )); + let relational = choice(( + just(Token::LessEqual), + just(Token::Less), + just(Token::GreaterEqual), + just(Token::Greater), + )); - let shift = choice((just(Token::LessLess), just(Token::GreaterGreater))); + let shift = choice((just(Token::LessLess), just(Token::GreaterGreater))); - let additive = choice((just(Token::Plus), just(Token::Minus))); - let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent))); + let additive = choice((just(Token::Plus), just(Token::Minus))); + let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent))); - let prefixes = choice(( - just(Token::Bang), - just(Token::Minus), - just(Token::Star), - just(Token::Ampersand), - )); + let prefixes = choice(( + just(Token::Bang), + just(Token::Minus), + just(Token::Star), + just(Token::Ampersand), + )); - let r#as = just(Token::As).ignore_then(type_parser::()); + let r#as = 
just(Token::As).ignore_then(type_parser::()); - fn into_value(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue { - match idx { - PlaceOrValue::Place(index) => e.state().push(AstNode::PlaceToValue { expr: index }), - PlaceOrValue::Value(index) => index, + fn into_value(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue { + match idx { + PlaceOrValue::Place(index) => e.state().push(AstNode::PlaceToValue { expr: index }), + PlaceOrValue::Value(index) => index, + } + .as_value() } - .as_value() - } - fn into_place(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue { - match idx { - PlaceOrValue::Value(index) => e.state().push(AstNode::ValueToPlace { expr: index }), - PlaceOrValue::Place(index) => index, + fn into_place(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue { + match idx { + PlaceOrValue::Value(index) => e.state().push(AstNode::ValueToPlace { expr: index }), + PlaceOrValue::Place(index) => index, + } + .as_place() } - .as_place() - } - // TODO: postfix: function call, field access, array subscript - recursive(move |_expr| { - let simple = simple_expr(_expr.clone()); + // TODO: postfix: function call, field access, array subscript + let _expr = self.expr.clone(); let subscript = _expr .clone() @@ -780,7 +820,7 @@ fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Cl let field = just(Token::Dot).ignore_then(select! {Token::Ident(ident) => ident}); - let assignment_expr = simple.pratt(( + let assignment_expr = self.simple_expr().pratt(( postfix(100, subscript, |expr, index: PlaceOrValue, e: &mut E| { let node = AstNode::Subscript { expr: into_value(expr, e).index(), @@ -964,10 +1004,53 @@ fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Cl let expr = choice((if_expr, assignment_expr)).labelled("expression"); - Arc::new(expr) - }) + Box::new(expr) + } + + fn global_decl(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone { + let ident = select! 
{Token::Ident(ident) => ident}; + + attrs() + .or_not() + .then(visibility()) + .then_ignore(just(Token::Let)) + .then(ident) + .then_ignore(just(Token::Colon)) + .then(type_parser::()) + .then_ignore(just(Token::Equal)) + .then(self.expr.clone()) + .then_ignore(just(Token::Semi)) + .map_with(|((((_attrs, _vis), name), var_type), value), e| { + e.state().push(AstNode::GlobalDecl { + name: name.to_string(), + var_type, + expr: value.index(), + }) + }) + } + + fn file(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone { + choice((self.function.clone(), self.global_decl())) + .repeated() + .collect::>() + .map_with(|decls, e: &mut E| e.state().push(AstNode::File { decls })) + } } +fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { + just(Token::OpenParens) + .ignored() + .ignore_then(just(Token::CloseParens)) + .map_with(|_, e: &mut MapExtra, ParserExtra>| { + e.state().push(AstNode::Constant { + ty: Intern::new(InnerType::Unit), + value: Intern::new(Value::Unit), + }) + }) +} + +type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>; + fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { let docs = select! { Token::DocComment(doc) => doc }.map_with(|doc, e: &mut E| { e.state().push(AstNode::Doc { @@ -981,86 +1064,6 @@ fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { .map_with(|attrs, e: &mut E| e.state().push(AstNode::Attributes { attrs })) } -fn function_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { - let ident = select! {Token::Ident(ident) => ident}; - - let param = select! 
{Token::Mutable => ()} - .or_not() - .then(ident) - .then_ignore(just(Token::Colon)) - .then(type_parser::()) - .map_with(|((mutable, name), param_type), e| { - e.state().push(AstNode::Parameter(Parameter { - mutable: mutable.is_some(), - name: name.to_string(), - param_type, - })) - }); - - let params = param - .separated_by(just(Token::Comma)) - .allow_trailing() - .collect::>() - .delimited_by(just(Token::OpenParens), just(Token::CloseParens)) - .labelled("function parameters") - .map_with(|params, e: &mut E| { - e.state() - .push(AstNode::ParameterList(ParameterList { parameters: params })) - }); - - let ret_type = just(Token::MinusGreater) - .ignore_then(type_parser::()) - .or_not(); - - attrs() - .or_not() - .then(visibility()) - .then_ignore(just(Token::Fn)) - .then(ident) - .then(params) - .then(ret_type) - .then(block()) - .map_with(|(((((attrs, vis), ident), params), ret), body), e| { - e.state().push(AstNode::FunctionDecl(FunctionDecl { - attrs, - name: ident.to_string(), - visibility: vis, - return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)), - parameter_list: params, - body: body.index(), - })) - }) -} - -fn global_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { - let ident = select! 
{Token::Ident(ident) => ident}; - - attrs() - .or_not() - .then(visibility()) - .then_ignore(just(Token::Let)) - .then(ident) - .then_ignore(just(Token::Colon)) - .then(type_parser::()) - .then_ignore(just(Token::Equal)) - .then(expr()) - .then_ignore(just(Token::Semi)) - .map_with(|((((_attrs, _vis), name), var_type), value), e| { - e.state().push(AstNode::GlobalDecl { - name: name.to_string(), - var_type, - expr: value.index(), - }) - }) -} - -fn file<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { - choice((function_decl(), global_decl())) - .repeated() - .collect::>() - .map_with(|decls, e: &mut E| e.state().push(AstNode::File { decls })) -} - mod constants; mod pretty; @@ -1130,27 +1133,20 @@ mod tests { #[test] fn parse_exprs() { + let ctx = crate::ParserCtx::new(); let print_ast = |tokens| { let mut state = SimpleState(Ast::new()); - let out = crate::expr().parse_with_state(tokens, &mut state).unwrap(); + let out = ctx.function().parse_with_state(tokens, &mut state).unwrap(); let ast = state.0; let mut pretty = pretty::PrettyPrint::new(); pretty.print(&ast); }; - print_ast(new_token_input("()")); - print_ast(new_token_input("!() as i32")); - print_ast(new_token_input("1 << 2 & 3")); print_ast(new_token_input( r#" fn my_function(a: i32, b: *const u8) -> i32 { - let x: i32; x = a + 1; - if (x < *b as i32) { - return x; - } else { - return 10; - } + x } "#, )); diff --git a/crates/parser/src/pretty.rs b/crates/parser/src/pretty.rs index e862aba..a841442 100644 --- a/crates/parser/src/pretty.rs +++ b/crates/parser/src/pretty.rs @@ -165,7 +165,20 @@ impl PrettyPrint { name, var_type, } => todo!(), - AstNode::Assignment { dest, expr } => todo!(), + AstNode::Assignment { dest, expr } => { + self.push_line(format!("{}", node_name(node),)); + self.indents.push(Indent::Vertical); + self.push_line("DEST".to_string()); + self.with_indent(core::iter::once(*dest), |this, idx| { + this.stuff(ast, idx); + }); + *self.indents.last_mut().unwrap() = 
Indent::End; + self.push_line("EXPR".to_string()); + self.with_indent(core::iter::once(*expr), |this, idx| { + this.stuff(ast, idx); + }); + self.indents.pop(); + } AstNode::GlobalDecl { name, var_type,