parsing with indirect recursion

This commit is contained in:
janis 2025-10-13 23:08:15 +02:00
parent 2771593605
commit c7d5e4e6c0
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
2 changed files with 270 additions and 261 deletions

View file

@ -7,7 +7,7 @@ use chumsky::{
input::{IterInput, MapExtra}, input::{IterInput, MapExtra},
pratt::{infix, left, postfix, prefix, right}, pratt::{infix, left, postfix, prefix, right},
prelude::{Recursive, choice, just, recursive}, prelude::{Recursive, choice, just, recursive},
recursive::Direct, recursive::{Direct, Indirect},
select, text, select, text,
}; };
use internment::Intern; use internment::Intern;
@ -569,195 +569,235 @@ fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra>
.map(|v| v.unwrap_or(Visibility::Private)) .map(|v| v.unwrap_or(Visibility::Private))
} }
/// Builds the parser for a function declaration: visibility, `fn`, a name, a
/// parenthesised parameter list, an optional `-> type`, and a block body.
///
/// Every parameter, the parameter list, and the declaration itself are pushed
/// onto the parser state; the parser's output is whatever pushing the
/// `FunctionDecl` node returns. `attrs` is always `None` in this variant.
fn func_parser<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
    let name = select! {Token::Ident(name) => name};

    // One parameter: optional `mut`, a name, `:`, then its type.
    let parameter = just(Token::Mutable)
        .to(())
        .or_not()
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>())
        .map_with(|((mut_kw, name), param_type), e| {
            let node = AstNode::Parameter(Parameter {
                mutable: mut_kw.is_some(),
                name: name.to_string(),
                param_type,
            });
            e.state().push(node)
        });

    // Comma-separated parameters (trailing comma allowed) inside parentheses.
    let parameter_list = parameter
        .separated_by(just(Token::Comma))
        .allow_trailing()
        .collect::<Vec<_>>()
        .delimited_by(just(Token::OpenParens), just(Token::CloseParens))
        .labelled("function parameters")
        .map_with(|parameters, e: &mut E| {
            let node = AstNode::ParameterList(ParameterList { parameters });
            e.state().push(node)
        });

    // The return type is optional; a missing one becomes the unit type below.
    let return_type = just(Token::MinusGreater)
        .ignore_then(type_parser::<ParserExtra>())
        .or_not();

    visibility()
        .then_ignore(just(Token::Fn))
        .then(name)
        .then(parameter_list)
        .then(return_type)
        .then(block())
        .map_with(|((((decl_vis, name), parameter_list), ret), body), e| {
            let decl = FunctionDecl {
                attrs: None,
                name: name.to_string(),
                visibility: decl_vis,
                return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)),
                parameter_list,
                body: body.index(),
            };
            e.state().push(AstNode::FunctionDecl(decl))
        })
}
type ParserExtra = chumsky::extra::Full<EmptyErr, SimpleState<Ast>, ()>; type ParserExtra = chumsky::extra::Full<EmptyErr, SimpleState<Ast>, ()>;
fn block<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone { struct ParserCtx<'src, 'b> {
expr() expr: Recursive<Indirect<'src, 'b, TokenInput<'src>, PlaceOrValue, ParserExtra>>,
.then_ignore(just(Token::Semi)) function: Recursive<Indirect<'src, 'b, TokenInput<'src>, Index, ParserExtra>>,
.map(PlaceOrValue::index) }
.repeated()
.collect::<Vec<_>>() impl<'src, 'b> ParserCtx<'src, 'b>
.then(expr().or_not()) where
.delimited_by(just(Token::OpenBrace), just(Token::CloseBrace)) 'src: 'b,
.map_with(|(statements, expr), e: &mut E| { {
expr.unwrap_or(PlaceOrValue::Value(Index(u32::MAX))) fn new() -> Self {
.with_index(e.state().push(AstNode::Block { let mut this = Self {
statements, expr: Recursive::declare(),
expr: expr.map(PlaceOrValue::index), function: Recursive::declare(),
};
let function = this.create_function_decl();
this.function.define(function);
let expr = this.create_expr();
this.expr.define(expr);
this
}
/// Returns a clone of the recursive parser stored in the `expr` field.
pub fn expr(&self) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone {
    Clone::clone(&self.expr)
}
/// Returns a clone of the recursive parser stored in the `function` field.
pub fn function(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone {
    Clone::clone(&self.function)
}
/// Parses a statement: either a function declaration, or an expression
/// followed by `;` (yielding that expression's index).
///
/// The function parser is tried first.
fn stmt(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone {
    let function_item = self.function.clone();
    let expr_stmt = self
        .expr
        .clone()
        .then_ignore(just(Token::Semi))
        .map(PlaceOrValue::index);

    choice((function_item, expr_stmt))
}
fn simple_expr(
&self,
) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone + use<'src, 'b>
{
let ident = select! {Token::Ident(ident) => ident}.map_with(
|ident, e: &mut MapExtra<TokenInput<'src>, ParserExtra>| {
e.state()
.push(AstNode::UnresolvedDeclRef {
name: ident.to_string(),
})
.as_place()
},
);
let constant = select! {
Token::FloatingConstant(lexeme)|
Token::DotFloatingConstant(lexeme)|
Token::FloatingExpConstant(lexeme)|
Token::DotFloatingExpConstant(lexeme) => {
constants::parse_floating_constant(lexeme)
},
Token::IntegerConstant(lexeme) => {
constants::parse_integer_constant(lexeme, Radix::Dec)
},
tok @ Token::IntegerHexConstant(lexeme)|
tok @ Token::IntegerOctConstant(lexeme)|
tok @ Token::IntegerBinConstant(lexeme) => {
let radix = Radix::from_token(tok).unwrap();
constants::parse_integer_constant(&lexeme[2..], radix)
},
}
.map_with(|(value, ty), e: &mut E| {
e.state().push(AstNode::Constant { ty, value }).as_value()
});
choice((
unit().map(PlaceOrValue::Value),
ident,
constant,
self.expr
.clone()
.delimited_by(just(Token::OpenParens), just(Token::CloseParens)),
self.block(),
))
}
fn create_function_decl(
&self,
) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone + use<'src, 'b> {
let ident = select! {Token::Ident(ident) => ident};
let param = select! {Token::Mutable => ()}
.or_not()
.then(ident)
.then_ignore(just(Token::Colon))
.then(type_parser::<ParserExtra>())
.map_with(|((mutable, name), param_type), e| {
e.state().push(AstNode::Parameter(Parameter {
mutable: mutable.is_some(),
name: name.to_string(),
param_type,
})) }))
}) });
}
fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { let params = param
just(Token::OpenParens) .separated_by(just(Token::Comma))
.ignored() .allow_trailing()
.ignore_then(just(Token::CloseParens)) .collect::<Vec<_>>()
.map_with(|_, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| { .delimited_by(just(Token::OpenParens), just(Token::CloseParens))
e.state().push(AstNode::Constant { .labelled("function parameters")
ty: Intern::new(InnerType::Unit), .map_with(|params, e: &mut E| {
value: Intern::new(Value::Unit), e.state()
}) .push(AstNode::ParameterList(ParameterList { parameters: params }))
}) });
}
type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>; let ret_type = just(Token::MinusGreater)
.ignore_then(type_parser::<ParserExtra>())
.or_not();
fn simple_expr<'a, 'b>( attrs()
expr: Recursive<Direct<'a, 'b, TokenInput<'a>, PlaceOrValue, ParserExtra>>, .or_not()
) -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone { .then(visibility())
let ident = select! {Token::Ident(ident) => ident}.map_with( .then_ignore(just(Token::Fn))
|ident, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| { .then(ident)
e.state() .then(params)
.push(AstNode::UnresolvedDeclRef { .then(ret_type)
.then(self.block())
.map_with(|(((((attrs, vis), ident), params), ret), body), e| {
e.state().push(AstNode::FunctionDecl(FunctionDecl {
attrs,
name: ident.to_string(), name: ident.to_string(),
}) visibility: vis,
.as_place() return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)),
}, parameter_list: params,
); body: body.index(),
}))
let constant = select! { })
Token::FloatingConstant(lexeme)|
Token::DotFloatingConstant(lexeme)|
Token::FloatingExpConstant(lexeme)|
Token::DotFloatingExpConstant(lexeme) => {
constants::parse_floating_constant(lexeme)
},
Token::IntegerConstant(lexeme) => {
constants::parse_integer_constant(lexeme, Radix::Dec)
},
tok @ Token::IntegerHexConstant(lexeme)|
tok @ Token::IntegerOctConstant(lexeme)|
tok @ Token::IntegerBinConstant(lexeme) => {
let radix = Radix::from_token(tok).unwrap();
constants::parse_integer_constant(&lexeme[2..], radix)
},
} }
.map_with(|(value, ty), e: &mut E| e.state().push(AstNode::Constant { ty, value }).as_value());
choice(( fn block(
unit().map(PlaceOrValue::Value), &self,
ident, ) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone + use<'src, 'b>
constant, {
expr.delimited_by(just(Token::OpenParens), just(Token::CloseParens)), self.expr
block(), .clone()
)) .then_ignore(just(Token::Semi))
} .map(PlaceOrValue::index)
.repeated()
.collect::<Vec<_>>()
.then(self.expr.clone().or_not())
.delimited_by(just(Token::OpenBrace), just(Token::CloseBrace))
.map_with(|(statements, expr), e: &mut E| {
expr.unwrap_or(PlaceOrValue::Value(Index(u32::MAX)))
.with_index(e.state().push(AstNode::Block {
statements,
expr: expr.map(PlaceOrValue::index),
}))
})
}
fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone { fn create_expr(
let assignment = choice(( &'_ self,
just(Token::Equal), ) -> Box<impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone + use<'src, 'b>>
just(Token::PlusEqual), {
just(Token::MinusEqual), let assignment = choice((
just(Token::StarEqual), just(Token::Equal),
just(Token::SlashEqual), just(Token::PlusEqual),
just(Token::PercentEqual), just(Token::MinusEqual),
just(Token::AmpersandEqual), just(Token::StarEqual),
just(Token::PipeEqual), just(Token::SlashEqual),
just(Token::CaretEqual), just(Token::PercentEqual),
just(Token::LessLessEqual), just(Token::AmpersandEqual),
just(Token::GreaterGreaterEqual), just(Token::PipeEqual),
)); just(Token::CaretEqual),
just(Token::LessLessEqual),
just(Token::GreaterGreaterEqual),
));
let logical_or = just(Token::PipePipe); let logical_or = just(Token::PipePipe);
let logical_and = just(Token::AmpersandAmpersand); let logical_and = just(Token::AmpersandAmpersand);
let or = just(Token::Pipe); let or = just(Token::Pipe);
let xor = just(Token::Caret); let xor = just(Token::Caret);
let and = just(Token::Ampersand); let and = just(Token::Ampersand);
let equality = choice((just(Token::BangEqual), just(Token::EqualEqual))); let equality = choice((just(Token::BangEqual), just(Token::EqualEqual)));
let relational = choice(( let relational = choice((
just(Token::LessEqual), just(Token::LessEqual),
just(Token::Less), just(Token::Less),
just(Token::GreaterEqual), just(Token::GreaterEqual),
just(Token::Greater), just(Token::Greater),
)); ));
let shift = choice((just(Token::LessLess), just(Token::GreaterGreater))); let shift = choice((just(Token::LessLess), just(Token::GreaterGreater)));
let additive = choice((just(Token::Plus), just(Token::Minus))); let additive = choice((just(Token::Plus), just(Token::Minus)));
let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent))); let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent)));
let prefixes = choice(( let prefixes = choice((
just(Token::Bang), just(Token::Bang),
just(Token::Minus), just(Token::Minus),
just(Token::Star), just(Token::Star),
just(Token::Ampersand), just(Token::Ampersand),
)); ));
let r#as = just(Token::As).ignore_then(type_parser::<ParserExtra>()); let r#as = just(Token::As).ignore_then(type_parser::<ParserExtra>());
fn into_value(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue { fn into_value(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue {
match idx { match idx {
PlaceOrValue::Place(index) => e.state().push(AstNode::PlaceToValue { expr: index }), PlaceOrValue::Place(index) => e.state().push(AstNode::PlaceToValue { expr: index }),
PlaceOrValue::Value(index) => index, PlaceOrValue::Value(index) => index,
}
.as_value()
} }
.as_value()
}
fn into_place(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue { fn into_place(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue {
match idx { match idx {
PlaceOrValue::Value(index) => e.state().push(AstNode::ValueToPlace { expr: index }), PlaceOrValue::Value(index) => e.state().push(AstNode::ValueToPlace { expr: index }),
PlaceOrValue::Place(index) => index, PlaceOrValue::Place(index) => index,
}
.as_place()
} }
.as_place()
}
// TODO: postfix: function call, field access, array subscript // TODO: postfix: function call, field access, array subscript
recursive(move |_expr| { let _expr = self.expr.clone();
let simple = simple_expr(_expr.clone());
let subscript = _expr let subscript = _expr
.clone() .clone()
@ -780,7 +820,7 @@ fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Cl
let field = just(Token::Dot).ignore_then(select! {Token::Ident(ident) => ident}); let field = just(Token::Dot).ignore_then(select! {Token::Ident(ident) => ident});
let assignment_expr = simple.pratt(( let assignment_expr = self.simple_expr().pratt((
postfix(100, subscript, |expr, index: PlaceOrValue, e: &mut E| { postfix(100, subscript, |expr, index: PlaceOrValue, e: &mut E| {
let node = AstNode::Subscript { let node = AstNode::Subscript {
expr: into_value(expr, e).index(), expr: into_value(expr, e).index(),
@ -964,10 +1004,53 @@ fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Cl
let expr = choice((if_expr, assignment_expr)).labelled("expression"); let expr = choice((if_expr, assignment_expr)).labelled("expression");
Arc::new(expr) Box::new(expr)
}) }
/// Parses a global declaration of the form
/// `[attrs] [visibility] let name: type = expr;`.
///
/// Attributes and visibility are parsed but not stored in the pushed
/// `GlobalDecl` node.
fn global_decl(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone {
    let name = select! {Token::Ident(name) => name};

    // Everything up to and including the type annotation.
    let typed_binding = attrs()
        .or_not()
        .then(visibility())
        .then_ignore(just(Token::Let))
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>());

    typed_binding
        .then_ignore(just(Token::Equal))
        .then(self.expr.clone())
        .then_ignore(just(Token::Semi))
        .map_with(|((((_attrs, _vis), name), var_type), init), e| {
            let decl = AstNode::GlobalDecl {
                name: name.to_string(),
                var_type,
                expr: init.index(),
            };
            e.state().push(decl)
        })
}
/// Parses zero or more top-level declarations (functions or globals) and
/// pushes a `File` node holding them.
fn file(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone {
    let top_level = choice((self.function.clone(), self.global_decl()));

    top_level
        .repeated()
        .collect::<Vec<_>>()
        .map_with(|decls, e: &mut E| {
            let root = AstNode::File { decls };
            e.state().push(root)
        })
}
} }
/// Parses the unit literal `()` and pushes a `Constant` node whose type and
/// value are both unit.
fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    // Only the token sequence matters; the tokens themselves are discarded.
    let parens = just(Token::OpenParens).ignore_then(just(Token::CloseParens));

    parens.map_with(|_, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| {
        let unit_const = AstNode::Constant {
            ty: Intern::new(InnerType::Unit),
            value: Intern::new(Value::Unit),
        };
        e.state().push(unit_const)
    })
}
/// Shorthand for the `MapExtra` type over `TokenInput` and `ParserExtra`;
/// `map_with` closures in this file take it as `e: &mut E`.
type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>;
fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
let docs = select! { Token::DocComment(doc) => doc }.map_with(|doc, e: &mut E| { let docs = select! { Token::DocComment(doc) => doc }.map_with(|doc, e: &mut E| {
e.state().push(AstNode::Doc { e.state().push(AstNode::Doc {
@ -981,86 +1064,6 @@ fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
.map_with(|attrs, e: &mut E| e.state().push(AstNode::Attributes { attrs })) .map_with(|attrs, e: &mut E| e.state().push(AstNode::Attributes { attrs }))
} }
/// Builds the parser for a function declaration with optional attributes:
/// `[attrs] [visibility] fn name(params) [-> type] { ... }`.
///
/// Every parameter, the parameter list, and the declaration itself are pushed
/// onto the parser state; the output is whatever pushing the `FunctionDecl`
/// node returns.
fn function_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let name = select! {Token::Ident(name) => name};

    // One parameter: optional `mut`, a name, `:`, then its type.
    let parameter = select! {Token::Mutable => ()}
        .or_not()
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>())
        .map_with(|((mut_kw, name), param_type), e| {
            let node = AstNode::Parameter(Parameter {
                mutable: mut_kw.is_some(),
                name: name.to_string(),
                param_type,
            });
            e.state().push(node)
        });

    // Comma-separated parameters (trailing comma allowed) inside parentheses.
    let parameter_list = parameter
        .separated_by(just(Token::Comma))
        .allow_trailing()
        .collect::<Vec<_>>()
        .delimited_by(just(Token::OpenParens), just(Token::CloseParens))
        .labelled("function parameters")
        .map_with(|parameters, e: &mut E| {
            let node = AstNode::ParameterList(ParameterList { parameters });
            e.state().push(node)
        });

    // The return type is optional; a missing one becomes the unit type below.
    let return_type = just(Token::MinusGreater)
        .ignore_then(type_parser::<ParserExtra>())
        .or_not();

    let signature = attrs()
        .or_not()
        .then(visibility())
        .then_ignore(just(Token::Fn))
        .then(name)
        .then(parameter_list)
        .then(return_type);

    signature
        .then(block())
        .map_with(|(((((attrs, decl_vis), name), parameter_list), ret), body), e| {
            let decl = FunctionDecl {
                attrs,
                name: name.to_string(),
                visibility: decl_vis,
                return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)),
                parameter_list,
                body: body.index(),
            };
            e.state().push(AstNode::FunctionDecl(decl))
        })
}
/// Parses a global declaration of the form
/// `[attrs] [visibility] let name: type = expr;`.
///
/// Attributes and visibility are parsed but not stored in the pushed
/// `GlobalDecl` node.
fn global_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let name = select! {Token::Ident(name) => name};

    // Everything up to and including the type annotation.
    let typed_binding = attrs()
        .or_not()
        .then(visibility())
        .then_ignore(just(Token::Let))
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>());

    typed_binding
        .then_ignore(just(Token::Equal))
        .then(expr())
        .then_ignore(just(Token::Semi))
        .map_with(|((((_attrs, _vis), name), var_type), init), e| {
            let decl = AstNode::GlobalDecl {
                name: name.to_string(),
                var_type,
                expr: init.index(),
            };
            e.state().push(decl)
        })
}
/// Parses zero or more top-level declarations (functions or globals) and
/// pushes a `File` node holding them.
fn file<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let top_level = choice((function_decl(), global_decl()));

    top_level
        .repeated()
        .collect::<Vec<_>>()
        .map_with(|decls, e: &mut E| {
            let root = AstNode::File { decls };
            e.state().push(root)
        })
}
mod constants; mod constants;
mod pretty; mod pretty;
@ -1130,27 +1133,20 @@ mod tests {
#[test] #[test]
fn parse_exprs() { fn parse_exprs() {
let ctx = crate::ParserCtx::new();
let print_ast = |tokens| { let print_ast = |tokens| {
let mut state = SimpleState(Ast::new()); let mut state = SimpleState(Ast::new());
let out = crate::expr().parse_with_state(tokens, &mut state).unwrap(); let out = ctx.function().parse_with_state(tokens, &mut state).unwrap();
let ast = state.0; let ast = state.0;
let mut pretty = pretty::PrettyPrint::new(); let mut pretty = pretty::PrettyPrint::new();
pretty.print(&ast); pretty.print(&ast);
}; };
print_ast(new_token_input("()"));
print_ast(new_token_input("!() as i32"));
print_ast(new_token_input("1 << 2 & 3"));
print_ast(new_token_input( print_ast(new_token_input(
r#" r#"
fn my_function(a: i32, b: *const u8) -> i32 { fn my_function(a: i32, b: *const u8) -> i32 {
let x: i32;
x = a + 1; x = a + 1;
if (x < *b as i32) { x
return x;
} else {
return 10;
}
} }
"#, "#,
)); ));

View file

@ -165,7 +165,20 @@ impl PrettyPrint {
name, name,
var_type, var_type,
} => todo!(), } => todo!(),
AstNode::Assignment { dest, expr } => todo!(), AstNode::Assignment { dest, expr } => {
// Header line carrying the node's name, followed by two labelled children.
self.push_line(node_name(node).to_string());
self.indents.push(Indent::Vertical);
self.push_line("DEST".to_string());
self.with_indent(core::iter::once(*dest), |this, idx| {
    this.stuff(ast, idx);
});
// Swap the indent pushed above to `Indent::End` before the second section
// (presumably the "last child" marker; confirm against `Indent`).
*self.indents.last_mut().unwrap() = Indent::End;
self.push_line("EXPR".to_string());
// Print the assigned expression under `EXPR`. This previously iterated
// `*dest` again, so the destination was shown twice and `expr` never was.
self.with_indent(core::iter::once(*expr), |this, idx| {
    this.stuff(ast, idx);
});
self.indents.pop();
}
AstNode::GlobalDecl { AstNode::GlobalDecl {
name, name,
var_type, var_type,