parsing with indirect recursion

This commit is contained in:
janis 2025-10-13 23:08:15 +02:00
parent 2771593605
commit c7d5e4e6c0
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
2 changed files with 270 additions and 261 deletions

View file

@ -7,7 +7,7 @@ use chumsky::{
input::{IterInput, MapExtra},
pratt::{infix, left, postfix, prefix, right},
prelude::{Recursive, choice, just, recursive},
recursive::Direct,
recursive::{Direct, Indirect},
select, text,
};
use internment::Intern;
@ -569,195 +569,235 @@ fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra>
.map(|v| v.unwrap_or(Visibility::Private))
}
/// Parses a function declaration: visibility, `fn`, a name, a parenthesised
/// parameter list, an optional `-> Type` and a block body.
///
/// Every parameter, the parameter list and the declaration itself are pushed
/// into the parser state; the output is the value returned by the final push.
/// The declaration is always stored with `attrs: None`.
fn func_parser<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
    let name = select! {Token::Ident(ident) => ident};

    // `[mut] name: Type`
    let mut_marker = just(Token::Mutable).to(()).or_not();
    let parameter = mut_marker
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>())
        .map_with(|((is_mut, param_name), param_type), e| {
            let node = AstNode::Parameter(Parameter {
                mutable: is_mut.is_some(),
                name: param_name.to_string(),
                param_type,
            });
            e.state().push(node)
        });

    // `( param, param, ... )` with an optional trailing comma
    let parameter_list = parameter
        .separated_by(just(Token::Comma))
        .allow_trailing()
        .collect::<Vec<_>>()
        .delimited_by(just(Token::OpenParens), just(Token::CloseParens))
        .labelled("function parameters")
        .map_with(|parameters, e: &mut E| {
            let node = AstNode::ParameterList(ParameterList { parameters });
            e.state().push(node)
        });

    // optional return type
    let return_type = just(Token::MinusGreater)
        .ignore_then(type_parser::<ParserExtra>())
        .or_not();

    visibility()
        .then_ignore(just(Token::Fn))
        .then(name)
        .then(parameter_list)
        .then(return_type)
        .then(block())
        .map_with(|((((visibility, fn_name), parameter_list), ret), body), e| {
            let decl = FunctionDecl {
                attrs: None,
                name: fn_name.to_string(),
                visibility,
                // A missing return type means the function returns unit.
                return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)),
                parameter_list,
                body: body.index(),
            };
            e.state().push(AstNode::FunctionDecl(decl))
        })
}
type ParserExtra = chumsky::extra::Full<EmptyErr, SimpleState<Ast>, ()>;
fn block<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone {
expr()
.then_ignore(just(Token::Semi))
.map(PlaceOrValue::index)
.repeated()
.collect::<Vec<_>>()
.then(expr().or_not())
.delimited_by(just(Token::OpenBrace), just(Token::CloseBrace))
.map_with(|(statements, expr), e: &mut E| {
expr.unwrap_or(PlaceOrValue::Value(Index(u32::MAX)))
.with_index(e.state().push(AstNode::Block {
statements,
expr: expr.map(PlaceOrValue::index),
/// Holds the recursive parsers of the grammar.
///
/// Both fields are indirect `Recursive` parsers: `new` first declares them and
/// only afterwards supplies their definitions, so parsers built from `&self`
/// can clone either field before it has been defined.
struct ParserCtx<'src, 'b> {
    /// Expression parser; its output is a `PlaceOrValue`.
    expr: Recursive<Indirect<'src, 'b, TokenInput<'src>, PlaceOrValue, ParserExtra>>,
    /// Function-declaration parser; its output is an `Index`.
    function: Recursive<Indirect<'src, 'b, TokenInput<'src>, Index, ParserExtra>>,
}
impl<'src, 'b> ParserCtx<'src, 'b>
where
'src: 'b,
{
/// Builds the context: declares both recursive parsers, then defines each one
/// with the parser produced by the matching `create_*` method.
fn new() -> Self {
    // Declare first, so the definitions built below may hold clones of
    // parsers that have not been defined yet.
    let mut ctx = Self {
        expr: Recursive::declare(),
        function: Recursive::declare(),
    };

    let function_def = ctx.create_function_decl();
    ctx.function.define(function_def);

    let expr_def = ctx.create_expr();
    ctx.expr.define(expr_def);

    ctx
}
pub fn expr(&self) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone {
self.expr.clone()
}
pub fn function(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone {
self.function.clone()
}
/// Parses a statement: either a function declaration, or an expression
/// followed by `;` (of which only the index is kept).
fn stmt(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone {
    let function_decl = self.function.clone();
    let expr_stmt = self
        .expr
        .clone()
        .then_ignore(just(Token::Semi))
        .map(PlaceOrValue::index);

    // The function declaration is tried first, as the first alternative.
    choice((function_decl, expr_stmt))
}
fn simple_expr(
&self,
) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone + use<'src, 'b>
{
let ident = select! {Token::Ident(ident) => ident}.map_with(
|ident, e: &mut MapExtra<TokenInput<'src>, ParserExtra>| {
e.state()
.push(AstNode::UnresolvedDeclRef {
name: ident.to_string(),
})
.as_place()
},
);
let constant = select! {
Token::FloatingConstant(lexeme)|
Token::DotFloatingConstant(lexeme)|
Token::FloatingExpConstant(lexeme)|
Token::DotFloatingExpConstant(lexeme) => {
constants::parse_floating_constant(lexeme)
},
Token::IntegerConstant(lexeme) => {
constants::parse_integer_constant(lexeme, Radix::Dec)
},
tok @ Token::IntegerHexConstant(lexeme)|
tok @ Token::IntegerOctConstant(lexeme)|
tok @ Token::IntegerBinConstant(lexeme) => {
let radix = Radix::from_token(tok).unwrap();
constants::parse_integer_constant(&lexeme[2..], radix)
},
}
.map_with(|(value, ty), e: &mut E| {
e.state().push(AstNode::Constant { ty, value }).as_value()
});
choice((
unit().map(PlaceOrValue::Value),
ident,
constant,
self.expr
.clone()
.delimited_by(just(Token::OpenParens), just(Token::CloseParens)),
self.block(),
))
}
fn create_function_decl(
&self,
) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone + use<'src, 'b> {
let ident = select! {Token::Ident(ident) => ident};
let param = select! {Token::Mutable => ()}
.or_not()
.then(ident)
.then_ignore(just(Token::Colon))
.then(type_parser::<ParserExtra>())
.map_with(|((mutable, name), param_type), e| {
e.state().push(AstNode::Parameter(Parameter {
mutable: mutable.is_some(),
name: name.to_string(),
param_type,
}))
})
}
});
fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
just(Token::OpenParens)
.ignored()
.ignore_then(just(Token::CloseParens))
.map_with(|_, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| {
e.state().push(AstNode::Constant {
ty: Intern::new(InnerType::Unit),
value: Intern::new(Value::Unit),
})
})
}
let params = param
.separated_by(just(Token::Comma))
.allow_trailing()
.collect::<Vec<_>>()
.delimited_by(just(Token::OpenParens), just(Token::CloseParens))
.labelled("function parameters")
.map_with(|params, e: &mut E| {
e.state()
.push(AstNode::ParameterList(ParameterList { parameters: params }))
});
type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>;
let ret_type = just(Token::MinusGreater)
.ignore_then(type_parser::<ParserExtra>())
.or_not();
fn simple_expr<'a, 'b>(
expr: Recursive<Direct<'a, 'b, TokenInput<'a>, PlaceOrValue, ParserExtra>>,
) -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone {
let ident = select! {Token::Ident(ident) => ident}.map_with(
|ident, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| {
e.state()
.push(AstNode::UnresolvedDeclRef {
attrs()
.or_not()
.then(visibility())
.then_ignore(just(Token::Fn))
.then(ident)
.then(params)
.then(ret_type)
.then(self.block())
.map_with(|(((((attrs, vis), ident), params), ret), body), e| {
e.state().push(AstNode::FunctionDecl(FunctionDecl {
attrs,
name: ident.to_string(),
})
.as_place()
},
);
let constant = select! {
Token::FloatingConstant(lexeme)|
Token::DotFloatingConstant(lexeme)|
Token::FloatingExpConstant(lexeme)|
Token::DotFloatingExpConstant(lexeme) => {
constants::parse_floating_constant(lexeme)
},
Token::IntegerConstant(lexeme) => {
constants::parse_integer_constant(lexeme, Radix::Dec)
},
tok @ Token::IntegerHexConstant(lexeme)|
tok @ Token::IntegerOctConstant(lexeme)|
tok @ Token::IntegerBinConstant(lexeme) => {
let radix = Radix::from_token(tok).unwrap();
constants::parse_integer_constant(&lexeme[2..], radix)
},
visibility: vis,
return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)),
parameter_list: params,
body: body.index(),
}))
})
}
.map_with(|(value, ty), e: &mut E| e.state().push(AstNode::Constant { ty, value }).as_value());
choice((
unit().map(PlaceOrValue::Value),
ident,
constant,
expr.delimited_by(just(Token::OpenParens), just(Token::CloseParens)),
block(),
))
}
/// Parses `{ expr; expr; ... [expr] }`: any number of `;`-terminated
/// expressions followed by an optional trailing expression.
///
/// A `Block` node is pushed into the parser state. The result is built from
/// the trailing expression (or from a `Value` placeholder when there is none)
/// via `with_index` with the new block's index — presumably this keeps the
/// place/value kind and swaps in the block's index.
fn block(
    &self,
) -> impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone + use<'src, 'b>
{
    let statement = self
        .expr
        .clone()
        .then_ignore(just(Token::Semi))
        .map(PlaceOrValue::index);
    let trailing = self.expr.clone().or_not();

    statement
        .repeated()
        .collect::<Vec<_>>()
        .then(trailing)
        .delimited_by(just(Token::OpenBrace), just(Token::CloseBrace))
        .map_with(|(statements, trailing), e: &mut E| {
            // `u32::MAX` is only a stand-in index; `with_index` is given the real one below.
            let kind = trailing.unwrap_or(PlaceOrValue::Value(Index(u32::MAX)));
            let block = e.state().push(AstNode::Block {
                statements,
                expr: trailing.map(PlaceOrValue::index),
            });
            kind.with_index(block)
        })
}
fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone {
let assignment = choice((
just(Token::Equal),
just(Token::PlusEqual),
just(Token::MinusEqual),
just(Token::StarEqual),
just(Token::SlashEqual),
just(Token::PercentEqual),
just(Token::AmpersandEqual),
just(Token::PipeEqual),
just(Token::CaretEqual),
just(Token::LessLessEqual),
just(Token::GreaterGreaterEqual),
));
fn create_expr(
&'_ self,
) -> Box<impl Parser<'src, TokenInput<'src>, PlaceOrValue, ParserExtra> + Clone + use<'src, 'b>>
{
let assignment = choice((
just(Token::Equal),
just(Token::PlusEqual),
just(Token::MinusEqual),
just(Token::StarEqual),
just(Token::SlashEqual),
just(Token::PercentEqual),
just(Token::AmpersandEqual),
just(Token::PipeEqual),
just(Token::CaretEqual),
just(Token::LessLessEqual),
just(Token::GreaterGreaterEqual),
));
let logical_or = just(Token::PipePipe);
let logical_and = just(Token::AmpersandAmpersand);
let logical_or = just(Token::PipePipe);
let logical_and = just(Token::AmpersandAmpersand);
let or = just(Token::Pipe);
let xor = just(Token::Caret);
let and = just(Token::Ampersand);
let or = just(Token::Pipe);
let xor = just(Token::Caret);
let and = just(Token::Ampersand);
let equality = choice((just(Token::BangEqual), just(Token::EqualEqual)));
let equality = choice((just(Token::BangEqual), just(Token::EqualEqual)));
let relational = choice((
just(Token::LessEqual),
just(Token::Less),
just(Token::GreaterEqual),
just(Token::Greater),
));
let relational = choice((
just(Token::LessEqual),
just(Token::Less),
just(Token::GreaterEqual),
just(Token::Greater),
));
let shift = choice((just(Token::LessLess), just(Token::GreaterGreater)));
let shift = choice((just(Token::LessLess), just(Token::GreaterGreater)));
let additive = choice((just(Token::Plus), just(Token::Minus)));
let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent)));
let additive = choice((just(Token::Plus), just(Token::Minus)));
let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent)));
let prefixes = choice((
just(Token::Bang),
just(Token::Minus),
just(Token::Star),
just(Token::Ampersand),
));
let prefixes = choice((
just(Token::Bang),
just(Token::Minus),
just(Token::Star),
just(Token::Ampersand),
));
let r#as = just(Token::As).ignore_then(type_parser::<ParserExtra>());
let r#as = just(Token::As).ignore_then(type_parser::<ParserExtra>());
fn into_value(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue {
match idx {
PlaceOrValue::Place(index) => e.state().push(AstNode::PlaceToValue { expr: index }),
PlaceOrValue::Value(index) => index,
fn into_value(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue {
match idx {
PlaceOrValue::Place(index) => e.state().push(AstNode::PlaceToValue { expr: index }),
PlaceOrValue::Value(index) => index,
}
.as_value()
}
.as_value()
}
fn into_place(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue {
match idx {
PlaceOrValue::Value(index) => e.state().push(AstNode::ValueToPlace { expr: index }),
PlaceOrValue::Place(index) => index,
fn into_place(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue {
match idx {
PlaceOrValue::Value(index) => e.state().push(AstNode::ValueToPlace { expr: index }),
PlaceOrValue::Place(index) => index,
}
.as_place()
}
.as_place()
}
// TODO: postfix: function call, field access, array subscript
recursive(move |_expr| {
let simple = simple_expr(_expr.clone());
// TODO: postfix: function call, field access, array subscript
let _expr = self.expr.clone();
let subscript = _expr
.clone()
@ -780,7 +820,7 @@ fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Cl
let field = just(Token::Dot).ignore_then(select! {Token::Ident(ident) => ident});
let assignment_expr = simple.pratt((
let assignment_expr = self.simple_expr().pratt((
postfix(100, subscript, |expr, index: PlaceOrValue, e: &mut E| {
let node = AstNode::Subscript {
expr: into_value(expr, e).index(),
@ -964,10 +1004,53 @@ fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Cl
let expr = choice((if_expr, assignment_expr)).labelled("expression");
Arc::new(expr)
})
Box::new(expr)
}
/// Parses a global declaration of the form
/// `[attrs] [visibility] let name: Type = expr;` and pushes a `GlobalDecl`
/// node. The parsed attributes and visibility are currently discarded.
fn global_decl(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone {
    let name = select! {Token::Ident(ident) => ident};

    // `[attrs] [visibility] let`
    let prefix = attrs()
        .or_not()
        .then(visibility())
        .then_ignore(just(Token::Let));

    // `= expr ;`
    let initializer = just(Token::Equal)
        .ignore_then(self.expr.clone())
        .then_ignore(just(Token::Semi));

    prefix
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>())
        .then(initializer)
        .map_with(|((((_attrs, _vis), decl_name), var_type), init), e| {
            let node = AstNode::GlobalDecl {
                name: decl_name.to_string(),
                var_type,
                expr: init.index(),
            };
            e.state().push(node)
        })
}
/// Parses a whole file: zero or more function or global declarations,
/// collected into a single `File` node.
fn file(&self) -> impl Parser<'src, TokenInput<'src>, Index, ParserExtra> + Clone {
    let top_level_decl = choice((self.function.clone(), self.global_decl()));

    top_level_decl
        .repeated()
        .collect::<Vec<_>>()
        .map_with(|decls, e: &mut E| {
            let root = AstNode::File { decls };
            e.state().push(root)
        })
}
}
/// Parses the unit literal `()` — an opening parenthesis immediately followed
/// by a closing one — and pushes a unit-typed constant node.
fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let open = just(Token::OpenParens);
    let close = just(Token::CloseParens);

    open.ignore_then(close)
        .map_with(|_, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| {
            let unit_constant = AstNode::Constant {
                ty: Intern::new(InnerType::Unit),
                value: Intern::new(Value::Unit),
            };
            e.state().push(unit_constant)
        })
}
type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>;
fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
let docs = select! { Token::DocComment(doc) => doc }.map_with(|doc, e: &mut E| {
e.state().push(AstNode::Doc {
@ -981,86 +1064,6 @@ fn attrs<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
.map_with(|attrs, e: &mut E| e.state().push(AstNode::Attributes { attrs }))
}
/// Parses a function declaration:
/// `[attrs] [visibility] fn name(params) [-> Type] { ... }`.
///
/// Every parameter, the parameter list and the declaration itself are pushed
/// into the parser state; the output is the value returned by the final push.
fn function_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let name = select! {Token::Ident(ident) => ident};

    // `[mut] name: Type`
    let parameter = select! {Token::Mutable => ()}
        .or_not()
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>())
        .map_with(|((is_mut, param_name), param_type), e| {
            let node = AstNode::Parameter(Parameter {
                mutable: is_mut.is_some(),
                name: param_name.to_string(),
                param_type,
            });
            e.state().push(node)
        });

    // `( param, param, ... )` with an optional trailing comma
    let parameter_list = parameter
        .separated_by(just(Token::Comma))
        .allow_trailing()
        .collect::<Vec<_>>()
        .delimited_by(just(Token::OpenParens), just(Token::CloseParens))
        .labelled("function parameters")
        .map_with(|parameters, e: &mut E| {
            let node = AstNode::ParameterList(ParameterList { parameters });
            e.state().push(node)
        });

    let return_type = just(Token::MinusGreater)
        .ignore_then(type_parser::<ParserExtra>())
        .or_not();

    let signature = attrs()
        .or_not()
        .then(visibility())
        .then_ignore(just(Token::Fn))
        .then(name)
        .then(parameter_list)
        .then(return_type);

    signature.then(block()).map_with(
        |(((((fn_attrs, visibility), fn_name), parameter_list), ret), body), e| {
            let decl = FunctionDecl {
                attrs: fn_attrs,
                name: fn_name.to_string(),
                visibility,
                // A missing return type means the function returns unit.
                return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)),
                parameter_list,
                body: body.index(),
            };
            e.state().push(AstNode::FunctionDecl(decl))
        },
    )
}
/// Parses a global declaration of the form
/// `[attrs] [visibility] let name: Type = expr;` and pushes a `GlobalDecl`
/// node. The parsed attributes and visibility are currently discarded.
fn global_decl<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let name = select! {Token::Ident(ident) => ident};

    // `[attrs] [visibility] let`
    let prefix = attrs()
        .or_not()
        .then(visibility())
        .then_ignore(just(Token::Let));

    // `= expr ;`
    let initializer = just(Token::Equal)
        .ignore_then(expr())
        .then_ignore(just(Token::Semi));

    prefix
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>())
        .then(initializer)
        .map_with(|((((_attrs, _vis), decl_name), var_type), init), e| {
            let node = AstNode::GlobalDecl {
                name: decl_name.to_string(),
                var_type,
                expr: init.index(),
            };
            e.state().push(node)
        })
}
/// Parses a whole file: zero or more function or global declarations,
/// collected into a single `File` node.
fn file<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    let top_level_decl = choice((function_decl(), global_decl()));

    top_level_decl
        .repeated()
        .collect::<Vec<_>>()
        .map_with(|decls, e: &mut E| {
            let root = AstNode::File { decls };
            e.state().push(root)
        })
}
mod constants;
mod pretty;
@ -1130,27 +1133,20 @@ mod tests {
#[test]
fn parse_exprs() {
let ctx = crate::ParserCtx::new();
let print_ast = |tokens| {
let mut state = SimpleState(Ast::new());
let out = crate::expr().parse_with_state(tokens, &mut state).unwrap();
let out = ctx.function().parse_with_state(tokens, &mut state).unwrap();
let ast = state.0;
let mut pretty = pretty::PrettyPrint::new();
pretty.print(&ast);
};
print_ast(new_token_input("()"));
print_ast(new_token_input("!() as i32"));
print_ast(new_token_input("1 << 2 & 3"));
print_ast(new_token_input(
r#"
fn my_function(a: i32, b: *const u8) -> i32 {
let x: i32;
x = a + 1;
if (x < *b as i32) {
return x;
} else {
return 10;
}
x
}
"#,
));

View file

@ -165,7 +165,20 @@ impl PrettyPrint {
name,
var_type,
} => todo!(),
AstNode::Assignment { dest, expr } => todo!(),
AstNode::Assignment { dest, expr } => {
    // Header line with the node name, followed by two labelled children:
    // DEST (the assignment target) and EXPR (the assigned expression).
    self.push_line(format!("{}", node_name(node)));
    self.indents.push(Indent::Vertical);
    self.push_line("DEST".to_string());
    self.with_indent(core::iter::once(*dest), |this, idx| {
        this.stuff(ast, idx);
    });
    // EXPR is the last child, so switch the current indent marker to `End`.
    *self.indents.last_mut().unwrap() = Indent::End;
    self.push_line("EXPR".to_string());
    // Print the assigned expression here, not the destination a second time.
    self.with_indent(core::iter::once(*expr), |this, idx| {
        this.stuff(ast, idx);
    });
    self.indents.pop();
}
AstNode::GlobalDecl {
name,
var_type,