pratt parser for expressions

This commit is contained in:
janis 2025-10-04 00:51:17 +02:00
parent 5aba59b291
commit 9a799ea281
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
3 changed files with 317 additions and 7 deletions

View file

@ -38,7 +38,7 @@ pub mod is_things {
/// a formal definition of valid identifier name.
pub fn is_id_start(c: char) -> bool {
// This is XID_Start OR '_' (which formally is not a XID_Start).
c == '_' || c == '-' || unicode_xid::UnicodeXID::is_xid_start(c)
c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
}
/// True if `c` is valid as a non-first character of an identifier.

View file

@ -13,4 +13,4 @@ internment = "0.8.6"
lexer = { path = "../lexer", version = "0.1.0" }
chumsky = "0.11"
chumsky = {version = "0.11", features = ["pratt"] }

View file

@ -1,11 +1,13 @@
use std::{hash::Hash, ops::Range};
use std::{hash::Hash, ops::Range, sync::Arc};
use chumsky::{
IterParser, Parser,
error::EmptyErr,
extra::{self, SimpleState},
input::{IterInput, MapExtra},
prelude::{choice, just, recursive},
pratt::{infix, left, postfix, prefix, right},
prelude::{Recursive, choice, just, recursive},
recursive::Direct,
select, text,
};
use internment::Intern;
@ -496,7 +498,7 @@ fn func_parser() {
type ParserExtra = chumsky::extra::Full<EmptyErr, SimpleState<Ast>, ()>;
fn block<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
fn block<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
just(Token::OpenBrace)
.ignored()
.then_ignore(just(Token::CloseBrace))
@ -508,13 +510,309 @@ fn block<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
})
}
/// Parses the unit literal `()`: an opening parenthesis immediately followed by a
/// closing one.
///
/// On success a `Constant` node with type `InnerType::Unit` and value
/// `Value::Unit` is pushed into the parser state, and the value returned by
/// that `push` is the parser's output.
fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    // Neither token carries information, so the matched output is discarded below.
    let empty_parens = just(Token::OpenParens).ignore_then(just(Token::CloseParens));

    empty_parens.map_with(|_, extra: &mut MapExtra<TokenInput<'a>, ParserExtra>| {
        let node = AstNode::Constant {
            ty: Intern::new(InnerType::Unit),
            value: Intern::new(Value::Unit),
        };
        extra.state().push(node)
    })
}
/// Shorthand for the `MapExtra` callback handle over `TokenInput` with
/// `ParserExtra`. The operator closures in `expr` take it as `&mut E` and use
/// `e.state()` to push new nodes into the parser state.
type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>;
fn simple_expr<'a, 'b>(
expr: Recursive<Direct<'a, 'b, TokenInput<'a>, Index, ParserExtra>>,
) -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
let ident = select! {Token::Ident(ident) => ident}.map_with(
|ident, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| {
e.state().push(AstNode::UnresolvedDeclRef {
name: ident.to_string(),
})
},
);
choice((
unit(),
ident,
expr.delimited_by(just(Token::OpenParens), just(Token::CloseParens)),
block(),
))
}
/// Parses a complete expression using a Pratt (operator-precedence) parser whose
/// operands come from [`simple_expr`].
///
/// Every operator callback pushes the node it builds into the parser state and
/// yields the value returned by that `push`, which becomes the operand of the
/// enclosing operator.
///
/// Binding powers, tightest first:
///
/// | power | kind          | operators                                   |
/// |-------|---------------|---------------------------------------------|
/// | 99    | postfix       | `as <type>`                                 |
/// | 95    | prefix        | `!` `-` `*` `&`                             |
/// | 90    | infix, left   | `*` `/` `%`                                 |
/// | 80    | infix, left   | `+` `-`                                     |
/// | 70    | infix, left   | `<<` `>>`                                   |
/// | 60    | infix, left   | `<` `<=` `>` `>=`                           |
/// | 50    | infix, left   | `==` `!=`                                   |
/// | 40    | infix, left   | `&`                                         |
/// | 30    | infix, left   | `^`                                         |
/// | 20    | infix, left   | `\|`                                        |
/// | 10    | infix, left   | `&&`                                        |
/// | 5     | infix, left   | `\|\|`                                      |
/// | 1     | infix, right  | `=` and the compound assignments (`+=`, …)  |
///
/// A compound assignment `a op= b` is desugared to `a = a op b`: the binary
/// node is pushed first, then an `Assignment` whose `dest` is the original
/// left-hand side and whose `expr` is that binary node.
fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
    let assignment = choice((
        just(Token::Equal),
        just(Token::PlusEqual),
        just(Token::MinusEqual),
        just(Token::StarEqual),
        just(Token::SlashEqual),
        just(Token::PercentEqual),
        just(Token::AmpersandEqual),
        just(Token::PipeEqual),
        just(Token::CaretEqual),
        just(Token::LessLessEqual),
        just(Token::GreaterGreaterEqual),
    ));
    let logical_or = just(Token::PipePipe);
    let logical_and = just(Token::AmpersandAmpersand);
    let or = just(Token::Pipe);
    let xor = just(Token::Caret);
    let and = just(Token::Ampersand);
    let equality = choice((just(Token::BangEqual), just(Token::EqualEqual)));
    let relational = choice((
        just(Token::LessEqual),
        just(Token::Less),
        just(Token::GreaterEqual),
        just(Token::Greater),
    ));
    let shift = choice((just(Token::LessLess), just(Token::GreaterGreater)));
    let additive = choice((just(Token::Plus), just(Token::Minus)));
    let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent)));
    let prefixes = choice((
        just(Token::Bang),
        just(Token::Minus),
        just(Token::Star),
        just(Token::Ampersand),
    ));
    let r#as = just(Token::As).ignore_then(type_parser::<ParserExtra>());

    // TODO: postfix: function call, field access, array subscript
    recursive(|expr_rec| {
        let simple = simple_expr(expr_rec);

        let expr = simple.pratt((
            // NOTE(review): with 99 > 95 the cast binds tighter than the prefix
            // operators, so `!x as T` parses as `!(x as T)`. Rust parses the
            // same text as `(!x) as T` — confirm this ordering is intended.
            postfix(99, r#as, |expr, ty, e: &mut E| {
                let node = AstNode::ExplicitCast { expr, ty };
                e.state().push(node)
            }),
            prefix(95, prefixes, |op, expr, e: &mut E| {
                let node = match op {
                    Token::Bang => AstNode::Not(expr),
                    Token::Minus => AstNode::Negate(expr),
                    Token::Star => AstNode::Deref { expr },
                    Token::Ampersand => AstNode::AddressOf { expr },
                    // `prefixes` only accepts the four tokens above.
                    _ => unreachable!(),
                };
                e.state().push(node)
            }),
            infix(left(90), multiplicative, |left, op, right, e: &mut E| {
                let node = match op {
                    Token::Star => AstNode::Multiply { left, right },
                    Token::Slash => AstNode::Divide { left, right },
                    Token::Percent => AstNode::Modulus { left, right },
                    _ => unreachable!(),
                };
                e.state().push(node)
            }),
            infix(left(80), additive, |left, op, right, e: &mut E| {
                let node = match op {
                    Token::Plus => AstNode::Add { left, right },
                    Token::Minus => AstNode::Subtract { left, right },
                    _ => unreachable!(),
                };
                e.state().push(node)
            }),
            infix(left(70), shift, |left, op, right, e: &mut E| {
                let node = match op {
                    Token::LessLess => AstNode::ShiftLeft { left, right },
                    Token::GreaterGreater => AstNode::ShiftRight { left, right },
                    _ => unreachable!(),
                };
                e.state().push(node)
            }),
            infix(left(60), relational, |left, op, right, e: &mut E| {
                let node = match op {
                    Token::Less => AstNode::Less { left, right },
                    Token::LessEqual => AstNode::LessEq { left, right },
                    Token::Greater => AstNode::Greater { left, right },
                    Token::GreaterEqual => AstNode::GreaterEq { left, right },
                    _ => unreachable!(),
                };
                e.state().push(node)
            }),
            infix(left(50), equality, |left, op, right, e: &mut E| {
                let node = match op {
                    Token::EqualEqual => AstNode::Eq { left, right },
                    Token::BangEqual => AstNode::NotEq { left, right },
                    _ => unreachable!(),
                };
                e.state().push(node)
            }),
            infix(left(40), and, |left, _op, right, e: &mut E| {
                let node = AstNode::BitAnd { left, right };
                e.state().push(node)
            }),
            infix(left(30), xor, |left, _op, right, e: &mut E| {
                let node = AstNode::BitXor { left, right };
                e.state().push(node)
            }),
            infix(left(20), or, |left, _op, right, e: &mut E| {
                let node = AstNode::BitOr { left, right };
                e.state().push(node)
            }),
            infix(left(10), logical_and, |left, _op, right, e: &mut E| {
                let node = AstNode::LogicalAnd { left, right };
                e.state().push(node)
            }),
            infix(left(5), logical_or, |left, _op, right, e: &mut E| {
                let node = AstNode::LogicalOr { left, right };
                e.state().push(node)
            }),
            infix(right(1), assignment, |left, op, right, e: &mut E| {
                // The value being stored. For plain `=` it is the right-hand
                // side itself; for `a op= b` it is a freshly pushed `a op b`
                // node. `left` must stay the assignment destination — it is
                // deliberately NOT rebound to the binary node here.
                let value = match op {
                    Token::Equal => right,
                    Token::PlusEqual => e.state().push(AstNode::Add { left, right }),
                    Token::MinusEqual => e.state().push(AstNode::Subtract { left, right }),
                    Token::StarEqual => e.state().push(AstNode::Multiply { left, right }),
                    Token::SlashEqual => e.state().push(AstNode::Divide { left, right }),
                    Token::PercentEqual => e.state().push(AstNode::Modulus { left, right }),
                    Token::AmpersandEqual => e.state().push(AstNode::BitAnd { left, right }),
                    Token::PipeEqual => e.state().push(AstNode::BitOr { left, right }),
                    Token::CaretEqual => e.state().push(AstNode::BitXor { left, right }),
                    Token::LessLessEqual => e.state().push(AstNode::ShiftLeft { left, right }),
                    Token::GreaterGreaterEqual => {
                        e.state().push(AstNode::ShiftRight { left, right })
                    }
                    // `assignment` only accepts the tokens listed above.
                    _ => unreachable!(),
                };

                // NOTE(review): for compound assignments `left` is referenced
                // both as the operand of the binary node and as `dest`, so the
                // same node is shared by two parents — confirm later passes
                // tolerate that.
                let node = AstNode::Assignment {
                    dest: left,
                    expr: value,
                };
                e.state().push(node)
            }),
        ));

        Arc::new(expr)
    })
}
mod constants;
#[cfg(test)]
mod tests {
use chumsky::Parser;
use chumsky::{Parser, extra::SimpleState};
use crate::{AstNode, new_token_input, type_parser};
use crate::{Ast, AstNode, new_token_input, type_parser};
#[test]
fn print_ast_node_size() {
@ -573,4 +871,16 @@ mod tests {
}
);
}
/// Smoke test: each sample source must parse as an expression; the resulting
/// AST state is dumped to stderr for manual inspection.
#[test]
fn parse_exprs() {
    for source in ["()", "!() as i32"] {
        let mut state = SimpleState(Ast::new());
        // Only the side effect on `state` is inspected; the root is unused.
        let _root = crate::expr()
            .parse_with_state(new_token_input(source), &mut state)
            .unwrap();
        eprintln!("{:?}", state.0);
    }
}
}