From 9a799ea2813fed71c0de7aec0622918216b0ea6d Mon Sep 17 00:00:00 2001 From: janis Date: Sat, 4 Oct 2025 00:51:17 +0200 Subject: [PATCH] pratt parser for expressions --- crates/lexer/src/lib.rs | 2 +- crates/parser/Cargo.toml | 2 +- crates/parser/src/lib.rs | 320 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 317 insertions(+), 7 deletions(-) diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs index d4c5b40..86c2372 100644 --- a/crates/lexer/src/lib.rs +++ b/crates/lexer/src/lib.rs @@ -38,7 +38,7 @@ pub mod is_things { /// a formal definition of valid identifier name. pub fn is_id_start(c: char) -> bool { // This is XID_Start OR '_' (which formally is not a XID_Start). - c == '_' || c == '-' || unicode_xid::UnicodeXID::is_xid_start(c) + c == '_' || unicode_xid::UnicodeXID::is_xid_start(c) } /// True if `c` is valid as a non-first character of an identifier. diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index e2247ad..25bbcc9 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -13,4 +13,4 @@ internment = "0.8.6" lexer = { path = "../lexer", version = "0.1.0" } -chumsky = "0.11" \ No newline at end of file +chumsky = {version = "0.11", features = ["pratt"] } \ No newline at end of file diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 9e5545d..550c42e 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -1,11 +1,13 @@ -use std::{hash::Hash, ops::Range}; +use std::{hash::Hash, ops::Range, sync::Arc}; use chumsky::{ IterParser, Parser, error::EmptyErr, extra::{self, SimpleState}, input::{IterInput, MapExtra}, - prelude::{choice, just, recursive}, + pratt::{infix, left, postfix, prefix, right}, + prelude::{Recursive, choice, just, recursive}, + recursive::Direct, select, text, }; use internment::Intern; @@ -496,7 +498,7 @@ fn func_parser() { type ParserExtra = chumsky::extra::Full, ()>; -fn block<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> { +fn 
block<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { just(Token::OpenBrace) .ignored() .then_ignore(just(Token::CloseBrace)) @@ -508,13 +510,309 @@ fn block<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> { }) } +fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { + just(Token::OpenParens) + .ignored() + .ignore_then(just(Token::CloseParens)) + .map_with(|_, e: &mut MapExtra, ParserExtra>| { + e.state().push(AstNode::Constant { + ty: Intern::new(InnerType::Unit), + value: Intern::new(Value::Unit), + }) + }) +} + +type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>; + +fn simple_expr<'a, 'b>( + expr: Recursive, Index, ParserExtra>>, +) -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone { + let ident = select! {Token::Ident(ident) => ident}.map_with( + |ident, e: &mut MapExtra, ParserExtra>| { + e.state().push(AstNode::UnresolvedDeclRef { + name: ident.to_string(), + }) + }, + ); + choice(( + unit(), + ident, + expr.delimited_by(just(Token::OpenParens), just(Token::CloseParens)), + block(), + )) +} + +fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> { + let assignment = choice(( + just(Token::Equal), + just(Token::PlusEqual), + just(Token::MinusEqual), + just(Token::StarEqual), + just(Token::SlashEqual), + just(Token::PercentEqual), + just(Token::AmpersandEqual), + just(Token::PipeEqual), + just(Token::CaretEqual), + just(Token::LessLessEqual), + just(Token::GreaterGreaterEqual), + )); + + let logical_or = just(Token::PipePipe); + let logical_and = just(Token::AmpersandAmpersand); + + let or = just(Token::Pipe); + let xor = just(Token::Caret); + let and = just(Token::Ampersand); + + let equality = choice((just(Token::BangEqual), just(Token::EqualEqual))); + + let relational = choice(( + just(Token::LessEqual), + just(Token::Less), + just(Token::GreaterEqual), + just(Token::Greater), + )); + + let shift = choice((just(Token::LessLess), 
just(Token::GreaterGreater))); + + let additive = choice((just(Token::Plus), just(Token::Minus))); + let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent))); + + let prefixes = choice(( + just(Token::Bang), + just(Token::Minus), + just(Token::Star), + just(Token::Ampersand), + )); + + let r#as = just(Token::As).ignore_then(type_parser::()); + + // TODO: postfix: function call, field access, array subscript + + recursive(|_expr| { + let simple = simple_expr(_expr); + + let expr = simple.pratt(( + postfix(99, r#as, |expr, ty, e: &mut E| { + let node = AstNode::ExplicitCast { expr, ty }; + e.state().push(node) + }), + prefix(95, prefixes, |op, expr, e: &mut E| { + let node = match op { + Token::Bang => AstNode::Not(expr), + Token::Minus => AstNode::Negate(expr), + Token::Star => AstNode::Deref { expr }, + Token::Ampersand => AstNode::AddressOf { expr }, + _ => unreachable!(), + }; + e.state().push(node) + }), + infix(left(90), multiplicative, |left, op, right, e: &mut E| { + let node = match op { + Token::Star => AstNode::Multiply { left, right }, + Token::Slash => AstNode::Divide { left, right }, + Token::Percent => AstNode::Modulus { left, right }, + _ => unreachable!(), + }; + e.state().push(node) + }), + infix(left(80), additive, |left, op, right, e: &mut E| { + let node = match op { + Token::Plus => AstNode::Add { left, right }, + Token::Minus => AstNode::Subtract { left, right }, + _ => unreachable!(), + }; + e.state().push(node) + }), + infix(left(70), shift, |left, op, right, e: &mut E| { + let node = match op { + Token::LessLess => AstNode::ShiftLeft { left, right }, + Token::GreaterGreater => AstNode::ShiftRight { left, right }, + _ => unreachable!(), + }; + e.state().push(node) + }), + infix(left(60), relational, |left, op, right, e: &mut E| { + let node = match op { + Token::Less => AstNode::Less { left, right }, + Token::LessEqual => AstNode::LessEq { left, right }, + Token::Greater => AstNode::Greater { left, right }, + 
Token::GreaterEqual => AstNode::GreaterEq { left, right },
+                    _ => unreachable!(),
+                };
+                e.state().push(node)
+            }),
+            infix(left(50), equality, |left, op, right, e: &mut E| {
+                let node = match op {
+                    Token::EqualEqual => AstNode::Eq { left, right },
+                    Token::BangEqual => AstNode::NotEq { left, right },
+                    _ => unreachable!(),
+                };
+                e.state().push(node)
+            }),
+            infix(left(40), and, |left, _op, right, e: &mut E| {
+                let node = AstNode::BitAnd { left, right };
+                e.state().push(node)
+            }),
+            infix(left(30), xor, |left, _op, right, e: &mut E| {
+                let node = AstNode::BitXor { left, right };
+                e.state().push(node)
+            }),
+            infix(left(20), or, |left, _op, right, e: &mut E| {
+                let node = AstNode::BitOr { left, right };
+                e.state().push(node)
+            }),
+            infix(left(10), logical_and, |left, _op, right, e: &mut E| {
+                let node = AstNode::LogicalAnd { left, right };
+                e.state().push(node)
+            }),
+            infix(left(5), logical_or, |left, _op, right, e: &mut E| {
+                let node = AstNode::LogicalOr { left, right };
+                e.state().push(node)
+            }),
+            // Assignment: lowest binding power, right-associative (`right(1)`).
+            // Plain `a = b` stores `b` directly. A compound `a op= b` is lowered
+            // to `a = a op b`: the binary node is pushed first and its index
+            // becomes the assigned value, while the original left operand stays
+            // the destination.
+            infix(right(1), assignment, |left, op, right, e: &mut E| {
+                let value = match op {
+                    Token::Equal => right,
+                    Token::PlusEqual => e.state().push(AstNode::Add { left, right }),
+                    Token::MinusEqual => e.state().push(AstNode::Subtract { left, right }),
+                    Token::StarEqual => e.state().push(AstNode::Multiply { left, right }),
+                    Token::SlashEqual => e.state().push(AstNode::Divide { left, right }),
+                    Token::PercentEqual => e.state().push(AstNode::Modulus { left, right }),
+                    Token::AmpersandEqual => e.state().push(AstNode::BitAnd { left, right }),
+                    Token::PipeEqual => e.state().push(AstNode::BitOr { left, right }),
+                    Token::CaretEqual => e.state().push(AstNode::BitXor { left, right }),
+                    Token::LessLessEqual => e.state().push(AstNode::ShiftLeft { left, right }),
+                    Token::GreaterGreaterEqual => {
+                        e.state().push(AstNode::ShiftRight { left, right })
+                    }
+                    // `assignment` only yields the tokens matched above.
+                    _ => unreachable!(),
+                };
+                let node = AstNode::Assignment {
+                    dest: left,
+                    expr: value,
+                };
+                e.state().push(node)
+            }),
+        ));
+
+        // let product = simple.clone().foldl_with(
+        //     multiplicative.then(simple).repeated(),
+        //     |left, (op, right), e| {
+        //         let node = match op {
+        //             Token::Star => AstNode::Multiply { left, right },
+        //             Token::Slash => AstNode::Divide { left, right },
+        //             Token::Percent => AstNode::Modulus { left, right },
+        //             _ => unreachable!(),
+        //         };
+        //         e.state().push(node)
+        //     },
+        // );
+
+        // let sum = product.clone().foldl_with(
+        //     additive.then(product).repeated(),
+        //     |left, (op, right), e| {
+        //         let node = match op {
+        //             Token::Plus => AstNode::Add { left, right },
+        //             Token::Minus => AstNode::Subtract { left, right },
+        //             _ => unreachable!(),
+        //         };
+        //         e.state().push(node)
+        //     },
+        // );
+
+        // let shift = sum
+        //     .clone()
+        //     .foldl_with(shift.then(sum).repeated(), |left, (op, right), e| {
+        //         let node = match op {
+        //             Token::LessLess => AstNode::ShiftLeft { left, right },
+        //             Token::GreaterGreater => AstNode::ShiftRight { left, right },
+        //             _ => unreachable!(),
+        //         };
+        //         e.state().push(node)
+        //     });
+
+        // let comparison =
+        //     shift
+        //         .clone()
+        //         .foldl_with(relational.then(shift).repeated(), |left, (op, right), e| {
+        //             let node = match op {
+        //                 Token::Less => AstNode::Less { left, right },
+        //                 Token::LessEqual => AstNode::LessEq { left, right },
+        //                 Token::Greater => AstNode::Greater { left, right },
+        //                 Token::GreaterEqual => AstNode::GreaterEq { left, right },
+        //                 _ => unreachable!(),
+        //             };
+        //             e.state().push(node)
+        //         });
+
+        // let equality = comparison.clone().foldl_with(
+        //     equality.then(comparison).repeated(),
+        //     |left, (op, right), e| {
+        //         let node = match op {
+        //             Token::EqualEqual => AstNode::Eq { left, right },
+        //             Token::BangEqual => AstNode::NotEq { left, right },
+        //             _ => unreachable!(),
+        //         };
+        //         e.state().push(node)
+        //     },
+        // );
+
+        // let bit_and =
+        //     equality
+        //         .clone()
+        //         .foldl_with(and.then(equality).repeated(), |left, (_op, right), e| {
+        //             let node = AstNode::BitAnd { left, right };
+        //             e.state().push(node)
+        //         });
+
+        // let bit_xor =
+        //     bit_and
+        //         .clone()
+        //         .foldl_with(xor.then(bit_and).repeated(), |left, (_op, right), e| {
+        //             let node = AstNode::BitXor { left, right };
+        //             e.state().push(node)
+        //         });
+
+        // let bit_or =
+        //     bit_xor
+        //         .clone()
+        //         .foldl_with(or.then(bit_xor).repeated(), |left, (_op, right), e| {
+        //             let node = AstNode::BitOr { left, right };
+        //             e.state().push(node)
+        //         });
+
+        // let and = bit_or.clone().foldl_with(
+        //     logical_and.then(bit_or).repeated(),
+        //     |left, (_op, right), e| {
+        //         let node = AstNode::LogicalAnd { left, right };
+        //         e.state().push(node)
+        //     },
+        // );
+
+        // let or =
+        //     and.clone()
+        //         .foldl_with(logical_or.then(and).repeated(), |left, (_op, right), e| {
+        //             let node = AstNode::LogicalOr { left, right };
+        //             e.state().push(node)
+        //         });
+
+        Arc::new(expr)
+    })
+}
+
 mod constants;
 
 #[cfg(test)]
 mod tests {
-    use chumsky::Parser;
+    use chumsky::{Parser, extra::SimpleState};
 
-    use crate::{AstNode, new_token_input, type_parser};
+    use crate::{Ast, AstNode, new_token_input, type_parser};
 
     #[test]
     fn print_ast_node_size() {
@@ -573,4 +871,16 @@ mod tests {
             }
         );
     }
+
+    /// Smoke test: each input must parse (`unwrap`); the AST is printed, not checked.
+    #[test]
+    fn parse_exprs() {
+        let print_ast = |tokens| {
+            let mut state = SimpleState(Ast::new());
+            crate::expr().parse_with_state(tokens, &mut state).unwrap();
+            eprintln!("{:?}", state.0);
+        };
+        print_ast(new_token_input("()"));
+        print_ast(new_token_input("!() as i32"));
+    }
 }