Compare commits

..

5 commits

Author SHA1 Message Date
janis b0b87c68f2
placeness, constants 2025-10-10 23:15:40 +02:00
janis f67bb61888
if conditions 2025-10-10 14:54:24 +02:00
janis 77bd4f3f16
more expr parsing 2025-10-08 17:06:24 +02:00
janis 9a799ea281
pratt parser for expressions 2025-10-04 00:51:17 +02:00
janis 5aba59b291
custom integer with falliblity 2025-10-03 19:37:42 +02:00
3 changed files with 399 additions and 20 deletions

View file

@ -38,7 +38,7 @@ pub mod is_things {
/// a formal definition of valid identifier name.
pub fn is_id_start(c: char) -> bool {
// This is XID_Start OR '_' (which formally is not a XID_Start).
c == '_' || c == '-' || unicode_xid::UnicodeXID::is_xid_start(c)
c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
}
/// True if `c` is valid as a non-first character of an identifier.

View file

@ -13,4 +13,4 @@ internment = "0.8.6"
lexer = { path = "../lexer", version = "0.1.0" }
chumsky = "0.11"
chumsky = {version = "0.11", features = ["pratt"] }

View file

@ -1,15 +1,17 @@
use std::{hash::Hash, ops::Range};
use std::{hash::Hash, ops::Range, sync::Arc};
use chumsky::{
IterParser, Parser,
error::EmptyErr,
extra::{self, SimpleState},
input::{IterInput, MapExtra},
prelude::{choice, just, recursive},
pratt::{infix, left, postfix, prefix, right},
prelude::{Recursive, choice, just, recursive},
recursive::Direct,
select, text,
};
use internment::Intern;
use lexer::{Token, TokenItemIterator, TokenIterator};
use lexer::{Radix, Token, TokenItemIterator, TokenIterator};
use thiserror::Error;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@ -109,6 +111,15 @@ pub enum ControlFlowKind {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Index(u32);
impl Index {
    /// Wraps this node index as [`PlaceOrValue::Place`].
    pub fn as_place(self) -> PlaceOrValue {
        PlaceOrValue::Place(self)
    }

    /// Wraps this node index as [`PlaceOrValue::Value`].
    pub fn as_value(self) -> PlaceOrValue {
        PlaceOrValue::Value(self)
    }
}
#[derive(Debug)]
pub enum AstNode {
Root {
@ -301,6 +312,21 @@ pub enum AstNode {
},
}
/// A node [`Index`] tagged with whether the expression it refers to was
/// produced as a place or as a value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PlaceOrValue {
    /// Index of a node tagged as a place.
    Place(Index),
    /// Index of a node tagged as a value.
    Value(Index),
}

impl PlaceOrValue {
    /// Returns the wrapped node index, discarding the place/value tag.
    pub fn index(self) -> Index {
        // Both variants carry exactly one `Index`, so the pattern is irrefutable.
        let (Self::Place(index) | Self::Value(index)) = self;
        index
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Visibility {
#[default]
@ -389,7 +415,7 @@ fn new_token_input<'a>(input: &'a str) -> TokenInput<'a> {
fn type_parser<'a, E>() -> impl Parser<'a, TokenInput<'a>, Type, E>
where
E: chumsky::extra::ParserExtra<'a, TokenInput<'a>> + 'a,
E: chumsky::extra::ParserExtra<'a, TokenInput<'a>, Error = EmptyErr> + 'a,
{
let primitives = select! {
Token::Void => InnerType::Unit,
@ -408,16 +434,24 @@ where
Token::I64 => InnerType::Int { signed: true, size: IntSize::Bits(64) },
Token::ISize => InnerType::Int { signed: true, size: IntSize::Pointer },
};
let u16 = text::int(10)
.to_slice()
.from_str::<u16>()
.try_map(|u, _span| u.map_err(|_| EmptyErr::default()));
let custom_int_inner = choice((just::<_, _, extra::Default>('u'), just('i')))
.then(text::int(10).to_slice().from_str::<u16>().unwrapped())
let integral_type = choice((just::<_, _, extra::Default>('u'), just('i')))
.then(u16)
.map(|(sign, size)| InnerType::Int {
signed: sign == 'i',
size: IntSize::Bits(size),
});
let custom_int =
select! {Token::Ident(ident) => ident}.map(move |s| custom_int_inner.parse(s).unwrap());
let custom_int = select! {Token::Ident(ident) => ident}.try_map(move |s, _span| {
integral_type
.parse(s)
.into_result()
.map_err(|_| EmptyErr::default())
});
recursive(|ty| {
let pointer = just(Token::Star)
@ -438,7 +472,7 @@ fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra>
.map(|v| v.unwrap_or(Visibility::Private))
}
fn func_parser() {
fn func_parser<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
let ident = select! {Token::Ident(ident) => ident};
let param = just(Token::Mutable)
@ -463,7 +497,7 @@ fn func_parser() {
.labelled("function parameters")
.map(|params| ParameterList { parameters: params });
let func = visibility()
visibility()
.then_ignore(just(Token::Fn))
.then(ident)
.then(params)
@ -481,22 +515,354 @@ fn func_parser() {
visibility: vis,
return_type: ret.unwrap_or_else(|| Intern::new(InnerType::Unit)),
parameter_list: params,
body,
body: body.index(),
}))
});
})
}
type ParserExtra = chumsky::extra::Full<EmptyErr, SimpleState<Ast>, ()>;
fn block<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
fn block<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone {
just(Token::OpenBrace)
.ignored()
.then_ignore(just(Token::CloseBrace))
.map_with(|_, e: &mut MapExtra<'_, '_, _, ParserExtra>| {
e.state().push(AstNode::Block {
e.state()
.push(AstNode::Block {
statements: vec![],
expr: None,
})
.as_value()
// TODO: add statements and expr and map placeness by expr
})
}
/// Parses the unit literal `()`.
///
/// On success an `AstNode::Constant` with type `InnerType::Unit` and value
/// `Value::Unit` is pushed onto the parser state, and the index returned by
/// that push is the parser's output.
fn unit<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> + Clone {
    // Both tokens are discarded by the mapping below; only their presence matters.
    let parens = just(Token::OpenParens).ignore_then(just(Token::CloseParens));

    parens.map_with(|_, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| {
        let node = AstNode::Constant {
            ty: Intern::new(InnerType::Unit),
            value: Intern::new(Value::Unit),
        };
        e.state().push(node)
    })
}
/// Shorthand for the `MapExtra` handle that `map_with` closures and pratt
/// fold callbacks in this file receive as `e`.
type E<'a, 'b> = MapExtra<'a, 'b, TokenInput<'a>, ParserExtra>;
fn simple_expr<'a, 'b>(
expr: Recursive<Direct<'a, 'b, TokenInput<'a>, PlaceOrValue, ParserExtra>>,
) -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> + Clone {
let ident = select! {Token::Ident(ident) => ident}.map_with(
|ident, e: &mut MapExtra<TokenInput<'a>, ParserExtra>| {
e.state()
.push(AstNode::UnresolvedDeclRef {
name: ident.to_string(),
})
.as_place()
},
);
let constant = select! {
Token::FloatingConstant(lexeme)|
Token::DotFloatingConstant(lexeme)|
Token::FloatingExpConstant(lexeme)|
Token::DotFloatingExpConstant(lexeme) => {
constants::parse_floating_constant(lexeme)
},
Token::IntegerConstant(lexeme) => {
constants::parse_integer_constant(lexeme, Radix::Dec)
},
tok @ Token::IntegerHexConstant(lexeme)|
tok @ Token::IntegerOctConstant(lexeme)|
tok @ Token::IntegerBinConstant(lexeme) => {
let radix = Radix::from_token(tok).unwrap();
constants::parse_integer_constant(&lexeme[2..], radix)
},
}
.map_with(|(value, ty), e: &mut E| e.state().push(AstNode::Constant { ty, value }).as_value());
choice((
unit().map(PlaceOrValue::Value),
ident,
constant,
expr.delimited_by(just(Token::OpenParens), just(Token::CloseParens)),
block(),
))
}
fn expr<'a>() -> impl Parser<'a, TokenInput<'a>, PlaceOrValue, ParserExtra> {
let assignment = choice((
just(Token::Equal),
just(Token::PlusEqual),
just(Token::MinusEqual),
just(Token::StarEqual),
just(Token::SlashEqual),
just(Token::PercentEqual),
just(Token::AmpersandEqual),
just(Token::PipeEqual),
just(Token::CaretEqual),
just(Token::LessLessEqual),
just(Token::GreaterGreaterEqual),
));
let logical_or = just(Token::PipePipe);
let logical_and = just(Token::AmpersandAmpersand);
let or = just(Token::Pipe);
let xor = just(Token::Caret);
let and = just(Token::Ampersand);
let equality = choice((just(Token::BangEqual), just(Token::EqualEqual)));
let relational = choice((
just(Token::LessEqual),
just(Token::Less),
just(Token::GreaterEqual),
just(Token::Greater),
));
let shift = choice((just(Token::LessLess), just(Token::GreaterGreater)));
let additive = choice((just(Token::Plus), just(Token::Minus)));
let multiplicative = choice((just(Token::Star), just(Token::Slash), just(Token::Percent)));
let prefixes = choice((
just(Token::Bang),
just(Token::Minus),
just(Token::Star),
just(Token::Ampersand),
));
let r#as = just(Token::As).ignore_then(type_parser::<ParserExtra>());
fn into_value(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue {
match idx {
PlaceOrValue::Place(index) => e.state().push(AstNode::PlaceToValue { expr: index }),
PlaceOrValue::Value(index) => index,
}
.as_value()
}
fn into_place(idx: PlaceOrValue, e: &mut E) -> PlaceOrValue {
match idx {
PlaceOrValue::Value(index) => e.state().push(AstNode::ValueToPlace { expr: index }),
PlaceOrValue::Place(index) => index,
}
.as_place()
}
// TODO: postfix: function call, field access, array subscript
recursive(move |_expr| {
let simple = simple_expr(_expr.clone());
let subscript = _expr
.clone()
.delimited_by(
just(Token::OpenSquareBracket),
just(Token::CloseSquareBracket),
)
// subscript takes a value as the index
.map_with(into_value);
let arguments = _expr
.clone()
// arguments take values
.map_with(into_value)
.map(PlaceOrValue::index)
.separated_by(just(Token::Comma))
.allow_trailing()
.collect::<Vec<_>>()
.delimited_by(just(Token::OpenParens), just(Token::CloseParens));
let field = just(Token::Dot).ignore_then(select! {Token::Ident(ident) => ident});
let assignment_expr = simple.pratt((
postfix(100, subscript, |expr, index: PlaceOrValue, e: &mut E| {
let node = AstNode::Subscript {
expr: into_value(expr, e).index(),
index: index.index(),
};
// subscript yields a place
e.state().push(node).as_place()
}),
postfix(100, arguments, |callee, arguments, e: &mut E| {
let node = AstNode::CallExpr {
callee: into_value(callee, e).index(),
arguments,
};
// function call yields a value
e.state().push(node).as_value()
}),
postfix(100, field, |expr, field: &str, e: &mut E| {
let node = AstNode::FieldAccess {
expr: into_place(expr, e).index(),
field: field.to_string(),
};
// field access yields a place
e.state().push(node).as_place()
}),
postfix(99, r#as, |expr, ty, e: &mut E| {
let node = AstNode::ExplicitCast {
expr: into_value(expr, e).index(),
ty,
};
e.state().push(node).as_value()
}),
prefix(95, prefixes, |op, expr, e: &mut E| {
let node = match op {
Token::Bang => AstNode::Not(into_value(expr, e).index()),
Token::Minus => AstNode::Negate(into_value(expr, e).index()),
Token::Star => {
return e
.state()
.push(AstNode::Deref { expr: expr.index() })
.as_place();
}
Token::Ampersand => AstNode::AddressOf {
expr: into_place(expr, e).index(),
},
_ => unreachable!(),
};
e.state().push(node).as_value()
}),
infix(left(90), multiplicative, |left, op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = match op {
Token::Star => AstNode::Multiply { left, right },
Token::Slash => AstNode::Divide { left, right },
Token::Percent => AstNode::Modulus { left, right },
_ => unreachable!(),
};
e.state().push(node).as_value()
}),
infix(left(80), additive, |left, op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = match op {
Token::Plus => AstNode::Add { left, right },
Token::Minus => AstNode::Subtract { left, right },
_ => unreachable!(),
};
e.state().push(node).as_value()
}),
infix(left(70), shift, |left, op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = match op {
Token::LessLess => AstNode::ShiftLeft { left, right },
Token::GreaterGreater => AstNode::ShiftRight { left, right },
_ => unreachable!(),
};
e.state().push(node).as_value()
}),
infix(left(60), relational, |left, op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = match op {
Token::Less => AstNode::Less { left, right },
Token::LessEqual => AstNode::LessEq { left, right },
Token::Greater => AstNode::Greater { left, right },
Token::GreaterEqual => AstNode::GreaterEq { left, right },
_ => unreachable!(),
};
e.state().push(node).as_value()
}),
infix(left(50), equality, |left, op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = match op {
Token::EqualEqual => AstNode::Eq { left, right },
Token::BangEqual => AstNode::NotEq { left, right },
_ => unreachable!(),
};
e.state().push(node).as_value()
}),
infix(left(40), and, |left, _op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = AstNode::BitAnd { left, right };
e.state().push(node).as_value()
}),
infix(left(30), xor, |left, _op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = AstNode::BitXor { left, right };
e.state().push(node).as_value()
}),
infix(left(20), or, |left, _op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = AstNode::BitOr { left, right };
e.state().push(node).as_value()
}),
infix(left(10), logical_and, |left, _op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = AstNode::LogicalAnd { left, right };
e.state().push(node).as_value()
}),
infix(left(5), logical_or, |left, _op, right, e: &mut E| {
let left = into_value(left, e).index();
let right = into_value(right, e).index();
let node = AstNode::LogicalOr { left, right };
e.state().push(node).as_value()
}),
infix(right(1), assignment, |left, op, right, e: &mut E| {
let dest = into_place(left, e).index();
let right = into_value(right, e).index();
let node = if op == Token::Equal {
AstNode::Assignment { dest, expr: right }
} else {
let left = into_value(left, e).index();
let right = match op {
Token::PlusEqual => e.state().push(AstNode::Add { left, right }),
Token::MinusEqual => e.state().push(AstNode::Subtract { left, right }),
Token::StarEqual => e.state().push(AstNode::Multiply { left, right }),
Token::SlashEqual => e.state().push(AstNode::Divide { left, right }),
Token::PercentEqual => e.state().push(AstNode::Modulus { left, right }),
Token::AmpersandEqual => e.state().push(AstNode::BitAnd { left, right }),
Token::PipeEqual => e.state().push(AstNode::BitOr { left, right }),
Token::CaretEqual => e.state().push(AstNode::BitXor { left, right }),
Token::LessLessEqual => e.state().push(AstNode::ShiftLeft { left, right }),
Token::GreaterGreaterEqual => {
e.state().push(AstNode::ShiftRight { left, right })
}
_ => unreachable!(),
};
AstNode::Assignment { dest, expr: right }
};
e.state().push(node).as_value()
}),
));
let else_expr = just(Token::Else).ignore_then(_expr.clone());
let if_expr = just(Token::If)
.ignore_then(
_expr
.clone()
.map_with(into_value)
.map(PlaceOrValue::index)
.delimited_by(just(Token::OpenParens), just(Token::CloseParens)),
)
.then(_expr.clone())
.then(else_expr.or_not())
.map_with(|((condition, then), or), e: &mut E| {
// TODO: determine placeness from branches
let node = AstNode::If {
condition,
then: then.index(),
r#else: or.map(PlaceOrValue::index),
};
e.state().push(node).as_value()
});
let expr = choice((if_expr, assignment_expr)).labelled("expression");
Arc::new(expr)
})
}
@ -504,9 +870,9 @@ mod constants;
#[cfg(test)]
mod tests {
use chumsky::Parser;
use chumsky::{Parser, extra::SimpleState};
use crate::{AstNode, new_token_input, type_parser};
use crate::{Ast, AstNode, new_token_input, type_parser};
#[test]
fn print_ast_node_size() {
@ -565,4 +931,17 @@ mod tests {
}
);
}
/// Smoke test: each sample expression must parse without errors.
///
/// The root node returned by the parser and the resulting AST are dumped to
/// stderr for manual inspection (visible with `cargo test -- --nocapture`).
#[test]
fn parse_exprs() {
    for source in ["()", "!() as i32", "1 << 2 & 3"] {
        // Fresh AST per input so the dumps do not accumulate nodes.
        let mut state = SimpleState(Ast::new());
        let root = crate::expr()
            .parse_with_state(new_token_input(source), &mut state)
            .unwrap();
        eprintln!("{:?} -> {:?}\n{:?}", source, root, state.0);
    }
}
}