chumsky! this is cool

This commit is contained in:
janis 2025-10-02 00:14:51 +02:00
parent 0efd60c3e6
commit 882f30371e
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
4 changed files with 220 additions and 451 deletions

View file

@ -81,7 +81,6 @@ impl Radix {
} }
} }
#[expect(dead_code)]
pub fn map_digit(self, c: char) -> u8 { pub fn map_digit(self, c: char) -> u8 {
match self { match self {
Radix::Hex => match c { Radix::Hex => match c {
@ -105,7 +104,6 @@ impl Radix {
} }
} }
#[expect(dead_code)]
pub fn folding_method(self) -> fn(u64, char) -> u64 { pub fn folding_method(self) -> fn(u64, char) -> u64 {
match self { match self {
Radix::Hex => { Radix::Hex => {
@ -235,7 +233,6 @@ fn try_parse_exp_part(source: &mut Source) -> Result<Option<()>> {
// `.` DEC_DIGITS EXP_PART? FloatingType? // `.` DEC_DIGITS EXP_PART? FloatingType?
// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType? // DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
fn parse_constant_inner(source: &mut Source) -> Result<ConstantKind> { fn parse_constant_inner(source: &mut Source) -> Result<ConstantKind> {
let start = source.count;
let zero = source.next_if(|&c| c == '0').is_some(); let zero = source.next_if(|&c| c == '0').is_some();
let radix = zero let radix = zero
@ -425,9 +422,6 @@ pub(crate) fn parse_comment<'a>(source: &'a mut Source) -> Result<bool> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::complex_tokens::parse_comment;
use super::*; use super::*;
fn make_source(s: &'_ str) -> Source<'_> { fn make_source(s: &'_ str) -> Source<'_> {

View file

@ -66,7 +66,6 @@ pub mod is_things {
ch == '0' || ch == '1' ch == '0' || ch == '1'
} }
#[expect(dead_code)]
pub fn is_nonzero_digit(ch: char) -> bool { pub fn is_nonzero_digit(ch: char) -> bool {
('1'..='9').contains(&ch) ('1'..='9').contains(&ch)
} }
@ -311,14 +310,14 @@ impl Token<'_> {
} }
} }
use std::{marker::PhantomData, ops::Range}; use std::{marker::PhantomData, ops::Range, sync::Arc};
use trie::Tree; use trie::Tree;
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone)]
pub struct TokenItem<'a> { pub struct TokenItem<'a> {
pub token: Token<'a>, pub token: Token<'a>,
pub offset: u32, pub span: Range<u32>,
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
@ -378,8 +377,9 @@ impl<I: Iterator<Item = char>> CharCountingIterator<core::iter::Peekable<I>> {
type Source<'a> = CharCountingIterator<core::iter::Peekable<core::str::Chars<'a>>>; type Source<'a> = CharCountingIterator<core::iter::Peekable<core::str::Chars<'a>>>;
#[derive(Clone)]
pub struct TokenIterator<'a> { pub struct TokenIterator<'a> {
trie: Tree<char, Token<'static>>, trie: Arc<Tree<char, Token<'static>>>,
source: &'a str, source: &'a str,
offset: usize, offset: usize,
} }
@ -392,6 +392,8 @@ impl<'a> TokenIterator<'a> {
trie.insert(token_str.chars(), *token); trie.insert(token_str.chars(), *token);
} }
let trie = Arc::new(trie);
Self { Self {
trie, trie,
source, source,
@ -535,7 +537,7 @@ impl<'a> TokenIterator<'a> {
let (token, range) = self.next_token()?; let (token, range) = self.next_token()?;
Some(TokenItem { Some(TokenItem {
token, token,
offset: range.start as u32, span: range.start as u32..range.end as u32,
}) })
} }
@ -552,6 +554,7 @@ impl<'a> Iterator for TokenIterator<'a> {
} }
} }
#[derive(Clone)]
pub struct TokenItemIterator<'a> { pub struct TokenItemIterator<'a> {
inner: TokenIterator<'a>, inner: TokenIterator<'a>,
} }
@ -573,6 +576,7 @@ pub trait TokenConsumer<'a> {
) -> Result<Self::Product, Self::Error>; ) -> Result<Self::Product, Self::Error>;
} }
#[expect(dead_code)]
struct SimpleTokenConsumer<S, T: Default = ()>(S, PhantomData<T>); struct SimpleTokenConsumer<S, T: Default = ()>(S, PhantomData<T>);
impl<'a, S, T> TokenConsumer<'a> for SimpleTokenConsumer<S, T> impl<'a, S, T> TokenConsumer<'a> for SimpleTokenConsumer<S, T>
@ -600,6 +604,7 @@ where
} }
} }
#[expect(dead_code)]
struct TokenSequenceListConsumer<L: TokenSequenceList> { struct TokenSequenceListConsumer<L: TokenSequenceList> {
list: L, list: L,
} }
@ -631,6 +636,7 @@ impl<'a, L: TokenSequenceList> TokenConsumer<'a> for TokenSequenceListConsumer<L
} }
} }
#[expect(dead_code)]
struct StealingIterator<T, I: Iterator<Item = T>> { struct StealingIterator<T, I: Iterator<Item = T>> {
pub iter: I, pub iter: I,
pub yielded: Vec<T>, pub yielded: Vec<T>,

View file

@ -13,5 +13,4 @@ internment = "0.8.6"
lexer = { path = "../lexer", version = "0.1.0" } lexer = { path = "../lexer", version = "0.1.0" }
logos = "0.15" chumsky = "0.11"
pomelo = "0.2"

View file

@ -1,9 +1,15 @@
use std::hash::Hash; use std::{hash::Hash, ops::Range};
use chumsky::{
IterParser, Parser,
error::EmptyErr,
extra::{self, SimpleState},
input::{IterInput, MapExtra},
prelude::{choice, just, recursive},
select, text,
};
use internment::Intern; use internment::Intern;
use lexer::{Token, TokenConsumer, TokenItem, TokenItemIterator}; use lexer::{Token, TokenItemIterator, TokenIterator};
use logos::Logos;
use pomelo::pomelo;
use thiserror::Error; use thiserror::Error;
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@ -32,14 +38,14 @@ pub enum InnerType {
float_type: FloatType, float_type: FloatType,
}, },
Pointer { Pointer {
pointee: Box<Type>, pointee: Type,
}, },
Array { Array {
element: Box<Type>, element: Type,
size: usize, size: usize,
}, },
Function { Function {
return_type: Box<Type>, return_type: Type,
parameter_types: Vec<Type>, parameter_types: Vec<Type>,
}, },
Tuple { Tuple {
@ -119,10 +125,7 @@ pub enum AstNode {
ParameterList { ParameterList {
parameters: Vec<Index>, parameters: Vec<Index>,
}, },
Parameter { Parameter(Parameter),
name: String,
param_type: Type,
},
FunctionDecl(FunctionDecl), FunctionDecl(FunctionDecl),
Block { Block {
statements: Vec<Index>, statements: Vec<Index>,
@ -332,420 +335,178 @@ impl Ast {
} }
#[derive(Debug)] #[derive(Debug)]
struct FunctionDecl { pub struct FunctionDecl {
attrs: Option<Index>, attrs: Option<Index>,
name: String, name: String,
visibility: Visibility, visibility: Visibility,
return_type: Type, return_type: Type,
parameter_list: Option<ParameterList>, parameter_list: ParameterList,
body: Index, body: Index,
} }
#[derive(Debug)] #[derive(Debug)]
struct Parameter { pub struct Parameter {
mutable: bool, mutable: bool,
name: String, name: String,
param_type: Type, param_type: Type,
} }
#[derive(Debug)] #[derive(Debug)]
struct ParameterList { pub struct ParameterList {
parameters: Vec<Index>, parameters: Vec<Index>,
} }
#[derive(Debug)] fn parse() {
struct ExtraToken<'a> { todo!()
lexeme: &'a str,
offset: u32,
} }
pomelo! { struct SpannedToken<'a> {
%include { token: Token<'a>,
use super::AstNode; span: std::ops::Range<usize>,
use internment::Intern;
use super::{
Parameter, Ast, ParameterList, FunctionDecl, Type, InnerType,
FloatType, ExtraToken, Index, IntSize, Visibility, Value,
};
};
%extra_argument Ast;
%parser pub struct Parser<'a>{};
%token #[derive(Debug)] pub enum Token<'a> {};
%type Ident &'a str;
%type DocComment &'a str;
%type Comment &'a str;
%type fn_decl FunctionDecl;
%type parameter Parameter;
%type parameter_list ParameterList;
%type typ Type;
%type return_type Type;
%type block Index;
%type decl Index;
%type decl_list Vec<Index>;
%type file Index;
file ::= decl_list?(list) {
let decls = list.unwrap_or_default();
extra.push(AstNode::File { decls })
};
decl_list ::= decl(decl) { vec![decl] };
decl_list ::= decl_list(dl) decl(decl) {
let mut list = dl;
list.push(decl);
list
};
%type attrs Index;
attrs ::= DocComment(text) {
let idx = extra.push(AstNode::Doc { text: text.to_string() });
extra.push(AstNode::Attributes { attrs: vec![idx] })
};
typ ::= Bool { Intern::new(InnerType::Bool) };
typ ::= I1 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(1) }) };
typ ::= I8 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(8) }) };
typ ::= I16 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(16) }) };
typ ::= I32 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(32) }) };
typ ::= I64 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(64) }) };
typ ::= U1 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(1) }) };
typ ::= U8 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(8) }) };
typ ::= U16 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(16) }) };
typ ::= U32 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(32) }) };
typ ::= U64 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(64) }) };
typ ::= ISize { Intern::new(InnerType::Int { signed: true, size: IntSize::Pointer }) };
typ ::= USize { Intern::new(InnerType::Int { signed: false, size: IntSize::Pointer }) };
typ ::= F32 { Intern::new(InnerType::Float { float_type: FloatType::F32 }) };
typ ::= F64 { Intern::new(InnerType::Float { float_type: FloatType::F64 }) };
typ ::= Bang { Intern::new(InnerType::Bottom) };
typ ::= Void { Intern::new(InnerType::Unit) };
unit ::= LParen RParen;
%type immediate (Intern<Value>, Type);
immediate ::= unit { (Intern::new(Value::Unit), Intern::new(InnerType::Unit)) };
immediate ::= False { (Intern::new(Value::Bool(false)), Intern::new(InnerType::Bool)) };
immediate ::= True { (Intern::new(Value::Bool(true)), Intern::new(InnerType::Bool)) };
%type Constant lexer::Token<'a>;
immediate ::= Constant(token) {
crate::constants::parse_constant(token)
};
%type expr Index;
%type stmt Index;
%type stmts Vec<Index>;
expr ::= assignment_expr(expr) { expr };
assignment_expr ::= or_expr(expr) { expr };
assignment_expr ::= or_expr(dest) Equal assignment_expr(expr) {
extra.push(AstNode::Assignment { dest, expr })
};
or_expr ::= and_expr(expr) { expr };
or_expr ::= or_expr(left) PipePipe and_expr(right) {
extra.push(AstNode::LogicalOr { left, right })
};
and_expr ::= bitor_expr(expr) { expr };
and_expr ::= and_expr(left) AmpersandAmpersand bitor_expr(right) {
extra.push(AstNode::LogicalAnd { left, right })
};
bitor_expr ::= bitxor_expr(expr) { expr };
bitor_expr ::= bitor_expr(left) Pipe bitxor_expr(right) {
extra.push(AstNode::BitOr { left, right })
};
bitxor_expr ::= bitand_expr(expr) { expr };
bitxor_expr ::= bitxor_expr(left) Caret bitand_expr(right) {
extra.push(AstNode::BitXor { left, right })
};
bitand_expr ::= equality_expr(expr) { expr };
bitand_expr ::= bitand_expr(left) Ampersand equality_expr(right) {
extra.push(AstNode::BitAnd { left, right })
};
equality_expr ::= relational_expr(expr) { expr };
equality_expr ::= equality_expr(left) EqualEqual relational_expr(right) {
extra.push(AstNode::Eq { left, right })
};
equality_expr ::= equality_expr(left) BangEqual relational_expr(right) {
extra.push(AstNode::NotEq { left, right })
};
relational_expr ::= shift_expr(expr) { expr };
relational_expr ::= relational_expr(left) Less shift_expr(right) {
extra.push(AstNode::Less { left, right })
};
relational_expr ::= relational_expr(left) LessEqual shift_expr(right) {
extra.push(AstNode::LessEq { left, right })
};
relational_expr ::= relational_expr(left) Greater shift_expr(right) {
extra.push(AstNode::Greater { left, right })
};
relational_expr ::= relational_expr(left) GreaterEqual shift_expr(right) {
extra.push(AstNode::GreaterEq { left, right })
};
shift_expr ::= additive_expr(expr) { expr };
shift_expr ::= shift_expr(left) LessLess additive_expr(right) {
extra.push(AstNode::ShiftLeft { left, right })
};
shift_expr ::= shift_expr(left) GreaterGreater additive_expr(right) {
extra.push(AstNode::ShiftRight { left, right })
};
additive_expr ::= multiplicative_expr(expr) { expr };
additive_expr ::= additive_expr(left) Plus multiplicative_expr(right) {
extra.push(AstNode::Add { left, right })
};
additive_expr ::= additive_expr(left) Minus multiplicative_expr(right) {
extra.push(AstNode::Subtract { left, right })
};
multiplicative_expr ::= unary_expr(expr) { expr };
multiplicative_expr ::= multiplicative_expr(left) Star unary_expr(right) {
extra.push(AstNode::Multiply { left, right })
};
multiplicative_expr ::= multiplicative_expr(left) Slash unary_expr(right) {
extra.push(AstNode::Divide { left, right })
};
multiplicative_expr ::= multiplicative_expr(left) Percent unary_expr(right) {
extra.push(AstNode::Modulus { left, right })
};
unary_expr ::= as_expr(expr) { expr };
unary_expr ::= Bang unary_expr(expr) {
extra.push(AstNode::Not(expr))
};
unary_expr ::= Minus unary_expr(expr) {
extra.push(AstNode::Negate(expr))
};
unary_expr ::= Star unary_expr(expr) {
extra.push(AstNode::Deref { expr })
};
unary_expr ::= Ampersand unary_expr(expr) {
extra.push(AstNode::AddressOf { expr })
};
as_expr ::= postfix_expr(expr) { expr };
as_expr ::= postfix_expr(expr) As typ(ty) {
extra.push(AstNode::ExplicitCast { expr, ty })
};
postfix_expr ::= primary_expr(expr) { expr };
postfix_expr ::= postfix_expr(expr) LBracket expr(index) RBracket {
extra.push(AstNode::Subscript { expr, index })
};
postfix_expr ::= postfix_expr(expr) Dot Ident(field) {
extra.push(AstNode::FieldAccess { expr, field: field.to_string() })
};
postfix_expr ::= postfix_expr(callee) LParen argument_list?(args) RParen {
let arguments = args.unwrap_or_default();
extra.push(AstNode::CallExpr { callee, arguments })
};
primary_expr ::= LParen expr(expr) RParen { expr };
primary_expr ::= Ident(name) {
let idx = extra.push(AstNode::UnresolvedDeclRef { name: name.to_string() });
idx
};
primary_expr ::= {
let idx = extra.push(AstNode::TypeDeclRef { ty });
idx
};
primary_expr ::= immediate((value, ty)) {
extra.push(AstNode::Constant { ty, value })
};
primary_expr ::= block(expr) { expr };
%type argument_list Vec<Index>;
argument_list ::= expr(e) {
let idx = extra.push(AstNode::Argument { expr: e });
vec![idx]
};
argument_list ::= argument_list(al) Comma expr(e) Comma? {
let mut v = al;
let idx = extra.push(AstNode::Argument { expr: e });
v.push(idx);
v
};
stmt ::= Semi { extra.push(AstNode::NoopExpr) };
stmt ::= Comment(text) { extra.push(AstNode::Comment { text: text.to_string() }) };
stmt ::= expr(expr) Semi { extra.push(AstNode::Stmt { expr }) };
stmts ::= stmt(s) { vec![s] };
stmts ::= stmts(ss) stmt(s) {
let mut v = ss;
v.push(s);
v
};
%type block_inner (Vec<Index>, Option<Index>);
block_inner ::= {(vec![], None)};
block_inner ::= expr(expr) {(vec![], Some(expr))};
block_inner ::= stmts(ss) {(ss, None)};
block_inner ::= stmts(ss) expr(expr) {(ss, Some(expr))};
block ::= LBrace block_inner((ss, expr)) RBrace {
extra.push(AstNode::Block {
statements: ss,
expr
})
};
%type vis Visibility;
vis ::= Pub { Visibility::Public };
%type mutable bool;
mutable ::= Mutable { true };
mutable ::= { false };
return_type ::= Arrow typ(return_type) { return_type };
parameter ::= mutable(mutable) Ident(name) Colon typ(param_type) {
Parameter { mutable, name: name.to_string(), param_type }
};
parameter_list ::= parameter(p) {
let idx = extra.push(AstNode::Parameter { name: p.name, param_type: p.param_type });
ParameterList { parameters: vec![idx] }
};
parameter_list ::= parameter_list(pl) Comma parameter(p) {
let idx = extra.push(AstNode::Parameter { name: p.name, param_type: p.param_type });
let mut parameters = pl.parameters;
parameters.push(idx);
ParameterList { parameters }
};
parameter_list ::= parameter_list(pl) Comma {
pl
};
decl ::= Comment(text) { extra.push(AstNode::Comment { text: text.to_string() }) };
decl ::= fn_decl(f) { extra.push(AstNode::FunctionDecl(f)) };
fn_decl ::= attrs?(attrs) vis?(visibility) Fn Ident(name) LParen parameter_list?(parameters) RParen return_type(rtype) block(body) {
let name = name.to_string();
FunctionDecl {
attrs,
name,
visibility: visibility.unwrap_or_default(),
return_type: rtype,
parameter_list: parameters,
body,
}
};
} }
impl<'a> From<lexer::Token<'a>> for parser::Token<'a> { #[derive(Clone)]
fn from(value: lexer::Token<'a>) -> Self { struct SpannedTokenInput<'a> {
use lexer::Token; inner: TokenItemIterator<'a>,
match value { }
Token::Fn => Self::Fn,
Token::OpenParens => Self::LParen, impl<'a> Iterator for SpannedTokenInput<'a> {
Token::CloseParens => Self::RParen, type Item = (Token<'a>, Range<u32>);
Token::OpenBrace => Self::LBrace,
Token::CloseBrace => Self::RBrace, fn next(&mut self) -> Option<Self::Item> {
Token::Ident(ident) => Self::Ident(ident), self.inner.next().map(|item| (item.token, item.span))
Token::Comment(text) => Self::Comment(text),
Token::DocComment(text) => Self::DocComment(text),
Token::OpenSquareBracket => todo!(), // Self::LBracket,
Token::CloseSquareBracket => todo!(), // Self::RBracket,
Token::Comma => Self::Comma,
Token::Colon => Self::Colon,
Token::Semi => Self::Semi,
Token::Elipsis3 => todo!(),
Token::Elipsis2 => todo!(),
Token::Equal => todo!(),
Token::Void => Self::Void,
Token::Bool => Self::Bool,
Token::F32 => Self::F32,
Token::F64 => Self::F64,
Token::ISize => Self::ISize,
Token::USize => Self::USize,
Token::U1 => Self::U1,
Token::U8 => Self::U8,
Token::U16 => Self::U16,
Token::U32 => Self::U32,
Token::U64 => Self::U64,
Token::I1 => Self::I1,
Token::I8 => Self::I8,
Token::I16 => Self::I16,
Token::I32 => Self::I32,
Token::I64 => Self::I64,
Token::True => Self::True,
Token::False => Self::False,
Token::Const => todo!(), // Self::Const,
Token::Mutable => Self::Mutable,
Token::Volatile => todo!(),
Token::Noalias => todo!(),
Token::Let => todo!(),
Token::Var => todo!(),
Token::If => todo!(),
Token::As => todo!(),
Token::Else => todo!(),
Token::Return => todo!(),
Token::Struct => todo!(),
Token::Type => todo!(),
Token::Union => todo!(),
Token::Enum => todo!(),
Token::Packed => todo!(),
Token::Extern => todo!(),
Token::Pub => Self::Pub,
Token::Module => todo!(),
Token::Dot => todo!(),
Token::MinusGreater => Self::Arrow,
Token::Bang => Self::Bang,
Token::Tilde => todo!(),
Token::Plus => todo!(),
Token::Minus => todo!(),
Token::Star => todo!(),
Token::Slash => todo!(),
Token::Percent => todo!(),
Token::Less => todo!(),
Token::Greater => todo!(),
Token::LessEqual => todo!(),
Token::GreaterEqual => todo!(),
Token::EqualEqual => todo!(),
Token::BangEqual => todo!(),
Token::PipePipe => todo!(),
Token::AmpersandAmpersand => todo!(),
Token::Ampersand => todo!(),
Token::Caret => todo!(),
Token::Pipe => todo!(),
Token::LessLess => todo!(),
Token::GreaterGreater => todo!(),
Token::Question => todo!(),
Token::PlusEqual => todo!(),
Token::MinusEqual => todo!(),
Token::StarEqual => todo!(),
Token::SlashEqual => todo!(),
Token::PercentEqual => todo!(),
Token::AmpersandEqual => todo!(),
Token::PipeEqual => todo!(),
Token::CaretEqual => todo!(),
Token::LessLessEqual => todo!(),
Token::GreaterGreaterEqual => todo!(),
Token::Eof(_) => todo!(),
Token::ParseError(_) => todo!(),
Token::CharConstant(_) => todo!(),
Token::IntegerConstant(_) => Self::Constant(value),
Token::IntegerHexConstant(_) => Self::Constant(value),
Token::IntegerBinConstant(_) => Self::Constant(value),
Token::IntegerOctConstant(_) => Self::Constant(value),
Token::FloatingConstant(_) => Self::Constant(value),
Token::FloatingExpConstant(_) => Self::Constant(value),
Token::DotFloatingConstant(_) => Self::Constant(value),
Token::DotFloatingExpConstant(_) => Self::Constant(value),
Token::StringConstant(_) => todo!(),
}
} }
} }
/// Token-level parser input: the [`SpannedTokenInput`] iterator adapted through
/// chumsky's [`IterInput`], with `Range<u32>` as the span type.
type TokenInput<'a> = IterInput<SpannedTokenInput<'a>, Range<u32>>;
/// Lexes `input` lazily and exposes the resulting tokens as a [`TokenInput`].
///
/// The second argument handed to `IterInput::new` is an empty range positioned
/// at `input.len()`.
fn new_token_input<'a>(input: &'a str) -> TokenInput<'a> {
    let end = input.len() as u32;
    let tokens = SpannedTokenInput {
        inner: TokenIterator::new(input).into_token_items(),
    };

    IterInput::new(tokens, end..end)
}
fn type_parser<'a, E>() -> impl Parser<'a, TokenInput<'a>, Type, E>
where
E: chumsky::extra::ParserExtra<'a, TokenInput<'a>> + 'a,
{
let primitives = select! {
Token::Void => InnerType::Unit,
Token::F32 => InnerType::Float { float_type: FloatType::F32 },
Token::F64 => InnerType::Float { float_type: FloatType::F64 },
Token::Bool => InnerType::Bool,
Token::U1 => InnerType::Int { signed: false, size: IntSize::Bits(1) },
Token::U8 => InnerType::Int { signed: false, size: IntSize::Bits(8) },
Token::U16 => InnerType::Int { signed: false, size: IntSize::Bits(16) },
Token::U32 => InnerType::Int { signed: false, size: IntSize::Bits(32) },
Token::U64 => InnerType::Int { signed: false, size: IntSize::Bits(64) },
Token::USize => InnerType::Int { signed: false, size: IntSize::Pointer },
Token::I8 => InnerType::Int { signed: true, size: IntSize::Bits(8) },
Token::I16 => InnerType::Int { signed: true, size: IntSize::Bits(16) },
Token::I32 => InnerType::Int { signed: true, size: IntSize::Bits(32) },
Token::I64 => InnerType::Int { signed: true, size: IntSize::Bits(64) },
Token::ISize => InnerType::Int { signed: true, size: IntSize::Pointer },
};
let custom_int_inner = choice((just::<_, _, extra::Default>('u'), just('i')))
.then(text::int(10).to_slice().from_str::<u16>().unwrapped())
.map(|(sign, size)| InnerType::Int {
signed: sign == 'i',
size: IntSize::Bits(size),
});
let custom_int =
select! {Token::Ident(ident) => ident}.map(move |s| custom_int_inner.parse(s).unwrap());
recursive(|ty| {
let pointer = just(Token::Star)
.ignore_then(choice((
just(Token::Mutable).to(true),
just(Token::Const).to(false),
)))
.then(ty)
.map(|(_mutable, pointee)| InnerType::Pointer { pointee });
choice((primitives, custom_int, pointer)).map(|p| Intern::new(p))
})
}
/// Parses an optional visibility marker.
///
/// A `Token::Pub` yields `Visibility::Public`. When it is absent the result is
/// `Visibility::Private`.
fn visibility<'a>() -> impl Parser<'a, TokenInput<'a>, Visibility, ParserExtra> {
    just(Token::Pub).or_not().map(|marker| match marker {
        Some(_) => Visibility::Public,
        None => Visibility::Private,
    })
}
/// Assembles the combinators for a function declaration:
/// visibility, `Token::Fn`, a name, a parenthesised parameter list, an optional
/// `Token::MinusGreater` return type, and a block body.
///
/// Parameters and the declaration itself are pushed into the parser state as
/// `AstNode::Parameter` / `AstNode::FunctionDecl` nodes. A missing return type
/// becomes `InnerType::Unit`.
///
/// NOTE(review): the finished parser is bound to a local and dropped, and the
/// function returns `()`, so calling it currently produces nothing usable.
fn func_parser() {
    let name = select! {Token::Ident(ident) => ident};

    // Optional `Token::Mutable`, a name, `Token::Colon`, then a type.
    let parameter = just(Token::Mutable)
        .ignored()
        .or_not()
        .then(name)
        .then_ignore(just(Token::Colon))
        .then(type_parser::<ParserExtra>())
        .map_with(|((mut_marker, param_name), param_type), e| {
            e.state().push(AstNode::Parameter(Parameter {
                mutable: mut_marker.is_some(),
                name: param_name.to_string(),
                param_type,
            }))
        });

    // Comma-separated parameters between parentheses; a trailing comma is allowed.
    let parameter_list = parameter
        .separated_by(just(Token::Comma))
        .allow_trailing()
        .collect::<Vec<_>>()
        .delimited_by(just(Token::OpenParens), just(Token::CloseParens))
        .labelled("function parameters")
        .map(|parameters| ParameterList { parameters });

    let func = visibility()
        .then_ignore(just(Token::Fn))
        .then(name)
        .then(parameter_list)
        // optional return type
        .then(
            just(Token::MinusGreater)
                .ignore_then(type_parser())
                .or_not(),
        )
        .then(block())
        .map_with(
            |((((visibility, fn_name), parameter_list), declared_return), body), e| {
                let return_type = declared_return.unwrap_or_else(|| Intern::new(InnerType::Unit));
                e.state().push(AstNode::FunctionDecl(FunctionDecl {
                    attrs: None,
                    name: fn_name.to_string(),
                    visibility,
                    return_type,
                    parameter_list,
                    body,
                }))
            },
        );
}
/// `extra` configuration used by the stateful parsers in this file:
/// `EmptyErr` as the error type, the [`Ast`] wrapped in `SimpleState` as the
/// parser state, and `()` as the context.
type ParserExtra = chumsky::extra::Full<EmptyErr, SimpleState<Ast>, ()>;
/// Parses a block consisting of `Token::OpenBrace` immediately followed by
/// `Token::CloseBrace`.
///
/// On success an `AstNode::Block` with no statements and no trailing
/// expression is pushed into the parser state, and the value returned by that
/// push is the parser's output.
fn block<'a>() -> impl Parser<'a, TokenInput<'a>, Index, ParserExtra> {
    let braces = just(Token::OpenBrace)
        .ignore_then(just(Token::CloseBrace))
        .ignored();

    braces.map_with(|(), e: &mut MapExtra<'_, '_, _, ParserExtra>| {
        let empty_block = AstNode::Block {
            statements: Vec::new(),
            expr: None,
        };
        e.state().push(empty_block)
    })
}
mod constants; mod constants;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::AstNode; use chumsky::Parser;
use crate::{AstNode, new_token_input, type_parser};
#[test] #[test]
fn print_ast_node_size() { fn print_ast_node_size() {
@ -753,46 +514,55 @@ mod tests {
} }
#[test] #[test]
fn parse_constant() { fn parse_types() {
use crate::parser::{Parser, Token}; let ty = type_parser::<chumsky::extra::Default>()
let input = r#" .parse(new_token_input("i32"))
fn a() -> u32 { .unwrap();
42u32 assert_eq!(
} *ty,
fn b() -> u32 { crate::InnerType::Int {
42i8 signed: true,
} size: crate::IntSize::Bits(32)
fn c() -> f32 { }
42e4 );
}
"#;
let mut lex = lexer::TokenIterator::new(input);
let mut mapped = lex.map(Token::from);
let mut ast = crate::Ast::new();
let mut parser = Parser::new(ast);
while let Some(token) = mapped.next() {
parser.parse(token).unwrap();
}
let (out, ast) = parser.end_of_input().unwrap();
eprintln!("AST: {:#?}", ast);
}
#[test] let ty = type_parser::<chumsky::extra::Default>()
fn parse() { .parse(new_token_input("*const i32"))
use crate::parser::{Parser, Token}; .unwrap();
let input = r#" assert_eq!(
// A simple test case *ty,
/// A function that takes two u32 parameters and returns a u32 crate::InnerType::Pointer {
fn main(a: u32, b: u32) -> u32 {} pointee: crate::Intern::new(crate::InnerType::Int {
"#; signed: true,
let mut lex = lexer::TokenIterator::new(input); size: crate::IntSize::Bits(32)
let mut mapped = lex.map(Token::from); })
let mut ast = crate::Ast::new(); }
let mut parser = Parser::new(ast); );
while let Some(token) = mapped.next() {
parser.parse(token).unwrap(); let ty = type_parser::<chumsky::extra::Default>()
} .parse(new_token_input("*mut *const u8"))
let (out, ast) = parser.end_of_input().unwrap(); .unwrap();
eprintln!("AST: {:#?}", ast); assert_eq!(
*ty,
crate::InnerType::Pointer {
pointee: crate::Intern::new(crate::InnerType::Pointer {
pointee: crate::Intern::new(crate::InnerType::Int {
signed: false,
size: crate::IntSize::Bits(8)
})
})
}
);
let ty = type_parser::<chumsky::extra::Default>()
.parse(new_token_input("i10"))
.unwrap();
assert_eq!(
*ty,
crate::InnerType::Int {
signed: true,
size: crate::IntSize::Bits(10)
}
);
} }
} }