parser crate

This commit is contained in:
janis 2025-09-29 15:56:13 +02:00
parent ae0fb53b90
commit df2bb54272
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
2 changed files with 564 additions and 0 deletions

14
crates/parser/Cargo.toml Normal file
View file

@ -0,0 +1,14 @@
# Manifest for the `parser` crate.
[package]
name = "parser"
version = "0.1.0"
edition = "2024"
# Entries marked `workspace = true` inherit their version/source from the
# workspace root manifest.
[dependencies]
tracing = { workspace = true }
werkzeug = { workspace = true }
thiserror = { workspace = true }
itertools = { workspace = true }
# Pinned directly in this crate rather than inherited from the workspace.
internment = "0.8.6"
# Sibling crate that provides the token types and iterators used by the parser.
lexer = { path = "../lexer", version = "0.1.0" }

550
crates/parser/src/lib.rs Normal file
View file

@ -0,0 +1,550 @@
use lexer::{
Consuming, ReborrowingConsumingIterator, ReborrowingIterator, ReborrowingPeekingIterator,
Token, TokenItem, TokenItemIterator,
};
use thiserror::Error;
/// A type in the parsed language.
///
/// Composite variants own their component types, boxed where the type is
/// directly recursive.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Type {
/// The top type. The primitive type parser in this file produces it for a `!` token.
Top,
/// The bottom type. Not constructed by any code visible in this file.
Bottom,
/// The unit type, parsed from `(` immediately followed by `)`.
Unit,
/// The boolean type.
Bool,
/// An integer type described by its signedness and its width in bits.
Int {
signed: bool,
bits: u8,
},
/// A floating-point type; the width is selected by `float_type`.
Float {
float_type: FloatType,
},
/// A pointer to a value of type `pointee`.
Pointer {
pointee: Box<Type>,
},
/// An array of `size` values of type `element`.
Array {
element: Box<Type>,
size: usize,
},
/// A function type with the given parameter types and return type.
Function {
return_type: Box<Type>,
parameter_types: Vec<Type>,
},
/// A tuple of the given element types, in order.
Tuple {
elements: Vec<Type>,
},
/// A union of the listed types.
TypeUnion {
types: Vec<Type>,
},
/// An intersection of the listed types.
TypeIntersection {
types: Vec<Type>,
},
}
/// The floating-point widths available to [`Type::Float`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FloatType {
/// Produced by the type parser for the `f32` token.
F32,
/// Produced by the type parser for the `f64` token.
F64,
}
/// A constant value, as stored in [`AstNode::Constant`].
///
/// Only `Debug` and `Clone` are derived; the `f64` payload rules out deriving
/// `Eq` and `Hash`.
#[derive(Debug, Clone)]
pub enum Value {
/// A boolean constant.
Bool(bool),
/// A signed integer constant.
Int(i64),
/// An unsigned integer constant.
UInt(u64),
/// A floating-point constant.
Float(f64),
/// A string constant.
String(String),
}
/// Selects which control-flow statement an [`AstNode::ControlFlow`] node represents.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ControlFlowKind {
/// A `return` statement.
Return,
/// A `break` statement.
Break,
/// A `continue` statement.
Continue,
}
/// Handle that [`AstNode`] variants use to refer to other nodes.
///
/// The wrapped `u32` is private to this module.
// NOTE(review): presumably a position in `Ast::nodes` — confirm once node
// insertion is implemented.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Index(u32);
/// A single node of the syntax tree.
///
/// Nodes refer to their children through [`Index`] handles rather than owning
/// them. Only `Debug` is derived; the `Error` variant holds a boxed trait
/// object, which cannot be cloned or compared.
#[derive(Debug)]
pub enum AstNode {
/// Root of the tree; lists the file nodes.
Root {
files: Vec<Index>,
},
/// A source file; lists its declarations.
File {
decls: Vec<Index>,
},
/// A function prototype: name, return type and a parameter-list node.
FunctionProto {
name: String,
return_type: Type,
parameter_list: Index,
},
/// The parameters of a function prototype.
ParameterList {
parameters: Vec<Index>,
},
/// A single named, typed parameter.
Parameter {
name: String,
param_type: Type,
},
/// A function declaration that pairs a prototype node with a body node.
FunctionDecl {
proto: Index,
body: Index,
},
/// A block of statements with an optional trailing expression.
Block {
statements: Vec<Index>,
expr: Option<Index>,
},
/// A constant value together with its type.
Constant {
ty: Type,
value: Value,
},
/// An expression used as a statement.
ExpressionStatement {
expr: Index,
},
/// A `return`/`break`/`continue` statement with an optional operand expression.
ControlFlow {
kind: ControlFlowKind,
expr: Option<Index>,
},
/// A variable declaration: mutability flag, name and type.
VarDecl {
mutable: bool,
name: String,
var_type: Type,
},
/// An assignment of `expr` to `dest`.
Assignment {
dest: Index,
expr: Index,
},
/// A global declaration: name, type and a node holding its value.
GlobalDecl {
name: String,
var_type: Type,
value: Index,
},
/// A struct declaration: name and field nodes.
StructDecl {
name: String,
fields: Vec<Index>,
},
/// A single named, typed struct field.
FieldDecl {
name: String,
field_type: Type,
},
/// Access of the field named `field` on `expr`.
FieldAccess {
expr: Index,
field: String,
},
/// A reference to a declaration by name that has not been resolved to a node.
UnresolvedDeclRef {
name: String,
},
/// A reference to a declaration node.
DeclRef {
decl: Index,
},
/// A reference to a type declaration node.
TypeDeclRef {
ty: Index,
},
/// An explicit cast of `expr` to `ty`.
ExplicitCast {
expr: Index,
ty: Type,
},
/// A dereference of `expr`.
Deref {
expr: Index,
},
/// Taking the address of `expr`.
AddressOf {
expr: Index,
},
/// A place-to-value conversion of `expr`.
PlaceToValue {
expr: Index,
},
/// A value-to-place conversion of `expr`.
ValueToPlace {
expr: Index,
},
/// A call of `callee` with the given argument nodes.
CallExpr {
callee: Index,
arguments: Vec<Index>,
},
/// A single call argument wrapping an expression.
Argument {
expr: Index,
},
/// Unary "not" applied to the operand node.
Not(Index),
/// Unary negation applied to the operand node.
Negate(Index),
/// Binary multiplication of `left` and `right`.
Multiply {
left: Index,
right: Index,
},
/// Binary division of `left` by `right`.
Divide {
left: Index,
right: Index,
},
/// Binary modulus of `left` by `right`.
Modulus {
left: Index,
right: Index,
},
/// Binary addition of `left` and `right`.
Add {
left: Index,
right: Index,
},
/// Binary subtraction of `right` from `left`.
Subtract {
left: Index,
right: Index,
},
/// Bitwise OR of `left` and `right`.
BitOr {
left: Index,
right: Index,
},
/// Bitwise AND of `left` and `right`.
BitAnd {
left: Index,
right: Index,
},
/// Bitwise XOR of `left` and `right`.
BitXor {
left: Index,
right: Index,
},
/// Logical OR of `left` and `right`.
LogicalOr {
left: Index,
right: Index,
},
/// Logical AND of `left` and `right`.
LogicalAnd {
left: Index,
right: Index,
},
/// Equality comparison of `left` and `right`.
Eq {
left: Index,
right: Index,
},
/// Inequality comparison of `left` and `right`.
NotEq {
left: Index,
right: Index,
},
/// "Less than" comparison of `left` and `right`.
Less {
left: Index,
right: Index,
},
/// "Less than or equal" comparison of `left` and `right`.
LessEq {
left: Index,
right: Index,
},
/// "Greater than" comparison of `left` and `right`.
Greater {
left: Index,
right: Index,
},
/// "Greater than or equal" comparison of `left` and `right`.
GreaterEq {
left: Index,
right: Index,
},
/// Left shift of `left` by `right`.
ShiftLeft {
left: Index,
right: Index,
},
/// Right shift of `left` by `right`.
ShiftRight {
left: Index,
right: Index,
},
/// Subscript of `expr` by `index`.
Subscript {
expr: Index,
index: Index,
},
/// A conditional with a `then` branch and an optional `else` branch.
If {
condition: Index,
then: Index,
r#else: Option<Index>,
},
/// An `else` branch wrapping an expression.
Else {
expr: Index,
},
/// A node that carries an error.
// NOTE(review): presumably stands in for syntax that failed to parse — confirm.
Error {
err: Box<dyn core::error::Error>,
},
}
/// Errors reported by the parser.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum ParseError {
/// The token source had no further tokens. The `try_*` iterator extension
/// traits in this file map a `None` result to this variant.
#[error("End of file.")]
EOF,
/// A token that is not accepted at the current position; carries that token.
#[error("Unexpected token: {0:?}")]
UnexpectedToken(Token),
/// The upcoming tokens do not form a type.
#[error("Not a type.")]
NotAType,
}
/// The syntax tree: a flat list of [`AstNode`]s.
///
/// The derived `Default` yields a tree with no nodes.
#[derive(Default, Debug)]
pub struct Ast {
/// Storage for the nodes of the tree.
nodes: Vec<AstNode>,
}
impl Ast {
    /// Creates an AST that contains no nodes.
    pub fn new() -> Self {
        Self { nodes: Vec::new() }
    }
}
/// State shared by the individual parsers: the AST handed in by the caller and
/// the token source.
struct ParseCtx<'a> {
/// The syntax tree passed to `ParseCtx::new`.
ast: Ast,
/// The token iterator, wrapped in a `ReborrowingIterator` in `Consuming` mode.
source: ReborrowingIterator<'a, 'a, TokenItemIterator<'a>, TokenItem<'a>, Consuming>,
/// A buffer of token items; it starts empty and nothing in this file reads
/// or writes it yet.
// NOTE(review): presumably intended as a lookahead buffer — confirm or remove.
peeked: Vec<TokenItem<'a>>,
}
impl<'a> ParseCtx<'a> {
    /// Builds a parse context that owns `ast` and draws its tokens from `source`.
    ///
    /// The `peeked` buffer starts out empty.
    fn new(ast: Ast, source: &'a mut TokenItemIterator<'a>) -> Self {
        let source = ReborrowingIterator::new(source);
        let peeked = Vec::new();
        Self {
            ast,
            source,
            peeked,
        }
    }

    /// Parse the entire source into an AST, returning an `AstNode::File` node.
    ///
    /// Not implemented yet: calling this currently panics via `todo!()`.
    fn parse_file(&mut self) -> Result<AstNode, ParseError> {
        todo!()
    }
}
/// Parser for file-level (global) declarations.
struct FileParser<'a> {
/// The shared parse context this parser reads tokens from.
// NOTE(review): `&'a mut ParseCtx<'a>` reuses the context's own lifetime
// parameter for the borrow, which keeps the context mutably borrowed for all
// of `'a`; a separate, shorter borrow lifetime may be needed — confirm.
ctx: &'a mut ParseCtx<'a>,
}
impl<'a> FileParser<'a> {
pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
Self { ctx }
}
pub fn parse_global_decl(&mut self) -> Result<AstNode, ParseError> {
let mut peeking = self.ctx.source.borrow_peeking();
let next = peeking.try_peek_next()?;
match next.token {
Token::Fn => {
// function
todo!("impl function parsing")
}
Token::SlashSlash | Token::SlashSlashSlash => {
// comment
todo!("impl comment parsing")
}
Token::Const => {
// constant
todo!("impl constant parsing")
}
Token::Type => {
// type alias
todo!("impl type parsing")
}
Token::Module => {
// module
todo!("impl module parsing")
}
_ => Err(ParseError::UnexpectedToken(next.token)),
}
}
}
/// Parser for type expressions.
struct TypeParser<'a> {
/// The shared parse context this parser reads tokens from.
// NOTE(review): `&'a mut ParseCtx<'a>` reuses the context's own lifetime
// parameter for the borrow, which keeps the context mutably borrowed for all
// of `'a`; a separate, shorter borrow lifetime may be needed — confirm.
ctx: &'a mut ParseCtx<'a>,
}
impl<'a> TypeParser<'a> {
    /// Creates a type parser that operates on the given parse context.
    pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
        Self { ctx }
    }

    /// Parses a type of any kind.
    ///
    /// Not implemented yet: calling this currently panics via `todo!()`.
    fn parse_type(&mut self) -> Result<Type, ParseError> {
        todo!()
    }

    /// Parses a primitive type.
    ///
    /// Recognised forms are `!`, `()`, `bool`, the fixed-width integer and
    /// float keywords, `usize`/`isize`, and pointers (`*` followed by
    /// `mut` or `const` and a pointee type).
    ///
    /// # Errors
    ///
    /// * [`ParseError::EOF`] if the token source runs out while peeking.
    /// * [`ParseError::NotAType`] if the next token does not start a primitive
    ///   type, or if `*` is not followed by `mut` or `const`.
    ///
    /// # Panics
    ///
    /// Pointer types delegate to [`Self::parse_type`], which is still a
    /// `todo!()` stub.
    fn parse_primitive_type(&mut self) -> Result<Type, ParseError> {
        let mut peeking = self.ctx.source.borrow_peeking();
        let next = peeking.try_peek_next()?.token;

        let ty = match next {
            // NOTE(review): `!` is mapped to the top type here; in Rust `!` is
            // the never (bottom) type — confirm `Type::Top` is intended.
            Token::Bang => Type::Top,
            // `(` immediately followed by `)` is the unit type. A `(` followed by
            // anything else falls through to the catch-all arm below.
            Token::OpenParens if peeking.try_peek_next()?.token == Token::CloseParens => {
                Type::Unit
            }
            Token::Bool => Type::Bool,
            Token::I8 => Type::Int {
                signed: true,
                bits: 8,
            },
            Token::I16 => Type::Int {
                signed: true,
                bits: 16,
            },
            Token::I32 => Type::Int {
                signed: true,
                bits: 32,
            },
            Token::I64 => Type::Int {
                signed: true,
                bits: 64,
            },
            Token::U8 => Type::Int {
                signed: false,
                bits: 8,
            },
            Token::U16 => Type::Int {
                signed: false,
                bits: 16,
            },
            Token::U32 => Type::Int {
                signed: false,
                bits: 32,
            },
            Token::U64 => Type::Int {
                signed: false,
                bits: 64,
            },
            Token::F32 => Type::Float {
                float_type: FloatType::F32,
            },
            Token::F64 => Type::Float {
                float_type: FloatType::F64,
            },
            // TODO: Detect pointer size; `usize` is hard-coded to 64 bits for now.
            Token::USize => Type::Int {
                signed: false,
                bits: 64,
            },
            // TODO: Detect pointer size; `isize` is hard-coded to 64 bits for now.
            Token::ISize => Type::Int {
                signed: true,
                bits: 64,
            },
            Token::Star => {
                // A pointer must state its mutability: `*mut T` or `*const T`.
                // The qualifier is checked but not recorded in `Type::Pointer`.
                let _const_or_mut = peeking
                    .peek_one_of([Token::Mutable, Token::Const].iter().copied())
                    .ok_or(ParseError::NotAType)?;
                peeking.drain_peeked();
                // NOTE(review): `self.parse_type()` needs `&mut self` while
                // `peeking` (a borrow of `self.ctx.source`) is still used below;
                // confirm this passes the borrow checker once `parse_type` exists.
                Type::Pointer {
                    pointee: Box::new(self.parse_type()?),
                }
            }
            _ => return Err(ParseError::NotAType),
        };

        // Switch back to consuming mode at the peek cursor.
        // NOTE(review): presumably this commits the peeked tokens as consumed —
        // confirm against the lexer crate.
        let _cursor = peeking.reborrow_consuming_at_cursor();
        Ok(ty)
    }

    /// Parses an array type.
    ///
    /// Not implemented yet: calling this currently panics via `todo!()`.
    fn parse_array_type(&mut self) -> Result<Type, ParseError> {
        todo!()
    }

    /// Parses a function type.
    ///
    /// Not implemented yet: calling this currently panics via `todo!()`.
    fn parse_function_type(&mut self) -> Result<Type, ParseError> {
        todo!()
    }

    /// Parses a tuple type.
    ///
    /// Not implemented yet: calling this currently panics via `todo!()`.
    fn parse_tuple_type(&mut self) -> Result<Type, ParseError> {
        todo!()
    }
}
/// Parser for function prototypes, parameters and bodies.
struct FunctionParser<'a> {
/// The shared parse context this parser reads tokens from.
// NOTE(review): `&'a mut ParseCtx<'a>` reuses the context's own lifetime
// parameter for the borrow, which keeps the context mutably borrowed for all
// of `'a`; a separate, shorter borrow lifetime may be needed — confirm.
ctx: &'a mut ParseCtx<'a>,
}
impl<'a> FunctionParser<'a> {
pub fn new(ctx: &'a mut ParseCtx<'a>) -> Self {
Self { ctx }
}
fn parse_function_proto(&mut self) -> Result<AstNode, ParseError> {
todo!()
}
fn parse_parameter_list(&mut self) -> Result<AstNode, ParseError> {
todo!()
}
fn parse_parameter(&mut self) -> Result<AstNode, ParseError> {
todo!()
}
fn parse_function_body(&mut self) -> Result<AstNode, ParseError> {
todo!()
}
}
/// Iterating a `FileParser` yields one parse result per item.
impl<'a> Iterator for FileParser<'a> {
    type Item = Result<AstNode, ParseError>;

    /// Produces the next parse result.
    ///
    /// Not implemented yet: calling this currently panics via `todo!()`.
    fn next(&mut self) -> Option<Result<AstNode, ParseError>> {
        todo!()
    }
}
/// Extension trait that adds a `Result`-returning peek.
trait TryReborrowingPeekingExt<T> {
/// Peeks the next item and returns a reference to it, or a [`ParseError`] if
/// none is available. The implementation in this file reports
/// `ParseError::EOF` in that case.
fn try_peek_next(&mut self) -> Result<&T, ParseError>;
}
/// Extension trait that adds a `Result`-returning `next`.
trait TryReborrowingConsumingExt<T> {
/// Takes the next item, or returns a [`ParseError`] if none is available.
/// The implementation in this file reports `ParseError::EOF` in that case.
fn try_next(&mut self) -> Result<T, ParseError>;
}
impl<'a, I, T> TryReborrowingPeekingExt<T> for ReborrowingPeekingIterator<'a, 'a, I, T>
where
    I: Iterator<Item = T>,
{
    /// Peeks the next item, turning "no item" into [`ParseError::EOF`].
    fn try_peek_next(&mut self) -> Result<&T, ParseError> {
        match self.peek_next() {
            Some(item) => Ok(item),
            None => Err(ParseError::EOF),
        }
    }
}
impl<'a, I, T> TryReborrowingConsumingExt<T> for ReborrowingConsumingIterator<'a, 'a, I, T>
where
    I: Iterator<Item = T>,
{
    /// Takes the next item, turning "no item" into [`ParseError::EOF`].
    fn try_next(&mut self) -> Result<T, ParseError> {
        match self.next() {
            Some(item) => Ok(item),
            None => Err(ParseError::EOF),
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::AstNode;

    /// Prints the in-memory size of `AstNode` to stderr; makes no assertions.
    #[test]
    fn print_ast_node_size() {
        let node_size = std::mem::size_of::<AstNode>();
        eprintln!("Size of AstNode: {}", node_size);
    }
}