SeaLang/src/parser.rs

2433 lines
85 KiB
Rust

use itertools::Itertools;
use num_bigint::{BigInt, BigUint};
use crate::{
BitSize,
ast::{self, FloatingType, IntegralType, LetOrVar, Node, Tag, Type},
ast2::intern::{self, AMD64_POINTER_BITS},
common::NextIf,
comptime::{self, ComptimeNumber},
error::{AnalysisError, AnalysisErrorTag},
lexer::{Radix, TokenIterator},
symbol_table::{SymbolKind, SymbolTable},
tokens::{PRECEDENCE_MAP, Token},
};
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Unexpected end of token iter.")]
UnexpectedEndOfTokens,
#[error("Expected primitive type.")]
ExpectedPrimitiveType,
#[error("Expected token {0}.")]
ExpectedTokenNotFound(Token),
#[error("Dummy message.")]
ExpectedLetOrVar,
#[error("Dummy message.")]
IntegralTypeTooWide,
#[error("Dummy message.")]
TriedToDerefNonPointer,
}
/// Parser-local result type whose error is always this module's [`Error`].
pub type Result<T> = core::result::Result<T, Error>;
/// Flat storage of AST tags; a [`Node`] is used as an index into `inner`.
#[derive(Debug)]
pub struct Nodes {
// `Nodes::new` seeds this vector with a single `Tag::Root` entry.
inner: Vec<Tag>,
}
/// Shared access to the tag stored for a node; panics if the index is out of bounds.
impl core::ops::Index<Node> for Nodes {
    type Output = Tag;

    fn index(&self, index: Node) -> &Self::Output {
        let slot = index.get() as usize;
        &self.inner[slot]
    }
}
/// Mutable access to the tag stored for a node; panics if the index is out of bounds.
impl core::ops::IndexMut<Node> for Nodes {
    fn index_mut(&mut self, index: Node) -> &mut Self::Output {
        let slot = index.get() as usize;
        &mut self.inner[slot]
    }
}
impl Nodes {
fn new() -> Nodes {
Self {
inner: vec![Tag::Root],
}
}
fn len(&self) -> u32 {
self.inner.len() as u32
}
fn set_node(&mut self, node: Node, tag: Tag) {
*self.get_node_mut(node) = tag;
}
fn get_node_mut(&mut self, node: Node) -> &mut Tag {
self.inner.get_mut(node.get() as usize).unwrap()
}
pub fn get_node(&self, node: Node) -> &Tag {
self.inner.get(node.get() as usize).unwrap()
}
fn push_tag(&mut self, tag: Tag) -> Node {
let node = Node::new(self.len()).unwrap();
self.inner.push(tag);
node
}
fn reserve_node(&mut self) -> Node {
self.push_tag(Tag::Undefined)
}
#[allow(dead_code)]
fn swap_nodes(&mut self, lhs: Node, rhs: Node) {
self.inner.swap(lhs.get() as usize, rhs.get() as usize);
}
}
// TODO: add a string-table which stores strings and maybe other bytes and
// returns a range for identifiers, constants, etc. where bytes are stored
// flatly, and next to each other.
/// The parser's output and working state: node storage plus the tables that
/// the parsing methods fill in.
#[derive(Debug)]
pub struct Tree {
// Storage for every parsed node's tag.
pub nodes: Nodes,
// Symbol table; the parsing methods register variables, parameters and
// globals here.
pub st: SymbolTable,
// Intern pool holding identifier strings, types and constant values.
pub intern_pool: intern::InternPool,
// Top-level declarations, in the order `parse_program` encountered them.
pub global_decls: Vec<Node>,
}
/// Writes `indent` space characters followed by the formatted `args` to
/// `dst`, and appends a newline when `nl` is true.
///
/// Any error reported by the writer is returned immediately.
pub fn write_indented_inner<W: core::fmt::Write>(
    dst: &mut W,
    indent: u32,
    nl: bool,
    args: core::fmt::Arguments,
) -> std::result::Result<(), std::fmt::Error> {
    (0..indent).try_for_each(|_| dst.write_char(' '))?;
    dst.write_fmt(args)?;
    match nl {
        true => dst.write_char('\n'),
        false => Ok(()),
    }
}
/// Formats the trailing arguments like `write!` and forwards them to
/// `write_indented_inner` without a trailing newline.
///
/// Note the argument order: the indent comes first, then the writer.
#[macro_export]
macro_rules! write_indented {
($indent:expr, $w:expr, $($arg:tt)*) => {
$crate::parser::write_indented_inner($w, $indent, false, format_args!($($arg)*))
};
}
/// Formats the trailing arguments like `writeln!` and forwards them to
/// `write_indented_inner` with a trailing newline.
///
/// Note the argument order: the indent comes first, then the writer.
#[macro_export]
macro_rules! writeln_indented {
($indent:expr, $w:expr, $($arg:tt)*) => {
$crate::parser::write_indented_inner($w, $indent, true, format_args!($($arg)*))
};
}
impl Tree {
pub fn new() -> Tree {
Self {
nodes: Nodes::new(),
st: SymbolTable::new(),
intern_pool: intern::InternPool::new(),
global_decls: Vec::new(),
}
}
/// Pairs every recorded global declaration with its name.
///
/// Function declarations take their name from the prototype, global
/// constants from the declaration itself. Any other tag in `global_decls`
/// is treated as unreachable.
pub fn global_decls(&self) -> Vec<(Node, String)> {
    let mut named = Vec::with_capacity(self.global_decls.len());
    for &decl in &self.global_decls {
        let ident = match self.nodes.get_node(decl) {
            Tag::FunctionDecl { proto, .. } => match self.nodes.get_node(*proto) {
                Tag::FunctionProto { name, .. } => *name,
                _ => unreachable!(),
            },
            Tag::GlobalDecl { name, .. } => *name,
            _ => unreachable!(),
        };
        let name = self.get_ident_str(ident).unwrap().to_owned();
        named.push((decl, name));
    }
    named
}
/// Returns `Some(())` when `lexeme` is `u` or `i` followed by at least one
/// digit and nothing else, `None` otherwise.
#[allow(unused)]
fn is_integral_type(lexeme: &str) -> Option<()> {
    let mut chars = lexeme.chars();
    // signedness prefix
    chars.next_if(|&c| matches!(c, 'u' | 'i'))?;
    // at least one digit is mandatory
    chars.next_if(|&c| crate::common::is_digit(c))?;
    // swallow the remaining digits
    let _ = chars
        .take_while_ref(|&c| crate::common::is_digit(c))
        .count();
    match chars.next() {
        None => Some(()),
        Some(_) => None,
    }
}
// returns an option instead of a result because failure here means the
// lexeme is actually an identifier.
fn try_parse_integral_type(lexeme: &str) -> Result<Option<IntegralType>> {
let mut iter = lexeme.chars().peekable();
let signed = match iter.next() {
Some('u') => false,
Some('i') => true,
_ => {
return Ok(None);
}
};
// need 1 digit for an integral type
if iter.peek().map(|&c| crate::common::is_digit(c)) != Some(true) {
return Ok(None);
}
// need no nondigits after digits
if iter
.clone()
.skip_while(|&c| crate::common::is_digit(c))
.next()
.is_some()
{
return Ok(None);
}
let mut bits = 0u16;
loop {
let Some(digit) = iter.next().map(|c| c as u8 - b'0') else {
break;
};
match bits
.checked_mul(10)
.and_then(|bits| bits.checked_add(digit as u16))
{
Some(val) => {
bits = val;
}
None => {
// this IS an integral type, but it is bigger than u/i65535
return Err(Error::IntegralTypeTooWide);
}
}
}
Ok(Some(IntegralType { signed, bits }))
}
/// Parses an integral type suffix such as `u32` or `i8` into an
/// [`IntegralType`].
///
/// The first character selects signedness (`u` unsigned, `i`/`s` signed);
/// any other first character, or an empty lexeme, panics. The remaining
/// characters are accumulated as decimal digits with unchecked arithmetic.
fn parse_integral_type(lexeme: &str) -> IntegralType {
    let mut chars = lexeme.chars();
    let signed = match chars.next().unwrap() {
        'i' | 's' => true,
        'u' => false,
        _ => unreachable!(),
    };
    let mut bits = 0u16;
    for c in chars {
        let digit = c as u8 - b'0';
        bits = bits * 10 + digit as u16;
    }
    IntegralType { signed, bits }
}
/// Splits an integer literal into its magnitude and optional type suffix.
///
/// For binary, octal and hexadecimal literals the first two characters are
/// skipped before digits are read. `_` separators are dropped. When the
/// digits are followed by `u` or `i`, the rest of the lexeme is parsed as an
/// integral type.
fn parse_integral_constant(token: Token, lexeme: &str) -> (BigInt, Option<IntegralType>) {
    let radix = Radix::from_token(token).unwrap();
    // TODO: figure out how to do this safely for bigger types, whether to
    // wrap, saturate, or else.
    let iter = &mut lexeme.char_indices();
    if matches!(radix, Radix::Bin | Radix::Oct | Radix::Hex) {
        _ = iter.advance_by(2);
    }
    let digits = iter
        .take_while_ref(|&(_, c)| radix.is_digit()(c) || c == '_')
        .filter_map(|(_, c)| (c != '_').then_some(c))
        .collect::<Vec<_>>();
    let value = comptime::bigint::parse_bigint(digits.into_iter(), radix);
    // Whatever follows the digits is either a type suffix or nothing.
    let ty = match iter.clone().next() {
        Some((start, 'u' | 'i')) => Some(Self::parse_integral_type(&lexeme[start..])),
        _ => None,
    };
    let magnitude = BigUint::new(value);
    (BigInt::from_biguint(num_bigint::Sign::Plus, magnitude), ty)
}
/// Parses a floating-point literal into its raw bit pattern and type.
///
/// An `f32` suffix selects `Binary32`; an `f64` suffix or no suffix selects
/// `Binary64`. The bit pattern is returned widened to `u64`. Panics when the
/// text without its suffix is not parseable as a float.
fn parse_floating_constant(_token: Token, lexeme: &str) -> (u64, FloatingType) {
    let (digits, ty) = if let Some(rest) = lexeme.strip_suffix("f32") {
        (rest, FloatingType::Binary32)
    } else if let Some(rest) = lexeme.strip_suffix("f64") {
        (rest, FloatingType::Binary64)
    } else {
        (lexeme, FloatingType::Binary64)
    };
    let bits = match ty {
        FloatingType::Binary32 => digits.parse::<f32>().unwrap().to_bits() as u64,
        FloatingType::Binary64 => digits.parse::<f64>().unwrap().to_bits() as u64,
    };
    (bits, ty)
}
/// Consumes an identifier token, interns its text and pushes a
/// `Tag::Ident` node for it.
fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let ident_token = tokens.expect_token(Token::Ident)?;
    let interned = self.intern_pool.insert_string(ident_token.lexeme());
    let tag = Tag::Ident { name: interned };
    Ok(self.nodes.push_tag(tag))
}
/// Returns the interned name of an identifier node, or the empty-string
/// index when `node` is not an identifier.
fn ident_index(&self, node: Node) -> intern::Index {
    if let Tag::Ident { name } = &self.nodes[node] {
        *name
    } else {
        intern::Index::EMPTY_STRING
    }
}
pub fn parse_primitive_type(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let token = tokens.next().ok_or(Error::UnexpectedEndOfTokens)?;
let prim = match token.token() {
Token::Void => intern::Index::VOID,
Token::Bool => intern::Index::BOOL,
Token::F32 => intern::Index::F32,
Token::F64 => intern::Index::F64,
_ => {
return Err(Error::ExpectedPrimitiveType);
}
};
Ok(self.nodes.push_tag(Tag::PrimitiveType(prim)))
}
/// Parses `* const? TYPENAME` into a `Tag::Pointer` node.
pub fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    tokens.expect_token(Token::Star)?;
    // An optional `const` is consumed, but not recorded in the node.
    let _qualifier = tokens.eat_token(Token::Const);
    let pointee = self.parse_typename(tokens)?;
    Ok(self.nodes.push_tag(Tag::Pointer { pointee }))
}
/// Parses a type name: a pointer (`*` ...), an identifier (which may turn
/// out to be an integral type such as `u32`), or a primitive type.
pub fn parse_typename(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let upcoming = tokens.peek_token_or_err()?.token();
    match upcoming {
        Token::Star => return self.parse_pointer(tokens),
        Token::Ident => {}
        _ => return self.parse_primitive_type(tokens),
    }
    let token = tokens.next().unwrap();
    let lexeme = token.lexeme();
    let tag = match Self::try_parse_integral_type(lexeme)? {
        Some(int) => Tag::IntegralType(self.intern_pool.get_int_type(int.signed, int.bits)),
        None => Tag::Ident {
            name: self.intern_pool.insert_string(lexeme),
        },
    };
    Ok(self.nodes.push_tag(tag))
}
pub fn parse_var_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let let_or_var = match tokens
.eat_token(Token::Let)
.or_else(|| tokens.eat_token(Token::Var))
.map(|itm| itm.token())
.ok_or(Error::ExpectedLetOrVar)?
{
Token::Let => LetOrVar::Let,
Token::Var => LetOrVar::Var,
_ => unreachable!(),
};
let name = self.parse_ident(tokens)?;
let explicit_type = if tokens.eat_token(Token::Colon).is_some() {
Some(self.parse_typename(tokens)?)
} else {
None
};
let name_str = self.intern_pool.get_str(self.ident_index(name)).to_owned();
let decl = self.nodes.reserve_node();
let assignment = if tokens.eat_token(Token::Equal).is_some() {
Some(self.parse_expr(tokens)?)
} else {
None
};
// insert into symbol table after parsing assignment in case we are shadowing a previous variable binding
self.st.insert_symbol(&name_str, decl, SymbolKind::Var);
self.nodes.set_node(
decl,
Tag::VarDecl {
let_or_var,
name,
explicit_type,
assignment,
},
);
// return assignment if it exists, to make rendering and visiting easier
Ok(decl)
}
/// GLOBAL_DECL <-
///     const IDENTIFIER (: TYPENAME)? = EXPR;
///
/// Reuses the node of an already registered root symbol with the same name,
/// otherwise reserves a new node and registers it.
pub fn parse_global_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    _ = tokens.expect_token(Token::Const)?;
    let name = self.parse_ident(tokens)?;
    let explicit_type = tokens
        .eat_token(Token::Colon)
        .is_some()
        .then(|| self.parse_typename(tokens))
        .transpose()?;
    let name_str = self.get_ident_str(name).unwrap().to_owned();
    // inserting root symbol here is fine, because self-referencing globals are stupid
    let decl = match self.st.find_root_symbol(&name_str) {
        Some(existing) => existing.node(),
        None => self
            .st
            .insert_root_symbol(&name_str, self.nodes.reserve_node())
            .node(),
    };
    _ = tokens.expect_token(Token::Equal)?;
    let assignment = self.parse_expr(tokens)?;
    let tag = Tag::GlobalDecl {
        name,
        explicit_type,
        assignment,
    };
    self.nodes.set_node(decl, tag);
    tokens.expect_token(Token::Semi)?;
    Ok(decl)
}
/// PARAMETER <-
///     IDENTIFIER : TYPENAME
///
/// Registers the parameter in the symbol table as a variable.
pub fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let name = self.parse_ident(tokens)?;
    tokens.expect_token(Token::Colon)?;
    let ty = self.parse_typename(tokens)?;
    let param = self.nodes.reserve_node();
    let symbol_name = self.get_ident_str(name).unwrap().to_owned();
    self.st.insert_symbol(&symbol_name, param, SymbolKind::Var);
    self.nodes.set_node(param, Tag::Parameter { name, ty });
    Ok(param)
}
/// PARAMETER_LIST <-
///     PARAMETER
///     PARAMETER_LIST , PARAMETER
///
/// A comma that is not followed by an identifier is left unconsumed for the
/// caller.
pub fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let mut parameters = vec![self.parse_parameter(tokens)?];
    // Continue only on `, IDENT`.
    while tokens.is_next_token(Token::Comma) && tokens.is_next_token2(Token::Ident) {
        // skip comma
        _ = tokens.next();
        parameters.push(self.parse_parameter(tokens)?);
    }
    Ok(self.nodes.push_tag(Tag::ParameterList { parameters }))
}
/// FUNCTION_PROTO <-
///     fn IDENTIFIER ()
///     fn IDENTIFIER () -> TYPENAME
///     fn IDENTIFIER ( PARAMETER_LIST ,? )
///     fn IDENTIFIER ( PARAMETER_LIST ,? ) -> TYPENAME
///
/// Returns `(prototype node, name node)`. Without `-> TYPENAME` the return
/// type is a `void` primitive type node.
pub fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> Result<(Node, Node)> {
    tokens.expect_token(Token::Fn)?;
    let name = self.parse_ident(tokens)?;
    tokens.expect_token(Token::OpenParens)?;
    let parameters = if tokens.is_next_token(Token::CloseParens) {
        None
    } else {
        let list = self.parse_parameter_list(tokens)?;
        // trailing comma
        _ = tokens.eat_token(Token::Comma);
        Some(list)
    };
    tokens.expect_token(Token::CloseParens)?;
    let return_type = if tokens.eat_token(Token::MinusGreater).is_none() {
        self.nodes.push_tag(Tag::PrimitiveType(intern::Index::VOID))
    } else {
        self.parse_typename(tokens)?
    };
    let tag = Tag::FunctionProto {
        name,
        parameters,
        return_type,
    };
    let proto = self.nodes.push_tag(tag);
    Ok((proto, name))
}
/// FUNCTION_DECL <-
/// FUNCTION_PROTO BLOCK
///
/// Parses a prototype followed by a body block and returns the
/// `Tag::FunctionDecl` node.
pub fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let (proto, name) = self.parse_fn_proto(tokens)?;
// Reuse the node of an orderless symbol that already carries this name
// (`parse_primary_expr` reserves one when it meets an unknown identifier);
// otherwise reserve a new node and register it.
let decl = match self
.st
.find_orderless_symbol(self.get_ident_str(name).unwrap())
{
Some(record) => record.node(),
None => {
let decl = self.nodes.reserve_node();
self.st
.insert_orderless_symbol(&self.get_ident_str(name).unwrap().to_owned(), decl);
decl
}
};
// The body's node is reserved up front so it can be handed to
// `into_child` before the body is parsed.
// NOTE(review): presumably this opens a child scope keyed by the block node — confirm in SymbolTable.
let block = self.nodes.reserve_node();
self.st.into_child(block);
let body = self.parse_block(tokens, Some(block))?;
// Pull out the orderless symbols whose node is still `Tag::Undefined`
// and hand them back to the table after `into_parent`.
// NOTE(review): looks like this moves unresolved references up to the enclosing scope — confirm.
let unresolved = self
.st
.extract_orderless_if(|_, v| self.nodes.get_node(v.node()) == &Tag::Undefined)
.collect::<Vec<_>>();
self.st.into_parent();
self.st.extend_orderless(unresolved);
self.nodes.set_node(decl, Tag::FunctionDecl { proto, body });
Ok(decl)
}
/// BLOCK <-
/// { STATEMENT* EXPRESSION? }
pub fn parse_block(
&mut self,
tokens: &mut TokenIterator,
reserved_node: Option<Node>,
) -> Result<Node> {
let block = reserved_node.unwrap_or_else(|| self.nodes.reserve_node());
let mut stmts = Vec::new();
_ = tokens.expect_token(Token::OpenBrace)?;
loop {
if tokens.is_next_token(Token::CloseBrace) {
break self.nodes.set_node(
block,
Tag::Block {
statements: stmts,
trailing_expr: None,
},
);
}
match tokens.peek_token_or_err()?.token() {
Token::Return => {
stmts.push(self.try_parse_return_stmt(tokens)?.unwrap());
}
Token::Var | Token::Let => {
let node = self.parse_var_decl(tokens)?;
tokens.expect_token(Token::Semi)?;
stmts.push(node);
}
_ => {
let node = self.parse_expr(tokens)?;
match tokens.peek_token_or_err()?.token() {
Token::CloseBrace => {
break self.nodes.set_node(
block,
Tag::Block {
statements: stmts,
trailing_expr: Some(node),
},
);
}
Token::Semi => {
_ = tokens.next();
stmts.push(node);
}
_ => {
unreachable!()
}
}
}
}
}
tokens.expect_token(Token::CloseBrace)?;
Ok(block)
}
/// ASSIGNMENT_EXPR <-
///     BINARY_EXPRESSION
///     BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION
pub fn parse_assignment_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let lhs = self.parse_binary_expr(tokens, 0)?;
    match self.try_parse_assignment(lhs, tokens)? {
        Some(assignment) => Ok(assignment),
        None => Ok(lhs),
    }
}
/// ASSIGNMENT_EXPR <-
///     BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION
/// ASSIGNMENT_OP <-
///     = += -= *= /= %= ...
///
/// Returns `Ok(None)` without consuming anything when the next token is not
/// an assignment operator. Otherwise consumes the operator and the
/// right-hand expression and returns a `Tag::Assign` node.
///
/// Compound assignments are lowered to a plain assignment of the matching
/// binary operation: `a op= b` becomes `a = a op b`.
pub fn try_parse_assignment(
    &mut self,
    lhs: Node,
    tokens: &mut TokenIterator,
) -> Result<Option<Node>> {
    let is_assignment = tokens
        .peek_token()
        .map(|itm| itm.token().is_assignment_op())
        == Some(true);
    if !is_assignment {
        return Ok(None);
    }
    let op = tokens.next().unwrap();
    let rhs = self.parse_expr(tokens)?;
    let rhs = match op.token() {
        Token::PlusEqual => self.nodes.push_tag(Tag::Add { lhs, rhs }),
        Token::MinusEqual => self.nodes.push_tag(Tag::Sub { lhs, rhs }),
        Token::StarEqual => self.nodes.push_tag(Tag::Mul { lhs, rhs }),
        // `/=` must lower to a division (it was lowered to a subtraction).
        Token::SlashEqual => self.nodes.push_tag(Tag::Div { lhs, rhs }),
        Token::PercentEqual => self.nodes.push_tag(Tag::Rem { lhs, rhs }),
        Token::PipeEqual => self.nodes.push_tag(Tag::BitOr { lhs, rhs }),
        Token::CaretEqual => self.nodes.push_tag(Tag::BitXOr { lhs, rhs }),
        Token::AmpersandEqual => self.nodes.push_tag(Tag::BitAnd { lhs, rhs }),
        Token::LessLessEqual => self.nodes.push_tag(Tag::Shl { lhs, rhs }),
        Token::GreaterGreaterEqual => self.nodes.push_tag(Tag::Shr { lhs, rhs }),
        Token::Equal => rhs,
        _ => {
            unreachable!()
        }
    };
    Ok(Some(self.nodes.push_tag(Tag::Assign { lhs, rhs })))
}
/// RETURN_STATEMENT <-
///     return EXPRESSION? ;
///
/// Returns `Ok(None)` when the next token is not `return`.
pub fn try_parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> Result<Option<Node>> {
    if tokens.eat_token(Token::Return).is_none() {
        return Ok(None);
    }
    let expr = if tokens.is_next_token(Token::Semi) {
        None
    } else {
        Some(self.parse_expr(tokens)?)
    };
    tokens.expect_token(Token::Semi)?;
    let stmt = self.nodes.push_tag(Tag::ReturnStmt { expr });
    Ok(Some(stmt))
}
/// STATEMENT <-
///     RETURN_EXPRESSION
///     VAR_DECL ;
///     EXPRESSION ;
pub fn parse_statement(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let node = match tokens.peek_token_or_err()?.token() {
        // The return statement consumes its own semicolon.
        Token::Return => return Ok(self.try_parse_return_stmt(tokens)?.unwrap()),
        Token::Var | Token::Let => self.parse_var_decl(tokens)?,
        _ => self.parse_expr(tokens)?,
    };
    tokens.expect_token(Token::Semi)?;
    Ok(node)
}
/// BINARY_EXPR <-
/// AS_EXPR
/// AS_EXPR * EXPRESSION
/// AS_EXPR / EXPRESSION
/// AS_EXPR % EXPRESSION
/// AS_EXPR + EXPRESSION
/// AS_EXPR - EXPRESSION
/// AS_EXPR << EXPRESSION
/// AS_EXPR >> EXPRESSION
/// AS_EXPR < EXPRESSION
/// AS_EXPR > EXPRESSION
/// AS_EXPR <= EXPRESSION
/// AS_EXPR >= EXPRESSION
/// AS_EXPR == EXPRESSION
/// AS_EXPR != EXPRESSION
/// AS_EXPR & EXPRESSION
/// AS_EXPR ^ EXPRESSION
/// AS_EXPR | EXPRESSION
/// AS_EXPR && EXPRESSION
/// AS_EXPR || EXPRESSION
///
/// Parses a chain of binary operators whose precedence (looked up in
/// `PRECEDENCE_MAP`) is at least `precedence`. Callers start with `0`.
pub fn parse_binary_expr(
&mut self,
tokens: &mut TokenIterator,
precedence: u32,
) -> Result<Node> {
let mut node = self.parse_as_expr(tokens)?;
loop {
// Stop at end of input, ...
let Some(tok) = tokens.peek_token() else {
break;
};
// ... at any token that has no entry in the precedence map, ...
let Some(prec) = PRECEDENCE_MAP.get(&tok.token()).cloned() else {
break;
};
// ... or at an operator below the requested level; it is left
// unconsumed for an enclosing call.
if prec < precedence {
break;
}
let tok = tokens.next().unwrap();
let lhs = node;
// The right operand only accepts operators above `prec`, so operators
// of equal precedence are folded into `node` by this loop instead,
// which groups them to the left.
let rhs = self.parse_binary_expr(tokens, prec + 1)?;
let tag = match tok.token() {
Token::PipePipe => Tag::Or { lhs, rhs },
Token::AmpersandAmpersand => Tag::And { lhs, rhs },
Token::Pipe => Tag::BitOr { lhs, rhs },
Token::Caret => Tag::BitXOr { lhs, rhs },
Token::Ampersand => Tag::BitAnd { lhs, rhs },
Token::BangEqual => Tag::NEq { lhs, rhs },
Token::EqualEqual => Tag::Eq { lhs, rhs },
Token::LessEqual => Tag::Le { lhs, rhs },
Token::GreaterEqual => Tag::Ge { lhs, rhs },
Token::Less => Tag::Lt { lhs, rhs },
Token::Greater => Tag::Gt { lhs, rhs },
Token::GreaterGreater => Tag::Shr { lhs, rhs },
Token::LessLess => Tag::Shl { lhs, rhs },
Token::Plus => Tag::Add { lhs, rhs },
Token::Minus => Tag::Sub { lhs, rhs },
Token::Percent => Tag::Rem { lhs, rhs },
Token::Star => Tag::Mul { lhs, rhs },
Token::Slash => Tag::Div { lhs, rhs },
// Reached only if `PRECEDENCE_MAP` contains a token not listed above.
_ => unreachable!(),
};
node = self.nodes.push_tag(tag);
}
Ok(node)
}
/// PREFIX_EXPR <-
/// POSTFIX_EXPR
/// ! POSTFIX_EXPR
/// - POSTFIX_EXPR
/// & POSTFIX_EXPR
/// * POSTFIX_EXPR
pub fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
match tokens.peek_token_or_err()?.token() {
Token::Bang => {
_ = tokens.next();
let lhs = self.parse_postfix_expr(tokens)?;
Ok(self.nodes.push_tag(Tag::Not { lhs }))
}
Token::Minus => {
_ = tokens.next();
let lhs = self.parse_postfix_expr(tokens)?;
Ok(self.nodes.push_tag(Tag::Negate { lhs }))
}
Token::Ampersand => {
_ = tokens.next();
let lhs = self.parse_postfix_expr(tokens)?;
Ok(self.nodes.push_tag(Tag::Ref { lhs }))
}
Token::Star => {
_ = tokens.next();
let lhs = self.parse_postfix_expr(tokens)?;
Ok(self.nodes.push_tag(Tag::Deref { lhs }))
}
_ => self.parse_postfix_expr(tokens),
}
}
/// AS_EXPR <-
///     PREFIX_EXPR
///     PREFIX_EXPR as TYPENAME
pub fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let expr = self.parse_prefix_expr(tokens)?;
    if tokens.eat_token(Token::As).is_none() {
        return Ok(expr);
    }
    let typename = self.parse_typename(tokens)?;
    let cast = Tag::ExplicitCast {
        lhs: expr,
        typename,
    };
    Ok(self.nodes.push_tag(cast))
}
/// ARGUMENT <-
///     IDENT : EXPR
///     EXPR
///
/// The argument is treated as named when the token after the next one is `:`.
pub fn parse_argument(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let name = if tokens.is_next_token2(Token::Colon) {
        let ident = self.parse_ident(tokens)?;
        _ = tokens.expect_token(Token::Colon)?;
        Some(ident)
    } else {
        None
    };
    let expr = self.parse_expr(tokens)?;
    Ok(self.nodes.push_tag(Tag::Argument { name, expr }))
}
/// ARGUMENT_LIST <-
///     ARGUMENT
///     ARGUMENT_LIST , ARGUMENT
///
/// A comma directly followed by `)` is left unconsumed for the caller.
pub fn parse_argument_list(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let mut arguments = vec![self.parse_argument(tokens)?];
    // Continue on `,` unless it is a trailing comma before `)`.
    while tokens.is_next_token(Token::Comma) && !tokens.is_next_token2(Token::CloseParens) {
        // skip comma
        _ = tokens.next();
        arguments.push(self.parse_argument(tokens)?);
    }
    Ok(self.nodes.push_tag(Tag::ArgumentList { arguments }))
}
/// POSTFIX_EXPR <-
///     PRIMARY_EXPR
///     PRIMARY_EXPR ( )
///     PRIMARY_EXPR ( ARGUMENT_LIST )
///
/// A trailing comma after the argument list is accepted.
pub fn parse_postfix_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let lhs = self.parse_primary_expr(tokens)?;
    if tokens.eat_token(Token::OpenParens).is_none() {
        return Ok(lhs);
    }
    let rhs = if tokens.is_next_token(Token::CloseParens) {
        None
    } else {
        let list = self.parse_argument_list(tokens)?;
        _ = tokens.eat_token(Token::Comma);
        Some(list)
    };
    _ = tokens.expect_token(Token::CloseParens)?;
    Ok(self.nodes.push_tag(Tag::CallExpr { lhs, rhs }))
}
/// PRIMARY_EXPR <-
/// IDENTIFIER
/// INTEGER_CONSTANT
/// FLOATING_CONSTANT
/// ( EXPRESSION )
/// { STATEMENT* EXPRESSION? }
pub fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let token = tokens.peek_token_or_err()?;
match token.token() {
Token::Ident => {
let ident = tokens.expect_token(Token::Ident)?;
let name = ident.lexeme();
if let Some(record) = self.st.find_ordered_symbol(name) {
Ok(self.nodes.push_tag(Tag::DeclRef(record.node())))
} else if let Some(record) = self.st.find_orderless_symbol(name) {
Ok(self.nodes.push_tag(Tag::GlobalRef(record.node())))
} else {
let node = self
.st
.insert_orderless_symbol(name, self.nodes.reserve_node())
.node();
Ok(self.nodes.push_tag(Tag::GlobalRef(node)))
}
}
Token::IntegerBinConstant
| Token::IntegerHexConstant
| Token::IntegerOctConstant
| Token::IntegerConstant => {
_ = tokens.next();
let (bits, ty) = Self::parse_integral_constant(token.token(), token.lexeme());
let (_, bytes) = bits.to_bytes_le();
const BUF_SIZE: usize = core::mem::size_of::<u64>();
let mut buf = [0u8; BUF_SIZE];
buf[..bytes.len().min(BUF_SIZE)]
.copy_from_slice(&bytes[..bytes.len().min(BUF_SIZE)]);
let bytes = match bytes.len() {
0..2 => {
let (buf, _) = buf.split_at(core::mem::size_of::<u32>());
self.intern_pool.get_unsigned_integer(
u32::from_le_bytes(buf.try_into().unwrap()) as u64,
)
}
0..4 => {
let (buf, _) = buf.split_at(core::mem::size_of::<u64>());
self.intern_pool
.get_unsigned_integer(u64::from_le_bytes(buf.try_into().unwrap()))
}
0.. => self.intern_pool.insert_bytes(&bytes),
};
let ty = match ty {
Some(int) => Type::Integer(int),
None => Type::ComptimeNumber,
};
Ok(self.nodes.push_tag(Tag::Constant { bytes, ty }))
}
Token::FloatingConstant
| Token::FloatingExpConstant
| Token::DotFloatingConstant
| Token::DotFloatingExpConstant => {
_ = tokens.next();
let (bits, ty) = Self::parse_floating_constant(token.token(), token.lexeme());
let bytes = match ty {
FloatingType::Binary32 => self.intern_pool.get_or_insert(intern::Key::F32 {
bits: f32::from_bits(bits as u32),
}),
FloatingType::Binary64 => self.intern_pool.get_or_insert(intern::Key::F64 {
bits: f64::from_bits(bits),
}),
};
Ok(self.nodes.push_tag(Tag::Constant {
bytes,
ty: Type::Floating(ty),
}))
}
Token::OpenParens => {
_ = tokens.next();
let node = self.parse_expr(tokens)?;
tokens.expect_token(Token::CloseParens)?;
Ok(node)
}
Token::OpenBrace => {
let node = self.parse_block(tokens, None)?;
Ok(node)
}
_ => unreachable!(),
}
}
/// EXPR_OR_STATEMENT_OR_BLOCK <-
///     BLOCK
///     EXPR
///     EXPR ;
///
/// Always yields a `Tag::Block` node: a lone expression is wrapped as the
/// trailing expression, an expression followed by `;` as a single statement.
pub fn parse_expr_or_stmt_or_block(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let upcoming = tokens.peek_token_or_err()?.token();
    // block
    if let Token::OpenBrace = upcoming {
        let block = self.nodes.reserve_node();
        return self.parse_block(tokens, Some(block));
    }
    // expr
    let expr = self.parse_expr(tokens)?;
    let (statements, trailing_expr) = if tokens.eat_token(Token::Semi).is_some() {
        (vec![expr], None)
    } else {
        (vec![], Some(expr))
    };
    Ok(self.nodes.push_tag(Tag::Block {
        statements,
        trailing_expr,
    }))
}
/// ELSE_EXPR <-
///     'else' (IF_EXPR | EXPR_OR_STATEMENT_OR_BLOCK)
///
/// Returns `Ok(None)` when the next token is not `else`.
pub fn try_parse_else_expr(&mut self, tokens: &mut TokenIterator) -> Result<Option<Node>> {
    if tokens.eat_token(Token::Else).is_none() {
        return Ok(None);
    }
    let expr = match self.try_parse_if_expr(tokens)? {
        Some(if_expr) => if_expr,
        None => self.parse_expr_or_stmt_or_block(tokens)?,
    };
    Ok(Some(expr))
}
/// IF_EXPR <-
///     'if' ( EXPR ) EXPR_OR_STATEMENT_OR_BLOCK ELSE_EXPR?
///
/// Returns `Ok(None)` when the next token is not `if`.
pub fn try_parse_if_expr(&mut self, tokens: &mut TokenIterator) -> Result<Option<Node>> {
    if tokens.eat_token(Token::If).is_none() {
        return Ok(None);
    }
    _ = tokens.expect_token(Token::OpenParens)?;
    let condition = self.parse_expr(tokens)?;
    _ = tokens.expect_token(Token::CloseParens)?;
    let body = self.parse_expr_or_stmt_or_block(tokens)?;
    let tag = match self.try_parse_else_expr(tokens)? {
        Some(else_expr) => Tag::IfElseExpr {
            condition,
            body,
            else_expr,
        },
        None => Tag::IfExpr { condition, body },
    };
    Ok(Some(self.nodes.push_tag(tag)))
}
/// IF_EXPR <-
/// 'if' ( EXPR ) EXPR_OR_STATEMENT_OR_BLOCK ELSE_EXPR?
pub fn parse_if_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
self.try_parse_if_expr(tokens)?
.ok_or(Error::ExpectedTokenNotFound(Token::If))
}
/// EXPRESSION <-
///     IF_EXPR
///     | ASSIGNMENT_EXPR
pub fn parse_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let upcoming = tokens.peek_token_or_err()?.token();
    if matches!(upcoming, Token::If) {
        self.parse_if_expr(tokens)
    } else {
        self.parse_assignment_expr(tokens)
    }
}
/// PROGRAM <-
/// (FUNCTION_DECL | GLOBAL_DECL)*
pub fn parse_program(&mut self, tokens: &mut TokenIterator) -> Result<()> {
while tokens.peek_token().is_some() {
let Some(token) = tokens.peek_token().map(|itm| itm.token()) else {
break;
};
let decl = match token {
Token::Const => self.parse_global_decl(tokens)?,
Token::Fn => self.parse_fn_decl(tokens)?,
_ => {
eprintln!("unexpected token: {}", token);
panic!("unexpected token at global scope");
}
};
self.global_decls.push(decl);
}
Ok(())
}
/// Parses a whole program from `tokens` into this tree.
pub fn parse(&mut self, tokens: TokenIterator) -> Result<()> {
    let mut tokens = tokens;
    self.parse_program(&mut tokens)
}
/// Returns the text of an identifier node, or `None` when `node` is not an
/// identifier.
pub fn get_ident_str(&self, node: Node) -> Option<&str> {
    if let Tag::Ident { name } = &self.nodes[node] {
        Some(self.intern_pool.get_str(*name))
    } else {
        None
    }
}
/// Renders a type node as text, or returns `None` when `node` is not a type.
///
/// A pointer yields the text of its pointee, without any pointer marker.
fn get_typename_str(&self, node: Node) -> Option<String> {
    let text = match self.nodes.get_node(node) {
        Tag::Pointer { pointee } => return self.get_typename_str(*pointee),
        Tag::IntegralType(int) => int.to_string(),
        Tag::PrimitiveType(prim) => prim.to_string(),
        Tag::Ident { name } => self.intern_pool.get_str(*name).to_owned(),
        _ => return None,
    };
    Some(text)
}
}
impl Tree {
pub fn get_node_children(&self, node: Node) -> Vec<Node> {
match self.nodes.get_node(node) {
Tag::FunctionProto {
name,
parameters,
return_type,
} => {
if let Some(params) = parameters {
vec![*name, *params, *return_type]
} else {
vec![*name, *return_type]
}
}
Tag::ParameterList { parameters } => parameters.clone(),
Tag::Parameter { name, ty } => {
vec![*name, *ty]
}
Tag::Pointer { pointee } => {
vec![*pointee]
}
Tag::FunctionDecl { proto, body } => {
vec![*proto, *body]
}
Tag::Block {
statements,
trailing_expr,
} => {
let mut children = statements.clone();
if let Some(expr) = trailing_expr {
children.push(*expr);
}
children
}
Tag::ReturnStmt { expr } => expr.into_iter().cloned().collect::<Vec<_>>(),
&Tag::ExprStmt { expr } => {
vec![expr]
}
Tag::VarDecl {
name,
explicit_type,
assignment,
..
} => match (*explicit_type, *assignment) {
(None, Some(b)) => vec![*name, b],
(Some(a), None) => vec![*name, a],
(Some(a), Some(b)) => vec![*name, a, b],
_ => unreachable!(),
},
Tag::GlobalDecl {
name,
explicit_type,
assignment,
..
} => {
if let Some(ty) = *explicit_type {
vec![*name, ty, *assignment]
} else {
vec![*name, *assignment]
}
}
&Tag::CallExpr { lhs, rhs } => {
if let Some(rhs) = rhs {
vec![lhs, rhs]
} else {
vec![lhs]
}
}
Tag::ArgumentList {
arguments: parameters,
} => parameters.clone(),
&Tag::Argument { name, expr } => {
if let Some(name) = name {
vec![name, expr]
} else {
vec![expr]
}
}
&Tag::ExplicitCast { lhs, typename } => {
vec![lhs, typename]
}
Tag::Deref { lhs } | Tag::Ref { lhs } | Tag::Not { lhs } | Tag::Negate { lhs } => {
vec![*lhs]
}
Tag::Or { lhs, rhs }
| Tag::And { lhs, rhs }
| Tag::BitOr { lhs, rhs }
| Tag::BitAnd { lhs, rhs }
| Tag::BitXOr { lhs, rhs }
| Tag::Eq { lhs, rhs }
| Tag::NEq { lhs, rhs }
| Tag::Lt { lhs, rhs }
| Tag::Gt { lhs, rhs }
| Tag::Le { lhs, rhs }
| Tag::Ge { lhs, rhs }
| Tag::Shl { lhs, rhs }
| Tag::Shr { lhs, rhs }
| Tag::Add { lhs, rhs }
| Tag::Sub { lhs, rhs }
| Tag::Mul { lhs, rhs }
| Tag::Rem { lhs, rhs }
| Tag::Div { lhs, rhs }
| Tag::Assign { lhs, rhs } => {
vec![*lhs, *rhs]
}
_ => vec![],
}
}
}
impl Tree {
fn render_node<W: core::fmt::Write>(
&mut self,
writer: &mut W,
node: Node,
indent: u32,
) -> core::fmt::Result {
match self.nodes[node].clone() {
Tag::FunctionProto {
name,
parameters,
return_type,
} => {
self.render_node(writer, name, indent)?;
self.render_node(writer, return_type, indent)?;
if let Some(parameters) = parameters {
self.render_node(writer, parameters, indent)?;
}
write_indented!(indent, writer, "%{} = function_proto: {{", node.get())?;
write!(writer, "name: \"{}\"", self.get_ident_str(name).unwrap())?;
if let Some(parameters) = parameters {
write!(writer, ", parameters: %{}", parameters.get())?;
}
write!(writer, ", return_type: %{}", return_type.get())?;
writeln!(writer, "}}")
}
Tag::ParameterList { parameters } => {
writeln_indented!(indent, writer, "%{} = ParameterList [", node.get())?;
for param in parameters {
self.render_node(writer, param, indent + 1)?;
}
writeln_indented!(indent, writer, "]")
}
Tag::Parameter { name, ty } => {
writeln_indented!(
indent,
writer,
"%{} = {}: {},",
node.get(),
self.get_ident_str(name).unwrap(),
self.get_typename_str(ty).unwrap()
)
}
Tag::Pointer { .. } | Tag::IntegralType(_) | Tag::PrimitiveType(_) => {
writeln_indented!(
indent,
writer,
"%{} = type({})",
node.get(),
self.get_typename_str(node).unwrap()
)
}
Tag::PointerQualifier { .. } => todo!(),
Tag::FunctionDecl { proto, body } => {
self.render_node(writer, proto, indent)?;
writeln_indented!(
indent,
writer,
"%{} = function_decl( proto: %{}, body: %{}) {{",
node.get(),
proto.get(),
body.get()
)?;
self.render_node(writer, body, indent + 1)?;
writeln_indented!(indent, writer, "}}")
}
Tag::Ident { name } => {
writeln_indented!(
indent,
writer,
"%{} = identifier(\"{}\")",
node.get(),
self.intern_pool.get_str(name)
)
}
Tag::Block {
statements,
trailing_expr,
} => {
writeln_indented!(indent, writer, "%{} = {{", node.get())?;
self.st.into_child(node);
for stmt in statements {
self.render_node(writer, stmt, indent + 1)?;
}
if let Some(expr) = trailing_expr {
self.render_node(writer, expr, indent + 1)?;
writeln_indented!(
indent + 1,
writer,
"break %{} %{};",
node.get(),
expr.get()
)?;
}
self.st.into_parent();
writeln_indented!(indent, writer, "}}")
}
Tag::ReturnStmt { expr } => {
if let Some(expr) = expr {
self.render_node(writer, expr, indent)?;
writeln_indented!(indent, writer, "%{} = return %{};", node.get(), expr.get())
} else {
writeln_indented!(indent, writer, "%{} = return;", node.get())
}
}
Tag::ExprStmt { expr } => self.render_node(writer, expr, indent),
Tag::VarDecl {
let_or_var,
name,
explicit_type,
assignment,
} => {
self.render_node(writer, name, indent)?;
explicit_type.map(|node| self.render_node(writer, node, indent));
assignment.map(|node| self.render_node(writer, node, indent));
write_indented!(
indent,
writer,
"%{} = decl_{}(name: \"{}\"",
node.get(),
match let_or_var {
LetOrVar::Let => {
"const"
}
LetOrVar::Var => {
"mut"
}
},
self.get_ident_str(name).unwrap()
)?;
if let Some(ty) = explicit_type {
write!(writer, ", ty: {}", self.get_typename_str(ty).unwrap())?;
}
if let Some(assignment) = assignment {
write!(writer, ", value: %{assignment}")?;
}
writeln!(writer, ");")?;
Ok(())
}
Tag::GlobalDecl {
name,
explicit_type,
assignment,
} => {
self.render_node(writer, assignment, indent)?;
self.render_node(writer, name, indent)?;
explicit_type.map(|ty| self.render_node(writer, ty, indent));
write_indented!(
indent,
writer,
"%{} = global_decl(name: \"{}\"",
node.get(),
self.get_ident_str(name).unwrap()
)?;
if let Some(ty) = explicit_type {
write!(writer, ", ty: {}", self.get_typename_str(ty).unwrap())?;
}
write!(writer, ", value: %{assignment}")?;
writeln!(writer, ");")?;
Ok(())
}
Tag::CallExpr { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
if let Some(rhs) = rhs {
self.render_node(writer, rhs, indent)?;
writeln_indented!(indent, writer, "%{node} = call (%{lhs})(%{rhs})")
} else {
writeln_indented!(indent, writer, "%{node} = call (%{lhs})()")
}
}
Tag::ArgumentList { arguments } => {
writeln_indented!(indent, writer, "%{} = ArgumentList [", node.get())?;
for args in arguments {
self.render_node(writer, args, indent + 1)?;
}
writeln_indented!(indent, writer, "]")
}
Tag::Argument { name, expr } => {
if let Some(name) = name {
writeln_indented!(
indent,
writer,
"%{} = {}: %{expr},",
node.get(),
self.get_ident_str(name).unwrap(),
)
} else {
writeln_indented!(indent, writer, "%{} = %{expr},", node.get(),)
}
}
Tag::ExplicitCast { lhs, typename } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = cast<{}>(%{})",
node.get(),
self.get_typename_str(typename).unwrap(),
lhs.get()
)
}
Tag::Deref { lhs } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(indent, writer, "%{} = deref(%{})", node.get(), lhs.get())
}
Tag::Ref { lhs } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = address_of(%{})",
node.get(),
lhs.get()
)
}
Tag::Not { lhs } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(indent, writer, "%{} = ", node.get(),)
}
Tag::Negate { lhs } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(indent, writer, "%{} = not(%{})", node.get(), lhs.get())
}
Tag::Or { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} || %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::And { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} && %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitOr { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} | %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitAnd { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} & %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitXOr { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} ^ %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Eq { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} == %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::NEq { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} != %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Lt { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} < %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Gt { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} > %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Le { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} <= %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Ge { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} >= %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Shl { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} << %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Shr { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} >> %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Add { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} + %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Sub { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} - %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Mul { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} * %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Div { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} / %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Rem { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} % %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Assign { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = store (dst: %{}, val: %{})",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::DeclRef(decl) => {
writeln_indented!(
indent,
writer,
"%{} = decl_ref(%{}, name: {})",
node.get(),
decl.get(),
self.st
.find_symbol_by_decl(decl)
.map(|a| a.name())
.unwrap_or(&format!(
"SymbolTable entry not found?, %{}, %{}",
node.get(),
decl.get()
))
)
}
Tag::GlobalRef(decl) => {
writeln_indented!(
indent,
writer,
"%{} = global_ref(%{}, name: {})",
node.get(),
decl.get(),
self.st
.symbol_path(decl)
.map(|p| p.mangle(self))
.unwrap_or(format!(
"SymbolTable entry not found?, %{}, %{}",
node.get(),
decl.get()
))
)
}
Tag::Constant { bytes, ty } => {
writeln_indented!(
indent,
writer,
"%{} = constant{{ ty: {}, bytes: {:?}}}",
node.get(),
ty,
self.intern_pool.get_key(bytes)
)
}
Tag::ExprStatement { expr } => {
self.render_node(writer, expr, indent)?;
writeln_indented!(indent, writer, "%{node} = expr %{expr}",)
}
Tag::IfExpr { condition, body } => {
self.render_node(writer, condition, indent)?;
writeln_indented!(
indent,
writer,
"%{node} = if (condition: %{condition}, body: %{body}) {{",
)?;
self.render_node(writer, body, indent + 1)?;
writeln_indented!(indent, writer, "}})",)
}
Tag::IfElseExpr {
condition,
body,
else_expr,
} => {
self.render_node(writer, condition, indent)?;
writeln_indented!(
indent,
writer,
"%{node} = if (condition: %{condition}, body: %{body}) {{",
)?;
self.render_node(writer, body, indent + 1)?;
writeln_indented!(indent, writer, "}}, else: %{else_expr} {{",)?;
self.render_node(writer, else_expr, indent + 1)?;
writeln_indented!(indent, writer, "}})",)
}
_ => unreachable!(),
}
}
/// Renders every global declaration of the tree into `writer`, in the
/// order stored in `global_decls`, stopping at the first formatting error.
pub fn render<W: core::fmt::Write>(&mut self, writer: &mut W) -> core::fmt::Result {
    // Iterate over a copy of the list so `self` stays free for the mutable
    // borrow that `render_node` needs.
    let roots = self.global_decls.clone();
    roots
        .into_iter()
        .try_for_each(|root| self.render_node(&mut *writer, root, 0))
}
/// Returns the peer type of `lhs` and `rhs`.
///
/// # Panics
///
/// Panics with an "incompatible types" message naming both nodes and their
/// types when `peer_type_of_nodes` yields `None`.
pub fn peer_type_of_nodes_unwrap(&self, lhs: Node, rhs: Node) -> Type {
    // Build the diagnostic only on failure: the previous `expect(&{ .. })`
    // re-computed both node types and allocated the message on every call.
    self.peer_type_of_nodes(lhs, rhs).unwrap_or_else(|| {
        let at = self.type_of_node(lhs);
        let bt = self.type_of_node(rhs);
        panic!("incompatible types for %{lhs}({at}) and %{rhs}({bt})")
    })
}
/// Computes the common ("peer") type of two nodes.
///
/// Returns `None` when `equal_type` finds no peer type, or when one side is
/// a `ComptimeNumber` whose value cannot be determined by
/// `value_of_comptime_node`.
///
/// # Panics
///
/// Panics when a comptime number needs more bits than the type on the
/// opposite side provides.
pub fn peer_type_of_nodes(&self, lhs: Node, rhs: Node) -> Option<Type> {
    let lty = self.type_of_node(lhs);
    let rty = self.type_of_node(rhs);
    let peer = lty.equal_type(&rty)?;

    // Check the left side against the right type first, then the reverse.
    for (own_ty, own_node, other_ty) in [(&lty, lhs, &rty), (&rty, rhs, &lty)] {
        if *own_ty != Type::ComptimeNumber {
            continue;
        }
        let value = self.value_of_comptime_node(own_node)?;
        if value.bit_count() > other_ty.bit_width() {
            panic!("comptime number is incompatible with type {other_ty}!");
        }
    }

    Some(peer)
}
pub fn type_of_node(&self, node: Node) -> crate::ast::Type {
match self.nodes.get_node(node) {
Tag::FunctionDecl { proto, .. } => self.type_of_node(*proto),
Tag::FunctionProto {
parameters,
return_type,
..
} => {
let return_type = self.type_of_node(*return_type);
let parameter_types = parameters
.map(|p| match self.nodes.get_node(p) {
Tag::ParameterList { parameters } => parameters
.iter()
.map(|p| self.type_of_node(*p))
.collect::<Vec<_>>(),
_ => panic!("parameters is not a parameterlist!"),
})
.unwrap_or(Vec::new());
crate::ast::Type::Fn {
parameter_types,
return_type: Box::new(return_type),
}
}
Tag::Parameter { ty, .. } => self.type_of_node(*ty),
Tag::Pointer { pointee } => Type::Pointer {
constness: false,
pointee: Box::new(self.type_of_node(*pointee)),
},
Tag::Constant { ty, .. } => ty.clone(),
Tag::IntegralType(t) => self.intern_pool.as_ast1_type(AMD64_POINTER_BITS, *t),
Tag::PrimitiveType(t) => match *t {
intern::Index::F32 => Type::Floating(FloatingType::Binary32),
intern::Index::F64 => Type::Floating(FloatingType::Binary64),
intern::Index::BOOL => Type::Bool,
intern::Index::VOID => Type::Void,
_ => self.intern_pool.as_ast1_type(AMD64_POINTER_BITS, *t),
},
Tag::Block { trailing_expr, .. } => trailing_expr
.map(|n| self.type_of_node(n))
.unwrap_or(Type::void()),
Tag::GlobalDecl {
explicit_type,
assignment, // this is a Tag::Assign
..
} => {
let ty = match (explicit_type.as_ref(), assignment) {
(None, b) => self.type_of_node(*b),
(Some(a), b) => self.peer_type_of_nodes(*a, *b).expect(&{
let at = self.type_of_node(*a);
let bt = self.type_of_node(*b);
format!("incompatible types for %{a}({at}) and %{b}({bt})")
}),
};
ty
}
Tag::VarDecl {
explicit_type,
assignment, // this is NOT a Tag::Assign
..
} => {
let rhs = *assignment;
let ty = match (explicit_type.as_ref(), rhs.as_ref()) {
(None, None) => panic!("%{node}: no type specified?"),
(None, Some(b)) => self.type_of_node(*b),
(Some(a), None) => self.type_of_node(*a),
(Some(a), Some(b)) => self.peer_type_of_nodes(*a, *b).expect(&{
let at = self.type_of_node(*a);
let bt = self.type_of_node(*b);
format!("incompatible types for %{a}({at}) and %{b}({bt})")
}),
};
ty
}
Tag::CallExpr { lhs, .. } => self.type_of_node(*lhs).return_type().unwrap().clone(),
Tag::ExplicitCast { typename, .. } => self.type_of_node(*typename),
Tag::Deref { lhs } => self.type_of_node(*lhs).remove_ptr().unwrap(),
Tag::Ref { lhs } => self.type_of_node(*lhs).into_ptr(),
Tag::Not { lhs } => self.type_of_node(*lhs),
Tag::Negate { lhs } => self.type_of_node(*lhs),
Tag::Assign { lhs, rhs }
| Tag::Add { lhs, rhs }
| Tag::Sub { lhs, rhs }
| Tag::Mul { lhs, rhs }
| Tag::Rem { lhs, rhs }
| Tag::Div { lhs, rhs }
| Tag::Or { lhs, rhs }
| Tag::And { lhs, rhs }
| Tag::BitOr { lhs, rhs }
| Tag::BitAnd { lhs, rhs }
| Tag::BitXOr { lhs, rhs } => self.peer_type_of_nodes(*lhs, *rhs).expect(&{
let at = self.type_of_node(*lhs);
let bt = self.type_of_node(*rhs);
format!("incompatible types for %{lhs}({at}) and %{rhs}({bt})")
}),
Tag::Shl { lhs, .. } => self.type_of_node(*lhs),
Tag::Shr { lhs, .. } => self.type_of_node(*lhs),
Tag::Eq { .. } => Type::bool(),
Tag::NEq { .. } => Type::bool(),
Tag::Lt { .. } => Type::bool(),
Tag::Gt { .. } => Type::bool(),
Tag::Le { .. } => Type::bool(),
Tag::Ge { .. } => Type::bool(),
Tag::DeclRef(decl) => self.type_of_node(*decl),
Tag::GlobalRef(decl) => self.type_of_node(*decl),
Tag::IfExpr { .. } => Type::void(),
Tag::IfElseExpr {
body, else_expr, ..
} => self.peer_type_of_nodes(*body, *else_expr).expect(&{
let (lhs, rhs) = (body, else_expr);
let at = self.type_of_node(*lhs);
let bt = self.type_of_node(*rhs);
format!("incompatible types for %{lhs}({at}) and %{rhs}({bt})")
}),
_ => Type::void(),
}
}
}
// simplify tree with compile-time math
impl Tree {
/// Reports whether `node` can be evaluated at compile time.
///
/// Constants are comptime; blocks, casts and unary/binary operators are
/// comptime when their parts are (casts additionally require a bool,
/// comptime-number, integer or floating target type). A `DeclRef` is only
/// examined when `check_declrefs` is set; every recursive call passes
/// `true`. All other tags are not comptime.
pub fn is_node_comptime(&self, node: Node, check_declrefs: bool) -> bool {
    match self.nodes.get_node(node) {
        Tag::Constant { .. } => true,
        Tag::Block {
            statements,
            trailing_expr,
        } => statements
            .iter()
            .chain(trailing_expr.iter())
            .all(|&child| self.is_node_comptime(child, true)),
        &Tag::ExplicitCast { lhs, typename } => {
            self.is_node_comptime(lhs, true)
                && matches!(
                    self.type_of_node(typename),
                    Type::Bool | Type::ComptimeNumber | Type::Integer(_) | Type::Floating(_)
                )
        }
        &Tag::DeclRef(declaration) if check_declrefs => {
            let reference = node;
            let mut all_comptime = true;
            // Visit from the declaration up to this reference (presumably
            // in program order -- TODO confirm against `Visitor::new_seek`)
            // and inspect everything that touches the declaration.
            ast::tree_visitor::Visitor::new_seek(
                self,
                declaration,
                |_: &Tree, _| {},
                |tree: &Tree, visited| match tree.nodes.get_node(visited) {
                    &Tag::Assign { lhs, rhs } => {
                        let targets_declaration = lhs == declaration
                            || matches!(
                                tree.nodes.get_node(lhs),
                                &Tag::DeclRef(target) if target == declaration
                            );
                        if targets_declaration {
                            all_comptime &= self.is_node_comptime(rhs, true);
                        }
                    }
                    &Tag::Ref { lhs } if lhs == declaration => {
                        // recursively checking for derefs would get very complicated.
                        all_comptime = false;
                    }
                    _ => {}
                },
            )
            .until_after(reference)
            .visit(self);
            all_comptime
        }
        Tag::Not { lhs } | Tag::Negate { lhs } => self.is_node_comptime(*lhs, true),
        Tag::Or { lhs, rhs }
        | Tag::And { lhs, rhs }
        | Tag::BitOr { lhs, rhs }
        | Tag::BitAnd { lhs, rhs }
        | Tag::BitXOr { lhs, rhs }
        | Tag::Eq { lhs, rhs }
        | Tag::NEq { lhs, rhs }
        | Tag::Lt { lhs, rhs }
        | Tag::Gt { lhs, rhs }
        | Tag::Le { lhs, rhs }
        | Tag::Ge { lhs, rhs }
        | Tag::Shl { lhs, rhs }
        | Tag::Shr { lhs, rhs }
        | Tag::Add { lhs, rhs }
        | Tag::Sub { lhs, rhs }
        | Tag::Mul { lhs, rhs }
        | Tag::Rem { lhs, rhs }
        | Tag::Div { lhs, rhs } => {
            let (left, right) = (*lhs, *rhs);
            self.is_node_comptime(left, true) && self.is_node_comptime(right, true)
        }
        _ => false,
    }
}
/// Returns the compile-time value of `node`, if one can be found.
///
/// A `Constant` is converted directly. For a `DeclRef`, the right-hand
/// side of the last matching `Assign` seen by the visitor is evaluated
/// recursively. Every other tag yields `None`.
///
/// # Panics
///
/// `Tag::Block` is not implemented yet (`todo!()`).
pub fn value_of_comptime_node(&self, node: Node) -> Option<ComptimeNumber> {
    match self.nodes.get_node(node) {
        Tag::Constant { bytes, ty } => {
            let interned_ty = self.intern_pool.from_ast1_type(AMD64_POINTER_BITS, ty);
            Some(crate::ast2::interned_type_and_value_to_comptime_number(
                &self.intern_pool,
                AMD64_POINTER_BITS,
                interned_ty,
                *bytes,
            ))
        }
        Tag::Block { .. } => todo!(),
        &Tag::DeclRef(declaration) => {
            let reference = node;
            // Right-hand side of the most recently visited assignment whose
            // target is the declaration itself or a DeclRef to it.
            let mut latest_rhs = None;
            ast::tree_visitor::Visitor::new_seek(
                self,
                declaration,
                |_: &Tree, _| {},
                |tree: &Tree, visited| {
                    if let &Tag::Assign { lhs, rhs } = tree.nodes.get_node(visited) {
                        let targets_declaration = lhs == declaration
                            || matches!(
                                tree.nodes.get_node(lhs),
                                &Tag::DeclRef(target) if target == declaration
                            );
                        if targets_declaration {
                            latest_rhs = Some(rhs);
                        }
                    }
                },
            )
            .until_after(reference)
            .visit(self);
            let value_node = latest_rhs?;
            self.value_of_comptime_node(value_node)
        }
        _ => None,
    }
}
/// Visits the subtree rooted at `decl` and overwrites every node that
/// `fold_comptime_inner` can evaluate with an equivalent `Tag::Constant`.
fn fold_comptime_with_visitor(&mut self, decl: Node) {
    ast::tree_visitor::Visitor::new(
        decl,
        |_: &mut Tree, _| {},
        |tree: &mut Tree, node| {
            // Nodes that cannot be folded are left untouched.
            let Ok(number) = tree.fold_comptime_inner(node, false) else {
                return;
            };
            let (bytes, interned_ty) = crate::ast2::comptime_number_to_interned_type_and_value(
                &mut tree.intern_pool,
                AMD64_POINTER_BITS,
                number,
            );
            let ty = tree
                .intern_pool
                .as_ast1_type(AMD64_POINTER_BITS, interned_ty);
            tree.nodes.set_node(node, Tag::Constant { bytes, ty });
        },
    )
    .visit_mut(self);
}
fn fold_comptime_inner(
&mut self,
decl: Node,
check_declrefs: bool,
) -> comptime::Result<ComptimeNumber> {
if self.is_node_comptime(decl, check_declrefs) {
match self.nodes.get_node(decl) {
Tag::Constant { bytes, ty } => {
let ty = self.intern_pool.from_ast1_type(AMD64_POINTER_BITS, ty);
let number = crate::ast2::interned_type_and_value_to_comptime_number(
&self.intern_pool,
AMD64_POINTER_BITS,
ty,
*bytes,
);
return Ok(number);
}
Tag::Negate { lhs } => {
let lhs = self.fold_comptime_inner(*lhs, true)?;
return Ok(lhs.neg()?);
}
Tag::ExplicitCast { lhs, typename } => {
let ty = self.type_of_node(*typename);
let lhs = self.fold_comptime_inner(*lhs, true)?;
return match ty {
Type::Bool => lhs.into_bool(),
Type::Integer(ty) => lhs.into_int(ty),
Type::Floating(ty) => lhs.into_float(ty),
_ => unimplemented!(),
};
}
Tag::Not { lhs } => {
let lhs = self.fold_comptime_inner(*lhs, true)?;
return lhs.not();
}
Tag::Or { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.or(rhs);
}
Tag::And { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.and(rhs);
}
Tag::Eq { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.eq(rhs);
}
Tag::NEq { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.eq(rhs)?.not();
}
Tag::Lt { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.lt(rhs);
}
Tag::Gt { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.gt(rhs);
}
Tag::Le { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.le(rhs);
}
Tag::Ge { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.ge(rhs);
}
Tag::BitOr { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.bitor(rhs);
}
Tag::BitAnd { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.bitand(rhs);
}
Tag::BitXOr { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.bitxor(rhs);
}
Tag::Shl { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.shl(rhs);
}
Tag::Shr { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.shr(rhs);
}
Tag::Add { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.add(rhs);
}
Tag::Sub { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.sub(rhs);
}
Tag::Mul { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.mul(rhs);
}
Tag::Rem { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.rem(rhs);
}
Tag::Div { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.div(rhs);
}
&Tag::DeclRef(lhs) => {
let start = lhs;
let end = decl;
let mut last_value = None;
ast::tree_visitor::Visitor::new_seek(
self,start,
|_: &Tree, _| {
},
|tree: &Tree, node| match tree.nodes.get_node(node) {
&Tag::Assign { lhs, rhs } => {
if lhs == start || matches!(tree.nodes.get_node(lhs), &Tag::DeclRef(decl) if decl == start) {
last_value = Some(rhs);
}
}
_ => {}
},
)
.until_after(end)
.visit(self);
return self.fold_comptime_inner(
last_value.ok_or(comptime::Error::NotComptime)?,
true,
);
}
_ => {
unreachable!()
}
}
} else {
Err(comptime::Error::NotComptime)
}
}
/// Visits every node reachable from the global declarations and inserts
/// the node's type into the intern pool.
pub fn intern_types(&mut self) {
    let roots = self.global_decls.clone();
    roots.into_iter().for_each(|root| {
        ast::tree_visitor::Visitor::new(
            root,
            |_: &mut Tree, _| {},
            |tree: &mut Tree, node| {
                let node_ty = tree.type_of_node(node);
                tree.intern_pool
                    .insert_ast1_type(AMD64_POINTER_BITS, &node_ty);
            },
        )
        .visit_mut(self);
    });
}
/// Runs compile-time folding over every global declaration: the body of
/// each function and the assignment of each global.
///
/// # Panics
///
/// Panics (after logging the node to stderr) when a global declaration is
/// neither a `FunctionDecl` nor a `GlobalDecl`.
pub fn fold_comptime(&mut self) {
    let roots = self.global_decls.clone();
    for decl in roots {
        // Pick the subtree to fold first, so the borrow of `self.nodes`
        // has ended before folding starts.
        let target = match self.nodes.get_node(decl) {
            &Tag::FunctionDecl { body, .. } => body,
            &Tag::GlobalDecl { assignment, .. } => assignment,
            _ => {
                eprintln!("reached %{decl}:");
                unreachable!()
            }
        };
        self.fold_comptime_with_visitor(target);
    }
}
}
impl Tree {
/// Type-checks every global declaration and inserts explicit-cast nodes
/// where appropriate.
///
/// NOTE(review): the diagnostics gathered during the walk are collected
/// into a local vector that is dropped when this function returns; they
/// are not reported to the caller.
pub fn typecheck(&mut self) {
    let mut diagnostics = Vec::new();
    let roots = self.global_decls.clone();
    roots
        .into_iter()
        .for_each(|root| self.typecheck_node(&mut diagnostics, root));
}
// TODO: inline types into the AST proper before tackling this.
// for now, comptime_number is not supported in IR gen, then.
/// Type-checks `node`, appending any problems to `errors`.
///
/// Implemented so far:
/// - `FunctionDecl`: the body type must have a peer type with the declared
///   return type. If the body is a comptime number, its trailing
///   expression is wrapped in an explicit cast to that peer type.
/// - `Constant`: the constant's bits must fit into its type.
/// - `Block`, `ReturnStmt`, `ExprStmt`, `VarDecl`: recurse into children.
///
/// # Panics
///
/// Most expression tags are still `todo!()`; tags without any arm hit
/// `unreachable!()`.
fn typecheck_node(&mut self, errors: &mut Vec<AnalysisError>, node: Node) {
    // Several bindings below are placeholders for checks that are not
    // written yet.
    #[allow(unused_variables)]
    match self.nodes[node].clone() {
        Tag::FunctionProto { .. } => {}
        Tag::FunctionDecl { proto, body } => {
            let Tag::FunctionProto { return_type, .. } = self.nodes[proto] else {
                unreachable!()
            };
            let body_t = self.type_of_node(body);
            let ret_t = self.type_of_node(return_type);
            if let Some(peer_t) = body_t.equal_type(&ret_t) {
                if body_t == Type::comptime_number() {
                    let Tag::Block { trailing_expr, .. } = self.nodes[body] else {
                        unreachable!()
                    };
                    if let Some(expr) = trailing_expr {
                        // Give the comptime result a concrete type by
                        // casting it to the peer type.
                        let ty = self.nodes.push_tag(Tag::PrimitiveType(
                            self.intern_pool.from_ast1_type(AMD64_POINTER_BITS, &peer_t),
                        ));
                        let expr = self.nodes.push_tag(Tag::ExplicitCast {
                            lhs: expr,
                            typename: ty,
                        });
                        let Tag::Block { trailing_expr, .. } = &mut self.nodes[body] else {
                            unreachable!()
                        };
                        *trailing_expr = Some(expr)
                    }
                }
            } else {
                errors.push(AnalysisError::new(
                    AnalysisErrorTag::MismatchingTypesFunctionReturn,
                ));
            }
        }
        Tag::Constant { bytes, ty } => {
            let bits = match self.intern_pool.get_key(bytes) {
                intern::Key::F32 { .. } => 32,
                intern::Key::F64 { .. } => 64,
                intern::Key::UInt64 { bits } => bits.bits(),
                intern::Key::UIntSmall { bits } => bits.bits(),
                intern::Key::Bytes { bytes } => bytes.bits(),
                _ => {
                    unreachable!()
                }
            };
            // The type is too narrow when the constant needs MORE bits than
            // the type offers. The comparison used to be `<`, which
            // reported constants that fit.
            if bits > ty.bit_width() as u32 {
                errors.push(AnalysisError::new(
                    AnalysisErrorTag::InsufficientBitsInTypeForConstant(bits, ty.clone()),
                ));
            }
        }
        Tag::Block {
            statements,
            trailing_expr,
        } => {
            for statement in statements {
                self.typecheck_node(errors, statement);
            }
            if let Some(expr) = trailing_expr {
                self.typecheck_node(errors, expr);
            }
        }
        Tag::ReturnStmt { expr } => {
            if let Some(expr) = expr {
                self.typecheck_node(errors, expr);
            }
        }
        Tag::ExprStmt { expr } => {
            self.typecheck_node(errors, expr);
        }
        Tag::VarDecl {
            explicit_type,
            assignment,
            ..
        } => {
            if let Some(value) = assignment {
                self.typecheck_node(errors, value);
            }
            let explicit_t = explicit_type.map(|t| self.type_of_node(t));
            let assignment_t = assignment.map(|t| self.type_of_node(t));
            match (explicit_t, assignment_t) {
                (None, None) => unreachable!(),
                (Some(explicit_t), None) => {}
                (Some(explicit_t), Some(assignment_t)) => {
                    // TODO: ensure types match, explicit-cast comptime_number
                }
                (None, Some(assignment_t)) => {
                    // TODO: set explicit_type to assignment_t
                }
            }
        }
        // Not implemented yet.
        Tag::GlobalDecl { .. }
        | Tag::DeclRef(_)
        | Tag::GlobalRef(_)
        | Tag::CallExpr { .. }
        | Tag::ArgumentList { .. }
        | Tag::Argument { .. }
        | Tag::ExplicitCast { .. }
        | Tag::Deref { .. }
        | Tag::Ref { .. }
        | Tag::Not { .. }
        | Tag::Negate { .. }
        | Tag::Or { .. }
        | Tag::And { .. }
        | Tag::BitOr { .. }
        | Tag::BitAnd { .. }
        | Tag::BitXOr { .. }
        | Tag::Eq { .. }
        | Tag::NEq { .. }
        | Tag::Lt { .. }
        | Tag::Gt { .. }
        | Tag::Le { .. }
        | Tag::Ge { .. }
        | Tag::Shl { .. }
        | Tag::Shr { .. }
        | Tag::Add { .. }
        | Tag::Sub { .. }
        | Tag::Mul { .. }
        | Tag::Rem { .. }
        | Tag::Div { .. }
        | Tag::Assign { .. } => todo!(),
        _ => {
            unreachable!()
        }
    }
}
}