SeaLang/src/parser.rs

2242 lines
77 KiB
Rust

use std::{collections::HashMap, fmt::Display};
use itertools::Itertools;
use num_bigint::{BigInt, BigUint};
use crate::{
ast::{self, FloatingType, IntegralType, LetOrVar, Node, PrimitiveType, Tag, Type},
common::NextIf,
comptime::{self, ComptimeNumber},
error::{AnalysisError, AnalysisErrorTag},
lexer::{Radix, TokenIterator},
string_table::{ImmOrIndex, Index, StringTable},
symbol_table::{SymbolKind, SymbolTable},
tokens::Token,
variant,
};
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Unexpected end of token iter.")]
UnexpectedEndOfTokens,
#[error("Expected primitive type.")]
ExpectedPrimitiveType,
#[error("Expected token {0}.")]
ExpectedTokenNotFound(Token),
#[error("Dummy message.")]
ExpectedLetOrVar,
#[error("Dummy message.")]
IntegralTypeTooWide,
#[error("Dummy message.")]
TriedToDerefNonPointer,
}
pub type Result<T> = core::result::Result<T, Error>;
/// Flat storage of node tags; a `Node` handle is used as the index into it.
#[derive(Debug)]
pub struct Nodes {
// One `Tag` per node, in the order the nodes were pushed.
inner: Vec<Tag>,
}
/// Read-only `nodes[node]` access to the tag stored for a node handle.
impl core::ops::Index<Node> for Nodes {
    type Output = Tag;

    /// Borrows the tag stored for `index`; panics when the handle is out of range.
    fn index(&self, index: Node) -> &Self::Output {
        let slot = index.get() as usize;
        &self.inner[slot]
    }
}
/// Mutable `nodes[node]` access to the tag stored for a node handle.
impl core::ops::IndexMut<Node> for Nodes {
    /// Mutably borrows the tag stored for `index`; panics when the handle is out of range.
    fn index_mut(&mut self, index: Node) -> &mut Self::Output {
        let slot = index.get() as usize;
        &mut self.inner[slot]
    }
}
impl Nodes {
    /// Creates the storage with a single `Tag::Root` entry in slot 0.
    fn new() -> Nodes {
        Self {
            inner: vec![Tag::Root],
        }
    }

    /// Number of tags currently stored.
    ///
    /// # Panics
    /// Panics when the count does not fit in a `u32`, instead of silently
    /// truncating it and handing out handles that alias earlier nodes.
    fn len(&self) -> u32 {
        u32::try_from(self.inner.len()).expect("node count exceeds u32::MAX")
    }

    /// Overwrites the tag stored for `node`; panics when `node` is out of range.
    fn set_node(&mut self, node: Node, tag: Tag) {
        *self.get_node_mut(node) = tag;
    }

    /// Mutably borrows the tag stored for `node`; panics when `node` is out of range.
    fn get_node_mut(&mut self, node: Node) -> &mut Tag {
        self.inner.get_mut(node.get() as usize).unwrap()
    }

    /// Borrows the tag stored for `node`; panics when `node` is out of range.
    pub fn get_node(&self, node: Node) -> &Tag {
        self.inner.get(node.get() as usize).unwrap()
    }

    /// Appends `tag` and returns the handle of the new slot.
    fn push_tag(&mut self, tag: Tag) -> Node {
        // The handle is built from the index the tag is about to occupy.
        // `Node::new` is fallible; a failure here is treated as a bug.
        let node = Node::new(self.len()).unwrap();
        self.inner.push(tag);
        node
    }

    /// Appends a `Tag::Undefined` placeholder to be filled in later via `set_node`.
    fn reserve_node(&mut self) -> Node {
        self.push_tag(Tag::Undefined)
    }

    /// Exchanges the tags stored for `lhs` and `rhs`.
    fn swap_nodes(&mut self, lhs: Node, rhs: Node) {
        self.inner.swap(lhs.get() as usize, rhs.get() as usize);
    }
}
// TODO: add a string-table which stores strings and maybe other bytes and
// returns a range for identifiers, constants, etc. where bytes are stored
// flatly, and next to each other.
/// The parse result: node storage plus the tables the parser fills alongside it.
#[derive(Debug)]
pub struct Tree {
// Tags of all nodes created while parsing.
pub nodes: Nodes,
// Symbols registered for declarations, parameters and not-yet-defined names.
pub st: SymbolTable,
// Byte storage for identifier names and for integer constants kept out of line.
pub strings: StringTable,
// Nodes returned by the top-level declaration parsers, in source order.
pub global_decls: Vec<Node>,
}
/// Writes `indent` space characters followed by the formatted `args` to
/// `dst`, and finishes with a newline when `nl` is `true`.
///
/// Any error reported by the underlying writer is returned unchanged.
pub fn write_indented_inner<W: core::fmt::Write>(
    dst: &mut W,
    indent: u32,
    nl: bool,
    args: core::fmt::Arguments,
) -> std::result::Result<(), std::fmt::Error> {
    // one space per indentation level, stopping at the first write error
    (0..indent).try_for_each(|_| dst.write_char(' '))?;
    dst.write_fmt(args)?;
    match nl {
        true => dst.write_char('\n'),
        false => Ok(()),
    }
}
/// Writes `$indent` spaces and then the formatted arguments to `$w`, without
/// a trailing newline.
///
/// Expands to a call to `write_indented_inner` with `nl = false`, so the
/// invocation evaluates to that function's `Result`.
#[macro_export]
macro_rules! write_indented {
($indent:expr, $w:expr, $($arg:tt)*) => {
$crate::parser::write_indented_inner($w, $indent, false, format_args!($($arg)*))
};
}
/// Writes `$indent` spaces, the formatted arguments and a trailing newline to
/// `$w`.
///
/// Expands to a call to `write_indented_inner` with `nl = true`, so the
/// invocation evaluates to that function's `Result`.
#[macro_export]
macro_rules! writeln_indented {
($indent:expr, $w:expr, $($arg:tt)*) => {
$crate::parser::write_indented_inner($w, $indent, true, format_args!($($arg)*))
};
}
impl Tree {
pub fn new() -> Tree {
Self {
nodes: Nodes::new(),
st: SymbolTable::new(),
strings: StringTable::new(),
global_decls: Vec::new(),
}
}
/// Pairs every recorded global declaration node with its declared name.
///
/// # Panics
/// Panics when a recorded node is neither a `FunctionDecl` nor a
/// `GlobalDecl`, or when its name node is not an identifier.
pub fn global_decls(&self) -> Vec<(Node, String)> {
    let mut named = Vec::with_capacity(self.global_decls.len());
    for &decl in self.global_decls.iter() {
        // find the identifier node that carries the declaration's name
        let ident = match self.nodes.get_node(decl) {
            Tag::FunctionDecl { proto, .. } => match self.nodes.get_node(*proto) {
                Tag::FunctionProto { name, .. } => *name,
                _ => unreachable!(),
            },
            Tag::GlobalDecl { name, .. } => *name,
            _ => unreachable!(),
        };
        named.push((decl, self.get_ident_str(ident).unwrap().to_owned()));
    }
    named
}
// Shape check for integral type names: a `u` or `i`, then one or more digits,
// then nothing else. Yields `Some(())` on a match and `None` otherwise.
// Currently unused, hence the `allow`.
#[allow(unused)]
fn is_integral_type(lexeme: &str) -> Option<()> {
let mut iter = lexeme.chars();
// `next_if` comes from `crate::common::NextIf`; presumably it consumes the
// next char only when the predicate accepts it — TODO confirm.
iter.next_if(|&c| c == 'u' || c == 'i')?;
// at least one digit is required
iter.next_if(|&c| crate::common::is_digit(c))?;
// skip the remaining digits; `take_while_ref` leaves the first rejected char in `iter`
iter.take_while_ref(|&c| crate::common::is_digit(c)).count();
// anything left over means this is not an integral type name
iter.next().is_none().then_some(())
}
// returns an option instead of a result because failure here means the
// lexeme is actually an identifier.
fn try_parse_integral_type(lexeme: &str) -> Result<Option<IntegralType>> {
let mut iter = lexeme.chars().peekable();
let signed = match iter.next() {
Some('u') => false,
Some('i') => true,
_ => {
return Ok(None);
}
};
// need 1 digit for an integral type
if iter.peek().map(|&c| crate::common::is_digit(c)) != Some(true) {
return Ok(None);
}
// need no nondigits after digits
if iter
.clone()
.skip_while(|&c| crate::common::is_digit(c))
.next()
.is_some()
{
return Ok(None);
}
let mut bits = 0u16;
loop {
let Some(digit) = iter.next().map(|c| c as u8 - b'0') else {
break;
};
match bits
.checked_mul(10)
.and_then(|bits| bits.checked_add(digit as u16))
{
Some(val) => {
bits = val;
}
None => {
// this IS an integral type, but it is bigger than u/i65535
return Err(Error::IntegralTypeTooWide);
}
}
}
Ok(Some(IntegralType { signed, bits }))
}
/// Parses an integral type suffix such as `u32` or `i8` into an
/// [`IntegralType`].
///
/// The first character selects signedness (`u` is unsigned, `i` or `s` is
/// signed); the remaining characters are read as the decimal bit width.
/// The caller must pass a lexeme of exactly that shape.
///
/// A width that does not fit in a `u16` saturates at `u16::MAX`. It
/// previously overflowed, which panicked in debug builds and silently
/// wrapped to a small width in release builds.
///
/// # Panics
/// Panics when `lexeme` is empty or starts with an unexpected character.
fn parse_integral_type(lexeme: &str) -> IntegralType {
    let mut iter = lexeme.chars();
    let signed = match iter.next().unwrap() {
        'u' => false,
        'i' | 's' => true,
        _ => unreachable!(),
    };
    let bits = iter.fold(0u16, |acc, c| {
        // the caller guarantees that only decimal digits follow the sign letter
        let digit = c as u8 - b'0';
        acc.saturating_mul(10).saturating_add(u16::from(digit))
    });
    IntegralType { signed, bits }
}
fn parse_integral_constant(token: Token, lexeme: &str) -> (BigInt, Option<IntegralType>) {
let radix = Radix::from_token(token).unwrap();
// TODO: figure out how to do this safely for bigger types, whether to
// wrap, saturate, or else.
let iter = &mut lexeme.char_indices();
match radix {
Radix::Bin | Radix::Oct | Radix::Hex => {
_ = iter.advance_by(2);
}
_ => {}
}
let digits = iter
.take_while_ref(|&(_, c)| radix.is_digit()(c) || c == '_')
.filter(|&(_, c)| c != '_')
.map(|(_, c)| c)
.collect::<Vec<_>>();
let value = comptime::bigint::parse_bigint(digits.into_iter(), radix);
let ty = match iter.clone().next() {
Some((_, 'u')) | Some((_, 'i')) => {
Some(Self::parse_integral_type(&lexeme[iter.next().unwrap().0..]))
}
_ => None,
};
(
BigInt::from_biguint(num_bigint::Sign::Plus, BigUint::new(value)),
ty,
)
}
/// Parses a floating point literal into its raw bit pattern and type.
///
/// An `f32` suffix selects `Binary32`; an `f64` suffix, or no suffix at
/// all, selects `Binary64`. The bit pattern is returned widened to `u64`.
///
/// # Panics
/// Panics when the text before the suffix cannot be parsed as a float.
fn parse_floating_constant(_token: Token, lexeme: &str) -> (u64, FloatingType) {
    // `_token` distinguishes the dot/exponent literal forms; the std float
    // parser is applied to the text regardless, so it is unused here.
    let (text, ty) = if let Some(rest) = lexeme.strip_suffix("f32") {
        (rest, FloatingType::Binary32)
    } else if let Some(rest) = lexeme.strip_suffix("f64") {
        (rest, FloatingType::Binary64)
    } else {
        (lexeme, FloatingType::Binary64)
    };
    let bits = match ty {
        FloatingType::Binary32 => text.parse::<f32>().unwrap().to_bits() as u64,
        FloatingType::Binary64 => text.parse::<f64>().unwrap().to_bits() as u64,
    };
    (bits, ty)
}
/// Consumes an identifier token, stores its text in the string table and
/// returns a new `Tag::Ident` node referring to it.
fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let token = tokens.expect_token(Token::Ident)?;
    let tag = Tag::Ident {
        name: self.strings.insert(token.lexeme().as_bytes()),
    };
    Ok(self.nodes.push_tag(tag))
}
/// Returns the string table index of an identifier node, or
/// `Index::new(0, 0)` when `node` is not a `Tag::Ident`.
fn ident_index(&self, node: Node) -> Index {
    if let Tag::Ident { name } = &self.nodes[node] {
        *name
    } else {
        Index::new(0, 0)
    }
}
pub fn parse_primitive_type(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let token = tokens.next().ok_or(Error::UnexpectedEndOfTokens)?;
let prim = match token.token() {
Token::Void => PrimitiveType::Void,
Token::Bool => PrimitiveType::Bool,
Token::F32 => PrimitiveType::FloatingType(FloatingType::Binary32),
Token::F64 => PrimitiveType::FloatingType(FloatingType::Binary64),
_ => {
return Err(Error::ExpectedPrimitiveType);
}
};
Ok(self.nodes.push_tag(Tag::PrimitiveType(prim)))
}
/// POINTER <-
///     * const? TYPENAME
///
/// The optional `const` is consumed but not recorded in the node.
pub fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    tokens.expect_token(Token::Star)?;
    _ = tokens.eat_token(Token::Const);
    let pointee = self.parse_typename(tokens)?;
    Ok(self.nodes.push_tag(Tag::Pointer { pointee }))
}
/// TYPENAME <-
///     POINTER
///     INTEGRAL_TYPE
///     IDENTIFIER
///     PRIMITIVE_TYPE
///
/// An identifier token of the form `u<bits>` / `i<bits>` becomes an
/// integral type node; any other identifier becomes an `Ident` node.
pub fn parse_typename(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    match tokens.peek_token_or_err()?.token() {
        Token::Star => self.parse_pointer(tokens),
        Token::Ident => {
            let ident = tokens.next().unwrap();
            let tag = if let Some(int) = Self::try_parse_integral_type(ident.lexeme())? {
                Tag::IntegralType(int)
            } else {
                Tag::Ident {
                    name: self.strings.insert(ident.lexeme().as_bytes()),
                }
            };
            Ok(self.nodes.push_tag(tag))
        }
        _ => self.parse_primitive_type(tokens),
    }
}
/// VAR_DECL <-
///     (let | var) IDENTIFIER (: TYPENAME)? (= EXPRESSION)?
///
/// Registers the variable in the symbol table and creates a `Tag::VarDecl`
/// node. Returns the `Tag::Assign` node when an initializer is present and
/// the `VarDecl` node otherwise. The terminating `;` is left for the caller.
pub fn parse_var_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
// the declaration must start with `let` or `var`
let let_or_var = match tokens
.eat_token(Token::Let)
.or_else(|| tokens.eat_token(Token::Var))
.map(|itm| itm.token())
.ok_or(Error::ExpectedLetOrVar)?
{
Token::Let => LetOrVar::Let,
Token::Var => LetOrVar::Var,
_ => unreachable!(),
};
let name = self.parse_ident(tokens)?;
// optional `: TYPENAME`
let explicit_type = if tokens.eat_token(Token::Colon).is_some() {
Some(self.parse_typename(tokens)?)
} else {
None
};
let name_str = self.strings.get_str(self.ident_index(name)).to_owned();
// reserve the declaration's node and register the symbol before the
// initializer is parsed; the node is filled in once all parts are known
let node = {
let node = self.nodes.reserve_node();
self.st.insert_symbol(&name_str, node, SymbolKind::Var);
node
};
// optional `= EXPRESSION`, stored as an assignment to the declaration node
let assignment = if tokens.eat_token(Token::Equal).is_some() {
let expr = self.parse_expr(tokens)?;
Some(self.nodes.push_tag(Tag::Assign {
lhs: node,
rhs: expr,
}))
} else {
None
};
self.nodes.set_node(
node,
Tag::VarDecl {
let_or_var,
name,
explicit_type,
assignment,
},
);
// return assignment if it exists, to make rendering and visiting easier
Ok(assignment.unwrap_or(node))
}
/// GLOBAL_DECL <-
/// const IDENTIFIER (: TYPENAME)? = EXPR;
///
/// Creates (or fills in) a `Tag::GlobalDecl` node and returns the
/// `Tag::Assign` node that stores the initializer into it. Unlike
/// `parse_var_decl`, the initializer is mandatory and the trailing `;` is
/// consumed here.
pub fn parse_global_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
_ = tokens.expect_token(Token::Const)?;
let name = self.parse_ident(tokens)?;
// optional `: TYPENAME`
let explicit_type = if tokens.eat_token(Token::Colon).is_some() {
Some(self.parse_typename(tokens)?)
} else {
None
};
let name_str = self.get_ident_str(name).unwrap().to_owned();
// reuse the node of an already registered root symbol with this name,
// otherwise reserve a fresh node and register it as a root symbol
let node = {
let node = match self.st.find_root_symbol(&name_str) {
Some(r) => r.node(),
None => self
.st
.insert_root_symbol(&name_str, self.nodes.reserve_node())
.node(),
};
node
};
_ = tokens.expect_token(Token::Equal)?;
let assignment = {
let expr = self.parse_expr(tokens)?;
self.nodes.push_tag(Tag::Assign {
lhs: node,
rhs: expr,
})
};
// the declaration node is written last; this overwrites whatever tag the
// node held before
self.nodes.set_node(
node,
Tag::GlobalDecl {
name,
explicit_type,
assignment,
},
);
tokens.expect_token(Token::Semi)?;
Ok(assignment)
}
/// PARAMETER <-
///     IDENTIFIER : TYPENAME
///
/// Creates a `Tag::Parameter` node and registers the parameter name as a
/// variable symbol pointing at it.
pub fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let name = self.parse_ident(tokens)?;
    tokens.expect_token(Token::Colon)?;
    let ty = self.parse_typename(tokens)?;
    let symbol_name = self.get_ident_str(name).unwrap().to_owned();
    let param = self.nodes.push_tag(Tag::Parameter { name, ty });
    self.st.insert_symbol(&symbol_name, param, SymbolKind::Var);
    Ok(param)
}
/// PARAMETER_LIST <-
///     PARAMETER
///     PARAMETER_LIST , PARAMETER
///
/// A comma is consumed only when an identifier follows it, so a trailing
/// comma is left in the stream for the caller.
pub fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let mut parameters = vec![self.parse_parameter(tokens)?];
    while tokens.is_next_token(Token::Comma) && tokens.is_next_token2(Token::Ident) {
        // skip the comma, then read the parameter behind it
        _ = tokens.next();
        parameters.push(self.parse_parameter(tokens)?);
    }
    Ok(self.nodes.push_tag(Tag::ParameterList { parameters }))
}
/// FUNCTION_PROTO <-
///     fn IDENTIFIER ()
///     fn IDENTIFIER () -> TYPENAME
///     fn IDENTIFIER ( PARAMETER_LIST ,? )
///     fn IDENTIFIER ( PARAMETER_LIST ,? ) -> TYPENAME
///
/// Returns the prototype node together with the node of the function's
/// name. A missing return type is represented by a `void` type node.
pub fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> Result<(Node, Node)> {
    tokens.expect_token(Token::Fn)?;
    let name = self.parse_ident(tokens)?;
    tokens.expect_token(Token::OpenParens)?;
    let parameters = if tokens.is_next_token(Token::CloseParens) {
        None
    } else {
        let list = self.parse_parameter_list(tokens)?;
        // optional trailing comma
        _ = tokens.eat_token(Token::Comma);
        Some(list)
    };
    tokens.expect_token(Token::CloseParens)?;
    let return_type = if tokens.eat_token(Token::MinusGreater).is_none() {
        self.nodes.push_tag(Tag::PrimitiveType(PrimitiveType::Void))
    } else {
        self.parse_typename(tokens)?
    };
    let tag = Tag::FunctionProto {
        name,
        parameters,
        return_type,
    };
    Ok((self.nodes.push_tag(tag), name))
}
/// FUNCTION_DECL <-
/// FUNCTION_PROTO BLOCK
///
/// Parses a prototype and body and stores them as a `Tag::FunctionDecl`.
/// Returns the declaration node.
pub fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let (proto, name) = self.parse_fn_proto(tokens)?;
// reuse the node of an orderless symbol with this name if one exists
// (`parse_primary_expr` reserves one when it meets an unknown identifier),
// otherwise reserve a node and register it
let decl = match self
.st
.find_orderless_symbol(self.get_ident_str(name).unwrap())
{
Some(record) => record.node(),
None => {
let decl = self.nodes.reserve_node();
self.st
.insert_orderless_symbol(&self.get_ident_str(name).unwrap().to_owned(), decl);
decl
}
};
// the body's node is reserved up front so the symbol table can enter the
// scope keyed by it before the block is parsed
let block = self.nodes.reserve_node();
self.st.into_child(block);
let body = self.parse_block(tokens, Some(block))?;
// pull out the orderless symbols whose node is still `Tag::Undefined` and
// hand them to the parent scope after leaving this one
let unresolved = self
.st
.extract_orderless_if(|_, v| self.nodes.get_node(v.node()) == &Tag::Undefined)
.collect::<Vec<_>>();
self.st.into_parent();
self.st.extend_orderless(unresolved);
self.nodes.set_node(decl, Tag::FunctionDecl { proto, body });
Ok(decl)
}
/// BLOCK <-
/// { STATEMENT* EXPRESSION? }
pub fn parse_block(
&mut self,
tokens: &mut TokenIterator,
reserved_node: Option<Node>,
) -> Result<Node> {
let block = reserved_node.unwrap_or_else(|| self.nodes.reserve_node());
let mut stmts = Vec::new();
_ = tokens.expect_token(Token::OpenBrace)?;
loop {
if tokens.is_next_token(Token::CloseBrace) {
break self.nodes.set_node(
block,
Tag::Block {
statements: stmts,
trailing_expr: None,
},
);
}
match tokens.peek_token_or_err()?.token() {
Token::Return => {
stmts.push(self.try_parse_return_stmt(tokens)?.unwrap());
}
Token::Var | Token::Let => {
let node = self.parse_var_decl(tokens)?;
tokens.expect_token(Token::Semi)?;
stmts.push(node);
}
_ => {
let node = self.parse_expr(tokens)?;
match tokens.peek_token_or_err()?.token() {
Token::CloseBrace => {
break self.nodes.set_node(
block,
Tag::Block {
statements: stmts,
trailing_expr: Some(node),
},
);
}
Token::Semi => {
_ = tokens.next();
stmts.push(node);
}
_ => {
unreachable!()
}
}
}
}
}
tokens.expect_token(Token::CloseBrace)?;
Ok(block)
}
/// ASSIGNMENT_EXPR <-
///     BINARY_EXPRESSION
///     BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION
///
/// Returns the assignment node when an assignment operator follows the
/// binary expression, and the binary expression itself otherwise.
pub fn parse_assignment_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let lhs = self.parse_binary_expr(tokens, 0)?;
    match self.try_parse_assignment(lhs, tokens)? {
        Some(assignment) => Ok(assignment),
        None => Ok(lhs),
    }
}
/// ASSIGNMENT_EXPR <-
///     BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION
/// ASSIGNMENT_OP <-
///     = += -= *= /= %= ...
///
/// Parses the operator and right-hand side of an assignment whose
/// left-hand side `lhs` has already been parsed. Returns `Ok(None)` without
/// consuming anything when the next token is not an assignment operator.
///
/// Compound operators are desugared: `a op= b` becomes an `Assign` of
/// `a op b` to `a`. `/=` now builds a `Div` node; it used to build `Sub`.
pub fn try_parse_assignment(
    &mut self,
    lhs: Node,
    tokens: &mut TokenIterator,
) -> Result<Option<Node>> {
    let is_assignment = tokens
        .peek_token()
        .map(|itm| itm.token().is_assignment_op())
        == Some(true);
    if !is_assignment {
        return Ok(None);
    }
    let op = tokens.next().unwrap();
    let rhs = self.parse_expr(tokens)?;
    // the value that ends up being stored into `lhs`
    let rhs = match op.token() {
        Token::PlusEqual => self.nodes.push_tag(Tag::Add { lhs, rhs }),
        Token::MinusEqual => self.nodes.push_tag(Tag::Sub { lhs, rhs }),
        Token::StarEqual => self.nodes.push_tag(Tag::Mul { lhs, rhs }),
        Token::SlashEqual => self.nodes.push_tag(Tag::Div { lhs, rhs }),
        Token::PercentEqual => self.nodes.push_tag(Tag::Rem { lhs, rhs }),
        Token::PipeEqual => self.nodes.push_tag(Tag::BitOr { lhs, rhs }),
        Token::CaretEqual => self.nodes.push_tag(Tag::BitXOr { lhs, rhs }),
        Token::AmpersandEqual => self.nodes.push_tag(Tag::BitAnd { lhs, rhs }),
        Token::LessLessEqual => self.nodes.push_tag(Tag::Shl { lhs, rhs }),
        Token::GreaterGreaterEqual => self.nodes.push_tag(Tag::Shr { lhs, rhs }),
        Token::Equal => rhs,
        // `is_assignment_op` accepted the token, so no other operator can appear here
        _ => unreachable!(),
    };
    Ok(Some(self.nodes.push_tag(Tag::Assign { lhs, rhs })))
}
/// RETURN_STATEMENT <-
///     return EXPRESSION? ;
///
/// Returns `Ok(None)` without consuming anything when the next token is
/// not `return`.
pub fn try_parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> Result<Option<Node>> {
    if tokens.eat_token(Token::Return).is_none() {
        return Ok(None);
    }
    // a bare `return;` carries no expression
    let expr = if tokens.is_next_token(Token::Semi) {
        None
    } else {
        Some(self.parse_expr(tokens)?)
    };
    tokens.expect_token(Token::Semi)?;
    Ok(Some(self.nodes.push_tag(Tag::ReturnStmt { expr })))
}
/// STATEMENT <-
///     RETURN_EXPRESSION
///     VAR_DECL ;
///     EXPRESSION ;
///
/// Parses one statement including its terminating `;`.
pub fn parse_statement(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let node = match tokens.peek_token_or_err()?.token() {
        // the return statement consumes its own `;`
        Token::Return => return Ok(self.try_parse_return_stmt(tokens)?.unwrap()),
        Token::Var | Token::Let => self.parse_var_decl(tokens)?,
        _ => self.parse_expr(tokens)?,
    };
    tokens.expect_token(Token::Semi)?;
    Ok(node)
}
/// BINARY_EXPR <-
///     AS_EXPR
///     AS_EXPR BINARY_OP EXPRESSION
/// BINARY_OP <-
///     * / % + - << >> < > <= >= == != & ^ | && ||
///
/// Precedence climbing: operators whose precedence in `PRECEDENCE_MAP` is
/// below `precedence` are left for the caller, and the right operand is
/// parsed with a minimum precedence one higher than the operator's.
pub fn parse_binary_expr(
    &mut self,
    tokens: &mut TokenIterator,
    precedence: u32,
) -> Result<Node> {
    let mut node = self.parse_as_expr(tokens)?;
    loop {
        // precedence of the upcoming token, if it is a binary operator at all
        let upcoming = tokens
            .peek_token()
            .and_then(|tok| PRECEDENCE_MAP.get(&tok.token()).cloned());
        let prec = match upcoming {
            Some(prec) if prec >= precedence => prec,
            _ => break,
        };
        let op = tokens.next().unwrap();
        let lhs = node;
        let rhs = self.parse_binary_expr(tokens, prec + 1)?;
        node = self.nodes.push_tag(match op.token() {
            Token::PipePipe => Tag::Or { lhs, rhs },
            Token::AmpersandAmpersand => Tag::And { lhs, rhs },
            Token::Pipe => Tag::BitOr { lhs, rhs },
            Token::Caret => Tag::BitXOr { lhs, rhs },
            Token::Ampersand => Tag::BitAnd { lhs, rhs },
            Token::BangEqual => Tag::NEq { lhs, rhs },
            Token::EqualEqual => Tag::Eq { lhs, rhs },
            Token::LessEqual => Tag::Le { lhs, rhs },
            Token::GreaterEqual => Tag::Ge { lhs, rhs },
            Token::Less => Tag::Lt { lhs, rhs },
            Token::Greater => Tag::Gt { lhs, rhs },
            Token::GreaterGreater => Tag::Shr { lhs, rhs },
            Token::LessLess => Tag::Shl { lhs, rhs },
            Token::Plus => Tag::Add { lhs, rhs },
            Token::Minus => Tag::Sub { lhs, rhs },
            Token::Percent => Tag::Rem { lhs, rhs },
            Token::Star => Tag::Mul { lhs, rhs },
            Token::Slash => Tag::Div { lhs, rhs },
            _ => unreachable!(),
        });
    }
    Ok(node)
}
/// PREFIX_EXPR <-
/// PRIMARY_EXPR
/// ! PRIMARY_EXPR
/// - PRIMARY_EXPR
/// & PRIMARY_EXPR
/// * PRIMARY_EXPR
pub fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
match tokens.peek_token_or_err()?.token() {
Token::Bang => {
_ = tokens.next();
let lhs = self.parse_primary_expr(tokens)?;
Ok(self.nodes.push_tag(Tag::Not { lhs }))
}
Token::Minus => {
_ = tokens.next();
let lhs = self.parse_primary_expr(tokens)?;
Ok(self.nodes.push_tag(Tag::Negate { lhs }))
}
Token::Ampersand => {
_ = tokens.next();
let lhs = self.parse_primary_expr(tokens)?;
Ok(self.nodes.push_tag(Tag::Ref { lhs }))
}
Token::Star => {
_ = tokens.next();
let lhs = self.parse_primary_expr(tokens)?;
Ok(self.nodes.push_tag(Tag::Deref { lhs }))
}
_ => self.parse_primary_expr(tokens),
}
}
/// AS_EXPR <-
///     PREFIX_EXPR
///     PREFIX_EXPR as TYPENAME
///
/// Wraps the prefix expression in an `ExplicitCast` node when it is
/// followed by `as TYPENAME`.
pub fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let lhs = self.parse_prefix_expr(tokens)?;
    if tokens.eat_token(Token::As).is_none() {
        return Ok(lhs);
    }
    let typename = self.parse_typename(tokens)?;
    Ok(self.nodes.push_tag(Tag::ExplicitCast { lhs, typename }))
}
pub fn parse_postfix_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
// TODO
self.parse_primary_expr(tokens)
}
/// PRIMARY_EXPR <-
/// IDENTIFIER
/// INTEGER_CONSTANT
/// FLOATING_CONSTANT
/// ( EXPRESSION )
/// { STATEMENT* EXPRESSION? }
///
/// Identifiers become `DeclRef` nodes when found as ordered symbols and
/// `GlobalRef` nodes otherwise; literals become `Constant` nodes.
///
/// NOTE(review): any token not listed above reaches `unreachable!()` and
/// panics rather than producing an `Error`.
pub fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let token = tokens.peek_token_or_err()?;
match token.token() {
Token::Ident => {
let ident = tokens.expect_token(Token::Ident)?;
let name = ident.lexeme();
// lookup order: ordered symbols first, then orderless ones
if let Some(record) = self.st.find_ordered_symbol(name) {
Ok(self.nodes.push_tag(Tag::DeclRef(record.node())))
} else if let Some(record) = self.st.find_orderless_symbol(name) {
Ok(self.nodes.push_tag(Tag::GlobalRef(record.node())))
} else {
// unknown name: reserve a placeholder node and register it as an
// orderless symbol so a later declaration can fill it in
let node = self
.st
.insert_orderless_symbol(name, self.nodes.reserve_node())
.node();
Ok(self.nodes.push_tag(Tag::GlobalRef(node)))
}
}
Token::IntegerBinConstant
| Token::IntegerHexConstant
| Token::IntegerOctConstant
| Token::IntegerConstant => {
_ = tokens.next();
let (bits, ty) = Self::parse_integral_constant(token.token(), token.lexeme());
// little-endian magnitude bytes; the sign is discarded
let (_, bytes) = bits.to_bytes_le();
const BUF_SIZE: usize = core::mem::size_of::<u64>();
// zero-padded copy of at most the first 8 bytes
let mut buf = [0u8; BUF_SIZE];
buf[..bytes.len().min(BUF_SIZE)]
.copy_from_slice(&bytes[..bytes.len().min(BUF_SIZE)]);
// 0 or 1 byte -> U32 immediate, 2 or 3 bytes -> U64 immediate,
// 4 or more bytes -> stored in the string table.
// NOTE(review): the thresholds look like they were meant to be the
// byte widths 4 and 8 rather than 2 and 4 — confirm.
let bytes = match bytes.len() {
0..2 => {
let (buf, _) = buf.split_at(core::mem::size_of::<u32>());
let dw = u32::from_le_bytes(buf.try_into().unwrap());
ImmOrIndex::U32(dw)
}
0..4 => {
let (buf, _) = buf.split_at(core::mem::size_of::<u64>());
let qw = u64::from_le_bytes(buf.try_into().unwrap());
ImmOrIndex::U64(qw)
}
0.. => {
let idx = self.strings.insert(bytes);
ImmOrIndex::Index(idx)
}
};
// a literal without a type suffix is a compile-time number
let ty = match ty {
Some(int) => Type::Integer(int),
None => Type::ComptimeNumber,
};
Ok(self.nodes.push_tag(Tag::Constant { bytes, ty }))
}
Token::FloatingConstant
| Token::FloatingExpConstant
| Token::DotFloatingConstant
| Token::DotFloatingExpConstant => {
_ = tokens.next();
let (bits, ty) = Self::parse_floating_constant(token.token(), token.lexeme());
// the immediate's width follows the float type
let bytes = match ty {
FloatingType::Binary32 => ImmOrIndex::U32(bits as u32),
FloatingType::Binary64 => ImmOrIndex::U64(bits as u64),
};
Ok(self.nodes.push_tag(Tag::Constant {
bytes,
ty: Type::Floating(ty),
}))
}
Token::OpenParens => {
// parentheses only group; no node is created for them
_ = tokens.next();
let node = self.parse_expr(tokens)?;
tokens.expect_token(Token::CloseParens)?;
Ok(node)
}
Token::OpenBrace => {
let node = self.parse_block(tokens, None)?;
Ok(node)
}
_ => unreachable!(),
}
}
/// EXPRESSION <-
/// ASSIGNMENT_EXPR
pub fn parse_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
self.parse_assignment_expr(tokens)
}
/// PROGRAM <-
/// (FUNCTION_DECL | GLOBAL_DECL)*
pub fn parse_program(&mut self, tokens: &mut TokenIterator) -> Result<()> {
while tokens.peek_token().is_some() {
let Some(token) = tokens.peek_token().map(|itm| itm.token()) else {
break;
};
let decl = match token {
Token::Const => self.parse_global_decl(tokens)?,
Token::Fn => self.parse_fn_decl(tokens)?,
_ => {
eprintln!("unexpected token: {}", token);
panic!("unexpected token at global scope");
}
};
self.global_decls.push(decl);
}
Ok(())
}
/// Parses a whole program from `tokens` into this tree, consuming the
/// iterator.
pub fn parse(&mut self, mut tokens: TokenIterator) -> Result<()> {
    let stream = &mut tokens;
    self.parse_program(stream)
}
/// Returns the text of an identifier node, or `None` when `node` is not a
/// `Tag::Ident`.
pub fn get_ident_str(&self, node: Node) -> Option<&str> {
    let Tag::Ident { name } = &self.nodes[node] else {
        return None;
    };
    Some(self.strings.get_str(*name))
}
/// Returns the textual name of a type node, or `None` when `node` is not a
/// type-like node.
///
/// Pointers are followed down to their pointee, so the result for a
/// pointer is the pointee's name without any pointer marker.
fn get_typename_str(&self, node: Node) -> Option<String> {
    let mut current = node;
    loop {
        return match self.nodes.get_node(current) {
            Tag::Pointer { pointee } => {
                // step through the pointer and look at what it points to
                current = *pointee;
                continue;
            }
            Tag::IntegralType(i) => Some(i.to_string()),
            Tag::Ident { name } => Some(self.strings.get_str(*name).to_owned()),
            Tag::PrimitiveType(prim) => Some(prim.to_string()),
            _ => None,
        };
    }
}
}
impl Tree {
    /// Lists the nodes that `node` refers to directly, in the order a
    /// visitor should see them.
    ///
    /// Optional children are omitted when absent. `VarDecl` and
    /// `GlobalDecl` report only their name and explicit type, not their
    /// assignment. Tags without an entry below yield an empty list.
    pub fn get_node_children(&self, node: Node) -> Vec<Node> {
        // `first`, followed by `second` when it is present
        fn with_optional(first: Node, second: Option<Node>) -> Vec<Node> {
            std::iter::once(first).chain(second).collect()
        }

        match self.nodes.get_node(node) {
            Tag::FunctionProto {
                name,
                parameters,
                return_type,
            } => match parameters {
                Some(params) => vec![*name, *params, *return_type],
                None => vec![*name, *return_type],
            },
            Tag::ParameterList { parameters } => parameters.clone(),
            Tag::Parameter { name, ty } => vec![*name, *ty],
            Tag::Pointer { pointee } => vec![*pointee],
            Tag::FunctionDecl { proto, body } => vec![*proto, *body],
            Tag::Block {
                statements,
                trailing_expr,
            } => statements.iter().copied().chain(*trailing_expr).collect(),
            Tag::ReturnStmt { expr } => expr.iter().copied().collect(),
            Tag::ExprStmt { expr } => vec![*expr],
            Tag::VarDecl {
                name,
                explicit_type,
                ..
            }
            | Tag::GlobalDecl {
                name,
                explicit_type,
                ..
            } => with_optional(*name, *explicit_type),
            Tag::CallExpr { lhs, rhs } => with_optional(*lhs, *rhs),
            Tag::ArgumentList { parameters } => parameters.clone(),
            // an argument's optional name comes before its expression
            Tag::Argument { name, expr } => name
                .iter()
                .copied()
                .chain(std::iter::once(*expr))
                .collect(),
            Tag::ExplicitCast { lhs, typename } => vec![*lhs, *typename],
            Tag::Deref { lhs } | Tag::Ref { lhs } | Tag::Not { lhs } | Tag::Negate { lhs } => {
                vec![*lhs]
            }
            Tag::Or { lhs, rhs }
            | Tag::And { lhs, rhs }
            | Tag::BitOr { lhs, rhs }
            | Tag::BitAnd { lhs, rhs }
            | Tag::BitXOr { lhs, rhs }
            | Tag::Eq { lhs, rhs }
            | Tag::NEq { lhs, rhs }
            | Tag::Lt { lhs, rhs }
            | Tag::Gt { lhs, rhs }
            | Tag::Le { lhs, rhs }
            | Tag::Ge { lhs, rhs }
            | Tag::Shl { lhs, rhs }
            | Tag::Shr { lhs, rhs }
            | Tag::Add { lhs, rhs }
            | Tag::Sub { lhs, rhs }
            | Tag::Mul { lhs, rhs }
            | Tag::Rem { lhs, rhs }
            | Tag::Div { lhs, rhs }
            | Tag::Assign { lhs, rhs } => vec![*lhs, *rhs],
            _ => Vec::new(),
        }
    }
}
impl Tree {
fn render_node<W: core::fmt::Write>(
&mut self,
writer: &mut W,
node: Node,
indent: u32,
) -> core::fmt::Result {
match self.nodes[node].clone() {
Tag::FunctionProto {
name,
parameters,
return_type,
} => {
self.render_node(writer, name, indent)?;
self.render_node(writer, return_type, indent)?;
if let Some(parameters) = parameters {
self.render_node(writer, parameters, indent)?;
}
write_indented!(indent, writer, "%{} = function_proto: {{", node.get())?;
write!(writer, "name: \"{}\"", self.get_ident_str(name).unwrap())?;
if let Some(parameters) = parameters {
write!(writer, ", parameters: %{}", parameters.get())?;
}
write!(writer, ", return_type: %{}", return_type.get())?;
writeln!(writer, "}}")
}
Tag::ParameterList { parameters } => {
writeln_indented!(indent, writer, "%{} = ParameterList [", node.get())?;
for param in parameters {
self.render_node(writer, param, indent + 1)?;
}
writeln_indented!(indent, writer, "]")
}
Tag::Parameter { name, ty } => {
writeln_indented!(
indent,
writer,
"%{} = {}: {},",
node.get(),
self.get_ident_str(name).unwrap(),
self.get_typename_str(ty).unwrap()
)
}
Tag::Pointer { .. } | Tag::IntegralType(_) | Tag::PrimitiveType(_) => {
writeln_indented!(
indent,
writer,
"%{} = type({})",
node.get(),
self.get_typename_str(node).unwrap()
)
}
Tag::PointerQualifier { .. } => todo!(),
Tag::FunctionDecl { proto, body } => {
self.render_node(writer, proto, indent)?;
writeln_indented!(
indent,
writer,
"%{} = function_decl( proto: %{}, body: %{}) {{",
node.get(),
proto.get(),
body.get()
)?;
self.render_node(writer, body, indent + 1)?;
writeln_indented!(indent, writer, "}}")
}
Tag::Ident { name } => {
writeln_indented!(
indent,
writer,
"%{} = identifier(\"{}\")",
node.get(),
self.strings.get_str(name)
)
}
Tag::Block {
statements,
trailing_expr,
} => {
writeln_indented!(indent, writer, "%{} = {{", node.get())?;
self.st.into_child(node);
for stmt in statements {
self.render_node(writer, stmt, indent + 1)?;
}
if let Some(expr) = trailing_expr {
self.render_node(writer, expr, indent + 1)?;
writeln_indented!(
indent + 1,
writer,
"break %{} %{};",
node.get(),
expr.get()
)?;
}
self.st.into_parent();
writeln_indented!(indent, writer, "}}")
}
Tag::ReturnStmt { expr } => {
if let Some(expr) = expr {
self.render_node(writer, expr, indent)?;
writeln_indented!(indent, writer, "%{} = return %{};", node.get(), expr.get())
} else {
writeln_indented!(indent, writer, "%{} = return;", node.get())
}
}
Tag::ExprStmt { expr } => self.render_node(writer, expr, indent),
Tag::VarDecl {
let_or_var,
name,
explicit_type,
..
} => {
self.render_node(writer, name, indent)?;
explicit_type.map(|ty| self.render_node(writer, ty, indent));
write_indented!(
indent,
writer,
"%{} = decl_{}(name: \"{}\"",
node.get(),
match let_or_var {
LetOrVar::Let => {
"const"
}
LetOrVar::Var => {
"mut"
}
},
self.get_ident_str(name).unwrap()
)?;
if let Some(ty) = explicit_type {
write!(writer, ", ty: {}", self.get_typename_str(ty).unwrap())?;
}
writeln!(writer, ");")?;
Ok(())
}
Tag::GlobalDecl {
name,
explicit_type,
..
} => {
self.render_node(writer, name, indent)?;
explicit_type.map(|ty| self.render_node(writer, ty, indent));
write_indented!(
indent,
writer,
"%{} = global_decl(name: \"{}\"",
node.get(),
self.get_ident_str(name).unwrap()
)?;
if let Some(ty) = explicit_type {
write!(writer, ", ty: {}", self.get_typename_str(ty).unwrap())?;
}
writeln!(writer, ");")?;
Ok(())
}
Tag::CallExpr { .. } => todo!(),
Tag::ArgumentList { .. } => todo!(),
Tag::Argument { .. } => todo!(),
Tag::ExplicitCast { lhs, typename } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = cast<{}>(%{})",
node.get(),
self.get_typename_str(typename).unwrap(),
lhs.get()
)
}
Tag::Deref { lhs } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(indent, writer, "%{} = deref(%{})", node.get(), lhs.get())
}
Tag::Ref { lhs } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = address_of(%{})",
node.get(),
lhs.get()
)
}
Tag::Not { lhs } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(indent, writer, "%{} = ", node.get(),)
}
Tag::Negate { lhs } => {
self.render_node(writer, lhs, indent)?;
writeln_indented!(indent, writer, "%{} = not(%{})", node.get(), lhs.get())
}
Tag::Or { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} || %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::And { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} && %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitOr { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} | %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitAnd { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} & %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitXOr { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} ^ %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Eq { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} == %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::NEq { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} != %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Lt { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} < %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Gt { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} > %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Le { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} <= %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Ge { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} >= %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Shl { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} << %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Shr { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} >> %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Add { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} + %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Sub { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} - %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Mul { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} * %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Div { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} / %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Rem { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} % %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Assign { lhs, rhs } => {
self.render_node(writer, lhs, indent)?;
self.render_node(writer, rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = store(dst: %{}, val: %{})",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::DeclRef(decl) => {
writeln_indented!(
indent,
writer,
"%{} = decl_ref(%{}, name: {})",
node.get(),
decl.get(),
self.st
.find_symbol_by_decl(decl)
.map(|a| a.name())
.unwrap_or(&format!(
"SymbolTable entry not found?, %{}, %{}",
node.get(),
decl.get()
))
)
}
Tag::GlobalRef(decl) => {
writeln_indented!(
indent,
writer,
"%{} = global_ref(%{}, name: {})",
node.get(),
decl.get(),
self.st
.symbol_path(decl)
.map(|p| p.mangle(self))
.unwrap_or(format!(
"SymbolTable entry not found?, %{}, %{}",
node.get(),
decl.get()
))
)
}
Tag::Constant { bytes, ty } => {
writeln_indented!(
indent,
writer,
"%{} = constant{{ ty: {}, bytes: {}}}",
node.get(),
ty,
self.strings.display_idx(bytes)
)
}
_ => unreachable!(),
}
}
/// Renders every recorded global declaration, in order, at indentation
/// level 0, stopping at the first write error.
pub fn render<W: core::fmt::Write>(&mut self, writer: &mut W) -> core::fmt::Result {
    // iterate over a copy of the list: rendering needs `&mut self`
    let decls = self.global_decls.clone();
    decls
        .into_iter()
        .try_for_each(|decl| self.render_node(writer, decl, 0))
}
/// Determines the type of `node` by inspecting its tag.
///
/// Most tags forward to one of their children; comparison operators are
/// always `bool`; every tag without a dedicated arm yields `Type::void()`.
///
/// # Panics
///
/// - if the `parameters` of a `FunctionProto` is not a `ParameterList`,
/// - if the assignment of a `VarDecl`/`GlobalDecl` is not an `Assign` node,
/// - if a `VarDecl` has neither an explicit type nor an assignment,
/// - if `remove_ptr()` returns `None` for the operand of a `Deref`.
pub fn type_of_node(&self, node: Node) -> crate::ast::Type {
    match self.nodes.get_node(node) {
        // Nodes whose type is simply the type of a single child.
        Tag::FunctionDecl { proto, .. } => self.type_of_node(*proto),
        Tag::Parameter { ty, .. } => self.type_of_node(*ty),
        Tag::ExplicitCast { typename, .. } => self.type_of_node(*typename),
        Tag::DeclRef(decl) | Tag::GlobalRef(decl) => self.type_of_node(*decl),

        // Operators (and calls) take the type of their left-hand operand.
        // NOTE(review): for `CallExpr` this is the type of the callee
        // expression itself, not a function's return type — confirm intended.
        Tag::CallExpr { lhs, .. }
        | Tag::Not { lhs }
        | Tag::Negate { lhs }
        | Tag::Or { lhs, .. }
        | Tag::And { lhs, .. }
        | Tag::BitOr { lhs, .. }
        | Tag::BitAnd { lhs, .. }
        | Tag::BitXOr { lhs, .. }
        | Tag::Shl { lhs, .. }
        | Tag::Shr { lhs, .. }
        | Tag::Add { lhs, .. }
        | Tag::Sub { lhs, .. }
        | Tag::Mul { lhs, .. }
        | Tag::Rem { lhs, .. }
        | Tag::Div { lhs, .. } => self.type_of_node(*lhs),

        // Comparisons always produce a boolean.
        Tag::Eq { .. }
        | Tag::NEq { .. }
        | Tag::Lt { .. }
        | Tag::Gt { .. }
        | Tag::Le { .. }
        | Tag::Ge { .. } => Type::bool(),

        Tag::Deref { lhs } => self.type_of_node(*lhs).remove_ptr().unwrap(),
        Tag::Ref { lhs } => self.type_of_node(*lhs).into_ptr(),

        Tag::FunctionProto {
            parameters,
            return_type,
            ..
        } => {
            let return_type = self.type_of_node(*return_type);
            // A prototype without a parameter list has no parameter types.
            let parameter_types = match parameters {
                Some(list) => match self.nodes.get_node(*list) {
                    Tag::ParameterList { parameters } => parameters
                        .iter()
                        .map(|param| self.type_of_node(*param))
                        .collect::<Vec<_>>(),
                    _ => panic!("parameters is not a parameterlist!"),
                },
                None => Vec::new(),
            };
            Type::Fn {
                parameter_types,
                return_type: Box::new(return_type),
            }
        }

        // Pointer type expressions are always produced with `constness: false`.
        Tag::Pointer { pointee } => {
            let pointee = Box::new(self.type_of_node(*pointee));
            Type::Pointer {
                constness: false,
                pointee,
            }
        }

        Tag::Constant { ty, .. } => ty.clone(),
        Tag::IntegralType(int) => Type::Integer(*int),
        Tag::PrimitiveType(primitive) => match primitive {
            PrimitiveType::FloatingType(float) => Type::Floating(*float),
            PrimitiveType::IntegralType(int) => Type::Integer(*int),
            PrimitiveType::Bool => Type::bool(),
            PrimitiveType::Void => Type::void(),
        },

        // A block has the type of its trailing expression, or void without one.
        Tag::Block { trailing_expr, .. } => match trailing_expr {
            Some(expr) => self.type_of_node(*expr),
            None => Type::void(),
        },

        Tag::VarDecl {
            explicit_type,
            assignment, // this is a Tag::Assign
            ..
        } => {
            let declared = explicit_type.map(|ty| self.type_of_node(ty));
            let inferred = assignment.map(|assign| match self.nodes.get_node(assign) {
                Tag::Assign { rhs, .. } => self.type_of_node(*rhs),
                _ => unreachable!(),
            });

            // A mismatch is only reported on stderr; the explicit type wins.
            if matches!((&declared, &inferred), (Some(l), Some(r)) if l != r) {
                eprintln!(
                    "vardecl: incompatible types {lhs:?} and {rhs:?}.",
                    lhs = declared,
                    rhs = inferred
                );
            }

            declared
                .or(inferred)
                .expect("Type could not be automatically deduced.")
        }

        Tag::GlobalDecl {
            explicit_type,
            assignment, // this is a Tag::Assign
            ..
        } => {
            let declared = explicit_type.map(|ty| self.type_of_node(ty));
            let inferred = match self.nodes.get_node(*assignment) {
                Tag::Assign { rhs, .. } => self.type_of_node(*rhs),
                _ => unreachable!(),
            };

            // A mismatch is only reported on stderr; the explicit type wins.
            if matches!(&declared, Some(l) if *l != inferred) {
                eprintln!(
                    "vardecl: incompatible types {lhs:?} and {rhs:?}.",
                    lhs = declared,
                    rhs = inferred
                );
            }

            declared.unwrap_or(inferred)
        }

        _ => Type::void(),
    }
}
}
// simplify tree with compile-time math
impl Tree {
/// Returns whether `node` can be evaluated at compile time.
///
/// Constants are comptime; blocks, casts to bool/comptime-number/integer/
/// float types and unary/binary operators are comptime when all of their
/// operands are. `DeclRef`s are only examined when `check_declrefs` is set;
/// otherwise, like every other tag, they are reported as not comptime.
/// Recursive calls always pass `check_declrefs = true`.
fn is_node_comptime(&self, node: Node, check_declrefs: bool) -> bool {
    match self.nodes.get_node(node) {
        Tag::Constant { .. } => true,
        Tag::Block {
            statements,
            trailing_expr,
        } => statements
            .iter()
            .chain(trailing_expr.iter())
            .all(|child| self.is_node_comptime(*child, true)),
        Tag::ExplicitCast { lhs, typename } => {
            self.is_node_comptime(*lhs, true)
                && matches!(
                    self.type_of_node(*typename),
                    Type::Bool | Type::ComptimeNumber | Type::Integer(_) | Type::Floating(_)
                )
        }
        &Tag::DeclRef(decl) if check_declrefs => {
            // Walk the tree from the referenced declaration (presumably in
            // visiting order, stopping once `node` has been visited — see
            // `Visitor::new_seek` / `until_after`) and inspect everything that
            // touches the declaration on the way.
            let mut is_comptime = true;
            ast::tree_visitor::Visitor::new_seek(
                self,
                decl,
                |_: &Tree, _| {},
                |tree: &Tree, visited| match tree.nodes.get_node(visited) {
                    Tag::Assign { lhs, rhs } => {
                        // The assignment targets the declaration either
                        // directly or through a `DeclRef` to it.
                        let targets_decl = *lhs == decl
                            || matches!(
                                tree.nodes.get_node(*lhs),
                                Tag::DeclRef(target) if *target == decl
                            );
                        if targets_decl {
                            is_comptime &= self.is_node_comptime(*rhs, true);
                        }
                    }
                    Tag::Ref { lhs } if *lhs == decl => {
                        // recursively checking for derefs would get very complicated.
                        is_comptime = false;
                    }
                    _ => {}
                },
            )
            .until_after(node)
            .visit(self);
            is_comptime
        }
        Tag::Not { lhs } | Tag::Negate { lhs } => self.is_node_comptime(*lhs, true),
        Tag::Or { lhs, rhs }
        | Tag::And { lhs, rhs }
        | Tag::BitOr { lhs, rhs }
        | Tag::BitAnd { lhs, rhs }
        | Tag::BitXOr { lhs, rhs }
        | Tag::Eq { lhs, rhs }
        | Tag::NEq { lhs, rhs }
        | Tag::Lt { lhs, rhs }
        | Tag::Gt { lhs, rhs }
        | Tag::Le { lhs, rhs }
        | Tag::Ge { lhs, rhs }
        | Tag::Shl { lhs, rhs }
        | Tag::Shr { lhs, rhs }
        | Tag::Add { lhs, rhs }
        | Tag::Sub { lhs, rhs }
        | Tag::Mul { lhs, rhs }
        | Tag::Rem { lhs, rhs }
        | Tag::Div { lhs, rhs } => {
            let lhs_comptime = self.is_node_comptime(*lhs, true);
            lhs_comptime && self.is_node_comptime(*rhs, true)
        }
        _ => false,
    }
}
/// Visits the subtree rooted at `decl` and replaces every node that folds to
/// a compile-time value with an equivalent `Tag::Constant`.
///
/// Nodes for which folding fails are left untouched.
fn fold_comptime_with_visitor(&mut self, decl: Node) {
    ast::tree_visitor::Visitor::new(
        decl,
        |_: &mut Tree, _| {},
        |tree: &mut Tree, node| {
            let Ok(value) = tree.fold_comptime_inner(node, false) else {
                return;
            };
            // Store the folded value's bytes in the string table and refer to
            // them from the new constant node.
            let (raw, ty) = value.into_bytes_and_type();
            let bytes = ImmOrIndex::Index(tree.strings.insert(raw));
            tree.nodes.set_node(node, Tag::Constant { bytes, ty });
        },
    )
    .visit_mut(self);
}
fn fold_comptime_inner(
&mut self,
decl: Node,
check_declrefs: bool,
) -> comptime::Result<ComptimeNumber> {
if self.is_node_comptime(decl, check_declrefs) {
match self.nodes.get_node(decl) {
Tag::Constant { bytes, ty } => {
let bytes = match bytes {
ImmOrIndex::U64(v) => &v.to_le_bytes()[..],
ImmOrIndex::U32(v) => &v.to_le_bytes()[..],
ImmOrIndex::Index(idx) => self.strings.get_bytes(*idx),
};
let number: ComptimeNumber = match ty {
Type::Bool => (bytes[0] != 0).into(),
Type::ComptimeNumber => {
BigInt::from_bytes_le(num_bigint::Sign::Plus, bytes).into()
}
Type::Integer(ty) => {
if bytes.len() > core::mem::size_of::<u128>() {
let bits = BigInt::from_bytes_le(num_bigint::Sign::Plus, bytes);
(bits, *ty).into()
} else {
let mut buf = [0u8; core::mem::size_of::<u128>()];
buf[..bytes.len()].copy_from_slice(bytes);
let bits = u128::from_le_bytes(buf);
(bits, *ty).into()
}
}
Type::Floating(ty) => match ty {
FloatingType::Binary32 => {
(f32::from_le_bytes((&bytes[..4]).try_into().unwrap())).into()
}
FloatingType::Binary64 => {
(f64::from_le_bytes((&bytes[..8]).try_into().unwrap())).into()
}
},
_ => unimplemented!(),
};
return Ok(number);
}
Tag::Negate { lhs } => {
let lhs = self.fold_comptime_inner(*lhs, true)?;
return Ok(lhs.neg()?);
}
Tag::ExplicitCast { lhs, typename } => {
let ty = self.type_of_node(*typename);
let lhs = self.fold_comptime_inner(*lhs, true)?;
return match ty {
Type::Bool => lhs.into_bool(),
Type::Integer(ty) => lhs.into_int(ty),
Type::Floating(ty) => lhs.into_float(ty),
_ => unimplemented!(),
};
}
Tag::Not { lhs } => {
let lhs = self.fold_comptime_inner(*lhs, true)?;
return lhs.not();
}
Tag::Or { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.or(rhs);
}
Tag::And { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.and(rhs);
}
Tag::Eq { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.eq(rhs);
}
Tag::NEq { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.eq(rhs)?.not();
}
Tag::Lt { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.lt(rhs);
}
Tag::Gt { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.gt(rhs);
}
Tag::Le { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.le(rhs);
}
Tag::Ge { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.ge(rhs);
}
Tag::BitOr { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.bitor(rhs);
}
Tag::BitAnd { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.bitand(rhs);
}
Tag::BitXOr { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.bitxor(rhs);
}
Tag::Shl { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.shl(rhs);
}
Tag::Shr { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.shr(rhs);
}
Tag::Add { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.add(rhs);
}
Tag::Sub { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.sub(rhs);
}
Tag::Mul { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.mul(rhs);
}
Tag::Rem { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.rem(rhs);
}
Tag::Div { lhs, rhs } => {
let (lhs, rhs) = (*lhs, *rhs);
let lhs = self.fold_comptime_inner(lhs, true)?;
let rhs = self.fold_comptime_inner(rhs, true)?;
return lhs.div(rhs);
}
&Tag::DeclRef(lhs) => {
let start = lhs;
let end = decl;
let mut last_value = None;
ast::tree_visitor::Visitor::new_seek(
self,start,
|_: &Tree, node| {
},
|tree: &Tree, node| match tree.nodes.get_node(node) {
&Tag::Assign { lhs, rhs } => {
if lhs == start || matches!(tree.nodes.get_node(lhs), &Tag::DeclRef(decl) if decl == start) {
last_value = Some(rhs);
}
}
_ => {}
},
)
.until_after(end)
.visit(self);
return self.fold_comptime_inner(
last_value.ok_or(comptime::Error::NotComptime)?,
true,
);
}
_ => {
unreachable!()
}
}
} else {
Err(comptime::Error::NotComptime)
}
}
/// Runs compile-time folding over every global declaration: the body of each
/// function declaration and the assignment of each global declaration.
///
/// # Panics
///
/// Panics if a global declaration is neither a `FunctionDecl` nor a
/// `GlobalDecl`.
pub fn fold_comptime(&mut self) {
    let decls = self.global_decls.clone();
    for decl in decls {
        // Pick the subtree to fold first, then fold it.
        let root = match self.nodes.get_node(decl) {
            Tag::FunctionDecl { body, .. } => *body,
            Tag::GlobalDecl { assignment, .. } => *assignment,
            _ => unreachable!(),
        };
        self.fold_comptime_with_visitor(root);
    }
}
}
impl Tree {
/// Type-checks every global declaration and inserts explicit-cast nodes
/// where appropriate.
///
/// NOTE(review): the analysis errors collected along the way are dropped
/// when this function returns; nothing reads them.
pub fn typecheck(&mut self) {
    let mut errors = Vec::new();
    let decls = self.global_decls.clone();
    decls
        .into_iter()
        .for_each(|decl| self.typecheck_node(&mut errors, decl));
}
// TODO: inline types into the AST proper before tackling this.
// for now, comptime_number is not supported in IR gen, then.
/// Type-checks `node`, appending any problems found to `errors`.
///
/// - `FunctionDecl`: the body type must be compatible with the declared
///   return type; a comptime-number trailing expression is wrapped in an
///   explicit cast to the agreed-upon type.
/// - `Constant`: the value must fit into the bit width of its type.
/// - `Block`, `ReturnStmt`, `ExprStmt`, `VarDecl`: recurse into children.
///
/// # Panics
///
/// Most expression tags are not implemented yet and hit `todo!()`; tags that
/// are not listed at all hit `unreachable!()`.
fn typecheck_node(&mut self, errors: &mut Vec<AnalysisError>, node: Node) {
    // Many arms are still stubs that bind fields without using them.
    #[allow(unused_variables)]
    match self.nodes[node].clone() {
        Tag::FunctionProto { .. } => {}
        Tag::FunctionDecl { proto, body } => {
            let Tag::FunctionProto { return_type, .. } = self.nodes[proto] else {
                unreachable!()
            };
            let body_t = self.type_of_node(body);
            let ret_t = self.type_of_node(return_type);
            if let Some(peer_t) = body_t.equal_type(&ret_t) {
                if body_t == Type::comptime_number() {
                    let Tag::Block { trailing_expr, .. } = self.nodes[body] else {
                        unreachable!()
                    };
                    if let Some(expr) = trailing_expr {
                        // Wrap the trailing expression in a cast to the
                        // concrete type both sides agreed on.
                        let ty = self.nodes.push_tag(Tag::PrimitiveType(
                            peer_t
                                .as_primitive_type()
                                .expect("comptime cannot be cast into a non-primitive type"),
                        ));
                        let expr = self.nodes.push_tag(Tag::ExplicitCast {
                            lhs: expr,
                            typename: ty,
                        });
                        let Tag::Block { trailing_expr, .. } = &mut self.nodes[body] else {
                            unreachable!()
                        };
                        *trailing_expr = Some(expr)
                    }
                }
            } else {
                errors.push(AnalysisError::new(
                    AnalysisErrorTag::MismatchingTypesFunctionReturn,
                ));
            }
        }
        Tag::Constant { bytes, ty } => {
            let bits = self.strings.count_bits(bytes);
            // The type is too narrow when the constant needs more bits than
            // the type provides (the previous `<` flagged constants that fit).
            if bits > ty.bit_width() as u32 {
                errors.push(AnalysisError::new(
                    AnalysisErrorTag::InsufficientBitsInTypeForConstant(bits, ty.clone()),
                ));
            }
        }
        Tag::Block {
            statements,
            trailing_expr,
        } => {
            for statement in statements {
                self.typecheck_node(errors, statement);
            }
            if let Some(expr) = trailing_expr {
                self.typecheck_node(errors, expr);
            }
        }
        Tag::ReturnStmt { expr } => {
            if let Some(expr) = expr {
                self.typecheck_node(errors, expr);
            }
        }
        Tag::ExprStmt { expr } => {
            self.typecheck_node(errors, expr);
        }
        Tag::VarDecl {
            explicit_type,
            assignment,
            ..
        } => {
            if let Some(assignment) = assignment {
                self.typecheck_node(errors, assignment);
            }
            let explicit_t = explicit_type.map(|t| self.type_of_node(t));
            let assignment_t = assignment.map(|t| self.type_of_node(t));
            match (explicit_t, assignment_t) {
                (None, None) => unreachable!(),
                (Some(explicit_t), None) => {}
                (Some(explicit_t), Some(assignment_t)) => {
                    // TODO: ensure types match, explicit-cast comptime_number
                }
                (None, Some(assignment_t)) => {
                    // TODO: set explicit_type to assignment_t
                }
            }
        }
        Tag::GlobalDecl {
            name,
            explicit_type,
            assignment,
        } => todo!(),
        Tag::DeclRef(_) => todo!(),
        Tag::GlobalRef(_) => todo!(),
        Tag::CallExpr { lhs, rhs } => todo!(),
        Tag::ArgumentList { parameters } => todo!(),
        Tag::Argument { name, expr } => todo!(),
        Tag::ExplicitCast { lhs, typename } => todo!(),
        Tag::Deref { lhs } => todo!(),
        Tag::Ref { lhs } => todo!(),
        Tag::Not { lhs } => todo!(),
        Tag::Negate { lhs } => todo!(),
        Tag::Or { lhs, rhs } => todo!(),
        Tag::And { lhs, rhs } => todo!(),
        Tag::BitOr { lhs, rhs } => todo!(),
        Tag::BitAnd { lhs, rhs } => todo!(),
        Tag::BitXOr { lhs, rhs } => todo!(),
        Tag::Eq { lhs, rhs } => todo!(),
        Tag::NEq { lhs, rhs } => todo!(),
        Tag::Lt { lhs, rhs } => todo!(),
        Tag::Gt { lhs, rhs } => todo!(),
        Tag::Le { lhs, rhs } => todo!(),
        Tag::Ge { lhs, rhs } => todo!(),
        Tag::Shl { lhs, rhs } => todo!(),
        Tag::Shr { lhs, rhs } => todo!(),
        Tag::Add { lhs, rhs } => todo!(),
        Tag::Sub { lhs, rhs } => todo!(),
        Tag::Mul { lhs, rhs } => todo!(),
        Tag::Rem { lhs, rhs } => todo!(),
        Tag::Div { lhs, rhs } => todo!(),
        Tag::Assign { lhs, rhs } => todo!(),
        _ => {
            unreachable!()
        }
    }
}
}
/// Precedence value of every binary operator token, built lazily on first use.
///
/// Tokens listed on the same level share one precedence value.
/// NOTE(review): presumably a larger value binds tighter — confirm against
/// the expression parser that consumes this table.
static PRECEDENCE_MAP: std::sync::LazyLock<HashMap<Token, u32>> = std::sync::LazyLock::new(|| {
    let levels: [(u32, Vec<Token>); 10] = [
        (10, vec![Token::PipePipe]),
        (20, vec![Token::AmpersandAmpersand]),
        (30, vec![Token::Pipe]),
        (40, vec![Token::Caret]),
        (50, vec![Token::Ampersand]),
        (60, vec![Token::BangEqual, Token::EqualEqual]),
        (
            70,
            vec![
                Token::LessEqual,
                Token::GreaterEqual,
                Token::Less,
                Token::Greater,
            ],
        ),
        (80, vec![Token::GreaterGreater, Token::LessLess]),
        (90, vec![Token::Plus, Token::Minus]),
        (100, vec![Token::Percent, Token::Star, Token::Slash]),
    ];

    levels
        .into_iter()
        .flat_map(|(precedence, tokens)| {
            tokens.into_iter().map(move |token| (token, precedence))
        })
        .collect()
});
#[cfg(test)]
mod tests {
    use crate::lexer::Tokenizer;

    use super::*;

    /// Tokenizes and parses `src` into a fresh tree, panicking on any failure.
    fn parse_source(src: &str) -> Tree {
        let tokens = Tokenizer::new(src.as_bytes()).unwrap();
        let mut tree = Tree::new();
        tree.parse(tokens.iter()).unwrap();
        tree
    }

    /// Renders `tree` into a string and prints it to stdout.
    fn print_rendered(tree: &mut Tree) {
        let mut buf = String::new();
        tree.render(&mut buf).unwrap();
        println!("{buf}");
    }

    /// A single global declaration parses and renders.
    #[test]
    fn render_ast() {
        let mut tree = parse_source("let a: u21 = 3u32;");
        print_rendered(&mut tree);
    }

    /// Two functions with local declarations and expressions parse and render.
    #[test]
    fn render_ast2() {
        let src = "
fn main() -> void {
let a: u32 = 0u32;
a == 1u32
}
fn square(x: u32) -> u32 {
x * x
}
";
        let mut tree = parse_source(src);
        print_rendered(&mut tree);
    }

    /// A function referring to a global declared after it parses and renders.
    #[test]
    fn render_ast3() {
        let src = "
fn main() -> void {
let a: u32 = 0u32;
a == global
}
const global: u32 = 42u32;
";
        let mut tree = parse_source(src);
        print_rendered(&mut tree);
    }

    /// Prints the tree before and after folding the first global declaration.
    #[test]
    fn comptime() {
        let src = "
fn main() -> void {
let x: u32;
x = 666u32;
let a = x + 3 * 49573 << 4;
}
";
        let mut tree = parse_source(src);
        print_rendered(&mut tree);

        let first_decl = tree.global_decls.first().cloned().unwrap();
        tree.fold_comptime_with_visitor(first_decl);
        print_rendered(&mut tree);
    }
}