can generate naive ast and render

This commit is contained in:
Janis 2024-08-08 19:14:14 +02:00
parent b7105c1235
commit 69e67c882d
5 changed files with 926 additions and 52 deletions

View file

@ -33,8 +33,10 @@
<assignment-statement> ::= <expr> <assignment-op> <expr> ';'
<assignment-op> ::= '|=' | '&=' | '^=' | '/=' | '*=' | '%=' | '<<=' | '>>=' | '+=' | '-=' | '='
<expr> ::= <or-expr>
<expr> ::= <assignment-expr>
<assignment-expr> ::= <or-expr>
| <or-expr> <assignment-op> <assignment-expr>
<or-expr> ::= <and-expr>
| <or-expr> || <and-expr>
<and-expr> ::= <bitor-expr>
@ -57,7 +59,7 @@
| <mul-expr> (* | / | %) <prefix-expr>
<prefix-expr> ::= <prefix-op> <as-expr>
<prefix-op> ::= ! - + & *
<prefix-op> ::= ! - & *
<as-expr> ::= <postfix-expr> as <type-name>
@ -73,6 +75,7 @@
| <literal>
| <ident>
| '(' <expr> ')'
| <block>
<var-decl> ::= (let | var) <ident> (':' <type-name>)? ( = <expr> )?
<global-decl> ::= <var-decl> ';'

View file

@ -2,7 +2,9 @@ use std::num::NonZero;
pub type Node = NonZero<u32>;
#[derive(Debug)]
pub enum Tag {
Undefined,
Root,
FunctionProto {
/// Ident
@ -21,10 +23,7 @@ pub enum Tag {
/// TypeName
ty: Node,
},
TypeName {
/// Ident | PrimitiveType | Pointer
inner: Node,
},
// TypeName: meta-tag of Ident | PrimitiveType | Pointer
Pointer {
/// TypeName
pointee: Node,
@ -34,10 +33,6 @@ pub enum Tag {
},
IntegralType(IntegralType),
PrimitiveType(PrimitiveType),
Decl {
/// FunctionDecl | VarDecl
inner: Node,
},
FunctionDecl {
/// FunctionProto
proto: Node,
@ -57,7 +52,7 @@ pub enum Tag {
},
Block {
/// ReturnStmt | ExprStmt | VarDecl
statements: Node,
statements: Vec<Node>,
trailing_expr: Option<Node>,
},
ReturnStmt {
@ -72,7 +67,7 @@ pub enum Tag {
name: Node,
/// TypeName
explicit_type: Option<Node>,
expr: Option<Node>,
assignment: Option<Node>,
},
CallExpr {
/// Ident | Expr
@ -185,21 +180,39 @@ pub enum Tag {
},
}
/// Mutability marker for a variable declaration: `let` (immutable) or `var` (mutable).
/// Trivial two-variant enum, so it is cheap to `Copy` and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LetOrVar {
    Let,
    Var,
}
/// Width-and-signedness description of an integer type, e.g. `u32` or `i21`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IntegralType {
    pub signed: bool,
    pub bits: u16,
}

// Implement `Display` rather than `ToString` directly: the blanket
// `impl<T: Display> ToString for T` keeps every existing `.to_string()`
// call working, and `format!("{int}")` now works too.
impl core::fmt::Display for IntegralType {
    /// Renders the canonical type name: `i<bits>` when signed, `u<bits>` otherwise.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "{}{}", if self.signed { "i" } else { "u" }, self.bits)
    }
}
/// IEEE-754 binary interchange formats available as floating-point types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FloatingType {
    Binary32,
    Binary64,
}

// `Display` instead of a direct `ToString` impl (clippy idiom); callers of
// `.to_string()` keep working via the blanket impl, and the static names
// avoid the `to_owned()` allocation the old impl paid on every call.
impl core::fmt::Display for FloatingType {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str(match self {
            FloatingType::Binary32 => "binary32",
            FloatingType::Binary64 => "binary64",
        })
    }
}
impl IntegralType {
pub fn u32() -> IntegralType {
Self {
@ -209,9 +222,21 @@ impl IntegralType {
}
}
/// Built-in (non-user-defined) types.
#[derive(Debug)]
pub enum PrimitiveType {
    FloatingType(FloatingType),
    /// NOTE(review): stores a `Node` index pointing at a `Tag::IntegralType`,
    /// not the integral type itself — confirm this indirection is intended
    /// (parse code appears to push `Tag::IntegralType` directly instead).
    IntegralType(Node),
    Bool,
    Void,
}

// `Display` instead of a direct `ToString` impl; `.to_string()` callers keep
// working through the blanket impl.
impl core::fmt::Display for PrimitiveType {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            PrimitiveType::FloatingType(ft) => f.write_str(&ft.to_string()),
            // NOTE(review): this prints the node *index* (Node is NonZero<u32>),
            // not the integral type's name — verify that is the desired output.
            PrimitiveType::IntegralType(node) => write!(f, "{node}"),
            PrimitiveType::Bool => f.write_str("bool"),
            PrimitiveType::Void => f.write_str("void"),
        }
    }
}

View file

@ -124,6 +124,7 @@ impl<'a> TokenIterator<'a> {
/// Consumes and returns the next token only when it equals `token`;
/// otherwise leaves the iterator position unchanged.
pub fn eat_token(&mut self, token: Token) -> Option<TokenItem<'a>> {
self.next_if(|item| item.token() == token)
}

/// Returns the next token without advancing (inspects a clone of the iterator).
pub fn peek_token(&mut self) -> Option<TokenItem<'a>> {
self.clone().next()
}
@ -142,8 +143,15 @@ impl<'a> TokenIterator<'a> {
/// Non-consuming test: is the next token equal to `token`?
pub fn is_next_token(&mut self, token: Token) -> bool {
self.clone().next_if(|item| item.token() == token).is_some()
}

/// Non-consuming test with a lookahead of two: is the token *after* the next
/// one equal to `token`?
/// NOTE(review): calls `next_if` on the `skip(1)` adapter — std's `Skip` does
/// not provide `next_if`, so this presumably relies on a local extension or
/// inherent method; confirm it compiles as intended.
pub fn is_next_token2(&mut self, token: Token) -> bool {
self.clone()
.skip(1)
.next_if(|item| item.token() == token)
.is_some()
}
}
#[derive(Debug)]
pub struct TokenItem<'a> {
tokenizer: &'a Tokenizer<'a>,
inner: TokenPos,
@ -183,7 +191,7 @@ impl<'a> Iterator for TokenIterator<'a> {
type Item = TokenItem<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.tokenizer.tokens.len() >= self.offset {
if self.offset >= self.tokenizer.tokens.len() {
None
} else {
let index = self.offset;
@ -275,16 +283,47 @@ impl<'a> Tokenizer<'a> {
}
}
/// Builds a `Tokenizer` by eagerly tokenizing all of `bytes`.
///
/// On a lexing failure the error and the offending position (plus up to 16
/// characters of the remaining input) are reported to stderr before the
/// error is propagated to the caller.
pub fn new(bytes: &'a [u8]) -> Result<Tokenizer<'a>> {
    let mut this = Self {
        source: Chars { bytes, offset: 0 },
        tokens: Vec::new(),
    };
    while !this.source.is_eof() {
        if let Err(e) = this.next_token() {
            eprintln!("error while tokenizing: {e}");
            let rest = this.source.as_str();
            eprintln!(
                "at position {}: {}",
                this.source.offset(),
                &rest[..rest.len().min(16)]
            );
            return Err(e);
        }
    }
    Ok(this)
}
/// Records a token with its [start, end) byte span into the token list.
/// Always succeeds; the `Result` return exists only for call-site uniformity
/// with the other lexing helpers.
fn push_token(&mut self, token: Token, start: u32, end: u32) -> Result<()> {
self.tokens.push(TokenPos::new(token, start, end));
Ok(())
}
pub fn next_token(&mut self) -> Result<()> {
fn next_token(&mut self) -> Result<()> {
self.source
.take_while_ref(|&c| crate::common::is_whitespace(c))
.count();
if self.source.is_eof() {
return Ok(());
}
let start = self.source.position();
let token = self.source.try_parse_result(|source| {
@ -303,9 +342,9 @@ impl<'a> Tokenizer<'a> {
}
Ok(None)
});
})?;
if let Some(token) = token? {
if let Some(token) = token {
return self.push_token(token, start, self.source.position());
}
@ -480,7 +519,7 @@ impl Radix {
'A'..='F' => c as u8 - b'A',
_ => unreachable!(),
};
acc + digit as u64 * 16
acc * 16 + digit as u64
}
fold
}
@ -490,7 +529,7 @@ impl Radix {
'0'..='1' => c as u8 - b'0',
_ => unreachable!(),
};
acc + digit as u64 * 2
acc * 2 + digit as u64
}
fold
}
@ -500,7 +539,7 @@ impl Radix {
'0'..='9' => c as u8 - b'0',
_ => unreachable!(),
};
acc + digit as u64 * 10
acc * 10 + digit as u64
}
fold
}
@ -510,7 +549,7 @@ impl Radix {
'0'..='7' => c as u8 - b'0',
_ => unreachable!(),
};
acc + digit as u64 * 8
acc * 8 + digit as u64
}
fold
}
@ -600,7 +639,11 @@ fn parse_constant(source: &mut Chars) -> Result<Token> {
// if zero: `_`* DIGIT (DIGIT|`_`)*
// else: DIGIT (DIGIT|`_`)*
let _digits = parse_digit_part(source, false, Radix::Dec)?;
_ = match parse_digit_part(source, zero, Radix::Dec) {
Ok(_) => Ok(()),
Err(Error::NumericalConstantDigitNoDigit) if zero => Ok(()),
Err(e) => Err(e),
}?;
if let Ok(_) = source.try_parse_result(|source| try_parse_integral_type(source)) {
return Ok(Token::IntegerConstant);

View file

@ -22,17 +22,68 @@ pub enum Error {
pub type Result<T> = core::result::Result<T, Error>;
#[derive(Debug)]
pub struct Tree {
// Flat arena of AST nodes. Slot 0 holds Tag::Root so that Node
// (a NonZero<u32>) can be used directly as an index into this Vec.
nodes: Vec<Tag>,
// Top-level declarations (functions and globals) in source order;
// these are the render roots.
global_decls: Vec<Node>,
}
/// Writes `args` to `dst` preceded by `indent` spaces and, when `nl` is set,
/// followed by a single newline. Shared backend of the `write_indented!` and
/// `writeln_indented!` macros.
fn write_indented_inner<W: core::fmt::Write>(
    dst: &mut W,
    indent: u32,
    nl: bool,
    args: core::fmt::Arguments,
) -> std::result::Result<(), std::fmt::Error> {
    // Padding an empty string to `width` emits exactly `width` spaces.
    write!(dst, "{:width$}", "", width = indent as usize)?;
    dst.write_fmt(args)?;
    if nl {
        dst.write_char('\n')?;
    }
    Ok(())
}
// write_indented!(indent, writer, fmt...) — indented write, no trailing newline.
macro_rules! write_indented {
($indent:expr, $w:expr, $($arg:tt)*) => {
write_indented_inner($w, $indent, false, format_args!($($arg)*))
};
}
// writeln_indented!(indent, writer, fmt...) — indented write plus a '\n'.
macro_rules! writeln_indented {
($indent:expr, $w:expr, $($arg:tt)*) => {
write_indented_inner($w, $indent, true, format_args!($($arg)*))
};
}
impl Tree {
/// Creates an empty tree. Slot 0 is pre-filled with `Tag::Root` so that the
/// NonZero `Node` indices of real nodes start at 1.
pub fn new() -> Tree {
Self {
nodes: vec![Tag::Root],
global_decls: Vec::new(),
}
}
/// Allocates a placeholder slot (`Tag::Undefined`) and returns its index, to
/// be filled in later with `set_node`. Used for forward references, e.g. a
/// VarDecl whose own initializer's Assign must point back at it.
fn reserve_node(&mut self) -> Node {
let node = Node::new(self.nodes.len() as u32).unwrap();
self.nodes.push(Tag::Undefined);
node
}
/// Overwrites the tag stored at `node` (typically one obtained from
/// `reserve_node`). Returns `None` when the index is out of range.
fn set_node(&mut self, node: Node, tag: Tag) -> Option<()> {
*self.get_node_mut(node)? = tag;
Some(())
}
/// Checked mutable access to a node's tag.
fn get_node_mut(&mut self, node: Node) -> Option<&mut Tag> {
self.nodes.get_mut(node.get() as usize)
}
/// Checked shared access to a node's tag.
fn get_node(&self, node: Node) -> Option<&Tag> {
self.nodes.get(node.get() as usize)
}
fn push_tag(&mut self, tag: Tag) -> Node {
let node = Node::new(self.nodes.len() as u32).unwrap();
self.nodes.push(tag);
@ -51,7 +102,7 @@ impl Tree {
let bits = iter.fold(0u16, |acc, c| {
let digit = c as u8 - b'0';
acc + digit as u16 * 10
acc * 10 + digit as u16
});
IntegralType { signed, bits }
@ -115,7 +166,7 @@ impl Tree {
let prim = match token.token() {
Token::IntegralType => {
let int = Self::parse_integral_type(token.lexeme());
PrimitiveType::IntegralType(self.push_tag(Tag::IntegralType(int)))
return Ok(self.push_tag(Tag::IntegralType(int)));
}
Token::Void => PrimitiveType::Void,
Token::Bool => PrimitiveType::Bool,
@ -167,18 +218,30 @@ impl Tree {
None
};
let expr = if tokens.eat_token(Token::Equal).is_some() {
Some(self.parse_expr(tokens)?)
let node = self.reserve_node();
let assignment = if tokens.eat_token(Token::Equal).is_some() {
let expr = self.parse_expr(tokens)?;
Some(self.push_tag(Tag::Assign {
lhs: node,
rhs: expr,
}))
} else {
None
};
Ok(self.push_tag(Tag::VarDecl {
let_or_var,
name,
explicit_type,
expr,
}))
self.set_node(
node,
Tag::VarDecl {
let_or_var,
name,
explicit_type,
assignment,
},
)
.unwrap();
Ok(node)
}
pub fn parse_global_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
@ -188,6 +251,8 @@ impl Tree {
Ok(node)
}
/// PARAMETER <-
///    IDENTIFIER : TYPENAME
///
/// Parses a single function parameter (the type annotation is mandatory) and
/// returns the `Tag::Parameter` node.
pub fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let name = self.parse_ident(tokens)?;
tokens.expect_token(Token::Colon)?;
let ty = self.parse_typename(tokens)?;
Ok(self.push_tag(Tag::Parameter { name, ty }))
}
/// PARAMETER_LIST <-
/// PARAMETER
/// PARAMETER_LIST , PARAMETER
pub fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let mut parameters = Vec::new();
loop {
// PARAMETER
parameters.push(self.parse_parameter(tokens)?);
if !tokens.eat_token(Token::Comma).is_some() {
// COMMA
if !tokens.is_next_token(Token::Comma) {
break;
}
if !tokens.is_next_token(Token::Ident) {
if !tokens.is_next_token2(Token::Ident) {
break;
}
// skip comma
_ = tokens.next();
}
todo!()
Ok(self.push_tag(Tag::ParameterList { parameters }))
}
/// FUNCTION_PROTO <-
/// fn IDENTIFIER ()
/// fn IDENTIFIER () -> TYPENAME
/// fn IDENTIFIER ( PARAMETER_LIST ,? )
/// fn IDENTIFIER ( PARAMETER_LIST ,? ) -> TYPENAME
pub fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
tokens.expect_token(Token::Fn)?;
let name = self.parse_ident(tokens)?;
tokens.expect_token(Token::OpenParens)?;
let parameters = if !tokens.is_next_token(Token::CloseParens) {
Some(self.parse_parameter_list(tokens)?)
let parameters = self.parse_parameter_list(tokens)?;
// trailing comma
_ = tokens.eat_token(Token::Comma);
Some(parameters)
} else {
None
};
@ -236,6 +316,8 @@ impl Tree {
}))
}
/// FUNCTION_DECL <-
/// FUNCTION_PROTO BLOCK
pub fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let proto = self.parse_fn_proto(tokens)?;
@ -244,22 +326,165 @@ impl Tree {
Ok(self.push_tag(Tag::FunctionDecl { proto, body }))
}
/// BLOCK <-
///    { STATEMENT* EXPRESSION? }
///
/// A final expression not followed by `;` becomes the block's value
/// (`trailing_expr`). Fix: removed a leftover `todo!()` stub that preceded
/// the real body and made it unreachable.
pub fn parse_block(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    let mut stmts = Vec::new();
    _ = tokens.expect_token(Token::OpenBrace)?;
    let node = loop {
        if tokens.is_next_token(Token::CloseBrace) {
            // Empty remainder: block has no trailing expression.
            break self.push_tag(Tag::Block {
                statements: stmts,
                trailing_expr: None,
            });
        }
        match tokens.peek_token_or_err()?.token() {
            Token::Return => {
                // Cannot be None: we just peeked a `return` token.
                stmts.push(self.try_parse_return_stmt(tokens)?.unwrap());
            }
            Token::Var | Token::Let => {
                let node = self.parse_var_decl(tokens)?;
                tokens.expect_token(Token::Semi)?;
                stmts.push(node);
            }
            _ => {
                let node = self.parse_expr(tokens)?;
                match tokens.peek_token_or_err()?.token() {
                    Token::CloseBrace => {
                        // Expression directly before `}`: it is the block value.
                        break self.push_tag(Tag::Block {
                            statements: stmts,
                            trailing_expr: Some(node),
                        });
                    }
                    Token::Semi => {
                        _ = tokens.next();
                        stmts.push(node);
                    }
                    _ => {
                        // NOTE(review): an unexpected token after an expression
                        // panics; consider returning a parse error instead.
                        unreachable!()
                    }
                }
            }
        }
    };
    tokens.expect_token(Token::CloseBrace)?;
    Ok(node)
}
pub fn parse_assignment(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
todo!()
/// ASSIGNMENT_EXPR <-
///    BINARY_EXPRESSION
///    BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION
///
/// Parses a binary expression, then upgrades it to an assignment when an
/// assignment operator follows; otherwise the binary expression is returned
/// unchanged.
pub fn parse_assignment_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let lhs = self.parse_binary_expr(tokens, 0)?;
Ok(self.try_parse_assignment(lhs, tokens)?.unwrap_or(lhs))
}
pub fn parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
todo!()
/// ASSIGNMENT (suffix) <-
///    ASSIGNMENT_OP EXPRESSION
/// ASSIGNMENT_OP <-
///    = += -= *= /= %= |= ^= &= <<= >>=
///
/// When the next token is an assignment operator, parses the right-hand side
/// and returns a `Tag::Assign` storing into `lhs`. Compound operators desugar
/// into the matching binary op on `lhs` first (`a += b` → `a = a + b`).
/// Returns `Ok(None)` — consuming nothing — when no assignment operator follows.
pub fn try_parse_assignment(
    &mut self,
    lhs: Node,
    tokens: &mut TokenIterator,
) -> Result<Option<Node>> {
    if tokens
        .peek_token()
        .map(|itm| itm.token().is_assignment_op())
        == Some(true)
    {
        let op = tokens.next().unwrap();
        let rhs = self.parse_expr(tokens)?;
        let rhs = match op.token() {
            Token::PlusEqual => self.push_tag(Tag::Add { lhs, rhs }),
            Token::MinusEqual => self.push_tag(Tag::Sub { lhs, rhs }),
            Token::StarEqual => self.push_tag(Tag::Mul { lhs, rhs }),
            // BUG FIX: `/=` previously desugared to `Tag::Sub`; it must divide.
            Token::SlashEqual => self.push_tag(Tag::Div { lhs, rhs }),
            Token::PercentEqual => self.push_tag(Tag::Rem { lhs, rhs }),
            Token::PipeEqual => self.push_tag(Tag::BitOr { lhs, rhs }),
            Token::CaretEqual => self.push_tag(Tag::BitXOr { lhs, rhs }),
            Token::AmpersandEqual => self.push_tag(Tag::BitAnd { lhs, rhs }),
            Token::LessLessEqual => self.push_tag(Tag::Shl { lhs, rhs }),
            Token::GreaterGreaterEqual => self.push_tag(Tag::Shr { lhs, rhs }),
            Token::Equal => rhs,
            // Guarded by is_assignment_op() above.
            _ => unreachable!(),
        };
        Ok(Some(self.push_tag(Tag::Assign { lhs, rhs })))
    } else {
        Ok(None)
    }
}
/// RETURN_STATEMENT <-
///    return EXPRESSION? ;
///
/// Returns `Ok(None)` without consuming anything when the next token is not
/// `return`.
pub fn try_parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> Result<Option<Node>> {
    if tokens.eat_token(Token::Return).is_none() {
        return Ok(None);
    }
    // `return;` carries no operand expression.
    let expr = if tokens.is_next_token(Token::Semi) {
        None
    } else {
        Some(self.parse_expr(tokens)?)
    };
    tokens.expect_token(Token::Semi)?;
    Ok(Some(self.push_tag(Tag::ReturnStmt { expr })))
}
/// STATEMENT <-
///    RETURN_EXPRESSION
///    VAR_DECL ;
///    EXPRESSION ;
///
/// Fix: removed a leftover `todo!()` stub that preceded the real body and
/// made the match unreachable.
pub fn parse_statement(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    match tokens.peek_token_or_err()?.token() {
        // Cannot be None: we just peeked a `return` token.
        Token::Return => Ok(self.try_parse_return_stmt(tokens)?.unwrap()),
        Token::Var | Token::Let => {
            let node = self.parse_var_decl(tokens)?;
            tokens.expect_token(Token::Semi)?;
            Ok(node)
        }
        _ => {
            let node = self.parse_expr(tokens)?;
            tokens.expect_token(Token::Semi)?;
            Ok(node)
        }
    }
}
/// BINARY_EXPR <-
/// PREFIX_EXPR
/// PREFIX_EXPR * EXPRESSION
/// PREFIX_EXPR / EXPRESSION
/// PREFIX_EXPR % EXPRESSION
/// PREFIX_EXPR + EXPRESSION
/// PREFIX_EXPR - EXPRESSION
/// PREFIX_EXPR << EXPRESSION
/// PREFIX_EXPR >> EXPRESSION
/// PREFIX_EXPR < EXPRESSION
/// PREFIX_EXPR > EXPRESSION
/// PREFIX_EXPR <= EXPRESSION
/// PREFIX_EXPR >= EXPRESSION
/// PREFIX_EXPR == EXPRESSION
/// PREFIX_EXPR != EXPRESSION
/// PREFIX_EXPR & EXPRESSION
/// PREFIX_EXPR ^ EXPRESSION
/// PREFIX_EXPR | EXPRESSION
/// PREFIX_EXPR && EXPRESSION
/// PREFIX_EXPR || EXPRESSION
pub fn parse_binary_expr(
&mut self,
tokens: &mut TokenIterator,
@ -312,6 +537,12 @@ impl Tree {
Ok(node)
}
/// PREFIX_EXPR <-
/// AS_EXPR
/// ! AS_EXPR
/// - AS_EXPR
/// & AS_EXPR
/// * AS_EXPR
pub fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
match tokens.peek_token_or_err()?.token() {
Token::Bang => {
@ -324,10 +555,6 @@ impl Tree {
let lhs = self.parse_as_expr(tokens)?;
Ok(self.push_tag(Tag::Negate { lhs }))
}
Token::Plus => {
_ = tokens.next();
self.parse_as_expr(tokens)
}
Token::Ampersand => {
_ = tokens.next();
let lhs = self.parse_as_expr(tokens)?;
@ -342,6 +569,9 @@ impl Tree {
}
}
/// AS_EXPR <-
/// PRIMARY_EXPR
/// PRIMARY_EXPR as TYPENAME
pub fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let expr = self.parse_primary_expr(tokens)?;
@ -361,6 +591,12 @@ impl Tree {
self.parse_primary_expr(tokens)
}
/// PRIMARY_EXPR <-
/// IDENTIFIER
/// INTEGER_CONSTANT
/// FLOATING_CONSTANT
/// ( EXPRESSION )
/// { STATEMENT* EXPRESSION? }
pub fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
let token = tokens.peek_token_or_err()?;
match token.token() {
@ -369,6 +605,7 @@ impl Tree {
| Token::IntegerHexConstant
| Token::IntegerOctConstant
| Token::IntegerConstant => {
_ = tokens.next();
let (bits, ty) = Self::parse_integral_constant(token.token(), token.lexeme());
Ok(self.push_tag(Tag::IntegralConstant { bits, ty }))
}
@ -376,6 +613,7 @@ impl Tree {
| Token::FloatingExpConstant
| Token::DotFloatingConstant
| Token::DotFloatingExpConstant => {
_ = tokens.next();
let (bits, ty) = Self::parse_floating_constant(token.token(), token.lexeme());
Ok(self.push_tag(Tag::FloatingConstant { bits, ty }))
@ -386,19 +624,485 @@ impl Tree {
tokens.expect_token(Token::CloseParens)?;
Ok(node)
}
Token::OpenBrace => {
let node = self.parse_block(tokens)?;
Ok(node)
}
_ => unreachable!(),
}
}
/// EXPRESSION <-
///    ASSIGNMENT_EXPR
///
/// Top of the expression grammar; assignment has the lowest precedence.
/// Fix: removed a leftover `todo!()` stub preceding the real body.
pub fn parse_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
    self.parse_assignment_expr(tokens)
}
pub fn parse_program(&mut self, tokens: &mut TokenIterator) -> Result<Vec<Node>> {
todo!()
/// PROGRAM <-
/// (FUNCTION_DECL | GLOBAL_DECL)*
pub fn parse_program(&mut self, tokens: &mut TokenIterator) -> Result<()> {
while tokens.peek_token().is_some() {
let Some(token) = tokens.peek_token().map(|itm| itm.token()) else {
break;
};
let decl = match token {
Token::Var | Token::Let => self.parse_global_decl(tokens)?,
Token::Fn => self.parse_fn_decl(tokens)?,
_ => {
eprintln!("unexpected token: {}", token);
panic!("unexpected token at global scope");
}
};
self.global_decls.push(decl);
}
Ok(())
}
/// Entry point: parses the entire token stream into this tree.
/// Fix: removed the duplicate leftover stub `pub fn parse(.. ) {}` that
/// conflicted with this definition.
pub fn parse(&mut self, mut tokens: TokenIterator) -> Result<()> {
    self.parse_program(&mut tokens)
}
/// Returns the identifier's string when `node` is a `Tag::Ident`, else `None`.
/// Uses checked `get_node` lookup (consistent with `get_typename_str`) instead
/// of direct indexing, so an out-of-range node yields `None` rather than a panic.
fn get_ident_str(&self, node: Node) -> Option<&str> {
    match self.get_node(node)? {
        Tag::Ident { name } => Some(name.as_str()),
        _ => None,
    }
}
/// Best-effort human-readable name for a type node; `None` for non-type tags.
/// NOTE(review): `Pointer` currently renders as its pointee's name — the
/// pointer-ness (`*`) is not shown; confirm whether that is intended.
fn get_typename_str(&self, node: Node) -> Option<String> {
match self.get_node(node)? {
Tag::IntegralType(i) => Some(i.to_string()),
Tag::Ident { name } => Some(name.clone()),
Tag::Pointer { pointee } => self.get_typename_str(*pointee),
Tag::PrimitiveType(prim) => Some(prim.to_string()),
_ => None,
}
}
fn render_node<W: core::fmt::Write>(
&self,
writer: &mut W,
node: Node,
indent: u32,
) -> core::fmt::Result {
match &self.nodes[node.get() as usize] {
Tag::FunctionProto {
name,
parameters,
return_type,
} => {
self.render_node(writer, *name, indent)?;
self.render_node(writer, *return_type, indent)?;
if let Some(parameters) = parameters {
self.render_node(writer, *parameters, indent)?;
}
write_indented!(indent, writer, "%{} = function_proto: {{", node.get())?;
write!(writer, "name: \"{}\"", self.get_ident_str(*name).unwrap())?;
if let Some(parameters) = parameters {
write!(writer, ", parameters: %{}", parameters.get())?;
}
write!(writer, ", return_type: %{}", return_type.get())?;
writeln!(writer, "}}")
}
Tag::ParameterList { parameters } => {
writeln_indented!(indent, writer, "%{} = ParameterList = [", node.get())?;
for param in parameters {
self.render_node(writer, *param, indent + 1)?;
}
writeln_indented!(indent, writer, "]")
}
Tag::Parameter { name, ty } => {
writeln_indented!(
indent,
writer,
"%{} = {}: {},",
node.get(),
self.get_ident_str(*name).unwrap(),
self.get_typename_str(*ty).unwrap()
)
}
Tag::Pointer { .. } | Tag::IntegralType(_) | Tag::PrimitiveType(_) => {
writeln_indented!(
indent,
writer,
"%{} = type({})",
node.get(),
self.get_typename_str(node).unwrap()
)
}
Tag::PointerQualifier { constness } => todo!(),
Tag::FunctionDecl { proto, body } => {
self.render_node(writer, *proto, indent)?;
writeln_indented!(
indent,
writer,
"%{} = function_decl( proto: %{}, body: %{}) {{",
node.get(),
proto.get(),
body.get()
)?;
self.render_node(writer, *body, indent + 1)?;
writeln_indented!(indent, writer, "}}")
}
Tag::Ident { name } => {
writeln_indented!(indent, writer, "%{} = identifier(\"{name}\")", node.get())
}
Tag::IntegralConstant { bits, ty } => {
writeln_indented!(
indent,
writer,
"%{} = {}({})",
node.get(),
ty.to_string(),
bits
)
}
Tag::FloatingConstant { bits, ty } => {
writeln_indented!(
indent,
writer,
"%{} = {}({})",
node.get(),
ty.to_string(),
bits
)
}
Tag::Block {
statements,
trailing_expr,
} => {
writeln_indented!(indent, writer, "%{} = {{", node.get())?;
for stmt in statements {
self.render_node(writer, *stmt, indent + 1)?;
}
if let Some(expr) = trailing_expr {
self.render_node(writer, *expr, indent + 1)?;
writeln_indented!(
indent + 1,
writer,
"break %{} %{};",
node.get(),
expr.get()
)?;
}
writeln_indented!(indent, writer, "}}")
}
Tag::ReturnStmt { expr } => {
if let Some(expr) = expr {
self.render_node(writer, *expr, indent)?;
writeln_indented!(indent, writer, "%{} = return %{};", node.get(), expr.get())
} else {
writeln_indented!(indent, writer, "%{} = return;", node.get())
}
}
Tag::ExprStmt { expr } => self.render_node(writer, *expr, indent),
Tag::VarDecl {
let_or_var,
name,
explicit_type,
assignment,
} => {
self.render_node(writer, *name, indent)?;
explicit_type.map(|ty| self.render_node(writer, ty, indent));
write_indented!(
indent,
writer,
"%{} = decl_{}(name: \"{}\"",
node.get(),
match let_or_var {
LetOrVar::Let => {
"const"
}
LetOrVar::Var => {
"mut"
}
},
self.get_ident_str(*name).unwrap()
)?;
if let Some(ty) = explicit_type {
write!(writer, ", ty: {}", self.get_typename_str(*ty).unwrap())?;
}
writeln!(writer, ");")?;
if let Some(assignment) = assignment {
self.render_node(writer, *assignment, indent)?;
}
Ok(())
}
Tag::CallExpr { lhs, rhs } => todo!(),
Tag::ArgumentList { parameters } => todo!(),
Tag::Argument { name, expr } => todo!(),
Tag::ExplicitCast { lhs, typename } => {
self.render_node(writer, *lhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = cast<{}>(%{})",
node.get(),
self.get_typename_str(*typename).unwrap(),
lhs.get()
)
}
Tag::Deref { lhs } => {
self.render_node(writer, *lhs, indent)?;
writeln_indented!(indent, writer, "%{} = deref(%{})", node.get(), lhs.get())
}
Tag::Ref { lhs } => {
self.render_node(writer, *lhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = address_of(%{})",
node.get(),
lhs.get()
)
}
Tag::Not { lhs } => {
self.render_node(writer, *lhs, indent)?;
writeln_indented!(indent, writer, "%{} = ", node.get(),)
}
Tag::Negate { lhs } => {
self.render_node(writer, *lhs, indent)?;
writeln_indented!(indent, writer, "%{} = not(%{})", node.get(), lhs.get())
}
Tag::Or { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} || %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::And { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} && %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitOr { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} | %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitAnd { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} & %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::BitXOr { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} ^ %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Eq { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} == %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::NEq { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} != %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Lt { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} < %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Gt { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} > %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Le { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} <= %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Ge { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} >= %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Shl { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} << %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Shr { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} >> %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Add { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} + %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Sub { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} - %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Mul { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} * %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Div { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} / %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Rem { lhs, rhs } => {
self.render_node(writer, *lhs, indent)?;
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = %{} % %{}",
node.get(),
lhs.get(),
rhs.get()
)
}
Tag::Assign { lhs, rhs } => {
self.render_node(writer, *rhs, indent)?;
writeln_indented!(
indent,
writer,
"%{} = store(dst: %{}, val: %{})",
node.get(),
lhs.get(),
rhs.get()
)
}
_ => unreachable!(),
}
}
/// Renders every global declaration (and, recursively, its children) into
/// `writer` using the textual IR-like format produced by `render_node`.
pub fn render<W: core::fmt::Write>(&self, writer: &mut W) -> core::fmt::Result {
for decl in &self.global_decls {
self.render_node(writer, *decl, 0)?;
}
Ok(())
}
}
static PRECEDENCE_MAP: std::sync::LazyLock<HashMap<Token, u32>> = std::sync::LazyLock::new(|| {
@ -423,3 +1127,43 @@ static PRECEDENCE_MAP: std::sync::LazyLock<HashMap<Token, u32>> = std::sync::Laz
(Token::Slash, 100),
])
});
#[cfg(test)]
mod tests {
use crate::lexer::Tokenizer;
use super::*;
// NOTE(review): both tests are smoke tests — they only verify that
// tokenizing, parsing and rendering complete without panicking, and print
// the rendered tree; no assertions are made on the output.
#[test]
fn render_ast() {
let src = "let a: u21 = 3;";
let tokens = Tokenizer::new(src.as_bytes()).unwrap();
let mut tree = Tree::new();
tree.parse(tokens.iter()).unwrap();
let mut buf = String::new();
tree.render(&mut buf).unwrap();
println!("{buf}");
}
// Exercises function declarations, parameters, blocks with trailing
// expressions, and a binary comparison.
#[test]
fn render_ast2() {
let src = "
fn main() -> void {
let a: u32 = 0;
a == 1
}
fn square(x: u32) -> u32 {
x * x
}
";
let tokens = Tokenizer::new(src.as_bytes()).unwrap();
let mut tree = Tree::new();
tree.parse(tokens.iter()).unwrap();
let mut buf = String::new();
tree.render(&mut buf).unwrap();
println!("{buf}");
}
}

View file

@ -139,6 +139,54 @@ tokens!(pub Token: {
GreaterGreaterEqual => ">>="
});
impl Token {
/// True for plain `=` and every compound-assignment operator (`+=`, `-=`, …).
pub fn is_assignment_op(self) -> bool {
    // `matches!` over a manual match-to-bool (clippy: match_like_matches_macro).
    matches!(
        self,
        Token::PlusEqual
            | Token::MinusEqual
            | Token::StarEqual
            | Token::SlashEqual
            | Token::PercentEqual
            | Token::PipeEqual
            | Token::CaretEqual
            | Token::AmpersandEqual
            | Token::LessLessEqual
            | Token::GreaterGreaterEqual
            | Token::Equal
    )
}
/// True for tokens usable as prefix (unary) operators: `+ - * & !`.
pub fn is_unary_op(self) -> bool {
    // `matches!` over a manual match-to-bool (clippy: match_like_matches_macro).
    matches!(
        self,
        Token::Plus | Token::Minus | Token::Star | Token::Ampersand | Token::Bang
    )
}
/// True for tokens usable as infix binary operators (arithmetic, bitwise,
/// logical, comparison, and shifts).
pub fn is_binary_op(self) -> bool {
    // `matches!` over a manual match-to-bool (clippy: match_like_matches_macro).
    matches!(
        self,
        Token::Star
            | Token::Slash
            | Token::Percent
            | Token::Pipe
            | Token::Ampersand
            | Token::Caret
            | Token::Plus
            | Token::Minus
            | Token::PipePipe
            | Token::AmpersandAmpersand
            | Token::BangEqual
            | Token::EqualEqual
            | Token::Less
            | Token::Greater
            | Token::LessEqual
            | Token::GreaterEqual
            | Token::LessLess
            | Token::GreaterGreater
    )
}
}
/// Helper type for parsing tokens that have a defined lexeme, such as `fn`,
/// `f32`, `const`, etc. Tokens with variable lexemes, such as primitive
/// integral types, constants or identifiers are not parsed by this.
@ -177,11 +225,19 @@ impl LexemeParser {
break;
}
this.advance(ch)?;
match this.advance(ch)? {
None => {}
Some(token) => {
return Some(token);
}
}
}
this.finish()
}
/// Accepts a `char` and returns `Some(None)` until it is done trying to parse the longest lexeme.
/// If no more potential matches are available, returns the longest matched token as `Some(Token)`, or `None` on failure.
/// accepts a char and returns `None` until it is done trying to parse the longest `Token`.
/// when finished, returns a Token, if it parsed one, or `Some(None)`.
pub fn advance(&mut self, ch: char) -> Option<Option<Token>> {
@ -215,10 +271,13 @@ impl LexemeParser {
if self.lexemes.is_empty() {
// return match, if it exists
return Some(self.candidates.pop());
return match self.candidates.pop() {
Some(token) => Some(Some(token)),
None => None,
};
}
return None;
return Some(None);
}
}