diff --git a/grammar.bnf b/grammar.bnf index 4a29053..9d7974d 100644 --- a/grammar.bnf +++ b/grammar.bnf @@ -33,8 +33,10 @@ ::= ';' ::= |= | &= | ^= | /= | *= | %= | <<= | >>= | += | -= | = - ::= + ::= + ::= + | ::= | || ::= @@ -57,7 +59,7 @@ | (* | / | %) ::= - ::= ! - + & * + ::= ! - & * ::= as @@ -73,6 +75,7 @@ | | | '(' ')' + | ::= (let | var) (':' )? ( = )? ::= ';' diff --git a/src/ast.rs b/src/ast.rs index 21838e1..5d424c9 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2,7 +2,9 @@ use std::num::NonZero; pub type Node = NonZero; +#[derive(Debug)] pub enum Tag { + Undefined, Root, FunctionProto { /// Ident @@ -21,10 +23,7 @@ pub enum Tag { /// TypeName ty: Node, }, - TypeName { - /// Ident | PrimitiveType | Pointer - inner: Node, - }, + // TypeName: meta-tag of Ident | PrimitiveType | Pointer Pointer { /// TypeName pointee: Node, @@ -34,10 +33,6 @@ pub enum Tag { }, IntegralType(IntegralType), PrimitiveType(PrimitiveType), - Decl { - /// FunctionDecl | VarDecl - inner: Node, - }, FunctionDecl { /// FunctionProto proto: Node, @@ -57,7 +52,7 @@ pub enum Tag { }, Block { /// ReturnStmt | ExprStmt | VarDecl - statements: Node, + statements: Vec, trailing_expr: Option, }, ReturnStmt { @@ -72,7 +67,7 @@ pub enum Tag { name: Node, /// TypeName explicit_type: Option, - expr: Option, + assignment: Option, }, CallExpr { /// Ident | Expr @@ -185,21 +180,39 @@ pub enum Tag { }, } +#[derive(Debug)] pub enum LetOrVar { Let, Var, } +#[derive(Debug)] pub struct IntegralType { pub signed: bool, pub bits: u16, } +impl ToString for IntegralType { + fn to_string(&self) -> String { + format!("{}{}", if self.signed { "i" } else { "u" }, self.bits) + } +} + +#[derive(Debug)] pub enum FloatingType { Binary32, Binary64, } +impl ToString for FloatingType { + fn to_string(&self) -> String { + match self { + FloatingType::Binary32 => "binary32".to_owned(), + FloatingType::Binary64 => "binary64".to_owned(), + } + } +} + impl IntegralType { pub fn u32() -> IntegralType { Self { @@ -209,9 +222,21 @@ impl IntegralType { } } +#[derive(Debug)] pub enum PrimitiveType { FloatingType(FloatingType), IntegralType(Node), Bool, Void, } + +impl ToString for PrimitiveType { + fn to_string(&self) -> String { + match self { + PrimitiveType::FloatingType(f) => f.to_string(), + PrimitiveType::IntegralType(i) => i.to_string(), + PrimitiveType::Bool => "bool".to_owned(), + PrimitiveType::Void => "void".to_owned(), + } + } +} diff --git a/src/lexer.rs b/src/lexer.rs index a3053e3..46965b3 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -124,6 +124,7 @@ impl<'a> TokenIterator<'a> { pub fn eat_token(&mut self, token: Token) -> Option> { self.next_if(|item| item.token() == token) } + pub fn peek_token(&mut self) -> Option> { self.clone().next() } @@ -142,8 +143,15 @@ impl<'a> TokenIterator<'a> { pub fn is_next_token(&mut self, token: Token) -> bool { self.clone().next_if(|item| item.token() == token).is_some() } + pub fn is_next_token2(&mut self, token: Token) -> bool { + self.clone() + .skip(1) + .next_if(|item| item.token() == token) + .is_some() + } } +#[derive(Debug)] pub struct TokenItem<'a> { tokenizer: &'a Tokenizer<'a>, inner: TokenPos, @@ -183,7 +191,7 @@ impl<'a> Iterator for TokenIterator<'a> { type Item = TokenItem<'a>; fn next(&mut self) -> Option { - if self.tokenizer.tokens.len() >= self.offset { + if self.offset >= self.tokenizer.tokens.len() { None } else { let index = self.offset; @@ -275,16 +283,47 @@ impl<'a> Tokenizer<'a> { } } + pub fn new(bytes: &'a [u8]) -> Result> { + let mut this = Self { + source: Chars { bytes, offset: 0 }, + tokens: Vec::new(), + }; + + loop { + if this.source.is_eof() { + break; + } + + this.next_token().map_err(|e| { + eprintln!("error while tokenizing: {e}"); + eprintln!( + "at position {}: {}", + this.source.offset(), + &this.source.as_str()[..this.source.as_str().len().min(16)] + ); + + e + })?; + } + + Ok(this) + } + fn push_token(&mut self, token: Token, start: u32, end: u32) -> Result<()> { self.tokens.push(TokenPos::new(token, start, end)); Ok(()) } - pub fn next_token(&mut self) -> Result<()> { + fn next_token(&mut self) -> Result<()> { self.source .take_while_ref(|&c| crate::common::is_whitespace(c)) .count(); + + if self.source.is_eof() { + return Ok(()); + } + let start = self.source.position(); let token = self.source.try_parse_result(|source| { @@ -303,9 +342,9 @@ impl<'a> Tokenizer<'a> { } Ok(None) - }); + })?; - if let Some(token) = token? { + if let Some(token) = token { return self.push_token(token, start, self.source.position()); } @@ -480,7 +519,7 @@ impl Radix { 'A'..='F' => c as u8 - b'A', _ => unreachable!(), }; - acc + digit as u64 * 16 + acc * 16 + digit as u64 } fold } @@ -490,7 +529,7 @@ impl Radix { '0'..='1' => c as u8 - b'0', _ => unreachable!(), }; - acc + digit as u64 * 2 + acc * 2 + digit as u64 } fold } @@ -500,7 +539,7 @@ impl Radix { '0'..='9' => c as u8 - b'0', _ => unreachable!(), }; - acc + digit as u64 * 10 + acc * 10 + digit as u64 } fold } @@ -510,7 +549,7 @@ impl Radix { '0'..='7' => c as u8 - b'0', _ => unreachable!(), }; - acc + digit as u64 * 8 + acc * 8 + digit as u64 } fold } @@ -600,7 +639,11 @@ fn parse_constant(source: &mut Chars) -> Result { // if zero: `_`* DIGIT (DIGIT|`_`)* // else: DIGIT (DIGIT|`_`)* - let _digits = parse_digit_part(source, false, Radix::Dec)?; + _ = match parse_digit_part(source, zero, Radix::Dec) { + Ok(_) => Ok(()), + Err(Error::NumericalConstantDigitNoDigit) if zero => Ok(()), + Err(e) => Err(e), + }?; if let Ok(_) = source.try_parse_result(|source| try_parse_integral_type(source)) { return Ok(Token::IntegerConstant); diff --git a/src/parser.rs b/src/parser.rs index 2b266f7..2773a2c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -22,17 +22,68 @@ pub enum Error { pub type Result = core::result::Result; +#[derive(Debug)] pub struct Tree { nodes: Vec, + global_decls: Vec, +} + +fn write_indented_inner( + dst: &mut W, + indent: u32, + nl: bool, + args: core::fmt::Arguments, +) -> std::result::Result<(), std::fmt::Error> { + for _ in 0..indent { + dst.write_char(' ')?; + } + dst.write_fmt(args)?; + if nl { + dst.write_char('\n')?; + } + Ok(()) +} + +macro_rules! write_indented { + ($indent:expr, $w:expr, $($arg:tt)*) => { + write_indented_inner($w, $indent, false, format_args!($($arg)*)) + }; +} + +macro_rules! writeln_indented { + ($indent:expr, $w:expr, $($arg:tt)*) => { + write_indented_inner($w, $indent, true, format_args!($($arg)*)) + }; } impl Tree { pub fn new() -> Tree { Self { nodes: vec![Tag::Root], + global_decls: Vec::new(), } } + fn reserve_node(&mut self) -> Node { + let node = Node::new(self.nodes.len() as u32).unwrap(); + self.nodes.push(Tag::Undefined); + node + } + + fn set_node(&mut self, node: Node, tag: Tag) -> Option<()> { + *self.get_node_mut(node)? = tag; + + Some(()) + } + + fn get_node_mut(&mut self, node: Node) -> Option<&mut Tag> { + self.nodes.get_mut(node.get() as usize) + } + + fn get_node(&self, node: Node) -> Option<&Tag> { + self.nodes.get(node.get() as usize) + } + fn push_tag(&mut self, tag: Tag) -> Node { let node = Node::new(self.nodes.len() as u32).unwrap(); self.nodes.push(tag); @@ -51,7 +102,7 @@ impl Tree { let bits = iter.fold(0u16, |acc, c| { let digit = c as u8 - b'0'; - acc + digit as u16 * 10 + acc * 10 + digit as u16 }); IntegralType { signed, bits } @@ -115,7 +166,7 @@ impl Tree { let prim = match token.token() { Token::IntegralType => { let int = Self::parse_integral_type(token.lexeme()); - PrimitiveType::IntegralType(self.push_tag(Tag::IntegralType(int))) + return Ok(self.push_tag(Tag::IntegralType(int))); } Token::Void => PrimitiveType::Void, Token::Bool => PrimitiveType::Bool, @@ -167,18 +218,30 @@ impl Tree { None }; - let expr = if tokens.eat_token(Token::Equal).is_some() { - Some(self.parse_expr(tokens)?) + let node = self.reserve_node(); + + let assignment = if tokens.eat_token(Token::Equal).is_some() { + let expr = self.parse_expr(tokens)?; + Some(self.push_tag(Tag::Assign { + lhs: node, + rhs: expr, + })) } else { None }; - Ok(self.push_tag(Tag::VarDecl { - let_or_var, - name, - explicit_type, - expr, - })) + self.set_node( + node, + Tag::VarDecl { + let_or_var, + name, + explicit_type, + assignment, + }, + ) + .unwrap(); + + Ok(node) } pub fn parse_global_decl(&mut self, tokens: &mut TokenIterator) -> Result { @@ -188,6 +251,8 @@ impl Tree { Ok(node) } + /// PARAMETER <- + /// IDENTIFIER : TYPENAME pub fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> Result { let name = self.parse_ident(tokens)?; tokens.expect_token(Token::Colon)?; @@ -196,28 +261,43 @@ impl Tree { Ok(self.push_tag(Tag::Parameter { name, ty })) } + /// PARAMETER_LIST <- + /// PARAMETER + /// PARAMETER_LIST , PARAMETER pub fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> Result { let mut parameters = Vec::new(); loop { + // PARAMETER parameters.push(self.parse_parameter(tokens)?); - if !tokens.eat_token(Token::Comma).is_some() { + // COMMA + if !tokens.is_next_token(Token::Comma) { break; } - if !tokens.is_next_token(Token::Ident) { + if !tokens.is_next_token2(Token::Ident) { break; } + // skip comma + _ = tokens.next(); } - todo!() + Ok(self.push_tag(Tag::ParameterList { parameters })) } + /// FUNCTION_PROTO <- + /// fn IDENTIFIER () + /// fn IDENTIFIER () -> TYPENAME + /// fn IDENTIFIER ( PARAMETER_LIST ,? ) + /// fn IDENTIFIER ( PARAMETER_LIST ,? ) -> TYPENAME pub fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> Result { tokens.expect_token(Token::Fn)?; let name = self.parse_ident(tokens)?; tokens.expect_token(Token::OpenParens)?; let parameters = if !tokens.is_next_token(Token::CloseParens) { - Some(self.parse_parameter_list(tokens)?) + let parameters = self.parse_parameter_list(tokens)?; + // trailing comma + _ = tokens.eat_token(Token::Comma); + Some(parameters) } else { None }; @@ -236,6 +316,8 @@ impl Tree { })) } + /// FUNCTION_DECL <- + /// FUNCTION_PROTO BLOCK pub fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Result { let proto = self.parse_fn_proto(tokens)?; @@ -244,22 +326,165 @@ impl Tree { Ok(self.push_tag(Tag::FunctionDecl { proto, body })) } + /// BLOCK <- + /// { STATEMENT* EXPRESSION? } pub fn parse_block(&mut self, tokens: &mut TokenIterator) -> Result { - todo!() + let mut stmts = Vec::new(); + _ = tokens.expect_token(Token::OpenBrace)?; + + let node = loop { + if tokens.is_next_token(Token::CloseBrace) { + break self.push_tag(Tag::Block { + statements: stmts, + trailing_expr: None, + }); + } + + match tokens.peek_token_or_err()?.token() { + Token::Return => { + stmts.push(self.try_parse_return_stmt(tokens)?.unwrap()); + } + Token::Var | Token::Let => { + let node = self.parse_var_decl(tokens)?; + tokens.expect_token(Token::Semi)?; + + stmts.push(node); + } + _ => { + let node = self.parse_expr(tokens)?; + match tokens.peek_token_or_err()?.token() { + Token::CloseBrace => { + break self.push_tag(Tag::Block { + statements: stmts, + trailing_expr: Some(node), + }); + } + Token::Semi => { + _ = tokens.next(); + stmts.push(node); + } + _ => { + unreachable!() + } + } + } + } + }; + + tokens.expect_token(Token::CloseBrace)?; + + Ok(node) } - pub fn parse_assignment(&mut self, tokens: &mut TokenIterator) -> Result { - todo!() + /// ASSIGNMENT_EXPR <- + /// BINARY_EXPRESSION + /// BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION + pub fn parse_assignment_expr(&mut self, tokens: &mut TokenIterator) -> Result { + let lhs = self.parse_binary_expr(tokens, 0)?; + + Ok(self.try_parse_assignment(lhs, tokens)?.unwrap_or(lhs)) } - pub fn parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> Result { - todo!() + /// ASSIGNMENT_EXPR <- + /// BINARY_EXPRESSION ASSIGNMENT_OP EXPRESSION + /// ASSIGNMENT_OP <- + /// = += -= *= /= %= ... + pub fn try_parse_assignment( + &mut self, + lhs: Node, + tokens: &mut TokenIterator, + ) -> Result> { + if tokens + .peek_token() + .map(|itm| itm.token().is_assignment_op()) + == Some(true) + { + let op = tokens.next().unwrap(); + let rhs = self.parse_expr(tokens)?; + + let rhs = match op.token() { + Token::PlusEqual => self.push_tag(Tag::Add { lhs, rhs }), + Token::MinusEqual => self.push_tag(Tag::Sub { lhs, rhs }), + Token::StarEqual => self.push_tag(Tag::Mul { lhs, rhs }), + Token::SlashEqual => self.push_tag(Tag::Sub { lhs, rhs }), + Token::PercentEqual => self.push_tag(Tag::Rem { lhs, rhs }), + Token::PipeEqual => self.push_tag(Tag::BitOr { lhs, rhs }), + Token::CaretEqual => self.push_tag(Tag::BitXOr { lhs, rhs }), + Token::AmpersandEqual => self.push_tag(Tag::BitAnd { lhs, rhs }), + Token::LessLessEqual => self.push_tag(Tag::Shl { lhs, rhs }), + Token::GreaterGreaterEqual => self.push_tag(Tag::Shr { lhs, rhs }), + Token::Equal => rhs, + _ => { + unreachable!() + } + }; + Ok(Some(self.push_tag(Tag::Assign { lhs, rhs }))) + } else { + Ok(None) + } } + /// RETURN_STATEMENT <- + /// return EXPRESSION? ; + pub fn try_parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> Result> { + if tokens.eat_token(Token::Return).is_some() { + let expr = if !tokens.is_next_token(Token::Semi) { + let expr = Some(self.parse_expr(tokens)?); + + expr + } else { + None + }; + + tokens.expect_token(Token::Semi)?; + Ok(Some(self.push_tag(Tag::ReturnStmt { expr }))) + } else { + Ok(None) + } + } + + /// STATEMENT <- + /// RETURN_EXPRESSION + /// VAR_DECL ; + /// EXPRESSION ; pub fn parse_statement(&mut self, tokens: &mut TokenIterator) -> Result { - todo!() + match tokens.peek_token_or_err()?.token() { + Token::Return => Ok(self.try_parse_return_stmt(tokens)?.unwrap()), + Token::Var | Token::Let => { + let node = self.parse_var_decl(tokens)?; + tokens.expect_token(Token::Semi)?; + + Ok(node) + } + _ => { + let node = self.parse_expr(tokens)?; + tokens.expect_token(Token::Semi)?; + + Ok(node) + } + } } + /// BINARY_EXPR <- + /// PREFIX_EXPR + /// PREFIX_EXPR * EXPRESSION + /// PREFIX_EXPR / EXPRESSION + /// PREFIX_EXPR % EXPRESSION + /// PREFIX_EXPR + EXPRESSION + /// PREFIX_EXPR - EXPRESSION + /// PREFIX_EXPR << EXPRESSION + /// PREFIX_EXPR >> EXPRESSION + /// PREFIX_EXPR < EXPRESSION + /// PREFIX_EXPR > EXPRESSION + /// PREFIX_EXPR <= EXPRESSION + /// PREFIX_EXPR >= EXPRESSION + /// PREFIX_EXPR == EXPRESSION + /// PREFIX_EXPR != EXPRESSION + /// PREFIX_EXPR & EXPRESSION + /// PREFIX_EXPR ^ EXPRESSION + /// PREFIX_EXPR | EXPRESSION + /// PREFIX_EXPR && EXPRESSION + /// PREFIX_EXPR || EXPRESSION pub fn parse_binary_expr( &mut self, tokens: &mut TokenIterator, @@ -312,6 +537,12 @@ impl Tree { Ok(node) } + /// PREFIX_EXPR <- + /// AS_EXPR + /// ! AS_EXPR + /// - AS_EXPR + /// & AS_EXPR + /// * AS_EXPR pub fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> Result { match tokens.peek_token_or_err()?.token() { Token::Bang => { @@ -324,10 +555,6 @@ impl Tree { let lhs = self.parse_as_expr(tokens)?; Ok(self.push_tag(Tag::Negate { lhs })) } - Token::Plus => { - _ = tokens.next(); - self.parse_as_expr(tokens) - } Token::Ampersand => { _ = tokens.next(); let lhs = self.parse_as_expr(tokens)?; @@ -342,6 +569,9 @@ impl Tree { } } + /// AS_EXPR <- + /// PRIMARY_EXPR + /// PRIMARY_EXPR as TYPENAME pub fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> Result { let expr = self.parse_primary_expr(tokens)?; @@ -361,6 +591,12 @@ impl Tree { self.parse_primary_expr(tokens) } + /// PRIMARY_EXPR <- + /// IDENTIFIER + /// INTEGER_CONSTANT + /// FLOATING_CONSTANT + /// ( EXPRESSION ) + /// { STATEMENT* EXPRESSION? } pub fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> Result { let token = tokens.peek_token_or_err()?; match token.token() { @@ -369,6 +605,7 @@ impl Tree { | Token::IntegerHexConstant | Token::IntegerOctConstant | Token::IntegerConstant => { + _ = tokens.next(); let (bits, ty) = Self::parse_integral_constant(token.token(), token.lexeme()); Ok(self.push_tag(Tag::IntegralConstant { bits, ty })) } @@ -376,6 +613,7 @@ impl Tree { | Token::FloatingExpConstant | Token::DotFloatingConstant | Token::DotFloatingExpConstant => { + _ = tokens.next(); let (bits, ty) = Self::parse_floating_constant(token.token(), token.lexeme()); Ok(self.push_tag(Tag::FloatingConstant { bits, ty })) @@ -386,19 +624,485 @@ impl Tree { tokens.expect_token(Token::CloseParens)?; Ok(node) } + Token::OpenBrace => { + let node = self.parse_block(tokens)?; + Ok(node) + } _ => unreachable!(), } } + /// EXPRESSION <- + /// ASSIGNMENT_EXPR pub fn parse_expr(&mut self, tokens: &mut TokenIterator) -> Result { - todo!() + self.parse_assignment_expr(tokens) } - pub fn parse_program(&mut self, tokens: &mut TokenIterator) -> Result> { - todo!() + /// PROGRAM <- + /// (FUNCTION_DECL | GLOBAL_DECL)* + pub fn parse_program(&mut self, tokens: &mut TokenIterator) -> Result<()> { + while tokens.peek_token().is_some() { + let Some(token) = tokens.peek_token().map(|itm| itm.token()) else { + break; + }; + + let decl = match token { + Token::Var | Token::Let => self.parse_global_decl(tokens)?, + Token::Fn => self.parse_fn_decl(tokens)?, + _ => { + eprintln!("unexpected token: {}", token); + panic!("unexpected token at global scope"); + } + }; + self.global_decls.push(decl); + } + Ok(()) } - pub fn parse(&mut self, tokens: TokenIterator) {} + pub fn parse(&mut self, mut tokens: TokenIterator) -> Result<()> { + self.parse_program(&mut tokens) + } + + fn get_ident_str(&self, node: Node) -> Option<&str> { + match &self.nodes[node.get() as usize] { + Tag::Ident { name } => Some(name.as_str()), + _ => None, + } + } + + fn get_typename_str(&self, node: Node) -> Option { + match self.get_node(node)? { + Tag::IntegralType(i) => Some(i.to_string()), + Tag::Ident { name } => Some(name.clone()), + Tag::Pointer { pointee } => self.get_typename_str(*pointee), + Tag::PrimitiveType(prim) => Some(prim.to_string()), + _ => None, + } + } + + fn render_node( + &self, + writer: &mut W, + node: Node, + indent: u32, + ) -> core::fmt::Result { + match &self.nodes[node.get() as usize] { + Tag::FunctionProto { + name, + parameters, + return_type, + } => { + self.render_node(writer, *name, indent)?; + self.render_node(writer, *return_type, indent)?; + if let Some(parameters) = parameters { + self.render_node(writer, *parameters, indent)?; + } + write_indented!(indent, writer, "%{} = function_proto: {{", node.get())?; + write!(writer, "name: \"{}\"", self.get_ident_str(*name).unwrap())?; + if let Some(parameters) = parameters { + write!(writer, ", parameters: %{}", parameters.get())?; + } + write!(writer, ", return_type: %{}", return_type.get())?; + writeln!(writer, "}}") + } + Tag::ParameterList { parameters } => { + writeln_indented!(indent, writer, "%{} = ParameterList = [", node.get())?; + for param in parameters { + self.render_node(writer, *param, indent + 1)?; + } + writeln_indented!(indent, writer, "]") + } + Tag::Parameter { name, ty } => { + writeln_indented!( + indent, + writer, + "%{} = {}: {},", + node.get(), + self.get_ident_str(*name).unwrap(), + self.get_typename_str(*ty).unwrap() + ) + } + Tag::Pointer { .. } | Tag::IntegralType(_) | Tag::PrimitiveType(_) => { + writeln_indented!( + indent, + writer, + "%{} = type({})", + node.get(), + self.get_typename_str(node).unwrap() + ) + } + Tag::PointerQualifier { constness } => todo!(), + Tag::FunctionDecl { proto, body } => { + self.render_node(writer, *proto, indent)?; + writeln_indented!( + indent, + writer, + "%{} = function_decl( proto: %{}, body: %{}) {{", + node.get(), + proto.get(), + body.get() + )?; + self.render_node(writer, *body, indent + 1)?; + writeln_indented!(indent, writer, "}}") + } + Tag::Ident { name } => { + writeln_indented!(indent, writer, "%{} = identifier(\"{name}\")", node.get()) + } + Tag::IntegralConstant { bits, ty } => { + writeln_indented!( + indent, + writer, + "%{} = {}({})", + node.get(), + ty.to_string(), + bits + ) + } + Tag::FloatingConstant { bits, ty } => { + writeln_indented!( + indent, + writer, + "%{} = {}({})", + node.get(), + ty.to_string(), + bits + ) + } + Tag::Block { + statements, + trailing_expr, + } => { + writeln_indented!(indent, writer, "%{} = {{", node.get())?; + for stmt in statements { + self.render_node(writer, *stmt, indent + 1)?; + } + if let Some(expr) = trailing_expr { + self.render_node(writer, *expr, indent + 1)?; + writeln_indented!( + indent + 1, + writer, + "break %{} %{};", + node.get(), + expr.get() + )?; + } + writeln_indented!(indent, writer, "}}") + } + Tag::ReturnStmt { expr } => { + if let Some(expr) = expr { + self.render_node(writer, *expr, indent)?; + writeln_indented!(indent, writer, "%{} = return %{};", node.get(), expr.get()) + } else { + writeln_indented!(indent, writer, "%{} = return;", node.get()) + } + } + Tag::ExprStmt { expr } => self.render_node(writer, *expr, indent), + Tag::VarDecl { + let_or_var, + name, + explicit_type, + assignment, + } => { + self.render_node(writer, *name, indent)?; + explicit_type.map(|ty| self.render_node(writer, ty, indent)); + write_indented!( + indent, + writer, + "%{} = decl_{}(name: \"{}\"", + node.get(), + match let_or_var { + LetOrVar::Let => { + "const" + } + LetOrVar::Var => { + "mut" + } + }, + self.get_ident_str(*name).unwrap() + )?; + if let Some(ty) = explicit_type { + write!(writer, ", ty: {}", self.get_typename_str(*ty).unwrap())?; + } + writeln!(writer, ");")?; + if let Some(assignment) = assignment { + self.render_node(writer, *assignment, indent)?; + } + Ok(()) + } + Tag::CallExpr { lhs, rhs } => todo!(), + Tag::ArgumentList { parameters } => todo!(), + Tag::Argument { name, expr } => todo!(), + Tag::ExplicitCast { lhs, typename } => { + self.render_node(writer, *lhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = cast<{}>(%{})", + node.get(), + self.get_typename_str(*typename).unwrap(), + lhs.get() + ) + } + Tag::Deref { lhs } => { + self.render_node(writer, *lhs, indent)?; + writeln_indented!(indent, writer, "%{} = deref(%{})", node.get(), lhs.get()) + } + Tag::Ref { lhs } => { + self.render_node(writer, *lhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = address_of(%{})", + node.get(), + lhs.get() + ) + } + Tag::Not { lhs } => { + self.render_node(writer, *lhs, indent)?; + writeln_indented!(indent, writer, "%{} = ", node.get(),) + } + Tag::Negate { lhs } => { + self.render_node(writer, *lhs, indent)?; + writeln_indented!(indent, writer, "%{} = not(%{})", node.get(), lhs.get()) + } + Tag::Or { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} || %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::And { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} && %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::BitOr { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} | %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::BitAnd { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} & %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::BitXOr { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} ^ %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Eq { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} == %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::NEq { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} != %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Lt { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} < %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Gt { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} > %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Le { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} <= %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Ge { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} >= %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Shl { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} << %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Shr { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} >> %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Add { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} + %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Sub { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} - %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Mul { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} * %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Div { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} / %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Rem { lhs, rhs } => { + self.render_node(writer, *lhs, indent)?; + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = %{} % %{}", + node.get(), + lhs.get(), + rhs.get() + ) + } + Tag::Assign { lhs, rhs } => { + self.render_node(writer, *rhs, indent)?; + writeln_indented!( + indent, + writer, + "%{} = store(dst: %{}, val: %{})", + node.get(), + lhs.get(), + rhs.get() + ) + } + _ => unreachable!(), + } + } + + pub fn render(&self, writer: &mut W) -> core::fmt::Result { + for decl in &self.global_decls { + self.render_node(writer, *decl, 0)?; + } + + Ok(()) + } } static PRECEDENCE_MAP: std::sync::LazyLock> = std::sync::LazyLock::new(|| { @@ -423,3 +1127,43 @@ static PRECEDENCE_MAP: std::sync::LazyLock> = std::sync::Laz (Token::Slash, 100), ]) }); + +#[cfg(test)] +mod tests { + use crate::lexer::Tokenizer; + + use super::*; + + #[test] + fn render_ast() { + let src = "let a: u21 = 3;"; + let tokens = Tokenizer::new(src.as_bytes()).unwrap(); + + let mut tree = Tree::new(); + tree.parse(tokens.iter()).unwrap(); + + let mut buf = String::new(); + tree.render(&mut buf).unwrap(); + println!("{buf}"); + } + #[test] + fn render_ast2() { + let src = " +fn main() -> void { +let a: u32 = 0; +a == 1 +} +fn square(x: u32) -> u32 { +x * x +} +"; + let tokens = Tokenizer::new(src.as_bytes()).unwrap(); + + let mut tree = Tree::new(); + tree.parse(tokens.iter()).unwrap(); + + let mut buf = String::new(); + tree.render(&mut buf).unwrap(); + println!("{buf}"); + } +} diff --git a/src/tokens.rs b/src/tokens.rs index dad749c..3188943 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -139,6 +139,54 @@ tokens!(pub Token: { GreaterGreaterEqual => ">>=" }); +impl Token { + pub fn is_assignment_op(self) -> bool { + match self { + Token::PlusEqual + | Token::MinusEqual + | Token::StarEqual + | Token::SlashEqual + | Token::PercentEqual + | Token::PipeEqual + | Token::CaretEqual + | Token::AmpersandEqual + | Token::LessLessEqual + | Token::GreaterGreaterEqual + | Token::Equal => true, + _ => false, + } + } + pub fn is_unary_op(self) -> bool { + match self { + Token::Plus | Token::Minus | Token::Star | Token::Ampersand | Token::Bang => true, + _ => false, + } + } + pub fn is_binary_op(self) -> bool { + match self { + Token::Star + | Token::Slash + | Token::Percent + | Token::Pipe + | Token::Ampersand + | Token::Caret + | Token::Plus + | Token::Minus + | Token::PipePipe + | Token::AmpersandAmpersand + | Token::BangEqual + | Token::EqualEqual + | Token::Less + | Token::Greater + | Token::LessEqual + | Token::GreaterEqual + | Token::LessLess + | Token::GreaterGreater => true, + _ => false, + } + } +} + /// Helper type for parsing tokens that have a defined lexeme, such as `fn`, /// `f32`, `const`, etc. Tokens with variable lexemes, such as primitive /// integral types, constants or identifiers are not parsed by this. @@ -177,11 +225,19 @@ impl LexemeParser { break; } - this.advance(ch)?; + match this.advance(ch)? { + None => {} + Some(token) => { + return Some(token); + } + } } this.finish() } + /// Accepts a `char` and returns `Some(None)` until it is done trying to parse the longest lexeme. + /// If no more potential matches are available, returns the longest matched token as `Some(Token)`, or `None` on failure. + /// accepts a char and returns `None` until it is done trying to parse the longest `Token`. /// when finished, returns a Token, if it parsed one, or `Some(None)`. pub fn advance(&mut self, ch: char) -> Option> { @@ -215,10 +271,13 @@ impl LexemeParser { if self.lexemes.is_empty() { // return match, if it exists - return Some(self.candidates.pop()); + return match self.candidates.pop() { + Some(token) => Some(Some(token)), + None => None, + }; } - return None; + return Some(None); } }