diff --git a/src/ast.rs b/src/ast.rs index c680476..5efd82f 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2,7 +2,7 @@ use std::num::NonZero; pub type Node = NonZero; -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Tag { Undefined, Root, @@ -69,6 +69,7 @@ pub enum Tag { explicit_type: Option, assignment: Option, }, + DeclRef(Node), CallExpr { /// Ident | Expr lhs: Node, @@ -180,7 +181,7 @@ pub enum Tag { }, } -#[derive(Debug)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum LetOrVar { Let, Var, @@ -339,7 +340,7 @@ impl Type { } } -#[derive(Debug)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PrimitiveType { FloatingType(FloatingType), IntegralType(Node), diff --git a/src/lib.rs b/src/lib.rs index 24a7986..a86e629 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,12 @@ -#![feature(extract_if, iter_advance_by, box_into_inner)] +#![feature(extract_if, iter_advance_by, box_into_inner, hash_extract_if)] #![allow(dead_code, unused_macros)] pub mod ast; +pub mod codegen; pub mod common; pub mod lexer; pub mod parser; +pub mod symbol_table; pub mod tokens; pub mod triples; diff --git a/src/parser.rs b/src/parser.rs index 7347864..bd92623 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -6,6 +6,7 @@ use crate::{ ast::{FloatingType, IntegralType, LetOrVar, Node, PrimitiveType, Tag, Type}, common::NextIf, lexer::{Radix, TokenIterator}, + symbol_table::{SymbolKind, SymbolTable}, tokens::Token, }; @@ -27,12 +28,65 @@ pub enum Error { pub type Result = core::result::Result; +#[derive(Debug)] +pub struct Nodes { + inner: Vec, +} + +impl core::ops::Index for Nodes { + type Output = Tag; + + fn index(&self, index: Node) -> &Self::Output { + &self.inner[index.get() as usize] + } +} + +impl Nodes { + fn new() -> Nodes { + Self { + inner: vec![Tag::Root], + } + } + pub fn get_ident_str(&self, node: Node) -> Option<&str> { + match &self.inner[node.get() as usize] { + Tag::Ident { name } => Some(name.as_str()), + _ => None, + } + } + fn len(&self) 
-> u32 { + self.inner.len() as u32 + } + + fn set_node(&mut self, node: Node, tag: Tag) { + *self.get_node_mut(node) = tag; + } + + fn get_node_mut(&mut self, node: Node) -> &mut Tag { + self.inner.get_mut(node.get() as usize).unwrap() + } + + pub fn get_node(&self, node: Node) -> &Tag { + self.inner.get(node.get() as usize).unwrap() + } + + fn push_tag(&mut self, tag: Tag) -> Node { + let node = Node::new(self.len()).unwrap(); + self.inner.push(tag); + + node + } + fn reserve_node(&mut self) -> Node { + self.push_tag(Tag::Undefined) + } +} + // TODO: add a string-table which stores strings and maybe other bytes and // returns a range for identifiers, constants, etc. where bytes are stored // flatly, and next to each other. #[derive(Debug)] pub struct Tree { - nodes: Vec, + pub nodes: Nodes, + st: SymbolTable, pub global_decls: Vec, } @@ -69,36 +123,12 @@ macro_rules! writeln_indented { impl Tree { pub fn new() -> Tree { Self { - nodes: vec![Tag::Root], + nodes: Nodes::new(), + st: SymbolTable::new(), global_decls: Vec::new(), } } - fn reserve_node(&mut self) -> Node { - let node = Node::new(self.nodes.len() as u32).unwrap(); - self.nodes.push(Tag::Undefined); - node - } - - fn set_node(&mut self, node: Node, tag: Tag) { - *self.get_node_mut(node) = tag; - } - - fn get_node_mut(&mut self, node: Node) -> &mut Tag { - self.nodes.get_mut(node.get() as usize).unwrap() - } - - pub fn get_node(&self, node: Node) -> &Tag { - self.nodes.get(node.get() as usize).unwrap() - } - - fn push_tag(&mut self, tag: Tag) -> Node { - let node = Node::new(self.nodes.len() as u32).unwrap(); - self.nodes.push(tag); - - node - } - fn is_integral_type(lexeme: &str) -> Option<()> { let mut iter = lexeme.chars(); iter.next_if(|&c| c == 'u' || c == 'i')?; @@ -224,7 +254,7 @@ impl Tree { fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result { let name = tokens.expect_token(Token::Ident)?.lexeme().to_owned(); - Ok(self.push_tag(Tag::Ident { name })) + Ok(self.nodes.push_tag(Tag::Ident 
{ name })) } pub fn parse_primitive_type(&mut self, tokens: &mut TokenIterator) -> Result { @@ -239,7 +269,7 @@ impl Tree { } }; - Ok(self.push_tag(Tag::PrimitiveType(prim))) + Ok(self.nodes.push_tag(Tag::PrimitiveType(prim))) } pub fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> Result { @@ -247,7 +277,7 @@ impl Tree { let _constness = tokens.eat_token(Token::Const); let typename = self.parse_typename(tokens)?; - Ok(self.push_tag(Tag::Pointer { pointee: typename })) + Ok(self.nodes.push_tag(Tag::Pointer { pointee: typename })) } pub fn parse_typename(&mut self, tokens: &mut TokenIterator) -> Result { @@ -256,8 +286,8 @@ impl Tree { Token::Ident => { let token = tokens.next().unwrap(); match Self::try_parse_integral_type(token.lexeme())? { - Some(int) => Ok(self.push_tag(Tag::IntegralType(int))), - None => Ok(self.push_tag(Tag::Ident { + Some(int) => Ok(self.nodes.push_tag(Tag::IntegralType(int))), + None => Ok(self.nodes.push_tag(Tag::Ident { name: token.lexeme().to_owned(), })), } @@ -266,7 +296,7 @@ impl Tree { } } - pub fn parse_var_decl(&mut self, tokens: &mut TokenIterator) -> Result { + pub fn parse_var_decl(&mut self, tokens: &mut TokenIterator, global: bool) -> Result { let let_or_var = match tokens .eat_token(Token::Let) .or_else(|| tokens.eat_token(Token::Var)) @@ -286,11 +316,26 @@ impl Tree { None }; - let node = self.reserve_node(); + let name_str = self.nodes.get_ident_str(name).unwrap().to_owned(); + let node = if global { + let node = match self.st.root_mut().find_orderless_symbol(&name_str) { + Some(r) => r.node(), + None => self + .st + .root_mut() + .insert_orderless_symbol(&name_str, self.nodes.reserve_node()) + .node(), + }; + node + } else { + let node = self.nodes.reserve_node(); + self.st.insert_symbol(&name_str, node, SymbolKind::Var); + node + }; let assignment = if tokens.eat_token(Token::Equal).is_some() { let expr = self.parse_expr(tokens)?; - Some(self.push_tag(Tag::Assign { + Some(self.nodes.push_tag(Tag::Assign { lhs: 
node, rhs: expr, })) @@ -298,7 +343,7 @@ impl Tree { None }; - self.set_node( + self.nodes.set_node( node, Tag::VarDecl { let_or_var, @@ -313,7 +358,7 @@ impl Tree { } pub fn parse_global_decl(&mut self, tokens: &mut TokenIterator) -> Result { - let node = self.parse_var_decl(tokens)?; + let node = self.parse_var_decl(tokens, true)?; tokens.expect_token(Token::Semi)?; Ok(node) @@ -326,7 +371,15 @@ impl Tree { tokens.expect_token(Token::Colon)?; let ty = self.parse_typename(tokens)?; - Ok(self.push_tag(Tag::Parameter { name, ty })) + let param = self.nodes.reserve_node(); + self.st.insert_symbol( + self.nodes.get_ident_str(name).unwrap(), + param, + SymbolKind::Var, + ); + self.nodes.set_node(param, Tag::Parameter { name, ty }); + + Ok(param) } /// PARAMETER_LIST <- @@ -349,7 +402,7 @@ impl Tree { _ = tokens.next(); } - Ok(self.push_tag(Tag::ParameterList { parameters })) + Ok(self.nodes.push_tag(Tag::ParameterList { parameters })) } /// FUNCTION_PROTO <- @@ -357,7 +410,7 @@ impl Tree { /// fn IDENTIFIER () -> TYPENAME /// fn IDENTIFIER ( PARAMETER_LIST ,? ) /// fn IDENTIFIER ( PARAMETER_LIST ,? ) -> TYPENAME - pub fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> Result { + pub fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> Result<(Node, Node)> { tokens.expect_token(Token::Fn)?; let name = self.parse_ident(tokens)?; tokens.expect_token(Token::OpenParens)?; @@ -374,38 +427,71 @@ impl Tree { let return_type = if tokens.eat_token(Token::MinusGreater).is_some() { self.parse_typename(tokens)? 
} else { - self.push_tag(Tag::PrimitiveType(PrimitiveType::Void)) + self.nodes.push_tag(Tag::PrimitiveType(PrimitiveType::Void)) }; - Ok(self.push_tag(Tag::FunctionProto { + let proto = self.nodes.push_tag(Tag::FunctionProto { name, parameters, return_type, - })) + }); + + Ok((proto, name)) } /// FUNCTION_DECL <- /// FUNCTION_PROTO BLOCK pub fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Result { - let proto = self.parse_fn_proto(tokens)?; + let (proto, name) = self.parse_fn_proto(tokens)?; - let body = self.parse_block(tokens)?; + let decl = match self + .st + .find_orderless_symbol(self.nodes.get_ident_str(name).unwrap()) + { + Some(record) => record.node(), + None => { + let decl = self.nodes.reserve_node(); + self.st + .insert_orderless_symbol(self.nodes.get_ident_str(name).unwrap(), decl); + decl + } + }; - Ok(self.push_tag(Tag::FunctionDecl { proto, body })) + let block = self.nodes.reserve_node(); + self.st.into_child(block); + let body = self.parse_block(tokens, Some(block))?; + let unresolved = self + .st + .extract_orderless_if(|_, v| self.nodes.get_node(v.node()) == &Tag::Undefined) + .collect::>(); + self.st.into_parent(); + self.st.extend_orderless(unresolved); + + self.nodes.set_node(decl, Tag::FunctionDecl { proto, body }); + + Ok(decl) } /// BLOCK <- /// { STATEMENT* EXPRESSION? 
} - pub fn parse_block(&mut self, tokens: &mut TokenIterator) -> Result { + pub fn parse_block( + &mut self, + tokens: &mut TokenIterator, + reserved_node: Option, + ) -> Result { + let block = reserved_node.unwrap_or_else(|| self.nodes.reserve_node()); let mut stmts = Vec::new(); _ = tokens.expect_token(Token::OpenBrace)?; - let node = loop { + loop { if tokens.is_next_token(Token::CloseBrace) { - break self.push_tag(Tag::Block { - statements: stmts, - trailing_expr: None, - }); + break self.nodes.set_node( + block, + Tag::Block { + statements: stmts, + trailing_expr: None, + }, + ); } match tokens.peek_token_or_err()?.token() { @@ -413,7 +499,7 @@ impl Tree { stmts.push(self.try_parse_return_stmt(tokens)?.unwrap()); } Token::Var | Token::Let => { - let node = self.parse_var_decl(tokens)?; + let node = self.parse_var_decl(tokens, false)?; tokens.expect_token(Token::Semi)?; stmts.push(node); @@ -422,10 +508,13 @@ impl Tree { let node = self.parse_expr(tokens)?; match tokens.peek_token_or_err()?.token() { Token::CloseBrace => { - break self.push_tag(Tag::Block { - statements: stmts, - trailing_expr: Some(node), - }); + break self.nodes.set_node( + block, + Tag::Block { + statements: stmts, + trailing_expr: Some(node), + }, + ); } Token::Semi => { _ = tokens.next(); @@ -437,11 +526,11 @@ impl Tree { } } } - }; + } tokens.expect_token(Token::CloseBrace)?; - Ok(node) + Ok(block) } /// ASSIGNMENT_EXPR <- @@ -471,22 +560,22 @@ impl Tree { let rhs = self.parse_expr(tokens)?; let rhs = match op.token() { - Token::PlusEqual => self.push_tag(Tag::Add { lhs, rhs }), - Token::MinusEqual => self.push_tag(Tag::Sub { lhs, rhs }), - Token::StarEqual => self.push_tag(Tag::Mul { lhs, rhs }), - Token::SlashEqual => self.push_tag(Tag::Sub { lhs, rhs }), - Token::PercentEqual => self.push_tag(Tag::Rem { lhs, rhs }), - Token::PipeEqual => self.push_tag(Tag::BitOr { lhs, rhs }), - Token::CaretEqual => self.push_tag(Tag::BitXOr { lhs, rhs }), - Token::AmpersandEqual => 
self.push_tag(Tag::BitAnd { lhs, rhs }), - Token::LessLessEqual => self.push_tag(Tag::Shl { lhs, rhs }), - Token::GreaterGreaterEqual => self.push_tag(Tag::Shr { lhs, rhs }), + Token::PlusEqual => self.nodes.push_tag(Tag::Add { lhs, rhs }), + Token::MinusEqual => self.nodes.push_tag(Tag::Sub { lhs, rhs }), + Token::StarEqual => self.nodes.push_tag(Tag::Mul { lhs, rhs }), + Token::SlashEqual => self.nodes.push_tag(Tag::Sub { lhs, rhs }), + Token::PercentEqual => self.nodes.push_tag(Tag::Rem { lhs, rhs }), + Token::PipeEqual => self.nodes.push_tag(Tag::BitOr { lhs, rhs }), + Token::CaretEqual => self.nodes.push_tag(Tag::BitXOr { lhs, rhs }), + Token::AmpersandEqual => self.nodes.push_tag(Tag::BitAnd { lhs, rhs }), + Token::LessLessEqual => self.nodes.push_tag(Tag::Shl { lhs, rhs }), + Token::GreaterGreaterEqual => self.nodes.push_tag(Tag::Shr { lhs, rhs }), Token::Equal => rhs, _ => { unreachable!() } }; - Ok(Some(self.push_tag(Tag::Assign { lhs, rhs }))) + Ok(Some(self.nodes.push_tag(Tag::Assign { lhs, rhs }))) } else { Ok(None) } @@ -505,7 +594,7 @@ impl Tree { }; tokens.expect_token(Token::Semi)?; - Ok(Some(self.push_tag(Tag::ReturnStmt { expr }))) + Ok(Some(self.nodes.push_tag(Tag::ReturnStmt { expr }))) } else { Ok(None) } @@ -519,7 +608,7 @@ impl Tree { match tokens.peek_token_or_err()?.token() { Token::Return => Ok(self.try_parse_return_stmt(tokens)?.unwrap()), Token::Var | Token::Let => { - let node = self.parse_var_decl(tokens)?; + let node = self.parse_var_decl(tokens, false)?; tokens.expect_token(Token::Semi)?; Ok(node) @@ -599,7 +688,7 @@ impl Tree { _ => unreachable!(), }; - node = self.push_tag(tag); + node = self.nodes.push_tag(tag); } Ok(node) @@ -616,22 +705,22 @@ impl Tree { Token::Bang => { _ = tokens.next(); let lhs = self.parse_as_expr(tokens)?; - Ok(self.push_tag(Tag::Not { lhs })) + Ok(self.nodes.push_tag(Tag::Not { lhs })) } Token::Minus => { _ = tokens.next(); let lhs = self.parse_as_expr(tokens)?; - Ok(self.push_tag(Tag::Negate { lhs })) + 
Ok(self.nodes.push_tag(Tag::Negate { lhs })) } Token::Ampersand => { _ = tokens.next(); let lhs = self.parse_as_expr(tokens)?; - Ok(self.push_tag(Tag::Ref { lhs })) + Ok(self.nodes.push_tag(Tag::Ref { lhs })) } Token::Star => { _ = tokens.next(); let lhs = self.parse_as_expr(tokens)?; - Ok(self.push_tag(Tag::Deref { lhs })) + Ok(self.nodes.push_tag(Tag::Deref { lhs })) } _ => self.parse_as_expr(tokens), } @@ -645,7 +734,7 @@ impl Tree { if tokens.eat_token(Token::As).is_some() { let typename = self.parse_typename(tokens)?; - Ok(self.push_tag(Tag::ExplicitCast { + Ok(self.nodes.push_tag(Tag::ExplicitCast { lhs: expr, typename, })) @@ -668,14 +757,27 @@ impl Tree { pub fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> Result { let token = tokens.peek_token_or_err()?; match token.token() { - Token::Ident => Ok(self.parse_ident(tokens)?), + Token::Ident => { + // Ok(self.parse_ident(tokens)?) + let ident = tokens.expect_token(Token::Ident)?; + + let decl = match self.st.find_symbol(ident.lexeme()) { + Some(rec) => rec.node(), + None => self + .st + .insert_orderless_symbol(ident.lexeme(), self.nodes.reserve_node()) + .node(), + }; + + Ok(self.nodes.push_tag(Tag::DeclRef(decl))) + } Token::IntegerBinConstant | Token::IntegerHexConstant | Token::IntegerOctConstant | Token::IntegerConstant => { _ = tokens.next(); let (bits, ty) = Self::parse_integral_constant(token.token(), token.lexeme()); - Ok(self.push_tag(Tag::IntegralConstant { bits, ty })) + Ok(self.nodes.push_tag(Tag::IntegralConstant { bits, ty })) } Token::FloatingConstant | Token::FloatingExpConstant @@ -684,7 +786,7 @@ impl Tree { _ = tokens.next(); let (bits, ty) = Self::parse_floating_constant(token.token(), token.lexeme()); - Ok(self.push_tag(Tag::FloatingConstant { bits, ty })) + Ok(self.nodes.push_tag(Tag::FloatingConstant { bits, ty })) } Token::OpenParens => { _ = tokens.next(); @@ -693,7 +795,7 @@ impl Tree { Ok(node) } Token::OpenBrace => { - let node = self.parse_block(tokens)?; + let 
node = self.parse_block(tokens, None)?; Ok(node) } _ => unreachable!(), @@ -731,15 +833,8 @@ impl Tree { self.parse_program(&mut tokens) } - pub fn get_ident_str(&self, node: Node) -> Option<&str> { - match &self.nodes[node.get() as usize] { - Tag::Ident { name } => Some(name.as_str()), - _ => None, - } - } - fn get_typename_str(&self, node: Node) -> Option { - match self.get_node(node) { + match self.nodes.get_node(node) { Tag::IntegralType(i) => Some(i.to_string()), Tag::Ident { name } => Some(name.clone()), Tag::Pointer { pointee } => self.get_typename_str(*pointee), @@ -749,24 +844,28 @@ impl Tree { } fn render_node( - &self, + &mut self, writer: &mut W, node: Node, indent: u32, ) -> core::fmt::Result { - match &self.nodes[node.get() as usize] { + match self.nodes[node].clone() { Tag::FunctionProto { name, parameters, return_type, } => { - self.render_node(writer, *name, indent)?; - self.render_node(writer, *return_type, indent)?; + self.render_node(writer, name, indent)?; + self.render_node(writer, return_type, indent)?; if let Some(parameters) = parameters { - self.render_node(writer, *parameters, indent)?; + self.render_node(writer, parameters, indent)?; } write_indented!(indent, writer, "%{} = function_proto: {{", node.get())?; - write!(writer, "name: \"{}\"", self.get_ident_str(*name).unwrap())?; + write!( + writer, + "name: \"{}\"", + self.nodes.get_ident_str(name).unwrap() + )?; if let Some(parameters) = parameters { write!(writer, ", parameters: %{}", parameters.get())?; } @@ -776,7 +875,7 @@ impl Tree { Tag::ParameterList { parameters } => { writeln_indented!(indent, writer, "%{} = ParameterList [", node.get())?; for param in parameters { - self.render_node(writer, *param, indent + 1)?; + self.render_node(writer, param, indent + 1)?; } writeln_indented!(indent, writer, "]") } @@ -786,8 +885,8 @@ impl Tree { writer, "%{} = {}: {},", node.get(), - self.get_ident_str(*name).unwrap(), - self.get_typename_str(*ty).unwrap() + 
self.nodes.get_ident_str(name).unwrap(), + self.get_typename_str(ty).unwrap() ) } Tag::Pointer { .. } | Tag::IntegralType(_) | Tag::PrimitiveType(_) => { @@ -801,7 +900,7 @@ impl Tree { } Tag::PointerQualifier { constness } => todo!(), Tag::FunctionDecl { proto, body } => { - self.render_node(writer, *proto, indent)?; + self.render_node(writer, proto, indent)?; writeln_indented!( indent, writer, @@ -810,7 +909,7 @@ impl Tree { proto.get(), body.get() )?; - self.render_node(writer, *body, indent + 1)?; + self.render_node(writer, body, indent + 1)?; writeln_indented!(indent, writer, "}}") } Tag::Ident { name } => { @@ -841,11 +940,12 @@ impl Tree { trailing_expr, } => { writeln_indented!(indent, writer, "%{} = {{", node.get())?; + self.st.into_find_child(node); for stmt in statements { - self.render_node(writer, *stmt, indent + 1)?; + self.render_node(writer, stmt, indent + 1)?; } if let Some(expr) = trailing_expr { - self.render_node(writer, *expr, indent + 1)?; + self.render_node(writer, expr, indent + 1)?; writeln_indented!( indent + 1, writer, @@ -854,24 +954,25 @@ impl Tree { expr.get() )?; } + self.st.into_parent(); writeln_indented!(indent, writer, "}}") } Tag::ReturnStmt { expr } => { if let Some(expr) = expr { - self.render_node(writer, *expr, indent)?; + self.render_node(writer, expr, indent)?; writeln_indented!(indent, writer, "%{} = return %{};", node.get(), expr.get()) } else { writeln_indented!(indent, writer, "%{} = return;", node.get()) } } - Tag::ExprStmt { expr } => self.render_node(writer, *expr, indent), + Tag::ExprStmt { expr } => self.render_node(writer, expr, indent), Tag::VarDecl { let_or_var, name, explicit_type, .. 
} => { - self.render_node(writer, *name, indent)?; + self.render_node(writer, name, indent)?; explicit_type.map(|ty| self.render_node(writer, ty, indent)); write_indented!( indent, @@ -886,10 +987,10 @@ impl Tree { "mut" } }, - self.get_ident_str(*name).unwrap() + self.nodes.get_ident_str(name).unwrap() )?; if let Some(ty) = explicit_type { - write!(writer, ", ty: {}", self.get_typename_str(*ty).unwrap())?; + write!(writer, ", ty: {}", self.get_typename_str(ty).unwrap())?; } writeln!(writer, ");")?; Ok(()) @@ -898,22 +999,22 @@ impl Tree { Tag::ArgumentList { parameters } => todo!(), Tag::Argument { name, expr } => todo!(), Tag::ExplicitCast { lhs, typename } => { - self.render_node(writer, *lhs, indent)?; + self.render_node(writer, lhs, indent)?; writeln_indented!( indent, writer, "%{} = cast<{}>(%{})", node.get(), - self.get_typename_str(*typename).unwrap(), + self.get_typename_str(typename).unwrap(), lhs.get() ) } Tag::Deref { lhs } => { - self.render_node(writer, *lhs, indent)?; + self.render_node(writer, lhs, indent)?; writeln_indented!(indent, writer, "%{} = deref(%{})", node.get(), lhs.get()) } Tag::Ref { lhs } => { - self.render_node(writer, *lhs, indent)?; + self.render_node(writer, lhs, indent)?; writeln_indented!( indent, writer, @@ -923,16 +1024,16 @@ impl Tree { ) } Tag::Not { lhs } => { - self.render_node(writer, *lhs, indent)?; + self.render_node(writer, lhs, indent)?; writeln_indented!(indent, writer, "%{} = ", node.get(),) } Tag::Negate { lhs } => { - self.render_node(writer, *lhs, indent)?; + self.render_node(writer, lhs, indent)?; writeln_indented!(indent, writer, "%{} = not(%{})", node.get(), lhs.get()) } Tag::Or { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -943,8 +1044,8 @@ impl Tree { ) } Tag::And { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - 
self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -955,8 +1056,8 @@ impl Tree { ) } Tag::BitOr { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -967,8 +1068,8 @@ impl Tree { ) } Tag::BitAnd { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -979,8 +1080,8 @@ impl Tree { ) } Tag::BitXOr { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -991,8 +1092,8 @@ impl Tree { ) } Tag::Eq { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1003,8 +1104,8 @@ impl Tree { ) } Tag::NEq { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1015,8 +1116,8 @@ impl Tree { ) } Tag::Lt { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1027,8 +1128,8 @@ impl Tree { ) } Tag::Gt { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + 
self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1039,8 +1140,8 @@ impl Tree { ) } Tag::Le { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1051,8 +1152,8 @@ impl Tree { ) } Tag::Ge { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1063,8 +1164,8 @@ impl Tree { ) } Tag::Shl { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1075,8 +1176,8 @@ impl Tree { ) } Tag::Shr { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1087,8 +1188,8 @@ impl Tree { ) } Tag::Add { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1099,8 +1200,8 @@ impl Tree { ) } Tag::Sub { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1111,8 +1212,8 @@ impl Tree { ) } Tag::Mul { lhs, rhs } => { - self.render_node(writer, *lhs, indent)?; - self.render_node(writer, *rhs, indent)?; + self.render_node(writer, lhs, indent)?; + self.render_node(writer, rhs, indent)?; writeln_indented!( indent, writer, @@ -1123,8 +1224,8 @@ impl Tree { ) 
/// Parses and renders a program in which the body of `main` refers to
/// `global` before the `let global` declaration appears in the source, then
/// prints the rendered tree. The test only checks that tokenizing, parsing
/// and rendering succeed.
#[test]
fn render_ast3() {
    let src = "
fn main() -> void {
let a: u32 = 0;
a == global
}
let global: u32 = 42;
";
    let tokenizer = Tokenizer::new(src.as_bytes()).unwrap();

    let mut ast = Tree::new();
    ast.parse(tokenizer.iter()).unwrap();

    let mut rendered = String::new();
    ast.render(&mut rendered).unwrap();
    println!("{rendered}");
}
std::collections::{BTreeMap, HashMap}; + +use crate::ast::Node as AstNode; + +#[derive(Debug)] +pub struct SymbolRecord { + name: String, + decl: AstNode, +} + +impl SymbolRecord { + pub fn node(&self) -> AstNode { + self.decl + } + + pub fn name(&self) -> &str { + &self.name + } +} + +pub enum SymbolKind { + Var, // or Let + Orderless, +} + +#[derive(Debug, Default)] +pub struct SymbolTable { + // this is a `Vec<_>` because order matters. Some symbols such as functions + // cannot be shadowed, but I really like shadowing variables and function + // parameters, so any `x` may be redefined. + ordered_identifiers: Vec, + orderless_identifiers: HashMap, + parent: Option>, + children: BTreeMap, SymbolTable>, + scope: Option, +} + +impl SymbolTable { + pub fn new() -> SymbolTable { + Self { + ..Default::default() + } + } + + pub fn root_ref(&self) -> &SymbolTable { + match self.parent.as_ref() { + Some(parent) => parent.root_ref(), + None => self, + } + } + + pub fn root_mut(&mut self) -> &mut SymbolTable { + let this = self as *mut Self; + unsafe { + match (&mut *this).parent.as_mut() { + Some(parent) => parent.root_mut(), + None => self, + } + } + } + + pub fn parent_ref(&self) -> &SymbolTable { + match self.parent.as_ref() { + Some(parent) => Box::as_ref(parent), + None => self, + } + } + + pub fn parent_mut(&mut self) -> &mut SymbolTable { + let this = self as *mut Self; + unsafe { + match (&mut *this).parent.as_mut() { + Some(parent) => Box::as_mut(parent), + None => self, + } + } + } + + pub fn insert_symbol(&mut self, name: &str, node: AstNode, kind: SymbolKind) -> &SymbolRecord { + match kind { + SymbolKind::Var => { + self.ordered_identifiers.push(SymbolRecord { + name: name.to_owned(), + decl: node, + }); + self.ordered_identifiers.last().unwrap() + } + _ => { + self.orderless_identifiers.insert( + name.to_owned(), + SymbolRecord { + name: name.to_owned(), + decl: node, + }, + ); + self.orderless_identifiers.get(name).unwrap() + } + } + } + + pub fn 
insert_orderless_symbol(&mut self, name: &str, node: AstNode) -> &SymbolRecord { + self.orderless_identifiers.insert( + name.to_owned(), + SymbolRecord { + name: name.to_owned(), + decl: node, + }, + ); + self.orderless_identifiers.get(name).unwrap() + } + + pub fn find_symbol_or_insert_with<'a, F>(&'a mut self, name: &str, cb: F) -> &'a SymbolRecord + where + F: FnOnce() -> (AstNode, SymbolKind), + { + let this = self as *mut Self; + if let Some(record) = unsafe { &*this }.find_symbol(name) { + record + } else { + let (node, kind) = cb(); + self.insert_symbol(name, node, kind) + } + } + + pub fn find_symbol_by_decl(&self, decl: AstNode) -> Option<&SymbolRecord> { + self.ordered_identifiers + .iter() + .find(|r| r.decl == decl) + .or_else(|| { + self.orderless_identifiers + .iter() + .find(|(_, v)| v.decl == decl) + .map(|(_, v)| v) + }) + .or_else(|| { + self.parent + .as_ref() + .and_then(|p| p.find_symbol_by_decl(decl)) + }) + } + + pub fn find_symbol(&self, name: &str) -> Option<&SymbolRecord> { + self.ordered_identifiers + .iter() + .find(|r| r.name.as_str() == name) + .or_else(|| self.orderless_identifiers.get(name)) + .or_else(|| self.parent.as_ref().and_then(|p| p.find_symbol(name))) + } + + pub fn find_orderless_symbol(&self, name: &str) -> Option<&SymbolRecord> { + self.orderless_identifiers.get(name).or_else(|| { + self.parent + .as_ref() + .and_then(|p| p.find_orderless_symbol(name)) + }) + } + + pub fn into_find_child(&mut self, scope: AstNode) -> Option<()> { + if let Some(mut parent) = self.children.remove(&Some(scope)) { + core::mem::swap(self, &mut parent); + self.parent = Some(Box::new(parent)); + Some(()) + } else { + None + } + } + + pub fn into_child(&mut self, scope: AstNode) { + let mut parent = Self { + scope: Some(scope), + ..Default::default() + }; + core::mem::swap(self, &mut parent); + self.parent = Some(Box::new(parent)); + } + + pub fn extend_orderless(&mut self, iter: I) + where + I: IntoIterator, + { + 
self.orderless_identifiers.extend(iter) + } + + pub fn extract_orderless_if( + &mut self, + pred: F, + ) -> std::collections::hash_map::ExtractIf + where + F: FnMut(&String, &mut SymbolRecord) -> bool, + { + self.orderless_identifiers.extract_if(pred) + } + + /// returns `self` if `self.parent` was `Some(_)`. + pub fn into_parent(&mut self) { + if let Some(child) = self.parent.take() { + let mut child = Box::into_inner(child); + core::mem::swap(self, &mut child); + self.children.insert(child.scope, child); + } + } +} diff --git a/src/triples.rs b/src/triples.rs index d4bec9d..59b8aca 100644 --- a/src/triples.rs +++ b/src/triples.rs @@ -43,10 +43,14 @@ impl<'a> SymbolTable<'a> { } pub fn insert_symbol(&mut self, ast_node: AstNode) { - let name = match self.tree.get_node(ast_node) { - Tag::VarDecl { name, .. } => self.tree.get_ident_str(*name).unwrap().to_string(), - Tag::Parameter { name, .. } => self.tree.get_ident_str(*name).unwrap().to_string(), - Tag::FunctionProto { name, .. } => self.tree.get_ident_str(*name).unwrap().to_string(), + let name = match self.tree.nodes.get_node(ast_node) { + Tag::VarDecl { name, .. } => self.tree.nodes.get_ident_str(*name).unwrap().to_string(), + Tag::Parameter { name, .. } => { + self.tree.nodes.get_ident_str(*name).unwrap().to_string() + } + Tag::FunctionProto { name, .. 
} => { + self.tree.nodes.get_ident_str(*name).unwrap().to_string() + } _ => { panic!("ast_node wasn't any kind of decl!"); } @@ -69,7 +73,7 @@ impl<'a> SymbolTable<'a> { pub fn find_symbol(&self, ident_node: AstNode) -> Option<&SymbolRecord> { self.identifiers .iter() - .find(|r| Some(r.name.as_str()) == self.tree.get_ident_str(ident_node)) + .find(|r| Some(r.name.as_str()) == self.tree.nodes.get_ident_str(ident_node)) .or_else(|| { self.parent .as_ref() @@ -113,7 +117,7 @@ impl<'a> SymbolTable<'a> { } fn type_of_node(&self, node: AstNode) -> crate::ast::Type { - match self.tree.get_node(node) { + match self.tree.nodes.get_node(node) { Tag::Ident { name } => self .find_symbol(node) .map(|r| r.ty.clone()) @@ -126,7 +130,7 @@ impl<'a> SymbolTable<'a> { } => { let return_type = self.type_of_node(*return_type); let parameter_types = parameters - .map(|p| match self.tree.get_node(p) { + .map(|p| match self.tree.nodes.get_node(p) { Tag::ParameterList { parameters } => parameters .iter() .map(|p| self.type_of_node(*p)) @@ -163,7 +167,7 @@ impl<'a> SymbolTable<'a> { .. } => { let lhs = explicit_type.map(|n| self.type_of_node(n)); - let rhs = assignment.map(|n| match self.tree.get_node(n) { + let rhs = assignment.map(|n| match self.tree.nodes.get_node(n) { Tag::Assign { rhs, .. 
} => self.type_of_node(*rhs), _ => unreachable!(), }); @@ -259,8 +263,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { } fn visit(&mut self, node: AstNode) -> Node { - println!("visiting %{}", node.get()); - match self.st.tree.get_node(node) { + match &self.st.tree.nodes[node] { Tag::FunctionDecl { proto, body } => { self.visit(*proto); self.st.into_child_in_place(); @@ -286,7 +289,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { parameters.map(|p| self.visit(p)); let label = self.ir.push(Inst::Label( - self.st.tree.get_ident_str(*name).unwrap().to_string(), + self.st.tree.nodes.get_ident_str(*name).unwrap().to_string(), )); self.lookup.insert(node, label); @@ -464,7 +467,7 @@ impl<'tree, 'ir> IRBuilder<'tree, 'ir> { self.ir.push(Inst::AddressOf(lhs)) } _ => { - dbg!(self.st.tree.get_node(node)); + dbg!(&self.st.tree.nodes[node]); todo!() } }