diff --git a/src/ast2/mod.rs b/src/ast2/mod.rs index a17cbe4..c899aa4 100644 --- a/src/ast2/mod.rs +++ b/src/ast2/mod.rs @@ -1,6 +1,11 @@ -use std::num::NonZero; +#![allow(dead_code)] -use crate::{lexer::SourceLocation, tokens::Token}; +use std::{ + fmt::{Debug, Display}, + num::NonZero, +}; + +use crate::{lexer::SourceLocation, tokens::Token, writeln_indented}; pub mod intern { use std::{ @@ -8,7 +13,6 @@ pub mod intern { hash::{Hash, Hasher}, }; - use itertools::Itertools; use num_bigint::{BigInt, BigUint, Sign}; use crate::{ @@ -135,7 +139,13 @@ pub mod intern { pointee, flags, length, - } => (pointee, flags, length).hash(state), + } => (*pointee, *flags, *length).hash(state), + Key::StructType { + name, + packed, + c_like, + fields, + } => (*name, *packed, *c_like, fields).hash(state), Key::FunctionType { return_type, parameters, @@ -251,8 +261,17 @@ pub mod intern { fn index(&self) -> usize { self.0 as usize } + + pub fn is_valid(&self) -> bool { + self.0 != u32::MAX + } + + pub fn invalid() -> Self { + Self(u32::MAX) + } } + #[derive(Debug)] pub struct InternPool { tags: Vec, indices: Vec, @@ -472,7 +491,11 @@ pub mod intern { fn extend_keys<'a, K: IntoIterator>>(&mut self, keys: K) { for k in keys.into_iter() { - self.insert(k); + let mut hasher = std::hash::DefaultHasher::new(); + k.hash(&mut hasher); + let digest = hasher.finish(); + let i = self.insert(k); + self.hashed.insert(digest, i); } } @@ -488,7 +511,9 @@ pub mod intern { if let Some(&idx) = self.hashed.get(&digest) { idx } else { - self.insert(key) + let i = self.insert(key); + self.hashed.insert(digest, i); + i } } @@ -847,9 +872,9 @@ pub mod intern { #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum Tag { - /// pseudo tag + /// pseudo tag, contains a range from a..b into extra of all files. Root, - /// `data` is a range from a..b into extra of all global nodes + /// `data` is a range from a..b into extra of all global nodes. File, /// `data` is an intern to a name, and an index into extra of [intern: return_type, index: ParameterList] FunctionProto, @@ -879,12 +904,14 @@ enum Tag { VarDeclAssignment, /// `data` is a range from a..b into `extra` of an intern to a name, an index to an expr, and an optional intern to a type MutVarDeclAssignment, - /// `data` is an index to an expression and an intern to a name + /// `data` is an intern to a name, and an offset into `extra` of [type: intern, expr: index] GlobalDecl, /// `data` is an intern to a struct type StructDecl, /// `data` is an index to a VarDecl, GlobalDecl or FunctionDecl DeclRef, + /// `data` is an inlined key into the symbol table (scope: index, name: intern) + DeclRefUnresolved, /// `data` is an index to an expr and an index to an ArgumentList CallExpr, /// `data` is a range from a..b into extra of indices to arguments @@ -958,6 +985,8 @@ enum ParseError { #[error("Dummy Message.")] ExpectedPrefixExpression, #[error("Dummy Message.")] + ExpectedArgumentList, + #[error("Dummy Message.")] ExpectedStatement, #[error("Dummy Message.")] UnmatchedParens(u32), @@ -975,6 +1004,12 @@ enum ParseError { #[repr(transparent)] pub struct Index(NonZero); +impl Display for Index { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "%{}", self.0.get()) + } +} + impl Index { pub fn new(i: u32) -> Index { Self(NonZero::::new(i).unwrap()) @@ -1013,6 +1048,125 @@ union Data { index_and_extra_offset: (Index, u32), } +#[derive(Debug)] +#[allow(dead_code)] +enum ExpandedData { + None, + Error(ParseError), + Index(Index), + TwoIndices(Index, Index), + Range(Index, Index), + ExtraRange(usize, usize), + Intern(intern::Index), + IndexIntern(Index, intern::Index), + TwoInterns(intern::Index, intern::Index), + InternAndExtraOffset(intern::Index, usize), + IndexAndExtraOffset(Index, usize), +} + +impl ExpandedData { + fn from_none(data: Data) -> Self { + Self::None + } + fn from_error(data: Data) -> Self { + Self::Error(data.as_error()) + } + fn from_index(data: Data) -> Self { + Self::Index(data.as_index()) + } + + fn from_two_indices(data: Data) -> Self { + let data = data.as_two_indices(); + Self::TwoIndices(data.0, data.1) + } + fn from_range(data: Data) -> Self { + let data = data.as_index_range(); + Self::Range(data.0, data.1) + } + fn from_extra_range(data: Data) -> Self { + let data = data.as_extra_range(); + Self::ExtraRange(data.0, data.1) + } + fn from_intern(data: Data) -> Self { + let data = data.as_intern(); + Self::Intern(data) + } + fn from_index_intern(data: Data) -> Self { + let data = data.as_index_intern(); + Self::IndexIntern(data.0, data.1) + } + fn from_two_interns(data: Data) -> Self { + let data = data.as_two_interns(); + Self::TwoInterns(data.0, data.1) + } + fn from_intern_and_extra_offset(data: Data) -> Self { + let data = data.as_intern_and_extra_offset(); + Self::InternAndExtraOffset(data.0, data.1) + } + fn from_index_and_extra_offset(data: Data) -> Self { + let data = data.as_index_and_extra_offset(); + Self::IndexAndExtraOffset(data.0, data.1) + } +} + +impl From<(Tag, Data)> for ExpandedData { + fn from((tag, data): (Tag, Data)) -> Self { + match tag { + Tag::FunctionProto => Self::from_intern_and_extra_offset(data), + Tag::ParameterList => Self::from_extra_range(data), + Tag::Root => Self::from_extra_range(data), + Tag::File => Self::from_extra_range(data), + Tag::ArgumentList + | Tag::VarDecl + | Tag::MutVarDecl + | Tag::VarDeclAssignment + | Tag::MutVarDeclAssignment + | Tag::BlockTrailingExpr + | Tag::Block => Self::from_extra_range(data), + Tag::Constant | Tag::Parameter => Self::from_two_interns(data), + Tag::Or + | Tag::And + | Tag::BitOr + | Tag::BitXOr + | Tag::BitAnd + | Tag::Eq + | Tag::NEq + | Tag::Lt + | Tag::Gt + | Tag::Le + | Tag::Ge + | Tag::Shl + | Tag::Shr + | Tag::Add + | Tag::Sub + | Tag::Mul + | Tag::Div + | Tag::Rem + | Tag::Assign + | Tag::IfExpr + | Tag::SubscriptExpr + | Tag::CallExpr + | Tag::FunctionDecl => Self::from_two_indices(data), + Tag::ReturnExprStmt + | Tag::DeclRef + | Tag::Argument + | Tag::Deref + | Tag::AddressOf + | Tag::Not + | Tag::Negate + | Tag::ExprStmt => Self::from_index(data), + Tag::DeclRefUnresolved | Tag::NamedArgument | Tag::ExplicitCast => { + Self::from_index_intern(data) + } + Tag::GlobalDecl => Self::from_intern_and_extra_offset(data), + Tag::StructDecl => Self::from_intern(data), + Tag::IfElseExpr => Self::from_index_and_extra_offset(data), + Tag::Error => Self::from_error(data), + Tag::ReturnStmt | Tag::Undefined => Self::from_none(data), + } + } +} + impl Data { fn as_error(self) -> ParseError { unsafe { self.error } @@ -1030,12 +1184,14 @@ impl Data { let (a, b) = unsafe { self.extra_range }; (a as usize, b as usize) } - fn as_intern(self) -> Intern { + fn as_intern(self) -> intern::Index { unsafe { self.intern } } - fn as_two_indices(self) -> (Intern, Intern) { - unsafe { self.two_indices } + + fn as_two_interns(self) -> (intern::Index, intern::Index) { + unsafe { self.two_interns } } + fn as_index_intern(self) -> (Index, intern::Index) { unsafe { self.index_intern } } @@ -1097,14 +1253,51 @@ impl Data { } } -struct Ast { +pub struct Ast { tags: Vec, datas: Vec, extra: Vec, source_locs: Vec, } +impl Debug for Ast { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Ast") + .field_with("nodes", |f| { + let mut list = f.debug_list(); + struct LocDisplay(SourceLocation); + impl Debug for LocDisplay { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "({})", self.0) + } + } + let entries = self + .tags + .iter() + .cloned() + .zip(self.datas.iter().cloned()) + .zip(self.source_locs.iter().cloned()) + .enumerate() + .map(|(i, ((tag, data), loc))| { + (i, tag, ExpandedData::from((tag, data)), LocDisplay(loc)) + }); + list.entries(entries).finish() + }) + .field("extra", &self.extra) + .finish() + } +} + impl Ast { + fn new() -> Ast { + Self { + tags: vec![Tag::Root], + datas: vec![Data::extra_range(0, 0)], + extra: vec![], + source_locs: vec![SourceLocation::new(0, 0)], + } + } + fn reserve_node(&mut self) -> Index { let i = unsafe { Index(NonZero::new_unchecked(self.tags.len() as u32)) }; self.tags.push(Tag::Undefined); @@ -1125,14 +1318,28 @@ impl Ast { i } - fn push_file>(&mut self, decls: I, loc: SourceLocation) -> Index { - let i = self.reserve_node(); + fn set_file>(&mut self, i: Index, decls: I, loc: SourceLocation) { let (extra_start, extra_end) = self.extend_extra_by_indices(decls); self.set_tag_data_source_loc(i, Tag::File, Data::extra_range(extra_start, extra_end), loc); + } + fn push_file>(&mut self, decls: I, loc: SourceLocation) -> Index { + let i = self.reserve_node(); + self.set_file(i, decls, loc); i } + fn set_root>(&mut self, decls: I) { + let (extra_start, extra_end) = self.extend_extra_by_indices(decls); + self.tags[0] = Tag::Root; + self.datas[0] = Data::extra_range(extra_start, extra_end); + } + + fn get_root_file_indices<'a>(&'a self) -> impl Iterator + 'a { + let (a, b) = self.datas[0].as_extra_range(); + self.extra[a..b].iter().cloned().map(|i| Index::new(i)) + } + fn push_global_decl( &mut self, ident: intern::Index, @@ -1152,10 +1359,13 @@ impl Ast { i } + fn set_fn_decl(&mut self, i: Index, proto: Index, body: Index, loc: SourceLocation) { + self.set_tag_data_source_loc(i, Tag::FunctionDecl, Data::two_indices(proto, body), loc); + } + fn push_fn_decl(&mut self, proto: Index, body: Index, loc: SourceLocation) -> Index { let i = self.reserve_node(); - self.set_tag_data_source_loc(i, Tag::FunctionDecl, Data::two_indices(proto, body), loc); - + self.set_fn_decl(i, proto, body, loc); i } @@ -1225,13 +1435,13 @@ impl Ast { i } - fn push_block>( + fn set_block>( &mut self, + i: Index, statements: I, trailing: Option, loc: SourceLocation, - ) -> Index { - let i = self.reserve_node(); + ) { let (extra_start, extra_end) = self.extend_extra_by_indices(statements.into_iter().chain(trailing.into_iter())); if trailing.is_some() { @@ -1249,7 +1459,16 @@ impl Ast { loc, ); } + } + fn push_block>( + &mut self, + statements: I, + trailing: Option, + loc: SourceLocation, + ) -> Index { + let i = self.reserve_node(); + self.set_block(i, statements, trailing, loc); i } @@ -1384,13 +1603,28 @@ impl Ast { i } - fn push_decl_ref(&mut self, ident: intern::Index, loc: SourceLocation) -> Index { + fn push_decl_ref_unresolved( + &mut self, + scope: Index, + ident: intern::Index, + loc: SourceLocation, + ) -> Index { let i = self.reserve_node(); - self.set_tag_data_source_loc(i, Tag::DeclRef, Data::intern(ident), loc); + self.set_tag_data_source_loc( + i, + Tag::DeclRefUnresolved, + Data::index_and_intern(scope, ident), + loc, + ); i } + fn resolve_decl_ref(&mut self, i: Index, decl: Index) { + self.tags[i.index()] = Tag::DeclRef; + self.datas[i.index()] = Data::index(decl); + } + fn push_expr_stmt(&mut self, expr: Index) -> Index { let i = self.reserve_node(); let loc = self.get_loc(expr); @@ -1421,12 +1655,31 @@ impl Ast { (i, self.extra.len() as u32) } fn set_tag_data_source_loc(&mut self, index: Index, tag: Tag, data: Data, loc: SourceLocation) { + eprintln!( + "{index} <- ({tag:?}, {:?}, {loc})", + ExpandedData::from((tag, data)) + ); self.tags[index.index()] = tag; self.datas[index.index()] = data; self.source_locs[index.index()] = loc; } } +struct Children(Vec); + +impl Display for Children { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[")?; + if let Some((last, rest)) = self.0.split_last() { + for i in rest { + write!(f, "{i}, ")?; + } + write!(f, "{last}")?; + } + write!(f, "]") + } +} + impl Ast { fn get_node_children(&self, index: Index) -> Vec { let tag = self.tags[index.index()]; @@ -1457,22 +1710,16 @@ impl Ast { let a = data.as_index(); vec![a] } - Tag::Parameter => todo!(), - Tag::Constant => todo!(), - Tag::ReturnStmt => todo!(), - Tag::VarDecl => todo!(), - Tag::MutVarDecl => todo!(), Tag::VarDeclAssignment | Tag::MutVarDeclAssignment => { let (a, _) = data.as_extra_range(); let expr = Index::new(self.extra[a + 1]); vec![expr] } Tag::GlobalDecl => { - let (a, _) = data.as_index_intern(); - vec![a] + let (_, offset) = data.as_intern_and_extra_offset(); + let expr = Index::new(self.extra[offset + 1]); + vec![expr] } - Tag::StructDecl => todo!(), - Tag::DeclRef => todo!(), Tag::CallExpr => { let (a, b) = data.as_two_indices(); vec![a, b] @@ -1527,12 +1774,75 @@ impl Ast { let else_ = Index::new(self.extra[b + 1]); vec![a, if_, else_] } + Tag::StructDecl + | Tag::DeclRef + | Tag::Parameter + | Tag::Constant + | Tag::ReturnStmt + | Tag::VarDecl + | Tag::MutVarDecl => vec![], _ => vec![], } } } -mod ast_gen { +pub struct AstRenderer<'a> { + ast: &'a Ast, + #[allow(dead_code)] + syms: &'a crate::symbol_table::syms2::Symbols, + scopes: Vec, +} + +impl<'a> AstRenderer<'a> { + pub fn new(ast: &'a Ast, syms: &'a crate::symbol_table::syms2::Symbols) -> Self { + Self { + ast, + syms, + scopes: Vec::new(), + } + } + + fn render_node( + &mut self, + w: &mut W, + indent: u32, + node: Index, + ) -> core::fmt::Result { + let tag = self.ast.tags[node.index()]; + let loc = self.ast.source_locs[node.index()]; + match tag { + Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { + self.scopes.push(node); + } + _ => {} + } + + let children = Children(self.ast.get_node_children(node)); + writeln_indented!(indent, w, "{node} = ({loc}) {tag:?} {}", children)?; + + for child in children.0 { + self.render_node(w, indent + 1, child)?; + } + + match tag { + Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { + self.scopes.pop(); + } + _ => {} + } + + Ok(()) + } + fn render(&mut self, w: &mut W) -> core::fmt::Result { + for file in self.ast.get_root_file_indices() { + self.render_node(w, 0, file)?; + } + + Ok(()) + } +} + +pub mod ast_gen { use intern::{PointerFlags, SimpleType}; use itertools::Itertools; @@ -1542,26 +1852,88 @@ mod ast_gen { common::from_lo_hi_dwords, comptime, lexer::{Radix, TokenItem, TokenIterator}, + symbol_table::syms2::SymbolKind, tokens::PRECEDENCE_MAP, }; use super::*; - struct ErrorInfo { + #[derive(Debug)] + pub struct ErrorInfo { error: ParseError, loc: SourceLocation, } - struct Parser { - ast: Ast, - intern: intern::InternPool, - scope: Vec, - errors: Vec, + #[derive(Debug)] + pub struct Parser { + pub ast: Ast, + pub intern: intern::InternPool, + pub syms: crate::symbol_table::syms2::Symbols, + scopes: Vec, + pub errors: Vec, } type ParseResult = core::result::Result; + impl Display for Parser { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.display().render(f) + } + } + impl Parser { + pub fn new() -> Parser { + Self { + ast: Ast::new(), + intern: intern::InternPool::create(), + syms: crate::symbol_table::syms2::Symbols::new(), + scopes: Vec::new(), + errors: Vec::new(), + } + } + + pub fn display(&self) -> AstRenderer<'_> { + AstRenderer::new(&self.ast, &self.syms) + } + + pub fn resolve_decl_refs(&mut self) { + let mut nodes = self.ast.get_root_file_indices().collect::>(); + let mut scopes = Vec::new(); + + while let Some(node) = nodes.pop() { + match self.ast.tags[node.index()] { + Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { + scopes.push(node); + } + Tag::DeclRefUnresolved => { + let (scope, name) = self.ast.datas[node.index()].as_index_intern(); + + // look in my_scope + if let Some(decl) = + self.syms + .find_symbol(scope, name, self.ast.source_locs[node.index()]) + { + self.ast.resolve_decl_ref(node, decl) + }; + } + _ => {} + } + + nodes.extend(self.ast.get_node_children(node)); + + match self.ast.tags[node.index()] { + Tag::File | Tag::FunctionDecl | Tag::Block | Tag::BlockTrailingExpr => { + scopes.pop(); + } + _ => {} + } + } + } + + fn current_scope(&self) -> Index { + self.scopes.last().cloned().unwrap() + } + fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result { let ident = tokens.expect_token(Token::Ident).map_err(|_| ErrorInfo { error: ParseError::ExpectedIdent, @@ -1581,11 +1953,13 @@ mod ast_gen { loc: tokens.current_source_location(), })?; + eprintln!("pointer qualifiers: {:?}", tokens.peek_token()); let &[cnst, vol, noalias] = &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3] else { unreachable!() }; + eprintln!("pointee: {:?}", tokens.peek_token()); let pointee = self.parse_type(tokens)?; Ok(self @@ -1814,10 +2188,15 @@ mod ast_gen { } } } - token => self.parse_simple_type(token).ok_or(ErrorInfo { - error: ParseError::ExpectedTypeName, - loc: tokens.current_source_location(), - }), + token => { + let ty = self.parse_simple_type(token).ok_or(ErrorInfo { + error: ParseError::ExpectedTypeName, + loc: tokens.current_source_location(), + })?; + + _ = tokens.next(); + Ok(ty) + } } } @@ -1840,6 +2219,13 @@ mod ast_gen { } }; + let Some(_) = tokens.eat_token(Token::Colon) else { + return Err(ErrorInfo { + error: ParseError::ExpectedToken(Token::Colon), + loc, + }); + }; + let typename = match self.parse_type(tokens) { Ok(i) => i, Err(err) => { @@ -1861,7 +2247,19 @@ mod ast_gen { } }; - return Ok(self.ast.push_global_decl(ident, typename, expr, loc)); + eprintln!("semi: {:?}", tokens.peek_token()); + let Some(_) = tokens.eat_token(Token::Semi) else { + break 'blk ErrorInfo { + error: ParseError::ExpectedToken(Token::Semi), + loc: tokens.current_source_location(), + }; + }; + + let decl = self.ast.push_global_decl(ident, typename, expr, loc); + self.syms + .insert_symbol(self.current_scope(), ident, SymbolKind::Const, decl); + + return Ok(decl); }; tokens.advance_past_semi().ok_or(ErrorInfo { @@ -1886,7 +2284,39 @@ mod ast_gen { let ident = self.parse_ident(tokens)?; - let parameters = self.parse_parameter_list(tokens)?; + let Some(open_parens) = tokens.eat_token(Token::OpenParens) else { + return Err(ErrorInfo { + error: ParseError::ExpectedArgumentList, + loc, + }); + }; + + let parameters = if tokens.is_next_token(Token::CloseParens) { + self.ast.push_parameter_list([], loc) + } else { + match self.parse_parameter_list(tokens) { + Ok(i) => { + _ = tokens.eat_token(Token::Comma); + + let Some(_) = tokens.eat_token(Token::CloseParens) else { + return Err(ErrorInfo { + error: ParseError::UnmatchedParens(open_parens.token_pos().start), + loc: tokens.current_source_location(), + }); + }; + + i + } + Err(err) => { + tokens.advance_past_end_of_parens().ok_or(ErrorInfo { + error: ParseError::UnmatchedParens(open_parens.token_pos().start), + loc: tokens.current_source_location(), + })?; + + self.push_error(err.error, err.loc) + } + } + }; let return_type = if let Some(_) = tokens.eat_token(Token::MinusGreater) { self.parse_type(tokens)? @@ -1899,12 +2329,24 @@ mod ast_gen { fn parse_fn_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); + let func = self.ast.reserve_node(); - let proto = self.parse_fn_proto(tokens)?; + self.push_scope(func, intern::Index::invalid()); - let body = self.parse_block(tokens)?; + let proto = self.parse_fn_proto(tokens).map_err(|e| { + self.pop_scope(); + e + })?; + let body = self.parse_block(tokens).map_err(|e| { + self.pop_scope(); + e + })?; - Ok(self.ast.push_fn_decl(proto, body, loc)) + self.pop_scope(); + + self.ast.set_fn_decl(func, proto, body, loc); + + Ok(func) } /// FUNCTION_DECL <- @@ -1980,6 +2422,7 @@ mod ast_gen { let is_let = let_or_var.token() == Token::Let; let name = self.parse_ident(tokens)?; + let name_loc = let_or_var.source_location(); let ty = if tokens.eat_token(Token::Colon).is_some() { Some(self.parse_type(tokens)?) @@ -1993,10 +2436,22 @@ mod ast_gen { None }; - Ok(self.ast.push_var_decl(is_let, name, ty, assignment, loc)) + let decl = self.ast.push_var_decl(is_let, name, ty, assignment, loc); + self.syms.insert_symbol( + self.current_scope(), + name, + SymbolKind::Local(name_loc), + decl, + ); + + Ok(decl) } - fn parse_block_inner(&mut self, tokens: &mut TokenIterator) -> ParseResult { + fn parse_block_inner( + &mut self, + block: Index, + tokens: &mut TokenIterator, + ) -> ParseResult { let loc = tokens.current_source_location(); let mut statements = Vec::new(); @@ -2071,7 +2526,8 @@ mod ast_gen { } }; - Ok(self.ast.push_block(statements, trailing, loc)) + self.ast.set_block(block, statements, trailing, loc); + Ok(block) } /// BLOCK <- @@ -2084,16 +2540,19 @@ mod ast_gen { loc, })?; - let block = match self.parse_block_inner(tokens) { - Ok(i) => { - self.scope.pop(); + let block = self.ast.reserve_node(); + self.push_scope(block, intern::Index::invalid()); + let block_result = self.parse_block_inner(block, tokens); + self.pop_scope(); - if !tokens.is_next_token(Token::CloseBrace) { + let block = match block_result { + Ok(i) => { + let Some(_) = tokens.eat_token(Token::CloseBrace) else { return Err(ErrorInfo { error: ParseError::UnmatchedBrace(open_brace.token_pos().start), loc: tokens.current_source_location(), }); - } + }; i } @@ -2101,7 +2560,7 @@ mod ast_gen { tokens.advance_past_end_of_braced().ok_or(ErrorInfo { error: ParseError::UnmatchedBrace(open_brace.token_pos().start), loc: tokens.current_source_location(), - }); + })?; self.push_error(err.error, err.loc) } }; @@ -2114,6 +2573,7 @@ mod ast_gen { /// PARAMETER_LIST , ARGUMENT fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); + let mut params = Vec::new(); loop { params.push(self.parse_parameter(tokens)?); @@ -2127,6 +2587,7 @@ mod ast_gen { // skip comma _ = tokens.next(); } + return Ok(self.ast.push_parameter_list(params, loc)); } @@ -2134,10 +2595,23 @@ mod ast_gen { /// IDENT : TYPENAME fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); + eprintln!("param name: {:?}", tokens.peek_token()); let name = self.parse_ident(tokens)?; + eprintln!("colon: {:?}", tokens.peek_token()); + let Some(_) = tokens.eat_token(Token::Colon) else { + return Err(ErrorInfo { + error: ParseError::ExpectedToken(Token::Colon), + loc, + }); + }; + eprintln!("param type: {:?}", tokens.peek_token()); let ty = self.parse_type(tokens)?; - return Ok(self.ast.push_parameter(name, ty, loc)); + let param = self.ast.push_parameter(name, ty, loc); + self.syms + .insert_symbol(self.current_scope(), name, SymbolKind::Local(loc), param); + + return Ok(param); } /// ARGUMENT <- @@ -2202,18 +2676,11 @@ mod ast_gen { }; match next.token() { - Token::Ident => { - _ = tokens.next(); - let ident = next.lexeme(); - let ident = self - .intern - .get_or_insert(intern::Key::String { str: ident }); - return Ok(self.ast.push_decl_ref(ident, loc)); - } Token::IntegerBinConstant | Token::IntegerHexConstant | Token::IntegerOctConstant | Token::IntegerConstant => { + _ = tokens.next(); let (value, ty) = self.try_parse_integral_constant(&next); return Ok(self.ast.push_constant(value, ty, loc)); } @@ -2221,6 +2688,7 @@ mod ast_gen { | Token::FloatingExpConstant | Token::DotFloatingConstant | Token::DotFloatingExpConstant => { + _ = tokens.next(); let (value, ty) = self.parse_floating_constant(&next); return Ok(self.ast.push_constant(value, ty, loc)); } @@ -2253,6 +2721,17 @@ mod ast_gen { Token::OpenBrace => { return self.parse_block(tokens); } + Token::Ident => { + _ = tokens.next(); + let ident = next.lexeme(); + let ident = self + .intern + .get_or_insert(intern::Key::String { str: ident }); + return Ok(self + .ast + .push_decl_ref_unresolved(self.current_scope(), ident, loc)); + } + // TODO: eventually handle paths _ => { return Err(ErrorInfo { error: ParseError::ExpectedPrimaryExpression, @@ -2424,6 +2903,7 @@ mod ast_gen { let Some(tok) = tokens.peek_token() else { break; }; + eprintln!("maybe binop: {tok:?}"); let loc = tok.source_location(); let Some(prec) = PRECEDENCE_MAP.get(&tok.token()).cloned() else { break; @@ -2435,6 +2915,7 @@ mod ast_gen { // SAFETY: we peeked `tok` let tok = tokens.next().unwrap(); + eprintln!("binop: {tok:?}"); let lhs = node; let rhs = self.parse_binary_expr(tokens, prec + 1)?; @@ -2538,7 +3019,16 @@ mod ast_gen { })?; let cond = match self.parse_expr(tokens) { - Ok(i) => i, + Ok(i) => { + let Some(_) = tokens.eat_token(Token::CloseParens) else { + return Err(ErrorInfo { + error: ParseError::UnmatchedParens(open_parens.token_pos().start), + loc: tokens.current_source_location(), + }); + }; + + i + } Err(err) => { tokens.advance_past_end_of_parens().ok_or(ErrorInfo { error: ParseError::UnmatchedParens(open_parens.token_pos().start), @@ -2551,7 +3041,7 @@ mod ast_gen { let body = self.parse_expr_or_block_as_block(tokens)?; if tokens.is_next_token(Token::Else) { - let else_expr = self.parse_expr(tokens)?; + let else_expr = self.parse_else_expr(tokens)?; Ok(self.ast.push_if_else(cond, body, else_expr, loc)) } else { Ok(self.ast.push_if(cond, body, loc)) @@ -2705,11 +3195,20 @@ mod ast_gen { fn parse_file(&mut self, tokens: &mut TokenIterator) -> Index { let start = tokens.current_source_location(); let mut decls = Vec::new(); + let file = self.ast.reserve_node(); + self.push_scope(file, intern::Index::invalid()); while let Some(next) = tokens.peek_token() { match next.token() { - Token::Fn => {} - Token::Const => {} + Token::Fn => { + decls.push(self.parse_fn_decl(tokens)); + } + Token::Const => { + decls.push(match self.parse_const_decl(tokens) { + Ok(i) => i, + Err(err) => self.push_error(err.error, err.loc), + }); + } _ => { // error node: let error = ParseError::UnexpectedTokenAtFileScope; @@ -2720,8 +3219,37 @@ mod ast_gen { } } } + self.pop_scope(); - self.ast.push_file(decls, start) + self.ast.set_file(file, decls, start); + file + } + + /// FILE <- + /// (FUNCTION_DECL | GLOBAL_DECL)* + pub fn parse(&mut self, mut tokens: TokenIterator) { + let file = self.parse_file(&mut tokens); + self.ast.set_root([file]); + self.resolve_decl_refs(); + } + + fn push_scope(&mut self, ast: Index, name: intern::Index) { + let parent = self.scopes.last().cloned(); + self.scopes.push(ast); + + if let Some(parent) = parent { + self.syms.insert_symbol( + ast, + intern::Index::invalid(), + SymbolKind::ParentScope, + parent, + ); + } + self.syms.insert_scope(name, ast); + } + + fn pop_scope(&mut self) { + self.scopes.pop(); } fn is_statement(&self, tokens: &mut TokenIterator) -> bool { diff --git a/src/bin/main.rs b/src/bin/main.rs index 61fcfbc..82e19c3 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -18,6 +18,7 @@ fn main() { ) .subcommands([ Command::new("ast").about("output AST."), + Command::new("ast2").about("output AST."), Command::new("mir").about("output machine-level intermediate representation."), Command::new("ir").about("output intermediate representation."), Command::new("asm").about("output x86-64 assembly (intel syntax)."), @@ -47,6 +48,12 @@ fn main() { tree.render(&mut buf).unwrap(); println!("AST:\n{buf}"); } + "ast2" => { + let mut tree2 = compiler::ast2::ast_gen::Parser::new(); + tree2.parse(tokens.iter()); + eprintln!("{tree2:#?}"); + println!("AST (new):\n{tree2}"); + } "ir" => { let mut ir = IR::new(); let builder = ir.build(&mut tree); diff --git a/src/lexer.rs b/src/lexer.rs index 90fe1a9..d5998a8 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,3 +1,4 @@ +use std::fmt::Debug; use std::fmt::Display; use crate::tokens::Token; @@ -363,6 +364,8 @@ impl<'a> TokenIterator<'a> { }; if let Some(pos) = tokens.iter().position(|&t| t == next.token()) { occurences[pos] += 1; + } else { + break; } } @@ -416,12 +419,20 @@ impl<'a> TokenIterator<'a> { } } -#[derive(Debug)] pub struct TokenItem<'a> { tokenizer: &'a Tokenizer<'a>, inner: TokenPos, } +impl<'a> Debug for TokenItem<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TokenItem") + .field("lexeme", &self.lexeme()) + .field("inner", &self.inner) + .finish_non_exhaustive() + } +} + #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] pub struct SourceLocation { pub line: u32, @@ -430,7 +441,7 @@ pub struct SourceLocation { impl Display for SourceLocation { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "l:{},c:{}", self.line, self.column) + write!(f, "{}:{}", self.line, self.column) } } diff --git a/src/lib.rs b/src/lib.rs index a0a5b30..a7fe9c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,7 +7,8 @@ map_try_insert, iter_intersperse, iter_array_chunks, - int_roundings + int_roundings, + debug_closure_helpers )] #![allow(unused_macros)] diff --git a/src/symbol_table.rs b/src/symbol_table.rs index ce515fd..b8bca7a 100644 --- a/src/symbol_table.rs +++ b/src/symbol_table.rs @@ -376,3 +376,186 @@ impl SymbolTableWrapper { } pub type SymbolTable = SymbolTableWrapper; + +pub mod syms2 { + /*! + Coming from the ast, we have a `DeclRef` with an interned identifier `ident` + and want to find the symbol it refers to. + + To help, we have a struct keeping track of all accessible scopes. Now, + we want to look through any accessible scope `s` for a symbol with the + + name `ident`. + Thus: `Symbol {scope: `s`, name: `ident`, ..}`. + + We might also know the type of the symbol we are looking for, if we want to + permit fields/variables and methods/functions sharing names. + + Since I want to allow variable shadowing for local variables, some strategy to differentiate between shadowed variables must be employed: + - keys of type SymbolKind::Local might point to a list of values with source locations + - keys might contain source locations. + + Any symbol pointed at from within the ast must again point at an ast + object. + Thus: `Key` -> `AstIndex` + Exception: `Key::ScopeByIndex` -> `InternIndex` + */ + + use std::collections::BTreeMap; + use std::fmt::Debug; + + use crate::ast2::intern::Index as InternIndex; + use crate::ast2::Index as AstIndex; + use crate::lexer::SourceLocation; + + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + pub enum Key { + ScopeByName { + name: InternIndex, + }, + /// not all scopes have a name, as some are anonymous blocks or otherwise nameless + ScopeByIndex { + ast: AstIndex, + }, + Symbol { + scope: AstIndex, + name: InternIndex, + kind: SymbolKind, + }, + } + + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + pub enum SymbolKind { + __First, + Const, + Function, + Type, + Scope, + ParentScope, + Local(SourceLocation), + __Last, + } + + #[derive(Clone, Copy)] + pub union Payload { + ast_index: AstIndex, + intern_index: InternIndex, + } + + #[derive(Debug)] + #[allow(dead_code)] + enum ExpandedPayload { + Ast(AstIndex), + Intern(InternIndex), + } + + impl Payload { + fn new_ast(ast: AstIndex) -> Payload { + Self { ast_index: ast } + } + fn new_intern(intern: InternIndex) -> Payload { + Self { + intern_index: intern, + } + } + + fn as_ast(&self) -> AstIndex { + unsafe { self.ast_index } + } + fn as_intern(&self) -> InternIndex { + unsafe { self.intern_index } + } + } + + pub struct Symbols { + inner: BTreeMap, + } + + impl Debug for Symbols { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Symbols [")?; + if f.alternate() { + writeln!(f, "")?; + } + let entries = self.inner.iter().map(|(key, val)| { + let payload = match key { + Key::ScopeByIndex { .. } => ExpandedPayload::Intern(val.as_intern()), + _ => ExpandedPayload::Ast(val.as_ast()), + }; + + (*key, payload) + }); + f.debug_list().entries(entries).finish()?; + write!(f, "]")?; + if f.alternate() { + writeln!(f, "")?; + } + + Ok(()) + } + } + + // checks for each scope in scopes_in_tree Symbol { scope, kind: SymbolKind::Local, 0}..Symbol { scope, kind: SymbolKind::Scope, u32::MAX} + struct SymbolTreePos { + scopes_in_scope: Vec, + } + + impl Symbols { + pub fn new() -> Symbols { + Self { + inner: BTreeMap::new(), + } + } + pub fn insert_scope(&mut self, name: InternIndex, ast: AstIndex) { + self.inner + .insert(Key::ScopeByIndex { ast }, Payload::new_intern(name)); + self.inner + .insert(Key::ScopeByName { name }, Payload::new_ast(ast)); + } + + pub fn find_symbol( + &self, + scope: AstIndex, + name: InternIndex, + loc: SourceLocation, + ) -> Option { + use SymbolKind::*; + let range = self.inner.range( + Key::Symbol { + scope, + name, + kind: __First, + }..=Key::Symbol { + scope, + name, + kind: Local(loc), + }, + ); + + if let Some((_, payload)) = range.rev().next() { + Some(payload.as_ast()) + } else { + if let Some(parent) = self.inner.get(&Key::Symbol { + scope, + name: InternIndex::invalid(), + kind: ParentScope, + }) { + self.find_symbol(parent.as_ast(), name, loc) + } else { + None + } + } + } + + pub fn insert_symbol( + &mut self, + scope: AstIndex, + name: InternIndex, + kind: SymbolKind, + ast: AstIndex, + ) { + self.inner + .insert(Key::Symbol { scope, name, kind }, Payload::new_ast(ast)); + } + } +}