From a565c1e1e97a463175b517c6c496c991c48368e3 Mon Sep 17 00:00:00 2001 From: Janis Date: Sat, 14 Sep 2024 13:09:17 +0200 Subject: [PATCH] parsing struct definitions in ast --- src/ast2/mod.rs | 425 ++++++++++++++++++----------------------- src/bin/main.rs | 16 +- tests/legal/struct.sea | 8 + 3 files changed, 209 insertions(+), 240 deletions(-) create mode 100644 tests/legal/struct.sea diff --git a/src/ast2/mod.rs b/src/ast2/mod.rs index cba20f3..32fc9e0 100644 --- a/src/ast2/mod.rs +++ b/src/ast2/mod.rs @@ -620,8 +620,7 @@ pub mod intern { fields, } => { let flags = StructFlags::new(packed, c_like, fields.len() as u32).pack(); - let i = self.push_word(name.into_u32()); - let i = self.push_word(flags); + let i = self.extend_words([name.into_u32(), flags]); self.extend_words( fields .into_iter() @@ -1031,6 +1030,10 @@ enum ParseError { #[error("Dummy Message.")] UnmatchedParens(u32), #[error("Dummy Message.")] + ExpectedTypeDeclaration, + #[error("Dummy Message.")] + UnexpectedTypeAttributes, + #[error("Dummy Message.")] UnmatchedSquareBracket(u32), #[error("Dummy Message.")] ExpectedEndOfBlock, @@ -1697,10 +1700,6 @@ impl Ast { (i, self.extra.len() as u32) } fn set_tag_data_source_loc(&mut self, index: Index, tag: Tag, data: Data, loc: SourceLocation) { - eprintln!( - "{index} <- ({tag:?}, {:?}, {loc})", - ExpandedData::from((tag, data)) - ); self.tags[index.index()] = tag; self.datas[index.index()] = data; self.source_locs[index.index()] = loc; @@ -1995,13 +1994,11 @@ pub mod ast_gen { loc: tokens.current_source_location(), })?; - eprintln!("pointer qualifiers: {:?}", tokens.peek_token()); let &[cnst, vol, noalias] = &tokens.eat_all_zero_or_once(&[Token::Const, Token::Volatile, Token::Noalias])[..3] else { unreachable!() }; - eprintln!("pointee: {:?}", tokens.peek_token()); let pointee = self.parse_type(tokens)?; Ok(self @@ -2289,7 +2286,6 @@ pub mod ast_gen { } }; - eprintln!("semi: {:?}", tokens.peek_token()); let Some(_) = tokens.eat_token(Token::Semi) else { break 'blk ErrorInfo { error: ParseError::ExpectedToken(Token::Semi), @@ -2477,66 +2473,40 @@ pub mod ast_gen { loc: tokens.current_source_location(), })?; - match next.token() { - Token::CloseBrace => { - break None; - } - Token::Return => { - statements.push(self.parse_return_stmt(tokens)?); - } - Token::Var | Token::Let => { - statements.push(self.parse_var_decl(tokens)?); - } - Token::Const => { - statements.push(self.parse_const_decl(tokens)?); - } - Token::Fn => { - statements.push(self.parse_fn_decl(tokens)); - } - _ => { - if self.is_statement(tokens) { - // expr -> statements - let expr = match self.parse_expr(tokens) { - Ok(i) => { - _ = tokens.eat_token(Token::Semi).ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::Semi), - loc: tokens.current_source_location(), + if let Some(decl) = self.parse_constant_decls(tokens)? 
{ + statements.push(decl); + } else { + match next.token() { + Token::CloseBrace => { + break None; + } + Token::Return => { + statements.push(self.parse_return_stmt(tokens)?); + } + Token::Var | Token::Let => { + statements.push(self.parse_var_decl(tokens)?); + } + _ => { + if self.is_statement(tokens) { + // expr -> statements + let expr = self + .parse_with_trailing_semi(tokens, |this, tokens| { + this.parse_expr(tokens) })?; - i + statements.push(expr); + } else { + // expr -> trailing + let expr = self.parse_expr(tokens)?; + if !tokens.is_next_token(Token::CloseBrace) { + statements.push(self.push_error( + ParseError::ExpectedEndOfBlock, + tokens.current_source_location(), + )); + } else { + break Some(expr); } - Err(err) => { - tokens.advance_past_semi().ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::Semi), - loc: tokens.current_source_location(), - })?; - self.push_error(err.error, err.loc) - } - }; - - statements.push(expr); - } else { - // expr -> trailing - let expr = match self.parse_expr(tokens) { - Ok(i) => { - if !tokens.is_next_token(Token::CloseBrace) { - return Err(ErrorInfo { - error: ParseError::ExpectedEndOfBlock, - loc: tokens.current_source_location(), - }); - } - - i - } - Err(err) => { - tokens.advance_past_end_of_braced().ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::CloseBrace), - loc: tokens.current_source_location(), - })?; - self.push_error(err.error, err.loc) - } - }; - break Some(expr); + } } } } @@ -2549,38 +2519,15 @@ pub mod ast_gen { /// BLOCK <- /// { STATEMENT* EXPRESSION? } fn parse_block(&mut self, tokens: &mut TokenIterator) -> ParseResult { - let loc = tokens.current_source_location(); + let block = self.parse_braced(tokens, |this, tokens| { + let block = this.ast.reserve_node(); + this.push_scope(block, intern::Index::invalid()); + let block_result = this.parse_block_inner(block, tokens); + this.pop_scope(); - let open_brace = tokens.eat_token(Token::OpenBrace).ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::OpenBrace), - loc, + block_result })?; - let block = self.ast.reserve_node(); - self.push_scope(block, intern::Index::invalid()); - let block_result = self.parse_block_inner(block, tokens); - self.pop_scope(); - - let block = match block_result { - Ok(i) => { - let Some(_) = tokens.eat_token(Token::CloseBrace) else { - return Err(ErrorInfo { - error: ParseError::UnmatchedBrace(open_brace.token_pos().start), - loc: tokens.current_source_location(), - }); - }; - - i - } - Err(err) => { - tokens.advance_past_end_of_braced().ok_or(ErrorInfo { - error: ParseError::UnmatchedBrace(open_brace.token_pos().start), - loc: tokens.current_source_location(), - })?; - self.push_error(err.error, err.loc) - } - }; - Ok(block) } @@ -2611,16 +2558,13 @@ pub mod ast_gen { /// IDENT : TYPENAME fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> ParseResult { let loc = tokens.current_source_location(); - eprintln!("param name: {:?}", tokens.peek_token()); let name = self.parse_ident(tokens)?; - eprintln!("colon: {:?}", tokens.peek_token()); let Some(_) = tokens.eat_token(Token::Colon) else { return Err(ErrorInfo { error: ParseError::ExpectedToken(Token::Colon), loc, }); }; - eprintln!("param type: {:?}", tokens.peek_token()); let ty = self.parse_type(tokens)?; let param = self.ast.push_parameter(name, ty, loc); @@ -2710,27 +2654,8 @@ pub mod ast_gen { } Token::OpenParens => { - _ = tokens.next(); - - let expr = match self.parse_expr(tokens) { - Ok(i) => { - let Some(_) = tokens.eat_token(Token::CloseParens) else { - 
return Err(ErrorInfo { - error: ParseError::UnmatchedParens(next.token_pos().start), - loc, - }); - }; - - i - } - Err(err) => { - tokens.advance_past_end_of_parens().ok_or(ErrorInfo { - error: ParseError::UnmatchedParens(next.token_pos().start), - loc: tokens.current_source_location(), - })?; - self.push_error(err.error, err.loc) - } - }; + let expr = + self.parse_parenthesised(tokens, |this, tokens| this.parse_expr(tokens))?; return Ok(expr); } @@ -2769,51 +2694,19 @@ pub mod ast_gen { let loc = next.source_location(); match next.token() { Token::OpenParens => { - let rhs = if tokens.is_next_token(Token::CloseParens) { - self.ast.push_argument_list([], loc) - } else { - match self.parse_argument_list(tokens) { - Ok(i) => { - _ = tokens.eat_token(Token::Comma); - - let Some(_) = tokens.eat_token(Token::CloseParens) else { - let loc = tokens.current_source_location(); - return Err(ErrorInfo { - error: ParseError::UnmatchedParens( - next.token_pos().start, - ), - loc, - }); - }; - - i - } - Err(err) => { - tokens.advance_past_end_of_parens().ok_or(ErrorInfo { - error: ParseError::UnmatchedParens(next.token_pos().start), - loc: tokens.current_source_location(), - })?; - - self.push_error(err.error, err.loc) - } + let arguments = self.parse_parenthesised(tokens, |this, tokens| { + if tokens.is_next_token(Token::CloseParens) { + Ok(this.ast.push_argument_list([], loc)) + } else { + this.parse_argument_list(tokens) } - }; + })?; - return Ok(self.ast.push_call_expr(lhs, rhs, loc)); + return Ok(self.ast.push_call_expr(lhs, arguments, loc)); } Token::OpenSquareBracket => { - let subscript = match self.parse_expr(tokens) { - Ok(i) => i, - Err(err) => { - tokens.advance_past_end_of_bracketed().ok_or(ErrorInfo { - error: ParseError::UnmatchedSquareBracket( - next.token_pos().start, - ), - loc: tokens.current_source_location(), - })?; - self.push_error(err.error, err.loc) - } - }; + let subscript = + self.parse_bracketed(tokens, |this, tokens| this.parse_expr(tokens))?; return Ok(self .ast @@ -2919,7 +2812,6 @@ pub mod ast_gen { let Some(tok) = tokens.peek_token() else { break; }; - eprintln!("maybe binop: {tok:?}"); let loc = tok.source_location(); let Some(prec) = PRECEDENCE_MAP.get(&tok.token()).cloned() else { break; @@ -2931,7 +2823,6 @@ pub mod ast_gen { // SAFETY: we peeked `tok` let tok = tokens.next().unwrap(); - eprintln!("binop: {tok:?}"); let lhs = node; let rhs = self.parse_binary_expr(tokens, prec + 1)?; @@ -3029,30 +2920,7 @@ pub mod ast_gen { let iff = tokens.eat_token(Token::If).unwrap(); let loc = iff.source_location(); - let open_parens = tokens.eat_token(Token::OpenParens).ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::OpenParens), - loc: tokens.current_source_location(), - })?; - - let cond = match self.parse_expr(tokens) { - Ok(i) => { - let Some(_) = tokens.eat_token(Token::CloseParens) else { - return Err(ErrorInfo { - error: ParseError::UnmatchedParens(open_parens.token_pos().start), - loc: tokens.current_source_location(), - }); - }; - - i - } - Err(err) => { - tokens.advance_past_end_of_parens().ok_or(ErrorInfo { - error: ParseError::UnmatchedParens(open_parens.token_pos().start), - loc: tokens.current_source_location(), - })?; - self.push_error(err.error, err.loc) - } - }; + let cond = self.parse_parenthesised(tokens, |this, tokens| this.parse_expr(tokens))?; let body = self.parse_expr_or_block_as_block(tokens)?; @@ -3100,6 +2968,73 @@ pub mod ast_gen { } } + /// TYPE_DECL <- + /// type IDENTIFIER = TYPE_UNION ; + /// type IDENTIFIER = '(' (TYPE,)* ')' ; + /// 
type IDENTIFIER = extern? union { (IDENTIFIER: TYPE,)* } + /// type IDENTIFIER = extern? packed? enum { (IDENTIFIER (= EXPRESSION),)* } + /// type IDENTIFIER = extern? packed? struct { (IDENTIFIER: TYPE,)* } + fn parse_type_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + _ = tokens.eat_token(Token::Type).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Type), + loc: tokens.current_source_location(), + }); + + let name = self.parse_ident(tokens)?; + let loc = tokens.current_source_location(); + + _ = tokens.eat_token(Token::Equal).ok_or(ErrorInfo { + error: ParseError::ExpectedToken(Token::Equal), + loc: tokens.current_source_location(), + }); + + let (has_attributes, c_like, packed) = { + let vec = tokens.eat_all_zero_or_once(&[Token::Extern, Token::Packed]); + (vec[0] || vec[1], vec[0], vec[1]) + }; + + let Some(next) = tokens.peek_token() else { + return Err(ErrorInfo { + error: ParseError::ExpectedTypeDeclaration, + loc: tokens.current_source_location(), + }); + }; + + match next.token() { + Token::Struct => self.parse_struct_decl(tokens, name, c_like, packed, loc), + Token::Union => { + unimplemented!() + } + Token::Enum => { + unimplemented!() + } + _ => { + if has_attributes { + return Err(ErrorInfo { + error: ParseError::UnexpectedTypeAttributes, + loc: tokens.current_source_location(), + }); + } + match next.token() { + Token::OpenParens => { + // tuple + unimplemented!() + } + Token::Ident => { + // sumtype + unimplemented!() + } + _ => { + return Err(ErrorInfo { + error: ParseError::ExpectedTypeDeclaration, + loc: tokens.current_source_location(), + }); + } + } + } + } + } + /// SUMTYPE_DECL <- /// type IDENTIFIER = TYPE_UNION /// TYPE_UNION <- @@ -3129,48 +3064,30 @@ pub mod ast_gen { /// STRUCT_DECL <- /// type IDENTIFIER = extern? packed? 
struct { STRUCT_FIELD,* } - fn parse_struct_decl(&mut self, tokens: &mut TokenIterator) -> ParseResult { + fn parse_struct_decl( + &mut self, + tokens: &mut TokenIterator, + name: intern::Index, + c_like: bool, + packed: bool, + loc: SourceLocation, + ) -> ParseResult { // SAFETY: function invariance - let start = tokens.eat_token(Token::Type).unwrap(); - let loc = start.source_location(); - let name = self.parse_ident(tokens)?; - - _ = tokens.eat_token(Token::Equal).ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::Equal), - loc: tokens.current_source_location(), - })?; - - let flags = tokens.eat_all_zero_or_once(&[Token::Packed, Token::Extern]); - let (packed, c_like) = (flags[0], flags[1]); _ = tokens.eat_token(Token::Struct).ok_or(ErrorInfo { error: ParseError::ExpectedToken(Token::Struct), loc: tokens.current_source_location(), })?; - _ = tokens.eat_token(Token::OpenBrace).ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::OpenBrace), - loc: tokens.current_source_location(), + + let decl = self.parse_braced(tokens, |this, tokens| { + this.parse_struct_fields(tokens).map(|fields| { + _ = tokens.eat_token(Token::Comma); + let struct_type = this.intern.get_struct_type(name, packed, c_like, fields); + this.ast.push_struct_decl(struct_type, loc) + }) })?; - match self.parse_struct_fields(tokens) { - Ok(fields) => { - _ = tokens.eat_token(Token::Comma); - - _ = tokens.eat_token(Token::CloseBrace).ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::CloseBrace), - loc: tokens.current_source_location(), - })?; - let struct_type = self.intern.get_struct_type(name, packed, c_like, fields); - return Ok(self.ast.push_struct_decl(struct_type, loc)); - } - Err(err) => { - tokens.advance_past_end_of_braced().ok_or(ErrorInfo { - error: ParseError::ExpectedToken(Token::CloseBrace), - loc: tokens.current_source_location(), - })?; - return Ok(self.push_error(err.error, err.loc)); - } - } + Ok(decl) } fn parse_with_trailing_semi( @@ -3283,6 +3200,18 @@ pub mod ast_gen { }) } + fn parse_bracketed(&mut self, tokens: &mut TokenIterator, parse: F) -> ParseResult + where + F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, + { + self.parse_inner2( + tokens, + Token::OpenSquareBracket, + Token::CloseSquareBracket, + parse, + ) + } + fn parse_braced(&mut self, tokens: &mut TokenIterator, parse: F) -> ParseResult where F: FnOnce(&mut Self, &mut TokenIterator) -> ParseResult, @@ -3329,11 +3258,38 @@ pub mod ast_gen { tokens: &mut TokenIterator, ) -> ParseResult<(intern::Index, intern::Index)> { let name = self.parse_ident(tokens)?; + let Some(_) = tokens.eat_token(Token::Colon) else { + return Err(ErrorInfo { + error: ParseError::ExpectedToken(Token::Colon), + loc: tokens.current_source_location(), + }); + }; let ty = self.parse_type(tokens)?; return Ok((name, ty)); } + /// CONSTANT_DECL <- + /// FUNCTION_DECL + /// GLOBAL_DECL + /// STRUCT_DECL + fn parse_constant_decls( + &mut self, + tokens: &mut TokenIterator, + ) -> ParseResult> { + let next = tokens.peek_token().ok_or(ErrorInfo { + error: ParseError::UnexpectedEndOfTokens, + loc: tokens.current_source_location(), + })?; + + match next.token() { + Token::Fn => Ok(Some(self.parse_fn_decl(tokens))), + Token::Const => self.parse_const_decl(tokens).map(|i| Some(i)), + Token::Type => self.parse_type_decl(tokens).map(|i| Some(i)), + _ => Ok(None), + } + } + /// FILE <- /// (FUNCTION_DECL | GLOBAL_DECL)* fn parse_file(&mut self, tokens: &mut TokenIterator) -> Index { @@ -3343,25 +3299,22 @@ pub mod ast_gen { self.push_scope(file, 
intern::Index::invalid()); while let Some(next) = tokens.peek_token() { - match next.token() { - Token::Fn => { - decls.push(self.parse_fn_decl(tokens)); - } - Token::Const => { - decls.push(match self.parse_const_decl(tokens) { - Ok(i) => i, - Err(err) => self.push_error(err.error, err.loc), - }); - } - _ => { - // error node: + let loc = next.source_location(); + let decl = match self.parse_constant_decls(tokens).and_then(|i| match i { + Some(i) => Ok(i), + None => { let error = ParseError::UnexpectedTokenAtFileScope; - let node = self.push_error(error, next.source_location()); - decls.push(node); + let node = self.push_error(error, loc); self.find_next_fn_or_const(tokens); + + Ok(node) } - } + }) { + Ok(i) => i, + Err(err) => self.push_error(err.error, err.loc), + }; + decls.push(decl); } self.pop_scope(); @@ -3437,7 +3390,7 @@ pub mod ast_gen { fn find_next_fn_or_const(&mut self, tokens: &mut TokenIterator) -> Option<()> { tokens - .advance_until_before_one_of(&[Token::Const, Token::Fn]) + .advance_until_before_one_of(&[Token::Const, Token::Fn, Token::Type]) .map(|_| ()) } } diff --git a/src/bin/main.rs b/src/bin/main.rs index 82e19c3..52cc746 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -7,6 +7,14 @@ use compiler::{ triples::{MirBuilder, IR}, }; +fn ast_tree(tokens: &Tokenizer) -> Tree { + let mut tree = Tree::new(); + tree.parse(tokens.iter()).unwrap(); + tree.fold_comptime(); + + tree +} + fn main() { let cmd = clap::Command::new("sea") .bin_name("sea") @@ -37,13 +45,10 @@ fn main() { let tokens = Tokenizer::new(&source).unwrap(); - let mut tree = Tree::new(); - tree.parse(tokens.iter()).unwrap(); - tree.fold_comptime(); - if let Some((cmd, _matches)) = matches.subcommand() { match cmd { "ast" => { + let mut tree = ast_tree(&tokens); let mut buf = String::new(); tree.render(&mut buf).unwrap(); println!("AST:\n{buf}"); @@ -55,6 +60,7 @@ fn main() { println!("AST (new):\n{tree2}"); } "ir" => { + let mut tree = ast_tree(&tokens); let mut ir = IR::new(); let builder = ir.build(&mut tree); let mut buf = String::new(); @@ -62,6 +68,7 @@ fn main() { println!("IR:\n{buf}"); } "mir" => { + let mut tree = ast_tree(&tokens); let mut ir = IR::new(); ir.build(&mut tree); @@ -77,6 +84,7 @@ fn main() { } } "asm" => { + let mut tree = ast_tree(&tokens); let mut ir = IR::new(); ir.build(&mut tree); diff --git a/tests/legal/struct.sea b/tests/legal/struct.sea new file mode 100644 index 0000000..18af87d --- /dev/null +++ b/tests/legal/struct.sea @@ -0,0 +1,8 @@ +type MyStruct = struct { + i: i32, + b: bool, +} + +fn square_if_true(arg: MyStruct) -> i32 { + 0 +} \ No newline at end of file
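
The TYPE_DECL grammar above only has its struct arm implemented in this patch; the union, enum, tuple, and sum-type branches still hit unimplemented!(). A minimal .sea sketch of what parse_type_decl should therefore accept, modeled on tests/legal/struct.sea and on the "extern? packed? struct" attribute order from the grammar comment; field types other than i32 and bool are not exercised by this patch and are an assumption:

    type Plain = struct {
        a: i32,
        b: bool,
    }

    type Packed = extern packed struct {
        a: i32,
        b: bool,
    }

The trailing comma after the last field is optional (parse_struct_decl eats at most one after parse_struct_fields), and no semicolon follows the closing brace, matching the test file. Writing type T = union { ... } or an enum, tuple, or sum type would currently panic in the parser rather than produce an error node.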
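
Because parse_block_inner now routes declarations through the shared parse_constant_decls helper, type declarations join fn and const as statements recognized inside a block. A hedged sketch of what that should parse, again restricted to syntax this patch exercises; whether later compilation stages accept block-local struct types is not shown here:

    fn local_struct(x: i32) -> i32 {
        type Pair = struct {
            a: i32,
            b: i32,
        }
        x
    }

fn and const were already handled at block scope before this change, so the new case is specifically type at statement position; file-scope error recovery (find_next_fn_or_const) likewise resynchronizes on the type keyword now.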