diff --git a/grammar.bnf b/grammar.bnf
index 86296ff..4a29053 100644
--- a/grammar.bnf
+++ b/grammar.bnf
@@ -10,11 +10,12 @@
 <constant> ::= <integral-constant> | <floating-constant>

-<program> ::= <decl>
+<program> ::= <decl>*

-<decl> ::= <fn-decl> | <var-decl>
+<decl> ::= <fn-decl> | <global-decl>

-<fn-proto> ::= fn <ident> '(' <parameter-list>,? ')' (-> <type>)?
+<fn-proto> ::= fn <ident> '(' <parameter-list>,? ')' (-> <typename>)?
+<fn-decl> ::= <fn-proto> <block>

 <parameter-list> ::= <parameter> | <parameter> , <parameter-list>
@@ -29,7 +30,7 @@
 | <var-decl> ';'
 <return-stmt> ::= return <expr>? ';'
 <expr-stmt> ::= <expr> ';'
-<assignment-stmt> ::= <assignment> ';'
+<assignment-stmt> ::= <assignment-expr> ';'
 <assignment-op> ::= |= | &= | ^= | /= | *= | %= | <<= | >>= | += | -= | =

 <expr> ::=
@@ -58,7 +59,15 @@
 <binary-expr> ::= <prefix-expr>
 <prefix-expr> ::= (! | - | + | & | *)* <as-expr>
-<as-expr> ::= <primary-expr> as <typename>
+<as-expr> ::= <postfix-expr> as <typename>
+
+<postfix-expr> ::= <primary-expr>
+  | <primary-expr> '(' (<argument-list>,?)? ')'
+
+<argument-list> ::= <argument>
+  | <argument> , <argument-list>
+<argument> ::= <expr>
+  | <ident> : <expr>

 <primary-expr> ::= <ident> | <constant>
@@ -66,6 +75,7 @@
 | '(' <expr> ')'

 <var-decl> ::= (let | var) <ident> (':' <typename>)? ( = <expr>)?
+<global-decl> ::= <var-decl> ';'

 <typename> ::= <primitive-type> | <pointer>
@@ -78,7 +88,6 @@
 <integral-type> ::= ('u' | 'i') <dec-digit>+

 <floating-type> ::= 'f'('32' | '64')
-<constant> ::= <integral-constant> | <floating-constant>
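For orientation, here is a small program of the sort the revised grammar is meant to accept. This is an illustrative sketch against the rules above, not a test that ships with this patch:

    fn add(a: u32, b: u32) -> u32 {
        return a + b;
    }

    let shift: u32 = 2;

    fn scaled(x: u32) -> f64 {
        return (add(x, shift) << 1) as f64;
    }

It exercises the new pieces of this change: a function declaration built from <fn-proto> <block>, a <global-decl> (a <var-decl> followed by ';'), a call through the new <postfix-expr>, and an `as` cast.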
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..21838e1
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,217 @@
+use std::num::NonZero;
+
+pub type Node = NonZero<u32>;
+
+pub enum Tag {
+    Root,
+    FunctionProto {
+        /// Ident
+        name: Node,
+        /// ParameterList
+        parameters: Option<Node>,
+        return_type: Node,
+    },
+    ParameterList {
+        /// [Parameter]
+        parameters: Vec<Node>,
+    },
+    Parameter {
+        /// Ident
+        name: Node,
+        /// TypeName
+        ty: Node,
+    },
+    TypeName {
+        /// Ident | PrimitiveType | Pointer
+        inner: Node,
+    },
+    Pointer {
+        /// TypeName
+        pointee: Node,
+    },
+    PointerQualifier {
+        constness: bool,
+    },
+    IntegralType(IntegralType),
+    PrimitiveType(PrimitiveType),
+    Decl {
+        /// FunctionDecl | VarDecl
+        inner: Node,
+    },
+    FunctionDecl {
+        /// FunctionProto
+        proto: Node,
+        /// Block
+        body: Node,
+    },
+    Ident {
+        name: String,
+    },
+    IntegralConstant {
+        bits: u64,
+        ty: IntegralType,
+    },
+    FloatingConstant {
+        bits: u64,
+        ty: FloatingType,
+    },
+    Block {
+        /// ReturnStmt | ExprStmt | VarDecl
+        statements: Node,
+        trailing_expr: Option<Node>,
+    },
+    ReturnStmt {
+        expr: Option<Node>,
+    },
+    ExprStmt {
+        expr: Node,
+    },
+    VarDecl {
+        let_or_var: LetOrVar,
+        /// Ident
+        name: Node,
+        /// TypeName
+        explicit_type: Option<Node>,
+        expr: Option<Node>,
+    },
+    CallExpr {
+        /// Ident | Expr
+        lhs: Node,
+        /// ArgumentList
+        rhs: Option<Node>,
+    },
+    ArgumentList {
+        /// [Argument]
+        parameters: Vec<Node>,
+    },
+    Argument {
+        /// Ident
+        name: Option<Node>,
+        /// expr
+        expr: Node,
+    },
+    ExplicitCast {
+        lhs: Node,
+        /// TypeName
+        typename: Node,
+    },
+    Deref {
+        lhs: Node,
+    },
+    Ref {
+        lhs: Node,
+    },
+    Not {
+        lhs: Node,
+    },
+    Negate {
+        lhs: Node,
+    },
+    Or {
+        lhs: Node,
+        rhs: Node,
+    },
+    And {
+        lhs: Node,
+        rhs: Node,
+    },
+    BitOr {
+        lhs: Node,
+        rhs: Node,
+    },
+    BitAnd {
+        lhs: Node,
+        rhs: Node,
+    },
+    BitXOr {
+        lhs: Node,
+        rhs: Node,
+    },
+    Eq {
+        lhs: Node,
+        rhs: Node,
+    },
+    NEq {
+        lhs: Node,
+        rhs: Node,
+    },
+    Lt {
+        lhs: Node,
+        rhs: Node,
+    },
+    Gt {
+        lhs: Node,
+        rhs: Node,
+    },
+    Le {
+        lhs: Node,
+        rhs: Node,
+    },
+    Ge {
+        lhs: Node,
+        rhs: Node,
+    },
+    Shl {
+        lhs: Node,
+        rhs: Node,
+    },
+    Shr {
+        lhs: Node,
+        rhs: Node,
+    },
+    Add {
+        lhs: Node,
+        rhs: Node,
+    },
+    Sub {
+        lhs: Node,
+        rhs: Node,
+    },
+    Mul {
+        lhs: Node,
+        rhs: Node,
+    },
+    Rem {
+        lhs: Node,
+        rhs: Node,
+    },
+    Div {
+        lhs: Node,
+        rhs: Node,
+    },
+    Assign {
+        lhs: Node,
+        rhs: Node,
+    },
+}
+
+pub enum LetOrVar {
+    Let,
+    Var,
+}
+
+pub struct IntegralType {
+    pub signed: bool,
+    pub bits: u16,
+}
+
+pub enum FloatingType {
+    Binary32,
+    Binary64,
+}
+
+impl IntegralType {
+    pub fn u32() -> IntegralType {
+        Self {
+            signed: false,
+            bits: 32,
+        }
+    }
+}
+
+pub enum PrimitiveType {
+    FloatingType(FloatingType),
+    IntegralType(Node),
+    Bool,
+    Void,
+}
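Aside: the AST is a flat arena. A Node is a NonZero<u32> index into a Vec<Tag> whose slot 0 holds Root, so indices are always nonzero and Option<Node> stays pointer-sized. As a sketch of the encoding (not literal debug output), parsing `1 + 2 * 3` with the parser added below lays the vector out roughly as:

    nodes[1] = IntegralConstant { bits: 1, ty: u32 }
    nodes[2] = IntegralConstant { bits: 2, ty: u32 }
    nodes[3] = IntegralConstant { bits: 3, ty: u32 }
    nodes[4] = Mul { lhs: 2, rhs: 3 }
    nodes[5] = Add { lhs: 1, rhs: 4 }

Operands are pushed before the operators that reference them, so the root of an expression is always its highest index.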
diff --git a/src/lexer.rs b/src/lexer.rs
index 0bfe059..a3053e3 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -6,7 +6,7 @@ use crate::common::FallibleParse;
 use crate::common::NextIf;

 #[derive(Debug, thiserror::Error)]
-pub enum LexerError {
+pub enum Error {
     #[error("{0}")]
     StringError(String),
     #[error("Exp part of floating constant had no digits.")]
@@ -25,7 +25,7 @@
     ExpectedIdStartForIdentifier,
 }

-pub type LexerResult<T> = core::result::Result<T, LexerError>;
+pub type Result<T> = core::result::Result<T, Error>;

 #[derive(Debug, Clone)]
 pub struct Chars<'a> {
@@ -52,6 +52,38 @@ impl<'a> Chars<'a> {
         self.offset
     }

+    pub fn get_source_span(
+        &self,
+        start: u32,
+        end: u32,
+    ) -> std::ops::RangeInclusive<SourceLocation> {
+        let (start_l, start_c) = {
+            let range = self.get_range(0, start);
+            range.chars().fold((1u32, 0u32), |(line, col), c| {
+                if c == '\n' {
+                    (line + 1, 0)
+                } else {
+                    (line, col + 1)
+                }
+            })
+        };
+        let (end_l, end_c) = {
+            let range = self.get_range(start, end);
+            range.chars().fold((start_l, start_c), |(line, col), c| {
+                if c == '\n' {
+                    (line + 1, 0)
+                } else {
+                    (line, col + 1)
+                }
+            })
+        };
+
+        core::ops::RangeInclusive::new(
+            SourceLocation::new(start_l, start_c),
+            SourceLocation::new(end_l, end_c),
+        )
+    }
+
     pub fn get_range(&self, start: u32, end: u32) -> &str {
         unsafe { core::str::from_utf8_unchecked(&self.bytes[start as usize..end as usize]) }
     }
@@ -77,6 +109,101 @@ pub struct Tokenizer<'a> {
     tokens: Vec<TokenPos>,
 }

+#[derive(Debug, Clone)]
+pub struct TokenIterator<'a> {
+    tokenizer: &'a Tokenizer<'a>,
+    offset: usize,
+}
+
+impl<'a> TokenIterator<'a> {
+    pub fn expect_token(&mut self, token: Token) -> crate::parser::Result<TokenItem<'a>> {
+        self.next_if(|item| item.token() == token)
+            .ok_or(crate::parser::Error::ExpectedTokenNotFound(token))
+    }
+
+    pub fn eat_token(&mut self, token: Token) -> Option<TokenItem<'a>> {
+        self.next_if(|item| item.token() == token)
+    }
+
+    pub fn peek_token(&mut self) -> Option<TokenItem<'a>> {
+        self.clone().next()
+    }
+
+    pub fn peek_token_or_err(&mut self) -> crate::parser::Result<TokenItem<'a>> {
+        self.clone()
+            .next()
+            .ok_or(crate::parser::Error::UnexpectedEndOfTokens)
+    }
+
+    pub fn peek_expect_token(&mut self, token: Token) -> crate::parser::Result<TokenItem<'a>> {
+        self.clone()
+            .next()
+            .ok_or(crate::parser::Error::ExpectedTokenNotFound(token))
+    }
+
+    pub fn is_next_token(&mut self, token: Token) -> bool {
+        self.clone().next_if(|item| item.token() == token).is_some()
+    }
+}
+
+pub struct TokenItem<'a> {
+    tokenizer: &'a Tokenizer<'a>,
+    inner: TokenPos,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
+pub struct SourceLocation {
+    pub line: u32,
+    pub column: u32,
+}
+
+impl SourceLocation {
+    pub fn new(line: u32, column: u32) -> Self {
+        Self { line, column }
+    }
+}
+
+impl<'a> TokenItem<'a> {
+    pub fn token(&self) -> Token {
+        self.inner.token
+    }
+
+    pub fn lexeme(&self) -> &str {
+        self.tokenizer
+            .source
+            .get_range(self.inner.start, self.inner.end)
+    }
+
+    pub fn source_location(&self) -> std::ops::RangeInclusive<SourceLocation> {
+        self.tokenizer
+            .source
+            .get_source_span(self.inner.start, self.inner.end)
+    }
+}
+
+impl<'a> Iterator for TokenIterator<'a> {
+    type Item = TokenItem<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.offset >= self.tokenizer.tokens.len() {
+            None
+        } else {
+            let index = self.offset;
+            self.offset += 1;
+            match self.tokenizer.tokens[index].token {
+                // comment tokens are transparently skipped
+                Token::SlashSlash
+                | Token::SlashSlashSlash
+                | Token::SlashStar
+                | Token::SlashStarStar
+                | Token::Comment
+                | Token::DocComment => self.next(),
+                _ => Some(Self::Item {
+                    tokenizer: self.tokenizer,
+                    inner: self.tokenizer.tokens[index],
+                }),
+            }
+        }
+    }
+}
+
 macro_rules! next_or_eof {
     ($expr:expr) => {
         match $expr.next() {
@@ -141,13 +268,20 @@ macro_rules! residual {
 }

 impl<'a> Tokenizer<'a> {
-    fn push_token(&mut self, token: Token, start: u32, end: u32) -> LexerResult<()> {
+    pub fn iter(&self) -> TokenIterator {
+        TokenIterator {
+            tokenizer: self,
+            offset: 0,
+        }
+    }
+
+    fn push_token(&mut self, token: Token, start: u32, end: u32) -> Result<()> {
         self.tokens.push(TokenPos::new(token, start, end));

         Ok(())
     }

-    pub fn next_token(&mut self) -> LexerResult<()> {
+    pub fn next_token(&mut self) -> Result<()> {
         self.source
             .take_while_ref(|&c| crate::common::is_whitespace(c))
             .count();
@@ -256,7 +390,7 @@
         self.source
             .next_if(|&c| crate::common::is_id_start(c))
-            .ok_or(LexerError::ExpectedIdStartForIdentifier)?;
+            .ok_or(Error::ExpectedIdStartForIdentifier)?;
         self.source
             .take_while_ref(|&c| crate::common::is_id_continue(c))
             .count();
@@ -267,7 +401,7 @@

 /// IntegralType <-
 ///     ( 'u' | 'i' ) DIGITS+
-fn try_parse_integral_type(source: &mut Chars) -> LexerResult<Option<()>> {
+fn try_parse_integral_type(source: &mut Chars) -> Result<Option<()>> {
     if !source.next_if(|&c| c == 'u' || c == 'i').is_some() {
         return Ok(None);
     }
@@ -277,14 +411,14 @@
         .count()
         <= 0
     {
-        return Err(LexerError::IntegralTypeExpectedDigit);
+        return Err(Error::IntegralTypeExpectedDigit);
     };

     Ok(Some(()))
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
-enum Radix {
+pub enum Radix {
     Hex,
     Bin,
     Dec,
@@ -296,21 +430,29 @@ impl Radix {
     unsafe fn from_char_unchecked(c: char) -> Self {
         match c.to_ascii_lowercase() {
             'o' => Self::Oct,
-            'b' => Self::Oct,
-            'x' => Self::Oct,
-            'd' => Self::Oct,
+            'b' => Self::Bin,
+            'x' => Self::Hex,
+            'd' => Self::Dec,
             _ => unreachable!(),
         }
     }
     fn from_char(c: char) -> Option<Self> {
         match c.to_ascii_lowercase() {
             'o' => Some(Self::Oct),
-            'b' => Some(Self::Oct),
-            'x' => Some(Self::Oct),
-            'd' => Some(Self::Oct),
+            'b' => Some(Self::Bin),
+            'x' => Some(Self::Hex),
+            'd' => Some(Self::Dec),
             _ => None,
         }
     }
+    fn radix(self) -> u8 {
+        match self {
+            Radix::Hex => 16,
+            Radix::Bin => 2,
+            Radix::Oct => 8,
+            Radix::Dec => 10,
+        }
+    }
     fn to_token(self) -> Token {
         match self {
             Radix::Hex => Token::IntegerHexConstant,
@@ -319,7 +461,62 @@
             Radix::Dec => Token::IntegerConstant,
         }
     }
-    fn is_digit(self) -> fn(char) -> bool {
+    pub fn from_token(token: Token) -> Option<Self> {
+        match token {
+            Token::IntegerHexConstant => Some(Radix::Hex),
+            Token::IntegerBinConstant => Some(Radix::Bin),
+            Token::IntegerOctConstant => Some(Radix::Oct),
+            Token::IntegerConstant => Some(Radix::Dec),
+            _ => None,
+        }
+    }
+    pub fn folding_method(self) -> fn(u64, char) -> u64 {
+        match self {
+            Radix::Hex => {
+                fn fold(acc: u64, c: char) -> u64 {
+                    let digit = match c {
+                        '0'..='9' => c as u8 - b'0',
+                        'a'..='f' => c as u8 - b'a' + 10,
+                        'A'..='F' => c as u8 - b'A' + 10,
+                        _ => unreachable!(),
+                    };
+                    acc * 16 + digit as u64
+                }
+                fold
+            }
+            Radix::Bin => {
+                fn fold(acc: u64, c: char) -> u64 {
+                    let digit = match c {
+                        '0'..='1' => c as u8 - b'0',
+                        _ => unreachable!(),
+                    };
+                    acc * 2 + digit as u64
+                }
+                fold
+            }
+            Radix::Dec => {
+                fn fold(acc: u64, c: char) -> u64 {
+                    let digit = match c {
+                        '0'..='9' => c as u8 - b'0',
+                        _ => unreachable!(),
+                    };
+                    acc * 10 + digit as u64
+                }
+                fold
+            }
+            Radix::Oct => {
+                fn fold(acc: u64, c: char) -> u64 {
+                    let digit = match c {
+                        '0'..='7' => c as u8 - b'0',
+                        _ => unreachable!(),
+                    };
+                    acc * 8 + digit as u64
+                }
+                fold
+            }
+        }
+    }
+    pub fn is_digit(self) -> fn(char) -> bool {
         match self {
             Radix::Hex => crate::common::is_hex_digit,
             Radix::Bin => crate::common::is_bin_digit,
@@ -337,7 +534,7 @@ fn parse_digit_part(
     source: &mut Chars,
     allow_leading_underscore: bool,
     radix: Radix,
-) -> LexerResult<()> {
+) -> Result<()> {
     let radix = radix.is_digit();

     if allow_leading_underscore {
@@ -345,9 +542,9 @@
     }
     let _need_digit = source.next_if(|&c| radix(c)).ok_or_else(|| {
         if source.peek() == Some('_') {
-            LexerError::NumericalConstantDigitLeadingUnderscore
+            Error::NumericalConstantDigitLeadingUnderscore
         } else {
-            LexerError::NumericalConstantDigitNoDigit
+            Error::NumericalConstantDigitNoDigit
         }
     })?;
     let _rest = source.take_while_ref(|&c| radix(c) || c == '_').count();
@@ -361,7 +558,7 @@
 ///
 /// EXP_PART <-
 ///     (`e`|`E`) (`-`|`+`)? DEC_DIGITS
-fn try_parse_exp_part(source: &mut Chars) -> LexerResult<Option<()>> {
+fn try_parse_exp_part(source: &mut Chars) -> Result<Option<()>> {
     if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() {
         let _sign = source.next_if(|&c| c == '-' || c == '+');
         if source
@@ -370,7 +567,7 @@
             .lt(&1)
         {
             // need digits following exp notation
-            Err(LexerError::FloatingConstantExpPartNoDigit)
+            Err(Error::FloatingConstantExpPartNoDigit)
         } else {
             Ok(Some(()))
         }
@@ -387,7 +584,7 @@
 /// DEC_DIGITS FloatingType?
 /// `.` DEC_DIGITS EXP_PART? FloatingType?
 /// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
-fn parse_constant(source: &mut Chars) -> LexerResult<Token> {
+fn parse_constant(source: &mut Chars) -> Result<Token> {
     let zero = source.next_if(|&c| c == '0').is_some();
     let radix = zero
         .then(|| source.next_if_map(|c| Radix::from_char(c)))
@@ -423,7 +620,7 @@
         let digits = source.next_tuple::<(char, char)>();
         if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) {
             // need either f64 or f32 here!
-            return Err(LexerError::FloatingConstantInvalidTrailingType);
+            return Err(Error::FloatingConstantInvalidTrailingType);
         }
         true
     } else {
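As a quick sanity check of the digit folding above (a sketch, not a test included in this patch):

    let fold = Radix::Hex.folding_method();
    assert_eq!("1f".chars().fold(0u64, fold), 31);

Each step multiplies the accumulator by the base before adding the new digit — ((0 * 16 + 1) * 16 + 15) == 31 — so earlier characters land in the more significant positions.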
diff --git a/src/lib.rs b/src/lib.rs
index 1fdc1f2..500e683 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,8 @@
 #![feature(extract_if, iter_advance_by)]
+#![allow(dead_code, unused_macros)]

+mod ast;
 mod common;
 mod lexer;
+mod parser;
 mod tokens;
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..2b266f7
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,425 @@
+use std::collections::HashMap;
+
+use itertools::Itertools;
+
+use crate::{
+    ast::{FloatingType, IntegralType, LetOrVar, Node, PrimitiveType, Tag},
+    lexer::{Radix, TokenIterator},
+    tokens::Token,
+};
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Unexpected end of token iter.")]
+    UnexpectedEndOfTokens,
+    #[error("Expected primitive type.")]
+    ExpectedPrimitiveType,
+    #[error("Expected token {0}.")]
+    ExpectedTokenNotFound(Token),
+    #[error("Expected `let` or `var`.")]
+    ExpectedLetOrVar,
+}
+
+pub type Result<T> = core::result::Result<T, Error>;
+
+pub struct Tree {
+    nodes: Vec<Tag>,
+}
+
+impl Tree {
+    pub fn new() -> Tree {
+        Self {
+            nodes: vec![Tag::Root],
+        }
+    }
+
+    fn push_tag(&mut self, tag: Tag) -> Node {
+        let node = Node::new(self.nodes.len() as u32).unwrap();
+        self.nodes.push(tag);
+
+        node
+    }
+
+    /// parses a type suffix such as `u32` or `i16` into its signedness and bit width
+    fn parse_integral_type(lexeme: &str) -> IntegralType {
+        let mut iter = lexeme.chars();
+        let signed = match iter.next().unwrap() {
+            'u' => false,
+            'i' | 's' => true,
+            _ => unreachable!(),
+        };
+
+        let bits = iter.fold(0u16, |acc, c| {
+            let digit = c as u8 - b'0';
+            acc * 10 + digit as u16
+        });
+
+        IntegralType { signed, bits }
+    }
+
+    fn parse_integral_constant(token: Token, lexeme: &str) -> (u64, IntegralType) {
+        let radix = Radix::from_token(token).unwrap();
+
+        // TODO: figure out how to do this safely for bigger types, whether to
+        // wrap, saturate, or else.
+        let iter = &mut lexeme.char_indices();
+        let value = iter
+            .take_while_ref(|&(_, c)| radix.is_digit()(c) || c == '_')
+            .filter(|&(_, c)| c != '_')
+            .fold(0u64, |acc, (_, c)| radix.folding_method()(acc, c));
+
+        // anything left over must be a type suffix like `u8`
+        let ty = match iter.clone().next() {
+            Some((_, 'u')) | Some((_, 'i')) => {
+                Self::parse_integral_type(&lexeme[iter.next().unwrap().0..])
+            }
+            _ => IntegralType::u32(),
+        };
+
+        (value, ty)
+    }
+
+    fn parse_floating_constant(_token: Token, lexeme: &str) -> (u64, FloatingType) {
+        // let (dot, exp) = match token {
+        //     Token::DotFloatingExpConstant => (true, true),
+        //     Token::DotFloatingConstant => (true, false),
+        //     Token::FloatingExpConstant => (false, true),
+        //     Token::FloatingConstant => (false, false),
+        //     _ => unreachable!(),
+        // };
+
+        let lexeme = lexeme
+            .strip_suffix("f32")
+            .map(|l| (l, FloatingType::Binary32))
+            .unwrap_or(
+                lexeme
+                    .strip_suffix("f64")
+                    .map(|l| (l, FloatingType::Binary64))
+                    .unwrap_or((lexeme, FloatingType::Binary64)),
+            );
+
+        let bits = match lexeme.1 {
+            FloatingType::Binary32 => lexeme.0.parse::<f32>().unwrap().to_bits() as u64,
+            FloatingType::Binary64 => lexeme.0.parse::<f64>().unwrap().to_bits() as u64,
+        };
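+        // e.g. "1.5f32" is stripped to ("1.5", Binary32) above and stored
+        // as (1.5f32).to_bits() as u64 == 0x3FC0_0000.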
+
+        (bits, lexeme.1)
+    }
+
+    fn parse_ident(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let name = tokens.expect_token(Token::Ident)?.lexeme().to_owned();
+        Ok(self.push_tag(Tag::Ident { name }))
+    }
+
+    pub fn parse_primitive_type(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let token = tokens.next().ok_or(Error::UnexpectedEndOfTokens)?;
+        let prim = match token.token() {
+            Token::IntegralType => {
+                let int = Self::parse_integral_type(token.lexeme());
+                PrimitiveType::IntegralType(self.push_tag(Tag::IntegralType(int)))
+            }
+            Token::Void => PrimitiveType::Void,
+            Token::Bool => PrimitiveType::Bool,
+            Token::F32 => PrimitiveType::FloatingType(FloatingType::Binary32),
+            Token::F64 => PrimitiveType::FloatingType(FloatingType::Binary64),
+            _ => {
+                return Err(Error::ExpectedPrimitiveType);
+            }
+        };
+
+        Ok(self.push_tag(Tag::PrimitiveType(prim)))
+    }
+
+    pub fn parse_pointer(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        tokens.expect_token(Token::Star)?;
+        let _constness = tokens.eat_token(Token::Const);
+        let typename = self.parse_typename(tokens)?;
+
+        Ok(self.push_tag(Tag::Pointer { pointee: typename }))
+    }
+
+    pub fn parse_typename(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        match tokens.peek_token_or_err()?.token() {
+            Token::Star => self.parse_pointer(tokens),
+            Token::Ident => Ok(self.push_tag(Tag::Ident {
+                name: tokens.next().unwrap().lexeme().to_owned(),
+            })),
+            _ => self.parse_primitive_type(tokens),
+        }
+    }
+
+    pub fn parse_var_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let let_or_var = match tokens
+            .eat_token(Token::Let)
+            .or_else(|| tokens.eat_token(Token::Var))
+            .map(|itm| itm.token())
+            .ok_or(Error::ExpectedLetOrVar)?
+        {
+            Token::Let => LetOrVar::Let,
+            Token::Var => LetOrVar::Var,
+            _ => unreachable!(),
+        };
+
+        let name = self.parse_ident(tokens)?;
+
+        let explicit_type = if tokens.eat_token(Token::Colon).is_some() {
+            Some(self.parse_typename(tokens)?)
+        } else {
+            None
+        };
+
+        let expr = if tokens.eat_token(Token::Equal).is_some() {
+            Some(self.parse_expr(tokens)?)
+        } else {
+            None
+        };
+
+        Ok(self.push_tag(Tag::VarDecl {
+            let_or_var,
+            name,
+            explicit_type,
+            expr,
+        }))
+    }
+
+    pub fn parse_global_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let node = self.parse_var_decl(tokens)?;
+        tokens.expect_token(Token::Semi)?;
+
+        Ok(node)
+    }
+
+    pub fn parse_parameter(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let name = self.parse_ident(tokens)?;
+        tokens.expect_token(Token::Colon)?;
+        let ty = self.parse_typename(tokens)?;
+
+        Ok(self.push_tag(Tag::Parameter { name, ty }))
+    }
+
+    pub fn parse_parameter_list(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let mut parameters = Vec::new();
+
+        loop {
+            parameters.push(self.parse_parameter(tokens)?);
+            if tokens.eat_token(Token::Comma).is_none() {
+                break;
+            }
+            // a comma not followed by an identifier is a trailing comma
+            if !tokens.is_next_token(Token::Ident) {
+                break;
+            }
+        }
+
+        todo!()
+    }
+
+    pub fn parse_fn_proto(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        tokens.expect_token(Token::Fn)?;
+        let name = self.parse_ident(tokens)?;
+        tokens.expect_token(Token::OpenParens)?;
+        let parameters = if !tokens.is_next_token(Token::CloseParens) {
+            Some(self.parse_parameter_list(tokens)?)
+        } else {
+            None
+        };
+        tokens.expect_token(Token::CloseParens)?;
+
+        let return_type = if tokens.eat_token(Token::MinusGreater).is_some() {
+            self.parse_typename(tokens)?
+        } else {
+            // no `->` means the function returns void
+            self.push_tag(Tag::PrimitiveType(PrimitiveType::Void))
+        };
+
+        Ok(self.push_tag(Tag::FunctionProto {
+            name,
+            parameters,
+            return_type,
+        }))
+    }
+
+    pub fn parse_fn_decl(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let proto = self.parse_fn_proto(tokens)?;
+
+        let body = self.parse_block(tokens)?;
+
+        Ok(self.push_tag(Tag::FunctionDecl { proto, body }))
+    }
+
+    pub fn parse_block(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        todo!()
+    }
+
+    pub fn parse_assignment(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        todo!()
+    }
+
+    pub fn parse_return_stmt(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        todo!()
+    }
+
+    pub fn parse_statement(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        todo!()
+    }
+
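+    /// Binary operators are parsed by precedence climbing over
+    /// PRECEDENCE_MAP. For `a + b * c`: the outer call parses `a`, sees `+`
+    /// (90) and recurses with precedence 91; the inner call parses `b`, sees
+    /// `*` (100 >= 91) and recurses again, so Mul { b, c } is built before
+    /// the outer call wraps it as Add { a, Mul { b, c } }.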
+    pub fn parse_binary_expr(
+        &mut self,
+        tokens: &mut TokenIterator,
+        precedence: u32,
+    ) -> Result<Node> {
+        let mut node = self.parse_prefix_expr(tokens)?;
+
+        loop {
+            let Some(tok) = tokens.peek_token() else {
+                break;
+            };
+            let Some(prec) = PRECEDENCE_MAP.get(&tok.token()).cloned() else {
+                break;
+            };
+
+            if prec < precedence {
+                break;
+            }
+
+            let tok = tokens.next().unwrap();
+
+            let lhs = node;
+            let rhs = self.parse_binary_expr(tokens, prec + 1)?;
+
+            let tag = match tok.token() {
+                Token::PipePipe => Tag::Or { lhs, rhs },
+                Token::AmpersandAmpersand => Tag::And { lhs, rhs },
+                Token::Pipe => Tag::BitOr { lhs, rhs },
+                Token::Caret => Tag::BitXOr { lhs, rhs },
+                Token::Ampersand => Tag::BitAnd { lhs, rhs },
+                Token::BangEqual => Tag::NEq { lhs, rhs },
+                Token::EqualEqual => Tag::Eq { lhs, rhs },
+                Token::LessEqual => Tag::Le { lhs, rhs },
+                Token::GreaterEqual => Tag::Ge { lhs, rhs },
+                Token::Less => Tag::Lt { lhs, rhs },
+                Token::Greater => Tag::Gt { lhs, rhs },
+                Token::GreaterGreater => Tag::Shr { lhs, rhs },
+                Token::LessLess => Tag::Shl { lhs, rhs },
+                Token::Plus => Tag::Add { lhs, rhs },
+                Token::Minus => Tag::Sub { lhs, rhs },
+                Token::Percent => Tag::Rem { lhs, rhs },
+                Token::Star => Tag::Mul { lhs, rhs },
+                Token::Slash => Tag::Div { lhs, rhs },
+                _ => unreachable!(),
+            };
+
+            node = self.push_tag(tag);
+        }
+
+        Ok(node)
+    }
+
+    pub fn parse_prefix_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        match tokens.peek_token_or_err()?.token() {
+            Token::Bang => {
+                _ = tokens.next();
+                let lhs = self.parse_as_expr(tokens)?;
+                Ok(self.push_tag(Tag::Not { lhs }))
+            }
+            Token::Minus => {
+                _ = tokens.next();
+                let lhs = self.parse_as_expr(tokens)?;
+                Ok(self.push_tag(Tag::Negate { lhs }))
+            }
+            Token::Plus => {
+                // unary plus is a no-op
+                _ = tokens.next();
+                self.parse_as_expr(tokens)
+            }
+            Token::Ampersand => {
+                _ = tokens.next();
+                let lhs = self.parse_as_expr(tokens)?;
+                Ok(self.push_tag(Tag::Ref { lhs }))
+            }
+            Token::Star => {
+                _ = tokens.next();
+                let lhs = self.parse_as_expr(tokens)?;
+                Ok(self.push_tag(Tag::Deref { lhs }))
+            }
+            _ => self.parse_as_expr(tokens),
+        }
+    }
+
+    pub fn parse_as_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let expr = self.parse_primary_expr(tokens)?;
+
+        if tokens.eat_token(Token::As).is_some() {
+            let typename = self.parse_typename(tokens)?;
+            Ok(self.push_tag(Tag::ExplicitCast {
+                lhs: expr,
+                typename,
+            }))
+        } else {
+            Ok(expr)
+        }
+    }
+
+    pub fn parse_postfix_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        // TODO: call expressions
+        self.parse_primary_expr(tokens)
+    }
+
+    pub fn parse_primary_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        let token = tokens.peek_token_or_err()?;
+        match token.token() {
+            Token::Ident => Ok(self.parse_ident(tokens)?),
+            Token::IntegerBinConstant
+            | Token::IntegerHexConstant
+            | Token::IntegerOctConstant
+            | Token::IntegerConstant => {
+                // consume the peeked constant before pushing its tag
+                _ = tokens.next();
+                let (bits, ty) = Self::parse_integral_constant(token.token(), token.lexeme());
+                Ok(self.push_tag(Tag::IntegralConstant { bits, ty }))
+            }
+            Token::FloatingConstant
+            | Token::FloatingExpConstant
+            | Token::DotFloatingConstant
+            | Token::DotFloatingExpConstant => {
+                _ = tokens.next();
+                let (bits, ty) = Self::parse_floating_constant(token.token(), token.lexeme());
+
+                Ok(self.push_tag(Tag::FloatingConstant { bits, ty }))
+            }
+            Token::OpenParens => {
+                _ = tokens.next();
+                let node = self.parse_expr(tokens)?;
+                tokens.expect_token(Token::CloseParens)?;
+                Ok(node)
+            }
+            _ => unreachable!(),
+        }
+    }
+
+    pub fn parse_expr(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
+        todo!()
+    }
+
+    pub fn parse_program(&mut self, tokens: &mut TokenIterator) -> Result<Vec<Node>> {
+        todo!()
+    }
+
+    pub fn parse(&mut self, tokens: TokenIterator) {}
+}
+
+static PRECEDENCE_MAP: std::sync::LazyLock<HashMap<Token, u32>> =
+    std::sync::LazyLock::new(|| {
+        HashMap::from([
+            (Token::PipePipe, 10),
+            (Token::AmpersandAmpersand, 20),
+            (Token::Pipe, 30),
+            (Token::Caret, 40),
+            (Token::Ampersand, 50),
+            (Token::BangEqual, 60),
+            (Token::EqualEqual, 60),
+            (Token::LessEqual, 70),
+            (Token::GreaterEqual, 70),
+            (Token::Less, 70),
+            (Token::Greater, 70),
+            (Token::GreaterGreater, 80),
+            (Token::LessLess, 80),
+            (Token::Plus, 90),
+            (Token::Minus, 90),
+            (Token::Percent, 100),
+            (Token::Star, 100),
+            (Token::Slash, 100),
+        ])
+    });
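Finally, a sketch of how the flat tree can be consumed once built. This is a hypothetical helper, not part of the change above; it assumes access to Tree::nodes and uses only types defined in this patch:

    impl Tree {
        /// Fold a pure integer-constant subtree to a value, if it is one.
        pub fn eval_const(&self, node: Node) -> Option<u64> {
            // push_tag hands out indices starting at 1; slot 0 is Tag::Root
            match &self.nodes[node.get() as usize] {
                Tag::IntegralConstant { bits, .. } => Some(*bits),
                Tag::Add { lhs, rhs } => Some(self.eval_const(*lhs)? + self.eval_const(*rhs)?),
                Tag::Mul { lhs, rhs } => Some(self.eval_const(*lhs)? * self.eval_const(*rhs)?),
                _ => None,
            }
        }
    }

Because push_tag appends operands before the operators that reference them, a forward pass over the vector in index order could compute the same folds without recursion.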