use crate::{Source, Token, is_things}; use itertools::Itertools; use werkzeug::iter::{FallibleMapIter, NextIf}; #[derive(Debug, thiserror::Error, PartialEq, Eq)] pub enum Error { #[error("{0}")] StringError(String), #[error("Exp part of floating constant had no digits.")] FloatingConstantExpPartNoDigit, #[error("constant cannot start with leading underscore '_'.")] NumericalConstantDigitLeadingUnderscore, #[error("Expected digit here for constant.")] NumericalConstantDigitNoDigit, #[error("Expected digit here for integer constant.")] IntegralTypeExpectedDigit, #[error("Floating constant has invalid trailing type.")] FloatingConstantInvalidTrailingType, #[error("Invalid token.")] InvalidToken, #[error("Unknown suffix in constant.")] NumericalConstantUnknownSuffix, } type Result = core::result::Result; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Radix { Hex, Bin, Dec, Oct, } impl Radix { #[allow(unused)] /// must be called with one of `['b','x','d','o']` unsafe fn from_char_unchecked(c: char) -> Self { match c.to_ascii_lowercase() { 'o' => Self::Oct, 'b' => Self::Bin, 'x' => Self::Hex, 'd' => Self::Dec, _ => unreachable!(), } } fn from_char(c: char) -> Option { match c.to_ascii_lowercase() { 'o' => Some(Self::Oct), 'b' => Some(Self::Bin), 'x' => Some(Self::Hex), 'd' => Some(Self::Dec), _ => None, } } pub fn from_token(token: Token) -> Option { match token { Token::IntegerHexConstant(_) => Some(Radix::Hex), Token::IntegerBinConstant(_) => Some(Radix::Bin), Token::IntegerOctConstant(_) => Some(Radix::Oct), Token::IntegerConstant(_) => Some(Radix::Dec), _ => None, } } #[allow(unused)] pub fn radix(self) -> u8 { match self { Radix::Hex => 16, Radix::Bin => 2, Radix::Oct => 8, Radix::Dec => 10, } } fn to_constant_kind(self) -> ConstantKind { match self { Radix::Hex => ConstantKind::HexInteger, Radix::Bin => ConstantKind::BinInteger, Radix::Oct => ConstantKind::OctInteger, Radix::Dec => ConstantKind::Integer, } } pub fn map_digit(self, c: char) -> u8 { match self { Radix::Hex => match c { '0'..='9' => c as u8 - b'0', 'a'..='f' => 10 + c as u8 - b'a', 'A'..='F' => 10 + c as u8 - b'A', _ => unreachable!(), }, Radix::Bin => match c { '0'..='1' => c as u8 - b'0', _ => unreachable!(), }, Radix::Dec => match c { '0'..='9' => c as u8 - b'0', _ => unreachable!(), }, Radix::Oct => match c { '0'..='7' => c as u8 - b'0', _ => unreachable!(), }, } } pub fn folding_method(self) -> fn(u64, char) -> u64 { match self { Radix::Hex => { fn fold(acc: u64, c: char) -> u64 { let digit = match c { '0'..='9' => c as u8 - b'0', 'a'..='f' => c as u8 - b'a', 'A'..='F' => c as u8 - b'A', _ => unreachable!(), }; acc * 16 + digit as u64 } fold } Radix::Bin => { fn fold(acc: u64, c: char) -> u64 { let digit = match c { '0'..='1' => c as u8 - b'0', _ => unreachable!(), }; acc * 2 + digit as u64 } fold } Radix::Dec => { fn fold(acc: u64, c: char) -> u64 { let digit = match c { '0'..='9' => c as u8 - b'0', _ => unreachable!(), }; acc * 10 + digit as u64 } fold } Radix::Oct => { fn fold(acc: u64, c: char) -> u64 { let digit = match c { '0'..='7' => c as u8 - b'0', _ => unreachable!(), }; acc * 8 + digit as u64 } fold } } } pub fn is_digit(self) -> fn(char) -> bool { match self { Radix::Hex => is_things::is_hex_digit, Radix::Bin => is_things::is_bin_digit, Radix::Oct => is_things::is_oct_digit, Radix::Dec => is_things::is_digit, } } } // where DIGIT is defined by radix: // DIGITS <- // if allow_leading_underscore: `_`* DIGIT (DIGIT|`_`)* // else: DIGIT (DIGIT|`_`)* fn parse_digit_part( source: &mut Source, allow_leading_underscore: bool, radix: Radix, ) -> Result<()> { let is_digit = radix.is_digit(); if allow_leading_underscore { let _underscore = source.take_while_ref(|&c| c == '_').count(); } let _need_digit = source.next_if(|&c| is_digit(c)).ok_or_else(|| { if source.peek() == Some(&'_') { Error::NumericalConstantDigitLeadingUnderscore } else { Error::NumericalConstantDigitNoDigit } })?; let _rest = source.take_while_ref(|&c| is_digit(c) || c == '_').count(); Ok(()) } // IntegralType <- // ( 'u' | 'i' ) DIGITS+ fn try_parse_integral_type(source: &mut Source) -> Result> { if !source.next_if(|&c| c == 'u' || c == 'i').is_some() { return Ok(None); } if source.take_while_ref(|&c| is_things::is_digit(c)).count() <= 0 { return Err(Error::IntegralTypeExpectedDigit); }; Ok(Some(())) } // returns `Err(E)` if it failed to parse. // returns `Ok(None)` if no exp part was found. // returns `Ok(Some(()))` if an exp part was found and parsed. // // EXP_PART <- // (`e`|`E`) (`-`|`+`)? DEC_DIGITS fn try_parse_exp_part(source: &mut Source) -> Result> { if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() { let _sign = source.next_if(|&c| c == '-' || c == '+'); if source .take_while_ref(|&c| is_things::is_digit(c)) .count() .lt(&1) { // need digits following exp notation Err(Error::FloatingConstantExpPartNoDigit) } else { Ok(Some(())) } } else { Ok(None) } } // CONSTANT <- // DEC_DIGITS IntegralType? // `0x` HEX_DIGITS IntegralType? // `0b` BIN_DIGITS IntegralType? // `0o` OCT_DIGITS IntegralType? // DEC_DIGITS FloatingType? // `.` DEC_DIGITS EXP_PART? FloatingType? // DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType? fn parse_constant_inner(source: &mut Source) -> Result { let zero = source.next_if(|&c| c == '0').is_some(); let radix = zero .then(|| source.next_if_map(|c| Radix::from_char(c))) .flatten(); if let Some(radix) = radix { parse_digit_part(source, false, radix)?; if source.peek().map(|&c| c == 'u' || c == 'i') == Some(true) { try_parse_integral_type(source)?; } return Ok(radix.to_constant_kind()); } // if zero: `_`* DIGIT (DIGIT|`_`)* // else: DIGIT (DIGIT|`_`)* _ = match parse_digit_part(source, zero, Radix::Dec) { Ok(_) => Ok(()), Err(Error::NumericalConstantDigitNoDigit) if zero => Ok(()), Err(e) => Err(e), }?; if let Some(_) = source.try_map_iter_if(|source| try_parse_integral_type(source))? { return Ok(ConstantKind::Integer); } let dot = source.next_if(|&c| c == '.').is_some(); if dot { parse_digit_part(source, false, Radix::Dec)?; } // parse exp notation let exp = try_parse_exp_part(source)?.is_some(); // trailing FloatingType? let trailing_float_type = if source.next_if(|&c| c == 'f').is_some() { let digits = source.next_tuple::<(char, char)>(); if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) { // need either f64 or f32 here! return Err(Error::FloatingConstantInvalidTrailingType); } true } else { false }; let token = match (dot, exp, trailing_float_type) { (false, false, false) => ConstantKind::Integer, (true, false, _) => ConstantKind::DotFloating, (true, true, _) => ConstantKind::DotFloatingExp, (false, true, _) => ConstantKind::FloatingExp, (false, false, _) => ConstantKind::Floating, }; Ok(token) } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ConstantKind { Integer, BinInteger, OctInteger, HexInteger, DotFloating, DotFloatingExp, FloatingExp, Floating, Char, String, } impl<'a> From<(ConstantKind, &'a str)> for Token<'a> { fn from((value, lexeme): (ConstantKind, &'a str)) -> Self { match value { ConstantKind::Integer => Token::IntegerConstant(lexeme), ConstantKind::BinInteger => Token::IntegerBinConstant(lexeme), ConstantKind::OctInteger => Token::IntegerOctConstant(lexeme), ConstantKind::HexInteger => Token::IntegerHexConstant(lexeme), ConstantKind::DotFloating => Token::DotFloatingConstant(lexeme), ConstantKind::DotFloatingExp => Token::DotFloatingExpConstant(lexeme), ConstantKind::FloatingExp => Token::FloatingExpConstant(lexeme), ConstantKind::Floating => Token::FloatingConstant(lexeme), ConstantKind::Char => Token::CharConstant(lexeme), ConstantKind::String => Token::StringConstant(lexeme), } } } pub(crate) fn parse_constant(source: &mut Source) -> Result { let constant = parse_constant_inner(source)?; // char following a constant must not be id_continue if source .peek() .map(|&c| is_things::is_id_continue(c)) .unwrap_or(false) { return Err(Error::NumericalConstantUnknownSuffix); } Ok(constant) } pub(crate) fn parse_string_or_char_constant(source: &mut Source) -> Result { let quote = source .next_if(|&c| c == '"' || c == '\'') .ok_or(Error::InvalidToken)?; let is_char = quote == '\''; let mut escaped = false; let mut closed = false; while let Some(c) = source.next() { if escaped { // accept any escaped char escaped = false; continue; } if c == '\\' { escaped = true; continue; } if c == quote { closed = true; break; } } if !closed { return Err(Error::StringError("Unterminated string/char.".into())); } if is_char { Ok(ConstantKind::Char) } else { Ok(ConstantKind::String) } } /// returns `Ok(true)` if it was a doc comment (///) pub(crate) fn parse_comment<'a>(source: &'a mut Source) -> Result { if !(source.next() == Some('/') && source.next() == Some('/')) { return Err(Error::InvalidToken); } let doc = source.next_if_eq(&'/').is_some(); loop { // take until new line source.take_while_inclusive(|&c| c != '\n').for_each(drop); let mut copy = source.clone(); // skip whitespaces after new line to find continuation of comment (&mut copy) .take_while_ref(|&c| is_things::is_whitespace(c) && c != '\n') .for_each(drop); if (copy.next() == Some('/')) && (copy.next() == Some('/')) { match copy.next() { None => break, // docs end here, regular comment starts Some('\n') if doc => break, // this is a comment, so we can just take until this new line Some('\n') if !doc => continue, // continue doc comment Some('/') if doc => {} Some('/') if !doc => break, Some(_) if doc => break, // continue regular comment Some(_) => {} } *source = copy; } else { break; } } Ok(doc) } #[cfg(test)] mod tests { use super::*; fn make_source(s: &'_ str) -> Source<'_> { s.chars().peekable().into() } #[test] fn parse_constant_number() { assert_eq!( parse_constant(&mut make_source("0x1A3F_u32")), Ok(ConstantKind::HexInteger) ); assert_eq!( parse_constant(&mut make_source("13f32")), Ok(ConstantKind::Floating) ); assert_eq!( parse_constant(&mut make_source("0b1011_0010i16")), Ok(ConstantKind::BinInteger) ); assert_eq!( parse_constant(&mut make_source("0o755u8")), Ok(ConstantKind::OctInteger) ); assert_eq!( parse_constant(&mut make_source("42i64")), Ok(ConstantKind::Integer) ); assert_eq!( parse_constant(&mut make_source("3.14f64")), Ok(ConstantKind::DotFloating) ); assert_eq!( parse_constant(&mut make_source("2.71828e0f32")), Ok(ConstantKind::DotFloatingExp) ); assert_eq!( parse_constant(&mut make_source("22e23")), Ok(ConstantKind::FloatingExp) ); } }