395 lines
11 KiB
Rust
395 lines
11 KiB
Rust
use crate::{Source, Token, is_things};
|
|
use itertools::Itertools;
|
|
use werkzeug::iter::{FallibleMapIter, NextIf};
|
|
|
|
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
|
|
pub enum Error {
|
|
#[error("{0}")]
|
|
StringError(String),
|
|
#[error("Exp part of floating constant had no digits.")]
|
|
FloatingConstantExpPartNoDigit,
|
|
#[error("constant cannot start with leading underscore '_'.")]
|
|
NumericalConstantDigitLeadingUnderscore,
|
|
#[error("Expected digit here for constant.")]
|
|
NumericalConstantDigitNoDigit,
|
|
#[error("Expected digit here for integer constant.")]
|
|
IntegralTypeExpectedDigit,
|
|
#[error("Floating constant has invalid trailing type.")]
|
|
FloatingConstantInvalidTrailingType,
|
|
#[error("Invalid token.")]
|
|
InvalidToken,
|
|
#[error("Unknown suffix in constant.")]
|
|
NumericalConstantUnknownSuffix,
|
|
}
|
|
|
|
/// Shorthand for results whose error type is this module's [`Error`].
type Result<T> = core::result::Result<T, Error>;
|
|
|
|
/// Numeric base of an integer constant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Radix {
    /// Base 16.
    Hex,
    /// Base 2.
    Bin,
    /// Base 10.
    Dec,
    /// Base 8.
    Oct,
}
|
|
|
|
impl Radix {
|
|
#[allow(unused)]
|
|
/// must be called with one of `['b','x','d','o']`
|
|
unsafe fn from_char_unchecked(c: char) -> Self {
|
|
match c.to_ascii_lowercase() {
|
|
'o' => Self::Oct,
|
|
'b' => Self::Bin,
|
|
'x' => Self::Hex,
|
|
'd' => Self::Dec,
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
fn from_char(c: char) -> Option<Self> {
|
|
match c.to_ascii_lowercase() {
|
|
'o' => Some(Self::Oct),
|
|
'b' => Some(Self::Bin),
|
|
'x' => Some(Self::Hex),
|
|
'd' => Some(Self::Dec),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
#[allow(unused)]
|
|
pub fn radix(self) -> u8 {
|
|
match self {
|
|
Radix::Hex => 16,
|
|
Radix::Bin => 2,
|
|
Radix::Oct => 8,
|
|
Radix::Dec => 10,
|
|
}
|
|
}
|
|
fn to_token(self) -> Token {
|
|
match self {
|
|
Radix::Hex => Token::IntegerHexConstant,
|
|
Radix::Bin => Token::IntegerBinConstant,
|
|
Radix::Oct => Token::IntegerOctConstant,
|
|
Radix::Dec => Token::IntegerConstant,
|
|
}
|
|
}
|
|
|
|
#[expect(dead_code)]
|
|
pub fn from_token(token: Token) -> Option<Self> {
|
|
match token {
|
|
Token::IntegerHexConstant => Some(Radix::Hex),
|
|
Token::IntegerBinConstant => Some(Radix::Bin),
|
|
Token::IntegerOctConstant => Some(Radix::Oct),
|
|
Token::IntegerConstant => Some(Radix::Dec),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
#[expect(dead_code)]
|
|
pub fn map_digit(self, c: char) -> u8 {
|
|
match self {
|
|
Radix::Hex => match c {
|
|
'0'..='9' => c as u8 - b'0',
|
|
'a'..='f' => 10 + c as u8 - b'a',
|
|
'A'..='F' => 10 + c as u8 - b'A',
|
|
_ => unreachable!(),
|
|
},
|
|
Radix::Bin => match c {
|
|
'0'..='1' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
},
|
|
Radix::Dec => match c {
|
|
'0'..='9' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
},
|
|
Radix::Oct => match c {
|
|
'0'..='7' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
},
|
|
}
|
|
}
|
|
|
|
#[expect(dead_code)]
|
|
pub fn folding_method(self) -> fn(u64, char) -> u64 {
|
|
match self {
|
|
Radix::Hex => {
|
|
fn fold(acc: u64, c: char) -> u64 {
|
|
let digit = match c {
|
|
'0'..='9' => c as u8 - b'0',
|
|
'a'..='f' => c as u8 - b'a',
|
|
'A'..='F' => c as u8 - b'A',
|
|
_ => unreachable!(),
|
|
};
|
|
acc * 16 + digit as u64
|
|
}
|
|
fold
|
|
}
|
|
Radix::Bin => {
|
|
fn fold(acc: u64, c: char) -> u64 {
|
|
let digit = match c {
|
|
'0'..='1' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
};
|
|
acc * 2 + digit as u64
|
|
}
|
|
fold
|
|
}
|
|
Radix::Dec => {
|
|
fn fold(acc: u64, c: char) -> u64 {
|
|
let digit = match c {
|
|
'0'..='9' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
};
|
|
acc * 10 + digit as u64
|
|
}
|
|
fold
|
|
}
|
|
Radix::Oct => {
|
|
fn fold(acc: u64, c: char) -> u64 {
|
|
let digit = match c {
|
|
'0'..='7' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
};
|
|
acc * 8 + digit as u64
|
|
}
|
|
fold
|
|
}
|
|
}
|
|
}
|
|
pub fn is_digit(self) -> fn(char) -> bool {
|
|
match self {
|
|
Radix::Hex => is_things::is_hex_digit,
|
|
Radix::Bin => is_things::is_bin_digit,
|
|
Radix::Oct => is_things::is_oct_digit,
|
|
Radix::Dec => is_things::is_digit,
|
|
}
|
|
}
|
|
}
|
|
|
|
// where DIGIT is defined by radix:
|
|
// DIGITS <-
|
|
// if allow_leading_underscore: `_`* DIGIT (DIGIT|`_`)*
|
|
// else: DIGIT (DIGIT|`_`)*
|
|
fn parse_digit_part(
|
|
source: &mut Source,
|
|
allow_leading_underscore: bool,
|
|
radix: Radix,
|
|
) -> Result<()> {
|
|
let is_digit = radix.is_digit();
|
|
|
|
if allow_leading_underscore {
|
|
let _underscore = source.take_while_ref(|&c| c == '_').count();
|
|
}
|
|
let _need_digit = source.next_if(|&c| is_digit(c)).ok_or_else(|| {
|
|
if source.peek() == Some(&'_') {
|
|
Error::NumericalConstantDigitLeadingUnderscore
|
|
} else {
|
|
Error::NumericalConstantDigitNoDigit
|
|
}
|
|
})?;
|
|
let _rest = source.take_while_ref(|&c| is_digit(c) || c == '_').count();
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// IntegralType <-
|
|
// ( 'u' | 'i' ) DIGITS+
|
|
fn try_parse_integral_type(source: &mut Source) -> Result<Option<()>> {
|
|
if !source.next_if(|&c| c == 'u' || c == 'i').is_some() {
|
|
return Ok(None);
|
|
}
|
|
|
|
if source.take_while_ref(|&c| is_things::is_digit(c)).count() <= 0 {
|
|
return Err(Error::IntegralTypeExpectedDigit);
|
|
};
|
|
|
|
Ok(Some(()))
|
|
}
|
|
|
|
// returns `Err(E)` if it failed to parse.
|
|
// returns `Ok(None)` if no exp part was found.
|
|
// returns `Ok(Some(()))` if an exp part was found and parsed.
|
|
//
|
|
// EXP_PART <-
|
|
// (`e`|`E`) (`-`|`+`)? DEC_DIGITS
|
|
fn try_parse_exp_part(source: &mut Source) -> Result<Option<()>> {
|
|
if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() {
|
|
let _sign = source.next_if(|&c| c == '-' || c == '+');
|
|
if source
|
|
.take_while_ref(|&c| is_things::is_digit(c))
|
|
.count()
|
|
.lt(&1)
|
|
{
|
|
// need digits following exp notation
|
|
Err(Error::FloatingConstantExpPartNoDigit)
|
|
} else {
|
|
Ok(Some(()))
|
|
}
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
// CONSTANT <-
|
|
// DEC_DIGITS IntegralType?
|
|
// `0x` HEX_DIGITS IntegralType?
|
|
// `0b` BIN_DIGITS IntegralType?
|
|
// `0o` OCT_DIGITS IntegralType?
|
|
// DEC_DIGITS FloatingType?
|
|
// `.` DEC_DIGITS EXP_PART? FloatingType?
|
|
// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
|
|
fn parse_constant_inner(source: &mut Source) -> Result<Token> {
|
|
let zero = source.next_if(|&c| c == '0').is_some();
|
|
|
|
let radix = zero
|
|
.then(|| source.next_if_map(|c| Radix::from_char(c)))
|
|
.flatten();
|
|
|
|
if let Some(radix) = radix {
|
|
parse_digit_part(source, false, radix)?;
|
|
if source.peek().map(|&c| c == 'u' || c == 'i') == Some(true) {
|
|
try_parse_integral_type(source)?;
|
|
}
|
|
return Ok(radix.to_token());
|
|
}
|
|
|
|
// if zero: `_`* DIGIT (DIGIT|`_`)*
|
|
// else: DIGIT (DIGIT|`_`)*
|
|
_ = match parse_digit_part(source, zero, Radix::Dec) {
|
|
Ok(_) => Ok(()),
|
|
Err(Error::NumericalConstantDigitNoDigit) if zero => Ok(()),
|
|
Err(e) => Err(e),
|
|
}?;
|
|
|
|
if let Some(_) = source.try_map_iter_if(|source| try_parse_integral_type(source))? {
|
|
return Ok(Token::IntegerConstant);
|
|
}
|
|
|
|
let dot = source.next_if(|&c| c == '.').is_some();
|
|
|
|
if dot {
|
|
parse_digit_part(source, false, Radix::Dec)?;
|
|
}
|
|
|
|
// parse exp notation
|
|
let exp = try_parse_exp_part(source)?.is_some();
|
|
|
|
// trailing FloatingType?
|
|
let floating = if source.next_if(|&c| c == 'f').is_some() {
|
|
let digits = source.next_tuple::<(char, char)>();
|
|
if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) {
|
|
// need either f64 or f32 here!
|
|
return Err(Error::FloatingConstantInvalidTrailingType);
|
|
}
|
|
true
|
|
} else {
|
|
false
|
|
};
|
|
|
|
let token = match (dot, exp, floating) {
|
|
(false, false, false) => Token::IntegerConstant,
|
|
(true, false, _) => Token::DotFloatingConstant,
|
|
(true, true, _) => Token::DotFloatingExpConstant,
|
|
(false, true, _) => Token::FloatingExpConstant,
|
|
(false, _, _) => Token::FloatingConstant,
|
|
};
|
|
|
|
Ok(token)
|
|
}
|
|
|
|
pub(crate) fn parse_constant(source: &mut Source) -> Result<Token> {
|
|
let constant = parse_constant_inner(source)?;
|
|
// char following a constant must not be id_continue
|
|
if source
|
|
.peek()
|
|
.map(|&c| is_things::is_id_continue(c))
|
|
.unwrap_or(false)
|
|
{
|
|
return Err(Error::NumericalConstantUnknownSuffix);
|
|
}
|
|
|
|
Ok(constant)
|
|
}
|
|
|
|
pub(crate) fn parse_string_or_char_constant(source: &mut Source) -> Result<Token> {
|
|
let quote = source
|
|
.next_if(|&c| c == '"' || c == '\'')
|
|
.ok_or(Error::InvalidToken)?;
|
|
|
|
let is_char = quote == '\'';
|
|
|
|
let mut escaped = false;
|
|
let mut closed = false;
|
|
|
|
while let Some(c) = source.next() {
|
|
if escaped {
|
|
// accept any escaped char
|
|
escaped = false;
|
|
continue;
|
|
}
|
|
if c == '\\' {
|
|
escaped = true;
|
|
continue;
|
|
}
|
|
if c == quote {
|
|
closed = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if !closed {
|
|
return Err(Error::StringError("Unterminated string/char.".into()));
|
|
}
|
|
|
|
if is_char {
|
|
Ok(Token::CharConstant)
|
|
} else {
|
|
Ok(Token::StringConstant)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Source` over the characters of `s`.
    fn make_source(s: &'_ str) -> Source<'_> {
        s.chars().peekable().into()
    }

    /// One representative input per numeric token kind.
    #[test]
    fn parse_constant_number() {
        let cases = [
            ("0x1A3F_u32", Token::IntegerHexConstant),
            ("13f32", Token::FloatingConstant),
            ("0b1011_0010i16", Token::IntegerBinConstant),
            ("0o755u8", Token::IntegerOctConstant),
            ("42i64", Token::IntegerConstant),
            ("3.14f64", Token::DotFloatingConstant),
            ("2.71828e0f32", Token::DotFloatingExpConstant),
            ("22e23", Token::FloatingExpConstant),
        ];

        for (input, expected) in cases {
            assert_eq!(
                parse_constant(&mut make_source(input)),
                Ok(expected),
                "input: {input:?}"
            );
        }
    }
}
|