SeaLang/src/lexer.rs
Janis 010e6d2bec LANGUAGE CHANGES
Prefix binds tighter than as-expr, because `&a as *u32` should be valid syntax.
I think it can actually do pointer arithmetic somewhat validly now.
2024-08-25 03:09:54 +02:00

use std::fmt::Display;
use crate::tokens::Token;
use crate::tokens::TokenPos;
use itertools::Itertools;
use crate::common::FallibleParse;
use crate::common::NextIf;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("{0}")]
StringError(String),
#[error("Exp part of floating constant had no digits.")]
FloatingConstantExpPartNoDigit,
#[error("constant cannot start with leading underscore '_'.")]
NumericalConstantDigitLeadingUnderscore,
#[error("Expected digit here for constant.")]
NumericalConstantDigitNoDigit,
#[error("Expected digit here for integer constant.")]
IntegralTypeExpectedDigit,
#[error("Floating constant has invalid trailing type.")]
FloatingConstantInvalidTrailingType,
#[error("Invalid token.")]
InvalidToken,
#[error("Identifier starts with invalid character.")]
ExpectedIdStartForIdentifier,
#[error("Unknown suffix in constant.")]
NumericalConstantUnknownSuffix,
}
pub type Result<T> = core::result::Result<T, Error>;
#[derive(Debug, Clone)]
pub struct Chars<'a> {
bytes: &'a [u8],
offset: usize,
}
impl<'a> Chars<'a> {
pub fn as_str(&self) -> &str {
let offset = self.offset.min(self.num_bytes());
unsafe { core::str::from_utf8_unchecked(&self.bytes[offset..]) }
}
pub fn seek(&mut self, offset: u32) {
self.offset = offset as usize;
}
pub fn num_bytes(&self) -> usize {
self.bytes.len()
}
pub fn is_eof(&self) -> bool {
self.offset >= self.bytes.len()
}
pub fn peek(&self) -> Option<char> {
self.clone().next()
}
pub fn position(&self) -> u32 {
self.offset() as u32
}
pub fn offset(&self) -> usize {
self.offset
}
pub fn get_source_span(&self, start: u32, end: u32) -> std::ops::Range<SourceLocation> {
let (start_l, start_c) = {
let range = self.get_from_to(0, start);
range.chars().fold((1u32, 0u32), |(line, col), c| {
if c == '\n' {
(line + 1, 0)
} else {
(line, col + 1)
}
})
};
let (end_l, end_c) = {
let range = self.get_from_to(start, end);
range.chars().fold((start_l, start_c), |(line, col), c| {
if c == '\n' {
(line + 1, 0)
} else {
(line, col + 1)
}
})
};
core::ops::Range {
start: SourceLocation::new(start_l, start_c),
end: SourceLocation::new(end_l, end_c),
}
}
pub fn get_lines(&self, start: u32, end: u32) -> &str {
let range = self.get_from_to(0, start);
let start = range
.char_indices()
.rev()
.find(|&(_, c)| c == '\n')
.map(|(idx, c)| idx + c.len_utf8())
.unwrap_or(0);
let range = self.get_from_to(end, self.num_bytes() as u32);
let end = range
.char_indices()
.find(|&(_, c)| c == '\n')
.map(|(idx, _)| idx as u32 + end)
.unwrap_or(self.num_bytes() as u32);
self.get_from_to(start as u32, end)
}
pub fn get_range(&self, range: core::ops::Range<u32>) -> &str {
unsafe {
core::str::from_utf8_unchecked(&self.bytes[range.start as usize..range.end as usize])
}
}
pub fn get_from_to(&self, start: u32, end: u32) -> &str {
unsafe { core::str::from_utf8_unchecked(&self.bytes[start as usize..end as usize]) }
}
fn next_char(&mut self) -> Option<char> {
let ch = self.as_str().chars().next()?;
self.offset += ch.len_utf8();
Some(ch)
}
}
impl<'a> Iterator for Chars<'a> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
self.next_char()
}
}
#[derive(Debug, Clone)]
pub struct Tokenizer<'a> {
source: Chars<'a>,
tokens: Vec<TokenPos>,
}
#[derive(Debug, Clone)]
pub struct TokenIterator<'a> {
tokenizer: &'a Tokenizer<'a>,
offset: usize,
}
impl<'a> TokenIterator<'a> {
pub fn expect_token(&mut self, token: Token) -> crate::parser::Result<TokenItem<'a>> {
self.next_if(|item| item.token() == token)
.ok_or(crate::parser::Error::ExpectedTokenNotFound(token))
}
pub fn eat_token(&mut self, token: Token) -> Option<TokenItem<'a>> {
self.next_if(|item| item.token() == token)
}
pub fn peek_token(&mut self) -> Option<TokenItem<'a>> {
self.clone().next()
}
pub fn peek_token_or_err(&mut self) -> crate::parser::Result<TokenItem<'a>> {
self.clone()
.next()
.ok_or(crate::parser::Error::UnexpectedEndOfTokens)
}
pub fn peek_expect_token(&mut self, token: Token) -> crate::parser::Result<TokenItem<'a>> {
self.clone()
.next()
.ok_or(crate::parser::Error::ExpectedTokenNotFound(token))
}
pub fn is_next_token(&mut self, token: Token) -> bool {
self.clone().next_if(|item| item.token() == token).is_some()
}
pub fn is_next_token2(&mut self, token: Token) -> bool {
self.clone()
.skip(1)
.next_if(|item| item.token() == token)
.is_some()
}
}
#[derive(Debug)]
pub struct TokenItem<'a> {
tokenizer: &'a Tokenizer<'a>,
inner: TokenPos,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct SourceLocation {
pub line: u32,
pub column: u32,
}
impl Display for SourceLocation {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "l:{},c:{}", self.line, self.column)
}
}
impl SourceLocation {
pub fn new(line: u32, column: u32) -> Self {
Self { line, column }
}
pub fn squiggle_line(this: core::ops::Range<Self>, lines: &str) {
let lines = lines.lines();
// total number of lines covered by the span, inclusive of the last line
let squiggle_lines = this.end.line - this.start.line + 1;
for (i, line) in lines.enumerate() {
println!("{line}");
let squiggle_range = {
let start = if i == 0 { this.start.column } else { 0 };
let end = if i as u32 + 1 == squiggle_lines {
this.end.column
} else {
line.len() as u32
};
start..end
};
if !squiggle_range.is_empty() {
for _ in 0..squiggle_range.start {
print!(" ");
}
print!("{}", ansi_term::Colour::Red.paint("^"));
for _ in squiggle_range.start..(squiggle_range.end - 1) {
print!("{}", ansi_term::Colour::Red.paint("~"));
}
println!();
}
}
}
}
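// Usage sketch (hypothetical `tok: TokenPos`): pair `squiggle_line` with the span and
// line helpers on `Chars` to underline an offending token in its source lines:
//
//     let span = source.get_source_span(tok.start, tok.end);
//     let lines = source.get_lines(tok.start, tok.end);
//     SourceLocation::squiggle_line(span, lines);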
impl<'a> TokenItem<'a> {
pub fn token(&self) -> Token {
self.inner.token
}
pub fn lexeme(&self) -> &str {
self.tokenizer
.source
.get_from_to(self.inner.start, self.inner.end)
}
pub fn source_location(&self) -> std::ops::Range<SourceLocation> {
self.tokenizer
.source
.get_source_span(self.inner.start, self.inner.end)
}
}
impl<'a> Iterator for TokenIterator<'a> {
type Item = TokenItem<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.offset >= self.tokenizer.tokens.len() {
None
} else {
let index = self.offset;
self.offset += 1;
match self.tokenizer.tokens[index].token {
Token::SlashSlash
| Token::SlashSlashSlash
| Token::SlashStar
| Token::SlashStarStar
| Token::Comment
| Token::DocComment => self.next(),
_ => Some(Self::Item {
tokenizer: self.tokenizer,
inner: self.tokenizer.tokens[index],
}),
}
}
}
}
macro_rules! next_or_eof {
($expr:expr) => {
match $expr.next() {
Some(c) => c,
None => {
return Ok(Token::Eof);
}
}
};
(?$expr:expr) => {
match $expr.peek() {
Some(c) => c,
None => {
return Ok(Token::Eof);
}
}
};
}
macro_rules! residual {
(ok: $expr:expr) => {
match $expr {
Ok(t) => t,
Err(e) => {
return Err(e);
}
}
};
(none: $expr:expr) => {
match $expr {
Ok(Some(t)) => {
return Ok(Some(t));
}
Ok(val) => val,
Err(e) => {
return Err(e);
}
}
};
(flatten: none: $expr:expr) => {
match $expr {
Ok(Some(t)) => {
return Ok(t);
}
Ok(val) => val,
Err(e) => {
return Err(e);
}
}
};
(some: $expr:expr) => {
match $expr {
Ok(Some(t)) => t,
Ok(None) => {
return Ok(None);
}
Err(e) => {
return Err(e);
}
}
};
}
pub struct TokenizeError {
pub err: Error,
pub range: core::ops::Range<u32>,
}
impl<'a> Tokenizer<'a> {
pub fn iter(&self) -> TokenIterator {
TokenIterator {
tokenizer: self,
offset: 0,
}
}
pub fn src(&self) -> &Chars<'a> {
&self.source
}
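/// Error-recovering variant of `Tokenizer::new`: on a lexer error it records a
/// `TokenizeError` with the offending byte range, pushes a `Token::ParseError` in its
/// place, skips ahead to the next whitespace character if the error did not already
/// stop at one, and keeps tokenizing. If any errors were collected, the partially
/// tokenized `Tokenizer` is returned alongside them.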
pub fn new_with_errors(
bytes: &'a [u8],
) -> core::result::Result<Self, (Self, Vec<TokenizeError>)> {
let mut this = Self {
source: Chars { bytes, offset: 0 },
tokens: Vec::new(),
};
let mut errors = Vec::new();
loop {
if this.source.is_eof() {
break;
}
let start = this.source.position();
match this.next_token() {
Ok(_) => {}
Err(e) => {
// let is_quoted = this
// .source
// .get_range(start, this.source.bytes.len() as u32)
// .chars()
// .take_while_ref(|&c| crate::common::is_whitespace(c))
// .next()
// .map(|c| c == '\'' || c == '"')
// .unwrap_or(false);
let end = this.source.position();
if this.source.peek().map(|c| crate::common::is_whitespace(c)) != Some(true) {
this.source
.take_while_ref(|&c| !crate::common::is_whitespace(c))
.count();
}
_ = this.push_token(Token::ParseError, start, end);
errors.push(TokenizeError {
err: e,
range: start..end,
});
}
}
}
if errors.is_empty() {
Ok(this)
} else {
Err((this, errors))
}
}
pub fn new(bytes: &'a [u8]) -> Result<Tokenizer<'a>> {
let mut this = Self {
source: Chars { bytes, offset: 0 },
tokens: Vec::new(),
};
loop {
if this.source.is_eof() {
break;
}
this.next_token().map_err(|e| {
eprintln!("error while tokenizing: {e}");
eprintln!(
"at position {}: {}",
this.source.offset(),
&this.source.as_str()[..this.source.as_str().len().min(16)]
);
e
})?;
}
Ok(this)
}
fn push_token(&mut self, token: Token, start: u32, end: u32) -> Result<()> {
self.tokens.push(TokenPos::new(token, start, end));
Ok(())
}
fn next_token(&mut self) -> Result<()> {
self.source
.take_while_ref(|&c| crate::common::is_whitespace(c))
.count();
if self.source.is_eof() {
return Ok(());
}
let start = self.source.position();
let token = {
let mut peeking = self.source.clone();
match peeking.next() {
Some('0'..='9') => Some(parse_constant(&mut self.source)?),
Some('.') if peeking.next().map(|c| crate::common::is_digit(c)) == Some(true) => {
Some(parse_constant(&mut self.source)?)
}
_ => None,
}
};
if let Some(token) = token {
return self.push_token(token, start, self.source.position());
}
// lexical tokens
let token = crate::tokens::LexemeParser::parse(self.source.clone());
if let Some(token) = token {
_ = self.source.advance_by(token.lexeme_len());
match token {
Token::SlashSlash | Token::SlashSlashSlash => {
_ = self.push_token(token, start, self.source.position());
let start = self.source.position();
// advance until either EOF or newline, tracking the end of the comment text so the
// trailing newline (if any) is not included in the lexeme
let mut end = self.source.position();
loop {
let Some(ch) = self.source.next() else {
break;
};
if ch == '\n' {
break;
}
end = self.source.position();
}
return self.push_token(
if token == Token::SlashSlash {
Token::Comment
} else {
Token::DocComment
},
start,
end,
);
}
Token::SlashStar | Token::SlashStarStar => {
let start = self.source.position();
let mut end = self.source.position();
let mut last = self.source.next();
loop {
// break out of loop if EOF
let Some(l) = last.replace(match self.source.next() {
Some(ch) => ch,
None => {
break;
}
}) else {
break;
};
// break out of loop if end of comment
if (l, last.unwrap()) == ('*', '/') {
break;
}
end = self.source.position() - 1;
}
return self.push_token(
if token == Token::SlashStar {
Token::Comment
} else {
Token::DocComment
},
start,
end,
);
}
_ => {}
}
if token.maybe_ident() {
if self
.source
.take_while_ref(|&c| crate::common::is_id_continue(c))
.count()
.gt(&0)
{
return self.push_token(Token::Ident, start, self.source.position());
}
}
return self.push_token(token, start, self.source.position());
}
self.source
.next_if(|&c| crate::common::is_id_start(c))
.ok_or(Error::ExpectedIdStartForIdentifier)?;
self.source
.take_while_ref(|&c| crate::common::is_id_continue(c))
.count();
return self.push_token(Token::Ident, start, self.source.position());
}
}
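// A minimal usage sketch for the tokenizer API above. The input and the expected token
// kinds are illustrative assumptions: identifiers either fall through to the `Ident`
// path or get re-labelled via `maybe_ident`, whitespace never produces tokens, and
// comments are filtered out by `TokenIterator::next`.
#[test]
fn tokenizer_usage_sketch() {
    let tokenizer = Tokenizer::new(b"foo bar 123 ").expect("tokenizing should succeed");
    let tokens = tokenizer.iter().map(|item| item.token()).collect::<Vec<_>>();
    assert!(matches!(
        tokens.as_slice(),
        [Token::Ident, Token::Ident, Token::IntegerConstant]
    ));
    // lexemes are slices of the original source
    assert_eq!(tokenizer.iter().next().unwrap().lexeme(), "foo");
}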
/// IntegralType <-
/// ( `u` | `i` ) DIGIT+
fn try_parse_integral_type(source: &mut Chars) -> Result<Option<()>> {
if source.next_if(|&c| c == 'u' || c == 'i').is_none() {
return Ok(None);
}
if source
.take_while_ref(|&c| crate::common::is_digit(c))
.count()
== 0
{
return Err(Error::IntegralTypeExpectedDigit);
};
Ok(Some(()))
}
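// Sketch of the accepted and rejected shapes for the rule above; the inputs are made up.
#[test]
fn integral_type_suffix_sketch() {
    let mut src = Chars { bytes: b"u32", offset: 0 };
    assert!(matches!(try_parse_integral_type(&mut src), Ok(Some(()))));
    // not a `u`/`i` prefix: nothing is consumed and `None` is reported
    let mut src = Chars { bytes: b"x32", offset: 0 };
    assert!(matches!(try_parse_integral_type(&mut src), Ok(None)));
    // a `u`/`i` prefix with no digits following is an error
    let mut src = Chars { bytes: b"u_", offset: 0 };
    assert!(try_parse_integral_type(&mut src).is_err());
}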
pub mod bigint {
use super::Radix;
pub struct BigInt(Vec<u32>);
impl BigInt {
pub fn parse_digits<C: IntoIterator<Item = char>>(text: C, radix: Radix) -> BigInt {
parse_bigint(text.into_iter(), radix)
}
pub fn bit_width(&self) -> u32 {
count_bits(&self.0)
}
pub fn from_bytes_le(bytes: &[u8]) -> BigInt {
let data = bytes
.chunks(4)
.map(|chunk| {
let mut int = [0u8; 4];
int[..chunk.len()].copy_from_slice(chunk);
u32::from_le_bytes(int)
})
.collect::<Vec<_>>();
BigInt(data)
}
pub fn into_bytes_le(&self) -> Vec<u8> {
let mut bytes = Vec::<u8>::new();
for d in &self.0[..] {
bytes.extend(&d.to_le_bytes());
}
let count = bytes.iter().rev().take_while(|&&b| b == 0).count();
bytes.truncate(bytes.len() - count);
bytes
}
}
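// Round-trip sketch for the byte-level constructors above; the sample value is arbitrary
// and the bit-width assertion assumes `count_bits` counts whole `u32` limbs.
#[test]
fn bytes_le_roundtrip() {
    let n = BigInt::from_bytes_le(&[0xef, 0xbe, 0xad, 0xde, 0x01]);
    // 0x01_dead_beef occupies 33 bits
    assert_eq!(n.bit_width(), 33);
    // trailing zero bytes are stripped on the way back out
    assert_eq!(n.into_bytes_le(), vec![0xef, 0xbe, 0xad, 0xde, 0x01]);
}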
impl core::ops::Add for BigInt {
type Output = Self;
fn add(mut self, mut rhs: Self) -> Self::Output {
let (mut digits, carry) = if self.0.len() > rhs.0.len() {
let c = add_bigint(&mut self.0, &rhs.0);
(self.0, c)
} else {
let c = add_bigint(&mut rhs.0, &self.0);
(rhs.0, c)
};
if carry {
digits.push(u32::from(carry));
}
BigInt(digits)
}
}
impl core::ops::Sub for BigInt {
type Output = Self;
fn sub(mut self, rhs: Self) -> Self::Output {
if self.0.len() < rhs.0.len() {
println!("extending self by {} zeroes", rhs.0.len() - self.0.len());
self.0
.extend(core::iter::repeat(0).take(rhs.0.len() - self.0.len()));
println!("self: {self:?}");
}
sub_bigint(&mut self.0, &rhs.0);
self
}
}
impl core::fmt::Debug for BigInt {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut list = f.debug_list();
list.entries(self.0.iter()).finish()
}
}
/// counts the significant bits in a little-endian `u32` slice, discarding leading zero
/// limbs at the most-significant end.
/// `[0xffff_ffff, 0x00, 0x00]` -> 32
/// `[0xffff_ffff, 0xff]` -> 40
pub fn count_bits(bytes: &[u32]) -> u32 {
// start from the total bit width of the slice, then strip leading zeros
let mut bits = bytes.len() as u32 * u32::BITS;
for d in bytes.iter().rev() {
if *d == 0 {
bits -= u32::BITS;
} else {
bits -= d.leading_zeros();
break;
}
}
bits
}
#[test]
fn test_count_bits() {
assert_eq!(count_bits(&[0xffffffff, 0x00, 0x00]), 32);
assert_eq!(count_bits(&[0xffffffff, 0xff, 0x00]), 40);
assert_eq!(count_bits(&[0xffffffff, 0xff]), 40);
assert_eq!(count_bits(&[0xffffffff, 0xff, 0xffff]), 64 + 16);
}
#[allow(unused)]
/// lhs must be bigger than rhs
fn sub_bigint(lhs: &mut [u32], rhs: &[u32]) {
let len = lhs.len().min(rhs.len());
let (l_lo, l_hi) = lhs.split_at_mut(len);
let (r_lo, r_hi) = rhs.split_at(len);
println!("lhs: {{ lo: {l_lo:?}, hi: {l_hi:?} }}");
println!("rhs: {{ lo: {r_lo:?}, hi: {r_hi:?} }}");
let mut borrow = false;
for (lhs, rhs) in l_lo.iter_mut().zip(r_lo) {
(*lhs, borrow) = lhs.borrowing_sub(*rhs, borrow);
}
if borrow {
for lhs in l_hi {
(*lhs, borrow) = lhs.borrowing_sub(0, borrow);
}
}
if borrow || !r_hi.iter().all(|&v| v == 0) {
panic!("sub failed: borrow: {borrow}");
}
}
/// lhs must be bigger than rhs
fn add_bigint(lhs: &mut [u32], rhs: &[u32]) -> bool {
let (l_lo, l_hi) = lhs.split_at_mut(rhs.len());
let mut carry = false;
for (lhs, rhs) in l_lo.iter_mut().zip(rhs) {
(*lhs, carry) = lhs.carrying_add(*rhs, carry);
}
if carry {
for d in l_hi.iter_mut() {
(*d, carry) = d.carrying_add(0, carry);
if !carry {
break;
}
}
}
carry
}
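/// Converts a digit string into base-2^32 limbs. Digits are first grouped into chunks of
/// `power` digits, where `max = radix^power` is the largest such power that still fits in
/// a `u32`. The accumulator is then multiplied by `max` and the next chunk's value is
/// added, one chunk at a time; this is the usual multi-precision radix conversion.
/// For example, in decimal (`power == 9`), `"12_345678901"` is processed as the chunks
/// `12` and `345678901`.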
fn parse_bigint(text: impl Iterator<Item = char>, radix: Radix) -> BigInt {
let digits = text
.filter_map(|c| match c {
'_' => None,
c => Some(radix.map_digit(c)),
})
.collect::<Vec<_>>();
let (max, power) = {
let radix = radix.radix() as u64;
let mut power = 1;
let mut base = radix;
while let Some(b) = base.checked_mul(radix) {
if b > u32::MAX as u64 {
break;
}
base = b;
power += 1;
}
(base, power)
};
let radix = radix.radix() as u32;
let r = digits.len() % power;
let i = if r == 0 { power } else { r };
let (head, tail) = digits.split_at(i);
let first = head
.iter()
.fold(0, |acc, &digit| acc * radix + digit as u32);
let mut data = vec![first];
for chunk in tail.chunks(power) {
if data.last() != Some(&0) {
data.push(0);
}
let mut carry = 0u64;
for digit in data.iter_mut() {
carry += *digit as u64 * max as u64;
*digit = carry as u32;
carry >>= u32::BITS;
}
assert!(carry == 0);
let next = chunk
.iter()
.fold(0, |acc, &digit| acc * radix + digit as u32);
let (res, mut carry) = data[0].carrying_add(next, false);
data[0] = res;
if carry {
for digit in data[1..].iter_mut() {
(*digit, carry) = digit.carrying_add(0, carry);
if !carry {
break;
}
}
}
}
BigInt(data)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse() {
let bigint = super::parse_bigint("2_cafe_babe_dead_beef".chars(), Radix::Hex);
println!("{:#x?}", bigint);
let bigint = super::parse_bigint("f".chars(), Radix::Hex);
println!("{:#x?}", bigint);
}
#[test]
fn add() {
let a = super::parse_bigint("2_0000_0000_0000_0000".chars(), Radix::Hex);
println!("{:#x?}", a);
let b = super::parse_bigint("cafebabe".chars(), Radix::Hex);
println!("{:#x?}", b);
let sum = a + b;
println!("{:#x?}", sum);
}
#[test]
fn sub() {
let a = super::parse_bigint("2_0000_0000_0000_0000".chars(), Radix::Hex);
println!("{:#x?}", a);
let b = super::parse_bigint("ffff_ffff".chars(), Radix::Hex);
println!("{:#x?}", b);
let sum = a - b;
println!("{:#x?}", sum);
}
#[test]
fn overflowing_sub() {
let a = super::parse_bigint("2_0000_0000_0000_0000".chars(), Radix::Hex);
println!("{:#x?}", a);
let b = super::parse_bigint("ffff_ffff".chars(), Radix::Hex);
println!("{:#x?}", b);
let sum = b - a;
println!("{:#x?}", sum);
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Radix {
Hex,
Bin,
Dec,
Oct,
}
impl Radix {
#[allow(unused)]
/// must be called with one of `['b','x','d','o']`
unsafe fn from_char_unchecked(c: char) -> Self {
match c.to_ascii_lowercase() {
'o' => Self::Oct,
'b' => Self::Bin,
'x' => Self::Hex,
'd' => Self::Dec,
_ => unreachable!(),
}
}
fn from_char(c: char) -> Option<Self> {
match c.to_ascii_lowercase() {
'o' => Some(Self::Oct),
'b' => Some(Self::Bin),
'x' => Some(Self::Hex),
'd' => Some(Self::Dec),
_ => None,
}
}
#[allow(unused)]
fn radix(self) -> u8 {
match self {
Radix::Hex => 16,
Radix::Bin => 2,
Radix::Oct => 8,
Radix::Dec => 10,
}
}
fn to_token(self) -> Token {
match self {
Radix::Hex => Token::IntegerHexConstant,
Radix::Bin => Token::IntegerBinConstant,
Radix::Oct => Token::IntegerOctConstant,
Radix::Dec => Token::IntegerConstant,
}
}
pub fn from_token(token: Token) -> Option<Self> {
match token {
Token::IntegerHexConstant => Some(Radix::Hex),
Token::IntegerBinConstant => Some(Radix::Bin),
Token::IntegerOctConstant => Some(Radix::Oct),
Token::IntegerConstant => Some(Radix::Dec),
_ => None,
}
}
pub fn map_digit(self, c: char) -> u8 {
match self {
Radix::Hex => match c {
'0'..='9' => c as u8 - b'0',
'a'..='f' => 10 + c as u8 - b'a',
'A'..='F' => 10 + c as u8 - b'A',
_ => unreachable!(),
},
Radix::Bin => match c {
'0'..='1' => c as u8 - b'0',
_ => unreachable!(),
},
Radix::Dec => match c {
'0'..='9' => c as u8 - b'0',
_ => unreachable!(),
},
Radix::Oct => match c {
'0'..='7' => c as u8 - b'0',
_ => unreachable!(),
},
}
}
pub fn folding_method(self) -> fn(u64, char) -> u64 {
match self {
Radix::Hex => {
fn fold(acc: u64, c: char) -> u64 {
let digit = match c {
'0'..='9' => c as u8 - b'0',
'a'..='f' => 10 + c as u8 - b'a',
'A'..='F' => 10 + c as u8 - b'A',
_ => unreachable!(),
};
acc * 16 + digit as u64
}
fold
}
Radix::Bin => {
fn fold(acc: u64, c: char) -> u64 {
let digit = match c {
'0'..='1' => c as u8 - b'0',
_ => unreachable!(),
};
acc * 2 + digit as u64
}
fold
}
Radix::Dec => {
fn fold(acc: u64, c: char) -> u64 {
let digit = match c {
'0'..='9' => c as u8 - b'0',
_ => unreachable!(),
};
acc * 10 + digit as u64
}
fold
}
Radix::Oct => {
fn fold(acc: u64, c: char) -> u64 {
let digit = match c {
'0'..='7' => c as u8 - b'0',
_ => unreachable!(),
};
acc * 8 + digit as u64
}
fold
}
}
}
pub fn is_digit(self) -> fn(char) -> bool {
match self {
Radix::Hex => crate::common::is_hex_digit,
Radix::Bin => crate::common::is_bin_digit,
Radix::Oct => crate::common::is_oct_digit,
Radix::Dec => crate::common::is_digit,
}
}
}
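// Digit-mapping sketch; it assumes the `10 +` offset for hex letters, matching `map_digit`.
#[test]
fn radix_digit_mapping_sketch() {
    assert_eq!(Radix::Hex.map_digit('b'), 11);
    assert_eq!(Radix::Oct.map_digit('7'), 7);
    // the folding method accumulates digits left to right: 0x2a == 42
    let fold = Radix::Hex.folding_method();
    assert_eq!("2a".chars().fold(0u64, fold), 42);
}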
/// where DIGIT is defined by radix:
/// DIGITS <-
/// if allow_leading_underscore: `_`* DIGIT (DIGIT|`_`)*
/// else: DIGIT (DIGIT|`_`)*
fn parse_digit_part(
source: &mut Chars,
allow_leading_underscore: bool,
radix: Radix,
) -> Result<()> {
let is_digit = radix.is_digit();
if allow_leading_underscore {
let _underscore = source.take_while_ref(|&c| c == '_').count();
}
let _need_digit = source.next_if(|&c| is_digit(c)).ok_or_else(|| {
if source.peek() == Some('_') {
Error::NumericalConstantDigitLeadingUnderscore
} else {
Error::NumericalConstantDigitNoDigit
}
})?;
let _rest = source.take_while_ref(|&c| is_digit(c) || c == '_').count();
Ok(())
}
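// Underscore-handling sketch for the DIGITS rule above; the inputs are made up.
#[test]
fn digit_part_underscores_sketch() {
    let mut src = Chars { bytes: b"1_000", offset: 0 };
    assert!(parse_digit_part(&mut src, false, Radix::Dec).is_ok());
    // a leading underscore is only accepted when explicitly permitted
    let mut src = Chars { bytes: b"_1", offset: 0 };
    assert!(parse_digit_part(&mut src, false, Radix::Dec).is_err());
    let mut src = Chars { bytes: b"_1", offset: 0 };
    assert!(parse_digit_part(&mut src, true, Radix::Dec).is_ok());
}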
/// returns `Err(E)` if it failed to parse.
/// returns `Ok(None)` if no exp part was found.
/// returns `Ok(Some(()))` if an exp part was found and parsed.
///
/// EXP_PART <-
/// (`e`|`E`) (`-`|`+`)? DEC_DIGITS
fn try_parse_exp_part(source: &mut Chars) -> Result<Option<()>> {
if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() {
let _sign = source.next_if(|&c| c == '-' || c == '+');
if source
.take_while_ref(|&c| crate::common::is_digit(c))
.count()
.lt(&1)
{
// need digits following exp notation
Err(Error::FloatingConstantExpPartNoDigit)
} else {
Ok(Some(()))
}
} else {
Ok(None)
}
}
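// EXP_PART sketch for the rule above; the inputs are made up.
#[test]
fn exp_part_sketch() {
    let mut src = Chars { bytes: b"e+10", offset: 0 };
    assert!(matches!(try_parse_exp_part(&mut src), Ok(Some(()))));
    // not an exponent at all
    let mut src = Chars { bytes: b"x10", offset: 0 };
    assert!(matches!(try_parse_exp_part(&mut src), Ok(None)));
    // `e` with no digits following is an error
    let mut src = Chars { bytes: b"e", offset: 0 };
    assert!(try_parse_exp_part(&mut src).is_err());
}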
/// CONSTANT <-
/// DEC_DIGITS IntegralType?
/// `0x` HEX_DIGITS IntegralType?
/// `0b` BIN_DIGITS IntegralType?
/// `0o` OCT_DIGITS IntegralType?
/// DEC_DIGITS FloatingType?
/// `.` DEC_DIGITS EXP_PART? FloatingType?
/// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
fn parse_constant_inner(source: &mut Chars) -> Result<Token> {
let zero = source.next_if(|&c| c == '0').is_some();
let radix = zero
.then(|| source.next_if_map(|c| Radix::from_char(c)))
.flatten();
if let Some(radix) = radix {
parse_digit_part(source, false, radix)?;
if source.peek().map(|c| c == 'u' || c == 'i') == Some(true) {
try_parse_integral_type(source)?;
}
return Ok(radix.to_token());
}
// if zero: `_`* DIGIT (DIGIT|`_`)*
// else: DIGIT (DIGIT|`_`)*
_ = match parse_digit_part(source, zero, Radix::Dec) {
Ok(_) => Ok(()),
Err(Error::NumericalConstantDigitNoDigit) if zero => Ok(()),
Err(e) => Err(e),
}?;
if let Some(_) = source.try_parse_result(|source| try_parse_integral_type(source))? {
return Ok(Token::IntegerConstant);
}
let dot = source.next_if(|&c| c == '.').is_some();
if dot {
parse_digit_part(source, false, Radix::Dec)?;
}
// parse exp notation
let exp = try_parse_exp_part(source)?.is_some();
// trailing FloatingType?
let floating = if source.next_if(|&c| c == 'f').is_some() {
let digits = source.next_tuple::<(char, char)>();
if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) {
// need either f64 or f32 here!
return Err(Error::FloatingConstantInvalidTrailingType);
}
true
} else {
false
};
let token = match (dot, exp, floating) {
(false, false, false) => Token::IntegerConstant,
(true, false, _) => Token::DotFloatingConstant,
(true, true, _) => Token::DotFloatingExpConstant,
(false, true, _) => Token::FloatingExpConstant,
(false, false, true) => Token::FloatingConstant,
};
Ok(token)
}
/// CONSTANT <-
/// DEC_DIGITS IntegralType?
/// `0x` HEX_DIGITS IntegralType?
/// `0b` BIN_DIGITS IntegralType?
/// `0o` OCT_DIGITS IntegralType?
/// DEC_DIGITS FloatingType?
/// `.` DEC_DIGITS EXP_PART? FloatingType?
/// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
fn parse_constant(source: &mut Chars) -> Result<Token> {
let constant = parse_constant_inner(source)?;
// the char following a constant must not be id_continue; EOF after a constant is fine
if source.peek().map(|c| crate::common::is_id_continue(c)) == Some(true) {
return Err(Error::NumericalConstantUnknownSuffix);
}
Ok(constant)
}
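// A few end-to-end constants for the grammar above. The trailing space in each input
// stands in for whatever non-identifier character follows the constant in real source.
#[test]
fn parse_constant_sketch() {
    let tok = |text: &str| parse_constant(&mut Chars { bytes: text.as_bytes(), offset: 0 });
    assert!(matches!(tok("0xff "), Ok(Token::IntegerHexConstant)));
    assert!(matches!(tok("10u8 "), Ok(Token::IntegerConstant)));
    assert!(matches!(tok("1.5e3 "), Ok(Token::DotFloatingExpConstant)));
    assert!(matches!(tok("3f32 "), Ok(Token::FloatingConstant)));
    // an identifier-like suffix is rejected
    assert!(tok("123abc ").is_err());
}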