//! Tokenizer: turns raw source bytes into a list of positioned tokens.
use std::fmt::Display;
|
|
|
|
use crate::tokens::Token;
|
|
use crate::tokens::TokenPos;
|
|
use itertools::Itertools;
|
|
|
|
use crate::common::FallibleParse;
|
|
use crate::common::NextIf;
|
|
|
|
#[derive(Debug, thiserror::Error)]
|
|
pub enum Error {
|
|
#[error("{0}")]
|
|
StringError(String),
|
|
#[error("Exp part of floating constant had no digits.")]
|
|
FloatingConstantExpPartNoDigit,
|
|
#[error("constant cannot start with leading underscore '_'.")]
|
|
NumericalConstantDigitLeadingUnderscore,
|
|
#[error("Expected digit here for constant.")]
|
|
NumericalConstantDigitNoDigit,
|
|
#[error("Expected digit here for integer constant.")]
|
|
IntegralTypeExpectedDigit,
|
|
#[error("Floating constant has invalid trailing type.")]
|
|
FloatingConstantInvalidTrailingType,
|
|
#[error("Invalid token.")]
|
|
InvalidToken,
|
|
#[error("Identifier starts with invalid character.")]
|
|
ExpectedIdStartForIdentifier,
|
|
#[error("Unknown suffix in constant.")]
|
|
NumericalConstantUnknownSuffix,
|
|
}
|
|
|
|
pub type Result<T> = core::result::Result<T, Error>;
|
|
|
|
/// Character cursor over a borrowed byte buffer.
///
/// `offset` is the byte index of the next character to be read.
#[derive(Clone, Debug)]
pub struct Chars<'a> {
    /// The complete source buffer.
    bytes: &'a [u8],
    /// Byte position of the cursor inside `bytes`.
    offset: usize,
}
|
|
|
|
impl<'a> Chars<'a> {
|
|
pub fn as_str(&self) -> &str {
|
|
let offset = self.offset.min(self.num_bytes());
|
|
unsafe { core::str::from_utf8_unchecked(&self.bytes[offset..]) }
|
|
}
|
|
|
|
pub fn seek(&mut self, offset: u32) {
|
|
self.offset = offset as usize;
|
|
}
|
|
|
|
pub fn num_bytes(&self) -> usize {
|
|
self.bytes.len()
|
|
}
|
|
|
|
pub fn is_eof(&self) -> bool {
|
|
self.offset >= self.bytes.len()
|
|
}
|
|
|
|
pub fn peek(&self) -> Option<char> {
|
|
self.clone().next()
|
|
}
|
|
|
|
pub fn position(&self) -> u32 {
|
|
self.offset() as u32
|
|
}
|
|
|
|
pub fn offset(&self) -> usize {
|
|
self.offset
|
|
}
|
|
|
|
pub fn get_source_span(&self, start: u32, end: u32) -> std::ops::Range<SourceLocation> {
|
|
let (start_l, start_c) = {
|
|
let range = self.get_from_to(0, start);
|
|
range.chars().fold((1u32, 0u32), |(line, col), c| {
|
|
if c == '\n' {
|
|
(line + 1, 0)
|
|
} else {
|
|
(line, col + 1)
|
|
}
|
|
})
|
|
};
|
|
let (end_l, end_c) = {
|
|
let range = self.get_from_to(start, end);
|
|
range.chars().fold((start_l, start_c), |(line, col), c| {
|
|
if c == '\n' {
|
|
(line + 1, 0)
|
|
} else {
|
|
(line, col + 1)
|
|
}
|
|
})
|
|
};
|
|
|
|
core::ops::Range {
|
|
start: SourceLocation::new(start_l, start_c),
|
|
end: SourceLocation::new(end_l, end_c),
|
|
}
|
|
}
|
|
|
|
pub fn get_lines(&self, start: u32, end: u32) -> &str {
|
|
let range = self.get_from_to(0, start);
|
|
let start = range
|
|
.char_indices()
|
|
.rev()
|
|
.skip_while(|&(_, c)| c != '\n')
|
|
.next()
|
|
.map(|(idx, c)| idx + c.len_utf8())
|
|
.unwrap_or(0);
|
|
|
|
let range = self.get_from_to(end, self.num_bytes() as u32);
|
|
let end = range
|
|
.char_indices()
|
|
.skip_while(|&(_, c)| c != '\n')
|
|
.next()
|
|
.map(|(idx, _)| idx as u32 + end)
|
|
.unwrap_or(self.num_bytes() as u32);
|
|
|
|
self.get_from_to(start as u32, end as u32)
|
|
}
|
|
|
|
pub fn get_range(&self, range: core::ops::Range<u32>) -> &str {
|
|
unsafe {
|
|
core::str::from_utf8_unchecked(&self.bytes[range.start as usize..range.end as usize])
|
|
}
|
|
}
|
|
|
|
pub fn get_from_to(&self, start: u32, end: u32) -> &str {
|
|
unsafe { core::str::from_utf8_unchecked(&self.bytes[start as usize..end as usize]) }
|
|
}
|
|
|
|
fn next_char(&mut self) -> Option<char> {
|
|
let ch = self.as_str().chars().next()?;
|
|
self.offset += ch.len_utf8();
|
|
Some(ch)
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for Chars<'a> {
|
|
type Item = char;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.next_char()
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct Tokenizer<'a> {
|
|
source: Chars<'a>,
|
|
tokens: Vec<TokenPos>,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct TokenIterator<'a> {
|
|
tokenizer: &'a Tokenizer<'a>,
|
|
offset: usize,
|
|
}
|
|
|
|
impl<'a> TokenIterator<'a> {
|
|
pub fn expect_token(&mut self, token: Token) -> crate::parser::Result<TokenItem<'a>> {
|
|
self.next_if(|item| item.token() == token)
|
|
.ok_or(crate::parser::Error::ExpectedTokenNotFound(token))
|
|
}
|
|
|
|
pub fn eat_token(&mut self, token: Token) -> Option<TokenItem<'a>> {
|
|
self.next_if(|item| item.token() == token)
|
|
}
|
|
|
|
pub fn peek_token(&mut self) -> Option<TokenItem<'a>> {
|
|
self.clone().next()
|
|
}
|
|
pub fn peek_token_or_err(&mut self) -> crate::parser::Result<TokenItem<'a>> {
|
|
self.clone()
|
|
.next()
|
|
.ok_or(crate::parser::Error::UnexpectedEndOfTokens)
|
|
}
|
|
|
|
pub fn peek_expect_token(&mut self, token: Token) -> crate::parser::Result<TokenItem<'a>> {
|
|
self.clone()
|
|
.next()
|
|
.ok_or(crate::parser::Error::ExpectedTokenNotFound(token))
|
|
}
|
|
|
|
pub fn is_next_token(&mut self, token: Token) -> bool {
|
|
self.clone().next_if(|item| item.token() == token).is_some()
|
|
}
|
|
pub fn is_next_token2(&mut self, token: Token) -> bool {
|
|
self.clone()
|
|
.skip(1)
|
|
.next_if(|item| item.token() == token)
|
|
.is_some()
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct TokenItem<'a> {
|
|
tokenizer: &'a Tokenizer<'a>,
|
|
inner: TokenPos,
|
|
}
|
|
|
|
/// A line/column position in the source text.
///
/// Ordering compares `line` first and `column` second.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SourceLocation {
    /// Line number.
    pub line: u32,
    /// Column within the line.
    pub column: u32,
}
|
|
|
|
impl Display for SourceLocation {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "l:{},c:{}", self.line, self.column)
|
|
}
|
|
}
|
|
|
|
impl SourceLocation {
|
|
pub fn new(line: u32, column: u32) -> Self {
|
|
Self { line, column }
|
|
}
|
|
|
|
pub fn squiggle_line(this: core::ops::Range<Self>, lines: &str) {
|
|
let lines = lines.lines();
|
|
let squiggle_lines = this.end.line - this.start.line;
|
|
|
|
for (i, line) in lines.enumerate() {
|
|
println!("{line}");
|
|
let squiggle_range = {
|
|
let start = if i == 0 { this.start.column } else { 0 };
|
|
let end = if i as u32 + 1 == squiggle_lines {
|
|
this.end.column
|
|
} else {
|
|
line.len() as u32
|
|
};
|
|
start..end
|
|
};
|
|
|
|
if !squiggle_range.is_empty() {
|
|
for _ in 0..squiggle_range.start {
|
|
print!(" ");
|
|
}
|
|
print!("{}", ansi_term::Colour::Red.paint("^"));
|
|
for _ in squiggle_range.start..(squiggle_range.end - 1) {
|
|
print!("{}", ansi_term::Colour::Red.paint("~"));
|
|
}
|
|
println!();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> TokenItem<'a> {
|
|
pub fn token(&self) -> Token {
|
|
self.inner.token
|
|
}
|
|
|
|
pub fn lexeme(&self) -> &str {
|
|
self.tokenizer
|
|
.source
|
|
.get_from_to(self.inner.start, self.inner.end)
|
|
}
|
|
|
|
pub fn source_location(&self) -> std::ops::Range<SourceLocation> {
|
|
self.tokenizer
|
|
.source
|
|
.get_source_span(self.inner.start, self.inner.end)
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for TokenIterator<'a> {
|
|
type Item = TokenItem<'a>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
if self.offset >= self.tokenizer.tokens.len() {
|
|
None
|
|
} else {
|
|
let index = self.offset;
|
|
self.offset += 1;
|
|
match self.tokenizer.tokens[index].token {
|
|
Token::SlashSlash
|
|
| Token::SlashSlashSlash
|
|
| Token::SlashStar
|
|
| Token::SlashStarStar
|
|
| Token::Comment
|
|
| Token::DocComment => self.next(),
|
|
_ => Some(Self::Item {
|
|
tokenizer: self.tokenizer,
|
|
inner: self.tokenizer.tokens[index],
|
|
}),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Evaluates to the next item of an iterator-like expression, or returns
/// `Ok(Token::Eof)` from the enclosing function when there is none.
///
/// `next_or_eof!(it)` calls `it.next()`; `next_or_eof!(?it)` calls `it.peek()`.
macro_rules! next_or_eof {
    ($expr:expr) => {
        match $expr.next() {
            None => return Ok(Token::Eof),
            Some(item) => item,
        }
    };
    (?$expr:expr) => {
        match $expr.peek() {
            None => return Ok(Token::Eof),
            Some(item) => item,
        }
    };
}
|
|
|
|
/// Early-return helpers for `Result` / `Result<Option<_>>` expressions.
///
/// * `ok:` — unwraps `Ok(t)`; returns `Err(e)` from the enclosing function.
/// * `none:` — returns `Ok(Some(t))` from the enclosing function when a value
///   is present; otherwise evaluates to `None`. Errors are returned.
/// * `flatten: none:` — like `none:`, but returns `Ok(t)` instead.
/// * `some:` — unwraps `Ok(Some(t))`; returns `Ok(None)` or `Err(e)` otherwise.
macro_rules! residual {
    (ok: $expr:expr) => {
        match $expr {
            Err(e) => return Err(e),
            Ok(value) => value,
        }
    };
    (none: $expr:expr) => {
        match $expr {
            Err(e) => return Err(e),
            Ok(Some(value)) => return Ok(Some(value)),
            Ok(None) => None,
        }
    };
    (flatten: none: $expr:expr) => {
        match $expr {
            Err(e) => return Err(e),
            Ok(Some(value)) => return Ok(value),
            Ok(None) => None,
        }
    };
    (some: $expr:expr) => {
        match $expr {
            Err(e) => return Err(e),
            Ok(None) => return Ok(None),
            Ok(Some(value)) => value,
        }
    };
}
|
|
|
|
pub struct TokenizeError {
|
|
pub err: Error,
|
|
pub range: core::ops::Range<u32>,
|
|
}
|
|
|
|
impl<'a> Tokenizer<'a> {
|
|
pub fn iter(&self) -> TokenIterator {
|
|
TokenIterator {
|
|
tokenizer: self,
|
|
offset: 0,
|
|
}
|
|
}
|
|
|
|
pub fn src(&self) -> &Chars<'a> {
|
|
&self.source
|
|
}
|
|
|
|
pub fn new_with_errors(
|
|
bytes: &'a [u8],
|
|
) -> core::result::Result<Self, (Self, Vec<TokenizeError>)> {
|
|
let mut this = Self {
|
|
source: Chars { bytes, offset: 0 },
|
|
tokens: Vec::new(),
|
|
};
|
|
let mut errors = Vec::new();
|
|
|
|
loop {
|
|
if this.source.is_eof() {
|
|
break;
|
|
}
|
|
|
|
let start = this.source.position();
|
|
|
|
match this.next_token() {
|
|
Ok(_) => {}
|
|
Err(e) => {
|
|
// let is_quoted = this
|
|
// .source
|
|
// .get_range(start, this.source.bytes.len() as u32)
|
|
// .chars()
|
|
// .take_while_ref(|&c| crate::common::is_whitespace(c))
|
|
// .next()
|
|
// .map(|c| c == '\'' || c == '"')
|
|
// .unwrap_or(false);
|
|
let end = this.source.position();
|
|
|
|
if this.source.peek().map(|c| crate::common::is_whitespace(c)) != Some(true) {
|
|
this.source
|
|
.take_while_ref(|&c| !crate::common::is_whitespace(c))
|
|
.count();
|
|
}
|
|
|
|
_ = this.push_token(Token::ParseError, start, end);
|
|
errors.push(TokenizeError {
|
|
err: e,
|
|
range: start..end,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
if errors.is_empty() {
|
|
Ok(this)
|
|
} else {
|
|
Err((this, errors))
|
|
}
|
|
}
|
|
|
|
pub fn new(bytes: &'a [u8]) -> Result<Tokenizer<'a>> {
|
|
let mut this = Self {
|
|
source: Chars { bytes, offset: 0 },
|
|
tokens: Vec::new(),
|
|
};
|
|
|
|
loop {
|
|
if this.source.is_eof() {
|
|
break;
|
|
}
|
|
|
|
this.next_token().map_err(|e| {
|
|
eprintln!("error while tokenizing: {e}");
|
|
eprintln!(
|
|
"at position {}: {}",
|
|
this.source.offset(),
|
|
&this.source.as_str()[..this.source.as_str().len().min(16)]
|
|
);
|
|
|
|
e
|
|
})?;
|
|
}
|
|
|
|
Ok(this)
|
|
}
|
|
|
|
fn push_token(&mut self, token: Token, start: u32, end: u32) -> Result<()> {
|
|
self.tokens.push(TokenPos::new(token, start, end));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn next_token(&mut self) -> Result<()> {
|
|
self.source
|
|
.take_while_ref(|&c| crate::common::is_whitespace(c))
|
|
.count();
|
|
|
|
if self.source.is_eof() {
|
|
return Ok(());
|
|
}
|
|
|
|
let start = self.source.position();
|
|
|
|
let token = {
|
|
let mut peeking = self.source.clone();
|
|
match peeking.next() {
|
|
Some('0'..='9') => Some(parse_constant(&mut self.source)?),
|
|
Some('.') if peeking.next().map(|c| crate::common::is_digit(c)) == Some(true) => {
|
|
Some(parse_constant(&mut self.source)?)
|
|
}
|
|
_ => None,
|
|
}
|
|
};
|
|
|
|
if let Some(token) = token {
|
|
return self.push_token(token, start, self.source.position());
|
|
}
|
|
|
|
// lexical tokens
|
|
let token = crate::tokens::LexemeParser::parse(self.source.clone());
|
|
|
|
if let Some(token) = token {
|
|
_ = self.source.advance_by(token.lexeme_len());
|
|
|
|
match token {
|
|
Token::SlashSlash | Token::SlashSlashSlash => {
|
|
_ = self.push_token(token, start, self.source.position());
|
|
let start = self.source.position();
|
|
loop {
|
|
// advance until either EOF or newline
|
|
let Some(ch) = self.source.next() else {
|
|
break;
|
|
};
|
|
if ch == '\n' {
|
|
break;
|
|
}
|
|
}
|
|
let end = self.source.position() - 1;
|
|
return self.push_token(
|
|
if token == Token::SlashSlash {
|
|
Token::Comment
|
|
} else {
|
|
Token::DocComment
|
|
},
|
|
start,
|
|
end,
|
|
);
|
|
}
|
|
Token::SlashStar | Token::SlashStarStar => {
|
|
let start = self.source.position();
|
|
let mut end = self.source.position();
|
|
|
|
let mut last = self.source.next();
|
|
loop {
|
|
// break out of loop if EOF
|
|
let Some(l) = last.replace(match self.source.next() {
|
|
Some(ch) => ch,
|
|
None => {
|
|
break;
|
|
}
|
|
}) else {
|
|
break;
|
|
};
|
|
|
|
// break out of loop if end of comment
|
|
if (l, last.unwrap()) == ('*', '/') {
|
|
break;
|
|
}
|
|
end = self.source.position() - 1;
|
|
}
|
|
return self.push_token(
|
|
if token == Token::SlashStar {
|
|
Token::Comment
|
|
} else {
|
|
Token::DocComment
|
|
},
|
|
start,
|
|
end,
|
|
);
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
if token.maybe_ident() {
|
|
if self
|
|
.source
|
|
.take_while_ref(|&c| crate::common::is_id_continue(c))
|
|
.count()
|
|
.gt(&0)
|
|
{
|
|
return self.push_token(Token::Ident, start, self.source.position());
|
|
}
|
|
}
|
|
|
|
return self.push_token(token, start, self.source.position());
|
|
}
|
|
|
|
self.source
|
|
.next_if(|&c| crate::common::is_id_start(c))
|
|
.ok_or(Error::ExpectedIdStartForIdentifier)?;
|
|
self.source
|
|
.take_while_ref(|&c| crate::common::is_id_continue(c))
|
|
.count();
|
|
|
|
return self.push_token(Token::Ident, start, self.source.position());
|
|
}
|
|
}
|
|
|
|
/// IntegralType <-
|
|
/// ( 'u' | 'i' ) DIGITS+
|
|
fn try_parse_integral_type(source: &mut Chars) -> Result<Option<()>> {
|
|
if !source.next_if(|&c| c == 'u' || c == 'i').is_some() {
|
|
return Ok(None);
|
|
}
|
|
|
|
if source
|
|
.take_while_ref(|&c| crate::common::is_digit(c))
|
|
.count()
|
|
<= 0
|
|
{
|
|
return Err(Error::IntegralTypeExpectedDigit);
|
|
};
|
|
|
|
Ok(Some(()))
|
|
}
|
|
|
|
/// Number base of an integer constant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Radix {
    /// Base 16.
    Hex,
    /// Base 2.
    Bin,
    /// Base 10.
    Dec,
    /// Base 8.
    Oct,
}
|
|
|
|
impl Radix {
|
|
#[allow(unused)]
|
|
/// must be called with one of `['b','x','d','o']`
|
|
unsafe fn from_char_unchecked(c: char) -> Self {
|
|
match c.to_ascii_lowercase() {
|
|
'o' => Self::Oct,
|
|
'b' => Self::Bin,
|
|
'x' => Self::Hex,
|
|
'd' => Self::Dec,
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
fn from_char(c: char) -> Option<Self> {
|
|
match c.to_ascii_lowercase() {
|
|
'o' => Some(Self::Oct),
|
|
'b' => Some(Self::Bin),
|
|
'x' => Some(Self::Hex),
|
|
'd' => Some(Self::Dec),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
#[allow(unused)]
|
|
pub fn radix(self) -> u8 {
|
|
match self {
|
|
Radix::Hex => 16,
|
|
Radix::Bin => 2,
|
|
Radix::Oct => 8,
|
|
Radix::Dec => 10,
|
|
}
|
|
}
|
|
fn to_token(self) -> Token {
|
|
match self {
|
|
Radix::Hex => Token::IntegerHexConstant,
|
|
Radix::Bin => Token::IntegerBinConstant,
|
|
Radix::Oct => Token::IntegerOctConstant,
|
|
Radix::Dec => Token::IntegerConstant,
|
|
}
|
|
}
|
|
pub fn from_token(token: Token) -> Option<Self> {
|
|
match token {
|
|
Token::IntegerHexConstant => Some(Radix::Hex),
|
|
Token::IntegerBinConstant => Some(Radix::Bin),
|
|
Token::IntegerOctConstant => Some(Radix::Oct),
|
|
Token::IntegerConstant => Some(Radix::Dec),
|
|
_ => None,
|
|
}
|
|
}
|
|
pub fn map_digit(self, c: char) -> u8 {
|
|
match self {
|
|
Radix::Hex => match c {
|
|
'0'..='9' => c as u8 - b'0',
|
|
'a'..='f' => 10 + c as u8 - b'a',
|
|
'A'..='F' => 10 + c as u8 - b'A',
|
|
_ => unreachable!(),
|
|
},
|
|
Radix::Bin => match c {
|
|
'0'..='1' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
},
|
|
Radix::Dec => match c {
|
|
'0'..='9' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
},
|
|
Radix::Oct => match c {
|
|
'0'..='7' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
},
|
|
}
|
|
}
|
|
pub fn folding_method(self) -> fn(u64, char) -> u64 {
|
|
match self {
|
|
Radix::Hex => {
|
|
fn fold(acc: u64, c: char) -> u64 {
|
|
let digit = match c {
|
|
'0'..='9' => c as u8 - b'0',
|
|
'a'..='f' => c as u8 - b'a',
|
|
'A'..='F' => c as u8 - b'A',
|
|
_ => unreachable!(),
|
|
};
|
|
acc * 16 + digit as u64
|
|
}
|
|
fold
|
|
}
|
|
Radix::Bin => {
|
|
fn fold(acc: u64, c: char) -> u64 {
|
|
let digit = match c {
|
|
'0'..='1' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
};
|
|
acc * 2 + digit as u64
|
|
}
|
|
fold
|
|
}
|
|
Radix::Dec => {
|
|
fn fold(acc: u64, c: char) -> u64 {
|
|
let digit = match c {
|
|
'0'..='9' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
};
|
|
acc * 10 + digit as u64
|
|
}
|
|
fold
|
|
}
|
|
Radix::Oct => {
|
|
fn fold(acc: u64, c: char) -> u64 {
|
|
let digit = match c {
|
|
'0'..='7' => c as u8 - b'0',
|
|
_ => unreachable!(),
|
|
};
|
|
acc * 8 + digit as u64
|
|
}
|
|
fold
|
|
}
|
|
}
|
|
}
|
|
pub fn is_digit(self) -> fn(char) -> bool {
|
|
match self {
|
|
Radix::Hex => crate::common::is_hex_digit,
|
|
Radix::Bin => crate::common::is_bin_digit,
|
|
Radix::Oct => crate::common::is_oct_digit,
|
|
Radix::Dec => crate::common::is_digit,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// where DIGIT is defined by radix:
|
|
/// DIGITS <-
|
|
/// if allow_leading_underscore: `_`* DIGIT (DIGIT|`_`)*
|
|
/// else: DIGIT (DIGIT|`_`)*
|
|
fn parse_digit_part(
|
|
source: &mut Chars,
|
|
allow_leading_underscore: bool,
|
|
radix: Radix,
|
|
) -> Result<()> {
|
|
let is_digit = radix.is_digit();
|
|
|
|
if allow_leading_underscore {
|
|
let _underscore = source.take_while_ref(|&c| c == '_').count();
|
|
}
|
|
let _need_digit = source.next_if(|&c| is_digit(c)).ok_or_else(|| {
|
|
if source.peek() == Some('_') {
|
|
Error::NumericalConstantDigitLeadingUnderscore
|
|
} else {
|
|
Error::NumericalConstantDigitNoDigit
|
|
}
|
|
})?;
|
|
let _rest = source.take_while_ref(|&c| is_digit(c) || c == '_').count();
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// returns `Err(E)` if it failed to parse.
|
|
/// returns `Ok(None)` if no exp part was found.
|
|
/// returns `Ok(Some(()))` if an exp part was found and parsed.
|
|
///
|
|
/// EXP_PART <-
|
|
/// (`e`|`E`) (`-`|`+`)? DEC_DIGITS
|
|
fn try_parse_exp_part(source: &mut Chars) -> Result<Option<()>> {
|
|
if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() {
|
|
let _sign = source.next_if(|&c| c == '-' || c == '+');
|
|
if source
|
|
.take_while_ref(|&c| crate::common::is_digit(c))
|
|
.count()
|
|
.lt(&1)
|
|
{
|
|
// need digits following exp notation
|
|
Err(Error::FloatingConstantExpPartNoDigit)
|
|
} else {
|
|
Ok(Some(()))
|
|
}
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
/// CONSTANT <-
|
|
/// DEC_DIGITS IntegralType?
|
|
/// `0x` HEX_DIGITS IntegralType?
|
|
/// `0b` BIN_DIGITS IntegralType?
|
|
/// `0o` OCT_DIGITS IntegralType?
|
|
/// DEC_DIGITS FloatingType?
|
|
/// `.` DEC_DIGITS EXP_PART? FloatingType?
|
|
/// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
|
|
fn parse_constant_inner(source: &mut Chars) -> Result<Token> {
|
|
let zero = source.next_if(|&c| c == '0').is_some();
|
|
let radix = zero
|
|
.then(|| source.next_if_map(|c| Radix::from_char(c)))
|
|
.flatten();
|
|
|
|
if let Some(radix) = radix {
|
|
parse_digit_part(source, false, radix)?;
|
|
if source.peek().map(|c| c == 'u' || c == 'i') == Some(true) {
|
|
try_parse_integral_type(source)?;
|
|
}
|
|
return Ok(radix.to_token());
|
|
}
|
|
|
|
// if zero: `_`* DIGIT (DIGIT|`_`)*
|
|
// else: DIGIT (DIGIT|`_`)*
|
|
_ = match parse_digit_part(source, zero, Radix::Dec) {
|
|
Ok(_) => Ok(()),
|
|
Err(Error::NumericalConstantDigitNoDigit) if zero => Ok(()),
|
|
Err(e) => Err(e),
|
|
}?;
|
|
|
|
if let Some(_) = source.try_parse_result(|source| try_parse_integral_type(source))? {
|
|
return Ok(Token::IntegerConstant);
|
|
}
|
|
|
|
let dot = source.next_if(|&c| c == '.').is_some();
|
|
|
|
if dot {
|
|
parse_digit_part(source, false, Radix::Dec)?;
|
|
}
|
|
|
|
// parse exp notation
|
|
let exp = try_parse_exp_part(source)?.is_some();
|
|
|
|
// trailing FloatingType?
|
|
let floating = if source.next_if(|&c| c == 'f').is_some() {
|
|
let digits = source.next_tuple::<(char, char)>();
|
|
if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) {
|
|
// need either f64 or f32 here!
|
|
return Err(Error::FloatingConstantInvalidTrailingType);
|
|
}
|
|
true
|
|
} else {
|
|
false
|
|
};
|
|
|
|
let token = match (dot, exp, floating) {
|
|
(false, false, false) => Token::IntegerConstant,
|
|
(true, false, _) => Token::DotFloatingConstant,
|
|
(true, true, _) => Token::DotFloatingExpConstant,
|
|
(false, true, _) => Token::FloatingExpConstant,
|
|
(false, _, _) => Token::FloatingConstant,
|
|
};
|
|
|
|
Ok(token)
|
|
}
|
|
|
|
/// CONSTANT <-
|
|
/// DEC_DIGITS IntegralType?
|
|
/// `0x` HEX_DIGITS IntegralType?
|
|
/// `0b` BIN_DIGITS IntegralType?
|
|
/// `0o` OCT_DIGITS IntegralType?
|
|
/// DEC_DIGITS FloatingType?
|
|
/// `.` DEC_DIGITS EXP_PART? FloatingType?
|
|
/// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
|
|
fn parse_constant(source: &mut Chars) -> Result<Token> {
|
|
let constant = parse_constant_inner(source)?;
|
|
// char following a constant must not be id_continue
|
|
source
|
|
.peek()
|
|
.filter(|&c| !crate::common::is_id_continue(c))
|
|
.ok_or(Error::NumericalConstantUnknownSuffix)?;
|
|
|
|
Ok(constant)
|
|
}
|