compiler warnings
This commit is contained in:
parent
2e6b8b0cc3
commit
122f8ff7f1
|
@ -18,8 +18,6 @@ pub enum Error {
|
||||||
FloatingConstantInvalidTrailingType,
|
FloatingConstantInvalidTrailingType,
|
||||||
#[error("Invalid token.")]
|
#[error("Invalid token.")]
|
||||||
InvalidToken,
|
InvalidToken,
|
||||||
#[error("Identifier starts with invalid character.")]
|
|
||||||
ExpectedIdStartForIdentifier,
|
|
||||||
#[error("Unknown suffix in constant.")]
|
#[error("Unknown suffix in constant.")]
|
||||||
NumericalConstantUnknownSuffix,
|
NumericalConstantUnknownSuffix,
|
||||||
}
|
}
|
||||||
|
@ -73,6 +71,8 @@ impl Radix {
|
||||||
Radix::Dec => Token::IntegerConstant,
|
Radix::Dec => Token::IntegerConstant,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[expect(dead_code)]
|
||||||
pub fn from_token(token: Token) -> Option<Self> {
|
pub fn from_token(token: Token) -> Option<Self> {
|
||||||
match token {
|
match token {
|
||||||
Token::IntegerHexConstant => Some(Radix::Hex),
|
Token::IntegerHexConstant => Some(Radix::Hex),
|
||||||
|
@ -82,6 +82,8 @@ impl Radix {
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[expect(dead_code)]
|
||||||
pub fn map_digit(self, c: char) -> u8 {
|
pub fn map_digit(self, c: char) -> u8 {
|
||||||
match self {
|
match self {
|
||||||
Radix::Hex => match c {
|
Radix::Hex => match c {
|
||||||
|
@ -104,6 +106,8 @@ impl Radix {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[expect(dead_code)]
|
||||||
pub fn folding_method(self) -> fn(u64, char) -> u64 {
|
pub fn folding_method(self) -> fn(u64, char) -> u64 {
|
||||||
match self {
|
match self {
|
||||||
Radix::Hex => {
|
Radix::Hex => {
|
||||||
|
@ -152,10 +156,10 @@ impl Radix {
|
||||||
}
|
}
|
||||||
pub fn is_digit(self) -> fn(char) -> bool {
|
pub fn is_digit(self) -> fn(char) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Radix::Hex => crate::is_things::is_hex_digit,
|
Radix::Hex => is_things::is_hex_digit,
|
||||||
Radix::Bin => crate::is_things::is_bin_digit,
|
Radix::Bin => is_things::is_bin_digit,
|
||||||
Radix::Oct => crate::is_things::is_oct_digit,
|
Radix::Oct => is_things::is_oct_digit,
|
||||||
Radix::Dec => crate::is_things::is_digit,
|
Radix::Dec => is_things::is_digit,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -193,11 +197,7 @@ fn try_parse_integral_type(source: &mut Source) -> Result<Option<()>> {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
if source
|
if source.take_while_ref(|&c| is_things::is_digit(c)).count() <= 0 {
|
||||||
.take_while_ref(|&c| crate::is_things::is_digit(c))
|
|
||||||
.count()
|
|
||||||
<= 0
|
|
||||||
{
|
|
||||||
return Err(Error::IntegralTypeExpectedDigit);
|
return Err(Error::IntegralTypeExpectedDigit);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -214,7 +214,7 @@ fn try_parse_exp_part(source: &mut Source) -> Result<Option<()>> {
|
||||||
if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() {
|
if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() {
|
||||||
let _sign = source.next_if(|&c| c == '-' || c == '+');
|
let _sign = source.next_if(|&c| c == '-' || c == '+');
|
||||||
if source
|
if source
|
||||||
.take_while_ref(|&c| crate::is_things::is_digit(c))
|
.take_while_ref(|&c| is_things::is_digit(c))
|
||||||
.count()
|
.count()
|
||||||
.lt(&1)
|
.lt(&1)
|
||||||
{
|
{
|
||||||
|
@ -300,7 +300,7 @@ pub(crate) fn parse_constant(source: &mut Source) -> Result<Token> {
|
||||||
// char following a constant must not be id_continue
|
// char following a constant must not be id_continue
|
||||||
if source
|
if source
|
||||||
.peek()
|
.peek()
|
||||||
.map(|&c| crate::is_things::is_id_continue(c))
|
.map(|&c| is_things::is_id_continue(c))
|
||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
{
|
{
|
||||||
return Err(Error::NumericalConstantUnknownSuffix);
|
return Err(Error::NumericalConstantUnknownSuffix);
|
||||||
|
|
|
@ -58,12 +58,10 @@ mod is_things {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[expect(dead_code)]
|
|
||||||
pub fn is_digit(ch: char) -> bool {
|
pub fn is_digit(ch: char) -> bool {
|
||||||
('0'..='9').contains(&ch)
|
('0'..='9').contains(&ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[expect(dead_code)]
|
|
||||||
pub fn is_bin_digit(ch: char) -> bool {
|
pub fn is_bin_digit(ch: char) -> bool {
|
||||||
ch == '0' || ch == '1'
|
ch == '0' || ch == '1'
|
||||||
}
|
}
|
||||||
|
@ -73,12 +71,10 @@ mod is_things {
|
||||||
('1'..='9').contains(&ch)
|
('1'..='9').contains(&ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[expect(dead_code)]
|
|
||||||
pub fn is_oct_digit(ch: char) -> bool {
|
pub fn is_oct_digit(ch: char) -> bool {
|
||||||
('0'..='7').contains(&ch)
|
('0'..='7').contains(&ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[expect(dead_code)]
|
|
||||||
pub fn is_hex_digit(ch: char) -> bool {
|
pub fn is_hex_digit(ch: char) -> bool {
|
||||||
('0'..='9').contains(&ch) || ('a'..='f').contains(&ch) || ('A'..='F').contains(&ch)
|
('0'..='9').contains(&ch) || ('a'..='f').contains(&ch) || ('A'..='F').contains(&ch)
|
||||||
}
|
}
|
||||||
|
@ -301,170 +297,6 @@ impl Token {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A list of lexemes used by the `LexemeParser`.
|
|
||||||
/// `lexemes` contains every token that has a defined lexeme, such as `fn`, `f32`, `const`, etc.
|
|
||||||
/// The `LexemeList` keeps track of two offsets into the `lexemes` array,
|
|
||||||
/// splitting it into three windows:
|
|
||||||
/// - [0, start_candidates) - tokens that are still being considered for parsing
|
|
||||||
/// - [start_candidates, end_candidates) - the tokens which this lexeme matches
|
|
||||||
/// - [end_candidates, len) - tokens that have been filtered out and are no longer considered
|
|
||||||
/// On each iteration of the parsing loop, the remaining tokens are matched
|
|
||||||
/// against the next character and, if they match completely, are swapped into
|
|
||||||
/// the candidates window, or swapped to the end if they don't.
|
|
||||||
struct LexemeList {
|
|
||||||
lexemes: Box<[Token]>,
|
|
||||||
start_candidates: usize,
|
|
||||||
end_candidates: usize,
|
|
||||||
filtered: Vec<(usize, FilterResult)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
enum FilterResult {
|
|
||||||
Remove,
|
|
||||||
Candidate,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl LexemeList {
|
|
||||||
fn new() -> Self {
|
|
||||||
let lexemes = Token::lexemes()
|
|
||||||
.iter()
|
|
||||||
.map(|(tok, _)| tok.clone())
|
|
||||||
.collect::<Box<_>>();
|
|
||||||
|
|
||||||
Self {
|
|
||||||
start_candidates: lexemes.len(),
|
|
||||||
end_candidates: lexemes.len(),
|
|
||||||
lexemes,
|
|
||||||
filtered: Vec::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn clear(&mut self) {
|
|
||||||
self.start_candidates = self.lexemes.len();
|
|
||||||
self.end_candidates = self.lexemes.len();
|
|
||||||
}
|
|
||||||
|
|
||||||
fn remaining(&self) -> &[Token] {
|
|
||||||
&self.lexemes[0..self.start_candidates]
|
|
||||||
}
|
|
||||||
|
|
||||||
fn candidates(&self) -> &[Token] {
|
|
||||||
&self.lexemes[self.start_candidates..self.end_candidates]
|
|
||||||
}
|
|
||||||
|
|
||||||
fn step(&mut self, ch: char, pos: usize) {
|
|
||||||
// smartly reuse allocation for `filtered`
|
|
||||||
// truly one of the premature optimizations.
|
|
||||||
// but it just feels good, innit?
|
|
||||||
let mut filtered = core::mem::take(&mut self.filtered);
|
|
||||||
|
|
||||||
self.remaining()
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.filter_map(|(i, tok)| {
|
|
||||||
let bytes = tok.lexeme().unwrap().as_bytes();
|
|
||||||
// SAFETY: all tokens in `self.remaining()` are lexical tokens, and
|
|
||||||
// they are all valid ascii
|
|
||||||
let c = unsafe {
|
|
||||||
// TODO: maybe keep a list of `Char<'_>`s around in order to
|
|
||||||
// support fully utf8 tokens?
|
|
||||||
char::from_u32_unchecked(bytes[pos] as u32)
|
|
||||||
};
|
|
||||||
match c == ch {
|
|
||||||
false => Some((i, FilterResult::Remove)),
|
|
||||||
true if bytes.len() <= pos + 1 => Some((i, FilterResult::Candidate)),
|
|
||||||
true => None,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect_into(&mut filtered);
|
|
||||||
|
|
||||||
// iterate in reverse order so that we can safely swap elements
|
|
||||||
// drain here so that we can possibly reuse the `filtered` Vec allocation
|
|
||||||
filtered.drain(..).rev().for_each(|(i, f)| {
|
|
||||||
match f {
|
|
||||||
// for candidates, swap the candidate with the last remaining
|
|
||||||
// token, then dec `start_candidates`
|
|
||||||
FilterResult::Candidate => {
|
|
||||||
// SAFETY: we know that `i` and `self.start_candidates - 1`
|
|
||||||
// are both valid indices: `self.start_candidates` starts at
|
|
||||||
// the end and each time it is decremented, one more element
|
|
||||||
// is removed from the front, so that as long as an element
|
|
||||||
// is remaining, `self.start_candidates` is always greater
|
|
||||||
// than 0.
|
|
||||||
// the order of the remaining elements is not meaningfully
|
|
||||||
// impacted because we only ever swap with elements after
|
|
||||||
// `i`, and `i` is the greatest index we will touch.
|
|
||||||
unsafe {
|
|
||||||
self.lexemes.swap_unchecked(i, self.start_candidates - 1);
|
|
||||||
self.start_candidates = self.start_candidates.saturating_sub(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// for removes, swap the last candidate with the last remaining
|
|
||||||
// token, then swap the remove with the last candidate, then dec
|
|
||||||
// `end_candidates` and `start_candidates`
|
|
||||||
FilterResult::Remove => {
|
|
||||||
unsafe {
|
|
||||||
// in the case that `start_candidates` ==
|
|
||||||
// `end_candidates`, no swap happens and that's fine.
|
|
||||||
// remove this: v
|
|
||||||
// [a,b,c][d,e,f][g,h,i]
|
|
||||||
// swap these: ^ ^
|
|
||||||
// [a,b,f][d,e,c][g,h,i]
|
|
||||||
// swap these: ^ ^
|
|
||||||
// [a,c,f][d,e,b][g,h,i]
|
|
||||||
// decrement both counters:
|
|
||||||
// [a,c][f,d,e][b,g,h,i]
|
|
||||||
self.lexemes
|
|
||||||
.swap_unchecked(self.start_candidates - 1, self.end_candidates - 1);
|
|
||||||
self.lexemes.swap_unchecked(i, self.end_candidates - 1);
|
|
||||||
self.start_candidates = self.start_candidates.saturating_sub(1);
|
|
||||||
self.end_candidates = self.end_candidates.saturating_sub(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// replace `filtered`
|
|
||||||
self.filtered = filtered;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Helper type for parsing tokens that have a defined lexeme, such as `fn`,
|
|
||||||
/// `f32`, `const`, etc. Tokens with variable lexemes, such as primitive
|
|
||||||
/// integral types, constants or identifiers are not parsed by this.
|
|
||||||
pub struct LexemeParser {
|
|
||||||
lexemes: LexemeList,
|
|
||||||
len: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl LexemeParser {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
lexemes: LexemeList::new(),
|
|
||||||
len: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse(&mut self, mut tokens: impl Iterator<Item = char>) -> Option<Token> {
|
|
||||||
self.lexemes.clear();
|
|
||||||
loop {
|
|
||||||
let Some(ch) = tokens.next() else {
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
|
|
||||||
if crate::is_things::is_whitespace(ch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.lexemes.step(ch, self.len);
|
|
||||||
if self.lexemes.remaining().is_empty() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.lexemes.candidates().last().copied()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
use itertools::Itertools;
|
|
||||||
use trie::Tree;
|
use trie::Tree;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
@ -607,14 +439,15 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||||
|
|
||||||
Some((token, &self.source[start..self.offset]))
|
Some((token, &self.source[start..self.offset]))
|
||||||
}
|
}
|
||||||
|
// `//`-style comments or doc-comments
|
||||||
_ => match self.parse().map(|tok| match tok {
|
_ => match self.parse().map(|tok| match tok {
|
||||||
Token::SlashSlash => {
|
Token::SlashSlash => {
|
||||||
self.skip_while(|c| c == '\n');
|
self.skip_while(|c| c == '\n');
|
||||||
(Token::Comment)
|
Token::Comment
|
||||||
}
|
}
|
||||||
Token::SlashSlashSlash => {
|
Token::SlashSlashSlash => {
|
||||||
self.skip_while(|c| c == '\n');
|
self.skip_while(|c| c == '\n');
|
||||||
(Token::DocComment)
|
Token::DocComment
|
||||||
}
|
}
|
||||||
_ => tok,
|
_ => tok,
|
||||||
}) {
|
}) {
|
||||||
|
|
Loading…
Reference in a new issue