compiler warnings

2025-09-26 14:58:39 +02:00 · 2025-09-26 14:58:39 +02:00 · 122f8ff7f1
parent 2e6b8b0cc3
commit 122f8ff7f1
2 changed files with 16 additions and 183 deletions
--- a/crates/lexer/src/complex_tokens.rs
+++ b/crates/lexer/src/complex_tokens.rs
@ -18,8 +18,6 @@ pub enum Error {
    FloatingConstantInvalidTrailingType,
    #[error("Invalid token.")]
    InvalidToken,
    #[error("Identifier starts with invalid character.")]
    ExpectedIdStartForIdentifier,
    #[error("Unknown suffix in constant.")]
    NumericalConstantUnknownSuffix,
 }
@ -73,6 +71,8 @@ impl Radix {
            Radix::Dec => Token::IntegerConstant,
        }
    }
    #[expect(dead_code)]
    pub fn from_token(token: Token) -> Option<Self> {
        match token {
            Token::IntegerHexConstant => Some(Radix::Hex),
@ -82,6 +82,8 @@ impl Radix {
            _ => None,
        }
    }
    #[expect(dead_code)]
    pub fn map_digit(self, c: char) -> u8 {
        match self {
            Radix::Hex => match c {
@ -104,6 +106,8 @@ impl Radix {
            },
        }
    }
    #[expect(dead_code)]
    pub fn folding_method(self) -> fn(u64, char) -> u64 {
        match self {
            Radix::Hex => {
@ -152,10 +156,10 @@ impl Radix {
    }
    pub fn is_digit(self) -> fn(char) -> bool {
        match self {
-            Radix::Hex => crate::is_things::is_hex_digit,
+            Radix::Hex => is_things::is_hex_digit,
-            Radix::Bin => crate::is_things::is_bin_digit,
+            Radix::Bin => is_things::is_bin_digit,
-            Radix::Oct => crate::is_things::is_oct_digit,
+            Radix::Oct => is_things::is_oct_digit,
-            Radix::Dec => crate::is_things::is_digit,
+            Radix::Dec => is_things::is_digit,
        }
    }
 }
@ -193,11 +197,7 @@ fn try_parse_integral_type(source: &mut Source) -> Result<Option<()>> {
        return Ok(None);
    }
-    if source
+    if source.take_while_ref(|&c| is_things::is_digit(c)).count() <= 0 {
        .take_while_ref(|&c| crate::is_things::is_digit(c))
        .count()
        <= 0
    {
        return Err(Error::IntegralTypeExpectedDigit);
    };
@ -214,7 +214,7 @@ fn try_parse_exp_part(source: &mut Source) -> Result<Option<()>> {
    if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() {
        let _sign = source.next_if(|&c| c == '-' || c == '+');
        if source
-            .take_while_ref(|&c| crate::is_things::is_digit(c))
+            .take_while_ref(|&c| is_things::is_digit(c))
            .count()
            .lt(&1)
        {
@ -300,7 +300,7 @@ pub(crate) fn parse_constant(source: &mut Source) -> Result<Token> {
    // char following a constant must not be id_continue
    if source
        .peek()
-        .map(|&c| crate::is_things::is_id_continue(c))
+        .map(|&c| is_things::is_id_continue(c))
        .unwrap_or(false)
    {
        return Err(Error::NumericalConstantUnknownSuffix);
--- a/crates/lexer/src/lib.rs
+++ b/crates/lexer/src/lib.rs
@ -58,12 +58,10 @@ mod is_things {
        }
    }
    #[expect(dead_code)]
    /// Returns `true` when `ch` is one of the ASCII decimal digits `0` through `9`.
    pub fn is_digit(ch: char) -> bool {
        ch.is_ascii_digit()
    }
    #[expect(dead_code)]
    /// Returns `true` when `ch` is a binary digit, i.e. `0` or `1`.
    pub fn is_bin_digit(ch: char) -> bool {
        matches!(ch, '0' | '1')
    }
@ -73,12 +71,10 @@ mod is_things {
        ('1'..='9').contains(&ch)
    }
    #[expect(dead_code)]
    /// Returns `true` when `ch` is an octal digit, i.e. `0` through `7`.
    pub fn is_oct_digit(ch: char) -> bool {
        matches!(ch, '0'..='7')
    }
    #[expect(dead_code)]
    /// Returns `true` when `ch` is a hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`.
    pub fn is_hex_digit(ch: char) -> bool {
        ch.is_ascii_hexdigit()
    }
@ -301,170 +297,6 @@ impl Token {
    }
 }
 /// A list of lexemes used by the `LexemeParser`.
 ///
 /// `lexemes` contains every token that has a defined lexeme, such as `fn`, `f32`, `const`, etc.
 /// The `LexemeList` keeps track of two offsets into the `lexemes` array,
 /// splitting it into three windows:
 ///
 /// - `[0, start_candidates)` - tokens that are still being considered for parsing
 /// - `[start_candidates, end_candidates)` - tokens whose lexeme has been matched completely
 /// - `[end_candidates, len)` - tokens that have been filtered out and are no longer considered
 ///
 /// On each iteration of the parsing loop, the remaining tokens are matched
 /// against the next character and, if they match completely, are swapped into
 /// the candidates window, or swapped to the end if they don't.
 struct LexemeList {
    /// All tokens with a fixed lexeme, partitioned in place into the three windows.
    lexemes: Box<[Token]>,
    /// Exclusive end of the "remaining" window and start of the "candidates" window.
    start_candidates: usize,
    /// Exclusive end of the "candidates" window and start of the "filtered out" window.
    end_candidates: usize,
    /// Scratch buffer of `(index, result)` pairs filled and drained by `step`;
    /// kept as a field so its allocation can be reused between calls.
    filtered: Vec<(usize, FilterResult)>,
 }
 /// Outcome of testing one remaining token against the current input
 /// character in `LexemeList::step`.
 enum FilterResult {
    /// The token's byte at the current position differs from the input
    /// character; the token is moved to the filtered-out window.
    Remove,
    /// The input character matched the final byte of the token's lexeme;
    /// the token is moved to the candidates window.
    Candidate,
 }
 impl LexemeList {
    /// Builds the list from `Token::lexemes()`, with every token initially in
    /// the "remaining" window (both offsets point at the end of the slice).
    fn new() -> Self {
        let lexemes = Token::lexemes()
            .iter()
            .map(|(tok, _)| tok.clone())
            .collect::<Box<_>>();
        Self {
            start_candidates: lexemes.len(),
            end_candidates: lexemes.len(),
            lexemes,
            filtered: Vec::new(),
        }
    }

    /// Resets both offsets to the end of the slice so that every token is
    /// considered "remaining" again. The order of `lexemes` is left as-is.
    fn clear(&mut self) {
        self.start_candidates = self.lexemes.len();
        self.end_candidates = self.lexemes.len();
    }

    /// Tokens that are still being matched: `[0, start_candidates)`.
    fn remaining(&self) -> &[Token] {
        &self.lexemes[0..self.start_candidates]
    }

    /// Tokens whose lexeme has matched completely:
    /// `[start_candidates, end_candidates)`.
    fn candidates(&self) -> &[Token] {
        &self.lexemes[self.start_candidates..self.end_candidates]
    }

    /// Matches every remaining token against `ch`, the input character at
    /// byte position `pos`.
    ///
    /// - Tokens whose byte at `pos` differs from `ch` (or whose lexeme is
    ///   shorter than `pos + 1` bytes) are moved to the filtered-out window.
    /// - Tokens whose byte at `pos` matches and is their last byte are moved
    ///   to the candidates window.
    /// - All other tokens stay in the remaining window.
    ///
    /// # Panics
    ///
    /// Panics if a remaining token has no lexeme (`Token::lexeme()` returns
    /// `None`).
    fn step(&mut self, ch: char, pos: usize) {
        // Take the scratch buffer out of `self` so its allocation is reused
        // across calls while `self.lexemes` is mutated below.
        let mut filtered = core::mem::take(&mut self.filtered);
        filtered.extend(
            self.remaining()
                .iter()
                .enumerate()
                .filter_map(|(i, tok)| {
                    let bytes = tok.lexeme().unwrap().as_bytes();
                    // Lexemes are compared byte-wise; for ASCII lexemes
                    // `char::from(u8)` yields exactly the lexeme's character.
                    // TODO: maybe keep a list of `Char<'_>`s around in order to
                    // support fully utf8 tokens?
                    match bytes.get(pos) {
                        Some(&b) if char::from(b) == ch => {
                            // matched; a candidate only if this was the last byte
                            (bytes.len() <= pos + 1).then_some((i, FilterResult::Candidate))
                        }
                        // mismatch, or the lexeme is too short to have a byte at `pos`
                        _ => Some((i, FilterResult::Remove)),
                    }
                }),
        );

        // Iterate in reverse order so that we can safely swap elements: `i` is
        // always the greatest index still to be processed, and everything
        // between `i` and `start_candidates` is a token that stays remaining.
        // Drain so that the `filtered` Vec allocation can be reused.
        for (i, f) in filtered.drain(..).rev() {
            // `i < start_candidates <= end_candidates`, so neither
            // subtraction can underflow.
            let last_remaining = self.start_candidates - 1;
            match f {
                // For candidates, swap the candidate with the last remaining
                // token, then shrink the remaining window by one.
                FilterResult::Candidate => {
                    self.lexemes.swap(i, last_remaining);
                    self.start_candidates = last_remaining;
                }
                // For removes, first move the removed token into the last
                // remaining slot, then swap that slot with the last candidate,
                // then shift both windows down by one.
                //
                // remove this:   v
                //           [a,b,c][d,e,f][g,h,i]
                // swap these:  ^ ^
                //           [a,c,b][d,e,f][g,h,i]
                // swap these:    ^      ^
                //           [a,c,f][d,e,b][g,h,i]
                // decrement both counters:
                //           [a,c][f,d,e][b,g,h,i]
                //
                // Doing the swaps in this order also works when `i` already is
                // the last remaining slot (the first swap is then a no-op) and
                // when the candidate window is empty (the second swap is then
                // a no-op).
                FilterResult::Remove => {
                    let last_candidate = self.end_candidates - 1;
                    self.lexemes.swap(i, last_remaining);
                    self.lexemes.swap(last_remaining, last_candidate);
                    self.start_candidates = last_remaining;
                    self.end_candidates = last_candidate;
                }
            }
        }

        // put the (now empty) scratch buffer back
        self.filtered = filtered;
    }
 }
 /// Helper type for parsing tokens that have a defined lexeme, such as `fn`,
 /// `f32`, `const`, etc. Tokens with variable lexemes, such as primitive
 /// integral types, constants or identifiers are not parsed by this.
 pub struct LexemeParser {
    /// Working set of fixed-lexeme tokens, narrowed character by character.
    lexemes: LexemeList,
    /// Position handed to `LexemeList::step` as the byte offset to compare at.
    len: usize,
 }
 impl LexemeParser {
    /// Creates a parser over all tokens that have a fixed lexeme.
    pub fn new() -> Self {
        Self {
            lexemes: LexemeList::new(),
            len: 0,
        }
    }

    /// Consumes characters from `tokens` and tries to match them against the
    /// fixed lexemes.
    ///
    /// Reading stops at the end of input, at the first whitespace character,
    /// or as soon as no token is left that could still match. Returns the last
    /// entry of the candidates window, or `None` if no lexeme matched
    /// completely.
    pub fn parse(&mut self, tokens: impl Iterator<Item = char>) -> Option<Token> {
        self.lexemes.clear();
        // Start matching at the first byte of every lexeme again.
        self.len = 0;

        for ch in tokens {
            if crate::is_things::is_whitespace(ch) {
                break;
            }

            self.lexemes.step(ch, self.len);
            // Advance so the next character is compared against the next byte
            // of each lexeme rather than the first one again.
            self.len += 1;

            if self.lexemes.remaining().is_empty() {
                break;
            }
        }

        self.lexemes.candidates().last().copied()
    }
 }
 use itertools::Itertools;
 use trie::Tree;
 #[derive(Debug, Clone, Copy)]
@ -607,14 +439,15 @@ impl<'a> Iterator for TokenIterator<'a> {
                Some((token, &self.source[start..self.offset]))
            }
            // `//`-style comments or doc-comments
            _ => match self.parse().map(|tok| match tok {
                Token::SlashSlash => {
                    self.skip_while(|c| c == '\n');
-                    (Token::Comment)
+                    Token::Comment
                }
                Token::SlashSlashSlash => {
                    self.skip_while(|c| c == '\n');
-                    (Token::DocComment)
+                    Token::DocComment
                }
                _ => tok,
            }) {