// from-scratch/lang/tests/tokens.rs

/// Panic hook exported with the C ABI under the unmangled symbol name `panic`.
///
/// Never returns: it raises a Rust panic with a fixed message. Judging by that
/// message, it is presumably meant to be called by the externally linked
/// tokeniser code when it hits a fatal error — confirm against that code.
///
/// NOTE(review): on recent toolchains a panic escaping an `extern "C"`
/// function aborts the process instead of unwinding — confirm this is the
/// intended behaviour.
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
    panic!("Called panic from external code.");
}

/// Numeric id of a lexeme; used as an index into the external `LEXEMES` and
/// `LEXEME_LENS` tables.
struct Lexeme(u8);

impl Lexeme {
    /// Returns the text of this lexeme as stored in the external tables.
    ///
    /// The start pointer is taken from entry `self.0` of `LEXEMES` and the
    /// byte length from entry `self.0` of `LEXEME_LENS`.
    fn lex(&self) -> &'static str {
        let index = usize::from(self.0);

        // SAFETY: relies on `LEXEMES` and `LEXEME_LENS` being parallel tables
        // that both have an entry at `index`, where each `LEXEMES` entry
        // points to valid `'static` UTF-8 data and the matching `LEXEME_LENS`
        // entry holds its exact length in bytes.
        unsafe {
            let start = (&raw const LEXEMES).add(index).read();
            let len = (&raw const LEXEME_LENS).add(index).read();
            let bytes = core::slice::from_raw_parts(start, len);
            core::str::from_utf8_unchecked(bytes)
        }
    }
}

/// Conversion from a raw value into a [`Lexeme`], if it denotes one.
trait AsLexeme {
    /// Returns `Some(Lexeme)` when `self` is a lexeme id, `None` otherwise.
    fn as_lexeme(self) -> Option<Lexeme>;
}

impl AsLexeme for u8 {
    /// Accepts ids in the inclusive range 1 to 10; every other value,
    /// including 0, yields `None`.
    fn as_lexeme(self) -> Option<Lexeme> {
        if !(1..=10).contains(&self) {
            return None;
        }
        Some(Lexeme(self))
    }
}

// Symbols provided by the externally linked tokeniser (plus `exit`).
//
// `dead_code` is allowed because several declarations (e.g. `is_ident`,
// `is_number`, `skip_whitespace`, `NUM_LEXEMES`, `TOKENS`, `exit`) are not
// referenced by the code visible in this file.
#[allow(dead_code)]
unsafe extern "C" {
    // Takes a pointer to a NUL-terminated path (main passes a C string
    // literal). Declared with `c_char` rather than `i8` so the signature
    // matches `CStr::as_ptr` on targets where C `char` is unsigned.
    unsafe fn tokeniser_init(path: *const core::ffi::c_char);
    unsafe fn tokeniser_print();
    // NOTE(review): semantics of the three helpers below are not visible from
    // this file — confirm against the tokeniser source.
    unsafe fn is_ident(len: usize) -> bool;
    unsafe fn is_number(len: usize) -> bool;
    unsafe fn skip_whitespace();
    // Returns a lexeme id; main keeps calling it until the value is no longer
    // accepted by `AsLexeme::as_lexeme`.
    unsafe fn find_lexeme() -> u8;

    // `LEXEMES` and `LEXEME_LENS` are declared as single values, but
    // `Lexeme::lex` treats their addresses as the first elements of parallel
    // tables (start pointer / byte length) indexed by lexeme id.
    static mut LEXEMES: *const u8;
    static mut LEXEME_LENS: usize;
    // Presumably the number of entries in the tables above — TODO confirm.
    static mut NUM_LEXEMES: usize;
    static mut TOKENS: u8;

    // Tokeniser state; main asserts all four are zero/null before
    // `tokeniser_init` is called.
    static mut input_file: u32;
    static mut buffer: *mut u8;
    static mut cursor: usize;
    static mut buffer_len: usize;

    unsafe fn exit(code: i32) -> !;
}

// fn lexemes_raw() -> &'static [*const u8] {
//     unsafe {
//         core::slice::from_raw_parts(
//             (&raw const LEXEMES),
//             (&raw const NUM_LEXEMES).read(),
//         )
//     }
// }

// fn lexeme_lens() -> &'static [usize] {
//     unsafe {
//         core::slice::from_raw_parts(
//             (&raw const LEXEME_LENS),
//             (&raw const NUM_LEXEMES).read(),
//         )
//     }
// }

// fn lexeme_iter() -> impl Iterator<Item = &'static str> {
//     lexemes_raw().iter().zip(lexeme_lens().iter()).map(|(&ptr, &len)| {
//         // SAFETY: lexemes_raw and lexeme_lens are guaranteed to contain valid
//         // UTF-8 data and correct lengths.
//         unsafe {
//             core::str::from_utf8_unchecked(core::slice::from_raw_parts(ptr, len))
//         }
//     })
// }

/// Drives the external tokeniser over `tests/tokens.l`.
///
/// Checks that the tokeniser state starts out zeroed, initialises it, dumps
/// the state to stderr, then prints every lexeme returned by `find_lexeme`
/// until it yields a value that is not a valid lexeme id.
fn main() {
    let path = c"tests/tokens.l";

    // SAFETY (assumed, TODO confirm): every symbol used here is provided by
    // the linked tokeniser object, and nothing else accesses its state
    // concurrently while this function runs.
    unsafe {
        // Before initialisation all tokeniser state must be zero / null.
        assert_eq!((&raw const input_file).read(), 0);
        assert_eq!((&raw const buffer_len).read(), 0);
        assert_eq!((&raw const cursor).read(), 0);
        assert_eq!((&raw const buffer).read(), core::ptr::null_mut());

        eprint!("Initializing tokeniser.. ");
        tokeniser_init(path.as_ptr());
        eprintln!("ok.");

        // Snapshot the state after initialisation and report it.
        let file = (&raw const input_file).read();
        let buf = (&raw const buffer).read();
        let pos = (&raw const cursor).read();
        let len = (&raw const buffer_len).read();
        eprintln!("{}: {:?}[{}..{}]", file, buf, pos, len);
        tokeniser_print();

        // Stop at the first id that `as_lexeme` rejects.
        loop {
            let Some(lexeme) = find_lexeme().as_lexeme() else {
                break;
            };
            eprintln!("Found lexeme: {}", lexeme.lex());
        }
        eprintln!("Finished tokenising.");
    }
}