// from-scratch/lang/tests/tokens.rs
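//
// Integration test for the tokeniser implemented in the foreign (non-Rust) code this
// crate links against: it checks the tokeniser's initial global state, initialises it
// on a sample input file, and then drains lexemes through the C ABI declared below.
// The `no_mangle` `panic` shim gives that foreign code an entry point that aborts the
// test with a Rust panic.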

#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
panic!("Called panic from external code.");
}
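// A `Lexeme` is an index into the tokeniser's parallel LEXEMES / LEXEME_LENS tables;
// `lex` resolves the index to the corresponding string.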
struct Lexeme(u8);
impl Lexeme {
    fn lex(&self) -> &'static str {
        // SAFETY: LEXEME_LENS holds the correct length for each lexeme, and LEXEMES
        // holds pointers to valid 'static UTF-8 data.
        unsafe {
            core::str::from_utf8_unchecked(core::slice::from_raw_parts(
                (&raw const LEXEMES).add(self.0 as usize).read(),
                (&raw const LEXEME_LENS).add(self.0 as usize).read(),
            ))
        }
    }
}
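// Conversion for the raw code returned by `find_lexeme`: codes 1..=10 name known
// lexemes, anything else (presumably 0 at end of input) ends the scan loop in `main`.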
trait AsLexeme {
    fn as_lexeme(self) -> Option<Lexeme>;
}
impl AsLexeme for u8 {
    fn as_lexeme(self) -> Option<Lexeme> {
        match self {
            1..=10 => Some(Lexeme(self)),
            _ => None,
        }
    }
}
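// Symbols defined by the tokeniser under test. LEXEMES and LEXEME_LENS are declared
// here as single elements but are in fact the first entries of parallel arrays, which
// `Lexeme::lex` (and the commented-out helpers below) index with raw pointer arithmetic.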
#[allow(dead_code)]
unsafe extern "C" {
    unsafe fn tokeniser_init(path: *const i8);
    unsafe fn tokeniser_print();
    unsafe fn is_ident(len: usize) -> bool;
    unsafe fn is_number(len: usize) -> bool;
    unsafe fn skip_whitespace();
    unsafe fn find_lexeme() -> u8;
    static mut LEXEMES: *const u8;
    static mut LEXEME_LENS: usize;
    static mut NUM_LEXEMES: usize;
    static mut TOKENS: u8;
    static mut input_file: u32;
    static mut buffer: *mut u8;
    static mut cursor: usize;
    static mut buffer_len: usize;
    unsafe fn exit(code: i32) -> !;
}
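// Alternative slice-based views over the same foreign tables (currently disabled).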
// fn lexemes_raw() -> &'static [*const u8] {
//     unsafe {
//         core::slice::from_raw_parts(
//             &raw const LEXEMES,
//             (&raw const NUM_LEXEMES).read(),
//         )
//     }
// }
// fn lexeme_lens() -> &'static [usize] {
//     unsafe {
//         core::slice::from_raw_parts(
//             &raw const LEXEME_LENS,
//             (&raw const NUM_LEXEMES).read(),
//         )
//     }
// }
// fn lexeme_iter() -> impl Iterator<Item = &'static str> {
//     lexemes_raw().iter().zip(lexeme_lens().iter()).map(|(&ptr, &len)| {
//         // SAFETY: lexemes_raw and lexeme_lens are guaranteed to contain valid
//         // UTF-8 data and correct lengths.
//         unsafe {
//             core::str::from_utf8_unchecked(core::slice::from_raw_parts(ptr, len))
//         }
//     })
// }
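// The test itself: assert the tokeniser's globals start zeroed, initialise it on
// tests/tokens.l, dump its state, then pull lexemes until `find_lexeme` stops
// returning a valid code.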
fn main() {
let path = c"tests/tokens.l";
unsafe {
assert_eq!((&raw const input_file).read(), 0);
assert_eq!((&raw const buffer_len).read(), 0);
assert_eq!((&raw const cursor).read(), 0);
assert_eq!((&raw const buffer).read(), core::ptr::null_mut());
eprint!("Initializing tokeniser.. ");
tokeniser_init(path.as_ptr());
eprintln!("ok.");
eprintln!("{}: {:?}[{}..{}]", (&raw const input_file).read(), (&raw const buffer).read(), (&raw const cursor).read(), (&raw const buffer_len).read());
tokeniser_print();
while let Some(lexeme) = find_lexeme().as_lexeme() {
eprintln!("Found lexeme: {}", lexeme.lex());
}
eprintln!("Finished tokenising.");
}
}