106 lines
3 KiB
Rust
106 lines
3 KiB
Rust
/// Panic entry point exported with the C ABI under the unmangled symbol `panic`.
///
/// Never returns: it immediately raises a Rust panic with a fixed message.
///
/// NOTE(review): presumably called by the non-Rust tokeniser code when it hits a
/// fatal error — the caller is not visible in this file, confirm.
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
    // `panic!` is the standard macro; it lives in a different namespace from this
    // function, so there is no recursion here.
    panic!("Called panic from external code.");
}
|
|
|
|
/// Identifier of a lexeme, wrapping the raw `u8` code it was built from.
///
/// `Lexeme::lex` uses the wrapped value as an index into the external lexeme
/// tables. The type is a plain one-byte value, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Lexeme(u8);
|
|
|
|
impl Lexeme {
|
|
fn lex(&self) -> &'static str {
|
|
// SAFETY: lens contains the correct length for each lexeme, and lexemes
|
|
// contains pointers to valid 'static UTF-8 data.
|
|
unsafe {
|
|
core::str::from_utf8_unchecked(
|
|
core::slice::from_raw_parts(
|
|
(&raw const LEXEMES).add((self.0) as usize).read(),
|
|
(&raw const LEXEME_LENS).add((self.0) as usize).read(),
|
|
)
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
trait AsLexeme {
|
|
fn as_lexeme(self) -> Option<Lexeme>;
|
|
}
|
|
|
|
impl AsLexeme for u8 {
|
|
fn as_lexeme(self) -> Option<Lexeme> {
|
|
match self {
|
|
1..=10 => Some(Lexeme(self)),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Declarations of the symbols provided by the externally linked tokeniser.
// Several of them are not referenced from Rust in this file, hence `dead_code`.
#[allow(dead_code)]
unsafe extern "C" {
    /// Takes a pointer to a C string. `c_char` (rather than `i8`) keeps the
    /// signature compatible with `CStr::as_ptr` on targets where C's `char` is
    /// unsigned.
    unsafe fn tokeniser_init(path: *const core::ffi::c_char);
    unsafe fn tokeniser_print();
    unsafe fn is_ident(len: usize) -> bool;
    unsafe fn is_number(len: usize) -> bool;
    unsafe fn skip_whitespace();
    /// Returns a raw lexeme code; `main` converts it with `AsLexeme::as_lexeme`.
    unsafe fn find_lexeme() -> u8;

    /// First element of a table of string pointers; `Lexeme::lex` indexes past it.
    static mut LEXEMES: *const u8;
    /// First element of a table of lengths, indexed in parallel with `LEXEMES`.
    static mut LEXEME_LENS: usize;
    // NOTE(review): presumably the number of entries in the two tables above —
    // nothing in the live code reads it, confirm against the tokeniser source.
    static mut NUM_LEXEMES: usize;
    static mut TOKENS: u8;

    // Tokeniser state. `main` asserts that all four are zero/null before
    // `tokeniser_init` runs and prints them afterwards.
    static mut input_file: u32;
    static mut buffer: *mut u8;
    static mut cursor: usize;
    static mut buffer_len: usize;

    unsafe fn exit(code: i32) -> !;
}
|
|
|
|
// fn lexemes_raw() -> &'static [*const u8] {
|
|
// unsafe {
|
|
// core::slice::from_raw_parts(
|
|
// (&raw const LEXEMES),
|
|
// (&raw const NUM_LEXEMES).read(),
|
|
// )
|
|
// }
|
|
// }
|
|
|
|
// fn lexeme_lens() -> &'static [usize] {
|
|
// unsafe {
|
|
// core::slice::from_raw_parts(
|
|
// (&raw const LEXEME_LENS),
|
|
// (&raw const NUM_LEXEMES).read(),
|
|
// )
|
|
// }
|
|
// }
|
|
|
|
// fn lexeme_iter() -> impl Iterator<Item = &'static str> {
|
|
// lexemes_raw().iter().zip(lexeme_lens().iter()).map(|(&ptr, &len)| {
|
|
// // SAFETY: lexemes_raw and lexeme_lens are guaranteed to contain valid
|
|
// // UTF-8 data and correct lengths.
|
|
// unsafe {
|
|
// core::str::from_utf8_unchecked(core::slice::from_raw_parts(ptr, len))
|
|
// }
|
|
// })
|
|
// }
|
|
|
|
fn main() {
|
|
let path = c"tests/tokens.l";
|
|
|
|
unsafe {
|
|
assert_eq!((&raw const input_file).read(), 0);
|
|
assert_eq!((&raw const buffer_len).read(), 0);
|
|
assert_eq!((&raw const cursor).read(), 0);
|
|
assert_eq!((&raw const buffer).read(), core::ptr::null_mut());
|
|
eprint!("Initializing tokeniser.. ");
|
|
tokeniser_init(path.as_ptr());
|
|
eprintln!("ok.");
|
|
eprintln!("{}: {:?}[{}..{}]", (&raw const input_file).read(), (&raw const buffer).read(), (&raw const cursor).read(), (&raw const buffer_len).read());
|
|
tokeniser_print();
|
|
|
|
while let Some(lexeme) = find_lexeme().as_lexeme() {
|
|
eprintln!("Found lexeme: {}", lexeme.lex());
|
|
}
|
|
eprintln!("Finished tokenising.");
|
|
}
|
|
}
|