//! Integration harness for an external C tokeniser: initialises it over two
//! fixture files and asserts the exact lexeme streams it produces.

/// Panic entry point exported to the C side (`no_mangle`), so external code
/// can abort through Rust's panic machinery.
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
    panic!("Called panic from external code.");
}

/// Index of a lexeme in the C-side `LEXEMES` / `LEXEME_LENS` tables.
/// Index 0 is reserved as the "no more tokens" sentinel (see [`AsLexeme`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Lexeme(u8);

impl Lexeme {
    /// Returns the spelling of this lexeme by indexing the C-side tables.
    ///
    /// NOTE(review): `LEXEMES` / `LEXEME_LENS` are declared as single values
    /// but are indexed from their addresses, i.e. they are presumably the
    /// first elements of parallel C arrays — confirm against the C side.
    fn lex(&self) -> &'static str {
        // SAFETY: LEXEME_LENS contains the correct length for each lexeme, and
        // LEXEMES contains pointers to valid 'static UTF-8 data.
        unsafe {
            core::str::from_utf8_unchecked(core::slice::from_raw_parts(
                (&raw const LEXEMES).add(self.0 as usize).read(),
                (&raw const LEXEME_LENS).add(self.0 as usize).read(),
            ))
        }
    }
}

/// Conversion from the raw `u8` produced by `find_lexeme` into a [`Lexeme`].
trait AsLexeme {
    fn as_lexeme(self) -> Option<Lexeme>;
}

impl AsLexeme for u8 {
    /// Zero signals end-of-input; any other value is a valid lexeme index.
    fn as_lexeme(self) -> Option<Lexeme> {
        match self {
            1.. => Some(Lexeme(self)),
            _ => None,
        }
    }
}

// Symbols provided by the C tokeniser object file linked into this test.
#[allow(dead_code)]
unsafe extern "C" {
    unsafe fn tokeniser_init(path: *const i8) -> ();
    unsafe fn tokeniser_print() -> ();
    unsafe fn is_ident(len: usize) -> bool;
    unsafe fn is_number(len: usize) -> bool;
    unsafe fn skip_whitespace() -> ();
    unsafe fn find_lexeme() -> u8;
    // NOTE(review): indexed from their addresses in `Lexeme::lex`, so these
    // look like the first elements of C arrays rather than scalars.
    static mut LEXEMES: *const u8;
    static mut LEXEME_LENS: usize;
    static mut NUM_LEXEMES: usize;
    static mut TOKENS: u8;
    // Tokeniser state globals; `main` asserts their pre-init values.
    static mut input_file: u32;
    static mut buffer: *mut u8;
    static mut cursor: usize;
    static mut buffer_len: usize;
    unsafe fn exit(code: i32) -> !;
}

/// Drains the tokeniser, collecting lexemes until `find_lexeme` returns the
/// end-of-input sentinel (0).
fn collect_tokens() -> Vec<Lexeme> {
    let mut lexemes = Vec::new();
    // SAFETY: the tokeniser must have been initialised via `tokeniser_init`
    // before this is called (upheld by `main`).
    unsafe {
        while let Some(lexeme) = find_lexeme().as_lexeme() {
            lexemes.push(lexeme);
        }
    }
    lexemes
}

fn main() {
    unsafe {
        // Assert initial state of the C globals before any initialisation.
        assert_eq!((&raw const input_file).read(), 0);
        assert_eq!((&raw const buffer_len).read(), 0);
        assert_eq!((&raw const cursor).read(), 0);
        assert_eq!((&raw const buffer).read(), core::ptr::null_mut());

        eprint!("Initializing tokeniser.. ");
        tokeniser_init(c"tests/tokens/keywords.l".as_ptr());
        eprintln!("ok.");
        assert_eq!(
            &collect_tokens()[..],
            &[
                Lexeme(4),
                Lexeme(1),
                Lexeme(2),
                Lexeme(3),
                Lexeme(4),
                Lexeme(8),
                Lexeme(13),
                Lexeme(11),
                Lexeme(10),
                Lexeme(9),
                Lexeme(5),
            ][..]
        );

        eprint!("Initializing tokeniser.. ");
        tokeniser_init(c"tests/tokens/delimiters.l".as_ptr());
        eprintln!("ok.");
        assert_eq!(
            &collect_tokens()[..],
            &[
                Lexeme(19),
                Lexeme(18),
                Lexeme(28),
                Lexeme(29),
                Lexeme(21),
                Lexeme(20),
                Lexeme(24),
                Lexeme(12),
                Lexeme(23),
                Lexeme(22),
                Lexeme(15),
            ][..]
        );
        eprintln!("Finished tokenising.");
    }
}