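//! Integration test for the externally implemented tokeniser: it initialises
//! the tokeniser over the fixture files under `tests/tokens/` and checks the
//! sequence of lexeme indices reported by `find_lexeme`.

// Exported unmangled so the foreign tokeniser code can report fatal errors
// back through a Rust panic.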
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
    panic!("Called panic from external code.");
}

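/// Index of a lexeme in the tokeniser's `LEXEMES`/`LEXEME_LENS` tables.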
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Lexeme(u8);

impl Lexeme {
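    /// Looks up the `&'static str` spelling of this lexeme in the external
    /// `LEXEMES` and `LEXEME_LENS` tables.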
    fn lex(&self) -> &'static str {
        // SAFETY: lens contains the correct length for each lexeme, and lexemes
        // contains pointers to valid 'static UTF-8 data.
        unsafe {
            core::str::from_utf8_unchecked(
                core::slice::from_raw_parts(
                    (&raw const LEXEMES).add((self.0) as usize).read(),
                    (&raw const LEXEME_LENS).add((self.0) as usize).read(),
                )
            )
        }
    }
}

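/// Conversion from the raw `u8` returned by `find_lexeme`, where `0` is
/// treated as the end of input.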
trait AsLexeme {
    fn as_lexeme(self) -> Option<Lexeme>;
}

impl AsLexeme for u8 {
    fn as_lexeme(self) -> Option<Lexeme> {
        match self {
            1.. => Some(Lexeme(self)),
            _ => None,
        }
    }
}

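// Symbols provided by the external tokeniser object this test links against.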
#[allow(dead_code)]
unsafe extern "C" {
    unsafe fn tokeniser_init(path: *const i8) -> ();
    unsafe fn tokeniser_print() -> ();
    unsafe fn is_ident(len: usize) -> bool;
    unsafe fn is_number(len: usize) -> bool;
    unsafe fn skip_whitespace() -> ();
    unsafe fn find_lexeme() -> u8;

    static mut LEXEMES: *const u8;
    static mut LEXEME_LENS: usize;
    static mut NUM_LEXEMES: usize;
    static mut TOKENS: u8;

    static mut input_file: u32;
    static mut buffer: *mut u8;
    static mut cursor: usize;
    static mut buffer_len: usize;

    unsafe fn exit(code: i32) -> !;
}

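/// Drains the tokeniser, collecting lexemes until `find_lexeme` reports `0`.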
fn collect_tokens() -> Vec<Lexeme> {
    let mut lexemes = Vec::new();
    unsafe {
        while let Some(lexeme) = find_lexeme().as_lexeme() {
            lexemes.push(lexeme);
        }
    }

    lexemes
}

fn main() {
    unsafe {
        // assert initial state
        assert_eq!((&raw const input_file).read(), 0);
        assert_eq!((&raw const buffer_len).read(), 0);
        assert_eq!((&raw const cursor).read(), 0);
        assert_eq!((&raw const buffer).read(), core::ptr::null_mut());

        eprint!("Initializing tokeniser.. ");
        tokeniser_init(c"tests/tokens/keywords.l".as_ptr());
        eprintln!("ok.");

        assert_eq!(&collect_tokens()[..], &[
            Lexeme(4),
            Lexeme(1),
            Lexeme(2),
            Lexeme(3),
            Lexeme(4),
            Lexeme(8),
            Lexeme(13),
            Lexeme(11),
            Lexeme(10),
            Lexeme(9),
            Lexeme(5),
        ][..]);

        eprint!("Initializing tokeniser.. ");
        tokeniser_init(c"tests/tokens/delimiters.l".as_ptr());
        eprintln!("ok.");

        assert_eq!(&collect_tokens()[..], &[
            Lexeme(19),
            Lexeme(18),
            Lexeme(28),
            Lexeme(29),
            Lexeme(21),
            Lexeme(20),
            Lexeme(24),
            Lexeme(12),
            Lexeme(23),
            Lexeme(22),
            Lexeme(15),
        ][..]);

        eprintln!("Finished tokenising.");
    }
}