#[path = "shared/shared.rs"] mod util; use util::*; #[derive(Debug)] struct Lexeme(u8, &'static str); impl PartialEq for Lexeme { fn eq(&self, other: &Self) -> bool { match self.0 { // Identifiers and numbers compare both token and lexeme 30 | 31 => self.0 == other.0 && self.1 == other.1, _ => self.0 == other.0, } } } impl Eq for Lexeme {} trait AsLexeme { fn as_lexeme(self) -> Option; } #[repr(C)] struct LexemeRaw { token: u8, lexeme: *const u8, len: usize, } impl AsLexeme for LexemeRaw { fn as_lexeme(self) -> Option { let Self { token, lexeme, len } = self; let slice = unsafe { core::str::from_utf8_unchecked(core::slice::from_raw_parts(lexeme, len)) }; match token { 1.. => Some(Lexeme(token, slice)), _ => None, } } } #[allow(dead_code)] unsafe extern "C" { unsafe fn tokeniser_init(path: *const i8) -> (); unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> (); unsafe fn tokeniser_print() -> (); unsafe fn is_ident(len: usize) -> bool; unsafe fn is_number(len: usize) -> bool; unsafe fn skip_whitespace() -> (); unsafe fn find_lexeme() -> LexemeRaw; unsafe fn expect_token(token: u8) -> MaybeFFISlice; unsafe fn unwrap_token(token: u8) -> FFISlice; static mut LEXEMES: *const u8; static mut LEXEME_LENS: usize; static mut NUM_LEXEMES: usize; static mut TOKENS: u8; static mut input_file: u32; static mut buffer: *mut u8; static mut cursor: usize; static mut buffer_len: usize; unsafe fn exit(code: i32) -> !; } fn collect_tokens() -> Vec { let mut lexemes = Vec::new(); unsafe { while let Some(lexeme) = find_lexeme().as_lexeme() { lexemes.push(lexeme); } } lexemes } fn main() { unsafe { // assert initial state assert_eq!((&raw const input_file).read(), 0); assert_eq!((&raw const buffer_len).read(), 0); assert_eq!((&raw const cursor).read(), 0); assert_eq!((&raw const buffer).read(), core::ptr::null_mut()); eprint!("Initializing tokeniser.. "); tokeniser_init(c"tests/tokens/keywords.l".as_ptr()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[ Lexeme(4, ""), Lexeme(1, ""), Lexeme(2, ""), Lexeme(3, ""), Lexeme(4, ""), Lexeme(8, ""), Lexeme(13, ""), Lexeme(11, ""), Lexeme(10, ""), Lexeme(9, ""), Lexeme(5, ""), ][..] ); eprint!("Initializing tokeniser.. "); tokeniser_init(c"tests/tokens/delimiters.l".as_ptr()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[ Lexeme(19, ""), Lexeme(18, ""), Lexeme(28, ""), Lexeme(29, ""), Lexeme(21, ""), Lexeme(20, ""), Lexeme(24, ""), Lexeme(12, ""), Lexeme(23, ""), Lexeme(22, ""), Lexeme(15, ""), ][..] ); eprint!("Initializing tokeniser.. "); tokeniser_init(c"tests/tokens/identifier.l".as_ptr()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[ Lexeme(31, "this-is-an-ident"), Lexeme(31, "another_ident123"), Lexeme(31, "_underscore_test"), Lexeme(31, "mixedCASEIdent"), Lexeme(31, "number12345"), Lexeme(31, "____"), Lexeme(31, "_"), Lexeme(17, ""), Lexeme(31, "leading-minus"), Lexeme(31, "trailing-minus-"), ] ); eprint!("Initializing tokeniser.. "); tokeniser_init(c"tests/tokens/function.l".as_ptr()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[ Lexeme(4, ""), Lexeme(31, "my-function"), Lexeme(19, ""), Lexeme(18, ""), Lexeme(12, ""), Lexeme(11, ""), Lexeme(21, ""), Lexeme(5, ""), Lexeme(10, ""), Lexeme(23, ""), Lexeme(20, ""), ] ); eprint!("Initializing tokeniser.. 
"); tokeniser_init(c"tests/tokens/function.l".as_ptr()); eprintln!("ok."); assert_eq!(expect_token(2).into_option(), None); assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn"); assert_eq!(unwrap_token(31).as_str(), "my-function"); eprint!("Initializing tokeniser.. "); tokeniser_init(c"tests/tokens/comment.l".as_ptr()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[ Lexeme(34, ""), Lexeme(4, ""), Lexeme(31, "my-function"), Lexeme(19, ""), Lexeme(18, ""), Lexeme(12, ""), Lexeme(11, ""), Lexeme(21, ""), Lexeme(34, ""), Lexeme(5, ""), Lexeme(10, ""), Lexeme(23, ""), Lexeme(20, ""), ] ); eprint!("Initializing tokeniser.. "); tokeniser_init(c"tests/tokens/number.l".as_ptr()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[ Lexeme(32, "1234"), Lexeme(32, "123_345_"), Lexeme(32, "1234____56"), Lexeme(32, "1"), Lexeme(32, "0"), ] ); eprint!("Initializing tokeniser.. "); tokeniser_init(c"tests/tokens/strings.l".as_ptr()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[ Lexeme(33, "\"this is a string\""), Lexeme(33, "\"another\nstring\nspanning multiple\n lines\""), Lexeme(33, "\"string with a \\\"quoted\\\" word\""), Lexeme(33, "\"a\""), Lexeme(33, "\"\"") ], ); eprint!("Initializing tokeniser.. "); let src = b"3 + 4"; tokeniser_init_buf(src.as_ptr(), src.len()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")], ); eprint!("Initializing tokeniser.. "); let src = b"fn main() -> void { return 1 + 2; }"; tokeniser_init_buf(src.as_ptr(), src.len()); eprintln!("ok."); assert_eq!( &collect_tokens()[..], &[ Lexeme(4, "fn"), Lexeme(31, "main"), Lexeme(19, "("), Lexeme(18, ")"), Lexeme(12, "->"), Lexeme(30, "void"), Lexeme(21, "{"), Lexeme(5, "return"), Lexeme(32, "1"), Lexeme(16, "+"), Lexeme(32, "2"), Lexeme(23, ";"), Lexeme(20, "}"), ], ); eprintln!("Finished tokenising."); } }