diff --git a/lang/src/tokeniser.asm b/lang/src/tokeniser.asm
index 6b050a7..9a84f37 100644
--- a/lang/src/tokeniser.asm
+++ b/lang/src/tokeniser.asm
@@ -19,6 +19,7 @@ extern is_id_start
 extern is_whitespace
 
 global tokeniser_init
+global tokeniser_init_buf
 global tokeniser_print
 global find_lexeme
 global expect_token
@@ -54,6 +55,22 @@ section .bss
 statbuf: resb 144
 
 section .text
+
+;; rdi: pointer to buffer
+;; rsi: length of buffer
+tokeniser_init_buf:
+    push rbp
+    mov rbp, rsp
+
+    mov dword [rel input_file], 0
+    mov qword [rel buffer], rdi
+    mov qword [rel buffer_len], rsi
+    mov qword [rel cursor], 0
+
+    pop rbp
+    ret
+
+
 ;; Initialises the tokeniser
 ;; rdx: pointer to filename (null-terminated)
 tokeniser_init:
diff --git a/lang/tests/shared/shared.rs b/lang/tests/shared/shared.rs
index 091e770..7eed257 100644
--- a/lang/tests/shared/shared.rs
+++ b/lang/tests/shared/shared.rs
@@ -1,8 +1,13 @@
 #![allow(dead_code)]
 
+#[inline(never)]
+fn __do_panic() -> ! {
+    panic!("Called panic from external code.");
+}
+
 #[unsafe(no_mangle)]
 extern "C" fn panic() -> ! {
-    panic!("Called panic from external code.");
+    __do_panic()
 }
 
 #[repr(C)]
diff --git a/lang/tests/tokens.rs b/lang/tests/tokens.rs
index c96102b..24c80fe 100644
--- a/lang/tests/tokens.rs
+++ b/lang/tests/tokens.rs
@@ -43,6 +43,7 @@ impl AsLexeme for LexemeRaw {
 #[allow(dead_code)]
 unsafe extern "C" {
     unsafe fn tokeniser_init(path: *const i8) -> ();
+    unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
     unsafe fn tokeniser_print() -> ();
     unsafe fn is_ident(len: usize) -> bool;
     unsafe fn is_number(len: usize) -> bool;
@@ -133,16 +134,16 @@ fn main() {
         assert_eq!(
             &collect_tokens()[..],
             &[
-                Lexeme(30, "this-is-an-ident"),
-                Lexeme(30, "another_ident123"),
-                Lexeme(30, "_underscore_test"),
-                Lexeme(30, "mixedCASEIdent"),
-                Lexeme(30, "number12345"),
-                Lexeme(30, "____"),
-                Lexeme(30, "_"),
+                Lexeme(31, "this-is-an-ident"),
+                Lexeme(31, "another_ident123"),
+                Lexeme(31, "_underscore_test"),
+                Lexeme(31, "mixedCASEIdent"),
+                Lexeme(31, "number12345"),
+                Lexeme(31, "____"),
+                Lexeme(31, "_"),
                 Lexeme(17, ""),
-                Lexeme(30, "leading-minus"),
-                Lexeme(30, "trailing-minus-"),
+                Lexeme(31, "leading-minus"),
+                Lexeme(31, "trailing-minus-"),
             ]
         );
 
@@ -154,7 +155,7 @@
             &collect_tokens()[..],
             &[
                 Lexeme(4, ""),
-                Lexeme(30, "my-function"),
+                Lexeme(31, "my-function"),
                 Lexeme(19, ""),
                 Lexeme(18, ""),
                 Lexeme(12, ""),
@@ -173,7 +174,7 @@
 
         assert_eq!(expect_token(2).into_option(), None);
         assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
-        assert_eq!(unwrap_token(30).as_str(), "my-function");
+        assert_eq!(unwrap_token(31).as_str(), "my-function");
 
         eprint!("Initializing tokeniser.. ");
         tokeniser_init(c"tests/tokens/comment.l".as_ptr());
@@ -182,15 +183,15 @@
         assert_eq!(
             &collect_tokens()[..],
             &[
-                Lexeme(33, ""),
+                Lexeme(34, ""),
                 Lexeme(4, ""),
-                Lexeme(30, "my-function"),
+                Lexeme(31, "my-function"),
                 Lexeme(19, ""),
                 Lexeme(18, ""),
                 Lexeme(12, ""),
                 Lexeme(11, ""),
                 Lexeme(21, ""),
-                Lexeme(33, ""),
+                Lexeme(34, ""),
                 Lexeme(5, ""),
                 Lexeme(10, ""),
                 Lexeme(23, ""),
@@ -205,11 +206,11 @@
         assert_eq!(
             &collect_tokens()[..],
             &[
-                Lexeme(31, "1234"),
-                Lexeme(31, "123_345_"),
-                Lexeme(31, "1234____56"),
-                Lexeme(31, "1"),
-                Lexeme(31, "0"),
+                Lexeme(32, "1234"),
+                Lexeme(32, "123_345_"),
+                Lexeme(32, "1234____56"),
+                Lexeme(32, "1"),
+                Lexeme(32, "0"),
             ]
         );
 
@@ -220,14 +221,24 @@
         assert_eq!(
             &collect_tokens()[..],
             &[
-                Lexeme(32, "\"this is a string\""),
-                Lexeme(32, "\"another\nstring\nspanning multiple\n lines\""),
-                Lexeme(32, "\"string with a \\\"quoted\\\" word\""),
-                Lexeme(32, "\"a\""),
-                Lexeme(32, "\"\"")
+                Lexeme(33, "\"this is a string\""),
+                Lexeme(33, "\"another\nstring\nspanning multiple\n lines\""),
+                Lexeme(33, "\"string with a \\\"quoted\\\" word\""),
+                Lexeme(33, "\"a\""),
+                Lexeme(33, "\"\"")
             ],
         );
 
+        eprint!("Initializing tokeniser.. ");
+        let src = b"3 + 4";
+        tokeniser_init_buf(src.as_ptr(), src.len());
+        eprintln!("ok.");
+
+        assert_eq!(
+            &collect_tokens()[..],
+            &[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")],
+        );
+
         eprintln!("Finished tokenising.");
     }
 }