init tokeniser with buffer

janis 2025-10-29 20:39:22 +01:00
parent 46053090f4
commit bf9d07b462
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
3 changed files with 58 additions and 25 deletions


@@ -19,6 +19,7 @@ extern is_id_start
 extern is_whitespace

 global tokeniser_init
+global tokeniser_init_buf
 global tokeniser_print
 global find_lexeme
 global expect_token
@@ -54,6 +55,22 @@ section .bss
 statbuf: resb 144

 section .text

+;; rdi: pointer to buffer
+;; rsi: length of buffer
+tokeniser_init_buf:
+    push rbp
+    mov rbp, rsp
+    mov dword [rel input_file], 0
+    mov qword [rel buffer], rdi
+    mov qword [rel buffer_len], rsi
+    mov qword [rel cursor], 0
+    pop rbp
+    ret
+
 ;; Initialises the tokeniser
 ;; rdx: pointer to filename (null-terminated)
 tokeniser_init:
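
The new entry point reuses the file-based tokeniser's global state: it zeroes input_file, points buffer at caller-provided memory, records buffer_len, and rewinds cursor. As a rough mental model only (the .bss definitions are not part of this diff, and reading input_file = 0 as "buffer mode" is an assumption), the routine amounts to this Rust sketch:

// Hypothetical mirror of the tokeniser's globals. Field names follow the
// asm above; the real definitions live in the (unshown) .bss section.
struct TokeniserState {
    input_file: i32,   // assumed: 0 marks "no file, read from the buffer"
    buffer: *const u8, // start of the caller-provided source bytes
    buffer_len: usize, // number of source bytes
    cursor: usize,     // current read offset, rewound on init
}

// What tokeniser_init_buf(rdi = buf, rsi = len) does, expressed in Rust.
fn init_buf(state: &mut TokeniserState, buf: *const u8, len: usize) {
    state.input_file = 0;
    state.buffer = buf;
    state.buffer_len = len;
    state.cursor = 0;
}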


@@ -1,8 +1,13 @@
 #![allow(dead_code)]

+#[inline(never)]
+fn __do_panic() -> ! {
+    panic!("Called panic from external code.");
+}
+
 #[unsafe(no_mangle)]
 extern "C" fn panic() -> ! {
-    panic!("Called panic from external code.");
+    __do_panic()
 }

 #[repr(C)]
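
Moving the panic! call behind an #[inline(never)] helper keeps the exported panic shim a minimal, stable entry point for the assembly side, with the formatting machinery forced out of line. A common companion to this pattern is sketched below; the #[cold] hint and the report_failure symbol are hypothetical, not part of this commit:

// Same out-of-line pattern with an added #[cold] hint for the optimiser.
#[inline(never)]
#[cold]
fn cold_panic(msg: &str) -> ! {
    panic!("{msg}");
}

#[unsafe(no_mangle)]
extern "C" fn report_failure() -> ! {
    cold_panic("Called report_failure from external code.")
}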


@@ -43,6 +43,7 @@ impl AsLexeme for LexemeRaw {
 #[allow(dead_code)]
 unsafe extern "C" {
     unsafe fn tokeniser_init(path: *const i8) -> ();
+    unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
     unsafe fn tokeniser_print() -> ();
     unsafe fn is_ident(len: usize) -> bool;
     unsafe fn is_number(len: usize) -> bool;
@@ -133,16 +134,16 @@ fn main() {
     assert_eq!(
         &collect_tokens()[..],
         &[
-            Lexeme(30, "this-is-an-ident"),
-            Lexeme(30, "another_ident123"),
-            Lexeme(30, "_underscore_test"),
-            Lexeme(30, "mixedCASEIdent"),
-            Lexeme(30, "number12345"),
-            Lexeme(30, "____"),
-            Lexeme(30, "_"),
+            Lexeme(31, "this-is-an-ident"),
+            Lexeme(31, "another_ident123"),
+            Lexeme(31, "_underscore_test"),
+            Lexeme(31, "mixedCASEIdent"),
+            Lexeme(31, "number12345"),
+            Lexeme(31, "____"),
+            Lexeme(31, "_"),
             Lexeme(17, ""),
-            Lexeme(30, "leading-minus"),
-            Lexeme(30, "trailing-minus-"),
+            Lexeme(31, "leading-minus"),
+            Lexeme(31, "trailing-minus-"),
         ]
     );
@@ -154,7 +155,7 @@ fn main() {
         &collect_tokens()[..],
         &[
             Lexeme(4, ""),
-            Lexeme(30, "my-function"),
+            Lexeme(31, "my-function"),
             Lexeme(19, ""),
             Lexeme(18, ""),
             Lexeme(12, ""),
@@ -173,7 +174,7 @@ fn main() {
     assert_eq!(expect_token(2).into_option(), None);
     assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
-    assert_eq!(unwrap_token(30).as_str(), "my-function");
+    assert_eq!(unwrap_token(31).as_str(), "my-function");

     eprint!("Initializing tokeniser.. ");
     tokeniser_init(c"tests/tokens/comment.l".as_ptr());
@@ -182,15 +183,15 @@ fn main() {
     assert_eq!(
         &collect_tokens()[..],
         &[
-            Lexeme(33, ""),
+            Lexeme(34, ""),
             Lexeme(4, ""),
-            Lexeme(30, "my-function"),
+            Lexeme(31, "my-function"),
             Lexeme(19, ""),
             Lexeme(18, ""),
             Lexeme(12, ""),
             Lexeme(11, ""),
             Lexeme(21, ""),
-            Lexeme(33, ""),
+            Lexeme(34, ""),
             Lexeme(5, ""),
             Lexeme(10, ""),
             Lexeme(23, ""),
@@ -205,11 +206,11 @@ fn main() {
     assert_eq!(
         &collect_tokens()[..],
         &[
-            Lexeme(31, "1234"),
-            Lexeme(31, "123_345_"),
-            Lexeme(31, "1234____56"),
-            Lexeme(31, "1"),
-            Lexeme(31, "0"),
+            Lexeme(32, "1234"),
+            Lexeme(32, "123_345_"),
+            Lexeme(32, "1234____56"),
+            Lexeme(32, "1"),
+            Lexeme(32, "0"),
         ]
     );
@@ -220,14 +221,24 @@ fn main() {
     assert_eq!(
         &collect_tokens()[..],
         &[
-            Lexeme(32, "\"this is a string\""),
-            Lexeme(32, "\"another\nstring\nspanning multiple\n lines\""),
-            Lexeme(32, "\"string with a \\\"quoted\\\" word\""),
-            Lexeme(32, "\"a\""),
-            Lexeme(32, "\"\"")
+            Lexeme(33, "\"this is a string\""),
+            Lexeme(33, "\"another\nstring\nspanning multiple\n lines\""),
+            Lexeme(33, "\"string with a \\\"quoted\\\" word\""),
+            Lexeme(33, "\"a\""),
+            Lexeme(33, "\"\"")
         ],
     );

+    eprint!("Initializing tokeniser.. ");
+    let src = b"3 + 4";
+    tokeniser_init_buf(src.as_ptr(), src.len());
+    eprintln!("ok.");
+
+    assert_eq!(
+        &collect_tokens()[..],
+        &[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")],
+    );
+
     eprintln!("Finished tokenising.");
     }
 }
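
The added test drives the new entry point end to end: tokeniser_init_buf is handed a borrowed byte slice, and collect_tokens then pulls tokens from memory instead of a file. Because the extern function takes a raw pointer and a length, a thin safe wrapper is the natural call-site shape; the sketch below is hypothetical and assumes the tokeniser only reads the buffer while tokens are being fetched:

// Hypothetical safe wrapper over the new FFI entry point (not in this commit).
// SAFETY assumption: the asm side only reads `src` between this call and the
// last token fetch, so `src` must outlive the whole tokenising run.
fn tokenise_buffer(src: &[u8]) {
    unsafe { tokeniser_init_buf(src.as_ptr(), src.len()) };
}

// Usage mirroring the test: tokenise_buffer(b"3 + 4"); then collect_tokens().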