init tokeniser with buffer
parent 46053090f4
commit bf9d07b462
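Adds a second entry point, tokeniser_init_buf, which points the tokeniser at an in-memory buffer (pointer + length) instead of opening a file; input_file is zeroed, presumably so the read path can tell buffer input apart from file input. On the Rust side, the panic shim is split into an #[inline(never)] __do_panic helper, the new extern is declared, and a test tokenises b"3 + 4" straight from memory. Expected lexeme IDs in the tests shift up by one (30→31, 31→32, 32→33, 33→34), apparently tracking a renumbering of token kinds elsewhere in the tokeniser.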
@@ -19,6 +19,7 @@ extern is_id_start
 extern is_whitespace
 
 global tokeniser_init
+global tokeniser_init_buf
 global tokeniser_print
 global find_lexeme
 global expect_token
@@ -54,6 +55,22 @@ section .bss
 statbuf: resb 144
 
 section .text
+
+;; rdi: pointer to buffer
+;; rsi: length of buffer
+tokeniser_init_buf:
+        push rbp
+        mov rbp, rsp
+
+        mov dword [rel input_file], 0
+        mov qword [rel buffer], rdi
+        mov qword [rel buffer_len], rsi
+        mov qword [rel cursor], 0
+
+        pop rbp
+        ret
+
+
 ;; Initialises the tokeniser
 ;; rdx: pointer to filename (null-terminated)
 tokeniser_init:
@@ -1,8 +1,13 @@
 #![allow(dead_code)]
 
+#[inline(never)]
+fn __do_panic() -> ! {
+    panic!("Called panic from external code.");
+}
+
 #[unsafe(no_mangle)]
 extern "C" fn panic() -> ! {
-    panic!("Called panic from external code.");
+    __do_panic()
 }
 
 #[repr(C)]
@@ -43,6 +43,7 @@ impl AsLexeme for LexemeRaw {
 #[allow(dead_code)]
 unsafe extern "C" {
     unsafe fn tokeniser_init(path: *const i8) -> ();
+    unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
     unsafe fn tokeniser_print() -> ();
     unsafe fn is_ident(len: usize) -> bool;
     unsafe fn is_number(len: usize) -> bool;
@@ -133,16 +134,16 @@ fn main() {
         assert_eq!(
             &collect_tokens()[..],
             &[
-                Lexeme(30, "this-is-an-ident"),
-                Lexeme(30, "another_ident123"),
-                Lexeme(30, "_underscore_test"),
-                Lexeme(30, "mixedCASEIdent"),
-                Lexeme(30, "number12345"),
-                Lexeme(30, "____"),
-                Lexeme(30, "_"),
+                Lexeme(31, "this-is-an-ident"),
+                Lexeme(31, "another_ident123"),
+                Lexeme(31, "_underscore_test"),
+                Lexeme(31, "mixedCASEIdent"),
+                Lexeme(31, "number12345"),
+                Lexeme(31, "____"),
+                Lexeme(31, "_"),
                 Lexeme(17, ""),
-                Lexeme(30, "leading-minus"),
-                Lexeme(30, "trailing-minus-"),
+                Lexeme(31, "leading-minus"),
+                Lexeme(31, "trailing-minus-"),
             ]
         );
 
@@ -154,7 +155,7 @@ fn main() {
             &collect_tokens()[..],
             &[
                 Lexeme(4, ""),
-                Lexeme(30, "my-function"),
+                Lexeme(31, "my-function"),
                 Lexeme(19, ""),
                 Lexeme(18, ""),
                 Lexeme(12, ""),
@@ -173,7 +174,7 @@ fn main() {
 
         assert_eq!(expect_token(2).into_option(), None);
         assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
-        assert_eq!(unwrap_token(30).as_str(), "my-function");
+        assert_eq!(unwrap_token(31).as_str(), "my-function");
 
         eprint!("Initializing tokeniser.. ");
         tokeniser_init(c"tests/tokens/comment.l".as_ptr());
@@ -182,15 +183,15 @@ fn main() {
         assert_eq!(
             &collect_tokens()[..],
             &[
-                Lexeme(33, ""),
+                Lexeme(34, ""),
                 Lexeme(4, ""),
-                Lexeme(30, "my-function"),
+                Lexeme(31, "my-function"),
                 Lexeme(19, ""),
                 Lexeme(18, ""),
                 Lexeme(12, ""),
                 Lexeme(11, ""),
                 Lexeme(21, ""),
-                Lexeme(33, ""),
+                Lexeme(34, ""),
                 Lexeme(5, ""),
                 Lexeme(10, ""),
                 Lexeme(23, ""),
@@ -205,11 +206,11 @@ fn main() {
         assert_eq!(
             &collect_tokens()[..],
             &[
-                Lexeme(31, "1234"),
-                Lexeme(31, "123_345_"),
-                Lexeme(31, "1234____56"),
-                Lexeme(31, "1"),
-                Lexeme(31, "0"),
+                Lexeme(32, "1234"),
+                Lexeme(32, "123_345_"),
+                Lexeme(32, "1234____56"),
+                Lexeme(32, "1"),
+                Lexeme(32, "0"),
             ]
         );
 
@@ -220,14 +221,24 @@ fn main() {
         assert_eq!(
             &collect_tokens()[..],
             &[
-                Lexeme(32, "\"this is a string\""),
-                Lexeme(32, "\"another\nstring\nspanning multiple\n lines\""),
-                Lexeme(32, "\"string with a \\\"quoted\\\" word\""),
-                Lexeme(32, "\"a\""),
-                Lexeme(32, "\"\"")
+                Lexeme(33, "\"this is a string\""),
+                Lexeme(33, "\"another\nstring\nspanning multiple\n lines\""),
+                Lexeme(33, "\"string with a \\\"quoted\\\" word\""),
+                Lexeme(33, "\"a\""),
+                Lexeme(33, "\"\"")
             ],
         );
 
+        eprint!("Initializing tokeniser.. ");
+        let src = b"3 + 4";
+        tokeniser_init_buf(src.as_ptr(), src.len());
+        eprintln!("ok.");
+
+        assert_eq!(
+            &collect_tokens()[..],
+            &[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")],
+        );
+
         eprintln!("Finished tokenising.");
     }
 }
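For callers, a minimal sketch of how the new entry point can be wrapped on the Rust side. The init_from_str helper is hypothetical (not part of this commit); the extern declaration is the one added above. The buffer is borrowed, not copied: the assembly stores only the raw pointer and length, so the source must stay alive while tokens are being pulled.

unsafe extern "C" {
    unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
}

// Hypothetical convenience wrapper around the new entry point.
// Safety: the tokeniser keeps the raw pointer, so `src` must outlive
// every subsequent find_lexeme/expect_token call.
fn init_from_str(src: &str) {
    unsafe { tokeniser_init_buf(src.as_ptr(), src.len()) }
}

Called as init_from_str("3 + 4"), this reproduces the new test above without the manual pointer/length plumbing.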