ast tests

parse structs and functions from asm for rust tests
initial ast
2025-10-29 22:10:34 +01:00 · 2025-10-29 22:10:14 +01:00 · 2025-10-29 20:39:32 +01:00 · 2025-10-29 20:39:22 +01:00 · 2025-10-29 16:21:15 +01:00 · 2025-10-29 16:21:01 +01:00
12 changed files with 1686 additions and 522 deletions
--- a/lang/Makefile
+++ b/lang/Makefile
@ -1,7 +1,7 @@
 # Makefile: Compile and link main.asm using nasm and mold, intermediate files in target/
 TARGET_DIR := target
-SRC := src/lib.asm src/int_to_str.asm src/vec.asm src/tokeniser.asm src/file.asm src/alloc.asm
+SRC := src/lib.asm src/int_to_str.asm src/vec.asm src/tokeniser.asm src/file.asm src/alloc.asm src/ast.asm
 OBJ := $(patsubst src/%.asm,$(TARGET_DIR)/%.o,$(SRC))
 BIN_SRC := src/main.asm src/panic.asm
@ -29,8 +29,12 @@ fmt: $(wildcard tests/*.rs)
 	@echo "Formatting test source files..."
 	rustfmt --edition 2024 $^
 tests/shared/defs.rs: $(wildcard src/*)
 	@echo "Generating shared definitions for tests..."
 	./tests/asm_to_rust.py $^ -o $@
 # pattern rule: compile each .rs into a binary with the same base name
-$(TARGET_DIR)/tests/%: tests/%.rs | $(OBJ) $(TARGET_DIR)/tests
+$(TARGET_DIR)/tests/%: tests/%.rs tests/shared/defs.rs $(OBJ) | $(TARGET_DIR)/tests
 	@echo "[$(RUSTC)] $< -> $@"
 	rustc -Clink-arg=-fuse-ld=mold --edition=2024 $(OBJ_LINK_ARGS) -g  -o $@ $< 
--- a/lang/src/ast.asm
+++ b/lang/src/ast.asm
@ -0,0 +1,489 @@
 default rel
 %include "src/tokeniser.inc"
 section .rdata
 ;; start-consts
    AST_FUNCTION equ 1
    AST_BLOCK equ 2
    AST_VARIABLE equ 3
    AST_NUMBER equ 4
    AST_BINARY_OP equ 5
    AST_RETURN_STATEMENT equ 6
    TYPE_VOID equ 1
    TYPE_BOOL equ 2
    TYPE_I32 equ 3
    TYPE_U32 equ 4
    TYPE_STR equ 5
 ;; end-consts
 section .text
 extern vec_init_with
 extern vec_push
 extern vec_get
 extern panic
 extern memcpy
 extern vec_binary_search_by
 extern vec_insert
 extern bump_alloc
 extern tokeniser_init
 extern find_lexeme
 extern peek_lexeme
 extern expect_token
 extern unwrap_token
 extern peek_expect_token
 extern str_to_int
 global parse_func
 global parse_args
 global parse_expr
 global parse_binary_expr
 global parse_primary_expr
 global parse_statement
 global parse_block
 ;; start very simple, with only functions and addition
 ;; ```rust
 ;; use super::vec::Vec;
 ;; ```
 ;; start-structs
 ;; struct Ast {
 ;;   nodes: Vec<AstNode>,
 ;; }
 ;;
 ;; struct AstNode {
 ;;   kind: u8,
 ;;   data: *const (),
 ;; }
 ;;
 ;; struct Argument {
 ;;   name: *const u8,
 ;;   name_len: usize,
 ;;   arg_type: Type,
 ;; }
 ;;
 ;; struct Type {
 ;;   kind: u8,
 ;; }
 ;; end-structs
 ;; Parses `fn <name>(<args>) -> <type> <block>` and appends an AST_FUNCTION
 ;; node whose data points at a bump-allocated copy of the 48-byte record
 ;; assembled on the stack below.
 ;;
 ;; rdi: *mut Ast
 ;; returns rax: index of the new node in Ast.nodes (len - 1 after the push)
 ;; define-fn: fn parse_func(ast: *mut Ast) -> u64
 parse_func:
    push rbp
    mov rbp, rsp
    push rdi
    sub rsp, 48
    ; name: *const u8 [0..8]
    ; name_len: usize [8..16]
    ; args_ptr: *const Arg [16..24]
    ; args_len: usize [24..32]
    ; return_type: Type [32..40]
    ; body: u64 [40..48]
    ; saved Ast pointer [48..56]
    mov qword [rsp + 16], 0     ; args stay (null, 0) when the list is empty
    mov qword [rsp + 24], 0
    mov dil, TOKEN_FN
    call unwrap_token
    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp], rax        ; function name
    mov [rsp + 8], rdx    ; function name length
    mov dil, TOKEN_LPARENS
    call unwrap_token
    mov dil, TOKEN_RPARENS
    call expect_token
    test rax, rax
    jz .args                    ; no `)` yet, so an argument list follows
 .after_args:
    mov dil, TOKEN_ARROW
    call unwrap_token
    mov rdi, [rsp + 48]         ; Ast
    call parse_type
    mov [rsp + 32], rax   ; return type
    mov dil, TOKEN_LBRACE
    call peek_expect_token      ; only look: parse_block consumes the `{`
    test rax, rax
    jz .panic
    mov rdi, [rsp + 48]         ; Ast
    call parse_block
    mov [rsp + 40], rax   ; body
 .epilogue:
    mov rdi, 48
    mov rsi, 8
    call bump_alloc
    push rax                    ; keep the heap address across memcpy
    lea rsi, [rsp + 8]          ; source: the record built above
    mov rdi, rax
    mov rdx, 48
    call memcpy
    pop rax                     ; rdi is caller-saved, so do not trust it here
    ; the record has been copied out; reuse [0..16] as the AstNode
    mov byte [rsp], AST_FUNCTION    ; kind
    mov [rsp + 8], rax         ; data
    mov rdi, [rsp + 48]         ; Ast
    lea rsi, [rsp]
    call vec_push
    mov rdi, [rsp + 48]         ; Ast (reload: vec_push may clobber rdi)
    mov rax, [rdi + 8]         ; return Ast.nodes.len()
    dec rax
    add rsp, 48
    pop rdi
    pop rbp
    ret
 .args:
    mov rdi, [rsp + 48]         ; Ast
    call parse_args
    mov [rsp + 16], rax   ; args_ptr
    mov [rsp + 24], rdx   ; args_len
    jmp .after_args
 .panic:
    call panic
 ;; Parses a comma-separated `name: type` list and consumes the closing `)`.
 ;; The opening `(` is expected to have been consumed by the caller. A trailing
 ;; comma before `)` is accepted.
 ;;
 ;; rdi: *mut Ast
 ;; returns rax: pointer to the collected Argument array, rdx: argument count
 ;; define-fn: fn parse_args(ast: *mut Ast) -> (*const Argument, usize)
 parse_args:
    push rbp
    mov rbp, rsp
    push rdi
    sub rsp, 64
    ; arg: Argument         [0..24]
    ; args: Vec<Argument>   [24..64]
    ; saved Ast pointer     [64..72]
    lea rdi, [rsp + 24]         ; vec
    mov rsi, 24                 ; size of Argument
    mov rdx, 0                  ; drop = None
    mov rcx, 16                 ; capacity
    call vec_init_with
 .loop:
    mov dil, TOKEN_RPARENS
    call expect_token
    test rax, rax
    jnz .done_args              ; non-zero: `)` was matched and consumed
    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp], rax        ; arg name
    mov [rsp + 8], rdx    ; arg name length
    mov dil, TOKEN_COLON
    call unwrap_token
    mov rdi, [rsp + 64]         ; Ast
    call parse_type
    mov [rsp + 16], rax         ; arg type
    lea rdi, [rsp + 24]         ; vec
    lea rsi, [rsp]              ; arg
    call vec_push
    mov dil, TOKEN_COMMA
    call expect_token
    test rax, rax
    jnz .loop                   ; a comma means another argument (or `)`) follows
    mov dil, TOKEN_RPARENS      ; no comma: the list must end here
    call unwrap_token
 .done_args:
    mov rax, [rsp + 24]   ; args_ptr
    mov rdx, [rsp + 32]   ; args_len
    add rsp, 64
    pop rdi
    pop rbp
    ret
 ;; Converts a number lexeme to an integer via str_to_int.
 ;; A lexeme longer than two bytes may start with a radix prefix:
 ;;   0x -> 16, 0o -> 8, 0b -> 2
 ;; The prefix is stripped before the digits are handed to str_to_int.
 ;; A radix letter in second position that is not preceded by '0' panics.
 ;; Everything else is parsed as decimal.
 ;;
 ;; rdi: lexeme ptr
 ;; rsi: lexeme len
 ;; returns rax: result of str_to_int
 ;; fn parse_number(lexeme: *const u8, lexeme_len: usize) -> u64
 parse_number:
    push rbp
    mov rbp, rsp
    sub rsp, 16
    mov [rsp], rdi         ; lexeme ptr
    mov [rsp + 8], rsi     ; lexeme len
    mov rdx, 10                 ; radix, decimal unless a prefix says otherwise
    cmp rsi, 2
    jbe .parse                  ; too short for a prefix plus at least one digit
    mov al, byte [rdi + 1]      ; the radix letter, if any, is the second byte
    mov rdx, 16
    cmp al, 'x'
    je .prefixed
    mov rdx, 8
    cmp al, 'o'
    je .prefixed
    mov rdx, 2
    cmp al, 'b'
    je .prefixed
    mov rdx, 10                 ; no radix letter: plain decimal
    jmp .parse
 .prefixed:
    cmp byte [rdi], '0'
    jne .panic                  ; invalid radix prefix
    add qword [rsp], 2          ; skip the two prefix bytes
    sub qword [rsp + 8], 2
 .parse:
    mov rdi, [rsp]          ; lexeme ptr
    mov rsi, [rsp + 8]    ; lexeme len
    call str_to_int             ; rdx already holds the radix
    add rsp, 16
    pop rbp
    ret
 .panic:
    call panic
 ;; Parses a primary expression. Number literals are the only form handled
 ;; here; any other token ends in a panic.
 ;;
 ;; rdi: *mut Ast
 ;; returns rax: index of the appended AST_NUMBER node (Ast.nodes.len() - 1)
 ;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> u64
 parse_primary_expr:
    push rbp
    mov rbp, rsp
    sub rsp, 24
    ; ast: *mut Ast   [0..8]
    ; node: AstNode   [8..24]
    mov [rsp], rdi
    mov dil, TOKEN_NUMBER
    call expect_token
    test rax, rax
    jz .panic                   ; 0 means the next token is not a number
    mov rsi, rdx                ; lexeme len
    mov rdi, rax                ; lexeme ptr
    call parse_number
    mov [rsp + 16], rax         ; node.data = parse_number's result
    mov byte [rsp + 8], AST_NUMBER
    lea rsi, [rsp + 8]          ; &node
    mov rdi, [rsp]              ; Ast
    call vec_push
    mov rdi, [rsp]              ; Ast
    mov rax, [rdi + 8]          ; Ast.nodes.len()
    dec rax
    add rsp, 24
    pop rbp
    ret
 .panic:
    call panic
 ;; Parses a primary expression followed by any number of binary operators that
 ;; bind tighter than the caller's precedence. `+` is the only operator handled.
 ;; The numeric token kind is used directly as the operator's precedence: it is
 ;; compared against `precedence` and passed on as the precedence of the
 ;; recursive call for the right-hand side.
 ;;
 ;; rdi: *mut Ast
 ;; sil: precedence
 ;; returns rax: node index of the parsed expression (the primary itself when
 ;;              no operator was consumed)
 ;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64
 parse_binary_expr:
    push rbp
    mov rbp, rsp
    ; size: 24, align: 8
    ; start-structs
    ; struct BinaryExpr {
    ;   left: u64,
    ;   operator: u8,
    ;   right: u64,
    ; }
    ; end-structs
    sub rsp, 64
    ; lexeme: Lexeme [32..56]
    ; right: u64 [24..32]
    ; precedence: u8 [17..18]
    ; operator: u8 [16..17]
    ; left: u64 [8..16]
    ; rdi: *mut Ast [0..8]
    mov [rsp], rdi         ; Ast
    mov byte [rsp + 17], sil ; upper_precedence
    mov byte [rsp + 16], 0
    call parse_primary_expr      ; rdi still holds Ast here
    mov [rsp + 8], rax           ; left
 .loop:
    lea rdi, [rsp + 32]         ; lexeme
    call peek_lexeme             ; look at the next token without consuming it
    mov rax, [rsp + 32]          ; token kind (compared via al below)
    mov dil, [rsp + 17]
    cmp al, dil          ; our_precedence <= upper_precedence
    jle .done            ; also covers some non-binary operator tokens
    cmp al, TOKEN_PLUS
    je .plus
    jmp .done
 .plus:
    mov dil, TOKEN_PLUS
    call unwrap_token            ; consume the operator that was only peeked
    mov byte [rsp + 16], TOKEN_PLUS
    jmp .right
 .right:
    mov rdi, [rsp]         ; Ast
    mov sil, [rsp + 16]    ; the operator's token kind becomes the new upper precedence
    call parse_binary_expr
    mov [rsp + 24], rax             ; right
    mov rdi, 24                  ; size of BinaryExpr
    mov rsi, 8                   ; alignment
    call bump_alloc
    mov rdx, [rsp + 8]          ; left
    mov [rax + 0], rdx          ; left
    mov dl, byte [rsp + 16]     ; operator
    mov byte [rax + 8], dl      ; operator
    mov rdx, [rsp + 24]         ; right
    mov [rax + 16], rdx         ; right
    ; the lexeme slot is no longer needed; reuse [32..48] as the AstNode
    mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind
    mov [rsp + 40], rax                ; AstNode.data
    mov rdi, [rsp]                  ; Ast
    lea rsi, [rsp + 32]             ; &AstNode
    call vec_push
    mov rdi, [rsp]              ; Ast
    mov rax, [rdi + 8]          ; Ast.nodes.len()
    dec rax
    mov [rsp + 8], rax          ; left (the new node becomes the left operand)
    jmp .loop
 .done:
    mov rax, [rsp + 8]          ; left
    add rsp, 64
    pop rbp
    ret
 ;; Parses a complete expression by entering parse_binary_expr with
 ;; precedence 0.
 ;;
 ;; No locals are needed: rdi is forwarded untouched, and keeping the frame at
 ;; just the saved rbp leaves rsp 16-byte aligned at the call (given an
 ;; ABI-conforming caller).
 ;;
 ;; rdi: *mut Ast
 ;; returns rax: node index returned by parse_binary_expr
 ;; define-fn: fn parse_expr(ast: *mut Ast) -> u64
 parse_expr:
    push rbp
    mov rbp, rsp
    mov sil, 0                  ; starting precedence
    call parse_binary_expr
    pop rbp
    ret
 ;; Parses one statement. Only `return <expr>;` is supported; any other token
 ;; panics. Appends an AST_RETURN_STATEMENT node whose data points at a
 ;; bump-allocated u64 holding the expression's node index.
 ;;
 ;; rdi: *mut Ast
 ;; returns rax: index of the new node in Ast.nodes
 ;; define-fn: fn parse_statement(ast: *mut Ast) -> u64
 parse_statement:
    push rbp
    mov rbp, rsp
    sub rsp, 32
    ; node: AstNode     [0..16]  (slot 0 is reused for the result index)
    ; expression: u64   [16..24]
    ; ast: *mut Ast     [24..32]
    mov [rsp + 24], rdi         ; Ast
    mov dil, TOKEN_RETURN
    call expect_token
    test rax, rax
    jz .panic                   ; 0 means the token was not `return`
    mov rdi, [rsp + 24]         ; Ast (rdi was overwritten for expect_token)
    call parse_expr
    mov [rsp + 16], rax             ; expression
    mov rdi, 8                      ; size of the u64 payload
    mov rsi, 8                      ; alignment
    call bump_alloc
    mov rdx, [rsp + 16]
    mov [rax], rdx                  ; payload lives outside this frame
    mov byte [rsp], AST_RETURN_STATEMENT ; kind
    mov [rsp + 8], rax              ; data
    mov rdi, [rsp + 24]             ; Ast
    mov rsi, rsp                    ; AstNode
    call vec_push
    mov rdi, [rsp + 24]         ; Ast
    mov rax, [rdi + 8]          ; Ast.nodes.len()
    dec rax
    mov [rsp], rax              ; keep the index across unwrap_token
    mov dil, TOKEN_SEMI
    call unwrap_token
    mov rax, [rsp]              ; index of the return-statement node
    add rsp, 32
    pop rbp
    ret
 .panic:
    call panic
 ;; Parses `{ <statement>* }` and appends an AST_BLOCK node whose data points
 ;; at a bump-allocated Block { statements, statements_len }. `statements`
 ;; holds the node index of each statement in source order.
 ;;
 ;; rdi: *mut Ast
 ;; returns rax: index of the new node in Ast.nodes
 ;; define-fn: fn parse_block(ast: *mut Ast) -> u64
 parse_block:
    push rbp
    mov rbp, rsp
    push rdi
    ; start-structs
    ; struct Block {
    ;   statements: *const u64,
    ;   statements_len: usize,
    ; }
    ; end-structs
    sub rsp, 56
    ; node: AstNode          [0..16]  (only built after the loop)
    ; statement: u64         [8..16]  (scratch slot while looping)
    ; statements: Vec<u64>   [16..56]
    ; saved Ast pointer      [56..64]
    lea rdi, [rsp + 16]         ; vec
    mov rsi, 8                  ; size of one statement index
    mov rdx, 0                  ; drop = None
    mov rcx, 16                 ; capacity
    call vec_init_with
    mov dil, TOKEN_LBRACE
    call unwrap_token
 .loop:
    mov dil, TOKEN_RBRACE
    call expect_token
    test rax, rax
    jnz .done                   ; `}` consumed: the block is complete
    mov rdi, [rsp + 56]         ; Ast
    call parse_statement
    mov [rsp + 8], rax          ; statement
    lea rdi, [rsp + 16]         ; vec
    lea rsi, [rsp + 8]
    call vec_push
    jmp .loop
 .done:
    mov rdi, 16                 ; size of Block
    mov rsi, 8                  ; alignment
    call bump_alloc
    mov rdx, [rsp + 16]         ; vec data pointer
    mov [rax], rdx              ; Block.statements
    mov rdx, [rsp + 24]         ; vec length
    mov [rax + 8], rdx          ; Block.statements_len
    mov byte [rsp], AST_BLOCK   ; kind
    mov [rsp + 8], rax          ; data
    mov rdi, [rsp + 56]         ; Ast
    mov rsi, rsp                ; AstNode
    call vec_push
    mov rdi, [rsp + 56]         ; Ast
    mov rax, [rdi + 8]         ; Ast.nodes.len()
    dec rax
    add rsp, 56
    pop rdi
    pop rbp
    ret
 ;; Consumes one token and maps it to a TYPE_* constant. Tokens other than
 ;; i32, u32, void and bool end in a panic.
 ;;
 ;; rdi: *mut Ast
 ;; returns rax: TYPE_I32, TYPE_U32, TYPE_VOID or TYPE_BOOL
 ;; define-fn: fn parse_type(ast: *mut Ast) -> Type
 parse_type:
    push rbp
    mov rbp, rsp
    push rdi
    sub rsp, 24
    ; lexeme: out-struct filled by find_lexeme [0..24]
    mov rdi, rsp
    call find_lexeme            ; TODO: use peek here to allow failing gracefully
    mov rax, [rsp]              ; token kind
    cmp al, TOKEN_BOOL
    je .is_bool
    cmp al, TOKEN_VOID
    je .is_void
    cmp al, TOKEN_U32
    je .is_u32
    cmp al, TOKEN_I32
    jne .panic                  ; none of the known type tokens
    mov rax, TYPE_I32
    jmp .leave
 .is_u32:
    mov rax, TYPE_U32
    jmp .leave
 .is_void:
    mov rax, TYPE_VOID
    jmp .leave
 .is_bool:
    mov rax, TYPE_BOOL
 .leave:
    add rsp, 24
    pop rdi
    pop rbp
    ret
 .panic:
    call panic
--- a/lang/src/tokeniser.asm
+++ b/lang/src/tokeniser.asm
@ -19,8 +19,13 @@ extern is_id_start
 extern is_whitespace
 global tokeniser_init
 global tokeniser_init_buf
 global tokeniser_print
 global find_lexeme
 global expect_token
 global unwrap_token
 global peek_expect_token
 global peek_lexeme
 ;; =============================
 ;; Tokeniser functions
@ -50,6 +55,22 @@ section .bss
 statbuf: resb 144
 section .text
 ;; Initialises the tokeniser from a caller-supplied buffer: records the
 ;; pointer and length, rewinds the cursor to 0 and zeroes input_file.
 ;; rdi: pointer to buffer
 ;; rsi: length of buffer
 tokeniser_init_buf:
    push rbp
    mov rbp, rsp
    mov qword [rel buffer], rdi
    mov qword [rel buffer_len], rsi
    mov qword [rel cursor], 0
    mov dword [rel input_file], 0
    pop rbp
    ret
 ;; Initialises the tokeniser
 ;; rdx: pointer to filename (null-terminated)
 tokeniser_init:
@ -113,210 +134,7 @@ global TOKENS
 global LEXEME_LENS
 global NUM_LEXEMES
-align 8
+%include "src/tokeniser.inc"
 LEXEMES:
    dq LEX_NOT_A_LEXEME
    dq LEX_LET
    dq LEX_IF
    dq LEX_ELSE
    dq LEX_FN
    dq LEX_RETURN
    dq LEX_LOOP
    dq LEX_BREAK
    dq LEX_CONTINUE
    dq LEX_TRUE
    dq LEX_FALSE
    dq LEX_BOOL
    dq LEX_ARROW
    dq LEX_I32
    dq LEX_U32
    dq LEX_EQUALS
    dq LEX_PLUS
    dq LEX_MINUS
    dq LEX_RPARENS
    dq LEX_LPARENS
    dq LEX_RBRACE
    dq LEX_LBRACE
    dq LEX_COLON
    dq LEX_SEMI
    dq LEX_COMMA
    dq LEX_PIPE
    dq LEX_AMP
    dq LEX_EQEQ
    dq LEX_LBRACKET
    dq LEX_RBRACKET
 align 8
 TOKENS:
    db TOKEN_EOF                   ;; 0
    db TOKEN_LET                   ;; 1
    db TOKEN_IF                    ;; 2
    db TOKEN_ELSE                  ;; 3
    db TOKEN_FN                    ;; 4
    db TOKEN_RETURN                ;; 5
    db TOKEN_LOOP                  ;; 6
    db TOKEN_BREAK                 ;; 7
    db TOKEN_CONTINUE              ;; 8
    db TOKEN_TRUE                  ;; 9
    db TOKEN_FALSE                 ;; 10
    db TOKEN_BOOL                  ;; 11
    db TOKEN_ARROW                 ;; 12
    db TOKEN_I32                   ;; 13
    db TOKEN_U32                   ;; 14
    db TOKEN_EQUALS                ;; 15
    db TOKEN_PLUS                  ;; 16
    db TOKEN_MINUS                 ;; 17
    db TOKEN_RPARENS               ;; 18
    db TOKEN_LPARENS               ;; 19
    db TOKEN_RBRACE                ;; 20
    db TOKEN_LBRACE                ;; 21
    db TOKEN_COLON                 ;; 22
    db TOKEN_SEMI                  ;; 23
    db TOKEN_COMMA                 ;; 24
    db TOKEN_PIPE                  ;; 25
    db TOKEN_AMP                   ;; 26
    db TOKEN_EQEQ                  ;; 27
    db TOKEN_LBRACKET              ;; 28
    db TOKEN_RBRACKET              ;; 29
 align 8
 LEXEME_LENS:
    dq 0
    dq LEX_LET_len
    dq LEX_IF_len
    dq LEX_ELSE_len
    dq LEX_FN_len
    dq LEX_RETURN_len
    dq LEX_LOOP_len
    dq LEX_BREAK_len
    dq LEX_CONTINUE_len
    dq LEX_TRUE_len
    dq LEX_FALSE_len
    dq LEX_BOOL_len
    dq LEX_ARROW_len
    dq LEX_I32_len
    dq LEX_U32_len
    dq LEX_EQUALS_len
    dq LEX_PLUS_len
    dq LEX_MINUS_len
    dq LEX_RPARENS_len
    dq LEX_LPARENS_len
    dq LEX_RBRACE_len
    dq LEX_LBRACE_len
    dq LEX_COLON_len
    dq LEX_SEMI_len
    dq LEX_COMMA_len
    dq LEX_PIPE_len
    dq LEX_AMP_len
    dq LEX_EQEQ_len
    dq LEX_LBRACKET_len
    dq LEX_RBRACKET_len
 align 8
 NUM_LEXEMES: dq 30
    LEX_NOT_A_LEXEME db "<not a lexeme>", 0
    TOKEN_EOF       equ 0
    TOKEN_LET       equ 1
    LEX_LET db "let"
    LEX_LET_len equ $ - LEX_LET
    TOKEN_IF        equ 2
    LEX_IF db "if"
    LEX_IF_len equ $ - LEX_IF
    TOKEN_ELSE      equ 3
    LEX_ELSE db "else"
    LEX_ELSE_len equ $ - LEX_ELSE
    TOKEN_FN        equ 4
    LEX_FN db "fn"
    LEX_FN_len equ $ - LEX_FN
    TOKEN_RETURN    equ 5
    LEX_RETURN db "return"
    LEX_RETURN_len equ $ - LEX_RETURN
    TOKEN_LOOP      equ 6
    LEX_LOOP db "loop"
    LEX_LOOP_len equ $ - LEX_LOOP
    TOKEN_BREAK     equ 7
    LEX_BREAK db "break"
    LEX_BREAK_len equ $ - LEX_BREAK
    TOKEN_CONTINUE  equ 8
    LEX_CONTINUE db "continue"
    LEX_CONTINUE_len equ $ - LEX_CONTINUE
    TOKEN_TRUE      equ 9
    LEX_TRUE db "true"
    LEX_TRUE_len equ $ - LEX_TRUE
    TOKEN_FALSE     equ 10
    LEX_FALSE db "false"
    LEX_FALSE_len equ $ - LEX_FALSE
    TOKEN_BOOL      equ 11
    LEX_BOOL db "bool"
    LEX_BOOL_len equ $ - LEX_BOOL
    TOKEN_ARROW     equ 12
    LEX_ARROW db "->"
    LEX_ARROW_len equ $ - LEX_ARROW
    TOKEN_I32       equ 13
    LEX_I32 db "i32"
    LEX_I32_len equ $ - LEX_I32
    TOKEN_U32       equ 14
    LEX_U32 db "u32"
    LEX_U32_len equ $ - LEX_U32
    TOKEN_EQUALS    equ 15
    LEX_EQUALS db "="
    LEX_EQUALS_len equ $ - LEX_EQUALS
    TOKEN_PLUS      equ 16
    LEX_PLUS db "+"
    LEX_PLUS_len equ $ - LEX_PLUS
    TOKEN_MINUS     equ 17
    LEX_MINUS db "-"
    LEX_MINUS_len equ $ - LEX_MINUS
    TOKEN_RPARENS   equ 18
    LEX_RPARENS db ")"
    LEX_RPARENS_len equ $ - LEX_RPARENS
    TOKEN_LPARENS   equ 19
    LEX_LPARENS db "("
    LEX_LPARENS_len equ $ - LEX_LPARENS
    TOKEN_RBRACE    equ 20
    LEX_RBRACE db "}"
    LEX_RBRACE_len equ $ - LEX_RBRACE
    TOKEN_LBRACE    equ 21
    LEX_LBRACE db "{"
    LEX_LBRACE_len equ $ - LEX_LBRACE
    TOKEN_COLON     equ 22
    LEX_COLON db ":"
    LEX_COLON_len equ $ - LEX_COLON
    TOKEN_SEMI      equ 23
    LEX_SEMI db ";"
    LEX_SEMI_len equ $ - LEX_SEMI
    TOKEN_COMMA     equ 24
    LEX_COMMA db ","
    LEX_COMMA_len equ $ - LEX_COMMA
    TOKEN_PIPE      equ 25
    LEX_PIPE db "|"
    LEX_PIPE_len equ $ - LEX_PIPE
    TOKEN_AMP       equ 26
    LEX_AMP db "&"
    LEX_AMP_len equ $ - LEX_AMP
    TOKEN_EQEQ     equ 27
    LEX_EQEQ db "=="
    LEX_EQEQ_len equ $ - LEX_EQEQ
    TOKEN_LBRACKET  equ 28
    LEX_LBRACKET db "["
    LEX_LBRACKET_len equ $ - LEX_LBRACKET
    TOKEN_RBRACKET  equ 29
    LEX_RBRACKET db "]"
    LEX_RBRACKET_len equ $ - LEX_RBRACKET
    TOKEN_IDENT     equ 30
    LEX_IDENT db "<identifier>"
    LEX_IDENT_len equ $ - LEX_IDENT
    TOKEN_NUMBER    equ 31
    LEX_NUMBER db "<number>"
    LEX_NUMBER_len equ $ - LEX_NUMBER
    TOKEN_STRING    equ 32
    LEX_STRING db "<string>"
    LEX_STRING_len equ $ - LEX_STRING
    TOKEN_COMMENT   equ 33
    LEX_COMMENT db "<comment>"
    LEX_COMMENT_len equ $ - LEX_COMMENT
 section .text
 ;; rdi: length of previously matched lexeme
@ -702,3 +520,66 @@ find_lexeme:
    mov qword [rdi], TOKEN_COMMENT
    mov [rdi + 16], rax
    jmp .epilogue
 ;; Reads the next lexeme and checks its kind against the expected one.
 ;; On a match the lexeme stays consumed; otherwise the cursor is put back to
 ;; where it was on entry.
 ;;
 ;; dil: expected token
 ;; returns rax: lexeme pointer, rdx: lexeme length on a match;
 ;;         rax = rdx = 0 otherwise
 expect_token:
    push rbp
    mov rbp, rsp
    sub rsp, 0x30
    ; expected: u8          [0x00]
    ; saved cursor: u64     [0x08..0x10]
    ; lexeme out-struct     [0x10..0x28]  (kind, pointer, length)
    mov [rsp], dil
    mov rcx, [rel cursor]
    mov [rsp + 8], rcx          ; snapshot for the mismatch path
    lea rdi, [rsp + 0x10]
    call find_lexeme
    mov cl, [rsp]               ; expected token
    cmp cl, byte [rsp + 0x10]   ; low byte of the found token kind
    jne .mismatch
    mov rax, [rsp + 0x18]   ; lexeme pointer
    mov rdx, [rsp + 0x20]   ; lexeme length
    jmp .leave
 .mismatch:
    mov rcx, [rsp + 8]
    mov [rel cursor], rcx       ; un-consume the lexeme
    xor rax, rax
    xor rdx, rdx
 .leave:
    add rsp, 0x30
    pop rbp
    ret
 ;; Like expect_token, but calls panic instead of returning 0 when the next
 ;; token does not have the expected kind.
 ;;
 ;; dil: expected token
 ;; returns rax/rdx exactly as expect_token left them (lexeme pointer / length)
 unwrap_token:
    push rbp
    mov rbp, rsp
    call expect_token
    test rax, rax               ; expect_token returns 0 on a mismatch
    jz .panic
    pop rbp
    ret
 .panic:
    call panic
 ;; Runs expect_token and then restores the cursor saved on entry, so the
 ;; token is not consumed even when it matches.
 ;;
 ;; dil: expected token
 ;; returns rax/rdx as produced by expect_token
 peek_expect_token:
    push rbp
    mov rbp, rsp
    push qword [rel cursor]     ; remember where we were
    call expect_token
    pop rcx
    mov [rel cursor], rcx       ; rewind regardless of the outcome
    pop rbp
    ret
 ;; Fills the out-struct via find_lexeme and then restores the cursor saved on
 ;; entry, leaving the lexeme unconsumed.
 ;;
 ;; rdi: out-struct pointer
 ;; returns rax: the same out-struct pointer
 peek_lexeme:
    push rbp
    mov rbp, rsp
    push rdi                          ; becomes the return value
    push qword [rel cursor]           ; cursor snapshot
    call find_lexeme
    pop rcx
    mov [rel cursor], rcx             ; rewind
    pop rax
    pop rbp
    ret
--- a/lang/src/tokeniser.inc
+++ b/lang/src/tokeniser.inc
@ -0,0 +1,213 @@
 section .rdata
 align 8
 LEXEMES:
    dq LEX_NOT_A_LEXEME
    dq LEX_LET
    dq LEX_IF
    dq LEX_ELSE
    dq LEX_FN
    dq LEX_RETURN
    dq LEX_LOOP
    dq LEX_BREAK
    dq LEX_CONTINUE
    dq LEX_TRUE
    dq LEX_FALSE
    dq LEX_BOOL
    dq LEX_ARROW
    dq LEX_I32
    dq LEX_U32
    dq LEX_EQUALS
    dq LEX_PLUS
    dq LEX_MINUS
    dq LEX_RPARENS
    dq LEX_LPARENS
    dq LEX_RBRACE
    dq LEX_LBRACE
    dq LEX_COLON
    dq LEX_SEMI
    dq LEX_COMMA
    dq LEX_PIPE
    dq LEX_AMP
    dq LEX_EQEQ
    dq LEX_LBRACKET
    dq LEX_RBRACKET
    dq LEX_VOID
 align 8
 TOKENS:
    db TOKEN_EOF                   ;; 0
    db TOKEN_LET                   ;; 1
    db TOKEN_IF                    ;; 2
    db TOKEN_ELSE                  ;; 3
    db TOKEN_FN                    ;; 4
    db TOKEN_RETURN                ;; 5
    db TOKEN_LOOP                  ;; 6
    db TOKEN_BREAK                 ;; 7
    db TOKEN_CONTINUE              ;; 8
    db TOKEN_TRUE                  ;; 9
    db TOKEN_FALSE                 ;; 10
    db TOKEN_BOOL                  ;; 11
    db TOKEN_ARROW                 ;; 12
    db TOKEN_I32                   ;; 13
    db TOKEN_U32                   ;; 14
    db TOKEN_EQUALS                ;; 15
    db TOKEN_PLUS                  ;; 16
    db TOKEN_MINUS                 ;; 17
    db TOKEN_RPARENS               ;; 18
    db TOKEN_LPARENS               ;; 19
    db TOKEN_RBRACE                ;; 20
    db TOKEN_LBRACE                ;; 21
    db TOKEN_COLON                 ;; 22
    db TOKEN_SEMI                  ;; 23
    db TOKEN_COMMA                 ;; 24
    db TOKEN_PIPE                  ;; 25
    db TOKEN_AMP                   ;; 26
    db TOKEN_EQEQ                  ;; 27
    db TOKEN_LBRACKET              ;; 28
    db TOKEN_RBRACKET              ;; 29
    db TOKEN_VOID                  ;; 30
 align 8
 LEXEME_LENS:
    dq 0
    dq LEX_LET_len
    dq LEX_IF_len
    dq LEX_ELSE_len
    dq LEX_FN_len
    dq LEX_RETURN_len
    dq LEX_LOOP_len
    dq LEX_BREAK_len
    dq LEX_CONTINUE_len
    dq LEX_TRUE_len
    dq LEX_FALSE_len
    dq LEX_BOOL_len
    dq LEX_ARROW_len
    dq LEX_I32_len
    dq LEX_U32_len
    dq LEX_EQUALS_len
    dq LEX_PLUS_len
    dq LEX_MINUS_len
    dq LEX_RPARENS_len
    dq LEX_LPARENS_len
    dq LEX_RBRACE_len
    dq LEX_LBRACE_len
    dq LEX_COLON_len
    dq LEX_SEMI_len
    dq LEX_COMMA_len
    dq LEX_PIPE_len
    dq LEX_AMP_len
    dq LEX_EQEQ_len
    dq LEX_LBRACKET_len
    dq LEX_RBRACKET_len
    dq LEX_VOID_len
 align 8
 ;; Number of entries in the LEXEMES / TOKENS / LEXEME_LENS tables.
 ;; Computed from the size of LEXEME_LENS (8-byte entries, ending right here)
 ;; so it cannot fall out of step when a lexeme such as `void` is added.
 NUM_LEXEMES: dq ($ - LEXEME_LENS) / 8
    LEX_NOT_A_LEXEME db "<not a lexeme>", 0
    LEX_LET db "let"
    LEX_LET_len equ $ - LEX_LET
    LEX_IF db "if"
    LEX_IF_len equ $ - LEX_IF
    LEX_ELSE db "else"
    LEX_ELSE_len equ $ - LEX_ELSE
    LEX_FN db "fn"
    LEX_FN_len equ $ - LEX_FN
    LEX_RETURN db "return"
    LEX_RETURN_len equ $ - LEX_RETURN
    LEX_LOOP db "loop"
    LEX_LOOP_len equ $ - LEX_LOOP
    LEX_BREAK db "break"
    LEX_BREAK_len equ $ - LEX_BREAK
    LEX_CONTINUE db "continue"
    LEX_CONTINUE_len equ $ - LEX_CONTINUE
    LEX_TRUE db "true"
    LEX_TRUE_len equ $ - LEX_TRUE
    LEX_FALSE db "false"
    LEX_FALSE_len equ $ - LEX_FALSE
    LEX_BOOL db "bool"
    LEX_BOOL_len equ $ - LEX_BOOL
    LEX_ARROW db "->"
    LEX_ARROW_len equ $ - LEX_ARROW
    LEX_I32 db "i32"
    LEX_I32_len equ $ - LEX_I32
    LEX_U32 db "u32"
    LEX_U32_len equ $ - LEX_U32
    LEX_EQUALS db "="
    LEX_EQUALS_len equ $ - LEX_EQUALS
    LEX_PLUS db "+"
    LEX_PLUS_len equ $ - LEX_PLUS
    LEX_MINUS db "-"
    LEX_MINUS_len equ $ - LEX_MINUS
    LEX_RPARENS db ")"
    LEX_RPARENS_len equ $ - LEX_RPARENS
    LEX_LPARENS db "("
    LEX_LPARENS_len equ $ - LEX_LPARENS
    LEX_RBRACE db "}"
    LEX_RBRACE_len equ $ - LEX_RBRACE
    LEX_LBRACE db "{"
    LEX_LBRACE_len equ $ - LEX_LBRACE
    LEX_COLON db ":"
    LEX_COLON_len equ $ - LEX_COLON
    LEX_SEMI db ";"
    LEX_SEMI_len equ $ - LEX_SEMI
    LEX_COMMA db ","
    LEX_COMMA_len equ $ - LEX_COMMA
    LEX_PIPE db "|"
    LEX_PIPE_len equ $ - LEX_PIPE
    LEX_AMP db "&"
    LEX_AMP_len equ $ - LEX_AMP
    LEX_EQEQ db "=="
    LEX_EQEQ_len equ $ - LEX_EQEQ
    LEX_LBRACKET db "["
    LEX_LBRACKET_len equ $ - LEX_LBRACKET
    LEX_RBRACKET db "]"
    LEX_RBRACKET_len equ $ - LEX_RBRACKET
    LEX_VOID db "void"
    LEX_VOID_len equ $ - LEX_VOID
    LEX_IDENT db "<identifier>"
    LEX_IDENT_len equ $ - LEX_IDENT
    LEX_NUMBER db "<number>"
    LEX_NUMBER_len equ $ - LEX_NUMBER
    LEX_STRING db "<string>"
    LEX_STRING_len equ $ - LEX_STRING
    LEX_COMMENT db "<comment>"
    LEX_COMMENT_len equ $ - LEX_COMMENT
    ;; start-consts
    TOKEN_EOF       equ 0
    TOKEN_LET       equ 1
    TOKEN_IF        equ 2
    TOKEN_ELSE      equ 3
    TOKEN_FN        equ 4
    TOKEN_RETURN    equ 5
    TOKEN_LOOP      equ 6
    TOKEN_BREAK     equ 7
    TOKEN_CONTINUE  equ 8
    TOKEN_TRUE      equ 9
    TOKEN_FALSE     equ 10
    TOKEN_BOOL      equ 11
    TOKEN_ARROW     equ 12
    TOKEN_I32       equ 13
    TOKEN_U32       equ 14
    TOKEN_EQUALS    equ 15
    TOKEN_PLUS      equ 16
    TOKEN_MINUS     equ 17
    TOKEN_RPARENS   equ 18
    TOKEN_LPARENS   equ 19
    TOKEN_RBRACE    equ 20
    TOKEN_LBRACE    equ 21
    TOKEN_COLON     equ 22
    TOKEN_SEMI      equ 23
    TOKEN_COMMA     equ 24
    TOKEN_PIPE      equ 25
    TOKEN_AMP       equ 26
    TOKEN_EQEQ      equ 27
    TOKEN_LBRACKET  equ 28
    TOKEN_RBRACKET  equ 29
    TOKEN_VOID      equ 30
    TOKEN_IDENT     equ 31
    TOKEN_NUMBER    equ 32
    TOKEN_STRING    equ 33
    TOKEN_COMMENT   equ 34
    ;; end-consts
--- a/lang/tests/asm_to_rust.py
+++ b/lang/tests/asm_to_rust.py
@ -0,0 +1,310 @@
 #!/usr/bin/env python3
 """
 parse_asm_to_rust.py
 Scan one or more assembly source files and extract:
 - commented struct definitions inside `start-structs` / `end-structs` spans
 - constant definitions inside `start-consts` / `end-consts` spans
 - commented function-definition directives of the form `define-fn: fn ...`
 - commented markdown rust fenced code blocks (```rust) and copy their inner code
   into the generated Rust output (fences are removed and comment markers stripped)
 Produce Rust source code containing:
 - an `extern "C"` block with `pub unsafe fn ...;` declarations for each define-fn
 - `pub const NAME: u32 = <value>;` lines for each `equ` constant found in const spans
 - `#[repr(C)] pub struct Name { pub field: Type, ... }` for each struct found in struct spans
 - verbatim Rust code copied from commented ```rust``` blocks (fences removed)
 Notes:
 - Struct and function definitions must appear on commented lines. Any number of leading semicolons
   (e.g. `;`, `;;`, `;;;`) and surrounding spaces are allowed and will be stripped.
 - Constant lines inside const spans may be commented or not; the script strips leading semicolons
   before parsing.
 - Commented rust blocks are expected to use commented fenced code blocks, e.g.:
     ;; ```rust
     ;; extern "C" { ... }
     ;; ```
   The inner lines will be uncommented (leading semicolons removed) and included in output.
 - By default the script writes to stdout. Use `-o` to write combined output to a file, or `-d`
   to write one .rs file per input with the same basename.
 """
 import argparse
 import re
 import sys
 from pathlib import Path
 from typing import List, Tuple, Dict, Any
 LEADING_COMMENT_RE = re.compile(r'^\s*;+\s*')  # lines that start with one or more semicolons
 START_STRUCTS_RE = re.compile(r'^\s*;+\s*start-structs\b', re.IGNORECASE)
 END_STRUCTS_RE = re.compile(r'^\s*;+\s*end-structs\b', re.IGNORECASE)
 START_CONSTS_RE = re.compile(r'^\s*;+\s*start-consts\b', re.IGNORECASE)
 END_CONSTS_RE = re.compile(r'^\s*;+\s*end-consts\b', re.IGNORECASE)
 DEFINE_FN_RE = re.compile(r'^\s*;+\s*define-fn:\s*(.+)$', re.IGNORECASE)
 CONST_EQU_RE = re.compile(r'^\s*([A-Za-z_]\w*)\s+equ\s+(.+)$', re.IGNORECASE)
 STRUCT_START_RE = re.compile(r'^\s*struct\s+([A-Za-z_]\w*)\s*\{')  # after comment markers stripped
 RUST_FENCE_RE = re.compile(r'^\s*```\s*(rust)?\s*$', re.IGNORECASE)  # matches ``` or ```rust (after stripping leading comment)
 def strip_leading_semicolons(line: str) -> str:
    """Return `line` without its leading ';' comment marker(s), the whitespace around them, and any trailing newlines."""
    without_marker = LEADING_COMMENT_RE.sub('', line)
    return without_marker.rstrip('\n')
 def extract_structs_from_commented_lines(lines: List[str]) -> List[Tuple[str, List[str]]]:
    """
    Collect every `struct Name { ... }` block from `lines`, which must already have their
    leading ';' markers stripped.

    Returns one (name, body_lines) tuple per struct, in order of appearance. A body runs from
    the line after the header until the braces opened on the header line are balanced again;
    a final line consisting only of '}' is dropped from the body.
    """
    found: List[Tuple[str, List[str]]] = []
    total = len(lines)
    pos = 0
    while pos < total:
        header = STRUCT_START_RE.match(lines[pos])
        if header is None:
            pos += 1
            continue
        # Net number of braces left open by the header line itself.
        depth = lines[pos].count('{') - lines[pos].count('}')
        pos += 1
        fields: List[str] = []
        while depth > 0 and pos < total:
            current = lines[pos]
            depth += current.count('{') - current.count('}')
            fields.append(current)
            pos += 1
        # The closing brace line is part of the syntax, not a field.
        if fields and fields[-1].strip() == '}':
            fields.pop()
        found.append((header.group(1), fields))
    return found
 def format_rust_struct(name: str, field_lines: List[str]) -> str:
    """
    Render a `#[repr(C)] #[derive(Debug)] pub struct` definition from raw field lines.

    Each non-empty line has surrounding whitespace and trailing commas removed. A line containing
    ':' is split at the first ':' into field name and type and emitted as `pub name: type,`;
    any other line is emitted as `pub <line>,` unchanged.
    """
    rendered = ['#[repr(C)]', '#[derive(Debug)]', f'pub struct {name} {{']
    for entry in field_lines:
        text = entry.strip().rstrip(',')
        if not text:
            continue
        fname, colon, ftype = text.partition(':')
        if colon:
            rendered.append(f'    pub {fname.strip()}: {ftype.strip()},')
        else:
            rendered.append(f'    pub {text},')
    rendered.append('}')
    return '\n'.join(rendered)
 def parse_file(path: Path) -> Dict[str, Any]:
    """
    Parse a single assembly file and return dict with keys: 'functions', 'consts', 'structs', 'rust_blocks'
    - functions: list of signature strings (e.g. "parse_ast(data: *const u8) -> Ast")
    - consts: list of (name, value)
    - structs: list of (name, field_lines)
    - rust_blocks: list of rust code blocks; each block is list[str] of code lines (no fences, uncommented)
    """
    functions: List[str] = []
    consts: List[Tuple[str, str]] = []
    structs: List[Tuple[str, List[str]]] = []
    rust_blocks: List[List[str]] = []
    with path.open('r', encoding='utf-8') as f:
        lines = f.readlines()
    i = 0
    n = len(lines)
    in_structs = False
    in_consts = False
    struct_buffer: List[str] = []
    const_buffer: List[str] = []
    while i < n:
        raw = lines[i]
        # state transitions for start/end spans
        if not in_structs and START_STRUCTS_RE.match(raw):
            in_structs = True
            struct_buffer = []
            i += 1
            continue
        if in_structs and END_STRUCTS_RE.match(raw):
            stripped = [strip_leading_semicolons(l) for l in struct_buffer if l.strip()]
            found = extract_structs_from_commented_lines(stripped)
            structs.extend(found)
            in_structs = False
            struct_buffer = []
            i += 1
            continue
        if not in_consts and START_CONSTS_RE.match(raw):
            in_consts = True
            const_buffer = []
            i += 1
            continue
        if in_consts and END_CONSTS_RE.match(raw):
            for l in const_buffer:
                s = strip_leading_semicolons(l)
                m = CONST_EQU_RE.match(s)
                if m:
                    name = m.group(1)
                    value = m.group(2).strip()
                    consts.append((name, value))
            in_consts = False
            const_buffer = []
            i += 1
            continue
        # If inside special spans, collect lines
        if in_structs:
            if LEADING_COMMENT_RE.match(raw):
                struct_buffer.append(raw)
        elif in_consts:
            const_buffer.append(raw)
        else:
            # Top-level: look for define-fn directives (must be commented lines)
            mfn = DEFINE_FN_RE.match(raw)
            if mfn:
                sig = mfn.group(1).strip()
                if sig.startswith('fn '):
                    sig = sig[len('fn '):].strip()
                functions.append(sig)
            else:
                # Check for commented rust fenced block start
                if LEADING_COMMENT_RE.match(raw):
                    stripped = strip_leading_semicolons(raw)
                    if RUST_FENCE_RE.match(stripped):
                        # start collecting rust block until a closing fence is found
                        block_lines: List[str] = []
                        i += 1
                        while i < n:
                            cur = lines[i]
                            # If it's a commented fence closing, stop
                            if LEADING_COMMENT_RE.match(cur):
                                inner_stripped = strip_leading_semicolons(cur)
                                if RUST_FENCE_RE.match(inner_stripped):
                                    break
                                # otherwise, this is a commented code line; strip leading semicolons and append
                                block_lines.append(strip_leading_semicolons(cur))
                            else:
                                # If it's an uncommented line inside the block, include as-is (trim newline)
                                block_lines.append(cur.rstrip('\n'))
                            i += 1
                        rust_blocks.append(block_lines)
                        # advance past the closing fence line if present
                        # current i points at closing fence or EOF; advance one to continue main loop
                        i += 1
                        continue  # continue outer loop without incrementing i further
        i += 1
    return {
        'functions': functions,
        'consts': consts,
        'structs': structs,
        'rust_blocks': rust_blocks,
    }
 def render_rust(function_sigs: List[str], consts: List[Tuple[str, str]],
                structs: List[Tuple[str, List[str]]], rust_blocks: List[List[str]]) -> str:
    """
    Assemble the Rust source text for one set of parsed definitions.

    Sections are emitted in a fixed order: the `#![allow(...)]` header and banner comment,
    a single `unsafe extern "C"` block holding every function signature (omitted when there
    are none), the constants (all typed `u32`), the structs, and finally the verbatim rust
    blocks. Sections are separated by blank lines and trailing blank lines are removed.

    Parameters:
    - function_sigs: signatures without the leading `fn`, e.g. "parse_expr(ast: *mut Ast) -> u64"
    - consts: (name, value) pairs; the value text is inserted as-is
    - structs: (name, field_lines) pairs, formatted by `format_rust_struct`
    - rust_blocks: code blocks, each a list of lines; trailing newlines on lines are stripped

    Returns the generated source as one string ending in a newline.
    """
    # The banner deliberately carries its own '\n' so a blank line follows it after joining.
    out: List[str] = [
        '#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]',
        '// Auto-generated Rust bindings from assembly source\n',
    ]
    # All extern functions share one block.
    if function_sigs:
        out.append('unsafe extern "C" {')
        out.extend(f'    pub unsafe fn {signature};' for signature in function_sigs)
        out += ['}', '']
    # Constants, followed by a separating blank line when any were written.
    if consts:
        out.extend(f'pub const {ident}: u32 = {literal};' for ident, literal in consts)
        out.append('')
    # Each struct is followed by a blank line.
    for struct_name, fields in structs:
        out += [format_rust_struct(struct_name, fields), '']
    # Verbatim blocks: guarantee a blank line before each one and add one after it.
    for code_lines in rust_blocks:
        if out[-1] != '':
            out.append('')
        out += [code_line.rstrip('\n') for code_line in code_lines]
        out.append('')
    # Drop the blank lines left over at the very end.
    while out and out[-1] == '':
        out.pop()
    if not out:
        return ''
    return '\n'.join(out) + '\n'
 def main(argv=None):
    """
    Command-line entry point: parse every input file and write the generated Rust.

    With `-d/--out-dir`, one `<stem>.rs` file is written per input. Otherwise the
    definitions from all inputs are merged and rendered once, to `-o/--out` when given
    or to stdout. Inputs that do not exist are reported on stderr and skipped.

    Parameters:
    - argv: argument list passed to argparse; None lets argparse read sys.argv

    Returns 0.
    """
    cli = argparse.ArgumentParser(description='Parse assembly files and emit Rust externs, consts, struct defs, and commented ```rust``` blocks.')
    cli.add_argument('inputs', metavar='INPUT', type=Path, nargs='+', help='assembly source files to parse')
    # -o and -d are alternatives; argparse rejects a command line that gives both.
    destination = cli.add_mutually_exclusive_group()
    destination.add_argument('-o', '--out', type=Path, help='write combined Rust to this file (default stdout)')
    destination.add_argument('-d', '--out-dir', type=Path, help='write one .rs file per input into this directory')
    args = cli.parse_args(argv)
    # Keys of the parse_file result, in the positional order render_rust expects.
    sections = ('functions', 'consts', 'structs', 'rust_blocks')
    merged = {key: [] for key in sections}
    rendered_by_input: Dict[Path, str] = {}
    for source in args.inputs:
        if not source.exists():
            print(f'warning: input file {source} does not exist, skipping', file=sys.stderr)
            continue
        parsed = parse_file(source)
        rendered_by_input[source] = render_rust(*(parsed[key] for key in sections))
        for key in sections:
            merged[key].extend(parsed[key])
    # Per-file mode: one output file named after each input's stem.
    if args.out_dir:
        target_dir = args.out_dir
        target_dir.mkdir(parents=True, exist_ok=True)
        for source, text in rendered_by_input.items():
            target = target_dir / (source.stem + '.rs')
            with target.open('w', encoding='utf-8') as handle:
                handle.write(text)
            print(f'Wrote {target}', file=sys.stderr)
        return 0
    # Combined mode: a single rendering of everything that was collected.
    combined = render_rust(*(merged[key] for key in sections))
    if not args.out:
        sys.stdout.write(combined)
        return 0
    with args.out.open('w', encoding='utf-8') as handle:
        handle.write(combined)
    print(f'Wrote {args.out}', file=sys.stderr)
    return 0
 # Run the generator only when executed as a script; main()'s return value becomes the exit status.
 if __name__ == '__main__':
    raise SystemExit(main())
--- a/lang/tests/ast.rs
+++ b/lang/tests/ast.rs
@ -0,0 +1,66 @@
 #[path = "shared/shared.rs"]
 mod util;
 // Symbols this test declares by hand rather than importing from `util::defs`.
 // NOTE(review): presumably provided by the assembly objects linked into the test binary — confirm.
 unsafe extern "C" {
    // Called once at the start of `main`, before any parsing.
    unsafe fn bump_init();
    // Takes the source text as a pointer/length pair.
    unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
 }
 use util::defs::{parse_expr, Ast, AstNode};
 /// Parses the expression `3 + 4` through the external parser and prints the
 /// returned node id followed by the resulting AST.
 fn main() {
    // The allocator is set up before anything else touches the parser.
    unsafe { bump_init() };
    println!("Bump allocator initialized.");
    let input: &[u8] = b"3 + 4";
    unsafe {
        // Point the tokeniser at the in-memory source text.
        tokeniser_init_buf(input.as_ptr(), input.len());
        let mut tree = Ast {
            nodes: util::vec::Vec::new(),
        };
        let root = parse_expr(&mut tree);
        println!("Parsed expression with ID: {}", root);
        println!("{:#}", &tree);
    }
 }
 /// Human-readable rendering of a single AST node, chosen by its `kind` tag.
 impl std::fmt::Display for AstNode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use util::defs::{BinaryExpr, AST_BINARY_OP, AST_NUMBER};
        let tag = self.kind as u32;
        // Number nodes: the `data` pointer value itself is printed as the number.
        if tag == AST_NUMBER {
            return write!(f, "Number({})", self.data as usize);
        }
        // Binary operations: `data` is read as a `BinaryExpr`.
        if tag == AST_BINARY_OP {
            let expr: BinaryExpr = unsafe { self.data.cast::<util::defs::BinaryExpr>().read() };
            return write!(
                f,
                "BinaryOp(op: {}, left: {}, right: {})",
                expr.operator, expr.left, expr.right
            );
        }
        // Every other kind has no dedicated rendering yet.
        write!(f, "UnknownNode")
    }
 }
 /// Prints the node list as a bracketed, tab-indented listing with one
 /// `index: node` entry per line.
 impl core::fmt::Display for Ast {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("[\n")?;
        // The separator is empty for the first entry and ", \n" for every later one.
        let mut separator = "";
        for (index, node) in self.nodes.as_slice().iter().enumerate() {
            write!(f, "{separator}\t{index}: {node}")?;
            separator = ", \n";
        }
        f.write_str("\n]\n")
    }
 }
--- a/lang/tests/bump.rs
+++ b/lang/tests/bump.rs
@ -1,9 +1,7 @@
 #![feature(allocator_api, box_as_ptr)]
-#[unsafe(no_mangle)]
+#[path = "shared/shared.rs"]
-extern "C" fn panic() -> ! {
+mod util;
    panic!("Called panic from external code.");
 }
 unsafe extern "C" {
    unsafe fn bump_init();
--- a/lang/tests/int_to_str.rs
+++ b/lang/tests/int_to_str.rs
@ -1,22 +1,7 @@
-#[unsafe(no_mangle)]
+#[path = "shared/shared.rs"]
-extern "C" fn panic() -> ! {
+mod util;
    panic!("Called panic from external code.");
 }
-#[repr(C)]
+use util::FFISlice;
 struct FFISlice {
    ptr: *const u8,
    len: usize,
 }
 impl FFISlice {
    fn as_slice(&self) -> &[u8] {
        unsafe { core::slice::from_raw_parts(self.ptr, self.len) }
    }
    fn as_str(&self) -> &str {
        unsafe { core::str::from_utf8_unchecked(self.as_slice()) }
    }
 }
 unsafe extern "C" {
    unsafe fn int_to_str2(value: isize, buffer: *mut u8, buffer_len: usize, radix: u8) -> FFISlice;
--- a/lang/tests/shared/defs.rs
+++ b/lang/tests/shared/defs.rs
@ -0,0 +1,104 @@
 #![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]
 // Auto-generated Rust bindings from assembly source
 unsafe extern "C" {
    pub unsafe fn parse_func(ast: *mut Ast) -> u64;
    pub unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize);
    pub unsafe fn parse_primary_expr(ast: *mut Ast) -> u64;
    pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64;
    pub unsafe fn parse_expr(ast: *mut Ast) -> u64;
    pub unsafe fn parse_statement(ast: *mut Ast) -> u64;
    pub unsafe fn parse_block(ast: *mut Ast) -> u64;
    pub unsafe fn parse_type(ast: *mut Ast) -> Type;
 }
 pub const AST_FUNCTION: u32 = 1;
 pub const AST_BLOCK: u32 = 2;
 pub const AST_VARIABLE: u32 = 3;
 pub const AST_NUMBER: u32 = 4;
 pub const AST_BINARY_OP: u32 = 5;
 pub const AST_RETURN_STATEMENT: u32 = 6;
 pub const TYPE_VOID: u32 = 1;
 pub const TYPE_BOOL: u32 = 2;
 pub const TYPE_I32: u32 = 3;
 pub const TYPE_U32: u32 = 4;
 pub const TYPE_STR: u32 = 5;
 pub const TOKEN_EOF: u32 = 0;
 pub const TOKEN_LET: u32 = 1;
 pub const TOKEN_IF: u32 = 2;
 pub const TOKEN_ELSE: u32 = 3;
 pub const TOKEN_FN: u32 = 4;
 pub const TOKEN_RETURN: u32 = 5;
 pub const TOKEN_LOOP: u32 = 6;
 pub const TOKEN_BREAK: u32 = 7;
 pub const TOKEN_CONTINUE: u32 = 8;
 pub const TOKEN_TRUE: u32 = 9;
 pub const TOKEN_FALSE: u32 = 10;
 pub const TOKEN_BOOL: u32 = 11;
 pub const TOKEN_ARROW: u32 = 12;
 pub const TOKEN_I32: u32 = 13;
 pub const TOKEN_U32: u32 = 14;
 pub const TOKEN_EQUALS: u32 = 15;
 pub const TOKEN_PLUS: u32 = 16;
 pub const TOKEN_MINUS: u32 = 17;
 pub const TOKEN_RPARENS: u32 = 18;
 pub const TOKEN_LPARENS: u32 = 19;
 pub const TOKEN_RBRACE: u32 = 20;
 pub const TOKEN_LBRACE: u32 = 21;
 pub const TOKEN_COLON: u32 = 22;
 pub const TOKEN_SEMI: u32 = 23;
 pub const TOKEN_COMMA: u32 = 24;
 pub const TOKEN_PIPE: u32 = 25;
 pub const TOKEN_AMP: u32 = 26;
 pub const TOKEN_EQEQ: u32 = 27;
 pub const TOKEN_LBRACKET: u32 = 28;
 pub const TOKEN_RBRACKET: u32 = 29;
 pub const TOKEN_VOID: u32 = 30;
 pub const TOKEN_IDENT: u32 = 31;
 pub const TOKEN_NUMBER: u32 = 32;
 pub const TOKEN_STRING: u32 = 33;
 pub const TOKEN_COMMENT: u32 = 34;
 #[repr(C)]
 #[derive(Debug)]
 pub struct Ast {
    pub nodes: Vec<AstNode>,
 }
 #[repr(C)]
 #[derive(Debug)]
 pub struct AstNode {
    pub kind: u8,
    pub data: *const (),
 }
 #[repr(C)]
 #[derive(Debug)]
 pub struct Argument {
    pub name: *const u8,
    pub name_len: usize,
    pub arg_type: Type,
 }
 #[repr(C)]
 #[derive(Debug)]
 pub struct Type {
    pub kind: u8,
 }
 #[repr(C)]
 #[derive(Debug)]
 pub struct BinaryExpr {
    pub left: u64,
    pub operator: u8,
    pub right: u64,
 }
 #[repr(C)]
 #[derive(Debug)]
 pub struct Block {
    pub statements: *const u64,
    pub statements_len: usize,
 }
 use super::vec::Vec;
--- a/lang/tests/shared/shared.rs
+++ b/lang/tests/shared/shared.rs
@ -0,0 +1,355 @@
 #![allow(dead_code)]
 #[path = "defs.rs"]
 pub mod defs;
 /// Raises the Rust panic on behalf of the exported `panic` symbol.
 // NOTE(review): `inline(never)` presumably keeps this as its own frame in backtraces — confirm.
 #[inline(never)]
 fn __do_panic() -> ! {
    panic!("Called panic from external code.");
 }
 /// Exported under the unmangled name `panic` so non-Rust code can call it;
 /// it never returns and forwards to `__do_panic`.
 #[unsafe(no_mangle)]
 extern "C" fn panic() -> ! {
    __do_panic()
 }
 /// Pointer/length pair with C layout, used to pass slices across the FFI boundary.
 #[repr(C)]
 #[derive(Debug, PartialEq, Eq)]
 pub struct FFISlice {
    /// Start of the data.
    pub ptr: *const u8,
    /// Element count: bytes for `as_bytes`/`as_str`, items of `T` for `as_slice::<T>`.
    pub len: usize,
 }
 /// An `FFISlice` that may be absent; a null `ptr` encodes "none".
 /// `repr(transparent)` gives it the same layout as `FFISlice`.
 #[repr(transparent)]
 #[derive(Debug, PartialEq, Eq)]
 pub struct MaybeFFISlice {
    // Wrapped slice; only meaningful when its pointer is non-null.
    inner: FFISlice,
 }
 impl MaybeFFISlice {
    /// Returns `true` when the wrapped pointer is null, i.e. no slice is present.
    pub fn is_none(&self) -> bool {
        let data = self.inner.ptr;
        data.is_null()
    }
    /// Converts into `Some(slice)` when a slice is present, `None` otherwise.
    pub fn into_option(self) -> Option<FFISlice> {
        if !self.is_none() {
            return Some(self.inner);
        }
        None
    }
 }
 impl FFISlice {
    /// Views the data as `len` items of type `T`.
    ///
    /// # Safety
    /// `ptr` must be valid for reads of `len` properly aligned `T` values for as long
    /// as the returned slice is used.
    pub unsafe fn as_slice<T: Sized>(&self) -> &[T] {
        let start: *const T = self.ptr.cast();
        unsafe { core::slice::from_raw_parts(start, self.len) }
    }
    /// Views the data as `len` raw bytes.
    ///
    /// # Safety
    /// `ptr` must be valid for reads of `len` bytes.
    pub unsafe fn as_bytes(&self) -> &[u8] {
        unsafe { self.as_slice::<u8>() }
    }
    /// Views the data as a string without checking that it is UTF-8.
    ///
    /// # Safety
    /// Same requirements as `as_bytes`, and the bytes must be valid UTF-8.
    pub unsafe fn as_str(&self) -> &str {
        let bytes = unsafe { self.as_bytes() };
        unsafe { core::str::from_utf8_unchecked(bytes) }
    }
 }
 /// Untyped vector header with C layout, passed by pointer to the `vec_*` FFI functions.
 #[repr(C)]
 #[derive(Debug)]
 pub struct BlobVec {
    /// Start of the element storage.
    pub data: *mut u8,
    /// Number of elements currently stored.
    pub len: usize,
    // NOTE(review): presumably the allocated capacity in elements — confirm against vec.asm.
    pub cap: usize,
    /// Size of one element in bytes; the typed wrapper checks it against `size_of::<T>()`.
    pub elem_size: usize,
    /// Optional callback that receives a pointer to one element to destroy it.
    pub drop: Option<extern "C" fn(*mut u8)>,
 }
 impl Default for BlobVec {
    /// An all-zero header: null data pointer, zero length, capacity and element
    /// size, and no drop callback.
    fn default() -> Self {
        let data = core::ptr::null_mut();
        let drop = None;
        Self {
            data,
            len: 0,
            cap: 0,
            elem_size: 0,
            drop,
        }
    }
 }
 // The raw `data` pointer stops the compiler from deriving Send/Sync, so both are asserted by hand.
 // NOTE(review): nothing visible here synchronises access to the pointed-to storage — confirm this is sound.
 unsafe impl Send for BlobVec {}
 unsafe impl Sync for BlobVec {}
 pub mod vec {
    #![allow(dead_code)]
    use super::ffi::*;
    use super::*;
    /// Typed wrapper around a `BlobVec` whose elements are values of type `T`.
    /// `repr(transparent)` gives it the same layout as the wrapped `BlobVec`.
    #[repr(transparent)]
    #[derive(Debug)]
    pub struct Vec<T> {
        /// Untyped header handed to the `vec_*` FFI functions.
        pub vec: BlobVec,
        // Zero-sized marker recording the element type.
        _marker: core::marker::PhantomData<T>,
    }
    impl<T> Vec<T> {
        /// Creates a vector with an initial capacity of 32 elements.
        pub fn new() -> Self {
            Self::new_with(32)
        }
        /// Creates a vector by calling `vec_init_with` with `size_of::<T>()`, a drop
        /// callback for `T`, and the requested `capacity`.
        pub fn new_with(capacity: usize) -> Self {
            let mut vec = BlobVec {
                data: core::ptr::null_mut(),
                len: 0,
                cap: 0,
                elem_size: 0,
                drop: None,
            };
            // Callback given to the FFI side: runs `T`'s destructor in place.
            extern "C" fn drop_fn<T>(ptr: *mut u8) {
                unsafe {
                    core::ptr::drop_in_place::<T>(ptr as *mut T);
                }
            }
            unsafe {
                vec_init_with(
                    &mut vec,
                    core::mem::size_of::<T>(),
                    Some(drop_fn::<T>),
                    capacity,
                );
            }
            Self {
                vec,
                _marker: core::marker::PhantomData,
            }
        }
        /// Borrows the stored elements as a slice.
        ///
        /// Panics if the header's `elem_size` does not match `size_of::<T>()`.
        pub fn as_slice(&self) -> &[T] {
            assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
            unsafe { core::slice::from_raw_parts(self.vec.data as *const T, self.vec.len) }
        }
        /// Mutable counterpart of `as_slice`, with the same element-size check.
        pub fn as_slice_mut(&mut self) -> &mut [T] {
            assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
            unsafe { core::slice::from_raw_parts_mut(self.vec.data as *mut T, self.vec.len) }
        }
        /// Appends `value` through `vec_push`.
        pub fn push(&mut self, value: T) {
            // ManuallyDrop: the value's bytes are handed to the vector, so Rust must not
            // also run its destructor here.
            let value = core::mem::ManuallyDrop::new(value);
            unsafe {
                vec_push(&mut self.vec, &raw const value as *const T as *const u8);
            }
        }
        /// Inserts `value` at `index` through `vec_insert`.
        ///
        /// Does nothing (and drops `value` normally) when `index` is greater than the
        /// current length.
        pub fn insert(&mut self, value: T, index: usize) {
            if index > self.vec.len {
                return;
            }
            let value = core::mem::ManuallyDrop::new(value);
            unsafe {
                vec_insert(
                    &mut self.vec,
                    index,
                    &raw const value as *const T as *const u8,
                );
            }
        }
        /// Removes and returns the last element, or `None` when the vector is empty.
        pub fn pop(&mut self) -> Option<T> {
            if self.vec.len == 0 {
                return None;
            }
            unsafe {
                // The element is read out before `vec_pop` removes it.
                // NOTE(review): assumes `vec_pop` does not invoke the drop callback — confirm.
                let ptr = vec_get(&mut self.vec, self.vec.len - 1) as *mut T;
                let value = ptr.read();
                vec_pop(&mut self.vec);
                Some(value)
            }
        }
        /// Returns a reference to the element at `index`, or `None` when out of range.
        pub fn get(&self, index: usize) -> Option<&T> {
            if index >= self.vec.len {
                return None;
            }
            unsafe {
                // `vec_get` is declared with a `*mut` parameter, hence the cast from a shared borrow.
                // NOTE(review): relies on `vec_get` not writing to the header — confirm.
                let ptr = vec_get(&raw const self.vec as *mut _, index) as *mut T;
                Some(&*ptr)
            }
        }
        /// Returns a mutable reference to the element at `index`, or `None` when out of range.
        pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
            if index >= self.vec.len {
                return None;
            }
            unsafe {
                let ptr = vec_get(&raw mut self.vec, index) as *mut T;
                Some(&mut *ptr)
            }
        }
        /// Removes the element at `index` through `vec_remove`; does nothing when out of range.
        pub fn remove(&mut self, index: usize) {
            if index >= self.vec.len {
                return;
            }
            unsafe {
                vec_remove(&mut self.vec, index);
            }
        }
        /// Number of elements currently stored.
        pub fn len(&self) -> usize {
            self.vec.len
        }
        /// Searches for `elem` with `vec_find`, using `cmp` as the match predicate.
        ///
        /// Returns the index reported by `vec_find`, or `None` when it reports `usize::MAX`.
        pub fn position<F>(&self, elem: &T, mut cmp: F) -> Option<usize>
        where
            F: FnMut(&T, &T) -> bool,
        {
            // Adapter with the C callback signature: recovers the closure from the
            // opaque pointer and the elements from their byte pointers.
            extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> bool>(
                f: *const (),
                a: *const u8,
                b: *const u8,
            ) -> bool {
                let f = unsafe { &mut *(f as *mut F) };
                let a = unsafe { &*(a as *const T) };
                let b = unsafe { &*(b as *const T) };
                f(a, b)
            }
            unsafe {
                let index = vec_find(
                    &raw const self.vec as *mut _,
                    elem as *const T as *const u8,
                    cmp_trampoline::<T, F>,
                    &raw mut cmp as *mut F as *mut (),
                );
                if index == usize::MAX {
                    None
                } else {
                    Some(index)
                }
            }
        }
        /// Binary-searches for `elem` with `vec_binary_search_by`, using `cmp` as the
        /// three-way comparison.
        ///
        /// Returns `Ok(index)` when the FFI call reports a match and `Err(index)` when
        /// it reports the slot as vacant.
        pub fn binary_search_by<F>(&self, elem: &T, mut cmp: F) -> Result<usize, usize>
        where
            F: FnMut(&T, &T) -> i32,
        {
            // Same adapter as in `position`, for an `i32`-returning comparison.
            extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> i32>(
                f: *const (),
                a: *const u8,
                b: *const u8,
            ) -> i32 {
                let f = unsafe { &mut *(f as *mut F) };
                let a = unsafe { &*(a as *const T) };
                let b = unsafe { &*(b as *const T) };
                f(a, b)
            }
            unsafe {
                let (index, vacant) = vec_binary_search_by(
                    &raw const self.vec as *mut _,
                    elem as *const T as *const u8,
                    cmp_trampoline::<T, F>,
                    &raw mut cmp as *mut F as *mut (),
                );
                if vacant {
                    Err(index)
                } else {
                    Ok(index)
                }
            }
        }
        /// Inserts `elem` at its sorted position through `vec_insert_sorted`, using
        /// `cmp` as the three-way comparison.
        ///
        /// Takes `&mut self` because the call modifies the vector; the pointer given
        /// to the FFI function is derived from that exclusive borrow.
        ///
        /// Always returns `Ok(index)` with the index reported by the FFI call.
        pub fn insert_sorted<F>(&mut self, elem: T, mut cmp: F) -> Result<usize, usize>
        where
            F: FnMut(&T, &T) -> i32,
        {
            // Same adapter as in `binary_search_by`.
            extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> i32>(
                f: *const (),
                a: *const u8,
                b: *const u8,
            ) -> i32 {
                let f = unsafe { &mut *(f as *mut F) };
                let a = unsafe { &*(a as *const T) };
                let b = unsafe { &*(b as *const T) };
                f(a, b)
            }
            // ManuallyDrop: the element's bytes are handed to the vector.
            let elem = core::mem::ManuallyDrop::new(elem);
            unsafe {
                // NOTE(review): the second result is ignored; if it can signal "not inserted",
                // the element is never dropped in that case — confirm against vec.asm.
                let (index, _inserted) = vec_insert_sorted(
                    &raw mut self.vec,
                    &raw const elem as *const T as *const u8,
                    cmp_trampoline::<T, F>,
                    &raw mut cmp as *mut F as *mut (),
                );
                Ok(index)
            }
        }
    }
 }
 // Raw declarations of the external `vec_*` functions that operate on a `BlobVec` header.
 pub mod ffi {
    // Two functions below return `(usize, bool)`; Rust tuples have no defined C layout,
    // so the lint that would flag them is silenced for this module.
    // NOTE(review): assumes the implementation returns the pair the way rustc expects — confirm.
    #![allow(improper_ctypes)]
    use super::*;
    #[allow(dead_code)]
    unsafe extern "C" {
        // Initialise `vec` for elements of `elem_size` bytes with an optional drop callback.
        pub unsafe fn vec_init(
            vec: *mut BlobVec,
            elem_size: usize,
            drop: Option<extern "C" fn(*mut u8)>,
        );
        // As `vec_init`, with an extra `cap` argument (the typed wrapper passes its capacity here).
        pub unsafe fn vec_init_with(
            vec: *mut BlobVec,
            elem_size: usize,
            drop: Option<extern "C" fn(*mut u8)>,
            cap: usize,
        );
        // `elem` points at the bytes of the element to append.
        pub unsafe fn vec_push(vec: *mut BlobVec, elem: *const u8);
        // `elem` points at the bytes of the element to place at `index`.
        pub unsafe fn vec_insert(vec: *mut BlobVec, index: usize, elem: *const u8);
        // NOTE(review): presumably removes the last element without dropping it, while
        // `vec_drop_last` also runs the drop callback — confirm against vec.asm.
        pub unsafe fn vec_pop(vec: *mut BlobVec);
        pub unsafe fn vec_drop_last(vec: *mut BlobVec);
        // Returns a pointer to the element at `index`.
        pub unsafe fn vec_get(vec: *mut BlobVec, index: usize) -> *mut u8;
        pub unsafe fn vec_remove(vec: *mut BlobVec, index: usize);
        pub unsafe fn vec_drop(vec: *mut BlobVec);
        // `cmp` receives `cmp_data` first, then two element pointers.
        // The typed wrapper treats a result of `usize::MAX` as "not found".
        pub unsafe fn vec_find(
            vec: *mut BlobVec,
            elem: *const u8,
            cmp: extern "C" fn(*const (), *const u8, *const u8) -> bool,
            cmp_data: *mut (),
        ) -> usize;
        // Returns an index plus a flag; the typed wrapper maps a true flag to `Err(index)`.
        pub unsafe fn vec_binary_search_by(
            vec: *mut BlobVec,
            elem: *const u8,
            cmp: extern "C" fn(*const (), *const u8, *const u8) -> i32,
            cmp_data: *mut (),
        ) -> (usize, bool);
        // Returns an index plus a flag; the typed wrapper only uses the index.
        pub unsafe fn vec_insert_sorted(
            vec: *mut BlobVec,
            elem: *const u8,
            cmp: extern "C" fn(*const (), *const u8, *const u8) -> i32,
            cmp_data: *mut (),
        ) -> (usize, bool);
    }
 }
 /// Wrapper that prints a slice as `[a, b, c]` using each item's `Display` impl.
 pub struct DisplaySlice<'a, T>(pub &'a [T]);
 impl<'a, T: core::fmt::Display> core::fmt::Display for DisplaySlice<'a, T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("[")?;
        // The first item is written bare; each later item is preceded by ", ".
        let mut items = self.0.iter();
        if let Some(first) = items.next() {
            write!(f, "{}", first)?;
            for item in items {
                write!(f, ", {}", item)?;
            }
        }
        f.write_str("]")
    }
 }
--- a/lang/tests/tokens.rs
+++ b/lang/tests/tokens.rs
@ -1,7 +1,7 @@
-#[unsafe(no_mangle)]
+#[path = "shared/shared.rs"]
-extern "C" fn panic() -> ! {
+mod util;
-    panic!("Called panic from external code.");
+
-}
+use util::*;
 #[derive(Debug)]
 struct Lexeme(u8, &'static str);
@ -18,12 +18,6 @@ impl PartialEq for Lexeme {
 impl Eq for Lexeme {}
 impl Lexeme {
    fn lex(&self) -> &'static str {
        self.1
    }
 }
 trait AsLexeme {
    fn as_lexeme(self) -> Option<Lexeme>;
 }
@ -49,12 +43,15 @@ impl AsLexeme for LexemeRaw {
 #[allow(dead_code)]
 unsafe extern "C" {
    unsafe fn tokeniser_init(path: *const i8) -> ();
    unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
    unsafe fn tokeniser_print() -> ();
    unsafe fn is_ident(len: usize) -> bool;
    unsafe fn is_number(len: usize) -> bool;
    unsafe fn skip_whitespace() -> ();
    unsafe fn find_lexeme() -> LexemeRaw;
    unsafe fn expect_token(token: u8) -> MaybeFFISlice;
    unsafe fn unwrap_token(token: u8) -> FFISlice;
    static mut LEXEMES: *const u8;
    static mut LEXEME_LENS: usize;
@ -137,16 +134,16 @@ fn main() {
        assert_eq!(
            &collect_tokens()[..],
            &[
-                Lexeme(30, "this-is-an-ident"),
+                Lexeme(31, "this-is-an-ident"),
-                Lexeme(30, "another_ident123"),
+                Lexeme(31, "another_ident123"),
-                Lexeme(30, "_underscore_test"),
+                Lexeme(31, "_underscore_test"),
-                Lexeme(30, "mixedCASEIdent"),
+                Lexeme(31, "mixedCASEIdent"),
-                Lexeme(30, "number12345"),
+                Lexeme(31, "number12345"),
-                Lexeme(30, "____"),
+                Lexeme(31, "____"),
-                Lexeme(30, "_"),
+                Lexeme(31, "_"),
                Lexeme(17, ""),
-                Lexeme(30, "leading-minus"),
+                Lexeme(31, "leading-minus"),
-                Lexeme(30, "trailing-minus-"),
+                Lexeme(31, "trailing-minus-"),
            ]
        );
@ -158,7 +155,7 @@ fn main() {
            &collect_tokens()[..],
            &[
                Lexeme(4, ""),
-                Lexeme(30, "my-function"),
+                Lexeme(31, "my-function"),
                Lexeme(19, ""),
                Lexeme(18, ""),
                Lexeme(12, ""),
@ -171,6 +168,14 @@ fn main() {
            ]
        );
        eprint!("Initializing tokeniser.. ");
        tokeniser_init(c"tests/tokens/function.l".as_ptr());
        eprintln!("ok.");
        assert_eq!(expect_token(2).into_option(), None);
        assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
        assert_eq!(unwrap_token(31).as_str(), "my-function");
        eprint!("Initializing tokeniser.. ");
        tokeniser_init(c"tests/tokens/comment.l".as_ptr());
        eprintln!("ok.");
@ -178,15 +183,15 @@ fn main() {
        assert_eq!(
            &collect_tokens()[..],
            &[
-                Lexeme(33, ""),
+                Lexeme(34, ""),
                Lexeme(4, ""),
-                Lexeme(30, "my-function"),
+                Lexeme(31, "my-function"),
                Lexeme(19, ""),
                Lexeme(18, ""),
                Lexeme(12, ""),
                Lexeme(11, ""),
                Lexeme(21, ""),
-                Lexeme(33, ""),
+                Lexeme(34, ""),
                Lexeme(5, ""),
                Lexeme(10, ""),
                Lexeme(23, ""),
@ -201,11 +206,11 @@ fn main() {
        assert_eq!(
            &collect_tokens()[..],
            &[
-                Lexeme(31, "1234"),
+                Lexeme(32, "1234"),
-                Lexeme(31, "123_345_"),
+                Lexeme(32, "123_345_"),
-                Lexeme(31, "1234____56"),
+                Lexeme(32, "1234____56"),
-                Lexeme(31, "1"),
+                Lexeme(32, "1"),
-                Lexeme(31, "0"),
+                Lexeme(32, "0"),
            ]
        );
@ -216,14 +221,24 @@ fn main() {
        assert_eq!(
            &collect_tokens()[..],
            &[
-                Lexeme(32, "\"this is a string\""),
+                Lexeme(33, "\"this is a string\""),
-                Lexeme(32, "\"another\nstring\nspanning multiple\n   lines\""),
+                Lexeme(33, "\"another\nstring\nspanning multiple\n   lines\""),
-                Lexeme(32, "\"string with a \\\"quoted\\\" word\""),
+                Lexeme(33, "\"string with a \\\"quoted\\\" word\""),
-                Lexeme(32, "\"a\""),
+                Lexeme(33, "\"a\""),
-                Lexeme(32, "\"\"")
+                Lexeme(33, "\"\"")
            ],
        );
        eprint!("Initializing tokeniser.. ");
        let src = b"3 + 4";
        tokeniser_init_buf(src.as_ptr(), src.len());
        eprintln!("ok.");
        assert_eq!(
            &collect_tokens()[..],
            &[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")],
        );
        eprintln!("Finished tokenising.");
    }
 }
--- a/lang/tests/vec.rs
+++ b/lang/tests/vec.rs
@ -1,263 +1,7 @@
-#[repr(C)]
+#[path = "shared/shared.rs"]
-pub struct BlobVec {
+mod util;
    pub data: *mut u8,
    pub len: usize,
    pub cap: usize,
    pub elem_size: usize,
    pub drop: Option<extern "C" fn(*mut u8)>,
 }
-struct VecT<T> {
+use util::{ffi::*, vec::Vec, BlobVec};
    vec: BlobVec,
    _marker: core::marker::PhantomData<T>,
 }
 impl<T> VecT<T> {
    fn new() -> Self {
        Self::new_with(32)
    }
    fn new_with(capacity: usize) -> Self {
        let mut vec = BlobVec {
            data: core::ptr::null_mut(),
            len: 0,
            cap: 0,
            elem_size: 0,
            drop: None,
        };
        extern "C" fn drop_fn<T>(ptr: *mut u8) {
            unsafe {
                core::ptr::drop_in_place::<T>(ptr as *mut T);
            }
        }
        unsafe {
            vec_init_with(
                &mut vec,
                core::mem::size_of::<T>(),
                Some(drop_fn::<T>),
                capacity,
            );
        }
        Self {
            vec,
            _marker: core::marker::PhantomData,
        }
    }
    fn as_slice(&self) -> &[T] {
        assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
        unsafe { core::slice::from_raw_parts(self.vec.data as *const T, self.vec.len) }
    }
    fn as_slice_mut(&mut self) -> &mut [T] {
        assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
        unsafe { core::slice::from_raw_parts_mut(self.vec.data as *mut T, self.vec.len) }
    }
    fn push(&mut self, value: T) {
        let value = core::mem::ManuallyDrop::new(value);
        unsafe {
            vec_push(&mut self.vec, &raw const value as *const T as *const u8);
        }
    }
    fn insert(&mut self, value: T, index: usize) {
        if index > self.vec.len {
            return;
        }
        let value = core::mem::ManuallyDrop::new(value);
        unsafe {
            vec_insert(
                &mut self.vec,
                index,
                &raw const value as *const T as *const u8,
            );
        }
    }
    fn pop(&mut self) -> Option<T> {
        if self.vec.len == 0 {
            return None;
        }
        unsafe {
            let ptr = vec_get(&mut self.vec, self.vec.len - 1) as *mut T;
            let value = ptr.read();
            vec_pop(&mut self.vec);
            Some(value)
        }
    }
    fn get(&self, index: usize) -> Option<&T> {
        if index >= self.vec.len {
            return None;
        }
        unsafe {
            let ptr = vec_get(&raw const self.vec as *mut _, index) as *mut T;
            Some(&*ptr)
        }
    }
    fn get_mut(&mut self, index: usize) -> Option<&mut T> {
        if index >= self.vec.len {
            return None;
        }
        unsafe {
            let ptr = vec_get(&raw mut self.vec, index) as *mut T;
            Some(&mut *ptr)
        }
    }
    fn remove(&mut self, index: usize) {
        if index >= self.vec.len {
            return;
        }
        unsafe {
            vec_remove(&mut self.vec, index);
        }
    }
    fn len(&self) -> usize {
        self.vec.len
    }
    fn position<F>(&self, elem: &T, mut cmp: F) -> Option<usize>
    where
        F: FnMut(&T, &T) -> bool,
    {
        extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> bool>(
            f: *const (),
            a: *const u8,
            b: *const u8,
        ) -> bool {
            let f = unsafe { &mut *(f as *mut F) };
            let a = unsafe { &*(a as *const T) };
            let b = unsafe { &*(b as *const T) };
            f(a, b)
        }
        unsafe {
            let index = vec_find(
                &raw const self.vec as *mut _,
                elem as *const T as *const u8,
                cmp_trampoline::<T, F>,
                &raw mut cmp as *mut F as *mut (),
            );
            if index == usize::MAX {
                None
            } else {
                Some(index)
            }
        }
    }
    fn binary_search_by<F>(&self, elem: &T, mut cmp: F) -> Result<usize, usize>
    where
        F: FnMut(&T, &T) -> i32,
    {
        extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> i32>(
            f: *const (),
            a: *const u8,
            b: *const u8,
        ) -> i32 {
            let f = unsafe { &mut *(f as *mut F) };
            let a = unsafe { &*(a as *const T) };
            let b = unsafe { &*(b as *const T) };
            f(a, b)
        }
        unsafe {
            let (index, vacant) = vec_binary_search_by(
                &raw const self.vec as *mut _,
                elem as *const T as *const u8,
                cmp_trampoline::<T, F>,
                &raw mut cmp as *mut F as *mut (),
            );
            if vacant {
                Err(index)
            } else {
                Ok(index)
            }
        }
    }
    fn insert_sorted<F>(&self, elem: T, mut cmp: F) -> Result<usize, usize>
    where
        F: FnMut(&T, &T) -> i32,
    {
        extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> i32>(
            f: *const (),
            a: *const u8,
            b: *const u8,
        ) -> i32 {
            let f = unsafe { &mut *(f as *mut F) };
            let a = unsafe { &*(a as *const T) };
            let b = unsafe { &*(b as *const T) };
            f(a, b)
        }
        let mut elem = core::mem::ManuallyDrop::new(elem);
        unsafe {
            let (index, inserted) = vec_insert_sorted(
                &raw const self.vec as *mut _,
                &raw const elem as *const u8,
                cmp_trampoline::<T, F>,
                &raw mut cmp as *mut F as *mut (),
            );
            Ok(index)
        }
    }
 }
 #[unsafe(no_mangle)]
 extern "C" fn panic() -> ! {
    panic!("Called panic from external code.");
 }
 unsafe impl Send for BlobVec {}
 unsafe impl Sync for BlobVec {}
 unsafe extern "C" {
    unsafe fn vec_init(vec: *mut BlobVec, elem_size: usize, drop: Option<extern "C" fn(*mut u8)>);
    unsafe fn vec_init_with(
        vec: *mut BlobVec,
        elem_size: usize,
        drop: Option<extern "C" fn(*mut u8)>,
        cap: usize,
    );
    unsafe fn vec_push(vec: *mut BlobVec, elem: *const u8);
    unsafe fn vec_insert(vec: *mut BlobVec, index: usize, elem: *const u8);
    unsafe fn vec_pop(vec: *mut BlobVec);
    unsafe fn vec_drop_last(vec: *mut BlobVec);
    unsafe fn vec_get(vec: *mut BlobVec, index: usize) -> *mut u8;
    #[allow(dead_code)]
    unsafe fn vec_remove(vec: *mut BlobVec, index: usize);
    #[allow(dead_code)]
    unsafe fn vec_drop(vec: *mut BlobVec);
    unsafe fn vec_find(
        vec: *mut BlobVec,
        elem: *const u8,
        cmp: extern "C" fn(*const (), *const u8, *const u8) -> bool,
        cmp_data: *mut (),
    ) -> usize;
    unsafe fn vec_binary_search_by(
        vec: *mut BlobVec,
        elem: *const u8,
        cmp: extern "C" fn(*const (), *const u8, *const u8) -> i32,
        cmp_data: *mut (),
    ) -> (usize, bool);
    unsafe fn vec_insert_sorted(
        vec: *mut BlobVec,
        elem: *const u8,
        cmp: extern "C" fn(*const (), *const u8, *const u8) -> i32,
        cmp_data: *mut (),
    ) -> (usize, bool);
 }
 fn main() {
    static mut DROPS: usize = 1;
@ -318,7 +62,7 @@ fn main() {
        eprintln!("Push/pop test passed\n");
    }
-    let mut vec = VecT::<u32>::new_with(100);
+    let mut vec = Vec::<u32>::new_with(100);
    assert_eq!(vec.len(), 0);
    vec.push(10);
    vec.push(20);
@ -358,6 +102,6 @@ fn main() {
    assert_eq!(vec.binary_search_by(&5, cmp), Err(0));
    assert_eq!(vec.binary_search_by(&55, cmp), Err(4));
-    vec.insert_sorted(35, cmp);
+    _ = vec.insert_sorted(35, cmp);
    assert_eq!(vec.as_slice(), &[20, 30, 35, 40, 50]);
 }
Author	SHA1	Message	Date
janis	8f4d626968	ast tests	2025-10-29 22:10:34 +01:00
janis	4e55fa74f4	parse structs and functions from asm for rust tests	2025-10-29 22:10:14 +01:00
janis	5ae3e17693	initial ast	2025-10-29 20:39:32 +01:00
janis	bf9d07b462	init tokeniser with buffer	2025-10-29 20:39:22 +01:00
janis	46053090f4	initial parsing	2025-10-29 16:21:15 +01:00
janis	39e8d6ae96	move defintions out of tokeniser into include file	2025-10-29 16:21:01 +01:00
janis	62751f30ab	move more stuff to shared test file	2025-10-29 16:20:42 +01:00
janis	86bbab90c3	modularise test with shared rust structs add expect/unwrap token methods to tokeniser to aid with parsing	2025-10-29 14:00:17 +01:00