Compare commits

...

8 commits

Author SHA1 Message Date
janis 8f4d626968
ast tests 2025-10-29 22:10:34 +01:00
janis 4e55fa74f4
parse structs and functions from asm for rust tests 2025-10-29 22:10:14 +01:00
janis 5ae3e17693
initial ast 2025-10-29 20:39:32 +01:00
janis bf9d07b462
init tokeniser with buffer 2025-10-29 20:39:22 +01:00
janis 46053090f4
initial parsing 2025-10-29 16:21:15 +01:00
janis 39e8d6ae96
move definitions out of tokeniser into include file 2025-10-29 16:21:01 +01:00
janis 62751f30ab
move more stuff to shared test file 2025-10-29 16:20:42 +01:00
janis 86bbab90c3
modularise test with shared rust structs
add expect/unwrap token methods to tokeniser to aid with parsing
2025-10-29 14:00:17 +01:00
12 changed files with 1686 additions and 522 deletions

View file

@ -1,7 +1,7 @@
# Makefile: Compile and link main.asm using nasm and mold, intermediate files in target/
TARGET_DIR := target
SRC := src/lib.asm src/int_to_str.asm src/vec.asm src/tokeniser.asm src/file.asm src/alloc.asm
SRC := src/lib.asm src/int_to_str.asm src/vec.asm src/tokeniser.asm src/file.asm src/alloc.asm src/ast.asm
OBJ := $(patsubst src/%.asm,$(TARGET_DIR)/%.o,$(SRC))
BIN_SRC := src/main.asm src/panic.asm
@ -29,8 +29,12 @@ fmt: $(wildcard tests/*.rs)
@echo "Formatting test source files..."
rustfmt --edition 2024 $^
tests/shared/defs.rs: $(wildcard src/*)
@echo "Generating shared definitions for tests..."
./tests/asm_to_rust.py $^ -o $@
# pattern rule: compile each .rs into a binary with the same base name
$(TARGET_DIR)/tests/%: tests/%.rs | $(OBJ) $(TARGET_DIR)/tests
$(TARGET_DIR)/tests/%: tests/%.rs tests/shared/defs.rs $(OBJ) | $(TARGET_DIR)/tests
@echo "[$(RUSTC)] $< -> $@"
rustc -Clink-arg=-fuse-ld=mold --edition=2024 $(OBJ_LINK_ARGS) -g -o $@ $<

489
lang/src/ast.asm Normal file
View file

@ -0,0 +1,489 @@
default rel
%include "src/tokeniser.inc"
section .rdata
;; start-consts
AST_FUNCTION equ 1
AST_BLOCK equ 2
AST_VARIABLE equ 3
AST_NUMBER equ 4
AST_BINARY_OP equ 5
AST_RETURN_STATEMENT equ 6
TYPE_VOID equ 1
TYPE_BOOL equ 2
TYPE_I32 equ 3
TYPE_U32 equ 4
TYPE_STR equ 5
;; end-consts
section .text
extern vec_init_with
extern vec_push
extern vec_get
extern panic
extern memcpy
extern vec_binary_search_by
extern vec_insert
extern bump_alloc
extern tokeniser_init
extern find_lexeme
extern peek_lexeme
extern expect_token
extern unwrap_token
extern peek_expect_token
extern str_to_int
global parse_func
global parse_args
global parse_expr
global parse_binary_expr
global parse_primary_expr
global parse_statement
global parse_block
;; start very simple, with only functions and addition
;; ```rust
;; use super::vec::Vec;
;; ```
;; start-structs
;; struct Ast {
;; nodes: Vec<AstNode>,
;; }
;;
;; struct AstNode {
;; kind: u8,
;; data: *const (),
;; }
;;
;; struct Argument {
;; name: *const u8,
;; name_len: usize,
;; arg_type: Type,
;; }
;;
;; struct Type {
;; kind: u8,
;; }
;; end-structs
;; rdi: *mut Ast
;; define-fn: fn parse_func(ast: *mut Ast) -> u64
;; Parse `fn name(args...) -> type { body }`, push an AST_FUNCTION node
;; onto Ast.nodes, and return that node's index.
;;
;; Fixes vs. previous revision:
;;  - args_ptr/args_len are zeroed up front so the no-args path does not
;;    copy uninitialized stack garbage into the Function struct
;;  - the allocation pointer is kept in a stack slot across memcpy instead
;;    of assuming memcpy preserves rdi (caller-saved per SysV)
;;  - the final length read reloads the Ast pointer instead of dereferencing
;;    rdi, which vec_push may clobber (sibling functions already reload)
parse_func:
    push rbp
    mov rbp, rsp
    push rdi                        ; Ast -> [rsp + 56] after sub below
    sub rsp, 56
    ; Function locals (first 48 bytes are memcpy'd into the allocation):
    ;   name:        *const u8   [0..8]
    ;   name_len:    usize       [8..16]
    ;   args_ptr:    *const Arg  [16..24]
    ;   args_len:    usize       [24..32]
    ;   return_type: Type        [32..40]
    ;   body:        u64         [40..48]
    ;   scratch (Function* after bump_alloc) [48..56]
    mov qword [rsp + 16], 0         ; default: no arguments
    mov qword [rsp + 24], 0
    mov dil, TOKEN_FN
    call unwrap_token
    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp], rax                  ; function name
    mov [rsp + 8], rdx              ; function name length
    mov dil, TOKEN_LPARENS
    call unwrap_token
    mov dil, TOKEN_RPARENS
    call expect_token
    test rax, rax
    je .args                        ; no immediate ')': parse the arg list
.after_args:
    mov dil, TOKEN_ARROW
    call unwrap_token
    mov rdi, [rsp + 56]             ; Ast
    call parse_type
    mov [rsp + 32], rax             ; return type
    mov dil, TOKEN_LBRACE
    call peek_expect_token          ; body must start with '{'
    test rax, rax
    je .panic
    mov rdi, [rsp + 56]             ; Ast
    call parse_block
    mov [rsp + 40], rax             ; body node index
.epilogue:
    mov rdi, 48                     ; sizeof Function
    mov rsi, 8                      ; alignment
    call bump_alloc
    mov [rsp + 48], rax             ; keep Function* alive across memcpy
    mov rdi, rax
    mov rsi, rsp
    mov rdx, 48
    call memcpy
    mov rdx, [rsp + 48]
    mov byte [rsp], AST_FUNCTION    ; AstNode.kind (reuses copied slots)
    mov [rsp + 8], rdx              ; AstNode.data = Function*
    mov rdi, [rsp + 56]             ; Ast
    lea rsi, [rsp]
    call vec_push
    mov rdi, [rsp + 56]             ; reload Ast: vec_push clobbers rdi
    mov rax, [rdi + 8]              ; Ast.nodes.len()
    dec rax                         ; index of the node just pushed
    add rsp, 56
    pop rdi
    pop rbp
    ret
.args:
    mov rdi, [rsp + 56]             ; Ast
    call parse_args
    mov [rsp + 16], rax             ; args_ptr
    mov [rsp + 24], rdx             ; args_len
    jmp .after_args
.panic:
    call panic
;; rdi: *mut Ast
;; define-fn: fn parse_args(ast: *mut Ast) -> (*const Argument, usize)
;; Parse a comma-separated `name: type` list up to and including the
;; closing ')'. Returns the argument array pointer (rax) and count (rdx).
;;
;; FIX: the loop's ')' check was inverted (`jz .done_args`): expect_token
;; returns a non-zero lexeme pointer on match and 0 on mismatch (see
;; unwrap_token and the same check in parse_func), so the old code bailed
;; out whenever the next token was NOT ')' and never parsed any argument.
parse_args:
    push rbp
    mov rbp, rsp
    push rdi                        ; Ast -> [rsp + 64] after sub below
    sub rsp, 64
    ; Argument scratch: name [0..8], name_len [8..16], arg_type [16..24]
    ; args vec (5 qwords)                            [24..64]
    lea rdi, [rsp + 24]             ; vec
    mov rsi, 24                     ; sizeof Argument
    mov rdx, 0                      ; drop = None
    mov rcx, 16                     ; initial capacity
    call vec_init_with
.loop:
    mov dil, TOKEN_RPARENS
    call expect_token
    test rax, rax
    jnz .done_args                  ; ')' consumed -> list finished (was jz)
    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp], rax                  ; arg name
    mov [rsp + 8], rdx              ; arg name length
    mov dil, TOKEN_COLON
    call unwrap_token
    mov rdi, [rsp + 64]             ; Ast
    call parse_type
    mov [rsp + 16], rax             ; arg type
    lea rdi, [rsp + 24]             ; vec
    lea rsi, [rsp]                  ; &Argument
    call vec_push
    mov dil, TOKEN_COMMA
    call expect_token
    test rax, rax
    jz .end_loop                    ; no ',' -> ')' must follow
    jmp .loop
.end_loop:
    mov dil, TOKEN_RPARENS
    call unwrap_token
.done_args:
    mov rax, [rsp + 24]             ; args_ptr (vec.ptr)
    mov rdx, [rsp + 32]             ; args_len (vec.len)
    add rsp, 64
    pop rdi
    pop rbp
    ret
;; rdi: lexeme ptr
;; rsi: lexeme len
;; fn parse_number(lexeme: *const u8, lexeme_len: usize) -> u64
;; Parse an integer literal, honouring the radix prefixes 0x / 0o / 0b.
;;
;; FIX: the prefix test had the two characters swapped — it gated on the
;; SECOND char being 'x' and then compared the FIRST char against
;; '0'/'o'/'b'. Thus "0o7"/"0b1" were parsed as decimal and a string like
;; "ox.." hit the octal path. A prefix is first char '0', second char in
;; {x, o, b}; "0<digit>..." is treated as plain decimal.
parse_number:
    push rbp
    mov rbp, rsp
    push rbx                        ; we use bl; rbx is callee-saved
    sub rsp, 16
    mov [rsp], rdi                  ; lexeme ptr
    mov [rsp + 8], rsi              ; lexeme len
    cmp rsi, 2
    jbe .dec_radix                  ; too short for a prefix + digit
    mov al, byte [rdi]              ; first char
    mov bl, byte [rdi + 1]          ; second char
    cmp al, '0'                     ; a radix prefix starts with '0'
    jne .dec_radix
    cmp bl, 'x'
    je .hex_radix
    cmp bl, 'o'
    je .oct_radix
    cmp bl, 'b'
    je .bin_radix
    jmp .dec_radix                  ; "0<digit>...": plain decimal
.hex_radix:
    mov rax, 16
    jmp .radix_set
.oct_radix:
    mov rax, 8
    jmp .radix_set
.bin_radix:
    mov rax, 2
    jmp .radix_set
.dec_radix:
    mov rax, 10
    jmp .parse
.radix_set:
    add qword [rsp], 2              ; skip the two prefix chars
    sub qword [rsp + 8], 2
.parse:
    mov rdi, [rsp]                  ; lexeme ptr (past prefix if any)
    mov rsi, [rsp + 8]              ; lexeme len
    mov rdx, rax                    ; radix
    call str_to_int
    add rsp, 16
    pop rbx
    pop rbp
    ret
;; rdi: *mut Ast
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> u64
;; Parse a primary expression. Only number literals are handled so far;
;; anything else panics. Pushes an AST_NUMBER node whose `data` slot holds
;; the parsed value itself (no allocation) and returns the node's index.
parse_primary_expr:
    push rbp
    mov rbp, rsp
    sub rsp, 24
    ; frame: Ast [0..8], AstNode scratch {kind, data} [8..24]
    mov [rsp], rdi                  ; Ast
    mov dil, TOKEN_NUMBER
    call expect_token               ; rax = lexeme ptr, 0 if not a number
    test rax, rax
    jnz .number
    jmp .panic                      ; unsupported primary expression
.number:
    mov rdi, rax                    ; lexeme ptr
    mov rsi, rdx                    ; lexeme len
    call parse_number               ; rax = numeric value
    mov rdi, [rsp]                  ; Ast
    mov byte [rsp + 8], AST_NUMBER  ; AstNode.kind
    mov [rsp + 16], rax             ; AstNode.data = value stored inline
    lea rsi, [rsp + 8]              ; &AstNode
    call vec_push
    mov rdi, [rsp]                  ; reload Ast: vec_push clobbers rdi
    mov rax, [rdi + 8]              ; Ast.nodes.len()
    dec rax                         ; index of the node just pushed
    add rsp, 24
    pop rbp
    ret
.panic:
    call panic
;; rdi: *mut Ast
;; sil: precedence
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64
;; Precedence-climbing binary-expression parser: parse a primary, then
;; keep folding `<left> <op> <right>` while the peeked token binds tighter
;; than `precedence`. Returns the node index of the resulting expression.
parse_binary_expr:
    push rbp
    mov rbp, rsp
    ; size: 24, align: 8
    ; start-structs
    ; struct BinaryExpr {
    ;     left: u64,
    ;     operator: u8,
    ;     right: u64,
    ; }
    ; end-structs
    sub rsp, 64
    ; frame layout:
    ; lexeme: Lexeme [32..56]  (reused as AstNode scratch once consumed)
    ; right: u64 [24..32]
    ; precedence: u8 [17..18]
    ; operator: u8 [16..17]
    ; left: u64 [8..16]
    ; rdi: *mut Ast [0..8]
    mov [rsp], rdi                  ; Ast
    mov byte [rsp + 17], sil        ; upper_precedence (binding floor)
    mov byte [rsp + 16], 0          ; operator: none yet
    call parse_primary_expr         ; rdi is still the Ast pointer here
    mov [rsp + 8], rax              ; left
.loop:
    lea rdi, [rsp + 32]             ; lexeme out-struct
    call peek_lexeme                ; look ahead without consuming
    mov rax, [rsp + 32]             ; peeked token kind
    mov dil, [rsp + 17]
    ; NOTE(review): the raw token id doubles as operator precedence in a
    ; signed byte compare — confirm the token numbering keeps this ordering
    ; as more operators are added.
    cmp al, dil                     ; our_precedence <= upper_precedence
    jle .done                       ; also covers some non-binary operator tokens
    cmp al, TOKEN_PLUS
    je .plus
    jmp .done                       ; not a recognised binary operator
.plus:
    mov dil, TOKEN_PLUS
    call unwrap_token               ; consume the '+'
    mov byte [rsp + 16], TOKEN_PLUS
    jmp .right
.right:
    mov rdi, [rsp]                  ; Ast
    mov sil, [rsp + 16]             ; operator id = new binding floor
    call parse_binary_expr          ; parse right-hand side
    mov [rsp + 24], rax             ; right
    mov rdi, 24                     ; sizeof BinaryExpr
    mov rsi, 8                      ; alignment
    call bump_alloc
    mov rdx, [rsp + 8]              ; left
    mov [rax + 0], rdx              ; BinaryExpr.left
    mov dl, byte [rsp + 16]         ; operator
    mov byte [rax + 8], dl          ; BinaryExpr.operator
    mov rdx, [rsp + 24]             ; right
    mov [rax + 16], rdx             ; BinaryExpr.right
    mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind (reuses lexeme slot)
    mov [rsp + 40], rax             ; AstNode.data
    mov rdi, [rsp]                  ; Ast
    lea rsi, [rsp + 32]             ; &AstNode
    call vec_push
    mov rdi, [rsp]                  ; reload Ast: vec_push clobbers rdi
    mov rax, [rdi + 8]              ; Ast.nodes.len()
    dec rax                         ; index of the node just pushed
    mov [rsp + 8], rax              ; folded node becomes the new left
    jmp .loop
.done:
    mov rax, [rsp + 8]              ; left = final expression node index
    add rsp, 64
    pop rbp
    ret
;; rdi: *mut Ast
;; define-fn: fn parse_expr(ast: *mut Ast) -> u64
;; Entry point for expression parsing: parse_binary_expr with the lowest
;; binding floor (0). rdi already carries the Ast pointer straight through.
;;
;; FIX: the previous revision reserved 8 stack bytes and stored rdi there
;; without ever reading it back (dead store), and the odd 8-byte frame left
;; rsp misaligned at the call site. Both the store and the frame are gone.
parse_expr:
    push rbp
    mov rbp, rsp
    mov sil, 0                      ; minimum precedence
    call parse_binary_expr          ; rax = root expression node index
    pop rbp
    ret
;; rdi: *mut Ast
;; define-fn: fn parse_statement(ast: *mut Ast) -> u64
;; Parse a single statement (currently only `return <expr>;`), push an
;; AST_RETURN_STATEMENT node and return its index.
;;
;; Fixes vs. previous revision:
;;  - the `return` check was inverted (`jz .return` took the panic path on
;;    an actual `return` token, since expect_token returns non-zero on match)
;;  - rdi was not reloaded with the Ast pointer before parse_expr
;;    (expect_token clobbers rdi internally)
;;  - AstNode.data pointed at a stack slot that dangles after this function
;;    returns; the expression index is now bump-allocated instead
parse_statement:
    push rbp
    mov rbp, rsp
    sub rsp, 32
    ; frame: AstNode.kind/result [0..8], AstNode.data [8..16],
    ;        expr index [16..24], Ast [24..32]
    mov [rsp + 24], rdi             ; Ast
    mov dil, TOKEN_RETURN
    call expect_token
    test rax, rax
    jnz .return                     ; 'return' consumed (was jz)
    jmp .panic                      ; only return statements are supported
.return:
    mov rdi, [rsp + 24]             ; Ast (expect_token clobbered rdi)
    call parse_expr
    mov [rsp + 16], rax             ; expression node index
    mov rdi, 8                      ; allocate the u64 payload so the node's
    mov rsi, 8                      ; data pointer outlives this frame
    call bump_alloc
    mov rdx, [rsp + 16]
    mov [rax], rdx                  ; *data = expression index
    mov byte [rsp], AST_RETURN_STATEMENT ; AstNode.kind
    mov [rsp + 8], rax              ; AstNode.data
    mov rdi, [rsp + 24]             ; Ast
    mov rsi, rsp                    ; &AstNode
    call vec_push
    mov rdi, [rsp + 24]             ; reload Ast: vec_push clobbers rdi
    mov rax, [rdi + 8]              ; Ast.nodes.len()
    dec rax                         ; index of the statement node
    mov [rsp], rax                  ; stash across unwrap_token
    mov dil, TOKEN_SEMI
    call unwrap_token               ; statements end with ';'
    mov rax, [rsp]                  ; return the statement node index
    add rsp, 32
    pop rbp
    ret
.panic:
    call panic
;; rdi: *mut Ast
;; define-fn: fn parse_block(ast: *mut Ast) -> u64
;; Parse `{ statement* }`, collect the statement node indices, push an
;; AST_BLOCK node holding them, and return that node's index.
;;
;; Fixes vs. previous revision (the function was half-wired):
;;  - the statements vec was used without ever being vec_init_with'd
;;  - [rsp + 16] was passed to parse_statement as "Ast" although the saved
;;    Ast pointer lives at [rsp + 56]
;;  - the closing '}' was never consumed; the loop now terminates on it
;;  - the pushed node had no kind byte (the vec struct itself was pushed);
;;    a proper Block {ptr, len} is now bump-allocated and wrapped in an
;;    AST_BLOCK AstNode
;; NOTE(review): vec layout (ptr at +0, len at +8, 40-byte struct) is
;; inferred from parse_args / vec_init_with usage — confirm against vec.asm.
;; start-structs
;; struct Block {
;;     statements: *const u64,
;;     statements_len: usize,
;; }
;; end-structs
parse_block:
    push rbp
    mov rbp, rsp
    push rdi                        ; Ast -> [rsp + 56] after sub below
    sub rsp, 56
    ; frame: AstNode.kind [0..8], AstNode.data / statement scratch [8..16],
    ;        statements vec (5 qwords) [16..56]
    mov dil, TOKEN_LBRACE
    call unwrap_token
    lea rdi, [rsp + 16]             ; statements vec
    mov rsi, 8                      ; element = u64 statement node index
    mov rdx, 0                      ; drop = None
    mov rcx, 16                     ; initial capacity
    call vec_init_with
.loop:
    mov dil, TOKEN_RBRACE
    call expect_token
    test rax, rax
    jnz .done                       ; '}' consumed -> block finished
    mov rdi, [rsp + 56]             ; Ast
    call parse_statement
    mov [rsp + 8], rax              ; statement node index
    lea rdi, [rsp + 16]             ; statements vec
    lea rsi, [rsp + 8]              ; &index
    call vec_push
    jmp .loop
.done:
    mov rdi, 16                     ; sizeof Block
    mov rsi, 8                      ; alignment
    call bump_alloc
    mov rdx, [rsp + 16]             ; vec.ptr
    mov [rax], rdx                  ; Block.statements
    mov rdx, [rsp + 24]             ; vec.len
    mov [rax + 8], rdx              ; Block.statements_len
    mov byte [rsp], AST_BLOCK       ; AstNode.kind
    mov [rsp + 8], rax              ; AstNode.data
    mov rdi, [rsp + 56]             ; Ast
    mov rsi, rsp                    ; &AstNode
    call vec_push
    mov rdi, [rsp + 56]             ; reload Ast: vec_push clobbers rdi
    mov rax, [rdi + 8]              ; Ast.nodes.len()
    dec rax                         ; index of the block node
    add rsp, 56
    pop rdi
    pop rbp
    ret
;; rdi: *mut Ast (currently unused by this function)
;; define-fn: fn parse_type(ast: *mut Ast) -> Type
;; Consume one token and map it to a TYPE_* constant in rax.
;; Panics on any token that is not a known type keyword.
parse_type:
    push rbp
    mov rbp, rsp
    push rdi
    sub rsp, 24                     ; find_lexeme out-struct {token, ptr, len}
    mov rdi, rsp
    call find_lexeme ; TODO: use peek here to allow failing gracefully
    mov rax, [rsp]                  ; token kind
    cmp al, TOKEN_I32
    je .i32_type
    cmp al, TOKEN_U32
    je .u32_type
    cmp al, TOKEN_VOID
    je .void_type
    cmp al, TOKEN_BOOL
    je .bool_type
    jmp .panic                      ; not a type keyword
.i32_type:
    mov rax, TYPE_I32
    jmp .epilogue
.u32_type:
    mov rax, TYPE_U32
    jmp .epilogue
.void_type:
    mov rax, TYPE_VOID
    jmp .epilogue
.bool_type:
    mov rax, TYPE_BOOL
    jmp .epilogue
.epilogue:
    add rsp, 24
    pop rdi
    pop rbp
    ret
.panic:
    call panic

View file

@ -19,8 +19,13 @@ extern is_id_start
extern is_whitespace
global tokeniser_init
global tokeniser_init_buf
global tokeniser_print
global find_lexeme
global expect_token
global unwrap_token
global peek_expect_token
global peek_lexeme
;; =============================
;; Tokeniser functions
@ -50,6 +55,22 @@ section .bss
statbuf: resb 144
section .text
;; Point the tokeniser at an in-memory buffer instead of a file, and
;; reset the cursor to the start. Used by tests to lex fixed strings.
;; rdi: pointer to buffer
;; rsi: length of buffer
tokeniser_init_buf:
    push rbp
    mov rbp, rsp
    ; NOTE(review): only the low 32 bits of input_file are cleared here —
    ; confirm input_file is a dword fd; if it is a qword this leaves stale
    ; upper bits behind.
    mov dword [rel input_file], 0
    mov qword [rel buffer], rdi
    mov qword [rel buffer_len], rsi
    mov qword [rel cursor], 0       ; start lexing at offset 0
    pop rbp
    ret
;; Initialises the tokeniser
;; rdx: pointer to filename (null-terminated)
tokeniser_init:
@ -113,210 +134,7 @@ global TOKENS
global LEXEME_LENS
global NUM_LEXEMES
align 8
LEXEMES:
dq LEX_NOT_A_LEXEME
dq LEX_LET
dq LEX_IF
dq LEX_ELSE
dq LEX_FN
dq LEX_RETURN
dq LEX_LOOP
dq LEX_BREAK
dq LEX_CONTINUE
dq LEX_TRUE
dq LEX_FALSE
dq LEX_BOOL
dq LEX_ARROW
dq LEX_I32
dq LEX_U32
dq LEX_EQUALS
dq LEX_PLUS
dq LEX_MINUS
dq LEX_RPARENS
dq LEX_LPARENS
dq LEX_RBRACE
dq LEX_LBRACE
dq LEX_COLON
dq LEX_SEMI
dq LEX_COMMA
dq LEX_PIPE
dq LEX_AMP
dq LEX_EQEQ
dq LEX_LBRACKET
dq LEX_RBRACKET
align 8
TOKENS:
db TOKEN_EOF ;; 0
db TOKEN_LET ;; 1
db TOKEN_IF ;; 2
db TOKEN_ELSE ;; 3
db TOKEN_FN ;; 4
db TOKEN_RETURN ;; 5
db TOKEN_LOOP ;; 6
db TOKEN_BREAK ;; 7
db TOKEN_CONTINUE ;; 8
db TOKEN_TRUE ;; 9
db TOKEN_FALSE ;; 10
db TOKEN_BOOL ;; 11
db TOKEN_ARROW ;; 12
db TOKEN_I32 ;; 13
db TOKEN_U32 ;; 14
db TOKEN_EQUALS ;; 15
db TOKEN_PLUS ;; 16
db TOKEN_MINUS ;; 17
db TOKEN_RPARENS ;; 18
db TOKEN_LPARENS ;; 19
db TOKEN_RBRACE ;; 20
db TOKEN_LBRACE ;; 21
db TOKEN_COLON ;; 22
db TOKEN_SEMI ;; 23
db TOKEN_COMMA ;; 24
db TOKEN_PIPE ;; 25
db TOKEN_AMP ;; 26
db TOKEN_EQEQ ;; 27
db TOKEN_LBRACKET ;; 28
db TOKEN_RBRACKET ;; 29
align 8
LEXEME_LENS:
dq 0
dq LEX_LET_len
dq LEX_IF_len
dq LEX_ELSE_len
dq LEX_FN_len
dq LEX_RETURN_len
dq LEX_LOOP_len
dq LEX_BREAK_len
dq LEX_CONTINUE_len
dq LEX_TRUE_len
dq LEX_FALSE_len
dq LEX_BOOL_len
dq LEX_ARROW_len
dq LEX_I32_len
dq LEX_U32_len
dq LEX_EQUALS_len
dq LEX_PLUS_len
dq LEX_MINUS_len
dq LEX_RPARENS_len
dq LEX_LPARENS_len
dq LEX_RBRACE_len
dq LEX_LBRACE_len
dq LEX_COLON_len
dq LEX_SEMI_len
dq LEX_COMMA_len
dq LEX_PIPE_len
dq LEX_AMP_len
dq LEX_EQEQ_len
dq LEX_LBRACKET_len
dq LEX_RBRACKET_len
align 8
NUM_LEXEMES: dq 30
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
TOKEN_EOF equ 0
TOKEN_LET equ 1
LEX_LET db "let"
LEX_LET_len equ $ - LEX_LET
TOKEN_IF equ 2
LEX_IF db "if"
LEX_IF_len equ $ - LEX_IF
TOKEN_ELSE equ 3
LEX_ELSE db "else"
LEX_ELSE_len equ $ - LEX_ELSE
TOKEN_FN equ 4
LEX_FN db "fn"
LEX_FN_len equ $ - LEX_FN
TOKEN_RETURN equ 5
LEX_RETURN db "return"
LEX_RETURN_len equ $ - LEX_RETURN
TOKEN_LOOP equ 6
LEX_LOOP db "loop"
LEX_LOOP_len equ $ - LEX_LOOP
TOKEN_BREAK equ 7
LEX_BREAK db "break"
LEX_BREAK_len equ $ - LEX_BREAK
TOKEN_CONTINUE equ 8
LEX_CONTINUE db "continue"
LEX_CONTINUE_len equ $ - LEX_CONTINUE
TOKEN_TRUE equ 9
LEX_TRUE db "true"
LEX_TRUE_len equ $ - LEX_TRUE
TOKEN_FALSE equ 10
LEX_FALSE db "false"
LEX_FALSE_len equ $ - LEX_FALSE
TOKEN_BOOL equ 11
LEX_BOOL db "bool"
LEX_BOOL_len equ $ - LEX_BOOL
TOKEN_ARROW equ 12
LEX_ARROW db "->"
LEX_ARROW_len equ $ - LEX_ARROW
TOKEN_I32 equ 13
LEX_I32 db "i32"
LEX_I32_len equ $ - LEX_I32
TOKEN_U32 equ 14
LEX_U32 db "u32"
LEX_U32_len equ $ - LEX_U32
TOKEN_EQUALS equ 15
LEX_EQUALS db "="
LEX_EQUALS_len equ $ - LEX_EQUALS
TOKEN_PLUS equ 16
LEX_PLUS db "+"
LEX_PLUS_len equ $ - LEX_PLUS
TOKEN_MINUS equ 17
LEX_MINUS db "-"
LEX_MINUS_len equ $ - LEX_MINUS
TOKEN_RPARENS equ 18
LEX_RPARENS db ")"
LEX_RPARENS_len equ $ - LEX_RPARENS
TOKEN_LPARENS equ 19
LEX_LPARENS db "("
LEX_LPARENS_len equ $ - LEX_LPARENS
TOKEN_RBRACE equ 20
LEX_RBRACE db "}"
LEX_RBRACE_len equ $ - LEX_RBRACE
TOKEN_LBRACE equ 21
LEX_LBRACE db "{"
LEX_LBRACE_len equ $ - LEX_LBRACE
TOKEN_COLON equ 22
LEX_COLON db ":"
LEX_COLON_len equ $ - LEX_COLON
TOKEN_SEMI equ 23
LEX_SEMI db ";"
LEX_SEMI_len equ $ - LEX_SEMI
TOKEN_COMMA equ 24
LEX_COMMA db ","
LEX_COMMA_len equ $ - LEX_COMMA
TOKEN_PIPE equ 25
LEX_PIPE db "|"
LEX_PIPE_len equ $ - LEX_PIPE
TOKEN_AMP equ 26
LEX_AMP db "&"
LEX_AMP_len equ $ - LEX_AMP
TOKEN_EQEQ equ 27
LEX_EQEQ db "=="
LEX_EQEQ_len equ $ - LEX_EQEQ
TOKEN_LBRACKET equ 28
LEX_LBRACKET db "["
LEX_LBRACKET_len equ $ - LEX_LBRACKET
TOKEN_RBRACKET equ 29
LEX_RBRACKET db "]"
LEX_RBRACKET_len equ $ - LEX_RBRACKET
TOKEN_IDENT equ 30
LEX_IDENT db "<identifier>"
LEX_IDENT_len equ $ - LEX_IDENT
TOKEN_NUMBER equ 31
LEX_NUMBER db "<number>"
LEX_NUMBER_len equ $ - LEX_NUMBER
TOKEN_STRING equ 32
LEX_STRING db "<string>"
LEX_STRING_len equ $ - LEX_STRING
TOKEN_COMMENT equ 33
LEX_COMMENT db "<comment>"
LEX_COMMENT_len equ $ - LEX_COMMENT
%include "src/tokeniser.inc"
section .text
;; rdi: length of previously matched lexeme
@ -702,3 +520,66 @@ find_lexeme:
mov qword [rdi], TOKEN_COMMENT
mov [rdi + 16], rax
jmp .epilogue
;; dil: expected token kind
;; Try to consume the next lexeme if it matches the expected kind.
;; Returns: rax = lexeme pointer, rdx = lexeme length on a match;
;;          rax = rdx = 0 on a mismatch, with the cursor restored so the
;;          call is side-effect free (this is what makes it usable as a
;;          conditional "try" primitive by the parser).
expect_token:
    push rbp
    mov rbp, rsp
    sub rsp, 0x30
    mov [rsp], dil                  ; expected token kind
    mov rax, [rel cursor]           ; remember cursor for possible rewind
    mov [rsp + 8], rax
    lea rdi, [rsp + 0x10]           ; out-struct {token, lexeme ptr, lexeme len}
    call find_lexeme
    mov rax, [rsp + 0x10]           ; found token kind
    mov dil, [rsp]                  ; expected token kind
    cmp al, dil
    je .matched
    mov rdi, [rsp + 8]              ; mismatch: rewind the tokeniser
    mov [rel cursor], rdi
    xor rax, rax                    ; signal "no match" to the caller
    xor rdx, rdx
    jmp .epilogue
.matched:
    mov rax, [rsp + 0x18]           ; lexeme pointer
    mov rdx, [rsp + 0x20]           ; lexeme length
.epilogue:
    add rsp, 0x30
    pop rbp
    ret
;; dil: expected token kind
;; Consume the next lexeme, panicking unless it matches `dil`.
;; On success, forwards expect_token's results: rax = lexeme pointer,
;; rdx = lexeme length.
unwrap_token:
    push rbp
    mov rbp, rsp
    call expect_token               ; rax = 0 on mismatch (cursor restored)
    test rax, rax
    jnz .ok
    call panic                      ; hard failure: required token missing
.ok:
    pop rbp
    ret
;; dil: expected token kind
;; Like expect_token, but never consumes input: the saved cursor is put
;; back unconditionally, even when the token matched. Returns
;; expect_token's rax/rdx (non-zero lexeme pointer on match, 0 otherwise).
peek_expect_token:
    push rbp
    mov rbp, rsp
    push qword [rel cursor]         ; save position
    call expect_token
    pop rdi                         ; saved cursor value
    mov [rel cursor], rdi           ; rewind: pure lookahead
    pop rbp
    ret
;; rdi: out-struct pointer {token, lexeme ptr, lexeme len}
;; Fill the out-struct with the next lexeme WITHOUT advancing the cursor.
peek_lexeme:
    push rbp
    mov rbp, rsp
    push rdi                        ; keep caller's out-struct pointer
    push qword [rel cursor]         ; save cursor
    call find_lexeme                ; rdi still holds the out-struct here
    pop rdi                         ; saved cursor value
    mov [rel cursor], rdi           ; rewind: pure lookahead
    pop rax                         ; discard saved out-struct pointer
    pop rbp
    ret

213
lang/src/tokeniser.inc Normal file
View file

@ -0,0 +1,213 @@
section .rdata
align 8
LEXEMES:
dq LEX_NOT_A_LEXEME
dq LEX_LET
dq LEX_IF
dq LEX_ELSE
dq LEX_FN
dq LEX_RETURN
dq LEX_LOOP
dq LEX_BREAK
dq LEX_CONTINUE
dq LEX_TRUE
dq LEX_FALSE
dq LEX_BOOL
dq LEX_ARROW
dq LEX_I32
dq LEX_U32
dq LEX_EQUALS
dq LEX_PLUS
dq LEX_MINUS
dq LEX_RPARENS
dq LEX_LPARENS
dq LEX_RBRACE
dq LEX_LBRACE
dq LEX_COLON
dq LEX_SEMI
dq LEX_COMMA
dq LEX_PIPE
dq LEX_AMP
dq LEX_EQEQ
dq LEX_LBRACKET
dq LEX_RBRACKET
dq LEX_VOID
align 8
TOKENS:
db TOKEN_EOF ;; 0
db TOKEN_LET ;; 1
db TOKEN_IF ;; 2
db TOKEN_ELSE ;; 3
db TOKEN_FN ;; 4
db TOKEN_RETURN ;; 5
db TOKEN_LOOP ;; 6
db TOKEN_BREAK ;; 7
db TOKEN_CONTINUE ;; 8
db TOKEN_TRUE ;; 9
db TOKEN_FALSE ;; 10
db TOKEN_BOOL ;; 11
db TOKEN_ARROW ;; 12
db TOKEN_I32 ;; 13
db TOKEN_U32 ;; 14
db TOKEN_EQUALS ;; 15
db TOKEN_PLUS ;; 16
db TOKEN_MINUS ;; 17
db TOKEN_RPARENS ;; 18
db TOKEN_LPARENS ;; 19
db TOKEN_RBRACE ;; 20
db TOKEN_LBRACE ;; 21
db TOKEN_COLON ;; 22
db TOKEN_SEMI ;; 23
db TOKEN_COMMA ;; 24
db TOKEN_PIPE ;; 25
db TOKEN_AMP ;; 26
db TOKEN_EQEQ ;; 27
db TOKEN_LBRACKET ;; 28
db TOKEN_RBRACKET ;; 29
db TOKEN_VOID ;; 30
align 8
LEXEME_LENS:
dq 0
dq LEX_LET_len
dq LEX_IF_len
dq LEX_ELSE_len
dq LEX_FN_len
dq LEX_RETURN_len
dq LEX_LOOP_len
dq LEX_BREAK_len
dq LEX_CONTINUE_len
dq LEX_TRUE_len
dq LEX_FALSE_len
dq LEX_BOOL_len
dq LEX_ARROW_len
dq LEX_I32_len
dq LEX_U32_len
dq LEX_EQUALS_len
dq LEX_PLUS_len
dq LEX_MINUS_len
dq LEX_RPARENS_len
dq LEX_LPARENS_len
dq LEX_RBRACE_len
dq LEX_LBRACE_len
dq LEX_COLON_len
dq LEX_SEMI_len
dq LEX_COMMA_len
dq LEX_PIPE_len
dq LEX_AMP_len
dq LEX_EQEQ_len
dq LEX_LBRACKET_len
dq LEX_RBRACKET_len
dq LEX_VOID_len
align 8
; FIX: the LEXEMES/TOKENS/LEXEME_LENS tables above each have 31 entries
; (indices 0..30, the last being VOID). With the old count of 30 the
; "void" keyword could never match and always lexed as an identifier,
; making parse_type's TOKEN_VOID arm unreachable.
NUM_LEXEMES: dq 31
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
LEX_LET db "let"
LEX_LET_len equ $ - LEX_LET
LEX_IF db "if"
LEX_IF_len equ $ - LEX_IF
LEX_ELSE db "else"
LEX_ELSE_len equ $ - LEX_ELSE
LEX_FN db "fn"
LEX_FN_len equ $ - LEX_FN
LEX_RETURN db "return"
LEX_RETURN_len equ $ - LEX_RETURN
LEX_LOOP db "loop"
LEX_LOOP_len equ $ - LEX_LOOP
LEX_BREAK db "break"
LEX_BREAK_len equ $ - LEX_BREAK
LEX_CONTINUE db "continue"
LEX_CONTINUE_len equ $ - LEX_CONTINUE
LEX_TRUE db "true"
LEX_TRUE_len equ $ - LEX_TRUE
LEX_FALSE db "false"
LEX_FALSE_len equ $ - LEX_FALSE
LEX_BOOL db "bool"
LEX_BOOL_len equ $ - LEX_BOOL
LEX_ARROW db "->"
LEX_ARROW_len equ $ - LEX_ARROW
LEX_I32 db "i32"
LEX_I32_len equ $ - LEX_I32
LEX_U32 db "u32"
LEX_U32_len equ $ - LEX_U32
LEX_EQUALS db "="
LEX_EQUALS_len equ $ - LEX_EQUALS
LEX_PLUS db "+"
LEX_PLUS_len equ $ - LEX_PLUS
LEX_MINUS db "-"
LEX_MINUS_len equ $ - LEX_MINUS
LEX_RPARENS db ")"
LEX_RPARENS_len equ $ - LEX_RPARENS
LEX_LPARENS db "("
LEX_LPARENS_len equ $ - LEX_LPARENS
LEX_RBRACE db "}"
LEX_RBRACE_len equ $ - LEX_RBRACE
LEX_LBRACE db "{"
LEX_LBRACE_len equ $ - LEX_LBRACE
LEX_COLON db ":"
LEX_COLON_len equ $ - LEX_COLON
LEX_SEMI db ";"
LEX_SEMI_len equ $ - LEX_SEMI
LEX_COMMA db ","
LEX_COMMA_len equ $ - LEX_COMMA
LEX_PIPE db "|"
LEX_PIPE_len equ $ - LEX_PIPE
LEX_AMP db "&"
LEX_AMP_len equ $ - LEX_AMP
LEX_EQEQ db "=="
LEX_EQEQ_len equ $ - LEX_EQEQ
LEX_LBRACKET db "["
LEX_LBRACKET_len equ $ - LEX_LBRACKET
LEX_RBRACKET db "]"
LEX_RBRACKET_len equ $ - LEX_RBRACKET
LEX_VOID db "void"
LEX_VOID_len equ $ - LEX_VOID
LEX_IDENT db "<identifier>"
LEX_IDENT_len equ $ - LEX_IDENT
LEX_NUMBER db "<number>"
LEX_NUMBER_len equ $ - LEX_NUMBER
LEX_STRING db "<string>"
LEX_STRING_len equ $ - LEX_STRING
LEX_COMMENT db "<comment>"
LEX_COMMENT_len equ $ - LEX_COMMENT
;; start-consts
TOKEN_EOF equ 0
TOKEN_LET equ 1
TOKEN_IF equ 2
TOKEN_ELSE equ 3
TOKEN_FN equ 4
TOKEN_RETURN equ 5
TOKEN_LOOP equ 6
TOKEN_BREAK equ 7
TOKEN_CONTINUE equ 8
TOKEN_TRUE equ 9
TOKEN_FALSE equ 10
TOKEN_BOOL equ 11
TOKEN_ARROW equ 12
TOKEN_I32 equ 13
TOKEN_U32 equ 14
TOKEN_EQUALS equ 15
TOKEN_PLUS equ 16
TOKEN_MINUS equ 17
TOKEN_RPARENS equ 18
TOKEN_LPARENS equ 19
TOKEN_RBRACE equ 20
TOKEN_LBRACE equ 21
TOKEN_COLON equ 22
TOKEN_SEMI equ 23
TOKEN_COMMA equ 24
TOKEN_PIPE equ 25
TOKEN_AMP equ 26
TOKEN_EQEQ equ 27
TOKEN_LBRACKET equ 28
TOKEN_RBRACKET equ 29
TOKEN_VOID equ 30
TOKEN_IDENT equ 31
TOKEN_NUMBER equ 32
TOKEN_STRING equ 33
TOKEN_COMMENT equ 34
;; end-consts

310
lang/tests/asm_to_rust.py Executable file
View file

@ -0,0 +1,310 @@
#!/usr/bin/env python3
"""
parse_asm_to_rust.py
Scan one or more assembly source files and extract:
- commented struct definitions inside `start-structs` / `end-structs` spans
- constant definitions inside `start-consts` / `end-consts` spans
- commented function-definition directives of the form `define-fn: fn ...`
- commented markdown rust fenced code blocks (```rust) and copy their inner code
into the generated Rust output (fences are removed and comment markers stripped)
Produce Rust source code containing:
- an `extern "C"` block with `pub unsafe fn ...;` declarations for each define-fn
- `pub const NAME: u32 = <value>;` lines for each `equ` constant found in const spans
- `#[repr(C)] pub struct Name { pub field: Type, ... }` for each struct found in struct spans
- verbatim Rust code copied from commented ```rust``` blocks (fences removed)
Notes:
- Struct and function definitions must appear on commented lines. Any number of leading semicolons
(e.g. `;`, `;;`, `;;;`) and surrounding spaces are allowed and will be stripped.
- Constant lines inside const spans may be commented or not; the script strips leading semicolons
before parsing.
- Commented rust blocks are expected to use commented fenced code blocks, e.g.:
;; ```rust
;; extern "C" { ... }
;; ```
The inner lines will be uncommented (leading semicolons removed) and included in output.
- By default the script writes to stdout. Use `-o` to write combined output to a file, or `-d`
to write one .rs file per input with the same basename.
"""
import argparse
import re
import sys
from pathlib import Path
from typing import List, Tuple, Dict, Any
LEADING_COMMENT_RE = re.compile(r'^\s*;+\s*') # lines that start with one or more semicolons
START_STRUCTS_RE = re.compile(r'^\s*;+\s*start-structs\b', re.IGNORECASE)
END_STRUCTS_RE = re.compile(r'^\s*;+\s*end-structs\b', re.IGNORECASE)
START_CONSTS_RE = re.compile(r'^\s*;+\s*start-consts\b', re.IGNORECASE)
END_CONSTS_RE = re.compile(r'^\s*;+\s*end-consts\b', re.IGNORECASE)
DEFINE_FN_RE = re.compile(r'^\s*;+\s*define-fn:\s*(.+)$', re.IGNORECASE)
CONST_EQU_RE = re.compile(r'^\s*([A-Za-z_]\w*)\s+equ\s+(.+)$', re.IGNORECASE)
STRUCT_START_RE = re.compile(r'^\s*struct\s+([A-Za-z_]\w*)\s*\{') # after comment markers stripped
RUST_FENCE_RE = re.compile(r'^\s*```\s*(rust)?\s*$', re.IGNORECASE) # matches ``` or ```rust (after stripping leading comment)
def strip_leading_semicolons(line: str) -> str:
    """Drop a leading run of semicolons (plus surrounding whitespace) and any trailing newline.

    Lines without a leading semicolon run are returned unchanged apart from
    newline trimming — their leading whitespace is deliberately preserved.
    """
    return re.sub(r'^\s*;+\s*', '', line).rstrip('\n')
def extract_structs_from_commented_lines(lines: List[str]) -> List[Tuple[str, List[str]]]:
    """
    Given lines already stripped of their leading ';' markers, find every
    'struct Name { ... }' block and return a list of (name, field_lines).

    A brace-balanced scan is used so struct bodies may contain nested
    braces in types. The trailing '}' line is not part of the body.
    """
    struct_start = re.compile(r'^\s*struct\s+([A-Za-z_]\w*)\s*\{')
    structs: List[Tuple[str, List[str]]] = []
    i = 0
    total = len(lines)
    while i < total:
        m = struct_start.match(lines[i])
        if not m:
            i += 1
            continue
        name = m.group(1)
        # Start the balance with the opening line's own braces.
        depth = lines[i].count('{') - lines[i].count('}')
        i += 1
        body: List[str] = []
        while i < total and depth > 0:
            depth += lines[i].count('{') - lines[i].count('}')
            body.append(lines[i])
            i += 1
        # Trim the closing '}' line if it ended up in the body.
        if body and body[-1].strip() == '}':
            body.pop()
        structs.append((name, body))
    return structs
def format_rust_struct(name: str, field_lines: List[str]) -> str:
    """
    Render field lines like ' nodes: Vec<AstNode>,' as a #[repr(C)] Rust
    struct with all-pub fields. Each field is split on the first ':' into
    name and type; lines without a ':' are emitted as-is behind `pub`.
    """
    body: List[str] = []
    for raw in field_lines:
        field = raw.strip().rstrip(',')
        if not field:
            continue  # skip blank field lines
        if ':' in field:
            fname, ftype = (part.strip() for part in field.split(':', 1))
            body.append(f' pub {fname}: {ftype},')
        else:
            body.append(f' pub {field},')
    header = ['#[repr(C)]', '#[derive(Debug)]', f'pub struct {name} {{']
    return '\n'.join(header + body + ['}'])
def parse_file(path: Path) -> Dict[str, Any]:
    """
    Parse a single assembly file and return dict with keys: 'functions', 'consts', 'structs', 'rust_blocks'
    - functions: list of signature strings (e.g. "parse_ast(data: *const u8) -> Ast")
    - consts: list of (name, value)
    - structs: list of (name, field_lines)
    - rust_blocks: list of rust code blocks; each block is list[str] of code lines (no fences, uncommented)

    Implementation: a line-oriented state machine. `in_structs`/`in_consts`
    track whether we are between start-/end- span markers; everything else
    (define-fn directives, commented ```rust``` fences) is handled at the
    top level.
    """
    functions: List[str] = []
    consts: List[Tuple[str, str]] = []
    structs: List[Tuple[str, List[str]]] = []
    rust_blocks: List[List[str]] = []
    with path.open('r', encoding='utf-8') as f:
        lines = f.readlines()
    i = 0
    n = len(lines)
    in_structs = False  # inside a start-structs / end-structs span
    in_consts = False   # inside a start-consts / end-consts span
    struct_buffer: List[str] = []  # raw commented lines of the current struct span
    const_buffer: List[str] = []   # raw lines of the current const span
    while i < n:
        raw = lines[i]
        # state transitions for start/end spans
        if not in_structs and START_STRUCTS_RE.match(raw):
            in_structs = True
            struct_buffer = []
            i += 1
            continue
        if in_structs and END_STRUCTS_RE.match(raw):
            # Span closed: uncomment buffered lines, then pull structs out.
            stripped = [strip_leading_semicolons(l) for l in struct_buffer if l.strip()]
            found = extract_structs_from_commented_lines(stripped)
            structs.extend(found)
            in_structs = False
            struct_buffer = []
            i += 1
            continue
        if not in_consts and START_CONSTS_RE.match(raw):
            in_consts = True
            const_buffer = []
            i += 1
            continue
        if in_consts and END_CONSTS_RE.match(raw):
            # Span closed: parse `NAME equ VALUE` lines (commented or not).
            for l in const_buffer:
                s = strip_leading_semicolons(l)
                m = CONST_EQU_RE.match(s)
                if m:
                    name = m.group(1)
                    value = m.group(2).strip()
                    consts.append((name, value))
            in_consts = False
            const_buffer = []
            i += 1
            continue
        # If inside special spans, collect lines
        if in_structs:
            # Only commented lines belong to a struct span; others are dropped.
            if LEADING_COMMENT_RE.match(raw):
                struct_buffer.append(raw)
        elif in_consts:
            const_buffer.append(raw)
        else:
            # Top-level: look for define-fn directives (must be commented lines)
            mfn = DEFINE_FN_RE.match(raw)
            if mfn:
                sig = mfn.group(1).strip()
                # Drop a leading `fn ` so only the bare signature is kept.
                if sig.startswith('fn '):
                    sig = sig[len('fn '):].strip()
                functions.append(sig)
            else:
                # Check for commented rust fenced block start
                if LEADING_COMMENT_RE.match(raw):
                    stripped = strip_leading_semicolons(raw)
                    if RUST_FENCE_RE.match(stripped):
                        # start collecting rust block until a closing fence is found
                        block_lines: List[str] = []
                        i += 1
                        while i < n:
                            cur = lines[i]
                            # If it's a commented fence closing, stop
                            if LEADING_COMMENT_RE.match(cur):
                                inner_stripped = strip_leading_semicolons(cur)
                                if RUST_FENCE_RE.match(inner_stripped):
                                    break
                                # otherwise, this is a commented code line; strip leading semicolons and append
                                block_lines.append(strip_leading_semicolons(cur))
                            else:
                                # If it's an uncommented line inside the block, include as-is (trim newline)
                                block_lines.append(cur.rstrip('\n'))
                            i += 1
                        rust_blocks.append(block_lines)
                        # advance past the closing fence line if present
                        # current i points at closing fence or EOF; advance one to continue main loop
                        i += 1
                        continue  # continue outer loop without incrementing i further
        i += 1
    return {
        'functions': functions,
        'consts': consts,
        'structs': structs,
        'rust_blocks': rust_blocks,
    }
def render_rust(function_sigs: List[str], consts: List[Tuple[str, str]],
                structs: List[Tuple[str, List[str]]], rust_blocks: List[List[str]]) -> str:
    """
    Assemble the generated Rust source: a lint-allow header, one
    extern "C" block for all function signatures, const declarations,
    #[repr(C)] structs, and finally any verbatim rust blocks (already
    uncommented and fence-less). Sections are separated by blank lines;
    trailing blank lines are trimmed and the file ends with a newline.
    """
    parts: List[str] = [
        '#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]',
        '// Auto-generated Rust bindings from assembly source\n',
    ]
    if function_sigs:
        parts.append('unsafe extern "C" {')
        parts.extend(f' pub unsafe fn {sig};' for sig in function_sigs)
        parts.extend(['}', ''])
    parts.extend(f'pub const {name}: u32 = {value};' for name, value in consts)
    if consts:
        parts.append('')
    for name, field_lines in structs:
        parts.extend([format_rust_struct(name, field_lines), ''])
    for block in rust_blocks:
        # Guarantee a separating blank line before each verbatim block.
        if parts and parts[-1] != '':
            parts.append('')
        parts.extend(line.rstrip('\n') for line in block)
        parts.append('')
    while parts and parts[-1] == '':
        parts.pop()
    return '\n'.join(parts) + '\n' if parts else ''
def main(argv=None):
    """CLI entry point: parse the input files and emit the generated Rust,
    either combined to stdout / `-o FILE`, or one .rs per input via `-d DIR`."""
    parser = argparse.ArgumentParser(
        description='Parse assembly files and emit Rust externs, consts, struct defs, and commented ```rust``` blocks.')
    parser.add_argument('inputs', metavar='INPUT', type=Path, nargs='+',
                        help='assembly source files to parse')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-o', '--out', type=Path,
                       help='write combined Rust to this file (default stdout)')
    group.add_argument('-d', '--out-dir', type=Path,
                       help='write one .rs file per input into this directory')
    args = parser.parse_args(argv)

    # Accumulate everything for the combined output while also rendering
    # each input on its own for the -d mode.
    combined = {'functions': [], 'consts': [], 'structs': [], 'rust_blocks': []}
    per_file_output: Dict[Path, str] = {}
    for inp in args.inputs:
        if not inp.exists():
            print(f'warning: input file {inp} does not exist, skipping', file=sys.stderr)
            continue
        parsed = parse_file(inp)
        per_file_output[inp] = render_rust(parsed['functions'], parsed['consts'],
                                           parsed['structs'], parsed['rust_blocks'])
        for key in combined:
            combined[key].extend(parsed[key])

    if args.out_dir:
        # One output file per input, named after the input's basename.
        args.out_dir.mkdir(parents=True, exist_ok=True)
        for inp, src in per_file_output.items():
            outpath = args.out_dir / (inp.stem + '.rs')
            with outpath.open('w', encoding='utf-8') as f:
                f.write(src)
            print(f'Wrote {outpath}', file=sys.stderr)
        return 0

    combined_src = render_rust(combined['functions'], combined['consts'],
                               combined['structs'], combined['rust_blocks'])
    if args.out:
        with args.out.open('w', encoding='utf-8') as f:
            f.write(combined_src)
        print(f'Wrote {args.out}', file=sys.stderr)
    else:
        sys.stdout.write(combined_src)
    return 0
if __name__ == '__main__':
raise SystemExit(main())

66
lang/tests/ast.rs Normal file
View file

@ -0,0 +1,66 @@
#[path = "shared/shared.rs"]
mod util;
unsafe extern "C" {
unsafe fn bump_init();
unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
}
use util::defs::{parse_expr, Ast, AstNode};
fn main() {
    // SAFETY: FFI into the assembly runtime; assumed safe to call once at
    // startup before any parsing — TODO confirm bump_init's contract.
    unsafe {
        bump_init();
    }
    println!("Bump allocator initialized.");
    // Feed the tokeniser a fixed in-memory expression instead of a file.
    let src = b"3 + 4";
    unsafe {
        tokeniser_init_buf(src.as_ptr(), src.len());
        let mut ast = Ast {
            nodes: util::vec::Vec::new(),
        };
        // parse_expr returns the index of the root expression node in ast.nodes.
        let expr_id = parse_expr(&mut ast);
        println!("Parsed expression with ID: {}", expr_id);
        println!("{:#}", &ast);
    }
}
impl std::fmt::Display for AstNode {
    /// Pretty-print one node. `data` is interpreted per `kind`: numbers
    /// store the value inline in the pointer field; binary ops store a
    /// pointer to a `BinaryExpr` (see src/ast.asm).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use util::defs::{BinaryExpr, AST_BINARY_OP, AST_NUMBER};
        // kind is a u8 on the assembly side; widen to match the u32 consts.
        match self.kind as u32 {
            AST_NUMBER => {
                // The numeric value lives directly in the data field.
                write!(f, "Number({})", self.data as usize)
            }
            AST_BINARY_OP => {
                // SAFETY-review: assumes data points at a valid, bump-allocated
                // BinaryExpr whenever kind == AST_BINARY_OP — upheld by ast.asm.
                let BinaryExpr {
                    left,
                    operator,
                    right,
                } = unsafe { self.data.cast::<util::defs::BinaryExpr>().read() };
                write!(
                    f,
                    "BinaryOp(op: {}, left: {}, right: {})",
                    operator, left, right
                )
            }
            _ => write!(f, "UnknownNode"),
        }
    }
}
impl core::fmt::Display for Ast {
    /// Render the node list as a bracketed, tab-indented listing with one
    /// `index: node` entry per line, separated by ", " line breaks.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        writeln!(f, "[")?;
        let mut entries = self.nodes.as_slice().iter().enumerate();
        if let Some((i, node)) = entries.next() {
            write!(f, "\t{i}: {node}")?;
        }
        for (i, node) in entries {
            writeln!(f, ", ")?;
            write!(f, "\t{i}: {node}")?;
        }
        writeln!(f, "\n]")
    }
}

View file

@ -1,9 +1,7 @@
#![feature(allocator_api, box_as_ptr)]
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
panic!("Called panic from external code.");
}
#[path = "shared/shared.rs"]
mod util;
unsafe extern "C" {
unsafe fn bump_init();

View file

@ -1,22 +1,7 @@
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
panic!("Called panic from external code.");
}
#[path = "shared/shared.rs"]
mod util;
#[repr(C)]
struct FFISlice {
ptr: *const u8,
len: usize,
}
impl FFISlice {
fn as_slice(&self) -> &[u8] {
unsafe { core::slice::from_raw_parts(self.ptr, self.len) }
}
fn as_str(&self) -> &str {
unsafe { core::str::from_utf8_unchecked(self.as_slice()) }
}
}
use util::FFISlice;
unsafe extern "C" {
unsafe fn int_to_str2(value: isize, buffer: *mut u8, buffer_len: usize, radix: u8) -> FFISlice;

104
lang/tests/shared/defs.rs Normal file
View file

@ -0,0 +1,104 @@
#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]
// Auto-generated Rust bindings from assembly source
// Parser entry points implemented in src/ast.asm. Signatures must match the
// assembly calling convention exactly; do not alter them here.
unsafe extern "C" {
    pub unsafe fn parse_func(ast: *mut Ast) -> u64;
    // NOTE(review): a tuple return is not a stable C ABI (improper_ctypes is
    // allowed file-wide); this works only because the asm side matches
    // rustc's two-register layout — confirm against src/ast.asm.
    pub unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize);
    pub unsafe fn parse_primary_expr(ast: *mut Ast) -> u64;
    pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64;
    pub unsafe fn parse_expr(ast: *mut Ast) -> u64;
    pub unsafe fn parse_statement(ast: *mut Ast) -> u64;
    pub unsafe fn parse_block(ast: *mut Ast) -> u64;
    pub unsafe fn parse_type(ast: *mut Ast) -> Type;
}
// AST node kinds — mirror the AST_* `equ` constants in src/ast.asm.
pub const AST_FUNCTION: u32 = 1;
pub const AST_BLOCK: u32 = 2;
pub const AST_VARIABLE: u32 = 3;
pub const AST_NUMBER: u32 = 4;
pub const AST_BINARY_OP: u32 = 5;
pub const AST_RETURN_STATEMENT: u32 = 6;
// Type kinds — mirror the TYPE_* constants on the assembly side.
pub const TYPE_VOID: u32 = 1;
pub const TYPE_BOOL: u32 = 2;
pub const TYPE_I32: u32 = 3;
pub const TYPE_U32: u32 = 4;
pub const TYPE_STR: u32 = 5;
// Token kinds produced by the tokeniser (src/tokeniser.asm).
pub const TOKEN_EOF: u32 = 0;
pub const TOKEN_LET: u32 = 1;
pub const TOKEN_IF: u32 = 2;
pub const TOKEN_ELSE: u32 = 3;
pub const TOKEN_FN: u32 = 4;
pub const TOKEN_RETURN: u32 = 5;
pub const TOKEN_LOOP: u32 = 6;
pub const TOKEN_BREAK: u32 = 7;
pub const TOKEN_CONTINUE: u32 = 8;
pub const TOKEN_TRUE: u32 = 9;
pub const TOKEN_FALSE: u32 = 10;
pub const TOKEN_BOOL: u32 = 11;
pub const TOKEN_ARROW: u32 = 12;
pub const TOKEN_I32: u32 = 13;
pub const TOKEN_U32: u32 = 14;
pub const TOKEN_EQUALS: u32 = 15;
pub const TOKEN_PLUS: u32 = 16;
pub const TOKEN_MINUS: u32 = 17;
pub const TOKEN_RPARENS: u32 = 18;
pub const TOKEN_LPARENS: u32 = 19;
pub const TOKEN_RBRACE: u32 = 20;
pub const TOKEN_LBRACE: u32 = 21;
pub const TOKEN_COLON: u32 = 22;
pub const TOKEN_SEMI: u32 = 23;
pub const TOKEN_COMMA: u32 = 24;
pub const TOKEN_PIPE: u32 = 25;
pub const TOKEN_AMP: u32 = 26;
pub const TOKEN_EQEQ: u32 = 27;
pub const TOKEN_LBRACKET: u32 = 28;
pub const TOKEN_RBRACKET: u32 = 29;
pub const TOKEN_VOID: u32 = 30;
pub const TOKEN_IDENT: u32 = 31;
pub const TOKEN_NUMBER: u32 = 32;
pub const TOKEN_STRING: u32 = 33;
pub const TOKEN_COMMENT: u32 = 34;
/// Flat arena of AST nodes; a node's index in `nodes` acts as its id.
#[repr(C)]
#[derive(Debug)]
pub struct Ast {
    pub nodes: Vec<AstNode>,
}
/// One parsed node: a kind tag plus a kind-dependent payload.
#[repr(C)]
#[derive(Debug)]
pub struct AstNode {
    // One of the AST_* constants above.
    pub kind: u8,
    // For AST_NUMBER the value is stored inline in this pointer-sized field;
    // for AST_BINARY_OP it points at a `BinaryExpr` (see tests/ast.rs).
    pub data: *const (),
}
/// A function parameter: an unowned (ptr, len) name plus its type.
#[repr(C)]
#[derive(Debug)]
pub struct Argument {
    pub name: *const u8,
    pub name_len: usize,
    pub arg_type: Type,
}
/// A language type, tagged with one of the TYPE_* constants above.
#[repr(C)]
#[derive(Debug)]
pub struct Type {
    pub kind: u8,
}
/// Binary expression payload; `left`/`right` are presumably node ids into
/// `Ast::nodes` — TODO confirm against src/ast.asm.
#[repr(C)]
#[derive(Debug)]
pub struct BinaryExpr {
    pub left: u64,
    pub operator: u8,
    pub right: u64,
}
/// Block payload: an unowned array of statement node ids.
#[repr(C)]
#[derive(Debug)]
pub struct Block {
    pub statements: *const u64,
    pub statements_len: usize,
}
use super::vec::Vec;

355
lang/tests/shared/shared.rs Normal file
View file

@ -0,0 +1,355 @@
#![allow(dead_code)]
#[path = "defs.rs"]
pub mod defs;
// Cold landing pad kept out-of-line so the exported `panic` symbol stays tiny.
#[inline(never)]
fn __do_panic() -> ! {
    panic!("Called panic from external code.");
}
/// C-ABI `panic` entry point the assembly runtime jumps to on fatal errors.
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
    __do_panic()
}
/// Raw (ptr, len) pair returned across the assembly FFI boundary.
/// repr(C): field order/layout must match the asm side — do not reorder.
#[repr(C)]
#[derive(Debug, PartialEq, Eq)]
pub struct FFISlice {
    pub ptr: *const u8,
    // Element count as used by `as_slice`/`as_bytes` — for `u8` data this is
    // the byte length.
    pub len: usize,
}
/// Nullable slice for fallible FFI calls (e.g. `expect_token`); "none" is
/// encoded as a null `ptr`. repr(transparent): ABI-identical to `FFISlice`.
#[repr(transparent)]
#[derive(Debug, PartialEq, Eq)]
pub struct MaybeFFISlice {
    inner: FFISlice,
}
impl MaybeFFISlice {
    /// `None` is encoded on the FFI boundary as a null data pointer.
    pub fn is_none(&self) -> bool {
        self.inner.ptr.is_null()
    }

    /// Decodes the nullable FFI slice into a proper `Option`.
    pub fn into_option(self) -> Option<FFISlice> {
        match self.is_none() {
            true => None,
            false => Some(self.inner),
        }
    }
}
impl FFISlice {
    /// Reinterprets the raw parts as a typed slice.
    /// NOTE(review): `len` is treated as the *element* count for any `T`;
    /// callers must ensure it matches the pointee type — TODO confirm.
    pub unsafe fn as_slice<T: Sized>(&self) -> &[T] {
        unsafe { core::slice::from_raw_parts(self.ptr.cast(), self.len) }
    }

    /// Byte view; just the `u8` specialisation of `as_slice`.
    pub unsafe fn as_bytes(&self) -> &[u8] {
        unsafe { self.as_slice::<u8>() }
    }

    /// String view; skips UTF-8 validation, so the bytes must be valid UTF-8.
    pub unsafe fn as_str(&self) -> &str {
        unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
    }
}
/// Untyped growable buffer managed by the routines in src/vec.asm.
/// repr(C): layout is shared with the assembly side — do not reorder fields.
#[repr(C)]
#[derive(Debug)]
pub struct BlobVec {
    pub data: *mut u8,
    // Current element count.
    pub len: usize,
    // Allocated capacity, in elements.
    pub cap: usize,
    // Size of one element in bytes.
    pub elem_size: usize,
    // Optional destructor the asm side invokes for discarded elements.
    pub drop: Option<extern "C" fn(*mut u8)>,
}
impl Default for BlobVec {
fn default() -> Self {
Self {
data: core::ptr::null_mut(),
len: 0,
cap: 0,
elem_size: 0,
drop: None,
}
}
}
// SAFETY(review): assumes the assembly-side vec routines keep no
// thread-affine state behind `data` — TODO confirm against src/vec.asm.
unsafe impl Send for BlobVec {}
unsafe impl Sync for BlobVec {}
pub mod vec {
    //! Typed, drop-aware wrapper around the assembly-backed `BlobVec`.
    #![allow(dead_code)]
    use super::ffi::*;
    use super::*;
    /// Strongly-typed handle over a raw `BlobVec`.
    /// repr(transparent): ABI-identical to `BlobVec`, so it can be handed
    /// straight to the assembly routines.
    #[repr(transparent)]
    #[derive(Debug)]
    pub struct Vec<T> {
        pub vec: BlobVec,
        // Marks logical ownership of `T` values for drop/variance purposes.
        _marker: core::marker::PhantomData<T>,
    }
    impl<T> Vec<T> {
        /// New vec with a default capacity of 32 elements.
        pub fn new() -> Self {
            Self::new_with(32)
        }
        /// New vec with the given capacity; registers a C-ABI shim so the
        /// assembly side can run `T`'s destructor on discarded elements.
        pub fn new_with(capacity: usize) -> Self {
            let mut vec = BlobVec {
                data: core::ptr::null_mut(),
                len: 0,
                cap: 0,
                elem_size: 0,
                drop: None,
            };
            // Trampoline: C ABI on the outside, `drop_in_place::<T>` inside.
            extern "C" fn drop_fn<T>(ptr: *mut u8) {
                unsafe {
                    core::ptr::drop_in_place::<T>(ptr as *mut T);
                }
            }
            unsafe {
                vec_init_with(
                    &mut vec,
                    core::mem::size_of::<T>(),
                    Some(drop_fn::<T>),
                    capacity,
                );
            }
            Self {
                vec,
                _marker: core::marker::PhantomData,
            }
        }
        /// Borrow the elements as a slice.
        pub fn as_slice(&self) -> &[T] {
            // Guard against viewing a vec initialised for a different type.
            assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
            unsafe { core::slice::from_raw_parts(self.vec.data as *const T, self.vec.len) }
        }
        /// Mutably borrow the elements as a slice.
        pub fn as_slice_mut(&mut self) -> &mut [T] {
            assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
            unsafe { core::slice::from_raw_parts_mut(self.vec.data as *mut T, self.vec.len) }
        }
        /// Append `value`; ownership transfers to the vec, so drop is
        /// suppressed here via ManuallyDrop (the asm side copies the bytes).
        pub fn push(&mut self, value: T) {
            let value = core::mem::ManuallyDrop::new(value);
            unsafe {
                vec_push(&mut self.vec, &raw const value as *const T as *const u8);
            }
        }
        /// Insert `value` at `index`; silently no-ops when out of range.
        pub fn insert(&mut self, value: T, index: usize) {
            if index > self.vec.len {
                return;
            }
            let value = core::mem::ManuallyDrop::new(value);
            unsafe {
                vec_insert(
                    &mut self.vec,
                    index,
                    &raw const value as *const T as *const u8,
                );
            }
        }
        /// Remove and return the last element, if any.
        pub fn pop(&mut self) -> Option<T> {
            if self.vec.len == 0 {
                return None;
            }
            unsafe {
                // Move the value out first, then shrink.
                // NOTE(review): assumes `vec_pop` only decrements `len` and
                // does NOT invoke the drop hook (that would double-drop the
                // value just read) — confirm against src/vec.asm.
                let ptr = vec_get(&mut self.vec, self.vec.len - 1) as *mut T;
                let value = ptr.read();
                vec_pop(&mut self.vec);
                Some(value)
            }
        }
        /// Checked element access.
        pub fn get(&self, index: usize) -> Option<&T> {
            if index >= self.vec.len {
                return None;
            }
            unsafe {
                // const→mut cast only to satisfy the FFI signature; the
                // returned reference is still handed out as shared.
                let ptr = vec_get(&raw const self.vec as *mut _, index) as *mut T;
                Some(&*ptr)
            }
        }
        /// Checked mutable element access.
        pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
            if index >= self.vec.len {
                return None;
            }
            unsafe {
                let ptr = vec_get(&raw mut self.vec, index) as *mut T;
                Some(&mut *ptr)
            }
        }
        /// Remove the element at `index`; silently no-ops when out of range.
        pub fn remove(&mut self, index: usize) {
            if index >= self.vec.len {
                return;
            }
            unsafe {
                vec_remove(&mut self.vec, index);
            }
        }
        /// Number of elements currently stored.
        pub fn len(&self) -> usize {
            self.vec.len
        }
        /// Linear search using `cmp`; None when no element matches.
        pub fn position<F>(&self, elem: &T, mut cmp: F) -> Option<usize>
        where
            F: FnMut(&T, &T) -> bool,
        {
            // C-ABI trampoline that forwards to the captured Rust closure
            // passed through the opaque `f` pointer.
            extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> bool>(
                f: *const (),
                a: *const u8,
                b: *const u8,
            ) -> bool {
                let f = unsafe { &mut *(f as *mut F) };
                let a = unsafe { &*(a as *const T) };
                let b = unsafe { &*(b as *const T) };
                f(a, b)
            }
            unsafe {
                let index = vec_find(
                    &raw const self.vec as *mut _,
                    elem as *const T as *const u8,
                    cmp_trampoline::<T, F>,
                    &raw mut cmp as *mut F as *mut (),
                );
                // The asm side signals "not found" with usize::MAX.
                if index == usize::MAX {
                    None
                } else {
                    Some(index)
                }
            }
        }
        /// Binary search with a three-way comparator; Ok(found index) or
        /// Err(insertion point), mirroring std's `binary_search_by`.
        pub fn binary_search_by<F>(&self, elem: &T, mut cmp: F) -> Result<usize, usize>
        where
            F: FnMut(&T, &T) -> i32,
        {
            extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> i32>(
                f: *const (),
                a: *const u8,
                b: *const u8,
            ) -> i32 {
                let f = unsafe { &mut *(f as *mut F) };
                let a = unsafe { &*(a as *const T) };
                let b = unsafe { &*(b as *const T) };
                f(a, b)
            }
            unsafe {
                let (index, vacant) = vec_binary_search_by(
                    &raw const self.vec as *mut _,
                    elem as *const T as *const u8,
                    cmp_trampoline::<T, F>,
                    &raw mut cmp as *mut F as *mut (),
                );
                if vacant {
                    Err(index)
                } else {
                    Ok(index)
                }
            }
        }
        /// Insert `elem` keeping the vec sorted under `cmp`.
        /// NOTE(review): takes `&self` but mutates through a const→mut cast;
        /// soundness rests on the asm side — consider `&mut self` in a
        /// follow-up (callers would need updating).
        pub fn insert_sorted<F>(&self, elem: T, mut cmp: F) -> Result<usize, usize>
        where
            F: FnMut(&T, &T) -> i32,
        {
            extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> i32>(
                f: *const (),
                a: *const u8,
                b: *const u8,
            ) -> i32 {
                let f = unsafe { &mut *(f as *mut F) };
                let a = unsafe { &*(a as *const T) };
                let b = unsafe { &*(b as *const T) };
                f(a, b)
            }
            // Ownership moves to the vec; suppress the local drop.
            let mut elem = core::mem::ManuallyDrop::new(elem);
            unsafe {
                let (index, _inserted) = vec_insert_sorted(
                    &raw const self.vec as *mut _,
                    &raw mut elem as *const u8,
                    cmp_trampoline::<T, F>,
                    &raw mut cmp as *mut F as *mut (),
                );
                Ok(index)
            }
        }
    }
}
pub mod ffi {
    //! Raw declarations for the vector routines implemented in src/vec.asm.
    //! Signatures must stay in sync with the assembly; do not alter them.
    // Tuple returns and `*mut ()` are not C-ABI-stable types; this relies on
    // the asm side matching rustc's layout.
    #![allow(improper_ctypes)]
    use super::*;
    #[allow(dead_code)]
    unsafe extern "C" {
        /// Initialise `vec` for `elem_size`-byte elements with default capacity.
        pub unsafe fn vec_init(
            vec: *mut BlobVec,
            elem_size: usize,
            drop: Option<extern "C" fn(*mut u8)>,
        );
        /// Like `vec_init`, but pre-allocates room for `cap` elements.
        pub unsafe fn vec_init_with(
            vec: *mut BlobVec,
            elem_size: usize,
            drop: Option<extern "C" fn(*mut u8)>,
            cap: usize,
        );
        /// Append one element, copied from `elem`.
        pub unsafe fn vec_push(vec: *mut BlobVec, elem: *const u8);
        /// Insert one element at `index`, copied from `elem`.
        pub unsafe fn vec_insert(vec: *mut BlobVec, index: usize, elem: *const u8);
        /// Remove the last element.
        pub unsafe fn vec_pop(vec: *mut BlobVec);
        /// Remove the last element, running the drop hook on it.
        pub unsafe fn vec_drop_last(vec: *mut BlobVec);
        /// Pointer to the element at `index`.
        pub unsafe fn vec_get(vec: *mut BlobVec, index: usize) -> *mut u8;
        /// Remove the element at `index`.
        pub unsafe fn vec_remove(vec: *mut BlobVec, index: usize);
        /// Free the vec's storage, dropping all elements.
        pub unsafe fn vec_drop(vec: *mut BlobVec);
        /// Linear search via `cmp(cmp_data, a, b)`; returns usize::MAX if absent.
        pub unsafe fn vec_find(
            vec: *mut BlobVec,
            elem: *const u8,
            cmp: extern "C" fn(*const (), *const u8, *const u8) -> bool,
            cmp_data: *mut (),
        ) -> usize;
        /// Binary search; returns (index, vacant) where vacant means "not found,
        /// index is the insertion point".
        pub unsafe fn vec_binary_search_by(
            vec: *mut BlobVec,
            elem: *const u8,
            cmp: extern "C" fn(*const (), *const u8, *const u8) -> i32,
            cmp_data: *mut (),
        ) -> (usize, bool);
        /// Sorted insert; returns (index, inserted).
        pub unsafe fn vec_insert_sorted(
            vec: *mut BlobVec,
            elem: *const u8,
            cmp: extern "C" fn(*const (), *const u8, *const u8) -> i32,
            cmp_data: *mut (),
        ) -> (usize, bool);
    }
}
/// Adapter that lets any `&[T]` of `Display` items print as `[a, b, c]`.
pub struct DisplaySlice<'a, T>(pub &'a [T]);

impl<'a, T: core::fmt::Display> core::fmt::Display for DisplaySlice<'a, T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("[")?;
        // Empty separator before the first item, ", " before every later one.
        let mut sep = "";
        for item in self.0 {
            write!(f, "{sep}{item}")?;
            sep = ", ";
        }
        f.write_str("]")
    }
}

View file

@ -1,7 +1,7 @@
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
panic!("Called panic from external code.");
}
#[path = "shared/shared.rs"]
mod util;
use util::*;
#[derive(Debug)]
struct Lexeme(u8, &'static str);
@ -18,12 +18,6 @@ impl PartialEq for Lexeme {
impl Eq for Lexeme {}
impl Lexeme {
fn lex(&self) -> &'static str {
self.1
}
}
trait AsLexeme {
fn as_lexeme(self) -> Option<Lexeme>;
}
@ -49,12 +43,15 @@ impl AsLexeme for LexemeRaw {
#[allow(dead_code)]
unsafe extern "C" {
unsafe fn tokeniser_init(path: *const i8) -> ();
unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
unsafe fn tokeniser_print() -> ();
unsafe fn is_ident(len: usize) -> bool;
unsafe fn is_number(len: usize) -> bool;
unsafe fn skip_whitespace() -> ();
unsafe fn find_lexeme() -> LexemeRaw;
unsafe fn expect_token(token: u8) -> MaybeFFISlice;
unsafe fn unwrap_token(token: u8) -> FFISlice;
static mut LEXEMES: *const u8;
static mut LEXEME_LENS: usize;
@ -137,16 +134,16 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(30, "this-is-an-ident"),
Lexeme(30, "another_ident123"),
Lexeme(30, "_underscore_test"),
Lexeme(30, "mixedCASEIdent"),
Lexeme(30, "number12345"),
Lexeme(30, "____"),
Lexeme(30, "_"),
Lexeme(31, "this-is-an-ident"),
Lexeme(31, "another_ident123"),
Lexeme(31, "_underscore_test"),
Lexeme(31, "mixedCASEIdent"),
Lexeme(31, "number12345"),
Lexeme(31, "____"),
Lexeme(31, "_"),
Lexeme(17, ""),
Lexeme(30, "leading-minus"),
Lexeme(30, "trailing-minus-"),
Lexeme(31, "leading-minus"),
Lexeme(31, "trailing-minus-"),
]
);
@ -158,7 +155,7 @@ fn main() {
&collect_tokens()[..],
&[
Lexeme(4, ""),
Lexeme(30, "my-function"),
Lexeme(31, "my-function"),
Lexeme(19, ""),
Lexeme(18, ""),
Lexeme(12, ""),
@ -171,6 +168,14 @@ fn main() {
]
);
eprint!("Initializing tokeniser.. ");
tokeniser_init(c"tests/tokens/function.l".as_ptr());
eprintln!("ok.");
assert_eq!(expect_token(2).into_option(), None);
assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
assert_eq!(unwrap_token(31).as_str(), "my-function");
eprint!("Initializing tokeniser.. ");
tokeniser_init(c"tests/tokens/comment.l".as_ptr());
eprintln!("ok.");
@ -178,15 +183,15 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(33, ""),
Lexeme(34, ""),
Lexeme(4, ""),
Lexeme(30, "my-function"),
Lexeme(31, "my-function"),
Lexeme(19, ""),
Lexeme(18, ""),
Lexeme(12, ""),
Lexeme(11, ""),
Lexeme(21, ""),
Lexeme(33, ""),
Lexeme(34, ""),
Lexeme(5, ""),
Lexeme(10, ""),
Lexeme(23, ""),
@ -201,11 +206,11 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(31, "1234"),
Lexeme(31, "123_345_"),
Lexeme(31, "1234____56"),
Lexeme(31, "1"),
Lexeme(31, "0"),
Lexeme(32, "1234"),
Lexeme(32, "123_345_"),
Lexeme(32, "1234____56"),
Lexeme(32, "1"),
Lexeme(32, "0"),
]
);
@ -216,14 +221,24 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(32, "\"this is a string\""),
Lexeme(32, "\"another\nstring\nspanning multiple\n lines\""),
Lexeme(32, "\"string with a \\\"quoted\\\" word\""),
Lexeme(32, "\"a\""),
Lexeme(32, "\"\"")
Lexeme(33, "\"this is a string\""),
Lexeme(33, "\"another\nstring\nspanning multiple\n lines\""),
Lexeme(33, "\"string with a \\\"quoted\\\" word\""),
Lexeme(33, "\"a\""),
Lexeme(33, "\"\"")
],
);
eprint!("Initializing tokeniser.. ");
let src = b"3 + 4";
tokeniser_init_buf(src.as_ptr(), src.len());
eprintln!("ok.");
assert_eq!(
&collect_tokens()[..],
&[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")],
);
eprintln!("Finished tokenising.");
}
}

View file

@ -1,263 +1,7 @@
#[repr(C)]
pub struct BlobVec {
pub data: *mut u8,
pub len: usize,
pub cap: usize,
pub elem_size: usize,
pub drop: Option<extern "C" fn(*mut u8)>,
}
#[path = "shared/shared.rs"]
mod util;
struct VecT<T> {
vec: BlobVec,
_marker: core::marker::PhantomData<T>,
}
impl<T> VecT<T> {
fn new() -> Self {
Self::new_with(32)
}
fn new_with(capacity: usize) -> Self {
let mut vec = BlobVec {
data: core::ptr::null_mut(),
len: 0,
cap: 0,
elem_size: 0,
drop: None,
};
extern "C" fn drop_fn<T>(ptr: *mut u8) {
unsafe {
core::ptr::drop_in_place::<T>(ptr as *mut T);
}
}
unsafe {
vec_init_with(
&mut vec,
core::mem::size_of::<T>(),
Some(drop_fn::<T>),
capacity,
);
}
Self {
vec,
_marker: core::marker::PhantomData,
}
}
fn as_slice(&self) -> &[T] {
assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
unsafe { core::slice::from_raw_parts(self.vec.data as *const T, self.vec.len) }
}
fn as_slice_mut(&mut self) -> &mut [T] {
assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
unsafe { core::slice::from_raw_parts_mut(self.vec.data as *mut T, self.vec.len) }
}
fn push(&mut self, value: T) {
let value = core::mem::ManuallyDrop::new(value);
unsafe {
vec_push(&mut self.vec, &raw const value as *const T as *const u8);
}
}
fn insert(&mut self, value: T, index: usize) {
if index > self.vec.len {
return;
}
let value = core::mem::ManuallyDrop::new(value);
unsafe {
vec_insert(
&mut self.vec,
index,
&raw const value as *const T as *const u8,
);
}
}
fn pop(&mut self) -> Option<T> {
if self.vec.len == 0 {
return None;
}
unsafe {
let ptr = vec_get(&mut self.vec, self.vec.len - 1) as *mut T;
let value = ptr.read();
vec_pop(&mut self.vec);
Some(value)
}
}
fn get(&self, index: usize) -> Option<&T> {
if index >= self.vec.len {
return None;
}
unsafe {
let ptr = vec_get(&raw const self.vec as *mut _, index) as *mut T;
Some(&*ptr)
}
}
fn get_mut(&mut self, index: usize) -> Option<&mut T> {
if index >= self.vec.len {
return None;
}
unsafe {
let ptr = vec_get(&raw mut self.vec, index) as *mut T;
Some(&mut *ptr)
}
}
fn remove(&mut self, index: usize) {
if index >= self.vec.len {
return;
}
unsafe {
vec_remove(&mut self.vec, index);
}
}
fn len(&self) -> usize {
self.vec.len
}
fn position<F>(&self, elem: &T, mut cmp: F) -> Option<usize>
where
F: FnMut(&T, &T) -> bool,
{
extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> bool>(
f: *const (),
a: *const u8,
b: *const u8,
) -> bool {
let f = unsafe { &mut *(f as *mut F) };
let a = unsafe { &*(a as *const T) };
let b = unsafe { &*(b as *const T) };
f(a, b)
}
unsafe {
let index = vec_find(
&raw const self.vec as *mut _,
elem as *const T as *const u8,
cmp_trampoline::<T, F>,
&raw mut cmp as *mut F as *mut (),
);
if index == usize::MAX {
None
} else {
Some(index)
}
}
}
fn binary_search_by<F>(&self, elem: &T, mut cmp: F) -> Result<usize, usize>
where
F: FnMut(&T, &T) -> i32,
{
extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> i32>(
f: *const (),
a: *const u8,
b: *const u8,
) -> i32 {
let f = unsafe { &mut *(f as *mut F) };
let a = unsafe { &*(a as *const T) };
let b = unsafe { &*(b as *const T) };
f(a, b)
}
unsafe {
let (index, vacant) = vec_binary_search_by(
&raw const self.vec as *mut _,
elem as *const T as *const u8,
cmp_trampoline::<T, F>,
&raw mut cmp as *mut F as *mut (),
);
if vacant {
Err(index)
} else {
Ok(index)
}
}
}
fn insert_sorted<F>(&self, elem: T, mut cmp: F) -> Result<usize, usize>
where
F: FnMut(&T, &T) -> i32,
{
extern "C" fn cmp_trampoline<T, F: FnMut(&T, &T) -> i32>(
f: *const (),
a: *const u8,
b: *const u8,
) -> i32 {
let f = unsafe { &mut *(f as *mut F) };
let a = unsafe { &*(a as *const T) };
let b = unsafe { &*(b as *const T) };
f(a, b)
}
let mut elem = core::mem::ManuallyDrop::new(elem);
unsafe {
let (index, inserted) = vec_insert_sorted(
&raw const self.vec as *mut _,
&raw const elem as *const u8,
cmp_trampoline::<T, F>,
&raw mut cmp as *mut F as *mut (),
);
Ok(index)
}
}
}
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
panic!("Called panic from external code.");
}
unsafe impl Send for BlobVec {}
unsafe impl Sync for BlobVec {}
unsafe extern "C" {
unsafe fn vec_init(vec: *mut BlobVec, elem_size: usize, drop: Option<extern "C" fn(*mut u8)>);
unsafe fn vec_init_with(
vec: *mut BlobVec,
elem_size: usize,
drop: Option<extern "C" fn(*mut u8)>,
cap: usize,
);
unsafe fn vec_push(vec: *mut BlobVec, elem: *const u8);
unsafe fn vec_insert(vec: *mut BlobVec, index: usize, elem: *const u8);
unsafe fn vec_pop(vec: *mut BlobVec);
unsafe fn vec_drop_last(vec: *mut BlobVec);
unsafe fn vec_get(vec: *mut BlobVec, index: usize) -> *mut u8;
#[allow(dead_code)]
unsafe fn vec_remove(vec: *mut BlobVec, index: usize);
#[allow(dead_code)]
unsafe fn vec_drop(vec: *mut BlobVec);
unsafe fn vec_find(
vec: *mut BlobVec,
elem: *const u8,
cmp: extern "C" fn(*const (), *const u8, *const u8) -> bool,
cmp_data: *mut (),
) -> usize;
unsafe fn vec_binary_search_by(
vec: *mut BlobVec,
elem: *const u8,
cmp: extern "C" fn(*const (), *const u8, *const u8) -> i32,
cmp_data: *mut (),
) -> (usize, bool);
unsafe fn vec_insert_sorted(
vec: *mut BlobVec,
elem: *const u8,
cmp: extern "C" fn(*const (), *const u8, *const u8) -> i32,
cmp_data: *mut (),
) -> (usize, bool);
}
use util::{ffi::*, vec::Vec, BlobVec};
fn main() {
static mut DROPS: usize = 1;
@ -318,7 +62,7 @@ fn main() {
eprintln!("Push/pop test passed\n");
}
let mut vec = VecT::<u32>::new_with(100);
let mut vec = Vec::<u32>::new_with(100);
assert_eq!(vec.len(), 0);
vec.push(10);
vec.push(20);
@ -358,6 +102,6 @@ fn main() {
assert_eq!(vec.binary_search_by(&5, cmp), Err(0));
assert_eq!(vec.binary_search_by(&55, cmp), Err(4));
vec.insert_sorted(35, cmp);
_ = vec.insert_sorted(35, cmp);
assert_eq!(vec.as_slice(), &[20, 30, 35, 40, 50]);
}