parse structs and functions from asm for rust tests

2025-10-29 22:10:14 +01:00 · 2025-10-29 22:10:14 +01:00 · 4e55fa74f4
parent 5ae3e17693
commit 4e55fa74f4
6 changed files with 497 additions and 46 deletions
--- a/lang/Makefile
+++ b/lang/Makefile
@ -29,8 +29,12 @@ fmt: $(wildcard tests/*.rs)
 	@echo "Formatting test source files..."
 	rustfmt --edition 2024 $^

+# Regenerate the shared Rust definitions whenever an assembly source or the generator
+# script itself changes. The script is listed first so that `$<` names it; it is filtered
+# out of `$^` so that only the assembly sources are passed to it as inputs.
+tests/shared/defs.rs: tests/asm_to_rust.py $(wildcard src/*)
+	@echo "Generating shared definitions for tests..."
+	./$< $(filter-out $<,$^) -o $@
+
 # pattern rule: compile each .rs into a binary with the same base name
-$(TARGET_DIR)/tests/%: tests/%.rs | $(OBJ) $(TARGET_DIR)/tests
+$(TARGET_DIR)/tests/%: tests/%.rs tests/shared/defs.rs $(OBJ) | $(TARGET_DIR)/tests
 	@echo "[$(RUSTC)] $< -> $@"
 	rustc -Clink-arg=-fuse-ld=mold --edition=2024 $(OBJ_LINK_ARGS) -g  -o $@ $< 

--- a/lang/src/ast.asm
+++ b/lang/src/ast.asm
@ -3,7 +3,7 @@ default rel
 %include "src/tokeniser.inc"

 section .rdata
-;; start-constants
+;; start-consts
    AST_FUNCTION equ 1
    AST_BLOCK equ 2
    AST_VARIABLE equ 3
@ -16,7 +16,7 @@ section .rdata
    TYPE_I32 equ 3
    TYPE_U32 equ 4
    TYPE_STR equ 5
-;; end-constants
+;; end-consts

 section .text
 extern vec_init_with
@ -47,6 +47,9 @@ global parse_statement
 global parse_block

 ;; start very simple, with only functions and addition
+;; ```rust
+;; use super::vec::Vec;
+;; ```
 ;; start-structs
 ;; struct Ast {
 ;;   nodes: Vec<AstNode>,
@ -189,6 +192,7 @@ parse_args:

 ;; rdi: lexeme ptr
 ;; rsi: lexeme len
+;; fn parse_number(lexeme: *const u8, lexeme_len: usize) -> u64
 parse_number:
    push rbp
    mov rbp, rsp
@ -239,7 +243,7 @@ parse_number:
    call panic

 ;; rdi: *mut Ast
-;; fn parse_expr(ast: *mut Ast) -> u64
+;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> u64
 parse_primary_expr:
    push rbp
    mov rbp, rsp
@ -272,17 +276,19 @@ parse_primary_expr:

 ;; rdi: *mut Ast
 ;; sil: precedence
-;; fn parse_expr(ast: *mut Ast) -> u64
+;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64
 parse_binary_expr:
    push rbp
    mov rbp, rsp

    ; size: 24, align: 8
+    ; start-structs
    ; struct BinaryExpr {
    ;   left: u64,
    ;   operator: u8,
    ;   right: u64,
    ; }
+    ; end-structs

    sub rsp, 64
    ; lexeme: Lexeme [32..56]
@ -351,7 +357,7 @@ parse_binary_expr:


 ;; rdi: *mut Ast
-;; fn parse_expr(ast: *mut Ast) -> u64
+;; define-fn: fn parse_expr(ast: *mut Ast) -> u64
 parse_expr:
    push rbp
    mov rbp, rsp
@ -364,7 +370,7 @@ parse_expr:
    ret

 ;; rdi: *mut Ast
-;; fn parse_statement(ast: *mut Ast) -> u64
+;; define-fn: fn parse_statement(ast: *mut Ast) -> u64
 parse_statement:
    push rbp
    mov rbp, rsp
@ -401,15 +407,18 @@ parse_statement:
    call panic

 ;; rdi: *mut Ast
-;; fn parse_block(ast: *mut Ast) -> u64
+;; define-fn: fn parse_block(ast: *mut Ast) -> u64
 parse_block:
    push rbp
    mov rbp, rsp
    push rdi

+    ; start-structs
    ; struct Block {
-    ;   statements: &[u64],
+    ;   statements: *const u64,
+    ;   statements_len: usize,
    ; }
+    ; end-structs

    sub rsp, 56
    ; statements: Vec<Statement> [0..40]
@ -440,7 +449,7 @@ parse_block:
    ret
    
 ;; rdi: *mut Ast
-;; fn parse_type(ast: *mut Ast) -> Type
+;; define-fn: fn parse_type(ast: *mut Ast) -> Type
 parse_type:
    push rbp
    mov rbp, rsp
--- a/lang/src/tokeniser.inc
+++ b/lang/src/tokeniser.inc
@ -105,106 +105,109 @@ align 8
 NUM_LEXEMES: dq 30

    LEX_NOT_A_LEXEME db "<not a lexeme>", 0
-    TOKEN_EOF       equ 0
-    TOKEN_LET       equ 1
    LEX_LET db "let"
    LEX_LET_len equ $ - LEX_LET
-    TOKEN_IF        equ 2
    LEX_IF db "if"
    LEX_IF_len equ $ - LEX_IF
-    TOKEN_ELSE      equ 3
    LEX_ELSE db "else"
    LEX_ELSE_len equ $ - LEX_ELSE
-    TOKEN_FN        equ 4
    LEX_FN db "fn"
    LEX_FN_len equ $ - LEX_FN
-    TOKEN_RETURN    equ 5
    LEX_RETURN db "return"
    LEX_RETURN_len equ $ - LEX_RETURN
-    TOKEN_LOOP      equ 6
    LEX_LOOP db "loop"
    LEX_LOOP_len equ $ - LEX_LOOP
-    TOKEN_BREAK     equ 7
    LEX_BREAK db "break"
    LEX_BREAK_len equ $ - LEX_BREAK
-    TOKEN_CONTINUE  equ 8
    LEX_CONTINUE db "continue"
    LEX_CONTINUE_len equ $ - LEX_CONTINUE
-    TOKEN_TRUE      equ 9
    LEX_TRUE db "true"
    LEX_TRUE_len equ $ - LEX_TRUE
-    TOKEN_FALSE     equ 10
    LEX_FALSE db "false"
    LEX_FALSE_len equ $ - LEX_FALSE
-    TOKEN_BOOL      equ 11
    LEX_BOOL db "bool"
    LEX_BOOL_len equ $ - LEX_BOOL
-    TOKEN_ARROW     equ 12
    LEX_ARROW db "->"
    LEX_ARROW_len equ $ - LEX_ARROW
-    TOKEN_I32       equ 13
    LEX_I32 db "i32"
    LEX_I32_len equ $ - LEX_I32
-    TOKEN_U32       equ 14
    LEX_U32 db "u32"
    LEX_U32_len equ $ - LEX_U32
-    TOKEN_EQUALS    equ 15
    LEX_EQUALS db "="
    LEX_EQUALS_len equ $ - LEX_EQUALS
-    TOKEN_PLUS      equ 16
    LEX_PLUS db "+"
    LEX_PLUS_len equ $ - LEX_PLUS
-    TOKEN_MINUS     equ 17
    LEX_MINUS db "-"
    LEX_MINUS_len equ $ - LEX_MINUS
-    TOKEN_RPARENS   equ 18
    LEX_RPARENS db ")"
    LEX_RPARENS_len equ $ - LEX_RPARENS
-    TOKEN_LPARENS   equ 19
    LEX_LPARENS db "("
    LEX_LPARENS_len equ $ - LEX_LPARENS
-    TOKEN_RBRACE    equ 20
    LEX_RBRACE db "}"
    LEX_RBRACE_len equ $ - LEX_RBRACE
-    TOKEN_LBRACE    equ 21
    LEX_LBRACE db "{"
    LEX_LBRACE_len equ $ - LEX_LBRACE
-    TOKEN_COLON     equ 22
    LEX_COLON db ":"
    LEX_COLON_len equ $ - LEX_COLON
-    TOKEN_SEMI      equ 23
    LEX_SEMI db ";"
    LEX_SEMI_len equ $ - LEX_SEMI
-    TOKEN_COMMA     equ 24
    LEX_COMMA db ","
    LEX_COMMA_len equ $ - LEX_COMMA
-    TOKEN_PIPE      equ 25
    LEX_PIPE db "|"
    LEX_PIPE_len equ $ - LEX_PIPE
-    TOKEN_AMP       equ 26
    LEX_AMP db "&"
    LEX_AMP_len equ $ - LEX_AMP
-    TOKEN_EQEQ     equ 27
    LEX_EQEQ db "=="
    LEX_EQEQ_len equ $ - LEX_EQEQ
-    TOKEN_LBRACKET  equ 28
    LEX_LBRACKET db "["
    LEX_LBRACKET_len equ $ - LEX_LBRACKET
-    TOKEN_RBRACKET  equ 29
    LEX_RBRACKET db "]"
    LEX_RBRACKET_len equ $ - LEX_RBRACKET
-    TOKEN_VOID equ 30
    LEX_VOID db "void"
    LEX_VOID_len equ $ - LEX_VOID
-    TOKEN_IDENT     equ 31
    LEX_IDENT db "<identifier>"
    LEX_IDENT_len equ $ - LEX_IDENT
-    TOKEN_NUMBER    equ 32
    LEX_NUMBER db "<number>"
    LEX_NUMBER_len equ $ - LEX_NUMBER
-    TOKEN_STRING    equ 33
    LEX_STRING db "<string>"
    LEX_STRING_len equ $ - LEX_STRING
-    TOKEN_COMMENT   equ 34
    LEX_COMMENT db "<comment>"
-    LEX_COMMENT_len equ $ - LEX_COMMENT
+    LEX_COMMENT_len equ $ - LEX_COMMENT
+
+    ;; start-consts
+    TOKEN_EOF       equ 0
+    TOKEN_LET       equ 1
+    TOKEN_IF        equ 2
+    TOKEN_ELSE      equ 3
+    TOKEN_FN        equ 4
+    TOKEN_RETURN    equ 5
+    TOKEN_LOOP      equ 6
+    TOKEN_BREAK     equ 7
+    TOKEN_CONTINUE  equ 8
+    TOKEN_TRUE      equ 9
+    TOKEN_FALSE     equ 10
+    TOKEN_BOOL      equ 11
+    TOKEN_ARROW     equ 12
+    TOKEN_I32       equ 13
+    TOKEN_U32       equ 14
+    TOKEN_EQUALS    equ 15
+    TOKEN_PLUS      equ 16
+    TOKEN_MINUS     equ 17
+    TOKEN_RPARENS   equ 18
+    TOKEN_LPARENS   equ 19
+    TOKEN_RBRACE    equ 20
+    TOKEN_LBRACE    equ 21
+    TOKEN_COLON     equ 22
+    TOKEN_SEMI      equ 23
+    TOKEN_COMMA     equ 24
+    TOKEN_PIPE      equ 25
+    TOKEN_AMP       equ 26
+    TOKEN_EQEQ      equ 27
+    TOKEN_LBRACKET  equ 28
+    TOKEN_RBRACKET  equ 29
+    TOKEN_VOID      equ 30
+    TOKEN_IDENT     equ 31
+    TOKEN_NUMBER    equ 32
+    TOKEN_STRING    equ 33
+    TOKEN_COMMENT   equ 34
+    ;; end-consts
--- a/lang/tests/asm_to_rust.py
+++ b/lang/tests/asm_to_rust.py
@ -0,0 +1,310 @@
+#!/usr/bin/env python3
+"""
+asm_to_rust.py
+
+Scan one or more assembly source files and extract:
+ - commented struct definitions inside `start-structs` / `end-structs` spans
+ - constant definitions inside `start-consts` / `end-consts` spans
+ - commented function-definition directives of the form `define-fn: fn ...`
+ - commented markdown rust fenced code blocks (```rust) and copy their inner code
+   into the generated Rust output (fences are removed and comment markers stripped)
+
+Produce Rust source code containing:
+ - an `unsafe extern "C"` block with `pub unsafe fn ...;` declarations for each define-fn
+ - `pub const NAME: u32 = <value>;` lines for each `equ` constant found in const spans
+ - `#[repr(C)] pub struct Name { pub field: Type, ... }` for each struct found in struct spans
+ - verbatim Rust code copied from commented ```rust``` blocks (fences removed)
+
+Notes:
+ - Struct and function definitions must appear on commented lines. Any number of leading semicolons
+   (e.g. `;`, `;;`, `;;;`) and surrounding spaces are allowed and will be stripped.
+ - Constant lines inside const spans may be commented or not; the script strips leading semicolons
+   before parsing.
+ - Commented rust blocks are expected to use commented fenced code blocks, e.g.:
+     ;; ```rust
+     ;; extern "C" { ... }
+     ;; ```
+   The inner lines will be uncommented (leading semicolons removed) and included in output.
+ - By default the script writes to stdout. Use `-o` to write combined output to a file, or `-d`
+   to write one .rs file per input with the same basename.
+"""
+
+import argparse
+import re
+import sys
+from pathlib import Path
+from typing import List, Tuple, Dict, Any
+
+LEADING_COMMENT_RE = re.compile(r'^\s*;+\s*')  # lines that start with one or more semicolons
+START_STRUCTS_RE = re.compile(r'^\s*;+\s*start-structs\b', re.IGNORECASE)
+END_STRUCTS_RE = re.compile(r'^\s*;+\s*end-structs\b', re.IGNORECASE)
+START_CONSTS_RE = re.compile(r'^\s*;+\s*start-consts\b', re.IGNORECASE)
+END_CONSTS_RE = re.compile(r'^\s*;+\s*end-consts\b', re.IGNORECASE)
+DEFINE_FN_RE = re.compile(r'^\s*;+\s*define-fn:\s*(.+)$', re.IGNORECASE)
+CONST_EQU_RE = re.compile(r'^\s*([A-Za-z_]\w*)\s+equ\s+(.+)$', re.IGNORECASE)
+STRUCT_START_RE = re.compile(r'^\s*struct\s+([A-Za-z_]\w*)\s*\{')  # after comment markers stripped
+RUST_FENCE_RE = re.compile(r'^\s*```\s*(rust)?\s*$', re.IGNORECASE)  # matches ``` or ```rust (after stripping leading comment)
+
+
+def strip_leading_semicolons(line: str) -> str:
+    """
+    Drop an assembly comment marker from the front of `line`.
+
+    If the line starts (after optional whitespace) with one or more ';', that prefix and the
+    whitespace directly following it are removed. A line without such a marker keeps its
+    leading whitespace. Trailing newline characters are removed in either case.
+    """
+    uncommented = LEADING_COMMENT_RE.sub('', line)
+    return uncommented.rstrip('\n')
+
+
+def extract_structs_from_commented_lines(lines: List[str]) -> List[Tuple[str, List[str]]]:
+    """
+    Collect every `struct Name { ... }` definition found in `lines`.
+
+    `lines` must already have their comment markers removed. A definition starts on a line
+    matching STRUCT_START_RE; the lines that follow are gathered until the braces opened so
+    far are balanced again (or the input ends). A final line consisting only of '}' is not
+    part of the returned body.
+
+    Returns a list of (name, body_lines) tuples in source order.
+
+    Note: only whole lines after the header are collected, so anything written on the header
+    line behind the opening brace (e.g. a one-line struct) does not show up in the body.
+    """
+    found = []
+    cursor = 0
+    total = len(lines)
+    while cursor < total:
+        header = STRUCT_START_RE.match(lines[cursor])
+        if header is None:
+            cursor += 1
+            continue
+
+        # Braces still open after the header line; usually exactly one.
+        depth = lines[cursor].count('{') - lines[cursor].count('}')
+        cursor += 1
+
+        body = []
+        while depth > 0 and cursor < total:
+            current = lines[cursor]
+            body.append(current)
+            depth += current.count('{') - current.count('}')
+            cursor += 1
+
+        # The closing brace normally sits on its own line; it is not a field.
+        if body and body[-1].strip() == '}':
+            body.pop()
+        found.append((header.group(1), body))
+    return found
+
+
+def format_rust_struct(name: str, field_lines: List[str]) -> str:
+    """
+    Render a `#[repr(C)] #[derive(Debug)] pub struct` definition as Rust source text.
+
+    Each entry of `field_lines` is trimmed and loses its trailing commas; entries that are
+    empty afterwards are skipped. An entry containing ':' is split at the first ':' into a
+    field name and a type (both trimmed) and emitted as `pub name: type,`. Any other entry
+    is emitted unchanged behind `pub `.
+
+    Returns the struct as a single string without a trailing newline.
+    """
+    rendered = ['#[repr(C)]', '#[derive(Debug)]', f'pub struct {name} {{']
+    for entry in field_lines:
+        field = entry.strip().rstrip(',')
+        if not field:
+            continue
+        ident, colon, ty = field.partition(':')
+        if colon:
+            rendered.append(f'    pub {ident.strip()}: {ty.strip()},')
+        else:
+            rendered.append(f'    pub {field},')
+    rendered.append('}')
+    return '\n'.join(rendered)
+
+
+def _strip_trailing_asm_comment(text: str) -> str:
+    """
+    Cut `text` at the first ';' that is not inside a quoted literal.
+
+    A single quote, double quote or backquote opens a literal that runs to the next
+    occurrence of the same character; escape sequences inside the literal are not
+    interpreted. The text before the comment is returned without further trimming.
+    """
+    open_quote = None
+    for pos, ch in enumerate(text):
+        if open_quote is not None:
+            if ch == open_quote:
+                open_quote = None
+        elif ch in ('"', "'", '`'):
+            open_quote = ch
+        elif ch == ';':
+            return text[:pos]
+    return text
+
+
+def parse_file(path: Path) -> Dict[str, Any]:
+    """
+    Parse a single assembly file and return dict with keys: 'functions', 'consts', 'structs', 'rust_blocks'
+    - functions: list of signature strings (e.g. "parse_ast(data: *const u8) -> Ast")
+    - consts: list of (name, value); a trailing `; comment` after the value is not part of it
+    - structs: list of (name, field_lines)
+    - rust_blocks: list of rust code blocks; each block is list[str] of code lines (no fences, uncommented)
+
+    A `start-structs` / `start-consts` span that is still open at the end of the file
+    contributes nothing to the result; a warning naming the file is printed to stderr.
+    """
+    functions: List[str] = []
+    consts: List[Tuple[str, str]] = []
+    structs: List[Tuple[str, List[str]]] = []
+    rust_blocks: List[List[str]] = []
+
+    with path.open('r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    i = 0
+    n = len(lines)
+    in_structs = False
+    in_consts = False
+    struct_buffer: List[str] = []
+    const_buffer: List[str] = []
+
+    while i < n:
+        raw = lines[i]
+
+        # Span markers are handled first so that they never end up in a buffer.
+        if not in_structs and START_STRUCTS_RE.match(raw):
+            in_structs = True
+            struct_buffer = []
+            i += 1
+            continue
+        if in_structs and END_STRUCTS_RE.match(raw):
+            stripped = [strip_leading_semicolons(l) for l in struct_buffer if l.strip()]
+            structs.extend(extract_structs_from_commented_lines(stripped))
+            in_structs = False
+            struct_buffer = []
+            i += 1
+            continue
+
+        if not in_consts and START_CONSTS_RE.match(raw):
+            in_consts = True
+            const_buffer = []
+            i += 1
+            continue
+        if in_consts and END_CONSTS_RE.match(raw):
+            for l in const_buffer:
+                m = CONST_EQU_RE.match(strip_leading_semicolons(l))
+                if not m:
+                    continue
+                # CONST_EQU_RE captures the rest of the line, which may carry an assembly
+                # comment (`NAME equ 1 ; note`). Keeping it would produce invalid Rust.
+                value = _strip_trailing_asm_comment(m.group(2)).strip()
+                if value:
+                    consts.append((m.group(1), value))
+            in_consts = False
+            const_buffer = []
+            i += 1
+            continue
+
+        if in_structs:
+            # Struct definitions live in comments; uncommented lines in the span are code.
+            if LEADING_COMMENT_RE.match(raw):
+                struct_buffer.append(raw)
+        elif in_consts:
+            # Constant lines may be commented or not; they are sorted out at end-consts.
+            const_buffer.append(raw)
+        else:
+            # Top level: define-fn directives and commented ```rust fenced blocks.
+            mfn = DEFINE_FN_RE.match(raw)
+            if mfn:
+                sig = mfn.group(1).strip()
+                if sig.startswith('fn '):
+                    sig = sig[len('fn '):].strip()
+                functions.append(sig)
+            elif LEADING_COMMENT_RE.match(raw) and RUST_FENCE_RE.match(strip_leading_semicolons(raw)):
+                # Opening fence found: collect lines up to the closing fence or end of file.
+                block_lines: List[str] = []
+                i += 1
+                while i < n:
+                    cur = lines[i]
+                    if LEADING_COMMENT_RE.match(cur):
+                        inner = strip_leading_semicolons(cur)
+                        if RUST_FENCE_RE.match(inner):
+                            break
+                        block_lines.append(inner)
+                    else:
+                        # Uncommented line inside the block: keep as-is, minus the newline.
+                        block_lines.append(cur.rstrip('\n'))
+                    i += 1
+                rust_blocks.append(block_lines)
+                # i is on the closing fence (or at EOF); step past it and resume scanning.
+                i += 1
+                continue
+        i += 1
+
+    # A span without its end marker is never flushed above; say so instead of failing silently.
+    if in_structs:
+        print(f'warning: {path}: start-structs span is never closed, its contents are ignored',
+              file=sys.stderr)
+    if in_consts:
+        print(f'warning: {path}: start-consts span is never closed, its contents are ignored',
+              file=sys.stderr)
+
+    return {
+        'functions': functions,
+        'consts': consts,
+        'structs': structs,
+        'rust_blocks': rust_blocks,
+    }
+
+
+def render_rust(function_sigs: List[str], consts: List[Tuple[str, str]],
+                structs: List[Tuple[str, List[str]]], rust_blocks: List[List[str]]) -> str:
+    """
+    Assemble the generated Rust source text from the parsed pieces.
+
+    The output starts with an `#![allow(...)]` attribute and a header comment, followed in
+    this order by:
+     - one `unsafe extern "C"` block declaring every signature in `function_sigs`
+       (left out when there are none),
+     - one `pub const NAME: u32 = VALUE;` line per entry of `consts`,
+     - one struct definition per entry of `structs` (see format_rust_struct),
+     - the lines of every block in `rust_blocks`, copied as collected.
+
+    Sections are separated by a blank line; trailing blank lines are dropped and the text
+    ends with a newline.
+    """
+    out: List[str] = [
+        '#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]',
+        '// Auto-generated Rust bindings from assembly source\n',
+    ]
+
+    # All foreign functions share a single extern block.
+    if function_sigs:
+        out.append('unsafe extern "C" {')
+        out.extend(f'    pub unsafe fn {sig};' for sig in function_sigs)
+        out.extend(['}', ''])
+
+    if consts:
+        out.extend(f'pub const {name}: u32 = {value};' for name, value in consts)
+        out.append('')
+
+    for name, field_lines in structs:
+        out.extend([format_rust_struct(name, field_lines), ''])
+
+    for block in rust_blocks:
+        # Keep exactly one blank line between the previous section and this block.
+        if out and out[-1] != '':
+            out.append('')
+        out.extend(line.rstrip('\n') for line in block)
+        out.append('')
+
+    while out and out[-1] == '':
+        out.pop()
+
+    return '\n'.join(out) + '\n' if out else ''
+
+
+def main(argv=None):
+    """
+    Command-line entry point: parse every input file and write the generated Rust.
+
+    Inputs that do not exist are skipped with a warning on stderr. With `-d`, one `.rs` file
+    per parsed input (named after the input's stem) is written into the given directory.
+    Otherwise the results of all inputs are merged and written to the `-o` file, or to
+    stdout when `-o` is absent. Returns 0.
+    """
+    cli = argparse.ArgumentParser(description='Parse assembly files and emit Rust externs, consts, struct defs, and commented ```rust``` blocks.')
+    cli.add_argument('inputs', metavar='INPUT', type=Path, nargs='+', help='assembly source files to parse')
+    destination = cli.add_mutually_exclusive_group()
+    destination.add_argument('-o', '--out', type=Path, help='write combined Rust to this file (default stdout)')
+    destination.add_argument('-d', '--out-dir', type=Path, help='write one .rs file per input into this directory')
+    args = cli.parse_args(argv)
+
+    # Same order as the positional parameters of render_rust.
+    sections = ('functions', 'consts', 'structs', 'rust_blocks')
+    merged: Dict[str, list] = {key: [] for key in sections}
+    rendered_by_input: Dict[Path, str] = {}
+
+    for source in args.inputs:
+        if not source.exists():
+            print(f'warning: input file {source} does not exist, skipping', file=sys.stderr)
+            continue
+        parsed = parse_file(source)
+        rendered_by_input[source] = render_rust(*(parsed[key] for key in sections))
+        for key in sections:
+            merged[key].extend(parsed[key])
+
+    if args.out_dir:
+        args.out_dir.mkdir(parents=True, exist_ok=True)
+        for source, text in rendered_by_input.items():
+            target = args.out_dir / (source.stem + '.rs')
+            with target.open('w', encoding='utf-8') as handle:
+                handle.write(text)
+            print(f'Wrote {target}', file=sys.stderr)
+        return 0
+
+    combined = render_rust(*(merged[key] for key in sections))
+
+    if args.out:
+        with args.out.open('w', encoding='utf-8') as handle:
+            handle.write(combined)
+        print(f'Wrote {args.out}', file=sys.stderr)
+    else:
+        sys.stdout.write(combined)
+
+    return 0
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
--- a/lang/tests/shared/defs.rs
+++ b/lang/tests/shared/defs.rs
@ -0,0 +1,104 @@
+#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]
+// Auto-generated Rust bindings from assembly source
+
+unsafe extern "C" {
+    pub unsafe fn parse_func(ast: *mut Ast) -> u64;
+    pub unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize);
+    pub unsafe fn parse_primary_expr(ast: *mut Ast) -> u64;
+    pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64;
+    pub unsafe fn parse_expr(ast: *mut Ast) -> u64;
+    pub unsafe fn parse_statement(ast: *mut Ast) -> u64;
+    pub unsafe fn parse_block(ast: *mut Ast) -> u64;
+    pub unsafe fn parse_type(ast: *mut Ast) -> Type;
+}
+
+pub const AST_FUNCTION: u32 = 1;
+pub const AST_BLOCK: u32 = 2;
+pub const AST_VARIABLE: u32 = 3;
+pub const AST_NUMBER: u32 = 4;
+pub const AST_BINARY_OP: u32 = 5;
+pub const AST_RETURN_STATEMENT: u32 = 6;
+pub const TYPE_VOID: u32 = 1;
+pub const TYPE_BOOL: u32 = 2;
+pub const TYPE_I32: u32 = 3;
+pub const TYPE_U32: u32 = 4;
+pub const TYPE_STR: u32 = 5;
+pub const TOKEN_EOF: u32 = 0;
+pub const TOKEN_LET: u32 = 1;
+pub const TOKEN_IF: u32 = 2;
+pub const TOKEN_ELSE: u32 = 3;
+pub const TOKEN_FN: u32 = 4;
+pub const TOKEN_RETURN: u32 = 5;
+pub const TOKEN_LOOP: u32 = 6;
+pub const TOKEN_BREAK: u32 = 7;
+pub const TOKEN_CONTINUE: u32 = 8;
+pub const TOKEN_TRUE: u32 = 9;
+pub const TOKEN_FALSE: u32 = 10;
+pub const TOKEN_BOOL: u32 = 11;
+pub const TOKEN_ARROW: u32 = 12;
+pub const TOKEN_I32: u32 = 13;
+pub const TOKEN_U32: u32 = 14;
+pub const TOKEN_EQUALS: u32 = 15;
+pub const TOKEN_PLUS: u32 = 16;
+pub const TOKEN_MINUS: u32 = 17;
+pub const TOKEN_RPARENS: u32 = 18;
+pub const TOKEN_LPARENS: u32 = 19;
+pub const TOKEN_RBRACE: u32 = 20;
+pub const TOKEN_LBRACE: u32 = 21;
+pub const TOKEN_COLON: u32 = 22;
+pub const TOKEN_SEMI: u32 = 23;
+pub const TOKEN_COMMA: u32 = 24;
+pub const TOKEN_PIPE: u32 = 25;
+pub const TOKEN_AMP: u32 = 26;
+pub const TOKEN_EQEQ: u32 = 27;
+pub const TOKEN_LBRACKET: u32 = 28;
+pub const TOKEN_RBRACKET: u32 = 29;
+pub const TOKEN_VOID: u32 = 30;
+pub const TOKEN_IDENT: u32 = 31;
+pub const TOKEN_NUMBER: u32 = 32;
+pub const TOKEN_STRING: u32 = 33;
+pub const TOKEN_COMMENT: u32 = 34;
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct Ast {
+    pub nodes: Vec<AstNode>,
+}
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct AstNode {
+    pub kind: u8,
+    pub data: *const (),
+}
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct Argument {
+    pub name: *const u8,
+    pub name_len: usize,
+    pub arg_type: Type,
+}
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct Type {
+    pub kind: u8,
+}
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct BinaryExpr {
+    pub left: u64,
+    pub operator: u8,
+    pub right: u64,
+}
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct Block {
+    pub statements: *const u64,
+    pub statements_len: usize,
+}
+
+use super::vec::Vec;
--- a/lang/tests/shared/shared.rs
+++ b/lang/tests/shared/shared.rs
@ -1,5 +1,8 @@
 #![allow(dead_code)]

+#[path = "defs.rs"]
+pub mod defs;
+
 #[inline(never)]
 fn __do_panic() -> ! {
    panic!("Called panic from external code.");
@ -50,6 +53,7 @@ impl FFISlice {
 }

 #[repr(C)]
+#[derive(Debug)]
 pub struct BlobVec {
    pub data: *mut u8,
    pub len: usize,
@ -79,6 +83,8 @@ pub mod vec {
    use super::ffi::*;
    use super::*;

+    #[repr(transparent)]
+    #[derive(Debug)]
    pub struct Vec<T> {
        pub vec: BlobVec,
        _marker: core::marker::PhantomData<T>,
@ -332,3 +338,18 @@ pub mod ffi {
        ) -> (usize, bool);
    }
 }
+
+/// Borrowed slice wrapper whose `Display` output is `[a, b, c]`, with every element
+/// rendered through its own `Display` implementation.
+pub struct DisplaySlice<'a, T>(pub &'a [T]);
+
+impl<'a, T: core::fmt::Display> core::fmt::Display for DisplaySlice<'a, T> {
+    /// Writes the elements separated by `", "` and enclosed in square brackets;
+    /// an empty slice is written as `[]`.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "[")?;
+        let mut elements = self.0.iter();
+        // The first element carries no separator; each later one is preceded by ", ".
+        if let Some(first) = elements.next() {
+            write!(f, "{}", first)?;
+            for element in elements {
+                write!(f, ", ")?;
+                write!(f, "{}", element)?;
+            }
+        }
+        write!(f, "]")
+    }
+}