parse structs and functions from asm for rust tests
This commit is contained in:
parent
5ae3e17693
commit
4e55fa74f4
|
|
@ -29,8 +29,12 @@ fmt: $(wildcard tests/*.rs)
|
|||
@echo "Formatting test source files..."
|
||||
rustfmt --edition 2024 $^
|
||||
|
||||
tests/shared/defs.rs: $(wildcard src/*)
|
||||
@echo "Generating shared definitions for tests..."
|
||||
./tests/asm_to_rust.py $^ -o $@
|
||||
|
||||
# pattern rule: compile each .rs into a binary with the same base name
|
||||
$(TARGET_DIR)/tests/%: tests/%.rs | $(OBJ) $(TARGET_DIR)/tests
|
||||
$(TARGET_DIR)/tests/%: tests/%.rs tests/shared/defs.rs $(OBJ) | $(TARGET_DIR)/tests
|
||||
@echo "[$(RUSTC)] $< -> $@"
|
||||
rustc -Clink-arg=-fuse-ld=mold --edition=2024 $(OBJ_LINK_ARGS) -g -o $@ $<
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ default rel
|
|||
%include "src/tokeniser.inc"
|
||||
|
||||
section .rdata
|
||||
;; start-constants
|
||||
;; start-consts
|
||||
AST_FUNCTION equ 1
|
||||
AST_BLOCK equ 2
|
||||
AST_VARIABLE equ 3
|
||||
|
|
@ -16,7 +16,7 @@ section .rdata
|
|||
TYPE_I32 equ 3
|
||||
TYPE_U32 equ 4
|
||||
TYPE_STR equ 5
|
||||
;; end-constants
|
||||
;; end-consts
|
||||
|
||||
section .text
|
||||
extern vec_init_with
|
||||
|
|
@ -47,6 +47,9 @@ global parse_statement
|
|||
global parse_block
|
||||
|
||||
;; start very simple, with only functions and addition
|
||||
;; ```rust
|
||||
;; use super::vec::Vec;
|
||||
;; ```
|
||||
;; start-structs
|
||||
;; struct Ast {
|
||||
;; nodes: Vec<AstNode>,
|
||||
|
|
@ -189,6 +192,7 @@ parse_args:
|
|||
|
||||
;; rdi: lexeme ptr
|
||||
;; rsi: lexeme len
|
||||
;; fn parse_number(lexeme: *const u8, lexeme_len: usize) -> u64
|
||||
parse_number:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
|
@ -239,7 +243,7 @@ parse_number:
|
|||
call panic
|
||||
|
||||
;; rdi: *mut Ast
|
||||
;; fn parse_expr(ast: *mut Ast) -> u64
|
||||
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> u64
|
||||
parse_primary_expr:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
|
@ -272,17 +276,19 @@ parse_primary_expr:
|
|||
|
||||
;; rdi: *mut Ast
|
||||
;; sil: precedence
|
||||
;; fn parse_expr(ast: *mut Ast) -> u64
|
||||
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64
|
||||
parse_binary_expr:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
||||
; size: 24, align: 8
|
||||
; start-structs
|
||||
; struct BinaryExpr {
|
||||
; left: u64,
|
||||
; operator: u8,
|
||||
; right: u64,
|
||||
; }
|
||||
; end-structs
|
||||
|
||||
sub rsp, 64
|
||||
; lexeme: Lexeme [32..56]
|
||||
|
|
@ -351,7 +357,7 @@ parse_binary_expr:
|
|||
|
||||
|
||||
;; rdi: *mut Ast
|
||||
;; fn parse_expr(ast: *mut Ast) -> u64
|
||||
;; define-fn: fn parse_expr(ast: *mut Ast) -> u64
|
||||
parse_expr:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
|
@ -364,7 +370,7 @@ parse_expr:
|
|||
ret
|
||||
|
||||
;; rdi: *mut Ast
|
||||
;; fn parse_statement(ast: *mut Ast) -> u64
|
||||
;; define-fn: fn parse_statement(ast: *mut Ast) -> u64
|
||||
parse_statement:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
|
@ -401,15 +407,18 @@ parse_statement:
|
|||
call panic
|
||||
|
||||
;; rdi: *mut Ast
|
||||
;; fn parse_block(ast: *mut Ast) -> u64
|
||||
;; define-fn: fn parse_block(ast: *mut Ast) -> u64
|
||||
parse_block:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rdi
|
||||
|
||||
; start-structs
|
||||
; struct Block {
|
||||
; statements: &[u64],
|
||||
; statements: *const u64,
|
||||
; statements_len: usize,
|
||||
; }
|
||||
; end-structs
|
||||
|
||||
sub rsp, 56
|
||||
; statements: Vec<Statement> [0..40]
|
||||
|
|
@ -440,7 +449,7 @@ parse_block:
|
|||
ret
|
||||
|
||||
;; rdi: *mut Ast
|
||||
;; fn parse_type(ast: *mut Ast) -> Type
|
||||
;; define-fn: fn parse_type(ast: *mut Ast) -> Type
|
||||
parse_type:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
|
|
|||
|
|
@ -105,106 +105,109 @@ align 8
|
|||
NUM_LEXEMES: dq 30
|
||||
|
||||
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
|
||||
TOKEN_EOF equ 0
|
||||
TOKEN_LET equ 1
|
||||
LEX_LET db "let"
|
||||
LEX_LET_len equ $ - LEX_LET
|
||||
TOKEN_IF equ 2
|
||||
LEX_IF db "if"
|
||||
LEX_IF_len equ $ - LEX_IF
|
||||
TOKEN_ELSE equ 3
|
||||
LEX_ELSE db "else"
|
||||
LEX_ELSE_len equ $ - LEX_ELSE
|
||||
TOKEN_FN equ 4
|
||||
LEX_FN db "fn"
|
||||
LEX_FN_len equ $ - LEX_FN
|
||||
TOKEN_RETURN equ 5
|
||||
LEX_RETURN db "return"
|
||||
LEX_RETURN_len equ $ - LEX_RETURN
|
||||
TOKEN_LOOP equ 6
|
||||
LEX_LOOP db "loop"
|
||||
LEX_LOOP_len equ $ - LEX_LOOP
|
||||
TOKEN_BREAK equ 7
|
||||
LEX_BREAK db "break"
|
||||
LEX_BREAK_len equ $ - LEX_BREAK
|
||||
TOKEN_CONTINUE equ 8
|
||||
LEX_CONTINUE db "continue"
|
||||
LEX_CONTINUE_len equ $ - LEX_CONTINUE
|
||||
TOKEN_TRUE equ 9
|
||||
LEX_TRUE db "true"
|
||||
LEX_TRUE_len equ $ - LEX_TRUE
|
||||
TOKEN_FALSE equ 10
|
||||
LEX_FALSE db "false"
|
||||
LEX_FALSE_len equ $ - LEX_FALSE
|
||||
TOKEN_BOOL equ 11
|
||||
LEX_BOOL db "bool"
|
||||
LEX_BOOL_len equ $ - LEX_BOOL
|
||||
TOKEN_ARROW equ 12
|
||||
LEX_ARROW db "->"
|
||||
LEX_ARROW_len equ $ - LEX_ARROW
|
||||
TOKEN_I32 equ 13
|
||||
LEX_I32 db "i32"
|
||||
LEX_I32_len equ $ - LEX_I32
|
||||
TOKEN_U32 equ 14
|
||||
LEX_U32 db "u32"
|
||||
LEX_U32_len equ $ - LEX_U32
|
||||
TOKEN_EQUALS equ 15
|
||||
LEX_EQUALS db "="
|
||||
LEX_EQUALS_len equ $ - LEX_EQUALS
|
||||
TOKEN_PLUS equ 16
|
||||
LEX_PLUS db "+"
|
||||
LEX_PLUS_len equ $ - LEX_PLUS
|
||||
TOKEN_MINUS equ 17
|
||||
LEX_MINUS db "-"
|
||||
LEX_MINUS_len equ $ - LEX_MINUS
|
||||
TOKEN_RPARENS equ 18
|
||||
LEX_RPARENS db ")"
|
||||
LEX_RPARENS_len equ $ - LEX_RPARENS
|
||||
TOKEN_LPARENS equ 19
|
||||
LEX_LPARENS db "("
|
||||
LEX_LPARENS_len equ $ - LEX_LPARENS
|
||||
TOKEN_RBRACE equ 20
|
||||
LEX_RBRACE db "}"
|
||||
LEX_RBRACE_len equ $ - LEX_RBRACE
|
||||
TOKEN_LBRACE equ 21
|
||||
LEX_LBRACE db "{"
|
||||
LEX_LBRACE_len equ $ - LEX_LBRACE
|
||||
TOKEN_COLON equ 22
|
||||
LEX_COLON db ":"
|
||||
LEX_COLON_len equ $ - LEX_COLON
|
||||
TOKEN_SEMI equ 23
|
||||
LEX_SEMI db ";"
|
||||
LEX_SEMI_len equ $ - LEX_SEMI
|
||||
TOKEN_COMMA equ 24
|
||||
LEX_COMMA db ","
|
||||
LEX_COMMA_len equ $ - LEX_COMMA
|
||||
TOKEN_PIPE equ 25
|
||||
LEX_PIPE db "|"
|
||||
LEX_PIPE_len equ $ - LEX_PIPE
|
||||
TOKEN_AMP equ 26
|
||||
LEX_AMP db "&"
|
||||
LEX_AMP_len equ $ - LEX_AMP
|
||||
TOKEN_EQEQ equ 27
|
||||
LEX_EQEQ db "=="
|
||||
LEX_EQEQ_len equ $ - LEX_EQEQ
|
||||
TOKEN_LBRACKET equ 28
|
||||
LEX_LBRACKET db "["
|
||||
LEX_LBRACKET_len equ $ - LEX_LBRACKET
|
||||
TOKEN_RBRACKET equ 29
|
||||
LEX_RBRACKET db "]"
|
||||
LEX_RBRACKET_len equ $ - LEX_RBRACKET
|
||||
TOKEN_VOID equ 30
|
||||
LEX_VOID db "void"
|
||||
LEX_VOID_len equ $ - LEX_VOID
|
||||
TOKEN_IDENT equ 31
|
||||
LEX_IDENT db "<identifier>"
|
||||
LEX_IDENT_len equ $ - LEX_IDENT
|
||||
TOKEN_NUMBER equ 32
|
||||
LEX_NUMBER db "<number>"
|
||||
LEX_NUMBER_len equ $ - LEX_NUMBER
|
||||
TOKEN_STRING equ 33
|
||||
LEX_STRING db "<string>"
|
||||
LEX_STRING_len equ $ - LEX_STRING
|
||||
TOKEN_COMMENT equ 34
|
||||
LEX_COMMENT db "<comment>"
|
||||
LEX_COMMENT_len equ $ - LEX_COMMENT
|
||||
LEX_COMMENT_len equ $ - LEX_COMMENT
|
||||
|
||||
;; start-consts
|
||||
TOKEN_EOF equ 0
|
||||
TOKEN_LET equ 1
|
||||
TOKEN_IF equ 2
|
||||
TOKEN_ELSE equ 3
|
||||
TOKEN_FN equ 4
|
||||
TOKEN_RETURN equ 5
|
||||
TOKEN_LOOP equ 6
|
||||
TOKEN_BREAK equ 7
|
||||
TOKEN_CONTINUE equ 8
|
||||
TOKEN_TRUE equ 9
|
||||
TOKEN_FALSE equ 10
|
||||
TOKEN_BOOL equ 11
|
||||
TOKEN_ARROW equ 12
|
||||
TOKEN_I32 equ 13
|
||||
TOKEN_U32 equ 14
|
||||
TOKEN_EQUALS equ 15
|
||||
TOKEN_PLUS equ 16
|
||||
TOKEN_MINUS equ 17
|
||||
TOKEN_RPARENS equ 18
|
||||
TOKEN_LPARENS equ 19
|
||||
TOKEN_RBRACE equ 20
|
||||
TOKEN_LBRACE equ 21
|
||||
TOKEN_COLON equ 22
|
||||
TOKEN_SEMI equ 23
|
||||
TOKEN_COMMA equ 24
|
||||
TOKEN_PIPE equ 25
|
||||
TOKEN_AMP equ 26
|
||||
TOKEN_EQEQ equ 27
|
||||
TOKEN_LBRACKET equ 28
|
||||
TOKEN_RBRACKET equ 29
|
||||
TOKEN_VOID equ 30
|
||||
TOKEN_IDENT equ 31
|
||||
TOKEN_NUMBER equ 32
|
||||
TOKEN_STRING equ 33
|
||||
TOKEN_COMMENT equ 34
|
||||
;; end-consts
|
||||
|
|
|
|||
310
lang/tests/asm_to_rust.py
Executable file
310
lang/tests/asm_to_rust.py
Executable file
|
|
@ -0,0 +1,310 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
asm_to_rust.py
|
||||
|
||||
Scan one or more assembly source files and extract:
|
||||
- commented struct definitions inside `start-structs` / `end-structs` spans
|
||||
- constant definitions inside `start-consts` / `end-consts` spans
|
||||
- commented function-definition directives of the form `define-fn: fn ...`
|
||||
- commented markdown rust fenced code blocks (```rust) and copy their inner code
|
||||
into the generated Rust output (fences are removed and comment markers stripped)
|
||||
|
||||
Produce Rust source code containing:
|
||||
- an `unsafe extern "C"` block with `pub unsafe fn ...;` declarations for each define-fn
|
||||
- `pub const NAME: u32 = <value>;` lines for each `equ` constant found in const spans
|
||||
- `#[repr(C)] #[derive(Debug)] pub struct Name { pub field: Type, ... }` for each struct found in struct spans
|
||||
- verbatim Rust code copied from commented ```rust``` blocks (fences removed)
|
||||
|
||||
Notes:
|
||||
- Struct and function definitions must appear on commented lines. Any number of leading semicolons
|
||||
(e.g. `;`, `;;`, `;;;`) and surrounding spaces are allowed and will be stripped.
|
||||
- Constant lines inside const spans may be commented or not; the script strips leading semicolons
|
||||
before parsing.
|
||||
- Commented rust blocks are expected to use commented fenced code blocks, e.g.:
|
||||
;; ```rust
|
||||
;; extern "C" { ... }
|
||||
;; ```
|
||||
The inner lines will be uncommented (leading semicolons removed) and included in output.
|
||||
- By default the script writes to stdout. Use `-o` to write combined output to a file, or `-d`
|
||||
to write one .rs file per input with the same basename.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Dict, Any
|
||||
|
||||
# Regular expressions used to classify individual lines of the assembly input.
# Every pattern is anchored at the start of the line and applied to one line at a time.
LEADING_COMMENT_RE = re.compile(r'^\s*;+\s*') # lines that start with one or more semicolons
|
||||
# Span markers: a comment line whose text begins with the marker word (case-insensitive).
START_STRUCTS_RE = re.compile(r'^\s*;+\s*start-structs\b', re.IGNORECASE)
|
||||
END_STRUCTS_RE = re.compile(r'^\s*;+\s*end-structs\b', re.IGNORECASE)
|
||||
START_CONSTS_RE = re.compile(r'^\s*;+\s*start-consts\b', re.IGNORECASE)
|
||||
END_CONSTS_RE = re.compile(r'^\s*;+\s*end-consts\b', re.IGNORECASE)
|
||||
# `define-fn:` directive on a comment line; group 1 captures the rest of the line (the signature text).
DEFINE_FN_RE = re.compile(r'^\s*;+\s*define-fn:\s*(.+)$', re.IGNORECASE)
|
||||
# `NAME equ VALUE`; group 1 is the identifier, group 2 is everything after `equ` up to the end of the line.
CONST_EQU_RE = re.compile(r'^\s*([A-Za-z_]\w*)\s+equ\s+(.+)$', re.IGNORECASE)
|
||||
# Opening line of a struct definition; group 1 is the struct name. The `{` must be on the same line.
STRUCT_START_RE = re.compile(r'^\s*struct\s+([A-Za-z_]\w*)\s*\{') # after comment markers stripped
|
||||
# Fence line; group 1 is `rust` when the language tag is present and None for a bare fence.
RUST_FENCE_RE = re.compile(r'^\s*```\s*(rust)?\s*$', re.IGNORECASE) # matches ``` or ```rust (after stripping leading comment)
|
||||
|
||||
|
||||
def strip_leading_semicolons(line: str) -> str:
    """Return `line` without its comment prefix and without trailing newlines.

    The prefix is whatever LEADING_COMMENT_RE matches at the start of the
    line: optional whitespace, one or more `;`, then optional whitespace.
    A line that does not start with a comment marker only loses its
    trailing newline characters.
    """
    prefix = LEADING_COMMENT_RE.match(line)
    remainder = line[prefix.end():] if prefix else line
    return remainder.rstrip('\n')
|
||||
|
||||
|
||||
def extract_structs_from_commented_lines(lines: List[str]) -> List[Tuple[str, List[str]]]:
    """
    Collect every `struct Name { ... }` definition found in `lines`.

    `lines` must already have their leading `;` comment markers removed.
    Returns a list of `(name, field_lines)` tuples in source order, where
    `field_lines` are the lines between the opening line and the closing `}`.

    Brace depth is tracked by counting `{` and `}` per line, so a body may
    contain braces of its own. A struct whose braces never balance simply
    takes every remaining line as its body.
    """
    found: List[Tuple[str, List[str]]] = []
    cursor = iter(lines)
    for candidate in cursor:
        header = STRUCT_START_RE.match(candidate)
        if header is None:
            continue

        # Depth after the opening line; zero means the struct closed on that same line.
        depth = candidate.count('{') - candidate.count('}')
        body: List[str] = []
        while depth > 0:
            following = next(cursor, None)
            if following is None:
                break  # input ended before the braces balanced
            depth += following.count('{') - following.count('}')
            body.append(following)

        # The closing brace line is part of the syntax, not a field.
        if body and body[-1].strip() == '}':
            body.pop()
        found.append((header.group(1), body))
    return found
|
||||
|
||||
|
||||
def format_rust_struct(name: str, field_lines: List[str]) -> str:
    """
    Render a `#[repr(C)] #[derive(Debug)] pub struct` from raw field lines.

    Args:
        name: struct identifier, emitted verbatim.
        field_lines: body lines such as `nodes: Vec<AstNode>,`. Surrounding
            whitespace and a trailing comma are ignored; blank lines are skipped.

    Returns:
        The struct definition as a single string without a trailing newline.

    Each field is split on its first `:` into name and type and emitted as
    `pub name: type,`. Lines without a `:` are emitted as `pub <line>,`.
    Three kinds of line are handled specially so the output stays valid Rust:
      * comment-only lines (`// ...`) and attribute lines (`#[...]`) are copied
        through unchanged instead of being turned into a field;
      * a field that already carries a visibility (`pub ...`, `pub(...) ...`)
        is not given a second `pub`;
      * a trailing `// comment` is kept after the generated comma rather than
        swallowing it.
    """
    out_lines = ['#[repr(C)]', '#[derive(Debug)]', f'pub struct {name} {{']
    for raw in field_lines:
        text = raw.strip()
        if not text:
            continue

        # Comments and attributes are not fields; pass them through as written.
        if text.startswith(('//', '#[')):
            out_lines.append(f' {text}')
            continue

        # `//` cannot occur inside a Rust type, so the first one starts a trailing comment.
        code, marker, comment = text.partition('//')
        code = code.strip().rstrip(',').rstrip()
        trailer = f' //{comment.rstrip()}' if marker else ''
        if not code:
            if trailer:
                out_lines.append(trailer)
            continue

        visibility = '' if code.startswith(('pub ', 'pub(')) else 'pub '
        if ':' in code:
            fname, ftype = code.split(':', 1)
            out_lines.append(f' {visibility}{fname.strip()}: {ftype.strip()},{trailer}')
        else:
            out_lines.append(f' {visibility}{code},{trailer}')
    out_lines.append('}')
    return '\n'.join(out_lines)
|
||||
|
||||
|
||||
def _strip_asm_trailing_comment(text: str) -> str:
    """Cut `text` at the first `;` that is not inside a quoted string and trim the result."""
    quote = None
    for pos, ch in enumerate(text):
        if quote is not None:
            if ch == quote:
                quote = None
        elif ch in '\'"`':
            quote = ch
        elif ch == ';':
            return text[:pos].rstrip()
    return text


def _parse_const_lines(raw_lines: List[str]) -> List[Tuple[str, str]]:
    """Turn the lines of a const span into `(name, value)` pairs, ignoring non-`equ` lines."""
    pairs: List[Tuple[str, str]] = []
    for raw in raw_lines:
        m = CONST_EQU_RE.match(strip_leading_semicolons(raw))
        if not m:
            continue
        # A trailing `; comment` belongs to the assembler, not to the value.
        value = _strip_asm_trailing_comment(m.group(2).strip())
        if value:
            pairs.append((m.group(1), value))
    return pairs


def _collect_rust_block(lines: List[str], start: int) -> Tuple[List[str], int]:
    """
    Gather the body of a fenced rust block whose first body line is `lines[start]`.

    Returns `(block_lines, next_index)` where `next_index` is the index just
    past the closing fence (or past the end of the input when no fence closes
    the block). Commented lines are uncommented; other lines only lose their
    trailing newline.
    """
    block: List[str] = []
    pos = start
    while pos < len(lines):
        cur = lines[pos]
        if LEADING_COMMENT_RE.match(cur):
            text = strip_leading_semicolons(cur)
            if RUST_FENCE_RE.match(text):
                break
            block.append(text)
        else:
            block.append(cur.rstrip('\n'))
        pos += 1
    return block, pos + 1


def parse_file(path: Path) -> Dict[str, Any]:
    """
    Parse a single assembly file and return dict with keys: 'functions', 'consts', 'structs', 'rust_blocks'
    - functions: list of signature strings (e.g. "parse_ast(data: *const u8) -> Ast")
    - consts: list of (name, value); a trailing `; comment` after the value is dropped
    - structs: list of (name, field_lines)
    - rust_blocks: list of rust code blocks; each block is list[str] of code lines (no fences, uncommented)

    Lines are classified in this order: span start/end markers, then span
    contents, then `define-fn:` directives, then fenced rust blocks. Inside a
    struct span only commented lines are kept; inside a const span every line
    is kept and filtered for `equ` afterwards.

    A span that is still open at end of file contributes nothing; a warning
    naming the file is printed to stderr so the missing end marker is noticed.
    """
    functions: List[str] = []
    consts: List[Tuple[str, str]] = []
    structs: List[Tuple[str, List[str]]] = []
    rust_blocks: List[List[str]] = []

    with path.open('r', encoding='utf-8') as f:
        lines = f.readlines()

    in_structs = False
    in_consts = False
    struct_buffer: List[str] = []
    const_buffer: List[str] = []

    i = 0
    n = len(lines)
    while i < n:
        raw = lines[i]
        i += 1

        # state transitions for start/end spans
        if not in_structs and START_STRUCTS_RE.match(raw):
            in_structs = True
            struct_buffer = []
            continue
        if in_structs and END_STRUCTS_RE.match(raw):
            uncommented = [strip_leading_semicolons(l) for l in struct_buffer if l.strip()]
            structs.extend(extract_structs_from_commented_lines(uncommented))
            in_structs = False
            struct_buffer = []
            continue

        if not in_consts and START_CONSTS_RE.match(raw):
            in_consts = True
            const_buffer = []
            continue
        if in_consts and END_CONSTS_RE.match(raw):
            consts.extend(_parse_const_lines(const_buffer))
            in_consts = False
            const_buffer = []
            continue

        # Inside a span: buffer the line and move on.
        if in_structs:
            if LEADING_COMMENT_RE.match(raw):
                struct_buffer.append(raw)
            continue
        if in_consts:
            const_buffer.append(raw)
            continue

        # Top-level: define-fn directives (must be commented lines)
        directive = DEFINE_FN_RE.match(raw)
        if directive:
            sig = directive.group(1).strip()
            if sig.startswith('fn '):
                sig = sig[len('fn '):].strip()
            functions.append(sig)
            continue

        # Top-level: commented fence opening a rust block
        if LEADING_COMMENT_RE.match(raw) and RUST_FENCE_RE.match(strip_leading_semicolons(raw)):
            block_lines, i = _collect_rust_block(lines, i)
            rust_blocks.append(block_lines)

    if in_structs:
        print(f'warning: {path}: `start-structs` span is never closed; its contents were ignored',
              file=sys.stderr)
    if in_consts:
        print(f'warning: {path}: `start-consts` span is never closed; its contents were ignored',
              file=sys.stderr)

    return {
        'functions': functions,
        'consts': consts,
        'structs': structs,
        'rust_blocks': rust_blocks,
    }
|
||||
|
||||
|
||||
def render_rust(function_sigs: List[str], consts: List[Tuple[str, str]],
|
||||
structs: List[Tuple[str, List[str]]], rust_blocks: List[List[str]]) -> str:
|
||||
parts: List[str] = []
|
||||
parts.append('#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]')
|
||||
parts.append('// Auto-generated Rust bindings from assembly source\n')
|
||||
|
||||
# Functions: wrap in single extern "C" block if any
|
||||
if function_sigs:
|
||||
parts.append('unsafe extern "C" {')
|
||||
for sig in function_sigs:
|
||||
parts.append(f' pub unsafe fn {sig};')
|
||||
parts.append('}')
|
||||
parts.append('') # blank line
|
||||
|
||||
# Consts
|
||||
for name, value in consts:
|
||||
parts.append(f'pub const {name}: u32 = {value};')
|
||||
if consts:
|
||||
parts.append('')
|
||||
|
||||
# Structs
|
||||
for name, field_lines in structs:
|
||||
parts.append(format_rust_struct(name, field_lines))
|
||||
parts.append('') # blank line between structs
|
||||
|
||||
# Rust blocks copied verbatim (these are already uncommented and fence-less)
|
||||
for block in rust_blocks:
|
||||
# Ensure there's a blank line before inserted blocks for separation
|
||||
if parts and parts[-1] != '':
|
||||
parts.append('')
|
||||
# append each line exactly as collected
|
||||
parts.extend(line.rstrip('\n') for line in block)
|
||||
parts.append('') # trailing blank line after block
|
||||
|
||||
# Trim trailing blank lines
|
||||
while parts and parts[-1] == '':
|
||||
parts.pop()
|
||||
|
||||
return '\n'.join(parts) + '\n' if parts else ''
|
||||
|
||||
|
||||
def main(argv=None):
    """
    Command-line entry point.

    Parses every existing INPUT file (missing ones are skipped with a warning)
    and writes the generated Rust either
      * to stdout (default),
      * to the single file given with `-o`, whose parent directory is created
        if needed, or
      * to one `<stem>.rs` file per input inside the directory given with `-d`.

    In `-d` mode two inputs with the same stem map to the same output file;
    the later one still wins, but a warning is printed so the overwrite is
    not silent.

    Returns 0, which the `__main__` guard turns into the process exit status.
    """
    parser = argparse.ArgumentParser(description='Parse assembly files and emit Rust externs, consts, struct defs, and commented ```rust``` blocks.')
    parser.add_argument('inputs', metavar='INPUT', type=Path, nargs='+', help='assembly source files to parse')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-o', '--out', type=Path, help='write combined Rust to this file (default stdout)')
    group.add_argument('-d', '--out-dir', type=Path, help='write one .rs file per input into this directory')
    args = parser.parse_args(argv)

    section_keys = ('functions', 'consts', 'structs', 'rust_blocks')
    combined: Dict[str, List[Any]] = {key: [] for key in section_keys}
    parsed_by_input: Dict[Path, Dict[str, Any]] = {}

    for inp in args.inputs:
        if not inp.exists():
            print(f'warning: input file {inp} does not exist, skipping', file=sys.stderr)
            continue
        parsed = parse_file(inp)
        parsed_by_input[inp] = parsed
        for key in section_keys:
            combined[key].extend(parsed[key])

    if args.out_dir:
        outdir = args.out_dir
        outdir.mkdir(parents=True, exist_ok=True)
        written: Dict[Path, Path] = {}
        for inp, parsed in parsed_by_input.items():
            outpath = outdir / (inp.stem + '.rs')
            if outpath in written:
                print(f'warning: {inp} and {written[outpath]} both map to {outpath}; overwriting',
                      file=sys.stderr)
            written[outpath] = inp
            # Rendering per file is only needed in this mode.
            src = render_rust(*(parsed[key] for key in section_keys))
            with outpath.open('w', encoding='utf-8') as f:
                f.write(src)
            print(f'Wrote {outpath}', file=sys.stderr)
        return 0

    combined_src = render_rust(*(combined[key] for key in section_keys))

    if args.out:
        # Mirror `-d`, which also creates its target directory.
        args.out.parent.mkdir(parents=True, exist_ok=True)
        with args.out.open('w', encoding='utf-8') as f:
            f.write(combined_src)
        print(f'Wrote {args.out}', file=sys.stderr)
    else:
        sys.stdout.write(combined_src)

    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise SystemExit(main())
|
||||
104
lang/tests/shared/defs.rs
Normal file
104
lang/tests/shared/defs.rs
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]
|
||||
// Auto-generated Rust bindings from assembly source
|
||||
|
||||
// Declarations of the assembly routines, one per `define-fn:` directive found by tests/asm_to_rust.py.
// This file is generated; change the directive in the assembly source rather than editing here.
unsafe extern "C" {
|
||||
pub unsafe fn parse_func(ast: *mut Ast) -> u64;
|
||||
// NOTE(review): a Rust tuple has no guaranteed C ABI layout. This signature is only accepted
// quietly because `improper_ctypes` is allowed at the top of this file — confirm that the
// assembly's return convention really matches what rustc expects here.
pub unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize);
|
||||
pub unsafe fn parse_primary_expr(ast: *mut Ast) -> u64;
|
||||
pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64;
|
||||
pub unsafe fn parse_expr(ast: *mut Ast) -> u64;
|
||||
pub unsafe fn parse_statement(ast: *mut Ast) -> u64;
|
||||
pub unsafe fn parse_block(ast: *mut Ast) -> u64;
|
||||
pub unsafe fn parse_type(ast: *mut Ast) -> Type;
|
||||
}
|
||||
|
||||
pub const AST_FUNCTION: u32 = 1;
|
||||
pub const AST_BLOCK: u32 = 2;
|
||||
pub const AST_VARIABLE: u32 = 3;
|
||||
pub const AST_NUMBER: u32 = 4;
|
||||
pub const AST_BINARY_OP: u32 = 5;
|
||||
pub const AST_RETURN_STATEMENT: u32 = 6;
|
||||
pub const TYPE_VOID: u32 = 1;
|
||||
pub const TYPE_BOOL: u32 = 2;
|
||||
pub const TYPE_I32: u32 = 3;
|
||||
pub const TYPE_U32: u32 = 4;
|
||||
pub const TYPE_STR: u32 = 5;
|
||||
pub const TOKEN_EOF: u32 = 0;
|
||||
pub const TOKEN_LET: u32 = 1;
|
||||
pub const TOKEN_IF: u32 = 2;
|
||||
pub const TOKEN_ELSE: u32 = 3;
|
||||
pub const TOKEN_FN: u32 = 4;
|
||||
pub const TOKEN_RETURN: u32 = 5;
|
||||
pub const TOKEN_LOOP: u32 = 6;
|
||||
pub const TOKEN_BREAK: u32 = 7;
|
||||
pub const TOKEN_CONTINUE: u32 = 8;
|
||||
pub const TOKEN_TRUE: u32 = 9;
|
||||
pub const TOKEN_FALSE: u32 = 10;
|
||||
pub const TOKEN_BOOL: u32 = 11;
|
||||
pub const TOKEN_ARROW: u32 = 12;
|
||||
pub const TOKEN_I32: u32 = 13;
|
||||
pub const TOKEN_U32: u32 = 14;
|
||||
pub const TOKEN_EQUALS: u32 = 15;
|
||||
pub const TOKEN_PLUS: u32 = 16;
|
||||
pub const TOKEN_MINUS: u32 = 17;
|
||||
pub const TOKEN_RPARENS: u32 = 18;
|
||||
pub const TOKEN_LPARENS: u32 = 19;
|
||||
pub const TOKEN_RBRACE: u32 = 20;
|
||||
pub const TOKEN_LBRACE: u32 = 21;
|
||||
pub const TOKEN_COLON: u32 = 22;
|
||||
pub const TOKEN_SEMI: u32 = 23;
|
||||
pub const TOKEN_COMMA: u32 = 24;
|
||||
pub const TOKEN_PIPE: u32 = 25;
|
||||
pub const TOKEN_AMP: u32 = 26;
|
||||
pub const TOKEN_EQEQ: u32 = 27;
|
||||
pub const TOKEN_LBRACKET: u32 = 28;
|
||||
pub const TOKEN_RBRACKET: u32 = 29;
|
||||
pub const TOKEN_VOID: u32 = 30;
|
||||
pub const TOKEN_IDENT: u32 = 31;
|
||||
pub const TOKEN_NUMBER: u32 = 32;
|
||||
pub const TOKEN_STRING: u32 = 33;
|
||||
pub const TOKEN_COMMENT: u32 = 34;
|
||||
|
||||
/// AST container; every extern `parse_*` function declared in this file takes it as `*mut Ast`.
///
/// `Vec` is the type brought in by the `use super::vec::Vec;` line at the end of this file,
/// not `std::vec::Vec`.
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct Ast {
|
||||
pub nodes: Vec<AstNode>,
|
||||
}
|
||||
|
||||
/// Element type of `Ast::nodes`: a one-byte `kind` tag plus an untyped `data` pointer.
///
/// NOTE(review): `kind` is presumably one of the `AST_*` constants above and decides what
/// `data` points to — confirm against the assembly source.
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct AstNode {
|
||||
pub kind: u8,
|
||||
pub data: *const (),
|
||||
}
|
||||
|
||||
/// Pointee type of the `*const Argument` returned by `parse_args`.
///
/// The name is carried as a raw byte pointer plus a separate length field.
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct Argument {
|
||||
pub name: *const u8,
|
||||
|
||||
pub name_len: usize,
|
||||
pub arg_type: Type,
|
||||
}
|
||||
|
||||
/// Return type of `parse_type` and type of `Argument::arg_type`; a single tag byte.
///
/// NOTE(review): `kind` is presumably one of the `TYPE_*` constants above — confirm.
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct Type {
|
||||
pub kind: u8,
|
||||
}
|
||||
|
||||
/// Binary expression record; the assembly source annotates this layout as `size: 24, align: 8`.
///
/// NOTE(review): `left` / `right` look like the `u64` values returned by the `parse_*`
/// functions — confirm what they index or point to.
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct BinaryExpr {
|
||||
pub left: u64,
|
||||
pub operator: u8,
|
||||
pub right: u64,
|
||||
}
|
||||
|
||||
/// Block record: a raw pointer to `u64` statement entries plus a separate count field.
///
/// NOTE(review): `statements_len` is presumably an element count, not a byte length — confirm.
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct Block {
|
||||
pub statements: *const u64,
|
||||
pub statements_len: usize,
|
||||
}
|
||||
|
||||
use super::vec::Vec;
|
||||
|
|
@ -1,5 +1,8 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
#[path = "defs.rs"]
|
||||
pub mod defs;
|
||||
|
||||
#[inline(never)]
|
||||
fn __do_panic() -> ! {
|
||||
panic!("Called panic from external code.");
|
||||
|
|
@ -50,6 +53,7 @@ impl FFISlice {
|
|||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct BlobVec {
|
||||
pub data: *mut u8,
|
||||
pub len: usize,
|
||||
|
|
@ -79,6 +83,8 @@ pub mod vec {
|
|||
use super::ffi::*;
|
||||
use super::*;
|
||||
|
||||
#[repr(transparent)]
|
||||
#[derive(Debug)]
|
||||
pub struct Vec<T> {
|
||||
pub vec: BlobVec,
|
||||
_marker: core::marker::PhantomData<T>,
|
||||
|
|
@ -332,3 +338,18 @@ pub mod ffi {
|
|||
) -> (usize, bool);
|
||||
}
|
||||
}
|
||||
|
||||
/// Borrowed slice wrapper that implements `Display` for any slice of `Display` items.
///
/// The output is `[` + the items joined by `", "` + `]`; an empty slice prints `[]`.
pub struct DisplaySlice<'a, T>(pub &'a [T]);

impl<T> core::fmt::Display for DisplaySlice<'_, T>
where
    T: core::fmt::Display,
{
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("[")?;
        let mut elements = self.0.iter();
        // The first item carries no separator; every later one is prefixed with ", ".
        if let Some(first) = elements.next() {
            write!(f, "{}", first)?;
            for element in elements {
                write!(f, ", {}", element)?;
            }
        }
        f.write_str("]")
    }
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue