Compare commits

...

14 commits

14 changed files with 884 additions and 265 deletions

View file

@ -31,7 +31,7 @@
just-formatter
just-lsp
gdb
gdbgui
rr
nasm
nasmfmt
git

View file

@ -1,7 +1,7 @@
# Makefile: Compile and link main.asm using nasm and mold, intermediate files in target/
TARGET_DIR := target
SRC := src/lib.asm src/int_to_str.asm src/vec.asm src/tokeniser.asm src/file.asm src/alloc.asm src/ast.asm
SRC := src/lib.asm src/int_to_str.asm src/vec.asm src/tokeniser.asm src/file.asm src/alloc.asm src/ast.asm src/codegen.asm
OBJ := $(patsubst src/%.asm,$(TARGET_DIR)/%.o,$(SRC))
BIN_SRC := src/main.asm src/panic.asm

View file

@ -1,32 +1,9 @@
default rel
%include "src/tokeniser.inc"
%include "src/ast.inc"
section .rdata
;; start-consts
AST_FUNCTION equ 1 ; :u8
AST_BLOCK equ 2 ; :u8
AST_VARIABLE equ 3 ; :u8
AST_NUMBER equ 4 ; :u8
AST_BINARY_OP equ 5 ; :u8
AST_RETURN_STATEMENT equ 6 ; :u8
AST_VALUE_TO_PLACE equ 7 ; :u8
AST_PLACE_TO_VALUE equ 8 ; :u8
AST_ASSIGNMENT equ 9 ; :u8
AST_DEREF equ 10 ; :u8
AST_ADDRESS_OF equ 11 ; :u8
AST_VAR_DECL equ 12 ; :u8
AST_VAR_REF equ 13 ; :u8
AST_ARG equ 14 ; :u8
TYPE_VOID equ 1 ; :u8
TYPE_BOOL equ 2 ; :u8
TYPE_I32 equ 3 ; :u8
TYPE_U32 equ 4 ; :u8
TYPE_STR equ 5 ; :u8
TYPE_POINTER equ 6 ; :u8
;; end-consts
PRECEDENCE_ADD equ 90
PRECEDENCE_SUB equ 90
PRECEDENCE_MUL equ 100
@ -591,8 +568,17 @@ parse_statement:
call expect_token
test rax, rax
jnz .let
mov dil, TOKEN_LBRACE
call peek_expect_token ; parse_block expects lbrace to still be there
test rax, rax ; rax == 0 means "no `{` ahead"; flags after a call are not a defined result, so test explicitly
jnz .block
jmp .panic
.block:
mov rdi, [rsp] ; Ast
call parse_block
; Blocks don't require a trailing semicolon
jmp .epilogue
.let:
mov rdi, [rsp] ; Ast
call ast_parse_let
@ -617,6 +603,7 @@ parse_statement:
mov dil, TOKEN_SEMI
call unwrap_token
mov rax, [rsp] ; expression
.epilogue:
add rsp, 40
pop rbp
ret
@ -652,12 +639,19 @@ parse_block:
mov rdx, 0 ; drop = None
mov rcx, 64 ; capacity
call vec_init_with
nop
.loop:
mov dil, TOKEN_RBRACE
call peek_expect_token
test rax, rax
jnz .done
; skip semicolons
mov dil, TOKEN_SEMI
call expect_token
test rax, rax
jnz .loop
mov rdi, [rsp + 56] ; Ast
call parse_statement
lea rdi, [rsp + 8] ; vec
@ -666,6 +660,10 @@ parse_block:
call vec_push
jmp .loop
.done:
; eat the closing brace
mov dil, TOKEN_RBRACE
call unwrap_token
mov rdi, [rsp + 56] ; Ast
mov qword [rsp], AST_BLOCK ; AstNode.kind
mov rsi, [rsp + 64] ; span
@ -1154,6 +1152,11 @@ ast_build_symtable:
pop rbp
ret
section .rdata
ANONYMOUS_SCOPE_NAME db "<anonymous>"
ANONYMOUS_SCOPE_NAME_LEN equ $ - ANONYMOUS_SCOPE_NAME
section .text
;; symtable is a sorted vec pretending to be a b-tree:
;; entries are sorted by a key in order to get the following ordering:
;; scope (index0) -> (ident0)
@ -1242,8 +1245,29 @@ ast_build_symtable_for_each:
je .var_decl
cmp bl, AST_ARG
je .arg
cmp bl, AST_BLOCK
je .block
jmp .done
.block:
; insert scope entry
lea rdx, [rel ANONYMOUS_SCOPE_NAME]
mov rcx, ANONYMOUS_SCOPE_NAME_LEN
mov [rsp + 72], rdx ; SymEntry.index
mov [rsp + 80], rcx ; SymEntry.extra
jmp .insert_scope
.func:
; use function name as scope name
mov rbx, [rax + 8] ; AstNode.data
mov rdx, [rbx + 0] ; Func.name
mov rcx, [rbx + 8] ; Func.name_len
mov [rsp + 72], rdx ; SymEntry.index
mov [rsp + 80], rcx ; SymEntry.extra
.insert_scope:
; insert scope entry
mov byte [rsp + 32], SYM_KEY_SCOPE ; SymKey.kind
mov rdx, [rsp + 16] ; index
@ -1253,13 +1277,6 @@ ast_build_symtable_for_each:
mov qword [rsp + 56], 1 ; SymKey.ident
mov qword [rsp + 64], 0 ; SymKey.ident_len
mov rbx, [rax + 8] ; AstNode.data
mov rdx, [rbx + 0] ; Func.name
mov rcx, [rbx + 8] ; Func.name_len
mov [rsp + 72], rdx ; SymEntry.index
mov [rsp + 80], rcx ; SymEntry.extra
mov rdi, [rsp + 8] ; Ctx.symtable
lea rsi, [rsp + 32] ; &SymEntry
mov rcx, 0 ; cmp_ctx
@ -1316,7 +1333,7 @@ ast_build_symtable_for_each:
.arg:
; insert variable entry
mov byte [rsp + 32], SYM_KEY_ARG ; SymKey.kind
mov byte [rsp + 32], SYM_KEY_VAR ; SymKey.kind
mov rdx, [rax + 24] ; AstNode.span
mov qword [rsp + 48], rdx ; SymKey.span
@ -1459,6 +1476,12 @@ ast_walk_for_each:
jmp .check_scope
.block:
; push scope
push qword [rbp + 32] ; scope-ptr
push qword [rbp + 40] ; current_index
mov [rbp + 32], rsp ; update current_scope
; push statements onto stack
mov rbx, [rax + 8] ; AstNode.data
mov r15, [rax + 16] ; AstNode.extra
@ -1560,14 +1583,21 @@ ast_resolve_var_refs_for_each:
cmp bl, AST_VAR_REF
jne .epilogue
.var_ref:
; lookup variable in symbol table
; binary search lower bound
mov byte [rsp + 24 + 0], SYM_KEY_START_LOCALS ; SymKey.kind
mov qword [rsp + 24 + 8], 0 ; SymKey.scope_index
mov rax, [rsp + 80] ; scope
mov [rsp + 24 + 8], rax ; SymKey.scope_index
mov qword [rsp + 24 + 16], 0 ; SymKey.span
mov qword [rsp + 24 + 24], 1 ; SymKey.name
mov qword [rsp + 24 + 32], 0 ; SymKey.name_len
; name
mov rax, [rsp + 16] ; *AstNode
mov rbx, [rax + 8] ; AstNode.data
mov rax, [rbx + 8] ; AstVarRef.name
mov rbx, [rbx + 16] ; AstVarRef.name_len
mov [rsp + 24 + 24], rax ; SymKey.ident
mov [rsp + 24 + 32], rbx ; SymKey.ident_len
; binary search in symbol table
mov rdi, [rsp + 8] ; *Ctx
@ -1584,11 +1614,6 @@ ast_resolve_var_refs_for_each:
mov rax, [rsp + 16] ; *AstNode
mov rbx, [rax + 24] ; AstNode.span
mov [rsp + 24 + 16], rbx ; SymKey.span
mov rbx, [rax + 8] ; AstNode.data
mov rax, [rbx + 8] ; AstVarRef.name
mov rbx, [rbx + 16] ; AstVarRef.name_len
mov [rsp + 24 + 24], rax ; SymKey.ident
mov [rsp + 24 + 32], rbx ; SymKey.ident_len
; binary search in symbol table
mov rdi, [rsp + 8] ; *Ctx
@ -1602,16 +1627,60 @@ ast_resolve_var_refs_for_each:
.fixup:
cmp rax, [rsp + 88] ; lower_bound
jl .panic
jl .parent
mov rdi, [rsp + 8] ; *Ctx
mov rsi, rax ; index
call vec_get
mov rax, [rax + 40] ; SymEntry.index
mov rbx, rax ; *SymEntry
; compare symbol ident with var_ref ident
mov rdi, [rbx + 24] ; SymEntry.key.ident
mov rsi, [rbx + 32] ; SymEntry.key.ident_len
mov rax, [rsp + 16] ; *AstNode
mov rax, [rax + 8] ; AstNode.data
mov rdx, [rax + 8] ; AstVarRef.name
mov rcx, [rax + 16] ; AstVarRef.name_len
call strcmp
test rax, rax
jnz .parent
; load SymEntry.index
mov rax, [rbx + 40] ; SymEntry.index
mov rdx, [rsp + 16] ; *AstNode
mov rdx, [rdx + 8] ; AstNode.data
mov [rdx + 0], rax ; AstVarRef.resolved_index
jmp .epilogue
.parent:
; binary search for parent scope
mov byte [rsp + 24 + 0], SYM_KEY_PARENT_SCOPE ; SymKey.kind
mov rax, [rsp + 80] ; scope
mov [rsp + 24 + 8], rax ; SymKey.scope_index
mov qword [rsp + 24 + 16], 0 ; SymKey.span
mov qword [rsp + 24 + 24], 1 ; SymKey.ident
mov qword [rsp + 24 + 32], 0 ; SymKey.ident_len
; binary search in symbol table
mov rdi, [rsp + 8] ; *Ctx
lea rsi, [rsp + 24] ; &SymKey
mov rdx, symkey_cmp ; cmp
mov rcx, 0 ; cmp_ctx
call vec_binary_search_by
test rdx, rdx
jnz .panic ; can't find the symbol entry for this var-ref
; load parent scope sym entry
mov rdi, [rsp + 8] ; *Ctx
mov rsi, rax ; index
call vec_get
mov rbx, rax ; *SymEntry
mov rdx, [rax + 40] ; SymEntry.index (parent scope)
mov [rsp + 80], rdx ; update scope
jmp .var_ref
.epilogue:
add rsp, 96

24
lang/src/ast.inc Normal file
View file

@ -0,0 +1,24 @@
;; Shared AST constants: node-kind tags (AST_*) and type tags (TYPE_*).
;; Every value is an assemble-time `equ`, so including this file emits no bytes.
;; NOTE(review): the `start-consts`/`end-consts` markers and the `; :u8`
;; annotations look like input for a binding generator (the Rust test helpers
;; declare `pub const` items with the same names and values) -- confirm before
;; reformatting anything between the markers.
section .rdata
;; start-consts
AST_FUNCTION equ 1 ; :u8
AST_BLOCK equ 2 ; :u8
AST_VARIABLE equ 3 ; :u8
AST_NUMBER equ 4 ; :u8
AST_BINARY_OP equ 5 ; :u8
AST_RETURN_STATEMENT equ 6 ; :u8
AST_VALUE_TO_PLACE equ 7 ; :u8
AST_PLACE_TO_VALUE equ 8 ; :u8
AST_ASSIGNMENT equ 9 ; :u8
AST_DEREF equ 10 ; :u8
AST_ADDRESS_OF equ 11 ; :u8
AST_VAR_DECL equ 12 ; :u8
AST_VAR_REF equ 13 ; :u8
AST_ARG equ 14 ; :u8
TYPE_VOID equ 1 ; :u8
TYPE_BOOL equ 2 ; :u8
TYPE_I32 equ 3 ; :u8
TYPE_U32 equ 4 ; :u8
TYPE_STR equ 5 ; :u8
TYPE_POINTER equ 6 ; :u8
;; end-consts

321
lang/src/codegen.asm Normal file
View file

@ -0,0 +1,321 @@
default rel
%include "src/ast.inc"
extern panic
extern vec_extend
extern vec_get
extern vec_push
extern vec_init_with
global codegen_function
global get_register_name
section .rdata
SECTION_TEXT db "section .text", 10
SECTION_TEXT_LEN equ $ - SECTION_TEXT
GLOBAL_ db "global "
GLOBAL_LEN equ $ - GLOBAL_
COLON_NL db ":", 10
COLON_NL_LEN equ $ - COLON_NL
RET_NL db "ret", 10
RET_NL_LEN equ $ - RET_NL
PROLOGUE db "push rbp", 10, "mov rbp, rsp", 10
PROLOGUE_LEN equ $ - PROLOGUE
EPILOGUE db "mov rsp, rbp", 10, "pop rbp", 10, "ret", 10
EPILOGUE_LEN equ $ - EPILOGUE
REGISTER_NAMES db "abcdsidibpspr8r9r10r11r12r13r14r15"
WIDTHS db "erxliwdbp"
section .text
;; ```rust
;; use super::FFISlice;
;; ```
;; Writes the textual name of a general-purpose register at the requested
;; width into `buffer`, e.g. (0, 4) -> "eax", (7, 1) -> "spl", (12, 1) -> "r12b".
;; Register indices follow REGISTER_NAMES: a,b,c,d,si,di,bp,sp,r8..r15.
;; rdi: register index (u8, must be < 16; panics otherwise)
;; rsi: register width (1=byte,2=word,4=dword,8=qword)
;; rdx: *mut u8 (buffer, at least 4 bytes)
;; returns: rax = buffer start, rdx = number of bytes written
;; clobbers: rcx, rsi, rdi, flags
;; define-fn: fn get_register_name(reg_idx: u8, width: u8, buffer: *mut u8) -> FFISlice
get_register_name:
    push rbp
    mov rbp, rsp
    ; Both arguments are u8: the bits above the low byte are not guaranteed
    ; by the calling convention, so normalise before the 64-bit compares below.
    movzx edi, dil
    movzx esi, sil
    ; Anything >= 16 would index past the end of REGISTER_NAMES.
    cmp edi, 16
    jae .panic
    push rdx                        ; [rsp + 8] = buffer start
    push rdi                        ; [rsp + 0] = register index
    ; Legacy registers (index < 8) take an 'r' (qword) or 'e' (dword) prefix.
    cmp rdi, 8
    jge .skip_prefix
    xor ecx, ecx                    ; 0 = no prefix
    cmp rsi, 8
    mov eax, 'r'
    cmove ecx, eax
    cmp rsi, 4
    mov eax, 'e'
    cmove ecx, eax
    test ecx, ecx
    jz .skip_prefix
    mov byte [rdx], cl
    inc rdx
.skip_prefix:
    push rsi                        ; write_register_name clobbers rsi
    call write_register_name        ; appends the base name, advances rdx
    pop rsi
    mov rdi, [rsp]                  ; reload register index (clobbered by the call)
    ; a/b/c/d at word width or wider end in 'x' (ax, eax, rax).
    cmp rdi, 4
    jge .check81
    cmp rsi, 2
    jl .check81
    mov byte [rdx], 'x'
    inc rdx
    jmp .done
.check81:
    ; Remaining legacy registers only get a suffix at byte width: 'l'.
    cmp rdi, 8
    jge .ext_suffix
    cmp rsi, 1
    jne .done
    mov byte [rdx], 'l'
    inc rdx
    jmp .done
.ext_suffix:
    ; r8..r15: 'd' (dword), 'w' (word), 'b' (byte); qword has no suffix.
    xor ecx, ecx
    cmp rsi, 4
    mov eax, 'd'
    cmove ecx, eax
    cmp rsi, 2
    mov eax, 'w'
    cmove ecx, eax
    cmp rsi, 1
    mov eax, 'b'
    cmove ecx, eax
    test ecx, ecx
    jz .done
    mov byte [rdx], cl
    inc rdx
.done:
    pop rdi                         ; discard saved register index
    pop rax                         ; rax = buffer start
    sub rdx, rax                    ; rdx = bytes written (cursor - start)
    pop rbp
    ret
.panic:
    call panic
;; Appends the width-independent part of a register name to the buffer.
;; The characters come from REGISTER_NAMES, which is laid out as
;;   [0..4)   one char each:    a b c d
;;   [4..16)  two chars each:   si di bp sp r8 r9
;;   [16..34) three chars each: r10 .. r15
;; rdi: register index
;; rdx: *mut u8 (buffer, at least 2 bytes); advanced past the written bytes
;; clobbers: rax, rsi, rdi, flags
write_register_name:
    lea rsi, [rel REGISTER_NAMES]
    cmp rdi, 4
    jl .one_char
    cmp rdi, 10
    jl .two_chars
    ; index 10..15 -> table offset 16 + (index - 10) * 3
    sub rdi, 10
    lea rdi, [rdi + rdi * 2]
    lea rsi, [rsi + rdi + 16]
    mov al, [rsi]
    mov [rdx], al
    mov al, [rsi + 1]
    mov [rdx + 1], al
    mov al, [rsi + 2]
    mov [rdx + 2], al
    add rdx, 3
    ret
.two_chars:
    ; index 4..9 -> table offset 4 + (index - 4) * 2 = index * 2 - 4
    lea rsi, [rsi + rdi * 2 - 4]
    mov al, [rsi]
    mov [rdx], al
    mov al, [rsi + 1]
    mov [rdx + 1], al
    add rdx, 2
    ret
.one_char:
    ; index 0..3 -> table offset = index
    mov al, [rsi + rdi]
    mov [rdx], al
    inc rdx
    ret
;; Three-way comparison of two stack-var entries by their `index` field
;; (the first u64 of each (index, offset) pair).
;; rdi: a: *const (index, offset)
;; rsi: b: *const (index, offset)
;; returns: rax = -1 if a.index < b.index, 0 if equal, 1 if greater
;; NOTE(review): the previous header listed (rdi: ctx, rsi: a, rdx: b), but both
;; the body and the define-fn signature below take `a` in rdi and `b` in rsi.
;; Confirm which convention the vec comparator callbacks expect before passing
;; this function as a `cmp` callback.
;; define-fn: fn stackvar_cmp(a: *const (u64, u64), b: *const (u64, u64)) -> i32
stackvar_cmp:
    push rbp
    mov rbp, rsp
    mov rax, [rdi + 0]              ; a.index
    mov rcx, [rsi + 0]              ; b.index
    cmp rax, rcx                    ; compare against b.index (was a constant 0)
    jb .less                        ; indices are u64 -> unsigned conditions
    ja .greater
    xor rax, rax
    jmp .done
.less:
    mov rax, -1
    jmp .done
.greater:
    mov rax, 1
.done:
    pop rbp
    ret
;; start-structs
;; struct CodegenCtx {
;; ast: *const Ast,
;; text: Vec<u8>,
;; }
;; end-structs
;; Emits the assembly text for one function into ctx.text:
;;   "section .text", "global <name>", "<name>:", the standard prologue,
;;   the (not yet implemented) body via codegen_block, then the epilogue.
;; Each argument is assigned an 8-byte stack slot recorded in a local
;; Vec<(ast_index, offset)>.
;; Panics if the node at `func_idx` is not an AST_FUNCTION.
;; rdi: *Ctx
;; rsi: function index
;; define-fn: fn codegen_function(ast: *const CodegenCtx, func_idx: u64) -> ()
codegen_function:
    push rbp
    mov rbp, rsp
    push rbx
    push r15
    push r14
    ; 3 pushes + 104 bytes of locals keep rsp 16-byte aligned at every call.
    ; scratch [88..104]
    ; register-bitset [72..88] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
    ; stack-vars: Vec<(index, offset)> [32..72]
    ; current_stack_size: [24..32]
    ; func_idx [16..24]
    ; ast [8..16]
    ; ctx [0..8]
    sub rsp, 104
    mov [rsp], rdi                  ; ctx
    mov rax, [rdi]
    mov [rsp + 8], rax              ; ast
    mov [rsp + 16], rsi             ; func_idx
    mov qword [rsp + 24], 0         ; current_stack_size = 0
    ; The bitset must start out empty before individual bits are set.
    mov qword [rsp + 72], 0
    mov qword [rsp + 80], 0

    lea rdi, [rsp + 32]             ; stack-vars
    mov rsi, 16                     ; size_of::<(u64, u64)>
    mov rdx, 0                      ; drop = None
    mov rcx, 16                     ; initial capacity
    call vec_init_with

    bts qword [rsp + 72], 7         ; mark rsp as used
    bts qword [rsp + 72], 6         ; mark rbp as used

    ; push "section .text\n"
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    lea rsi, [rel SECTION_TEXT]
    mov rdx, SECTION_TEXT_LEN
    call vec_extend

    ; look up the function node
    mov rdi, [rsp + 8]              ; ast
    mov rsi, [rsp + 16]             ; func_idx
    call vec_get
    cmp byte [rax + 0], AST_FUNCTION ; AstNode.kind
    jne .panic
    mov rbx, [rax + 8]              ; AstNode.data; rbx = *AstFunction from here on

    ; push "global {function_name}\n"
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    lea rsi, [rel GLOBAL_]
    mov rdx, GLOBAL_LEN
    call vec_extend
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    mov rsi, [rbx + 0]              ; AstFunction.name
    mov rdx, [rbx + 8]              ; AstFunction.name_len
    call vec_extend
    ; push "\n" (the second byte of COLON_NL)
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    lea rsi, [rel COLON_NL]
    inc rsi
    mov rdx, 1
    call vec_extend

    ; push "{function_name}:\n"
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    mov rsi, [rbx + 0]              ; AstFunction.name
    mov rdx, [rbx + 8]              ; AstFunction.name_len
    call vec_extend
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    lea rsi, [rel COLON_NL]
    mov rdx, COLON_NL_LEN
    call vec_extend

    ; push prologue
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    lea rsi, [rel PROLOGUE]
    mov rdx, PROLOGUE_LEN
    call vec_extend

    ; allocate args on stack
    ; rbx = *AstFunction
    mov r15, [rbx + 24]             ; AstFunction.args_len
    xor r14, r14                    ; arg index
.arg_loop:
    cmp r14, r15
    jae .arg_loop_done              ; lengths are unsigned
    mov rax, [rbx + 16]             ; AstFunction.args
    mov rsi, [rax + r14 * 8]        ; AstFunction.args[i]
    mov [rsp + 88], rsi             ; scratch.index
    mov rax, [rsp + 24]             ; current_stack_size
    mov [rsp + 96], rax             ; scratch.offset
    add rax, 8                      ; size_of::<u64>
    mov [rsp + 24], rax             ; current_stack_size += size_of::<u64>
    lea rdi, [rsp + 32]             ; stack-vars
    lea rsi, [rsp + 88]             ; &(index, offset)
    call vec_push
    inc r14
    jmp .arg_loop
.arg_loop_done:

    mov rdi, [rsp]                  ; ctx
    lea rsi, [rsp + 24]             ; &function_ctx
    call codegen_block
    ; TODO: generate function body

    ; push epilogue: the emitted prologue pushes rbp, so a bare "ret" would
    ; return to the saved rbp; EPILOGUE restores the frame and ends in "ret".
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    lea rsi, [rel EPILOGUE]
    mov rdx, EPILOGUE_LEN
    call vec_extend

    add rsp, 104
    ; restore callee-saved registers in reverse push order
    pop r14
    pop r15
    pop rbx
    pop rbp
    ret
.panic:
    call panic
;; Placeholder for block code generation: currently returns immediately
;; without reading its arguments or emitting anything.
;; rdi: ctx
;; rsi: &function_ctx
codegen_block:
ret

View file

@ -31,8 +31,6 @@ global is_whitespace
global is_id_continue
global is_id_start
extern panic
;; ==============================
;; Helper functions
;; ==============================
@ -446,3 +444,8 @@ is_whitespace:
mov rax, 1
ret
extern panic_impl
global panic
;; Assembly-side panic entry point: forces rsp down to a 16-byte boundary and
;; then calls the externally defined `panic_impl`. Nothing follows the call, so
;; this relies on `panic_impl` never returning.
panic:
and rsp, -16
call panic_impl

View file

@ -548,6 +548,7 @@ expect_token:
pop rbp
ret
;; Returns the next token if it matches the expected token, else panics
;; dil: expected token
unwrap_token:
push rbp
@ -561,6 +562,7 @@ unwrap_token:
call panic
;; returns 0 if token not found, else returns lexeme (ptr, len)
;; doesn't advance the cursor
;; dil: expected token
peek_expect_token:
push rbp
@ -573,6 +575,7 @@ peek_expect_token:
pop rbp
ret
;; returns the next lexeme without advancing the cursor
;; rdi: out-struct pointer
peek_lexeme:
push rbp

View file

@ -12,6 +12,7 @@ extern allocate
global vec_init
global vec_init_with
global vec_extend
global vec_push
global vec_pop
global vec_drop_last
@ -29,13 +30,15 @@ global vec_tests
;; Byte vector structure
;; struct Vec {
;; start-structs
;; struct BlobVec {
;; data: *mut u8,
;; len: usize,
;; capacity: usize,
;; item_size: usize,
;; drop: Option<fn(*mut u8)>,
;; cap: usize,
;; elem_size: usize,
;; drop: Option<extern "C" fn(*mut u8)>,
;; }
;; end-structs
;; size: 40 bytes
;; align: 8 bytes
@ -629,3 +632,49 @@ vec_insert_sorted:
add rsp, 0x18
pop rbp
ret
;; Appends `count` elements, copied from `elements`, to the end of the vector.
;; The vector is grown first (vec_try_grow) so that len + count fits.
;; rdi: *Vec
;; rsi: *const u8
;; rdx: number of elements
;; define-fn: fn vec_extend(vec: *mut BlobVec, elements: *const u8, count: usize) -> ()
vec_extend:
    push rbp
    mov rbp, rsp
    ; locals, addressed relative to the frame pointer:
    ;   [rbp - 8]  bytes    (count * item_size)
    ;   [rbp - 16] count
    ;   [rbp - 24] elements
    ;   [rbp - 32] vec
    sub rsp, 32
    mov [rbp - 32], rdi             ; vec
    mov [rbp - 24], rsi             ; elements
    mov [rbp - 16], rdx             ; count

    ; number of bytes to copy
    mov rax, [rdi + 24]             ; item_size
    mul rdx                         ; count * item_size
    mov [rbp - 8], rax              ; bytes

    ; make room for len + count elements
    mov rsi, [rdi + 8]              ; vec.len
    add rsi, [rbp - 16]             ; vec.len + count
    call vec_try_grow

    ; destination = data + len * item_size
    mov rdi, [rbp - 32]             ; vec
    mov rax, [rdi + 8]              ; vec.len
    mul qword [rdi + 24]            ; vec.len * item_size
    add rax, [rdi]                  ; + vec.data
    mov rdi, rax                    ; dest
    mov rsi, [rbp - 24]             ; elements
    mov rdx, [rbp - 8]              ; bytes
    call memcpy

    ; commit the new length
    mov rdi, [rbp - 32]             ; vec
    mov rax, [rdi + 8]              ; vec.len
    add rax, [rbp - 16]             ; vec.len + count
    mov [rdi + 8], rax

    leave
    ret

View file

@ -3,6 +3,9 @@
#[path = "shared/shared.rs"]
mod util;
#[path = "shared/ast_debug.rs"]
mod ast_debug;
unsafe extern "C" {
unsafe fn bump_init();
@ -57,28 +60,29 @@ fn main() {
};
}
// print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) });
// print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
// parse_func(ast)
// });
// print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe {
// parse_func(ast)
// });
// print_ast(
// b"fn main() -> void { return (1 + (2 * 3)) / 4; }",
// |ast| unsafe { parse_func(ast) },
// );
// print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe {
// parse_func(ast)
// });
print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) });
print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
parse_func(ast)
});
print_ast(
b"fn main() -> void { ;;;return (1 + (2)); }",
|ast| unsafe { parse_func(ast) },
);
print_ast(
b"fn main() -> void { return (1 + (2 * 3)) / 4; }",
|ast| unsafe { parse_func(ast) },
);
print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe {
parse_func(ast)
});
// print_ast(b"fn main() -> void { let x: u32 = 4; }", |ast| unsafe {
// parse_func(ast)
// });
// print_ast(
// b"fn main(a: u32) -> void { let x: u32 = a + 4; }",
// |ast| unsafe { parse_func(ast) },
// );
print_ast(b"fn main() -> void { let x: u32 = 4; }", |ast| unsafe {
parse_func(ast)
});
print_ast(
b"fn main(a: u32) -> void { let x: u32 = a + 4; }",
|ast| unsafe { parse_func(ast) },
);
print_ast(
b"fn main(a: u32) -> void {
let y: u32 = a + 4;
@ -87,177 +91,15 @@ return *y;
}",
|ast| unsafe { parse_func(ast) },
);
print_ast(
b"fn main(a: u32) -> void {
let y: u32 = a + 4;
{
let y: u32 = 10;
}
impl std::fmt::Display for AstNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use util::defs::{
BinaryExpr, AST_ADDRESS_OF, AST_ARG, AST_ASSIGNMENT, AST_BINARY_OP, AST_BLOCK,
AST_DEREF, AST_FUNCTION, AST_NUMBER, AST_PLACE_TO_VALUE, AST_RETURN_STATEMENT,
AST_VALUE_TO_PLACE, AST_VAR_DECL, AST_VAR_REF,
};
match self.kind {
AST_NUMBER => {
write!(f, "Number({})", self.data as usize)
}
AST_DEREF => {
write!(f, "Deref(expr: {})", self.data as usize)
}
AST_ADDRESS_OF => {
write!(f, "AddressOf(expr: {})", self.data as usize)
}
AST_ARG => {
let arg = unsafe { self.data.cast::<util::defs::AstArgument>().read() };
write!(
f,
"Arg(name: {:?}, arg_type: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(arg.name, arg.name_len))
},
arg.arg_type,
)
}
AST_VAR_REF => {
let var_ref = unsafe { self.data.cast::<util::defs::AstVarRef>().read() };
if var_ref.resolved != u64::MAX {
write!(f, "VarRef({})", var_ref.resolved)
} else {
write!(f, "VarRef(name: {:?})", unsafe {
std::str::from_utf8(std::slice::from_raw_parts(
var_ref.name,
var_ref.name_len,
))
},)
}
}
AST_VAR_DECL => {
let var_decl = unsafe { self.data.cast::<util::defs::AstVarDecl>().read() };
write!(
f,
"VarDecl(name: {:?}, var_type: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(
var_decl.name,
var_decl.name_len,
))
},
var_decl.var_type,
)
}
AST_ASSIGNMENT => {
write!(
f,
"Assignment(dest: {}, src: {})",
self.data as usize, self.extra
)
}
AST_BINARY_OP => {
let BinaryExpr {
left,
operator,
right,
} = unsafe { self.data.cast::<util::defs::BinaryExpr>().read() };
write!(
f,
"BinaryOp(op: {}, left: {}, right: {})",
operator, left, right
)
}
AST_RETURN_STATEMENT => {
let return_expr_id = self.data as usize;
write!(f, "ReturnStatement(expr: {})", return_expr_id)
}
AST_FUNCTION => {
let func = unsafe { self.data.cast::<util::defs::AstFunction>().read() };
write!(
f,
"Function(name: {:?}, args: {:?}, return_type: {}, body: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(func.name, func.name_len))
},
unsafe {
std::slice::from_raw_parts(func.args.cast::<u64>(), func.args_len as usize)
},
func.return_type,
func.body
)
}
AST_BLOCK => {
write!(f, "Block(statements: {:?})", unsafe {
std::slice::from_raw_parts(self.data.cast::<u64>(), self.extra as usize)
})
}
AST_PLACE_TO_VALUE => {
write!(f, "PlaceToValue(place: {})", self.data as usize)
}
AST_VALUE_TO_PLACE => {
write!(f, "ValueToPlace(value: {})", self.data as usize)
}
kind => write!(f, "UnknownNode(kind: {kind})"),
}
}
}
impl core::fmt::Display for Ast {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
writeln!(f, "[")?;
for (i, item) in self.nodes.as_slice().iter().enumerate() {
if i > 0 {
writeln!(f, ", ")?;
}
write!(f, "\t{i}: {}", item)?;
}
writeln!(f, "\n]")
}
}
impl core::fmt::Display for util::defs::SymEntry {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("SymEntry")
.field_with("key", |f| {
f.debug_struct("Key")
.field_with("kind", |f| {
f.write_str(match self.key.kind {
util::defs::SYM_KEY_SCOPE => "Scope",
util::defs::SYM_KEY_SCOPE_NAME => "ScopeName",
util::defs::SYM_KEY_PARENT_SCOPE => "ParentScope",
util::defs::SYM_KEY_ARG => "Argument",
util::defs::SYM_KEY_VAR => "Variable",
_ => "Unknown",
})
})
.field("scope", &self.key.scope_index)
.field("span", &self.key.span)
.field_with("ident", |f| {
f.write_str(unsafe {
&core::str::from_utf8_unchecked(core::slice::from_raw_parts(
self.key.ident,
self.key.ident_len,
))
})
})
.finish()
})
.field_with("value", |f| {
let stct = &mut f.debug_struct("Value");
if self.extra == 0 {
stct.field("ast_index", &self.index).finish()
} else if self.index != 0 {
stct.field_with("ident", |f| {
f.write_str(unsafe {
core::str::from_utf8_unchecked(core::slice::from_raw_parts(
self.index as *const u8,
self.extra as usize,
))
})
})
.finish()
} else {
stct.field("index", &self.index)
.field("extra", &self.extra)
.finish()
}
})
.finish()
}
let y: *u32 = &y;
return *y;
}",
|ast| unsafe { parse_func(ast) },
);
}

102
lang/tests/codegen.rs Normal file
View file

@ -0,0 +1,102 @@
#![feature(debug_closure_helpers)]
#[path = "shared/shared.rs"]
mod util;
#[path = "shared/ast_debug.rs"]
mod ast_debug;
use util::defs::{parse_func, Ast, AstNode};
unsafe extern "C" {
unsafe fn bump_init();
unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
}
/// Test driver: initialises the bump allocator, then checks the register-name
/// helper against a table of known (index, width) -> name pairs.
fn main() {
    unsafe {
        bump_init();
    }
    println!("Bump allocator initialized.");

    // (register index, width in bytes, expected name)
    let register_cases: [(u8, u8, &str); 9] = [
        (0, 4, "eax"),
        (7, 8, "rsp"),
        (7, 4, "esp"),
        (7, 2, "sp"),
        (7, 1, "spl"),
        (9, 1, "r9b"),
        (12, 1, "r12b"),
        (12, 4, "r12d"),
        (8, 2, "r8w"),
    ];

    unsafe {
        // One 4-byte scratch buffer is reused for every lookup.
        let mut buf = [0u8; 4];
        for (reg_idx, width, expected) in register_cases {
            assert_eq!(
                util::defs::get_register_name(reg_idx, width, buf.as_mut_ptr()).as_str(),
                expected
            );
        }
    }

    /// Parses `src` with `parser`, builds the symbol table, resolves variable
    /// references, prints the AST and then the code generated for the root node.
    fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast) -> u64) {
        unsafe {
            tokeniser_init_buf(src.as_ptr(), src.len());
            let mut ast = Ast {
                nodes: util::vec::Vec::new(),
            };
            let root = parser(&mut ast);
            eprintln!("Parsed expression ID: {}", root);

            let mut uninit_table = core::mem::MaybeUninit::<util::defs::SymbolTable>::uninit();
            util::defs::ast_build_symtable(&mut ast, root, &mut uninit_table);
            let mut symtable = uninit_table.assume_init();
            util::defs::ast_resolve_var_refs(&mut ast, &mut symtable, root);
            println!("{:#}", &ast);

            let mut codegen = util::defs::CodegenCtx {
                ast: &mut ast,
                text: util::vec::Vec::new(),
            };
            util::defs::codegen_function(&mut codegen, root);
            println!(
                "Generated code:\n{}",
                core::str::from_utf8(codegen.text.as_slice()).unwrap()
            );
        };
    }

    // print_ast(
    // b"fn main(a: u32) -> void {
    // let y: u32 = a + 4;
    // {
    // let y: u32 = 10;
    // }
    // let y: *u32 = &y;
    // return *y;
    // }",
    // |ast| unsafe { parse_func(ast) },
    // );
}

View file

@ -0,0 +1,174 @@
use super::util;
// Human-readable, single-line rendering of one AST node, used by the test
// binaries when dumping a parsed tree. The output shape is chosen by
// `self.kind`; kinds not listed below fall through to `UnknownNode(kind: ..)`.
impl core::fmt::Display for util::defs::AstNode {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
use util::defs::{
BinaryExpr, AST_ADDRESS_OF, AST_ARG, AST_ASSIGNMENT, AST_BINARY_OP, AST_BLOCK,
AST_DEREF, AST_FUNCTION, AST_NUMBER, AST_PLACE_TO_VALUE, AST_RETURN_STATEMENT,
AST_VALUE_TO_PLACE, AST_VAR_DECL, AST_VAR_REF,
};
match self.kind {
// For these kinds `data` is printed as a plain integer (no dereference).
AST_NUMBER => {
write!(f, "Number({})", self.data as usize)
}
AST_DEREF => {
write!(f, "Deref(expr: {})", self.data as usize)
}
AST_ADDRESS_OF => {
write!(f, "AddressOf(expr: {})", self.data as usize)
}
// For the struct-carrying kinds below, `data` is read as a pointer to the
// matching payload struct.
// NOTE(review): the reads assume `data` is valid and correctly typed for
// the node kind -- nothing here checks it.
AST_ARG => {
let arg = unsafe { self.data.cast::<util::defs::AstArgument>().read() };
write!(
f,
"Arg(name: {:?}, arg_type: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(arg.name, arg.name_len))
},
arg.arg_type,
)
}
AST_VAR_REF => {
let var_ref = unsafe { self.data.cast::<util::defs::AstVarRef>().read() };
// `u64::MAX` is treated as "not resolved": print the name instead.
if var_ref.resolved != u64::MAX {
write!(f, "VarRef({})", var_ref.resolved)
} else {
write!(f, "VarRef(name: {:?})", unsafe {
std::str::from_utf8(std::slice::from_raw_parts(
var_ref.name,
var_ref.name_len,
))
},)
}
}
AST_VAR_DECL => {
let var_decl = unsafe { self.data.cast::<util::defs::AstVarDecl>().read() };
write!(
f,
"VarDecl(name: {:?}, var_type: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(
var_decl.name,
var_decl.name_len,
))
},
var_decl.var_type,
)
}
// `data` is printed as the destination and `extra` as the source.
AST_ASSIGNMENT => {
write!(
f,
"Assignment(dest: {}, src: {})",
self.data as usize, self.extra
)
}
AST_BINARY_OP => {
let BinaryExpr {
left,
operator,
right,
} = unsafe { self.data.cast::<util::defs::BinaryExpr>().read() };
write!(
f,
"BinaryOp(op: {}, left: {}, right: {})",
operator, left, right
)
}
AST_RETURN_STATEMENT => {
let return_expr_id = self.data as usize;
write!(f, "ReturnStatement(expr: {})", return_expr_id)
}
AST_FUNCTION => {
let func = unsafe { self.data.cast::<util::defs::AstFunction>().read() };
// `args` is viewed as `args_len` u64 values.
write!(
f,
"Function(name: {:?}, args: {:?}, return_type: {}, body: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(func.name, func.name_len))
},
unsafe {
std::slice::from_raw_parts(func.args.cast::<u64>(), func.args_len as usize)
},
func.return_type,
func.body
)
}
// `data` is viewed as `extra` u64 values (the block's statements).
AST_BLOCK => {
write!(f, "Block(statements: {:?})", unsafe {
std::slice::from_raw_parts(self.data.cast::<u64>(), self.extra as usize)
})
}
AST_PLACE_TO_VALUE => {
write!(f, "PlaceToValue(place: {})", self.data as usize)
}
AST_VALUE_TO_PLACE => {
write!(f, "ValueToPlace(value: {})", self.data as usize)
}
kind => write!(f, "UnknownNode(kind: {kind})"),
}
}
}
// Prints every node of the AST as a bracketed list: one `<index>: <node>`
// entry per line, tab-indented, with `, ` terminating every line but the last.
impl core::fmt::Display for util::defs::Ast {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut entries = self.nodes.as_slice().iter().enumerate();

        writeln!(f, "[")?;
        // The first entry carries no separator; every later one is preceded by it.
        if let Some((i, first)) = entries.next() {
            write!(f, "\t{i}: {}", first)?;
        }
        for (i, node) in entries {
            writeln!(f, ", ")?;
            write!(f, "\t{i}: {}", node)?;
        }
        write!(f, "\n]")
    }
}
// Debug-struct style rendering of a symbol-table entry: the key (kind, scope,
// span, identifier) followed by the value, whose shape depends on which of
// `index` / `extra` are non-zero.
impl core::fmt::Display for util::defs::SymEntry {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        use util::defs::{
            SYM_KEY_ARG, SYM_KEY_PARENT_SCOPE, SYM_KEY_SCOPE, SYM_KEY_SCOPE_NAME, SYM_KEY_VAR,
        };

        let kind_name = match self.key.kind {
            SYM_KEY_SCOPE => "Scope",
            SYM_KEY_SCOPE_NAME => "ScopeName",
            SYM_KEY_PARENT_SCOPE => "ParentScope",
            SYM_KEY_ARG => "Argument",
            SYM_KEY_VAR => "Variable",
            _ => "Unknown",
        };

        f.debug_struct("SymEntry")
            .field_with("key", |f| {
                f.debug_struct("Key")
                    .field_with("kind", |f| f.write_str(kind_name))
                    .field("scope", &self.key.scope_index)
                    .field("span", &self.key.span)
                    .field_with("ident", |f| {
                        let ident = unsafe {
                            core::str::from_utf8_unchecked(core::slice::from_raw_parts(
                                self.key.ident,
                                self.key.ident_len,
                            ))
                        };
                        f.write_str(ident)
                    })
                    .finish()
            })
            .field_with("value", |f| {
                let mut value = f.debug_struct("Value");
                match (self.extra, self.index) {
                    // extra == 0: `index` is shown as an AST index
                    (0, _) => value.field("ast_index", &self.index).finish(),
                    // extra != 0 but index == 0: show both raw numbers
                    (_, 0) => value
                        .field("index", &self.index)
                        .field("extra", &self.extra)
                        .finish(),
                    // both non-zero: (index, extra) is read as a (ptr, len) string
                    _ => value
                        .field_with("ident", |f| {
                            f.write_str(unsafe {
                                core::str::from_utf8_unchecked(core::slice::from_raw_parts(
                                    self.index as *const u8,
                                    self.extra as usize,
                                ))
                            })
                        })
                        .finish(),
                }
            })
            .finish()
    }
}

View file

@ -17,8 +17,19 @@ unsafe extern "C" {
pub unsafe fn ast_build_symtable(ast: *mut Ast, root_index: u64, symtable: *mut core::mem::MaybeUninit<SymbolTable>);
pub unsafe fn ast_walk_for_each(ast: *mut Ast, start_index: u64, ctx: *mut (), for_each: unsafe extern "C" fn(ctx: *mut (), *mut Ast, node_index: u64, scope: u64));
pub unsafe fn ast_resolve_var_refs(ast: *mut Ast, ctx: *mut SymbolTable, root_index: u64);
pub unsafe fn get_register_name(reg_idx: u8, width: u8, buffer: *mut u8) -> FFISlice;
pub unsafe fn stackvar_cmp(a: *const (u64, u64), b: *const (u64, u64)) -> i32;
pub unsafe fn codegen_function(ast: *const CodegenCtx, func_idx: u64) -> ();
pub unsafe fn vec_extend(vec: *mut BlobVec, elements: *const u8, count: usize) -> ();
}
pub const SYM_KEY_SCOPE: u8 = 1;
pub const SYM_KEY_SCOPE_NAME: u8 = 2;
pub const SYM_KEY_PARENT_SCOPE: u8 = 3;
pub const SYM_KEY_START_LOCALS: u8 = 4;
pub const SYM_KEY_ARG: u8 = 5;
pub const SYM_KEY_VAR: u8 = 6;
pub const SYM_KEY_END_LOCALS: u8 = 7;
pub const AST_FUNCTION: u8 = 1;
pub const AST_BLOCK: u8 = 2;
pub const AST_VARIABLE: u8 = 3;
@ -39,13 +50,6 @@ pub const TYPE_I32: u8 = 3;
pub const TYPE_U32: u8 = 4;
pub const TYPE_STR: u8 = 5;
pub const TYPE_POINTER: u8 = 6;
pub const SYM_KEY_SCOPE: u8 = 1;
pub const SYM_KEY_SCOPE_NAME: u8 = 2;
pub const SYM_KEY_PARENT_SCOPE: u8 = 3;
pub const SYM_KEY_START_LOCALS: u8 = 4;
pub const SYM_KEY_ARG: u8 = 5;
pub const SYM_KEY_VAR: u8 = 6;
pub const SYM_KEY_END_LOCALS: u8 = 7;
pub const TOKEN_EOF: u8 = 0;
pub const TOKEN_LET: u8 = 1;
pub const TOKEN_IF: u8 = 2;
@ -190,4 +194,23 @@ pub struct SymEntry {
pub extra: u64,
}
#[repr(C)]
#[derive(Debug)]
pub struct CodegenCtx {
pub ast: *const Ast,
pub text: Vec<u8>,
}
#[repr(C)]
#[derive(Debug)]
pub struct BlobVec {
pub data: *mut u8,
pub len: usize,
pub cap: usize,
pub elem_size: usize,
pub drop: Option<extern "C" fn(*mut u8)>,
}
use super::vec::Vec;
use super::FFISlice;

View file

@ -9,7 +9,7 @@ fn __do_panic() -> ! {
}
#[unsafe(no_mangle)]
extern "C" fn panic() -> ! {
extern "C" fn panic_impl() -> ! {
__do_panic()
}
@ -52,15 +52,7 @@ impl FFISlice {
}
}
#[repr(C)]
#[derive(Debug)]
pub struct BlobVec {
pub data: *mut u8,
pub len: usize,
pub cap: usize,
pub elem_size: usize,
pub drop: Option<extern "C" fn(*mut u8)>,
}
pub use defs::BlobVec;
impl Default for BlobVec {
fn default() -> Self {
@ -135,6 +127,18 @@ pub mod vec {
unsafe { core::slice::from_raw_parts_mut(self.vec.data as *mut T, self.vec.len) }
}
pub fn extend(&mut self, elements: Box<[T]>) {
unsafe {
let elements =
core::mem::transmute::<Box<[T]>, Box<[core::mem::ManuallyDrop<T>]>>(elements);
super::defs::vec_extend(
&mut self.vec,
elements.as_ptr() as *const u8,
elements.len(),
);
}
}
pub fn push(&mut self, value: T) {
let value = core::mem::ManuallyDrop::new(value);
unsafe {

View file

@ -106,6 +106,11 @@ fn main() {
assert_eq!(vec.as_slice(), &[20, 30, 35, 40, 50]);
let mut vec = Vec::<u32>::new_with(100);
vec.insert_sorted(50, cmp);
_ = vec.insert_sorted(50, cmp);
assert_eq!(vec.as_slice(), &[50]);
// vec extend
let elements = Box::new([1, 2, 3, 4, 5]);
vec.extend(elements);
assert_eq!(vec.as_slice(), &[50, 1, 2, 3, 4, 5]);
}