safety commit

This commit is contained in:
janis 2025-10-31 22:14:44 +01:00
parent d144537c3b
commit 9509931bc2
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8

View file

@ -25,6 +25,10 @@ section .rdata
PROLOGUE_LEN equ $ - PROLOGUE
EPILOGUE db "mov rsp, rbp", 10, "pop rbp", 10, "ret", 10
EPILOGUE_LEN equ $ - EPILOGUE
MOV_RAX db "mov rax, "
MOV_RAX_LEN equ $ - MOV_RAX
JMP_EPILOGUE db 10, "jmp .epilogue", 10
JMP_EPILOGUE_LEN equ $ - JMP_EPILOGUE
REGISTER_NAMES db "abcdsidibpspr8r9r10r11r12r13r14r15"
WIDTHS db "erxliwdbp"
@ -179,8 +183,73 @@ stackvar_cmp:
;; ast: *const Ast,
;; text: Vec<u8>,
;; }
;; struct FunctionCtx {
;; current_stack_size: u64,
;; stack_vars: Vec<(u64, u64)>,
;; register_bitset: u128,
;; dirtied_register_bitset: u128,
;; }
;; end-structs
;; rdi: *FunctionCtx
;; define-fn: fn codegen_allocate_register(ctx: *mut FunctionCtx) -> u8
;;
;; Claims the lowest-numbered free register recorded in ctx.register_bitset.
;;
;; Both bitsets are treated as 128-bit bit strings stored as two qwords
;; (low qword first); bit i stands for register i and a set bit in
;; register_bitset means "in use". The chosen bit is set in register_bitset
;; and in dirtied_register_bitset. Dirtied registers are those that have been
;; used in the function and need to be saved/restored in the prologue/epilogue.
;;
;; FunctionCtx offsets used here:
;;   [rdi + 48] register_bitset, bits 0..63
;;   [rdi + 56] register_bitset, bits 64..127
;;   [rdi + 64] dirtied_register_bitset, bits 0..63
;;   [rdi + 72] dirtied_register_bitset, bits 64..127
;;
;; returns: rax = index of the newly claimed register
;; clobbers: rcx, rdx, flags
;; Calls panic when no bit is free (panic is assumed not to return).
;;
;; NOTE(review): only indices 0..15 name real registers according to the
;; layout comment in codegen_function. This routine assumes the owner of the
;; FunctionCtx keeps bits 16..127 set so they are never handed out - confirm.
codegen_allocate_register:
    push rbp
    mov rbp, rsp

    ; Free registers are the zero bits, so invert and look for the first one.
    mov rdx, [rdi + 48]             ; register_bitset, low half
    not rdx
    ; bsf sets ZF when its source is zero and leaves the destination
    ; undefined, so the flag (not the destination) tells us whether a bit
    ; was found. Index 0 is a valid result.
    bsf rax, rdx
    jnz .claim_low

    mov rdx, [rdi + 56]             ; register_bitset, high half
    not rdx
    bsf rcx, rdx
    jz .panic                       ; all 128 bits are taken

    ; rcx = bit position inside the high qword (0..63)
    bts qword [rdi + 56], rcx       ; mark as in use
    bts qword [rdi + 72], rcx       ; mark as dirtied
    lea rax, [rcx + 64]             ; absolute register index
    jmp .done

.claim_low:
    ; rax = bit position inside the low qword (0..63) = register index
    bts qword [rdi + 48], rax       ; mark as in use
    bts qword [rdi + 64], rax       ; mark as dirtied

.done:
    pop rbp
    ret

.panic:
    ; no free registers!
    call panic
;; rdi: *Ctx
;; rsi: function index
;; define-fn: fn codegen_function(ast: *const CodegenCtx, func_idx: u64) -> ()
@ -191,14 +260,15 @@ codegen_function:
push r15
push r14
; scratch [88..104]
; scratch [104..120]
; dirtied-register-bitset [88..104] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
; register-bitset [72..88] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
; stack-vars: Vec<(index, offset)> [32..72]
; current_stack_size: [24..32]
; func_idx [16..24]
; ast [8..16]
; ctx [0..8]
sub rsp, 104
sub rsp, 120
mov [rsp], rdi ; ctx
mov rax, [rdi]
mov [rsp + 8], rax ; ast
@ -279,13 +349,13 @@ codegen_function:
lea rsi, [rax + r14 * 8] ;
mov rsi, [rsi] ; AstFunction.args[i]
mov [rsp + 88], rsi
mov [rsp + 104], rsi ; scratch
mov rax, [rsp + 24] ; current_stack_size
mov [rsp + 96], rax
mov [rsp + 112], rax
add rax, 8 ; size_of::<u64>
mov [rsp + 24], rax ; current_stack_size += size_of::<u64>
lea rdi, [rsp + 32] ; stack-vars
lea rsi, [rsp + 88] ; &(index, offset)
lea rsi, [rsp + 104] ; &scratch: &(index, offset)
mov rdx, stackvar_cmp
mov rcx, 0
call vec_insert_sorted
@ -308,7 +378,7 @@ codegen_function:
mov rdx, RET_NL_LEN
call vec_extend
add rsp, 104
add rsp, 120
pop r15
pop r14
pop rbx
@ -322,4 +392,409 @@ codegen_function:
;; rsi: &function_ctx
;; rdx: block index
;; Generates code for every statement of one block node, in order, by calling
;; codegen_statement(ctx, &function_ctx, statement index) for each entry.
;;
;; AstNode offsets follow the ones codegen_expr uses (kind at +0, data at +8);
;; extra is taken to be the next qword.
;; NOTE(review): extra at +16 is inferred from that layout - confirm against
;; the AstNode definition.
codegen_block:
    push rbp
    mov rbp, rsp
    push r15
    push r14
    push rbx

    ; padding        [16..24]  keeps rsp 16-byte aligned at the calls below
    ; &function_ctx  [8..16]
    ; ctx            [0..8]
    sub rsp, 24
    mov [rsp], rdi                  ; ctx
    mov [rsp + 8], rsi              ; &function_ctx

    mov rdi, [rdi]                  ; ast
    mov rsi, rdx                    ; block index
    call vec_get
    ; rax: *AstNode
    mov r15, [rax + 16]             ; AstNode.extra, used as the statement count
    mov rbx, [rax + 8]              ; AstNode.data, used as the statement array
    xor r14d, r14d                  ; statement index

.stmt_loop:
    cmp r14, r15
    jae .stmt_loop_done             ; unsigned: r15 is a count
    mov rdi, [rsp]                  ; ctx
    mov rsi, [rsp + 8]              ; &function_ctx (the saved pointer itself)
    mov rdx, [rbx + r14 * 8]        ; statements[i]
    call codegen_statement
    inc r14
    jmp .stmt_loop

.stmt_loop_done:
    add rsp, 24
    pop rbx
    pop r14
    pop r15
    pop rbp
    ret
;; rdi: ctx
;; rsi: &function_ctx
;; rdx: expr index
;; returns: register index in rax if rdx=0, else stack-offset in rax
;; define-fn: fn codegen_expr(ctx: *const CodegenCtx, function_ctx: &FunctionCtx, expr_idx: u64) -> (u64, bool)
;;
;; Generates code for one AST node, dispatching on AstNode.kind.
;; Implemented so far:
;;   AST_RETURN_STATEMENT  emits "mov rax, <reg>" followed by "jmp .epilogue"
;;   AST_NUMBER            only reserves a register (value load is still TODO)
;; Every other kind ends in panic.
;;
;; NOTE(review): none of the implemented paths sets rdx explicitly yet, so the
;; second half of the documented return value is not reliable.
;; NOTE(review): get_register_name is assumed to return the name pointer in
;; rax and its length in rdx; rdx is handed to vec_extend unchanged.
codegen_expr:
    push rbp
    mov rbp, rsp
    push rbx

    ; scratch        [16..40]
    ; function_ctx:  [8..16]
    ; ctx            [0..8]
    ; 40 rather than 32: together with the two pushes above this keeps rsp
    ; 16-byte aligned at every call below.
    sub rsp, 40
    mov [rsp], rdi                  ; ctx
    mov [rsp + 8], rsi              ; &function_ctx

    mov rdi, [rdi]                  ; ast
    mov rsi, rdx                    ; expr index
    call vec_get
    ; rax: *AstNode
    mov rbx, [rax]                  ; AstNode.kind (only bl is compared)

    cmp bl, AST_RETURN_STATEMENT
    je .return
    cmp bl, AST_BLOCK
    je .block
    cmp bl, AST_VAR_DECL
    je .var_decl
    cmp bl, AST_VAR_REF
    je .var_ref
    cmp bl, AST_NUMBER
    je .number
    cmp bl, AST_BINARY_OP
    je .binary_op
    cmp bl, AST_ASSIGNMENT
    je .assignment
    cmp bl, AST_PLACE_TO_VALUE
    je .place_to_value
    cmp bl, AST_VALUE_TO_PLACE
    je .value_to_place
    cmp bl, AST_DEREF
    je .deref
    cmp bl, AST_ADDRESS_OF
    je .address_of
    jmp .panic

.return:
    ; codegen inner expr
    mov rdi, [rsp]                  ; ctx
    mov rsi, [rsp + 8]              ; &function_ctx
    mov rdx, [rax + 8]              ; AstNode.data, passed on as the expr index
    call codegen_expr
    mov rbx, rax                    ; result of the inner expression

    ; text += "mov rax, "
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text (text is stored inline in ctx)
    lea rsi, [rel MOV_RAX]
    mov rdx, MOV_RAX_LEN
    call vec_extend

    ; text += name of the result register
    mov rdi, rbx
    mov rsi, 8
    lea rdx, [rsp + 16]             ; scratch
    call get_register_name
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    mov rsi, rax
    call vec_extend

    ; text += "\njmp .epilogue\n"
    mov rdi, [rsp]                  ; ctx
    lea rdi, [rdi + 8]              ; &ctx.text
    lea rsi, [rel JMP_EPILOGUE]
    mov rdx, JMP_EPILOGUE_LEN
    call vec_extend

    xor eax, eax
    jmp .done

.number:
    ; rax = *AstNode
    mov [rsp + 16], rax             ; scratch = *AstNode
    mov rdi, [rsp + 8]              ; &function_ctx
    call codegen_allocate_register
    ; TODO: emit the code that loads the number; for now the freshly
    ; allocated register index is returned as-is.

.done:
    add rsp, 40
    pop rbx
    pop rbp
    ret

    ; Node kinds that are recognised but not implemented yet all end in panic.
.block:
.var_decl:
.var_ref:
.binary_op:
.assignment:
.place_to_value:
.value_to_place:
.deref:
.address_of:
.panic:
    call panic
;; start-structs
;; struct Operand {
;; kind: u8,
;; register_and_width: u8,
;; len: u16,
;; value: u64,
;; }
;; end-structs
;; register_and_width packs two 4-bit fields: register in the low 4 bits, width in the high 4 bits
section .rdata
; Operand.kind values. These are used as immediates (`cmp bl, OPERAND_...`),
; so they are assemble-time constants (equ) rather than bytes stored in .rdata.
;; start-consts
OPERAND_REGISTER equ 1 ; e.g. rax, rbx
OPERAND_RBP_OFFSET equ 2 ; e.g. [rbp - 8]
OPERAND_RSP_OFFSET equ 3 ; e.g. [rsp + 16]
OPERAND_ADDRESS equ 4 ; e.g. [rel OPERAND_ADDRESS]
OPERAND_IMMEDIATE equ 5 ; e.g. 0x10
OPERAND_CONSTANT equ 6 ; e.g. OPERAND_CONSTANT
OPERAND_LABEL equ 7 ; e.g. label_1234
;; end-consts
; Size keywords written in front of memory operands; each includes its
; trailing space ("byte "/"word " are 5 bytes, "dword "/"qword " are 6).
WIDTH_BYTE db 'byte '
WIDTH_WORD db 'word '
WIDTH_DWORD db 'dword '
WIDTH_QWORD db 'qword '

; The routines that follow are code, so switch back out of the data section.
section .text
;; rdi: *text
;; rsi: op: *Operand
;;
;; Appends the textual form of one operand to *text, chosen by op.kind:
;;   OPERAND_REGISTER     the register name from get_register_name
;;   OPERAND_RBP_OFFSET   "{width}[rbp + N]" / "{width}[rbp -N]"
;;   OPERAND_RSP_OFFSET   "{width}[rsp + N]" / "{width}[rsp -N]"
;;   OPERAND_ADDRESS      "[rel name]"
;;   OPERAND_IMMEDIATE    the value in decimal
;;   OPERAND_CONSTANT     the name
;; OPERAND_LABEL and unknown kinds end in panic.
;;
;; Operand offsets used here: kind +0, register_and_width +1, len +2 (u16),
;; value +8.
;;
;; NOTE(review): get_register_name and int_to_str2 are assumed to return the
;; text pointer in rax and its length in rdx; rdx is handed to vec_extend
;; unchanged.
;; NOTE(review): negative offsets rely on int_to_str2 producing the '-' sign
;; itself - confirm.
codegen_write_operand:
    push rbp
    mov rbp, rsp
    push rbx

    ; scratch  [16..40]
    ; *operand [8..16]
    ; *text    [0..8]
    sub rsp, 40
    mov [rsp], rdi                  ; *text
    mov [rsp + 8], rsi              ; op

    mov bl, byte [rsi + 0]          ; op.kind
    cmp bl, OPERAND_REGISTER
    je .register
    cmp bl, OPERAND_RBP_OFFSET
    je .rbp_offset
    cmp bl, OPERAND_RSP_OFFSET
    je .rsp_offset
    cmp bl, OPERAND_ADDRESS
    je .address
    cmp bl, OPERAND_IMMEDIATE
    je .immediate
    cmp bl, OPERAND_CONSTANT
    je .constant
    cmp bl, OPERAND_LABEL
    je .label
    jmp .panic

.register:
    movzx eax, byte [rsi + 1]       ; register_and_width (one byte only)
    mov edi, eax
    and edi, 0x0F                   ; low 4 bits = register
    mov esi, eax
    shr esi, 4                      ; high 4 bits = width
    lea rdx, [rsp + 16]             ; buffer
    call get_register_name
    mov rdi, [rsp]                  ; *text
    mov rsi, rax                    ; buffer
    call vec_extend
    jmp .epilogue

.rbp_offset:
.rsp_offset:
    ; {width} [rbp {+/-} offset]
    mov rax, [rsp + 8]              ; op
    movzx esi, byte [rax + 1]       ; register_and_width
    shr esi, 4                      ; width
    mov rdi, [rsp]                  ; *text
    call codegen_write_width

    mov byte [rsp + 16], '['
    mov rdi, [rsp]                  ; *text
    lea rsi, [rsp + 16]
    call vec_push

    ; if op.kind == OPERAND_RBP_OFFSET
    mov rax, [rsp + 8]              ; op
    cmp byte [rax + 0], OPERAND_RBP_OFFSET
    jne .rsp_offset_write
    ; NASM stores character constants in source order, so 'rbp ' is written
    ; to memory as the bytes "rbp ".
    mov dword [rsp + 16], 'rbp '
    mov rdi, [rsp]                  ; *text
    lea rsi, [rsp + 16]
    mov rdx, 4
    call vec_extend
    jmp .check_sign

.rsp_offset_write:
    mov dword [rsp + 16], 'rsp '
    mov rdi, [rsp]                  ; *text
    lea rsi, [rsp + 16]
    mov rdx, 4
    call vec_extend

.check_sign:
    ; if value >= 0, write an explicit "+ "
    mov rax, [rsp + 8]              ; op
    mov rdi, [rax + 8]              ; op.value
    test rdi, rdi
    js .skip_plus
    mov word [rsp + 16], '+ '
    mov rdi, [rsp]                  ; *text
    lea rsi, [rsp + 16]
    mov rdx, 2
    call vec_extend

.skip_plus:
    ; write offset
    mov rax, [rsp + 8]              ; op
    mov rdi, [rax + 8]              ; op.value
    lea rsi, [rsp + 16]
    mov rdx, 24                     ; max length (size of the scratch area)
    mov rcx, 10                     ; radix
    call int_to_str2
    mov rdi, [rsp]                  ; *text
    mov rsi, rax                    ; buffer
    call vec_extend

    mov byte [rsp + 16], ']'
    mov rdi, [rsp]                  ; *text
    lea rsi, [rsp + 16]
    call vec_push
    jmp .epilogue

.address:
    mov byte [rsp + 16], '['
    mov rdi, [rsp]                  ; *text
    lea rsi, [rsp + 16]
    call vec_push

    mov dword [rsp + 16], 'rel '
    mov rdi, [rsp]                  ; *text
    lea rsi, [rsp + 16]
    mov rdx, 4
    call vec_extend

    ; write address name
    mov rax, [rsp + 8]              ; op
    mov rdi, [rsp]                  ; *text
    mov rsi, [rax + 8]              ; op.value, used as the name pointer
    movzx edx, word [rax + 2]       ; op.len (u16)
    call vec_extend

    mov byte [rsp + 16], ']'
    mov rdi, [rsp]                  ; *text
    lea rsi, [rsp + 16]
    call vec_push
    jmp .epilogue

.immediate:
    ; write immediate value
    mov rax, [rsp + 8]              ; op
    mov rdi, [rax + 8]              ; op.value
    lea rsi, [rsp + 16]
    mov rdx, 24                     ; max length (size of the scratch area)
    mov rcx, 10                     ; radix
    call int_to_str2
    mov rdi, [rsp]                  ; *text
    mov rsi, rax                    ; buffer
    call vec_extend
    jmp .epilogue

.constant:
    ; write constant name
    mov rax, [rsp + 8]              ; op
    mov rdi, [rsp]                  ; *text
    mov rsi, [rax + 8]              ; op.value, used as the name pointer
    movzx edx, word [rax + 2]       ; op.len (u16)
    call vec_extend

.epilogue:
    add rsp, 40
    pop rbx
    pop rbp
    ret

.label:
    ; TODO: OPERAND_LABEL is not implemented yet
.panic:
    call panic
;; rdi: *text
;; rsi: width: u8
;;
;; Appends the size keyword for `width` to *text:
;;   8 -> "qword ", 4 -> "dword ", 2 -> "word ", 1 -> "byte "
;; Any other width ends in panic.
;; returns: rax = number of bytes appended (6 or 5)
codegen_write_width:
    ; lea and mov leave the flags alone, but comparing last keeps each
    ; keyword/length pair next to the test that selects it.
    lea rdx, [rel WIDTH_QWORD]
    mov ecx, 6                      ; length of "qword " and "dword "
    cmp sil, 8
    je .write
    lea rdx, [rel WIDTH_DWORD]
    cmp sil, 4
    je .write
    lea rdx, [rel WIDTH_WORD]
    mov ecx, 5                      ; length of "word " and "byte "
    cmp sil, 2
    je .write
    lea rdx, [rel WIDTH_BYTE]
    cmp sil, 1
    jne .panic

.write:
    mov rsi, rdx                    ; keyword text
    mov rdx, rcx                    ; keyword length
    push rcx                        ; keep the length across the call; also aligns rsp
    call vec_extend
    pop rax                         ; length written
    ret

.panic:
    sub rsp, 8                      ; align rsp for the call
    call panic
;; rdi: *text
;; rsi: dst: *Operand
;; rdx: src: *Operand
;;
;; Validates a dst <- src move. No instruction text is emitted yet: both the
;; direct move and the exchange-through-rax path are still stubs.
;;
;; Ends in panic when
;;   - dst.kind is above OPERAND_ADDRESS (unsupported destination),
;;   - dst and src widths differ, or
;;   - src is an OPERAND_ADDRESS and the width is not 8.
;;
;; Operand offsets used here: kind +0, register_and_width +1 (width in the
;; high 4 bits).
codegen_move_dst_src:
    push rbp
    mov rbp, rsp
    push rbx

    ; src   [16..24]
    ; dst   [8..16]
    ; *text [0..8]
    sub rsp, 24
    mov [rsp], rdi                  ; *text
    mov [rsp + 8], rsi              ; dst
    mov [rsp + 16], rdx             ; src

    cmp byte [rsi + 0], OPERAND_ADDRESS
    ja .panic                       ; unsupported dst kind (kind is unsigned)

    ; if dst.width != src.width
    movzx ecx, byte [rsi + 1]       ; dst.register_and_width
    movzx ebx, byte [rdx + 1]       ; src.register_and_width
    shr ecx, 4                      ; dst.width
    shr ebx, 4                      ; src.width
    cmp ecx, ebx
    jne .panic                      ; mismatched widths

    ; if dst.width != 8 && src.kind == OPERAND_ADDRESS
    cmp ecx, 8
    je .width_ok
    cmp byte [rdx + 0], OPERAND_ADDRESS
    je .panic                       ; address can only be moved to full-sized destinations

.width_ok:
    cmp byte [rsi + 0], OPERAND_REGISTER
    je .do_move
    cmp byte [rdx + 0], OPERAND_REGISTER
    jne .xchg_rax                   ; if dst != register and src != register, xchg via rax
    jmp .do_move

.xchg_rax:
    ; TODO, planned sequence:
    ; xchg rax, [src]
    ; mov [dst], rax
    ; xchg rax, [src]

.do_move:
    ; TODO: emit the move itself

.epilogue:
    add rsp, 24
    pop rbx
    pop rbp
    ret

.panic:
    call panic