split current_stack_size u64 into current/max u32 to allow deallocating the top stack allocation

This commit is contained in:
janis 2025-11-01 17:39:17 +01:00
parent 16bdac93ad
commit 703aa299c8
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
2 changed files with 71 additions and 40 deletions

View file

@ -271,7 +271,8 @@ stackvar_cmp:
;; text: Vec<u8>,
;; }
;; struct FunctionCtx {
;; current_stack_size: u64,
;; current_stack_size: u32,
;; max_stack_size: u32,
;; stack_vars: Vec<(u64, u64)>,
;; register_bitset: u16,
;; dirtied_register_bitset: u16,
@ -326,6 +327,38 @@ codegen_allocate_register:
pop rbp
ret
;; rdi: *FunctionCtx
;; rsi: width
;; define-fn: fn codegen_allocate_place(ctx: *mut FunctionCtx, width: u16) -> Operand
codegen_allocate_place:
    ; Reserve `width` bytes on top of the function's stack allocation and
    ; return an rbp-relative Operand describing the new slot.
    ;
    ; In:    rdi = ctx
    ;            dword [rdi + 0] = current_stack_size
    ;            dword [rdi + 4] = max_stack_size
    ;        si  = width (u16); bits 16 and above of rsi are ignored
    ; Out:   rax = packed Operand header (len = 0, width, register = 0,
    ;              kind = OPERAND_RBP_OFFSET)
    ;        rdx = Operand.value = -(current_stack_size after the bump)
    ; Clobb: rax, rdx, flags (rdi and rsi are left untouched)
    push    rbp
    mov     rbp, rsp

    ; current_stack_size += width
    ; width is declared u16, so zero-extend from si instead of trusting
    ; whatever the caller left in the upper bits of esi.
    movzx   eax, si
    mov     edx, dword [rdi + 0]        ; 32-bit load zero-extends into rdx
    add     edx, eax
    mov     dword [rdi + 0], edx

    ; max_stack_size = max(max_stack_size, current_stack_size)
    mov     eax, dword [rdi + 4]
    cmp     eax, edx
    cmovb   eax, edx                    ; unsigned compare: sizes are u32
    mov     dword [rdi + 4], eax

    ; construct Operand header:
    ;   bits 0..7  = Operand.kind
    ;   bits 8..11 = Operand.register (left 0 = undef)
    ;   bit 12 up  = Operand.width
    ;   Operand.len = 0
    ; NOTE(review): codegen_free_operand reads Operand.width from the low
    ; nibble of byte 1 (bits 8..11), while this packing puts width at bit 12
    ; and leaves bits 8..11 for the register. Confirm against the Operand
    ; definition which of the two is intended.
    movzx   eax, si
    shl     eax, 12
    or      eax, OPERAND_RBP_OFFSET

    neg     rdx                         ; Operand.value = -current_stack_size
    pop     rbp
    ret
;; rdi: *FunctionCtx
;; rsi: width
;; define-fn: fn codegen_allocate_value(ctx: *mut FunctionCtx, width: u16) -> Operand
@ -365,21 +398,7 @@ codegen_allocate_value:
jmp .done
.alloc_stack:
mov rdi, [rsp] ; ctx
mov rdx, [rdi + 0] ; current_stack_size
add rdx, [rsp + 8] ; width
mov [rdi + 0], rdx ; current_stack_size += width
; construct Operand
xor rax, rax
mov eax, 0 ; Operand.len = 0
shl eax, 16
mov rsi, [rsp + 8] ; width
or eax, esi ; Operand.width
shl eax, 4
; or eax, 0 ; Operand.register = undef
shl eax, 8
or eax, OPERAND_RBP_OFFSET ; Operand.kind
call codegen_allocate_place
.done:
add rsp, 16
@ -407,16 +426,17 @@ codegen_free_operand:
jmp .done
.free_stack:
mov rbx, [rdi + 0] ; current_stack_size
xor rbx, rbx
mov ebx, dword [rdi + 0] ; current_stack_size
mov rax, [rsi + 8] ; Operand.value
cmp rbx, rax
jne .done ; operand not at top of stack, can't free
mov al, byte [rsi + 1] ; Operand.width
shl al, 4
shr al, 4
movzx rax, al
sub rbx, rax
mov [rdi + 0], rbx ; current_stack_size -= width
mov dword [rdi + 0], ebx ; current_stack_size -= width
jmp .done
.free_reg:
@ -452,7 +472,8 @@ codegen_function:
; dirtied-register-bitset [76..80] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
; register-bitset [72..76] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
; stack-vars: Vec<(index, offset)> [32..72]
; current_stack_size: [24..32]
; max_stack_size: [28..32]
; current_stack_size: [24..28]
; func_idx [16..24]
; ast [8..16]
; ctx [0..8]
@ -461,7 +482,7 @@ codegen_function:
mov rax, [rdi]
mov [rsp + 8], rax ; ast
mov [rsp + 16], rsi ; func_idx
mov qword [rsp + 24], 0 ; current_stack_size = 0
mov qword [rsp + 24], 0 ; current_stack_size = 0, max_stack_size = 0
lea rdi, [rsp + 32] ; stack-vars
mov rsi, 16 ; size_of::<(u64, u64)>
@ -469,10 +490,12 @@ codegen_function:
mov rcx, 16 ; initial capacity
call vec_init_with
bts word [rsp + 72], 7 ; mark rsp as used
bts word [rsp + 72], 6 ; mark rbp as used
bts word [rsp + 72], 0 ; mark rax as used
mov word [rsp + 76], 0 ; dirtied_register_bitset = 0
bts word [rsp + 72], 7 ; mark rsp as used
bts word [rsp + 72], 6 ; mark rbp as used
bts word [rsp + 72], 0 ; mark rax as used
mov word [rsp + 76], 0 ; dirtied_register_bitset = 0
mov word [rsp + 72], -1 ; reserve all registers (test)
; push "section .text\n"
mov rdi, [rsp] ; ctx
@ -554,11 +577,18 @@ codegen_function:
lea rsi, [rax + r14 * 8] ;
mov rsi, [rsi] ; AstFunction.args[i]
mov [rsp + 104], rsi ; scratch
mov rax, [rsp + 24] ; current_stack_size
add rax, 8 ; size_of::<u64>
mov [rsp + 24], rax ; current_stack_size += size_of::<u64>
mov [rsp + 112], rax
mov [rsp + 104], rsi ; &(index, _)
lea rdi, [rsp + 24] ; &function_ctx
; TODO: get arg type size
mov rsi, 8 ; size_of::<u64>
call codegen_allocate_place
; rdx = stack offset
mov [rsp + 80], rax ; Operand
mov [rsp + 88], rdx ; Operand.value
neg rdx
mov [rsp + 112], rdx ; &(_, offset)
lea rdi, [rsp + 32] ; stack-vars
lea rsi, [rsp + 104] ; &scratch: &(index, offset)
mov rdx, stackvar_cmp
@ -567,17 +597,16 @@ codegen_function:
; spill arg from register to newly allocated stack slot
; get source Operand
mov r13, [rsp + 112] ; current_stack_size before increment
mov rdi, [rsp + 104] ; arg index
mov rdi, r14 ; arg index
call codegen_arg_to_operand
mov [rsp + 104], rax
mov [rsp + 112], rdx
mov rdx, r13 ; offset
neg rdx
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rsp + 104] ; src
call codegen_move_rbp_slot_src
lea rdx, [rsp + 80] ; dst
call codegen_move_dst_src
inc r14
jmp .arg_loop
@ -614,7 +643,7 @@ codegen_function:
mov rdx, 1 ; pop = true
call codegen_push_pop_dirtied_registers
; "add rsp, {current_stack_size}\n"
; "add rsp, {max_stack_size}\n"
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rel ADD_RSP]
@ -622,7 +651,7 @@ codegen_function:
call vec_extend
lea rdi, [rsp + 24] ; &function_ctx
mov rdi, [rdi + 0] ; current_stack_size
mov edi, dword [rdi + 4] ; max_stack_size
lea rsi, [rsp + 104] ; scratch
mov rdx, 16 ; buffer length
mov rcx, 10 ; radix
@ -658,7 +687,7 @@ codegen_function:
mov rdx, DOT_PROLOGUE_LEN
call vec_extend
; "sub rsp, {current_stack_size}\n"
; "sub rsp, {max_stack_size}\n"
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rel SUB_RSP]
@ -666,7 +695,7 @@ codegen_function:
call vec_extend
lea rdi, [rsp + 24] ; &function_ctx
mov rdi, [rdi + 0] ; current_stack_size
mov edi, dword [rdi + 4] ; max_stack_size
lea rsi, [rsp + 104] ; scratch
mov rdx, 16 ; buffer length
mov rcx, 10 ; radix

View file

@ -20,6 +20,7 @@ unsafe extern "C" {
pub unsafe fn get_register_name(reg_idx: u8, width: u8, buffer: *mut u8) -> FFISlice;
pub unsafe fn stackvar_cmp(a: *const (u64, u64), b: *const (u64, u64)) -> i32;
pub unsafe fn codegen_allocate_register(ctx: *mut FunctionCtx) -> u8;
pub unsafe fn codegen_allocate_place(ctx: *mut FunctionCtx, width: u16) -> Operand;
pub unsafe fn codegen_allocate_value(ctx: *mut FunctionCtx, width: u16) -> Operand;
pub unsafe fn codegen_free_operand(ctx: *mut FunctionCtx, operand: *const Operand) -> ();
pub unsafe fn codegen_function(ast: *const CodegenCtx, func_idx: u64) -> ();
@ -216,7 +217,8 @@ pub struct CodegenCtx {
#[repr(C)]
#[derive(Debug)]
pub struct FunctionCtx {
pub current_stack_size: u64,
pub current_stack_size: u32,
pub max_stack_size: u32,
pub stack_vars: Vec<(u64, u64)>,
pub register_bitset: u16,
pub dirtied_register_bitset: u16,