; from-scratch/lang/src/codegen.asm
; 2025-11-03 16:35:46 +01:00

; 2247 lines
; 55 KiB
; NASM

default rel
%include "src/ast.inc"
%include "src/tokeniser.inc"
extern panic
extern vec_extend
extern vec_get
extern vec_push
extern vec_insert_sorted
extern vec_binary_search_by
extern vec_insert_many
extern vec_init_with
extern int_to_str2
extern strlen
global codegen_function
global get_register_name
section .rdata
; Fixed text fragments of the generated assembly. The ones used in this part of
; the file are appended verbatim to the output buffer (ctx.text) with vec_extend.
; Every fragment has a matching *_LEN constant computed with `$ - label`, so the
; *_LEN line must directly follow its string.
SECTION_TEXT db "section .text", 10
SECTION_TEXT_LEN equ $ - SECTION_TEXT
GLOBAL_ db "global "
GLOBAL_LEN equ $ - GLOBAL_
; ":\n" -- codegen_function also uses the second byte on its own as a lone newline
COLON_NL db ":", 10
COLON_NL_LEN equ $ - COLON_NL
PROLOGUE db "push rbp", 10, "mov rbp, rsp", 10
PROLOGUE_LEN equ $ - PROLOGUE
EPILOGUE db "pop rbp", 10, "ret", 10
EPILOGUE_LEN equ $ - EPILOGUE
MOV_RAX db "mov rax, "
MOV_RAX_LEN equ $ - MOV_RAX
JMP_EPILOGUE db "jmp .epilogue", 10
JMP_EPILOGUE_LEN equ $ - JMP_EPILOGUE
; Local labels and jumps that stitch a generated function together:
; entry -> .prologue -> .args -> .body -> .epilogue
DOT_ARGS db ".args:", 10
DOT_ARGS_LEN equ $ - DOT_ARGS
DOT_BODY db ".body:", 10
DOT_BODY_LEN equ $ - DOT_BODY
DOT_PROLOGUE db ".prologue:", 10
DOT_PROLOGUE_LEN equ $ - DOT_PROLOGUE
DOT_EPILOGUE db ".epilogue:", 10
DOT_EPILOGUE_LEN equ $ - DOT_EPILOGUE
JMP_ARGS db "jmp .args", 10
JMP_ARGS_LEN equ $ - JMP_ARGS
JMP_BODY db "jmp .body", 10
JMP_BODY_LEN equ $ - JMP_BODY
JMP_PROLOGUE db "jmp .prologue", 10
JMP_PROLOGUE_LEN equ $ - JMP_PROLOGUE
SUB_RSP db "sub rsp, "
SUB_RSP_LEN equ $ - SUB_RSP
ADD_RSP db "add rsp, "
ADD_RSP_LEN equ $ - ADD_RSP
; NOTE(review): BODY holds the same bytes as DOT_BODY; codegen_function emits BODY.
BODY db ".body:", 10
BODY_LEN equ $ - BODY
; Register name stems packed back to back, in canonical register-index order:
;   bytes  0..3   one letter each:    a b c d          (indices 0..3)
;   bytes  4..15  two letters each:   si di bp sp r8 r9 (indices 4..9)
;   bytes 16..33  three letters each: r10 .. r15        (indices 10..15)
; write_register_name indexes into this table; get_register_name adds the
; width-dependent prefix and suffix.
REGISTER_NAMES db "abcdsidibpspr8r9r10r11r12r13r14r15"
section .text
;; ```rust
;; use super::FFISlice;
;; ```
;; Spells out the name of a general-purpose register at the requested width.
;; rdi: register index into [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
;; rsi: register width (1=byte,2=word,4=dword,8=qword)
;; rdx: *mut u8 (buffer, at least 4 bytes)
;; returns: rax = start of buffer, rdx = number of bytes written
;; define-fn: fn get_register_name(reg_idx: u8, width: u8, buffer: *mut u8) -> FFISlice
get_register_name:
    push    rbp
    mov     rbp, rsp
    push    rdx                         ; [rbp - 8]:  start of the buffer
    push    rdi                         ; [rbp - 16]: register index

    ; The eight legacy registers take an 'r' (qword) or 'e' (dword) prefix;
    ; r8..r15 already start with 'r' in the name table.
    cmp     rdi, 8
    jge     .emit_stem
    cmp     rsi, 8
    je      .prefix_qword
    cmp     rsi, 4
    jne     .emit_stem
    mov     byte [rdx], 'e'
    inc     rdx
    jmp     .emit_stem
.prefix_qword:
    mov     byte [rdx], 'r'
    inc     rdx

.emit_stem:
    push    rsi                         ; write_register_name clobbers rsi and rdi
    call    write_register_name         ; advances rdx past the stem
    pop     rsi
    mov     rdi, [rbp - 16]             ; reload the register index

    cmp     rdi, 8
    jge     .numbered_suffix

    ; Legacy registers: byte width ends in 'l' (al, sil, ...); a..d additionally
    ; end in 'x' for every width of two bytes or more (ax, eax, rax).
    cmp     rsi, 1
    je      .suffix_l
    cmp     rdi, 4
    jge     .finish                     ; si/di/bp/sp have no further suffix
    cmp     rsi, 2
    jl      .finish
    mov     byte [rdx], 'x'
    inc     rdx
    jmp     .finish
.suffix_l:
    mov     byte [rdx], 'l'
    inc     rdx
    jmp     .finish

    ; r8..r15: 'b' / 'w' / 'd' for byte / word / dword, nothing for qword.
.numbered_suffix:
    cmp     rsi, 1
    je      .suffix_b
    cmp     rsi, 2
    je      .suffix_w
    cmp     rsi, 4
    jne     .finish
    mov     byte [rdx], 'd'
    inc     rdx
    jmp     .finish
.suffix_w:
    mov     byte [rdx], 'w'
    inc     rdx
    jmp     .finish
.suffix_b:
    mov     byte [rdx], 'b'
    inc     rdx

.finish:
    pop     rdi                         ; restore the caller's register index
    pop     rax                         ; rax = start of the buffer
    sub     rdx, rax                    ; rdx = write cursor - start = length
    pop     rbp
    ret
;; Copies the width-independent stem of a register name out of REGISTER_NAMES.
;; rdi: register index
;; rdx: *mut u8 (buffer; one, two or three bytes are written)
;; On return rdx points just past the bytes written. Clobbers rax, rsi, rdi.
write_register_name:
    lea     rsi, [rel REGISTER_NAMES]
    cmp     rdi, 4
    jl      .one_letter
    cmp     rdi, 10
    jl      .two_letters

    ; indices 10..15: three-character entries starting at byte 16 of the table
    sub     rdi, 10
    lea     rdi, [rdi + rdi * 2]        ; (index - 10) * 3
    lea     rsi, [rsi + rdi + 16]
    mov     al, [rsi]
    mov     [rdx], al
    mov     al, [rsi + 1]
    mov     [rdx + 1], al
    mov     al, [rsi + 2]
    mov     [rdx + 2], al
    add     rdx, 3
    ret

.two_letters:
    ; indices 4..9: two-character entries starting at byte 4 of the table,
    ; i.e. offset 4 + (index - 4) * 2 = index * 2 - 4
    lea     rsi, [rsi + rdi * 2 - 4]
    mov     al, [rsi]
    mov     [rdx], al
    mov     al, [rsi + 1]
    mov     [rdx + 1], al
    add     rdx, 2
    ret

.one_letter:
    ; indices 0..3: single characters a, b, c, d
    mov     al, [rsi + rdi]
    mov     [rdx], al
    inc     rdx
    ret
;; rdi: arg index
;; Returns the input `Operand` for the given argument index:
;; On the SysV ABI, the first 6 integer/pointer args are passed in registers:
;; rdi, rsi, rdx, rcx, r8, r9
;; Later arguments are described as rbp-relative values at 16 + 8 * (index - 6).
;; returns: rax = packed Operand header (kind | register << 8 | width << 16),
;;          rdx = Operand.value
codegen_arg_to_operand:
    push    rbp
    mov     rbp, rsp
    cmp     rdi, 6
    jge     .on_stack

    ; Canonical register indices are [a,b,c,d,si,di,bp,sp,8,9,...], so the SysV
    ; order maps to 5 (di), 4 (si), 3 (d), 2 (c), 8, 9:
    ;   args 0..3 -> 5 - index,  args 4..5 -> index + 4
    cmp     rdi, 4
    jge     .r8_r9
    mov     eax, 5
    sub     rax, rdi
    jmp     .pack_register
.r8_r9:
    lea     rax, [rdi + 4]
.pack_register:
    or      eax, 8 << 8                 ; Operand.width = 8, one byte above the register
    shl     eax, 8                      ; make room for the kind byte
    or      eax, OPERAND_REGISTER       ; Operand.kind
    xor     edx, edx                    ; Operand.value = 0
    jmp     .leave

.on_stack:
    ; TODO: use these offsets in stack-vars instead of copying into new stack slots
    ; offset = 8 * (index - 6) + 16, skipping the return address and the old rbp
    lea     rdx, [rdi * 8 - 32]         ; Operand.value = offset
    mov     eax, 8 << 16                ; Operand.width = 8, register left undefined (0)
    or      eax, OPERAND_RBP_VALUE      ; Operand.kind
.leave:
    pop     rbp
    ret
;; Ordering callback for the stack-vars vector: orders entries by their index.
;; rdi: ctx (unused)
;; rsi: a: *const (index, offset)
;; rdx: b: *const (index, offset)
;; returns: rax = -1 if a.index < b.index, 0 if equal, 1 if a.index > b.index
;; define-fn: fn stackvar_cmp(a: *const (u64, u64), b: *const (u64, u64)) -> i32
stackvar_cmp:
    push    rbp
    mov     rbp, rsp
    mov     rax, [rsi + 0]              ; a.index
    mov     rcx, [rdx + 0]              ; b.index
    ; Compare the two indices with each other. Comparing a.index against zero
    ; (as before) made every non-zero key sort as "greater" regardless of b.
    ; NOTE(review): the comparison is signed although the index is declared u64;
    ; kept as in the original, only matters for indices >= 2^63.
    cmp     rax, rcx
    jl      .less
    jg      .greater
    xor     eax, eax                    ; equal
    jmp     .done
.less:
    mov     rax, -1
    jmp     .done
.greater:
    mov     eax, 1
.done:
    pop     rbp
    ret
;; start-structs
;; struct CodegenCtx {
;; ast: *const Ast,
;; text: Vec<u8>,
;; }
;; struct FunctionCtx {
;; current_stack_size: u32,
;; max_stack_size: u32,
;; stack_vars: Vec<(u64, u64)>,
;; register_bitset: u16,
;; dirtied_register_bitset: u16,
;; }
;; end-structs
;; Claims the lowest-numbered free register of the function being generated.
;; rdi: *FunctionCtx
;; returns: rax = register index, or -1 when every register is taken
;; define-fn: fn codegen_allocate_register(ctx: *mut FunctionCtx) -> u8
codegen_allocate_register:
    push    rbp
    mov     rbp, rsp
    ; register_bitset has one bit per register, set = in use.
    ; Inverting the low 16 bits leaves a 1 for every free register, and the
    ; lowest set bit of that is the register handed out.
    movzx   ecx, word [rdi + 48]        ; register_bitset
    xor     ecx, 0xFFFF                 ; ecx = free registers
    jz      .exhausted
    bsf     eax, ecx                    ; eax = index of the lowest free register

    ; mark it as in use ...
    movzx   ecx, word [rdi + 48]
    bts     ecx, eax
    mov     word [rdi + 48], cx
    ; ... and as dirtied, so the prologue/epilogue can save and restore it
    movzx   ecx, word [rdi + 52]        ; dirtied_register_bitset
    bts     ecx, eax
    mov     word [rdi + 52], cx
    pop     rbp
    ret

.exhausted:
    ; return -1u64 to indicate no free registers
    ; the caller should panic or allocate a stack slot instead
    mov     rax, -1
    pop     rbp
    ret
;; Reserves `width` bytes of the function's stack frame and describes them as an
;; addressable rbp-relative slot.
;; rdi: *FunctionCtx
;; rsi: width
;; returns: rax = packed Operand header (kind = OPERAND_RBP_OFFSET, width),
;;          rdx = Operand.value = -current_stack_size after the allocation
;; define-fn: fn codegen_allocate_place(ctx: *mut FunctionCtx, width: u16) -> Operand
codegen_allocate_place:
    push    rbp
    mov     rbp, rsp

    ; current_stack_size += width
    mov     edx, dword [rdi + 0]
    add     edx, esi
    mov     dword [rdi + 0], edx

    ; max_stack_size = max(max_stack_size, current_stack_size)
    cmp     edx, dword [rdi + 4]
    jbe     .high_water_ok
    mov     dword [rdi + 4], edx
.high_water_ok:

    neg     rdx                         ; the slot sits below rbp
    mov     eax, esi                    ; Operand.width ...
    shl     eax, 16                     ; ... lives above kind and register (left 0)
    or      eax, OPERAND_RBP_OFFSET     ; Operand.kind
    pop     rbp
    ret
;; Reserves a stack slot like codegen_allocate_place, but tags the resulting
;; Operand as a plain value (OPERAND_RBP_VALUE) instead of an addressable place.
;; rdi: *FunctionCtx
;; rsi: width
;; returns: rax = packed Operand header, rdx = Operand.value (rbp-relative offset)
;; define-fn: fn codegen_allocate_stack_value(ctx: *mut FunctionCtx, width: u16) -> Operand
codegen_allocate_stack_value:
    push    rbp
    mov     rbp, rsp
    call    codegen_allocate_place
    mov     rsi, 0xF
    not     rsi
    and     rax, rsi                    ; clear kind to make it a value
    ; OR the new kind in. The previous AND with the kind constant wiped the
    ; whole header, including the width that was just computed.
    or      rax, OPERAND_RBP_VALUE      ; Operand.kind = OPERAND_RBP_VALUE
    pop     rbp
    ret
;; Allocates storage for a value: a register when the width fits in 8 bytes and
;; one is free, otherwise a stack slot.
;; rdi: *FunctionCtx
;; rsi: width
;; returns: rax = packed Operand header, rdx = Operand.value
;; define-fn: fn codegen_allocate_value(ctx: *mut FunctionCtx, width: u16) -> Operand
codegen_allocate_value:
    push    rbp
    mov     rbp, rsp
    push    rbx
    ; padding      [16..24]  keeps rsp 16-byte aligned at the calls below
    ; width        [8..16]
    ; *FunctionCtx [0..8]
    sub     rsp, 24
    mov     [rsp], rdi                  ; ctx
    mov     [rsp + 8], rsi              ; width
    cmp     rsi, 8
    jg      .alloc_stack
.alloc_reg:
    call    codegen_allocate_register
    cmp     rax, -1
    je      .alloc_stack                ; out of registers
    mov     rbx, rax
    ; construct Operand
    mov     rsi, [rsp + 8]              ; width
    mov     eax, esi                    ; Operand.width
    shl     eax, 8
    or      eax, ebx                    ; Operand.register
    shl     eax, 8
    or      eax, OPERAND_REGISTER       ; Operand.kind
    xor     edx, edx                    ; Operand.value = 0
    jmp     .done
.alloc_stack:
    ; rdi/rsi are caller-saved and may have been clobbered by the call above,
    ; so take the arguments from the frame instead of trusting the registers.
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; width
    call    codegen_allocate_stack_value
.done:
    add     rsp, 24
    pop     rbx
    pop     rbp
    ret
;; Releases the storage behind an Operand.
;; Register operands clear their bit in register_bitset (panics if the bit was
;; not set). Stack operands shrink current_stack_size, but only when the slot is
;; the most recently allocated one. All other kinds are ignored.
;; rdi: *FunctionCtx
;; rsi: *Operand
;; define-fn: fn codegen_free_operand(ctx: *mut FunctionCtx, operand: *const Operand) -> ()
codegen_free_operand:
    push    rbp
    mov     rbp, rsp
    push    rbx
    sub     rsp, 8                      ; keep rsp 16-byte aligned for `call panic`
    mov     al, byte [rsi]              ; Operand.kind
    cmp     al, OPERAND_REGISTER
    je      .free_reg
    cmp     al, OPERAND_REGISTER_PLACE
    je      .free_reg
    cmp     al, OPERAND_RBP_VALUE
    je      .free_stack
    cmp     al, OPERAND_RBP_PLACE
    je      .free_stack
    jmp     .done
.free_stack:
    mov     ebx, dword [rdi + 0]        ; current_stack_size (zero-extended)
    mov     rax, [rsi + 8]              ; Operand.value
    ; codegen_allocate_place hands out value = -current_stack_size, so the
    ; offset has to be negated before it can be compared with the (positive)
    ; stack size. Without this the test never matched and nothing was freed.
    neg     rax
    cmp     rbx, rax
    jne     .done                       ; operand not at top of stack, can't free
    movzx   eax, word [rsi + 2]         ; Operand.width
    sub     ebx, eax
    mov     dword [rdi + 0], ebx        ; current_stack_size -= width
    jmp     .done
.free_reg:
    movzx   rax, byte [rsi + 1]         ; Operand.register
    mov     bx, word [rdi + 48]         ; register_bitset
    btr     bx, ax                      ; CF = previous state of the bit
    jnc     .panic                      ; trying to free unallocated register
    mov     word [rdi + 48], bx         ; update register_bitset
.done:
    add     rsp, 8
    pop     rbx
    pop     rbp
    ret
.panic:
    call    panic
;; Emits the assembly text for one function of the AST into ctx.text.
;; The generated function has this shape:
;;   section .text / global name / name:
;;     push rbp; mov rbp, rsp; jmp .prologue
;;   .args:     copies of the incoming arguments into fresh stack slots
;;   .body:     code for the function body
;;   .epilogue: pops of dirtied callee-saved registers, add rsp, pop rbp, ret
;;   .prologue: sub rsp, pushes of dirtied callee-saved registers, jmp .args
;; The prologue is emitted last because the stack size and the dirtied
;; registers are only known once the body has been generated.
;; Panics if the node at `func_idx` is not an AST_FUNCTION.
;; rdi: *Ctx
;; rsi: function index
;; define-fn: fn codegen_function(ast: *const CodegenCtx, func_idx: u64) -> ()
codegen_function:
    push    rbp
    mov     rbp, rsp
    push    rbx
    push    r15
    push    r14
    push    r13
    ; padding [120..128]  keeps rsp 16-byte aligned at every call
    ; scratch [104..120]
    ; scratch2 [80..104]
    ; dirtied-register-bitset [76..80] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
    ; register-bitset [72..76] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
    ; stack-vars: Vec<(index, offset)> [32..72]
    ; max_stack_size: [28..32]
    ; current_stack_size: [24..28]
    ; func_idx [16..24]
    ; ast [8..16]
    ; ctx [0..8]
    ; The FunctionCtx handed to the helpers starts at [rsp + 24].
    sub     rsp, 128
    mov     [rsp], rdi                  ; ctx
    mov     rax, [rdi]
    mov     [rsp + 8], rax              ; ast
    mov     [rsp + 16], rsi             ; func_idx
    mov     qword [rsp + 24], 0         ; current_stack_size = 0, max_stack_size = 0
    lea     rdi, [rsp + 32]             ; stack-vars
    mov     rsi, 16                     ; size_of::<(u64, u64)>
    mov     rdx, 0                      ; drop = None
    mov     rcx, 16                     ; initial capacity
    call    vec_init_with
    ; Both bitsets live in uninitialised stack memory: clear them before the
    ; reserved registers are marked, otherwise leftover bits would make
    ; arbitrary registers look permanently in use.
    mov     qword [rsp + 72], 0         ; register_bitset = 0, dirtied_register_bitset = 0
    bts     word [rsp + 72], 7          ; mark rsp as used
    bts     word [rsp + 72], 6          ; mark rbp as used
    bts     word [rsp + 72], 0          ; mark rax as used

    ; push "section .text\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel SECTION_TEXT]
    mov     rdx, SECTION_TEXT_LEN
    call    vec_extend

    mov     rdi, [rsp + 8]              ; ast
    mov     rsi, [rsp + 16]             ; func_idx
    call    vec_get
    cmp     byte [rax + 0], AST_FUNCTION ; AstNode.kind
    mov     rbx, [rax + 8]              ; AstNode.data (mov leaves the flags alone)
    jne     .panic
    ; rbx = *AstFunction from here on

    ; push "global {function_name}\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel GLOBAL_]
    mov     rdx, GLOBAL_LEN
    call    vec_extend
    ; get function name
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    mov     rsi, [rbx + 0]              ; AstFunction.name
    mov     rdx, [rbx + 8]              ; AstFunction.name_len
    call    vec_extend
    ; push "\n" (the second byte of ":\n")
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel COLON_NL + 1]
    mov     rdx, 1
    call    vec_extend

    ; push "{function_name}:\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    mov     rsi, [rbx + 0]              ; AstFunction.name
    mov     rdx, [rbx + 8]              ; AstFunction.name_len
    call    vec_extend
    ; push ":\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel COLON_NL]
    mov     rdx, COLON_NL_LEN
    call    vec_extend

    ; push prologue
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel PROLOGUE]
    mov     rdx, PROLOGUE_LEN
    call    vec_extend

    ; allocate args on stack
    ; "jmp .prologue\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel JMP_PROLOGUE]
    mov     rdx, JMP_PROLOGUE_LEN
    call    vec_extend
    ; ".args:\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel DOT_ARGS]
    mov     rdx, DOT_ARGS_LEN
    call    vec_extend

    mov     r15, [rbx + 24]             ; AstFunction.args_len
    xor     r14, r14                    ; arg index
.arg_loop:
    cmp     r14, r15
    jge     .arg_loop_done
    mov     rax, [rbx + 16]             ; AstFunction.args
    lea     rsi, [rax + r14 * 8]
    mov     rsi, [rsi]                  ; AstFunction.args[i]
    mov     [rsp + 104], rsi            ; &(index, _)
    lea     rdi, [rsp + 24]             ; &function_ctx
    ; TODO: get arg type size
    mov     rsi, 8                      ; size_of::<u64>
    call    codegen_allocate_place
    ; rdx = stack offset
    mov     [rsp + 80], rax             ; Operand
    mov     [rsp + 88], rdx             ; Operand.value
    neg     rdx                         ; stack-vars stores the offset as a positive number
    mov     [rsp + 112], rdx            ; &(_, offset)
    lea     rdi, [rsp + 32]             ; stack-vars
    lea     rsi, [rsp + 104]            ; &scratch: &(index, offset)
    lea     rdx, [rel stackvar_cmp]
    mov     rcx, 0
    call    vec_insert_sorted
    ; spill arg from register to newly allocated stack slot
    ; get source Operand
    mov     rdi, r14                    ; arg index
    call    codegen_arg_to_operand
    mov     [rsp + 104], rax
    mov     [rsp + 112], rdx
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 80]             ; dst
    lea     rdx, [rsp + 104]            ; src
    call    codegen_move_dst_src
    inc     r14
    jmp     .arg_loop
.arg_loop_done:

    ; ".body:\n" (execution falls through from .args, no jump is emitted)
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel BODY]
    mov     rdx, BODY_LEN
    call    vec_extend

    ; generate the function body
    mov     rdi, [rsp]                  ; ctx
    lea     rsi, [rsp + 24]             ; &function_ctx
    mov     rdx, [rbx + 48]             ; AstFunction.body
    call    codegen_expr

    ; ".epilogue:\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel DOT_EPILOGUE]
    mov     rdx, DOT_EPILOGUE_LEN
    call    vec_extend
    ; "pop {dirtied registers}\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 24]             ; &function_ctx
    mov     rdx, 1                      ; pop = true
    call    codegen_push_pop_dirtied_registers
    ; "add rsp, {max_stack_size}\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel ADD_RSP]
    mov     rdx, ADD_RSP_LEN
    call    vec_extend
    mov     edi, dword [rsp + 24 + 4]   ; function_ctx.max_stack_size
    lea     rsi, [rsp + 104]            ; scratch
    mov     rdx, 16                     ; buffer length
    mov     rcx, 10                     ; radix
    call    int_to_str2                 ; rax = digits, rdx = length
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    mov     rsi, rax                    ; rdx still holds the length
    call    vec_extend
    ; push "\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel COLON_NL + 1]
    call    vec_push
    ; "pop rbp\n"
    ; "ret\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel EPILOGUE]
    mov     rdx, EPILOGUE_LEN
    call    vec_extend

    ; ".prologue:\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel DOT_PROLOGUE]
    mov     rdx, DOT_PROLOGUE_LEN
    call    vec_extend
    ; "sub rsp, {max_stack_size}\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel SUB_RSP]
    mov     rdx, SUB_RSP_LEN
    call    vec_extend
    mov     edi, dword [rsp + 24 + 4]   ; function_ctx.max_stack_size
    lea     rsi, [rsp + 104]            ; scratch
    mov     rdx, 16                     ; buffer length
    mov     rcx, 10                     ; radix
    call    int_to_str2                 ; rax = digits, rdx = length
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    mov     rsi, rax                    ; rdx still holds the length
    call    vec_extend
    ; push "\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel COLON_NL + 1]
    call    vec_push
    ; "push {dirtied registers}\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 24]             ; &function_ctx
    mov     rdx, 0                      ; pop = false
    call    codegen_push_pop_dirtied_registers
    ; "jmp .args\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel JMP_ARGS]
    mov     rdx, JMP_ARGS_LEN
    call    vec_extend
.epilogue:
    add     rsp, 128
    pop     r13
    pop     r14
    pop     r15
    pop     rbx
    pop     rbp
    ret
.panic:
    call    panic
;; Emits one "push {reg}\n" or "pop {reg}\n" line for every dirtied callee-saved
;; register (rbx, r12..r15; rbp is handled by the fixed prologue/epilogue).
;; Pushes are appended in ascending register order. Pops are all inserted at the
;; position the text had on entry, so they end up in the reverse order.
;; rdi: *text
;; rsi: &function_ctx
;; rdx: pop: bool
codegen_push_pop_dirtied_registers:
    push    rbp
    mov     rbp, rsp
    push    rbx                         ; rbx = registers still to emit (bitset)
    push    r15                         ; r15 = length of the mnemonic prefix
    push    r14                         ; r14 = register index
    ; line buffer     [24..40]  "push " / "pop " + name + "\n"
    ; insert position [16..24]  text.len() on entry when popping, -1 when pushing
    ; &function_ctx   [8..16]
    ; *text           [0..8]
    ; 40 bytes hold the whole line (the old 32-byte frame let the newline land
    ; on the saved r14) and keep rsp 16-byte aligned at the calls.
    sub     rsp, 40
    mov     [rsp], rdi
    mov     [rsp + 8], rsi
    mov     qword [rsp + 16], -1
    mov     rax, 'push '
    mov     qword [rsp + 24], rax
    mov     r15d, 5                     ; strlen("push ")
    test    rdx, rdx
    jz      .mode_ready
    mov     rax, 'pop '
    mov     qword [rsp + 24], rax
    mov     r15d, 4                     ; strlen("pop "): the name follows directly,
                                        ; so no padding byte ends up in the output
    mov     rax, [rdi + 8]              ; text.len()
    mov     [rsp + 16], rax
.mode_ready:
    ; bit order: a b c d si di bp sp | 8 9 10 11 12 13 14 15 (bit 0 first)
    ; preserved registers handled here: b, 12, 13, 14, 15
    mov     bx, word [rsi + 48 + 4]     ; dirtied_register_bitset
    and     bx, 0b11110000_00000010
    jz      .done
    xor     r14d, r14d
.reg_loop:
    cmp     r14, 16
    jge     .done
    bt      bx, r14w
    jnc     .next_reg
    mov     rdi, r14
    mov     rsi, 8                      ; qword name
    lea     rdx, [rsp + r15 + 24]       ; right behind the mnemonic
    call    get_register_name           ; rax = name, rdx = name length
    mov     byte [rax + rdx], 10        ; newline directly after the name
    lea     rcx, [r15 + rdx + 1]        ; line length = prefix + name + newline
    mov     rdi, [rsp]                  ; text
    cmp     qword [rsp + 16], -1
    jne     .reg_pop
    lea     rsi, [rsp + 24]
    mov     rdx, rcx
    call    vec_extend
    jmp     .next_reg
.reg_pop:
    mov     rsi, [rsp + 16]             ; insert position
    lea     rdx, [rsp + 24]
    call    vec_insert_many             ; rcx = line length
.next_reg:
    inc     r14
    jmp     .reg_loop
.done:
    add     rsp, 40
    pop     r14
    pop     r15
    pop     rbx
    pop     rbp
    ret
;; Emits one "push {reg}\n" or "pop {reg}\n" line for every dirtied volatile
;; register (rcx, rdx, rsi, rdi, r8..r11). Pushes are appended in ascending
;; register order. Pops are all inserted at the position the text had on entry,
;; so they end up in the reverse order.
;; rdi: *text
;; rsi: &function_ctx
;; rdx: pop: bool
;; Returns the number of registers pushed/popped in rax
;; define-fn: fn codegen_push_pop_used_registers(text: *mut Vec<u8>, function_ctx: &FunctionCtx, pop: bool) -> u8
codegen_push_pop_used_registers:
    push    rbp
    mov     rbp, rsp
    push    rbx                         ; rbx = registers still to emit (bitset)
    push    r15                         ; r15 = length of the mnemonic prefix
    push    r14                         ; r14 = register index
    push    r13                         ; r13 = number of lines emitted
    ; padding         [40..48]
    ; line buffer     [24..40]  "push " / "pop " + name + "\n"
    ; insert position [16..24]  text.len() on entry when popping, -1 when pushing
    ; &function_ctx   [8..16]
    ; *text           [0..8]
    ; The old 32-byte frame let the newline land on the saved r13.
    sub     rsp, 48
    mov     [rsp], rdi
    mov     [rsp + 8], rsi
    xor     r13d, r13d                  ; count = 0, also on the early exit below
    mov     qword [rsp + 16], -1
    mov     rax, 'push '
    mov     qword [rsp + 24], rax
    mov     r15d, 5                     ; strlen("push ")
    test    rdx, rdx
    jz      .mode_ready
    mov     rax, 'pop '
    mov     qword [rsp + 24], rax
    mov     r15d, 4                     ; strlen("pop "): the name follows directly
    mov     rax, [rdi + 8]              ; text.len()
    mov     [rsp + 16], rax
.mode_ready:
    ; bit order: a b c d si di bp sp | 8 9 10 11 12 13 14 15 (bit 0 first)
    ; volatile registers handled here: c, d, si, di, 8, 9, 10, 11
    ; additionally, rax is never preserved by this compiler
    mov     bx, word [rsi + 48 + 4]     ; dirtied_register_bitset
    and     bx, 0b00001111_00111100
    jz      .done
    xor     r14d, r14d
.reg_loop:
    cmp     r14, 16
    jge     .done
    bt      bx, r14w
    jnc     .next_reg
    inc     r13                         ; num_regs += 1
    mov     rdi, r14
    mov     rsi, 8                      ; qword name
    lea     rdx, [rsp + r15 + 24]       ; right behind the mnemonic
    call    get_register_name           ; rax = name, rdx = name length
    ; Names are 2 ("r8", "r9") or 3 characters long, so the newline position
    ; and the line length follow the length that was actually written.
    mov     byte [rax + rdx], 10
    lea     rcx, [r15 + rdx + 1]        ; line length = prefix + name + newline
    mov     rdi, [rsp]                  ; text
    cmp     qword [rsp + 16], -1
    jne     .reg_pop
    lea     rsi, [rsp + 24]
    mov     rdx, rcx
    call    vec_extend
    jmp     .next_reg
.reg_pop:
    mov     rsi, [rsp + 16]             ; insert position
    lea     rdx, [rsp + 24]
    call    vec_insert_many             ; rcx = line length
.next_reg:
    inc     r14
    jmp     .reg_loop
.done:
    add     rsp, 48
    mov     rax, r13
    pop     r13
    pop     r14
    pop     r15
    pop     rbx
    pop     rbp
    ret
;; Generates code for every statement of a block node, in order.
;; rdi: ctx
;; rsi: &function_ctx
;; rdx: block index
codegen_block:
    push    rbp
    mov     rbp, rsp
    push    r15                         ; r15 = number of statements
    push    r14                         ; r14 = statement index
    push    rbx                         ; rbx = pointer to the statement indices
    ; padding       [16..24]  keeps rsp 16-byte aligned at the calls
    ; &function_ctx [8..16]
    ; ctx           [0..8]
    sub     rsp, 24
    mov     [rsp], rdi                  ; ctx
    mov     [rsp + 8], rsi              ; &function_ctx
    mov     rdi, [rdi]                  ; ast
    mov     rsi, rdx                    ; block index
    call    vec_get
    ; Same AstNode layout as everywhere else in this file:
    ; kind at +0, data at +8, extra at +16.
    mov     rbx, [rax + 8]              ; AstNode.data
    mov     r15, [rax + 16]             ; AstNode.extra
    xor     r14, r14                    ; statement index
.stmt_loop:
    cmp     r14, r15
    jge     .stmt_loop_done
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rbx + r14 * 8]        ; statements[i]
    call    codegen_expr
    inc     r14
    jmp     .stmt_loop
.stmt_loop_done:
    add     rsp, 24
    pop     rbx
    pop     r14
    pop     r15
    pop     rbp
    ret
;; Generates code for one AST node and, recursively, for its children.
;; Dispatches on AstNode.kind; unknown kinds panic.
;; rdi: ctx
;; rsi: &function_ctx
;; rdx: expr index
;; returns: the Operand holding the result, packed header in rax and
;;          Operand.value in rdx (both zero for blocks and assignments)
;; define-fn: fn codegen_expr(ctx: *const CodegenCtx, function_ctx: &FunctionCtx, expr_idx: u64) -> (u64, bool)
codegen_expr:
    push    rbp
    mov     rbp, rsp
    push    rbx
    push    r15
    push    r14
    ; text scratch [80..88]  short strings handed to vec_extend / vec_push
    ; scratch [16..80]
    ; function_ctx: [8..16]
    ; ctx [0..8]
    ; 88 bytes keep rsp 16-byte aligned at every call.
    sub     rsp, 88
    mov     [rsp], rdi                  ; ctx
    mov     [rsp + 8], rsi              ; &function_ctx
    mov     [rsp + 16], rdx             ; expr index
    mov     rdi, [rdi]                  ; ast
    mov     rsi, rdx                    ; statement index
    call    vec_get
    ; rax: *AstNode
    mov     rbx, [rax]                  ; AstNode.kind
    cmp     bl, AST_RETURN_STATEMENT
    je      .return
    cmp     bl, AST_BLOCK
    je      .block
    cmp     bl, AST_VAR_DECL
    je      .var_decl
    cmp     bl, AST_VAR_REF
    je      .var_ref
    cmp     bl, AST_NUMBER
    je      .number
    cmp     bl, AST_BINARY_OP
    je      .binary_op
    cmp     bl, AST_ASSIGNMENT
    je      .assignment
    cmp     bl, AST_PLACE_TO_VALUE
    je      .place_to_value
    cmp     bl, AST_VALUE_TO_PLACE
    je      .value_to_place
    cmp     bl, AST_DEREF
    je      .deref
    cmp     bl, AST_ADDRESS_OF
    je      .address_of
    jmp     .panic

.block:
    mov     rbx, [rax + 8]              ; AstNode.data
    mov     r15, [rax + 16]             ; AstNode.extra
    xor     r14, r14                    ; statement index
.stmt_loop:
    cmp     r14, r15
    jge     .stmt_loop_done
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rbx + r14 * 8]        ; statements[i]
    call    codegen_expr
    inc     r14
    jmp     .stmt_loop
.stmt_loop_done:
    xor     rax, rax
    xor     rdx, rdx
    jmp     .done

.return:
    ; codegen inner expr
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rax + 8]              ; AstNode.data
    call    codegen_expr
    ; mov rax, {inner expr result}
    mov     [rsp + 16], rax
    mov     [rsp + 24], rdx
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel OPERAND_RAX]      ; dst
    lea     rdx, [rsp + 16]             ; src
    call    codegen_move_dst_src
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rsi, [rsp + 16]             ; src
    call    codegen_free_operand
    ; push "jmp .epilogue\n"
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel JMP_EPILOGUE]
    mov     rdx, JMP_EPILOGUE_LEN
    call    vec_extend
    ; construct return operand
    mov     rax, [rel OPERAND_RAX]
    mov     rdx, 0                      ; value = 0
    jmp     .done

.number:
    ; rax = *AstNode
    mov     rbx, [rax + 8]              ; AstNode.data
    mov     [rsp + 40], rbx             ; Imm value
    mov     rdi, [rsp + 8]              ; &function_ctx
    mov     rsi, 8                      ; width
    call    codegen_allocate_value
    mov     [rsp + 16], rax
    mov     [rsp + 24], rdx
    mov     rdx, [rsp + 40]             ; Imm value
    test    rdx, rdx
    jz      .load_imm                   ; skip if zero
    ; fill bits
    ; !(!1u64 << imm.ilog2()) >> 1;
    lzcnt   rax, rdx
    not     al                          ; low 6 bits now hold 63 - lzcnt = ilog2
    mov     rcx, -2
    shlx    rax, rcx, rax
    not     rax
    shr     rax, 1
    ; this gives a mask of the imm bits extended to the power of two bytes
    movsx   ecx, al                     ; sign-extend byte to dword
    movzx   ecx, cx                     ; keep the low word
    movsx   edx, ax                     ; sign-extend word to dword
    or      edx, ecx
    ; NOTE(review): this zero-extends (and drops the upper half of the mask);
    ; the original comment asked for a sign-extension to qword -- confirm.
    mov     eax, eax
    or      rdx, rax
    je      .load_imm                   ; skip if zero
    ; count bytes needed
    ; (z.ilog2() + 1) / 8
    lzcnt   rcx, rdx
    mov     edx, 64
    sub     edx, ecx
    shr     edx, 3
.load_imm:
    ; NOTE(review): setz overwrites dl with 0 or 1, so any byte count computed
    ; above is replaced (1 if it was zero, 0 otherwise). codegen_load_imm is not
    ; visible from here; verify what it expects before changing this.
    test    rdx, rdx
    setz    dl
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 16]             ; dst
    mov     rcx, rdx                    ; byte count
    mov     rdx, [rsp + 40]             ; immediate value
    call    codegen_load_imm
    mov     rax, qword [rsp + 16]
    mov     rdx, qword [rsp + 24]
    jmp     .done

.binary_op:
    ; scratch use in this arm:
    ;   [16..24] *AstBinaryOp   [32..48] left operand
    ;   [48..64] right operand  [64..80] saved copy of rdx (mul/div only)
    mov     rax, [rax + 8]              ; AstNode.data
    mov     [rsp + 16], rax             ; scratch = *AstBinaryOp
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rax + 0]              ; left operand index
    call    codegen_expr
    mov     [rsp + 32], rax             ; left operand
    mov     [rsp + 40], rdx
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rsp + 16]             ; *AstBinaryOp
    mov     rdx, [rdx + 16]             ; right operand index
    call    codegen_expr
    mov     [rsp + 48], rax             ; right operand
    mov     [rsp + 56], rdx
    ; pick the mnemonic text; rbx stays -1 for an unknown operator
    mov     rax, [rsp + 16]             ; *AstBinaryOp
    mov     al, byte [rax + 8]          ; operator
    mov     rbx, -1
    cmp     al, TOKEN_PLUS
    cmove   rbx, [rel ADD_]
    cmp     al, TOKEN_MINUS
    cmove   rbx, [rel SUB_]
    cmp     rbx, -1
    jne     .gen_op
    cmp     al, TOKEN_STAR
    cmove   rbx, [rel MUL_]
    cmp     al, TOKEN_SLASH
    cmove   rbx, [rel DIV_]
    cmp     al, TOKEN_PERCENT
    cmove   rbx, [rel DIV_]
    cmp     rbx, -1
    je      .panic                      ; unknown operator
.mul_div:
    ; mul/div need to clobber rax:rdx
    ; TODO only check for div
    mov     rax, [rsp + 8]              ; &function_ctx
    mov     ax, word [rax + 48]         ; register_bitset
    bt      ax, 3                       ; is rdx used?
    jnc     .after_spill_rdx
    ; allocate scratch value for rdx
    mov     rdi, [rsp + 8]              ; &function_ctx
    mov     rsi, 8                      ; width
    call    codegen_allocate_value
    mov     [rsp + 64], rax
    mov     [rsp + 72], rdx
    ; mov scratch, rdx
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 64]             ; scratch value
    lea     rdx, [rel OPERAND_RDX]      ; rdx
    call    codegen_move_dst_src
    ; check if rhs is rdx (compare kind and register)
    mov     rax, [rsp + 48]             ; right operand
    and     rax, 0xFFFF
    mov     rdx, [rel OPERAND_RDX]
    and     rdx, 0xFFFF
    cmp     rax, rdx
    jne     .after_spill_rdx
    ; free rhs
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rsi, [rsp + 48]             ; right operand
    call    codegen_free_operand
    ; move scratch to rhs, but preserve width
    movzx   rdx, word [rsp + 48 + 2]    ; rhs.width
    mov     word [rsp + 64 + 2], dx     ; scratch.width
    mov     rax, [rsp + 64]             ; scratch value
    mov     rdx, [rsp + 72]
    mov     [rsp + 48], rax             ; right operand
    mov     [rsp + 56], rdx
.after_spill_rdx:
    mov     rax, [rsp + 16]             ; *AstBinaryOp
    mov     al, byte [rax + 8]          ; operator
    cmp     al, TOKEN_STAR
    je      .after_clear_rdx
    ; clear rdx for div
    ; xor rdx, rdx
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel XOR_RDX_RDX]
    mov     rdx, XOR_RDX_RDX_LEN
    call    vec_extend
.after_clear_rdx:
    ; mov rax, lhs
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel OPERAND_RAX]      ; rax
    lea     rdx, [rsp + 32]             ; left operand
    call    codegen_move_dst_src
    ; op rhs
    mov     [rsp + 80], rbx             ; mnemonic text, first 4 bytes are emitted
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 80]             ; op
    mov     rdx, 4
    call    vec_extend
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 48]             ; right operand
    call    codegen_write_operand
    mov     byte [rsp + 80], 10
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 80]             ; newline
    call    vec_push
    mov     rax, [rsp + 16]             ; *AstBinaryOp
    mov     al, byte [rax + 8]          ; operator
    cmp     al, TOKEN_PERCENT
    jne     .after_rem
    ; mov rax, rdx // only for rem
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel OPERAND_RAX]      ; rax
    lea     rdx, [rel OPERAND_RDX]      ; rdx
    call    codegen_move_dst_src
.after_rem:
    ; If rhs was rdx it has been freed above, the bit is clear, and the scratch
    ; slot is released as "rhs" further down instead.
    mov     rax, [rsp + 8]              ; &function_ctx
    mov     ax, word [rax + 48]         ; register_bitset
    bt      ax, 3                       ; is rdx used?
    jnc     .after_unspill_rdx
    ; mov rdx, scratch
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel OPERAND_RDX]      ; rdx
    lea     rdx, [rsp + 64]             ; scratch value
    call    codegen_move_dst_src
    ; free scratch
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rsi, [rsp + 64]             ; scratch value
    call    codegen_free_operand
.after_unspill_rdx:
    ; free [rhs, lhs]
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rsi, [rsp + 48]             ; right operand
    call    codegen_free_operand
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rsi, [rsp + 32]             ; left operand
    call    codegen_free_operand
    ; alloca dst
    mov     rdi, [rsp + 8]              ; &function_ctx
    mov     rsi, 8                      ; width
    call    codegen_allocate_value
    mov     [rsp + 32], rax
    mov     [rsp + 40], rdx
    ; mov dst, rax
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 32]             ; dst
    lea     rdx, [rel OPERAND_RAX]      ; rax
    call    codegen_move_dst_src
    ; return dst
    mov     rax, [rsp + 32]
    mov     rdx, [rsp + 40]
    jmp     .done
.gen_op:
    ; add/sub: whatever codegen_binary_op_rm64_rm64 returns is passed through
    mov     rdi, [rsp + 8]              ; &function_ctx
    mov     rsi, [rsp]                  ; ctx
    lea     rsi, [rsi + 8]              ; &ctx.text
    lea     rdx, [rsp + 32]             ; left operand
    lea     rcx, [rsp + 48]             ; right operand
    mov     r8, rbx                     ; operation
    call    codegen_binary_op_rm64_rm64
    jmp     .done

.var_decl:
    ; allocate place for variable
    mov     rdi, [rsp + 8]              ; &function_ctx
    mov     rsi, 8                      ; size_of::<u64>
    call    codegen_allocate_place
    mov     [rsp + 64], rax             ; Operand
    mov     [rsp + 72], rdx             ; Operand.value
    neg     rdx                         ; stack_vars stores the offset as a positive number
    mov     [rsp + 24], rdx             ; &(_, offset); [rsp + 16] still holds the expr index
    ; Register the variable in function_ctx.stack_vars. The vector lives at
    ; offset 8 of the FunctionCtx, not at [rsp + 8] of this frame.
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rdi, [rdi + 8]              ; stack_vars
    lea     rsi, [rsp + 16]             ; &(index, offset)
    lea     rdx, [rel stackvar_cmp]
    mov     rcx, 0
    call    vec_insert_sorted
    mov     rax, [rsp + 64]             ; Operand
    mov     rdx, [rsp + 72]             ; Operand.value
    jmp     .done

.var_ref:
    mov     rax, [rax + 8]              ; AstNode.data
    mov     rax, [rax]                  ; variable index
    mov     [rsp + 16], rax
    mov     qword [rsp + 24], 0
    ; lookup variable in stack_vars
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rdi, [rdi + 8]              ; stack_vars
    lea     rsi, [rsp + 16]             ; &(index, offset)
    lea     rdx, [rel stackvar_cmp]
    mov     rcx, 0
    call    vec_binary_search_by
    cmp     rdx, 1
    je      .panic                      ; variable not found
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rdi, [rdi + 8]              ; stack_vars
    mov     rsi, rax                    ; index
    call    vec_get
    mov     rdx, [rax + 8]              ; offset
    neg     rdx
    mov     rax, qword [rel OPERAND_RBP_OFFS]
    jmp     .done

.place_to_value:
    ; codegen inner expr
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rax + 8]              ; AstNode.data
    call    codegen_expr
    mov     [rsp + 16], rax
    mov     [rsp + 24], rdx
    cmp     al, OPERAND_RBP_OFFSET
    je      .ptv_rbp_offset
    cmp     al, OPERAND_ADDRESS_PLACE
    je      .ptv_address_place
    cmp     al, OPERAND_REGISTER_PLACE
    je      .ptv_register_place
    cmp     al, OPERAND_RBP_PLACE
    je      .ptv_rbp_place
    ; NOTE(review): any other kind falls through and is retagged as
    ; OPERAND_RBP_PVALUE below -- confirm whether this should panic instead.
.ptv_rbp_offset:
    mov     byte [rsp + 16], OPERAND_RBP_PVALUE
    jmp     .ptv_done
.ptv_address_place:
    mov     byte [rsp + 16], OPERAND_ADDRESS_VALUE
    jmp     .ptv_done
.ptv_register_place:
    mov     rax, [rsp + 16]
    mov     rdx, [rsp + 24]
    mov     [rsp + 32], rax
    mov     [rsp + 40], rdx
    ; mov reg, [reg]: same register, dst retagged as a plain value
    mov     byte [rsp + 16], OPERAND_REGISTER
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 16]             ; dst
    lea     rdx, [rsp + 32]             ; src
    mov     rcx, 'mov '
    call    codegen_binary_op_unchecked
    jmp     .ptv_done
.ptv_rbp_place:
    ; mov rax, [src]
    ; mov rax, [rax]
    ; mov [src], rax
    mov     rdi, [rsp]
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel OPERAND_RAX]      ; dst
    lea     rdx, [rsp + 16]             ; src
    call    codegen_move_dst_src
    ; NOTE(review): the arguments for the second and third move are set up
    ; below, but neither is followed by a call, so only the first move is
    ; emitted. Left unchanged until the intended source of the third move
    ; (rax vs [rax]) is confirmed.
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rel OPERAND_RAX]      ; rax
    lea     rdx, [rel OPERAND_RAX_P]    ; [rax]
    mov     byte [rsp + 16], OPERAND_RBP_VALUE
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 16]             ; dst
    lea     rdx, [rel OPERAND_RAX_P]    ; [rax]
    jmp     .ptv_done
.ptv_done:
    mov     rax, [rsp + 16]
    mov     rdx, [rsp + 24]
    jmp     .done

.value_to_place:
    ; codegen inner expr
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rax + 8]              ; AstNode.data
    call    codegen_expr
    mov     [rsp + 16], rax
    mov     [rsp + 24], rdx
    cmp     al, OPERAND_RBP_PVALUE
    je      .vtp_rbp_pvalue
    cmp     al, OPERAND_ADDRESS_VALUE
    je      .vtp_address_value
    cmp     al, OPERAND_LAST_VALUE
    jg      .panic                      ; already a place (or not a value at all)
    ; dst = allocate_place
    mov     rdi, [rsp + 8]              ; &function_ctx
    movzx   rsi, word [rsp + 16 + 2]    ; width
    call    codegen_allocate_place
    mov     [rsp + 32], rax
    mov     [rsp + 40], rdx
    ; mov dst, src
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 32]             ; dst
    lea     rdx, [rsp + 16]             ; src
    call    codegen_move_dst_src
    ; free src
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rsi, [rsp + 16]             ; src
    call    codegen_free_operand
    ; return dst
    mov     rax, [rsp + 32]
    mov     rdx, [rsp + 40]
    jmp     .done
.vtp_rbp_pvalue:
    mov     byte [rsp + 16], OPERAND_RBP_OFFSET
    jmp     .vtp_done
.vtp_address_value:
    mov     byte [rsp + 16], OPERAND_ADDRESS_PLACE
    jmp     .vtp_done
.vtp_done:
    mov     rax, [rsp + 16]
    mov     rdx, [rsp + 24]
    jmp     .done

.assignment:
    mov     rcx, [rax + 8]              ; AstNode.data
    mov     rdx, [rax + 16]             ; AstNode.extra
    mov     [rsp + 16], rcx             ; dst
    mov     [rsp + 24], rdx             ; src
    ; codegen src (rdx already holds the src expr index)
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    call    codegen_expr
    mov     [rsp + 32], rax             ; src
    mov     [rsp + 40], rdx
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rsp + 16]             ; dst
    call    codegen_expr
    mov     [rsp + 48], rax             ; dst
    mov     [rsp + 56], rdx
    ; mov dst, src
    mov     rdi, [rsp]                  ; ctx
    lea     rdi, [rdi + 8]              ; &ctx.text
    lea     rsi, [rsp + 48]             ; dst
    lea     rdx, [rsp + 32]             ; src
    call    codegen_move_dst_src
    ; free [dst, src]
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rsi, [rsp + 48]             ; dst
    call    codegen_free_operand
    ; src lives at [rsp + 32]; freeing [rsp + 48] a second time leaked the
    ; source operand and double-freed the destination.
    mov     rdi, [rsp + 8]              ; &function_ctx
    lea     rsi, [rsp + 32]             ; src
    call    codegen_free_operand
    xor     rax, rax
    xor     rdx, rdx
    jmp     .done

.deref:
.address_of:
    ; codegen inner expr; its operand is passed through unchanged
    mov     rdi, [rsp]                  ; ctx
    mov     rsi, [rsp + 8]              ; &function_ctx
    mov     rdx, [rax + 8]              ; AstNode.data
    call    codegen_expr
    jmp     .done

.done:
    add     rsp, 88
    pop     r14
    pop     r15
    pop     rbx
    pop     rbp
    ret
.panic:
    call    panic
;; start-structs
;; struct Operand {
;; kind: u8,
;; register: u8,
;; width: u16,
;; len: u16,
;; value: u64,
;; }
;; end-structs
;; or: register: u4, width: u4
; The code in this file passes an Operand around as two registers: the first
; eight bytes (kind in byte 0, register in byte 1, width in bytes 2..3) in rax
; and `value` in rdx.
section .rdata
;; start-consts
; Register containing a Value
OPERAND_REGISTER equ 1 ; e.g. rax, rbx
; RBP-relative stack-slot containing a Value, semantically not addressable
OPERAND_RBP_VALUE equ 2 ; e.g. [rbp + 16]
; RBP-relative stack-slot containing a Value, but addressable (derived from a Place)
OPERAND_RBP_PVALUE equ 3 ; e.g. mov [rbp - 8]
; Static-slot containing a Value
OPERAND_ADDRESS_VALUE equ 4 ; e.g. mov [rel OPERAND_ADDRESS]
OPERAND_LAST_VALUE equ 5 ; operand kinds > this are places
; RBP-relative stack-slot containing a Value, semantically addressable
OPERAND_RBP_OFFSET equ 6 ; e.g. mov [rbp - 8], rax or lea rax, [rbp - 8]
; Address of a static-slot containing a Value
OPERAND_ADDRESS_PLACE equ 7 ; e.g. lea [rel OPERAND_ADDRESS]
; Register containing an address pointing at a Value (Place)
OPERAND_REGISTER_PLACE equ 8 ; e.g. [rax]
; RBP-relative stack-slot containing an address pointing at a Value (Place)
OPERAND_RBP_PLACE equ 9 ; e.g. mov rax, [rbp - 8]; mov [rax], src
OPERAND_LAST_PLACE equ 10 ; operand kinds > this are not memory operands
; Immediate constant Value
OPERAND_IMMEDIATE equ 11 ; e.g. 0x10
; Special Operands whose semantics aren't fully defined yet
OPERAND_CONSTANT equ 12 ; e.g. OPERAND_CONSTANT
OPERAND_LABEL equ 13 ; e.g. label_1234
;; end-consts
; Size keywords for memory operands
WIDTH_BYTE db 'byte '
WIDTH_WORD db 'word '
WIDTH_DWORD db 'dword '
WIDTH_QWORD db 'qword '
; Prebuilt Operands, 16 bytes each. The first qword is the packed header
; 0x{width:4}_{register:2}{kind:2}, the second qword is `value`.
; All of them are 8 bytes wide with value 0.
; Operand { kind: IMMEDIATE, register: 0, width: 8, len: 0, padding: 0, value: 0 }
align 8
OPERAND_IMM dq 0x0008_000b, 0
; rax as a value
align 8
OPERAND_RAX dq 0x0008_0001, 0
; [rax]: rax holding the address of a value
align 8
OPERAND_RAX_P dq 0x0008_0008, 0
; rdx as a value (register index 3)
align 8
OPERAND_RDX dq 0x0008_0301, 0
; [rdx]: rdx holding the address of a value
align 8
OPERAND_RDX_P dq 0x0008_0308, 0
; Templates for the rbp-relative kinds; callers supply the offset as `value`
align 8
OPERAND_RBP_OFFS dq 0x0008_0006, 0
align 8
OPERAND_RBP_PV dq 0x0008_0003, 0
align 8
OPERAND_RBP_V dq 0x0008_0002, 0
align 8
OPERAND_RBP_P dq 0x0008_0009, 0
section .text
;; Appends the NASM source text for one Operand to the output buffer.
;;
;; rdi: *text
;; rsi: op: *Operand
;;
;; Text emitted per op.kind:
;;   REGISTER                          {reg}
;;   REGISTER_PLACE                    [{reg}]
;;   RBP_VALUE, RBP_PVALUE,
;;   RBP_OFFSET, RBP_PLACE             {width} [rbp  + {n}]  /  {width} [rbp {n}] for negative n
;;   ADDRESS_VALUE, ADDRESS_PLACE      [rel {name}]
;;   IMMEDIATE                         {value} in decimal
;;   CONSTANT                          {name}
;;   LABEL, or any other kind          panics
;;
;; Operand fields read here: kind = byte [op + 0], register = byte [op + 1],
;; width = word [op + 2], len = word [op + 4], value = qword [op + 8].
codegen_write_operand:
    push    rbp
    mov     rbp, rsp
    push    rbx
    ; scratch  [16..40]
    ; *operand [8..16]
    ; *text    [0..8]
    sub     rsp, 40                         ; 2 pushes + 40 keeps rsp 16-byte aligned at calls
    mov     [rsp], rdi                      ; *text
    mov     [rsp + 8], rsi                  ; op
    mov     bl, byte [rsi + 0]              ; op.kind
    cmp     bl, OPERAND_REGISTER
    je      .register
    cmp     bl, OPERAND_RBP_OFFSET
    je      .rbp_offset
    cmp     bl, OPERAND_RBP_VALUE
    je      .rbp_offset
    cmp     bl, OPERAND_RBP_PVALUE
    je      .rbp_offset
    cmp     bl, OPERAND_ADDRESS_VALUE
    je      .address
    cmp     bl, OPERAND_ADDRESS_PLACE
    je      .address
    cmp     bl, OPERAND_IMMEDIATE
    je      .immediate
    cmp     bl, OPERAND_CONSTANT
    je      .constant
    cmp     bl, OPERAND_LABEL
    je      .label
    cmp     bl, OPERAND_REGISTER_PLACE
    je      .reg_p
    cmp     bl, OPERAND_RBP_PLACE
    je      .rbp_offset
    jmp     .panic

.reg_p:
    ; [{reg}]
    ; NOTE(review): the register name is chosen from Operand.width, so a width
    ; other than 8 names a sub-register inside the brackets - confirm callers
    ; only build register places with width 8.
    mov     byte [rsp + 16], '['
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    call    vec_push
    mov     rbx, [rsp + 8]                  ; op
    movzx   rdi, byte [rbx + 1]             ; Operand.register
    movzx   rsi, word [rbx + 2]             ; Operand.width
    lea     rdx, [rsp + 16]                 ; buffer
    call    get_register_name               ; -> rax = ptr, rdx = len
    mov     rdi, [rsp]                      ; *text
    mov     rsi, rax                        ; name ptr (rdx still holds its length)
    call    vec_extend
    mov     byte [rsp + 16], ']'
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    call    vec_push
    ; Must not fall through into .register: that would append a second,
    ; bogus register name after the closing bracket.
    jmp     .epilogue

.register:
    ; {reg}
    mov     rbx, [rsp + 8]                  ; op
    movzx   rdi, byte [rbx + 1]             ; Operand.register
    movzx   rsi, word [rbx + 2]             ; Operand.width
    lea     rdx, [rsp + 16]                 ; buffer
    call    get_register_name               ; -> rax = ptr, rdx = len
    mov     rdi, [rsp]                      ; *text
    mov     rsi, rax                        ; name ptr (rdx still holds its length)
    call    vec_extend
    jmp     .epilogue

.rbp_offset:
    ; {width} [rbp {+/-} offset]
    mov     rax, [rsp + 8]                  ; op
    movzx   rsi, word [rax + 2]             ; Operand.width
    mov     rdi, [rsp]                      ; *text
    call    codegen_write_width
    mov     byte [rsp + 16], '['
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    call    vec_push
    mov     qword [rsp + 16], 'rbp '
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    mov     rdx, 4
    call    vec_extend
    ; if value >= 0, an explicit " + " is needed; for negative values nothing
    ; is written here (presumably int_to_str2 emits the '-' itself)
    mov     rax, [rsp + 8]                  ; op
    mov     rdi, [rax + 8]                  ; op.value
    cmp     rdi, 0
    jl      .skip_plus
    mov     qword [rsp + 16], ' + '
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    mov     rdx, 3
    call    vec_extend
.skip_plus:
    ; write offset
    mov     rax, [rsp + 8]                  ; op
    mov     rdi, [rax + 8]                  ; op.value
    lea     rsi, [rsp + 16]                 ; scratch buffer
    mov     rdx, 24                         ; max length (size of scratch)
    mov     rcx, 10                         ; radix
    call    int_to_str2                     ; -> rax = ptr, rdx = len
    mov     rdi, [rsp]                      ; *text
    mov     rsi, rax                        ; digits ptr (rdx still holds their length)
    call    vec_extend
    mov     byte [rsp + 16], ']'
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    call    vec_push
    jmp     .epilogue

.address:
    ; [rel {name}]
    mov     byte [rsp + 16], '['
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    call    vec_push
    mov     qword [rsp + 16], 'rel '
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    mov     rdx, 4
    call    vec_extend
    ; write address name
    mov     rax, [rsp + 8]                  ; op
    mov     rdi, [rsp]                      ; *text
    mov     rsi, [rax + 8]                  ; Operand.value (name ptr)
    movzx   edx, word [rax + 4]             ; Operand.len, zero-extended to 64 bits
    call    vec_extend
    mov     byte [rsp + 16], ']'
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rsp + 16]
    call    vec_push
    jmp     .epilogue

.immediate:
    ; write immediate value
    mov     rax, [rsp + 8]                  ; op
    mov     rdi, [rax + 8]                  ; op.value
    lea     rsi, [rsp + 16]                 ; scratch buffer
    mov     rdx, 24                         ; max length (size of scratch)
    mov     rcx, 10                         ; radix
    call    int_to_str2                     ; -> rax = ptr, rdx = len
    mov     rdi, [rsp]                      ; *text
    mov     rsi, rax                        ; digits ptr (rdx still holds their length)
    call    vec_extend
    jmp     .epilogue

.constant:
    ; write constant name
    mov     rax, [rsp + 8]                  ; op
    mov     rdi, [rsp]                      ; *text
    mov     rsi, [rax + 8]                  ; op.value (name ptr)
    movzx   edx, word [rax + 4]             ; op.len - same field as in .address (offset 2 is width)
    call    vec_extend
    jmp     .epilogue

.label:
    ; labels are not supported yet
    jmp     .panic

.epilogue:
    add     rsp, 40
    pop     rbx
    pop     rbp
    ret

.panic:
    call    panic
;; Appends the NASM size specifier matching an operand width to the output text.
;;
;; rdi: *text
;; rsi: width: u8 (only sil is inspected)
;; returns rax: number of bytes appended
;;
;;   8 => "qword "   4 => "dword "   2 => "word "   1 or 0 => "byte "
;;   every other width panics
codegen_write_width:
    ; Candidate string and length are loaded before each compare; lea/mov leave
    ; the flags alone, so the compare result is what the branch sees.
    lea     rdx, [rel WIDTH_QWORD]
    mov     ecx, 6                          ; "qword " and "dword " are 6 bytes
    cmp     sil, 8
    je      .emit
    lea     rdx, [rel WIDTH_DWORD]
    cmp     sil, 4
    je      .emit
    lea     rdx, [rel WIDTH_WORD]
    mov     ecx, 5                          ; "word " and "byte " are 5 bytes
    cmp     sil, 2
    je      .emit
    lea     rdx, [rel WIDTH_BYTE]
    cmp     sil, 1
    jbe     .emit                           ; width 1, and width 0 as well
    jmp     .unsupported
.emit:
    push    rcx                             ; keep the length across the call (also realigns rsp)
    mov     rsi, rdx                        ; string
    mov     rdx, rcx                        ; length
    call    vec_extend
    pop     rax                             ; length written
    ret
.unsupported:
    call    panic
;; rdi: *function_ctx
;; rsi: *text
;; rdx: lhs: *Operand
;; rcx: rhs: *Operand
;; r8: op: [u8; 8]
;; Generates: {op} {lhs}, {rhs} for a binary operation that has the encodings rN, rmN and rmN, rN
;;
;; Returns the Operand that holds the result in rax:rdx (header qword in rax,
;; value qword in rdx). Operands that are consumed are released through
;; codegen_free_operand. Operands of kind < OPERAND_RBP_PVALUE are overwritten
;; in place; anything else is first copied to rax or to a freshly allocated
;; destination, so the generated code clobbers rax on those paths.
codegen_binary_op_rm64_rm64:
    push    rbp
    mov     rbp, rsp
    push    rbx
    ; *function_ctx [0..8]
    ; *text         [8..16]
    ; lhs           [16..24]
    ; rhs           [24..32]
    ; op            [32..40]
    ; dst           [40..56]
    sub     rsp, 56                         ; 2 pushes + 56 keeps rsp 16-byte aligned at calls
    mov     [rsp], rdi                      ; *function_ctx
    mov     [rsp + 8], rsi                  ; *text
    mov     [rsp + 16], rdx                 ; lhs
    mov     [rsp + 24], rcx                 ; rhs
    mov     [rsp + 32], r8                  ; op
    ; match (lhs.kind, rhs.kind) {
    cmp     byte [rdx + 0], OPERAND_REGISTER
    sete    al
    cmp     byte [rcx + 0], OPERAND_RBP_PVALUE
    setb    bl
    test    al, bl
    jnz     .simple
    ; (REGISTER, kind < RBP_PVALUE) => simple
    cmp     byte [rdx + 0], OPERAND_RBP_PVALUE
    setb    al
    cmp     byte [rcx + 0], OPERAND_REGISTER
    sete    bl
    test    al, bl
    jnz     .simple
    ; (kind < RBP_PVALUE, REGISTER) => simple
    test    al, al
    jnz     .rax_lhs
    ; (kind < RBP_PVALUE, _) => rax_lhs
    cmp     byte [rcx + 0], OPERAND_RBP_PVALUE ; rhs.kind - lhs is already known to be >= RBP_PVALUE
    jb      .rax_rhs
    ; (_, kind < RBP_PVALUE) => rax_rhs
    jmp     .rax_dst
    ; (_, _) => rax_dst
    ; }

.simple:
    ; { op lhs, rhs; lhs }
    mov     rdi, [rsp + 8]                  ; *text
    mov     rsi, [rsp + 16]                 ; lhs
    mov     rdx, [rsp + 24]                 ; rhs
    mov     rcx, [rsp + 32]                 ; op
    call    codegen_binary_op_unchecked
    jmp     .free_rhs_ret_lhs

.rax_lhs:
    ; { mov rax, rhs; op lhs, rax; lhs }
    ; mov rax, rhs
    mov     rdi, [rsp + 8]                  ; *text
    lea     rsi, [rel OPERAND_RAX]
    mov     rdx, [rsp + 24]                 ; rhs
    call    codegen_move_dst_src
    ; op lhs, rax
    mov     rdi, [rsp + 8]                  ; *text
    mov     rsi, [rsp + 16]                 ; lhs
    lea     rdx, [rel OPERAND_RAX]          ; rax
    mov     rcx, [rsp + 32]                 ; op
    call    codegen_binary_op_unchecked
.free_rhs_ret_lhs:
    ; shared tail of .simple and .rax_lhs
    ; free rhs
    mov     rdi, [rsp]                      ; *function_ctx
    mov     rsi, [rsp + 24]                 ; rhs
    call    codegen_free_operand
    ; ret lhs
    mov     rbx, [rsp + 16]                 ; lhs
    mov     rax, [rbx]
    mov     rdx, [rbx + 8]
    jmp     .epilogue

.rax_rhs:
    ; { mov rax, lhs; op rax, rhs; mov rhs, rax; rhs }
    ; mov rax, lhs
    mov     rdi, [rsp + 8]                  ; *text
    lea     rsi, [rel OPERAND_RAX]
    mov     rdx, [rsp + 16]                 ; lhs
    call    codegen_move_dst_src
    ; op rax, rhs
    mov     rdi, [rsp + 8]                  ; *text
    lea     rsi, [rel OPERAND_RAX]          ; rax
    mov     rdx, [rsp + 24]                 ; rhs
    mov     rcx, [rsp + 32]                 ; op
    call    codegen_binary_op_unchecked
    ; mov rhs, rax - the result is in rax, but rhs is the operand handed back
    mov     rdi, [rsp + 8]                  ; *text
    mov     rsi, [rsp + 24]                 ; rhs
    lea     rdx, [rel OPERAND_RAX]          ; rax
    call    codegen_move_dst_src
    ; free lhs
    mov     rdi, [rsp]                      ; *function_ctx
    mov     rsi, [rsp + 16]                 ; lhs
    call    codegen_free_operand
    ; ret rhs
    mov     rbx, [rsp + 24]                 ; rhs
    mov     rax, [rbx]
    mov     rdx, [rbx + 8]
    jmp     .epilogue

.rax_dst:
    ; { dst = allocate_value; mov dst, lhs; mov rax, rhs; op dst, rax; dst }
    ; dst = allocate_value
    mov     rdi, [rsp]                      ; *function_ctx
    mov     rsi, 8                          ; width = 8
    call    codegen_allocate_value
    mov     [rsp + 40], rax                 ; dst (header)
    mov     [rsp + 48], rdx                 ; dst (value)
    ; mov dst, lhs
    mov     rdi, [rsp + 8]                  ; *text
    lea     rsi, [rsp + 40]                 ; dst
    mov     rdx, [rsp + 16]                 ; lhs
    call    codegen_move_dst_src
    ; mov rax, rhs
    mov     rdi, [rsp + 8]                  ; *text
    lea     rsi, [rel OPERAND_RAX]          ; rax
    mov     rdx, [rsp + 24]                 ; rhs
    call    codegen_move_dst_src
    ; op dst, rax
    mov     rdi, [rsp + 8]                  ; *text
    lea     rsi, [rsp + 40]                 ; dst
    lea     rdx, [rel OPERAND_RAX]          ; rax
    mov     rcx, [rsp + 32]                 ; op
    call    codegen_binary_op_unchecked
    ; free rhs
    mov     rdi, [rsp]                      ; *function_ctx
    mov     rsi, [rsp + 24]                 ; rhs
    call    codegen_free_operand
    ; free lhs
    mov     rdi, [rsp]                      ; *function_ctx
    mov     rsi, [rsp + 16]                 ; lhs
    call    codegen_free_operand
    ; ret dst - read from the slots it was stored in ([rsp + 32] is the op text)
    mov     rax, [rsp + 40]                 ; dst (header)
    mov     rdx, [rsp + 48]                 ; dst (value)
    ; }
.epilogue:
    add     rsp, 56
    pop     rbx
    pop     rbp
    ret
;; Writes one two-operand instruction line, "{op}{lhs}, {rhs}\n", without
;; checking that the operand combination is encodable.
;;
;; rdi: *text
;; rsi: lhs: *Operand
;; rdx: rhs: *Operand
;; rcx: op: [u8; 8] - mnemonic text passed by value; its length is taken with
;;      strlen, so it has to contain a NUL byte
codegen_binary_op_unchecked:
    push    rbp
    mov     rbp, rsp
    sub     rsp, 32
    ; locals, addressed from the frame pointer:
    ;   [rbp - 32] *text
    ;   [rbp - 24] lhs
    ;   [rbp - 16] rhs
    ;   [rbp - 8]  op (its last byte doubles as the newline scratch byte later)
    mov     [rbp - 8], rcx
    mov     [rbp - 16], rdx
    mov     [rbp - 24], rsi
    mov     [rbp - 32], rdi

    ; {op}
    lea     rdi, [rbp - 8]
    call    strlen
    mov     rdx, rax                        ; mnemonic length
    lea     rsi, [rbp - 8]
    mov     rdi, [rbp - 32]
    call    vec_extend

    ; {lhs}
    mov     rsi, [rbp - 24]
    mov     rdi, [rbp - 32]
    call    codegen_write_operand

    ; ", " - the first two bytes of the ", rax" constant
    mov     rdx, 2
    lea     rsi, [rel COMMA_RAX]
    mov     rdi, [rbp - 32]
    call    vec_extend

    ; {rhs}
    mov     rsi, [rbp - 16]
    mov     rdi, [rbp - 32]
    call    codegen_write_operand

    ; line terminator; op is no longer needed, so its storage is reused
    mov     byte [rbp - 1], 10
    lea     rsi, [rbp - 1]
    mov     rdi, [rbp - 32]
    call    vec_push

    leave
    ret
;; Emits the instruction(s) that load an immediate into a value operand.
;;
;; rdi: *text
;; rsi: dst: *Operand (kind must be <= OPERAND_LAST_VALUE, otherwise panics)
;; rdx: bits
;; rcx: width (must be <= 8, otherwise panics)
;;
;; width < 8:  mov dst, bits
;; width == 8: mov rax, bits; mov dst, rax
;;             (goes through rax because there is no mov m64, imm64 encoding;
;;              the generated code clobbers rax)
codegen_load_imm:
    push    rbp
    mov     rbp, rsp
    ; *text [0..8]
    ; dst   [8..16]
    ; imm   [16..32]  Operand built from the OPERAND_IMM template
    sub     rsp, 32
    mov     [rsp], rdi                      ; *text
    mov     [rsp + 8], rsi                  ; dst
    mov     rax, [rel OPERAND_IMM]
    mov     [rsp + 16], rax                 ; imm header (kind, register, width, len)
    mov     [rsp + 24], rdx                 ; imm.value = bits
    cmp     byte [rsi + 0], OPERAND_LAST_VALUE
    ja      .panic                          ; unsupported dst kind (kind is unsigned)
    cmp     rcx, 8
    jb      .direct
    ja      .panic                          ; unsupported immediate size
.indirect:
    ; mov rax, bits
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rel OPERAND_RAX]
    lea     rdx, [rsp + 16]                 ; imm
    mov     rcx, 'mov '
    call    codegen_binary_op_unchecked
    ; mov dst, rax
    mov     rdi, [rsp]                      ; *text
    mov     rsi, [rsp + 8]                  ; dst
    lea     rdx, [rel OPERAND_RAX]
    mov     rcx, 'mov '
    call    codegen_binary_op_unchecked
    ; done - must not fall through and emit "mov dst, bits" as well
    jmp     .epilogue
.direct:
    ; mov dst, bits
    mov     rdi, [rsp]                      ; *text
    mov     rsi, [rsp + 8]                  ; dst
    lea     rdx, [rsp + 16]                 ; imm
    mov     rcx, 'mov '
    call    codegen_binary_op_unchecked
.epilogue:
    add     rsp, 32
    pop     rbp
    ret
.panic:
    call    panic
;; Emits the instruction(s) that copy src into dst.
;;
;; rdi: *text
;; rsi: dst: *Operand
;; rdx: src: *Operand
;;
;; Mnemonic: "mov " when src.kind < OPERAND_LAST_VALUE or src is an
;; OPERAND_RBP_PLACE, "lea " for every other place kind.
;;
;; Shapes emitted:
;;   dst or src is a REGISTER   {op} dst, src
;;   dst is an RBP_PLACE        xchg rdx, dst; {op} rax, src; mov [rdx], rax; xchg rdx, dst
;;   otherwise                  {op} rax, src; mov dst, rax
;; The last two shapes clobber rax in the generated code.
;;
;; Panics when either kind is > OPERAND_LAST_PLACE or when the widths differ.
;;
;; NOTE(review): an RBP_PLACE dst combined with a REGISTER src takes the direct
;; shape and therefore writes the stack slot itself, while the xchg shape
;; stores through the pointer held in the slot - confirm which one is intended.
codegen_move_dst_src:
    push    rbp
    mov     rbp, rsp
    ; *text [0..8]
    ; dst   [8..16]
    ; src   [16..24]
    ; op    [24..32]
    ; No callee-saved register is needed, so the frame is just rbp + 32 bytes,
    ; which keeps rsp 16-byte aligned at every call below.
    sub     rsp, 32
    mov     [rsp], rdi                      ; *text
    mov     [rsp + 8], rsi                  ; dst
    mov     [rsp + 16], rdx                 ; src
    mov     qword [rsp + 24], 'mov '        ; op
    cmp     byte [rsi + 0], OPERAND_LAST_PLACE
    ja      .panic                          ; unsupported dst kind (kind is unsigned)
    cmp     byte [rdx + 0], OPERAND_LAST_PLACE
    ja      .panic                          ; unsupported src kind
    ; op = src.is_place().then("lea ").else("mov ")
    cmp     byte [rdx + 0], OPERAND_LAST_VALUE
    jb      .mov
    ; an address stored in a stack slot has to be moved
    cmp     byte [rdx + 0], OPERAND_RBP_PLACE
    je      .mov
    mov     qword [rsp + 24], 'lea '
.mov:
    ; if dst.width != src.width
    mov     ax, word [rsi + 2]              ; dst.width
    cmp     ax, word [rdx + 2]              ; src.width
    jne     .panic                          ; mismatched widths
    ; Disabled check kept from the original source:
    ; ; if dst.width == 8 && src.kind == OPERAND_ADDRESS
    ; xor rbx, rbx
    ; cmp cx, 8
    ; cmovne rbx, [rdx + 0]
    ; cmp bl, OPERAND_ADDRESS
    ; je .panic ; address can only be moved to full-sized destinations
    cmp     byte [rsi + 0], OPERAND_REGISTER
    je      .do_move                        ; if dst == register, do move
    ; If dst != register and src != register, we cannot move directly into memory:
    ; there is no MOV m64, m64 instruction.
    cmp     byte [rdx + 0], OPERAND_REGISTER
    je      .do_move
.indirect_rax:
    cmp     byte [rsi + 0], OPERAND_RBP_PLACE
    je      .indirect_xchg
    ; op rax, [src]
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rel OPERAND_RAX]
    mov     rdx, [rsp + 16]                 ; src
    mov     rcx, [rsp + 24]                 ; op
    call    codegen_binary_op_unchecked
    ; mov [dst], rax
    mov     rdi, [rsp]                      ; *text
    mov     rsi, [rsp + 8]                  ; dst
    lea     rdx, [rel OPERAND_RAX]
    mov     rcx, 'mov '
    call    codegen_binary_op_unchecked
    jmp     .epilogue
.indirect_xchg:
    ; xchg rdx, dst   (rdx = pointer held in the slot, slot = old rdx)
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rel OPERAND_RDX]          ; rdx
    mov     rdx, [rsp + 8]                  ; dst
    mov     rcx, 'xchg '
    call    codegen_binary_op_unchecked
    ; op rax, src
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rel OPERAND_RAX]
    mov     rdx, [rsp + 16]                 ; src
    mov     rcx, [rsp + 24]                 ; op
    call    codegen_binary_op_unchecked
    ; mov [rdx], rax
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rel OPERAND_RDX_P]        ; [rdx]
    lea     rdx, [rel OPERAND_RAX]          ; rax
    mov     rcx, 'mov '
    call    codegen_binary_op_unchecked
    ; xchg rdx, dst   (restore both rdx and the slot)
    mov     rdi, [rsp]                      ; *text
    lea     rsi, [rel OPERAND_RDX]          ; rdx
    mov     rdx, [rsp + 8]                  ; dst
    mov     rcx, 'xchg '
    call    codegen_binary_op_unchecked
    jmp     .epilogue
.do_move:
    ; op dst, src
    mov     rdi, [rsp]                      ; *text
    mov     rsi, [rsp + 8]                  ; dst
    mov     rdx, [rsp + 16]                 ; src
    mov     rcx, [rsp + 24]                 ; op
    call    codegen_binary_op_unchecked
.epilogue:
    add     rsp, 32
    pop     rbp
    ret
.panic:
    call    panic
section .rdata
; Instruction-text fragments appended verbatim to the generated assembly.
; Each *_LEN symbol is the byte length of the string defined just before it.
XCHG_RAX db "xchg rax, "
XCHG_RAX_LEN equ $ - XCHG_RAX
MOV_RAX_COMMA db "mov rax, "
MOV_RAX_COMMA_LEN equ $ - MOV_RAX_COMMA
; Also used with a length of 2 to emit just the ", " operand separator.
COMMA_RAX db ", rax"
COMMA_RAX_LEN equ $ - COMMA_RAX
XOR_RDX_RDX db "xor rdx, rdx", 10
XOR_RDX_RDX_LEN equ $ - XOR_RDX_RDX
; Mnemonics stored as one qword each: dq zero-pads the 4-character string to
; 8 bytes, matching the NUL-padded `op: [u8; 8]` form that
; codegen_binary_op_unchecked measures with strlen.
ADD_ dq "add "
SUB_ dq "sub "
MUL_ dq "mul "
DIV_ dq "div "