diff --git a/lang/src/codegen.asm b/lang/src/codegen.asm
index e74a7e7..e062340 100644
--- a/lang/src/codegen.asm
+++ b/lang/src/codegen.asm
@@ -25,6 +25,10 @@ section .rdata
     PROLOGUE_LEN equ $ - PROLOGUE
     EPILOGUE db "mov rsp, rbp", 10, "pop rbp", 10, "ret", 10
     EPILOGUE_LEN equ $ - EPILOGUE
+    MOV_RAX db "mov rax, "
+    MOV_RAX_LEN equ $ - MOV_RAX
+    JMP_EPILOGUE db 10, "jmp .epilogue", 10
+    JMP_EPILOGUE_LEN equ $ - JMP_EPILOGUE
 
     REGISTER_NAMES db "abcdsidibpspr8r9r10r11r12r13r14r15"
     WIDTHS db "erxliwdbp"
@@ -179,8 +183,58 @@ stackvar_cmp:
 ;; ast: *const Ast,
 ;; text: Vec,
 ;; }
+;; struct FunctionCtx {
+;;     current_stack_size: u64,
+;;     stack_vars: Vec<(u64, u64)>,
+;;     register_bitset: u128,
+;;     dirtied_register_bitset: u128,
+;; }
 ;; end-structs
 
+;; rdi: *FunctionCtx
+;; define-fn: fn codegen_allocate_register(ctx: *mut FunctionCtx) -> u8
+;;
+;; Returns (in rax) the index of a previously-free register, marking it
+;; both allocated and dirtied in the FunctionCtx bitsets.
+codegen_allocate_register:
+    push rbp
+    mov rbp, rsp
+    push rbx
+
+    ; register_bitset: bit i set => register i is in use.
+    ; Example (rax, rbp and rsp always reserved; bit 0 written leftmost):
+    ;   register_bitset  = 0b10000011_00000000
+    ; Invert it so free registers become 1-bits:
+    ;   ~register_bitset = 0b01111100_11111111
+    ; then bsf finds the lowest free register (1 here, i.e. rbx).
+    ; The chosen register is set in register_bitset and also recorded in
+    ; dirtied_register_bitset so the prologue/epilogue can save/restore it.
+    ;
+    ; Only 16 architectural registers exist, so only the low 16 bits of
+    ; the (u128) bitsets are ever consulted; the high halves at
+    ; [rdi + 56] / [rdi + 72] stay untouched.
+    mov rax, [rdi + 48]         ; register_bitset, low 64 bits
+    not rax                     ; free registers -> 1-bits
+    and rax, 0xFFFF             ; clamp to the 16 real registers
+    bsf rcx, rax                ; rcx = lowest free register; ZF=1 if none
+    jz .panic
+
+    mov rbx, rcx                ; keep the result across the bookkeeping
+    mov rax, [rdi + 48]         ; reload the ORIGINAL (non-inverted) bitset
+    bts rax, rcx
+    mov [rdi + 48], rax         ; mark register allocated
+    mov rax, [rdi + 64]         ; dirtied_register_bitset, low 64 bits
+    bts rax, rcx
+    mov [rdi + 64], rax         ; mark register dirtied
+
+    mov rax, rbx                ; return the register index
+    pop rbx
+    pop rbp
+    ret
+.panic:
+    ; no free registers!
+    call panic
+
 ;; rdi: *Ctx
 ;; rsi: function index
 ;; define-fn: fn codegen_function(ast: *const CodegenCtx, func_idx: u64) -> ()
@@ -191,14 +245,15 @@ codegen_function:
     push r15
     push r14
 
-    ; scratch [88..104]
+    ; scratch [104..120]
+    ; dirtied-register-bitset [88..104] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
     ; register-bitset [72..88] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
     ; stack-vars: Vec<(index, offset)> [32..72]
     ; current_stack_size: [24..32]
     ; func_idx [16..24]
     ; ast [8..16]
     ; ctx [0..8]
-    sub rsp, 104
+    sub rsp, 120
     mov [rsp], rdi ; ctx
     mov rax, [rdi]
     mov [rsp + 8], rax ; ast
@@ -279,13 +334,13 @@ codegen_function:
     lea rsi, [rax + r14 * 8]
     ; mov rsi, [rsi] ; AstFunction.args[i]
 
-    mov [rsp + 88], rsi
+    mov [rsp + 104], rsi ; scratch
     mov rax, [rsp + 24] ; current_stack_size
-    mov [rsp + 96], rax
+    mov [rsp + 112], rax
     add rax, 8 ; size_of::<u64>()
     mov [rsp + 24], rax ; current_stack_size += size_of::<u64>()
     lea rdi, [rsp + 32] ; stack-vars
-    lea rsi, [rsp + 88] ; &(index, offset)
+    lea rsi, [rsp + 104] ; &scratch: &(index, offset)
     mov rdx, stackvar_cmp
     mov rcx, 0
     call vec_insert_sorted
@@ -308,7 +363,7 @@ codegen_function:
     mov rdx, RET_NL_LEN
     call vec_extend
 
-    add rsp, 104
+    add rsp, 120
     pop r15
     pop r14
     pop rbx
@@ -322,4 +377,430 @@ codegen_function:
 ;; rsi: &function_ctx
 ;; rdx: block index
 codegen_block:
+    push rbp
+    mov rbp, rsp
+    push r15
+    push r14
+    push rbx
+
+    sub rsp, 16
+    mov [rsp], rdi ; ctx
+    mov [rsp + 8], rsi ; &function_ctx
+
+    mov rdi, [rdi] ; ast
+    mov rsi, rdx ; block index
+    call vec_get
+    mov r15, [rax + 8] ; AstNode.extra (statement count)
+    mov rbx, [rax + 0] ; AstNode.data (statement index array)
+
+    xor r14, r14 ; statement index
+.stmt_loop:
+    cmp r14, r15
+    jge .stmt_loop_done
+    mov rdi, [rsp] ; ctx
+    mov rsi, [rsp + 8] ; &function_ctx
+    mov rdx, [rbx + r14 * 8] ; statements[i]
+    call codegen_statement
+    inc r14
+    jmp .stmt_loop
+
+.stmt_loop_done:
+    add rsp, 16
+    pop rbx
+    pop r14
+    pop r15
+    pop rbp
 ret
+
+;; rdi: ctx
+;; rsi: &function_ctx
+;; rdx: expr index
+;; returns: register index in rax if rdx=0, else stack-offset in rax
+;; define-fn: fn codegen_expr(ctx: *const CodegenCtx, function_ctx: &FunctionCtx, expr_idx: u64) -> (u64, bool)
+codegen_expr:
+    push rbp
+    mov rbp, rsp
+    push rbx
+
+    ; scratch [16..32]
+    ; function_ctx: [8..16]
+    ; ctx [0..8]
+    sub rsp, 32
+    mov [rsp], rdi ; ctx
+    mov [rsp + 8], rsi ; &function_ctx
+
+    mov rdi, [rdi] ; ast
+    mov rsi, rdx ; expr index
+    call vec_get
+    ; rax: *AstNode
+
+    mov rbx, [rax] ; AstNode.kind
+    cmp bl, AST_RETURN_STATEMENT
+    je .return
+    cmp bl, AST_BLOCK
+    je .block
+    cmp bl, AST_VAR_DECL
+    je .var_decl
+    cmp bl, AST_VAR_REF
+    je .var_ref
+    cmp bl, AST_NUMBER
+    je .number
+    cmp bl, AST_BINARY_OP
+    je .binary_op
+    cmp bl, AST_ASSIGNMENT
+    je .assignment
+    cmp bl, AST_PLACE_TO_VALUE
+    je .place_to_value
+    cmp bl, AST_VALUE_TO_PLACE
+    je .value_to_place
+    cmp bl, AST_DEREF
+    je .deref
+    cmp bl, AST_ADDRESS_OF
+    je .address_of
+    jmp .panic
+
+.return:
+    ; codegen inner expr
+    mov rdi, [rsp] ; ctx
+    mov rsi, [rsp + 8] ; &function_ctx
+    mov rdx, [rax + 8] ; AstNode.data
+    call codegen_expr
+    mov rbx, rax ; register holding the value
+
+    ; emit "mov rax, "
+    mov rdi, [rsp] ; ctx
+    mov rdi, [rdi + 8] ; &ctx.text
+    lea rsi, [rel MOV_RAX]
+    mov rdx, MOV_RAX_LEN
+    call vec_extend
+
+    mov rdi, rbx
+    mov rsi, 8
+    lea rdx, [rsp + 16] ; scratch buffer
+    call get_register_name
+
+    ; NOTE(review): assumes get_register_name returns ptr in rax and
+    ; length in rdx (rdx flows straight into vec_extend) -- confirm.
+    mov rdi, [rsp] ; ctx
+    mov rdi, [rdi + 8] ; &ctx.text
+    mov rsi, rax
+    call vec_extend
+
+    ; emit "\njmp .epilogue\n"
+    mov rdi, [rsp] ; ctx
+    mov rdi, [rdi + 8] ; &ctx.text
+    lea rsi, [rel JMP_EPILOGUE]
+    mov rdx, JMP_EPILOGUE_LEN
+    call vec_extend
+
+    mov rax, 0
+    jmp .done
+
+.number:
+    ; rax = *AstNode
+    mov [rsp + 16], rax ; scratch = *AstNode
+    mov rdi, [rsp + 8] ; &function_ctx
+    call codegen_allocate_register
+
+    ; TODO: emit "mov <reg>, <imm>" for the literal
+
+.done:
+    add rsp, 32
+    pop rbx
+    pop rbp
+    ret
+
+.block:
+.var_decl:
+.var_ref:
+.binary_op:
+.assignment:
+.place_to_value:
+.value_to_place:
+.deref:
+.address_of:
+    ; TODO: unimplemented expression kinds fall through to panic
+.panic:
+    call panic
+
+;; start-structs
+;; struct Operand {
+;;     kind: u8,
+;;     register_and_width: u8,
+;;     len: u16,
+;;     value: u64,
+;; }
+;; end-structs
+;; register_and_width packs register: u4 (low nibble), width: u4 (high nibble)
+section .rdata
+;; start-consts
+    ; NOTE(review): these must be assemble-time constants (equ), not db
+    ; data labels: below they are compared against op.kind as immediates.
+    OPERAND_REGISTER equ 1 ; e.g. rax, rbx
+    OPERAND_RBP_OFFSET equ 2 ; e.g. [rbp - 8]
+    OPERAND_RSP_OFFSET equ 3 ; e.g. [rsp + 16]
+    OPERAND_ADDRESS equ 4 ; e.g. [rel OPERAND_ADDRESS]
+    OPERAND_IMMEDIATE equ 5 ; e.g. 0x10
+    OPERAND_CONSTANT equ 6 ; e.g. OPERAND_CONSTANT
+    OPERAND_LABEL equ 7 ; e.g. label_1234
+;; end-consts
+    WIDTH_BYTE db 'byte '
+    WIDTH_WORD db 'word '
+    WIDTH_DWORD db 'dword '
+    WIDTH_QWORD db 'qword '
+
+;; rdi: *text
+;; rsi: op: *Operand
+;; Appends the textual form of op (register, memory, immediate, name) to *text.
+codegen_write_operand:
+    push rbp
+    mov rbp, rsp
+    push rbx
+
+    ; scratch [16..40]
+    ; *operand [8..16]
+    ; *text [0..8]
+    sub rsp, 40
+    mov [rsp], rdi ; *text
+    mov [rsp + 8], rsi ; op
+
+    mov bl, byte [rsi + 0] ; op.kind
+    cmp bl, OPERAND_REGISTER
+    je .register
+    cmp bl, OPERAND_RBP_OFFSET
+    je .rbp_offset
+    cmp bl, OPERAND_RSP_OFFSET
+    je .rsp_offset
+    cmp bl, OPERAND_ADDRESS
+    je .address
+    cmp bl, OPERAND_IMMEDIATE
+    je .immediate
+    cmp bl, OPERAND_CONSTANT
+    je .constant
+    cmp bl, OPERAND_LABEL
+    je .label
+    jmp .panic
+
+.register:
+    movzx esi, byte [rsi + 1] ; op.register_and_width
+    mov edi, esi
+    and edi, 0x0f ; low 4 bits = register
+    shr esi, 4 ; high 4 bits = width
+    lea rdx, [rsp + 16] ; buffer
+    call get_register_name
+
+    mov rdi, [rsp] ; *text
+    mov rsi, rax ; buffer
+    call vec_extend
+    jmp .epilogue
+
+.rbp_offset:
+.rsp_offset:
+    ; {width} [rbp {+/-} offset]
+    mov rsi, [rsp + 8] ; op
+    movzx esi, byte [rsi + 1] ; register_and_width
+    shr esi, 4 ; width
+    mov rdi, [rsp] ; *text
+    call codegen_write_width
+
+    mov byte [rsp + 16], '['
+    mov rdi, [rsp] ; *text
+    lea rsi, [rsp + 16]
+    call vec_push
+
+    ; if op.kind == OPERAND_RBP_OFFSET
+    mov rax, [rsp + 8] ; op
+    mov al, byte [rax + 0] ; op.kind
+    cmp al, OPERAND_RBP_OFFSET
+    jne .rsp_offset_write
+    mov qword [rsp + 16], 'rbp ' ; NASM stores character constants in string order
+    mov rdi, [rsp] ; *text
+    lea rsi, [rsp + 16]
+    mov rdx, 4
+    call vec_extend
+    jmp .check_sign
+
+.rsp_offset_write:
+    mov qword [rsp + 16], 'rsp '
+    mov rdi, [rsp] ; *text
+    lea rsi, [rsp + 16]
+    mov rdx, 4
+    call vec_extend
+
+.check_sign:
+    ; emit " + " only for non-negative offsets; negative offsets rely on
+    ; int_to_str2 printing the leading '-' itself (op.value is signed) -- confirm.
+    mov rax, [rsp + 8] ; op
+    mov rdi, [rax + 8] ; op.value
+    cmp rdi, 0
+    jl .skip_plus
+    mov qword [rsp + 16], ' + '
+    mov rdi, [rsp] ; *text
+    lea rsi, [rsp + 16]
+    mov rdx, 3
+    call vec_extend
+.skip_plus:
+
+    ; write offset
+    mov rax, [rsp + 8] ; op
+    mov rdi, [rax + 8] ; op.value
+    lea rsi, [rsp + 16]
+    mov rdx, 24 ; max length
+    mov rcx, 10 ; radix
+    call int_to_str2
+
+    mov rdi, [rsp] ; *text
+    mov rsi, rax ; buffer
+    call vec_extend
+
+    mov byte [rsp + 16], ']'
+    mov rdi, [rsp] ; *text
+    lea rsi, [rsp + 16]
+    call vec_push
+    jmp .epilogue
+
+.address:
+    ; [rel {name}]
+    mov byte [rsp + 16], '['
+    mov rdi, [rsp] ; *text
+    lea rsi, [rsp + 16]
+    call vec_push
+
+    mov qword [rsp + 16], 'rel '
+    mov rdi, [rsp] ; *text
+    lea rsi, [rsp + 16]
+    mov rdx, 4
+    call vec_extend
+
+    ; write address name
+    mov rax, [rsp + 8] ; op
+    mov rdi, [rsp] ; *text
+    mov rsi, [rax + 8] ; op.value (ptr to name)
+    movzx edx, word [rax + 2] ; op.len
+    call vec_extend
+
+    mov byte [rsp + 16], ']'
+    mov rdi, [rsp] ; *text
+    lea rsi, [rsp + 16]
+    call vec_push
+    jmp .epilogue
+
+.immediate:
+    ; write immediate value
+    mov rax, [rsp + 8] ; op
+    mov rdi, [rax + 8] ; op.value
+    lea rsi, [rsp + 16]
+    mov rdx, 24 ; max length
+    mov rcx, 10 ; radix
+    call int_to_str2
+
+    mov rdi, [rsp] ; *text
+    mov rsi, rax ; buffer
+    call vec_extend
+    jmp .epilogue
+
+.constant:
+.label:
+    ; write constant/label name (both carry ptr/len of their text)
+    mov rax, [rsp + 8] ; op
+    mov rdi, [rsp] ; *text
+    mov rsi, [rax + 8] ; op.value (ptr to name)
+    movzx edx, word [rax + 2] ; op.len
+    call vec_extend
+
+.epilogue:
+    add rsp, 40
+    pop rbx
+    pop rbp
+    ret
+.panic:
+    call panic
+
+;; rdi: *text
+;; rsi: width in bytes (1, 2, 4 or 8)
+;; returns: number of bytes written (in rax)
+codegen_write_width:
+    cmp sil, 8
+    jne .try_dword
+    lea rsi, [rel WIDTH_QWORD]
+    mov rdx, 6
+    jmp .write
+.try_dword:
+    cmp sil, 4
+    jne .try_word
+    lea rsi, [rel WIDTH_DWORD]
+    mov rdx, 6
+    jmp .write
+.try_word:
+    cmp sil, 2
+    jne .try_byte
+    lea rsi, [rel WIDTH_WORD]
+    mov rdx, 5
+    jmp .write
+.try_byte:
+    cmp sil, 1
+    jne .panic
+    lea rsi, [rel WIDTH_BYTE]
+    mov rdx, 5
+.write:
+    push rdx
+    call vec_extend
+    pop rax ; length written
+    ret
+.panic:
+    call panic
+
+;; rdi: *text
+;; rsi: dst: *Operand
+;; rdx: src: *Operand
+;; Emits a move from src to dst (WIP: text emission still TODO).
+codegen_move_dst_src:
+    push rbp
+    mov rbp, rsp
+    push rbx
+
+    sub rsp, 24
+    mov [rsp], rdi ; *text
+    mov [rsp + 8], rsi ; dst
+    mov [rsp + 16], rdx ; src
+
+    cmp byte [rsi + 0], OPERAND_ADDRESS
+    jg .panic ; only register/memory destinations are supported
+
+    ; widths must match
+    mov cl, byte [rsi + 1] ; dst.register_and_width
+    mov bl, byte [rdx + 1] ; src.register_and_width
+    shr cl, 4
+    shr bl, 4
+    cmp cl, bl
+    jne .panic ; mismatched widths
+
+    ; an address (lea-style) source may only target a full 8-byte destination
+    cmp byte [rdx + 0], OPERAND_ADDRESS
+    jne .kind_check_done
+    cmp cl, 8
+    jne .panic
+.kind_check_done:
+
+    cmp byte [rsi + 0], OPERAND_REGISTER
+    je .do_move
+    cmp byte [rdx + 0], OPERAND_REGISTER
+    jne .xchg_rax ; if dst != register and src != register, xchg via rax
+    jmp .do_move
+
+.xchg_rax:
+    ; TODO:
+    ;   xchg rax, [src]
+    ;   mov [dst], rax
+    ;   xchg rax, [src]
+.do_move:
+    ; TODO: emit the actual "mov dst, src" text
+
+.epilogue:
+    add rsp, 24
+    pop rbx
+    pop rbp
+    ret
+
+.panic:
+    call panic