codegen for binop

fix ast gen with nested binary ops
skip_token method
2025-11-02 01:07:37 +01:00 · 2025-11-02 00:30:00 +01:00 · 2025-11-02 00:29:45 +01:00 · 2025-11-02 00:00:22 +01:00 · 2025-11-01 18:24:37 +01:00 · 2025-11-01 17:57:11 +01:00
8 changed files with 1020 additions and 243 deletions
--- a/lang/src/ast.asm
+++ b/lang/src/ast.asm
@ -4,11 +4,11 @@ default rel
 %include "src/ast.inc"

 section .rdata
-    PRECEDENCE_ADD equ 90
-    PRECEDENCE_SUB equ 90
-    PRECEDENCE_MUL equ 100
-    PRECEDENCE_DIV equ 100
-    PRECEDENCE_REM equ 100
+    PRECEDENCE_ADD dw 90
+    PRECEDENCE_SUB dw 90
+    PRECEDENCE_MUL dw 100
+    PRECEDENCE_DIV dw 100
+    PRECEDENCE_REM dw 100

 section .text
 extern vec_init_with
@ -28,6 +28,7 @@ extern tokeniser_init
 extern find_lexeme
 extern peek_lexeme
 extern expect_token
+extern skip_token
 extern unwrap_token
 extern peek_expect_token

@ -400,6 +401,7 @@ parse_primary_expr:
 parse_binary_expr:
    push rbp
    mov rbp, rsp
+    push rbx

    ; size: 24, align: 8
    ; start-structs
@ -438,40 +440,19 @@ parse_binary_expr:
    lea rdi, [rsp + 32]         ; lexeme
    call peek_lexeme
    mov rax, [rsp + 32]
+    mov byte [rsp + 16], al     ; operator
+    mov bx, -1
    cmp al, TOKEN_PLUS
-    je .add
+    cmove bx, word [rel PRECEDENCE_ADD]
    cmp al, TOKEN_MINUS
-    je .sub
+    cmove bx, word [rel PRECEDENCE_SUB]
    cmp al, TOKEN_STAR
-    je .mul
+    cmove bx, word [rel PRECEDENCE_MUL]
    cmp al, TOKEN_SLASH
-    je .div
-    jmp .done
-
-.add:
-    mov dil, TOKEN_PLUS
-    call unwrap_token
-    mov byte [rsp + 16], TOKEN_PLUS
-    mov byte [rsp + 18], PRECEDENCE_ADD
-    jmp .right
-.sub:
-    mov dil, TOKEN_MINUS
-    call unwrap_token
-    mov byte [rsp + 16], TOKEN_MINUS
-    mov byte [rsp + 18], PRECEDENCE_SUB
-    jmp .right
-.mul:
-    mov dil, TOKEN_STAR
-    call unwrap_token
-    mov byte [rsp + 16], TOKEN_STAR
-    mov byte [rsp + 18], PRECEDENCE_MUL
-    jmp .right
-.div:
-    mov dil, TOKEN_SLASH
-    call unwrap_token
-    mov byte [rsp + 16], TOKEN_SLASH
-    mov byte [rsp + 18], PRECEDENCE_DIV
-    jmp .right
+    cmove bx, word [rel PRECEDENCE_DIV]
+    cmp bx, -1
+    je .done
+    mov byte [rsp + 18], bl

 .right:
    mov dil, [rsp + 17]
@ -479,6 +460,8 @@ parse_binary_expr:
    cmp al, dil                 ; our_precedence <= upper_precedence
    jle .done

+    call skip_token             ; consume operator
+
    mov rdi, [rsp]         ; Ast
    mov sil, [rsp + 18]
    call parse_binary_expr
@ -529,6 +512,7 @@ parse_binary_expr:
    mov rax, [rsp + 8]          ; left
    movzx rdx, byte [rsp + 19]  ; left_placeness
    add rsp, 64
+    pop rbx
    pop rbp
    ret

--- a/lang/src/codegen.asm
+++ b/lang/src/codegen.asm
@ -1,6 +1,7 @@
 default rel

 %include "src/ast.inc"
+%include "src/tokeniser.inc"

 extern panic
 extern vec_extend
@ -10,6 +11,7 @@ extern vec_insert_sorted
 extern vec_insert_many
 extern vec_init_with
 extern int_to_str2
+extern strlen

 global codegen_function
 global get_register_name
@ -271,7 +273,8 @@ stackvar_cmp:
 ;;   text: Vec<u8>,
 ;; }
 ;; struct FunctionCtx {
-;;   current_stack_size: u64,
+;;   current_stack_size: u32,
+;;   max_stack_size: u32,
 ;;   stack_vars: Vec<(u64, u64)>,
 ;;   register_bitset: u16,
 ;;   dirtied_register_bitset: u16,
@ -301,7 +304,7 @@ codegen_allocate_register:
    ; flip bits
    not ax
    test ax, ax
-    jz .panic
+    jz .no_regs
    ; find first set bit
    bsf cx, ax

@ -313,14 +316,146 @@ codegen_allocate_register:
    mov ax, word [rdi + 52]     ; dirtied_register_bitset
    bts ax, cx
    mov word [rdi + 52], ax     ; update dirtied_register_bitset
+    jmp .done
+
+.no_regs:
+    ; return -1u64 to indicate no free registers
+    ; the caller should panic or allocate a stack slot instead
+    mov rbx, -1

 .done:
    mov rax, rbx
    pop rbx
    pop rbp
    ret
+
+;; rdi: *FunctionCtx
+;; rsi: width
+;; define-fn: fn codegen_allocate_place(ctx: *mut FunctionCtx, width: u16) -> Operand
+;; Reserves `width` bytes at the top of the function's stack area and returns
+;; an rbp-relative Operand for it: rax = packed kind/register/width/len,
+;; rdx = Operand.value (the negated stack size after the bump).
+codegen_allocate_place:
+    push rbp
+    mov rbp, rsp
+
+    ; grow the running stack size; the 32-bit load zero-extends into rdx
+    mov edx, dword [rdi + 0]    ; edx = current_stack_size
+    add edx, esi                ; edx += width
+    mov dword [rdi + 0], edx    ; store the new current_stack_size
+
+    ; track the high-water mark (unsigned comparison)
+    mov eax, dword [rdi + 4]    ; eax = max_stack_size
+    cmp edx, eax
+    cmova eax, edx              ; eax = max(max_stack_size, current_stack_size)
+    mov dword [rdi + 4], eax
+
+    ; pack the Operand header: len = 0, width in bits 12.., register nibble = 0 (undef)
+    mov eax, esi                ; width (also clears the upper half of rax)
+    shl eax, 12                 ; skip the kind byte and the register nibble
+    or eax, OPERAND_RBP_OFFSET  ; Operand.kind
+    neg rdx                     ; Operand.value = -current_stack_size
+
+.done:
+    pop rbp
+    ret
+
+;; rdi: *FunctionCtx
+;; rsi: width
+;; define-fn: fn codegen_allocate_value(ctx: *mut FunctionCtx, width: u16) -> Operand
+;; Allocates storage for a value of `width` bytes. Widths up to 8 try a register
+;; first (codegen_allocate_register); wider values, or the case where no register
+;; is free (-1 returned), fall back to a stack slot (codegen_allocate_place).
+;; Returns the Operand in rax (packed kind/register/width/len) and rdx (value).
+codegen_allocate_value:
+    push rbp
+    mov rbp, rsp
+    push rbx
+
+    ; width [8..16]
+    ; *FunctionCtx [0..8]
+    sub rsp, 16
+
+    mov [rsp], rdi              ; ctx
+    mov [rsp + 8], rsi         ; width
+
+    cmp rsi, 8
+    jg .alloc_stack             ; does not fit in a 64-bit register
+
+.alloc_reg:
+    call codegen_allocate_register
+    cmp rax, -1
+    je .alloc_stack             ; no free register: use the stack instead
+
+    mov rbx, rax                ; register index
+
+    ; construct Operand
+    xor rax, rax
+    mov eax, 0                  ; Operand.len = 0
+    shl eax, 16
+    mov rsi, [rsp + 8]          ; width
+    or eax, esi                 ; Operand.width
+    shl eax, 4
+    or eax, ebx                 ; Operand.register
+    shl eax, 8
+    or eax, OPERAND_REGISTER    ; Operand.kind
+    mov rdx, 0                  ; Operand.value = 0
+    jmp .done
+
+.alloc_stack:
+    ; rdi/rsi are caller-saved, so they may no longer hold the arguments when
+    ; we arrive here after codegen_allocate_register; reload from the frame.
+    mov rdi, [rsp]              ; ctx
+    mov rsi, [rsp + 8]          ; width
+    call codegen_allocate_place
+
+.done:
+    add rsp, 16
+    pop rbx
+    pop rbp
+    ret
+    
+;; rdi: *FunctionCtx
+;; rsi: *Operand
+;; define-fn: fn codegen_free_operand(ctx: *mut FunctionCtx, operand: *const Operand) -> ()
+;; Releases the storage behind `operand`:
+;;   - REGISTER:   clears its bit in register_bitset; panics if it was not set.
+;;   - RBP_OFFSET: shrinks current_stack_size by the operand width, but only
+;;                 when the slot is the topmost one; otherwise it is left alone.
+;;   - any other kind: no-op.
+codegen_free_operand:
+    push rbp
+    mov rbp, rsp
+    push rbx
+
+    sub rsp, 16
+    mov [rsp], rdi              ; ctx
+    mov [rsp + 8], rsi          ; operand
+
+    mov al, byte [rsi]          ; Operand.kind
+    cmp al, OPERAND_REGISTER
+    je .free_reg
+    cmp al, OPERAND_RBP_OFFSET
+    je .free_stack
+    jmp .done
+
+.free_stack:
+    xor rbx, rbx
+    mov ebx, dword [rdi + 0]    ; current_stack_size
+    mov rax, [rsi + 8]          ; Operand.value
+    ; codegen_allocate_place stores value = -current_stack_size, so negate it
+    ; back before comparing; without this the sizes could never match.
+    neg rax
+    cmp rbx, rax
+    jne .done                   ; operand not at top of stack, can't free
+
+    mov al, byte [rsi + 1]      ; Operand.register_and_width
+    shr al, 4                   ; width lives in the high nibble
+    movzx rax, al
+    sub rbx, rax
+    mov dword [rdi + 0], ebx    ; current_stack_size -= width
+    jmp .done
+
+.free_reg:
+    xor rax, rax
+    mov al, byte [rsi + 1]      ; Operand.register_and_width
+    and al, 0x0F                ; get register index
+    mov bx, word [rdi + 48]     ; register_bitset
+    btr bx, ax                  ; CF = previous state of the bit
+    jnc .panic                  ; trying to free unallocated register
+    mov word [rdi + 48], bx     ; update register_bitset
+
+.done:
+    add rsp, 16
+    pop rbx
+    pop rbp
+    ret
 .panic:
-    ; no free registers!
    call panic

 ;; rdi: *Ctx
@ -339,7 +474,8 @@ codegen_function:
    ; dirtied-register-bitset [76..80]  [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
    ; register-bitset [72..76]  [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
    ; stack-vars: Vec<(index, offset)> [32..72]
-    ; current_stack_size: [24..32]
+    ; max_stack_size: [28..32]
+    ; current_stack_size: [24..28]
    ; func_idx [16..24]
    ; ast [8..16]
    ; ctx [0..8]
@ -348,7 +484,7 @@ codegen_function:
    mov rax, [rdi]
    mov [rsp + 8], rax          ; ast
    mov [rsp + 16], rsi         ; func_idx
-    mov qword [rsp + 24], 0     ; current_stack_size = 0
+    mov qword [rsp + 24], 0     ; current_stack_size = 0, max_stack_size = 0

    lea rdi, [rsp + 32]         ; stack-vars
    mov rsi, 16                 ; size_of::<(u64, u64)>
@ -356,10 +492,10 @@ codegen_function:
    mov rcx, 16                 ; initial capacity
    call vec_init_with

-    bts word [rsp + 72], 7     ; mark rsp as used
-    bts word [rsp + 72], 6     ; mark rbp as used
-    bts word [rsp + 72], 0     ; mark rax as used
-    mov word [rsp + 76], 0     ; dirtied_register_bitset = 0
+    bts word [rsp + 72], 7      ; mark rsp as used
+    bts word [rsp + 72], 6      ; mark rbp as used
+    bts word [rsp + 72], 0      ; mark rax as used
+    mov word [rsp + 76], 0      ; dirtied_register_bitset = 0

    ; push "section .text\n"
    mov rdi, [rsp]              ; ctx
@ -441,11 +577,18 @@ codegen_function:
    lea rsi, [rax + r14 * 8]    ;
    mov rsi, [rsi]              ; AstFunction.args[i]

-    mov [rsp + 104], rsi        ; scratch
-    mov rax, [rsp + 24]         ; current_stack_size
-    add rax, 8                   ; size_of::<u64>
-    mov [rsp + 24], rax         ; current_stack_size += size_of::<u64>
-    mov [rsp + 112], rax
+    mov [rsp + 104], rsi        ; &(index, _)
+
+    lea rdi, [rsp + 24]         ; &function_ctx
+    ; TODO: get arg type size
+    mov rsi, 8                  ; size_of::<u64>
+    call codegen_allocate_place
+    ; rdx = stack offset
+    mov [rsp + 80], rax         ; Operand
+    mov [rsp + 88], rdx         ; Operand.value
+    neg rdx
+    mov [rsp + 112], rdx        ; &(_, offset)
+
    lea rdi, [rsp + 32]         ; stack-vars
    lea rsi, [rsp + 104]         ; &scratch: &(index, offset)
    mov rdx, stackvar_cmp
@ -454,17 +597,16 @@ codegen_function:

    ; spill arg from register to newly allocated stack slot
    ; get source Operand
-    mov r13, [rsp + 112]        ; current_stack_size before increment
-    mov rdi, [rsp + 104]        ; arg index
+    mov rdi, r14                ; arg index
    call codegen_arg_to_operand
    mov [rsp + 104], rax
    mov [rsp + 112], rdx
-    mov rdx, r13                 ; offset
-    neg rdx
+
    mov rdi, [rsp]              ; ctx
    lea rdi, [rdi + 8]          ; &ctx.text
    lea rsi, [rsp + 104]        ; src
-    call codegen_move_rbp_slot_src
+    lea rdx, [rsp + 80]         ; dst
+    call codegen_move_dst_src

    inc r14
    jmp .arg_loop
@ -501,7 +643,7 @@ codegen_function:
    mov rdx, 1                  ; pop = true
    call codegen_push_pop_dirtied_registers

-    ; "add rsp, {current_stack_size}\n"
+    ; "add rsp, {max_stack_size}\n"
    mov rdi, [rsp]              ; ctx
    lea rdi, [rdi + 8]          ; &ctx.text
    lea rsi, [rel ADD_RSP]
@ -509,7 +651,7 @@ codegen_function:
    call vec_extend

    lea rdi, [rsp + 24]         ; &function_ctx
-    mov rdi, [rdi + 0]          ; current_stack_size
+    mov edi, dword [rdi + 4]    ; max_stack_size
    lea rsi, [rsp + 104]        ; scratch
    mov rdx, 16                 ; buffer length
    mov rcx, 10                 ; radix
@ -545,7 +687,7 @@ codegen_function:
    mov rdx, DOT_PROLOGUE_LEN
    call vec_extend

-    ; "sub rsp, {current_stack_size}\n"
+    ; "sub rsp, {max_stack_size}\n"
    mov rdi, [rsp]              ; ctx
    lea rdi, [rdi + 8]          ; &ctx.text
    lea rsi, [rel SUB_RSP]
@ -553,7 +695,7 @@ codegen_function:
    call vec_extend

    lea rdi, [rsp + 24]         ; &function_ctx
-    mov rdi, [rdi + 0]          ; current_stack_size
+    mov edi, dword [rdi + 4]  ; max_stack_size
    lea rsi, [rsp + 104]        ; scratch
    mov rdx, 16                 ; buffer length
    mov rcx, 10                 ; radix
@ -680,6 +822,95 @@ codegen_push_pop_dirtied_registers:
    pop rbp
    ret
    
+;; rdi: *text
+;; rsi: &function_ctx
+;; rdx: pop: bool
+;; Returns the number of registers pushed/popped in rax
+;; define-fn: fn codegen_push_pop_used_registers(text: *mut Vec<u8>, function_ctx: &FunctionCtx, pop: bool) -> u8
+;; Emits one "push <reg>\n" line per selected register (appended to text), or,
+;; when `pop` is set, one "pop  <reg>\n" line per register, each inserted at the
+;; text length recorded on entry so the pops end up in reverse order.
+codegen_push_pop_used_registers:
+    push rbp
+    mov rbp, rsp
+    push rbx
+    push r15
+    push r14
+    push r13
+
+    ; line buffer [24..33]  "push " / "pop  " + 3-byte register name + '\n'
+    ; insert position [16..24]  (-1 => push mode, append)
+    ; function_ctx [8..16]
+    ; text [0..8]
+    ; The frame is 48 bytes so that the 9-byte line buffer (which ends at
+    ; rsp + 32) stays inside the locals and does not overwrite the saved r13.
+    sub rsp, 48
+    mov [rsp], rdi
+    mov [rsp + 8], rsi
+
+    xor r13, r13                ; num_regs pushed/popped; must be 0 on the early exit too
+
+    mov byte [rsp + 29 + 3], 10 ; newline
+    mov qword [rsp + 16], -1
+    mov rax, 'push '
+    mov qword [rsp + 24], rax
+    test rdx, rdx
+    jz .skip_setup_pop
+    mov rax, 'pop  '
+    mov qword [rsp + 24], rax
+
+    mov rdi, [rsp]              ; text
+    mov rax, [rdi + 8]          ; text.len()
+    mov [rsp + 16], rax
+.skip_setup_pop:
+
+    ; volatile registers:
+    ; abcdsdpp_89abcdef
+    ; 00111100_11110000b0
+    ; additionally, rax is never preserved by this compiler
+    mov bx, word [rsi + 48 + 4]    ; dirtied_register_bitset
+    mov ax, 0b00001111_00111100
+    and bx, ax
+    test bx, bx
+    jz .done                    ; nothing to save/restore
+
+    mov r15, 16
+    xor r14, r14                ; register index
+.reg_loop:
+    cmp r14, r15
+    jge .done
+    bt bx, r14w
+    jnc .next_reg
+
+    inc r13                     ; num_regs += 1
+
+    mov rdi, r14
+    mov rsi, 8
+    lea rdx, [rsp + 29]         ; name goes right after the 5-byte mnemonic
+    call get_register_name
+
+    mov rax, -1
+    cmp [rsp + 16], rax
+    jne .reg_pop
+
+    mov rdi, [rsp]              ; text
+    lea rsi, [rsp + 24]
+    mov rdx, 9
+    call vec_extend
+    jmp .next_reg
+.reg_pop:
+
+    mov rdi, [rsp]              ; text
+    mov rsi, [rsp + 16]         ; text.len()
+    lea rdx, [rsp + 24]
+    mov rcx, 9
+    call vec_insert_many
+
+.next_reg:
+    inc r14
+    jmp .reg_loop
+
+.done:
+    add rsp, 48
+    mov rax, r13
+    pop r13
+    pop r14
+    pop r15
+    pop rbx
+    pop rbp
+    ret
+    

 ;; rdi: ctx
 ;; rsi: &function_ctx
@ -732,10 +963,10 @@ codegen_expr:
    push r15
    push r14

-    ; scratch [16..48]
+    ; scratch [16..80]
    ; function_ctx: [8..16]
    ; ctx [0..8]
-    sub rsp, 48
+    sub rsp, 80
    mov [rsp], rdi          ; ctx
    mov [rsp + 8], rsi    ; &function_ctx

@ -810,6 +1041,10 @@ codegen_expr:
    lea rdx, [rsp + 16]         ; src
    call codegen_move_dst_src

+    mov rdi, [rsp + 8]          ; &function_ctx
+    lea rsi, [rsp + 16]         ; src
+    call codegen_free_operand
+
    ; push "jmp .epilogue\n"
    mov rdi, [rsp]              ; ctx
    lea rdi, [rdi + 8]          ; &ctx.text
@ -832,25 +1067,20 @@ codegen_expr:
 .number:
    ; rax = *AstNode
    mov [rsp + 16], rax         ; scratch = *AstNode
-    mov rbx, [rax + 8]          ; AstNode.data = value
-    mov [rsp + 40], rbx         ; Operand.value
-    mov rdi, [rsp + 8]          ; &function_ctx
-    call codegen_allocate_register
-
-    xor rbx, rbx
-    or rbx, 8                  ; width = 8
-    shl rbx, 4
-    or rbx, rax                ; register
-    shl rbx, 8
-    or rbx, OPERAND_REGISTER   ; kind
-    mov [rsp + 16], rbx         ;
-    mov qword [rsp + 24], 0     ; value = 0

    mov byte [rsp + 32], OPERAND_IMMEDIATE ; Operand.kind
    mov bl, 8                              ; width = 8
    shl bl, 4                              ; register = undef
    mov byte [rsp + 33], bl                ; Operand.register_and_width
    mov word [rsp + 34], 0                 ; Operand.len = 0
+    mov rbx, [rax + 8]                     ; AstNode.data
+    mov [rsp + 40], rbx                    ; Operand.value
+
+    mov rdi, [rsp + 8]          ; &function_ctx
+    mov rsi, 8                  ; width
+    call codegen_allocate_value
+    mov [rsp + 16], rax
+    mov [rsp + 24], rdx

    mov rdi, [rsp]              ; ctx
    lea rdi, [rdi + 8]          ; &ctx.text
@ -862,9 +1092,201 @@ codegen_expr:
    mov rdx, qword [rsp + 24]
    jmp .done

+.binary_op:
+    mov rax, [rax + 8]          ; AstNode.data
+    mov [rsp + 16], rax         ; scratch = *AstBinaryOp
+
+    mov rdi, [rsp]              ; ctx
+    mov rsi, [rsp + 8]          ; &function_ctx
+    mov rdx, [rax + 0]         ; left operand index
+    call codegen_expr
+    mov [rsp + 32], rax         ; left operand
+    mov [rsp + 40], rdx
+
+    mov rdi, [rsp]              ; ctx
+    mov rsi, [rsp + 8]          ; &function_ctx
+    mov rdx, [rsp + 16]         ; *AstBinaryOp
+    mov rdx, [rdx + 16]         ; right operand index
+    call codegen_expr
+    mov [rsp + 48], rax         ; right operand
+    mov [rsp + 56], rdx
+
+    mov rax, [rsp + 16]         ; *AstBinaryOp
+    mov al, byte [rax + 8]      ; operator
+    mov rbx, -1
+    cmp al, TOKEN_PLUS
+    cmove rbx, [rel ADD_]
+    cmp al, TOKEN_MINUS
+    cmove rbx, [rel SUB_]
+    cmp rbx, -1
+    jne .gen_op
+    cmp al, TOKEN_STAR
+    cmove rbx, [rel MUL_]
+    cmp al, TOKEN_SLASH
+    cmove rbx, [rel DIV_]
+    cmp al, TOKEN_PERCENT
+    cmove rbx, [rel DIV_]
+    cmp rbx, -1
+    je .panic                   ; unknown operator
+.mul_div:
+    ; mul/div need to clobber rax:rdx
+
+    ; TODO only check for div
+    mov rax, [rsp + 8]          ; &function_ctx
+    mov ax, word [rax + 48]     ; register_bitset
+    bt ax, 3                    ; is rdx used?
+    jnc .after_spill_rdx
+    
+    ; allocate scratch value for rdx
+    mov rdi, [rsp + 8]          ; &function_ctx
+    mov rsi, 8                  ; width
+    call codegen_allocate_place
+    mov [rsp + 64], rax
+    mov [rsp + 72], rdx
+
+    ; mov scratch, rdx
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    lea rsi, [rsp + 64]         ; scratch value
+    lea rdx, [rel OPERAND_RDX]  ; rax
+    call codegen_move_dst_src
+
+    ; check if rhs is rdx
+    mov rax, [rsp + 48]         ; right operand
+    and rax, 0xFFF
+    mov rdx, [rel OPERAND_RDX]
+    and rdx, 0xFFF
+    cmp rax, rdx
+    jne .after_spill_rdx
+
+    ; free rhs
+    mov rdi, [rsp + 8]          ; &function_ctx
+    lea rsi, [rsp + 48]         ; right operand
+    call codegen_free_operand
+
+    mov rdx, [rsp + 48]         ; right operand
+    and rdx, 0xF000             ; Operand.width
+    mov rax, [rsp + 64]         ; scratch value
+    or rax, rdx                 ; preserve width
+    mov rdx, [rsp + 72]
+    mov [rsp + 48], rax         ; right operand
+    mov [rsp + 56], rdx
+
+.after_spill_rdx:
+    mov rax, [rsp + 16]         ; *AstBinaryOp
+    mov al, byte [rax + 8]      ; operator
+    cmp al, TOKEN_STAR
+    je .after_clear_rdx
+
+    ; clear rdx for div
+    ; xor rdx, rdx
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    lea rsi, [rel XOR_RDX_RDX]  ; rdx
+    mov rdx, XOR_RDX_RDX_LEN
+    call vec_extend
+
+.after_clear_rdx:
+    ; mov rax, lhs
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    lea rsi, [rel OPERAND_RAX]  ; rax
+    lea rdx, [rsp + 32]         ; left operand
+    call codegen_move_dst_src
+
+    ; op rhs
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    push rbx
+    lea rsi, [rsp]  ; op
+    mov rdx, 4
+    call vec_extend
+    pop rbx
+
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    lea rsi, [rsp + 48]         ; left operand
+    call codegen_write_operand
+
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    mov rsi, 10
+    push rsi
+    lea rsi, [rsp]              ; newline
+    call vec_push
+    pop rsi
+
+    mov rax, [rsp + 16]         ; *AstBinaryOp
+    mov al, byte [rax + 8]      ; operator
+    cmp al, TOKEN_PERCENT
+    jne .after_rem
+
+    ; mov rax, rdx  // only for rem
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    lea rsi, [rel OPERAND_RAX]  ; rax
+    lea rdx, [rel OPERAND_RDX]  ; rdx
+    call codegen_move_dst_src
+
+.after_rem:
+    mov rax, [rsp + 8]          ; &function_ctx
+    mov ax, word [rax + 48]     ; register_bitset
+    bt ax, 3                    ; is rdx used?
+    jnc .after_unspill_rdx
+    
+    ; mov rdx, scratch
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    lea rsi, [rel OPERAND_RDX]  ; rdx
+    lea rdx, [rsp + 64]         ; scratch value
+    call codegen_move_dst_src
+
+.after_unspill_rdx:
+    ; free [scratch, rhs, lhs]
+    mov rdi, [rsp + 8]          ; &function_ctx
+    lea rsi, [rsp + 64]         ; scratch value
+    call codegen_free_operand
+
+    mov rdi, [rsp + 8]          ; &function_ctx
+    lea rsi, [rsp + 48]         ; right operand
+    call codegen_free_operand
+
+    mov rdi, [rsp + 8]          ; &function_ctx
+    lea rsi, [rsp + 32]         ; left operand
+    call codegen_free_operand
+
+    ; alloca dst
+    mov rdi, [rsp + 8]          ; &function_ctx
+    mov rsi, 8                  ; width
+    call codegen_allocate_value
+    mov [rsp + 32], rax
+    mov [rsp + 40], rdx
+
+    ; mov dst, rax
+    mov rdi, [rsp]              ; ctx
+    lea rdi, [rdi + 8]          ; &ctx.text
+    lea rsi, [rsp + 32]         ; dst
+    lea rdx, [rel OPERAND_RAX]  ; rax
+    call codegen_move_dst_src
+
+    ; return dst
+    mov rax, [rsp + 32]
+    mov rdx, [rsp + 40]
+    jmp .done
+
+.gen_op:
+
+    mov rdi, [rsp + 8]          ; &function_ctx
+    mov rsi, [rsp]              ; ctx
+    lea rsi, [rsi + 8]          ; &ctx.text
+    lea rdx, [rsp + 32]         ; left operand
+    lea rcx, [rsp + 48]         ; right operand
+    mov r8, rbx                 ; operation
+    call codegen_binary_op_rm64_rm64
+    jmp .done
+
 .var_decl:
 .var_ref:
-.binary_op:
 .assignment:
 .place_to_value:
 .value_to_place:
@ -873,7 +1295,7 @@ codegen_expr:
    ; TODO

 .done:
-    add rsp, 48
+    add rsp, 80
    pop r14
    pop r15
    pop rbx
@ -1134,6 +1556,218 @@ codegen_move_rbp_slot_src:
    pop rbp
    ret

+;; rdi: *function_ctx
+;; rsi: *text
+;; rdx: lhs: *Operand
+;; rcx: rhs: *Operand
+;; r8: op: [u8; 8]
+;;  Generates: {op} {lhs}, {rhs} for a binary operation that has the encodings rN, rmN and rmN, rN
+;; When the operand kinds cannot be encoded directly, one side is staged through
+;; rax or a freshly allocated destination. Returns the Operand that holds the
+;; result in rax (header) and rdx (value).
+codegen_binary_op_rm64_rm64:
+    push rbp
+    mov rbp, rsp
+    push rbx
+
+    ; dst [48..64]
+    ; op [32..40]
+    ; rhs [24..32]
+    ; lhs [16..24]
+    ; *text [8..16]
+    ; *function_ctx [0..8]
+    ; dst has its own slot: it used to alias op at [rsp + 32], which clobbered
+    ; the mnemonic before it was emitted on the .allocate_dst path.
+    sub rsp, 64
+    mov [rsp], rdi              ; *function_ctx
+    mov [rsp + 8], rsi          ; *text
+    mov [rsp + 16], rdx         ; lhs
+    mov [rsp + 24], rcx         ; rhs
+    mov [rsp + 32], r8          ; op
+
+    ; if lhs.kind == REGISTER || lhs.kind < ADDRESS && rhs.kind == REGISTER {
+    cmp byte [rdx + 0], OPERAND_REGISTER
+    je .simple
+    cmp byte [rdx + 0], OPERAND_ADDRESS
+    setb al
+    cmp byte [rcx + 0], OPERAND_REGISTER
+    sete bl
+    test al, bl
+    jne .simple
+    jmp .complex
+.simple:
+    ;   op lhs, rhs
+    lea rdi, [rsp + 32]         ; op
+    call strlen
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rsp + 32]         ; op
+    mov rdx, rax                ; op length
+    call vec_extend
+
+    mov rdi, [rsp + 8]          ; *text
+    mov rsi, [rsp + 16]         ; lhs
+    call codegen_write_operand
+
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rel COMMA_RAX]
+    mov rdx, 2                  ; only the ", " prefix of ", rax"
+    call vec_extend
+
+    mov rdi, [rsp + 8]          ; *text
+    mov rsi, [rsp + 24]         ; rhs
+    call codegen_write_operand
+
+    mov byte [rsp + 32], 10 ; newline (op is no longer needed on this path)
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rsp + 32]
+    call vec_push
+
+    ;   free rhs
+    mov rdi, [rsp]              ; *function_ctx
+    mov rsi, [rsp + 24]         ; rhs
+    call codegen_free_operand
+
+    ;   ret lhs
+    mov rbx, [rsp + 16]         ; lhs
+    mov rax, [rbx]
+    mov rdx, [rbx + 8]
+    jmp .epilogue
+    ; } else {
+.complex:
+    ;   if lhs.kind < ADDRESS {
+    cmp byte [rdx + 0], OPERAND_ADDRESS
+    jae .check_rhs
+
+    ;     mov rax, rhs
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rel OPERAND_RAX]
+    mov rdx, [rsp + 24]         ; rhs
+    call codegen_move_dst_src
+
+    ;     op lhs, rax
+    mov rdi, [rsp + 8]          ; *text
+    mov rsi, [rsp + 16]         ; lhs
+    lea rdx, [rel OPERAND_RAX]  ; rax
+    mov rcx, [rsp + 32]         ; op
+    call codegen_binary_op_unchecked
+
+    ;   free rhs
+    mov rdi, [rsp]              ; *function_ctx
+    mov rsi, [rsp + 24]         ; rhs
+    call codegen_free_operand
+
+    ;   ret lhs
+    mov rbx, [rsp + 16]         ; lhs
+    mov rax, [rbx]
+    mov rdx, [rbx + 8]
+    jmp .epilogue
+
+.check_rhs:
+    ;   } else if rhs.kind < ADDRESS {
+    cmp byte [rcx + 0], OPERAND_ADDRESS
+    jae .allocate_dst
+    ;     mov rax, lhs
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rel OPERAND_RAX]
+    mov rdx, [rsp + 16]         ; lhs
+    call codegen_move_dst_src
+
+    ;     op rax, rhs
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rel OPERAND_RAX]  ; rax
+    mov rdx, [rsp + 24]         ; rhs
+    mov rcx, [rsp + 32]         ; op
+    call codegen_binary_op_unchecked
+
+    ;     mov rhs, rax
+    mov rdi, [rsp + 8]          ; *text
+    mov rsi, [rsp + 24]         ; rhs
+    lea rdx, [rel OPERAND_RAX]
+    call codegen_move_dst_src
+
+    ;     free lhs
+    mov rdi, [rsp]              ; *function_ctx
+    mov rsi, [rsp + 16]         ; lhs
+    call codegen_free_operand
+
+    ;     ret rhs
+    mov rbx, [rsp + 24]         ; rhs
+    mov rax, [rbx]
+    mov rdx, [rbx + 8]
+    jmp .epilogue
+    ;   } else {
+.allocate_dst:
+    ;     dst = allocate_value
+    mov rdi, [rsp]              ; *function_ctx
+    mov rsi, 8                  ; width = 8
+    call codegen_allocate_value
+    mov [rsp + 48], rax         ; dst
+    mov [rsp + 56], rdx
+    
+    ;     mov dst, lhs
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rsp + 48]         ; dst
+    mov rdx, [rsp + 16]         ; lhs
+    call codegen_move_dst_src
+
+    ;     mov rax, rhs
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rel OPERAND_RAX]  ; rax
+    mov rdx, [rsp + 24]         ; rhs
+    call codegen_move_dst_src
+
+    ;     op dst, rax
+    mov rdi, [rsp + 8]          ; *text
+    lea rsi, [rsp + 48]         ; dst
+    lea rdx, [rel OPERAND_RAX]  ; rax
+    mov rcx, [rsp + 32]         ; op (still intact: dst no longer shares this slot)
+    call codegen_binary_op_unchecked
+
+    ;     ret dst
+    mov rax, [rsp + 48]         ; dst
+    mov rdx, [rsp + 56]
+    ;   }
+
+.epilogue:
+    add rsp, 64
+    pop rbx
+    pop rbp
+    ret
+
+;; rdi: *text
+;; rsi: lhs: *Operand
+;; rdx: rhs: *Operand
+;; rcx: op: [u8; 8]
+;; Appends "{op}{lhs}, {rhs}\n" to text without checking that the operand
+;; combination is encodable. `op` is passed by value and its length is taken
+;; with strlen, so it must contain a zero byte within its 8 bytes.
+codegen_binary_op_unchecked:
+    push rbp
+    mov rbp, rsp
+
+    ; op [24..32]
+    ; rhs [16..24]
+    ; lhs [8..16]
+    ; *text [0..8]
+    sub rsp, 32
+    mov [rsp], rdi              ; *text
+    mov [rsp + 8], rsi          ; lhs
+    mov [rsp + 16], rdx         ; rhs
+    mov [rsp + 24], rcx         ; op
+
+    ;     op lhs, rhs
+    lea rdi, [rsp + 24]         ; op
+    call strlen
+    mov rdi, [rsp]              ; *text
+    lea rsi, [rsp + 24]         ; op
+    mov rdx, rax                ; op length
+    call vec_extend
+
+    mov rdi, [rsp]              ; *text
+    mov rsi, [rsp + 8]          ; lhs
+    call codegen_write_operand
+
+    mov rdi, [rsp]              ; *text
+    lea rsi, [rel COMMA_RAX]
+    mov rdx, 2                  ; only the ", " prefix of ", rax"
+    call vec_extend
+
+    mov rdi, [rsp]              ; *text
+    mov rsi, [rsp + 16]         ; rhs
+    call codegen_write_operand
+
+    mov byte [rsp + 31], 10     ; newline (reuses the last byte of op, which is done with)
+    mov rdi, [rsp]              ; *text (was [rsp + 8], i.e. the lhs pointer)
+    lea rsi, [rsp + 31]
+    call vec_push
+
+    add rsp, 32
+    pop rbp
+    ret
+
 ;; rdi: *text
 ;; rsi: dst: *Operand
 ;; rdx: src: *Operand
@ -1166,52 +1800,28 @@ codegen_move_dst_src:
    je .panic                   ; address can only be moved to full-sized destinations

    cmp byte [rsi + 0], OPERAND_REGISTER
-    je .do_move
+    je .do_move                 ; if dst == register, do move
+    ; If dst != register and src != register, we cannot move directly into memory:
+    ; there is no MOV m64, m64 or MOV m64, imm64 instruction.
+    ; A smarter compiler could test for the immediate size and move most
+    ; immediates directly into memory, but we are quite stupid!
    cmp byte [rdx + 0], OPERAND_REGISTER
-    jne .xchg_rax               ; if dst != register and src != register, xchg via rax
+    jne .xchg_rax
    jmp .do_move

 .xchg_rax:
-    ; xchg rax, [src]
+    ; mov rax, [src]
    ; mov [dst], rax
-    ; xchg rax, [src]

    mov rdi, [rsp]              ; *text
-    lea rsi, [rel XCHG_RAX]
-    mov rdx, XCHG_RAX_LEN
-    call vec_extend
+    lea rsi, [rel OPERAND_RAX]
+    mov rdx, [rsp + 16]         ; src
+    call codegen_move_dst_src

    mov rdi, [rsp]              ; *text
-    mov rsi, [rsp + 16]         ; src
-    call codegen_write_operand
-
-    mov rdi, [rsp]              ; *text
-    lea rsi, [rel COMMA_RAX]
-    mov rdx, COMMA_RAX_LEN
-    call vec_extend
-
-    mov rdi, [rsp]              ; *text
-    lea rsi, [rel MOV_RAX_COMMA]
-    mov rdx, 4
-    call vec_extend
-
-    mov rdi, [rsp]              ; *text
-    mov rsi, [rsp + 8]          ; dst
-    call codegen_write_operand
-
-    mov rdi, [rsp]              ; *text
-    lea rsi, [rel COMMA_RAX]
-    mov rdx, COMMA_RAX_LEN
-    call vec_extend
-
-    mov rdi, [rsp]              ; *text
-    lea rsi, [rel XCHG_RAX]
-    mov rdx, XCHG_RAX_LEN
-    call vec_extend
-
-    mov rdi, [rsp]              ; *text
-    mov rsi, [rsp + 16]         ; src
-    call codegen_write_operand
+    mov rsi, [rsp + 8]         ; src
+    lea rdx, [rel OPERAND_RAX]
+    call codegen_move_dst_src
    jmp .epilogue

 .do_move:
@ -1233,12 +1843,12 @@ codegen_move_dst_src:
    mov rsi, [rsp + 16]         ; src
    call codegen_write_operand

-.epilogue:
    mov rdi, [rsp]              ; *text
    lea rsi, [rel COLON_NL]
    inc rsi
-    mov rdx, 1
-    call vec_extend
+    call vec_push
+
+.epilogue:

    add rsp, 24
    pop rbx
@ -1254,7 +1864,15 @@ section .rdata
    MOV_RAX_COMMA_LEN equ $ - MOV_RAX_COMMA
    COMMA_RAX db ", rax"
    COMMA_RAX_LEN equ $ - COMMA_RAX
+    XOR_RDX_RDX db "xor rdx, rdx", 10
+    XOR_RDX_RDX_LEN equ $ - XOR_RDX_RDX
+    ADD_ dq "add "
+    SUB_ dq "sub "
+    MUL_ dq "mul "
+    DIV_ dq "div "
+    

    ; Operand { kind: REGISTER, register: 0, width: 8, len: 0, padding: 0, value: 0 }
    align 8
    OPERAND_RAX dq 0x0000_8001, 0
+    OPERAND_RDX dq 0x0000_8301, 0
--- a/lang/src/tokeniser.asm
+++ b/lang/src/tokeniser.asm
@ -24,6 +24,7 @@ global tokeniser_print
 global find_lexeme
 global expect_token
 global unwrap_token
+global skip_token
 global peek_expect_token
 global peek_lexeme

@ -590,6 +591,17 @@ peek_lexeme:
    pop rbp
    ret

+;; Skips one token ahead, without returning it.
+;; Calls find_lexeme with a 24-byte stack scratch buffer in rdi and discards
+;; whatever it writes there.
+skip_token:
+    push rbp
+    mov rbp, rsp
+
+    sub rsp, 24
+    lea rdi, [rsp]              ; scratch output buffer, discarded
+    call find_lexeme
+    add rsp, 24
+    pop rbp
+    ret                         ; was missing: execution fell through into tokeniser_get_cursor
+
 tokeniser_get_cursor:
    mov rax, [rel cursor]
    ret
--- a/lang/src/tokeniser.inc
+++ b/lang/src/tokeniser.inc
@ -16,6 +16,7 @@ LEXEMES:
    dq LEX_ARROW
    dq LEX_I32
    dq LEX_U32
+    dq LEX_EQEQ
    dq LEX_EQUALS
    dq LEX_PLUS
    dq LEX_MINUS
@ -23,12 +24,14 @@ LEXEMES:
    dq LEX_LPARENS
    dq LEX_RBRACE
    dq LEX_LBRACE
+    dq LEX_COLON2
    dq LEX_COLON
    dq LEX_SEMI
    dq LEX_COMMA
+    dq LEX_PIPE2
    dq LEX_PIPE
+    dq LEX_AMP2
    dq LEX_AMP
-    dq LEX_EQEQ
    dq LEX_LBRACKET
    dq LEX_RBRACKET
    dq LEX_VOID
@ -44,6 +47,21 @@ LEXEMES:
    dq LEX_ISIZE
    dq LEX_F32
    dq LEX_F64
+    dq LEX_PERCENT
+    dq LEX_CARET
+    dq LEX_BANGEQ
+    dq LEX_BANG
+    dq LEX_TILDE
+    dq LEX_LEQ
+    dq LEX_GEQ
+    dq LEX_LESSLESS
+    dq LEX_GTGT
+    dq LEX_LT
+    dq LEX_GT
+    dq LEX_DOT3
+    dq LEX_DOT2
+    dq LEX_DOT
+    dq LEX_BACKTICK

 align 8
 TOKENS:
@ -62,6 +80,7 @@ TOKENS:
    db TOKEN_ARROW                 ;; 12
    db TOKEN_I32                   ;; 13
    db TOKEN_U32                   ;; 14
+    db TOKEN_EQEQ                  ;; 15
    db TOKEN_EQUALS                ;; 15
    db TOKEN_PLUS                  ;; 16
    db TOKEN_MINUS                 ;; 17
@ -69,12 +88,14 @@ TOKENS:
    db TOKEN_LPARENS               ;; 19
    db TOKEN_RBRACE                ;; 20
    db TOKEN_LBRACE                ;; 21
+    db TOKEN_COLON2                ;; 22
    db TOKEN_COLON                 ;; 22
    db TOKEN_SEMI                  ;; 23
    db TOKEN_COMMA                 ;; 24
+    db TOKEN_PIPE2                 ;; 25
    db TOKEN_PIPE                  ;; 25
+    db TOKEN_AMP2                  ;; 26
    db TOKEN_AMP                   ;; 26
-    db TOKEN_EQEQ                  ;; 27
    db TOKEN_LBRACKET              ;; 28
    db TOKEN_RBRACKET              ;; 29
    db TOKEN_VOID                  ;; 30
@ -90,6 +111,21 @@ TOKENS:
    db TOKEN_ISIZE                 ;; 43
    db TOKEN_F32                   ;; 44
    db TOKEN_F64                   ;; 45
+    db TOKEN_PERCENT               ;; 46
+    db TOKEN_CARET                 ;; 47
+    db TOKEN_BANGEQ                ;; 48
+    db TOKEN_BANG                  ;; 49
+    db TOKEN_TILDE                 ;; 50
+    db TOKEN_LEQ                   ;; 51
+    db TOKEN_GEQ                   ;; 52
+    db TOKEN_LESSLESS              ;; 53
+    db TOKEN_GTGT                  ;; 54
+    db TOKEN_LT                    ;; 55
+    db TOKEN_GT                    ;; 56
+    db TOKEN_DOT3                  ;; 57
+    db TOKEN_DOT2                  ;; 58
+    db TOKEN_DOT                   ;; 59
+    db TOKEN_BACKTICK              ;; 60

 align 8
 LEXEME_LENS:
@ -108,6 +144,7 @@ LEXEME_LENS:
    dq LEX_ARROW_len
    dq LEX_I32_len
    dq LEX_U32_len
+    dq LEX_EQEQ_len
    dq LEX_EQUALS_len
    dq LEX_PLUS_len
    dq LEX_MINUS_len
@ -115,12 +152,14 @@ LEXEME_LENS:
    dq LEX_LPARENS_len
    dq LEX_RBRACE_len
    dq LEX_LBRACE_len
+    dq LEX_COLON2_len
    dq LEX_COLON_len
    dq LEX_SEMI_len
    dq LEX_COMMA_len
+    dq LEX_PIPE2_len
    dq LEX_PIPE_len
+    dq LEX_AMP2_len
    dq LEX_AMP_len
-    dq LEX_EQEQ_len
    dq LEX_LBRACKET_len
    dq LEX_RBRACKET_len
    dq LEX_VOID_len
@ -136,9 +175,24 @@ LEXEME_LENS:
    dq LEX_ISIZE_len
    dq LEX_F32_len
    dq LEX_F64_len
+    dq LEX_PERCENT_len
+    dq LEX_CARET_len
+    dq LEX_BANGEQ_len
+    dq LEX_BANG_len
+    dq LEX_TILDE_len
+    dq LEX_LEQ_len
+    dq LEX_GEQ_len
+    dq LEX_LESSLESS_len
+    dq LEX_GTGT_len
+    dq LEX_LT_len
+    dq LEX_GT_len
+    dq LEX_DOT3_len
+    dq LEX_DOT2_len
+    dq LEX_DOT_len
+    dq LEX_BACKTICK_len

 align 8
-NUM_LEXEMES: dq 43
+NUM_LEXEMES: dq 61

    LEX_NOT_A_LEXEME db "<not a lexeme>", 0
    LEX_LET db "let"
@ -169,6 +223,8 @@ NUM_LEXEMES: dq 43
    LEX_I32_len equ $ - LEX_I32
    LEX_U32 db "u32"
    LEX_U32_len equ $ - LEX_U32
+    LEX_EQEQ db "=="
+    LEX_EQEQ_len equ $ - LEX_EQEQ
    LEX_EQUALS db "="
    LEX_EQUALS_len equ $ - LEX_EQUALS
    LEX_PLUS db "+"
@ -183,18 +239,22 @@ NUM_LEXEMES: dq 43
    LEX_RBRACE_len equ $ - LEX_RBRACE
    LEX_LBRACE db "{"
    LEX_LBRACE_len equ $ - LEX_LBRACE
+    LEX_COLON2 db "::"
+    LEX_COLON2_len equ $ - LEX_COLON2
    LEX_COLON db ":"
    LEX_COLON_len equ $ - LEX_COLON
    LEX_SEMI db ";"
    LEX_SEMI_len equ $ - LEX_SEMI
    LEX_COMMA db ","
    LEX_COMMA_len equ $ - LEX_COMMA
+    LEX_PIPE2 db "||"
+    LEX_PIPE2_len equ $ - LEX_PIPE2
    LEX_PIPE db "|"
    LEX_PIPE_len equ $ - LEX_PIPE
+    LEX_AMP2 db "&&"
+    LEX_AMP2_len equ $ - LEX_AMP2
    LEX_AMP db "&"
    LEX_AMP_len equ $ - LEX_AMP
-    LEX_EQEQ db "=="
-    LEX_EQEQ_len equ $ - LEX_EQEQ
    LEX_LBRACKET db "["
    LEX_LBRACKET_len equ $ - LEX_LBRACKET
    LEX_RBRACKET db "]"
@ -225,6 +285,36 @@ NUM_LEXEMES: dq 43
    LEX_F32_len equ $ - LEX_F32
    LEX_F64 db "f64"
    LEX_F64_len equ $ - LEX_F64
+    LEX_PERCENT db "%"
+    LEX_PERCENT_len equ $ - LEX_PERCENT
+    LEX_CARET db "^"
+    LEX_CARET_len equ $ - LEX_CARET
+    LEX_BANGEQ db "!="
+    LEX_BANGEQ_len equ $ - LEX_BANGEQ
+    LEX_BANG db "!"
+    LEX_BANG_len equ $ - LEX_BANG
+    LEX_TILDE db "~"
+    LEX_TILDE_len equ $ - LEX_TILDE
+    LEX_LEQ db "<="
+    LEX_LEQ_len equ $ - LEX_LEQ
+    LEX_GEQ db ">="
+    LEX_GEQ_len equ $ - LEX_GEQ
+    LEX_LESSLESS db "<<"
+    LEX_LESSLESS_len equ $ - LEX_LESSLESS
+    LEX_GTGT db ">>"
+    LEX_GTGT_len equ $ - LEX_GTGT
+    LEX_LT db "<"
+    LEX_LT_len equ $ - LEX_LT
+    LEX_GT db ">"
+    LEX_GT_len equ $ - LEX_GT
+    LEX_DOT3 db "..."
+    LEX_DOT3_len equ $ - LEX_DOT3
+    LEX_DOT2 db ".."
+    LEX_DOT2_len equ $ - LEX_DOT2
+    LEX_DOT db "."
+    LEX_DOT_len equ $ - LEX_DOT
+    LEX_BACKTICK db "`"
+    LEX_BACKTICK_len equ $ - LEX_BACKTICK
    LEX_IDENT db "<identifier>"
    LEX_IDENT_len equ $ - LEX_IDENT
    LEX_NUMBER db "<number>"
@ -250,36 +340,54 @@ NUM_LEXEMES: dq 43
    TOKEN_ARROW     equ 12 ; :u8
    TOKEN_I32       equ 13 ; :u8
    TOKEN_U32       equ 14 ; :u8
-    TOKEN_EQUALS    equ 15 ; :u8
-    TOKEN_PLUS      equ 16 ; :u8
-    TOKEN_MINUS     equ 17 ; :u8
-    TOKEN_RPARENS   equ 18 ; :u8
-    TOKEN_LPARENS   equ 19 ; :u8
-    TOKEN_RBRACE    equ 20 ; :u8
-    TOKEN_LBRACE    equ 21 ; :u8
-    TOKEN_COLON     equ 22 ; :u8
-    TOKEN_SEMI      equ 23 ; :u8
-    TOKEN_COMMA     equ 24 ; :u8
-    TOKEN_PIPE      equ 25 ; :u8
-    TOKEN_AMP       equ 26 ; :u8
-    TOKEN_EQEQ      equ 27 ; :u8
-    TOKEN_LBRACKET  equ 28 ; :u8
-    TOKEN_RBRACKET  equ 29 ; :u8
-    TOKEN_VOID      equ 30 ; :u8
-    TOKEN_SLASH     equ 31 ; :u8
-    TOKEN_STAR      equ 32 ; :u8
-    TOKEN_U8        equ 33 ; :u8
-    TOKEN_I8        equ 34 ; :u8
-    TOKEN_U16       equ 35 ; :u8
-    TOKEN_I16       equ 36 ; :u8
-    TOKEN_U64       equ 37 ; :u8
-    TOKEN_I64       equ 38 ; :u8
-    TOKEN_USIZE     equ 39 ; :u8
-    TOKEN_ISIZE     equ 40 ; :u8
-    TOKEN_F32       equ 41 ; :u8
-    TOKEN_F64       equ 42 ; :u8
-    TOKEN_IDENT     equ 43 ; :u8
-    TOKEN_NUMBER    equ 44 ; :u8
-    TOKEN_STRING    equ 45 ; :u8
-    TOKEN_COMMENT   equ 46 ; :u8
+    TOKEN_EQEQ      equ 15 ; :u8
+    TOKEN_EQUALS    equ 16 ; :u8
+    TOKEN_PLUS      equ 17 ; :u8
+    TOKEN_MINUS     equ 18 ; :u8
+    TOKEN_RPARENS   equ 19 ; :u8
+    TOKEN_LPARENS   equ 20 ; :u8
+    TOKEN_RBRACE    equ 21 ; :u8
+    TOKEN_LBRACE    equ 22 ; :u8
+    TOKEN_COLON     equ 23 ; :u8
+    TOKEN_COLON2    equ 24 ; :u8
+    TOKEN_SEMI      equ 25 ; :u8
+    TOKEN_COMMA     equ 26 ; :u8
+    TOKEN_PIPE      equ 27 ; :u8
+    TOKEN_PIPE2     equ 28 ; :u8
+    TOKEN_AMP       equ 29 ; :u8
+    TOKEN_AMP2      equ 30 ; :u8
+    TOKEN_LBRACKET  equ 31 ; :u8
+    TOKEN_RBRACKET  equ 32 ; :u8
+    TOKEN_VOID      equ 33 ; :u8
+    TOKEN_SLASH     equ 34 ; :u8
+    TOKEN_STAR      equ 35 ; :u8
+    TOKEN_U8        equ 36 ; :u8
+    TOKEN_I8        equ 37 ; :u8
+    TOKEN_U16       equ 38 ; :u8
+    TOKEN_I16       equ 39 ; :u8
+    TOKEN_U64       equ 40 ; :u8
+    TOKEN_I64       equ 41 ; :u8
+    TOKEN_USIZE     equ 42 ; :u8
+    TOKEN_ISIZE     equ 43 ; :u8
+    TOKEN_F32       equ 44 ; :u8
+    TOKEN_F64       equ 45 ; :u8
+    TOKEN_PERCENT   equ 46 ; :u8
+    TOKEN_CARET     equ 47 ; :u8
+    TOKEN_BANGEQ    equ 48 ; :u8
+    TOKEN_BANG      equ 49 ; :u8
+    TOKEN_TILDE     equ 50 ; :u8
+    TOKEN_LEQ       equ 51 ; :u8
+    TOKEN_GEQ       equ 52 ; :u8
+    TOKEN_LESSLESS  equ 53 ; :u8
+    TOKEN_GTGT      equ 54 ; :u8
+    TOKEN_LT        equ 55 ; :u8
+    TOKEN_GT        equ 56 ; :u8
+    TOKEN_DOT3      equ 57 ; :u8
+    TOKEN_DOT2      equ 58 ; :u8
+    TOKEN_DOT       equ 59 ; :u8
+    TOKEN_BACKTICK  equ 60 ; :u8
+    TOKEN_IDENT     equ 61 ; :u8
+    TOKEN_NUMBER    equ 62 ; :u8
+    TOKEN_STRING    equ 63 ; :u8
+    TOKEN_COMMENT   equ 64 ; :u8
    ;; end-consts
--- a/lang/tests/ast.rs
+++ b/lang/tests/ast.rs
@ -60,6 +60,11 @@ fn main() {
        };
    }

+    print_ast(
+        b"fn main() -> void { return 1 * 2 + 3 * 4; }",
+        |ast| unsafe { parse_func(ast) },
+    );
+
    print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) });
    print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
        parse_func(ast)
--- a/lang/tests/codegen.rs
+++ b/lang/tests/codegen.rs
@ -93,7 +93,7 @@ fn main() {

    print_ast(
        b"fn main(a: u32) -> void {
-    return 4;
+    return 2 * 3 + 4 * 5;
    }",
        |ast| unsafe { parse_func(ast) },
    );
--- a/lang/tests/shared/defs.rs
+++ b/lang/tests/shared/defs.rs
@ -20,7 +20,11 @@ unsafe extern "C" {
    pub unsafe fn get_register_name(reg_idx: u8, width: u8, buffer: *mut u8) -> FFISlice;
    pub unsafe fn stackvar_cmp(a: *const (u64, u64), b: *const (u64, u64)) -> i32;
    pub unsafe fn codegen_allocate_register(ctx: *mut FunctionCtx) -> u8;
+    pub unsafe fn codegen_allocate_place(ctx: *mut FunctionCtx, width: u16) -> Operand;
+    pub unsafe fn codegen_allocate_value(ctx: *mut FunctionCtx, width: u16) -> Operand;
+    pub unsafe fn codegen_free_operand(ctx: *mut FunctionCtx, operand: *const Operand) -> ();
    pub unsafe fn codegen_function(ast: *const CodegenCtx, func_idx: u64) -> ();
+    pub unsafe fn codegen_push_pop_used_registers(text: *mut Vec<u8>, function_ctx: &FunctionCtx, pop: bool) -> u8;
    pub unsafe fn codegen_expr(ctx: *const CodegenCtx, function_ctx: &FunctionCtx, expr_idx: u64) -> (u64, bool);
    pub unsafe fn vec_insert_many(vec: *mut BlobVec, index: usize, data: *const u8, count: usize);
    pub unsafe fn vec_extend(vec: *mut BlobVec, elements: *const u8, count: usize) -> ();
@ -75,38 +79,56 @@ pub const TOKEN_BOOL: u8 = 11;
 pub const TOKEN_ARROW: u8 = 12;
 pub const TOKEN_I32: u8 = 13;
 pub const TOKEN_U32: u8 = 14;
-pub const TOKEN_EQUALS: u8 = 15;
-pub const TOKEN_PLUS: u8 = 16;
-pub const TOKEN_MINUS: u8 = 17;
-pub const TOKEN_RPARENS: u8 = 18;
-pub const TOKEN_LPARENS: u8 = 19;
-pub const TOKEN_RBRACE: u8 = 20;
-pub const TOKEN_LBRACE: u8 = 21;
-pub const TOKEN_COLON: u8 = 22;
-pub const TOKEN_SEMI: u8 = 23;
-pub const TOKEN_COMMA: u8 = 24;
-pub const TOKEN_PIPE: u8 = 25;
-pub const TOKEN_AMP: u8 = 26;
-pub const TOKEN_EQEQ: u8 = 27;
-pub const TOKEN_LBRACKET: u8 = 28;
-pub const TOKEN_RBRACKET: u8 = 29;
-pub const TOKEN_VOID: u8 = 30;
-pub const TOKEN_SLASH: u8 = 31;
-pub const TOKEN_STAR: u8 = 32;
-pub const TOKEN_U8: u8 = 33;
-pub const TOKEN_I8: u8 = 34;
-pub const TOKEN_U16: u8 = 35;
-pub const TOKEN_I16: u8 = 36;
-pub const TOKEN_U64: u8 = 37;
-pub const TOKEN_I64: u8 = 38;
-pub const TOKEN_USIZE: u8 = 39;
-pub const TOKEN_ISIZE: u8 = 40;
-pub const TOKEN_F32: u8 = 41;
-pub const TOKEN_F64: u8 = 42;
-pub const TOKEN_IDENT: u8 = 43;
-pub const TOKEN_NUMBER: u8 = 44;
-pub const TOKEN_STRING: u8 = 45;
-pub const TOKEN_COMMENT: u8 = 46;
+pub const TOKEN_EQEQ: u8 = 15;
+pub const TOKEN_EQUALS: u8 = 16;
+pub const TOKEN_PLUS: u8 = 17;
+pub const TOKEN_MINUS: u8 = 18;
+pub const TOKEN_RPARENS: u8 = 19;
+pub const TOKEN_LPARENS: u8 = 20;
+pub const TOKEN_RBRACE: u8 = 21;
+pub const TOKEN_LBRACE: u8 = 22;
+pub const TOKEN_COLON: u8 = 23;
+pub const TOKEN_COLON2: u8 = 24;
+pub const TOKEN_SEMI: u8 = 25;
+pub const TOKEN_COMMA: u8 = 26;
+pub const TOKEN_PIPE: u8 = 27;
+pub const TOKEN_PIPE2: u8 = 28;
+pub const TOKEN_AMP: u8 = 29;
+pub const TOKEN_AMP2: u8 = 30;
+pub const TOKEN_LBRACKET: u8 = 31;
+pub const TOKEN_RBRACKET: u8 = 32;
+pub const TOKEN_VOID: u8 = 33;
+pub const TOKEN_SLASH: u8 = 34;
+pub const TOKEN_STAR: u8 = 35;
+pub const TOKEN_U8: u8 = 36;
+pub const TOKEN_I8: u8 = 37;
+pub const TOKEN_U16: u8 = 38;
+pub const TOKEN_I16: u8 = 39;
+pub const TOKEN_U64: u8 = 40;
+pub const TOKEN_I64: u8 = 41;
+pub const TOKEN_USIZE: u8 = 42;
+pub const TOKEN_ISIZE: u8 = 43;
+pub const TOKEN_F32: u8 = 44;
+pub const TOKEN_F64: u8 = 45;
+pub const TOKEN_PERCENT: u8 = 46;
+pub const TOKEN_CARET: u8 = 47;
+pub const TOKEN_BANGEQ: u8 = 48;
+pub const TOKEN_BANG: u8 = 49;
+pub const TOKEN_TILDE: u8 = 50;
+pub const TOKEN_LEQ: u8 = 51;
+pub const TOKEN_GEQ: u8 = 52;
+pub const TOKEN_LESSLESS: u8 = 53;
+pub const TOKEN_GTGT: u8 = 54;
+pub const TOKEN_LT: u8 = 55;
+pub const TOKEN_GT: u8 = 56;
+pub const TOKEN_DOT3: u8 = 57;
+pub const TOKEN_DOT2: u8 = 58;
+pub const TOKEN_DOT: u8 = 59;
+pub const TOKEN_BACKTICK: u8 = 60;
+pub const TOKEN_IDENT: u8 = 61;
+pub const TOKEN_NUMBER: u8 = 62;
+pub const TOKEN_STRING: u8 = 63;
+pub const TOKEN_COMMENT: u8 = 64;

 #[repr(C)]
 #[derive(Debug)]
@ -214,7 +236,8 @@ pub struct CodegenCtx {
 #[repr(C)]
 #[derive(Debug)]
 pub struct FunctionCtx {
-    pub current_stack_size: u64,
+    pub current_stack_size: u32,
+    pub max_stack_size: u32,
    pub stack_vars: Vec<(u64, u64)>,
    pub register_bitset: u16,
    pub dirtied_register_bitset: u16,
--- a/lang/tests/tokens.rs
+++ b/lang/tests/tokens.rs
@ -8,9 +8,10 @@ struct Lexeme(u8, &'static str);

 impl PartialEq for Lexeme {
    fn eq(&self, other: &Self) -> bool {
+        use util::defs::{TOKEN_IDENT, TOKEN_NUMBER};
        match self.0 {
            // Identifiers and numbers compare both token and lexeme
-            30 | 31 => self.0 == other.0 && self.1 == other.1,
+            TOKEN_IDENT | TOKEN_NUMBER => self.0 == other.0 && self.1 == other.1,
            _ => self.0 == other.0,
        }
    }
@ -115,17 +116,17 @@ fn main() {
        assert_eq!(
            &collect_tokens()[..],
            &[
-                Lexeme(19, ""),
-                Lexeme(18, ""),
-                Lexeme(28, ""),
-                Lexeme(29, ""),
-                Lexeme(21, ""),
-                Lexeme(20, ""),
-                Lexeme(24, ""),
-                Lexeme(12, ""),
-                Lexeme(23, ""),
-                Lexeme(22, ""),
-                Lexeme(15, ""),
+                Lexeme(TOKEN_LPARENS, ""),
+                Lexeme(TOKEN_RPARENS, ""),
+                Lexeme(TOKEN_LBRACKET, ""),
+                Lexeme(TOKEN_RBRACKET, ""),
+                Lexeme(TOKEN_LBRACE, ""),
+                Lexeme(TOKEN_RBRACE, ""),
+                Lexeme(TOKEN_COMMA, ""),
+                Lexeme(TOKEN_ARROW, ""),
+                Lexeme(TOKEN_SEMI, ""),
+                Lexeme(TOKEN_COLON, ""),
+                Lexeme(TOKEN_EQUALS, ""),
            ][..]
        );

@ -143,7 +144,7 @@ fn main() {
                Lexeme(TOKEN_IDENT, "number12345"),
                Lexeme(TOKEN_IDENT, "____"),
                Lexeme(TOKEN_IDENT, "_"),
-                Lexeme(17, ""),
+                Lexeme(TOKEN_MINUS, ""),
                Lexeme(TOKEN_IDENT, "leading-minus"),
                Lexeme(TOKEN_IDENT, "trailing-minus-"),
            ]
@ -156,17 +157,17 @@ fn main() {
        assert_eq!(
            &collect_tokens()[..],
            &[
-                Lexeme(4, ""),
+                Lexeme(TOKEN_FN, ""),
                Lexeme(TOKEN_IDENT, "my-function"),
-                Lexeme(19, ""),
-                Lexeme(18, ""),
-                Lexeme(12, ""),
-                Lexeme(11, ""),
-                Lexeme(21, ""),
-                Lexeme(5, ""),
-                Lexeme(10, ""),
-                Lexeme(23, ""),
-                Lexeme(20, ""),
+                Lexeme(TOKEN_LPARENS, ""),
+                Lexeme(TOKEN_RPARENS, ""),
+                Lexeme(TOKEN_ARROW, ""),
+                Lexeme(TOKEN_BOOL, ""),
+                Lexeme(TOKEN_LBRACE, ""),
+                Lexeme(TOKEN_RETURN, ""),
+                Lexeme(TOKEN_FALSE, ""),
+                Lexeme(TOKEN_SEMI, ""),
+                Lexeme(TOKEN_RBRACE, ""),
            ]
        );

@ -174,8 +175,8 @@ fn main() {
        tokeniser_init(c"tests/tokens/function.l".as_ptr());
        eprintln!("ok.");

-        assert_eq!(expect_token(2).into_option(), None);
-        assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
+        assert_eq!(expect_token(TOKEN_IF).into_option(), None);
+        assert_eq!(expect_token(TOKEN_FN).into_option().unwrap().as_str(), "fn");
        assert_eq!(unwrap_token(TOKEN_IDENT).as_str(), "my-function");

        eprint!("Initializing tokeniser.. ");
@ -186,18 +187,18 @@ fn main() {
            &collect_tokens()[..],
            &[
                Lexeme(TOKEN_COMMENT, ""),
-                Lexeme(4, ""),
+                Lexeme(TOKEN_FN, ""),
                Lexeme(TOKEN_IDENT, "my-function"),
-                Lexeme(19, ""),
-                Lexeme(18, ""),
-                Lexeme(12, ""),
-                Lexeme(11, ""),
-                Lexeme(21, ""),
+                Lexeme(TOKEN_LPARENS, ""),
+                Lexeme(TOKEN_RPARENS, ""),
+                Lexeme(TOKEN_ARROW, ""),
+                Lexeme(TOKEN_BOOL, ""),
+                Lexeme(TOKEN_LBRACE, ""),
                Lexeme(TOKEN_COMMENT, ""),
-                Lexeme(5, ""),
-                Lexeme(10, ""),
-                Lexeme(23, ""),
-                Lexeme(20, ""),
+                Lexeme(TOKEN_RETURN, ""),
+                Lexeme(TOKEN_FALSE, ""),
+                Lexeme(TOKEN_SEMI, ""),
+                Lexeme(TOKEN_RBRACE, ""),
            ]
        );

@ -243,7 +244,7 @@ fn main() {
            &collect_tokens()[..],
            &[
                Lexeme(TOKEN_NUMBER, "3"),
-                Lexeme(16, "+"),
+                Lexeme(TOKEN_PLUS, "+"),
                Lexeme(TOKEN_NUMBER, "4")
            ],
        );
@ -256,19 +257,19 @@ fn main() {
        assert_eq!(
            &collect_tokens()[..],
            &[
-                Lexeme(4, "fn"),
+                Lexeme(TOKEN_FN, "fn"),
                Lexeme(TOKEN_IDENT, "main"),
-                Lexeme(19, "("),
-                Lexeme(18, ")"),
-                Lexeme(12, "->"),
-                Lexeme(30, "void"),
-                Lexeme(21, "{"),
-                Lexeme(5, "return"),
+                Lexeme(TOKEN_LPARENS, "("),
+                Lexeme(TOKEN_RPARENS, ")"),
+                Lexeme(TOKEN_ARROW, "->"),
+                Lexeme(TOKEN_VOID, "void"),
+                Lexeme(TOKEN_LBRACE, "{"),
+                Lexeme(TOKEN_RETURN, "return"),
                Lexeme(TOKEN_NUMBER, "1"),
-                Lexeme(16, "+"),
+                Lexeme(TOKEN_PLUS, "+"),
                Lexeme(TOKEN_NUMBER, "2"),
-                Lexeme(23, ";"),
-                Lexeme(20, "}"),
+                Lexeme(TOKEN_SEMI, ";"),
+                Lexeme(TOKEN_RBRACE, "}"),
            ],
        );

@ -280,16 +281,42 @@ fn main() {
        assert_eq!(
            &collect_tokens()[..],
            &[
-                Lexeme(19, "("),
-                Lexeme(33, "b"),
-                Lexeme(31, "/"),
-                Lexeme(33, "d"),
-                Lexeme(16, "+"),
-                Lexeme(33, "c"),
-                Lexeme(18, ")"),
-                Lexeme(32, "*"),
-                Lexeme(34, "42"),
-                Lexeme(23, ";")
+                Lexeme(TOKEN_LPARENS, "("),
+                Lexeme(TOKEN_IDENT, "b"),
+                Lexeme(TOKEN_SLASH, "/"),
+                Lexeme(TOKEN_IDENT, "d"),
+                Lexeme(TOKEN_PLUS, "+"),
+                Lexeme(TOKEN_IDENT, "c"),
+                Lexeme(TOKEN_RPARENS, ")"),
+                Lexeme(TOKEN_STAR, "*"),
+                Lexeme(TOKEN_NUMBER, "42"),
+                Lexeme(TOKEN_SEMI, ";")
+            ],
+        );
+
+        eprint!("Initializing tokeniser.. ");
+        let src = b"<<<=<a == b = c ||| &||&&|&";
+        tokeniser_init_buf(src.as_ptr(), src.len());
+        eprintln!("ok.");
+
+        assert_eq!(
+            &collect_tokens()[..],
+            &[
+                Lexeme(TOKEN_LESSLESS, ""),
+                Lexeme(TOKEN_LEQ, ""),
+                Lexeme(TOKEN_LT, ""),
+                Lexeme(TOKEN_IDENT, "a"),
+                Lexeme(TOKEN_EQEQ, ""),
+                Lexeme(TOKEN_IDENT, "b"),
+                Lexeme(TOKEN_EQUALS, ""),
+                Lexeme(TOKEN_IDENT, "c"),
+                Lexeme(TOKEN_PIPE2, ""),
+                Lexeme(TOKEN_PIPE, ""),
+                Lexeme(TOKEN_AMP, ""),
+                Lexeme(TOKEN_PIPE2, ""),
+                Lexeme(TOKEN_AMP2, ""),
+                Lexeme(TOKEN_PIPE, ""),
+                Lexeme(TOKEN_AMP, ""),
            ],
        );
Author	SHA1	Message	Date
janis	6ebc6afb2b	codegen for binop	2025-11-02 01:07:37 +01:00
janis	b9712dacfb	fix ast gen with nested binary ops	2025-11-02 00:30:00 +01:00
janis	a0f4b56c64	skip_token method	2025-11-02 00:29:45 +01:00
janis	c0dc1361ae	add more tokens	2025-11-02 00:00:22 +01:00
janis	9cb3331f60	method for pushing/poping registers before calling functions	2025-11-01 18:24:37 +01:00
janis	0436b23361	remove test line	2025-11-01 17:57:11 +01:00
janis	0be7ffba5a	change move_dst_src to call itself and use rax as a scratch register rather than xchging because immediate values	2025-11-01 17:51:46 +01:00
janis	703aa299c8	split current_stack_size u64 into current/max u32 to allow deallocating the top stack allocation	2025-11-01 17:39:17 +01:00
janis	16bdac93ad	free operands after use	2025-11-01 16:43:26 +01:00
janis	ecb4a83153	allocate/free operand methods	2025-11-01 16:18:01 +01:00