Compare commits

...

10 commits

8 changed files with 1020 additions and 243 deletions

View file

@ -4,11 +4,11 @@ default rel
%include "src/ast.inc"
section .rdata
PRECEDENCE_ADD equ 90
PRECEDENCE_SUB equ 90
PRECEDENCE_MUL equ 100
PRECEDENCE_DIV equ 100
PRECEDENCE_REM equ 100
PRECEDENCE_ADD dw 90
PRECEDENCE_SUB dw 90
PRECEDENCE_MUL dw 100
PRECEDENCE_DIV dw 100
PRECEDENCE_REM dw 100
section .text
extern vec_init_with
@ -28,6 +28,7 @@ extern tokeniser_init
extern find_lexeme
extern peek_lexeme
extern expect_token
extern skip_token
extern unwrap_token
extern peek_expect_token
@ -400,6 +401,7 @@ parse_primary_expr:
parse_binary_expr:
push rbp
mov rbp, rsp
push rbx
; size: 24, align: 8
; start-structs
@ -438,40 +440,19 @@ parse_binary_expr:
lea rdi, [rsp + 32] ; lexeme
call peek_lexeme
mov rax, [rsp + 32]
mov byte [rsp + 16], al ; operator
mov bx, -1
cmp al, TOKEN_PLUS
je .add
cmove bx, word [rel PRECEDENCE_ADD]
cmp al, TOKEN_MINUS
je .sub
cmove bx, word [rel PRECEDENCE_SUB]
cmp al, TOKEN_STAR
je .mul
cmove bx, word [rel PRECEDENCE_MUL]
cmp al, TOKEN_SLASH
je .div
jmp .done
.add:
mov dil, TOKEN_PLUS
call unwrap_token
mov byte [rsp + 16], TOKEN_PLUS
mov byte [rsp + 18], PRECEDENCE_ADD
jmp .right
.sub:
mov dil, TOKEN_MINUS
call unwrap_token
mov byte [rsp + 16], TOKEN_MINUS
mov byte [rsp + 18], PRECEDENCE_SUB
jmp .right
.mul:
mov dil, TOKEN_STAR
call unwrap_token
mov byte [rsp + 16], TOKEN_STAR
mov byte [rsp + 18], PRECEDENCE_MUL
jmp .right
.div:
mov dil, TOKEN_SLASH
call unwrap_token
mov byte [rsp + 16], TOKEN_SLASH
mov byte [rsp + 18], PRECEDENCE_DIV
jmp .right
cmove bx, word [rel PRECEDENCE_DIV]
cmp bx, -1
je .done
mov byte [rsp + 18], bl
.right:
mov dil, [rsp + 17]
@ -479,6 +460,8 @@ parse_binary_expr:
cmp al, dil ; our_precedence <= upper_precedence
jle .done
call skip_token ; consume operator
mov rdi, [rsp] ; Ast
mov sil, [rsp + 18]
call parse_binary_expr
@ -529,6 +512,7 @@ parse_binary_expr:
mov rax, [rsp + 8] ; left
movzx rdx, byte [rsp + 19] ; left_placeness
add rsp, 64
pop rbx
pop rbp
ret

View file

@ -1,6 +1,7 @@
default rel
%include "src/ast.inc"
%include "src/tokeniser.inc"
extern panic
extern vec_extend
@ -10,6 +11,7 @@ extern vec_insert_sorted
extern vec_insert_many
extern vec_init_with
extern int_to_str2
extern strlen
global codegen_function
global get_register_name
@ -271,7 +273,8 @@ stackvar_cmp:
;; text: Vec<u8>,
;; }
;; struct FunctionCtx {
;; current_stack_size: u64,
;; current_stack_size: u32,
;; max_stack_size: u32,
;; stack_vars: Vec<(u64, u64)>,
;; register_bitset: u16,
;; dirtied_register_bitset: u16,
@ -301,7 +304,7 @@ codegen_allocate_register:
; flip bits
not ax
test ax, ax
jz .panic
jz .no_regs
; find first set bit
bsf cx, ax
@ -313,14 +316,146 @@ codegen_allocate_register:
mov ax, word [rdi + 52] ; dirtied_register_bitset
bts ax, cx
mov word [rdi + 52], ax ; update dirtied_register_bitset
jmp .done
.no_regs:
; return -1u64 to indicate no free registers
; the caller should panic or allocate a stack slot instead
mov rbx, -1
.done:
mov rax, rbx
pop rbx
pop rbp
ret
;; Reserves `width` bytes at the top of the function's stack area and returns
;; an rbp-relative Operand for the new slot.
;; Returns rax = packed Operand header (kind | width << 12), rdx = Operand.value
;; rdi: *FunctionCtx
;; rsi: width
;; define-fn: fn codegen_allocate_place(ctx: *mut FunctionCtx, width: u16) -> Operand
codegen_allocate_place:
    push rbp
    mov rbp, rsp

    ; current_stack_size += width (the 32-bit write zero-extends into rdx)
    mov edx, esi
    add edx, dword [rdi + 0]
    mov dword [rdi + 0], edx

    ; max_stack_size = max(max_stack_size, current_stack_size), unsigned
    mov eax, dword [rdi + 4]
    cmp edx, eax
    cmova eax, edx
    mov dword [rdi + 4], eax

    ; Operand header: len = 0, width in the high nibble of byte 1,
    ; register nibble left as 0 (undefined for this kind), kind in byte 0
    mov eax, esi
    shl eax, 12
    or eax, OPERAND_RBP_OFFSET

    ; Operand.value = -current_stack_size
    neg rdx

    pop rbp
    ret
;; Allocates storage for a value of `width` bytes and returns its Operand
;; (rax = packed header, rdx = Operand.value).
;; Widths up to 8 bytes get a register when codegen_allocate_register has one
;; free; wider values, or any value when it returns -1, get a stack slot from
;; codegen_allocate_place.
;; rdi: *FunctionCtx
;; rsi: width
;; define-fn: fn codegen_allocate_value(ctx: *mut FunctionCtx, width: u16) -> Operand
codegen_allocate_value:
    push rbp
    mov rbp, rsp
    push rbx

    ; width [8..16]
    ; *FunctionCtx [0..8]
    sub rsp, 16
    mov [rsp], rdi ; ctx
    mov [rsp + 8], rsi ; width

    ; width is unsigned, so use an unsigned comparison
    cmp rsi, 8
    ja .alloc_stack

.alloc_reg:
    call codegen_allocate_register
    cmp rax, -1 ; -1 = no free register
    je .alloc_stack
    mov rbx, rax

    ; construct Operand: len = 0 | width | register | kind
    mov eax, dword [rsp + 8] ; Operand.width
    shl eax, 4
    or eax, ebx ; Operand.register
    shl eax, 8
    or eax, OPERAND_REGISTER ; Operand.kind
    xor edx, edx ; Operand.value = 0
    jmp .done

.alloc_stack:
    ; rdi/rsi are caller-saved and may have been clobbered by the call to
    ; codegen_allocate_register, so restore both arguments from the frame
    mov rdi, [rsp] ; ctx
    mov rsi, [rsp + 8] ; width
    call codegen_allocate_place

.done:
    add rsp, 16
    pop rbx
    pop rbp
    ret
;; Releases the storage behind an operand.
;;  - OPERAND_REGISTER: clears the register's bit in register_bitset and panics
;;    if that bit was not set.
;;  - OPERAND_RBP_OFFSET: shrinks current_stack_size by the operand's width, but
;;    only when the operand is the slot at the top of the stack area.
;;  - any other kind: no-op.
;; rdi: *FunctionCtx
;; rsi: *Operand
;; define-fn: fn codegen_free_operand(ctx: *mut FunctionCtx, operand: *const Operand) -> ()
codegen_free_operand:
    push rbp
    mov rbp, rsp
    push rbx

    sub rsp, 16
    mov [rsp], rdi ; ctx
    mov [rsp + 8], rsi ; operand

    mov al, byte [rsi] ; Operand.kind
    cmp al, OPERAND_REGISTER
    je .free_reg
    cmp al, OPERAND_RBP_OFFSET
    je .free_stack
    jmp .done

.free_stack:
    mov ebx, dword [rdi + 0] ; current_stack_size (zero-extended)
    ; codegen_allocate_place stores Operand.value = -current_stack_size, so the
    ; value has to be negated before comparing it with the positive size
    mov rax, [rsi + 8] ; Operand.value
    neg rax
    cmp rbx, rax
    jne .done ; operand not at top of stack, can't free
    movzx eax, byte [rsi + 1] ; Operand.register_and_width
    shr eax, 4 ; width lives in the high nibble
    sub ebx, eax
    mov dword [rdi + 0], ebx ; current_stack_size -= width
    jmp .done

.free_reg:
    movzx eax, byte [rsi + 1] ; Operand.register_and_width
    and eax, 0x0F ; register index lives in the low nibble
    mov bx, word [rdi + 48] ; register_bitset
    btr bx, ax
    jnc .panic ; trying to free unallocated register
    mov word [rdi + 48], bx ; update register_bitset

.done:
    add rsp, 16
    pop rbx
    pop rbp
    ret

.panic:
    ; attempted to free a register that is not marked as allocated
    call panic
;; rdi: *Ctx
@ -339,7 +474,8 @@ codegen_function:
; dirtied-register-bitset [76..80] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
; register-bitset [72..76] [a,b,c,d,si,di,bp,sp,8,9,10,11,12,13,14,15]
; stack-vars: Vec<(index, offset)> [32..72]
; current_stack_size: [24..32]
; max_stack_size: [28..32]
; current_stack_size: [24..28]
; func_idx [16..24]
; ast [8..16]
; ctx [0..8]
@ -348,7 +484,7 @@ codegen_function:
mov rax, [rdi]
mov [rsp + 8], rax ; ast
mov [rsp + 16], rsi ; func_idx
mov qword [rsp + 24], 0 ; current_stack_size = 0
mov qword [rsp + 24], 0 ; current_stack_size = 0, max_stack_size = 0
lea rdi, [rsp + 32] ; stack-vars
mov rsi, 16 ; size_of::<(u64, u64)>
@ -356,10 +492,10 @@ codegen_function:
mov rcx, 16 ; initial capacity
call vec_init_with
bts word [rsp + 72], 7 ; mark rsp as used
bts word [rsp + 72], 6 ; mark rbp as used
bts word [rsp + 72], 0 ; mark rax as used
mov word [rsp + 76], 0 ; dirtied_register_bitset = 0
bts word [rsp + 72], 7 ; mark rsp as used
bts word [rsp + 72], 6 ; mark rbp as used
bts word [rsp + 72], 0 ; mark rax as used
mov word [rsp + 76], 0 ; dirtied_register_bitset = 0
; push "section .text\n"
mov rdi, [rsp] ; ctx
@ -441,11 +577,18 @@ codegen_function:
lea rsi, [rax + r14 * 8] ;
mov rsi, [rsi] ; AstFunction.args[i]
mov [rsp + 104], rsi ; scratch
mov rax, [rsp + 24] ; current_stack_size
add rax, 8 ; size_of::<u64>
mov [rsp + 24], rax ; current_stack_size += size_of::<u64>
mov [rsp + 112], rax
mov [rsp + 104], rsi ; &(index, _)
lea rdi, [rsp + 24] ; &function_ctx
; TODO: get arg type size
mov rsi, 8 ; size_of::<u64>
call codegen_allocate_place
; rdx = stack offset
mov [rsp + 80], rax ; Operand
mov [rsp + 88], rdx ; Operand.value
neg rdx
mov [rsp + 112], rdx ; &(_, offset)
lea rdi, [rsp + 32] ; stack-vars
lea rsi, [rsp + 104] ; &scratch: &(index, offset)
mov rdx, stackvar_cmp
@ -454,17 +597,16 @@ codegen_function:
; spill arg from register to newly allocated stack slot
; get source Operand
mov r13, [rsp + 112] ; current_stack_size before increment
mov rdi, [rsp + 104] ; arg index
mov rdi, r14 ; arg index
call codegen_arg_to_operand
mov [rsp + 104], rax
mov [rsp + 112], rdx
mov rdx, r13 ; offset
neg rdx
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rsp + 104] ; src
call codegen_move_rbp_slot_src
lea rdx, [rsp + 80] ; dst
call codegen_move_dst_src
inc r14
jmp .arg_loop
@ -501,7 +643,7 @@ codegen_function:
mov rdx, 1 ; pop = true
call codegen_push_pop_dirtied_registers
; "add rsp, {current_stack_size}\n"
; "add rsp, {max_stack_size}\n"
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rel ADD_RSP]
@ -509,7 +651,7 @@ codegen_function:
call vec_extend
lea rdi, [rsp + 24] ; &function_ctx
mov rdi, [rdi + 0] ; current_stack_size
mov edi, dword [rdi + 4] ; max_stack_size
lea rsi, [rsp + 104] ; scratch
mov rdx, 16 ; buffer length
mov rcx, 10 ; radix
@ -545,7 +687,7 @@ codegen_function:
mov rdx, DOT_PROLOGUE_LEN
call vec_extend
; "sub rsp, {current_stack_size}\n"
; "sub rsp, {max_stack_size}\n"
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rel SUB_RSP]
@ -553,7 +695,7 @@ codegen_function:
call vec_extend
lea rdi, [rsp + 24] ; &function_ctx
mov rdi, [rdi + 0] ; current_stack_size
mov edi, dword [rdi + 4] ; max_stack_size
lea rsi, [rsp + 104] ; scratch
mov rdx, 16 ; buffer length
mov rcx, 10 ; radix
@ -680,6 +822,95 @@ codegen_push_pop_dirtied_registers:
pop rbp
ret
;; Emits one "push <reg>\n" or "pop <reg>\n" line for every register that is set
;; in the masked dirtied_register_bitset.
;; Push lines are appended to `text`. Pop lines are all inserted at the length
;; `text` had on entry, so they end up in the reverse order of the bit scan.
;; rdi: *text
;; rsi: &function_ctx
;; rdx: pop: bool
;; Returns the number of registers pushed/popped in rax
;; define-fn: fn codegen_push_pop_used_registers(text: *mut Vec<u8>, function_ctx: &FunctionCtx, pop: bool) -> u8
codegen_push_pop_used_registers:
    push rbp
    mov rbp, rsp
    push rbx
    push r15
    push r14
    push r13

    ; line buffer  [24..33]  5-byte mnemonic, 3-byte register name, newline
    ; insert_at    [16..24]  -1 when pushing, text.len() on entry when popping
    ; function_ctx [8..16]
    ; text         [0..8]
    ; The 9-byte line buffer ends at offset 32, so the frame must be larger
    ; than 32 bytes or the newline lands on the saved r13.
    sub rsp, 48
    mov [rsp], rdi
    mov [rsp + 8], rsi

    ; num_regs must be valid on the early-exit path as well
    xor r13d, r13d ; num_regs pushed/popped

    mov byte [rsp + 29 + 3], 10 ; newline
    mov qword [rsp + 16], -1
    mov rax, 'push '
    mov qword [rsp + 24], rax

    test rdx, rdx
    jz .skip_setup_pop
    ; padded to 5 bytes so that no NUL byte sits between the mnemonic and the
    ; register name written at offset 29
    mov rax, 'pop  '
    mov qword [rsp + 24], rax
    mov rdi, [rsp] ; text
    mov rax, [rdi + 8] ; text.len()
    mov [rsp + 16], rax

.skip_setup_pop:
    ; volatile registers, bit order abcdsdpp_89abcdef:
    ; bits 2..5 and 8..11
    ; additionally, rax is never preserved by this compiler
    mov rsi, [rsp + 8] ; &function_ctx
    mov bx, word [rsi + 48 + 4] ; dirtied_register_bitset
    mov ax, 0b00001111_00111100
    and bx, ax
    test bx, bx
    jz .done

    mov r15, 16
    xor r14, r14 ; register index

.reg_loop:
    cmp r14, r15
    jge .done
    bt bx, r14w
    jnc .next_reg

    inc r13 ; num_regs += 1

    ; NOTE(review): the returned name length is ignored and 3 bytes are always
    ; emitted; confirm get_register_name pads 2-character names such as "r8".
    mov rdi, r14
    mov rsi, 8
    lea rdx, [rsp + 29]
    call get_register_name

    mov rax, -1
    cmp [rsp + 16], rax
    jne .reg_pop

    mov rdi, [rsp] ; text
    lea rsi, [rsp + 24]
    mov rdx, 9
    call vec_extend
    jmp .next_reg

.reg_pop:
    mov rdi, [rsp] ; text
    mov rsi, [rsp + 16] ; text.len()
    lea rdx, [rsp + 24]
    mov rcx, 9
    call vec_insert_many

.next_reg:
    inc r14
    jmp .reg_loop

.done:
    add rsp, 48
    mov rax, r13
    pop r13
    pop r14
    pop r15
    pop rbx
    pop rbp
    ret
;; rdi: ctx
;; rsi: &function_ctx
@ -732,10 +963,10 @@ codegen_expr:
push r15
push r14
; scratch [16..48]
; scratch [16..80]
; function_ctx: [8..16]
; ctx [0..8]
sub rsp, 48
sub rsp, 80
mov [rsp], rdi ; ctx
mov [rsp + 8], rsi ; &function_ctx
@ -810,6 +1041,10 @@ codegen_expr:
lea rdx, [rsp + 16] ; src
call codegen_move_dst_src
mov rdi, [rsp + 8] ; &function_ctx
lea rsi, [rsp + 16] ; src
call codegen_free_operand
; push "jmp .epilogue\n"
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
@ -832,25 +1067,20 @@ codegen_expr:
.number:
; rax = *AstNode
mov [rsp + 16], rax ; scratch = *AstNode
mov rbx, [rax + 8] ; AstNode.data = value
mov [rsp + 40], rbx ; Operand.value
mov rdi, [rsp + 8] ; &function_ctx
call codegen_allocate_register
xor rbx, rbx
or rbx, 8 ; width = 8
shl rbx, 4
or rbx, rax ; register
shl rbx, 8
or rbx, OPERAND_REGISTER ; kind
mov [rsp + 16], rbx ;
mov qword [rsp + 24], 0 ; value = 0
mov byte [rsp + 32], OPERAND_IMMEDIATE ; Operand.kind
mov bl, 8 ; width = 8
shl bl, 4 ; register = undef
mov byte [rsp + 33], bl ; Operand.register_and_width
mov word [rsp + 34], 0 ; Operand.len = 0
mov rbx, [rax + 8] ; AstNode.data
mov [rsp + 40], rbx ; Operand.value
mov rdi, [rsp + 8] ; &function_ctx
mov rsi, 8 ; width
call codegen_allocate_value
mov [rsp + 16], rax
mov [rsp + 24], rdx
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
@ -862,9 +1092,201 @@ codegen_expr:
mov rdx, qword [rsp + 24]
jmp .done
.binary_op:
mov rax, [rax + 8] ; AstNode.data
mov [rsp + 16], rax ; scratch = *AstBinaryOp
mov rdi, [rsp] ; ctx
mov rsi, [rsp + 8] ; &function_ctx
mov rdx, [rax + 0] ; left operand index
call codegen_expr
mov [rsp + 32], rax ; left operand
mov [rsp + 40], rdx
mov rdi, [rsp] ; ctx
mov rsi, [rsp + 8] ; &function_ctx
mov rdx, [rsp + 16] ; *AstBinaryOp
mov rdx, [rdx + 16] ; right operand index
call codegen_expr
mov [rsp + 48], rax ; right operand
mov [rsp + 56], rdx
mov rax, [rsp + 16] ; *AstBinaryOp
mov al, byte [rax + 8] ; operator
mov rbx, -1
cmp al, TOKEN_PLUS
cmove rbx, [rel ADD_]
cmp al, TOKEN_MINUS
cmove rbx, [rel SUB_]
cmp rbx, -1
jne .gen_op
cmp al, TOKEN_STAR
cmove rbx, [rel MUL_]
cmp al, TOKEN_SLASH
cmove rbx, [rel DIV_]
cmp al, TOKEN_PERCENT
cmove rbx, [rel DIV_]
cmp rbx, -1
je .panic ; unknown operator
.mul_div:
; mul/div need to clobber rax:rdx
; TODO only check for div
mov rax, [rsp + 8] ; &function_ctx
mov ax, word [rax + 48] ; register_bitset
bt ax, 3 ; is rdx used?
jnc .after_spill_rdx
; allocate scratch value for rdx
mov rdi, [rsp + 8] ; &function_ctx
mov rsi, 8 ; width
call codegen_allocate_place
mov [rsp + 64], rax
mov [rsp + 72], rdx
; mov scratch, rdx
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rsp + 64] ; scratch value
lea rdx, [rel OPERAND_RDX] ; rax
call codegen_move_dst_src
; check if rhs is rdx
mov rax, [rsp + 48] ; right operand
and rax, 0xFFF
mov rdx, [rel OPERAND_RDX]
and rdx, 0xFFF
cmp rax, rdx
jne .after_spill_rdx
; free rhs
mov rdi, [rsp + 8] ; &function_ctx
lea rsi, [rsp + 48] ; right operand
call codegen_free_operand
mov rdx, [rsp + 48] ; right operand
and rdx, 0xF000 ; Operand.width
mov rax, [rsp + 64] ; scratch value
or rax, rdx ; preserve width
mov rdx, [rsp + 72]
mov [rsp + 48], rax ; right operand
mov [rsp + 56], rdx
.after_spill_rdx:
mov rax, [rsp + 16] ; *AstBinaryOp
mov al, byte [rax + 8] ; operator
cmp al, TOKEN_STAR
je .after_clear_rdx
; clear rdx for div
; xor rdx, rdx
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rel XOR_RDX_RDX] ; rdx
mov rdx, XOR_RDX_RDX_LEN
call vec_extend
.after_clear_rdx:
; mov rax, lhs
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rel OPERAND_RAX] ; rax
lea rdx, [rsp + 32] ; left operand
call codegen_move_dst_src
; op rhs
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
push rbx
lea rsi, [rsp] ; op
mov rdx, 4
call vec_extend
pop rbx
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rsp + 48] ; left operand
call codegen_write_operand
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
mov rsi, 10
push rsi
lea rsi, [rsp] ; newline
call vec_push
pop rsi
mov rax, [rsp + 16] ; *AstBinaryOp
mov al, byte [rax + 8] ; operator
cmp al, TOKEN_PERCENT
jne .after_rem
; mov rax, rdx // only for rem
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rel OPERAND_RAX] ; rax
lea rdx, [rel OPERAND_RDX] ; rdx
call codegen_move_dst_src
.after_rem:
mov rax, [rsp + 8] ; &function_ctx
mov ax, word [rax + 48] ; register_bitset
bt ax, 3 ; is rdx used?
jnc .after_unspill_rdx
; mov rdx, scratch
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rel OPERAND_RDX] ; rdx
lea rdx, [rsp + 64] ; scratch value
call codegen_move_dst_src
.after_unspill_rdx:
; free [scratch, rhs, lhs]
mov rdi, [rsp + 8] ; &function_ctx
lea rsi, [rsp + 64] ; scratch value
call codegen_free_operand
mov rdi, [rsp + 8] ; &function_ctx
lea rsi, [rsp + 48] ; right operand
call codegen_free_operand
mov rdi, [rsp + 8] ; &function_ctx
lea rsi, [rsp + 32] ; left operand
call codegen_free_operand
; alloca dst
mov rdi, [rsp + 8] ; &function_ctx
mov rsi, 8 ; width
call codegen_allocate_value
mov [rsp + 32], rax
mov [rsp + 40], rdx
; mov dst, rax
mov rdi, [rsp] ; ctx
lea rdi, [rdi + 8] ; &ctx.text
lea rsi, [rsp + 32] ; dst
lea rdx, [rel OPERAND_RAX] ; rax
call codegen_move_dst_src
; return dst
mov rax, [rsp + 32]
mov rdx, [rsp + 40]
jmp .done
.gen_op:
mov rdi, [rsp + 8] ; &function_ctx
mov rsi, [rsp] ; ctx
lea rsi, [rsi + 8] ; &ctx.text
lea rdx, [rsp + 32] ; left operand
lea rcx, [rsp + 48] ; right operand
mov r8, rbx ; operation
call codegen_binary_op_rm64_rm64
jmp .done
.var_decl:
.var_ref:
.binary_op:
.assignment:
.place_to_value:
.value_to_place:
@ -873,7 +1295,7 @@ codegen_expr:
; TODO
.done:
add rsp, 48
add rsp, 80
pop r14
pop r15
pop rbx
@ -1134,6 +1556,218 @@ codegen_move_rbp_slot_src:
pop rbp
ret
;; Emits a two-operand instruction and returns the Operand that holds the
;; result in rax:rdx (header in rax, Operand.value in rdx).
;; Depending on the operand kinds the result ends up in lhs, in rhs, or in a
;; freshly allocated dst; rax is used as the scratch register in the emitted
;; code when neither operand can be used directly.
;; rdi: *function_ctx
;; rsi: *text
;; rdx: lhs: *Operand
;; rcx: rhs: *Operand
;; r8: op: [u8; 8]
;; Generates: {op} {lhs}, {rhs} for a binary operation that has the encodings rN, rmN and rmN, rN
codegen_binary_op_rm64_rm64:
    push rbp
    mov rbp, rsp
    push rbx

    ; terminator / newline scratch [56..64]
    ; op                           [48..56]
    ; dst                          [32..48]
    ; rhs                          [24..32]
    ; lhs                          [16..24]
    ; *text                        [8..16]
    ; *function_ctx                [0..8]
    ; op has its own slot: dst is written in .allocate_dst before op is read
    sub rsp, 64
    mov [rsp], rdi ; *function_ctx
    mov [rsp + 8], rsi ; *text
    mov [rsp + 16], rdx ; lhs
    mov [rsp + 24], rcx ; rhs
    mov [rsp + 48], r8 ; op
    mov qword [rsp + 56], 0 ; bounds strlen(op) when op uses all 8 bytes

    ; if lhs.kind == REGISTER || lhs.kind < ADDRESS && rhs.kind == REGISTER {
    cmp byte [rdx + 0], OPERAND_REGISTER
    je .simple
    cmp byte [rdx + 0], OPERAND_ADDRESS
    setb al
    cmp byte [rcx + 0], OPERAND_REGISTER
    sete bl
    test al, bl
    jnz .simple
    jmp .complex

.simple:
    ; op lhs, rhs
    lea rdi, [rsp + 48] ; op
    call strlen
    mov rdi, [rsp + 8] ; *text
    lea rsi, [rsp + 48] ; op
    mov rdx, rax ; op length
    call vec_extend

    mov rdi, [rsp + 8] ; *text
    mov rsi, [rsp + 16] ; lhs
    call codegen_write_operand

    mov rdi, [rsp + 8] ; *text
    lea rsi, [rel COMMA_RAX]
    mov rdx, 2 ; only the leading ", "
    call vec_extend

    mov rdi, [rsp + 8] ; *text
    mov rsi, [rsp + 24] ; rhs
    call codegen_write_operand

    mov byte [rsp + 56], 10 ; newline
    mov rdi, [rsp + 8] ; *text
    lea rsi, [rsp + 56]
    call vec_push

    ; free rhs
    mov rdi, [rsp] ; *function_ctx
    mov rsi, [rsp + 24] ; rhs
    call codegen_free_operand

    ; ret lhs
    mov rbx, [rsp + 16] ; lhs
    mov rax, [rbx]
    mov rdx, [rbx + 8]
    jmp .epilogue
    ; } else {

.complex:
    ; if lhs.kind < ADDRESS {
    mov rdx, [rsp + 16] ; lhs
    cmp byte [rdx + 0], OPERAND_ADDRESS
    jae .check_rhs

    ; mov rax, rhs
    mov rdi, [rsp + 8] ; *text
    lea rsi, [rel OPERAND_RAX]
    mov rdx, [rsp + 24] ; rhs
    call codegen_move_dst_src

    ; op lhs, rax
    mov rdi, [rsp + 8] ; *text
    mov rsi, [rsp + 16] ; lhs
    lea rdx, [rel OPERAND_RAX] ; rax
    mov rcx, [rsp + 48] ; op
    call codegen_binary_op_unchecked

    ; free rhs
    mov rdi, [rsp] ; *function_ctx
    mov rsi, [rsp + 24] ; rhs
    call codegen_free_operand

    ; ret lhs
    mov rbx, [rsp + 16] ; lhs
    mov rax, [rbx]
    mov rdx, [rbx + 8]
    jmp .epilogue

.check_rhs:
    ; } else if rhs.kind < ADDRESS {
    mov rcx, [rsp + 24] ; rhs
    cmp byte [rcx + 0], OPERAND_ADDRESS
    jae .allocate_dst

    ; mov rax, lhs
    mov rdi, [rsp + 8] ; *text
    lea rsi, [rel OPERAND_RAX]
    mov rdx, [rsp + 16] ; lhs
    call codegen_move_dst_src

    ; op rax, rhs
    mov rdi, [rsp + 8] ; *text
    lea rsi, [rel OPERAND_RAX] ; rax
    mov rdx, [rsp + 24] ; rhs
    mov rcx, [rsp + 48] ; op
    call codegen_binary_op_unchecked

    ; mov rhs, rax
    mov rdi, [rsp + 8] ; *text
    mov rsi, [rsp + 24] ; rhs
    lea rdx, [rel OPERAND_RAX]
    call codegen_move_dst_src

    ; free lhs
    mov rdi, [rsp] ; *function_ctx
    mov rsi, [rsp + 16] ; lhs
    call codegen_free_operand

    ; ret rhs
    mov rbx, [rsp + 24] ; rhs
    mov rax, [rbx]
    mov rdx, [rbx + 8]
    jmp .epilogue
    ; } else {

.allocate_dst:
    ; dst = allocate_value
    mov rdi, [rsp] ; *function_ctx
    mov rsi, 8 ; width = 8
    call codegen_allocate_value
    mov [rsp + 32], rax ; dst
    mov [rsp + 40], rdx

    ; mov dst, lhs
    mov rdi, [rsp + 8] ; *text
    lea rsi, [rsp + 32] ; dst
    mov rdx, [rsp + 16] ; lhs
    call codegen_move_dst_src

    ; mov rax, rhs
    mov rdi, [rsp + 8] ; *text
    lea rsi, [rel OPERAND_RAX] ; rax
    mov rdx, [rsp + 24] ; rhs
    call codegen_move_dst_src

    ; op dst, rax
    mov rdi, [rsp + 8] ; *text
    lea rsi, [rsp + 32] ; dst
    lea rdx, [rel OPERAND_RAX] ; rax
    mov rcx, [rsp + 48] ; op
    call codegen_binary_op_unchecked

    ; NOTE(review): lhs and rhs are not freed on this path; confirm whether
    ; operands of these kinds own any storage.

    ; ret dst
    mov rax, [rsp + 32] ; dst
    mov rdx, [rsp + 40]
    ; }

.epilogue:
    add rsp, 64
    pop rbx
    pop rbp
    ret
;; Appends "{op}{lhs}, {rhs}\n" to `text` without checking that the operand
;; combination is encodable; the caller is responsible for that.
;; `op` is passed by value as up to 8 bytes of mnemonic text (trailing space
;; included) and its length is taken with strlen.
;; rdi: *text
;; rsi: lhs: *Operand
;; rdx: rhs: *Operand
;; rcx: op: [u8; 8]
codegen_binary_op_unchecked:
    push rbp
    mov rbp, rsp

    ; op    [24..32]  (byte 31 is reused for the newline once op is emitted)
    ; rhs   [16..24]
    ; lhs   [8..16]
    ; *text [0..8]
    sub rsp, 32
    mov [rsp], rdi ; *text
    mov [rsp + 8], rsi ; lhs
    mov [rsp + 16], rdx ; rhs
    mov [rsp + 24], rcx ; op

    ; op
    lea rdi, [rsp + 24] ; op
    call strlen
    mov rdi, [rsp] ; *text
    lea rsi, [rsp + 24] ; op
    mov rdx, rax ; op length
    call vec_extend

    ; lhs
    mov rdi, [rsp] ; *text
    mov rsi, [rsp + 8] ; lhs
    call codegen_write_operand

    ; ", "
    mov rdi, [rsp] ; *text
    lea rsi, [rel COMMA_RAX]
    mov rdx, 2 ; only the leading ", "
    call vec_extend

    ; rhs
    mov rdi, [rsp] ; *text
    mov rsi, [rsp + 16] ; rhs
    call codegen_write_operand

    ; newline: the vector argument is *text at [rsp], not lhs at [rsp + 8]
    mov byte [rsp + 31], 10 ; newline
    mov rdi, [rsp] ; *text
    lea rsi, [rsp + 31]
    call vec_push

    add rsp, 32
    pop rbp
    ret
;; rdi: *text
;; rsi: dst: *Operand
;; rdx: src: *Operand
@ -1166,52 +1800,28 @@ codegen_move_dst_src:
je .panic ; address can only be moved to full-sized destinations
cmp byte [rsi + 0], OPERAND_REGISTER
je .do_move
je .do_move ; if dst == register, do move
; If dst != register and src != register, we cannot move directly into memory:
; there is no MOV m64, m64 or MOV m64, imm64 instruction.
; A smarter compiler could test for the immediate size and move most
; immediates directly into memory, but we are quite stupid!
cmp byte [rdx + 0], OPERAND_REGISTER
jne .xchg_rax ; if dst != register and src != register, xchg via rax
jne .xchg_rax
jmp .do_move
.xchg_rax:
; xchg rax, [src]
; mov rax, [src]
; mov [dst], rax
; xchg rax, [src]
mov rdi, [rsp] ; *text
lea rsi, [rel XCHG_RAX]
mov rdx, XCHG_RAX_LEN
call vec_extend
lea rsi, [rel OPERAND_RAX]
mov rdx, [rsp + 16] ; src
call codegen_move_dst_src
mov rdi, [rsp] ; *text
mov rsi, [rsp + 16] ; src
call codegen_write_operand
mov rdi, [rsp] ; *text
lea rsi, [rel COMMA_RAX]
mov rdx, COMMA_RAX_LEN
call vec_extend
mov rdi, [rsp] ; *text
lea rsi, [rel MOV_RAX_COMMA]
mov rdx, 4
call vec_extend
mov rdi, [rsp] ; *text
mov rsi, [rsp + 8] ; dst
call codegen_write_operand
mov rdi, [rsp] ; *text
lea rsi, [rel COMMA_RAX]
mov rdx, COMMA_RAX_LEN
call vec_extend
mov rdi, [rsp] ; *text
lea rsi, [rel XCHG_RAX]
mov rdx, XCHG_RAX_LEN
call vec_extend
mov rdi, [rsp] ; *text
mov rsi, [rsp + 16] ; src
call codegen_write_operand
mov rsi, [rsp + 8] ; src
lea rdx, [rel OPERAND_RAX]
call codegen_move_dst_src
jmp .epilogue
.do_move:
@ -1233,12 +1843,12 @@ codegen_move_dst_src:
mov rsi, [rsp + 16] ; src
call codegen_write_operand
.epilogue:
mov rdi, [rsp] ; *text
lea rsi, [rel COLON_NL]
inc rsi
mov rdx, 1
call vec_extend
call vec_push
.epilogue:
add rsp, 24
pop rbx
@ -1254,7 +1864,15 @@ section .rdata
MOV_RAX_COMMA_LEN equ $ - MOV_RAX_COMMA
COMMA_RAX db ", rax"
COMMA_RAX_LEN equ $ - COMMA_RAX
XOR_RDX_RDX db "xor rdx, rdx", 10
XOR_RDX_RDX_LEN equ $ - XOR_RDX_RDX
ADD_ dq "add "
SUB_ dq "sub "
MUL_ dq "mul "
DIV_ dq "div "
; Operand { kind: REGISTER, register: 0, width: 8, len: 0, padding: 0, value: 0 }
align 8
OPERAND_RAX dq 0x0000_8001, 0
OPERAND_RDX dq 0x0000_8301, 0

View file

@ -24,6 +24,7 @@ global tokeniser_print
global find_lexeme
global expect_token
global unwrap_token
global skip_token
global peek_expect_token
global peek_lexeme
@ -590,6 +591,17 @@ peek_lexeme:
pop rbp
ret
;; Skips one token ahead, without returning it.
;; Calls find_lexeme with a 24-byte scratch buffer on the stack and discards
;; whatever it writes there.
skip_token:
    push rbp
    mov rbp, rsp
    sub rsp, 24 ; scratch buffer passed to find_lexeme
    lea rdi, [rsp]
    call find_lexeme
    add rsp, 24
    pop rbp
    ; explicit return: without it execution falls through into the next
    ; function in the file
    ret
;; Returns the current value of the tokeniser's `cursor` variable in rax.
;; Takes no arguments.
tokeniser_get_cursor:
mov rax, [rel cursor] ; rax = cursor
ret

View file

@ -16,6 +16,7 @@ LEXEMES:
dq LEX_ARROW
dq LEX_I32
dq LEX_U32
dq LEX_EQEQ
dq LEX_EQUALS
dq LEX_PLUS
dq LEX_MINUS
@ -23,12 +24,14 @@ LEXEMES:
dq LEX_LPARENS
dq LEX_RBRACE
dq LEX_LBRACE
dq LEX_COLON2
dq LEX_COLON
dq LEX_SEMI
dq LEX_COMMA
dq LEX_PIPE2
dq LEX_PIPE
dq LEX_AMP2
dq LEX_AMP
dq LEX_EQEQ
dq LEX_LBRACKET
dq LEX_RBRACKET
dq LEX_VOID
@ -44,6 +47,21 @@ LEXEMES:
dq LEX_ISIZE
dq LEX_F32
dq LEX_F64
dq LEX_PERCENT
dq LEX_CARET
dq LEX_BANGEQ
dq LEX_BANG
dq LEX_TILDE
dq LEX_LEQ
dq LEX_GEQ
dq LEX_LESSLESS
dq LEX_GTGT
dq LEX_LT
dq LEX_GT
dq LEX_DOT3
dq LEX_DOT2
dq LEX_DOT
dq LEX_BACKTICK
align 8
TOKENS:
@ -62,6 +80,7 @@ TOKENS:
db TOKEN_ARROW ;; 12
db TOKEN_I32 ;; 13
db TOKEN_U32 ;; 14
db TOKEN_EQEQ ;; 15
db TOKEN_EQUALS ;; 15
db TOKEN_PLUS ;; 16
db TOKEN_MINUS ;; 17
@ -69,12 +88,14 @@ TOKENS:
db TOKEN_LPARENS ;; 19
db TOKEN_RBRACE ;; 20
db TOKEN_LBRACE ;; 21
db TOKEN_COLON2 ;; 22
db TOKEN_COLON ;; 22
db TOKEN_SEMI ;; 23
db TOKEN_COMMA ;; 24
db TOKEN_PIPE2 ;; 25
db TOKEN_PIPE ;; 25
db TOKEN_AMP2 ;; 26
db TOKEN_AMP ;; 26
db TOKEN_EQEQ ;; 27
db TOKEN_LBRACKET ;; 28
db TOKEN_RBRACKET ;; 29
db TOKEN_VOID ;; 30
@ -90,6 +111,21 @@ TOKENS:
db TOKEN_ISIZE ;; 40
db TOKEN_F32 ;; 41
db TOKEN_F64 ;; 42
db TOKEN_PERCENT ;; 43
db TOKEN_CARET ;; 44
db TOKEN_BANGEQ ;; 45
db TOKEN_BANG ;; 46
db TOKEN_TILDE ;; 47
db TOKEN_LEQ ;; 48
db TOKEN_GEQ ;; 49
db TOKEN_LESSLESS ;; 50
db TOKEN_GTGT ;; 51
db TOKEN_LT ;; 50
db TOKEN_GT ;; 51
db TOKEN_DOT3 ;; 54
db TOKEN_DOT2 ;; 53
db TOKEN_DOT ;; 52
db TOKEN_BACKTICK ;; 55
align 8
LEXEME_LENS:
@ -108,6 +144,7 @@ LEXEME_LENS:
dq LEX_ARROW_len
dq LEX_I32_len
dq LEX_U32_len
dq LEX_EQEQ_len
dq LEX_EQUALS_len
dq LEX_PLUS_len
dq LEX_MINUS_len
@ -115,12 +152,14 @@ LEXEME_LENS:
dq LEX_LPARENS_len
dq LEX_RBRACE_len
dq LEX_LBRACE_len
dq LEX_COLON2_len
dq LEX_COLON_len
dq LEX_SEMI_len
dq LEX_COMMA_len
dq LEX_PIPE2_len
dq LEX_PIPE_len
dq LEX_AMP2_len
dq LEX_AMP_len
dq LEX_EQEQ_len
dq LEX_LBRACKET_len
dq LEX_RBRACKET_len
dq LEX_VOID_len
@ -136,9 +175,24 @@ LEXEME_LENS:
dq LEX_ISIZE_len
dq LEX_F32_len
dq LEX_F64_len
dq LEX_PERCENT_len
dq LEX_CARET_len
dq LEX_BANGEQ_len
dq LEX_BANG_len
dq LEX_TILDE_len
dq LEX_LEQ_len
dq LEX_GEQ_len
dq LEX_LESSLESS_len
dq LEX_GTGT_len
dq LEX_LT_len
dq LEX_GT_len
dq LEX_DOT3_len
dq LEX_DOT2_len
dq LEX_DOT_len
dq LEX_BACKTICK_len
align 8
NUM_LEXEMES: dq 43
NUM_LEXEMES: dq 61
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
LEX_LET db "let"
@ -169,6 +223,8 @@ NUM_LEXEMES: dq 43
LEX_I32_len equ $ - LEX_I32
LEX_U32 db "u32"
LEX_U32_len equ $ - LEX_U32
LEX_EQEQ db "=="
LEX_EQEQ_len equ $ - LEX_EQEQ
LEX_EQUALS db "="
LEX_EQUALS_len equ $ - LEX_EQUALS
LEX_PLUS db "+"
@ -183,18 +239,22 @@ NUM_LEXEMES: dq 43
LEX_RBRACE_len equ $ - LEX_RBRACE
LEX_LBRACE db "{"
LEX_LBRACE_len equ $ - LEX_LBRACE
LEX_COLON2 db "::"
LEX_COLON2_len equ $ - LEX_COLON2
LEX_COLON db ":"
LEX_COLON_len equ $ - LEX_COLON
LEX_SEMI db ";"
LEX_SEMI_len equ $ - LEX_SEMI
LEX_COMMA db ","
LEX_COMMA_len equ $ - LEX_COMMA
LEX_PIPE2 db "||"
LEX_PIPE2_len equ $ - LEX_PIPE2
LEX_PIPE db "|"
LEX_PIPE_len equ $ - LEX_PIPE
LEX_AMP2 db "&&"
LEX_AMP2_len equ $ - LEX_AMP2
LEX_AMP db "&"
LEX_AMP_len equ $ - LEX_AMP
LEX_EQEQ db "=="
LEX_EQEQ_len equ $ - LEX_EQEQ
LEX_LBRACKET db "["
LEX_LBRACKET_len equ $ - LEX_LBRACKET
LEX_RBRACKET db "]"
@ -225,6 +285,36 @@ NUM_LEXEMES: dq 43
LEX_F32_len equ $ - LEX_F32
LEX_F64 db "f64"
LEX_F64_len equ $ - LEX_F64
LEX_PERCENT db "%"
LEX_PERCENT_len equ $ - LEX_PERCENT
LEX_CARET db "^"
LEX_CARET_len equ $ - LEX_CARET
LEX_BANGEQ db "!="
LEX_BANGEQ_len equ $ - LEX_BANGEQ
LEX_BANG db "!"
LEX_BANG_len equ $ - LEX_BANG
LEX_TILDE db "~"
LEX_TILDE_len equ $ - LEX_TILDE
LEX_LEQ db "<="
LEX_LEQ_len equ $ - LEX_LEQ
LEX_GEQ db ">="
LEX_GEQ_len equ $ - LEX_GEQ
LEX_LESSLESS db "<<"
LEX_LESSLESS_len equ $ - LEX_LESSLESS
LEX_GTGT db ">>"
LEX_GTGT_len equ $ - LEX_GTGT
LEX_LT db "<"
LEX_LT_len equ $ - LEX_LT
LEX_GT db ">"
LEX_GT_len equ $ - LEX_GT
LEX_DOT3 db "..."
LEX_DOT3_len equ $ - LEX_DOT3
LEX_DOT2 db ".."
LEX_DOT2_len equ $ - LEX_DOT2
LEX_DOT db "."
LEX_DOT_len equ $ - LEX_DOT
LEX_BACKTICK db "`"
LEX_BACKTICK_len equ $ - LEX_BACKTICK
LEX_IDENT db "<identifier>"
LEX_IDENT_len equ $ - LEX_IDENT
LEX_NUMBER db "<number>"
@ -250,36 +340,54 @@ NUM_LEXEMES: dq 43
TOKEN_ARROW equ 12 ; :u8
TOKEN_I32 equ 13 ; :u8
TOKEN_U32 equ 14 ; :u8
TOKEN_EQUALS equ 15 ; :u8
TOKEN_PLUS equ 16 ; :u8
TOKEN_MINUS equ 17 ; :u8
TOKEN_RPARENS equ 18 ; :u8
TOKEN_LPARENS equ 19 ; :u8
TOKEN_RBRACE equ 20 ; :u8
TOKEN_LBRACE equ 21 ; :u8
TOKEN_COLON equ 22 ; :u8
TOKEN_SEMI equ 23 ; :u8
TOKEN_COMMA equ 24 ; :u8
TOKEN_PIPE equ 25 ; :u8
TOKEN_AMP equ 26 ; :u8
TOKEN_EQEQ equ 27 ; :u8
TOKEN_LBRACKET equ 28 ; :u8
TOKEN_RBRACKET equ 29 ; :u8
TOKEN_VOID equ 30 ; :u8
TOKEN_SLASH equ 31 ; :u8
TOKEN_STAR equ 32 ; :u8
TOKEN_U8 equ 33 ; :u8
TOKEN_I8 equ 34 ; :u8
TOKEN_U16 equ 35 ; :u8
TOKEN_I16 equ 36 ; :u8
TOKEN_U64 equ 37 ; :u8
TOKEN_I64 equ 38 ; :u8
TOKEN_USIZE equ 39 ; :u8
TOKEN_ISIZE equ 40 ; :u8
TOKEN_F32 equ 41 ; :u8
TOKEN_F64 equ 42 ; :u8
TOKEN_IDENT equ 43 ; :u8
TOKEN_NUMBER equ 44 ; :u8
TOKEN_STRING equ 45 ; :u8
TOKEN_COMMENT equ 46 ; :u8
TOKEN_EQEQ equ 15 ; :u8
TOKEN_EQUALS equ 16 ; :u8
TOKEN_PLUS equ 17 ; :u8
TOKEN_MINUS equ 18 ; :u8
TOKEN_RPARENS equ 19 ; :u8
TOKEN_LPARENS equ 20 ; :u8
TOKEN_RBRACE equ 21 ; :u8
TOKEN_LBRACE equ 22 ; :u8
TOKEN_COLON equ 23 ; :u8
TOKEN_COLON2 equ 24 ; :u8
TOKEN_SEMI equ 25 ; :u8
TOKEN_COMMA equ 26 ; :u8
TOKEN_PIPE equ 27 ; :u8
TOKEN_PIPE2 equ 28 ; :u8
TOKEN_AMP equ 29 ; :u8
TOKEN_AMP2 equ 30 ; :u8
TOKEN_LBRACKET equ 31 ; :u8
TOKEN_RBRACKET equ 32 ; :u8
TOKEN_VOID equ 33 ; :u8
TOKEN_SLASH equ 34 ; :u8
TOKEN_STAR equ 35 ; :u8
TOKEN_U8 equ 36 ; :u8
TOKEN_I8 equ 37 ; :u8
TOKEN_U16 equ 38 ; :u8
TOKEN_I16 equ 39 ; :u8
TOKEN_U64 equ 40 ; :u8
TOKEN_I64 equ 41 ; :u8
TOKEN_USIZE equ 42 ; :u8
TOKEN_ISIZE equ 43 ; :u8
TOKEN_F32 equ 44 ; :u8
TOKEN_F64 equ 45 ; :u8
TOKEN_PERCENT equ 46 ; :u8
TOKEN_CARET equ 47 ; :u8
TOKEN_BANGEQ equ 48 ; :u8
TOKEN_BANG equ 49 ; :u8
TOKEN_TILDE equ 50 ; :u8
TOKEN_LEQ equ 51 ; :u8
TOKEN_GEQ equ 52 ; :u8
TOKEN_LESSLESS equ 53 ; :u8
TOKEN_GTGT equ 54 ; :u8
TOKEN_LT equ 55 ; :u8
TOKEN_GT equ 56 ; :u8
TOKEN_DOT3 equ 57 ; :u8
TOKEN_DOT2 equ 58 ; :u8
TOKEN_DOT equ 59 ; :u8
TOKEN_BACKTICK equ 60 ; :u8
TOKEN_IDENT equ 61 ; :u8
TOKEN_NUMBER equ 62 ; :u8
TOKEN_STRING equ 63 ; :u8
TOKEN_COMMENT equ 64 ; :u8
;; end-consts

View file

@ -60,6 +60,11 @@ fn main() {
};
}
print_ast(
b"fn main() -> void { return 1 * 2 + 3 * 4; }",
|ast| unsafe { parse_func(ast) },
);
print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) });
print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
parse_func(ast)

View file

@ -93,7 +93,7 @@ fn main() {
print_ast(
b"fn main(a: u32) -> void {
return 4;
return 2 * 3 + 4 * 5;
}",
|ast| unsafe { parse_func(ast) },
);

View file

@ -20,7 +20,11 @@ unsafe extern "C" {
pub unsafe fn get_register_name(reg_idx: u8, width: u8, buffer: *mut u8) -> FFISlice;
pub unsafe fn stackvar_cmp(a: *const (u64, u64), b: *const (u64, u64)) -> i32;
pub unsafe fn codegen_allocate_register(ctx: *mut FunctionCtx) -> u8;
pub unsafe fn codegen_allocate_place(ctx: *mut FunctionCtx, width: u16) -> Operand;
pub unsafe fn codegen_allocate_value(ctx: *mut FunctionCtx, width: u16) -> Operand;
pub unsafe fn codegen_free_operand(ctx: *mut FunctionCtx, operand: *const Operand) -> ();
pub unsafe fn codegen_function(ast: *const CodegenCtx, func_idx: u64) -> ();
pub unsafe fn codegen_push_pop_used_registers(text: *mut Vec<u8>, function_ctx: &FunctionCtx, pop: bool) -> u8;
pub unsafe fn codegen_expr(ctx: *const CodegenCtx, function_ctx: &FunctionCtx, expr_idx: u64) -> (u64, bool);
pub unsafe fn vec_insert_many(vec: *mut BlobVec, index: usize, data: *const u8, count: usize);
pub unsafe fn vec_extend(vec: *mut BlobVec, elements: *const u8, count: usize) -> ();
@ -75,38 +79,56 @@ pub const TOKEN_BOOL: u8 = 11;
pub const TOKEN_ARROW: u8 = 12;
pub const TOKEN_I32: u8 = 13;
pub const TOKEN_U32: u8 = 14;
pub const TOKEN_EQUALS: u8 = 15;
pub const TOKEN_PLUS: u8 = 16;
pub const TOKEN_MINUS: u8 = 17;
pub const TOKEN_RPARENS: u8 = 18;
pub const TOKEN_LPARENS: u8 = 19;
pub const TOKEN_RBRACE: u8 = 20;
pub const TOKEN_LBRACE: u8 = 21;
pub const TOKEN_COLON: u8 = 22;
pub const TOKEN_SEMI: u8 = 23;
pub const TOKEN_COMMA: u8 = 24;
pub const TOKEN_PIPE: u8 = 25;
pub const TOKEN_AMP: u8 = 26;
pub const TOKEN_EQEQ: u8 = 27;
pub const TOKEN_LBRACKET: u8 = 28;
pub const TOKEN_RBRACKET: u8 = 29;
pub const TOKEN_VOID: u8 = 30;
pub const TOKEN_SLASH: u8 = 31;
pub const TOKEN_STAR: u8 = 32;
pub const TOKEN_U8: u8 = 33;
pub const TOKEN_I8: u8 = 34;
pub const TOKEN_U16: u8 = 35;
pub const TOKEN_I16: u8 = 36;
pub const TOKEN_U64: u8 = 37;
pub const TOKEN_I64: u8 = 38;
pub const TOKEN_USIZE: u8 = 39;
pub const TOKEN_ISIZE: u8 = 40;
pub const TOKEN_F32: u8 = 41;
pub const TOKEN_F64: u8 = 42;
pub const TOKEN_IDENT: u8 = 43;
pub const TOKEN_NUMBER: u8 = 44;
pub const TOKEN_STRING: u8 = 45;
pub const TOKEN_COMMENT: u8 = 46;
pub const TOKEN_EQEQ: u8 = 15;
pub const TOKEN_EQUALS: u8 = 16;
pub const TOKEN_PLUS: u8 = 17;
pub const TOKEN_MINUS: u8 = 18;
pub const TOKEN_RPARENS: u8 = 19;
pub const TOKEN_LPARENS: u8 = 20;
pub const TOKEN_RBRACE: u8 = 21;
pub const TOKEN_LBRACE: u8 = 22;
pub const TOKEN_COLON: u8 = 23;
pub const TOKEN_COLON2: u8 = 24;
pub const TOKEN_SEMI: u8 = 25;
pub const TOKEN_COMMA: u8 = 26;
pub const TOKEN_PIPE: u8 = 27;
pub const TOKEN_PIPE2: u8 = 28;
pub const TOKEN_AMP: u8 = 29;
pub const TOKEN_AMP2: u8 = 30;
pub const TOKEN_LBRACKET: u8 = 31;
pub const TOKEN_RBRACKET: u8 = 32;
pub const TOKEN_VOID: u8 = 33;
pub const TOKEN_SLASH: u8 = 34;
pub const TOKEN_STAR: u8 = 35;
pub const TOKEN_U8: u8 = 36;
pub const TOKEN_I8: u8 = 37;
pub const TOKEN_U16: u8 = 38;
pub const TOKEN_I16: u8 = 39;
pub const TOKEN_U64: u8 = 40;
pub const TOKEN_I64: u8 = 41;
pub const TOKEN_USIZE: u8 = 42;
pub const TOKEN_ISIZE: u8 = 43;
pub const TOKEN_F32: u8 = 44;
pub const TOKEN_F64: u8 = 45;
pub const TOKEN_PERCENT: u8 = 46;
pub const TOKEN_CARET: u8 = 47;
pub const TOKEN_BANGEQ: u8 = 48;
pub const TOKEN_BANG: u8 = 49;
pub const TOKEN_TILDE: u8 = 50;
pub const TOKEN_LEQ: u8 = 51;
pub const TOKEN_GEQ: u8 = 52;
pub const TOKEN_LESSLESS: u8 = 53;
pub const TOKEN_GTGT: u8 = 54;
pub const TOKEN_LT: u8 = 55;
pub const TOKEN_GT: u8 = 56;
pub const TOKEN_DOT3: u8 = 57;
pub const TOKEN_DOT2: u8 = 58;
pub const TOKEN_DOT: u8 = 59;
pub const TOKEN_BACKTICK: u8 = 60;
pub const TOKEN_IDENT: u8 = 61;
pub const TOKEN_NUMBER: u8 = 62;
pub const TOKEN_STRING: u8 = 63;
pub const TOKEN_COMMENT: u8 = 64;
#[repr(C)]
#[derive(Debug)]
@ -214,7 +236,8 @@ pub struct CodegenCtx {
#[repr(C)]
#[derive(Debug)]
pub struct FunctionCtx {
pub current_stack_size: u64,
pub current_stack_size: u32,
pub max_stack_size: u32,
pub stack_vars: Vec<(u64, u64)>,
pub register_bitset: u16,
pub dirtied_register_bitset: u16,

View file

@ -8,9 +8,10 @@ struct Lexeme(u8, &'static str);
impl PartialEq for Lexeme {
fn eq(&self, other: &Self) -> bool {
use util::defs::{TOKEN_IDENT, TOKEN_NUMBER};
match self.0 {
// Identifiers and numbers compare both token and lexeme
30 | 31 => self.0 == other.0 && self.1 == other.1,
TOKEN_IDENT | TOKEN_NUMBER => self.0 == other.0 && self.1 == other.1,
_ => self.0 == other.0,
}
}
@ -115,17 +116,17 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(19, ""),
Lexeme(18, ""),
Lexeme(28, ""),
Lexeme(29, ""),
Lexeme(21, ""),
Lexeme(20, ""),
Lexeme(24, ""),
Lexeme(12, ""),
Lexeme(23, ""),
Lexeme(22, ""),
Lexeme(15, ""),
Lexeme(TOKEN_LPARENS, ""),
Lexeme(TOKEN_RPARENS, ""),
Lexeme(TOKEN_LBRACKET, ""),
Lexeme(TOKEN_RBRACKET, ""),
Lexeme(TOKEN_LBRACE, ""),
Lexeme(TOKEN_RBRACE, ""),
Lexeme(TOKEN_COMMA, ""),
Lexeme(TOKEN_ARROW, ""),
Lexeme(TOKEN_SEMI, ""),
Lexeme(TOKEN_COLON, ""),
Lexeme(TOKEN_EQUALS, ""),
][..]
);
@ -143,7 +144,7 @@ fn main() {
Lexeme(TOKEN_IDENT, "number12345"),
Lexeme(TOKEN_IDENT, "____"),
Lexeme(TOKEN_IDENT, "_"),
Lexeme(17, ""),
Lexeme(TOKEN_MINUS, ""),
Lexeme(TOKEN_IDENT, "leading-minus"),
Lexeme(TOKEN_IDENT, "trailing-minus-"),
]
@ -156,17 +157,17 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(4, ""),
Lexeme(TOKEN_FN, ""),
Lexeme(TOKEN_IDENT, "my-function"),
Lexeme(19, ""),
Lexeme(18, ""),
Lexeme(12, ""),
Lexeme(11, ""),
Lexeme(21, ""),
Lexeme(5, ""),
Lexeme(10, ""),
Lexeme(23, ""),
Lexeme(20, ""),
Lexeme(TOKEN_LPARENS, ""),
Lexeme(TOKEN_RPARENS, ""),
Lexeme(TOKEN_ARROW, ""),
Lexeme(TOKEN_BOOL, ""),
Lexeme(TOKEN_LBRACE, ""),
Lexeme(TOKEN_RETURN, ""),
Lexeme(TOKEN_FALSE, ""),
Lexeme(TOKEN_SEMI, ""),
Lexeme(TOKEN_RBRACE, ""),
]
);
@ -174,8 +175,8 @@ fn main() {
tokeniser_init(c"tests/tokens/function.l".as_ptr());
eprintln!("ok.");
assert_eq!(expect_token(2).into_option(), None);
assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
assert_eq!(expect_token(TOKEN_IF).into_option(), None);
assert_eq!(expect_token(TOKEN_FN).into_option().unwrap().as_str(), "fn");
assert_eq!(unwrap_token(TOKEN_IDENT).as_str(), "my-function");
eprint!("Initializing tokeniser.. ");
@ -186,18 +187,18 @@ fn main() {
&collect_tokens()[..],
&[
Lexeme(TOKEN_COMMENT, ""),
Lexeme(4, ""),
Lexeme(TOKEN_FN, ""),
Lexeme(TOKEN_IDENT, "my-function"),
Lexeme(19, ""),
Lexeme(18, ""),
Lexeme(12, ""),
Lexeme(11, ""),
Lexeme(21, ""),
Lexeme(TOKEN_LPARENS, ""),
Lexeme(TOKEN_RPARENS, ""),
Lexeme(TOKEN_ARROW, ""),
Lexeme(TOKEN_BOOL, ""),
Lexeme(TOKEN_LBRACE, ""),
Lexeme(TOKEN_COMMENT, ""),
Lexeme(5, ""),
Lexeme(10, ""),
Lexeme(23, ""),
Lexeme(20, ""),
Lexeme(TOKEN_RETURN, ""),
Lexeme(TOKEN_FALSE, ""),
Lexeme(TOKEN_SEMI, ""),
Lexeme(TOKEN_RBRACE, ""),
]
);
@ -243,7 +244,7 @@ fn main() {
&collect_tokens()[..],
&[
Lexeme(TOKEN_NUMBER, "3"),
Lexeme(16, "+"),
Lexeme(TOKEN_PLUS, "+"),
Lexeme(TOKEN_NUMBER, "4")
],
);
@ -256,19 +257,19 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(4, "fn"),
Lexeme(TOKEN_FN, "fn"),
Lexeme(TOKEN_IDENT, "main"),
Lexeme(19, "("),
Lexeme(18, ")"),
Lexeme(12, "->"),
Lexeme(30, "void"),
Lexeme(21, "{"),
Lexeme(5, "return"),
Lexeme(TOKEN_LPARENS, "("),
Lexeme(TOKEN_RPARENS, ")"),
Lexeme(TOKEN_ARROW, "->"),
Lexeme(TOKEN_VOID, "void"),
Lexeme(TOKEN_LBRACE, "{"),
Lexeme(TOKEN_RETURN, "return"),
Lexeme(TOKEN_NUMBER, "1"),
Lexeme(16, "+"),
Lexeme(TOKEN_PLUS, "+"),
Lexeme(TOKEN_NUMBER, "2"),
Lexeme(23, ";"),
Lexeme(20, "}"),
Lexeme(TOKEN_SEMI, ";"),
Lexeme(TOKEN_RBRACE, "}"),
],
);
@ -280,16 +281,42 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(19, "("),
Lexeme(33, "b"),
Lexeme(31, "/"),
Lexeme(33, "d"),
Lexeme(16, "+"),
Lexeme(33, "c"),
Lexeme(18, ")"),
Lexeme(32, "*"),
Lexeme(34, "42"),
Lexeme(23, ";")
Lexeme(TOKEN_LPARENS, "("),
Lexeme(TOKEN_IDENT, "b"),
Lexeme(TOKEN_SLASH, "/"),
Lexeme(TOKEN_IDENT, "d"),
Lexeme(TOKEN_PLUS, "+"),
Lexeme(TOKEN_IDENT, "c"),
Lexeme(TOKEN_RPARENS, ")"),
Lexeme(TOKEN_STAR, "*"),
Lexeme(TOKEN_NUMBER, "42"),
Lexeme(TOKEN_SEMI, ";")
],
);
// Operator tokenisation test: operator characters packed together with no
// separating whitespace, fed from an in-memory buffer rather than a file.
eprint!("Initializing tokeniser.. ");
let src = b"<<<=<a == b = c ||| &||&&|&";
tokeniser_init_buf(src.as_ptr(), src.len());
eprintln!("ok.");
// Expected split of the input above, run by run:
//   "<<<=<"   -> "<<"  "<="  "<"
//   "=="  "=" -> EQEQ, EQUALS
//   "|||"     -> "||"  "|"
//   "&||&&|&" -> "&"  "||"  "&&"  "|"  "&"
// i.e. the test expects the two-character operator to win whenever the next
// two characters form one, with the single-character operator as fallback.
// The empty strings are placeholders: Lexeme's PartialEq only compares the
// text for TOKEN_IDENT and TOKEN_NUMBER.
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(TOKEN_LESSLESS, ""),
Lexeme(TOKEN_LEQ, ""),
Lexeme(TOKEN_LT, ""),
Lexeme(TOKEN_IDENT, "a"),
Lexeme(TOKEN_EQEQ, ""),
Lexeme(TOKEN_IDENT, "b"),
Lexeme(TOKEN_EQUALS, ""),
Lexeme(TOKEN_IDENT, "c"),
Lexeme(TOKEN_PIPE2, ""),
Lexeme(TOKEN_PIPE, ""),
Lexeme(TOKEN_AMP, ""),
Lexeme(TOKEN_PIPE2, ""),
Lexeme(TOKEN_AMP2, ""),
Lexeme(TOKEN_PIPE, ""),
Lexeme(TOKEN_AMP, ""),
],
);