from-scratch/lang/src/ast.asm

1692 lines
43 KiB
NASM

default rel
%include "src/tokeniser.inc"
%include "src/ast.inc"
section .rdata
; Binding powers of the binary operators, read by parse_binary_expr.
; A larger value binds tighter. parse_binary_expr stops when an operator's
; value is <= the precedence it was called with and recurses with the
; operator's own value, so operators sharing a value group to the left.
PRECEDENCE_ADD dw 90
PRECEDENCE_SUB dw 90
PRECEDENCE_MUL dw 100
PRECEDENCE_DIV dw 100
; NOTE(review): nothing in the visible part of this file reads PRECEDENCE_REM.
PRECEDENCE_REM dw 100
section .text
extern vec_init_with
extern vec_push
extern vec_get
extern vec_insert_sorted
extern vec_get_or
extern panic
extern memcpy
extern strcmp
extern vec_binary_search_by
extern vec_insert
extern bump_alloc
extern tokeniser_init
extern find_lexeme
extern peek_lexeme
extern expect_token
extern skip_token
extern unwrap_token
extern peek_expect_token
extern tokeniser_get_cursor
extern tokeniser_set_cursor
extern str_to_int
global parse_func
global parse_args
global parse_expr
global parse_binary_expr
global parse_primary_expr
global parse_statement
global parse_block
global ast_build_symtable
global ast_walk_for_each
global ast_resolve_var_refs
;; start very simple, with only functions and addition
;; ```rust
;; use super::vec::Vec;
;; ```
;; start-structs
;; struct Ast {
;; nodes: Vec<AstNode>,
;; }
;;
;; struct AstNode {
;; kind: u8,
;; data: *const (),
;; extra: usize,
;; span: u64,
;; }
;;
;; struct AstArgument {
;; name: *const u8,
;; name_len: usize,
;; arg_type: Type,
;; }
;;
;; struct Type {
;; kind: u8,
;; data: u64,
;; }
;; end-structs
;; rdi: *mut Ast
;; define-fn: fn parse_func(ast: *mut Ast) -> u64
;;
;; Parses `fn <ident> ( <args> ) -> <type> <block>`.
;; The AstFunction payload is assembled on the stack, copied into memory
;; obtained from bump_alloc, and an AST_FUNCTION node pointing at that copy is
;; pushed onto `ast.nodes`.
;; Returns (rax) the index of the pushed node, i.e. `ast.nodes.len() - 1`.
;; Calls panic when the token after the return type is not `{`; the
;; unwrap_token calls are assumed to abort on a mismatch themselves.
parse_func:
    push rbp
    mov rbp, rsp
    ; start-structs
    ; struct AstFunction {
    ; name: *const u8,
    ; name_len: usize,
    ; args: *const u64,
    ; args_len: usize,
    ; return_type: Type,
    ; body: u64,
    ; }
    ; end-structs
    ; Frame layout (offsets from rsp):
    ; func_ptr: *mut AstFunction [72..80]
    ; span: u64 [64..72]
    ; ast: *mut Ast [56..64]
    ; body: u64 [48..56]
    ; return_type: Type [32..48]
    ; args_len: usize [24..32]
    ; args_ptr: *const u64 [16..24]
    ; name_len: usize [8..16]
    ; name: *const u8 [0..8]
    ; [0..32] is reused for the AstNode once the AstFunction has been copied out.
    sub rsp, 80
    mov [rsp + 56], rdi             ; ast
    mov qword [rsp + 16], 8         ; args_ptr = <*u64>::dangling()
    mov qword [rsp + 24], 0         ; args_len = 0
    call tokeniser_get_cursor
    mov [rsp + 64], rax             ; span
    mov dil, TOKEN_FN
    call unwrap_token
    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp], rax                  ; name
    mov [rsp + 8], rdx              ; name_len
    mov dil, TOKEN_LPARENS
    call unwrap_token
    mov dil, TOKEN_RPARENS
    call expect_token
    test rax, rax
    jnz .after_args                 ; `()`: keep the empty argument list set up above
    mov rdi, [rsp + 56]             ; ast
    call parse_args                 ; parse_args reads up to and including `)`
    mov [rsp + 16], rax             ; args_ptr
    mov [rsp + 24], rdx             ; args_len
.after_args:
    mov dil, TOKEN_ARROW
    call unwrap_token
    mov rdi, [rsp + 56]             ; ast
    call parse_type
    mov [rsp + 32], rax             ; return_type.kind
    mov [rsp + 40], rdx             ; return_type.data
    ; parse_block consumes the `{` itself, so only peek here
    mov dil, TOKEN_LBRACE
    call peek_expect_token
    test rax, rax
    jz .panic
    mov rdi, [rsp + 56]             ; ast
    call parse_block
    mov [rsp + 48], rax             ; body

    ; move the finished AstFunction into bump-allocated memory
    mov edi, 56                     ; size_of::<AstFunction>
    mov esi, 8                      ; align_of::<AstFunction>
    call bump_alloc
    mov [rsp + 72], rax             ; keep the pointer: rdi is caller-saved across memcpy
    mov rdi, rax                    ; destination
    lea rsi, [rsp]                  ; &AstFunction
    mov edx, 56                     ; size_of::<AstFunction>
    call memcpy

    ; build the AstNode over the now-copied payload and push it
    mov rax, [rsp + 72]
    mov byte [rsp], AST_FUNCTION    ; AstNode.kind
    mov [rsp + 8], rax              ; AstNode.data
    mov qword [rsp + 16], 0         ; AstNode.extra
    mov rax, [rsp + 64]
    mov [rsp + 24], rax             ; AstNode.span
    mov rdi, [rsp + 56]             ; ast
    lea rsi, [rsp]                  ; &AstNode
    call vec_push
    mov rdi, [rsp + 56]             ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax                         ; index of the node just pushed
    leave
    ret
.panic:
    call panic
;; rdi: *mut Ast
;; define-fn: fn parse_args(ast: *mut Ast) -> (*const u64, usize)
;;
;; Parses `name: type` pairs separated by commas up to and including the
;; closing `)`. Each argument becomes a bump-allocated AstArgument plus an
;; AST_ARG node; the node indices are collected in a local vector.
;; Returns the first two words of that vector: rax = pointer to the indices,
;; rdx = number of indices.
parse_args:
    push rbp
    mov rbp, rsp
    ; Frame layout (offsets from rsp):
    ; arg_ptr: *mut AstArgument [88..96]
    ; span: u64 [80..88]
    ; vec: [40..80]
    ; argument: AstArgument { [8..40]
    ; name: *const u8 [8..16]
    ; name_len: usize [16..24]
    ; arg_type: Type [24..40]
    ; }
    ; ast [0..8]
    ; [8..40] is reused for the AstNode, and [8..16] for the pushed index.
    sub rsp, 96
    mov [rsp], rdi                  ; ast
    lea rdi, [rsp + 40]             ; vec
    mov esi, 8                      ; element size: u64 node index
    xor edx, edx                    ; drop = None
    mov ecx, 16                     ; capacity
    call vec_init_with
.loop:
    mov dil, TOKEN_RPARENS
    call expect_token
    test rax, rax
    jnz .done_args                  ; `)` seen (also covers a trailing comma)
    call tokeniser_get_cursor
    mov [rsp + 80], rax             ; span
    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp + 8], rax              ; AstArgument.name
    mov [rsp + 16], rdx             ; AstArgument.name_len
    mov dil, TOKEN_COLON
    call unwrap_token
    mov rdi, [rsp]                  ; ast
    call parse_type
    mov [rsp + 24], rax             ; AstArgument.arg_type.kind
    mov [rsp + 32], rdx             ; AstArgument.arg_type.data

    ; copy the AstArgument out of the frame
    mov edi, 32                     ; size_of::<AstArgument>
    mov esi, 8                      ; align_of::<AstArgument>
    call bump_alloc
    mov [rsp + 88], rax             ; keep the pointer: rdi is caller-saved across memcpy
    mov rdi, rax
    lea rsi, [rsp + 8]              ; &AstArgument
    mov edx, 32                     ; size_of::<AstArgument>
    call memcpy

    ; push the AST_ARG node
    mov rax, [rsp + 88]
    mov qword [rsp + 8], AST_ARG    ; AstNode.kind
    mov [rsp + 16], rax             ; AstNode.data
    mov qword [rsp + 24], 0         ; AstNode.extra
    mov rax, [rsp + 80]
    mov [rsp + 32], rax             ; AstNode.span
    mov rdi, [rsp]                  ; ast
    lea rsi, [rsp + 8]              ; &AstNode
    call vec_push

    ; remember the index of the node just pushed
    mov rdi, [rsp]                  ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    mov [rsp + 8], rax              ; argument index
    lea rdi, [rsp + 40]             ; vec
    lea rsi, [rsp + 8]              ; &index
    call vec_push

    mov dil, TOKEN_COMMA
    call expect_token
    test rax, rax
    jnz .loop                       ; another argument (or a trailing comma) follows
    ; no comma: the list must end here
    mov dil, TOKEN_RPARENS
    call unwrap_token
.done_args:
    mov rax, [rsp + 40]             ; args_ptr
    mov rdx, [rsp + 48]             ; args_len
    leave
    ret
;; rdi: lexeme ptr
;; rsi: lexeme len
;; fn parse_number(lexeme: *const u8, lexeme_len: usize) -> u64
;;
;; Converts a number lexeme to an integer via str_to_int(ptr, len, radix).
;; Lexemes longer than two bytes may carry a radix prefix:
;;   `0x` -> 16, `0o` -> 8, `0b` -> 2
;; The prefix is stripped before conversion. Anything else is handed to
;; str_to_int unchanged with radix 10. A radix letter in second position that
;; is not preceded by `0` is rejected with panic.
parse_number:
    push rbp
    mov rbp, rsp
    ; lexeme len [8..16]
    ; lexeme ptr [0..8]
    sub rsp, 16
    mov [rsp], rdi                  ; lexeme ptr
    mov [rsp + 8], rsi              ; lexeme len
    mov eax, 10                     ; default radix
    cmp rsi, 2
    jbe .parse                      ; too short for a prefix plus at least one digit
    movzx ecx, byte [rdi]           ; first byte: must be '0' for a prefix
    movzx edx, byte [rdi + 1]       ; second byte: selects the radix
    mov eax, 16
    cmp dl, 'x'
    je .prefixed
    mov eax, 8
    cmp dl, 'o'
    je .prefixed
    mov eax, 2
    cmp dl, 'b'
    je .prefixed
    mov eax, 10                     ; no radix letter: plain decimal
    jmp .parse
.prefixed:
    cmp cl, '0'
    jne .panic                      ; invalid radix prefix
    add qword [rsp], 2              ; skip the two prefix bytes
    sub qword [rsp + 8], 2
.parse:
    mov rdi, [rsp]                  ; lexeme ptr
    mov rsi, [rsp + 8]              ; lexeme len
    mov rdx, rax                    ; radix
    call str_to_int
    leave
    ret
.panic:
    call panic
;; rdi: *mut Ast
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> (u64, bool)
;;
;; Parses one of:
;;   number      -> pushes AST_NUMBER (data = parsed value), placeness = false
;;   ( expr )    -> returns the inner expression's index and placeness
;;   identifier  -> pushes AST_VAR_REF (unresolved, resolved = -1), placeness = true
;; Any other token calls panic.
;; Returns rax = node index, rdx = placeness.
parse_primary_expr:
    push rbp
    mov rbp, rsp
    ; Frame layout (offsets from rsp):
    ; scratch: u64 [40..48] (AstVarRef pointer / saved placeness)
    ; span: u64 [32..40] (doubles as AstNode.span)
    ; AstNode [8..40], AstVarRef [8..32]
    ; ast [0..8]
    sub rsp, 48
    mov [rsp], rdi                  ; ast
    ; start-structs
    ; struct AstVarRef {
    ; resolved: u64,
    ; name: *const u8,
    ; name_len: usize,
    ; }
    ; end-structs
    call tokeniser_get_cursor
    mov [rsp + 32], rax             ; span
    mov dil, TOKEN_NUMBER
    call expect_token
    test rax, rax
    jnz .number
    mov dil, TOKEN_LPARENS
    call expect_token
    test rax, rax
    jnz .paren_expr
    mov dil, TOKEN_IDENT
    call expect_token
    test rax, rax
    jz .panic
.var_ref:
    ; rax/rdx = identifier lexeme returned by expect_token
    mov qword [rsp + 8], -1         ; AstVarRef.resolved (not yet resolved)
    mov [rsp + 16], rax             ; AstVarRef.name
    mov [rsp + 24], rdx             ; AstVarRef.name_len
    mov edi, 24                     ; size_of::<AstVarRef>
    mov esi, 8                      ; align_of::<AstVarRef>
    call bump_alloc
    mov [rsp + 40], rax             ; keep the pointer: rdi is caller-saved across memcpy
    mov rdi, rax
    lea rsi, [rsp + 8]              ; &AstVarRef
    mov edx, 24
    call memcpy
    mov rax, [rsp + 40]
    mov qword [rsp + 8], AST_VAR_REF ; AstNode.kind
    mov [rsp + 16], rax             ; AstNode.data
    mov qword [rsp + 24], 0         ; AstNode.extra
    ; AstNode.span is already in place at [rsp + 32]
    mov rdi, [rsp]                  ; ast
    lea rsi, [rsp + 8]              ; &AstNode
    call vec_push
    mov rdi, [rsp]                  ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    mov edx, 1                      ; placeness = true
    jmp .epilogue
.number:
    mov rdi, rax                    ; lexeme ptr
    mov rsi, rdx                    ; lexeme len
    call parse_number
    mov byte [rsp + 8], AST_NUMBER  ; AstNode.kind
    mov [rsp + 16], rax             ; AstNode.data = value
    mov qword [rsp + 24], 0         ; AstNode.extra
    ; AstNode.span is already in place at [rsp + 32]
    mov rdi, [rsp]                  ; ast
    lea rsi, [rsp + 8]              ; &AstNode
    call vec_push
    mov rdi, [rsp]                  ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    xor edx, edx                    ; placeness = false
    jmp .epilogue
.paren_expr:
    mov rdi, [rsp]                  ; ast
    call parse_expr
    mov [rsp + 8], rax              ; inner expression index
    mov [rsp + 40], rdx             ; inner placeness: unwrap_token overwrites rdx
    mov dil, TOKEN_RPARENS
    call unwrap_token
    mov rax, [rsp + 8]
    mov rdx, [rsp + 40]
.epilogue:
    leave
    ret
.panic:
    call panic
;; rdi: *mut Ast
;; sil: precedence
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> (u64, bool)
;;
;; Precedence-climbing parser for `+ - * /`.
;; Parses a prefix expression, then keeps folding `<left> <op> <right>` into
;; AST_BINARY_OP nodes for as long as the next operator binds tighter than
;; `precedence`. Both operands are converted to values (ast_place_to_value)
;; before they are stored.
;; Returns rax = index of the resulting node, rdx = its placeness. The
;; placeness is the prefix expression's when no operator was consumed and
;; false otherwise.
parse_binary_expr:
    push rbp
    mov rbp, rsp
    push rbx
    ; size: 24, align: 8
    ; start-structs
    ; struct BinaryExpr {
    ; left: u64,
    ; operator: u8,
    ; right: u64,
    ; }
    ; end-structs
    sub rsp, 64
    ; span: u64 [56..64]
    ; AstNode [32..64]
    ; lexeme: Lexeme [32..56]
    ; right: u64 [24..32]
    ; right_placeness: u8 [20..21]
    ; left_placeness: u8 [19..20]
    ; our_precedence: u8 [18..19]
    ; upper_precedence: u8 [17..18]
    ; operator: u8 [16..17]
    ; left: u64 [8..16]
    ; rdi: *mut Ast [0..8]
    mov [rsp], rdi                  ; ast
    mov byte [rsp + 17], sil        ; upper_precedence
    mov byte [rsp + 16], 0          ; operator
    call tokeniser_get_cursor
    mov [rsp + 56], rax             ; span (becomes AstNode.span of every node built here)
    mov rdi, [rsp]                  ; ast: rdi is caller-saved across the call above
    call parse_prefix_expr
    mov [rsp + 8], rax              ; left
    mov [rsp + 19], dl              ; left_placeness
.loop:
    lea rdi, [rsp + 32]             ; &lexeme
    call peek_lexeme
    mov rax, [rsp + 32]             ; first word of the lexeme; low byte = token kind
    mov byte [rsp + 16], al         ; operator
    ; look up the operator's precedence; bx stays -1 for any other token
    mov bx, -1
    cmp al, TOKEN_PLUS
    cmove bx, word [rel PRECEDENCE_ADD]
    cmp al, TOKEN_MINUS
    cmove bx, word [rel PRECEDENCE_SUB]
    cmp al, TOKEN_STAR
    cmove bx, word [rel PRECEDENCE_MUL]
    cmp al, TOKEN_SLASH
    cmove bx, word [rel PRECEDENCE_DIV]
    cmp bx, -1
    je .done                        ; not a binary operator
    mov byte [rsp + 18], bl         ; our_precedence
    mov dil, [rsp + 17]             ; upper_precedence
    mov al, [rsp + 18]              ; our_precedence
    cmp al, dil
    jle .done                       ; our_precedence <= upper_precedence: caller owns this operator
    call skip_token                 ; consume the operator
    mov rdi, [rsp]                  ; ast
    mov sil, [rsp + 18]             ; the right operand may only take tighter operators
    call parse_binary_expr
    mov [rsp + 24], rax             ; right
    mov [rsp + 20], dl              ; right_placeness
    ; convert left and right to values
    mov rdi, [rsp]                  ; ast
    mov rsi, [rsp + 8]              ; left
    mov dl, [rsp + 19]              ; left_placeness
    call ast_place_to_value
    mov [rsp + 8], rax              ; left
    mov byte [rsp + 19], 0          ; left_placeness = false
    mov rdi, [rsp]                  ; ast
    mov rsi, [rsp + 24]             ; right
    mov dl, [rsp + 20]              ; right_placeness
    call ast_place_to_value
    mov [rsp + 24], rax             ; right
    mov byte [rsp + 20], 0          ; right_placeness = false
    ; allocate and fill the BinaryExpr payload
    mov edi, 24                     ; size_of::<BinaryExpr>
    mov esi, 8                      ; align_of::<BinaryExpr>
    call bump_alloc
    mov rdx, [rsp + 8]
    mov [rax + 0], rdx              ; BinaryExpr.left
    mov dl, byte [rsp + 16]
    mov byte [rax + 8], dl          ; BinaryExpr.operator (token kind)
    mov rdx, [rsp + 24]
    mov [rax + 16], rdx             ; BinaryExpr.right
    ; the AstNode overlays the peeked lexeme; span at [rsp + 56] is already set
    mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind
    mov [rsp + 40], rax             ; AstNode.data
    mov qword [rsp + 48], 0         ; AstNode.extra
    mov rdi, [rsp]                  ; ast
    lea rsi, [rsp + 32]             ; &AstNode
    call vec_push
    mov rdi, [rsp]                  ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    mov [rsp + 8], rax              ; the new node is the left operand of the next round
    mov byte [rsp + 19], 0          ; left_placeness = false
    jmp .loop
.done:
    mov rax, [rsp + 8]              ; left
    movzx rdx, byte [rsp + 19]      ; left_placeness
    add rsp, 64
    pop rbx
    pop rbp
    ret
;; rdi: *mut Ast
;; define-fn: fn parse_expr(ast: *mut Ast) -> u64
;;
;; Expression entry point: forwards to parse_assignment_expr with sil = 0 and
;; hands back whatever that call leaves in rax (and rdx) untouched.
parse_expr:
    push rbp
    mov rbp, rsp
    sub rsp, 8                      ; one slot: ast
    mov [rbp - 8], rdi              ; ast
    mov sil, 0                      ; lowest precedence
    call parse_assignment_expr
    leave
    ret
;; rdi: *mut Ast
;; define-fn: fn parse_statement(ast: *mut Ast) -> u64
;;
;; Parses one statement and returns (rax) the index of its node:
;;   return <expr> ;   -> pushes AST_RETURN_STATEMENT (data = expression index)
;;   let ... ;         -> delegates to ast_parse_let
;;   { ... }           -> delegates to parse_block (no trailing `;` required)
;; Any other leading token calls panic.
parse_statement:
    push rbp
    mov rbp, rsp
    ; AstNode [8..40]
    ; Ast [0..8] (reused for the statement index once the ast pointer is no longer needed)
    sub rsp, 40
    mov [rsp], rdi                  ; ast
    call tokeniser_get_cursor
    mov [rsp + 32], rax             ; AstNode.span
    mov dil, TOKEN_RETURN
    call expect_token
    test rax, rax
    jnz .return
    mov dil, TOKEN_LET
    call expect_token
    test rax, rax
    jnz .let
    mov dil, TOKEN_LBRACE
    call peek_expect_token          ; parse_block expects lbrace to still be there
    test rax, rax                   ; branch on the result, not on leftover flags
    jz .panic
.block:
    mov rdi, [rsp]                  ; ast
    call parse_block
    ; Blocks don't require a trailing semicolon
    jmp .epilogue
.let:
    mov rdi, [rsp]                  ; ast
    call ast_parse_let
    mov [rsp], rax                  ; statement index
    jmp .semi
.return:
    mov rdi, [rsp]                  ; ast
    call parse_expr
    ; NOTE(review): the placeness parse_expr leaves in rdx is ignored here, so
    ; `return x;` stores the variable reference itself — confirm this is intended.
    mov byte [rsp + 8], AST_RETURN_STATEMENT ; AstNode.kind
    mov [rsp + 16], rax             ; AstNode.data = expression index
    mov qword [rsp + 24], 0         ; AstNode.extra
    mov rdi, [rsp]                  ; ast
    lea rsi, [rsp + 8]              ; &AstNode
    call vec_push
    mov rdi, [rsp]                  ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    mov [rsp], rax                  ; statement index
.semi:
    mov dil, TOKEN_SEMI
    call unwrap_token
    mov rax, [rsp]                  ; statement index
.epilogue:
    leave
    ret
.panic:
    call panic
;; rdi: *mut Ast
;; define-fn: fn parse_block(ast: *mut Ast) -> u64
;;
;; Parses `{ statement* }` (stray `;` between statements are skipped) and
;; pushes an AST_BLOCK node whose data/extra are the pointer/length of the
;; vector of statement indices. Returns (rax) the index of that node.
;; An empty block gets a dangling pointer and length 0.
parse_block:
    push rbp
    mov rbp, rsp
    ; span: u64 [64..72]
    ; Ast: *mut Ast [56..64]
    ; statements: Vec<Statement> [8..56]
    ; statement: u64 [0..8]
    ; The AstNode pushed at the end occupies [0..32]; its data/extra fields
    ; deliberately overlay the first two words of `statements`.
    sub rsp, 72
    mov [rsp + 56], rdi             ; ast
    call tokeniser_get_cursor
    mov [rsp + 64], rax             ; span
    mov dil, TOKEN_LBRACE
    call unwrap_token
    ; Defaults for `{}`: the early exit below skips vec_init_with, and without
    ; these the node would carry whatever happened to be on the stack.
    mov qword [rsp + 8], 8          ; AstNode.data = <*u64>::dangling()
    mov qword [rsp + 16], 0         ; AstNode.extra = 0 statements
    mov dil, TOKEN_RBRACE
    call peek_expect_token
    test rax, rax
    jnz .done
    lea rdi, [rsp + 8]              ; statements
    mov esi, 8                      ; element size: u64 statement index
    xor edx, edx                    ; drop = None
    mov ecx, 64                     ; capacity
    call vec_init_with
.loop:
    mov dil, TOKEN_RBRACE
    call peek_expect_token
    test rax, rax
    jnz .done
    ; skip semicolons
    mov dil, TOKEN_SEMI
    call expect_token
    test rax, rax
    jnz .loop
    mov rdi, [rsp + 56]             ; ast
    call parse_statement
    mov [rsp], rax                  ; statement index
    lea rdi, [rsp + 8]              ; statements
    lea rsi, [rsp]                  ; &statement
    call vec_push
    jmp .loop
.done:
    ; eat the closing brace
    mov dil, TOKEN_RBRACE
    call unwrap_token
    mov qword [rsp], AST_BLOCK      ; AstNode.kind
    ; AstNode.data / AstNode.extra are already in place at [rsp + 8] / [rsp + 16]
    mov rax, [rsp + 64]             ; span
    mov [rsp + 24], rax             ; AstNode.span
    mov rdi, [rsp + 56]             ; ast
    lea rsi, [rsp]                  ; &AstNode
    call vec_push
    mov rdi, [rsp + 56]             ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    leave
    ret
;; rdi: *mut Ast
;; define-fn: fn parse_type(ast: *mut Ast) -> Type
;;
;; Reads one type: `i32`, `u32`, `void`, `bool`, or `*` followed by a type.
;; Returns rax = Type.kind, rdx = Type.data. data is 0 for the scalar kinds
;; and a pointer to a bump-allocated copy of the pointee Type for TYPE_POINTER.
;; Any other token calls panic.
parse_type:
    push rbp
    mov rbp, rsp
    ; Frame layout (offsets from rsp):
    ; lexeme [8..32]; once the token kind has been read:
    ;   pointee: Type [8..24]
    ;   pointee_ptr: *mut Type [24..32]
    ; ast [0..8]
    sub rsp, 32
    mov [rsp], rdi                  ; ast
    lea rdi, [rsp + 8]              ; &lexeme
    call find_lexeme                ; TODO: use peek here to allow failing gracefully
    xor edx, edx                    ; Type.data = 0 unless this is a pointer
    mov rax, [rsp + 8]              ; first word of the lexeme; low byte = token kind
    cmp al, TOKEN_I32
    je .i32_type
    cmp al, TOKEN_U32
    je .u32_type
    cmp al, TOKEN_VOID
    je .void_type
    cmp al, TOKEN_BOOL
    je .bool_type
    cmp al, TOKEN_STAR
    je .pointer_type
    jmp .panic
.i32_type:
    mov rax, TYPE_I32
    jmp .epilogue
.u32_type:
    mov rax, TYPE_U32
    jmp .epilogue
.void_type:
    mov rax, TYPE_VOID
    jmp .epilogue
.bool_type:
    mov rax, TYPE_BOOL
    jmp .epilogue
.pointer_type:
    mov rdi, [rsp]                  ; ast
    call parse_type                 ; pointee
    mov [rsp + 8], rax              ; pointee.kind
    mov [rsp + 16], rdx             ; pointee.data
    mov edi, 16                     ; size_of::<Type>
    mov esi, 8                      ; align_of::<Type>
    call bump_alloc
    mov [rsp + 24], rax             ; keep the pointer: rdi is caller-saved across memcpy
    mov rdi, rax
    lea rsi, [rsp + 8]              ; &pointee
    mov edx, 16                     ; size_of::<Type>
    call memcpy
    mov rax, TYPE_POINTER
    mov rdx, [rsp + 24]             ; Type.data = pointer to the pointee
.epilogue:
    leave
    ret
.panic:
    call panic
;; rdi: *mut Ast
;; define-fn: fn parse_prefix_expr(ast: *mut Ast) -> (u64, bool)
;;
;; Handles the prefix operators `*` (dereference) and `&` (address-of); with
;; neither present the call is forwarded to parse_primary_expr.
;; Returns rax = node index, rdx = placeness (true for a dereference, false
;; for address-of, otherwise whatever parse_primary_expr reports).
parse_prefix_expr:
; frame slots, relative to rbp
.ast        equ -40                 ; *mut Ast
.node_kind  equ -32                 ; AstNode.kind
.node_data  equ -24                 ; AstNode.data
.node_extra equ -16                 ; AstNode.extra
.node_span  equ -8                  ; AstNode.span
    push rbp
    mov rbp, rsp
    sub rsp, 40
    mov [rbp + .ast], rdi
    call tokeniser_get_cursor
    mov [rbp + .node_span], rax
    mov dil, TOKEN_STAR
    call expect_token
    test rax, rax
    jnz .dereference
    mov dil, TOKEN_AMP
    call expect_token
    test rax, rax
    jnz .address_of
    ; no prefix operator: the primary expression's result is ours
    mov rdi, [rbp + .ast]
    call parse_primary_expr
    leave
    ret
.dereference:
    ; operand first, then wrap it
    mov rdi, [rbp + .ast]
    call parse_prefix_expr
    mov qword [rbp + .node_kind], AST_DEREF
    mov [rbp + .node_data], rax     ; operand index
    mov qword [rbp + .node_extra], 0
    mov rdi, [rbp + .ast]
    lea rsi, [rbp + .node_kind]     ; &AstNode
    call vec_push
    mov rdi, [rbp + .ast]
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    mov rdx, 1                      ; a dereference yields a place
    leave
    ret
.address_of:
    ; `&` needs a place as operand, so the operand goes through
    ; ast_value_to_place (rdx from the recursive call is its placeness)
    mov rdi, [rbp + .ast]
    call parse_prefix_expr
    mov rsi, rax                    ; operand index
    mov rdi, [rbp + .ast]
    call ast_value_to_place
    mov qword [rbp + .node_kind], AST_ADDRESS_OF
    mov [rbp + .node_data], rax     ; operand (as place) index
    mov qword [rbp + .node_extra], 0
    mov rdi, [rbp + .ast]
    lea rsi, [rbp + .node_kind]     ; &AstNode
    call vec_push
    mov rdi, [rbp + .ast]
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    xor rdx, rdx                    ; an address is a value
    leave
    ret
;; rdi: *mut Ast
;; define-fn: fn parse_assignment(ast: *mut Ast) -> (u64, bool)
;;
;; Parses `<binary-expr>` optionally followed by `= <expr>`.
;; Without `=` the binary expression's index and placeness are returned as-is.
;; With `=` the left side is converted to a place, the right side to a value,
;; an AST_ASSIGNMENT node (data = dest, extra = source) is pushed, and its
;; index is returned with placeness = false.
parse_assignment_expr:
    push rbp
    mov rbp, rsp
    ; span: u64 [32..40]
    ; source [24..32]
    ; dest [16..24]
    ; dest_placeness [8..9]
    ; ast [0..8]
    ; [8..40] doubles as the AstNode: dest/source/span already sit at the
    ; data/extra/span offsets when the node is pushed.
    sub rsp, 40
    mov [rsp], rdi                  ; ast
    call tokeniser_get_cursor
    mov [rsp + 32], rax             ; span
    mov rdi, [rsp]                  ; ast
    xor esi, esi                    ; precedence 0 (what parse_expr passes): accept every operator
    call parse_binary_expr
    mov [rsp + 16], rax             ; dest
    mov [rsp + 8], dl               ; dest_placeness
    mov dil, TOKEN_EQUALS
    call expect_token
    test rax, rax
    jz .done                        ; plain expression
.assignment:
    mov rdi, [rsp]                  ; ast
    mov rsi, [rsp + 16]             ; dest
    movzx rdx, byte [rsp + 8]       ; dest_placeness
    call ast_value_to_place
    mov [rsp + 16], rax             ; dest (as place)
    mov rdi, [rsp]                  ; ast
    call parse_expr
    mov rdi, [rsp]                  ; ast
    mov rsi, rax                    ; source; rdx still holds its placeness
    call ast_place_to_value
    mov [rsp + 24], rax             ; source (as value)
    mov qword [rsp + 8], AST_ASSIGNMENT ; AstNode.kind
    mov rdi, [rsp]                  ; ast
    lea rsi, [rsp + 8]              ; &AstNode
    call vec_push
    mov rdi, [rsp]                  ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    mov [rsp + 16], rax             ; the assignment node is the result
    mov byte [rsp + 8], 0           ; placeness = false
.done:
    mov rax, [rsp + 16]             ; result index
    movzx rdx, byte [rsp + 8]       ; placeness
    leave
    ret
;; rdi: *mut Ast
;; define-fn: fn ast_parse_let(ast: *mut Ast) -> (u64, bool)
;;
;; Parses `<ident> : <type> = <expr>` (the `let` keyword has already been
;; consumed by parse_statement; the trailing `;` is left to the caller).
;; Pushes an AST_VAR_DECL node followed by an AST_ASSIGNMENT node
;; (data = declaration index, extra = initialiser index).
;; Returns rax = index of the assignment node, rdx = 0 (not a place).
ast_parse_let:
    push rbp
    mov rbp, rsp
    ; start-structs
    ; struct AstVarDecl {
    ; name: *const u8,
    ; name_len: usize,
    ; var_type: Type,
    ; }
    ;
    ; struct AstAssignment {
    ; variable: u64,
    ; expr: u64,
    ; }
    ; end-structs
    ; expr: u64 [48..56]
    ; *AstVarDecl [40..48]
    ; AstNode [8..40]
    ; AstVarDecl [8..40]
    ; Ast [0..8]
    sub rsp, 56
    mov [rsp], rdi                  ; ast
    call tokeniser_get_cursor
    mov [rsp + 40], rax             ; cursor at the start; this slot is overwritten below
    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp + 8], rax              ; AstVarDecl.name
    mov [rsp + 16], rdx             ; AstVarDecl.name_len
    mov dil, TOKEN_COLON
    call unwrap_token
    mov rdi, [rsp]                  ; ast
    call parse_type
    mov [rsp + 24], rax             ; AstVarDecl.var_type.kind
    mov [rsp + 32], rdx             ; AstVarDecl.var_type.data
    mov edi, 32                     ; size_of::<AstVarDecl>
    mov esi, 8                      ; align_of::<AstVarDecl>
    call bump_alloc
    mov [rsp + 40], rax             ; AstVarDecl ptr, saved before memcpy (rdi is caller-saved)
    mov rdi, rax
    lea rsi, [rsp + 8]              ; &AstVarDecl
    mov edx, 32                     ; size_of::<AstVarDecl>
    call memcpy
    ; parse the expression
    mov dil, TOKEN_EQUALS
    call unwrap_token
    mov rdi, [rsp]                  ; ast
    call parse_expr
    mov rdi, [rsp]                  ; ast
    mov rsi, rax                    ; expr; rdx still holds its placeness
    call ast_place_to_value
    mov [rsp + 48], rax             ; expr index
    ; variable is defined at this point so that the expression cannot reference it
    call tokeniser_get_cursor       ; the declaration's span is the cursor after the initialiser
    mov rdi, [rsp + 40]             ; AstVarDecl ptr
    mov qword [rsp + 8], AST_VAR_DECL ; AstNode.kind
    mov [rsp + 16], rdi             ; AstNode.data
    mov qword [rsp + 24], 0         ; AstNode.extra
    mov [rsp + 32], rax             ; AstNode.span
    mov rdi, [rsp]                  ; ast
    lea rsi, [rsp + 8]              ; &AstNode
    call vec_push
    ; variables are places
    mov rdi, [rsp]                  ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax                         ; index of the declaration
    mov qword [rsp + 8], AST_ASSIGNMENT ; AstNode.kind
    mov [rsp + 16], rax             ; AstNode.data (variable index)
    mov rax, [rsp + 48]             ; expr index
    mov [rsp + 24], rax             ; AstNode.extra (expr index)
    ; reuse span from variable declaration (still at [rsp + 32])
    mov rdi, [rsp]                  ; ast
    lea rsi, [rsp + 8]              ; &AstNode
    call vec_push
    mov rdi, [rsp]                  ; ast
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax
    xor edx, edx                    ; placeness = false
    leave
    ret
;; rdi: *mut Ast
;; rsi: index of node
;; rdx: is_placeness
;; fn ast_value_to_place(ast: *mut Ast, node_index: u64, is_placeness: bool) -> u64
;;
;; Returns node_index unchanged when dl == 1. Otherwise pushes an
;; AST_VALUE_TO_PLACE node (data = node_index, span copied from the wrapped
;; node) and returns the index of that new node.
ast_value_to_place:
; frame slots, relative to rbp (only allocated on the wrapping path)
.ast        equ -40                 ; *mut Ast
.node_kind  equ -32                 ; AstNode.kind; first holds node_index
.node_data  equ -24                 ; AstNode.data
.node_extra equ -16                 ; AstNode.extra
.node_span  equ -8                  ; AstNode.span
    push rbp
    mov rbp, rsp
    mov rax, rsi                    ; result if nothing has to be wrapped
    cmp dl, 1
    je .return                      ; already a place
    sub rsp, 40
    mov [rbp + .ast], rdi
    mov [rbp + .node_kind], rsi     ; park node_index across vec_get
    call vec_get                    ; rax = &ast.nodes[node_index]
    mov rcx, [rax + 24]             ; wrapped node's span
    mov [rbp + .node_span], rcx
    mov rcx, [rbp + .node_kind]     ; node_index
    mov byte [rbp + .node_kind], AST_VALUE_TO_PLACE
    mov [rbp + .node_data], rcx
    mov qword [rbp + .node_extra], 0
    mov rdi, [rbp + .ast]
    lea rsi, [rbp + .node_kind]     ; &AstNode
    call vec_push
    mov rdi, [rbp + .ast]
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax                         ; index of the wrapper
.return:
    leave
    ret
;; rdi: *mut Ast
;; rsi: index of node
;; rdx: is_placeness
;; fn ast_place_to_value(ast: *mut Ast, node_index: u64, is_placeness: bool) -> u64
;;
;; Returns node_index unchanged when dl == 0. Otherwise pushes an
;; AST_PLACE_TO_VALUE node (data = node_index, span copied from the wrapped
;; node) and returns the index of that new node.
ast_place_to_value:
; frame slots, relative to rbp (only allocated on the wrapping path)
.ast        equ -40                 ; *mut Ast
.node_kind  equ -32                 ; AstNode.kind; first holds node_index
.node_data  equ -24                 ; AstNode.data
.node_extra equ -16                 ; AstNode.extra
.node_span  equ -8                  ; AstNode.span
    push rbp
    mov rbp, rsp
    mov rax, rsi                    ; result if nothing has to be wrapped
    test dl, dl
    jz .return                      ; already a value
    sub rsp, 40
    mov [rbp + .ast], rdi
    mov [rbp + .node_kind], rsi     ; park node_index across vec_get
    call vec_get                    ; rax = &ast.nodes[node_index]
    mov rcx, [rax + 24]             ; wrapped node's span
    mov [rbp + .node_span], rcx
    mov rcx, [rbp + .node_kind]     ; node_index
    mov byte [rbp + .node_kind], AST_PLACE_TO_VALUE
    mov [rbp + .node_data], rcx
    mov qword [rbp + .node_extra], 0
    mov rdi, [rbp + .ast]
    lea rsi, [rbp + .node_kind]     ; &AstNode
    call vec_push
    mov rdi, [rbp + .ast]
    mov rax, [rdi + 8]              ; ast.nodes.len()
    dec rax                         ; index of the wrapper
.return:
    leave
    ret
;; rdi: ctx
;; rsi: a: *const SymKey
;; rdx: b: *const SymKey
;; define-fn: fn symkey_cmp(a: *const SymKey, b: *const SymKey) -> i32
;;
;; Three-way comparison of two SymKeys. Fields are compared in this order,
;; the first difference deciding: kind, scope_index, ident (via strcmp on
;; ptr+len pairs), span. kind, scope_index and span use signed comparisons.
;; Returns rax = -1 if a < b, 1 if a > b, 0 if all four fields are equal.
symkey_cmp:
    push rbp
    mov rbp, rsp
    ; b: *const SymKey [8..16]
    ; a: *const SymKey [0..8]
    ; (8 more bytes above are unused padding)
    sub rsp, 24
    mov [rsp], rsi                  ; a, needed again after strcmp
    mov [rsp + 8], rdx              ; b
    ; 1. kind
    mov al, byte [rsi]
    cmp al, byte [rdx]
    jl .a_less
    jg .a_greater
    ; 2. scope_index
    mov rax, [rsi + 8]
    cmp rax, [rdx + 8]
    jl .a_less
    jg .a_greater
    ; 3. ident: strcmp(a.ident, a.ident_len, b.ident, b.ident_len)
    mov rdi, [rsi + 24]
    mov rcx, [rdx + 32]
    mov rsi, [rsi + 32]
    mov rdx, [rdx + 24]
    call strcmp
    test rax, rax
    js .a_less                      ; negative
    jnz .a_greater                  ; positive
    ; 4. span
    mov rsi, [rsp]                  ; a
    mov rdx, [rsp + 8]              ; b
    mov rax, [rsi + 16]
    cmp rax, [rdx + 16]
    jl .a_less
    jg .a_greater
    xor eax, eax                    ; equal
    jmp .epilogue
.a_less:
    mov rax, -1
    jmp .epilogue
.a_greater:
    mov eax, 1
.epilogue:
    leave
    ret
section .rdata
;; Values for SymKey.kind. symkey_cmp compares the kind first, so these
;; numbers determine the order of the groups inside the sorted symbol table.
;; ast_build_symtable_for_each inserts SCOPE, PARENT_SCOPE and VAR entries
;; (arguments are inserted as VAR too); ast_resolve_var_refs_for_each uses
;; START_LOCALS only as a search key for the lower bound of the VAR entries.
;; NOTE(review): SCOPE_NAME, ARG and END_LOCALS are not referenced by the code
;; visible in this file.
;; start-consts
SYM_KEY_SCOPE equ 1 ; :u8
SYM_KEY_SCOPE_NAME equ 2 ; :u8
SYM_KEY_PARENT_SCOPE equ 3 ; :u8
SYM_KEY_START_LOCALS equ 4 ; :u8
SYM_KEY_ARG equ 5 ; :u8
SYM_KEY_VAR equ 6 ; :u8
SYM_KEY_END_LOCALS equ 7 ; :u8
;; end-consts
section .text
;; rdi: Ast
;; rsi: root index
;; rdx: *SymbolTable
;; define-fn: fn ast_build_symtable(ast: *mut Ast, root_index: u64, symtable: *mut core::mem::MaybeUninit<SymbolTable>)
;;
;; Builds the symbol table for the subtree rooted at root_index: a fresh
;; vector of SymEntry is filled by walking the AST with
;; ast_build_symtable_for_each, then the 40-byte vector header is copied to
;; *symtable.
ast_build_symtable:
    push rbp
    mov rbp, rsp
    ; BuildSymtableCtx [24..64]
    ; *SymbolTable [16..24]
    ; root_index [8..16]
    ; Ast [0..8]
    sub rsp, 64
    mov [rsp], rdi                  ; ast
    mov [rsp + 8], rsi              ; root_index
    mov [rsp + 16], rdx             ; *SymbolTable
    ; initialise the entry vector
    lea rdi, [rsp + 24]             ; &BuildSymtableCtx.symtable
    mov esi, 56                     ; size_of::<SymEntry>
    xor edx, edx                    ; drop = None
    mov ecx, 128                    ; capacity
    call vec_init_with
    ; visit every node reachable from the root
    mov rdi, [rsp]                  ; ast
    mov rsi, [rsp + 8]              ; root_index
    lea rdx, [rsp + 24]             ; &BuildSymtableCtx
    lea rcx, [rel ast_build_symtable_for_each] ; callback, formed RIP-relative
    call ast_walk_for_each
    ; memcpy symtable out
    mov rdi, [rsp + 16]             ; *SymbolTable
    lea rsi, [rsp + 24]             ; &BuildSymtableCtx.symtable
    mov edx, 40                     ; size_of::<Vec<SymEntry>>
    call memcpy
    leave
    ret
section .rdata
; Scope name that ast_build_symtable_for_each records for AST_BLOCK scopes.
; The string is not NUL-terminated; its length travels separately as
; ANONYMOUS_SCOPE_NAME_LEN.
ANONYMOUS_SCOPE_NAME db "<anonymous>"
ANONYMOUS_SCOPE_NAME_LEN equ $ - ANONYMOUS_SCOPE_NAME
section .text
;; symtable is a sorted vec pretending to be a b-tree:
;; entries are sorted by a key in order to get the following ordering:
;; scope (index0) -> (ident0)
;; scope (index1) -> (ident1)
;; scope (index2) -> (ident2)
;; scope-name (ident1) -> (index1)
;; scope-name (ident1) -> (index1)
;; parent-scope (scope1) -> (index0)
;; arg (scope1, span, ident) -> (index)
;; var (scope1, span, ident) -> (index)
;; var (scope1, span, ident) -> (index)
;; arg (scope0, span, ident) -> (index)
;; var (scope0, span, ident) -> (index)
;; var (scope0, span, ident) -> (index)
;;
;; arguments are ordered before variables in order to allow shadowing of variables by arguments.
;; variables are ordered by span in order to allow shadowing of variables by variables.
;; all references within a scope are in the range parent-scope(scopeN)..var
;; (scopeN, u64::MAX, u64::MAX)
;;
;; the symtable contains `SymEntries`, which hold a `SymKey` and an index into the AST node list.
;; for scope entries, the index holds the pointer to the scope's ident,
;; and `extra` holds the length; for other keys, `extra` is 0.
;;
;; start-structs
;; struct SymbolTable {
;; symtable: Vec<SymEntry>,
;; }
;; struct SymKey {
;; kind: u8,
;; scope_index: u64,
;; span: u64,
;; ident: *const u8,
;; ident_len: usize,
;; }
;; struct SymEntry {
;; key: SymKey,
;; index: u64,
;; extra: u64,
;; }
;; end-structs
;; size_of::<SymKey> == 40
;; size_of::<SymEntry> == 56
;;
;; #start-structs
;; struct BuildSymtableCtx {
;; symtable: Vec<SymEntry>,
;; }
;; #end-structs
;;
;; scope_stack [0..40]
;; symtable [40..80]
;;
;; rdi: Ctx
;; rsi: Ast
;; rdx: index
;; rcx: scope
;;
;; ast_walk_for_each callback used by ast_build_symtable. Depending on the
;; kind of node `index` it inserts into the sorted entry vector at Ctx:
;;   AST_FUNCTION / AST_BLOCK -> a SCOPE entry (index/extra = name ptr/len;
;;                               blocks use ANONYMOUS_SCOPE_NAME) and a
;;                               PARENT_SCOPE entry (index = `scope`)
;;   AST_VAR_DECL / AST_ARG   -> a VAR entry keyed by (scope, name, node span)
;;                               with index = node index
;; Other node kinds are ignored. Entries are ordered with symkey_cmp.
ast_build_symtable_for_each:
    push rbp
    mov rbp, rsp
    push rbx
    ; scope: u64 [88..96]
    ; SymEntry [32..88]
    ; SymKey [32..72]
    ; *AstNode [24..32]
    ; index [16..24]
    ; ctx [8..16]
    ; ast [0..8]
    sub rsp, 96
    mov [rsp], rsi                  ; ast
    mov [rsp + 8], rdi              ; ctx
    mov [rsp + 16], rdx             ; index
    mov [rsp + 88], rcx             ; scope
    mov rdi, rsi                    ; ast
    mov rsi, rdx                    ; index
    call vec_get
    mov [rsp + 24], rax             ; *AstNode (rax keeps pointing at the node below)
    mov bl, byte [rax]              ; AstNode.kind
    cmp bl, AST_FUNCTION
    je .func
    cmp bl, AST_VAR_DECL
    je .local
    cmp bl, AST_ARG
    je .local
    cmp bl, AST_BLOCK
    jne .done
.block:
    ; blocks have no name of their own
    lea rdx, [rel ANONYMOUS_SCOPE_NAME]
    mov rcx, ANONYMOUS_SCOPE_NAME_LEN
    jmp .insert_scope
.func:
    ; use function name as scope name
    mov rbx, [rax + 8]              ; AstNode.data
    mov rdx, [rbx + 0]              ; Func.name
    mov rcx, [rbx + 8]              ; Func.name_len
.insert_scope:
    ; scope entry: rdx/rcx = scope name ptr/len
    mov [rsp + 72], rdx             ; SymEntry.index = name ptr
    mov [rsp + 80], rcx             ; SymEntry.extra = name len
    mov byte [rsp + 32], SYM_KEY_SCOPE ; SymKey.kind
    mov rdx, [rsp + 16]             ; index
    mov qword [rsp + 40], rdx       ; SymKey.scope_index
    mov rdx, [rax + 24]             ; AstNode.span
    mov qword [rsp + 48], rdx       ; SymKey.span
    mov qword [rsp + 56], 1         ; SymKey.ident (dangling, empty)
    mov qword [rsp + 64], 0         ; SymKey.ident_len
    mov rdi, [rsp + 8]              ; ctx
    lea rsi, [rsp + 32]             ; &SymEntry
    lea rdx, [rel symkey_cmp]       ; cmp, formed RIP-relative
    xor ecx, ecx                    ; cmp_ctx
    call vec_insert_sorted
    ; parent-scope entry: maps this scope to the one it was found in
    mov byte [rsp + 32], SYM_KEY_PARENT_SCOPE ; SymKey.kind
    mov rdx, [rsp + 16]             ; index
    mov qword [rsp + 40], rdx       ; SymKey.scope_index
    mov qword [rsp + 48], 0         ; SymKey.span
    mov qword [rsp + 56], 1         ; SymKey.ident (dangling, empty)
    mov qword [rsp + 64], 0         ; SymKey.ident_len
    mov rdx, [rsp + 88]             ; parent scope
    mov [rsp + 72], rdx             ; SymEntry.index
    mov qword [rsp + 80], 0         ; SymEntry.extra
    mov rdi, [rsp + 8]              ; ctx
    lea rsi, [rsp + 32]             ; &SymEntry
    lea rdx, [rel symkey_cmp]       ; cmp
    xor ecx, ecx                    ; cmp_ctx
    call vec_insert_sorted
    jmp .done
.local:
    ; Variable declarations and arguments produce the same entry: both
    ; payloads start with name/name_len and both are stored as SYM_KEY_VAR,
    ; the only kind ast_resolve_var_refs_for_each looks up.
    mov byte [rsp + 32], SYM_KEY_VAR ; SymKey.kind
    mov rdx, [rsp + 88]             ; current scope
    mov [rsp + 40], rdx             ; SymKey.scope_index
    mov rdx, [rax + 24]             ; AstNode.span
    mov qword [rsp + 48], rdx       ; SymKey.span
    mov rbx, [rax + 8]              ; AstNode.data
    mov rdx, [rbx + 0]              ; name
    mov rcx, [rbx + 8]              ; name_len
    mov [rsp + 56], rdx             ; SymKey.ident
    mov [rsp + 64], rcx             ; SymKey.ident_len
    mov rdx, [rsp + 16]             ; index
    mov [rsp + 72], rdx             ; SymEntry.index
    mov qword [rsp + 80], 0         ; SymEntry.extra
    mov rdi, [rsp + 8]              ; ctx
    lea rsi, [rsp + 32]             ; &SymEntry
    lea rdx, [rel symkey_cmp]       ; cmp
    xor ecx, ecx                    ; cmp_ctx
    call vec_insert_sorted
.done:
    add rsp, 96
    pop rbx
    pop rbp
    ret
;; rdi: Ast
;; rsi: start_index
;; rdx: ctx
;; rcx: for_each
;; define-fn: fn ast_walk_for_each(ast: *mut Ast, start_index: u64, ctx: *mut (), for_each: unsafe extern "C" fn(ctx: *mut (), *mut Ast, node_index: u64, scope: u64))
;;
;; Iterative walk over the subtree rooted at start_index. For every visited
;; node it calls for_each(ctx, ast, node_index, scope), where scope is the
;; index of the innermost enclosing AST_FUNCTION / AST_BLOCK node, or -1 when
;; there is none.
;;
;; The machine stack below the fixed frame is used as the work list: each
;; entry is a node index still to be visited. rbp marks the bottom of the
;; work list (and the base of the locals), so the walk is finished when rsp
;; has climbed back up to rbp.
ast_walk_for_each:
push rbp
push r15
push r14
push rbx
; INVALID_SCOPE [48..56]
; current_index [40..48]
; *current_scope [32..40]
; current_node_ptr [24..32]
; for_each [16..24]
; ctx [8..16]
; ast [0..8]
sub rsp, 56
mov [rsp], rdi ; Ast
mov [rsp + 8], rdx ; ctx
mov [rsp + 16], rcx ; for_each
mov qword [rsp + 24], 0 ; current_node_ptr
lea rdi, [rsp + 48] ; &INVALID_SCOPE
mov [rsp + 32], rdi ; current_scope
mov [rsp + 40], rsi ; current_index
mov qword [rsp + 48], -1 ; INVALID_SCOPE
; from here on the locals are addressed through rbp, because rsp moves with
; the work list
mov rbp, rsp
; seed the work list with the root
push rsi
; `current_scope` points to the index of the current scope on the stack.
; When we enter a new scope, we push `current_scope` onto the stack, then
; update it to point to the new scope index.
; When `rsp` is equal to `current_scope`, we need to additionally pop into
; `current_scope` after popping the current index.
.loop:
; work list empty?
cmp rsp, rbp
jge .done
; call for_each(ctx, ast, current_index, scope)
mov rdi, [rbp + 8] ; ctx
mov rsi, [rbp] ; Ast
mov rdx, [rsp] ; current_index (peeked; popped after the call)
mov rcx, [rbp + 32] ; current_scope
mov rcx, [rcx] ; current_scope value
mov rax, [rbp + 16] ; for_each
; align stack to 16 bytes before call
; the unaligned rsp is stored at the new top of stack so that `pop rsp`
; restores it afterwards
mov rbx, rsp
sub rsp, 8
and rsp, -16
mov [rsp], rbx
call rax
pop rsp
; get current_node_ptr
mov rdi, [rbp] ; Ast
pop rsi ; current_index
mov [rbp + 40], rsi ; update current_index
call vec_get
mov [rbp + 24], rax ; current_node_ptr
; push the children of the node; kinds not listed here have none
mov bl, byte [rax] ; AstNode.kind
cmp bl, AST_FUNCTION
je .func
cmp bl, AST_BLOCK
je .block
cmp bl, AST_BINARY_OP
je .binary_op
cmp bl, AST_ASSIGNMENT
je .assignment
cmp bl, AST_VALUE_TO_PLACE
je .value_to_place
cmp bl, AST_PLACE_TO_VALUE
je .place_to_value
cmp bl, AST_DEREF
je .deref
cmp bl, AST_ADDRESS_OF
je .address_of
cmp bl, AST_RETURN_STATEMENT
je .return_statement
jmp .check_scope
.func:
; push scope
; two words: the previous scope pointer, then this node's index; the
; address of the latter becomes the new current_scope
push qword [rbp + 32] ; scope-ptr
push qword [rbp + 40] ; current_index
mov [rbp + 32], rsp ; update current_scope
; push child indices to stack
; the body goes first so that it is popped after all arguments
mov rbx, [rax + 8] ; AstNode.data
mov r15, [rbx + 48] ; AstFunction.body
push r15 ; push body index
mov r15, [rbx + 24] ; AstFunction.args_len
xor r14, r14 ; index
.arg_loop:
cmp r14, r15
jge .arg_loop_done
mov rdx, [rbx + 16] ; AstFunction.args
lea rdx, [rdx + r14*8]
push qword [rdx] ; push arg index
inc r14
jmp .arg_loop
.arg_loop_done:
jmp .check_scope
.block:
; push scope
push qword [rbp + 32] ; scope-ptr
push qword [rbp + 40] ; current_index
mov [rbp + 32], rsp ; update current_scope
; push statements onto stack
; last statement first, so that they are popped in source order
mov rbx, [rax + 8] ; AstNode.data
mov r15, [rax + 16] ; AstNode.extra
.stmt_loop:
cmp r15, 0
jle .stmt_loop_done
dec r15
mov rdx, [rbx + r15*8] ; statement index
push rdx ; push statement index
jmp .stmt_loop
.stmt_loop_done:
jmp .check_scope
.binary_op:
; left is pushed last and therefore visited first
mov rbx, [rax + 8] ; AstNode.data
mov rdx, [rbx + 16] ; right index
push rdx ; push right index
mov rdx, [rbx + 0] ; left index
push rdx ; push left index
jmp .check_scope
.assignment:
; dest is pushed last and therefore visited first
mov rbx, [rax + 8] ; AstNode.data = dest
mov rdx, [rax + 16] ; AstNode.extra = source
push rdx ; push source index
push rbx ; push dest index
jmp .check_scope
; single-child nodes: AstNode.data is the index of the inner expression
.value_to_place:
.place_to_value:
.deref:
.address_of:
.return_statement:
mov rbx, [rax + 8] ; AstNode.data
push rbx ; push inner expr index
jmp .check_scope
.check_scope:
; if the top of the work list is the current scope record, every node of
; that scope has been handled
cmp rsp, [rbp + 32] ; current_scope
je .pop_scope
jmp .loop
.pop_scope:
; pop current_scope
; the stack may look something like this:
; current_scope---+
; == stack == ^ | points here
; scope-ptr[0]-+ |
; scope: func0 <-----+
; stmt0 | |
; stmt1 | |
; scope-ptr[1] ------+
; scope: block0 <-+
; stmt2
; stmt3
; ...
pop rax ; scope
pop rax ; scope_ptr
mov [rbp + 32], rax ; update current_scope pointer
; the enclosing scope may have been exhausted as well
jmp .check_scope
.done:
; rsp is back at the frame base: release the locals and restore the
; callee-saved registers in reverse push order
add rsp, 56
pop rbx
pop r14
pop r15
pop rbp
ret
;; rdi: *mut SymbolTable
;; rsi: *mut Ast
;; rdx: node_index
;; rcx: scope
;;
;; ast_walk_for_each callback used by ast_resolve_var_refs. Nodes other than
;; AST_VAR_REF are ignored. For a variable reference the symbol table is
;; searched for the closest preceding VAR entry with the same name, starting
;; in `scope` and moving outwards through the PARENT_SCOPE entries. The index
;; stored in the matching entry is written to AstVarRef.resolved.
;; Calls panic when a scope without a PARENT_SCOPE entry is reached before
;; any match is found.
ast_resolve_var_refs_for_each:
    push rbp
    mov rbp, rsp
    push rbx
    ; lower_bound [88..96]
    ; scope: u64 [80..88]
    ; SymEntry [24..80]
    ; *AstNode [16..24]
    ; *BuildSymtableCtx [8..16]
    ; *Ast [0..8]
    sub rsp, 96
    mov [rsp], rsi                  ; ast
    mov [rsp + 8], rdi              ; symbol table
    mov [rsp + 80], rcx             ; scope to search
    mov rdi, rsi                    ; ast
    mov rsi, rdx                    ; node_index
    call vec_get
    mov [rsp + 16], rax             ; *AstNode
    mov bl, byte [rax]              ; AstNode.kind
    cmp bl, AST_VAR_REF
    jne .epilogue
.var_ref:
    ; Lower bound: search with kind START_LOCALS. The visible builder never
    ; inserts that kind, so the result is used as the position just before
    ; the ARG/VAR entries.
    mov byte [rsp + 24 + 0], SYM_KEY_START_LOCALS ; SymKey.kind
    mov rax, [rsp + 80]             ; scope
    mov [rsp + 24 + 8], rax         ; SymKey.scope_index
    mov qword [rsp + 24 + 16], 0    ; SymKey.span
    mov rax, [rsp + 16]             ; *AstNode
    mov rbx, [rax + 8]              ; AstNode.data
    mov rax, [rbx + 8]              ; AstVarRef.name
    mov rbx, [rbx + 16]             ; AstVarRef.name_len
    mov [rsp + 24 + 24], rax        ; SymKey.ident
    mov [rsp + 24 + 32], rbx        ; SymKey.ident_len
    mov rdi, [rsp + 8]              ; symbol table
    lea rsi, [rsp + 24]             ; &SymKey
    lea rdx, [rel symkey_cmp]       ; cmp, formed RIP-relative
    xor ecx, ecx                    ; cmp_ctx
    call vec_binary_search_by
    mov [rsp + 88], rax             ; lower_bound
    ; Actual key: (VAR, scope, name, span of the reference). ident/ident_len
    ; are still set from above.
    mov byte [rsp + 24 + 0], SYM_KEY_VAR ; SymKey.kind
    mov rax, [rsp + 80]             ; scope
    mov [rsp + 24 + 8], rax         ; SymKey.scope_index
    mov rax, [rsp + 16]             ; *AstNode
    mov rbx, [rax + 24]             ; AstNode.span
    mov [rsp + 24 + 16], rbx        ; SymKey.span
    mov rdi, [rsp + 8]              ; symbol table
    lea rsi, [rsp + 24]             ; &SymKey
    lea rdx, [rel symkey_cmp]       ; cmp
    xor ecx, ecx                    ; cmp_ctx
    call vec_binary_search_by
    ; rdx == 0: exact hit at rax. Otherwise step back one slot from rax to
    ; the entry that sorts directly before the key.
    test rdx, rdx
    jz .fixup
    dec rax
.fixup:
    cmp rax, [rsp + 88]             ; lower_bound
    jl .parent                      ; fell off the front of the locals
    mov rdi, [rsp + 8]              ; symbol table
    mov rsi, rax                    ; index
    call vec_get
    mov rbx, rax                    ; *SymEntry
    ; The predecessor may belong to a different scope; a name match there
    ; must not count.
    mov rax, [rbx + 8]              ; SymEntry.key.scope_index
    cmp rax, [rsp + 80]             ; scope being searched
    jne .parent
    ; compare symbol ident with var_ref ident
    mov rdi, [rbx + 24]             ; SymEntry.key.ident
    mov rsi, [rbx + 32]             ; SymEntry.key.ident_len
    mov rax, [rsp + 16]             ; *AstNode
    mov rax, [rax + 8]              ; AstNode.data
    mov rdx, [rax + 8]              ; AstVarRef.name
    mov rcx, [rax + 16]             ; AstVarRef.name_len
    call strcmp
    test rax, rax
    jnz .parent
    ; found: record the declaring node
    mov rax, [rbx + 40]             ; SymEntry.index
    mov rdx, [rsp + 16]             ; *AstNode
    mov rdx, [rdx + 8]              ; AstNode.data
    mov [rdx + 0], rax              ; AstVarRef.resolved
    jmp .epilogue
.parent:
    ; look up the PARENT_SCOPE entry of the scope just searched
    mov byte [rsp + 24 + 0], SYM_KEY_PARENT_SCOPE ; SymKey.kind
    mov rax, [rsp + 80]             ; scope
    mov [rsp + 24 + 8], rax         ; SymKey.scope_index
    mov qword [rsp + 24 + 16], 0    ; SymKey.span
    mov qword [rsp + 24 + 24], 1    ; SymKey.ident (dangling, empty)
    mov qword [rsp + 24 + 32], 0    ; SymKey.ident_len
    mov rdi, [rsp + 8]              ; symbol table
    lea rsi, [rsp + 24]             ; &SymKey
    lea rdx, [rel symkey_cmp]       ; cmp
    xor ecx, ecx                    ; cmp_ctx
    call vec_binary_search_by
    test rdx, rdx
    jnz .panic                      ; can't find the symbol entry for this var-ref
    ; continue the search in the parent scope
    mov rdi, [rsp + 8]              ; symbol table
    mov rsi, rax                    ; index
    call vec_get
    mov rbx, rax                    ; *SymEntry
    mov rdx, [rax + 40]             ; SymEntry.index (parent scope)
    mov [rsp + 80], rdx             ; update scope
    jmp .var_ref
.epilogue:
    add rsp, 96
    pop rbx
    pop rbp
    ret
.panic:
    call panic
;; rdi: Ast
;; rsi: *mut SymbolTable
;; rdx: root_index
;; define-fn: fn ast_resolve_var_refs(ast: *mut Ast, ctx: *mut SymbolTable, root_index: u64)
;;
;; Walks the subtree rooted at root_index with ast_walk_for_each, passing the
;; symbol table as the callback context, so that
;; ast_resolve_var_refs_for_each is invoked for every visited node.
ast_resolve_var_refs:
    push rbp
    mov rbp, rsp
    ; ast_walk_for_each takes (ast, start_index, ctx, for_each): swap the
    ; second and third arguments into place
    xchg rsi, rdx
    lea rcx, [rel ast_resolve_var_refs_for_each] ; callback, formed RIP-relative
    call ast_walk_for_each
    pop rbp
    ret