from-scratch/lang/src/ast.asm

532 lines
11 KiB
NASM

default rel
%include "src/tokeniser.inc"
section .rdata
; Numeric tags used by the parser.
;   AST_*  : values stored in the `kind` byte of an AstNode. The code visible
;            in this file emits AST_FUNCTION, AST_BLOCK, AST_NUMBER,
;            AST_BINARY_OP and AST_RETURN_STATEMENT; the remaining AST_* tags
;            are not referenced by the routines shown here.
;   TYPE_* : values returned by parse_type as a `Type`. TYPE_STR is not
;            produced by the parse_type shown in this file.
; These are `equ` constants, so they emit no bytes into the section.
;; start-consts
AST_FUNCTION equ 1
AST_BLOCK equ 2
AST_VARIABLE equ 3
AST_NUMBER equ 4
AST_BINARY_OP equ 5
AST_RETURN_STATEMENT equ 6
AST_VALUE_TO_PLACE equ 7
AST_PLACE_TO_VALUE equ 8
TYPE_VOID equ 1
TYPE_BOOL equ 2
TYPE_I32 equ 3
TYPE_U32 equ 4
TYPE_STR equ 5
;; end-consts
section .text
extern vec_init_with
extern vec_push
extern vec_get
extern panic
extern memcpy
extern vec_binary_search_by
extern vec_insert
extern bump_alloc
extern tokeniser_init
extern find_lexeme
extern peek_lexeme
extern expect_token
extern unwrap_token
extern peek_expect_token
extern str_to_int
global parse_func
global parse_args
global parse_expr
global parse_binary_expr
global parse_primary_expr
global parse_statement
global parse_block
;; start very simple, with only functions and addition
;; ```rust
;; use super::vec::Vec;
;; ```
;; start-structs
;; struct Ast {
;; nodes: Vec<AstNode>,
;; }
;;
;; struct AstNode {
;; kind: u8,
;; data: *const (),
;; extra: usize,
;; }
;;
;; struct Argument {
;; name: *const u8,
;; name_len: usize,
;; arg_type: Type,
;; }
;;
;; struct Type {
;; kind: u8,
;; }
;; end-structs
;; rdi: *mut Ast
;; define-fn: fn parse_func(ast: *mut Ast) -> u64
;;
;; Parses `fn NAME ( [args] ) -> TYPE { ... }`, copies the resulting
;; AstFunction record into memory obtained from bump_alloc, pushes an
;; AST_FUNCTION node that points at it, and returns the index of that node
;; (Ast.nodes.len() - 1) in rax. rdi is restored to the Ast pointer on return.
;; Calls panic when the token after the return type is not `{`.
parse_func:
        push    rbp
        mov     rbp, rsp
        push    rdi                             ; saved Ast, reloaded after every call
; start-structs
; struct AstFunction {
; name: *const u8,
; name_len: usize,
; args: *const Argument,
; args_len: usize,
; return_type: Type,
; body: u64,
; }
; end-structs
        ; 48 bytes of AstFunction + one 8-byte scratch slot. With the two
        ; pushes above this keeps rsp 16-byte aligned at every call, given a
        ; conventionally aligned caller.
        sub     rsp, 56
        ; name: *const u8        [0..8]
        ; name_len: usize        [8..16]
        ; args_ptr: *const Arg   [16..24]
        ; args_len: usize        [24..32]
        ; return_type: Type      [32..40]
        ; body: u64              [40..48]
        ; heap copy: *AstFunction [48..56]
        ; Ast: *mut Ast          [56..64]  (the pushed rdi)

        ; A function declared with `()` never reaches parse_args, so give the
        ; argument fields a defined value instead of whatever is on the stack.
        mov     qword [rsp + 16], 0             ; args_ptr = null
        mov     qword [rsp + 24], 0             ; args_len = 0

        mov     dil, TOKEN_FN
        call    unwrap_token
        mov     dil, TOKEN_IDENT
        call    unwrap_token
        mov     [rsp], rax                      ; function name
        mov     [rsp + 8], rdx                  ; function name length
        mov     dil, TOKEN_LPARENS
        call    unwrap_token

        mov     dil, TOKEN_RPARENS
        call    expect_token
        test    rax, rax
        jnz     .after_args                     ; `)` already consumed: no arguments
        mov     rdi, [rsp + 56]                 ; Ast
        call    parse_args                      ; also consumes the closing `)`
        mov     [rsp + 16], rax                 ; args_ptr
        mov     [rsp + 24], rdx                 ; args_len

.after_args:
        mov     dil, TOKEN_ARROW
        call    unwrap_token
        mov     rdi, [rsp + 56]                 ; Ast
        call    parse_type
        mov     [rsp + 32], rax                 ; return type

        mov     dil, TOKEN_LBRACE
        call    peek_expect_token               ; parse_block consumes the `{` itself
        test    rax, rax
        jz      .panic
        mov     rdi, [rsp + 56]                 ; Ast
        call    parse_block
        mov     [rsp + 40], rax                 ; body

        ; Move the finished record off the stack.
        mov     edi, 48                         ; size
        mov     esi, 8                          ; align
        call    bump_alloc
        mov     [rsp + 48], rax                 ; keep the pointer: rdi/rax are volatile
        mov     rdi, rax
        mov     rsi, rsp
        mov     edx, 48
        call    memcpy

        ; Reuse the first 24 bytes of the frame as the AstNode to push.
        mov     rax, [rsp + 48]
        mov     byte [rsp], AST_FUNCTION        ; kind
        mov     [rsp + 8], rax                  ; data = heap AstFunction
        mov     qword [rsp + 16], 0             ; extra
        mov     rdi, [rsp + 56]                 ; Ast
        lea     rsi, [rsp]                      ; &AstNode
        call    vec_push

        mov     rdi, [rsp + 56]                 ; Ast (rdi is not preserved by the call)
        mov     rax, [rdi + 8]                  ; Ast.nodes.len()
        dec     rax                             ; index of the node just pushed
        add     rsp, 56
        pop     rdi
        pop     rbp
        ret

.panic:
        call    panic
;; rdi: *mut Ast
;; define-fn: fn parse_args(ast: *mut Ast) -> (*const Argument, usize)
;;
;; Parses `name: type` pairs separated by commas up to and including the
;; closing `)`. Each pair is pushed as a 24-byte Argument into a local vec.
;; Returns the first two qwords of that vec: rax = args_ptr, rdx = args_len.
;; rdi is restored to the Ast pointer on return.
parse_args:
        push    rbp
        mov     rbp, rsp
        push    rdi                             ; saved Ast
        ; Argument scratch [0..24]
        ; vec              [24..72]  (48 bytes, the same room parse_block
        ;                             reserves for its vec)
        ; Ast              [72..80]  (the pushed rdi)
        sub     rsp, 72

        lea     rdi, [rsp + 24]                 ; vec
        mov     esi, 24                         ; size of Argument
        xor     edx, edx                        ; drop = None
        mov     ecx, 16                         ; capacity
        call    vec_init_with

.loop:
        ; A `)` here ends the list. This also accepts a trailing comma.
        ; expect_token yields non-zero when the token matched, as at its
        ; other call sites in this file.
        mov     dil, TOKEN_RPARENS
        call    expect_token
        test    rax, rax
        jnz     .done_args

        mov     dil, TOKEN_IDENT
        call    unwrap_token
        mov     [rsp], rax                      ; arg name
        mov     [rsp + 8], rdx                  ; arg name length
        mov     dil, TOKEN_COLON
        call    unwrap_token
        mov     rdi, [rsp + 72]                 ; Ast
        call    parse_type
        mov     [rsp + 16], rax                 ; arg type

        lea     rdi, [rsp + 24]                 ; vec
        lea     rsi, [rsp]                      ; &Argument
        call    vec_push

        mov     dil, TOKEN_COMMA
        call    expect_token
        test    rax, rax
        jnz     .loop                           ; comma seen: another argument may follow

        ; No comma, so the list must close here.
        mov     dil, TOKEN_RPARENS
        call    unwrap_token

.done_args:
        mov     rax, [rsp + 24]                 ; args_ptr
        mov     rdx, [rsp + 32]                 ; args_len
        add     rsp, 72
        pop     rdi
        pop     rbp
        ret
;; rdi: lexeme ptr
;; rsi: lexeme len
;; fn parse_number(lexeme: *const u8, lexeme_len: usize) -> u64
;;
;; Picks a radix from an optional two-character prefix and forwards the
;; remaining digits to str_to_int(ptr, len, radix), whose rax is returned.
;;   0x.. -> 16, 0o.. -> 8, 0b.. -> 2, anything else -> 10
;; A lexeme of two bytes or fewer is always treated as decimal, so a bare
;; prefix is never stripped down to an empty digit string.
;; Calls panic when the second character is a radix letter but the first is
;; not '0' (for example "1x2").
parse_number:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        mov     [rsp], rdi                      ; lexeme ptr
        mov     [rsp + 8], rsi                  ; lexeme len

        mov     eax, 10                         ; default radix
        cmp     rsi, 2
        jbe     .parse                          ; too short to carry a prefix

        ; The radix letter is the SECOND character; the first must be '0'.
        movzx   edx, byte [rdi + 1]             ; rdx is overwritten before the call anyway
        cmp     dl, 'x'
        je      .hex_radix
        cmp     dl, 'o'
        je      .oct_radix
        cmp     dl, 'b'
        je      .bin_radix
        jmp     .parse                          ; no radix letter: decimal

.hex_radix:
        mov     eax, 16
        jmp     .check_zero
.oct_radix:
        mov     eax, 8
        jmp     .check_zero
.bin_radix:
        mov     eax, 2

.check_zero:
        cmp     byte [rdi], '0'
        jne     .panic                          ; invalid radix prefix
        add     qword [rsp], 2                  ; skip the prefix
        sub     qword [rsp + 8], 2

.parse:
        mov     rdi, [rsp]                      ; digits ptr
        mov     rsi, [rsp + 8]                  ; digits len
        mov     rdx, rax                        ; radix
        call    str_to_int
        add     rsp, 16
        pop     rbp
        ret

.panic:
        call    panic
;; rdi: *mut Ast
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> u64
;;
;; Primary expression: either a number literal or a parenthesised expression.
;;   number -> pushes an AST_NUMBER node whose data is the parsed value and
;;             returns its index (Ast.nodes.len() - 1)
;;   ( e )  -> returns whatever parse_expr returned for `e`
;; Any other token leads to panic.
parse_primary_expr:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 32
        ; Frame, addressed from rbp:
        ;   [rbp - 32]  Ast
        ;   [rbp - 24]  AstNode.kind   (also holds the inner expr index in the paren case)
        ;   [rbp - 16]  AstNode.data
        ;   [rbp -  8]  AstNode.extra
        mov     [rbp - 32], rdi

        mov     dil, TOKEN_NUMBER
        call    expect_token
        test    rax, rax
        jz      .try_paren

        ; Number literal: rax/rdx hold the lexeme pointer and length.
        mov     rsi, rdx
        mov     rdi, rax
        call    parse_number
        mov     rdi, [rbp - 32]                 ; Ast
        mov     byte [rbp - 24], AST_NUMBER     ; kind
        mov     [rbp - 16], rax                 ; data = numeric value
        mov     qword [rbp - 8], 0              ; extra
        lea     rsi, [rbp - 24]                 ; &AstNode
        call    vec_push
        mov     rdi, [rbp - 32]                 ; Ast
        mov     rax, [rdi + 8]                  ; Ast.nodes.len()
        dec     rax                             ; index of the new node
        jmp     .return

.try_paren:
        mov     dil, TOKEN_LPARENS
        call    expect_token
        test    rax, rax
        jz      .panic

        mov     rdi, [rbp - 32]                 ; Ast
        call    parse_expr
        mov     [rbp - 24], rax                 ; keep the index across the next call
        mov     dil, TOKEN_RPARENS
        call    unwrap_token
        mov     rax, [rbp - 24]

.return:
        leave
        ret

.panic:
        call    panic
;; rdi: *mut Ast
;; sil: precedence
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64
;;
;; Parses a primary expression, then keeps folding `+ <rhs>` onto it while the
;; peeked token's kind byte is strictly greater than `precedence`. The token
;; kind doubles as the operator's precedence. Each fold allocates a 24-byte
;; BinaryExpr with bump_alloc, pushes an AST_BINARY_OP node pointing at it,
;; and that node becomes the new left operand.
;; Returns in rax the index of the resulting node.
parse_binary_expr:
        push    rbp
        mov     rbp, rsp
; size: 24, align: 8
; start-structs
; struct BinaryExpr {
; left: u64,
; operator: u8,
; right: u64,
; }
; end-structs
        sub     rsp, 64
        ; lexeme: Lexeme  [32..56]  (reused as the AstNode to push)
        ; right: u64      [24..32]
        ; precedence: u8  [17..18]
        ; operator: u8    [16..17]
        ; left: u64       [8..16]
        ; rdi: *mut Ast   [0..8]
        mov     [rsp], rdi                      ; Ast
        mov     byte [rsp + 17], sil            ; upper_precedence
        mov     byte [rsp + 16], 0              ; no operator yet
        call    parse_primary_expr              ; rdi still holds Ast
        mov     [rsp + 8], rax                  ; left

.loop:
        lea     rdi, [rsp + 32]                 ; lexeme
        call    peek_lexeme
        movzx   eax, byte [rsp + 32]            ; token kind = first byte of the lexeme
        ; Precedence is a u8, so compare unsigned.
        cmp     al, byte [rsp + 17]             ; our_precedence <= upper_precedence
        jbe     .done
        cmp     al, TOKEN_PLUS
        jne     .done                           ; only `+` is a binary operator so far

        mov     dil, TOKEN_PLUS
        call    unwrap_token
        mov     byte [rsp + 16], TOKEN_PLUS

        ; Right operand: recurse with this operator's kind as the new bound.
        mov     rdi, [rsp]                      ; Ast
        mov     sil, [rsp + 16]
        call    parse_binary_expr
        mov     [rsp + 24], rax                 ; right

        mov     edi, 24                         ; size of BinaryExpr
        mov     esi, 8                          ; align
        call    bump_alloc
        mov     rdx, [rsp + 8]
        mov     [rax + 0], rdx                  ; left
        mov     dl, byte [rsp + 16]
        mov     byte [rax + 8], dl              ; operator
        mov     rdx, [rsp + 24]
        mov     [rax + 16], rdx                 ; right

        mov     byte [rsp + 32], AST_BINARY_OP  ; AstNode.kind
        mov     [rsp + 40], rax                 ; AstNode.data
        mov     qword [rsp + 48], 0             ; AstNode.extra
        mov     rdi, [rsp]                      ; Ast
        lea     rsi, [rsp + 32]                 ; &AstNode
        call    vec_push

        mov     rdi, [rsp]                      ; Ast
        mov     rax, [rdi + 8]                  ; Ast.nodes.len()
        dec     rax
        mov     [rsp + 8], rax                  ; the new node is the next left operand
        jmp     .loop

.done:
        mov     rax, [rsp + 8]                  ; left
        add     rsp, 64
        pop     rbp
        ret
;; rdi: *mut Ast
;; define-fn: fn parse_expr(ast: *mut Ast) -> u64
;;
;; Entry point for expressions: forwards to parse_binary_expr with a
;; precedence bound of 0 and returns its result unchanged.
parse_expr:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 8
        mov     sil, 0                          ; lowest precedence bound
        mov     [rbp - 8], rdi                  ; Ast (stored, not read back)
        call    parse_binary_expr
        leave
        ret
;; rdi: *mut Ast
;; define-fn: fn parse_statement(ast: *mut Ast) -> u64
;;
;; Parses `return <expr> ;`. Pushes an AST_RETURN_STATEMENT node whose data is
;; the index returned by parse_expr and returns the index of that new node.
;; Any statement that does not start with TOKEN_RETURN leads to panic.
parse_statement:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 32
        ; Frame, addressed from rbp:
        ;   [rbp - 32]  AstNode.kind  (reused afterwards to hold the result)
        ;   [rbp - 24]  AstNode.data
        ;   [rbp - 16]  AstNode.extra
        ;   [rbp -  8]  Ast
        mov     [rbp - 8], rdi

        mov     dil, TOKEN_RETURN
        call    expect_token
        test    rax, rax
        jz      .panic

        mov     rdi, [rbp - 8]                  ; Ast
        call    parse_expr
        mov     qword [rbp - 16], 0             ; extra
        mov     [rbp - 24], rax                 ; data = expression node index
        mov     byte [rbp - 32], AST_RETURN_STATEMENT ; kind
        lea     rsi, [rbp - 32]                 ; &AstNode
        mov     rdi, [rbp - 8]                  ; Ast
        call    vec_push

        mov     rdi, [rbp - 8]                  ; Ast
        mov     rax, [rdi + 8]                  ; Ast.nodes.len()
        dec     rax                             ; index of the return-statement node
        mov     [rbp - 32], rax                 ; park it across the next call
        mov     dil, TOKEN_SEMI
        call    unwrap_token
        mov     rax, [rbp - 32]
        leave
        ret

.panic:
        call    panic
;; rdi: *mut Ast
;; define-fn: fn parse_block(ast: *mut Ast) -> u64
;;
;; Parses `{ statement* }`, consuming both braces. The statement indices
;; returned by parse_statement are collected in a local vec. The AST_BLOCK node
;; pushed at the end is built in place over the frame, so its data/extra fields
;; are the vec's first two qwords (read as pointer and length elsewhere in this
;; file). An empty block gets data = 0 and extra = 0 without allocating a vec.
;; Returns in rax the index of the block node (Ast.nodes.len() - 1).
parse_block:
        push    rbp
        mov     rbp, rsp
        ; Ast: *mut Ast               [56..64]
        ; statements: Vec<Statement>  [8..56]
        ; statement: u64              [0..8]
        ; At .done, [0..24] is read as the AstNode { kind, data, extra }:
        ; kind overlays `statement`, data/extra overlay the vec's first qwords.
        sub     rsp, 64
        mov     [rsp + 56], rdi                 ; Ast
        mov     dil, TOKEN_LBRACE
        call    unwrap_token

        ; Defined values for the `{}` case, where the vec is never initialised.
        mov     qword [rsp + 8], 0              ; data = null
        mov     qword [rsp + 16], 0             ; extra = 0

        mov     dil, TOKEN_RBRACE
        call    peek_expect_token
        test    rax, rax
        jnz     .done                           ; empty block

        lea     rdi, [rsp + 8]                  ; vec
        mov     esi, 8                          ; size of statement
        xor     edx, edx                        ; drop = None
        mov     ecx, 64                         ; capacity
        call    vec_init_with

.loop:
        mov     dil, TOKEN_RBRACE
        call    peek_expect_token
        test    rax, rax
        jnz     .done
        mov     rdi, [rsp + 56]                 ; Ast
        call    parse_statement
        mov     [rsp], rax                      ; statement
        lea     rdi, [rsp + 8]                  ; vec
        lea     rsi, [rsp]                      ; &statement
        call    vec_push
        jmp     .loop

.done:
        ; The `}` was only peeked so far. Consume it, as the other parsers in
        ; this file do with their own closing delimiter.
        mov     dil, TOKEN_RBRACE
        call    unwrap_token

        mov     qword [rsp], AST_BLOCK          ; kind
        mov     rdi, [rsp + 56]                 ; Ast
        lea     rsi, [rsp]                      ; &AstNode
        call    vec_push
        mov     rdi, [rsp + 56]                 ; Ast
        mov     rax, [rdi + 8]                  ; Ast.nodes.len()
        dec     rax
        add     rsp, 64
        pop     rbp
        ret
;; rdi: *mut Ast
;; define-fn: fn parse_type(ast: *mut Ast) -> Type
;;
;; Fetches the next lexeme with find_lexeme and maps its token kind to a
;; TYPE_* constant returned in rax:
;;   TOKEN_I32 -> TYPE_I32, TOKEN_U32 -> TYPE_U32,
;;   TOKEN_VOID -> TYPE_VOID, TOKEN_BOOL -> TYPE_BOOL
;; Any other token kind leads to panic. rdi is restored on return.
parse_type:
        push    rbp
        mov     rbp, rsp
        push    rdi
        sub     rsp, 24                         ; lexeme buffer at [rbp - 32 .. rbp - 8]
        lea     rdi, [rbp - 32]
        call    find_lexeme                     ; TODO: use peek here to allow failing gracefully
        mov     rax, [rbp - 32]                 ; token kind lives in the low byte

        cmp     al, TOKEN_I32
        jne     .not_i32
        mov     eax, TYPE_I32
        jmp     .return
.not_i32:
        cmp     al, TOKEN_U32
        jne     .not_u32
        mov     eax, TYPE_U32
        jmp     .return
.not_u32:
        cmp     al, TOKEN_VOID
        jne     .not_void
        mov     eax, TYPE_VOID
        jmp     .return
.not_void:
        cmp     al, TOKEN_BOOL
        jne     .panic
        mov     eax, TYPE_BOOL

.return:
        add     rsp, 24
        pop     rdi
        pop     rbp
        ret

.panic:
        call    panic