diff --git a/lang/Makefile b/lang/Makefile
index e4c30c4..60880de 100644
--- a/lang/Makefile
+++ b/lang/Makefile
@@ -1,7 +1,7 @@
 # Makefile: Compile and link main.asm using nasm and mold, intermediate files in target/
 
 TARGET_DIR := target
-SRC := src/lib.asm src/int_to_str.asm src/vec.asm src/tokeniser.asm src/file.asm src/alloc.asm
+SRC := src/lib.asm src/int_to_str.asm src/vec.asm src/tokeniser.asm src/file.asm src/alloc.asm src/ast.asm
 OBJ := $(patsubst src/%.asm,$(TARGET_DIR)/%.o,$(SRC))
 
 BIN_SRC := src/main.asm src/panic.asm
diff --git a/lang/src/ast.asm b/lang/src/ast.asm
new file mode 100644
index 0000000..8017761
--- /dev/null
+++ b/lang/src/ast.asm
@@ -0,0 +1,509 @@
+default rel
+
+%include "src/tokeniser.inc"
+
+section .rdata
+    AST_FUNCTION equ 1
+    AST_BLOCK equ 2
+    AST_VARIABLE equ 3
+    AST_NUMBER equ 4
+    AST_BINARY_OP equ 5
+    AST_RETURN_STATEMENT equ 6
+
+    TYPE_VOID equ 1
+    TYPE_BOOL equ 2
+    TYPE_I32 equ 3
+    TYPE_U32 equ 4
+    TYPE_STR equ 5
+
+section .text
+extern vec_init_with
+extern vec_push
+extern vec_get
+extern panic
+extern memcpy
+extern vec_binary_search_by
+extern vec_insert
+
+extern bump_alloc
+
+extern tokeniser_init
+extern find_lexeme
+extern peek_lexeme
+extern expect_token
+extern unwrap_token
+extern peek_expect_token
+
+extern str_to_int
+
+;; exported so that tests/ast.rs can link against them
+global parse_func
+global parse_args
+
+;; start very simple, with only functions and addition
+;; struct Ast {
+;;     nodes: Vec<AstNode>,
+;; }
+;;
+;; struct AstNode {
+;;     kind: u8,
+;;     data: *const (),
+;; }
+;;
+;; struct Argument {
+;;     name: *const u8,
+;;     name_len: usize,
+;;     arg_type: Type,
+;; }
+;;
+;; struct Type {
+;;     kind: u8,
+;; }
+
+;; rdi: *mut Ast
+;; fn parse_func(ast: *mut Ast) -> u64
+;; Parses `fn <name> ( <args> ) -> <type> <block>`, copies the function record into
+;; bump-allocated memory and pushes an AST_FUNCTION node pointing at it onto the Ast.
+;; Returns Ast.nodes.len() after the push.
+parse_func:
+    push rbp
+    mov rbp, rsp
+    push rdi
+    sub rsp, 48
+    ; name: *const u8 [0..8]
+    ; name_len: usize [8..16]
+    ; args_ptr: *const Arg [16..24]
+    ; args_len: usize [24..32]
+    ; return_type: Type [32..40]
+    ; body: u64 [40..48]
+    ; ast: *mut Ast [48..56] (pushed rdi)
+
+    ; .args is skipped for `()`, so start from an empty argument slice
+    mov qword [rsp + 16], 0 ; args_ptr
+    mov qword [rsp + 24], 0 ; args_len
+
+    mov dil, TOKEN_FN
+    call unwrap_token
+    mov dil, TOKEN_IDENT
+    call unwrap_token
+    mov [rsp], rax ; function name
+    mov [rsp + 8], rdx ; function name length
+    mov dil, TOKEN_LPARENS
+    call unwrap_token
+    mov dil, TOKEN_RPARENS
+    call expect_token
+    test rax, rax
+    je .args
+
+.after_args:
+    mov dil, TOKEN_ARROW
+    call unwrap_token
+    mov rdi, [rsp + 48] ; Ast
+    call parse_type
+    mov [rsp + 32], rax ; return type
+    mov dil, TOKEN_LBRACE
+    call peek_expect_token
+    test rax, rax
+    je .panic
+    mov rdi, [rsp + 48] ; Ast
+    call parse_block
+    mov [rsp + 40], rax ; body
+.epilogue:
+    mov rdi, 48
+    call bump_alloc
+    push rax ; rdi is caller-saved, so keep the allocation across memcpy
+    lea rsi, [rsp + 8] ; function record (shifted by the push above)
+    mov rdi, rax
+    mov rdx, 48
+    call memcpy
+    pop rax ; allocation now holding the function record
+    mov byte [rsp], AST_FUNCTION ; kind
+    mov [rsp + 8], rax ; data
+    mov rdi, [rsp + 48] ; Ast
+    lea rsi, [rsp] ; AstNode
+    call vec_push
+    mov rax, [rsp + 48] ; Ast
+    mov rax, [rax + 8] ; return Ast.nodes.len()
+    add rsp, 48
+    pop rdi
+    pop rbp
+    ret
+
+.args:
+    mov rdi, [rsp + 48] ; Ast
+    call parse_args
+    mov [rsp + 16], rax ; args_ptr
+    mov [rsp + 24], rdx ; args_len
+    jmp .after_args
+.panic:
+    call panic
+
+;; rdi: *mut Ast
+;; fn parse_args(ast: *mut Ast) -> (*const Argument, usize)
+;; Parses `name: type` pairs separated by commas, up to and including the closing `)`.
+;; Returns the first two words of the argument Vec: args_ptr in rax, args_len in rdx.
+parse_args:
+    push rbp
+    mov rbp, rsp
+    push rdi
+    sub rsp, 64
+    ; arg: Argument [0..24]
+    ; args: Vec [24..64]
+    ; ast: *mut Ast [64..72] (pushed rdi)
+
+    lea rdi, [rsp + 24] ; vec
+    mov rsi, 24 ; size of Argument
+    mov rdx, 0 ; drop = None
+    mov rcx, 16 ; capacity
+    call vec_init_with
+
+.loop:
+    mov dil, TOKEN_RPARENS
+    call expect_token
+    test rax, rax
+    jnz .done_args ; `)` consumed (assumes expect_token returns 0 on mismatch, as parse_func does)
+    mov dil, TOKEN_IDENT
+    call unwrap_token
+    mov [rsp], rax ; arg name
+    mov [rsp + 8], rdx ; arg name length
+    mov dil, TOKEN_COLON
+    call unwrap_token
+    mov rdi, [rsp + 64] ; Ast
+    call parse_type
+    mov [rsp + 16], rax ; arg type
+
+    lea rdi, [rsp + 24] ; vec
+    lea rsi, [rsp] ; arg
+    call vec_push
+
+    mov dil, TOKEN_COMMA
+    call expect_token
+    test rax, rax
+    jz .end_loop
+    jmp .loop
+.end_loop:
+    mov dil, TOKEN_RPARENS
+    call unwrap_token
+.done_args:
+    mov rax, [rsp + 24] ; args_ptr
+    mov rdx, [rsp + 32] ; args_len
+    add rsp, 64
+    pop rdi
+    pop rbp
+    ret
+
+;; rdi: lexeme ptr
+;; rsi: lexeme len
+;; fn parse_number(lexeme: *const u8, len: usize)
+;; Lexemes longer than two bytes that start with `0x`, `0o` or `0b` are handed to
+;; str_to_int with radix 16, 8 or 2 and the prefix stripped; everything else is
+;; parsed with radix 10. Returns whatever str_to_int leaves in rax.
+parse_number:
+    push rbp
+    mov rbp, rsp
+    push rbx
+    sub rsp, 16
+    mov [rsp], rdi ; lexeme ptr
+    mov [rsp + 8], rsi ; lexeme len
+
+    cmp rsi, 2
+    jbe .dec_radix
+
+    mov al, byte [rdi]
+    mov bl, byte [rdi + 1]
+    cmp al, '0' ; every radix prefix starts with the digit zero
+    jne .dec_radix
+    cmp bl, 'x'
+    je .hex_radix
+    cmp bl, 'o'
+    je .oct_radix
+    cmp bl, 'b'
+    je .bin_radix
+    jmp .dec_radix ; plain leading zero: leave validation to str_to_int
+.hex_radix:
+    mov rax, 16
+    jmp .radix_set
+.oct_radix:
+    mov rax, 8
+    jmp .radix_set
+.bin_radix:
+    mov rax, 2
+    jmp .radix_set
+.dec_radix:
+    mov rax, 10
+    jmp .parse
+.radix_set:
+    add qword [rsp], 2
+    sub qword [rsp + 8], 2
+.parse:
+    mov rdi, [rsp] ; lexeme ptr
+    mov rsi, [rsp + 8] ; lexeme len
+    mov rdx, rax ; radix
+    call str_to_int
+    add rsp, 16
+    pop rbx
+    pop rbp
+    ret
+
+;; rdi: *mut Ast
+;; fn parse_primary_expr(ast: *mut Ast) -> u64
+;; Parses a number literal and pushes an AST_NUMBER node whose data is the parsed value.
+;; Returns Ast.nodes.len() after the push; panics on any other token.
+parse_primary_expr:
+    push rbp
+    mov rbp, rsp
+    sub rsp, 32
+    ; node: AstNode [0..16]
+    ; ast: *mut Ast [16..24]
+    mov [rsp + 16], rdi ; Ast
+
+    mov dil, TOKEN_NUMBER
+    call expect_token
+    test rax, rax
+    jnz .number ; rax is the lexeme ptr, so it must be non-zero to be usable below
+    jmp .panic
+.number:
+    mov rdi, rax ; lexeme ptr
+    mov rsi, rdx ; lexeme len
+    call parse_number
+    mov byte [rsp], AST_NUMBER ; kind
+    mov [rsp + 8], rax ; data
+    mov rdi, [rsp + 16] ; Ast
+    lea rsi, [rsp] ; AstNode
+    call vec_push
+    mov rdi, [rsp + 16] ; Ast
+    mov rax, [rdi + 8] ; return Ast.nodes.len()
+    add rsp, 32
+    pop rbp
+    ret
+.panic:
+    call panic
+
+
+;; rdi: *mut Ast
+;; sil: precedence
+;; fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64
+;; Parses a primary expression followed by any number of `+ <operand>` pairs. Each pair
+;; pushes an AST_BINARY_OP node whose data points at a bump-allocated copy of
+;; { left, operator, precedence, right }. Returns the final value of `left`.
+parse_binary_expr:
+    push rbp
+    mov rbp, rsp
+    sub rsp, 64
+    ; lexeme: Lexeme [32..56] (reused for the AstNode handed to vec_push)
+    ; right: u64 [24..32]
+    ; precedence: u8 [17..18]
+    ; operator: u8 [16..17]
+    ; left: u64 [8..16]
+    ; rdi: *mut Ast [0..8]
+    mov [rsp], rdi ; Ast
+    mov byte [rsp + 17], sil ; upper_precedence
+    mov byte [rsp + 16], 0
+
+    call parse_primary_expr
+    mov [rsp + 8], rax ; left
+
+.loop:
+    lea rdi, [rsp + 32] ; lexeme
+    call peek_lexeme
+    mov rax, [rsp + 32]
+    mov dil, [rsp + 17]
+    cmp al, dil ; our_precedence <= upper_precedence
+    jle .done ; also covers some non-binary operator tokens
+    cmp al, TOKEN_PLUS
+    je .plus
+    jmp .done
+
+.plus:
+    mov dil, TOKEN_PLUS
+    call unwrap_token
+    mov byte [rsp + 16], TOKEN_PLUS
+    jmp .right
+
+.right:
+    mov rdi, [rsp] ; Ast
+    mov sil, [rsp + 16]
+    call parse_binary_expr
+    mov [rsp + 24], rax ; right
+
+    ; the operands live in this frame, so persist them before storing a pointer
+    mov rdi, 24
+    call bump_alloc
+    mov [rsp + 40], rax ; data ptr
+    mov rdi, rax
+    lea rsi, [rsp + 8] ; left, operator, precedence, right
+    mov rdx, 24
+    call memcpy
+    mov byte [rsp + 32], AST_BINARY_OP ; kind
+    mov rdi, [rsp] ; Ast
+    lea rsi, [rsp + 32] ; AstNode
+    call vec_push
+    mov rdi, [rsp] ; Ast
+    mov rax, [rdi + 8] ; Ast.nodes.len()
+    mov [rsp + 8], rax ; left
+    jmp .loop ; keep folding further operators into `left`
+
+.done:
+    mov rax, [rsp + 8] ; left
+    add rsp, 64
+    pop rbp
+    ret
+
+
+;; rdi: *mut Ast
+;; fn parse_expr(ast: *mut Ast) -> u64
+;; Entry point for expressions: parse_binary_expr with an upper_precedence of 0.
+parse_expr:
+    push rbp
+    mov rbp, rsp
+    xor esi, esi ; upper_precedence = 0
+    call parse_binary_expr
+    pop rbp
+    ret
+
+;; rdi: *mut Ast
+;; fn parse_statement(ast: *mut Ast) -> u64
+;; Parses `return <expr> ;` and pushes an AST_RETURN_STATEMENT node whose data points
+;; at a bump-allocated copy of the expression index; panics on any other token.
+;; Returns Ast.nodes.len() after the push.
+parse_statement:
+    push rbp
+    mov rbp, rsp
+    sub rsp, 32
+    ; node: AstNode [0..16]
+    ; expression: u64 [16..24]
+    ; ast: *mut Ast [24..32]
+    mov [rsp + 24], rdi ; Ast
+
+    mov dil, TOKEN_RETURN
+    call expect_token
+    test rax, rax
+    jnz .return ; assumes expect_token returns 0 on mismatch, as parse_func does
+    jmp .panic
+
+.return:
+    mov rdi, [rsp + 24] ; Ast (dil was overwritten with the token kind)
+    call parse_expr
+    mov [rsp + 16], rax ; expression
+    mov rdi, 8
+    call bump_alloc
+    mov rcx, [rsp + 16] ; expression
+    mov [rax], rcx ; keep the index readable after this frame is gone
+    mov byte [rsp], AST_RETURN_STATEMENT ; kind
+    mov [rsp + 8], rax ; data
+    mov rdi, [rsp + 24] ; Ast
+    mov rsi, rsp ; AstNode
+    call vec_push
+    mov rdi, [rsp + 24] ; Ast
+    mov rax, [rdi + 8] ; Ast.nodes.len()
+    mov [rsp], rax
+
+    mov dil, TOKEN_SEMI
+    call unwrap_token
+    mov rax, [rsp] ; Ast.nodes.len() taken after the push
+    add rsp, 32
+    pop rbp
+    ret
+.panic:
+    call panic
+
+;; rdi: *mut Ast
+;; fn parse_block(ast: *mut Ast) -> u64
+;; Parses `{ <statement>* }` and pushes an AST_BLOCK node whose data points at a
+;; bump-allocated Block (statements ptr, statements len).
+;; Returns Ast.nodes.len() after the push.
+parse_block:
+    push rbp
+    mov rbp, rsp
+    push rdi
+
+    ; struct Block {
+    ;     statements: &[u64],
+    ; }
+
+    sub rsp, 56
+    ; node: AstNode [0..16] (only built after the loop)
+    ; statement: u64 [8..16]
+    ; statements: Vec [16..56]
+    ; ast: *mut Ast [56..64] (pushed rdi)
+
+    lea rdi, [rsp + 16] ; vec
+    mov rsi, 8 ; size of a statement index
+    mov rdx, 0 ; drop = None
+    mov rcx, 16 ; capacity
+    call vec_init_with
+
+    mov dil, TOKEN_LBRACE
+    call unwrap_token
+.loop:
+    mov dil, TOKEN_RBRACE ; NOTE(review): assumes tokeniser.inc defines TOKEN_RBRACE
+    call expect_token
+    test rax, rax
+    jnz .done ; closing brace consumed
+    mov rdi, [rsp + 56] ; Ast
+    call parse_statement
+    mov [rsp + 8], rax ; statement
+    lea rdi, [rsp + 16] ; vec
+    lea rsi, [rsp + 8]
+    call vec_push
+    jmp .loop
+.done:
+    mov rdi, 16
+    call bump_alloc
+    mov rcx, [rsp + 16] ; statements ptr
+    mov [rax], rcx
+    mov rcx, [rsp + 24] ; statements len
+    mov [rax + 8], rcx
+    mov byte [rsp], AST_BLOCK ; kind
+    mov [rsp + 8], rax ; data
+    mov rdi, [rsp + 56] ; Ast
+    lea rsi, [rsp] ; AstNode
+    call vec_push
+    mov rdi, [rsp + 56] ; Ast
+    mov rax, [rdi + 8] ; Ast.nodes.len()
+    add rsp, 56
+    pop rdi
+    pop rbp
+    ret
+
+;; rdi: *mut Ast
+;; fn parse_type(ast: *mut Ast) -> Type
+;; Consumes the next lexeme and maps TOKEN_I32 / TOKEN_U32 / TOKEN_VOID / TOKEN_BOOL to
+;; the matching TYPE_* constant, returned in rax; panics on any other token.
+parse_type:
+    push rbp
+    mov rbp, rsp
+    push rdi
+
+    sub rsp, 24
+    mov rdi, rsp
+    call find_lexeme ; TODO: use peek here to allow failing gracefully
+    mov rax, [rsp] ; token kind
+    cmp al, TOKEN_I32
+    je .i32_type
+    cmp al, TOKEN_U32
+    je .u32_type
+    cmp al, TOKEN_VOID
+    je .void_type
+    cmp al, TOKEN_BOOL
+    je .bool_type
+    jmp .panic
+.i32_type:
+    mov rax, TYPE_I32
+    jmp .epilogue
+.u32_type:
+    mov rax, TYPE_U32
+    jmp .epilogue
+.void_type:
+    mov rax, TYPE_VOID
+    jmp .epilogue
+.bool_type:
+    mov rax, TYPE_BOOL
+    jmp .epilogue
+.epilogue:
+    add rsp, 24
+    pop rdi
+    pop rbp
+    ret
+.panic:
+    call panic
diff --git a/lang/tests/ast.rs b/lang/tests/ast.rs
new file mode 100644
index 0000000..7f7b9b4
--- /dev/null
+++ b/lang/tests/ast.rs
@@ -0,0 +1,43 @@
+#[path = "shared/shared.rs"]
+mod util;
+
+#[repr(C)]
+struct Ast {
+    nodes: util::BlobVec,
+}
+
+#[repr(C)]
+struct Argument {
+    name: *const u8,
+    name_len: usize,
+    arg_type: u8,
+}
+
+/// `(ptr, len)` pair that `parse_args` returns in `rax:rdx`. A Rust tuple has no
+/// defined C layout, so the extern signature uses this `#[repr(C)]` struct instead.
+#[repr(C)]
+struct ArgumentSlice {
+    ptr: *const Argument,
+    len: usize,
+}
+
+#[repr(C)]
+struct AstNode {
+    node_type: u8,
+    data: usize,
+}
+
+unsafe extern "C" {
+    unsafe fn bump_init();
+
+    unsafe fn parse_func(ast: *mut Ast) -> u64;
+    unsafe fn parse_args(ast: *mut Ast) -> ArgumentSlice;
+    unsafe fn tokeniser_init(path: *const i8) -> ();
+}
+
+fn main() {
+    unsafe {
+        bump_init();
+    }
+    println!("Bump allocator initialized.");
+}