default rel

%include "src/tokeniser.inc"

section .rdata
    ;; start-consts
    AST_FUNCTION equ 1 ; :u8
    AST_BLOCK equ 2 ; :u8
    AST_VARIABLE equ 3 ; :u8
    AST_NUMBER equ 4 ; :u8
    AST_BINARY_OP equ 5 ; :u8
    AST_RETURN_STATEMENT equ 6 ; :u8
    AST_VALUE_TO_PLACE equ 7 ; :u8
    AST_PLACE_TO_VALUE equ 8 ; :u8
    AST_ASSIGNMENT equ 9 ; :u8
    AST_DEREF equ 10 ; :u8
    AST_ADDRESS_OF equ 11 ; :u8
    AST_VAR_DECL equ 12 ; :u8
    AST_VAR_REF equ 13 ; :u8
    AST_ARG equ 14 ; :u8

    TYPE_VOID equ 1 ; :u8
    TYPE_BOOL equ 2 ; :u8
    TYPE_I32 equ 3 ; :u8
    TYPE_U32 equ 4 ; :u8
    TYPE_STR equ 5 ; :u8
    TYPE_POINTER equ 6 ; :u8
    ;; end-consts

    PRECEDENCE_ADD equ 90
    PRECEDENCE_SUB equ 90
    PRECEDENCE_MUL equ 100
    PRECEDENCE_DIV equ 100
    PRECEDENCE_REM equ 100

section .text
extern vec_init_with
extern vec_push
extern vec_get
extern vec_insert_sorted
extern vec_get_or
extern panic
extern memcpy
extern strcmp
extern vec_binary_search_by
extern vec_insert
extern bump_alloc

extern tokeniser_init
extern find_lexeme
extern peek_lexeme
extern expect_token
extern unwrap_token
extern peek_expect_token
extern tokeniser_get_cursor
extern tokeniser_set_cursor

extern str_to_int

global parse_func
global parse_args
global parse_expr
global parse_binary_expr
global parse_primary_expr
global parse_statement
global parse_block
global ast_build_symtable
global ast_walk_for_each
global ast_resolve_var_refs

;; start very simple, with only functions and addition

;; ```rust
;; use super::vec::Vec;
;; ```

;; start-structs
;; struct Ast {
;;     nodes: Vec<AstNode>,
;; }
;;
;; struct AstNode {
;;     kind: u8,
;;     data: *const (),
;;     extra: usize,
;;     span: u64,
;; }
;;
;; struct AstArgument {
;;     name: *const u8,
;;     name_len: usize,
;;     arg_type: Type,
;; }
;;
;; struct Type {
;;     kind: u8,
;;     data: u64,
;; }
;; end-structs

;; Parses `fn <ident> ( <args> ) -> <type> <block>` and pushes an
;; AST_FUNCTION node whose `data` points at a bump-allocated AstFunction.
;; Returns the index of the pushed node in `Ast.nodes`.
;; Panics (via unwrap_token / panic) on any unexpected token.
;;
;; rdi: *mut Ast
;; define-fn: fn parse_func(ast: *mut Ast) -> u64
parse_func:
    push rbp
    mov rbp, rsp
    sub rsp, 8                          ; span
    push rdi                            ; ast

    ; start-structs
    ; struct AstFunction {
    ;     name: *const u8,
    ;     name_len: usize,
    ;     args: *const u64,
    ;     args_len: usize,
    ;     return_type: Type,
    ;     body: u64,
    ; }
    ; end-structs

    ; span: u64                 [64..72]
    ; ast: *mut Ast             [56..64]
    ; body: u64                 [48..56]
    ; return_type: Type         [32..48]
    ; args_len: usize           [24..32]
    ; args_ptr: *const u64      [16..24]
    ; name_len: usize           [8..16]
    ; name: *const u8           [0..8]
    sub rsp, 56

    ; default to an empty argument list
    mov qword [rsp + 16], 8             ; <*u64>::dangling()
    mov qword [rsp + 24], 0             ; args_len

    call tokeniser_get_cursor
    mov [rsp + 64], rax                 ; span

    mov dil, TOKEN_FN
    call unwrap_token
    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp], rax                      ; function name
    mov [rsp + 8], rdx                  ; function name length

    mov dil, TOKEN_LPARENS
    call unwrap_token
    ; `()` means no arguments; anything else is handed to parse_args,
    ; which also consumes the closing parenthesis.
    mov dil, TOKEN_RPARENS
    call expect_token
    test rax, rax
    je .args
.after_args:
    mov dil, TOKEN_ARROW
    call unwrap_token
    mov rdi, [rsp + 56]                 ; Ast
    call parse_type
    mov [rsp + 32], rax                 ; return_type.kind
    mov [rsp + 40], rdx                 ; return_type.data

    ; the body must start with `{`; parse_block consumes it itself
    mov dil, TOKEN_LBRACE
    call peek_expect_token
    test rax, rax
    je .panic
    mov rdi, [rsp + 56]                 ; Ast
    call parse_block
    mov [rsp + 48], rax                 ; body

.epilogue:
    ; move the AstFunction from the stack into bump-allocated memory
    mov rdi, 56                         ; size_of::<AstFunction>
    mov rsi, 8                          ; align_of::<AstFunction>
    call bump_alloc
    lea rsi, [rsp]                      ; &AstFunction
    mov rdi, rax                        ; destination ptr
    mov rdx, 56                         ; size_of::<AstFunction>
    call memcpy
    ; NOTE(review): rdi is read after memcpy here and throughout this file,
    ; which assumes the project's memcpy leaves rdi (the destination)
    ; intact. Confirm against its implementation.

    ; reuse the same stack area for the AstNode
    mov byte [rsp], AST_FUNCTION        ; AstNode.kind
    mov [rsp + 8], rdi                  ; AstNode.data
    mov qword [rsp + 16], 0             ; AstNode.extra
    mov rdi, [rsp + 64]                 ; span
    mov [rsp + 24], rdi                 ; AstNode.span
    mov rdi, [rsp + 56]                 ; Ast
    lea rsi, [rsp]                      ; &AstNode
    call vec_push

    mov rdi, [rsp + 56]                 ; Ast
    mov rax, [rdi + 8]                  ; return Ast.nodes.len() - 1
    dec rax

    add rsp, 56
    pop rdi
    add rsp, 8
    pop rbp
    ret

.args:
    mov rdi, [rsp + 56]                 ; Ast
    call parse_args
    mov [rsp + 16], rax                 ; args_ptr
    mov [rsp + 24], rdx                 ; args_len
    jmp .after_args

.panic:
    call panic

;; Parses `<ident> : <type>` entries separated by commas up to and including
;; the closing `)`. Each argument becomes an AST_ARG node; the returned
;; array holds the node indices of the arguments in source order.
;;
;; rdi: *mut Ast
;; define-fn: fn parse_args(ast: *mut Ast) -> (*const u64, usize)
parse_args:
    push rbp
    mov rbp, rsp

    ; span: u64                 [80..88]
    ; vec: Vec<u64>             [40..80]
    ; argument: AstArgument {   [8..40]
    ;     name: *const u8       [8..16]
    ;     name_len: usize       [16..24]
    ;     arg_type: Type        [24..40]
    ; }
    ; ast                       [0..8]
    sub rsp, 88
    mov [rsp], rdi                      ; Ast

    lea rdi, [rsp + 40]                 ; vec
    mov rsi, 8                          ; size of u64 (Index)
    mov rdx, 0                          ; drop = None
    mov rcx, 16                         ; capacity
    call vec_init_with

.loop:
    ; a `)` here ends the list (this also permits a trailing comma)
    mov dil, TOKEN_RPARENS
    call expect_token
    test rax, rax
    jnz .done_args

    call tokeniser_get_cursor
    mov [rsp + 80], rax                 ; span

    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp + 8], rax                  ; AstArgument.name
    mov [rsp + 16], rdx                 ; AstArgument.name_len

    mov dil, TOKEN_COLON
    call unwrap_token

    mov rdi, [rsp]                      ; Ast
    call parse_type
    mov [rsp + 24], rax                 ; AstArgument.arg_type.kind
    mov [rsp + 32], rdx                 ; AstArgument.arg_type.data

    mov rdi, 32                         ; size_of::<AstArgument>
    mov rsi, 8                          ; align_of::<AstArgument>
    call bump_alloc
    mov rdi, rax
    lea rsi, [rsp + 8]                  ; &AstArgument
    mov rdx, 32                         ; size_of::<AstArgument>
    call memcpy

    ; the AstArgument slot is now reused for the AstNode
    mov qword [rsp + 8], AST_ARG        ; AstNode.kind
    mov [rsp + 16], rdi                 ; AstNode.data
    mov qword [rsp + 24], 0             ; AstNode.extra
    mov rdi, [rsp + 80]                 ; span
    mov [rsp + 32], rdi                 ; AstNode.span
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push

    ; remember the index of the node we just pushed
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    lea rdi, [rsp + 40]                 ; vec
    mov [rsp + 8], rax                  ; argument index
    lea rsi, [rsp + 8]                  ; &argument index
    call vec_push

    mov dil, TOKEN_COMMA
    call expect_token
    test rax, rax
    jz .end_loop
    jmp .loop

.end_loop:
    ; no comma: the list must end here
    mov dil, TOKEN_RPARENS
    call unwrap_token

.done_args:
    mov rax, [rsp + 40]                 ; args_ptr (vec data pointer)
    mov rdx, [rsp + 48]                 ; args_len (vec length)
    add rsp, 88
    pop rbp
    ret

;; Converts a number lexeme to an integer via str_to_int. Lexemes longer
;; than two bytes that start with `0x`, `0o` or `0b` are parsed as
;; hexadecimal, octal or binary respectively (prefix stripped); everything
;; else is parsed as decimal.
;;
;; rdi: lexeme ptr
;; rsi: lexeme len
;; fn parse_number(lexeme: *const u8, lexeme_len: usize) -> u64
parse_number:
    push rbp
    mov rbp, rsp
    push rbx
    sub rsp, 16
    mov [rsp], rdi                      ; lexeme ptr
    mov [rsp + 8], rsi                  ; lexeme len

    ; a prefix needs at least one digit after it
    cmp rsi, 2
    jbe .dec_radix

    mov al, byte [rdi]                  ; first byte
    mov bl, byte [rdi + 1]              ; second byte
    ; The prefix is a leading '0' followed by the radix letter. The two
    ; bytes used to be tested the other way round, which made the octal
    ; and binary prefixes unreachable.
    cmp al, '0'
    jne .dec_radix
    cmp bl, 'x'
    je .hex_radix
    cmp bl, 'o'
    je .oct_radix
    cmp bl, 'b'
    je .bin_radix
    ; '0' followed by anything else: plain decimal, let str_to_int decide

.dec_radix:
    mov rax, 10
    jmp .parse
.hex_radix:
    mov rax, 16
    jmp .radix_set
.oct_radix:
    mov rax, 8
    jmp .radix_set
.bin_radix:
    mov rax, 2
.radix_set:
    ; skip the two prefix bytes
    add qword [rsp], 2
    sub qword [rsp + 8], 2
.parse:
    mov rdi, [rsp]                      ; lexeme ptr
    mov rsi, [rsp + 8]                  ; lexeme len
    mov rdx, rax                        ; radix
    call str_to_int
    add rsp, 16
    pop rbx
    pop rbp
    ret

;; Parses a number literal, a parenthesised expression or a variable
;; reference. Returns the node index in rax and the "placeness" of the
;; result in rdx (1 = place, 0 = value). Panics on any other token.
;;
;; rdi: *mut Ast
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> (u64, bool)
parse_primary_expr:
    push rbp
    mov rbp, rsp

    ; span: u64                 [32..40]
    ; AstNode / AstVarRef       [8..40] / [8..32]
    ; ast                       [0..8]
    sub rsp, 40
    mov [rsp], rdi                      ; Ast

    ; start-structs
    ; struct AstVarRef {
    ;     resolved: u64,
    ;     name: *const u8,
    ;     name_len: usize,
    ; }
    ; end-structs

    call tokeniser_get_cursor
    mov [rsp + 32], rax                 ; span (doubles as AstNode.span)

    mov dil, TOKEN_NUMBER
    call expect_token
    test rax, rax
    jnz .number
    mov dil, TOKEN_LPARENS
    call expect_token
    test rax, rax
    jnz .paren_expr
    mov dil, TOKEN_IDENT
    call expect_token
    test rax, rax
    jnz .var_ref
    jmp .panic

.var_ref:
    ; rax/rdx hold the identifier lexeme
    mov qword [rsp + 8], -1             ; AstVarRef.resolved (not yet resolved)
    mov [rsp + 16], rax                 ; AstVarRef.name
    mov [rsp + 24], rdx                 ; AstVarRef.name_len
    mov rdi, 24                         ; size_of::<AstVarRef>
    mov rsi, 8                          ; align_of::<AstVarRef>
    call bump_alloc
    mov rdi, rax
    lea rsi, [rsp + 8]
    mov rdx, 24
    call memcpy

    mov qword [rsp + 8], AST_VAR_REF    ; AstNode.kind
    mov [rsp + 16], rdi                 ; AstNode.data
    mov qword [rsp + 24], 0             ; AstNode.extra
    ; AstNode.span is already in place at [rsp + 32]
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; return Ast.nodes.len() - 1
    dec rax
    mov rdx, 1                          ; placeness = true
    jmp .epilogue

.number:
    mov rdi, rax                        ; lexeme ptr
    mov rsi, rdx                        ; lexeme len
    call parse_number
    mov rdi, [rsp]                      ; Ast
    mov byte [rsp + 8], AST_NUMBER      ; AstNode.kind
    mov [rsp + 16], rax                 ; AstNode.data (the value itself)
    mov qword [rsp + 24], 0             ; AstNode.extra
    ; AstNode.span is already in place at [rsp + 32]
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; return Ast.nodes.len() - 1
    dec rax
    mov rdx, 0                          ; placeness = false
    jmp .epilogue

.paren_expr:
    mov rdi, [rsp]                      ; Ast
    call parse_expr
    mov [rsp + 8], rax                  ; expr
    ; unwrap_token returns a lexeme in rax/rdx, so the placeness of the
    ; inner expression has to be saved across the call.
    mov [rsp + 16], dl                  ; expr placeness
    mov dil, TOKEN_RPARENS
    call unwrap_token
    mov rax, [rsp + 8]                  ; expr
    movzx rdx, byte [rsp + 16]          ; expr placeness

.epilogue:
    add rsp, 40
    pop rbp
    ret

.panic:
    call panic

;; Precedence-climbing parser for `+ - * /`. Parses a prefix expression and
;; then folds in every following binary operator whose precedence is
;; strictly greater than `precedence`, producing left-associative
;; AST_BINARY_OP nodes. Operands are converted to values.
;; Returns (node index, placeness).
;;
;; rdi: *mut Ast
;; sil: precedence
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> (u64, bool)
parse_binary_expr:
    push rbp
    mov rbp, rsp

    ; size: 24, align: 8
    ; start-structs
    ; struct BinaryExpr {
    ;     left: u64,
    ;     operator: u8,
    ;     right: u64,
    ; }
    ; end-structs

    sub rsp, 64
    ; span: u64                 [56..64]
    ; AstNode                   [32..64]
    ; lexeme: Lexeme            [32..56]
    ; right: u64                [24..32]
    ; right_placeness: u8       [20..21]
    ; left_placeness: u8        [19..20]
    ; our_precedence: u8        [18..19]
    ; upper_precedence: u8      [17..18]
    ; operator: u8              [16..17]
    ; left: u64                 [8..16]
    ; rdi: *mut Ast             [0..8]

    mov [rsp], rdi                      ; Ast
    mov byte [rsp + 17], sil            ; upper_precedence
    mov byte [rsp + 16], 0

    call tokeniser_get_cursor
    mov [rsp + 56], rax                 ; span

    ; rdi is caller-saved, so reload it rather than assuming the call
    ; above left it alone
    mov rdi, [rsp]                      ; Ast
    call parse_prefix_expr
    mov [rsp + 8], rax                  ; left
    mov [rsp + 19], dl                  ; left_placeness

.loop:
    ; only peek: the operator belongs to our caller if it binds too weakly
    lea rdi, [rsp + 32]                 ; lexeme
    call peek_lexeme
    mov rax, [rsp + 32]
    cmp al, TOKEN_PLUS
    je .add
    cmp al, TOKEN_MINUS
    je .sub
    cmp al, TOKEN_STAR
    je .mul
    cmp al, TOKEN_SLASH
    je .div
    jmp .done

.add:
    mov byte [rsp + 16], TOKEN_PLUS
    mov byte [rsp + 18], PRECEDENCE_ADD
    jmp .right
.sub:
    mov byte [rsp + 16], TOKEN_MINUS
    mov byte [rsp + 18], PRECEDENCE_SUB
    jmp .right
.mul:
    mov byte [rsp + 16], TOKEN_STAR
    mov byte [rsp + 18], PRECEDENCE_MUL
    jmp .right
.div:
    mov byte [rsp + 16], TOKEN_SLASH
    mov byte [rsp + 18], PRECEDENCE_DIV
    jmp .right

.right:
    mov dil, [rsp + 17]                 ; upper_precedence
    mov al, [rsp + 18]                  ; our_precedence
    cmp al, dil                         ; our_precedence <= upper_precedence
    jle .done                           ; leave the operator for the caller

    ; The operator is ours: consume it now. Consuming it before the
    ; precedence check dropped the operator whenever the check failed,
    ; e.g. the `+` in `a * b + c`.
    mov dil, [rsp + 16]                 ; operator token
    call unwrap_token

    mov rdi, [rsp]                      ; Ast
    mov sil, [rsp + 18]
    call parse_binary_expr
    mov [rsp + 24], rax                 ; right
    mov [rsp + 20], dl                  ; right_placeness

    ; convert left and right to values
    mov rdi, [rsp]                      ; Ast
    mov rsi, [rsp + 8]                  ; left
    mov dl, [rsp + 19]                  ; left_placeness
    call ast_place_to_value
    mov [rsp + 8], rax                  ; left
    mov byte [rsp + 19], 0              ; left_placeness = false

    mov rdi, [rsp]                      ; Ast
    mov rsi, [rsp + 24]                 ; right
    mov dl, [rsp + 20]                  ; right_placeness
    call ast_place_to_value
    mov [rsp + 24], rax                 ; right
    mov byte [rsp + 20], 0              ; right_placeness = false

    mov rdi, 24                         ; size_of::<BinaryExpr>
    mov rsi, 8                          ; align_of::<BinaryExpr>
    call bump_alloc
    mov rdx, [rsp + 8]                  ; left
    mov [rax + 0], rdx                  ; BinaryExpr.left
    mov dl, byte [rsp + 16]             ; operator
    mov byte [rax + 8], dl              ; BinaryExpr.operator
    mov rdx, [rsp + 24]                 ; right
    mov [rax + 16], rdx                 ; BinaryExpr.right

    mov byte [rsp + 32], AST_BINARY_OP  ; AstNode.kind
    mov [rsp + 40], rax                 ; AstNode.data
    mov qword [rsp + 48], 0             ; AstNode.extra
    ; AstNode.span is already in place at [rsp + 56]
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 32]                 ; &AstNode
    call vec_push

    ; the new node becomes the left operand of the next operator
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    mov [rsp + 8], rax                  ; left
    mov byte [rsp + 19], 0              ; left_placeness = false
    jmp .loop

.done:
    mov rax, [rsp + 8]                  ; left
    movzx rdx, byte [rsp + 19]          ; left_placeness
    add rsp, 64
    pop rbp
    ret

;; Parses a full expression (assignment level). The node index is returned
;; in rax; the placeness left in rdx by parse_assignment_expr is passed
;; through untouched and is relied upon by callers in this file.
;;
;; rdi: *mut Ast
;; define-fn: fn parse_expr(ast: *mut Ast) -> u64
parse_expr:
    push rbp
    mov rbp, rsp
    sub rsp, 8
    mov [rsp], rdi                      ; Ast
    mov sil, 0
    call parse_assignment_expr
    add rsp, 8
    pop rbp
    ret

;; Parses `return <expr> ;` or `let ... ;` and returns the index of the
;; statement node. Panics on anything else.
;;
;; rdi: *mut Ast
;; define-fn: fn parse_statement(ast: *mut Ast) -> u64
parse_statement:
    push rbp
    mov rbp, rsp

    ; AstNode                   [8..40]
    ; Ast (later: result index) [0..8]
    sub rsp, 40
    mov [rsp], rdi                      ; Ast

    call tokeniser_get_cursor
    mov [rsp + 32], rax                 ; AstNode.span

    mov dil, TOKEN_RETURN
    call expect_token
    test rax, rax
    jnz .return
    mov dil, TOKEN_LET
    call expect_token
    test rax, rax
    jnz .let
    jmp .panic

.let:
    mov rdi, [rsp]                      ; Ast
    call ast_parse_let
    mov [rsp], rax                      ; statement
    jmp .semi

.return:
    mov rdi, [rsp]                      ; Ast
    call parse_expr
    ; NOTE(review): unlike `let` and binary operands, the returned
    ; expression is not passed through ast_place_to_value. Confirm that
    ; this is intended.
    mov byte [rsp + 8], AST_RETURN_STATEMENT ; AstNode.kind
    mov [rsp + 16], rax                 ; AstNode.data
    mov qword [rsp + 24], 0             ; AstNode.extra
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    mov [rsp], rax                      ; statement

.semi:
    mov dil, TOKEN_SEMI
    call unwrap_token
    mov rax, [rsp]                      ; statement
    add rsp, 40
    pop rbp
    ret

.panic:
    call panic

;; Parses `{ <statement>* }` (stray semicolons are skipped) and pushes an
;; AST_BLOCK node whose `data` is the pointer to the statement-index array
;; and whose `extra` is the number of statements. Returns the node index.
;;
;; rdi: *mut Ast
;; define-fn: fn parse_block(ast: *mut Ast) -> u64
parse_block:
    push rbp
    mov rbp, rsp

    ; span: u64                 [64..72]
    ; Ast: *mut Ast             [56..64]
    ; statements: Vec<u64>      [8..48]
    ; statement: u64            [0..8]
    ;
    ; The AstNode is built in place at [0..32]: the Vec's pointer and length
    ; at [8..24] double as AstNode.data and AstNode.extra.
    sub rsp, 72
    mov [rsp + 56], rdi                 ; Ast

    call tokeniser_get_cursor
    mov [rsp + 64], rax                 ; span

    mov dil, TOKEN_LBRACE
    call unwrap_token

    ; Always initialise the vec, even for `{}`: the node below reads its
    ; pointer and length, which used to be uninitialised stack for an
    ; empty block.
    lea rdi, [rsp + 8]
    mov rsi, 8                          ; size of statement
    mov rdx, 0                          ; drop = None
    mov rcx, 64                         ; capacity
    call vec_init_with

.loop:
    ; The closing brace ends the block. It is consumed here, since nothing
    ; else does so and it would otherwise remain the next token after the
    ; block.
    mov dil, TOKEN_RBRACE
    call expect_token
    test rax, rax
    jnz .done

    ; skip semicolons
    mov dil, TOKEN_SEMI
    call expect_token
    test rax, rax
    jnz .loop

    mov rdi, [rsp + 56]                 ; Ast
    call parse_statement
    lea rdi, [rsp + 8]                  ; vec
    mov [rsp], rax                      ; statement
    lea rsi, [rsp]
    call vec_push
    jmp .loop

.done:
    mov rdi, [rsp + 56]                 ; Ast
    mov qword [rsp], AST_BLOCK          ; AstNode.kind
    ; AstNode.data / AstNode.extra alias the vec pointer / length
    mov rsi, [rsp + 64]                 ; span
    mov [rsp + 24], rsi                 ; AstNode.span
    lea rsi, [rsp]                      ; &AstNode
    call vec_push
    mov rdi, [rsp + 56]                 ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    add rsp, 72
    pop rbp
    ret

;; Parses a type: `i32`, `u32`, `void`, `bool`, or `*` followed by a type.
;; Returns Type.kind in rax and Type.data in rdx; for pointers, data is a
;; bump-allocated copy of the pointee Type, otherwise 0.
;;
;; rdi: *mut Ast
;; define-fn: fn parse_type(ast: *mut Ast) -> Type
parse_type:
    push rbp
    mov rbp, rsp

    ; lexeme / pointee Type     [8..32]
    ; ast                       [0..8]
    sub rsp, 32
    mov [rsp], rdi                      ; Ast

    lea rdi, [rsp + 8]
    call find_lexeme ; TODO: use peek here to allow failing gracefully
    xor rdx, rdx                        ; Type.data = 0 for non-pointer types
    mov rax, [rsp + 8]                  ; token kind
    cmp al, TOKEN_I32
    je .i32_type
    cmp al, TOKEN_U32
    je .u32_type
    cmp al, TOKEN_VOID
    je .void_type
    cmp al, TOKEN_BOOL
    je .bool_type
    cmp al, TOKEN_STAR
    je .pointer_type
    jmp .panic

.i32_type:
    mov rax, TYPE_I32
    jmp .epilogue
.u32_type:
    mov rax, TYPE_U32
    jmp .epilogue
.void_type:
    mov rax, TYPE_VOID
    jmp .epilogue
.bool_type:
    mov rax, TYPE_BOOL
    jmp .epilogue
.pointer_type:
    mov rdi, [rsp]                      ; Ast
    call parse_type
    mov [rsp + 8], rax                  ; Type.kind
    mov [rsp + 16], rdx                 ; Type.data

    mov rdi, 16                         ; size_of::<Type>
    mov rsi, 8                          ; align_of::<Type>
    call bump_alloc
    mov rdi, rax
    lea rsi, [rsp + 8]
    mov rdx, 16                         ; size_of::<Type>
    call memcpy
    mov rax, TYPE_POINTER
    mov rdx, rdi                        ; Type.data = pointee
    jmp .epilogue

.epilogue:
    add rsp, 32
    pop rbp
    ret

.panic:
    call panic

;; Parses the prefix operators `*` (dereference) and `&` (address-of), which
;; may be nested, falling back to a primary expression.
;; Returns (node index, placeness).
;;
;; rdi: *mut Ast
;; define-fn: fn parse_prefix_expr(ast: *mut Ast) -> (u64, bool)
parse_prefix_expr:
    push rbp
    mov rbp, rsp

    ; AstNode                   [8..40]
    ; ast                       [0..8]
    sub rsp, 40
    mov [rsp], rdi                      ; Ast

    call tokeniser_get_cursor
    mov [rsp + 32], rax                 ; AstNode.span

    mov dil, TOKEN_STAR
    call expect_token
    test rax, rax
    jnz .dereference
    mov dil, TOKEN_AMP
    call expect_token
    test rax, rax
    jnz .address_of

    mov rdi, [rsp]                      ; Ast
    call parse_primary_expr
    jmp .done

.dereference:
    mov rdi, [rsp]                      ; Ast
    call parse_prefix_expr
    ; NOTE(review): the operand is stored as parsed, without a
    ; place-to-value conversion. Confirm that later stages expect that.
    mov qword [rsp + 8], AST_DEREF      ; AstNode.kind
    mov [rsp + 16], rax                 ; AstNode.data
    mov qword [rsp + 24], 0             ; AstNode.extra
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    mov rdx, 1                          ; placeness = true
    jmp .done

.address_of:
    ; address-of must be applied to a place
    ; so we convert the inner expression to a place first
    mov rdi, [rsp]                      ; Ast
    call parse_prefix_expr
    mov rdi, [rsp]                      ; Ast
    mov rsi, rax                        ; expr
    ; rdx already holds the placeness
    call ast_value_to_place
    mov qword [rsp + 8], AST_ADDRESS_OF ; AstNode.kind
    mov [rsp + 16], rax                 ; AstNode.data
    mov qword [rsp + 24], 0             ; AstNode.extra
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    xor rdx, rdx                        ; placeness = false
    jmp .done

.done:
    add rsp, 40
    pop rbp
    ret

;; Parses `<binary-expr>` optionally followed by `= <expr>`. For an
;; assignment the destination is converted to a place, the source to a
;; value, and an AST_ASSIGNMENT node (data = dest, extra = source) is
;; pushed. Returns (node index, placeness).
;;
;; rdi: *mut Ast
;; define-fn: fn parse_assignment_expr(ast: *mut Ast) -> (u64, bool)
parse_assignment_expr:
    push rbp
    mov rbp, rsp

    ; span: u64                 [32..40]
    ; source                    [24..32]
    ; dest                      [16..24]
    ; dest_placeness            [8..9]
    ; ast                       [0..8]
    ;
    ; [8..40] is later reused as the AstNode (kind, data = dest,
    ; extra = source, span).
    sub rsp, 40
    mov [rsp], rdi                      ; Ast

    call tokeniser_get_cursor
    mov [rsp + 32], rax                 ; span

    mov rdi, [rsp]                      ; Ast
    ; start at the lowest precedence; set it explicitly rather than relying
    ; on sil surviving the call above
    xor esi, esi                        ; precedence = 0
    call parse_binary_expr
    mov [rsp + 16], rax                 ; dest
    mov [rsp + 8], dl                   ; placeness

    mov dil, TOKEN_EQUALS
    call expect_token
    test rax, rax
    jnz .assignment
    jmp .done

.assignment:
    mov rdi, [rsp]                      ; Ast
    mov rsi, [rsp + 16]                 ; dest
    movzx rdx, byte [rsp + 8]           ; placeness
    call ast_value_to_place
    mov [rsp + 16], rax                 ; dest

    mov rdi, [rsp]                      ; Ast
    call parse_expr
    mov rdi, [rsp]                      ; Ast
    mov rsi, rax                        ; expr
    ; rdx already holds the placeness
    call ast_place_to_value
    mov [rsp + 24], rax                 ; source

    mov qword [rsp + 8], AST_ASSIGNMENT ; AstNode.kind
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    mov [rsp + 16], rax                 ; dest
    mov byte [rsp + 8], 0               ; placeness = false

.done:
    mov rax, [rsp + 16]                 ; dest
    movzx rdx, byte [rsp + 8]           ; placeness
    add rsp, 40
    pop rbp
    ret

;; Parses the remainder of `let <ident> : <type> = <expr>` (the `let`
;; keyword has already been consumed by parse_statement). Pushes an
;; AST_VAR_DECL node followed by an AST_ASSIGNMENT node
;; (data = declaration index, extra = expression index) and returns the
;; index of the assignment with placeness = false.
;;
;; rdi: *mut Ast
;; define-fn: fn ast_parse_let(ast: *mut Ast) -> (u64, bool)
ast_parse_let:
    push rbp
    mov rbp, rsp

    ; start-structs
    ; struct AstVarDecl {
    ;     name: *const u8,
    ;     name_len: usize,
    ;     var_type: Type,
    ; }
    ;
    ; struct AstAssignment {
    ;     variable: u64,
    ;     expr: u64,
    ; }
    ; end-structs

    ; expr: u64                 [48..56]
    ; *AstVarDecl               [40..48]
    ; AstNode                   [8..40]
    ; AstVarDecl                [8..40]
    ; Ast                       [0..8]
    sub rsp, 56
    mov [rsp], rdi                      ; Ast

    call tokeniser_get_cursor
    mov [rsp + 40], rax                 ; span (overwritten below; the node
                                        ; span is taken after the expression)

    ; skipped in parse_statement
    ; mov dil, TOKEN_LET
    ; call unwrap_token

    mov dil, TOKEN_IDENT
    call unwrap_token
    mov [rsp + 8], rax                  ; AstVarDecl.name
    mov [rsp + 16], rdx                 ; AstVarDecl.name_len

    mov dil, TOKEN_COLON
    call unwrap_token

    mov rdi, [rsp]                      ; Ast
    call parse_type
    mov [rsp + 24], rax                 ; AstVarDecl.var_type.kind
    mov [rsp + 32], rdx                 ; AstVarDecl.var_type.data

    mov rdi, 32                         ; size_of::<AstVarDecl>
    mov rsi, 8                          ; align_of::<AstVarDecl>
    call bump_alloc
    mov rdi, rax                        ; AstVarDecl ptr
    lea rsi, [rsp + 8]                  ; &AstVarDecl
    mov rdx, 32                         ; size_of::<AstVarDecl>
    call memcpy
    mov [rsp + 40], rdi                 ; AstVarDecl ptr

    ; parse the expression
    mov dil, TOKEN_EQUALS
    call unwrap_token
    mov rdi, [rsp]                      ; Ast
    call parse_expr
    mov rdi, [rsp]                      ; Ast
    mov rsi, rax                        ; expr
    ; rdx already holds the placeness
    call ast_place_to_value
    mov [rsp + 48], rax                 ; expr index

    ; variable is defined at this point so that the expression cannot reference it
    call tokeniser_get_cursor
    mov rdi, [rsp + 40]                 ; AstVarDecl ptr
    mov qword [rsp + 8], AST_VAR_DECL   ; AstNode.kind
    mov [rsp + 16], rdi                 ; AstNode.data
    mov qword [rsp + 24], 0             ; AstNode.extra
    mov [rsp + 32], rax                 ; AstNode.span
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push

    ; variables are places
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    mov qword [rsp + 8], AST_ASSIGNMENT ; AstNode.kind
    mov [rsp + 16], rax                 ; AstNode.data (variable index)
    mov rax, [rsp + 48]                 ; expr index
    mov [rsp + 24], rax                 ; AstNode.extra (expr index)
    ; reuse span from variable declaration
    mov rdi, [rsp]                      ; Ast
    lea rsi, [rsp + 8]                  ; &AstNode
    call vec_push

    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    add rsp, 56
    xor rdx, rdx                        ; placeness = false
    pop rbp
    ret

;; Returns `node_index` unchanged when it already is a place
;; (is_placeness == 1); otherwise pushes an AST_VALUE_TO_PLACE node wrapping
;; it (same span) and returns the index of the new node.
;;
;; rdi: *mut Ast
;; rsi: index of node
;; rdx: is_placeness
;; fn ast_value_to_place(ast: *mut Ast, node_index: u64, is_placeness: bool) -> u64
ast_value_to_place:
    push rbp
    mov rbp, rsp
    cmp dl, 1
    mov rax, rsi                        ; mov does not touch the flags
    je .done

    ; create new AST node
    sub rsp, 40
    mov [rsp], rdi                      ; Ast
    mov [rsp + 8], rsi                  ; node_index
    ; load original node's span
    call vec_get
    mov rdi, [rax + 24]                 ; AstNode.span
    mov [rsp + 32], rdi                 ; AstNode.span
    mov rsi, [rsp + 8]                  ; node_index
    mov byte [rsp + 8], AST_VALUE_TO_PLACE ; AstNode.kind
    mov [rsp + 16], rsi                 ; AstNode.data
    mov qword [rsp + 24], 0             ; AstNode.extra
    lea rsi, [rsp + 8]                  ; &AstNode
    mov rdi, [rsp]                      ; Ast
    call vec_push
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    add rsp, 40
.done:
    pop rbp
    ret

;; Returns `node_index` unchanged when it already is a value
;; (is_placeness == 0); otherwise pushes an AST_PLACE_TO_VALUE node wrapping
;; it (same span) and returns the index of the new node.
;;
;; rdi: *mut Ast
;; rsi: index of node
;; rdx: is_placeness
;; fn ast_place_to_value(ast: *mut Ast, node_index: u64, is_placeness: bool) -> u64
ast_place_to_value:
    push rbp
    mov rbp, rsp
    cmp dl, 0
    mov rax, rsi                        ; mov does not touch the flags
    je .done

    ; create new AST node
    sub rsp, 40
    mov [rsp], rdi                      ; Ast
    mov [rsp + 8], rsi                  ; node_index
    ; load original node's span
    call vec_get
    mov rdi, [rax + 24]                 ; AstNode.span
    mov [rsp + 32], rdi                 ; AstNode.span
    mov rsi, [rsp + 8]                  ; node_index
    mov byte [rsp + 8], AST_PLACE_TO_VALUE ; AstNode.kind
    mov [rsp + 16], rsi                 ; AstNode.data
    mov qword [rsp + 24], 0             ; AstNode.extra
    lea rsi, [rsp + 8]                  ; &AstNode
    mov rdi, [rsp]                      ; Ast
    call vec_push
    mov rdi, [rsp]                      ; Ast
    mov rax, [rdi + 8]                  ; Ast.nodes.len()
    dec rax
    add rsp, 40
.done:
    pop rbp
    ret

;; Total order over SymKeys: compares kind, then scope_index, then ident
;; (via strcmp on pointer/length pairs), then span. Returns -1, 0 or 1.
;;
;; rdi: ctx
;; rsi: a: *const SymKey
;; rdx: b: *const SymKey
;; define-fn: fn symkey_cmp(a: *const SymKey, b: *const SymKey) -> i32
symkey_cmp:
    push rbp
    mov rbp, rsp
    push rbx
    sub rsp, 16
    mov [rsp], rsi                      ; a
    mov [rsp + 8], rdx                  ; b

    mov al, byte [rsi]                  ; a.kind
    mov bl, byte [rdx]                  ; b.kind
    cmp al, bl
    jl .a_less
    jg .a_greater

    mov rax, [rsi + 8]                  ; a.scope_index
    mov rbx, [rdx + 8]                  ; b.scope_index
    cmp rax, rbx
    jl .a_less
    jg .a_greater

    mov rdi, [rsi + 24]                 ; a.ident
    mov rsi, [rsi + 32]                 ; a.ident_len
    mov rcx, [rdx + 32]                 ; b.ident_len
    mov rdx, [rdx + 24]                 ; b.ident
    call strcmp
    cmp rax, 0
    jl .a_less
    jg .a_greater

    ; rsi/rdx were overwritten for the strcmp call; reload a and b
    mov rsi, [rsp]
    mov rdx, [rsp + 8]
    mov rax, [rsi + 16]                 ; a.span
    mov rbx, [rdx + 16]                 ; b.span
    cmp rax, rbx
    jl .a_less
    jg .a_greater

    xor rax, rax
    jmp .epilogue
.a_less:
    mov rax, -1
    jmp .epilogue
.a_greater:
    mov rax, 1
.epilogue:
    add rsp, 16
    pop rbx
    pop rbp
    ret

section .rdata
    ;; start-consts
    SYM_KEY_SCOPE equ 1 ; :u8
    SYM_KEY_SCOPE_NAME equ 2 ; :u8
    SYM_KEY_PARENT_SCOPE equ 3 ; :u8
    SYM_KEY_START_LOCALS equ 4 ; :u8
    SYM_KEY_ARG equ 5 ; :u8
    SYM_KEY_VAR equ 6 ; :u8
    SYM_KEY_END_LOCALS equ 7 ; :u8
    ;; end-consts

section .text

;; Builds the symbol table for the tree rooted at `root_index` by walking it
;; with ast_build_symtable_for_each, then copies the resulting vec into
;; `*symtable`.
;;
;; rdi: Ast
;; rsi: root index
;; rdx: *SymbolTable
;; define-fn: fn ast_build_symtable(ast: *mut Ast, root_index: u64, symtable: *mut core::mem::MaybeUninit<SymbolTable>)
ast_build_symtable:
    push rbp
    mov rbp, rsp

    ; BuildSymtableCtx          [24..64]
    ; *SymbolTable              [16..24]
    ; root_index                [8..16]
    ; Ast                       [0..8]
    sub rsp, 64
    mov [rsp], rdi                      ; Ast
    mov [rsp + 8], rsi                  ; root_index
    mov [rsp + 16], rdx                 ; *SymbolTable

    ; initialise the symtable vec
    lea rdi, [rsp + 24]                 ; &BuildSymtableCtx.symtable
    mov rsi, 56                         ; size_of::<SymEntry>
    mov rdx, 0                          ; drop = None
    mov rcx, 128                        ; capacity
    call vec_init_with

    mov rdi, [rsp]                      ; Ast
    mov rsi, [rsp + 8]                  ; root_index
    lea rdx, [rsp + 24]                 ; &BuildSymtableCtx
    mov rcx, ast_build_symtable_for_each
    call ast_walk_for_each

    ; memcpy symtable out
    mov rdi, [rsp + 16]                 ; *SymbolTable
    lea rsi, [rsp + 24]                 ; &BuildSymtableCtx.symtable
    mov rdx, 40                         ; size_of::<Vec<SymEntry>>
    call memcpy

    add rsp, 64
    pop rbp
    ret

;; symtable is a sorted vec pretending to be a b-tree:
;; entries are sorted by a key in order to get
;; the following ordering:
;; scope (index0) -> (ident0)
;; scope (index1) -> (ident1)
;; scope (index2) -> (ident2)
;; scope-name (ident1) -> (index1)
;; scope-name (ident1) -> (index1)
;; parent-scope (scope1) -> (index0)
;; arg (scope1, span, ident) -> (index)
;; var (scope1, span, ident) -> (index)
;; var (scope1, span, ident) -> (index)
;; arg (scope0, span, ident) -> (index)
;; var (scope0, span, ident) -> (index)
;; var (scope0, span, ident) -> (index)
;;
;; arguments are ordered before variables in order to allow shadowing of variables by arguments.
;; variables are ordered by span in order to allow shadowing of variables by variables.
;; all references within a scope are in the range parent-scope(scopeN)..var
;; (scopeN, u64::MAX, u64::MAX)
;;
;; NOTE(review): the comparator actually used, symkey_cmp, orders keys by
;; kind, then scope_index, then ident, then span, which is not the grouping
;; sketched above. Reconcile this description with the comparator.
;;
;; the symtable contains `SymEntries`, which hold a `SymKey` and an index into the AST node list.
;; for scope entries, the index holds the pointer to the scope's ident,
;; and `extra` holds the length; for other keys, `extra` is 0.
;;
;; start-structs
;; struct SymbolTable {
;;     symtable: Vec<SymEntry>,
;; }
;; struct SymKey {
;;     kind: u8,
;;     scope_index: u64,
;;     span: u64,
;;     ident: *const u8,
;;     ident_len: usize,
;; }
;; struct SymEntry {
;;     key: SymKey,
;;     index: u64,
;;     extra: u64,
;; }
;; end-structs
;; size_of::<SymKey> == 40
;; size_of::<SymEntry> == 56
;;
;; #start-structs
;; struct BuildSymtableCtx {
;;     symtable: Vec<SymEntry>,
;; }
;; #end-structs

;; ast_walk_for_each callback that records symbols:
;;   AST_FUNCTION -> a SYM_KEY_SCOPE entry (index/extra = name ptr/len) and a
;;                   SYM_KEY_PARENT_SCOPE entry (index = enclosing scope)
;;   AST_VAR_DECL -> a SYM_KEY_VAR entry in the current scope
;;   AST_ARG      -> a SYM_KEY_ARG entry in the current scope
;; All other node kinds are ignored.
;;
;; rdi: Ctx
;; rsi: Ast
;; rdx: index
;; rcx: scope
ast_build_symtable_for_each:
    push rbp
    mov rbp, rsp
    push rbx

    ; scope: u64                [88..96]
    ; SymEntry                  [32..88]
    ; SymKey                    [32..72]
    ; *AstNode                  [24..32]
    ; index                     [16..24]
    ; ctx                       [8..16]
    ; ast                       [0..8]
    sub rsp, 96
    mov [rsp], rsi                      ; Ast
    mov [rsp + 8], rdi                  ; Ctx
    mov [rsp + 16], rdx                 ; index
    mov [rsp + 88], rcx                 ; scope

    mov rdi, rsi                        ; Ast
    mov rsi, rdx                        ; index
    call vec_get
    mov [rsp + 24], rax                 ; *AstNode

    mov bl, byte [rax]                  ; AstNode.kind
    cmp bl, AST_FUNCTION
    je .func
    cmp bl, AST_VAR_DECL
    je .var_decl
    cmp bl, AST_ARG
    je .arg
    jmp .done

.func:
    ; insert scope entry
    mov byte [rsp + 32], SYM_KEY_SCOPE  ; SymKey.kind
    mov rdx, [rsp + 16]                 ; index
    mov qword [rsp + 40], rdx           ; SymKey.scope_index
    mov rdx, [rax + 24]                 ; AstNode.span
    mov qword [rsp + 48], rdx           ; SymKey.span
    mov qword [rsp + 56], 1             ; SymKey.ident (dangling, empty)
    mov qword [rsp + 64], 0             ; SymKey.ident_len
    mov rbx, [rax + 8]                  ; AstNode.data
    mov rdx, [rbx + 0]                  ; Func.name
    mov rcx, [rbx + 8]                  ; Func.name_len
    mov [rsp + 72], rdx                 ; SymEntry.index
    mov [rsp + 80], rcx                 ; SymEntry.extra

    mov rdi, [rsp + 8]                  ; Ctx.symtable
    lea rsi, [rsp + 32]                 ; &SymEntry
    mov rcx, 0                          ; cmp_ctx
    mov rdx, symkey_cmp                 ; cmp
    call vec_insert_sorted

    ; add parent scope
    mov byte [rsp + 32], SYM_KEY_PARENT_SCOPE ; SymKey.kind
    mov rdx, [rsp + 16]                 ; index
    mov qword [rsp + 40], rdx           ; SymKey.scope_index
    mov qword [rsp + 48], 0             ; SymKey.span
    mov qword [rsp + 56], 1             ; SymKey.ident (dangling, empty)
    mov qword [rsp + 64], 0             ; SymKey.ident_len
    mov rdx, [rsp + 88]                 ; parent scope
    mov [rsp + 72], rdx                 ; SymEntry.index
    mov qword [rsp + 80], 0             ; SymEntry.extra

    mov rdi, [rsp + 8]                  ; *Ctx
    lea rsi, [rsp + 32]                 ; &SymEntry
    mov rcx, 0                          ; cmp_ctx
    mov rdx, symkey_cmp                 ; cmp
    call vec_insert_sorted
    jmp .done

.var_decl:
    mov byte [rsp + 32], SYM_KEY_VAR    ; SymKey.kind
    jmp .insert_local
.arg:
    mov byte [rsp + 32], SYM_KEY_ARG    ; SymKey.kind
.insert_local:
    ; AstVarDecl and AstArgument both start with (name, name_len), so
    ; variables and arguments share the rest of the insertion.
    mov rdx, [rax + 24]                 ; AstNode.span
    mov qword [rsp + 48], rdx           ; SymKey.span
    mov rbx, [rsp + 24]                 ; *AstNode
    mov rbx, [rbx + 8]                  ; AstNode.data
    mov rdx, [rbx + 0]                  ; name
    mov rcx, [rbx + 8]                  ; name_len
    mov [rsp + 56], rdx                 ; SymKey.ident
    mov [rsp + 64], rcx                 ; SymKey.ident_len
    mov rdx, [rsp + 16]                 ; index
    mov [rsp + 72], rdx                 ; SymEntry.index
    mov qword [rsp + 80], 0             ; SymEntry.extra
    mov rdx, [rsp + 88]                 ; current scope
    mov [rsp + 40], rdx                 ; SymKey.scope_index

    mov rdi, [rsp + 8]                  ; *Ctx
    lea rsi, [rsp + 32]                 ; &SymEntry
    mov rcx, 0                          ; cmp_ctx
    mov rdx, symkey_cmp                 ; cmp
    call vec_insert_sorted

.done:
    add rsp, 96
    pop rbx
    pop rbp
    ret

;; Depth-first, pre-order walk over the AST starting at `start_index`,
;; calling `for_each(ctx, ast, node_index, scope)` for every node visited.
;; The machine stack is used as the work list of pending node indices.
;; `scope` is the index of the innermost enclosing AST_FUNCTION node, or -1
;; outside any function; a function node itself is reported with the scope
;; that encloses it.
;;
;; rdi: Ast
;; rsi: start_index
;; rdx: ctx
;; rcx: for_each
;; define-fn: fn ast_walk_for_each(ast: *mut Ast, start_index: u64, ctx: *mut (), for_each: unsafe extern "C" fn(ctx: *mut (), *mut Ast, node_index: u64, scope: u64))
ast_walk_for_each:
    push rbp
    push r15
    push r14
    push rbx

    ; INVALID_SCOPE             [48..56]
    ; current_index             [40..48]
    ; *current_scope            [32..40]
    ; current_node_ptr          [24..32]
    ; for_each                  [16..24]
    ; ctx                       [8..16]
    ; ast                       [0..8]
    sub rsp, 56
    mov [rsp], rdi                      ; Ast
    mov [rsp + 8], rdx                  ; ctx
    mov [rsp + 16], rcx                 ; for_each
    mov qword [rsp + 24], 0             ; current_node_ptr
    lea rdi, [rsp + 48]                 ; &INVALID_SCOPE
    mov [rsp + 32], rdi                 ; current_scope
    mov [rsp + 40], rsi                 ; current_index
    mov qword [rsp + 48], -1            ; INVALID_SCOPE

    ; rbp marks the bottom of the work list: everything below it on the
    ; stack is a pending node index (or a saved scope pair)
    mov rbp, rsp
    push rsi

    ; `current_scope` points to the index of the current scope on the stack.
    ; When we enter a new scope, we push `current_scope` onto the stack, then
    ; update it to point to the new scope index.
    ; When `rsp` is equal to `current_scope`, we need to additionally pop into
    ; `current_scope` after popping the current index.
.loop:
    cmp rsp, rbp
    jge .done                           ; work list is empty

    ; call for_each(ctx, ast, current_index, scope)
    mov rdi, [rbp + 8]                  ; ctx
    mov rsi, [rbp]                      ; Ast
    mov rdx, [rsp]                      ; current_index
    mov rcx, [rbp + 32]                 ; current_scope
    mov rcx, [rcx]                      ; current_scope value
    mov rax, [rbp + 16]                 ; for_each
    ; align stack to 16 bytes before call, keeping the old rsp on the stack
    mov rbx, rsp
    sub rsp, 8
    and rsp, -16
    mov [rsp], rbx
    call rax
    pop rsp                             ; restore the pre-alignment rsp

    ; get current_node_ptr
    mov rdi, [rbp]                      ; Ast
    pop rsi                             ; current_index
    mov [rbp + 40], rsi                 ; update current_index
    call vec_get
    mov [rbp + 24], rax                 ; current_node_ptr

    mov bl, byte [rax]                  ; AstNode.kind
    cmp bl, AST_FUNCTION
    je .func
    cmp bl, AST_BLOCK
    je .block
    cmp bl, AST_BINARY_OP
    je .binary_op
    cmp bl, AST_ASSIGNMENT
    je .assignment
    cmp bl, AST_VALUE_TO_PLACE
    je .value_to_place
    cmp bl, AST_PLACE_TO_VALUE
    je .place_to_value
    cmp bl, AST_DEREF
    je .deref
    cmp bl, AST_ADDRESS_OF
    je .address_of
    cmp bl, AST_RETURN_STATEMENT
    je .return_statement
    jmp .check_scope                    ; leaf node: no children to push

.func:
    ; push scope
    push qword [rbp + 32]               ; scope-ptr
    push qword [rbp + 40]               ; current_index
    mov [rbp + 32], rsp                 ; update current_scope

    ; push child indices to stack
    mov rbx, [rax + 8]                  ; AstNode.data
    mov r15, [rbx + 48]                 ; AstFunction.body
    push r15                            ; push body index
    mov r15, [rbx + 24]                 ; AstFunction.args_len
    xor r14, r14                        ; index
.arg_loop:
    cmp r14, r15
    jge .arg_loop_done
    mov rdx, [rbx + 16]                 ; AstFunction.args
    lea rdx, [rdx + r14*8]
    push qword [rdx]                    ; push arg index
    inc r14
    jmp .arg_loop
.arg_loop_done:
    jmp .check_scope

.block:
    mov rbx, [rax + 8]                  ; AstNode.data (statement indices)
    mov r15, [rax + 16]                 ; AstNode.extra (statement count)
    ; pushed last-to-first so the first statement is popped first
.stmt_loop:
    cmp r15, 0
    jle .stmt_loop_done
    dec r15
    mov rdx, [rbx + r15*8]              ; statement index
    push rdx                            ; push statement index
    jmp .stmt_loop
.stmt_loop_done:
    jmp .check_scope

.binary_op:
    mov rbx, [rax + 8]                  ; AstNode.data
    mov rdx, [rbx + 16]                 ; right index
    push rdx                            ; push right index
    mov rdx, [rbx + 0]                  ; left index
    push rdx                            ; push left index
    jmp .check_scope

.assignment:
    mov rbx, [rax + 8]                  ; AstNode.data = dest
    mov rdx, [rax + 16]                 ; AstNode.extra = source
    push rdx                            ; push source index
    push rbx                            ; push dest index
    jmp .check_scope

.value_to_place:
.place_to_value:
.deref:
.address_of:
.return_statement:
    mov rbx, [rax + 8]                  ; AstNode.data
    push rbx                            ; push inner expr index
    jmp .check_scope

.check_scope:
    cmp rsp, [rbp + 32]                 ; current_scope
    je .pop_scope
    jmp .loop

.pop_scope:
    ; pop current_scope
    ; the stack may look something like this:
    ;                current_scope---+
    ;  == stack ==     ^             | points here
    ;  scope-ptr[0]-+  |             |
    ;  scope: func0 <-----+
    ;  stmt0        |     |
    ;  stmt1        |     |
    ;  scope-ptr[1] ------+
    ;  scope: block0 <-+
    ;  stmt2
    ;  stmt3
    ;  ...
    pop rax                             ; scope
    pop rax                             ; scope_ptr
    mov [rbp + 32], rax                 ; update current_scope pointer
    jmp .check_scope

.done:
    add rsp, 56
    pop rbx
    pop r14
    pop r15
    pop rbp
    ret

;; ast_walk_for_each callback that resolves AST_VAR_REF nodes: it looks the
;; referenced name up in the symbol table and stores the AST index of the
;; matching declaration in AstVarRef.resolved. Other node kinds are ignored.
;;
;; The lookup takes the entry at or immediately before the search key
;; (kind, scope, ident, span of the reference), i.e. the latest declaration
;; of that name at or before the reference. The candidate is accepted only
;; if its kind, scope and identifier really match; variables are tried
;; first, then arguments. Panics when neither lookup matches. The candidate
;; used to be accepted unchecked, so an argument or an undeclared name could
;; resolve to an unrelated entry.
;;
;; rdi: *mut SymbolTable
;; rsi: *mut Ast
;; rdx: node_index
;; rcx: scope
ast_resolve_var_refs_for_each:
    push rbp
    mov rbp, rsp
    push rbx

    ; lower_bound               [88..96]
    ; scope: u64                [80..88]
    ; candidate: *SymEntry      [64..72]
    ; SymKey                    [24..64]
    ; *AstNode                  [16..24]
    ; *SymbolTable              [8..16]
    ; *Ast                      [0..8]
    sub rsp, 96
    mov [rsp], rsi                      ; Ast
    mov [rsp + 8], rdi                  ; Ctx
    mov [rsp + 80], rcx                 ; scope

    mov rdi, rsi                        ; Ast
    mov rsi, rdx                        ; node_index
    call vec_get
    mov [rsp + 16], rax                 ; *AstNode

    mov bl, byte [rax]                  ; AstNode.kind
    cmp bl, AST_VAR_REF
    jne .epilogue

    ; lookup variable in symbol table
    ; binary search lower bound: where the argument/variable entries begin
    mov byte [rsp + 24 + 0], SYM_KEY_START_LOCALS ; SymKey.kind
    mov qword [rsp + 24 + 8], 0         ; SymKey.scope_index
    mov qword [rsp + 24 + 16], 0        ; SymKey.span
    mov qword [rsp + 24 + 24], 1        ; SymKey.ident (dangling, empty)
    mov qword [rsp + 24 + 32], 0        ; SymKey.ident_len
    mov rdi, [rsp + 8]                  ; *Ctx
    lea rsi, [rsp + 24]                 ; &SymKey
    mov rdx, symkey_cmp                 ; cmp
    mov rcx, 0                          ; cmp_ctx
    call vec_binary_search_by
    mov [rsp + 88], rax                 ; lower_bound

    ; construct key
    mov byte [rsp + 24 + 0], SYM_KEY_VAR ; SymKey.kind
    mov rax, [rsp + 80]                 ; scope
    mov [rsp + 24 + 8], rax             ; SymKey.scope_index
    mov rax, [rsp + 16]                 ; *AstNode
    mov rbx, [rax + 24]                 ; AstNode.span
    mov [rsp + 24 + 16], rbx            ; SymKey.span
    mov rbx, [rax + 8]                  ; AstNode.data
    mov rax, [rbx + 8]                  ; AstVarRef.name
    mov rbx, [rbx + 16]                 ; AstVarRef.name_len
    mov [rsp + 24 + 24], rax            ; SymKey.ident
    mov [rsp + 24 + 32], rbx            ; SymKey.ident_len

.lookup:
    ; binary search in symbol table
    mov rdi, [rsp + 8]                  ; *Ctx
    lea rsi, [rsp + 24]                 ; &SymKey
    mov rdx, symkey_cmp                 ; cmp
    mov rcx, 0                          ; cmp_ctx
    call vec_binary_search_by
    ; rdx == 0: rax is used as returned; otherwise step back to the entry
    ; immediately before the returned position
    test rdx, rdx
    jz .candidate
    dec rax
.candidate:
    cmp rax, [rsp + 88]                 ; lower_bound
    jl .next_kind

    mov rdi, [rsp + 8]                  ; *Ctx
    mov rsi, rax                        ; index
    call vec_get
    mov [rsp + 64], rax                 ; candidate entry

    ; the candidate only counts if kind, scope and identifier all match
    mov dl, byte [rax]                  ; candidate.key.kind
    cmp dl, byte [rsp + 24 + 0]
    jne .next_kind
    mov rdx, [rax + 8]                  ; candidate.key.scope_index
    cmp rdx, [rsp + 24 + 8]
    jne .next_kind
    mov rdi, [rax + 24]                 ; candidate.key.ident
    mov rsi, [rax + 32]                 ; candidate.key.ident_len
    mov rdx, [rsp + 24 + 24]            ; SymKey.ident
    mov rcx, [rsp + 24 + 32]            ; SymKey.ident_len
    call strcmp
    cmp rax, 0
    jne .next_kind

    mov rax, [rsp + 64]                 ; candidate entry
    mov rax, [rax + 40]                 ; SymEntry.index
    mov rdx, [rsp + 16]                 ; *AstNode
    mov rdx, [rdx + 8]                  ; AstNode.data
    mov [rdx + 0], rax                  ; AstVarRef.resolved
    jmp .epilogue

.next_kind:
    ; no variable of that name: retry once as an argument, then give up
    cmp byte [rsp + 24 + 0], SYM_KEY_ARG
    je .panic
    mov byte [rsp + 24 + 0], SYM_KEY_ARG ; SymKey.kind
    jmp .lookup

.epilogue:
    add rsp, 96
    pop rbx
    pop rbp
    ret

.panic:
    call panic

;; Resolves every variable reference in the tree rooted at `root_index`
;; against `ctx` by walking it with ast_resolve_var_refs_for_each.
;;
;; rdi: Ast
;; rsi: *mut SymbolTable
;; rdx: root_index
;; define-fn: fn ast_resolve_var_refs(ast: *mut Ast, ctx: *mut SymbolTable, root_index: u64)
ast_resolve_var_refs:
    push rbp
    mov rbp, rsp
    ; ast_walk_for_each takes (ast, start_index, ctx, for_each)
    xchg rsi, rdx
    mov rcx, ast_resolve_var_refs_for_each
    call ast_walk_for_each
.epilogue:
    pop rbp
    ret