ast node: span

This commit is contained in:
janis 2025-10-30 22:54:14 +01:00
parent 56354237c6
commit adb30e983c
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
3 changed files with 142 additions and 76 deletions

View file

@ -82,6 +82,7 @@ global ast_walk_for_each
;; kind: u8, ;; kind: u8,
;; data: *const (), ;; data: *const (),
;; extra: usize, ;; extra: usize,
;; span: u64,
;; } ;; }
;; ;;
;; struct AstArgument { ;; struct AstArgument {
@ -101,6 +102,7 @@ global ast_walk_for_each
parse_func: parse_func:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
sub rsp, 8 ; span
push rdi push rdi
; start-structs ; start-structs
@ -114,17 +116,21 @@ parse_func:
; } ; }
; end-structs ; end-structs
sub rsp, 56 ; span: u64 [64..72]
; name: *const u8 [0..8]
; name_len: usize [8..16]
; args_ptr: *const u64 [16..24]
; args_len: usize [24..32]
; return_type: Type [32..48]
; body: u64 [48..56]
; ast: *mut Ast [56..64] ; ast: *mut Ast [56..64]
; body: u64 [48..56]
; return_type: Type [32..48]
; args_len: usize [24..32]
; args_ptr: *const u64 [16..24]
; name_len: usize [8..16]
; name: *const u8 [0..8]
sub rsp, 56
mov qword [rsp + 16], 8 ; <*u64>::dangling() mov qword [rsp + 16], 8 ; <*u64>::dangling()
mov qword [rsp + 24], 0 ; args_len mov qword [rsp + 24], 0 ; args_len
call tokeniser_get_cursor
mov [rsp + 64], rax ; span
mov dil, TOKEN_FN mov dil, TOKEN_FN
call unwrap_token call unwrap_token
mov dil, TOKEN_IDENT mov dil, TOKEN_IDENT
@ -161,9 +167,11 @@ parse_func:
mov rdx, 56 ; size_of::<AstFunction> mov rdx, 56 ; size_of::<AstFunction>
call memcpy call memcpy
mov byte [rsp], AST_FUNCTION ; kind mov byte [rsp], AST_FUNCTION ; AstNode.kind
mov [rsp + 8], rdi ; data mov [rsp + 8], rdi ; AstNode.data
mov qword [rsp + 16], 0 ; extra mov qword [rsp + 16], 0 ; AstNode.extra
mov rdi, [rsp + 64] ; span
mov [rsp + 24], rdi ; AstNode.span
mov rdi, [rsp + 56] ; Ast mov rdi, [rsp + 56] ; Ast
lea rsi, [rsp] ; &AstNode lea rsi, [rsp] ; &AstNode
call vec_push call vec_push
@ -172,6 +180,7 @@ parse_func:
dec rax dec rax
add rsp, 56 add rsp, 56
pop rdi pop rdi
add rsp, 8
pop rbp pop rbp
ret ret
@ -189,6 +198,7 @@ parse_func:
parse_args: parse_args:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
; span: u64 [80..88]
; vec: [40..80] ; vec: [40..80]
; argument: AstArgument { [8..40] ; argument: AstArgument { [8..40]
; name: *const u8 [8..16] ; name: *const u8 [8..16]
@ -196,7 +206,7 @@ parse_args:
; arg_type: Type [24..40] ; arg_type: Type [24..40]
; } ; }
; ast [0..8] ; ast [0..8]
sub rsp, 80 sub rsp, 88
mov [rsp], rdi ; Ast mov [rsp], rdi ; Ast
lea rdi, [rsp + 40] ; vec lea rdi, [rsp + 40] ; vec
@ -210,6 +220,10 @@ parse_args:
call expect_token call expect_token
test rax, rax test rax, rax
jnz .done_args jnz .done_args
call tokeniser_get_cursor
mov [rsp + 80], rax ; span
mov dil, TOKEN_IDENT mov dil, TOKEN_IDENT
call unwrap_token call unwrap_token
mov [rsp + 8], rax ; AstArgument.name mov [rsp + 8], rax ; AstArgument.name
@ -232,6 +246,8 @@ parse_args:
mov qword [rsp + 8], AST_ARG ; AstNode.kind mov qword [rsp + 8], AST_ARG ; AstNode.kind
mov [rsp + 16], rdi ; AstNode.data mov [rsp + 16], rdi ; AstNode.data
mov qword [rsp + 24], 0 ; AstNode.extra mov qword [rsp + 24], 0 ; AstNode.extra
mov rdi, [rsp + 80] ; span
mov [rsp + 32], rdi ; AstNode.span
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
lea rsi, [rsp + 8] ; &AstNode lea rsi, [rsp + 8] ; &AstNode
@ -256,7 +272,7 @@ parse_args:
.done_args: .done_args:
mov rax, [rsp + 40] ; args_ptr mov rax, [rsp + 40] ; args_ptr
mov rdx, [rsp + 48] ; args_len mov rdx, [rsp + 48] ; args_len
add rsp, 80 add rsp, 88
pop rbp pop rbp
ret ret
@ -317,7 +333,7 @@ parse_number:
parse_primary_expr: parse_primary_expr:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
sub rsp, 32 sub rsp, 40
mov [rsp], rdi ; Ast mov [rsp], rdi ; Ast
; start-structs ; start-structs
@ -328,6 +344,9 @@ parse_primary_expr:
; } ; }
; end-structs ; end-structs
call tokeniser_get_cursor
mov [rsp + 32], rax ; span
mov dil, TOKEN_NUMBER mov dil, TOKEN_NUMBER
call expect_token call expect_token
test rax, rax test rax, rax
@ -357,6 +376,7 @@ parse_primary_expr:
mov qword [rsp + 8], AST_VAR_REF ; AstNode.kind mov qword [rsp + 8], AST_VAR_REF ; AstNode.kind
mov [rsp + 16], rdi ; AstNode.data mov [rsp + 16], rdi ; AstNode.data
mov qword [rsp + 24], 0 ; AstNode.extra mov qword [rsp + 24], 0 ; AstNode.extra
; AstNode.span: already at [rsp + 32] (cursor saved before parsing), no store needed
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
lea rsi, [rsp + 8] ; &AstNode lea rsi, [rsp + 8] ; &AstNode
call vec_push call vec_push
@ -370,9 +390,10 @@ parse_primary_expr:
mov rsi, rdx ; lexeme len mov rsi, rdx ; lexeme len
call parse_number call parse_number
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
mov byte [rsp + 8], AST_NUMBER ; kind mov byte [rsp + 8], AST_NUMBER ; AstNode.kind
mov [rsp + 16], rax ; data mov [rsp + 16], rax ; AstNode.data
mov qword [rsp + 24], 0 ; extra mov qword [rsp + 24], 0 ; AstNode.extra
; AstNode.span: already at [rsp + 32] (cursor saved before parsing), no store needed
lea rsi, [rsp + 8] ; &AstNode lea rsi, [rsp + 8] ; &AstNode
call vec_push call vec_push
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
@ -388,7 +409,7 @@ parse_primary_expr:
call unwrap_token call unwrap_token
mov rax, [rsp + 8] ; expr mov rax, [rsp + 8] ; expr
.epilogue: .epilogue:
add rsp, 32 add rsp, 40
pop rbp pop rbp
ret ret
.panic: .panic:
@ -412,6 +433,8 @@ parse_binary_expr:
; end-structs ; end-structs
sub rsp, 64 sub rsp, 64
; span: u64 [56..64]
; AstNode [32..64]
; lexeme: Lexeme [32..56] ; lexeme: Lexeme [32..56]
; right: u64 [24..32] ; right: u64 [24..32]
; right_placeness: u8 [20..21] ; right_placeness: u8 [20..21]
@ -426,6 +449,9 @@ parse_binary_expr:
mov byte [rsp + 17], sil ; upper_precedence mov byte [rsp + 17], sil ; upper_precedence
mov byte [rsp + 16], 0 mov byte [rsp + 16], 0
call tokeniser_get_cursor
mov [rsp + 56], rax ; span
call parse_prefix_expr call parse_prefix_expr
mov [rsp + 8], rax ; left mov [rsp + 8], rax ; left
mov [rsp + 19], dl ; left_placeness mov [rsp + 19], dl ; left_placeness
@ -510,6 +536,7 @@ parse_binary_expr:
mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind
mov [rsp + 40], rax ; AstNode.data mov [rsp + 40], rax ; AstNode.data
mov qword [rsp + 48], 0 ; AstNode.extra mov qword [rsp + 48], 0 ; AstNode.extra
; AstNode.span: already at [rsp + 56] (cursor saved before parsing), no store needed
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
lea rsi, [rsp + 32] ; &AstNode lea rsi, [rsp + 32] ; &AstNode
call vec_push call vec_push
@ -547,10 +574,13 @@ parse_statement:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
; Ast [24..32] ; AstNode [8..40]
; AstNode [0..24] ; Ast [0..8]
sub rsp, 32 sub rsp, 40
mov [rsp + 24], rdi ; Ast mov [rsp], rdi ; Ast
call tokeniser_get_cursor
mov [rsp + 32], rax ; AstNode.span
mov dil, TOKEN_RETURN mov dil, TOKEN_RETURN
call expect_token call expect_token
@ -563,21 +593,21 @@ parse_statement:
jmp .panic jmp .panic
.let: .let:
mov rdi, [rsp + 24] ; Ast mov rdi, [rsp] ; Ast
call ast_parse_let call ast_parse_let
mov [rsp], rax ; statement mov [rsp], rax ; statement
jmp .semi jmp .semi
.return: .return:
mov rdi, [rsp + 24] ; Ast mov rdi, [rsp] ; Ast
call parse_expr call parse_expr
mov byte [rsp], AST_RETURN_STATEMENT ; kind mov byte [rsp + 8], AST_RETURN_STATEMENT ; AstNode.kind
mov [rsp + 8], rax ; data mov [rsp + 16], rax ; AstNode.data
mov qword [rsp + 16], 0 ; extra mov qword [rsp + 24], 0 ; AstNode.extra
mov rdi, [rsp + 24] ; Ast mov rdi, [rsp] ; Ast
lea rsi, [rsp] ; &AstNode lea rsi, [rsp] ; &AstNode
call vec_push call vec_push
mov rdi, [rsp + 24] ; Ast mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len() mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax dec rax
mov [rsp], rax mov [rsp], rax
@ -586,7 +616,7 @@ parse_statement:
mov dil, TOKEN_SEMI mov dil, TOKEN_SEMI
call unwrap_token call unwrap_token
mov rax, [rsp] ; expression mov rax, [rsp] ; expression
add rsp, 32 add rsp, 40
pop rbp pop rbp
ret ret
.panic: .panic:
@ -598,12 +628,16 @@ parse_block:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
; span: u64 [64..72]
; Ast: *mut Ast [56..64] ; Ast: *mut Ast [56..64]
; statements: Vec<Statement> [8..56] ; statements: Vec<Statement> [8..56]
; statement: u64 [0..8] ; statement: u64 [0..8]
sub rsp, 64 sub rsp, 72
mov [rsp + 56], rdi ; Ast mov [rsp + 56], rdi ; Ast
call tokeniser_get_cursor
mov [rsp + 64], rax ; span
mov dil, TOKEN_LBRACE mov dil, TOKEN_LBRACE
call unwrap_token call unwrap_token
@ -632,13 +666,15 @@ parse_block:
jmp .loop jmp .loop
.done: .done:
mov rdi, [rsp + 56] ; Ast mov rdi, [rsp + 56] ; Ast
mov qword [rsp], AST_BLOCK ; kind mov qword [rsp], AST_BLOCK ; AstNode.kind
mov rsi, [rsp + 64] ; span
mov [rsp + 24], rsi ; AstNode.span
lea rsi, [rsp] ; &AstNode lea rsi, [rsp] ; &AstNode
call vec_push call vec_push
mov rdi, [rsp + 56] ; Ast mov rdi, [rsp + 56] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len() mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax dec rax
add rsp, 64 add rsp, 72
pop rbp pop rbp
ret ret
@ -707,11 +743,14 @@ parse_prefix_expr:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
; AstNode [8..40]
; ast [0..8] ; ast [0..8]
sub rsp, 32 sub rsp, 40
mov [rsp], rdi ; Ast mov [rsp], rdi ; Ast
call tokeniser_get_cursor
mov [rsp + 32], rax ; AstNode.span
mov dil, TOKEN_STAR mov dil, TOKEN_STAR
call expect_token call expect_token
test rax, rax test rax, rax
@ -763,7 +802,7 @@ parse_prefix_expr:
jmp .done jmp .done
.done: .done:
add rsp, 32 add rsp, 40
pop rbp pop rbp
ret ret
@ -773,13 +812,17 @@ parse_assignment_expr:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
; span: u64 [32..40]
; source [24..32] ; source [24..32]
; dest [16..24] ; dest [16..24]
; dest_placeness [8..9] ; dest_placeness [8..9]
; ast [0..8] ; ast [0..8]
sub rsp, 32 sub rsp, 40
mov [rsp], rdi ; Ast mov [rsp], rdi ; Ast
call tokeniser_get_cursor
mov [rsp + 32], rax ; span
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
call parse_binary_expr call parse_binary_expr
mov [rsp + 16], rax ; dest mov [rsp + 16], rax ; dest
@ -796,7 +839,7 @@ parse_assignment_expr:
mov rsi, [rsp + 16] ; dest mov rsi, [rsp + 16] ; dest
movzx rdx, byte [rsp + 8] ; placeness movzx rdx, byte [rsp + 8] ; placeness
call ast_value_to_place call ast_value_to_place
mov [rsp + 16], rax ; source mov [rsp + 16], rax ; dest
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
call parse_expr call parse_expr
@ -820,7 +863,7 @@ parse_assignment_expr:
.done: .done:
mov rax, [rsp + 16] ; dest mov rax, [rsp + 16] ; dest
movzx rdx, byte [rsp + 8] ; placeness movzx rdx, byte [rsp + 8] ; placeness
add rsp, 32 add rsp, 40
pop rbp pop rbp
ret ret
@ -843,12 +886,16 @@ ast_parse_let:
; } ; }
; end-structs ; end-structs
; AstNode [8..32] ; span: u64 [40..48]
; AstNode [8..40]
; AstVarDecl [8..40] ; AstVarDecl [8..40]
; Ast [0..8] ; Ast [0..8]
sub rsp, 48 sub rsp, 48
mov [rsp], rdi ; Ast mov [rsp], rdi ; Ast
call tokeniser_get_cursor
mov [rsp + 40], rax ; span
; skipped in parse_statement ; skipped in parse_statement
; mov dil, TOKEN_LET ; mov dil, TOKEN_LET
; call unwrap_token ; call unwrap_token
@ -877,6 +924,8 @@ ast_parse_let:
mov qword [rsp + 8], AST_VAR_DECL ; AstNode.kind mov qword [rsp + 8], AST_VAR_DECL ; AstNode.kind
mov [rsp + 16], rdi ; AstNode.data mov [rsp + 16], rdi ; AstNode.data
mov qword [rsp + 24], 0 ; AstNode.extra mov qword [rsp + 24], 0 ; AstNode.extra
mov rdi, [rsp + 40] ; span
mov [rsp + 32], rdi ; AstNode.span
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
lea rsi, [rsp + 8] ; &AstNode lea rsi, [rsp + 8] ; &AstNode
@ -924,17 +973,26 @@ ast_value_to_place:
mov rax, rsi mov rax, rsi
je .done je .done
; create new AST node ; create new AST node
sub rsp, 32 sub rsp, 40
mov [rsp], rdi mov [rsp], rdi ; Ast
mov byte [rsp + 8], AST_VALUE_TO_PLACE ; kind mov [rsp + 8], rsi ; node_index
mov [rsp + 16], rsi ; data
mov qword [rsp + 24], 0 ; extra ; load original node's span
call vec_get
mov rdi, [rax + 24] ; AstNode.span
mov [rsp + 32], rdi ; AstNode.span
mov rsi, [rsp + 8] ; node_index
mov byte [rsp + 8], AST_VALUE_TO_PLACE ; AstNode.kind
mov [rsp + 16], rsi ; AstNode.data
mov qword [rsp + 24], 0 ; AstNode.extra
lea rsi, [rsp + 8] ; &AstNode lea rsi, [rsp + 8] ; &AstNode
mov rdi, [rsp] ; Ast
call vec_push call vec_push
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len() mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax dec rax
add rsp, 32 add rsp, 40
.done: .done:
pop rbp pop rbp
ret ret
@ -951,17 +1009,26 @@ ast_place_to_value:
mov rax, rsi mov rax, rsi
je .done je .done
; create new AST node ; create new AST node
sub rsp, 32 sub rsp, 40
mov [rsp], rdi mov [rsp], rdi ; Ast
mov byte [rsp + 8], AST_PLACE_TO_VALUE ; kind mov [rsp + 8], rsi ; node_index
mov [rsp + 16], rsi ; data
mov qword [rsp + 24], 0 ; extra ; load original node's span
call vec_get
mov rdi, [rax + 24] ; AstNode.span
mov [rsp + 32], rdi ; AstNode.span
mov rsi, [rsp + 8] ; node_index
mov byte [rsp + 8], AST_PLACE_TO_VALUE ; AstNode.kind
mov [rsp + 16], rsi ; AstNode.data
mov qword [rsp + 24], 0 ; AstNode.extra
lea rsi, [rsp + 8] ; &AstNode lea rsi, [rsp + 8] ; &AstNode
mov rdi, [rsp] ; Ast
call vec_push call vec_push
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len() mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax dec rax
add rsp, 32 add rsp, 40
.done: .done:
pop rbp pop rbp
ret ret
@ -1164,19 +1231,17 @@ ast_build_symtable_for_each:
mov byte [rsp + 32], SYM_KEY_SCOPE ; SymKey.kind mov byte [rsp + 32], SYM_KEY_SCOPE ; SymKey.kind
mov rdx, [rsp + 16] ; index mov rdx, [rsp + 16] ; index
mov qword [rsp + 40], rdx ; SymKey.scope_index mov qword [rsp + 40], rdx ; SymKey.scope_index
mov qword [rsp + 48], 0 ; SymKey.span mov rdx, [rax + 24] ; AstNode.span
mov qword [rsp + 48], rdx ; SymKey.span
mov qword [rsp + 56], 1 ; SymKey.ident mov qword [rsp + 56], 1 ; SymKey.ident
mov qword [rsp + 64], 0 ; SymKey.ident_len mov qword [rsp + 64], 0 ; SymKey.ident_len
; mov rbx, [rax + 16] ; AstNode.data
; mov rdx, [rbx + 8] ; Func.name
; mov rcx, [rbx + 16] ; Func.name_len
mov rbx, [rax + 8] ; AstNode.data mov rbx, [rax + 8] ; AstNode.data
mov rdx, [rbx + 0] ; Func.name mov rdx, [rbx + 0] ; Func.name
mov rcx, [rbx + 8] ; Func.name_len mov rcx, [rbx + 8] ; Func.name_len
mov qword [rsp + 72], rdx ; SymEntry.index mov [rsp + 72], rdx ; SymEntry.index
mov qword [rsp + 80], rcx ; SymEntry.extra mov [rsp + 80], rcx ; SymEntry.extra
mov rdi, [rsp + 8] ; *Ctx mov rdi, [rsp + 8] ; *Ctx
lea rdi, [rdi + 40] ; Ctx.symtable lea rdi, [rdi + 40] ; Ctx.symtable
@ -1196,8 +1261,8 @@ ast_build_symtable_for_each:
; insert variable entry ; insert variable entry
mov byte [rsp + 32], SYM_KEY_VAR ; SymKey.kind mov byte [rsp + 32], SYM_KEY_VAR ; SymKey.kind
; TODO: set span correctly mov rdx, [rax + 24] ; AstNode.span
mov qword [rsp + 48], 0 ; SymKey.span mov qword [rsp + 48], rdx ; SymKey.span
mov rbx, [rsp + 24] ; AstNode.data mov rbx, [rsp + 24] ; AstNode.data
mov rbx, [rbx + 8] ; AstNode.data mov rbx, [rbx + 8] ; AstNode.data
@ -1232,8 +1297,8 @@ ast_build_symtable_for_each:
; insert variable entry ; insert variable entry
mov byte [rsp + 32], SYM_KEY_ARG ; SymKey.kind mov byte [rsp + 32], SYM_KEY_ARG ; SymKey.kind
; TODO: set span correctly mov rdx, [rax + 24] ; AstNode.span
mov qword [rsp + 48], 0 ; SymKey.span mov qword [rsp + 48], rdx ; SymKey.span
mov rbx, [rsp + 24] ; *AstNode mov rbx, [rsp + 24] ; *AstNode
mov rbx, [rbx + 8] ; AstNode.data mov rbx, [rbx + 8] ; AstNode.data

View file

@ -48,7 +48,7 @@ fn main() {
}; };
} }
// print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) }); print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) });
// print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe { // print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
// parse_func(ast) // parse_func(ast)
// }); // });

View file

@ -106,6 +106,7 @@ pub struct AstNode {
pub kind: u8, pub kind: u8,
pub data: *const (), pub data: *const (),
pub extra: usize, pub extra: usize,
pub span: u64,
} }
#[repr(C)] #[repr(C)]