From 525b78cdf4fb181109aa11ba28840b5296568115 Mon Sep 17 00:00:00 2001 From: janis Date: Thu, 30 Oct 2025 14:35:49 +0100 Subject: [PATCH] pointer type --- lang/src/ast.asm | 155 ++++++++++++++++++++++-------------- lang/tests/ast.rs | 65 ++++++++------- lang/tests/shared/defs.rs | 12 +-- lang/tests/shared/shared.rs | 35 +++++++- 4 files changed, 173 insertions(+), 94 deletions(-) diff --git a/lang/src/ast.asm b/lang/src/ast.asm index bfca7e3..46bacba 100644 --- a/lang/src/ast.asm +++ b/lang/src/ast.asm @@ -19,11 +19,12 @@ section .rdata AST_VAR_REF equ 13 ; :u8 AST_ARG equ 14 ; :u8 - TYPE_VOID equ 1 - TYPE_BOOL equ 2 - TYPE_I32 equ 3 - TYPE_U32 equ 4 - TYPE_STR equ 5 + TYPE_VOID equ 1 ; :u8 + TYPE_BOOL equ 2 ; :u8 + TYPE_I32 equ 3 ; :u8 + TYPE_U32 equ 4 ; :u8 + TYPE_STR equ 5 ; :u8 + TYPE_POINTER equ 6 ; :u8 ;; end-consts PRECEDENCE_ADD equ 90 @@ -86,6 +87,7 @@ global parse_block ;; ;; struct Type { ;; kind: u8, +;; data: u64, ;; } ;; end-structs @@ -107,13 +109,16 @@ parse_func: ; } ; end-structs - sub rsp, 48 + sub rsp, 56 ; name: *const u8 [0..8] ; name_len: usize [8..16] - ; args_ptr: *const Arg [16..24] + ; args_ptr: *const u64 [16..24] ; args_len: usize [24..32] - ; return_type: Type [32..40] - ; body: u64 [40..48] + ; return_type: Type [32..48] + ; body: u64 [48..56] + ; ast: *mut Ast [56..64] + mov qword [rsp + 16], 8 ; <*u64>::dangling() + mov qword [rsp + 24], 0 ; args_len mov dil, TOKEN_FN call unwrap_token @@ -131,40 +136,42 @@ parse_func: .after_args: mov dil, TOKEN_ARROW call unwrap_token - mov rdi, [rsp + 48] ; Ast + mov rdi, [rsp + 56] ; Ast call parse_type - mov [rsp + 32], rax ; return type + mov [rsp + 32], rax ; return_type.kind + mov [rsp + 40], rdx ; return_type.data mov dil, TOKEN_LBRACE call peek_expect_token test rax, rax je panic - mov rdi, [rsp + 48] ; Ast + mov rdi, [rsp + 56] ; Ast call parse_block - mov [rsp + 40], rax ; body + mov [rsp + 48], rax ; body .epilogue: - mov rdi, 48 - mov rsi, 8 + mov rdi, 56 ; size_of:: + mov rsi, 8 ; align_of:: call bump_alloc - mov rsi, rsp - mov rdi, rax - mov rdx, 48 + lea rsi, [rsp] ; &AstFunction + mov rdi, rax ; destination ptr + mov rdx, 56 ; size_of:: call memcpy + mov byte [rsp], AST_FUNCTION ; kind mov [rsp + 8], rdi ; data mov qword [rsp + 16], 0 ; extra - mov rdi, [rsp + 48] ; Ast + mov rdi, [rsp + 56] ; Ast lea rsi, [rsp] ; &AstNode call vec_push - mov rax, [rsp + 48] ; Ast + mov rdi, [rsp + 56] ; Ast mov rax, [rdi + 8] ; return Ast.nodes.len() dec rax - add rsp, 48 + add rsp, 56 pop rdi pop rbp ret .args: - mov rdi, [rsp + 48] ; Ast + mov rdi, [rsp + 56] ; Ast call parse_args mov [rsp + 16], rax ; args_ptr mov [rsp + 24], rdx ; args_len @@ -177,14 +184,18 @@ parse_func: parse_args: push rbp mov rbp, rsp - ; vec: [32..72] - ; argument: [8..32] + ; vec: [40..80] + ; argument: AstArgument { [8..40] + ; name: *const u8 [8..16] + ; name_len: usize [16..24] + ; arg_type: Type [24..40] + ; } ; ast [0..8] - sub rsp, 72 + sub rsp, 80 mov [rsp], rdi ; Ast - lea rdi, [rsp + 32] ; vec - mov rsi, 8 ; size of AstArgument + lea rdi, [rsp + 40] ; vec + mov rsi, 8 ; size of u64 (Index) mov rdx, 0 ; drop = None mov rcx, 16 ; capacity call vec_init_with @@ -203,18 +214,19 @@ parse_args: mov rdi, [rsp] ; Ast call parse_type mov [rsp + 24], rax ; AstArgument.arg_type + mov [rsp + 32], rdx ; AstArgument.arg_type.data - mov rdi, 24 - mov rsi, 8 + mov rdi, 32 ; size_of:: + mov rsi, 8 ; align_of:: call bump_alloc mov rdi, rax lea rsi, [rsp + 8] ; &AstArgument - mov rdx, 24 + mov rdx, 32 ; size_of:: call memcpy mov qword [rsp + 8], AST_ARG ; AstNode.kind - mov [rsp + 16], rdi ; AstNode.data - mov qword [rsp + 24], 0 ; AstNode.extra + mov [rsp + 16], rdi ; AstNode.data + mov qword [rsp + 24], 0 ; AstNode.extra mov rdi, [rsp] ; Ast lea rsi, [rsp + 8] ; &AstNode @@ -223,7 +235,7 @@ parse_args: mov rax, [rdi + 8] ; Ast.nodes.len() dec rax - lea rdi, [rsp + 32] ; vec + lea rdi, [rsp + 40] ; vec mov [rsp + 8], rax ; argument lea rsi, [rsp + 8] ; &argument call vec_push @@ -237,9 +249,9 @@ parse_args: mov dil, TOKEN_RPARENS call unwrap_token .done_args: - mov rax, [rsp + 32] ; args_ptr - mov rdx, [rsp + 40] ; args_len - add rsp, 72 + mov rax, [rsp + 40] ; args_ptr + mov rdx, [rsp + 48] ; args_len + add rsp, 80 pop rbp ret @@ -630,12 +642,14 @@ parse_block: parse_type: push rbp mov rbp, rsp - push rdi - sub rsp, 24 - mov rdi, rsp + sub rsp, 32 + mov [rsp], rdi ; Ast + + lea rdi, [rsp + 8] call find_lexeme ; TODO: use peek here to allow failing gracefully - mov rax, [rsp] ; token kind + xor rdx, rdx + mov rax, [rsp + 8] ; token kind cmp al, TOKEN_I32 je .i32_type cmp al, TOKEN_U32 @@ -644,6 +658,8 @@ parse_type: je .void_type cmp al, TOKEN_BOOL je .bool_type + cmp al, TOKEN_STAR + je .pointer_type jmp .panic .i32_type: mov rax, TYPE_I32 @@ -657,9 +673,24 @@ parse_type: .bool_type: mov rax, TYPE_BOOL jmp .epilogue +.pointer_type: + mov rdi, [rsp] ; Ast + call parse_type + mov [rsp + 8], rax ; Type.kind + mov [rsp + 16], rdx ; Type.data + + mov rdi, 16 ; size_of:: + mov rsi, 8 ; align_of:: + call bump_alloc + mov rdi, rax + lea rsi, [rsp + 8] + mov rdx, 16 ; size_of:: + call memcpy + mov rax, TYPE_POINTER + mov rdx, rdi + jmp .epilogue .epilogue: - add rsp, 24 - pop rdi + add rsp, 32 pop rbp ret .panic: @@ -807,7 +838,10 @@ ast_parse_let: ; } ; end-structs - sub rsp, 64 + ; AstNode [8..32] + ; AstVarDecl [8..40] + ; Ast [0..8] + sub rsp, 48 mov [rsp], rdi ; Ast ; skipped in parse_statement @@ -816,38 +850,39 @@ ast_parse_let: mov dil, TOKEN_IDENT call unwrap_token - mov [rsp + 8], rax ; variable name - mov [rsp + 16], rdx ; variable name length + mov [rsp + 8], rax ; AstVarDecl.name + mov [rsp + 16], rdx ; AstVarDecl.name_len mov dil, TOKEN_COLON call unwrap_token mov rdi, [rsp] ; Ast call parse_type - mov [rsp + 24], rax ; variable type + mov [rsp + 24], rax ; AstVarDecl.var_type.kind + mov [rsp + 32], rdx ; AstVarDecl.var_type.data - mov rdi, 24 - mov rsi, 8 + mov rdi, 32 ; size_of:: + mov rsi, 8 ; align_of:: call bump_alloc - mov rdi, rax ; AstVariable ptr - lea rsi, [rsp + 8] ; &AstVariable - mov rdx, 24 + mov rdi, rax ; AstVarDecl ptr + lea rsi, [rsp + 8] ; &AstVarDecl + mov rdx, 32 ; size_of:: call memcpy - mov qword [rsp + 32], AST_VAR_DECL ; AstNode.kind - mov [rsp + 40], rdi ; AstNode.data - mov qword [rsp + 48], 0 ; AstNode.extra + mov qword [rsp + 8], AST_VAR_DECL ; AstNode.kind + mov [rsp + 16], rdi ; AstNode.data + mov qword [rsp + 24], 0 ; AstNode.extra mov rdi, [rsp] ; Ast - lea rsi, [rsp + 32] ; &AstNode + lea rsi, [rsp + 8] ; &AstNode call vec_push ; variable is already a place mov rdi, [rsp] ; Ast mov rax, [rdi + 8] ; Ast.nodes.len() dec rax - mov qword [rsp + 32], AST_ASSIGNMENT ; AstNode.kind - mov [rsp + 40], rax ; AstNode.data (variable index) + mov qword [rsp + 8], AST_ASSIGNMENT ; AstNode.kind + mov [rsp + 16], rax ; AstNode.data (variable index) mov dil, TOKEN_EQUALS call unwrap_token @@ -859,15 +894,15 @@ ast_parse_let: ; mov rdx, rdx ; is_placeness call ast_place_to_value - mov [rsp + 48], rax ; AstNode.extra (expr index) + mov [rsp + 24], rax ; AstNode.extra (expr index) mov rdi, [rsp] ; Ast - lea rsi, [rsp + 32] ; &AstNode + lea rsi, [rsp + 8] ; &AstNode call vec_push mov rdi, [rsp] ; Ast mov rax, [rdi + 8] ; Ast.nodes.len() dec rax - add rsp, 64 + add rsp, 48 xor rdx, rdx ; placeness = false pop rbp ret diff --git a/lang/tests/ast.rs b/lang/tests/ast.rs index e920bed..67a9d26 100644 --- a/lang/tests/ast.rs +++ b/lang/tests/ast.rs @@ -17,44 +17,47 @@ fn main() { let src = b"3 + 4"; - fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast)) { + fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast) -> u64) { unsafe { tokeniser_init_buf(src.as_ptr(), src.len()); let mut ast = Ast { nodes: util::vec::Vec::new(), }; let expr_id = parser(&mut ast); + eprintln!("Parsed expression ID: {}", expr_id); println!("{:#}", &ast); }; } - // print_ast(b"3 + 4", |ast| unsafe { - // parse_expr(ast); - // }); - // print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe { - // parse_func(ast); - // }); - // print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe { - // parse_func(ast); - // }); - // print_ast( - // b"fn main() -> void { return (1 + (2 * 3)) / 4; }", - // |ast| unsafe { - // parse_func(ast); - // }, - // ); - // print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe { - // parse_func(ast); - // }); + print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) }); + print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe { + parse_func(ast) + }); + print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe { + parse_func(ast) + }); + print_ast( + b"fn main() -> void { return (1 + (2 * 3)) / 4; }", + |ast| unsafe { parse_func(ast) }, + ); + print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe { + parse_func(ast) + }); - // print_ast(b"fn main() -> void { let x: u32 = 4; }", |ast| unsafe { - // parse_func(ast); - // }); + print_ast(b"fn main() -> void { let x: u32 = 4; }", |ast| unsafe { + parse_func(ast) + }); print_ast( b"fn main(a: u32) -> void { let x: u32 = a + 4; }", - |ast| unsafe { - parse_func(ast); - }, + |ast| unsafe { parse_func(ast) }, + ); + print_ast( + b"fn main(a: u32) -> void { +let x: u32 = a + 4; +let y: *u32 = &x; +return *y; +}", + |ast| unsafe { parse_func(ast) }, ); } @@ -69,11 +72,17 @@ impl std::fmt::Display for AstNode { AST_NUMBER => { write!(f, "Number({})", self.data as usize) } + AST_DEREF => { + write!(f, "Deref(expr: {})", self.data as usize) + } + AST_ADDRESS_OF => { + write!(f, "AddressOf(expr: {})", self.data as usize) + } AST_ARG => { let arg = unsafe { self.data.cast::().read() }; write!( f, - "Arg(name: {:?}, arg_type: {:?})", + "Arg(name: {:?}, arg_type: {})", unsafe { std::str::from_utf8(std::slice::from_raw_parts(arg.name, arg.name_len)) }, @@ -97,7 +106,7 @@ impl std::fmt::Display for AstNode { let var_decl = unsafe { self.data.cast::().read() }; write!( f, - "VarDecl(name: {:?}, var_type: {:?})", + "VarDecl(name: {:?}, var_type: {})", unsafe { std::str::from_utf8(std::slice::from_raw_parts( var_decl.name, @@ -134,7 +143,7 @@ impl std::fmt::Display for AstNode { let func = unsafe { self.data.cast::().read() }; write!( f, - "Function(name: {:?}, args: {:?}, return_type: {:?}, body: {})", + "Function(name: {:?}, args: {:?}, return_type: {}, body: {})", unsafe { std::str::from_utf8(std::slice::from_raw_parts(func.name, func.name_len)) }, diff --git a/lang/tests/shared/defs.rs b/lang/tests/shared/defs.rs index d775478..ed96a8c 100644 --- a/lang/tests/shared/defs.rs +++ b/lang/tests/shared/defs.rs @@ -29,11 +29,12 @@ pub const AST_ADDRESS_OF: u8 = 11; pub const AST_VAR_DECL: u8 = 12; pub const AST_VAR_REF: u8 = 13; pub const AST_ARG: u8 = 14; -pub const TYPE_VOID: u32 = 1; -pub const TYPE_BOOL: u32 = 2; -pub const TYPE_I32: u32 = 3; -pub const TYPE_U32: u32 = 4; -pub const TYPE_STR: u32 = 5; +pub const TYPE_VOID: u8 = 1; +pub const TYPE_BOOL: u8 = 2; +pub const TYPE_I32: u8 = 3; +pub const TYPE_U32: u8 = 4; +pub const TYPE_STR: u8 = 5; +pub const TYPE_POINTER: u8 = 6; pub const TOKEN_EOF: u8 = 0; pub const TOKEN_LET: u8 = 1; pub const TOKEN_IF: u8 = 2; @@ -108,6 +109,7 @@ pub struct AstArgument { #[derive(Debug)] pub struct Type { pub kind: u8, + pub data: u64, } #[repr(C)] diff --git a/lang/tests/shared/shared.rs b/lang/tests/shared/shared.rs index 4b0d1ad..2b4b7d3 100644 --- a/lang/tests/shared/shared.rs +++ b/lang/tests/shared/shared.rs @@ -253,7 +253,11 @@ pub mod vec { cmp_trampoline::, &raw mut cmp as *mut F as *mut (), ); - if vacant { Err(index) } else { Ok(index) } + if vacant { + Err(index) + } else { + Ok(index) + } } } @@ -349,3 +353,32 @@ impl<'a, T: core::fmt::Display> core::fmt::Display for DisplaySlice<'a, T> { write!(f, "]") } } + +impl core::fmt::Display for defs::Type { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind { + defs::TYPE_VOID => { + write!(f, "void") + } + defs::TYPE_BOOL => { + write!(f, "bool") + } + defs::TYPE_I32 => { + write!(f, "i32") + } + defs::TYPE_U32 => { + write!(f, "u32") + } + defs::TYPE_STR => { + write!(f, "str") + } + defs::TYPE_POINTER => { + let pointee = unsafe { (self.data as *const defs::Type).read() }; + write!(f, "*{pointee}",) + } + _ => { + write!(f, "UnknownType") + } + } + } +}