pointer type

This commit is contained in:
janis 2025-10-30 14:35:49 +01:00
parent e54313b6a2
commit 525b78cdf4
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
4 changed files with 173 additions and 94 deletions

View file

@ -19,11 +19,12 @@ section .rdata
AST_VAR_REF equ 13 ; :u8
AST_ARG equ 14 ; :u8
TYPE_VOID equ 1
TYPE_BOOL equ 2
TYPE_I32 equ 3
TYPE_U32 equ 4
TYPE_STR equ 5
TYPE_VOID equ 1 ; :u8
TYPE_BOOL equ 2 ; :u8
TYPE_I32 equ 3 ; :u8
TYPE_U32 equ 4 ; :u8
TYPE_STR equ 5 ; :u8
TYPE_POINTER equ 6 ; :u8
;; end-consts
PRECEDENCE_ADD equ 90
@ -86,6 +87,7 @@ global parse_block
;;
;; struct Type {
;; kind: u8,
;; data: u64,
;; }
;; end-structs
@ -107,13 +109,16 @@ parse_func:
; }
; end-structs
sub rsp, 48
sub rsp, 56
; name: *const u8 [0..8]
; name_len: usize [8..16]
; args_ptr: *const Arg [16..24]
; args_ptr: *const u64 [16..24]
; args_len: usize [24..32]
; return_type: Type [32..40]
; body: u64 [40..48]
; return_type: Type [32..48]
; body: u64 [48..56]
; ast: *mut Ast [56..64]
mov qword [rsp + 16], 8 ; <*u64>::dangling()
mov qword [rsp + 24], 0 ; args_len
mov dil, TOKEN_FN
call unwrap_token
@ -131,40 +136,42 @@ parse_func:
.after_args:
mov dil, TOKEN_ARROW
call unwrap_token
mov rdi, [rsp + 48] ; Ast
mov rdi, [rsp + 56] ; Ast
call parse_type
mov [rsp + 32], rax ; return type
mov [rsp + 32], rax ; return_type.kind
mov [rsp + 40], rdx ; return_type.data
mov dil, TOKEN_LBRACE
call peek_expect_token
test rax, rax
je panic
mov rdi, [rsp + 48] ; Ast
mov rdi, [rsp + 56] ; Ast
call parse_block
mov [rsp + 40], rax ; body
mov [rsp + 48], rax ; body
.epilogue:
mov rdi, 48
mov rsi, 8
mov rdi, 56 ; size_of::<AstFunction>
mov rsi, 8 ; align_of::<AstFunction>
call bump_alloc
mov rsi, rsp
mov rdi, rax
mov rdx, 48
lea rsi, [rsp] ; &AstFunction
mov rdi, rax ; destination ptr
mov rdx, 56 ; size_of::<AstFunction>
call memcpy
mov byte [rsp], AST_FUNCTION ; kind
mov [rsp + 8], rdi ; data
mov qword [rsp + 16], 0 ; extra
mov rdi, [rsp + 48] ; Ast
mov rdi, [rsp + 56] ; Ast
lea rsi, [rsp] ; &AstNode
call vec_push
mov rax, [rsp + 48] ; Ast
mov rdi, [rsp + 56] ; Ast
mov rax, [rdi + 8] ; return Ast.nodes.len()
dec rax
add rsp, 48
add rsp, 56
pop rdi
pop rbp
ret
.args:
mov rdi, [rsp + 48] ; Ast
mov rdi, [rsp + 56] ; Ast
call parse_args
mov [rsp + 16], rax ; args_ptr
mov [rsp + 24], rdx ; args_len
@ -177,14 +184,18 @@ parse_func:
parse_args:
push rbp
mov rbp, rsp
; vec: [32..72]
; argument: [8..32]
; vec: [40..80]
; argument: AstArgument { [8..40]
; name: *const u8 [8..16]
; name_len: usize [16..24]
; arg_type: Type [24..40]
; }
; ast [0..8]
sub rsp, 72
sub rsp, 80
mov [rsp], rdi ; Ast
lea rdi, [rsp + 32] ; vec
mov rsi, 8 ; size of AstArgument
lea rdi, [rsp + 40] ; vec
mov rsi, 8 ; size of u64 (Index)
mov rdx, 0 ; drop = None
mov rcx, 16 ; capacity
call vec_init_with
@ -203,18 +214,19 @@ parse_args:
mov rdi, [rsp] ; Ast
call parse_type
mov [rsp + 24], rax ; AstArgument.arg_type
mov [rsp + 32], rdx ; AstArgument.arg_type.data
mov rdi, 24
mov rsi, 8
mov rdi, 32 ; size_of::<AstArgument>
mov rsi, 8 ; align_of::<AstArgument>
call bump_alloc
mov rdi, rax
lea rsi, [rsp + 8] ; &AstArgument
mov rdx, 24
mov rdx, 32 ; size_of::<AstArgument>
call memcpy
mov qword [rsp + 8], AST_ARG ; AstNode.kind
mov [rsp + 16], rdi ; AstNode.data
mov qword [rsp + 24], 0 ; AstNode.extra
mov [rsp + 16], rdi ; AstNode.data
mov qword [rsp + 24], 0 ; AstNode.extra
mov rdi, [rsp] ; Ast
lea rsi, [rsp + 8] ; &AstNode
@ -223,7 +235,7 @@ parse_args:
mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax
lea rdi, [rsp + 32] ; vec
lea rdi, [rsp + 40] ; vec
mov [rsp + 8], rax ; argument
lea rsi, [rsp + 8] ; &argument
call vec_push
@ -237,9 +249,9 @@ parse_args:
mov dil, TOKEN_RPARENS
call unwrap_token
.done_args:
mov rax, [rsp + 32] ; args_ptr
mov rdx, [rsp + 40] ; args_len
add rsp, 72
mov rax, [rsp + 40] ; args_ptr
mov rdx, [rsp + 48] ; args_len
add rsp, 80
pop rbp
ret
@ -630,12 +642,14 @@ parse_block:
parse_type:
push rbp
mov rbp, rsp
push rdi
sub rsp, 24
mov rdi, rsp
sub rsp, 32
mov [rsp], rdi ; Ast
lea rdi, [rsp + 8]
call find_lexeme ; TODO: use peek here to allow failing gracefully
mov rax, [rsp] ; token kind
xor rdx, rdx
mov rax, [rsp + 8] ; token kind
cmp al, TOKEN_I32
je .i32_type
cmp al, TOKEN_U32
@ -644,6 +658,8 @@ parse_type:
je .void_type
cmp al, TOKEN_BOOL
je .bool_type
cmp al, TOKEN_STAR
je .pointer_type
jmp .panic
.i32_type:
mov rax, TYPE_I32
@ -657,9 +673,24 @@ parse_type:
.bool_type:
mov rax, TYPE_BOOL
jmp .epilogue
.pointer_type:
mov rdi, [rsp] ; Ast
call parse_type
mov [rsp + 8], rax ; Type.kind
mov [rsp + 16], rdx ; Type.data
mov rdi, 16 ; size_of::<Type>
mov rsi, 8 ; align_of::<Type>
call bump_alloc
mov rdi, rax
lea rsi, [rsp + 8]
mov rdx, 16 ; size_of::<Type>
call memcpy
mov rax, TYPE_POINTER
mov rdx, rdi
jmp .epilogue
.epilogue:
add rsp, 24
pop rdi
add rsp, 32
pop rbp
ret
.panic:
@ -807,7 +838,10 @@ ast_parse_let:
; }
; end-structs
sub rsp, 64
; AstNode [8..32]
; AstVarDecl [8..40]
; Ast [0..8]
sub rsp, 48
mov [rsp], rdi ; Ast
; skipped in parse_statement
@ -816,38 +850,39 @@ ast_parse_let:
mov dil, TOKEN_IDENT
call unwrap_token
mov [rsp + 8], rax ; variable name
mov [rsp + 16], rdx ; variable name length
mov [rsp + 8], rax ; AstVarDecl.name
mov [rsp + 16], rdx ; AstVarDecl.name_len
mov dil, TOKEN_COLON
call unwrap_token
mov rdi, [rsp] ; Ast
call parse_type
mov [rsp + 24], rax ; variable type
mov [rsp + 24], rax ; AstVarDecl.var_type.kind
mov [rsp + 32], rdx ; AstVarDecl.var_type.data
mov rdi, 24
mov rsi, 8
mov rdi, 32 ; size_of::<AstVarDecl>
mov rsi, 8 ; align_of::<AstVarDecl>
call bump_alloc
mov rdi, rax ; AstVariable ptr
lea rsi, [rsp + 8] ; &AstVariable
mov rdx, 24
mov rdi, rax ; AstVarDecl ptr
lea rsi, [rsp + 8] ; &AstVarDecl
mov rdx, 32 ; size_of::<AstVarDecl>
call memcpy
mov qword [rsp + 32], AST_VAR_DECL ; AstNode.kind
mov [rsp + 40], rdi ; AstNode.data
mov qword [rsp + 48], 0 ; AstNode.extra
mov qword [rsp + 8], AST_VAR_DECL ; AstNode.kind
mov [rsp + 16], rdi ; AstNode.data
mov qword [rsp + 24], 0 ; AstNode.extra
mov rdi, [rsp] ; Ast
lea rsi, [rsp + 32] ; &AstNode
lea rsi, [rsp + 8] ; &AstNode
call vec_push
; variable is already a place
mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax
mov qword [rsp + 32], AST_ASSIGNMENT ; AstNode.kind
mov [rsp + 40], rax ; AstNode.data (variable index)
mov qword [rsp + 8], AST_ASSIGNMENT ; AstNode.kind
mov [rsp + 16], rax ; AstNode.data (variable index)
mov dil, TOKEN_EQUALS
call unwrap_token
@ -859,15 +894,15 @@ ast_parse_let:
; mov rdx, rdx ; is_placeness
call ast_place_to_value
mov [rsp + 48], rax ; AstNode.extra (expr index)
mov [rsp + 24], rax ; AstNode.extra (expr index)
mov rdi, [rsp] ; Ast
lea rsi, [rsp + 32] ; &AstNode
lea rsi, [rsp + 8] ; &AstNode
call vec_push
mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax
add rsp, 64
add rsp, 48
xor rdx, rdx ; placeness = false
pop rbp
ret

View file

@ -17,44 +17,47 @@ fn main() {
let src = b"3 + 4";
fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast)) {
fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast) -> u64) {
unsafe {
tokeniser_init_buf(src.as_ptr(), src.len());
let mut ast = Ast {
nodes: util::vec::Vec::new(),
};
let expr_id = parser(&mut ast);
eprintln!("Parsed expression ID: {}", expr_id);
println!("{:#}", &ast);
};
}
// print_ast(b"3 + 4", |ast| unsafe {
// parse_expr(ast);
// });
// print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
// parse_func(ast);
// });
// print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe {
// parse_func(ast);
// });
// print_ast(
// b"fn main() -> void { return (1 + (2 * 3)) / 4; }",
// |ast| unsafe {
// parse_func(ast);
// },
// );
// print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe {
// parse_func(ast);
// });
print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) });
print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
parse_func(ast)
});
print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe {
parse_func(ast)
});
print_ast(
b"fn main() -> void { return (1 + (2 * 3)) / 4; }",
|ast| unsafe { parse_func(ast) },
);
print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe {
parse_func(ast)
});
// print_ast(b"fn main() -> void { let x: u32 = 4; }", |ast| unsafe {
// parse_func(ast);
// });
print_ast(b"fn main() -> void { let x: u32 = 4; }", |ast| unsafe {
parse_func(ast)
});
print_ast(
b"fn main(a: u32) -> void { let x: u32 = a + 4; }",
|ast| unsafe {
parse_func(ast);
},
|ast| unsafe { parse_func(ast) },
);
print_ast(
b"fn main(a: u32) -> void {
let x: u32 = a + 4;
let y: *u32 = &x;
return *y;
}",
|ast| unsafe { parse_func(ast) },
);
}
@ -69,11 +72,17 @@ impl std::fmt::Display for AstNode {
AST_NUMBER => {
write!(f, "Number({})", self.data as usize)
}
AST_DEREF => {
write!(f, "Deref(expr: {})", self.data as usize)
}
AST_ADDRESS_OF => {
write!(f, "AddressOf(expr: {})", self.data as usize)
}
AST_ARG => {
let arg = unsafe { self.data.cast::<util::defs::AstArgument>().read() };
write!(
f,
"Arg(name: {:?}, arg_type: {:?})",
"Arg(name: {:?}, arg_type: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(arg.name, arg.name_len))
},
@ -97,7 +106,7 @@ impl std::fmt::Display for AstNode {
let var_decl = unsafe { self.data.cast::<util::defs::AstVarDecl>().read() };
write!(
f,
"VarDecl(name: {:?}, var_type: {:?})",
"VarDecl(name: {:?}, var_type: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(
var_decl.name,
@ -134,7 +143,7 @@ impl std::fmt::Display for AstNode {
let func = unsafe { self.data.cast::<util::defs::AstFunction>().read() };
write!(
f,
"Function(name: {:?}, args: {:?}, return_type: {:?}, body: {})",
"Function(name: {:?}, args: {:?}, return_type: {}, body: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(func.name, func.name_len))
},

View file

@ -29,11 +29,12 @@ pub const AST_ADDRESS_OF: u8 = 11;
pub const AST_VAR_DECL: u8 = 12;
pub const AST_VAR_REF: u8 = 13;
pub const AST_ARG: u8 = 14;
pub const TYPE_VOID: u32 = 1;
pub const TYPE_BOOL: u32 = 2;
pub const TYPE_I32: u32 = 3;
pub const TYPE_U32: u32 = 4;
pub const TYPE_STR: u32 = 5;
pub const TYPE_VOID: u8 = 1;
pub const TYPE_BOOL: u8 = 2;
pub const TYPE_I32: u8 = 3;
pub const TYPE_U32: u8 = 4;
pub const TYPE_STR: u8 = 5;
pub const TYPE_POINTER: u8 = 6;
pub const TOKEN_EOF: u8 = 0;
pub const TOKEN_LET: u8 = 1;
pub const TOKEN_IF: u8 = 2;
@ -108,6 +109,7 @@ pub struct AstArgument {
#[derive(Debug)]
pub struct Type {
pub kind: u8,
pub data: u64,
}
#[repr(C)]

View file

@ -253,7 +253,11 @@ pub mod vec {
cmp_trampoline::<T, F>,
&raw mut cmp as *mut F as *mut (),
);
if vacant { Err(index) } else { Ok(index) }
if vacant {
Err(index)
} else {
Ok(index)
}
}
}
@ -349,3 +353,32 @@ impl<'a, T: core::fmt::Display> core::fmt::Display for DisplaySlice<'a, T> {
write!(f, "]")
}
}
// Renders a `Type` using its source-level spelling.
//
// Primitive kinds print their keyword (`void`, `bool`, `i32`, `u32`, `str`).
// `TYPE_POINTER` prints `*` followed by the pointee, which is reached by
// interpreting `data` as a `*const Type`; pointer-to-pointer types therefore
// recurse. Any other kind prints `UnknownType`.
impl core::fmt::Display for defs::Type {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self.kind {
            defs::TYPE_VOID => f.write_str("void"),
            defs::TYPE_BOOL => f.write_str("bool"),
            defs::TYPE_I32 => f.write_str("i32"),
            defs::TYPE_U32 => f.write_str("u32"),
            defs::TYPE_STR => f.write_str("str"),
            defs::TYPE_POINTER => {
                // `kind` and `data` are public, so a pointer-kinded `Type`
                // with `data == 0` can reach this safe function. `as_ref`
                // turns that case into `None` instead of dereferencing null.
                //
                // SAFETY: a non-null `data` is assumed to point at a live,
                // properly aligned `Type` (presumably the one the parser
                // allocated for the pointee -- verify against the producer).
                let pointee = unsafe { (self.data as *const defs::Type).as_ref() };
                match pointee {
                    // Borrow rather than `read()`: no bitwise copy of a
                    // non-`Copy` struct is needed just to format it.
                    Some(pointee) => write!(f, "*{pointee}"),
                    None => f.write_str("*<null>"),
                }
            }
            _ => f.write_str("UnknownType"),
        }
    }
}