initial ast

This commit is contained in:
janis 2025-10-29 20:39:32 +01:00
parent bf9d07b462
commit 5ae3e17693
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
2 changed files with 82 additions and 21 deletions

View file

@ -3,6 +3,7 @@ default rel
%include "src/tokeniser.inc" %include "src/tokeniser.inc"
section .rdata section .rdata
;; start-constants
AST_FUNCTION equ 1 AST_FUNCTION equ 1
AST_BLOCK equ 2 AST_BLOCK equ 2
AST_VARIABLE equ 3 AST_VARIABLE equ 3
@ -15,6 +16,7 @@ section .rdata
TYPE_I32 equ 3 TYPE_I32 equ 3
TYPE_U32 equ 4 TYPE_U32 equ 4
TYPE_STR equ 5 TYPE_STR equ 5
;; end-constants
section .text section .text
extern vec_init_with extern vec_init_with
@ -36,7 +38,16 @@ extern peek_expect_token
extern str_to_int extern str_to_int
global parse_func
global parse_args
global parse_expr
global parse_binary_expr
global parse_primary_expr
global parse_statement
global parse_block
;; start very simple, with only functions and addition ;; start very simple, with only functions and addition
;; start-structs
;; struct Ast { ;; struct Ast {
;; nodes: Vec<AstNode>, ;; nodes: Vec<AstNode>,
;; } ;; }
@ -55,9 +66,10 @@ extern str_to_int
;; struct Type { ;; struct Type {
;; kind: u8, ;; kind: u8,
;; } ;; }
;; end-structs
;; rdi: *mut Ast ;; rdi: *mut Ast
;; fn parse_func(ast: *mut Ast) -> u64 ;; define-fn: fn parse_func(ast: *mut Ast) -> u64
parse_func: parse_func:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -98,6 +110,7 @@ parse_func:
mov [rsp + 40], rax ; body mov [rsp + 40], rax ; body
.epilogue: .epilogue:
mov rdi, 48 mov rdi, 48
mov rsi, 8
call bump_alloc call bump_alloc
mov rsi, rsp mov rsi, rsp
mov rdi, rax mov rdi, rax
@ -110,6 +123,7 @@ parse_func:
call vec_push call vec_push
mov rax, [rsp + 48] ; Ast mov rax, [rsp + 48] ; Ast
mov rax, [rdi + 8] ; return Ast.nodes.len() mov rax, [rdi + 8] ; return Ast.nodes.len()
dec rax
add rsp, 48 add rsp, 48
pop rdi pop rdi
pop rbp pop rbp
@ -125,7 +139,7 @@ parse_func:
call panic call panic
;; rdi: *mut Ast ;; rdi: *mut Ast
;; fn parse_args(ast: *mut Ast) -> (*const Argument, usize) ;; define-fn: fn parse_args(ast: *mut Ast) -> (*const Argument, usize)
parse_args: parse_args:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
@ -229,27 +243,27 @@ parse_number:
parse_primary_expr: parse_primary_expr:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
sub rsp, 8 sub rsp, 24
mov [rsp], rdi ; Ast mov [rsp], rdi ; Ast
mov dil, TOKEN_NUMBER mov dil, TOKEN_NUMBER
call expect_token call expect_token
test rax, rax test rax, rax
jz .number jnz .number
jmp .panic jmp .panic
.number: .number:
mov rdi, rax ; lexeme ptr mov rdi, rax ; lexeme ptr
mov rsi, rdx ; lexeme len mov rsi, rdx ; lexeme len
call parse_number call parse_number
mov rdi, [rsp] mov rdi, [rsp] ; Ast
push rdi mov byte [rsp + 8], AST_NUMBER ; kind
mov byte [rsp], AST_NUMBER ; kind mov [rsp + 16], rax ; data
mov [rsp + 8], rax ; data lea rsi, [rsp + 8] ; AstNode
lea rsi, [rsp]
call vec_push call vec_push
pop rdi mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; return Ast.nodes.len() mov rax, [rdi + 8] ; return Ast.nodes.len()
add rsp, 8 dec rax
add rsp, 24
pop rbp pop rbp
ret ret
.panic: .panic:
@ -262,6 +276,14 @@ parse_primary_expr:
parse_binary_expr: parse_binary_expr:
push rbp push rbp
mov rbp, rsp mov rbp, rsp
; size: 24, align: 8
; struct BinaryExpr {
; left: u64,
; operator: u8,
; right: u64,
; }
sub rsp, 64 sub rsp, 64
; lexeme: Lexeme [32..56] ; lexeme: Lexeme [32..56]
; right: u64 [24..32] ; right: u64 [24..32]
@ -269,6 +291,7 @@ parse_binary_expr:
; operator: u8 [16..17] ; operator: u8 [16..17]
; left: u64 [8..16] ; left: u64 [8..16]
; rdi: *mut Ast [0..8] ; rdi: *mut Ast [0..8]
mov [rsp], rdi ; Ast mov [rsp], rdi ; Ast
mov byte [rsp + 17], sil ; upper_precedence mov byte [rsp + 17], sil ; upper_precedence
mov byte [rsp + 16], 0 mov byte [rsp + 16], 0
@ -299,19 +322,30 @@ parse_binary_expr:
call parse_binary_expr call parse_binary_expr
mov [rsp + 24], rax ; right mov [rsp + 24], rax ; right
mov byte [rsp + 32], AST_BINARY_OP ; kind mov rdi, 24
lea rax, [rsp + 8] mov rsi, 8
mov [rsp + 40], rax ; data ptr call bump_alloc
mov rdx, [rsp + 8] ; left
mov [rax + 0], rdx ; left
mov dl, byte [rsp + 16] ; operator
mov byte [rax + 8], dl ; operator
mov rdx, [rsp + 24] ; right
mov [rax + 16], rdx ; right
mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind
mov [rsp + 40], rax ; AstNode.data
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
lea rsi, [rsp + 32] ; AstNode lea rsi, [rsp + 32] ; &AstNode
call vec_push call vec_push
mov rdi, [rsp] ; Ast mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len() mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax
mov [rsp + 8], rax ; left mov [rsp + 8], rax ; left
jmp .loop
.done: .done:
mov rax, [rsp + 8] ; left mov rax, [rsp + 8] ; left
add rsp, 56 add rsp, 64
pop rbp pop rbp
ret ret
@ -323,6 +357,11 @@ parse_expr:
mov rbp, rsp mov rbp, rsp
sub rsp, 8 sub rsp, 8
mov [rsp], rdi ; Ast mov [rsp], rdi ; Ast
mov sil, 0
call parse_binary_expr
add rsp, 8
pop rbp
ret
;; rdi: *mut Ast ;; rdi: *mut Ast
;; fn parse_statement(ast: *mut Ast) -> u64 ;; fn parse_statement(ast: *mut Ast) -> u64
@ -345,10 +384,11 @@ parse_statement:
lea rax, [rsp + 16] ; data ptr lea rax, [rsp + 16] ; data ptr
mov [rsp + 8], rax ; data mov [rsp + 8], rax ; data
mov rdi, [rsp + 24] ; Ast mov rdi, [rsp + 24] ; Ast
mov rsi, rsp ; AstNode mov rsi, rsp ; AstNode
call vec_push call vec_push
mov rdi, [rsp + 24] ; Ast mov rdi, [rsp + 24] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len() mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax
mov [rsp], rax mov [rsp], rax
mov dil, TOKEN_SEMI mov dil, TOKEN_SEMI
@ -393,6 +433,7 @@ parse_block:
call vec_push call vec_push
mov rdi, [rsp + 56] ; Ast mov rdi, [rsp + 56] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len() mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax
add rsp, 56 add rsp, 56
pop rdi pop rdi
pop rbp pop rbp

View file

@ -3,7 +3,7 @@ mod util;
// Root AST container passed by pointer to the assembly parser functions.
// The visible assembly reads `[ast + 8]` as `Ast.nodes.len()` after each
// `vec_push`, so the in-memory layout of `nodes` is relied on across the FFI.
#[repr(C)] #[repr(C)]
struct Ast { struct Ast {
nodes: util::BlobVec, nodes: util::vec::Vec<AstNode>,
} }
#[repr(C)] #[repr(C)]
@ -14,17 +14,25 @@ struct Argument {
} }
// One AST node as pushed by the assembly parser through `vec_push`.
// The assembly builds it on the stack as a kind byte followed by an 8-byte
// data word at offset 8.
#[repr(C)] #[repr(C)]
#[derive(Debug)]
struct AstNode { struct AstNode {
// Kind tag; the assembly stores AST_* constants here (e.g. AST_NUMBER, AST_BINARY_OP).
node_type: u8, node_type: u8,
// Kind-specific payload. In the visible assembly: the value returned by
// `parse_number` for AST_NUMBER, and a pointer to a bump-allocated
// { left: u64, operator: u8, right: u64 } record for AST_BINARY_OP.
data: usize, data: usize,
} }
use util::FFISlice;
// Entry points implemented in assembly. In the visible assembly, the parse_*
// routines that return u64 yield the index of the node they pushed
// (`Ast.nodes.len()` after `vec_push`, then decremented by one).
unsafe extern "C" { unsafe extern "C" {
unsafe fn bump_init(); unsafe fn bump_init();
unsafe fn parse_func(ast: *mut Ast) -> u64; unsafe fn parse_func(ast: *mut Ast) -> u64;
unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize); unsafe fn parse_args(ast: *mut Ast) -> FFISlice;
unsafe fn tokeniser_init(path: *const i8) -> (); unsafe fn parse_expr(ast: *mut Ast) -> u64;
// NOTE(review): the assembly for parse_binary_expr stores `sil` as
// `upper_precedence`, and parse_expr sets `sil` to 0 before calling it, so
// this declaration appears to be missing a second (u8 precedence) argument.
// Confirm and fix the signature before calling it from Rust.
unsafe fn parse_binary_expr(ast: *mut Ast) -> u64;
unsafe fn parse_primary_expr(ast: *mut Ast) -> u64;
unsafe fn parse_statement(ast: *mut Ast) -> u64;
unsafe fn parse_block(ast: *mut Ast) -> u64;
// Points the tokeniser at an in-memory byte buffer (pointer + length).
unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
} }
fn main() { fn main() {
@ -32,4 +40,16 @@ fn main() {
bump_init(); bump_init();
} }
println!("Bump allocator initialized."); println!("Bump allocator initialized.");
let src = b"3 + 4";
unsafe {
tokeniser_init_buf(src.as_ptr(), src.len());
let mut ast = Ast {
nodes: util::vec::Vec::new(),
};
let expr_id = parse_expr(&mut ast);
println!("Parsed expression with ID: {}", expr_id);
println!("{:#?}", ast.nodes.as_slice());
}
} }