Compare commits

...

2 commits

Author SHA1 Message Date
janis d2597d2de7
can parse basic function 2025-10-29 23:17:02 +01:00
janis 8740fea99d
fix tokeniser for: void 2025-10-29 22:39:27 +01:00
8 changed files with 120 additions and 37 deletions

View file

@ -77,6 +77,18 @@ parse_func:
push rbp
mov rbp, rsp
push rdi
; start-structs
; struct AstFunction {
; name: *const u8,
; name_len: usize,
; args: *const Argument,
; args_len: usize,
; return_type: Type,
; body: u64,
; }
; end-structs
sub rsp, 48
; name: *const u8 [0..8]
; name_len: usize [8..16]
@ -380,17 +392,16 @@ parse_statement:
mov dil, TOKEN_RETURN
call expect_token
test rax, rax
jz .return
jnz .return
jmp .panic
.return:
mov rdi, [rsp + 24] ; Ast
call parse_expr
mov [rsp + 16], rax ; expression
mov byte [rsp], AST_RETURN_STATEMENT ; kind
lea rax, [rsp + 16] ; data ptr
mov [rsp + 8], rax ; data
mov rdi, [rsp + 24] ; Ast
mov rsi, rsp ; AstNode
mov [rsp + 8], rax ; data
mov rdi, [rsp + 24] ; Ast
lea rsi, [rsp] ; AstNode
call vec_push
mov rdi, [rsp + 24] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len()
@ -411,7 +422,6 @@ parse_statement:
parse_block:
push rbp
mov rbp, rsp
push rdi
; start-structs
; struct Block {
@ -420,31 +430,46 @@ parse_block:
; }
; end-structs
sub rsp, 56
; statements: Vec<Statement> [0..40]
; statement: u64 [40..48]
; Ast: *mut Ast [56..64]
; statements: Vec<Statement> [8..56]
; statement: u64 [0..8]
sub rsp, 64
mov [rsp + 56], rdi ; Ast
mov dil, TOKEN_LBRACE
call unwrap_token
.loop:
mov rdi, [rsp + 16] ; Ast
call parse_statement
mov dil, TOKEN_RBRACE
call peek_expect_token
test rax, rax
je .done
lea rdi, [rsp + 16] ; vec
mov [rsp + 8], rax ; statement
lea rsi, [rsp + 8]
jnz .done
lea rdi, [rsp + 8]
mov rsi, 8 ; size of statement
mov rdx, 0 ; drop = None
mov rcx, 64 ; capacity
call vec_init_with
.loop:
mov dil, TOKEN_RBRACE
call peek_expect_token
test rax, rax
jnz .done
mov rdi, [rsp + 56] ; Ast
call parse_statement
lea rdi, [rsp + 8] ; vec
mov [rsp], rax ; statement
lea rsi, [rsp]
call vec_push
jmp .loop
.done:
mov rdi, [rsp + 56] ; Ast
lea rsi, [rsp + 16] ; statements vec-slice
lea rsi, [rsp + 8] ; statements vec-slice
call vec_push
mov rdi, [rsp + 56] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax
add rsp, 56
pop rdi
add rsp, 64
pop rbp
ret

View file

@ -318,14 +318,11 @@ is_number:
cmp r14, r13
jge .number
mov dil, [r12 + r14]
call is_whitespace
test rax, rax
jne .number
cmp dil, '_'
je .loop_next
call is_numeric
test rax, rax
je .not_number
je .number
.loop_next:
inc r14
jmp .loop
@ -560,11 +557,13 @@ unwrap_token:
.panic:
call panic
;; returns 0 if token not found, else returns lexeme (ptr, len)
;; dil: expected token
peek_expect_token:
push rbp
mov rbp, rsp
push qword [rel cursor]
mov rax, [rel cursor]
push rax
call expect_token
pop rdi
mov [rel cursor], rdi
@ -576,7 +575,8 @@ peek_lexeme:
push rbp
mov rbp, rsp
push rdi
push qword [rel cursor] ; save cursor
mov rax, [rel cursor] ; current cursor
push rax
call find_lexeme
pop rdi
mov [rel cursor], rdi ; restore cursor

View file

@ -102,7 +102,7 @@ LEXEME_LENS:
dq LEX_VOID_len
align 8
NUM_LEXEMES: dq 30
NUM_LEXEMES: dq 31
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
LEX_LET db "let"

View file

@ -7,7 +7,7 @@ unsafe extern "C" {
unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> ();
}
use util::defs::{parse_expr, Ast, AstNode};
use util::defs::{parse_expr, parse_func, Ast, AstNode};
fn main() {
unsafe {
@ -18,19 +18,30 @@ fn main() {
let src = b"3 + 4";
unsafe {
// tokeniser_init_buf(src.as_ptr(), src.len());
// let mut ast = Ast {
// nodes: util::vec::Vec::new(),
// };
// let expr_id = parse_expr(&mut ast);
// println!("Parsed expression with ID: {}", expr_id);
// println!("{:#}", &ast);
let src = b"fn main() -> void { return 1 + 2; }";
tokeniser_init_buf(src.as_ptr(), src.len());
let mut ast = Ast {
nodes: util::vec::Vec::new(),
};
let expr_id = parse_expr(&mut ast);
println!("Parsed expression with ID: {}", expr_id);
let expr_id = parse_func(&mut ast);
println!("Parsed function with ID: {}", expr_id);
println!("{:#}", &ast);
}
}
impl std::fmt::Display for AstNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use util::defs::{BinaryExpr, AST_BINARY_OP, AST_NUMBER};
use util::defs::{
BinaryExpr, AST_BINARY_OP, AST_FUNCTION, AST_NUMBER, AST_RETURN_STATEMENT,
};
match self.kind as u32 {
AST_NUMBER => {
write!(f, "Number({})", self.data as usize)
@ -47,6 +58,22 @@ impl std::fmt::Display for AstNode {
operator, left, right
)
}
AST_RETURN_STATEMENT => {
let return_expr_id = self.data as usize;
write!(f, "ReturnStatement(expr: {})", return_expr_id)
}
AST_FUNCTION => {
let func = unsafe { self.data.cast::<util::defs::AstFunction>().read() };
write!(
f,
"Function(name: {:?}, return_type: {:?}, body: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(func.name, func.name_len))
},
func.return_type,
func.body
)
}
_ => write!(f, "UnknownNode"),
}
}

View file

@ -86,6 +86,17 @@ pub struct Type {
pub kind: u8,
}
/// Function node payload shared between the assembly parser and Rust.
///
/// `#[repr(C)]` keeps the fields in declaration order; the same field list
/// (name, name_len, args, args_len, return_type, body) is declared in the
/// layout comment of the assembly `parse_func`, so fields must not be
/// reordered or retyped here without updating that side as well.
#[repr(C)]
#[derive(Debug)]
pub struct AstFunction {
/// Pointer to the first byte of the function name; read together with
/// `name_len` as a UTF-8 byte slice when the node is displayed.
pub name: *const u8,
/// Length of the name in bytes.
pub name_len: usize,
/// Pointer to the function's arguments.
/// NOTE(review): presumably the first of `args_len` contiguous `Argument`
/// values — confirm against the assembly that fills this in.
pub args: *const Argument,
/// Number of arguments (assumed to be the element count behind `args` — TODO confirm).
pub args_len: usize,
/// Declared return type of the function.
pub return_type: Type,
/// Function body reference.
/// NOTE(review): looks like the index of the body's block node in
/// `Ast.nodes` (`parse_block` returns `nodes.len() - 1`) — confirm.
pub body: u64,
}
#[repr(C)]
#[derive(Debug)]
pub struct BinaryExpr {

View file

@ -253,11 +253,7 @@ pub mod vec {
cmp_trampoline::<T, F>,
&raw mut cmp as *mut F as *mut (),
);
if vacant {
Err(index)
} else {
Ok(index)
}
if vacant { Err(index) } else { Ok(index) }
}
}

View file

@ -239,6 +239,30 @@ fn main() {
&[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")],
);
eprint!("Initializing tokeniser.. ");
let src = b"fn main() -> void { return 1 + 2; }";
tokeniser_init_buf(src.as_ptr(), src.len());
eprintln!("ok.");
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(4, "fn"),
Lexeme(31, "main"),
Lexeme(19, "("),
Lexeme(18, ")"),
Lexeme(12, "->"),
Lexeme(30, "void"),
Lexeme(21, "{"),
Lexeme(5, "return"),
Lexeme(32, "1"),
Lexeme(16, "+"),
Lexeme(32, "2"),
Lexeme(23, ";"),
Lexeme(20, "}"),
],
);
eprintln!("Finished tokenising.");
}
}

View file

@ -1,7 +1,7 @@
#[path = "shared/shared.rs"]
mod util;
use util::{ffi::*, vec::Vec, BlobVec};
use util::{BlobVec, ffi::*, vec::Vec};
fn main() {
static mut DROPS: usize = 1;