Compare commits

..

6 commits

Author SHA1 Message Date
janis 56354237c6
build symbol table 2025-10-30 22:16:45 +01:00
janis ad3b0205c2
ast walking 2025-10-30 21:41:27 +01:00
janis 525b78cdf4
pointer type 2025-10-30 14:35:49 +01:00
janis e54313b6a2
args, var-ref 2025-10-30 13:48:08 +01:00
janis 84e87824ba
ast: placeness, assignment, var decl, deref, address-of 2025-10-30 01:41:16 +01:00
janis 4491df79a2
tokeniser: int/float types 2025-10-30 00:35:35 +01:00
8 changed files with 1268 additions and 109 deletions

File diff suppressed because it is too large Load diff

View file

@ -27,6 +27,9 @@ global unwrap_token
global peek_expect_token
global peek_lexeme
global tokeniser_get_cursor
global tokeniser_set_cursor
;; =============================
;; Tokeniser functions
;; =============================
@ -583,3 +586,11 @@ peek_lexeme:
pop rax
pop rbp
ret
;; Returns the tokeniser's current `cursor` value in rax.
;; Takes no arguments and touches no other register.
;; NOTE(review): whether `cursor` is a byte offset or a pointer is not
;; visible here - confirm against its definition.
tokeniser_get_cursor:
mov rax, [rel cursor] ; load the 64-bit cursor (RIP-relative)
ret
;; rdi: new cursor value
;; Overwrites the tokeniser's `cursor` with rdi; no validation is performed.
;; NOTE(review): presumably meant to be fed a value previously obtained from
;; tokeniser_get_cursor (save/restore for backtracking) - confirm with callers.
tokeniser_set_cursor:
mov [rel cursor], rdi ; store the 64-bit cursor (RIP-relative)
ret

View file

@ -34,6 +34,16 @@ LEXEMES:
dq LEX_VOID
dq LEX_SLASH
dq LEX_STAR
dq LEX_U8
dq LEX_I8
dq LEX_U16
dq LEX_I16
dq LEX_U64
dq LEX_I64
dq LEX_USIZE
dq LEX_ISIZE
dq LEX_F32
dq LEX_F64
align 8
TOKENS:
@ -70,6 +80,16 @@ TOKENS:
db TOKEN_VOID ;; 30
db TOKEN_SLASH ;; 31
db TOKEN_STAR ;; 32
db TOKEN_U8 ;; 33
db TOKEN_I8 ;; 34
db TOKEN_U16 ;; 35
db TOKEN_I16 ;; 36
db TOKEN_U64 ;; 37
db TOKEN_I64 ;; 38
db TOKEN_USIZE ;; 39
db TOKEN_ISIZE ;; 40
db TOKEN_F32 ;; 41
db TOKEN_F64 ;; 42
align 8
LEXEME_LENS:
@ -106,9 +126,19 @@ LEXEME_LENS:
dq LEX_VOID_len
dq LEX_SLASH_len
dq LEX_STAR_len
dq LEX_U8_len
dq LEX_I8_len
dq LEX_U16_len
dq LEX_I16_len
dq LEX_U64_len
dq LEX_I64_len
dq LEX_USIZE_len
dq LEX_ISIZE_len
dq LEX_F32_len
dq LEX_F64_len
align 8
NUM_LEXEMES: dq 33
NUM_LEXEMES: dq 43
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
LEX_LET db "let"
@ -175,6 +205,26 @@ NUM_LEXEMES: dq 33
LEX_SLASH_len equ $ - LEX_SLASH
LEX_STAR db "*"
LEX_STAR_len equ $ - LEX_STAR
LEX_U8 db "u8"
LEX_U8_len equ $ - LEX_U8
LEX_I8 db "i8"
LEX_I8_len equ $ - LEX_I8
LEX_U16 db "u16"
LEX_U16_len equ $ - LEX_U16
LEX_I16 db "i16"
LEX_I16_len equ $ - LEX_I16
LEX_U64 db "u64"
LEX_U64_len equ $ - LEX_U64
LEX_I64 db "i64"
LEX_I64_len equ $ - LEX_I64
LEX_USIZE db "usize"
LEX_USIZE_len equ $ - LEX_USIZE
LEX_ISIZE db "isize"
LEX_ISIZE_len equ $ - LEX_ISIZE
LEX_F32 db "f32"
LEX_F32_len equ $ - LEX_F32
LEX_F64 db "f64"
LEX_F64_len equ $ - LEX_F64
LEX_IDENT db "<identifier>"
LEX_IDENT_len equ $ - LEX_IDENT
LEX_NUMBER db "<number>"
@ -218,8 +268,18 @@ NUM_LEXEMES: dq 33
TOKEN_VOID equ 30 ; :u8
TOKEN_SLASH equ 31 ; :u8
TOKEN_STAR equ 32 ; :u8
TOKEN_IDENT equ 33 ; :u8
TOKEN_NUMBER equ 34 ; :u8
TOKEN_STRING equ 35 ; :u8
TOKEN_COMMENT equ 36 ; :u8
TOKEN_U8 equ 33 ; :u8
TOKEN_I8 equ 34 ; :u8
TOKEN_U16 equ 35 ; :u8
TOKEN_I16 equ 36 ; :u8
TOKEN_U64 equ 37 ; :u8
TOKEN_I64 equ 38 ; :u8
TOKEN_USIZE equ 39 ; :u8
TOKEN_ISIZE equ 40 ; :u8
TOKEN_F32 equ 41 ; :u8
TOKEN_F64 equ 42 ; :u8
TOKEN_IDENT equ 43 ; :u8
TOKEN_NUMBER equ 44 ; :u8
TOKEN_STRING equ 45 ; :u8
TOKEN_COMMENT equ 46 ; :u8
;; end-consts

View file

@ -18,6 +18,7 @@ global vec_drop_last
global vec_swap
global vec_remove
global vec_get
global vec_get_or
global vec_drop
global vec_find
global vec_insert
@ -104,6 +105,29 @@ vec_init_with:
pop rbp
ret
;; rdi: pointer to Vec struct
;; rsi: index
;; rdx: pointer to default value
;; fn vec_get_or(vec: *mut Vec, index: usize, default: *mut u8) -> *mut u8
;;
;; Returns a pointer to the element at `index`, or `default` unchanged when
;; `index` is out of bounds (index >= vec.len). Never panics.
vec_get_or:
push rbp
mov rbp, rsp
; if (index >= vec.len) return default;
mov rax, [rdi + 8] ; len
cmp rsi, rax
jae .default ; unsigned compare: index and len are usize
; return &mut vec.data[index * vec.item_size];
mov rax, [rdi + 24] ; item_size
mul rsi ; rdx:rax = index * item_size (clobbers rdx; default is not needed on this path)
mov rsi, [rdi] ; data
add rax, rsi ; data + index * item_size
pop rbp
ret
.default:
mov rax, rdx ; hand the caller's default pointer back
pop rbp
ret
;; rdi: pointer to Vec struct
;; rsi: index
;; fn vec_get(vec: *mut Vec, index: usize) -> *mut u8
@ -520,8 +544,8 @@ vec_binary_search_by:
mov rax, [rdi + 8] ; len
dec rax ; high
mov qword [rsp + 0x20], 0 ; low
mov [rsp + 0x28], rax ; mid
mov qword [rsp + 0x20], 0 ; low
mov qword [rsp + 0x28], 0 ; mid
mov [rsp + 0x30], rax ; high
mov rax, [rdi + 24] ; item_size
mov [rsp + 0x38], rax ; item_size

View file

@ -1,3 +1,5 @@
#![feature(debug_closure_helpers)]
#[path = "shared/shared.rs"]
mod util;
@ -17,46 +19,129 @@ fn main() {
let src = b"3 + 4";
fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast)) {
fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast) -> u64) {
unsafe {
tokeniser_init_buf(src.as_ptr(), src.len());
let mut ast = Ast {
nodes: util::vec::Vec::new(),
};
let expr_id = parser(&mut ast);
eprintln!("Parsed expression ID: {}", expr_id);
println!("{:#}", &ast);
// unsafe extern "C" fn visit_node(_this: *mut (), ast: *mut Ast, node_id: u64) {
// let ast = unsafe { &*ast };
// let node = ast.nodes.get(node_id as usize).unwrap();
// eprintln!("Visiting node {node_id}: {node}");
// }
// util::defs::ast_walk_for_each(&mut ast, expr_id, core::ptr::null_mut(), visit_node);
let mut symtable = core::mem::MaybeUninit::<util::defs::SymbolTable>::uninit();
util::defs::ast_build_symtable(&mut ast, expr_id, &mut symtable);
let symtable = symtable.assume_init();
use util::DisplayedSliceExt;
println!(
"Symbol Table: {:#?}",
symtable.symtable.as_slice().displayed()
);
};
}
print_ast(b"3 + 4", |ast| unsafe {
parse_expr(ast);
});
print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
parse_func(ast);
});
print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe {
parse_func(ast);
});
// print_ast(b"3 + 4", |ast| unsafe { parse_expr(ast) });
// print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
// parse_func(ast)
// });
// print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe {
// parse_func(ast)
// });
// print_ast(
// b"fn main() -> void { return (1 + (2 * 3)) / 4; }",
// |ast| unsafe { parse_func(ast) },
// );
// print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe {
// parse_func(ast)
// });
// print_ast(b"fn main() -> void { let x: u32 = 4; }", |ast| unsafe {
// parse_func(ast)
// });
print_ast(
b"fn main() -> void { return (1 + (2 * 3)) / 4; }",
|ast| unsafe {
parse_func(ast);
},
b"fn main(a: u32) -> void { let x: u32 = a + 4; }",
|ast| unsafe { parse_func(ast) },
);
print_ast(
b"fn main(a: u32) -> void {
let x: u32 = a + 4;
let y: *u32 = &x;
return *y;
}",
|ast| unsafe { parse_func(ast) },
);
print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe {
parse_func(ast);
});
}
impl std::fmt::Display for AstNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use util::defs::{
BinaryExpr, AST_BINARY_OP, AST_BLOCK, AST_FUNCTION, AST_NUMBER, AST_RETURN_STATEMENT,
BinaryExpr, AST_ADDRESS_OF, AST_ARG, AST_ASSIGNMENT, AST_BINARY_OP, AST_BLOCK,
AST_DEREF, AST_FUNCTION, AST_NUMBER, AST_PLACE_TO_VALUE, AST_RETURN_STATEMENT,
AST_VALUE_TO_PLACE, AST_VAR_DECL, AST_VAR_REF,
};
match self.kind as u32 {
match self.kind {
AST_NUMBER => {
write!(f, "Number({})", self.data as usize)
}
AST_DEREF => {
write!(f, "Deref(expr: {})", self.data as usize)
}
AST_ADDRESS_OF => {
write!(f, "AddressOf(expr: {})", self.data as usize)
}
AST_ARG => {
let arg = unsafe { self.data.cast::<util::defs::AstArgument>().read() };
write!(
f,
"Arg(name: {:?}, arg_type: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(arg.name, arg.name_len))
},
arg.arg_type,
)
}
AST_VAR_REF => {
let var_ref = unsafe { self.data.cast::<util::defs::AstVarRef>().read() };
if var_ref.resolved != 0 {
write!(f, "VarRef({})", var_ref.resolved)
} else {
write!(f, "VarRef(name: {:?})", unsafe {
std::str::from_utf8(std::slice::from_raw_parts(
var_ref.name,
var_ref.name_len,
))
},)
}
}
AST_VAR_DECL => {
let var_decl = unsafe { self.data.cast::<util::defs::AstVarDecl>().read() };
write!(
f,
"VarDecl(name: {:?}, var_type: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(
var_decl.name,
var_decl.name_len,
))
},
var_decl.var_type,
)
}
AST_ASSIGNMENT => {
write!(
f,
"Assignment(dest: {}, src: {})",
self.data as usize, self.extra
)
}
AST_BINARY_OP => {
let BinaryExpr {
left,
@ -77,10 +162,13 @@ impl std::fmt::Display for AstNode {
let func = unsafe { self.data.cast::<util::defs::AstFunction>().read() };
write!(
f,
"Function(name: {:?}, return_type: {:?}, body: {})",
"Function(name: {:?}, args: {:?}, return_type: {}, body: {})",
unsafe {
std::str::from_utf8(std::slice::from_raw_parts(func.name, func.name_len))
},
unsafe {
std::slice::from_raw_parts(func.args.cast::<u64>(), func.args_len as usize)
},
func.return_type,
func.body
)
@ -90,6 +178,12 @@ impl std::fmt::Display for AstNode {
std::slice::from_raw_parts(self.data.cast::<u64>(), self.extra as usize)
})
}
AST_PLACE_TO_VALUE => {
write!(f, "PlaceToValue(place: {})", self.data as usize)
}
AST_VALUE_TO_PLACE => {
write!(f, "ValueToPlace(value: {})", self.data as usize)
}
_ => write!(f, "UnknownNode"),
}
}
@ -107,3 +201,54 @@ impl core::fmt::Display for Ast {
writeln!(f, "\n]")
}
}
/// Decodes `len` bytes starting at `ptr` as text for diagnostic output.
///
/// A null pointer or a zero length yields an empty string instead of building
/// a slice from it, and bytes that are not valid UTF-8 are replaced with
/// U+FFFD rather than being trusted blindly.
///
/// # Safety
///
/// When `ptr` is non-null and `len` is non-zero, `ptr` must be valid for reads
/// of `len` bytes, and that memory must stay alive and unmodified for `'a`.
unsafe fn sym_bytes_lossy<'a>(ptr: *const u8, len: usize) -> std::borrow::Cow<'a, str> {
    if ptr.is_null() || len == 0 {
        return std::borrow::Cow::Borrowed("");
    }
    // SAFETY: `ptr` is non-null here and the caller guarantees it is readable
    // for `len` bytes for the duration of `'a`.
    String::from_utf8_lossy(unsafe { core::slice::from_raw_parts(ptr, len) })
}

/// Pretty-printer for symbol-table entries.
///
/// Output has the shape `SymEntry { key: Key { kind, scope, span, ident }, value: Value { .. } }`.
/// The `value` part depends on the raw `index`/`extra` pair:
///
/// * `extra == 0`: `index` is printed as `ast_index`;
/// * `extra != 0 && index != 0`: `index` is read as a pointer to `extra` bytes
///   of text and printed as `ident`;
/// * otherwise both raw numbers are printed.
///
/// Relies on `DebugStruct::field_with`, i.e. the nightly
/// `debug_closure_helpers` feature.
impl core::fmt::Display for util::defs::SymEntry {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let key = &self.key;
        f.debug_struct("SymEntry")
            .field_with("key", |f| {
                f.debug_struct("Key")
                    .field_with("kind", |f| {
                        f.write_str(match key.kind {
                            util::defs::SYM_KEY_SCOPE => "Scope",
                            util::defs::SYM_KEY_SCOPE_NAME => "ScopeName",
                            util::defs::SYM_KEY_PARENT_SCOPE => "ParentScope",
                            util::defs::SYM_KEY_ARG => "Argument",
                            util::defs::SYM_KEY_VAR => "Variable",
                            _ => "Unknown",
                        })
                    })
                    .field("scope", &key.scope_index)
                    .field("span", &key.span)
                    .field_with("ident", |f| {
                        // SAFETY: assumes a non-null `ident` points at
                        // `ident_len` readable bytes, as filled in by the
                        // symbol-table builder (not visible here - confirm).
                        let ident = unsafe { sym_bytes_lossy(key.ident, key.ident_len) };
                        f.write_str(&ident)
                    })
                    .finish()
            })
            .field_with("value", |f| {
                let stct = &mut f.debug_struct("Value");
                if self.extra == 0 {
                    stct.field("ast_index", &self.index).finish()
                } else if self.index != 0 {
                    stct.field_with("ident", |f| {
                        // SAFETY: in this branch `index` is treated as a pointer
                        // to `extra` bytes of text, exactly as the entry encodes
                        // it; validity is the producer's responsibility.
                        let text = unsafe {
                            sym_bytes_lossy(self.index as *const u8, self.extra as usize)
                        };
                        f.write_str(&text)
                    })
                    .finish()
                } else {
                    stct.field("index", &self.index)
                        .field("extra", &self.extra)
                        .finish()
                }
            })
            .finish()
    }
}

View file

@ -3,28 +3,49 @@
unsafe extern "C" {
pub unsafe fn parse_func(ast: *mut Ast) -> u64;
pub unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize);
pub unsafe fn parse_args(ast: *mut Ast) -> (*const u64, usize);
pub unsafe fn parse_primary_expr(ast: *mut Ast) -> (u64, bool);
pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> (u64, bool);
pub unsafe fn parse_expr(ast: *mut Ast) -> u64;
pub unsafe fn parse_statement(ast: *mut Ast) -> u64;
pub unsafe fn parse_block(ast: *mut Ast) -> u64;
pub unsafe fn parse_type(ast: *mut Ast) -> Type;
pub unsafe fn parse_prefix_expr(ast: *mut Ast) -> (u64, bool);
pub unsafe fn parse_assignment(ast: *mut Ast) -> (u64, bool);
pub unsafe fn ast_parse_let(ast: *mut Ast) -> (u64, bool);
pub unsafe fn symkey_cmp(a: *const SymKey, b: *const SymKey) -> i32;
pub unsafe fn ast_build_symtable(ast: *mut Ast, root_index: u64, symtable: *mut core::mem::MaybeUninit<SymbolTable>);
pub unsafe fn ast_walk_for_each(ast: *mut Ast, start_index: u64, ctx: *mut (), for_each: unsafe extern "C" fn(ctx: *mut (), *mut Ast, node_index: u64));
pub unsafe fn ast_resolve_var_refs(ast: *mut Ast);
}
pub const AST_FUNCTION: u32 = 1;
pub const AST_BLOCK: u32 = 2;
pub const AST_VARIABLE: u32 = 3;
pub const AST_NUMBER: u32 = 4;
pub const AST_BINARY_OP: u32 = 5;
pub const AST_RETURN_STATEMENT: u32 = 6;
pub const AST_VALUE_TO_PLACE: u32 = 7;
pub const AST_PLACE_TO_VALUE: u32 = 8;
pub const TYPE_VOID: u32 = 1;
pub const TYPE_BOOL: u32 = 2;
pub const TYPE_I32: u32 = 3;
pub const TYPE_U32: u32 = 4;
pub const TYPE_STR: u32 = 5;
pub const AST_FUNCTION: u8 = 1;
pub const AST_BLOCK: u8 = 2;
pub const AST_VARIABLE: u8 = 3;
pub const AST_NUMBER: u8 = 4;
pub const AST_BINARY_OP: u8 = 5;
pub const AST_RETURN_STATEMENT: u8 = 6;
pub const AST_VALUE_TO_PLACE: u8 = 7;
pub const AST_PLACE_TO_VALUE: u8 = 8;
pub const AST_ASSIGNMENT: u8 = 9;
pub const AST_DEREF: u8 = 10;
pub const AST_ADDRESS_OF: u8 = 11;
pub const AST_VAR_DECL: u8 = 12;
pub const AST_VAR_REF: u8 = 13;
pub const AST_ARG: u8 = 14;
pub const TYPE_VOID: u8 = 1;
pub const TYPE_BOOL: u8 = 2;
pub const TYPE_I32: u8 = 3;
pub const TYPE_U32: u8 = 4;
pub const TYPE_STR: u8 = 5;
pub const TYPE_POINTER: u8 = 6;
pub const SYM_KEY_SCOPE: u8 = 1;
pub const SYM_KEY_SCOPE_NAME: u8 = 2;
pub const SYM_KEY_PARENT_SCOPE: u8 = 3;
pub const SYM_KEY_START_LOCALS: u8 = 4;
pub const SYM_KEY_ARG: u8 = 5;
pub const SYM_KEY_VAR: u8 = 6;
pub const SYM_KEY_END_LOCALS: u8 = 7;
pub const TOKEN_EOF: u8 = 0;
pub const TOKEN_LET: u8 = 1;
pub const TOKEN_IF: u8 = 2;
@ -58,10 +79,20 @@ pub const TOKEN_RBRACKET: u8 = 29;
pub const TOKEN_VOID: u8 = 30;
pub const TOKEN_SLASH: u8 = 31;
pub const TOKEN_STAR: u8 = 32;
pub const TOKEN_IDENT: u8 = 33;
pub const TOKEN_NUMBER: u8 = 34;
pub const TOKEN_STRING: u8 = 35;
pub const TOKEN_COMMENT: u8 = 36;
pub const TOKEN_U8: u8 = 33;
pub const TOKEN_I8: u8 = 34;
pub const TOKEN_U16: u8 = 35;
pub const TOKEN_I16: u8 = 36;
pub const TOKEN_U64: u8 = 37;
pub const TOKEN_I64: u8 = 38;
pub const TOKEN_USIZE: u8 = 39;
pub const TOKEN_ISIZE: u8 = 40;
pub const TOKEN_F32: u8 = 41;
pub const TOKEN_F64: u8 = 42;
pub const TOKEN_IDENT: u8 = 43;
pub const TOKEN_NUMBER: u8 = 44;
pub const TOKEN_STRING: u8 = 45;
pub const TOKEN_COMMENT: u8 = 46;
#[repr(C)]
#[derive(Debug)]
@ -79,7 +110,7 @@ pub struct AstNode {
#[repr(C)]
#[derive(Debug)]
pub struct Argument {
pub struct AstArgument {
pub name: *const u8,
pub name_len: usize,
pub arg_type: Type,
@ -89,6 +120,7 @@ pub struct Argument {
#[derive(Debug)]
pub struct Type {
pub kind: u8,
pub data: u64,
}
#[repr(C)]
@ -96,12 +128,20 @@ pub struct Type {
pub struct AstFunction {
pub name: *const u8,
pub name_len: usize,
pub args: *const Argument,
pub args: *const u64,
pub args_len: usize,
pub return_type: Type,
pub body: u64,
}
/// Payload of a variable-reference AST node, laid out with `repr(C)` so it can
/// be shared with the non-Rust side of the parser.
#[repr(C)]
#[derive(Debug)]
pub struct AstVarRef {
    /// Resolution result; `0` means "not resolved yet".
    /// NOTE(review): presumably the AST index of the declaration this
    /// reference resolves to - confirm against the resolver.
    pub resolved: u64,
    /// Pointer to the first byte of the referenced name (not NUL-terminated;
    /// always used together with `name_len`).
    pub name: *const u8,
    /// Length of the name in bytes.
    pub name_len: usize,
}
#[repr(C)]
#[derive(Debug)]
pub struct BinaryExpr {
@ -110,4 +150,43 @@ pub struct BinaryExpr {
pub right: u64,
}
/// Payload of a variable-declaration AST node, laid out with `repr(C)`.
#[repr(C)]
#[derive(Debug)]
pub struct AstVarDecl {
    /// Pointer to the first byte of the declared name (used together with
    /// `name_len`; no terminator is implied).
    pub name: *const u8,
    /// Length of the name in bytes.
    pub name_len: usize,
    /// Declared type of the variable.
    pub var_type: Type,
}
/// `repr(C)` description of an assignment: a destination and a source.
/// NOTE(review): both fields are presumably AST node indices - confirm
/// against the code that builds assignment nodes.
#[repr(C)]
#[derive(Debug)]
pub struct AstAssignment {
    /// The assignment target.
    pub variable: u64,
    /// The value being assigned.
    pub expr: u64,
}
/// `repr(C)` wrapper around the vector of symbol entries.
///
/// `ast_build_symtable` takes a pointer to an uninitialised value of this type
/// as its output parameter.
#[repr(C)]
#[derive(Debug)]
pub struct SymbolTable {
    /// All entries of the table, stored in the project's own `Vec` type.
    /// NOTE(review): presumably kept sorted by `SymKey` (see `symkey_cmp`) -
    /// confirm against the builder.
    pub symtable: Vec<SymEntry>,
}
/// Key half of a symbol-table entry, laid out with `repr(C)`.
///
/// Two keys can be ordered with the external `symkey_cmp` function.
#[repr(C)]
#[derive(Debug)]
pub struct SymKey {
    /// Entry category; one of the `SYM_KEY_*` constants.
    pub kind: u8,
    /// Scope this key belongs to.
    /// NOTE(review): presumably the AST index of the scope node - confirm.
    pub scope_index: u64,
    /// Source span associated with the key; the encoding is not visible here.
    pub span: u64,
    /// Pointer to the identifier bytes (used together with `ident_len`).
    /// NOTE(review): may be null for kinds that carry no identifier - confirm.
    pub ident: *const u8,
    /// Length of the identifier in bytes.
    pub ident_len: usize,
}
/// One symbol-table record: a lookup key plus two raw payload words.
///
/// The meaning of `index`/`extra` depends on the entry; what each combination
/// encodes is decided by the code that builds the table.
#[repr(C)]
#[derive(Debug)]
pub struct SymEntry {
    /// Lookup key of this entry.
    pub key: SymKey,
    /// First payload word.
    /// NOTE(review): appears to be either an AST index or, when `extra` is
    /// non-zero, a pointer to text - confirm against the builder.
    pub index: u64,
    /// Second payload word.
    /// NOTE(review): appears to be a byte length when `index` is a pointer -
    /// confirm against the builder.
    pub extra: u64,
}
use super::vec::Vec;

View file

@ -253,7 +253,11 @@ pub mod vec {
cmp_trampoline::<T, F>,
&raw mut cmp as *mut F as *mut (),
);
if vacant { Err(index) } else { Ok(index) }
if vacant {
Err(index)
} else {
Ok(index)
}
}
}
@ -349,3 +353,52 @@ impl<'a, T: core::fmt::Display> core::fmt::Display for DisplaySlice<'a, T> {
write!(f, "]")
}
}
/// Renders a `Type` using source-level spelling: `void`, `bool`, `i32`, `u32`,
/// `str`, and `*<pointee>` for pointers. Any other kind prints `UnknownType`.
impl core::fmt::Display for defs::Type {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Pointers are the only kind with a payload, so deal with them first.
        if self.kind == defs::TYPE_POINTER {
            let pointee_ptr = self.data as *const defs::Type;
            // SAFETY: for pointer types `data` is read as the address of the
            // pointee `Type`. NOTE(review): assumes the producer always stores
            // a valid, aligned, non-null pointer there - confirm.
            let pointee = unsafe { core::ptr::read(pointee_ptr) };
            return write!(f, "*{pointee}");
        }

        // Every remaining kind maps to a fixed label.
        let label = match self.kind {
            defs::TYPE_VOID => "void",
            defs::TYPE_BOOL => "bool",
            defs::TYPE_I32 => "i32",
            defs::TYPE_U32 => "u32",
            defs::TYPE_STR => "str",
            _ => "UnknownType",
        };
        f.write_str(label)
    }
}
/// Adapter that makes a value's `Display` output available through `Debug`,
/// so `Display`-only types can be used with `{:?}`-based helpers.
///
/// `repr(transparent)` gives the wrapper the same layout as the wrapped `T`.
#[repr(transparent)]
pub struct Displayed<T: core::fmt::Display>(pub T);

impl<T: core::fmt::Display> core::fmt::Debug for Displayed<T> {
    /// Writes the wrapped value with a plain `{}`. The outer formatter's
    /// flags (width, `#`, ...) are not forwarded to the inner value.
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Self(inner) = self;
        out.write_fmt(format_args!("{}", inner))
    }
}
pub trait DisplayedSliceExt {
type Displayed: core::fmt::Debug;
fn displayed(self) -> Self::Displayed;
}
impl<'a, T: core::fmt::Display> DisplayedSliceExt for &'a [T] {
type Displayed = &'a [Displayed<T>];
fn displayed(self) -> Self::Displayed {
unsafe { core::mem::transmute(self) }
}
}

View file

@ -1,7 +1,7 @@
#[path = "shared/shared.rs"]
mod util;
use util::{BlobVec, ffi::*, vec::Vec};
use util::{ffi::*, vec::Vec, BlobVec};
fn main() {
static mut DROPS: usize = 1;
@ -104,4 +104,8 @@ fn main() {
_ = vec.insert_sorted(35, cmp);
assert_eq!(vec.as_slice(), &[20, 30, 35, 40, 50]);
let mut vec = Vec::<u32>::new_with(100);
vec.insert_sorted(50, cmp);
assert_eq!(vec.as_slice(), &[50]);
}