Compare commits

..

5 commits

Author SHA1 Message Date
janis 2df4d182f9
ast: placeness 2025-10-30 00:31:51 +01:00
janis 099d774634
ast: sub, mul, div 2025-10-30 00:22:24 +01:00
janis 838c96f04f
update python script, add slash and star tokens 2025-10-30 00:11:46 +01:00
janis 886525cf7e
support parenthesised expressions 2025-10-29 23:48:55 +01:00
janis c609fe4ec6
fix blocks, increase AstNode by 1 qword 2025-10-29 23:31:52 +01:00
6 changed files with 353 additions and 160 deletions

View file

@ -10,6 +10,8 @@ section .rdata
AST_NUMBER equ 4
AST_BINARY_OP equ 5
AST_RETURN_STATEMENT equ 6
AST_VALUE_TO_PLACE equ 7
AST_PLACE_TO_VALUE equ 8
TYPE_VOID equ 1
TYPE_BOOL equ 2
@ -18,6 +20,12 @@ section .rdata
TYPE_STR equ 5
;; end-consts
PRECEDENCE_ADD equ 90
PRECEDENCE_SUB equ 90
PRECEDENCE_MUL equ 100
PRECEDENCE_DIV equ 100
PRECEDENCE_REM equ 100
section .text
extern vec_init_with
extern vec_push
@ -58,6 +66,7 @@ global parse_block
;; struct AstNode {
;; kind: u8,
;; data: *const (),
;; extra: usize,
;; }
;;
;; struct Argument {
@ -132,12 +141,13 @@ parse_func:
mov rdx, 48
call memcpy
mov byte [rsp], AST_FUNCTION ; kind
mov [rsp + 8], rdi ; data
mov rdi, [rsp + 48] ; Ast
lea rsi, [rsp]
mov [rsp + 8], rdi ; data
mov qword [rsp + 16], 0 ; extra
mov rdi, [rsp + 48] ; Ast
lea rsi, [rsp] ; &AstNode
call vec_push
mov rax, [rsp + 48] ; Ast
mov rax, [rdi + 8] ; return Ast.nodes.len()
mov rax, [rdi + 8] ; return Ast.nodes.len()
dec rax
add rsp, 48
pop rdi
@ -255,17 +265,21 @@ parse_number:
call panic
;; rdi: *mut Ast
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> u64
;; define-fn: fn parse_primary_expr(ast: *mut Ast) -> (u64, bool)
parse_primary_expr:
push rbp
mov rbp, rsp
sub rsp, 24
sub rsp, 32
mov [rsp], rdi ; Ast
mov dil, TOKEN_NUMBER
call expect_token
test rax, rax
jnz .number
mov dil, TOKEN_LPARENS
call expect_token
test rax, rax
jnz .paren_expr
jmp .panic
.number:
mov rdi, rax ; lexeme ptr
@ -274,12 +288,23 @@ parse_primary_expr:
mov rdi, [rsp] ; Ast
mov byte [rsp + 8], AST_NUMBER ; kind
mov [rsp + 16], rax ; data
lea rsi, [rsp + 8] ; AstNode
mov qword [rsp + 24], 0 ; extra
lea rsi, [rsp + 8] ; &AstNode
call vec_push
mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; return Ast.nodes.len()
dec rax
add rsp, 24
mov rdx, 0 ; placeness = false
jmp .epilogue
.paren_expr:
mov rdi, [rsp] ; Ast
call parse_expr
mov [rsp + 8], rax ; expr
mov dil, TOKEN_RPARENS
call unwrap_token
mov rax, [rsp + 8] ; expr
.epilogue:
add rsp, 32
pop rbp
ret
.panic:
@ -288,7 +313,7 @@ parse_primary_expr:
;; rdi: *mut Ast
;; sil: precedence
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64
;; define-fn: fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> (u64, bool)
parse_binary_expr:
push rbp
mov rbp, rsp
@ -305,7 +330,10 @@ parse_binary_expr:
sub rsp, 64
; lexeme: Lexeme [32..56]
; right: u64 [24..32]
; precedence: u8 [17..18]
; right_placeness: u8 [20..21]
; left_placeness: u8 [19..20]
; our_precedence: u8 [18..19]
; upper_precedence: u8 [17..18]
; operator: u8 [16..17]
; left: u64 [8..16]
; rdi: *mut Ast [0..8]
@ -316,29 +344,72 @@ parse_binary_expr:
call parse_primary_expr
mov [rsp + 8], rax ; left
mov [rsp + 19], dl ; left_placeness
.loop:
lea rdi, [rsp + 32] ; lexeme
call peek_lexeme
mov rax, [rsp + 32]
mov dil, [rsp + 17]
cmp al, dil ; our_precedence <= upper_precedence
jle .done ; also covers some non-binary operator tokens
cmp al, TOKEN_PLUS
je .plus
je .add
cmp al, TOKEN_MINUS
je .sub
cmp al, TOKEN_STAR
je .mul
cmp al, TOKEN_SLASH
je .div
jmp .done
.plus:
.add:
mov dil, TOKEN_PLUS
call unwrap_token
mov byte [rsp + 16], TOKEN_PLUS
mov byte [rsp + 18], PRECEDENCE_ADD
jmp .right
.sub:
mov dil, TOKEN_MINUS
call unwrap_token
mov byte [rsp + 16], TOKEN_MINUS
mov byte [rsp + 18], PRECEDENCE_SUB
jmp .right
.mul:
mov dil, TOKEN_STAR
call unwrap_token
mov byte [rsp + 16], TOKEN_STAR
mov byte [rsp + 18], PRECEDENCE_MUL
jmp .right
.div:
mov dil, TOKEN_SLASH
call unwrap_token
mov byte [rsp + 16], TOKEN_SLASH
mov byte [rsp + 18], PRECEDENCE_DIV
jmp .right
.right:
mov dil, [rsp + 17]
mov al, [rsp + 18] ; our_precedence
cmp al, dil ; our_precedence <= upper_precedence
jle .done
mov rdi, [rsp] ; Ast
mov sil, [rsp + 16]
mov sil, [rsp + 18]
call parse_binary_expr
mov [rsp + 24], rax ; right
mov [rsp + 20], dl ; right_placeness
; convert left and right to values
mov rdi, [rsp] ; Ast
mov rsi, [rsp + 8] ; left
mov dl, [rsp + 19] ; left_placeness
call ast_place_to_value
mov [rsp + 8], rax ; left
mov rdi, [rsp] ; Ast
mov rsi, [rsp + 24] ; right
mov dl, [rsp + 20] ; right_placeness
call ast_place_to_value
mov [rsp + 24], rax ; right
mov rdi, 24
mov rsi, 8
@ -352,6 +423,7 @@ parse_binary_expr:
mov byte [rsp + 32], AST_BINARY_OP ; AstNode.kind
mov [rsp + 40], rax ; AstNode.data
mov qword [rsp + 48], 0 ; AstNode.extra
mov rdi, [rsp] ; Ast
lea rsi, [rsp + 32] ; &AstNode
call vec_push
@ -386,6 +458,9 @@ parse_expr:
parse_statement:
push rbp
mov rbp, rsp
; Ast [24..32]
; AstNode [0..24]
sub rsp, 32
mov [rsp + 24], rdi ; Ast
@ -400,8 +475,9 @@ parse_statement:
call parse_expr
mov byte [rsp], AST_RETURN_STATEMENT ; kind
mov [rsp + 8], rax ; data
mov qword [rsp + 16], 0 ; extra
mov rdi, [rsp + 24] ; Ast
lea rsi, [rsp] ; AstNode
lea rsi, [rsp] ; &AstNode
call vec_push
mov rdi, [rsp + 24] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len()
@ -423,13 +499,6 @@ parse_block:
push rbp
mov rbp, rsp
; start-structs
; struct Block {
; statements: *const u64,
; statements_len: usize,
; }
; end-structs
; Ast: *mut Ast [56..64]
; statements: Vec<Statement> [8..56]
; statement: u64 [0..8]
@ -457,14 +526,15 @@ parse_block:
jnz .done
mov rdi, [rsp + 56] ; Ast
call parse_statement
lea rdi, [rsp + 8] ; vec
mov [rsp], rax ; statement
lea rdi, [rsp + 8] ; vec
mov [rsp], rax ; statement
lea rsi, [rsp]
call vec_push
jmp .loop
.done:
mov rdi, [rsp + 56] ; Ast
lea rsi, [rsp + 8] ; statements vec-slice
mov qword [rsp], AST_BLOCK ; kind
lea rsi, [rsp] ; &AstNode
call vec_push
mov rdi, [rsp + 56] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len()
@ -512,3 +582,57 @@ parse_type:
ret
.panic:
call panic
;; rdi: *mut Ast
;; rsi: index of node
;; rdx: is_placeness
;; fn ast_value_to_place(ast: *mut Ast, node_index: u64, is_placeness: bool) -> u64
;; If the node is a value (is_placeness == false), push an AST_VALUE_TO_PLACE
;; wrapper node referencing it and return the wrapper's index; otherwise the
;; node is already a place and its index is returned unchanged.
;; BUGFIX(review): the original did `cmp dl, 0` / `xor rax, rax` / `je .done` —
;; the xor clobbers ZF between the compare and the branch, so `je` was always
;; taken and the function always returned 0. It also returned 0 (instead of
;; node_index) on the no-conversion path, and skipped conversion exactly when
;; the node was a value, i.e. when conversion was needed.
ast_value_to_place:
push rbp
mov rbp, rsp
mov rax, rsi ; default return: node index unchanged
test dl, dl ; keep test directly before the branch — no flag-clobbering insn between
jnz .done ; already a place: nothing to do
; create new AST node { kind: AST_VALUE_TO_PLACE, data: node_index, extra: 0 }
sub rsp, 32
mov [rsp], rdi ; spill Ast across vec_push
mov byte [rsp + 8], AST_VALUE_TO_PLACE ; kind
mov [rsp + 16], rsi ; data = wrapped node index
mov qword [rsp + 24], 0 ; extra
lea rsi, [rsp + 8] ; &AstNode
call vec_push
mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax ; index of the node just pushed
add rsp, 32
.done:
pop rbp
ret
;; rdi: *mut Ast
;; rsi: index of node
;; rdx: is_placeness
;; fn ast_place_to_value(ast: *mut Ast, node_index: u64, is_placeness: bool) -> u64
;; If the node is a place (is_placeness == true), push an AST_PLACE_TO_VALUE
;; wrapper node referencing it and return the wrapper's index; otherwise the
;; node is already a value and its index is returned unchanged.
;; BUGFIX(review): the original did `cmp dl, 1` / `xor rax, rax` / `je .done` —
;; the xor clobbers ZF between the compare and the branch, so `je` was always
;; taken and the function always returned 0, corrupting the caller's left/right
;; indices in parse_binary_expr. Even without the clobber, the polarity was
;; inverted (it skipped conversion when the node WAS a place) and the
;; no-conversion path returned 0 instead of node_index; callers hand in
;; placeness == 0 for number literals and expect the index back unchanged.
ast_place_to_value:
push rbp
mov rbp, rsp
mov rax, rsi ; default return: node index unchanged
test dl, dl ; keep test directly before the branch — no flag-clobbering insn between
jz .done ; already a value: nothing to do
; create new AST node { kind: AST_PLACE_TO_VALUE, data: node_index, extra: 0 }
sub rsp, 32
mov [rsp], rdi ; spill Ast across vec_push
mov byte [rsp + 8], AST_PLACE_TO_VALUE ; kind
mov [rsp + 16], rsi ; data = wrapped node index
mov qword [rsp + 24], 0 ; extra
lea rsi, [rsp + 8] ; &AstNode
call vec_push
mov rdi, [rsp] ; Ast
mov rax, [rdi + 8] ; Ast.nodes.len()
dec rax ; index of the node just pushed
add rsp, 32
.done:
pop rbp
ret

View file

@ -32,6 +32,8 @@ LEXEMES:
dq LEX_LBRACKET
dq LEX_RBRACKET
dq LEX_VOID
dq LEX_SLASH
dq LEX_STAR
align 8
TOKENS:
@ -66,6 +68,8 @@ TOKENS:
db TOKEN_LBRACKET ;; 28
db TOKEN_RBRACKET ;; 29
db TOKEN_VOID ;; 30
db TOKEN_SLASH ;; 31
db TOKEN_STAR ;; 32
align 8
LEXEME_LENS:
@ -100,9 +104,11 @@ LEXEME_LENS:
dq LEX_LBRACKET_len
dq LEX_RBRACKET_len
dq LEX_VOID_len
dq LEX_SLASH_len
dq LEX_STAR_len
align 8
NUM_LEXEMES: dq 31
NUM_LEXEMES: dq 33
LEX_NOT_A_LEXEME db "<not a lexeme>", 0
LEX_LET db "let"
@ -165,6 +171,10 @@ NUM_LEXEMES: dq 31
LEX_RBRACKET_len equ $ - LEX_RBRACKET
LEX_VOID db "void"
LEX_VOID_len equ $ - LEX_VOID
LEX_SLASH db "/"
LEX_SLASH_len equ $ - LEX_SLASH
LEX_STAR db "*"
LEX_STAR_len equ $ - LEX_STAR
LEX_IDENT db "<identifier>"
LEX_IDENT_len equ $ - LEX_IDENT
LEX_NUMBER db "<number>"
@ -175,39 +185,41 @@ NUM_LEXEMES: dq 31
LEX_COMMENT_len equ $ - LEX_COMMENT
;; start-consts
TOKEN_EOF equ 0
TOKEN_LET equ 1
TOKEN_IF equ 2
TOKEN_ELSE equ 3
TOKEN_FN equ 4
TOKEN_RETURN equ 5
TOKEN_LOOP equ 6
TOKEN_BREAK equ 7
TOKEN_CONTINUE equ 8
TOKEN_TRUE equ 9
TOKEN_FALSE equ 10
TOKEN_BOOL equ 11
TOKEN_ARROW equ 12
TOKEN_I32 equ 13
TOKEN_U32 equ 14
TOKEN_EQUALS equ 15
TOKEN_PLUS equ 16
TOKEN_MINUS equ 17
TOKEN_RPARENS equ 18
TOKEN_LPARENS equ 19
TOKEN_RBRACE equ 20
TOKEN_LBRACE equ 21
TOKEN_COLON equ 22
TOKEN_SEMI equ 23
TOKEN_COMMA equ 24
TOKEN_PIPE equ 25
TOKEN_AMP equ 26
TOKEN_EQEQ equ 27
TOKEN_LBRACKET equ 28
TOKEN_RBRACKET equ 29
TOKEN_VOID equ 30
TOKEN_IDENT equ 31
TOKEN_NUMBER equ 32
TOKEN_STRING equ 33
TOKEN_COMMENT equ 34
TOKEN_EOF equ 0 ; :u8
TOKEN_LET equ 1 ; :u8
TOKEN_IF equ 2 ; :u8
TOKEN_ELSE equ 3 ; :u8
TOKEN_FN equ 4 ; :u8
TOKEN_RETURN equ 5 ; :u8
TOKEN_LOOP equ 6 ; :u8
TOKEN_BREAK equ 7 ; :u8
TOKEN_CONTINUE equ 8 ; :u8
TOKEN_TRUE equ 9 ; :u8
TOKEN_FALSE equ 10 ; :u8
TOKEN_BOOL equ 11 ; :u8
TOKEN_ARROW equ 12 ; :u8
TOKEN_I32 equ 13 ; :u8
TOKEN_U32 equ 14 ; :u8
TOKEN_EQUALS equ 15 ; :u8
TOKEN_PLUS equ 16 ; :u8
TOKEN_MINUS equ 17 ; :u8
TOKEN_RPARENS equ 18 ; :u8
TOKEN_LPARENS equ 19 ; :u8
TOKEN_RBRACE equ 20 ; :u8
TOKEN_LBRACE equ 21 ; :u8
TOKEN_COLON equ 22 ; :u8
TOKEN_SEMI equ 23 ; :u8
TOKEN_COMMA equ 24 ; :u8
TOKEN_PIPE equ 25 ; :u8
TOKEN_AMP equ 26 ; :u8
TOKEN_EQEQ equ 27 ; :u8
TOKEN_LBRACKET equ 28 ; :u8
TOKEN_RBRACKET equ 29 ; :u8
TOKEN_VOID equ 30 ; :u8
TOKEN_SLASH equ 31 ; :u8
TOKEN_STAR equ 32 ; :u8
TOKEN_IDENT equ 33 ; :u8
TOKEN_NUMBER equ 34 ; :u8
TOKEN_STRING equ 35 ; :u8
TOKEN_COMMENT equ 36 ; :u8
;; end-consts

View file

@ -44,6 +44,7 @@ DEFINE_FN_RE = re.compile(r'^\s*;+\s*define-fn:\s*(.+)$', re.IGNORECASE)
CONST_EQU_RE = re.compile(r'^\s*([A-Za-z_]\w*)\s+equ\s+(.+)$', re.IGNORECASE)
STRUCT_START_RE = re.compile(r'^\s*struct\s+([A-Za-z_]\w*)\s*\{') # after comment markers stripped
RUST_FENCE_RE = re.compile(r'^\s*```\s*(rust)?\s*$', re.IGNORECASE) # matches ``` or ```rust (after stripping leading comment)
TYPE_ANNOT_RE = re.compile(r':\s*([A-Za-z0-9_\<\>\*\s\[\]\:&]+)') # matches :u8, : *const u8, Vec<T>, etc.
def strip_leading_semicolons(line: str) -> str:
@ -110,12 +111,12 @@ def parse_file(path: Path) -> Dict[str, Any]:
"""
Parse a single assembly file and return dict with keys: 'functions', 'consts', 'structs', 'rust_blocks'
- functions: list of signature strings (e.g. "parse_ast(data: *const u8) -> Ast")
- consts: list of (name, value)
- consts: list of (name, value, type)
- structs: list of (name, field_lines)
- rust_blocks: list of rust code blocks; each block is list[str] of code lines (no fences, uncommented)
"""
functions: List[str] = []
consts: List[Tuple[str, str]] = []
consts: List[Tuple[str, str, str]] = []
structs: List[Tuple[str, List[str]]] = []
rust_blocks: List[List[str]] = []
@ -158,8 +159,20 @@ def parse_file(path: Path) -> Dict[str, Any]:
m = CONST_EQU_RE.match(s)
if m:
name = m.group(1)
value = m.group(2).strip()
consts.append((name, value))
rest = m.group(2).strip()
# Defaults
value = rest
ctype = 'u32'
# If there's an inline comment (assembly comments start with ';'), split it off.
if ';' in rest:
val_part, comment_part = rest.split(';', 1)
value = val_part.strip()
# Strip any leading semicolons left in comment_part (e.g. ";; :u8")
comment = comment_part.lstrip(';').strip()
mtype = TYPE_ANNOT_RE.search(comment)
if mtype:
ctype = mtype.group(1).strip()
consts.append((name, value, ctype))
in_consts = False
const_buffer = []
i += 1
@ -215,7 +228,7 @@ def parse_file(path: Path) -> Dict[str, Any]:
}
def render_rust(function_sigs: List[str], consts: List[Tuple[str, str]],
def render_rust(function_sigs: List[str], consts: List[Tuple[str, str, str]],
structs: List[Tuple[str, List[str]]], rust_blocks: List[List[str]]) -> str:
parts: List[str] = []
parts.append('#![allow(non_camel_case_types, dead_code, non_upper_case_globals, improper_ctypes)]')
@ -230,8 +243,8 @@ def render_rust(function_sigs: List[str], consts: List[Tuple[str, str]],
parts.append('') # blank line
# Consts
for name, value in consts:
parts.append(f'pub const {name}: u32 = {value};')
for name, value, ctype in consts:
parts.append(f'pub const {name}: {ctype} = {value};')
if consts:
parts.append('')
@ -265,7 +278,7 @@ def main(argv=None):
args = parser.parse_args(argv)
combined_functions: List[str] = []
combined_consts: List[Tuple[str, str]] = []
combined_consts: List[Tuple[str, str, str]] = []
combined_structs: List[Tuple[str, List[str]]] = []
combined_rust_blocks: List[List[str]] = []

View file

@ -17,30 +17,41 @@ fn main() {
let src = b"3 + 4";
unsafe {
// tokeniser_init_buf(src.as_ptr(), src.len());
// let mut ast = Ast {
// nodes: util::vec::Vec::new(),
// };
// let expr_id = parse_expr(&mut ast);
// println!("Parsed expression with ID: {}", expr_id);
// println!("{:#}", &ast);
let src = b"fn main() -> void { return 1 + 2; }";
tokeniser_init_buf(src.as_ptr(), src.len());
let mut ast = Ast {
nodes: util::vec::Vec::new(),
fn print_ast(src: &[u8], parser: impl FnOnce(&mut Ast)) {
unsafe {
tokeniser_init_buf(src.as_ptr(), src.len());
let mut ast = Ast {
nodes: util::vec::Vec::new(),
};
let expr_id = parser(&mut ast);
println!("{:#}", &ast);
};
let expr_id = parse_func(&mut ast);
println!("Parsed function with ID: {}", expr_id);
println!("{:#}", &ast);
}
print_ast(b"3 + 4", |ast| unsafe {
parse_expr(ast);
});
print_ast(b"fn main() -> void { return 1 + 2; }", |ast| unsafe {
parse_func(ast);
});
print_ast(b"fn main() -> void { return (1 + (2)); }", |ast| unsafe {
parse_func(ast);
});
print_ast(
b"fn main() -> void { return (1 + (2 * 3)) / 4; }",
|ast| unsafe {
parse_func(ast);
},
);
print_ast(b"fn main() -> void { return 1 + 2 * 3; }", |ast| unsafe {
parse_func(ast);
});
}
impl std::fmt::Display for AstNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use util::defs::{
BinaryExpr, AST_BINARY_OP, AST_FUNCTION, AST_NUMBER, AST_RETURN_STATEMENT,
BinaryExpr, AST_BINARY_OP, AST_BLOCK, AST_FUNCTION, AST_NUMBER, AST_RETURN_STATEMENT,
};
match self.kind as u32 {
AST_NUMBER => {
@ -74,6 +85,11 @@ impl std::fmt::Display for AstNode {
func.body
)
}
AST_BLOCK => {
write!(f, "Block(statements: {:?})", unsafe {
std::slice::from_raw_parts(self.data.cast::<u64>(), self.extra as usize)
})
}
_ => write!(f, "UnknownNode"),
}
}

View file

@ -4,8 +4,8 @@
unsafe extern "C" {
pub unsafe fn parse_func(ast: *mut Ast) -> u64;
pub unsafe fn parse_args(ast: *mut Ast) -> (*const Argument, usize);
pub unsafe fn parse_primary_expr(ast: *mut Ast) -> u64;
pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> u64;
pub unsafe fn parse_primary_expr(ast: *mut Ast) -> (u64, bool);
pub unsafe fn parse_binary_expr(ast: *mut Ast, precedence: u8) -> (u64, bool);
pub unsafe fn parse_expr(ast: *mut Ast) -> u64;
pub unsafe fn parse_statement(ast: *mut Ast) -> u64;
pub unsafe fn parse_block(ast: *mut Ast) -> u64;
@ -18,46 +18,50 @@ pub const AST_VARIABLE: u32 = 3;
pub const AST_NUMBER: u32 = 4;
pub const AST_BINARY_OP: u32 = 5;
pub const AST_RETURN_STATEMENT: u32 = 6;
pub const AST_VALUE_TO_PLACE: u32 = 7;
pub const AST_PLACE_TO_VALUE: u32 = 8;
pub const TYPE_VOID: u32 = 1;
pub const TYPE_BOOL: u32 = 2;
pub const TYPE_I32: u32 = 3;
pub const TYPE_U32: u32 = 4;
pub const TYPE_STR: u32 = 5;
pub const TOKEN_EOF: u32 = 0;
pub const TOKEN_LET: u32 = 1;
pub const TOKEN_IF: u32 = 2;
pub const TOKEN_ELSE: u32 = 3;
pub const TOKEN_FN: u32 = 4;
pub const TOKEN_RETURN: u32 = 5;
pub const TOKEN_LOOP: u32 = 6;
pub const TOKEN_BREAK: u32 = 7;
pub const TOKEN_CONTINUE: u32 = 8;
pub const TOKEN_TRUE: u32 = 9;
pub const TOKEN_FALSE: u32 = 10;
pub const TOKEN_BOOL: u32 = 11;
pub const TOKEN_ARROW: u32 = 12;
pub const TOKEN_I32: u32 = 13;
pub const TOKEN_U32: u32 = 14;
pub const TOKEN_EQUALS: u32 = 15;
pub const TOKEN_PLUS: u32 = 16;
pub const TOKEN_MINUS: u32 = 17;
pub const TOKEN_RPARENS: u32 = 18;
pub const TOKEN_LPARENS: u32 = 19;
pub const TOKEN_RBRACE: u32 = 20;
pub const TOKEN_LBRACE: u32 = 21;
pub const TOKEN_COLON: u32 = 22;
pub const TOKEN_SEMI: u32 = 23;
pub const TOKEN_COMMA: u32 = 24;
pub const TOKEN_PIPE: u32 = 25;
pub const TOKEN_AMP: u32 = 26;
pub const TOKEN_EQEQ: u32 = 27;
pub const TOKEN_LBRACKET: u32 = 28;
pub const TOKEN_RBRACKET: u32 = 29;
pub const TOKEN_VOID: u32 = 30;
pub const TOKEN_IDENT: u32 = 31;
pub const TOKEN_NUMBER: u32 = 32;
pub const TOKEN_STRING: u32 = 33;
pub const TOKEN_COMMENT: u32 = 34;
pub const TOKEN_EOF: u8 = 0;
pub const TOKEN_LET: u8 = 1;
pub const TOKEN_IF: u8 = 2;
pub const TOKEN_ELSE: u8 = 3;
pub const TOKEN_FN: u8 = 4;
pub const TOKEN_RETURN: u8 = 5;
pub const TOKEN_LOOP: u8 = 6;
pub const TOKEN_BREAK: u8 = 7;
pub const TOKEN_CONTINUE: u8 = 8;
pub const TOKEN_TRUE: u8 = 9;
pub const TOKEN_FALSE: u8 = 10;
pub const TOKEN_BOOL: u8 = 11;
pub const TOKEN_ARROW: u8 = 12;
pub const TOKEN_I32: u8 = 13;
pub const TOKEN_U32: u8 = 14;
pub const TOKEN_EQUALS: u8 = 15;
pub const TOKEN_PLUS: u8 = 16;
pub const TOKEN_MINUS: u8 = 17;
pub const TOKEN_RPARENS: u8 = 18;
pub const TOKEN_LPARENS: u8 = 19;
pub const TOKEN_RBRACE: u8 = 20;
pub const TOKEN_LBRACE: u8 = 21;
pub const TOKEN_COLON: u8 = 22;
pub const TOKEN_SEMI: u8 = 23;
pub const TOKEN_COMMA: u8 = 24;
pub const TOKEN_PIPE: u8 = 25;
pub const TOKEN_AMP: u8 = 26;
pub const TOKEN_EQEQ: u8 = 27;
pub const TOKEN_LBRACKET: u8 = 28;
pub const TOKEN_RBRACKET: u8 = 29;
pub const TOKEN_VOID: u8 = 30;
pub const TOKEN_SLASH: u8 = 31;
pub const TOKEN_STAR: u8 = 32;
pub const TOKEN_IDENT: u8 = 33;
pub const TOKEN_NUMBER: u8 = 34;
pub const TOKEN_STRING: u8 = 35;
pub const TOKEN_COMMENT: u8 = 36;
#[repr(C)]
#[derive(Debug)]
@ -70,6 +74,7 @@ pub struct Ast {
pub struct AstNode {
pub kind: u8,
pub data: *const (),
pub extra: usize,
}
#[repr(C)]
@ -105,11 +110,4 @@ pub struct BinaryExpr {
pub right: u64,
}
#[repr(C)]
#[derive(Debug)]
pub struct Block {
pub statements: *const u64,
pub statements_len: usize,
}
use super::vec::Vec;

View file

@ -79,6 +79,8 @@ fn collect_tokens() -> Vec<Lexeme> {
fn main() {
unsafe {
use util::defs::*;
// assert initial state
assert_eq!((&raw const input_file).read(), 0);
assert_eq!((&raw const buffer_len).read(), 0);
@ -134,16 +136,16 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(31, "this-is-an-ident"),
Lexeme(31, "another_ident123"),
Lexeme(31, "_underscore_test"),
Lexeme(31, "mixedCASEIdent"),
Lexeme(31, "number12345"),
Lexeme(31, "____"),
Lexeme(31, "_"),
Lexeme(TOKEN_IDENT, "this-is-an-ident"),
Lexeme(TOKEN_IDENT, "another_ident123"),
Lexeme(TOKEN_IDENT, "_underscore_test"),
Lexeme(TOKEN_IDENT, "mixedCASEIdent"),
Lexeme(TOKEN_IDENT, "number12345"),
Lexeme(TOKEN_IDENT, "____"),
Lexeme(TOKEN_IDENT, "_"),
Lexeme(17, ""),
Lexeme(31, "leading-minus"),
Lexeme(31, "trailing-minus-"),
Lexeme(TOKEN_IDENT, "leading-minus"),
Lexeme(TOKEN_IDENT, "trailing-minus-"),
]
);
@ -155,7 +157,7 @@ fn main() {
&collect_tokens()[..],
&[
Lexeme(4, ""),
Lexeme(31, "my-function"),
Lexeme(TOKEN_IDENT, "my-function"),
Lexeme(19, ""),
Lexeme(18, ""),
Lexeme(12, ""),
@ -174,7 +176,7 @@ fn main() {
assert_eq!(expect_token(2).into_option(), None);
assert_eq!(expect_token(4).into_option().unwrap().as_str(), "fn");
assert_eq!(unwrap_token(31).as_str(), "my-function");
assert_eq!(unwrap_token(TOKEN_IDENT).as_str(), "my-function");
eprint!("Initializing tokeniser.. ");
tokeniser_init(c"tests/tokens/comment.l".as_ptr());
@ -183,15 +185,15 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(34, ""),
Lexeme(TOKEN_COMMENT, ""),
Lexeme(4, ""),
Lexeme(31, "my-function"),
Lexeme(TOKEN_IDENT, "my-function"),
Lexeme(19, ""),
Lexeme(18, ""),
Lexeme(12, ""),
Lexeme(11, ""),
Lexeme(21, ""),
Lexeme(34, ""),
Lexeme(TOKEN_COMMENT, ""),
Lexeme(5, ""),
Lexeme(10, ""),
Lexeme(23, ""),
@ -206,11 +208,11 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(32, "1234"),
Lexeme(32, "123_345_"),
Lexeme(32, "1234____56"),
Lexeme(32, "1"),
Lexeme(32, "0"),
Lexeme(TOKEN_NUMBER, "1234"),
Lexeme(TOKEN_NUMBER, "123_345_"),
Lexeme(TOKEN_NUMBER, "1234____56"),
Lexeme(TOKEN_NUMBER, "1"),
Lexeme(TOKEN_NUMBER, "0"),
]
);
@ -221,11 +223,14 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(33, "\"this is a string\""),
Lexeme(33, "\"another\nstring\nspanning multiple\n lines\""),
Lexeme(33, "\"string with a \\\"quoted\\\" word\""),
Lexeme(33, "\"a\""),
Lexeme(33, "\"\"")
Lexeme(TOKEN_STRING, "\"this is a string\""),
Lexeme(
TOKEN_STRING,
"\"another\nstring\nspanning multiple\n lines\""
),
Lexeme(TOKEN_STRING, "\"string with a \\\"quoted\\\" word\""),
Lexeme(TOKEN_STRING, "\"a\""),
Lexeme(TOKEN_STRING, "\"\"")
],
);
@ -236,7 +241,11 @@ fn main() {
assert_eq!(
&collect_tokens()[..],
&[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")],
&[
Lexeme(TOKEN_NUMBER, "3"),
Lexeme(16, "+"),
Lexeme(TOKEN_NUMBER, "4")
],
);
eprint!("Initializing tokeniser.. ");
@ -248,21 +257,42 @@ fn main() {
&collect_tokens()[..],
&[
Lexeme(4, "fn"),
Lexeme(31, "main"),
Lexeme(TOKEN_IDENT, "main"),
Lexeme(19, "("),
Lexeme(18, ")"),
Lexeme(12, "->"),
Lexeme(30, "void"),
Lexeme(21, "{"),
Lexeme(5, "return"),
Lexeme(32, "1"),
Lexeme(TOKEN_NUMBER, "1"),
Lexeme(16, "+"),
Lexeme(32, "2"),
Lexeme(TOKEN_NUMBER, "2"),
Lexeme(23, ";"),
Lexeme(20, "}"),
],
);
eprint!("Initializing tokeniser.. ");
let src = b"(b / d + c) * 42;";
tokeniser_init_buf(src.as_ptr(), src.len());
eprintln!("ok.");
assert_eq!(
&collect_tokens()[..],
&[
Lexeme(19, "("),
Lexeme(33, "b"),
Lexeme(31, "/"),
Lexeme(33, "d"),
Lexeme(16, "+"),
Lexeme(33, "c"),
Lexeme(18, ")"),
Lexeme(32, "*"),
Lexeme(34, "42"),
Lexeme(23, ";")
],
);
eprintln!("Finished tokenising.");
}
}