From 8740fea99dc2dcc5f1c18299f0434c236fd84d42 Mon Sep 17 00:00:00 2001 From: janis Date: Wed, 29 Oct 2025 22:39:27 +0100 Subject: [PATCH] fix tokeniser for: void --- lang/src/tokeniser.asm | 5 +---- lang/src/tokeniser.inc | 2 +- lang/tests/ast.rs | 17 +++++++++++++---- lang/tests/shared/shared.rs | 6 +----- lang/tests/tokens.rs | 24 ++++++++++++++++++++++++ lang/tests/vec.rs | 2 +- 6 files changed, 41 insertions(+), 15 deletions(-) diff --git a/lang/src/tokeniser.asm b/lang/src/tokeniser.asm index 9a84f37..d94e0f4 100644 --- a/lang/src/tokeniser.asm +++ b/lang/src/tokeniser.asm @@ -318,14 +318,11 @@ is_number: cmp r14, r13 jge .number mov dil, [r12 + r14] - call is_whitespace - test rax, rax - jne .number cmp dil, '_' je .loop_next call is_numeric test rax, rax - je .not_number + je .number .loop_next: inc r14 jmp .loop diff --git a/lang/src/tokeniser.inc b/lang/src/tokeniser.inc index a0e48a0..ca061b8 100644 --- a/lang/src/tokeniser.inc +++ b/lang/src/tokeniser.inc @@ -102,7 +102,7 @@ LEXEME_LENS: dq LEX_VOID_len align 8 -NUM_LEXEMES: dq 30 +NUM_LEXEMES: dq 31 LEX_NOT_A_LEXEME db "", 0 LEX_LET db "let" diff --git a/lang/tests/ast.rs b/lang/tests/ast.rs index 1f00627..64c2d95 100644 --- a/lang/tests/ast.rs +++ b/lang/tests/ast.rs @@ -7,7 +7,7 @@ unsafe extern "C" { unsafe fn tokeniser_init_buf(bytes: *const u8, len: usize) -> (); } -use util::defs::{parse_expr, Ast, AstNode}; +use util::defs::{Ast, AstNode, parse_expr, parse_func}; fn main() { unsafe { @@ -18,19 +18,28 @@ fn main() { let src = b"3 + 4"; unsafe { + // tokeniser_init_buf(src.as_ptr(), src.len()); + // let mut ast = Ast { + // nodes: util::vec::Vec::new(), + // }; + // let expr_id = parse_expr(&mut ast); + // println!("Parsed expression with ID: {}", expr_id); + // println!("{:#}", &ast); + + let src = b"fn main() -> void { return 1 + 2; }"; tokeniser_init_buf(src.as_ptr(), src.len()); let mut ast = Ast { nodes: util::vec::Vec::new(), }; - let expr_id = parse_expr(&mut ast); - println!("Parsed expression with ID: {}", expr_id); + let expr_id = parse_func(&mut ast); + println!("Parsed function with ID: {}", expr_id); println!("{:#}", &ast); } } impl std::fmt::Display for AstNode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use util::defs::{BinaryExpr, AST_BINARY_OP, AST_NUMBER}; + use util::defs::{AST_BINARY_OP, AST_NUMBER, BinaryExpr}; match self.kind as u32 { AST_NUMBER => { write!(f, "Number({})", self.data as usize) diff --git a/lang/tests/shared/shared.rs b/lang/tests/shared/shared.rs index 2e586d2..4b0d1ad 100644 --- a/lang/tests/shared/shared.rs +++ b/lang/tests/shared/shared.rs @@ -253,11 +253,7 @@ pub mod vec { cmp_trampoline::, &raw mut cmp as *mut F as *mut (), ); - if vacant { - Err(index) - } else { - Ok(index) - } + if vacant { Err(index) } else { Ok(index) } } } diff --git a/lang/tests/tokens.rs b/lang/tests/tokens.rs index 24c80fe..9236ce2 100644 --- a/lang/tests/tokens.rs +++ b/lang/tests/tokens.rs @@ -239,6 +239,30 @@ fn main() { &[Lexeme(32, "3"), Lexeme(16, "+"), Lexeme(32, "4")], ); + eprint!("Initializing tokeniser.. "); + let src = b"fn main() -> void { return 1 + 2; }"; + tokeniser_init_buf(src.as_ptr(), src.len()); + eprintln!("ok."); + + assert_eq!( + &collect_tokens()[..], + &[ + Lexeme(4, "fn"), + Lexeme(31, "main"), + Lexeme(19, "("), + Lexeme(18, ")"), + Lexeme(12, "->"), + Lexeme(30, "void"), + Lexeme(21, "{"), + Lexeme(5, "return"), + Lexeme(32, "1"), + Lexeme(16, "+"), + Lexeme(32, "2"), + Lexeme(23, ";"), + Lexeme(20, "}"), + ], + ); + eprintln!("Finished tokenising."); } } diff --git a/lang/tests/vec.rs b/lang/tests/vec.rs index 8c83d0f..da791ff 100644 --- a/lang/tests/vec.rs +++ b/lang/tests/vec.rs @@ -1,7 +1,7 @@ #[path = "shared/shared.rs"] mod util; -use util::{ffi::*, vec::Vec, BlobVec}; +use util::{BlobVec, ffi::*, vec::Vec}; fn main() { static mut DROPS: usize = 1;