From 7b43442ba832aa164815f53fc6896887eee77c03 Mon Sep 17 00:00:00 2001
From: janis
Date: Thu, 16 Oct 2025 23:50:30 +0200
Subject: [PATCH] overcomplicating things

---
 lang/Makefile           |   8 +-
 lang/src/int_to_str.asm |  54 +++++++
 lang/src/main.asm       | 326 +++++++++++++++++++++++++++++++++++++---
 3 files changed, 361 insertions(+), 27 deletions(-)
 create mode 100644 lang/src/int_to_str.asm

diff --git a/lang/Makefile b/lang/Makefile
index 955f18f..3391362 100644
--- a/lang/Makefile
+++ b/lang/Makefile
@@ -1,8 +1,8 @@
 # Makefile: Compile and link main.asm using nasm and mold, intermediate files in target/
 
 TARGET_DIR := target
-SRC := src/main.asm
-OBJ := $(TARGET_DIR)/main.o
+SRC := src/main.asm src/int_to_str.asm
+OBJ := $(addprefix $(TARGET_DIR)/, $(notdir $(SRC:.asm=.o)))
 BIN := $(TARGET_DIR)/main
 
 .PHONY: all clean
@@ -12,8 +12,8 @@ all: $(BIN)
 $(TARGET_DIR):
 	mkdir -p $(TARGET_DIR)
 
-$(OBJ): $(SRC) | $(TARGET_DIR)
-	nasm -f elf64 -g $(SRC) -o $(OBJ)
+$(TARGET_DIR)/%.o: src/%.asm | $(TARGET_DIR)
+	nasm -f elf64 -g $< -o $@
 
 $(BIN): $(OBJ)
 	mold -run ld -o $(BIN) $(OBJ)
diff --git a/lang/src/int_to_str.asm b/lang/src/int_to_str.asm
new file mode 100644
index 0000000..1e1f71c
--- /dev/null
+++ b/lang/src/int_to_str.asm
@@ -0,0 +1,54 @@
+section .text
+global int_to_str
+
+;; Converts a signed 64-bit integer to a NUL-terminated decimal string
+;; rcx: input integer
+;; rdx: pointer to output buffer (at least 21 bytes)
+;; Returns:
+;; rax: number of characters written, not counting the NUL terminator
+;; Clobbers: rcx, rdx, rsi, rdi, r8, r9, r10
+int_to_str:
+    mov r8, rdx ; r8 = start of buffer
+    mov rdi, rdx ; rdi = write cursor
+    mov rax, rcx ; rax = magnitude to convert
+
+    ; Check sign
+    test rcx, rcx
+    jns .convert
+    mov byte [rdi], '-' ; write minus sign
+    inc rdi
+    neg rax ; INT64_MIN stays 2^63, which the unsigned div below handles
+
+.convert:
+    mov r9, 10 ; divisor
+    xor r10, r10 ; digit count
+
+.digits_loop:
+    ; runs at least once, so an input of 0 yields "0"
+    xor edx, edx
+    div r9 ; rax = rax / 10, rdx = rax % 10
+    add dl, '0' ; convert digit to ASCII
+    mov [rdi + r10], dl
+    inc r10
+    test rax, rax
+    jnz .digits_loop
+
+    ; Digits are in reverse order in [rdi..rdi+r10)
+    ; Reverse them
+    xor esi, esi ; first digit index
+    lea rdx, [r10 - 1] ; last digit index
+.reverse_loop:
+    cmp rsi, rdx
+    jae .done_reverse
+    mov al, [rdi + rsi]
+    mov cl, [rdi + rdx] ; scratch byte (rcx is free by now)
+    mov [rdi + rsi], cl
+    mov [rdi + rdx], al
+    inc rsi
+    dec rdx
+    jmp .reverse_loop
+.done_reverse:
+    lea rax, [rdi + r10] ; one past the last digit
+    mov byte [rax], 0 ; null-terminate
+    sub rax, r8 ; rax = length including any minus sign
+    ret
diff --git a/lang/src/main.asm b/lang/src/main.asm
index 795c961..a0a5966 100644
--- a/lang/src/main.asm
+++ b/lang/src/main.asm
@@ -1,20 +1,28 @@
 ;; Compile with:
 ;; nasm -f elf64 main.asm -o main.o
 
+extern int_to_str
 section .data
     hello_msg db "Hello, World!", 10
     hello_msg_len equ $ - hello_msg
     panic_msg db "panic occured!", 10
     panic_msg_len equ $ - panic_msg
+    oom_msg db "panic: oom!", 10
+    oom_msg_len equ $ - oom_msg
     file_error_msg db "Could not open file: "
     file_error_msg_len equ $ - file_error_msg
     error_msg db "Error: "
     error_msg_len equ $ - error_msg
-    buffer_size equ 1024
-    buffer times buffer_size db 0
 
 section .text
 global _start
 
+test_fn:
+    push r11
+    push r10
+    pop r10
+    pop r11
+    ret
+
 _start:
     ; get filename from argv[1]
@@ -29,28 +37,16 @@ _start:
     ; get argv[1]
     mov rax, [rsp + 16] ; argv[1]
 
-    ; open file for reading
-    mov rdx, rax ; filename pointer
-    call fopen_read
-
+    ; init tokeniser
+    mov rdx, rax ; rdx = pointer to filename
+    call tokeniser_init
+    ; read until rax = 0
 .read_loop:
-    mov r9, rax ; file descriptor
-    mov rax, 0 ; syscall: read
-    mov rdi, r9 ; fd
-    lea rsi, [buffer] ; buffer
-    mov rdx, buffer_size ; size
-    syscall
-    cmp rax, 0 ; check for EOF
-    jle .exit ; if rax <= 0, exit loop
-    mov rcx, rax ; number of bytes read
-
-    ; write to stdout for now
-    mov rax, 1 ; syscall: write
-    mov rdi, 1 ; fd: stdout
-    lea rsi, [buffer] ; buffer
-    mov rdx, rcx ; len: bytes read
-    syscall
-    jmp .read_loop
+    call tokeniser_read_chunk
+    cmp rax, 0
+    jne .read_loop
+    call tokeniser_print
+    jmp .exit
 
 .no_filename:
     call panic
@@ -72,6 +68,16 @@ panic:
     mov rdi, 1 ; status: 1
     syscall
 
+;; Abort the program with an out-of-memory panic message
+oom:
+    mov rdx, oom_msg
+    mov rcx, oom_msg_len
+    call eprint_str
+    ; exit with error code 1
+    mov rax, 60 ; syscall: exit
+    mov rdi, 1 ; status: 1
+    syscall
+
 ;; abort the program
 ;; rdx: status code
 exit:
@@ -107,6 +113,7 @@ strlen:
 ;; rcx: source pointer
 ;; r8: number of bytes to copy
 memcpy:
+    push r10
     xor r10, r10
 .memcpy_loop_byte:
     cmp r10, r8
@@ -116,6 +123,7 @@ memcpy:
     inc r10
     jmp .memcpy_loop_byte
 .memcpy_done:
+    pop r10
     ret
 
 ;; Opens file for reading:
@@ -131,6 +139,7 @@ fopen_read:
     ret ;fd in rax
 
 .file_error:
+    call eprint_error
     mov rdx, rdi ; filename is in rdi
     call strlen ; get length of filename
     mov r9, rax ; r9 = filename length
@@ -169,3 +178,274 @@ fopen_read:
     pop rsi
     add rsp, rsi ; dealloc
     call panic
+
+section .rodata
+    e_is_dir db "Is a directory", 10
+    e_is_dir_len equ $ - e_is_dir
+    e_io db "I/O error", 10
+    e_io_len equ $ - e_io
+    e_bad_fd db "Bad file descriptor", 10
+    e_bad_fd_len equ $ - e_bad_fd
+    e_unknown db "Unknown error", 10
+    e_unknown_len equ $ - e_unknown
+
+section .text
+;; Converts an error code to a str (pointer, length) pair
+;; rdx: error code
+;; Returns:
+;; rax: pointer to string
+;; rdx: length of string
+error_to_str:
+    cmp rdx, -21
+    je .e_is_dir
+    cmp rdx, -5
+    je .e_io
+    cmp rdx, -9
+    je .e_bad_fd
+
+    ; unknown error
+    lea rax, [e_unknown]
+    mov rdx, e_unknown_len
+    ret
+.e_is_dir:
+    lea rax, [e_is_dir]
+    mov rdx, e_is_dir_len
+    ret
+.e_io:
+    lea rax, [e_io]
+    mov rdx, e_io_len
+    ret
+.e_bad_fd:
+    lea rax, [e_bad_fd]
+    mov rdx, e_bad_fd_len
+    ret
+
+;; Prints "Error: " followed by the message for an error code via eprint_str
+;; rdx: error code
+eprint_error:
+    ; prologue
+    push r11
+    push r10
+    push rsi
+
+    ; get error string
+    call error_to_str
+    mov r11, rax ; r11 = pointer to error string
+    mov r10, rdx ; r10 = length of error string
+    mov rsi, r10
+    add rsi, error_msg_len
+    add rsi, 1
+    add rsi, 15
+    and rsi, -16 ; align up to 16
+    sub rsp, rsi ; allocate buffer
+    push rsi ; save allocation size
+    ; copy error_msg
+    lea rdx, [rsp + 8]
+    mov rcx, error_msg
+    mov r8, error_msg_len
+    call memcpy
+    ; copy error string
+    lea rdx, [rsp + 8 + error_msg_len]
+    mov rcx, r11
+    mov r8, r10
+    call memcpy
+    ; trailing newline
+    lea rdx, [rsp + 8 + error_msg_len + r10]
+    mov byte [rdx], 10
+    ; print error message
+    lea rdx, [rsp + 8]
+    mov rcx, error_msg_len
+    add rcx, r10
+    add rcx, 1 ; include newline
+    call eprint_str
+    pop rsi
+    add rsp, rsi ; dealloc
+
+    ; epilogue
+    pop rsi
+    pop r10
+    pop r11
+    ret
+
+;; Allocates n pages of memory
+;; rdx: number of pages
+;; Returns:
+;; rax: pointer to allocated memory
+;; Prints the error and aborts via oom if the mapping fails
+alloc_pages:
+    mov rax, 9 ; syscall: mmap
+    xor rdi, rdi ; addr: NULL
+    mov rsi, rdx ; length: number of pages
+    shl rsi, 12 ; length in bytes (page size = 4096)
+    mov rdx, 3 ; prot: PROT_READ | PROT_WRITE
+    mov r10, 34 ; flags: MAP_PRIVATE | MAP_ANONYMOUS
+    mov r8, -1 ; fd: -1
+    xor r9, r9 ; offset: 0
+    syscall
+    cmp rax, -4095 ; check for error
+    jae .alloc_error
+    ret
+.alloc_error:
+    mov rdx, rax ; error code
+    call eprint_error
+    call oom
+
+;; =============================
+;; Linked list functions
+;; ============================
+
+;; =============================
+;; Tokeniser functions
+;; =============================
+
+;; tokeniser state
+section .data
+    ; vec of buffer headers
+    buffer_headers dq 0
+    buffer_headers_size dd 0
+    buffer_headers_capacity dd 0
+    chunk_size equ 4096
+    current_buffer dd 0
+    current_offset dd 0
+    input_file dd 0
+
+;; each buffer is chunk_size bytes large
+;; buffer header structure:
+;; +0 (8 bytes): pointer buffer
+;; +8 (8 bytes): size of buffer
+
+;; Tokens:
+;; [let, if, else, fn, return, loop, break, continue, true, false, i32, u32, bool, =, +, -, *, /, %, ==, !=, <, <=, >, >=, &&, ||, !, (, ), {, }, [, ], ;, ',', ]
+
+
+section .text
+;; Initialises the tokeniser
+;; rdx: pointer to filename (null-terminated)
+tokeniser_init:
+    ; open file for reading
+    ; this panics if the file doesn't exist
+    call fopen_read
+    mov [input_file], eax ; store file descriptor
+    mov dword [current_buffer], 0
+    mov dword [current_offset], 0
+    mov rdx, 1 ; allocate 1 page
+    call alloc_pages
+    mov [buffer_headers], rax ; store pointer to buffer headers
+    mov dword [buffer_headers_capacity], 4096 / 16 ; initial capacity for 4096 bytes
+    mov dword [buffer_headers_size], 0
+
+    ; read the initial chunk into a new buffer
+    call tokeniser_read_chunk
+    ret
+
+section .rodata
+    num_headers db "Number of buffer headers: "
+    num_headers_len equ $ - num_headers
+section .bss
+scratch_str: resb 1024
+
+section .text
+;; Prints the number of buffer headers, then every buffered chunk, via eprint_str
+;; Clobbers: r14, r15 and whatever eprint_str / int_to_str clobber
+tokeniser_print:
+    mov r15d, [buffer_headers_size] ; dword field; the 32-bit load zero-extends into r15
+    mov rdx, num_headers
+    mov rcx, num_headers_len
+    call eprint_str
+    mov rcx, r15
+    mov rdx, scratch_str
+    call int_to_str ; rax = length of the decimal string
+    mov byte [scratch_str + rax], 10 ; replace the NUL with a newline
+    lea rcx, [rax + 1] ; length including the newline
+    mov rdx, scratch_str
+    call eprint_str
+    xor r14, r14 ; index
+.print_loop:
+    cmp r14, r15
+    jae .print_done
+    mov rax, r14
+    shl rax, 4 ; rax = index * 16
+    add rax, [buffer_headers] ; rax = &headers[index]; buffer_headers holds the array pointer
+    mov rdx, [rax] ; pointer to buffer
+    mov rcx, [rax + 8] ; size of buffer
+    call eprint_str
+    inc r14
+    jmp .print_loop
+.print_done:
+    ret
+
+;; Reads a chunk (4096 bytes) from the file into a newly allocated buffer
+;; and appends its header to buffer_headers
+;; Returns:
+;; rax: number of bytes read, 0 on EOF or read error
+tokeniser_read_chunk:
+    ; make sure there is room for one more header
+    mov ecx, [buffer_headers_size]
+    mov eax, [buffer_headers_capacity]
+    cmp eax, ecx
+    jbe .alloc_more_headers ; full when size == capacity
+.read_chunk:
+    mov rdx, 1 ; allocate 1 page
+    call alloc_pages
+    mov r14, rax ; r14 = pointer to new buffer
+    mov r15d, [buffer_headers_size] ; the value, not the address of the field
+    shl r15, 4 ; r15 = size * 16
+    add r15, [buffer_headers] ; r15 = &headers[size]
+    mov [r15], rax ; store pointer to new buffer
+    mov qword [r15 + 8], 0 ; size = 0 for now
+
+    mov rax, 0 ; syscall: read
+    mov edi, [input_file] ; fd (dword field)
+    mov rsi, r14 ; buffer
+    mov rdx, chunk_size ; size
+    syscall
+    ; check error
+    cmp rax, 0
+    jl .read_error
+    ; store size of buffer
+    mov [r15 + 8], rax
+    je .read_done ; mov keeps the flags from cmp: EOF adds no header
+    inc dword [buffer_headers_size]
+.read_done:
+    ret
+.read_error:
+    mov rdx, rax
+    call eprint_error
+    mov rax, 0
+    ret
+.alloc_more_headers:
+    shl ecx, 1 ; double capacity
+    ; capacity in items, not bytes, 256 items per page
+    ; calculate capacity in pages:
+    shr ecx, 8 ; ecx = capacity / 256
+    mov edx, 1
+    cmp ecx, edx
+    cmovl ecx, edx
+
+    mov r15, rcx
+    shl r15, 8 ; r15 = new capacity in items
+    mov edx, ecx ; number of pages
+    call alloc_pages
+    mov r14, rax ; r14 = new header array, kept across memcpy and the syscall
+    mov rdx, rax
+    mov rcx, [buffer_headers]
+    mov r8d, [buffer_headers_size] ; dword field
+    shl r8, 4 ; size * 16
+    call memcpy
+    ; release the old header array
+    mov rax, 11 ; syscall: munmap
+    mov rdi, [buffer_headers] ; addr
+    mov esi, [buffer_headers_capacity]
+    shl rsi, 4 ; length: old capacity * 16
+    syscall
+    mov [buffer_headers], r14
+    mov dword [buffer_headers_capacity], r15d
+    jmp .read_chunk
+
+
+
+
+;; Read the next token from the buffer
+;; Returns:
+;; rax: token type
+;; rdx: pointer to token text