overcomplicating things

This commit is contained in:
janis 2025-10-16 23:50:30 +02:00
parent 703a8ba968
commit 7b43442ba8
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
3 changed files with 357 additions and 27 deletions

View file

@ -1,8 +1,8 @@
# Makefile: Assemble every source listed in SRC with nasm and link the objects with mold; intermediate files go in target/
TARGET_DIR := target
SRC := src/main.asm
OBJ := $(TARGET_DIR)/main.o
SRC := src/main.asm src/int_to_str.asm
OBJ := $(addprefix $(TARGET_DIR)/, $(notdir $(SRC:.asm=.o)))
BIN := $(TARGET_DIR)/main
.PHONY: all clean
@ -12,8 +12,8 @@ all: $(BIN)
$(TARGET_DIR):
mkdir -p $(TARGET_DIR)
$(OBJ): $(SRC) | $(TARGET_DIR)
nasm -f elf64 -g $(SRC) -o $(OBJ)
$(TARGET_DIR)/%.o: src/%.asm | $(TARGET_DIR)
nasm -f elf64 -g $< -o $@
$(BIN): $(OBJ)
mold -run ld -o $(BIN) $(OBJ)

70
lang/src/int_to_str.asm Normal file
View file

@ -0,0 +1,70 @@
section .text
global int_to_str
;; Converts a signed 64-bit integer to a NUL-terminated decimal string.
;; rcx: input integer
;; rdx: pointer to output buffer (at least 21 bytes: sign + 19 digits + NUL)
;; Returns:
;; rax: number of characters written, not counting the terminating NUL
;; Clobbers: rcx, rdx, rsi, rdi, r8, r10, r11, flags
int_to_str:
        mov     r8, rdx                 ; r8  = start of buffer, kept to compute the length
        mov     rdi, rdx                ; rdi = address where the digits begin
        mov     rax, rcx                ; rax = magnitude still to be converted

        test    rax, rax
        jns     .convert
        mov     byte [rdi], '-'         ; negative: emit the sign, digits follow it
        inc     rdi
        neg     rax                     ; INT64_MIN maps to itself, which is already
                                        ; the correct magnitude for the unsigned div

.convert:
        mov     r11d, 10                ; divisor
        xor     r10d, r10d              ; r10 = number of digits produced so far

        ; Do-while loop: always runs once, so an input of 0 yields the single
        ; digit '0' without needing a special case.
.digits_loop:
        xor     edx, edx                ; div divides rdx:rax, the high half must be 0
        div     r11                     ; rax = quotient, rdx = remainder (0..9)
        add     dl, '0'                 ; remainder -> ASCII digit
        mov     [rdi + r10], dl         ; least significant digit first
        inc     r10
        test    rax, rax
        jnz     .digits_loop

        ; The digits sit in [rdi, rdi + r10) in reverse order; swap them in
        ; place from both ends. Only scratch byte registers are used so the
        ; buffer pointers stay intact for the terminator below.
        xor     esi, esi                ; rsi = left index
        lea     rdx, [r10 - 1]          ; rdx = right index
.reverse_loop:
        cmp     rsi, rdx
        jae     .done_reverse
        mov     al, [rdi + rsi]
        mov     cl, [rdi + rdx]
        mov     [rdi + rsi], cl
        mov     [rdi + rdx], al
        inc     rsi
        dec     rdx
        jmp     .reverse_loop

.done_reverse:
        lea     rax, [rdi + r10]        ; rax = one past the last digit
        mov     byte [rax], 0           ; null-terminate directly after the digits
        sub     rax, r8                 ; rax = string length
        ret

View file

@ -1,20 +1,28 @@
;; Compile with:
;; nasm -f elf64 main.asm -o main.o
extern int_to_str
; Message strings and a zero-filled byte buffer. Every *_len constant is the
; byte length of the string defined on the line above it, computed by the
; assembler as the distance from the string's label to the current position.
section .data
hello_msg db "Hello, World!", 10 ; ends with a newline (10)
hello_msg_len equ $ - hello_msg
panic_msg db "panic occured!", 10 ; ends with a newline (10)
panic_msg_len equ $ - panic_msg
oom_msg db "panic: oom!", 10 ; passed to eprint_str by oom
oom_msg_len equ $ - oom_msg
file_error_msg db "Could not open file: " ; no trailing newline
file_error_msg_len equ $ - file_error_msg
error_msg db "Error: " ; prefix that eprint_error copies in front of the error text
error_msg_len equ $ - error_msg
buffer_size equ 1024 ; size of buffer in bytes
buffer times buffer_size db 0 ; buffer_size bytes, all initialised to zero
section .text
global _start
; Pushes r11 and r10 and pops them again in reverse order, so both registers
; hold their original values on return. Takes no arguments and produces no
; result.
test_fn:
push r11 ; save r11
push r10 ; save r10
pop r10 ; restore in reverse order of the pushes
pop r11
ret
_start:
; get filename from argv[1]
@ -29,28 +37,16 @@ _start:
; get argv[1]
mov rax, [rsp + 16] ; argv[1]
; open file for reading
mov rdx, rax ; filename pointer
call fopen_read
; init tokeniser
mov rdx, rax ; rdx = pointer to filename
call tokeniser_init
; read until rax = 0
.read_loop:
mov r9, rax ; file descriptor
mov rax, 0 ; syscall: read
mov rdi, r9 ; fd
lea rsi, [buffer] ; buffer
mov rdx, buffer_size ; size
syscall
cmp rax, 0 ; check for EOF
jle .exit ; if rax <= 0, exit loop
mov rcx, rax ; number of bytes read
; write to stdout for now
mov rax, 1 ; syscall: write
mov rdi, 1 ; fd: stdout
lea rsi, [buffer] ; buffer
mov rdx, rcx ; len: bytes read
syscall
jmp .read_loop
call tokeniser_read_chunk
cmp rax, 0
jne .read_loop
call tokeniser_print
jmp .exit
.no_filename:
call panic
@ -72,6 +68,16 @@ panic:
mov rdi, 1 ; status: 1
syscall
;; Abort the program after an allocation failure: hands the fixed oom_msg
;; text to eprint_str, then terminates the process with exit status 1.
;; Takes no arguments and does not return.
oom:
        lea     rdx, [oom_msg]          ; rdx = message pointer
        mov     ecx, oom_msg_len        ; rcx = message length
        call    eprint_str

        mov     edi, 1                  ; exit status 1
        mov     eax, 60                 ; syscall number 60: exit
        syscall
;; abort the program
;; rdx: status code
exit:
@ -107,6 +113,7 @@ strlen:
;; rcx: source pointer
;; r8: number of bytes to copy
memcpy:
push r10
xor r10, r10
.memcpy_loop_byte:
cmp r10, r8
@ -116,6 +123,7 @@ memcpy:
inc r10
jmp .memcpy_loop_byte
.memcpy_done:
pop r10
ret
;; Opens file for reading:
@ -131,6 +139,7 @@ fopen_read:
ret ;fd in rax
.file_error:
call eprint_error
mov rdx, rdi ; filename is in rdi
call strlen ; get length of filename
mov r9, rax ; r9 = filename length
@ -169,3 +178,254 @@ fopen_read:
pop rsi
add rsp, rsi ; dealloc
call panic
section .rodata
e_is_dir db "Is a directory", 10
e_is_dir_len equ $ - e_is_dir
e_io db "I/O error", 10
e_io_len equ $ - e_io
e_bad_fd db "Bad file descriptor", 10
e_bad_fd_len equ $ - e_bad_fd
e_unknown db "Unknown error", 10
e_unknown_len equ $ - e_unknown
section .text
;; Maps a negative errno value, as returned by a raw syscall, to a message.
;; rdx: error code (-21, -5 and -9 are recognised; anything else is "unknown")
;; Returns:
;; rax: pointer to the message text
;; rdx: length of the message in bytes
error_to_str:
        cmp     rdx, -21
        je      .is_dir
        cmp     rdx, -5
        je      .io
        cmp     rdx, -9
        je      .bad_fd

        ; no match: fall through to the generic message
        lea     rax, [e_unknown]
        mov     edx, e_unknown_len
        ret

.is_dir:
        lea     rax, [e_is_dir]
        mov     edx, e_is_dir_len
        ret

.io:
        lea     rax, [e_io]
        mov     edx, e_io_len
        ret

.bad_fd:
        lea     rax, [e_bad_fd]
        mov     edx, e_bad_fd_len
        ret
;; Prints "Error: " followed by the message for an error code, as one string.
;; rdx: error code (see error_to_str)
;; The message is assembled in a temporary stack buffer and handed to
;; eprint_str in a single call. Every string returned by error_to_str already
;; ends with a newline, so none is appended here (appending one produced a
;; blank line after each error).
;; Preserves r10, r11 and rsi; rax, rcx, rdx and r8 are overwritten.
eprint_error:
        ; prologue
        push    r11
        push    r10
        push    rsi

        ; look up the message text
        call    error_to_str
        mov     r11, rax                ; r11 = pointer to error string
        mov     r10, rdx                ; r10 = length of error string

        ; reserve prefix + message bytes, rounded up to a multiple of 16
        lea     rsi, [r10 + error_msg_len + 15]
        and     rsi, -16
        sub     rsp, rsi                ; allocate buffer
        push    rsi                     ; save allocation size; buffer starts at rsp + 8

        ; copy the "Error: " prefix
        lea     rdx, [rsp + 8]
        mov     rcx, error_msg
        mov     r8, error_msg_len
        call    memcpy

        ; copy the error string (newline included) directly behind the prefix
        lea     rdx, [rsp + 8 + error_msg_len]
        mov     rcx, r11
        mov     r8, r10
        call    memcpy

        ; print prefix + message
        lea     rdx, [rsp + 8]
        lea     rcx, [r10 + error_msg_len]
        call    eprint_str

        pop     rsi                     ; allocation size
        add     rsp, rsi                ; dealloc

        ; epilogue
        pop     rsi
        pop     r10
        pop     r11
        ret
;; Allocates n pages of private, anonymous, read/write memory with mmap.
;; rdx: number of pages (4096 bytes each)
;; Returns:
;; rax: pointer to allocated memory
;; On failure the error is reported through eprint_error and the program is
;; terminated through oom; the function does not return in that case.
;; Overwrites rdi, rsi, rdx, r8, r9 and r10 with the syscall arguments; the
;; syscall instruction itself clobbers rcx and r11.
alloc_pages:
        mov     rax, 9                  ; syscall: mmap
        xor     rdi, rdi                ; addr: NULL, let the kernel choose
        mov     rsi, rdx                ; length: number of pages
        shl     rsi, 12                 ; length in bytes (page size = 4096)
        mov     rdx, 3                  ; prot: PROT_READ | PROT_WRITE
        mov     r10, 34                 ; flags: MAP_PRIVATE | MAP_ANONYMOUS
        mov     r8, -1                  ; fd: -1, the portable value for anonymous mappings
        xor     r9, r9                  ; offset: 0
        syscall
        cmp     rax, -4095              ; results in [-4095, -1] are -errno
        jae     .alloc_error
        ret
.alloc_error:
        mov     rdx, rax                ; error code
        call    eprint_error
        call    oom                     ; oom exits the process
;; =============================
;; Linked list functions
;; ============================
;; =============================
;; Tokeniser functions
;; =============================
;; tokeniser state
section .data
; vec of buffer headers
buffer_headers dq 0 ; 64-bit pointer to the header array; tokeniser_init stores the page from alloc_pages here
buffer_headers_size dd 0 ; 32-bit field; tokeniser_init resets it to 0
buffer_headers_capacity dd 0 ; 32-bit field; tokeniser_init sets it to 4096 / 16 headers
chunk_size equ 4096
current_buffer dd 0 ; 32-bit field, reset to 0 by tokeniser_init
current_offset dd 0 ; 32-bit field, reset to 0 by tokeniser_init
input_file dd 0 ; 32-bit file descriptor returned by fopen_read
;; NOTE: the dd fields above are 4 bytes wide and lie next to each other, so
;; an 8-byte access to one of them also touches the bytes that follow it.
;; each buffer is chunk_size bytes large
;; buffer header structure (16 bytes per header):
;; +0 (8 bytes): pointer to buffer
;; +8 (8 bytes): size of buffer
;; Tokens:
;; [let, if, else, fn, return, loop, break, continue, true, false, i32, u32, bool, =, +, -, *, /, %, ==, !=, <, <=, >, >=, &&, ||, !, (, ), {, }, [, ], ;, ',']
section .text
;; Sets up the tokeniser state for one input file and reads its first chunk.
;; rdx: pointer to filename (null-terminated)
;; Returns whatever tokeniser_read_chunk leaves in rax.
tokeniser_init:
        ; Open the input first; per the original note, fopen_read panics when
        ; the file cannot be opened, so nothing below runs in that case.
        call    fopen_read
        mov     dword [input_file], eax         ; remember the file descriptor

        ; One page holds the initial header vector.
        mov     edx, 1
        call    alloc_pages
        mov     qword [buffer_headers], rax     ; pointer to the header array

        ; Empty vector, cursor at the very beginning.
        xor     eax, eax
        mov     dword [buffer_headers_size], eax
        mov     dword [current_buffer], eax
        mov     dword [current_offset], eax
        mov     dword [buffer_headers_capacity], 4096 / 16 ; headers that fit in 4096 bytes

        ; Tail call: tokeniser_read_chunk returns straight to our caller.
        jmp     tokeniser_read_chunk
section .rodata
num_headers db "Number of buffer headers: "
num_headers_len equ $ - num_headers
section .bss
scratch_str: resb 1024
section .text
;; Debug dump of the tokeniser buffers through eprint_str: prints
;; "Number of buffer headers: <count>" on one line, then the contents of every
;; buffer in the header vector in order.
;; Takes no arguments. r14 and r15 are saved and restored; rax, rcx and rdx are
;; overwritten, plus whatever eprint_str and int_to_str clobber.
tokeniser_print:
        push    r14
        push    r15

        ; buffer_headers_size is a 32-bit field: load it as such so the
        ; neighbouring capacity field does not end up in the upper half.
        mov     r15d, [buffer_headers_size]     ; r15 = number of headers

        mov     rdx, num_headers
        mov     rcx, num_headers_len
        call    eprint_str

        ; format the count into scratch_str
        mov     rcx, r15
        mov     rdx, scratch_str
        call    int_to_str

        ; measure the NUL-terminated result, replace the terminator with a
        ; newline and print count + newline
        mov     rdx, scratch_str
        xor     ecx, ecx                        ; rcx = length so far
.measure:
        cmp     byte [rdx + rcx], 0
        je      .measured
        inc     rcx
        jmp     .measure
.measured:
        mov     byte [rdx + rcx], 10
        inc     rcx                             ; include the newline
        call    eprint_str

        xor     r14d, r14d                      ; r14 = header index
.print_loop:
        cmp     r14, r15
        jae     .print_done
        mov     rax, r14
        shl     rax, 4                          ; rax = index * 16 (header size)
        add     rax, [buffer_headers]           ; buffer_headers holds a pointer to the array
        mov     rdx, [rax]                      ; pointer to buffer
        mov     rcx, [rax + 8]                  ; size of buffer
        call    eprint_str
        inc     r14
        jmp     .print_loop
.print_done:
        pop     r15
        pop     r14
        ret
;; Reads the next chunk (up to 4096 bytes) of input_file into a freshly
;; allocated page and appends a (pointer, size) header for it to the header
;; vector, growing the vector first when it is full.
;; Returns:
;; rax: number of bytes read; 0 at end of file or after a read error (the
;;      error is reported through eprint_error). No header is appended when
;;      0 is returned.
;; Uses only caller-visible scratch registers (rax, rcx, rdx, rsi, rdi, r8 and
;; whatever alloc_pages / memcpy / eprint_error clobber).
;; NOTE(review): the page allocated for a read that returns 0 and the old
;; header array replaced during growth are not unmapped.
tokeniser_read_chunk:
        ; a free slot exists only while size < capacity
        mov     eax, [buffer_headers_size]
        cmp     eax, [buffer_headers_capacity]
        jb      .read_chunk

.alloc_more_headers:
        ; capacity is counted in headers; 256 headers of 16 bytes fit in a page
        mov     edx, [buffer_headers_capacity]
        shl     edx, 1                          ; double capacity
        shr     edx, 8                          ; edx = new capacity in pages
        mov     eax, 1
        cmp     edx, eax
        cmovb   edx, eax                        ; at least one page
        mov     eax, edx
        shl     eax, 8                          ; eax = new capacity in headers
        ; Safe to publish before allocating: alloc_pages does not return on failure.
        mov     [buffer_headers_capacity], eax
        call    alloc_pages                     ; rdx = pages, rax = new array

        ; Swap the array pointer before copying so nothing has to survive the
        ; memcpy call in a register.
        mov     rcx, [buffer_headers]           ; source: old array
        mov     [buffer_headers], rax
        mov     rdx, rax                        ; destination: new array
        mov     r8d, [buffer_headers_size]      ; 32-bit field
        shl     r8, 4                           ; size * 16 bytes
        call    memcpy

.read_chunk:
        mov     edx, 1                          ; allocate 1 page for the data
        call    alloc_pages

        mov     rsi, rax                        ; buffer
        xor     eax, eax                        ; syscall: read
        mov     edi, [input_file]               ; fd (32-bit field)
        mov     edx, 4096                       ; size
        syscall                                 ; preserves rsi; clobbers rcx, r11

        test    rax, rax
        js      .read_error
        jz      .done                           ; end of file: nothing to record

        ; append the header at index buffer_headers_size
        mov     ecx, [buffer_headers_size]
        shl     rcx, 4                          ; index * 16
        add     rcx, [buffer_headers]           ; rcx = address of the new header
        mov     [rcx], rsi                      ; +0: pointer to buffer
        mov     [rcx + 8], rax                  ; +8: size of buffer
        inc     dword [buffer_headers_size]
.done:
        ret

.read_error:
        mov     rdx, rax                        ; -errno from read
        call    eprint_error
        xor     eax, eax                        ; report "nothing read" to the caller
        ret
;; Read the next token from the buffer
;; Returns:
;; rax: token type
;; rdx: pointer to token text