diff --git a/lang/Makefile b/lang/Makefile index 3391362..7c70e5a 100644 --- a/lang/Makefile +++ b/lang/Makefile @@ -1,7 +1,7 @@ # Makefile: Compile and link main.asm using nasm and mold, intermediate files in target/ TARGET_DIR := target -SRC := src/main.asm src/int_to_str.asm +SRC := src/main.asm src/lib.asm src/int_to_str.asm OBJ := $(addprefix $(TARGET_DIR)/, $(notdir $(SRC:.asm=.o))) BIN := $(TARGET_DIR)/main diff --git a/lang/src/lib.asm b/lang/src/lib.asm new file mode 100644 index 0000000..fea6f52 --- /dev/null +++ b/lang/src/lib.asm @@ -0,0 +1,195 @@ +section .rdata + panic_msg db "panic occured!", 10 + panic_msg_len equ $ - panic_msg + oom_msg db "panic: oom!", 10 + oom_msg_len equ $ - oom_msg + file_error_msg db "Could not open file: " + file_error_msg_len equ $ - file_error_msg + error_msg db "Error: " + error_msg_len equ $ - error_msg + + +section .text +global oom +global panic +global strlen +global memcpy +global eprint_str +global exit +global error_to_str +global eprint_error +global alloc_pages + +;; ============================== +;; Helper functions +;; ============================== + +;; Abort the program with a default panic message +panic: + mov rcx, panic_msg + mov rdx, panic_msg_len + call eprint_str + ; exit with error code 1 + mov rax, 60 ; syscall: exit + mov rdi, 1 ; status: 1 + syscall + +;; Abort the program with a default panic message +oom: + mov rcx, oom_msg + mov rdx, oom_msg_len + call eprint_str + ; exit with error code 1 + mov rax, 60 ; syscall: exit + mov rdi, 1 ; status: 1 + syscall + +;; abort the program +;; rcx: status code +exit: + mov rax, 60 ; syscall: exit + mov rdi, rcx + syscall + +;; Writes a string to stderr: +;; rcx: pointer to string +;; rdx: length of string +eprint_str: + mov rax, 1 ; syscall: write + mov rdi, 2 ; fd: stderr + mov rsi, rcx ; buf: str + syscall + ret + +;; calculates length of null-terminated string +;; rcx: pointer to string +strlen: + xor rax, rax ; length counter +.strlen_loop: + cmp byte [rcx + rax], 0 + je .strlen_done + inc rax + jmp .strlen_loop +.strlen_done: + ret + +;; Copy bytes from one memory location to another +;; rcx: destination pointer +;; rdx: source pointer +;; r8: number of bytes to copy +memcpy: + xor r10, r10 +.memcpy_loop_byte: + cmp r10, r8 + jge .memcpy_done + mov al, [rdx + r10] + mov [rcx + r10], al + inc r10 + jmp .memcpy_loop_byte +.memcpy_done: + ret + +section .rdata + e_is_dir db "Is a directory", 10 + e_is_dir_len equ $ - e_is_dir + e_io db "I/O error", 10 + e_io_len equ $ - e_io + e_bad_fd db "Bad file descriptor", 10 + e_bad_fd_len equ $ - e_bad_fd + e_unknown db "Unknown error", 10 + e_unknown_len equ $ - e_unknown + +section .text +;; Converts an error code to a str (pointer, length) pair +;; rcx: error code +;; Returns: +;; rax: pointer to string +;; rdx: length of string +error_to_str: + cmp rcx, -21 + je .e_is_dir + cmp rcx, -5 + je .e_io + cmp rcx, -9 + je .e_bad_fd + + ; unknown error + lea rax, [e_unknown] + mov rdx, e_unknown_len + ret +.e_is_dir: + lea rax, [e_is_dir] + mov rdx, e_is_dir_len + ret +.e_io: + lea rax, [e_io] + mov rdx, e_io_len + ret +.e_bad_fd: + lea rax, [e_bad_fd] + mov rdx, e_bad_fd_len + ret + +;; rcx: error code +eprint_error: + ; prologue + push rsi + + ; get error string + call error_to_str + mov r12, rax ; r12 = pointer to error string + mov r13, rdx ; r13 = length of error string + mov rsi, r13 + add rsi, error_msg_len + add rsi, 1 + add rsi, 15 + and rsi, -16 ; align up to 16 + sub rsp, rsi ; allocate buffer + push rsi ; save allocation size + ; copy error_msg + lea rcx, [rsp + 8] + mov rdx, error_msg + mov r8, error_msg_len + call memcpy + ; copy error string + lea rcx, [rsp + 8 + error_msg_len] + mov rdx, r12 + mov r8, r13 + call memcpy + ; trailing newline + lea rdx, [rsp + 8 + error_msg_len + r13] + mov byte [rdx], 10 + ; print error message + lea rcx, [rsp + 8] + mov rdx, error_msg_len + add rdx, r13 + add rdx, 1 ; include newline + call eprint_str + pop rsi + add rsp, rsi ; dealloc + + ; epilogue + pop rsi + ret + +;; Allocates n pages of memory +;; rcx: number of pages +;; Returns: +;; rax: pointer to allocated memory +alloc_pages: + mov rax, 9 ; syscall: mmap + xor rdi, rdi ; addr: NULL + mov rsi, rcx ; length: number of pages + shl rsi, 12 ; length in bytes (page size = 4096) + mov rdx, 3 ; prot: PROT_READ | PROT_WRITE + mov r10, 34 ; flags: MAP_PRIVATE | MAP_ANONYMOUS + mov r8, -1 ; fd: -1 + xor r9, r9 ; offset: 0 + syscall + cmp rax, -4095 ; check for error + jae .alloc_error + ret +.alloc_error: + mov rcx, rax ; error code + call eprint_error + call oom diff --git a/lang/src/main.asm b/lang/src/main.asm index a0a5966..432cba5 100644 --- a/lang/src/main.asm +++ b/lang/src/main.asm @@ -1,18 +1,21 @@ ;; Compile with: ;; nasm -f elf64 main.asm -o main.o extern int_to_str +extern oom +extern panic +extern strlen +extern memcpy +extern eprint_str +extern exit +extern error_to_str +extern eprint_error +extern alloc_pages section .data hello_msg db "Hello, World!", 10 hello_msg_len equ $ - hello_msg - panic_msg db "panic occured!", 10 - panic_msg_len equ $ - panic_msg - oom_msg db "panic: oom!", 10 - oom_msg_len equ $ - oom_msg file_error_msg db "Could not open file: " file_error_msg_len equ $ - file_error_msg - error_msg db "Error: " - error_msg_len equ $ - error_msg section .text global _start @@ -28,8 +31,8 @@ _start: ; get filename from argv[1] ; argv is at rsp + 8 ; check if argc > 1 - mov rdx, hello_msg - mov rcx, hello_msg_len + mov rcx, hello_msg + mov rdx, hello_msg_len call eprint_str mov rax, [rsp] ; argc cmp rax, 1 @@ -40,11 +43,6 @@ _start: ; init tokeniser mov rdx, rax ; rdx = pointer to filename call tokeniser_init - ; read until rax = 0 -.read_loop: - call tokeniser_read_chunk - cmp rax, 0 - jne .read_loop call tokeniser_print jmp .exit @@ -54,78 +52,6 @@ _start: call exit -;; ============================== -;; Helper functions -;; ============================== - -;; Abort the program with a default panic message -panic: - mov rdx, panic_msg - mov rcx, panic_msg_len - call eprint_str - ; exit with error code 1 - mov rax, 60 ; syscall: exit - mov rdi, 1 ; status: 1 - syscall - -;; Abort the program with a default panic message -oom: - mov rdx, oom_msg - mov rcx, oom_msg_len - call eprint_str - ; exit with error code 1 - mov rax, 60 ; syscall: exit - mov rdi, 1 ; status: 1 - syscall - -;; abort the program -;; rdx: status code -exit: - mov rax, 60 ; syscall: exit - mov rdi, rdx - syscall - -;; Writes a string to stderr: -;; rdx: pointer to string -;; rcx: length of string -eprint_str: - mov rax, 1 ; syscall: write - mov rdi, 2 ; fd: stderr - mov rsi, rdx ; buf: str - mov rdx, rcx ; len: length - syscall - ret - -;; calculates length of null-terminated string -;; rdx: pointer to string -strlen: - xor rax, rax ; length counter -.strlen_loop: - cmp byte [rdx + rax], 0 - je .strlen_done - inc rax - jmp .strlen_loop -.strlen_done: - ret - -;; Copy bytes from one memory location to another -;; rdx: destination pointer -;; rcx: source pointer -;; r8: number of bytes to copy -memcpy: - push r10 - xor r10, r10 -.memcpy_loop_byte: - cmp r10, r8 - jge .memcpy_done - mov al, [rcx + r10] - mov [rdx + r10], al - inc r10 - jmp .memcpy_loop_byte -.memcpy_done: - pop r10 - ret - ;; Opens file for reading: ;; rdx: pointer to filename (null-terminated) fopen_read: @@ -139,8 +65,11 @@ fopen_read: ret ;fd in rax .file_error: + push rdi + mov rcx, rax call eprint_error - mov rdx, rdi ; filename is in rdi + pop rdi + mov rcx, rdi ; filename is in rdi call strlen ; get length of filename mov r9, rax ; r9 = filename length @@ -153,14 +82,14 @@ fopen_read: push rsi ; save allocation size ; copy file_error_msg - lea rdx, [rsp + 8] - mov rcx, file_error_msg + lea rcx, [rsp + 8] + mov rdx, file_error_msg mov r8, file_error_msg_len call memcpy ; copy filename - lea rdx, [rsp + 8 + file_error_msg_len] - mov rcx, rdi + lea rcx, [rsp + 8 + file_error_msg_len] + mov rdx, rdi mov r8, r9 call memcpy @@ -169,143 +98,26 @@ fopen_read: mov byte [rdx], 10 ; print error message - lea rdx, [rsp + 8] - mov rcx, file_error_msg_len - add rcx, r9 - add rcx, 1 ; include newline + lea rcx, [rsp + 8] + mov rdx, file_error_msg_len + add rdx, r9 + add rdx, 1 ; include newline call eprint_str pop rsi add rsp, rsi ; dealloc call panic -section .rodata - e_is_dir db "Is a directory", 10 - e_is_dir_len equ $ - e_is_dir - e_io db "I/O error", 10 - e_io_len equ $ - e_io - e_bad_fd db "Bad file descriptor", 10 - e_bad_fd_len equ $ - e_bad_fd - e_unknown db "Unknown error", 10 - e_unknown_len equ $ - e_unknown - -section .text -;; Converts an error code to a str (pointer, length) pair -;; rdx: error code -;; Returns: -;; rax: pointer to string -;; rdx: length of string -error_to_str: - cmp rdx, -21 - je .e_is_dir - cmp rdx, -5 - je .e_io - cmp rdx, -9 - je .e_bad_fd - - ; unknown error - lea rax, [e_unknown] - mov rdx, e_unknown_len - ret -.e_is_dir: - lea rax, [e_is_dir] - mov rdx, e_is_dir_len - ret -.e_io: - lea rax, [e_io] - mov rdx, e_io_len - ret -.e_bad_fd: - lea rax, [e_bad_fd] - mov rdx, e_bad_fd_len - ret - -;; rdx: error code -eprint_error: - ; prologue - push r11 - push r10 - push rsi - - ; get error string - call error_to_str - mov r11, rax ; r11 = pointer to error string - mov r10, rdx ; r10 = length of error string - mov rsi, r10 - add rsi, error_msg_len - add rsi, 1 - add rsi, 15 - and rsi, -16 ; align up to 16 - sub rsp, rsi ; allocate buffer - push rsi ; save allocation size - ; copy error_msg - lea rdx, [rsp + 8] - mov rcx, error_msg - mov r8, error_msg_len - call memcpy - ; copy error string - lea rdx, [rsp + 8 + error_msg_len] - mov rcx, r11 - mov r8, r10 - call memcpy - ; trailing newline - lea rdx, [rsp + 8 + error_msg_len + r10] - mov byte [rdx], 10 - ; print error message - lea rdx, [rsp + 8] - mov rcx, error_msg_len - add rcx, r10 - add rcx, 1 ; include newline - call eprint_str - pop rsi - add rsp, rsi ; dealloc - - ; epilogue - pop rsi - pop r10 - pop r11 - ret - -;; Allocates n pages of memory -;; rdx: number of pages -;; Returns: -;; rax: pointer to allocated memory - alloc_pages: - mov rax, 9 ; syscall: mmap - xor rdi, rdi ; addr: NULL - mov rsi, rdx ; length: number of pages - shl rsi, 12 ; length in bytes (page size = 4096) - mov rdx, 3 ; prot: PROT_READ | PROT_WRITE - mov r10, 34 ; flags: MAP_PRIVATE | MAP_ANONYMOUS - xor r8, r8 ; fd: -1 - xor r9, r9 ; offset: 0 - syscall - cmp rax, -4095 ; check for error - jae .alloc_error - ret -.alloc_error: - mov rdx, rax ; error code - call eprint_error - call oom - -;; ============================= -;; Linked list functions -;; ============================ - ;; ============================= ;; Tokeniser functions ;; ============================= ;; tokeniser state section .data - ; vec of buffer headers - buffer_headers dq 0 - buffer_headers_size dd 0 - buffer_headers_capacity dd 0 - chunk_size equ 4096 - current_buffer dd 0 - current_offset dd 0 input_file dd 0 + buffer dq 0 + cursor dq 0 + buffer_len dq 0 ;; each buffer is chunk_size bytes large ;; buffer header structure: @@ -315,6 +127,8 @@ section .data ;; Tokens: ;; [let, if, else, fn, return, loop, break, continue, true, false, i32, u32, bool, =, +, -, *, /, %, ==, !=, <, <=, >, >=, &&, ||, !, (, ), {, }, [, ], ;, ',', ] +section .bss +statbuf: resb 144 section .text ;; Initialises the tokeniser @@ -324,108 +138,51 @@ tokeniser_init: ; this panics if the file doesn't exist call fopen_read mov [input_file], eax ; store file descriptor - mov dword [current_buffer], 0 - mov dword [current_offset], 0 - mov rdx, 1 ; allocate 1 page + mov dword [cursor], 0 + mov dword [buffer_len], 0 + ; fstat + mov rax, 5 ; syscall: fstat + mov rdi, [input_file] ; fd + lea rsi, [statbuf] ; statbuf + syscall + cmp rax, 0 + jl .report_error + ; get file size from statbuf + mov r15, [statbuf + 48] ; st_size + + ; allocate buffer + mov rcx, r15 + add rcx, 4095 + shr rcx, 12 ; divide by 4096 call alloc_pages - mov [buffer_headers], rax ; store pointer to buffer headers - mov dword [buffer_headers_capacity], 4096 / 16 ; initial capacity for 4096 bytes - mov dword [buffer_headers_size], 0 + mov [buffer], rax + mov [buffer_len], r15 - ; read initial chunk into file_buffer - call tokeniser_read_chunk + ; read file into buffer + mov rax, 0 ; syscall: read + mov rdi, [input_file] ; fd + mov rsi, [buffer] ; buf + mov rdx, [buffer_len] ; count + syscall + cmp rax, 0 + jl .report_error ret +.report_error: + mov rcx, rax + call eprint_error + call panic -section .rodata - num_headers db "Number of buffer headers: " - num_headers_len equ $ - num_headers -section .bss -scratch_str: resb 1024 +section .rdata + tokeniser_buffer db "Tokeniser buffer: ", 10 + tokeniser_buffer_len equ $ - tokeniser_buffer section .text tokeniser_print: - mov r15, [buffer_headers_size] - mov rdx, num_headers - mov rcx, num_headers_len + mov rcx, tokeniser_buffer + mov rdx, tokeniser_buffer_len call eprint_str - mov rcx, r15 - mov rdx, scratch_str - call int_to_str - xor r14, r14 ; index -.print_loop: - cmp r14, r15 - jge .print_done - mov rax, r14 - shl rax, 4 ; rax = index * 16 - lea rax, [buffer_headers + rax] - mov rdx, [rax] ; pointer to buffer - mov rcx, [rax + 8] ; size of buffer + + mov rax, [cursor] + mov rcx, [buffer + rax] + mov rdx, [buffer_len] call eprint_str - inc r14 - jmp .print_loop -.print_done: - ret - -;; Reads a chunk (4096 bytes) from the file into file_buffer -tokeniser_read_chunk: - ; allocate new buffer - mov ecx, [buffer_headers_size] - mov eax, [buffer_headers_capacity] - cmp eax, ecx - jl .alloc_more_headers -.read_chunk: - mov rdx, 1 ; allocate 1 page - call alloc_pages - mov r14, rax ; r14 = pointer to new buffer - lea r15, [buffer_headers_size] - shl r15, 4 ; r15 = size * 16 - lea r15, [buffer_headers + r15] - mov [r15], rax ; store pointer to new buffer - mov dword [r15 + 8], 0 ; size = 0 for now - - mov rax, 0 ; syscall: read - mov rdi, [input_file] ; fd - mov rsi, r14 ; buffer - mov rdx, 4096 ; size - syscall - ; check error - cmp rax, 0 - jl .read_error - ; store size of buffer - mov [r15 + 8], eax - ret -.read_error: - mov rdx, rax - call eprint_error - mov rax, 0 - ret -.alloc_more_headers: - shl ecx, 1 ; double capacity - ; capacity in items, not bytes, 256 items per page - ; calculate capacity in pages: - shr ecx, 8 ; ecx = capacity / 256 - mov edx, 1 - cmp ecx, edx - cmovl ecx, edx - - xor rdx, rdx - mov r15, rcx - shl r15, 8 ; r15= new_capacity - mov edx, ecx - call alloc_pages - mov rdx, rax - mov rcx, [buffer_headers] - mov r8, [buffer_headers_size] - shl r8, 4 ; size * 16 - call memcpy - mov [buffer_headers], rdx - mov dword [buffer_headers_capacity], r15d - jmp .read_chunk - - - - -;; Read the next token from the buffer -;; Returns: -;; rax: token type -;; rdx: pointer to token text