This commit is contained in:
janis 2025-10-17 01:11:51 +02:00
parent 7b43442ba8
commit 90061bf50a
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
3 changed files with 263 additions and 311 deletions

View file

@ -1,7 +1,7 @@
# Makefile: Compile and link main.asm using nasm and mold, intermediate files in target/
TARGET_DIR := target
SRC := src/main.asm src/int_to_str.asm
SRC := src/main.asm src/lib.asm src/int_to_str.asm
OBJ := $(addprefix $(TARGET_DIR)/, $(notdir $(SRC:.asm=.o)))
BIN := $(TARGET_DIR)/main

195
lang/src/lib.asm Normal file
View file

@ -0,0 +1,195 @@
;; Read-only message strings used by the helper routines in this file.
;; Each *_len constant is the byte length of the string defined directly
;; before it (current position `$` minus the string's label).
;; NOTE(review): `.rdata` is the PE/COFF spelling; ELF toolchains conventionally
;; call this section `.rodata` - confirm the name is intentional.
section .rdata
; printed by panic; ends with a newline
panic_msg db "panic occured!", 10
panic_msg_len equ $ - panic_msg
; printed by oom; ends with a newline
oom_msg db "panic: oom!", 10
oom_msg_len equ $ - oom_msg
; prefix only: no trailing newline
file_error_msg db "Could not open file: "
file_error_msg_len equ $ - file_error_msg
; prefix only: eprint_error places the error description after it
error_msg db "Error: "
error_msg_len equ $ - error_msg
section .text
global oom
global panic
global strlen
global memcpy
global eprint_str
global exit
global error_to_str
global eprint_error
global alloc_pages
;; ==============================
;; Helper functions
;; ==============================
;; panic: write the generic panic message to stderr and terminate the
;; process with exit status 1.
;; Takes no arguments and never returns.
panic:
    lea     rcx, [rel panic_msg]    ; rcx = message pointer
    mov     edx, panic_msg_len      ; rdx = message length
    call    eprint_str
    ; exit(1)
    mov     edi, 1                  ; status: 1
    mov     eax, 60                 ; syscall: exit
    syscall
;; oom: write the out-of-memory message to stderr and terminate the
;; process with exit status 1.
;; Takes no arguments and never returns.
oom:
    lea     rcx, [rel oom_msg]      ; rcx = message pointer
    mov     edx, oom_msg_len        ; rdx = message length
    call    eprint_str
    ; exit(1)
    mov     edi, 1                  ; status: 1
    mov     eax, 60                 ; syscall: exit
    syscall
;; exit: terminate the process via the exit syscall.
;; rcx: status code
;; Never returns.
exit:
    mov     rdi, rcx                ; status: caller-supplied code
    mov     eax, 60                 ; syscall: exit
    syscall
;; eprint_str: write a byte string to stderr (fd 2) with a single write syscall.
;; rcx: pointer to string
;; rdx: length of string (passed through unchanged as the write count)
;; Returns:
;; rax: result of the write syscall
;; Clobbers: rdi, rsi, plus rcx and r11 (overwritten by the syscall instruction)
eprint_str:
    mov     rsi, rcx                ; buf: str
    mov     edi, 2                  ; fd: stderr
    mov     eax, 1                  ; syscall: write
    syscall
    ret
;; strlen: count the bytes that precede the first zero byte of a string.
;; rcx: pointer to string (left unchanged)
;; Returns:
;; rax: length in bytes, terminator not included
strlen:
    mov     rax, -1                 ; first inc below brings the index to 0
.strlen_next:
    inc     rax
    cmp     byte [rcx + rax], 0     ; reached the terminator?
    jne     .strlen_next
    ret
;; memcpy: copy bytes from one memory location to another, one byte at a
;; time in ascending address order.
;; rcx: destination pointer
;; rdx: source pointer
;; r8: number of bytes to copy (treated as an unsigned count)
;; Clobbers: rax (al is used as the transfer byte), r10 (loop index).
;; rcx, rdx and r8 are left unchanged.
memcpy:
    xor     r10d, r10d              ; r10 = index of the next byte to copy
.memcpy_loop_byte:
    cmp     r10, r8
    jae     .memcpy_done            ; unsigned compare: a byte count is never negative
    mov     al, [rdx + r10]
    mov     [rcx + r10], al
    inc     r10
    jmp     .memcpy_loop_byte
.memcpy_done:
    ret
;; Message strings handed out by error_to_str: one per recognised error code
;; plus a fallback. Every string already ends with a newline (10), and the
;; matching *_len constant includes that newline.
section .rdata
e_is_dir db "Is a directory", 10
e_is_dir_len equ $ - e_is_dir
e_io db "I/O error", 10
e_io_len equ $ - e_io
e_bad_fd db "Bad file descriptor", 10
e_bad_fd_len equ $ - e_bad_fd
; fallback for any code error_to_str does not recognise
e_unknown db "Unknown error", 10
e_unknown_len equ $ - e_unknown
; switch back to code for the routines that follow
section .text
;; error_to_str: map a negated error number, as returned in rax by a failed
;; syscall, to a static message string.
;; rcx: error code (-5, -9 and -21 are recognised; anything else yields
;;      the "Unknown error" string)
;; Returns:
;; rax: pointer to string
;; rdx: length of string
error_to_str:
    cmp     rcx, -5
    je      .io
    cmp     rcx, -9
    je      .bad_fd
    cmp     rcx, -21
    je      .is_dir
    ; no match: fall back to the generic message
    lea     rax, [rel e_unknown]
    mov     edx, e_unknown_len
    ret
.io:
    lea     rax, [rel e_io]
    mov     edx, e_io_len
    ret
.bad_fd:
    lea     rax, [rel e_bad_fd]
    mov     edx, e_bad_fd_len
    ret
.is_dir:
    lea     rax, [rel e_is_dir]
    mov     edx, e_is_dir_len
    ret
;; eprint_error: print "Error: <description>" followed by a newline to stderr.
;; The message is assembled in a temporary stack buffer and emitted with a
;; single eprint_str call.
;; rcx: error code (passed straight to error_to_str)
;; Preserves: rsi, r12, r13
;; Clobbers: rax, rcx, rdx, rdi, r8, r10, r11
;; NOTE(review): the strings error_to_str returns in this file already end
;; with a newline, so the output ends in two newlines - confirm whether the
;; extra one appended here is intended.
eprint_error:
    ; prologue: r12/r13 hold values across the calls below and are
    ; callee-saved registers, so they must be restored before returning
    push    rsi
    push    r12
    push    r13
    ; get error string
    call    error_to_str
    mov     r12, rax                ; r12 = pointer to error string
    mov     r13, rdx                ; r13 = length of error string
    ; buffer size = prefix + description + newline, rounded up to 16
    mov     rsi, r13
    add     rsi, error_msg_len
    add     rsi, 1
    add     rsi, 15
    and     rsi, -16                ; align up to 16
    sub     rsp, rsi                ; allocate buffer
    push    rsi                     ; save allocation size; buffer now starts at rsp + 8
    ; copy error_msg
    lea     rcx, [rsp + 8]
    lea     rdx, [rel error_msg]
    mov     r8, error_msg_len
    call    memcpy
    ; copy error string
    lea     rcx, [rsp + 8 + error_msg_len]
    mov     rdx, r12
    mov     r8, r13
    call    memcpy
    ; trailing newline
    mov     byte [rsp + r13 + 8 + error_msg_len], 10
    ; print error message
    lea     rcx, [rsp + 8]
    mov     rdx, error_msg_len
    add     rdx, r13
    add     rdx, 1                  ; include newline
    call    eprint_str
    pop     rsi                     ; rsi = allocation size saved above
    add     rsp, rsi                ; dealloc
    ; epilogue
    pop     r13
    pop     r12
    pop     rsi
    ret
;; alloc_pages: map n pages of anonymous, private, read/write memory
;; with the mmap syscall.
;; rcx: number of pages (4096 bytes each)
;; Returns:
;; rax: pointer to allocated memory
;; On failure the error is printed with eprint_error and the process is
;; terminated through oom, so this routine only returns on success.
alloc_pages:
    mov     rsi, rcx                ; length: number of pages ...
    shl     rsi, 12                 ; ... converted to bytes (page size = 4096)
    xor     edi, edi                ; addr: NULL
    mov     edx, 3                  ; prot: PROT_READ | PROT_WRITE
    mov     r10d, 34                ; flags: MAP_PRIVATE | MAP_ANONYMOUS
    mov     r8, -1                  ; fd: -1
    xor     r9d, r9d                ; offset: 0
    mov     eax, 9                  ; syscall: mmap
    syscall
    cmp     rax, -4095              ; results in [-4095, -1] are error codes
    jae     .mmap_failed
    ret
.mmap_failed:
    mov     rcx, rax                ; error code
    call    eprint_error
    call    oom

View file

@ -1,18 +1,21 @@
;; Compile with:
;; nasm -f elf64 main.asm -o main.o
extern int_to_str
extern oom
extern panic
extern strlen
extern memcpy
extern eprint_str
extern exit
extern error_to_str
extern eprint_error
extern alloc_pages
section .data
hello_msg db "Hello, World!", 10
hello_msg_len equ $ - hello_msg
panic_msg db "panic occured!", 10
panic_msg_len equ $ - panic_msg
oom_msg db "panic: oom!", 10
oom_msg_len equ $ - oom_msg
file_error_msg db "Could not open file: "
file_error_msg_len equ $ - file_error_msg
error_msg db "Error: "
error_msg_len equ $ - error_msg
section .text
global _start
@ -28,8 +31,8 @@ _start:
; get filename from argv[1]
; argv is at rsp + 8
; check if argc > 1
mov rdx, hello_msg
mov rcx, hello_msg_len
mov rcx, hello_msg
mov rdx, hello_msg_len
call eprint_str
mov rax, [rsp] ; argc
cmp rax, 1
@ -40,11 +43,6 @@ _start:
; init tokeniser
mov rdx, rax ; rdx = pointer to filename
call tokeniser_init
; read until rax = 0
.read_loop:
call tokeniser_read_chunk
cmp rax, 0
jne .read_loop
call tokeniser_print
jmp .exit
@ -54,78 +52,6 @@ _start:
call exit
;; ==============================
;; Helper functions
;; ==============================
;; Abort the program with a default panic message
panic:
mov rdx, panic_msg
mov rcx, panic_msg_len
call eprint_str
; exit with error code 1
mov rax, 60 ; syscall: exit
mov rdi, 1 ; status: 1
syscall
;; Abort the program with a default panic message
oom:
mov rdx, oom_msg
mov rcx, oom_msg_len
call eprint_str
; exit with error code 1
mov rax, 60 ; syscall: exit
mov rdi, 1 ; status: 1
syscall
;; abort the program
;; rdx: status code
exit:
mov rax, 60 ; syscall: exit
mov rdi, rdx
syscall
;; Writes a string to stderr:
;; rdx: pointer to string
;; rcx: length of string
eprint_str:
mov rax, 1 ; syscall: write
mov rdi, 2 ; fd: stderr
mov rsi, rdx ; buf: str
mov rdx, rcx ; len: length
syscall
ret
;; calculates length of null-terminated string
;; rdx: pointer to string
strlen:
xor rax, rax ; length counter
.strlen_loop:
cmp byte [rdx + rax], 0
je .strlen_done
inc rax
jmp .strlen_loop
.strlen_done:
ret
;; Copy bytes from one memory location to another
;; rdx: destination pointer
;; rcx: source pointer
;; r8: number of bytes to copy
memcpy:
push r10
xor r10, r10
.memcpy_loop_byte:
cmp r10, r8
jge .memcpy_done
mov al, [rcx + r10]
mov [rdx + r10], al
inc r10
jmp .memcpy_loop_byte
.memcpy_done:
pop r10
ret
;; Opens file for reading:
;; rdx: pointer to filename (null-terminated)
fopen_read:
@ -139,8 +65,11 @@ fopen_read:
ret ;fd in rax
.file_error:
push rdi
mov rcx, rax
call eprint_error
mov rdx, rdi ; filename is in rdi
pop rdi
mov rcx, rdi ; filename is in rdi
call strlen ; get length of filename
mov r9, rax ; r9 = filename length
@ -153,14 +82,14 @@ fopen_read:
push rsi ; save allocation size
; copy file_error_msg
lea rdx, [rsp + 8]
mov rcx, file_error_msg
lea rcx, [rsp + 8]
mov rdx, file_error_msg
mov r8, file_error_msg_len
call memcpy
; copy filename
lea rdx, [rsp + 8 + file_error_msg_len]
mov rcx, rdi
lea rcx, [rsp + 8 + file_error_msg_len]
mov rdx, rdi
mov r8, r9
call memcpy
@ -169,143 +98,26 @@ fopen_read:
mov byte [rdx], 10
; print error message
lea rdx, [rsp + 8]
mov rcx, file_error_msg_len
add rcx, r9
add rcx, 1 ; include newline
lea rcx, [rsp + 8]
mov rdx, file_error_msg_len
add rdx, r9
add rdx, 1 ; include newline
call eprint_str
pop rsi
add rsp, rsi ; dealloc
call panic
section .rodata
e_is_dir db "Is a directory", 10
e_is_dir_len equ $ - e_is_dir
e_io db "I/O error", 10
e_io_len equ $ - e_io
e_bad_fd db "Bad file descriptor", 10
e_bad_fd_len equ $ - e_bad_fd
e_unknown db "Unknown error", 10
e_unknown_len equ $ - e_unknown
section .text
;; Converts an error code to a str (pointer, length) pair
;; rdx: error code
;; Returns:
;; rax: pointer to string
;; rdx: length of string
error_to_str:
cmp rdx, -21
je .e_is_dir
cmp rdx, -5
je .e_io
cmp rdx, -9
je .e_bad_fd
; unknown error
lea rax, [e_unknown]
mov rdx, e_unknown_len
ret
.e_is_dir:
lea rax, [e_is_dir]
mov rdx, e_is_dir_len
ret
.e_io:
lea rax, [e_io]
mov rdx, e_io_len
ret
.e_bad_fd:
lea rax, [e_bad_fd]
mov rdx, e_bad_fd_len
ret
;; rdx: error code
eprint_error:
; prologue
push r11
push r10
push rsi
; get error string
call error_to_str
mov r11, rax ; r11 = pointer to error string
mov r10, rdx ; r10 = length of error string
mov rsi, r10
add rsi, error_msg_len
add rsi, 1
add rsi, 15
and rsi, -16 ; align up to 16
sub rsp, rsi ; allocate buffer
push rsi ; save allocation size
; copy error_msg
lea rdx, [rsp + 8]
mov rcx, error_msg
mov r8, error_msg_len
call memcpy
; copy error string
lea rdx, [rsp + 8 + error_msg_len]
mov rcx, r11
mov r8, r10
call memcpy
; trailing newline
lea rdx, [rsp + 8 + error_msg_len + r10]
mov byte [rdx], 10
; print error message
lea rdx, [rsp + 8]
mov rcx, error_msg_len
add rcx, r10
add rcx, 1 ; include newline
call eprint_str
pop rsi
add rsp, rsi ; dealloc
; epilogue
pop rsi
pop r10
pop r11
ret
;; Allocates n pages of memory
;; rdx: number of pages
;; Returns:
;; rax: pointer to allocated memory
alloc_pages:
mov rax, 9 ; syscall: mmap
xor rdi, rdi ; addr: NULL
mov rsi, rdx ; length: number of pages
shl rsi, 12 ; length in bytes (page size = 4096)
mov rdx, 3 ; prot: PROT_READ | PROT_WRITE
mov r10, 34 ; flags: MAP_PRIVATE | MAP_ANONYMOUS
xor r8, r8 ; fd: -1
xor r9, r9 ; offset: 0
syscall
cmp rax, -4095 ; check for error
jae .alloc_error
ret
.alloc_error:
mov rdx, rax ; error code
call eprint_error
call oom
;; =============================
;; Linked list functions
;; ============================
;; =============================
;; Tokeniser functions
;; =============================
;; tokeniser state
section .data
; vec of buffer headers
buffer_headers dq 0
buffer_headers_size dd 0
buffer_headers_capacity dd 0
chunk_size equ 4096
current_buffer dd 0
current_offset dd 0
input_file dd 0
buffer dq 0
cursor dq 0
buffer_len dq 0
;; each buffer is chunk_size bytes large
;; buffer header structure:
@ -315,6 +127,8 @@ section .data
;; Tokens:
;; [let, if, else, fn, return, loop, break, continue, true, false, i32, u32, bool, =, +, -, *, /, %, ==, !=, <, <=, >, >=, &&, ||, !, (, ), {, }, [, ], ;, ',', ]
section .bss
statbuf: resb 144
section .text
;; Initialises the tokeniser
@ -324,108 +138,51 @@ tokeniser_init:
; this panics if the file doesn't exist
call fopen_read
mov [input_file], eax ; store file descriptor
mov dword [current_buffer], 0
mov dword [current_offset], 0
mov rdx, 1 ; allocate 1 page
mov dword [cursor], 0
mov dword [buffer_len], 0
; fstat
mov rax, 5 ; syscall: fstat
mov rdi, [input_file] ; fd
lea rsi, [statbuf] ; statbuf
syscall
cmp rax, 0
jl .report_error
; get file size from statbuf
mov r15, [statbuf + 48] ; st_size
; allocate buffer
mov rcx, r15
add rcx, 4095
shr rcx, 12 ; divide by 4096
call alloc_pages
mov [buffer_headers], rax ; store pointer to buffer headers
mov dword [buffer_headers_capacity], 4096 / 16 ; initial capacity for 4096 bytes
mov dword [buffer_headers_size], 0
mov [buffer], rax
mov [buffer_len], r15
; read initial chunk into file_buffer
call tokeniser_read_chunk
; read file into buffer
mov rax, 0 ; syscall: read
mov rdi, [input_file] ; fd
mov rsi, [buffer] ; buf
mov rdx, [buffer_len] ; count
syscall
cmp rax, 0
jl .report_error
ret
.report_error:
mov rcx, rax
call eprint_error
call panic
section .rodata
num_headers db "Number of buffer headers: "
num_headers_len equ $ - num_headers
section .bss
scratch_str: resb 1024
section .rdata
tokeniser_buffer db "Tokeniser buffer: ", 10
tokeniser_buffer_len equ $ - tokeniser_buffer
section .text
tokeniser_print:
mov r15, [buffer_headers_size]
mov rdx, num_headers
mov rcx, num_headers_len
mov rcx, tokeniser_buffer
mov rdx, tokeniser_buffer_len
call eprint_str
mov rcx, r15
mov rdx, scratch_str
call int_to_str
xor r14, r14 ; index
.print_loop:
cmp r14, r15
jge .print_done
mov rax, r14
shl rax, 4 ; rax = index * 16
lea rax, [buffer_headers + rax]
mov rdx, [rax] ; pointer to buffer
mov rcx, [rax + 8] ; size of buffer
mov rax, [cursor]
mov rcx, [buffer + rax]
mov rdx, [buffer_len]
call eprint_str
inc r14
jmp .print_loop
.print_done:
ret
;; Reads a chunk (4096 bytes) from the file into file_buffer
tokeniser_read_chunk:
; allocate new buffer
mov ecx, [buffer_headers_size]
mov eax, [buffer_headers_capacity]
cmp eax, ecx
jl .alloc_more_headers
.read_chunk:
mov rdx, 1 ; allocate 1 page
call alloc_pages
mov r14, rax ; r14 = pointer to new buffer
lea r15, [buffer_headers_size]
shl r15, 4 ; r15 = size * 16
lea r15, [buffer_headers + r15]
mov [r15], rax ; store pointer to new buffer
mov dword [r15 + 8], 0 ; size = 0 for now
mov rax, 0 ; syscall: read
mov rdi, [input_file] ; fd
mov rsi, r14 ; buffer
mov rdx, 4096 ; size
syscall
; check error
cmp rax, 0
jl .read_error
; store size of buffer
mov [r15 + 8], eax
ret
.read_error:
mov rdx, rax
call eprint_error
mov rax, 0
ret
.alloc_more_headers:
shl ecx, 1 ; double capacity
; capacity in items, not bytes, 256 items per page
; calculate capacity in pages:
shr ecx, 8 ; ecx = capacity / 256
mov edx, 1
cmp ecx, edx
cmovl ecx, edx
xor rdx, rdx
mov r15, rcx
shl r15, 8 ; r15= new_capacity
mov edx, ecx
call alloc_pages
mov rdx, rax
mov rcx, [buffer_headers]
mov r8, [buffer_headers_size]
shl r8, 4 ; size * 16
call memcpy
mov [buffer_headers], rdx
mov dword [buffer_headers_capacity], r15d
jmp .read_chunk
;; Read the next token from the buffer
;; Returns:
;; rax: token type
;; rdx: pointer to token text