overcomplicating things
This commit is contained in:
parent
703a8ba968
commit
7b43442ba8
|
|
@ -1,8 +1,8 @@
|
|||
# Makefile: Compile and link main.asm using nasm and mold, intermediate files in target/
|
||||
|
||||
TARGET_DIR := target
|
||||
SRC := src/main.asm
|
||||
OBJ := $(TARGET_DIR)/main.o
|
||||
SRC := src/main.asm src/int_to_str.asm
|
||||
OBJ := $(addprefix $(TARGET_DIR)/, $(notdir $(SRC:.asm=.o)))
|
||||
BIN := $(TARGET_DIR)/main
|
||||
|
||||
.PHONY: all clean
|
||||
|
|
@ -12,8 +12,8 @@ all: $(BIN)
|
|||
$(TARGET_DIR):
|
||||
mkdir -p $(TARGET_DIR)
|
||||
|
||||
$(OBJ): $(SRC) | $(TARGET_DIR)
|
||||
nasm -f elf64 -g $(SRC) -o $(OBJ)
|
||||
$(TARGET_DIR)/%.o: src/%.asm | $(TARGET_DIR)
|
||||
nasm -f elf64 -g $< -o $@
|
||||
|
||||
$(BIN): $(OBJ)
|
||||
mold -run ld -o $(BIN) $(OBJ)
|
||||
|
|
|
|||
70
lang/src/int_to_str.asm
Normal file
70
lang/src/int_to_str.asm
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
section .text
|
||||
global int_to_str
|
||||
|
||||
;; Converts the signed 64-bit integer in rcx to a NUL-terminated decimal
;; string at rdx.
;; rcx: input integer
;; rdx: pointer to output buffer (at least 21 bytes: sign + 19 digits + NUL)
;; Clobbers: rax, rcx, rdx, rsi, rdi, r10, r11, flags
int_to_str:
    mov     rdi, rdx                ; rdi = write cursor (start of buffer)
    mov     rax, rcx                ; rax = value, later its magnitude

    ; Emit the sign and switch to the magnitude for negative input.
    test    rax, rax
    jns     .convert
    mov     byte [rdi], '-'
    inc     rdi                     ; digits start after the sign
    neg     rax                     ; INT64_MIN stays 0x8000...; correct as unsigned

.convert:
    mov     r11, 10                 ; divisor
    xor     r10d, r10d              ; r10 = number of digits written

    ; Produce digits least-significant first. The loop body always runs at
    ; least once, so the value 0 yields the single digit '0'.
.digits_loop:
    xor     edx, edx                ; div divides rdx:rax
    div     r11                     ; rax = quotient, rdx = remainder
    add     dl, '0'                 ; remainder -> ASCII digit
    mov     [rdi + r10], dl
    inc     r10
    test    rax, rax
    jnz     .digits_loop

    ; Digits are reversed in [rdi, rdi + r10); swap them in place.
    ; Scratch bytes live in al/cl so the cursor register is never touched.
    xor     esi, esi                ; rsi = left index
    lea     rdx, [r10 - 1]          ; rdx = right index
.reverse_loop:
    cmp     rsi, rdx
    jae     .done_reverse
    mov     al, [rdi + rsi]
    mov     cl, [rdi + rdx]
    mov     [rdi + rsi], cl
    mov     [rdi + rdx], al
    inc     rsi
    dec     rdx
    jmp     .reverse_loop

.done_reverse:
    mov     byte [rdi + r10], 0     ; NUL directly after the last digit
    ret
|
||||
|
|
@ -1,20 +1,28 @@
|
|||
;; Compile with:
|
||||
;; nasm -f elf64 main.asm -o main.o
|
||||
extern int_to_str
|
||||
|
||||
section .data
|
||||
hello_msg db "Hello, World!", 10
|
||||
hello_msg_len equ $ - hello_msg
|
||||
panic_msg db "panic occured!", 10
|
||||
panic_msg_len equ $ - panic_msg
|
||||
oom_msg db "panic: oom!", 10
|
||||
oom_msg_len equ $ - oom_msg
|
||||
file_error_msg db "Could not open file: "
|
||||
file_error_msg_len equ $ - file_error_msg
|
||||
error_msg db "Error: "
|
||||
error_msg_len equ $ - error_msg
|
||||
buffer_size equ 1024
|
||||
buffer times buffer_size db 0
|
||||
|
||||
section .text
|
||||
global _start
|
||||
;; Spills r11 and r10 to the stack and reloads them; every register holds
;; the same value on return as on entry.
test_fn:
    lea     rsp, [rsp - 16]         ; reserve two slots (lea keeps flags intact)
    mov     [rsp + 8], r11
    mov     [rsp], r10
    mov     r10, [rsp]
    mov     r11, [rsp + 8]
    lea     rsp, [rsp + 16]         ; release the slots
    ret
|
||||
|
||||
|
||||
_start:
|
||||
; get filename from argv[1]
|
||||
|
|
@ -29,28 +37,16 @@ _start:
|
|||
; get argv[1]
|
||||
mov rax, [rsp + 16] ; argv[1]
|
||||
|
||||
; open file for reading
|
||||
mov rdx, rax ; filename pointer
|
||||
call fopen_read
|
||||
|
||||
; init tokeniser
|
||||
mov rdx, rax ; rdx = pointer to filename
|
||||
call tokeniser_init
|
||||
; read until rax = 0
|
||||
.read_loop:
|
||||
mov r9, rax ; file descriptor
|
||||
mov rax, 0 ; syscall: read
|
||||
mov rdi, r9 ; fd
|
||||
lea rsi, [buffer] ; buffer
|
||||
mov rdx, buffer_size ; size
|
||||
syscall
|
||||
cmp rax, 0 ; check for EOF
|
||||
jle .exit ; if rax <= 0, exit loop
|
||||
mov rcx, rax ; number of bytes read
|
||||
|
||||
; write to stdout for now
|
||||
mov rax, 1 ; syscall: write
|
||||
mov rdi, 1 ; fd: stdout
|
||||
lea rsi, [buffer] ; buffer
|
||||
mov rdx, rcx ; len: bytes read
|
||||
syscall
|
||||
jmp .read_loop
|
||||
call tokeniser_read_chunk
|
||||
cmp rax, 0
|
||||
jne .read_loop
|
||||
call tokeniser_print
|
||||
jmp .exit
|
||||
|
||||
.no_filename:
|
||||
call panic
|
||||
|
|
@ -72,6 +68,16 @@ panic:
|
|||
mov rdi, 1 ; status: 1
|
||||
syscall
|
||||
|
||||
;; Reports an out-of-memory condition: passes oom_msg to eprint_str, then
;; terminates the process with exit status 1. Does not return.
oom:
    lea     rdx, [oom_msg]          ; rdx = message pointer
    mov     ecx, oom_msg_len        ; rcx = message length
    call    eprint_str

    mov     eax, 60                 ; syscall: exit
    mov     edi, 1                  ; status: 1
    syscall
|
||||
|
||||
;; abort the program
|
||||
;; rdx: status code
|
||||
exit:
|
||||
|
|
@ -107,6 +113,7 @@ strlen:
|
|||
;; rcx: source pointer
|
||||
;; r8: number of bytes to copy
|
||||
memcpy:
|
||||
push r10
|
||||
xor r10, r10
|
||||
.memcpy_loop_byte:
|
||||
cmp r10, r8
|
||||
|
|
@ -116,6 +123,7 @@ memcpy:
|
|||
inc r10
|
||||
jmp .memcpy_loop_byte
|
||||
.memcpy_done:
|
||||
pop r10
|
||||
ret
|
||||
|
||||
;; Opens file for reading:
|
||||
|
|
@ -131,6 +139,7 @@ fopen_read:
|
|||
ret ;fd in rax
|
||||
|
||||
.file_error:
|
||||
call eprint_error
|
||||
mov rdx, rdi ; filename is in rdi
|
||||
call strlen ; get length of filename
|
||||
mov r9, rax ; r9 = filename length
|
||||
|
|
@ -169,3 +178,254 @@ fopen_read:
|
|||
pop rsi
|
||||
add rsp, rsi ; dealloc
|
||||
call panic
|
||||
|
||||
section .rodata
|
||||
e_is_dir db "Is a directory", 10
|
||||
e_is_dir_len equ $ - e_is_dir
|
||||
e_io db "I/O error", 10
|
||||
e_io_len equ $ - e_io
|
||||
e_bad_fd db "Bad file descriptor", 10
|
||||
e_bad_fd_len equ $ - e_bad_fd
|
||||
e_unknown db "Unknown error", 10
|
||||
e_unknown_len equ $ - e_unknown
|
||||
|
||||
section .text
|
||||
;; Maps a negated errno value to a message string.
;; rdx: error code (-21, -5 and -9 are recognised)
;; Returns:
;; rax: pointer to string
;; rdx: length of string
error_to_str:
    cmp     rdx, -21
    jne     .try_io
    lea     rax, [e_is_dir]
    mov     edx, e_is_dir_len
    ret

.try_io:
    cmp     rdx, -5
    jne     .try_bad_fd
    lea     rax, [e_io]
    mov     edx, e_io_len
    ret

.try_bad_fd:
    cmp     rdx, -9
    jne     .fallback
    lea     rax, [e_bad_fd]
    mov     edx, e_bad_fd_len
    ret

.fallback:
    ; any other code maps to the generic message
    lea     rax, [e_unknown]
    mov     edx, e_unknown_len
    ret
|
||||
|
||||
;; Builds "Error: " + error_to_str(code) + newline in a temporary stack
;; buffer and hands it to eprint_str.
;; rdx: error code
;; Preserves r11, r10 and rsi.
;; NOTE(review): the e_* strings already end in a newline, so the output
;; appears to end with two newlines - confirm this is intended.
eprint_error:
    ; save the registers used as locals
    push    r11
    push    r10
    push    rsi

    call    error_to_str
    mov     r11, rax                ; r11 = description pointer
    mov     r10, rdx                ; r10 = description length

    ; buffer size = prefix + description + newline, rounded up to 16
    lea     rsi, [r10 + error_msg_len + 1 + 15]
    and     rsi, -16
    sub     rsp, rsi                ; reserve the buffer
    push    rsi                     ; remember its size; buffer begins at rsp + 8

    ; buffer[0..] = "Error: "
    lea     rdx, [rsp + 8]
    mov     rcx, error_msg
    mov     r8, error_msg_len
    call    memcpy

    ; followed by the description
    lea     rdx, [rsp + 8 + error_msg_len]
    mov     rcx, r11
    mov     r8, r10
    call    memcpy

    ; followed by a newline
    mov     byte [rsp + r10 + 8 + error_msg_len], 10

    ; print prefix + description + newline
    lea     rdx, [rsp + 8]
    lea     rcx, [r10 + error_msg_len + 1]
    call    eprint_str

    pop     rsi                     ; buffer size
    add     rsp, rsi                ; release the buffer

    ; restore saved registers
    pop     rsi
    pop     r10
    pop     r11
    ret
|
||||
|
||||
;; Allocates n pages of anonymous, private, read/write memory with mmap.
;; On failure the error is printed with eprint_error and the program is
;; terminated through oom.
;; rdx: number of pages
;; Returns:
;; rax: pointer to allocated memory
;; Clobbers: rcx, rdx, rsi, rdi, r8, r9, r10, r11 (rcx/r11 by syscall)
alloc_pages:
    .SYS_MMAP    equ 9
    .PAGE_SHIFT  equ 12             ; page size = 4096
    .PROT_RW     equ 3              ; PROT_READ | PROT_WRITE
    .MAP_FLAGS   equ 34             ; MAP_PRIVATE | MAP_ANONYMOUS

    mov     rsi, rdx                ; length: number of pages ...
    shl     rsi, .PAGE_SHIFT        ; ... converted to bytes
    mov     eax, .SYS_MMAP          ; syscall: mmap
    xor     edi, edi                ; addr: NULL, let the kernel choose
    mov     edx, .PROT_RW           ; prot
    mov     r10d, .MAP_FLAGS        ; flags
    mov     r8, -1                  ; fd: -1, as mmap(2) advises for MAP_ANONYMOUS
    xor     r9d, r9d                ; offset: 0
    syscall

    ; error returns are -errno, i.e. in [-4095, -1]; unsigned compare
    cmp     rax, -4095
    jae     .alloc_error
    ret

.alloc_error:
    mov     rdx, rax                ; error code
    call    eprint_error
    call    oom                     ; exits the process
|
||||
|
||||
;; =============================
|
||||
;; Linked list functions
|
||||
;; ============================
|
||||
|
||||
;; =============================
|
||||
;; Tokeniser functions
|
||||
;; =============================
|
||||
|
||||
;; tokeniser state
|
||||
section .data
|
||||
; vec of buffer headers
|
||||
buffer_headers dq 0
|
||||
buffer_headers_size dd 0
|
||||
buffer_headers_capacity dd 0
|
||||
chunk_size equ 4096
|
||||
current_buffer dd 0
|
||||
current_offset dd 0
|
||||
input_file dd 0
|
||||
|
||||
;; each buffer is chunk_size bytes large
|
||||
;; buffer header structure:
|
||||
;; +0 (8 bytes): pointer to buffer
|
||||
;; +8 (8 bytes): size of buffer
|
||||
|
||||
;; Tokens:
|
||||
;; [let, if, else, fn, return, loop, break, continue, true, false, i32, u32, bool, =, +, -, *, /, %, ==, !=, <, <=, >, >=, &&, ||, !, (, ), {, }, [, ], ;, ',', ]
|
||||
|
||||
|
||||
section .text
|
||||
;; Sets up the tokeniser state for a source file: opens the file, resets
;; the cursor fields, allocates the first page of buffer headers and reads
;; the first chunk.
;; rdx: pointer to filename (null-terminated)
;; Returns whatever tokeniser_read_chunk leaves in rax.
tokeniser_init:
    ; open the file; per the original note, fopen_read panics if the
    ; file does not exist
    call    fopen_read
    mov     [input_file], eax       ; remember the file descriptor

    ; reset cursor and header-vector bookkeeping
    mov     dword [current_buffer], 0
    mov     dword [current_offset], 0
    mov     dword [buffer_headers_size], 0
    mov     dword [buffer_headers_capacity], 4096 / 16 ; 16-byte headers in one 4096-byte page

    ; one page for the header vector
    mov     edx, 1
    call    alloc_pages
    mov     [buffer_headers], rax

    ; read the initial chunk; tail call, so it returns straight to our caller
    jmp     tokeniser_read_chunk
|
||||
|
||||
section .rodata
|
||||
num_headers db "Number of buffer headers: "
|
||||
num_headers_len equ $ - num_headers
|
||||
section .bss
|
||||
scratch_str: resb 1024
|
||||
|
||||
section .text
|
||||
;; Debug dump of the tokeniser buffers: prints the "Number of buffer
;; headers: " label, converts the count into scratch_str, then passes each
;; recorded buffer (pointer, size) to eprint_str.
;; Uses r14 (index) and r15 (count) across calls, as the original did.
tokeniser_print:
    ; buffer_headers_size is a dword: a 32-bit load zero-extends into r15,
    ; whereas a 64-bit load would pull buffer_headers_capacity into the top half
    mov     r15d, [buffer_headers_size]

    mov     rdx, num_headers
    mov     rcx, num_headers_len
    call    eprint_str

    mov     rcx, r15
    mov     rdx, scratch_str
    call    int_to_str
    ; NOTE(review): the converted count stays in scratch_str and is not
    ; printed; behaviour kept as in the original - confirm whether it should be.

    xor     r14d, r14d              ; r14 = header index
.print_loop:
    cmp     r14, r15
    jae     .print_done
    mov     rax, r14
    shl     rax, 4                  ; index * 16 (header size)
    add     rax, [buffer_headers]   ; buffer_headers holds the array pointer
    mov     rdx, [rax]              ; pointer to buffer
    mov     rcx, [rax + 8]          ; size of buffer
    call    eprint_str
    inc     r14
    jmp     .print_loop

.print_done:
    ret
|
||||
|
||||
;; Reads the next chunk (up to chunk_size bytes) of the input file into a
;; freshly allocated page and appends a (pointer, size) header for it to
;; the buffer header vector, growing the vector first when it is full.
;; Returns:
;; rax: number of bytes read; 0 at end of file or after a read error
;;      (a read error is also reported through eprint_error)
;; Clobbers: rcx, rdx, rsi, rdi, r8, r9, r10, r11, r14, r15
;; NOTE(review): the page allocated for a read that returns 0 or fails, and
;; the old header array after growth, are not unmapped.
tokeniser_read_chunk:
    ; grow when there is no free slot, i.e. size >= capacity
    mov     ecx, [buffer_headers_size]
    cmp     ecx, [buffer_headers_capacity]
    jae     .alloc_more_headers

.read_chunk:
    mov     edx, 1                  ; allocate 1 page for the chunk
    call    alloc_pages
    mov     r14, rax                ; r14 = pointer to new buffer

    ; r15 = &headers[size]; buffer_headers holds the array pointer
    mov     r15d, [buffer_headers_size]
    shl     r15, 4                  ; size * 16 (header size)
    add     r15, [buffer_headers]
    mov     [r15], r14              ; header.pointer
    mov     qword [r15 + 8], 0      ; header.size = 0 until the read succeeds

    xor     eax, eax                ; syscall: read
    mov     edi, [input_file]       ; fd is stored as a dword
    mov     rsi, r14                ; buffer
    mov     edx, chunk_size         ; size
    syscall

    test    rax, rax
    js      .read_error
    jz      .done                   ; end of file: nothing to record

    mov     [r15 + 8], rax          ; header.size = bytes read
    inc     dword [buffer_headers_size] ; the slot is now in use
.done:
    ret

.read_error:
    mov     rdx, rax                ; error code
    call    eprint_error
    xor     eax, eax                ; report "nothing read" to the caller
    ret

.alloc_more_headers:
    ; ecx = current size (equal to the capacity here).
    ; New capacity = 2 * size, in whole pages of 256 headers, minimum 1 page.
    shl     ecx, 1                  ; double
    shr     ecx, 8                  ; headers -> pages (256 headers per page)
    mov     edx, 1
    cmp     ecx, edx
    cmovb   ecx, edx                ; at least one page
    mov     r15d, ecx
    shl     r15, 8                  ; r15 = new capacity in headers

    mov     edx, ecx                ; number of pages
    call    alloc_pages             ; rax = new header array

    ; Publish the new array and capacity before copying, so nothing
    ; depends on which registers memcpy preserves.
    mov     rdx, rax                ; memcpy destination
    mov     rcx, [buffer_headers]   ; memcpy source: old array
    mov     r8d, [buffer_headers_size] ; dword load
    shl     r8, 4                   ; bytes to copy = size * 16
    mov     [buffer_headers], rax
    mov     [buffer_headers_capacity], r15d
    call    memcpy
    jmp     .read_chunk
|
||||
|
||||
|
||||
|
||||
|
||||
;; Read the next token from the buffer
|
||||
;; Returns:
|
||||
;; rax: token type
|
||||
;; rdx: pointer to token text
|
||||
|
|
|
|||
Loading…
Reference in a new issue