from-scratch/lang/src/lib.asm
2025-10-28 12:44:08 +01:00

410 lines
7.8 KiB
NASM

; Make memory operands RIP-relative by default.
default rel
; Message strings written to stderr by the helpers below.
section .rdata
; NOTE(review): panic_msg is not referenced in the visible part of this file.
panic_msg db "panic occured!", 10
panic_msg_len equ $ - panic_msg
; Printed by `oom` before it exits.
oom_msg db "oom!", 10
oom_msg_len equ $ - oom_msg
; Prefix printed by `eprint_error` (no trailing newline).
error_msg db "Error: "
error_msg_len equ $ - error_msg
section .text
; Symbols exported from this file.
global oom
global strlen
global strcmp
global strchr
global streq
global memcpy
global memswap
global eprint_str
global exit
global error_to_str
global eprint_error
; NOTE(review): no `alloc_pages` label is defined in the visible part of this file.
global alloc_pages
global allocate
global is_alpha
global is_numeric
global is_whitespace
global is_id_continue
global is_id_start
; Defined in another object file; not called in the visible part of this file.
extern panic
;; ==============================
;; Helper functions
;; ==============================
;; Report an out-of-memory condition and terminate the process.
;; Writes oom_msg to stderr via eprint_str, then issues the exit
;; syscall with status 1. Takes no arguments and never returns.
oom:
        mov     esi, oom_msg_len        ; rsi = message length
        lea     rdi, [rel oom_msg]      ; rdi = message pointer
        call    eprint_str
        mov     edi, 1                  ; exit status 1
        mov     eax, 60                 ; syscall: exit
        syscall
;; Terminate the process immediately.
;; In:  rdi = exit status
;; Does not return.
exit:
        mov     eax, 60                 ; syscall: exit (status already in rdi)
        syscall
;; Write a byte string to stderr with a single write syscall.
;; In:   rdi = pointer to the bytes
;;       rsi = number of bytes
;; Out:  rax = raw result of the write syscall
;; The argument registers are reshuffled into the syscall layout
;; (fd, buf, len), so rdi, rsi and rdx are overwritten.
eprint_str:
        mov     rdx, rsi                ; len
        mov     rsi, rdi                ; buf
        mov     edi, 2                  ; fd: stderr
        mov     eax, 1                  ; syscall: write
        syscall
        ret
;; Length of a NUL-terminated string.
;; In:   rdi = pointer to the string (left unmodified)
;; Out:  rax = number of bytes before the terminating 0
strlen:
        mov     rax, rdi                ; rax = scan cursor
.scan:
        cmp     byte [rax], 0
        je      .finish
        inc     rax
        jmp     .scan
.finish:
        sub     rax, rdi                ; cursor - start = length
        ret
;; Checks two byte slices for equality
;; In:   rdi = pointer to first slice
;;       rsi = length of first slice
;;       rdx = pointer to second slice
;;       rcx = length of second slice
;; Out:  rax = 1 if equal, 0 if not equal
;; Clobbers: rax, r8, flags
;; fn streq(a: &[u8], b: &[u8]) -> bool
streq:
        ; if a.len() != b.len() { return false; }
        cmp     rsi, rcx
        jne     .false
        ; for i in 0..a.len() {
        xor     r8, r8                  ; r8 = i
.loop:
        cmp     r8, rsi
        jae     .true                   ; lengths are unsigned: jae, not jge
        ;     if a[i] != b[i] { return false; }
        mov     al, [rdi + r8]
        cmp     al, [rdx + r8]
        jne     .false
        inc     r8
        jmp     .loop
        ; }
        ; return true;
.true:
        mov     eax, 1
        ret
.false:
        xor     eax, eax
        ret
;; Compares two byte slices lexicographically (bytes compared as unsigned)
;; In:   rdi = pointer to first slice
;;       rsi = length of first slice
;;       rdx = pointer to second slice
;;       rcx = length of second slice
;; Out:  rax = -1 (a < b), 0 (a == b) or 1 (a > b)
;; Clobbers: rax, r8, r9, flags
;; fn strcmp(a: &[u8], b: &[u8]) -> Ordering
strcmp:
        ; let min_len = min(a.len(), b.len());
        mov     rax, rsi
        cmp     rsi, rcx
        cmova   rax, rcx                ; unsigned min, consistent with .length_check
        ; for i in 0..min_len {
        xor     r8, r8                  ; r8 = i
.loop:
        cmp     r8, rax
        jae     .length_check           ; lengths are unsigned: jae, not jge
        mov     r9b, [rdi + r8]
        cmp     r9b, [rdx + r8]
        ;     if a[i] < b[i] { return Ordering::Less; }
        jb      .less
        ;     else if a[i] > b[i] { return Ordering::Greater; }
        ja      .greater
        inc     r8
        jmp     .loop
        ; }
        ; Common prefix is identical: the shorter slice orders first.
.length_check:
        cmp     rsi, rcx
        jb      .less
        ja      .greater
        xor     eax, eax                ; Ordering::Equal
        ret
.less:
        mov     rax, -1
        ret
.greater:
        mov     eax, 1
        ret
;; Copy bytes from one memory location to another, front to back,
;; one byte at a time.
;; In:   rdi = destination pointer
;;       rsi = source pointer
;;       rdx = number of bytes to copy (unsigned)
;; Clobbers: rax (al), r8, flags
;; fn memcpy(dest: *mut u8, src: *const u8, n: usize)
memcpy:
        ; for i in 0..n {
        xor     r8, r8                  ; r8 = i
.loop:
        cmp     r8, rdx
        jae     .done                   ; n is unsigned: jae, not jge
        ;     dest[i] = src[i];
        mov     al, [rsi + r8]
        mov     [rdi + r8], al
        inc     r8
        jmp     .loop
        ; }
.done:
        ret
;; Swap bytes between two memory locations, one byte at a time.
;; In:   rdi = pointer to first memory location
;;       rsi = pointer to second memory location
;;       rdx = number of bytes to swap (unsigned)
;; Clobbers: rax (al), rcx (cl), r8, flags
;; fn memswap(a: *mut u8, b: *mut u8, n: usize)
memswap:
        ; for i in 0..n {
        xor     r8, r8                  ; r8 = i
.loop:
        cmp     r8, rdx
        jae     .done                   ; n is unsigned: jae, not jge
        mov     al, [rdi + r8]          ; let temp = a[i];
        mov     cl, [rsi + r8]
        mov     [rdi + r8], cl          ; a[i] = b[i];
        mov     [rsi + r8], al          ; b[i] = temp;
        inc     r8
        jmp     .loop
        ; }
.done:
        ret
;; Finds the first occurrence of byte c in slice s.
;; In:   rdi = pointer to byte slice
;;       rsi = length of byte slice (unsigned)
;;       dl  = byte to find
;; Out:  found:     rax = pointer to the matching byte,
;;                  rdx = number of bytes from the match to the end of s
;;       not found: rax = 0, rdx = 0
;; Clobbers: rax, rdx, r8, flags
;; fn strchr(s: &[u8], c: u8) -> &[u8]
strchr:
        ; for i in 0..s.len() {
        xor     r8, r8                  ; r8 = i
.loop:
        cmp     r8, rsi
        jae     .null                   ; unsigned bound; also covers s.len() == 0
        ;     if s[i] == c { return &s[i..]; }
        cmp     byte [rdi + r8], dl
        je      .found
        inc     r8
        jmp     .loop
        ; }
        ; return &[];
.null:
        xor     eax, eax
        xor     edx, edx
        ret
.found:
        lea     rax, [rdi + r8]         ; &s[i]
        mov     rdx, rsi
        sub     rdx, r8                 ; s.len() - i
        ret
section .rdata
; Human-readable messages for the error codes handled by error_to_str.
; Each message ends with a newline (10); *_len is its byte count.
e_is_dir        db "Is a directory", 10
e_is_dir_len    equ $ - e_is_dir
e_io            db "I/O error", 10
e_io_len        equ $ - e_io
e_bad_fd        db "Bad file descriptor", 10
e_bad_fd_len    equ $ - e_bad_fd
e_unknown       db "Unknown error", 10
e_unknown_len   equ $ - e_unknown

section .text
;; Map an error code to its message.
;; In:   rdi = error code (-5, -9 and -21 are recognised)
;; Out:  rax = pointer to the message
;;       rdx = length of the message in bytes
;; Any other code yields the "Unknown error" message.
error_to_str:
        cmp     rdi, -5
        je      .io
        cmp     rdi, -9
        je      .bad_fd
        cmp     rdi, -21
        je      .is_dir
        ; no match: fall back to the generic message
        lea     rax, [rel e_unknown]
        mov     edx, e_unknown_len
        ret
.io:
        lea     rax, [rel e_io]
        mov     edx, e_io_len
        ret
.bad_fd:
        lea     rax, [rel e_bad_fd]
        mov     edx, e_bad_fd_len
        ret
.is_dir:
        lea     rax, [rel e_is_dir]
        mov     edx, e_is_dir_len
        ret
;; Print "Error: " followed by the message for an error code to stderr.
;; In:   rdi = error code (as understood by error_to_str)
;; fn eprint_error(err_code: isize)
eprint_error:
        ; let (err, len) = error_to_str(err_code);
        call    error_to_str
        push    rdx                     ; keep len across the first print
        push    rax                     ; keep err across the first print
        ; eprint_str(ERROR_STR, ERROR_STR.len());
        lea     rdi, [rel error_msg]
        mov     esi, error_msg_len
        call    eprint_str
        ; eprint_str(err, len);
        pop     rdi                     ; err
        pop     rsi                     ; len
        call    eprint_str
        ret
;; Allocate zero-filled, private, anonymous memory straight from the kernel.
;; In:   rdi = number of bytes to allocate
;; Out:  rax = pointer to the new read/write mapping
;; On failure the error is printed with eprint_error and the process is
;; terminated through oom; the function does not return in that case.
;; Clobbers: rdi, rsi, rdx, r8, r9, r10, plus rcx and r11 (syscall)
;; fn allocate(bytes: usize) -> *mut u8
allocate:
        mov     rsi, rdi                ; length: number of bytes
        xor     edi, edi                ; addr: NULL (kernel picks the address)
        mov     edx, 3                  ; prot: PROT_READ | PROT_WRITE
        mov     r10d, 34                ; flags: MAP_PRIVATE | MAP_ANONYMOUS
        mov     r8, -1                  ; fd: -1 (no backing file)
        xor     r9d, r9d                ; offset: 0
        mov     eax, 9                  ; syscall: mmap
        syscall
        ; A raw syscall reports failure as -errno, i.e. a value in
        ; [-4095, -1]. Comparing against -1 alone would miss e.g. ENOMEM (-12)
        ; and hand the error code back to the caller as if it were a pointer.
        cmp     rax, -4095
        jae     .alloc_error
        ret
.alloc_error:
        mov     rdi, rax                ; -errno
        call    eprint_error
        call    oom                     ; exits the process
;; Returns 1 if dil is an ASCII alphabetic character, 0 otherwise
;; In:   dil = byte to check (rdi is left unmodified)
;; Out:  rax = 1 for 'A'..='Z' or 'a'..='z', 0 for anything else
;; Clobbers: rax, flags
;; fn is_alpha(c: u8) -> bool
is_alpha:
        ; if ('A' <= c && c <= 'Z')
        cmp     dil, 'A'
        jb      .false
        cmp     dil, 'Z'
        jbe     .true
        ; || ('a' <= c && c <= 'z') {
        cmp     dil, 'a'
        jb      .false
        cmp     dil, 'z'
        ja      .false                  ; bytes above 'z' must not fall into .true
        ;     return true;
.true:
        mov     eax, 1
        ret
        ; } else {
        ;     return false;
.false:
        xor     eax, eax
        ret
;; Returns 1 if dil is an ASCII decimal digit, 0 otherwise
;; In:   dil = byte to check (rdi is left unmodified)
;; Out:  rax = 1 for '0'..='9', 0 for anything else
;; Clobbers: rax, flags
is_numeric:
        cmp     dil, '0'
        jb      .not_numeric
        cmp     dil, '9'
        ja      .not_numeric            ; bytes above '9' must not fall into the true path
        mov     eax, 1
        ret
.not_numeric:
        xor     eax, eax
        ret
;; Returns 1 if dil may appear inside an identifier: a byte accepted by
;; is_alpha or is_numeric, or an underscore.
;; In:   dil = byte to check
;; Out:  rax = 1 if accepted, 0 otherwise
;; Relies on is_alpha and is_numeric leaving rdi untouched (both only
;; write rax), so dil is still the input byte after each call.
is_id_continue:
        cmp     dil, '_'                ; the input lives in dil, not cl
        je      .is_id_continue_ret
        call    is_alpha
        test    rax, rax
        jne     .is_id_continue_ret
        call    is_numeric
        test    rax, rax
        jne     .is_id_continue_ret
        xor     eax, eax
        ret
.is_id_continue_ret:
        mov     eax, 1
        ret
;; Returns 1 if dil may start an identifier: a byte accepted by is_alpha,
;; or an underscore.
;; In:   dil = byte to check
;; Out:  rax = 1 if accepted, 0 otherwise
is_id_start:
        cmp     dil, '_'                ; the input lives in dil, not cl
        je      .is_ret
        call    is_alpha
        test    rax, rax
        jne     .is_ret
        xor     eax, eax
        ret
.is_ret:
        mov     eax, 1
        ret
;; Returns 1 if dil is a space, tab, newline or carriage return.
;; In:   dil = byte to check (rdi is left unmodified)
;; Out:  rax = 1 if whitespace, 0 otherwise
is_whitespace:
        xor     eax, eax                ; default answer: not whitespace
        cmp     dil, 9                  ; tab
        je      .yes
        cmp     dil, 10                 ; newline
        je      .yes
        cmp     dil, 13                 ; carriage return
        je      .yes
        cmp     dil, 32                 ; space
        jne     .done
.yes:
        mov     eax, 1
.done:
        ret