Compare commits

...

10 commits

Author SHA1 Message Date
janis 81417de6ca
asdf 2025-10-18 10:59:30 +02:00
janis 8a0c822533
can tokenise stuff 2025-10-17 18:22:27 +02:00
janis 90061bf50a
stuff 2025-10-17 01:11:51 +02:00
janis 7b43442ba8
overcomplicating things 2025-10-16 23:50:30 +02:00
janis 703a8ba968
read-print file 2025-10-16 21:33:58 +02:00
janis 7b413d540c
rename die to panic, add exit function 2025-10-16 19:16:12 +02:00
janis 7bc428caf3
exit() instead of ret in main 2025-10-16 19:14:28 +02:00
janis 3d4cfc8e2b
trying not to segment 2025-10-16 19:11:48 +02:00
janis 6214d15888
initial asm file 2025-10-16 18:11:05 +02:00
janis 149ee03810
flake lock / direnv 2025-10-16 17:12:42 +02:00
8 changed files with 979 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/.direnv/
lang/target

96
flake.lock Normal file
View file

@ -0,0 +1,96 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1760524057,
"narHash": "sha256-EVAqOteLBFmd7pKkb0+FIUyzTF61VKi7YmvP1tw4nEw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "544961dfcce86422ba200ed9a0b00dd4b1486ec5",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1744536153,
"narHash": "sha256-awS2zRgF4uTwrOKwwiJcByDzDOdo3Q1rPZbiHQg/N38=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "18dd725c29603f582cf1900e0d25f9f1063dbf11",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs",
"rust-overlays": "rust-overlays"
}
},
"rust-overlays": {
"inputs": {
"nixpkgs": "nixpkgs_2"
},
"locked": {
"lastModified": 1760582142,
"narHash": "sha256-RSLRjAoS75szOc9fFzRi9/jzPbYsiqPISSLZTloaKtM=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "9ea094253b9389ba7dd4f18637f66b5824276d1d",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

View file

@ -23,8 +23,15 @@
devShells.default = pkgs.mkShell { devShells.default = pkgs.mkShell {
buildInputs = [ buildInputs = [
pkg-config pkg-config
man-pages
mold mold
clang clang
clang-tools
just
just-formatter
just-lsp
gdb
gdbgui
nasm nasm
nasmfmt nasmfmt
git git

25
lang/Makefile Normal file
View file

@ -0,0 +1,25 @@
# Makefile: assemble the NASM sources and link them into $(TARGET_DIR)/main.
# All intermediate objects and the final binary live under $(TARGET_DIR)/.

TARGET_DIR := target
SRC := src/main.asm src/lib.asm src/int_to_str.asm
OBJ := $(addprefix $(TARGET_DIR)/, $(notdir $(SRC:.asm=.o)))
BIN := $(TARGET_DIR)/main

# None of these targets produce a file with the target's name; `run` was
# previously missing, so a stray file called "run" would have disabled it.
.PHONY: all run clean

all: $(BIN)

# Order-only prerequisite of the object rule below: creates the output dir.
$(TARGET_DIR):
	mkdir -p $(TARGET_DIR)

$(TARGET_DIR)/%.o: src/%.asm | $(TARGET_DIR)
	nasm -f elf64 -g $< -o $@

$(BIN): $(OBJ)
	mold -run ld -o $(BIN) $(OBJ)

# Runs the binary without arguments.
run: $(BIN)
	$(BIN)

clean:
	rm -rf $(TARGET_DIR)

70
lang/src/int_to_str.asm Normal file
View file

@ -0,0 +1,70 @@
section .text
global int_to_str
;; int_to_str: write a signed 64-bit integer as a null-terminated decimal string.
;; rcx: input integer (interpreted as signed)
;; rdx: pointer to output buffer; must hold at least 21 bytes
;;      ('-' + up to 19 digits + NUL)
;; Clobbers: rax, rdx, r8, r9, r11, flags
;;
;; Digits are produced least-significant first directly into the buffer and
;; then reversed in place.
int_to_str:
        mov     r8, rdx                 ; r8 = write cursor
        mov     rax, rcx                ; rax = magnitude still to convert
        test    rcx, rcx
        jns     .magnitude_ready
        mov     byte [r8], '-'
        inc     r8
        neg     rax                     ; the most negative value keeps its bit
                                        ; pattern, which is the right unsigned
                                        ; magnitude for the division below
.magnitude_ready:
        mov     r9, r8                  ; r9 = address of the first digit
        mov     r11, 10

        ; do-while: always emits at least one digit, so 0 becomes "0"
.digits_loop:
        xor     edx, edx                ; div divides rdx:rax
        div     r11                     ; rax = quotient, rdx = remainder
        add     dl, '0'
        mov     [r8], dl
        inc     r8
        test    rax, rax
        jnz     .digits_loop

        mov     byte [r8], 0            ; terminator goes right after the digits

        ; reverse the digits in [r9, r8)
        dec     r8                      ; r8 = address of the last digit
.reverse_loop:
        cmp     r9, r8
        jae     .done
        mov     al, [r9]
        mov     dl, [r8]
        mov     [r9], dl
        mov     [r8], al
        inc     r9
        dec     r8
        jmp     .reverse_loop
.done:
        ret

351
lang/src/lib.asm Normal file
View file

@ -0,0 +1,351 @@
section .rdata

;; Fixed messages; each *_len is the byte count of the string defined just above it.
;; NOTE(review): "occured" is kept exactly as in the original message.
panic_msg:              db      "panic occured!", 10
panic_msg_len           equ     $ - panic_msg

oom_msg:                db      "panic: oom!", 10
oom_msg_len             equ     $ - oom_msg

file_error_msg:         db      "Could not open file: "
file_error_msg_len      equ     $ - file_error_msg

error_msg:              db      "Error: "
error_msg_len           equ     $ - error_msg
section .text
global oom
global panic
global strlen
global strcmp
global streq
global memcpy
global eprint_str
global exit
global error_to_str
global eprint_error
global alloc_pages
global is_alpha
global is_numeric
global is_id_continue
global is_id_start
;; ==============================
;; Helper functions
;; ==============================
;; panic: print the generic panic message to stderr and terminate with status 1.
;; Takes no arguments; control never comes back to the caller.
panic:
        lea     rcx, [rel panic_msg]    ; eprint_str: string pointer
        mov     edx, panic_msg_len      ; eprint_str: string length
        call    eprint_str

        mov     edi, 1                  ; status: 1
        mov     eax, 60                 ; syscall: exit
        syscall
;; oom: print the out-of-memory message to stderr and terminate with status 1.
;; Takes no arguments; control never comes back to the caller.
oom:
        lea     rcx, [rel oom_msg]      ; eprint_str: string pointer
        mov     edx, oom_msg_len        ; eprint_str: string length
        call    eprint_str

        mov     edi, 1                  ; status: 1
        mov     eax, 60                 ; syscall: exit
        syscall
;; exit: terminate the process.
;; rcx: status code handed to the exit syscall
exit:
        mov     rdi, rcx                ; status
        mov     eax, 60                 ; syscall: exit
        syscall
;; eprint_str: write a byte string to stderr with a single write syscall.
;; rcx: pointer to string
;; rdx: length of string
;; Returns:
;; rax: result of the write syscall (not checked here)
;; Overwrites rdi and rsi; the syscall itself clobbers rcx and r11.
eprint_str:
        mov     rsi, rcx                ; buf
        mov     edi, 2                  ; fd: stderr
        mov     eax, 1                  ; syscall: write
        syscall
        ret
;; strlen: length of a null-terminated string, terminator not counted.
;; rcx: pointer to string
;; Returns:
;; rax: number of bytes before the first 0 byte
strlen:
        mov     rax, -1                 ; pre-decremented so the first inc gives 0
.scan:
        inc     rax
        cmp     byte [rcx + rax], 0
        jne     .scan
        ret
;; streq: check two byte slices for equality.
;; rcx: pointer to first slice
;; rdx: length of first slice
;; r8: pointer to second slice
;; r9: length of second slice
;; Returns:
;; rax: 1 if both slices have the same length and bytes, 0 otherwise
;; Clobbers: r10, flags. rcx, rdx, r8, r9 are left untouched.
;; (Earlier versions used bl as scratch, which corrupted the caller's rbx.)
streq:
        cmp     rdx, r9
        jne     .not_equal              ; different lengths can never match
        xor     r10d, r10d              ; r10 = index
.compare:
        cmp     r10, rdx
        jae     .equal                  ; lengths are unsigned
        mov     al, [rcx + r10]
        cmp     al, [r8 + r10]
        jne     .not_equal
        inc     r10
        jmp     .compare
.equal:
        mov     eax, 1
        ret
.not_equal:
        xor     eax, eax
        ret
;; strcmp: lexicographic comparison of two byte slices (bytes compared unsigned).
;; rcx: pointer to first slice (a)
;; rdx: length of first slice
;; r8: pointer to second slice (b)
;; r9: length of second slice
;; Returns:
;; rax: -1 if a < b, 0 if equal, 1 if a > b
;;      When one slice is a prefix of the other, the shorter one is smaller.
;; Clobbers: r10, r11, flags. rcx, rdx, r8, r9 are left untouched.
;; (Earlier versions used bl as scratch, which corrupted the caller's rbx.)
strcmp:
        mov     r10, rdx
        cmp     r9, rdx
        cmovb   r10, r9                 ; r10 = min(len a, len b)
        xor     r11d, r11d              ; r11 = index
.compare:
        cmp     r11, r10
        jae     .prefix_equal           ; common prefix exhausted (also covers
                                        ; the case where either slice is empty)
        mov     al, [rcx + r11]
        cmp     al, [r8 + r11]
        jb      .less
        ja      .greater
        inc     r11
        jmp     .compare
.prefix_equal:
        cmp     rdx, r9                 ; the lengths decide
        jb      .less
        ja      .greater
        xor     eax, eax
        ret
.less:
        mov     rax, -1
        ret
.greater:
        mov     eax, 1
        ret
;; memcpy: copy bytes front to back, one byte at a time.
;; rcx: destination pointer
;; rdx: source pointer
;; r8: number of bytes to copy (compared as signed; nothing is copied if <= 0)
;; Uses al and r10 as scratch; rcx, rdx and r8 are left unchanged.
memcpy:
        xor     r10d, r10d              ; r10 = bytes copied so far
        test    r8, r8
        jle     .finished
.copy_byte:
        mov     al, [rdx + r10]
        mov     [rcx + r10], al
        inc     r10
        cmp     r10, r8
        jl      .copy_byte
.finished:
        ret
section .rdata
;; Error descriptions returned by error_to_str.
;; Every string already ends in a newline, and the *_len values include it.
e_is_dir db "Is a directory", 10
e_is_dir_len equ $ - e_is_dir
e_io db "I/O error", 10
e_io_len equ $ - e_io
e_bad_fd db "Bad file descriptor", 10
e_bad_fd_len equ $ - e_bad_fd
e_no_ent db "No such file or directory", 10
e_no_ent_len equ $ - e_no_ent
e_access db "Permission denied", 10
e_access_len equ $ - e_access
e_no_mem db "Out of memory", 10
e_no_mem_len equ $ - e_no_mem
e_unknown db "Unknown error", 10
e_unknown_len equ $ - e_unknown

section .text
;; error_to_str: map a negative errno value, as returned by a raw syscall,
;; to a (pointer, length) string pair.
;; rcx: error code (-errno)
;; Returns:
;; rax: pointer to string
;; rdx: length of string
;; Codes without a dedicated message yield "Unknown error".
;; Touches only rax, rdx and flags.
error_to_str:
        cmp     rcx, -2                 ; ENOENT
        je      .e_no_ent
        cmp     rcx, -5                 ; EIO
        je      .e_io
        cmp     rcx, -9                 ; EBADF
        je      .e_bad_fd
        cmp     rcx, -12                ; ENOMEM
        je      .e_no_mem
        cmp     rcx, -13                ; EACCES
        je      .e_access
        cmp     rcx, -21                ; EISDIR
        je      .e_is_dir
        ; unknown error
        lea     rax, [rel e_unknown]
        mov     edx, e_unknown_len
        ret
.e_no_ent:
        lea     rax, [rel e_no_ent]
        mov     edx, e_no_ent_len
        ret
.e_io:
        lea     rax, [rel e_io]
        mov     edx, e_io_len
        ret
.e_bad_fd:
        lea     rax, [rel e_bad_fd]
        mov     edx, e_bad_fd_len
        ret
.e_no_mem:
        lea     rax, [rel e_no_mem]
        mov     edx, e_no_mem_len
        ret
.e_access:
        lea     rax, [rel e_access]
        mov     edx, e_access_len
        ret
.e_is_dir:
        lea     rax, [rel e_is_dir]
        mov     edx, e_is_dir_len
        ret
;; eprint_error: print "Error: <description>" followed by a newline to stderr.
;; rcx: error code, translated to text with error_to_str
;; Preserves: rsi, r12, r13, rbp (r12/r13 were previously overwritten
;; without being saved).
;; Clobbers: rax, rcx, rdx, rdi, r8, r10, r11, flags
;;
;; The line is assembled in a temporary stack buffer so that it reaches
;; stderr through a single eprint_str call.
;; NOTE(review): the descriptions returned by error_to_str already end in a
;; newline, so the newline appended here produces an empty line after the
;; message. Behaviour kept as-is; confirm whether that is intended.
eprint_error:
        ; prologue
        push    rbp
        mov     rbp, rsp
        push    rsi
        push    r12
        push    r13

        ; get error string
        call    error_to_str
        mov     r12, rax                ; r12 = pointer to error string
        mov     r13, rdx                ; r13 = length of error string

        ; reserve prefix + description + newline, rounded up to 16 bytes
        lea     rax, [r13 + error_msg_len + 1 + 15]
        and     rax, -16
        sub     rsp, rax                ; buffer starts at rsp

        ; copy error_msg
        mov     rcx, rsp
        lea     rdx, [rel error_msg]
        mov     r8d, error_msg_len
        call    memcpy

        ; copy error string
        lea     rcx, [rsp + error_msg_len]
        mov     rdx, r12
        mov     r8, r13
        call    memcpy

        ; trailing newline
        mov     byte [rsp + r13 + error_msg_len], 10

        ; print error message
        mov     rcx, rsp
        lea     rdx, [r13 + error_msg_len + 1]  ; include newline
        call    eprint_str

        ; epilogue: drop the buffer by returning to the saved-register area
        lea     rsp, [rbp - 24]
        pop     r13
        pop     r12
        pop     rsi
        pop     rbp
        ret
;; alloc_pages: map n pages of anonymous, private, read/write memory.
;; rcx: number of pages (each 4096 bytes)
;; Returns:
;; rax: pointer to the mapped memory
;; If mmap reports an error, the error is printed with eprint_error and the
;; program is terminated through oom.
alloc_pages:
        mov     rsi, rcx                ; length: number of pages ...
        shl     rsi, 12                 ; ... times 4096 = length in bytes
        xor     edi, edi                ; addr: NULL
        mov     edx, 3                  ; prot: PROT_READ | PROT_WRITE
        mov     r10d, 34                ; flags: MAP_PRIVATE | MAP_ANONYMOUS
        mov     r8, -1                  ; fd: -1
        xor     r9d, r9d                ; offset: 0
        mov     eax, 9                  ; syscall: mmap
        syscall
        cmp     rax, -4095              ; error values lie in [-4095, -1]
        jb      .mapped
        mov     rcx, rax                ; error code
        call    eprint_error
        call    oom
.mapped:
        ret
;; is_alpha: test whether a byte is an ASCII letter ('A'-'Z' or 'a'-'z').
;; cl: byte to check
;; Returns:
;; rax: 1 if alphabetic, 0 otherwise
;; Only rax and flags are modified; cl is left intact for the caller.
is_alpha:
        xor     eax, eax                ; default answer: not alphabetic
        cmp     cl, 'A'
        jb      .done
        cmp     cl, 'Z'
        jbe     .yes
        cmp     cl, 'a'
        jb      .done
        cmp     cl, 'z'
        ja      .done                   ; bytes above 'z' used to fall through
                                        ; into the "return 1" path
.yes:
        mov     eax, 1
.done:
        ret
;; is_numeric: test whether a byte is an ASCII decimal digit ('0'-'9').
;; cl: byte to check
;; Returns:
;; rax: 1 if it is a digit, 0 otherwise
;; Only rax and flags are modified; cl is left intact for the caller.
is_numeric:
        xor     eax, eax                ; default answer: not a digit
        cmp     cl, '0'
        jb      .done
        cmp     cl, '9'
        ja      .done                   ; bytes above '9' used to fall through
                                        ; into the "return 1" path
        mov     eax, 1
.done:
        ret
;; is_id_continue: may this byte appear inside an identifier?
;; True when is_alpha or is_numeric accepts it, or when it is '_'.
;; cl: byte to check
;; Returns:
;; rax: 1 if accepted, 0 otherwise
is_id_continue:
        call    is_alpha
        cmp     rax, 1
        je      .done                   ; rax already holds 1
        call    is_numeric
        cmp     rax, 1
        je      .done
        xor     eax, eax
        cmp     cl, '_'
        sete    al                      ; rax = (cl == '_')
.done:
        ret
;; is_id_start: may this byte begin an identifier?
;; True when is_alpha accepts it or when it is '_'.
;; cl: byte to check
;; Returns:
;; rax: 1 if accepted, 0 otherwise
is_id_start:
        call    is_alpha
        cmp     rax, 1
        je      .done                   ; rax already holds 1
        xor     eax, eax
        cmp     cl, '_'
        sete    al                      ; rax = (cl == '_')
.done:
        ret

427
lang/src/main.asm Normal file
View file

@ -0,0 +1,427 @@
;; Compile with:
;; nasm -f elf64 main.asm -o main.o
extern int_to_str
extern oom
extern panic
extern strlen
extern strcmp
extern streq
extern memcpy
extern eprint_str
extern exit
extern error_to_str
extern eprint_error
extern alloc_pages
extern is_alpha
extern is_numeric
extern is_id_continue
extern is_id_start
section .data

;; Greeting written to stderr at startup.
hello_msg:              db      "Hello, World!", 10
hello_msg_len           equ     $ - hello_msg

;; Prefix placed in front of the filename when opening a file fails.
file_error_msg:         db      "Could not open file: "
file_error_msg_len      equ     $ - file_error_msg
section .text
global _start
;; _start: program entry point.
;; Stack on entry: [rsp] = argc, [rsp + 8] = argv[0], [rsp + 16] = argv[1].
;; Prints a greeting, loads the file named by argv[1] into the tokeniser and
;; echoes the lexeme of every token to stderr until the tokeniser returns 0.
;; Panics when no filename was given; otherwise exits with status 0.
_start:
        lea     rcx, [rel hello_msg]
        mov     edx, hello_msg_len
        call    eprint_str

        ; check if argc > 1
        cmp     qword [rsp], 1
        jle     .no_filename            ; if argc <= 1, no filename provided

        ; init tokeniser with argv[1]
        mov     rdx, [rsp + 16]         ; rdx = pointer to filename
        call    tokeniser_init
        call    tokeniser_print
.loop:
        call    tokeniser_next_token
        test    rax, rax
        jz      .exit                   ; 0 = end of input
        ; rax indexes the lexeme tables
        mov     rcx, [LEXEMES + rax*8]
        mov     rdx, [LEXEME_LENS + rax*8]
        call    eprint_str
        jmp     .loop
.no_filename:
        call    panic
.exit:
        xor     ecx, ecx                ; status 0; rcx previously held whatever
                                        ; the last call left behind
        call    exit
;; fopen_read: open a file read-only.
;; rdx: pointer to filename (null-terminated)
;; Returns:
;; rax: file descriptor
;; On failure the error is printed with eprint_error, followed by
;; "Could not open file: <filename>" and a newline, and the program panics.
fopen_read:
        mov     rdi, rdx                ; filename
        xor     esi, esi                ; flags: O_RDONLY
        xor     edx, edx                ; mode
        mov     eax, 2                  ; syscall: open
        syscall
        test    rax, rax
        js      .file_error
        ret                             ; fd in rax
.file_error:
        push    rdi                     ; keep the filename across eprint_error
        mov     rcx, rax
        call    eprint_error
        pop     rdi

        mov     rcx, rdi
        call    strlen
        mov     r9, rax                 ; r9 = filename length

        ; stack buffer: prefix + filename + newline, rounded up to 16 bytes
        lea     rsi, [r9 + file_error_msg_len + 1 + 15]
        and     rsi, -16
        sub     rsp, rsi
        push    rsi                     ; remember the size; buffer is at rsp + 8

        ; prefix
        lea     rcx, [rsp + 8]
        lea     rdx, [rel file_error_msg]
        mov     r8d, file_error_msg_len
        call    memcpy

        ; filename
        lea     rcx, [rsp + 8 + file_error_msg_len]
        mov     rdx, rdi
        mov     r8, r9
        call    memcpy

        ; trailing newline
        mov     byte [rsp + r9 + 8 + file_error_msg_len], 10

        ; print the assembled line
        lea     rcx, [rsp + 8]
        lea     rdx, [r9 + file_error_msg_len + 1]
        call    eprint_str

        pop     rsi
        add     rsp, rsi                ; release the buffer
        call    panic
;; =============================
;; Tokeniser functions
;; =============================
;; tokeniser state
section .data
;; File descriptor of the input file, stored as a 32-bit value by tokeniser_init.
;; NOTE(review): this is only 4 bytes wide, but tokeniser_init also loads it
;; with 64-bit moves, which pulls in the low bytes of `buffer` that follow it.
input_file dd 0
;; Pointer to the memory that holds the file contents (set from alloc_pages).
buffer dq 0
;; Byte offset of the next unread character, relative to the start of the buffer.
cursor dq 0
;; Number of bytes in the buffer (set from the file's st_size).
buffer_len dq 0
;; NOTE(review): the following description mentions a chunk_size and a buffer
;; header, neither of which appears in the code shown here -- possibly stale.
;; each buffer is chunk_size bytes large
;; buffer header structure:
;; +0 (8 bytes): pointer buffer
;; +8 (8 bytes): size of buffer
;; Tokens (presumably the planned token set; only part of it has lexeme
;; definitions in this file):
;; [let, if, else, fn, return, loop, break, continue, true, false, i32, u32, bool, =, +, -, *, /, %, ==, !=, <, <=, >, >=, &&, ||, !, (, ), {, }, [, ], ;, ',', ]
section .bss
;; Scratch area filled by the fstat syscall; tokeniser_init reads st_size at
;; offset 48.
statbuf: resb 144
section .text
;; tokeniser_init: open a file and load its entire contents into memory.
;; rdx: pointer to filename (null-terminated)
;; On return:
;;   [input_file] = file descriptor
;;   [buffer]     = pointer to the file contents (left unchanged for an empty file)
;;   [buffer_len] = number of bytes actually read
;;   [cursor]     = 0
;; Panics if the file cannot be opened, stat'ed or read.
;; Preserves r15; other scratch registers are clobbered by the syscalls and callees.
tokeniser_init:
        push    r15                     ; holds the file size across alloc_pages

        ; open file for reading
        ; this panics if the file doesn't exist
        call    fopen_read
        mov     [input_file], eax       ; store file descriptor
        mov     qword [cursor], 0
        mov     qword [buffer_len], 0

        ; fstat
        mov     eax, 5                  ; syscall: fstat
        mov     edi, [input_file]       ; fd (32-bit load: the variable is a dword)
        lea     rsi, [rel statbuf]      ; statbuf
        syscall
        test    rax, rax
        js      .report_error

        ; get file size from statbuf
        mov     r15, [statbuf + 48]     ; st_size
        test    r15, r15
        jz      .done                   ; empty file: a zero-length mmap would
                                        ; fail, and there is nothing to read

        ; allocate buffer: size rounded up to whole 4096-byte pages
        lea     rcx, [r15 + 4095]
        shr     rcx, 12
        call    alloc_pages
        mov     [buffer], rax

        ; read file into buffer
        xor     eax, eax                ; syscall: read
        mov     edi, [input_file]       ; fd
        mov     rsi, [buffer]           ; buf
        mov     rdx, r15                ; count
        syscall
        test    rax, rax
        js      .report_error
        mov     [buffer_len], rax       ; only the bytes that were really read
.done:
        pop     r15
        ret
.report_error:
        mov     rcx, rax
        call    eprint_error
        call    panic
section .rdata
tokeniser_buffer db "Tokeniser buffer: ", 10
tokeniser_buffer_len equ $ - tokeniser_buffer

section .text
;; tokeniser_print: dump the unread part of the tokeniser buffer to stderr.
;; Writes the "Tokeniser buffer: " heading, then the bytes from the cursor
;; up to the end of the buffer.
;; Takes no arguments. Clobbers whatever eprint_str clobbers, plus rax, rcx, rdx.
tokeniser_print:
        lea     rcx, [rel tokeniser_buffer]
        mov     edx, tokeniser_buffer_len
        call    eprint_str

        mov     rax, [cursor]
        mov     rcx, [buffer]           ; load the buffer pointer first ...
        add     rcx, rax                ; ... then step to the cursor
        mov     rdx, [buffer_len]
        sub     rdx, rax                ; bytes left after the cursor
        call    eprint_str
        ret                             ; was missing: execution ran on into the
                                        ; next routine in .text
section .rdata
;; Parallel tables indexed by lexeme index. Index 0 is a placeholder that
;; never matches any input.

;; Pointer to the text of each lexeme.
LEXEMES: dq \
    LEX_NOT_A_LEXEME, \
    LEX_LET, \
    LEX_IF, \
    LEX_ELSE, \
    LEX_FN, \
    LEX_ARROW

;; Token id belonging to each lexeme.
TOKENS: db \
    TOKEN_EOF, \
    TOKEN_LET, \
    TOKEN_IF, \
    TOKEN_ELSE, \
    TOKEN_FN, \
    TOKEN_ARROW

;; Byte length of each lexeme.
LEXEME_LENS: dq \
    0, \
    LEX_LET_len, \
    LEX_IF_len, \
    LEX_ELSE_len, \
    LEX_FN_len, \
    LEX_ARROW_len

;; Number of table entries, placeholder included. Derived from the table size
;; so it cannot fall out of step: the hard-coded 5 left the last entry ("->")
;; outside the range the tokeniser scans (indices 1 .. NUM_LEXEMES - 1).
NUM_LEXEMES equ ($ - LEXEME_LENS) / 8
;; DEFINE_LEXEME name, id, text
;; Emits, in this order:
;;   TOKEN_<name>    equ id
;;   LEX_<name>      db  text            (no terminator)
;;   LEX_<name>_len  equ byte length of text
%macro DEFINE_LEXEME 3
TOKEN_%{1}      equ     %2
LEX_%{1}:       db      %3
LEX_%{1}_len    equ     $ - LEX_%{1}
%endmacro

;; Placeholder text for lexeme index 0.
LEX_NOT_A_LEXEME: db "<not a lexeme>", 0
TOKEN_EOF       equ     0

;; keywords and type names
DEFINE_LEXEME LET,      1,  "let"
DEFINE_LEXEME IF,       2,  "if"
DEFINE_LEXEME ELSE,     3,  "else"
DEFINE_LEXEME FN,       4,  "fn"
DEFINE_LEXEME RETURN,   5,  "return"
DEFINE_LEXEME LOOP,     6,  "loop"
DEFINE_LEXEME BREAK,    7,  "break"
DEFINE_LEXEME CONTINUE, 8,  "continue"
DEFINE_LEXEME TRUE,     9,  "true"
DEFINE_LEXEME FALSE,    10, "false"
DEFINE_LEXEME BOOL,     11, "bool"
DEFINE_LEXEME ARROW,    12, "->"
DEFINE_LEXEME I32,      13, "i32"
DEFINE_LEXEME U32,      14, "u32"

;; operators and punctuation
DEFINE_LEXEME EQUALS,   15, "="
DEFINE_LEXEME PLUS,     16, "+"
DEFINE_LEXEME MINUS,    17, "-"
DEFINE_LEXEME RPARENS,  18, ")"
DEFINE_LEXEME LPARENS,  19, "("
DEFINE_LEXEME RBRACE,   20, "}"
DEFINE_LEXEME LBRACE,   21, "{"
DEFINE_LEXEME COLON,    22, ":"
DEFINE_LEXEME SEMI,     23, ";"
DEFINE_LEXEME COMMA,    24, ","
DEFINE_LEXEME PIPE,     25, "|"
DEFINE_LEXEME AMP,      26, "&"
DEFINE_LEXEME EQEQ,     27, "=="

;; display names for token classes without a fixed spelling
DEFINE_LEXEME IDENT,    28, "<identifier>"
DEFINE_LEXEME NUMBER,   29, "<number>"
section .text
;; try_lexeme: try to match one fixed lexeme at the cursor.
;; rcx: lexeme index (into LEXEMES / LEXEME_LENS)
;; Returns:
;; rax: 0 if the input at the cursor does not start with the lexeme
;;      (cursor unchanged);
;;      TOKEN_IDENT if the lexeme matched but is only the prefix of a longer
;;      identifier (cursor moved past the whole identifier);
;;      otherwise the lexeme index itself (cursor moved past the lexeme).
;; Preserves: rcx, r8, r15. Clobbers: rax, rdx, r9 and whatever streq clobbers.
try_lexeme:
        push    r8
        push    r15
        push    rcx                     ; save lexeme index

        ; r8 = address of the current character, rax = bytes remaining
        mov     r9, [cursor]
        mov     r8, [buffer]
        add     r8, r9
        mov     rax, [buffer_len]
        sub     rax, r9
        mov     rdx, [LEXEME_LENS + rcx*8]
        cmp     rax, rdx
        jl      .not_equal              ; not enough input left

        ; compare memory
        mov     rcx, [LEXEMES + rcx*8]
        mov     r9, rdx
        call    streq
        cmp     rax, 1
        jne     .not_equal

        ; r15 = cursor position right behind the lexeme
        mov     r15, [cursor]
        add     r15, rdx

        ; check if it could be an ident:
        ;   the buffer must not have ended
        ;   the next char must be id_continue
        ;   first char must be id_start
        cmp     r15, [buffer_len]
        jge     .keyword
        mov     r8, [buffer]            ; r8 = start of the file contents; the
                                        ; characters live behind this pointer,
                                        ; not at the address of `buffer` itself
        mov     cl, [r8 + r15]
        call    is_id_continue
        cmp     rax, 1
        jne     .keyword
        ; check first char
        mov     rax, [cursor]
        mov     cl, [r8 + rax]
        call    is_id_start
        cmp     rax, 1
        jne     .keyword

        ; this is an ident
        ; move cursor forward while is_id_continue
.ident_loop:
        cmp     r15, [buffer_len]
        jge     .done_ident
        mov     cl, [r8 + r15]
        call    is_id_continue
        cmp     rax, 1
        jne     .done_ident
        inc     r15
        jmp     .ident_loop
.done_ident:
        mov     [cursor], r15
        pop     rcx
        pop     r15
        pop     r8
        mov     eax, TOKEN_IDENT
        ret
.keyword:
        mov     [cursor], r15           ; consume exactly the lexeme
        pop     rcx
        pop     r15
        pop     r8
        mov     rax, rcx
        ret
.not_equal:
        pop     rcx
        pop     r15
        pop     r8
        xor     eax, eax
        ret
;; tokeniser_next_token: advance the cursor to the next recognised lexeme.
;; Takes no arguments.
;; Returns:
;; rax: TOKEN_EOF (0) when the cursor reaches the end of the buffer, otherwise
;;      the index (1 .. NUM_LEXEMES - 1) of the lexeme that try_lexeme accepted.
;; Preserves: rbx, r15 (both were previously clobbered).
;; Characters that start no known lexeme are skipped one at a time.
;; NOTE(review): the lexeme index is returned even when try_lexeme reports
;; TOKEN_IDENT, so identifiers that start with a keyword are reported as that
;; keyword. Kept as-is because the caller uses the result as a table index.
tokeniser_next_token:
        push    rbx                     ; streq, reached through try_lexeme,
                                        ; may use bl as scratch
        push    r15                     ; r15 = lexeme index being tried
.next_char:
        ; check if at end of buffer
        mov     rax, [cursor]
        cmp     rax, [buffer_len]
        jge     .eof

        ; skip spaces without consulting the lexeme table
        mov     rdx, [buffer]
        cmp     byte [rdx + rax], ' '
        je      .skip

        mov     r15, 1                  ; index 0 is the placeholder entry
.try_next:
        cmp     r15, NUM_LEXEMES
        ; TODO: numbers, idents
        jge     .skip                   ; nothing matched: drop this character
        mov     rcx, r15
        call    try_lexeme
        test    rax, rax
        jnz     .return_token
        inc     r15
        jmp     .try_next
.skip:
        inc     qword [cursor]
        jmp     .next_char
.eof:
        mov     eax, TOKEN_EOF
        jmp     .return
.return_token:
        mov     rax, r15
.return:
        pop     r15
        pop     rbx
        ret

1
lang/src/test.m Normal file
View file

@ -0,0 +1 @@
let fn if