diff --git a/lang/libcompiler/.cargo/config.toml b/lang/libcompiler/.cargo/config.toml new file mode 100644 index 0000000..da06a0a --- /dev/null +++ b/lang/libcompiler/.cargo/config.toml @@ -0,0 +1,25 @@ +[target.x86_64-unknown-linux-gnu] +linker = "clang" +rustflags = [ + # LLD linker + # + # You may need to install it: + # + # - Ubuntu: `sudo apt-get install lld clang` + # - Fedora: `sudo dnf install lld clang` + # - Arch: `sudo pacman -S lld clang` + # "-Clink-arg=-fuse-ld=lld", + + # Mold linker + # + # You may need to install it: + # + # - Ubuntu: `sudo apt-get install mold clang` + # - Fedora: `sudo dnf install mold clang` + # - Arch: `sudo pacman -S mold clang` + "-Clink-arg=-fuse-ld=mold", + + # Nightly + # "-Zshare-generics=y", + # "-Zthreads=0", +] \ No newline at end of file diff --git a/lang/libcompiler/build.rs b/lang/libcompiler/build.rs index eeb4729..922e2a9 100644 --- a/lang/libcompiler/build.rs +++ b/lang/libcompiler/build.rs @@ -30,41 +30,42 @@ fn main() { println!("cargo:rustc-link-search=native={}", out_dir.display()); let working_dir = manifest_dir.parent().unwrap(); - for file in assembly_files.iter().map(|f| Path::new(f)) { + for file in assembly_files.iter().map(Path::new) { let path = working_dir.join(file); let obj = file.with_extension("o").file_name().unwrap().to_owned(); - let lib = format!("lib{}.a", file.file_stem().unwrap().to_str().unwrap()); + let obj_path = out_dir.join(&obj); std::process::Command::new("nasm") .current_dir(working_dir) .arg(path) + .arg("-wreloc-abs") .arg("-g") .arg("-f") .arg("elf64") .arg("-o") - .arg(out_dir.join(&obj)) + .arg(&obj_path) .status() .expect("Failed to assemble assembly files"); - std::process::Command::new("ar") - .current_dir(working_dir) - .arg("crs") - .arg(out_dir.join(lib)) - .arg(out_dir.join(obj)) - .status() - .expect("Failed to create static library from object files"); - println!( - "cargo:rustc-link-lib=static={}", - file.file_stem().unwrap().to_str().unwrap() - ); + 
println!("cargo:rustc-link-arg={}", obj_path.display()); + // let _lib = format!("lib{}.a", file.file_stem().unwrap().to_str().unwrap()); + // std::process::Command::new("ar") + // .current_dir(working_dir) + // .arg("crs") + // .arg(out_dir.join(lib)) + // .arg(out_dir.join(obj)) + // .status() + // .expect("Failed to create static library from object files"); + // println!( + // "cargo:rustc-link-lib=static={}", + // file.file_stem().unwrap().to_str().unwrap() + // ); } - std::process::Command::new("../tools/asm2rust") .current_dir(working_dir) - .args(&assembly_files) - .args(&include_files) + .args(assembly_files) + .args(include_files) .arg("-o") - .arg(out_dir - .join("bindings.rs") -) - .status().expect("Failed to generate Rust bindings from assembly files"); + .arg(out_dir.join("bindings.rs")) + .status() + .expect("Failed to generate Rust bindings from assembly files"); } diff --git a/lang/libcompiler/src/lib.rs b/lang/libcompiler/src/lib.rs index f8e5568..2896919 100644 --- a/lang/libcompiler/src/lib.rs +++ b/lang/libcompiler/src/lib.rs @@ -1,4 +1,7 @@ -#![feature(debug_closure_helpers)] +#![feature(debug_closure_helpers, box_as_ptr, allocator_api)] + +#[cfg(test)] +mod tests; pub mod ffi { #![allow( @@ -12,14 +15,14 @@ pub mod ffi { } #[repr(C)] -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] pub struct FFISlice { pub ptr: *const u8, pub len: usize, } #[repr(transparent)] -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] pub struct MaybeFFISlice { inner: FFISlice, } @@ -40,20 +43,27 @@ impl MaybeFFISlice { impl FFISlice { /// # Safety - /// The caller must ensure that the slice is valid for type T. - pub unsafe fn as_slice(&self) -> &[T] { + /// The caller must ensure that the slice is valid for type T, and lasts for 'a. + pub unsafe fn as_slice_unchecked<'a, T: Sized>(self) -> &'a [T] { // SAFETY: The caller ensures that the FFISlice is valid for type T. 
unsafe { core::slice::from_raw_parts(self.ptr.cast(), self.len) } } - pub fn as_bytes(&self) -> &[u8] { + + /// # Safety + /// The caller must ensure that the slice is a valid byte slice. + /// Namely, the pointer must be well-aligned and point to `len` bytes, and + /// must last for at least 'a. + pub fn as_u8s_unchecked<'a>(self) -> &'a [u8] { // SAFETY: The FFISlice is guaranteed to be a valid byte slice. - unsafe { core::slice::from_raw_parts(self.ptr, self.len) } + unsafe { self.as_slice_unchecked() } } /// # Safety /// The caller must ensure that the slice is a valid utf8 string. - pub unsafe fn as_str(&self) -> &str { + /// Furthermore, the pointer must be well-aligned, point to `len` bytes, and + /// must last for at least 'a. + pub unsafe fn as_str_unchecked<'a>(self) -> &'a str { // SAFETY: The caller ensures that the FFISlice is a valid utf8 string. - unsafe { core::str::from_utf8_unchecked(self.as_bytes()) } + unsafe { core::str::from_utf8_unchecked(self.as_u8s_unchecked()) } } } @@ -610,3 +620,11 @@ mod display { } pub use display::{Displayed, DisplayedSliceExt}; + +impl Default for ffi::Ast { + fn default() -> Self { + ffi::Ast { + nodes: vec::Vec::default(), + } + } +} diff --git a/lang/src/alloc.asm b/lang/src/alloc.asm index 50f8892..0446d78 100644 --- a/lang/src/alloc.asm +++ b/lang/src/alloc.asm @@ -1,6 +1,6 @@ default rel -section .bss +section .data align 8 free_list: resb 40 @@ -67,7 +67,7 @@ bump_new_block: shr rax, 1 add rax, 1 add rsi, rax - mov rdx, mmap_alloc + lea rdx, [rel mmap_alloc] call vec_try_grow_with pop rdi @@ -141,6 +141,8 @@ bump_alloc: lea rdi, [rel free_list] mov r12, [rdi + 8] xor r13, r13 + cmp qword [rdi + 0], 0 + je .init .alloc_loop: cmp r13, r12 jae .no_block @@ -183,6 +185,11 @@ bump_alloc: mov r12, r13 dec r13 jmp .alloc_loop +.init: + call bump_init + mov rdi, [rsp] ; size + call bump_new_block + jmp .alloc_loop .found_space: mov r12, [rsp + 32] ; block entry ptr mov rcx, [r12] ; block_ptr diff --git
a/lang/src/int_to_str.asm b/lang/src/int_to_str.asm index 39b3e8a..b8b3c58 100644 --- a/lang/src/int_to_str.asm +++ b/lang/src/int_to_str.asm @@ -6,7 +6,7 @@ global str_to_int ;; rdi: pointer to input string ;; rsi: length of input string ;; dl: radix -;; fn str_to_int(s: *const u8, len: usize, radix: u8) -> i64 +;; define-fn: str_to_int(s: *const u8, len: usize, radix: u8) -> i64 str_to_int: push rbp mov rbp, rsp @@ -73,7 +73,7 @@ str_to_int: ;; rsi: pointer to output buffer (at least 21 bytes) ;; rdx: length of buffer ;; cl: radix -;; fn int_to_str2(value: i64, buffer: *mut u8, len: usize, radix: u8) -> (*mut u8, usize) +;; define-fn: int_to_str2(value: i64, buffer: *mut u8, len: usize, radix: u8) -> FFISlice int_to_str2: push rbp mov rbp, rsp diff --git a/lang/src/tokeniser.asm b/lang/src/tokeniser.asm index ad6273a..cc666e2 100644 --- a/lang/src/tokeniser.asm +++ b/lang/src/tokeniser.asm @@ -1,3 +1,5 @@ +default rel + section .text extern panic extern strlen @@ -143,8 +145,8 @@ global NUM_LEXEMES section .text ;; rdi: length of previously matched lexeme -;; returns the length of the ident -;; fn is_ident(lexeme_len: usize) -> usize +;; returns the length of the ident, or 0 if not an ident +;; define-fn: is_ident(lexeme_len: usize) -> usize is_ident: push rbp mov rbp, rsp @@ -376,8 +378,15 @@ skip_whitespaces: ret +;; start-structs +;; struct RawLexeme { +;; token: u8, +;; slice: FFISlice, +;; } +;; +;; end-structs ;; rdi: pointer to out-struct -;; fn find_lexeme() -> (u8, *const u8, usize) +;; define-fn: find_lexeme() -> RawLexeme find_lexeme: push rbp mov rbp, rsp @@ -523,7 +532,11 @@ find_lexeme: mov [rdi + 16], rax jmp .epilogue +;; ```rust +;; use crate::MaybeFFISlice; +;; ``` ;; dil: expected token +;; define-fn: fn expect_token(expected: u8) -> MaybeFFISlice expect_token: push rbp mov rbp, rsp @@ -552,6 +565,7 @@ expect_token: ;; Returns the next token if it matches the expected token, else panics ;; dil: expected token +;; define-fn: fn 
unwrap_token(expected: u8) -> FFISlice unwrap_token: push rbp mov rbp, rsp @@ -566,6 +580,7 @@ unwrap_token: ;; returns 0 if token not found, else returns lexeme (ptr, len) ;; doesn't advance the cursor ;; dil: expected token +;; define-fn: fn peek_expect_token(expected: u8) -> MaybeFFISlice peek_expect_token: push rbp mov rbp, rsp @@ -579,6 +594,7 @@ peek_expect_token: ;; returns the next lexeme without advancing the cursor ;; rdi: out-struct pointer +;; define-fn: fn peek_lexeme() -> RawLexeme peek_lexeme: push rbp mov rbp, rsp @@ -593,6 +609,7 @@ peek_lexeme: ret ;; Skips one token ahead, without returning it. +;; define-fn: fn skip_token() skip_token: push rbp mov rbp, rsp @@ -603,6 +620,7 @@ skip_token: add rsp, 24 pop rbp +;; define-fn: fn tokeniser_get_cursor() -> usize tokeniser_get_cursor: mov rax, [rel cursor] ret diff --git a/lang/src/vec.asm b/lang/src/vec.asm index e2cd6d3..f32f28b 100644 --- a/lang/src/vec.asm +++ b/lang/src/vec.asm @@ -345,7 +345,7 @@ vec_remove: ;; rsi: desired size ;; define-fn: fn vec_try_grow(vec: *mut BlobVec, new_size: usize) -> bool vec_try_grow: - mov rdx, bump_alloc + lea rdx, [rel bump_alloc] call vec_try_grow_with ret