Compare commits

..

2 commits

Author SHA1 Message Date
janis 04044a78ff
tests for alloc,vec,tokeniser 2025-11-14 12:59:31 +01:00
janis 7bc3c14095
move tests to use cargo 2025-11-13 14:39:45 +01:00
8 changed files with 927 additions and 60 deletions

View file

@ -0,0 +1,25 @@
[target.x86_64-unknown-linux-gnu]
linker = "clang"
rustflags = [
# LLD linker
#
# You may need to install it:
#
# - Ubuntu: `sudo apt-get install lld clang`
# - Fedora: `sudo dnf install lld clang`
# - Arch: `sudo pacman -S lld clang`
# "-Clink-arg=-fuse-ld=lld",
# Mold linker
#
# You may need to install it:
#
# - Ubuntu: `sudo apt-get install mold clang`
# - Fedora: `sudo dnf install mold clang`
# - Arch: `sudo pacman -S mold clang`
"-Clink-arg=-fuse-ld=mold",
# Nightly
# "-Zshare-generics=y",
# "-Zthreads=0",
]

View file

@ -30,41 +30,42 @@ fn main() {
println!("cargo:rustc-link-search=native={}", out_dir.display());
let working_dir = manifest_dir.parent().unwrap();
for file in assembly_files.iter().map(|f| Path::new(f)) {
for file in assembly_files.iter().map(Path::new) {
let path = working_dir.join(file);
let obj = file.with_extension("o").file_name().unwrap().to_owned();
let lib = format!("lib{}.a", file.file_stem().unwrap().to_str().unwrap());
let obj_path = out_dir.join(&obj);
std::process::Command::new("nasm")
.current_dir(working_dir)
.arg(path)
.arg("-wreloc-abs")
.arg("-g")
.arg("-f")
.arg("elf64")
.arg("-o")
.arg(out_dir.join(&obj))
.arg(&obj_path)
.status()
.expect("Failed to assemble assembly files");
std::process::Command::new("ar")
.current_dir(working_dir)
.arg("crs")
.arg(out_dir.join(lib))
.arg(out_dir.join(obj))
.status()
.expect("Failed to create static library from object files");
println!(
"cargo:rustc-link-lib=static={}",
file.file_stem().unwrap().to_str().unwrap()
);
println!("cargo:rustc-link-arg={}", obj_path.display());
// let _lib = format!("lib{}.a", file.file_stem().unwrap().to_str().unwrap());
// std::process::Command::new("ar")
// .current_dir(working_dir)
// .arg("crs")
// .arg(out_dir.join(lib))
// .arg(out_dir.join(obj))
// .status()
// .expect("Failed to create static library from object files");
// println!(
// "cargo:rustc-link-lib=static={}",
// file.file_stem().unwrap().to_str().unwrap()
// );
}
std::process::Command::new("../tools/asm2rust")
.current_dir(working_dir)
.args(&assembly_files)
.args(&include_files)
.args(assembly_files)
.args(include_files)
.arg("-o")
.arg(out_dir
.join("bindings.rs")
)
.status().expect("Failed to generate Rust bindings from assembly files");
.arg(out_dir.join("bindings.rs"))
.status()
.expect("Failed to generate Rust bindings from assembly files");
}

View file

@ -1,4 +1,7 @@
#![feature(debug_closure_helpers)]
#![feature(debug_closure_helpers, box_as_ptr, allocator_api)]
#[cfg(test)]
mod tests;
pub mod ffi {
#![allow(
@ -12,14 +15,23 @@ pub mod ffi {
}
#[repr(C)]
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub struct FFISlice {
pub ptr: *const u8,
pub len: usize,
}
impl Default for FFISlice {
    /// Produces the empty slice: length zero, with a dangling placeholder
    /// pointer (from `core::ptr::dangling`) rather than a null one.
    fn default() -> Self {
        let ptr = core::ptr::dangling::<u8>();
        Self { ptr, len: 0 }
    }
}
#[repr(transparent)]
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub struct MaybeFFISlice {
inner: FFISlice,
}
@ -40,20 +52,34 @@ impl MaybeFFISlice {
impl FFISlice {
/// # Safety
/// The caller must ensure that the slice is valid for type T.
pub unsafe fn as_slice<T: Sized>(&self) -> &[T] {
/// The caller must ensure that the slice is valid for type T, and lasts for 'a.
pub unsafe fn as_slice_unchecked<'a, T: Sized>(self) -> &'a [T] {
// SAFETY: The caller ensures that the FFISlice is valid for type T.
unsafe { core::slice::from_raw_parts(self.ptr.cast(), self.len) }
}
pub fn as_bytes(&self) -> &[u8] {
/// # Safety
/// The caller must ensure that the slice is valid for type T, and lasts for 'a.
pub unsafe fn as_slice_mut_unchecked<'a, T: Sized>(self) -> &'a mut [T] {
// SAFETY: The caller ensures that the FFISlice is valid for type T.
unsafe { core::slice::from_raw_parts_mut(self.ptr.cast_mut().cast(), self.len) }
}
/// # Safety
/// The caller must ensure that the slice is a valid byte slice.
/// Namely, the pointer must be well-aligned and point to `len` bytes, and
/// must last for at least 'a.
pub fn as_u8s_unchecked<'a>(self) -> &'a [u8] {
// SAFETY: The FFISlice is guaranteed to be a valid byte slice.
unsafe { core::slice::from_raw_parts(self.ptr, self.len) }
unsafe { self.as_slice_unchecked() }
}
/// # Safety
/// The caller must ensure that the slice is a valid utf8 string.
pub unsafe fn as_str(&self) -> &str {
/// Furthermore, the pointer must be well-aligned, point to `len` bytes, and
/// must last for at least 'a.
pub unsafe fn as_str_unchecked<'a>(self) -> &'a str {
// SAFETY: The caller ensures that the FFISlice is a valid utf8 string.
unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
unsafe { core::str::from_utf8_unchecked(self.as_u8s_unchecked()) }
}
}
@ -63,8 +89,7 @@ pub mod vec {
impl Default for BlobVec {
fn default() -> Self {
Self {
data: core::ptr::null_mut(),
len: 0,
slice: FFISlice::default(),
cap: 0,
elem_size: 0,
drop: None,
@ -75,6 +100,8 @@ pub mod vec {
unsafe impl Send for BlobVec {}
unsafe impl Sync for BlobVec {}
use crate::FFISlice;
use super::ffi::*;
#[repr(transparent)]
@ -105,13 +132,7 @@ pub mod vec {
}
pub fn new_with(capacity: usize) -> Self {
let mut vec = BlobVec {
data: core::ptr::null_mut(),
len: 0,
cap: 0,
elem_size: 0,
drop: None,
};
let mut vec = BlobVec::default();
unsafe extern "C" fn drop_fn<T>(ptr: *mut ()) {
unsafe {
@ -134,14 +155,23 @@ pub mod vec {
}
}
/// Consumes the vector without running its destructor and hands back its
/// elements as a mutable slice with a caller-chosen lifetime.
///
/// # Panics
/// Panics if the stored element size differs from `size_of::<T>()`.
pub fn leak<'a>(self) -> &'a mut [T] {
    assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
    // Wrapping `self` suppresses its drop, so the storage stays alive.
    let leaked = core::mem::ManuallyDrop::new(self);
    // SAFETY: the element size was checked above; the backing storage is
    // never released because the owner is never dropped.
    unsafe { leaked.vec.slice.as_slice_mut_unchecked() }
}
pub fn as_slice(&self) -> &[T] {
assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
unsafe { core::slice::from_raw_parts(self.vec.data as *const T, self.vec.len) }
unsafe { self.vec.slice.as_slice_unchecked() }
}
pub fn as_slice_mut(&mut self) -> &mut [T] {
assert_eq!(self.vec.elem_size, core::mem::size_of::<T>());
unsafe { core::slice::from_raw_parts_mut(self.vec.data as *mut T, self.vec.len) }
unsafe { self.vec.slice.as_slice_mut_unchecked() }
}
pub fn extend(&mut self, elements: Box<[T]>) {
@ -160,7 +190,7 @@ pub mod vec {
}
pub fn insert(&mut self, value: T, index: usize) {
if index > self.vec.len {
if index > self.vec.slice.len {
return;
}
let value = core::mem::ManuallyDrop::new(value);
@ -187,11 +217,11 @@ pub mod vec {
}
pub fn pop(&mut self) -> Option<T> {
if self.vec.len == 0 {
if self.vec.slice.len == 0 {
return None;
}
unsafe {
let ptr = vec_get(&mut self.vec, self.vec.len - 1) as *mut T;
let ptr = vec_get(&mut self.vec, self.vec.slice.len - 1) as *mut T;
let value = ptr.read();
vec_pop(&mut self.vec);
Some(value)
@ -199,7 +229,7 @@ pub mod vec {
}
pub fn get(&self, index: usize) -> Option<&T> {
if index >= self.vec.len {
if index >= self.vec.slice.len {
return None;
}
unsafe {
@ -209,7 +239,7 @@ pub mod vec {
}
pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
if index >= self.vec.len {
if index >= self.vec.slice.len {
return None;
}
unsafe {
@ -219,7 +249,7 @@ pub mod vec {
}
pub fn remove(&mut self, index: usize) {
if index >= self.vec.len {
if index >= self.vec.slice.len {
return;
}
unsafe {
@ -228,11 +258,11 @@ pub mod vec {
}
pub fn len(&self) -> usize {
self.vec.len
self.vec.slice.len
}
pub fn is_empty(&self) -> bool {
self.vec.len == 0
self.vec.slice.len == 0
}
pub fn position<F>(&self, elem: &T, mut cmp: F) -> Option<usize>
@ -526,7 +556,7 @@ mod display {
impl core::fmt::Display for ffi::Type {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
use ffi::{
use crate::ffi::{
TYPE_BOOL, TYPE_F32, TYPE_F64, TYPE_I8, TYPE_I16, TYPE_I32, TYPE_I64, TYPE_ISIZE,
TYPE_POINTER, TYPE_STR, TYPE_U8, TYPE_U16, TYPE_U32, TYPE_U64, TYPE_USIZE,
TYPE_VOID,
@ -610,3 +640,15 @@ mod display {
}
pub use display::{Displayed, DisplayedSliceExt};
// The impl is written by hand (and the clippy lint explicitly expected)
// because `ffi::Ast` lives in generated code where a derive cannot be added.
#[expect(
clippy::derivable_impls,
reason = "Struct is defined in auto-generated file"
)]
impl Default for ffi::Ast {
/// Creates an AST whose node list is a default-constructed `vec::Vec`.
fn default() -> Self {
ffi::Ast {
nodes: vec::Vec::default(),
}
}
}

View file

@ -0,0 +1,775 @@
/// Raises the Rust panic reported on behalf of non-Rust callers.
///
/// `#[inline(never)]` keeps this as its own function instead of letting it
/// be folded into `panic_impl`.
#[inline(never)]
fn __do_panic() -> ! {
    panic!("Called panic from external code.")
}

/// C-ABI entry point exported under the unmangled symbol `panic_impl`;
/// it never returns and simply forwards to `__do_panic`.
#[unsafe(no_mangle)]
extern "C" fn panic_impl() -> ! {
    __do_panic();
}
mod alloc {
use crate::ffi::bump_alloc;
/// Zero-sized allocator handle that forwards every request to the external
/// `bump_alloc` routine.
pub struct BumpAllocator;

// SAFETY: NOTE(review): relies on `bump_alloc` returning either null or a
// block that satisfies the requested size and alignment — confirm against
// its implementation.
unsafe impl std::alloc::Allocator for BumpAllocator {
    /// Asks `bump_alloc` for `layout.size()` bytes aligned to
    /// `layout.align()`; a null result is reported as `AllocError`.
    fn allocate(
        &self,
        layout: std::alloc::Layout,
    ) -> Result<std::ptr::NonNull<[u8]>, std::alloc::AllocError> {
        let (size, align) = (layout.size(), layout.align());
        // SAFETY: `bump_alloc` is only handed the size/alignment of a valid `Layout`.
        let raw = unsafe { bump_alloc(size, align) };
        std::ptr::NonNull::new(raw)
            .map(|start| std::ptr::NonNull::slice_from_raw_parts(start, size))
            .ok_or(std::alloc::AllocError)
    }

    /// Intentionally does nothing.
    unsafe fn deallocate(&self, _ptr: std::ptr::NonNull<u8>, _layout: std::alloc::Layout) {
        // Bump allocator does not deallocate individual allocations
    }
}
/// Two boxes holding equal values must still occupy distinct addresses.
#[test]
fn box_neq() {
    let (a, b) = (
        Box::new_in(42u32, BumpAllocator),
        Box::new_in(42u32, BumpAllocator),
    );
    let c = Box::new_in(52u32, BumpAllocator);
    eprintln!("a: {a}, b: {b}, c: {c}");

    let (addr_a, addr_b) = (Box::as_ptr(&a), Box::as_ptr(&b));
    assert_ne!(addr_a, addr_b);
    assert_eq!(*a, *b);
}
/// Allocates a 0x1010-byte value after a small one and checks that it gets
/// its own address and is writable.
#[test]
fn box_big() {
    const BIG_LEN: usize = 0x1010;
    struct BigType {
        data: [u8; BIG_LEN],
    }

    let small = Box::new_in(42u32, BumpAllocator);
    let mut big = Box::new_in(BigType { data: [0; BIG_LEN] }, BumpAllocator);

    let big_addr = Box::as_ptr(&big) as *const ();
    let small_addr = Box::as_ptr(&small) as *const ();
    assert_ne!(big_addr, small_addr);

    big.data[47] = 123;
    assert_eq!(big.data[47], 123);
}
/// A type demanding 256-byte alignment must be placed on a 256-byte boundary.
#[test]
fn align() {
    const ALIGN: usize = 256;

    #[repr(align(256))]
    struct AlignedType {
        #[allow(dead_code)]
        data: [u8; 512],
    }

    let aligned = Box::new_in(AlignedType { data: [0; 512] }, BumpAllocator);
    let misalignment = (Box::as_ptr(&aligned) as usize) % ALIGN;
    assert_eq!(
        misalignment, 0,
        "Aligned allocation should be aligned to 256 bytes"
    );
}
}
mod int2str {
use crate::ffi::int_to_str2;
use crate::ffi::str_to_int;
/// Formats integers through `int_to_str2` in radix 10, 16, 8 and 2 and
/// compares each result with Rust's own formatting.
#[test]
fn to_str() {
    let value = 1234567890i64;
    // (input, radix, text Rust's formatter produces for that radix)
    let cases: [(i64, u8, String); 6] = [
        (0, 10, format!("{}", 0)),
        (value, 10, format!("{value}")),
        (0 - value, 10, format!("{}", 0 - value)),
        (value, 16, format!("{value:x}")),
        (value, 8, format!("{value:o}")),
        (235, 2, format!("{:b}", 235i64)),
    ];

    // One scratch buffer is reused; each result is consumed before the next call.
    let mut buffer = [0u8; 32];
    for (input, radix, expected) in cases {
        // SAFETY: the buffer outlives `s`, and the routine is expected to
        // write only ASCII digits into it.
        let s = unsafe {
            int_to_str2(input, buffer.as_mut_ptr(), buffer.len(), radix).as_str_unchecked()
        };
        println!("Integer: {}, String: {}", expected, s);
        assert_eq!(s, expected);
    }
}
/// Parses digit strings through `str_to_int` in several radices, including
/// an input far too long for an `i64`, which is expected to yield `i64::MAX`.
#[test]
fn from_str() {
    // (text, radix, expected value)
    let cases: [(&str, u8, i64); 5] = [
        ("1234567890", 10, 1234567890i64),
        ("499602d2", 16, 1234567890i64),
        ("11145401322", 8, 1234567890i64),
        ("11101011", 2, 235i64),
        ("9999999999999999999999999999999999999999", 10, i64::MAX),
    ];

    for (s, radix, expected) in cases {
        // SAFETY: pointer and length both come from the same live `&str`.
        let parsed = unsafe { str_to_int(s.as_ptr(), s.len(), radix) };
        println!("String: {}, Integer: {}", s, parsed);
        assert_eq!(parsed, expected);
    }
}
}
mod vec {
use crate::{
ffi::{BlobVec, vec_drop_last, vec_get, vec_init, vec_push},
vec::Vec,
};
/// Running product of every `u32` handed to `drop_element`.
///
/// Starts at 1 so that the product of the dropped values can be compared
/// directly against expectations such as `2 * 3 * 5`.
static mut DROPS: usize = 1;

/// Returns the current value of `DROPS`.
fn get_drops() -> usize {
    // SAFETY: NOTE(review): sound only while nothing writes `DROPS`
    // concurrently; only one test appears to touch it — confirm.
    unsafe { (&raw const DROPS).read() }
}

/// Runs `f` with mutable access to `DROPS`.
///
/// This is a plain Rust function: it is generic over a closure, is only
/// called from Rust, and a closure is not an FFI-safe argument, so it does
/// not carry an `extern "C"` ABI.
///
/// # Safety
/// No other access to `DROPS` may be in progress while `f` runs.
unsafe fn update_drops(f: impl FnOnce(&mut usize)) {
    // SAFETY: exclusivity is guaranteed by the caller (see above).
    unsafe {
        let drops = &raw mut DROPS;
        f(&mut *drops);
    }
}

/// Drop callback with the C ABI: reads the `u32` at `ptr` and multiplies
/// `DROPS` by it.
extern "C" fn drop_element(ptr: *mut ()) {
    // SAFETY: NOTE(review): assumes `ptr` points at a readable, aligned
    // `u32` element — true for the 4-byte elements used by the test.
    unsafe {
        update_drops(|drops| {
            *drops *= ptr.cast::<u32>().read() as usize;
        });
    }
}
/// Views the raw `BlobVec` storage as `&[T]`.
///
/// Panics if the vector's element size is not `size_of::<T>()`.
fn as_slice<T>(vec: &BlobVec) -> &[T] {
    let expected_size = core::mem::size_of::<T>();
    assert_eq!(vec.elem_size, expected_size);
    let raw = vec.slice;
    // SAFETY: element size was checked above; the borrow of `vec` bounds the lifetime.
    unsafe { raw.as_slice_unchecked() }
}
/// Drives the raw `BlobVec` FFI functions directly and checks both the
/// contents and the drop callback. `DROPS` is a running product of dropped
/// values, so each expectation below is the product of everything dropped
/// so far.
#[test]
fn push_pop() {
let mut vec = BlobVec::default();
unsafe {
// 4 is the element size (`as_slice::<u32>` asserts it equals
// `size_of::<u32>()`); `drop_element` is registered as the drop callback.
vec_init(&mut vec, 4, Some(drop_element));
assert_eq!(vec.slice.len, 0);
let mut value = 2;
vec_push(&mut vec, &raw const value as _);
assert_eq!(vec.slice.len, 1);
assert_eq!(as_slice::<u32>(&vec), &[2]);
let retrieved = *(vec_get(&mut vec, 0) as *mut u32);
assert_eq!(retrieved, 2);
// Nothing has been dropped yet, so the product is still its initial 1.
assert_eq!(get_drops(), 1);
vec_drop_last(&mut vec);
assert_eq!(vec.slice.len, 0);
// The element 2 was passed to the drop callback: 1 * 2.
assert_eq!(get_drops(), 2);
value = 3;
vec_push(&mut vec, &raw const value as _);
assert_eq!(as_slice::<u32>(&vec), &[3]);
value = 5;
vec_push(&mut vec, &raw const value as _);
assert_eq!(as_slice::<u32>(&vec), &[3, 5]);
assert_eq!(vec.slice.len, 2);
vec_drop_last(&mut vec);
vec_drop_last(&mut vec);
// All three values pushed over the test's lifetime have now been dropped.
assert_eq!(get_drops(), 2 * 3 * 5);
}
}
/// Exercises push / get / pop / remove on the typed `Vec` wrapper.
#[test]
fn vec_impl() {
    let mut vec = Vec::<u32>::new_with(100);
    assert_eq!(vec.len(), 0);

    for value in [10u32, 20, 30] {
        vec.push(value);
    }
    assert_eq!(vec.len(), 3);
    for (index, expected) in [10u32, 20, 30].iter().enumerate() {
        assert_eq!(vec.get(index), Some(expected));
    }

    let popped = vec.pop();
    assert_eq!(popped, Some(30));
    assert_eq!(vec.len(), 2);

    // Removing the front element shifts 20 into slot 0.
    vec.remove(0);
    assert_eq!(vec.len(), 1);
    assert_eq!(vec.get(0), Some(&20));

    for value in [40u32, 50] {
        vec.push(value);
    }
}
/// `extend` appends every element of a boxed slice in order.
#[test]
fn vec_extend() {
    let mut vec = Vec::<u32>::new_with(100);
    let items: Box<[u32]> = Box::new([10, 20, 30, 40, 50]);
    vec.extend(items);

    let expected = [10u32, 20, 30, 40, 50];
    assert_eq!(vec.len(), 5);
    assert_eq!(vec.get(0), Some(&10));
    assert_eq!(vec.as_slice(), &expected);
}
/// `insert(value, index)` places the value at `index`, shifting the tail.
#[test]
fn vec_insert() {
    let mut vec = Vec::<u32>::new_with(100);
    let initial: Box<[u32]> = Box::new([10, 20, 40, 50]);
    vec.extend(initial);

    vec.insert(30, 2);

    let expected = [10u32, 20, 30, 40, 50];
    assert_eq!(vec.as_slice(), &expected);
}
/// Checks `binary_search_by` hits and insertion points on a sorted vector,
/// then uses `insert_sorted` to add a missing value.
#[test]
fn vec_binary_search() {
    let mut vec = Vec::<u32>::new_with(100);
    vec.extend(Box::new([20, 30, 40, 50]));

    // Three-way comparison expressed as -1 / 0 / 1.
    let cmp = |a: &u32, b: &u32| match a.cmp(b) {
        core::cmp::Ordering::Less => -1,
        core::cmp::Ordering::Equal => 0,
        core::cmp::Ordering::Greater => 1,
    };

    // (needle, expected search result)
    let probes = [
        (35, Err(2)),
        (25, Err(1)),
        (30, Ok(1)),
        (5, Err(0)),
        (55, Err(4)),
    ];
    for (needle, expected) in probes {
        assert_eq!(vec.binary_search_by(&needle, cmp), expected);
    }

    _ = vec.insert_sorted(35, cmp);
    assert_eq!(vec.as_slice(), &[20, 30, 35, 40, 50]);
}
/// `insert_sorted` must work starting from an empty vector and keep the
/// contents ordered after every insertion.
#[test]
fn vec_binary_serach_empty() {
    let cmp = |a: &u32, b: &u32| match a.cmp(b) {
        core::cmp::Ordering::Less => -1,
        core::cmp::Ordering::Equal => 0,
        core::cmp::Ordering::Greater => 1,
    };

    let mut vec = Vec::<u32>::new_with(100);
    assert_eq!(vec.len(), 0);

    // (value to insert, expected contents afterwards)
    let steps: [(u32, &[u32]); 3] = [(5, &[5]), (2, &[2, 5]), (7, &[2, 5, 7])];
    for (value, expected) in steps {
        _ = vec.insert_sorted(value, cmp);
        assert_eq!(vec.len(), expected.len());
        assert_eq!(vec.as_slice(), expected);
    }
}
/// `insert_many(index, items)` splices a whole boxed slice in at `index`.
#[test]
fn vec_insert_many() {
    let mut vec = Vec::<u32>::new_with(100);
    vec.extend(Box::new([10, 20, 30, 40, 50]));
    let before = [10u32, 20, 30, 40, 50];
    assert_eq!(vec.as_slice(), &before);

    vec.insert_many(2, Box::new([22, 23, 24]));
    let after = [10u32, 20, 22, 23, 24, 30, 40, 50];
    assert_eq!(vec.as_slice(), &after);
}
/// `position` reports the index of the first element the predicate accepts.
#[test]
fn vec_position() {
    let mut vec = Vec::<u32>::new_with(100);
    vec.extend(Box::new([10, 20, 30, 40, 50]));

    let found = vec.position(&40, |lhs, rhs| {
        eprintln!("Comparing {} and {}", lhs, rhs);
        lhs == rhs
    });
    assert_eq!(found, Some(3));
}
}
mod tokens {
use crate::ffi::{RawLexeme, find_lexeme, tokeniser_init_buf};
/// Pulls lexemes from the tokeniser until `into_lexeme` yields `None`,
/// returning them in the order produced.
fn collect_tokens() -> Vec<Lexeme> {
    // SAFETY: NOTE(review): assumes the tokeniser was initialised beforehand
    // via `init_tokeniser` — every test in this module does so.
    std::iter::from_fn(|| unsafe { find_lexeme() }.into_lexeme()).collect()
}
/// A token id paired with the text it was lexed from.
#[derive(Debug)]
struct Lexeme(u8, &'static str);

impl PartialEq for Lexeme {
    /// Token ids must always match; the text is compared only for
    /// identifiers and numbers. For every other token kind (string
    /// literals included) the text is ignored.
    fn eq(&self, other: &Self) -> bool {
        use crate::ffi::{TOKEN_IDENT, TOKEN_NUMBER};

        let Lexeme(kind, text) = self;
        let Lexeme(other_kind, other_text) = other;
        if kind != other_kind {
            return false;
        }
        let text_matters = matches!(*kind, TOKEN_IDENT | TOKEN_NUMBER);
        !text_matters || text == other_text
    }
}

impl Eq for Lexeme {}
/// Conversion from a raw FFI lexeme into the test-side `Lexeme`.
trait AsLexeme {
    /// Returns `None` for token id 0, otherwise the token with its text.
    fn into_lexeme(self) -> Option<Lexeme>;
}

impl AsLexeme for RawLexeme {
    fn into_lexeme(self) -> Option<Lexeme> {
        let Self { token, slice } = self;
        // Check the token first: for token 0 the slice is never used, and
        // nothing here guarantees it is a valid pointer/length pair, so it
        // must not be turned into a `&str`.
        if token == 0 {
            return None;
        }
        // SAFETY: NOTE(review): assumes that for a non-zero token the
        // tokeniser returns a slice into the UTF-8 buffer it was
        // initialised with, and that the buffer is `'static` (the tests
        // only pass string literals) — confirm against `find_lexeme`.
        let text = unsafe { slice.as_str_unchecked() };
        Some(Lexeme(token, text))
    }
}
/// Points the tokeniser at the bytes of `s`.
///
/// NOTE(review): lexemes are later exposed as `&'static str`, so `s`
/// presumably has to outlive them; all callers here pass string literals.
fn init_tokeniser(s: &str) {
    let (start, len) = (s.as_ptr(), s.len());
    // SAFETY: pointer and length describe the same live `&str`.
    unsafe { tokeniser_init_buf(start, len) };
}
// Builds one expected `Lexeme`.
//   token!(TOKEN_X)            -> Lexeme(crate::ffi::TOKEN_X, "")
//   token!({TOKEN_X: "text"})  -> Lexeme(crate::ffi::TOKEN_X, "text")
// The braces in the second form make the pair a single token tree, which is
// what lets `tokens!` pass either form through one `tt` matcher.
macro_rules! token {
($token:ident) => {
Lexeme(crate::ffi::$token, "")
};
({$token:ident: $lexeme:expr}) => {
Lexeme(crate::ffi::$token, $lexeme)
};
}
// Expands a comma-separated list (trailing comma allowed) into an array of
// `Lexeme`s by applying `token!` to each entry.
macro_rules! tokens {
[$( $token:tt ),* $(,)?] => {
[
$( token!($token) ),*
]
};
}
/// Every keyword and built-in type name lexes to its dedicated token.
#[test]
fn keywords() {
    let source = r#"
fn let if else fn continue loop break return while for match switch as
i32 bool false true void
return usize isize f32 f64
i8 u8 i16 u16 i32 u32 i64 u64
"#;
    init_tokeniser(source);

    let expected = tokens![
        TOKEN_FN,
        TOKEN_LET,
        TOKEN_IF,
        TOKEN_ELSE,
        TOKEN_FN,
        TOKEN_CONTINUE,
        TOKEN_LOOP,
        TOKEN_BREAK,
        TOKEN_RETURN,
        TOKEN_WHILE,
        TOKEN_FOR,
        TOKEN_MATCH,
        TOKEN_SWITCH,
        TOKEN_AS,
        TOKEN_I32,
        TOKEN_BOOL,
        TOKEN_FALSE,
        TOKEN_TRUE,
        TOKEN_VOID,
        TOKEN_RETURN,
        TOKEN_USIZE,
        TOKEN_ISIZE,
        TOKEN_F32,
        TOKEN_F64,
        TOKEN_I8,
        TOKEN_U8,
        TOKEN_I16,
        TOKEN_U16,
        TOKEN_I32,
        TOKEN_U32,
        TOKEN_I64,
        TOKEN_U64,
    ];
    let actual = collect_tokens();
    assert_eq!(actual.as_slice(), &expected);
}
/// Punctuation written back-to-back is split into individual delimiter tokens.
#[test]
fn delimiters() {
    init_tokeniser("()[]{},->;:=");

    let expected = tokens![
        TOKEN_LPARENS,
        TOKEN_RPARENS,
        TOKEN_LBRACKET,
        TOKEN_RBRACKET,
        TOKEN_LBRACE,
        TOKEN_RBRACE,
        TOKEN_COMMA,
        TOKEN_ARROW,
        TOKEN_SEMI,
        TOKEN_COLON,
        TOKEN_EQUALS,
    ];
    let actual = collect_tokens();
    assert_eq!(actual.as_slice(), &expected);
}
/// Identifiers may contain `-`, `_`, digits and mixed case; a leading `-`
/// is lexed as a separate minus token, while a trailing `-` stays part of
/// the identifier.
#[test]
fn identifiers() {
    let source = r#"
this-is-an-ident
another_ident123
_underscore_test
mixedCASEIdent
number12345
____
_
-leading-minus
trailing-minus-
"#;
    init_tokeniser(source);

    let expected = tokens![
        {TOKEN_IDENT: "this-is-an-ident"},
        {TOKEN_IDENT: "another_ident123"},
        {TOKEN_IDENT: "_underscore_test"},
        {TOKEN_IDENT: "mixedCASEIdent"},
        {TOKEN_IDENT: "number12345"},
        {TOKEN_IDENT: "____"},
        {TOKEN_IDENT: "_"},
        TOKEN_MINUS,
        {TOKEN_IDENT: "leading-minus"},
        {TOKEN_IDENT: "trailing-minus-"},
    ];
    let actual = collect_tokens();
    assert_eq!(actual.as_slice(), &expected);
}
/// Lexes a minimal function definition end to end.
#[test]
fn simple_function() {
    let source = "fn my-function() -> bool {
return false;
}";
    init_tokeniser(source);

    let expected = tokens![
        TOKEN_FN,
        {TOKEN_IDENT: "my-function"},
        TOKEN_LPARENS,
        TOKEN_RPARENS,
        TOKEN_ARROW,
        TOKEN_BOOL,
        TOKEN_LBRACE,
        TOKEN_RETURN,
        TOKEN_FALSE,
        TOKEN_SEMI,
        TOKEN_RBRACE,
    ];
    let actual = collect_tokens();
    assert_eq!(actual.as_slice(), &expected);
}
/// Same function as `simple_function`, with `//` comments: each comment
/// line is expected to surface as one `TOKEN_COMMENT`.
#[test]
fn simple_function_commented() {
    let source = "// This is a comment line
fn my-function() -> bool {
// This function always returns false
return false;
}
";
    init_tokeniser(source);

    let expected = tokens![
        TOKEN_COMMENT,
        TOKEN_FN,
        {TOKEN_IDENT: "my-function"},
        TOKEN_LPARENS,
        TOKEN_RPARENS,
        TOKEN_ARROW,
        TOKEN_BOOL,
        TOKEN_LBRACE,
        TOKEN_COMMENT,
        TOKEN_RETURN,
        TOKEN_FALSE,
        TOKEN_SEMI,
        TOKEN_RBRACE,
    ];
    let actual = collect_tokens();
    assert_eq!(actual.as_slice(), &expected);
}
/// Number literals keep their `_` separators (including trailing ones) in
/// the lexeme text.
#[test]
fn numbers() {
    let source = "
1234
123_345_
1234____56
1
0";
    init_tokeniser(source);

    let expected = tokens![
        {TOKEN_NUMBER: "1234"},
        {TOKEN_NUMBER: "123_345_"},
        {TOKEN_NUMBER: "1234____56"},
        {TOKEN_NUMBER: "1"},
        {TOKEN_NUMBER: "0"},
    ];
    let actual = collect_tokens();
    assert_eq!(actual.as_slice(), &expected);
}
/// String literals — single-line, multi-line, with escaped quotes, one
/// character and empty — each lex to one `TOKEN_STRING`.
///
/// NOTE(review): `Lexeme` equality compares text only for identifiers and
/// numbers, so the texts listed here are not actually checked.
#[test]
fn strings() {
    let source = r#"
"this is a string"
"another
string
spanning multiple
lines"
"string with a \"quoted\" word"
"a"
""
"#;
    init_tokeniser(source);

    let expected = tokens![
        {TOKEN_STRING: "this is a string"},
        {TOKEN_STRING: "another\nstring\nspanning multiple\n lines"},
        {TOKEN_STRING: "string with a \\\"quoted\\\" word"},
        {TOKEN_STRING: "a"},
        {TOKEN_STRING: ""},
    ];
    let actual = collect_tokens();
    assert_eq!(actual.as_slice(), &expected);
}
/// Runs of operator characters must split into the longest matching tokens
/// (`<<`, `<=`, `==`, `||`, `&&`) before falling back to single characters.
#[test]
fn complex_tokens() {
    init_tokeniser("<<<=<a == b = c ||| &||&&|&");

    let expected = tokens![
        TOKEN_LESSLESS,
        TOKEN_LEQ,
        TOKEN_LT,
        {TOKEN_IDENT: "a"},
        TOKEN_EQEQ,
        {TOKEN_IDENT: "b"},
        TOKEN_EQUALS,
        {TOKEN_IDENT: "c"},
        TOKEN_PIPE2,
        TOKEN_PIPE,
        TOKEN_AMP,
        TOKEN_PIPE2,
        TOKEN_AMP2,
        TOKEN_PIPE,
        TOKEN_AMP,
    ];
    let actual = collect_tokens();
    assert_eq!(actual.as_slice(), &expected);
}
}
mod ast {
use crate::ffi::Ast;
// Test-side DSL for building an `Ast` by hand. Each non-empty arm pushes one
// `AstNode` onto `$ast.nodes` and evaluates to the index that node was pushed
// at (taken from `$ast.nodes.len()` just before the push). Payload structs
// are boxed in `BumpAllocator` and stored as raw pointers in `data`.
//
// `macro_rules!` tries arms from top to bottom and uses the first that matches.
macro_rules! ast_node {
// NOTE(review): this arm accepts any tokens after `@expr <ast>,` and expands
// to nothing, so the `Num(..)` and `+` arms below can never be selected.
(@expr $ast:expr, $($expr:tt)*) => {};
// Number literal: the value is stored directly in `data`.
(@expr $ast:expr, Num($expr:tt)) => {
{
let num_id = $ast.nodes.len();
$ast.nodes.push(crate::ffi::AstNode {
kind: crate::ffi::AST_NUMBER,
data: $expr as _,
extra: 0,
span: 0,
});
num_id
}
};
// Addition: both operands are emitted first, then a boxed `AstBinaryOp`
// referring to them by node index.
(@expr $ast:expr, $($a:tt)* + $($b:tt)*) => {
{
let left_id = ast_node!(@expr $ast, $($a)*);
let right_id = ast_node!(@expr $ast, $($b)*);
let binop = Box::new_in(
crate::ffi::AstBinaryOp {
op: crate::ffi::TOKEN_PLUS,
left: left_id as u64,
right: right_id as u64,
},
super::alloc::BumpAllocator,
);
let binop_id = $ast.nodes.len();
$ast.nodes.push(crate::ffi::AstNode {
kind: crate::ffi::AST_BINARY_OP,
data: Box::into_raw(binop.into()) as _,
extra: 0,
span: 0,
});
binop_id
}
};
// NOTE(review): as with `@expr`, this catch-all precedes the `return` arm
// and therefore shadows it.
(@stmt $ast:expr, $($expr:tt)*) => {};
// (@stmt $ast:expr, let $name:ident: $ty:tt = $($expr:tt)*) => {};
// Return statement: `data` holds the node index of the returned expression.
(@stmt $ast:expr, return $($expr:tt)*) => {
{
// NOTE(review): `$expr` is captured inside a `$(...)*` repetition but is
// used here without one; this would likely be rejected if the arm were
// ever expanded — probably meant `$($expr)*`.
let expr_id = ast_node!(@expr $ast, $expr);
let return_id = $ast.nodes.len();
$ast.nodes.push(crate::ffi::AstNode {
kind: crate::ffi::AST_RETURN,
data: expr_id as _,
extra: 0,
span: 0,
});
return_id
}
};
// Function: emits one `Arg` node per argument, then the body block, then the
// function node itself.
($ast:expr, Fn{name: $name:expr, args: [$($arg_name:expr => $arg:tt),*], ret: $ret:tt, body: $body:tt}) => {
{
#[allow(unused_mut)]
let mut args = Vec::<usize>::with_capacity(128);
$(
args.push(ast_node!($ast, Arg{$arg_name => $arg}));
)*
// Leaked so the raw pointer stored in `AstFunction` stays valid.
let args = args.leak();
let func = Box::new_in(
crate::ffi::AstFunction {
name: $name.as_ptr(),
name_len: $name.len(),
args: args.as_ptr().cast(),
args_len: args.len(),
return_type: crate::ffi::Type $ret,
body: ast_node!($ast, Block $body) as u64,
},
super::alloc::BumpAllocator,
);
let func_id = $ast.nodes.len();
$ast.nodes.push(crate::ffi::AstNode {
kind: crate::ffi::AST_FUNCTION,
data: Box::into_raw(func.into()) as _,
extra: 0,
span: 0,
});
func_id
}
};
// Block: each parenthesised entry is expanded through `ast_node!` and its
// result collected; the node stores the leaked list's pointer in `data` and
// its length in `extra`.
($ast:expr, Block[$(($($stmt:tt)*)),* $(,)?]) => {
{
#[allow(unused_mut)]
let mut stmts = Vec::with_capacity_in(128, super::alloc::BumpAllocator);
$(
stmts.push(ast_node!($ast, $($stmt)*));
)*
let stmts = stmts.leak();
let block_id = $ast.nodes.len();
$ast.nodes.push(crate::ffi::AstNode {
kind: crate::ffi::AST_BLOCK,
data: stmts.as_ptr() as _,
extra: stmts.len(),
span: 0,
});
block_id
}
};
// Function argument: name pointer/length plus its type, boxed as `AstArgument`.
($ast:expr, Arg{$name:expr => $ty:tt}) => {
{
let arg = Box::new_in(
crate::ffi::AstArgument {
name: $name.as_ptr(),
name_len: $name.len(),
arg_type: crate::ffi::Type $ty,
},
super::alloc::BumpAllocator,
);
let arg_id = $ast.nodes.len();
$ast.nodes.push(crate::ffi::AstNode {
kind: crate::ffi::AST_ARG,
data: Box::into_raw(arg.into()) as _,
extra: 0,
span: 0,
});
arg_id
}
};
// File: expands every `<tag> <body>` entry and collects the resulting indices.
// Unlike the other arms this expands to statements, not a value.
// NOTE(review): `gdecls` is filled but neither stored in `$ast` nor returned.
($ast:expr, File[$($tag:ident $then:tt),* $(,)?]) => {
let mut gdecls = Vec::with_capacity_in(128, super::alloc::BumpAllocator);
$(
gdecls.push(ast_node!($ast, $tag $then));
)*
};
}
// Convenience wrapper around `ast_node!`: creates a default `Ast`, feeds the
// given description into it, and evaluates to the populated tree.
macro_rules! AST {
    ($($spec:tt)*) => {{
        let mut _tree = Ast::default();
        ast_node!(_tree, $($spec)*);
        _tree
    }};
}
/// Builds a one-function file (`main`, one `i32` argument `a`, empty body)
/// through the `AST!` macro and discards the result.
///
/// NOTE(review): this is not marked `#[test]` and nothing visible calls it,
/// so it currently only checks that the macro invocation compiles.
fn asdf() {
AST!(
File[
Fn {
name: "main",
args: ["a" => {kind: crate::ffi::TYPE_I32, data: 0}],
ret: { kind: 1, data: 0 },
body: []
},
]
);
}
/// Placeholder with an empty body; it does nothing yet.
fn parse_and_print() {}
}

View file

@ -1,6 +1,6 @@
default rel
section .bss
section .data
align 8
free_list: resb 40
@ -67,7 +67,7 @@ bump_new_block:
shr rax, 1
add rax, 1
add rsi, rax
mov rdx, mmap_alloc
lea rdx, [rel mmap_alloc]
call vec_try_grow_with
pop rdi
@ -141,6 +141,8 @@ bump_alloc:
lea rdi, [rel free_list]
mov r12, [rdi + 8]
xor r13, r13
cmp qword [rdi + 0], 0
je .init
.alloc_loop:
cmp r13, r12
jae .no_block
@ -183,6 +185,11 @@ bump_alloc:
mov r12, r13
dec r13
jmp .alloc_loop
.init:
call bump_init
mov rdi, [rsp] ; size
call bump_new_block
jmp .alloc_loop
.found_space:
mov r12, [rsp + 32] ; block entry ptr
mov rcx, [r12] ; block_ptr

View file

@ -6,7 +6,7 @@ global str_to_int
;; rdi: pointer to input string
;; rsi: length of input string
;; dl: radix
;; fn str_to_int(s: *const u8, len: usize, radix: u8) -> i64
;; define-fn: str_to_int(s: *const u8, len: usize, radix: u8) -> i64
str_to_int:
push rbp
mov rbp, rsp
@ -73,7 +73,7 @@ str_to_int:
;; rsi: pointer to output buffer (at least 21 bytes)
;; rdx: length of buffer
;; cl: radix
;; fn int_to_str2(value: i64, buffer: *mut u8, len: usize, radix: u8) -> (*mut u8, usize)
;; define-fn: int_to_str2(value: i64, buffer: *mut u8, len: usize, radix: u8) -> FFISlice
int_to_str2:
push rbp
mov rbp, rsp

View file

@ -1,3 +1,5 @@
default rel
section .text
extern panic
extern strlen
@ -143,8 +145,8 @@ global NUM_LEXEMES
section .text
;; rdi: length of previously matched lexeme
;; returns the length of the ident
;; fn is_ident(lexeme_len: usize) -> usize
;; returns the length of the ident, or 0 if not an ident
;; define-fn: is_ident(lexeme_len: usize) -> usize
is_ident:
push rbp
mov rbp, rsp
@ -376,8 +378,15 @@ skip_whitespaces:
ret
;; start-structs
;; struct RawLexeme {
;; token: u8,
;; slice: FFISlice,
;; }
;;
;; end-structs
;; rdi: pointer to out-struct
;; fn find_lexeme() -> (u8, *const u8, usize)
;; define-fn: find_lexeme() -> RawLexeme
find_lexeme:
push rbp
mov rbp, rsp
@ -523,7 +532,11 @@ find_lexeme:
mov [rdi + 16], rax
jmp .epilogue
;; ```rust
;; use crate::MaybeFFISlice;
;; ```
;; dil: expected token
;; define-fn: fn expect_token(expected: u8) -> MaybeFFISlice
expect_token:
push rbp
mov rbp, rsp
@ -552,6 +565,7 @@ expect_token:
;; Returns the next token if it matches the expected token, else panics
;; dil: expected token
;; define-fn: fn unwrap_token(expected: u8) -> FFISlice
unwrap_token:
push rbp
mov rbp, rsp
@ -566,6 +580,7 @@ unwrap_token:
;; returns 0 if token not found, else returns lexeme (ptr, len)
;; doesn't advance the cursor
;; dil: expected token
;; define-fn: fn peek_expect_token(expected: u8) -> MaybeFFISlice
peek_expect_token:
push rbp
mov rbp, rsp
@ -579,6 +594,7 @@ peek_expect_token:
;; returns the next lexeme without advancing the cursor
;; rdi: out-struct pointer
;; define-fn: fn peek_lexeme() -> RawLexeme
peek_lexeme:
push rbp
mov rbp, rsp
@ -593,6 +609,7 @@ peek_lexeme:
ret
;; Skips one token ahead, without returning it.
;; define-fn: fn skip_token()
skip_token:
push rbp
mov rbp, rsp
@ -603,6 +620,7 @@ skip_token:
add rsp, 24
pop rbp
;; define-fn: fn tokeniser_get_cursor() -> usize
tokeniser_get_cursor:
mov rax, [rel cursor]
ret

View file

@ -35,8 +35,7 @@ global vec_tests
;; Byte vector structure
;; start-structs
;; struct BlobVec {
;; data: *mut (),
;; len: usize,
;; slice: FFISlice,
;; cap: usize,
;; elem_size: usize,
;; drop: Option<unsafe extern "C" fn(*mut ())>,
@ -345,7 +344,7 @@ vec_remove:
;; rsi: desired size
;; define-fn: fn vec_try_grow(vec: *mut BlobVec, new_size: usize) -> bool
vec_try_grow:
mov rdx, bump_alloc
lea rdx, [rel bump_alloc]
call vec_try_grow_with
ret