From 7eca9adde5247dce6046bc3429e954b06e80f420 Mon Sep 17 00:00:00 2001
From: Janis
Date: Wed, 2 Jul 2025 17:42:01 +0200
Subject: [PATCH] initial commit

---
 .gitignore         |   1 +
 Cargo.lock         |   7 ++
 Cargo.toml         |  11 +++
 src/cachepadded.rs | 218 +++++++++++++++++++++++++++++++++++++++++++++
 src/drop_guard.rs  |  34 +++++++
 src/lib.rs         |  11 +++
 src/ptr.rs         | 158 ++++++++++++++++++++++++++++++++
 7 files changed, 440 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 src/cachepadded.rs
 create mode 100644 src/drop_guard.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/ptr.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..829e65f
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "werkzeug"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..643d3cb
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "werkzeug"
+version = "0.1.0"
+edition = "2024"
+
+[features]
+default = []
+alloc = []
+std = []
+
+[dependencies]
diff --git a/src/cachepadded.rs b/src/cachepadded.rs
new file mode 100644
index 0000000..9f4222c
--- /dev/null
+++ b/src/cachepadded.rs
@@ -0,0 +1,218 @@
+//! This is taken from `crossbeam-utils`, MIT and Apache-2.0
+use core::fmt;
+use core::ops::{Deref, DerefMut};
+
+/// Pads and aligns a value to the length of a cache line.
+///
+/// In concurrent programming, sometimes it is desirable to make sure commonly accessed pieces of
+/// data are not placed into the same cache line. Updating an atomic value invalidates the whole
+/// cache line it belongs to, which makes the next access to the same cache line slower for other
+/// CPU cores. Use `CachePadded` to ensure updating one piece of data doesn't invalidate other
+/// cached data.
+///
+/// # Size and alignment
+///
+/// Cache lines are assumed to be N bytes long, depending on the architecture:
+///
+/// * On x86-64, aarch64, and powerpc64, N = 128.
+/// * On arm, mips, mips64, sparc, and hexagon, N = 32.
+/// * On m68k, N = 16.
+/// * On s390x, N = 256.
+/// * On all others, N = 64.
+///
+/// Note that N is just a reasonable guess and is not guaranteed to match the actual cache line
+/// length of the machine the program is running on. On modern Intel architectures, the spatial
+/// prefetcher pulls pairs of 64-byte cache lines at a time, so we pessimistically assume that
+/// cache lines are 128 bytes long.
+///
+/// The size of `CachePadded<T>` is the smallest multiple of N bytes large enough to accommodate
+/// a value of type `T`.
+///
+/// The alignment of `CachePadded<T>` is the maximum of N bytes and the alignment of `T`.
+///
+/// # Examples
+///
+/// Alignment and padding:
+///
+/// ```
+/// use werkzeug::cachepadded::CachePadded;
+///
+/// let array = [CachePadded::new(1i8), CachePadded::new(2i8)];
+/// let addr1 = &*array[0] as *const i8 as usize;
+/// let addr2 = &*array[1] as *const i8 as usize;
+///
+/// assert!(addr2 - addr1 >= 32);
+/// assert_eq!(addr1 % 32, 0);
+/// assert_eq!(addr2 % 32, 0);
+/// ```
+///
+/// When building a concurrent queue with a head and a tail index, it is wise to place them in
+/// different cache lines so that concurrent threads pushing and popping elements don't invalidate
+/// each other's cache lines:
+///
+/// ```
+/// use werkzeug::cachepadded::CachePadded;
+/// use std::sync::atomic::AtomicUsize;
+///
+/// struct Queue<T> {
+///     head: CachePadded<AtomicUsize>,
+///     tail: CachePadded<AtomicUsize>,
+///     buffer: *mut T,
+/// }
+/// ```
+#[derive(Clone, Copy, Default, Hash, PartialEq, Eq)]
+// Starting from Intel's Sandy Bridge, the spatial prefetcher pulls pairs of 64-byte cache
+// lines at a time, so we have to align to 128 bytes rather than 64.
+//
+// Sources:
+// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
+// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107
+//
+// aarch64/arm64ec's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size.
+//
+// Sources:
+// - https://www.mono-project.com/news/2016/09/12/arm64-icache/
+//
+// powerpc64 has 128-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/powerpc/include/asm/cache.h#L26
+#[cfg_attr(
+    any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "arm64ec",
+        target_arch = "powerpc64",
+    ),
+    repr(align(128))
+)]
+// arm, mips, mips64, sparc, and hexagon have 32-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12
+#[cfg_attr(
+    any(
+        target_arch = "arm",
+        target_arch = "mips",
+        target_arch = "mips32r6",
+        target_arch = "mips64",
+        target_arch = "mips64r6",
+        target_arch = "sparc",
+        target_arch = "hexagon",
+    ),
+    repr(align(32))
+)]
+// m68k has 16-byte cache line size.
+//
+// Sources:
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9
+#[cfg_attr(target_arch = "m68k", repr(align(16)))]
+// s390x has 256-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13
+#[cfg_attr(target_arch = "s390x", repr(align(256)))]
+// x86, wasm, riscv, and sparc64 have 64-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/riscv/include/asm/cache.h#L10
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19
+//
+// All others are assumed to have 64-byte cache line size.
+#[cfg_attr(
+    not(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "arm64ec",
+        target_arch = "powerpc64",
+        target_arch = "arm",
+        target_arch = "mips",
+        target_arch = "mips32r6",
+        target_arch = "mips64",
+        target_arch = "mips64r6",
+        target_arch = "sparc",
+        target_arch = "hexagon",
+        target_arch = "m68k",
+        target_arch = "s390x",
+    )),
+    repr(align(64))
+)]
+pub struct CachePadded<T> {
+    value: T,
+}
+
+unsafe impl<T: Send> Send for CachePadded<T> {}
+unsafe impl<T: Sync> Sync for CachePadded<T> {}
+
+impl<T> CachePadded<T> {
+    /// Pads and aligns a value to the length of a cache line.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use werkzeug::cachepadded::CachePadded;
+    ///
+    /// let padded_value = CachePadded::new(1);
+    /// ```
+    pub const fn new(t: T) -> CachePadded<T> {
+        CachePadded::<T> { value: t }
+    }
+
+    /// Returns the inner value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use werkzeug::cachepadded::CachePadded;
+    ///
+    /// let padded_value = CachePadded::new(7);
+    /// let value = padded_value.into_inner();
+    /// assert_eq!(value, 7);
+    /// ```
+    pub fn into_inner(self) -> T {
+        self.value
+    }
+}
+
+impl<T> Deref for CachePadded<T> {
+    type Target = T;
+
+    fn deref(&self) -> &T {
+        &self.value
+    }
+}
+
+impl<T> DerefMut for CachePadded<T> {
+    fn deref_mut(&mut self) -> &mut T {
+        &mut self.value
+    }
+}
+
+impl<T: fmt::Debug> fmt::Debug for CachePadded<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("CachePadded")
+            .field("value", &self.value)
+            .finish()
+    }
+}
+
+impl<T> From<T> for CachePadded<T> {
+    fn from(t: T) -> Self {
+        CachePadded::new(t)
+    }
+}
+
+impl<T: fmt::Display> fmt::Display for CachePadded<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&self.value, f)
+    }
+}
diff --git a/src/drop_guard.rs b/src/drop_guard.rs
new file mode 100644
index 0000000..a708d32
--- /dev/null
+++ b/src/drop_guard.rs
@@ -0,0 +1,34 @@
+use core::{cell::UnsafeCell, mem::ManuallyDrop};
+
+/// A guard that runs a closure when it is dropped.
+pub struct DropGuard<F: FnOnce()>(UnsafeCell<ManuallyDrop<F>>);
+
+impl<F> DropGuard<F>
+where
+    F: FnOnce(),
+{
+    pub fn new(f: F) -> DropGuard<F> {
+        Self(UnsafeCell::new(ManuallyDrop::new(f)))
+    }
+}
+
+impl<F> Drop for DropGuard<F>
+where
+    F: FnOnce(),
+{
+    fn drop(&mut self) {
+        // SAFETY: `drop` runs at most once, so the closure is taken and called exactly once here.
+        unsafe {
+            ManuallyDrop::take(&mut *self.0.get())();
+        }
+    }
+}
+
+impl<F> From<F> for DropGuard<F>
+where
+    F: FnOnce(),
+{
+    fn from(f: F) -> Self {
+        DropGuard::new(f)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..c20f881
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,11 @@
+#![cfg_attr(not(feature = "std"), no_std)]
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
+#[cfg(feature = "std")]
+extern crate std;
+
+pub mod cachepadded;
+pub mod drop_guard;
+pub mod ptr;
diff --git a/src/ptr.rs b/src/ptr.rs
new file mode 100644
index 0000000..59ac4f0
--- /dev/null
+++ b/src/ptr.rs
@@ -0,0 +1,158 @@
+use core::{
+    cmp::Ordering,
+    fmt, hash,
+    marker::Send,
+    num::NonZero,
+    ops::{Deref, DerefMut},
+    ptr::NonNull,
+};
+
+#[repr(transparent)]
+pub struct SendNonNull<T>(NonNull<T>);
+
+impl<T> fmt::Debug for SendNonNull<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Pointer::fmt(&self.as_ptr(), f)
+    }
+}
+
+impl<T> Copy for SendNonNull<T> {}
+
+impl<T> Clone for SendNonNull<T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T> Eq for SendNonNull<T> {}
+
+impl<T> PartialEq for SendNonNull<T> {
+    fn eq(&self, other: &Self) -> bool {
+        self.as_ptr() == other.as_ptr()
+    }
+}
+
+impl<T> Ord for SendNonNull<T> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.as_ptr().cmp(&other.as_ptr())
+    }
+}
+
+impl<T> PartialOrd for SendNonNull<T> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.as_ptr().partial_cmp(&other.as_ptr())
+    }
+}
+
+impl<T> hash::Hash for SendNonNull<T> {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        self.as_ptr().hash(state);
+    }
+}
+
+impl<T> From<NonNull<T>> for SendNonNull<T> {
+    fn from(ptr: NonNull<T>) -> Self {
+        Self(ptr)
+    }
+}
+
+impl<T> From<SendNonNull<T>> for NonNull<T> {
+    fn from(ptr: SendNonNull<T>) -> Self {
+        ptr.0
+    }
+}
+
+impl<T> From<&mut T> for SendNonNull<T> {
+    fn from(ptr: &mut T) -> Self {
+        Self(NonNull::from(ptr))
+    }
+}
+
+impl<T> From<&T> for SendNonNull<T> {
+    fn from(ptr: &T) -> Self {
+        Self(NonNull::from(ptr))
+    }
+}
+
+impl<T> fmt::Pointer for SendNonNull<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        <NonNull<T> as fmt::Pointer>::fmt(&self.0, f)
+    }
+}
+
+unsafe impl<T> Send for SendNonNull<T> {}
+
+impl<T> Deref for SendNonNull<T> {
+    type Target = NonNull<T>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<T> DerefMut for SendNonNull<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl<T> SendNonNull<T> {
+    pub const fn new(ptr: *mut T) -> Option<Self> {
+        match NonNull::new(ptr) {
+            Some(ptr) => Some(Self(ptr)),
+            None => None,
+        }
+    }
+
+    pub const fn dangling() -> Self {
+        Self(NonNull::dangling())
+    }
+
+    pub const fn cast<U>(self) -> SendNonNull<U> {
+        SendNonNull(self.0.cast())
+    }
+
+    pub fn with_addr(self, addr: NonZero<usize>) -> Self {
+        // SAFETY: `addr` is non-zero, so the resulting pointer is non-null.
+        unsafe {
+            Self(NonNull::new_unchecked(
+                self.as_ptr().with_addr(addr.get()) as *mut _
+            ))
+        }
+    }
+
+    pub fn map_addr(self, f: impl FnOnce(NonZero<usize>) -> NonZero<usize>) -> Self {
+        // The closure returns a `NonZero<usize>`, so `with_addr` keeps the pointer non-null.
+        self.with_addr(f(self.addr()))
+    }
+
+    pub unsafe fn offset(self, offset: isize) -> Self {
+        // SAFETY: `self` is non-null and, by the contract of `offset`, the result stays within the same allocation, so it is also non-null.
+        unsafe { Self(NonNull::new_unchecked(self.as_ptr().offset(offset))) }
+    }
+
+    pub unsafe fn byte_offset(self, offset: isize) -> Self {
+        // SAFETY: `self` is non-null and, by the contract of `byte_offset`, the result stays within the same allocation, so it is also non-null.
+        unsafe { Self(NonNull::new_unchecked(self.as_ptr().byte_offset(offset))) }
+    }
+
+    pub unsafe fn add(self, count: usize) -> Self {
+        // SAFETY: `self` is non-null and, by the contract of `add`, the result stays within the same allocation, so it is also non-null.
+        unsafe { Self(NonNull::new_unchecked(self.as_ptr().add(count))) }
+    }
+
+    pub unsafe fn byte_add(self, count: usize) -> Self {
+        // SAFETY: `self` is non-null and, by the contract of `byte_add`, the result stays within the same allocation, so it is also non-null.
+        unsafe { Self(NonNull::new_unchecked(self.as_ptr().byte_add(count))) }
+    }
+
+    pub const fn new_const(ptr: *const T) -> Option<Self> {
+        Self::new(ptr.cast_mut())
+    }
+
+    /// `ptr` must be non-null.
+    pub const unsafe fn new_unchecked(ptr: *mut T) -> Self {
+        // SAFETY: ptr must be non-null, which is guaranteed by the caller.
+        unsafe { Self(NonNull::new_unchecked(ptr)) }
+    }
+}
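
Usage sketch for src/drop_guard.rs (not part of the diff): a minimal example of the guard pattern, assuming the patch is applied as the `werkzeug` crate and the example binary links `std`. The function name `parse_first_number` and its logic are illustrative assumptions, not part of the patch.

    use werkzeug::drop_guard::DropGuard;

    // Returns the first whitespace-separated token that parses as an integer.
    fn parse_first_number(input: &str) -> Option<i64> {
        // The closure runs when `_cleanup` is dropped, on every exit path below,
        // including the early `return`.
        let _cleanup = DropGuard::new(|| println!("done parsing"));

        for token in input.split_whitespace() {
            if let Ok(n) = token.parse() {
                return Some(n);
            }
        }
        None
    }

    fn main() {
        assert_eq!(parse_first_number("a b 42 c"), Some(42));
        assert_eq!(parse_first_number("no numbers here"), None);
    }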
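
Usage sketch for src/cachepadded.rs and src/ptr.rs (not part of the diff): it shows `CachePadded` keeping two counters on separate cache lines, and `SendNonNull` moving a non-null pointer into another thread, which a plain `NonNull` cannot do because it is not `Send`. The counter layout and thread structure are illustrative assumptions.

    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::thread;

    use werkzeug::cachepadded::CachePadded;
    use werkzeug::ptr::SendNonNull;

    fn main() {
        // Each counter gets its own cache line, so updates to one do not
        // invalidate the line holding the other.
        let counters = [
            CachePadded::new(AtomicUsize::new(0)),
            CachePadded::new(AtomicUsize::new(0)),
        ];

        // `SendNonNull` is `Send`, so the pointer can be captured by a `move`
        // closure running on another thread; the caller must keep the target alive.
        let ptr: SendNonNull<CachePadded<AtomicUsize>> = SendNonNull::from(&counters[0]);

        thread::scope(|s| {
            s.spawn(move || {
                // SAFETY: `counters` outlives the scope, so the pointee is valid.
                unsafe { ptr.as_ref() }.fetch_add(1, Ordering::Relaxed);
            });
            s.spawn(|| {
                counters[1].fetch_add(1, Ordering::Relaxed);
            });
        });

        assert_eq!(counters[0].load(Ordering::Relaxed), 1);
        assert_eq!(counters[1].load(Ordering::Relaxed), 1);
    }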