From df6ab625ccd48e9f7d454baca2af366f1c946379 Mon Sep 17 00:00:00 2001 From: janis Date: Wed, 1 Oct 2025 14:18:37 +0200 Subject: [PATCH] a bunch of stuff init --- src/iter.rs | 39 ++++++++++ src/tree.rs | 216 +++++++++++++++++++++++++++++++++++++++------------- src/util.rs | 22 ++++++ 3 files changed, 223 insertions(+), 54 deletions(-) diff --git a/src/iter.rs b/src/iter.rs index c754f81..ae4b472 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -59,3 +59,42 @@ pub trait AdvanceWhile: Iterator + Clone { } impl AdvanceWhile for T where T: Iterator + Clone {} + +pub trait FallibleMapIter: Iterator + Clone { + /// consumes items from `self` if and only if `map` yields `Some`. + #[must_use] + fn map_iter_if(&mut self, map: F) -> Option + where + F: FnOnce(&mut Self) -> Option, + { + // clone iterator and keep around + let old = self.clone(); + match map(self) { + Some(result) => Some(result), + None => { + // the map function failed, restore iterator and yield None. + *self = old; + None + } + } + } + + #[must_use] + fn try_map_iter_if(&mut self, map: F) -> Result + where + F: FnOnce(&mut Self) -> Result, + { + // clone iterator and keep around + let old = self.clone(); + match map(self) { + Ok(result) => Ok(result), + Err(e) => { + // the map function failed, restore iterator and yield None. + *self = old; + Err(e) + } + } + } +} + +impl FallibleMapIter for T where T: Iterator + Clone {} diff --git a/src/tree.rs b/src/tree.rs index 6165e1d..ab34838 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -309,6 +309,8 @@ impl<'a, K, V> NodeRef, K, V> { // SAFETY: we have exclusive access to the entire node. unsafe { &mut (*Self::as_leaf_ptr(self)).len } } + + #[expect(dead_code)] pub(super) fn capacity_mut(&mut self) -> &mut u16 { // SAFETY: we have exclusive access to the entire node. unsafe { &mut (*Self::as_leaf_ptr(self)).len } @@ -583,11 +585,16 @@ impl<'a, K: 'a, V: 'a> NodeRef, K, V> { } impl<'a, K: 'a, V: 'a> Handle, K, V>, marker::Value> { - pub(super) unsafe fn into_value(self) -> &'a V { + pub(super) unsafe fn into_value_unchecked(self) -> &'a V { let leaf = self.node.into_leaf(); let v = leaf.value.as_ref().unwrap(); v } + pub(super) unsafe fn into_value(self) -> Option<&'a V> { + let leaf = self.node.into_leaf(); + let v = leaf.value.as_ref(); + v + } } impl NodeRef { @@ -767,7 +774,7 @@ impl NodeRef { impl Handle, marker::Edge> { /// Converts this edge handle into a value handle. /// IMPORTANT: this handle points to the value of the node, not the edge. - pub(super) fn into_value(self) -> Handle, marker::Value> { + pub(super) unsafe fn into_value(self) -> Handle, marker::Value> { unsafe { Handle::new_value(self.node) } } @@ -803,6 +810,12 @@ impl<'a, K: 'a, V: 'a> Handle, K, V>, marker::Edge> { K: Ord, { let Some(key) = key_seq.next() else { + #[cfg(feature = "std")] + std::eprintln!( + "Inserting value at leaf ({:?}, {})", + self.node.node, + self.idx + ); // key has run out: insert value here. self.node.as_leaf_mut().value = Some(val); // TODO: handle occupied values. @@ -963,14 +976,14 @@ mod search { } } -pub(crate) enum TreeOr<'a, K, V, T> { - Tree(borrow::DormantMutRef<'a, Tree>), +pub(crate) enum TreeOr { + Tree(borrow::DormantRef, BorrowType>), Other(T), } -impl<'a, K, V, T> TreeOr<'a, K, V, T> { +impl TreeOr { #[allow(dead_code)] - fn as_tree_mut(&mut self) -> &mut borrow::DormantMutRef<'a, Tree> { + fn as_tree_mut(&mut self) -> &mut borrow::DormantRef, BorrowType> { match self { TreeOr::Tree(tree) => tree, TreeOr::Other(_) => panic!("no tree present"), @@ -986,8 +999,8 @@ impl<'a, K, V, T> TreeOr<'a, K, V, T> { } } -type TreeOrHandle<'a, BorrowType, K, V, HandleType> = - TreeOr<'a, K, V, Handle, HandleType>>; +type TreeOrHandle = + TreeOr, HandleType>>; mod entry { use core::marker::PhantomData; @@ -1044,7 +1057,7 @@ mod entry { Q: Iterator, { pub(super) key: Q, - pub(super) handle: super::TreeOrHandle<'a, marker::Mut<'a>, K, V, marker::Edge>, + pub(super) handle: super::TreeOrHandle, K, V, marker::Edge>, pub(super) _marker: PhantomData<&'a mut (K, V)>, } @@ -1055,13 +1068,13 @@ mod entry { impl<'a, K, V> OccupiedEntry<'a, K, V> { pub fn get(&self) -> &V { - unsafe { self.handle.reborrow().into_value() } + unsafe { self.handle.reborrow().into_value_unchecked() } } pub fn get_mut(&mut self) -> &mut V { unsafe { self.handle.value_mut() } } - pub fn into_subtree(self) -> super::subtree::Subtree<'a, K, V, marker::Mut<'a>> { + pub fn into_subtree(self) -> super::subtree::Subtree> { Subtree::new_root(self.handle.node) } } @@ -1082,12 +1095,26 @@ mod entry { where Q: Iterator, { + /// Attempts to convert this vacant entry into an occupied entry + /// pointing to the closest existing value to the given key. + pub fn try_into_occupied(self) -> Result, Self> { + match self.handle { + TreeOrHandle::Other(handle) if handle.node.as_leaf().value.is_some() => { + Ok(OccupiedEntry { + handle: unsafe { handle.into_value() }, + _marker: PhantomData, + }) + } + _ => Err(self), + } + } + pub fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V> { let handle = match self.handle { // no root node yet TreeOrHandle::Tree(mut tree) => { // SAFETY: there are no nodes in the tree yet - let tree = unsafe { tree.reborrow() }; + let tree = unsafe { tree.reborrow_mut() }; let root = tree.root.insert(NodeRef::new()); unsafe { @@ -1110,7 +1137,11 @@ mod subtree { use either::Either; - use crate::tree::{TreeOr, TreeOrHandle, borrow::DormantMutRef, search}; + use crate::tree::{ + Handle, TreeOr, TreeOrHandle, + borrow::{DormantMutRef, DormantRef}, + search, + }; use super::{ NodeRef, OnceAndIter, @@ -1119,11 +1150,11 @@ mod subtree { }; // BorrowType may be one of `Immut`, `Mut`. - pub struct Subtree<'tree, K, V, BorrowType> { - pub(super) root: TreeOr<'tree, K, V, NodeRef>, + pub struct Subtree { + pub(super) root: TreeOr>, } - impl<'tree, K, V, BorrowType> Subtree<'tree, K, V, BorrowType> { + impl Subtree { fn root(&self) -> Option<&NodeRef> { match &self.root { TreeOr::Tree(_) => None, @@ -1137,14 +1168,14 @@ mod subtree { } } - pub(crate) fn new_empty(tree: DormantMutRef<'tree, super::Tree>) -> Self { + pub(crate) fn new_empty(tree: DormantRef, BorrowType>) -> Self { Self { root: TreeOr::Tree(tree), } } } - impl<'tree, K, V> Subtree<'tree, K, V, marker::Mut<'tree>> { + impl<'tree, K: 'tree, V: 'tree> Subtree> { /// Returns a mutable reference to the root node of this subtree. /// This function rechecks whether the tree is empty. fn root_mut<'a>( @@ -1152,7 +1183,7 @@ mod subtree { ) -> Either, K, V>, DormantMutRef<'a, super::Tree>> { let node = match &mut self.root { TreeOr::Tree(tree) => { - let (tree, borrow) = DormantMutRef::new(unsafe { tree.reborrow() }); + let (tree, borrow) = DormantRef::new_mut(unsafe { tree.reborrow_mut() }); match tree.root.as_mut() { Some(node) => node.borrow_mut().dormant(), None => return Either::Right(borrow), @@ -1171,7 +1202,7 @@ mod subtree { } } - impl<'tree, K, V, BorrowType> Subtree<'tree, K, V, BorrowType> + impl Subtree where K: Ord, { @@ -1181,7 +1212,7 @@ mod subtree { { let root = self.root()?.reborrow(); match root.search_tree(&mut key_seq) { - search::SearchResult::Found(handle) => Some(unsafe { handle.into_value() }), + search::SearchResult::Found(handle) => unsafe { handle.into_value() }, _ => { // key not found None @@ -1189,10 +1220,34 @@ mod subtree { } } - pub fn get_subtree( - &'_ self, - mut key_seq: Q, - ) -> Option>> + pub fn get_closest(&self, mut key_seq: Q) -> Option<(Option, &V)> + where + Q: Iterator, + { + let root = self.root()?.reborrow(); + + match root.search_tree(&mut key_seq) { + search::SearchResult::Found(handle) => unsafe { + handle.into_value().map(|v| (None, v)) + }, + search::SearchResult::GoDown(edge) => { + // key not found, return the closest existing value in the subtree + assert!(edge.node.as_leaf().value.is_some()); + + let handle = unsafe { edge.into_value() }; + Some((None, unsafe { handle.into_value().unwrap() })) + } + search::SearchResult::Insert(key, edge) => { + // key not found, return the closest existing value in the subtree + assert!(edge.node.as_leaf().value.is_some()); + + let handle = unsafe { edge.into_value() }; + Some((Some(key), unsafe { handle.into_value().unwrap() })) + } + } + } + + pub fn get_subtree(&'_ self, mut key_seq: Q) -> Option>> where Q: Iterator, { @@ -1207,7 +1262,7 @@ mod subtree { } } - impl<'tree, K, V> Subtree<'tree, K, V, marker::Mut<'tree>> + impl<'tree, K: 'tree, V: 'tree> Subtree> where K: Ord, { @@ -1228,7 +1283,7 @@ mod subtree { pub fn get_subtree_mut( &'_ mut self, mut key_seq: Q, - ) -> Option>> + ) -> Option>> where Q: Iterator, { @@ -1257,10 +1312,24 @@ mod subtree { }); } Either::Left(root) => match root.search_tree(&mut key_seq) { - search::SearchResult::Found(handle) => Occupied(OccupiedEntry { - handle, - _marker: PhantomData, - }), + search::SearchResult::Found(handle) => { + if handle.node.as_leaf().value.is_some() { + Occupied(OccupiedEntry { + handle, + _marker: PhantomData, + }) + } else { + Vacant(VacantEntry { + key: OnceAndIter::empty(), + // SAFETY: this is safe because key is empty + // this is never used as an edge + handle: TreeOrHandle::Other(unsafe { + Handle::new_edge(handle.node, 0) + }), + _marker: PhantomData, + }) + } + } search::SearchResult::GoDown(handle) => Vacant(VacantEntry { key: key_seq.into(), handle: TreeOrHandle::Other(handle), @@ -1282,6 +1351,8 @@ mod borrow { use core::{marker::PhantomData, ptr::NonNull}; + use crate::tree::marker; + /// Models a reborrow of some unique reference, when you know that the reborrow /// and all its descendants (i.e., all pointers and references derived from it) /// will not be used any more at some point, after which you want to use the @@ -1292,19 +1363,40 @@ mod borrow { /// the compiler to follow. A `DormantMutRef` allows you to check borrowing /// yourself, while still expressing its stacked nature, and encapsulating /// the raw pointer code needed to do this without undefined behavior. - pub(crate) struct DormantMutRef<'a, T> { + pub(crate) struct DormantRef { ptr: NonNull, - _marker: PhantomData<&'a mut T>, + _marker: PhantomData, } - unsafe impl<'a, T> Sync for DormantMutRef<'a, T> where &'a mut T: Sync {} - unsafe impl<'a, T> Send for DormantMutRef<'a, T> where &'a mut T: Send {} + pub(crate) type DormantMutRef<'a, T> = DormantRef>; + pub(crate) type DormantImmutRef<'a, T> = DormantRef>; - impl<'a, T> DormantMutRef<'a, T> { + unsafe impl<'a, T: 'a, BorrowType: 'a> Sync for DormantRef where &'a mut T: Sync {} + unsafe impl<'a, T: 'a, BorrowType: 'a> Send for DormantRef where &'a mut T: Send {} + + impl<'a, T> DormantRef> { /// Capture a unique borrow, and immediately reborrow it. For the compiler, /// the lifetime of the new reference is the same as the lifetime of the /// original reference, but you promise to use it for a shorter period. - pub(super) fn new(t: &'a mut T) -> (&'a mut T, Self) { + pub(super) fn new_immut(t: &'a T) -> (&'a T, Self) { + let ptr = NonNull::from(t); + // SAFETY: we hold the borrow throughout 'a via `_marker`, and we expose + // only this reference, so it is unique. + let new_ref = unsafe { &*ptr.as_ptr() }; + ( + new_ref, + Self { + ptr, + _marker: PhantomData, + }, + ) + } + } + impl<'a, T> DormantRef> { + /// Capture a unique borrow, and immediately reborrow it. For the compiler, + /// the lifetime of the new reference is the same as the lifetime of the + /// original reference, but you promise to use it for a shorter period. + pub(super) fn new_mut(t: &'a mut T) -> (&'a mut T, Self) { let ptr = NonNull::from(t); // SAFETY: we hold the borrow throughout 'a via `_marker`, and we expose // only this reference, so it is unique. @@ -1335,18 +1427,20 @@ mod borrow { /// /// The reborrow must have ended, i.e., the reference returned by `new` and /// all pointers and references derived from it, must not be used anymore. - pub(super) unsafe fn reborrow(&mut self) -> &'a mut T { + pub(super) unsafe fn reborrow_mut(&mut self) -> &'a mut T { // SAFETY: our own safety conditions imply this reference is again unique. unsafe { &mut *self.ptr.as_ptr() } } + } + impl<'a, T, BorrowType: 'a> DormantRef { /// Borrows a new shared reference from the unique borrow initially captured. /// /// # Safety /// /// The reborrow must have ended, i.e., the reference returned by `new` and /// all pointers and references derived from it, must not be used anymore. - pub(super) unsafe fn reborrow_shared(&self) -> &'a T { + pub(super) unsafe fn reborrow(&self) -> &'a T { // SAFETY: our own safety conditions imply this reference is again unique. unsafe { &*self.ptr.as_ptr() } } @@ -1439,7 +1533,7 @@ where I: Iterator, { once: Option, - iter: I, + iter: Option, } impl Iterator for OnceAndIter @@ -1452,7 +1546,7 @@ where if let Some(once) = self.once.take() { Some(once) } else { - self.iter.next() + self.iter.as_mut()?.next() } } } @@ -1462,7 +1556,10 @@ where I: Iterator, { fn from(iter: I) -> Self { - Self { once: None, iter } + Self { + once: None, + iter: Some(iter), + } } } @@ -1473,7 +1570,14 @@ where pub fn once(once: T, iter: I) -> Self { Self { once: Some(once), - iter, + iter: Some(iter), + } + } + + pub fn empty() -> Self { + Self { + once: None, + iter: None, } } } @@ -1482,27 +1586,24 @@ impl Tree where K: Ord, { - pub fn as_subtree_mut<'a>(&'a mut self) -> subtree::Subtree<'a, K, V, marker::Mut<'a>> { + pub fn as_subtree_mut<'a>(&'a mut self) -> subtree::Subtree> { match self.root.as_mut() { Some(node) => { let dormant = node.borrow_mut().dormant(); Subtree::new_root(unsafe { dormant.awaken() }) } None => { - let (_, dormant) = borrow::DormantMutRef::new(self); + let (_, dormant) = borrow::DormantMutRef::new_mut(self); Subtree::new_empty(dormant) } } } - pub fn as_subtree<'a>(&'a self) -> subtree::Subtree<'a, K, V, marker::Immut<'a>> { - match self.root.as_mut() { - Some(node) => { - let dormant = node.borrow_mut().dormant(); - Subtree::new_root(unsafe { dormant.awaken().reborrow() }) - } + pub fn as_subtree<'a>(&'a self) -> subtree::Subtree> { + match self.root.as_ref() { + Some(node) => Subtree::new_root(node.reborrow()), None => { - let (_, dormant) = borrow::DormantMutRef::new(self); + let (_, dormant) = borrow::DormantRef::new_immut(self); Subtree::new_empty(dormant) } } @@ -1799,13 +1900,20 @@ mod tests { #[test] fn drop_tree() { - let tree = build_tree(); + let _ = build_tree(); } #[test] fn entry() { let tree = build_tree(); - assert_eq!(tree.as_subtree_mut().get("asdf".chars()), Some(&1)); + assert_eq!(tree.as_subtree().get("asdf".chars()), Some(&1)); + assert_eq!(tree.as_subtree().get("asd".chars()), Some(&2)); + assert_eq!(tree.as_subtree().get("asdg".chars()), Some(&3)); + assert_eq!(tree.as_subtree().get("asda".chars()), Some(&4)); + assert_eq!( + tree.as_subtree().get_closest("asdaf".chars()), + Some((Some('f'), &4)) + ); } } diff --git a/src/util.rs b/src/util.rs index f1ef7a0..ab5e2c1 100644 --- a/src/util.rs +++ b/src/util.rs @@ -81,3 +81,25 @@ pub fn is_whitespace(c: char) -> bool { | '\u{2029}' // PARAGRAPH SEPARATOR ) } + +pub fn hash_f32(state: &mut H, value: &f32) { + use core::hash::Hash; + if value.is_nan() { + f32::NAN.to_bits().hash(state); + } else if *value == 0.0 { + 0u32.hash(state); + } else { + value.to_bits().hash(state); + } +} + +pub fn hash_f64(state: &mut H, value: &f64) { + use core::hash::Hash; + if value.is_nan() { + f64::NAN.to_bits().hash(state); + } else if *value == 0.0 { + 0u64.hash(state); + } else { + value.to_bits().hash(state); + } +}