//! A Prefix-Tree (Trie) implementation in Rust. //! This Trie is structured with key-value pairs at the same level. // tree: // root: { kvs: [('f', v0), ('g', v1), ('i', _)], edges: [child0, None, child1] } // child0: { kvs: [('a', v2), ('b', v3)], edges: [] } // child1: { kvs: [('a', v4)], edges: [] } // tree.find() -> () // tree.find('f') -> v0 // tree.find('g') -> v1 // tree.find('fa') -> v2 // tree.find('fb') -> v3 // tree.find('ia') -> v4 // tree.find('i') -> None use alloc::boxed::Box; use core::{ marker::PhantomData, mem::{ManuallyDrop, MaybeUninit}, ptr::NonNull, }; use crate::tree::subtree::Subtree; mod marker { #![allow(dead_code)] use core::marker::PhantomData; pub trait NodeType {} #[derive(Debug)] pub struct Internal; #[derive(Debug)] pub struct Leaf; #[derive(Debug)] pub struct LeafOrInternal; impl NodeType for Internal {} impl NodeType for Leaf {} impl NodeType for LeafOrInternal {} pub trait HandleType {} #[derive(Debug)] pub struct Edge; #[derive(Debug)] pub struct Value; impl HandleType for Edge {} impl HandleType for Value {} #[derive(Debug)] pub struct Owned; #[derive(Debug)] pub struct Mut<'a>(PhantomData<&'a mut ()>); #[derive(Debug)] pub struct ValMut<'a>(PhantomData<&'a mut ()>); #[derive(Debug)] pub struct Immut<'a>(PhantomData<&'a ()>); #[derive(Debug)] pub struct Dying; #[derive(Debug)] pub struct DormantMut; pub trait BorrowType { const TRAVERSAL_PERMIT: bool = true; } impl BorrowType for Owned { const TRAVERSAL_PERMIT: bool = false; } impl<'a> BorrowType for Mut<'a> {} impl<'a> BorrowType for ValMut<'a> {} impl<'a> BorrowType for Immut<'a> {} impl BorrowType for Dying {} impl BorrowType for DormantMut {} } #[derive(Debug)] #[repr(C)] pub(crate) struct LeafNode { parent: Option>, parent_idx: MaybeUninit, value: Option, len: u16, capacity: u16, keys: NonNull>, edges: NonNull>>, } type BoxedNode = NonNull>; #[derive(Debug)] pub(crate) struct NodeRef { node: NonNull>, _marker: PhantomData, } #[derive(Debug)] pub(crate) struct Handle { 
node: Node, idx: usize, _marker: PhantomData, } impl Copy for Handle {} impl Clone for Handle { fn clone(&self) -> Self { *self } } impl<'a, K: 'a, V: 'a> Copy for NodeRef, K, V> {} impl<'a, K: 'a, V: 'a> Clone for NodeRef, K, V> { fn clone(&self) -> Self { *self } } unsafe impl Sync for NodeRef {} unsafe impl Send for NodeRef, K, V> {} unsafe impl Send for NodeRef, K, V> {} unsafe impl Send for NodeRef, K, V> {} unsafe impl Send for NodeRef {} unsafe impl Send for NodeRef {} impl Handle, marker::Edge> { unsafe fn new_edge(node: NodeRef, idx: usize) -> Self { Self { node, idx, _marker: PhantomData, } } } impl Handle, marker::Value> { unsafe fn new_value(node: NodeRef) -> Self { Self { node, idx: 0, _marker: PhantomData, } } } impl LeafNode { unsafe fn init(this: *mut Self) { unsafe { (&raw mut (*this).parent).write(None); // parent_idx may be left uninitialized (&raw mut (*this).value).write(None); (&raw mut (*this).len).write(0); (&raw mut (*this).capacity).write(0); // keys and edges are dangling pointers (&raw mut (*this).keys).write(NonNull::dangling()); (&raw mut (*this).edges).write(NonNull::dangling()); } } fn new() -> Box { let mut this = Box::new_uninit(); unsafe { Self::init(this.as_mut_ptr()); this.assume_init() } } } impl NodeRef { fn new() -> Self { let node = LeafNode::new(); NodeRef { node: Box::into_non_null(node), _marker: PhantomData, } } } impl NodeRef { /// Finds the parent of the current node. Returns `Ok(handle)` if the current /// node actually has a parent, where `handle` points to the edge of the parent /// that points to the current node. Returns `Err(self)` if the current node has /// no parent, giving back the original `NodeRef`. /// /// The method name assumes you picture trees with the root node on top. /// /// `edge.descend().ascend().unwrap()` and `node.ascend().unwrap().descend()` should /// both, upon success, do nothing. 
#[allow(dead_code)] pub(super) fn ascend(self) -> Result, marker::Edge>, Self> { const { assert!(BorrowType::TRAVERSAL_PERMIT); } // We need to use raw pointers to nodes because, if BorrowType is marker::ValMut, // there might be outstanding mutable references to values that we must not invalidate. let leaf_ptr = Self::as_leaf_ptr(&self).cast_const(); unsafe { (*leaf_ptr).parent } .map(|parent| Handle { node: NodeRef { node: parent, _marker: PhantomData, }, idx: unsafe { usize::from((*leaf_ptr).parent_idx.assume_init()) }, _marker: PhantomData, }) .ok_or(self) } pub(crate) fn first_edge(self) -> Handle, marker::Edge> { unsafe { Handle::new_edge(self, 0) } } pub(crate) fn last_edge(self) -> Handle, marker::Edge> { let len = self.len(); assert!(len > 0); unsafe { Handle::new_edge(self, len) } } } impl Handle, marker::Edge> { /// Finds the node pointed to by this edge. /// /// The method name assumes you picture trees with the root node on top. /// /// `edge.descend().ascend().unwrap()` and `node.ascend().unwrap().descend()` should /// both, upon success, do nothing. pub(super) fn descend(self) -> NodeRef { const { assert!(BorrowType::TRAVERSAL_PERMIT); } // We need to use raw pointers to nodes because, if BorrowType is // marker::ValMut, there might be outstanding mutable references to // values that we must not invalidate. There's no worry accessing the // height field because that value is copied. Beware that, once the // node pointer is dereferenced, we access the edges array with a // reference (Rust issue #73987) and invalidate any other references // to or inside the array, should any be around. let node = unsafe { // in this case, no references to the node are created. 
self.node.edge_area(self.idx).assume_init_read()
        };
        NodeRef {
            node,
            _marker: PhantomData,
        }
    }
}

impl<K, V> NodeRef<marker::Dying, K, V> {
    /// Frees this node's key array, edge array and the node allocation itself,
    /// and returns the parent edge that pointed to it (or `None` for a root).
    ///
    /// The key and edge arrays are released as `MaybeUninit` slices, so no key
    /// destructor runs here; dropping the boxed node drops its `value`.
    #[allow(dead_code)]
    pub(super) fn deallocate_and_ascend(
        self,
    ) -> Option<Handle<NodeRef<marker::Dying, K, V>, marker::Edge>> {
        let node = self.node;
        // Read the parent link before the node's memory is released.
        let parent = self.ascend().ok();
        unsafe {
            match (*node.as_ptr()).capacity as usize {
                // Capacity 0 means `keys`/`edges` are still the dangling
                // pointers written by `LeafNode::init`: nothing to free.
                0 => {}
                capacity => {
                    let keys = (*node.as_ptr()).keys;
                    let edges = (*node.as_ptr()).edges;
                    _ = Box::from_non_null(NonNull::slice_from_raw_parts(keys, capacity));
                    _ = Box::from_non_null(NonNull::slice_from_raw_parts(edges, capacity));
                }
            }
            _ = Box::from_non_null(node);
        }
        parent
    }
}

impl<'a, K, V, HandleType> Handle<NodeRef<marker::Mut<'a>, K, V>, HandleType> {
    /// Temporarily takes out another mutable handle on the same location. Beware, as
    /// this method is very dangerous, doubly so since it might not immediately appear
    /// dangerous.
    ///
    /// For details, see `NodeRef::reborrow_mut`.
    #[allow(dead_code)]
    pub(super) unsafe fn reborrow_mut(
        &mut self,
    ) -> Handle<NodeRef<marker::Mut<'_>, K, V>, HandleType> {
        // We can't use Handle::new_value or Handle::new_edge because we don't know our type
        Handle {
            node: unsafe { self.node.reborrow_mut() },
            idx: self.idx,
            _marker: PhantomData,
        }
    }

    /// Returns a dormant copy of this handle which can be reawakened later.
    ///
    /// See `DormantMutRef` for more details.
    pub(super) fn dormant(&self) -> Handle<NodeRef<marker::DormantMut, K, V>, HandleType> {
        Handle {
            node: self.node.dormant(),
            idx: self.idx,
            _marker: PhantomData,
        }
    }
}

impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V> {
    /// Borrows exclusive access to the node's `len` field.
    pub(super) fn len_mut(&mut self) -> &mut u16 {
        // SAFETY: we have exclusive access to the entire node.
        unsafe { &mut (*Self::as_leaf_ptr(self)).len }
    }

    /// Borrows exclusive access to the node's `capacity` field.
    #[expect(dead_code)]
    pub(super) fn capacity_mut(&mut self) -> &mut u16 {
        // SAFETY: we have exclusive access to the entire node.
        // This must hand out `capacity`, not `len`: writing a length through a
        // reference named "capacity" would corrupt the node's bookkeeping.
        unsafe { &mut (*Self::as_leaf_ptr(self)).capacity }
    }
}

impl<K, V> NodeRef<marker::Owned, K, V> {
    /// Mutably borrows the owned root node. Unlike `reborrow_mut`, this is safe
    /// because the return value cannot be used to destroy the root, and there
    /// cannot be other references to the tree.
pub(super) fn borrow_mut(&mut self) -> NodeRef, K, V> { NodeRef { node: self.node, _marker: PhantomData, } } /// Slightly mutably borrows the owned root node. #[allow(dead_code)] pub(super) fn borrow_valmut(&mut self) -> NodeRef, K, V> { NodeRef { node: self.node, _marker: PhantomData, } } /// Irreversibly transitions to a reference that permits traversal and offers /// destructive methods and little else. #[allow(dead_code)] pub(super) fn into_dying(self) -> NodeRef { NodeRef { node: self.node, _marker: PhantomData, } } } impl<'a, K, V> NodeRef, K, V> { /// Temporarily takes out another mutable reference to the same node. Beware, as /// this method is very dangerous, doubly so since it might not immediately appear /// dangerous. /// /// Because mutable pointers can roam anywhere around the tree, the returned /// pointer can easily be used to make the original pointer dangling, out of /// bounds, or invalid under stacked borrow rules. // FIXME(@gereeter) consider adding yet another type parameter to `NodeRef` // that restricts the use of navigation methods on reborrowed pointers, // preventing this unsafety. unsafe fn reborrow_mut(&mut self) -> NodeRef, K, V> { NodeRef { node: self.node, _marker: PhantomData, } } /// Borrows exclusive access to the leaf portion of a leaf or internal node. fn as_leaf_mut(&mut self) -> &mut LeafNode { let ptr = Self::as_leaf_ptr(self); // SAFETY: we have exclusive access to the entire node. unsafe { &mut *ptr } } /// Offers exclusive access to the leaf portion of a leaf or internal node. fn into_leaf_mut(mut self) -> &'a mut LeafNode { let ptr = Self::as_leaf_ptr(&mut self); // SAFETY: we have exclusive access to the entire node. unsafe { &mut *ptr } } /// Returns a dormant copy of this node with its lifetime erased which can /// be reawakened later. 
pub(super) fn dormant(&self) -> NodeRef { NodeRef { node: self.node, _marker: PhantomData, } } } impl<'a, K: 'a, V: 'a, BorrowType> NodeRef { /// Borrows shared access to an element or slice of the key storage area. /// /// # Safety /// `index` is in bounds of 0..node.capacity unsafe fn key_area(&self, index: I) -> &Output where I: core::slice::SliceIndex<[MaybeUninit], Output = Output>, { // SAFETY: the slice is rebuilt from the node's `keys` pointer and its // `capacity` field, and the caller guarantees that `index` lies within // that slice; the returned reference is tied to the borrow of `self`. unsafe { let keys = (*self.node.as_ptr()).keys; let capacity = (*self.node.as_ptr()).capacity as usize; core::slice::from_raw_parts(keys.as_ptr(), capacity).get_unchecked(index) } } /// Borrows shared access to an element or slice of the node's edge storage area. /// /// # Safety /// `index` is in bounds of 0..node.capacity unsafe fn edge_area(&self, index: I) -> &Output where I: core::slice::SliceIndex<[MaybeUninit>], Output = Output>, { // SAFETY: the slice is rebuilt from the node's `edges` pointer and its // `capacity` field, and the caller guarantees that `index` lies within // that slice; the returned reference is tied to the borrow of `self`. unsafe { let edges = (*self.node.as_ptr()).edges; let capacity = (*self.node.as_ptr()).capacity as usize; core::slice::from_raw_parts(edges.as_ptr(), capacity).get_unchecked(index) } } } impl<'a, K: 'a, V: 'a> NodeRef, K, V> { /// Borrows exclusive access to an element or slice of the key storage area. /// /// # Safety /// `index` is in bounds of 0..node.capacity unsafe fn key_area_mut(&mut self, index: I) -> &mut Output where I: core::slice::SliceIndex<[MaybeUninit], Output = Output>, { // SAFETY: the caller will not be able to call further methods on self // until the key slice reference is dropped, as we have unique access // for the lifetime of the borrow.
unsafe { let leaf = self.as_leaf_mut(); core::slice::from_raw_parts_mut(leaf.keys.as_ptr(), leaf.capacity as usize) .get_unchecked_mut(index) } } /// Borrows exclusive access to an element or slice of the node's edge storage area. /// /// # Safety /// `index` is in bounds of 0..node.capacity unsafe fn edge_area_mut(&mut self, index: I) -> &mut Output where I: core::slice::SliceIndex<[MaybeUninit>], Output = Output>, { // SAFETY: the caller will not be able to call further methods on self // until the edge slice reference is dropped, as we have unique access // for the lifetime of the borrow. unsafe { let leaf = self.as_leaf_mut(); core::slice::from_raw_parts_mut(leaf.edges.as_ptr(), leaf.capacity as usize) .get_unchecked_mut(index) } } } impl NodeRef { /// Revert to the unique borrow initially captured. /// /// # Safety /// /// The reborrow must have ended, i.e., the node reference this dormant copy was /// taken from, and all pointers and references derived from it, must not be used anymore. pub(super) unsafe fn awaken<'a>(self) -> NodeRef, K, V> { NodeRef { node: self.node, _marker: PhantomData, } } } impl Handle, HandleType> { /// Revert to the unique borrow initially captured. /// /// # Safety /// /// The reborrow must have ended, i.e., the handle this dormant copy was taken /// from, and all pointers and references derived from it, must not be used anymore. pub(super) unsafe fn awaken<'a>(self) -> Handle, K, V>, HandleType> { Handle { node: unsafe { self.node.awaken() }, idx: self.idx, _marker: PhantomData, } } } impl NodeRef { /// Finds the length of the node. This is the number of keys stored in it. /// The node keeps one edge per key, so this is also the number of edges. /// Note that, despite being safe, calling this function can have the side effect /// of invalidating mutable references that unsafe code has created. pub(super) fn len(&self) -> usize { // Crucially, we only access the `len` field here. If BorrowType is marker::ValMut, // there might be outstanding mutable references to values that we must not invalidate.
unsafe { usize::from((*Self::as_leaf_ptr(self)).len) } } pub(super) fn capacity(&self) -> usize { unsafe { usize::from((*Self::as_leaf_ptr(self)).capacity) } } } impl NodeRef { pub(super) fn is_leaf(&self) -> bool { unsafe { (*Self::as_leaf_ptr(self)).len == 0 } } pub(crate) fn has_descendants(&self) -> bool { self.as_leaf().len > 0 } /// Temporarily takes out another, immutable reference to the same node. pub(super) fn reborrow(&self) -> NodeRef, K, V> { NodeRef { node: self.node, _marker: PhantomData, } } /// Exposes the leaf portion of any leaf or internal node. /// /// Returns a raw ptr to avoid invalidating other references to this node. fn as_leaf_ptr(this: &Self) -> *mut LeafNode { // The node must be valid for at least the LeafNode portion. // This is not a reference in the NodeRef type because we don't know if // it should be unique or shared. this.node.as_ptr() } #[allow(dead_code)] fn as_leaf(&self) -> &LeafNode { // SAFETY: the static node type is `Leaf`. unsafe { &*Self::as_leaf_ptr(self) } } fn as_leaf_non_null(this: &Self) -> NonNull> { // SAFETY: the static node type is `Leaf`. unsafe { NonNull::new_unchecked(Self::as_leaf_ptr(this)) } } } impl<'a, K: 'a, V: 'a> NodeRef, K, V> { /// Borrows a view into the keys stored in the node. pub(super) fn keys(&self) -> &[K] { unsafe { self.key_area(..self.len()).assume_init_ref() } } pub(super) fn edges(&self) -> &[BoxedNode] { unsafe { self.edge_area(..self.len()).assume_init_ref() } } } impl Handle, HandleType> { /// Temporarily takes out another immutable handle on the same location. pub(super) fn reborrow(&self) -> Handle, K, V>, HandleType> { // We can't use Handle::new_kv or Handle::new_edge because we don't know our type Handle { node: self.node.reborrow(), idx: self.idx, _marker: PhantomData, } } } impl<'a, K: 'a, V: 'a> NodeRef, K, V> { /// Exposes the leaf portion of any leaf or internal node in an immutable tree. 
fn into_leaf(self) -> &'a LeafNode { let ptr = Self::as_leaf_ptr(&self); // SAFETY: there can be no mutable references into this tree borrowed as `Immut`. unsafe { &*ptr } } } impl<'a, K: 'a, V: 'a> Handle, K, V>, marker::Value> { pub(super) unsafe fn into_value_unchecked(self) -> &'a V { let leaf = self.node.into_leaf(); let v = leaf.value.as_ref().unwrap(); v } pub(super) unsafe fn into_value(self) -> Option<&'a V> { let leaf = self.node.into_leaf(); let v = leaf.value.as_ref(); v } } impl NodeRef { #[allow(dead_code)] /// Borrows exclusive access to the leaf portion of a dying leaf or internal node. fn as_leaf_dying(&mut self) -> &mut LeafNode { let ptr = Self::as_leaf_ptr(self); // SAFETY: we have exclusive access to the entire node. unsafe { &mut *ptr } } } impl NodeRef { #[allow(dead_code)] pub(super) unsafe fn into_value(mut self) -> Option { let leaf = self.as_leaf_dying(); leaf.value.take() } } impl<'a, K: 'a, V: 'a> Handle, K, V>, marker::Value> { pub(super) unsafe fn value_mut(&mut self) -> &mut V { let leaf = self.node.as_leaf_mut(); let v = leaf.value.as_mut().unwrap(); v } pub fn into_value_mut(self) -> &'a mut V { let leaf = self.node.into_leaf_mut(); leaf.value.as_mut().unwrap() } } pub(super) enum ForceResult { Leaf(Node), Internal(Node), } #[allow(dead_code)] impl Handle, HandleType> { fn force(self) -> ForceResult, HandleType>> { match self.node.force() { ForceResult::Leaf(leaf) => ForceResult::Leaf(Handle { node: leaf, idx: self.idx, _marker: PhantomData, }), ForceResult::Internal(internal) => ForceResult::Internal(Handle { node: internal, idx: self.idx, _marker: PhantomData, }), } } } impl NodeRef { fn force(self) -> ForceResult> { if self.is_leaf() { ForceResult::Leaf(self) } else { ForceResult::Internal(self) } } } impl NodeRef { fn first_leaf_edge(self) -> Handle, marker::Edge> { let mut node = self; loop { match node.force() { ForceResult::Leaf(leaf) => break leaf.first_edge(), ForceResult::Internal(internal) => node = 
internal.first_edge().descend(),
            }
        }
    }

    /// Walks down the right-most path and returns edge position 0 of the leaf
    /// it ends in.
    ///
    /// A leaf has no keys and no edges, so position 0 is its only edge position.
    fn last_leaf_edge(self) -> Handle<NodeRef<BorrowType, K, V>, marker::Edge> {
        let mut node = self;
        loop {
            match node.force() {
                // `last_edge` asserts `len > 0`, which can never hold for a leaf.
                ForceResult::Leaf(leaf) => break leaf.first_edge(),
                ForceResult::Internal(internal) => {
                    // An internal node has `len > 0`, and its initialized edges
                    // are `0..len`, so the right-most child sits at `len - 1`.
                    let last = internal.len() - 1;
                    node = unsafe { Handle::new_edge(internal, last) }.descend();
                }
            }
        }
    }
}

impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V> {
    /// Doubles the capacity of the key and edge arrays (minimum 16), moving the
    /// existing `len` keys and edges into the new allocations.
    ///
    /// The capacity is stored in a `u16`, so growth is capped at `u16::MAX`.
    ///
    /// # Panics
    /// Panics with "Node capacity overflow" if the node is already at the
    /// maximum capacity.
    pub(super) fn grow_node(&mut self) {
        const MAX_CAPACITY: usize = u16::MAX as usize;

        let capacity = self.capacity();
        if capacity >= MAX_CAPACITY {
            panic!("Node capacity overflow");
        }
        // Ensure the new capacity is at least 16 (capacity starts off at 0, so
        // the first growth goes to 16) and never exceeds what a `u16` can hold.
        // Without the upper clamp, growing from 32768 would allocate 65536
        // slots but record a capacity of 0.
        let new_capacity = capacity.saturating_mul(2).clamp(16, MAX_CAPACITY);
        let stored_capacity =
            u16::try_from(new_capacity).expect("new_capacity is clamped to u16::MAX");

        // Allocate new keys and edges.
        let new_keys = Box::into_non_null(Box::new_uninit_slice(new_capacity));
        let new_edges = Box::into_non_null(Box::new_uninit_slice(new_capacity));

        let leaf = self.as_leaf_mut();
        let old_keys = core::mem::replace(&mut leaf.keys, new_keys.as_non_null_ptr());
        let old_edges = core::mem::replace(&mut leaf.edges, new_edges.as_non_null_ptr());

        // we don't want to copy and deallocate the old keys & edges if the
        // capacity was 0, because then the old pointers are dangling.
        if leaf.capacity > 0 {
            unsafe {
                let len = leaf.len as usize;
                core::ptr::copy_nonoverlapping(old_keys.as_ptr(), new_keys.as_mut_ptr(), len);
                core::ptr::copy_nonoverlapping(old_edges.as_ptr(), new_edges.as_mut_ptr(), len);
                // dealloc old keys and edges.
                // This doesn't drop because the keys and edges were moved.
                _ = Box::<[_]>::from_non_null(NonNull::slice_from_raw_parts(old_keys, capacity));
                _ = Box::<[_]>::from_non_null(NonNull::slice_from_raw_parts(old_edges, capacity));
            }
        }

        leaf.capacity = stored_capacity;
    }
}

impl<K, V> NodeRef<marker::Owned, K, V> {
    /// Attaches this owned node as a child of `parent.node` at edge position
    /// `parent.idx`, stored under `key`, and returns a mutable reference to it.
    ///
    /// The parent's arrays are grown when full, and the `parent_idx` of every
    /// sibling to the right of the insertion point is updated.
    pub(super) fn reparent<'a>(
        mut self,
        mut parent: Handle<NodeRef<marker::Mut<'a>, K, V>, marker::Edge>,
        key: K,
    ) -> NodeRef<marker::Mut<'a>, K, V>
    where
        K: 'a,
        V: 'a,
    {
        self.borrow_mut().as_leaf_mut().parent = Some(NodeRef::as_leaf_non_null(&parent.node));
        self.borrow_mut()
            .as_leaf_mut()
            .parent_idx
            .write(parent.idx as u16);

        let old_capacity = parent.node.capacity();
        let old_len = parent.node.len();
        let new_len = old_len + 1;
        if new_len > old_capacity {
            parent.node.grow_node();
        }

        // insert new key and child node.
        // SAFETY: we just grew the allocations.
        unsafe {
            slice_insert(parent.node.key_area_mut(..new_len), parent.idx, key);
            slice_insert(parent.node.edge_area_mut(..new_len), parent.idx, self.node);
        }
        *parent.node.len_mut() = new_len as u16;

        // adjust parent indices of siblings to the right.
        for i in (parent.idx + 1)..new_len {
            unsafe {
                let sibling = parent.node.edge_area_mut(i).assume_init_read();
                (&raw mut (*sibling.as_ptr()).parent_idx).write(MaybeUninit::new(i as u16));
            }
        }

        unsafe { self.borrow_mut().dormant().awaken() }
    }
}

#[allow(dead_code)]
impl<BorrowType, K, V> Handle<NodeRef<BorrowType, K, V>, marker::Edge> {
    /// Converts this edge handle into a value handle.
    /// IMPORTANT: this handle points to the value of the node, not the edge.
pub(super) unsafe fn into_value(self) -> Handle, marker::Value> { unsafe { Handle::new_value(self.node) } } pub(crate) fn right_edge( self, ) -> Result, marker::Edge>, Self> { let len = self.node.len(); if self.idx + 1 < len { Ok(unsafe { Handle::new_edge(self.node, self.idx + 1) }) } else { Err(self) } } pub(crate) fn left_edge(self) -> Result, marker::Edge>, Self> { if self.idx > 0 { Ok(unsafe { Handle::new_edge(self.node, self.idx - 1) }) } else { Err(self) } } } impl<'a, K: 'a, V: 'a> Handle, K, V>, marker::Edge> { /// Inserts `val` at the end of the path spelled by the remaining keys of `key_seq`. /// /// Each remaining key creates one new child node, attached at this edge position via /// `reparent`; once `key_seq` is exhausted the value is stored in the node reached, /// replacing any value already there. Returns a value handle to that node. pub(super) unsafe fn insert_recursing( mut self, mut key_seq: Q, val: V, ) -> Handle, K, V>, marker::Value> where Q: Iterator, K: Ord, { let Some(key) = key_seq.next() else { #[cfg(feature = "std")] std::eprintln!( "Inserting value at leaf ({:?}, {})", self.node.node, self.idx ); // key has run out: insert value here. self.node.as_leaf_mut().value = Some(val); // TODO: handle occupied values. return unsafe { Handle::new_value(self.node) }; }; let last = unsafe { let child = NodeRef::new().reparent(self, key); Handle::new_edge(child, 0) .insert_recursing(key_seq, val) .dormant() }; unsafe { last.awaken() } } } // search: mod search { use super::{ForceResult, Handle, NodeRef, marker}; use core::borrow::Borrow; use core::cmp::Ordering; pub(super) enum SearchResult { /// The node which contains the value for the key. Found(Handle, marker::Value>), /// The key was found, and the search should continue at the given edge. GoDown(Handle, marker::Edge>), /// The key was not found, and should be inserted at the given position.
Insert(K, Handle, marker::Edge>), } pub(super) enum IndexResult { Edge(usize), Insert(usize), } impl NodeRef { pub(super) fn search_tree(mut self, mut key: Q) -> SearchResult where Q: Iterator, K: Ord, { use SearchResult::*; loop { self = match self.search_node(&mut key) { Found(handle) => { return Found(handle); } GoDown(handle) => handle.descend(), Insert(key, handle) => return Insert(key, handle), } } } } impl NodeRef { fn search_node(self, mut key: Q) -> SearchResult where Q: Iterator, K: Ord, { use SearchResult::*; let Some(key) = key.next() else { // key has run out, a value is either occupying this // node, or belongs here. return SearchResult::Found(unsafe { Handle::new_value(self) }); }; match self.force() { // self is a leaf node and doesn't contain any keys: // a new leaf node should be inserted at this point. ForceResult::Leaf(leaf) => { SearchResult::Insert(key, unsafe { Handle::new_edge(leaf, 0) }) } ForceResult::Internal(internal) => { // search through the keys of the internal node match unsafe { internal.find_key_index(&key, 0) } { IndexResult::Insert(idx) => Insert(key, unsafe { // the key wasn't present, but should be inserted at `idx`. Handle::new_edge(internal, idx) }), IndexResult::Edge(idx) => { // the key was found, continue searching down the edge GoDown(unsafe { Handle::new_edge(internal, idx) }) } } } } } } impl NodeRef { /// # Safety /// `start_index` must be a valid edge index for the node. unsafe fn find_key_index(&self, key: &Q, start_index: usize) -> IndexResult where Q: Ord, K: Borrow, { let node = self.reborrow(); let keys = node.keys(); debug_assert!(start_index <= keys.len()); for (offset, k) in unsafe { keys.get_unchecked(start_index..) 
} .iter() .enumerate() { match key.cmp(k.borrow()) { Ordering::Greater => {} Ordering::Equal => { return IndexResult::Edge(start_index + offset); } Ordering::Less => { return IndexResult::Insert(start_index + offset); } } } IndexResult::Insert(keys.len()) } } #[cfg(test)] mod tests { use super::super::Tree; fn insert_and_dbg<'a>( tree: &'a mut Tree, key: &'a str, value: &'static str, ) { let entry = tree.entry(key.chars()); std::dbg!(&entry); let entry = entry.or_insert(value); std::dbg!(&entry); } #[test] fn asdf() { let mut tree = Tree::new(); insert_and_dbg(&mut tree, "+", "Plus"); insert_and_dbg(&mut tree, "++", "PlusPlus"); insert_and_dbg(&mut tree, "+=", "PlusEqual"); insert_and_dbg(&mut tree, "++-", "PlusPlusMinus"); std::eprintln!("tree: {:?}", &tree); assert_eq!( tree.entry("++".chars()).or_insert("asdf").get(), &"PlusPlus" ); std::dbg!(tree.entry("+".chars())); assert_eq!(tree.entry("+".chars()).or_insert("asdf").get(), &"Plus"); } } } pub(crate) enum TreeOr { Tree(borrow::DormantRef, BorrowType>), Other(T), } impl TreeOr { #[allow(dead_code)] fn as_tree_mut(&mut self) -> &mut borrow::DormantRef, BorrowType> { match self { TreeOr::Tree(tree) => tree, TreeOr::Other(_) => panic!("no tree present"), } } #[allow(dead_code)] fn as_other_mut(&mut self) -> &mut T { match self { TreeOr::Tree(_) => panic!("no other present"), TreeOr::Other(other) => other, } } } type TreeOrHandle = TreeOr, HandleType>>; mod entry { use core::marker::PhantomData; use crate::tree::{TreeOrHandle, subtree::Subtree}; use super::{Handle, NodeRef, marker}; pub enum Entry<'a, Q: 'a, K: 'a, V: 'a> where Q: Iterator, { Vacant(VacantEntry<'a, Q, K, V>), Occupied(OccupiedEntry<'a, K, V>), } use core::fmt::Debug; impl Debug for Entry<'_, Q, K, V> where Q: Iterator, { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { Entry::Vacant(vacant) => write!(f, "Vacant({:?})", vacant), Entry::Occupied(occupied) => write!(f, "Occupied({:?})", occupied), } } } impl Debug 
for VacantEntry<'_, Q, K, V> where Q: Iterator, { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("VacantEntry") .field("key", &self.key) // .field("handle", &self.handle) .finish() } } impl Debug for OccupiedEntry<'_, K, V> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("OccupiedEntry") .field("value", &self.get()) .field("handle", &self.handle) .finish() } } pub struct VacantEntry<'a, Q, K, V> where Q: Iterator, { pub(super) key: Q, pub(super) handle: super::TreeOrHandle, K, V, marker::Edge>, pub(super) _marker: PhantomData<&'a mut (K, V)>, } pub struct OccupiedEntry<'a, K, V> { pub(super) handle: Handle, K, V>, marker::Value>, pub(super) _marker: PhantomData<&'a mut (K, V)>, } impl<'a, K, V> OccupiedEntry<'a, K, V> { pub fn get(&self) -> &V { unsafe { self.handle.reborrow().into_value_unchecked() } } pub fn get_mut(&mut self) -> &mut V { unsafe { self.handle.value_mut() } } pub fn into_subtree(self) -> super::subtree::Subtree> { Subtree::new_root(self.handle.node) } } impl<'a, Q, K: Ord, V> Entry<'a, Q, K, V> where Q: Iterator, { pub fn or_insert(self, value: V) -> OccupiedEntry<'a, K, V> { match self { Entry::Vacant(vacant) => vacant.insert_entry(value), Entry::Occupied(occupied) => occupied, } } } impl<'a, Q, K: Ord, V> VacantEntry<'a, Q, K, V> where Q: Iterator, { /// Attempts to convert this vacant entry into an occupied entry /// pointing to the closest existing value to the given key. 
pub fn try_into_occupied(self) -> Result, Self> { match self.handle { TreeOrHandle::Other(handle) if handle.node.as_leaf().value.is_some() => { Ok(OccupiedEntry { handle: unsafe { handle.into_value() }, _marker: PhantomData, }) } _ => Err(self), } } pub fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V> { let handle = match self.handle { // no root node yet TreeOrHandle::Tree(mut tree) => { // SAFETY: there are no nodes in the tree yet let tree = unsafe { tree.reborrow_mut() }; let root = tree.root.insert(NodeRef::new()); unsafe { Handle::new_edge(root.borrow_mut(), 0).insert_recursing(self.key, value) } } TreeOrHandle::Other(handle) => unsafe { handle.insert_recursing(self.key, value) }, }; OccupiedEntry { handle, _marker: PhantomData, } } } } mod subtree { use core::marker::PhantomData; use either::Either; use crate::tree::{ Handle, TreeOr, TreeOrHandle, borrow::{DormantMutRef, DormantRef}, search, }; use super::{ NodeRef, OnceAndIter, entry::{Entry, OccupiedEntry, VacantEntry}, marker, }; // BorrowType may be one of `Immut`, `Mut`. pub struct Subtree { pub(super) root: TreeOr>, } impl Subtree { fn root(&self) -> Option<&NodeRef> { match &self.root { TreeOr::Tree(_) => None, TreeOr::Other(node) => Some(node), } } pub(crate) fn new_root(node: NodeRef) -> Self { Self { root: TreeOr::Other(node), } } pub(crate) fn new_empty(tree: DormantRef, BorrowType>) -> Self { Self { root: TreeOr::Tree(tree), } } } impl<'tree, K: 'tree, V: 'tree> Subtree> { /// Returns a mutable reference to the root node of this subtree. /// This function rechecks whether the tree is empty. 
fn root_mut<'a>(
        &'a mut self,
    ) -> Either<NodeRef<marker::Mut<'a>, K, V>, DormantMutRef<'a, super::Tree<K, V>>> {
        let node = match &mut self.root {
            TreeOr::Tree(tree) => {
                let (tree, borrow) = DormantRef::new_mut(unsafe { tree.reborrow_mut() });
                match tree.root.as_mut() {
                    Some(node) => node.borrow_mut().dormant(),
                    // Still empty: hand the tree borrow back to the caller.
                    None => return Either::Right(borrow),
                }
            }
            TreeOr::Other(node) => node.dormant(),
        };
        // Cache the root node so later calls take the `Other` branch directly.
        self.root = TreeOr::Other(unsafe { node.awaken() });
        let TreeOr::Other(node) = &mut self.root else {
            unreachable!()
        };
        Either::Left(unsafe { node.reborrow_mut() })
    }
}

impl<K, V, BorrowType> Subtree<K, V, BorrowType>
where
    K: Ord,
{
    /// Returns the value stored exactly at `key_seq`, if any.
    pub fn get<Q>(&self, mut key_seq: Q) -> Option<&V>
    where
        Q: Iterator<Item = K>,
    {
        let root = self.root()?.reborrow();
        match root.search_tree(&mut key_seq) {
            search::SearchResult::Found(handle) => unsafe { handle.into_value() },
            _ => {
                // key not found
                None
            }
        }
    }

    /// Follows `key_seq` as far as the tree allows and returns the value of the
    /// node where the walk stopped.
    ///
    /// The first tuple element is `None` when the whole key sequence was
    /// matched, or `Some(key)` with the first key that had no matching edge.
    /// Returns `None` when the tree is empty or the node where the walk
    /// stopped holds no value.
    pub fn get_closest<Q>(&self, mut key_seq: Q) -> Option<(Option<K>, &V)>
    where
        Q: Iterator<Item = K>,
    {
        let root = self.root()?.reborrow();
        match root.search_tree(&mut key_seq) {
            search::SearchResult::Found(handle) => unsafe {
                handle.into_value().map(|v| (None, v))
            },
            search::SearchResult::GoDown(edge) => {
                // A node on the path may exist only as a prefix of longer
                // keys and hold no value; that is a miss, not a bug.
                unsafe { edge.into_value().into_value() }.map(|v| (None, v))
            }
            search::SearchResult::Insert(key, edge) => {
                // Same as above, but report the key that could not be matched.
                unsafe { edge.into_value().into_value() }.map(|v| (Some(key), v))
            }
        }
    }

    /// Returns an immutable view of the subtree rooted at `key_seq`, if that
    /// path exists in the tree.
    pub fn get_subtree<Q>(&'_ self, mut key_seq: Q) -> Option<Subtree<K, V, marker::Immut<'_>>>
    where
        Q: Iterator<Item = K>,
    {
        let root = self.root()?.reborrow();
        match root.search_tree(&mut key_seq) {
            search::SearchResult::Found(handle) => Some(Subtree::new_root(handle.node)),
            _ => {
                // key not found
                None
            }
        }
    }
}

impl<'tree, K: 'tree, V: 'tree> Subtree<K, V, marker::Mut<'tree>>
where
    K: Ord,
{
    pub fn get_mut<Q>(&mut self, mut key_seq: Q) -> Option<&mut V>
    where
        Q: Iterator<Item = K>,
{ let root = self.root_mut().left()?; match root.search_tree(&mut key_seq) { search::SearchResult::Found(handle) => Some(handle.into_value_mut()), _ => { // key not found None } } } pub fn get_subtree_mut( &'_ mut self, mut key_seq: Q, ) -> Option>> where Q: Iterator, { let root = self.root_mut().left()?; match root.search_tree(&mut key_seq) { search::SearchResult::Found(handle) => Some(Subtree::new_root(handle.node)), _ => { // key not found None } } } pub fn entry(&'_ mut self, mut key_seq: Q) -> Entry<'_, OnceAndIter, K, V> where Q: Iterator, { use Entry::*; match self.root_mut() { Either::Right(tree) => { return Vacant(VacantEntry { key: key_seq.into(), handle: TreeOrHandle::Tree(tree), _marker: PhantomData, }); } Either::Left(root) => match root.search_tree(&mut key_seq) { search::SearchResult::Found(handle) => { if handle.node.as_leaf().value.is_some() { Occupied(OccupiedEntry { handle, _marker: PhantomData, }) } else { Vacant(VacantEntry { key: OnceAndIter::empty(), // SAFETY: this is safe because key is empty // this is never used as an edge handle: TreeOrHandle::Other(unsafe { Handle::new_edge(handle.node, 0) }), _marker: PhantomData, }) } } search::SearchResult::GoDown(handle) => Vacant(VacantEntry { key: key_seq.into(), handle: TreeOrHandle::Other(handle), _marker: PhantomData, }), search::SearchResult::Insert(key, handle) => Vacant(VacantEntry { key: OnceAndIter::once(key, key_seq), handle: TreeOrHandle::Other(handle), _marker: PhantomData, }), }, } } } } mod borrow { #![allow(dead_code)] use core::{marker::PhantomData, ptr::NonNull}; use crate::tree::marker; /// Models a reborrow of some unique reference, when you know that the reborrow /// and all its descendants (i.e., all pointers and references derived from it) /// will not be used any more at some point, after which you want to use the /// original unique reference again. 
/// /// The borrow checker usually handles this stacking of borrows for you, but /// some control flows that accomplish this stacking are too complicated for /// the compiler to follow. A `DormantMutRef` allows you to check borrowing /// yourself, while still expressing its stacked nature, and encapsulating /// the raw pointer code needed to do this without undefined behavior. pub(crate) struct DormantRef { ptr: NonNull, _marker: PhantomData, } pub(crate) type DormantMutRef<'a, T> = DormantRef>; pub(crate) type DormantImmutRef<'a, T> = DormantRef>; unsafe impl<'a, T: 'a, BorrowType: 'a> Sync for DormantRef where &'a mut T: Sync {} unsafe impl<'a, T: 'a, BorrowType: 'a> Send for DormantRef where &'a mut T: Send {} impl<'a, T> DormantRef> { /// Capture a unique borrow, and immediately reborrow it. For the compiler, /// the lifetime of the new reference is the same as the lifetime of the /// original reference, but you promise to use it for a shorter period. pub(super) fn new_immut(t: &'a T) -> (&'a T, Self) { let ptr = NonNull::from(t); // SAFETY: we hold the borrow throughout 'a via `_marker`, and we expose // only this reference, so it is unique. let new_ref = unsafe { &*ptr.as_ptr() }; ( new_ref, Self { ptr, _marker: PhantomData, }, ) } } impl<'a, T> DormantRef> { /// Capture a unique borrow, and immediately reborrow it. For the compiler, /// the lifetime of the new reference is the same as the lifetime of the /// original reference, but you promise to use it for a shorter period. pub(super) fn new_mut(t: &'a mut T) -> (&'a mut T, Self) { let ptr = NonNull::from(t); // SAFETY: we hold the borrow throughout 'a via `_marker`, and we expose // only this reference, so it is unique. let new_ref = unsafe { &mut *ptr.as_ptr() }; ( new_ref, Self { ptr, _marker: PhantomData, }, ) } /// Revert to the unique borrow initially captured. 
/// /// # Safety /// /// The reborrow must have ended, i.e., the reference returned by `new` and /// all pointers and references derived from it, must not be used anymore. pub(super) unsafe fn awaken(self) -> &'a mut T { // SAFETY: our own safety conditions imply this reference is again unique. unsafe { &mut *self.ptr.as_ptr() } } /// Borrows a new mutable reference from the unique borrow initially captured. /// /// # Safety /// /// The reborrow must have ended, i.e., the reference returned by `new` and /// all pointers and references derived from it, must not be used anymore. pub(super) unsafe fn reborrow_mut(&mut self) -> &'a mut T { // SAFETY: our own safety conditions imply this reference is again unique. unsafe { &mut *self.ptr.as_ptr() } } } impl<'a, T, BorrowType: 'a> DormantRef { /// Borrows a new shared reference from the unique borrow initially captured. /// /// # Safety /// /// The reborrow must have ended, i.e., the reference returned by `new` and /// all pointers and references derived from it, must not be used anymore. pub(super) unsafe fn reborrow(&self) -> &'a T { // SAFETY: our own safety conditions imply this reference is again unique. 
unsafe { &*self.ptr.as_ptr() } } } } type Root = NodeRef; pub struct Tree { root: Option>, _marker: PhantomData>, } impl Default for Tree { fn default() -> Self { Self::new() } } impl<'a, K: core::fmt::Debug + 'a, V: core::fmt::Debug + 'a> core::fmt::Debug for &'a Tree { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mut map = f.debug_map(); fn format_node_ref<'a, K: 'a, V: 'a>( f: &mut core::fmt::Formatter<'_>, node: &NodeRef, K, V>, ) -> core::fmt::Result where K: core::fmt::Debug, V: core::fmt::Debug, { match node.force() { ForceResult::Leaf(leaf) => f .debug_struct("Leaf") .field("value", &leaf.into_leaf().value) .finish(), ForceResult::Internal(internal) => f .debug_struct("Internal") .field("value", &internal.into_leaf().value) .field_with("edges", |f| { let mut list = f.debug_list(); for (key, edge) in internal.keys().iter().zip(internal.edges()) { list.entry_with(|f| { f.debug_set() .entry(key) .entry_with(|f| { format_node_ref( f, &NodeRef { node: *edge, _marker: PhantomData, }, ) }) .finish() }); } list.finish() }) .finish(), } } match self.root { Some(ref root) => { map.key(&"root").value_with(|f| { let internal = root.reborrow(); format_node_ref(f, &internal) }); map.finish() } None => map.finish(), } } } impl Tree { pub fn new() -> Self { Self { root: None, _marker: PhantomData, } } } #[derive(Debug)] pub struct OnceAndIter where I: Iterator, { once: Option, iter: Option, } impl Iterator for OnceAndIter where I: Iterator, { type Item = T; fn next(&mut self) -> Option { if let Some(once) = self.once.take() { Some(once) } else { self.iter.as_mut()?.next() } } } impl From for OnceAndIter where I: Iterator, { fn from(iter: I) -> Self { Self { once: None, iter: Some(iter), } } } impl OnceAndIter where I: Iterator, { pub fn once(once: T, iter: I) -> Self { Self { once: Some(once), iter: Some(iter), } } pub fn empty() -> Self { Self { once: None, iter: None, } } } impl Tree where K: Ord, { pub fn as_subtree_mut<'a>(&'a mut self) -> 
subtree::Subtree> { match self.root.as_mut() { Some(node) => { let dormant = node.borrow_mut().dormant(); Subtree::new_root(unsafe { dormant.awaken() }) } None => { let (_, dormant) = borrow::DormantMutRef::new_mut(self); Subtree::new_empty(dormant) } } } pub fn as_subtree<'a>(&'a self) -> subtree::Subtree> { match self.root.as_ref() { Some(node) => Subtree::new_root(node.reborrow()), None => { let (_, dormant) = borrow::DormantRef::new_immut(self); Subtree::new_empty(dormant) } } } pub fn entry<'a, Q>(&'a mut self, key_seq: Q) -> entry::Entry<'a, OnceAndIter, K, V> where Q: Iterator, { let mut subtree = self.as_subtree_mut(); let entry = subtree.entry(key_seq); // SAFETY: extending the lifetime is fine because we borrow the tree for 'a, // and no references to the subtree are live after this. // The same could be achieved using `dormant.reborrow()` a bunch // of times while destructuring the entry. unsafe { core::mem::transmute::< entry::Entry<'_, OnceAndIter, K, V>, entry::Entry<'a, OnceAndIter, K, V>, >(entry) } } } unsafe fn slice_insert(slice: &mut [MaybeUninit], idx: usize, value: T) { unsafe { let len = slice.len(); debug_assert!(len > idx); let slice_ptr = slice.as_mut_ptr(); if len > idx + 1 { core::ptr::copy(slice_ptr.add(idx), slice_ptr.add(idx + 1), len - idx - 1); } (*slice_ptr.add(idx)).write(value); } } mod range { #![allow(dead_code)] use core::ptr; use super::marker; use crate::tree::{Handle, NodeRef}; pub(crate) enum LeafHandle { Root(NodeRef), Edge(Handle, marker::Edge>), } pub(crate) struct LeafRange { front: Option>, back: Option>, } impl<'a, K: 'a, V: 'a> Clone for LeafHandle, K, V> { fn clone(&self) -> Self { match self { LeafHandle::Root(node) => LeafHandle::Root(*node), LeafHandle::Edge(handle) => LeafHandle::Edge(*handle), } } } impl LeafHandle { fn reborrow(&self) -> LeafHandle, K, V> { match self { Self::Root(node) => LeafHandle::Root(node.reborrow()), Self::Edge(handle) => LeafHandle::Edge(handle.reborrow()), } } } impl Handle, 
marker::Edge> { unsafe fn deallocating_next( self, ) -> Option, marker::Edge>> { let mut edge = self; loop { // if this node has descendants, we want to drop them first. if edge.node.has_descendants() { return Some(edge.descend().first_edge()); } let mut last_edge = edge; edge = loop { // no more descendants: deallocate this node and ascend. // after ascending, check if we need to go right and drop // more descendants of the parent. // otherwise, continue deallocating and ascending match last_edge.node.deallocate_and_ascend() { Some(parent) => match parent.right_edge() { Ok(next) => break next, Err(last) => last_edge = last, }, None => return None, } }; } } unsafe fn dellocating_next_back( self, ) -> Option, marker::Edge>> { let mut edge = self; loop { if edge.node.has_descendants() { match edge.left_edge() { Ok(next) => return Some(next), Err(node) => edge = node, } } else { match edge.node.deallocate_and_ascend() { Some(parent) => edge = parent, None => return None, } } } } pub(crate) unsafe fn deallocating_next_unchecked(&mut self) { super::replace(self, |edge| { (unsafe { edge.deallocating_next().unwrap() }, ()) }) } pub(crate) unsafe fn try_deallocating_next(&mut self) -> bool { super::maybe_replace(self, |edge| unsafe { let err = core::ptr::read(&edge); edge.deallocating_next().map(|e| (e, ())).ok_or(err) }) .is_some() } unsafe fn deallocating_next_back_unchecked(&mut self) { super::replace(self, |edge| { (unsafe { edge.dellocating_next_back().unwrap() }, ()) }) } pub(crate) fn deallocating_end(self) { let mut edge = self; while let Some(parent) = edge.node.deallocate_and_ascend() { edge = parent; } } } impl LeafRange { pub(crate) fn init_front( &mut self, ) -> Option<&mut Handle, marker::Edge>> { if let Some(LeafHandle::Root(root)) = &self.front { self.front = Some(LeafHandle::Edge( unsafe { ptr::read(root) }.first_leaf_edge(), )); } match &mut self.front { None => None, Some(LeafHandle::Edge(edge)) => Some(edge), // if it was root, we've just replaced it 
with an edge. Some(LeafHandle::Root(_)) => unreachable!(), } } fn init_back(&mut self) -> Option<&mut Handle, marker::Edge>> { if let Some(LeafHandle::Root(root)) = &self.back { self.back = Some(LeafHandle::Edge( unsafe { ptr::read(root) }.last_leaf_edge(), )); } match &mut self.back { None => None, Some(LeafHandle::Edge(edge)) => Some(edge), // if it was root, we've just replaced it with an edge. Some(LeafHandle::Root(_)) => unreachable!(), } } } impl LeafRange { pub(crate) fn dying_next(&mut self) -> Option<()> { let front = self.init_front().unwrap(); unsafe { front.try_deallocating_next().then_some(()) } } } pub(crate) fn full_range( start: Option>, end: Option>, ) -> LeafRange { LeafRange { front: start.map(LeafHandle::Root), back: end.map(LeafHandle::Root), } } } /// This replaces the value behind the `v` unique reference by calling the /// relevant function, and returns a result obtained along the way. /// /// If a panic occurs in the `change` closure, the entire process will be aborted. 
#[inline] pub(super) fn replace(v: &mut T, change: impl FnOnce(T) -> (T, R)) -> R { use core::{mem, ptr}; struct PanicGuard; impl Drop for PanicGuard { fn drop(&mut self) { panic!() } } let guard = PanicGuard; let value = unsafe { ptr::read(v) }; let (new_value, ret) = change(value); unsafe { ptr::write(v, new_value); } mem::forget(guard); ret } pub(crate) fn maybe_replace( v: &mut T, change: impl FnOnce(T) -> Result<(T, R), T>, ) -> Option { use core::{mem, ptr}; struct PanicGuard; impl Drop for PanicGuard { fn drop(&mut self) { panic!() } } let guard = PanicGuard; let value = unsafe { ptr::read(v) }; let (new_value, ret) = match change(value) { Err(old_value) => (old_value, None), Ok((new_value, ret)) => (new_value, Some(ret)), }; unsafe { ptr::write(v, new_value); } mem::forget(guard); ret } struct IntoIter { range: range::LeafRange, } impl IntoIter { fn new(tree: Tree) -> Self { let mut tree = ManuallyDrop::new(tree); if let Some(root) = tree.root.take() { let root = root.into_dying(); let root2 = unsafe { core::ptr::read(&root) }; Self { range: range::full_range(Some(root), Some(root2)), } } else { Self { range: range::full_range(None, None), } } } } impl Drop for IntoIter { fn drop(&mut self) { while let Some(_) = self.range.dying_next() {} } } impl Drop for Tree { fn drop(&mut self) { drop(unsafe { IntoIter::new(core::ptr::read(self)) }); } } #[cfg(test)] mod tests { use super::*; fn build_tree() -> Tree { let mut tree = Tree::new(); tree.entry("asdf".chars()).or_insert(1); tree.entry("asd".chars()).or_insert(2); tree.entry("asdg".chars()).or_insert(3); tree.entry("asda".chars()).or_insert(4); tree } #[test] fn drop_tree() { let _ = build_tree(); } #[test] fn entry() { let tree = build_tree(); assert_eq!(tree.as_subtree().get("asdf".chars()), Some(&1)); assert_eq!(tree.as_subtree().get("asd".chars()), Some(&2)); assert_eq!(tree.as_subtree().get("asdg".chars()), Some(&3)); assert_eq!(tree.as_subtree().get("asda".chars()), Some(&4)); assert_eq!( 
tree.as_subtree().get_closest("asdaf".chars()), Some((Some('f'), &4)) ); } }