werkzeug/src/tree.rs
Janis 9663e95210 refactor(tree): simplify NodeRef type by removing redundant parameters
- Removed the `Type` parameter from `NodeRef` and related structs and methods
2025-08-15 23:25:05 +02:00

1344 lines
44 KiB
Rust

//! A Prefix-Tree (Trie) implementation in Rust.
//! This Trie is structured with key-value pairs at the same level.
// tree:
// root: { kvs: [('f', v0), ('g', v1), ('i', _)], edges: [child0, None, child1] }
// child0: { kvs: [('a', v2), ('b', v3)], edges: [] }
// child1: { kvs: [('a', v4)], edges: [] }
// tree.find() -> ()
// tree.find('f') -> v0
// tree.find('g') -> v1
// tree.find('fa') -> v2
// tree.find('fb') -> v3
// tree.find('ia') -> v4
// tree.find('i') -> None
use alloc::boxed::Box;
use core::{marker::PhantomData, mem::MaybeUninit, ptr::NonNull};
/// Zero-sized tag types used as type parameters of `NodeRef` and `Handle`.
mod marker {
    use core::marker::PhantomData;

    /// Implemented by every tag that can appear as the borrow-kind parameter
    /// of a node reference.
    pub trait BorrowType {
        /// Whether a reference carrying this tag may move between nodes.
        /// `ascend` and `descend` assert this constant at compile time.
        const TRAVERSAL_PERMIT: bool = true;
    }

    // Tags naming a kind of node.
    #[derive(Debug)]
    pub struct Leaf;
    #[derive(Debug)]
    pub struct Internal;
    #[derive(Debug)]
    pub struct LeafOrInternal;

    // Tags naming what a `Handle` points at inside a node.
    #[derive(Debug)]
    pub struct Edge;
    #[derive(Debug)]
    pub struct Value;

    // Tags naming the kind of access a node reference grants.
    #[derive(Debug)]
    pub struct Owned;
    #[derive(Debug)]
    pub struct Dying;
    #[derive(Debug)]
    pub struct DormantMut;
    #[derive(Debug)]
    pub struct Immut<'a>(PhantomData<&'a ()>);
    #[derive(Debug)]
    pub struct Mut<'a>(PhantomData<&'a mut ()>);
    #[derive(Debug)]
    pub struct ValMut<'a>(PhantomData<&'a mut ()>);

    // `Owned` is the only tag that forbids traversal; every other tag keeps
    // the trait's default of `true`.
    impl BorrowType for Owned {
        const TRAVERSAL_PERMIT: bool = false;
    }
    impl BorrowType for Dying {}
    impl BorrowType for DormantMut {}
    impl BorrowType for Immut<'_> {}
    impl BorrowType for Mut<'_> {}
    impl BorrowType for ValMut<'_> {}
}
// NOTE(review): this constant is not referenced by any code visible in this
// file. Node storage is sized by `LeafNode::capacity`, and `grow_node` uses a
// literal `16` as its minimum size.
const CAPACITY: usize = 16;
/// A single heap-allocated trie node.
///
/// A node optionally stores one value and owns two parallel, separately
/// allocated buffers: `keys[i]` labels the child stored in `edges[i]`.
#[derive(Debug)]
struct LeafNode<K, V> {
/// Pointer to the parent node; `None` after `init`, set by `reparent`.
parent: Option<BoxedNode<K, V>>,
/// Position recorded by `reparent`; left uninitialised by `init`, so it is
/// only meaningful while `parent` is `Some`.
parent_idx: MaybeUninit<u16>,
/// The value stored at this node, if any.
value: Option<V>,
/// Number of initialised entries at the front of `keys` and `edges`.
len: u16,
/// Number of slots in each of the `keys` and `edges` buffers.
capacity: u16,
/// Key buffer; a dangling pointer until `grow_node` allocates it.
keys: NonNull<MaybeUninit<K>>,
/// Child-pointer buffer; a dangling pointer until `grow_node` allocates it.
edges: NonNull<MaybeUninit<BoxedNode<K, V>>>,
}
/// Raw, non-null pointer to a node; this is what the `parent` field and the
/// `edges` buffer store.
type BoxedNode<K, V> = NonNull<LeafNode<K, V>>;
/// A pointer to a node, tagged at the type level with the kind of access it
/// grants (one of the `marker` borrow types such as `Owned`, `Mut` or `Immut`).
#[derive(Debug)]
struct NodeRef<BorrowType, K, V> {
/// The node pointed at. Kept as a raw pointer rather than a reference.
node: NonNull<LeafNode<K, V>>,
/// Carries the access tag (and, through it, any lifetime); stores no data.
_marker: PhantomData<BorrowType>,
}
/// A position inside a node: a node reference plus an index.
///
/// `Type` is a `marker` tag (`Edge` or `Value`) saying what the handle points
/// at. Handles built by `new_value` always carry index `0`.
#[derive(Debug)]
struct Handle<Node, Type> {
/// The node this handle refers into.
node: Node,
/// Index within the node.
idx: usize,
/// Carries the handle-kind tag; stores no data.
_marker: PhantomData<Type>,
}
// Shared (`Immut`) node references are plain pointers and can be duplicated
// freely, just like `&T`.
impl<'a, K: 'a, V: 'a> Clone for NodeRef<marker::Immut<'a>, K, V> {
    /// Returns a bitwise copy of this reference.
    fn clone(&self) -> Self {
        *self
    }
}
impl<'a, K: 'a, V: 'a> Copy for NodeRef<marker::Immut<'a>, K, V> {}
// `NodeRef` wraps a raw pointer, so thread-safety is declared by hand.
// Every kind of node reference is `Sync` when `K` and `V` are `Sync`.
unsafe impl<BorrowType, K: Sync, V: Sync> Sync for NodeRef<BorrowType, K, V> {}
// A shared (`Immut`) reference may be sent when `K` and `V` are `Sync`,
// matching the rule for `&T`.
unsafe impl<K: Sync, V: Sync> Send for NodeRef<marker::Immut<'_>, K, V> {}
// The remaining listed kinds require `K: Send` and `V: Send`.
// No `Send` impl is provided for `DormantMut` here.
unsafe impl<K: Send, V: Send> Send for NodeRef<marker::Mut<'_>, K, V> {}
unsafe impl<K: Send, V: Send> Send for NodeRef<marker::ValMut<'_>, K, V> {}
unsafe impl<K: Send, V: Send> Send for NodeRef<marker::Owned, K, V> {}
unsafe impl<K: Send, V: Send> Send for NodeRef<marker::Dying, K, V> {}
impl<BorrowType, K, V> Handle<NodeRef<BorrowType, K, V>, marker::Edge> {
    /// Creates a handle to the edge at position `idx` of `node`.
    ///
    /// # Safety
    /// Nothing is checked here. Presumably `idx` must be a position at which
    /// an edge of `node` exists or may be inserted — TODO confirm against callers.
    unsafe fn new_edge(node: NodeRef<BorrowType, K, V>, idx: usize) -> Self {
        Self {
            node,
            idx,
            _marker: PhantomData,
        }
    }
}
impl<BorrowType, K, V> Handle<NodeRef<BorrowType, K, V>, marker::Value> {
    /// Creates a handle to the value slot of `node`.
    ///
    /// A node has a single value slot, so the handle's index is always `0`.
    ///
    /// # Safety
    /// Nothing is checked here; in particular the slot may be empty.
    unsafe fn new_value(node: NodeRef<BorrowType, K, V>) -> Self {
        Self {
            node,
            idx: 0,
            _marker: PhantomData,
        }
    }
}
impl<K, V> LeafNode<K, V> {
unsafe fn init(this: *mut Self) {
unsafe {
(&raw mut (*this).parent).write(None);
// parent_idx may be left uninitialized
(&raw mut (*this).value).write(None);
(&raw mut (*this).len).write(0);
(&raw mut (*this).capacity).write(0);
// keys and edges are dangling pointers
(&raw mut (*this).keys).write(NonNull::dangling());
(&raw mut (*this).edges).write(NonNull::dangling());
}
}
fn new() -> Box<Self> {
let mut this = Box::new_uninit();
unsafe {
Self::init(this.as_mut_ptr());
this.assume_init()
}
}
}
impl<K, V> NodeRef<marker::Owned, K, V> {
    /// Allocates a fresh, empty node and returns the owning reference to it.
    fn new() -> Self {
        let node = Box::into_non_null(LeafNode::new());
        Self {
            node,
            _marker: PhantomData,
        }
    }
}
impl<BorrowType: marker::BorrowType, K, V> NodeRef<BorrowType, K, V> {
    /// Moves from this node to its parent.
    ///
    /// Returns `Ok(handle)` when the node has a parent; `handle` refers to the
    /// parent node at the index recorded in this node's `parent_idx`. Returns
    /// `Err(self)`, handing the reference back unchanged, when there is no
    /// parent.
    ///
    /// The name pictures the tree with its root on top.
    /// `edge.descend().ascend().unwrap()` and `node.ascend().unwrap().descend()`
    /// should both, upon success, do nothing.
    pub(super) fn ascend(self) -> Result<Handle<NodeRef<BorrowType, K, V>, marker::Edge>, Self> {
        const {
            assert!(BorrowType::TRAVERSAL_PERMIT);
        }
        // Only raw pointers are used: with `marker::ValMut` there may be live
        // mutable references to values which must not be invalidated.
        let leaf = Self::as_leaf_ptr(&self).cast_const();
        match unsafe { (*leaf).parent } {
            None => Err(self),
            Some(parent) => {
                // `parent_idx` is written together with `parent` by `reparent`.
                let idx = unsafe { usize::from((*leaf).parent_idx.assume_init()) };
                Ok(Handle {
                    node: NodeRef {
                        node: parent,
                        _marker: PhantomData,
                    },
                    idx,
                    _marker: PhantomData,
                })
            }
        }
    }
}
impl<BorrowType: marker::BorrowType, K, V> Handle<NodeRef<BorrowType, K, V>, marker::Edge> {
/// Finds the node pointed to by this edge.
///
/// The method name assumes you picture trees with the root node on top.
///
/// `edge.descend().ascend().unwrap()` and `node.ascend().unwrap().descend()` should
/// both, upon success, do nothing.
pub(super) fn descend(self) -> NodeRef<BorrowType, K, V> {
const {
assert!(BorrowType::TRAVERSAL_PERMIT);
}
// We need to use raw pointers to nodes because, if BorrowType is
// marker::ValMut, there might be outstanding mutable references to
// values that we must not invalidate. There's no worry accessing the
// height field because that value is copied. Beware that, once the
// node pointer is dereferenced, we access the edges array with a
// reference (Rust issue #73987) and invalidate any other references
// to or inside the array, should any be around.
let node = unsafe {
// in this case, no references to the node are created.
self.node.edge_area(self.idx).assume_init_read()
};
NodeRef {
node,
_marker: PhantomData,
}
}
}
impl<'a, K, V, HandleType> Handle<NodeRef<marker::Mut<'a>, K, V>, HandleType> {
    /// Temporarily takes out a second mutable handle on the same location.
    ///
    /// # Safety
    /// Dangerous in the same way as `NodeRef::reborrow_mut`; see there.
    pub(super) unsafe fn reborrow_mut(
        &mut self,
    ) -> Handle<NodeRef<marker::Mut<'_>, K, V>, HandleType> {
        // Built field by field because the handle kind is generic here.
        let idx = self.idx;
        let node = unsafe { self.node.reborrow_mut() };
        Handle {
            node,
            idx,
            _marker: PhantomData,
        }
    }

    /// Returns a lifetime-erased copy of this handle which can be turned back
    /// into a mutable handle with `awaken`.
    pub(super) fn dormant(&self) -> Handle<NodeRef<marker::DormantMut, K, V>, HandleType> {
        let idx = self.idx;
        let node = self.node.dormant();
        Handle {
            node,
            idx,
            _marker: PhantomData,
        }
    }
}
impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V> {
/// Returns a mutable reference to the node's `len` field.
pub(super) fn len_mut(&mut self) -> &mut u16 {
// SAFETY: we have exclusive access to the entire node.
unsafe { &mut (*Self::as_leaf_ptr(self)).len }
}
/// Despite its name, this currently returns a mutable reference to the
/// node's `len` field, exactly like `len_mut`.
///
/// NOTE(review): this looks like a copy-paste slip for `.capacity`. Do not
/// change it in isolation: `grow_node` doubles the value read through this
/// accessor, so check how `grow_node` and `reparent` decide when and by how
/// much to grow before correcting it.
pub(super) fn capacity_mut(&mut self) -> &mut u16 {
// SAFETY: we have exclusive access to the entire node.
unsafe { &mut (*Self::as_leaf_ptr(self)).len }
}
}
impl<K, V> NodeRef<marker::Owned, K, V> {
    /// Mutably borrows the owned root node. Unlike `reborrow_mut`, this is
    /// safe: the result cannot be used to destroy the root, and no other
    /// references to the tree can exist while it lives.
    pub(super) fn borrow_mut(&mut self) -> NodeRef<marker::Mut<'_>, K, V> {
        let node = self.node;
        NodeRef {
            node,
            _marker: PhantomData,
        }
    }

    /// Borrows the owned root node with the `ValMut` access tag.
    pub(super) fn borrow_valmut(&mut self) -> NodeRef<marker::ValMut<'_>, K, V> {
        let node = self.node;
        NodeRef {
            node,
            _marker: PhantomData,
        }
    }

    /// Irreversibly converts the owning reference into a `Dying` one, which
    /// permits traversal (unlike `Owned`).
    pub(super) fn into_dying(self) -> NodeRef<marker::Dying, K, V> {
        let Self { node, .. } = self;
        NodeRef {
            node,
            _marker: PhantomData,
        }
    }
}
impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V> {
    /// Temporarily takes out a second mutable reference to the same node.
    ///
    /// # Safety
    /// Mutable references can roam anywhere in the tree, so the returned one
    /// can easily be used to make the original dangling, out of bounds, or
    /// invalid under stacked borrows. The caller must prevent that.
    // FIXME(@gereeter) consider adding yet another type parameter to `NodeRef`
    // that restricts the use of navigation methods on reborrowed pointers,
    // preventing this unsafety.
    unsafe fn reborrow_mut(&mut self) -> NodeRef<marker::Mut<'_>, K, V> {
        let node = self.node;
        NodeRef {
            node,
            _marker: PhantomData,
        }
    }

    /// Borrows the node's fields exclusively for as long as `self` is borrowed.
    fn as_leaf_mut(&mut self) -> &mut LeafNode<K, V> {
        let raw = self.node.as_ptr();
        // SAFETY: a `Mut` reference has exclusive access to the entire node.
        unsafe { &mut *raw }
    }

    /// Consumes the reference and returns exclusive access to the node's
    /// fields for the whole lifetime `'a`.
    fn into_leaf_mut(self) -> &'a mut LeafNode<K, V> {
        let raw = self.node.as_ptr();
        // SAFETY: a `Mut` reference has exclusive access to the entire node.
        unsafe { &mut *raw }
    }

    /// Returns a lifetime-erased copy of this reference which can be turned
    /// back into a mutable reference with `awaken`.
    pub(super) fn dormant(&self) -> NodeRef<marker::DormantMut, K, V> {
        let node = self.node;
        NodeRef {
            node,
            _marker: PhantomData,
        }
    }
}
impl<'a, K: 'a, V: 'a, BorrowType> NodeRef<BorrowType, K, V> {
    /// Borrows shared access to an element or sub-slice of the key buffer.
    ///
    /// # Safety
    /// `index` must lie within `0..capacity` of this node; it is not checked.
    unsafe fn key_area<I, Output: ?Sized>(&self, index: I) -> &Output
    where
        I: core::slice::SliceIndex<[MaybeUninit<K>], Output = Output>,
    {
        let leaf = self.node.as_ptr();
        // SAFETY: the buffer holds `capacity` slots (or is dangling with a
        // capacity of zero); staying in bounds is the caller's obligation.
        unsafe {
            let base = (*leaf).keys.as_ptr();
            let slots = usize::from((*leaf).capacity);
            core::slice::from_raw_parts(base, slots).get_unchecked(index)
        }
    }

    /// Borrows shared access to an element or sub-slice of the edge buffer.
    ///
    /// # Safety
    /// `index` must lie within `0..capacity` of this node; it is not checked.
    unsafe fn edge_area<I, Output: ?Sized>(&self, index: I) -> &Output
    where
        I: core::slice::SliceIndex<[MaybeUninit<BoxedNode<K, V>>], Output = Output>,
    {
        let leaf = self.node.as_ptr();
        // SAFETY: the buffer holds `capacity` slots (or is dangling with a
        // capacity of zero); staying in bounds is the caller's obligation.
        unsafe {
            let base = (*leaf).edges.as_ptr();
            let slots = usize::from((*leaf).capacity);
            core::slice::from_raw_parts(base, slots).get_unchecked(index)
        }
    }
}
impl<'a, K: 'a, V: 'a> NodeRef<marker::Mut<'a>, K, V> {
    /// Borrows exclusive access to an element or sub-slice of the key buffer.
    ///
    /// # Safety
    /// `index` must lie within `0..capacity` of this node; it is not checked.
    unsafe fn key_area_mut<I, Output: ?Sized>(&mut self, index: I) -> &mut Output
    where
        I: core::slice::SliceIndex<[MaybeUninit<K>], Output = Output>,
    {
        let (base, slots) = {
            let leaf = self.as_leaf_mut();
            (leaf.keys.as_ptr(), usize::from(leaf.capacity))
        };
        // SAFETY: `self` is borrowed mutably for as long as the result lives,
        // so no other access to the buffer can happen meanwhile; staying in
        // bounds is the caller's obligation.
        unsafe { core::slice::from_raw_parts_mut(base, slots).get_unchecked_mut(index) }
    }

    /// Borrows exclusive access to an element or sub-slice of the edge buffer.
    ///
    /// # Safety
    /// `index` must lie within `0..capacity` of this node; it is not checked.
    unsafe fn edge_area_mut<I, Output: ?Sized>(&mut self, index: I) -> &mut Output
    where
        I: core::slice::SliceIndex<[MaybeUninit<BoxedNode<K, V>>], Output = Output>,
    {
        let (base, slots) = {
            let leaf = self.as_leaf_mut();
            (leaf.edges.as_ptr(), usize::from(leaf.capacity))
        };
        // SAFETY: `self` is borrowed mutably for as long as the result lives,
        // so no other access to the buffer can happen meanwhile; staying in
        // bounds is the caller's obligation.
        unsafe { core::slice::from_raw_parts_mut(base, slots).get_unchecked_mut(index) }
    }
}
impl<K, V> NodeRef<marker::DormantMut, K, V> {
    /// Turns the dormant reference back into a mutable one with a lifetime
    /// chosen by the caller.
    ///
    /// # Safety
    ///
    /// Whatever was derived from the original mutable reference after this
    /// dormant copy was taken must not be used any more, and `'a` must not
    /// outlive the borrow that was originally captured.
    pub(super) unsafe fn awaken<'a>(self) -> NodeRef<marker::Mut<'a>, K, V> {
        let Self { node, .. } = self;
        NodeRef {
            node,
            _marker: PhantomData,
        }
    }
}
impl<K, V, HandleType> Handle<NodeRef<marker::DormantMut, K, V>, HandleType> {
    /// Turns the dormant handle back into a mutable one with a lifetime
    /// chosen by the caller; the index is carried over unchanged.
    ///
    /// # Safety
    ///
    /// Same contract as `NodeRef::<marker::DormantMut, _, _>::awaken`.
    pub(super) unsafe fn awaken<'a>(self) -> Handle<NodeRef<marker::Mut<'a>, K, V>, HandleType> {
        let Handle { node, idx, .. } = self;
        let node = unsafe { node.awaken() };
        Handle {
            node,
            idx,
            _marker: PhantomData,
        }
    }
}
impl<BorrowType, K, V> NodeRef<BorrowType, K, V> {
    /// Returns the number of keys stored in the node. Keys and edges are kept
    /// in parallel buffers, so this is also the number of child edges.
    ///
    /// Note that, despite being safe, calling this function can have the side
    /// effect of invalidating mutable references that unsafe code has created.
    pub(super) fn len(&self) -> usize {
        // Only the `len` field is touched: with `marker::ValMut` there may be
        // live mutable references to values which must not be invalidated.
        unsafe { usize::from((*Self::as_leaf_ptr(self)).len) }
    }

    /// Returns the number of slots allocated in each of the key and edge
    /// buffers (zero until the node has grown for the first time).
    pub(super) fn capacity(&self) -> usize {
        // Fixed: this previously read `len`, so it never reported the
        // allocated size. Only the `capacity` field is touched.
        unsafe { usize::from((*Self::as_leaf_ptr(self)).capacity) }
    }
}
impl<BorrowType, K, V> NodeRef<BorrowType, K, V> {
    /// Returns `true` when the node stores no keys, i.e. has no children.
    pub(super) fn is_leaf(&self) -> bool {
        let raw = Self::as_leaf_ptr(self);
        // Only the `len` field is read.
        unsafe { (*raw).len == 0 }
    }

    /// Temporarily takes out another, immutable reference to the same node.
    pub(super) fn reborrow(&self) -> NodeRef<marker::Immut<'_>, K, V> {
        let node = self.node;
        NodeRef {
            node,
            _marker: PhantomData,
        }
    }

    /// Returns the raw pointer to the node.
    ///
    /// A raw pointer is handed out, rather than a reference, because this
    /// type does not know whether access should be unique or shared, and to
    /// avoid invalidating other references to the node.
    fn as_leaf_ptr(this: &Self) -> *mut LeafNode<K, V> {
        this.node.as_ptr()
    }

    /// Borrows the node's fields for as long as `self` is borrowed.
    fn as_leaf(&self) -> &LeafNode<K, V> {
        let raw = Self::as_leaf_ptr(self);
        // NOTE(review): this is a safe function available for every borrow
        // kind; the caller must make sure no conflicting mutable reference to
        // the node is live.
        unsafe { &*raw }
    }

    /// Returns the node pointer as a `NonNull`.
    fn as_leaf_non_null(this: &Self) -> NonNull<LeafNode<K, V>> {
        // The stored pointer already is a `NonNull`.
        this.node
    }
}
impl<'a, K: 'a, V: 'a> NodeRef<marker::Immut<'a>, K, V> {
    /// Borrows the initialised keys of the node (the first `len()` slots).
    pub(super) fn keys(&self) -> &[K] {
        let used = self.len();
        // SAFETY: relies on the node invariant that the first `len` key slots
        // are initialised and that `len <= capacity`.
        unsafe { self.key_area(..used).assume_init_ref() }
    }

    /// Borrows the initialised child pointers of the node (the first `len()`
    /// slots), parallel to `keys()`.
    pub(super) fn edges(&self) -> &[BoxedNode<K, V>] {
        let used = self.len();
        // SAFETY: relies on the node invariant that the first `len` edge slots
        // are initialised and that `len <= capacity`.
        unsafe { self.edge_area(..used).assume_init_ref() }
    }
}
impl<BorrowType, K, V, HandleType> Handle<NodeRef<BorrowType, K, V>, HandleType> {
    /// Temporarily takes out another, immutable handle on the same location.
    pub(super) fn reborrow(&self) -> Handle<NodeRef<marker::Immut<'_>, K, V>, HandleType> {
        // Built field by field because the handle kind is generic here.
        let idx = self.idx;
        let node = self.node.reborrow();
        Handle {
            node,
            idx,
            _marker: PhantomData,
        }
    }
}
impl<'a, K: 'a, V: 'a> NodeRef<marker::Immut<'a>, K, V> {
    /// Consumes the reference and returns shared access to the node's fields
    /// for the whole lifetime `'a`.
    fn into_leaf(self) -> &'a LeafNode<K, V> {
        let raw = self.node.as_ptr();
        // SAFETY: there can be no mutable references into a tree that is
        // borrowed as `Immut`.
        unsafe { &*raw }
    }
}
impl<'a, K: 'a, V: 'a> Handle<NodeRef<marker::Immut<'a>, K, V>, marker::Value> {
    /// Returns a shared reference to the value stored at the node.
    ///
    /// # Panics
    /// Panics when the node holds no value.
    ///
    /// # Safety
    /// NOTE(review): the body uses no unsafe operation; presumably the
    /// intended contract is that the node holds a value — TODO confirm.
    pub(super) unsafe fn into_value(self) -> &'a V {
        self.node.into_leaf().value.as_ref().unwrap()
    }
}
impl<'a, K: 'a, V: 'a> Handle<NodeRef<marker::Mut<'a>, K, V>, marker::Value> {
pub(super) unsafe fn value_mut(&mut self) -> &mut V {
let leaf = self.node.as_leaf_mut();
let v = leaf.value.as_mut().unwrap();
v
}
pub fn into_value_mut(self) -> &'a mut V {
let leaf = self.node.into_leaf_mut();
leaf.value.as_mut().unwrap()
}
}
/// Result of classifying a node (or a handle into one) at run time.
pub(super) enum ForceResult<Node> {
/// The node stores no keys, so it has no children.
Leaf(Node),
/// The node stores at least one key.
Internal(Node),
}
impl<BorrowType, K, V, HandleType> Handle<NodeRef<BorrowType, K, V>, HandleType> {
fn force(self) -> ForceResult<Handle<NodeRef<BorrowType, K, V>, HandleType>> {
match self.node.force() {
ForceResult::Leaf(leaf) => ForceResult::Leaf(Handle {
node: leaf,
idx: self.idx,
_marker: PhantomData,
}),
ForceResult::Internal(internal) => ForceResult::Internal(Handle {
node: internal,
idx: self.idx,
_marker: PhantomData,
}),
}
}
}
impl<BorrowType, K, V> NodeRef<BorrowType, K, V> {
    /// Classifies this node: `Leaf` when it stores no keys, `Internal`
    /// otherwise. The reference itself is returned unchanged inside the
    /// variant.
    fn force(self) -> ForceResult<NodeRef<BorrowType, K, V>> {
        match self.is_leaf() {
            true => ForceResult::Leaf(self),
            false => ForceResult::Internal(self),
        }
    }
}
impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V> {
pub(super) fn grow_node(&mut self) {
// grow the node
let Some(new_capacity) = self.capacity_mut().checked_mul(2) else {
panic!("Node capacity overflow");
};
// Ensure the new capacity is at least 16.
// capacity starts off at 0, so the first time it will grow to 16.
let new_capacity = new_capacity.max(16) as usize;
// Allocate new keys and edges.
let new_keys = Box::into_non_null(Box::new_uninit_slice(new_capacity));
let new_edges = Box::into_non_null(Box::new_uninit_slice(new_capacity));
let leaf = self.as_leaf_mut();
let old_keys = core::mem::replace(&mut leaf.keys, new_keys.as_non_null_ptr());
let old_edges = core::mem::replace(&mut leaf.edges, new_edges.as_non_null_ptr());
// we don't want to copy and deallocate the old keys & edges if the
// capacity was 0, because then the old pointers are dangling.
if leaf.capacity > 0 {
unsafe {
let len = leaf.len as usize;
core::ptr::copy_nonoverlapping(old_keys.as_ptr(), new_keys.as_mut_ptr(), len);
core::ptr::copy_nonoverlapping(old_edges.as_ptr(), new_edges.as_mut_ptr(), len);
// dealloc old keys and edges.
// This doesn't drop because the keys and edges were moved.
_ = Box::from_non_null(old_keys);
_ = Box::from_non_null(old_edges);
}
}
// SAFETY: new_capacity fits in a u16.
leaf.capacity = new_capacity as u16;
}
}
impl<K, V> NodeRef<marker::Owned, K, V> {
    /// Attaches this owned node as a child of `parent.node`, inserting `key`
    /// and the node pointer at position `parent.idx` of the parent's key and
    /// edge buffers. Entries at or after that position move one slot to the
    /// right. The parent's storage grows when it is full.
    ///
    /// Returns a mutable reference to the newly attached child, valid for the
    /// same lifetime as the parent borrow.
    pub(super) fn reparent<'a>(
        mut self,
        mut parent: Handle<NodeRef<marker::Mut<'a>, K, V>, marker::Edge>,
        key: K,
    ) -> NodeRef<marker::Mut<'a>, K, V>
    where
        K: 'a,
        V: 'a,
    {
        let insert_at = parent.idx;

        // Record the back-link in the child.
        {
            let child = self.borrow_mut().into_leaf_mut();
            child.parent = Some(NodeRef::as_leaf_non_null(&parent.node));
            child.parent_idx.write(insert_at as u16);
        }

        let new_len = parent.node.len() + 1;
        if new_len > parent.node.capacity() {
            parent.node.grow_node();
        }

        // SAFETY: the buffers hold at least `new_len` slots after the growth
        // check above, and their first `new_len - 1` slots are initialised.
        unsafe {
            slice_insert(parent.node.key_area_mut(..new_len), insert_at, key);
            slice_insert(parent.node.edge_area_mut(..new_len), insert_at, self.node);
        }
        *parent.node.len_mut() = new_len as u16;

        // Every child that was shifted one slot to the right still remembers
        // its old position; refresh those back-links so that `ascend` keeps
        // reporting the edge that actually points at the child.
        for shifted in (insert_at + 1)..new_len {
            // SAFETY: slots below `new_len` are initialised and each holds a
            // pointer to a live child node that only this tree refers to.
            unsafe {
                let sibling = parent.node.edge_area(shifted).assume_init_read();
                (*sibling.as_ptr()).parent_idx.write(shifted as u16);
            }
        }

        // The child is now owned by the tree behind `parent`; hand out access
        // with the parent's lifetime rather than that of the local `self`.
        unsafe { self.borrow_mut().dormant().awaken() }
    }
}
impl<BorrowType, K, V> Handle<NodeRef<BorrowType, K, V>, marker::Edge> {
/// Converts this edge handle into a value handle.
/// IMPORTANT: this handle points to the value of the node, not the edge.
pub(super) fn into_value(self) -> Handle<NodeRef<BorrowType, K, V>, marker::Value> {
unsafe { Handle::new_value(self.node) }
}
}
impl<'a, K: 'a, V: 'a> Handle<NodeRef<marker::Mut<'a>, K, V>, marker::Edge> {
    /// Inserts `val` below this edge under the remaining key sequence.
    ///
    /// For every element produced by `key_seq` a fresh node is created and
    /// attached: the first at this edge position of this node, each following
    /// one as the only child of the previous. `val` is then stored in the
    /// last node reached — which is this handle's own node when `key_seq` is
    /// empty — replacing any value already there.
    ///
    /// Returns a handle to the value slot that now holds `val`.
    ///
    /// # Safety
    /// Presumably `self.idx` must be a valid insertion position for the first
    /// key in this node, as produced by a search — TODO confirm.
    pub(super) unsafe fn insert_recursing<Q>(
        self,
        key_seq: Q,
        val: V,
    ) -> Handle<NodeRef<marker::Mut<'a>, K, V>, marker::Value>
    where
        Q: Iterator<Item = K>,
        K: Ord,
    {
        // Walk the key sequence with a loop instead of recursing, so that the
        // stack depth does not grow with the length of the key.
        let mut edge = self;
        for key in key_seq {
            let child = NodeRef::new().reparent(edge, key);
            // A freshly created child has no keys yet; the next key (if any)
            // goes to its position 0.
            edge = unsafe { Handle::new_edge(child, 0) };
        }

        // Key has run out: the value belongs in the node reached.
        // TODO: handle occupied values.
        edge.node.as_leaf_mut().value = Some(val);
        unsafe { Handle::new_value(edge.node) }
    }
}
// search:
mod search {
use super::{ForceResult, Handle, NodeRef, marker};
use core::borrow::Borrow;
use core::cmp::Ordering;
/// Outcome of searching a node, or the whole tree, for a key sequence.
pub(super) enum SearchResult<BorrowType, K, V> {
/// The key sequence ended at this node. The handle refers to the node's
/// value slot, which may or may not currently hold a value.
Found(Handle<NodeRef<BorrowType, K, V>, marker::Value>),
/// The key was found, and the search should continue at the given edge.
GoDown(Handle<NodeRef<BorrowType, K, V>, marker::Edge>),
/// The key was not found, and should be inserted at the given position.
/// The key that was taken from the sequence is handed back.
Insert(K, Handle<NodeRef<BorrowType, K, V>, marker::Edge>),
}
/// Outcome of looking for a single key among the keys of one node.
pub(super) enum IndexResult {
/// The key is present at this index; the edge at the same index leads on.
Edge(usize),
/// The key is absent; inserting it at this index keeps the keys ordered.
Insert(usize),
}
impl<BorrowType: marker::BorrowType, K, V> NodeRef<BorrowType, K, V> {
pub(super) fn search_tree<Q>(mut self, mut key: Q) -> SearchResult<BorrowType, K, V>
where
Q: Iterator<Item = K>,
K: Ord,
{
use SearchResult::*;
loop {
self = match self.search_node(&mut key) {
Found(handle) => {
return Found(handle);
}
GoDown(handle) => handle.descend(),
Insert(key, handle) => return Insert(key, handle),
}
}
}
}
impl<BorrowType, K, V> NodeRef<BorrowType, K, V> {
fn search_node<Q>(self, mut key: Q) -> SearchResult<BorrowType, K, V>
where
Q: Iterator<Item = K>,
K: Ord,
{
use SearchResult::*;
let Some(key) = key.next() else {
// key has run out, a value is either occupying this
// node, or belongs here.
return SearchResult::Found(unsafe { Handle::new_value(self) });
};
match self.force() {
// self is a leaf node and doesn't contain any keys:
// a new leaf node should be inserted at this point.
ForceResult::Leaf(leaf) => {
SearchResult::Insert(key, unsafe { Handle::new_edge(leaf, 0) })
}
ForceResult::Internal(internal) => {
// search through the keys of the internal node
match unsafe { internal.find_key_index(&key, 0) } {
IndexResult::Insert(idx) => Insert(key, unsafe {
// the key wasn't present, but should be inserted at `idx`.
Handle::new_edge(internal, idx)
}),
IndexResult::Edge(idx) => {
// the key was found, continue searching down the edge
GoDown(unsafe { Handle::new_edge(internal, idx) })
}
}
}
}
}
}
impl<BorrowType, K, V> NodeRef<BorrowType, K, V> {
    /// Scans this node's keys, starting at `start_index`, for `key`.
    ///
    /// Returns `IndexResult::Edge(i)` when the key at index `i` equals `key`.
    /// Otherwise returns `IndexResult::Insert(i)`, where `i` is the index of
    /// the first key greater than `key`, or the number of keys when there is
    /// none. The result is only meaningful when the keys are in ascending
    /// order.
    ///
    /// # Safety
    /// `start_index` must not exceed the number of keys in the node.
    unsafe fn find_key_index<Q: ?Sized>(&self, key: &Q, start_index: usize) -> IndexResult
    where
        Q: Ord,
        K: Borrow<Q>,
    {
        let node = self.reborrow();
        let keys = node.keys();
        debug_assert!(start_index <= keys.len());
        // SAFETY: the caller guarantees `start_index <= keys.len()`.
        let candidates = unsafe { keys.get_unchecked(start_index..) };
        for (offset, candidate) in candidates.iter().enumerate() {
            let index = start_index + offset;
            match key.cmp(candidate.borrow()) {
                Ordering::Greater => continue,
                Ordering::Equal => return IndexResult::Edge(index),
                Ordering::Less => return IndexResult::Insert(index),
            }
        }
        // Greater than every key looked at: append at the end.
        IndexResult::Insert(keys.len())
    }
}
// Smoke test for lookups through the entry API.
#[cfg(test)]
mod tests {
use super::super::Tree;
use super::*;
// Looks up `key` in `tree`, inserts `value` when the entry is vacant, and
// dumps the entry before and after to stderr.
fn insert_and_dbg<'a>(
tree: &'a mut Tree<char, &'static str>,
key: &'a str,
value: &'static str,
) {
let entry = tree.entry(key.chars());
std::dbg!(&entry);
let entry = entry.or_insert(value);
std::dbg!(&entry);
}
// Inserts a few keys sharing the prefix "+" and checks that `or_insert` on
// keys that are already present returns the stored value, not the new one.
#[test]
fn asdf() {
let mut tree = Tree::new();
insert_and_dbg(&mut tree, "+", "Plus");
insert_and_dbg(&mut tree, "++", "PlusPlus");
insert_and_dbg(&mut tree, "+=", "PlusEqual");
insert_and_dbg(&mut tree, "++-", "PlusPlusMinus");
std::eprintln!("tree: {:?}", &tree);
assert_eq!(
tree.entry("++".chars()).or_insert("asdf").get(),
&"PlusPlus"
);
std::dbg!(tree.entry("+".chars()));
assert_eq!(tree.entry("+".chars()).or_insert("asdf").get(), &"Plus");
}
}
}
/// Where a vacant entry will insert: either a position inside an existing
/// node, or the tree itself when it has no root node yet.
enum HandleOrTree<'a, BorrowType, K, V, HandleType> {
/// A position inside an existing node.
Handle(Handle<NodeRef<BorrowType, K, V>, HandleType>),
/// The (still root-less) tree, kept as a dormant mutable borrow.
Tree(borrow::DormantMutRef<'a, Tree<K, V>>),
}
mod entry {
use core::marker::PhantomData;
use crate::tree::LeafNode;
use super::{Handle, NodeRef, Tree, borrow::DormantMutRef, marker};
/// A view into the place a key sequence maps to in the tree.
///
/// `Q` is the iterator type holding the part of the key sequence that still
/// has to be inserted for a vacant entry.
pub enum Entry<'a, Q: 'a, K: 'a, V: 'a>
where
Q: Iterator<Item = K>,
{
/// Nothing is stored under the key sequence yet.
Vacant(VacantEntry<'a, Q, K, V>),
/// The search for the key sequence ended at an existing node.
Occupied(OccupiedEntry<'a, K, V>),
}
use core::fmt::Debug;
impl<Q: Debug, K: Debug, V: Debug> Debug for Entry<'_, Q, K, V>
where
    Q: Iterator<Item = K>,
{
    /// Writes the variant name followed by the wrapped entry in parentheses.
    /// The inner entry is always written in compact `{:?}` form.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Entry::Occupied(occupied) => write!(f, "Occupied({occupied:?})"),
            Entry::Vacant(vacant) => write!(f, "Vacant({vacant:?})"),
        }
    }
}
impl<Q: Debug, K: Debug, V: Debug> Debug for VacantEntry<'_, Q, K, V>
where
    Q: Iterator<Item = K>,
{
    /// Formats the entry as a struct showing only the remaining key
    /// iterator; the insertion handle is not printed.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut out = f.debug_struct("VacantEntry");
        out.field("key", &self.key);
        out.finish()
    }
}
impl<K: Debug, V: Debug> Debug for OccupiedEntry<'_, K, V> {
    /// Formats the entry as a struct showing the stored value and the raw
    /// handle. Reading the value goes through `get`, so this panics if the
    /// node holds no value.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let value = self.get();
        let mut out = f.debug_struct("OccupiedEntry");
        out.field("value", &value);
        out.field("handle", &self.handle);
        out.finish()
    }
}
/// An entry for a key sequence under which nothing is stored yet.
pub struct VacantEntry<'a, Q, K, V>
where
Q: Iterator<Item = K>,
{
/// The keys that still have to be inserted to reach the entry's node.
pub(super) key: Q,
/// Where insertion starts: a position in an existing node, or the whole
/// tree when it has no root yet.
pub(super) handle: super::HandleOrTree<'a, marker::Mut<'a>, K, V, marker::Edge>,
/// Ties the entry to a mutable borrow of keys and values for `'a`.
pub(super) _marker: PhantomData<&'a mut (K, V)>,
}
/// An entry for a key sequence that leads to an existing node.
pub struct OccupiedEntry<'a, K, V> {
/// Handle on the value slot of the node the key sequence leads to.
pub(super) handle: Handle<NodeRef<marker::Mut<'a>, K, V>, marker::Value>,
/// Ties the entry to a mutable borrow of keys and values for `'a`.
pub(super) _marker: PhantomData<&'a mut (K, V)>,
}
impl<'a, K, V> OccupiedEntry<'a, K, V> {
    /// Returns a shared reference to the value of this entry.
    ///
    /// # Panics
    /// Panics when the entry's node holds no value.
    pub fn get(&self) -> &V {
        let shared = self.handle.reborrow();
        unsafe { shared.into_value() }
    }

    /// Returns a mutable reference to the value of this entry.
    ///
    /// # Panics
    /// Panics when the entry's node holds no value.
    pub fn get_mut(&mut self) -> &mut V {
        let handle = &mut self.handle;
        unsafe { handle.value_mut() }
    }

    /// Converts the entry into a mutable view of the subtree rooted at the
    /// entry's node.
    pub fn into_subtree(self) -> super::subtree::Subtree<K, V, marker::Mut<'a>> {
        let root = self.handle.node;
        super::subtree::Subtree { root }
    }
}
impl<'a, Q, K: Ord, V> Entry<'a, Q, K, V>
where
    Q: Iterator<Item = K>,
{
    /// Ensures the entry is occupied: a vacant entry gets `value` inserted,
    /// an occupied entry is returned as is and `value` is dropped.
    pub fn or_insert(self, value: V) -> OccupiedEntry<'a, K, V> {
        match self {
            Entry::Occupied(existing) => existing,
            Entry::Vacant(slot) => slot.insert_entry(value),
        }
    }
}
impl<'a, Q, K: Ord, V> VacantEntry<'a, Q, K, V>
where
    Q: Iterator<Item = K>,
{
    /// Inserts `value` under the entry's remaining key sequence and returns
    /// the now occupied entry. When the tree has no root yet, one is created
    /// first.
    pub fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V> {
        use super::HandleOrTree;

        // Work out the edge at which insertion starts.
        let start = match self.handle {
            HandleOrTree::Handle(edge) => edge,
            // No root node yet.
            HandleOrTree::Tree(mut dormant) => {
                // SAFETY: there are no nodes in the tree yet, so nothing
                // derived from the tree borrow can still be in use.
                let tree = unsafe { dormant.reborrow() };
                let root = tree.root.insert(NodeRef::new());
                unsafe { Handle::new_edge(root.borrow_mut(), 0) }
            }
        };

        let handle = unsafe { start.insert_recursing(self.key, value) };
        OccupiedEntry {
            handle,
            _marker: PhantomData,
        }
    }
}
}
mod subtree {
use core::marker::PhantomData;
use crate::tree::{HandleOrTree, search};
use super::{
NodeRef, OnceAndIter,
entry::{Entry, OccupiedEntry, VacantEntry},
marker,
};
// BorrowType may be one of `Immut`, `Mut`.
/// A view of the part of a tree at and below one node.
pub struct Subtree<K, V, BorrowType> {
/// The node this view is rooted at.
pub(super) root: NodeRef<BorrowType, K, V>,
}
impl<K, V, BorrowType> Subtree<K, V, BorrowType>
where
    K: Ord,
{
    /// Returns the value stored under `key_seq`, relative to this subtree's
    /// root.
    ///
    /// Returns `None` when no node exists for the key sequence, and also when
    /// the node exists only as a prefix of longer keys and holds no value.
    pub fn get<Q>(&self, mut key_seq: Q) -> Option<&V>
    where
        Q: Iterator<Item = K>,
    {
        let root = self.root.reborrow();
        match root.search_tree(&mut key_seq) {
            // The node may exist without a value, so read the `Option`
            // directly instead of unwrapping it.
            search::SearchResult::Found(handle) => handle.node.into_leaf().value.as_ref(),
            search::SearchResult::GoDown(_) | search::SearchResult::Insert(..) => None,
        }
    }

    /// Returns a shared view of the subtree rooted at the node `key_seq`
    /// leads to, or `None` when there is no such node. The node itself need
    /// not hold a value.
    pub fn get_subtree<Q>(&self, mut key_seq: Q) -> Option<Subtree<K, V, marker::Immut<'_>>>
    where
        Q: Iterator<Item = K>,
    {
        let root = self.root.reborrow();
        match root.search_tree(&mut key_seq) {
            search::SearchResult::Found(handle) => Some(Subtree { root: handle.node }),
            search::SearchResult::GoDown(_) | search::SearchResult::Insert(..) => None,
        }
    }
}
impl<'a, K, V> Subtree<K, V, marker::Mut<'a>>
where
    K: Ord,
{
    /// Returns a mutable reference to the value stored under `key_seq`,
    /// relative to this subtree's root.
    ///
    /// Returns `None` when no node exists for the key sequence, and also when
    /// the node exists only as a prefix of longer keys and holds no value.
    pub fn get_mut<Q>(&mut self, mut key_seq: Q) -> Option<&mut V>
    where
        Q: Iterator<Item = K>,
    {
        // SAFETY: the reborrow is tied to the `&mut self` borrow and is only
        // used to search and hand out a single value reference.
        let root = unsafe { self.root.reborrow_mut() };
        match root.search_tree(&mut key_seq) {
            // The node may exist without a value, so read the `Option`
            // directly instead of unwrapping it.
            search::SearchResult::Found(handle) => handle.node.into_leaf_mut().value.as_mut(),
            search::SearchResult::GoDown(_) | search::SearchResult::Insert(..) => None,
        }
    }

    /// Returns a mutable view of the subtree rooted at the node `key_seq`
    /// leads to, or `None` when there is no such node. The node itself need
    /// not hold a value.
    pub fn get_subtree_mut<Q>(
        &'_ mut self,
        mut key_seq: Q,
    ) -> Option<Subtree<K, V, marker::Mut<'_>>>
    where
        Q: Iterator<Item = K>,
    {
        // SAFETY: the reborrow is tied to the `&mut self` borrow.
        let root = unsafe { self.root.reborrow_mut() };
        match root.search_tree(&mut key_seq) {
            search::SearchResult::Found(handle) => Some(Subtree { root: handle.node }),
            search::SearchResult::GoDown(_) | search::SearchResult::Insert(..) => None,
        }
    }

    /// Looks up `key_seq` and returns the corresponding entry.
    ///
    /// The entry is `Occupied` only when the node the sequence leads to holds
    /// a value. A node that exists merely as a prefix of longer keys yields a
    /// `Vacant` entry whose insertion stores the value in that node.
    pub fn entry<Q>(&'_ mut self, mut key_seq: Q) -> Entry<'_, OnceAndIter<Q, K>, K, V>
    where
        Q: Iterator<Item = K>,
    {
        use Entry::*;
        // SAFETY: the reborrow is tied to the `&mut self` borrow, as is the
        // entry returned below.
        let root = unsafe { self.root.reborrow_mut() };
        match root.search_tree(&mut key_seq) {
            search::SearchResult::Found(handle) => {
                if handle.node.as_leaf().value.is_some() {
                    Occupied(OccupiedEntry {
                        handle,
                        _marker: PhantomData,
                    })
                } else {
                    // The node exists but is empty. With no keys left,
                    // inserting through an edge handle on this node stores
                    // the value in the node itself; the index is not used.
                    let edge = unsafe { super::Handle::new_edge(handle.node, 0) };
                    Vacant(VacantEntry {
                        key: key_seq.into(),
                        handle: HandleOrTree::Handle(edge),
                        _marker: PhantomData,
                    })
                }
            }
            search::SearchResult::GoDown(handle) => Vacant(VacantEntry {
                key: key_seq.into(),
                handle: HandleOrTree::Handle(handle),
                _marker: PhantomData,
            }),
            // The search already took the first missing key out of the
            // iterator; put it back in front of the rest.
            search::SearchResult::Insert(key, handle) => Vacant(VacantEntry {
                key: OnceAndIter::once(key, key_seq),
                handle: HandleOrTree::Handle(handle),
                _marker: PhantomData,
            }),
        }
    }
}
}
mod borrow {
    use core::{marker::PhantomData, ptr::NonNull};

    /// Models a reborrow of some unique reference, when you know that the
    /// reborrow and everything derived from it will stop being used at some
    /// point, after which you want to use the original unique reference
    /// again.
    ///
    /// The borrow checker normally does this stacking of borrows for you, but
    /// some control flows are too involved for it to follow. A
    /// `DormantMutRef` lets you do the bookkeeping yourself while keeping the
    /// raw pointer code needed for it in one place.
    pub(super) struct DormantMutRef<'a, T> {
        ptr: NonNull<T>,
        _marker: PhantomData<&'a mut T>,
    }

    // Thread-safety mirrors that of the `&'a mut T` this type stands in for.
    unsafe impl<'a, T> Sync for DormantMutRef<'a, T> where &'a mut T: Sync {}
    unsafe impl<'a, T> Send for DormantMutRef<'a, T> where &'a mut T: Send {}

    impl<'a, T> DormantMutRef<'a, T> {
        /// Captures a unique borrow and immediately reborrows it. For the
        /// compiler the new reference lives as long as the original one, but
        /// you promise to use it for a shorter period.
        pub(super) fn new(t: &'a mut T) -> (&'a mut T, Self) {
            let mut ptr = NonNull::from(t);
            let dormant = Self {
                ptr,
                _marker: PhantomData,
            };
            // SAFETY: the borrow is held throughout 'a via `_marker`, and
            // this is the only reference handed out, so it is unique.
            let live = unsafe { ptr.as_mut() };
            (live, dormant)
        }

        /// Reverts to the unique borrow initially captured.
        ///
        /// # Safety
        ///
        /// The reborrow must have ended, i.e., the reference returned by
        /// `new` and all pointers and references derived from it, must not
        /// be used anymore.
        pub(super) unsafe fn awaken(self) -> &'a mut T {
            let mut ptr = self.ptr;
            // SAFETY: our own safety conditions imply this reference is
            // again unique.
            unsafe { ptr.as_mut() }
        }

        /// Borrows a new mutable reference from the unique borrow initially
        /// captured.
        ///
        /// # Safety
        ///
        /// The reborrow must have ended, i.e., the reference returned by
        /// `new` and all pointers and references derived from it, must not
        /// be used anymore.
        pub(super) unsafe fn reborrow(&mut self) -> &'a mut T {
            // SAFETY: our own safety conditions imply this reference is
            // again unique.
            unsafe { self.ptr.as_mut() }
        }

        /// Borrows a new shared reference from the unique borrow initially
        /// captured.
        ///
        /// # Safety
        ///
        /// The reborrow must have ended, i.e., the reference returned by
        /// `new` and all pointers and references derived from it, must not
        /// be used anymore.
        pub(super) unsafe fn reborrow_shared(&self) -> &'a T {
            // SAFETY: our own safety conditions imply no mutable reference
            // to the target is in use.
            unsafe { self.ptr.as_ref() }
        }
    }
}
/// The owning reference to a tree's root node.
type Root<K, V> = NodeRef<marker::Owned, K, V>;
/// A prefix tree mapping sequences of `K` to values of type `V`.
struct Tree<K, V> {
/// The root node; `None` until the first insertion creates it.
root: Option<Root<K, V>>,
/// Marks the tree as logically owning heap-allocated keys and values.
_marker: PhantomData<alloc::boxed::Box<(K, V)>>,
}
// Debug output for a tree reference: a map with a single "root" key whose
// value is the recursively formatted root node, or an empty map when the tree
// has no root. Note that the impl is for `&Tree`, not for `Tree` itself.
impl<'a, K: core::fmt::Debug + 'a, V: core::fmt::Debug + 'a> core::fmt::Debug for &'a Tree<K, V> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let mut map = f.debug_map();
// Formats one node and, recursively, everything below it.
// A node without keys is printed as `Leaf { value }`; any other node as
// `Internal { value, edges }`, where `edges` lists one `{key, child}` set
// per key.
fn format_node_ref<'a, K: 'a, V: 'a>(
f: &mut core::fmt::Formatter<'_>,
node: &NodeRef<marker::Immut<'a>, K, V>,
) -> core::fmt::Result
where
K: core::fmt::Debug,
V: core::fmt::Debug,
{
match node.force() {
ForceResult::Leaf(leaf) => f
.debug_struct("Leaf")
.field("value", &leaf.into_leaf().value)
.finish(),
ForceResult::Internal(internal) => f
.debug_struct("Internal")
.field("value", &internal.into_leaf().value)
.field_with("edges", |f| {
let mut list = f.debug_list();
// `keys()` and `edges()` are parallel slices of equal length.
for (key, edge) in internal.keys().iter().zip(internal.edges()) {
list.entry_with(|f| {
f.debug_set()
.entry(key)
.entry_with(|f| {
// Wrap the raw child pointer in a shared node reference to recurse.
format_node_ref(
f,
&NodeRef {
node: *edge,
_marker: PhantomData,
},
)
})
.finish()
});
}
list.finish()
})
.finish(),
}
}
match self.root {
Some(ref root) => {
map.key(&"root").value_with(|f| {
let internal = root.reborrow();
format_node_ref(f, &internal)
});
map.finish()
}
None => map.finish(),
}
}
}
impl<K, V> Tree<K, V> {
    /// Creates an empty tree. No node is allocated until the first insertion.
    pub fn new() -> Self {
        let root = None;
        Self {
            root,
            _marker: PhantomData,
        }
    }
}
/// An iterator that yields one optional leading item and then everything
/// produced by the wrapped iterator.
#[derive(Debug)]
pub struct OnceAndIter<I, T>
where
    I: Iterator<Item = T>,
{
    /// The leading item, if there is one and it has not been yielded yet.
    once: Option<T>,
    /// The iterator that supplies all following items.
    iter: I,
}

impl<I, T> OnceAndIter<I, T>
where
    I: Iterator<Item = T>,
{
    /// Creates an iterator that yields `once` first and then the items of
    /// `iter`.
    pub fn once(once: T, iter: I) -> Self {
        let once = Some(once);
        Self { once, iter }
    }
}

impl<I> From<I> for OnceAndIter<I, I::Item>
where
    I: Iterator,
{
    /// Wraps `iter` without a leading item.
    fn from(iter: I) -> Self {
        Self { iter, once: None }
    }
}

impl<I, T> Iterator for OnceAndIter<I, T>
where
    I: Iterator<Item = T>,
{
    type Item = T;

    /// Yields the leading item on the first call (when there is one) and
    /// delegates to the wrapped iterator afterwards.
    fn next(&mut self) -> Option<T> {
        self.once.take().or_else(|| self.iter.next())
    }
}
impl<K, V> Tree<K, V>
where
K: Ord,
{
/// Returns a mutable subtree view of the whole tree, or `None` when the
/// tree has no root node yet.
fn as_subtree_mut<'a>(&'a mut self) -> Option<subtree::Subtree<K, V, marker::Mut<'a>>> {
// Going through `dormant()`/`awaken()` lets the node reference take the
// full lifetime `'a` instead of that of the temporary `borrow_mut()`.
let root = self.root.as_mut()?.borrow_mut().dormant();
Some(subtree::Subtree {
root: unsafe { root.awaken() },
})
}
/// Looks up `key_seq` in the tree and returns the corresponding entry,
/// borrowing the tree for `'a`.
///
/// When the tree has no root yet the entry is vacant and carries a dormant
/// borrow of the tree itself, so that insertion can create the root.
pub fn entry<'a, Q>(&'a mut self, key_seq: Q) -> entry::Entry<'a, OnceAndIter<Q, K>, K, V>
where
Q: Iterator<Item = K>,
{
use borrow::DormantMutRef;
use entry::{Entry::*, VacantEntry};
// `tree` is used for the lookup; `dormant` is only needed when there is
// no root, and is simply dropped otherwise.
let (tree, dormant) = DormantMutRef::new(self);
let entry = match tree.as_subtree_mut() {
Some(mut subtree) => {
// This entry borrows from the local `subtree`, hence the lifetime
// extension below.
let entry = subtree.entry(key_seq);
// SAFETY: extending the lifetime is fine because we borrow the tree for 'a,
// and no references to the subtree are live after this.
// The same could be achieved using `dormant.reborrow()` a bunch
// of times while destructuring the entry.
unsafe {
core::mem::transmute::<
entry::Entry<'_, OnceAndIter<Q, K>, K, V>,
entry::Entry<'a, OnceAndIter<Q, K>, K, V>,
>(entry)
}
}
None => Vacant(VacantEntry {
key: key_seq.into(),
handle: HandleOrTree::Tree(dormant),
_marker: PhantomData,
}),
};
entry
}
}
/// Inserts `value` at position `idx` of `slice`, moving the elements from
/// `idx` up to the second-to-last one slot to the right. Whatever the last
/// slot held before is overwritten without being dropped.
///
/// # Safety
/// `idx` must be less than `slice.len()`. This is only checked by a
/// `debug_assert!`.
unsafe fn slice_insert<T>(slice: &mut [MaybeUninit<T>], idx: usize, value: T) {
    let len = slice.len();
    debug_assert!(len > idx);
    // SAFETY: with `idx < len`, `target` points into the slice, and the
    // shifted range `idx + 1 .. len` stays within it.
    unsafe {
        let target = slice.as_mut_ptr().add(idx);
        // Nothing needs to move when inserting into the last slot.
        if idx + 1 < len {
            // The ranges overlap, so this must be a `memmove`-style copy.
            target.copy_to(target.add(1), len - idx - 1);
        }
        target.write(MaybeUninit::new(value));
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Attaches a value-carrying node to a fresh root with `reparent` and checks
// that the value is still readable through the returned reference.
#[test]
fn promote_leaf() {
// Value type that logs when it is dropped.
#[derive(Debug, PartialEq, Eq)]
struct Test(&'static str);
impl Drop for Test {
fn drop(&mut self) {
std::eprintln!("Dropping: {}", self.0);
}
}
let mut leaf = NodeRef::<_, (), Test>::new();
leaf.borrow_mut().as_leaf_mut().value = Some(Test("test"));
let mut root = NodeRef::new();
// Insert `leaf` as the first (and only) child of `root`, under key `()`.
let mut leaf = leaf.reparent(unsafe { Handle::new_edge(root.borrow_mut(), 0) }, ());
assert_eq!(leaf.as_leaf_mut().value, Some(Test("test")));
}
}