optional type and simple rebalancing

This commit is contained in:
Janis 2023-03-20 00:17:26 +01:00
parent 482f01ed5e
commit d3670bcf69

View file

@ -1,5 +1,143 @@
const std = @import("std");
/// Namespace holding a Rust-flavoured `Option(T)` wrapper around Zig's
/// native optional `?T`, plus a helper to convert native optionals into it.
const Optional = struct {
    /// Tag shared by every `Option(T)` instantiation.
    const OptionEnum = enum {
        Some,
        None,
    };

    /// Wraps a native optional in the matching `Option`.
    /// `value` must have an optional type `?T`; the result is `Option(T)`.
    pub fn into_option(value: anytype) Option(@typeInfo(@TypeOf(value)).Optional.child) {
        return Option(@typeInfo(@TypeOf(value)).Optional.child).from_optional(value);
    }

    /// Returns a tagged union that is either `Some` (carrying a `T`) or `None`.
    pub fn Option(comptime T: type) type {
        return union(OptionEnum) {
            Some: T,
            None,

            const Self = @This();

            /// The payload type `T` of this `Option`.
            pub inline fn type_() type {
                return T;
            }

            /// Builds an `Option` from a native optional: `null` becomes `None`.
            pub inline fn from_optional(value: ?T) Self {
                if (value) |v| {
                    return Self.some(v);
                } else {
                    return Self.none();
                }
            }

            /// Converts back into a native optional: `None` becomes `null`.
            pub fn into_optional_t(self: Self) ?T {
                return switch (self) {
                    .Some => |value| value,
                    .None => null,
                };
            }

            /// Constructs the `Some` variant holding `value`.
            pub inline fn some(value: T) Self {
                return .{ .Some = value };
            }

            /// Constructs the `None` variant.
            pub inline fn none() Self {
                return .None;
            }

            /// True when a payload is present.
            pub fn is_some(self: Self) bool {
                return switch (self) {
                    .Some => true,
                    .None => false,
                };
            }

            /// True when no payload is present.
            pub fn is_none(self: Self) bool {
                return !self.is_some();
            }

            /// Returns the payload. The caller must know the option is `Some`:
            /// this reads the `Some` field directly, which is illegal for `None`.
            pub fn unwrap(self: Self) T {
                return self.Some;
            }

            /// Older, misleading name for `unwrap`; kept so existing callers
            /// continue to compile.
            pub fn is_unwrap(self: Self) T {
                return self.unwrap();
            }

            /// Applies `func` to the payload, yielding `Option(U)`;
            /// `None` maps to `None`.
            pub fn map(self: Self, comptime U: type, func: fn (T) U) Option(U) {
                return switch (self) {
                    .Some => |value| Option(U).some(func(value)),
                    .None => Option(U).none(),
                };
            }

            /// Returns `self` when it is `Some`, otherwise `other`.
            pub fn or_option(self: Self, other: Self) Self {
                return switch (self) {
                    .Some => self,
                    .None => other,
                };
            }

            /// Like `map`, but takes a context value `with` whose type declares
            /// `call(self, T) R`; the result type `Option(R)` is derived from
            /// `call`'s return type.
            pub fn map_with(self: Self, with: anytype) Option(@typeInfo(@TypeOf(@TypeOf(with).call)).Fn.return_type.?) {
                const Return = Option(@typeInfo(@TypeOf(@TypeOf(with).call)).Fn.return_type.?);
                return switch (self) {
                    .Some => |value| Return.some(with.call(value)),
                    .None => Return.none(),
                };
            }

            /// Applies `func`, which itself returns an `Option(U)`, to the
            /// payload and returns that result without nesting; `None` stays `None`.
            pub fn join(self: Self, comptime U: type, func: fn (T) Option(U)) Option(U) {
                return switch (self) {
                    .Some => |value| func(value),
                    .None => Option(U).none(),
                };
            }

            /// Like `join`, but takes a context value `with` whose `call`
            /// returns an `Option`; the payload type is read through `type_()`.
            pub fn join_with(self: Self, with: anytype) Option(@typeInfo(@TypeOf(@TypeOf(with).call)).Fn.return_type.?.type_()) {
                const Return = Option(@typeInfo(@TypeOf(@TypeOf(with).call)).Fn.return_type.?.type_());
                return switch (self) {
                    .Some => |value| with.call(value),
                    .None => Return.none(),
                };
            }
        };
    }
};
/// Returns twice `v`; used as the mapping function in `option_test`.
fn double(v: u32) u32 {
    const factor: u32 = 2;
    return factor * v;
}
/// Exercises `Optional.Option`: maps with a plain function, maps with a
/// context struct, converts from a native optional, and prints each result.
fn option_test() void {
    // Context object for `map_with`: adds a stored offset to the payload.
    const AddOffset = struct {
        val: i32,
        fn call(self: @This(), v: u32) i32 {
            return @intCast(i32, v) + self.val;
        }
    };
    const U32Option = Optional.Option(u32);

    const five = U32Option.some(5);
    const doubled = five.map(u32, double);
    std.debug.print("{}\n", .{doubled});

    const plus_three = doubled.map_with(AddOffset{ .val = 3 });
    std.debug.print("{}\n", .{plus_three});

    const native: ?u32 = 4;
    const plus_ten = Optional.into_option(native).map_with(AddOffset{ .val = 10 });
    std.debug.print("{}\n", .{plus_ten});

    // 5 doubled must be 10.
    std.debug.assert(doubled.Some == 10);
}
// Runs `option_test` under the test runner.
test "option" {
// Emit a newline first so the output printed by `option_test` starts on its own line.
std.debug.print("\n", .{});
option_test();
}
const BTree = struct {
const Self = @This();
@ -206,6 +344,66 @@ const BTree = struct {
leaf.len = leaf.len + 1;
}
/// Removes `key` from the node referred to by `self`.
///
/// The caller must already know that `key` is stored in exactly this node.
/// - Internal node: when subtrees exist on both sides of the key, the key is
///   overwritten with its in-order neighbour taken from whichever of the two
///   adjacent leaves holds more values, and that neighbour is removed from
///   its leaf.
/// - Leaf: the key is removed directly and a notice is printed when the leaf
///   then needs rebalancing.
fn remove_key_from_node(self: NodeOrLeaf, key: u32) void {
    switch (self.force()) {
        .internal => |node| {
            // need to check if the edges to the left and right of the to-be-removed key exist,
            // and if both exist, find the immediate neighbors of the key and promote
            // the one in the bigger leaf up as a replacement.
            // this node will not require rebalancing in that case, but the leaf
            // from which we removed the replacement key might.

            // safety: we already know the key is in this exact node.
            const index = node.leaf.find_key(key).Leaf.idx;
            const left = node.get_edge(index);
            const right = node.get_edge(index + 1);

            if (left != null and right != null) {
                const right_value = right.?.find_least_significant_key();
                const left_value = left.?.find_most_significant_key();

                // pick the leaf with more values to reduce unnecessary rebalances.
                // The two lookups return distinct struct types, so the leaf and
                // the index are selected field by field.
                const use_right = right_value.leaf.len > left_value.leaf.len;
                const donor = if (use_right) right_value.leaf else left_value.leaf;
                const donor_idx = if (use_right) right_value.idx else left_value.idx;

                const promoted = donor.get_values()[donor_idx];
                donor.remove_key(promoted);
                node.leaf.values[index] = promoted;
                // TODO: check `donor` for rebalance
            } else {
                // no adjacent edges
                // (I don't think this can actually happen in a proper btree)
                // no need to promote any value from an adjacent subtree because
                // there is no subtree on one side.
                // instead we need to check for rebalance of self here TODO
                // NOTE(review): the key itself is not removed on this path, and
                // `shift_edges_left` asserts `start >= count`, so `index == 0`
                // trips that assertion - confirm the intended behaviour.
                node.shift_edges_left(index, 1);
            }
        },
        .leaf => |node| {
            // we can just remove the key and then check afterwards if the
            // tree needs to be rebalanced
            node.remove_key(key);

            if (self.as_leaf().needs_rebalance()) {
                std.debug.print("---------- require rebalance! -----------\n", .{});
            }
        },
    }
}
fn find_key(self: NodeOrLeaf, key: u32) Leaf.SearchResult {
var leaf = self.as_leaf();
while (true) {
@ -237,7 +435,7 @@ const BTree = struct {
}
}
fn find_most_significant_key(self: NodeOrLeaf) Leaf.SearchResult {
fn find_most_significant_key(self: NodeOrLeaf) struct { leaf: *Leaf, idx: u16 } {
var node = self;
while (true) {
switch (node.force()) {
@ -248,13 +446,13 @@ const BTree = struct {
}
},
.leaf => |leaf| {
return .{ .Leaf = .{ .leaf = leaf, .idx = leaf.len - 1 } };
return .{ .leaf = leaf, .idx = leaf.len - 1 };
},
}
}
}
fn find_least_significant_key(self: NodeOrLeaf) Leaf.SearchResult {
fn find_least_significant_key(self: NodeOrLeaf) struct { leaf: *Leaf, idx: u16 } {
var node = self;
while (true) {
switch (node.force()) {
@ -265,7 +463,7 @@ const BTree = struct {
}
},
.leaf => |leaf| {
return .{ .Leaf = .{ .leaf = leaf, .idx = 0 } };
return .{ .leaf = leaf, .idx = 0 };
},
}
}
@ -402,6 +600,35 @@ const BTree = struct {
return self.edges[0..len];
}
/// Returns the child at edge position `i`, passed through `force()`.
/// Yields `null` when `i` is greater than the node's value count or when
/// the slot itself is empty.
fn get_edge(self: *Node, i: u16) ?NodeOrLeaf {
    if (i > self.leaf.len) return null;
    const child = self.get_edges()[i] orelse return null;
    return child.force();
}
/// Takes the child at edge position `index` out of this node: the edge is
/// read first, then every edge after it is shifted one slot to the left.
/// Returns the edge that was read (or `null` if there was none).
fn remove_edge(self: *Node, index: u16) ?NodeOrLeaf {
    const removed = self.get_edge(index);
    const first_to_move = index + 1;
    self.shift_edges_left(first_to_move, 1);
    return removed;
}
/// Moves every edge from position `start` onward `count` slots to the left.
///
/// Each moved child has its parent back-reference updated to the slot it
/// ends up in (`i - count`), empty slots are moved as well so that no stale
/// pointer survives at the destination, and the slots vacated at the tail
/// are cleared.
/// Asserts `start >= count`; does nothing when `start` is greater than the
/// node's value count.
fn shift_edges_left(self: *Node, start: u16, count: u16) void {
    std.debug.assert(start >= count);
    if (start > self.leaf.len) return;

    const edges = self.get_edges();
    for (edges[start..], start..) |maybe_edge, i| {
        const dest = i - count;
        if (maybe_edge) |edge| {
            // the child now lives at `dest`, not at its old position `i`.
            edge.as_leaf().parent = .{ .parent = self, .idx = @intCast(u16, dest) };
        }
        self.edges[dest] = maybe_edge;
    }

    // Source slots that were not overwritten by the move still hold copies of
    // edges that now live further left; clear them so no child is referenced twice.
    const tail_start = edges.len - count;
    const first_stale = if (start > tail_start) start else tail_start;
    for (first_stale..edges.len) |slot| {
        self.edges[slot] = null;
    }
}
fn dbg(self: *Node) void {
const values = self.leaf.get_values();
const edges = self.get_edges()[0..values.len];
@ -528,9 +755,144 @@ const BTree = struct {
Leaf: struct { leaf: *Leaf, idx: u16 },
};
/// True when this leaf holds fewer than `MIN_AFTER_SPLIT` values.
fn needs_rebalance(self: *Leaf) bool {
    const stored = self.len;
    return stored < MIN_AFTER_SPLIT;
}
/// Repairs an underfull node after a removal.
///
/// Does nothing unless `needs_rebalance()` is true. With a parent, the
/// fuller of the adjacent siblings is chosen; if it holds more than
/// `MIN_AFTER_SPLIT` values one value is rotated through the parent's
/// separator, otherwise the two siblings and the separator are merged into
/// the left one and the parent is rebalanced in turn. Without a parent
/// (root) and with zero values left, the contents of the first child are
/// pulled up into the root.
///
/// NOTE(review): rotation and merge only move values, not child edges, so
/// they look correct for leaf-level nodes only - confirm before relying on
/// the recursive call for internal nodes.
fn rebalance(self: *Leaf) void {
    if (!self.needs_rebalance()) return;

    if (self.parent) |parent| {
        // `parent.idx` is unsigned: only look left when a left neighbour can
        // exist, otherwise `parent.idx - 1` underflows for the first edge.
        const left_sib: ?NodeOrLeaf = if (parent.idx > 0)
            parent.parent.get_edge(parent.idx - 1)
        else
            null;
        const right_sib: ?NodeOrLeaf = parent.parent.get_edge(parent.idx + 1);

        const Side = enum { Left, Right };
        const Sibling = struct { node: NodeOrLeaf, side: Side };

        // Prefer the sibling with more values; ties go to the right one.
        const sibling: Sibling = blk: {
            if (left_sib) |left| {
                if (right_sib) |right| {
                    if (left.as_leaf().len > right.as_leaf().len) {
                        break :blk .{ .node = left, .side = .Left };
                    }
                    break :blk .{ .node = right, .side = .Right };
                }
                break :blk .{ .node = left, .side = .Left };
            }
            if (right_sib) |right| {
                break :blk .{ .node = right, .side = .Right };
            }
            // no sibling at all: nothing to borrow from or merge with.
            return;
        };

        if (sibling.node.as_leaf().len > MIN_AFTER_SPLIT) {
            // rotate
            // The separator between edge `i` and edge `i + 1` is value `i`:
            // for a left sibling that is `idx - 1`, for a right sibling `idx`.
            const index: usize = switch (sibling.side) {
                .Left => parent.idx - 1,
                .Right => parent.idx,
            };

            // separator moves down into this node ...
            NodeOrLeaf.from_leaf(self).push_value(parent.parent.leaf.values[index]);

            // ... and the sibling's value closest to it moves up in its place.
            const replacement = switch (sibling.side) {
                .Left => sibling.node.get_most_significant_value(),
                .Right => sibling.node.get_least_significant_value(),
            };
            sibling.node.as_leaf().remove_key(replacement);
            parent.parent.leaf.values[index] = replacement;
        } else {
            // merge
            const left = switch (sibling.side) {
                .Left => sibling.node.as_leaf(),
                .Right => self,
            };
            const right = switch (sibling.side) {
                .Right => sibling.node.as_leaf(),
                .Left => self,
            };
            defer right.destroy();

            const separator_idx = left.parent.?.idx;
            const right_idx = right.parent.?.idx;
            const key = parent.parent.leaf.get_value(separator_idx);

            // remove superfluous parent edge to right.
            // This has to happen while the parent still has its old value
            // count: shifting is bounded by that count, so removing the key
            // first would leave the last edge behind.
            _ = parent.parent.remove_edge(right_idx);

            // merge parent separator into left
            parent.parent.leaf.remove_key(key);
            NodeOrLeaf.from_leaf(left).push_value(key);

            // copy values from right to left
            std.mem.copy(u32, left.values[left.len..], right.values[0..right.len]);
            left.len = left.len + right.len;

            // the parent lost a value and may now be underfull itself.
            parent.parent.as_leaf().rebalance();
        }
    } else {
        // root node: if len is 0, just copy paste from child node if exists
        if (self.len != 0) return;

        switch (NodeOrLeaf.from_leaf(self)) {
            .internal => |root| {
                if (root.edges[0]) |edge| {
                    // NOTE(review): assumes `destroy` does not free the
                    // grandchildren adopted below - confirm.
                    defer edge.destroy();

                    // copy values
                    root.as_leaf().len = edge.as_leaf().len;
                    root.as_leaf().level = edge.as_leaf().level;
                    std.mem.copy(u32, root.leaf.values[0..], edge.as_leaf().values[0..]);

                    switch (edge.force()) {
                        .internal => |child| {
                            // adopt the child's edges and re-parent them to the root.
                            std.mem.copy(?NodeOrLeaf, root.edges[0..], child.edges[0..]);
                            for (root.get_edges(), 0..) |e, i| {
                                if (e) |ee| {
                                    ee.as_leaf().parent = .{ .parent = root, .idx = @intCast(u16, i) };
                                }
                            }
                        },
                        .leaf => {
                            // the child had no edges, so the root has none either.
                            for (&root.edges) |*e| {
                                e.* = null;
                            }
                        },
                    }
                }
            },
            .leaf => {
                // a root that is a leaf with no values: the tree is simply empty.
            },
        }
    }
}
/// Removes one value from this leaf: the first stored value that is
/// greater than or equal to `key`. Later values are shifted one slot to the
/// left and `len` is decremented.
///
/// safety: `key` is expected to be contained in `.values`. When the leaf is
/// empty or no stored value is `>= key`, nothing is removed.
fn remove_key(self: *Leaf, key: u32) void {
    const values = self.get_values();

    // position of the value to drop; bail out if there is none.
    const n: usize = for (values, 0..) |val, i| {
        if (val >= key) break i;
    } else return;

    // close the gap by moving everything after `n` one slot to the left.
    for (values[n + 1 ..], n..) |val, j| {
        self.values[j] = val;
    }

    self.len = self.len - 1;
}
fn find_key(self: *Leaf, key: u32) SearchResult {
@ -572,6 +934,10 @@ const BTree = struct {
const len = self.len;
return self.values[0..len];
}
/// Returns the stored value at position `idx` of `get_values()`.
fn get_value(self: *Leaf, idx: u16) u32 {
    const values = self.get_values();
    return values[idx];
}
};
};
@ -673,6 +1039,52 @@ test "btree rand insert" {
tree.dbg();
}
// References every declaration of the listed containers so that they are
// compiled even when no other test calls them.
test "ref removal" {
    inline for (.{ BTree.Leaf, BTree.NodeOrLeaf }) |Container| {
        std.testing.refAllDeclsRecursive(Container);
    }
}
// Builds a tree from the values 0..8, then three times in a row drops the
// last value of the root's second child by shrinking its length and asks
// that child to rebalance, dumping the tree after every step.
test "btree rebalance" {
    std.debug.print("rebalance simple leaves\n", .{});

    var tree = BTree.create(std.testing.allocator);
    defer tree.destroy();

    var value: u32 = 0;
    while (value < 9) : (value += 1) {
        _ = try tree.insert(value);
    }

    tree.dbg();
    std.debug.print("\n", .{});

    for (0..3) |_| {
        const node = tree.root.?.internal.edges[1].?;
        node.as_leaf().len = node.as_leaf().len - 1;
        node.as_leaf().rebalance();

        tree.dbg();
        std.debug.print("\n", .{});
    }
}
test "btree least most sig value" {
std.debug.print("least/most significant values\n", .{});