From d3670bcf697c6575803a4c61d3a4ae88d79c23d2 Mon Sep 17 00:00:00 2001
From: Janis
Date: Mon, 20 Mar 2023 00:17:26 +0100
Subject: [PATCH] optional type and simple rebalancing

---
Review notes (this section is ignored by `git am`):
  * Option: `unwrap` added (callers used it, only `is_unwrap` existed).
  * remove_key_from_node: replacement leaf/index selection now yields values,
    and the promoted key is written through `node`, not `self`.
  * shift_edges_left: moved children get the index of their new slot, empty
    slots move too, freed tail slots are cleared.
  * rebalance: no `idx - 1` wrap for the leftmost child, separator index
    fixed for right-hand rotation, edge removed before the parent key on merge.
  * Leaf.remove_key: absent key / empty node is a no-op.
  * Hunk headers and the diffstat were recomputed; the blob ids on the
    `index` line still describe the pre-review content.

 src/main.zig | 469 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 463 insertions(+), 6 deletions(-)

diff --git a/src/main.zig b/src/main.zig
index 7916e06..3fb454c 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,5 +1,168 @@
 const std = @import("std");
 
+/// Rust-style `Option(T)` helpers layered on top of Zig's native `?T`.
+const Optional = struct {
+    const OptionEnum = enum {
+        Some,
+        None,
+    };
+
+    /// Wraps a native optional in `Option(T)`; `value` must be of some `?T` type.
+    pub fn into_option(value: anytype) Option(@typeInfo(@TypeOf(value)).Optional.child) {
+        return Option(@typeInfo(@TypeOf(value)).Optional.child).from_optional(value);
+    }
+
+    /// Builds the tagged union `{ Some: T, None }` together with its combinators.
+    pub fn Option(comptime T: type) type {
+        return union(OptionEnum) {
+            Some: T,
+            None,
+
+            /// The payload type `T`.
+            pub inline fn type_() type {
+                return T;
+            }
+
+            /// Converts `?T` into `Some(v)` or `None`.
+            pub inline fn from_optional(value: ?T) Self {
+                if (value) |v| {
+                    return Self.some(v);
+                } else {
+                    return Self.none();
+                }
+            }
+
+            /// Converts back into a native `?T`.
+            pub fn into_optional_t(self: Self) ?T {
+                return switch (self) {
+                    .Some => |value| value,
+                    .None => null,
+                };
+            }
+
+            /// Constructs `Some(value)`.
+            pub inline fn some(value: T) Self {
+                return .{ .Some = value };
+            }
+
+            /// Constructs `None`.
+            pub inline fn none() Self {
+                return Option(T).None;
+            }
+
+            const Self = @This();
+
+            /// True when a payload is present.
+            pub fn is_some(self: Self) bool {
+                return switch (self) {
+                    .Some => true,
+                    .None => false,
+                };
+            }
+
+            /// True when no payload is present.
+            pub fn is_none(self: Self) bool {
+                return !self.is_some();
+            }
+
+            /// Returns the payload. Accessing `.Some` on a `None` value is
+            /// safety-checked illegal behaviour, so check `is_some()` first.
+            pub fn unwrap(self: Self) T {
+                return self.Some;
+            }
+
+            /// Misnamed original spelling of `unwrap`; kept as an alias.
+            pub fn is_unwrap(self: Self) T {
+                return self.unwrap();
+            }
+
+            /// Applies `func` to the payload, yielding `Option(U)`.
+            pub fn map(self: Self, comptime U: type, func: fn (T) U) Option(U) {
+                return switch (self) {
+                    .Some => |value| Option(U).some(func(value)),
+                    .None => Option(U).none(),
+                };
+            }
+
+            /// Returns `self` when it is `Some`, otherwise `other`.
+            pub fn or_option(self: Self, other: Self) Self {
+                return switch (self) {
+                    .Some => self,
+                    .None => other,
+                };
+            }
+
+            /// Like `map`, but `with` is a closure-like value exposing
+            /// `call(self, T) R`; the result is `Option(R)`.
+            pub fn map_with(self: Self, with: anytype) Option(@typeInfo(@TypeOf(@TypeOf(with).call)).Fn.return_type.?) {
+                const Return = Option(@typeInfo(@TypeOf(@TypeOf(with).call)).Fn.return_type.?);
+                return switch (self) {
+                    .Some => |value| Return.some(with.call(value)),
+                    .None => Return.none(),
+                };
+            }
+
+            /// Applies `func`, which itself returns an `Option(U)`, to the payload.
+            pub fn join(self: Self, comptime U: type, func: fn (T) Option(U)) Option(U) {
+                return switch (self) {
+                    .Some => |value| func(value),
+                    .None => Option(U).none(),
+                };
+            }
+
+            /// Like `join`, but `with.call(self, T)` must return an `Option(R)`.
+            pub fn join_with(self: Self, with: anytype) Option(@typeInfo(@TypeOf(@TypeOf(with).call)).Fn.return_type.?.type_()) {
+                const Return = Option(@typeInfo(@TypeOf(@TypeOf(with).call)).Fn.return_type.?.type_());
+                return switch (self) {
+                    .Some => |value| with.call(value),
+                    .None => Return.none(),
+                };
+            }
+        };
+    }
+};
+
+/// Test helper: doubles `v`.
+fn double(v: u32) u32 {
+    return v * 2;
+}
+
+/// Exercises `map`, `map_with` and `into_option`, printing each result.
+fn option_test() void {
+    const Option = Optional.Option;
+    const opt = Option(u32).some(5);
+    const b = opt.map(u32, double);
+
+    std.debug.print("{}\n", .{b});
+
+    const c = b.map_with(struct {
+        val: i32,
+
+        fn call(self: @This(), v: u32) i32 {
+            return @intCast(i32, v) + self.val;
+        }
+    }{ .val = 3 });
+
+    std.debug.print("{}\n", .{c});
+
+    const d1: ?u32 = 4;
+    const d = Optional.into_option(d1).map_with(struct {
+        val: i32,
+
+        fn call(self: @This(), v: u32) i32 {
+            return @intCast(i32, v) + self.val;
+        }
+    }{ .val = 10 });
+    std.debug.print("{}\n", .{d});
+
+    std.debug.assert(b.Some == 10);
+}
+
+test "option" {
+    std.debug.print("\n", .{});
+    option_test();
+}
+
 const BTree = struct {
     const Self = @This();
 
@@ -206,6 +369,71 @@ const BTree = struct {
             leaf.len = leaf.len + 1;
         }
 
+        /// Removes `key` from the node `self` points at; `key` must be stored in that node.
+        ///
+        /// Internal node with a subtree on both sides of the key: the key is replaced by
+        /// its in-order predecessor or successor, whichever lives in the fuller leaf.
+        /// Leaf: the key is removed directly. Rebalancing is still TODO on every path.
+        fn remove_key_from_node(self: NodeOrLeaf, key: u32) void {
+            switch (self.force()) {
+                .internal => |node| {
+                    // need to check if the edges to the left and right of the to-be-removed key exist,
+                    // and if both exist, find the immediate neighbors of the key and promote
+                    // the one in the bigger leaf up as a replacement.
+
+                    // this node will not require rebalancing in that case, but the leaf
+                    // from which we removed the replacement key might.
+
+                    // safety: we already know the key is in this exact node.
+                    const index = node.leaf.find_key(key).Leaf.idx;
+
+                    const left = Optional.into_option(node.get_edge(index));
+                    const right = Optional.into_option(node.get_edge(index + 1));
+
+                    if (right.is_some() and left.is_some()) {
+                        const right_value = right.unwrap().find_least_significant_key();
+                        const left_value = left.unwrap().find_most_significant_key();
+
+                        // pick the leaf with more values to reduce unnecessary rebalances.
+                        // The two lookups return distinct anonymous struct types, so the leaf
+                        // pointer and the index are selected separately.
+                        const use_right = right_value.leaf.len > left_value.leaf.len;
+                        const donor: *Leaf = if (use_right) right_value.leaf else left_value.leaf;
+                        const donor_idx: u16 = if (use_right) right_value.idx else left_value.idx;
+
+                        const promoted = donor.get_value(donor_idx);
+                        donor.remove_key(promoted);
+                        node.leaf.values[index] = promoted;
+
+                        // TODO: check `donor` for rebalance
+                    } else {
+                        // no adjacent edges
+                        // (I don't think this can actually happen in a proper B-tree)
+
+                        // no need to promote any value from an adjacent subtree because
+                        // there is no subtree on one side.
+
+                        // instead we need to check for rebalance of self here TODO
+                        // NOTE(review): `shift_edges_left` asserts `start >= count`, so this
+                        // trips for `index == 0`, and the key itself is not removed on
+                        // this path yet.
+
+                        node.shift_edges_left(index, 1);
+                    }
+                },
+                .leaf => |node| {
+                    // we can just remove the key and then check afterwards if the
+                    // tree needs to be rebalanced
+
+                    node.remove_key(key);
+
+                    if (self.as_leaf().needs_rebalance()) {
+                        std.debug.print("---------- require rebalance! -----------\n", .{});
+                    }
+                },
+            }
+        }
+
         fn find_key(self: NodeOrLeaf, key: u32) Leaf.SearchResult {
             var leaf = self.as_leaf();
             while (true) {
@@ -237,7 +465,7 @@ const BTree = struct {
             }
         }
 
-        fn find_most_significant_key(self: NodeOrLeaf) Leaf.SearchResult {
+        fn find_most_significant_key(self: NodeOrLeaf) struct { leaf: *Leaf, idx: u16 } {
             var node = self;
             while (true) {
                 switch (node.force()) {
@@ -248,13 +476,13 @@ const BTree = struct {
                         }
                     },
                     .leaf => |leaf| {
-                        return .{ .Leaf = .{ .leaf = leaf, .idx = leaf.len - 1 } };
+                        return .{ .leaf = leaf, .idx = leaf.len - 1 };
                     },
                 }
             }
         }
 
-        fn find_least_significant_key(self: NodeOrLeaf) Leaf.SearchResult {
+        fn find_least_significant_key(self: NodeOrLeaf) struct { leaf: *Leaf, idx: u16 } {
             var node = self;
             while (true) {
                 switch (node.force()) {
@@ -265,7 +493,7 @@ const BTree = struct {
                         }
                     },
                     .leaf => |leaf| {
-                        return .{ .Leaf = .{ .leaf = leaf, .idx = 0 } };
+                        return .{ .leaf = leaf, .idx = 0 };
                     },
                 }
             }
@@ -402,6 +630,47 @@ const BTree = struct {
             return self.edges[0..len];
         }
 
+        /// Returns the child at edge slot `i`, or null when `i` is past the last
+        /// edge of this node or the slot is empty.
+        fn get_edge(self: *Node, i: u16) ?NodeOrLeaf {
+            if (i > self.leaf.len) return null;
+            const edge = self.get_edges()[i] orelse return null;
+            return edge.force();
+        }
+
+        /// Detaches and returns the child at `index`; the edges to its right move
+        /// one slot to the left.
+        fn remove_edge(self: *Node, index: u16) ?NodeOrLeaf {
+            const edge = self.get_edge(index);
+            self.shift_edges_left(index + 1, 1);
+
+            return edge;
+        }
+
+        /// Moves every edge from slot `start` onwards `count` slots to the left,
+        /// overwriting the `count` slots in front of `start`.
+        ///
+        /// Each moved child gets its parent link updated to the slot it now lives
+        /// in, empty slots are moved as well, and the slots freed at the end are
+        /// cleared so no child is referenced twice.
+        fn shift_edges_left(self: *Node, start: u16, count: u16) void {
+            std.debug.assert(start >= count);
+            const edges = self.get_edges();
+            if (start > edges.len) return;
+
+            for (edges[start..], start..) |edg, i| {
+                const dest = i - count;
+                if (edg) |edge| {
+                    edge.as_leaf().parent = .{ .parent = self, .idx = @intCast(u16, dest) };
+                }
+                self.edges[dest] = edg;
+            }
+
+            for (self.edges[edges.len - count .. edges.len]) |*slot| {
+                slot.* = null;
+            }
+        }
+
         fn dbg(self: *Node) void {
             const values = self.leaf.get_values();
             const edges = self.get_edges()[0..values.len];
@@ -528,9 +797,146 @@ const BTree = struct {
             Leaf: struct { leaf: *Leaf, idx: u16 },
         };
 
+        /// True when this node holds fewer than `MIN_AFTER_SPLIT` keys.
+        fn needs_rebalance(self: *Leaf) bool {
+            return self.len < MIN_AFTER_SPLIT;
+        }
+
+        /// Restores the minimum fill of an under-full node; does nothing otherwise.
+        ///
+        /// A node with a parent first tries to rotate a key in from its fuller
+        /// sibling through the separating parent key. If that sibling has no key to
+        /// spare, the node, the separator and the sibling are merged into the left
+        /// of the two and the parent is rebalanced in turn. An empty root takes over
+        /// the contents of its first child.
+        ///
+        /// NOTE(review): only keys are moved when rotating or merging; children of
+        /// internal siblings are not re-homed yet.
+        fn rebalance(self: *Leaf) void {
+            if (!self.needs_rebalance()) return;
+
+            if (self.parent) |parent| {
+                // the leftmost child has no left sibling; `idx - 1` would wrap there.
+                const left_sib: ?NodeOrLeaf = if (parent.idx > 0)
+                    parent.parent.get_edge(parent.idx - 1)
+                else
+                    null;
+                const right_sib: ?NodeOrLeaf = parent.parent.get_edge(parent.idx + 1);
+
+                const Side = enum { Left, Right };
+                const Sibling = struct { node: NodeOrLeaf, side: Side };
+
+                // prefer the fuller sibling; on a tie take the right one.
+                const sibling: Sibling = blk: {
+                    if (left_sib) |left| {
+                        if (right_sib) |right| {
+                            if (left.as_leaf().len <= right.as_leaf().len) {
+                                break :blk .{ .node = right, .side = .Right };
+                            }
+                        }
+                        break :blk .{ .node = left, .side = .Left };
+                    }
+                    if (right_sib) |right| {
+                        break :blk .{ .node = right, .side = .Right };
+                    }
+                    // a non-root node always has at least one sibling.
+                    unreachable;
+                };
+
+                // index of the parent key that separates `self` from `sibling`.
+                const separator: u16 = switch (sibling.side) {
+                    .Left => parent.idx - 1,
+                    .Right => parent.idx,
+                };
+
+                if (sibling.node.as_leaf().len > MIN_AFTER_SPLIT) {
+                    // rotate: the separator moves down into `self`, and the sibling's key
+                    // closest to `self` moves up to take its place.
+                    NodeOrLeaf.from_leaf(self).push_value(parent.parent.leaf.values[separator]);
+                    const value = switch (sibling.side) {
+                        .Left => sibling.node.get_most_significant_value(),
+                        .Right => sibling.node.get_least_significant_value(),
+                    };
+                    sibling.node.as_leaf().remove_key(value);
+                    parent.parent.leaf.values[separator] = value;
+                } else {
+                    // merge
+                    const left = switch (sibling.side) {
+                        .Left => sibling.node.as_leaf(),
+                        .Right => self,
+                    };
+                    const right = switch (sibling.side) {
+                        .Right => sibling.node.as_leaf(),
+                        .Left => self,
+                    };
+
+                    defer right.destroy();
+
+                    const key = parent.parent.leaf.get_value(separator);
+
+                    // drop the parent edge to `right` (always the slot after the
+                    // separator) while the separator still counts towards the parent's
+                    // length, so every edge to its right takes part in the shift.
+                    _ = parent.parent.remove_edge(separator + 1);
+
+                    // merge parent separator into left
+                    parent.parent.leaf.remove_key(key);
+                    NodeOrLeaf.from_leaf(left).push_value(key);
+
+                    // copy values from right to left
+                    std.mem.copy(u32, left.values[left.len..], right.values[0..right.len]);
+                    left.len = left.len + right.len;
+
+                    parent.parent.as_leaf().rebalance();
+                }
+            } else {
+                // root node: if len is 0, just copy paste from child node if exists
+                if (self.len == 0) {
+                    switch (NodeOrLeaf.from_leaf(self)) {
+                        .internal => |root| {
+                            if (root.edges[0]) |edge| {
+                                defer edge.destroy();
+                                // copy values
+                                root.as_leaf().len = edge.as_leaf().len;
+                                root.as_leaf().level = edge.as_leaf().level;
+                                std.mem.copy(u32, root.leaf.values[0..], edge.as_leaf().values[0..]);
+
+                                switch (edge.force()) {
+                                    .internal => |child| {
+                                        std.mem.copy(?NodeOrLeaf, root.edges[0..], child.edges[0..]);
+
+                                        for (root.get_edges(), 0..) |e, i| {
+                                            if (e) |ee| {
+                                                ee.as_leaf().parent = .{ .parent = root, .idx = @intCast(u16, i) };
+                                            }
+                                        }
+                                    },
+                                    .leaf => {
+                                        // the child had no children of its own, so neither has the root now
+                                        for (&root.edges) |*e| {
+                                            e.* = null;
+                                        }
+                                    },
+                                }
+                            }
+                        },
+                        .leaf => {
+                            // a root leaf without keys is just an empty tree
+                        },
+                    }
+                }
+            }
+        }
+
+        /// Removes `key` from this node's own values, closing the gap it leaves.
+        /// An absent key leaves the node unchanged.
         fn remove_key(self: *Leaf, key: u32) void {
-            _ = self;
-            _ = key;
+            const values = self.get_values();
+            const n = std.mem.indexOfScalar(u32, values, key) orelse return;
+
+            // dest starts before source, so a forward copy is safe here
+            std.mem.copy(u32, self.values[n .. values.len - 1], values[n + 1 ..]);
+            self.len = self.len - 1;
         }
 
         fn find_key(self: *Leaf, key: u32) SearchResult {
@@ -572,6 +978,11 @@ const BTree = struct {
             const len = self.len;
             return self.values[0..len];
         }
+
+        /// Returns the key stored at position `idx` of this node's own values.
+        fn get_value(self: *Leaf, idx: u16) u32 {
+            return self.get_values()[idx];
+        }
     };
 };
 
@@ -673,6 +1084,52 @@ test "btree rand insert" {
     tree.dbg();
 }
 
+test "ref removal" {
+    std.testing.refAllDeclsRecursive(BTree.Leaf);
+    std.testing.refAllDeclsRecursive(BTree.NodeOrLeaf);
+}
+
+test "btree rebalance" {
+    std.debug.print("rebalance simple leaves\n", .{});
+
+    var tree = BTree.create(std.testing.allocator);
+    defer tree.destroy();
+
+    for (0..9) |i| {
+        _ = try tree.insert(@intCast(u32, i));
+    }
+
+    tree.dbg();
+    std.debug.print("\n", .{});
+
+    {
+        const node = tree.root.?.internal.edges[1].?;
+        node.as_leaf().len = node.as_leaf().len - 1;
+        node.as_leaf().rebalance();
+
+        tree.dbg();
+        std.debug.print("\n", .{});
+    }
+
+    {
+        const node = tree.root.?.internal.edges[1].?;
+        node.as_leaf().len = node.as_leaf().len - 1;
+        node.as_leaf().rebalance();
+
+        tree.dbg();
+        std.debug.print("\n", .{});
+    }
+
+    {
+        const node = tree.root.?.internal.edges[1].?;
+        node.as_leaf().len = node.as_leaf().len - 1;
+        node.as_leaf().rebalance();
+
+        tree.dbg();
+        std.debug.print("\n", .{});
+    }
+}
+
 test "btree least most sig value" {
     std.debug.print("least/most significant values\n", .{});
 