zig-btree/src/main.zig
2023-03-18 03:26:36 +01:00

650 lines
20 KiB
Zig

const std = @import("std");
const BTree = struct {
const Self = @This();
/// Branching parameter; node capacity and edge count are derived from it.
const B: usize = 3;
/// Maximum number of values a single node stores (`2 * B - 1`).
const CAPACITY: usize = 2 * B - 1;
/// Number of edge slots in an internal node (`2 * B`).
const NUM_EDGES: usize = 2 * B;
/// Allocator used when the tree creates nodes; falls back to the C allocator
/// if the field is not set explicitly.
ally: std.mem.Allocator = std.heap.c_allocator,
/// Root of the tree, or `null` while nothing has been inserted.
root: ?NodeOrLeaf,
/// Builds an empty tree whose nodes will be allocated from `ally`.
fn create(ally: std.mem.Allocator) Self {
    const empty: Self = .{ .ally = ally, .root = null };
    return empty;
}
/// Inserts `value` into the tree.
///
/// Returns `error.Occupied` when `value` is already stored and forwards any
/// allocation error raised while creating nodes.
fn insert(self: *Self, value: u32) !void {
    std.debug.print("attempting to insert {} into ", .{value});
    self.dbg();

    const root = self.root orelse {
        // Empty tree: the first value lives in a freshly created root leaf.
        // `Leaf.create` allocates and initialises in one step; nothing after
        // it can fail, so no cleanup handler is required.
        const leaf = try Leaf.create(self.ally);
        NodeOrLeaf.from_leaf(leaf).push_value(value);
        self.root = NodeOrLeaf{ .leaf = leaf };
        return;
    };

    switch (root.find_key(value)) {
        .Leaf => |node| {
            // Terminate the line so later debug output is not glued to it.
            std.debug.print("key already present: {}\n", .{node});
            return error.Occupied;
        },
        .Edge => |edge| {
            // The search stopped at the node that should receive the value.
            // A non-null result means the split bubbled all the way up and
            // the tree needs a new root above the two halves.
            const split = (try edge.leaf.insert_value(value)) orelse return;
            std.debug.print("reparenting root\n", .{});
            // NOTE(review): if this allocation fails, `split.right` is not
            // attached anywhere and is never freed.
            const parent = try Node.create(self.ally);
            parent.leaf.level = split.left.level + 1;
            NodeOrLeaf.from_leaf(parent.as_leaf()).push_value(split.middle);
            parent.insert_node(NodeOrLeaf.from_leaf(split.left));
            parent.insert_node(NodeOrLeaf.from_leaf(split.right));
            self.root = .{ .internal = parent };
        },
    }
}
/// Looks up `key` and returns the stored value when present, `null` otherwise.
/// An empty tree holds no keys, so it yields `null` instead of panicking.
fn find_key(self: *Self, key: u32) ?u32 {
    std.debug.print("attempting to find {}\n", .{key});
    const root = self.root orelse return null;
    return switch (root.find_key(key)) {
        .Leaf => |hit| hit.leaf.get_values()[hit.idx],
        // The search ended on an edge: the key is not stored.
        .Edge => null,
    };
}
/// Prints the whole tree followed by a newline; prints nothing when the tree
/// has no root.
fn dbg(self: *Self) void {
    const root = self.root orelse return;
    root.dbg();
    std.debug.print("\n", .{});
}
/// Frees every node reachable from the root and leaves the tree empty.
/// Calling it again afterwards is a no-op.
fn destroy(self: *Self) void {
    if (self.root) |root| {
        root.destroy();
        // Drop the handle so no dangling pointer to freed nodes remains.
        self.root = null;
    }
}
/// Tag type of `NodeOrLeaf`: distinguishes internal nodes from plain leaves.
const NodeOrLeafTag = enum {
internal,
leaf,
};
const NodeOrLeaf = union(NodeOrLeafTag) {
/// Pointer to a node that carries edges in addition to its values.
internal: *Node,
/// Pointer to a node that only stores values.
leaf: *Leaf,
/// Rebuilds the handle from the underlying leaf header, so the resulting tag
/// is whatever `from_leaf` reports for that header.
fn force(self: NodeOrLeaf) NodeOrLeaf {
    const header = self.as_leaf();
    return NodeOrLeaf.from_leaf(header);
}
/// Frees the referenced node, dispatching on the tag recomputed by `force`.
fn destroy(self: NodeOrLeaf) void {
    std.debug.print("destroying node\n", .{});
    const resolved = self.force();
    switch (resolved) {
        .leaf => |leaf| leaf.destroy(),
        .internal => |node| node.destroy(),
    }
}
/// Returns the leaf header of the referenced node: the leaf itself, or the
/// header that an internal node exposes through `Node.as_leaf`.
fn as_leaf(self: NodeOrLeaf) *Leaf {
    return switch (self) {
        .leaf => |leaf| leaf,
        .internal => |node| node.as_leaf(),
    };
}
/// Wraps a leaf header in the matching handle.
///
/// A header with `level == 0` is a plain leaf. Any other level means the
/// header is the `leaf` field embedded in a `Node`, so the enclosing node is
/// recovered with `@fieldParentPtr`. A plain pointer cast would only be valid
/// if `leaf` sat at offset 0 of `Node`, which Zig does not guarantee for
/// ordinary structs.
fn from_leaf(leaf: *Leaf) NodeOrLeaf {
    if (leaf.level == 0) {
        return .{ .leaf = leaf };
    }
    return .{ .internal = @fieldParentPtr(Node, "leaf", leaf) };
}
/// Splits a full node in two and inserts `value` into the appropriate half.
///
/// The upper values (and, for internal nodes, the upper edges) move into a
/// newly allocated sibling of the same kind and level. The value at index
/// `B - 1` is handed back as the separator. Fails only if allocating the
/// sibling fails.
fn split_at(self: NodeOrLeaf, value: u32) !Leaf.SplitResult {
    const left = self.as_leaf();

    // Index of the first stored value greater than `value`, or the last
    // index when there is none.
    var pivot: u16 = 0;
    for (left.get_values(), 0..) |stored, pos| {
        pivot = @intCast(u16, pos);
        if (stored > value) break;
    }
    std.debug.assert(left.len == CAPACITY);

    const right: *Leaf = switch (self) {
        .internal => |internal| blk: {
            const sibling = try Node.create(left.ally);
            std.mem.copy(?NodeOrLeaf, &sibling.edges, internal.edges[B..]);
            break :blk sibling.as_leaf();
        },
        .leaf => try Leaf.create(left.ally),
    };

    right.level = left.level;
    right.len = B - 1;
    std.mem.copy(u32, &right.values, left.values[B..]);

    const middle = left.values[B - 1];
    left.len = B - 1;

    // A pivot in the upper half means the new value belongs to the sibling.
    const target = if (pivot >= B) right else left;
    NodeOrLeaf.from_leaf(target).push_value(value);

    return .{ .left = left, .middle = middle, .right = right };
}
/// Inserts `value` into a node that still has room, keeping values ordered.
///
/// For internal nodes the edges to the right of the insertion point are
/// shifted by one and the slot directly right of the new value is cleared, so
/// a child can be attached there afterwards.
fn push_value(self: NodeOrLeaf, value: u32) void {
    const leaf = self.as_leaf();
    std.debug.assert(leaf.len < CAPACITY);

    // Position of the first stored value that is >= `value`, or the end.
    const slot: u16 = blk: {
        for (leaf.get_values(), 0..) |stored, pos| {
            if (stored >= value) break :blk @intCast(u16, pos);
        }
        break :blk leaf.len;
    };
    std.debug.print("placing {} in {}/{}th position\n", .{ value, slot, leaf.len });

    // Shift the tail one step to the right, back to front.
    var dst: usize = leaf.len;
    while (dst > slot) : (dst -= 1) {
        leaf.values[dst] = leaf.values[dst - 1];
    }
    leaf.values[slot] = value;

    switch (self) {
        .leaf => {},
        .internal => |node| {
            var edge_dst: usize = leaf.len + 1;
            while (edge_dst > slot + 1) : (edge_dst -= 1) {
                node.edges[edge_dst] = node.edges[edge_dst - 1];
            }
            node.edges[slot + 1] = null;
        },
    }
    leaf.len += 1;
}
/// Descends from this node towards `key`.
///
/// Returns a `.Leaf` result when the key is found. Otherwise returns the
/// `.Edge` result of the last node visited, i.e. the node below which no
/// further child exists.
fn find_key(self: NodeOrLeaf, key: u32) Leaf.SearchResult {
    var current = self.as_leaf();
    while (true) {
        const outcome = current.find_key(key);
        const edge = switch (outcome) {
            .Leaf => return outcome,
            .Edge => |e| e,
        };
        const internal = switch (NodeOrLeaf.from_leaf(edge.leaf)) {
            .internal => |node| node,
            .leaf => return outcome,
        };
        const child = internal.get_edges()[edge.idx] orelse return outcome;
        current = child.as_leaf();
        // TODO: hacky — the child's parent link is refreshed on every
        // descent; still unclear where it would otherwise go stale.
        current.parent = .{ .parent = internal, .idx = edge.idx };
    }
}
/// Prints the referenced node using the `dbg` of its concrete type.
fn dbg(self: NodeOrLeaf) void {
    switch (self) {
        .leaf => |leaf| leaf.dbg(),
        .internal => |node| node.dbg(),
    }
}
};
const Node = struct {
/// Embedded header holding this node's values, length, level and parent link.
leaf: Leaf,
/// Child slots; all start out `null`. `get_edges` exposes the first
/// `leaf.len + 1` of them.
edges: [NUM_EDGES]?NodeOrLeaf = [_]?NodeOrLeaf{null} ** NUM_EDGES,
/// Allocates an internal node from `ally` and initialises it.
/// Fails only if the allocation fails.
fn create(ally: std.mem.Allocator) !*Node {
    const fresh = try ally.create(Node);
    fresh.init(ally);
    return fresh;
}
/// Destroys every attached child in the active edge range, then frees this
/// node with the allocator stored in its header.
fn destroy(self: *Node) void {
    for (self.get_edges()) |slot| {
        const child = slot orelse continue;
        child.destroy();
    }
    self.leaf.ally.destroy(self);
}
/// Resets the node to its default state: header defaults with `ally` set and
/// every edge slot `null`.
fn init(self: *Node, ally: std.mem.Allocator) void {
    self.* = .{
        .leaf = .{ .ally = ally },
    };
}
/// Returns a pointer to the header embedded in this node.
fn as_leaf(self: *Node) *Leaf {
    const header: *Leaf = &self.leaf;
    return header;
}
/// Attaches `child` to the edge slot selected by the child's first value.
///
/// The slot is the index of the first value of this node that is greater than
/// the child's first value, or `len` when there is none. If the slot is
/// already taken, both subtrees are printed and the child is NOT attached.
fn insert_node(self: *Node, child: NodeOrLeaf) void {
    const header = self.as_leaf();
    const child_header = child.as_leaf();
    const smallest = child_header.get_values()[0];

    const slot: u16 = blk: {
        for (header.get_values(), 0..) |stored, pos| {
            if (stored > smallest) break :blk @intCast(u16, pos);
        }
        break :blk header.len;
    };

    const edges = self.get_edges();
    if (edges[slot]) |occupant| {
        std.debug.print("edge already present:", .{});
        child.dbg();
        std.debug.print(" - ", .{});
        occupant.dbg();
        std.debug.print("\n", .{});
        return;
    }

    child_header.parent = .{ .parent = self, .idx = slot };
    edges[slot] = child;
}
/// Tag type of `InsertResult`.
const InsertResultTag = enum {
Split,
RightFromParent,
};
/// Either a split result or a bare leaf pointer.
/// NOTE(review): not referenced anywhere in the visible source — possibly a
/// leftover from an earlier design; confirm before relying on it.
const InsertResult = union(InsertResultTag) {
Split: Leaf.SplitResult,
RightFromParent: *Leaf,
};
/// Absorbs the result of a child split into this node.
///
/// With room left, the separator is stored here and the new right half is
/// attached; the result is `null`. Otherwise this node is split as well, the
/// child's right half is attached to one of the halves via `concat`, and the
/// combined split is forwarded to the parent — or returned when this node has
/// no parent.
fn insert_split(self: *Node, split: Leaf.SplitResult) !?Leaf.SplitResult {
    std.debug.print("inserting split\n", .{});
    const header = self.as_leaf();
    const separator = split.middle;

    if (header.len < CAPACITY) {
        std.debug.print("pushing value {} into ", .{separator});
        self.dbg();
        std.debug.print("\n", .{});
        NodeOrLeaf.from_leaf(header).push_value(separator);
        std.debug.print("insert_split_insert_node ", .{});
        self.insert_node(NodeOrLeaf.from_leaf(split.right));
        return null;
    }

    std.debug.print("splitting node ", .{});
    self.dbg();
    const own_split = try header.split_at(separator);
    std.debug.print(" into [ ", .{});
    split.left.dbg();
    std.debug.print(", {}, ", .{split.middle});
    split.right.dbg();
    std.debug.print("]\n", .{});
    std.debug.print("concatinating splits\n", .{});
    const merged = Leaf.SplitResult.concat(own_split, split);

    const link = header.parent orelse return merged;
    std.debug.print("forwarding concat split\n", .{});
    return link.parent.insert_split(merged);
}
/// Returns the active edge slots: one more than the number of stored values.
fn get_edges(self: *Node) []?NodeOrLeaf {
    return self.edges[0 .. self.leaf.len + 1];
}
/// Prints the node as `{ [level] child, value, child, value, ... child }`,
/// leaving out edge slots that hold no child.
fn dbg(self: *Node) void {
    const values = self.leaf.get_values();
    const edges = self.get_edges();

    std.debug.print("{{ ", .{});
    std.debug.print("[{}] ", .{self.leaf.level});
    for (values, 0..) |stored, pos| {
        if (edges[pos]) |child| {
            child.dbg();
            std.debug.print(", ", .{});
        }
        std.debug.print("{}, ", .{stored});
    }
    // Right-most edge, which has no value after it.
    if (edges[values.len]) |last| {
        last.dbg();
    }
    std.debug.print(" }}", .{});
}
};
/// Back-reference from a node to the internal node it is attached to.
const ParentPtr = struct {
    /// The parent node.
    parent: *Node,
    /// Edge slot of `parent` recorded when the link was set.
    idx: u16,

    /// Converts an optional parent link into an optional handle on the parent
    /// node. A parent is always a `Node`, so the handle carries the `internal`
    /// tag; `null` is passed through unchanged.
    fn into_node_or_leaf(self: ?ParentPtr) ?NodeOrLeaf {
        const link = self orelse return null;
        return NodeOrLeaf{ .internal = link.parent };
    }
};
const Leaf = struct {
/// Allocator this node is freed with, and that splits use for new siblings.
ally: std.mem.Allocator,
/// 0 for a plain leaf; `NodeOrLeaf.from_leaf` treats any other value as
/// "this header is embedded in a `Node`".
level: usize = 0,
/// Link to the parent node and edge slot, or `null` when none is recorded.
parent: ?ParentPtr = null,
/// Number of entries of `values` currently in use.
len: u16 = 0,
/// Value storage; only the first `len` entries are meaningful.
values: [CAPACITY]u32 = undefined,
/// Allocates a leaf from `ally` and initialises it as empty.
/// Fails only if the allocation fails.
fn create(ally: std.mem.Allocator) !*Leaf {
    const fresh = try ally.create(Leaf);
    fresh.init(ally);
    return fresh;
}
/// Prints the stored values, without a trailing newline.
fn dbg(self: *Leaf) void {
    std.debug.print("{any}", .{self.get_values()});
}
/// Resets the leaf to its field defaults, keeping only `ally`.
fn init(self: *Leaf, ally: std.mem.Allocator) void {
    self.* = .{ .ally = ally };
}
/// Frees this leaf with the allocator stored in it.
fn destroy(self: *Leaf) void {
    const ally = self.ally;
    ally.destroy(self);
}
/// Outcome of splitting a full node into two halves plus a separator value.
const SplitResult = struct {
    /// Attached, old node that may be modified.
    left: *Leaf,
    /// Loose value that was previously attached and must be re-inserted.
    /// If the split travels all the way to the top, the new root needs a
    /// value, and this is it.
    middle: u32,
    /// New, free-floating node that must still be attached.
    right: *Leaf,

    /// Prints both halves through `Leaf.dbg`, i.e. values only.
    fn dbg(self: SplitResult) void {
        std.debug.print("[ ", .{});
        self.left.dbg();
        std.debug.print(", {}, ", .{self.middle});
        self.right.dbg();
        std.debug.print(" ]", .{});
    }

    /// Prints both halves through `NodeOrLeaf.dbg`, so internal nodes are
    /// printed with their children.
    fn dbg_verbose(self: SplitResult) void {
        std.debug.print("[ ", .{});
        NodeOrLeaf.from_leaf(self.left).dbg();
        std.debug.print(", {}, ", .{self.middle});
        NodeOrLeaf.from_leaf(self.right).dbg();
        std.debug.print(" ]", .{});
    }

    /// Attaches the right half of `child` to whichever half of `parent` it
    /// falls under and returns `parent`.
    ///
    /// Both halves of `parent` must be internal nodes (they originate from
    /// the child's parent). The enclosing `Node` is recovered from the
    /// embedded header with `@fieldParentPtr`, which stays correct whatever
    /// field layout the compiler picks for `Node`.
    fn concat(parent: SplitResult, child: SplitResult) SplitResult {
        std.debug.print("concatinating ", .{});
        parent.dbg();
        std.debug.print(" and ", .{});
        child.dbg();
        std.debug.print("\n", .{});

        const attached = NodeOrLeaf.from_leaf(child.right);
        // Only the child's separator is compared: the child sits entirely
        // between two values of the parent, so what holds for its separator
        // should hold for all of its values.
        if (child.middle < parent.middle) {
            std.debug.print("concatinate ", .{});
            @fieldParentPtr(Node, "leaf", parent.left).insert_node(attached);
        } else {
            // The separators cannot be equal, so the child is the larger one.
            std.debug.print("concatinate {} {} ", .{ child.middle, parent.middle });
            @fieldParentPtr(Node, "leaf", parent.right).insert_node(attached);
        }

        std.debug.print("concatinating into ", .{});
        parent.dbg_verbose();
        std.debug.print("\n", .{});
        return parent;
    }
};
/// Splits this node around `value` by delegating to `NodeOrLeaf.split_at`
/// on the handle derived from this header.
fn split_at(self: *Leaf, value: u32) !SplitResult {
    const handle = NodeOrLeaf.from_leaf(self);
    return handle.split_at(value);
}
/// Tag type of `SearchResult`.
const SearchResultTag = enum { Edge, Leaf };
/// Result of searching a single node for a key.
/// `Leaf` means the key is stored in `leaf` at value index `idx`.
/// `Edge` means the key is not in `leaf`; `idx` is the edge slot under which
/// it would be located.
const SearchResult = union(SearchResultTag) {
Edge: struct { leaf: *Leaf, idx: u16 },
Leaf: struct { leaf: *Leaf, idx: u16 },
};
/// Searches this node's values for `key`.
///
/// Returns `.Leaf` with the value index on an exact match. Otherwise returns
/// `.Edge` with the index of the first value greater than `key`, or `len`
/// when every stored value is smaller. An empty node yields `.Edge` with
/// index 0.
fn find_key(self: *Leaf, key: u32) SearchResult {
    const values = self.get_values();
    std.debug.print("looking for {} in {any}\n", .{ key, values });
    for (values, 0..) |v, i| {
        if (key < v) {
            std.debug.print("decending left of {}\n", .{v});
            return .{ .Edge = .{ .leaf = self, .idx = @intCast(u16, i) } };
        } else if (key == v) {
            std.debug.print("located {} at {}\n", .{ key, v });
            return .{ .Leaf = .{ .leaf = self, .idx = @intCast(u16, i) } };
        }
    }
    // Only name the right-most value when one exists; `len - 1` would
    // underflow on an empty node.
    if (values.len > 0) {
        std.debug.print("decending right of {}\n", .{values[values.len - 1]});
    }
    return .{ .Edge = .{ .leaf = self, .idx = self.len } };
}
/// Inserts `value` into this node, splitting it when it is full.
///
/// Returns `null` on success, or a split result that could not be merged
/// further up because the top of the tree was reached.
fn insert_value(self: *Leaf, value: u32) !?SplitResult {
    if (self.len < CAPACITY) {
        std.debug.print("pushing value {} into ", .{value});
        self.dbg();
        std.debug.print("\n", .{});
        NodeOrLeaf.from_leaf(self).push_value(value);
        return null;
    }

    std.debug.print("splitting node ", .{});
    self.dbg();
    const split = try self.split_at(value);
    std.debug.print(" into [ ", .{});
    split.left.dbg();
    std.debug.print(", {}, ", .{split.middle});
    split.right.dbg();
    std.debug.print("]\n", .{});

    // Without a parent the caller has to deal with the split itself.
    const link = self.parent orelse return split;
    return link.parent.insert_split(split);
}
/// Returns the used prefix of the value storage as a mutable slice.
fn get_values(self: *Leaf) []u32 {
    return self.values[0..self.len];
}
};
};
/// Program entry point: writes a banner to stderr and a usage hint to stdout.
pub fn main() !void {
    // Debug output goes to stderr.
    std.debug.print("All your {s} are belong to us.\n", .{"codebase"});

    // Application output goes to stdout, buffered and flushed explicitly.
    var buffered = std.io.bufferedWriter(std.io.getStdOut().writer());
    try buffered.writer().print("Run `zig build test` to run the tests.\n", .{});
    try buffered.flush();
}
// Checks that `get_values` exposes exactly the first `len` stored values.
test "btree leaf" {
    std.testing.refAllDeclsRecursive(BTree);
    std.testing.refAllDeclsRecursive(BTree.Leaf);
    var leaf = BTree.Leaf{ .ally = std.testing.allocator, .parent = null, .len = 2, .values = [_]u32{ 5, 6, undefined, undefined, undefined } };
    const values = leaf.get_values();
    std.debug.print("{?}\n", .{leaf});
    std.debug.print("{any}\n", .{values});
    // Assert the result instead of only printing it.
    try std.testing.expectEqualSlices(u32, &[_]u32{ 5, 6 }, values);
}
/// Prints the values stored in `leaf`, followed by a newline.
fn printValues(leaf: *BTree.Leaf) void {
    std.debug.print("{any}\n", .{leaf.get_values()});
}
// test "leaf split" {
// std.debug.print("testing splitting\n", .{});
// var tree = BTree.create(std.testing.allocator);
// defer tree.destroy();
// try tree.insert(2);
// try tree.insert(4);
// try tree.insert(6);
// try tree.insert(3);
// try tree.insert(7);
// std.debug.print("before split:", .{});
// printValues(tree.root.?.as_leaf());
// const split = try tree.root.?.as_leaf().split_at(5);
// std.debug.print("after split:", .{});
// printValues(tree.root.?.as_leaf());
// std.debug.print("split: {?}\n", .{split});
// tree.ally.destroy(split.right);
// }
// test "btree insert" {
// std.debug.print("testing insertion\n", .{});
// var tree = BTree.create(std.testing.allocator);
// defer tree.destroy();
// try tree.insert(10);
// try tree.insert(4);
// try tree.insert(6);
// try tree.insert(3);
// try tree.insert(9);
// try tree.insert(8);
// tree.dbg();
// }
// Inserts 0..99 in ascending order and prints the resulting tree.
test "btree seq insert" {
    std.debug.print("sequential insertions\n", .{});
    var tree = BTree.create(std.testing.allocator);
    defer tree.destroy();
    for (0..100) |i| {
        tree.insert(@intCast(u32, i)) catch |err| {
            // Only a duplicate key may be ignored; anything else (such as an
            // allocation failure) must fail the test.
            if (err != error.Occupied) return err;
            std.debug.print("{} already present - ignoring\n", .{i});
        };
    }
    tree.dbg();
}
// Inserts 1000 pseudo-random keys in 0..512 (fixed seed), then looks every
// one of them up again and reports keys that cannot be found.
test "btree rand insert" {
    std.debug.print("random insertions\n", .{});
    var tree = BTree.create(std.testing.allocator);
    defer tree.destroy();
    var buf = std.ArrayList(u32).init(std.testing.allocator);
    defer buf.deinit();
    var rng = std.rand.DefaultPrng.init(0);
    for (0..1000) |_| {
        const i = rng.random().intRangeAtMost(u32, 0, 512);
        try buf.append(i);
        tree.insert(i) catch |err| {
            // Duplicates are expected with this key range; any other error
            // (such as an allocation failure) must fail the test.
            if (err != error.Occupied) return err;
            std.debug.print("{} already present - ignoring\n", .{i});
        };
    }
    // NOTE(review): lost keys are only reported, not asserted — consider
    // failing the test here once the tree is expected to be lossless.
    for (buf.items) |i| {
        if (tree.find_key(i) == null) {
            std.debug.print("{} lost\n", .{i});
        }
    }
    tree.dbg();
}