650 lines
20 KiB
Zig
650 lines
20 KiB
Zig
const std = @import("std");
|
|
|
|
/// In-memory B-tree holding unique `u32` keys.
///
/// Nodes come in two flavours: `Leaf` (level 0, keys only) and `Node`
/// (level > 0, a `Leaf` header plus child edges). A `*Leaf` whose `level` is
/// non-zero is always the `leaf` field of a `Node`; `NodeOrLeaf.from_leaf`
/// relies on that invariant to recover the enclosing node.
///
/// Every operation traces its progress to stderr via `std.debug.print`.
const BTree = struct {
    const Self = @This();

    /// Split parameter: a full node splits into two halves of `B - 1` keys
    /// around a median key.
    const B: usize = 3;
    /// Maximum number of keys stored in one node.
    const CAPACITY: usize = 2 * B - 1;
    /// Maximum number of child edges of an internal node.
    const NUM_EDGES: usize = 2 * B;

    /// Errors `insert` can report.
    const InsertError = std.mem.Allocator.Error || error{Occupied};

    /// Allocator used for every node of this tree.
    /// NOTE(review): the default is kept for compatibility only; it presumably
    /// requires libc to be linked. Prefer `create`, which takes the allocator
    /// explicitly.
    ally: std.mem.Allocator = std.heap.c_allocator,
    /// Root of the tree, or `null` while the tree is empty.
    root: ?NodeOrLeaf,

    /// Returns an empty tree that allocates its nodes from `ally`.
    fn create(ally: std.mem.Allocator) Self {
        return Self{
            .ally = ally,
            .root = null,
        };
    }

    /// Inserts `value` into the tree.
    ///
    /// Returns `error.Occupied` when the key is already stored and
    /// `error.OutOfMemory` when a node cannot be allocated.
    /// NOTE(review): splits mutate nodes before later allocations happen, so an
    /// allocation failure in the middle of a cascade of splits can leave
    /// already-split halves unattached. This is not recovered from.
    fn insert(self: *Self, value: u32) InsertError!void {
        std.debug.print("attempting to insert {} into ", .{value});
        self.dbg();

        const root = self.root orelse {
            // First key: the tree becomes a single leaf.
            const leaf = try Leaf.create(self.ally);
            NodeOrLeaf.from_leaf(leaf).push_value(value);
            self.root = NodeOrLeaf{ .leaf = leaf };
            return;
        };

        switch (root.find_key(value)) {
            .Leaf => |node| {
                std.debug.print("key already present: {}", .{node});
                return error.Occupied;
            },
            .Edge => |edge| {
                // A non-null result means the split bubbled all the way up and
                // the tree has to grow by one level.
                const split = (try edge.leaf.insert_value(value)) orelse return;

                std.debug.print("reparenting root\n", .{});
                const parent = try Node.create(self.ally);
                // The level must be set before `from_leaf` is used on the new
                // node, otherwise it would be classified as a plain leaf.
                parent.leaf.level = split.left.level + 1;
                NodeOrLeaf.from_leaf(parent.as_leaf()).push_value(split.middle);
                parent.insert_node(NodeOrLeaf.from_leaf(split.left));
                parent.insert_node(NodeOrLeaf.from_leaf(split.right));

                self.root = NodeOrLeaf{ .internal = parent };
            },
        }
    }

    /// Looks `key` up and returns the stored key, or `null` when it is absent
    /// (including when the tree is empty).
    fn find_key(self: *Self, key: u32) ?u32 {
        std.debug.print("attempting to find {}\n", .{key});

        const root = self.root orelse return null;
        return switch (root.find_key(key)) {
            .Leaf => |hit| hit.leaf.get_values()[hit.idx],
            .Edge => null,
        };
    }

    /// Prints the whole tree followed by a newline; prints nothing when empty.
    fn dbg(self: *Self) void {
        if (self.root) |root| {
            root.dbg();
            std.debug.print("\n", .{});
        }
    }

    /// Frees every node and leaves the tree empty, so a repeated call is a
    /// no-op instead of a double free.
    fn destroy(self: *Self) void {
        if (self.root) |root| {
            root.destroy();
        }
        self.root = null;
    }

    const NodeOrLeafTag = enum {
        internal,
        leaf,
    };

    /// Typed handle to either an internal node or a leaf.
    const NodeOrLeaf = union(NodeOrLeafTag) {
        internal: *Node,
        leaf: *Leaf,

        /// Re-derives the tag from the node's `level`, ignoring the current tag.
        fn force(self: NodeOrLeaf) NodeOrLeaf {
            return NodeOrLeaf.from_leaf(self.as_leaf());
        }

        /// Frees this node and, for internal nodes, everything below it.
        fn destroy(self: NodeOrLeaf) void {
            std.debug.print("destroying node\n", .{});
            switch (self.force()) {
                .internal => |node| node.destroy(),
                .leaf => |leaf| leaf.destroy(),
            }
        }

        /// Returns the `Leaf` header shared by both node flavours.
        fn as_leaf(self: NodeOrLeaf) *Leaf {
            return switch (self) {
                .internal => |node| node.as_leaf(),
                .leaf => |leaf| leaf,
            };
        }

        /// Classifies `leaf` by its level. A non-zero level means `leaf` is the
        /// `leaf` field of a `Node`; the node is recovered with
        /// `@fieldParentPtr` so the result does not depend on struct layout.
        fn from_leaf(leaf: *Leaf) NodeOrLeaf {
            if (leaf.level == 0) {
                return NodeOrLeaf{ .leaf = leaf };
            }
            return NodeOrLeaf{ .internal = @fieldParentPtr(Node, "leaf", leaf) };
        }

        /// Splits a full node around its median key and then stores `value` in
        /// the half it belongs to.
        ///
        /// The receiver keeps the lower `B - 1` keys (`left`), a newly allocated
        /// sibling receives the upper `B - 1` keys (`right`), and the median is
        /// returned as `middle`. For internal nodes the upper `B` edges move to
        /// the sibling as well. The sibling is not attached to any parent.
        fn split_at(self: NodeOrLeaf, value: u32) std.mem.Allocator.Error!Leaf.SplitResult {
            const leaf = self.as_leaf();
            std.debug.assert(leaf.len == CAPACITY);

            const right: *Leaf = switch (self) {
                .internal => |internal| blk: {
                    const node = try Node.create(leaf.ally);
                    std.mem.copy(?NodeOrLeaf, &node.edges, internal.edges[B..]);

                    // The moved children now hang off the sibling.
                    for (node.edges, 0..) |maybe_child, slot| {
                        if (maybe_child) |child| {
                            child.as_leaf().parent = .{ .parent = node, .idx = @intCast(u16, slot) };
                        }
                    }
                    // Drop the stale aliases left behind in the lower half.
                    var slot: usize = B;
                    while (slot < NUM_EDGES) : (slot += 1) {
                        internal.edges[slot] = null;
                    }

                    break :blk node.as_leaf();
                },
                .leaf => try Leaf.create(leaf.ally),
            };
            // Set the level first: `from_leaf` below depends on it.
            right.level = leaf.level;

            right.len = B - 1;
            std.mem.copy(u32, &right.values, leaf.values[B..]);
            const middle = leaf.values[B - 1];
            leaf.len = B - 1;

            // Keys at or above the median belong to the upper half.
            const target = if (value >= middle) right else leaf;
            NodeOrLeaf.from_leaf(target).push_value(value);

            return Leaf.SplitResult{ .left = leaf, .middle = middle, .right = right };
        }

        /// Inserts `value` at its sorted position; the node must not be full.
        ///
        /// For internal nodes the edges to the right of the new key shift by
        /// one and the slot directly right of the new key is set to `null`,
        /// ready to receive the new child.
        fn push_value(self: NodeOrLeaf, value: u32) void {
            const leaf = self.as_leaf();
            std.debug.assert(leaf.len < CAPACITY);

            const len = leaf.len;
            var pos: u16 = len;
            for (leaf.get_values(), 0..) |existing, i| {
                if (existing >= value) {
                    pos = @intCast(u16, i);
                    break;
                }
            }

            std.debug.print("placing {} in {}/{}th position\n", .{ value, pos, len });

            // Source and destination overlap with the destination further
            // right, hence the backwards copy.
            std.mem.copyBackwards(u32, leaf.values[pos + 1 .. len + 1], leaf.values[pos..len]);
            leaf.values[pos] = value;

            switch (self) {
                .internal => |node| {
                    std.mem.copyBackwards(
                        ?NodeOrLeaf,
                        node.edges[pos + 2 .. len + 2],
                        node.edges[pos + 1 .. len + 1],
                    );
                    node.edges[pos + 1] = null;
                },
                .leaf => {},
            }

            leaf.len = len + 1;
        }

        /// Descends from this node towards `key`.
        ///
        /// Returns `.Leaf` with the node and index holding the key, or `.Edge`
        /// with the node where the descent stopped and the edge index the key
        /// would live under. As a side effect the `parent` pointer of every
        /// node visited below the start node is refreshed; insertion relies on
        /// that to walk back up.
        fn find_key(self: NodeOrLeaf, key: u32) Leaf.SearchResult {
            var current = self.as_leaf();
            while (true) {
                const search = current.find_key(key);
                switch (search) {
                    .Leaf => return search,
                    .Edge => |edge| switch (NodeOrLeaf.from_leaf(edge.leaf)) {
                        .leaf => return search,
                        .internal => |internal| {
                            const child = internal.get_edges()[edge.idx] orelse return search;
                            current = child.as_leaf();
                            current.parent = .{ .parent = internal, .idx = edge.idx };
                        },
                    },
                }
            }
        }

        /// Prints the node (and, for internal nodes, its subtree).
        fn dbg(self: NodeOrLeaf) void {
            switch (self) {
                .internal => |node| node.dbg(),
                .leaf => |node| node.dbg(),
            }
        }
    };

    /// Internal node: a `Leaf` header carrying the keys plus the child edges.
    const Node = struct {
        leaf: Leaf,

        /// Edge `i` leads to keys below `values[i]`; edge `len` to keys above
        /// the last value. Only the first `len + 1` entries are meaningful.
        edges: [NUM_EDGES]?NodeOrLeaf = [_]?NodeOrLeaf{null} ** NUM_EDGES,

        /// Allocates an empty node. Its level is still 0; callers must set it
        /// before the node is passed through `NodeOrLeaf.from_leaf`.
        fn create(ally: std.mem.Allocator) std.mem.Allocator.Error!*Node {
            const node = try ally.create(Node);
            node.init(ally);
            return node;
        }

        /// Frees all attached children, then the node itself.
        fn destroy(self: *Node) void {
            for (self.get_edges()) |maybe_child| {
                if (maybe_child) |child| {
                    child.destroy();
                }
            }
            self.leaf.ally.destroy(self);
        }

        /// Resets the node to an empty state owned by `ally`.
        fn init(self: *Node, ally: std.mem.Allocator) void {
            self.* = Node{ .leaf = Leaf{ .ally = ally } };
        }

        /// Returns the embedded `Leaf` header.
        fn as_leaf(self: *Node) *Leaf {
            return &self.leaf;
        }

        /// Attaches `child` under the edge selected by the child's first key
        /// and records this node as its parent.
        ///
        /// If that edge is already taken the child is NOT attached; the clash
        /// is only reported on stderr.
        fn insert_node(self: *Node, child: NodeOrLeaf) void {
            const self_leaf = self.as_leaf();
            const first_key = child.as_leaf().get_values()[0];

            var idx: u16 = self_leaf.len;
            for (self_leaf.get_values(), 0..) |v, i| {
                if (v > first_key) {
                    idx = @intCast(u16, i);
                    break;
                }
            }

            if (self.get_edges()[idx]) |edge| {
                std.debug.print("edge already present:", .{});
                child.dbg();
                std.debug.print(" - ", .{});
                edge.dbg();
                std.debug.print("\n", .{});
            } else {
                child.as_leaf().parent = .{ .parent = self, .idx = idx };
                self.get_edges()[idx] = child;
            }
        }

        const InsertResultTag = enum {
            Split,
            RightFromParent,
        };

        const InsertResult = union(InsertResultTag) {
            Split: Leaf.SplitResult,
            RightFromParent: *Leaf,
        };

        /// Absorbs the result of a child split: stores the child's median key
        /// and attaches the child's new right half.
        ///
        /// When this node is full it is split as well and the work is forwarded
        /// to its parent. Returns `null` once the split has been absorbed, or
        /// the split that could not be pushed further up because the root was
        /// reached.
        fn insert_split(self: *Node, split: Leaf.SplitResult) std.mem.Allocator.Error!?Leaf.SplitResult {
            std.debug.print("inserting split\n", .{});

            const leaf = self.as_leaf();
            const value = split.middle;

            if (leaf.len < CAPACITY) {
                std.debug.print("pushing value {} into ", .{value});
                self.dbg();
                std.debug.print("\n", .{});

                (NodeOrLeaf{ .internal = self }).push_value(value);
                std.debug.print("insert_split_insert_node ", .{});
                self.insert_node(NodeOrLeaf.from_leaf(split.right));
                return null;
            }

            std.debug.print("splitting node ", .{});
            self.dbg();
            const parent_split = try leaf.split_at(value);
            // Report the split that was just performed on this node.
            std.debug.print(" into [ ", .{});
            parent_split.left.dbg();
            std.debug.print(", {}, ", .{parent_split.middle});
            parent_split.right.dbg();
            std.debug.print("]\n", .{});

            std.debug.print("concatinating splits\n", .{});
            const next_split = Leaf.SplitResult.concat(parent_split, split);

            if (leaf.parent) |parent| {
                std.debug.print("forwarding concat split\n", .{});
                return parent.parent.insert_split(next_split);
            }
            return next_split;
        }

        /// Returns the `len + 1` edges that are in use.
        fn get_edges(self: *Node) []?NodeOrLeaf {
            return self.edges[0 .. self.leaf.len + 1];
        }

        /// Prints the node as `{ [level] child, key, child, key, ... child }`.
        fn dbg(self: *Node) void {
            const values = self.leaf.get_values();
            const edges = self.get_edges()[0..values.len];
            std.debug.print("{{ ", .{});
            std.debug.print("[{}] ", .{self.leaf.level});
            for (values, edges) |v, e| {
                if (e) |edge| {
                    edge.dbg();
                    std.debug.print(", ", .{});
                }
                std.debug.print("{}, ", .{v});
            }
            if (self.get_edges()[values.len]) |edge| {
                edge.dbg();
            }
            std.debug.print(" }}", .{});
        }
    };

    /// Back-pointer from a node to the internal node it hangs off.
    const ParentPtr = struct {
        parent: *Node,
        /// Edge slot recorded when the pointer was written. Later key shifts in
        /// the parent do not update it.
        idx: u16,

        /// Converts an optional parent pointer into an optional node handle.
        fn into_node_or_leaf(self: ?ParentPtr) ?NodeOrLeaf {
            const ptr = self orelse return null;
            return NodeOrLeaf{ .internal = ptr.parent };
        }
    };

    /// Key storage shared by both node flavours; on its own it is a leaf.
    const Leaf = struct {
        ally: std.mem.Allocator,
        /// 0 for real leaves; non-zero when embedded in a `Node`.
        level: usize = 0,
        parent: ?ParentPtr = null,

        /// Number of valid entries in `values`.
        len: u16 = 0,
        values: [CAPACITY]u32 = undefined,

        /// Allocates an empty leaf from `ally`.
        fn create(ally: std.mem.Allocator) std.mem.Allocator.Error!*Leaf {
            const leaf = try ally.create(Leaf);
            leaf.init(ally);
            return leaf;
        }

        /// Prints the stored keys.
        fn dbg(self: *Leaf) void {
            std.debug.print("{any}", .{self.get_values()});
        }

        /// Resets the leaf to an empty state owned by `ally`.
        fn init(self: *Leaf, ally: std.mem.Allocator) void {
            self.* = Leaf{ .ally = ally };
        }

        /// Frees the leaf. Only valid for leaves allocated on their own, not
        /// for the header embedded in a `Node`.
        fn destroy(self: *Leaf) void {
            self.ally.destroy(self);
        }

        /// Outcome of splitting a full node.
        const SplitResult = struct {
            /// The original node, still attached, now holding the lower keys.
            left: *Leaf,
            /// Median key that was removed from the node and must be stored in
            /// the parent (or in a new root).
            middle: u32,
            /// Newly allocated node holding the upper keys; not attached yet.
            right: *Leaf,

            /// Prints `[ left, middle, right ]` showing only the keys.
            fn dbg(self: SplitResult) void {
                std.debug.print("[ ", .{});
                self.left.dbg();
                std.debug.print(", {}, ", .{self.middle});
                self.right.dbg();
                std.debug.print(" ]", .{});
            }

            /// Like `dbg`, but prints the halves including their subtrees.
            fn dbg_verbose(self: SplitResult) void {
                std.debug.print("[ ", .{});
                NodeOrLeaf.from_leaf(self.left).dbg();
                std.debug.print(", {}, ", .{self.middle});
                NodeOrLeaf.from_leaf(self.right).dbg();
                std.debug.print(" ]", .{});
            }

            /// Attaches the right half of a `child` split to whichever half of
            /// the `parent` split now holds the child's median, and returns
            /// `parent` for further propagation.
            ///
            /// Both halves of `parent` must be internal nodes; they are, since
            /// `parent` is the split of the child's parent.
            fn concat(parent: SplitResult, child: SplitResult) SplitResult {
                std.debug.print("concatinating ", .{});
                parent.dbg();
                std.debug.print(" and ", .{});
                child.dbg();
                std.debug.print("\n", .{});

                // Keys are unique, so the child's median is either strictly
                // below or strictly above the parent's median.
                if (child.middle < parent.middle) {
                    std.debug.print("concatinate ", .{});
                    @fieldParentPtr(Node, "leaf", parent.left).insert_node(NodeOrLeaf.from_leaf(child.right));
                } else {
                    std.debug.print("concatinate {} {} ", .{ child.middle, parent.middle });
                    @fieldParentPtr(Node, "leaf", parent.right).insert_node(NodeOrLeaf.from_leaf(child.right));
                }

                std.debug.print("concatinating into ", .{});
                parent.dbg_verbose();
                std.debug.print("\n", .{});

                return parent;
            }
        };

        /// Splits this full node; see `NodeOrLeaf.split_at`.
        fn split_at(self: *Leaf, value: u32) std.mem.Allocator.Error!SplitResult {
            return NodeOrLeaf.from_leaf(self).split_at(value);
        }

        const SearchResultTag = enum { Edge, Leaf };

        /// `.Leaf`: the key is stored at `leaf.values[idx]`.
        /// `.Edge`: the key is absent from `leaf` and would live under edge `idx`.
        const SearchResult = union(SearchResultTag) {
            Edge: struct { leaf: *Leaf, idx: u16 },
            Leaf: struct { leaf: *Leaf, idx: u16 },
        };

        /// Searches the keys of this single node for `key`.
        fn find_key(self: *Leaf, key: u32) SearchResult {
            const values = self.get_values();
            std.debug.print("looking for {} in {any}\n", .{ key, values });
            for (values, 0..) |v, i| {
                if (key < v) {
                    std.debug.print("decending left of {}\n", .{v});
                    return .{ .Edge = .{ .leaf = self, .idx = @intCast(u16, i) } };
                } else if (key == v) {
                    std.debug.print("located {} at {}\n", .{ key, v });
                    return .{ .Leaf = .{ .leaf = self, .idx = @intCast(u16, i) } };
                }
            }

            // An empty node has no last key to report; skipping the message
            // avoids computing `len - 1` with `len == 0`.
            if (values.len > 0) {
                std.debug.print("decending right of {}\n", .{values[values.len - 1]});
            }
            return .{ .Edge = .{ .leaf = self, .idx = self.len } };
        }

        /// Stores `value` in this node, splitting it when it is full.
        ///
        /// Returns `null` on success, or the split that could not be merged
        /// further up because the root was reached.
        fn insert_value(self: *Leaf, value: u32) std.mem.Allocator.Error!?SplitResult {
            if (self.len < CAPACITY) {
                std.debug.print("pushing value {} into ", .{value});
                self.dbg();
                std.debug.print("\n", .{});

                NodeOrLeaf.from_leaf(self).push_value(value);
                return null;
            }

            std.debug.print("splitting node ", .{});
            self.dbg();
            const split = try self.split_at(value);
            std.debug.print(" into [ ", .{});
            split.left.dbg();
            std.debug.print(", {}, ", .{split.middle});
            split.right.dbg();
            std.debug.print("]\n", .{});

            if (self.parent) |parent| {
                return parent.parent.insert_split(split);
            }
            return split;
        }

        /// Returns the valid keys as a slice into `values`.
        fn get_values(self: *Leaf) []u32 {
            return self.values[0..self.len];
        }
    };
};
|
|
|
|
/// Program entry point: writes a banner to stderr and a usage hint to stdout.
pub fn main() !void {
    // Diagnostics belong on stderr; `std.debug.print` writes there.
    std.debug.print("All your {s} are belong to us.\n", .{"codebase"});

    // Real program output goes to stdout through a buffered writer, so it has
    // to be flushed explicitly before returning.
    var buffered_out = std.io.bufferedWriter(std.io.getStdOut().writer());
    try buffered_out.writer().print("Run `zig build test` to run the tests.\n", .{});
    try buffered_out.flush();
}
|
|
|
|
// Checks that `get_values` exposes exactly the first `len` keys of a leaf.
test "btree leaf" {
    std.testing.refAllDeclsRecursive(BTree);
    std.testing.refAllDeclsRecursive(BTree.Leaf);

    var leaf = BTree.Leaf{
        .ally = std.testing.allocator,
        .parent = null,
        .len = 2,
        .values = [_]u32{ 5, 6, undefined, undefined, undefined },
    };
    const values = leaf.get_values();

    try std.testing.expectEqual(@as(usize, 2), values.len);
    try std.testing.expectEqualSlices(u32, &[_]u32{ 5, 6 }, values);

    // Only the valid prefix is printed; formatting the whole struct would
    // read the `undefined` tail of `values`.
    std.debug.print("{any}\n", .{values});
}
|
|
|
|
/// Prints the keys currently stored in `leaf`, followed by a newline, to stderr.
fn printValues(leaf: *BTree.Leaf) void {
    std.debug.print("{any}\n", .{leaf.get_values()});
}
|
|
|
|
// test "leaf split" {
|
|
// std.debug.print("testing splitting\n", .{});
|
|
|
|
// var tree = BTree.create(std.testing.allocator);
|
|
// defer tree.destroy();
|
|
// try tree.insert(2);
|
|
// try tree.insert(4);
|
|
// try tree.insert(6);
|
|
// try tree.insert(3);
|
|
// try tree.insert(7);
|
|
// std.debug.print("before split:", .{});
|
|
// printValues(tree.root.?.as_leaf());
|
|
|
|
// const split = try tree.root.?.as_leaf().split_at(5);
|
|
|
|
// std.debug.print("after split:", .{});
|
|
// printValues(tree.root.?.as_leaf());
|
|
|
|
// std.debug.print("split: {?}\n", .{split});
|
|
// tree.ally.destroy(split.right);
|
|
// }
|
|
|
|
// test "btree insert" {
|
|
// std.debug.print("testing insertion\n", .{});
|
|
// var tree = BTree.create(std.testing.allocator);
|
|
// defer tree.destroy();
|
|
// try tree.insert(10);
|
|
// try tree.insert(4);
|
|
// try tree.insert(6);
|
|
// try tree.insert(3);
|
|
// try tree.insert(9);
|
|
// try tree.insert(8);
|
|
// tree.dbg();
|
|
// }
|
|
|
|
// Inserts 0..100 in ascending order and verifies every key can be found again.
test "btree seq insert" {
    std.debug.print("sequential insertions\n", .{});

    var tree = BTree.create(std.testing.allocator);
    defer tree.destroy();

    // The keys are distinct, so any error here (including OutOfMemory) is a
    // genuine failure and must not be swallowed.
    var key: u32 = 0;
    while (key < 100) : (key += 1) {
        try tree.insert(key);
    }

    key = 0;
    while (key < 100) : (key += 1) {
        try std.testing.expectEqual(@as(?u32, key), tree.find_key(key));
    }
    // A key that was never inserted must be reported as absent.
    try std.testing.expectEqual(@as(?u32, null), tree.find_key(100));

    tree.dbg();
}
|
|
|
|
// Inserts 1000 pseudo-random keys from 0..=512 (duplicates expected) and
// verifies that every generated key can be found afterwards.
test "btree rand insert" {
    std.debug.print("random insertions\n", .{});

    var tree = BTree.create(std.testing.allocator);
    defer tree.destroy();
    var buf = std.ArrayList(u32).init(std.testing.allocator);
    defer buf.deinit();

    // Fixed seed keeps the sequence reproducible between runs.
    var rng = std.rand.DefaultPrng.init(0);
    const random = rng.random();

    for (0..1000) |_| {
        const key = random.intRangeAtMost(u32, 0, 512);
        try buf.append(key);
        // Duplicates are expected and ignored; every other error fails the test.
        tree.insert(key) catch |err| switch (err) {
            error.Occupied => std.debug.print("{} already present - ignoring\n", .{key}),
            else => |e| return e,
        };
    }

    // A lost key is a test failure, not just a log line.
    for (buf.items) |key| {
        try std.testing.expectEqual(@as(?u32, key), tree.find_key(key));
    }

    tree.dbg();
}
|