const std = @import("std");

/// B-tree of `u32` keys with minimum degree `B`.
///
/// Every node stores up to `CAPACITY` sorted keys. Internal nodes (`Node`)
/// embed a `Leaf` for their key storage and add up to `NUM_EDGES` child
/// links. A node whose `level` is 0 is a plain `Leaf`; any other level means
/// the `Leaf` is the `leaf` field of a `Node`.
const BTree = struct {
    const Self = @This();
    const B: usize = 3;
    const CAPACITY: usize = 2 * B - 1;
    const NUM_EDGES: usize = 2 * B;

    /// Allocator used for the root leaf and for the new root created when the
    /// old root splits.
    ally: std.mem.Allocator = std.heap.c_allocator,
    /// Root of the tree, or null while the tree is empty.
    root: ?NodeOrLeaf,

    /// Returns an empty tree that allocates its nodes from `ally`.
    fn create(ally: std.mem.Allocator) Self {
        return Self{
            .ally = ally,
            .root = null,
        };
    }

    /// Inserts `value` into the tree.
    ///
    /// Returns `error.Occupied` when the key is already stored, and forwards
    /// any allocation error raised while creating or splitting nodes.
    fn insert(self: *Self, value: u32) !void {
        std.debug.print("attempting to insert {} into ", .{value});
        self.dbg();
        if (self.root) |*root| {
            const search = root.find_key(value);
            switch (search) {
                .Leaf => |node| {
                    std.debug.print("key already present: {}", .{node});
                    return error.Occupied;
                },
                .Edge => |edge| {
                    const result = try edge.leaf.insert_value(value);
                    if (result) |split| {
                        std.debug.print("reparenting root\n", .{});
                        // The split bubbled all the way up: create a new root
                        // one level above the two halves.
                        // NOTE(review): if this allocation fails, `split.right`
                        // is not reachable from the tree and is not freed here.
                        const parent = try Node.create(self.ally);
                        parent.leaf.level = split.left.level + 1;
                        NodeOrLeaf.from_leaf(parent.as_leaf()).push_value(split.middle);
                        parent.insert_node(NodeOrLeaf.from_leaf(split.left));
                        parent.insert_node(NodeOrLeaf.from_leaf(split.right));
                        self.root = .{ .internal = parent };
                    }
                },
            }
        } else {
            // First insertion: the root starts out as a single leaf.
            const leaf = try Leaf.create(self.ally);
            NodeOrLeaf.from_leaf(leaf).push_value(value);
            self.root = NodeOrLeaf{ .leaf = leaf };
        }
    }

    /// Returns the stored key equal to `key`, or null when it is absent
    /// (including when the tree is empty).
    fn find_key(self: *Self, key: u32) ?u32 {
        std.debug.print("attempting to find {}\n", .{key});
        const root = self.root orelse return null;
        return switch (root.find_key(key)) {
            .Leaf => |hit| hit.leaf.get_values()[hit.idx],
            .Edge => null,
        };
    }

    /// Prints the whole tree followed by a newline; prints nothing when empty.
    fn dbg(self: *Self) void {
        if (self.root) |root| {
            root.dbg();
            std.debug.print("\n", .{});
        }
    }

    /// Frees every node reachable from the root and leaves the tree empty.
    fn destroy(self: *Self) void {
        if (self.root) |root| {
            root.destroy();
            // Drop the dangling handle so a repeated destroy is a no-op.
            self.root = null;
        }
    }

    const NodeOrLeafTag = enum {
        internal,
        leaf,
    };

    /// Handle to either an internal `Node` or a plain `Leaf`.
    const NodeOrLeaf = union(NodeOrLeafTag) {
        internal: *Node,
        leaf: *Leaf,

        /// Rebuilds the handle from the underlying leaf so that the tag
        /// reflects the leaf's `level`.
        fn force(self: NodeOrLeaf) NodeOrLeaf {
            return NodeOrLeaf.from_leaf(self.as_leaf());
        }

        /// Frees the referenced node; internal nodes free their children too.
        fn destroy(self: NodeOrLeaf) void {
            std.debug.print("destroying node\n", .{});
            switch (self.force()) {
                .internal => |node| node.destroy(),
                .leaf => |leaf| leaf.destroy(),
            }
        }

        /// Returns the key storage of the referenced node.
        fn as_leaf(self: NodeOrLeaf) *Leaf {
            return switch (self) {
                .internal => |node| node.as_leaf(),
                .leaf => |leaf| leaf,
            };
        }

        /// Wraps `leaf` in a handle, choosing the tag from `leaf.level`.
        ///
        /// A leaf with a non-zero level must be the `leaf` field of a `Node`.
        /// The enclosing node is recovered with `@fieldParentPtr` instead of a
        /// plain pointer cast because the field order of a regular struct is
        /// not guaranteed.
        fn from_leaf(leaf: *Leaf) NodeOrLeaf {
            if (leaf.level == 0) {
                return .{ .leaf = leaf };
            }
            return .{ .internal = @fieldParentPtr(Node, "leaf", leaf) };
        }

        /// Splits a full node around its median key and inserts `value` into
        /// the half it belongs to.
        ///
        /// The lower `B - 1` keys stay in this node (`left`), the median is
        /// returned as `middle`, and the upper `B - 1` keys (plus the upper
        /// `B` edges for internal nodes) move to a newly allocated `right`.
        fn split_at(self: NodeOrLeaf, value: u32) !Leaf.SplitResult {
            const leaf = self.as_leaf();
            // Index of the first key greater than `value`, or the last index
            // when no key is greater.
            var idx: u16 = 0;
            for (leaf.get_values(), 0..) |v, i| {
                idx = @intCast(u16, i);
                if (v > value) {
                    break;
                }
            }
            std.debug.assert(leaf.len == CAPACITY);
            //std.debug.assert(idx > 0 and idx < CAPACITY - 1);
            const new: *Leaf = switch (self) {
                .internal => |internal| blk: {
                    const node = try Node.create(leaf.ally);
                    std.mem.copy(?NodeOrLeaf, &node.edges, internal.edges[B..]);
                    break :blk node.as_leaf();
                },
                .leaf => try Leaf.create(leaf.ally),
            };
            new.level = leaf.level;
            new.len = B - 1;
            std.mem.copy(u32, &new.values, leaf.values[B..]);
            const middle = leaf.values[B - 1];
            leaf.len = B - 1;
            // The new value belongs in the right half when it sorts after the
            // median, otherwise in the left half.
            if (idx >= B) {
                NodeOrLeaf.from_leaf(new).push_value(value);
            } else {
                NodeOrLeaf.from_leaf(leaf).push_value(value);
            }
            return .{ .left = leaf, .middle = middle, .right = new };
        }

        /// Inserts `value` at its sorted position; the node must not be full.
        ///
        /// For internal nodes the edges to the right of the insertion point
        /// are shifted by one and an empty edge slot is left directly after
        /// the new key.
        fn push_value(self: NodeOrLeaf, value: u32) void {
            const leaf = self.as_leaf();
            std.debug.assert(leaf.len < CAPACITY);
            // Position of the first key that is >= value (append otherwise).
            var n = leaf.len;
            for (leaf.get_values(), 0..) |val, i| {
                if (val >= value) {
                    n = @intCast(u16, i);
                    break;
                }
            }
            std.debug.print("placing {} in {}/{}th position\n", .{ value, n, leaf.len });
            // Ripple the keys from `n` onwards one slot to the right.
            var tmp = value;
            for (leaf.get_values()[n..]) |*val| {
                const t = val.*;
                val.* = tmp;
                tmp = t;
            }
            leaf.values[leaf.len] = tmp;
            switch (self) {
                .internal => |node| {
                    // Same ripple for the edges, starting with an empty slot.
                    var tmp2: ?NodeOrLeaf = null;
                    for (node.get_edges()[n + 1 ..]) |*edge| {
                        const t = edge.*;
                        edge.* = tmp2;
                        tmp2 = t;
                    }
                    node.edges[leaf.len + 1] = tmp2;
                },
                else => {},
            }
            leaf.len = leaf.len + 1;
        }

        /// Descends from this node looking for `key`.
        ///
        /// Returns `.Leaf` with the holding node and index when the key is
        /// found, otherwise `.Edge` naming the deepest node reached and the
        /// edge index at which the key would continue.
        fn find_key(self: NodeOrLeaf, key: u32) Leaf.SearchResult {
            var leaf = self.as_leaf();
            while (true) {
                const search = leaf.find_key(key);
                switch (search) {
                    .Leaf => {
                        return search;
                    },
                    .Edge => |edge| {
                        const node = NodeOrLeaf.from_leaf(edge.leaf);
                        switch (node) {
                            .internal => |internal| {
                                if (internal.get_edges()[edge.idx]) |child| {
                                    leaf = child.as_leaf();
                                    // TODO: incredibly hacky I think..
                                    // gotta figure out WHERE this would even happen..
                                    // Refreshes the child's parent link on the way down.
                                    leaf.parent = .{ .parent = internal, .idx = edge.idx };
                                    continue;
                                }
                            },
                            .leaf => {},
                        }
                        return search;
                    },
                }
            }
        }

        /// Prints the referenced node.
        fn dbg(self: NodeOrLeaf) void {
            switch (self) {
                .internal => |node| node.dbg(),
                .leaf => |node| node.dbg(),
            }
        }
    };

    /// Internal node: key storage plus child links.
    const Node = struct {
        /// Key storage; `from_leaf` maps a pointer to this field back to the
        /// enclosing node.
        leaf: Leaf,
        /// Child links; only the first `leaf.len + 1` entries are live.
        edges: [NUM_EDGES]?NodeOrLeaf = [_]?NodeOrLeaf{null} ** NUM_EDGES,

        /// Allocates and initialises an empty node.
        fn create(ally: std.mem.Allocator) !*Node {
            const node = try ally.create(Node);
            node.init(ally);
            return node;
        }

        /// Frees every live child and then the node itself.
        fn destroy(self: *Node) void {
            for (self.get_edges()) |edge| {
                if (edge) |child| {
                    child.destroy();
                }
            }
            self.leaf.ally.destroy(self);
        }

        /// Resets the node to an empty state that allocates from `ally`.
        fn init(self: *Node, ally: std.mem.Allocator) void {
            self.* = Node{ .leaf = Leaf{ .ally = ally } };
        }

        /// Returns the embedded key storage.
        fn as_leaf(self: *Node) *Leaf {
            return &self.leaf;
        }

        /// Attaches `child` at the edge chosen by comparing the child's first
        /// key against this node's keys.
        ///
        /// When that edge is already taken the child is NOT attached; only a
        /// diagnostic is printed.
        fn insert_node(self: *Node, child: NodeOrLeaf) void {
            const self_leaf = self.as_leaf();
            const ls = child.as_leaf().get_values()[0];
            var idx: u16 = self_leaf.len;
            for (self_leaf.get_values(), 0..) |v, i| {
                if (v > ls) {
                    idx = @intCast(u16, i);
                    break;
                }
            }
            if (self.get_edges()[idx]) |edge| {
                std.debug.print("edge already present:", .{});
                child.dbg();
                std.debug.print(" - ", .{});
                edge.dbg();
                std.debug.print("\n", .{});
            } else {
                child.as_leaf().parent = .{ .parent = self, .idx = idx };
                self.get_edges()[idx] = child;
            }
        }

        const InsertResultTag = enum {
            Split,
            RightFromParent,
        };

        const InsertResult = union(InsertResultTag) {
            Split: Leaf.SplitResult,
            RightFromParent: *Leaf,
        };

        /// Absorbs a child's split into this node.
        ///
        /// Returns null when the median and the new right child fit here.
        /// Otherwise this node is split as well and the result is forwarded
        /// to its parent, or returned when this node has no parent.
        fn insert_split(self: *Node, split: Leaf.SplitResult) !?Leaf.SplitResult {
            std.debug.print("inserting split\n", .{});
            const leaf = self.as_leaf();
            const value = split.middle;
            if (leaf.len < CAPACITY) {
                std.debug.print("pushing value {} into ", .{value});
                self.dbg();
                std.debug.print("\n", .{});
                NodeOrLeaf.from_leaf(leaf).push_value(value);
                std.debug.print("insert_split_insert_node ", .{});
                self.insert_node(NodeOrLeaf.from_leaf(split.right));
            } else {
                std.debug.print("splitting node ", .{});
                self.dbg();
                const parent_split = try leaf.split_at(value);
                std.debug.print(" into [ ", .{});
                split.left.dbg();
                std.debug.print(", {}, ", .{split.middle});
                split.right.dbg();
                std.debug.print("]\n", .{});
                std.debug.print("concatinating splits\n", .{});
                const next_split = Leaf.SplitResult.concat(parent_split, split);
                if (leaf.parent) |parent| {
                    std.debug.print("forwarding concat split\n", .{});
                    return parent.parent.insert_split(next_split);
                } else {
                    return next_split;
                }
            }
            return null;
        }

        /// Returns the live edges: one more than the number of keys.
        fn get_edges(self: *Node) []?NodeOrLeaf {
            const len = self.leaf.len + 1;
            return self.edges[0..len];
        }

        /// Prints the node's level followed by its children and keys in order.
        fn dbg(self: *Node) void {
            const values = self.leaf.get_values();
            const edges = self.get_edges()[0..values.len];
            std.debug.print("{{ ", .{});
            std.debug.print("[{}] ", .{self.leaf.level});
            for (values, edges) |v, e| {
                if (e) |edge| {
                    edge.dbg();
                    std.debug.print(", ", .{});
                }
                std.debug.print("{}, ", .{v});
            }
            if (self.get_edges()[values.len]) |edge| {
                edge.dbg();
            }
            std.debug.print(" }}", .{});
        }
    };

    /// Back-link from a node to its parent and the edge it hangs from.
    const ParentPtr = struct {
        parent: *Node,
        idx: u16,

        /// Returns a handle to the parent node, or null when there is none.
        fn into_node_or_leaf(self: ?ParentPtr) ?NodeOrLeaf {
            if (self) |ptr| {
                return .{ .internal = ptr.parent };
            } else {
                return null;
            }
        }
    };

    /// Key storage shared by leaves and internal nodes.
    const Leaf = struct {
        /// Allocator this node was created with; also used to free it.
        ally: std.mem.Allocator,
        /// 0 for a plain leaf; non-zero when embedded in a `Node`.
        level: usize = 0,
        /// Parent link; refreshed while descending in `NodeOrLeaf.find_key`.
        parent: ?ParentPtr = null,
        /// Number of live entries in `values`.
        len: u16 = 0,
        values: [CAPACITY]u32 = undefined,

        /// Allocates and initialises an empty leaf.
        fn create(ally: std.mem.Allocator) !*Leaf {
            const leaf = try ally.create(Leaf);
            leaf.init(ally);
            return leaf;
        }

        /// Prints the live keys.
        fn dbg(self: *Leaf) void {
            const values = self.get_values();
            std.debug.print("{any}", .{values});
        }

        /// Resets the leaf to an empty state that allocates from `ally`.
        fn init(self: *Leaf, ally: std.mem.Allocator) void {
            self.* = Leaf{ .ally = ally };
        }

        /// Frees this leaf with the allocator it was created with.
        fn destroy(self: *Leaf) void {
            self.ally.destroy(self);
        }

        /// Outcome of splitting a full node.
        const SplitResult = struct {
            // attached, old node that may be modified
            left: *Leaf,
            // loose value, previously attached, must be inserted
            // if we go all the way to the top, the root node must have some value and this is it
            middle: u32,
            // new, free floating leaf, must be attached
            right: *Leaf,

            /// Prints both halves as bare key lists around the median.
            fn dbg(self: SplitResult) void {
                std.debug.print("[ ", .{});
                self.left.dbg();
                std.debug.print(", {}, ", .{self.middle});
                self.right.dbg();
                std.debug.print(" ]", .{});
            }

            /// Like `dbg`, but prints both halves including their children.
            fn dbg_verbose(self: SplitResult) void {
                std.debug.print("[ ", .{});
                NodeOrLeaf.from_leaf(self.left).dbg();
                std.debug.print(", {}, ", .{self.middle});
                NodeOrLeaf.from_leaf(self.right).dbg();
                std.debug.print(" ]", .{});
            }

            /// Attaches the child's new right half to whichever half of the
            /// parent split it belongs to, and returns the parent split.
            fn concat(parent: SplitResult, child: SplitResult) SplitResult {
                // safety @fieldParentPtr(): we know parent left and right are nodes because
                // they originated from childs parent
                std.debug.print("concatinating ", .{});
                parent.dbg();
                std.debug.print(" and ", .{});
                child.dbg();
                std.debug.print("\n", .{});
                // we only care about the childs middle and left, and since they are ordered we
                // can learn about the right part from the middle part
                if (child.middle < parent.middle) {
                    // I'm not sure if checking the childs right part is actually needed?
                    // I don't think so but can't think of a solid enough reason why
                    // child is entirely between two values of the parent, so any relation between
                    // the childs mid point and any of the parents values is true for any of
                    // the childs values, right?
                    std.debug.print("concatinate ", .{});
                    const left_node = @fieldParentPtr(Node, "leaf", parent.left);
                    left_node.insert_node(NodeOrLeaf.from_leaf(child.right));
                }
                // since they cant be equal, this must mean child is bigger than parent
                else {
                    std.debug.print("concatinate {} {} ", .{ child.middle, parent.middle });
                    const right_node = @fieldParentPtr(Node, "leaf", parent.right);
                    right_node.insert_node(NodeOrLeaf.from_leaf(child.right));
                }
                std.debug.print("concatinating into ", .{});
                parent.dbg_verbose();
                std.debug.print("\n", .{});
                return parent;
            }
        };

        /// Splits this (full) node while inserting `value`; see
        /// `NodeOrLeaf.split_at`.
        fn split_at(self: *Leaf, value: u32) !SplitResult {
            return NodeOrLeaf.from_leaf(self).split_at(value);
        }

        const SearchResultTag = enum { Edge, Leaf };

        /// `.Leaf`: the key is stored in `leaf.values[idx]`.
        /// `.Edge`: the key is absent from `leaf`; `idx` is the edge to follow.
        const SearchResult = union(SearchResultTag) {
            Edge: struct { leaf: *Leaf, idx: u16 },
            Leaf: struct { leaf: *Leaf, idx: u16 },
        };

        /// Scans this node's keys for `key` without descending into children.
        fn find_key(self: *Leaf, key: u32) SearchResult {
            std.debug.print("looking for {} in {any}\n", .{ key, self.get_values() });
            for (self.get_values(), 0..) |v, i| {
                if (key < v) {
                    std.debug.print("decending left of {}\n", .{v});
                    return .{ .Edge = .{ .leaf = self, .idx = @intCast(u16, i) } };
                } else if (key == v) {
                    std.debug.print("located {} at {}\n", .{ key, v });
                    return .{ .Leaf = .{ .leaf = self, .idx = @intCast(u16, i) } };
                }
            }
            // An empty node has no last key to report; skipping the message
            // avoids computing `len - 1` on a zero length.
            if (self.len > 0) {
                std.debug.print("decending right of {}\n", .{self.get_values()[self.len - 1]});
            }
            return .{ .Edge = .{ .leaf = self, .idx = self.len } };
        }

        // returns null on success, or a split result which could not be merged
        // up because we are at the root node
        fn insert_value(self: *Leaf, value: u32) !?SplitResult {
            const leaf = self;
            if (leaf.len < CAPACITY) {
                std.debug.print("pushing value {} into ", .{value});
                self.dbg();
                std.debug.print("\n", .{});
                NodeOrLeaf.from_leaf(leaf).push_value(value);
            } else {
                std.debug.print("splitting node ", .{});
                self.dbg();
                const split = try leaf.split_at(value);
                std.debug.print(" into [ ", .{});
                split.left.dbg();
                std.debug.print(", {}, ", .{split.middle});
                split.right.dbg();
                std.debug.print("]\n", .{});
                if (leaf.parent) |parent| {
                    return parent.parent.insert_split(split);
                } else {
                    return split;
                }
            }
            return null;
        }

        /// Returns the live keys as a slice of `values`.
        fn get_values(self: *Leaf) []u32 {
            const len = self.len;
            return self.values[0..len];
        }
    };
};

pub fn main() !void {
    // Prints to stderr (it's a shortcut based on `std.io.getStdErr()`)
    std.debug.print("All your {s} are belong to us.\n", .{"codebase"});

    // stdout is for the actual output of your application, for example if you
    // are implementing gzip, then only the compressed bytes should be sent to
    // stdout, not any debugging messages.
    const stdout_file = std.io.getStdOut().writer();
    var bw = std.io.bufferedWriter(stdout_file);
    const stdout = bw.writer();

    try stdout.print("Run `zig build test` to run the tests.\n", .{});

    try bw.flush(); // don't forget to flush!
}

test "btree leaf" {
    std.testing.refAllDeclsRecursive(BTree);
    std.testing.refAllDeclsRecursive(BTree.Leaf);
    var leaf = BTree.Leaf{ .ally = std.testing.allocator, .parent = null, .len = 2, .values = [_]u32{ 5, 6, undefined, undefined, undefined } };
    const values = leaf.get_values();
    std.debug.print("{?}\n", .{leaf});
    std.debug.print("{any}\n", .{values});
}

/// Prints the live keys of `leaf` followed by a newline.
fn printValues(leaf: *BTree.Leaf) void {
    const values = leaf.get_values();
    std.debug.print("{any}\n", .{values});
}

// test "leaf split" {
//     std.debug.print("testing splitting\n", .{});
//     var tree = BTree.create(std.testing.allocator);
//     defer tree.destroy();
//     try tree.insert(2);
//     try tree.insert(4);
//     try tree.insert(6);
//     try tree.insert(3);
//     try tree.insert(7);
//     std.debug.print("before split:", .{});
//     printValues(tree.root.?.as_leaf());
//     const split = try tree.root.?.as_leaf().split_at(5);
//     std.debug.print("after split:", .{});
//     printValues(tree.root.?.as_leaf());
//     std.debug.print("split: {?}\n", .{split});
//     tree.ally.destroy(split.right);
// }

// test "btree insert" {
//     std.debug.print("testing insertion\n", .{});
//     var tree = BTree.create(std.testing.allocator);
//     defer tree.destroy();
//     try tree.insert(10);
//     try tree.insert(4);
//     try tree.insert(6);
//     try tree.insert(3);
//     try tree.insert(9);
//     try tree.insert(8);
//     tree.dbg();
// }

test "btree find on empty tree" {
    var tree = BTree.create(std.testing.allocator);
    defer tree.destroy();
    try std.testing.expectEqual(@as(?u32, null), tree.find_key(1));
    try tree.insert(1);
    try std.testing.expectEqual(@as(?u32, 1), tree.find_key(1));
    try std.testing.expectError(error.Occupied, tree.insert(1));
}

test "btree seq insert" {
    std.debug.print("sequential insertions\n", .{});
    var tree = BTree.create(std.testing.allocator);
    defer tree.destroy();
    for (0..100) |i| {
        // Only a duplicate key is expected here; anything else (such as an
        // allocation failure) must fail the test instead of being ignored.
        tree.insert(@intCast(u32, i)) catch |err| switch (err) {
            error.Occupied => std.debug.print("{} already present - ignoring\n", .{i}),
            else => return err,
        };
    }
    tree.dbg();
}

test "btree rand insert" {
    std.debug.print("random insertions\n", .{});
    var tree = BTree.create(std.testing.allocator);
    defer tree.destroy();
    var buf = std.ArrayList(u32).init(std.testing.allocator);
    defer buf.deinit();
    var rng = std.rand.DefaultPrng.init(0);
    for (0..1000) |_| {
        const i = rng.random().intRangeAtMost(u32, 0, 512);
        try buf.append(i);
        // const i = rng.random().int(u32);
        tree.insert(i) catch |err| switch (err) {
            error.Occupied => std.debug.print("{} already present - ignoring\n", .{i}),
            else => return err,
        };
    }
    for (buf.items) |i| {
        if (tree.find_key(i)) |_| {} else {
            std.debug.print("{} lost\n", .{i});
        }
    }
    tree.dbg();
}