finding nodes by range (fast)

This commit is contained in:
Janis 2023-04-01 16:35:39 +02:00
parent 8ce787a837
commit 2af01747e7
3 changed files with 331 additions and 73 deletions

View file

@ -795,7 +795,7 @@ pub enum DirItemType {
}
#[repr(C, packed)]
#[derive(Debug, Clone, Copy, FromBytes, AsBytes)]
#[derive(Clone, Copy, FromBytes, AsBytes)]
pub struct DirItem {
pub location: Key,
pub transid: U64<LE>,
@ -804,6 +804,18 @@ pub struct DirItem {
ty: u8,
}
impl Debug for DirItem {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("DirItem")
.field("location", &self.location)
.field("transid", &self.transid)
.field("data_len", &self.data_len)
.field("name_len", &self.name_len)
.field("ty", &self.ty())
.finish()
}
}
impl NameLength for DirItem {
fn name_length(&self) -> usize {
self.name_len.get() as usize

View file

@ -68,7 +68,6 @@ pub struct NodeHandle {
#[derive(Debug)]
pub struct Range<R: super::Read> {
volume: Rc<Volume<R>>,
parents: Vec<NodeHandle>,
start: RootOrEdge,
end: RootOrEdge,
}
@ -100,17 +99,18 @@ impl<R: super::Read> Tree<R> {
Ok(Self { volume, root })
}
pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>(
&self,
key: &K,
) -> Result<Option<(Item, TreeItem)>> {
fn find_node_inner<K, F>(&self, key: &K, find: F) -> Result<Option<NodeHandle>>
where
K: PartialEq<Key> + PartialOrd<Key>,
F: Fn(NodeHandle, &K) -> SearchResult,
{
let mut node = NodeHandle::start(self.root.clone());
loop {
let search = node.find_key(key);
let search = find(node, key);
match search {
SearchResult::Leaf(a) => {
return a.parse_item();
return Ok(Some(a));
}
SearchResult::Edge(mut edge) => match &edge.node.inner {
BTreeNode::Internal(internal) => {
@ -135,6 +135,54 @@ impl<R: super::Read> Tree<R> {
}
}
pub fn find_node_rev<K>(&self, key: &K) -> Result<Option<NodeHandle>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
self.find_node_inner(key, NodeHandle::find_key_rev)
}
pub fn find_node<K>(&self, key: &K) -> Result<Option<NodeHandle>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
self.find_node_inner(key, NodeHandle::find_key)
}
pub fn find_key<K>(&self, key: &K) -> Result<Option<(Item, TreeItem)>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
match self.find_node(key)? {
Some(node) => node.parse_item(),
None => Ok(None),
}
}
pub fn find_key_rev<K>(&self, key: &K) -> Result<Option<(Item, TreeItem)>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
match self.find_node_rev(key)? {
Some(node) => node.parse_item(),
None => Ok(None),
}
}
pub fn find_range<K>(&self, key: &K) -> Result<Range<R>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
let start = self.find_node(key)?;
let end = self.find_node_rev(key)?;
Ok(Range::from_handles(
self.volume.clone(),
start.unwrap_or(NodeHandle::start(self.root.clone())),
end.unwrap_or(NodeHandle::end(self.root.clone())),
))
}
pub fn iter(&self) -> Range<R> {
Range::new(self.volume.clone(), self.root.clone(), self.root.clone())
}
@ -279,7 +327,7 @@ impl RootOrEdge {
.clone()
}
pub fn into_next_leaf_any<R, F>(
fn into_next_node_any<R, F>(
self,
volume: &super::volume::Volume<R>,
f: F,
@ -291,38 +339,64 @@ impl RootOrEdge {
) -> core::result::Result<NodeHandle, NodeHandle>,
R: super::Read,
{
let mut node = match self {
RootOrEdge::Root(root) => {
// use self if is leaf already, otherwise forward to next leaf
if root.node.inner.is_leaf() {
return Ok(Self::Edge(root));
} else {
root
}
}
RootOrEdge::Edge(edge) => edge,
let node = match self {
RootOrEdge::Root(root) => Ok(root),
RootOrEdge::Edge(edge) => f(edge, volume),
};
let leaf = loop {
match f(node, volume) {
Ok(next) => {
if next.node.inner.is_leaf() {
break Ok(next);
} else {
node = next;
}
}
Err(last) => {
break Err(Self::Edge(last));
}
}
};
// turn leaf into Self::Edge
leaf.map(|leaf| Self::Edge(leaf))
match node {
Ok(node) => Ok(Self::Edge(node)),
Err(node) => Ok(Self::Edge(node)),
}
}
/// returns the next RootOrEdge, or the end of the tree as Err
pub fn into_next_node<R>(
self,
volume: &super::volume::Volume<R>,
) -> core::result::Result<Self, Self>
where
R: super::Read,
{
self.into_next_node_any(volume, NodeHandle::into_next)
}
pub fn into_next_back_node<R>(
self,
volume: &super::volume::Volume<R>,
) -> core::result::Result<Self, Self>
where
R: super::Read,
{
self.into_next_node_any(volume, NodeHandle::into_next_back)
}
fn into_next_leaf_any<R, F>(
self,
volume: &super::volume::Volume<R>,
f: F,
) -> core::result::Result<Self, Self>
where
F: Fn(
NodeHandle,
&super::volume::Volume<R>,
) -> core::result::Result<NodeHandle, NodeHandle>,
R: super::Read,
{
// this will either give us the next immediate node, or self if self was a Root.
let mut this = self.into_next_node_any(volume, &f)?;
loop {
// we are only interested in leaf nodes
if this.node.inner.is_leaf() {
break Ok(this);
} else {
// this will eventually return Err(self) so will always return
this = this.into_next_node_any(volume, &f)?;
}
}
}
/// returns the next leaf, or the end of the tree as Err
pub fn into_next_leaf<R: super::Read>(
self,
volume: &super::volume::Volume<R>,
@ -392,15 +466,18 @@ impl<R> Range<R>
where
R: super::Read,
{
pub fn new(volume: Rc<Volume<R>>, start: Rc<Node>, end: Rc<Node>) -> Self {
pub fn from_handles(volume: Rc<Volume<R>>, start: NodeHandle, end: NodeHandle) -> Self {
Self {
volume,
parents: Default::default(),
start: RootOrEdge::Root(NodeHandle::start(start)),
end: RootOrEdge::Root(NodeHandle::end(end)),
start: RootOrEdge::Root(start),
end: RootOrEdge::Root(end),
}
}
pub fn new(volume: Rc<Volume<R>>, start: Rc<Node>, end: Rc<Node>) -> Self {
Self::from_handles(volume, NodeHandle::start(start), NodeHandle::end(end))
}
pub fn is_empty(&self) -> bool {
return self.start == self.end;
}
@ -413,19 +490,22 @@ where
type Item = (Item, TreeItem);
fn next(&mut self) -> Option<Self::Item> {
if !self.is_empty() {
replace_with::replace_with_or_abort(&mut self.start, |start| {
match start.into_next_leaf(&self.volume) {
Ok(next) => next,
Err(next) => next,
loop {
if !self.is_empty() {
replace_with::replace_with_or_abort(&mut self.start, |start| {
match start.into_next_node(&self.volume) {
Ok(next) => next,
Err(next) => next,
}
});
if self.start.node.inner.is_leaf() {
break self.start.as_handle().parse_item().expect("range item");
}
});
let item = self.start.as_handle().parse_item().expect("range item");
item
} else {
None
// else repeat
} else {
break None;
}
}
}
}
@ -435,19 +515,22 @@ where
R: super::Read,
{
fn next_back(&mut self) -> Option<Self::Item> {
if !self.is_empty() {
replace_with::replace_with_or_abort(&mut self.end, |end| {
match end.into_next_back_leaf(&self.volume) {
Ok(next) => next,
Err(next) => next,
loop {
if !self.is_empty() {
replace_with::replace_with_or_abort(&mut self.end, |start| {
match start.into_next_back_node(&self.volume) {
Ok(next) => next,
Err(next) => next,
}
});
if self.end.node.inner.is_leaf() {
break self.end.as_handle().parse_item().expect("range item");
}
});
let item = self.start.as_handle().parse_item().expect("range item");
item
} else {
None
// else repeat
} else {
break None;
}
}
}
}
@ -481,6 +564,41 @@ impl NodeHandle {
}
}
pub fn find_key_rev<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult {
match &self.node.inner {
BTreeNode::Internal(node) => {
for (i, child) in node.children.iter().enumerate().rev() {
match key.partial_cmp(&child.key) {
Some(core::cmp::Ordering::Greater)
| Some(core::cmp::Ordering::Equal)
| None => {
return SearchResult::Edge(NodeHandle {
idx: i as u32,
..self
});
}
_ => {}
}
}
SearchResult::Edge(NodeHandle { idx: 0, ..self })
}
BTreeNode::Leaf(node) => {
for (i, child) in node.items.iter().enumerate().rev() {
if key.eq(&child.key) {
return SearchResult::Leaf(NodeHandle {
idx: i as u32,
..self
});
}
}
log::debug!("key definitely not found!");
SearchResult::Edge(NodeHandle { idx: 0, ..self })
}
}
}
pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult {
match &self.node.inner {
BTreeNode::Internal(node) => {
@ -607,8 +725,8 @@ impl NodeHandle {
// go up
match parents.pop() {
Some((node, idx)) => Self {
idx: (idx + 1).min(node.inner.header().nritems.get()),
node,
idx: idx + 1,
parents,
}
.into_next(volume),

View file

@ -78,13 +78,14 @@ pub struct Volume<R: super::Read> {
#[derive(Debug, Clone)]
pub struct Volume2<R: super::Read> {
inner: Rc<Volume<R>>,
roots: BTreeMap<KnownObjectId, Tree<R>>,
roots: BTreeMap<KnownObjectId, (RootItem, Tree<R>)>,
}
// TODO: find better name
#[derive(Debug, Clone)]
pub struct Fs<R: super::Read> {
volume: Rc<Volume2<R>>,
root_item: RootItem,
fs_root: Tree<R>,
}
@ -172,7 +173,7 @@ impl<R: super::Read> Volume<R> {
Ok(self)
}
fn parse_root_tree(self: Rc<Self>) -> Result<BTreeMap<KnownObjectId, Tree<R>>> {
fn parse_root_tree(self: Rc<Self>) -> Result<BTreeMap<KnownObjectId, (RootItem, Tree<R>)>> {
let root_tree_root = self.superblock().root.get();
let root_tree = Tree::from_logical_offset(self.clone(), root_tree_root)?;
@ -184,9 +185,9 @@ impl<R: super::Read> Volume<R> {
})
.map(|(item, root)| {
let id = item.key.id();
let a = Tree::from_logical_offset(self.clone(), root.bytenr.get());
let tree = Tree::from_logical_offset(self.clone(), root.bytenr.get());
a.map(|root| (id, root))
tree.map(|tree| (id, (root, tree)))
})
.collect::<Result<BTreeMap<_, _>>>()?;
@ -332,20 +333,23 @@ impl<R: super::Read> Volume2<R> {
.location
.id();
let root = self
let (root_item, fs_root) = self
.roots
.get(&subvol_id)
.ok_or(Error::NoDefaultSubvolFsRoot)?
.clone();
Ok(Fs {
volume: self.clone(),
fs_root: root.clone(),
root_item,
fs_root,
})
}
}
#[cfg(test)]
mod tests {
use crate::v2::tree::PartialKey;
use super::*;
use std::fs::File;
use test_log::test;
@ -371,10 +375,134 @@ mod tests {
log::info!("roots:");
for (id, v) in v2.roots.iter() {
log::info!("[{id:?}] {v:#?}");
log::info!("[{id:?}] ");
}
}
#[test]
fn iter_roots_rev() {
let file = open_btrfs_file();
let vol = Volume::new(file).expect("volume");
let v2 = vol.into_volume2().expect("volume2");
log::info!("roots:");
for (id, v) in v2.roots.iter() {
log::info!("[{id:?}] ");
}
log::info!("roots rev:");
for (id, v) in v2.roots.iter().rev() {
log::info!("[{id:?}] ");
}
}
#[test]
fn find_key_sym() -> Result<()> {
let file = open_btrfs_file();
let vol = Volume::new(file).expect("volume");
let v2 = vol.into_volume2().expect("volume2");
let root_tree =
Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?;
// we are looking for the root tree directory (?)
// this is a DIR_ITEM entry in the root tree, with the name "default",
// and the crc32 of "default" as its offset
let key = Key::new(
KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()),
ObjectType::DirItem,
0x8dbfc2d2, // crc of "default"
);
let subvol_root = root_tree.find_node(&key)?;
let other = root_tree.find_node_rev(&key)?;
assert_eq!(subvol_root, other);
log::info!("{subvol_root:?}");
Ok(())
}
#[test]
fn default_subvol_items() -> Result<()> {
let file = open_btrfs_file();
let vol = Volume::new(file).expect("volume");
let v2 = vol.into_volume2().expect("volume2");
let root_tree =
Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?;
// we are looking for the root tree directory (?)
// this is a DIR_ITEM entry in the root tree, with the name "default",
// and the crc32 of "default" as its offset
let key = Key::new(
KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()),
ObjectType::DirItem,
0x8dbfc2d2, // crc of "default"
);
let subvol_root = root_tree
.find_key(&key)?
.ok_or(Error::NoDefaultSubvolRoot)?;
// if we found the dir entry of the "default subvol" (mharmstone nomenclature)
// we then look for the root fs tree in the root tree with the ID found
// in the `.location` of the dir_item only (from mharmstone)
let subvol_id = subvol_root
.1
.as_dir_item()
.expect("dir item")
.first()
.expect("dir item entry")
.item()
.location
.id();
log::info!("subvol_id: {subvol_id:?}");
let search_key = PartialKey::new(Some(subvol_id), None, None);
for (key, v) in root_tree.iter() {
if search_key.eq(&key.key) {
log::info!("[{key:?}] {v:#?}");
}
}
let fs = v2.default_subvolume().expect("default subvol");
// so we go from root_item.dirid as inode number
// to (dir_id, dir_index, ?) which is all the files in the directory
// generally, (<inode_number>, dir_index, idx) will give us all indices in the directory
// walking the entire tree for this seems quite slow though...
// so we would want a function here that can find a range and a way to
// define a range with a partial key.
// that would look something like limiting the range to everything equal
// to the id or type, if present, in that order. ofc if the id is
// missing, the range cannot be restricted at all.
let search_key = PartialKey::new(
Some(fs.root_item.root_dirid.get().into()),
Some(ObjectType::DirIndex),
None,
);
log::info!("iter:");
for (key, v) in fs.fs_root.iter() {
if search_key.eq(&key.key) {
log::info!("[{key:?}] {v:#?}");
}
}
log::info!("iter: [end]");
// with range
log::info!("range:");
for (key, v) in fs.fs_root.find_range(&search_key)? {
log::info!("[{key:?}] {v:#?}");
}
log::info!("range: [end]");
Ok(())
}
#[test]
fn iter_default_subvol() {
let file = open_btrfs_file();