From 2af01747e7682274d8ec6ac01c4cb3b1429854d1 Mon Sep 17 00:00:00 2001 From: Janis <janis@nirgendwo.xyz> Date: Sat, 1 Apr 2023 16:35:39 +0200 Subject: [PATCH] finding nodes by range (fast) --- btrfs/src/structs.rs | 14 ++- btrfs/src/v2/tree.rs | 248 ++++++++++++++++++++++++++++++----------- btrfs/src/v2/volume.rs | 142 +++++++++++++++++++++-- 3 files changed, 331 insertions(+), 73 deletions(-) diff --git a/btrfs/src/structs.rs b/btrfs/src/structs.rs index c6d3e99..99507cd 100644 --- a/btrfs/src/structs.rs +++ b/btrfs/src/structs.rs @@ -795,7 +795,7 @@ pub enum DirItemType { } #[repr(C, packed)] -#[derive(Debug, Clone, Copy, FromBytes, AsBytes)] +#[derive(Clone, Copy, FromBytes, AsBytes)] pub struct DirItem { pub location: Key, pub transid: U64<LE>, @@ -804,6 +804,18 @@ pub struct DirItem { ty: u8, } +impl Debug for DirItem { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("DirItem") + .field("location", &self.location) + .field("transid", &self.transid) + .field("data_len", &self.data_len) + .field("name_len", &self.name_len) + .field("ty", &self.ty()) + .finish() + } +} + impl NameLength for DirItem { fn name_length(&self) -> usize { self.name_len.get() as usize diff --git a/btrfs/src/v2/tree.rs b/btrfs/src/v2/tree.rs index 129e2c4..685a76e 100644 --- a/btrfs/src/v2/tree.rs +++ b/btrfs/src/v2/tree.rs @@ -68,7 +68,6 @@ pub struct NodeHandle { #[derive(Debug)] pub struct Range<R: super::Read> { volume: Rc<Volume<R>>, - parents: Vec<NodeHandle>, start: RootOrEdge, end: RootOrEdge, } @@ -100,17 +99,18 @@ impl<R: super::Read> Tree<R> { Ok(Self { volume, root }) } - pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>( - &self, - key: &K, - ) -> Result<Option<(Item, TreeItem)>> { + fn find_node_inner<K, F>(&self, key: &K, find: F) -> Result<Option<NodeHandle>> + where + K: PartialEq<Key> + PartialOrd<Key>, + F: Fn(NodeHandle, &K) -> SearchResult, + { let mut node = NodeHandle::start(self.root.clone()); loop { - let search = node.find_key(key); + let search = find(node, key); match search { SearchResult::Leaf(a) => { - return a.parse_item(); + return Ok(Some(a)); } SearchResult::Edge(mut edge) => match &edge.node.inner { BTreeNode::Internal(internal) => { @@ -135,6 +135,54 @@ impl<R: super::Read> Tree<R> { } } + pub fn find_node_rev<K>(&self, key: &K) -> Result<Option<NodeHandle>> + where + K: PartialEq<Key> + PartialOrd<Key>, + { + self.find_node_inner(key, NodeHandle::find_key_rev) + } + + pub fn find_node<K>(&self, key: &K) -> Result<Option<NodeHandle>> + where + K: PartialEq<Key> + PartialOrd<Key>, + { + self.find_node_inner(key, NodeHandle::find_key) + } + + pub fn find_key<K>(&self, key: &K) -> Result<Option<(Item, TreeItem)>> + where + K: PartialEq<Key> + PartialOrd<Key>, + { + match self.find_node(key)? { + Some(node) => node.parse_item(), + None => Ok(None), + } + } + + pub fn find_key_rev<K>(&self, key: &K) -> Result<Option<(Item, TreeItem)>> + where + K: PartialEq<Key> + PartialOrd<Key>, + { + match self.find_node_rev(key)? { + Some(node) => node.parse_item(), + None => Ok(None), + } + } + + pub fn find_range<K>(&self, key: &K) -> Result<Range<R>> + where + K: PartialEq<Key> + PartialOrd<Key>, + { + let start = self.find_node(key)?; + let end = self.find_node_rev(key)?; + + Ok(Range::from_handles( + self.volume.clone(), + start.unwrap_or(NodeHandle::start(self.root.clone())), + end.unwrap_or(NodeHandle::end(self.root.clone())), + )) + } + pub fn iter(&self) -> Range<R> { Range::new(self.volume.clone(), self.root.clone(), self.root.clone()) } @@ -279,7 +327,7 @@ impl RootOrEdge { .clone() } - pub fn into_next_leaf_any<R, F>( + fn into_next_node_any<R, F>( self, volume: &super::volume::Volume<R>, f: F, @@ -291,38 +339,64 @@ impl RootOrEdge { ) -> core::result::Result<NodeHandle, NodeHandle>, R: super::Read, { - let mut node = match self { - RootOrEdge::Root(root) => { - // use self if is leaf already, otherwise forward to next leaf - if root.node.inner.is_leaf() { - return Ok(Self::Edge(root)); - } else { - root - } - } - RootOrEdge::Edge(edge) => edge, + let node = match self { + RootOrEdge::Root(root) => Ok(root), + RootOrEdge::Edge(edge) => f(edge, volume), }; - let leaf = loop { - match f(node, volume) { - Ok(next) => { - if next.node.inner.is_leaf() { - break Ok(next); - } else { - node = next; - } - } - Err(last) => { - break Err(Self::Edge(last)); - } - } - }; - - // turn leaf into Self::Edge - leaf.map(|leaf| Self::Edge(leaf)) + match node { + Ok(node) => Ok(Self::Edge(node)), + Err(node) => Ok(Self::Edge(node)), + } } - /// returns the next RootOrEdge, or the end of the tree as Err + pub fn into_next_node<R>( + self, + volume: &super::volume::Volume<R>, + ) -> core::result::Result<Self, Self> + where + R: super::Read, + { + self.into_next_node_any(volume, NodeHandle::into_next) + } + + pub fn into_next_back_node<R>( + self, + volume: &super::volume::Volume<R>, + ) -> core::result::Result<Self, Self> + where + R: super::Read, + { + self.into_next_node_any(volume, NodeHandle::into_next_back) + } + + fn into_next_leaf_any<R, F>( + self, + volume: &super::volume::Volume<R>, + f: F, + ) -> core::result::Result<Self, Self> + where + F: Fn( + NodeHandle, + &super::volume::Volume<R>, + ) -> core::result::Result<NodeHandle, NodeHandle>, + R: super::Read, + { + // this will either give us the next immediate node, or self if self was a Root. + let mut this = self.into_next_node_any(volume, &f)?; + + loop { + // we are only interested in leaf nodes + if this.node.inner.is_leaf() { + break Ok(this); + } else { + // this will eventually return Err(self) so will always return + this = this.into_next_node_any(volume, &f)?; + } + } + } + + /// returns the next leaf, or the end of the tree as Err pub fn into_next_leaf<R: super::Read>( self, volume: &super::volume::Volume<R>, @@ -392,15 +466,18 @@ impl<R> Range<R> where R: super::Read, { - pub fn new(volume: Rc<Volume<R>>, start: Rc<Node>, end: Rc<Node>) -> Self { + pub fn from_handles(volume: Rc<Volume<R>>, start: NodeHandle, end: NodeHandle) -> Self { Self { volume, - parents: Default::default(), - start: RootOrEdge::Root(NodeHandle::start(start)), - end: RootOrEdge::Root(NodeHandle::end(end)), + start: RootOrEdge::Root(start), + end: RootOrEdge::Root(end), } } + pub fn new(volume: Rc<Volume<R>>, start: Rc<Node>, end: Rc<Node>) -> Self { + Self::from_handles(volume, NodeHandle::start(start), NodeHandle::end(end)) + } + pub fn is_empty(&self) -> bool { return self.start == self.end; } @@ -413,19 +490,22 @@ where type Item = (Item, TreeItem); fn next(&mut self) -> Option<Self::Item> { - if !self.is_empty() { - replace_with::replace_with_or_abort(&mut self.start, |start| { - match start.into_next_leaf(&self.volume) { - Ok(next) => next, - Err(next) => next, + loop { + if !self.is_empty() { + replace_with::replace_with_or_abort(&mut self.start, |start| { + match start.into_next_node(&self.volume) { + Ok(next) => next, + Err(next) => next, + } + }); + + if self.start.node.inner.is_leaf() { + break self.start.as_handle().parse_item().expect("range item"); } - }); - - let item = self.start.as_handle().parse_item().expect("range item"); - - item - } else { - None + // else repeat + } else { + break None; + } } } } @@ -435,19 +515,22 @@ where R: super::Read, { fn next_back(&mut self) -> Option<Self::Item> { - if !self.is_empty() { - replace_with::replace_with_or_abort(&mut self.end, |end| { - match end.into_next_back_leaf(&self.volume) { - Ok(next) => next, - Err(next) => next, + loop { + if !self.is_empty() { + replace_with::replace_with_or_abort(&mut self.end, |start| { + match start.into_next_back_node(&self.volume) { + Ok(next) => next, + Err(next) => next, + } + }); + + if self.end.node.inner.is_leaf() { + break self.end.as_handle().parse_item().expect("range item"); } - }); - - let item = self.start.as_handle().parse_item().expect("range item"); - - item - } else { - None + // else repeat + } else { + break None; + } } } } @@ -481,6 +564,41 @@ impl NodeHandle { } } + pub fn find_key_rev<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult { + match &self.node.inner { + BTreeNode::Internal(node) => { + for (i, child) in node.children.iter().enumerate().rev() { + match key.partial_cmp(&child.key) { + Some(core::cmp::Ordering::Greater) + | Some(core::cmp::Ordering::Equal) + | None => { + return SearchResult::Edge(NodeHandle { + idx: i as u32, + ..self + }); + } + _ => {} + } + } + + SearchResult::Edge(NodeHandle { idx: 0, ..self }) + } + BTreeNode::Leaf(node) => { + for (i, child) in node.items.iter().enumerate().rev() { + if key.eq(&child.key) { + return SearchResult::Leaf(NodeHandle { + idx: i as u32, + ..self + }); + } + } + + log::debug!("key definitely not found!"); + SearchResult::Edge(NodeHandle { idx: 0, ..self }) + } + } + } + pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult { match &self.node.inner { BTreeNode::Internal(node) => { @@ -607,8 +725,8 @@ impl NodeHandle { // go up match parents.pop() { Some((node, idx)) => Self { + idx: (idx + 1).min(node.inner.header().nritems.get()), node, - idx: idx + 1, parents, } .into_next(volume), diff --git a/btrfs/src/v2/volume.rs b/btrfs/src/v2/volume.rs index 44bbeb8..5deed2f 100644 --- a/btrfs/src/v2/volume.rs +++ b/btrfs/src/v2/volume.rs @@ -78,13 +78,14 @@ pub struct Volume<R: super::Read> { #[derive(Debug, Clone)] pub struct Volume2<R: super::Read> { inner: Rc<Volume<R>>, - roots: BTreeMap<KnownObjectId, Tree<R>>, + roots: BTreeMap<KnownObjectId, (RootItem, Tree<R>)>, } // TODO: find better name #[derive(Debug, Clone)] pub struct Fs<R: super::Read> { volume: Rc<Volume2<R>>, + root_item: RootItem, fs_root: Tree<R>, } @@ -172,7 +173,7 @@ impl<R: super::Read> Volume<R> { Ok(self) } - fn parse_root_tree(self: Rc<Self>) -> Result<BTreeMap<KnownObjectId, Tree<R>>> { + fn parse_root_tree(self: Rc<Self>) -> Result<BTreeMap<KnownObjectId, (RootItem, Tree<R>)>> { let root_tree_root = self.superblock().root.get(); let root_tree = Tree::from_logical_offset(self.clone(), root_tree_root)?; @@ -184,9 +185,9 @@ impl<R: super::Read> Volume<R> { }) .map(|(item, root)| { let id = item.key.id(); - let a = Tree::from_logical_offset(self.clone(), root.bytenr.get()); + let tree = Tree::from_logical_offset(self.clone(), root.bytenr.get()); - a.map(|root| (id, root)) + tree.map(|tree| (id, (root, tree))) }) .collect::<Result<BTreeMap<_, _>>>()?; @@ -332,20 +333,23 @@ impl<R: super::Read> Volume2<R> { .location .id(); - let root = self + let (root_item, fs_root) = self .roots .get(&subvol_id) .ok_or(Error::NoDefaultSubvolFsRoot)? .clone(); Ok(Fs { volume: self.clone(), - fs_root: root.clone(), + root_item, + fs_root, }) } } #[cfg(test)] mod tests { + use crate::v2::tree::PartialKey; + use super::*; use std::fs::File; use test_log::test; @@ -371,10 +375,134 @@ mod tests { log::info!("roots:"); for (id, v) in v2.roots.iter() { - log::info!("[{id:?}] {v:#?}"); + log::info!("[{id:?}] "); } } + #[test] + fn iter_roots_rev() { + let file = open_btrfs_file(); + let vol = Volume::new(file).expect("volume"); + let v2 = vol.into_volume2().expect("volume2"); + + log::info!("roots:"); + for (id, v) in v2.roots.iter() { + log::info!("[{id:?}] "); + } + log::info!("roots rev:"); + for (id, v) in v2.roots.iter().rev() { + log::info!("[{id:?}] "); + } + } + + #[test] + fn find_key_sym() -> Result<()> { + let file = open_btrfs_file(); + let vol = Volume::new(file).expect("volume"); + let v2 = vol.into_volume2().expect("volume2"); + + let root_tree = + Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?; + + // we are looking for the root tree directory (?) + // this is a DIR_ITEM entry in the root tree, with the name "default", + // and the crc32 of "default" as its offset + let key = Key::new( + KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()), + ObjectType::DirItem, + 0x8dbfc2d2, // crc of "default" + ); + + let subvol_root = root_tree.find_node(&key)?; + let other = root_tree.find_node_rev(&key)?; + assert_eq!(subvol_root, other); + log::info!("{subvol_root:?}"); + Ok(()) + } + + #[test] + fn default_subvol_items() -> Result<()> { + let file = open_btrfs_file(); + let vol = Volume::new(file).expect("volume"); + let v2 = vol.into_volume2().expect("volume2"); + + let root_tree = + Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?; + + // we are looking for the root tree directory (?) + // this is a DIR_ITEM entry in the root tree, with the name "default", + // and the crc32 of "default" as its offset + let key = Key::new( + KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()), + ObjectType::DirItem, + 0x8dbfc2d2, // crc of "default" + ); + + let subvol_root = root_tree + .find_key(&key)? + .ok_or(Error::NoDefaultSubvolRoot)?; + // if we found the dir entry of the "default subvol" (mharmstone nomenclature) + // we then look for the root fs tree in the root tree with the ID found + // in the `.location` of the dir_item only (from mharmstone) + + let subvol_id = subvol_root + .1 + .as_dir_item() + .expect("dir item") + .first() + .expect("dir item entry") + .item() + .location + .id(); + + log::info!("subvol_id: {subvol_id:?}"); + + let search_key = PartialKey::new(Some(subvol_id), None, None); + + for (key, v) in root_tree.iter() { + if search_key.eq(&key.key) { + log::info!("[{key:?}] {v:#?}"); + } + } + + let fs = v2.default_subvolume().expect("default subvol"); + + // so we go from root_item.dirid as inode number + // to (dir_id, dir_index, ?) which is all the files in the directory + // generally, (<inode_number>, dir_index, idx) will give us all indices in the directory + // walking the entire tree for this seems quite slow though... + // so we would want a function here that can find a range and a way to + // define a range with a partial key. + + // that would look something like limiting the range to everything equal + // to the id or type, if present, in that order. ofc if the id is + // missing, the range cannot be restricted at all. + + let search_key = PartialKey::new( + Some(fs.root_item.root_dirid.get().into()), + Some(ObjectType::DirIndex), + None, + ); + + log::info!("iter:"); + for (key, v) in fs.fs_root.iter() { + if search_key.eq(&key.key) { + log::info!("[{key:?}] {v:#?}"); + } + } + log::info!("iter: [end]"); + + // with range + + log::info!("range:"); + for (key, v) in fs.fs_root.find_range(&search_key)? { + log::info!("[{key:?}] {v:#?}"); + } + log::info!("range: [end]"); + + Ok(()) + } + #[test] fn iter_default_subvol() { let file = open_btrfs_file();