From 2af01747e7682274d8ec6ac01c4cb3b1429854d1 Mon Sep 17 00:00:00 2001
From: Janis <janis@nirgendwo.xyz>
Date: Sat, 1 Apr 2023 16:35:39 +0200
Subject: [PATCH] finding nodes by range (fast)

---
 btrfs/src/structs.rs   |  14 ++-
 btrfs/src/v2/tree.rs   | 248 ++++++++++++++++++++++++++++++-----------
 btrfs/src/v2/volume.rs | 142 +++++++++++++++++++++--
 3 files changed, 331 insertions(+), 73 deletions(-)

diff --git a/btrfs/src/structs.rs b/btrfs/src/structs.rs
index c6d3e99..99507cd 100644
--- a/btrfs/src/structs.rs
+++ b/btrfs/src/structs.rs
@@ -795,7 +795,7 @@ pub enum DirItemType {
 }
 
 #[repr(C, packed)]
-#[derive(Debug, Clone, Copy, FromBytes, AsBytes)]
+#[derive(Clone, Copy, FromBytes, AsBytes)]
 pub struct DirItem {
     pub location: Key,
     pub transid: U64<LE>,
@@ -804,6 +804,18 @@ pub struct DirItem {
     ty: u8,
 }
 
+impl Debug for DirItem {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("DirItem")
+            .field("location", &self.location)
+            .field("transid", &self.transid)
+            .field("data_len", &self.data_len)
+            .field("name_len", &self.name_len)
+            .field("ty", &self.ty())
+            .finish()
+    }
+}
+
 impl NameLength for DirItem {
     fn name_length(&self) -> usize {
         self.name_len.get() as usize
diff --git a/btrfs/src/v2/tree.rs b/btrfs/src/v2/tree.rs
index 129e2c4..685a76e 100644
--- a/btrfs/src/v2/tree.rs
+++ b/btrfs/src/v2/tree.rs
@@ -68,7 +68,6 @@ pub struct NodeHandle {
 #[derive(Debug)]
 pub struct Range<R: super::Read> {
     volume: Rc<Volume<R>>,
-    parents: Vec<NodeHandle>,
     start: RootOrEdge,
     end: RootOrEdge,
 }
@@ -100,17 +99,18 @@ impl<R: super::Read> Tree<R> {
         Ok(Self { volume, root })
     }
 
-    pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>(
-        &self,
-        key: &K,
-    ) -> Result<Option<(Item, TreeItem)>> {
+    fn find_node_inner<K, F>(&self, key: &K, find: F) -> Result<Option<NodeHandle>>
+    where
+        K: PartialEq<Key> + PartialOrd<Key>,
+        F: Fn(NodeHandle, &K) -> SearchResult,
+    {
         let mut node = NodeHandle::start(self.root.clone());
 
         loop {
-            let search = node.find_key(key);
+            let search = find(node, key);
             match search {
                 SearchResult::Leaf(a) => {
-                    return a.parse_item();
+                    return Ok(Some(a));
                 }
                 SearchResult::Edge(mut edge) => match &edge.node.inner {
                     BTreeNode::Internal(internal) => {
@@ -135,6 +135,54 @@ impl<R: super::Read> Tree<R> {
         }
     }
 
+    pub fn find_node_rev<K>(&self, key: &K) -> Result<Option<NodeHandle>>
+    where
+        K: PartialEq<Key> + PartialOrd<Key>,
+    {
+        self.find_node_inner(key, NodeHandle::find_key_rev)
+    }
+
+    pub fn find_node<K>(&self, key: &K) -> Result<Option<NodeHandle>>
+    where
+        K: PartialEq<Key> + PartialOrd<Key>,
+    {
+        self.find_node_inner(key, NodeHandle::find_key)
+    }
+
+    pub fn find_key<K>(&self, key: &K) -> Result<Option<(Item, TreeItem)>>
+    where
+        K: PartialEq<Key> + PartialOrd<Key>,
+    {
+        match self.find_node(key)? {
+            Some(node) => node.parse_item(),
+            None => Ok(None),
+        }
+    }
+
+    pub fn find_key_rev<K>(&self, key: &K) -> Result<Option<(Item, TreeItem)>>
+    where
+        K: PartialEq<Key> + PartialOrd<Key>,
+    {
+        match self.find_node_rev(key)? {
+            Some(node) => node.parse_item(),
+            None => Ok(None),
+        }
+    }
+
+    pub fn find_range<K>(&self, key: &K) -> Result<Range<R>>
+    where
+        K: PartialEq<Key> + PartialOrd<Key>,
+    {
+        let start = self.find_node(key)?;
+        let end = self.find_node_rev(key)?;
+
+        Ok(Range::from_handles(
+            self.volume.clone(),
+            start.unwrap_or(NodeHandle::start(self.root.clone())),
+            end.unwrap_or(NodeHandle::end(self.root.clone())),
+        ))
+    }
+
     pub fn iter(&self) -> Range<R> {
         Range::new(self.volume.clone(), self.root.clone(), self.root.clone())
     }
@@ -279,7 +327,7 @@ impl RootOrEdge {
         .clone()
     }
 
-    pub fn into_next_leaf_any<R, F>(
+    fn into_next_node_any<R, F>(
         self,
         volume: &super::volume::Volume<R>,
         f: F,
@@ -291,38 +339,64 @@ impl RootOrEdge {
         ) -> core::result::Result<NodeHandle, NodeHandle>,
         R: super::Read,
     {
-        let mut node = match self {
-            RootOrEdge::Root(root) => {
-                // use self if is leaf already, otherwise forward to next leaf
-                if root.node.inner.is_leaf() {
-                    return Ok(Self::Edge(root));
-                } else {
-                    root
-                }
-            }
-            RootOrEdge::Edge(edge) => edge,
+        let node = match self {
+            RootOrEdge::Root(root) => Ok(root),
+            RootOrEdge::Edge(edge) => f(edge, volume),
         };
 
-        let leaf = loop {
-            match f(node, volume) {
-                Ok(next) => {
-                    if next.node.inner.is_leaf() {
-                        break Ok(next);
-                    } else {
-                        node = next;
-                    }
-                }
-                Err(last) => {
-                    break Err(Self::Edge(last));
-                }
-            }
-        };
-
-        // turn leaf into Self::Edge
-        leaf.map(|leaf| Self::Edge(leaf))
+        match node {
+            Ok(node) => Ok(Self::Edge(node)),
+            Err(node) => Ok(Self::Edge(node)),
+        }
     }
 
-    /// returns the next RootOrEdge, or the end of the tree as Err
+    pub fn into_next_node<R>(
+        self,
+        volume: &super::volume::Volume<R>,
+    ) -> core::result::Result<Self, Self>
+    where
+        R: super::Read,
+    {
+        self.into_next_node_any(volume, NodeHandle::into_next)
+    }
+
+    pub fn into_next_back_node<R>(
+        self,
+        volume: &super::volume::Volume<R>,
+    ) -> core::result::Result<Self, Self>
+    where
+        R: super::Read,
+    {
+        self.into_next_node_any(volume, NodeHandle::into_next_back)
+    }
+
+    fn into_next_leaf_any<R, F>(
+        self,
+        volume: &super::volume::Volume<R>,
+        f: F,
+    ) -> core::result::Result<Self, Self>
+    where
+        F: Fn(
+            NodeHandle,
+            &super::volume::Volume<R>,
+        ) -> core::result::Result<NodeHandle, NodeHandle>,
+        R: super::Read,
+    {
+        // this will either give us the next immediate node, or self if self was a Root.
+        let mut this = self.into_next_node_any(volume, &f)?;
+
+        loop {
+            // we are only interested in leaf nodes
+            if this.node.inner.is_leaf() {
+                break Ok(this);
+            } else {
+                // this will eventually return Err(self) so will always return
+                this = this.into_next_node_any(volume, &f)?;
+            }
+        }
+    }
+
+    /// returns the next leaf, or the end of the tree as Err
     pub fn into_next_leaf<R: super::Read>(
         self,
         volume: &super::volume::Volume<R>,
@@ -392,15 +466,18 @@ impl<R> Range<R>
 where
     R: super::Read,
 {
-    pub fn new(volume: Rc<Volume<R>>, start: Rc<Node>, end: Rc<Node>) -> Self {
+    pub fn from_handles(volume: Rc<Volume<R>>, start: NodeHandle, end: NodeHandle) -> Self {
         Self {
             volume,
-            parents: Default::default(),
-            start: RootOrEdge::Root(NodeHandle::start(start)),
-            end: RootOrEdge::Root(NodeHandle::end(end)),
+            start: RootOrEdge::Root(start),
+            end: RootOrEdge::Root(end),
         }
     }
 
+    pub fn new(volume: Rc<Volume<R>>, start: Rc<Node>, end: Rc<Node>) -> Self {
+        Self::from_handles(volume, NodeHandle::start(start), NodeHandle::end(end))
+    }
+
     pub fn is_empty(&self) -> bool {
         return self.start == self.end;
     }
@@ -413,19 +490,22 @@ where
     type Item = (Item, TreeItem);
 
     fn next(&mut self) -> Option<Self::Item> {
-        if !self.is_empty() {
-            replace_with::replace_with_or_abort(&mut self.start, |start| {
-                match start.into_next_leaf(&self.volume) {
-                    Ok(next) => next,
-                    Err(next) => next,
+        loop {
+            if !self.is_empty() {
+                replace_with::replace_with_or_abort(&mut self.start, |start| {
+                    match start.into_next_node(&self.volume) {
+                        Ok(next) => next,
+                        Err(next) => next,
+                    }
+                });
+
+                if self.start.node.inner.is_leaf() {
+                    break self.start.as_handle().parse_item().expect("range item");
                 }
-            });
-
-            let item = self.start.as_handle().parse_item().expect("range item");
-
-            item
-        } else {
-            None
+                // else repeat
+            } else {
+                break None;
+            }
         }
     }
 }
@@ -435,19 +515,22 @@ where
     R: super::Read,
 {
     fn next_back(&mut self) -> Option<Self::Item> {
-        if !self.is_empty() {
-            replace_with::replace_with_or_abort(&mut self.end, |end| {
-                match end.into_next_back_leaf(&self.volume) {
-                    Ok(next) => next,
-                    Err(next) => next,
+        loop {
+            if !self.is_empty() {
+                replace_with::replace_with_or_abort(&mut self.end, |start| {
+                    match start.into_next_back_node(&self.volume) {
+                        Ok(next) => next,
+                        Err(next) => next,
+                    }
+                });
+
+                if self.end.node.inner.is_leaf() {
+                    break self.end.as_handle().parse_item().expect("range item");
                 }
-            });
-
-            let item = self.start.as_handle().parse_item().expect("range item");
-
-            item
-        } else {
-            None
+                // else repeat
+            } else {
+                break None;
+            }
         }
     }
 }
@@ -481,6 +564,41 @@ impl NodeHandle {
         }
     }
 
+    pub fn find_key_rev<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult {
+        match &self.node.inner {
+            BTreeNode::Internal(node) => {
+                for (i, child) in node.children.iter().enumerate().rev() {
+                    match key.partial_cmp(&child.key) {
+                        Some(core::cmp::Ordering::Greater)
+                        | Some(core::cmp::Ordering::Equal)
+                        | None => {
+                            return SearchResult::Edge(NodeHandle {
+                                idx: i as u32,
+                                ..self
+                            });
+                        }
+                        _ => {}
+                    }
+                }
+
+                SearchResult::Edge(NodeHandle { idx: 0, ..self })
+            }
+            BTreeNode::Leaf(node) => {
+                for (i, child) in node.items.iter().enumerate().rev() {
+                    if key.eq(&child.key) {
+                        return SearchResult::Leaf(NodeHandle {
+                            idx: i as u32,
+                            ..self
+                        });
+                    }
+                }
+
+                log::debug!("key definitely not found!");
+                SearchResult::Edge(NodeHandle { idx: 0, ..self })
+            }
+        }
+    }
+
     pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult {
         match &self.node.inner {
             BTreeNode::Internal(node) => {
@@ -607,8 +725,8 @@ impl NodeHandle {
             // go up
             match parents.pop() {
                 Some((node, idx)) => Self {
+                    idx: (idx + 1).min(node.inner.header().nritems.get()),
                     node,
-                    idx: idx + 1,
                     parents,
                 }
                 .into_next(volume),
diff --git a/btrfs/src/v2/volume.rs b/btrfs/src/v2/volume.rs
index 44bbeb8..5deed2f 100644
--- a/btrfs/src/v2/volume.rs
+++ b/btrfs/src/v2/volume.rs
@@ -78,13 +78,14 @@ pub struct Volume<R: super::Read> {
 #[derive(Debug, Clone)]
 pub struct Volume2<R: super::Read> {
     inner: Rc<Volume<R>>,
-    roots: BTreeMap<KnownObjectId, Tree<R>>,
+    roots: BTreeMap<KnownObjectId, (RootItem, Tree<R>)>,
 }
 
 // TODO: find better name
 #[derive(Debug, Clone)]
 pub struct Fs<R: super::Read> {
     volume: Rc<Volume2<R>>,
+    root_item: RootItem,
     fs_root: Tree<R>,
 }
 
@@ -172,7 +173,7 @@ impl<R: super::Read> Volume<R> {
         Ok(self)
     }
 
-    fn parse_root_tree(self: Rc<Self>) -> Result<BTreeMap<KnownObjectId, Tree<R>>> {
+    fn parse_root_tree(self: Rc<Self>) -> Result<BTreeMap<KnownObjectId, (RootItem, Tree<R>)>> {
         let root_tree_root = self.superblock().root.get();
         let root_tree = Tree::from_logical_offset(self.clone(), root_tree_root)?;
 
@@ -184,9 +185,9 @@ impl<R: super::Read> Volume<R> {
             })
             .map(|(item, root)| {
                 let id = item.key.id();
-                let a = Tree::from_logical_offset(self.clone(), root.bytenr.get());
+                let tree = Tree::from_logical_offset(self.clone(), root.bytenr.get());
 
-                a.map(|root| (id, root))
+                tree.map(|tree| (id, (root, tree)))
             })
             .collect::<Result<BTreeMap<_, _>>>()?;
 
@@ -332,20 +333,23 @@ impl<R: super::Read> Volume2<R> {
             .location
             .id();
 
-        let root = self
+        let (root_item, fs_root) = self
             .roots
             .get(&subvol_id)
             .ok_or(Error::NoDefaultSubvolFsRoot)?
             .clone();
         Ok(Fs {
             volume: self.clone(),
-            fs_root: root.clone(),
+            root_item,
+            fs_root,
         })
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::v2::tree::PartialKey;
+
     use super::*;
     use std::fs::File;
     use test_log::test;
@@ -371,10 +375,134 @@ mod tests {
 
         log::info!("roots:");
         for (id, v) in v2.roots.iter() {
-            log::info!("[{id:?}] {v:#?}");
+            log::info!("[{id:?}] ");
         }
     }
 
+    #[test]
+    fn iter_roots_rev() {
+        let file = open_btrfs_file();
+        let vol = Volume::new(file).expect("volume");
+        let v2 = vol.into_volume2().expect("volume2");
+
+        log::info!("roots:");
+        for (id, v) in v2.roots.iter() {
+            log::info!("[{id:?}] ");
+        }
+        log::info!("roots rev:");
+        for (id, v) in v2.roots.iter().rev() {
+            log::info!("[{id:?}] ");
+        }
+    }
+
+    #[test]
+    fn find_key_sym() -> Result<()> {
+        let file = open_btrfs_file();
+        let vol = Volume::new(file).expect("volume");
+        let v2 = vol.into_volume2().expect("volume2");
+
+        let root_tree =
+            Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?;
+
+        // we are looking for the root tree directory (?)
+        // this is a DIR_ITEM entry in the root tree, with the name "default",
+        // and the crc32 of "default" as its offset
+        let key = Key::new(
+            KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()),
+            ObjectType::DirItem,
+            0x8dbfc2d2, // crc of "default"
+        );
+
+        let subvol_root = root_tree.find_node(&key)?;
+        let other = root_tree.find_node_rev(&key)?;
+        assert_eq!(subvol_root, other);
+        log::info!("{subvol_root:?}");
+        Ok(())
+    }
+
+    #[test]
+    fn default_subvol_items() -> Result<()> {
+        let file = open_btrfs_file();
+        let vol = Volume::new(file).expect("volume");
+        let v2 = vol.into_volume2().expect("volume2");
+
+        let root_tree =
+            Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?;
+
+        // we are looking for the root tree directory (?)
+        // this is a DIR_ITEM entry in the root tree, with the name "default",
+        // and the crc32 of "default" as its offset
+        let key = Key::new(
+            KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()),
+            ObjectType::DirItem,
+            0x8dbfc2d2, // crc of "default"
+        );
+
+        let subvol_root = root_tree
+            .find_key(&key)?
+            .ok_or(Error::NoDefaultSubvolRoot)?;
+        // if we found the dir entry of the "default subvol" (mharmstone nomenclature)
+        // we then look for the root fs tree in the root tree with the ID found
+        // in the `.location` of the dir_item only (from mharmstone)
+
+        let subvol_id = subvol_root
+            .1
+            .as_dir_item()
+            .expect("dir item")
+            .first()
+            .expect("dir item entry")
+            .item()
+            .location
+            .id();
+
+        log::info!("subvol_id: {subvol_id:?}");
+
+        let search_key = PartialKey::new(Some(subvol_id), None, None);
+
+        for (key, v) in root_tree.iter() {
+            if search_key.eq(&key.key) {
+                log::info!("[{key:?}] {v:#?}");
+            }
+        }
+
+        let fs = v2.default_subvolume().expect("default subvol");
+
+        // so we go from root_item.dirid as inode number
+        // to (dir_id, dir_index, ?) which is all the files in the directory
+        // generally, (<inode_number>, dir_index, idx) will give us all indices in the directory
+        // walking the entire tree for this seems quite slow though...
+        // so we would want a function here that can find a range and a way to
+        // define a range with a partial key.
+
+        // that would look something like limiting the range to everything equal
+        // to the id or type, if present, in that order. ofc if the id is
+        // missing, the range cannot be restricted at all.
+
+        let search_key = PartialKey::new(
+            Some(fs.root_item.root_dirid.get().into()),
+            Some(ObjectType::DirIndex),
+            None,
+        );
+
+        log::info!("iter:");
+        for (key, v) in fs.fs_root.iter() {
+            if search_key.eq(&key.key) {
+                log::info!("[{key:?}] {v:#?}");
+            }
+        }
+        log::info!("iter: [end]");
+
+        // with range
+
+        log::info!("range:");
+        for (key, v) in fs.fs_root.find_range(&search_key)? {
+            log::info!("[{key:?}] {v:#?}");
+        }
+        log::info!("range: [end]");
+
+        Ok(())
+    }
+
     #[test]
     fn iter_default_subvol() {
         let file = open_btrfs_file();