finding nodes by range (fast)

This commit is contained in:
Janis 2023-04-01 16:35:39 +02:00
parent 8ce787a837
commit 2af01747e7
3 changed files with 331 additions and 73 deletions

View file

@ -795,7 +795,7 @@ pub enum DirItemType {
} }
#[repr(C, packed)] #[repr(C, packed)]
#[derive(Debug, Clone, Copy, FromBytes, AsBytes)] #[derive(Clone, Copy, FromBytes, AsBytes)]
pub struct DirItem { pub struct DirItem {
pub location: Key, pub location: Key,
pub transid: U64<LE>, pub transid: U64<LE>,
@ -804,6 +804,18 @@ pub struct DirItem {
ty: u8, ty: u8,
} }
impl Debug for DirItem {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("DirItem")
.field("location", &self.location)
.field("transid", &self.transid)
.field("data_len", &self.data_len)
.field("name_len", &self.name_len)
.field("ty", &self.ty())
.finish()
}
}
impl NameLength for DirItem { impl NameLength for DirItem {
fn name_length(&self) -> usize { fn name_length(&self) -> usize {
self.name_len.get() as usize self.name_len.get() as usize

View file

@ -68,7 +68,6 @@ pub struct NodeHandle {
#[derive(Debug)] #[derive(Debug)]
pub struct Range<R: super::Read> { pub struct Range<R: super::Read> {
volume: Rc<Volume<R>>, volume: Rc<Volume<R>>,
parents: Vec<NodeHandle>,
start: RootOrEdge, start: RootOrEdge,
end: RootOrEdge, end: RootOrEdge,
} }
@ -100,17 +99,18 @@ impl<R: super::Read> Tree<R> {
Ok(Self { volume, root }) Ok(Self { volume, root })
} }
pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>( fn find_node_inner<K, F>(&self, key: &K, find: F) -> Result<Option<NodeHandle>>
&self, where
key: &K, K: PartialEq<Key> + PartialOrd<Key>,
) -> Result<Option<(Item, TreeItem)>> { F: Fn(NodeHandle, &K) -> SearchResult,
{
let mut node = NodeHandle::start(self.root.clone()); let mut node = NodeHandle::start(self.root.clone());
loop { loop {
let search = node.find_key(key); let search = find(node, key);
match search { match search {
SearchResult::Leaf(a) => { SearchResult::Leaf(a) => {
return a.parse_item(); return Ok(Some(a));
} }
SearchResult::Edge(mut edge) => match &edge.node.inner { SearchResult::Edge(mut edge) => match &edge.node.inner {
BTreeNode::Internal(internal) => { BTreeNode::Internal(internal) => {
@ -135,6 +135,54 @@ impl<R: super::Read> Tree<R> {
} }
} }
pub fn find_node_rev<K>(&self, key: &K) -> Result<Option<NodeHandle>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
self.find_node_inner(key, NodeHandle::find_key_rev)
}
pub fn find_node<K>(&self, key: &K) -> Result<Option<NodeHandle>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
self.find_node_inner(key, NodeHandle::find_key)
}
pub fn find_key<K>(&self, key: &K) -> Result<Option<(Item, TreeItem)>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
match self.find_node(key)? {
Some(node) => node.parse_item(),
None => Ok(None),
}
}
pub fn find_key_rev<K>(&self, key: &K) -> Result<Option<(Item, TreeItem)>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
match self.find_node_rev(key)? {
Some(node) => node.parse_item(),
None => Ok(None),
}
}
pub fn find_range<K>(&self, key: &K) -> Result<Range<R>>
where
K: PartialEq<Key> + PartialOrd<Key>,
{
let start = self.find_node(key)?;
let end = self.find_node_rev(key)?;
Ok(Range::from_handles(
self.volume.clone(),
start.unwrap_or(NodeHandle::start(self.root.clone())),
end.unwrap_or(NodeHandle::end(self.root.clone())),
))
}
pub fn iter(&self) -> Range<R> { pub fn iter(&self) -> Range<R> {
Range::new(self.volume.clone(), self.root.clone(), self.root.clone()) Range::new(self.volume.clone(), self.root.clone(), self.root.clone())
} }
@ -279,7 +327,7 @@ impl RootOrEdge {
.clone() .clone()
} }
pub fn into_next_leaf_any<R, F>( fn into_next_node_any<R, F>(
self, self,
volume: &super::volume::Volume<R>, volume: &super::volume::Volume<R>,
f: F, f: F,
@ -291,38 +339,64 @@ impl RootOrEdge {
) -> core::result::Result<NodeHandle, NodeHandle>, ) -> core::result::Result<NodeHandle, NodeHandle>,
R: super::Read, R: super::Read,
{ {
let mut node = match self { let node = match self {
RootOrEdge::Root(root) => { RootOrEdge::Root(root) => Ok(root),
// use self if is leaf already, otherwise forward to next leaf RootOrEdge::Edge(edge) => f(edge, volume),
if root.node.inner.is_leaf() {
return Ok(Self::Edge(root));
} else {
root
}
}
RootOrEdge::Edge(edge) => edge,
}; };
let leaf = loop { match node {
match f(node, volume) { Ok(node) => Ok(Self::Edge(node)),
Ok(next) => { Err(node) => Ok(Self::Edge(node)),
if next.node.inner.is_leaf() { }
break Ok(next);
} else {
node = next;
}
}
Err(last) => {
break Err(Self::Edge(last));
}
}
};
// turn leaf into Self::Edge
leaf.map(|leaf| Self::Edge(leaf))
} }
/// returns the next RootOrEdge, or the end of the tree as Err pub fn into_next_node<R>(
self,
volume: &super::volume::Volume<R>,
) -> core::result::Result<Self, Self>
where
R: super::Read,
{
self.into_next_node_any(volume, NodeHandle::into_next)
}
pub fn into_next_back_node<R>(
self,
volume: &super::volume::Volume<R>,
) -> core::result::Result<Self, Self>
where
R: super::Read,
{
self.into_next_node_any(volume, NodeHandle::into_next_back)
}
fn into_next_leaf_any<R, F>(
self,
volume: &super::volume::Volume<R>,
f: F,
) -> core::result::Result<Self, Self>
where
F: Fn(
NodeHandle,
&super::volume::Volume<R>,
) -> core::result::Result<NodeHandle, NodeHandle>,
R: super::Read,
{
// this will either give us the next immediate node, or self if self was a Root.
let mut this = self.into_next_node_any(volume, &f)?;
loop {
// we are only interested in leaf nodes
if this.node.inner.is_leaf() {
break Ok(this);
} else {
// this will eventually return Err(self) so will always return
this = this.into_next_node_any(volume, &f)?;
}
}
}
/// returns the next leaf, or the end of the tree as Err
pub fn into_next_leaf<R: super::Read>( pub fn into_next_leaf<R: super::Read>(
self, self,
volume: &super::volume::Volume<R>, volume: &super::volume::Volume<R>,
@ -392,15 +466,18 @@ impl<R> Range<R>
where where
R: super::Read, R: super::Read,
{ {
pub fn new(volume: Rc<Volume<R>>, start: Rc<Node>, end: Rc<Node>) -> Self { pub fn from_handles(volume: Rc<Volume<R>>, start: NodeHandle, end: NodeHandle) -> Self {
Self { Self {
volume, volume,
parents: Default::default(), start: RootOrEdge::Root(start),
start: RootOrEdge::Root(NodeHandle::start(start)), end: RootOrEdge::Root(end),
end: RootOrEdge::Root(NodeHandle::end(end)),
} }
} }
pub fn new(volume: Rc<Volume<R>>, start: Rc<Node>, end: Rc<Node>) -> Self {
Self::from_handles(volume, NodeHandle::start(start), NodeHandle::end(end))
}
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
return self.start == self.end; return self.start == self.end;
} }
@ -413,19 +490,22 @@ where
type Item = (Item, TreeItem); type Item = (Item, TreeItem);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if !self.is_empty() { loop {
replace_with::replace_with_or_abort(&mut self.start, |start| { if !self.is_empty() {
match start.into_next_leaf(&self.volume) { replace_with::replace_with_or_abort(&mut self.start, |start| {
Ok(next) => next, match start.into_next_node(&self.volume) {
Err(next) => next, Ok(next) => next,
Err(next) => next,
}
});
if self.start.node.inner.is_leaf() {
break self.start.as_handle().parse_item().expect("range item");
} }
}); // else repeat
} else {
let item = self.start.as_handle().parse_item().expect("range item"); break None;
}
item
} else {
None
} }
} }
} }
@ -435,19 +515,22 @@ where
R: super::Read, R: super::Read,
{ {
fn next_back(&mut self) -> Option<Self::Item> { fn next_back(&mut self) -> Option<Self::Item> {
if !self.is_empty() { loop {
replace_with::replace_with_or_abort(&mut self.end, |end| { if !self.is_empty() {
match end.into_next_back_leaf(&self.volume) { replace_with::replace_with_or_abort(&mut self.end, |start| {
Ok(next) => next, match start.into_next_back_node(&self.volume) {
Err(next) => next, Ok(next) => next,
Err(next) => next,
}
});
if self.end.node.inner.is_leaf() {
break self.end.as_handle().parse_item().expect("range item");
} }
}); // else repeat
} else {
let item = self.start.as_handle().parse_item().expect("range item"); break None;
}
item
} else {
None
} }
} }
} }
@ -481,6 +564,41 @@ impl NodeHandle {
} }
} }
pub fn find_key_rev<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult {
match &self.node.inner {
BTreeNode::Internal(node) => {
for (i, child) in node.children.iter().enumerate().rev() {
match key.partial_cmp(&child.key) {
Some(core::cmp::Ordering::Greater)
| Some(core::cmp::Ordering::Equal)
| None => {
return SearchResult::Edge(NodeHandle {
idx: i as u32,
..self
});
}
_ => {}
}
}
SearchResult::Edge(NodeHandle { idx: 0, ..self })
}
BTreeNode::Leaf(node) => {
for (i, child) in node.items.iter().enumerate().rev() {
if key.eq(&child.key) {
return SearchResult::Leaf(NodeHandle {
idx: i as u32,
..self
});
}
}
log::debug!("key definitely not found!");
SearchResult::Edge(NodeHandle { idx: 0, ..self })
}
}
}
pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult { pub fn find_key<K: PartialEq<Key> + PartialOrd<Key>>(self, key: &K) -> SearchResult {
match &self.node.inner { match &self.node.inner {
BTreeNode::Internal(node) => { BTreeNode::Internal(node) => {
@ -607,8 +725,8 @@ impl NodeHandle {
// go up // go up
match parents.pop() { match parents.pop() {
Some((node, idx)) => Self { Some((node, idx)) => Self {
idx: (idx + 1).min(node.inner.header().nritems.get()),
node, node,
idx: idx + 1,
parents, parents,
} }
.into_next(volume), .into_next(volume),

View file

@ -78,13 +78,14 @@ pub struct Volume<R: super::Read> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Volume2<R: super::Read> { pub struct Volume2<R: super::Read> {
inner: Rc<Volume<R>>, inner: Rc<Volume<R>>,
roots: BTreeMap<KnownObjectId, Tree<R>>, roots: BTreeMap<KnownObjectId, (RootItem, Tree<R>)>,
} }
// TODO: find better name // TODO: find better name
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Fs<R: super::Read> { pub struct Fs<R: super::Read> {
volume: Rc<Volume2<R>>, volume: Rc<Volume2<R>>,
root_item: RootItem,
fs_root: Tree<R>, fs_root: Tree<R>,
} }
@ -172,7 +173,7 @@ impl<R: super::Read> Volume<R> {
Ok(self) Ok(self)
} }
fn parse_root_tree(self: Rc<Self>) -> Result<BTreeMap<KnownObjectId, Tree<R>>> { fn parse_root_tree(self: Rc<Self>) -> Result<BTreeMap<KnownObjectId, (RootItem, Tree<R>)>> {
let root_tree_root = self.superblock().root.get(); let root_tree_root = self.superblock().root.get();
let root_tree = Tree::from_logical_offset(self.clone(), root_tree_root)?; let root_tree = Tree::from_logical_offset(self.clone(), root_tree_root)?;
@ -184,9 +185,9 @@ impl<R: super::Read> Volume<R> {
}) })
.map(|(item, root)| { .map(|(item, root)| {
let id = item.key.id(); let id = item.key.id();
let a = Tree::from_logical_offset(self.clone(), root.bytenr.get()); let tree = Tree::from_logical_offset(self.clone(), root.bytenr.get());
a.map(|root| (id, root)) tree.map(|tree| (id, (root, tree)))
}) })
.collect::<Result<BTreeMap<_, _>>>()?; .collect::<Result<BTreeMap<_, _>>>()?;
@ -332,20 +333,23 @@ impl<R: super::Read> Volume2<R> {
.location .location
.id(); .id();
let root = self let (root_item, fs_root) = self
.roots .roots
.get(&subvol_id) .get(&subvol_id)
.ok_or(Error::NoDefaultSubvolFsRoot)? .ok_or(Error::NoDefaultSubvolFsRoot)?
.clone(); .clone();
Ok(Fs { Ok(Fs {
volume: self.clone(), volume: self.clone(),
fs_root: root.clone(), root_item,
fs_root,
}) })
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::v2::tree::PartialKey;
use super::*; use super::*;
use std::fs::File; use std::fs::File;
use test_log::test; use test_log::test;
@ -371,10 +375,134 @@ mod tests {
log::info!("roots:"); log::info!("roots:");
for (id, v) in v2.roots.iter() { for (id, v) in v2.roots.iter() {
log::info!("[{id:?}] {v:#?}"); log::info!("[{id:?}] ");
} }
} }
#[test]
fn iter_roots_rev() {
let file = open_btrfs_file();
let vol = Volume::new(file).expect("volume");
let v2 = vol.into_volume2().expect("volume2");
log::info!("roots:");
for (id, v) in v2.roots.iter() {
log::info!("[{id:?}] ");
}
log::info!("roots rev:");
for (id, v) in v2.roots.iter().rev() {
log::info!("[{id:?}] ");
}
}
#[test]
fn find_key_sym() -> Result<()> {
let file = open_btrfs_file();
let vol = Volume::new(file).expect("volume");
let v2 = vol.into_volume2().expect("volume2");
let root_tree =
Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?;
// we are looking for the root tree directory (?)
// this is a DIR_ITEM entry in the root tree, with the name "default",
// and the crc32 of "default" as its offset
let key = Key::new(
KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()),
ObjectType::DirItem,
0x8dbfc2d2, // crc of "default"
);
let subvol_root = root_tree.find_node(&key)?;
let other = root_tree.find_node_rev(&key)?;
assert_eq!(subvol_root, other);
log::info!("{subvol_root:?}");
Ok(())
}
#[test]
fn default_subvol_items() -> Result<()> {
let file = open_btrfs_file();
let vol = Volume::new(file).expect("volume");
let v2 = vol.into_volume2().expect("volume2");
let root_tree =
Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?;
// we are looking for the root tree directory (?)
// this is a DIR_ITEM entry in the root tree, with the name "default",
// and the crc32 of "default" as its offset
let key = Key::new(
KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()),
ObjectType::DirItem,
0x8dbfc2d2, // crc of "default"
);
let subvol_root = root_tree
.find_key(&key)?
.ok_or(Error::NoDefaultSubvolRoot)?;
// if we found the dir entry of the "default subvol" (mharmstone nomenclature)
// we then look for the root fs tree in the root tree with the ID found
// in the `.location` of the dir_item only (from mharmstone)
let subvol_id = subvol_root
.1
.as_dir_item()
.expect("dir item")
.first()
.expect("dir item entry")
.item()
.location
.id();
log::info!("subvol_id: {subvol_id:?}");
let search_key = PartialKey::new(Some(subvol_id), None, None);
for (key, v) in root_tree.iter() {
if search_key.eq(&key.key) {
log::info!("[{key:?}] {v:#?}");
}
}
let fs = v2.default_subvolume().expect("default subvol");
// so we go from root_item.dirid as inode number
// to (dir_id, dir_index, ?) which is all the files in the directory
// generally, (<inode_number>, dir_index, idx) will give us all indices in the directory
// walking the entire tree for this seems quite slow though...
// so we would want a function here that can find a range and a way to
// define a range with a partial key.
// that would look something like limiting the range to everything equal
// to the id or type, if present, in that order. ofc if the id is
// missing, the range cannot be restricted at all.
let search_key = PartialKey::new(
Some(fs.root_item.root_dirid.get().into()),
Some(ObjectType::DirIndex),
None,
);
log::info!("iter:");
for (key, v) in fs.fs_root.iter() {
if search_key.eq(&key.key) {
log::info!("[{key:?}] {v:#?}");
}
}
log::info!("iter: [end]");
// with range
log::info!("range:");
for (key, v) in fs.fs_root.find_range(&search_key)? {
log::info!("[{key:?}] {v:#?}");
}
log::info!("range: [end]");
Ok(())
}
#[test] #[test]
fn iter_default_subvol() { fn iter_default_subvol() {
let file = open_btrfs_file(); let file = open_btrfs_file();