use core::mem::size_of; use core::ops::{Range, RangeBounds}; use alloc::collections::btree_map::Entry; use alloc::{collections::BTreeMap, rc::Rc, vec, vec::Vec}; use scroll::Pread; use crate::crc32c::calculate_crc32c; use crate::path::Path; use crate::structs::{ Chunk, DirItemEntry, DirItemType, ExtentData, INodeItem, INodeRefEntry, Item, Key, KeyPtr, KnownObjectId, ObjectType, RootItem, Stripe, Superblock, TreeItem, }; use crate::{Error, Result}; use super::file::INode; use super::tree::{PartialKey, Tree}; /// equal if overlapping, ordered by lower bound #[derive(Debug, Clone)] pub struct ChunkTreeKey { range: core::ops::Range, } impl From for ChunkTreeKey { fn from(value: u64) -> Self { Self { range: value..value, } } } impl ChunkTreeKey { pub fn start(&self) -> u64 { self.range.start } pub fn end(&self) -> u64 { self.range.end } pub fn size(&self) -> u64 { self.range.end - self.range.start } pub fn delta(&self, point: u64) -> u64 { point - self.range.start } pub fn sub_range(&self, point: u64) -> core::ops::Range { self.delta(point)..self.end() } } impl Eq for ChunkTreeKey {} impl Ord for ChunkTreeKey { fn cmp(&self, other: &Self) -> core::cmp::Ordering { self.partial_cmp(other).unwrap() } } impl PartialEq for ChunkTreeKey { fn eq(&self, other: &Self) -> bool { self.range.contains(&other.range.start) || other.range.contains(&self.range.start) } } impl PartialOrd for ChunkTreeKey { fn partial_cmp(&self, other: &Self) -> Option { self.eq(other) .then_some(core::cmp::Ordering::Equal) .or_else(|| self.range.start.partial_cmp(&other.range.start)) } } /// inner volume struct that contains all the info needed to read/parse trees #[derive(Debug, Clone)] pub struct Volume { read: R, superblock: Superblock, chunk_cache: BTreeMap, } // TODO: find better name #[derive(Debug, Clone)] pub struct Volume2 { inner: Rc>, roots: BTreeMap)>, } // TODO: find better name #[derive(Debug, Clone)] pub struct Fs { volume: Rc>, root_item: RootItem, fs_root: Tree, } impl Volume { pub fn new(read: R) -> Result> { let mut sb = vec![0u8; size_of::()]; read.read(&mut sb, Superblock::SUPERBLOCK_BASE_OFFSET as _)?; let superblock = Superblock::parse(&sb)?; let chunk_cache = Self::bootstrap_chunk_tree(&superblock)?; let volume = Rc::new( Self { read, superblock, chunk_cache, } .parse_chunk_tree()?, ); Ok(volume) } pub fn into_volume2(self: Rc) -> Result>> { Ok(Rc::new(Volume2 { inner: self.clone(), roots: self.parse_root_tree()?, })) } fn parse_chunk_tree(mut self) -> Result { log::debug!("parsing chunk tree"); let this = Rc::new(self); let chunk_tree = Tree::from_logical_offset(this.clone(), this.superblock().chunk_root.get())?; let chunks = chunk_tree .iter() .filter_map(|(item, v)| { log::debug!("{:?}", item); match v { TreeItem::Chunk(chunk) => Some((item, chunk)), _ => None, } }) .collect::>(); drop(chunk_tree); self = match Rc::try_unwrap(this) { Ok(v) => v, Err(_) => unreachable!(), }; for (item, chunk) in chunks { let start = item.key.offset.get() as u64; let end = start + chunk.length.get(); match self.chunk_cache.entry(ChunkTreeKey { range: start..end }) { Entry::Vacant(entry) => { log::info!("inserting chunk [{start}, {end})"); entry.insert(chunk.stripe.offset.get()); } Entry::Occupied(entry) => { log::warn!("overlapping stripes!"); log::warn!( "\t{:?} and {:?}", entry.key(), ChunkTreeKey { range: start..end } ); log::warn!( "\twith offsets: {} and {}", entry.get(), chunk.stripe.offset.get() ); if *entry.get() != chunk.stripe.offset.get() { log::error!("\tprobably an error?"); } } } } Ok(self) } fn parse_root_tree(self: Rc) -> Result)>> { let root_tree_root = self.superblock().root.get(); let root_tree = Tree::from_logical_offset(self.clone(), root_tree_root)?; let roots = root_tree .iter() .filter_map(|(item, v)| match v { TreeItem::Root(root) => Some((item, root)), _ => None, }) .map(|(item, root)| { let id = item.key.id(); let tree = Tree::from_logical_offset(self.clone(), root.bytenr.get()); tree.map(|tree| (id, (root, tree))) }) .collect::>>()?; Ok(roots) } fn size_from_logical(&self, logical: u64) -> Option { self.chunk_cache .get_key_value(&logical.into()) .map(|(key, _)| key.size()) } fn offset_from_logical(&self, logical: u64) -> Option { self.chunk_cache .get_key_value(&logical.into()) .map(|(key, offset)| offset + key.delta(logical)) } fn range_from_logical(&self, logical: u64) -> Option> { self.chunk_cache .get_key_value(&logical.into()) .map(|(key, offset)| { let delta = key.delta(logical); (offset + delta)..(offset + key.size() - delta) }) } pub fn read_range_from_logical(&self, logical: u64) -> Result>> { if let Some(range) = self.range_from_logical(logical) { Ok(Some(self.read_range(range)?)) } else { Ok(None) } } pub fn read_range(&self, range: core::ops::Range) -> Result> { let mut buf = vec![0; (range.end - range.start) as usize]; self.read.read(&mut buf, range.start)?; Ok(buf) } pub fn read_keyptr(&self, keyptr: &KeyPtr) -> Result> { self.read_range( self.range_from_logical(keyptr.blockptr.get()) .ok_or(Error::ReadFailed)?, ) } fn bootstrap_chunk_tree(superblock: &Superblock) -> Result> { let array_size = superblock.sys_chunk_array_size.get() as usize; let mut offset: usize = 0; let key_size = size_of::(); let mut chunk_tree = BTreeMap::new(); let bytes = &superblock.sys_chunk_array; while offset < array_size { if offset + key_size > array_size { log::error!("short key read"); return Err(Error::InvalidOffset); } let key = bytes.gread::(&mut offset)?; if key.ty() != ObjectType::ChunkItem { log::error!("key is not of type ChunkItem"); return Err(Error::InvalidOffset); } let chunk = bytes.gread::(&mut offset)?; let num_stripes = chunk.num_stripes.get(); // copy to prevent unaligned access if num_stripes == 0 { log::error!("num_stripes cannot be 0"); return Err(Error::InvalidOffset); } if num_stripes != 1 { log::warn!( "warning: {} stripes detected but only processing 1", num_stripes ); } let key_offset = key.offset.get(); let chunk_length = chunk.length.get(); match chunk_tree.entry(ChunkTreeKey { range: key_offset..(key_offset + chunk_length), }) { Entry::Vacant(entry) => { entry.insert(chunk.stripe.offset.get()); } Entry::Occupied(_) => { log::error!("overlapping stripes!"); return Err(Error::InvalidOffset); } }; offset += (num_stripes - 1) as usize * size_of::(); if offset > array_size { log::error!("short chunk item + stripes read"); return Err(Error::InvalidOffset); } } Ok(chunk_tree) } pub fn superblock(&self) -> Superblock { self.superblock } } impl Volume2 { pub fn default_subvolume(self: Rc) -> Result> { let root_tree = Tree::from_logical_offset(self.inner.clone(), self.inner.superblock().root.get())?; // we are looking for the root tree directory (?) // this is a DIR_ITEM entry in the root tree, with the name "default", // and the crc32 of "default" as its offset let key = Key::new( KnownObjectId::Custom(self.inner.superblock().root_dir_objectid.get()), ObjectType::DirItem, 0x8dbfc2d2, // crc of "default" ); let subvol_root = root_tree .find_key(&key)? .ok_or(Error::NoDefaultSubvolRoot)?; // if we found the dir entry of the "default subvol" (mharmstone nomenclature) // we then look for the root fs tree in the root tree with the ID found in the `.location` of the dir_item only (from mharmstone) let subvol_id = subvol_root .1 .as_dir_item() .expect("dir item") .first() .expect("dir item entry") .item() .location .id(); let (root_item, fs_root) = self .roots .get(&subvol_id) .ok_or(Error::NoDefaultSubvolFsRoot)? .clone(); Ok(Fs { volume: self.clone(), root_item, fs_root, }) } } impl Fs { fn get_inode_item(&self, inode_id: u64) -> Result> { if let Some((item, inoderef)) = self.find_inode_ref(inode_id)? { if let Some(diritem) = self.find_dir_index(item.key.offset.get(), &inoderef)? { let inode = self.find_inode_item(&diritem)?; return Ok(inode); } } Ok(None) } fn get_root_dir(&self) -> INode { INode { id: self.root_item.root_dirid.get(), path: vec![], } } pub fn get_inode_children_inodes( &self, inode: &INode, ) -> Result + '_> { let inode = inode.clone(); self.get_inode_children(&inode).map(|children| { children.map(move |child| { let id: u64 = child.item().location.id().into(); inode.clone().into_child(id, child.into_name()) }) }) } pub fn get_inode_children( &self, inode: &INode, ) -> Result + '_> { let key = PartialKey::new(Some(inode.id()), Some(ObjectType::DirIndex), None); let children = self.fs_root.find_range(&key)?; let a = children.map(|(_, v)| v.try_into_dir_index().expect("dir index")); Ok(a) } pub fn get_inode_parent(&self, inode: &INode) -> Result { if let Some((inode_ref, _)) = self.find_inode_ref(inode.id)? { Ok(INode { id: inode_ref.key.offset.get(), path: inode .path .iter() .take(inode.path.len() - 1) .cloned() .collect(), }) } else { Err(Error::INodeNotFound) } } pub fn get_inode_by_relative_path

(&self, inode: INode, path: P) -> Result where P: Path, { if path.is_absolute() { // stuff self.get_inode_by_path(path) } else { let path = path.normalize().into_iter(); let mut inode = inode; for segment in path { match segment { crate::path::Segment::ParentDir => { inode = self.get_inode_parent(&inode)?; } crate::path::Segment::File(child_name) => { let child = self .get_inode_children_inodes(&inode)? .find(|child| { child.path.last().map(|bytes| bytes.as_slice()) == Some(child_name) }) .ok_or(Error::INodeNotFound)? .clone(); // silly borrow checker inode = child; } _ => unreachable!(), } } Ok(inode) } } pub fn get_inode_by_path

(&self, path: P) -> Result where P: Path, { let mut normalized = path.normalize(); if !path.is_absolute() { log::error!("path is not absolute!"); } else { // pop root _ = normalized.pop_segment(); } let mut inode = self.get_root_dir(); while let Some(segment) = normalized.pop_segment() { match segment { crate::path::Segment::Root | crate::path::Segment::NoOp => {} // do nothing crate::path::Segment::CurrentDir | crate::path::Segment::ParentDir => { unimplemented!() } // not normalized? crate::path::Segment::File(child) => { let dir_item = self .find_inode_child(inode.id, child)? .ok_or(Error::INodeNotFound)?; inode = inode.into_child(dir_item.item().location.id().into(), child.to_vec()); } } } Ok(inode) } fn find_inode_child(&self, parent_inode: u64, child: &[u8]) -> Result> { let crc = calculate_crc32c(0xfffffffe, child); let key = PartialKey::new( Some(parent_inode.into()), Some(ObjectType::DirItem), Some(crc as u64), ); if let Some((_, value)) = self.fs_root.find_key(&key)? { let dir_items = value.as_dir_item().expect("dir index"); let item = dir_items.iter().find(|item| item.name() == child).cloned(); Ok(item) } else { Ok(None) } } fn get_inode_dir_index(&self, inode_id: u64) -> Result> { if let Some((item, inoderef)) = self.find_inode_ref(inode_id)? { self.find_dir_index(item.key.offset.get(), &inoderef) } else { Ok(None) } } fn get_inode_extents(&self, inode_id: u64) -> Result> { if let Some(dir_entry) = self.get_inode_dir_index(inode_id)? { if dir_entry.item().ty() == DirItemType::RegFile { let key = PartialKey::new(Some(inode_id.into()), Some(ObjectType::ExtentData), None); let extents = self.fs_root.find_range(&key)?; let extents = extents .map(|(key, item)| { ( key.key.offset.get(), item.as_extent_data().expect("extent data").clone(), ) }) .collect::>(); Ok(extents) } else { Ok(vec![]) } } else { Err(Error::INodeNotFound) } } fn read_inode_raw>(&self, inode: &INode, range: I) -> Result> { let mut contents = Vec::new(); let extents = self.get_inode_extents(inode.id)?; let start = match range.start_bound() { core::ops::Bound::Included(v) => *v, core::ops::Bound::Excluded(v) => *v + 1, core::ops::Bound::Unbounded => 0, }; let end = match range.end_bound() { core::ops::Bound::Included(v) => Some(*v + 1), core::ops::Bound::Excluded(v) => Some(*v), core::ops::Bound::Unbounded => None, }; for (offset, extent) in extents.into_iter().filter(|(offset, extent)| { let extent_start = *offset; let extent_end = extent_start + extent.len(); let range_len = end.map(|end| end - start); let start2 = start.min(extent_start); let end = end.map(|end| end.max(extent_end)); let len = end.map(|end| (end - start2)); if let (Some(len), Some(range_len)) = (len, range_len) { range_len + range_len < len } else { start2 < extent_end } }) { // let start = start.saturating_sub(offset); let end = end.map(|end| end - offset); log::info!("reading {}..{:?} from extent.", start, end); let data: alloc::borrow::Cow<[u8]> = match &extent { ExtentData::Inline { data, .. } => { // TODO: handle compression and encryption let data = if let Some(end) = end { &data[start as usize..end as usize] } else { &data[start as usize..] }; data.into() } ExtentData::Other(extent) => { let address = extent.address() + extent.offset() + start; let data = self .volume .inner .read_range(address..address + end.unwrap_or(extent.num_bytes())) .expect("bytes"); data.into() } }; log::info!("reading {} bytes from file", data.len()); contents.extend_from_slice(&data); } Ok(contents) } fn find_inode_ref(&self, inode_id: u64) -> Result> { let key = PartialKey::new(Some(inode_id.into()), Some(ObjectType::INodeRef), None); if let Some((item, value)) = self.fs_root.find_key(&key)? { let inode = value.as_inode_ref().expect("inoderef").clone(); Ok(Some((item, inode))) } else { Ok(None) } } fn find_dir_index( &self, parent_inode: u64, inoderef: &INodeRefEntry, ) -> Result> { //let crc = calculate_crc32c(0xfffffffe, &inoderef.name()); let key = PartialKey::new( Some(parent_inode.into()), Some(ObjectType::DirIndex), Some(inoderef.item().index.get()), ); if let Some((_, value)) = self.fs_root.find_key(&key)? { let dir_index = value.as_dir_index().expect("dir index").clone(); Ok(Some(dir_index)) } else { Ok(None) } } fn find_inode_item(&self, dir_item: &DirItemEntry) -> Result> { dir_item.item().location; if let Some((_, value)) = self.fs_root.find_key(&dir_item.item().location)? { let inode = value.as_inode_item().expect("inode item").clone(); Ok(Some(inode)) } else { Ok(None) } } } #[cfg(test)] mod tests { use crate::v2::tree::PartialKey; use super::*; use std::fs::File; use test_log::test; fn open_btrfs_file() -> File { let file = std::fs::File::open("btrfs.img").expect("btrfs image"); file } #[test] fn create_volume() { let file = open_btrfs_file(); let vol = Volume::new(file).expect("volume"); let v2 = vol.into_volume2().expect("volume2"); v2.default_subvolume().expect("default subvol"); } #[test] fn iter_roots() { let file = open_btrfs_file(); let vol = Volume::new(file).expect("volume"); let v2 = vol.into_volume2().expect("volume2"); log::info!("roots:"); for (id, v) in v2.roots.iter() { log::info!("[{id:?}] "); } } #[test] fn iter_roots_rev() { let file = open_btrfs_file(); let vol = Volume::new(file).expect("volume"); let v2 = vol.into_volume2().expect("volume2"); log::info!("roots:"); for (id, v) in v2.roots.iter() { log::info!("[{id:?}] "); } log::info!("roots rev:"); for (id, v) in v2.roots.iter().rev() { log::info!("[{id:?}] "); } } #[test] fn find_key_sym() -> Result<()> { let file = open_btrfs_file(); let vol = Volume::new(file).expect("volume"); let v2 = vol.into_volume2().expect("volume2"); let root_tree = Tree::from_logical_offset(v2.inner.clone(), v2.inner.superblock().root.get())?; // we are looking for the root tree directory (?) // this is a DIR_ITEM entry in the root tree, with the name "default", // and the crc32 of "default" as its offset let key = Key::new( KnownObjectId::Custom(v2.inner.superblock().root_dir_objectid.get()), ObjectType::DirItem, 0x8dbfc2d2, // crc of "default" ); let subvol_root = root_tree.find_node(&key)?; let other = root_tree.find_node_rev(&key)?; assert_eq!(subvol_root, other); log::info!("{subvol_root:?}"); Ok(()) } #[test] fn default_subvol_items() -> Result<()> { let file = open_btrfs_file(); let vol = Volume::new(file).expect("volume"); let v2 = vol.into_volume2().expect("volume2"); let fs = v2.default_subvolume().expect("subvol"); Ok(()) } #[test] fn get_inode_items() -> Result<()> { let file = open_btrfs_file(); let vol = Volume::new(file).expect("volume"); let v2 = vol.into_volume2().expect("volume2"); let fs = v2.default_subvolume().expect("default subvol"); let search_key = PartialKey::new( Some(fs.root_item.root_dirid.get().into()), Some(ObjectType::DirIndex), None, ); // with range log::info!("range:"); for (key, v) in fs.fs_root.find_range(&search_key)? { let dirindex = v.as_dir_index().unwrap(); let inode_id: u64 = dirindex.item().location.id().into(); log::info!("[{key:?}] {v:#?}"); log::info!("inode: {inode_id}"); let inode_item = fs.get_inode_item(inode_id)?; log::info!("inode: {inode_item:#?}"); let extents = fs.get_inode_extents(inode_id)?; for (_, extent) in extents { match extent { ExtentData::Inline { header, data } => { log::info!("{header:?}\n{}", String::from_utf8_lossy(&data)); } _ => {} } } } log::info!("range: [end]"); Ok(()) } #[test] fn find_file() -> Result<()> { let file = open_btrfs_file(); let vol = Volume::new(file).expect("volume"); let v2 = vol.into_volume2().expect("volume2"); let fs = v2.default_subvolume().expect("default subvol"); let root_dir = fs.get_root_dir(); let children = fs.get_inode_children(&root_dir)?.collect::>(); log::info!("chidlren: {:?}", children); let home = fs.get_inode_by_path(b"/home/user")?; let children = fs.get_inode_children(&home)?.collect::>(); log::info!("chidlren: {:?}", children); let hii = fs.get_inode_by_path(b"/home/user/hii.txt")?; let hii2 = fs.get_inode_by_relative_path(home, b"./hii.txt")?; let extents = fs.get_inode_extents(hii.id)?; assert_eq!(hii, hii2); for (_offset, extent) in extents { match extent { ExtentData::Inline { header, data } => { log::info!("{header:?}\n{}", String::from_utf8_lossy(&data)); } _ => {} } } let btrfs = fs.get_inode_by_path(b"/home/user/btrfs")?; let children = fs.get_inode_children_inodes(&btrfs)?.collect::>(); log::info!("chidlren: {:?}", children); for child in children { let file_contents = fs.read_inode_raw(&child, ..).expect("file contents"); log::info!("{}", String::from_utf8_lossy(&file_contents)); } let cmake_list = fs.get_inode_by_path(b"/home/user/btrfs/CMakeLists.txt")?; let file_contents = fs .read_inode_raw(&cmake_list, ..100) .expect("file contents"); log::info!("cmakelists file:"); log::info!("{}", String::from_utf8_lossy(&file_contents)); Ok(()) } #[test] fn iter_default_subvol() { let file = open_btrfs_file(); let vol = Volume::new(file).expect("volume"); let v2 = vol.into_volume2().expect("volume2"); let fs = v2.default_subvolume().expect("default subvol"); log::info!("files 1:"); let now = std::time::Instant::now(); for (_id, entry) in fs.fs_root.iter() { if let Some(dir) = entry.as_dir_index() { //log::info!("{}", dir.name_as_string_lossy()); } } log::info!("files 1: [took {}ms]", now.elapsed().as_millis()); log::info!("files 2:"); let now = std::time::Instant::now(); for (_id, entry) in fs.fs_root.iter() { if let Some(dir) = entry.as_dir_index() { //log::info!("{}", dir.name_as_string_lossy()); } } log::info!("files 2: [took {}ms]", now.elapsed().as_millis()); } }