From 3332e59453a0cdd903289e05580855485f9ea9fb Mon Sep 17 00:00:00 2001 From: Janis Date: Thu, 9 Jan 2025 18:12:47 +0100 Subject: [PATCH] i genuinely believe this is close to working --- crates/renderer/src/buffers.rs | 25 + crates/renderer/src/images.rs | 1 - crates/renderer/src/render_graph.rs | 782 ++++----------------------- crates/renderer/src/util.rs | 796 ++++++++++++++++++++++++++++ 4 files changed, 932 insertions(+), 672 deletions(-) diff --git a/crates/renderer/src/buffers.rs b/crates/renderer/src/buffers.rs index e74d1f2..7ea570b 100644 --- a/crates/renderer/src/buffers.rs +++ b/crates/renderer/src/buffers.rs @@ -26,6 +26,17 @@ pub struct BufferDesc { pub alloc_flags: vk_mem::AllocationCreateFlags, } +impl std::hash::Hash for BufferDesc { + fn hash(&self, state: &mut H) { + self.flags.hash(state); + self.size.hash(state); + self.usage.hash(state); + self.queue_families.hash(state); + self.mem_usage.hash(state); + self.alloc_flags.bits().hash(state); + } +} + impl std::fmt::Debug for BufferDesc { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("BufferDesc") @@ -49,6 +60,20 @@ impl std::fmt::Debug for BufferDesc { } } +impl Eq for BufferDesc {} +impl PartialEq for BufferDesc { + fn eq(&self, other: &Self) -> bool { + self.flags == other.flags + // for hashmaps, `Eq` may be more strict than `Hash` + && self.name == other.name + && self.size == other.size + && self.usage == other.usage + && self.queue_families == other.queue_families + && self.mem_usage == other.mem_usage + && self.alloc_flags.bits() == other.alloc_flags.bits() + } +} + impl Default for BufferDesc { fn default() -> Self { Self { diff --git a/crates/renderer/src/images.rs b/crates/renderer/src/images.rs index fa6da7d..23041fc 100644 --- a/crates/renderer/src/images.rs +++ b/crates/renderer/src/images.rs @@ -37,7 +37,6 @@ pub struct ImageDesc { impl std::hash::Hash for ImageDesc { fn hash(&self, state: &mut H) { self.flags.hash(state); - self.name.hash(state); self.format.hash(state); self.kind.hash(state); self.mip_levels.hash(state); diff --git a/crates/renderer/src/render_graph.rs b/crates/renderer/src/render_graph.rs index a5618dd..0984de7 100644 --- a/crates/renderer/src/render_graph.rs +++ b/crates/renderer/src/render_graph.rs @@ -22,7 +22,15 @@ use petgraph::{ visit::{EdgeRef, IntoNodeReferences, NodeRef}, }; -def_monotonic_id!(pub GraphResourceId); +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct GraphResourceId(pub(crate) u32); + +impl Display for GraphResourceId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "#{}", self.0) + } +} #[derive(Debug, Clone)] pub enum GraphResourceDesc { @@ -30,13 +38,50 @@ pub enum GraphResourceDesc { Buffer(BufferDesc), } -#[derive(Debug, PartialEq, Eq)] +impl From for GraphResource { + fn from(value: GraphResourceDesc) -> Self { + match value { + GraphResourceDesc::Image(image_desc) => Self::ImageDesc(image_desc), + GraphResourceDesc::Buffer(buffer_desc) => Self::BufferDesc(buffer_desc), + } + } +} + +#[derive(Default, Debug, PartialEq, Eq)] pub enum GraphResource { Framebuffer(Arc), ImportedImage(Arc), ImportedBuffer(Arc), Image(Arc), Buffer(Buffer), + ImageDesc(ImageDesc), + BufferDesc(BufferDesc), + #[default] + Default, +} + +impl GraphResource { + fn simple_hash(&self) -> u64 { + use std::hash::{Hash, Hasher}; + let mut state = std::hash::DefaultHasher::new(); + let discr = core::mem::discriminant(self); + discr.hash(&mut state); + + match self { + 
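+            // Hash each variant's identity on top of the discriminant: the frame
+            // index plus image handle for framebuffers, raw Vulkan handles for
+            // imported and owned resources, the desc's own Hash impl for
+            // not-yet-created resources, and nothing extra for Default.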
GraphResource::Framebuffer(swapchain_frame) => { + (swapchain_frame.index, swapchain_frame.image.handle()).hash(&mut state) + } + GraphResource::ImportedImage(image) => image.handle().hash(&mut state), + GraphResource::ImportedBuffer(buffer) => buffer.handle().hash(&mut state), + GraphResource::Image(image) => image.handle().hash(&mut state), + GraphResource::Buffer(buffer) => buffer.handle().hash(&mut state), + GraphResource::ImageDesc(image_desc) => image_desc.hash(&mut state), + GraphResource::BufferDesc(buffer_desc) => buffer_desc.hash(&mut state), + GraphResource::Default => {} + } + + state.finish() + } } #[derive(Debug, Clone, Copy)] @@ -55,12 +100,12 @@ pub enum StoreOp { pub struct RenderContext<'a> { pub device: device::Device, pub cmd: commands::SingleUseCommand, - pub resources: &'a BTreeMap, + pub resources: &'a [GraphResource], } impl RenderContext<'_> { pub fn get_image(&self, id: GraphResourceId) -> Option<&Arc> { - self.resources.get(&id).and_then(|res| match res { + self.resources.get(id.0 as usize).and_then(|res| match res { GraphResource::ImportedImage(arc) => Some(arc), GraphResource::Image(image) => Some(image), GraphResource::Framebuffer(fb) => Some(&fb.image), @@ -68,7 +113,7 @@ impl RenderContext<'_> { }) } pub fn get_buffer(&self, id: GraphResourceId) -> Option<&Buffer> { - self.resources.get(&id).and_then(|res| match res { + self.resources.get(id.0 as usize).and_then(|res| match res { GraphResource::ImportedBuffer(arc) => Some(arc.as_ref()), GraphResource::Buffer(buffer) => Some(buffer), _ => None, @@ -102,13 +147,13 @@ pub struct AccessMask { pub mask: vk::AccessFlags2, } impl AccessMask { - fn undefined() -> Self { + pub fn empty() -> Self { Self { stage: vk::PipelineStageFlags2::NONE, mask: vk::AccessFlags2::empty(), } } - fn is_empty(&self) -> bool { + pub fn is_empty(&self) -> bool { self.stage.is_empty() && self.mask.is_empty() } } @@ -305,9 +350,8 @@ def_monotonic_id!(pub RenderGraphPassId); // to find resource_descs which are eq, but whose liveness doesn't overlap. #[derive(Debug)] pub struct RenderGraph { - resource_descs: BTreeMap, - resources: BTreeMap, - accesses: BTreeMap, + resources: Vec, + accesses: Vec, pass_descs: Vec, /// the rendergraph produces these resources. Any passes on which these /// outputs do not depend are pruned. 
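The manual `Hash` and `PartialEq` impls added to `BufferDesc` above (and the removal of `name` from `ImageDesc::hash`) rely on the std invariant that `k1 == k2` must imply `hash(k1) == hash(k2)`: hashing fewer fields than `Eq` compares only adds collisions, which `HashMap` resolves through `Eq`. A minimal standalone sketch of that property, using a hypothetical `Desc` type rather than the crate's real structs:

use std::collections::HashMap;
use std::hash::{Hash, Hasher};

// Hypothetical stand-in for BufferDesc: `name` participates in Eq but not Hash.
#[derive(Eq)]
struct Desc {
    name: &'static str,
    size: u64,
}

impl PartialEq for Desc {
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name && self.size == other.size
    }
}

impl Hash for Desc {
    // Hash a subset of the Eq fields: equal values still hash equally,
    // unequal values may collide, which the map resolves via Eq.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.size.hash(state);
    }
}

fn main() {
    let mut cache = HashMap::new();
    cache.insert(Desc { name: "staging", size: 256 }, 0u32);
    cache.insert(Desc { name: "uniform", size: 256 }, 1u32);

    // Both keys hash identically, but Eq keeps them as distinct entries.
    assert_eq!(cache.len(), 2);
    assert_eq!(cache[&Desc { name: "uniform", size: 256 }], 1);
}

The reverse split, hashing a field that `Eq` ignores, would be the unsound direction, since equal keys could then land in different buckets.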
@@ -317,18 +361,21 @@ pub struct RenderGraph { impl RenderGraph { pub fn new() -> Self { Self { - resource_descs: BTreeMap::new(), - resources: BTreeMap::new(), + resources: Vec::new(), pass_descs: Vec::new(), - accesses: BTreeMap::new(), + accesses: Vec::new(), outputs: Vec::new(), } } + fn get_next_resource_id(&mut self) -> GraphResourceId { + GraphResourceId(self.resources.len() as u32) + } + pub fn add_resource(&mut self, desc: GraphResourceDesc) -> GraphResourceId { - let id = GraphResourceId::new(); - self.resource_descs.insert(id, desc); - self.accesses.insert(id, Access::undefined()); + let id = self.get_next_resource_id(); + self.resources.push(desc.into()); + self.accesses.push(Access::undefined()); id } pub fn mark_as_output(&mut self, id: GraphResourceId) { @@ -336,16 +383,16 @@ impl RenderGraph { self.outputs.push(id); } pub fn import_resource(&mut self, res: GraphResource, access: Access) -> GraphResourceId { - if let Some((&id, _)) = self + if let Some(i) = self .resources .iter() - .find(|(_, resident)| &&res == resident) + .position(|other| res.simple_hash() == other.simple_hash()) { - id + GraphResourceId(i as u32) } else { - let id = GraphResourceId::new(); - self.resources.insert(id, res); - self.accesses.insert(id, access); + let id = self.get_next_resource_id(); + self.resources.push(res); + self.accesses.push(access); id } } @@ -358,10 +405,7 @@ impl RenderGraph { self.import_resource(res, access) } pub fn import_framebuffer(&mut self, frame: Arc) -> GraphResourceId { - let id = GraphResourceId::new(); - self.resources.insert(id, GraphResource::Framebuffer(frame)); - self.mark_as_output(id); - id + self.import_resource(GraphResource::Framebuffer(frame), Access::undefined()) } pub fn add_pass(&mut self, pass: PassDesc) { self.pass_descs.push(pass); @@ -374,670 +418,63 @@ impl RenderGraph { &mut self, device: device::Device, ) -> crate::Result>> { - // create internal resources: - for (&id, desc) in self.resource_descs.iter() { - tracing::trace!("creating resource {id:?} with {desc:?}"); - match desc.clone() { - GraphResourceDesc::Image(image_desc) => { - self.resources.insert( - id, - GraphResource::Image(Arc::new(Image::new(device.clone(), image_desc)?)), - ); - } - GraphResourceDesc::Buffer(buffer_desc) => { - self.resources.insert( - id, - GraphResource::Buffer(Buffer::new(device.clone(), buffer_desc)?), - ); - } - } - } let now = std::time::Instant::now(); - #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] - enum PassNode { - First, - Pass(usize), - Last, - } + let mut refmap = util::asdf::NodeRefsMap::new(self.resources.len(), self.pass_descs.len()); - impl PassNode { - fn into_node_idx(&self) -> u32 { - match self { - PassNode::First => 0, - PassNode::Last => 1, - PassNode::Pass(i) => 2 + *i as u32, - } - } - fn into_u32(&self, max_i: u32) -> u32 { - match self { - PassNode::First => 0, - PassNode::Last => max_i + 1, - PassNode::Pass(i) => 1 + *i as u32, - } - } - fn range_full(from: Self, to: Self, max_i: u32) -> std::ops::RangeInclusive { - from.into_u32(max_i)..=to.into_u32(max_i) - } - fn from_u32(v: u32, max_i: u32) -> Self { - match v { - 0 => Self::First, - n if n == 1 + max_i => Self::Last, - n => Self::Pass(n as usize - 1), - } - } - } + refmap.allocate_ref_ranges(&self.pass_descs); + refmap.ref_passes(&self.pass_descs); + refmap.ref_inputs(&self.resources); + refmap.ref_outputs(&self.outputs); + let dag = refmap.build_dag(); + let topo = refmap.toposort_dag(dag); - #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] - enum 
RefAccess { - __Min, - None, - Read(Access), - Write(Access), - __Max, - } - - #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] - struct GraphRef { - pass: PassNode, - resource: GraphResourceId, - access: RefAccess, - } - - // gather references to resources. - let (references, intervals) = util::timed("build reference and interval trees", || { - let mut references = BTreeSet::::new(); - - // interval for each resource from (first pass referencing, last pass referencing) - let mut intervals = BTreeMap::::new(); - - // the root node creates and transitions resources added to the - // graph. don't want to create any resources which are never used - // after `First`. a newly created resource has no layout and no - // writes to make-available. - for rid in self.resource_descs.keys() { - references.insert(GraphRef { - pass: PassNode::First, - resource: *rid, - access: RefAccess::Write(Access::undefined()), - }); - } - for rid in self.resources.keys() { - references.insert(GraphRef { - pass: PassNode::First, - resource: *rid, - access: RefAccess::Write( - self.accesses - .get(rid) - .cloned() - .unwrap_or(Access::undefined()), - ), - }); - - intervals.insert(*rid, (PassNode::First, PassNode::First)); - } - - for (i, pass) in self.pass_descs.iter().enumerate() { - let mut reads = BTreeMap::new(); - for (rid, access) in &pass.reads { - reads - .entry(*rid) - .and_modify(|entry| { - *entry = *entry | *access; - }) - .or_insert(*access); - - intervals - .entry(*rid) - .and_modify(|entry| { - entry.1 = PassNode::Pass(i); - }) - .or_insert((PassNode::Pass(i), PassNode::Pass(i))); - } - references.extend(reads.into_iter().map(|(resource, access)| GraphRef { - pass: PassNode::Pass(i), - resource, - access: RefAccess::Read(access), - })); - - let mut writes = BTreeMap::new(); - for (rid, access) in &pass.writes { - writes - .entry(*rid) - .and_modify(|entry| { - *entry = *entry | *access; - }) - .or_insert(*access); - - intervals - .entry(*rid) - .and_modify(|entry| { - entry.1 = PassNode::Pass(i); - }) - .or_insert((PassNode::Pass(i), PassNode::Pass(i))); - } - - references.extend(writes.into_iter().map(|(resource, access)| GraphRef { - pass: PassNode::Pass(i), - resource, - access: RefAccess::Write(access), - })); - } - - // any resource marked as output should be created and returned even - // if it isn't referenced by any pass. - for rid in &self.outputs { - references.insert(GraphRef { - pass: PassNode::Last, - resource: *rid, - access: RefAccess::None, - }); - - intervals - .entry(*rid) - .and_modify(|entry| { - entry.1 = PassNode::Last; - }) - .or_insert((PassNode::Last, PassNode::Last)); - } - - (references, intervals) - }); - - #[derive(Debug, Clone, Copy)] - enum Barrier { - Logical, - Execution { - src: vk::PipelineStageFlags2, - dst: vk::PipelineStageFlags2, - }, - LayoutTransition { - src: (vk::PipelineStageFlags2, vk::ImageLayout), - dst: (vk::PipelineStageFlags2, vk::ImageLayout), - }, - - MakeAvailable { - src: (vk::PipelineStageFlags2, vk::AccessFlags2), - dst: vk::PipelineStageFlags2, - }, - MakeVisible { - src: vk::PipelineStageFlags2, - dst: (vk::PipelineStageFlags2, vk::AccessFlags2), - }, - MemoryBarrier { - src: (vk::PipelineStageFlags2, vk::AccessFlags2), - dst: (vk::PipelineStageFlags2, vk::AccessFlags2), - }, - } - - impl Display for Barrier { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Barrier::Logical => write!(f, "Logical"), - Barrier::Execution { .. } => write!(f, "Execution"), - Barrier::LayoutTransition { .. 
} => write!(f, "Layout"), - Barrier::MakeAvailable { .. } => write!(f, "MakeAvailable"), - Barrier::MakeVisible { .. } => write!(f, "MakeVisible"), - Barrier::MemoryBarrier { .. } => write!(f, "MemoryBarrier"), - } - } - } - - let pass_count = self.pass_descs.len() as u32; - - // build graph from references. - let mut dag = util::timed("construct dag", || { - let mut edges = Vec::new(); - - intervals.iter().for_each(|(&rid, &(from, to))| { - #[derive(Clone, Copy, Debug)] - enum PreviousRef { - Write(PassNode, Access), - Read(PassNode, Access), - } - impl PreviousRef { - fn node(&self) -> PassNode { - match self { - PreviousRef::Write(pass_node, _) => *pass_node, - PreviousRef::Read(pass_node, _) => *pass_node, - } - } - fn access(&self) -> Access { - match self { - PreviousRef::Write(_, access) => *access, - PreviousRef::Read(_, access) => *access, - } - } - } - // writes not yet made available - let mut to_make_available = AccessMask::undefined(); - // writes already made-visible - let mut made_visible = AccessMask::undefined(); - let mut last_read = Option::::None; - let mut last_write = Option::::None; - let mut last_ref = Option::::None; - - // pass, read, write - let mut current_pass = (from, Access::empty(), Access::empty()); - for pass in PassNode::range_full(from, to, pass_count) - .map(|i| PassNode::from_u32(i, pass_count)) - { - let mut range = references.range( - GraphRef { - pass, - resource: rid, - access: RefAccess::__Min, - }..GraphRef { - pass, - resource: rid, - access: RefAccess::__Max, - }, - ); - - while let Some(a) = range.next() { - if a.pass != current_pass.0 { - if current_pass.1 != Access::empty() { - last_read = Some(PreviousRef::Read(current_pass.0, current_pass.1)); - last_ref = Some(PreviousRef::Read(current_pass.0, current_pass.1)); - } - if current_pass.2 != Access::empty() { - last_write = - Some(PreviousRef::Write(current_pass.0, current_pass.2)); - last_ref = Some(PreviousRef::Write(current_pass.0, current_pass.2)); - } - - current_pass = (a.pass, Access::empty(), Access::empty()); - } - - // TODO: VkEvents can make this more - // fine-grained but also probably have zero - // real-world benefit :< - - match a.access { - RefAccess::None => { - // make-available previous writes - // no-op edge between previous reference and a.pass - - if let Some(last_ref) = last_ref.as_ref() { - edges - .push(((last_ref.node(), a.pass), (rid, Barrier::Logical))); - } - // because this is the last node, setting last_ref isn't required. - } - RefAccess::Read(access) => { - // - if read: no writes pending, check for - // layout transition, otherwise an edge to - // previous write. make sure it is only executed - // once. - // - if write: make-available writes + make-visible for reads - let make_visible_mask = access.into_access_mask() & !made_visible; - made_visible = made_visible | make_visible_mask; - - match last_ref { - None => {} - Some(PreviousRef::Read(pass_node, before)) => { - if let Some(last_write) = last_write.as_ref() { - if !make_visible_mask.is_empty() { - // make-visible reads. 
- edges.push(( - (last_write.node(), a.pass), - ( - rid, - Barrier::MakeVisible { - src: last_write.access().stage, - dst: ( - make_visible_mask.stage, - make_visible_mask.mask, - ), - }, - ), - )); - } else { - // still require a after b - edges.push(( - (last_write.node(), a.pass), - ( - rid, - Barrier::Execution { - src: last_write.access().stage, - dst: access.stage, - }, - ), - )); - } - } - - if before.layout != access.layout { - edges.push(( - (pass_node, a.pass), - ( - rid, - Barrier::LayoutTransition { - src: (before.stage, before.layout.unwrap()), - dst: (access.stage, access.layout.unwrap()), - }, - ), - )); - } - } - Some(PreviousRef::Write(write, before)) => { - // make writes visible - if !make_visible_mask.is_empty() { - edges.push(( - (write, a.pass), - ( - rid, - Barrier::MakeVisible { - src: before.stage, - dst: ( - make_visible_mask.stage, - make_visible_mask.mask, - ), - }, - ), - )); - } - // make all writes available - if !to_make_available.is_empty() { - edges.push(( - (write, a.pass), - ( - rid, - Barrier::MakeAvailable { - src: ( - to_make_available.stage, - to_make_available.mask, - ), - dst: access.stage, - }, - ), - )); - // mark that we've made all pending writes available - to_make_available = AccessMask::undefined(); - } - - if make_visible_mask.is_empty() - && to_make_available.is_empty() - { - // still require a after b - edges.push(( - (write, a.pass), - ( - rid, - Barrier::Execution { - src: before.stage, - dst: access.stage, - }, - ), - )); - } - } - } - current_pass.1 = current_pass.1 | access; - } - RefAccess::Write(access) => { - // - if read: execution barrier against write-after-read - // - if write: check for layout transition, otherwise a no-op edge. - to_make_available = to_make_available | access.into_access_mask(); - - match last_ref { - None => {} - Some(PreviousRef::Read(pass_node, before)) => { - // execution barrier to ward against write-after-read - edges.push(( - (pass_node, a.pass), - ( - rid, - Barrier::Execution { - src: before.stage, - dst: access.stage, - }, - ), - )); - } - Some(PreviousRef::Write(pass_node, before)) => { - if before.layout != access.layout { - // as far as I understand the spec, - // this already makes-available - edges.push(( - (pass_node, a.pass), - ( - rid, - Barrier::LayoutTransition { - src: (before.stage, before.layout.unwrap()), - dst: (access.stage, access.layout.unwrap()), - }, - ), - )); - } - // write_no_sync: pass tells us that - // writes do not interleave. 
- if let Some(last_read) = last_read.as_ref() { - edges.push(( - (last_read.node(), a.pass), - (rid, Barrier::Logical), - )); - } - } - } - - current_pass.2 = current_pass.2 | access; - } - _ => unreachable!(), - }; - } - } - }); - - let mut dag = petgraph::stable_graph::StableDiGraph::new(); - dag.add_node(PassNode::First); - dag.add_node(PassNode::Last); - for i in 0..self.pass_descs.len() { - dag.add_node(PassNode::Pass(i)); - } - - for ((from, to), weight) in edges { - dag.add_edge( - from.into_node_idx().into(), - to.into_node_idx().into(), - weight, - ); - } - - loop { - let sinks = dag - .externals(petgraph::Direction::Outgoing) - .filter(|idx| dag.node_weight(*idx) != Some(&PassNode::Last)) - .collect::>(); - if sinks.is_empty() { - break; - } - for sink in sinks { - dag.remove_node(sink); - } - } - - // #[cfg(any(debug_assertions, test))] - // std::fs::write( - // "render_graph2.dot", - // &format!( - // "{:?}", - // petgraph::dot::Dot::with_attr_getters( - // &dag, - // &[], - // &|_graph, edgeref| { - // format!( - // "label = \"{},{:#?}\"", - // edgeref.weight().0.as_u32(), - // edgeref.weight().1, - // ) - // }, - // &|_graph, noderef| { format!("label = \"Pass({:?})\"", noderef.weight()) } - // ) - // ), - // ) - // .expect("writing render_graph repr"); - - dag - }); - - // TODO: rewrite finding edges properly. - // finding out if this graph is cyclical is actually non-trivial - // some pass might require both a read of a resource 1, and a read of a resource 2, where 2 is the product of another pass writing to resource 1. - // this could be resolved by copying resource 1 before the write pass. - // tl;dr: write-after-read makes this all more complicated - - let mut topological_map = Vec::new(); - - // create topological map of DAG from sink to source - loop { - let (sinks, passes): (Vec<_>, Vec<_>) = dag - .externals(petgraph::Direction::Outgoing) - //.filter(|&id| id != root) - .filter_map(|id| dag.node_weight(id).cloned().map(|idx| (id, idx))) - .unzip(); - - if sinks.is_empty() { - break; - } - - let mut barriers = BTreeMap::new(); - - for &sink in &sinks { - dag.edges_directed(sink, petgraph::Direction::Incoming) - .for_each(|edge| { - let (rid, barrier) = edge.weight(); - - let before_and_after = match *barrier { - Barrier::Logical => None, - Barrier::Execution { src, dst } => Some(( - Access { - stage: src, - ..Access::empty() - }, - Access { - stage: dst, - ..Access::empty() - }, - )), - Barrier::LayoutTransition { - src: (src, from), - dst: (dst, to), - } => Some(( - Access { - stage: src, - layout: Some(from), - ..Access::empty() - }, - Access { - stage: dst, - layout: Some(to), - ..Access::empty() - }, - )), - Barrier::MakeAvailable { - src: (stage, mask), - dst, - } => Some(( - Access { - stage, - mask, - ..Access::empty() - }, - Access { - stage: dst, - ..Access::empty() - }, - )), - Barrier::MakeVisible { - src, - dst: (stage, mask), - } => Some(( - Access { - stage: src, - ..Access::empty() - }, - Access { - stage, - mask, - ..Access::empty() - }, - )), - Barrier::MemoryBarrier { - src: (src_stage, src_mask), - dst: (dst_stage, dst_mask), - } => Some(( - Access { - stage: src_stage, - mask: src_mask, - ..Access::empty() - }, - Access { - stage: dst_stage, - mask: dst_mask, - ..Access::empty() - }, - )), - }; - - if let Some((before, after)) = before_and_after { - // initial access is transitioned at the beginning - // this affects imported resources only. 
- barriers - .entry(*rid) - .and_modify(|(from, to)| { - *from = *from | before; - *to = *to | after; - }) - .or_insert((before, after)); - } - }); - dag.remove_node(sink); - } - - let passes = passes - .into_iter() - .filter_map(|pass| { - if let PassNode::Pass(i) = pass { - Some(i) - } else { - None - } - }) - .map(|i| core::mem::take(&mut self.pass_descs[i])) - .collect::>(); - - topological_map.push((passes, barriers)); - } - - //tracing::debug!("mapping: {topological_map:#?}"); - // I don't think this can currently happen with the way passes are added. - dag.remove_node(0.into()); - if dag.node_count() > 0 { - eprintln!("dag: {dag:?}"); - panic!("dag is cyclic!"); - } - - tracing::debug!( - "resolving render graph: {}ms", + eprintln!( + "resolved render graph in {}ms", now.elapsed().as_micros() as f32 / 1e3 ); + // create internal resources: + for (i, res) in self.resources.iter_mut().enumerate() { + match res { + GraphResource::ImageDesc(image_desc) => { + tracing::trace!("creating resource #{i:?} with {image_desc:?}"); + *res = GraphResource::Image(Arc::new(Image::new( + device.clone(), + image_desc.clone(), + )?)); + } + GraphResource::BufferDesc(buffer_desc) => { + tracing::trace!("creating resource #{i:?} with {buffer_desc:?}"); + *res = GraphResource::Buffer(Buffer::new(device.clone(), buffer_desc.clone())?); + } + _ => {} + } + } + let now = std::time::Instant::now(); let pool = commands::SingleUseCommandPool::new(device.clone(), device.graphics_queue().clone())?; let resources = &self.resources; - let cmds = topological_map + let cmds = topo .into_iter() .rev() + .map(|(passes, accesses)| { + let passes = passes + .into_iter() + .filter_map(|i| i.unpack().get_pass_idx()) + .map(|i| core::mem::take(&mut self.pass_descs[i])) + .collect::>(); + (passes, accesses) + }) .map({ |(passes, accesses)| { let cmd = pool.alloc()?; // transitions for (&id, &(from, to)) in accesses.iter() { Self::transition_resource( - resources.get(&id).unwrap(), + &resources[id.0 as usize], device.dev(), unsafe { &cmd.buffer() }, from, @@ -1075,7 +512,7 @@ impl RenderGraph { let outputs = self .outputs .iter() - .filter_map(|id| self.resources.remove(id).map(|res| (*id, res))) + .map(|id| (*id, core::mem::take(&mut self.resources[id.0 as usize]))) .collect::>(); outputs @@ -1104,6 +541,9 @@ impl RenderGraph { GraphResource::Buffer(buffer) => { buffer_barrier(buffer.handle(), 0, buffer.len(), from, to, None).into() } + _ => { + unreachable!() + } }; unsafe { diff --git a/crates/renderer/src/util.rs b/crates/renderer/src/util.rs index 316b657..1be9b69 100644 --- a/crates/renderer/src/util.rs +++ b/crates/renderer/src/util.rs @@ -414,3 +414,799 @@ impl<'a, T: 'a> DerefMut for WithLifetime<'a, T> { &mut self.0 } } + +bitflags::bitflags! 
{ + pub struct PipelineAccess: u32 { + const TRANSFER = 1 << 0; + const VERTEX_ATTRIBUTE_INPUT = 1 << 1; + const DRAW_INDIRECT = 1 << 2; + const VERTEX_INPUT = 1 << 3; + } +} + +pub mod asdf { + use std::collections::{BTreeMap, BTreeSet}; + use std::fmt::Display; + + use ash::vk; + + use crate::render_graph::*; + + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + #[repr(u8)] + pub enum PassNode { + First, + Pass(u16), + Last, + } + + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + #[repr(transparent)] + pub struct PackedPassNode(u16); + impl PackedPassNode { + pub fn unpack(self) -> PassNode { + match self.0 { + 0 => PassNode::First, + 1 => PassNode::Last, + n => PassNode::Pass(n - 2), + } + } + pub fn pack(i: PassNode) -> Self { + Self(match i { + PassNode::First => 0, + PassNode::Last => 1, + PassNode::Pass(n) => n + 2, + }) + } + } + + impl From for PackedPassNode { + fn from(value: PassNode) -> Self { + Self::pack(value) + } + } + impl From for PassNode { + fn from(value: PackedPassNode) -> Self { + PackedPassNode::unpack(value) + } + } + + impl PassNode { + pub fn dag_index(&self) -> u32 { + match self { + PassNode::First => 0, + PassNode::Last => 1, + PassNode::Pass(i) => 2 + *i as u32, + } + } + pub fn into_u32(&self, max_i: u32) -> u32 { + match self { + PassNode::First => 0, + PassNode::Last => max_i + 1, + PassNode::Pass(i) => 1 + *i as u32, + } + } + fn range_full(from: Self, to: Self, max_i: u32) -> std::ops::RangeInclusive { + from.into_u32(max_i)..=to.into_u32(max_i) + } + pub fn pass(i: usize) -> Self { + Self::Pass(i as u16) + } + pub fn get_pass_idx(&self) -> Option { + match self { + PassNode::First | PassNode::Last => None, + PassNode::Pass(i) => Some(*i as usize), + } + } + pub fn from_u32(v: u32, max_i: u32) -> Self { + match v { + 0 => Self::First, + n if n == 1 + max_i => Self::Last, + n => Self::Pass(n as u16 - 1), + } + } + } + + #[derive(Debug, Clone, Copy)] + pub enum Barrier { + Logical, + Execution { + src: vk::PipelineStageFlags2, + dst: vk::PipelineStageFlags2, + }, + LayoutTransition { + src: (vk::PipelineStageFlags2, vk::ImageLayout), + dst: (vk::PipelineStageFlags2, vk::ImageLayout), + }, + + MakeAvailable { + src: (vk::PipelineStageFlags2, vk::AccessFlags2), + dst: vk::PipelineStageFlags2, + }, + MakeVisible { + src: vk::PipelineStageFlags2, + dst: (vk::PipelineStageFlags2, vk::AccessFlags2), + }, + MemoryBarrier { + src: (vk::PipelineStageFlags2, vk::AccessFlags2), + dst: (vk::PipelineStageFlags2, vk::AccessFlags2), + }, + } + + impl Display for Barrier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Barrier::Logical => write!(f, "Logical"), + Barrier::Execution { .. } => write!(f, "Execution"), + Barrier::LayoutTransition { .. } => write!(f, "Layout"), + Barrier::MakeAvailable { .. } => write!(f, "MakeAvailable"), + Barrier::MakeVisible { .. } => write!(f, "MakeVisible"), + Barrier::MemoryBarrier { .. 
} => write!(f, "MemoryBarrier"), + } + } + } + + pub struct NodeRefsMap { + num_resources: usize, + num_passes: usize, + // bitmap of passes referencing rid + references: Vec, + + // range into ref_accesses*: start, end, index + ref_ranges: Vec<(u32, u32, u32)>, + ref_accesses: Vec<(Access, Access)>, + ref_access_passid: Vec, + } + + impl NodeRefsMap { + pub fn new(num_resources: usize, num_passes: usize) -> Self { + Self { + num_resources, + num_passes, + references: vec![0; ((num_passes + 2) * num_resources).div_ceil(64) as usize], + ref_ranges: Vec::new(), + ref_accesses: Vec::new(), + ref_access_passid: Vec::new(), + } + } + + pub fn allocate_ref_ranges(&mut self, passes: &[PassDesc]) { + let mut rid_passcount = vec![0; self.num_resources]; + + for pass in passes.iter() { + for rid in pass + .reads + .iter() + .chain(pass.writes.iter()) + .map(|id| id.0) + .collect::>() + { + rid_passcount[rid.0 as usize] += 1; + } + } + + tracing::debug!("per-resource pass-count: {rid_passcount:?}"); + + let mut total = 0; + for num_passes in rid_passcount { + self.ref_ranges.push((total, total + num_passes, 0)); + self.ref_accesses + .extend((0..num_passes).map(|_| (Access::empty(), Access::empty()))); + self.ref_access_passid + .extend((0..num_passes).map(|_| PackedPassNode(0))); + total += num_passes; + } + + tracing::debug!( + "ref_ranges and ref_accesses:\n{:?}\n{:?}\n{:?}", + self.ref_ranges, + self.ref_accesses, + self.ref_access_passid + ); + // for resourcedesc in resources: ref first pass + } + + fn get_accesses_for_rid_pass_mut( + &mut self, + rid: GraphResourceId, + pass: PackedPassNode, + ) -> &mut (Access, Access) { + let (start, _, i) = self.ref_ranges[rid.0 as usize]; + + let idx = self.ref_access_passid[start as usize..(start + i) as usize] + .binary_search(&pass) + .unwrap_or_else(|_| { + // increase counter + self.ref_ranges[rid.0 as usize].2 += 1; + i as usize + }) + + start as usize; + + self.ref_access_passid[idx] = pass; + + &mut self.ref_accesses[idx] + } + + fn get_reads_for_rid_pass_mut( + &mut self, + rid: GraphResourceId, + pass: PackedPassNode, + ) -> &mut Access { + &mut self.get_accesses_for_rid_pass_mut(rid, pass).0 + } + fn get_writes_for_rid_pass_mut( + &mut self, + rid: GraphResourceId, + pass: PackedPassNode, + ) -> &mut Access { + &mut self.get_accesses_for_rid_pass_mut(rid, pass).1 + } + + fn get_accesses_for_rid_pass( + &self, + rid: GraphResourceId, + pass: PackedPassNode, + ) -> Option<(Access, Access)> { + let (start, _, i) = self.ref_ranges[rid.0 as usize]; + + let idx = self.ref_access_passid[start as usize..(start + i) as usize] + .binary_search(&pass) + .ok()? 
+ + start as usize; + + Some(self.ref_accesses[idx]) + } + + fn get_reads_for_rid_pass( + &self, + rid: GraphResourceId, + pass: PackedPassNode, + ) -> Option { + Some(self.get_accesses_for_rid_pass(rid, pass)?.0) + } + fn get_writes_for_rid_pass( + &self, + rid: GraphResourceId, + pass: PackedPassNode, + ) -> Option { + Some(self.get_accesses_for_rid_pass(rid, pass)?.1) + } + + fn reference_rid_pass(&mut self, rid: GraphResourceId, pass: PackedPassNode) { + let bit_idx = rid.0 as usize * (self.num_passes + 2) + pass.0 as usize; + let word_idx = bit_idx / 64; + let word_offset = bit_idx % 64; + tracing::debug!( + bit_idx, + word_idx, + word_offset, + "pass: {pass:?} references rid: {rid:?} " + ); + self.references[word_idx] |= 1 << word_offset; + } + + pub fn ref_passes(&mut self, passes: &[PassDesc]) { + for (i, pass) in passes.iter().enumerate() { + let packed_pass = PassNode::pass(i).into(); + + for &(rid, access) in &pass.reads { + let read = self.get_reads_for_rid_pass_mut(rid, packed_pass); + *read = *read | access; + + // TODO: check for first pass as well + self.reference_rid_pass(rid, PassNode::pass(i).into()); + } + + for &(rid, access) in &pass.writes { + let write = self.get_writes_for_rid_pass_mut(rid, packed_pass); + *write = *write | access; + + // TODO: check for first pass as well + self.reference_rid_pass(rid, PassNode::pass(i).into()); + } + } + } + + pub fn ref_inputs(&mut self, resources: &[GraphResource]) { + for (i, resource) in resources.iter().enumerate() { + match resource { + GraphResource::ImageDesc(_) | GraphResource::BufferDesc(_) => { + self.reference_rid_pass(GraphResourceId(i as u32), PassNode::First.into()); + } + _ => {} + } + } + } + + pub fn ref_outputs(&mut self, outputs: &[GraphResourceId]) { + for &rid in outputs { + self.reference_rid_pass(rid, PassNode::Last.into()); + } + } + + pub fn build_dag( + &self, + ) -> petgraph::stable_graph::StableDiGraph + { + struct Edge { + from: PackedPassNode, + to: PackedPassNode, + rid: GraphResourceId, + barrier: Barrier, + } + + #[derive(Debug, Clone, Copy)] + enum Ref { + Write(PackedPassNode, Access), + Read(PackedPassNode, Access), + } + + impl Ref { + fn node(&self) -> PackedPassNode { + match self { + Ref::Write(node, _) | Ref::Read(node, _) => *node, + } + } + fn access(&self) -> Access { + match self { + Ref::Write(_, access) | Ref::Read(_, access) => *access, + } + } + } + + let mut edges = Vec::::new(); + + let bits = crate::util::BitIter::new( + &self.references, + self.num_resources * (self.num_passes + 2), + ) + .chunks(self.num_passes + 2); + + tracing::debug!("building edges:"); + tracing::debug!("chunks: {bits:#?}"); + for (i, bits) in bits.enumerate() { + let rid = GraphResourceId(i as u32); + tracing::debug!("rid: {rid:?}"); + tracing::debug!("passes: {bits}"); + + let mut to_make_available = AccessMask::empty(); + let mut made_available = AccessMask::empty(); + + let mut last_ref = Option::::None; + let mut last_read = Option::::None; + let mut last_write = Option::::None; + + for pass in bits { + let packed_pass = PackedPassNode(pass as u16); + tracing::debug!("pass: {:?}", packed_pass.unpack()); + + let read = self.get_reads_for_rid_pass(rid, packed_pass); + if let Some(read) = read { + tracing::debug!("read: {:?}", read); + let make_visible = read.into_access_mask() & !made_available; + if let Some(last_write) = last_write.as_ref() { + let from = last_write.node(); + let to = packed_pass; + let from_write = last_write.access(); + + // if last_write is some, make visible the writes + if 
!make_visible.is_empty() { + made_available = made_available | make_visible; + + edges.push(Edge { + from, + to, + rid, + barrier: Barrier::MakeVisible { + src: from_write.stage, + dst: (make_visible.stage, make_visible.mask), + }, + }); + } + + // make available any changes + if !to_make_available.is_empty() { + edges.push(Edge { + from, + to, + rid, + barrier: Barrier::MakeAvailable { + src: (to_make_available.stage, to_make_available.mask), + dst: read.stage, + }, + }); + to_make_available = AccessMask::empty(); + } + + if make_visible.is_empty() && !to_make_available.is_empty() { + // still require a-after-b + edges.push(Edge { + from, + to, + rid, + barrier: Barrier::Execution { + src: from_write.stage, + dst: read.stage, + }, + }); + } + } + + // layout transition from previous pass, either read or write + if let Some(last_ref) = last_ref.as_ref() { + if last_ref.access().layout != read.layout { + let from = last_ref.node(); + let to = packed_pass; + edges.push(Edge { + from, + to, + rid, + barrier: Barrier::LayoutTransition { + src: ( + last_ref.access().stage, + last_ref.access().layout.unwrap(), + ), + dst: (read.stage, read.layout.unwrap()), + }, + }); + } + } + } + + let write = self.get_writes_for_rid_pass(rid, packed_pass); + if let Some(write) = write { + tracing::debug!("write: {:?}", write); + match last_ref.as_ref() { + Some(Ref::Read(node, before)) => { + // execution barrier to ward against write-after-read + + edges.push(Edge { + from: *node, + to: packed_pass, + rid, + barrier: Barrier::Execution { + src: before.stage, + dst: write.stage, + }, + }); + } + Some(Ref::Write(node, before)) => { + // check for layout transition here + if before.layout != write.layout { + edges.push(Edge { + from: *node, + to: packed_pass, + rid, + barrier: Barrier::LayoutTransition { + src: (before.stage, before.layout.unwrap()), + dst: (write.stage, write.layout.unwrap()), + }, + }); + } + } + _ => {} + } + } + + if let Some(read) = read { + last_read = Some(Ref::Read(packed_pass, read)); + last_ref = last_read; + } + if let Some(write) = write { + last_write = Some(Ref::Write(packed_pass, write)); + last_ref = last_write; + } + } + } + + let mut dag = petgraph::stable_graph::StableDiGraph::< + PackedPassNode, + (GraphResourceId, Barrier), + >::new(); + + let root = dag.add_node(PassNode::First.into()); + let output = dag.add_node(PassNode::Last.into()); + + _ = (root, output); + + for i in 0..self.num_passes { + dag.add_node(PassNode::pass(i).into()); + } + + // insert edges + for edge in edges { + let Edge { + from, + to, + rid, + barrier, + } = edge; + dag.add_edge( + from.unpack().dag_index().into(), + to.unpack().dag_index().into(), + (rid, barrier), + ); + } + + #[cfg(any(debug_assertions, test))] + std::fs::write( + "render_graph2.dot", + &format!( + "{:?}", + petgraph::dot::Dot::with_attr_getters( + &dag, + &[], + &|_graph, edgeref| { + format!( + "label = \"{},{:#?}\"", + edgeref.weight().0, + edgeref.weight().1, + ) + }, + &|_graph, noderef| { + format!( + "label = \"Pass({:?})\"", + petgraph::visit::NodeRef::weight(&noderef) + ) + } + ) + ), + ) + .expect("writing render_graph repr"); + + // prune dead ends + let mut sinks = dag + .externals(petgraph::Direction::Outgoing) + .filter(|idx| dag.node_weight(*idx) != Some(&PassNode::Last.into())) + .collect::>(); + + while let Some(sink) = sinks.pop() { + let mut neighbors = dag + .neighbors_directed(sink, petgraph::Direction::Incoming) + .detach(); + while let Some((edge, node)) = neighbors.next(&dag) { + dag.remove_edge(edge); + + 
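+                    // Removing this edge may have turned `node` into a new dead
+                    // end; re-queue it so the prune loop can drop it as well.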
if dag + .neighbors_directed(node, petgraph::Direction::Outgoing) + .count() + == 0 + { + sinks.push(node); + } + } + dag.remove_node(sink); + } + + dag + } + + pub fn toposort_dag( + &self, + mut dag: petgraph::stable_graph::StableDiGraph< + PackedPassNode, + (GraphResourceId, Barrier), + >, + ) -> Vec<( + Vec, + BTreeMap, + )> { + let mut topomap = Vec::new(); + + let mut sinks = dag + .externals(petgraph::Direction::Incoming) + .collect::>(); + let mut next_sinks = vec![]; + + loop { + if sinks.is_empty() { + break; + } + + let mut passes = Vec::with_capacity(self.num_passes); + let mut barriers = BTreeMap::new(); + for &sink in &sinks { + for &(rid, barrier) in dag + .edges_directed(sink, petgraph::Direction::Incoming) + .map(|edge| edge.weight()) + { + let before_and_after = match barrier { + Barrier::Logical => None, + Barrier::Execution { src, dst } => Some(( + Access { + stage: src, + ..Access::empty() + }, + Access { + stage: dst, + ..Access::empty() + }, + )), + Barrier::LayoutTransition { + src: (src, from), + dst: (dst, to), + } => Some(( + Access { + stage: src, + layout: Some(from), + ..Access::empty() + }, + Access { + stage: dst, + layout: Some(to), + ..Access::empty() + }, + )), + Barrier::MakeAvailable { + src: (stage, mask), + dst, + } => Some(( + Access { + stage, + mask, + ..Access::empty() + }, + Access { + stage: dst, + ..Access::empty() + }, + )), + Barrier::MakeVisible { + src, + dst: (stage, mask), + } => Some(( + Access { + stage: src, + ..Access::empty() + }, + Access { + stage, + mask, + ..Access::empty() + }, + )), + Barrier::MemoryBarrier { + src: (src_stage, src_mask), + dst: (dst_stage, dst_mask), + } => Some(( + Access { + stage: src_stage, + mask: src_mask, + ..Access::empty() + }, + Access { + stage: dst_stage, + mask: dst_mask, + ..Access::empty() + }, + )), + }; + + if let Some((before, after)) = before_and_after { + // initial access is transitioned at the beginning + // this affects imported resources only. 
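+                            // Merge with any barrier already recorded for this
+                            // resource at this topological level by OR-ing the masks.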
+ barriers + .entry(rid) + .and_modify(|(from, to)| { + *from = *from | before; + *to = *to | after; + }) + .or_insert((before, after)); + } + } + + let mut neighbors = dag + .neighbors_directed(sink, petgraph::Direction::Incoming) + .detach(); + while let Some((edge, node)) = neighbors.next(&dag) { + dag.remove_edge(edge); + + if dag + .neighbors_directed(node, petgraph::Direction::Outgoing) + .count() + == 0 + { + next_sinks.push(node); + } + } + + passes.push(*dag.node_weight(sink).unwrap()); + dag.remove_node(sink); + } + + topomap.push((passes, barriers)); + core::mem::swap(&mut sinks, &mut next_sinks); + } + + topomap + } + } +} + +#[derive(Debug, Clone)] +struct BitIter<'a> { + bits: &'a [u64], + num_bits: usize, + bit_offset: usize, + bit_index: usize, +} + +impl<'a> std::fmt::Display for BitIter<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("BitIter") + .field_with("bits", |f| { + write!(f, "[")?; + for bit in self.clone() { + write!(f, "{bit}, ")?; + } + write!(f, " ]") + }) + .finish() + } +} + +impl<'a> BitIter<'a> { + fn new(bits: &'a [u64], num_bits: usize) -> Self { + Self { + bits, + num_bits, + bit_index: 0, + bit_offset: 0, + } + } + + fn chunks(self, chunk_size: usize) -> ChunkedBitIter<'a> { + ChunkedBitIter { + inner: self, + chunk_size, + pos: 0, + } + } +} +impl Iterator for BitIter<'_> { + type Item = usize; + + fn next(&mut self) -> Option { + loop { + if self.bit_index >= self.num_bits { + return None; + } + + let bit_index = self.bit_index + self.bit_offset; + let byte_idx = bit_index / 64; + let byte_offset = bit_index % 64; + self.bit_index += 1; + + if (self.bits[byte_idx] >> byte_offset) & 1 == 1 { + return Some(self.bit_index - 1); + } + } + } +} + +#[derive(Debug)] +struct ChunkedBitIter<'a> { + inner: BitIter<'a>, + chunk_size: usize, + pos: usize, +} + +impl<'a> Iterator for ChunkedBitIter<'a> { + type Item = BitIter<'a>; + fn next(&mut self) -> Option { + if self.pos >= self.inner.num_bits { + return None; + } + let bits = (self.inner.num_bits - self.pos).min(self.chunk_size); + + let iter = BitIter { + bits: &self.inner.bits[self.pos / 64..], + bit_offset: self.pos % 64, + bit_index: 0, + num_bits: bits, + }; + self.pos += bits; + + Some(iter) + } +}
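For reference, the `NodeRefsMap` bitmap above stores one bit per (resource, pass-node) pair at index `rid * (num_passes + 2) + pass`, where packed pass-node 0 is `First`, 1 is `Last`, and `2 + i` is pass `i`; `build_dag` then reads it back one resource-wide stripe at a time via `BitIter::chunks`. A standalone sketch of that indexing scheme, using plain std and hypothetical helper names rather than the crate's API:

// One bit per (resource, pass-node) pair in a flat Vec<u64>.
fn set_ref(bits: &mut [u64], num_pass_nodes: usize, rid: usize, pass: usize) {
    let bit = rid * num_pass_nodes + pass;
    bits[bit / 64] |= 1 << (bit % 64);
}

// Read back the pass nodes referencing one resource, i.e. one
// num_pass_nodes-wide chunk of the bitmap (what ChunkedBitIter yields).
fn refs_of(bits: &[u64], num_pass_nodes: usize, rid: usize) -> Vec<usize> {
    (0..num_pass_nodes)
        .filter(|pass| {
            let bit = rid * num_pass_nodes + pass;
            (bits[bit / 64] >> (bit % 64)) & 1 == 1
        })
        .collect()
}

fn main() {
    let num_passes = 3;
    let num_resources = 2;
    let num_pass_nodes = num_passes + 2; // First + Last + the passes
    let mut bits = vec![0u64; (num_pass_nodes * num_resources).div_ceil(64)];

    set_ref(&mut bits, num_pass_nodes, 0, 0); // resource 0 created by First
    set_ref(&mut bits, num_pass_nodes, 0, 2); // ...and referenced by pass 0
    set_ref(&mut bits, num_pass_nodes, 1, 3); // resource 1 referenced by pass 1
    set_ref(&mut bits, num_pass_nodes, 1, 1); // ...and marked as an output (Last)

    assert_eq!(refs_of(&bits, num_pass_nodes, 0), vec![0, 2]);
    assert_eq!(refs_of(&bits, num_pass_nodes, 1), vec![1, 3]);
}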