fix single-use command buffer; wait for device idle on swapchain drop

Janis 2024-12-21 04:15:18 +01:00
parent 93e64e872f
commit 9d132066d3
3 changed files with 201 additions and 95 deletions
crates/renderer/src


@@ -8,6 +8,7 @@ use ash::{prelude::*, vk};
pub struct SingleUseCommandPool {
device: Device,
pool: vk::CommandPool,
queue: Queue,
}
impl Drop for SingleUseCommandPool {
@@ -19,15 +20,27 @@ impl Drop for SingleUseCommandPool {
}
impl SingleUseCommandPool {
pub fn new(device: Device, family_index: u32) -> VkResult<Self> {
pub fn new(device: Device, queue: Queue) -> VkResult<Arc<Self>> {
let pool_info = vk::CommandPoolCreateInfo::default()
.queue_family_index(family_index)
.queue_family_index(queue.family())
.flags(vk::CommandPoolCreateFlags::TRANSIENT);
let pool =
unsafe { device.dev().create_command_pool(&pool_info, None)? };
Ok(Self { device, pool })
Ok(Arc::new(Self {
device,
pool,
queue,
}))
}
pub fn alloc(self: &Arc<Self>) -> VkResult<SingleUseCommand> {
SingleUseCommand::new(self.device.clone(), self.clone())
}
pub fn queue(&self) -> &Queue {
&self.queue
}
pub fn pool(&self) -> vk::CommandPool {
@@ -37,7 +50,7 @@ impl SingleUseCommandPool {
pub struct SingleUseCommand {
device: Device,
pool: vk::CommandPool,
pool: Arc<SingleUseCommandPool>,
buffer: vk::CommandBuffer,
}
@@ -46,23 +59,28 @@ impl Drop for SingleUseCommand {
unsafe {
self.device
.dev()
.free_command_buffers(self.pool, &[self.buffer])
.free_command_buffers(self.pool.pool(), &[self.buffer])
};
}
}
impl SingleUseCommand {
pub fn new(device: Device, pool: vk::CommandPool) -> VkResult<Self> {
pub fn new(
device: Device,
pool: Arc<SingleUseCommandPool>,
) -> VkResult<Self> {
let buffer = unsafe {
let alloc_info = vk::CommandBufferAllocateInfo::default()
.command_buffer_count(1)
.command_pool(pool)
.command_pool(pool.pool())
.level(vk::CommandBufferLevel::PRIMARY);
let buffer = device.dev().allocate_command_buffers(&alloc_info)?[0];
let begin_info = vk::CommandBufferBeginInfo::default()
.flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT);
device.dev().begin_command_buffer(buffer, &begin_info)?;
device.dev().begin_command_buffer(
buffer,
&vk::CommandBufferBeginInfo::default()
.flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT),
)?;
buffer
};
@@ -73,13 +91,12 @@ impl SingleUseCommand {
})
}
pub fn command_buffer(&self) -> vk::CommandBuffer {
pub fn buffer(&self) -> vk::CommandBuffer {
self.buffer
}
pub fn submit_fence(
&self,
queue: Queue,
wait: Option<(vk::Semaphore, vk::PipelineStageFlags)>,
signal: Option<vk::Semaphore>,
fence: Option<vk::Fence>,
@@ -105,35 +122,37 @@ impl SingleUseCommand {
}
let fence = fence.unwrap_or(vk::Fence::null());
queue.with_locked(|queue| unsafe {
self.pool.queue().with_locked(|queue| unsafe {
self.device.dev().queue_submit(queue, &[submit_info], fence)
})?;
tracing::info!("submitted queue {:?} and fence {:?}", queue, fence);
tracing::info!(
"submitted queue {:?} and fence {:?}",
self.pool.queue(),
fence
);
Ok(())
}
pub fn submit_async<'a>(
&'a self,
queue: Queue,
wait: Option<(vk::Semaphore, vk::PipelineStageFlags)>,
signal: Option<vk::Semaphore>,
fence: Arc<sync::Fence>,
) -> VkResult<FenceFuture<'a>> {
let device = self.device.clone();
self.submit_fence(queue, wait, signal, Some(fence.fence()))?;
self.submit_fence(wait, signal, Some(fence.fence()))?;
Ok(unsafe { FenceFuture::new(fence) })
}
pub fn submit_blocking(
self,
queue: Queue,
wait: Option<(vk::Semaphore, vk::PipelineStageFlags)>,
signal: Option<vk::Semaphore>,
) -> VkResult<()> {
let fence = Arc::new(sync::Fence::create(self.device.clone())?);
let future = self.submit_async(queue, wait, signal, fence)?;
let future = self.submit_async(wait, signal, fence)?;
future.block();
Ok(())
}
@@ -145,7 +164,6 @@ mod tests {
async fn async_submit(cmd: SingleUseCommand, queue: Queue) {
cmd.submit_async(
queue,
None,
None,
Arc::new(sync::Fence::create(cmd.device.clone()).unwrap()),

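For orientation, here is a minimal sketch of how the reworked API from this file might be used after the commit: the pool now owns its `Queue`, command buffers are handed out via `pool.alloc()`, and submission no longer takes a queue argument. The wrapper function, the recorded no-op barrier, and the import paths are illustrative assumptions, not part of the commit.

```rust
use std::sync::Arc;
use ash::{prelude::VkResult, vk};

// Hypothetical helper inside the renderer crate; `Device`, `Queue` and the
// `commands` module are the types touched by this diff.
fn submit_one_shot(device: &Device, graphics: &Queue) -> VkResult<()> {
    // the pool is now constructed from a Queue instead of a bare family index
    let pool = commands::SingleUseCommandPool::new(device.clone(), graphics.clone())?;

    // alloc() replaces building a SingleUseCommand from a raw vk::CommandPool
    let cmd = pool.alloc()?;
    let buffer = cmd.buffer();

    unsafe {
        // record whatever one-shot work is needed (illustrative empty barrier)
        device
            .dev()
            .cmd_pipeline_barrier2(buffer, &vk::DependencyInfo::default());
    }

    // submission now routes through the pool's own queue; no queue parameter
    cmd.submit_blocking(None, None)
}
```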

@@ -1,4 +1,10 @@
#![feature(c_str_module, closure_lifetime_binder, let_chains, negative_impls)]
#![feature(
c_str_module,
closure_lifetime_binder,
let_chains,
negative_impls,
map_try_insert
)]
#![allow(unused)]
use std::{
borrow::Borrow,
@@ -104,13 +110,20 @@ fn compatible_extension_properties(
}
#[derive(Clone, Debug)]
struct Queue(Arc<Mutex<vk::Queue>>);
struct Queue(Arc<Mutex<vk::Queue>>, u32);
impl Queue {
fn new(device: &ash::Device, family: u32, index: u32) -> Self {
Self(Arc::new(Mutex::new(unsafe {
device.get_device_queue(family, index)
})))
Self(
Arc::new(Mutex::new(unsafe {
device.get_device_queue(family, index)
})),
family,
)
}
pub fn family(&self) -> u32 {
self.1
}
pub fn with_locked<T, F: FnOnce(vk::Queue) -> T>(&self, map: F) -> T {
@@ -432,20 +445,21 @@ impl AsRef<ash::khr::surface::Instance> for Instance {
#[derive(Debug, Default)]
struct DeviceQueueFamilies {
graphics: u32,
present: Option<u32>,
async_compute: Option<u32>,
transfer: Option<u32>,
families: Vec<(u32, u32)>,
graphics: (u32, u32),
present: Option<(u32, u32)>,
async_compute: Option<(u32, u32)>,
transfer: Option<(u32, u32)>,
}
impl DeviceQueueFamilies {
fn swapchain_family_indices(&self) -> ArrayVec<[u32; 2]> {
let mut indices = array_vec!([u32; 2] => self.graphics);
let mut indices = array_vec!([u32; 2] => self.graphics.0);
if let Some(present) = self.present
&& present != self.graphics
&& present.0 != self.graphics.0
{
indices.push(present);
indices.push(present.0);
}
indices
@@ -458,6 +472,9 @@ struct DeviceInner {
device: ash::Device,
swapchain: khr::swapchain::Device,
debug_utils: ash::ext::debug_utils::Device,
allocated_queues: BTreeMap<(u32, u32), Queue>,
// these are resident in allocated_queues, and may in fact be clones of each
// other, for ease of access
main_queue: Queue,
compute_queue: Queue,
transfer_queue: Queue,
@@ -465,7 +482,15 @@ struct DeviceInner {
sync_threadpool: sync::SyncThreadpool,
}
#[derive(Clone)]
impl core::fmt::Debug for DeviceInner {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("DeviceInner")
.field("device", &self.device.handle())
.finish()
}
}
#[derive(Clone, Debug)]
pub struct Device(Arc<DeviceInner>);
pub type WeakDevice = std::sync::Weak<DeviceInner>;
@@ -500,6 +525,32 @@ impl Device {
fn present_queue(&self) -> &Queue {
&self.0.present_queue
}
unsafe fn lock_queues(&self) {
// this is obviously awful: lock every queue and mem::forget the guards so
// that unlock_queues() can force_unlock them later
self.0
.allocated_queues
.values()
.for_each(|q| core::mem::forget(q.lock()));
}
unsafe fn unlock_queues(&self) {
self.0
.allocated_queues
.values()
.for_each(|q| unsafe { q.0.force_unlock() });
}
fn wait_idle(&self) -> VkResult<()> {
tracing::warn!("locking all queues and waiting for device to idle");
unsafe {
self.lock_queues();
self.dev().device_wait_idle()?;
self.unlock_queues();
}
tracing::warn!("finished waiting: unlocking all queues.");
Ok(())
}
}
impl AsRef<khr::swapchain::Device> for Device {
@@ -594,6 +645,7 @@ pub struct Swapchain {
impl Drop for Swapchain {
fn drop(&mut self) {
unsafe {
self.device.wait_idle();
info!("dropping swapchain {:?}", self.swapchain);
for view in &self.image_views {
self.device.dev().destroy_image_view(*view, None);
@@ -1497,18 +1549,7 @@ impl Vulkan {
queue_families.find_first(|family| family.is_present)
} else {
None
};
let async_compute =
queue_families.find_first(|family| family.is_compute);
let transfer = queue_families.find_first(|family| family.is_transfer);
// family of each queue, of which one is allocated for each queue, with graphics being the fallback queue for compute and transfer, and present possibly being `None`, in which case it is Graphics
let queues = DeviceQueueFamilies {
graphics,
async_compute,
transfer,
present: present.or({
}.or({
if display_handle.is_none() {
// in this case the graphics queue will be used by default
tracing::info!("no present queue available, using graphics queue as fallback for headless_surface");
@@ -1516,17 +1557,12 @@ impl Vulkan {
} else {
tracing::warn!("no present queue available, this is unexpected!");
None}
}),
};
});
queues
}
let async_compute =
queue_families.find_first(|family| family.is_compute);
let transfer = queue_families.find_first(|family| family.is_transfer);
fn create_device(
instance: Arc<Instance>,
pdev: PhysicalDevice,
features: &mut PhysicalDeviceFeatures,
) -> Result<Device> {
let mut unique_families = BTreeMap::<u32, u32>::new();
let mut helper = |family: u32| {
@@ -1546,24 +1582,43 @@ impl Vulkan {
(family, index)
};
let graphics_family_and_index = helper(pdev.queue_families.graphics);
let compute_family_and_index =
pdev.queue_families.async_compute.map(|f| helper(f));
let transfer_family_and_index =
pdev.queue_families.transfer.map(|f| helper(f));
let present_family_and_index =
pdev.queue_families.present.map(|f| helper(f));
let graphics = helper(graphics);
let async_compute = async_compute.map(|f| helper(f));
let transfer = transfer.map(|f| helper(f));
let present = present.map(|f| helper(f));
let priorities = vec![
1.0f32;
unique_families.iter().fold(0, |acc, (_, num)| acc + *num)
as usize
];
let queue_infos = unique_families
let families = unique_families
.into_iter()
.filter(|&(_, count)| count > 0)
.map(|(family, queues)| {
.filter(|&(_family, count)| count > 0)
.collect::<Vec<_>>();
// the (family, index) of each queue role, with one queue allocated per
// role; graphics is the fallback for compute and transfer, and a `None`
// present falls back to graphics
let queues = DeviceQueueFamilies {
families,
graphics,
async_compute,
transfer,
present,
};
queues
}
fn create_device(
instance: Arc<Instance>,
pdev: PhysicalDevice,
features: &mut PhysicalDeviceFeatures,
) -> Result<Device> {
// we have 4 queues at most: graphics, compute, transfer, present
let priorities = [1.0f32; 4];
let queue_infos = pdev
.queue_families
.families
.iter()
.map(|&(family, queues)| {
vk::DeviceQueueCreateInfo::default()
.queue_family_index(family)
.queue_priorities(&priorities[..queues as usize])
@@ -1588,23 +1643,38 @@ impl Vulkan {
&device_info,
None,
)?;
let main_queue = Queue::new(
&device,
graphics_family_and_index.0,
graphics_family_and_index.1,
);
device.get_device_queue(
graphics_family_and_index.0,
graphics_family_and_index.1,
);
let present_queue = present_family_and_index
.map(|(f, i)| Queue::new(&device, f, i))
let allocated_queues = queue_infos
.iter()
.flat_map(|info| {
(0..info.queue_count).map(|i| {
(
(info.queue_family_index, i),
Queue::new(&device, info.queue_family_index, i),
)
})
})
.collect::<BTreeMap<_, _>>();
let get_queue = |(family, index)| {
allocated_queues.get(&(family, index)).cloned().unwrap()
};
let main_queue = get_queue(pdev.queue_families.graphics);
let present_queue = pdev
.queue_families
.present
.map(get_queue)
.unwrap_or(main_queue.clone());
let compute_queue = compute_family_and_index
.map(|(f, i)| Queue::new(&device, f, i))
let compute_queue = pdev
.queue_families
.async_compute
.map(get_queue)
.unwrap_or(main_queue.clone());
let transfer_queue = transfer_family_and_index
.map(|(f, i)| Queue::new(&device, f, i))
let transfer_queue = pdev
.queue_families
.transfer
.map(get_queue)
.unwrap_or(compute_queue.clone());
Device::new(DeviceInner {
@ -1619,6 +1689,7 @@ impl Vulkan {
&device,
),
instance,
allocated_queues,
main_queue,
present_queue,
compute_queue,
@@ -1944,13 +2015,12 @@ impl Renderer {
let pool = commands::SingleUseCommandPool::new(
dev.clone(),
dev.queue_families().graphics,
dev.graphics_queue().clone(),
)?;
for ctx in self.window_contexts.values() {
let cmd =
commands::SingleUseCommand::new(dev.clone(), pool.pool())?;
let buffer = cmd.command_buffer();
let cmd = pool.alloc()?;
let buffer = cmd.buffer();
let (frame, suboptimal) = smol::block_on(
ctx.current_swapchain.read().clone().acquire_image(),
@@ -2035,7 +2105,6 @@ impl Renderer {
dev.dev().cmd_pipeline_barrier2(buffer, &dependency_info);
let future = cmd.submit_async(
dev.graphics_queue().clone(),
Some((frame.acquire, vk::PipelineStageFlags::ALL_COMMANDS)),
Some(frame.release),
Arc::new(sync::Fence::create(dev.clone())?),

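The queue bookkeeping above boils down to three steps: count how many queues each family needs, key every created queue by `(family, index)`, and fall back along present → graphics, compute → graphics, transfer → compute. A self-contained sketch of that selection logic, with plain strings standing in for `vk::Queue` and all helper names assumed for illustration:

```rust
use std::collections::BTreeMap;

/// Reserve one queue slot in `family`, returning the (family, index) key
/// that will later identify the created queue (mirrors the `helper` closure).
fn reserve(unique: &mut BTreeMap<u32, u32>, family: u32) -> (u32, u32) {
    let index = *unique.entry(family).and_modify(|n| *n += 1).or_insert(1) - 1;
    (family, index)
}

fn main() {
    let mut unique = BTreeMap::new();

    // families as physical-device selection might report them
    let graphics = reserve(&mut unique, 0);
    let async_compute = Some(reserve(&mut unique, 1));
    let transfer: Option<(u32, u32)> = None; // no dedicated transfer family
    let present = Some(reserve(&mut unique, 0)); // shares the graphics family

    // one "queue" per reserved slot, keyed by (family, index)
    let allocated: BTreeMap<(u32, u32), String> = unique
        .iter()
        .flat_map(|(&family, &count)| {
            (0..count).map(move |i| ((family, i), format!("queue {family}.{i}")))
        })
        .collect();

    // same fallback chain as create_device
    let get = |key: (u32, u32)| allocated[&key].clone();
    let main_queue = get(graphics);
    let present_queue = present.map(get).unwrap_or_else(|| main_queue.clone());
    let compute_queue = async_compute.map(get).unwrap_or_else(|| main_queue.clone());
    let transfer_queue = transfer.map(get).unwrap_or_else(|| compute_queue.clone());

    println!("{main_queue} / {present_queue} / {compute_queue} / {transfer_queue}");
}
```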

@@ -9,7 +9,7 @@ use super::Device;
use ash::{prelude::*, vk};
use crossbeam::channel::{Receiver, Sender};
type Message = (Arc<Fence>, std::task::Waker);
type Message = (SyncPrimitive, std::task::Waker);
pub struct SyncThreadpool {
channel: (Sender<Message>, Receiver<Message>),
@@ -19,6 +19,14 @@ pub struct SyncThreadpool {
num_threads: Arc<AtomicU32>,
}
#[derive(Debug)]
enum SyncPrimitive {
Fence(Arc<Fence>),
// actually, I think this is an awful idea because I would have to hold a
// lock on all queues.
DeviceIdle(Device),
}
impl SyncThreadpool {
pub fn new() -> SyncThreadpool {
Self::with_max_threads(512)
@@ -60,16 +68,25 @@ impl SyncThreadpool {
fn run(self, barrier: Arc<std::sync::Barrier>) {
tracing::info!("spawned new sync thread");
barrier.wait();
while let Ok((fence, waker)) =
while let Ok((sync, waker)) =
self.rx.recv_timeout(self.thread_dies_after)
{
tracing::info!(
"received ({:?}, {:?})",
fence,
sync,
waker
);
loop {
match fence.wait_on(Some(self.timeout)) {
let wait_result = match &sync {
SyncPrimitive::Fence(fence) => {
fence.wait_on(Some(self.timeout))
}
SyncPrimitive::DeviceIdle(device) => {
device.wait_idle()
}
};
match wait_result {
Ok(_) => {
waker.wake();
break;
@@ -77,13 +94,15 @@ impl SyncThreadpool {
Err(vk::Result::TIMEOUT) => {}
Err(err) => {
tracing::error!(
"failed to wait on fence in waiter thread: {err}"
"failed to wait on {sync:?} in waiter thread: {err}"
);
break;
}
}
}
}
// decrement the count so a replacement thread can spawn as soon as this one exits
self.num_threads.fetch_sub(1, Ordering::AcqRel);
}
}
@@ -120,7 +139,7 @@ impl SyncThreadpool {
fn spawn_waiter(&self, fence: Arc<Fence>, waker: std::task::Waker) {
use std::sync::atomic::Ordering;
let mut msg = (fence, waker);
let mut msg = (SyncPrimitive::Fence(fence), waker);
while let Err(err) = self.channel.0.try_send(msg) {
match err {
crossbeam::channel::TrySendError::Full(msg2) => {
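To make the sync.rs change concrete, here is a self-contained sketch (std plus crossbeam, with the Vulkan waits stubbed out) of the dispatch pattern the threadpool now uses: worker threads receive an enum of waitable primitives plus a wake callback, poll each wait with a timeout, and exit after an idle receive timeout. Everything except the `SyncPrimitive` variant names is an illustrative assumption.

```rust
use std::time::Duration;

// Stand-ins for the real primitives; a real worker would call
// vkWaitForFences / vkDeviceWaitIdle here instead.
enum SyncPrimitive {
    Fence(u32),
    DeviceIdle,
}

fn wait_on(sync: &SyncPrimitive, timeout: Duration) -> Result<(), &'static str> {
    // pretend every wait signals within one poll interval
    std::thread::sleep(timeout.min(Duration::from_millis(1)));
    match sync {
        SyncPrimitive::Fence(_) | SyncPrimitive::DeviceIdle => Ok(()),
    }
}

fn main() {
    type Message = (SyncPrimitive, Box<dyn FnOnce() + Send>);
    let (tx, rx) = crossbeam::channel::bounded::<Message>(8);

    let worker = std::thread::spawn(move || {
        // like SyncThreadpool::run: exit after an idle period, otherwise
        // poll each primitive until it signals or errors out
        while let Ok((sync, wake)) = rx.recv_timeout(Duration::from_millis(500)) {
            loop {
                match wait_on(&sync, Duration::from_millis(100)) {
                    Ok(()) => {
                        wake(); // stands in for waker.wake()
                        break;
                    }
                    Err("timeout") => continue, // poll again, like vk::Result::TIMEOUT
                    Err(err) => {
                        eprintln!("wait failed: {err}");
                        break;
                    }
                }
            }
        }
    });

    tx.send((SyncPrimitive::Fence(1), Box::new(|| println!("fence signaled")))).unwrap();
    tx.send((SyncPrimitive::DeviceIdle, Box::new(|| println!("device idled")))).unwrap();
    drop(tx); // no more work: the worker exits after its receive timeout
    worker.join().unwrap();
}
```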