still deadlocked?

This commit is contained in:
Janis 2025-06-30 16:48:49 +02:00
parent 228aa4d544
commit 09166a8eb7
3 changed files with 38 additions and 180 deletions

View file

@ -1086,15 +1086,16 @@ impl<T> JobSender<T> {
// //
// This concludes my TED talk on why we need to lock here. // This concludes my TED talk on why we need to lock here.
let _guard = (!mutex.is_null()).then(|| { let _guard = unsafe { mutex.as_ref() }.map(|mutex| {
// SAFETY: mutex is a valid pointer to a WorkerLatch let guard = mutex.lock();
unsafe { DropGuard::new(move || {
(&*mutex).lock(); // // SAFETY: we forget the guard here so we no longer borrow the mutex.
DropGuard::new(|| { // mem::forget(guard);
(&*mutex).wake(); _ = guard;
(&*mutex).unlock() mutex.wake();
}) // // SAFETY: we can safely unlock the mutex here, as we are the only ones holding it.
} // mutex.force_unlock();
})
}); });
assert!(self.channel.tag.tag(Ordering::Acquire) & FINISHED == 0); assert!(self.channel.tag.tag(Ordering::Acquire) & FINISHED == 0);

View file

@ -341,20 +341,29 @@ impl AsCoreLatch for MutexLatch {
pub struct WorkerLatch { pub struct WorkerLatch {
// this boolean is set when the worker is waiting. // this boolean is set when the worker is waiting.
mutex: Mutex<bool>, mutex: Mutex<bool>,
condvar: AtomicUsize, condvar: Condvar,
} }
impl WorkerLatch { impl WorkerLatch {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
mutex: Mutex::new(false), mutex: Mutex::new(false),
condvar: AtomicUsize::new(0), condvar: Condvar::new(),
} }
} }
pub fn lock(&self) {
mem::forget(self.mutex.lock()); #[tracing::instrument(level = "trace", skip_all, fields(
this = self as *const Self as usize,
))]
pub fn lock(&self) -> parking_lot::MutexGuard<'_, bool> {
tracing::trace!("aquiring mutex..");
let guard = self.mutex.lock();
tracing::trace!("mutex acquired.");
guard
} }
pub fn unlock(&self) {
pub unsafe fn force_unlock(&self) {
unsafe { unsafe {
self.mutex.force_unlock(); self.mutex.force_unlock();
} }
@ -362,144 +371,15 @@ impl WorkerLatch {
pub fn wait(&self) { pub fn wait(&self) {
let condvar = &self.condvar; let condvar = &self.condvar;
let mut guard = self.mutex.lock(); let mut guard = self.lock();
Self::wait_internal(condvar, &mut guard); Self::wait_internal(condvar, &mut guard);
} }
fn wait_internal(condvar: &AtomicUsize, guard: &mut parking_lot::MutexGuard<'_, bool>) { fn wait_internal(condvar: &Condvar, guard: &mut parking_lot::MutexGuard<'_, bool>) {
let mutex = parking_lot::MutexGuard::mutex(guard);
let key = condvar as *const _ as usize;
let lock_addr = mutex as *const _ as usize;
let mut requeued = false;
let state = unsafe { AtomicUsize::from_ptr(condvar as *const _ as *mut usize) };
**guard = true; // set the mutex to true to indicate that the worker is waiting **guard = true; // set the mutex to true to indicate that the worker is waiting
condvar.wait(guard);
unsafe { **guard = false;
parking_lot_core::park(
key,
|| {
let old = state.load(Ordering::Relaxed);
if old == 0 {
state.store(lock_addr, Ordering::Relaxed);
} else if old != lock_addr {
return false;
}
true
},
|| {
mutex.force_unlock();
},
|k, was_last_thread| {
requeued = k != key;
if !requeued && was_last_thread {
state.store(0, Ordering::Relaxed);
}
},
parking_lot_core::DEFAULT_PARK_TOKEN,
None,
);
}
// relock
let mut new = mutex.lock();
mem::swap(&mut new, guard);
mem::forget(new); // forget the new guard to avoid dropping it
**guard = false; // reset the mutex to false after waking up
}
fn wait_with_lock_internal<T>(
condvar: &AtomicUsize,
mutex: &mut parking_lot::MutexGuard<'_, bool>,
other: &mut parking_lot::MutexGuard<'_, T>,
) {
**mutex = true;
let key = condvar as *const _ as usize;
let lock_addr = parking_lot::MutexGuard::mutex(mutex) as *const _ as usize;
let mut requeued = false;
let state = condvar;
unsafe {
let token = parking_lot_core::park(
key,
|| {
let old = state.load(Ordering::Relaxed);
if old == 0 {
state.store(lock_addr, Ordering::Relaxed);
} else if old != lock_addr {
return false;
}
true
},
|| {
parking_lot::MutexGuard::mutex(&mutex).force_unlock();
parking_lot::MutexGuard::mutex(&other).force_unlock();
},
|k, was_last_thread| {
requeued = k != key;
if !requeued && was_last_thread {
state.store(0, Ordering::Relaxed);
}
},
parking_lot_core::DEFAULT_PARK_TOKEN,
None,
);
tracing::trace!(
"WorkerLatch wait_with_lock_internal: unparked with token {:?}",
token
);
}
// because `other` is logically unlocked, we swap it with `other2` and then forget `other2`
let mut other2 = parking_lot::MutexGuard::mutex(&other).lock();
core::mem::swap(&mut other2, other);
core::mem::forget(other2);
// because `other` is logically unlocked, we swap it with `other2` and then forget `other2`
let mut mutex2 = parking_lot::MutexGuard::mutex(&mutex).lock();
core::mem::swap(&mut mutex2, mutex);
core::mem::forget(mutex2);
**mutex = false;
}
#[tracing::instrument(level = "trace", skip_all, fields(
this = self as *const Self as usize,
))]
pub fn wait_with_lock<T>(&self, other: &mut parking_lot::MutexGuard<'_, T>) {
Self::wait_with_lock_internal(&self.condvar, &mut self.mutex.lock(), other);
}
#[tracing::instrument(level = "trace", skip_all, fields(
this = self as *const Self as usize,
))]
pub fn wait_with_lock_unless<F, T>(
&self,
other: &mut parking_lot::MutexGuard<'_, T>,
mut pred: F,
) where
F: FnMut(&mut T) -> bool,
{
let mut guard = self.mutex.lock();
if !pred(other.deref_mut()) {
Self::wait_with_lock_internal(&self.condvar, &mut guard, other);
}
}
pub fn wait_with_lock_while<T, F>(&self, other: &mut parking_lot::MutexGuard<'_, T>, mut f: F)
where
F: FnMut(&mut T) -> bool,
{
let mut guard = self.mutex.lock();
while f(other.deref_mut()) {
Self::wait_with_lock_internal(&self.condvar, &mut guard, other);
}
} }
#[tracing::instrument(level = "trace", skip_all, fields( #[tracing::instrument(level = "trace", skip_all, fields(
@ -509,7 +389,7 @@ impl WorkerLatch {
where where
F: FnMut() -> bool, F: FnMut() -> bool,
{ {
let mut guard = self.mutex.lock(); let mut guard = self.lock();
if !f() { if !f() {
Self::wait_internal(&self.condvar, &mut guard); Self::wait_internal(&self.condvar, &mut guard);
} }
@ -522,7 +402,7 @@ impl WorkerLatch {
where where
F: FnMut() -> Option<T>, F: FnMut() -> Option<T>,
{ {
let mut guard = self.mutex.lock(); let mut guard = self.lock();
loop { loop {
if let Some(result) = f() { if let Some(result) = f() {
return result; return result;
@ -539,29 +419,8 @@ impl WorkerLatch {
this = self as *const Self as usize, this = self as *const Self as usize,
))] ))]
fn notify(&self) { fn notify(&self) {
let from = &self.condvar as *const _ as usize; let n = self.condvar.notify_all();
let to = &self.mutex as *const _ as usize; tracing::trace!("WorkerLatch notify: notified {} threads", n);
let validate = || {
if self.condvar.load(Ordering::Relaxed) != to {
return parking_lot_core::RequeueOp::Abort;
}
self.condvar.store(0, Ordering::Relaxed);
parking_lot_core::RequeueOp::UnparkOneRequeueRest
};
let callback = |_op: parking_lot_core::RequeueOp,
_result: parking_lot_core::UnparkResult| {
parking_lot_core::DEFAULT_UNPARK_TOKEN
};
unsafe {
//let n = parking_lot_core::unpark_requeue(from, to, validate, callback);
let n = parking_lot_core::unpark_all(from, parking_lot_core::DEFAULT_UNPARK_TOKEN);
tracing::trace!("WorkerLatch notify_one: unparked {:?}", n);
}
} }
pub fn wake(&self) { pub fn wake(&self) {
@ -598,7 +457,7 @@ mod tests {
barrier.wait(); barrier.wait();
tracing::info!("Thread waiting on latch"); tracing::info!("Thread waiting on latch");
latch.wait_with_lock(&mut guard); latch.wait();
count.fetch_add(1, Ordering::SeqCst); count.fetch_add(1, Ordering::SeqCst);
tracing::info!("Thread woke up from latch"); tracing::info!("Thread woke up from latch");
barrier.wait(); barrier.wait();

View file

@ -118,15 +118,15 @@ impl WorkerThread {
if let Some(job) = self.find_work_inner() { if let Some(job) = self.find_work_inner() {
return Some(job); return Some(job);
} }
// check the predicate while holding the lock
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// this is very important, because the lock must be held when // Check the predicate while holding the lock. This is very important,
// notifying us of the result of a job we scheduled. // because the lock must be held when notifying us of the result of a
// job we scheduled.
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// no jobs found, wait for a heartbeat or a new job // no jobs found, wait for a heartbeat or a new job
// tracing::trace!(worker = self.heartbeat.index(), "waiting for new job"); tracing::trace!(worker = self.heartbeat.index(), "waiting for new job");
self.heartbeat.latch().wait_unless(pred); self.heartbeat.latch().wait_unless(pred);
// tracing::trace!(worker = self.heartbeat.index(), "woken up from wait"); tracing::trace!(worker = self.heartbeat.index(), "woken up from wait");
None None
} }
@ -341,9 +341,7 @@ impl WorkerThread {
} }
} }
self.heartbeat.latch().lock();
out = recv.poll(); out = recv.poll();
self.heartbeat.latch().unlock();
} }
out out