executor/distaff/src/workerthread.rs

361 lines
10 KiB
Rust

#[cfg(feature = "metrics")]
use std::sync::atomic::Ordering;
use std::{
cell::{Cell, UnsafeCell},
ptr::NonNull,
sync::{Arc, Barrier},
time::Duration,
};
use crossbeam_utils::CachePadded;
use crate::{
context::Context,
heartbeat::OwnedHeartbeatReceiver,
job::{Job2 as Job, JobQueue as JobList, SharedJob},
latch::Probe,
util::DropGuard,
};
pub struct WorkerThread {
pub(crate) context: Arc<Context>,
pub(crate) queue: UnsafeCell<JobList>,
pub(crate) heartbeat: OwnedHeartbeatReceiver,
pub(crate) join_count: Cell<u8>,
#[cfg(feature = "metrics")]
pub(crate) metrics: CachePadded<crate::metrics::WorkerMetrics>,
}
thread_local! {
static WORKER: UnsafeCell<Option<NonNull<WorkerThread>>> = const { UnsafeCell::new(None) };
}
impl WorkerThread {
pub fn new_in(context: Arc<Context>) -> Self {
let heartbeat = context.heartbeats.new_heartbeat();
Self {
context,
queue: UnsafeCell::new(JobList::new()),
heartbeat,
join_count: Cell::new(0),
#[cfg(feature = "metrics")]
metrics: CachePadded::new(crate::metrics::WorkerMetrics::default()),
}
}
}
impl WorkerThread {
#[tracing::instrument(level = "trace", skip_all, fields(
worker = self.heartbeat.index(),
))]
pub fn run(self: Box<Self>, barrier: Arc<Barrier>) {
let this = Box::into_raw(self);
unsafe {
Self::set_current(this);
}
let _guard = DropGuard::new(|| unsafe {
// SAFETY: this is only called when the thread is exiting
Self::unset_current();
Self::drop_in_place(this);
});
tracing::trace!("WorkerThread::run: starting worker thread");
barrier.wait();
unsafe {
(&*this).run_inner();
}
#[cfg(feature = "metrics")]
unsafe {
eprintln!("{:?}", (&*this).metrics);
}
tracing::trace!("WorkerThread::run: worker thread finished");
}
#[tracing::instrument(level = "trace", skip_all)]
fn run_inner(&self) {
let mut job = None;
'outer: loop {
if let Some(job) = job.take() {
self.execute(job);
}
// no more jobs, wait to be notified of a new job or a heartbeat.
while job.is_none() {
if self.context.should_exit() {
// if the context is stopped, break out of the outer loop which
// will exit the thread.
break 'outer;
}
job = self.find_work_or_wait();
}
}
}
}
impl WorkerThread {
/// Looks for work in the local queue, then in the shared context, and if no
/// work is found, waits for the thread to be notified of a new job, after
/// which it returns `None`.
/// The caller should then check for `should_exit` to determine if the
/// thread should exit, or look for work again.
#[tracing::instrument(level = "trace", skip_all)]
pub(crate) fn find_work_or_wait(&self) -> Option<SharedJob> {
if let Some(job) = self.find_work() {
return Some(job);
}
tracing::trace!("waiting for new job");
self.heartbeat.parker().park();
tracing::trace!("woken up from wait");
None
}
#[tracing::instrument(level = "trace", skip_all)]
pub(crate) fn find_work_or_wait_unless<F>(&self, mut pred: F) -> Option<SharedJob>
where
F: FnMut() -> bool,
{
if let Some(job) = self.find_work() {
return Some(job);
}
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Check the predicate while holding the lock. This is very important,
// because the lock must be held when notifying us of the result of a
// job we scheduled.
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// no jobs found, wait for a heartbeat or a new job
tracing::trace!(worker = self.heartbeat.index(), "waiting for new job");
if !pred() {
self.heartbeat.parker().park();
}
tracing::trace!(worker = self.heartbeat.index(), "woken up from wait");
None
}
#[inline]
fn find_work(&self) -> Option<SharedJob> {
let mut guard = self.context.shared();
if let Some(job) = guard.pop_job() {
#[cfg(feature = "metrics")]
self.metrics.num_jobs_stolen.fetch_add(1, Ordering::Relaxed);
tracing::trace!("WorkerThread::find_work_inner: found shared job: {:?}", job);
return Some(job);
}
None
}
#[inline(always)]
pub(crate) fn tick(&self) {
if self.heartbeat.take() {
#[cfg(feature = "metrics")]
self.metrics.num_heartbeats.fetch_add(1, Ordering::Relaxed);
tracing::trace!(
"received heartbeat, thread id: {:?}",
self.heartbeat.index()
);
self.heartbeat_cold();
}
}
#[inline]
#[tracing::instrument(level = "trace", skip(self))]
fn execute(&self, job: SharedJob) {
unsafe { SharedJob::execute(job, self) };
self.tick();
}
#[cold]
fn heartbeat_cold(&self) {
let mut guard = self.context.shared();
if !guard.jobs.contains_key(&self.heartbeat.id()) {
if let Some(job) = self.pop_back() {
tracing::trace!("heartbeat: sharing job: {:?}", job);
#[cfg(feature = "metrics")]
self.metrics.num_jobs_shared.fetch_add(1, Ordering::Relaxed);
unsafe {
guard.jobs.insert(
self.heartbeat.id(),
job.as_ref().share(Some(self.heartbeat.parker())),
);
// SAFETY: we are holding the lock on the shared context.
self.context.notify_job_shared();
}
}
}
}
}
impl WorkerThread {
#[inline]
pub fn pop_back(&self) -> Option<NonNull<Job>> {
unsafe { self.queue.as_mut_unchecked().pop_back() }
}
#[inline]
pub fn push_back<T>(&self, job: *const Job<T>) {
unsafe { self.queue.as_mut_unchecked().push_back(job.cast()) }
}
#[inline]
pub fn push_front<T>(&self, job: *const Job<T>) {
unsafe { self.queue.as_mut_unchecked().push_front(job.cast()) }
}
#[inline]
pub fn pop_front(&self) -> Option<NonNull<Job>> {
unsafe { self.queue.as_mut_unchecked().pop_front() }
}
}
impl WorkerThread {
#[inline]
pub fn current_ref<'a>() -> Option<&'a Self> {
unsafe { (*WORKER.with(UnsafeCell::get)).map(|ptr| ptr.as_ref()) }
}
unsafe fn set_current(this: *const Self) {
WORKER.with(|cell| {
unsafe {
// SAFETY: this cell is only ever accessed from the current thread
assert!(
(&mut *cell.get())
.replace(NonNull::new_unchecked(
this as *const WorkerThread as *mut WorkerThread,
))
.is_none()
);
}
});
}
unsafe fn unset_current() {
WORKER.with(|cell| {
unsafe {
// SAFETY: this cell is only ever accessed from the current thread
(&mut *cell.get()).take();
}
});
}
unsafe fn drop_in_place(this: *mut Self) {
unsafe {
// SAFETY: this is only called when the thread is exiting, so we can
// safely drop the thread. We use `drop_in_place` to prevent `Box`
// from creating a no-alias reference to the worker thread.
core::ptr::drop_in_place(this);
_ = Box::<core::mem::ManuallyDrop<Self>>::from_raw(this as _);
}
}
}
pub struct HeartbeatThread {
ctx: Arc<Context>,
}
impl HeartbeatThread {
const HEARTBEAT_INTERVAL: Duration = Duration::from_micros(100);
pub fn new(ctx: Arc<Context>) -> Self {
Self { ctx }
}
#[tracing::instrument(level = "trace", skip(self))]
pub fn run(self, barrier: Arc<Barrier>) {
tracing::trace!("new heartbeat thread {:?}", std::thread::current());
barrier.wait();
let mut i = 0;
loop {
let sleep_for = {
if self.ctx.should_exit() {
break;
}
self.ctx.heartbeats.notify_nth(i);
let num_heartbeats = self.ctx.heartbeats.len();
if i >= num_heartbeats {
i = 0;
} else {
i += 1;
}
Self::HEARTBEAT_INTERVAL.checked_div(num_heartbeats as u32)
};
if let Some(duration) = sleep_for {
std::thread::sleep(duration);
}
}
}
}
impl WorkerThread {
#[tracing::instrument(level = "trace", skip(self))]
pub fn wait_until_shared_job<T: Send>(&self, job: &Job<T>) -> Option<std::thread::Result<T>> {
let recv = (*job).take_receiver()?;
let mut out = recv.poll();
while std::hint::unlikely(out.is_none()) {
if let Some(job) = self.find_work() {
unsafe {
SharedJob::execute(job, self);
}
}
out = recv.poll();
}
out
}
#[tracing::instrument(level = "trace", skip_all)]
pub fn wait_until_pred<F>(&self, mut pred: F)
where
F: FnMut() -> bool,
{
if !pred() {
tracing::trace!("thread {:?} waiting on predicate", self.heartbeat.index());
self.wait_until_latch_cold(pred);
}
}
#[cold]
fn wait_until_latch_cold<F>(&self, mut pred: F)
where
F: FnMut() -> bool,
{
if let Some(shared_job) = self.context.shared().jobs.remove(&self.heartbeat.id()) {
tracing::trace!(
"thread {:?} reclaiming shared job: {:?}",
self.heartbeat.index(),
shared_job
);
unsafe { SharedJob::execute(shared_job, self) };
}
// do the usual thing and wait for the job's latch
// do the usual thing??? chatgipity really said this..
while !pred() {
// check local jobs before locking shared context
if let Some(job) = self.find_work() {
unsafe {
SharedJob::execute(job, self);
}
}
}
}
}