Index: sys/kern/kern_tc.c
===================================================================
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -28,7 +28,9 @@
 #include
 #include
 #include
+#include <sys/proc.h>
 #include
+#include <sys/sleepqueue.h>
 #include
 #include
 #include
@@ -126,6 +128,8 @@
     sysctl_kern_timecounter_adjprecision, "I",
     "Allowed time interval deviation in percents");

+volatile int rtc_generation = 1;
+
 static int tc_chosen;	/* Non-zero if a specific tc was chosen via sysctl. */

 static void tc_windup(struct bintime *new_boottimebin);
@@ -1261,6 +1265,19 @@
 	return (timehands->th_counter->tc_frequency);
 }

+static bool
+sleeping_on_old_rtc(struct thread *td)
+{
+
+	if (td->td_rtcgen != 0 &&
+	    td->td_rtcgen != atomic_load_acq_int(&rtc_generation)) {
+		td->td_rtcgen = 0;
+		return (true);
+	}
+
+	return (false);
+}
+
 static struct mtx tc_setclock_mtx;
 MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);

@@ -1284,6 +1301,9 @@
 	/* XXX fiddle all the little crinkly bits around the fiords... */
 	tc_windup(&bt);
 	mtx_unlock_spin(&tc_setclock_mtx);
+	/* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
+	atomic_add_rel_int(&rtc_generation, 2);
+	sleepq_chains_remove_matching(sleeping_on_old_rtc);
 	if (timestepwarnings) {
 		nanotime(&taft);
 		log(LOG_INFO,
Index: sys/kern/kern_umtx.c
===================================================================
--- sys/kern/kern_umtx.c
+++ sys/kern/kern_umtx.c
@@ -59,6 +59,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -70,6 +71,7 @@

 #include
 #include
+#include
 #include

 #ifdef COMPAT_FREEBSD32
@@ -210,6 +212,7 @@

 struct abs_timeout {
 	int clockid;
+	bool is_realtime;
 	struct timespec cur;
 	struct timespec end;
 };
@@ -257,6 +260,8 @@
 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
 #endif

+static void abs_timeout_update(struct abs_timeout *timo);
+
 static void umtx_shm_init(void);
 static void umtxq_sysinit(void *);
 static void umtxq_hash(struct umtx_key *key);
@@ -771,13 +776,15 @@
 {

 	timo->clockid = clockid;
+	timo->is_realtime = clockid == CLOCK_REALTIME ||
+	    clockid == CLOCK_REALTIME_FAST ||
+	    clockid == CLOCK_REALTIME_PRECISE;
+	abs_timeout_update(timo);
 	if (!absolute) {
-		kern_clock_gettime(curthread, clockid, &timo->end);
-		timo->cur = timo->end;
+		timo->end = timo->cur;
 		timespecadd(&timo->end, timeout);
 	} else {
 		timo->end = *timeout;
-		kern_clock_gettime(curthread, clockid, &timo->cur);
 	}
 }

@@ -831,26 +838,42 @@
 	struct umtxq_chain *uc;
 	int error, timo;

+	if (abstime != NULL && abstime->is_realtime) {
+		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
+		abs_timeout_update(abstime);
+	}
+
 	uc = umtxq_getchain(&uq->uq_key);
 	UMTXQ_LOCKED_ASSERT(uc);
 	for (;;) {
-		if (!(uq->uq_flags & UQF_UMTXQ))
-			return (0);
+		if (!(uq->uq_flags & UQF_UMTXQ)) {
+			error = 0;
+			break;
+		}
 		if (abstime != NULL) {
 			timo = abs_timeout_gethz(abstime);
-			if (timo < 0)
-				return (ETIMEDOUT);
+			if (timo < 0) {
+				error = ETIMEDOUT;
+				break;
+			}
 		} else
 			timo = 0;
 		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
-		if (error != EWOULDBLOCK) {
+		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
 			break;
 		}
-		if (abstime != NULL)
+		if (abstime != NULL) {
+			if (abstime->is_realtime)
+				curthread->td_rtcgen =
+				    atomic_load_acq_int(&rtc_generation);
 			abs_timeout_update(abstime);
+		}
 		umtxq_lock(&uq->uq_key);
 	}
+
+	curthread->td_rtcgen = 0;
+	return (error);
 }
Index: sys/kern/subr_sleepqueue.c
===================================================================
--- sys/kern/subr_sleepqueue.c
+++ sys/kern/subr_sleepqueue.c
@@ -26,7 +26,7 @@

 /*
  * Implementation of sleep queues used to hold queue of threads blocked on
- * a wait channel. Sleep queues different from turnstiles in that wait
+ * a wait channel. Sleep queues are different from turnstiles in that wait
  * channels are not owned by anyone, so there is no priority propagation.
  * Sleep queues can also provide a timeout and can also be interrupted by
  * signals. That said, there are several similarities between the turnstile
@@ -36,7 +36,7 @@
  * a linked list of queues. An individual queue is located by using a hash
  * to pick a chain, locking the chain, and then walking the chain searching
  * for the queue. This means that a wait channel object does not need to
- * embed it's queue head just as locks do not embed their turnstile queue
+ * embed its queue head just as locks do not embed their turnstile queue
  * head. Threads also carry around a sleep queue that they lend to the
  * wait channel when blocking. Just as in turnstiles, the queue includes
  * a free list of the sleep queues of other threads blocked on the same
@@ -78,6 +78,9 @@
 #include
 #include
 #include
+#include <sys/time.h>
+
+#include <machine/atomic.h>

 #include

@@ -98,7 +101,7 @@
 #define	SC_LOOKUP(wc)	&sleepq_chains[SC_HASH(wc)]
 #define NR_SLEEPQS		2
 /*
- * There two different lists of sleep queues. Both lists are connected
+ * There are two different lists of sleep queues. Both lists are connected
  * via the sq_hash entries. The first list is the sleep queue chain list
  * that a sleep queue is on when it is attached to a wait channel. The
  * second list is the free list hung off of a sleep queue that is attached
@@ -539,6 +542,7 @@
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
+	bool rtc_changed;

 	td = curthread;
 	sc = SC_LOOKUP(wchan);
@@ -557,9 +561,16 @@
 	/*
 	 * If TDF_TIMEOUT is set, then our sleep has been timed out
 	 * already but we are still on the sleep queue, so dequeue the
-	 * thread and return.
+	 * thread and return. Do the same if the real-time clock has
+	 * been adjusted since this thread calculated its timeout
+	 * based on that clock.
 	 */
-	if (td->td_flags & TDF_TIMEOUT) {
+	rtc_changed = td->td_rtcgen != 0 &&
+	    td->td_rtcgen != atomic_load_acq_int(&rtc_generation);
+	if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
+		if (rtc_changed) {
+			td->td_rtcgen = 0;
+		}
 		MPASS(TD_ON_SLEEPQ(td));
 		sq = sleepq_lookup(wchan);
 		if (sleepq_resume_thread(sq, td, 0)) {
@@ -572,7 +583,7 @@
 #endif
 		}
 		mtx_unlock_spin(&sc->sc_lock);
-		return;
+		return;
 	}
 #ifdef SLEEPQUEUE_PROFILING
 	if (prof_enabled)
@@ -893,8 +904,6 @@
 sleepq_broadcast(void *wchan, int flags, int pri, int queue)
 {
 	struct sleepqueue *sq;
-	struct thread *td, *tdn;
-	int wakeup_swapper;

 	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
@@ -905,18 +914,34 @@
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));

+	return (sleepq_remove_matching(sq, queue, NULL, pri));
+}
+
+/*
+ * Resume threads on the sleep queue that match the given predicate,
+ * or all threads if the predicate is NULL.
+ */
+int
+sleepq_remove_matching(struct sleepqueue *sq, int queue,
+    bool (*matches)(struct thread *), int pri)
+{
+	struct thread *td, *tdn;
+	int wakeup_swapper;
+
 	/*
-	 * Resume all blocked threads on the sleep queue. The last thread will
-	 * be given ownership of sq and may re-enqueue itself before
-	 * sleepq_resume_thread() returns, so we must cache the "next" queue
-	 * item at the beginning of the final iteration.
+	 * The last thread will be given ownership of sq and may
+	 * re-enqueue itself before sleepq_resume_thread() returns,
+	 * so we must cache the "next" queue item at the beginning
+	 * of the final iteration.
 	 */
 	wakeup_swapper = 0;
 	TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
 		thread_lock(td);
-		wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
+		if (matches == NULL || matches(td))
+			wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
 		thread_unlock(td);
 	}
+
 	return (wakeup_swapper);
 }

@@ -1052,6 +1077,32 @@
 	return (sleepq_resume_thread(sq, td, 0));
 }

+void
+sleepq_chains_remove_matching(bool (*matches)(struct thread *))
+{
+	struct sleepqueue_chain *sc;
+	struct sleepqueue *sq;
+	int i, wakeup_swapper;
+
+	wakeup_swapper = 0;
+	for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
+		if (LIST_EMPTY(&sc->sc_queues)) {
+			continue;
+		}
+		mtx_lock_spin(&sc->sc_lock);
+		LIST_FOREACH(sq, &sc->sc_queues, sq_hash) {
+			for (i = 0; i < NR_SLEEPQS; ++i) {
+				wakeup_swapper |= sleepq_remove_matching(sq, i,
+				    matches, 0);
+			}
+		}
+		mtx_unlock_spin(&sc->sc_lock);
+	}
+	if (wakeup_swapper) {
+		kick_proc0();
+	}
+}
+
 /*
  * Prints the stacks of all threads presently sleeping on wchan/queue to
  * the sbuf sb. Sets count_stacks_printed to the number of stacks actually
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -148,6 +148,7 @@
  * o - ktrace lock
  * q - td_contested lock
  * r - p_peers lock
+ * s - by curthread, or by others when curthread is on sleepqueue
  * t - thread lock
  * u - process stat lock
  * w - process timer lock
@@ -283,6 +284,7 @@
 	int		td_dom_rr_idx;	/* (k) RR Numa domain selection. */
 	void		*td_su;		/* (k) FFS SU private */
 	sbintime_t	td_sleeptimo;	/* (t) Sleep timeout. */
+	int		td_rtcgen;	/* (s) rtc_generation of sleep */
 #define	td_endzero td_sigmask

 /* Copied during fork1() or create_thread(). */
Index: sys/sys/sleepqueue.h
===================================================================
--- sys/sys/sleepqueue.h
+++ sys/sys/sleepqueue.h
@@ -90,11 +90,14 @@
 		    int flags, int queue);
 struct sleepqueue *sleepq_alloc(void);
 int	sleepq_broadcast(void *wchan, int flags, int pri, int queue);
+void	sleepq_chains_remove_matching(bool (*matches)(struct thread *));
 void	sleepq_free(struct sleepqueue *sq);
 void	sleepq_lock(void *wchan);
 struct sleepqueue *sleepq_lookup(void *wchan);
 void	sleepq_release(void *wchan);
 void	sleepq_remove(struct thread *td, void *wchan);
+int	sleepq_remove_matching(struct sleepqueue *sq, int queue,
+	    bool (*matches)(struct thread *), int pri);
 int	sleepq_signal(void *wchan, int flags, int pri, int queue);
 void	sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
 	    int flags);
Index: sys/sys/time.h
===================================================================
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -383,6 +383,8 @@
 extern sbintime_t sbt_timethreshold;
 extern sbintime_t sbt_tickthreshold;

+extern volatile int rtc_generation;
+
 /*
  * Functions for looking at our clock: [get]{bin,nano,micro}[up]time()
 *
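For reviewers, here is a minimal sketch (not part of the diff) of the consumer-side protocol the patch establishes, modeled on the kern_umtx.c hunks above: a thread sleeping toward an absolute CLOCK_REALTIME deadline publishes the generation its timeout was computed under in td_rtcgen, re-evaluates the deadline whenever it wakes, and clears td_rtcgen before returning. The function example_realtime_sleep(), its wait channel, and its arguments are invented for illustration; rtc_generation, td_rtcgen, tc_setclock(), and sleepq_chains_remove_matching() are the patch's own.

/*
 * Illustrative sketch only -- not part of the patch.  A hypothetical
 * consumer of the rtc_generation / td_rtcgen protocol, structured the
 * same way as the umtxq_sleep() changes above.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/syscallsubr.h>
#include <sys/time.h>

#include <machine/atomic.h>

static int
example_realtime_sleep(void *wchan, struct mtx *lock, const char *wmesg,
    const struct timespec *abs_end)
{
	struct timespec now, remain;
	int error;

	mtx_assert(lock, MA_OWNED);
	for (;;) {
		/*
		 * Publish the RTC generation this pass's timeout is based
		 * on before reading the clock; the acquire load pairs with
		 * the release add in tc_setclock().
		 */
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		kern_clock_gettime(curthread, CLOCK_REALTIME, &now);
		if (timespeccmp(&now, abs_end, >=)) {
			error = EWOULDBLOCK;	/* deadline has passed */
			break;
		}
		remain = *abs_end;
		timespecsub(&remain, &now);	/* 2-arg form, as in this patch */
		error = msleep_sbt(wchan, lock, PCATCH, wmesg,
		    tstosbt(remain), 0, 0);
		if (error == EINTR || error == ERESTART)
			break;
		/*
		 * error == 0 can mean a normal wakeup, or that tc_setclock()
		 * stepped the clock and sleepq_chains_remove_matching()
		 * resumed us (clearing td_rtcgen).  Either way, loop and
		 * re-evaluate the deadline against the current clock; a real
		 * consumer would also re-check its wakeup predicate here.
		 */
	}
	curthread->td_rtcgen = 0;	/* never leave a stale generation behind */
	return (error);
}

The generation counter keeps the common paths cheap: tc_setclock() pays for one scan of the sleepqueue chains only when the clock is actually stepped, sleepers pay only an acquire load per timeout recomputation, and td_rtcgen == 0 doubles as the "not interested in RTC steps" marker, which is why the generation is bumped by 2.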