Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -213,13 +213,33 @@
 		lock_state = class->lc_unlock(lock);
 		sleepq_lock(ident);
 	}
-	if (sbt != 0 && catch)
-		rval = sleepq_timedwait_sig(ident, pri);
-	else if (sbt != 0)
-		rval = sleepq_timedwait(ident, pri);
-	else if (catch)
+	if (sbt != 0) {
+		if (priority & PRTCLK) {
+			thread_lock(td);
+			td->td_flags |= TDF_SLEEPRTC;
+			thread_unlock(td);
+		}
+		if (catch)
+			rval = sleepq_timedwait_sig(ident, pri);
+		else
+			rval = sleepq_timedwait(ident, pri);
+		if (priority & PRTCLK) {
+			thread_lock(td);
+			if (td->td_flags & TDF_SLEEPRTC) {
+				td->td_flags &= ~TDF_SLEEPRTC;
+			} else if (rval == 0) {
+				/*
+				 * The thread was awoken by an adjustment of
+				 * the real-time clock.  It should read the
+				 * RTC again and act on the new value.
+				 */
+				rval = ERELOOKUP;
+			}
+			thread_unlock(td);
+		}
+	} else if (catch) {
 		rval = sleepq_wait_sig(ident, pri);
-	else {
+	} else {
 		sleepq_wait(ident, pri);
 		rval = 0;
 	}
Index: sys/kern/kern_tc.c
===================================================================
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -28,10 +28,13 @@
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -126,6 +129,8 @@
     sysctl_kern_timecounter_adjprecision, "I",
     "Allowed time interval deviation in percents");
 
+volatile int rtc_generation;
+
 static int tc_chosen;	/* Non-zero if a specific tc was chosen via sysctl. */
 
 static void tc_windup(struct bintime *new_boottimebin);
@@ -1261,6 +1266,20 @@
 	return (timehands->th_counter->tc_frequency);
 }
 
+static bool
+sleeping_on_old_rtc(struct thread *td)
+{
+
+	THREAD_LOCK_ASSERT(td, MA_OWNED);
+	if ((td->td_flags & TDF_SLEEPRTC) &&
+	    td->td_rtcgen != atomic_load_acq_int(&rtc_generation)) {
+		td->td_flags &= ~TDF_SLEEPRTC;
+		return (true);
+	}
+
+	return (false);
+}
+
 static struct mtx tc_setclock_mtx;
 MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);
 
@@ -1284,6 +1303,7 @@
 	/* XXX fiddle all the little crinkly bits around the fiords... */
 	tc_windup(&bt);
 	mtx_unlock_spin(&tc_setclock_mtx);
+	sleepq_chains_remove_matching(sleeping_on_old_rtc);
 	if (timestepwarnings) {
 		nanotime(&taft);
 		log(LOG_INFO,
@@ -1463,6 +1483,9 @@
 
 	timehands = th;
 	timekeep_push_vdso();
+
+	if (new_boottimebin != NULL)
+		atomic_add_rel_int(&rtc_generation, 1);
 }
 
 /* Report or change the active timecounter hardware. */
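The ordering between the clock setter and sleepers rests on the release/acquire pairing around rtc_generation: the increment is released only after tc_windup() has installed the stepped time, so a thread whose acquire load observes the new generation is guaranteed to also observe the new clock.  A minimal standalone model of that handshake, with C11 atomics standing in for FreeBSD's atomic_add_rel_int()/atomic_load_acq_int() (illustration only, not code from this diff):

	#include <stdatomic.h>
	#include <stdbool.h>

	static _Atomic int generation;

	/* Clock setter: install the new time, then publish a new generation. */
	static void
	step_clock(void)
	{
		/* ...stores that install the stepped time go here... */
		atomic_fetch_add_explicit(&generation, 1, memory_order_release);
	}

	/* Sleeper: sample the generation before computing a deadline. */
	static int
	sample_generation(void)
	{
		return (atomic_load_explicit(&generation, memory_order_acquire));
	}

	/* A previously computed deadline is stale once the generation moves. */
	static bool
	deadline_stale(int gen_at_sample)
	{
		return (sample_generation() != gen_at_sample);
	}

This is the pattern that sleeping_on_old_rtc() above and the td_rtcgen checks below instantiate.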
Index: sys/kern/kern_umtx.c
===================================================================
--- sys/kern/kern_umtx.c
+++ sys/kern/kern_umtx.c
@@ -59,6 +59,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -70,6 +71,7 @@
 #include
 #include
+#include
 #include
 
 #ifdef COMPAT_FREEBSD32
@@ -210,6 +212,7 @@
 
 struct abs_timeout {
 	int clockid;
+	bool is_realtime;
 	struct timespec cur;
 	struct timespec end;
 };
@@ -257,6 +260,8 @@
 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
     "umtx chain stats");
 #endif
 
+static void abs_timeout_update(struct abs_timeout *timo);
+
 static void umtx_shm_init(void);
 static void umtxq_sysinit(void *);
 static void umtxq_hash(struct umtx_key *key);
@@ -771,13 +776,15 @@
 {
 
 	timo->clockid = clockid;
+	timo->is_realtime = (clockid == CLOCK_REALTIME ||
+	    clockid == CLOCK_REALTIME_FAST ||
+	    clockid == CLOCK_REALTIME_PRECISE);
+	abs_timeout_update(timo);
 	if (!absolute) {
-		kern_clock_gettime(curthread, clockid, &timo->end);
-		timo->cur = timo->end;
+		timo->end = timo->cur;
 		timespecadd(&timo->end, timeout);
 	} else {
 		timo->end = *timeout;
-		kern_clock_gettime(curthread, clockid, &timo->cur);
 	}
 }
@@ -793,6 +800,8 @@
 abs_timeout_update(struct abs_timeout *timo)
 {
 
+	if (timo->is_realtime)
+		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
 	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
 }
 
@@ -821,6 +830,12 @@
 	}
 }
 
+#if 1 || defined(TEST_RACE)
+static int umtx_rtc_race = 0;
+SYSCTL_INT(_debug_umtx, OID_AUTO, rtc_race, CTLFLAG_RW,
+    &umtx_rtc_race, 0, "");
+#endif
+
 /*
  * Put thread into sleep state, before sleeping, check if
  * thread was removed from umtx queue.
  */
@@ -829,7 +844,18 @@
 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
 {
 	struct umtxq_chain *uc;
-	int error, timo;
+	int error, timo, priority;
+
+	priority = PCATCH | PDROP;
+	if (abstime != NULL && abstime->is_realtime) {
+		priority |= PRTCLK;
+
+#if 1 || defined(TEST_RACE)
+		while (umtx_rtc_race) {
+			pause("rtcrace", hz);
+		}
+#endif
+	}
 
 	uc = umtxq_getchain(&uq->uq_key);
 	UMTXQ_LOCKED_ASSERT(uc);
@@ -842,8 +868,8 @@
 				return (ETIMEDOUT);
 		} else
 			timo = 0;
-		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
-		if (error != EWOULDBLOCK) {
+		error = msleep(uq, &uc->uc_lock, priority, wmesg, timo);
+		if (error != EWOULDBLOCK && error != ERELOOKUP) {
 			umtxq_lock(&uq->uq_key);
 			break;
 		}
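Note that ERELOOKUP never escapes to userland here: umtxq_sleep()'s surrounding loop treats it like EWOULDBLOCK, and the next call to abs_timeout_update() re-samples both rtc_generation and the now-stepped clock before the thread sleeps again.  A condensed sketch of that loop's shape, where abs_timeout_gethz() is an invented stand-in for the existing deadline-to-ticks conversion (the other identifiers appear in the diff):

	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);	/* ticks until end */
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, priority, wmesg, timo);
		if (error != EWOULDBLOCK && error != ERELOOKUP) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		umtxq_lock(&uq->uq_key);
		if (abstime != NULL)
			abs_timeout_update(abstime);	/* re-sample generation and clock */
	}

Absolute CLOCK_REALTIME deadlines therefore converge on the stepped clock within one extra loop iteration, while CLOCK_MONOTONIC waits (is_realtime == false) keep their old behavior, as a monotonic deadline is immune to steps by definition.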
Index: sys/kern/subr_sleepqueue.c
===================================================================
--- sys/kern/subr_sleepqueue.c
+++ sys/kern/subr_sleepqueue.c
@@ -26,7 +26,7 @@
 
 /*
  * Implementation of sleep queues used to hold queue of threads blocked on
- * a wait channel.  Sleep queues different from turnstiles in that wait
+ * a wait channel.  Sleep queues are different from turnstiles in that wait
  * channels are not owned by anyone, so there is no priority propagation.
  * Sleep queues can also provide a timeout and can also be interrupted by
  * signals.  That said, there are several similarities between the turnstile
@@ -36,7 +36,7 @@
  * a linked list of queues.  An individual queue is located by using a hash
  * to pick a chain, locking the chain, and then walking the chain searching
  * for the queue.  This means that a wait channel object does not need to
- * embed it's queue head just as locks do not embed their turnstile queue
+ * embed its queue head just as locks do not embed their turnstile queue
  * head.  Threads also carry around a sleep queue that they lend to the
  * wait channel when blocking.  Just as in turnstiles, the queue includes
  * a free list of the sleep queues of other threads blocked on the same
  * wait channel in the case of multiple waiters.
@@ -78,6 +78,9 @@
 #include
 #include
 #include
+#include
+
+#include
 
 #include
@@ -98,7 +101,7 @@
 #define	SC_LOOKUP(wc)	&sleepq_chains[SC_HASH(wc)]
 #define	NR_SLEEPQS	2
 /*
- * There two different lists of sleep queues.  Both lists are connected
+ * There are two different lists of sleep queues.  Both lists are connected
  * via the sq_hash entries.  The first list is the sleep queue chain list
 * that a sleep queue is on when it is attached to a wait channel.  The
 * second list is the free list hung off of a sleep queue that is attached
@@ -539,6 +542,7 @@
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
+	bool rtc_changed;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
@@ -557,9 +561,16 @@
 	/*
 	 * If TDF_TIMEOUT is set, then our sleep has been timed out
 	 * already but we are still on the sleep queue, so dequeue the
-	 * thread and return.
+	 * thread and return.  Do the same if the real-time clock has
+	 * been adjusted since this thread calculated its timeout
+	 * based on that clock.
 	 */
-	if (td->td_flags & TDF_TIMEOUT) {
+	rtc_changed = (td->td_flags & TDF_SLEEPRTC) &&
+	    td->td_rtcgen != atomic_load_acq_int(&rtc_generation);
+	if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
+		if (rtc_changed) {
+			td->td_flags &= ~TDF_SLEEPRTC;
+		}
 		MPASS(TD_ON_SLEEPQ(td));
 		sq = sleepq_lookup(wchan);
 		if (sleepq_resume_thread(sq, td, 0)) {
@@ -572,7 +583,7 @@
 #endif
 		}
 		mtx_unlock_spin(&sc->sc_lock);
-		return;
+		return;
 	}
 #ifdef SLEEPQUEUE_PROFILING
 	if (prof_enabled)
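The rtc_changed test in sleepq_switch() closes the one window the sweep cannot see: tc_setclock()'s call to sleepq_chains_remove_matching() only finds threads already parked on a sleep queue, so a thread that has sampled rtc_generation but not yet switched out would otherwise sleep out its full, now-stale timeout.  The debug.umtx.rtc_race knob above exists precisely to widen this window for testing.  As an interleaving (illustrative only):

	/*
	 *   T1 (realtime sleeper)                T2 (clock setter)
	 *   -----------------------------------  ----------------------------------
	 *   td_rtcgen = load_acq(rtc_generation)
	 *   computes its timeout from the RTC
	 *                                        tc_windup(): add_rel(rtc_generation)
	 *                                        sweep: T1 not on a queue yet, missed
	 *   sleepq_switch(): td_rtcgen !=
	 *       load_acq(rtc_generation)
	 *       -> dequeue and return instead
	 *          of sleeping
	 */

Because sleepq_switch() performs the re-check and the transition to sleeping under the same chain spin lock the sweep takes, a clock step is seen either by the re-check or by the sweep; it cannot fall between them.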
@@ -893,8 +904,6 @@
 sleepq_broadcast(void *wchan, int flags, int pri, int queue)
 {
 	struct sleepqueue *sq;
-	struct thread *td, *tdn;
-	int wakeup_swapper;
 
 	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
@@ -905,18 +914,34 @@
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 
+	return (sleepq_remove_matching(sq, queue, NULL, pri));
+}
+
+/*
+ * Resume threads on the sleep queue that match the given predicate,
+ * or all threads if the predicate is NULL.
+ */
+int
+sleepq_remove_matching(struct sleepqueue *sq, int queue,
+    bool (*matches)(struct thread *), int pri)
+{
+	struct thread *td, *tdn;
+	int wakeup_swapper;
+
 	/*
-	 * Resume all blocked threads on the sleep queue.  The last thread will
-	 * be given ownership of sq and may re-enqueue itself before
-	 * sleepq_resume_thread() returns, so we must cache the "next" queue
-	 * item at the beginning of the final iteration.
+	 * The last thread will be given ownership of sq and may
+	 * re-enqueue itself before sleepq_resume_thread() returns,
+	 * so we must cache the "next" queue item at the beginning
+	 * of the final iteration.
 	 */
 	wakeup_swapper = 0;
 	TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
 		thread_lock(td);
-		wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
+		if (matches == NULL || matches(td))
+			wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
 		thread_unlock(td);
 	}
+
 	return (wakeup_swapper);
 }
@@ -1052,6 +1077,32 @@
 	return (sleepq_resume_thread(sq, td, 0));
 }
 
+void
+sleepq_chains_remove_matching(bool (*matches)(struct thread *))
+{
+	struct sleepqueue_chain *sc;
+	struct sleepqueue *sq;
+	int i, wakeup_swapper;
+
+	wakeup_swapper = 0;
+	for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
+		if (LIST_EMPTY(&sc->sc_queues)) {
+			continue;
+		}
+		mtx_lock_spin(&sc->sc_lock);
+		LIST_FOREACH(sq, &sc->sc_queues, sq_hash) {
+			for (i = 0; i < NR_SLEEPQS; ++i) {
+				wakeup_swapper |= sleepq_remove_matching(sq, i,
+				    matches, 0);
+			}
+		}
+		mtx_unlock_spin(&sc->sc_lock);
+	}
+	if (wakeup_swapper) {
+		kick_proc0();
+	}
+}
+
 /*
  * Prints the stacks of all threads presently sleeping on wchan/queue to
  * the sbuf sb.  Sets count_stacks_printed to the number of stacks actually
Index: sys/sys/param.h
===================================================================
--- sys/sys/param.h
+++ sys/sys/param.h
@@ -215,6 +215,7 @@
 #define	PRIMASK	0x0ff
 #define	PCATCH	0x100		/* OR'd with pri for tsleep to check signals */
 #define	PDROP	0x200		/* OR'd with pri to stop re-entry of interlock mutex */
+#define	PRTCLK	0x400		/* OR'd with pri when sleep is based on the RT clock */
 
 #define	NZERO	0		/* default "nice" */
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -283,6 +283,7 @@
 	int		td_dom_rr_idx;	/* (k) RR Numa domain selection. */
 	void		*td_su;		/* (k) FFS SU private */
 	sbintime_t	td_sleeptimo;	/* (t) Sleep timeout. */
+	int		td_rtcgen;	/* (?) rtc_generation of sleep */
 #define	td_endzero td_sigmask
 
 /* Copied during fork1() or create_thread(). */
@@ -390,7 +391,7 @@
 #define	TDF_ALLPROCSUSP	0x00000200 /* suspended by SINGLE_ALLPROC */
 #define	TDF_BOUNDARY	0x00000400 /* Thread suspended at user boundary */
 #define	TDF_ASTPENDING	0x00000800 /* Thread has some asynchronous events. */
-#define	TDF_UNUSED12	0x00001000 /* --available-- */
+#define	TDF_SLEEPRTC	0x00001000 /* Sleep is based on the real-time clock */
 #define	TDF_SBDRY	0x00002000 /* Stop only on usermode boundary. */
 #define	TDF_UPIBLOCKED	0x00004000 /* Thread blocked on user PI mutex. */
 #define	TDF_NEEDSUSPCHK	0x00008000 /* Thread may need to suspend. */
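The predicate interface added above is deliberately general: any subsystem that tags its sleepers with a thread flag can wake exactly those threads after a global state change.  A hypothetical second consumer, where TDF_SLEEPFOO and the scenario are invented for illustration (only the interface and its locking rule come from this diff; sleepq_remove_matching() invokes the predicate with the thread lock held):

	static bool
	sleeping_on_foo(struct thread *td)
	{

		THREAD_LOCK_ASSERT(td, MA_OWNED);
		if (td->td_flags & TDF_SLEEPFOO) {
			td->td_flags &= ~TDF_SLEEPFOO;
			return (true);
		}
		return (false);
	}

	/* After the global "foo" state changes: */
	sleepq_chains_remove_matching(sleeping_on_foo);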
Index: sys/sys/sleepqueue.h
===================================================================
--- sys/sys/sleepqueue.h
+++ sys/sys/sleepqueue.h
@@ -90,11 +90,14 @@
 	    int flags, int queue);
 struct sleepqueue *sleepq_alloc(void);
 int	sleepq_broadcast(void *wchan, int flags, int pri, int queue);
+void	sleepq_chains_remove_matching(bool (*matches)(struct thread *));
 void	sleepq_free(struct sleepqueue *sq);
 void	sleepq_lock(void *wchan);
 struct sleepqueue *sleepq_lookup(void *wchan);
 void	sleepq_release(void *wchan);
 void	sleepq_remove(struct thread *td, void *wchan);
+int	sleepq_remove_matching(struct sleepqueue *sq, int queue,
+	    bool (*matches)(struct thread *), int pri);
 int	sleepq_signal(void *wchan, int flags, int pri, int queue);
 void	sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt,
 	    sbintime_t pr, int flags);
Index: sys/sys/time.h
===================================================================
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -383,6 +383,8 @@
 extern sbintime_t sbt_timethreshold;
 extern sbintime_t sbt_tickthreshold;
 
+extern volatile int rtc_generation;
+
 /*
  * Functions for looking at our clock: [get]{bin,nano,micro}[up]time()
 *
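For testing the whole stack from userland, a reproducer for the original bug can be as small as the sketch below (not part of this diff; clock_settime(2) requires privilege, and error handling is elided).  Unpatched, the waiter sleeps out the remaining hour; with this change it returns ETIMEDOUT as soon as the clock is stepped past its deadline:

	#include <pthread.h>
	#include <stdio.h>
	#include <string.h>
	#include <time.h>
	#include <unistd.h>

	static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t c = PTHREAD_COND_INITIALIZER;

	static void *
	waiter(void *arg)
	{
		struct timespec deadline;
		int error;

		clock_gettime(CLOCK_REALTIME, &deadline);
		deadline.tv_sec += 3600;	/* absolute deadline one hour out */
		pthread_mutex_lock(&m);
		/* By default this measures against CLOCK_REALTIME. */
		error = pthread_cond_timedwait(&c, &m, &deadline);
		pthread_mutex_unlock(&m);
		printf("timedwait: %d (%s)\n", error, strerror(error));
		return (NULL);
	}

	int
	main(void)
	{
		pthread_t td;
		struct timespec now;

		pthread_create(&td, NULL, waiter, NULL);
		sleep(1);			/* let the waiter block */
		clock_gettime(CLOCK_REALTIME, &now);
		now.tv_sec += 7200;		/* step two hours ahead */
		clock_settime(CLOCK_REALTIME, &now);
		pthread_join(td, NULL);		/* prompt with this change */
		return (0);
	}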