Index: share/man/man9/sleep.9 =================================================================== --- share/man/man9/sleep.9 +++ share/man/man9/sleep.9 @@ -280,6 +280,21 @@ pay particular attention to ensure that no other threads wait on the same .Fa chan . +.Pp +If the timeout given by +.Fa timo +or +.Fa sbt +is based on an absolute real-time clock value, +then the thread should copy the global +.Va rtc_generation +into its +.Va td_rtcgen +member before reading the RTC. +If the real-time clock is adjusted, these functions will set +.Va td_rtcgen +to zero and return zero. +The caller should reconsider its orientation with the new RTC value. .Sh RETURN VALUES When awakened by a call to .Fn wakeup @@ -298,6 +313,9 @@ .Fn msleep_spin , .Fn tsleep , and locking primitive sleep functions return 0. +Zero can also be returned when the real-time clock is adjusted; +see above regarding +.Va td_rtcgen . Otherwise, a non-zero error code is returned. .Sh ERRORS .Fn msleep , Index: sys/kern/kern_tc.c =================================================================== --- sys/kern/kern_tc.c +++ sys/kern/kern_tc.c @@ -28,7 +28,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -126,6 +128,8 @@ sysctl_kern_timecounter_adjprecision, "I", "Allowed time interval deviation in percents"); +volatile int rtc_generation = 1; + static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */ static void tc_windup(struct bintime *new_boottimebin); @@ -1261,6 +1265,26 @@ return (timehands->th_counter->tc_frequency); } +static bool +sleeping_on_old_rtc(struct thread *td) +{ + + /* + * td_rtcgen is modified by curthread when it is running, + * and by other threads in this function. By finding the thread + * on a sleepqueue and holding the lock on the sleepqueue + * chain, we guarantee that the thread is not running and that + * modifying td_rtcgen is safe. Setting td_rtcgen to zero informs + * the thread that it was woken due to a real-time clock adjustment. + * (The declaration of td_rtcgen refers to this comment.) + */ + if (td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation) { + td->td_rtcgen = 0; + return (true); + } + return (false); +} + static struct mtx tc_setclock_mtx; MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN); @@ -1284,6 +1308,10 @@ /* XXX fiddle all the little crinkly bits around the fiords... */ tc_windup(&bt); mtx_unlock_spin(&tc_setclock_mtx); + + /* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */ + atomic_add_rel_int(&rtc_generation, 2); + sleepq_chains_remove_matching(sleeping_on_old_rtc); if (timestepwarnings) { nanotime(&taft); log(LOG_INFO, Index: sys/kern/kern_umtx.c =================================================================== --- sys/kern/kern_umtx.c +++ sys/kern/kern_umtx.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -70,6 +71,7 @@ #include #include +#include #include #ifdef COMPAT_FREEBSD32 @@ -210,6 +212,7 @@ struct abs_timeout { int clockid; + bool is_abs_real; /* TIMER_ABSTIME && CLOCK_REALTIME* */ struct timespec cur; struct timespec end; }; @@ -257,6 +260,8 @@ static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats"); #endif +static void abs_timeout_update(struct abs_timeout *timo); + static void umtx_shm_init(void); static void umtxq_sysinit(void *); static void umtxq_hash(struct umtx_key *key); @@ -772,12 +777,22 @@ timo->clockid = clockid; if (!absolute) { - kern_clock_gettime(curthread, clockid, &timo->end); - timo->cur = timo->end; + timo->is_abs_real = false; + abs_timeout_update(timo); + timo->end = timo->cur; timespecadd(&timo->end, timeout); } else { timo->end = *timeout; - kern_clock_gettime(curthread, clockid, &timo->cur); + timo->is_abs_real = clockid == CLOCK_REALTIME || + clockid == CLOCK_REALTIME_FAST || + clockid == CLOCK_REALTIME_PRECISE; + /* + * If is_abs_real, umtxq_sleep will read the clock + * after setting td_rtcgen; otherwise, read it here. + */ + if (!timo->is_abs_real) { + abs_timeout_update(timo); + } } } @@ -831,26 +846,41 @@ struct umtxq_chain *uc; int error, timo; + if (abstime != NULL && abstime->is_abs_real) { + curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); + abs_timeout_update(abstime); + } + uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); for (;;) { - if (!(uq->uq_flags & UQF_UMTXQ)) - return (0); + if (!(uq->uq_flags & UQF_UMTXQ)) { + error = 0; + break; + } if (abstime != NULL) { timo = abs_timeout_gethz(abstime); - if (timo < 0) - return (ETIMEDOUT); + if (timo < 0) { + error = ETIMEDOUT; + break; + } } else timo = 0; error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); - if (error != EWOULDBLOCK) { + if (error == EINTR || error == ERESTART) { umtxq_lock(&uq->uq_key); break; } - if (abstime != NULL) + if (abstime != NULL) { + if (abstime->is_abs_real) + curthread->td_rtcgen = + atomic_load_acq_int(&rtc_generation); abs_timeout_update(abstime); + } umtxq_lock(&uq->uq_key); } + + curthread->td_rtcgen = 0; return (error); } Index: sys/kern/subr_sleepqueue.c =================================================================== --- sys/kern/subr_sleepqueue.c +++ sys/kern/subr_sleepqueue.c @@ -78,6 +78,9 @@ #include #include #include +#include + +#include #include @@ -539,6 +542,7 @@ struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; + bool rtc_changed; td = curthread; sc = SC_LOOKUP(wchan); @@ -558,8 +562,26 @@ * If TDF_TIMEOUT is set, then our sleep has been timed out * already but we are still on the sleep queue, so dequeue the * thread and return. + * + * Do the same if the real-time clock has been adjusted since this + * thread calculated its timeout based on that clock. This handles + * the following race: + * - The Ts thread needs to sleep until an absolute real-clock time. + * It copies the global rtc_generation into curthread->td_rtcgen, + * reads the RTC, and calculates a sleep duration based on that time. + * See umtxq_sleep() for an example. + * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes + * threads that are sleeping until an absolute real-clock time. + * See tc_setclock() and the POSIX specification of clock_settime(). + * - Ts reaches the code below. It holds the sleepqueue chain lock, + * so Tc has finished waking, so this thread must test td_rtcgen. + * (The declaration of td_rtcgen refers to this comment.) */ - if (td->td_flags & TDF_TIMEOUT) { + rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation; + if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) { + if (rtc_changed) { + td->td_rtcgen = 0; + } MPASS(TD_ON_SLEEPQ(td)); sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { @@ -886,6 +908,13 @@ return (wakeup_swapper); } +static bool +match_any(struct thread *td __unused) +{ + + return (true); +} + /* * Resume all threads sleeping on a specified wait channel. */ @@ -893,8 +922,6 @@ sleepq_broadcast(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; - struct thread *td, *tdn; - int wakeup_swapper; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); @@ -905,18 +932,33 @@ KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); + return (sleepq_remove_matching(sq, queue, match_any, pri)); +} + +/* + * Resume threads on the sleep queue that match the given predicate. + */ +int +sleepq_remove_matching(struct sleepqueue *sq, int queue, + bool (*matches)(struct thread *), int pri) +{ + struct thread *td, *tdn; + int wakeup_swapper; + /* - * Resume all blocked threads on the sleep queue. The last thread will - * be given ownership of sq and may re-enqueue itself before - * sleepq_resume_thread() returns, so we must cache the "next" queue - * item at the beginning of the final iteration. + * The last thread will be given ownership of sq and may + * re-enqueue itself before sleepq_resume_thread() returns, + * so we must cache the "next" queue item at the beginning + * of the final iteration. */ wakeup_swapper = 0; TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) { thread_lock(td); - wakeup_swapper |= sleepq_resume_thread(sq, td, pri); + if (matches(td)) + wakeup_swapper |= sleepq_resume_thread(sq, td, pri); thread_unlock(td); } + return (wakeup_swapper); } @@ -1052,6 +1094,32 @@ return (sleepq_resume_thread(sq, td, 0)); } +void +sleepq_chains_remove_matching(bool (*matches)(struct thread *)) +{ + struct sleepqueue_chain *sc; + struct sleepqueue *sq; + int i, wakeup_swapper; + + wakeup_swapper = 0; + for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) { + if (LIST_EMPTY(&sc->sc_queues)) { + continue; + } + mtx_lock_spin(&sc->sc_lock); + LIST_FOREACH(sq, &sc->sc_queues, sq_hash) { + for (i = 0; i < NR_SLEEPQS; ++i) { + wakeup_swapper |= sleepq_remove_matching(sq, i, + matches, 0); + } + } + mtx_unlock_spin(&sc->sc_lock); + } + if (wakeup_swapper) { + kick_proc0(); + } +} + /* * Prints the stacks of all threads presently sleeping on wchan/queue to * the sbuf sb. Sets count_stacks_printed to the number of stacks actually Index: sys/sys/proc.h =================================================================== --- sys/sys/proc.h +++ sys/sys/proc.h @@ -148,6 +148,7 @@ * o - ktrace lock * q - td_contested lock * r - p_peers lock + * s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9) * t - thread lock * u - process stat lock * w - process timer lock @@ -283,6 +284,7 @@ int td_dom_rr_idx; /* (k) RR Numa domain selection. */ void *td_su; /* (k) FFS SU private */ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ + int td_rtcgen; /* (s) rtc_generation of abs. sleep */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ Index: sys/sys/sleepqueue.h =================================================================== --- sys/sys/sleepqueue.h +++ sys/sys/sleepqueue.h @@ -90,11 +90,14 @@ int flags, int queue); struct sleepqueue *sleepq_alloc(void); int sleepq_broadcast(void *wchan, int flags, int pri, int queue); +void sleepq_chains_remove_matching(bool (*matches)(struct thread *)); void sleepq_free(struct sleepqueue *sq); void sleepq_lock(void *wchan); struct sleepqueue *sleepq_lookup(void *wchan); void sleepq_release(void *wchan); void sleepq_remove(struct thread *td, void *wchan); +int sleepq_remove_matching(struct sleepqueue *sq, int queue, + bool (*matches)(struct thread *), int pri); int sleepq_signal(void *wchan, int flags, int pri, int queue); void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags); Index: sys/sys/time.h =================================================================== --- sys/sys/time.h +++ sys/sys/time.h @@ -383,6 +383,8 @@ extern sbintime_t sbt_timethreshold; extern sbintime_t sbt_tickthreshold; +extern volatile int rtc_generation; + /* * Functions for looking at our clock: [get]{bin,nano,micro}[up]time() *