diff --git a/lib/libsys/nanosleep.2 b/lib/libsys/nanosleep.2 --- a/lib/libsys/nanosleep.2 +++ b/lib/libsys/nanosleep.2 @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 3, 2022 +.Dd April 29, 2025 .Dt NANOSLEEP 2 .Os .Sh NAME @@ -87,14 +87,6 @@ is less than or equal to the time value of the specified clock, then .Fn clock_nanosleep returns immediately and the calling thread is not suspended. -.Pp -The suspension time may be longer than requested due to the -scheduling of other activity by the system. -It is also subject to the allowed time interval deviation -specified by the -.Va kern.timecounter.alloweddeviation -.Xr sysctl 8 -variable. An unmasked signal will terminate the sleep early, regardless of the .Dv SA_RESTART value on the interrupting signal. @@ -131,6 +123,32 @@ CLOCK_UPTIME_PRECISE .El .Pp +The suspension time may be longer than requested due to the +scheduling of other activity by the system. +The clocks with the +.Dv _FAST +suffix and the +.Dv CLOCK_SECOND +are subject to the allowed time interval deviation specified by the +.Va kern.timecounter.alloweddeviation +.Xr sysctl 8 +variable. +The clocks with the +.Dv _PRECISE +suffix are always as precise as possible. +The +.Dv CLOCK_MONOTONIC , +.Dv CLOCK_REALTIME +and +.Dv CLOCK_UPTIME +are precise by default. +Setting the +.Va kern.timecounter.nanosleep_precise +.Xr sysctl 8 +to a false value makes those clocks behave like the +.Dv _FAST +clocks. +.Pp The .Fn nanosleep function behaves like @@ -217,3 +235,19 @@ .Ox 2.1 and .Fx 3.0 . +The +.Fn clock_nanosleep +system call has been available since +.Fx 11.1 . +.Pp +In +.Fx 15.0 +the default behavior of +.Fn clock_nanosleep +with +.Dv CLOCK_MONOTONIC , +.Dv CLOCK_REALTIME , +.Dv CLOCK_UPTIME +clocks and +.Fn nanosleep +has been switched to use a precise clock.
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -494,6 +494,10 @@ rmt)); } +static __read_mostly bool nanosleep_precise = true; +SYSCTL_BOOL(_kern_timecounter, OID_AUTO, nanosleep_precise, CTLFLAG_RW, + &nanosleep_precise, 0, "clock_nanosleep() with CLOCK_REALTIME, " + "CLOCK_MONOTONIC, CLOCK_UPTIME and nanosleep(2) use precise clock"); static uint8_t nanowait[MAXCPU]; int @@ -504,7 +508,7 @@ sbintime_t sbt, sbtt, prec, tmp; time_t over; int error; - bool is_abs_real; + bool is_abs_real, precise; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= NS_PER_SEC) return (EINVAL); @@ -512,17 +516,31 @@ return (EINVAL); switch (clock_id) { case CLOCK_REALTIME: + precise = nanosleep_precise; + is_abs_real = (flags & TIMER_ABSTIME) != 0; + break; case CLOCK_REALTIME_PRECISE: + precise = true; + is_abs_real = (flags & TIMER_ABSTIME) != 0; + break; case CLOCK_REALTIME_FAST: case CLOCK_SECOND: + precise = false; is_abs_real = (flags & TIMER_ABSTIME) != 0; break; case CLOCK_MONOTONIC: - case CLOCK_MONOTONIC_PRECISE: - case CLOCK_MONOTONIC_FAST: case CLOCK_UPTIME: + precise = nanosleep_precise; + is_abs_real = false; + break; + case CLOCK_MONOTONIC_PRECISE: case CLOCK_UPTIME_PRECISE: + precise = true; + is_abs_real = false; + break; + case CLOCK_MONOTONIC_FAST: case CLOCK_UPTIME_FAST: + precise = false; is_abs_real = false; break; case CLOCK_VIRTUAL: @@ -553,10 +571,14 @@ } else over = 0; tmp = tstosbt(ts); - prec = tmp; - prec >>= tc_precexp; - if (TIMESEL(&sbt, tmp)) - sbt += tc_tick_sbt; + if (precise) { + prec = 0; + sbt = sbinuptime(); + } else { + prec = tmp >> tc_precexp; + if (TIMESEL(&sbt, tmp)) + sbt += tc_tick_sbt; + } sbt += tmp; error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp", sbt, prec, C_ABSOLUTE);