diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index aa7060c7cb7f..8b103c6fe729 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -1,1835 +1,1837 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_time.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #define MAX_CLOCKS (CLOCK_MONOTONIC+1) #define CPUCLOCK_BIT 0x80000000 #define CPUCLOCK_PROCESS_BIT 0x40000000 #define CPUCLOCK_ID_MASK (~(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT)) #define MAKE_THREAD_CPUCLOCK(tid) (CPUCLOCK_BIT|(tid)) #define MAKE_PROCESS_CPUCLOCK(pid) \ (CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT|(pid)) #define NS_PER_SEC 1000000000 static struct kclock posix_clocks[MAX_CLOCKS]; static uma_zone_t itimer_zone = NULL; /* * Time of day and interval timer support. * * These routines provide the kernel entry points to get and set * the time-of-day and per-process interval timers. Subroutines * here provide support for adding and subtracting timeval structures * and decrementing interval timers, optionally reloading the interval * timers when they expire. */ static int settime(struct thread *, struct timeval *); static void timevalfix(struct timeval *); static int user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *ua_rqtp, struct timespec *ua_rmtp); static void itimer_start(void); static int itimer_init(void *, int, int); static void itimer_fini(void *, int); static void itimer_enter(struct itimer *); static void itimer_leave(struct itimer *); static struct itimer *itimer_find(struct proc *, int); static void itimers_alloc(struct proc *); static int realtimer_create(struct itimer *); static int realtimer_gettime(struct itimer *, struct itimerspec *); static int realtimer_settime(struct itimer *, int, struct itimerspec *, struct itimerspec *); static int realtimer_delete(struct itimer *); static void realtimer_clocktime(clockid_t, struct timespec *); static void realtimer_expire(void *); static void realtimer_expire_l(struct itimer *it, bool proc_locked); +static void realitexpire(void *arg); + static int register_posix_clock(int, const struct kclock *); static void itimer_fire(struct itimer *it); static int itimespecfix(struct timespec *ts); #define CLOCK_CALL(clock, call, arglist) \ ((*posix_clocks[clock].call) arglist) SYSINIT(posix_timer, SI_SUB_P1003_1B, SI_ORDER_FIRST+4, itimer_start, NULL); static int settime(struct thread *td, struct timeval *tv) { struct timeval delta, tv1, tv2; static struct timeval maxtime, laststep; struct timespec ts; microtime(&tv1); delta = *tv; timevalsub(&delta, &tv1); /* * If the system is secure, we do not allow the time to be * set to a value earlier than 1 second less than the highest * time we have yet seen. The worst a miscreant can do in * this circumstance is "freeze" time. He couldn't go * back to the past. * * We similarly do not allow the clock to be stepped more * than one second, nor more than once per second. This allows * a miscreant to make the clock march double-time, but no worse. */ if (securelevel_gt(td->td_ucred, 1) != 0) { if (delta.tv_sec < 0 || delta.tv_usec < 0) { /* * Update maxtime to latest time we've seen. */ if (tv1.tv_sec > maxtime.tv_sec) maxtime = tv1; tv2 = *tv; timevalsub(&tv2, &maxtime); if (tv2.tv_sec < -1) { tv->tv_sec = maxtime.tv_sec - 1; printf("Time adjustment clamped to -1 second\n"); } } else { if (tv1.tv_sec == laststep.tv_sec) return (EPERM); if (delta.tv_sec > 1) { tv->tv_sec = tv1.tv_sec + 1; printf("Time adjustment clamped to +1 second\n"); } laststep = *tv; } } ts.tv_sec = tv->tv_sec; ts.tv_nsec = tv->tv_usec * 1000; tc_setclock(&ts); resettodr(); return (0); } #ifndef _SYS_SYSPROTO_H_ struct clock_getcpuclockid2_args { id_t id; int which, clockid_t *clock_id; }; #endif /* ARGSUSED */ int sys_clock_getcpuclockid2(struct thread *td, struct clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, uap->id, uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int kern_clock_getcpuclockid2(struct thread *td, id_t id, int which, clockid_t *clk_id) { struct proc *p; pid_t pid; lwpid_t tid; int error; switch (which) { case CPUCLOCK_WHICH_PID: if (id != 0) { error = pget(id, PGET_CANSEE | PGET_NOTID, &p); if (error != 0) return (error); PROC_UNLOCK(p); pid = id; } else { pid = td->td_proc->p_pid; } *clk_id = MAKE_PROCESS_CPUCLOCK(pid); return (0); case CPUCLOCK_WHICH_TID: tid = id == 0 ? td->td_tid : id; *clk_id = MAKE_THREAD_CPUCLOCK(tid); return (0); default: return (EINVAL); } } #ifndef _SYS_SYSPROTO_H_ struct clock_gettime_args { clockid_t clock_id; struct timespec *tp; }; #endif /* ARGSUSED */ int sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap) { struct timespec ats; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) error = copyout(&ats, uap->tp, sizeof(ats)); return (error); } static inline void cputick2timespec(uint64_t runtime, struct timespec *ats) { runtime = cputick2usec(runtime); ats->tv_sec = runtime / 1000000; ats->tv_nsec = runtime % 1000000 * 1000; } void kern_thread_cputime(struct thread *targettd, struct timespec *ats) { uint64_t runtime, curtime, switchtime; if (targettd == NULL) { /* current thread */ spinlock_enter(); switchtime = PCPU_GET(switchtime); curtime = cpu_ticks(); runtime = curthread->td_runtime; spinlock_exit(); runtime += curtime - switchtime; } else { PROC_LOCK_ASSERT(targettd->td_proc, MA_OWNED); thread_lock(targettd); runtime = targettd->td_runtime; thread_unlock(targettd); } cputick2timespec(runtime, ats); } void kern_process_cputime(struct proc *targetp, struct timespec *ats) { uint64_t runtime; struct rusage ru; PROC_LOCK_ASSERT(targetp, MA_OWNED); PROC_STATLOCK(targetp); rufetch(targetp, &ru); runtime = targetp->p_rux.rux_runtime; if (curthread->td_proc == targetp) runtime += cpu_ticks() - PCPU_GET(switchtime); PROC_STATUNLOCK(targetp); cputick2timespec(runtime, ats); } static int get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct proc *p, *p2; struct thread *td2; lwpid_t tid; pid_t pid; int error; p = td->td_proc; if ((clock_id & CPUCLOCK_PROCESS_BIT) == 0) { tid = clock_id & CPUCLOCK_ID_MASK; td2 = tdfind(tid, p->p_pid); if (td2 == NULL) return (EINVAL); kern_thread_cputime(td2, ats); PROC_UNLOCK(td2->td_proc); } else { pid = clock_id & CPUCLOCK_ID_MASK; error = pget(pid, PGET_CANSEE, &p2); if (error != 0) return (EINVAL); kern_process_cputime(p2, ats); PROC_UNLOCK(p2); } return (0); } int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct timeval sys, user; struct proc *p; p = td->td_proc; switch (clock_id) { case CLOCK_REALTIME: /* Default to precise. */ case CLOCK_REALTIME_PRECISE: nanotime(ats); break; case CLOCK_REALTIME_FAST: getnanotime(ats); break; case CLOCK_VIRTUAL: PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &user, &sys); PROC_STATUNLOCK(p); PROC_UNLOCK(p); TIMEVAL_TO_TIMESPEC(&user, ats); break; case CLOCK_PROF: PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &user, &sys); PROC_STATUNLOCK(p); PROC_UNLOCK(p); timevaladd(&user, &sys); TIMEVAL_TO_TIMESPEC(&user, ats); break; case CLOCK_MONOTONIC: /* Default to precise. */ case CLOCK_MONOTONIC_PRECISE: case CLOCK_UPTIME: case CLOCK_UPTIME_PRECISE: nanouptime(ats); break; case CLOCK_UPTIME_FAST: case CLOCK_MONOTONIC_FAST: getnanouptime(ats); break; case CLOCK_SECOND: ats->tv_sec = time_second; ats->tv_nsec = 0; break; case CLOCK_THREAD_CPUTIME_ID: kern_thread_cputime(NULL, ats); break; case CLOCK_PROCESS_CPUTIME_ID: PROC_LOCK(p); kern_process_cputime(p, ats); PROC_UNLOCK(p); break; default: if ((int)clock_id >= 0) return (EINVAL); return (get_cputime(td, clock_id, ats)); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct clock_settime_args { clockid_t clock_id; const struct timespec *tp; }; #endif /* ARGSUSED */ int sys_clock_settime(struct thread *td, struct clock_settime_args *uap) { struct timespec ats; int error; if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0) return (error); return (kern_clock_settime(td, uap->clock_id, &ats)); } static int allow_insane_settime = 0; SYSCTL_INT(_debug, OID_AUTO, allow_insane_settime, CTLFLAG_RWTUN, &allow_insane_settime, 0, "do not perform possibly restrictive checks on settime(2) args"); int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct timeval atv; int error; if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0) return (error); if (clock_id != CLOCK_REALTIME) return (EINVAL); if (!timespecvalid_interval(ats)) return (EINVAL); if (!allow_insane_settime && (ats->tv_sec > 8000ULL * 365 * 24 * 60 * 60 || ats->tv_sec < utc_offset())) return (EINVAL); /* XXX Don't convert nsec->usec and back */ TIMESPEC_TO_TIMEVAL(&atv, ats); error = settime(td, &atv); return (error); } #ifndef _SYS_SYSPROTO_H_ struct clock_getres_args { clockid_t clock_id; struct timespec *tp; }; #endif int sys_clock_getres(struct thread *td, struct clock_getres_args *uap) { struct timespec ts; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) error = copyout(&ts, uap->tp, sizeof(ts)); return (error); } int kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts) { ts->tv_sec = 0; switch (clock_id) { case CLOCK_REALTIME: case CLOCK_REALTIME_FAST: case CLOCK_REALTIME_PRECISE: case CLOCK_MONOTONIC: case CLOCK_MONOTONIC_FAST: case CLOCK_MONOTONIC_PRECISE: case CLOCK_UPTIME: case CLOCK_UPTIME_FAST: case CLOCK_UPTIME_PRECISE: /* * Round up the result of the division cheaply by adding 1. * Rounding up is especially important if rounding down * would give 0. Perfect rounding is unimportant. */ ts->tv_nsec = NS_PER_SEC / tc_getfrequency() + 1; break; case CLOCK_VIRTUAL: case CLOCK_PROF: /* Accurately round up here because we can do so cheaply. */ ts->tv_nsec = howmany(NS_PER_SEC, hz); break; case CLOCK_SECOND: ts->tv_sec = 1; ts->tv_nsec = 0; break; case CLOCK_THREAD_CPUTIME_ID: case CLOCK_PROCESS_CPUTIME_ID: cputime: /* sync with cputick2usec */ ts->tv_nsec = 1000000 / cpu_tickrate(); if (ts->tv_nsec == 0) ts->tv_nsec = 1000; break; default: if ((int)clock_id < 0) goto cputime; return (EINVAL); } return (0); } int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) { return (kern_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, rqt, rmt)); } static uint8_t nanowait[MAXCPU]; int kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *rqt, struct timespec *rmt) { struct timespec ts, now; sbintime_t sbt, sbtt, prec, tmp; time_t over; int error; bool is_abs_real; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= NS_PER_SEC) return (EINVAL); if ((flags & ~TIMER_ABSTIME) != 0) return (EINVAL); switch (clock_id) { case CLOCK_REALTIME: case CLOCK_REALTIME_PRECISE: case CLOCK_REALTIME_FAST: case CLOCK_SECOND: is_abs_real = (flags & TIMER_ABSTIME) != 0; break; case CLOCK_MONOTONIC: case CLOCK_MONOTONIC_PRECISE: case CLOCK_MONOTONIC_FAST: case CLOCK_UPTIME: case CLOCK_UPTIME_PRECISE: case CLOCK_UPTIME_FAST: is_abs_real = false; break; case CLOCK_VIRTUAL: case CLOCK_PROF: case CLOCK_PROCESS_CPUTIME_ID: return (ENOTSUP); case CLOCK_THREAD_CPUTIME_ID: default: return (EINVAL); } do { ts = *rqt; if ((flags & TIMER_ABSTIME) != 0) { if (is_abs_real) td->td_rtcgen = atomic_load_acq_int(&rtc_generation); error = kern_clock_gettime(td, clock_id, &now); KASSERT(error == 0, ("kern_clock_gettime: %d", error)); timespecsub(&ts, &now, &ts); } if (ts.tv_sec < 0 || (ts.tv_sec == 0 && ts.tv_nsec == 0)) { error = EWOULDBLOCK; break; } if (ts.tv_sec > INT32_MAX / 2) { over = ts.tv_sec - INT32_MAX / 2; ts.tv_sec -= over; } else over = 0; tmp = tstosbt(ts); prec = tmp; prec >>= tc_precexp; if (TIMESEL(&sbt, tmp)) sbt += tc_tick_sbt; sbt += tmp; error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp", sbt, prec, C_ABSOLUTE); } while (error == 0 && is_abs_real && td->td_rtcgen == 0); td->td_rtcgen = 0; if (error != EWOULDBLOCK) { if (TIMESEL(&sbtt, tmp)) sbtt += tc_tick_sbt; if (sbtt >= sbt) return (0); if (error == ERESTART) error = EINTR; if ((flags & TIMER_ABSTIME) == 0 && rmt != NULL) { ts = sbttots(sbt - sbtt); ts.tv_sec += over; if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } return (error); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct nanosleep_args { struct timespec *rqtp; struct timespec *rmtp; }; #endif /* ARGSUSED */ int sys_nanosleep(struct thread *td, struct nanosleep_args *uap) { return (user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, uap->rqtp, uap->rmtp)); } #ifndef _SYS_SYSPROTO_H_ struct clock_nanosleep_args { clockid_t clock_id; int flags; struct timespec *rqtp; struct timespec *rmtp; }; #endif /* ARGSUSED */ int sys_clock_nanosleep(struct thread *td, struct clock_nanosleep_args *uap) { int error; error = user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, uap->rmtp); return (kern_posix_error(td, error)); } static int user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *ua_rqtp, struct timespec *ua_rmtp) { struct timespec rmt, rqt; int error, error2; error = copyin(ua_rqtp, &rqt, sizeof(rqt)); if (error) return (error); error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { error2 = copyout(&rmt, ua_rmtp, sizeof(rmt)); if (error2 != 0) error = error2; } return (error); } #ifndef _SYS_SYSPROTO_H_ struct gettimeofday_args { struct timeval *tp; struct timezone *tzp; }; #endif /* ARGSUSED */ int sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap) { struct timeval atv; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); error = copyout(&atv, uap->tp, sizeof (atv)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = 0; rtz.tz_dsttime = 0; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct settimeofday_args { struct timeval *tv; struct timezone *tzp; }; #endif /* ARGSUSED */ int sys_settimeofday(struct thread *td, struct settimeofday_args *uap) { struct timeval atv, *tvp; struct timezone atz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &atv, sizeof(atv)); if (error) return (error); tvp = &atv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &atz, sizeof(atz)); if (error) return (error); tzp = &atz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp) { int error; error = priv_check(td, PRIV_SETTIMEOFDAY); if (error) return (error); /* Verify all parameters before changing time. */ if (tv) { if (tv->tv_usec < 0 || tv->tv_usec >= 1000000 || tv->tv_sec < 0) return (EINVAL); error = settime(td, tv); } return (error); } /* * Get value of an interval timer. The process virtual and profiling virtual * time timers are kept in the p_stats area, since they can be swapped out. * These are kept internally in the way they are specified externally: in * time until they expire. * * The real time interval timer is kept in the process table slot for the * process, and its value (it_value) is kept as an absolute time rather than * as a delta, so that it is easy to keep periodic real-time signals from * drifting. * * Virtual time timers are processed in the hardclock() routine of * kern_clock.c. The real time timer is processed by a timeout routine, * called from the softclock() routine. Since a callout may be delayed in * real time due to interrupt processing in the system, it is possible for * the real time timeout routine (realitexpire, given below), to be delayed * in real time past when it is supposed to occur. It does not suffice, * therefore, to reload the real timer .it_value from the real time timers * .it_interval. Rather, we compute the next time in absolute time the timer * should go off. */ #ifndef _SYS_SYSPROTO_H_ struct getitimer_args { u_int which; struct itimerval *itv; }; #endif int sys_getitimer(struct thread *td, struct getitimer_args *uap) { struct itimerval aitv; int error; error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); return (copyout(&aitv, uap->itv, sizeof (struct itimerval))); } int kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv) { struct proc *p = td->td_proc; struct timeval ctv; if (which > ITIMER_PROF) return (EINVAL); if (which == ITIMER_REAL) { /* * Convert from absolute to relative time in .it_value * part of real time timer. If time for real time timer * has passed return 0, else return difference between * current time and time for the timer to go off. */ PROC_LOCK(p); *aitv = p->p_realtimer; PROC_UNLOCK(p); if (timevalisset(&aitv->it_value)) { microuptime(&ctv); if (timevalcmp(&aitv->it_value, &ctv, <)) timevalclear(&aitv->it_value); else timevalsub(&aitv->it_value, &ctv); } } else { PROC_ITIMLOCK(p); *aitv = p->p_stats->p_timer[which]; PROC_ITIMUNLOCK(p); } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktritimerval(aitv); #endif return (0); } #ifndef _SYS_SYSPROTO_H_ struct setitimer_args { u_int which; struct itimerval *itv, *oitv; }; #endif int sys_setitimer(struct thread *td, struct setitimer_args *uap) { struct itimerval aitv, oitv; int error; if (uap->itv == NULL) { uap->itv = uap->oitv; return (sys_getitimer(td, (struct getitimer_args *)uap)); } if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval)))) return (error); error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); return (copyout(&oitv, uap->oitv, sizeof(struct itimerval))); } int kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv, struct itimerval *oitv) { struct proc *p = td->td_proc; struct timeval ctv; sbintime_t sbt, pr; if (aitv == NULL) return (kern_getitimer(td, which, oitv)); if (which > ITIMER_PROF) return (EINVAL); #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktritimerval(aitv); #endif if (itimerfix(&aitv->it_value) || aitv->it_value.tv_sec > INT32_MAX / 2) return (EINVAL); if (!timevalisset(&aitv->it_value)) timevalclear(&aitv->it_interval); else if (itimerfix(&aitv->it_interval) || aitv->it_interval.tv_sec > INT32_MAX / 2) return (EINVAL); if (which == ITIMER_REAL) { PROC_LOCK(p); if (timevalisset(&p->p_realtimer.it_value)) callout_stop(&p->p_itcallout); microuptime(&ctv); if (timevalisset(&aitv->it_value)) { pr = tvtosbt(aitv->it_value) >> tc_precexp; timevaladd(&aitv->it_value, &ctv); sbt = tvtosbt(aitv->it_value); callout_reset_sbt(&p->p_itcallout, sbt, pr, realitexpire, p, C_ABSOLUTE); } *oitv = p->p_realtimer; p->p_realtimer = *aitv; PROC_UNLOCK(p); if (timevalisset(&oitv->it_value)) { if (timevalcmp(&oitv->it_value, &ctv, <)) timevalclear(&oitv->it_value); else timevalsub(&oitv->it_value, &ctv); } } else { if (aitv->it_interval.tv_sec == 0 && aitv->it_interval.tv_usec != 0 && aitv->it_interval.tv_usec < tick) aitv->it_interval.tv_usec = tick; if (aitv->it_value.tv_sec == 0 && aitv->it_value.tv_usec != 0 && aitv->it_value.tv_usec < tick) aitv->it_value.tv_usec = tick; PROC_ITIMLOCK(p); *oitv = p->p_stats->p_timer[which]; p->p_stats->p_timer[which] = *aitv; PROC_ITIMUNLOCK(p); } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktritimerval(oitv); #endif return (0); } static void realitexpire_reset_callout(struct proc *p, sbintime_t *isbtp) { sbintime_t prec; prec = isbtp == NULL ? tvtosbt(p->p_realtimer.it_interval) : *isbtp; callout_reset_sbt(&p->p_itcallout, tvtosbt(p->p_realtimer.it_value), prec >> tc_precexp, realitexpire, p, C_ABSOLUTE); } void itimer_proc_continue(struct proc *p) { struct timeval ctv; struct itimer *it; int id; PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_flag2 & P2_ITSTOPPED) != 0) { p->p_flag2 &= ~P2_ITSTOPPED; microuptime(&ctv); if (timevalcmp(&p->p_realtimer.it_value, &ctv, >=)) realitexpire(p); else realitexpire_reset_callout(p, NULL); } if (p->p_itimers != NULL) { for (id = 3; id < TIMER_MAX; id++) { it = p->p_itimers->its_timers[id]; if (it == NULL) continue; if ((it->it_flags & ITF_PSTOPPED) != 0) { ITIMER_LOCK(it); if ((it->it_flags & ITF_PSTOPPED) != 0) { it->it_flags &= ~ITF_PSTOPPED; if ((it->it_flags & ITF_DELETING) == 0) realtimer_expire_l(it, true); } ITIMER_UNLOCK(it); } } } } /* * Real interval timer expired: * send process whose timer expired an alarm signal. * If time is not set up to reload, then just return. * Else compute next time timer should go off which is > current time. * This is where delay in processing this timeout causes multiple * SIGALRM calls to be compressed into one. * tvtohz() always adds 1 to allow for the time until the next clock * interrupt being strictly less than 1 clock tick, but we don't want * that here since we want to appear to be in sync with the clock * interrupt even when we're delayed. */ -void +static void realitexpire(void *arg) { struct proc *p; struct timeval ctv; sbintime_t isbt; p = (struct proc *)arg; kern_psignal(p, SIGALRM); if (!timevalisset(&p->p_realtimer.it_interval)) { timevalclear(&p->p_realtimer.it_value); return; } isbt = tvtosbt(p->p_realtimer.it_interval); if (isbt >= sbt_timethreshold) getmicrouptime(&ctv); else microuptime(&ctv); do { timevaladd(&p->p_realtimer.it_value, &p->p_realtimer.it_interval); } while (timevalcmp(&p->p_realtimer.it_value, &ctv, <=)); if (P_SHOULDSTOP(p) || P_KILLED(p)) { p->p_flag2 |= P2_ITSTOPPED; return; } p->p_flag2 &= ~P2_ITSTOPPED; realitexpire_reset_callout(p, &isbt); } /* * Check that a proposed value to load into the .it_value or * .it_interval part of an interval timer is acceptable, and * fix it to have at least minimal value (i.e. if it is less * than the resolution of the clock, round it up.) */ int itimerfix(struct timeval *tv) { if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000) return (EINVAL); if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < (u_int)tick / 16) tv->tv_usec = (u_int)tick / 16; return (0); } /* * Decrement an interval timer by a specified number * of microseconds, which must be less than a second, * i.e. < 1000000. If the timer expires, then reload * it. In this case, carry over (usec - old value) to * reduce the value reloaded into the timer so that * the timer does not drift. This routine assumes * that it is called in a context where the timers * on which it is operating cannot change in value. */ int itimerdecr(struct itimerval *itp, int usec) { if (itp->it_value.tv_usec < usec) { if (itp->it_value.tv_sec == 0) { /* expired, and already in next interval */ usec -= itp->it_value.tv_usec; goto expire; } itp->it_value.tv_usec += 1000000; itp->it_value.tv_sec--; } itp->it_value.tv_usec -= usec; usec = 0; if (timevalisset(&itp->it_value)) return (1); /* expired, exactly at end of interval */ expire: if (timevalisset(&itp->it_interval)) { itp->it_value = itp->it_interval; itp->it_value.tv_usec -= usec; if (itp->it_value.tv_usec < 0) { itp->it_value.tv_usec += 1000000; itp->it_value.tv_sec--; } } else itp->it_value.tv_usec = 0; /* sec is already 0 */ return (0); } /* * Add and subtract routines for timevals. * N.B.: subtract routine doesn't deal with * results which are before the beginning, * it just gets very confused in this case. * Caveat emptor. */ void timevaladd(struct timeval *t1, const struct timeval *t2) { t1->tv_sec += t2->tv_sec; t1->tv_usec += t2->tv_usec; timevalfix(t1); } void timevalsub(struct timeval *t1, const struct timeval *t2) { t1->tv_sec -= t2->tv_sec; t1->tv_usec -= t2->tv_usec; timevalfix(t1); } static void timevalfix(struct timeval *t1) { if (t1->tv_usec < 0) { t1->tv_sec--; t1->tv_usec += 1000000; } if (t1->tv_usec >= 1000000) { t1->tv_sec++; t1->tv_usec -= 1000000; } } /* * ratecheck(): simple time-based rate-limit checking. */ int ratecheck(struct timeval *lasttime, const struct timeval *mininterval) { struct timeval tv, delta; int rv = 0; getmicrouptime(&tv); /* NB: 10ms precision */ delta = tv; timevalsub(&delta, lasttime); /* * check for 0,0 is so that the message will be seen at least once, * even if interval is huge. */ if (timevalcmp(&delta, mininterval, >=) || (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) { *lasttime = tv; rv = 1; } return (rv); } /* * ppsratecheck(): packets (or events) per second limitation. * * Return 0 if the limit is to be enforced (e.g. the caller * should drop a packet because of the rate limitation). * * maxpps of 0 always causes zero to be returned. maxpps of -1 * always causes 1 to be returned; this effectively defeats rate * limiting. * * Note that we maintain the struct timeval for compatibility * with other bsd systems. We reuse the storage and just monitor * clock ticks for minimal overhead. */ int ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps) { int now; /* * Reset the last time and counter if this is the first call * or more than a second has passed since the last update of * lasttime. */ now = ticks; if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) { lasttime->tv_sec = now; *curpps = 1; return (maxpps != 0); } else { (*curpps)++; /* NB: ignore potential overflow */ return (maxpps < 0 || *curpps <= maxpps); } } static void itimer_start(void) { static const struct kclock rt_clock = { .timer_create = realtimer_create, .timer_delete = realtimer_delete, .timer_settime = realtimer_settime, .timer_gettime = realtimer_gettime, }; itimer_zone = uma_zcreate("itimer", sizeof(struct itimer), NULL, NULL, itimer_init, itimer_fini, UMA_ALIGN_PTR, 0); register_posix_clock(CLOCK_REALTIME, &rt_clock); register_posix_clock(CLOCK_MONOTONIC, &rt_clock); p31b_setcfg(CTL_P1003_1B_TIMERS, 200112L); p31b_setcfg(CTL_P1003_1B_DELAYTIMER_MAX, INT_MAX); p31b_setcfg(CTL_P1003_1B_TIMER_MAX, TIMER_MAX); } static int register_posix_clock(int clockid, const struct kclock *clk) { if ((unsigned)clockid >= MAX_CLOCKS) { printf("%s: invalid clockid\n", __func__); return (0); } posix_clocks[clockid] = *clk; return (1); } static int itimer_init(void *mem, int size, int flags) { struct itimer *it; it = (struct itimer *)mem; mtx_init(&it->it_mtx, "itimer lock", NULL, MTX_DEF); return (0); } static void itimer_fini(void *mem, int size) { struct itimer *it; it = (struct itimer *)mem; mtx_destroy(&it->it_mtx); } static void itimer_enter(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); it->it_usecount++; } static void itimer_leave(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); KASSERT(it->it_usecount > 0, ("invalid it_usecount")); if (--it->it_usecount == 0 && (it->it_flags & ITF_WANTED) != 0) wakeup(it); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_create_args { clockid_t clock_id; struct sigevent * evp; int * timerid; }; #endif int sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap) { struct sigevent *evp, ev; int id; int error; if (uap->evp == NULL) { evp = NULL; } else { error = copyin(uap->evp, &ev, sizeof(ev)); if (error != 0) return (error); evp = &ev; } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, int *timerid, int preset_id) { struct proc *p = td->td_proc; struct itimer *it; int id; int error; if (clock_id < 0 || clock_id >= MAX_CLOCKS) return (EINVAL); if (posix_clocks[clock_id].timer_create == NULL) return (EINVAL); if (evp != NULL) { if (evp->sigev_notify != SIGEV_NONE && evp->sigev_notify != SIGEV_SIGNAL && evp->sigev_notify != SIGEV_THREAD_ID) return (EINVAL); if ((evp->sigev_notify == SIGEV_SIGNAL || evp->sigev_notify == SIGEV_THREAD_ID) && !_SIG_VALID(evp->sigev_signo)) return (EINVAL); } if (p->p_itimers == NULL) itimers_alloc(p); it = uma_zalloc(itimer_zone, M_WAITOK); it->it_flags = 0; it->it_usecount = 0; timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); it->it_overrun = 0; it->it_overrun_last = 0; it->it_clockid = clock_id; it->it_proc = p; ksiginfo_init(&it->it_ksi); it->it_ksi.ksi_flags |= KSI_INS | KSI_EXT; error = CLOCK_CALL(clock_id, timer_create, (it)); if (error != 0) goto out; PROC_LOCK(p); if (preset_id != -1) { KASSERT(preset_id >= 0 && preset_id < 3, ("invalid preset_id")); id = preset_id; if (p->p_itimers->its_timers[id] != NULL) { PROC_UNLOCK(p); error = 0; goto out; } } else { /* * Find a free timer slot, skipping those reserved * for setitimer(). */ for (id = 3; id < TIMER_MAX; id++) if (p->p_itimers->its_timers[id] == NULL) break; if (id == TIMER_MAX) { PROC_UNLOCK(p); error = EAGAIN; goto out; } } p->p_itimers->its_timers[id] = it; if (evp != NULL) it->it_sigev = *evp; else { it->it_sigev.sigev_notify = SIGEV_SIGNAL; switch (clock_id) { default: case CLOCK_REALTIME: it->it_sigev.sigev_signo = SIGALRM; break; case CLOCK_VIRTUAL: it->it_sigev.sigev_signo = SIGVTALRM; break; case CLOCK_PROF: it->it_sigev.sigev_signo = SIGPROF; break; } it->it_sigev.sigev_value.sival_int = id; } if (it->it_sigev.sigev_notify == SIGEV_SIGNAL || it->it_sigev.sigev_notify == SIGEV_THREAD_ID) { it->it_ksi.ksi_signo = it->it_sigev.sigev_signo; it->it_ksi.ksi_code = SI_TIMER; it->it_ksi.ksi_value = it->it_sigev.sigev_value; it->it_ksi.ksi_timerid = id; } PROC_UNLOCK(p); *timerid = id; return (0); out: ITIMER_LOCK(it); CLOCK_CALL(it->it_clockid, timer_delete, (it)); ITIMER_UNLOCK(it); uma_zfree(itimer_zone, it); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_delete_args { int timerid; }; #endif int sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap) { return (kern_ktimer_delete(td, uap->timerid)); } static struct itimer * itimer_find(struct proc *p, int timerid) { struct itimer *it; PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_itimers == NULL) || (timerid < 0) || (timerid >= TIMER_MAX) || (it = p->p_itimers->its_timers[timerid]) == NULL) { return (NULL); } ITIMER_LOCK(it); if ((it->it_flags & ITF_DELETING) != 0) { ITIMER_UNLOCK(it); it = NULL; } return (it); } int kern_ktimer_delete(struct thread *td, int timerid) { struct proc *p = td->td_proc; struct itimer *it; PROC_LOCK(p); it = itimer_find(p, timerid); if (it == NULL) { PROC_UNLOCK(p); return (EINVAL); } PROC_UNLOCK(p); it->it_flags |= ITF_DELETING; while (it->it_usecount > 0) { it->it_flags |= ITF_WANTED; msleep(it, &it->it_mtx, PPAUSE, "itimer", 0); } it->it_flags &= ~ITF_WANTED; CLOCK_CALL(it->it_clockid, timer_delete, (it)); ITIMER_UNLOCK(it); PROC_LOCK(p); if (KSI_ONQ(&it->it_ksi)) sigqueue_take(&it->it_ksi); p->p_itimers->its_timers[timerid] = NULL; PROC_UNLOCK(p); uma_zfree(itimer_zone, it); return (0); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_settime_args { int timerid; int flags; const struct itimerspec * value; struct itimerspec * ovalue; }; #endif int sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap) { struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val, sizeof(val)); if (error != 0) return (error); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) error = copyout(ovalp, uap->ovalue, sizeof(*ovalp)); return (error); } int kern_ktimer_settime(struct thread *td, int timer_id, int flags, struct itimerspec *val, struct itimerspec *oval) { struct proc *p; struct itimer *it; int error; p = td->td_proc; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { PROC_UNLOCK(p); itimer_enter(it); error = CLOCK_CALL(it->it_clockid, timer_settime, (it, flags, val, oval)); itimer_leave(it); ITIMER_UNLOCK(it); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct ktimer_gettime_args { int timerid; struct itimerspec * value; }; #endif int sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap) { struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) error = copyout(&val, uap->value, sizeof(val)); return (error); } int kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val) { struct proc *p; struct itimer *it; int error; p = td->td_proc; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { PROC_UNLOCK(p); itimer_enter(it); error = CLOCK_CALL(it->it_clockid, timer_gettime, (it, val)); itimer_leave(it); ITIMER_UNLOCK(it); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct timer_getoverrun_args { int timerid; }; #endif int sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap) { return (kern_ktimer_getoverrun(td, uap->timerid)); } int kern_ktimer_getoverrun(struct thread *td, int timer_id) { struct proc *p = td->td_proc; struct itimer *it; int error ; PROC_LOCK(p); if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { td->td_retval[0] = it->it_overrun_last; ITIMER_UNLOCK(it); PROC_UNLOCK(p); error = 0; } return (error); } static int realtimer_create(struct itimer *it) { callout_init_mtx(&it->it_callout, &it->it_mtx, 0); return (0); } static int realtimer_delete(struct itimer *it) { mtx_assert(&it->it_mtx, MA_OWNED); /* * clear timer's value and interval to tell realtimer_expire * to not rearm the timer. */ timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); ITIMER_UNLOCK(it); callout_drain(&it->it_callout); ITIMER_LOCK(it); return (0); } static int realtimer_gettime(struct itimer *it, struct itimerspec *ovalue) { struct timespec cts; mtx_assert(&it->it_mtx, MA_OWNED); realtimer_clocktime(it->it_clockid, &cts); *ovalue = it->it_time; if (ovalue->it_value.tv_sec != 0 || ovalue->it_value.tv_nsec != 0) { timespecsub(&ovalue->it_value, &cts, &ovalue->it_value); if (ovalue->it_value.tv_sec < 0 || (ovalue->it_value.tv_sec == 0 && ovalue->it_value.tv_nsec == 0)) { ovalue->it_value.tv_sec = 0; ovalue->it_value.tv_nsec = 1; } } return (0); } static int realtimer_settime(struct itimer *it, int flags, struct itimerspec *value, struct itimerspec *ovalue) { struct timespec cts, ts; struct timeval tv; struct itimerspec val; mtx_assert(&it->it_mtx, MA_OWNED); val = *value; if (itimespecfix(&val.it_value)) return (EINVAL); if (timespecisset(&val.it_value)) { if (itimespecfix(&val.it_interval)) return (EINVAL); } else { timespecclear(&val.it_interval); } if (ovalue != NULL) realtimer_gettime(it, ovalue); it->it_time = val; if (timespecisset(&val.it_value)) { realtimer_clocktime(it->it_clockid, &cts); ts = val.it_value; if ((flags & TIMER_ABSTIME) == 0) { /* Convert to absolute time. */ timespecadd(&it->it_time.it_value, &cts, &it->it_time.it_value); } else { timespecsub(&ts, &cts, &ts); /* * We don't care if ts is negative, tztohz will * fix it. */ } TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } else { callout_stop(&it->it_callout); } return (0); } static void realtimer_clocktime(clockid_t id, struct timespec *ts) { if (id == CLOCK_REALTIME) getnanotime(ts); else /* CLOCK_MONOTONIC */ getnanouptime(ts); } int itimer_accept(struct proc *p, int timerid, ksiginfo_t *ksi) { struct itimer *it; PROC_LOCK_ASSERT(p, MA_OWNED); it = itimer_find(p, timerid); if (it != NULL) { ksi->ksi_overrun = it->it_overrun; it->it_overrun_last = it->it_overrun; it->it_overrun = 0; ITIMER_UNLOCK(it); return (0); } return (EINVAL); } static int itimespecfix(struct timespec *ts) { if (!timespecvalid_interval(ts)) return (EINVAL); if ((UINT64_MAX - ts->tv_nsec) / NS_PER_SEC < ts->tv_sec) return (EINVAL); if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < tick * 1000) ts->tv_nsec = tick * 1000; return (0); } #define timespectons(tsp) \ ((uint64_t)(tsp)->tv_sec * NS_PER_SEC + (tsp)->tv_nsec) #define timespecfromns(ns) (struct timespec){ \ .tv_sec = (ns) / NS_PER_SEC, \ .tv_nsec = (ns) % NS_PER_SEC \ } static void realtimer_expire_l(struct itimer *it, bool proc_locked) { struct timespec cts, ts; struct timeval tv; struct proc *p; uint64_t interval, now, overruns, value; realtimer_clocktime(it->it_clockid, &cts); /* Only fire if time is reached. */ if (timespeccmp(&cts, &it->it_time.it_value, >=)) { if (timespecisset(&it->it_time.it_interval)) { timespecadd(&it->it_time.it_value, &it->it_time.it_interval, &it->it_time.it_value); interval = timespectons(&it->it_time.it_interval); value = timespectons(&it->it_time.it_value); now = timespectons(&cts); if (now >= value) { /* * We missed at least one period. */ overruns = howmany(now - value + 1, interval); if (it->it_overrun + overruns >= it->it_overrun && it->it_overrun + overruns <= INT_MAX) { it->it_overrun += (int)overruns; } else { it->it_overrun = INT_MAX; it->it_ksi.ksi_errno = ERANGE; } value = now + interval - (now - value) % interval; it->it_time.it_value = timespecfromns(value); } } else { /* single shot timer ? */ timespecclear(&it->it_time.it_value); } p = it->it_proc; if (timespecisset(&it->it_time.it_value)) { if (P_SHOULDSTOP(p) || P_KILLED(p)) { it->it_flags |= ITF_PSTOPPED; } else { timespecsub(&it->it_time.it_value, &cts, &ts); TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } } itimer_enter(it); ITIMER_UNLOCK(it); if (proc_locked) PROC_UNLOCK(p); itimer_fire(it); if (proc_locked) PROC_LOCK(p); ITIMER_LOCK(it); itimer_leave(it); } else if (timespecisset(&it->it_time.it_value)) { p = it->it_proc; if (P_SHOULDSTOP(p) || P_KILLED(p)) { it->it_flags |= ITF_PSTOPPED; } else { ts = it->it_time.it_value; timespecsub(&ts, &cts, &ts); TIMESPEC_TO_TIMEVAL(&tv, &ts); callout_reset(&it->it_callout, tvtohz(&tv), realtimer_expire, it); } } } /* Timeout callback for realtime timer */ static void realtimer_expire(void *arg) { realtimer_expire_l(arg, false); } static void itimer_fire(struct itimer *it) { struct proc *p = it->it_proc; struct thread *td; if (it->it_sigev.sigev_notify == SIGEV_SIGNAL || it->it_sigev.sigev_notify == SIGEV_THREAD_ID) { if (sigev_findtd(p, &it->it_sigev, &td) != 0) { ITIMER_LOCK(it); timespecclear(&it->it_time.it_value); timespecclear(&it->it_time.it_interval); callout_stop(&it->it_callout); ITIMER_UNLOCK(it); return; } if (!KSI_ONQ(&it->it_ksi)) { it->it_ksi.ksi_errno = 0; ksiginfo_set_sigev(&it->it_ksi, &it->it_sigev); tdsendsignal(p, td, it->it_ksi.ksi_signo, &it->it_ksi); } else { if (it->it_overrun < INT_MAX) it->it_overrun++; else it->it_ksi.ksi_errno = ERANGE; } PROC_UNLOCK(p); } } static void itimers_alloc(struct proc *p) { struct itimers *its; its = malloc(sizeof (struct itimers), M_SUBPROC, M_WAITOK | M_ZERO); PROC_LOCK(p); if (p->p_itimers == NULL) { p->p_itimers = its; PROC_UNLOCK(p); } else { PROC_UNLOCK(p); free(its, M_SUBPROC); } } /* Clean up timers when some process events are being triggered. */ static void itimers_event_exit_exec(int start_idx, struct proc *p) { struct itimers *its; struct itimer *it; int i; its = p->p_itimers; if (its == NULL) return; for (i = start_idx; i < TIMER_MAX; ++i) { if ((it = its->its_timers[i]) != NULL) kern_ktimer_delete(curthread, i); } if (its->its_timers[0] == NULL && its->its_timers[1] == NULL && its->its_timers[2] == NULL) { /* Synchronize with itimer_proc_continue(). */ PROC_LOCK(p); p->p_itimers = NULL; PROC_UNLOCK(p); free(its, M_SUBPROC); } } void itimers_exec(struct proc *p) { /* * According to susv3, XSI interval timers should be inherited * by new image. */ itimers_event_exit_exec(3, p); } void itimers_exit(struct proc *p) { itimers_event_exit_exec(0, p); } diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 0820bdcde676..b2b84d0fc722 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -1,546 +1,544 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)systm.h 8.7 (Berkeley) 3/29/95 * $FreeBSD$ */ #ifndef _SYS_SYSTM_H_ #define _SYS_SYSTM_H_ #include #include #include #include #include #include #include /* for people using printf mainly */ __NULLABILITY_PRAGMA_PUSH #ifdef _KERNEL extern int cold; /* nonzero if we are doing a cold boot */ extern int suspend_blocked; /* block suspend due to pending shutdown */ extern int rebooting; /* kern_reboot() has been called. */ extern char version[]; /* system version */ extern char compiler_version[]; /* compiler version */ extern char copyright[]; /* system copyright */ extern int kstack_pages; /* number of kernel stack pages */ extern u_long pagesizes[]; /* supported page sizes */ extern long physmem; /* physical memory */ extern long realmem; /* 'real' memory */ extern char *rootdevnames[2]; /* names of possible root devices */ extern int boothowto; /* reboot flags, from console subsystem */ extern int bootverbose; /* nonzero to print verbose messages */ extern int maxusers; /* system tune hint */ extern int ngroups_max; /* max # of supplemental groups */ extern int vm_guest; /* Running as virtual machine guest? */ extern u_long maxphys; /* max raw I/O transfer size */ /* * Detected virtual machine guest types. The intention is to expand * and/or add to the VM_GUEST_VM type if specific VM functionality is * ever implemented (e.g. vendor-specific paravirtualization features). * Keep in sync with vm_guest_sysctl_names[]. */ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV, VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_GUEST_VBOX, VM_GUEST_PARALLELS, VM_LAST }; #endif /* KERNEL */ /* * Align variables. */ #define __read_mostly __section(".data.read_mostly") #define __read_frequently __section(".data.read_frequently") #define __exclusive_cache_line __aligned(CACHE_LINE_SIZE) \ __section(".data.exclusive_cache_line") #if defined(_STANDALONE) struct ucred; #endif #ifdef _KERNEL #include /* MAXCPU */ #include /* curthread */ #include extern int osreldate; extern const void *zero_region; /* address space maps to a zeroed page */ extern int unmapped_buf_allowed; #ifdef __LP64__ #define IOSIZE_MAX iosize_max() #define DEVFS_IOSIZE_MAX devfs_iosize_max() #else #define IOSIZE_MAX SSIZE_MAX #define DEVFS_IOSIZE_MAX SSIZE_MAX #endif /* * General function declarations. */ struct inpcb; struct lock_object; struct malloc_type; struct mtx; struct proc; struct socket; struct thread; struct tty; struct ucred; struct uio; struct _jmp_buf; struct trapframe; struct eventtimer; int setjmp(struct _jmp_buf *) __returns_twice; void longjmp(struct _jmp_buf *, int) __dead2; int dumpstatus(vm_offset_t addr, off_t count); int nullop(void); int eopnotsupp(void); int ureadc(int, struct uio *); void hashdestroy(void *, struct malloc_type *, u_long); void *hashinit(int count, struct malloc_type *type, u_long *hashmask); void *hashinit_flags(int count, struct malloc_type *type, u_long *hashmask, int flags); #define HASH_NOWAIT 0x00000001 #define HASH_WAITOK 0x00000002 void *phashinit(int count, struct malloc_type *type, u_long *nentries); void *phashinit_flags(int count, struct malloc_type *type, u_long *nentries, int flags); void g_waitidle(void); void cpu_flush_dcache(void *, size_t); void cpu_rootconf(void); void critical_enter_KBI(void); void critical_exit_KBI(void); void critical_exit_preempt(void); void init_param1(void); void init_param2(long physpages); void init_static_kenv(char *, size_t); void tablefull(const char *); /* * Allocate per-thread "current" state in the linuxkpi */ extern int (*lkpi_alloc_current)(struct thread *, int); int linux_alloc_current_noop(struct thread *, int); #if defined(KLD_MODULE) || defined(KTR_CRITICAL) || !defined(_KERNEL) || defined(GENOFFSET) #define critical_enter() critical_enter_KBI() #define critical_exit() critical_exit_KBI() #else static __inline void critical_enter(void) { struct thread_lite *td; td = (struct thread_lite *)curthread; td->td_critnest++; atomic_interrupt_fence(); } static __inline void critical_exit(void) { struct thread_lite *td; td = (struct thread_lite *)curthread; KASSERT(td->td_critnest != 0, ("critical_exit: td_critnest == 0")); atomic_interrupt_fence(); td->td_critnest--; atomic_interrupt_fence(); if (__predict_false(td->td_owepreempt)) critical_exit_preempt(); } #endif #ifdef EARLY_PRINTF typedef void early_putc_t(int ch); extern early_putc_t *early_putc; #endif int kvprintf(char const *, void (*)(int, void*), void *, int, __va_list) __printflike(1, 0); void log(int, const char *, ...) __printflike(2, 3); void log_console(struct uio *); void vlog(int, const char *, __va_list) __printflike(2, 0); int asprintf(char **ret, struct malloc_type *mtp, const char *format, ...) __printflike(3, 4); int printf(const char *, ...) __printflike(1, 2); int snprintf(char *, size_t, const char *, ...) __printflike(3, 4); int sprintf(char *buf, const char *, ...) __printflike(2, 3); int uprintf(const char *, ...) __printflike(1, 2); int vprintf(const char *, __va_list) __printflike(1, 0); int vasprintf(char **ret, struct malloc_type *mtp, const char *format, __va_list ap) __printflike(3, 0); int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0); int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0); int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0); int sscanf(const char *, char const * _Nonnull, ...) __scanflike(2, 3); int vsscanf(const char * _Nonnull, char const * _Nonnull, __va_list) __scanflike(2, 0); long strtol(const char *, char **, int); u_long strtoul(const char *, char **, int); quad_t strtoq(const char *, char **, int); u_quad_t strtouq(const char *, char **, int); void tprintf(struct proc *p, int pri, const char *, ...) __printflike(3, 4); void vtprintf(struct proc *, int, const char *, __va_list) __printflike(3, 0); void hexdump(const void *ptr, int length, const char *hdr, int flags); #define HD_COLUMN_MASK 0xff #define HD_DELIM_MASK 0xff00 #define HD_OMIT_COUNT (1 << 16) #define HD_OMIT_HEX (1 << 17) #define HD_OMIT_CHARS (1 << 18) #define ovbcopy(f, t, l) bcopy((f), (t), (l)) void bcopy(const void * _Nonnull from, void * _Nonnull to, size_t len); void bzero(void * _Nonnull buf, size_t len); void explicit_bzero(void * _Nonnull, size_t); int bcmp(const void *b1, const void *b2, size_t len); void *memset(void * _Nonnull buf, int c, size_t len); void *memcpy(void * _Nonnull to, const void * _Nonnull from, size_t len); void *memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n); int memcmp(const void *b1, const void *b2, size_t len); #if defined(KASAN) #define SAN_PREFIX kasan_ #elif defined(KCSAN) #define SAN_PREFIX kcsan_ #endif #ifdef SAN_PREFIX #define SAN_INTERCEPTOR(func) __CONCAT(SAN_PREFIX, func) void *SAN_INTERCEPTOR(memset)(void *, int, size_t); void *SAN_INTERCEPTOR(memcpy)(void *, const void *, size_t); void *SAN_INTERCEPTOR(memmove)(void *, const void *, size_t); int SAN_INTERCEPTOR(memcmp)(const void *, const void *, size_t); #ifndef SAN_RUNTIME #define bcopy(from, to, len) SAN_INTERCEPTOR(memmove)((to), (from), (len)) #define bzero(buf, len) SAN_INTERCEPTOR(memset)((buf), 0, (len)) #define bcmp(b1, b2, len) SAN_INTERCEPTOR(memcmp)((b1), (b2), (len)) #define memset(buf, c, len) SAN_INTERCEPTOR(memset)((buf), (c), (len)) #define memcpy(to, from, len) SAN_INTERCEPTOR(memcpy)((to), (from), (len)) #define memmove(dest, src, n) SAN_INTERCEPTOR(memmove)((dest), (src), (n)) #define memcmp(b1, b2, len) SAN_INTERCEPTOR(memcmp)((b1), (b2), (len)) #endif /* !SAN_RUNTIME */ #else #define bcopy(from, to, len) __builtin_memmove((to), (from), (len)) #define bzero(buf, len) __builtin_memset((buf), 0, (len)) #define bcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) #define memset(buf, c, len) __builtin_memset((buf), (c), (len)) #define memcpy(to, from, len) __builtin_memcpy((to), (from), (len)) #define memmove(dest, src, n) __builtin_memmove((dest), (src), (n)) #define memcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) #endif /* !SAN_PREFIX */ void *memset_early(void * _Nonnull buf, int c, size_t len); #define bzero_early(buf, len) memset_early((buf), 0, (len)) void *memcpy_early(void * _Nonnull to, const void * _Nonnull from, size_t len); void *memmove_early(void * _Nonnull dest, const void * _Nonnull src, size_t n); #define bcopy_early(from, to, len) memmove_early((to), (from), (len)) #define copystr(src, dst, len, outlen) ({ \ size_t __r, __len, *__outlen; \ \ __len = (len); \ __outlen = (outlen); \ __r = strlcpy((dst), (src), __len); \ if (__outlen != NULL) \ *__outlen = ((__r >= __len) ? __len : __r + 1); \ ((__r >= __len) ? ENAMETOOLONG : 0); \ }) int copyinstr(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len, size_t * __restrict lencopied); int copyin(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len); int copyin_nofault(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len); int copyout(const void * _Nonnull __restrict kaddr, void * __restrict udaddr, size_t len); int copyout_nofault(const void * _Nonnull __restrict kaddr, void * __restrict udaddr, size_t len); #ifdef SAN_PREFIX int SAN_INTERCEPTOR(copyin)(const void *, void *, size_t); int SAN_INTERCEPTOR(copyinstr)(const void *, void *, size_t, size_t *); int SAN_INTERCEPTOR(copyout)(const void *, void *, size_t); #ifndef SAN_RUNTIME #define copyin(u, k, l) SAN_INTERCEPTOR(copyin)((u), (k), (l)) #define copyinstr(u, k, l, lc) SAN_INTERCEPTOR(copyinstr)((u), (k), (l), (lc)) #define copyout(k, u, l) SAN_INTERCEPTOR(copyout)((k), (u), (l)) #endif /* !SAN_RUNTIME */ #endif /* SAN_PREFIX */ int fubyte(volatile const void *base); long fuword(volatile const void *base); int fuword16(volatile const void *base); int32_t fuword32(volatile const void *base); int64_t fuword64(volatile const void *base); int fueword(volatile const void *base, long *val); int fueword32(volatile const void *base, int32_t *val); int fueword64(volatile const void *base, int64_t *val); int subyte(volatile void *base, int byte); int suword(volatile void *base, long word); int suword16(volatile void *base, int word); int suword32(volatile void *base, int32_t word); int suword64(volatile void *base, int64_t word); uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval); u_long casuword(volatile u_long *p, u_long oldval, u_long newval); int casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, uint32_t newval); int casueword(volatile u_long *p, u_long oldval, u_long *oldvalp, u_long newval); -void realitexpire(void *); - int sysbeep(int hertz, sbintime_t duration); void hardclock(int cnt, int usermode); void hardclock_sync(int cpu); void softclock(void *); void statclock(int cnt, int usermode); void profclock(int cnt, int usermode, uintfptr_t pc); int hardclockintr(void); void startprofclock(struct proc *); void stopprofclock(struct proc *); void cpu_startprofclock(void); void cpu_stopprofclock(void); void suspendclock(void); void resumeclock(void); sbintime_t cpu_idleclock(void); void cpu_activeclock(void); void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt); void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq); extern int cpu_disable_c2_sleep; extern int cpu_disable_c3_sleep; char *kern_getenv(const char *name); void freeenv(char *env); int getenv_int(const char *name, int *data); int getenv_uint(const char *name, unsigned int *data); int getenv_long(const char *name, long *data); int getenv_ulong(const char *name, unsigned long *data); int getenv_string(const char *name, char *data, int size); int getenv_int64(const char *name, int64_t *data); int getenv_uint64(const char *name, uint64_t *data); int getenv_quad(const char *name, quad_t *data); int getenv_bool(const char *name, bool *data); bool getenv_is_true(const char *name); bool getenv_is_false(const char *name); int kern_setenv(const char *name, const char *value); int kern_unsetenv(const char *name); int testenv(const char *name); int getenv_array(const char *name, void *data, int size, int *psize, int type_size, bool allow_signed); #define GETENV_UNSIGNED false /* negative numbers not allowed */ #define GETENV_SIGNED true /* negative numbers allowed */ typedef uint64_t (cpu_tick_f)(void); void set_cputicker(cpu_tick_f *func, uint64_t freq, bool isvariable); extern cpu_tick_f *cpu_ticks; uint64_t cpu_tickrate(void); uint64_t cputick2usec(uint64_t tick); #include /* Initialize the world */ void consinit(void); void cpu_initclocks(void); void cpu_initclocks_bsp(void); void cpu_initclocks_ap(void); void usrinfoinit(void); /* Finalize the world */ void kern_reboot(int) __dead2; void shutdown_nice(int); /* Stubs for obsolete functions that used to be for interrupt management */ static __inline intrmask_t splhigh(void) { return 0; } static __inline intrmask_t splimp(void) { return 0; } static __inline intrmask_t splnet(void) { return 0; } static __inline intrmask_t spltty(void) { return 0; } static __inline void splx(intrmask_t ipl __unused) { return; } /* * Common `proc' functions are declared here so that proc.h can be included * less often. */ int _sleep(const void * _Nonnull chan, struct lock_object *lock, int pri, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep(chan, mtx, pri, wmesg, timo) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) #define msleep_sbt(chan, mtx, pri, wmesg, bt, pr, flags) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (bt), (pr), \ (flags)) int msleep_spin_sbt(const void * _Nonnull chan, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep_spin(chan, mtx, wmesg, timo) \ msleep_spin_sbt((chan), (mtx), (wmesg), tick_sbt * (timo), \ 0, C_HARDCLOCK) int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define pause(wmesg, timo) \ pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK) #define pause_sig(wmesg, timo) \ pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK | C_CATCH) #define tsleep(chan, pri, wmesg, timo) \ _sleep((chan), NULL, (pri), (wmesg), tick_sbt * (timo), \ 0, C_HARDCLOCK) #define tsleep_sbt(chan, pri, wmesg, bt, pr, flags) \ _sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags)) void wakeup(const void *chan); void wakeup_one(const void *chan); void wakeup_any(const void *chan); /* * Common `struct cdev *' stuff are declared here to avoid #include poisoning */ struct cdev; dev_t dev2udev(struct cdev *x); const char *devtoname(struct cdev *cdev); #ifdef __LP64__ size_t devfs_iosize_max(void); size_t iosize_max(void); #endif int poll_no_poll(int events); /* XXX: Should be void nanodelay(u_int nsec); */ void DELAY(int usec); /* Root mount holdback API */ struct root_hold_token { int flags; const char *who; TAILQ_ENTRY(root_hold_token) list; }; struct root_hold_token *root_mount_hold(const char *identifier); void root_mount_hold_token(const char *identifier, struct root_hold_token *h); void root_mount_rel(struct root_hold_token *h); int root_mounted(void); /* * Unit number allocation API. (kern/subr_unit.c) */ struct unrhdr; #define UNR_NO_MTX ((void *)(uintptr_t)-1) struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex); void init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex); void delete_unrhdr(struct unrhdr *uh); void clear_unrhdr(struct unrhdr *uh); void clean_unrhdr(struct unrhdr *uh); void clean_unrhdrl(struct unrhdr *uh); int alloc_unr(struct unrhdr *uh); int alloc_unr_specific(struct unrhdr *uh, u_int item); int alloc_unrl(struct unrhdr *uh); void free_unr(struct unrhdr *uh, u_int item); #ifndef __LP64__ #define UNR64_LOCKED #endif struct unrhdr64 { uint64_t counter; }; static __inline void new_unrhdr64(struct unrhdr64 *unr64, uint64_t low) { unr64->counter = low; } #ifdef UNR64_LOCKED uint64_t alloc_unr64(struct unrhdr64 *); #else static __inline uint64_t alloc_unr64(struct unrhdr64 *unr64) { return (atomic_fetchadd_64(&unr64->counter, 1)); } #endif void intr_prof_stack_use(struct thread *td, struct trapframe *frame); void counted_warning(unsigned *counter, const char *msg); /* * APIs to manage deprecation and obsolescence. */ void _gone_in(int major, const char *msg); void _gone_in_dev(device_t dev, int major, const char *msg); #ifdef NO_OBSOLETE_CODE #define __gone_ok(m, msg) \ _Static_assert(m < P_OSREL_MAJOR(__FreeBSD_version)), \ "Obsolete code: " msg); #else #define __gone_ok(m, msg) #endif #define gone_in(major, msg) __gone_ok(major, msg) _gone_in(major, msg) #define gone_in_dev(dev, major, msg) __gone_ok(major, msg) _gone_in_dev(dev, major, msg) #if defined(INVARIANTS) || defined(WITNESS) #define __diagused #else #define __diagused __unused #endif #endif /* _KERNEL */ __NULLABILITY_PRAGMA_POP #endif /* !_SYS_SYSTM_H_ */