diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 5f2492c473b8..c4bb648e3f25 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -1,863 +1,884 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include "opt_device_polling.h" #include "opt_hwpmc_hooks.h" #include "opt_ntp.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef GPROF #include #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , clock, hard); PMC_SOFT_DEFINE( , , clock, stat); PMC_SOFT_DEFINE_EX( , , clock, prof, \ cpu_startprofclock, cpu_stopprofclock); #endif #ifdef DEVICE_POLLING extern void hardclock_device_poll(void); #endif /* DEVICE_POLLING */ /* Spin-lock protecting profiling statistics. */ static struct mtx time_lock; SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *"); static int sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) { int error; long cp_time[CPUSTATES]; #ifdef SCTL_MASK32 int i; unsigned int cp_time32[CPUSTATES]; #endif read_cpu_time(cp_time); #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time32)); for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time)); error = SYSCTL_OUT(req, cp_time, sizeof(cp_time)); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics"); static long empty[CPUSTATES]; static int sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) { struct pcpu *pcpu; int error; int c; long *cp_time; #ifdef SCTL_MASK32 unsigned int cp_time32[CPUSTATES]; int i; #endif if (!req->oldptr) { #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1)); else #endif return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1)); } for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) { if (!CPU_ABSENT(c)) { pcpu = pcpu_find(c); cp_time = pcpu->pc_cp_time; } else { cp_time = empty; } #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); #ifdef DEADLKRES static const char *blessed[] = { "getblk", "so_snd_sx", "so_rcv_sx", NULL }; static int slptime_threshold = 1800; static int blktime_threshold = 900; static int sleepfreq = 3; static void deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks) { int tticks; sx_assert(&allproc_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * The thread should be blocked on a turnstile, simply check * if the turnstile channel is in good state. */ MPASS(td->td_blocked != NULL); tticks = ticks - td->td_blktick; if (tticks > blkticks) /* * Accordingly with provided thresholds, this thread is stuck * for too long on a turnstile. */ panic("%s: possible deadlock detected for %p (%s), " "blocked for %d ticks\n", __func__, td, sched_tdname(td), tticks); } static void deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks) { const void *wchan; int i, slptype, tticks; sx_assert(&allproc_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * Check if the thread is sleeping on a lock, otherwise skip the check. * Drop the thread lock in order to avoid a LOR with the sleepqueue * spinlock. */ wchan = td->td_wchan; tticks = ticks - td->td_slptick; slptype = sleepq_type(wchan); if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) && tticks > slpticks) { /* * Accordingly with provided thresholds, this thread is stuck * for too long on a sleepqueue. * However, being on a sleepqueue, we might still check for the * blessed list. */ for (i = 0; blessed[i] != NULL; i++) if (!strcmp(blessed[i], td->td_wmesg)) return; panic("%s: possible deadlock detected for %p (%s), " "blocked for %d ticks\n", __func__, td, sched_tdname(td), tticks); } } static void deadlkres(void) { struct proc *p; struct thread *td; int blkticks, slpticks, tryl; tryl = 0; for (;;) { blkticks = blktime_threshold * hz; slpticks = slptime_threshold * hz; /* * Avoid to sleep on the sx_lock in order to avoid a * possible priority inversion problem leading to * starvation. * If the lock can't be held after 100 tries, panic. */ if (!sx_try_slock(&allproc_lock)) { if (tryl > 100) panic("%s: possible deadlock detected " "on allproc_lock\n", __func__); tryl++; pause("allproc", sleepfreq * hz); continue; } tryl = 0; FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (TD_ON_LOCK(td)) deadlres_td_on_lock(p, td, blkticks); else if (TD_IS_SLEEPING(td)) deadlres_td_sleep_q(p, td, slpticks); thread_unlock(td); } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); /* Sleep for sleepfreq seconds. */ pause("-", sleepfreq * hz); } } static struct kthread_desc deadlkres_kd = { "deadlkres", deadlkres, (struct thread **)NULL }; SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Deadlock resolver"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW, &slptime_threshold, 0, "Number of seconds within is valid to sleep on a sleepqueue"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, &blktime_threshold, 0, "Number of seconds within is valid to block on a turnstile"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, "Number of seconds between any deadlock resolver thread run"); #endif /* DEADLKRES */ void read_cpu_time(long *cp_time) { struct pcpu *pc; int i, j; /* Sum up global cp_time[]. */ bzero(cp_time, sizeof(long) * CPUSTATES); CPU_FOREACH(i) { pc = pcpu_find(i); for (j = 0; j < CPUSTATES; j++) cp_time[j] += pc->pc_cp_time[j]; } } #include static int watchdog_ticks; static int watchdog_enabled; static void watchdog_fire(void); static void watchdog_config(void *, u_int, int *); static void watchdog_attach(void) { EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0); } /* * Clock handling routines. * * This code is written to operate with two timers that run independently of * each other. * * The main timer, running hz times per second, is used to trigger interval * timers, timeouts and rescheduling as needed. * * The second timer handles kernel and user profiling, * and does resource use estimation. If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the cpu * just before its quantum expires. Otherwise, it would never accumulate * cpu ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. * * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. (For profiling, every tick counts.) * * Time-of-day is maintained using a "timecounter", which may or may * not be related to the hardware generating the above mentioned * interrupts. */ int stathz; int profhz; int profprocs; volatile int ticks; int psratio; DPCPU_DEFINE_STATIC(int, pcputicks); /* Per-CPU version of ticks. */ #ifdef DEVICE_POLLING static int devpoll_run = 0; #endif static void ast_oweupc(struct thread *td, int tda __unused) { if ((td->td_proc->p_flag & P_PROFIL) == 0) return; addupc_task(td, td->td_profil_addr, td->td_profil_ticks); td->td_profil_ticks = 0; td->td_pflags &= ~TDP_OWEUPC; } static void ast_alrm(struct thread *td, int tda __unused) { struct proc *p; p = td->td_proc; PROC_LOCK(p); kern_psignal(p, SIGVTALRM); PROC_UNLOCK(p); } static void ast_prof(struct thread *td, int tda __unused) { struct proc *p; p = td->td_proc; PROC_LOCK(p); kern_psignal(p, SIGPROF); PROC_UNLOCK(p); } /* * Initialize clock frequencies and start both clocks running. */ static void initclocks(void *dummy __unused) { int i; /* * Set divisors to 1 (normal case) and let the machine-specific * code do its bit. */ mtx_init(&time_lock, "time lock", NULL, MTX_DEF); cpu_initclocks(); /* * Compute profhz/stathz, and fix profhz if needed. */ i = stathz ? stathz : hz; if (profhz == 0) profhz = i; psratio = profhz / i; ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc); ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm); ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof); #ifdef SW_WATCHDOG /* Enable hardclock watchdog now, even if a hardware watchdog exists. */ watchdog_attach(); #else /* Volunteer to run a software watchdog. */ if (wdog_software_attach == NULL) wdog_software_attach = watchdog_attach; #endif } SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL); static __noinline void hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt, int usermode) { struct proc *p; int ast; ast = 0; p = td->td_proc; if (usermode && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { PROC_ITIMLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick * cnt) == 0) ast |= TDAI(TDA_ALRM); PROC_ITIMUNLOCK(p); } if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { PROC_ITIMLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick * cnt) == 0) ast |= TDAI(TDA_PROF); PROC_ITIMUNLOCK(p); } if (ast != 0) ast_sched_mask(td, ast); } void hardclock(int cnt, int usermode) { struct pstats *pstats; struct thread *td = curthread; struct proc *p = td->td_proc; int *t = DPCPU_PTR(pcputicks); int global, i, newticks; /* * Update per-CPU and possibly global ticks values. */ *t += cnt; global = ticks; do { newticks = *t - global; if (newticks <= 0) { if (newticks < -1) *t = global - 1; newticks = 0; break; } } while (!atomic_fcmpset_int(&ticks, &global, *t)); /* * Run current process's virtual and profile time, as needed. */ pstats = p->p_stats; if (__predict_false( timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) || timevalisset(&pstats->p_timer[ITIMER_PROF].it_value))) hardclock_itimer(td, pstats, cnt, usermode); #ifdef HWPMC_HOOKS if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif /* We are in charge to handle this tick duty. */ if (newticks > 0) { tc_ticktock(newticks); #ifdef DEVICE_POLLING /* Dangerous and no need to call these things concurrently. */ if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) { /* This is very short and quick. */ hardclock_device_poll(); atomic_store_rel_int(&devpoll_run, 0); } #endif /* DEVICE_POLLING */ if (watchdog_enabled > 0) { i = atomic_fetchadd_int(&watchdog_ticks, -newticks); if (i > 0 && i <= newticks) watchdog_fire(); } intr_event_handle(clk_intr_event, NULL); } if (curcpu == CPU_FIRST()) cpu_tick_calibration(); if (__predict_false(DPCPU_GET(epoch_cb_count))) GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task)); } void hardclock_sync(int cpu) { int *t; KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu)); t = DPCPU_ID_PTR(cpu, pcputicks); *t = ticks; } /* - * Compute number of ticks in the specified amount of time. + * Regular integer scaling formula without loosing precision: + */ +#define TIME_INT_SCALE(value, mul, div) \ + (((value) / (div)) * (mul) + (((value) % (div)) * (mul)) / (div)) + +/* + * Macro for converting seconds and microseconds into actual ticks, + * based on the given hz value: + */ +#define TIME_TO_TICKS(sec, usec, hz) \ + ((sec) * (hz) + TIME_INT_SCALE(usec, hz, 1 << 6) / (1000000 >> 6)) + +#define TIME_ASSERT_VALID_HZ(hz) \ + _Static_assert(TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) >= 0 && \ + TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) < INT_MAX, \ + "tvtohz() can overflow the regular integer type") + +/* + * Compile time assert the maximum and minimum values to fit into a + * regular integer when computing TIME_TO_TICKS(): + */ +TIME_ASSERT_VALID_HZ(HZ_MAXIMUM); +TIME_ASSERT_VALID_HZ(HZ_MINIMUM); + +/* + * The forumla is mostly linear, but test some more common values just + * in case: + */ +TIME_ASSERT_VALID_HZ(1024); +TIME_ASSERT_VALID_HZ(1000); +TIME_ASSERT_VALID_HZ(128); +TIME_ASSERT_VALID_HZ(100); + +/* + * Compute number of ticks representing the specified amount of time. + * If the specified time is negative, a value of 1 is returned. This + * function returns a value from 1 up to and including INT_MAX. */ int tvtohz(struct timeval *tv) { - unsigned long ticks; - long sec, usec; + int retval; /* - * If the number of usecs in the whole seconds part of the time - * difference fits in a long, then the total number of usecs will - * fit in an unsigned long. Compute the total and convert it to - * ticks, rounding up and adding 1 to allow for the current tick - * to expire. Rounding also depends on unsigned long arithmetic - * to avoid overflow. - * - * Otherwise, if the number of ticks in the whole seconds part of - * the time difference fits in a long, then convert the parts to - * ticks separately and add, using similar rounding methods and - * overflow avoidance. This method would work in the previous - * case but it is slightly slower and assumes that hz is integral. - * - * Otherwise, round the time difference down to the maximum - * representable value. - * - * If ints have 32 bits, then the maximum value for any timeout in - * 10ms ticks is 248 days. + * The values passed here may come from user-space and these + * checks ensure "tv_usec" is within its allowed range: */ - sec = tv->tv_sec; - usec = tv->tv_usec; - if (usec < 0) { - sec--; - usec += 1000000; - } - if (sec < 0) { -#ifdef DIAGNOSTIC - if (usec > 0) { - sec++; - usec -= 1000000; + + /* check for tv_usec underflow */ + if (__predict_false(tv->tv_usec < 0)) { + tv->tv_sec += tv->tv_usec / 1000000; + tv->tv_usec = tv->tv_usec % 1000000; + /* convert tv_usec to a positive value */ + if (__predict_true(tv->tv_usec < 0)) { + tv->tv_usec += 1000000; + tv->tv_sec -= 1; } - printf("tvotohz: negative time difference %ld sec %ld usec\n", - sec, usec); -#endif - ticks = 1; - } else if (sec <= LONG_MAX / 1000000) - ticks = howmany(sec * 1000000 + (unsigned long)usec, tick) + 1; - else if (sec <= LONG_MAX / hz) - ticks = sec * hz - + howmany((unsigned long)usec, tick) + 1; - else - ticks = LONG_MAX; - if (ticks > INT_MAX) - ticks = INT_MAX; - return ((int)ticks); + /* check for tv_usec overflow */ + } else if (__predict_false(tv->tv_usec >= 1000000)) { + tv->tv_sec += tv->tv_usec / 1000000; + tv->tv_usec = tv->tv_usec % 1000000; + } + + /* check for tv_sec underflow */ + if (__predict_false(tv->tv_sec < 0)) + return (1); + /* check for tv_sec overflow (including room for the tv_usec part) */ + else if (__predict_false(tv->tv_sec >= tick_seconds_max)) + return (INT_MAX); + + /* cast to "int" to avoid platform differences */ + retval = TIME_TO_TICKS((int)tv->tv_sec, (int)tv->tv_usec, hz); + + /* add one additional tick */ + return (retval + 1); } /* * Start profiling on a process. * * Kernel profiling passes proc0 which never exits and hence * keeps the profile clock running constantly. */ void startprofclock(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_STOPPROF) return; if ((p->p_flag & P_PROFIL) == 0) { p->p_flag |= P_PROFIL; mtx_lock(&time_lock); if (++profprocs == 1) cpu_startprofclock(); mtx_unlock(&time_lock); } } /* * Stop profiling on a process. */ void stopprofclock(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_PROFIL) { if (p->p_profthreads != 0) { while (p->p_profthreads != 0) { p->p_flag |= P_STOPPROF; msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, "stopprof", 0); } } if ((p->p_flag & P_PROFIL) == 0) return; p->p_flag &= ~P_PROFIL; mtx_lock(&time_lock); if (--profprocs == 0) cpu_stopprofclock(); mtx_unlock(&time_lock); } } /* * Statistics clock. Updates rusage information and calls the scheduler * to adjust priorities of the active thread. * * This should be called by all active processors. */ void statclock(int cnt, int usermode) { struct rusage *ru; struct vmspace *vm; struct thread *td; struct proc *p; long rss; long *cp_time; uint64_t runtime, new_switchtime; td = curthread; p = td->td_proc; cp_time = (long *)PCPU_PTR(cp_time); if (usermode) { /* * Charge the time as appropriate. */ td->td_uticks += cnt; if (p->p_nice > NZERO) cp_time[CP_NICE] += cnt; else cp_time[CP_USER] += cnt; } else { /* * Came from kernel mode, so we were: * - handling an interrupt, * - doing syscall or trap work on behalf of the current * user process, or * - spinning in the idle loop. * Whichever it is, charge the time as appropriate. * Note that we charge interrupts to the current process, * regardless of whether they are ``for'' that process, * so that we know how much of its real time was spent * in ``non-process'' (i.e., interrupt) work. */ if ((td->td_pflags & TDP_ITHREAD) || td->td_intr_nesting_level >= 2) { td->td_iticks += cnt; cp_time[CP_INTR] += cnt; } else { td->td_pticks += cnt; td->td_sticks += cnt; if (!TD_IS_IDLETHREAD(td)) cp_time[CP_SYS] += cnt; else cp_time[CP_IDLE] += cnt; } } /* Update resource usage integrals and maximums. */ MPASS(p->p_vmspace != NULL); vm = p->p_vmspace; ru = &td->td_ru; ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt; ru->ru_idrss += pgtok(vm->vm_dsize) * cnt; ru->ru_isrss += pgtok(vm->vm_ssize) * cnt; rss = pgtok(vmspace_resident_count(vm)); if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); SDT_PROBE2(sched, , , tick, td, td->td_proc); thread_lock_flags(td, MTX_QUIET); /* * Compute the amount of time during which the current * thread was running, and add that to its total so far. */ new_switchtime = cpu_ticks(); runtime = new_switchtime - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, new_switchtime); sched_clock(td, cnt); thread_unlock(td); #ifdef HWPMC_HOOKS if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame); #endif } void profclock(int cnt, int usermode, uintfptr_t pc) { struct thread *td; #ifdef GPROF struct gmonparam *g; uintfptr_t i; #endif td = curthread; if (usermode) { /* * Came from user mode; CPU was in user state. * If this process is being profiled, record the tick. * if there is no related user location yet, don't * bother trying to count it. */ if (td->td_proc->p_flag & P_PROFIL) addupc_intr(td, pc, cnt); } #ifdef GPROF else { /* * Kernel statistics are just like addupc_intr, only easier. */ g = &_gmonparam; if (g->state == GMON_PROF_ON && pc >= g->lowpc) { i = PC_TO_I(g, pc); if (i < g->textsize) { KCOUNT(g, i) += cnt; } } } #endif #ifdef HWPMC_HOOKS if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame); #endif } /* * Return information about system clocks. */ static int sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) { struct clockinfo clkinfo; /* * Construct clockinfo structure. */ bzero(&clkinfo, sizeof(clkinfo)); clkinfo.hz = hz; clkinfo.tick = tick; clkinfo.profhz = profhz; clkinfo.stathz = stathz ? stathz : hz; return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); } SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_clockrate, "S,clockinfo", "Rate and period of various kernel clocks"); static void watchdog_config(void *unused __unused, u_int cmd, int *error) { u_int u; u = cmd & WD_INTERVAL; if (u >= WD_TO_1SEC) { watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz; watchdog_enabled = 1; *error = 0; } else { watchdog_enabled = 0; } } /* * Handle a watchdog timeout by dumping interrupt information and * then either dropping to DDB or panicking. */ static void watchdog_fire(void) { int nintr; uint64_t inttotal; u_long *curintr; char *curname; curintr = intrcnt; curname = intrnames; inttotal = 0; nintr = sintrcnt / sizeof(u_long); printf("interrupt total\n"); while (--nintr >= 0) { if (*curintr) printf("%-12s %20lu\n", curname, *curintr); curname += strlen(curname) + 1; inttotal += *curintr++; } printf("Total %20ju\n", (uintmax_t)inttotal); #if defined(KDB) && !defined(KDB_UNATTENDED) kdb_backtrace(); kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout"); #else panic("watchdog timeout"); #endif } diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c index f9c2f8df87b8..e9d7ddd49e63 100644 --- a/sys/kern/subr_param.c +++ b/sys/kern/subr_param.c @@ -1,345 +1,347 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)param.c 8.3 (Berkeley) 8/20/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include "opt_msgbuf.h" #include "opt_maxphys.h" #include "opt_maxusers.h" #include #include #include #include #include #include #include #include #include #include #include #include /* * System parameter formulae. */ #ifndef HZ # define HZ 1000 # ifndef HZ_VM # define HZ_VM 100 # endif #else # ifndef HZ_VM # define HZ_VM HZ # endif #endif #define NPROC (20 + 16 * maxusers) #ifndef NBUF #define NBUF 0 #endif #ifndef MAXFILES #define MAXFILES (40 + 32 * maxusers) #endif static int sysctl_kern_vm_guest(SYSCTL_HANDLER_ARGS); int hz; /* system clock's frequency */ int tick; /* usec per tick (1000000 / hz) */ +time_t tick_seconds_max; /* max hz * seconds an integer can hold */ struct bintime tick_bt; /* bintime per tick (1s / hz) */ sbintime_t tick_sbt; int maxusers; /* base tunable */ int maxproc; /* maximum # of processes */ int maxprocperuid; /* max # of procs per user */ int maxfiles; /* sys. wide open files limit */ int maxfilesperproc; /* per-proc open files limit */ int msgbufsize; /* size of kernel message buffer */ int nbuf; /* number of bcache bufs */ int bio_transient_maxcnt; int ngroups_max; /* max # groups per process */ int nswbuf; pid_t pid_max = PID_MAX; u_long maxswzone; /* max swmeta KVA storage */ u_long maxbcache; /* max buffer cache KVA storage */ u_long maxpipekva; /* Limit on pipe KVA */ u_long maxphys; /* max raw I/O transfer size */ int vm_guest = VM_GUEST_NO; /* Running as virtual machine guest? */ u_long maxtsiz; /* max text size */ u_long dfldsiz; /* initial data size limit */ u_long maxdsiz; /* max data size */ u_long dflssiz; /* initial stack size limit */ u_long maxssiz; /* max stack size */ u_long sgrowsiz; /* amount to grow stack */ SYSCTL_INT(_kern, OID_AUTO, hz, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &hz, 0, "Number of clock ticks per second"); SYSCTL_INT(_kern, OID_AUTO, hz_max, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, HZ_MAXIMUM, "Maximum hz value supported"); SYSCTL_INT(_kern, OID_AUTO, hz_min, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, HZ_MINIMUM, "Minimum hz value supported"); SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &nbuf, 0, "Number of buffers in the buffer cache"); SYSCTL_INT(_kern, OID_AUTO, nswbuf, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &nswbuf, 0, "Number of swap buffers"); SYSCTL_INT(_kern, OID_AUTO, msgbufsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &msgbufsize, 0, "Size of the kernel message buffer"); SYSCTL_LONG(_kern, OID_AUTO, maxswzone, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxswzone, 0, "Maximum memory for swap metadata"); SYSCTL_LONG(_kern, OID_AUTO, maxbcache, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxbcache, 0, "Maximum value of vfs.maxbufspace"); SYSCTL_INT(_kern, OID_AUTO, bio_transient_maxcnt, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &bio_transient_maxcnt, 0, "Maximum number of transient BIOs mappings"); SYSCTL_ULONG(_kern, OID_AUTO, maxtsiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &maxtsiz, 0, "Maximum text size"); SYSCTL_ULONG(_kern, OID_AUTO, dfldsiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &dfldsiz, 0, "Initial data size limit"); SYSCTL_ULONG(_kern, OID_AUTO, maxdsiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &maxdsiz, 0, "Maximum data size"); SYSCTL_ULONG(_kern, OID_AUTO, dflssiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &dflssiz, 0, "Initial stack size limit"); SYSCTL_ULONG(_kern, OID_AUTO, maxssiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &maxssiz, 0, "Maximum stack size"); SYSCTL_ULONG(_kern, OID_AUTO, sgrowsiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &sgrowsiz, 0, "Amount to grow stack on a stack fault"); SYSCTL_PROC(_kern, OID_AUTO, vm_guest, CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_vm_guest, "A", "Virtual machine guest detected?"); /* * The elements of this array are ordered based upon the values of the * corresponding enum VM_GUEST members. */ static const char *const vm_guest_sysctl_names[] = { [VM_GUEST_NO] = "none", [VM_GUEST_VM] = "generic", [VM_GUEST_XEN] = "xen", [VM_GUEST_HV] = "hv", [VM_GUEST_VMWARE] = "vmware", [VM_GUEST_KVM] = "kvm", [VM_GUEST_BHYVE] = "bhyve", [VM_GUEST_VBOX] = "vbox", [VM_GUEST_PARALLELS] = "parallels", [VM_LAST] = NULL }; CTASSERT(nitems(vm_guest_sysctl_names) - 1 == VM_LAST); /* * Boot time overrides that are not scaled against main memory */ void init_param1(void) { #if !defined(__mips__) && !defined(__arm64__) TUNABLE_INT_FETCH("kern.kstack_pages", &kstack_pages); #endif hz = -1; TUNABLE_INT_FETCH("kern.hz", &hz); if (hz == -1) hz = vm_guest > VM_GUEST_NO ? HZ_VM : HZ; /* range check the "hz" value */ if (__predict_false(hz < HZ_MINIMUM)) hz = HZ_MINIMUM; else if (__predict_false(hz > HZ_MAXIMUM)) hz = HZ_MAXIMUM; tick = 1000000 / hz; tick_sbt = SBT_1S / hz; tick_bt = sbttobt(tick_sbt); + tick_seconds_max = INT_MAX / hz; /* * Arrange for ticks to wrap 10 minutes after boot to help catch * sign problems sooner. */ ticks = INT_MAX - (hz * 10 * 60); vn_lock_pair_pause_max = hz / 100; if (vn_lock_pair_pause_max == 0) vn_lock_pair_pause_max = 1; #ifdef VM_SWZONE_SIZE_MAX maxswzone = VM_SWZONE_SIZE_MAX; #endif TUNABLE_LONG_FETCH("kern.maxswzone", &maxswzone); #ifdef VM_BCACHE_SIZE_MAX maxbcache = VM_BCACHE_SIZE_MAX; #endif TUNABLE_LONG_FETCH("kern.maxbcache", &maxbcache); msgbufsize = MSGBUF_SIZE; TUNABLE_INT_FETCH("kern.msgbufsize", &msgbufsize); maxtsiz = MAXTSIZ; TUNABLE_ULONG_FETCH("kern.maxtsiz", &maxtsiz); dfldsiz = DFLDSIZ; TUNABLE_ULONG_FETCH("kern.dfldsiz", &dfldsiz); maxdsiz = MAXDSIZ; TUNABLE_ULONG_FETCH("kern.maxdsiz", &maxdsiz); dflssiz = DFLSSIZ; TUNABLE_ULONG_FETCH("kern.dflssiz", &dflssiz); maxssiz = MAXSSIZ; TUNABLE_ULONG_FETCH("kern.maxssiz", &maxssiz); sgrowsiz = SGROWSIZ; TUNABLE_ULONG_FETCH("kern.sgrowsiz", &sgrowsiz); /* * Let the administrator set {NGROUPS_MAX}, but disallow values * less than NGROUPS_MAX which would violate POSIX.1-2008 or * greater than INT_MAX-1 which would result in overflow. */ ngroups_max = NGROUPS_MAX; TUNABLE_INT_FETCH("kern.ngroups", &ngroups_max); if (ngroups_max < NGROUPS_MAX) ngroups_max = NGROUPS_MAX; /* * Only allow to lower the maximal pid. * Prevent setting up a non-bootable system if pid_max is too low. */ TUNABLE_INT_FETCH("kern.pid_max", &pid_max); if (pid_max > PID_MAX) pid_max = PID_MAX; else if (pid_max < 300) pid_max = 300; TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", &unmapped_buf_allowed); } /* * Boot time overrides that are scaled against main memory */ void init_param2(long physpages) { /* Base parameters */ maxusers = MAXUSERS; TUNABLE_INT_FETCH("kern.maxusers", &maxusers); if (maxusers == 0) { maxusers = physpages / (2 * 1024 * 1024 / PAGE_SIZE); if (maxusers < 32) maxusers = 32; #ifdef VM_MAX_AUTOTUNE_MAXUSERS if (maxusers > VM_MAX_AUTOTUNE_MAXUSERS) maxusers = VM_MAX_AUTOTUNE_MAXUSERS; #endif /* * Scales down the function in which maxusers grows once * we hit 384. */ if (maxusers > 384) maxusers = 384 + ((maxusers - 384) / 8); } /* * The following can be overridden after boot via sysctl. Note: * unless overridden, these macros are ultimately based on maxusers. * Limit maxproc so that kmap entries cannot be exhausted by * processes. */ maxproc = NPROC; TUNABLE_INT_FETCH("kern.maxproc", &maxproc); if (maxproc > (physpages / 12)) maxproc = physpages / 12; if (maxproc > pid_max) maxproc = pid_max; maxprocperuid = (maxproc * 9) / 10; /* * The default limit for maxfiles is 1/12 of the number of * physical page but not less than 16 times maxusers. * At most it can be 1/6 the number of physical pages. */ maxfiles = imax(MAXFILES, physpages / 8); TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles); if (maxfiles > (physpages / 4)) maxfiles = physpages / 4; maxfilesperproc = (maxfiles / 10) * 9; TUNABLE_INT_FETCH("kern.maxfilesperproc", &maxfilesperproc); /* * Cannot be changed after boot. */ nbuf = NBUF; TUNABLE_INT_FETCH("kern.nbuf", &nbuf); TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt); maxphys = MAXPHYS; TUNABLE_ULONG_FETCH("kern.maxphys", &maxphys); if (maxphys == 0) { maxphys = MAXPHYS; } else if (__bitcountl(maxphys) != 1) { /* power of two */ if (flsl(maxphys) == NBBY * sizeof(maxphys)) maxphys = MAXPHYS; else maxphys = 1UL << flsl(maxphys); } if (maxphys < PAGE_SIZE) maxphys = MAXPHYS; /* * Physical buffers are pre-allocated buffers (struct buf) that * are used as temporary holders for I/O, such as paging I/O. */ TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf); /* * The default for maxpipekva is min(1/64 of the kernel address space, * max(1/64 of main memory, 512KB)). See sys_pipe.c for more details. */ maxpipekva = ptoa(physpages / 64); TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva); if (maxpipekva < 512 * 1024) maxpipekva = 512 * 1024; if (maxpipekva > (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64) maxpipekva = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64; } /* * Sysctl stringifying handler for kern.vm_guest. */ static int sysctl_kern_vm_guest(SYSCTL_HANDLER_ARGS) { return (SYSCTL_OUT_STR(req, vm_guest_sysctl_names[vm_guest])); } diff --git a/sys/sys/time.h b/sys/sys/time.h index 5d7f3f07234e..20375b9a82af 100644 --- a/sys/sys/time.h +++ b/sys/sys/time.h @@ -1,639 +1,640 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)time.h 8.5 (Berkeley) 5/4/95 * $FreeBSD$ */ #ifndef _SYS_TIME_H_ #define _SYS_TIME_H_ #include #include #include #include struct timezone { int tz_minuteswest; /* minutes west of Greenwich */ int tz_dsttime; /* type of dst correction */ }; #define DST_NONE 0 /* not on dst */ #define DST_USA 1 /* USA style dst */ #define DST_AUST 2 /* Australian style dst */ #define DST_WET 3 /* Western European dst */ #define DST_MET 4 /* Middle European dst */ #define DST_EET 5 /* Eastern European dst */ #define DST_CAN 6 /* Canada */ #if __BSD_VISIBLE struct bintime { time_t sec; uint64_t frac; }; static __inline void bintime_addx(struct bintime *_bt, uint64_t _x) { uint64_t _u; _u = _bt->frac; _bt->frac += _x; if (_u > _bt->frac) _bt->sec++; } static __inline void bintime_add(struct bintime *_bt, const struct bintime *_bt2) { uint64_t _u; _u = _bt->frac; _bt->frac += _bt2->frac; if (_u > _bt->frac) _bt->sec++; _bt->sec += _bt2->sec; } static __inline void bintime_sub(struct bintime *_bt, const struct bintime *_bt2) { uint64_t _u; _u = _bt->frac; _bt->frac -= _bt2->frac; if (_u < _bt->frac) _bt->sec--; _bt->sec -= _bt2->sec; } static __inline void bintime_mul(struct bintime *_bt, u_int _x) { uint64_t _p1, _p2; _p1 = (_bt->frac & 0xffffffffull) * _x; _p2 = (_bt->frac >> 32) * _x + (_p1 >> 32); _bt->sec *= _x; _bt->sec += (_p2 >> 32); _bt->frac = (_p2 << 32) | (_p1 & 0xffffffffull); } static __inline void bintime_shift(struct bintime *_bt, int _exp) { if (_exp > 0) { _bt->sec <<= _exp; _bt->sec |= _bt->frac >> (64 - _exp); _bt->frac <<= _exp; } else if (_exp < 0) { _bt->frac >>= -_exp; _bt->frac |= (uint64_t)_bt->sec << (64 + _exp); _bt->sec >>= -_exp; } } #define bintime_clear(a) ((a)->sec = (a)->frac = 0) #define bintime_isset(a) ((a)->sec || (a)->frac) #define bintime_cmp(a, b, cmp) \ (((a)->sec == (b)->sec) ? \ ((a)->frac cmp (b)->frac) : \ ((a)->sec cmp (b)->sec)) #define SBT_1S ((sbintime_t)1 << 32) #define SBT_1M (SBT_1S * 60) #define SBT_1MS (SBT_1S / 1000) #define SBT_1US (SBT_1S / 1000000) #define SBT_1NS (SBT_1S / 1000000000) /* beware rounding, see nstosbt() */ #define SBT_MAX 0x7fffffffffffffffLL static __inline int sbintime_getsec(sbintime_t _sbt) { return (_sbt >> 32); } static __inline sbintime_t bttosbt(const struct bintime _bt) { return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32)); } static __inline struct bintime sbttobt(sbintime_t _sbt) { struct bintime _bt; _bt.sec = _sbt >> 32; _bt.frac = _sbt << 32; return (_bt); } /* * Scaling functions for signed and unsigned 64-bit time using any * 32-bit fraction: */ static __inline int64_t __stime64_scale32_ceil(int64_t x, int32_t factor, int32_t divisor) { const int64_t rem = x % divisor; return (x / divisor * factor + (rem * factor + divisor - 1) / divisor); } static __inline int64_t __stime64_scale32_floor(int64_t x, int32_t factor, int32_t divisor) { const int64_t rem = x % divisor; return (x / divisor * factor + (rem * factor) / divisor); } static __inline uint64_t __utime64_scale32_ceil(uint64_t x, uint32_t factor, uint32_t divisor) { const uint64_t rem = x % divisor; return (x / divisor * factor + (rem * factor + divisor - 1) / divisor); } static __inline uint64_t __utime64_scale32_floor(uint64_t x, uint32_t factor, uint32_t divisor) { const uint64_t rem = x % divisor; return (x / divisor * factor + (rem * factor) / divisor); } /* * This function finds the common divisor between the two arguments, * in powers of two. Use a macro, so the compiler will output a * warning if the value overflows! * * Detailed description: * * Create a variable with 1's at the positions of the leading 0's * starting at the least significant bit, producing 0 if none (e.g., * 01011000 -> 0000 0111). Then these two variables are bitwise AND'ed * together, to produce the greatest common power of two minus one. In * the end add one to flip the value to the actual power of two (e.g., * 0000 0111 + 1 -> 0000 1000). */ #define __common_powers_of_two(a, b) \ ((~(a) & ((a) - 1) & ~(b) & ((b) - 1)) + 1) /* * Scaling functions for signed and unsigned 64-bit time assuming * reducable 64-bit fractions to 32-bit fractions: */ static __inline int64_t __stime64_scale64_ceil(int64_t x, int64_t factor, int64_t divisor) { const int64_t gcd = __common_powers_of_two(factor, divisor); return (__stime64_scale32_ceil(x, factor / gcd, divisor / gcd)); } static __inline int64_t __stime64_scale64_floor(int64_t x, int64_t factor, int64_t divisor) { const int64_t gcd = __common_powers_of_two(factor, divisor); return (__stime64_scale32_floor(x, factor / gcd, divisor / gcd)); } static __inline uint64_t __utime64_scale64_ceil(uint64_t x, uint64_t factor, uint64_t divisor) { const uint64_t gcd = __common_powers_of_two(factor, divisor); return (__utime64_scale32_ceil(x, factor / gcd, divisor / gcd)); } static __inline uint64_t __utime64_scale64_floor(uint64_t x, uint64_t factor, uint64_t divisor) { const uint64_t gcd = __common_powers_of_two(factor, divisor); return (__utime64_scale32_floor(x, factor / gcd, divisor / gcd)); } /* * Decimal<->sbt conversions. Multiplying or dividing by SBT_1NS * results in large roundoff errors which sbttons() and nstosbt() * avoid. Millisecond and microsecond functions are also provided for * completeness. * * When converting from sbt to another unit, the result is always * rounded down. When converting back to sbt the result is always * rounded up. This gives the property that sbttoX(Xtosbt(y)) == y . * * The conversion functions can also handle negative values. */ #define SBT_DECLARE_CONVERSION_PAIR(name, units_per_second) \ static __inline int64_t \ sbtto##name(sbintime_t sbt) \ { \ return (__stime64_scale64_floor(sbt, units_per_second, SBT_1S)); \ } \ static __inline sbintime_t \ name##tosbt(int64_t name) \ { \ return (__stime64_scale64_ceil(name, SBT_1S, units_per_second)); \ } SBT_DECLARE_CONVERSION_PAIR(ns, 1000000000) SBT_DECLARE_CONVERSION_PAIR(us, 1000000) SBT_DECLARE_CONVERSION_PAIR(ms, 1000) /*- * Background information: * * When converting between timestamps on parallel timescales of differing * resolutions it is historical and scientific practice to round down rather * than doing 4/5 rounding. * * The date changes at midnight, not at noon. * * Even at 15:59:59.999999999 it's not four'o'clock. * * time_second ticks after N.999999999 not after N.4999999999 */ static __inline void bintime2timespec(const struct bintime *_bt, struct timespec *_ts) { _ts->tv_sec = _bt->sec; _ts->tv_nsec = __utime64_scale64_floor( _bt->frac, 1000000000, 1ULL << 32) >> 32; } static __inline uint64_t bintime2ns(const struct bintime *_bt) { uint64_t ret; ret = (uint64_t)(_bt->sec) * (uint64_t)1000000000; ret += __utime64_scale64_floor( _bt->frac, 1000000000, 1ULL << 32) >> 32; return (ret); } static __inline void timespec2bintime(const struct timespec *_ts, struct bintime *_bt) { _bt->sec = _ts->tv_sec; _bt->frac = __utime64_scale64_floor( (uint64_t)_ts->tv_nsec << 32, 1ULL << 32, 1000000000); } static __inline void bintime2timeval(const struct bintime *_bt, struct timeval *_tv) { _tv->tv_sec = _bt->sec; _tv->tv_usec = __utime64_scale64_floor( _bt->frac, 1000000, 1ULL << 32) >> 32; } static __inline void timeval2bintime(const struct timeval *_tv, struct bintime *_bt) { _bt->sec = _tv->tv_sec; _bt->frac = __utime64_scale64_floor( (uint64_t)_tv->tv_usec << 32, 1ULL << 32, 1000000); } static __inline struct timespec sbttots(sbintime_t _sbt) { struct timespec _ts; _ts.tv_sec = _sbt >> 32; _ts.tv_nsec = sbttons((uint32_t)_sbt); return (_ts); } static __inline sbintime_t tstosbt(struct timespec _ts) { return (((sbintime_t)_ts.tv_sec << 32) + nstosbt(_ts.tv_nsec)); } static __inline struct timeval sbttotv(sbintime_t _sbt) { struct timeval _tv; _tv.tv_sec = _sbt >> 32; _tv.tv_usec = sbttous((uint32_t)_sbt); return (_tv); } static __inline sbintime_t tvtosbt(struct timeval _tv) { return (((sbintime_t)_tv.tv_sec << 32) + ustosbt(_tv.tv_usec)); } #endif /* __BSD_VISIBLE */ #ifdef _KERNEL /* * Simple macros to convert ticks to milliseconds * or microseconds and vice-versa. The answer * will always be at least 1. Note the return * value is a uint32_t however we step up the * operations to 64 bit to avoid any overflow/underflow * problems. */ #define TICKS_2_MSEC(t) max(1, (uint32_t)(hz == 1000) ? \ (t) : (((uint64_t)(t) * (uint64_t)1000)/(uint64_t)hz)) #define TICKS_2_USEC(t) max(1, (uint32_t)(hz == 1000) ? \ ((t) * 1000) : (((uint64_t)(t) * (uint64_t)1000000)/(uint64_t)hz)) #define MSEC_2_TICKS(m) max(1, (uint32_t)((hz == 1000) ? \ (m) : ((uint64_t)(m) * (uint64_t)hz)/(uint64_t)1000)) #define USEC_2_TICKS(u) max(1, (uint32_t)((hz == 1000) ? \ ((u) / 1000) : ((uint64_t)(u) * (uint64_t)hz)/(uint64_t)1000000)) #endif /* Operations on timespecs */ #define timespecclear(tvp) ((tvp)->tv_sec = (tvp)->tv_nsec = 0) #define timespecisset(tvp) ((tvp)->tv_sec || (tvp)->tv_nsec) #define timespeccmp(tvp, uvp, cmp) \ (((tvp)->tv_sec == (uvp)->tv_sec) ? \ ((tvp)->tv_nsec cmp (uvp)->tv_nsec) : \ ((tvp)->tv_sec cmp (uvp)->tv_sec)) #define timespecadd(tsp, usp, vsp) \ do { \ (vsp)->tv_sec = (tsp)->tv_sec + (usp)->tv_sec; \ (vsp)->tv_nsec = (tsp)->tv_nsec + (usp)->tv_nsec; \ if ((vsp)->tv_nsec >= 1000000000L) { \ (vsp)->tv_sec++; \ (vsp)->tv_nsec -= 1000000000L; \ } \ } while (0) #define timespecsub(tsp, usp, vsp) \ do { \ (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \ (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \ if ((vsp)->tv_nsec < 0) { \ (vsp)->tv_sec--; \ (vsp)->tv_nsec += 1000000000L; \ } \ } while (0) #define timespecvalid_interval(tsp) ((tsp)->tv_sec >= 0 && \ (tsp)->tv_nsec >= 0 && (tsp)->tv_nsec < 1000000000L) #ifdef _KERNEL /* Operations on timevals. */ #define timevalclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0) #define timevalisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec) #define timevalcmp(tvp, uvp, cmp) \ (((tvp)->tv_sec == (uvp)->tv_sec) ? \ ((tvp)->tv_usec cmp (uvp)->tv_usec) : \ ((tvp)->tv_sec cmp (uvp)->tv_sec)) /* timevaladd and timevalsub are not inlined */ #endif /* _KERNEL */ #ifndef _KERNEL /* NetBSD/OpenBSD compatible interfaces */ #define timerclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0) #define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec) #define timercmp(tvp, uvp, cmp) \ (((tvp)->tv_sec == (uvp)->tv_sec) ? \ ((tvp)->tv_usec cmp (uvp)->tv_usec) : \ ((tvp)->tv_sec cmp (uvp)->tv_sec)) #define timeradd(tvp, uvp, vvp) \ do { \ (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec; \ (vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec; \ if ((vvp)->tv_usec >= 1000000) { \ (vvp)->tv_sec++; \ (vvp)->tv_usec -= 1000000; \ } \ } while (0) #define timersub(tvp, uvp, vvp) \ do { \ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ if ((vvp)->tv_usec < 0) { \ (vvp)->tv_sec--; \ (vvp)->tv_usec += 1000000; \ } \ } while (0) #endif /* * Names of the interval timers, and structure * defining a timer setting. */ #define ITIMER_REAL 0 #define ITIMER_VIRTUAL 1 #define ITIMER_PROF 2 struct itimerval { struct timeval it_interval; /* timer interval */ struct timeval it_value; /* current value */ }; /* * Getkerninfo clock information structure */ struct clockinfo { int hz; /* clock frequency */ int tick; /* micro-seconds per hz tick */ int spare; int stathz; /* statistics clock frequency */ int profhz; /* profiling clock frequency */ }; #if __BSD_VISIBLE #define CPUCLOCK_WHICH_PID 0 #define CPUCLOCK_WHICH_TID 1 #endif #if defined(_KERNEL) || defined(_STANDALONE) /* * Kernel to clock driver interface. */ void inittodr(time_t base); void resettodr(void); extern volatile time_t time_second; extern volatile time_t time_uptime; extern struct bintime tc_tick_bt; extern sbintime_t tc_tick_sbt; +extern time_t tick_seconds_max; extern struct bintime tick_bt; extern sbintime_t tick_sbt; extern int tc_precexp; extern int tc_timepercentage; extern struct bintime bt_timethreshold; extern struct bintime bt_tickthreshold; extern sbintime_t sbt_timethreshold; extern sbintime_t sbt_tickthreshold; extern volatile int rtc_generation; /* * Functions for looking at our clock: [get]{bin,nano,micro}[up]time() * * Functions without the "get" prefix returns the best timestamp * we can produce in the given format. * * "bin" == struct bintime == seconds + 64 bit fraction of seconds. * "nano" == struct timespec == seconds + nanoseconds. * "micro" == struct timeval == seconds + microseconds. * * Functions containing "up" returns time relative to boot and * should be used for calculating time intervals. * * Functions without "up" returns UTC time. * * Functions with the "get" prefix returns a less precise result * much faster than the functions without "get" prefix and should * be used where a precision of 1/hz seconds is acceptable or where * performance is priority. (NB: "precision", _not_ "resolution" !) */ void binuptime(struct bintime *bt); void nanouptime(struct timespec *tsp); void microuptime(struct timeval *tvp); static __inline sbintime_t sbinuptime(void) { struct bintime _bt; binuptime(&_bt); return (bttosbt(_bt)); } void bintime(struct bintime *bt); void nanotime(struct timespec *tsp); void microtime(struct timeval *tvp); void getbinuptime(struct bintime *bt); void getnanouptime(struct timespec *tsp); void getmicrouptime(struct timeval *tvp); static __inline sbintime_t getsbinuptime(void) { struct bintime _bt; getbinuptime(&_bt); return (bttosbt(_bt)); } void getbintime(struct bintime *bt); void getnanotime(struct timespec *tsp); void getmicrotime(struct timeval *tvp); void getboottime(struct timeval *boottime); void getboottimebin(struct bintime *boottimebin); /* Other functions */ int itimerdecr(struct itimerval *itp, int usec); int itimerfix(struct timeval *tv); int ppsratecheck(struct timeval *, int *, int); int ratecheck(struct timeval *, const struct timeval *); void timevaladd(struct timeval *t1, const struct timeval *t2); void timevalsub(struct timeval *t1, const struct timeval *t2); int tvtohz(struct timeval *tv); /* * The following HZ limits allow the tvtohz() function * to only use integer computations. */ #define HZ_MAXIMUM (INT_MAX / (1000000 >> 6)) /* 137kHz */ #define HZ_MINIMUM 8 /* hz */ #define TC_DEFAULTPERC 5 #define BT2FREQ(bt) \ (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \ ((bt)->frac >> 1)) #define SBT2FREQ(sbt) ((SBT_1S + ((sbt) >> 1)) / (sbt)) #define FREQ2BT(freq, bt) \ { \ (bt)->sec = 0; \ (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \ } #define TIMESEL(sbt, sbt2) \ (((sbt2) >= sbt_timethreshold) ? \ ((*(sbt) = getsbinuptime()), 1) : ((*(sbt) = sbinuptime()), 0)) #else /* !_KERNEL && !_STANDALONE */ #include #include #include __BEGIN_DECLS int setitimer(int, const struct itimerval *, struct itimerval *); int utimes(const char *, const struct timeval *); #if __BSD_VISIBLE int adjtime(const struct timeval *, struct timeval *); int clock_getcpuclockid2(id_t, int, clockid_t *); int futimes(int, const struct timeval *); int futimesat(int, const char *, const struct timeval [2]); int lutimes(const char *, const struct timeval *); int settimeofday(const struct timeval *, const struct timezone *); #endif #if __XSI_VISIBLE int getitimer(int, struct itimerval *); int gettimeofday(struct timeval *, struct timezone *); #endif __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_TIME_H_ */