Index: head/sys/kern/kern_clock.c =================================================================== --- head/sys/kern/kern_clock.c (revision 338486) +++ head/sys/kern/kern_clock.c (revision 338487) @@ -1,895 +1,808 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include "opt_device_polling.h" #include "opt_hwpmc_hooks.h" #include "opt_ntp.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef GPROF #include #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , clock, hard); PMC_SOFT_DEFINE( , , clock, stat); PMC_SOFT_DEFINE_EX( , , clock, prof, \ cpu_startprofclock, cpu_stopprofclock); #endif #ifdef DEVICE_POLLING extern void hardclock_device_poll(void); #endif /* DEVICE_POLLING */ static void initclocks(void *dummy); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL); /* Spin-lock protecting profiling statistics. 
*/ static struct mtx time_lock; SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *"); static int sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) { int error; long cp_time[CPUSTATES]; #ifdef SCTL_MASK32 int i; unsigned int cp_time32[CPUSTATES]; #endif read_cpu_time(cp_time); #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time32)); for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time)); error = SYSCTL_OUT(req, cp_time, sizeof(cp_time)); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics"); static long empty[CPUSTATES]; static int sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) { struct pcpu *pcpu; int error; int c; long *cp_time; #ifdef SCTL_MASK32 unsigned int cp_time32[CPUSTATES]; int i; #endif if (!req->oldptr) { #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1)); else #endif return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1)); } for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) { if (!CPU_ABSENT(c)) { pcpu = pcpu_find(c); cp_time = pcpu->pc_cp_time; } else { cp_time = empty; } #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); #ifdef DEADLKRES static const char *blessed[] = { "getblk", "so_snd_sx", "so_rcv_sx", NULL }; static int slptime_threshold = 1800; static int blktime_threshold = 900; static int sleepfreq = 3; static void deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks) { int tticks; sx_assert(&allproc_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * The thread should be blocked on a turnstile, simply check * if the turnstile channel is in good state. */ MPASS(td->td_blocked != NULL); tticks = ticks - td->td_blktick; if (tticks > blkticks) /* * Accordingly with provided thresholds, this thread is stuck * for too long on a turnstile. */ panic("%s: possible deadlock detected for %p, " "blocked for %d ticks\n", __func__, td, tticks); } static void deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks) { void *wchan; int i, slptype, tticks; sx_assert(&allproc_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * Check if the thread is sleeping on a lock, otherwise skip the check. * Drop the thread lock in order to avoid a LOR with the sleepqueue * spinlock. */ wchan = td->td_wchan; tticks = ticks - td->td_slptick; slptype = sleepq_type(wchan); if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) && tticks > slpticks) { /* * Accordingly with provided thresholds, this thread is stuck * for too long on a sleepqueue. * However, being on a sleepqueue, we might still check for the * blessed list. 
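The two handlers above export the aggregated and per-CPU tick counters as kern.cp_time and kern.cp_times. A minimal userland consumer of the first one, assuming the CPUSTATES/CP_* indices from <sys/resource.h> (illustrative sketch, not part of this commit):

    #include <sys/types.h>
    #include <sys/resource.h>   /* CPUSTATES, CP_USER ... CP_IDLE */
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
        long cp_time[CPUSTATES];
        size_t len = sizeof(cp_time);

        /* Read the summed per-state tick counters exported above. */
        if (sysctlbyname("kern.cp_time", cp_time, &len, NULL, 0) == -1) {
            perror("sysctlbyname");
            return (1);
        }
        printf("user %ld nice %ld sys %ld intr %ld idle %ld\n",
            cp_time[CP_USER], cp_time[CP_NICE], cp_time[CP_SYS],
            cp_time[CP_INTR], cp_time[CP_IDLE]);
        return (0);
    }

kern.cp_times works the same way but returns CPUSTATES longs per possible CPU (mp_maxid + 1 slots).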
*/ for (i = 0; blessed[i] != NULL; i++) if (!strcmp(blessed[i], td->td_wmesg)) return; panic("%s: possible deadlock detected for %p, " "blocked for %d ticks\n", __func__, td, tticks); } } static void deadlkres(void) { struct proc *p; struct thread *td; int blkticks, slpticks, tryl; tryl = 0; for (;;) { blkticks = blktime_threshold * hz; slpticks = slptime_threshold * hz; /* * Avoid to sleep on the sx_lock in order to avoid a * possible priority inversion problem leading to * starvation. * If the lock can't be held after 100 tries, panic. */ if (!sx_try_slock(&allproc_lock)) { if (tryl > 100) panic("%s: possible deadlock detected " "on allproc_lock\n", __func__); tryl++; pause("allproc", sleepfreq * hz); continue; } tryl = 0; FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (TD_ON_LOCK(td)) deadlres_td_on_lock(p, td, blkticks); else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) deadlres_td_sleep_q(p, td, slpticks); thread_unlock(td); } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); /* Sleep for sleepfreq seconds. */ pause("-", sleepfreq * hz); } } static struct kthread_desc deadlkres_kd = { "deadlkres", deadlkres, (struct thread **)NULL }; SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW, &slptime_threshold, 0, "Number of seconds within is valid to sleep on a sleepqueue"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, &blktime_threshold, 0, "Number of seconds within is valid to block on a turnstile"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, "Number of seconds between any deadlock resolver thread run"); #endif /* DEADLKRES */ void read_cpu_time(long *cp_time) { struct pcpu *pc; int i, j; /* Sum up global cp_time[]. */ bzero(cp_time, sizeof(long) * CPUSTATES); CPU_FOREACH(i) { pc = pcpu_find(i); for (j = 0; j < CPUSTATES; j++) cp_time[j] += pc->pc_cp_time[j]; } } #include static int watchdog_ticks; static int watchdog_enabled; static void watchdog_fire(void); static void watchdog_config(void *, u_int, int *); static void watchdog_attach(void) { EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0); } /* * Clock handling routines. * * This code is written to operate with two timers that run independently of * each other. * * The main timer, running hz times per second, is used to trigger interval * timers, timeouts and rescheduling as needed. * * The second timer handles kernel and user profiling, * and does resource use estimation. If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the cpu * just before its quantum expires. Otherwise, it would never accumulate * cpu ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. * * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. 
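The deadlock-resolver loop above deliberately never sleeps on allproc_lock; it uses a try-lock with a bounded retry count and a pause between attempts, and gives up (panics) after 100 failed tries. A loose userland model of that pattern, with a pthread mutex standing in for the sx lock and sleep() for pause() (names and thresholds are illustrative only):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    static pthread_mutex_t scan_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Try the lock, back off, abort after 100 failed attempts. */
    static void
    scan_once(void)
    {
        int tryl = 0;

        for (;;) {
            if (pthread_mutex_trylock(&scan_lock) != 0) {
                if (tryl > 100) {
                    fprintf(stderr, "possible deadlock on scan_lock\n");
                    abort();        /* the kernel panics here */
                }
                tryl++;
                sleep(3);           /* kernel: pause("allproc", sleepfreq * hz) */
                continue;
            }
            /* ... walk the process list here ... */
            pthread_mutex_unlock(&scan_lock);
            return;
        }
    }

    int
    main(void)
    {
        scan_once();
        return (0);
    }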
(For profiling, every tick counts.) * * Time-of-day is maintained using a "timecounter", which may or may * not be related to the hardware generating the above mentioned * interrupts. */ int stathz; int profhz; int profprocs; volatile int ticks; int psratio; DPCPU_DEFINE_STATIC(int, pcputicks); /* Per-CPU version of ticks. */ #ifdef DEVICE_POLLING static int devpoll_run = 0; #endif /* * Initialize clock frequencies and start both clocks running. */ /* ARGSUSED*/ static void initclocks(void *dummy) { int i; /* * Set divisors to 1 (normal case) and let the machine-specific * code do its bit. */ mtx_init(&time_lock, "time lock", NULL, MTX_DEF); cpu_initclocks(); /* * Compute profhz/stathz, and fix profhz if needed. */ i = stathz ? stathz : hz; if (profhz == 0) profhz = i; psratio = profhz / i; #ifdef SW_WATCHDOG /* Enable hardclock watchdog now, even if a hardware watchdog exists. */ watchdog_attach(); #else /* Volunteer to run a software watchdog. */ if (wdog_software_attach == NULL) wdog_software_attach = watchdog_attach; #endif } -/* - * Each time the real-time timer fires, this function is called on all CPUs. - * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only - * the other CPUs in the system need to call this function. - */ void -hardclock_cpu(int usermode) +hardclock(int cnt, int usermode) { struct pstats *pstats; struct thread *td = curthread; struct proc *p = td->td_proc; - int flags; - - /* - * Run current process's virtual and profile time, as needed. - */ - pstats = p->p_stats; - flags = 0; - if (usermode && - timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { - PROC_ITIMLOCK(p); - if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) - flags |= TDF_ALRMPEND | TDF_ASTPENDING; - PROC_ITIMUNLOCK(p); - } - if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { - PROC_ITIMLOCK(p); - if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) - flags |= TDF_PROFPEND | TDF_ASTPENDING; - PROC_ITIMUNLOCK(p); - } - thread_lock(td); - td->td_flags |= flags; - thread_unlock(td); - -#ifdef HWPMC_HOOKS - if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) - PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); - if (td->td_intr_frame != NULL) - PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); -#endif - callout_process(sbinuptime()); - if (__predict_false(DPCPU_GET(epoch_cb_count))) - GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task)); -} - -/* - * The real-time timer, interrupting hz times per second. - */ -void -hardclock(int usermode, uintfptr_t pc) -{ - - atomic_add_int(&ticks, 1); - hardclock_cpu(usermode); - tc_ticktock(1); - cpu_tick_calibration(); - /* - * If no separate statistics clock is available, run it from here. - * - * XXX: this only works for UP - */ - if (stathz == 0) { - profclock(usermode, pc); - statclock(usermode); - } -#ifdef DEVICE_POLLING - hardclock_device_poll(); /* this is very short and quick */ -#endif /* DEVICE_POLLING */ - if (watchdog_enabled > 0 && --watchdog_ticks <= 0) - watchdog_fire(); -} - -void -hardclock_cnt(int cnt, int usermode) -{ - struct pstats *pstats; - struct thread *td = curthread; - struct proc *p = td->td_proc; int *t = DPCPU_PTR(pcputicks); int flags, global, newticks; int i; /* * Update per-CPU and possibly global ticks values. */ *t += cnt; do { global = ticks; newticks = *t - global; if (newticks <= 0) { if (newticks < -1) *t = global - 1; newticks = 0; break; } } while (!atomic_cmpset_int(&ticks, global, *t)); /* * Run current process's virtual and profile time, as needed. 
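With this commit hardclock() takes an aggregated tick count and advances the global ticks counter with a lock-free compare-and-swap: each CPU keeps its own pcputicks running ahead and only the CPU that actually pushes the global value forward handles the per-tick duties. A small C11-atomics model of that update (userland sketch; global_ticks/percpu_ticks are stand-ins for the kernel's ticks and DPCPU pcputicks, and atomic_compare_exchange_weak stands in for atomic_cmpset_int):

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic int global_ticks;

    /*
     * Add cnt to this CPU's private count, then try to push the global
     * counter forward to it.  Returns how many ticks this caller is
     * responsible for (0 if another CPU already advanced past us).
     */
    static int
    advance_ticks(int *percpu_ticks, int cnt)
    {
        int global, newticks;

        *percpu_ticks += cnt;
        do {
            global = atomic_load(&global_ticks);
            newticks = *percpu_ticks - global;
            if (newticks <= 0) {
                /* Fell behind; resync the private counter. */
                if (newticks < -1)
                    *percpu_ticks = global - 1;
                return (0);
            }
        } while (!atomic_compare_exchange_weak(&global_ticks, &global,
            *percpu_ticks));
        return (newticks);
    }

    int
    main(void)
    {
        int mine = 0;

        printf("handled %d ticks\n", advance_ticks(&mine, 3));
        return (0);
    }

A positive return corresponds to the "newticks > 0" branch below, where only the winning CPU calls tc_ticktock(), device polling and the watchdog check.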
*/ pstats = p->p_stats; flags = 0; if (usermode && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { PROC_ITIMLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick * cnt) == 0) flags |= TDF_ALRMPEND | TDF_ASTPENDING; PROC_ITIMUNLOCK(p); } if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { PROC_ITIMLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick * cnt) == 0) flags |= TDF_PROFPEND | TDF_ASTPENDING; PROC_ITIMUNLOCK(p); } if (flags != 0) { thread_lock(td); td->td_flags |= flags; thread_unlock(td); } #ifdef HWPMC_HOOKS if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif /* We are in charge to handle this tick duty. */ if (newticks > 0) { tc_ticktock(newticks); #ifdef DEVICE_POLLING /* Dangerous and no need to call these things concurrently. */ if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) { /* This is very short and quick. */ hardclock_device_poll(); atomic_store_rel_int(&devpoll_run, 0); } #endif /* DEVICE_POLLING */ if (watchdog_enabled > 0) { i = atomic_fetchadd_int(&watchdog_ticks, -newticks); if (i > 0 && i <= newticks) watchdog_fire(); } } if (curcpu == CPU_FIRST()) cpu_tick_calibration(); if (__predict_false(DPCPU_GET(epoch_cb_count))) GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task)); } void hardclock_sync(int cpu) { int *t; KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu)); t = DPCPU_ID_PTR(cpu, pcputicks); *t = ticks; } /* * Compute number of ticks in the specified amount of time. */ int tvtohz(struct timeval *tv) { unsigned long ticks; long sec, usec; /* * If the number of usecs in the whole seconds part of the time * difference fits in a long, then the total number of usecs will * fit in an unsigned long. Compute the total and convert it to * ticks, rounding up and adding 1 to allow for the current tick * to expire. Rounding also depends on unsigned long arithmetic * to avoid overflow. * * Otherwise, if the number of ticks in the whole seconds part of * the time difference fits in a long, then convert the parts to * ticks separately and add, using similar rounding methods and * overflow avoidance. This method would work in the previous * case but it is slightly slower and assumes that hz is integral. * * Otherwise, round the time difference down to the maximum * representable value. * * If ints have 32 bits, then the maximum value for any timeout in * 10ms ticks is 248 days. */ sec = tv->tv_sec; usec = tv->tv_usec; if (usec < 0) { sec--; usec += 1000000; } if (sec < 0) { #ifdef DIAGNOSTIC if (usec > 0) { sec++; usec -= 1000000; } printf("tvotohz: negative time difference %ld sec %ld usec\n", sec, usec); #endif ticks = 1; } else if (sec <= LONG_MAX / 1000000) ticks = howmany(sec * 1000000 + (unsigned long)usec, tick) + 1; else if (sec <= LONG_MAX / hz) ticks = sec * hz + howmany((unsigned long)usec, tick) + 1; else ticks = LONG_MAX; if (ticks > INT_MAX) ticks = INT_MAX; return ((int)ticks); } /* * Start profiling on a process. * * Kernel profiling passes proc0 which never exits and hence * keeps the profile clock running constantly. */ void startprofclock(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_STOPPROF) return; if ((p->p_flag & P_PROFIL) == 0) { p->p_flag |= P_PROFIL; mtx_lock(&time_lock); if (++profprocs == 1) cpu_startprofclock(); mtx_unlock(&time_lock); } } /* * Stop profiling on a process. 
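tvtohz() above rounds a timeval up to a whole number of ticks and adds one for the current, partially elapsed tick. A standalone model of its common (no-overflow) path, assuming hz = 1000 so that tick = 1000000 / hz = 1000 microseconds:

    #include <stdio.h>

    #define HZ      1000                  /* assumed clock rate */
    #define TICK_US (1000000 / HZ)        /* microseconds per tick */

    /* howmany(x, y): divide rounding up, as in <sys/param.h>. */
    #define howmany(x, y)   (((x) + ((y) - 1)) / (y))

    /* Main case of tvtohz(): sec * 1e6 + usec fits in a long. */
    static long
    timeval_to_ticks(long sec, long usec)
    {
        return (howmany(sec * 1000000L + (unsigned long)usec, TICK_US) + 1);
    }

    int
    main(void)
    {
        /* 1.5 ms -> 2 ticks to cover it, +1 for the tick already running. */
        printf("%ld\n", timeval_to_ticks(0, 1500));     /* prints 3 */
        return (0);
    }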
*/ void stopprofclock(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_PROFIL) { if (p->p_profthreads != 0) { while (p->p_profthreads != 0) { p->p_flag |= P_STOPPROF; msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, "stopprof", 0); } } if ((p->p_flag & P_PROFIL) == 0) return; p->p_flag &= ~P_PROFIL; mtx_lock(&time_lock); if (--profprocs == 0) cpu_stopprofclock(); mtx_unlock(&time_lock); } } /* * Statistics clock. Updates rusage information and calls the scheduler * to adjust priorities of the active thread. * * This should be called by all active processors. */ void -statclock(int usermode) +statclock(int cnt, int usermode) { - - statclock_cnt(1, usermode); -} - -void -statclock_cnt(int cnt, int usermode) -{ struct rusage *ru; struct vmspace *vm; struct thread *td; struct proc *p; long rss; long *cp_time; td = curthread; p = td->td_proc; cp_time = (long *)PCPU_PTR(cp_time); if (usermode) { /* * Charge the time as appropriate. */ td->td_uticks += cnt; if (p->p_nice > NZERO) cp_time[CP_NICE] += cnt; else cp_time[CP_USER] += cnt; } else { /* * Came from kernel mode, so we were: * - handling an interrupt, * - doing syscall or trap work on behalf of the current * user process, or * - spinning in the idle loop. * Whichever it is, charge the time as appropriate. * Note that we charge interrupts to the current process, * regardless of whether they are ``for'' that process, * so that we know how much of its real time was spent * in ``non-process'' (i.e., interrupt) work. */ if ((td->td_pflags & TDP_ITHREAD) || td->td_intr_nesting_level >= 2) { td->td_iticks += cnt; cp_time[CP_INTR] += cnt; } else { td->td_pticks += cnt; td->td_sticks += cnt; if (!TD_IS_IDLETHREAD(td)) cp_time[CP_SYS] += cnt; else cp_time[CP_IDLE] += cnt; } } /* Update resource usage integrals and maximums. */ MPASS(p->p_vmspace != NULL); vm = p->p_vmspace; ru = &td->td_ru; ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt; ru->ru_idrss += pgtok(vm->vm_dsize) * cnt; ru->ru_isrss += pgtok(vm->vm_ssize) * cnt; rss = pgtok(vmspace_resident_count(vm)); if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); SDT_PROBE2(sched, , , tick, td, td->td_proc); thread_lock_flags(td, MTX_QUIET); for ( ; cnt > 0; cnt--) sched_clock(td); thread_unlock(td); #ifdef HWPMC_HOOKS if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame); #endif } void -profclock(int usermode, uintfptr_t pc) -{ - - profclock_cnt(1, usermode, pc); -} - -void -profclock_cnt(int cnt, int usermode, uintfptr_t pc) +profclock(int cnt, int usermode, uintfptr_t pc) { struct thread *td; #ifdef GPROF struct gmonparam *g; uintfptr_t i; #endif td = curthread; if (usermode) { /* * Came from user mode; CPU was in user state. * If this process is being profiled, record the tick. * if there is no related user location yet, don't * bother trying to count it. */ if (td->td_proc->p_flag & P_PROFIL) addupc_intr(td, pc, cnt); } #ifdef GPROF else { /* * Kernel statistics are just like addupc_intr, only easier. */ g = &_gmonparam; if (g->state == GMON_PROF_ON && pc >= g->lowpc) { i = PC_TO_I(g, pc); if (i < g->textsize) { KCOUNT(g, i) += cnt; } } } #endif #ifdef HWPMC_HOOKS if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame); #endif } /* * Return information about system clocks. */ static int sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) { struct clockinfo clkinfo; /* * Construct clockinfo structure. 
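statclock() charges each batch of cnt statistics ticks to exactly one CPUSTATES bucket depending on where the CPU was caught. A small pure-function model of that decision (the enum mirrors the CP_* indices used above; the thread-state flags are simplified to booleans):

    #include <stdio.h>

    enum cpustate { CP_USER, CP_NICE, CP_SYS, CP_INTR, CP_IDLE };

    /*
     * statclock()'s charging rules:
     *  - user mode: CP_NICE if the process is niced, else CP_USER;
     *  - kernel mode in an interrupt thread or nested interrupt: CP_INTR;
     *  - otherwise CP_SYS, unless the CPU was in its idle thread (CP_IDLE).
     */
    static enum cpustate
    charge_bucket(int usermode, int niced, int in_interrupt, int idle_thread)
    {
        if (usermode)
            return (niced ? CP_NICE : CP_USER);
        if (in_interrupt)
            return (CP_INTR);
        return (idle_thread ? CP_IDLE : CP_SYS);
    }

    int
    main(void)
    {
        printf("%d\n", charge_bucket(0, 0, 1, 0));      /* CP_INTR */
        return (0);
    }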
*/ bzero(&clkinfo, sizeof(clkinfo)); clkinfo.hz = hz; clkinfo.tick = tick; clkinfo.profhz = profhz; clkinfo.stathz = stathz ? stathz : hz; return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); } SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_clockrate, "S,clockinfo", "Rate and period of various kernel clocks"); static void watchdog_config(void *unused __unused, u_int cmd, int *error) { u_int u; u = cmd & WD_INTERVAL; if (u >= WD_TO_1SEC) { watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz; watchdog_enabled = 1; *error = 0; } else { watchdog_enabled = 0; } } /* * Handle a watchdog timeout by dumping interrupt information and * then either dropping to DDB or panicking. */ static void watchdog_fire(void) { int nintr; uint64_t inttotal; u_long *curintr; char *curname; curintr = intrcnt; curname = intrnames; inttotal = 0; nintr = sintrcnt / sizeof(u_long); printf("interrupt total\n"); while (--nintr >= 0) { if (*curintr) printf("%-12s %20lu\n", curname, *curintr); curname += strlen(curname) + 1; inttotal += *curintr++; } printf("Total %20ju\n", (uintmax_t)inttotal); #if defined(KDB) && !defined(KDB_UNATTENDED) kdb_backtrace(); kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout"); #else panic("watchdog timeout"); #endif } Index: head/sys/kern/kern_clocksource.c =================================================================== --- head/sys/kern/kern_clocksource.c (revision 338486) +++ head/sys/kern/kern_clocksource.c (revision 338487) @@ -1,989 +1,989 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010-2013 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Common routines to manage event timers hardware. */ #include "opt_device_polling.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int cpu_disable_c2_sleep = 0; /* Timer dies in C2. */ int cpu_disable_c3_sleep = 0; /* Timer dies in C3. 
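The kern.clockrate handler above exports hz, tick, stathz and profhz in a struct clockinfo; a minimal FreeBSD userland consumer looks like this (error handling trimmed to the essentials):

    #include <sys/types.h>
    #include <sys/time.h>       /* struct clockinfo */
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
        struct clockinfo ci;
        size_t len = sizeof(ci);

        if (sysctlbyname("kern.clockrate", &ci, &len, NULL, 0) == -1) {
            perror("sysctlbyname");
            return (1);
        }
        printf("hz %d tick %d us stathz %d profhz %d\n",
            ci.hz, ci.tick, ci.stathz, ci.profhz);
        return (0);
    }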
*/ static void setuptimer(void); static void loadtimer(sbintime_t now, int first); static int doconfigtimer(void); static void configtimer(int start); static int round_freq(struct eventtimer *et, int freq); static sbintime_t getnextcpuevent(int idle); static sbintime_t getnextevent(void); static int handleevents(sbintime_t now, int fake); static struct mtx et_hw_mtx; #define ET_HW_LOCK(state) \ { \ if (timer->et_flags & ET_FLAGS_PERCPU) \ mtx_lock_spin(&(state)->et_hw_mtx); \ else \ mtx_lock_spin(&et_hw_mtx); \ } #define ET_HW_UNLOCK(state) \ { \ if (timer->et_flags & ET_FLAGS_PERCPU) \ mtx_unlock_spin(&(state)->et_hw_mtx); \ else \ mtx_unlock_spin(&et_hw_mtx); \ } static struct eventtimer *timer = NULL; static sbintime_t timerperiod; /* Timer period for periodic mode. */ static sbintime_t statperiod; /* statclock() events period. */ static sbintime_t profperiod; /* profclock() events period. */ static sbintime_t nexttick; /* Next global timer tick time. */ static u_int busy = 1; /* Reconfiguration is in progress. */ static int profiling; /* Profiling events enabled. */ static char timername[32]; /* Wanted timer. */ TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername)); static int singlemul; /* Multiplier for periodic mode. */ SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RWTUN, &singlemul, 0, "Multiplier for periodic mode"); static u_int idletick; /* Run periodic events when idle. */ SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RWTUN, &idletick, 0, "Run periodic events when idle"); static int periodic; /* Periodic or one-shot mode. */ static int want_periodic; /* What mode to prefer. */ TUNABLE_INT("kern.eventtimer.periodic", &want_periodic); struct pcpu_state { struct mtx et_hw_mtx; /* Per-CPU timer mutex. */ u_int action; /* Reconfiguration requests. */ u_int handle; /* Immediate handle resuests. */ sbintime_t now; /* Last tick time. */ sbintime_t nextevent; /* Next scheduled event on this CPU. */ sbintime_t nexttick; /* Next timer tick time. */ sbintime_t nexthard; /* Next hardclock() event. */ sbintime_t nextstat; /* Next statclock() event. */ sbintime_t nextprof; /* Next profclock() event. */ sbintime_t nextcall; /* Next callout event. */ sbintime_t nextcallopt; /* Next optional callout event. */ int ipi; /* This CPU needs IPI. */ int idle; /* This CPU is in idle mode. */ }; DPCPU_DEFINE_STATIC(struct pcpu_state, timerstate); DPCPU_DEFINE(sbintime_t, hardclocktime); /* * Timer broadcast IPI handler. */ int hardclockintr(void) { sbintime_t now; struct pcpu_state *state; int done; if (doconfigtimer() || busy) return (FILTER_HANDLED); state = DPCPU_PTR(timerstate); now = state->now; CTR3(KTR_SPARE2, "ipi at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); done = handleevents(now, 0); return (done ? 
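The event-timer code keeps every deadline in sbintime_t, a 64-bit 32.32 fixed-point count of seconds; the "(int)(now >> 32), (u_int)(now & 0xffffffff)" pairs in the CTR traces are its integer and fractional halves. A tiny standalone illustration of that representation (the typedef and SBT_* macros are re-declared locally for the sketch):

    #include <stdint.h>
    #include <stdio.h>

    typedef int64_t sbintime_t;             /* 32.32 fixed-point seconds */
    #define SBT_1S  ((sbintime_t)1 << 32)   /* one second */
    #define SBT_1MS (SBT_1S / 1000)

    int
    main(void)
    {
        int hz = 1000, stathz = 127;
        sbintime_t tick_sbt = SBT_1S / hz;          /* hardclock() period */
        sbintime_t statperiod = SBT_1S / stathz;    /* statclock() period */
        sbintime_t now = 5 * SBT_1S + 250 * SBT_1MS;        /* 5.25 s */

        printf("now = %d.%08x sbt, tick %jd, statperiod %jd\n",
            (int)(now >> 32), (unsigned int)(now & 0xffffffff),
            (intmax_t)tick_sbt, (intmax_t)statperiod);
        return (0);
    }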
FILTER_HANDLED : FILTER_STRAY); } /* * Handle all events for specified time on this CPU */ static int handleevents(sbintime_t now, int fake) { sbintime_t t, *hct; struct trapframe *frame; struct pcpu_state *state; int usermode; int done, runs; CTR3(KTR_SPARE2, "handle at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); done = 0; if (fake) { frame = NULL; usermode = 0; } else { frame = curthread->td_intr_frame; usermode = TRAPF_USERMODE(frame); } state = DPCPU_PTR(timerstate); runs = 0; while (now >= state->nexthard) { state->nexthard += tick_sbt; runs++; } if (runs) { hct = DPCPU_PTR(hardclocktime); *hct = state->nexthard - tick_sbt; if (fake < 2) { - hardclock_cnt(runs, usermode); + hardclock(runs, usermode); done = 1; } } runs = 0; while (now >= state->nextstat) { state->nextstat += statperiod; runs++; } if (runs && fake < 2) { - statclock_cnt(runs, usermode); + statclock(runs, usermode); done = 1; } if (profiling) { runs = 0; while (now >= state->nextprof) { state->nextprof += profperiod; runs++; } if (runs && !fake) { - profclock_cnt(runs, usermode, TRAPF_PC(frame)); + profclock(runs, usermode, TRAPF_PC(frame)); done = 1; } } else state->nextprof = state->nextstat; if (now >= state->nextcallopt || now >= state->nextcall) { state->nextcall = state->nextcallopt = SBT_MAX; callout_process(now); } t = getnextcpuevent(0); ET_HW_LOCK(state); if (!busy) { state->idle = 0; state->nextevent = t; loadtimer(now, (fake == 2) && (timer->et_flags & ET_FLAGS_PERCPU)); } ET_HW_UNLOCK(state); return (done); } /* * Schedule binuptime of the next event on current CPU. */ static sbintime_t getnextcpuevent(int idle) { sbintime_t event; struct pcpu_state *state; u_int hardfreq; state = DPCPU_PTR(timerstate); /* Handle hardclock() events, skipping some if CPU is idle. */ event = state->nexthard; if (idle) { hardfreq = (u_int)hz / 2; if (tc_min_ticktock_freq > 2 #ifdef SMP && curcpu == CPU_FIRST() #endif ) hardfreq = hz / tc_min_ticktock_freq; if (hardfreq > 1) event += tick_sbt * (hardfreq - 1); } /* Handle callout events. */ if (event > state->nextcall) event = state->nextcall; if (!idle) { /* If CPU is active - handle other types of events. */ if (event > state->nextstat) event = state->nextstat; if (profiling && event > state->nextprof) event = state->nextprof; } return (event); } /* * Schedule binuptime of the next event on all CPUs. */ static sbintime_t getnextevent(void) { struct pcpu_state *state; sbintime_t event; #ifdef SMP int cpu; #endif #ifdef KTR int c; c = -1; #endif state = DPCPU_PTR(timerstate); event = state->nextevent; #ifdef SMP if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) { CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); if (event > state->nextevent) { event = state->nextevent; #ifdef KTR c = cpu; #endif } } } #endif CTR4(KTR_SPARE2, "next at %d: next %d.%08x by %d", curcpu, (int)(event >> 32), (u_int)(event & 0xffffffff), c); return (event); } /* Hardware timer callback function. */ static void timercb(struct eventtimer *et, void *arg) { sbintime_t now; sbintime_t *next; struct pcpu_state *state; #ifdef SMP int cpu, bcast; #endif /* Do not touch anything if somebody reconfiguring timers. */ if (busy) return; /* Update present and next tick times. */ state = DPCPU_PTR(timerstate); if (et->et_flags & ET_FLAGS_PERCPU) { next = &state->nexttick; } else next = &nexttick; now = sbinuptime(); if (periodic) *next = now + timerperiod; else *next = -1; /* Next tick is not scheduled yet. 
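handleevents() is where the batching that this commit folds into hardclock()/statclock()/profclock() happens: it counts how many periods elapsed since the last run and invokes each handler once with that count instead of once per period. A stripped-down model of that accumulation:

    #include <stdint.h>
    #include <stdio.h>

    typedef int64_t sbintime_t;

    /*
     * Advance *next in steps of period until it passes now and report how
     * many steps were taken -- the "runs" value handleevents() passes as
     * the cnt argument of hardclock()/statclock()/profclock().
     */
    static int
    count_runs(sbintime_t now, sbintime_t *next, sbintime_t period)
    {
        int runs = 0;

        while (now >= *next) {
            *next += period;
            runs++;
        }
        return (runs);
    }

    int
    main(void)
    {
        sbintime_t nexthard = 0, period = 10;

        /* Deadlines at 0, 10, 20 and 30 have all passed by now = 35. */
        printf("runs = %d\n", count_runs(35, &nexthard, period));  /* 4 */
        return (0);
    }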
*/ state->now = now; CTR3(KTR_SPARE2, "intr at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); #ifdef SMP #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); #endif /* Prepare broadcasting to other CPUs for non-per-CPU timers. */ bcast = 0; #ifdef EARLY_AP_STARTUP if ((et->et_flags & ET_FLAGS_PERCPU) == 0) { #else if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) { #endif CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); ET_HW_LOCK(state); state->now = now; if (now >= state->nextevent) { state->nextevent += SBT_1S; if (curcpu != cpu) { state->ipi = 1; bcast = 1; } } ET_HW_UNLOCK(state); } } #endif /* Handle events for this time on this CPU. */ handleevents(now, 0); #ifdef SMP /* Broadcast interrupt to other CPUs for non-per-CPU timers. */ if (bcast) { CPU_FOREACH(cpu) { if (curcpu == cpu) continue; state = DPCPU_ID_PTR(cpu, timerstate); if (state->ipi) { state->ipi = 0; ipi_cpu(cpu, IPI_HARDCLOCK); } } } #endif } /* * Load new value into hardware timer. */ static void loadtimer(sbintime_t now, int start) { struct pcpu_state *state; sbintime_t new; sbintime_t *next; uint64_t tmp; int eq; if (timer->et_flags & ET_FLAGS_PERCPU) { state = DPCPU_PTR(timerstate); next = &state->nexttick; } else next = &nexttick; if (periodic) { if (start) { /* * Try to start all periodic timers aligned * to period to make events synchronous. */ tmp = now % timerperiod; new = timerperiod - tmp; if (new < tmp) /* Left less then passed. */ new += timerperiod; CTR5(KTR_SPARE2, "load p at %d: now %d.%08x first in %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff), (int)(new >> 32), (u_int)(new & 0xffffffff)); *next = new + now; et_start(timer, new, timerperiod); } } else { new = getnextevent(); eq = (new == *next); CTR4(KTR_SPARE2, "load at %d: next %d.%08x eq %d", curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq); if (!eq) { *next = new; et_start(timer, new - now, 0); } } } /* * Prepare event timer parameters after configuration changes. */ static void setuptimer(void) { int freq; if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0) periodic = 0; else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0) periodic = 1; singlemul = MIN(MAX(singlemul, 1), 20); freq = hz * singlemul; while (freq < (profiling ? profhz : stathz)) freq += hz; freq = round_freq(timer, freq); timerperiod = SBT_1S / freq; } /* * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler. */ static int doconfigtimer(void) { sbintime_t now; struct pcpu_state *state; state = DPCPU_PTR(timerstate); switch (atomic_load_acq_int(&state->action)) { case 1: now = sbinuptime(); ET_HW_LOCK(state); loadtimer(now, 1); ET_HW_UNLOCK(state); state->handle = 0; atomic_store_rel_int(&state->action, 0); return (1); case 2: ET_HW_LOCK(state); et_stop(timer); ET_HW_UNLOCK(state); state->handle = 0; atomic_store_rel_int(&state->action, 0); return (1); } if (atomic_readandclear_int(&state->handle) && !busy) { now = sbinuptime(); handleevents(now, 0); return (1); } return (0); } /* * Reconfigure specified timer. * For per-CPU timers use IPI to make other CPUs to reconfigure. */ static void configtimer(int start) { sbintime_t now, next; struct pcpu_state *state; int cpu; if (start) { setuptimer(); now = sbinuptime(); } else now = 0; critical_enter(); ET_HW_LOCK(DPCPU_PTR(timerstate)); if (start) { /* Initialize time machine parameters. 
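When starting a periodic timer, loadtimer() above phase-aligns the first expiration to a multiple of the period so that all periodic timers fire in sync. A small model of that computation, with plain integers standing in for sbintime_t:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Given the current time and the period, return the delay until the
     * next period boundary; if less time is left in this period than has
     * already passed, skip to the boundary after that one.
     */
    static int64_t
    first_shot_delay(int64_t now, int64_t period)
    {
        int64_t elapsed = now % period;
        int64_t delay = period - elapsed;

        if (delay < elapsed)            /* "Left less than passed." */
            delay += period;
        return (delay);
    }

    int
    main(void)
    {
        /* now = 97, period = 10: 3 left vs 7 passed -> wait 13. */
        printf("%lld\n", (long long)first_shot_delay(97, 10));
        return (0);
    }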
*/ next = now + timerperiod; if (periodic) nexttick = next; else nexttick = -1; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); #endif CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); state->now = now; #ifndef EARLY_AP_STARTUP if (!smp_started && cpu != CPU_FIRST()) state->nextevent = SBT_MAX; else #endif state->nextevent = next; if (periodic) state->nexttick = next; else state->nexttick = -1; state->nexthard = next; state->nextstat = next; state->nextprof = next; state->nextcall = next; state->nextcallopt = next; hardclock_sync(cpu); } busy = 0; /* Start global timer or per-CPU timer of this CPU. */ loadtimer(now, 1); } else { busy = 1; /* Stop global timer or per-CPU timer of this CPU. */ et_stop(timer); } ET_HW_UNLOCK(DPCPU_PTR(timerstate)); #ifdef SMP #ifdef EARLY_AP_STARTUP /* If timer is global we are done. */ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) { #else /* If timer is global or there is no other CPUs yet - we are done. */ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) { #endif critical_exit(); return; } /* Set reconfigure flags for other CPUs. */ CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); atomic_store_rel_int(&state->action, (cpu == curcpu) ? 0 : ( start ? 1 : 2)); } /* Broadcast reconfigure IPI. */ ipi_all_but_self(IPI_HARDCLOCK); /* Wait for reconfiguration completed. */ restart: cpu_spinwait(); CPU_FOREACH(cpu) { if (cpu == curcpu) continue; state = DPCPU_ID_PTR(cpu, timerstate); if (atomic_load_acq_int(&state->action)) goto restart; } #endif critical_exit(); } /* * Calculate nearest frequency supported by hardware timer. */ static int round_freq(struct eventtimer *et, int freq) { uint64_t div; if (et->et_frequency != 0) { div = lmax((et->et_frequency + freq / 2) / freq, 1); if (et->et_flags & ET_FLAGS_POW2DIV) div = 1 << (flsl(div + div / 2) - 1); freq = (et->et_frequency + div / 2) / div; } if (et->et_min_period > SBT_1S) panic("Event timer \"%s\" doesn't support sub-second periods!", et->et_name); else if (et->et_min_period != 0) freq = min(freq, SBT2FREQ(et->et_min_period)); if (et->et_max_period < SBT_1S && et->et_max_period != 0) freq = max(freq, SBT2FREQ(et->et_max_period)); return (freq); } /* * Configure and start event timers (BSP part). */ void cpu_initclocks_bsp(void) { struct pcpu_state *state; int base, div, cpu; mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN); CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN); state->nextcall = SBT_MAX; state->nextcallopt = SBT_MAX; } periodic = want_periodic; /* Grab requested timer or the best of present. */ if (timername[0]) timer = et_find(timername, 0, 0); if (timer == NULL && periodic) { timer = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); } if (timer == NULL) { timer = et_find(NULL, ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT); } if (timer == NULL && !periodic) { timer = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); } if (timer == NULL) panic("No usable event timer found!"); et_init(timer, timercb, NULL, NULL); /* Adapt to timer capabilities. */ if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0) periodic = 0; else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0) periodic = 1; if (timer->et_flags & ET_FLAGS_C3STOP) cpu_disable_c3_sleep++; /* * We honor the requested 'hz' value. * We want to run stathz in the neighborhood of 128hz. * We would like profhz to run as often as possible. 
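round_freq() picks the closest frequency the hardware can actually produce by dividing the timer's base frequency by an integer (or power-of-two) divisor. A userland model of that divisor selection, ignoring the min/max-period clamps:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Divide the hardware base frequency by the integer divisor that lands
     * nearest the requested frequency; optionally restrict the divisor to
     * a power of two, as ET_FLAGS_POW2DIV does.
     */
    static int
    round_freq_model(uint64_t et_frequency, int freq, int pow2div)
    {
        uint64_t div;

        div = (et_frequency + freq / 2) / freq;
        if (div < 1)
            div = 1;
        if (pow2div) {
            uint64_t v = div + div / 2, p = 1;

            /* Highest power of two <= v, like 1 << (flsl(v) - 1). */
            while (p * 2 <= v)
                p *= 2;
            div = p;
        }
        return ((int)((et_frequency + div / 2) / div));
    }

    int
    main(void)
    {
        /* A ~14.318 MHz timer asked for 1000 Hz. */
        printf("%d Hz\n", round_freq_model(14318182, 1000, 0));
        return (0);
    }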
*/ if (singlemul <= 0 || singlemul > 20) { if (hz >= 1500 || (hz % 128) == 0) singlemul = 1; else if (hz >= 750) singlemul = 2; else singlemul = 4; } if (periodic) { base = round_freq(timer, hz * singlemul); singlemul = max((base + hz / 2) / hz, 1); hz = (base + singlemul / 2) / singlemul; if (base <= 128) stathz = base; else { div = base / 128; if (div >= singlemul && (div % singlemul) == 0) div++; stathz = base / div; } profhz = stathz; while ((profhz + stathz) <= 128 * 64) profhz += stathz; profhz = round_freq(timer, profhz); } else { hz = round_freq(timer, hz); stathz = round_freq(timer, 127); profhz = round_freq(timer, stathz * 64); } tick = 1000000 / hz; tick_sbt = SBT_1S / hz; tick_bt = sbttobt(tick_sbt); statperiod = SBT_1S / stathz; profperiod = SBT_1S / profhz; ET_LOCK(); configtimer(1); ET_UNLOCK(); } /* * Start per-CPU event timers on APs. */ void cpu_initclocks_ap(void) { sbintime_t now; struct pcpu_state *state; struct thread *td; state = DPCPU_PTR(timerstate); now = sbinuptime(); ET_HW_LOCK(state); state->now = now; hardclock_sync(curcpu); spinlock_enter(); ET_HW_UNLOCK(state); td = curthread; td->td_intr_nesting_level++; handleevents(state->now, 2); td->td_intr_nesting_level--; spinlock_exit(); } void suspendclock(void) { ET_LOCK(); configtimer(0); ET_UNLOCK(); } void resumeclock(void) { ET_LOCK(); configtimer(1); ET_UNLOCK(); } /* * Switch to profiling clock rates. */ void cpu_startprofclock(void) { ET_LOCK(); if (profiling == 0) { if (periodic) { configtimer(0); profiling = 1; configtimer(1); } else profiling = 1; } else profiling++; ET_UNLOCK(); } /* * Switch to regular clock rates. */ void cpu_stopprofclock(void) { ET_LOCK(); if (profiling == 1) { if (periodic) { configtimer(0); profiling = 0; configtimer(1); } else profiling = 0; } else profiling--; ET_UNLOCK(); } /* * Switch to idle mode (all ticks handled). */ sbintime_t cpu_idleclock(void) { sbintime_t now, t; struct pcpu_state *state; if (idletick || busy || (periodic && (timer->et_flags & ET_FLAGS_PERCPU)) #ifdef DEVICE_POLLING || curcpu == CPU_FIRST() #endif ) return (-1); state = DPCPU_PTR(timerstate); if (periodic) now = state->now; else now = sbinuptime(); CTR3(KTR_SPARE2, "idle at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); t = getnextcpuevent(1); ET_HW_LOCK(state); state->idle = 1; state->nextevent = t; if (!periodic) loadtimer(now, 0); ET_HW_UNLOCK(state); return (MAX(t - now, 0)); } /* * Switch to active mode (skip empty ticks). */ void cpu_activeclock(void) { sbintime_t now; struct pcpu_state *state; struct thread *td; state = DPCPU_PTR(timerstate); if (state->idle == 0 || busy) return; if (periodic) now = state->now; else now = sbinuptime(); CTR3(KTR_SPARE2, "active at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); spinlock_enter(); td = curthread; td->td_intr_nesting_level++; handleevents(now, 1); td->td_intr_nesting_level--; spinlock_exit(); } /* * Change the frequency of the given timer. This changes et->et_frequency and * if et is the active timer it reconfigures the timer on all CPUs. This is * intended to be a private interface for the use of et_change_frequency() only. */ void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq) { ET_LOCK(); if (et == timer) { configtimer(0); et->et_frequency = newfreq; configtimer(1); } else et->et_frequency = newfreq; ET_UNLOCK(); } void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt) { struct pcpu_state *state; /* Do not touch anything if somebody reconfiguring timers. 
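The periodic-mode branch of cpu_initclocks_bsp() derives stathz (targeting roughly 128 Hz) and profhz from a single base frequency. A worked run of that selection for hz = 1000, assuming an ideal timer for which round_freq() simply returns the requested frequency (a sketch of the arithmetic shown above, not the full function):

    #include <stdio.h>

    int
    main(void)
    {
        int hz = 1000, singlemul, base, div, stathz, profhz;

        /* singlemul heuristic from cpu_initclocks_bsp(). */
        if (hz >= 1500 || (hz % 128) == 0)
            singlemul = 1;
        else if (hz >= 750)
            singlemul = 2;
        else
            singlemul = 4;

        /* Periodic mode: derive stathz and profhz from one base rate. */
        base = hz * singlemul;          /* ideal round_freq() */
        singlemul = (base + hz / 2) / hz;
        if (singlemul < 1)
            singlemul = 1;
        hz = (base + singlemul / 2) / singlemul;
        if (base <= 128)
            stathz = base;
        else {
            div = base / 128;
            if (div >= singlemul && (div % singlemul) == 0)
                div++;
            stathz = base / div;
        }
        profhz = stathz;
        while ((profhz + stathz) <= 128 * 64)
            profhz += stathz;

        /* hz 1000 -> base 2000, stathz 133, profhz 8113. */
        printf("hz %d stathz %d profhz %d\n", hz, stathz, profhz);
        return (0);
    }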
*/ if (busy) return; CTR6(KTR_SPARE2, "new co at %d: on %d at %d.%08x - %d.%08x", curcpu, cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff), (int)(bt >> 32), (u_int)(bt & 0xffffffff)); KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu)); state = DPCPU_ID_PTR(cpu, timerstate); ET_HW_LOCK(state); /* * If there is callout time already set earlier -- do nothing. * This check may appear redundant because we check already in * callout_process() but this double check guarantees we're safe * with respect to race conditions between interrupts execution * and scheduling. */ state->nextcallopt = bt_opt; if (bt >= state->nextcall) goto done; state->nextcall = bt; /* If there is some other event set earlier -- do nothing. */ if (bt >= state->nextevent) goto done; state->nextevent = bt; /* If timer is periodic -- there is nothing to reprogram. */ if (periodic) goto done; /* If timer is global or of the current CPU -- reprogram it. */ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) { loadtimer(sbinuptime(), 0); done: ET_HW_UNLOCK(state); return; } /* Otherwise make other CPU to reprogram it. */ state->handle = 1; ET_HW_UNLOCK(state); #ifdef SMP ipi_cpu(cpu, IPI_HARDCLOCK); #endif } /* * Report or change the active event timers hardware. */ static int sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS) { char buf[32]; struct eventtimer *et; int error; ET_LOCK(); et = timer; snprintf(buf, sizeof(buf), "%s", et->et_name); ET_UNLOCK(); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); ET_LOCK(); et = timer; if (error != 0 || req->newptr == NULL || strcasecmp(buf, et->et_name) == 0) { ET_UNLOCK(); return (error); } et = et_find(buf, 0, 0); if (et == NULL) { ET_UNLOCK(); return (ENOENT); } configtimer(0); et_free(timer); if (et->et_flags & ET_FLAGS_C3STOP) cpu_disable_c3_sleep++; if (timer->et_flags & ET_FLAGS_C3STOP) cpu_disable_c3_sleep--; periodic = want_periodic; timer = et; et_init(timer, timercb, NULL, NULL); configtimer(1); ET_UNLOCK(); return (error); } SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer"); /* * Report or change the active event timer periodicity. 
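cpu_new_callout() only touches the hardware when the new callout is earlier than everything already scheduled on that CPU and the timer runs in one-shot mode; otherwise it just records the deadlines. A compact model of that ordering check:

    #include <stdint.h>
    #include <stdio.h>

    typedef int64_t sbintime_t;

    struct cpu_timer_state {
        sbintime_t nextcall;    /* earliest mandatory callout */
        sbintime_t nextcallopt; /* earliest optional callout */
        sbintime_t nextevent;   /* next programmed hardware event */
    };

    /*
     * Record the new deadlines and report whether the hardware timer has
     * to be reprogrammed (kernel: loadtimer() locally, or an IPI to the
     * owning CPU for a per-CPU timer).
     */
    static int
    new_callout(struct cpu_timer_state *st, sbintime_t bt, sbintime_t bt_opt,
        int periodic)
    {
        st->nextcallopt = bt_opt;
        if (bt >= st->nextcall)
            return (0);
        st->nextcall = bt;
        if (bt >= st->nextevent)
            return (0);
        st->nextevent = bt;
        return (!periodic);
    }

    int
    main(void)
    {
        struct cpu_timer_state st = { 100, 100, 50 };

        printf("reprogram: %d\n", new_callout(&st, 40, 45, 0));  /* 1 */
        return (0);
    }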
*/ static int sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS) { int error, val; val = periodic; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); ET_LOCK(); configtimer(0); periodic = want_periodic = val; configtimer(1); ET_UNLOCK(); return (error); } SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode"); #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(clocksource, db_show_clocksource) { struct pcpu_state *st; int c; CPU_FOREACH(c) { st = DPCPU_ID_PTR(c, timerstate); db_printf( "CPU %2d: action %d handle %d ipi %d idle %d\n" " now %#jx nevent %#jx (%jd)\n" " ntick %#jx (%jd) nhard %#jx (%jd)\n" " nstat %#jx (%jd) nprof %#jx (%jd)\n" " ncall %#jx (%jd) ncallopt %#jx (%jd)\n", c, st->action, st->handle, st->ipi, st->idle, (uintmax_t)st->now, (uintmax_t)st->nextevent, (uintmax_t)(st->nextevent - st->now) / tick_sbt, (uintmax_t)st->nexttick, (uintmax_t)(st->nexttick - st->now) / tick_sbt, (uintmax_t)st->nexthard, (uintmax_t)(st->nexthard - st->now) / tick_sbt, (uintmax_t)st->nextstat, (uintmax_t)(st->nextstat - st->now) / tick_sbt, (uintmax_t)st->nextprof, (uintmax_t)(st->nextprof - st->now) / tick_sbt, (uintmax_t)st->nextcall, (uintmax_t)(st->nextcall - st->now) / tick_sbt, (uintmax_t)st->nextcallopt, (uintmax_t)(st->nextcallopt - st->now) / tick_sbt); } } #endif Index: head/sys/sys/systm.h =================================================================== --- head/sys/sys/systm.h (revision 338486) +++ head/sys/sys/systm.h (revision 338487) @@ -1,547 +1,543 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
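Both event-timer handlers are reachable from userland. A minimal reader for the two sysctls defined above (writing a new timer name or toggling periodic mode would pass the new value through the last two sysctlbyname() arguments instead of NULL/0):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
        char timer[32];
        int periodic;
        size_t len;

        len = sizeof(timer);
        if (sysctlbyname("kern.eventtimer.timer", timer, &len, NULL, 0) == 0)
            printf("active event timer: %s\n", timer);

        len = sizeof(periodic);
        if (sysctlbyname("kern.eventtimer.periodic", &periodic, &len,
            NULL, 0) == 0)
            printf("periodic mode: %d\n", periodic);
        return (0);
    }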
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)systm.h 8.7 (Berkeley) 3/29/95 * $FreeBSD$ */ #ifndef _SYS_SYSTM_H_ #define _SYS_SYSTM_H_ #include #include #include #include #include #include /* for people using printf mainly */ __NULLABILITY_PRAGMA_PUSH extern int cold; /* nonzero if we are doing a cold boot */ extern int suspend_blocked; /* block suspend due to pending shutdown */ extern int rebooting; /* kern_reboot() has been called. */ extern const char *panicstr; /* panic message */ extern char version[]; /* system version */ extern char compiler_version[]; /* compiler version */ extern char copyright[]; /* system copyright */ extern int kstack_pages; /* number of kernel stack pages */ extern u_long pagesizes[]; /* supported page sizes */ extern long physmem; /* physical memory */ extern long realmem; /* 'real' memory */ extern char *rootdevnames[2]; /* names of possible root devices */ extern int boothowto; /* reboot flags, from console subsystem */ extern int bootverbose; /* nonzero to print verbose messages */ extern int maxusers; /* system tune hint */ extern int ngroups_max; /* max # of supplemental groups */ extern int vm_guest; /* Running as virtual machine guest? */ /* * Detected virtual machine guest types. The intention is to expand * and/or add to the VM_GUEST_VM type if specific VM functionality is * ever implemented (e.g. vendor-specific paravirtualization features). * Keep in sync with vm_guest_sysctl_names[]. */ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV, VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_LAST }; /* * These functions need to be declared before the KASSERT macro is invoked in * !KASSERT_PANIC_OPTIONAL builds, so their declarations are sort of out of * place compared to other function definitions in this header. On the other * hand, this header is a bit disorganized anyway. */ void panic(const char *, ...) __dead2 __printflike(1, 2); void vpanic(const char *, __va_list) __dead2 __printflike(1, 0); #if defined(WITNESS) || defined(INVARIANT_SUPPORT) #ifdef KASSERT_PANIC_OPTIONAL void kassert_panic(const char *fmt, ...) __printflike(1, 2); #else #define kassert_panic panic #endif #endif #ifdef INVARIANTS /* The option is always available */ #define KASSERT(exp,msg) do { \ if (__predict_false(!(exp))) \ kassert_panic msg; \ } while (0) #define VNASSERT(exp, vp, msg) do { \ if (__predict_false(!(exp))) { \ vn_printf(vp, "VNASSERT failed\n"); \ kassert_panic msg; \ } \ } while (0) #else #define KASSERT(exp,msg) do { \ } while (0) #define VNASSERT(exp, vp, msg) do { \ } while (0) #endif #ifndef CTASSERT /* Allow lint to override */ #define CTASSERT(x) _Static_assert(x, "compile-time assertion failed") #endif #if defined(_KERNEL) #include /* MAXCPU */ #include /* curthread */ #include #endif /* * Assert that a pointer can be loaded from memory atomically. * * This assertion enforces stronger alignment than necessary. For example, * on some architectures, atomicity for unaligned loads will depend on * whether or not the load spans multiple cache lines. 
*/ #define ASSERT_ATOMIC_LOAD_PTR(var, msg) \ KASSERT(sizeof(var) == sizeof(void *) && \ ((uintptr_t)&(var) & (sizeof(void *) - 1)) == 0, msg) /* * Assert that a thread is in critical(9) section. */ #define CRITICAL_ASSERT(td) \ KASSERT((td)->td_critnest >= 1, ("Not in critical section")); /* * If we have already panic'd and this is the thread that called * panic(), then don't block on any mutexes but silently succeed. * Otherwise, the kernel will deadlock since the scheduler isn't * going to run the thread that holds any lock we need. */ #define SCHEDULER_STOPPED_TD(td) ({ \ MPASS((td) == curthread); \ __predict_false((td)->td_stopsched); \ }) #define SCHEDULER_STOPPED() SCHEDULER_STOPPED_TD(curthread) /* * Align variables. */ #define __read_mostly __section(".data.read_mostly") #define __read_frequently __section(".data.read_frequently") #define __exclusive_cache_line __aligned(CACHE_LINE_SIZE) \ __section(".data.exclusive_cache_line") /* * XXX the hints declarations are even more misplaced than most declarations * in this file, since they are needed in one file (per arch) and only used * in two files. * XXX most of these variables should be const. */ extern int osreldate; extern bool dynamic_kenv; extern struct mtx kenv_lock; extern char *kern_envp; extern char *md_envp; extern char static_env[]; extern char static_hints[]; /* by config for now */ extern char **kenvp; extern const void *zero_region; /* address space maps to a zeroed page */ extern int unmapped_buf_allowed; #ifdef __LP64__ #define IOSIZE_MAX iosize_max() #define DEVFS_IOSIZE_MAX devfs_iosize_max() #else #define IOSIZE_MAX SSIZE_MAX #define DEVFS_IOSIZE_MAX SSIZE_MAX #endif /* * General function declarations. */ struct inpcb; struct lock_object; struct malloc_type; struct mtx; struct proc; struct socket; struct thread; struct tty; struct ucred; struct uio; struct _jmp_buf; struct trapframe; struct eventtimer; int setjmp(struct _jmp_buf *) __returns_twice; void longjmp(struct _jmp_buf *, int) __dead2; int dumpstatus(vm_offset_t addr, off_t count); int nullop(void); int eopnotsupp(void); int ureadc(int, struct uio *); void hashdestroy(void *, struct malloc_type *, u_long); void *hashinit(int count, struct malloc_type *type, u_long *hashmask); void *hashinit_flags(int count, struct malloc_type *type, u_long *hashmask, int flags); #define HASH_NOWAIT 0x00000001 #define HASH_WAITOK 0x00000002 void *phashinit(int count, struct malloc_type *type, u_long *nentries); void *phashinit_flags(int count, struct malloc_type *type, u_long *nentries, int flags); void g_waitidle(void); void cpu_boot(int); void cpu_flush_dcache(void *, size_t); void cpu_rootconf(void); void critical_enter_KBI(void); void critical_exit_KBI(void); void critical_exit_preempt(void); void init_param1(void); void init_param2(long physpages); void init_static_kenv(char *, size_t); void tablefull(const char *); #if defined(KLD_MODULE) || defined(KTR_CRITICAL) || !defined(_KERNEL) || defined(GENOFFSET) #define critical_enter() critical_enter_KBI() #define critical_exit() critical_exit_KBI() #else static __inline void critical_enter(void) { struct thread_lite *td; td = (struct thread_lite *)curthread; td->td_critnest++; __compiler_membar(); } static __inline void critical_exit(void) { struct thread_lite *td; td = (struct thread_lite *)curthread; KASSERT(td->td_critnest != 0, ("critical_exit: td_critnest == 0")); __compiler_membar(); td->td_critnest--; __compiler_membar(); if (__predict_false(td->td_owepreempt)) critical_exit_preempt(); } #endif #ifdef EARLY_PRINTF 
typedef void early_putc_t(int ch); extern early_putc_t *early_putc; #endif int kvprintf(char const *, void (*)(int, void*), void *, int, __va_list) __printflike(1, 0); void log(int, const char *, ...) __printflike(2, 3); void log_console(struct uio *); void vlog(int, const char *, __va_list) __printflike(2, 0); int asprintf(char **ret, struct malloc_type *mtp, const char *format, ...) __printflike(3, 4); int printf(const char *, ...) __printflike(1, 2); int snprintf(char *, size_t, const char *, ...) __printflike(3, 4); int sprintf(char *buf, const char *, ...) __printflike(2, 3); int uprintf(const char *, ...) __printflike(1, 2); int vprintf(const char *, __va_list) __printflike(1, 0); int vasprintf(char **ret, struct malloc_type *mtp, const char *format, __va_list ap) __printflike(3, 0); int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0); int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0); int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0); int ttyprintf(struct tty *, const char *, ...) __printflike(2, 3); int sscanf(const char *, char const * _Nonnull, ...) __scanflike(2, 3); int vsscanf(const char * _Nonnull, char const * _Nonnull, __va_list) __scanflike(2, 0); long strtol(const char *, char **, int); u_long strtoul(const char *, char **, int); quad_t strtoq(const char *, char **, int); u_quad_t strtouq(const char *, char **, int); void tprintf(struct proc *p, int pri, const char *, ...) __printflike(3, 4); void vtprintf(struct proc *, int, const char *, __va_list) __printflike(3, 0); void hexdump(const void *ptr, int length, const char *hdr, int flags); #define HD_COLUMN_MASK 0xff #define HD_DELIM_MASK 0xff00 #define HD_OMIT_COUNT (1 << 16) #define HD_OMIT_HEX (1 << 17) #define HD_OMIT_CHARS (1 << 18) #define ovbcopy(f, t, l) bcopy((f), (t), (l)) void bcopy(const void * _Nonnull from, void * _Nonnull to, size_t len); #define bcopy(from, to, len) __builtin_memmove((to), (from), (len)) void bzero(void * _Nonnull buf, size_t len); #define bzero(buf, len) __builtin_memset((buf), 0, (len)) void explicit_bzero(void * _Nonnull, size_t); int bcmp(const void *b1, const void *b2, size_t len); #define bcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) void *memset(void * _Nonnull buf, int c, size_t len); #define memset(buf, c, len) __builtin_memset((buf), (c), (len)) void *memcpy(void * _Nonnull to, const void * _Nonnull from, size_t len); #define memcpy(to, from, len) __builtin_memcpy((to), (from), (len)) void *memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n); #define memmove(dest, src, n) __builtin_memmove((dest), (src), (n)) int memcmp(const void *b1, const void *b2, size_t len); #define memcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) int copystr(const void * _Nonnull __restrict kfaddr, void * _Nonnull __restrict kdaddr, size_t len, size_t * __restrict lencopied); int copyinstr(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len, size_t * __restrict lencopied); int copyin(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len); int copyin_nofault(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len); int copyout(const void * _Nonnull __restrict kaddr, void * __restrict udaddr, size_t len); int copyout_nofault(const void * _Nonnull __restrict kaddr, void * __restrict udaddr, size_t len); int fubyte(volatile const void *base); long fuword(volatile const void *base); int fuword16(volatile const void *base); int32_t 
fuword32(volatile const void *base); int64_t fuword64(volatile const void *base); int fueword(volatile const void *base, long *val); int fueword32(volatile const void *base, int32_t *val); int fueword64(volatile const void *base, int64_t *val); int subyte(volatile void *base, int byte); int suword(volatile void *base, long word); int suword16(volatile void *base, int word); int suword32(volatile void *base, int32_t word); int suword64(volatile void *base, int64_t word); uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval); u_long casuword(volatile u_long *p, u_long oldval, u_long newval); int casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, uint32_t newval); int casueword(volatile u_long *p, u_long oldval, u_long *oldvalp, u_long newval); void realitexpire(void *); int sysbeep(int hertz, int period); -void hardclock(int usermode, uintfptr_t pc); -void hardclock_cnt(int cnt, int usermode); -void hardclock_cpu(int usermode); +void hardclock(int cnt, int usermode); void hardclock_sync(int cpu); void softclock(void *); -void statclock(int usermode); -void statclock_cnt(int cnt, int usermode); -void profclock(int usermode, uintfptr_t pc); -void profclock_cnt(int cnt, int usermode, uintfptr_t pc); +void statclock(int cnt, int usermode); +void profclock(int cnt, int usermode, uintfptr_t pc); int hardclockintr(void); void startprofclock(struct proc *); void stopprofclock(struct proc *); void cpu_startprofclock(void); void cpu_stopprofclock(void); void suspendclock(void); void resumeclock(void); sbintime_t cpu_idleclock(void); void cpu_activeclock(void); void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt); void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq); extern int cpu_disable_c2_sleep; extern int cpu_disable_c3_sleep; char *kern_getenv(const char *name); void freeenv(char *env); int getenv_int(const char *name, int *data); int getenv_uint(const char *name, unsigned int *data); int getenv_long(const char *name, long *data); int getenv_ulong(const char *name, unsigned long *data); int getenv_string(const char *name, char *data, int size); int getenv_int64(const char *name, int64_t *data); int getenv_uint64(const char *name, uint64_t *data); int getenv_quad(const char *name, quad_t *data); int kern_setenv(const char *name, const char *value); int kern_unsetenv(const char *name); int testenv(const char *name); int getenv_array(const char *name, void *data, int size, int *psize, int type_size, bool allow_signed); #define GETENV_UNSIGNED false /* negative numbers not allowed */ #define GETENV_SIGNED true /* negative numbers allowed */ typedef uint64_t (cpu_tick_f)(void); void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var); extern cpu_tick_f *cpu_ticks; uint64_t cpu_tickrate(void); uint64_t cputick2usec(uint64_t tick); #ifdef APM_FIXUP_CALLTODO struct timeval; void adjust_timeout_calltodo(struct timeval *time_change); #endif /* APM_FIXUP_CALLTODO */ #include /* Initialize the world */ void consinit(void); void cpu_initclocks(void); void cpu_initclocks_bsp(void); void cpu_initclocks_ap(void); void usrinfoinit(void); /* Finalize the world */ void kern_reboot(int) __dead2; void shutdown_nice(int); /* Timeouts */ typedef void timeout_t(void *); /* timeout function type */ #define CALLOUT_HANDLE_INITIALIZER(handle) \ { NULL } void callout_handle_init(struct callout_handle *); struct callout_handle timeout(timeout_t *, void *, int); void untimeout(timeout_t *, void *, struct callout_handle); /* Stubs for obsolete 
functions that used to be for interrupt management */ static __inline intrmask_t splbio(void) { return 0; } static __inline intrmask_t splcam(void) { return 0; } static __inline intrmask_t splclock(void) { return 0; } static __inline intrmask_t splhigh(void) { return 0; } static __inline intrmask_t splimp(void) { return 0; } static __inline intrmask_t splnet(void) { return 0; } static __inline intrmask_t spltty(void) { return 0; } static __inline void splx(intrmask_t ipl __unused) { return; } /* * Common `proc' functions are declared here so that proc.h can be included * less often. */ int _sleep(void * _Nonnull chan, struct lock_object *lock, int pri, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep(chan, mtx, pri, wmesg, timo) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) #define msleep_sbt(chan, mtx, pri, wmesg, bt, pr, flags) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (bt), (pr), \ (flags)) int msleep_spin_sbt(void * _Nonnull chan, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep_spin(chan, mtx, wmesg, timo) \ msleep_spin_sbt((chan), (mtx), (wmesg), tick_sbt * (timo), \ 0, C_HARDCLOCK) int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define pause(wmesg, timo) \ pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK) #define pause_sig(wmesg, timo) \ pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK | C_CATCH) #define tsleep(chan, pri, wmesg, timo) \ _sleep((chan), NULL, (pri), (wmesg), tick_sbt * (timo), \ 0, C_HARDCLOCK) #define tsleep_sbt(chan, pri, wmesg, bt, pr, flags) \ _sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags)) void wakeup(void * chan); void wakeup_one(void * chan); /* * Common `struct cdev *' stuff are declared here to avoid #include poisoning */ struct cdev; dev_t dev2udev(struct cdev *x); const char *devtoname(struct cdev *cdev); #ifdef __LP64__ size_t devfs_iosize_max(void); size_t iosize_max(void); #endif int poll_no_poll(int events); /* XXX: Should be void nanodelay(u_int nsec); */ void DELAY(int usec); /* Root mount holdback API */ struct root_hold_token; struct root_hold_token *root_mount_hold(const char *identifier); void root_mount_rel(struct root_hold_token *h); int root_mounted(void); /* * Unit number allocation API. (kern/subr_unit.c) */ struct unrhdr; struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex); void init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex); void delete_unrhdr(struct unrhdr *uh); void clear_unrhdr(struct unrhdr *uh); void clean_unrhdr(struct unrhdr *uh); void clean_unrhdrl(struct unrhdr *uh); int alloc_unr(struct unrhdr *uh); int alloc_unr_specific(struct unrhdr *uh, u_int item); int alloc_unrl(struct unrhdr *uh); void free_unr(struct unrhdr *uh, u_int item); void intr_prof_stack_use(struct thread *td, struct trapframe *frame); void counted_warning(unsigned *counter, const char *msg); /* * APIs to manage deprecation and obsolescence. */ struct device; void _gone_in(int major, const char *msg); void _gone_in_dev(struct device *dev, int major, const char *msg); #ifdef NO_OBSOLETE_CODE #define __gone_ok(m, msg) \ _Static_assert(m < P_OSREL_MAJOR(__FreeBSD_version)), \ "Obsolete code" msg); #else #define __gone_ok(m, msg) #endif #define gone_in(major, msg) __gone_ok(major, msg) _gone_in(major, msg) #define gone_in_dev(dev, major, msg) __gone_ok(major, msg) _gone_in_dev(dev, major, msg) __NULLABILITY_PRAGMA_POP #endif /* !_SYS_SYSTM_H_ */