diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index 992a093d30ab..3923c214be8d 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -1,1519 +1,1556 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_callout_profiling.h"
 #include "opt_ddb.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/callout.h>
 #include <sys/domainset.h>
 #include <sys/file.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
+#include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
+#include <sys/unistd.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 #include <machine/_inttypes.h>
 #endif
 
 #ifdef SMP
 #include <machine/cpu.h>
 #endif
 
 DPCPU_DECLARE(sbintime_t, hardclocktime);
 
 SDT_PROVIDER_DEFINE(callout_execute);
 SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
 SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
 
+static void	softclock_thread(void *arg);
+
 #ifdef CALLOUT_PROFILING
 static int avg_depth;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
     "Average number of items examined per softclock call. Units = 1/1000");
 static int avg_gcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
     "Average number of Giant callouts made per softclock call. Units = 1/1000");
 static int avg_lockcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
     "Average number of lock callouts made per softclock call. Units = 1/1000");
 static int avg_mpcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
     "Average number of MP callouts made per softclock call. Units = 1/1000");
 static int avg_depth_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
     "Average number of direct callouts examined per callout_process call. "
     "Units = 1/1000");
 static int avg_lockcalls_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
     &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
     "callout_process call. Units = 1/1000");
 static int avg_mpcalls_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
     0, "Average number of MP direct callouts made per callout_process call. "
     "Units = 1/1000");
 #endif
 
 static int ncallout;
 SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0,
     "Number of entries in callwheel and size of timeout() preallocation");
 
 #ifdef	RSS
 static int pin_default_swi = 1;
 static int pin_pcpu_swi = 1;
 #else
 static int pin_default_swi = 0;
 static int pin_pcpu_swi = 0;
 #endif
 
 SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi,
     0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)");
 SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi,
     0, "Pin the per-CPU swis (except PCPU 0, which is also default)");
 
 /*
  * TODO:
  *	allocate more timeout table slots when table overflows.
  */
 static u_int __read_mostly callwheelsize;
 static u_int __read_mostly callwheelmask;
 
 /*
  * The callout cpu exec entities represent informations necessary for
  * describing the state of callouts currently running on the CPU and the ones
  * necessary for migrating callouts to the new callout cpu. In particular,
  * the first entry of the array cc_exec_entity holds informations for callout
  * running in SWI thread context, while the second one holds informations
  * for callout running directly from hardware interrupt context.
  * The cached informations are very important for deferring migration when
  * the migrating callout is already running.
  */
 struct cc_exec {
 	struct callout		*cc_curr;
 	callout_func_t		*cc_drain;
 	void			*cc_last_func;
 	void			*cc_last_arg;
 #ifdef SMP
 	callout_func_t		*ce_migration_func;
 	void			*ce_migration_arg;
 	sbintime_t		ce_migration_time;
 	sbintime_t		ce_migration_prec;
 	int			ce_migration_cpu;
 #endif
 	bool			cc_cancel;
 	bool			cc_waiting;
 };
 
 /*
  * There is one struct callout_cpu per cpu, holding all relevant
  * state for the callout processing thread on the individual CPU.
  */
 struct callout_cpu {
 	struct mtx_padalign	cc_lock;
 	struct cc_exec 		cc_exec_entity[2];
 	struct callout		*cc_next;
 	struct callout_list	*cc_callwheel;
 	struct callout_tailq	cc_expireq;
 	sbintime_t		cc_firstevent;
 	sbintime_t		cc_lastscan;
-	void			*cc_cookie;
+	struct thread		*cc_thread;
 	u_int			cc_bucket;
 	u_int			cc_inited;
 #ifdef KTR
 	char			cc_ktr_event_name[20];
 #endif
 };
 
 #define	callout_migrating(c)	((c)->c_iflags & CALLOUT_DFRMIGRATION)
 
 #define	cc_exec_curr(cc, dir)		cc->cc_exec_entity[dir].cc_curr
 #define	cc_exec_last_func(cc, dir)	cc->cc_exec_entity[dir].cc_last_func
 #define	cc_exec_last_arg(cc, dir)	cc->cc_exec_entity[dir].cc_last_arg
 #define	cc_exec_drain(cc, dir)		cc->cc_exec_entity[dir].cc_drain
 #define	cc_exec_next(cc)		cc->cc_next
 #define	cc_exec_cancel(cc, dir)		cc->cc_exec_entity[dir].cc_cancel
 #define	cc_exec_waiting(cc, dir)	cc->cc_exec_entity[dir].cc_waiting
 #ifdef SMP
 #define	cc_migration_func(cc, dir)	cc->cc_exec_entity[dir].ce_migration_func
 #define	cc_migration_arg(cc, dir)	cc->cc_exec_entity[dir].ce_migration_arg
 #define	cc_migration_cpu(cc, dir)	cc->cc_exec_entity[dir].ce_migration_cpu
 #define	cc_migration_time(cc, dir)	cc->cc_exec_entity[dir].ce_migration_time
 #define	cc_migration_prec(cc, dir)	cc->cc_exec_entity[dir].ce_migration_prec
 
 static struct callout_cpu cc_cpu[MAXCPU];
 #define	CPUBLOCK	MAXCPU
 #define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
 #define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
 #else
 static struct callout_cpu cc_cpu;
 #define	CC_CPU(cpu)	(&cc_cpu)
 #define	CC_SELF()	(&cc_cpu)
 #endif
 #define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
 #define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
 
 static int __read_mostly cc_default_cpu;
 
 static void	callout_cpu_init(struct callout_cpu *cc, int cpu);
 static void	softclock_call_cc(struct callout *c, struct callout_cpu *cc,
 #ifdef CALLOUT_PROFILING
 		    int *mpcalls, int *lockcalls, int *gcalls,
 #endif
 		    int direct);
 
 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
 
 /**
  * Locked by cc_lock:
  *   cc_curr         - If a callout is in progress, it is cc_curr.
  *                     If cc_curr is non-NULL, threads waiting in
  *                     callout_drain() will be woken up as soon as the
  *                     relevant callout completes.
  *   cc_cancel       - Changing to 1 with both callout_lock and cc_lock held
  *                     guarantees that the current callout will not run.
- *                     The softclock() function sets this to 0 before it
+ *                     The softclock_call_cc() function sets this to 0 before it
  *                     drops callout_lock to acquire c_lock, and it calls
  *                     the handler only if curr_cancelled is still 0 after
  *                     cc_lock is successfully acquired.
  *   cc_waiting      - If a thread is waiting in callout_drain(), then
  *                     callout_wait is nonzero.  Set only when
  *                     cc_curr is non-NULL.
  */
 
 /*
  * Resets the execution entity tied to a specific callout cpu.
  */
 static void
 cc_cce_cleanup(struct callout_cpu *cc, int direct)
 {
 
 	cc_exec_curr(cc, direct) = NULL;
 	cc_exec_cancel(cc, direct) = false;
 	cc_exec_waiting(cc, direct) = false;
 #ifdef SMP
 	cc_migration_cpu(cc, direct) = CPUBLOCK;
 	cc_migration_time(cc, direct) = 0;
 	cc_migration_prec(cc, direct) = 0;
 	cc_migration_func(cc, direct) = NULL;
 	cc_migration_arg(cc, direct) = NULL;
 #endif
 }
 
 /*
  * Checks if migration is requested by a specific callout cpu.
  */
 static int
 cc_cce_migrating(struct callout_cpu *cc, int direct)
 {
 
 #ifdef SMP
 	return (cc_migration_cpu(cc, direct) != CPUBLOCK);
 #else
 	return (0);
 #endif
 }
 
 /*
  * Kernel low level callwheel initialization
  * called on the BSP during kernel startup.
  */
 static void
 callout_callwheel_init(void *dummy)
 {
 	struct callout_cpu *cc;
 	int cpu;
 
 	/*
 	 * Calculate the size of the callout wheel and the preallocated
 	 * timeout() structures.
 	 * XXX: Clip callout to result of previous function of maxusers
 	 * maximum 384.  This is still huge, but acceptable.
 	 */
 	ncallout = imin(16 + maxproc + maxfiles, 18508);
 	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
 
 	/*
 	 * Calculate callout wheel size, should be next power of two higher
 	 * than 'ncallout'.
 	 */
 	callwheelsize = 1 << fls(ncallout);
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Fetch whether we're pinning the swi's or not.
 	 */
 	TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi);
 	TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi);
 
 	/*
 	 * Initialize callout wheels.  The software interrupt threads
 	 * are created later.
 	 */
 	cc_default_cpu = PCPU_GET(cpuid);
 	CPU_FOREACH(cpu) {
 		cc = CC_CPU(cpu);
 		callout_cpu_init(cc, cpu);
 	}
 }
 SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);
 
 /*
  * Initialize the per-cpu callout structures.
  */
 static void
 callout_cpu_init(struct callout_cpu *cc, int cpu)
 {
 	int i;
 
-	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
+	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN);
 	cc->cc_inited = 1;
 	cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) *
 	    callwheelsize, M_CALLOUT,
 	    DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK);
 	for (i = 0; i < callwheelsize; i++)
 		LIST_INIT(&cc->cc_callwheel[i]);
 	TAILQ_INIT(&cc->cc_expireq);
 	cc->cc_firstevent = SBT_MAX;
 	for (i = 0; i < 2; i++)
 		cc_cce_cleanup(cc, i);
 #ifdef KTR
 	snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
 	    "callwheel cpu %d", cpu);
 #endif
 }
 
 #ifdef SMP
 /*
  * Switches the cpu tied to a specific callout.
  * The function expects a locked incoming callout cpu and returns with
  * locked outcoming callout cpu.
  */
 static struct callout_cpu *
 callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
 {
 	struct callout_cpu *new_cc;
 
 	MPASS(c != NULL && cc != NULL);
 	CC_LOCK_ASSERT(cc);
 
 	/*
 	 * Avoid interrupts and preemption firing after the callout cpu
 	 * is blocked in order to avoid deadlocks as the new thread
 	 * may be willing to acquire the callout cpu lock.
 	 */
 	c->c_cpu = CPUBLOCK;
 	spinlock_enter();
 	CC_UNLOCK(cc);
 	new_cc = CC_CPU(new_cpu);
 	CC_LOCK(new_cc);
 	spinlock_exit();
 	c->c_cpu = new_cpu;
 	return (new_cc);
 }
 #endif
 
 /*
  * Start softclock threads.
  */
 static void
 start_softclock(void *dummy)
 {
+	struct proc *p;
+	struct thread *td;
 	struct callout_cpu *cc;
-	char name[MAXCOMLEN];
-	int cpu;
+	int cpu, error;
 	bool pin_swi;
-	struct intr_event *ie;
 
+	p = NULL;
 	CPU_FOREACH(cpu) {
 		cc = CC_CPU(cpu);
-		snprintf(name, sizeof(name), "clock (%d)", cpu);
-		ie = NULL;
-		if (swi_add(&ie, name, softclock, cc, SWI_CLOCK,
-		    INTR_MPSAFE, &cc->cc_cookie))
-			panic("died while creating standard software ithreads");
+		error = kproc_kthread_add(softclock_thread, cc, &p, &td,
+		    RFSTOPPED, 0, "clock", "clock (%d)", cpu);
+		if (error != 0)
+			panic("failed to create softclock thread for cpu %d: %d",
+			    cpu, error);
+		CC_LOCK(cc);
+		cc->cc_thread = td;
+		thread_lock(td);
+		sched_class(td, PRI_ITHD);
+		sched_prio(td, PI_SWI(SWI_CLOCK));
+		TD_SET_IWAIT(td);
+		thread_lock_set(td, (struct mtx *)&cc->cc_lock);
+		thread_unlock(td);
 		if (cpu == cc_default_cpu)
 			pin_swi = pin_default_swi;
 		else
 			pin_swi = pin_pcpu_swi;
-		if (pin_swi && (intr_event_bind(ie, cpu) != 0)) {
-			printf("%s: %s clock couldn't be pinned to cpu %d\n",
-			    __func__,
-			    cpu == cc_default_cpu ? "default" : "per-cpu",
-			    cpu);
+		if (pin_swi) {
+			error = cpuset_setithread(td->td_tid, cpu);
+			if (error != 0)
+				printf("%s: %s clock couldn't be pinned to cpu %d: %d\n",
+				    __func__, cpu == cc_default_cpu ?
+				    "default" : "per-cpu", cpu, error);
 		}
 	}
 }
 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
 
 #define	CC_HASH_SHIFT	8
 
 static inline u_int
 callout_hash(sbintime_t sbt)
 {
 
 	return (sbt >> (32 - CC_HASH_SHIFT));
 }
 
 static inline u_int
 callout_get_bucket(sbintime_t sbt)
 {
 
 	return (callout_hash(sbt) & callwheelmask);
 }
 
 void
 callout_process(sbintime_t now)
 {
 	struct callout *tmp, *tmpn;
 	struct callout_cpu *cc;
 	struct callout_list *sc;
+	struct thread *td;
 	sbintime_t first, last, max, tmp_max;
 	uint32_t lookahead;
 	u_int firstb, lastb, nowb;
 #ifdef CALLOUT_PROFILING
 	int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
 #endif
 
 	cc = CC_SELF();
 	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
 
 	/* Compute the buckets of the last scan and present times. */
 	firstb = callout_hash(cc->cc_lastscan);
 	cc->cc_lastscan = now;
 	nowb = callout_hash(now);
 
 	/* Compute the last bucket and minimum time of the bucket after it. */
 	if (nowb == firstb)
 		lookahead = (SBT_1S / 16);
 	else if (nowb - firstb == 1)
 		lookahead = (SBT_1S / 8);
 	else
 		lookahead = (SBT_1S / 2);
 	first = last = now;
 	first += (lookahead / 2);
 	last += lookahead;
 	last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
 	lastb = callout_hash(last) - 1;
 	max = last;
 
 	/*
 	 * Check if we wrapped around the entire wheel from the last scan.
 	 * In case, we need to scan entirely the wheel for pending callouts.
 	 */
 	if (lastb - firstb >= callwheelsize) {
 		lastb = firstb + callwheelsize - 1;
 		if (nowb - firstb >= callwheelsize)
 			nowb = lastb;
 	}
 
 	/* Iterate callwheel from firstb to nowb and then up to lastb. */
 	do {
 		sc = &cc->cc_callwheel[firstb & callwheelmask];
 		tmp = LIST_FIRST(sc);
 		while (tmp != NULL) {
 			/* Run the callout if present time within allowed. */
 			if (tmp->c_time <= now) {
 				/*
 				 * Consumer told us the callout may be run
 				 * directly from hardware interrupt context.
 				 */
 				if (tmp->c_iflags & CALLOUT_DIRECT) {
 #ifdef CALLOUT_PROFILING
 					++depth_dir;
 #endif
 					cc_exec_next(cc) =
 					    LIST_NEXT(tmp, c_links.le);
 					cc->cc_bucket = firstb & callwheelmask;
 					LIST_REMOVE(tmp, c_links.le);
 					softclock_call_cc(tmp, cc,
 #ifdef CALLOUT_PROFILING
 					    &mpcalls_dir, &lockcalls_dir, NULL,
 #endif
 					    1);
 					tmp = cc_exec_next(cc);
 					cc_exec_next(cc) = NULL;
 				} else {
 					tmpn = LIST_NEXT(tmp, c_links.le);
 					LIST_REMOVE(tmp, c_links.le);
 					TAILQ_INSERT_TAIL(&cc->cc_expireq,
 					    tmp, c_links.tqe);
 					tmp->c_iflags |= CALLOUT_PROCESSED;
 					tmp = tmpn;
 				}
 				continue;
 			}
 			/* Skip events from distant future. */
 			if (tmp->c_time >= max)
 				goto next;
 			/*
 			 * Event minimal time is bigger than present maximal
 			 * time, so it cannot be aggregated.
 			 */
 			if (tmp->c_time > last) {
 				lastb = nowb;
 				goto next;
 			}
 			/* Update first and last time, respecting this event. */
 			if (tmp->c_time < first)
 				first = tmp->c_time;
 			tmp_max = tmp->c_time + tmp->c_precision;
 			if (tmp_max < last)
 				last = tmp_max;
 next:
 			tmp = LIST_NEXT(tmp, c_links.le);
 		}
 		/* Proceed with the next bucket. */
 		firstb++;
 		/*
 		 * Stop if we looked after present time and found
 		 * some event we can't execute at now.
 		 * Stop if we looked far enough into the future.
 		 */
 	} while (((int)(firstb - lastb)) <= 0);
 	cc->cc_firstevent = last;
 	cpu_new_callout(curcpu, last, first);
 
 #ifdef CALLOUT_PROFILING
 	avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
 	avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
 	avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
 #endif
-	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	/*
-	 * swi_sched acquires the thread lock, so we don't want to call it
-	 * with cc_lock held; incorrect locking order.
-	 */
-	if (!TAILQ_EMPTY(&cc->cc_expireq))
-		swi_sched(cc->cc_cookie, 0);
+	if (!TAILQ_EMPTY(&cc->cc_expireq)) {
+		td = cc->cc_thread;
+		if (TD_AWAITING_INTR(td)) {
+			TD_CLR_IWAIT(td);
+			sched_add(td, SRQ_INTR);
+		} else
+			mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+	} else
+		mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
 }
 
 static struct callout_cpu *
 callout_lock(struct callout *c)
 {
 	struct callout_cpu *cc;
 	int cpu;
 
 	for (;;) {
 		cpu = c->c_cpu;
 #ifdef SMP
 		if (cpu == CPUBLOCK) {
 			while (c->c_cpu == CPUBLOCK)
 				cpu_spinwait();
 			continue;
 		}
 #endif
 		cc = CC_CPU(cpu);
 		CC_LOCK(cc);
 		if (cpu == c->c_cpu)
 			break;
 		CC_UNLOCK(cc);
 	}
 	return (cc);
 }
 
 static void
 callout_cc_add(struct callout *c, struct callout_cpu *cc,
     sbintime_t sbt, sbintime_t precision, void (*func)(void *),
     void *arg, int cpu, int flags)
 {
 	int bucket;
 
 	CC_LOCK_ASSERT(cc);
 	if (sbt < cc->cc_lastscan)
 		sbt = cc->cc_lastscan;
 	c->c_arg = arg;
 	c->c_iflags |= CALLOUT_PENDING;
 	c->c_iflags &= ~CALLOUT_PROCESSED;
 	c->c_flags |= CALLOUT_ACTIVE;
 	if (flags & C_DIRECT_EXEC)
 		c->c_iflags |= CALLOUT_DIRECT;
 	c->c_func = func;
 	c->c_time = sbt;
 	c->c_precision = precision;
 	bucket = callout_get_bucket(c->c_time);
 	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
 	    c, (int)(c->c_precision >> 32),
 	    (u_int)(c->c_precision & 0xffffffff));
 	LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
 	if (cc->cc_bucket == bucket)
 		cc_exec_next(cc) = c;
 
 	/*
 	 * Inform the eventtimers(4) subsystem there's a new callout
 	 * that has been inserted, but only if really required.
 	 */
 	if (SBT_MAX - c->c_time < c->c_precision)
 		c->c_precision = SBT_MAX - c->c_time;
 	sbt = c->c_time + c->c_precision;
 	if (sbt < cc->cc_firstevent) {
 		cc->cc_firstevent = sbt;
 		cpu_new_callout(cpu, sbt, c->c_time);
 	}
 }
 
 static void
 softclock_call_cc(struct callout *c, struct callout_cpu *cc,
 #ifdef CALLOUT_PROFILING
     int *mpcalls, int *lockcalls, int *gcalls,
 #endif
     int direct)
 {
 	struct rm_priotracker tracker;
 	callout_func_t *c_func, *drain;
 	void *c_arg;
 	struct lock_class *class;
 	struct lock_object *c_lock;
 	uintptr_t lock_status;
 	int c_iflags;
 #ifdef SMP
 	struct callout_cpu *new_cc;
 	callout_func_t *new_func;
 	void *new_arg;
 	int flags, new_cpu;
 	sbintime_t new_prec, new_time;
 #endif
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 
 	sbintime_t sbt1, sbt2;
 	struct timespec ts2;
 	static sbintime_t maxdt = 2 * SBT_1MS;	/* 2 msec */
 	static callout_func_t *lastfunc;
 #endif
 
 	KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
 	    ("softclock_call_cc: pend %p %x", c, c->c_iflags));
 	KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
 	    ("softclock_call_cc: act %p %x", c, c->c_flags));
 	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
 	lock_status = 0;
 	if (c->c_flags & CALLOUT_SHAREDLOCK) {
 		if (class == &lock_class_rm)
 			lock_status = (uintptr_t)&tracker;
 		else
 			lock_status = 1;
 	}
 	c_lock = c->c_lock;
 	c_func = c->c_func;
 	c_arg = c->c_arg;
 	c_iflags = c->c_iflags;
 	c->c_iflags &= ~CALLOUT_PENDING;
 
 	cc_exec_curr(cc, direct) = c;
 	cc_exec_last_func(cc, direct) = c_func;
 	cc_exec_last_arg(cc, direct) = c_arg;
 	cc_exec_cancel(cc, direct) = false;
 	cc_exec_drain(cc, direct) = NULL;
 	CC_UNLOCK(cc);
 	if (c_lock != NULL) {
 		class->lc_lock(c_lock, lock_status);
 		/*
 		 * The callout may have been cancelled
 		 * while we switched locks.
 		 */
 		if (cc_exec_cancel(cc, direct)) {
 			class->lc_unlock(c_lock);
 			goto skip;
 		}
 		/* The callout cannot be stopped now. */
 		cc_exec_cancel(cc, direct) = true;
 		if (c_lock == &Giant.lock_object) {
 #ifdef CALLOUT_PROFILING
 			(*gcalls)++;
 #endif
 			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
 			    c, c_func, c_arg);
 		} else {
 #ifdef CALLOUT_PROFILING
 			(*lockcalls)++;
 #endif
 			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
 			    c, c_func, c_arg);
 		}
 	} else {
 #ifdef CALLOUT_PROFILING
 		(*mpcalls)++;
 #endif
 		CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
 		    c, c_func, c_arg);
 	}
 	KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
 	    "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	sbt1 = sbinuptime();
 #endif
 	THREAD_NO_SLEEPING();
 	SDT_PROBE1(callout_execute, , , callout__start, c);
 	c_func(c_arg);
 	SDT_PROBE1(callout_execute, , , callout__end, c);
 	THREAD_SLEEPING_OK();
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	sbt2 = sbinuptime();
 	sbt2 -= sbt1;
 	if (sbt2 > maxdt) {
 		if (lastfunc != c_func || sbt2 > maxdt * 2) {
 			ts2 = sbttots(sbt2);
 			printf(
 		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
 			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
 		}
 		maxdt = sbt2;
 		lastfunc = c_func;
 	}
 #endif
 	KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
 	CTR1(KTR_CALLOUT, "callout %p finished", c);
 	if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
 		class->lc_unlock(c_lock);
 skip:
 	CC_LOCK(cc);
 	KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
 	cc_exec_curr(cc, direct) = NULL;
 	if (cc_exec_drain(cc, direct)) {
 		drain = cc_exec_drain(cc, direct);
 		cc_exec_drain(cc, direct) = NULL;
 		CC_UNLOCK(cc);
 		drain(c_arg);
 		CC_LOCK(cc);
 	}
 	if (cc_exec_waiting(cc, direct)) {
 		/*
 		 * There is someone waiting for the
 		 * callout to complete.
 		 * If the callout was scheduled for
 		 * migration just cancel it.
 		 */
 		if (cc_cce_migrating(cc, direct)) {
 			cc_cce_cleanup(cc, direct);
 
 			/*
 			 * It should be assert here that the callout is not
 			 * destroyed but that is not easy.
 			 */
 			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 		}
 		cc_exec_waiting(cc, direct) = false;
 		CC_UNLOCK(cc);
 		wakeup(&cc_exec_waiting(cc, direct));
 		CC_LOCK(cc);
 	} else if (cc_cce_migrating(cc, direct)) {
 #ifdef SMP
 		/*
 		 * If the callout was scheduled for
 		 * migration just perform it now.
 		 */
 		new_cpu = cc_migration_cpu(cc, direct);
 		new_time = cc_migration_time(cc, direct);
 		new_prec = cc_migration_prec(cc, direct);
 		new_func = cc_migration_func(cc, direct);
 		new_arg = cc_migration_arg(cc, direct);
 		cc_cce_cleanup(cc, direct);
 
 		/*
 		 * It should be assert here that the callout is not destroyed
 		 * but that is not easy.
 		 *
 		 * As first thing, handle deferred callout stops.
 		 */
 		if (!callout_migrating(c)) {
 			CTR3(KTR_CALLOUT,
 			     "deferred cancelled %p func %p arg %p",
 			     c, new_func, new_arg);
 			return;
 		}
 		c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 
 		new_cc = callout_cpu_switch(c, cc, new_cpu);
 		flags = (direct) ? C_DIRECT_EXEC : 0;
 		callout_cc_add(c, new_cc, new_time, new_prec, new_func,
 		    new_arg, new_cpu, flags);
 		CC_UNLOCK(new_cc);
 		CC_LOCK(cc);
 #else
 		panic("migration should not happen");
 #endif
 	}
 }
 
 /*
  * The callout mechanism is based on the work of Adam M. Costello and
  * George Varghese, published in a technical report entitled "Redesigning
  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
  * used in this implementation was published by G. Varghese and T. Lauck in
  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
  * the Efficient Implementation of a Timer Facility" in the Proceedings of
  * the 11th ACM Annual Symposium on Operating Systems Principles,
  * Austin, Texas Nov 1987.
  */
 
 /*
- * Software (low priority) clock interrupt.
+ * Software (low priority) clock interrupt thread handler.
  * Run periodic events from timeout queue.
  */
-void
-softclock(void *arg)
+static void
+softclock_thread(void *arg)
 {
+	struct thread *td = curthread;
 	struct callout_cpu *cc;
 	struct callout *c;
 #ifdef CALLOUT_PROFILING
-	int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
+	int depth, gcalls, lockcalls, mpcalls;
 #endif
 
 	cc = (struct callout_cpu *)arg;
 	CC_LOCK(cc);
-	while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
-		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
-		softclock_call_cc(c, cc,
+	for (;;) {
+		while (TAILQ_EMPTY(&cc->cc_expireq)) {
+			/*
+			 * Use CC_LOCK(cc) as the thread_lock while
+			 * idle.
+			 */
+			thread_lock(td);
+			thread_lock_set(td, (struct mtx *)&cc->cc_lock);
+			TD_SET_IWAIT(td);
+			mi_switch(SW_VOL | SWT_IWAIT);
+
+			/* mi_switch() drops thread_lock(). */
+			CC_LOCK(cc);
+		}
+
 #ifdef CALLOUT_PROFILING
-		    &mpcalls, &lockcalls, &gcalls,
+		depth = gcalls = lockcalls = mpcalls = 0;
 #endif
-		    0);
+		while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
+			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+			softclock_call_cc(c, cc,
 #ifdef CALLOUT_PROFILING
-		++depth;
+			    &mpcalls, &lockcalls, &gcalls,
 #endif
-	}
+			    0);
 #ifdef CALLOUT_PROFILING
-	avg_depth += (depth * 1000 - avg_depth) >> 8;
-	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
-	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
-	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
+			++depth;
 #endif
-	CC_UNLOCK(cc);
+		}
+#ifdef CALLOUT_PROFILING
+		avg_depth += (depth * 1000 - avg_depth) >> 8;
+		avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
+		avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
+		avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
+#endif
+	}
 }
 
 void
 callout_when(sbintime_t sbt, sbintime_t precision, int flags,
     sbintime_t *res, sbintime_t *prec_res)
 {
 	sbintime_t to_sbt, to_pr;
 
 	if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) {
 		*res = sbt;
 		*prec_res = precision;
 		return;
 	}
 	if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt)
 		sbt = tick_sbt;
 	if ((flags & C_HARDCLOCK) != 0 || sbt >= sbt_tickthreshold) {
 		/*
 		 * Obtain the time of the last hardclock() call on
 		 * this CPU directly from the kern_clocksource.c.
 		 * This value is per-CPU, but it is equal for all
 		 * active ones.
 		 */
 #ifdef __LP64__
 		to_sbt = DPCPU_GET(hardclocktime);
 #else
 		spinlock_enter();
 		to_sbt = DPCPU_GET(hardclocktime);
 		spinlock_exit();
 #endif
 		if (cold && to_sbt == 0)
 			to_sbt = sbinuptime();
 		if ((flags & C_HARDCLOCK) == 0)
 			to_sbt += tick_sbt;
 	} else
 		to_sbt = sbinuptime();
 	if (SBT_MAX - to_sbt < sbt)
 		to_sbt = SBT_MAX;
 	else
 		to_sbt += sbt;
 	*res = to_sbt;
 	to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
 	    sbt >> C_PRELGET(flags));
 	*prec_res = to_pr > precision ? to_pr : precision;
 }
 
 /*
  * New interface; clients allocate their own callout structures.
  *
  * callout_reset() - establish or change a timeout
  * callout_stop() - disestablish a timeout
  * callout_init() - initialize a callout structure so that it can
  *	safely be passed to callout_reset() and callout_stop()
  *
  * <sys/callout.h> defines three convenience macros:
  *
  * callout_active() - returns truth if callout has not been stopped,
  *	drained, or deactivated since the last time the callout was
  *	reset.
  * callout_pending() - returns truth if callout is still waiting for timeout
  * callout_deactivate() - marks the callout as having been serviced
  */
 int
 callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
     callout_func_t *ftn, void *arg, int cpu, int flags)
 {
 	sbintime_t to_sbt, precision;
 	struct callout_cpu *cc;
 	int cancelled, direct;
 	int ignore_cpu=0;
 
 	cancelled = 0;
 	if (cpu == -1) {
 		ignore_cpu = 1;
 	} else if ((cpu >= MAXCPU) ||
 		   ((CC_CPU(cpu))->cc_inited == 0)) {
 		/* Invalid CPU spec */
 		panic("Invalid CPU in callout %d", cpu);
 	}
 	callout_when(sbt, prec, flags, &to_sbt, &precision);
 
 	/* 
 	 * This flag used to be added by callout_cc_add, but the
 	 * first time you call this we could end up with the
 	 * wrong direct flag if we don't do it before we add.
 	 */
 	if (flags & C_DIRECT_EXEC) {
 		direct = 1;
 	} else {
 		direct = 0;
 	}
 	KASSERT(!direct || c->c_lock == NULL ||
 	    (LOCK_CLASS(c->c_lock)->lc_flags & LC_SPINLOCK),
 	    ("%s: direct callout %p has non-spin lock", __func__, c));
 	cc = callout_lock(c);
 	/*
 	 * Don't allow migration if the user does not care.
 	 */
 	if (ignore_cpu) {
 		cpu = c->c_cpu;
 	}
 
 	if (cc_exec_curr(cc, direct) == c) {
 		/*
 		 * We're being asked to reschedule a callout which is
 		 * currently in progress.  If there is a lock then we
 		 * can cancel the callout if it has not really started.
 		 */
 		if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
 			cancelled = cc_exec_cancel(cc, direct) = true;
 		if (cc_exec_waiting(cc, direct) || cc_exec_drain(cc, direct)) {
 			/*
 			 * Someone has called callout_drain to kill this
 			 * callout.  Don't reschedule.
 			 */
 			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
 			    cancelled ? "cancelled" : "failed to cancel",
 			    c, c->c_func, c->c_arg);
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 #ifdef SMP
 		if (callout_migrating(c)) {
 			/* 
 			 * This only occurs when a second callout_reset_sbt_on
 			 * is made after a previous one moved it into
 			 * deferred migration (below). Note we do *not* change
 			 * the prev_cpu even though the previous target may
 			 * be different.
 			 */
 			cc_migration_cpu(cc, direct) = cpu;
 			cc_migration_time(cc, direct) = to_sbt;
 			cc_migration_prec(cc, direct) = precision;
 			cc_migration_func(cc, direct) = ftn;
 			cc_migration_arg(cc, direct) = arg;
 			cancelled = 1;
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 #endif
 	}
 	if (c->c_iflags & CALLOUT_PENDING) {
 		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 			LIST_REMOVE(c, c_links.le);
 		} else {
 			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		}
 		cancelled = 1;
 		c->c_iflags &= ~ CALLOUT_PENDING;
 		c->c_flags &= ~ CALLOUT_ACTIVE;
 	}
 
 #ifdef SMP
 	/*
 	 * If the callout must migrate try to perform it immediately.
 	 * If the callout is currently running, just defer the migration
 	 * to a more appropriate moment.
 	 */
 	if (c->c_cpu != cpu) {
 		if (cc_exec_curr(cc, direct) == c) {
 			/* 
 			 * Pending will have been removed since we are
 			 * actually executing the callout on another
 			 * CPU. That callout should be waiting on the
 			 * lock the caller holds. If we set both
 			 * active/and/pending after we return and the
 			 * lock on the executing callout proceeds, it
 			 * will then see pending is true and return.
 			 * At the return from the actual callout execution
 			 * the migration will occur in softclock_call_cc
 			 * and this new callout will be placed on the 
 			 * new CPU via a call to callout_cpu_switch() which
 			 * will get the lock on the right CPU followed
 			 * by a call callout_cc_add() which will add it there.
 			 * (see above in softclock_call_cc()).
 			 */
 			cc_migration_cpu(cc, direct) = cpu;
 			cc_migration_time(cc, direct) = to_sbt;
 			cc_migration_prec(cc, direct) = precision;
 			cc_migration_func(cc, direct) = ftn;
 			cc_migration_arg(cc, direct) = arg;
 			c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
 			c->c_flags |= CALLOUT_ACTIVE;
 			CTR6(KTR_CALLOUT,
 		    "migration of %p func %p arg %p in %d.%08x to %u deferred",
 			    c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 			    (u_int)(to_sbt & 0xffffffff), cpu);
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 		cc = callout_cpu_switch(c, cc, cpu);
 	}
 #endif
 
 	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
 	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
 	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 	    (u_int)(to_sbt & 0xffffffff));
 	CC_UNLOCK(cc);
 
 	return (cancelled);
 }
 
 /*
  * Common idioms that can be optimized in the future.
  */
 int
 callout_schedule_on(struct callout *c, int to_ticks, int cpu)
 {
 	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
 }
 
 int
 callout_schedule(struct callout *c, int to_ticks)
 {
 	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
 }
 
 int
 _callout_stop_safe(struct callout *c, int flags, callout_func_t *drain)
 {
 	struct callout_cpu *cc, *old_cc;
 	struct lock_class *class;
 	int direct, sq_locked, use_lock;
 	int cancelled, not_on_a_list;
 
 	if ((flags & CS_DRAIN) != 0)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
 		    "calling %s", __func__);
 
 	KASSERT((flags & CS_DRAIN) == 0 || drain == NULL,
 	    ("Cannot set drain callback and CS_DRAIN flag at the same time"));
 
 	/*
 	 * Some old subsystems don't hold Giant while running a callout_stop(),
 	 * so just discard this check for the moment.
 	 */
 	if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
 		if (c->c_lock == &Giant.lock_object)
 			use_lock = mtx_owned(&Giant);
 		else {
 			use_lock = 1;
 			class = LOCK_CLASS(c->c_lock);
 			class->lc_assert(c->c_lock, LA_XLOCKED);
 		}
 	} else
 		use_lock = 0;
 	if (c->c_iflags & CALLOUT_DIRECT) {
 		direct = 1;
 	} else {
 		direct = 0;
 	}
 	sq_locked = 0;
 	old_cc = NULL;
 again:
 	cc = callout_lock(c);
 
 	if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
 	    (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
 	    ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
 		/*
 		 * Special case where this slipped in while we
 		 * were migrating *as* the callout is about to
 		 * execute. The caller probably holds the lock
 		 * the callout wants.
 		 *
 		 * Get rid of the migration first. Then set
 		 * the flag that tells this code *not* to
 		 * try to remove it from any lists (its not
 		 * on one yet). When the callout wheel runs,
 		 * it will ignore this callout.
 		 */
 		c->c_iflags &= ~CALLOUT_PENDING;
 		c->c_flags &= ~CALLOUT_ACTIVE;
 		not_on_a_list = 1;
 	} else {
 		not_on_a_list = 0;
 	}
 
 	/*
 	 * If the callout was migrating while the callout cpu lock was
 	 * dropped,  just drop the sleepqueue lock and check the states
 	 * again.
 	 */
 	if (sq_locked != 0 && cc != old_cc) {
 #ifdef SMP
 		CC_UNLOCK(cc);
 		sleepq_release(&cc_exec_waiting(old_cc, direct));
 		sq_locked = 0;
 		old_cc = NULL;
 		goto again;
 #else
 		panic("migration should not happen");
 #endif
 	}
 
 	/*
 	 * If the callout is running, try to stop it or drain it.
 	 */
 	if (cc_exec_curr(cc, direct) == c) {
 		/*
 		 * Succeed we to stop it or not, we must clear the
 		 * active flag - this is what API users expect.  If we're
 		 * draining and the callout is currently executing, first wait
 		 * until it finishes.
 		 */
 		if ((flags & CS_DRAIN) == 0)
 			c->c_flags &= ~CALLOUT_ACTIVE;
 
 		if ((flags & CS_DRAIN) != 0) {
 			/*
 			 * The current callout is running (or just
 			 * about to run) and blocking is allowed, so
 			 * just wait for the current invocation to
 			 * finish.
 			 */
 			if (cc_exec_curr(cc, direct) == c) {
 				/*
 				 * Use direct calls to sleepqueue interface
 				 * instead of cv/msleep in order to avoid
 				 * a LOR between cc_lock and sleepqueue
 				 * chain spinlocks.  This piece of code
 				 * emulates a msleep_spin() call actually.
 				 *
 				 * If we already have the sleepqueue chain
 				 * locked, then we can safely block.  If we
 				 * don't already have it locked, however,
 				 * we have to drop the cc_lock to lock
 				 * it.  This opens several races, so we
 				 * restart at the beginning once we have
 				 * both locks.  If nothing has changed, then
 				 * we will end up back here with sq_locked
 				 * set.
 				 */
 				if (!sq_locked) {
 					CC_UNLOCK(cc);
 					sleepq_lock(
 					    &cc_exec_waiting(cc, direct));
 					sq_locked = 1;
 					old_cc = cc;
 					goto again;
 				}
 
 				/*
 				 * Migration could be cancelled here, but
 				 * as long as it is still not sure when it
 				 * will be packed up, just let softclock()
 				 * take care of it.
 				 */
 				cc_exec_waiting(cc, direct) = true;
 				DROP_GIANT();
 				CC_UNLOCK(cc);
 				sleepq_add(
 				    &cc_exec_waiting(cc, direct),
 				    &cc->cc_lock.lock_object, "codrain",
 				    SLEEPQ_SLEEP, 0);
 				sleepq_wait(
 				    &cc_exec_waiting(cc, direct),
 					     0);
 				sq_locked = 0;
 				old_cc = NULL;
 
 				/* Reacquire locks previously released. */
 				PICKUP_GIANT();
 				goto again;
 			}
 			c->c_flags &= ~CALLOUT_ACTIVE;
 		} else if (use_lock &&
 			   !cc_exec_cancel(cc, direct) && (drain == NULL)) {
 			
 			/*
 			 * The current callout is waiting for its
 			 * lock which we hold.  Cancel the callout
 			 * and return.  After our caller drops the
 			 * lock, the callout will be skipped in
 			 * softclock(). This *only* works with a
 			 * callout_stop() *not* callout_drain() or
 			 * callout_async_drain().
 			 */
 			cc_exec_cancel(cc, direct) = true;
 			CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
 			KASSERT(!cc_cce_migrating(cc, direct),
 			    ("callout wrongly scheduled for migration"));
 			if (callout_migrating(c)) {
 				c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 #ifdef SMP
 				cc_migration_cpu(cc, direct) = CPUBLOCK;
 				cc_migration_time(cc, direct) = 0;
 				cc_migration_prec(cc, direct) = 0;
 				cc_migration_func(cc, direct) = NULL;
 				cc_migration_arg(cc, direct) = NULL;
 #endif
 			}
 			CC_UNLOCK(cc);
 			KASSERT(!sq_locked, ("sleepqueue chain locked"));
 			return (1);
 		} else if (callout_migrating(c)) {
 			/*
 			 * The callout is currently being serviced
 			 * and the "next" callout is scheduled at
 			 * its completion with a migration. We remove
 			 * the migration flag so it *won't* get rescheduled,
 			 * but we can't stop the one thats running so
 			 * we return 0.
 			 */
 			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 #ifdef SMP
 			/* 
 			 * We can't call cc_cce_cleanup here since
 			 * if we do it will remove .ce_curr and
 			 * its still running. This will prevent a
 			 * reschedule of the callout when the 
 			 * execution completes.
 			 */
 			cc_migration_cpu(cc, direct) = CPUBLOCK;
 			cc_migration_time(cc, direct) = 0;
 			cc_migration_prec(cc, direct) = 0;
 			cc_migration_func(cc, direct) = NULL;
 			cc_migration_arg(cc, direct) = NULL;
 #endif
 			CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
  			if (drain) {
 				KASSERT(cc_exec_drain(cc, direct) == NULL,
 				    ("callout drain function already set to %p",
 				    cc_exec_drain(cc, direct)));
 				cc_exec_drain(cc, direct) = drain;
 			}
 			CC_UNLOCK(cc);
 			return ((flags & CS_EXECUTING) != 0);
 		} else {
 			CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
 			if (drain) {
 				KASSERT(cc_exec_drain(cc, direct) == NULL,
 				    ("callout drain function already set to %p",
 				    cc_exec_drain(cc, direct)));
 				cc_exec_drain(cc, direct) = drain;
 			}
 		}
 		KASSERT(!sq_locked, ("sleepqueue chain still locked"));
 		cancelled = ((flags & CS_EXECUTING) != 0);
 	} else
 		cancelled = 1;
 
 	if (sq_locked)
 		sleepq_release(&cc_exec_waiting(cc, direct));
 
 	if ((c->c_iflags & CALLOUT_PENDING) == 0) {
 		CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 		    c, c->c_func, c->c_arg);
 		/*
 		 * For not scheduled and not executing callout return
 		 * negative value.
 		 */
 		if (cc_exec_curr(cc, direct) != c)
 			cancelled = -1;
 		CC_UNLOCK(cc);
 		return (cancelled);
 	}
 
 	c->c_iflags &= ~CALLOUT_PENDING;
 	c->c_flags &= ~CALLOUT_ACTIVE;
 
 	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 	    c, c->c_func, c->c_arg);
 	if (not_on_a_list == 0) {
 		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 			LIST_REMOVE(c, c_links.le);
 		} else {
 			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		}
 	}
 	CC_UNLOCK(cc);
 	return (cancelled);
 }
 
 void
 callout_init(struct callout *c, int mpsafe)
 {
 	bzero(c, sizeof *c);
 	if (mpsafe) {
 		c->c_lock = NULL;
 		c->c_iflags = CALLOUT_RETURNUNLOCKED;
 	} else {
 		c->c_lock = &Giant.lock_object;
 		c->c_iflags = 0;
 	}
 	c->c_cpu = cc_default_cpu;
 }
 
 void
 _callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
 {
 	bzero(c, sizeof *c);
 	c->c_lock = lock;
 	KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
 	    ("callout_init_lock: bad flags %d", flags));
 	KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
 	    ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
 	KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & LC_SLEEPABLE),
 	    ("%s: callout %p has sleepable lock", __func__, c));
 	c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
 	c->c_cpu = cc_default_cpu;
 }
 
 static int
 flssbt(sbintime_t sbt)
 {
 
 	sbt += (uint64_t)sbt >> 1;
 	if (sizeof(long) >= sizeof(sbintime_t))
 		return (flsl(sbt));
 	if (sbt >= SBT_1S)
 		return (flsl(((uint64_t)sbt) >> 32) + 32);
 	return (flsl(sbt));
 }
 
 /*
  * Dump immediate statistic snapshot of the scheduled callouts.
  */
 static int
 sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
 {
 	struct callout *tmp;
 	struct callout_cpu *cc;
 	struct callout_list *sc;
 	sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
 	int ct[64], cpr[64], ccpbk[32];
 	int error, val, i, count, tcum, pcum, maxc, c, medc;
 	int cpu;
 
 	val = 0;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	count = maxc = 0;
 	st = spr = maxt = maxpr = 0;
 	bzero(ccpbk, sizeof(ccpbk));
 	bzero(ct, sizeof(ct));
 	bzero(cpr, sizeof(cpr));
 	now = sbinuptime();
 	CPU_FOREACH(cpu) {
 		cc = CC_CPU(cpu);
 		CC_LOCK(cc);
 		for (i = 0; i < callwheelsize; i++) {
 			sc = &cc->cc_callwheel[i];
 			c = 0;
 			LIST_FOREACH(tmp, sc, c_links.le) {
 				c++;
 				t = tmp->c_time - now;
 				if (t < 0)
 					t = 0;
 				st += t / SBT_1US;
 				spr += tmp->c_precision / SBT_1US;
 				if (t > maxt)
 					maxt = t;
 				if (tmp->c_precision > maxpr)
 					maxpr = tmp->c_precision;
 				ct[flssbt(t)]++;
 				cpr[flssbt(tmp->c_precision)]++;
 			}
 			if (c > maxc)
 				maxc = c;
 			ccpbk[fls(c + c / 2)]++;
 			count += c;
 		}
 		CC_UNLOCK(cc);
 	}
 
 	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
 		tcum += ct[i];
 	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
 		pcum += cpr[i];
 	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
 		c += ccpbk[i];
 	medc = (i >= 2) ? (1 << (i - 2)) : 0;
 
 	printf("Scheduled callouts statistic snapshot:\n");
 	printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
 	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
 	printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
 	    medc,
 	    count / callwheelsize / mp_ncpus,
 	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
 	    maxc);
 	printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
 	    (st / count) / 1000000, (st / count) % 1000000,
 	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
 	printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
 	    (spr / count) / 1000000, (spr / count) % 1000000,
 	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
 	printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
 	    "   prec\t   pcum\n");
 	for (i = 0, tcum = pcum = 0; i < 64; i++) {
 		if (ct[i] == 0 && cpr[i] == 0)
 			continue;
 		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
 		tcum += ct[i];
 		pcum += cpr[i];
 		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
 		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
 		    i - 1 - (32 - CC_HASH_SHIFT),
 		    ct[i], tcum, cpr[i], pcum);
 	}
 	return (error);
 }
 SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, sysctl_kern_callout_stat, "I",
     "Dump immediate statistic snapshot of the scheduled callouts");
 
 #ifdef DDB
 static void
 _show_callout(struct callout *c)
 {
 
 	db_printf("callout %p\n", c);
 #define	C_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, c->e);
 	db_printf("   &c_links = %p\n", &(c->c_links));
 	C_DB_PRINTF("%" PRId64,	c_time);
 	C_DB_PRINTF("%" PRId64,	c_precision);
 	C_DB_PRINTF("%p",	c_arg);
 	C_DB_PRINTF("%p",	c_func);
 	C_DB_PRINTF("%p",	c_lock);
 	C_DB_PRINTF("%#x",	c_flags);
 	C_DB_PRINTF("%#x",	c_iflags);
 	C_DB_PRINTF("%d",	c_cpu);
 #undef	C_DB_PRINTF
 }
 
 DB_SHOW_COMMAND(callout, db_show_callout)
 {
 
 	if (!have_addr) {
 		db_printf("usage: show callout <struct callout *>\n");
 		return;
 	}
 
 	_show_callout((struct callout *)addr);
 }
 
 static void
 _show_last_callout(int cpu, int direct, const char *dirstr)
 {
 	struct callout_cpu *cc;
 	void *func, *arg;
 
 	cc = CC_CPU(cpu);
 	func = cc_exec_last_func(cc, direct);
 	arg = cc_exec_last_arg(cc, direct);
 	db_printf("cpu %d last%s callout function: %p ", cpu, dirstr, func);
 	db_printsym((db_expr_t)func, DB_STGY_ANY);
 	db_printf("\ncpu %d last%s callout argument: %p\n", cpu, dirstr, arg);
 }
 
 DB_SHOW_COMMAND(callout_last, db_show_callout_last)
 {
 	int cpu, last;
 
 	if (have_addr) {
 		if (addr < 0 || addr > mp_maxid || CPU_ABSENT(addr)) {
 			db_printf("no such cpu: %d\n", (int)addr);
 			return;
 		}
 		cpu = last = addr;
 	} else {
 		cpu = 0;
 		last = mp_maxid;
 	}
 
 	while (cpu <= last) {
 		if (!CPU_ABSENT(cpu)) {
 			_show_last_callout(cpu, 0, "");
 			_show_last_callout(cpu, 1, " direct");
 		}
 		cpu++;
 	}
 }
 #endif /* DDB */
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index ffe014eb8b42..25a2d0c41e82 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -1,672 +1,671 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)systm.h	8.7 (Berkeley) 3/29/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_SYSTM_H_
 #define	_SYS_SYSTM_H_
 
 #include <sys/cdefs.h>
 #include <machine/atomic.h>
 #include <machine/cpufunc.h>
 #include <sys/callout.h>
 #include <sys/queue.h>
 #include <sys/stdint.h>		/* for people using printf mainly */
 
 __NULLABILITY_PRAGMA_PUSH
 
 #ifdef _KERNEL
 extern int cold;		/* nonzero if we are doing a cold boot */
 extern int suspend_blocked;	/* block suspend due to pending shutdown */
 extern int rebooting;		/* kern_reboot() has been called. */
 extern const char *panicstr;	/* panic message */
 extern bool panicked;
 #define	KERNEL_PANICKED()	__predict_false(panicked)
 extern char version[];		/* system version */
 extern char compiler_version[];	/* compiler version */
 extern char copyright[];	/* system copyright */
 extern int kstack_pages;	/* number of kernel stack pages */
 
 extern u_long pagesizes[];	/* supported page sizes */
 extern long physmem;		/* physical memory */
 extern long realmem;		/* 'real' memory */
 
 extern char *rootdevnames[2];	/* names of possible root devices */
 
 extern int boothowto;		/* reboot flags, from console subsystem */
 extern int bootverbose;		/* nonzero to print verbose messages */
 
 extern int maxusers;		/* system tune hint */
 extern int ngroups_max;		/* max # of supplemental groups */
 extern int vm_guest;		/* Running as virtual machine guest? */
 
 extern u_long maxphys;		/* max raw I/O transfer size */
 
 /*
  * Detected virtual machine guest types. The intention is to expand
  * and/or add to the VM_GUEST_VM type if specific VM functionality is
  * ever implemented (e.g. vendor-specific paravirtualization features).
  * Keep in sync with vm_guest_sysctl_names[].
  */
 enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV,
 		VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_GUEST_VBOX,
 		VM_GUEST_PARALLELS, VM_LAST };
 
 #ifdef	INVARIANTS		/* The option is always available */
 #define	VNASSERT(exp, vp, msg) do {					\
 	if (__predict_false(!(exp))) {					\
 		vn_printf(vp, "VNASSERT failed: %s not true at %s:%d (%s)\n",\
 		   #exp, __FILE__, __LINE__, __func__);	 		\
 		kassert_panic msg;					\
 	}								\
 } while (0)
 #define	VNPASS(exp, vp)	do {						\
 	const char *_exp = #exp;					\
 	VNASSERT(exp, vp, ("condition %s not met at %s:%d (%s)",	\
 	    _exp, __FILE__, __LINE__, __func__));			\
 } while (0)
 #define	__assert_unreachable() \
 	panic("executing segment marked as unreachable at %s:%d (%s)\n", \
 	    __FILE__, __LINE__, __func__)
 #else
 #define	VNASSERT(exp, vp, msg) do { \
 } while (0)
 #define	VNPASS(exp, vp) do { \
 } while (0)
 #define	__assert_unreachable()	__unreachable()
 #endif
 
 #ifndef CTASSERT	/* Allow lint to override */
 #define	CTASSERT(x)	_Static_assert(x, "compile-time assertion failed")
 #endif
 #endif /* KERNEL */
 
 /*
  * These functions need to be declared before the KASSERT macro is invoked in
  * !KASSERT_PANIC_OPTIONAL builds, so their declarations are sort of out of
  * place compared to other function definitions in this header.  On the other
  * hand, this header is a bit disorganized anyway.
  */
 void	panic(const char *, ...) __dead2 __printflike(1, 2);
 void	vpanic(const char *, __va_list) __dead2 __printflike(1, 0);
 
 
 #if defined(_STANDALONE)
 struct ucred;
 /*
  * Until we have more experience with KASSERTS that are called
  * from the boot loader, they are off. The bootloader does this
  * a little differently than the kernel (we just call printf atm).
  * we avoid most of the common functions in the boot loader, so
  * declare printf() here too.
  */
 int	printf(const char *, ...) __printflike(1, 2);
 #  define kassert_panic printf
 #else /* !_STANDALONE */
 #  if defined(WITNESS) || defined(INVARIANT_SUPPORT)
 #    ifdef KASSERT_PANIC_OPTIONAL
 void	kassert_panic(const char *fmt, ...)  __printflike(1, 2);
 #    else
 #      define kassert_panic	panic
 #    endif /* KASSERT_PANIC_OPTIONAL */
 #  endif /* defined(WITNESS) || defined(INVARIANT_SUPPORT) */
 #endif /* _STANDALONE */
 
 #if defined(INVARIANTS) || defined(_STANDALONE)
 #define	KASSERT(exp,msg) do {						\
 	if (__predict_false(!(exp)))					\
 		kassert_panic msg;					\
 } while (0)
 #else /* !INVARIANTS && !_STANDALONE */
 #define	KASSERT(exp,msg) do { \
 } while (0)
 #endif /* INVARIANTS || _STANDALONE */
 
 /*
  * Helpful macros for quickly coming up with assertions with informative
  * panic messages.
  */
 #define MPASS(ex)		MPASS4(ex, #ex, __FILE__, __LINE__)
 #define MPASS2(ex, what)	MPASS4(ex, what, __FILE__, __LINE__)
 #define MPASS3(ex, file, line)	MPASS4(ex, #ex, file, line)
 #define MPASS4(ex, what, file, line)					\
 	KASSERT((ex), ("Assertion %s failed at %s:%d", what, file, line))
 
 /*
  * Align variables.
  */
 #define	__read_mostly		__section(".data.read_mostly")
 #define	__read_frequently	__section(".data.read_frequently")
 #define	__exclusive_cache_line	__aligned(CACHE_LINE_SIZE) \
 				    __section(".data.exclusive_cache_line")
 #ifdef _KERNEL
 #include <sys/param.h>		/* MAXCPU */
 #include <sys/pcpu.h>		/* curthread */
 #include <sys/kpilite.h>
 
 /*
  * Assert that a pointer can be loaded from memory atomically.
  *
  * This assertion enforces stronger alignment than necessary.  For example,
  * on some architectures, atomicity for unaligned loads will depend on
  * whether or not the load spans multiple cache lines.
  */
 #define	ASSERT_ATOMIC_LOAD_PTR(var, msg)				\
 	KASSERT(sizeof(var) == sizeof(void *) &&			\
 	    ((uintptr_t)&(var) & (sizeof(void *) - 1)) == 0, msg)
 
 /*
  * Assert that a thread is in critical(9) section.
  */
 #define	CRITICAL_ASSERT(td)						\
 	KASSERT((td)->td_critnest >= 1, ("Not in critical section"))
 
 /*
  * If we have already panic'd and this is the thread that called
  * panic(), then don't block on any mutexes but silently succeed.
  * Otherwise, the kernel will deadlock since the scheduler isn't
  * going to run the thread that holds any lock we need.
  */
 #define	SCHEDULER_STOPPED_TD(td)  ({					\
 	MPASS((td) == curthread);					\
 	__predict_false((td)->td_stopsched);				\
 })
 #define	SCHEDULER_STOPPED() SCHEDULER_STOPPED_TD(curthread)
 
 extern int osreldate;
 
 extern const void *zero_region;	/* address space maps to a zeroed page	*/
 
 extern int unmapped_buf_allowed;
 
 #ifdef __LP64__
 #define	IOSIZE_MAX		iosize_max()
 #define	DEVFS_IOSIZE_MAX	devfs_iosize_max()
 #else
 #define	IOSIZE_MAX		SSIZE_MAX
 #define	DEVFS_IOSIZE_MAX	SSIZE_MAX
 #endif
 
 /*
  * General function declarations.
  */
 
 struct inpcb;
 struct lock_object;
 struct malloc_type;
 struct mtx;
 struct proc;
 struct socket;
 struct thread;
 struct tty;
 struct ucred;
 struct uio;
 struct _jmp_buf;
 struct trapframe;
 struct eventtimer;
 
 int	setjmp(struct _jmp_buf *) __returns_twice;
 void	longjmp(struct _jmp_buf *, int) __dead2;
 int	dumpstatus(vm_offset_t addr, off_t count);
 int	nullop(void);
 int	eopnotsupp(void);
 int	ureadc(int, struct uio *);
 void	hashdestroy(void *, struct malloc_type *, u_long);
 void	*hashinit(int count, struct malloc_type *type, u_long *hashmask);
 void	*hashinit_flags(int count, struct malloc_type *type,
     u_long *hashmask, int flags);
 #define	HASH_NOWAIT	0x00000001
 #define	HASH_WAITOK	0x00000002
 
 void	*phashinit(int count, struct malloc_type *type, u_long *nentries);
 void	*phashinit_flags(int count, struct malloc_type *type, u_long *nentries,
     int flags);
 void	g_waitidle(void);
 
 void	cpu_flush_dcache(void *, size_t);
 void	cpu_rootconf(void);
 void	critical_enter_KBI(void);
 void	critical_exit_KBI(void);
 void	critical_exit_preempt(void);
 void	init_param1(void);
 void	init_param2(long physpages);
 void	init_static_kenv(char *, size_t);
 void	tablefull(const char *);
 
 /*
  * Allocate per-thread "current" state in the linuxkpi
  */
 extern int (*lkpi_alloc_current)(struct thread *, int);
 int linux_alloc_current_noop(struct thread *, int);
 
 #if (defined(KLD_MODULE) && !defined(KLD_TIED)) || defined(KTR_CRITICAL) || !defined(_KERNEL) || defined(GENOFFSET)
 #define critical_enter() critical_enter_KBI()
 #define critical_exit() critical_exit_KBI()
 #else
 static __inline void
 critical_enter(void)
 {
 	struct thread_lite *td;
 
 	td = (struct thread_lite *)curthread;
 	td->td_critnest++;
 	atomic_interrupt_fence();
 }
 
 static __inline void
 critical_exit(void)
 {
 	struct thread_lite *td;
 
 	td = (struct thread_lite *)curthread;
 	KASSERT(td->td_critnest != 0,
 	    ("critical_exit: td_critnest == 0"));
 	atomic_interrupt_fence();
 	td->td_critnest--;
 	atomic_interrupt_fence();
 	if (__predict_false(td->td_owepreempt))
 		critical_exit_preempt();
 
 }
 #endif
 
 #ifdef  EARLY_PRINTF
 typedef void early_putc_t(int ch);
 extern early_putc_t *early_putc;
 #endif
 int	kvprintf(char const *, void (*)(int, void*), void *, int,
 	    __va_list) __printflike(1, 0);
 void	log(int, const char *, ...) __printflike(2, 3);
 void	log_console(struct uio *);
 void	vlog(int, const char *, __va_list) __printflike(2, 0);
 int	asprintf(char **ret, struct malloc_type *mtp, const char *format, 
 	    ...) __printflike(3, 4);
 int	printf(const char *, ...) __printflike(1, 2);
 int	snprintf(char *, size_t, const char *, ...) __printflike(3, 4);
 int	sprintf(char *buf, const char *, ...) __printflike(2, 3);
 int	uprintf(const char *, ...) __printflike(1, 2);
 int	vprintf(const char *, __va_list) __printflike(1, 0);
 int	vasprintf(char **ret, struct malloc_type *mtp, const char *format,
 	    __va_list ap) __printflike(3, 0);
 int	vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0);
 int	vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0);
 int	vsprintf(char *buf, const char *, __va_list) __printflike(2, 0);
 int	sscanf(const char *, char const * _Nonnull, ...) __scanflike(2, 3);
 int	vsscanf(const char * _Nonnull, char const * _Nonnull, __va_list)  __scanflike(2, 0);
 long	strtol(const char *, char **, int);
 u_long	strtoul(const char *, char **, int);
 quad_t	strtoq(const char *, char **, int);
 u_quad_t strtouq(const char *, char **, int);
 void	tprintf(struct proc *p, int pri, const char *, ...) __printflike(3, 4);
 void	vtprintf(struct proc *, int, const char *, __va_list) __printflike(3, 0);
 void	hexdump(const void *ptr, int length, const char *hdr, int flags);
 #define	HD_COLUMN_MASK	0xff
 #define	HD_DELIM_MASK	0xff00
 #define	HD_OMIT_COUNT	(1 << 16)
 #define	HD_OMIT_HEX	(1 << 17)
 #define	HD_OMIT_CHARS	(1 << 18)
 
 #define ovbcopy(f, t, l) bcopy((f), (t), (l))
 void	explicit_bzero(void * _Nonnull, size_t);
 
 void	*memset(void * _Nonnull buf, int c, size_t len);
 void	*memcpy(void * _Nonnull to, const void * _Nonnull from, size_t len);
 void	*memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n);
 int	memcmp(const void *b1, const void *b2, size_t len);
 
 #ifdef SAN_NEEDS_INTERCEPTORS
 #define	SAN_INTERCEPTOR(func)	\
 	__CONCAT(SAN_INTERCEPTOR_PREFIX, __CONCAT(_, func))
 void	*SAN_INTERCEPTOR(memset)(void *, int, size_t);
 void	*SAN_INTERCEPTOR(memcpy)(void *, const void *, size_t);
 void	*SAN_INTERCEPTOR(memmove)(void *, const void *, size_t);
 int	SAN_INTERCEPTOR(memcmp)(const void *, const void *, size_t);
 #ifndef SAN_RUNTIME
 #define bcopy(from, to, len)	SAN_INTERCEPTOR(memmove)((to), (from), (len))
 #define bzero(buf, len)		SAN_INTERCEPTOR(memset)((buf), 0, (len))
 #define bcmp(b1, b2, len)	SAN_INTERCEPTOR(memcmp)((b1), (b2), (len))
 #define memset(buf, c, len)	SAN_INTERCEPTOR(memset)((buf), (c), (len))
 #define memcpy(to, from, len)	SAN_INTERCEPTOR(memcpy)((to), (from), (len))
 #define memmove(dest, src, n)	SAN_INTERCEPTOR(memmove)((dest), (src), (n))
 #define memcmp(b1, b2, len)	SAN_INTERCEPTOR(memcmp)((b1), (b2), (len))
 #endif /* !SAN_RUNTIME */
 #else /* !SAN_NEEDS_INTERCEPTORS */
 #define bcopy(from, to, len)	__builtin_memmove((to), (from), (len))
 #define bzero(buf, len)		__builtin_memset((buf), 0, (len))
 #define bcmp(b1, b2, len)	__builtin_memcmp((b1), (b2), (len))
 #define memset(buf, c, len)	__builtin_memset((buf), (c), (len))
 #define memcpy(to, from, len)	__builtin_memcpy((to), (from), (len))
 #define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))
 #define memcmp(b1, b2, len)	__builtin_memcmp((b1), (b2), (len))
 #endif /* SAN_NEEDS_INTERCEPTORS */
 
 void	*memset_early(void * _Nonnull buf, int c, size_t len);
 #define bzero_early(buf, len) memset_early((buf), 0, (len))
 void	*memcpy_early(void * _Nonnull to, const void * _Nonnull from, size_t len);
 void	*memmove_early(void * _Nonnull dest, const void * _Nonnull src, size_t n);
 #define bcopy_early(from, to, len) memmove_early((to), (from), (len))
 
 #define	copystr(src, dst, len, outlen)	({			\
 	size_t __r, __len, *__outlen;				\
 								\
 	__len = (len);						\
 	__outlen = (outlen);					\
 	__r = strlcpy((dst), (src), __len);			\
 	if (__outlen != NULL)					\
 		*__outlen = ((__r >= __len) ? __len : __r + 1);	\
 	((__r >= __len) ? ENAMETOOLONG : 0);			\
 })
 
 int	copyinstr(const void * __restrict udaddr,
 	    void * _Nonnull __restrict kaddr, size_t len,
 	    size_t * __restrict lencopied);
 int	copyin(const void * __restrict udaddr,
 	    void * _Nonnull __restrict kaddr, size_t len);
 int	copyin_nofault(const void * __restrict udaddr,
 	    void * _Nonnull __restrict kaddr, size_t len);
 int	copyout(const void * _Nonnull __restrict kaddr,
 	    void * __restrict udaddr, size_t len);
 int	copyout_nofault(const void * _Nonnull __restrict kaddr,
 	    void * __restrict udaddr, size_t len);
 
 #ifdef SAN_NEEDS_INTERCEPTORS
 int	SAN_INTERCEPTOR(copyin)(const void *, void *, size_t);
 int	SAN_INTERCEPTOR(copyinstr)(const void *, void *, size_t, size_t *);
 int	SAN_INTERCEPTOR(copyout)(const void *, void *, size_t);
 #ifndef SAN_RUNTIME
 #define	copyin(u, k, l)		SAN_INTERCEPTOR(copyin)((u), (k), (l))
 #define	copyinstr(u, k, l, lc)	SAN_INTERCEPTOR(copyinstr)((u), (k), (l), (lc))
 #define	copyout(k, u, l)	SAN_INTERCEPTOR(copyout)((k), (u), (l))
 #endif /* !SAN_RUNTIME */
 #endif /* SAN_NEEDS_INTERCEPTORS */
 
 int	fubyte(volatile const void *base);
 long	fuword(volatile const void *base);
 int	fuword16(volatile const void *base);
 int32_t	fuword32(volatile const void *base);
 int64_t	fuword64(volatile const void *base);
 int	fueword(volatile const void *base, long *val);
 int	fueword32(volatile const void *base, int32_t *val);
 int	fueword64(volatile const void *base, int64_t *val);
 int	subyte(volatile void *base, int byte);
 int	suword(volatile void *base, long word);
 int	suword16(volatile void *base, int word);
 int	suword32(volatile void *base, int32_t word);
 int	suword64(volatile void *base, int64_t word);
 uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval);
 u_long	casuword(volatile u_long *p, u_long oldval, u_long newval);
 int	casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp,
 	    uint32_t newval);
 int	casueword(volatile u_long *p, u_long oldval, u_long *oldvalp,
 	    u_long newval);
 
 #if defined(SAN_NEEDS_INTERCEPTORS) && !defined(KCSAN)
 int	SAN_INTERCEPTOR(fubyte)(volatile const void *base);
 int	SAN_INTERCEPTOR(fuword16)(volatile const void *base);
 int	SAN_INTERCEPTOR(fueword)(volatile const void *base, long *val);
 int	SAN_INTERCEPTOR(fueword32)(volatile const void *base, int32_t *val);
 int	SAN_INTERCEPTOR(fueword64)(volatile const void *base, int64_t *val);
 int	SAN_INTERCEPTOR(subyte)(volatile void *base, int byte);
 int	SAN_INTERCEPTOR(suword)(volatile void *base, long word);
 int	SAN_INTERCEPTOR(suword16)(volatile void *base, int word);
 int	SAN_INTERCEPTOR(suword32)(volatile void *base, int32_t word);
 int	SAN_INTERCEPTOR(suword64)(volatile void *base, int64_t word);
 int	SAN_INTERCEPTOR(casueword32)(volatile uint32_t *base, uint32_t oldval,
 	    uint32_t *oldvalp, uint32_t newval);
 int	SAN_INTERCEPTOR(casueword)(volatile u_long *p, u_long oldval,
 	    u_long *oldvalp, u_long newval);
 #ifndef SAN_RUNTIME
 #define	fubyte(b)		SAN_INTERCEPTOR(fubyte)((b))
 #define	fuword16(b)		SAN_INTERCEPTOR(fuword16)((b))
 #define	fueword(b, v)		SAN_INTERCEPTOR(fueword)((b), (v))
 #define	fueword32(b, v)		SAN_INTERCEPTOR(fueword32)((b), (v))
 #define	fueword64(b, v)		SAN_INTERCEPTOR(fueword64)((b), (v))
 #define	subyte(b, w)		SAN_INTERCEPTOR(subyte)((b), (w))
 #define	suword(b, w)		SAN_INTERCEPTOR(suword)((b), (w))
 #define	suword16(b, w)		SAN_INTERCEPTOR(suword16)((b), (w))
 #define	suword32(b, w)		SAN_INTERCEPTOR(suword32)((b), (w))
 #define	suword64(b, w)		SAN_INTERCEPTOR(suword64)((b), (w))
 #define	casueword32(b, o, p, n)	SAN_INTERCEPTOR(casueword32)((b), (o), (p), (n))
 #define	casueword(b, o, p, n)	SAN_INTERCEPTOR(casueword)((b), (o), (p), (n))
 #endif /* !SAN_RUNTIME */
 #endif /* SAN_NEEDS_INTERCEPTORS && !KCSAN */
 
 void	realitexpire(void *);
 
 int	sysbeep(int hertz, sbintime_t duration);
 
 void	hardclock(int cnt, int usermode);
 void	hardclock_sync(int cpu);
-void	softclock(void *);
 void	statclock(int cnt, int usermode);
 void	profclock(int cnt, int usermode, uintfptr_t pc);
 
 int	hardclockintr(void);
 
 void	startprofclock(struct proc *);
 void	stopprofclock(struct proc *);
 void	cpu_startprofclock(void);
 void	cpu_stopprofclock(void);
 void	suspendclock(void);
 void	resumeclock(void);
 sbintime_t 	cpu_idleclock(void);
 void	cpu_activeclock(void);
 void	cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt);
 void	cpu_et_frequency(struct eventtimer *et, uint64_t newfreq);
 extern int	cpu_disable_c2_sleep;
 extern int	cpu_disable_c3_sleep;
 
 char	*kern_getenv(const char *name);
 void	freeenv(char *env);
 int	getenv_int(const char *name, int *data);
 int	getenv_uint(const char *name, unsigned int *data);
 int	getenv_long(const char *name, long *data);
 int	getenv_ulong(const char *name, unsigned long *data);
 int	getenv_string(const char *name, char *data, int size);
 int	getenv_int64(const char *name, int64_t *data);
 int	getenv_uint64(const char *name, uint64_t *data);
 int	getenv_quad(const char *name, quad_t *data);
 int	getenv_bool(const char *name, bool *data);
 bool	getenv_is_true(const char *name);
 bool	getenv_is_false(const char *name);
 int	kern_setenv(const char *name, const char *value);
 int	kern_unsetenv(const char *name);
 int	testenv(const char *name);
 
 int	getenv_array(const char *name, void *data, int size, int *psize,
     int type_size, bool allow_signed);
 #define	GETENV_UNSIGNED	false	/* negative numbers not allowed */
 #define	GETENV_SIGNED	true	/* negative numbers allowed */
 
 typedef uint64_t (cpu_tick_f)(void);
 void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var);
 extern cpu_tick_f *cpu_ticks;
 uint64_t cpu_tickrate(void);
 uint64_t cputick2usec(uint64_t tick);
 
 #include <sys/libkern.h>
 
 /* Initialize the world */
 void	consinit(void);
 void	cpu_initclocks(void);
 void	cpu_initclocks_bsp(void);
 void	cpu_initclocks_ap(void);
 void	usrinfoinit(void);
 
 /* Finalize the world */
 void	kern_reboot(int) __dead2;
 void	shutdown_nice(int);
 
 /* Stubs for obsolete functions that used to be for interrupt management */
 static __inline intrmask_t	splhigh(void)		{ return 0; }
 static __inline intrmask_t	splimp(void)		{ return 0; }
 static __inline intrmask_t	splnet(void)		{ return 0; }
 static __inline intrmask_t	spltty(void)		{ return 0; }
 static __inline void		splx(intrmask_t ipl __unused)	{ return; }
 
 /*
  * Common `proc' functions are declared here so that proc.h can be included
  * less often.
  */
 int	_sleep(const void * _Nonnull chan, struct lock_object *lock, int pri,
 	   const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags);
 #define	msleep(chan, mtx, pri, wmesg, timo)				\
 	_sleep((chan), &(mtx)->lock_object, (pri), (wmesg),		\
 	    tick_sbt * (timo), 0, C_HARDCLOCK)
 #define	msleep_sbt(chan, mtx, pri, wmesg, bt, pr, flags)		\
 	_sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (bt), (pr),	\
 	    (flags))
 int	msleep_spin_sbt(const void * _Nonnull chan, struct mtx *mtx,
 	    const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags);
 #define	msleep_spin(chan, mtx, wmesg, timo)				\
 	msleep_spin_sbt((chan), (mtx), (wmesg), tick_sbt * (timo),	\
 	    0, C_HARDCLOCK)
 int	pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr,
 	    int flags);
 #define	pause(wmesg, timo)						\
 	pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK)
 #define	pause_sig(wmesg, timo)						\
 	pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK | C_CATCH)
 #define	tsleep(chan, pri, wmesg, timo)					\
 	_sleep((chan), NULL, (pri), (wmesg), tick_sbt * (timo),		\
 	    0, C_HARDCLOCK)
 #define	tsleep_sbt(chan, pri, wmesg, bt, pr, flags)			\
 	_sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags))
 void	wakeup(const void *chan);
 void	wakeup_one(const void *chan);
 void	wakeup_any(const void *chan);
 
 /*
  * Common `struct cdev *' stuff are declared here to avoid #include poisoning
  */
 
 struct cdev;
 dev_t dev2udev(struct cdev *x);
 const char *devtoname(struct cdev *cdev);
 
 #ifdef __LP64__
 size_t	devfs_iosize_max(void);
 size_t	iosize_max(void);
 #endif
 
 int poll_no_poll(int events);
 
 /* XXX: Should be void nanodelay(u_int nsec); */
 void	DELAY(int usec);
 
 /* Root mount holdback API */
 struct root_hold_token {
 	int				flags;
 	const char			*who;
 	TAILQ_ENTRY(root_hold_token)	list;
 };
 
 struct root_hold_token *root_mount_hold(const char *identifier);
 void root_mount_hold_token(const char *identifier, struct root_hold_token *h);
 void root_mount_rel(struct root_hold_token *h);
 int root_mounted(void);
 
 /*
  * Unit number allocation API. (kern/subr_unit.c)
  */
 struct unrhdr;
 struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex);
 void init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex);
 void delete_unrhdr(struct unrhdr *uh);
 void clear_unrhdr(struct unrhdr *uh);
 void clean_unrhdr(struct unrhdr *uh);
 void clean_unrhdrl(struct unrhdr *uh);
 int alloc_unr(struct unrhdr *uh);
 int alloc_unr_specific(struct unrhdr *uh, u_int item);
 int alloc_unrl(struct unrhdr *uh);
 void free_unr(struct unrhdr *uh, u_int item);
 
 #ifndef __LP64__
 #define UNR64_LOCKED
 #endif
 
 struct unrhdr64 {
         uint64_t	counter;
 };
 
 static __inline void
 new_unrhdr64(struct unrhdr64 *unr64, uint64_t low)
 {
 
 	unr64->counter = low;
 }
 
 #ifdef UNR64_LOCKED
 uint64_t alloc_unr64(struct unrhdr64 *);
 #else
 static __inline uint64_t
 alloc_unr64(struct unrhdr64 *unr64)
 {
 
 	return (atomic_fetchadd_64(&unr64->counter, 1));
 }
 #endif
 
 void	intr_prof_stack_use(struct thread *td, struct trapframe *frame);
 
 void counted_warning(unsigned *counter, const char *msg);
 
 /*
  * APIs to manage deprecation and obsolescence.
  */
 void _gone_in(int major, const char *msg);
 void _gone_in_dev(device_t dev, int major, const char *msg);
 #ifdef NO_OBSOLETE_CODE
 #define __gone_ok(m, msg)					 \
 	_Static_assert(m < P_OSREL_MAJOR(__FreeBSD_version)),	 \
 	    "Obsolete code: " msg);
 #else
 #define	__gone_ok(m, msg)
 #endif
 #define gone_in(major, msg)		__gone_ok(major, msg) _gone_in(major, msg)
 #define gone_in_dev(dev, major, msg)	__gone_ok(major, msg) _gone_in_dev(dev, major, msg)
 
 #if defined(INVARIANTS) || defined(WITNESS)
 #define	__diagused
 #else
 #define	__diagused	__unused
 #endif
 
 #endif /* _KERNEL */
 
 __NULLABILITY_PRAGMA_POP
 #endif /* !_SYS_SYSTM_H_ */