diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c
index 2731f581a29f..0d470aeafcd5 100644
--- a/sys/kern/kern_condvar.c
+++ b/sys/kern/kern_condvar.c
@@ -1,484 +1,484 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/condvar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/resourcevar.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #include <sys/uio.h>
 #include <sys/user.h>
 #endif
 
 /*
  * A bound below which cv_waiters is valid.  Once cv_waiters reaches this bound,
  * cv_signal must manually check the wait queue for threads.
  */
 #define	CV_WAITERS_BOUND	INT_MAX
 
 /*
  * Record one more waiter on cvp.  The count saturates at CV_WAITERS_BOUND
  * instead of overflowing; the cv_wait* functions in this file invoke the
  * macro after taking the sleep queue lock for cvp.
  */
 #define	CV_WAITERS_INC(cvp) do {					\
 	if ((cvp)->cv_waiters < CV_WAITERS_BOUND)			\
 		(cvp)->cv_waiters++;					\
 } while (0)
 
 /*
  * Common sanity checks for cv_wait* functions: the calling thread must be
  * non-NULL and running, and both the condition variable and the interlock
  * must be non-NULL.  All checks are KASSERTs.
  */
 #define	CV_ASSERT(cvp, lock, td) do {					\
 	KASSERT((td) != NULL, ("%s: td NULL", __func__));		\
 	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
 	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
 	KASSERT((lock) != NULL, ("%s: lock NULL", __func__));		\
 } while (0)
 
 /*
  * Initialize a condition variable.  Must be called before use.
  *
  * The description pointer is stored as-is (the string is not copied) and
  * the waiter count starts at zero.
  */
 void
 cv_init(struct cv *cvp, const char *desc)
 {
 
 	cvp->cv_description = desc;
 	cvp->cv_waiters = 0;
 }
 
 /*
  * Destroy a condition variable.  The condition variable must be re-initialized
  * in order to be re-used.
  *
  * Nothing is released here.  With INVARIANTS the function asserts that no
  * sleep queue is currently associated with cvp; without INVARIANTS the body
  * is empty.
  */
 void
 cv_destroy(struct cv *cvp)
 {
 #ifdef INVARIANTS
 	struct sleepqueue *sq;
 
 	/* The lookup is done with the sleep queue lock for cvp held. */
 	sleepq_lock(cvp);
 	sq = sleepq_lookup(cvp);
 	sleepq_release(cvp);
 	KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__));
 #endif
 }
 
 /*
  * Wait on a condition variable.  The current thread is placed on the condition
  * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
  * condition variable will resume the thread.  The mutex is released before
  * sleeping and will be held on return.  It is recommended that the mutex be
  * held when cv_signal or cv_broadcast are called.
  *
  * If the scheduler has been stopped the function returns immediately,
  * without sleeping and without touching the lock.
  */
 void
 _cv_wait(struct cv *cvp, struct lock_object *lock)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 #ifdef KTRACE
 	char wmesg[WMESGLEN + 1];
 #endif
 	struct lock_class *class;
 	struct thread *td;
 	uintptr_t lock_state;
 
 	td = curthread;
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return;
 
 #ifdef KTRACE
 	/*
 	 * The wait message is copied before sleeping and reused for the
 	 * trace record emitted after wakeup.
 	 */
 	if (KTRPOINT(td, KTR_CSW)) {
 		strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg));
 		ktrcsw(1, 0, wmesg);
 	} else {
 		wmesg[0] = '\0';
 	}
 #endif
 
 	class = LOCK_CLASS(lock);
 	lock_state = 0;
 	sleepq_lock(cvp);
 
 	CV_WAITERS_INC(cvp);
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	/*
 	 * Queue the thread first, then release the interlock.  For
 	 * LC_SLEEPABLE lock classes the sleep queue lock is dropped around
 	 * lc_unlock() and retaken before sleeping.  Giant is not unlocked
 	 * through its lock class; it is covered by DROP_GIANT() above.
 	 */
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	sleepq_wait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, wmesg);
 #endif
 	PICKUP_GIANT();
 	/* Retake the interlock, passing back the state lc_unlock() returned. */
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 }
 
 /*
  * Wait on a condition variable.  This function differs from cv_wait by
  * not acquiring the mutex after condition variable was signaled.
  *
  * Giant may not be used as the interlock (asserted below).  The lock is
  * released on every return path, including the early return taken when the
  * scheduler has been stopped.
  */
 void
 _cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
 {
 #ifdef KTRACE
 	char wmesg[WMESGLEN + 1];
 #endif
 	struct lock_class *class;
 	struct thread *td;
 
 	td = curthread;
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	KASSERT(lock != &Giant.lock_object,
 	    ("cv_wait_unlock cannot be used with Giant"));
 	class = LOCK_CLASS(lock);
 
 	/* No sleep when the scheduler is stopped, but still drop the lock. */
-	if (SCHEDULER_STOPPED_TD(td)) {
+	if (SCHEDULER_STOPPED()) {
 		class->lc_unlock(lock);
 		return;
 	}
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW)) {
 		strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg));
 		ktrcsw(1, 0, wmesg);
 	} else {
 		wmesg[0] = '\0';
 	}
 #endif
 
 	sleepq_lock(cvp);
 
 	CV_WAITERS_INC(cvp);
 	DROP_GIANT();
 
 	/*
 	 * Queue the thread, then release the interlock.  The value returned
 	 * by lc_unlock() is discarded because the lock is never retaken.
 	 * For LC_SLEEPABLE classes the sleep queue lock is dropped around
 	 * the unlock.
 	 */
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	if (class->lc_flags & LC_SLEEPABLE)
 		sleepq_release(cvp);
 	class->lc_unlock(lock);
 	if (class->lc_flags & LC_SLEEPABLE)
 		sleepq_lock(cvp);
 	sleepq_wait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, wmesg);
 #endif
 	PICKUP_GIANT();
 }
 
 /*
  * Wait on a condition variable, allowing interruption by signals.  Return 0 if
  * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
  * a signal was caught.  If ERESTART is returned the system call should be
  * restarted if possible.
  *
  * The return value is whatever sleepq_wait_sig() reports.  If the scheduler
  * has been stopped the function returns 0 immediately without sleeping and
  * without touching the lock.
  */
 int
 _cv_wait_sig(struct cv *cvp, struct lock_object *lock)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 #ifdef KTRACE
 	char wmesg[WMESGLEN + 1];
 #endif
 	struct lock_class *class;
 	struct thread *td;
 	uintptr_t lock_state;
 	int rval;
 
 	td = curthread;
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW)) {
 		strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg));
 		ktrcsw(1, 0, wmesg);
 	} else {
 		wmesg[0] = '\0';
 	}
 #endif
 
 	class = LOCK_CLASS(lock);
 	lock_state = 0;
 	sleepq_lock(cvp);
 
 	CV_WAITERS_INC(cvp);
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	/* Same queue-then-unlock sequence as _cv_wait, but interruptible. */
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
 	    SLEEPQ_INTERRUPTIBLE, 0);
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_wait_sig(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, wmesg);
 #endif
 	PICKUP_GIANT();
 	/* The interlock is retaken regardless of how the sleep ended. */
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
  * Wait on a condition variable for (at most) the value specified in sbt
  * argument. Returns 0 if the process was resumed by cv_signal or cv_broadcast,
  * EWOULDBLOCK if the timeout expires.
  *
  * sbt, pr and flags are handed unchanged to sleepq_set_timeout_sbt(); the
  * return value is whatever sleepq_timedwait() reports.  If the scheduler has
  * been stopped the function returns 0 immediately without sleeping and
  * without touching the lock.
  */
 int
 _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt,
     sbintime_t pr, int flags)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 #ifdef KTRACE
 	char wmesg[WMESGLEN + 1];
 #endif
 	struct lock_class *class;
 	struct thread *td;
 	/*
 	 * NOTE(review): lock_state is an int here but a uintptr_t in
 	 * _cv_wait() and _cv_wait_sig(); the uintptr_t returned by
 	 * lc_unlock() may be narrowed before it is passed to lc_lock().
 	 */
 	int lock_state, rval;
 
 	td = curthread;
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW)) {
 		strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg));
 		ktrcsw(1, 0, wmesg);
 	} else {
 		wmesg[0] = '\0';
 	}
 #endif
 
 	class = LOCK_CLASS(lock);
 	lock_state = 0;
 	sleepq_lock(cvp);
 
 	CV_WAITERS_INC(cvp);
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	/* The timeout is armed after queueing and before the lock is dropped. */
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_timedwait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, wmesg);
 #endif
 	PICKUP_GIANT();
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
  * Wait on a condition variable for (at most) the value specified in sbt
  * argument, allowing interruption by signals.
  * Returns 0 if the thread was resumed by cv_signal or cv_broadcast,
  * EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if a signal
  * was caught.
  *
  * sbt, pr and flags are handed unchanged to sleepq_set_timeout_sbt(); the
  * return value is whatever sleepq_timedwait_sig() reports.  If the scheduler
  * has been stopped the function returns 0 immediately without sleeping and
  * without touching the lock.
  */
 int
 _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock,
     sbintime_t sbt, sbintime_t pr, int flags)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 #ifdef KTRACE
 	char wmesg[WMESGLEN + 1];
 #endif
 	struct lock_class *class;
 	struct thread *td;
 	/*
 	 * NOTE(review): lock_state is an int here but a uintptr_t in
 	 * _cv_wait() and _cv_wait_sig(); the uintptr_t returned by
 	 * lc_unlock() may be narrowed before it is passed to lc_lock().
 	 */
 	int lock_state, rval;
 
 	td = curthread;
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW)) {
 		strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg));
 		ktrcsw(1, 0, wmesg);
 	} else {
 		wmesg[0] = '\0';
 	}
 #endif
 
 	class = LOCK_CLASS(lock);
 	lock_state = 0;
 	sleepq_lock(cvp);
 
 	CV_WAITERS_INC(cvp);
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	/* Interruptible sleep with a timeout armed before the lock is dropped. */
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
 	    SLEEPQ_INTERRUPTIBLE, 0);
 	sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_timedwait_sig(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, wmesg);
 #endif
 	PICKUP_GIANT();
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
  * Signal a condition variable, wakes up one waiting thread.  Will also wake up
  * the swapper if the process is not in memory, so that it can bring the
  * sleeping process in.  Note that this may also result in additional threads
  * being made runnable.  Should be called with the same mutex as was passed to
  * cv_wait held.
  */
 void
 cv_signal(struct cv *cvp)
 {
 
 	/* Fast path without the sleep queue lock: no waiter is recorded. */
 	if (cvp->cv_waiters == 0)
 		return;
 	sleepq_lock(cvp);
 	/* Check again now that the sleep queue lock is held. */
 	if (cvp->cv_waiters == 0) {
 		sleepq_release(cvp);
 		return;
 	}
 	if (cvp->cv_waiters == CV_WAITERS_BOUND && sleepq_lookup(cvp) == NULL) {
 		/*
 		 * The counter has saturated and is no longer exact.  No
 		 * sleep queue exists, so nobody is waiting: reset it.
 		 */
 		cvp->cv_waiters = 0;
 		sleepq_release(cvp);
 	} else {
 		/* A saturated counter is left alone; otherwise drop one. */
 		if (cvp->cv_waiters < CV_WAITERS_BOUND)
 			cvp->cv_waiters--;
 		/*
 		 * There is no sleepq_release() on this path; presumably
 		 * SLEEPQ_DROP makes sleepq_signal() release the sleep queue
 		 * lock itself -- confirm against sleepqueue(9).
 		 */
 		if (sleepq_signal(cvp, SLEEPQ_CONDVAR | SLEEPQ_DROP, 0, 0))
 			kick_proc0();
 	}
 }
 
 /*
  * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
  * Should be called with the same mutex as was passed to cv_wait held.
  *
  * pri is forwarded to sleepq_broadcast(); a value of -1 is translated to 0
  * first (see the XXX note below).
  */
 void
 cv_broadcastpri(struct cv *cvp, int pri)
 {
 	int wakeup_swapper;
 
 	/* Fast path without the sleep queue lock: no waiter is recorded. */
 	if (cvp->cv_waiters == 0)
 		return;
 	/*
 	 * XXX sleepq_broadcast pri argument changed from -1 meaning
 	 * no pri to 0 meaning no pri.
 	 */
 	wakeup_swapper = 0;
 	if (pri == -1)
 		pri = 0;
 	sleepq_lock(cvp);
 	/* Re-check under the lock; the count is cleared before the wakeup. */
 	if (cvp->cv_waiters > 0) {
 		cvp->cv_waiters = 0;
 		wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0);
 	}
 	sleepq_release(cvp);
 	/* The swapper is kicked only after the sleep queue lock is released. */
 	if (wakeup_swapper)
 		kick_proc0();
 }
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 6071ac7fd6f1..92be72546b46 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -1,1356 +1,1356 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
  */
 
 /*
  * Machine independent bits of mutex implementation.
  */
 
 #include <sys/cdefs.h>
 #include "opt_adaptive_mutexes.h"
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/turnstile.h>
 #include <sys/vmmeter.h>
 #include <sys/lock_profile.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cpu.h>
 
 #include <ddb/ddb.h>
 
 #include <fs/devfs/devfs_int.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 /*
  * Adaptive spinning is compiled in only for SMP kernels that do not define
  * NO_ADAPTIVE_MUTEXES.
  */
 #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
 #define	ADAPTIVE_MUTEXES
 #endif
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 /* Soft PMC event raised with PMC_SOFT_CALL() on the contested lock paths. */
 PMC_SOFT_DEFINE( , , lock, failed);
 #endif
 
 /*
  * Return the mutex address when the lock cookie address is provided.
  * This functionality assumes that struct mtx* have a member named mtx_lock.
  */
 #define	mtxlock2mtx(c)	(__containerof(c, struct mtx, mtx_lock))
 
 /*
  * Internal utility macros.
  *
  * mtx_unowned(m):   true when the lock word equals MTX_UNOWNED.
  * mtx_destroyed(m): true when the lock word equals MTX_DESTROYED.
  */
 #define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
 
 #define	mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED)
 
 /*
  * Forward declarations of the lock class methods that are installed in
  * lock_class_mtx_sleep and lock_class_mtx_spin.
  */
 static void	assert_mtx(const struct lock_object *lock, int what);
 #ifdef DDB
 static void	db_show_mtx(const struct lock_object *lock);
 #endif
 static void	lock_mtx(struct lock_object *lock, uintptr_t how);
 static void	lock_spin(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_mtx(const struct lock_object *lock,
 		    struct thread **owner);
 #endif
 static uintptr_t unlock_mtx(struct lock_object *lock);
 static uintptr_t unlock_spin(struct lock_object *lock);
 
 /*
  * Lock classes for sleep and spin mutexes.
  *
  * Both classes are LC_RECURSABLE and share the assert, DDB show and owner
  * methods; they differ in the LC_SLEEPLOCK/LC_SPINLOCK flag and in the
  * lc_lock/lc_unlock methods.
  */
 struct lock_class lock_class_mtx_sleep = {
 	.lc_name = "sleep mutex",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
 	.lc_assert = assert_mtx,
 #ifdef DDB
 	.lc_ddb_show = db_show_mtx,
 #endif
 	.lc_lock = lock_mtx,
 	.lc_unlock = unlock_mtx,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_mtx,
 #endif
 };
 struct lock_class lock_class_mtx_spin = {
 	.lc_name = "spin mutex",
 	.lc_flags = LC_SPINLOCK | LC_RECURSABLE,
 	.lc_assert = assert_mtx,
 #ifdef DDB
 	.lc_ddb_show = db_show_mtx,
 #endif
 	.lc_lock = lock_spin,
 	.lc_unlock = unlock_spin,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_mtx,
 #endif
 };
 
 /*
  * Backoff configuration used while spinning on a sleep mutex.  With
  * MUTEX_CUSTOM_BACKOFF a private mtx_delay is defined and its base/max
  * fields are exported under the debug.mtx sysctl node; otherwise mtx_delay
  * is an alias for locks_delay.
  */
 #ifdef ADAPTIVE_MUTEXES
 #ifdef MUTEX_CUSTOM_BACKOFF
 static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
     "mtx debugging");
 
 static struct lock_delay_config __read_frequently mtx_delay;
 
 SYSCTL_U16(_debug_mtx, OID_AUTO, delay_base, CTLFLAG_RW, &mtx_delay.base,
     0, "");
 SYSCTL_U16(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
     0, "");
 
 LOCK_DELAY_SYSINIT_DEFAULT(mtx_delay);
 #else
 #define mtx_delay	locks_delay
 #endif
 #endif
 
 /*
  * Same arrangement for spin mutexes, selected by MUTEX_SPIN_CUSTOM_BACKOFF
  * and exported under debug.mtx_spin.
  *
  * NOTE(review): base/max are exported with SYSCTL_INT here, while the same
  * struct lock_delay_config fields are exported with SYSCTL_U16 above --
  * confirm which width matches the field type.
  */
 #ifdef MUTEX_SPIN_CUSTOM_BACKOFF
 static SYSCTL_NODE(_debug, OID_AUTO, mtx_spin,
     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
     "mtx spin debugging");
 
 static struct lock_delay_config __read_frequently mtx_spin_delay;
 
 SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_base, CTLFLAG_RW,
     &mtx_spin_delay.base, 0, "");
 SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_max, CTLFLAG_RW,
     &mtx_spin_delay.max, 0, "");
 
 LOCK_DELAY_SYSINIT_DEFAULT(mtx_spin_delay);
 #else
 #define mtx_spin_delay	locks_delay
 #endif
 
 /*
  * System-wide mutexes.  Giant is additionally tagged
  * __exclusive_cache_line.
  */
 struct mtx blocked_lock;
 struct mtx __exclusive_cache_line Giant;
 
 /* Forward declaration of a helper that takes the spin-loop delay state. */
 static void _mtx_lock_indefinite_check(struct mtx *, struct lock_delay_arg *);
 
 /*
  * lc_assert method for both mutex lock classes: forwards to mtx_assert()
  * after translating LA_LOCKED into LA_XLOCKED.
  */
 void
 assert_mtx(const struct lock_object *lock, int what)
 {
 
 	/*
 	 * Treat LA_LOCKED as if LA_XLOCKED was asserted.
 	 *
 	 * Some callers of lc_assert use LA_LOCKED to indicate that either
 	 * a shared lock or write lock was held, while other callers use
 	 * the more strict LA_XLOCKED (used as MA_OWNED).
 	 *
 	 * Mutex is the only lock class that can not be shared, as a result,
 	 * we can reasonably consider the caller really intends to assert
 	 * LA_XLOCKED when they are asserting LA_LOCKED on a mutex object.
 	 */
 	if (what & LA_LOCKED) {
 		what &= ~LA_LOCKED;
 		what |= LA_XLOCKED;
 	}
 	mtx_assert((const struct mtx *)lock, what);
 }
 
 /*
  * lc_lock method for sleep mutexes.  The 'how' state cookie is ignored.
  */
 void
 lock_mtx(struct lock_object *lock, uintptr_t how)
 {
 
 	mtx_lock((struct mtx *)lock);
 }
 
 /*
  * lc_lock method for spin mutexes.  The 'how' state cookie is ignored.
  */
 void
 lock_spin(struct lock_object *lock, uintptr_t how)
 {
 
 	mtx_lock_spin((struct mtx *)lock);
 }
 
 /*
  * lc_unlock method for sleep mutexes.  Asserts that the mutex is owned and
  * not recursed, releases it and returns 0 as the state cookie.
  */
 uintptr_t
 unlock_mtx(struct lock_object *lock)
 {
 	struct mtx *m;
 
 	m = (struct mtx *)lock;
 	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
 	mtx_unlock(m);
 	return (0);
 }
 
 /*
  * lc_unlock method for spin mutexes.  Asserts that the mutex is owned and
  * not recursed, releases it and returns 0 as the state cookie.
  */
 uintptr_t
 unlock_spin(struct lock_object *lock)
 {
 	struct mtx *m;
 
 	m = (struct mtx *)lock;
 	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
 	mtx_unlock_spin(m);
 	return (0);
 }
 
 #ifdef KDTRACE_HOOKS
 /*
  * lc_owner method for both mutex classes.  Stores the owning thread, taken
  * from the lock word with the MTX_FLAGMASK bits cleared, in *owner and
  * returns non-zero when that pointer is not NULL.
  */
 int
 owner_mtx(const struct lock_object *lock, struct thread **owner)
 {
 	const struct mtx *m;
 	uintptr_t x;
 
 	m = (const struct mtx *)lock;
 	/* Read the lock word once so the mask is applied to a stable value. */
 	x = m->mtx_lock;
 	*owner = (struct thread *)(x & ~MTX_FLAGMASK);
 	return (*owner != NULL);
 }
 #endif
 
 /*
  * Function versions of the inlined __mtx_* macros.  These are used by
  * modules and can also be called from assembly language if needed.
  */
 /*
  * Acquire a sleep mutex.  'c' is the address of the mutex's mtx_lock
  * cookie; file and line identify the caller for assertions, WITNESS and
  * lock logging.  An uncontested acquire is attempted first; on failure the
  * observed lock value is handed to _mtx_lock_sleep().
  */
 void
 __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 	uintptr_t tid, v;
 
 	m = mtxlock2mtx(c);
 
 	/* Idle threads may only get here from kdb or with the scheduler stopped. */
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(curthread),
 	    ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
 	    curthread, m->lock_object.lo_name, file, line));
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_lock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 	WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
 	    LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 
 	tid = (uintptr_t)curthread;
 	v = MTX_UNOWNED;
 	if (!_mtx_obtain_lock_fetch(m, &v, tid))
 		_mtx_lock_sleep(m, v, opts, file, line);
 	else
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
 		    m, 0, 0, file, line);
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
 	    file, line);
 	TD_LOCKS_INC(curthread);
 }
 
 /*
  * Release a sleep mutex identified by its mtx_lock cookie.  The mutex must
  * be a live sleep mutex owned by the caller (asserted).  With
  * LOCK_PROFILING the release always goes through __mtx_unlock_sleep();
  * otherwise __mtx_unlock() is used.
  */
 void
 __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	mtx_assert(m, MA_OWNED);
 
 #ifdef LOCK_PROFILING
 	__mtx_unlock_sleep(c, (uintptr_t)curthread, opts, file, line);
 #else
 	__mtx_unlock(m, curthread, opts, file, line);
 #endif
 	TD_LOCKS_DEC(curthread);
 }
 
 /*
  * Acquire a spin mutex identified by its mtx_lock cookie.  Recursion is
  * only accepted when the mutex is LO_RECURSABLE or MTX_RECURSE is passed
  * (asserted).  On SMP the function calls spinlock_enter(), tries an
  * uncontested acquire and falls back to _mtx_lock_spin(); without SMP it
  * uses __mtx_lock_spin().
  */
 void
 __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
     int line)
 {
 	struct mtx *m;
 #ifdef SMP
 	uintptr_t tid, v;
 #endif
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	if (mtx_owned(m))
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 		    (opts & MTX_RECURSE) != 0,
 	    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
 	/* MTX_RECURSE is stripped before opts reaches WITNESS and logging. */
 	opts &= ~MTX_RECURSE;
 	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line, NULL);
 #ifdef SMP
 	spinlock_enter();
 	tid = (uintptr_t)curthread;
 	v = MTX_UNOWNED;
 	if (!_mtx_obtain_lock_fetch(m, &v, tid))
 		_mtx_lock_spin(m, v, opts, file, line);
 	else
 		LOCKSTAT_PROFILE_OBTAIN_SPIN_LOCK_SUCCESS(spin__acquire,
 		    m, 0, 0, file, line);
 #else
 	__mtx_lock_spin(m, curthread, opts, file, line);
 #endif
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 }
 
 /*
  * Try to acquire a spin mutex without spinning.  Returns 1 on success and
  * 0 if the lock could not be taken.  MTX_RECURSE is not supported
  * (asserted).  When the scheduler is stopped the function reports success
  * without looking at the mutex at all.
  */
 int
 __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
     int line)
 {
 	struct mtx *m;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("mtx_trylock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	KASSERT((opts & MTX_RECURSE) == 0,
 	    ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n",
 	    m->lock_object.lo_name, file, line));
 	if (__mtx_trylock_spin(m, curthread, opts, file, line)) {
 		LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line);
 		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 		return (1);
 	}
 	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line);
 	return (0);
 }
 
 /*
  * Release a spin mutex identified by its mtx_lock cookie.  The mutex must
  * be a live spin mutex owned by the caller (asserted).
  */
 void
 __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
     int line)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	mtx_assert(m, MA_OWNED);
 
 	__mtx_unlock_spin(m);
 }
 
 /*
  * The important part of mtx_trylock{,_flags}()
  * Tries to acquire lock `m.'  If this function is called on a mutex that
  * is already owned, it will recursively acquire the lock.
  *
  * Returns 1 when the lock was acquired (or recursed on) and 0 otherwise.
  * When the scheduler is stopped it returns 1 without touching the mutex.
  */
 int
 _mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t tid, v;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	int rval;
 	bool recursed;
 
 	td = curthread;
 	tid = (uintptr_t)td;
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
 	    ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
 	    curthread, m->lock_object.lo_name, file, line));
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 
 	rval = 1;
 	recursed = false;
 	v = MTX_UNOWNED;
 	for (;;) {
 		if (_mtx_obtain_lock_fetch(m, &v, tid))
 			break;
 		/* The attempt failed but the lock was seen unowned: retry. */
 		if (v == MTX_UNOWNED)
 			continue;
 		/*
 		 * NOTE(review): this compares the raw lock word with tid,
 		 * whereas __mtx_lock_sleep() compares lv_mtx_owner(v) with
 		 * td.  MTX_RECURSED is set in the lock word just below, so
 		 * a further recursive trylock would presumably not match
 		 * here -- confirm that this is intended.
 		 */
 		if (v == tid &&
 		    ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 		    (opts & MTX_RECURSE) != 0)) {
 			m->mtx_recurse++;
 			atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 			recursed = true;
 			break;
 		}
 		rval = 0;
 		break;
 	}
 
 	opts &= ~MTX_RECURSE;
 
 	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		TD_LOCKS_INC(curthread);
 		/* Only a fresh acquire is reported to lockstat, not recursion. */
 		if (!recursed)
 			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
 			    m, contested, waittime, file, line);
 	}
 
 	return (rval);
 }
 
 /*
  * Cookie-based entry point for mtx_trylock: converts the mtx_lock cookie
  * address to the mutex and calls _mtx_trylock_flags_int().
  */
 int
 _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 	return (_mtx_trylock_flags_int(m, opts LOCK_FILE_LINE_ARG));
 }
 
 /*
  * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
  *
  * We call this if the lock is either contested (i.e. we need to go to
  * sleep waiting for it), or if we need to recurse on it.
  *
  * 'c' is the address of the mutex's mtx_lock cookie and 'v' is the lock
  * value observed by the caller's failed acquire attempt.  The function
  * returns once the calling thread owns the mutex (or has recursed on it),
  * except when the scheduler is stopped, in which case it returns without
  * acquiring anything.
  */
 #if LOCK_DEBUG > 0
 void
 __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, int opts, const char *file,
     int line)
 #else
 void
 __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 #endif
 {
 	struct thread *td;
 	struct mtx *m;
 	struct turnstile *ts;
 	uintptr_t tid;
 	struct thread *owner;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
 #if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	int doing_lockprof = 0;
 #endif
 
 	td = curthread;
 	tid = (uintptr_t)td;
 	m = mtxlock2mtx(c);
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * With the lockstat probe enabled, first retry the acquire for as
 	 * long as the lock is observed unowned; only a genuinely held lock
 	 * starts the timing below.
 	 */
 	if (LOCKSTAT_PROFILE_ENABLED(adaptive__acquire)) {
 		while (v == MTX_UNOWNED) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				goto out_lockstat;
 		}
 		doing_lockprof = 1;
 		all_time -= lockstat_nsecs(&m->lock_object);
 	}
 #endif
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 #endif
 
 	/* With the scheduler stopped, return without acquiring the lock. */
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return;
 
 	if (__predict_false(v == MTX_UNOWNED))
 		v = MTX_READ_VALUE(m);
 
 	/* Already owned by this thread: recurse and return. */
 	if (__predict_false(lv_mtx_owner(v) == td)) {
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 		    (opts & MTX_RECURSE) != 0,
 	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
 #if LOCK_DEBUG > 0
 		opts &= ~MTX_RECURSE;
 #endif
 		m->mtx_recurse++;
 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
 			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
 		return;
 	}
 #if LOCK_DEBUG > 0
 	opts &= ~MTX_RECURSE;
 #endif
 
 #if defined(ADAPTIVE_MUTEXES)
 	lock_delay_arg_init(&lda, &mtx_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init_noadapt(&lda);
 #endif
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&m->lock_object, false,
 		    &contested, &waittime);
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR4(KTR_LOCK,
 		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
 		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
 
 	THREAD_CONTENDS_ON_LOCK(&m->lock_object);
 
 	/*
 	 * Acquire loop: each pass either takes an unowned lock, spins while
 	 * the owner is running (ADAPTIVE_MUTEXES), or blocks on the
 	 * turnstile.  'v' always holds the most recently observed lock value.
 	 */
 	for (;;) {
 		if (v == MTX_UNOWNED) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 #ifdef ADAPTIVE_MUTEXES
 		/*
 		 * If the owner is running on another CPU, spin until the
 		 * owner stops running or the state of the lock changes.
 		 */
 		owner = lv_mtx_owner(v);
 		if (TD_IS_RUNNING(owner)) {
 			if (LOCK_LOG_TEST(&m->lock_object, 0))
 				CTR3(KTR_LOCK,
 				    "%s: spinning on %p held by %p",
 				    __func__, m, owner);
 			KTR_STATE1(KTR_SCHED, "thread",
 			    sched_tdname((struct thread *)tid),
 			    "spinning", "lockname:\"%s\"",
 			    m->lock_object.lo_name);
 			do {
 				lock_delay(&lda);
 				v = MTX_READ_VALUE(m);
 				owner = lv_mtx_owner(v);
 			} while (v != MTX_UNOWNED && TD_IS_RUNNING(owner));
 			KTR_STATE0(KTR_SCHED, "thread",
 			    sched_tdname((struct thread *)tid),
 			    "running");
 			continue;
 		}
 #endif
 
 		ts = turnstile_trywait(&m->lock_object);
 		v = MTX_READ_VALUE(m);
 retry_turnstile:
 
 		/*
 		 * Check if the lock has been released while spinning for
 		 * the turnstile chain lock.
 		 */
 		if (v == MTX_UNOWNED) {
 			turnstile_cancel(ts);
 			continue;
 		}
 
 #ifdef ADAPTIVE_MUTEXES
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		owner = lv_mtx_owner(v);
 		if (TD_IS_RUNNING(owner)) {
 			turnstile_cancel(ts);
 			continue;
 		}
 #endif
 
 		/*
 		 * If the mutex isn't already contested and a failure occurs
 		 * setting the contested bit, the mutex was either released
 		 * or the state of the MTX_RECURSED bit changed.
 		 */
 		if ((v & MTX_CONTESTED) == 0 &&
 		    !atomic_fcmpset_ptr(&m->mtx_lock, &v, v | MTX_CONTESTED)) {
 			goto retry_turnstile;
 		}
 
 		/*
 		 * We definitely must sleep for this lock.
 		 */
 		mtx_assert(m, MA_NOTOWNED);
 
 		/*
 		 * Block on the turnstile.
 		 */
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&m->lock_object);
 #endif
 #ifndef ADAPTIVE_MUTEXES
 		owner = mtx_owner(m);
 #endif
 		MPASS(owner == mtx_owner(m));
 		turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&m->lock_object);
 		sleep_cnt++;
 #endif
 		v = MTX_READ_VALUE(m);
 	}
 	THREAD_CONTENTION_DONE(&m->lock_object);
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	/* Skip the statistics below unless profiling was armed on entry. */
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&m->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD1(adaptive__block, m, sleep_time);
 
 	/*
 	 * Only record the loops spinning and not sleeping.
 	 */
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time);
 out_lockstat:
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested,
 	    waittime, file, line);
 }
 
 #ifdef SMP
 /*
  * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock.
  *
  * This is only called if we need to actually spin for the lock. Recursion
  * is handled inline.
  */
 #if LOCK_DEBUG > 0
 void
 _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, int opts,
     const char *file, int line)
 #else
 void
 _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v)
 #endif
 {
 	struct mtx *m;
 	struct lock_delay_arg lda;
 	uintptr_t tid;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
 #ifdef KDTRACE_HOOKS
 	int64_t spin_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	int doing_lockprof = 0;
 #endif
 
 	/*
 	 * `c' is the lock cookie from which the mutex is recovered; `v' is
 	 * used below as the last lock word value seen for this mutex.
 	 */
 	tid = (uintptr_t)curthread;
 	m = mtxlock2mtx(c);
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * With the probe enabled, retry the acquisition for as long as the
 	 * lock looks free.  Success skips straight to the acquire record;
 	 * otherwise profiling is armed and the spin timer is started.
 	 *
 	 * NOTE(review): this tests the adaptive__acquire probe although the
 	 * function records spin__acquire at the end -- confirm intended.
 	 */
 	if (LOCKSTAT_PROFILE_ENABLED(adaptive__acquire)) {
 		while (v == MTX_UNOWNED) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				goto out_lockstat;
 		}
 		doing_lockprof = 1;
 		spin_time -= lockstat_nsecs(&m->lock_object);
 	}
 #endif
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 #endif
 
 	/*
 	 * A value of MTX_UNOWNED carries no owner information; re-read the
 	 * lock word so the checks below see the current state.
 	 */
 	if (__predict_false(v == MTX_UNOWNED))
 		v = MTX_READ_VALUE(m);
 
 	/* Already held by this thread: count the recursion and return. */
 	if (__predict_false(v == tid)) {
 		m->mtx_recurse++;
 		return;
 	}
 
 	/* With the scheduler stopped, return without spinning for the lock. */
 	if (SCHEDULER_STOPPED())
 		return;
 
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
 	    "spinning", "lockname:\"%s\"", m->lock_object.lo_name);
 
 	lock_delay_arg_init(&lda, &mtx_spin_delay);
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&m->lock_object, true, &contested, &waittime);
 
 	/*
 	 * Each pass either attempts the acquisition (when the lock looks
 	 * free) or leaves the spinlock section and waits until it does.
 	 */
 	for (;;) {
 		if (v == MTX_UNOWNED) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
 			continue;
 		}
 		/* Give interrupts a chance while we spin. */
 		spinlock_exit();
 		do {
 			/*
 			 * After 10000000 delay rounds switch to the
 			 * indefinite-spin check, which panics if the lock
 			 * stays held for too long.
 			 */
 			if (__predict_true(lda.spin_cnt < 10000000)) {
 				lock_delay(&lda);
 			} else {
 				_mtx_lock_indefinite_check(m, &lda);
 			}
 			v = MTX_READ_VALUE(m);
 		} while (v != MTX_UNOWNED);
 		/* Re-enter the spinlock section before retrying the acquire. */
 		spinlock_enter();
 	}
 
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
 	KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
 	    "running");
 
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	/* Nothing to record unless profiling was armed above. */
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	spin_time += lockstat_nsecs(&m->lock_object);
 	if (lda.spin_cnt != 0)
 		LOCKSTAT_RECORD1(spin__spin, m, spin_time);
 out_lockstat:
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_SPIN_LOCK_SUCCESS(spin__acquire, m,
 	    contested, waittime, file, line);
 }
 #endif /* SMP */
 
 #ifdef INVARIANTS
 /*
  * Sanity-check the mutex about to be used as a thread lock: it must not be
  * destroyed, must be of the spin lock class and must not be recursable.
  * The witness lock-order check is run last.  Without INVARIANTS this is an
  * empty macro.
  */
 static void
 thread_lock_validate(struct mtx *m, int opts, const char *file, int line)
 {
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("thread_lock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("thread_lock() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) == 0,
 	    ("thread_lock: got a recursive mutex %s @ %s:%d\n",
 	    m->lock_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&m->lock_object,
 	    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 }
 #else
 #define thread_lock_validate(m, opts, file, line) do { } while (0)
 #endif
 
 #ifndef LOCK_PROFILING
 /*
  * Fast path for locking a thread: make a single attempt on the mutex that
  * td->td_lock currently points at and fall back to thread_lock_flags_()
  * for every other case.  Only built when LOCK_PROFILING is not defined.
  */
 #if LOCK_DEBUG > 0
 void
 _thread_lock(struct thread *td, int opts, const char *file, int line)
 #else
 void
 _thread_lock(struct thread *td)
 #endif
 {
 	struct mtx *m;
 	uintptr_t tid;
 
 	tid = (uintptr_t)curthread;
 
 	/*
 	 * If the lockstat probe is enabled, take the slow path directly,
 	 * before entering the spinlock section.
 	 */
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(spin__acquire)))
 		goto slowpath_noirq;
 	spinlock_enter();
 	m = td->td_lock;
 	thread_lock_validate(m, 0, file, line);
 	/* The thread lock is currently blocked_lock; let the slow path wait. */
 	if (__predict_false(m == &blocked_lock))
 		goto slowpath_unlocked;
 	if (__predict_false(!_mtx_obtain_lock(m, tid)))
 		goto slowpath_unlocked;
 	/*
 	 * The acquisition only counts if td->td_lock still points at the
 	 * mutex we locked; otherwise release it and retry in the slow path.
 	 */
 	if (__predict_true(m == td->td_lock)) {
 		WITNESS_LOCK(&m->lock_object, LOP_EXCLUSIVE, file, line);
 		return;
 	}
 	_mtx_release_lock_quick(m);
 slowpath_unlocked:
 	/* Balance the spinlock_enter() above before calling the slow path. */
 	spinlock_exit();
 slowpath_noirq:
 #if LOCK_DEBUG > 0
 	thread_lock_flags_(td, opts, file, line);
 #else
 	thread_lock_flags_(td, 0, 0, 0);
 #endif
 }
 #endif
 
 /*
  * Slow path for locking a thread.  Spins until the mutex referenced by
  * td->td_lock has been acquired and td->td_lock still refers to that same
  * mutex, restarting whenever the pointer changes underneath us.  Returns
  * with the spinlock section entered.
  */
 void
 thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
 {
 	struct mtx *m;
 	uintptr_t tid, v;
 	struct lock_delay_arg lda;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
 #ifdef KDTRACE_HOOKS
 	int64_t spin_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	int doing_lockprof = 1;
 #endif
 
 	tid = (uintptr_t)curthread;
 
 	if (SCHEDULER_STOPPED()) {
 		/*
 		 * Ensure that spinlock sections are balanced even when the
 		 * scheduler is stopped, since we may otherwise inadvertently
 		 * re-enable interrupts while dumping core.
 		 */
 		spinlock_enter();
 		return;
 	}
 
 	lock_delay_arg_init(&lda, &mtx_spin_delay);
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 
 	/*
 	 * Profiling is always on with LOCK_PROFILING; with only
 	 * KDTRACE_HOOKS it follows the lockstat_enabled flag.
 	 */
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 #elif defined(KDTRACE_HOOKS)
 	doing_lockprof = lockstat_enabled;
 #endif
 #ifdef KDTRACE_HOOKS
 	if (__predict_false(doing_lockprof))
 		spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
 #endif
 	spinlock_enter();
 
 	/*
 	 * Outer loop: pick up the current thread lock and try to take it.
 	 * Inner loop: acquire when the lock looks free, otherwise spin.
 	 */
 	for (;;) {
 retry:
 		m = td->td_lock;
 		thread_lock_validate(m, opts, file, line);
 		v = MTX_READ_VALUE(m);
 		for (;;) {
 			if (v == MTX_UNOWNED) {
 				if (_mtx_obtain_lock_fetch(m, &v, tid))
 					break;
 				continue;
 			}
 			MPASS(v != tid);
 			lock_profile_obtain_lock_failed(&m->lock_object, true,
 			    &contested, &waittime);
 			/* Give interrupts a chance while we spin. */
 			spinlock_exit();
 			do {
 				if (__predict_true(lda.spin_cnt < 10000000)) {
 					lock_delay(&lda);
 				} else {
 					_mtx_lock_indefinite_check(m, &lda);
 				}
 				/*
 				 * The thread now points at a different lock:
 				 * restore the spinlock section and start over
 				 * with the new pointer.
 				 */
 				if (m != td->td_lock) {
 					spinlock_enter();
 					goto retry;
 				}
 				v = MTX_READ_VALUE(m);
 			} while (v != MTX_UNOWNED);
 			spinlock_enter();
 		}
 		/*
 		 * We hold m; done if it is still the thread's lock, otherwise
 		 * drop it and go around again.
 		 */
 		if (m == td->td_lock)
 			break;
 		_mtx_release_lock_quick(m);
 	}
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	spin_time += lockstat_nsecs(&m->lock_object);
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_SPIN_LOCK_SUCCESS(spin__acquire, m, contested,
 	    waittime, file, line);
 #ifdef KDTRACE_HOOKS
 	if (lda.spin_cnt != 0)
 		LOCKSTAT_RECORD1(thread__spin, m, spin_time);
 #endif
 }
 
 /*
  * Point the thread's lock at blocked_lock and return the mutex it referred
  * to before.  That previous mutex is asserted to be owned by the caller.
  */
 struct mtx *
 thread_lock_block(struct thread *td)
 {
 	struct mtx *prev = td->td_lock;
 
 	mtx_assert(prev, MA_OWNED);
 	td->td_lock = &blocked_lock;
 	return (prev);
 }
 
 /*
  * Counterpart of thread_lock_block(): install `new' as the thread's lock.
  * The caller must own `new' and the thread must currently point at
  * blocked_lock.
  */
 void
 thread_lock_unblock(struct thread *td, struct mtx *new)
 {
 
 	mtx_assert(new, MA_OWNED);
 	KASSERT(td->td_lock == &blocked_lock,
 	    ("thread %p lock %p not blocked_lock %p",
 	    td, td->td_lock, &blocked_lock));
 	/* Publish the new pointer with release semantics. */
 	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
 }
 
 /*
  * Busy-wait until the thread's lock pointer no longer refers to
  * blocked_lock.
  */
 void
 thread_lock_block_wait(struct thread *td)
 {
 
 	for (;;) {
 		if (td->td_lock != &blocked_lock)
 			break;
 		cpu_spinwait();
 	}
 
 	/* Acquire fence to be certain that all thread state is visible. */
 	atomic_thread_fence_acq();
 }
 
 /*
  * Switch the thread over to the lock `new' and release the lock it used
  * before.  Both locks are asserted to be owned on entry.
  */
 void
 thread_lock_set(struct thread *td, struct mtx *new)
 {
 	struct mtx *old;
 
 	mtx_assert(new, MA_OWNED);
 	old = td->td_lock;
 	mtx_assert(old, MA_OWNED);
 
 	/* Repoint the thread first, then drop the previous lock. */
 	td->td_lock = new;
 	mtx_unlock_spin(old);
 }
 
 /*
  * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
  *
  * We are only called here if the lock is recursed, contested (i.e. we
  * need to wake up a blocked thread) or lockstat probe is active.
  */
 #if LOCK_DEBUG > 0
 void
 __mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
     const char *file, int line)
 #else
 void
 __mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v)
 #endif
 {
 	struct mtx *m;
 	struct turnstile *ts;
 	uintptr_t tid;
 
 	/* Nothing is released when the scheduler is stopped. */
 	if (SCHEDULER_STOPPED())
 		return;
 
 	tid = (uintptr_t)curthread;
 	m = mtxlock2mtx(c);
 
 	/*
 	 * A passed-in value equal to tid carries no flag bits; re-read the
 	 * lock word so the MTX_RECURSED test below sees the current flags.
 	 */
 	if (__predict_false(v == tid))
 		v = MTX_READ_VALUE(m);
 
 	/*
 	 * Recursed: undo one level, clearing MTX_RECURSED when the count
 	 * reaches zero.  The mutex itself stays held.
 	 */
 	if (__predict_false(v & MTX_RECURSED)) {
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
 		return;
 	}
 
 	LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m);
 	/* No flag bits were seen: try the plain release first. */
 	if (v == tid && _mtx_release_lock(m, tid))
 		return;
 
 	/*
 	 * We have to lock the chain before the turnstile so this turnstile
 	 * can be removed from the hash list if it is empty.
 	 */
 	turnstile_chain_lock(&m->lock_object);
 	_mtx_release_lock_quick(m);
 	ts = turnstile_lookup(&m->lock_object);
 	MPASS(ts != NULL);
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
 	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
 
 	/*
 	 * This turnstile is now no longer associated with the mutex.  We can
 	 * unlock the chain lock so a new turnstile may take its place.
 	 */
 	turnstile_unpend(ts);
 	turnstile_chain_unlock(&m->lock_object);
 }
 
 /*
  * All the unlocking of MTX_SPIN locks is done inline.
  * See the __mtx_unlock_spin() macro for the details.
  */
 
 /*
  * The backing function for the INVARIANTS-enabled mtx_assert()
  */
 #ifdef INVARIANT_SUPPORT
 /*
  * Verify the ownership/recursion state requested by `what' for the mutex
  * behind cookie `c', panicking with the caller's file and line on mismatch.
  */
 void
 __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 {
 	const struct mtx *m;
 	int recursed;
 
 	/*
 	 * Skip all checks when the kernel has panicked, a dump is in
 	 * progress or the scheduler is stopped.
 	 */
 	if (KERNEL_PANICKED() || dumping || SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	switch (what) {
 	case MA_NOTOWNED:
 		if (mtx_owned(m))
 			panic("mutex %s owned at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		break;
 	case MA_OWNED:
 	case MA_OWNED | MA_RECURSED:
 	case MA_OWNED | MA_NOTRECURSED:
 		if (!mtx_owned(m))
 			panic("mutex %s not owned at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		/* Owned; now compare the recursion state with the request. */
 		recursed = mtx_recursed(m);
 		if (recursed && (what & MA_NOTRECURSED) != 0)
 			panic("mutex %s recursed at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		if (!recursed && (what & MA_RECURSED) != 0)
 			panic("mutex %s unrecursed at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		break;
 	default:
 		panic("unknown mtx_assert at %s:%d", file, line);
 	}
 }
 #endif
 
 /*
  * General init routine used by the MTX_SYSINIT() macro.
  */
 void
 mtx_sysinit(void *arg)
 {
 	struct mtx_args *margs;
 	struct mtx *m;
 
 	/* `arg' is the struct mtx_args describing the mutex to set up. */
 	margs = arg;
 	m = (struct mtx *)margs->ma_mtx;
 	mtx_init(m, margs->ma_desc, NULL, margs->ma_opts);
 }
 
 /*
  * Mutex initialization routine; initialize lock `m' of type contained in
  * `opts' with options contained in `opts' and name `name.'  The optional
  * lock type `type' is used as a general lock category name for use with
  * witness.
  */
 void
 _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts)
 {
 	struct lock_class *class;
 	struct mtx *m;
 	int flags;
 
 	m = mtxlock2mtx(c);
 
 	/* Only the documented MTX_* option bits may be passed in. */
 	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
 	    MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock,
 	    ("%s: mtx_lock not aligned for %s: %p", __func__, name,
 	    &m->mtx_lock));
 
 	/* MTX_SPIN selects the spin class; otherwise the sleep class. */
 	class = (opts & MTX_SPIN) != 0 ? &lock_class_mtx_spin :
 	    &lock_class_mtx_sleep;
 
 	/*
 	 * Translate MTX_* options into LO_* flags.  Witness is enabled
 	 * unless MTX_NOWITNESS was requested.
 	 */
 	flags = (opts & MTX_NOWITNESS) != 0 ? 0 : LO_WITNESS;
 	if ((opts & MTX_QUIET) != 0)
 		flags |= LO_QUIET;
 	if ((opts & MTX_RECURSE) != 0)
 		flags |= LO_RECURSABLE;
 	if ((opts & MTX_DUPOK) != 0)
 		flags |= LO_DUPOK;
 	if ((opts & MTX_NOPROFILE) != 0)
 		flags |= LO_NOPROFILE;
 	if ((opts & MTX_NEW) != 0)
 		flags |= LO_NEW;
 
 	/* Register the lock object, then start out unowned and unrecursed. */
 	lock_init(&m->lock_object, class, name, type, flags);
 
 	m->mtx_lock = MTX_UNOWNED;
 	m->mtx_recurse = 0;
 }
 
 /*
  * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
  * passed in as a flag here because if the corresponding mtx_init() was
  * called with MTX_QUIET set, then it will already be set in the mutex's
  * flags.
  */
 void
 _mtx_destroy(volatile uintptr_t *c)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 
 	/*
 	 * The mutex must be either unowned or held by the calling thread.
 	 * In the latter case it may be neither recursed nor contested, and
 	 * the bookkeeping an unlock would have done is performed here.
 	 */
 	if (!mtx_owned(m))
 		MPASS(mtx_unowned(m));
 	else {
 		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
 
 		/* Perform the non-mtx related part of mtx_unlock_spin(). */
 		if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) {
 			lock_profile_release_lock(&m->lock_object, true);
 			spinlock_exit();
 		} else {
 			TD_LOCKS_DEC(curthread);
 			lock_profile_release_lock(&m->lock_object, false);
 		}
 
 		/* Tell witness this isn't locked to make it happy. */
 		WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__,
 		    __LINE__);
 	}
 
 	/* Mark the lock word as destroyed before tearing down the object. */
 	m->mtx_lock = MTX_DESTROYED;
 	lock_destroy(&m->lock_object);
 }
 
 /*
  * Initialize the mutex code and system mutexes.  This is called from the MD
  * startup code prior to mi_startup().  The per-CPU data space needs to be
  * setup before this is called.
  */
 void
 mutex_init(void)
 {
 
 	/* Setup turnstiles so that sleep mutexes work. */
 	init_turnstiles();
 
 	/*
 	 * Initialize mutexes.
 	 */
 	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
 	/*
 	 * blocked_lock gets a lock word that is never MTX_UNOWNED, so it
 	 * cannot be acquired.
 	 */
 	mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN);
 	blocked_lock.mtx_lock = 0xdeadc0de;	/* Always blocked. */
 	/* Locks embedded in proc0. */
 	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
 	mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN);
 	mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
 	/* Return with Giant held. */
 	mtx_lock(&Giant);
 }
 
 /*
  * Called by spinners once their spin count has grown large.  Bumps the
  * count and delays; once the count reaches 60000000 (and neither the
  * debugger is active nor the kernel has panicked) it reports the current
  * holder and panics.
  */
 static void __noinline
 _mtx_lock_indefinite_check(struct mtx *m, struct lock_delay_arg *ldap)
 {
 	struct thread *holder;
 
 	ldap->spin_cnt++;
 	if (ldap->spin_cnt < 60000000 || kdb_active || KERNEL_PANICKED()) {
 		/* Not yet considered stuck: delay and let the caller retry. */
 		cpu_lock_delay();
 		cpu_spinwait();
 		return;
 	}
 
 	/* If the mutex is unlocked, try again. */
 	holder = mtx_owner(m);
 	if (holder == NULL)
 		return;
 
 	printf( "spin lock %p (%s) held by %p (tid %d) too long\n",
 	    m, m->lock_object.lo_name, holder, holder->td_tid);
 #ifdef WITNESS
 	witness_display_spinlock(&m->lock_object, holder, printf);
 #endif
 	/* panic() does not return. */
 	panic("spin lock held too long");
 }
 
 /*
  * Spin until the spin mutex `m' is observed unowned, without acquiring it.
  * The caller must not own the mutex.
  */
 void
 mtx_spin_wait_unlocked(struct mtx *m)
 {
 	struct lock_delay_arg lda;
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("%s() of destroyed mutex %p", __func__, m));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("%s() of sleep mutex %p (%s)", __func__, m,
 	    m->lock_object.lo_name));
 	KASSERT(!mtx_owned(m),
 	    ("%s() waiting on myself on lock %p (%s)", __func__, m,
 	    m->lock_object.lo_name));
 
 	for (lda.spin_cnt = 0;
 	    atomic_load_acq_ptr(&m->mtx_lock) != MTX_UNOWNED;) {
 		/* Past 10000000 rounds, let the stuck-lock check take over. */
 		if (__predict_false(lda.spin_cnt >= 10000000)) {
 			_mtx_lock_indefinite_check(m, &lda);
 			continue;
 		}
 		cpu_spinwait();
 		lda.spin_cnt++;
 	}
 }
 
 /*
  * Wait until the sleep mutex `m' has been seen unlocked.  Spins while the
  * owner is running; otherwise acquires and immediately releases the mutex.
  * The caller must not own the mutex.
  */
 void
 mtx_wait_unlocked(struct mtx *m)
 {
 	struct thread *owner;
 	uintptr_t v;
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("%s() of destroyed mutex %p", __func__, m));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("%s() not a sleep mutex %p (%s)", __func__, m,
 	    m->lock_object.lo_name));
 	KASSERT(!mtx_owned(m),
 	    ("%s() waiting on myself on lock %p (%s)", __func__, m,
 	    m->lock_object.lo_name));
 
 	while ((v = atomic_load_acq_ptr(&m->mtx_lock)) != MTX_UNOWNED) {
 		owner = lv_mtx_owner(v);
 		if (!TD_IS_RUNNING(owner)) {
 			/* Owner is off-CPU: block on the mutex instead. */
 			mtx_lock(m);
 			mtx_unlock(m);
 			return;
 		}
 		cpu_spinwait();
 	}
 }
 
 #ifdef DDB
 /*
  * DDB helper: print the flags, state and (if held) owner of a mutex.
  */
 void
 db_show_mtx(const struct lock_object *lock)
 {
 	const struct mtx *m = (const struct mtx *)lock;
 	struct thread *td;
 
 	/* Static properties of the lock. */
 	db_printf(" flags: {");
 	if (LOCK_CLASS(lock) == &lock_class_mtx_spin) {
 		db_printf("SPIN");
 	} else {
 		db_printf("DEF");
 	}
 	if ((m->lock_object.lo_flags & LO_RECURSABLE) != 0) {
 		db_printf(", RECURSE");
 	}
 	if ((m->lock_object.lo_flags & LO_DUPOK) != 0) {
 		db_printf(", DUPOK");
 	}
 	db_printf("}\n");
 
 	/* Current state taken from the lock word. */
 	db_printf(" state: {");
 	if (mtx_unowned(m)) {
 		db_printf("UNOWNED");
 	} else if (mtx_destroyed(m)) {
 		db_printf("DESTROYED");
 	} else {
 		db_printf("OWNED");
 		if ((m->mtx_lock & MTX_CONTESTED) != 0) {
 			db_printf(", CONTESTED");
 		}
 		if ((m->mtx_lock & MTX_RECURSED) != 0) {
 			db_printf(", RECURSED");
 		}
 	}
 	db_printf("}\n");
 
 	/* Owner details only make sense for a held, live mutex. */
 	if (mtx_unowned(m) || mtx_destroyed(m)) {
 		return;
 	}
 	td = mtx_owner(m);
 	db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td,
 	    td->td_tid, td->td_proc->p_pid, td->td_name);
 	if (mtx_recursed(m)) {
 		db_printf(" recursed: %d\n", m->mtx_recurse);
 	}
 }
 #endif
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 83d5862a6667..28dddb950966 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -1,1568 +1,1568 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Machine independent bits of reader/writer lock implementation.
  */
 
 #include <sys/cdefs.h>
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_no_adaptive_rwlocks.h"
 
 #include <sys/param.h>
 #include <sys/kdb.h>
 #include <sys/ktr.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/turnstile.h>
 
 #include <machine/cpu.h>
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
 #define	ADAPTIVE_RWLOCKS
 #endif
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DECLARE( , , lock, failed);
 #endif
 
 /*
  * Return the rwlock address when the lock cookie address is provided.
  * This functionality assumes that struct rwlock* have a member named rw_lock.
  */
 #define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))
 
 #ifdef DDB
 #include <ddb/ddb.h>
 
 static void	db_show_rwlock(const struct lock_object *lock);
 #endif
 static void	assert_rw(const struct lock_object *lock, int what);
 static void	lock_rw(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_rw(const struct lock_object *lock, struct thread **owner);
 #endif
 static uintptr_t unlock_rw(struct lock_object *lock);
 
 /*
  * Lock class descriptor for rwlocks.  It wires the class callbacks to the
  * static helpers in this file; the DDB and DTrace entries are only present
  * when the corresponding options are compiled in.
  */
 struct lock_class lock_class_rw = {
 	.lc_name = "rw",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
 	.lc_assert = assert_rw,
 #ifdef DDB
 	.lc_ddb_show = db_show_rwlock,
 #endif
 	.lc_lock = lock_rw,
 	.lc_unlock = unlock_rw,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_rw,
 #endif
 };
 
 #ifdef ADAPTIVE_RWLOCKS
 /*
  * Spin/backoff tunables for adaptive rwlocks.  With RWLOCK_CUSTOM_BACKOFF
  * they are private to rwlocks and exported under debug.rwlock; otherwise
  * the names alias the shared locks_delay* settings.
  */
 #ifdef RWLOCK_CUSTOM_BACKOFF
 static u_short __read_frequently rowner_retries;
 static u_short __read_frequently rowner_loops;
 static SYSCTL_NODE(_debug, OID_AUTO, rwlock,
     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
     "rwlock debugging");
 SYSCTL_U16(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
 SYSCTL_U16(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
 
 static struct lock_delay_config __read_frequently rw_delay;
 
 SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
     0, "");
 SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
     0, "");
 
 /*
  * Fill in the default delay configuration and derive the retry/loop limits:
  * 10 retries, and at least 10000 loops (more if the delay maximum is
  * larger).
  */
 static void
 rw_lock_delay_init(void *arg __unused)
 {
 
 	lock_delay_default_init(&rw_delay);
 	rowner_retries = 10;
 	rowner_loops = max(10000, rw_delay.max);
 }
 LOCK_DELAY_SYSINIT(rw_lock_delay_init);
 #else
 #define rw_delay	locks_delay
 #define rowner_retries	locks_delay_retries
 #define rowner_loops	locks_delay_loops
 #endif
 #endif
 
 /*
  * Return a pointer to the owning thread if the lock is write-locked or
  * NULL if the lock is unlocked or read-locked.
  */
 
 /* lv_rw_wowner() decodes an already-read lock word value `v'. */
 #define	lv_rw_wowner(v)							\
 	((v) & RW_LOCK_READ ? NULL :					\
 	 (struct thread *)RW_OWNER((v)))
 
 /* rw_wowner() reads the lock word of `rw' and decodes it. */
 #define	rw_wowner(rw)	lv_rw_wowner(RW_READ_VALUE(rw))
 
 /*
  * Returns if a write owner is recursed.  Write ownership is not assured
  * here and should be previously checked.
  */
 #define	rw_recursed(rw)		((rw)->rw_recurse != 0)
 
 /*
  * Return true if curthread holds the lock.
  */
 #define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)
 
 /*
  * Return a pointer to the owning thread for this lock who should receive
  * any priority lent by threads that block on this lock.  Currently this
  * is identical to rw_wowner().
  */
 #define	rw_owner(rw)		rw_wowner(rw)
 
 /* Without INVARIANTS the assertion backend compiles away to nothing. */
 #ifndef INVARIANTS
 #define	__rw_assert(c, what, file, line)
 #endif
 
 /* Lock class assert callback: forward to rw_assert(). */
 void
 assert_rw(const struct lock_object *lock, int what)
 {
 	const struct rwlock *rw = (const struct rwlock *)lock;
 
 	rw_assert(rw, what);
 }
 
 /*
  * Lock class lock callback: a non-zero `how' takes a read lock, zero takes
  * a write lock.
  */
 void
 lock_rw(struct lock_object *lock, uintptr_t how)
 {
 	struct rwlock *rw = (struct rwlock *)lock;
 
 	if (how == 0) {
 		rw_wlock(rw);
 	} else {
 		rw_rlock(rw);
 	}
 }
 
 /*
  * Lock class unlock callback: release the lock in whichever mode it is
  * held.  Returns 1 if a read lock was dropped and 0 for a write lock.
  */
 uintptr_t
 unlock_rw(struct lock_object *lock)
 {
 	struct rwlock *rw = (struct rwlock *)lock;
 
 	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
 	if ((rw->rw_lock & RW_LOCK_READ) == 0) {
 		rw_wunlock(rw);
 		return (0);
 	}
 	rw_runlock(rw);
 	return (1);
 }
 
 #ifdef KDTRACE_HOOKS
 /*
  * Lock class owner callback.  Stores the write owner in *owner (NULL when
  * the lock is read-locked or unlocked) and returns non-zero if the lock is
  * held: at least one reader when in read mode, or a non-NULL write owner.
  *
  * Both results are derived from a single snapshot of the lock word so that
  * the reported owner and the held/not-held answer always describe the same
  * state, even if the lock changes hands concurrently.
  */
 int
 owner_rw(const struct lock_object *lock, struct thread **owner)
 {
 	const struct rwlock *rw = (const struct rwlock *)lock;
 	uintptr_t x = rw->rw_lock;
 
 	*owner = lv_rw_wowner(x);
 	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
 	    (*owner != NULL));
 }
 #endif
 
 /*
  * Initialize the rwlock behind cookie `c' with the given name, translating
  * the RW_* options into lock object flags.
  */
 void
 _rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
 {
 	struct rwlock *rw;
 	int flags;
 
 	rw = rwlock2rw(c);
 
 	/* Only the documented RW_* option bits may be passed in. */
 	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
 	    RW_RECURSE | RW_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
 	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
 	    &rw->rw_lock));
 
 	/*
 	 * Every rwlock is upgradable; witness is enabled unless
 	 * RW_NOWITNESS was requested.
 	 */
 	flags = LO_UPGRADABLE;
 	if ((opts & RW_NOWITNESS) == 0)
 		flags |= LO_WITNESS;
 	if ((opts & RW_DUPOK) != 0)
 		flags |= LO_DUPOK;
 	if ((opts & RW_NOPROFILE) != 0)
 		flags |= LO_NOPROFILE;
 	if ((opts & RW_RECURSE) != 0)
 		flags |= LO_RECURSABLE;
 	if ((opts & RW_QUIET) != 0)
 		flags |= LO_QUIET;
 	if ((opts & RW_NEW) != 0)
 		flags |= LO_NEW;
 
 	/* Register the lock object, then start out unlocked. */
 	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
 	rw->rw_lock = RW_UNLOCKED;
 	rw->rw_recurse = 0;
 }
 
 /*
  * Destroy the rwlock behind cookie `c'.  The lock must be unlocked and not
  * recursed; its lock word is set to RW_DESTROYED before the lock object is
  * torn down.
  */
 void
 _rw_destroy(volatile uintptr_t *c)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
 	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
 	rw->rw_lock = RW_DESTROYED;
 	lock_destroy(&rw->lock_object);
 }
 
 /* Initialize the rwlock described by the struct rw_args in `arg'. */
 void
 rw_sysinit(void *arg)
 {
 	struct rw_args *args = arg;
 	struct rwlock *rw = (struct rwlock *)args->ra_rw;
 
 	rw_init_flags(rw, args->ra_desc, args->ra_flags);
 }
 
 /* Return non-zero if curthread is the write owner of the lock. */
 int
 _rw_wowned(const volatile uintptr_t *c)
 {
 	const struct rwlock *rw = rwlock2rw(c);
 
 	return (rw_wowner(rw) == curthread);
 }
 
 /*
  * Acquire the rwlock behind cookie `c' for writing.  Tries the uncontended
  * compare-and-set first and falls back to _rw_wlock_hard() when that fails.
  */
 void
 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	uintptr_t tid, v;
 
 	rw = rwlock2rw(c);
 
 	/*
 	 * Idle threads may only get here while the debugger is active or the
 	 * scheduler is stopped.
 	 */
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(curthread),
 	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line, NULL);
 	tid = (uintptr_t)curthread;
 	/*
 	 * Expect an unlocked lock; on failure `v' holds the value actually
 	 * seen and is handed to the slow path.
 	 */
 	v = RW_UNLOCKED;
 	if (!_rw_write_lock_fetch(rw, &v, tid))
 		_rw_wlock_hard(rw, v, file, line);
 	else
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
 		    0, 0, file, line, LOCKSTAT_WRITER);
 
 	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
 	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	TD_LOCKS_INC(curthread);
 }
 
 /*
  * Try to acquire `rw' for writing without blocking.
  *
  * Returns 1 on success and 0 if the lock could not be taken.  Success
  * covers a fresh acquisition, a recursive acquisition by the current write
  * owner on a recursable lock, and the case where the scheduler is stopped
  * (in which the lock word is left untouched).
  */
 int
 __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t tid, v;
 	int rval;
 	bool recursed;
 
 	td = curthread;
 	tid = (uintptr_t)td;
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
 	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
 
 	rval = 1;
 	recursed = false;
 	v = RW_UNLOCKED;
 	for (;;) {
 		/* On failure `v' is refreshed with the value actually seen. */
 		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
 			break;
 		/* Failed although the lock looked free: just try again. */
 		if (v == RW_UNLOCKED)
 			continue;
 		if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
 			rw->rw_recurse++;
 			atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 			/*
 			 * Remember the recursion so that it is not reported
 			 * below as a fresh acquisition.
 			 */
 			recursed = true;
 			break;
 		}
 		rval = 0;
 		break;
 	}
 
 	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		/* The acquire probe fires only for non-recursive success. */
 		if (!recursed)
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
 			    rw, 0, 0, file, line, LOCKSTAT_WRITER);
 		TD_LOCKS_INC(curthread);
 	}
 	return (rval);
 }
 
 /* Cookie-based wrapper around __rw_try_wlock_int(). */
 int
 __rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
 {
 
 	return (__rw_try_wlock_int(rwlock2rw(c) LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Release a write lock on the rwlock behind cookie `c'.  The lock must be
  * write-locked by the caller.
  */
 void
 _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(c, RA_WLOCKED, file, line);
 	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
 	    line);
 
 	/* With LOCK_PROFILING the release always goes through the hard path. */
 #ifdef LOCK_PROFILING
 	_rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
 #else
 	__rw_wunlock(rw, curthread, file, line);
 #endif
 
 	TD_LOCKS_DEC(curthread);
 }
 
 /*
  * Decide whether a new reader may take the lock given lock word `v'.
  *
  * The answer is yes when the lock is in read mode (which includes the
  * unlocked state) with no writer waiting or spinning.  Outside the fast
  * path (`fp' false) it is also yes when the lock is in read mode and the
  * thread already holds read locks, so that reader recursion cannot
  * deadlock behind a waiting writer.  In every other case writers are given
  * priority and the answer is no.
  */
 static bool __always_inline
 __rw_can_read(struct thread *td, uintptr_t v, bool fp)
 {
 
 	return ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS |
 	    RW_LOCK_WRITE_SPINNER)) == RW_LOCK_READ ||
 	    (!fp && td->td_rw_rlocks != 0 && (v & RW_LOCK_READ) != 0));
 }
 
 /*
  * Attempt to take a read lock for as long as __rw_can_read() permits it.
  * `*vp' is the last lock word value seen and is refreshed whenever the
  * compare-and-set fails.  Returns true, after bumping td->td_rw_rlocks, when
  * the reader count was incremented; returns false, with `*vp' holding the
  * value that forbade reading, otherwise.
  */
 static bool __always_inline
 __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
     LOCK_FILE_LINE_ARG_DEF)
 {
 
 	/*
 	 * Handle the easy case.  If no other thread has a write
 	 * lock, then try to bump up the count of read locks.  Note
 	 * that we have to preserve the current state of the
 	 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
 	 * read lock, then rw_lock must have changed, so restart
 	 * the loop.  Note that this handles the case of a
 	 * completely unlocked rwlock since such a lock is encoded
 	 * as a read lock with no waiters.
 	 */
 	while (__rw_can_read(td, *vp, fp)) {
 		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
 			*vp + RW_ONE_READER)) {
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR4(KTR_LOCK,
 				    "%s: %p succeed %p -> %p", __func__,
 				    rw, (void *)*vp,
 				    (void *)(*vp + RW_ONE_READER));
 			td->td_rw_rlocks++;
 			return (true);
 		}
 	}
 	return (false);
 }
 
 static void __noinline
 __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
     LOCK_FILE_LINE_ARG_DEF)
 {
 	struct turnstile *ts;
 	struct thread *owner;
 #ifdef ADAPTIVE_RWLOCKS
 	int spintries = 0;
 	int i, n;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state = 0;
 	int doing_lockprof = 0;
 #endif
 
 #ifdef KDTRACE_HOOKS
 	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
 		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
 			goto out_lockstat;
 		doing_lockprof = 1;
 		all_time -= lockstat_nsecs(&rw->lock_object);
 		state = v;
 	}
 #endif
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 	state = v;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 #if defined(ADAPTIVE_RWLOCKS)
 	lock_delay_arg_init(&lda, &rw_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init_noadapt(&lda);
 #endif
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&rw->lock_object, false,
 	    &contested, &waittime);
 
 	THREAD_CONTENDS_ON_LOCK(&rw->lock_object);
 
 	for (;;) {
 		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
 			break;
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if ((v & RW_LOCK_READ) == 0) {
 			owner = (struct thread *)RW_OWNER(v);
 			if (TD_IS_RUNNING(owner)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 					    __func__, rw, owner);
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "spinning",
 				    "lockname:\"%s\"", rw->lock_object.lo_name);
 				do {
 					lock_delay(&lda);
 					v = RW_READ_VALUE(rw);
 					owner = lv_rw_wowner(v);
 				} while (owner != NULL && TD_IS_RUNNING(owner));
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "running");
 				continue;
 			}
 		} else {
 			if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) {
 				MPASS(!__rw_can_read(td, v, false));
 				lock_delay_spin(2);
 				v = RW_READ_VALUE(rw);
 				continue;
 			}
 			if (spintries < rowner_retries) {
 				spintries++;
 				KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 				    "spinning", "lockname:\"%s\"",
 				    rw->lock_object.lo_name);
 				n = RW_READERS(v);
 				for (i = 0; i < rowner_loops; i += n) {
 					lock_delay_spin(n);
 					v = RW_READ_VALUE(rw);
 					if (!(v & RW_LOCK_READ))
 						break;
 					n = RW_READERS(v);
 					if (n == 0)
 						break;
 					if (__rw_can_read(td, v, false))
 						break;
 				}
 #ifdef KDTRACE_HOOKS
 				lda.spin_cnt += rowner_loops - i;
 #endif
 				KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 				    "running");
 				if (i < rowner_loops)
 					continue;
 			}
 		}
 #endif
 
 		/*
 		 * Okay, now it's the hard case.  Some other thread already
 		 * has a write lock or there are write waiters present,
 		 * acquire the turnstile lock so we can begin the process
 		 * of blocking.
 		 */
 		ts = turnstile_trywait(&rw->lock_object);
 
 		/*
 		 * The lock might have been released while we spun, so
 		 * recheck its state and restart the loop if needed.
 		 */
 		v = RW_READ_VALUE(rw);
 retry_ts:
 		if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) ||
 		    __rw_can_read(td, v, false)) {
 			turnstile_cancel(ts);
 			continue;
 		}
 
 		owner = lv_rw_wowner(v);
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		if (owner != NULL) {
 			if (TD_IS_RUNNING(owner)) {
 				turnstile_cancel(ts);
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * The lock is held in write mode or it already has waiters.
 		 */
 		MPASS(!__rw_can_read(td, v, false));
 
 		/*
 		 * If the RW_LOCK_READ_WAITERS flag is already set, then
 		 * we can go ahead and block.  If it is not set then try
 		 * to set it.  If we fail to set it drop the turnstile
 		 * lock and restart the loop.
 		 */
 		if (!(v & RW_LOCK_READ_WAITERS)) {
 			if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 			    v | RW_LOCK_READ_WAITERS))
 				goto retry_ts;
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
 				    __func__, rw);
 		}
 
 		/*
 		 * We were unable to acquire the lock and the read waiters
 		 * flag is set, so we must block on the turnstile.
 		 */
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 			    rw);
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&rw->lock_object);
 #endif
 		MPASS(owner == rw_owner(rw));
 		turnstile_wait(ts, owner, TS_SHARED_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&rw->lock_object);
 		sleep_cnt++;
 #endif
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 			    __func__, rw);
 		v = RW_READ_VALUE(rw);
 	}
 	THREAD_CONTENTION_DONE(&rw->lock_object);
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&rw->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
 		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
 		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 
 	/* Record only the loops spinning and not sleeping. */
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
 		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 out_lockstat:
 #endif
 	/*
 	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
 	 * however.  turnstiles don't like owners changing between calls to
 	 * turnstile_wait() currently.
 	 */
 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 	    waittime, file, line, LOCKSTAT_READER);
 }
 
 /*
  * Acquire a read lock on 'rw' on behalf of curthread.
  *
  * After the sanity assertions and the WITNESS order check, the lock word
  * is sampled and one __rw_rlock_try() attempt is made.  If that attempt
  * fails, or if the rw__acquire lockstat probe is enabled, acquisition is
  * delegated to __rw_rlock_hard().  'file' and 'line' identify the caller
  * in assertion messages and lock logging.
  */
 void
 __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t v;
 
 	td = curthread;
 
-	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) ||
+	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(td),
 	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
 	    td, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
 	/* Read-locking a lock this thread already write-holds is a bug. */
 	KASSERT(rw_wowner(rw) != td,
 	    ("rw_rlock: wlock already held for %s @ %s:%d",
 	    rw->lock_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
 
 	v = RW_READ_VALUE(rw);
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
 	    !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
 		__rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
 	else
 		lock_profile_obtain_lock_success(&rw->lock_object, false, 0, 0,
 		    file, line);
 
 	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
 	WITNESS_LOCK(&rw->lock_object, 0, file, line);
 	TD_LOCKS_INC(curthread);
 }
 
 /*
  * Wrapper around __rw_rlock_int(): translates 'c' into its struct rwlock
  * with rwlock2rw() and forwards the caller's file/line information.
  */
 void
 __rw_rlock(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	__rw_rlock_int(rw LOCK_FILE_LINE_ARG);
 }
 
 /*
  * Try to acquire a read lock on 'rw' without blocking.
  *
  * Returns 1 if a reader reference was added and 0 if the lock word does
  * not have RW_LOCK_READ set.  If the scheduler is stopped, 1 is returned
  * without touching the lock.
  */
 int
 __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 
 	x = rw->rw_lock;
 	/*
 	 * Keep retrying the increment for as long as RW_LOCK_READ stays
 	 * set; a failed atomic_fcmpset_acq_ptr() leaves the freshly
 	 * observed lock word in 'x' (see atomic(9)).
 	 */
 	for (;;) {
 		KASSERT(rw->rw_lock != RW_DESTROYED,
 		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
 		if (!(x & RW_LOCK_READ))
 			break;
 		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
 			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
 			    line);
 			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
 			    rw, 0, 0, file, line, LOCKSTAT_READER);
 			TD_LOCKS_INC(curthread);
 			curthread->td_rw_rlocks++;
 			return (1);
 		}
 	}
 
 	/* RW_LOCK_READ was clear: log the failed attempt. */
 	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
 	return (0);
 }
 
 /*
  * Wrapper around __rw_try_rlock_int(): translates 'c' into its struct
  * rwlock with rwlock2rw() and returns that function's result unchanged.
  */
 int
 __rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Fast path of read unlock.
  *
  * Drops one reader reference with a release cmpset for as long as either
  * more than one reader is recorded or no RW_LOCK_WAITERS bit is set in
  * '*vp'.  On success td->td_rw_rlocks is decremented and true is returned.
  * Returns false, leaving the last observed lock word in '*vp', when this
  * is the only reader and a waiters bit is set.
  */
 static bool __always_inline
 __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
 {
 
 	for (;;) {
 		if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) {
 			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
 			    *vp - RW_ONE_READER)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR4(KTR_LOCK,
 					    "%s: %p succeeded %p -> %p",
 					    __func__, rw, (void *)*vp,
 					    (void *)(*vp - RW_ONE_READER));
 				td->td_rw_rlocks--;
 				return (true);
 			}
 			/* cmpset failed; re-evaluate the refreshed '*vp'. */
 			continue;
 		}
 		break;
 	}
 	return (false);
 }
 
 /*
  * Slow path of read unlock.  _rw_runlock_cookie_int() calls this when the
  * inline __rw_runlock_try() attempt failed or when the rw__release
  * lockstat probe is enabled.
  *
  * 'v' is the caller's last observed lock word.  One more fast-path
  * attempt is made; if that fails, the turnstile chain lock is taken, the
  * lock is released with a cmpset and the chosen waiter queue is woken.
  */
 static void __noinline
 __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
     LOCK_FILE_LINE_ARG_DEF)
 {
 	struct turnstile *ts;
 	uintptr_t setv, queue;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	if (__rw_runlock_try(rw, td, &v))
 		goto out_lockstat;
 
 	/*
 	 * Ok, we know we have waiters and we think we are the
 	 * last reader, so grab the turnstile lock.
 	 */
 	turnstile_chain_lock(&rw->lock_object);
 	v = RW_READ_VALUE(rw);
 	for (;;) {
 		if (__rw_runlock_try(rw, td, &v))
 			break;
 
 		MPASS(v & RW_LOCK_WAITERS);
 
 		/*
 		 * Try to drop our lock leaving the lock in an unlocked
 		 * state.
 		 *
 		 * If you wanted to do explicit lock handoff you'd have to
 		 * do it here.  You'd also want to use turnstile_signal()
 		 * and you'd have to handle the race where a higher
 		 * priority thread blocks on the write lock before the
 		 * thread you wakeup actually runs and have the new thread
 		 * "steal" the lock.  For now it's a lot simpler to just
 		 * wakeup all of the waiters.
 		 *
 		 * If the cmpset below fails, then another thread might
 		 * have changed the lock word (e.g. acquired a read lock);
 		 * 'v' has been refreshed, so restart the loop with the
 		 * turnstile chain lock still held.
 		 *
 		 * Writers are woken in preference to readers: if
 		 * RW_LOCK_WRITE_WAITERS is set the exclusive queue is
 		 * chosen and RW_LOCK_READ_WAITERS is carried over.  The
 		 * RW_LOCK_WRITE_SPINNER bit is always carried over.
 		 */
 		setv = RW_UNLOCKED;
 		queue = TS_SHARED_QUEUE;
 		if (v & RW_LOCK_WRITE_WAITERS) {
 			queue = TS_EXCLUSIVE_QUEUE;
 			setv |= (v & RW_LOCK_READ_WAITERS);
 		}
 		setv |= (v & RW_LOCK_WRITE_SPINNER);
 		if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
 			continue;
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
 			    __func__, rw);
 
 		/*
 		 * Ok.  The lock is released and all that's left is to
 		 * wake up the waiters.  Note that the lock might not be
 		 * free anymore, but in that case the writers will just
 		 * block again if they run before the new lock holder(s)
 		 * release the lock.
 		 */
 		ts = turnstile_lookup(&rw->lock_object);
 		MPASS(ts != NULL);
 		turnstile_broadcast(ts, queue);
 		turnstile_unpend(ts);
 		td->td_rw_rlocks--;
 		break;
 	}
 	turnstile_chain_unlock(&rw->lock_object);
 out_lockstat:
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
 }
 
 /*
  * Release a read lock on 'rw' held by curthread.
  *
  * Asserts that the lock is read-locked, updates WITNESS and the lock log,
  * then tries the inline __rw_runlock_try() fast path.  If that fails, or
  * if the rw__release lockstat probe is enabled, __rw_runlock_hard()
  * completes the release.
  */
 void
 _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t v;
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
 	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
 
 	td = curthread;
 	v = RW_READ_VALUE(rw);
 
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
 	    !__rw_runlock_try(rw, td, &v)))
 		__rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
 	else
 		lock_profile_release_lock(&rw->lock_object, false);
 
 	TD_LOCKS_DEC(curthread);
 }
 
 /*
  * Wrapper around _rw_runlock_cookie_int(): translates 'c' into its struct
  * rwlock with rwlock2rw() and forwards the caller's file/line information.
  */
 void
 _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	_rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
 }
 
 #ifdef ADAPTIVE_RWLOCKS
 /*
  * Leave the critical section tracked by '*in_critical' once the lock
  * word 'v' no longer carries RW_LOCK_WRITE_SPINNER.
  *
  * Does nothing while the spinner bit is still set in 'v' or when
  * '*in_critical' is already false.  Otherwise calls critical_exit(),
  * clears '*in_critical' and decrements '*extra_work'.
  */
 static inline void
 rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work)
 {
 
 	if (v & RW_LOCK_WRITE_SPINNER)
 		return;
 	if (*in_critical) {
 		critical_exit();
 		*in_critical = false;
 		(*extra_work)--;
 	}
 }
 #else
 /* Without ADAPTIVE_RWLOCKS this expands to an empty statement. */
 #define rw_drop_critical(v, in_critical, extra_work) do { } while (0)
 #endif
 
 /*
  * This function is called when we are unable to obtain a write lock on the
  * first try.  This means that at least one other thread holds either a
  * read or write lock.
  *
  * 'c' identifies the lock (converted with rwlock2rw()) and 'v' is the
  * caller's last observed lock word.  The function returns once curthread
  * owns the write lock, has recursed on a lock it already owns, or
  * immediately if the scheduler is stopped.
  */
 void
 __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t tid;
 	struct rwlock *rw;
 	struct turnstile *ts;
 	struct thread *owner;
 #ifdef ADAPTIVE_RWLOCKS
 	int spintries = 0;
 	int i, n;
 	enum { READERS, WRITER } sleep_reason = READERS;
 	bool in_critical = false;
 #endif
 	uintptr_t setv;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state = 0;
 	int doing_lockprof = 0;
 #endif
 	/*
 	 * Non-zero when there is something to do after the lock has been
 	 * obtained: profiling bookkeeping and/or a critical section that
 	 * is still held.
 	 */
 	int extra_work = 0;
 
 	tid = (uintptr_t)curthread;
 	rw = rwlock2rw(c);
 
 #ifdef KDTRACE_HOOKS
 	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
 		while (v == RW_UNLOCKED) {
 			if (_rw_write_lock_fetch(rw, &v, tid))
 				goto out_lockstat;
 		}
 		extra_work = 1;
 		doing_lockprof = 1;
 		all_time -= lockstat_nsecs(&rw->lock_object);
 		state = v;
 	}
 #endif
 #ifdef LOCK_PROFILING
 	extra_work = 1;
 	doing_lockprof = 1;
 	state = v;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	if (__predict_false(v == RW_UNLOCKED))
 		v = RW_READ_VALUE(rw);
 
 	/* We already own the write lock: record the recursion and return. */
 	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
 		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
 		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
 		    __func__, rw->lock_object.lo_name, file, line));
 		rw->rw_recurse++;
 		atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
 		return;
 	}
 
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
 		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
 
 #if defined(ADAPTIVE_RWLOCKS)
 	lock_delay_arg_init(&lda, &rw_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init_noadapt(&lda);
 #endif
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&rw->lock_object, false,
 	    &contested, &waittime);
 
 	THREAD_CONTENDS_ON_LOCK(&rw->lock_object);
 
 	for (;;) {
 		if (v == RW_UNLOCKED) {
 			if (_rw_write_lock_fetch(rw, &v, tid))
 				break;
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * The lock word holds nothing but the read flag and the
 		 * write spinner flag: try to take ownership directly.
 		 */
 		if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) {
 			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
 				break;
 			continue;
 		}
 
 		/*
 		 * If the lock is write locked and the owner is
 		 * running on another CPU, spin until the owner stops
 		 * running or the state of the lock changes.
 		 */
 		if (!(v & RW_LOCK_READ)) {
 			rw_drop_critical(v, &in_critical, &extra_work);
 			sleep_reason = WRITER;
 			owner = lv_rw_wowner(v);
 			if (!TD_IS_RUNNING(owner))
 				goto ts;
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
 				    __func__, rw, owner);
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
 			do {
 				lock_delay(&lda);
 				v = RW_READ_VALUE(rw);
 				owner = lv_rw_wowner(v);
 			} while (owner != NULL && TD_IS_RUNNING(owner));
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 			continue;
 		} else if (RW_READERS(v) > 0) {
 			/*
 			 * Read locked: spin for a bounded number of rounds
 			 * with RW_LOCK_WRITE_SPINNER set in the lock word,
 			 * staying inside a critical section while we are
 			 * the thread that set the flag.
 			 */
 			sleep_reason = READERS;
 			if (spintries == rowner_retries)
 				goto ts;
 			if (!(v & RW_LOCK_WRITE_SPINNER)) {
 				if (!in_critical) {
 					critical_enter();
 					in_critical = true;
 					extra_work++;
 				}
 				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 				    v | RW_LOCK_WRITE_SPINNER)) {
 					critical_exit();
 					in_critical = false;
 					extra_work--;
 					continue;
 				}
 			}
 			spintries++;
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
 			n = RW_READERS(v);
 			for (i = 0; i < rowner_loops; i += n) {
 				lock_delay_spin(n);
 				v = RW_READ_VALUE(rw);
 				if (!(v & RW_LOCK_WRITE_SPINNER))
 					break;
 				if (!(v & RW_LOCK_READ))
 					break;
 				n = RW_READERS(v);
 				if (n == 0)
 					break;
 			}
 #ifdef KDTRACE_HOOKS
 			lda.spin_cnt += i;
 #endif
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 			/* The lock state changed before the spin budget ran out. */
 			if (i < rowner_loops)
 				continue;
 		}
 ts:
 #endif
 		ts = turnstile_trywait(&rw->lock_object);
 		v = RW_READ_VALUE(rw);
 retry_ts:
 		owner = lv_rw_wowner(v);
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		if (owner != NULL) {
 			if (TD_IS_RUNNING(owner)) {
 				turnstile_cancel(ts);
 				rw_drop_critical(v, &in_critical, &extra_work);
 				continue;
 			}
 		} else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
 			turnstile_cancel(ts);
 			rw_drop_critical(v, &in_critical, &extra_work);
 			continue;
 		}
 #endif
 		/*
 		 * Check the waiters flags of this rwlock.
 		 * If the lock was released without leaving any pending
 		 * waiters queue behind, simply try to acquire it.
 		 * If a pending waiters queue is present, claim the lock
 		 * ownership and keep the pending queue.
 		 */
 		setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 		if ((v & ~setv) == RW_UNLOCKED) {
 			setv &= ~RW_LOCK_WRITE_SPINNER;
 			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) {
 				if (setv)
 					turnstile_claim(ts);
 				else
 					turnstile_cancel(ts);
 				break;
 			}
 			goto retry_ts;
 		}
 
 #ifdef ADAPTIVE_RWLOCKS
 		if (in_critical) {
 			/*
 			 * About to block: clear the spinner flag and make
 			 * sure the write waiters flag is set before
 			 * leaving the critical section.
 			 */
 			if ((v & RW_LOCK_WRITE_SPINNER) ||
 			    !((v & RW_LOCK_WRITE_WAITERS))) {
 				setv = v & ~RW_LOCK_WRITE_SPINNER;
 				setv |= RW_LOCK_WRITE_WAITERS;
 				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv))
 					goto retry_ts;
 			}
 			critical_exit();
 			in_critical = false;
 			extra_work--;
 		} else {
 #endif
 			/*
 			 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
 			 * set it.  If we fail to set it, then loop back and try
 			 * again.
 			 */
 			if (!(v & RW_LOCK_WRITE_WAITERS)) {
 				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 				    v | RW_LOCK_WRITE_WAITERS))
 					goto retry_ts;
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p set write waiters flag",
 					    __func__, rw);
 			}
 #ifdef ADAPTIVE_RWLOCKS
 		}
 #endif
 		/*
 		 * We were unable to acquire the lock and the write waiters
 		 * flag is set, so we must block on the turnstile.
 		 */
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 			    rw);
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&rw->lock_object);
 #endif
 		MPASS(owner == rw_owner(rw));
 		turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&rw->lock_object);
 		sleep_cnt++;
 #endif
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 			    __func__, rw);
 #ifdef ADAPTIVE_RWLOCKS
 		/* Having slept, start over with a fresh reader-spin budget. */
 		spintries = 0;
 #endif
 		v = RW_READ_VALUE(rw);
 	}
 	THREAD_CONTENTION_DONE(&rw->lock_object);
 	if (__predict_true(!extra_work))
 		return;
 #ifdef ADAPTIVE_RWLOCKS
 	if (in_critical)
 		critical_exit();
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&rw->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
 		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 
 	/* Record only the loops spinning and not sleeping. */
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 out_lockstat:
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 	    waittime, file, line, LOCKSTAT_WRITER);
 }
 
 /*
  * This function is called if lockstat is active or the first try at releasing
  * a write lock failed.  The latter means that the lock is recursed or one of
  * the 2 waiter bits must be set indicating that at least one thread is waiting
  * on this lock.
  *
  * 'c' identifies the lock (converted with rwlock2rw()) and 'v' is the
  * caller's last observed lock word.  Nothing is done if the scheduler is
  * stopped.
  */
 void
 __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
 	uintptr_t tid, setv;
 	int queue;
 
 	tid = (uintptr_t)curthread;
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 	if (__predict_false(v == tid))
 		v = RW_READ_VALUE(rw);
 
 	/* Recursed: undo one level of recursion and keep the lock. */
 	if (v & RW_LOCK_WRITER_RECURSED) {
 		if (--(rw->rw_recurse) == 0)
 			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
 		return;
 	}
 
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
 	/* No flags set in the lock word: a plain unlock may still succeed. */
 	if (v == tid && _rw_write_unlock(rw, tid))
 		return;
 
 	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
 	    ("%s: neither of the waiter flags are set", __func__));
 
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
 
 	turnstile_chain_lock(&rw->lock_object);
 
 	/*
 	 * Pick the queue to wake up.  If RW_LOCK_WRITE_WAITERS is set the
 	 * exclusive (writer) queue is woken, otherwise the shared (reader)
 	 * queue is.  This is probably not ideal.
 	 *
 	 * 'setv' is the value we are going to write back to rw_lock.  If we
 	 * have waiters on both queues, we need to preserve the state of
 	 * the waiter flag for the queue we don't wake up.  For now this is
 	 * hardcoded for the algorithm mentioned above.
 	 *
 	 * In the case of both readers and writers waiting we wakeup the
 	 * writers but leave the RW_LOCK_READ_WAITERS flag set.  There is
 	 * probably a potential priority inversion in there that could be
 	 * worked around either by waking both queues of waiters or doing
 	 * some complicated lock handoff gymnastics.
 	 */
 	setv = RW_UNLOCKED;
 	v = RW_READ_VALUE(rw);
 	queue = TS_SHARED_QUEUE;
 	if (v & RW_LOCK_WRITE_WAITERS) {
 		queue = TS_EXCLUSIVE_QUEUE;
 		setv |= (v & RW_LOCK_READ_WAITERS);
 	}
 	atomic_store_rel_ptr(&rw->rw_lock, setv);
 
 	/* Wake up all waiters for the specific queue. */
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
 		    queue == TS_SHARED_QUEUE ? "read" : "write");
 
 	ts = turnstile_lookup(&rw->lock_object);
 	MPASS(ts != NULL);
 	turnstile_broadcast(ts, queue);
 	turnstile_unpend(ts);
 	turnstile_chain_unlock(&rw->lock_object);
 }
 
 /*
  * Attempt to do a non-blocking upgrade from a read lock to a write
  * lock.  This will only succeed if this thread holds a single read
  * lock.  Returns true if the upgrade succeeded and false otherwise.
  *
  * If the scheduler is stopped, 1 is returned without touching the lock.
  */
 int
 __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t v, setv, tid;
 	struct turnstile *ts;
 	int success;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
 
 	/*
 	 * Attempt to switch from one reader to a writer.  If there
 	 * are any write waiters, then we will have to lock the
 	 * turnstile first to prevent races with another writer
 	 * calling turnstile_wait() before we have claimed this
 	 * turnstile.  So, do the simple case of no waiters first.
 	 */
 	tid = (uintptr_t)curthread;
 	success = 0;
 	v = RW_READ_VALUE(rw);
 	for (;;) {
 		/* More than one reader: the upgrade cannot succeed. */
 		if (RW_READERS(v) > 1)
 			break;
 		if (!(v & RW_LOCK_WAITERS)) {
 			success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
 			if (!success)
 				continue;
 			break;
 		}
 
 		/*
 		 * Ok, we think we have waiters, so lock the turnstile.
 		 */
 		ts = turnstile_trywait(&rw->lock_object);
 		v = RW_READ_VALUE(rw);
 retry_ts:
 		if (RW_READERS(v) > 1) {
 			turnstile_cancel(ts);
 			break;
 		}
 		/*
 		 * Try to switch from one reader to a writer again.  This time
 		 * we honor the current state of the waiters flags.
 		 * If we obtain the lock with the flags set, then claim
 		 * ownership of the turnstile.
 		 */
 		setv = tid | (v & RW_LOCK_WAITERS);
 		success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
 		if (success) {
 			if (v & RW_LOCK_WAITERS)
 				turnstile_claim(ts);
 			else
 				turnstile_cancel(ts);
 			break;
 		}
 		goto retry_ts;
 	}
 	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
 	if (success) {
 		/* The read lock became a write lock: drop the rlock count. */
 		curthread->td_rw_rlocks--;
 		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		LOCKSTAT_RECORD0(rw__upgrade, rw);
 	}
 	return (success);
 }
 
 /*
  * Wrapper around __rw_try_upgrade_int(): translates 'c' into its struct
  * rwlock with rwlock2rw() and returns that function's result unchanged.
  */
 int
 __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Downgrade a write lock into a single read lock.
  *
  * The caller must hold 'rw' write-locked and not recursed.  Nothing is
  * done if the scheduler is stopped.
  */
 void
 __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	struct turnstile *ts;
 	uintptr_t tid, v;
 	int rwait, wwait;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
 	/*
 	 * NOTE(review): explicit recursion check for kernels built without
 	 * INVARIANTS, presumably because the assertion above is compiled
 	 * out in that configuration -- confirm.
 	 */
 #ifndef INVARIANTS
 	if (rw_recursed(rw))
 		panic("downgrade of a recursed lock");
 #endif
 
 	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
 
 	/*
 	 * Convert from a writer to a single reader.  First we handle
 	 * the easy case with no waiters.  If there are any waiters, we
 	 * lock the turnstile and "disown" the lock.
 	 */
 	tid = (uintptr_t)curthread;
 	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
 		goto out;
 
 	/*
 	 * Ok, we think we have waiters, so lock the turnstile so we can
 	 * read the waiter flags without any races.
 	 */
 	turnstile_chain_lock(&rw->lock_object);
 	v = rw->rw_lock & RW_LOCK_WAITERS;
 	rwait = v & RW_LOCK_READ_WAITERS;
 	wwait = v & RW_LOCK_WRITE_WAITERS;
 	MPASS(rwait | wwait);
 
 	/*
 	 * Downgrade from a write lock while preserving waiters flag
 	 * and give up ownership of the turnstile.
 	 */
 	ts = turnstile_lookup(&rw->lock_object);
 	MPASS(ts != NULL);
 	/*
 	 * With no writers waiting the readers are woken below, so their
 	 * waiters flag is not carried into the new lock word.
 	 */
 	if (!wwait)
 		v &= ~RW_LOCK_READ_WAITERS;
 	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
 	/*
 	 * Wake other readers if there are no writers pending.  Otherwise they
 	 * won't be able to acquire the lock anyway.
 	 */
 	if (rwait && !wwait) {
 		turnstile_broadcast(ts, TS_SHARED_QUEUE);
 		turnstile_unpend(ts);
 	} else
 		turnstile_disown(ts);
 	turnstile_chain_unlock(&rw->lock_object);
 out:
 	/* The write lock is now a read lock held by this thread. */
 	curthread->td_rw_rlocks++;
 	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
 	LOCKSTAT_RECORD0(rw__downgrade, rw);
 }
 
 /*
  * Wrapper around __rw_downgrade_int(): translates 'c' into its struct
  * rwlock with rwlock2rw() and forwards the caller's file/line information.
  */
 void
 __rw_downgrade(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	__rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
 }
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
 #undef __rw_assert
 #endif
 
 /*
  * Panic unless the lock identified by 'c' is in the state described by
  * 'what' (an RA_* value, optionally combined with RA_RECURSED or
  * RA_NOTRECURSED).  'file' and 'line' identify the caller in the panic
  * message.  Nothing is checked if the scheduler is stopped, and an
  * unrecognized 'what' is itself a panic.
  *
  * In the non-WITNESS case, rw_assert() can only detect that at least
  * *some* thread owns an rlock, but it cannot guarantee that *this*
  * thread owns an rlock.
  */
 void
 __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 {
 	const struct rwlock *rw;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 
 	switch (what) {
 	case RA_LOCKED:
 	case RA_LOCKED | RA_RECURSED:
 	case RA_LOCKED | RA_NOTRECURSED:
 	case RA_RLOCKED:
 	case RA_RLOCKED | RA_RECURSED:
 	case RA_RLOCKED | RA_NOTRECURSED:
 #ifdef WITNESS
 		witness_assert(&rw->lock_object, what, file, line);
 #else
 		/*
 		 * If some other thread has a write lock or we have one
 		 * and are asserting a read lock, fail.  Also, if no one
 		 * has a lock at all, fail.
 		 */
 		if (rw->rw_lock == RW_UNLOCKED ||
 		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
 		    rw_wowner(rw) != curthread)))
 			panic("Lock %s not %slocked @ %s:%d\n",
 			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
 			    "read " : "", file, line);
 
 		/*
 		 * The recursion flags are only checked when the lock is
 		 * write locked and a read lock was not specifically asked
 		 * for.
 		 */
 		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
 			if (rw_recursed(rw)) {
 				if (what & RA_NOTRECURSED)
 					panic("Lock %s recursed @ %s:%d\n",
 					    rw->lock_object.lo_name, file,
 					    line);
 			} else if (what & RA_RECURSED)
 				panic("Lock %s not recursed @ %s:%d\n",
 				    rw->lock_object.lo_name, file, line);
 		}
 #endif
 		break;
 	case RA_WLOCKED:
 	case RA_WLOCKED | RA_RECURSED:
 	case RA_WLOCKED | RA_NOTRECURSED:
 		/* The write owner must be curthread. */
 		if (rw_wowner(rw) != curthread)
 			panic("Lock %s not exclusively locked @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 		if (rw_recursed(rw)) {
 			if (what & RA_NOTRECURSED)
 				panic("Lock %s recursed @ %s:%d\n",
 				    rw->lock_object.lo_name, file, line);
 		} else if (what & RA_RECURSED)
 			panic("Lock %s not recursed @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 		break;
 	case RA_UNLOCKED:
 #ifdef WITNESS
 		witness_assert(&rw->lock_object, what, file, line);
 #else
 		/*
 		 * If we hold a write lock fail.  We can't reliably check
 		 * to see if we hold a read lock or not.
 		 */
 		if (rw_wowner(rw) == curthread)
 			panic("Lock %s exclusively locked @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 #endif
 		break;
 	default:
 		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
 		    line);
 	}
 }
 #endif /* INVARIANT_SUPPORT */
 
 #ifdef DDB
 /*
  * DDB helper: print the state of the rwlock behind 'lock' with
  * db_printf().
  *
  * Prints whether the lock is unlocked, destroyed, read locked (with the
  * reader count) or write locked (with the owning thread and, if recursed,
  * the recursion count), followed by which waiter flags are set.  Nothing
  * further is printed for a destroyed lock.
  */
 void
 db_show_rwlock(const struct lock_object *lock)
 {
 	const struct rwlock *rw;
 	struct thread *td;
 
 	rw = (const struct rwlock *)lock;
 
 	db_printf(" state: ");
 	if (rw->rw_lock == RW_UNLOCKED)
 		db_printf("UNLOCKED\n");
 	else if (rw->rw_lock == RW_DESTROYED) {
 		db_printf("DESTROYED\n");
 		return;
 	} else if (rw->rw_lock & RW_LOCK_READ)
 		db_printf("RLOCK: %ju locks\n",
 		    (uintmax_t)(RW_READERS(rw->rw_lock)));
 	else {
 		td = rw_wowner(rw);
 		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 		    td->td_tid, td->td_proc->p_pid, td->td_name);
 		if (rw_recursed(rw))
 			db_printf(" recursed: %u\n", rw->rw_recurse);
 	}
 	db_printf(" waiters: ");
 	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
 	case RW_LOCK_READ_WAITERS:
 		db_printf("readers\n");
 		break;
 	case RW_LOCK_WRITE_WAITERS:
 		db_printf("writers\n");
 		break;
 	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
 		db_printf("readers and writers\n");
 		break;
 	default:
 		db_printf("none\n");
 		break;
 	}
 }
 
 #endif
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 17d40ff0429c..ee666281418f 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -1,1835 +1,1836 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1986, 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include "opt_ddb.h"
 #include "opt_ekcd.h"
 #include "opt_kdb.h"
 #include "opt_panic.h"
 #include "opt_printf.h"
 #include "opt_sched.h"
 #include "opt_watchdog.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/boottrace.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/compressor.h>
 #include <sys/cons.h>
 #include <sys/disk.h>
 #include <sys/eventhandler.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/kerneldump.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/taskqueue.h>
 #include <sys/vnode.h>
 #include <sys/watchdog.h>
 
 #include <crypto/chacha20/chacha.h>
 #include <crypto/rijndael/rijndael-api-fst.h>
 #include <crypto/sha2/sha256.h>
 
 #include <ddb/ddb.h>
 
 #include <machine/cpu.h>
 #include <machine/dump.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
 
 #include <sys/signalvar.h>
 
 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");
 
 #ifndef PANIC_REBOOT_WAIT_TIME
 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
 #endif
 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
     &panic_reboot_wait_time, 0,
     "Seconds to wait before rebooting after a panic");
 static int reboot_wait_time = 0;
 SYSCTL_INT(_kern, OID_AUTO, reboot_wait_time, CTLFLAG_RWTUN,
     &reboot_wait_time, 0,
     "Seconds to wait before rebooting");
 
 /*
  * Note that stdarg.h and the ANSI-style va_start macro are used for both
  * ANSI and traditional C compilers.
  */
 #include <machine/stdarg.h>
 
 #ifdef KDB
 #ifdef KDB_UNATTENDED
 int debugger_on_panic = 0;
 #else
 int debugger_on_panic = 1;
 #endif
 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
     CTLFLAG_RWTUN, &debugger_on_panic, 0,
     "Run debugger on kernel panic");
 
 static bool debugger_on_recursive_panic = false;
 SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic,
     CTLFLAG_RWTUN, &debugger_on_recursive_panic, 0,
     "Run debugger on recursive kernel panic");
 
 int debugger_on_trap = 0;
 SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap,
     CTLFLAG_RWTUN, &debugger_on_trap, 0,
     "Run debugger on kernel trap before panic");
 
 #ifdef KDB_TRACE
 static int trace_on_panic = 1;
 static bool trace_all_panics = true;
 #else
 static int trace_on_panic = 0;
 static bool trace_all_panics = false;
 #endif
 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
     CTLFLAG_RWTUN | CTLFLAG_SECURE,
     &trace_on_panic, 0, "Print stack trace on kernel panic");
 SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN,
     &trace_all_panics, 0, "Print stack traces on secondary kernel panics");
 #endif /* KDB */
 
 static int sync_on_panic = 0;
 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
 	&sync_on_panic, 0, "Do a sync before rebooting from a panic");
 
 static bool poweroff_on_panic = 0;
 SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN,
 	&poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic");
 
 static bool powercycle_on_panic = 0;
 SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN,
 	&powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic");
 
 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Shutdown environment");
 
 #ifndef DIAGNOSTIC
 static int show_busybufs;
 #else
 static int show_busybufs = 1;
 #endif
 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
     &show_busybufs, 0,
     "Show busy buffers during shutdown");
 
 int suspend_blocked = 0;
 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
 	&suspend_blocked, 0, "Block suspend due to a pending shutdown");
 
 #ifdef EKCD
 FEATURE(ekcd, "Encrypted kernel crash dumps support");
 
 MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data");
 
 /*
  * State for encrypting a kernel crash dump.  Allocated by
  * kerneldumpcrypto_create() with room for the trailing dump key record.
  */
 struct kerneldumpcrypto {
 	/* Cipher selector; one of the KERNELDUMP_ENC_* values. */
 	uint8_t			kdc_encryption;
 	/* Current IV; re-derived by kerneldumpcrypto_init() before each use. */
 	uint8_t			kdc_iv[KERNELDUMP_IV_MAX_SIZE];
 	/* Cipher context; the member in use follows kdc_encryption. */
 	union {
 		struct {
 			keyInstance	aes_ki;
 			cipherInstance	aes_ci;
 		} u_aes;
 		struct chacha_ctx	u_chacha;
 	} u;
 /* Shorthand names for the union members above. */
 #define	kdc_ki	u.u_aes.aes_ki
 #define	kdc_ci	u.u_aes.aes_ci
 #define	kdc_chacha	u.u_chacha
 	/* Size in bytes of kdc_dumpkey[], rounded up to the dump block size. */
 	uint32_t		kdc_dumpkeysize;
 	/* Key record (cipher, IV and encrypted key) filled in at creation. */
 	struct kerneldumpkey	kdc_dumpkey[];
 };
 #endif
 
 /*
  * Per-dumper compression state, created by kerneldumpcomp_create() and
  * released by kerneldumpcomp_destroy().
  */
 struct kerneldumpcomp {
 	/* Requested format; a KERNELDUMP_COMP_* value. */
 	uint8_t			kdc_format;
 	/* Stream handle returned by compressor_init(). */
 	struct compressor	*kdc_stream;
 	/* Buffer of di->maxiosize bytes. */
 	uint8_t			*kdc_buf;
 	/* NOTE(review): presumably bytes pending in kdc_buf -- confirm at users. */
 	size_t			kdc_resid;
 };
 
 static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di,
 		    uint8_t compression);
 static void	kerneldumpcomp_destroy(struct dumperinfo *di);
 static int	kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg);
 
 static int kerneldump_gzlevel = 6;
 SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN,
     &kerneldump_gzlevel, 0,
     "Kernel crash dump compression level");
 
 /*
  * Variable panicstr contains argument to first call to panic; used as flag
  * to indicate that the kernel has already called panic.
  */
 const char *panicstr __read_mostly;
+bool scheduler_stopped __read_frequently;
 
 int dumping __read_mostly;		/* system is dumping */
 int rebooting __read_mostly;		/* system is rebooting */
 /*
  * Used to serialize between sysctl kern.shutdown.dumpdevname and list
  * modifications via ioctl.
  */
 static struct mtx dumpconf_list_lk;
 MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF);
 
 /* Our selected dumper(s). */
 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs =
     TAILQ_HEAD_INITIALIZER(dumper_configs);
 
 /* Context information for dump-debuggers, saved by the dump_savectx() macro. */
 struct pcb dumppcb;			/* Registers. */
 lwpid_t dumptid;			/* Thread ID. */
 
 static struct cdevsw reroot_cdevsw = {
      .d_version = D_VERSION,
      .d_name    = "reroot",
 };
 
 static void poweroff_wait(void *, int);
 static void shutdown_halt(void *junk, int howto);
 static void shutdown_panic(void *junk, int howto);
 static void shutdown_reset(void *junk, int howto);
 static int kern_reroot(void);
 
 /*
  * Register this file's shutdown_final event handlers: poweroff_wait() at
  * SHUTDOWN_PRI_FIRST, then shutdown_halt() and shutdown_panic(), both at
  * SHUTDOWN_PRI_LAST + 100.  The argument is unused.
  */
 static void
 shutdown_conf(void *unused)
 {
 
 	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
 	    SHUTDOWN_PRI_FIRST);
 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
 	    SHUTDOWN_PRI_LAST + 100);
 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
 	    SHUTDOWN_PRI_LAST + 100);
 }
 
 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);
 
 /*
  * The only reason this exists is to create the /dev/reroot/ directory,
  * used by reroot code in init(8) as a mountpoint for tmpfs.
  */
 static void
 reroot_conf(void *unused)
 {
 	int error;
 	struct cdev *cdev;
 
 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev,
 	    &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot");
 	if (error != 0) {
 		printf("%s: failed to create device node, error %d",
 		    __func__, error);
 	}
 }
 
 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL);
 
 /*
  * reboot(2) system call entry point.
  *
  * The caller must pass the MAC reboot check (when MAC is compiled in) and
  * hold PRIV_REBOOT.  With RB_REROOT set the request is handed to
  * kern_reroot() and its result is returned; otherwise kern_reboot() is
  * called with the caller's options.
  *
  * Returns 0 or the error from the first failing check or from
  * kern_reroot().
  */
 int
 sys_reboot(struct thread *td, struct reboot_args *uap)
 {
 	int error;
 
 #ifdef MAC
 	error = mac_system_check_reboot(td->td_ucred, uap->opt);
 	if (error != 0)
 		return (error);
 #endif
 	error = priv_check(td, PRIV_REBOOT);
 	if (error != 0)
 		return (error);
 
 	if ((uap->opt & RB_REROOT) != 0)
 		return (kern_reroot());
 
 	kern_reboot(uap->opt);
 	return (0);
 }
 
 /*
  * Task body for shutdown_nice(): ask init(8) to shut the world down by
  * sending it the signal matching the request encoded in 'arg' (a howto
  * value stored as a pointer):
  *
  *	RB_POWEROFF	SIGUSR2
  *	RB_POWERCYCLE	SIGWINCH
  *	RB_HALT		SIGUSR1
  *	otherwise	SIGINT
  *
  * The flags are tested in that order; the first match wins.
  */
 static void
 shutdown_nice_task_fn(void *arg, int pending __unused)
 {
 	int howto, sig;
 
 	howto = (uintptr_t)arg;
 
 	PROC_LOCK(initproc);
 	if ((howto & RB_POWEROFF) != 0) {
 		BOOTTRACE("SIGUSR2 to init(8)");
 		sig = SIGUSR2;
 	} else if ((howto & RB_POWERCYCLE) != 0) {
 		BOOTTRACE("SIGWINCH to init(8)");
 		sig = SIGWINCH;
 	} else if ((howto & RB_HALT) != 0) {
 		BOOTTRACE("SIGUSR1 to init(8)");
 		sig = SIGUSR1;
 	} else {
 		BOOTTRACE("SIGINT to init(8)");
 		sig = SIGINT;
 	}
 	kern_psignal(initproc, sig);
 	PROC_UNLOCK(initproc);
 }
 
 static struct task shutdown_nice_task = TASK_INITIALIZER(0,
     &shutdown_nice_task_fn, NULL);
 
 /*
  * Request an orderly shutdown, e.g. in response to <CTL><ALT><DEL> on a PC.
  *
  * Normally the request is queued on taskqueue_fast so that
  * shutdown_nice_task_fn() can signal init(8).  If init(8) is not running,
  * or the scheduler is stopped and would not let it run, reboot directly
  * without syncing.
  */
 void
 shutdown_nice(int howto)
 {
 
 	if (initproc == NULL || SCHEDULER_STOPPED()) {
 		kern_reboot(howto | RB_NOSYNC);
 		return;
 	}
 
 	BOOTTRACE("shutdown initiated");
 	shutdown_nice_task.ta_context = (void *)(uintptr_t)howto;
 	taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task);
 }
 
 /*
  * Print the system uptime as "Uptime: [<d>d][<h>h][<m>m]<s>s".  Once a
  * larger unit has been printed every smaller unit is printed too, even
  * when it is zero.
  */
 static void
 print_uptime(void)
 {
 	struct timespec up;
 	bool printed;
 
 	getnanouptime(&up);
 	printf("Uptime: ");
 
 	printed = false;
 	if (up.tv_sec >= 86400) {
 		printf("%ldd", (long)up.tv_sec / 86400);
 		up.tv_sec %= 86400;
 		printed = true;
 	}
 	if (printed || up.tv_sec >= 3600) {
 		printf("%ldh", (long)up.tv_sec / 3600);
 		up.tv_sec %= 3600;
 		printed = true;
 	}
 	if (printed || up.tv_sec >= 60) {
 		printf("%ldm", (long)up.tv_sec / 60);
 		up.tv_sec %= 60;
 	}
 	printf("%lds\n", (long)up.tv_sec);
 }
 
 /*
  * Write a crash dump using the configured dumpers.
  *
  * With DDB compiled in, a pending textdump is written to the first dumper
  * when 'textdump' is true; otherwise each configured dumper is tried in
  * list order until one succeeds.
  *
  * Returns EBUSY if a dump is already in progress, ENXIO if no dumper is
  * configured, 0 after a textdump or a successful dump, or the error from
  * the last dumper tried.
  */
 int
 doadump(boolean_t textdump)
 {
 	struct dumperinfo *di;
 	bool want_coredump;
 	int error;
 
 	if (dumping)
 		return (EBUSY);
 	if (TAILQ_EMPTY(&dumper_configs))
 		return (ENXIO);
 
 	dump_savectx();
 	dumping++;
 
 	error = 0;
 	want_coredump = true;
 #ifdef DDB
 	if (textdump && textdump_pending) {
 		want_coredump = false;
 		textdump_dumpsys(TAILQ_FIRST(&dumper_configs));
 	}
 #endif
 	if (want_coredump) {
 		TAILQ_FOREACH(di, &dumper_configs, di_next) {
 			error = dumpsys(di);
 			if (error == 0)
 				break;
 		}
 	}
 
 	dumping--;
 	return (error);
 }
 
 /*
  * Record the reason for the shutdown in the boot trace.  RB_DUMP selects
  * the "system panic:" wording; RB_HALT and RB_POWEROFF each add their own
  * entry, and with neither set the system is reported as rebooting.
  */
 static void
 reboottrace(int howto)
 {
 	bool halt, poweroff;
 
 	halt = (howto & RB_HALT) != 0;
 	poweroff = (howto & RB_POWEROFF) != 0;
 
 	if ((howto & RB_DUMP) != 0) {
 		if (halt) {
 			BOOTTRACE("system panic: halting...");
 		}
 		if (poweroff) {
 			BOOTTRACE("system panic: powering off...");
 		}
 		if (!halt && !poweroff) {
 			BOOTTRACE("system panic: rebooting...");
 		}
 		return;
 	}
 
 	if (halt) {
 		BOOTTRACE("system halting...");
 	}
 	if (poweroff) {
 		BOOTTRACE("system powering off...");
 	}
 	if (!halt && !poweroff) {
 		BOOTTRACE("system rebooting...");
 	}
 }
 
 /*
  * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or
  * power off.
  *
  * 'howto' is a mask of RB_* flags.  The sequence is: drop Giant, bind to
  * the first CPU (SMP), run the shutdown_pre_sync handlers, sync the
  * filesystems (unless RB_NOSYNC, still cold, or already done once), run
  * the shutdown_post_sync handlers, optionally dump, run the shutdown_final
  * handlers and finally call shutdown_reset().  This function does not
  * return.
  */
 void
 kern_reboot(int howto)
 {
 	/* Set once bufshutdown() has been started, so it is never repeated. */
 	static int once = 0;
 
 	if (initproc != NULL && curproc != initproc)
 		BOOTTRACE("kernel shutdown (dirty) started");
 	else
 		BOOTTRACE("kernel shutdown (clean) started");
 
 	/*
 	 * Normal paths here don't hold Giant, but we can wind up here
 	 * unexpectedly with it held.  Drop it now so we don't have to
 	 * drop and pick it up elsewhere. The paths it is locking will
 	 * never be returned to, and it is preferable to preclude
 	 * deadlock than to lock against code that won't ever
 	 * continue.
 	 */
 	while (!SCHEDULER_STOPPED() && mtx_owned(&Giant))
 		mtx_unlock(&Giant);
 
 #if defined(SMP)
 	/*
 	 * Bind us to the first CPU so that all shutdown code runs there.  Some
 	 * systems don't shutdown properly (i.e., ACPI power off) if we
 	 * run on another processor.
 	 */
 	if (!SCHEDULER_STOPPED()) {
 		thread_lock(curthread);
 		sched_bind(curthread, CPU_FIRST());
 		thread_unlock(curthread);
 		KASSERT(PCPU_GET(cpuid) == CPU_FIRST(),
 		    ("%s: not running on cpu 0", __func__));
 	}
 #endif
 	/* We're in the process of rebooting. */
 	rebooting = 1;
 	reboottrace(howto);
 
 	/*
 	 * Do any callouts that should be done BEFORE syncing the filesystems.
 	 */
 	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
 	BOOTTRACE("shutdown pre sync complete");
 
 	/*
 	 * Now sync filesystems
 	 */
 	if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) {
 		once = 1;
 		BOOTTRACE("bufshutdown begin");
 		bufshutdown(show_busybufs);
 		BOOTTRACE("bufshutdown end");
 	}
 
 	print_uptime();
 
 	cngrab();
 
 	/*
 	 * Ok, now do things that assume all filesystem activity has
 	 * been completed.
 	 */
 	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
 	BOOTTRACE("shutdown post sync complete");
 
 	/* Dump only when RB_DUMP is set without RB_HALT and no dump is running. */
 	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 
 		doadump(TRUE);
 
 	/* Now that we're going to really halt the system... */
 	BOOTTRACE("shutdown final begin");
 
 	if (shutdown_trace)
 		boottrace_dump_console();
 
 	EVENTHANDLER_INVOKE(shutdown_final, howto);
 
 	/*
 	 * Call this directly so that reset is attempted even if shutdown
 	 * handlers are not yet registered.
 	 */
 	shutdown_reset(NULL, howto);
 
 	for(;;) ;	/* safety against shutdown_reset not working */
 	/* NOTREACHED */
 }
 
 /*
  * Change the root filesystem; reached from sys_reboot() when RB_REROOT is
  * set.
  *
  * Only init(8) may do this.  The filesystem holding the running executable
  * and the /dev mount are taken off the mount list, everything else is
  * unmounted, the new root is mounted, and references to the old root vnode
  * are updated.
  *
  * Returns 0 on success, EPERM if the caller is not initproc, the vn_lock()
  * error if the text vnode cannot be locked, or ENOENT if its filesystem
  * cannot be busied or the vnode was doomed in the meantime.
  */
 static int
 kern_reroot(void)
 {
 	struct vnode *oldrootvnode, *vp;
 	struct mount *mp, *devmp;
 	int error;
 
 	if (curproc != initproc)
 		return (EPERM);
 
 	/*
 	 * Mark the filesystem containing currently-running executable
 	 * (the temporary copy of init(8)) busy.
 	 */
 	vp = curproc->p_textvp;
 	error = vn_lock(vp, LK_SHARED);
 	if (error != 0)
 		return (error);
 	mp = vp->v_mount;
 	error = vfs_busy(mp, MBF_NOWAIT);
 	if (error != 0) {
 		/*
 		 * The non-blocking attempt failed.  Retry with a blocking
 		 * vfs_busy() after dropping the vnode lock, holding a
 		 * reference on mp across the unlocked window, then re-lock
 		 * the vnode and check that it is still usable.
 		 */
 		vfs_ref(mp);
 		VOP_UNLOCK(vp);
 		error = vfs_busy(mp, 0);
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		vfs_rel(mp);
 		if (error != 0) {
 			VOP_UNLOCK(vp);
 			return (ENOENT);
 		}
 		if (VN_IS_DOOMED(vp)) {
 			VOP_UNLOCK(vp);
 			vfs_unbusy(mp);
 			return (ENOENT);
 		}
 	}
 	VOP_UNLOCK(vp);
 
 	/*
 	 * Remove the filesystem containing currently-running executable
 	 * from the mount list, to prevent it from being unmounted
 	 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
 	 *
 	 * Also preserve /dev - forcibly unmounting it could cause driver
 	 * reinitialization.
 	 */
 
 	/* Hold a reference on /dev while rootdevmp is cleared. */
 	vfs_ref(rootdevmp);
 	devmp = rootdevmp;
 	rootdevmp = NULL;
 
 	mtx_lock(&mountlist_mtx);
 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
 	TAILQ_REMOVE(&mountlist, devmp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 
 	/* Remember the old root so references to it can be updated below. */
 	oldrootvnode = rootvnode;
 
 	/*
 	 * Unmount everything except for the two filesystems preserved above.
 	 */
 	vfs_unmountall();
 
 	/*
 	 * Add /dev back; vfs_mountroot() will move it into its new place.
 	 */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 	rootdevmp = devmp;
 	vfs_rel(rootdevmp);
 
 	/*
 	 * Mount the new rootfs.
 	 */
 	vfs_mountroot();
 
 	/*
 	 * Update all references to the old rootvnode.
 	 */
 	mountcheckdirs(oldrootvnode, rootvnode);
 
 	/*
 	 * Add the temporary filesystem back and unbusy it.
 	 */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 	vfs_unbusy(mp);
 
 	return (0);
 }
 
 /*
  * shutdown_final handler for a clean halt.  When RB_HALT is set, tell the
  * operator that the system has halted and wait for a console key press
  * before letting the shutdown sequence continue.  Without a console the
  * CPU is halted instead.
  */
 static void
 shutdown_halt(void *junk, int howto)
 {
 
 	if ((howto & RB_HALT) == 0)
 		return;
 
 	printf("\n");
 	printf("The operating system has halted.\n");
 	printf("Please press any key to reboot.\n\n");
 
 	wdog_kern_pat(WD_TO_NEVER);
 
 	/* No console, just die. */
 	if (cngetc() == -1)
 		cpu_halt();
 }
 
 /*
  * shutdown_final handler for panics (RB_DUMP set): pause before the reboot
  * according to panic_reboot_wait_time.
  *
  *	0	reboot immediately
  *	-1	wait for a console key press, with no countdown
  *	> 0	count down that many seconds; if a key is pressed during the
  *		countdown, wait for a further key press before rebooting
  */
 static void
 shutdown_panic(void *junk, int howto)
 {
 	int ticks;
 
 	if ((howto & RB_DUMP) == 0)
 		return;
 
 	/* Zero time specified - reboot NOW. */
 	if (panic_reboot_wait_time == 0)
 		return;
 
 	if (panic_reboot_wait_time != -1) {
 		printf("Automatic reboot in %d seconds - "
 		       "press a key on the console to abort\n",
 			panic_reboot_wait_time);
 		ticks = panic_reboot_wait_time * 10;
 		while (ticks > 0) {
 			DELAY(1000 * 100); /* 1/10th second */
 			/* Did user type a key? */
 			if (cncheckc() != -1)
 				break;
 			--ticks;
 		}
 		/* The countdown ran out without a key press. */
 		if (ticks == 0)
 			return;
 	}
 
 	printf("--> Press a key on the console to reboot,\n");
 	printf("--> or switch off the system now.\n");
 	cngetc();
 }
 
 /*
  * Everything done, now reset.
  *
  * Announces the reboot, waits reboot_wait_time seconds (zero or negative
  * means no wait) and then resets the CPU.  Both arguments are unused.
  */
 static void
 shutdown_reset(void *junk, int howto)
 {
 	int secs;
 
 	printf("Rebooting...\n");
 
 	/*
 	 * Wait one second at a time.  Converting the whole tunable to
 	 * microseconds in a single int multiplication overflows for values
 	 * above 2147 seconds.
 	 */
 	for (secs = reboot_wait_time; secs > 0; secs--)
 		DELAY(1000000);
 
 	/*
 	 * Acquiring smp_ipi_mtx here has a double effect:
 	 * - it disables interrupts avoiding CPU0 preemption
 	 *   by fast handlers (thus deadlocking  against other CPUs)
 	 * - it avoids deadlocks against smp_rendezvous() or, more
 	 *   generally, threads busy-waiting, with this spinlock held,
 	 *   and waiting for responses by threads on other CPUs
 	 *   (ie. smp_tlb_shootdown()).
 	 *
 	 * For the !SMP case it just needs to handle the former problem.
 	 */
 #ifdef SMP
 	mtx_lock_spin(&smp_ipi_mtx);
 #else
 	spinlock_enter();
 #endif
 
 	cpu_reset();
 	/* NOTREACHED */ /* assuming reset worked */
 }
 
 #if defined(WITNESS) || defined(INVARIANT_SUPPORT)
 static int kassert_warn_only = 0;
 #ifdef KDB
 static int kassert_do_kdb = 0;
 #endif
 #ifdef KTR
 static int kassert_do_ktr = 0;
 #endif
 static int kassert_do_log = 1;
 static int kassert_log_pps_limit = 4;
 static int kassert_log_mute_at = 0;
 static int kassert_log_panic_at = 0;
 static int kassert_suppress_in_panic = 0;
 static int kassert_warnings = 0;
 
 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
     "kassert options");
 
 #ifdef KASSERT_PANIC_OPTIONAL
 #define KASSERT_RWTUN	CTLFLAG_RWTUN
 #else
 #define KASSERT_RWTUN	CTLFLAG_RDTUN
 #endif
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN,
     &kassert_warn_only, 0,
     "KASSERT triggers a panic (0) or just a warning (1)");
 
 #ifdef KDB
 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN,
     &kassert_do_kdb, 0, "KASSERT will enter the debugger");
 #endif
 
 #ifdef KTR
 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN,
     &kassert_do_ktr, 0,
     "KASSERT does a KTR, set this to the KTRMASK you want");
 #endif
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN,
     &kassert_do_log, 0,
     "If warn_only is enabled, log (1) or do not log (0) assertion violations");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS,
     &kassert_warnings, 0, "number of KASSERTs that have been triggered");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN,
     &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN,
     &kassert_log_pps_limit, 0, "limit number of log messages per second");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN,
     &kassert_log_mute_at, 0, "max number of KASSERTS to log");
 
 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN,
     &kassert_suppress_in_panic, 0,
     "KASSERTs will be suppressed while handling a panic");
 #undef KASSERT_RWTUN
 
 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kassert_sysctl_kassert, "I",
     "set to trigger a test kassert");
 
 static int
 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
 	return (0);
 }
 
 #ifdef KASSERT_PANIC_OPTIONAL
 /*
  * Called by KASSERT, this decides if we will panic
  * or if we will log via printf and/or ktr.
  *
  * 'fmt' and the variadic arguments are a printf-style description of the
  * failed assertion.  The function returns only when the failure is treated
  * as a warning; otherwise it ends in vpanic().
  */
 void
 kassert_panic(const char *fmt, ...)
 {
 	/* Static, so the formatted message is shared by all callers. */
 	static char buf[256];
 	va_list ap;
 
 	va_start(ap, fmt);
 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
 	va_end(ap);
 
 	/*
 	 * If we are suppressing secondary panics, log the warning but do not
 	 * re-enter panic/kdb.
 	 */
 	if (KERNEL_PANICKED() && kassert_suppress_in_panic) {
 		if (kassert_do_log) {
 			printf("KASSERT failed: %s\n", buf);
 #ifdef KDB
 			if (trace_all_panics && trace_on_panic)
 				kdb_backtrace();
 #endif
 		}
 		return;
 	}
 
 	/*
 	 * panic if we're not just warning, or if we've exceeded
 	 * kassert_log_panic_at warnings.
 	 */
 	if (!kassert_warn_only ||
 	    (kassert_log_panic_at > 0 &&
 	     kassert_warnings >= kassert_log_panic_at)) {
 		/* Restart the argument list; the first pass was ended above. */
 		va_start(ap, fmt);
 		vpanic(fmt, ap);
 		/* NORETURN */
 	}
 #ifdef KTR
 	if (kassert_do_ktr)
 		CTR0(ktr_mask, buf);
 #endif /* KTR */
 	/*
 	 * log if we've not yet met the mute limit.
 	 */
 	if (kassert_do_log &&
 	    (kassert_log_mute_at == 0 ||
 	     kassert_warnings < kassert_log_mute_at)) {
 		/* Rate-limit state for ppsratecheck(). */
 		static  struct timeval lasterr;
 		static  int curerr;
 
 		if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
 			printf("KASSERT failed: %s\n", buf);
 			kdb_backtrace();
 		}
 	}
 #ifdef KDB
 	if (kassert_do_kdb) {
 		kdb_enter(KDB_WHY_KASSERT, buf);
 	}
 #endif
 	/* Count the warning only after it has been logged. */
 	atomic_add_int(&kassert_warnings, 1);
 }
 #endif /* KASSERT_PANIC_OPTIONAL */
 #endif
 
 /*
  * panic() is called on unresolvable fatal errors.  It is the variadic
  * front end to vpanic(), which prints "panic: mesg" and then reboots.  If
  * we are called twice, then we avoid trying to sync the disks as this
  * often leads to recursive panics.
  */
 void
 panic(const char *fmt, ...)
 {
 	va_list args;
 
 	va_start(args, fmt);
 	vpanic(fmt, args);
 }
 
 /*
  * Common panic path, called by panic() and kassert_panic() with an
  * already-started argument list.
  *
  * On the first panic the other CPUs are stopped (SMP), the formatted
  * message is saved in a static buffer and published through 'panicstr',
  * and a dump is requested (RB_DUMP).  On a later panic the message is only
  * printed and syncing is disabled (RB_NOSYNC).  In both cases the function
  * ends by calling kern_reboot() with the assembled boot options.
  */
 void
 vpanic(const char *fmt, va_list ap)
 {
 #ifdef SMP
 	cpuset_t other_cpus;
 #endif
 	struct thread *td = curthread;
 	int bootopt, newpanic;
 	/* Holds the first panic message; panicstr points here afterwards. */
 	static char buf[256];
 
 	/*
 	 * 'fmt' must not be NULL as it is put into 'panicstr' which is then
 	 * used as a flag to detect if the kernel has panicked.  Also, although
 	 * vsnprintf() supports a NULL 'fmt' argument, use a more informative
 	 * message.
 	 */
 	if (fmt == NULL)
 		fmt = "<no panic string!>";
 
 	spinlock_enter();
 
 #ifdef SMP
 	/*
 	 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
 	 * concurrently entering panic.  Only the winner will proceed
 	 * further.
 	 */
 	if (!KERNEL_PANICKED() && !kdb_active) {
 		other_cpus = all_cpus;
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 		stop_cpus_hard(other_cpus);
 	}
 #endif
 
 	/*
 	 * Ensure that the scheduler is stopped while panicking, even if panic
 	 * has been entered from kdb.
 	 */
-	td->td_stopsched = 1;
+	scheduler_stopped = true;
 
 	/* A repeated panic skips the sync; only the first one asks for a dump. */
 	bootopt = RB_AUTOBOOT;
 	newpanic = 0;
 	if (KERNEL_PANICKED())
 		bootopt |= RB_NOSYNC;
 	else {
 		bootopt |= RB_DUMP;
 		panicstr = fmt;
 		newpanic = 1;
 	}
 
 	if (newpanic) {
 		/* Replace the raw format string with the formatted message. */
 		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
 		panicstr = buf;
 		cngrab();
 		printf("panic: %s\n", buf);
 	} else {
 		printf("panic: ");
 		vprintf(fmt, ap);
 		printf("\n");
 	}
 #ifdef SMP
 	printf("cpuid = %d\n", PCPU_GET(cpuid));
 #endif
 	printf("time = %jd\n", (intmax_t )time_second);
 #ifdef KDB
 	if ((newpanic || trace_all_panics) && trace_on_panic)
 		kdb_backtrace();
 	if (debugger_on_panic)
 		kdb_enter(KDB_WHY_PANIC, "panic");
 	else if (!newpanic && debugger_on_recursive_panic)
 		kdb_enter(KDB_WHY_PANIC, "re-panic");
 #endif
 	/*thread_lock(td); */
 	td->td_flags |= TDF_INPANIC;
 	/* thread_unlock(td); */
 	/* Apply the kern.*_on_panic tunables to the reboot request. */
 	if (!sync_on_panic)
 		bootopt |= RB_NOSYNC;
 	if (poweroff_on_panic)
 		bootopt |= RB_POWEROFF;
 	if (powercycle_on_panic)
 		bootopt |= RB_POWERCYCLE;
 	kern_reboot(bootopt);
 }
 
 /*
  * Support for poweroff delay.
  *
  * Please note that setting this delay too short might power off your machine
  * before the write cache on your hard disk has been flushed, leading to
  * soft-updates inconsistencies.
  */
 #ifndef POWEROFF_DELAY
 # define POWEROFF_DELAY 5000
 #endif
 static int poweroff_delay = POWEROFF_DELAY;
 
 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
     &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");
 
 /*
  * shutdown_final handler: before a power off or power cycle, wait
  * poweroff_delay milliseconds.  Does nothing for other shutdown types or
  * when the delay is zero or negative.
  */
 static void
 poweroff_wait(void *junk, int howto)
 {
 
 	if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) != 0 && poweroff_delay > 0)
 		DELAY(poweroff_delay * 1000);
 }
 
 /*
  * Some system processes (e.g. syncer) need to be stopped at appropriate
  * points in their main loops prior to a system shutdown, so that they
  * won't interfere with the shutdown process (e.g. by holding a disk buf
  * to cause sync to fail).  For each of these system processes, register
  * shutdown_kproc() as a handler for one of shutdown events.
  */
 static int kproc_shutdown_wait = 60;
 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
     &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process");
 
 /*
  * Shutdown event handler that suspends the system process passed as 'arg'
  * (a struct proc *), waiting at most kproc_shutdown_wait seconds.  The
  * outcome is reported on the console.  Does nothing once the scheduler is
  * stopped.
  */
 void
 kproc_shutdown(void *arg, int howto)
 {
 	struct proc *p;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	p = (struct proc *)arg;
 	printf("Waiting (max %d seconds) for system process `%s' to stop... ",
 	    kproc_shutdown_wait, p->p_comm);
 
 	if (kproc_suspend(p, kproc_shutdown_wait * hz) != EWOULDBLOCK)
 		printf("done\n");
 	else
 		printf("timed out\n");
 }
 
 /*
  * Shutdown event handler that suspends the system thread passed as 'arg'
  * (a struct thread *), waiting at most kproc_shutdown_wait seconds.  The
  * outcome is reported on the console.  Does nothing once the scheduler is
  * stopped.
  */
 void
 kthread_shutdown(void *arg, int howto)
 {
 	struct thread *td;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	td = (struct thread *)arg;
 	printf("Waiting (max %d seconds) for system thread `%s' to stop... ",
 	    kproc_shutdown_wait, td->td_name);
 
 	if (kthread_suspend(td, kproc_shutdown_wait * hz) != EWOULDBLOCK)
 		printf("done\n");
 	else
 		printf("timed out\n");
 }
 
 /*
  * Handler for the kern.shutdown.dumpdevname sysctl: report the device
  * names of the configured dumpers, in list order, as one comma-separated
  * string.  The list is walked with dumpconf_list_lk held.
  *
  * Returns 0, or the error from wiring the old buffer or finishing the
  * sbuf.
  */
 static int
 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS)
 {
 	struct dumperinfo *di;
 	struct sbuf sb;
 	char buf[256];
 	bool first;
 	int error;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 
 	sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req);
 
 	first = true;
 	mtx_lock(&dumpconf_list_lk);
 	TAILQ_FOREACH(di, &dumper_configs, di_next) {
 		/* Separate every entry after the first with a comma. */
 		if (!first)
 			sbuf_putc(&sb, ',');
 		first = false;
 		sbuf_cat(&sb, di->di_devname);
 	}
 	mtx_unlock(&dumpconf_list_lk);
 
 	error = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	return (error);
 }
 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0,
     dumpdevname_sysctl_handler, "A",
     "Device(s) for kernel dumps");
 
 static int _dump_append(struct dumperinfo *di, void *virtual, size_t length);
 
 #ifdef EKCD
 /*
  * Allocate and initialize the encryption state for a dumper.
  *
  * A random IV is generated, the cipher selected by 'encryption' is keyed
  * with 'key', and the trailing key record is filled with the cipher, the
  * IV and a copy of the 'encryptedkeysize' bytes at 'encryptedkey'.  The
  * key record is sized up to a multiple of 'blocksize'.
  *
  * Returns the new state, or NULL (after zeroing and freeing it) when the
  * cipher is unknown or the AES key setup fails.
  */
 static struct kerneldumpcrypto *
 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption,
     const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey)
 {
 	struct kerneldumpcrypto *kdc;
 	struct kerneldumpkey *kdk;
 	uint32_t keyrecsize;
 
 	keyrecsize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize);
 	kdc = malloc(sizeof(*kdc) + keyrecsize, M_EKCD, M_WAITOK | M_ZERO);
 
 	arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0);
 	kdc->kdc_encryption = encryption;
 
 	if (kdc->kdc_encryption == KERNELDUMP_ENC_AES_256_CBC) {
 		if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0)
 			goto failed;
 	} else if (kdc->kdc_encryption == KERNELDUMP_ENC_CHACHA20) {
 		chacha_keysetup(&kdc->kdc_chacha, key, 256);
 	} else {
 		goto failed;
 	}
 
 	/* Fill in the key record that trails the structure. */
 	kdc->kdc_dumpkeysize = keyrecsize;
 	kdk = kdc->kdc_dumpkey;
 	kdk->kdk_encryption = kdc->kdc_encryption;
 	memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv));
 	kdk->kdk_encryptedkeysize = htod32(encryptedkeysize);
 	memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize);
 
 	return (kdc);
 
 failed:
 	zfree(kdc, M_EKCD);
 	return (NULL);
 }
 
 /*
  * Prepare the encryption state for writing one dump.
  *
  * The IV is replaced by the SHA-256 hash of the previous IV, the cipher is
  * re-initialized with it, and on success the new IV is copied into the key
  * record.  The temporary hash is wiped before returning.
  *
  * Returns 0 on success or when 'kdc' is NULL (nothing to do), EINVAL when
  * the cipher is unknown or the AES cipher initialization fails.
  */
 static int
 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc)
 {
 	uint8_t hash[SHA256_DIGEST_LENGTH];
 	SHA256_CTX ctx;
 	struct kerneldumpkey *kdk;
 	int error;
 
 	if (kdc == NULL)
 		return (0);
 
 	/*
 	 * When a user enters ddb it can write a crash dump multiple times.
 	 * Each time it should be encrypted using a different IV.
 	 */
 	SHA256_Init(&ctx);
 	SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv));
 	SHA256_Final(hash, &ctx);
 	bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv));
 
 	error = 0;
 	switch (kdc->kdc_encryption) {
 	case KERNELDUMP_ENC_AES_256_CBC:
 		if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC,
 		    kdc->kdc_iv) <= 0)
 			error = EINVAL;
 		break;
 	case KERNELDUMP_ENC_CHACHA20:
 		chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	/* Publish the new IV in the key record only on success. */
 	if (error == 0) {
 		kdk = kdc->kdc_dumpkey;
 		memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv));
 	}
 
 	explicit_bzero(hash, sizeof(hash));
 	return (error);
 }
 
 /*
  * Return the size in bytes of the dump key record, or 0 when there is no
  * encryption state.
  */
 static uint32_t
 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc)
 {
 
 	return (kdc != NULL ? kdc->kdc_dumpkeysize : 0);
 }
 #endif /* EKCD */
 
 /*
  * Allocate the compression state for dumper 'di'.
  *
  * 'compression' selects gzip or zstd.  A compressor stream is opened with
  * kerneldumpcomp_write_cb() as its output callback, kerneldump_gzlevel as
  * the level and 'di' as the callback argument, and a buffer of
  * di->maxiosize bytes is allocated.
  *
  * Returns the new state, or NULL when the format is not supported or the
  * stream cannot be created.
  */
 static struct kerneldumpcomp *
 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression)
 {
 	struct kerneldumpcomp *kdcomp;
 	int format;
 
 	if (compression == KERNELDUMP_COMP_GZIP)
 		format = COMPRESS_GZIP;
 	else if (compression == KERNELDUMP_COMP_ZSTD)
 		format = COMPRESS_ZSTD;
 	else
 		return (NULL);
 
 	kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO);
 	kdcomp->kdc_format = compression;
 	kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb,
 	    format, di->maxiosize, kerneldump_gzlevel, di);
 	if (kdcomp->kdc_stream != NULL) {
 		kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER,
 		    M_WAITOK | M_NODUMP);
 		return (kdcomp);
 	}
 
 	/* No stream: discard the partially built state. */
 	free(kdcomp, M_DUMPER);
 	return (NULL);
 }
 
 /*
  * Release the compression state attached to dumper 'di', if any: close
  * the compressor stream, zero and free the buffer, then free the state.
  * di->kdcomp itself is not modified.
  */
 static void
 kerneldumpcomp_destroy(struct dumperinfo *di)
 {
 	struct kerneldumpcomp *comp = di->kdcomp;
 
 	if (comp != NULL) {
 		compressor_fini(comp->kdc_stream);
 		zfree(comp->kdc_buf, M_DUMPER);
 		free(comp, M_DUMPER);
 	}
 }
 
 /*
  * Release a dumper together with its block buffer, compression state and
  * (with EKCD) crypto state.  The dumper must not be present on the global
  * list.  A NULL 'di' is ignored.
  */
 void
 dumper_destroy(struct dumperinfo *di)
 {
 
 	if (di != NULL) {
 		zfree(di->blockbuf, M_DUMPER);
 		kerneldumpcomp_destroy(di);
 #ifdef EKCD
 		zfree(di->kdcrypto, M_EKCD);
 #endif
 		zfree(di, M_DUMPER);
 	}
 }
 
 /*
  * Build a new dumper from 'di_template' for the device named 'devname',
  * configured according to 'kda', and hand it back through '*dip'.
  *
  * Returns 0 on success.  Returns EINVAL when 'dip' is NULL or when the
  * crypto or compression state cannot be created, and EOPNOTSUPP when
  * encryption is requested without EKCD or when AES-256-CBC is combined with
  * compression.  On failure the partially built dumper is destroyed and '*dip'
  * is left untouched.
  */
 int
 dumper_create(const struct dumperinfo *di_template, const char *devname,
     const struct diocskerneldump_arg *kda, struct dumperinfo **dip)
 {
 	struct dumperinfo *newdi;
 	int error;
 
 	if (dip == NULL)
 		return (EINVAL);
 
 	/* The allocation is sized to hold the device name as well. */
 	newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER,
 	    M_WAITOK | M_ZERO);
 	memcpy(newdi, di_template, sizeof(*newdi));
 	/* Never inherit the template's owned resources. */
 	newdi->kdcomp = NULL;
 	newdi->kdcrypto = NULL;
 	newdi->blockbuf = NULL;
 	strcpy(newdi->di_devname, devname);
 
 	error = 0;
 	if (kda->kda_encryption != KERNELDUMP_ENC_NONE) {
 #ifdef EKCD
 		newdi->kdcrypto = kerneldumpcrypto_create(newdi->blocksize,
 		    kda->kda_encryption, kda->kda_key,
 		    kda->kda_encryptedkeysize, kda->kda_encryptedkey);
 		if (newdi->kdcrypto == NULL)
 			error = EINVAL;
 #else
 		error = EOPNOTSUPP;
 #endif
 		if (error != 0)
 			goto cleanup;
 	}
 
 	if (kda->kda_compression != KERNELDUMP_COMP_NONE) {
 #ifdef EKCD
 		/*
 		 * An unpadded block cipher cannot be combined with
 		 * compression: nothing guarantees that the compressed output
 		 * length is a multiple of the cipher block size.
 		 */
 		if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) {
 			error = EOPNOTSUPP;
 			goto cleanup;
 		}
 #endif
 		newdi->kdcomp = kerneldumpcomp_create(newdi,
 		    kda->kda_compression);
 		if (newdi->kdcomp == NULL) {
 			error = EINVAL;
 			goto cleanup;
 		}
 	}
 
 	newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO);
 	*dip = newdi;
 	return (0);
 
 cleanup:
 	dumper_destroy(newdi);
 	return (error);
 }
 
 /*
  * Create a new dumper and register it in the global list.
  */
 int
 dumper_insert(const struct dumperinfo *di_template, const char *devname,
     const struct diocskerneldump_arg *kda)
 {
 	struct dumperinfo *newdi, *listdi;
 	bool inserted;
 	uint8_t index;
 	int error;
 
 	index = kda->kda_index;
 	MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV &&
 	    index != KDA_REMOVE_ALL);
 
 	error = priv_check(curthread, PRIV_SETDUMPER);
 	if (error != 0)
 		return (error);
 
 	error = dumper_create(di_template, devname, kda, &newdi);
 	if (error != 0)
 		return (error);
 
 	/* Add the new configuration to the queue */
 	mtx_lock(&dumpconf_list_lk);
 	inserted = false;
 	TAILQ_FOREACH(listdi, &dumper_configs, di_next) {
 		if (index == 0) {
 			TAILQ_INSERT_BEFORE(listdi, newdi, di_next);
 			inserted = true;
 			break;
 		}
 		index--;
 	}
 	if (!inserted)
 		TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next);
 	mtx_unlock(&dumpconf_list_lk);
 
 	return (0);
 }
 
 #ifdef DDB
 /*
  * Link 'newdi' at the head of the dumper_configs list.  No lock is taken
  * here.
  */
 void
 dumper_ddb_insert(struct dumperinfo *newdi)
 {
 	TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next);
 }
 
 /*
  * Unlink 'di' from the dumper_configs list.  No lock is taken here and the
  * dumper itself is not freed.
  */
 void
 dumper_ddb_remove(struct dumperinfo *di)
 {
 	TAILQ_REMOVE(&dumper_configs, di, di_next);
 }
 #endif
 
 /*
  * Decide whether the configured dumper 'di' is selected by a removal request
  * for device 'devname' described by 'kda'.
  *
  * KDA_REMOVE_ALL matches every dumper.  KDA_REMOVE_DEV matches every dumper
  * on the named device.  Otherwise the device name, compression format and
  * encryption algorithm must all agree.
  */
 static bool
 dumper_config_match(const struct dumperinfo *di, const char *devname,
     const struct diocskerneldump_arg *kda)
 {
 	if (kda->kda_index == KDA_REMOVE_ALL)
 		return (true);
 
 	if (strcmp(di->di_devname, devname) != 0)
 		return (false);
 
 	/*
 	 * Allow wildcard removal of configs matching a device on g_dev_orphan.
 	 */
 	if (kda->kda_index == KDA_REMOVE_DEV)
 		return (true);
 
 	/* A dumper without compression state only matches COMP_NONE. */
 	if (di->kdcomp == NULL) {
 		if (kda->kda_compression != KERNELDUMP_COMP_NONE)
 			return (false);
 	} else if (di->kdcomp->kdc_format != kda->kda_compression)
 		return (false);
 
 #ifdef EKCD
 	/*
 	 * Only the algorithm is compared, not the key.  It seems weird to
 	 * expect multiple fallback dump configurations on the same device
 	 * that only differ in crypto key.
 	 */
 	if (di->kdcrypto != NULL)
 		return (di->kdcrypto->kdc_encryption == kda->kda_encryption);
 #endif
 	/* A dumper without crypto state only matches ENC_NONE. */
 	return (kda->kda_encryption == KERNELDUMP_ENC_NONE);
 }
 
 /*
  * Remove and free the requested dumper(s) from the global list.
  */
 int
 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda)
 {
 	struct dumperinfo *di, *sdi;
 	bool found;
 	int error;
 
 	error = priv_check(curthread, PRIV_SETDUMPER);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Try to find a matching configuration, and kill it.
 	 *
 	 * NULL 'kda' indicates remove any configuration matching 'devname',
 	 * which may remove multiple configurations in atypical configurations.
 	 */
 	found = false;
 	mtx_lock(&dumpconf_list_lk);
 	TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) {
 		if (dumper_config_match(di, devname, kda)) {
 			found = true;
 			TAILQ_REMOVE(&dumper_configs, di, di_next);
 			dumper_destroy(di);
 		}
 	}
 	mtx_unlock(&dumpconf_list_lk);
 
 	/* Only produce ENOENT if a more targeted match didn't match. */
 	if (!found && kda->kda_index == KDA_REMOVE)
 		return (ENOENT);
 	return (0);
 }
 
 /*
  * Validate a write of 'length' bytes at 'offset' against the dump device
  * described by 'di'.
  *
  * Returns E2BIG when a compressed dump runs past the end of the media,
  * ENOSPC for any other out-of-range write, EINVAL when the length or the
  * offset is not a multiple of the block size, and 0 when the write is
  * acceptable.  Each failure also prints a diagnostic.
  */
 static int
 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length)
 {
 
 	/* Range checks only apply to non-empty writes on sized media. */
 	if (di->mediasize > 0 && length != 0) {
 		if (offset < di->mediaoffset ||
 		    offset - di->mediaoffset + length > di->mediasize) {
 			if (di->kdcomp != NULL &&
 			    offset >= di->mediaoffset) {
 				printf(
 		    "Compressed dump failed to fit in device boundaries.\n");
 				return (E2BIG);
 			}
 
 			printf(
 		    "Attempt to write outside dump device boundaries.\n"
 	    "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n",
 			    (intmax_t)offset, (intmax_t)di->mediaoffset,
 			    (uintmax_t)length, (intmax_t)di->mediasize);
 			return (ENOSPC);
 		}
 	}
 
 	/* The length is checked before the offset. */
 	if (length % di->blocksize != 0) {
 		printf("Attempt to write partial block of length %ju.\n",
 		    (uintmax_t)length);
 		return (EINVAL);
 	}
 	if (offset % di->blocksize != 0) {
 		printf("Attempt to write at unaligned offset %jd.\n",
 		    (intmax_t)offset);
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 #ifdef EKCD
 /*
  * Encrypt 'size' bytes of 'buf' in place with the algorithm selected in
  * 'kdc'.  Returns 0 on success, EIO when an AES primitive reports failure and
  * EINVAL for an unknown algorithm.
  */
 static int
 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size)
 {
 
 	if (kdc->kdc_encryption == KERNELDUMP_ENC_CHACHA20) {
 		chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size);
 		return (0);
 	}
 	if (kdc->kdc_encryption != KERNELDUMP_ENC_AES_256_CBC)
 		return (EINVAL);
 
 	/* NOTE(review): 8 * size presumably converts bytes to bits. */
 	if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf,
 	    8 * size, buf) <= 0)
 		return (EIO);
 
 	/* Re-initialize the cipher with the tail of the output as the IV. */
 	if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC,
 	    buf + size - 16 /* IV size for AES-256-CBC */) <= 0)
 		return (EIO);
 
 	return (0);
 }
 
 /*
  * Encrypt 'length' bytes starting at 'virtual' and write them to the dump
  * device at 'offset'.  The data is processed in pieces of at most
  * KERNELDUMP_BUFFER_SIZE bytes through a static bounce buffer, so the
  * caller's memory is never modified.
  *
  * Returns EIO if encryption fails, the dump_write() error if a write fails,
  * and 0 otherwise.
  */
 static int
 dump_encrypted_write(struct dumperinfo *di, void *virtual, off_t offset,
     size_t length)
 {
 	static uint8_t buf[KERNELDUMP_BUFFER_SIZE];
 	struct kerneldumpcrypto *kdc;
 	size_t nbytes;
 	int error;
 
 	kdc = di->kdcrypto;
 
 	for (; length > 0; length -= nbytes) {
 		nbytes = MIN(length, sizeof(buf));
 		bcopy(virtual, buf, nbytes);
 
 		if (dump_encrypt(kdc, buf, nbytes) != 0)
 			return (EIO);
 
 		error = dump_write(di, buf, offset, nbytes);
 		if (error != 0)
 			return (error);
 
 		virtual = (void *)((uint8_t *)virtual + nbytes);
 		offset += nbytes;
 	}
 
 	return (0);
 }
 #endif /* EKCD */
 
 /*
  * Output callback handed to compressor_init(): append 'length' bytes of
  * compressed data at 'base' to the dump.  'arg' is the dumper; 'offset' is
  * not used.
  *
  * Returns the _dump_append() result for block-aligned output.  For unaligned
  * output the trailing partial block is stashed in di->blockbuf and EAGAIN is
  * returned.
  */
 static int
 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg)
 {
 	struct dumperinfo *di;
 	size_t resid, rlength;
 	int error;
 
 	di = arg;
 
 	/* Block-aligned output can be appended as is. */
 	if (length % di->blocksize == 0)
 		return (_dump_append(di, base, length));
 
 	/*
 	 * This must be the final write after flushing the compression
 	 * stream.  Write as many full blocks as possible and stash the
 	 * residual data in the dumper's block buffer.  It will be padded and
 	 * written in dump_finish().
 	 */
 	rlength = rounddown(length, di->blocksize);
 	resid = length - rlength;
 	if (rlength != 0) {
 		error = _dump_append(di, base, rlength);
 		if (error != 0)
 			return (error);
 	}
 	memmove(di->blockbuf, (uint8_t *)base + rlength, resid);
 	bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid);
 	di->kdcomp->kdc_resid = resid;
 	return (EAGAIN);
 }
 
 /*
  * Write the kernel dump header 'kdh' both in front of the dump extent and in
  * the last block of the media.  When a dump key is configured (EKCD), write
  * it between the leading header and the dump.
  *
  * Returns ENOMEM if the header does not fit in one block, the result of
  * di->dumper_hdr() when the device provides that hook, or the first
  * dump_write() error; 0 on success.
  */
 static int
 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh)
 {
 #ifdef EKCD
 	struct kerneldumpcrypto *kdc;
 #endif
 	void *buf;
 	size_t hdrsz;
 	uint64_t extent;
 	uint32_t keysize;
 	int error;
 
 	hdrsz = sizeof(*kdh);
 	if (hdrsz > di->blocksize)
 		return (ENOMEM);
 
 #ifdef EKCD
 	kdc = di->kdcrypto;
 	keysize = kerneldumpcrypto_dumpkeysize(kdc);
 #else
 	keysize = 0;
 #endif
 
 	/* A device with its own header handling takes over from here. */
 	if (di->dumper_hdr != NULL)
 		return (di->dumper_hdr(di, kdh));
 
 	/* A header shorter than a block is zero-padded in blockbuf. */
 	if (hdrsz != di->blocksize) {
 		buf = di->blockbuf;
 		memset(buf, 0, di->blocksize);
 		memcpy(buf, kdh, hdrsz);
 	} else
 		buf = kdh;
 
 	extent = dtoh64(kdh->dumpextent);
 #ifdef EKCD
 	/* The key sits directly in front of the dump extent. */
 	if (kdc != NULL) {
 		error = dump_write(di, kdc->kdc_dumpkey,
 		    di->mediaoffset + di->mediasize - di->blocksize - extent -
 		    keysize, keysize);
 		if (error != 0)
 			return (error);
 	}
 #endif
 
 	/* Leading header, one block in front of the key. */
 	error = dump_write(di, buf,
 	    di->mediaoffset + di->mediasize - 2 * di->blocksize - extent -
 	    keysize, di->blocksize);
 	if (error != 0)
 		return (error);
 
 	/* Trailing header, in the last block of the media. */
 	return (dump_write(di, buf, di->mediaoffset + di->mediasize -
 	    di->blocksize, di->blocksize));
 }
 
 /*
  * The first SIZEOF_METADATA bytes of the dump device are left alone, to
  * protect us from metadata and metadata from us.
  */
 #define	SIZEOF_METADATA		(64 * 1024)
 
 /*
  * Prepare for a kernel dump: initialize the encryption state, if any, and
  * check that the dump device is large enough.
  *
  * The layout places the end of the dump just before the last block of the
  * device, which holds the trailing header:
  *
  *     +-----------+------+-----+----------------------------+------+
  *     |           | lhdr | key |    ... kernel dump ...     | thdr |
  *     +-----------+------+-----+----------------------------+------+
  *                   1 blk  opt <------- dump extent --------> 1 blk
  *
  * Data written with dump_append() starts at the beginning of the extent.
  * An uncompressed dump fills the extent; a compressed dump usually does not.
  * The real length is recorded in both headers once the dump is complete.
  *
  * If the device supplies a dumper_start callback, that callback sets up
  * dumpoff and the header layout, and its return value is returned here.
  * Otherwise E2BIG is returned when an uncompressed dump cannot fit.
  */
 int
 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh)
 {
 #ifdef EKCD
 	struct kerneldumpcrypto *kdc;
 #endif
 	void *key;
 	uint64_t dumpextent, span;
 	uint32_t keysize;
 	int error;
 
 #ifdef EKCD
 	/* Send the key before the dump so a partial dump is still usable. */
 	kdc = di->kdcrypto;
 	error = kerneldumpcrypto_init(kdc);
 	if (error != 0)
 		return (error);
 	keysize = kerneldumpcrypto_dumpkeysize(kdc);
 	key = keysize > 0 ? kdc->kdc_dumpkey : NULL;
 #else
 	error = 0;
 	keysize = 0;
 	key = NULL;
 #endif
 
 	/* Device-specific setup replaces the generic layout below. */
 	if (di->dumper_start != NULL) {
 		error = di->dumper_start(di, key, keysize);
 		di->origdumpoff = di->dumpoff;
 		return (error);
 	}
 
 	dumpextent = dtoh64(kdh->dumpextent);
 	span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize +
 	    keysize;
 	if (di->mediasize < span) {
 		if (di->kdcomp == NULL)
 			return (E2BIG);
 
 		/*
 		 * The size of the compressed dump is not known yet, so claim
 		 * the whole swap partition (minus the first 64KB) and hope
 		 * that the compressed dump fits.  If it does not, the bounds
 		 * checking in dump_write() will make the dump fail.
 		 */
 		dumpextent = di->mediasize - span + dumpextent;
 		kdh->dumpextent = htod64(dumpextent);
 	}
 
 	/* The offset at which to begin writing the dump. */
 	di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize -
 	    dumpextent;
 	di->origdumpoff = di->dumpoff;
 	return (error);
 }
 
 /*
  * Write 'length' bytes at the current dump offset, encrypting them first
  * when the dumper has crypto state (EKCD).  The offset only advances when the
  * write succeeds.  Returns the error from the underlying write.
  */
 static int
 _dump_append(struct dumperinfo *di, void *virtual, size_t length)
 {
 	int error;
 
 #ifdef EKCD
 	if (di->kdcrypto != NULL)
 		error = dump_encrypted_write(di, virtual, di->dumpoff, length);
 	else
 #endif
 		error = dump_write(di, virtual, di->dumpoff, length);
 	if (error != 0)
 		return (error);
 
 	di->dumpoff += length;
 	return (0);
 }
 
 /*
  * Append 'length' bytes at 'virtual' to the dump at dumpoff.  With
  * compression enabled the data is fed to the compression stream instead, and
  * the device is written from the stream's output callback.
  *
  * Returns EINVAL when compression is enabled and 'length' exceeds
  * di->maxiosize; otherwise the result of the compressor or of the write.
  */
 int
 dump_append(struct dumperinfo *di, void *virtual, size_t length)
 {
 	void *buf;
 
 	if (di->kdcomp == NULL)
 		return (_dump_append(di, virtual, length));
 
 	if (length > di->maxiosize)
 		return (EINVAL);
 
 	/* Bounce through a buffer to avoid CRC errors. */
 	buf = di->kdcomp->kdc_buf;
 	memmove(buf, virtual, length);
 	return (compressor_write(di->kdcomp->kdc_stream, buf, length));
 }
 
 /*
  * Write 'length' bytes at 'virtual' to the dump device at 'offset', after
  * checking the request with dump_check_bounds().  Returns the bounds-check
  * error, or else whatever the device's dumper routine returns.
  */
 int
 dump_write(struct dumperinfo *di, void *virtual, off_t offset, size_t length)
 {
 	int error;
 
 	error = dump_check_bounds(di, offset, length);
 	if (error == 0)
 		error = di->dumper(di->priv, virtual, offset, length);
 	return (error);
 }
 
 /*
  * Perform kernel dump finalization: flush the compression stream, if necessary,
  * write the leading and trailing kernel dump headers now that we know the true
  * length of the dump, and optionally write the encryption key following the
  * leading header.
  */
 int
 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh)
 {
 	int error;
 
 	if (di->kdcomp != NULL) {
 		error = compressor_flush(di->kdcomp->kdc_stream);
 		if (error == EAGAIN) {
 			/* We have residual data in di->blockbuf. */
 			error = _dump_append(di, di->blockbuf, di->blocksize);
 			if (error == 0)
 				/* Compensate for _dump_append()'s adjustment. */
 				di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid;
 			di->kdcomp->kdc_resid = 0;
 		}
 		if (error != 0)
 			return (error);
 
 		/*
 		 * We now know the size of the compressed dump, so update the
 		 * header accordingly and recompute parity.
 		 */
 		kdh->dumplength = htod64(di->dumpoff - di->origdumpoff);
 		kdh->parity = 0;
 		kdh->parity = kerneldump_parity(kdh);
 
 		compressor_reset(di->kdcomp->kdc_stream);
 	}
 
 	error = dump_write_headers(di, kdh);
 	if (error != 0)
 		return (error);
 
 	(void)dump_write(di, NULL, 0, 0);
 	return (0);
 }
 
 /*
  * Fill in the kernel dump header 'kdh' for a dump of 'dumplen' bytes taken
  * with dumper 'di'.  'magic' and 'archver' identify the dump format.  The
  * header is zeroed first and its parity is computed last, over the finished
  * header.
  */
 void
 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh,
     const char *magic, uint32_t archver, uint64_t dumplen)
 {
 	size_t vlen;
 
 	bzero(kdh, sizeof(*kdh));
 
 	/* Identification strings. */
 	strlcpy(kdh->magic, magic, sizeof(kdh->magic));
 	strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
 	strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
 	vlen = sizeof(kdh->versionstring);
 	/* A truncated version string still ends in a newline. */
 	if (strlcpy(kdh->versionstring, version, vlen) >= vlen)
 		kdh->versionstring[vlen - 2] = '\n';
 	if (panicstr != NULL)
 		strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
 
 	/* Numeric fields, stored through the htod*() conversions. */
 	kdh->version = htod32(KERNELDUMPVERSION);
 	kdh->architectureversion = htod32(archver);
 	kdh->dumplength = htod64(dumplen);
 	kdh->dumpextent = kdh->dumplength;
 	kdh->dumptime = htod64(time_second);
 	kdh->blocksize = htod32(di->blocksize);
 #ifdef EKCD
 	kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto));
 #else
 	kdh->dumpkeysize = 0;
 #endif
 	if (di->kdcomp != NULL)
 		kdh->compression = di->kdcomp->kdc_format;
 
 	kdh->parity = kerneldump_parity(kdh);
 }
 
 #ifdef DDB
 /*
  * DDB "show panic" command: print the recorded panic string, or a note that
  * none has been set.
  */
 DB_SHOW_COMMAND_FLAGS(panic, db_show_panic, DB_CMD_MEMSAFE)
 {
 
 	if (panicstr != NULL)
 		db_printf("panic: %s\n", panicstr);
 	else
 		db_printf("panicstr not set\n");
 }
 #endif
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 706ae90ef9af..d302fa45161e 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -1,1575 +1,1575 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
  * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice(s), this list of conditions and the following disclaimer as
  *    the first lines of this file unmodified other than the possible
  *    addition of one or more copyright notices.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice(s), this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  */
 
 /*
  * Shared/exclusive locks.  This implementation attempts to ensure
  * deterministic lock granting behavior, so that slocks and xlocks are
  * interleaved.
  *
  * Priority propagation will not generally raise the priority of lock holders,
  * so should not be relied upon in combination with sx locks.
  */
 
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_no_adaptive_sx.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
 #include <sys/sx.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_SX)
 #include <machine/cpu.h>
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_SX)
 #define	ADAPTIVE_SX
 #endif
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DECLARE( , , lock, failed);
 #endif
 
 /* Handy macros for sleep queues. */
 #define	SQ_EXCLUSIVE_QUEUE	0
 #define	SQ_SHARED_QUEUE		1
 
 /*
  * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
  * drop Giant anytime we have to sleep or if we adaptively spin.
  *
  * GIANT_DECLARE introduces the local state (_giantcnt plus the WITNESS save
  * slot) that GIANT_SAVE() and GIANT_RESTORE() operate on, so it must appear
  * among the declarations of any function using them.
  */
 #define	GIANT_DECLARE							\
 	int _giantcnt = 0;						\
 	WITNESS_SAVE_DECL(Giant)					\
 
 /*
  * If curthread owns Giant, release it completely, counting each unlock in
  * _giantcnt, and increment 'work'.
  */
 #define	GIANT_SAVE(work) do {						\
 	if (__predict_false(mtx_owned(&Giant))) {			\
 		work++;							\
 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
 		while (mtx_owned(&Giant)) {				\
 			_giantcnt++;					\
 			mtx_unlock(&Giant);				\
 		}							\
 	}								\
 } while (0)
 
 /*
  * Re-acquire Giant as many times as GIANT_SAVE() released it.  Does nothing
  * if _giantcnt is zero.
  */
 #define GIANT_RESTORE() do {						\
 	if (_giantcnt > 0) {						\
 		mtx_assert(&Giant, MA_NOTOWNED);			\
 		while (_giantcnt--)					\
 			mtx_lock(&Giant);				\
 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
 	}								\
 } while (0)
 
 /*
  * Returns true if an exclusive lock is recursed.  It assumes
  * curthread currently has an exclusive lock.
  */
 #define	sx_recursed(sx)		((sx)->sx_recurse != 0)
 
 static void	assert_sx(const struct lock_object *lock, int what);
 #ifdef DDB
 static void	db_show_sx(const struct lock_object *lock);
 #endif
 static void	lock_sx(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_sx(const struct lock_object *lock, struct thread **owner);
 #endif
 static uintptr_t unlock_sx(struct lock_object *lock);
 
 /*
  * Lock class descriptor for sx locks.  It wires the generic lock_object
  * hooks to the sx-specific handlers defined in this file; the DDB and DTrace
  * hooks are only present when the corresponding option is compiled in.
  */
 struct lock_class lock_class_sx = {
 	.lc_name = "sx",
 	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
 	.lc_assert = assert_sx,
 #ifdef DDB
 	.lc_ddb_show = db_show_sx,
 #endif
 	.lc_lock = lock_sx,
 	.lc_unlock = unlock_sx,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_sx,
 #endif
 };
 
 #ifndef INVARIANTS
 #define	_sx_assert(sx, what, file, line)
 #endif
 
 #ifdef ADAPTIVE_SX
 #ifdef SX_CUSTOM_BACKOFF
 static u_short __read_frequently asx_retries;
 static u_short __read_frequently asx_loops;
 static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
     "sxlock debugging");
 SYSCTL_U16(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
 SYSCTL_U16(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
 
 static struct lock_delay_config __read_frequently sx_delay;
 
 SYSCTL_U16(_debug_sx, OID_AUTO, delay_base, CTLFLAG_RW, &sx_delay.base,
     0, "");
 SYSCTL_U16(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
     0, "");
 
 /*
  * Set up the sx-specific backoff parameters (SX_CUSTOM_BACKOFF only): load
  * the defaults into sx_delay, then seed the retry and loop counts.
  */
 static void
 sx_lock_delay_init(void *arg __unused)
 {
 
 	lock_delay_default_init(&sx_delay);
 	asx_retries = 10;
 	/* Use at least 10000 loops, or sx_delay.max if that is larger. */
 	asx_loops = max(10000, sx_delay.max);
 }
 LOCK_DELAY_SYSINIT(sx_lock_delay_init);
 #else
 #define sx_delay	locks_delay
 #define asx_retries	locks_delay_retries
 #define asx_loops	locks_delay_loops
 #endif
 #endif
 
 /*
  * lock_class assert hook: forward to sx_assert() with the lock_object
  * viewed as an sx lock.
  */
 void
 assert_sx(const struct lock_object *lock, int what)
 {
 
 	sx_assert((const struct sx *)lock, what);
 }
 
 /*
  * lock_class lock hook: take the sx lock behind 'lock'.  A 'how' of zero
  * requests an exclusive lock; any other value requests a shared lock.
  */
 void
 lock_sx(struct lock_object *lock, uintptr_t how)
 {
 	struct sx *sx;
 
 	sx = (struct sx *)lock;
 	if (how == 0)
 		sx_xlock(sx);
 	else
 		sx_slock(sx);
 }
 
 /*
  * lock_class unlock hook: release the sx lock behind 'lock' and report how
  * it was held.  Returns 0 if it was held exclusively and 1 if it was held
  * shared.
  */
 uintptr_t
 unlock_sx(struct lock_object *lock)
 {
 	struct sx *sx;
 
 	sx = (struct sx *)lock;
 	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
 	if (!sx_xlocked(sx)) {
 		sx_sunlock(sx);
 		return (1);
 	}
 	sx_xunlock(sx);
 	return (0);
 }
 
 #ifdef KDTRACE_HOOKS
 /*
  * lock_class owner hook.  For a lock word in shared mode, '*owner' is set to
  * NULL and the result says whether there is at least one sharer.  Otherwise
  * '*owner' receives the owning thread and the result says whether it is
  * non-NULL.
  */
 int
 owner_sx(const struct lock_object *lock, struct thread **owner)
 {
 	const struct sx *sx;
 	uintptr_t x;
 
 	sx = (const struct sx *)lock;
 	x = sx->sx_lock;
 	*owner = NULL;
 	if ((x & SX_LOCK_SHARED) != 0)
 		return (SX_SHARERS(x) != 0);
 
 	*owner = (struct thread *)SX_OWNER(x);
 	return (*owner != NULL);
 }
 #endif
 
 /*
  * Initialize an sx lock from a struct sx_args passed as 'arg'.
  */
 void
 sx_sysinit(void *arg)
 {
 	struct sx_args *sargs;
 
 	sargs = arg;
 	sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
 }
 
 /*
  * Initialize 'sx' as an unlocked, unrecursed sx lock named 'description'.
  * 'opts' is a mask of SX_* options; each one is translated to the matching
  * LO_* lock_object flag.  WITNESS is enabled unless SX_NOWITNESS is given.
  */
 void
 sx_init_flags(struct sx *sx, const char *description, int opts)
 {
 	int flags;
 
 	MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
 	    SX_NOPROFILE | SX_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock,
 	    ("%s: sx_lock not aligned for %s: %p", __func__, description,
 	    &sx->sx_lock));
 
 	/* Every sx lock is sleepable and upgradable. */
 	flags = LO_SLEEPABLE | LO_UPGRADABLE;
 	if ((opts & SX_NOWITNESS) == 0)
 		flags |= LO_WITNESS;
 	if ((opts & SX_DUPOK) != 0)
 		flags |= LO_DUPOK;
 	if ((opts & SX_NOPROFILE) != 0)
 		flags |= LO_NOPROFILE;
 	if ((opts & SX_RECURSE) != 0)
 		flags |= LO_RECURSABLE;
 	if ((opts & SX_QUIET) != 0)
 		flags |= LO_QUIET;
 	if ((opts & SX_NEW) != 0)
 		flags |= LO_NEW;
 
 	lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
 	sx->sx_lock = SX_LOCK_UNLOCKED;
 	sx->sx_recurse = 0;
 }
 
 /*
  * Destroy an sx lock.  The lock must be unlocked and unrecursed (asserted
  * with KASSERT).  The lock word is set to SX_LOCK_DESTROYED, which the
  * KASSERTs in the lock and unlock routines check for.
  */
 void
 sx_destroy(struct sx *sx)
 {
 
 	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
 	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
 	sx->sx_lock = SX_LOCK_DESTROYED;
 	lock_destroy(&sx->lock_object);
 }
 
 /*
  * Try to acquire a shared lock on 'sx' without blocking.
  *
  * Returns 1 when a sharer was added to the lock word, and 0 when the lock
  * word is observed without SX_LOCK_SHARED set.  When SCHEDULER_STOPPED() is
  * true the function returns 1 immediately and leaves the lock untouched.
  */
 int
 sx_try_slock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("sx_try_slock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 
 	x = sx->sx_lock;
 	for (;;) {
 		KASSERT(x != SX_LOCK_DESTROYED,
 		    ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
 		/* Not in shared mode: give up rather than wait. */
 		if (!(x & SX_LOCK_SHARED))
 			break;
 		/*
 		 * Add one sharer.  'x' is passed by address, so a failed
 		 * attempt presumably refreshes it before the loop retries.
 		 */
 		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, x + SX_ONE_SHARER)) {
 			LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
 			WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
 			    sx, 0, 0, file, line, LOCKSTAT_READER);
 			TD_LOCKS_INC(curthread);
 			curthread->td_sx_slocks++;
 			return (1);
 		}
 	}
 
 	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
 	return (0);
 }
 
 /*
  * Wrapper around sx_try_slock_int() taking explicit 'file' and 'line'
  * arguments.
  */
 int
 sx_try_slock_(struct sx *sx, const char *file, int line)
 {
 
 	return (sx_try_slock_int(sx LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Acquire an exclusive lock on 'sx'.
  *
  * The fast path is a single atomic swap of the lock word from
  * SX_LOCK_UNLOCKED to the current thread pointer.  If that fails the work is
  * handed to _sx_xlock_hard(), passing on 'opts'.  Returns 0 on success or the
  * error reported by _sx_xlock_hard().
  */
 int
 _sx_xlock(struct sx *sx, int opts, const char *file, int line)
 {
 	uintptr_t tid, x;
 	int error = 0;
 
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(curthread),
 	    ("sx_xlock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line, NULL);
 	tid = (uintptr_t)curthread;
 	x = SX_LOCK_UNLOCKED;
 	/* The lockstat acquire is recorded here only for the fast path. */
 	if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 		error = _sx_xlock_hard(sx, x, opts LOCK_FILE_LINE_ARG);
 	else
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
 		    0, 0, file, line, LOCKSTAT_WRITER);
 	/* Log and account for the lock only once it is actually held. */
 	if (!error) {
 		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
 		    file, line);
 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
 		TD_LOCKS_INC(curthread);
 	}
 
 	return (error);
 }
 
 /*
  * Try to acquire an exclusive lock on 'sx' without blocking.
  *
  * Returns 1 if the lock was acquired, or recursed on when the current thread
  * already owns it and the lock is LO_RECURSABLE; returns 0 otherwise.  When
  * the scheduler is stopped the function returns 1 immediately and leaves the
  * lock untouched.
  *
  * The lockstat acquire probe fires only for a fresh acquisition, not for a
  * recursion.
  */
 int
 sx_try_xlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t tid, x;
 	int rval;
 	bool recursed;
 
 	td = curthread;
 	tid = (uintptr_t)td;
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
 	    ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
 
 	rval = 1;
 	recursed = false;
 	x = SX_LOCK_UNLOCKED;
 	for (;;) {
 		/* Try to swing the lock word from unlocked to our thread. */
 		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 			break;
 		/* The swap failed but the lock reads as unlocked: retry. */
 		if (x == SX_LOCK_UNLOCKED)
 			continue;
 		if (x == tid && (sx->lock_object.lo_flags & LO_RECURSABLE)) {
 			sx->sx_recurse++;
 			atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 			/*
 			 * Remember that this was a recursion so that it is
 			 * not reported to lockstat as a new acquisition.
 			 */
 			recursed = true;
 			break;
 		}
 		rval = 0;
 		break;
 	}
 
 	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		if (!recursed)
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
 			    sx, 0, 0, file, line, LOCKSTAT_WRITER);
 		TD_LOCKS_INC(curthread);
 	}
 
 	return (rval);
 }
 
 /*
  * Wrapper around sx_try_xlock_int() taking explicit 'file' and 'line'
  * arguments.
  */
 int
 sx_try_xlock_(struct sx *sx, const char *file, int line)
 {
 
 	return (sx_try_xlock_int(sx LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Release an exclusive lock on 'sx'.  The caller must hold the lock
  * exclusively (asserted with SA_XLOCKED).
  */
 void
 _sx_xunlock(struct sx *sx, const char *file, int line)
 {
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_XLOCKED, file, line);
 	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
 	    line);
 	/* With LOCK_DEBUG enabled, always go through the hard path. */
 #if LOCK_DEBUG > 0
 	_sx_xunlock_hard(sx, (uintptr_t)curthread, file, line);
 #else
 	__sx_xunlock(sx, curthread, file, line);
 #endif
 	TD_LOCKS_DEC(curthread);
 }
 
 /*
  * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
  * This will only succeed if this thread holds a single shared lock.
  * Return 1 if the upgrade succeeds, 0 otherwise.  When SCHEDULER_STOPPED()
  * is true, 1 is returned without touching the lock.
  */
 int
 sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 	uintptr_t waiters;
 	int success;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_SLOCKED, file, line);
 
 	/*
 	 * Try to switch from one shared lock to an exclusive lock.  We need
 	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
 	 * we will wake up the exclusive waiters when we drop the lock.
 	 */
 	success = 0;
 	x = SX_READ_VALUE(sx);
 	for (;;) {
 		/* Other sharers are present: the upgrade cannot succeed. */
 		if (SX_SHARERS(x) > 1)
 			break;
 		/* Carry the waiter bits over into the new lock word. */
 		waiters = (x & SX_LOCK_WAITERS);
 		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x,
 		    (uintptr_t)curthread | waiters)) {
 			success = 1;
 			break;
 		}
 	}
 	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
 	if (success) {
 		/* The shared hold is gone; it is now an exclusive hold. */
 		curthread->td_sx_slocks--;
 		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		LOCKSTAT_RECORD0(sx__upgrade, sx);
 	}
 	return (success);
 }
 
 /*
  * Wrapper around sx_try_upgrade_int() taking explicit 'file' and 'line'
  * arguments.
  */
 int
 sx_try_upgrade_(struct sx *sx, const char *file, int line)
 {
 
 	return (sx_try_upgrade_int(sx LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Downgrade an unrecursed exclusive lock into a single shared lock.
  *
  * Does nothing when SCHEDULER_STOPPED() is true.  Any threads waiting for a
  * shared lock are woken up; the SX_LOCK_EXCLUSIVE_WAITERS flag is preserved.
  */
 void
 sx_downgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 	int wakeup_swapper;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
 	/*
 	 * Without INVARIANTS _sx_assert() expands to nothing in this file,
 	 * so the recursion check is made explicitly.
 	 */
 #ifndef INVARIANTS
 	if (sx_recursed(sx))
 		panic("downgrade of a recursed lock");
 #endif
 
 	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
 
 	/*
 	 * Try to switch from an exclusive lock with no shared waiters
 	 * to one sharer with no shared waiters.  If there are
 	 * exclusive waiters, we don't need to lock the sleep queue so
 	 * long as we preserve the flag.  We do one quick try and if
 	 * that fails we grab the sleepq lock to keep the flags from
 	 * changing and do it the slow way.
 	 *
 	 * We have to lock the sleep queue if there are shared waiters
 	 * so we can wake them up.
 	 */
 	x = sx->sx_lock;
 	if (!(x & SX_LOCK_SHARED_WAITERS) &&
 	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
 	    (x & SX_LOCK_EXCLUSIVE_WAITERS)))
 		goto out;
 
 	/*
 	 * Lock the sleep queue so we can read the waiters bits
 	 * without any races and wakeup any shared waiters.
 	 */
 	sleepq_lock(&sx->lock_object);
 
 	/*
 	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
 	 * shared lock.  If there are any shared waiters, wake them up.
 	 */
 	wakeup_swapper = 0;
 	x = sx->sx_lock;
 	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
 	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
 	if (x & SX_LOCK_SHARED_WAITERS)
 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
 		    0, SQ_SHARED_QUEUE);
 	sleepq_release(&sx->lock_object);
 
 	/* kick_proc0() is called only after the sleep queue is released. */
 	if (wakeup_swapper)
 		kick_proc0();
 
 out:
 	/* Both paths end here: the thread now holds one shared lock. */
 	curthread->td_sx_slocks++;
 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
 	LOCKSTAT_RECORD0(sx__downgrade, sx);
 }
 
 /*
  * Wrapper around sx_downgrade_int() taking explicit 'file' and 'line'
  * arguments.
  */
 void
 sx_downgrade_(struct sx *sx, const char *file, int line)
 {
 
 	sx_downgrade_int(sx LOCK_FILE_LINE_ARG);
 }
 
 #ifdef	ADAPTIVE_SX
 /*
  * Leave the critical section recorded in '*in_critical', unless the lock
  * word 'x' has SX_LOCK_WRITE_SPINNER set or no critical section is active.
  * On exit the flag is cleared and '*extra_work' is decremented.
  */
 static inline void
 sx_drop_critical(uintptr_t x, bool *in_critical, int *extra_work)
 {
 
 	if ((x & SX_LOCK_WRITE_SPINNER) != 0 || !*in_critical)
 		return;
 
 	critical_exit();
 	*in_critical = false;
 	(*extra_work)--;
 }
 #else
 #define sx_drop_critical(x, in_critical, extra_work) do { } while (0)
 #endif
 
 /*
  * This function represents the so-called 'hard case' for sx_xlock
  * operation.  All 'easy case' failures are redirected to this.  Note
  * that ideally this would be a static function, but it needs to be
  * accessible from at least sx.h.
  *
  * Arguments:
  *	sx	the lock to acquire exclusively.
  *	x	lock word value supplied by the caller; it is used as the
  *		expected value of the first compare-and-set attempts.
  *	opts	lock options; only SX_INTERRUPTIBLE is examined here and it
  *		selects a signal-interruptible sleep.
  *
  * Returns 0 once the lock is held (including the recursion case) or when
  * the scheduler is stopped.  Otherwise returns the error reported by
  * sleepq_wait_sig() for an SX_INTERRUPTIBLE sleep; the loop is left
  * without acquiring the lock in that case.
  */
 int
 _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
 {
 	GIANT_DECLARE;
 	uintptr_t tid, setx;
 #ifdef ADAPTIVE_SX
 	struct thread *owner;
 	u_int i, n, spintries = 0;
 	enum { READERS, WRITER } sleep_reason = READERS;
 	bool in_critical = false;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	int error = 0;
 #if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef	KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state = 0;
 	int doing_lockprof = 0;
 #endif
 	int extra_work = 0;
 
 	tid = (uintptr_t)curthread;
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * With the acquire probe enabled, first retry the uncontended
 	 * acquire here; only if that fails start the timing bookkeeping
 	 * and remember the contended lock word in 'state'.
 	 */
 	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
 		while (x == SX_LOCK_UNLOCKED) {
 			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 				goto out_lockstat;
 		}
 		extra_work = 1;
 		doing_lockprof = 1;
 		all_time -= lockstat_nsecs(&sx->lock_object);
 		state = x;
 	}
 #endif
 #ifdef LOCK_PROFILING
 	extra_work = 1;
 	doing_lockprof = 1;
 	state = x;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 
 	/* Reload the lock word if the caller's snapshot said "unlocked". */
 	if (__predict_false(x == SX_LOCK_UNLOCKED))
 		x = SX_READ_VALUE(sx);
 
 	/* If we already hold an exclusive lock, then recurse. */
 	if (__predict_false(lv_sx_owner(x) == (struct thread *)tid)) {
 		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
 	    ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
 		    sx->lock_object.lo_name, file, line));
 		sx->sx_recurse++;
 		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
 		return (0);
 	}
 
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
 		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
 
 #if defined(ADAPTIVE_SX)
 	lock_delay_arg_init(&lda, &sx_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init_noadapt(&lda);
 #endif
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&sx->lock_object, false, &contested,
 	    &waittime);
 
 #ifndef INVARIANTS
 	GIANT_SAVE(extra_work);
 #endif
 
 	THREAD_CONTENDS_ON_LOCK(&sx->lock_object);
 
 	/*
 	 * Acquisition loop.  Each pass either takes the lock with a
 	 * compare-and-set, spins (ADAPTIVE_SX only), or queues the thread
 	 * on the exclusive sleep queue and sleeps.  'x' always holds the
 	 * most recently observed lock word.
 	 */
 	for (;;) {
 		if (x == SX_LOCK_UNLOCKED) {
 			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 				break;
 			continue;
 		}
 #ifdef INVARIANTS
 		GIANT_SAVE(extra_work);
 #endif
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 #ifdef ADAPTIVE_SX
 		/*
 		 * No sharers left and only the write-spinner bit set: the
 		 * lock can be claimed directly.
 		 */
 		if (x == (SX_LOCK_SHARED | SX_LOCK_WRITE_SPINNER)) {
 			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 				break;
 			continue;
 		}
 
 		/*
 		 * If the lock is write locked and the owner is
 		 * running on another CPU, spin until the owner stops
 		 * running or the state of the lock changes.
 		 */
 		if ((x & SX_LOCK_SHARED) == 0) {
 			sx_drop_critical(x, &in_critical, &extra_work);
 			sleep_reason = WRITER;
 			owner = lv_sx_owner(x);
 			if (!TD_IS_RUNNING(owner))
 				goto sleepq;
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
 				    __func__, sx, owner);
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    sx->lock_object.lo_name);
 			do {
 				lock_delay(&lda);
 				x = SX_READ_VALUE(sx);
 				owner = lv_sx_owner(x);
 			} while (owner != NULL && TD_IS_RUNNING(owner));
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 			continue;
 		} else if (SX_SHARERS(x) > 0) {
 			/*
 			 * Share locked: spin for at most asx_retries rounds,
 			 * advertising ourselves with SX_LOCK_WRITE_SPINNER
 			 * from inside a critical section.
 			 */
 			sleep_reason = READERS;
 			if (spintries == asx_retries)
 				goto sleepq;
 			if (!(x & SX_LOCK_WRITE_SPINNER)) {
 				if (!in_critical) {
 					critical_enter();
 					in_critical = true;
 					extra_work++;
 				}
 				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
 				    x | SX_LOCK_WRITE_SPINNER)) {
 					critical_exit();
 					in_critical = false;
 					extra_work--;
 					continue;
 				}
 			}
 			spintries++;
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    sx->lock_object.lo_name);
 			n = SX_SHARERS(x);
 			for (i = 0; i < asx_loops; i += n) {
 				lock_delay_spin(n);
 				x = SX_READ_VALUE(sx);
 				if (!(x & SX_LOCK_WRITE_SPINNER))
 					break;
 				if (!(x & SX_LOCK_SHARED))
 					break;
 				n = SX_SHARERS(x);
 				if (n == 0)
 					break;
 			}
 #ifdef KDTRACE_HOOKS
 			lda.spin_cnt += i;
 #endif
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 			/* The inner loop ended early: re-evaluate the lock. */
 			if (i < asx_loops)
 				continue;
 		}
 sleepq:
 #endif
 		sleepq_lock(&sx->lock_object);
 		x = SX_READ_VALUE(sx);
 retry_sleepq:
 
 		/*
 		 * If the lock was released while spinning on the
 		 * sleep queue chain lock, try again.
 		 */
 		if (x == SX_LOCK_UNLOCKED) {
 			sleepq_release(&sx->lock_object);
 			sx_drop_critical(x, &in_critical, &extra_work);
 			continue;
 		}
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the sleep queue
 		 * chain lock.  If so, drop the sleep queue lock and try
 		 * again.
 		 */
 		if (!(x & SX_LOCK_SHARED)) {
 			owner = (struct thread *)SX_OWNER(x);
 			if (TD_IS_RUNNING(owner)) {
 				sleepq_release(&sx->lock_object);
 				sx_drop_critical(x, &in_critical,
 				    &extra_work);
 				continue;
 			}
 		} else if (SX_SHARERS(x) > 0 && sleep_reason == WRITER) {
 			sleepq_release(&sx->lock_object);
 			sx_drop_critical(x, &in_critical, &extra_work);
 			continue;
 		}
 #endif
 
 		/*
 		 * If an exclusive lock was released with both shared
 		 * and exclusive waiters and a shared waiter hasn't
 		 * woken up and acquired the lock yet, sx_lock will be
 		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
 		 * If we see that value, try to acquire it once.  Note
 		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
 		 * as there are other exclusive waiters still.  If we
 		 * fail, restart the loop.
 		 */
 		setx = x & (SX_LOCK_WAITERS | SX_LOCK_WRITE_SPINNER);
 		if ((x & ~setx) == SX_LOCK_SHARED) {
 			setx &= ~SX_LOCK_WRITE_SPINNER;
 			if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid | setx))
 				goto retry_sleepq;
 			sleepq_release(&sx->lock_object);
 			CTR2(KTR_LOCK, "%s: %p claimed by new writer",
 			    __func__, sx);
 			break;
 		}
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * It is possible we set the SX_LOCK_WRITE_SPINNER bit.
 		 * It is an invariant that when the bit is set, there is
 		 * a writer ready to grab the lock. Thus clear the bit since
 		 * we are going to sleep.
 		 */
 		if (in_critical) {
 			if ((x & SX_LOCK_WRITE_SPINNER) ||
 			    !((x & SX_LOCK_EXCLUSIVE_WAITERS))) {
 				setx = x & ~SX_LOCK_WRITE_SPINNER;
 				setx |= SX_LOCK_EXCLUSIVE_WAITERS;
 				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
 				    setx)) {
 					goto retry_sleepq;
 				}
 			}
 			/*
 			 * NOTE(review): extra_work is not decremented on this
 			 * path, unlike the other critical_exit() sites in
 			 * this function -- confirm that is intentional.
 			 */
 			critical_exit();
 			in_critical = false;
 		} else {
 #endif
 			/*
 			 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS.  If we fail,
 			 * then loop back and retry.
 			 */
 			if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
 				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
 				    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
 					goto retry_sleepq;
 				}
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
 					    __func__, sx);
 			}
 #ifdef ADAPTIVE_SX
 		}
 #endif
 
 		/*
 		 * Since we have been unable to acquire the exclusive
 		 * lock and the exclusive waiters flag is set, we have
 		 * to sleep.
 		 */
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
 			    __func__, sx);
 
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&sx->lock_object);
 #endif
 		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
 		/*
 		 * Hack: this can land in thread_suspend_check which will
 		 * conditionally take a mutex, tripping over an assert if a
 		 * lock we are waiting for is set.
 		 */
 		THREAD_CONTENTION_DONE(&sx->lock_object);
 		if (!(opts & SX_INTERRUPTIBLE))
 			sleepq_wait(&sx->lock_object, 0);
 		else
 			error = sleepq_wait_sig(&sx->lock_object, 0);
 		THREAD_CONTENDS_ON_LOCK(&sx->lock_object);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&sx->lock_object);
 		sleep_cnt++;
 #endif
 		/* An interrupted sleep ends the attempt without the lock. */
 		if (error) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK,
 			"%s: interruptible sleep by %p suspended by signal",
 				    __func__, sx);
 			break;
 		}
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
 			    __func__, sx);
 		x = SX_READ_VALUE(sx);
 	}
 	THREAD_CONTENTION_DONE(&sx->lock_object);
 	/* Common case: nothing to undo and nothing to record. */
 	if (__predict_true(!extra_work))
 		return (error);
 #ifdef ADAPTIVE_SX
 	if (in_critical)
 		critical_exit();
 #endif
 	GIANT_RESTORE();
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return (error);
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&sx->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
 		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
 		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 out_lockstat:
 #endif
 	if (!error)
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
 		    contested, waittime, file, line, LOCKSTAT_WRITER);
 	return (error);
 }
 
 /*
  * Slow path of the sx_xunlock operation; every failure of the inline
  * 'easy case' ends up here.  It would ideally be static, but it has to
  * be reachable from at least sx.h.
  *
  * 'x' is the lock word value supplied by the caller.  The function
  * unwinds one recursion level, or releases the lock and wakes one of the
  * two sleep queues.
  */
 void
 _sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t self, newval;
 	int wakeq, kick;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	self = (uintptr_t)curthread;
 
 	/* A value equal to our own thread pointer is re-read from the lock. */
 	if (__predict_false(x == self))
 		x = SX_READ_VALUE(sx);
 
 	MPASS(!(x & SX_LOCK_SHARED));
 
 	if (__predict_false(x & SX_LOCK_RECURSED)) {
 		/* Recursed hold: peel off one level and keep the lock. */
 		if ((--sx->sx_recurse) == 0)
 			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
 		return;
 	}
 
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_WRITER);
 
 	/* No flag bits besides the owner: a plain release is sufficient. */
 	if (x == self &&
 	    atomic_cmpset_rel_ptr(&sx->sx_lock, self, SX_LOCK_UNLOCKED))
 		return;
 
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
 
 	sleepq_lock(&sx->lock_object);
 	x = SX_READ_VALUE(sx);
 	MPASS(x & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS));
 
 	/*
 	 * Choose the queue to wake.  When the exclusive-waiters flag is set
 	 * and the exclusive queue is not empty, wake that queue and carry
 	 * the shared-waiters flag over into the released lock word.  In
 	 * every other case (including an exclusive queue emptied by
 	 * interrupted sleeps) wake the shared queue and store a plain
 	 * unlocked value, which clears both waiter flags.
 	 */
 	if ((x & SX_LOCK_EXCLUSIVE_WAITERS) != 0 &&
 	    sleepq_sleepcnt(&sx->lock_object, SQ_EXCLUSIVE_QUEUE) != 0) {
 		wakeq = SQ_EXCLUSIVE_QUEUE;
 		newval = SX_LOCK_UNLOCKED | (x & SX_LOCK_SHARED_WAITERS);
 	} else {
 		wakeq = SQ_SHARED_QUEUE;
 		newval = SX_LOCK_UNLOCKED;
 	}
 	atomic_store_rel_ptr(&sx->sx_lock, newval);
 
 	/* Wake up all the waiters for the specific queue. */
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
 		    __func__, sx, wakeq == SQ_SHARED_QUEUE ? "shared" :
 		    "exclusive");
 
 	kick = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, wakeq);
 	sleepq_release(&sx->lock_object);
 	if (kick != 0)
 		kick_proc0();
 }
 
 /*
  * Decide whether a shared acquire may be attempted on lock word 'x'.
  *
  * The lock must be in shared mode.  It is then readable outright when
  * neither SX_LOCK_EXCLUSIVE_WAITERS nor SX_LOCK_WRITE_SPINNER is set.
  * When one of them is set, only a caller that passes fp == false and
  * whose thread already has a nonzero td_sx_slocks count may proceed.
  */
 static bool __always_inline
 __sx_can_read(struct thread *td, uintptr_t x, bool fp)
 {
 
 	if ((x & SX_LOCK_SHARED) == 0)
 		return (false);
 	if ((x & (SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_WRITE_SPINNER)) == 0)
 		return (true);
 	return (!fp && td->td_sx_slocks != 0);
 }
 
 /*
  * Try to take a shared hold by adding one sharer to the lock word.
  *
  * *xp holds the caller's view of the lock word and is refreshed by each
  * failed compare-and-set.  Returns true after the sharer count has been
  * bumped and td->td_sx_slocks incremented; returns false as soon as
  * __sx_can_read() rejects the current lock word.
  */
 static bool __always_inline
 __sx_slock_try(struct sx *sx, struct thread *td, uintptr_t *xp, bool fp
     LOCK_FILE_LINE_ARG_DEF)
 {
 
 	for (;;) {
 		if (!__sx_can_read(td, *xp, fp))
 			return (false);
 
 		/*
 		 * The waiter bits must be preserved, hence the
 		 * compare-and-set; on failure *xp was reloaded, so simply
 		 * re-evaluate.
 		 */
 		if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, xp,
 		    *xp + SX_ONE_SHARER))
 			continue;
 
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR4(KTR_LOCK, "%s: %p succeed %p -> %p",
 			    __func__, sx, (void *)*xp,
 			    (void *)(*xp + SX_ONE_SHARER));
 		td->td_sx_slocks++;
 		return (true);
 	}
 }
 
 /*
  * Slow path for acquiring a shared hold on an sx lock.
  *
  * Arguments:
  *	sx	the lock to share-lock.
  *	opts	lock options; only SX_INTERRUPTIBLE is examined here and it
  *		selects a signal-interruptible sleep.
  *	x	lock word value supplied by the caller; it seeds the first
  *		__sx_slock_try() attempt.
  *
  * Returns 0 once the shared hold is taken or when the scheduler is
  * stopped.  Otherwise returns the error reported by sleepq_wait_sig() for
  * an SX_INTERRUPTIBLE sleep; the loop is left without a hold in that case.
  */
 static int __noinline
 _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 {
 	GIANT_DECLARE;
 	struct thread *td;
 #ifdef ADAPTIVE_SX
 	struct thread *owner;
 	u_int i, n, spintries = 0;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	int error = 0;
 #if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state = 0;
 #endif
 	int extra_work __sdt_used = 0;
 
 	td = curthread;
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * With the acquire probe enabled, first make a regular attempt
 	 * here; only if that fails start the timing bookkeeping and
 	 * remember the contended lock word in 'state'.
 	 */
 	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
 		if (__sx_slock_try(sx, td, &x, false LOCK_FILE_LINE_ARG))
 			goto out_lockstat;
 		extra_work = 1;
 		all_time -= lockstat_nsecs(&sx->lock_object);
 		state = x;
 	}
 #endif
 #ifdef LOCK_PROFILING
 	extra_work = 1;
 	state = x;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 
 #if defined(ADAPTIVE_SX)
 	lock_delay_arg_init(&lda, &sx_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init_noadapt(&lda);
 #endif
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&sx->lock_object, false, &contested,
 	    &waittime);
 
 #ifndef INVARIANTS
 	GIANT_SAVE(extra_work);
 #endif
 
 	THREAD_CONTENDS_ON_LOCK(&sx->lock_object);
 
 	/*
 	 * As with rwlocks, we don't make any attempt to try to block
 	 * shared locks once there is an exclusive waiter.
 	 */
 	for (;;) {
 		if (__sx_slock_try(sx, td, &x, false LOCK_FILE_LINE_ARG))
 			break;
 #ifdef INVARIANTS
 		GIANT_SAVE(extra_work);
 #endif
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if ((x & SX_LOCK_SHARED) == 0) {
 			owner = lv_sx_owner(x);
 			if (TD_IS_RUNNING(owner)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 					    __func__, sx, owner);
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "spinning",
 				    "lockname:\"%s\"", sx->lock_object.lo_name);
 				do {
 					lock_delay(&lda);
 					x = SX_READ_VALUE(sx);
 					owner = lv_sx_owner(x);
 				} while (owner != NULL && TD_IS_RUNNING(owner));
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "running");
 				continue;
 			}
 		} else {
 			/*
 			 * A write spinner is set and no sharers remain:
 			 * back off briefly and look at the lock again.
 			 */
 			if ((x & SX_LOCK_WRITE_SPINNER) && SX_SHARERS(x) == 0) {
 				MPASS(!__sx_can_read(td, x, false));
 				lock_delay_spin(2);
 				x = SX_READ_VALUE(sx);
 				continue;
 			}
 			/*
 			 * NOTE(review): spintries is initialized to 0 and is
 			 * never incremented in this function, so this test
 			 * only fails when asx_retries is 0 -- confirm that
 			 * the missing bound is intentional.
 			 */
 			if (spintries < asx_retries) {
 				KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 				    "spinning", "lockname:\"%s\"",
 				    sx->lock_object.lo_name);
 				n = SX_SHARERS(x);
 				for (i = 0; i < asx_loops; i += n) {
 					lock_delay_spin(n);
 					x = SX_READ_VALUE(sx);
 					if (!(x & SX_LOCK_SHARED))
 						break;
 					n = SX_SHARERS(x);
 					if (n == 0)
 						break;
 					if (__sx_can_read(td, x, false))
 						break;
 				}
 #ifdef KDTRACE_HOOKS
 				lda.spin_cnt += i;
 #endif
 				KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 				    "running");
 				/* The inner loop ended early: retry the lock. */
 				if (i < asx_loops)
 					continue;
 			}
 		}
 #endif
 
 		/*
 		 * Some other thread already has an exclusive lock, so
 		 * start the process of blocking.
 		 */
 		sleepq_lock(&sx->lock_object);
 		x = SX_READ_VALUE(sx);
 retry_sleepq:
 		/*
 		 * The lock word changed while we took the sleep queue lock
 		 * such that sleeping is no longer appropriate: go around.
 		 */
 		if (((x & SX_LOCK_WRITE_SPINNER) && SX_SHARERS(x) == 0) ||
 		    __sx_can_read(td, x, false)) {
 			sleepq_release(&sx->lock_object);
 			continue;
 		}
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if (!(x & SX_LOCK_SHARED)) {
 			owner = (struct thread *)SX_OWNER(x);
 			if (TD_IS_RUNNING(owner)) {
 				sleepq_release(&sx->lock_object);
 				x = SX_READ_VALUE(sx);
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
 		 * fail to set it drop the sleep queue lock and loop
 		 * back.
 		 */
 		if (!(x & SX_LOCK_SHARED_WAITERS)) {
 			if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
 			    x | SX_LOCK_SHARED_WAITERS))
 				goto retry_sleepq;
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
 				    __func__, sx);
 		}
 
 		/*
 		 * Since we have been unable to acquire the shared lock,
 		 * we have to sleep.
 		 */
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
 			    __func__, sx);
 
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&sx->lock_object);
 #endif
 		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
 		/*
 		 * Hack: this can land in thread_suspend_check which will
 		 * conditionally take a mutex, tripping over an assert if a
 		 * lock we are waiting for is set.
 		 */
 		THREAD_CONTENTION_DONE(&sx->lock_object);
 		if (!(opts & SX_INTERRUPTIBLE))
 			sleepq_wait(&sx->lock_object, 0);
 		else
 			error = sleepq_wait_sig(&sx->lock_object, 0);
 		THREAD_CONTENDS_ON_LOCK(&sx->lock_object);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&sx->lock_object);
 		sleep_cnt++;
 #endif
 		/* An interrupted sleep ends the attempt without a hold. */
 		if (error) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK,
 			"%s: interruptible sleep by %p suspended by signal",
 				    __func__, sx);
 			break;
 		}
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
 			    __func__, sx);
 		x = SX_READ_VALUE(sx);
 	}
 	THREAD_CONTENTION_DONE(&sx->lock_object);
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	/*
 	 * NOTE(review): this early return bypasses GIANT_RESTORE() below;
 	 * presumably GIANT_SAVE() bumps extra_work whenever a restore is
 	 * needed -- verify against the macro definition.
 	 */
 	if (__predict_true(!extra_work))
 		return (error);
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&sx->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
 		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
 		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 out_lockstat:
 #endif
 	if (error == 0) {
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
 		    contested, waittime, file, line, LOCKSTAT_READER);
 	}
 	GIANT_RESTORE();
 	return (error);
 }
 
 /*
  * Acquire a shared hold on 'sx'.
  *
  * A single inline attempt is made first; if it fails, or if the
  * sx__acquire lockstat probe is enabled, the work is handed to
  * _sx_slock_hard().  Returns 0 on success, otherwise the error produced
  * by the hard path, in which case no lock bookkeeping is recorded.
  */
 int
 _sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t x;
 	int error;
 
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(curthread),
 	    ("sx_slock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
 
 	td = curthread;
 	x = SX_READ_VALUE(sx);
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) ||
 	    !__sx_slock_try(sx, td, &x, true LOCK_FILE_LINE_ARG))) {
 		error = _sx_slock_hard(sx, opts, x LOCK_FILE_LINE_ARG);
 		if (error != 0)
 			return (error);
 	} else {
 		lock_profile_obtain_lock_success(&sx->lock_object, false, 0, 0,
 		    file, line);
 	}
 
 	/* The shared hold is ours: record it with the debugging facilities. */
 	LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
 	WITNESS_LOCK(&sx->lock_object, 0, file, line);
 	TD_LOCKS_INC(curthread);
 	return (0);
 }
 
 /*
  * Out-of-line entry point for a shared acquire: forwards to
  * _sx_slock_int() and returns its result unchanged.  The file and line
  * arguments are handed on through LOCK_FILE_LINE_ARG (macro defined
  * elsewhere).
  */
 int
 _sx_slock(struct sx *sx, int opts, const char *file, int line)
 {
 
 	return (_sx_slock_int(sx, opts LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Try to drop one shared hold without touching the sleep queue.
  *
  * That is possible while other sharers remain or no waiter bit is set.
  * *xp holds the caller's view of the lock word and is refreshed by each
  * failed compare-and-set.  Returns true after the sharer count and
  * td->td_sx_slocks have both been decremented; returns false when the
  * caller is the last sharer and waiters are flagged.
  */
 static bool __always_inline
 _sx_sunlock_try(struct sx *sx, struct thread *td, uintptr_t *xp)
 {
 
 	while (SX_SHARERS(*xp) > 1 || (*xp & SX_LOCK_WAITERS) == 0) {
 		/* On failure *xp was reloaded; re-check the condition. */
 		if (!atomic_fcmpset_rel_ptr(&sx->sx_lock, xp,
 		    *xp - SX_ONE_SHARER))
 			continue;
 
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR4(KTR_LOCK,
 			    "%s: %p succeeded %p -> %p",
 			    __func__, sx, (void *)*xp,
 			    (void *)(*xp - SX_ONE_SHARER));
 		td->td_sx_slocks--;
 		return (true);
 	}
 	return (false);
 }
 
 /*
  * Slow path for releasing a shared hold on an sx lock.
  *
  * Arguments:
  *	sx	the lock being released.
  *	td	the releasing thread; its td_sx_slocks count is decremented.
  *	x	lock word value supplied by the caller.
  *
  * A plain sharer-count decrement is retried first.  If the caller turns
  * out to be the last sharer while waiter bits are set, the lock word is
  * released under the sleep queue lock and one queue is woken up.
  */
 static void __noinline
 _sx_sunlock_hard(struct sx *sx, struct thread *td, uintptr_t x
     LOCK_FILE_LINE_ARG_DEF)
 {
 	int wakeup_swapper = 0;
 	uintptr_t setx, queue;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	if (_sx_sunlock_try(sx, td, &x))
 		goto out_lockstat;
 
 	sleepq_lock(&sx->lock_object);
 	x = SX_READ_VALUE(sx);
 	for (;;) {
 		if (_sx_sunlock_try(sx, td, &x))
 			break;
 
 		/*
 		 * Wake up semantic here is quite simple:
 		 * wake up all the exclusive waiters if the exclusive
 		 * waiters flag is set (keeping the shared waiters flag in
 		 * the lock word), otherwise wake up the shared waiters.
 		 * The write spinner bit is always carried over.
 		 * Note that the state of the lock could have changed,
 		 * so if it fails loop back and retry.
 		 */
 		setx = SX_LOCK_UNLOCKED;
 		queue = SQ_SHARED_QUEUE;
 		if (x & SX_LOCK_EXCLUSIVE_WAITERS) {
 			setx |= (x & SX_LOCK_SHARED_WAITERS);
 			queue = SQ_EXCLUSIVE_QUEUE;
 		}
 		setx |= (x & SX_LOCK_WRITE_SPINNER);
 		if (!atomic_fcmpset_rel_ptr(&sx->sx_lock, &x, setx))
 			continue;
 		/* Name the queue actually being woken, as xunlock does. */
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR3(KTR_LOCK,
 			    "%s: %p waking up all threads on %s queue",
 			    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
 			    "exclusive");
 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
 		    0, queue);
 		td->td_sx_slocks--;
 		break;
 	}
 	sleepq_release(&sx->lock_object);
 	if (wakeup_swapper)
 		kick_proc0();
 out_lockstat:
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
 }
 
 /*
  * Release a shared hold on 'sx'.
  *
  * After the assertion and WITNESS/log bookkeeping, a single inline
  * release is attempted; if it fails, or if the sx__release lockstat
  * probe is enabled, the work is handed to _sx_sunlock_hard().
  */
 void
 _sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *self;
 	uintptr_t v;
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_SLOCKED, file, line);
 	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
 
 	self = curthread;
 	v = SX_READ_VALUE(sx);
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
 	    !_sx_sunlock_try(sx, self, &v))) {
 		_sx_sunlock_hard(sx, self, v LOCK_FILE_LINE_ARG);
 	} else {
 		lock_profile_release_lock(&sx->lock_object, false);
 	}
 
 	TD_LOCKS_DEC(curthread);
 }
 
 /*
  * Out-of-line entry point for a shared release: forwards to
  * _sx_sunlock_int().  The file and line arguments are handed on through
  * LOCK_FILE_LINE_ARG (macro defined elsewhere).
  */
 void
 _sx_sunlock(struct sx *sx, const char *file, int line)
 {
 
 	_sx_sunlock_int(sx LOCK_FILE_LINE_ARG);
 }
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
 #undef	_sx_assert
 #endif
 
 /*
  * Verify that 'sx' is in the state described by 'what' and panic if it
  * is not.  'what' is one of the SA_* values handled by the switch below,
  * optionally combined with SA_RECURSED or SA_NOTRECURSED; any other value
  * panics as an unknown assertion.  'file' and 'line' identify the caller
  * in the panic message.  Nothing is checked once the scheduler is stopped.
  *
  * In the non-WITNESS case, sx_assert() can only detect that at least
  * *some* thread owns an slock, but it cannot guarantee that *this*
  * thread owns an slock.
  */
 void
 _sx_assert(const struct sx *sx, int what, const char *file, int line)
 {
 #ifndef WITNESS
 	/* Set when the caller asserted a shared (not merely any) hold. */
 	int slocked = 0;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 	switch (what) {
 	case SA_SLOCKED:
 	case SA_SLOCKED | SA_NOTRECURSED:
 	case SA_SLOCKED | SA_RECURSED:
 #ifndef WITNESS
 		slocked = 1;
 		/* FALLTHROUGH */
 #endif
 	case SA_LOCKED:
 	case SA_LOCKED | SA_NOTRECURSED:
 	case SA_LOCKED | SA_RECURSED:
 #ifdef WITNESS
 		witness_assert(&sx->lock_object, what, file, line);
 #else
 		/*
 		 * If some other thread has an exclusive lock or we
 		 * have one and are asserting a shared lock, fail.
 		 * Also, if no one has a lock at all, fail.
 		 */
 		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
 		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
 		    sx_xholder(sx) != curthread)))
 			panic("Lock %s not %slocked @ %s:%d\n",
 			    sx->lock_object.lo_name, slocked ? "share " : "",
 			    file, line);
 
 		/* Recursion is only checked for an exclusive hold. */
 		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
 			if (sx_recursed(sx)) {
 				if (what & SA_NOTRECURSED)
 					panic("Lock %s recursed @ %s:%d\n",
 					    sx->lock_object.lo_name, file,
 					    line);
 			} else if (what & SA_RECURSED)
 				panic("Lock %s not recursed @ %s:%d\n",
 				    sx->lock_object.lo_name, file, line);
 		}
 #endif
 		break;
 	case SA_XLOCKED:
 	case SA_XLOCKED | SA_NOTRECURSED:
 	case SA_XLOCKED | SA_RECURSED:
 		if (sx_xholder(sx) != curthread)
 			panic("Lock %s not exclusively locked @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 		if (sx_recursed(sx)) {
 			if (what & SA_NOTRECURSED)
 				panic("Lock %s recursed @ %s:%d\n",
 				    sx->lock_object.lo_name, file, line);
 		} else if (what & SA_RECURSED)
 			panic("Lock %s not recursed @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 		break;
 	case SA_UNLOCKED:
 #ifdef WITNESS
 		witness_assert(&sx->lock_object, what, file, line);
 #else
 		/*
 		 * If we hold an exclusive lock fail.  We can't
 		 * reliably check to see if we hold a shared lock or
 		 * not.
 		 */
 		if (sx_xholder(sx) == curthread)
 			panic("Lock %s exclusively locked @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 #endif
 		break;
 	default:
 		panic("Unknown sx lock assertion: %d @ %s:%d", what, file,
 		    line);
 	}
 }
 #endif	/* INVARIANT_SUPPORT */
 
 #ifdef DDB
 /*
  * DDB helper: print the state of the sx lock embedded in 'lock'
  * (unlocked, destroyed, share locked with the sharer count, or
  * exclusively locked with owner details and recursion depth), followed
  * by which waiter flags are set.  Nothing more is printed for a
  * destroyed lock.
  */
 static void
 db_show_sx(const struct lock_object *lock)
 {
 	const struct sx *sx;
 	struct thread *holder;
 	uintptr_t waiters;
 
 	sx = (const struct sx *)lock;
 
 	db_printf(" state: ");
 	if (sx->sx_lock == SX_LOCK_UNLOCKED) {
 		db_printf("UNLOCKED\n");
 	} else if (sx->sx_lock == SX_LOCK_DESTROYED) {
 		db_printf("DESTROYED\n");
 		return;
 	} else if (sx->sx_lock & SX_LOCK_SHARED) {
 		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
 	} else {
 		holder = sx_xholder(sx);
 		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", holder,
 		    holder->td_tid, holder->td_proc->p_pid, holder->td_name);
 		if (sx_recursed(sx))
 			db_printf(" recursed: %d\n", sx->sx_recurse);
 	}
 
 	db_printf(" waiters: ");
 	waiters = sx->sx_lock &
 	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS);
 	if (waiters == SX_LOCK_SHARED_WAITERS)
 		db_printf("shared\n");
 	else if (waiters == SX_LOCK_EXCLUSIVE_WAITERS)
 		db_printf("exclusive\n");
 	else if (waiters ==
 	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS))
 		db_printf("exclusive and shared\n");
 	else
 		db_printf("none\n");
 }
 
 /*
  * DDB helper: decide whether the sleeping thread 'td' is blocked on an
  * sx lock.  When it is, print a short description, store the result of
  * sx_xholder() in *ownerp and return 1; otherwise return 0 and leave
  * *ownerp untouched.
  */
 int
 sx_chain(struct thread *td, struct thread **ownerp)
 {
 	const struct sx *sx;
 
 	/*
 	 * The wait channel is treated as an sx lock only if its lock
 	 * class is the sx class and its name is the very string the
 	 * thread is using as its wait message.
 	 */
 	sx = td->td_wchan;
 	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx)
 		return (0);
 	if (sx->lock_object.lo_name != td->td_wmesg)
 		return (0);
 
 	/* It looks like an sx lock: report what we know about it. */
 	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
 	*ownerp = sx_xholder(sx);
 	if ((sx->sx_lock & SX_LOCK_SHARED) == 0)
 		db_printf("XLOCK\n");
 	else
 		db_printf("SLOCK (count %ju)\n",
 		    (uintmax_t)SX_SHARERS(sx->sx_lock));
 	return (1);
 }
 #endif
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 8cb847fe2a2d..f12054a04b23 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -1,713 +1,713 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include "opt_ktrace.h"
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/blockcount.h>
 #include <sys/condvar.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
 #ifdef KTRACE
 #include <sys/uio.h>
 #include <sys/ktrace.h>
 #endif
 #ifdef EPOCH_TRACE
 #include <sys/epoch.h>
 #endif
 
 #include <machine/cpu.h>
 
 /* synch_setup() is run via SYSINIT at SI_SUB_KICK_SCHEDULER. */
 static void synch_setup(void *dummy);
 SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup,
     NULL);
 
 /*
  * Tick threshold compared against the time since the last voluntary
  * switch in should_yield(); its default is assigned in sleepinit().
  */
 int	hogticks;
 /*
  * Wait channels used by pause_sbt(), indexed by curcpu.  _sleep() treats
  * any ident that falls inside this array as a pause and queues it with
  * SLEEPQ_PAUSE instead of SLEEPQ_SLEEP.
  */
 static const char pause_wchan[MAXCPU];
 
 /* Callout that loadav() re-arms on every invocation. */
 static struct callout loadav_callout;
 
 struct loadavg averunnable =
 	{ {0, 0, 0}, FSCALE };	/* load average, of runnable procs */
 /*
  * Constants for averages over 1, 5, and 15 minutes
  * when sampling at 5 second intervals.
  */
 static uint64_t cexp[3] = {
 	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
 	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
 	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
 };
 
 /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
 SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE,
     "Fixed-point scale factor used for calculating load average values");
 
 static void	loadav(void *arg);
 
 /* Defines the "preempt" probe of the sched provider; fired in mi_switch(). */
 SDT_PROVIDER_DECLARE(sched);
 SDT_PROBE_DEFINE(sched, , , preempt);
 
 /*
  * Early initialization of the sleep machinery: pick the default value of
  * hogticks (two tenths of hz, using integer division) and set up the
  * sleep queues.  The argument is unused.
  */
 static void
 sleepinit(void *unused)
 {
 
 	hogticks = (hz / 10) * 2;	/* Default only. */
 	init_sleepqueues();
 }
 
 /*
  * vmem tries to lock the sleepq mutexes when free'ing kva, so make sure
  * it is available.
  */
 SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, NULL);
 
 /*
  * General sleep call.  Suspends the current thread until a wakeup is
  * performed on the specified identifier.  The thread will then be made
  * runnable with the specified priority.  Sleeps at most sbt units of time
  * (0 means no timeout).  If priority includes the PCATCH flag, let signals
  * interrupt the sleep, otherwise ignore them while sleeping.  Returns 0 if
  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
  * signal becomes pending, ERESTART is returned if the current system
  * call should be restarted if possible, and EINTR is returned if the system
  * call should be interrupted by the signal.
  *
  * The lock argument is unlocked before the caller is suspended, and
  * re-locked before _sleep() returns.  If priority includes the PDROP
  * flag the lock is not re-locked before returning.
  *
  * When the scheduler is stopped no sleep takes place: the lock is released
  * only if PDROP was requested and 0 is returned immediately.
  */
 int
 _sleep(const void *ident, struct lock_object *lock, int priority,
     const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
 {
 	struct thread *td;
 	struct lock_class *class;
 	uintptr_t lock_state;
 	int catch, pri, rval, sleepq_flags;
 	WITNESS_SAVE_DECL(lock_witness);
 
 	TSENTER();
 	td = curthread;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, wmesg);
 #endif
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Sleeping on \"%s\"", wmesg);
 	KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL ||
 	    (priority & PNOLOCK) != 0,
 	    ("sleeping without a lock"));
 	KASSERT(ident != NULL, ("_sleep: NULL ident"));
 	KASSERT(TD_IS_RUNNING(td), ("_sleep: curthread not running"));
 	if (priority & PDROP)
 		KASSERT(lock != NULL && lock != &Giant.lock_object,
 		    ("PDROP requires a non-Giant lock"));
 	/* Resolve the lock class once; it is NULL when no interlock is used. */
 	if (lock != NULL)
 		class = LOCK_CLASS(lock);
 	else
 		class = NULL;
 
 	/*
 	 * With the scheduler stopped we cannot sleep.  Honor PDROP so the
 	 * caller's view of the lock state stays consistent, then pretend
 	 * the sleep completed normally.
 	 */
-	if (SCHEDULER_STOPPED_TD(td)) {
+	if (SCHEDULER_STOPPED()) {
 		if (lock != NULL && priority & PDROP)
 			class->lc_unlock(lock);
 		return (0);
 	}
 	catch = priority & PCATCH;
 	pri = priority & PRIMASK;
 
 	KASSERT(!TD_ON_SLEEPQ(td), ("recursive sleep"));
 
 	/*
 	 * An ident inside pause_wchan[] identifies a pause_sbt() caller;
 	 * those are queued as SLEEPQ_PAUSE rather than SLEEPQ_SLEEP.
 	 */
 	if ((uintptr_t)ident >= (uintptr_t)&pause_wchan[0] &&
 	    (uintptr_t)ident <= (uintptr_t)&pause_wchan[MAXCPU - 1])
 		sleepq_flags = SLEEPQ_PAUSE;
 	else
 		sleepq_flags = SLEEPQ_SLEEP;
 	if (catch)
 		sleepq_flags |= SLEEPQ_INTERRUPTIBLE;
 
 	sleepq_lock(ident);
 	CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)",
 	    td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident);
 
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 	/*
 	 * A non-sleepable interlock is released here, after the sleepqueue
 	 * lock has been taken and before the thread is queued.  Sleepable
 	 * interlocks are handled after sleepq_add() below.
 	 */
 	if (lock != NULL && lock != &Giant.lock_object &&
 	    !(class->lc_flags & LC_SLEEPABLE)) {
 		KASSERT(!(class->lc_flags & LC_SPINLOCK),
 		    ("spin locks can only use msleep_spin"));
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 	} else
 		/* GCC needs to follow the Yellow Brick Road */
 		lock_state = -1;
 
 	/*
 	 * We put ourselves on the sleep queue and start our timeout
 	 * before calling thread_suspend_check, as we could stop there,
 	 * and a wakeup or a SIGCONT (or both) could occur while we were
 	 * stopped without resuming us.  Thus, we must be ready for sleep
 	 * when cursig() is called.  If the wakeup happens while we're
 	 * stopped, then td will no longer be on a sleep queue upon
 	 * return from cursig().
 	 */
 	sleepq_add(ident, lock, wmesg, sleepq_flags, 0);
 	if (sbt != 0)
 		sleepq_set_timeout_sbt(ident, sbt, pr, flags);
 	/*
 	 * A sleepable interlock is released only now, with the sleepqueue
 	 * lock dropped around the unlock call and re-taken afterwards.
 	 */
 	if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
 		sleepq_release(ident);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		sleepq_lock(ident);
 	}
 	/* Pick the wait primitive matching the timeout/PCATCH combination. */
 	if (sbt != 0 && catch)
 		rval = sleepq_timedwait_sig(ident, pri);
 	else if (sbt != 0)
 		rval = sleepq_timedwait(ident, pri);
 	else if (catch)
 		rval = sleepq_wait_sig(ident, pri);
 	else {
 		sleepq_wait(ident, pri);
 		rval = 0;
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, wmesg);
 #endif
 	PICKUP_GIANT();
 	/* Re-take the interlock unless it is Giant or PDROP was requested. */
 	if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 	TSEXIT();
 	return (rval);
 }
 
 /*
  * Sleep on ident while using a spin mutex as the interlock.  The mutex
  * must be held (and not recursed) on entry; it is released once the
  * sleepqueue lock is held and is re-acquired before returning.
  *
  * sbt == 0 means no timeout; otherwise the return value is whatever
  * sleepq_timedwait() reports.  Returns 0 after an untimed sleep, and also
  * returns 0 immediately, without sleeping or touching the mutex, when the
  * scheduler is stopped.  The sleep is not interruptible by signals
  * (plain SLEEPQ_SLEEP is used).
  */
 int
 msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg,
     sbintime_t sbt, sbintime_t pr, int flags)
 {
 	struct thread *td;
 	int rval;
 	WITNESS_SAVE_DECL(mtx);
 
 	td = curthread;
 	KASSERT(mtx != NULL, ("sleeping without a mutex"));
 	KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident"));
 	KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running"));
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (0);
 
 	sleepq_lock(ident);
 	CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)",
 	    td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident);
 
 	DROP_GIANT();
 	mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
 	WITNESS_SAVE(&mtx->lock_object, mtx);
 	mtx_unlock_spin(mtx);
 
 	/*
 	 * We put ourselves on the sleep queue and start our timeout.
 	 */
 	sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
 	if (sbt != 0)
 		sleepq_set_timeout_sbt(ident, sbt, pr, flags);
 
 	/*
 	 * Can't call ktrace with any spin locks held so it can lock the
 	 * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold
 	 * any spin lock.  Thus, we have to drop the sleepq spin lock while
 	 * we handle those requests.  This is safe since we have placed our
 	 * thread on the sleep queue already.
 	 */
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW)) {
 		sleepq_release(ident);
 		ktrcsw(1, 0, wmesg);
 		sleepq_lock(ident);
 	}
 #endif
 #ifdef WITNESS
 	sleepq_release(ident);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"",
 	    wmesg);
 	sleepq_lock(ident);
 #endif
 	if (sbt != 0)
 		rval = sleepq_timedwait(ident, 0);
 	else {
 		sleepq_wait(ident, 0);
 		rval = 0;
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, wmesg);
 #endif
 	PICKUP_GIANT();
 	/* Hand the spin mutex back to the caller in its original state. */
 	mtx_lock_spin(mtx);
 	WITNESS_RESTORE(&mtx->lock_object, mtx);
 	return (rval);
 }
 
 /*
  * pause_sbt() suspends the caller for the signed binary time "sbt", which
  * must not be negative; zero is treated as one tick (tick_sbt).
  *
  * Normally the wait is a timed _sleep() on this CPU's pause channel, made
  * signal-interruptible when C_CATCH is present in "flags", and the result
  * of _sleep() is returned.  When sleeping is not possible -- thread0
  * during cold boot, the kernel debugger being active, or the scheduler
  * being stopped -- the interval is burned with DELAY() instead and
  * EWOULDBLOCK is returned.
  */
 int
 pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
 {
 	sbintime_t usecs;
 	int pri;

 	KASSERT(sbt >= 0, ("pause_sbt: timeout must be >= 0"));

 	/* A zero timeout is quietly promoted to a single tick. */
 	if (sbt == 0)
 		sbt = tick_sbt;

 	/* Common case: a real, timed sleep on the per-CPU pause channel. */
 	if (!((cold && curthread == &thread0) || kdb_active ||
 	    SCHEDULER_STOPPED())) {
 		pri = (flags & C_CATCH) ? PCATCH : 0;
 		return (_sleep(&pause_wchan[curcpu], NULL, pri, wmesg, sbt,
 		    pr, flags));
 	}

 	/*
 	 * Busy-wait fallback.  Whole seconds are spent one at a time so
 	 * the machine-dependent DELAY() is never asked for more than a
 	 * second; the remainder is rounded up to microseconds.
 	 */
 	for (; sbt >= SBT_1S; sbt -= SBT_1S)
 		DELAY(1000000);
 	usecs = howmany(sbt, SBT_1US);
 	if (usecs > 0)
 		DELAY(usecs);
 	return (EWOULDBLOCK);
 }
 
 /*
  * Wake every thread sleeping on "ident".  If the broadcast reports that
  * the swapper has to be woken as well, proc0 is kicked after the
  * sleepqueue lock has been released.
  */
 void
 wakeup(const void *ident)
 {
 	int need_swapper;

 	sleepq_lock(ident);
 	need_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
 	sleepq_release(ident);
 	if (need_swapper == 0)
 		return;

 	KASSERT(ident != &proc0, ("wakeup and wakeup_swapper and proc0"));
 	kick_proc0();
 }
 
 /*
  * Wake a single thread sleeping on "ident".  More than one thread may
  * end up being woken if a target thread is currently swapped out.
  *
  * SLEEPQ_DROP is passed and there is no matching sleepq_release() here;
  * presumably sleepq_signal() drops the sleepqueue lock itself -- confirm
  * against sleepqueue(9).
  */
 void
 wakeup_one(const void *ident)
 {

 	sleepq_lock(ident);
 	if (sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_DROP, 0, 0) != 0)
 		kick_proc0();
 }
 
 /*
  * Like wakeup_one(), but additionally passes SLEEPQ_UNFAIR to
  * sleepq_signal().  proc0 is kicked when the signal call asks for the
  * swapper to be woken.
  */
 void
 wakeup_any(const void *ident)
 {
 	int sigflags;

 	sigflags = SLEEPQ_SLEEP | SLEEPQ_UNFAIR | SLEEPQ_DROP;
 	sleepq_lock(ident);
 	if (sleepq_signal(ident, sigflags, 0, 0) != 0)
 		kick_proc0();
 }
 
 /*
  * Wake the sleepers of a blockcount once its counter has dropped to
  * zero.  "old" is a previously observed counter value and must have the
  * waiters flag set.  The wakeup is issued only if the counter word is
  * still exactly the bare waiters flag, which this function atomically
  * replaces with 0.
  */
 void
 _blockcount_wakeup(blockcount_t *bc, u_int old)
 {

 	KASSERT(_BLOCKCOUNT_WAITERS(old),
 	    ("%s: no waiters on %p", __func__, bc));

 	/* Lost the race (the word changed): nothing to do here. */
 	if (!atomic_cmpset_int(&bc->__count, _BLOCKCOUNT_WAITERS_FLAG, 0))
 		return;
 	wakeup(bc);
 }
 
 /*
  * Wait for a wakeup or a signal.  This does not guarantee that the count is
  * still zero on return.  Callers wanting a precise answer should use
  * blockcount_wait() with an interlock.
  *
  * If there is no work to wait for, return 0.  If the sleep was interrupted by a
  * signal, return EINTR or ERESTART, and return EAGAIN otherwise.
  *
  * "lock" is an optional interlock (never Giant).  It is released before
  * sleeping and re-acquired before returning unless PDROP is set in "prio";
  * PCATCH in "prio" makes the sleep interruptible by signals.
  */
 int
 _blockcount_sleep(blockcount_t *bc, struct lock_object *lock, const char *wmesg,
     int prio)
 {
 	void *wchan;
 	uintptr_t lock_state;
 	u_int old;
 	int ret;
 	bool catch, drop;
 
 	KASSERT(lock != &Giant.lock_object,
 	    ("%s: cannot use Giant as the interlock", __func__));
 
 	/* Split the flag bits out of prio; only the priority proper remains. */
 	catch = (prio & PCATCH) != 0;
 	drop = (prio & PDROP) != 0;
 	prio &= PRIMASK;
 
 	/*
 	 * Synchronize with the fence in blockcount_release().  If we end up
 	 * waiting, the sleepqueue lock acquisition will provide the required
 	 * side effects.
 	 *
 	 * If there is no work to wait for, but waiters are present, try to put
 	 * ourselves to sleep to avoid jumping ahead.
 	 */
 	if (atomic_load_acq_int(&bc->__count) == 0) {
 		/* Fast path: honor PDROP, nothing else to undo. */
 		if (lock != NULL && drop)
 			LOCK_CLASS(lock)->lc_unlock(lock);
 		return (0);
 	}
 	lock_state = 0;
 	wchan = bc;
 	sleepq_lock(wchan);
 	DROP_GIANT();
 	if (lock != NULL)
 		lock_state = LOCK_CLASS(lock)->lc_unlock(lock);
 	old = blockcount_read(bc);
 	ret = 0;
 	/*
 	 * Publish our intent to sleep by setting the waiters flag.  Stop
 	 * early if the count has reached zero in the meantime (return 0
 	 * without sleeping) or if another waiter already set the flag.
 	 * NOTE(review): the retry relies on atomic_fcmpset_int() refreshing
 	 * "old" on failure -- confirm against atomic(9).
 	 */
 	do {
 		if (_BLOCKCOUNT_COUNT(old) == 0) {
 			sleepq_release(wchan);
 			goto out;
 		}
 		if (_BLOCKCOUNT_WAITERS(old))
 			break;
 	} while (!atomic_fcmpset_int(&bc->__count, &old,
 	    old | _BLOCKCOUNT_WAITERS_FLAG));
 	sleepq_add(wchan, NULL, wmesg, catch ? SLEEPQ_INTERRUPTIBLE : 0, 0);
 	if (catch)
 		ret = sleepq_wait_sig(wchan, prio);
 	else
 		sleepq_wait(wchan, prio);
 	/* A normal wakeup is reported as EAGAIN so that callers re-check. */
 	if (ret == 0)
 		ret = EAGAIN;
 
 out:
 	PICKUP_GIANT();
 	if (lock != NULL && !drop)
 		LOCK_CLASS(lock)->lc_lock(lock, lock_state);
 
 	return (ret);
 }
 
 /*
  * Called by mi_switch() when a context switch is attempted while the
  * debugger is active.  Drops the current thread's lock, prints a
  * backtrace and re-enters the debugger; kdb_reenter() is not expected to
  * return, so reaching the panic() means re-entry failed.
  */
 static void
 kdb_switch(void)
 {
 	thread_unlock(curthread);
 	kdb_backtrace();
 	kdb_reenter();
 	panic("%s: did not reenter debugger", __func__);
 }
 
 /*
  * mi_switch(9): The machine-independent parts of context switching.
  *
  * The thread lock is required on entry and is no longer held on return.
  *
  * "flags" must contain SW_VOL or SW_INVOL plus a valid switch reason in
  * SW_TYPE_MASK.  When the scheduler is stopped the function returns
  * without switching.
  */
 void
 mi_switch(int flags)
 {
 	uint64_t runtime, new_switchtime;
 	struct thread *td;
 
 	td = curthread;			/* XXX */
 	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
 	KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
 #ifdef INVARIANTS
 	if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
 		mtx_assert(&Giant, MA_NOTOWNED);
 #endif
 	/* thread_lock() performs spinlock_enter(). */
 	KASSERT(td->td_critnest == 1 || KERNEL_PANICKED(),
 	    ("mi_switch: switch in a critical section"));
 	KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
 	    ("mi_switch: switch must be voluntary or involuntary"));
 	KASSERT((flags & SW_TYPE_MASK) != 0,
 	    ("mi_switch: a switch reason (type) must be specified"));
 	KASSERT((flags & SW_TYPE_MASK) < SWT_COUNT,
 	    ("mi_switch: invalid switch reason %d", (flags & SW_TYPE_MASK)));
 
 	/*
 	 * Don't perform context switches from the debugger.
 	 */
 	if (kdb_active)
 		kdb_switch();
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return;
 	/* Account the switch as voluntary or involuntary and timestamp it. */
 	if (flags & SW_VOL) {
 		td->td_ru.ru_nvcsw++;
 		td->td_swvoltick = ticks;
 	} else {
 		td->td_ru.ru_nivcsw++;
 		td->td_swinvoltick = ticks;
 	}
 #ifdef SCHED_STATS
 	SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
 #endif
 	/*
 	 * Compute the amount of time during which the current
 	 * thread was running, and add that to its total so far.
 	 */
 	new_switchtime = cpu_ticks();
 	runtime = new_switchtime - PCPU_GET(switchtime);
 	td->td_runtime += runtime;
 	td->td_incruntime += runtime;
 	PCPU_SET(switchtime, new_switchtime);
 	td->td_generation++;	/* bump preempt-detect counter */
 	VM_CNT_INC(v_swtch);
 	PCPU_SET(switchticks, ticks);
 	CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)",
 	    td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
 #ifdef KDTRACE_HOOKS
 	/*
 	 * Fire the sched preempt probe for explicit preemptions and for
 	 * involuntary switches whose reason is SWT_NEEDRESCHED.
 	 */
 	if (SDT_PROBES_ENABLED() &&
 	    ((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 &&
 	    (flags & SW_TYPE_MASK) == SWT_NEEDRESCHED)))
 		SDT_PROBE0(sched, , , preempt);
 #endif
 	sched_switch(td, flags);
 	CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
 	    td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
 
 	/*
 	 * If the last thread was exiting, finish cleaning it up.
 	 */
 	if ((td = PCPU_GET(deadthread))) {
 		PCPU_SET(deadthread, NULL);
 		thread_stash(td);
 	}
 	/* Pairs with the spinlock_enter() performed by thread_lock(). */
 	spinlock_exit();
 }
 
 /*
  * Change thread state to be runnable, placing it on the run queue if
  * it is in memory.  If it is swapped out, return true so our caller
  * will know to awaken the swapper.
  *
  * Requires the thread lock on entry.  For a TDS_CAN_RUN thread the lock
  * is handed to sched_wakeup(), which releases it according to srqflags.
  * On every other path the lock is dropped here unless srqflags contains
  * SRQ_HOLD or SRQ_HOLDTD.
  *
  * Returns 1 only when this call set TDF_SWAPINREQ on a thread whose sole
  * inhibitor is TDI_SWAPPED; returns 0 otherwise.
  */
 int
 setrunnable(struct thread *td, int srqflags)
 {
 	int swapin;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT(td->td_proc->p_state != PRS_ZOMBIE,
 	    ("setrunnable: pid %d is a zombie", td->td_proc->p_pid));
 
 	swapin = 0;
 	switch (TD_GET_STATE(td)) {
 	case TDS_RUNNING:
 	case TDS_RUNQ:
 		/* Already running or queued: nothing to change. */
 		break;
 	case TDS_CAN_RUN:
 		KASSERT((td->td_flags & TDF_INMEM) != 0,
 		    ("setrunnable: td %p not in mem, flags 0x%X inhibit 0x%X",
 		    td, td->td_flags, td->td_inhibitors));
 		/* unlocks thread lock according to flags */
 		sched_wakeup(td, srqflags);
 		return (0);
 	case TDS_INHIBITED:
 		/*
 		 * If we are only inhibited because we are swapped out
 		 * arrange to swap in this process.
 		 */
 		if (td->td_inhibitors == TDI_SWAPPED &&
 		    (td->td_flags & TDF_SWAPINREQ) == 0) {
 			td->td_flags |= TDF_SWAPINREQ;
 			swapin = 1;
 		}
 		break;
 	default:
 		panic("setrunnable: state 0x%x", TD_GET_STATE(td));
 	}
 	if ((srqflags & (SRQ_HOLD | SRQ_HOLDTD)) == 0)
 		thread_unlock(td);
 
 	return (swapin);
 }
 
 /*
  * Compute a tenex style load average of a quantity on
  * 1, 5 and 15 minute intervals.
  */
 static void
 loadav(void *arg)
 {
 	int i;
 	uint64_t nrun;
 	struct loadavg *avg;
 
 	nrun = (uint64_t)sched_load();
 	avg = &averunnable;
 
 	for (i = 0; i < 3; i++)
 		avg->ldavg[i] = (cexp[i] * (uint64_t)avg->ldavg[i] +
 		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
 
 	/*
 	 * Schedule the next update to occur after 5 seconds, but add a
 	 * random variation to avoid synchronisation with processes that
 	 * run at regular intervals.
 	 */
 	callout_reset_sbt(&loadav_callout,
 	    SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US,
 	    loadav, NULL, C_DIRECT_EXEC | C_PREL(32));
 }
 
 /*
  * AST handler registered for TDA_SCHED in synch_setup().  Resets the
  * thread's priority to its user priority and performs an involuntary
  * switch with reason SWT_NEEDRESCHED.  With KTRACE, context-switch
  * records are emitted around the switch.
  */
 static void
 ast_scheduler(struct thread *td, int tda __unused)
 {
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 1, __func__);
 #endif
 	/* mi_switch() requires the thread lock and releases it. */
 	thread_lock(td);
 	sched_prio(td, td->td_user_pri);
 	mi_switch(SW_INVOL | SWT_NEEDRESCHED);
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 1, __func__);
 #endif
 }
 
 /*
  * SYSINIT hook: initialize the load-average callout, register
  * ast_scheduler() as the TDA_SCHED AST handler and start the periodic
  * load-average computation.
  */
 static void
 synch_setup(void *dummy __unused)
 {
 	callout_init(&loadav_callout, 1);
 	ast_register(TDA_SCHED, ASTR_ASTF_REQUIRED, 0, ast_scheduler);
 
 	/* Kick off timeout driven events by calling first time. */
 	loadav(NULL);
 }
 
 bool
 should_yield(void)
 {
 
 	return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks);
 }
 
 /*
  * Yield the CPU at user priority, but only when should_yield() says the
  * current thread is due to do so.
  */
 void
 maybe_yield(void)
 {

 	if (!should_yield())
 		return;
 	kern_yield(PRI_USER);
 }
 
 /*
  * Voluntarily give up the CPU from kernel context.
  *
  * "prio" selects the priority to switch out with: PRI_USER means the
  * thread's own user priority, any other non-negative value is applied
  * as given, and a negative value leaves the priority unchanged.  Giant
  * is dropped around the switch and picked up again afterwards.
  */
 void
 kern_yield(int prio)
 {
 	struct thread *self;
 	int newpri;

 	self = curthread;
 	DROP_GIANT();
 	thread_lock(self);
 	newpri = (prio == PRI_USER) ? self->td_user_pri : prio;
 	if (newpri >= 0)
 		sched_prio(self, newpri);
 	mi_switch(SW_VOL | SWT_RELINQUISH);
 	PICKUP_GIANT();
 }
 
 /*
  * General purpose yield system call.
  *
  * A thread in the timeshare class is first moved to PRI_MAX_TIMESHARE;
  * the thread then performs a voluntary switch.  Always returns 0 and
  * stores 0 as the system call's return value.
  */
 int
 sys_yield(struct thread *td, struct yield_args *uap)
 {
 
 	thread_lock(td);
 	if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
 		sched_prio(td, PRI_MAX_TIMESHARE);
 	/* mi_switch() releases the thread lock taken above. */
 	mi_switch(SW_VOL | SWT_RELINQUISH);
 	td->td_retval[0] = 0;
 	return (0);
 }
 
 /*
  * sched_getcpu system call: hand back the calling thread's td_oncpu value
  * as the system call result.  Always returns 0; uap is not used.
  */
 int
 sys_sched_getcpu(struct thread *td, struct sched_getcpu_args *uap)
 {
 	td->td_retval[0] = td->td_oncpu;
 	return (0);
 }
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index 86f392485a4b..a7fc2284cbcf 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -1,814 +1,814 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2004 The FreeBSD Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include "opt_kdb.h"
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cons.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/pcpu.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/stack.h>
 #include <sys/sysctl.h>
 #include <sys/tslog.h>
 
 #include <machine/kdb.h>
 #include <machine/pcb.h>
 
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Global debugger state.  kdb_active is tested throughout the kernel to
  * detect that the debugger is running (e.g. the sysctl handlers below
  * return EBUSY while it is set); kdb_dbbe points at the currently
  * selected backend, or is NULL when none has been selected.
  */
 u_char __read_frequently kdb_active = 0;
 static void *kdb_jmpbufp = NULL;
 struct kdb_dbbe *kdb_dbbe = NULL;
 static struct pcb kdb_pcb;
 struct pcb *kdb_thrctx = NULL;
 struct thread *kdb_thread = NULL;
 struct trapframe *kdb_frame = NULL;
 
 /* Compile-time defaults for the break-to-debugger knobs below. */
 #ifdef BREAK_TO_DEBUGGER
 #define	KDB_BREAK_TO_DEBUGGER	1
 #else
 #define	KDB_BREAK_TO_DEBUGGER	0
 #endif
 
 #ifdef ALT_BREAK_TO_DEBUGGER
 #define	KDB_ALT_BREAK_TO_DEBUGGER	1
 #else
 #define	KDB_ALT_BREAK_TO_DEBUGGER	0
 #endif
 
 /* Run-time knobs, exported through the debug.kdb sysctl tree below. */
 static int	kdb_break_to_debugger = KDB_BREAK_TO_DEBUGGER;
 static int	kdb_alt_break_to_debugger = KDB_ALT_BREAK_TO_DEBUGGER;
 static int	kdb_enter_securelevel = 0;
 
 /* A backend named "null" whose hooks are all NULL. */
 KDB_BACKEND(null, NULL, NULL, NULL, NULL);
 
 static int kdb_sysctl_available(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_current(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_enter(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_panic(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_panic_str(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_trap(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_stack_overflow(SYSCTL_HANDLER_ARGS);
 
 /* The debug.kdb sysctl subtree and its handler-backed nodes. */
 static SYSCTL_NODE(_debug, OID_AUTO, kdb, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
     "KDB nodes");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, available,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_available, "A",
     "list of available KDB backends");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, current,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_current, "A",
     "currently selected KDB backend");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, enter,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_enter, "I",
     "set to enter the debugger");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, panic,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_panic, "I",
     "set to panic the kernel");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, panic_str,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_panic_str, "A",
     "trigger a kernel panic, using the provided string as the panic message");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, trap,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_trap, "I",
     "set to cause a page fault via data access");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_trap_code, "I",
     "set to cause a page fault via code access");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, stack_overflow,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_stack_overflow, "I",
     "set to cause a stack overflow");
 
 /* Plain integer knobs; RWTUN marks them settable as loader tunables too. */
 SYSCTL_INT(_debug_kdb, OID_AUTO, break_to_debugger,
     CTLFLAG_RWTUN,
     &kdb_break_to_debugger, 0, "Enable break to debugger");
 
 SYSCTL_INT(_debug_kdb, OID_AUTO, alt_break_to_debugger,
     CTLFLAG_RWTUN,
     &kdb_alt_break_to_debugger, 0, "Enable alternative break to debugger");
 
 SYSCTL_INT(_debug_kdb, OID_AUTO, enter_securelevel,
     CTLFLAG_RWTUN | CTLFLAG_SECURE,
     &kdb_enter_securelevel, 0,
     "Maximum securelevel to enter a KDB backend");
 
 /*
  * Flag to indicate to debuggers why the debugger was entered.
  */
 const char * volatile kdb_why = KDB_WHY_UNSET;
 
 static int
 kdb_sysctl_available(SYSCTL_HANDLER_ARGS)
 {
 	struct kdb_dbbe **iter;
 	struct sbuf sbuf;
 	int error;
 
 	sbuf_new_for_sysctl(&sbuf, NULL, 64, req);
 	SET_FOREACH(iter, kdb_dbbe_set) {
 		if ((*iter)->dbbe_active == 0)
 			sbuf_printf(&sbuf, "%s ", (*iter)->dbbe_name);
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 static int
 kdb_sysctl_current(SYSCTL_HANDLER_ARGS)
 {
 	char buf[16];
 	int error;
 
 	if (kdb_dbbe != NULL)
 		strlcpy(buf, kdb_dbbe->dbbe_name, sizeof(buf));
 	else
 		*buf = '\0';
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (kdb_active)
 		return (EBUSY);
 	return (kdb_dbbe_select(buf));
 }
 
 static int
 kdb_sysctl_enter(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (kdb_active)
 		return (EBUSY);
 	kdb_enter(KDB_WHY_SYSCTL, "sysctl debug.kdb.enter");
 	return (0);
 }
 
 static int
 kdb_sysctl_panic(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	panic("kdb_sysctl_panic");
 	return (0);
 }
 
 static int
 kdb_sysctl_panic_str(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	static char buf[256]; /* static buffer to limit mallocs when panicing */
 
 	*buf = '\0';
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	panic("kdb_sysctl_panic: %s", buf);
 	return (0);
 }
 
 static int
 kdb_sysctl_trap(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	int *addr = (int *)0x10;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	return (*addr);
 }
 
 static int
 kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	void (*fp)(u_int, u_int, u_int) = (void *)0xdeadc0de;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	(*fp)(0x11111111, 0x22222222, 0x33333333);
 	return (0);
 }
 
 /*
  * Deliberately exhaust the stack for debug.kdb.stack_overflow.
  *
  * The function recurses before it ever modifies *x, so when called with
  * *x <= 10000000 (the sysctl handler passes a pointer to 0) the
  * termination test can never become true on the way down.  The volatile
  * pointer, the work placed after the recursive call and __noinline are
  * presumably there to keep the compiler from turning the recursion into
  * a loop -- NOTE(review): confirm intent.
  */
 static void kdb_stack_overflow(volatile int *x)  __noinline;
 static void
 kdb_stack_overflow(volatile int *x)
 {
 
 	if (*x > 10000000)
 		return;
 	kdb_stack_overflow(x);
 	*x += PCPU_GET(cpuid) / 1000000;
 }
 
 static int
 kdb_sysctl_stack_overflow(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	volatile int x;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	x = 0;
 	kdb_stack_overflow(&x);
 	return (0);
 }
 
 /*
  * Panic the kernel on behalf of KDB: record KDB_WHY_PANIC as the reason,
  * announce it on the console and panic with "msg".  The message is passed
  * through a "%s" format, so it is never interpreted as a format string.
  */
 void
 kdb_panic(const char *msg)
 {
 
 	kdb_why = KDB_WHY_PANIC;
 	printf("KDB: panic\n");
 	panic("%s", msg);
 }
 
 /*
  * Request a reboot on behalf of KDB: record KDB_WHY_REBOOT as the reason,
  * announce it on the console and call shutdown_nice(0).
  */
 void
 kdb_reboot(void)
 {
 
 	kdb_why = KDB_WHY_REBOOT;
 	printf("KDB: reboot requested\n");
 	shutdown_nice(0);
 }
 
 /*
  * Solaris implements a new BREAK which is initiated by a character sequence
  * CR ~ ^b which is similar to a familiar pattern used on Sun servers by the
  * Remote Console.
  *
  * Note that this function may be called from almost anywhere, with interrupts
  * disabled and with unknown locks held, so it must not access data other than
  * its arguments.  Its up to the caller to ensure that the state variable is
  * consistent.
  */
 #define	KEY_CR		13	/* CR '\r' */
 #define	KEY_TILDE	126	/* ~ */
 #define	KEY_CRTLB	2	/* ^B */
 #define	KEY_CRTLP	16	/* ^P */
 #define	KEY_CRTLR	18	/* ^R */
 
 /* States of th KDB "alternate break sequence" detecting state machine. */
 enum {
 	KDB_ALT_BREAK_SEEN_NONE,
 	KDB_ALT_BREAK_SEEN_CR,
 	KDB_ALT_BREAK_SEEN_CR_TILDE,
 };
 
 /*
  * Handle a console BREAK.  If break-to-debugger is enabled, enter the
  * debugger and return KDB_REQ_DEBUGGER; otherwise do nothing and
  * return 0.
  */
 int
 kdb_break(void)
 {

 	if (kdb_break_to_debugger) {
 		kdb_enter(KDB_WHY_BREAK, "Break to debugger");
 		return (KDB_REQ_DEBUGGER);
 	}
 	return (0);
 }
 
 /*
  * Advance the alternate-break state machine by one input character.
  *
  * "*state" holds the machine state between calls and is updated in
  * place.  Returns KDB_REQ_DEBUGGER, KDB_REQ_PANIC or KDB_REQ_REBOOT when
  * "key" completes the sequence CR ~ followed by ^B, ^P or ^R
  * respectively, and 0 in every other case.
  */
 static int
 kdb_alt_break_state(int key, int *state)
 {
 	int prev, req;

 	/* A CR always (re)starts the sequence, whatever came before. */
 	if (key == KEY_CR) {
 		*state = KDB_ALT_BREAK_SEEN_CR;
 		return (0);
 	}

 	/* Any other key resets the machine unless it extends the sequence. */
 	prev = *state;
 	req = 0;
 	*state = KDB_ALT_BREAK_SEEN_NONE;
 	if (prev == KDB_ALT_BREAK_SEEN_CR) {
 		if (key == KEY_TILDE)
 			*state = KDB_ALT_BREAK_SEEN_CR_TILDE;
 	} else if (prev == KDB_ALT_BREAK_SEEN_CR_TILDE) {
 		switch (key) {
 		case KEY_CRTLB:
 			req = KDB_REQ_DEBUGGER;
 			break;
 		case KEY_CRTLP:
 			req = KDB_REQ_PANIC;
 			break;
 		case KEY_CRTLR:
 			req = KDB_REQ_REBOOT;
 			break;
 		default:
 			break;
 		}
 	}
 	return (req);
 }
 
 /*
  * Feed one console character to the alternate-break detector and carry
  * out whatever request it completes: enter the debugger, panic, or
  * reboot.  With "force_gdb" set, the "gdb" backend is selected before
  * entering the debugger or panicking.  Does nothing when the alternate
  * break sequence is disabled.  Always returns 0.
  */
 static int
 kdb_alt_break_internal(int key, int *state, int force_gdb)
 {
 	int req;

 	if (!kdb_alt_break_to_debugger)
 		return (0);

 	req = kdb_alt_break_state(key, state);
 	if (req == KDB_REQ_REBOOT) {
 		kdb_reboot();
 		return (0);
 	}
 	if (req != KDB_REQ_DEBUGGER && req != KDB_REQ_PANIC)
 		return (0);

 	/* Both remaining requests honor the forced-gdb option first. */
 	if (force_gdb)
 		kdb_dbbe_select("gdb");
 	if (req == KDB_REQ_DEBUGGER)
 		kdb_enter(KDB_WHY_BREAK, "Break to debugger");
 	else
 		kdb_panic("Panic sequence on console");
 	return (0);
 }
 
 /*
  * Process one console character for the alternate break sequence, using
  * the currently selected backend (force_gdb is 0).  "state" is the
  * caller-owned state of the sequence detector.  Returns whatever
  * kdb_alt_break_internal() returns, which is always 0.
  */
 int
 kdb_alt_break(int key, int *state)
 {
 
 	return (kdb_alt_break_internal(key, state, 0));
 }
 
 /*
  * This variation on kdb_alt_break() is used only by dcons, which has its own
  * configuration flag to force GDB use regardless of the global KDB
  * configuration.
  *
  * It passes force_gdb = 1, so the "gdb" backend is selected before the
  * debugger is entered or the kernel is panicked.
  */
 int
 kdb_alt_break_gdb(int key, int *state)
 {
 
 	return (kdb_alt_break_internal(key, state, 1));
 }
 
 /*
  * Print a backtrace of the calling thread. The backtrace is generated by
  * the selected debugger, provided it supports backtraces. If no debugger
  * is selected or the current debugger does not support backtraces, this
  * function silently returns, unless the kernel is built with the STACK
  * option, in which case the backtrace is captured with stack_save() and
  * printed with stack_print_ddb() instead.
  */
 void
 kdb_backtrace(void)
 {
 
 	if (kdb_dbbe != NULL && kdb_dbbe->dbbe_trace != NULL) {
 		printf("KDB: stack backtrace:\n");
 		kdb_dbbe->dbbe_trace();
 	}
 #ifdef STACK
 	/* This else pairs with the if above; it only exists with STACK. */
 	else {
 		struct stack st;
 
 		printf("KDB: stack backtrace:\n");
 		stack_save(&st);
 		stack_print_ddb(&st);
 	}
 #endif
 }
 
 /*
  * Like kdb_backtrace(), but print the backtrace of the arbitrary thread
  * 'td' instead of the calling thread.  In the STACK fallback the trace is
  * printed only if stack_save_td() succeeds.
  */
 void
 kdb_backtrace_thread(struct thread *td)
 {
 
 	if (kdb_dbbe == NULL || kdb_dbbe->dbbe_trace_thread == NULL) {
 #ifdef STACK
 		struct stack st;
 
 		printf("KDB: stack backtrace of thread %d:\n", td->td_tid);
 		if (stack_save_td(&st, td) == 0)
 			stack_print_ddb(&st);
 #endif
 		return;
 	}
 
 	printf("KDB: stack backtrace of thread %d:\n", td->td_tid);
 	kdb_dbbe->dbbe_trace_thread(td);
 }
 
 /*
  * Set/change the current backend to the active one called 'name'.
  *
  * Returns 0 on success, the priv_check() error if the calling thread lacks
  * PRIV_KDB_SET_BACKEND, or EINVAL if no active backend has that name.
  */
 int
 kdb_dbbe_select(const char *name)
 {
 	struct kdb_dbbe *cand, **it;
 	int rv;
 
 	rv = priv_check(curthread, PRIV_KDB_SET_BACKEND);
 	if (rv != 0)
 		return (rv);
 
 	SET_FOREACH(it, kdb_dbbe_set) {
 		cand = *it;
 		/* Only backends marked active (dbbe_active == 0) qualify. */
 		if (cand->dbbe_active != 0)
 			continue;
 		if (strcmp(cand->dbbe_name, name) != 0)
 			continue;
 		kdb_dbbe = cand;
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Decide whether debugger backend 'be' may be entered on behalf of thread
  * 'td'.  Returns true when entry is permitted and false otherwise; every
  * refusal is reported with a printf().
  */
 static bool
 kdb_backend_permitted(struct kdb_dbbe *be, struct thread *td)
 {
 	struct ucred *cred;
 	int error;
 
 	cred = td->td_ucred;
 	if (cred == NULL) {
 		/*
 		 * A thread without credentials is only expected to be
 		 * thread0 while 'cold' is set; there is nothing to check the
 		 * securelevel against, so treat it as permitted.
 		 */
 		KASSERT(td == &thread0 && cold,
 		    ("%s: missing cred for %p", __func__, td));
 		error = 0;
 	} else {
 		/* A non-zero result is reported below as "elevated". */
 		error = securelevel_gt(cred, kdb_enter_securelevel);
 	}
 #ifdef MAC
 	/*
 	 * Give MAC a chance to weigh in on the policy: if the securelevel is
 	 * not raised, then MAC may veto the backend, otherwise MAC may
 	 * explicitly grant access.
 	 */
 	if (error == 0) {
 		error = mac_kdb_check_backend(be);
 		if (error != 0) {
 			printf("MAC prevented execution of KDB backend: %s\n",
 			    be->dbbe_name);
 			return (false);
 		}
 	} else if (mac_kdb_grant_backend(be) == 0) {
 		/* MAC overrode the securelevel refusal. */
 		error = 0;
 	}
 #endif
 	if (error != 0)
 		printf("refusing to enter KDB with elevated securelevel\n");
 	return (error == 0);
 }
 
 /*
  * Enter the currently selected debugger by executing breakpoint(), after
  * printing 'msg' if one was supplied.  Nothing happens when no backend is
  * selected or the debugger is already active.
  *
  * 'why' is the more mechanically usable counterpart of 'msg': it is
  * published in kdb_why for the duration of the breakpoint and is relied upon
  * by DDB scripting to identify the reason for entering the debugger so that
  * the right script can be run.
  */
 void
 kdb_enter(const char *why, const char *msg)
 {
 
 	if (kdb_dbbe == NULL || kdb_active != 0)
 		return;
 
 	kdb_why = why;
 	if (msg != NULL)
 		printf("KDB: enter: %s\n", msg);
 	breakpoint();
 	kdb_why = KDB_WHY_UNSET;
 }
 
 /*
  * Initialize the kernel debugger interface: run the init method of every
  * registered backend, record which ones are usable and make the one that
  * reported the highest priority the current backend.  If a backend was
  * chosen, the usable backends and the current one are printed.
  */
 void
 kdb_init(void)
 {
 	struct kdb_dbbe *cand, **it;
 	int best, prio;
 
 	TSENTER();
 	kdb_active = 0;
 	kdb_dbbe = NULL;
 	best = -1;
 	SET_FOREACH(it, kdb_dbbe_set) {
 		cand = *it;
 		/* A backend without an init method counts as unusable. */
 		prio = -1;
 		if (cand->dbbe_init != NULL)
 			prio = cand->dbbe_init();
 		if (prio >= 0)
 			cand->dbbe_active = 0;
 		else
 			cand->dbbe_active = -1;
 		/* Strict comparison: the first of equal priorities wins. */
 		if (prio > best) {
 			best = prio;
 			kdb_dbbe = cand;
 		}
 	}
 	if (kdb_dbbe != NULL) {
 		printf("KDB: debugger backends:");
 		SET_FOREACH(it, kdb_dbbe_set) {
 			cand = *it;
 			if (cand->dbbe_active != 0)
 				continue;
 			printf(" %s", cand->dbbe_name);
 		}
 		printf("\n");
 		printf("KDB: current backend: %s\n",
 		    kdb_dbbe->dbbe_name);
 	}
 	TSEXIT();
 }
 
 /*
  * Handle contexts.
  */
 /*
  * Install 'new' as the jump buffer used by kdb_reenter() and
  * kdb_reenter_silent(), and return the previously installed one so that the
  * caller can restore it later.
  */
 void *
 kdb_jmpbuf(jmp_buf new)
 {
 	void *prev = kdb_jmpbufp;
 
 	kdb_jmpbufp = new;
 	return (prev);
 }
 
 /*
  * Jump back to the context saved in kdb_jmpbufp, announcing the re-entry
  * and printing a backtrace first.  Returns to the caller only when the
  * debugger is not active or no jump buffer is installed.
  */
 void
 kdb_reenter(void)
 {
 
 	if (kdb_active && kdb_jmpbufp != NULL) {
 		printf("KDB: reentering\n");
 		kdb_backtrace();
 		longjmp(kdb_jmpbufp, 1);
 		/* NOTREACHED */
 	}
 }
 
 /*
  * Same as kdb_reenter(), but without the console message and backtrace.
  */
 void
 kdb_reenter_silent(void)
 {
 
 	if (kdb_active && kdb_jmpbufp != NULL) {
 		longjmp(kdb_jmpbufp, 1);
 		/* NOTREACHED */
 	}
 }
 
 /*
  * Thread-related support functions.
  */
 /*
  * Return the saved register context (pcb) to use when inspecting 'thr':
  * kdb_pcb for the current thread, the stop-time pcb of the CPU on which the
  * thread was running if that CPU is in stopped_cpus (SMP only), and the
  * thread's own td_pcb otherwise.
  */
 struct pcb *
 kdb_thr_ctx(struct thread *thr)
 {
 #ifdef SMP
 	struct pcpu *cpu;
 #endif
 
 	if (thr == curthread)
 		return (&kdb_pcb);
 
 #ifdef SMP
 	STAILQ_FOREACH(cpu, &cpuhead, pc_allcpu)  {
 		if (cpu->pc_curthread != thr)
 			continue;
 		if (!CPU_ISSET(cpu->pc_cpuid, &stopped_cpus))
 			continue;
 		return (&stoppcbs[cpu->pc_cpuid]);
 	}
 #endif
 	return (thr->td_pcb);
 }
 
 /*
  * Return the first thread of the debugger's thread iteration: the first
  * thread of the first process in the pid hash table that has one, thread0
  * if the table does not exist yet, or NULL if no process has a thread.
  */
 struct thread *
 kdb_thr_first(void)
 {
 	struct proc *proc;
 	struct thread *td;
 	u_int bucket;
 
 	/* This function may be called early. */
 	if (pidhashtbl == NULL)
 		return (&thread0);
 
 	for (bucket = 0; bucket <= pidhash; bucket++) {
 		LIST_FOREACH(proc, &pidhashtbl[bucket], p_hash) {
 			td = FIRST_THREAD_IN_PROC(proc);
 			if (td == NULL)
 				continue;
 			return (td);
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Return the first thread of the process whose pid is 'pid', or NULL if
  * the pid's hash chain holds no such process.
  */
 struct thread *
 kdb_thr_from_pid(pid_t pid)
 {
 	struct proc *p;
 	struct thread *td;
 
 	td = NULL;
 	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
 		if (p->p_pid == pid) {
 			td = FIRST_THREAD_IN_PROC(p);
 			break;
 		}
 	}
 	return (td);
 }
 
 /*
  * Find the thread whose thread ID is 'tid' by walking all threads with
  * kdb_thr_first()/kdb_thr_next().  Returns NULL if there is none.
  */
 struct thread *
 kdb_thr_lookup(lwpid_t tid)
 {
 	struct thread *td;
 
 	for (td = kdb_thr_first(); td != NULL; td = kdb_thr_next(td)) {
 		if (td->td_tid == tid)
 			break;
 	}
 	return (td);
 }
 
 /*
  * Return the thread that follows 'thr' in the debugger's thread iteration.
  *
  * The remaining threads of thr's process come first.  After that the walk
  * continues with the processes that follow on the same pid hash chain and
  * then with the subsequent hash buckets, returning the first thread of the
  * next process that has one.  Returns NULL when the iteration is exhausted
  * or when the pid hash table does not exist yet.
  */
 struct thread *
 kdb_thr_next(struct thread *thr)
 {
 	struct proc *p;
 	u_int hash;
 
 	/* Remember the owning process before 'thr' is overwritten. */
 	p = thr->td_proc;
 	thr = TAILQ_NEXT(thr, td_plist);
 	if (thr != NULL)
 		return (thr);
 	if (pidhashtbl == NULL)
 		return (NULL);
 	/* Index of the bucket that holds the process just finished. */
 	hash = p->p_pid & pidhash;
 	for (;;) {
 		p = LIST_NEXT(p, p_hash);
 		/* End of chain: advance to the next non-empty bucket. */
 		while (p == NULL) {
 			if (++hash > pidhash)
 				return (NULL);
 			p = LIST_FIRST(&pidhashtbl[hash]);
 		}
 		/* Skip processes that have no threads. */
 		thr = FIRST_THREAD_IN_PROC(p);
 		if (thr != NULL)
 			return (thr);
 	}
 }
 
 /*
  * Make 'thr' the thread the debugger operates on by recording it in
  * kdb_thread and its register context in kdb_thrctx.  Returns 0 on success
  * and EINVAL if 'thr' is NULL.
  */
 int
 kdb_thr_select(struct thread *thr)
 {
 	if (thr != NULL) {
 		kdb_thread = thr;
 		kdb_thrctx = kdb_thr_ctx(thr);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Enter the debugger due to a trap.
  *
  * 'type' and 'code' are passed through to the machine-dependent hook and to
  * the backend's trap handler; 'tf' is the trap frame, recorded in kdb_frame
  * and used to build kdb_pcb.
  *
  * Returns 0 without doing anything when no backend with a trap handler is
  * selected or when the debugger is already active.  Otherwise returns the
  * value of the last backend trap handler that ran, or 1 if
  * kdb_backend_permitted() refused entry.
  */
 int
 kdb_trap(int type, int code, struct trapframe *tf)
 {
 #ifdef SMP
 	cpuset_t other_cpus;
 #endif
 	struct kdb_dbbe *be;
 	register_t intr;
 	int handled;
 	int did_stop_cpus;
 
 	be = kdb_dbbe;
 	if (be == NULL || be->dbbe_trap == NULL)
 		return (0);
 
 	/* We reenter the debugger through kdb_reenter(). */
 	if (kdb_active)
 		return (0);
 
 	intr = intr_disable();
 
 	/*
 	 * Unless the scheduler is already stopped, stop every CPU other than
 	 * this one that is not already in stopped_cpus, and remember that we
 	 * did so in order to undo it on the way out.
 	 */
 	if (!SCHEDULER_STOPPED()) {
 #ifdef SMP
 		other_cpus = all_cpus;
 		CPU_ANDNOT(&other_cpus, &other_cpus, &stopped_cpus);
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 		stop_cpus_hard(other_cpus);
 #endif
-		curthread->td_stopsched = 1;
+		scheduler_stopped = true;
 		did_stop_cpus = 1;
 	} else
 		did_stop_cpus = 0;
 
 	kdb_active++;
 
 	kdb_frame = tf;
 
 	/* Let MD code do its thing first... */
 	kdb_cpu_trap(type, code);
 
 	makectx(tf, &kdb_pcb);
 	kdb_thr_select(curthread);
 
 	cngrab();
 
 	/*
 	 * Run the backend's trap handler.  If the handler changed the
 	 * selected backend (kdb_dbbe), dispatch again to the new one, as
 	 * long as it exists and has a trap handler.
 	 */
 	for (;;) {
 		if (!kdb_backend_permitted(be, curthread)) {
 			/* Unhandled breakpoint traps are fatal. */
 			handled = 1;
 			break;
 		}
 		handled = be->dbbe_trap(type, code);
 		if (be == kdb_dbbe)
 			break;
 		be = kdb_dbbe;
 		if (be == NULL || be->dbbe_trap == NULL)
 			break;
 		printf("Switching to %s back-end\n", be->dbbe_name);
 	}
 
 	cnungrab();
 
 	kdb_active--;
 
 	if (did_stop_cpus) {
-		curthread->td_stopsched = 0;
+		scheduler_stopped = false;
 #ifdef SMP
 		/* Restart only those of our CPUs still in stopped_cpus. */
 		CPU_AND(&other_cpus, &other_cpus, &stopped_cpus);
 		restart_cpus(other_cpus);
 #endif
 	}
 
 	intr_restore(intr);
 
 	return (handled);
 }
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 33a878dc46aa..b08226c89dfd 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -1,1372 +1,1372 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _SYS_PROC_H_
 #define	_SYS_PROC_H_
 
 #include <sys/callout.h>		/* For struct callout. */
 #include <sys/event.h>			/* For struct klist. */
 #ifdef _KERNEL
 #include <sys/_eventhandler.h>
 #endif
 #include <sys/condvar.h>
 #ifndef _KERNEL
 #include <sys/filedesc.h>
 #endif
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/lock_profile.h>
 #include <sys/_mutex.h>
 #include <sys/osd.h>
 #include <sys/priority.h>
 #include <sys/rtprio.h>			/* XXX. */
 #include <sys/runq.h>
 #include <sys/resource.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #ifndef _KERNEL
 #include <sys/time.h>			/* For structs itimerval, timeval. */
 #else
 #include <sys/pcpu.h>
 #include <sys/systm.h>
 #endif
 #include <sys/ucontext.h>
 #include <sys/ucred.h>
 #include <sys/types.h>
 #include <sys/_domainset.h>
 
 #include <machine/proc.h>		/* Machine-dependent proc substruct. */
 #ifdef _KERNEL
 #include <machine/cpu.h>
 #endif
 
 /*
  * One structure allocated per session.
  *
  * The letter in parentheses in each member comment names the lock that
  * protects the member.  Where two are given (for example "m + e") the
  * member is annotated with both locks.
  *
  * List of locks
  * (m)		locked by s_mtx mtx
  * (e)		locked by proctree_lock sx
  * (c)		const until freeing
  */
 struct session {
 	u_int		s_count;	/* Ref cnt; pgrps in session - atomic. */
 	struct proc	*s_leader;	/* (m + e) Session leader. */
 	struct vnode	*s_ttyvp;	/* (m) Vnode of controlling tty. */
 	struct cdev_priv *s_ttydp;	/* (m) Device of controlling tty.  */
 	struct tty	*s_ttyp;	/* (e) Controlling tty. */
 	pid_t		s_sid;		/* (c) Session ID. */
 					/* (m) Setlogin() name: */
 	/* Buffer size is MAXLOGNAME rounded up to a multiple of sizeof(long). */
 	char		s_login[roundup(MAXLOGNAME, sizeof(long))];
 	struct mtx	s_mtx;		/* Mutex to protect members. */
 };
 
 /*
  * One structure allocated per process group.
  *
  * The letter in parentheses in each member comment names the lock that
  * protects the member.
  *
  * List of locks
  * (m)		locked by pg_mtx mtx
  * (e)		locked by proctree_lock sx
  * (c)		const until freeing
  */
 struct pgrp {
 	LIST_ENTRY(pgrp) pg_hash;	/* (e) Hash chain. */
 	LIST_HEAD(, proc) pg_members;	/* (m + e) Pointer to pgrp members. */
 	struct session	*pg_session;	/* (c) Pointer to session. */
 	struct sigiolst	pg_sigiolst;	/* (m) List of sigio sources. */
 	pid_t		pg_id;		/* (c) Process group id. */
 	struct mtx	pg_mtx;		/* Mutex to protect members */
 	int		pg_flags;	/* (m) PGRP_ flags */
 	struct sx	pg_killsx;	/* Mutual exclusion between group member
 					 * fork() and killpg() */
 };
 
 /* Flag kept in pg_flags. */
 #define	PGRP_ORPHANED	0x00000001	/* Group is orphaned */
 
 /*
  * pargs, used to hold a copy of the command line, if it had a sane length.
  *
  * NOTE(review): ar_args is declared with a single element; presumably the
  * structure is allocated with room for ar_length bytes of arguments --
  * confirm against the allocation code.
  */
 struct pargs {
 	u_int	ar_ref;		/* Reference count. */
 	u_int	ar_length;	/* Length. */
 	u_char	ar_args[1];	/* Arguments. */
 };
 
 /*-
  * Description of a process.
  *
  * This structure contains the information needed to manage a thread of
  * control, known in UN*X as a process; it has references to substructures
  * containing descriptions of things that the process uses, but may share
  * with related processes.  The process structure and the substructures
  * are always addressable except for those marked "(CPU)" below,
  * which might be addressable only on a processor on which the process
  * is running.
  *
  * Below is a key of locks used to protect each member of struct proc.  The
  * lock is indicated by a reference to a specific character in parens in the
  * associated comment.
  *      * - not yet protected
  *      a - only touched by curproc or parent during fork/wait
  *      b - created at fork, never changes
  *		(exception aiods switch vmspaces, but they are also
  *		marked 'P_SYSTEM' so hopefully it will be left alone)
  *      c - locked by proc mtx
  *      d - locked by allproc_lock lock
  *      e - locked by proctree_lock lock
  *      f - session mtx
  *      g - process group mtx
  *      h - callout_lock mtx
  *      i - by curproc or the master session mtx
  *      j - locked by proc slock
  *      k - only accessed by curthread
  *	k*- only accessed by curthread and from an interrupt
  *	kx- only accessed by curthread and by debugger
  *      l - the attaching proc or attaching proc parent
  *      n - not locked, lazy
  *      o - ktrace lock
  *      q - td_contested lock
  *      r - p_peers lock
  *      s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9)
  *      t - thread lock
  *	u - process stat lock
  *	w - process timer lock
  *      x - created at fork, only changes during single threading in exec
  *      y - created at first aio, doesn't change until exit or exec at which
  *          point we are single-threaded and only curthread changes it
  *
  * If the locking key specifies two identifiers (for example, p_pptr) then
  * either lock is sufficient for read access, but both locks must be held
  * for write access.
  */
 struct cpuset;
 struct filecaps;
 struct filemon;
 struct kaioinfo;
 struct kaudit_record;
 struct kcov_info;
 struct kdtrace_proc;
 struct kdtrace_thread;
 struct kmsan_td;
 struct kq_timer_cb_data;
 struct mqueue_notifier;
 struct p_sched;
 struct proc;
 struct procdesc;
 struct racct;
 struct sbuf;
 struct sleepqueue;
 struct socket;
 struct td_sched;
 struct thread;
 struct trapframe;
 struct turnstile;
 struct vm_map;
 struct vm_map_entry;
 struct epoch_tracker;
 
 /*
  * Parameters of a system call; struct thread embeds one as td_sa.
  *
  * NOTE(review): the member descriptions below are inferred from the names
  * only -- confirm against the syscall entry code.
  */
 struct syscall_args {
 	u_int code;			/* presumably the syscall number */
 	u_int original_code;		/* presumably the number as first seen */
 	struct sysent *callp;		/* presumably the sysent entry used */
 	register_t args[8];		/* up to eight argument words */
 };
 
 /*
  * XXX: Does this belong in resource.h or resourcevar.h instead?
  * Resource usage extension.  The times in rusage structs in the kernel are
  * never up to date.  The actual times are kept as runtimes and tick counts
  * (with control info in the "previous" times), and are converted when
  * userland asks for rusage info.  Backwards compatibility prevents putting
  * this directly in the user-visible rusage struct.
  *
  * The first four members are the runtime and the tick counts; the last
  * three (rux_uu, rux_su, rux_tu) are the "previous" times mentioned above.
  *
  * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux.
  * Locking for td_rux: (t) for all fields.
  */
 struct rusage_ext {
 	uint64_t	rux_runtime;    /* (cu) Real time. */
 	uint64_t	rux_uticks;     /* (cu) Statclock hits in user mode. */
 	uint64_t	rux_sticks;     /* (cu) Statclock hits in sys mode. */
 	uint64_t	rux_iticks;     /* (cu) Statclock hits in intr mode. */
 	uint64_t	rux_uu;         /* (c) Previous user time in usec. */
 	uint64_t	rux_su;         /* (c) Previous sys time in usec. */
 	uint64_t	rux_tu;         /* (c) Previous total time in usec. */
 };
 
 /*
  * Kernel runnable context (thread).
  * This is what is put to sleep and reactivated.
  * Thread context.  Processes may have multiple threads.
  *
  * The structure is divided into sections by the td_startzero/td_endzero and
  * td_startcopy/td_endcopy marker macros: members in the first range are
  * cleared and members in the second range are copied when a thread is
  * created, and the remaining members must be set up by hand (see the
  * comments at each marker).  The letters in parentheses in the member
  * comments refer to the locking key above.
  */
 struct thread {
 	struct mtx	*volatile td_lock; /* replaces sched lock */
 	struct proc	*td_proc;	/* (*) Associated process. */
 	TAILQ_ENTRY(thread) td_plist;	/* (*) All threads in this proc. */
 	TAILQ_ENTRY(thread) td_runq;	/* (t) Run queue. */
 	union	{
 		TAILQ_ENTRY(thread) td_slpq;	/* (t) Sleep queue. */
 		struct thread *td_zombie; /* Zombie list linkage */
 	};
 	TAILQ_ENTRY(thread) td_lockq;	/* (t) Lock queue. */
 	LIST_ENTRY(thread) td_hash;	/* (d) Hash chain. */
 	struct cpuset	*td_cpuset;	/* (t) CPU affinity mask. */
 	struct domainset_ref td_domain;	/* (a) NUMA policy */
 	struct seltd	*td_sel;	/* Select queue/channel. */
 	struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
 	struct turnstile *td_turnstile;	/* (k) Associated turnstile. */
 	struct rl_q_entry *td_rlqe;	/* (k) Associated range lock entry. */
 	struct umtx_q   *td_umtxq;	/* (c?) Link for when we're blocked. */
 	lwpid_t		td_tid;		/* (b) Thread ID. */
 	sigqueue_t	td_sigqueue;	/* (c) Sigs arrived, not delivered. */
 #define	td_siglist	td_sigqueue.sq_signals
 	u_char		td_lend_user_pri; /* (t) Lend user pri. */
 	u_char		td_allocdomain;	/* (b) NUMA domain backing this struct thread. */
 	u_char		td_base_ithread_pri; /* (t) Base ithread pri */
 	struct kmsan_td	*td_kmsan;	/* (k) KMSAN state */
 
 /* Cleared during fork1(), thread_create(), or kthread_add(). */
 #define	td_startzero td_flags
 	int		td_flags;	/* (t) TDF_* flags. */
 	int		td_ast;		/* (t) TDA_* indicators */
 	int		td_inhibitors;	/* (t) Why can not run (TDI_* bits). */
 	int		td_pflags;	/* (k) Private thread (TDP_*) flags. */
 	int		td_pflags2;	/* (k) Private thread (TDP2_*) flags. */
 	int		td_dupfd;	/* (k) Ret value from fdopen. XXX */
 	int		td_sqqueue;	/* (t) Sleepqueue queue blocked on. */
 	const void	*td_wchan;	/* (t) Sleep address. */
 	const char	*td_wmesg;	/* (t) Reason for sleep. */
 	volatile u_char td_owepreempt;  /* (k*) Preempt on last critical_exit */
 	u_char		td_tsqueue;	/* (t) Turnstile queue blocked on. */
-	u_char		td_stopsched;	/* (k) Scheduler stopped. */
+	u_char		_td_pad0[2];	/* Available. */
 	int		td_locks;	/* (k) Debug: count of non-spin locks */
 	int		td_rw_rlocks;	/* (k) Count of rwlock read locks. */
 	int		td_sx_slocks;	/* (k) Count of sx shared locks. */
 	int		td_lk_slocks;	/* (k) Count of lockmgr shared locks. */
 	struct lock_object *td_wantedlock; /* (k) Lock we are contending on */
 	struct turnstile *td_blocked;	/* (t) Lock thread is blocked on. */
 	const char	*td_lockname;	/* (t) Name of lock blocked on. */
 	LIST_HEAD(, turnstile) td_contested;	/* (q) Contested locks. */
 	struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
 	int		td_intr_nesting_level; /* (k) Interrupt recursion. */
 	int		td_pinned;	/* (k) Temporary cpu pin count. */
 	struct ucred	*td_realucred;	/* (k) Reference to credentials. */
 	struct ucred	*td_ucred;	/* (k) Used credentials, temporarily switchable. */
 	struct plimit	*td_limit;	/* (k) Resource limits. */
 	int		td_slptick;	/* (t) Time at sleep. */
 	int		td_blktick;	/* (t) Time spent blocked. */
 	int		td_swvoltick;	/* (t) Time at last SW_VOL switch. */
 	int		td_swinvoltick;	/* (t) Time at last SW_INVOL switch. */
 	u_int		td_cow;		/* (*) Number of copy-on-write faults */
 	struct rusage	td_ru;		/* (t) rusage information. */
 	struct rusage_ext td_rux;	/* (t) Internal rusage information. */
 	uint64_t	td_incruntime;	/* (t) Cpu ticks to transfer to proc. */
 	uint64_t	td_runtime;	/* (t) How many cpu ticks we've run. */
 	u_int 		td_pticks;	/* (t) Statclock hits for profiling */
 	u_int		td_sticks;	/* (t) Statclock hits in system mode. */
 	u_int		td_iticks;	/* (t) Statclock hits in intr mode. */
 	u_int		td_uticks;	/* (t) Statclock hits in user mode. */
 	int		td_intrval;	/* (t) Return value for sleepq. */
 	sigset_t	td_oldsigmask;	/* (k) Saved mask from pre sigpause. */
 	volatile u_int	td_generation;	/* (k) For detection of preemption */
 	stack_t		td_sigstk;	/* (k) Stack ptr and on-stack flag. */
 	int		td_xsig;	/* (c) Signal for ptrace */
 	u_long		td_profil_addr;	/* (k) Temporary addr until AST. */
 	u_int		td_profil_ticks; /* (k) Temporary ticks until AST. */
 	char		td_name[MAXCOMLEN + 1];	/* (*) Thread name. */
 	struct file	*td_fpop;	/* (k) file referencing cdev under op */
 	int		td_dbgflags;	/* (c) Userland debugger flags */
 	siginfo_t	td_si;		/* (c) For debugger or core file */
 	int		td_ng_outbound;	/* (k) Thread entered ng from above. */
 	struct osd	td_osd;		/* (k) Object specific data. */
 	struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */
 	pid_t		td_dbg_forked;	/* (c) Child pid for debugger. */
 	u_int		td_no_sleeping;	/* (k) Sleeping disabled count. */
 	struct vnode	*td_vp_reserved;/* (k) Preallocated vnode. */
 	void		*td_su;		/* (k) FFS SU private */
 	sbintime_t	td_sleeptimo;	/* (t) Sleep timeout. */
 	int		td_rtcgen;	/* (s) rtc_generation of abs. sleep */
 	int		td_errno;	/* (k) Error from last syscall. */
 	size_t		td_vslock_sz;	/* (k) amount of vslock-ed space */
 	struct kcov_info *td_kcov_info;	/* (*) Kernel code coverage data */
 	long		td_ucredref;	/* (k) references on td_realucred */
 #define	td_endzero td_sigmask
 
 /* Copied during fork1(), thread_create(), or kthread_add(). */
 #define	td_startcopy td_endzero
 	sigset_t	td_sigmask;	/* (c) Current signal mask. */
 	u_char		td_rqindex;	/* (t) Run queue index. */
 	u_char		td_base_pri;	/* (t) Thread base kernel priority. */
 	u_char		td_priority;	/* (t) Thread active priority. */
 	u_char		td_pri_class;	/* (t) Scheduling class. */
 	u_char		td_user_pri;	/* (t) User pri from estcpu and nice. */
 	u_char		td_base_user_pri; /* (t) Base user pri */
 	uintptr_t	td_rb_list;	/* (k) Robust list head. */
 	uintptr_t	td_rbp_list;	/* (k) Robust priv list head. */
 	uintptr_t	td_rb_inact;	/* (k) Current in-action mutex loc. */
 	struct syscall_args td_sa;	/* (kx) Syscall parameters. Copied on
 					   fork for child tracing. */
 	void		*td_sigblock_ptr; /* (k) uptr for fast sigblock. */
 	uint32_t	td_sigblock_val;  /* (k) fast sigblock value read at
 					     td_sigblock_ptr on kern entry */
 #define	td_endcopy td_pcb
 
 /*
  * Fields that must be manually set in fork1(), thread_create(), kthread_add(),
  * or already have been set in the allocator, constructor, etc.
  */
 	struct pcb	*td_pcb;	/* (k) Kernel VA of pcb and kstack. */
 	enum td_states {
 		TDS_INACTIVE = 0x0,
 		TDS_INHIBITED,
 		TDS_CAN_RUN,
 		TDS_RUNQ,
 		TDS_RUNNING
 	} td_state;			/* (t) thread state */
 	/* Note: td_state must be accessed using TD_{GET,SET}_STATE(). */
 	union {
 		syscallarg_t	tdu_retval[2];
 		off_t		tdu_off;
 	} td_uretoff;			/* (k) Syscall aux returns. */
 #define td_retval	td_uretoff.tdu_retval
 	u_int		td_cowgen;	/* (k) Generation of COW pointers. */
 	/* LP64 hole */
 	struct callout	td_slpcallout;	/* (h) Callout for sleep. */
 	struct trapframe *td_frame;	/* (k) */
 	vm_offset_t	td_kstack;	/* (a) Kernel VA of kstack. */
 	int		td_kstack_pages; /* (a) Size of the kstack. */
 	volatile u_int	td_critnest;	/* (k*) Critical section nest level. */
 	struct mdthread td_md;		/* (k) Any machine-dependent fields. */
 	struct kaudit_record	*td_ar;	/* (k) Active audit record, if any. */
 	struct lpohead	td_lprof[2];	/* (a) lock profiling objects. */
 	struct kdtrace_thread	*td_dtrace; /* (*) DTrace-specific data. */
 	struct vnet	*td_vnet;	/* (k) Effective vnet. */
 	const char	*td_vnet_lpush;	/* (k) Debugging vnet push / pop. */
 	struct trapframe *td_intr_frame;/* (k) Frame of the current irq */
 	struct proc	*td_rfppwait_p;	/* (k) The vforked child */
 	struct vm_page	**td_ma;	/* (k) uio pages held */
 	int		td_ma_cnt;	/* (k) size of *td_ma */
 	/* LP64 hole */
 	void		*td_emuldata;	/* Emulator state data */
 	int		td_lastcpu;	/* (t) Last cpu we were on. */
 	int		td_oncpu;	/* (t) Which cpu we are on. */
 	void		*td_lkpi_task;	/* LinuxKPI task struct pointer */
 	int		td_pmcpend;	/* NOTE(review): undocumented; presumably hwpmc-related -- confirm. */
 	void		*td_remotereq;	/* (c) dbg remote request. */
 	off_t		td_ktr_io_lim;	/* (k) limit for ktrace file size */
 #ifdef EPOCH_TRACE
 	SLIST_HEAD(, epoch_tracker) td_epochs;
 #endif
 };
 
 /*
  * Storage for a struct thread followed by ten 64-bit words.
  *
  * NOTE(review): going by the names, this backs thread0 and the trailing
  * words are scheduler-private data for it -- confirm against the scheduler.
  */
 struct thread0_storage {
 	struct thread t0st_thread;
 	uint64_t t0st_sched[10];
 };
 
 /* Operations on a thread's td_lock; implemented outside this header. */
 struct mtx *thread_lock_block(struct thread *);
 void thread_lock_block_wait(struct thread *);
 void thread_lock_set(struct thread *, struct mtx *);
 void thread_lock_unblock(struct thread *, struct mtx *);
 /* Assert with mtx_assert() that td's current td_lock satisfies 'type'. */
 #define	THREAD_LOCK_ASSERT(td, type)					\
 	mtx_assert((td)->td_lock, (type))
 
 /*
  * Like THREAD_LOCK_ASSERT(), but the assertion is skipped when td_lock
  * currently points at blocked_lock.  td_lock is read once into a local so
  * that the comparison and the assertion see the same value.
  */
 #define	THREAD_LOCK_BLOCKED_ASSERT(td, type)				\
 do {									\
 	struct mtx *__m = (td)->td_lock;				\
 	if (__m != &blocked_lock)					\
 		mtx_assert(__m, (type));				\
 } while (0)
 
 /*
  * Debugging helpers that only do work in INVARIANTS kernels; without
  * INVARIANTS all four macros expand to nothing.
  */
 #ifdef INVARIANTS
 /* Assert that td's td_lock is exactly 'lock'. */
 #define	THREAD_LOCKPTR_ASSERT(td, lock)					\
 do {									\
 	struct mtx *__m;						\
 	__m = (td)->td_lock;						\
 	KASSERT(__m == (lock),						\
 	    ("Thread %p lock %p does not match %p", td, __m, (lock)));	\
 } while (0)
 
 /* As above, but td_lock may also be blocked_lock. */
 #define	THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock)				\
 do {									\
 	struct mtx *__m;						\
 	__m = (td)->td_lock;						\
 	KASSERT(__m == (lock) || __m == &blocked_lock,			\
 	    ("Thread %p lock %p does not match %p", td, __m, (lock)));	\
 } while (0)
 
 /*
  * Maintain the td_locks counter.  The decrement asserts that the counter is
  * positive, except when the scheduler is stopped.
  */
 #define	TD_LOCKS_INC(td)	((td)->td_locks++)
 #define	TD_LOCKS_DEC(td) do {						\
-	KASSERT(SCHEDULER_STOPPED_TD(td) || (td)->td_locks > 0,		\
+	KASSERT(SCHEDULER_STOPPED() || (td)->td_locks > 0,		\
 	    ("Thread %p owns no locks", (td)));				\
 	(td)->td_locks--;						\
 } while (0)
 #else
 #define	THREAD_LOCKPTR_ASSERT(td, lock)
 #define	THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock)
 
 #define	TD_LOCKS_INC(td)
 #define	TD_LOCKS_DEC(td)
 #endif
 
 /*
  * Flags kept in td_flags:
  * To change these you MUST have the scheduler lock.
  *
  * NOTE(review): struct thread annotates td_flags with (t), the thread lock;
  * "scheduler lock" here presumably refers to that same lock -- confirm.
  * Bits named TDF_UNUSED* are free for reuse.
  */
 #define	TDF_BORROWING	0x00000001 /* Thread is borrowing pri from another. */
 #define	TDF_INPANIC	0x00000002 /* Caused a panic, let it drive crashdump. */
 #define	TDF_INMEM	0x00000004 /* Thread's stack is in memory. */
 #define	TDF_SINTR	0x00000008 /* Sleep is interruptible. */
 #define	TDF_TIMEOUT	0x00000010 /* Timing out during sleep. */
 #define	TDF_IDLETD	0x00000020 /* This is a per-CPU idle thread. */
 #define	TDF_CANSWAP	0x00000040 /* Thread can be swapped. */
 #define	TDF_SIGWAIT	0x00000080 /* Ignore ignored signals */
 #define	TDF_KTH_SUSP	0x00000100 /* kthread is suspended */
 #define	TDF_ALLPROCSUSP	0x00000200 /* suspended by SINGLE_ALLPROC */
 #define	TDF_BOUNDARY	0x00000400 /* Thread suspended at user boundary */
 #define	TDF_UNUSED1	0x00000800 /* Available */
 #define	TDF_UNUSED2	0x00001000 /* Available */
 #define	TDF_SBDRY	0x00002000 /* Stop only on usermode boundary. */
 #define	TDF_UPIBLOCKED	0x00004000 /* Thread blocked on user PI mutex. */
 #define	TDF_UNUSED3	0x00008000 /* Available */
 #define	TDF_UNUSED4	0x00010000 /* Available */
 #define	TDF_UNUSED5	0x00020000 /* Available */
 #define	TDF_NOLOAD	0x00040000 /* Ignore during load avg calculations. */
 #define	TDF_SERESTART	0x00080000 /* ERESTART on stop attempts. */
 #define	TDF_THRWAKEUP	0x00100000 /* Libthr thread must not suspend itself. */
 #define	TDF_SEINTR	0x00200000 /* EINTR on stop attempts. */
 #define	TDF_SWAPINREQ	0x00400000 /* Swapin request due to wakeup. */
 #define	TDF_UNUSED6	0x00800000 /* Available */
 #define	TDF_SCHED0	0x01000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED1	0x02000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED2	0x04000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED3	0x08000000 /* Reserved for scheduler private use */
 #define	TDF_UNUSED7	0x10000000 /* Available */
 #define	TDF_UNUSED8	0x20000000 /* Available */
 #define	TDF_UNUSED9	0x40000000 /* Available */
 #define	TDF_UNUSED10	0x80000000 /* Available */
 
 /*
  * AST kinds.  Each enumerator is a bit index into a thread's td_ast word;
  * TDAI() turns an index into the corresponding bit mask and TDA_MAX is the
  * number of kinds.
  *
  * NOTE(review): TAD_MOD2 and TAD_MOD4 are spelled "TAD_" rather than
  * "TDA_"; the names are kept as they are because renaming them would break
  * any existing user.
  */
 enum {
 	TDA_AST = 0,		/* Special: call all non-flagged AST handlers */
 	TDA_OWEUPC,
 	TDA_HWPMC,
 	TDA_VFORK,
 	TDA_ALRM,
 	TDA_PROF,
 	TDA_MAC,
 	TDA_SCHED,
 	TDA_UFS,
 	TDA_GEOM,
 	TDA_KQUEUE,
 	TDA_RACCT,
 	TDA_MOD1,		/* For third party use, before signals are */
 	TAD_MOD2,		/* processed .. */
 	TDA_SIG,
 	TDA_KTRACE,
 	TDA_SUSPEND,
 	TDA_SIGSUSPEND,
 	TDA_MOD3,		/* .. and after */
 	TAD_MOD4,
 	TDA_MAX,
 };
 /* Bit mask for AST kind 'tda'. */
 #define	TDAI(tda)		(1U << (tda))
 /*
  * True if AST kind 'tda' is flagged in td->td_ast.  The 'td' argument is
  * parenthesized so that any pointer expression (for example "tds + 1") can
  * be passed, not just a plain identifier.
  */
 #define	td_ast_pending(td, tda)	(((td)->td_ast & TDAI(tda)) != 0)
 
 /*
  * Userland debug flags.
  * NOTE(review): presumably kept in td_dbgflags, which struct thread
  * describes as "Userland debugger flags" -- confirm.
  */
 #define	TDB_SUSPEND	0x00000001 /* Thread is suspended by debugger */
 #define	TDB_XSIG	0x00000002 /* Thread is exchanging signal under trace */
 #define	TDB_USERWR	0x00000004 /* Debugger modified memory or registers */
 #define	TDB_SCE		0x00000008 /* Thread performs syscall enter */
 #define	TDB_SCX		0x00000010 /* Thread performs syscall exit */
 #define	TDB_EXEC	0x00000020 /* TDB_SCX from exec(2) family */
 #define	TDB_FORK	0x00000040 /* TDB_SCX from fork(2) that created new
 				      process */
 #define	TDB_STOPATFORK	0x00000080 /* Stop at the return from fork (child
 				      only) */
 #define	TDB_CHILD	0x00000100 /* New child indicator for ptrace() */
 #define	TDB_BORN	0x00000200 /* New LWP indicator for ptrace() */
 #define	TDB_EXIT	0x00000400 /* Exiting LWP indicator for ptrace() */
 #define	TDB_VFORK	0x00000800 /* vfork indicator for ptrace() */
 #define	TDB_FSTP	0x00001000 /* The thread is PT_ATTACH leader */
 #define	TDB_STEP	0x00002000 /* (x86) PSL_T set for PT_STEP */
 #define	TDB_SSWITCH	0x00004000 /* Suspended in ptracestop */
 #define	TDB_BOUNDARY	0x00008000 /* ptracestop() at boundary */
 #define	TDB_COREDUMPREQ	0x00010000 /* Coredump request */
 #define	TDB_SCREMOTEREQ	0x00020000 /* Remote syscall request */
 
 /*
  * "Private" flags kept in td_pflags:
  * These are only written by curthread and thus need no locking.
  */
 #define	TDP_OLDMASK	0x00000001 /* Need to restore mask after suspend. */
 #define	TDP_INKTR	0x00000002 /* Thread is currently in KTR code. */
 #define	TDP_INKTRACE	0x00000004 /* Thread is currently in KTRACE code. */
 #define	TDP_BUFNEED	0x00000008 /* Do not recurse into the buf flush */
 #define	TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */
 #define	TDP_ALTSTACK	0x00000020 /* Have alternate signal stack. */
 #define	TDP_DEADLKTREAT	0x00000040 /* Lock acquisition - deadlock treatment. */
 #define	TDP_NOFAULTING	0x00000080 /* Do not handle page faults. */
 #define	TDP_SIGFASTBLOCK 0x00000100 /* Fast sigblock active */
 #define	TDP_OWEUPC	0x00000200 /* Call addupc() at next AST. */
 #define	TDP_ITHREAD	0x00000400 /* Thread is an interrupt thread. */
 #define	TDP_SYNCIO	0x00000800 /* Local override, disable async i/o. */
 #define	TDP_SCHED1	0x00001000 /* Reserved for scheduler private use */
 #define	TDP_SCHED2	0x00002000 /* Reserved for scheduler private use */
 #define	TDP_SCHED3	0x00004000 /* Reserved for scheduler private use */
 #define	TDP_SCHED4	0x00008000 /* Reserved for scheduler private use */
 #define	TDP_GEOM	0x00010000 /* Settle GEOM before finishing syscall */
 #define	TDP_SOFTDEP	0x00020000 /* Stuck processing softdep worklist */
 #define	TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */
 #define	TDP_WAKEUP	0x00080000 /* Don't sleep in umtx cond_wait */
 #define	TDP_INBDFLUSH	0x00100000 /* Already in BO_BDFLUSH, do not recurse */
 #define	TDP_KTHREAD	0x00200000 /* This is an official kernel thread */
 #define	TDP_CALLCHAIN	0x00400000 /* Capture thread's callchain */
 #define	TDP_IGNSUSP	0x00800000 /* Permission to ignore the MNTK_SUSPEND* */
 #define	TDP_AUDITREC	0x01000000 /* Audit record pending on thread */
 #define	TDP_RFPPWAIT	0x02000000 /* Handle RFPPWAIT on syscall exit */
 #define	TDP_RESETSPUR	0x04000000 /* Reset spurious page fault history. */
 #define	TDP_NERRNO	0x08000000 /* Last errno is already in td_errno */
 #define	TDP_UIOHELD	0x10000000 /* Current uio has pages held in td_ma */
 #define	TDP_INTCPCALLOUT 0x20000000 /* used by netinet/tcp_timer.c */
 #define	TDP_EXECVMSPC	0x40000000 /* Execve destroyed old vmspace */
 #define	TDP_SIGFASTPENDING 0x80000000 /* Pending signal due to sigfastblock */
 
 #define	TDP2_SBPAGES	0x00000001 /* Owns sbusy on some pages */
 #define	TDP2_COMPAT32RB	0x00000002 /* compat32 ABI for robust lists */
 #define	TDP2_ACCT	0x00000004 /* Doing accounting */
 
 /*
  * Reasons that the current thread can not be run yet.
  * More than one may apply.
  */
 #define	TDI_SUSPENDED	0x0001	/* On suspension queue. */
 #define	TDI_SLEEPING	0x0002	/* Actually asleep! (tricky). */
 #define	TDI_SWAPPED	0x0004	/* Stack not in mem.  Bad juju if run. */
 #define	TDI_LOCK	0x0008	/* Stopped on a lock. */
 #define	TDI_IWAIT	0x0010	/* Awaiting interrupt. */
 
 /*
  * Thread status predicates.
  *
  * The td_inhibitors and td_flags tests below yield the raw masked bit
  * (zero or the flag's value), not a normalized 0/1, so use them only in
  * a boolean context.  Each macro expands its argument exactly once.
  */
 #define	TD_IS_SLEEPING(td)	((td)->td_inhibitors & TDI_SLEEPING)
 /* On a sleep queue whenever a wait channel is recorded in td_wchan. */
 #define	TD_ON_SLEEPQ(td)	((td)->td_wchan != NULL)
 #define	TD_IS_SUSPENDED(td)	((td)->td_inhibitors & TDI_SUSPENDED)
 #define	TD_IS_SWAPPED(td)	((td)->td_inhibitors & TDI_SWAPPED)
 #define	TD_ON_LOCK(td)		((td)->td_inhibitors & TDI_LOCK)
 #define	TD_AWAITING_INTR(td)	((td)->td_inhibitors & TDI_IWAIT)
 /*
  * Read td_state: through atomic_load_int() when built for the kernel, as
  * a plain member access otherwise.
  */
 #ifdef _KERNEL
 #define	TD_GET_STATE(td)	atomic_load_int(&(td)->td_state)
 #else
 #define	TD_GET_STATE(td)	((td)->td_state)
 #endif
 /* Exact comparisons of td_state against a single TDS_* value. */
 #define	TD_IS_RUNNING(td)	(TD_GET_STATE(td) == TDS_RUNNING)
 #define	TD_ON_RUNQ(td)		(TD_GET_STATE(td) == TDS_RUNQ)
 #define	TD_CAN_RUN(td)		(TD_GET_STATE(td) == TDS_CAN_RUN)
 #define	TD_IS_INHIBITED(td)	(TD_GET_STATE(td) == TDS_INHIBITED)
 #define	TD_ON_UPILOCK(td)	((td)->td_flags & TDF_UPIBLOCKED)
 #define TD_IS_IDLETHREAD(td)	((td)->td_flags & TDF_IDLETD)
 
 /*
  * True when the thread is on a sleep queue (TD_ON_SLEEPQ) and has
  * TDF_SINTR set in td_flags.
  * NOTE(review): TDF_SINTR presumably marks an interruptible sleep; confirm
  * against the TDF_* definitions.
  */
 #define	TD_CAN_ABORT(td)	(TD_ON_SLEEPQ((td)) &&			\
 				    ((td)->td_flags & TDF_SINTR) != 0)
 
 /*
  * Map a thread's td_inhibitors bits to a short descriptive string.
  * The bits are tested in a fixed order (sleeping, suspended, swapped,
  * lock, iwait), so when several are set the first match wins.  If none
  * of them is set the result is "yielding".
  */
 #define	KTDSTATE(td)							\
 	(((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep"  :		\
 	((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" :	\
 	((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" :		\
 	((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" :		\
 	((td)->td_inhibitors & TDI_IWAIT) != 0 ? "iwait" : "yielding")
 
 /*
  * Inhibit a thread: store TDS_INHIBITED as its state, then OR the given
  * TDI_* bit(s) into td_inhibitors.  'td' is expanded twice, so it must
  * not have side effects.
  */
 #define	TD_SET_INHIB(td, inhib) do {		\
 	TD_SET_STATE(td, TDS_INHIBITED);	\
 	(td)->td_inhibitors |= (inhib);		\
 } while (0)
 
 /*
  * Clear the given TDI_* bit(s) from td_inhibitors.
  *
  * Nothing is modified unless at least one of the 'inhib' bits is currently
  * set (the && short-circuits).  When clearing them leaves td_inhibitors
  * equal to zero, the thread's state is set to TDS_CAN_RUN; if other
  * inhibitors remain, the state is left as it was.  Both arguments are
  * expanded more than once.
  */
 #define	TD_CLR_INHIB(td, inhib) do {			\
 	if (((td)->td_inhibitors & (inhib)) &&		\
 	    (((td)->td_inhibitors &= ~(inhib)) == 0))	\
 		TD_SET_STATE(td, TDS_CAN_RUN);		\
 } while (0)
 
 /*
  * Convenience wrappers that inhibit a thread for one specific reason via
  * TD_SET_INHIB().
  * NOTE(review): TD_SET_EXITING expands to TDI_EXITING, which is not among
  * the TDI_* inhibitor bits defined earlier in this header; verify that it
  * is defined somewhere before using this macro.
  */
 #define	TD_SET_SLEEPING(td)	TD_SET_INHIB((td), TDI_SLEEPING)
 #define	TD_SET_SWAPPED(td)	TD_SET_INHIB((td), TDI_SWAPPED)
 #define	TD_SET_LOCK(td)		TD_SET_INHIB((td), TDI_LOCK)
 #define	TD_SET_SUSPENDED(td)	TD_SET_INHIB((td), TDI_SUSPENDED)
 #define	TD_SET_IWAIT(td)	TD_SET_INHIB((td), TDI_IWAIT)
 #define	TD_SET_EXITING(td)	TD_SET_INHIB((td), TDI_EXITING)
 
 #define	TD_CLR_SLEEPING(td)	TD_CLR_INHIB((td), TDI_SLEEPING)
 #define	TD_CLR_SWAPPED(td)	TD_CLR_INHIB((td), TDI_SWAPPED)
 #define	TD_CLR_LOCK(td)		TD_CLR_INHIB((td), TDI_LOCK)
 #define	TD_CLR_SUSPENDED(td)	TD_CLR_INHIB((td), TDI_SUSPENDED)
 #define	TD_CLR_IWAIT(td)	TD_CLR_INHIB((td), TDI_IWAIT)
 
 /*
  * Write td_state: through atomic_store_int() when built for the kernel,
  * as a plain assignment otherwise.  The non-kernel expansion is a bare,
  * unparenthesized assignment expression, so use it only as a statement.
  */
 #ifdef _KERNEL
 #define	TD_SET_STATE(td, state)	atomic_store_int(&(td)->td_state, state)
 #else
 #define	TD_SET_STATE(td, state)	(td)->td_state = state
 #endif
 /* Shorthands that store one specific TDS_* value. */
 #define	TD_SET_RUNNING(td)	TD_SET_STATE(td, TDS_RUNNING)
 #define	TD_SET_RUNQ(td)		TD_SET_STATE(td, TDS_RUNQ)
 #define	TD_SET_CAN_RUN(td)	TD_SET_STATE(td, TDS_CAN_RUN)
 
 
 /*
  * TD_SBDRY_INTR() is true when either TDF_SEINTR or TDF_SERESTART is set
  * in td_flags.
  */
 #define	TD_SBDRY_INTR(td) \
     (((td)->td_flags & (TDF_SEINTR | TDF_SERESTART)) != 0)
 /*
  * TD_SBDRY_ERRNO() yields EINTR when TDF_SEINTR is set and ERESTART in
  * every other case; it does not itself test TDF_SERESTART, so callers
  * are expected to check TD_SBDRY_INTR() first.
  */
 #define	TD_SBDRY_ERRNO(td) \
     (((td)->td_flags & TDF_SEINTR) != 0 ? EINTR : ERESTART)
 
 /*
  * Process structure.
  */
 struct proc {
 	LIST_ENTRY(proc) p_list;	/* (d) List of all processes. */
 	TAILQ_HEAD(, thread) p_threads;	/* (c) all threads. */
 	struct mtx	p_slock;	/* process spin lock */
 	struct ucred	*p_ucred;	/* (c) Process owner's identity. */
 	struct filedesc	*p_fd;		/* (b) Open files. */
 	struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */
 	struct pwddesc	*p_pd;		/* (b) Cwd, chroot, jail, umask */
 	struct pstats	*p_stats;	/* (b) Accounting/statistics (CPU). */
 	struct plimit	*p_limit;	/* (c) Resource limits. */
 	struct callout	p_limco;	/* (c) Limit callout handle */
 	struct sigacts	*p_sigacts;	/* (x) Signal actions, state (CPU). */
 
 	int		p_flag;		/* (c) P_* flags. */
 	int		p_flag2;	/* (c) P2_* flags. */
 	enum p_states {
 		PRS_NEW = 0,		/* In creation */
 		PRS_NORMAL,		/* threads can be run. */
 		PRS_ZOMBIE
 	} p_state;			/* (j/c) Process status. */
 	pid_t		p_pid;		/* (b) Process identifier. */
 	LIST_ENTRY(proc) p_hash;	/* (d) Hash chain. */
 	LIST_ENTRY(proc) p_pglist;	/* (g + e) List of processes in pgrp. */
 	struct proc	*p_pptr;	/* (c + e) Pointer to parent process. */
 	LIST_ENTRY(proc) p_sibling;	/* (e) List of sibling processes. */
 	LIST_HEAD(, proc) p_children;	/* (e) Pointer to list of children. */
 	struct proc	*p_reaper;	/* (e) My reaper. */
 	LIST_HEAD(, proc) p_reaplist;	/* (e) List of my descendants
 					       (if I am reaper). */
 	LIST_ENTRY(proc) p_reapsibling;	/* (e) List of siblings - descendants of
 					       the same reaper. */
 	struct mtx	p_mtx;		/* (n) Lock for this struct. */
 	struct mtx	p_statmtx;	/* Lock for the stats */
 	struct mtx	p_itimmtx;	/* Lock for the virt/prof timers */
 	struct mtx	p_profmtx;	/* Lock for the profiling */
 	struct ksiginfo *p_ksi;	/* Locked by parent proc lock */
 	sigqueue_t	p_sigqueue;	/* (c) Sigs not delivered to a td. */
 #define p_siglist	p_sigqueue.sq_signals
 	pid_t		p_oppid;	/* (c + e) Real parent pid. */
 
 /* The following fields are all zeroed upon creation in fork. */
 #define	p_startzero	p_vmspace
 	struct vmspace	*p_vmspace;	/* (b) Address space. */
 	u_int		p_swtick;	/* (c) Tick when swapped in or out. */
 	u_int		p_cowgen;	/* (c) Generation of COW pointers. */
 	struct itimerval p_realtimer;	/* (c) Alarm timer. */
 	struct rusage	p_ru;		/* (a) Exit information. */
 	struct rusage_ext p_rux;	/* (cu) Internal resource usage. */
 	struct rusage_ext p_crux;	/* (c) Internal child resource usage. */
 	int		p_profthreads;	/* (c) Num threads in addupc_task. */
 	volatile int	p_exitthreads;	/* (j) Number of threads exiting */
 	int		p_traceflag;	/* (o) Kernel trace points. */
 	struct ktr_io_params	*p_ktrioparms;	/* (c + o) Params for ktrace. */
 	struct vnode	*p_textvp;	/* (b) Vnode of executable. */
 	struct vnode	*p_textdvp;	/* (b) Dir containing textvp. */
 	char		*p_binname;	/* (b) Binary hardlink name. */
 	u_int		p_lock;		/* (c) Proclock (prevent swap) count. */
 	struct sigiolst	p_sigiolst;	/* (c) List of sigio sources. */
 	int		p_sigparent;	/* (c) Signal to parent on exit. */
 	int		p_sig;		/* (n) For core dump/debugger XXX. */
 	u_int		p_ptevents;	/* (c + e) ptrace() event mask. */
 	struct kaioinfo	*p_aioinfo;	/* (y) ASYNC I/O info. */
 	struct thread	*p_singlethread;/* (c + j) If single threading this is it */
 	int		p_suspcount;	/* (j) Num threads in suspended mode. */
 	struct thread	*p_xthread;	/* (c) Trap thread */
 	int		p_boundary_count;/* (j) Num threads at user boundary */
 	int		p_pendingcnt;	/* how many signals are pending */
 	struct itimers	*p_itimers;	/* (c) POSIX interval timers. */
 	struct procdesc	*p_procdesc;	/* (e) Process descriptor, if any. */
 	u_int		p_treeflag;	/* (e) P_TREE flags */
 	int		p_pendingexits; /* (c) Count of pending thread exits. */
 	struct filemon	*p_filemon;	/* (c) filemon-specific data. */
 	int		p_pdeathsig;	/* (c) Signal from parent on exit. */
 /* End area that is zeroed on creation. */
 #define	p_endzero	p_magic
 
 /* The following fields are all copied upon creation in fork. */
 #define	p_startcopy	p_endzero
 	u_int		p_magic;	/* (b) Magic number. */
 	int		p_osrel;	/* (x) osreldate for the
 					       binary (from ELF note, if any) */
 	uint32_t	p_fctl0;	/* (x) ABI feature control, ELF note */
 	char		p_comm[MAXCOMLEN + 1];	/* (x) Process name. */
 	struct sysentvec *p_sysent;	/* (b) Syscall dispatch info. */
 	struct pargs	*p_args;	/* (c) Process arguments. */
 	rlim_t		p_cpulimit;	/* (c) Current CPU limit in seconds. */
 	signed char	p_nice;		/* (c) Process "nice" value. */
 	int		p_fibnum;	/* in this routing domain XXX MRT */
 	pid_t		p_reapsubtree;	/* (e) Pid of the direct child of the
 					       reaper which spawned
 					       our subtree. */
 	uint64_t	p_elf_flags;	/* (x) ELF flags */
 	void		*p_elf_brandinfo; /* (x) Elf_Brandinfo, NULL for
 						 non ELF binaries. */
 	sbintime_t	p_umtx_min_timeout;
 /* End area that is copied on creation. */
 #define	p_endcopy	p_xexit
 
 	u_int		p_xexit;	/* (c) Exit code. */
 	u_int		p_xsig;		/* (c) Stop/kill sig. */
 	struct pgrp	*p_pgrp;	/* (c + e) Pointer to process group. */
 	struct knlist	*p_klist;	/* (c) Knotes attached to this proc. */
 	int		p_numthreads;	/* (c) Number of threads. */
 	struct mdproc	p_md;		/* Any machine-dependent fields. */
 	struct callout	p_itcallout;	/* (h + c) Interval timer callout. */
 	u_short		p_acflag;	/* (c) Accounting flags. */
 	struct proc	*p_peers;	/* (r) */
 	struct proc	*p_leader;	/* (b) */
 	void		*p_emuldata;	/* (c) Emulator state data. */
 	struct label	*p_label;	/* (*) Proc (not subject) MAC label. */
 	STAILQ_HEAD(, ktr_request)	p_ktr;	/* (o) KTR event queue. */
 	LIST_HEAD(, mqueue_notifier)	p_mqnotifier; /* (c) mqueue notifiers.*/
 	struct kdtrace_proc	*p_dtrace; /* (*) DTrace-specific data. */
 	struct cv	p_pwait;	/* (*) wait cv for exit/exec. */
 	uint64_t	p_prev_runtime;	/* (c) Resource usage accounting. */
 	struct racct	*p_racct;	/* (b) Resource accounting. */
 	int		p_throttled;	/* (c) Flag for racct pcpu throttling */
 	/*
 	 * An orphan is a child that has been re-parented to a debugger
 	 * as a result of the debugger attaching to it.  Orphans are
 	 * tracked so that the original parent can still collect the exit
 	 * status of what used to be its children.
 	 */
 	LIST_ENTRY(proc) p_orphan;	/* (e) List of orphan processes. */
 	LIST_HEAD(, proc) p_orphans;	/* (e) Pointer to list of orphans. */
 
 	TAILQ_HEAD(, kq_timer_cb_data)	p_kqtim_stop;	/* (c) */
 	LIST_ENTRY(proc) p_jaillist;	/* (d) Jail process linkage. */
 };
 
 #define	p_session	p_pgrp->pg_session
 #define	p_pgid		p_pgrp->pg_id
 
 #define	NOCPU		(-1)	/* For when we aren't on a CPU. */
 #define	NOCPU_OLD	(255)
 #define	MAXCPU_OLD	(254)
 
 #define	PROC_SLOCK(p)	mtx_lock_spin(&(p)->p_slock)
 #define	PROC_SUNLOCK(p)	mtx_unlock_spin(&(p)->p_slock)
 #define	PROC_SLOCK_ASSERT(p, type)	mtx_assert(&(p)->p_slock, (type))
 
 #define	PROC_STATLOCK(p)	mtx_lock_spin(&(p)->p_statmtx)
 #define	PROC_STATUNLOCK(p)	mtx_unlock_spin(&(p)->p_statmtx)
 #define	PROC_STATLOCK_ASSERT(p, type)	mtx_assert(&(p)->p_statmtx, (type))
 
 #define	PROC_ITIMLOCK(p)	mtx_lock_spin(&(p)->p_itimmtx)
 #define	PROC_ITIMUNLOCK(p)	mtx_unlock_spin(&(p)->p_itimmtx)
 #define	PROC_ITIMLOCK_ASSERT(p, type)	mtx_assert(&(p)->p_itimmtx, (type))
 
 #define	PROC_PROFLOCK(p)	mtx_lock_spin(&(p)->p_profmtx)
 #define	PROC_PROFUNLOCK(p)	mtx_unlock_spin(&(p)->p_profmtx)
 #define	PROC_PROFLOCK_ASSERT(p, type)	mtx_assert(&(p)->p_profmtx, (type))
 
 /* These flags are kept in p_flag. */
 #define	P_ADVLOCK	0x00000001	/* Process may hold a POSIX advisory
 					   lock. */
 #define	P_CONTROLT	0x00000002	/* Has a controlling terminal. */
 #define	P_KPROC		0x00000004	/* Kernel process. */
 #define	P_UNUSED3	0x00000008	/* --available-- */
 #define	P_PPWAIT	0x00000010	/* Parent is waiting for child to
 					   exec/exit. */
 #define	P_PROFIL	0x00000020	/* Has started profiling. */
 #define	P_STOPPROF	0x00000040	/* Has thread requesting to stop
 					   profiling. */
 #define	P_HADTHREADS	0x00000080	/* Has had threads (no cleanup
 					   shortcuts) */
 #define	P_SUGID		0x00000100	/* Had set id privileges since last
 					   exec. */
 #define	P_SYSTEM	0x00000200	/* System proc: no sigs, stats or
 					   swapping. */
 #define	P_SINGLE_EXIT	0x00000400	/* Threads suspending should exit,
 					   not wait. */
 #define	P_TRACED	0x00000800	/* Debugged process being traced. */
 #define	P_WAITED	0x00001000	/* Someone is waiting for us. */
 #define	P_WEXIT		0x00002000	/* Working on exiting. */
 #define	P_EXEC		0x00004000	/* Process called exec. */
 #define	P_WKILLED	0x00008000	/* Killed, go to kernel/user boundary
 					   ASAP. */
 #define	P_CONTINUED	0x00010000	/* Proc has continued from a stopped
 					   state. */
 #define	P_STOPPED_SIG	0x00020000	/* Stopped due to SIGSTOP/SIGTSTP. */
 #define	P_STOPPED_TRACE	0x00040000	/* Stopped because of tracing. */
 #define	P_STOPPED_SINGLE 0x00080000	/* Only 1 thread can continue (not to
 					   user). */
 #define	P_PROTECTED	0x00100000	/* Do not kill on memory overcommit. */
 #define	P_SIGEVENT	0x00200000	/* Process pending signals changed. */
 #define	P_SINGLE_BOUNDARY 0x00400000	/* Threads should suspend at user
 					   boundary. */
 #define	P_HWPMC		0x00800000	/* Process is using HWPMCs */
 #define	P_JAILED	0x01000000	/* Process is in jail. */
 #define	P_TOTAL_STOP	0x02000000	/* Stopped in stop_all_proc. */
 #define	P_INEXEC	0x04000000	/* Process is in execve(). */
 #define	P_STATCHILD	0x08000000	/* Child process stopped or exited. */
 #define	P_INMEM		0x10000000	/* Loaded into memory. */
 #define	P_SWAPPINGOUT	0x20000000	/* Process is being swapped out. */
 #define	P_SWAPPINGIN	0x40000000	/* Process is being swapped in. */
 #define	P_PPTRACE	0x80000000	/* PT_TRACEME by vforked child. */
 
 #define	P_STOPPED	(P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
 #define	P_SHOULDSTOP(p)	((p)->p_flag & P_STOPPED)
 #define	P_KILLED(p)	((p)->p_flag & P_WKILLED)
 
 /* These flags are kept in p_flag2. */
 #define	P2_INHERIT_PROTECTED	0x00000001	/* New children get
 						   P_PROTECTED. */
 #define	P2_NOTRACE		0x00000002	/* No ptrace(2) attach or
 						   coredumps. */
 #define	P2_NOTRACE_EXEC		0x00000004	/* Keep P2_NOTRACE on
 						   exec(2). */
 #define	P2_AST_SU		0x00000008	/* Handles SU ast for
 						   kthreads. */
 #define	P2_PTRACE_FSTP		0x00000010	/* SIGSTOP from PT_ATTACH not
 						   yet handled. */
 #define	P2_TRAPCAP		0x00000020	/* SIGTRAP on ENOTCAPABLE */
 #define	P2_ASLR_ENABLE		0x00000040	/* Force enable ASLR. */
 #define	P2_ASLR_DISABLE		0x00000080	/* Force disable ASLR. */
 #define	P2_ASLR_IGNSTART	0x00000100	/* Enable ASLR to consume sbrk
 						   area. */
 #define	P2_PROTMAX_ENABLE	0x00000200	/* Force enable implied
 						   PROT_MAX. */
 #define	P2_PROTMAX_DISABLE	0x00000400	/* Force disable implied
 						   PROT_MAX. */
 #define	P2_STKGAP_DISABLE	0x00000800	/* Disable stack gap for
 						   MAP_STACK */
 #define	P2_STKGAP_DISABLE_EXEC	0x00001000	/* Stack gap disabled
 						   after exec */
 #define	P2_ITSTOPPED		0x00002000
 #define	P2_PTRACEREQ		0x00004000	/* Active ptrace req */
 #define	P2_NO_NEW_PRIVS		0x00008000	/* Ignore setuid */
 #define	P2_WXORX_DISABLE	0x00010000	/* WX mappings enabled */
 #define	P2_WXORX_ENABLE_EXEC	0x00020000	/* WXORX enabled after exec */
 #define	P2_WEXIT		0x00040000	/* exit just started, no
 						   external thread_single() is
 						   permitted */
 #define	P2_REAPKILLED		0x00080000
 #define	P2_MEMBAR_PRIVE		0x00100000	/* membar private expedited
 						   registered */
 #define	P2_MEMBAR_PRIVE_SYNCORE	0x00200000	/* membar private expedited
 						   sync core registered */
 #define	P2_MEMBAR_GLOBE		0x00400000	/* membar global expedited
 						   registered */
 
 /* Flags protected by proctree_lock, kept in p_treeflag. */
 #define	P_TREE_ORPHANED		0x00000001	/* Reparented, on orphan list */
 #define	P_TREE_FIRST_ORPHAN	0x00000002	/* First element of orphan
 						   list */
 #define	P_TREE_REAPER		0x00000004	/* Reaper of subtree */
 #define	P_TREE_GRPEXITED	0x00000008	/* exit1() done with job ctl */
 
 /*
  * These were process status values (p_stat), now they are only used in
  * legacy conversion code.
  */
 #define	SIDL	1		/* Process being created by fork. */
 #define	SRUN	2		/* Currently runnable. */
 #define	SSLEEP	3		/* Sleeping on an address. */
 #define	SSTOP	4		/* Process debugging or suspension. */
 #define	SZOMB	5		/* Awaiting collection by parent. */
 #define	SWAIT	6		/* Waiting for interrupt. */
 #define	SLOCK	7		/* Blocked on a lock. */
 
 #define	P_MAGIC		0xbeefface
 
 #ifdef _KERNEL
 
 /* Types and flags for mi_switch(9). */
 #define	SW_TYPE_MASK		0xff	/* First 8 bits are switch type */
 #define	SWT_OWEPREEMPT		1	/* Switching due to owepreempt. */
 #define	SWT_TURNSTILE		2	/* Turnstile contention. */
 #define	SWT_SLEEPQ		3	/* Sleepq wait. */
 #define	SWT_RELINQUISH		4	/* yield call. */
 #define	SWT_NEEDRESCHED		5	/* NEEDRESCHED was set. */
 #define	SWT_IDLE		6	/* Switching from the idle thread. */
 #define	SWT_IWAIT		7	/* Waiting for interrupts. */
 #define	SWT_SUSPEND		8	/* Thread suspended. */
 #define	SWT_REMOTEPREEMPT	9	/* Remote processor preempted. */
 #define	SWT_REMOTEWAKEIDLE	10	/* Remote processor preempted idle. */
 #define	SWT_BIND		11	/* Thread bound to a new CPU. */
 #define	SWT_COUNT		12	/* Number of switch types. */
 /* Flags */
 #define	SW_VOL		0x0100		/* Voluntary switch. */
 #define	SW_INVOL	0x0200		/* Involuntary switch. */
 #define SW_PREEMPT	0x0400		/* The invol switch is a preemption */
 
 /* Values for the 'how' argument of thread_single(). */
 #define	SINGLE_NO_EXIT	0
 #define	SINGLE_EXIT	1
 #define	SINGLE_BOUNDARY	2
 #define	SINGLE_ALLPROC	3
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_PARGS);
 MALLOC_DECLARE(M_SESSION);
 MALLOC_DECLARE(M_SUBPROC);
 #endif
 
 #define	FOREACH_PROC_IN_SYSTEM(p)					\
 	LIST_FOREACH((p), &allproc, p_list)
 #define	FOREACH_THREAD_IN_PROC(p, td)					\
 	TAILQ_FOREACH((td), &(p)->p_threads, td_plist)
 
 #define	FIRST_THREAD_IN_PROC(p)	TAILQ_FIRST(&(p)->p_threads)
 
 /*
  * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit
  * in a pid_t, as it is used to represent "no process group".
  */
 #define	PID_MAX		99999
 #define	NO_PID		(PID_MAX + 1)
 #define	THREAD0_TID	NO_PID
 extern pid_t pid_max;
 
 #define	SESS_LEADER(p)	((p)->p_session->s_leader == (p))
 
 /* Lock and unlock a process. */
 #define	PROC_LOCK(p)	mtx_lock(&(p)->p_mtx)
 #define	PROC_TRYLOCK(p)	mtx_trylock(&(p)->p_mtx)
 #define	PROC_UNLOCK(p)	mtx_unlock(&(p)->p_mtx)
 #define	PROC_LOCKED(p)	mtx_owned(&(p)->p_mtx)
 #define	PROC_WAIT_UNLOCKED(p)	mtx_wait_unlocked(&(p)->p_mtx)
 #define	PROC_LOCK_ASSERT(p, type)	mtx_assert(&(p)->p_mtx, (type))
 
 /* Lock and unlock a process group. */
 #define	PGRP_LOCK(pg)	mtx_lock(&(pg)->pg_mtx)
 #define	PGRP_UNLOCK(pg)	mtx_unlock(&(pg)->pg_mtx)
 #define	PGRP_LOCKED(pg)	mtx_owned(&(pg)->pg_mtx)
 #define	PGRP_LOCK_ASSERT(pg, type)	mtx_assert(&(pg)->pg_mtx, (type))
 
 /*
  * NULL-tolerant variant of PGRP_LOCK(): locks the process group only when
  * 'pg' is non-NULL and does nothing otherwise.
  */
 #define	PGRP_LOCK_PGSIGNAL(pg) do {					\
 	if ((pg) != NULL)						\
 		PGRP_LOCK(pg);						\
 } while (0)
 /*
  * NULL-tolerant variant of PGRP_UNLOCK(): unlocks the process group only
  * when 'pg' is non-NULL and does nothing otherwise.
  */
 #define	PGRP_UNLOCK_PGSIGNAL(pg) do {					\
 	if ((pg) != NULL)						\
 		PGRP_UNLOCK(pg);					\
 } while (0)
 
 /* Lock and unlock a session. */
 #define	SESS_LOCK(s)	mtx_lock(&(s)->s_mtx)
 #define	SESS_UNLOCK(s)	mtx_unlock(&(s)->s_mtx)
 #define	SESS_LOCKED(s)	mtx_owned(&(s)->s_mtx)
 #define	SESS_LOCK_ASSERT(s, type)	mtx_assert(&(s)->s_mtx, (type))
 
 /*
  * Non-zero p_lock ensures that:
  * - exit1() is not performed until p_lock reaches zero;
  * - the process' threads stack are not swapped out if they are currently
  *   not (P_INMEM).
  *
  * PHOLD() asserts that the process (unless it is the current process) is
  * not exiting, increments p_lock and swaps the threads' stacks into memory
  * if needed.
  * _PHOLD() is the same as PHOLD(), except that the caller must already
  * hold the process lock.
  * _PHOLD_LITE() also requires the process lock to be held but, compared
  * with _PHOLD(), it only guarantees that exit1() is not executed;
  * faultin() is not called.
  */
 /*
  * Take a hold on an unlocked process: acquire the process lock, perform
  * _PHOLD() and release the lock again.
  */
 #define	PHOLD(p) do {							\
 	PROC_LOCK(p);							\
 	_PHOLD(p);							\
 	PROC_UNLOCK(p);							\
 } while (0)
 /*
  * Take a hold on a process whose lock the caller already owns (asserted).
  * Asserts that the process is not flagged P_WEXIT unless it is curproc,
  * increments p_lock, and calls faultin() when P_INMEM is not set.
  */
 #define	_PHOLD(p) do {							\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
 	KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc,		\
 	    ("PHOLD of exiting process %p", p));			\
 	(p)->p_lock++;							\
 	if (((p)->p_flag & P_INMEM) == 0)				\
 		faultin((p));						\
 } while (0)
 /*
  * Same checks and p_lock increment as _PHOLD(), but without the P_INMEM
  * test and faultin() call.  The caller must own the process lock
  * (asserted).
  */
 #define	_PHOLD_LITE(p) do {						\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
 	KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc,		\
 	    ("PHOLD of exiting process %p", p));			\
 	(p)->p_lock++;							\
 } while (0)
 /* Assert that at least one hold is outstanding (p_lock > 0). */
 #define	PROC_ASSERT_HELD(p) do {					\
 	KASSERT((p)->p_lock > 0, ("process %p not held", p));		\
 } while (0)
 
 /*
  * Release a hold on an unlocked process: acquire the process lock,
  * perform _PRELE() and release the lock again.
  */
 #define	PRELE(p) do {							\
 	PROC_LOCK((p));							\
 	_PRELE((p));							\
 	PROC_UNLOCK((p));						\
 } while (0)
 /*
  * Release a hold on a process whose lock the caller already owns.
  * Asserts lock ownership and that a hold is outstanding, then decrements
  * p_lock.  If the process has P_WEXIT set and the count has just reached
  * zero, wakeup() is issued on &p->p_lock so that anything sleeping on that
  * address is notified that the last hold is gone.
  */
 #define	_PRELE(p) do {							\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
 	PROC_ASSERT_HELD(p);						\
 	(--(p)->p_lock);						\
 	if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0)		\
 		wakeup(&(p)->p_lock);					\
 } while (0)
 /* Assert that no hold is outstanding (p_lock == 0). */
 #define	PROC_ASSERT_NOT_HELD(p) do {					\
 	KASSERT((p)->p_lock == 0, ("process %p held", p));		\
 } while (0)
 
 /*
  * Advance the process's p_cowgen generation counter by one.  The caller
  * must own the process lock (asserted).  The new value is published with
  * atomic_store_int(); the read of the old value is a plain load.  'p' is
  * evaluated once, into a local.
  */
 #define	PROC_UPDATE_COW(p) do {						\
 	struct proc *_p = (p);						\
 	PROC_LOCK_ASSERT((_p), MA_OWNED);				\
 	atomic_store_int(&_p->p_cowgen, _p->p_cowgen + 1);		\
 } while (0)
 
 /*
  * Statement expression yielding p->p_cowgen - td->td_cowgen, i.e. zero
  * when the thread's recorded generation matches the process's.  'td' must
  * be curthread and the process lock must be owned (both asserted).  Each
  * argument is evaluated once, into a local.
  */
 #define	PROC_COW_CHANGECOUNT(td, p) ({					\
 	struct thread *_td = (td);					\
 	struct proc *_p = (p);						\
 	MPASS(_td == curthread);					\
 	PROC_LOCK_ASSERT(_p, MA_OWNED);					\
 	_p->p_cowgen - _td->td_cowgen;					\
 })
 
 /* Check whether a thread is safe to be swapped out. */
 #define	thread_safetoswapout(td)	((td)->td_flags & TDF_CANSWAP)
 
 /* Control whether or not it is safe for curthread to sleep. */
 /*
  * Enter a no-sleeping section: increment curthread's td_no_sleeping
  * counter.  Sections nest because this is a count, not a flag.  The
  * MPASS checks that the counter is positive after the increment.
  */
 #define	THREAD_NO_SLEEPING()		do {				\
 	curthread->td_no_sleeping++;					\
 	MPASS(curthread->td_no_sleeping > 0);				\
 } while (0)
 
 /*
  * Leave a no-sleeping section: assert that curthread's td_no_sleeping
  * counter is positive (i.e. a section is actually open), then decrement
  * it.
  */
 #define	THREAD_SLEEPING_OK()		do {				\
 	MPASS(curthread->td_no_sleeping > 0);				\
 	curthread->td_no_sleeping--;					\
 } while (0)
 
 /* True when curthread's td_no_sleeping counter is zero. */
 #define	THREAD_CAN_SLEEP()		((curthread)->td_no_sleeping == 0)
 
 /*
  * Record in curthread->td_wantedlock the lock that curthread is about to
  * contend on.  Asserts that no lock is recorded already.  'lo' is used
  * unparenthesized in the expansion, so pass a simple expression.
  */
 #define	THREAD_CONTENDS_ON_LOCK(lo)		do {			\
 	MPASS(curthread->td_wantedlock == NULL);			\
 	curthread->td_wantedlock = lo;					\
 } while (0)
 
 /*
  * Clear curthread->td_wantedlock once contention is over.  Asserts that
  * the recorded lock is the same 'lo' that was passed to
  * THREAD_CONTENDS_ON_LOCK().
  */
 #define	THREAD_CONTENTION_DONE(lo)		do {			\
 	MPASS(curthread->td_wantedlock == lo);				\
 	curthread->td_wantedlock = NULL;				\
 } while (0)
 
 #define	PIDHASH(pid)	(&pidhashtbl[(pid) & pidhash])
 #define	PIDHASHLOCK(pid) (&pidhashtbl_lock[((pid) & pidhashlock)])
 extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
 extern struct sx *pidhashtbl_lock;
 extern u_long pidhash;
 extern u_long pidhashlock;
 
 #define	PGRPHASH(pgid)	(&pgrphashtbl[(pgid) & pgrphash])
 extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
 extern u_long pgrphash;
 
 extern struct sx allproc_lock;
 extern int allproc_gen;
 extern struct sx proctree_lock;
 extern struct mtx ppeers_lock;
 extern struct mtx procid_lock;
 extern struct proc proc0;		/* Process slot for swapper. */
 extern struct thread0_storage thread0_st;	/* Primary thread in proc0. */
 #define	thread0 (thread0_st.t0st_thread)
 extern struct vmspace vmspace0;		/* VM space for proc0. */
 extern int hogticks;			/* Limit on kernel cpu hogs. */
 extern int lastpid;
 extern int nprocs, maxproc;		/* Current and max number of procs. */
 extern int maxprocperuid;		/* Max procs per uid. */
 extern u_long ps_arg_cache_limit;
 
 LIST_HEAD(proclist, proc);
 TAILQ_HEAD(procqueue, proc);
 TAILQ_HEAD(threadqueue, thread);
 extern struct proclist allproc;		/* List of all processes. */
 extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */
 
 extern struct uma_zone *proc_zone;
 extern struct uma_zone *pgrp_zone;
 
 struct	proc *pfind(pid_t);		/* Find process by id. */
 struct	proc *pfind_any(pid_t);		/* Find (zombie) process by id. */
 struct	proc *pfind_any_locked(pid_t pid); /* Find process by id, locked. */
 struct	pgrp *pgfind(pid_t);		/* Find process group by id. */
 void	pidhash_slockall(void);		/* Shared lock all pid hash lists. */
 void	pidhash_sunlockall(void);	/* Shared unlock all pid hash lists. */
 
 /*
  * Request structure handed to fork1(), which takes a pointer to one of
  * these as its second argument.
  * NOTE(review): the meaning of the individual fields is not established
  * in this header; consult fork1()'s implementation.  The FR2_* constants
  * defined inside the structure are presumably bits for fr_flags2 --
  * confirm against the callers.
  */
 struct	fork_req {
 	int		fr_flags;
 	int		fr_pages;
 	int 		*fr_pidp;
 	struct proc 	**fr_procp;
 	int 		*fr_pd_fd;
 	int 		fr_pd_flags;
 	struct filecaps	*fr_pd_fcaps;
 	int 		fr_flags2;
 #define	FR2_DROPSIG_CAUGHT	0x00000001 /* Drop caught non-DFL signals */
 #define	FR2_SHARE_PATHS		0x00000002 /* Invert sense of RFFDG for paths */
 #define	FR2_KPROC		0x00000004 /* Create a kernel process */
 };
 
 /*
  * pget() flags.
  */
 #define	PGET_HOLD	0x00001	/* Hold the process. */
 #define	PGET_CANSEE	0x00002	/* Check against p_cansee(). */
 #define	PGET_CANDEBUG	0x00004	/* Check against p_candebug(). */
 #define	PGET_ISCURRENT	0x00008	/* Check that the found process is current. */
 #define	PGET_NOTWEXIT	0x00010	/* Check that the process is not in P_WEXIT. */
 #define	PGET_NOTINEXEC	0x00020	/* Check that the process is not in P_INEXEC. */
 #define	PGET_NOTID	0x00040	/* Do not assume tid if pid > PID_MAX. */
 
 #define	PGET_WANTREAD	(PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT)
 
 int	pget(pid_t pid, int flags, struct proc **pp);
 
 /* ast_register() flags */
 #define	ASTR_ASTF_REQUIRED	0x0001	/* td_ast TDAI(TDA_X) flag set is
 					   required for call */
 #define	ASTR_TDP		0x0002	/* td_pflags flag set is required */
 #define	ASTR_KCLEAR		0x0004	/* call me on ast_kclear() */
 #define	ASTR_UNCOND		0x0008	/* call me always */
 
 void	ast(struct trapframe *framep);
 void	ast_kclear(struct thread *td);
 void	ast_register(int ast, int ast_flags, int tdp,
 	    void (*f)(struct thread *td, int asts));
 void	ast_deregister(int tda);
 void	ast_sched_locked(struct thread *td, int tda);
 void	ast_sched_mask(struct thread *td, int ast);
 void	ast_sched(struct thread *td, int tda);
 void	ast_unsched_locked(struct thread *td, int tda);
 
 struct	thread *choosethread(void);
 int	cr_bsd_visible(struct ucred *u1, struct ucred *u2);
 int	cr_cansee(struct ucred *u1, struct ucred *u2);
 int	cr_canseesocket(struct ucred *cred, struct socket *so);
 int	cr_cansignal(struct ucred *cred, struct proc *proc, int signum);
 int	enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp,
 	    struct session *sess);
 int	enterthispgrp(struct proc *p, struct pgrp *pgrp);
 void	faultin(struct proc *p);
 int	fork1(struct thread *, struct fork_req *);
 void	fork_exit(void (*)(void *, struct trapframe *), void *,
 	    struct trapframe *);
 void	fork_return(struct thread *, struct trapframe *);
 int	inferior(struct proc *p);
 void	itimer_proc_continue(struct proc *p);
 void	kqtimer_proc_continue(struct proc *p);
 void	kern_proc_vmmap_resident(struct vm_map *map, struct vm_map_entry *entry,
 	    int *resident_count, bool *super);
 void	kern_yield(int);
 void 	kick_proc0(void);
 void	killjobc(void);
 int	leavepgrp(struct proc *p);
 int	maybe_preempt(struct thread *td);
 void	maybe_yield(void);
 void	mi_switch(int flags);
 int	p_candebug(struct thread *td, struct proc *p);
 int	p_cansee(struct thread *td, struct proc *p);
 int	p_cansched(struct thread *td, struct proc *p);
 int	p_cansignal(struct thread *td, struct proc *p, int signum);
 int	p_canwait(struct thread *td, struct proc *p);
 struct	pargs *pargs_alloc(int len);
 void	pargs_drop(struct pargs *pa);
 void	pargs_hold(struct pargs *pa);
 void	proc_add_orphan(struct proc *child, struct proc *parent);
 int	proc_get_binpath(struct proc *p, char *binname, char **fullpath,
 	    char **freepath);
 int	proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb);
 int	proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb);
 int	proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb);
 void	procinit(void);
 int	proc_iterate(int (*cb)(struct proc *, void *), void *cbarg);
 void	proc_linkup0(struct proc *p, struct thread *td);
 void	proc_linkup(struct proc *p, struct thread *td);
 struct proc *proc_realparent(struct proc *child);
 void	proc_reap(struct thread *td, struct proc *p, int *status, int options);
 void	proc_reparent(struct proc *child, struct proc *newparent, bool set_oppid);
 void	proc_set_p2_wexit(struct proc *p);
 void	proc_set_traced(struct proc *p, bool stop);
 void	proc_wkilled(struct proc *p);
 struct	pstats *pstats_alloc(void);
 void	pstats_fork(struct pstats *src, struct pstats *dst);
 void	pstats_free(struct pstats *ps);
 void	proc_clear_orphan(struct proc *p);
 void	reaper_abandon_children(struct proc *p, bool exiting);
 int	securelevel_ge(struct ucred *cr, int level);
 int	securelevel_gt(struct ucred *cr, int level);
 void	sess_hold(struct session *);
 void	sess_release(struct session *);
 int	setrunnable(struct thread *, int);
 void	setsugid(struct proc *p);
 bool	should_yield(void);
 int	sigonstack(size_t sp);
 void	stopevent(struct proc *, u_int, u_int);
 struct	thread *tdfind(lwpid_t, pid_t);
 void	threadinit(void);
 void	tidhash_add(struct thread *);
 void	tidhash_remove(struct thread *);
 void	cpu_idle(int);
 int	cpu_idle_wakeup(int);
 extern	void (*cpu_idle_hook)(sbintime_t);	/* Hook to machdep CPU idler. */
 void	cpu_switch(struct thread *, struct thread *, struct mtx *);
 void	cpu_sync_core(void);
 void	cpu_throw(struct thread *, struct thread *) __dead2;
 bool	curproc_sigkilled(void);
 void	userret(struct thread *, struct trapframe *);
 
 void	cpu_exit(struct thread *);
 void	exit1(struct thread *, int, int) __dead2;
 void	cpu_copy_thread(struct thread *td, struct thread *td0);
 bool	cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map);
 int	cpu_fetch_syscall_args(struct thread *td);
 void	cpu_fork(struct thread *, struct proc *, struct thread *, int);
 void	cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *);
 int	cpu_procctl(struct thread *td, int idtype, id_t id, int com,
 	    void *data);
 void	cpu_set_syscall_retval(struct thread *, int);
 int	cpu_set_upcall(struct thread *, void (*)(void *), void *,
 	    stack_t *);
 int	cpu_set_user_tls(struct thread *, void *tls_base);
 void	cpu_thread_alloc(struct thread *);
 void	cpu_thread_clean(struct thread *);
 void	cpu_thread_exit(struct thread *);
 void	cpu_thread_free(struct thread *);
 void	cpu_thread_swapin(struct thread *);
 void	cpu_thread_swapout(struct thread *);
 struct	thread *thread_alloc(int pages);
 int	thread_alloc_stack(struct thread *, int pages);
 int	thread_check_susp(struct thread *td, bool sleep);
 void	thread_cow_get_proc(struct thread *newtd, struct proc *p);
 void	thread_cow_get(struct thread *newtd, struct thread *td);
 void	thread_cow_free(struct thread *td);
 void	thread_cow_update(struct thread *td);
 void	thread_cow_synced(struct thread *td);
 int	thread_create(struct thread *td, struct rtprio *rtp,
 	    int (*initialize_thread)(struct thread *, void *), void *thunk);
 void	thread_exit(void) __dead2;
 void	thread_free(struct thread *td);
 void	thread_link(struct thread *td, struct proc *p);
 void	thread_reap_barrier(void);
 int	thread_single(struct proc *p, int how);
 void	thread_single_end(struct proc *p, int how);
 void	thread_stash(struct thread *td);
 void	thread_stopped(struct proc *p);
 void	childproc_stopped(struct proc *child, int reason);
 void	childproc_continued(struct proc *child);
 void	childproc_exited(struct proc *child);
 void	thread_run_flash(struct thread *td);
 int	thread_suspend_check(int how);
 bool	thread_suspend_check_needed(void);
 void	thread_suspend_switch(struct thread *, struct proc *p);
 void	thread_suspend_one(struct thread *td);
 void	thread_unlink(struct thread *td);
 void	thread_unsuspend(struct proc *p);
 void	thread_wait(struct proc *p);
 
 bool	stop_all_proc_block(void);
 void	stop_all_proc_unblock(void);
 void	stop_all_proc(void);
 void	resume_all_proc(void);
 
 /*
  * Set the given bits in curthread's td_pflags and return a token to be
  * passed to curthread_pflags_restore().
  *
  * The token has every bit outside 'flags' set to 1 and, for each bit
  * inside 'flags', the value that bit had in td_pflags before this call.
  * ANDing td_pflags with the token therefore clears exactly those requested
  * flags that were not already set and leaves every other bit untouched.
  */
 static __inline int
 curthread_pflags_set(int flags)
 {
 	struct thread *self = curthread;
 	int prior = self->td_pflags & flags;
 
 	self->td_pflags |= flags;
 	return (~flags | prior);
 }
 
 static __inline void
 curthread_pflags_restore(int save)
 {
 
 	curthread->td_pflags &= save;
 }
 
 /*
  * Set the given bits in curthread's td_pflags2 and return a token to be
  * passed to curthread_pflags2_restore().
  *
  * The token has every bit outside 'flags' set to 1 and, for each bit
  * inside 'flags', the value that bit had in td_pflags2 before this call,
  * so that ANDing it back in clears only the flags this call newly set.
  */
 static __inline int
 curthread_pflags2_set(int flags)
 {
 	struct thread *self = curthread;
 	int prior = self->td_pflags2 & flags;
 
 	self->td_pflags2 |= flags;
 	return (~flags | prior);
 }
 
 static __inline void
 curthread_pflags2_restore(int save)
 {
 
 	curthread->td_pflags2 &= save;
 }
 
 /*
  * Return the address immediately following the given thread structure,
  * typed as a pointer to struct td_sched.
  * NOTE(review): this presumes the td_sched data is laid out directly after
  * each struct thread; confirm against the thread allocation code.
  */
 static __inline __pure2 struct td_sched *
 td_get_sched(struct thread *td)
 {
 
 	return ((struct td_sched *)(td + 1));
 }
 
 #define	PROC_ID_PID	0
 #define	PROC_ID_GROUP	1
 #define	PROC_ID_SESSION	2
 #define	PROC_ID_REAP	3
 
 void	proc_id_set(int type, pid_t id);
 void	proc_id_set_cond(int type, pid_t id);
 void	proc_id_clear(int type, pid_t id);
 
 EVENTHANDLER_LIST_DECLARE(process_ctor);
 EVENTHANDLER_LIST_DECLARE(process_dtor);
 EVENTHANDLER_LIST_DECLARE(process_init);
 EVENTHANDLER_LIST_DECLARE(process_fini);
 EVENTHANDLER_LIST_DECLARE(process_exit);
 EVENTHANDLER_LIST_DECLARE(process_fork);
 EVENTHANDLER_LIST_DECLARE(process_exec);
 
 EVENTHANDLER_LIST_DECLARE(thread_ctor);
 EVENTHANDLER_LIST_DECLARE(thread_dtor);
 EVENTHANDLER_LIST_DECLARE(thread_init);
 
 #endif	/* _KERNEL */
 
 #endif	/* !_SYS_PROC_H_ */
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 0d3f9fe98893..29c8bfc3c768 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -1,585 +1,583 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _SYS_SYSTM_H_
 #define	_SYS_SYSTM_H_
 
 #include <sys/types.h>
 #include <sys/callout.h>
 #include <sys/kassert.h>
 #include <sys/queue.h>
 #include <sys/stdint.h>		/* for people using printf mainly */
 #include <machine/atomic.h>
 #include <machine/cpufunc.h>
 
 __NULLABILITY_PRAGMA_PUSH
 
 #ifdef _KERNEL
 extern int cold;		/* nonzero if we are doing a cold boot */
 extern int suspend_blocked;	/* block suspend due to pending shutdown */
 extern int rebooting;		/* kern_reboot() has been called. */
 extern char version[];		/* system version */
 extern char compiler_version[];	/* compiler version */
 extern char copyright[];	/* system copyright */
 extern int kstack_pages;	/* number of kernel stack pages */
 
 extern u_long pagesizes[];	/* supported page sizes */
 extern long physmem;		/* physical memory */
 extern long realmem;		/* 'real' memory */
 
 extern char *rootdevnames[2];	/* names of possible root devices */
 
 extern int boothowto;		/* reboot flags, from console subsystem */
 extern int bootverbose;		/* nonzero to print verbose messages */
 
 extern int maxusers;		/* system tune hint */
 extern int ngroups_max;		/* max # of supplemental groups */
 extern int vm_guest;		/* Running as virtual machine guest? */
 
 extern u_long maxphys;		/* max raw I/O transfer size */
 
 /*
  * Detected virtual machine guest types. The intention is to expand
  * and/or add to the VM_GUEST_VM type if specific VM functionality is
  * ever implemented (e.g. vendor-specific paravirtualization features).
  * Keep in sync with vm_guest_sysctl_names[].
  */
 enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV,
 		VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_GUEST_VBOX,
 		VM_GUEST_PARALLELS, VM_LAST };
 
 #endif /* _KERNEL */
 
 /*
  * Align variables.
  */
 #define	__read_mostly		__section(".data.read_mostly")
 #define	__read_frequently	__section(".data.read_frequently")
 #define	__exclusive_cache_line	__aligned(CACHE_LINE_SIZE) \
 				    __section(".data.exclusive_cache_line")
 #if defined(_STANDALONE)
 struct ucred;
 #endif
 
 #ifdef _KERNEL
 #include <sys/param.h>		/* MAXCPU */
 #include <sys/pcpu.h>		/* curthread */
 #include <sys/kpilite.h>
 
+extern bool scheduler_stopped;
+
 /*
  * If we have already panic'd and this is the thread that called
  * panic(), then don't block on any mutexes but silently succeed.
  * Otherwise, the kernel will deadlock since the scheduler isn't
  * going to run the thread that holds any lock we need.
  */
-#define	SCHEDULER_STOPPED_TD(td)  ({					\
-	MPASS((td) == curthread);					\
-	__predict_false((td)->td_stopsched);				\
-})
-#define	SCHEDULER_STOPPED() SCHEDULER_STOPPED_TD(curthread)
+#define	SCHEDULER_STOPPED()	__predict_false(scheduler_stopped)
 
 extern int osreldate;
 
 extern const void *zero_region;	/* address space maps to a zeroed page	*/
 
 extern int unmapped_buf_allowed;
 
 #ifdef __LP64__
 #define	IOSIZE_MAX		iosize_max()
 #define	DEVFS_IOSIZE_MAX	devfs_iosize_max()
 #else
 #define	IOSIZE_MAX		SSIZE_MAX
 #define	DEVFS_IOSIZE_MAX	SSIZE_MAX
 #endif
 
 /*
  * General function declarations.
  */
 
 struct inpcb;
 struct lock_object;
 struct malloc_type;
 struct mtx;
 struct proc;
 struct socket;
 struct thread;
 struct tty;
 struct ucred;
 struct uio;
 struct _jmp_buf;
 struct trapframe;
 struct eventtimer;
 
 int	setjmp(struct _jmp_buf *) __returns_twice;
 void	longjmp(struct _jmp_buf *, int) __dead2;
 int	dumpstatus(vm_offset_t addr, off_t count);
 int	nullop(void);
 int	eopnotsupp(void);
 int	ureadc(int, struct uio *);
 void	hashdestroy(void *, struct malloc_type *, u_long);
 void	*hashinit(int count, struct malloc_type *type, u_long *hashmask);
 void	*hashinit_flags(int count, struct malloc_type *type,
     u_long *hashmask, int flags);
 #define	HASH_NOWAIT	0x00000001
 #define	HASH_WAITOK	0x00000002
 
 void	*phashinit(int count, struct malloc_type *type, u_long *nentries);
 void	*phashinit_flags(int count, struct malloc_type *type, u_long *nentries,
     int flags);
 
 void	cpu_flush_dcache(void *, size_t);
 void	cpu_rootconf(void);
 void	critical_enter_KBI(void);
 void	critical_exit_KBI(void);
 void	critical_exit_preempt(void);
 void	init_param1(void);
 void	init_param2(long physpages);
 void	init_static_kenv(char *, size_t);
 void	tablefull(const char *);
 
 /*
  * Allocate per-thread "current" state in the linuxkpi
  */
 extern int (*lkpi_alloc_current)(struct thread *, int);
 int linux_alloc_current_noop(struct thread *, int);
 
 /*
  * critical_enter() / critical_exit().
  *
  * Untied kernel modules, KTR_CRITICAL builds, non-kernel consumers and
  * GENOFFSET builds are routed to the out-of-line critical_enter_KBI() and
  * critical_exit_KBI() functions.  Everything else gets the inline versions
  * below, which manipulate the current thread through struct thread_lite.
  */
 #if (defined(KLD_MODULE) && !defined(KLD_TIED)) || defined(KTR_CRITICAL) || !defined(_KERNEL) || defined(GENOFFSET)
 #define critical_enter() critical_enter_KBI()
 #define critical_exit() critical_exit_KBI()
 #else
 /*
  * Enter a critical section: bump the current thread's td_critnest nesting
  * count, then issue atomic_interrupt_fence() after the increment.
  */
 static __inline void
 critical_enter(void)
 {
 	struct thread_lite *td;
 
 	td = (struct thread_lite *)curthread;
 	td->td_critnest++;
 	atomic_interrupt_fence();
 }
 
 /*
  * Leave a critical section: assert that the nesting count is non-zero,
  * decrement it between two atomic_interrupt_fence() calls, and then call
  * critical_exit_preempt() if td_owepreempt is set.
  */
 static __inline void
 critical_exit(void)
 {
 	struct thread_lite *td;
 
 	td = (struct thread_lite *)curthread;
 	KASSERT(td->td_critnest != 0,
 	    ("critical_exit: td_critnest == 0"));
 	atomic_interrupt_fence();
 	td->td_critnest--;
 	atomic_interrupt_fence();
 	/* td_owepreempt is tested only after the decrement is fenced. */
 	if (__predict_false(td->td_owepreempt))
 		critical_exit_preempt();
 
 }
 #endif
 
 #ifdef  EARLY_PRINTF
 typedef void early_putc_t(int ch);
 extern early_putc_t *early_putc;
 #endif
 int	kvprintf(char const *, void (*)(int, void*), void *, int,
 	    __va_list) __printflike(1, 0);
 void	log(int, const char *, ...) __printflike(2, 3);
 void	log_console(struct uio *);
 void	vlog(int, const char *, __va_list) __printflike(2, 0);
 int	asprintf(char **ret, struct malloc_type *mtp, const char *format, 
 	    ...) __printflike(3, 4);
 int	printf(const char *, ...) __printflike(1, 2);
 int	snprintf(char *, size_t, const char *, ...) __printflike(3, 4);
 int	sprintf(char *buf, const char *, ...) __printflike(2, 3);
 int	uprintf(const char *, ...) __printflike(1, 2);
 int	vprintf(const char *, __va_list) __printflike(1, 0);
 int	vasprintf(char **ret, struct malloc_type *mtp, const char *format,
 	    __va_list ap) __printflike(3, 0);
 int	vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0);
 int	vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0);
 int	vsprintf(char *buf, const char *, __va_list) __printflike(2, 0);
 int	sscanf(const char *, char const * _Nonnull, ...) __scanflike(2, 3);
 int	vsscanf(const char * _Nonnull, char const * _Nonnull, __va_list)  __scanflike(2, 0);
 long	strtol(const char *, char **, int);
 u_long	strtoul(const char *, char **, int);
 quad_t	strtoq(const char *, char **, int);
 u_quad_t strtouq(const char *, char **, int);
 void	tprintf(struct proc *p, int pri, const char *, ...) __printflike(3, 4);
 void	vtprintf(struct proc *, int, const char *, __va_list) __printflike(3, 0);
 void	hexdump(const void *ptr, int length, const char *hdr, int flags);
 #define	HD_COLUMN_MASK	0xff
 #define	HD_DELIM_MASK	0xff00
 #define	HD_OMIT_COUNT	(1 << 16)
 #define	HD_OMIT_HEX	(1 << 17)
 #define	HD_OMIT_CHARS	(1 << 18)
 
 #define ovbcopy(f, t, l) bcopy((f), (t), (l))
 void	explicit_bzero(void * _Nonnull, size_t);
 
 void	*memset(void * _Nonnull buf, int c, size_t len);
 void	*memcpy(void * _Nonnull to, const void * _Nonnull from, size_t len);
 void	*memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n);
 int	memcmp(const void *b1, const void *b2, size_t len);
 
 #ifdef SAN_NEEDS_INTERCEPTORS
 #define	SAN_INTERCEPTOR(func)	\
 	__CONCAT(SAN_INTERCEPTOR_PREFIX, __CONCAT(_, func))
 void	*SAN_INTERCEPTOR(memset)(void *, int, size_t);
 void	*SAN_INTERCEPTOR(memcpy)(void *, const void *, size_t);
 void	*SAN_INTERCEPTOR(memmove)(void *, const void *, size_t);
 int	SAN_INTERCEPTOR(memcmp)(const void *, const void *, size_t);
 #ifndef SAN_RUNTIME
 #define bcopy(from, to, len)	SAN_INTERCEPTOR(memmove)((to), (from), (len))
 #define bzero(buf, len)		SAN_INTERCEPTOR(memset)((buf), 0, (len))
 #define bcmp(b1, b2, len)	SAN_INTERCEPTOR(memcmp)((b1), (b2), (len))
 #define memset(buf, c, len)	SAN_INTERCEPTOR(memset)((buf), (c), (len))
 #define memcpy(to, from, len)	SAN_INTERCEPTOR(memcpy)((to), (from), (len))
 #define memmove(dest, src, n)	SAN_INTERCEPTOR(memmove)((dest), (src), (n))
 #define memcmp(b1, b2, len)	SAN_INTERCEPTOR(memcmp)((b1), (b2), (len))
 #endif /* !SAN_RUNTIME */
 #else /* !SAN_NEEDS_INTERCEPTORS */
 #define bcopy(from, to, len)	__builtin_memmove((to), (from), (len))
 #define bzero(buf, len)		__builtin_memset((buf), 0, (len))
 #define bcmp(b1, b2, len)	__builtin_memcmp((b1), (b2), (len))
 #define memset(buf, c, len)	__builtin_memset((buf), (c), (len))
 #define memcpy(to, from, len)	__builtin_memcpy((to), (from), (len))
 #define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))
 #define memcmp(b1, b2, len)	__builtin_memcmp((b1), (b2), (len))
 #endif /* SAN_NEEDS_INTERCEPTORS */
 
 void	*memset_early(void * _Nonnull buf, int c, size_t len);
 #define bzero_early(buf, len) memset_early((buf), 0, (len))
 void	*memcpy_early(void * _Nonnull to, const void * _Nonnull from, size_t len);
 void	*memmove_early(void * _Nonnull dest, const void * _Nonnull src, size_t n);
 #define bcopy_early(from, to, len) memmove_early((to), (from), (len))
 
 /*
  * Bounded string copy implemented on top of strlcpy().
  *
  * Copies 'src' into 'dst' using at most 'len' bytes of 'dst'.  If 'outlen'
  * is not NULL, *outlen receives 'len' when strlcpy() reports a result of
  * 'len' or more, and the strlcpy() result plus one otherwise.  The macro
  * evaluates to ENAMETOOLONG in the former case and to 0 in the latter.
  *
  * 'len' and 'outlen' are captured in locals, so each macro argument is
  * evaluated exactly once.
  */
 #define	copystr(src, dst, len, outlen)	({			\
 	size_t __r, __len, *__outlen;				\
 								\
 	__len = (len);						\
 	__outlen = (outlen);					\
 	__r = strlcpy((dst), (src), __len);			\
 	if (__outlen != NULL)					\
 		*__outlen = ((__r >= __len) ? __len : __r + 1);	\
 	((__r >= __len) ? ENAMETOOLONG : 0);			\
 })
 
 int __result_use_check copyinstr(const void * __restrict udaddr,
     void * _Nonnull __restrict kaddr, size_t len,
     size_t * __restrict lencopied);
 int __result_use_check copyin(const void * __restrict udaddr,
     void * _Nonnull __restrict kaddr, size_t len);
 int __result_use_check copyin_nofault(const void * __restrict udaddr,
     void * _Nonnull __restrict kaddr, size_t len);
 int copyout(const void * _Nonnull __restrict kaddr,
     void * __restrict udaddr, size_t len);
 int copyout_nofault(const void * _Nonnull __restrict kaddr,
     void * __restrict udaddr, size_t len);
 
 #ifdef SAN_NEEDS_INTERCEPTORS
 int	SAN_INTERCEPTOR(copyin)(const void *, void *, size_t);
 int	SAN_INTERCEPTOR(copyinstr)(const void *, void *, size_t, size_t *);
 int	SAN_INTERCEPTOR(copyout)(const void *, void *, size_t);
 #ifndef SAN_RUNTIME
 #define	copyin(u, k, l)		SAN_INTERCEPTOR(copyin)((u), (k), (l))
 #define	copyinstr(u, k, l, lc)	SAN_INTERCEPTOR(copyinstr)((u), (k), (l), (lc))
 #define	copyout(k, u, l)	SAN_INTERCEPTOR(copyout)((k), (u), (l))
 #endif /* !SAN_RUNTIME */
 #endif /* SAN_NEEDS_INTERCEPTORS */
 
 int	fubyte(volatile const void *base);
 long	fuword(volatile const void *base);
 int	fuword16(volatile const void *base);
 int32_t	fuword32(volatile const void *base);
 int64_t	fuword64(volatile const void *base);
 int __result_use_check fueword(volatile const void *base, long *val);
 int __result_use_check fueword32(volatile const void *base, int32_t *val);
 int __result_use_check fueword64(volatile const void *base, int64_t *val);
 int subyte(volatile void *base, int byte);
 int suword(volatile void *base, long word);
 int suword16(volatile void *base, int word);
 int suword32(volatile void *base, int32_t word);
 int suword64(volatile void *base, int64_t word);
 uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval);
 u_long	casuword(volatile u_long *p, u_long oldval, u_long newval);
 int	casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp,
 	    uint32_t newval);
 int	casueword(volatile u_long *p, u_long oldval, u_long *oldvalp,
 	    u_long newval);
 
 #if defined(SAN_NEEDS_INTERCEPTORS) && !defined(KCSAN)
 int	SAN_INTERCEPTOR(fubyte)(volatile const void *base);
 int	SAN_INTERCEPTOR(fuword16)(volatile const void *base);
 int	SAN_INTERCEPTOR(fueword)(volatile const void *base, long *val);
 int	SAN_INTERCEPTOR(fueword32)(volatile const void *base, int32_t *val);
 int	SAN_INTERCEPTOR(fueword64)(volatile const void *base, int64_t *val);
 int	SAN_INTERCEPTOR(subyte)(volatile void *base, int byte);
 int	SAN_INTERCEPTOR(suword)(volatile void *base, long word);
 int	SAN_INTERCEPTOR(suword16)(volatile void *base, int word);
 int	SAN_INTERCEPTOR(suword32)(volatile void *base, int32_t word);
 int	SAN_INTERCEPTOR(suword64)(volatile void *base, int64_t word);
 int	SAN_INTERCEPTOR(casueword32)(volatile uint32_t *base, uint32_t oldval,
 	    uint32_t *oldvalp, uint32_t newval);
 int	SAN_INTERCEPTOR(casueword)(volatile u_long *p, u_long oldval,
 	    u_long *oldvalp, u_long newval);
 #ifndef SAN_RUNTIME
 #define	fubyte(b)		SAN_INTERCEPTOR(fubyte)((b))
 #define	fuword16(b)		SAN_INTERCEPTOR(fuword16)((b))
 #define	fueword(b, v)		SAN_INTERCEPTOR(fueword)((b), (v))
 #define	fueword32(b, v)		SAN_INTERCEPTOR(fueword32)((b), (v))
 #define	fueword64(b, v)		SAN_INTERCEPTOR(fueword64)((b), (v))
 #define	subyte(b, w)		SAN_INTERCEPTOR(subyte)((b), (w))
 #define	suword(b, w)		SAN_INTERCEPTOR(suword)((b), (w))
 #define	suword16(b, w)		SAN_INTERCEPTOR(suword16)((b), (w))
 #define	suword32(b, w)		SAN_INTERCEPTOR(suword32)((b), (w))
 #define	suword64(b, w)		SAN_INTERCEPTOR(suword64)((b), (w))
 #define	casueword32(b, o, p, n)	SAN_INTERCEPTOR(casueword32)((b), (o), (p), (n))
 #define	casueword(b, o, p, n)	SAN_INTERCEPTOR(casueword)((b), (o), (p), (n))
 #endif /* !SAN_RUNTIME */
 #endif /* SAN_NEEDS_INTERCEPTORS && !KCSAN */
 
 int	sysbeep(int hertz, sbintime_t duration);
 
 void	hardclock(int cnt, int usermode);
 void	hardclock_sync(int cpu);
 void	statclock(int cnt, int usermode);
 void	profclock(int cnt, int usermode, uintfptr_t pc);
 
 int	hardclockintr(void);
 
 void	startprofclock(struct proc *);
 void	stopprofclock(struct proc *);
 void	cpu_startprofclock(void);
 void	cpu_stopprofclock(void);
 void	suspendclock(void);
 void	resumeclock(void);
 sbintime_t 	cpu_idleclock(void);
 void	cpu_activeclock(void);
 void	cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt);
 void	cpu_et_frequency(struct eventtimer *et, uint64_t newfreq);
 extern int	cpu_disable_c2_sleep;
 extern int	cpu_disable_c3_sleep;
 
 /*
  * Optional hook, invoked through the like-named macro that follows.  A
  * function-like macro is not re-expanded inside its own replacement list,
  * so both uses of tcp_hpts_softclock in the macro body name this pointer
  * variable: the hook is called only when it is non-NULL.
  */
 extern void	(*tcp_hpts_softclock)(void);
 #define	tcp_hpts_softclock()	do {					\
 		if (tcp_hpts_softclock != NULL)				\
 			tcp_hpts_softclock();				\
 } while (0)
 
 char	*kern_getenv(const char *name);
 void	freeenv(char *env);
 int	getenv_int(const char *name, int *data);
 int	getenv_uint(const char *name, unsigned int *data);
 int	getenv_long(const char *name, long *data);
 int	getenv_ulong(const char *name, unsigned long *data);
 int	getenv_string(const char *name, char *data, int size);
 int	getenv_int64(const char *name, int64_t *data);
 int	getenv_uint64(const char *name, uint64_t *data);
 int	getenv_quad(const char *name, quad_t *data);
 int	getenv_bool(const char *name, bool *data);
 bool	getenv_is_true(const char *name);
 bool	getenv_is_false(const char *name);
 int	kern_setenv(const char *name, const char *value);
 int	kern_unsetenv(const char *name);
 int	testenv(const char *name);
 
 int	getenv_array(const char *name, void *data, int size, int *psize,
     int type_size, bool allow_signed);
 #define	GETENV_UNSIGNED	false	/* negative numbers not allowed */
 #define	GETENV_SIGNED	true	/* negative numbers allowed */
 
 typedef uint64_t (cpu_tick_f)(void);
 void set_cputicker(cpu_tick_f *func, uint64_t freq, bool isvariable);
 extern cpu_tick_f *cpu_ticks;
 uint64_t cpu_tickrate(void);
 uint64_t cputick2usec(uint64_t tick);
 
 #include <sys/libkern.h>
 
 /* Initialize the world */
 void	consinit(void);
 void	cpu_initclocks(void);
 void	cpu_initclocks_bsp(void);
 void	cpu_initclocks_ap(void);
 void	usrinfoinit(void);
 
 /* Finalize the world */
 void	kern_reboot(int) __dead2;
 void	shutdown_nice(int);
 
 /* Stubs for obsolete functions that used to be for interrupt management */
 /* Each spl*() stub performs no work and reports a zero mask. */
 static __inline intrmask_t
 splhigh(void)
 {
 	return (0);
 }
 
 static __inline intrmask_t
 splimp(void)
 {
 	return (0);
 }
 
 static __inline intrmask_t
 splnet(void)
 {
 	return (0);
 }
 
 static __inline intrmask_t
 spltty(void)
 {
 	return (0);
 }
 
 /* splx() accepts a mask and ignores it. */
 static __inline void
 splx(intrmask_t ipl __unused)
 {
 }
 
 /*
  * Common `proc' functions are declared here so that proc.h can be included
  * less often.
  */
 int	_sleep(const void * _Nonnull chan, struct lock_object *lock, int pri,
 	   const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags);
 #define	msleep(chan, mtx, pri, wmesg, timo)				\
 	_sleep((chan), &(mtx)->lock_object, (pri), (wmesg),		\
 	    tick_sbt * (timo), 0, C_HARDCLOCK)
 #define	msleep_sbt(chan, mtx, pri, wmesg, bt, pr, flags)		\
 	_sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (bt), (pr),	\
 	    (flags))
 int	msleep_spin_sbt(const void * _Nonnull chan, struct mtx *mtx,
 	    const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags);
 #define	msleep_spin(chan, mtx, wmesg, timo)				\
 	msleep_spin_sbt((chan), (mtx), (wmesg), tick_sbt * (timo),	\
 	    0, C_HARDCLOCK)
 int	pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr,
 	    int flags);
 static __inline int
 pause(const char *wmesg, int timo)
 {
 	return (pause_sbt(wmesg, tick_sbt * timo, 0, C_HARDCLOCK));
 }
 #define	pause_sig(wmesg, timo)						\
 	pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK | C_CATCH)
 #define	tsleep(chan, pri, wmesg, timo)					\
 	_sleep((chan), NULL, (pri), (wmesg), tick_sbt * (timo),		\
 	    0, C_HARDCLOCK)
 #define	tsleep_sbt(chan, pri, wmesg, bt, pr, flags)			\
 	_sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags))
 void	wakeup(const void *chan);
 void	wakeup_one(const void *chan);
 void	wakeup_any(const void *chan);
 
 /*
  * Common `struct cdev *' stuff is declared here to avoid #include poisoning.
  */
 
 struct cdev;
 dev_t dev2udev(struct cdev *x);
 const char *devtoname(struct cdev *cdev);
 
 #ifdef __LP64__
 size_t	devfs_iosize_max(void);
 size_t	iosize_max(void);
 #endif
 
 int poll_no_poll(int events);
 
 /* XXX: Should be void nanodelay(u_int nsec); */
 void	DELAY(int usec);
 
 int kcmp_cmp(uintptr_t a, uintptr_t b);
 
 /* Root mount holdback API */
 /*
  * Token type used by the root mount holdback functions declared below:
  * root_mount_hold() returns a pointer to one, root_mount_hold_token()
  * takes a caller-supplied one, and root_mount_rel() takes one to release.
  */
 struct root_hold_token {
 	int				flags;	/* NOTE(review): flag values are not visible in this header */
 	const char			*who;	/* presumably the identifier passed when taking the hold -- confirm */
 	TAILQ_ENTRY(root_hold_token)	list;	/* tail-queue linkage */
 };
 
 struct root_hold_token *root_mount_hold(const char *identifier);
 void root_mount_hold_token(const char *identifier, struct root_hold_token *h);
 void root_mount_rel(struct root_hold_token *h);
 int root_mounted(void);
 
 /*
  * Unit number allocation API. (kern/subr_unit.c)
  */
 struct unrhdr;
 #define	UNR_NO_MTX	((void *)(uintptr_t)-1)
 struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex);
 void init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex);
 void delete_unrhdr(struct unrhdr *uh);
 void clear_unrhdr(struct unrhdr *uh);
 void clean_unrhdr(struct unrhdr *uh);
 void clean_unrhdrl(struct unrhdr *uh);
 int alloc_unr(struct unrhdr *uh);
 int alloc_unr_specific(struct unrhdr *uh, u_int item);
 int alloc_unrl(struct unrhdr *uh);
 void free_unr(struct unrhdr *uh, u_int item);
 void *create_iter_unr(struct unrhdr *uh);
 int next_iter_unr(void *handle);
 void free_iter_unr(void *handle);
 
 /*
  * State for the 64-bit unit number allocator: a single counter that
  * new_unrhdr64() initializes and alloc_unr64() advances.
  */
 struct unrhdr64 {
 	uint64_t	counter;
 };
 
 /*
  * Initialize 'unr64' so that its counter starts at 'low'.
  */
 static __inline void
 new_unrhdr64(struct unrhdr64 *unr64, uint64_t low)
 {
 	uint64_t *next = &unr64->counter;
 
 	*next = low;
 }
 
 /*
  * Hand out a unit number: add 1 to the counter with atomic_fetchadd_64()
  * and return the value that call yields.
  */
 static __inline uint64_t
 alloc_unr64(struct unrhdr64 *unr64)
 {
 	uint64_t unit;
 
 	unit = atomic_fetchadd_64(&unr64->counter, 1);
 	return (unit);
 }
 
 void	intr_prof_stack_use(struct thread *td, struct trapframe *frame);
 
 void counted_warning(unsigned *counter, const char *msg);
 
 /*
  * APIs to manage deprecation and obsolescence.
  */
 void _gone_in(int major, const char *msg);
 void _gone_in_dev(device_t dev, int major, const char *msg);
 #ifdef NO_OBSOLETE_CODE
 /*
  * Compile-time gate used by gone_in() and gone_in_dev() when
  * NO_OBSOLETE_CODE is defined.  'm' is the major version given to
  * gone_in(); 'msg' must be a string literal, since it is concatenated
  * onto the assertion message.
  *
  * The trailing semicolon is part of the expansion because gone_in()
  * places this macro directly in front of the _gone_in() call.
  *
  * The previous definition closed _Static_assert() after its first
  * argument, leaving the message outside the call and an unmatched ')';
  * it could not compile once NO_OBSOLETE_CODE was defined.
  *
  * NOTE(review): the comparison direction is kept exactly as it was
  * (the assertion holds when m is below the current major version).
  * Confirm this is the intended sense.
  */
 #define __gone_ok(m, msg)					 \
 	_Static_assert((m) < P_OSREL_MAJOR(__FreeBSD_version),	 \
 	    "Obsolete code: " msg);
 #else
 #define	__gone_ok(m, msg)
 #endif
 #define gone_in(major, msg)		__gone_ok(major, msg) _gone_in(major, msg)
 #define gone_in_dev(dev, major, msg)	__gone_ok(major, msg) _gone_in_dev(dev, major, msg)
 
 #ifdef INVARIANTS
 #define	__diagused
 #else
 #define	__diagused	__unused
 #endif
 
 #ifdef WITNESS
 #define	__witness_used
 #else
 #define	__witness_used	__unused
 #endif
 
 #endif /* _KERNEL */
 
 __NULLABILITY_PRAGMA_POP
 #endif /* !_SYS_SYSTM_H_ */