diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c index 9962154cffd9..ebc1dbabd567 100644 --- a/sys/compat/linuxkpi/common/src/linux_compat.c +++ b/sys/compat/linuxkpi/common/src/linux_compat.c @@ -1,2871 +1,2867 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2021 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "opt_global.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) || defined(__amd64__) #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) || defined(__amd64__) #include #include #endif #include #ifdef XENHVM #undef xen_pv_domain #undef xen_initial_domain /* xen/xen-os.h redefines __must_check */ #undef __must_check #include #endif SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "LinuxKPI parameters"); int linuxkpi_debug; SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug, CTLFLAG_RWTUN, &linuxkpi_debug, 0, "Set to enable pr_debug() prints. Clear to disable."); int linuxkpi_warn_dump_stack = 0; SYSCTL_INT(_compat_linuxkpi, OID_AUTO, warn_dump_stack, CTLFLAG_RWTUN, &linuxkpi_warn_dump_stack, 0, "Set to enable stack traces from WARN_ON(). Clear to disable."); static struct timeval lkpi_net_lastlog; static int lkpi_net_curpps; static int lkpi_net_maxpps = 99; SYSCTL_INT(_compat_linuxkpi, OID_AUTO, net_ratelimit, CTLFLAG_RWTUN, &lkpi_net_maxpps, 0, "Limit number of LinuxKPI net messages per second."); MALLOC_DEFINE(M_KMALLOC, "lkpikmalloc", "Linux kmalloc compat"); #include /* Undo Linux compat changes. 
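 * The LinuxKPI headers remap several native identifiers; this file also
 * needs the FreeBSD meanings of RB_ROOT, file and cdev, so the macros are
 * undone below and RB_ROOT is redefined on top of the native rbtree head.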
*/ #undef RB_ROOT #undef file #undef cdev #define RB_ROOT(head) (head)->rbh_root static void linux_destroy_dev(struct linux_cdev *); static void linux_cdev_deref(struct linux_cdev *ldev); static struct vm_area_struct *linux_cdev_handle_find(void *handle); cpumask_t cpu_online_mask; static cpumask_t **static_single_cpu_mask; static cpumask_t *static_single_cpu_mask_lcs; struct kobject linux_class_root; struct device linux_root_device; struct class linux_class_misc; struct list_head pci_drivers; struct list_head pci_devices; spinlock_t pci_lock; struct uts_namespace init_uts_ns; unsigned long linux_timer_hz_mask; wait_queue_head_t linux_bit_waitq; wait_queue_head_t linux_var_waitq; int panic_cmp(struct rb_node *one, struct rb_node *two) { panic("no cmp"); } RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); #define START(node) ((node)->start) #define LAST(node) ((node)->last) INTERVAL_TREE_DEFINE(struct interval_tree_node, rb, unsigned long,, START, LAST,, lkpi_interval_tree) static void linux_device_release(struct device *dev) { pr_debug("linux_device_release: %s\n", dev_name(dev)); kfree(dev); } static ssize_t linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct class_attribute *dattr; ssize_t error; dattr = container_of(attr, struct class_attribute, attr); error = -EIO; if (dattr->show) error = dattr->show(container_of(kobj, struct class, kobj), dattr, buf); return (error); } static ssize_t linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct class_attribute *dattr; ssize_t error; dattr = container_of(attr, struct class_attribute, attr); error = -EIO; if (dattr->store) error = dattr->store(container_of(kobj, struct class, kobj), dattr, buf, count); return (error); } static void linux_class_release(struct kobject *kobj) { struct class *class; class = container_of(kobj, struct class, kobj); if (class->class_release) class->class_release(class); } static const struct sysfs_ops linux_class_sysfs = { .show = linux_class_show, .store = linux_class_store, }; const struct kobj_type linux_class_ktype = { .release = linux_class_release, .sysfs_ops = &linux_class_sysfs }; static void linux_dev_release(struct kobject *kobj) { struct device *dev; dev = container_of(kobj, struct device, kobj); /* This is the precedence defined by linux. */ if (dev->release) dev->release(dev); else if (dev->class && dev->class->dev_release) dev->class->dev_release(dev); } static ssize_t linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct device_attribute *dattr; ssize_t error; dattr = container_of(attr, struct device_attribute, attr); error = -EIO; if (dattr->show) error = dattr->show(container_of(kobj, struct device, kobj), dattr, buf); return (error); } static ssize_t linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct device_attribute *dattr; ssize_t error; dattr = container_of(attr, struct device_attribute, attr); error = -EIO; if (dattr->store) error = dattr->store(container_of(kobj, struct device, kobj), dattr, buf, count); return (error); } static const struct sysfs_ops linux_dev_sysfs = { .show = linux_dev_show, .store = linux_dev_store, }; const struct kobj_type linux_dev_ktype = { .release = linux_dev_release, .sysfs_ops = &linux_dev_sysfs }; struct device * device_create(struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) 
{ struct device *dev; va_list args; dev = kzalloc(sizeof(*dev), M_WAITOK); dev->parent = parent; dev->class = class; dev->devt = devt; dev->driver_data = drvdata; dev->release = linux_device_release; va_start(args, fmt); kobject_set_name_vargs(&dev->kobj, fmt, args); va_end(args); device_register(dev); return (dev); } struct device * device_create_groups_vargs(struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, va_list args) { struct device *dev = NULL; int retval = -ENODEV; if (class == NULL || IS_ERR(class)) goto error; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto error; } dev->devt = devt; dev->class = class; dev->parent = parent; dev->groups = groups; dev->release = device_create_release; /* device_initialize() needs the class and parent to be set */ device_initialize(dev); dev_set_drvdata(dev, drvdata); retval = kobject_set_name_vargs(&dev->kobj, fmt, args); if (retval) goto error; retval = device_add(dev); if (retval) goto error; return dev; error: put_device(dev); return ERR_PTR(retval); } struct class * lkpi_class_create(const char *name) { struct class *class; int error; class = kzalloc(sizeof(*class), M_WAITOK); class->name = name; class->class_release = linux_class_kfree; error = class_register(class); if (error) { kfree(class); return (NULL); } return (class); } static void linux_kq_lock(void *arg) { spinlock_t *s = arg; spin_lock(s); } static void linux_kq_unlock(void *arg) { spinlock_t *s = arg; spin_unlock(s); } static void linux_kq_assert_lock(void *arg, int what) { #ifdef INVARIANTS spinlock_t *s = arg; if (what == LA_LOCKED) mtx_assert(s, MA_OWNED); else mtx_assert(s, MA_NOTOWNED); #endif } static void linux_file_kqfilter_poll(struct linux_file *, int); struct linux_file * linux_file_alloc(void) { struct linux_file *filp; filp = kzalloc(sizeof(*filp), GFP_KERNEL); /* set initial refcount */ filp->f_count = 1; /* setup fields needed by kqueue support */ spin_lock_init(&filp->f_kqlock); knlist_init(&filp->f_selinfo.si_note, &filp->f_kqlock, linux_kq_lock, linux_kq_unlock, linux_kq_assert_lock); return (filp); } void linux_file_free(struct linux_file *filp) { if (filp->_file == NULL) { if (filp->f_op != NULL && filp->f_op->release != NULL) filp->f_op->release(filp->f_vnode, filp); if (filp->f_shmem != NULL) vm_object_deallocate(filp->f_shmem); kfree_rcu(filp, rcu); } else { /* * The close method of the character device or file * will free the linux_file structure: */ _fdrop(filp->_file, curthread); } } struct linux_cdev * cdev_alloc(void) { struct linux_cdev *cdev; cdev = kzalloc(sizeof(struct linux_cdev), M_WAITOK); kobject_init(&cdev->kobj, &linux_cdev_ktype); cdev->refs = 1; return (cdev); } static int linux_cdev_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct vm_area_struct *vmap; vmap = linux_cdev_handle_find(vm_obj->handle); MPASS(vmap != NULL); MPASS(vmap->vm_private_data == vm_obj->handle); if (likely(vmap->vm_ops != NULL && offset < vmap->vm_len)) { vm_paddr_t paddr = IDX_TO_OFF(vmap->vm_pfn) + offset; vm_page_t page; if (((*mres)->flags & PG_FICTITIOUS) != 0) { /* * If the passed in result page is a fake * page, update it with the new physical * address. */ page = *mres; vm_page_updatefake(page, paddr, vm_obj->memattr); } else { /* * Replace the passed in "mres" page with our * own fake page and free up all of the * original pages.
*/ VM_OBJECT_WUNLOCK(vm_obj); page = vm_page_getfake(paddr, vm_obj->memattr); VM_OBJECT_WLOCK(vm_obj); vm_page_replace(page, vm_obj, (*mres)->pindex, *mres); *mres = page; } vm_page_valid(page); return (VM_PAGER_OK); } return (VM_PAGER_FAIL); } static int linux_cdev_pager_populate(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) { struct vm_area_struct *vmap; int err; /* get VM area structure */ vmap = linux_cdev_handle_find(vm_obj->handle); MPASS(vmap != NULL); MPASS(vmap->vm_private_data == vm_obj->handle); VM_OBJECT_WUNLOCK(vm_obj); linux_set_current(curthread); down_write(&vmap->vm_mm->mmap_sem); if (unlikely(vmap->vm_ops == NULL)) { err = VM_FAULT_SIGBUS; } else { struct vm_fault vmf; /* fill out VM fault structure */ vmf.virtual_address = (void *)(uintptr_t)IDX_TO_OFF(pidx); vmf.flags = (fault_type & VM_PROT_WRITE) ? FAULT_FLAG_WRITE : 0; vmf.pgoff = 0; vmf.page = NULL; vmf.vma = vmap; vmap->vm_pfn_count = 0; vmap->vm_pfn_pcount = &vmap->vm_pfn_count; vmap->vm_obj = vm_obj; err = vmap->vm_ops->fault(&vmf); while (vmap->vm_pfn_count == 0 && err == VM_FAULT_NOPAGE) { kern_yield(PRI_USER); err = vmap->vm_ops->fault(&vmf); } } /* translate return code */ switch (err) { case VM_FAULT_OOM: err = VM_PAGER_AGAIN; break; case VM_FAULT_SIGBUS: err = VM_PAGER_BAD; break; case VM_FAULT_NOPAGE: /* * By contract the fault handler will return having * busied all the pages itself. If pidx is already * found in the object, it will simply xbusy the first * page and return with vm_pfn_count set to 1. */ *first = vmap->vm_pfn_first; *last = *first + vmap->vm_pfn_count - 1; err = VM_PAGER_OK; break; default: err = VM_PAGER_ERROR; break; } up_write(&vmap->vm_mm->mmap_sem); VM_OBJECT_WLOCK(vm_obj); return (err); } static struct rwlock linux_vma_lock; static TAILQ_HEAD(, vm_area_struct) linux_vma_head = TAILQ_HEAD_INITIALIZER(linux_vma_head); static void linux_cdev_handle_free(struct vm_area_struct *vmap) { /* Drop reference on vm_file */ if (vmap->vm_file != NULL) fput(vmap->vm_file); /* Drop reference on mm_struct */ mmput(vmap->vm_mm); kfree(vmap); } static void linux_cdev_handle_remove(struct vm_area_struct *vmap) { rw_wlock(&linux_vma_lock); TAILQ_REMOVE(&linux_vma_head, vmap, vm_entry); rw_wunlock(&linux_vma_lock); } static struct vm_area_struct * linux_cdev_handle_find(void *handle) { struct vm_area_struct *vmap; rw_rlock(&linux_vma_lock); TAILQ_FOREACH(vmap, &linux_vma_head, vm_entry) { if (vmap->vm_private_data == handle) break; } rw_runlock(&linux_vma_lock); return (vmap); } static int linux_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { MPASS(linux_cdev_handle_find(handle) != NULL); *color = 0; return (0); } static void linux_cdev_pager_dtor(void *handle) { const struct vm_operations_struct *vm_ops; struct vm_area_struct *vmap; vmap = linux_cdev_handle_find(handle); MPASS(vmap != NULL); /* * Remove handle before calling close operation to prevent * other threads from reusing the handle pointer. 
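 * (Otherwise a racing linux_cdev_handle_find() could hand out a vmap
 * whose vm_ops->close() is already running below.)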
*/ linux_cdev_handle_remove(vmap); down_write(&vmap->vm_mm->mmap_sem); vm_ops = vmap->vm_ops; if (likely(vm_ops != NULL)) vm_ops->close(vmap); up_write(&vmap->vm_mm->mmap_sem); linux_cdev_handle_free(vmap); } static struct cdev_pager_ops linux_cdev_pager_ops[2] = { { /* OBJT_MGTDEVICE */ .cdev_pg_populate = linux_cdev_pager_populate, .cdev_pg_ctor = linux_cdev_pager_ctor, .cdev_pg_dtor = linux_cdev_pager_dtor }, { /* OBJT_DEVICE */ .cdev_pg_fault = linux_cdev_pager_fault, .cdev_pg_ctor = linux_cdev_pager_ctor, .cdev_pg_dtor = linux_cdev_pager_dtor }, }; int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size) { vm_object_t obj; vm_page_t m; obj = vma->vm_obj; if (obj == NULL || (obj->flags & OBJ_UNMANAGED) != 0) return (-ENOTSUP); VM_OBJECT_RLOCK(obj); for (m = vm_page_find_least(obj, OFF_TO_IDX(address)); m != NULL && m->pindex < OFF_TO_IDX(address + size); m = TAILQ_NEXT(m, listq)) pmap_remove_all(m); VM_OBJECT_RUNLOCK(obj); return (0); } void vma_set_file(struct vm_area_struct *vma, struct linux_file *file) { struct linux_file *tmp; /* Changing an anonymous vma with this is illegal */ get_file(file); tmp = vma->vm_file; vma->vm_file = file; fput(tmp); } static struct file_operations dummy_ldev_ops = { /* XXXKIB */ }; static struct linux_cdev dummy_ldev = { .ops = &dummy_ldev_ops, }; #define LDEV_SI_DTR 0x0001 #define LDEV_SI_REF 0x0002 static void linux_get_fop(struct linux_file *filp, const struct file_operations **fop, struct linux_cdev **dev) { struct linux_cdev *ldev; u_int siref; ldev = filp->f_cdev; *fop = filp->f_op; if (ldev != NULL) { if (ldev->kobj.ktype == &linux_cdev_static_ktype) { refcount_acquire(&ldev->refs); } else { for (siref = ldev->siref;;) { if ((siref & LDEV_SI_DTR) != 0) { ldev = &dummy_ldev; *fop = ldev->ops; siref = ldev->siref; MPASS((ldev->siref & LDEV_SI_DTR) == 0); } else if (atomic_fcmpset_int(&ldev->siref, &siref, siref + LDEV_SI_REF)) { break; } } } } *dev = ldev; } static void linux_drop_fop(struct linux_cdev *ldev) { if (ldev == NULL) return; if (ldev->kobj.ktype == &linux_cdev_static_ktype) { linux_cdev_deref(ldev); } else { MPASS(ldev->kobj.ktype == &linux_cdev_ktype); MPASS((ldev->siref & ~LDEV_SI_DTR) != 0); atomic_subtract_int(&ldev->siref, LDEV_SI_REF); } } #define OPW(fp,td,code) ({ \ struct file *__fpop; \ __typeof(code) __retval; \ \ __fpop = (td)->td_fpop; \ (td)->td_fpop = (fp); \ __retval = (code); \ (td)->td_fpop = __fpop; \ __retval; \ }) static int linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *file) { struct linux_cdev *ldev; struct linux_file *filp; const struct file_operations *fop; int error; ldev = dev->si_drv1; filp = linux_file_alloc(); filp->f_dentry = &filp->f_dentry_store; filp->f_op = ldev->ops; filp->f_mode = file->f_flag; filp->f_flags = file->f_flag; filp->f_vnode = file->f_vnode; filp->_file = file; refcount_acquire(&ldev->refs); filp->f_cdev = ldev; linux_set_current(td); linux_get_fop(filp, &fop, &ldev); if (fop->open != NULL) { error = -fop->open(file->f_vnode, filp); if (error != 0) { linux_drop_fop(ldev); linux_cdev_deref(filp->f_cdev); kfree(filp); return (error); } } /* hold on to the vnode - used for fstat() */ vhold(filp->f_vnode); /* release the file from devfs */ finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); linux_drop_fop(ldev); return (ENXIO); } #define LINUX_IOCTL_MIN_PTR 0x10000UL #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX) static inline int linux_remap_address(void **uaddr, size_t len) { uintptr_t uaddr_val = 
(uintptr_t)(*uaddr); if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR && uaddr_val < LINUX_IOCTL_MAX_PTR)) { struct task_struct *pts = current; if (pts == NULL) { *uaddr = NULL; return (1); } /* compute data offset */ uaddr_val -= LINUX_IOCTL_MIN_PTR; /* check that length is within bounds */ if ((len > IOCPARM_MAX) || (uaddr_val + len) > pts->bsd_ioctl_len) { *uaddr = NULL; return (1); } /* re-add kernel buffer address */ uaddr_val += (uintptr_t)pts->bsd_ioctl_data; /* update address location */ *uaddr = (void *)uaddr_val; return (1); } return (0); } int linux_copyin(const void *uaddr, void *kaddr, size_t len) { if (linux_remap_address(__DECONST(void **, &uaddr), len)) { if (uaddr == NULL) return (-EFAULT); memcpy(kaddr, uaddr, len); return (0); } return (-copyin(uaddr, kaddr, len)); } int linux_copyout(const void *kaddr, void *uaddr, size_t len) { if (linux_remap_address(&uaddr, len)) { if (uaddr == NULL) return (-EFAULT); memcpy(uaddr, kaddr, len); return (0); } return (-copyout(kaddr, uaddr, len)); } size_t linux_clear_user(void *_uaddr, size_t _len) { uint8_t *uaddr = _uaddr; size_t len = _len; /* make sure uaddr is aligned before going into the fast loop */ while (((uintptr_t)uaddr & 7) != 0 && len > 7) { if (subyte(uaddr, 0)) return (_len); uaddr++; len--; } /* zero 8 bytes at a time */ while (len > 7) { #ifdef __LP64__ if (suword64(uaddr, 0)) return (_len); #else if (suword32(uaddr, 0)) return (_len); if (suword32(uaddr + 4, 0)) return (_len); #endif uaddr += 8; len -= 8; } /* zero fill end, if any */ while (len > 0) { if (subyte(uaddr, 0)) return (_len); uaddr++; len--; } return (0); } int linux_access_ok(const void *uaddr, size_t len) { uintptr_t saddr; uintptr_t eaddr; /* get start and end address */ saddr = (uintptr_t)uaddr; eaddr = (uintptr_t)uaddr + len; /* verify addresses are valid for userspace */ return ((saddr == eaddr) || (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS)); } /* * This function should return either EINTR or ERESTART depending on * the signal type sent to this thread: */ static int linux_get_error(struct task_struct *task, int error) { /* check for signal type interrupt code */ if (error == EINTR || error == ERESTARTSYS || error == ERESTART) { error = -linux_schedule_get_interrupt_value(task); if (error == 0) error = EINTR; } return (error); } static int linux_file_ioctl_sub(struct file *fp, struct linux_file *filp, const struct file_operations *fop, u_long cmd, caddr_t data, struct thread *td) { struct task_struct *task = current; unsigned size; int error; size = IOCPARM_LEN(cmd); /* refer to logic in sys_ioctl() */ if (size > 0) { /* * Setup hint for linux_copyin() and linux_copyout(). * * Background: Linux code expects a user-space address * while FreeBSD supplies a kernel-space address. 
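 *
 * A minimal sketch of what this enables (hypothetical driver handler,
 * names invented):
 *
 *	static long
 *	foo_ioctl(struct linux_file *filp, unsigned int cmd, unsigned long arg)
 *	{
 *		struct foo_args a;
 *
 *		if (copy_from_user(&a, (void *)arg, sizeof(a)))
 *			return (-EFAULT);
 *		return (0);
 *	}
 *
 * The handler is invoked with arg == LINUX_IOCTL_MIN_PTR, and
 * copy_from_user() ends up in linux_copyin(), which recognizes the fake
 * address window and memcpy()s from task->bsd_ioctl_data instead of
 * doing a real copyin() from user space.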
*/ task->bsd_ioctl_data = data; task->bsd_ioctl_len = size; data = (void *)LINUX_IOCTL_MIN_PTR; } else { /* fetch user-space pointer */ data = *(void **)data; } #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { /* try the compat IOCTL handler first */ if (fop->compat_ioctl != NULL) { error = -OPW(fp, td, fop->compat_ioctl(filp, cmd, (u_long)data)); } else { error = ENOTTY; } /* fallback to the regular IOCTL handler, if any */ if (error == ENOTTY && fop->unlocked_ioctl != NULL) { error = -OPW(fp, td, fop->unlocked_ioctl(filp, cmd, (u_long)data)); } } else #endif { if (fop->unlocked_ioctl != NULL) { error = -OPW(fp, td, fop->unlocked_ioctl(filp, cmd, (u_long)data)); } else { error = ENOTTY; } } if (size > 0) { task->bsd_ioctl_data = NULL; task->bsd_ioctl_len = 0; } if (error == EWOULDBLOCK) { /* update kqfilter status, if any */ linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE); } else { error = linux_get_error(task, error); } return (error); } #define LINUX_POLL_TABLE_NORMAL ((poll_table *)1) /* * This function atomically updates the poll wakeup state and returns * the previous state at the time of update. */ static uint8_t linux_poll_wakeup_state(atomic_t *v, const uint8_t *pstate) { int c, old; c = v->counter; while ((old = atomic_cmpxchg(v, c, pstate[c])) != c) c = old; return (c); } static int linux_poll_wakeup_callback(wait_queue_t *wq, unsigned int wq_state, int flags, void *key) { static const uint8_t state[LINUX_FWQ_STATE_MAX] = { [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT, /* NOP */ [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY, /* NOP */ [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_READY, [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_READY, /* NOP */ }; struct linux_file *filp = container_of(wq, struct linux_file, f_wait_queue.wq); switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { case LINUX_FWQ_STATE_QUEUED: linux_poll_wakeup(filp); return (1); default: return (0); } } void linux_poll_wait(struct linux_file *filp, wait_queue_head_t *wqh, poll_table *p) { static const uint8_t state[LINUX_FWQ_STATE_MAX] = { [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_NOT_READY, [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY, /* NOP */ [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_QUEUED, /* NOP */ [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_QUEUED, }; /* check if we are called inside the select system call */ if (p == LINUX_POLL_TABLE_NORMAL) selrecord(curthread, &filp->f_selinfo); switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { case LINUX_FWQ_STATE_INIT: /* NOTE: file handles can only belong to one wait-queue */ filp->f_wait_queue.wqh = wqh; filp->f_wait_queue.wq.func = &linux_poll_wakeup_callback; add_wait_queue(wqh, &filp->f_wait_queue.wq); atomic_set(&filp->f_wait_queue.state, LINUX_FWQ_STATE_QUEUED); break; default: break; } } static void linux_poll_wait_dequeue(struct linux_file *filp) { static const uint8_t state[LINUX_FWQ_STATE_MAX] = { [LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT, /* NOP */ [LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_INIT, [LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_INIT, [LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_INIT, }; seldrain(&filp->f_selinfo); switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) { case LINUX_FWQ_STATE_NOT_READY: case LINUX_FWQ_STATE_QUEUED: case LINUX_FWQ_STATE_READY: remove_wait_queue(filp->f_wait_queue.wqh, &filp->f_wait_queue.wq); break; default: break; } } void linux_poll_wakeup(struct linux_file *filp) { /* this function should be 
NULL-safe */ if (filp == NULL) return; selwakeup(&filp->f_selinfo); spin_lock(&filp->f_kqlock); filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ | LINUX_KQ_FLAG_NEED_WRITE; /* make sure the "knote" gets woken up */ KNOTE_LOCKED(&filp->f_selinfo.si_note, 1); spin_unlock(&filp->f_kqlock); } static void linux_file_kqfilter_detach(struct knote *kn) { struct linux_file *filp = kn->kn_hook; spin_lock(&filp->f_kqlock); knlist_remove(&filp->f_selinfo.si_note, kn, 1); spin_unlock(&filp->f_kqlock); } static int linux_file_kqfilter_read_event(struct knote *kn, long hint) { struct linux_file *filp = kn->kn_hook; mtx_assert(&filp->f_kqlock, MA_OWNED); return ((filp->f_kqflags & LINUX_KQ_FLAG_NEED_READ) ? 1 : 0); } static int linux_file_kqfilter_write_event(struct knote *kn, long hint) { struct linux_file *filp = kn->kn_hook; mtx_assert(&filp->f_kqlock, MA_OWNED); return ((filp->f_kqflags & LINUX_KQ_FLAG_NEED_WRITE) ? 1 : 0); } static struct filterops linux_dev_kqfiltops_read = { .f_isfd = 1, .f_detach = linux_file_kqfilter_detach, .f_event = linux_file_kqfilter_read_event, }; static struct filterops linux_dev_kqfiltops_write = { .f_isfd = 1, .f_detach = linux_file_kqfilter_detach, .f_event = linux_file_kqfilter_write_event, }; static void linux_file_kqfilter_poll(struct linux_file *filp, int kqflags) { struct thread *td; const struct file_operations *fop; struct linux_cdev *ldev; int temp; if ((filp->f_kqflags & kqflags) == 0) return; td = curthread; linux_get_fop(filp, &fop, &ldev); /* get the latest polling state */ temp = OPW(filp->_file, td, fop->poll(filp, NULL)); linux_drop_fop(ldev); spin_lock(&filp->f_kqlock); /* clear kqflags */ filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ | LINUX_KQ_FLAG_NEED_WRITE); /* update kqflags */ if ((temp & (POLLIN | POLLOUT)) != 0) { if ((temp & POLLIN) != 0) filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ; if ((temp & POLLOUT) != 0) filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE; /* make sure the "knote" gets woken up */ KNOTE_LOCKED(&filp->f_selinfo.si_note, 0); } spin_unlock(&filp->f_kqlock); } static int linux_file_kqfilter(struct file *file, struct knote *kn) { struct linux_file *filp; struct thread *td; int error; td = curthread; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; if (filp->f_op->poll == NULL) return (EINVAL); spin_lock(&filp->f_kqlock); switch (kn->kn_filter) { case EVFILT_READ: filp->f_kqflags |= LINUX_KQ_FLAG_HAS_READ; kn->kn_fop = &linux_dev_kqfiltops_read; kn->kn_hook = filp; knlist_add(&filp->f_selinfo.si_note, kn, 1); error = 0; break; case EVFILT_WRITE: filp->f_kqflags |= LINUX_KQ_FLAG_HAS_WRITE; kn->kn_fop = &linux_dev_kqfiltops_write; kn->kn_hook = filp; knlist_add(&filp->f_selinfo.si_note, kn, 1); error = 0; break; default: error = EINVAL; break; } spin_unlock(&filp->f_kqlock); if (error == 0) { linux_set_current(td); /* update kqfilter status, if any */ linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE); } return (error); } static int linux_file_mmap_single(struct file *fp, const struct file_operations *fop, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot, bool is_shared, struct thread *td) { struct task_struct *task; struct vm_area_struct *vmap; struct mm_struct *mm; struct linux_file *filp; vm_memattr_t attr; int error; filp = (struct linux_file *)fp->f_data; filp->f_flags = fp->f_flag; if (fop->mmap == NULL) return (EOPNOTSUPP); linux_set_current(td); /* * The same VM object might be shared by multiple processes * and the mm_struct is usually freed when a process 
exits. * * The atomic reference below makes sure the mm_struct is * available as long as the vmap is in the linux_vma_head. */ task = current; mm = task->mm; if (atomic_inc_not_zero(&mm->mm_users) == 0) return (EINVAL); vmap = kzalloc(sizeof(*vmap), GFP_KERNEL); vmap->vm_start = 0; vmap->vm_end = size; vmap->vm_pgoff = *offset / PAGE_SIZE; vmap->vm_pfn = 0; vmap->vm_flags = vmap->vm_page_prot = (nprot & VM_PROT_ALL); if (is_shared) vmap->vm_flags |= VM_SHARED; vmap->vm_ops = NULL; vmap->vm_file = get_file(filp); vmap->vm_mm = mm; if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) { error = linux_get_error(task, EINTR); } else { error = -OPW(fp, td, fop->mmap(filp, vmap)); error = linux_get_error(task, error); up_write(&vmap->vm_mm->mmap_sem); } if (error != 0) { linux_cdev_handle_free(vmap); return (error); } attr = pgprot2cachemode(vmap->vm_page_prot); if (vmap->vm_ops != NULL) { struct vm_area_struct *ptr; void *vm_private_data; bool vm_no_fault; if (vmap->vm_ops->open == NULL || vmap->vm_ops->close == NULL || vmap->vm_private_data == NULL) { /* free allocated VM area struct */ linux_cdev_handle_free(vmap); return (EINVAL); } vm_private_data = vmap->vm_private_data; rw_wlock(&linux_vma_lock); TAILQ_FOREACH(ptr, &linux_vma_head, vm_entry) { if (ptr->vm_private_data == vm_private_data) break; } /* check if there is an existing VM area struct */ if (ptr != NULL) { /* check if the VM area structure is invalid */ if (ptr->vm_ops == NULL || ptr->vm_ops->open == NULL || ptr->vm_ops->close == NULL) { error = ESTALE; vm_no_fault = 1; } else { error = EEXIST; vm_no_fault = (ptr->vm_ops->fault == NULL); } } else { /* insert VM area structure into list */ TAILQ_INSERT_TAIL(&linux_vma_head, vmap, vm_entry); error = 0; vm_no_fault = (vmap->vm_ops->fault == NULL); } rw_wunlock(&linux_vma_lock); if (error != 0) { /* free allocated VM area struct */ linux_cdev_handle_free(vmap); /* check for stale VM area struct */ if (error != EEXIST) return (error); } /* check if there is no fault handler */ if (vm_no_fault) { *object = cdev_pager_allocate(vm_private_data, OBJT_DEVICE, &linux_cdev_pager_ops[1], size, nprot, *offset, td->td_ucred); } else { *object = cdev_pager_allocate(vm_private_data, OBJT_MGTDEVICE, &linux_cdev_pager_ops[0], size, nprot, *offset, td->td_ucred); } /* check if allocating the VM object failed */ if (*object == NULL) { if (error == 0) { /* remove VM area struct from list */ linux_cdev_handle_remove(vmap); /* free allocated VM area struct */ linux_cdev_handle_free(vmap); } return (EINVAL); } } else { struct sglist *sg; sg = sglist_alloc(1, M_WAITOK); sglist_append_phys(sg, (vm_paddr_t)vmap->vm_pfn << PAGE_SHIFT, vmap->vm_len); *object = vm_pager_allocate(OBJT_SG, sg, vmap->vm_len, nprot, 0, td->td_ucred); linux_cdev_handle_free(vmap); if (*object == NULL) { sglist_free(sg); return (EINVAL); } } if (attr != VM_MEMATTR_DEFAULT) { VM_OBJECT_WLOCK(*object); vm_object_set_memattr(*object, attr); VM_OBJECT_WUNLOCK(*object); } *offset = 0; return (0); } struct cdevsw linuxcdevsw = { .d_version = D_VERSION, .d_fdopen = linux_dev_fdopen, .d_name = "lkpidev", }; static int linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct linux_file *filp; const struct file_operations *fop; struct linux_cdev *ldev; ssize_t bytes; int error; error = 0; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; /* XXX no support for I/O vectors currently */ if (uio->uio_iovcnt != 1) return (EOPNOTSUPP); if (uio->uio_resid > 
DEVFS_IOSIZE_MAX) return (EINVAL); linux_set_current(td); linux_get_fop(filp, &fop, &ldev); if (fop->read != NULL) { bytes = OPW(file, td, fop->read(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset)); if (bytes >= 0) { uio->uio_iov->iov_base = ((uint8_t *)uio->uio_iov->iov_base) + bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else { error = linux_get_error(current, -bytes); } } else error = ENXIO; /* update kqfilter status, if any */ linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ); linux_drop_fop(ldev); return (error); } static int linux_file_write(struct file *file, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct linux_file *filp; const struct file_operations *fop; struct linux_cdev *ldev; ssize_t bytes; int error; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; /* XXX no support for I/O vectors currently */ if (uio->uio_iovcnt != 1) return (EOPNOTSUPP); if (uio->uio_resid > DEVFS_IOSIZE_MAX) return (EINVAL); linux_set_current(td); linux_get_fop(filp, &fop, &ldev); if (fop->write != NULL) { bytes = OPW(file, td, fop->write(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset)); if (bytes >= 0) { uio->uio_iov->iov_base = ((uint8_t *)uio->uio_iov->iov_base) + bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; error = 0; } else { error = linux_get_error(current, -bytes); } } else error = ENXIO; /* update kqfilter status, if any */ linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE); linux_drop_fop(ldev); return (error); } static int linux_file_poll(struct file *file, int events, struct ucred *active_cred, struct thread *td) { struct linux_file *filp; const struct file_operations *fop; struct linux_cdev *ldev; int revents; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; linux_set_current(td); linux_get_fop(filp, &fop, &ldev); if (fop->poll != NULL) { revents = OPW(file, td, fop->poll(filp, LINUX_POLL_TABLE_NORMAL)) & events; } else { revents = 0; } linux_drop_fop(ldev); return (revents); } static int linux_file_close(struct file *file, struct thread *td) { struct linux_file *filp; int (*release)(struct inode *, struct linux_file *); const struct file_operations *fop; struct linux_cdev *ldev; int error; filp = (struct linux_file *)file->f_data; KASSERT(file_count(filp) == 0, ("File refcount(%d) is not zero", file_count(filp))); if (td == NULL) td = curthread; error = 0; filp->f_flags = file->f_flag; linux_set_current(td); linux_poll_wait_dequeue(filp); linux_get_fop(filp, &fop, &ldev); /* * Always use the real release function, if any, to avoid * leaking device resources: */ release = filp->f_op->release; if (release != NULL) error = -OPW(file, td, release(filp->f_vnode, filp)); funsetown(&filp->f_sigio); if (filp->f_vnode != NULL) vdrop(filp->f_vnode); linux_drop_fop(ldev); ldev = filp->f_cdev; if (ldev != NULL) linux_cdev_deref(ldev); linux_synchronize_rcu(RCU_TYPE_REGULAR); kfree(filp); return (error); } static int linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, struct thread *td) { struct linux_file *filp; const struct file_operations *fop; struct linux_cdev *ldev; struct fiodgname_arg *fgn; const char *p; int error, i; error = 0; filp = (struct linux_file *)fp->f_data; filp->f_flags = fp->f_flag; linux_get_fop(filp, &fop, &ldev); linux_set_current(td); switch (cmd) { case FIONBIO: break; case FIOASYNC: if (fop->fasync == NULL) break; error = -OPW(fp, td, fop->fasync(0, filp, fp->f_flag & FASYNC)); 
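		/*
		 * Note: FIOASYNC (and FIOSETOWN below) drive the Linux
		 * fasync() hook, which is the Linux-side mechanism for
		 * arming SIGIO delivery.
		 */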
break; case FIOSETOWN: error = fsetown(*(int *)data, &filp->f_sigio); if (error == 0) { if (fop->fasync == NULL) break; error = -OPW(fp, td, fop->fasync(0, filp, fp->f_flag & FASYNC)); } break; case FIOGETOWN: *(int *)data = fgetown(&filp->f_sigio); break; case FIODGNAME: #ifdef COMPAT_FREEBSD32 case FIODGNAME_32: #endif if (filp->f_cdev == NULL || filp->f_cdev->cdev == NULL) { error = ENXIO; break; } fgn = data; p = devtoname(filp->f_cdev->cdev); i = strlen(p) + 1; if (i > fgn->len) { error = EINVAL; break; } error = copyout(p, fiodgname_buf_get_ptr(fgn, cmd), i); break; default: error = linux_file_ioctl_sub(fp, filp, fop, cmd, data, td); break; } linux_drop_fop(ldev); return (error); } static int linux_file_mmap_sub(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t maxprot, int flags, struct file *fp, vm_ooffset_t *foff, const struct file_operations *fop, vm_object_t *objp) { /* * Character devices do not provide private mappings * of any kind: */ if ((maxprot & VM_PROT_WRITE) == 0 && (prot & VM_PROT_WRITE) != 0) return (EACCES); if ((flags & (MAP_PRIVATE | MAP_COPY)) != 0) return (EINVAL); return (linux_file_mmap_single(fp, fop, foff, objsize, objp, (int)prot, (flags & MAP_SHARED) ? true : false, td)); } static int linux_file_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td) { struct linux_file *filp; const struct file_operations *fop; struct linux_cdev *ldev; struct mount *mp; struct vnode *vp; vm_object_t object; vm_prot_t maxprot; int error; filp = (struct linux_file *)fp->f_data; vp = filp->f_vnode; if (vp == NULL) return (EOPNOTSUPP); /* * Ensure that file and memory protections are * compatible. */ mp = vp->v_mount; if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { maxprot = VM_PROT_NONE; if ((prot & VM_PROT_EXECUTE) != 0) return (EACCES); } else maxprot = VM_PROT_EXECUTE; if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_READ; else if ((prot & VM_PROT_READ) != 0) return (EACCES); /* * If we are sharing potential changes via MAP_SHARED and we * are trying to get write permission although we opened it * without asking for it, bail out. * * Note that most character devices always share mappings. * * Rely on linux_file_mmap_sub() to fail invalid MAP_PRIVATE * requests rather than doing it here. 
*/ if ((flags & MAP_SHARED) != 0) { if ((fp->f_flag & FWRITE) != 0) maxprot |= VM_PROT_WRITE; else if ((prot & VM_PROT_WRITE) != 0) return (EACCES); } maxprot &= cap_maxprot; linux_get_fop(filp, &fop, &ldev); error = linux_file_mmap_sub(td, size, prot, maxprot, flags, fp, &foff, fop, &object); if (error != 0) goto out; error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, foff, FALSE, td); if (error != 0) vm_object_deallocate(object); out: linux_drop_fop(ldev); return (error); } static int linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) { struct linux_file *filp; struct vnode *vp; int error; filp = (struct linux_file *)fp->f_data; if (filp->f_vnode == NULL) return (EOPNOTSUPP); vp = filp->f_vnode; vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_STAT(vp, sb, curthread->td_ucred, NOCRED); VOP_UNLOCK(vp); return (error); } static int linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { struct linux_file *filp; struct vnode *vp; int error; filp = fp->f_data; vp = filp->f_vnode; if (vp == NULL) { error = 0; kif->kf_type = KF_TYPE_DEV; } else { vref(vp); FILEDESC_SUNLOCK(fdp); error = vn_fill_kinfo_vnode(vp, kif); vrele(vp); kif->kf_type = KF_TYPE_VNODE; FILEDESC_SLOCK(fdp); } return (error); } unsigned int linux_iminor(struct inode *inode) { struct linux_cdev *ldev; if (inode == NULL || inode->v_rdev == NULL || inode->v_rdev->si_devsw != &linuxcdevsw) return (-1U); ldev = inode->v_rdev->si_drv1; if (ldev == NULL) return (-1U); return (minor(ldev->dev)); } static int linux_file_kcmp(struct file *fp1, struct file *fp2, struct thread *td) { struct linux_file *filp1, *filp2; if (fp2->f_type != DTYPE_DEV) return (3); filp1 = fp1->f_data; filp2 = fp2->f_data; return (kcmp_cmp((uintptr_t)filp1->f_cdev, (uintptr_t)filp2->f_cdev)); } struct fileops linuxfileops = { .fo_read = linux_file_read, .fo_write = linux_file_write, .fo_truncate = invfo_truncate, .fo_kqfilter = linux_file_kqfilter, .fo_stat = linux_file_stat, .fo_fill_kinfo = linux_file_fill_kinfo, .fo_poll = linux_file_poll, .fo_close = linux_file_close, .fo_ioctl = linux_file_ioctl, .fo_mmap = linux_file_mmap, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_cmp = linux_file_kcmp, .fo_flags = DFLAG_PASSABLE, }; /* * Hash of vmmap addresses. This is infrequently accessed and does not * need to be particularly large. This is done because we must store the * caller's idea of the map size to properly unmap. 
*/ struct vmmap { LIST_ENTRY(vmmap) vm_next; void *vm_addr; unsigned long vm_size; }; struct vmmaphd { struct vmmap *lh_first; }; #define VMMAP_HASH_SIZE 64 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; static struct mtx vmmaplock; static void vmmap_add(void *addr, unsigned long size) { struct vmmap *vmmap; vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); mtx_lock(&vmmaplock); vmmap->vm_size = size; vmmap->vm_addr = addr; LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); mtx_unlock(&vmmaplock); } static struct vmmap * vmmap_remove(void *addr) { struct vmmap *vmmap; mtx_lock(&vmmaplock); LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) if (vmmap->vm_addr == addr) break; if (vmmap) LIST_REMOVE(vmmap, vm_next); mtx_unlock(&vmmaplock); return (vmmap); } #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) || defined(__aarch64__) || defined(__riscv) void * _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) { void *addr; addr = pmap_mapdev_attr(phys_addr, size, attr); if (addr == NULL) return (NULL); vmmap_add(addr, size); return (addr); } #endif void iounmap(void *addr) { struct vmmap *vmmap; vmmap = vmmap_remove(addr); if (vmmap == NULL) return; #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) || defined(__aarch64__) || defined(__riscv) pmap_unmapdev(addr, vmmap->vm_size); #endif kfree(vmmap); } void * vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) { vm_offset_t off; size_t size; size = count * PAGE_SIZE; off = kva_alloc(size); if (off == 0) return (NULL); vmmap_add((void *)off, size); pmap_qenter(off, pages, count); return ((void *)off); } void vunmap(void *addr) { struct vmmap *vmmap; vmmap = vmmap_remove(addr); if (vmmap == NULL) return; pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); kva_free((vm_offset_t)addr, vmmap->vm_size); kfree(vmmap); } static char * devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap) { unsigned int len; char *p; va_list aq; va_copy(aq, ap); len = vsnprintf(NULL, 0, fmt, aq); va_end(aq); if (dev != NULL) p = devm_kmalloc(dev, len + 1, gfp); else p = kmalloc(len + 1, gfp); if (p != NULL) vsnprintf(p, len + 1, fmt, ap); return (p); } char * kvasprintf(gfp_t gfp, const char *fmt, va_list ap) { return (devm_kvasprintf(NULL, gfp, fmt, ap)); } char * lkpi_devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...) { va_list ap; char *p; va_start(ap, fmt); p = devm_kvasprintf(dev, gfp, fmt, ap); va_end(ap); return (p); } char * kasprintf(gfp_t gfp, const char *fmt, ...) 
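/*
 * Like devm_kvasprintf() above, this measures first and formats second:
 * e.g. kasprintf(GFP_KERNEL, "eth%d", 2) runs vsnprintf(NULL, 0, ...) to
 * learn the length (4), allocates 5 bytes and then writes "eth2".
 */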
{ va_list ap; char *p; va_start(ap, fmt); p = kvasprintf(gfp, fmt, ap); va_end(ap); return (p); } static void linux_timer_callback_wrapper(void *context) { struct timer_list *timer; timer = context; /* the timer is about to be shutdown permanently */ if (timer->function == NULL) return; if (linux_set_current_flags(curthread, M_NOWAIT)) { /* try again later */ callout_reset(&timer->callout, 1, &linux_timer_callback_wrapper, timer); return; } timer->function(timer->data); } int mod_timer(struct timer_list *timer, int expires) { int ret; timer->expires = expires; ret = callout_reset(&timer->callout, linux_timer_jiffies_until(expires), &linux_timer_callback_wrapper, timer); MPASS(ret == 0 || ret == 1); return (ret == 1); } void add_timer(struct timer_list *timer) { callout_reset(&timer->callout, linux_timer_jiffies_until(timer->expires), &linux_timer_callback_wrapper, timer); } void add_timer_on(struct timer_list *timer, int cpu) { callout_reset_on(&timer->callout, linux_timer_jiffies_until(timer->expires), &linux_timer_callback_wrapper, timer, cpu); } int del_timer(struct timer_list *timer) { if (callout_stop(&(timer)->callout) == -1) return (0); return (1); } int del_timer_sync(struct timer_list *timer) { if (callout_drain(&(timer)->callout) == -1) return (0); return (1); } int timer_delete_sync(struct timer_list *timer) { return (del_timer_sync(timer)); } int timer_shutdown_sync(struct timer_list *timer) { timer->function = NULL; return (del_timer_sync(timer)); } /* greatest common divisor, Euclid equation */ static uint64_t lkpi_gcd_64(uint64_t a, uint64_t b) { uint64_t an; uint64_t bn; while (b != 0) { an = b; bn = a % b; a = an; b = bn; } return (a); } uint64_t lkpi_nsec2hz_rem; uint64_t lkpi_nsec2hz_div = 1000000000ULL; uint64_t lkpi_nsec2hz_max; uint64_t lkpi_usec2hz_rem; uint64_t lkpi_usec2hz_div = 1000000ULL; uint64_t lkpi_usec2hz_max; uint64_t lkpi_msec2hz_rem; uint64_t lkpi_msec2hz_div = 1000ULL; uint64_t lkpi_msec2hz_max; static void linux_timer_init(void *arg) { uint64_t gcd; /* * Compute an internal HZ value which can divide 2**32 to * avoid timer rounding problems when the tick value wraps * around 2**32: */ linux_timer_hz_mask = 1; while (linux_timer_hz_mask < (unsigned long)hz) linux_timer_hz_mask *= 2; linux_timer_hz_mask--; /* compute some internal constants */ lkpi_nsec2hz_rem = hz; lkpi_usec2hz_rem = hz; lkpi_msec2hz_rem = hz; gcd = lkpi_gcd_64(lkpi_nsec2hz_rem, lkpi_nsec2hz_div); lkpi_nsec2hz_rem /= gcd; lkpi_nsec2hz_div /= gcd; lkpi_nsec2hz_max = -1ULL / lkpi_nsec2hz_rem; gcd = lkpi_gcd_64(lkpi_usec2hz_rem, lkpi_usec2hz_div); lkpi_usec2hz_rem /= gcd; lkpi_usec2hz_div /= gcd; lkpi_usec2hz_max = -1ULL / lkpi_usec2hz_rem; gcd = lkpi_gcd_64(lkpi_msec2hz_rem, lkpi_msec2hz_div); lkpi_msec2hz_rem /= gcd; lkpi_msec2hz_div /= gcd; lkpi_msec2hz_max = -1ULL / lkpi_msec2hz_rem; } SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL); void linux_complete_common(struct completion *c, int all) { - int wakeup_swapper; - sleepq_lock(c); if (all) { c->done = UINT_MAX; - wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); + sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); } else { if (c->done != UINT_MAX) c->done++; - wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); + sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); } sleepq_release(c); - if (wakeup_swapper) - kick_proc0(); } /* * Indefinite wait for done != 0 with or without signals. 
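 * On a signal the error is recorded via linux_schedule_save_interrupt_value()
 * and -ERESTARTSYS is returned, so a later linux_get_error() can report the
 * same Linux-style code back to the caller.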
*/ int linux_wait_for_common(struct completion *c, int flags) { struct task_struct *task; int error; if (SCHEDULER_STOPPED()) return (0); task = current; if (flags != 0) flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; else flags = SLEEPQ_SLEEP; error = 0; for (;;) { sleepq_lock(c); if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); if (flags & SLEEPQ_INTERRUPTIBLE) { DROP_GIANT(); error = -sleepq_wait_sig(c, 0); PICKUP_GIANT(); if (error != 0) { linux_schedule_save_interrupt_value(task, error); error = -ERESTARTSYS; goto intr; } } else { DROP_GIANT(); sleepq_wait(c, 0); PICKUP_GIANT(); } } if (c->done != UINT_MAX) c->done--; sleepq_release(c); intr: return (error); } /* * Time limited wait for done != 0 with or without signals. */ int linux_wait_for_timeout_common(struct completion *c, int timeout, int flags) { struct task_struct *task; int end = jiffies + timeout; int error; if (SCHEDULER_STOPPED()) return (0); task = current; if (flags != 0) flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; else flags = SLEEPQ_SLEEP; for (;;) { sleepq_lock(c); if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); sleepq_set_timeout(c, linux_timer_jiffies_until(end)); DROP_GIANT(); if (flags & SLEEPQ_INTERRUPTIBLE) error = -sleepq_timedwait_sig(c, 0); else error = -sleepq_timedwait(c, 0); PICKUP_GIANT(); if (error != 0) { /* check for timeout */ if (error == -EWOULDBLOCK) { error = 0; /* timeout */ } else { /* signal happened */ linux_schedule_save_interrupt_value(task, error); error = -ERESTARTSYS; } goto done; } } if (c->done != UINT_MAX) c->done--; sleepq_release(c); /* return how many jiffies are left */ error = linux_timer_jiffies_until(end); done: return (error); } int linux_try_wait_for_completion(struct completion *c) { int isdone; sleepq_lock(c); isdone = (c->done != 0); if (c->done != 0 && c->done != UINT_MAX) c->done--; sleepq_release(c); return (isdone); } int linux_completion_done(struct completion *c) { int isdone; sleepq_lock(c); isdone = (c->done != 0); sleepq_release(c); return (isdone); } static void linux_cdev_deref(struct linux_cdev *ldev) { if (refcount_release(&ldev->refs) && ldev->kobj.ktype == &linux_cdev_ktype) kfree(ldev); } static void linux_cdev_release(struct kobject *kobj) { struct linux_cdev *cdev; struct kobject *parent; cdev = container_of(kobj, struct linux_cdev, kobj); parent = kobj->parent; linux_destroy_dev(cdev); linux_cdev_deref(cdev); kobject_put(parent); } static void linux_cdev_static_release(struct kobject *kobj) { struct cdev *cdev; struct linux_cdev *ldev; ldev = container_of(kobj, struct linux_cdev, kobj); cdev = ldev->cdev; if (cdev != NULL) { destroy_dev(cdev); ldev->cdev = NULL; } kobject_put(kobj->parent); } int linux_cdev_device_add(struct linux_cdev *ldev, struct device *dev) { int ret; if (dev->devt != 0) { /* Set parent kernel object. 
*/ ldev->kobj.parent = &dev->kobj; /* * Unlike Linux we require the kobject of the * character device structure to have a valid name * before calling this function: */ if (ldev->kobj.name == NULL) return (-EINVAL); ret = cdev_add(ldev, dev->devt, 1); if (ret) return (ret); } ret = device_add(dev); if (ret != 0 && dev->devt != 0) cdev_del(ldev); return (ret); } void linux_cdev_device_del(struct linux_cdev *ldev, struct device *dev) { device_del(dev); if (dev->devt != 0) cdev_del(ldev); } static void linux_destroy_dev(struct linux_cdev *ldev) { if (ldev->cdev == NULL) return; MPASS((ldev->siref & LDEV_SI_DTR) == 0); MPASS(ldev->kobj.ktype == &linux_cdev_ktype); atomic_set_int(&ldev->siref, LDEV_SI_DTR); while ((atomic_load_int(&ldev->siref) & ~LDEV_SI_DTR) != 0) pause("ldevdtr", hz / 4); destroy_dev(ldev->cdev); ldev->cdev = NULL; } const struct kobj_type linux_cdev_ktype = { .release = linux_cdev_release, }; const struct kobj_type linux_cdev_static_ktype = { .release = linux_cdev_static_release, }; static void linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate) { struct notifier_block *nb; struct netdev_notifier_info ni; nb = arg; ni.ifp = ifp; ni.dev = (struct net_device *)ifp; if (linkstate == LINK_STATE_UP) nb->notifier_call(nb, NETDEV_UP, &ni); else nb->notifier_call(nb, NETDEV_DOWN, &ni); } static void linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; struct netdev_notifier_info ni; nb = arg; ni.ifp = ifp; ni.dev = (struct net_device *)ifp; nb->notifier_call(nb, NETDEV_REGISTER, &ni); } static void linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; struct netdev_notifier_info ni; nb = arg; ni.ifp = ifp; ni.dev = (struct net_device *)ifp; nb->notifier_call(nb, NETDEV_UNREGISTER, &ni); } static void linux_handle_iflladdr_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; struct netdev_notifier_info ni; nb = arg; ni.ifp = ifp; ni.dev = (struct net_device *)ifp; nb->notifier_call(nb, NETDEV_CHANGEADDR, &ni); } static void linux_handle_ifaddr_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; struct netdev_notifier_info ni; nb = arg; ni.ifp = ifp; ni.dev = (struct net_device *)ifp; nb->notifier_call(nb, NETDEV_CHANGEIFADDR, &ni); } int register_netdevice_notifier(struct notifier_block *nb) { nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER( ifnet_link_event, linux_handle_ifnet_link_event, nb, 0); nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER( ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0); nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER( ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0); nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER( iflladdr_event, linux_handle_iflladdr_event, nb, 0); return (0); } int register_inetaddr_notifier(struct notifier_block *nb) { nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER( ifaddr_event, linux_handle_ifaddr_event, nb, 0); return (0); } int unregister_netdevice_notifier(struct notifier_block *nb) { EVENTHANDLER_DEREGISTER(ifnet_link_event, nb->tags[NETDEV_UP]); EVENTHANDLER_DEREGISTER(ifnet_arrival_event, nb->tags[NETDEV_REGISTER]); EVENTHANDLER_DEREGISTER(ifnet_departure_event, nb->tags[NETDEV_UNREGISTER]); EVENTHANDLER_DEREGISTER(iflladdr_event, nb->tags[NETDEV_CHANGEADDR]); return (0); } int unregister_inetaddr_notifier(struct notifier_block *nb) { EVENTHANDLER_DEREGISTER(ifaddr_event, nb->tags[NETDEV_CHANGEIFADDR]); return (0); } struct list_sort_thunk { int (*cmp)(void *, 
struct list_head *, struct list_head *); void *priv; }; static inline int linux_le_cmp(const void *d1, const void *d2, void *priv) { struct list_head *le1, *le2; struct list_sort_thunk *thunk; thunk = priv; le1 = *(__DECONST(struct list_head **, d1)); le2 = *(__DECONST(struct list_head **, d2)); return ((thunk->cmp)(thunk->priv, le1, le2)); } void list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv, struct list_head *a, struct list_head *b)) { struct list_sort_thunk thunk; struct list_head **ar, *le; size_t count, i; count = 0; list_for_each(le, head) count++; ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK); i = 0; list_for_each(le, head) ar[i++] = le; thunk.cmp = cmp; thunk.priv = priv; qsort_r(ar, count, sizeof(struct list_head *), linux_le_cmp, &thunk); INIT_LIST_HEAD(head); for (i = 0; i < count; i++) list_add_tail(ar[i], head); free(ar, M_KMALLOC); } #if defined(__i386__) || defined(__amd64__) int linux_wbinvd_on_all_cpus(void) { pmap_invalidate_cache(); return (0); } #endif int linux_on_each_cpu(void callback(void *), void *data) { smp_rendezvous(smp_no_rendezvous_barrier, callback, smp_no_rendezvous_barrier, data); return (0); } int linux_in_atomic(void) { return ((curthread->td_pflags & TDP_NOFAULTING) != 0); } struct linux_cdev * linux_find_cdev(const char *name, unsigned major, unsigned minor) { dev_t dev = MKDEV(major, minor); struct cdev *cdev; dev_lock(); LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) { struct linux_cdev *ldev = cdev->si_drv1; if (ldev->dev == dev && strcmp(kobject_name(&ldev->kobj), name) == 0) { break; } } dev_unlock(); return (cdev != NULL ? cdev->si_drv1 : NULL); } int __register_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops) { struct linux_cdev *cdev; int ret = 0; int i; for (i = baseminor; i < baseminor + count; i++) { cdev = cdev_alloc(); cdev->ops = fops; kobject_set_name(&cdev->kobj, name); ret = cdev_add(cdev, makedev(major, i), 1); if (ret != 0) break; } return (ret); } int __register_chrdev_p(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops, uid_t uid, gid_t gid, int mode) { struct linux_cdev *cdev; int ret = 0; int i; for (i = baseminor; i < baseminor + count; i++) { cdev = cdev_alloc(); cdev->ops = fops; kobject_set_name(&cdev->kobj, name); ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode); if (ret != 0) break; } return (ret); } void __unregister_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name) { struct linux_cdev *cdevp; int i; for (i = baseminor; i < baseminor + count; i++) { cdevp = linux_find_cdev(name, major, i); if (cdevp != NULL) cdev_del(cdevp); } } void linux_dump_stack(void) { #ifdef STACK struct stack st; stack_save(&st); stack_print(&st); #endif } int linuxkpi_net_ratelimit(void) { return (ppsratecheck(&lkpi_net_lastlog, &lkpi_net_curpps, lkpi_net_maxpps)); } struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *mapping; mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); if (mapping == NULL) return (NULL); return (io_mapping_init_wc(mapping, base, size)); } /* We likely want a linuxkpi_device.c at some point. */ bool device_can_wakeup(struct device *dev) { if (dev == NULL) return (false); /* * XXX-BZ iwlwifi queries it as part of enabling WoWLAN. * Normally this would be based on a bool in dev->power.XXX. * Check such as PCI PCIM_PCAP_*PME. 
We have no way to enable this yet. * We may get away by directly calling into bsddev for as long as * we can assume PCI only avoiding changing struct device breaking KBI. */ pr_debug("%s:%d: not enabled; see comment.\n", __func__, __LINE__); return (false); } static void devm_device_group_remove(struct device *dev, void *p) { const struct attribute_group **dr = p; const struct attribute_group *group = *dr; sysfs_remove_group(&dev->kobj, group); } int lkpi_devm_device_add_group(struct device *dev, const struct attribute_group *group) { const struct attribute_group **dr; int ret; dr = devres_alloc(devm_device_group_remove, sizeof(*dr), GFP_KERNEL); if (dr == NULL) return (-ENOMEM); ret = sysfs_create_group(&dev->kobj, group); if (ret == 0) { *dr = group; devres_add(dev, dr); } else devres_free(dr); return (ret); } #if defined(__i386__) || defined(__amd64__) bool linux_cpu_has_clflush; struct cpuinfo_x86 boot_cpu_data; struct cpuinfo_x86 *__cpu_data; #endif cpumask_t * lkpi_get_static_single_cpu_mask(int cpuid) { KASSERT((cpuid >= 0 && cpuid <= mp_maxid), ("%s: invalid cpuid %d\n", __func__, cpuid)); KASSERT(!CPU_ABSENT(cpuid), ("%s: cpu with cpuid %d is absent\n", __func__, cpuid)); return (static_single_cpu_mask[cpuid]); } bool lkpi_xen_initial_domain(void) { #ifdef XENHVM return (xen_initial_domain()); #else return (false); #endif } bool lkpi_xen_pv_domain(void) { #ifdef XENHVM return (xen_pv_domain()); #else return (false); #endif } static void linux_compat_init(void *arg) { struct sysctl_oid *rootoid; int i; #if defined(__i386__) || defined(__amd64__) static const uint32_t x86_vendors[X86_VENDOR_NUM] = { [X86_VENDOR_INTEL] = CPU_VENDOR_INTEL, [X86_VENDOR_CYRIX] = CPU_VENDOR_CYRIX, [X86_VENDOR_AMD] = CPU_VENDOR_AMD, [X86_VENDOR_UMC] = CPU_VENDOR_UMC, [X86_VENDOR_CENTAUR] = CPU_VENDOR_CENTAUR, [X86_VENDOR_TRANSMETA] = CPU_VENDOR_TRANSMETA, [X86_VENDOR_NSC] = CPU_VENDOR_NSC, [X86_VENDOR_HYGON] = CPU_VENDOR_HYGON, }; uint8_t x86_vendor = X86_VENDOR_UNKNOWN; for (i = 0; i < X86_VENDOR_NUM; i++) { if (cpu_vendor_id != 0 && cpu_vendor_id == x86_vendors[i]) { x86_vendor = i; break; } } linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH); boot_cpu_data.x86_clflush_size = cpu_clflush_line_size; boot_cpu_data.x86_max_cores = mp_ncpus; boot_cpu_data.x86 = CPUID_TO_FAMILY(cpu_id); boot_cpu_data.x86_model = CPUID_TO_MODEL(cpu_id); boot_cpu_data.x86_vendor = x86_vendor; __cpu_data = mallocarray(mp_maxid + 1, sizeof(*__cpu_data), M_KMALLOC, M_WAITOK | M_ZERO); CPU_FOREACH(i) { __cpu_data[i].x86_clflush_size = cpu_clflush_line_size; __cpu_data[i].x86_max_cores = mp_ncpus; __cpu_data[i].x86 = CPUID_TO_FAMILY(cpu_id); __cpu_data[i].x86_model = CPUID_TO_MODEL(cpu_id); __cpu_data[i].x86_vendor = x86_vendor; } #endif rw_init(&linux_vma_lock, "lkpi-vma-lock"); rootoid = SYSCTL_ADD_ROOT_NODE(NULL, OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); kobject_init(&linux_class_root, &linux_class_ktype); kobject_set_name(&linux_class_root, "class"); linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); kobject_init(&linux_root_device.kobj, &linux_dev_ktype); kobject_set_name(&linux_root_device.kobj, "device"); linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "device"); linux_root_device.bsddev = root_bus; linux_class_misc.name = "misc"; class_register(&linux_class_misc); INIT_LIST_HEAD(&pci_drivers); INIT_LIST_HEAD(&pci_devices); 
spin_lock_init(&pci_lock); mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); for (i = 0; i < VMMAP_HASH_SIZE; i++) LIST_INIT(&vmmaphead[i]); init_waitqueue_head(&linux_bit_waitq); init_waitqueue_head(&linux_var_waitq); CPU_COPY(&all_cpus, &cpu_online_mask); /* * Generate a single-CPU cpumask_t for each CPU (possibly) in the system. * CPUs are indexed from 0..(mp_maxid). The entry for cpuid 0 will only * have itself in the cpumask, cpuid 1 only itself on entry 1, and so on. * This is used by cpumask_of() (and possibly others in the future) for, * e.g., drivers to pass hints to irq_set_affinity_hint(). */ static_single_cpu_mask = mallocarray(mp_maxid + 1, sizeof(static_single_cpu_mask), M_KMALLOC, M_WAITOK | M_ZERO); /* * When the number of CPUs reaches a threshold, we start to save memory * given the sets are static by overlapping those having their single * bit set at the same position in a bitset word. Asymptotically, this * regular scheme is in O(n²) whereas the overlapping one is in O(n) * only, with n being the maximum number of CPUs, so the gain will become * huge quite quickly. The threshold for 64-bit architectures is 128 * CPUs. */ if (mp_ncpus < (2 * _BITSET_BITS)) { cpumask_t *sscm_ptr; /* * This represents 'mp_ncpus * __bitset_words(CPU_SETSIZE) * * (_BITSET_BITS / 8)' bytes (for comparison with the * overlapping scheme). */ static_single_cpu_mask_lcs = mallocarray(mp_ncpus, sizeof(*static_single_cpu_mask_lcs), M_KMALLOC, M_WAITOK | M_ZERO); sscm_ptr = static_single_cpu_mask_lcs; CPU_FOREACH(i) { static_single_cpu_mask[i] = sscm_ptr++; CPU_SET(i, static_single_cpu_mask[i]); } } else { /* Pointer to a bitset word. */ __typeof(((cpuset_t *)NULL)->__bits[0]) *bwp; /* * Allocate memory for (static) spans of 'cpumask_t' ('cpuset_t' * really) with a single bit set that can be reused for all * single CPU masks by making them start at different offsets. * We need '__bitset_words(CPU_SETSIZE) - 1' bitset words before * the word having its single bit set, and the same amount * after. */ static_single_cpu_mask_lcs = mallocarray(_BITSET_BITS, (2 * __bitset_words(CPU_SETSIZE) - 1) * (_BITSET_BITS / 8), M_KMALLOC, M_WAITOK | M_ZERO); /* * We rely below on cpuset_t and the bitset generic * implementation assigning words in the '__bits' array in the * same order of bits (i.e., little-endian ordering, not to be * confused with machine endianness, which concerns bits in * words and other integers). This is an imperfect test, but it * will detect a change to big-endian ordering. */ _Static_assert( __bitset_word(_BITSET_BITS + 1, _BITSET_BITS) == 1, "Assumes a bitset implementation that is little-endian " "on its words"); /* Initialize the single bit of each static span. */ bwp = (__typeof(bwp))static_single_cpu_mask_lcs + (__bitset_words(CPU_SETSIZE) - 1); for (i = 0; i < _BITSET_BITS; i++) { CPU_SET(i, (cpuset_t *)bwp); bwp += (2 * __bitset_words(CPU_SETSIZE) - 1); } /* * Finally set all CPU masks to the proper word in their * relevant span. */ CPU_FOREACH(i) { bwp = (__typeof(bwp))static_single_cpu_mask_lcs; /* Find the non-zero word of the relevant span. */ bwp += (2 * __bitset_words(CPU_SETSIZE) - 1) * (i % _BITSET_BITS) + __bitset_words(CPU_SETSIZE) - 1; /* Shift to find the CPU mask start.
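 * For cpuid 'i', span (i % _BITSET_BITS) has its single bit set at bit
 * position (i % _BITSET_BITS) of its central word, so stepping back
 * (i / _BITSET_BITS) words makes that word land at word index
 * (i / _BITSET_BITS) of the resulting mask, i.e., bit 'i' overall.
 * Worked example (64-bit words): cpuid 65 uses span 1 shifted back one
 * word, leaving bit 1 of word 1 set, which is CPU 65.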
*/ bwp -= (i / _BITSET_BITS); static_single_cpu_mask[i] = (cpuset_t *)bwp; } } strlcpy(init_uts_ns.name.release, osrelease, sizeof(init_uts_ns.name.release)); } SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); static void linux_compat_uninit(void *arg) { linux_kobject_kfree_name(&linux_class_root); linux_kobject_kfree_name(&linux_root_device.kobj); linux_kobject_kfree_name(&linux_class_misc.kobj); free(static_single_cpu_mask_lcs, M_KMALLOC); free(static_single_cpu_mask, M_KMALLOC); #if defined(__i386__) || defined(__amd64__) free(__cpu_data, M_KMALLOC); #endif mtx_destroy(&vmmaplock); spin_lock_destroy(&pci_lock); rw_destroy(&linux_vma_lock); } SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); /* * NOTE: Linux frequently uses "unsigned long" for pointer to integer * conversion and vice versa, where in FreeBSD "uintptr_t" would be * used. Assert these types have the same size, else some parts of the * LinuxKPI may not work as expected: */ CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t)); diff --git a/sys/compat/linuxkpi/common/src/linux_schedule.c b/sys/compat/linuxkpi/common/src/linux_schedule.c index 3349a4aa8d72..fa20a11f5ec7 100644 --- a/sys/compat/linuxkpi/common/src/linux_schedule.c +++ b/sys/compat/linuxkpi/common/src/linux_schedule.c @@ -1,423 +1,417 @@ /*- * Copyright (c) 2017 Mark Johnston * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include static int linux_add_to_sleepqueue(void *wchan, struct task_struct *task, const char *wmesg, int timeout, int state) { int flags, ret; MPASS((state & ~(TASK_PARKED | TASK_NORMAL)) == 0); flags = SLEEPQ_SLEEP | ((state & TASK_INTERRUPTIBLE) != 0 ?
SLEEPQ_INTERRUPTIBLE : 0); sleepq_add(wchan, NULL, wmesg, flags, 0); if (timeout != 0) sleepq_set_timeout(wchan, timeout); DROP_GIANT(); if ((state & TASK_INTERRUPTIBLE) != 0) { if (timeout == 0) ret = -sleepq_wait_sig(wchan, 0); else ret = -sleepq_timedwait_sig(wchan, 0); } else { if (timeout == 0) { sleepq_wait(wchan, 0); ret = 0; } else ret = -sleepq_timedwait(wchan, 0); } PICKUP_GIANT(); /* filter return value */ if (ret != 0 && ret != -EWOULDBLOCK) { linux_schedule_save_interrupt_value(task, ret); ret = -ERESTARTSYS; } return (ret); } unsigned int linux_msleep_interruptible(unsigned int ms) { int ret; /* guard against invalid values */ if (ms == 0) ms = 1; ret = -pause_sbt("lnxsleep", mstosbt(ms), 0, C_HARDCLOCK | C_CATCH); switch (ret) { case -EWOULDBLOCK: return (0); default: linux_schedule_save_interrupt_value(current, ret); return (ms); } } static int wake_up_task(struct task_struct *task, unsigned int state) { - int ret, wakeup_swapper; + int ret; - ret = wakeup_swapper = 0; + ret = 0; sleepq_lock(task); if ((atomic_read(&task->state) & state) != 0) { set_task_state(task, TASK_WAKING); - wakeup_swapper = sleepq_signal(task, SLEEPQ_SLEEP, 0, 0); + sleepq_signal(task, SLEEPQ_SLEEP, 0, 0); ret = 1; } sleepq_release(task); - if (wakeup_swapper) - kick_proc0(); return (ret); } bool linux_signal_pending(struct task_struct *task) { struct thread *td; sigset_t pending; td = task->task_thread; PROC_LOCK(td->td_proc); pending = td->td_siglist; SIGSETOR(pending, td->td_proc->p_siglist); SIGSETNAND(pending, td->td_sigmask); PROC_UNLOCK(td->td_proc); return (!SIGISEMPTY(pending)); } bool linux_fatal_signal_pending(struct task_struct *task) { struct thread *td; bool ret; td = task->task_thread; PROC_LOCK(td->td_proc); ret = SIGISMEMBER(td->td_siglist, SIGKILL) || SIGISMEMBER(td->td_proc->p_siglist, SIGKILL); PROC_UNLOCK(td->td_proc); return (ret); } bool linux_signal_pending_state(long state, struct task_struct *task) { MPASS((state & ~TASK_NORMAL) == 0); if ((state & TASK_INTERRUPTIBLE) == 0) return (false); return (linux_signal_pending(task)); } void linux_send_sig(int signo, struct task_struct *task) { struct thread *td; td = task->task_thread; PROC_LOCK(td->td_proc); tdsignal(td, signo); PROC_UNLOCK(td->td_proc); } int autoremove_wake_function(wait_queue_t *wq, unsigned int state, int flags, void *key __unused) { struct task_struct *task; int ret; task = wq->private; if ((ret = wake_up_task(task, state)) != 0) list_del_init(&wq->task_list); return (ret); } int default_wake_function(wait_queue_t *wq, unsigned int state, int flags, void *key __unused) { return (wake_up_task(wq->private, state)); } void linux_init_wait_entry(wait_queue_t *wq, int flags) { memset(wq, 0, sizeof(*wq)); wq->flags = flags; wq->private = current; wq->func = autoremove_wake_function; INIT_LIST_HEAD(&wq->task_list); } void linux_wake_up(wait_queue_head_t *wqh, unsigned int state, int nr, bool locked) { wait_queue_t *pos, *next; if (!locked) spin_lock(&wqh->lock); list_for_each_entry_safe(pos, next, &wqh->task_list, task_list) { if (pos->func == NULL) { if (wake_up_task(pos->private, state) != 0 && --nr == 0) break; } else { if (pos->func(pos, state, 0, NULL) != 0 && --nr == 0) break; } } if (!locked) spin_unlock(&wqh->lock); } void linux_prepare_to_wait(wait_queue_head_t *wqh, wait_queue_t *wq, int state) { spin_lock(&wqh->lock); if (list_empty(&wq->task_list)) __add_wait_queue(wqh, wq); set_task_state(current, state); spin_unlock(&wqh->lock); } void linux_finish_wait(wait_queue_head_t *wqh, wait_queue_t *wq) { 
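	/*
	 * Illustrative sketch of the Linux-style wait loop that
	 * linux_prepare_to_wait() and this function back (a minimal
	 * sketch, assuming the LinuxKPI DEFINE_WAIT()/schedule()
	 * wrappers; 'condition' is hypothetical consumer state):
	 *
	 *	DEFINE_WAIT(wait);
	 *	for (;;) {
	 *		prepare_to_wait(&wqh, &wait, TASK_UNINTERRUPTIBLE);
	 *		if (condition)
	 *			break;
	 *		schedule();
	 *	}
	 *	finish_wait(&wqh, &wait);
	 */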
spin_lock(&wqh->lock); set_task_state(current, TASK_RUNNING); if (!list_empty(&wq->task_list)) { __remove_wait_queue(wqh, wq); INIT_LIST_HEAD(&wq->task_list); } spin_unlock(&wqh->lock); } bool linux_waitqueue_active(wait_queue_head_t *wqh) { bool ret; spin_lock(&wqh->lock); ret = !list_empty(&wqh->task_list); spin_unlock(&wqh->lock); return (ret); } int linux_wait_event_common(wait_queue_head_t *wqh, wait_queue_t *wq, int timeout, unsigned int state, spinlock_t *lock) { struct task_struct *task; int ret; if (lock != NULL) spin_unlock_irq(lock); /* range check timeout */ if (timeout < 1) timeout = 1; else if (timeout == MAX_SCHEDULE_TIMEOUT) timeout = 0; task = current; sleepq_lock(task); if (atomic_read(&task->state) != TASK_WAKING) { ret = linux_add_to_sleepqueue(task, task, "wevent", timeout, state); } else { sleepq_release(task); ret = 0; } if (lock != NULL) spin_lock_irq(lock); return (ret); } int linux_schedule_timeout(int timeout) { struct task_struct *task; int ret; int state; int remainder; task = current; /* range check timeout */ if (timeout < 1) timeout = 1; else if (timeout == MAX_SCHEDULE_TIMEOUT) timeout = 0; remainder = ticks + timeout; sleepq_lock(task); state = atomic_read(&task->state); if (state != TASK_WAKING) { ret = linux_add_to_sleepqueue(task, task, "sched", timeout, state); } else { sleepq_release(task); ret = 0; } set_task_state(task, TASK_RUNNING); if (timeout == 0) return (MAX_SCHEDULE_TIMEOUT); /* range check return value */ remainder -= ticks; /* range check return value */ if (ret == -ERESTARTSYS && remainder < 1) remainder = 1; else if (remainder < 0) remainder = 0; else if (remainder > timeout) remainder = timeout; return (remainder); } static void wake_up_sleepers(void *wchan) { - int wakeup_swapper; - sleepq_lock(wchan); - wakeup_swapper = sleepq_signal(wchan, SLEEPQ_SLEEP, 0, 0); + sleepq_signal(wchan, SLEEPQ_SLEEP, 0, 0); sleepq_release(wchan); - if (wakeup_swapper) - kick_proc0(); } #define bit_to_wchan(word, bit) ((void *)(((uintptr_t)(word) << 6) | (bit))) void linux_wake_up_bit(void *word, int bit) { wake_up_sleepers(bit_to_wchan(word, bit)); } int linux_wait_on_bit_timeout(unsigned long *word, int bit, unsigned int state, int timeout) { struct task_struct *task; void *wchan; int ret; /* range check timeout */ if (timeout < 1) timeout = 1; else if (timeout == MAX_SCHEDULE_TIMEOUT) timeout = 0; task = current; wchan = bit_to_wchan(word, bit); for (;;) { sleepq_lock(wchan); if ((*word & (1 << bit)) == 0) { sleepq_release(wchan); ret = 0; break; } set_task_state(task, state); ret = linux_add_to_sleepqueue(wchan, task, "wbit", timeout, state); if (ret != 0) break; } set_task_state(task, TASK_RUNNING); return (ret); } void linux_wake_up_atomic_t(atomic_t *a) { wake_up_sleepers(a); } int linux_wait_on_atomic_t(atomic_t *a, unsigned int state) { struct task_struct *task; void *wchan; int ret; task = current; wchan = a; for (;;) { sleepq_lock(wchan); if (atomic_read(a) == 0) { sleepq_release(wchan); ret = 0; break; } set_task_state(task, state); ret = linux_add_to_sleepqueue(wchan, task, "watomic", 0, state); if (ret != 0) break; } set_task_state(task, TASK_RUNNING); return (ret); } bool linux_wake_up_state(struct task_struct *task, unsigned int state) { return (wake_up_task(task, state) != 0); } diff --git a/sys/dev/qat/qat_common/adf_aer.c b/sys/dev/qat/qat_common/adf_aer.c index 7fdeba873420..6be8ab04bbd7 100644 --- a/sys/dev/qat/qat_common/adf_aer.c +++ b/sys/dev/qat/qat_common/adf_aer.c @@ -1,339 +1,335 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* 
Copyright(c) 2007-2022 Intel Corporation */ #include "qat_freebsd.h" #include "adf_cfg.h" #include "adf_common_drv.h" #include "adf_accel_devices.h" #include "icp_qat_uclo.h" #include "icp_qat_fw.h" #include "icp_qat_fw_init_admin.h" #include "adf_cfg_strings.h" #include "adf_transport_access_macros.h" #include "adf_transport_internal.h" #include #include #include #include #define ADF_PPAERUCM_MASK (BIT(14) | BIT(20) | BIT(22)) static struct workqueue_struct *fatal_error_wq; struct adf_fatal_error_data { struct adf_accel_dev *accel_dev; struct work_struct work; }; static struct workqueue_struct *device_reset_wq; void linux_complete_common(struct completion *c, int all) { - int wakeup_swapper; - sleepq_lock(c); c->done++; if (all) - wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); + sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); else - wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); + sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); sleepq_release(c); - if (wakeup_swapper) - kick_proc0(); } /* reset dev data */ struct adf_reset_dev_data { int mode; struct adf_accel_dev *accel_dev; struct completion compl; struct work_struct reset_work; }; int adf_aer_store_ppaerucm_reg(device_t dev, struct adf_hw_device_data *hw_data) { unsigned int aer_offset, reg_val = 0; if (!hw_data) return -EINVAL; if (pci_find_extcap(dev, PCIZ_AER, &aer_offset) == 0) { reg_val = pci_read_config(dev, aer_offset + PCIR_AER_UC_MASK, 4); hw_data->aerucm_mask = reg_val; } else { device_printf(dev, "Unable to find AER capability of the device\n"); return -ENODEV; } return 0; } void adf_reset_sbr(struct adf_accel_dev *accel_dev) { device_t pdev = accel_to_pci_dev(accel_dev); device_t parent = device_get_parent(device_get_parent(pdev)); uint16_t bridge_ctl = 0; if (accel_dev->is_vf) return; if (!parent) parent = pdev; if (!pcie_wait_for_pending_transactions(pdev, 0)) device_printf(GET_DEV(accel_dev), "Transaction still in progress. 
Proceeding\n"); device_printf(GET_DEV(accel_dev), "Secondary bus reset\n"); pci_save_state(pdev); bridge_ctl = pci_read_config(parent, PCIR_BRIDGECTL_1, 2); bridge_ctl |= PCIB_BCR_SECBUS_RESET; pci_write_config(parent, PCIR_BRIDGECTL_1, bridge_ctl, 2); pause_ms("adfrst", 100); bridge_ctl &= ~PCIB_BCR_SECBUS_RESET; pci_write_config(parent, PCIR_BRIDGECTL_1, bridge_ctl, 2); pause_ms("adfrst", 100); pci_restore_state(pdev); } void adf_reset_flr(struct adf_accel_dev *accel_dev) { device_t pdev = accel_to_pci_dev(accel_dev); pci_save_state(pdev); if (pcie_flr(pdev, max(pcie_get_max_completion_timeout(pdev) / 1000, 10), true)) { pci_restore_state(pdev); return; } pci_restore_state(pdev); device_printf(GET_DEV(accel_dev), "FLR qat_dev%d failed trying secondary bus reset\n", accel_dev->accel_id); adf_reset_sbr(accel_dev); } void adf_dev_pre_reset(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; device_t pdev = accel_to_pci_dev(accel_dev); u32 aer_offset, reg_val = 0; if (pci_find_extcap(pdev, PCIZ_AER, &aer_offset) == 0) { reg_val = pci_read_config(pdev, aer_offset + PCIR_AER_UC_MASK, 4); reg_val |= ADF_PPAERUCM_MASK; pci_write_config(pdev, aer_offset + PCIR_AER_UC_MASK, reg_val, 4); } else { device_printf(pdev, "Unable to find AER capability of the device\n"); } if (hw_device->disable_arb) { device_printf(GET_DEV(accel_dev), "Disable arbiter.\n"); hw_device->disable_arb(accel_dev); } } void adf_dev_post_reset(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; device_t pdev = accel_to_pci_dev(accel_dev); u32 aer_offset; if (pci_find_extcap(pdev, PCIZ_AER, &aer_offset) == 0) { pci_write_config(pdev, aer_offset + PCIR_AER_UC_MASK, hw_device->aerucm_mask, 4); } else { device_printf(pdev, "Unable to find AER capability of the device\n"); } } void adf_dev_restore(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; device_t pdev = accel_to_pci_dev(accel_dev); if (hw_device->pre_reset) { dev_dbg(GET_DEV(accel_dev), "Performing pre reset save\n"); hw_device->pre_reset(accel_dev); } if (hw_device->reset_device) { device_printf(GET_DEV(accel_dev), "Resetting device qat_dev%d\n", accel_dev->accel_id); hw_device->reset_device(accel_dev); pci_restore_state(pdev); pci_save_state(pdev); } if (hw_device->post_reset) { dev_dbg(GET_DEV(accel_dev), "Performing post reset restore\n"); hw_device->post_reset(accel_dev); } } static void adf_device_reset_worker(struct work_struct *work) { struct adf_reset_dev_data *reset_data = container_of(work, struct adf_reset_dev_data, reset_work); struct adf_accel_dev *accel_dev = reset_data->accel_dev; if (adf_dev_restarting_notify(accel_dev)) { device_printf(GET_DEV(accel_dev), "Unable to send RESTARTING notification.\n"); return; } if (adf_dev_stop(accel_dev)) { device_printf(GET_DEV(accel_dev), "Stopping device failed.\n"); return; } adf_dev_shutdown(accel_dev); if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { /* The device hung and we can't restart it, so stop here. */ device_printf(GET_DEV(accel_dev), "Restart device failed\n"); if (reset_data->mode == ADF_DEV_RESET_ASYNC) kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; } adf_dev_restarted_notify(accel_dev); clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); /* The dev is back alive.
Notify the caller if in sync mode */ if (reset_data->mode == ADF_DEV_RESET_SYNC) complete(&reset_data->compl); else kfree(reset_data); } int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, enum adf_dev_reset_mode mode) { struct adf_reset_dev_data *reset_data; if (!adf_dev_started(accel_dev) || test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) return 0; set_bit(ADF_STATUS_RESTARTING, &accel_dev->status); reset_data = kzalloc(sizeof(*reset_data), GFP_ATOMIC); if (!reset_data) return -ENOMEM; reset_data->accel_dev = accel_dev; init_completion(&reset_data->compl); reset_data->mode = mode; INIT_WORK(&reset_data->reset_work, adf_device_reset_worker); queue_work(device_reset_wq, &reset_data->reset_work); /* If in sync mode wait for the result */ if (mode == ADF_DEV_RESET_SYNC) { int ret = 0; /* Maximum device reset time is 10 seconds */ unsigned long wait_jiffies = msecs_to_jiffies(10000); unsigned long timeout = wait_for_completion_timeout(&reset_data->compl, wait_jiffies); if (!timeout) { device_printf(GET_DEV(accel_dev), "Reset device timeout expired\n"); ret = -EFAULT; } kfree(reset_data); return ret; } return 0; } int adf_dev_autoreset(struct adf_accel_dev *accel_dev) { if (accel_dev->autoreset_on_error) return adf_dev_reset(accel_dev, ADF_DEV_RESET_ASYNC); return 0; } static void adf_notify_fatal_error_work(struct work_struct *work) { struct adf_fatal_error_data *wq_data = container_of(work, struct adf_fatal_error_data, work); struct adf_accel_dev *accel_dev = wq_data->accel_dev; adf_error_notifier((uintptr_t)accel_dev); if (!accel_dev->is_vf) { adf_dev_autoreset(accel_dev); } kfree(wq_data); } int adf_notify_fatal_error(struct adf_accel_dev *accel_dev) { struct adf_fatal_error_data *wq_data; wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC); if (!wq_data) { device_printf(GET_DEV(accel_dev), "Failed to allocate memory\n"); return ENOMEM; } wq_data->accel_dev = accel_dev; INIT_WORK(&wq_data->work, adf_notify_fatal_error_work); queue_work(fatal_error_wq, &wq_data->work); return 0; } int __init adf_init_fatal_error_wq(void) { fatal_error_wq = create_workqueue("qat_fatal_error_wq"); return !fatal_error_wq ? EFAULT : 0; } void adf_exit_fatal_error_wq(void) { if (fatal_error_wq) destroy_workqueue(fatal_error_wq); fatal_error_wq = NULL; } int adf_init_aer(void) { device_reset_wq = create_workqueue("qat_device_reset_wq"); return !device_reset_wq ? -EFAULT : 0; } void adf_exit_aer(void) { if (device_reset_wq) destroy_workqueue(device_reset_wq); device_reset_wq = NULL; } diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c index a55863595ce2..517b83d90243 100644 --- a/sys/kern/kern_condvar.c +++ b/sys/kern/kern_condvar.c @@ -1,484 +1,475 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2000 Jake Burkholder . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif /* * A bound below which cv_waiters is valid. Once cv_waiters reaches this bound, * cv_signal must manually check the wait queue for threads. */ #define CV_WAITERS_BOUND INT_MAX #define CV_WAITERS_INC(cvp) do { \ if ((cvp)->cv_waiters < CV_WAITERS_BOUND) \ (cvp)->cv_waiters++; \ } while (0) /* * Common sanity checks for cv_wait* functions. */ #define CV_ASSERT(cvp, lock, td) do { \ KASSERT((td) != NULL, ("%s: td NULL", __func__)); \ KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__)); \ KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ KASSERT((lock) != NULL, ("%s: lock NULL", __func__)); \ } while (0) /* * Initialize a condition variable. Must be called before use. */ void cv_init(struct cv *cvp, const char *desc) { cvp->cv_description = desc; cvp->cv_waiters = 0; } /* * Destroy a condition variable. The condition variable must be re-initialized * in order to be re-used. */ void cv_destroy(struct cv *cvp) { #ifdef INVARIANTS struct sleepqueue *sq; sleepq_lock(cvp); sq = sleepq_lookup(cvp); sleepq_release(cvp); KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__)); #endif } /* * Wait on a condition variable. The current thread is placed on the condition * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same * condition variable will resume the thread. The mutex is released before * sleeping and will be held on return. It is recommended that the mutex be * held when cv_signal or cv_broadcast are called. */ void _cv_wait(struct cv *cvp, struct lock_object *lock) { WITNESS_SAVE_DECL(lock_witness); #ifdef KTRACE char wmesg[WMESGLEN + 1]; #endif struct lock_class *class; struct thread *td __ktrace_used; uintptr_t lock_state; td = curthread; CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); if (SCHEDULER_STOPPED()) return; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg)); ktrcsw(1, 0, wmesg); } else { wmesg[0] = '\0'; } #endif class = LOCK_CLASS(lock); lock_state = 0; sleepq_lock(cvp); CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } sleepq_wait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } } /* * Wait on a condition variable. This function differs from cv_wait by * not acquiring the mutex after condition variable was signaled. 
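 * For contrast, a typical consumer of the plain cv_wait() interface
 * looks like this (an illustrative sketch; 'm' and 'ready' are assumed
 * consumer state):
 *
 *	mtx_lock(&m);
 *	while (!ready)
 *		cv_wait(&cv, &m);
 *	mtx_unlock(&m);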
*/ void _cv_wait_unlock(struct cv *cvp, struct lock_object *lock) { #ifdef KTRACE char wmesg[WMESGLEN + 1]; #endif struct lock_class *class; struct thread *td __ktrace_used; td = curthread; CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); KASSERT(lock != &Giant.lock_object, ("cv_wait_unlock cannot be used with Giant")); class = LOCK_CLASS(lock); if (SCHEDULER_STOPPED()) { class->lc_unlock(lock); return; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg)); ktrcsw(1, 0, wmesg); } else { wmesg[0] = '\0'; } #endif sleepq_lock(cvp); CV_WAITERS_INC(cvp); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); sleepq_wait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); } /* * Wait on a condition variable, allowing interruption by signals. Return 0 if * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if * a signal was caught. If ERESTART is returned the system call should be * restarted if possible. */ int _cv_wait_sig(struct cv *cvp, struct lock_object *lock) { WITNESS_SAVE_DECL(lock_witness); #ifdef KTRACE char wmesg[WMESGLEN + 1]; #endif struct lock_class *class; struct thread *td __ktrace_used; uintptr_t lock_state; int rval; td = curthread; CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); if (SCHEDULER_STOPPED()) return (0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg)); ktrcsw(1, 0, wmesg); } else { wmesg[0] = '\0'; } #endif class = LOCK_CLASS(lock); lock_state = 0; sleepq_lock(cvp); CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } rval = sleepq_wait_sig(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* * Wait on a condition variable for (at most) the value specified in sbt * argument. Returns 0 if the process was resumed by cv_signal or cv_broadcast, * EWOULDBLOCK if the timeout expires. 
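 *
 * Illustrative caller pattern (a sketch; 'm' and 'ready' are assumed
 * consumer state and SBT_1S requests a one-second timeout):
 *
 *	mtx_lock(&m);
 *	while (!ready) {
 *		if (cv_timedwait_sbt(&cv, &m, SBT_1S, 0, 0) == EWOULDBLOCK)
 *			break;
 *	}
 *	mtx_unlock(&m);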
*/ int _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, sbintime_t pr, int flags) { WITNESS_SAVE_DECL(lock_witness); #ifdef KTRACE char wmesg[WMESGLEN + 1]; #endif struct lock_class *class; struct thread *td __ktrace_used; int lock_state, rval; td = curthread; CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); if (SCHEDULER_STOPPED()) return (0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg)); ktrcsw(1, 0, wmesg); } else { wmesg[0] = '\0'; } #endif class = LOCK_CLASS(lock); lock_state = 0; sleepq_lock(cvp); CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } rval = sleepq_timedwait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* * Wait on a condition variable for (at most) the value specified in sbt * argument, allowing interruption by signals. * Returns 0 if the thread was resumed by cv_signal or cv_broadcast, * EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if a signal * was caught. */ int _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, sbintime_t pr, int flags) { WITNESS_SAVE_DECL(lock_witness); #ifdef KTRACE char wmesg[WMESGLEN + 1]; #endif struct lock_class *class; struct thread *td __ktrace_used; int lock_state, rval; td = curthread; CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); if (SCHEDULER_STOPPED()) return (0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { strlcpy(wmesg, cv_wmesg(cvp), sizeof(wmesg)); ktrcsw(1, 0, wmesg); } else { wmesg[0] = '\0'; } #endif class = LOCK_CLASS(lock); lock_state = 0; sleepq_lock(cvp); CV_WAITERS_INC(cvp); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); } rval = sleepq_timedwait_sig(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); if (lock != &Giant.lock_object) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } /* - * Signal a condition variable, wakes up one waiting thread. Will also wakeup - * the swapper if the process is not in memory, so that it can bring the - * sleeping process in. Note that this may also result in additional threads - * being made runnable. Should be called with the same mutex as was passed to - * cv_wait held. + * Signal a condition variable, wakes up one waiting thread. Note that this may + * also result in additional threads being made runnable. Should be called with + * the same mutex as was passed to cv_wait held. 
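 *
 * Illustrative signal-side sketch ('m' and 'ready' as in the wait-side
 * examples above):
 *
 *	mtx_lock(&m);
 *	ready = true;
 *	cv_signal(&cv);
 *	mtx_unlock(&m);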
*/ void cv_signal(struct cv *cvp) { - if (cvp->cv_waiters == 0) return; sleepq_lock(cvp); if (cvp->cv_waiters == 0) { sleepq_release(cvp); return; } if (cvp->cv_waiters == CV_WAITERS_BOUND && sleepq_lookup(cvp) == NULL) { cvp->cv_waiters = 0; sleepq_release(cvp); } else { if (cvp->cv_waiters < CV_WAITERS_BOUND) cvp->cv_waiters--; - if (sleepq_signal(cvp, SLEEPQ_CONDVAR | SLEEPQ_DROP, 0, 0)) - kick_proc0(); + sleepq_signal(cvp, SLEEPQ_CONDVAR | SLEEPQ_DROP, 0, 0); } } /* * Broadcast a signal to a condition variable. Wakes up all waiting threads. * Should be called with the same mutex as was passed to cv_wait held. */ void cv_broadcastpri(struct cv *cvp, int pri) { - int wakeup_swapper; - if (cvp->cv_waiters == 0) return; /* * XXX sleepq_broadcast pri argument changed from -1 meaning * no pri to 0 meaning no pri. */ - wakeup_swapper = 0; if (pri == -1) pri = 0; sleepq_lock(cvp); if (cvp->cv_waiters > 0) { cvp->cv_waiters = 0; - wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0); + sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0); } sleepq_release(cvp); - if (wakeup_swapper) - kick_proc0(); } diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c index 4700ee0f8f98..4771496f950a 100644 --- a/sys/kern/kern_lock.c +++ b/sys/kern/kern_lock.c @@ -1,1859 +1,1840 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2008 Attilio Rao * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef DEBUG_LOCKS #include #endif #include #include #include #ifdef DDB #include #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* * Hack. There should be prio_t or similar so that this is not necessary. 
*/ _Static_assert((PRILASTFLAG * 2) - 1 <= USHRT_MAX, "prio flags wont fit in u_short pri in struct lock"); CTASSERT(LK_UNLOCKED == (LK_UNLOCKED & ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS))); #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 #ifndef INVARIANTS #define _lockmgr_assert(lk, what, file, line) #endif #define TD_SLOCKS_INC(td) ((td)->td_lk_slocks++) #define TD_SLOCKS_DEC(td) ((td)->td_lk_slocks--) #ifndef DEBUG_LOCKS #define STACK_PRINT(lk) #define STACK_SAVE(lk) #define STACK_ZERO(lk) #else #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack) #define STACK_SAVE(lk) stack_save(&(lk)->lk_stack) #define STACK_ZERO(lk) stack_zero(&(lk)->lk_stack) #endif #define LOCK_LOG2(lk, string, arg1, arg2) \ if (LOCK_LOG_TEST(&(lk)->lock_object, 0)) \ CTR2(KTR_LOCK, (string), (arg1), (arg2)) #define LOCK_LOG3(lk, string, arg1, arg2, arg3) \ if (LOCK_LOG_TEST(&(lk)->lock_object, 0)) \ CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3)) #define GIANT_DECLARE \ int _i = 0; \ WITNESS_SAVE_DECL(Giant) #define GIANT_RESTORE() do { \ if (__predict_false(_i > 0)) { \ while (_i--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) #define GIANT_SAVE() do { \ if (__predict_false(mtx_owned(&Giant))) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _i++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) static __always_inline bool LK_CAN_SHARE(uintptr_t x, int flags, bool fp) { if ((x & (LK_SHARE | LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == LK_SHARE) return (true); if (fp || (!(x & LK_SHARE))) return (false); if ((curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) || (curthread->td_pflags & TDP_DEADLKTREAT)) return (true); return (false); } #define LK_TRYOP(x) \ ((x) & LK_NOWAIT) #define LK_CAN_WITNESS(x) \ (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x)) #define LK_TRYWIT(x) \ (LK_TRYOP(x) ? 
LOP_TRYLOCK : 0) #define lockmgr_xlocked_v(v) \ (((v) & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread) #define lockmgr_xlocked(lk) lockmgr_xlocked_v(lockmgr_read_value(lk)) static void assert_lockmgr(const struct lock_object *lock, int how); #ifdef DDB static void db_show_lockmgr(const struct lock_object *lock); #endif static void lock_lockmgr(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_lockmgr(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_lockmgr(struct lock_object *lock); struct lock_class lock_class_lockmgr = { .lc_name = "lockmgr", .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE, .lc_assert = assert_lockmgr, #ifdef DDB .lc_ddb_show = db_show_lockmgr, #endif .lc_lock = lock_lockmgr, .lc_unlock = unlock_lockmgr, #ifdef KDTRACE_HOOKS .lc_owner = owner_lockmgr, #endif }; static __read_mostly bool lk_adaptive = true; static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL, "lockmgr debugging"); SYSCTL_BOOL(_debug_lockmgr, OID_AUTO, adaptive_spinning, CTLFLAG_RW, &lk_adaptive, 0, ""); #define lockmgr_delay locks_delay struct lockmgr_wait { const char *iwmesg; int ipri; int itimo; }; static __always_inline bool lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags, bool fp); static __always_inline bool lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp); static void -lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper) +lockmgr_exit(u_int flags, struct lock_object *ilk) { struct lock_class *class; if (flags & LK_INTERLOCK) { class = LOCK_CLASS(ilk); class->lc_unlock(ilk); } - - if (__predict_false(wakeup_swapper)) - kick_proc0(); } static void lockmgr_note_shared_acquire(struct lock *lk, int contested, uint64_t waittime, const char *file, int line, int flags) { LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested, waittime, file, line, LOCKSTAT_READER); LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line); WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); TD_SLOCKS_INC(curthread); STACK_SAVE(lk); } static void lockmgr_note_shared_release(struct lock *lk, const char *file, int line) { WITNESS_UNLOCK(&lk->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line); TD_LOCKS_DEC(curthread); TD_SLOCKS_DEC(curthread); } static void lockmgr_note_exclusive_acquire(struct lock *lk, int contested, uint64_t waittime, const char *file, int line, int flags) { LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested, waittime, file, line, LOCKSTAT_WRITER); LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); STACK_SAVE(lk); } static void lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line) { if (!lockmgr_disowned(lk)) { WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_DEC(curthread); } LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); } static __inline struct thread * lockmgr_xholder(const struct lock *lk) { uintptr_t x; x = lockmgr_read_value(lk); return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x)); } /* * It assumes sleepq_lock held and returns with this one unheld. * It also assumes the generic interlock is sane and previously checked. * If LK_INTERLOCK is specified the interlock is not reacquired after the * sleep. 
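 *
 * Callers in this file follow the pattern (illustrative sketch):
 *
 *	sleepq_lock(&lk->lock_object);
 *	... re-read the lock word and set a waiters bit ...
 *	error = sleeplk(lk, flags, ilk, wmesg, pri, timo, queue);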
*/ static __inline int sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk, const char *wmesg, int pri, int timo, int queue) { GIANT_DECLARE; struct lock_class *class; int catch, error; class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL; catch = pri & PCATCH; pri &= PRIMASK; error = 0; LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk, (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared"); if (flags & LK_INTERLOCK) class->lc_unlock(ilk); if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0) { if (lk->lk_exslpfail < USHRT_MAX) lk->lk_exslpfail++; } GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ? SLEEPQ_INTERRUPTIBLE : 0), queue); if ((flags & LK_TIMELOCK) && timo) sleepq_set_timeout(&lk->lock_object, timo); /* * Decisional switch for real sleeping. */ if ((flags & LK_TIMELOCK) && timo && catch) error = sleepq_timedwait_sig(&lk->lock_object, pri); else if ((flags & LK_TIMELOCK) && timo) error = sleepq_timedwait(&lk->lock_object, pri); else if (catch) error = sleepq_wait_sig(&lk->lock_object, pri); else sleepq_wait(&lk->lock_object, pri); GIANT_RESTORE(); if ((flags & LK_SLEEPFAIL) && error == 0) error = ENOLCK; return (error); } -static __inline int +static __inline void wakeupshlk(struct lock *lk, const char *file, int line) { uintptr_t v, x, orig_x; u_int realexslp; - int queue, wakeup_swapper; + int queue; - wakeup_swapper = 0; for (;;) { x = lockmgr_read_value(lk); if (lockmgr_sunlock_try(lk, &x)) break; /* * We should have a sharer with waiters, so enter the hard * path in order to handle wakeups correctly. */ sleepq_lock(&lk->lock_object); orig_x = lockmgr_read_value(lk); retry_sleepq: x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); v = LK_UNLOCKED; /* * If the lock has exclusive waiters, give them preference in * order to avoid deadlock with shared runners up. * If interruptible sleeps left the exclusive queue empty, * avoid starvation of the threads sleeping on the shared * queue by giving them precedence and cleaning up the * exclusive waiters bit anyway. * Please note that the lk_exslpfail count may be lying about * the real number of waiters with the LK_SLEEPFAIL flag on * because they may be used in conjunction with interruptible * sleeps, so lk_exslpfail might be considered an 'upper limit' * bound, including the edge cases. */ realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) { if (lk->lk_exslpfail != USHRT_MAX && lk->lk_exslpfail < realexslp) { lk->lk_exslpfail = 0; queue = SQ_EXCLUSIVE_QUEUE; v |= (x & LK_SHARED_WAITERS); } else { lk->lk_exslpfail = 0; LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); - wakeup_swapper = - sleepq_broadcast(&lk->lock_object, - SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); + sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, + SQ_EXCLUSIVE_QUEUE); queue = SQ_SHARED_QUEUE; } } else { /* * Exclusive waiters sleeping with LK_SLEEPFAIL on * and using interruptible sleeps/timeout may have * left spurious lk_exslpfail counts on, so clean * it up anyway. */ lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; } if (lockmgr_sunlock_try(lk, &orig_x)) { sleepq_release(&lk->lock_object); break; } x |= LK_SHARERS_LOCK(1); if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) { orig_x = x; goto retry_sleepq; } LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ?
"shared" : "exclusive"); - wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, - 0, queue); + sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue); sleepq_release(&lk->lock_object); break; } LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER); - return (wakeup_swapper); } static void assert_lockmgr(const struct lock_object *lock, int what) { panic("lockmgr locks do not support assertions"); } static void lock_lockmgr(struct lock_object *lock, uintptr_t how) { panic("lockmgr locks do not support sleep interlocking"); } static uintptr_t unlock_lockmgr(struct lock_object *lock) { panic("lockmgr locks do not support sleep interlocking"); } #ifdef KDTRACE_HOOKS static int owner_lockmgr(const struct lock_object *lock, struct thread **owner) { panic("lockmgr locks do not support owner inquiring"); } #endif void lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags) { int iflags; MPASS((flags & ~LK_INIT_MASK) == 0); ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock, ("%s: lockmgr not aligned for %s: %p", __func__, wmesg, &lk->lk_lock)); iflags = LO_SLEEPABLE | LO_UPGRADABLE; if (flags & LK_CANRECURSE) iflags |= LO_RECURSABLE; if ((flags & LK_NODUP) == 0) iflags |= LO_DUPOK; if (flags & LK_NOPROFILE) iflags |= LO_NOPROFILE; if ((flags & LK_NOWITNESS) == 0) iflags |= LO_WITNESS; if (flags & LK_QUIET) iflags |= LO_QUIET; if (flags & LK_IS_VNODE) iflags |= LO_IS_VNODE; if (flags & LK_NEW) iflags |= LO_NEW; iflags |= flags & LK_NOSHARE; lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags); lk->lk_lock = LK_UNLOCKED; lk->lk_recurse = 0; lk->lk_exslpfail = 0; lk->lk_timo = timo; lk->lk_pri = pri; STACK_ZERO(lk); } /* * XXX: Gross hacks to manipulate external lock flags after * initialization. Used for certain vnode and buf locks. */ void lockallowshare(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags &= ~LK_NOSHARE; } void lockdisableshare(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags |= LK_NOSHARE; } void lockallowrecurse(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags |= LO_RECURSABLE; } void lockdisablerecurse(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags &= ~LO_RECURSABLE; } void lockdestroy(struct lock *lk) { KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held")); KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed")); KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters")); lock_destroy(&lk->lock_object); } static __always_inline bool lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags, bool fp) { /* * If no other thread has an exclusive lock, or * no exclusive waiter is present, bump the count of * sharers. Since we have to preserve the state of * waiters, if we fail to acquire the shared lock * loop back and retry. 
*/ while (LK_CAN_SHARE(*xp, flags, fp)) { if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp, *xp + LK_ONE_SHARER)) { return (true); } } return (false); } static __always_inline bool lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp) { for (;;) { if (LK_SHARERS(*xp) > 1 || !(*xp & LK_ALL_WAITERS)) { if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp, *xp - LK_ONE_SHARER)) return (true); continue; } break; } return (false); } static bool lockmgr_slock_adaptive(struct lock_delay_arg *lda, struct lock *lk, uintptr_t *xp, int flags) { struct thread *owner; uintptr_t x; x = *xp; MPASS(x != LK_UNLOCKED); owner = (struct thread *)LK_HOLDER(x); for (;;) { MPASS(owner != curthread); if (owner == (struct thread *)LK_KERNPROC) return (false); if ((x & LK_SHARE) && LK_SHARERS(x) > 0) return (false); if (owner == NULL) return (false); if (!TD_IS_RUNNING(owner)) return (false); if ((x & LK_ALL_WAITERS) != 0) return (false); lock_delay(lda); x = lockmgr_read_value(lk); if (LK_CAN_SHARE(x, flags, false)) { *xp = x; return (true); } owner = (struct thread *)LK_HOLDER(x); } } static __noinline int lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk, const char *file, int line, struct lockmgr_wait *lwa) { uintptr_t tid, x; int error = 0; const char *iwmesg; int ipri, itimo; #ifdef KDTRACE_HOOKS uint64_t sleep_time = 0; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif struct lock_delay_arg lda; if (SCHEDULER_STOPPED()) goto out; tid = (uintptr_t)curthread; if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, flags & LK_INTERLOCK ? ilk : NULL); x = lockmgr_read_value(lk); lock_delay_arg_init(&lda, &lockmgr_delay); if (!lk_adaptive) flags &= ~LK_ADAPTIVE; /* * The lock may already be locked exclusive by curthread, * avoid deadlock. */ if (LK_HOLDER(x) == tid) { LOCK_LOG2(lk, "%s: %p already held in exclusive mode", __func__, lk); error = EDEADLK; goto out; } for (;;) { if (lockmgr_slock_try(lk, &x, flags, false)) break; lock_profile_obtain_lock_failed(&lk->lock_object, false, &contested, &waittime); if ((flags & (LK_ADAPTIVE | LK_INTERLOCK)) == LK_ADAPTIVE) { if (lockmgr_slock_adaptive(&lda, lk, &x, flags)) continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif /* * If the lock is expected to not sleep just give up * and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } /* * Acquire the sleepqueue chain lock because we * probably will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lockmgr_read_value(lk); retry_sleepq: /* * If the lock can be acquired in shared mode, try * again. */ if (LK_CAN_SHARE(x, flags, false)) { sleepq_release(&lk->lock_object); continue; } /* * Try to set the LK_SHARED_WAITERS flag. If we fail, * loop back and retry. */ if ((x & LK_SHARED_WAITERS) == 0) { if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x, x | LK_SHARED_WAITERS)) { goto retry_sleepq; } LOCK_LOG2(lk, "%s: %p set shared waiters flag", __func__, lk); } if (lwa == NULL) { iwmesg = lk->lock_object.lo_name; ipri = lk->lk_pri; itimo = lk->lk_timo; } else { iwmesg = lwa->iwmesg; ipri = lwa->ipri; itimo = lwa->itimo; } /* * Since we have been unable to acquire the * shared lock and the shared waiters flag is set, * we will sleep.
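 * (Under KDTRACE_HOOKS the sleep below is bracketed with
 * lockstat_nsecs() samples so the lockstat provider can attribute the
 * block time to this lock.)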
*/ #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&lk->lock_object); #endif error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo, SQ_SHARED_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&lk->lock_object); #endif flags &= ~LK_INTERLOCK; if (error) { LOCK_LOG3(lk, "%s: interrupted sleep for %p with %d", __func__, lk, error); break; } LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); x = lockmgr_read_value(lk); } if (error == 0) { #ifdef KDTRACE_HOOKS if (sleep_time != 0) LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time, LOCKSTAT_READER, (x & LK_SHARE) == 0, (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x)); #endif #ifdef LOCK_PROFILING lockmgr_note_shared_acquire(lk, contested, waittime, file, line, flags); #else lockmgr_note_shared_acquire(lk, 0, 0, file, line, flags); #endif } out: - lockmgr_exit(flags, ilk, 0); + lockmgr_exit(flags, ilk); return (error); } static bool lockmgr_xlock_adaptive(struct lock_delay_arg *lda, struct lock *lk, uintptr_t *xp) { struct thread *owner; uintptr_t x; x = *xp; MPASS(x != LK_UNLOCKED); owner = (struct thread *)LK_HOLDER(x); for (;;) { MPASS(owner != curthread); if (owner == NULL) return (false); if ((x & LK_SHARE) && LK_SHARERS(x) > 0) return (false); if (owner == (struct thread *)LK_KERNPROC) return (false); if (!TD_IS_RUNNING(owner)) return (false); if ((x & LK_ALL_WAITERS) != 0) return (false); lock_delay(lda); x = lockmgr_read_value(lk); if (x == LK_UNLOCKED) { *xp = x; return (true); } owner = (struct thread *)LK_HOLDER(x); } } static __noinline int lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk, const char *file, int line, struct lockmgr_wait *lwa) { struct lock_class *class; uintptr_t tid, x, v; int error = 0; const char *iwmesg; int ipri, itimo; #ifdef KDTRACE_HOOKS uint64_t sleep_time = 0; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif struct lock_delay_arg lda; if (SCHEDULER_STOPPED()) goto out; tid = (uintptr_t)curthread; if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ? ilk : NULL); /* * If curthread already holds the lock and this one is * allowed to recurse, simply recurse on it. */ if (lockmgr_xlocked(lk)) { if ((flags & LK_CANRECURSE) == 0 && (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) { /* * If the lock is expected to not panic just * give up and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; goto out; } if (flags & LK_INTERLOCK) { class = LOCK_CLASS(ilk); class->lc_unlock(ilk); } STACK_PRINT(lk); panic("%s: recursing on non recursive lockmgr %p " "@ %s:%d\n", __func__, lk, file, line); } atomic_set_ptr(&lk->lk_lock, LK_WRITER_RECURSED); lk->lk_recurse++; LOCK_LOG2(lk, "%s: %p recursing", __func__, lk); LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); goto out; } x = LK_UNLOCKED; lock_delay_arg_init(&lda, &lockmgr_delay); if (!lk_adaptive) flags &= ~LK_ADAPTIVE; for (;;) { if (x == LK_UNLOCKED) { if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x, tid)) break; continue; } lock_profile_obtain_lock_failed(&lk->lock_object, false, &contested, &waittime); if ((flags & (LK_ADAPTIVE | LK_INTERLOCK)) == LK_ADAPTIVE) { if (lockmgr_xlock_adaptive(&lda, lk, &x)) continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif /* * If the lock is expected to not sleep just give up * and return. 
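 * (LK_TRYOP() is true when the caller passed LK_NOWAIT.)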
*/ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } /* * Acquire the sleepqueue chain lock because we * probably will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lockmgr_read_value(lk); retry_sleepq: /* * If the lock has been released while we spun on * the sleepqueue chain lock, just try again. */ if (x == LK_UNLOCKED) { sleepq_release(&lk->lock_object); continue; } /* * The lock can be in the state where there is a * pending queue of waiters, but still no owner. * This happens when the lock is contested and an * owner is going to claim the lock. * If curthread is the one successfully acquiring it, * claim lock ownership and return, preserving waiters * flags. */ v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); if ((x & ~v) == LK_UNLOCKED) { v &= ~LK_EXCLUSIVE_SPINNERS; if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x, tid | v)) { sleepq_release(&lk->lock_object); LOCK_LOG2(lk, "%s: %p claimed by a new writer", __func__, lk); break; } goto retry_sleepq; } /* * Try to set the LK_EXCLUSIVE_WAITERS flag. If we * fail, loop back and retry. */ if ((x & LK_EXCLUSIVE_WAITERS) == 0) { if (!atomic_fcmpset_ptr(&lk->lk_lock, &x, x | LK_EXCLUSIVE_WAITERS)) { goto retry_sleepq; } LOCK_LOG2(lk, "%s: %p set excl waiters flag", __func__, lk); } if (lwa == NULL) { iwmesg = lk->lock_object.lo_name; ipri = lk->lk_pri; itimo = lk->lk_timo; } else { iwmesg = lwa->iwmesg; ipri = lwa->ipri; itimo = lwa->itimo; } /* * Since we have been unable to acquire the * exclusive lock and the exclusive waiters flag * is set, we will sleep. */ #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&lk->lock_object); #endif error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo, SQ_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&lk->lock_object); #endif flags &= ~LK_INTERLOCK; if (error) { LOCK_LOG3(lk, "%s: interrupted sleep for %p with %d", __func__, lk, error); break; } LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); x = lockmgr_read_value(lk); } if (error == 0) { #ifdef KDTRACE_HOOKS if (sleep_time != 0) LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time, LOCKSTAT_WRITER, (x & LK_SHARE) == 0, (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x)); #endif #ifdef LOCK_PROFILING lockmgr_note_exclusive_acquire(lk, contested, waittime, file, line, flags); #else lockmgr_note_exclusive_acquire(lk, 0, 0, file, line, flags); #endif } out: - lockmgr_exit(flags, ilk, 0); + lockmgr_exit(flags, ilk); return (error); } static __noinline int lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk, const char *file, int line, struct lockmgr_wait *lwa) { uintptr_t tid, v, setv; int error = 0; int op; if (SCHEDULER_STOPPED()) goto out; tid = (uintptr_t)curthread; _lockmgr_assert(lk, KA_SLOCKED, file, line); op = flags & LK_TYPE_MASK; v = lockmgr_read_value(lk); for (;;) { if (LK_SHARERS(v) > 1) { if (op == LK_TRYUPGRADE) { LOCK_LOG2(lk, "%s: %p failed the nowait upgrade", __func__, lk); error = EBUSY; goto out; } if (atomic_fcmpset_rel_ptr(&lk->lk_lock, &v, v - LK_ONE_SHARER)) { lockmgr_note_shared_release(lk, file, line); goto out_xlock; } continue; } MPASS((v & ~LK_ALL_WAITERS) == LK_SHARERS_LOCK(1)); setv = tid; setv |= (v & LK_ALL_WAITERS); /* * Try to switch from one shared lock to an exclusive one. * We need to preserve waiters flags during the operation.
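 * setv is the curthread tid with (v & LK_ALL_WAITERS) merged back in,
 * so any pending shared/exclusive waiter bits survive the single
 * atomic swap below.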
*/ if (atomic_fcmpset_ptr(&lk->lk_lock, &v, setv)) { LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file, line); WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); LOCKSTAT_RECORD0(lockmgr__upgrade, lk); TD_SLOCKS_DEC(curthread); goto out; } } out_xlock: error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa); flags &= ~LK_INTERLOCK; out: - lockmgr_exit(flags, ilk, 0); + lockmgr_exit(flags, ilk); return (error); } int lockmgr_lock_flags(struct lock *lk, u_int flags, struct lock_object *ilk, const char *file, int line) { struct lock_class *class; uintptr_t x, tid; u_int op; bool locked; if (SCHEDULER_STOPPED()) return (0); op = flags & LK_TYPE_MASK; locked = false; switch (op) { case LK_SHARED: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, flags & LK_INTERLOCK ? ilk : NULL); if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE)) break; x = lockmgr_read_value(lk); if (lockmgr_slock_try(lk, &x, flags, true)) { lockmgr_note_shared_acquire(lk, 0, 0, file, line, flags); locked = true; } else { return (lockmgr_slock_hard(lk, flags, ilk, file, line, NULL)); } break; case LK_EXCLUSIVE: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ? ilk : NULL); tid = (uintptr_t)curthread; if (lockmgr_read_value(lk) == LK_UNLOCKED && atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) { lockmgr_note_exclusive_acquire(lk, 0, 0, file, line, flags); locked = true; } else { return (lockmgr_xlock_hard(lk, flags, ilk, file, line, NULL)); } break; case LK_UPGRADE: case LK_TRYUPGRADE: return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL)); default: break; } if (__predict_true(locked)) { if (__predict_false(flags & LK_INTERLOCK)) { class = LOCK_CLASS(ilk); class->lc_unlock(ilk); } return (0); } else { return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line)); } } static __noinline int lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk, const char *file, int line) { - int wakeup_swapper = 0; - - if (SCHEDULER_STOPPED()) - goto out; - - wakeup_swapper = wakeupshlk(lk, file, line); - -out: - lockmgr_exit(flags, ilk, wakeup_swapper); + if (!SCHEDULER_STOPPED()) + wakeupshlk(lk, file, line); + lockmgr_exit(flags, ilk); return (0); } static __noinline int lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk, const char *file, int line) { uintptr_t tid, v; - int wakeup_swapper = 0; u_int realexslp; int queue; if (SCHEDULER_STOPPED()) goto out; tid = (uintptr_t)curthread; /* * As a first option, treat the lock as if it has no * waiters. * Fix up the tid var if the lock has been disowned. */ if (lockmgr_disowned_v(x)) tid = LK_KERNPROC; /* * The lock is held in exclusive mode. * If the lock is recursed also, then unrecurse it. */ if (lockmgr_recursed_v(x)) { LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk); lk->lk_recurse--; if (lk->lk_recurse == 0) atomic_clear_ptr(&lk->lk_lock, LK_WRITER_RECURSED); goto out; } if (tid != LK_KERNPROC) LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_WRITER); if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) goto out; sleepq_lock(&lk->lock_object); x = lockmgr_read_value(lk); v = LK_UNLOCKED; /* * If the lock has exclusive waiters, give them * preference in order to avoid deadlock with * shared runners up.
* If interruptible sleeps left the exclusive queue * empty, avoid starvation of the threads sleeping * on the shared queue by giving them precedence * and cleaning up the exclusive waiters bit anyway. * Please note that the lk_exslpfail count may be lying * about the real number of waiters with the * LK_SLEEPFAIL flag on because they may be used in * conjunction with interruptible sleeps, so * lk_exslpfail might be considered an 'upper limit' * bound, including the edge cases. */ MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) { if (lk->lk_exslpfail != USHRT_MAX && lk->lk_exslpfail < realexslp) { lk->lk_exslpfail = 0; queue = SQ_EXCLUSIVE_QUEUE; v |= (x & LK_SHARED_WAITERS); } else { lk->lk_exslpfail = 0; LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); - wakeup_swapper = sleepq_broadcast(&lk->lock_object, - SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); + sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, + SQ_EXCLUSIVE_QUEUE); queue = SQ_SHARED_QUEUE; } } else { /* * Exclusive waiters sleeping with LK_SLEEPFAIL * on and using interruptible sleeps/timeout * may have left spurious lk_exslpfail counts * on, so clean it up anyway. */ lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; } LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); atomic_store_rel_ptr(&lk->lk_lock, v); - wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue); + sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue); sleepq_release(&lk->lock_object); out: - lockmgr_exit(flags, ilk, wakeup_swapper); + lockmgr_exit(flags, ilk); return (0); } /* * Lightweight entry points for common operations. * * Functionality is similar to sx locks, in that none of the additional lockmgr * features are supported. To be clear, these are NOT supported: * 1. shared locking disablement * 2. returning with an error after sleep * 3. unlocking the interlock * * If in doubt, use lockmgr_lock_flags.
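 *
 * A minimal usage sketch (hypothetical caller, lock previously set up
 * with lockinit()):
 *
 *	lockmgr_slock(&lk, LK_SHARED, __FILE__, __LINE__);
 *	... read the data protected by lk ...
 *	lockmgr_unlock(&lk);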
*/ int lockmgr_slock(struct lock *lk, u_int flags, const char *file, int line) { uintptr_t x; MPASS((flags & LK_TYPE_MASK) == LK_SHARED); MPASS((flags & LK_INTERLOCK) == 0); MPASS((lk->lock_object.lo_flags & LK_NOSHARE) == 0); if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, NULL); x = lockmgr_read_value(lk); if (__predict_true(lockmgr_slock_try(lk, &x, flags, true))) { lockmgr_note_shared_acquire(lk, 0, 0, file, line, flags); return (0); } return (lockmgr_slock_hard(lk, flags | LK_ADAPTIVE, NULL, file, line, NULL)); } int lockmgr_xlock(struct lock *lk, u_int flags, const char *file, int line) { uintptr_t tid; MPASS((flags & LK_TYPE_MASK) == LK_EXCLUSIVE); MPASS((flags & LK_INTERLOCK) == 0); if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); tid = (uintptr_t)curthread; if (atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) { lockmgr_note_exclusive_acquire(lk, 0, 0, file, line, flags); return (0); } return (lockmgr_xlock_hard(lk, flags | LK_ADAPTIVE, NULL, file, line, NULL)); } int lockmgr_unlock(struct lock *lk) { uintptr_t x, tid; const char *file; int line; file = __FILE__; line = __LINE__; _lockmgr_assert(lk, KA_LOCKED, file, line); x = lockmgr_read_value(lk); if (__predict_true(x & LK_SHARE) != 0) { lockmgr_note_shared_release(lk, file, line); if (lockmgr_sunlock_try(lk, &x)) { LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER); } else { return (lockmgr_sunlock_hard(lk, x, LK_RELEASE, NULL, file, line)); } } else { tid = (uintptr_t)curthread; lockmgr_note_exclusive_release(lk, file, line); if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) { LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,LOCKSTAT_WRITER); } else { return (lockmgr_xunlock_hard(lk, x, LK_RELEASE, NULL, file, line)); } } return (0); } int __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk, const char *wmesg, int pri, int timo, const char *file, int line) { GIANT_DECLARE; struct lockmgr_wait lwa; struct lock_class *class; const char *iwmesg; uintptr_t tid, v, x; u_int op, realexslp; - int error, ipri, itimo, queue, wakeup_swapper; + int error, ipri, itimo, queue; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif if (SCHEDULER_STOPPED()) return (0); error = 0; tid = (uintptr_t)curthread; op = (flags & LK_TYPE_MASK); iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg; ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri; itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo; lwa.iwmesg = iwmesg; lwa.ipri = ipri; lwa.itimo = itimo; MPASS((flags & ~LK_TOTAL_MASK) == 0); KASSERT((op & (op - 1)) == 0, ("%s: Invalid requested operation @ %s:%d", __func__, file, line)); KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 || (op != LK_DOWNGRADE && op != LK_RELEASE), ("%s: Invalid flags in regard of the operation desired @ %s:%d", __func__, file, line)); KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL, ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d", __func__, file, line)); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread, lk->lock_object.lo_name, file, line)); class = (flags & LK_INTERLOCK) ? 
LOCK_CLASS(ilk) : NULL; if (lk->lock_object.lo_flags & LK_NOSHARE) { switch (op) { case LK_SHARED: op = LK_EXCLUSIVE; break; case LK_UPGRADE: case LK_TRYUPGRADE: case LK_DOWNGRADE: _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line); if (flags & LK_INTERLOCK) class->lc_unlock(ilk); return (0); } } - wakeup_swapper = 0; switch (op) { case LK_SHARED: return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa)); break; case LK_UPGRADE: case LK_TRYUPGRADE: return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa)); break; case LK_EXCLUSIVE: return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa)); break; case LK_DOWNGRADE: _lockmgr_assert(lk, KA_XLOCKED, file, line); WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line); /* * Panic if the lock is recursed. */ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) { if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n", __func__, iwmesg, file, line); } TD_SLOCKS_INC(curthread); /* * In order to preserve waiters flags, just spin. */ for (;;) { x = lockmgr_read_value(lk); MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); x &= LK_ALL_WAITERS; if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x, LK_SHARERS_LOCK(1) | x)) break; cpu_spinwait(); } LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(lockmgr__downgrade, lk); break; case LK_RELEASE: _lockmgr_assert(lk, KA_LOCKED, file, line); x = lockmgr_read_value(lk); if (__predict_true(x & LK_SHARE) != 0) { lockmgr_note_shared_release(lk, file, line); return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line)); } else { lockmgr_note_exclusive_release(lk, file, line); return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line)); } break; case LK_DRAIN: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ? ilk : NULL); /* * Trying to drain a lock we already own will result in a * deadlock. */ if (lockmgr_xlocked(lk)) { if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: draining %s with the lock held @ %s:%d\n", __func__, iwmesg, file, line); } for (;;) { if (lk->lk_lock == LK_UNLOCKED && atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) break; #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&lk->lock_object, false, &contested, &waittime); /* * If the lock is expected to not sleep just give up * and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } /* * Acquire the sleepqueue chain lock because we * probably will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lockmgr_read_value(lk); /* * If the lock has been released while we spun on * the sleepqueue chain lock just try again. */ if (x == LK_UNLOCKED) { sleepq_release(&lk->lock_object); continue; } v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); if ((x & ~v) == LK_UNLOCKED) { v = (x & ~LK_EXCLUSIVE_SPINNERS); /* * If interruptible sleeps left the exclusive * queue empty, avoid starvation of the * threads sleeping on the shared queue by * giving them precedence and cleaning up the * exclusive waiters bit anyway. * Please note that the lk_exslpfail count may be * lying about the real number of waiters with * the LK_SLEEPFAIL flag on because they may * be used in conjunction with interruptible * sleeps, so lk_exslpfail might be considered * an 'upper limit' bound, including the edge * cases.
*/ if (v & LK_EXCLUSIVE_WAITERS) { queue = SQ_EXCLUSIVE_QUEUE; v &= ~LK_EXCLUSIVE_WAITERS; } else { /* * Exclusive waiters sleeping with * LK_SLEEPFAIL on and using * interruptible sleeps/timeout may * have left spurious lk_exslpfail * counts on, so clean it up anyway. */ MPASS(v & LK_SHARED_WAITERS); lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; v &= ~LK_SHARED_WAITERS; } if (queue == SQ_EXCLUSIVE_QUEUE) { realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if (lk->lk_exslpfail >= realexslp) { lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; v &= ~LK_SHARED_WAITERS; if (realexslp != 0) { LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); - wakeup_swapper = - sleepq_broadcast( + sleepq_broadcast( &lk->lock_object, SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); } } else lk->lk_exslpfail = 0; } if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG3(lk, "%s: %p waking up all threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); - wakeup_swapper |= sleepq_broadcast( - &lk->lock_object, SLEEPQ_LK, 0, queue); + sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, + queue); /* * If shared waiters have been woken up we need * to wait for one of them to acquire the lock * before setting the exclusive waiters flag, in * order to avoid a deadlock. */ if (queue == SQ_SHARED_QUEUE) { for (v = lk->lk_lock; (v & LK_SHARE) && !LK_SHARERS(v); v = lk->lk_lock) cpu_spinwait(); } } /* * Try to set the LK_EXCLUSIVE_WAITERS flag. If we * fail, loop back and retry. */ if ((x & LK_EXCLUSIVE_WAITERS) == 0) { if (!atomic_cmpset_ptr(&lk->lk_lock, x, x | LK_EXCLUSIVE_WAITERS)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG2(lk, "%s: %p set drain waiters flag", __func__, lk); } /* * Since we have been unable to acquire the * exclusive lock and the exclusive waiters flag * is set, we will sleep. */ if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK, SQ_EXCLUSIVE_QUEUE); sleepq_wait(&lk->lock_object, ipri & PRIMASK); GIANT_RESTORE(); LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); } if (error == 0) { lock_profile_obtain_lock_success(&lk->lock_object, false, contested, waittime, file, line); LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); STACK_SAVE(lk); } break; default: if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: unknown lockmgr request 0x%x\n", __func__, op); } if (flags & LK_INTERLOCK) class->lc_unlock(ilk); - if (wakeup_swapper) - kick_proc0(); return (error); } void _lockmgr_disown(struct lock *lk, const char *file, int line) { uintptr_t tid, x; if (SCHEDULER_STOPPED()) return; tid = (uintptr_t)curthread; _lockmgr_assert(lk, KA_XLOCKED, file, line); /* * Panic if the lock is recursed. */ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) panic("%s: disown a recursed lockmgr @ %s:%d\n", __func__, file, line); /* * If the owner is already LK_KERNPROC just skip the whole operation.
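 * (LK_KERNPROC means the lock was handed over to the kernel: the
 * buffer cache, for one, disowns buffer locks via BUF_KERNPROC() so
 * that the thread completing an async write can unlock them.)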
*/ if (LK_HOLDER(lk->lk_lock) != tid) return; lock_profile_release_lock(&lk->lock_object, false); LOCKSTAT_RECORD1(lockmgr__disown, lk, LOCKSTAT_WRITER); LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line); WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_DEC(curthread); STACK_SAVE(lk); /* * In order to preserve waiters flags, just spin. */ for (;;) { x = lockmgr_read_value(lk); MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); x &= LK_ALL_WAITERS; if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x, LK_KERNPROC | x)) return; cpu_spinwait(); } } void lockmgr_printinfo(const struct lock *lk) { struct thread *td; uintptr_t x; if (lk->lk_lock == LK_UNLOCKED) printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name); else if (lk->lk_lock & LK_SHARE) printf("lock type %s: SHARED (count %ju)\n", lk->lock_object.lo_name, (uintmax_t)LK_SHARERS(lk->lk_lock)); else { td = lockmgr_xholder(lk); if (td == (struct thread *)LK_KERNPROC) printf("lock type %s: EXCL by KERNPROC\n", lk->lock_object.lo_name); else printf("lock type %s: EXCL by thread %p " "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name, td, td->td_proc->p_pid, td->td_proc->p_comm, td->td_tid); } x = lk->lk_lock; if (x & LK_EXCLUSIVE_WAITERS) printf(" with exclusive waiters pending\n"); if (x & LK_SHARED_WAITERS) printf(" with shared waiters pending\n"); if (x & LK_EXCLUSIVE_SPINNERS) printf(" with exclusive spinners pending\n"); STACK_PRINT(lk); } int lockstatus(const struct lock *lk) { uintptr_t v, x; int ret; ret = LK_SHARED; x = lockmgr_read_value(lk); v = LK_HOLDER(x); if ((x & LK_SHARE) == 0) { if (v == (uintptr_t)curthread || v == LK_KERNPROC) ret = LK_EXCLUSIVE; else ret = LK_EXCLOTHER; } else if (x == LK_UNLOCKED) ret = 0; return (ret); } #ifdef INVARIANT_SUPPORT FEATURE(invariant_support, "Support for modules compiled with INVARIANTS option"); #ifndef INVARIANTS #undef _lockmgr_assert #endif void _lockmgr_assert(const struct lock *lk, int what, const char *file, int line) { int slocked = 0; if (SCHEDULER_STOPPED()) return; switch (what) { case KA_SLOCKED: case KA_SLOCKED | KA_NOTRECURSED: case KA_SLOCKED | KA_RECURSED: slocked = 1; case KA_LOCKED: case KA_LOCKED | KA_NOTRECURSED: case KA_LOCKED | KA_RECURSED: #ifdef WITNESS /* * We cannot trust WITNESS if the lock is held in exclusive * mode and a call to lockmgr_disown() happened. * Workaround this skipping the check if the lock is held in * exclusive mode even for the KA_LOCKED case. */ if (slocked || (lk->lk_lock & LK_SHARE)) { witness_assert(&lk->lock_object, what, file, line); break; } #endif if (lk->lk_lock == LK_UNLOCKED || ((lk->lk_lock & LK_SHARE) == 0 && (slocked || (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))))) panic("Lock %s not %slocked @ %s:%d\n", lk->lock_object.lo_name, slocked ? 
"share" : "", file, line); if ((lk->lk_lock & LK_SHARE) == 0) { if (lockmgr_recursed(lk)) { if (what & KA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } else if (what & KA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } break; case KA_XLOCKED: case KA_XLOCKED | KA_NOTRECURSED: case KA_XLOCKED | KA_RECURSED: if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)) panic("Lock %s not exclusively locked @ %s:%d\n", lk->lock_object.lo_name, file, line); if (lockmgr_recursed(lk)) { if (what & KA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } else if (what & KA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); break; case KA_UNLOCKED: if (lockmgr_xlocked(lk) || lockmgr_disowned(lk)) panic("Lock %s exclusively locked @ %s:%d\n", lk->lock_object.lo_name, file, line); break; default: panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file, line); } } #endif #ifdef DDB int lockmgr_chain(struct thread *td, struct thread **ownerp) { const struct lock *lk; lk = td->td_wchan; if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr) return (0); db_printf("blocked on lockmgr %s", lk->lock_object.lo_name); if (lk->lk_lock & LK_SHARE) db_printf("SHARED (count %ju)\n", (uintmax_t)LK_SHARERS(lk->lk_lock)); else db_printf("EXCL\n"); *ownerp = lockmgr_xholder(lk); return (1); } static void db_show_lockmgr(const struct lock_object *lock) { struct thread *td; const struct lock *lk; lk = (const struct lock *)lock; db_printf(" state: "); if (lk->lk_lock == LK_UNLOCKED) db_printf("UNLOCKED\n"); else if (lk->lk_lock & LK_SHARE) db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock)); else { td = lockmgr_xholder(lk); if (td == (struct thread *)LK_KERNPROC) db_printf("XLOCK: LK_KERNPROC\n"); else db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm); if (lockmgr_recursed(lk)) db_printf(" recursed: %d\n", lk->lk_recurse); } db_printf(" waiters: "); switch (lk->lk_lock & LK_ALL_WAITERS) { case LK_SHARED_WAITERS: db_printf("shared\n"); break; case LK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case LK_ALL_WAITERS: db_printf("shared and exclusive\n"); break; default: db_printf("none\n"); } db_printf(" spinners: "); if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS) db_printf("exclusive\n"); else db_printf("none\n"); } #endif diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c index 18e6ba232c4a..4cc8ac8ce106 100644 --- a/sys/kern/kern_sx.c +++ b/sys/kern/kern_sx.c @@ -1,1575 +1,1563 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007 Attilio Rao * Copyright (c) 2001 Jason Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ /* * Shared/exclusive locks. This implementation attempts to ensure * deterministic lock granting behavior, so that slocks and xlocks are * interleaved. * * Priority propagation will not generally raise the priority of lock holders, * so should not be relied upon in combination with sx locks. */ #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_sx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #include #endif #ifdef DDB #include #endif #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #define ADAPTIVE_SX #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* Handy macros for sleep queues. */ #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 /* * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file. We * drop Giant anytime we have to sleep or if we adaptively spin. */ #define GIANT_DECLARE \ int _giantcnt = 0; \ WITNESS_SAVE_DECL(Giant) \ #define GIANT_SAVE(work) do { \ if (__predict_false(mtx_owned(&Giant))) { \ work++; \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _giantcnt++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) #define GIANT_RESTORE() do { \ if (_giantcnt > 0) { \ mtx_assert(&Giant, MA_NOTOWNED); \ while (_giantcnt--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) /* * Returns true if an exclusive lock is recursed. It assumes * curthread currently has an exclusive lock. 
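 * For example, after a second sx_xlock() on a lock initialized with
 * SX_RECURSE, sx_recursed() stays true until the matching inner
 * sx_xunlock() drops sx_recurse back to zero.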
*/ #define sx_recursed(sx) ((sx)->sx_recurse != 0) static void assert_sx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_sx(const struct lock_object *lock); #endif static void lock_sx(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_sx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_sx(struct lock_object *lock); struct lock_class lock_class_sx = { .lc_name = "sx", .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_sx, #ifdef DDB .lc_ddb_show = db_show_sx, #endif .lc_lock = lock_sx, .lc_unlock = unlock_sx, #ifdef KDTRACE_HOOKS .lc_owner = owner_sx, #endif }; #ifndef INVARIANTS #define _sx_assert(sx, what, file, line) #endif #ifdef ADAPTIVE_SX #ifdef SX_CUSTOM_BACKOFF static u_short __read_frequently asx_retries; static u_short __read_frequently asx_loops; static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "sxlock debugging"); SYSCTL_U16(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, ""); SYSCTL_U16(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, ""); static struct lock_delay_config __read_frequently sx_delay; SYSCTL_U16(_debug_sx, OID_AUTO, delay_base, CTLFLAG_RW, &sx_delay.base, 0, ""); SYSCTL_U16(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max, 0, ""); static void sx_lock_delay_init(void *arg __unused) { lock_delay_default_init(&sx_delay); asx_retries = 10; asx_loops = max(10000, sx_delay.max); } LOCK_DELAY_SYSINIT(sx_lock_delay_init); #else #define sx_delay locks_delay #define asx_retries locks_delay_retries #define asx_loops locks_delay_loops #endif #endif void assert_sx(const struct lock_object *lock, int what) { sx_assert((const struct sx *)lock, what); } void lock_sx(struct lock_object *lock, uintptr_t how) { struct sx *sx; sx = (struct sx *)lock; if (how) sx_slock(sx); else sx_xlock(sx); } uintptr_t unlock_sx(struct lock_object *lock) { struct sx *sx; sx = (struct sx *)lock; sx_assert(sx, SA_LOCKED | SA_NOTRECURSED); if (sx_xlocked(sx)) { sx_xunlock(sx); return (0); } else { sx_sunlock(sx); return (1); } } #ifdef KDTRACE_HOOKS int owner_sx(const struct lock_object *lock, struct thread **owner) { const struct sx *sx; uintptr_t x; sx = (const struct sx *)lock; x = sx->sx_lock; *owner = NULL; return ((x & SX_LOCK_SHARED) != 0 ? 
(SX_SHARERS(x) != 0) : ((*owner = (struct thread *)SX_OWNER(x)) != NULL)); } #endif void sx_sysinit(void *arg) { struct sx_args *sargs = arg; sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags); } void sx_init_flags(struct sx *sx, const char *description, int opts) { int flags; MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK | SX_NOPROFILE | SX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock, ("%s: sx_lock not aligned for %s: %p", __func__, description, &sx->sx_lock)); flags = LO_SLEEPABLE | LO_UPGRADABLE; if (opts & SX_DUPOK) flags |= LO_DUPOK; if (opts & SX_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & SX_NOWITNESS)) flags |= LO_WITNESS; if (opts & SX_RECURSE) flags |= LO_RECURSABLE; if (opts & SX_QUIET) flags |= LO_QUIET; if (opts & SX_NEW) flags |= LO_NEW; lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags); sx->sx_lock = SX_LOCK_UNLOCKED; sx->sx_recurse = 0; } void sx_destroy(struct sx *sx) { KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held")); KASSERT(sx->sx_recurse == 0, ("sx lock still recursed")); sx->sx_lock = SX_LOCK_DESTROYED; lock_destroy(&sx->lock_object); } int sx_try_slock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF) { uintptr_t x; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); x = sx->sx_lock; for (;;) { KASSERT(x != SX_LOCK_DESTROYED, ("sx_try_slock() of destroyed sx @ %s:%d", file, line)); if (!(x & SX_LOCK_SHARED)) break; if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, x + SX_ONE_SHARER)) { LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line); WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); curthread->td_sx_slocks++; return (1); } } LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line); return (0); } int sx_try_slock_(struct sx *sx, const char *file, int line) { return (sx_try_slock_int(sx LOCK_FILE_LINE_ARG)); } int _sx_xlock(struct sx *sx, int opts, const char *file, int line) { uintptr_t tid, x; int error = 0; KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() || !TD_IS_IDLETHREAD(curthread), ("sx_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xlock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); tid = (uintptr_t)curthread; x = SX_LOCK_UNLOCKED; if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid)) error = _sx_xlock_hard(sx, x, opts LOCK_FILE_LINE_ARG); else LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_WRITER); if (!error) { LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } return (error); } int sx_try_xlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t tid, x; int rval; bool recursed; td = curthread; tid = (uintptr_t)td; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td), ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_xlock() of destroyed sx @ %s:%d", file, line)); rval = 1; recursed = false; x = SX_LOCK_UNLOCKED; for (;;) { if 
(atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid)) break; if (x == SX_LOCK_UNLOCKED) continue; if (x == tid && (sx->lock_object.lo_flags & LO_RECURSABLE)) { sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); recursed = true; break; } rval = 0; break; } LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!recursed) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } int sx_try_xlock_(struct sx *sx, const char *file, int line) { return (sx_try_xlock_int(sx LOCK_FILE_LINE_ARG)); } void _sx_xunlock(struct sx *sx, const char *file, int line) { KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); #if LOCK_DEBUG > 0 _sx_xunlock_hard(sx, (uintptr_t)curthread, file, line); #else __sx_xunlock(sx, curthread, file, line); #endif TD_LOCKS_DEC(curthread); } /* * Try to do a non-blocking upgrade from a shared lock to an exclusive lock. * This will only succeed if this thread holds a single shared lock. * Return 1 if the upgrade succeeded, 0 otherwise. */ int sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF) { uintptr_t x; uintptr_t waiters; int success; if (SCHEDULER_STOPPED()) return (1); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); /* * Try to switch from one shared lock to an exclusive lock. We need * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that * we will wake up the exclusive waiters when we drop the lock. */ success = 0; x = SX_READ_VALUE(sx); for (;;) { if (SX_SHARERS(x) > 1) break; waiters = (x & SX_LOCK_WAITERS); if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, (uintptr_t)curthread | waiters)) { success = 1; break; } } LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line); if (success) { curthread->td_sx_slocks--; WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(sx__upgrade, sx); } return (success); } int sx_try_upgrade_(struct sx *sx, const char *file, int line) { return (sx_try_upgrade_int(sx LOCK_FILE_LINE_ARG)); } /* * Downgrade an unrecursed exclusive lock into a single shared lock. */ void sx_downgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF) { uintptr_t x; - int wakeup_swapper; if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_downgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line); #ifndef INVARIANTS if (sx_recursed(sx)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line); /* * Try to switch from an exclusive lock with no shared waiters * to one sharer with no shared waiters. If there are * exclusive waiters, we don't need to lock the sleep queue so * long as we preserve the flag. We do one quick try and if * that fails we grab the sleepq lock to keep the flags from * changing and do it the slow way. * * We have to lock the sleep queue if there are shared waiters * so we can wake them up.
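 *
 * A typical use, as a sketch: a writer finishes updating shared state
 * and continues as a reader without ever dropping the lock:
 *
 *	sx_xlock(&sx);
 *	... modify the protected data ...
 *	sx_downgrade(&sx);
 *	... keep reading, now concurrently with other readers ...
 *	sx_sunlock(&sx);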
*/ x = sx->sx_lock; if (!(x & SX_LOCK_SHARED_WAITERS) && atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS))) goto out; /* * Lock the sleep queue so we can read the waiters bits * without any races and wakeup any shared waiters. */ sleepq_lock(&sx->lock_object); /* * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single * shared lock. If there are any shared waiters, wake them up. */ - wakeup_swapper = 0; x = sx->sx_lock; atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS)); if (x & SX_LOCK_SHARED_WAITERS) - wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, - 0, SQ_SHARED_QUEUE); + sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, + SQ_SHARED_QUEUE); sleepq_release(&sx->lock_object); - if (wakeup_swapper) - kick_proc0(); - out: curthread->td_sx_slocks++; LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(sx__downgrade, sx); } void sx_downgrade_(struct sx *sx, const char *file, int line) { sx_downgrade_int(sx LOCK_FILE_LINE_ARG); } #ifdef ADAPTIVE_SX static inline void sx_drop_critical(uintptr_t x, bool *in_critical, int *extra_work) { if (x & SX_LOCK_WRITE_SPINNER) return; if (*in_critical) { critical_exit(); *in_critical = false; (*extra_work)--; } } #else #define sx_drop_critical(x, in_critical, extra_work) do { } while (0) #endif /* * This function represents the so-called 'hard case' for sx_xlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ int _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF) { GIANT_DECLARE; uintptr_t tid, setx; #ifdef ADAPTIVE_SX struct thread *owner; u_int i, n, spintries = 0; enum { READERS, WRITER } sleep_reason = READERS; bool in_critical = false; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int error = 0; #if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) uintptr_t state = 0; int doing_lockprof = 0; #endif int extra_work = 0; tid = (uintptr_t)curthread; #ifdef KDTRACE_HOOKS if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) { while (x == SX_LOCK_UNLOCKED) { if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid)) goto out_lockstat; } extra_work = 1; doing_lockprof = 1; all_time -= lockstat_nsecs(&sx->lock_object); state = x; } #endif #ifdef LOCK_PROFILING extra_work = 1; doing_lockprof = 1; state = x; #endif if (SCHEDULER_STOPPED()) return (0); if (__predict_false(x == SX_LOCK_UNLOCKED)) x = SX_READ_VALUE(sx); /* If we already hold an exclusive lock, then recurse. 
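 * (Recursion is only legal if the lock was created with SX_RECURSE and
 * thus carries LO_RECURSABLE; the KASSERT below fires otherwise.)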
*/ if (__predict_false(lv_sx_owner(x) == (struct thread *)tid)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n", sx->lock_object.lo_name, file, line)); sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx); return (0); } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); #if defined(ADAPTIVE_SX) lock_delay_arg_init(&lda, &sx_delay); #elif defined(KDTRACE_HOOKS) lock_delay_arg_init_noadapt(&lda); #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, false, &contested, &waittime); #ifndef INVARIANTS GIANT_SAVE(extra_work); #endif THREAD_CONTENDS_ON_LOCK(&sx->lock_object); for (;;) { if (x == SX_LOCK_UNLOCKED) { if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid)) break; continue; } #ifdef INVARIANTS GIANT_SAVE(extra_work); #endif #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_SX if (x == (SX_LOCK_SHARED | SX_LOCK_WRITE_SPINNER)) { if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid)) break; continue; } /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. */ if ((x & SX_LOCK_SHARED) == 0) { sx_drop_critical(x, &in_critical, &extra_work); sleep_reason = WRITER; owner = lv_sx_owner(x); if (!TD_IS_RUNNING(owner)) goto sleepq; if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); do { lock_delay(&lda); x = SX_READ_VALUE(sx); owner = lv_sx_owner(x); } while (owner != NULL && TD_IS_RUNNING(owner)); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } else if (SX_SHARERS(x) > 0) { sleep_reason = READERS; if (spintries == asx_retries) goto sleepq; if (!(x & SX_LOCK_WRITE_SPINNER)) { if (!in_critical) { critical_enter(); in_critical = true; extra_work++; } if (!atomic_fcmpset_ptr(&sx->sx_lock, &x, x | SX_LOCK_WRITE_SPINNER)) { critical_exit(); in_critical = false; extra_work--; continue; } } spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); n = SX_SHARERS(x); for (i = 0; i < asx_loops; i += n) { lock_delay_spin(n); x = SX_READ_VALUE(sx); if (!(x & SX_LOCK_WRITE_SPINNER)) break; if (!(x & SX_LOCK_SHARED)) break; n = SX_SHARERS(x); if (n == 0) break; } #ifdef KDTRACE_HOOKS lda.spin_cnt += i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i < asx_loops) continue; } sleepq: #endif sleepq_lock(&sx->lock_object); x = SX_READ_VALUE(sx); retry_sleepq: /* * If the lock was released while spinning on the * sleep queue chain lock, try again. */ if (x == SX_LOCK_UNLOCKED) { sleepq_release(&sx->lock_object); sx_drop_critical(x, &in_critical, &extra_work); continue; } #ifdef ADAPTIVE_SX /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the sleep queue * chain lock. If so, drop the sleep queue lock and try * again. 
*/ if (!(x & SX_LOCK_SHARED)) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); sx_drop_critical(x, &in_critical, &extra_work); continue; } } else if (SX_SHARERS(x) > 0 && sleep_reason == WRITER) { sleepq_release(&sx->lock_object); sx_drop_critical(x, &in_critical, &extra_work); continue; } #endif /* * If an exclusive lock was released with both shared * and exclusive waiters and a shared waiter hasn't * woken up and acquired the lock yet, sx_lock will be * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS. * If we see that value, try to acquire it once. Note * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS * as there are other exclusive waiters still. If we * fail, restart the loop. */ setx = x & (SX_LOCK_WAITERS | SX_LOCK_WRITE_SPINNER); if ((x & ~setx) == SX_LOCK_SHARED) { setx &= ~SX_LOCK_WRITE_SPINNER; if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid | setx)) goto retry_sleepq; sleepq_release(&sx->lock_object); CTR2(KTR_LOCK, "%s: %p claimed by new writer", __func__, sx); break; } #ifdef ADAPTIVE_SX /* * It is possible we set the SX_LOCK_WRITE_SPINNER bit. * It is an invariant that when the bit is set, there is * a writer ready to grab the lock. Thus clear the bit since * we are going to sleep. */ if (in_critical) { if ((x & SX_LOCK_WRITE_SPINNER) || !(x & SX_LOCK_EXCLUSIVE_WAITERS)) { setx = x & ~SX_LOCK_WRITE_SPINNER; setx |= SX_LOCK_EXCLUSIVE_WAITERS; if (!atomic_fcmpset_ptr(&sx->sx_lock, &x, setx)) { goto retry_sleepq; } } critical_exit(); in_critical = false; } else { #endif /* * Try to set the SX_LOCK_EXCLUSIVE_WAITERS flag. If we fail, * then loop back and retry. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { if (!atomic_fcmpset_ptr(&sx->sx_lock, &x, x | SX_LOCK_EXCLUSIVE_WAITERS)) { goto retry_sleepq; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set excl waiters flag", __func__, sx); } #ifdef ADAPTIVE_SX } #endif /* * Since we have been unable to acquire the exclusive * lock and the exclusive waiters flag is set, we have * to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); /* * Hack: this can land in thread_suspend_check which will * conditionally take a mutex, tripping over an assert if a * lock we are waiting for is set.
*/ THREAD_CONTENTION_DONE(&sx->lock_object); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); THREAD_CONTENDS_ON_LOCK(&sx->lock_object); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); x = SX_READ_VALUE(sx); } THREAD_CONTENTION_DONE(&sx->lock_object); if (__predict_true(!extra_work)) return (error); #ifdef ADAPTIVE_SX if (in_critical) critical_exit(); #endif GIANT_RESTORE(); #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return (error); #endif #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); out_lockstat: #endif if (!error) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, contested, waittime, file, line, LOCKSTAT_WRITER); return (error); } /* * This function represents the so-called 'hard case' for sx_xunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF) { uintptr_t tid, setx; - int queue, wakeup_swapper; + int queue; if (SCHEDULER_STOPPED()) return; tid = (uintptr_t)curthread; if (__predict_false(x == tid)) x = SX_READ_VALUE(sx); MPASS(!(x & SX_LOCK_SHARED)); if (__predict_false(x & SX_LOCK_RECURSED)) { /* The lock is recursed, unrecurse one level. */ if ((--sx->sx_recurse) == 0) atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx); return; } LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_WRITER); if (x == tid && atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)) return; if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, sx); sleepq_lock(&sx->lock_object); x = SX_READ_VALUE(sx); MPASS(x & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)); /* * The wake up algorithm here is quite simple and probably not * ideal. It gives precedence to shared waiters if they are * present. For this condition, we have to preserve the * state of the exclusive waiters flag. * If interruptible sleeps left the shared queue empty avoid a * starvation for the threads sleeping on the exclusive queue by giving * them precedence and cleaning up the shared waiters bit anyway. */ setx = SX_LOCK_UNLOCKED; queue = SQ_SHARED_QUEUE; if ((x & SX_LOCK_EXCLUSIVE_WAITERS) != 0 && sleepq_sleepcnt(&sx->lock_object, SQ_EXCLUSIVE_QUEUE) != 0) { queue = SQ_EXCLUSIVE_QUEUE; setx |= (x & SX_LOCK_SHARED_WAITERS); } atomic_store_rel_ptr(&sx->sx_lock, setx); /* Wake up all the waiters for the specific queue. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue", __func__, sx, queue == SQ_SHARED_QUEUE ? 
"shared" : "exclusive"); - wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, - queue); + sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, queue); sleepq_release(&sx->lock_object); - if (wakeup_swapper) - kick_proc0(); } static __always_inline bool __sx_can_read(struct thread *td, uintptr_t x, bool fp) { if ((x & (SX_LOCK_SHARED | SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_WRITE_SPINNER)) == SX_LOCK_SHARED) return (true); if (!fp && td->td_sx_slocks && (x & SX_LOCK_SHARED)) return (true); return (false); } static __always_inline bool __sx_slock_try(struct sx *sx, struct thread *td, uintptr_t *xp, bool fp LOCK_FILE_LINE_ARG_DEF) { /* * If no other thread has an exclusive lock then try to bump up * the count of sharers. Since we have to preserve the state * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the * shared lock loop back and retry. */ while (__sx_can_read(td, *xp, fp)) { if (atomic_fcmpset_acq_ptr(&sx->sx_lock, xp, *xp + SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, sx, (void *)*xp, (void *)(*xp + SX_ONE_SHARER)); td->td_sx_slocks++; return (true); } } return (false); } static int __noinline _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF) { GIANT_DECLARE; struct thread *td; #ifdef ADAPTIVE_SX struct thread *owner; u_int i, n, spintries = 0; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int error = 0; #if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) uintptr_t state = 0; #endif int extra_work __sdt_used = 0; td = curthread; #ifdef KDTRACE_HOOKS if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) { if (__sx_slock_try(sx, td, &x, false LOCK_FILE_LINE_ARG)) goto out_lockstat; extra_work = 1; all_time -= lockstat_nsecs(&sx->lock_object); state = x; } #endif #ifdef LOCK_PROFILING extra_work = 1; state = x; #endif if (SCHEDULER_STOPPED()) return (0); #if defined(ADAPTIVE_SX) lock_delay_arg_init(&lda, &sx_delay); #elif defined(KDTRACE_HOOKS) lock_delay_arg_init_noadapt(&lda); #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, false, &contested, &waittime); #ifndef INVARIANTS GIANT_SAVE(extra_work); #endif THREAD_CONTENDS_ON_LOCK(&sx->lock_object); /* * As with rwlocks, we don't make any attempt to try to block * shared locks once there is an exclusive waiter. */ for (;;) { if (__sx_slock_try(sx, td, &x, false LOCK_FILE_LINE_ARG)) break; #ifdef INVARIANTS GIANT_SAVE(extra_work); #endif #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. 
*/ if ((x & SX_LOCK_SHARED) == 0) { owner = lv_sx_owner(x); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); do { lock_delay(&lda); x = SX_READ_VALUE(sx); owner = lv_sx_owner(x); } while (owner != NULL && TD_IS_RUNNING(owner)); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else { if ((x & SX_LOCK_WRITE_SPINNER) && SX_SHARERS(x) == 0) { MPASS(!__sx_can_read(td, x, false)); lock_delay_spin(2); x = SX_READ_VALUE(sx); continue; } if (spintries < asx_retries) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); n = SX_SHARERS(x); for (i = 0; i < asx_loops; i += n) { lock_delay_spin(n); x = SX_READ_VALUE(sx); if (!(x & SX_LOCK_SHARED)) break; n = SX_SHARERS(x); if (n == 0) break; if (__sx_can_read(td, x, false)) break; } #ifdef KDTRACE_HOOKS lda.spin_cnt += i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i < asx_loops) continue; } } #endif /* * Some other thread already has an exclusive lock, so * start the process of blocking. */ sleepq_lock(&sx->lock_object); x = SX_READ_VALUE(sx); retry_sleepq: if (((x & SX_LOCK_WRITE_SPINNER) && SX_SHARERS(x) == 0) || __sx_can_read(td, x, false)) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if (!(x & SX_LOCK_SHARED)) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); x = SX_READ_VALUE(sx); continue; } } #endif /* * Try to set the SX_LOCK_SHARED_WAITERS flag. If we * fail to set it drop the sleep queue lock and loop * back. */ if (!(x & SX_LOCK_SHARED_WAITERS)) { if (!atomic_fcmpset_ptr(&sx->sx_lock, &x, x | SX_LOCK_SHARED_WAITERS)) goto retry_sleepq; if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set shared waiters flag", __func__, sx); } /* * Since we have been unable to acquire the shared lock, * we have to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); /* * Hack: this can land in thread_suspend_check which will * conditionally take a mutex, tripping over an assert if a * lock we are waiting for is set. 
*/ THREAD_CONTENTION_DONE(&sx->lock_object); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); THREAD_CONTENDS_ON_LOCK(&sx->lock_object); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); x = SX_READ_VALUE(sx); } THREAD_CONTENTION_DONE(&sx->lock_object); #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!extra_work)) return (error); #endif #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); out_lockstat: #endif if (error == 0) { LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, contested, waittime, file, line, LOCKSTAT_READER); } GIANT_RESTORE(); return (error); } int _sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t x; int error; KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() || !TD_IS_IDLETHREAD(curthread), ("sx_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_slock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL); error = 0; td = curthread; x = SX_READ_VALUE(sx); if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) || !__sx_slock_try(sx, td, &x, true LOCK_FILE_LINE_ARG))) error = _sx_slock_hard(sx, opts, x LOCK_FILE_LINE_ARG); else lock_profile_obtain_lock_success(&sx->lock_object, false, 0, 0, file, line); if (error == 0) { LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line); WITNESS_LOCK(&sx->lock_object, 0, file, line); TD_LOCKS_INC(curthread); } return (error); } int _sx_slock(struct sx *sx, int opts, const char *file, int line) { return (_sx_slock_int(sx, opts LOCK_FILE_LINE_ARG)); } static __always_inline bool _sx_sunlock_try(struct sx *sx, struct thread *td, uintptr_t *xp) { for (;;) { if (SX_SHARERS(*xp) > 1 || !(*xp & SX_LOCK_WAITERS)) { if (atomic_fcmpset_rel_ptr(&sx->sx_lock, xp, *xp - SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, sx, (void *)*xp, (void *)(*xp - SX_ONE_SHARER)); td->td_sx_slocks--; return (true); } continue; } break; } return (false); } static void __noinline _sx_sunlock_hard(struct sx *sx, struct thread *td, uintptr_t x LOCK_FILE_LINE_ARG_DEF) { - int wakeup_swapper = 0; uintptr_t setx, queue; if (SCHEDULER_STOPPED()) return; if (_sx_sunlock_try(sx, td, &x)) goto out_lockstat; sleepq_lock(&sx->lock_object); x = SX_READ_VALUE(sx); for (;;) { if (_sx_sunlock_try(sx, td, &x)) break; /* * Wake up semantic here is quite simple: * Just wake up all the exclusive waiters. * Note that the state of the lock could have changed, * so if it fails loop back and retry. 
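 * (Waking the exclusive queue whenever writers are queued means the
 * last reader hands off to a writer first, so a steady stream of
 * readers cannot starve writers indefinitely.)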
*/ setx = SX_LOCK_UNLOCKED; queue = SQ_SHARED_QUEUE; if (x & SX_LOCK_EXCLUSIVE_WAITERS) { setx |= (x & SX_LOCK_SHARED_WAITERS); queue = SQ_EXCLUSIVE_QUEUE; } setx |= (x & SX_LOCK_WRITE_SPINNER); if (!atomic_fcmpset_rel_ptr(&sx->sx_lock, &x, setx)) continue; if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p waking up all threads on exclusive queue", __func__, sx); - wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, - 0, queue); + sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, queue); td->td_sx_slocks--; break; } sleepq_release(&sx->lock_object); - if (wakeup_swapper) - kick_proc0(); out_lockstat: LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER); } void _sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t x; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_sunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line); td = curthread; x = SX_READ_VALUE(sx); if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) || !_sx_sunlock_try(sx, td, &x))) _sx_sunlock_hard(sx, td, x LOCK_FILE_LINE_ARG); else lock_profile_release_lock(&sx->lock_object, false); TD_LOCKS_DEC(curthread); } void _sx_sunlock(struct sx *sx, const char *file, int line) { _sx_sunlock_int(sx LOCK_FILE_LINE_ARG); } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef _sx_assert #endif /* * In the non-WITNESS case, sx_assert() can only detect that at least * *some* thread owns an slock, but it cannot guarantee that *this* * thread owns an slock. */ void _sx_assert(const struct sx *sx, int what, const char *file, int line) { #ifndef WITNESS int slocked = 0; #endif if (SCHEDULER_STOPPED()) return; switch (what) { case SA_SLOCKED: case SA_SLOCKED | SA_NOTRECURSED: case SA_SLOCKED | SA_RECURSED: #ifndef WITNESS slocked = 1; /* FALLTHROUGH */ #endif case SA_LOCKED: case SA_LOCKED | SA_NOTRECURSED: case SA_LOCKED | SA_RECURSED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If some other thread has an exclusive lock or we * have one and are asserting a shared lock, fail. * Also, if no one has a lock at all, fail. */ if (sx->sx_lock == SX_LOCK_UNLOCKED || (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked || sx_xholder(sx) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", sx->lock_object.lo_name, slocked ? "share " : "", file, line); if (!(sx->sx_lock & SX_LOCK_SHARED)) { if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } #endif break; case SA_XLOCKED: case SA_XLOCKED | SA_NOTRECURSED: case SA_XLOCKED | SA_RECURSED: if (sx_xholder(sx) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); break; case SA_UNLOCKED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If we hold an exclusive lock, fail. We can't * reliably check to see if we hold a shared lock or * not.
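 * (Shared holders are anonymous: the lock word only keeps a sharer
 * count, and td_sx_slocks only counts how many slocks curthread holds,
 * not which locks they are.)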
*/ if (sx_xholder(sx) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); #endif break; default: panic("Unknown sx lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB static void db_show_sx(const struct lock_object *lock) { struct thread *td; const struct sx *sx; sx = (const struct sx *)lock; db_printf(" state: "); if (sx->sx_lock == SX_LOCK_UNLOCKED) db_printf("UNLOCKED\n"); else if (sx->sx_lock == SX_LOCK_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else { td = sx_xholder(sx); db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (sx_recursed(sx)) db_printf(" recursed: %d\n", sx->sx_recurse); } db_printf(" waiters: "); switch(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) { case SX_LOCK_SHARED_WAITERS: db_printf("shared\n"); break; case SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive and shared\n"); break; default: db_printf("none\n"); } } /* * Check to see if a thread that is blocked on a sleep queue is actually * blocked on an sx lock. If so, output some details and return true. * If the lock has an exclusive owner, return that in *ownerp. */ int sx_chain(struct thread *td, struct thread **ownerp) { const struct sx *sx; /* * Check to see if this thread is blocked on an sx lock. * First, we check the lock class. If that is ok, then we * compare the lock name against the wait message. */ sx = td->td_wchan; if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx || sx->lock_object.lo_name != td->td_wmesg) return (0); /* We think we have an sx lock, so output some details. */ db_printf("blocked on sx \"%s\" ", td->td_wmesg); *ownerp = sx_xholder(sx); if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK (count %ju)\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else db_printf("XLOCK\n"); return (1); } #endif diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 9bb6079c0c20..8e956324ee23 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,696 +1,680 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ktrace.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef EPOCH_TRACE #include #endif #include static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL); int hogticks; static const char pause_wchan[MAXCPU]; static struct callout loadav_callout; struct loadavg averunnable = { {0, 0, 0}, FSCALE }; /* load average, of runnable procs */ /* * Constants for averages over 1, 5, and 15 minutes * when sampling at 5 second intervals. */ static uint64_t cexp[3] = { 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 0.9944598480048967 * FSCALE, /* exp(-1/180) */ }; /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE, "Fixed-point scale factor used for calculating load average values"); static void loadav(void *arg); SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE(sched, , , preempt); static void sleepinit(void *unused) { hogticks = (hz / 10) * 2; /* Default only. */ init_sleepqueues(); } /* * vmem tries to lock the sleepq mutexes when free'ing kva, so make sure * it is available. */ SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, NULL); /* * General sleep call. Suspends the current thread until a wakeup is * performed on the specified identifier. The thread will then be made * runnable with the specified priority. Sleeps at most sbt units of time * (0 means no timeout). If pri includes the PCATCH flag, let signals * interrupt the sleep, otherwise ignore them while sleeping. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal becomes pending, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). * * The lock argument is unlocked before the caller is suspended, and * re-locked before _sleep() returns. If priority includes the PDROP * flag the lock is not re-locked before returning. 
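 *
 * A minimal consumer sketch (the softc and its fields are
 * hypothetical), using the msleep() wrapper, which is defined in
 * terms of _sleep() with a mutex as the interlock:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (!sc->sc_ready) {
 *		error = msleep(&sc->sc_ready, &sc->sc_mtx, PCATCH,
 *		    "scready", hz);
 *		if (error != 0 && error != EWOULDBLOCK)
 *			break;		(EINTR or ERESTART: signal)
 *	}
 *	mtx_unlock(&sc->sc_mtx);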
*/ int _sleep(const void *ident, struct lock_object *lock, int priority, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td __ktrace_used; struct lock_class *class; uintptr_t lock_state; int catch, pri, rval, sleepq_flags; WITNESS_SAVE_DECL(lock_witness); TSENTER(); td = curthread; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, wmesg); #endif WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Sleeping on \"%s\"", wmesg); KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL || (priority & PNOLOCK) != 0, ("sleeping without a lock")); KASSERT(ident != NULL, ("_sleep: NULL ident")); KASSERT(TD_IS_RUNNING(td), ("_sleep: curthread not running")); if (priority & PDROP) KASSERT(lock != NULL && lock != &Giant.lock_object, ("PDROP requires a non-Giant lock")); if (lock != NULL) class = LOCK_CLASS(lock); else class = NULL; if (SCHEDULER_STOPPED()) { if (lock != NULL && priority & PDROP) class->lc_unlock(lock); return (0); } catch = priority & PCATCH; pri = priority & PRIMASK; KASSERT(!TD_ON_SLEEPQ(td), ("recursive sleep")); if ((uintptr_t)ident >= (uintptr_t)&pause_wchan[0] && (uintptr_t)ident <= (uintptr_t)&pause_wchan[MAXCPU - 1]) sleepq_flags = SLEEPQ_PAUSE; else sleepq_flags = SLEEPQ_SLEEP; if (catch) sleepq_flags |= SLEEPQ_INTERRUPTIBLE; sleepq_lock(ident); CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(class->lc_flags & LC_SLEEPABLE)) { KASSERT(!(class->lc_flags & LC_SPINLOCK), ("spin locks can only use msleep_spin")); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); } else /* GCC needs to follow the Yellow Brick Road */ lock_state = -1; /* * We put ourselves on the sleep queue and start our timeout * before calling thread_suspend_check, as we could stop there, * and a wakeup or a SIGCONT (or both) could occur while we were * stopped without resuming us. Thus, we must be ready for sleep * when cursig() is called. If the wakeup happens while we're * stopped, then td will no longer be on a sleep queue upon * return from cursig(). 
*/ sleepq_add(ident, lock, wmesg, sleepq_flags, 0); if (sbt != 0) sleepq_set_timeout_sbt(ident, sbt, pr, flags); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); sleepq_lock(ident); } if (sbt != 0 && catch) rval = sleepq_timedwait_sig(ident, pri); else if (sbt != 0) rval = sleepq_timedwait(ident, pri); else if (catch) rval = sleepq_wait_sig(ident, pri); else { sleepq_wait(ident, pri); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } TSEXIT(); return (rval); } int msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td __ktrace_used; int rval; WITNESS_SAVE_DECL(mtx); td = curthread; KASSERT(mtx != NULL, ("sleeping without a mutex")); KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident")); KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running")); if (SCHEDULER_STOPPED()) return (0); sleepq_lock(ident); CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident); DROP_GIANT(); mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED); WITNESS_SAVE(&mtx->lock_object, mtx); mtx_unlock_spin(mtx); /* * We put ourselves on the sleep queue and start our timeout. */ sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); if (sbt != 0) sleepq_set_timeout_sbt(ident, sbt, pr, flags); /* * Can't call ktrace with any spin locks held so it can lock the * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold * any spin lock. Thus, we have to drop the sleepq spin lock while * we handle those requests. This is safe since we have placed our * thread on the sleep queue already. */ #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { sleepq_release(ident); ktrcsw(1, 0, wmesg); sleepq_lock(ident); } #endif #ifdef WITNESS sleepq_release(ident); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"", wmesg); sleepq_lock(ident); #endif if (sbt != 0) rval = sleepq_timedwait(ident, 0); else { sleepq_wait(ident, 0); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); mtx_lock_spin(mtx); WITNESS_RESTORE(&mtx->lock_object, mtx); return (rval); } /* * pause_sbt() delays the calling thread by the given signed binary * time. During cold bootup, pause_sbt() uses the DELAY() function * instead of the _sleep() function to do the waiting. The "sbt" * argument must be greater than or equal to zero. A "sbt" value of * zero is equivalent to a "sbt" value of one tick. */ int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { KASSERT(sbt >= 0, ("pause_sbt: timeout must be >= 0")); /* silently convert invalid timeouts */ if (sbt == 0) sbt = tick_sbt; if ((cold && curthread == &thread0) || kdb_active || SCHEDULER_STOPPED()) { /* * We delay one second at a time to avoid overflowing the * system specific DELAY() function(s): */ while (sbt >= SBT_1S) { DELAY(1000000); sbt -= SBT_1S; } /* Do the delay remainder, if any */ sbt = howmany(sbt, SBT_1US); if (sbt > 0) DELAY(sbt); return (EWOULDBLOCK); } return (_sleep(&pause_wchan[curcpu], NULL, (flags & C_CATCH) ? PCATCH : 0, wmesg, sbt, pr, flags)); } /* * Make all threads sleeping on the specified identifier runnable. 
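 *
 * The producer half of the msleep() sketch in the comment above
 * _sleep() (same hypothetical softc): update the condition under the
 * same lock, then wake the channel:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_ready = 1;
 *	wakeup(&sc->sc_ready);
 *	mtx_unlock(&sc->sc_mtx);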
*/ void wakeup(const void *ident) { - int wakeup_swapper; - sleepq_lock(ident); - wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0); + sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); - if (wakeup_swapper) { - KASSERT(ident != &proc0, - ("wakeup and wakeup_swapper and proc0")); - kick_proc0(); - } } /* * Make a thread sleeping on the specified identifier runnable. * May wake more than one thread if a target thread is currently * swapped out. */ void wakeup_one(const void *ident) { - int wakeup_swapper; - sleepq_lock(ident); - wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_DROP, 0, 0); - if (wakeup_swapper) - kick_proc0(); + sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_DROP, 0, 0); } void wakeup_any(const void *ident) { - int wakeup_swapper; - sleepq_lock(ident); - wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR | - SLEEPQ_DROP, 0, 0); - if (wakeup_swapper) - kick_proc0(); + sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR | SLEEPQ_DROP, 0, 0); } /* * Signal sleeping waiters after the counter has reached zero. */ void _blockcount_wakeup(blockcount_t *bc, u_int old) { KASSERT(_BLOCKCOUNT_WAITERS(old), ("%s: no waiters on %p", __func__, bc)); if (atomic_cmpset_int(&bc->__count, _BLOCKCOUNT_WAITERS_FLAG, 0)) wakeup(bc); } /* * Wait for a wakeup or a signal. This does not guarantee that the count is * still zero on return. Callers wanting a precise answer should use * blockcount_wait() with an interlock. * * If there is no work to wait for, return 0. If the sleep was interrupted by a * signal, return EINTR or ERESTART, and return EAGAIN otherwise. */ int _blockcount_sleep(blockcount_t *bc, struct lock_object *lock, const char *wmesg, int prio) { void *wchan; uintptr_t lock_state; u_int old; int ret; bool catch, drop; KASSERT(lock != &Giant.lock_object, ("%s: cannot use Giant as the interlock", __func__)); catch = (prio & PCATCH) != 0; drop = (prio & PDROP) != 0; prio &= PRIMASK; /* * Synchronize with the fence in blockcount_release(). If we end up * waiting, the sleepqueue lock acquisition will provide the required * side effects. * * If there is no work to wait for, but waiters are present, try to put * ourselves to sleep to avoid jumping ahead. */ if (atomic_load_acq_int(&bc->__count) == 0) { if (lock != NULL && drop) LOCK_CLASS(lock)->lc_unlock(lock); return (0); } lock_state = 0; wchan = bc; sleepq_lock(wchan); DROP_GIANT(); if (lock != NULL) lock_state = LOCK_CLASS(lock)->lc_unlock(lock); old = blockcount_read(bc); ret = 0; do { if (_BLOCKCOUNT_COUNT(old) == 0) { sleepq_release(wchan); goto out; } if (_BLOCKCOUNT_WAITERS(old)) break; } while (!atomic_fcmpset_int(&bc->__count, &old, old | _BLOCKCOUNT_WAITERS_FLAG)); sleepq_add(wchan, NULL, wmesg, catch ? SLEEPQ_INTERRUPTIBLE : 0, 0); if (catch) ret = sleepq_wait_sig(wchan, prio); else sleepq_wait(wchan, prio); if (ret == 0) ret = EAGAIN; out: PICKUP_GIANT(); if (lock != NULL && !drop) LOCK_CLASS(lock)->lc_lock(lock, lock_state); return (ret); } static void kdb_switch(void) { thread_unlock(curthread); kdb_backtrace(); kdb_reenter(); panic("%s: did not reenter debugger", __func__); } /* * mi_switch(9): The machine-independent parts of context switching. * * The thread lock is required on entry and is no longer held on return. 
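 *
 * Callers follow the pattern used by kern_yield() below: take the
 * thread lock and call mi_switch(), which consumes the lock, so no
 * thread_unlock() follows:
 *
 *	thread_lock(td);
 *	mi_switch(SW_VOL | SWT_RELINQUISH);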
*/ void mi_switch(int flags) { uint64_t runtime, new_switchtime; struct thread *td; td = curthread; /* XXX */ THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code")); #ifdef INVARIANTS if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td)) mtx_assert(&Giant, MA_NOTOWNED); #endif /* thread_lock() performs spinlock_enter(). */ KASSERT(td->td_critnest == 1 || KERNEL_PANICKED(), ("mi_switch: switch in a critical section")); KASSERT((flags & (SW_INVOL | SW_VOL)) != 0, ("mi_switch: switch must be voluntary or involuntary")); KASSERT((flags & SW_TYPE_MASK) != 0, ("mi_switch: a switch reason (type) must be specified")); KASSERT((flags & SW_TYPE_MASK) < SWT_COUNT, ("mi_switch: invalid switch reason %d", (flags & SW_TYPE_MASK))); /* * Don't perform context switches from the debugger. */ if (kdb_active) kdb_switch(); if (SCHEDULER_STOPPED()) return; if (flags & SW_VOL) { td->td_ru.ru_nvcsw++; td->td_swvoltick = ticks; } else { td->td_ru.ru_nivcsw++; td->td_swinvoltick = ticks; } #ifdef SCHED_STATS SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]); #endif /* * Compute the amount of time during which the current * thread was running, and add that to its total so far. */ new_switchtime = cpu_ticks(); runtime = new_switchtime - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, new_switchtime); td->td_generation++; /* bump preempt-detect counter */ VM_CNT_INC(v_swtch); PCPU_SET(switchticks, ticks); CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); #ifdef KDTRACE_HOOKS if (SDT_PROBES_ENABLED() && ((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 && (flags & SW_TYPE_MASK) == SWT_NEEDRESCHED))) SDT_PROBE0(sched, , , preempt); #endif sched_switch(td, flags); CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); /* * If the last thread was exiting, finish cleaning it up. */ if ((td = PCPU_GET(deadthread))) { PCPU_SET(deadthread, NULL); thread_stash(td); } spinlock_exit(); } /* * Change thread state to be runnable, placing it on the run queue. * * Requires the thread lock on entry, drops on exit. */ void setrunnable(struct thread *td, int srqflags) { THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(td->td_proc->p_state != PRS_ZOMBIE, ("setrunnable: pid %d is a zombie", td->td_proc->p_pid)); switch (TD_GET_STATE(td)) { case TDS_RUNNING: case TDS_RUNQ: case TDS_INHIBITED: if ((srqflags & (SRQ_HOLD | SRQ_HOLDTD)) == 0) thread_unlock(td); break; case TDS_CAN_RUN: KASSERT((td->td_flags & TDF_INMEM) != 0, ("setrunnable: td %p not in mem, flags 0x%X inhibit 0x%X", td, td->td_flags, td->td_inhibitors)); /* unlocks thread lock according to flags */ sched_wakeup(td, srqflags); break; default: panic("setrunnable: state 0x%x", TD_GET_STATE(td)); } } /* * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. */ static void loadav(void *arg) { int i; uint64_t nrun; struct loadavg *avg; nrun = (uint64_t)sched_load(); avg = &averunnable; for (i = 0; i < 3; i++) avg->ldavg[i] = (cexp[i] * (uint64_t)avg->ldavg[i] + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; /* * Schedule the next update to occur after 5 seconds, but add a * random variation to avoid synchronisation with processes that * run at regular intervals. 
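 *
 * For reference, each 5-second update above computes, in fixed point
 * (FSCALE = 1 << FSHIFT), with decay = cexp[i] = exp(-5/T) * FSCALE
 * for the averaging intervals T of 60, 300, and 900 seconds:
 *
 *	ldavg[i] = (decay * ldavg[i] +
 *	    nrun * FSCALE * (FSCALE - decay)) >> FSHIFT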
*/ callout_reset_sbt(&loadav_callout, SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US, loadav, NULL, C_DIRECT_EXEC | C_PREL(32)); } static void ast_scheduler(struct thread *td, int tda __unused) { #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 1, __func__); #endif thread_lock(td); sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL | SWT_NEEDRESCHED); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 1, __func__); #endif } static void synch_setup(void *dummy __unused) { callout_init(&loadav_callout, 1); ast_register(TDA_SCHED, ASTR_ASTF_REQUIRED, 0, ast_scheduler); /* Kick off timeout driven events by calling first time. */ loadav(NULL); } bool should_yield(void) { return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks); } void maybe_yield(void) { if (should_yield()) kern_yield(PRI_USER); } void kern_yield(int prio) { struct thread *td; td = curthread; DROP_GIANT(); thread_lock(td); if (prio == PRI_USER) prio = td->td_user_pri; if (prio >= 0) sched_prio(td, prio); mi_switch(SW_VOL | SWT_RELINQUISH); PICKUP_GIANT(); } /* * General purpose yield system call. */ int sys_yield(struct thread *td, struct yield_args *uap) { thread_lock(td); if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, PRI_MAX_TIMESHARE); mi_switch(SW_VOL | SWT_RELINQUISH); td->td_retval[0] = 0; return (0); } int sys_sched_getcpu(struct thread *td, struct sched_getcpu_args *uap) { td->td_retval[0] = td->td_oncpu; return (0); } diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index 0b792d752e94..0af666653352 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -1,1511 +1,1507 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2004 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Implementation of sleep queues used to hold queue of threads blocked on * a wait channel. Sleep queues are different from turnstiles in that wait * channels are not owned by anyone, so there is no priority propagation. * Sleep queues can also provide a timeout and can also be interrupted by * signals. That said, there are several similarities between the turnstile * and sleep queue implementations. (Note: turnstiles were implemented * first.) 
For example, both use a hash table of the same size where each * bucket is referred to as a "chain" that contains both a spin lock and * a linked list of queues. An individual queue is located by using a hash * to pick a chain, locking the chain, and then walking the chain searching * for the queue. This means that a wait channel object does not need to * embed its queue head just as locks do not embed their turnstile queue * head. Threads also carry around a sleep queue that they lend to the * wait channel when blocking. Just as in turnstiles, the queue includes * a free list of the sleep queues of other threads blocked on the same * wait channel in the case of multiple waiters. * * Some additional functionality provided by sleep queues includes the * ability to set a timeout. The timeout is managed using a per-thread * callout that resumes a thread if it is asleep. A thread may also * catch signals while it is asleep (aka an interruptible sleep). The * signal code uses sleepq_abort() to interrupt a sleeping thread. Finally, * sleep queues also provide some extra assertions. One is not allowed to * mix the sleep/wakeup and cv APIs for a given wait channel. Also, one * must consistently use the same lock to synchronize with a wait channel, * though this check is currently only a warning for sleep/wakeup due to * pre-existing abuse of that API. The same lock must also be held when * awakening threads, though that is currently only enforced for condition * variables. */ #include #include "opt_sleepqueue_profiling.h" #include "opt_ddb.h" #include "opt_sched.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef EPOCH_TRACE #include #endif #include #include #ifdef DDB #include #endif /* * Constants for the hash table of sleep queue chains. * SC_TABLESIZE must be a power of two for SC_MASK to work properly. */ #ifndef SC_TABLESIZE #define SC_TABLESIZE 256 #endif CTASSERT(powerof2(SC_TABLESIZE)); #define SC_MASK (SC_TABLESIZE - 1) #define SC_SHIFT 8 #define SC_HASH(wc) ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \ SC_MASK) #define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)] #define NR_SLEEPQS 2 /* * There are two different lists of sleep queues. Both lists are connected * via the sq_hash entries. The first list is the sleep queue chain list * that a sleep queue is on when it is attached to a wait channel. The * second list is the free list hung off of a sleep queue that is attached * to a wait channel. * * Each sleep queue also contains the wait channel it is attached to, the * list of threads blocked on that wait channel, flags specific to the * wait channel, and the lock used to synchronize with a wait channel. * The flags are used to catch mismatches between the various consumers * of the sleep queue API (e.g. sleep/wakeup and condition variables). * The lock pointer is only used when invariants are enabled for various * debugging checks. * * Locking key: * c - sleep queue chain lock */ struct sleepqueue { struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */ u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */ LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ const void *sq_wchan; /* (c) Wait channel. */ int sq_type; /* (c) Queue type. */ #ifdef INVARIANTS struct lock_object *sq_lock; /* (c) Associated lock.
*/ #endif }; struct sleepqueue_chain { LIST_HEAD(, sleepqueue) sc_queues; /* List of sleep queues. */ struct mtx sc_lock; /* Spin lock for this chain. */ #ifdef SLEEPQUEUE_PROFILING u_int sc_depth; /* Length of sc_queues. */ u_int sc_max_depth; /* Max length of sc_queues. */ #endif } __aligned(CACHE_LINE_SIZE); #ifdef SLEEPQUEUE_PROFILING static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "sleepq profiling"); static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "sleepq chain stats"); static u_int sleepq_max_depth; SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth, 0, "maximum depth achieved by a single chain"); static void sleepq_profile(const char *wmesg); static int prof_enabled; #endif static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE]; static uma_zone_t sleepq_zone; /* * Prototypes for non-exported routines. */ static int sleepq_catch_signals(const void *wchan, int pri); static inline int sleepq_check_signals(void); static inline int sleepq_check_timeout(void); #ifdef INVARIANTS static void sleepq_dtor(void *mem, int size, void *arg); #endif static int sleepq_init(void *mem, int size, int flags); static void sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri, int srqflags); static void sleepq_remove_thread(struct sleepqueue *sq, struct thread *td); static void sleepq_switch(const void *wchan, int pri); static void sleepq_timeout(void *arg); SDT_PROBE_DECLARE(sched, , , sleep); SDT_PROBE_DECLARE(sched, , , wakeup); /* * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes. * Note that it must happen after sleepinit() has been fully executed, so * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup. */ #ifdef SLEEPQUEUE_PROFILING static void init_sleepqueue_profiling(void) { char chain_name[10]; struct sysctl_oid *chain_oid; u_int i; for (i = 0; i < SC_TABLESIZE; i++) { snprintf(chain_name, sizeof(chain_name), "%u", i); chain_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO, chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "sleepq chain stats"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0, NULL); } } SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY, init_sleepqueue_profiling, NULL); #endif /* * Early initialization of sleep queues that is called from the sleepinit() * SYSINIT. */ void init_sleepqueues(void) { int i; for (i = 0; i < SC_TABLESIZE; i++) { LIST_INIT(&sleepq_chains[i].sc_queues); mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL, MTX_SPIN); } sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue), #ifdef INVARIANTS NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #else NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #endif thread0.td_sleepqueue = sleepq_alloc(); } /* * Get a sleep queue for a new thread. */ struct sleepqueue * sleepq_alloc(void) { return (uma_zalloc(sleepq_zone, M_WAITOK)); } /* * Free a sleep queue when a thread is destroyed. */ void sleepq_free(struct sleepqueue *sq) { uma_zfree(sleepq_zone, sq); } /* * Lock the sleep queue chain associated with the specified wait channel.
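 *
 * Typical access pattern (a sketch; "wchan" may be any stable kernel
 * address used as a wait channel): bracket queue operations with the
 * chain lock:
 *
 *	sleepq_lock(wchan);
 *	sq = sleepq_lookup(wchan);	(NULL if nobody is waiting)
 *	...
 *	sleepq_release(wchan);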
*/ void sleepq_lock(const void *wchan) { struct sleepqueue_chain *sc; sc = SC_LOOKUP(wchan); mtx_lock_spin(&sc->sc_lock); } /* * Look up the sleep queue associated with a given wait channel in the hash * table locking the associated sleep queue chain. If no queue is found in * the table, NULL is returned. */ struct sleepqueue * sleepq_lookup(const void *wchan) { struct sleepqueue_chain *sc; struct sleepqueue *sq; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); LIST_FOREACH(sq, &sc->sc_queues, sq_hash) if (sq->sq_wchan == wchan) return (sq); return (NULL); } /* * Unlock the sleep queue chain associated with a given wait channel. */ void sleepq_release(const void *wchan) { struct sleepqueue_chain *sc; sc = SC_LOOKUP(wchan); mtx_unlock_spin(&sc->sc_lock); } /* * Places the current thread on the sleep queue for the specified wait * channel. If INVARIANTS is enabled, then it associates the passed in * lock with the sleepq to make sure it is held when that sleep queue is * woken up. */ void sleepq_add(const void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(td->td_sleepqueue != NULL); MPASS(wchan != NULL); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); /* If this thread is not allowed to sleep, die a horrible death. */ if (__predict_false(!THREAD_CAN_SLEEP())) { #ifdef EPOCH_TRACE epoch_trace_list(curthread); #endif KASSERT(0, ("%s: td %p to sleep on wchan %p with sleeping prohibited", __func__, td, wchan)); } /* Look up the sleep queue associated with the wait channel 'wchan'. */ sq = sleepq_lookup(wchan); /* * If the wait channel does not already have a sleep queue, use * this thread's sleep queue. Otherwise, insert the current thread * into the sleep queue already in use by this wait channel. */ if (sq == NULL) { #ifdef INVARIANTS int i; sq = td->td_sleepqueue; for (i = 0; i < NR_SLEEPQS; i++) { KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]), ("thread's sleep queue %d is not empty", i)); KASSERT(sq->sq_blockedcnt[i] == 0, ("thread's sleep queue %d count mismatches", i)); } KASSERT(LIST_EMPTY(&sq->sq_free), ("thread's sleep queue has a non-empty free list")); KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer")); sq->sq_lock = lock; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth++; if (sc->sc_depth > sc->sc_max_depth) { sc->sc_max_depth = sc->sc_depth; if (sc->sc_max_depth > sleepq_max_depth) sleepq_max_depth = sc->sc_max_depth; } #endif sq = td->td_sleepqueue; LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash); sq->sq_wchan = wchan; sq->sq_type = flags & SLEEPQ_TYPE; } else { MPASS(wchan == sq->sq_wchan); MPASS(lock == sq->sq_lock); MPASS((flags & SLEEPQ_TYPE) == sq->sq_type); LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash); } thread_lock(td); TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq); sq->sq_blockedcnt[queue]++; td->td_sleepqueue = NULL; td->td_sqqueue = queue; td->td_wchan = wchan; td->td_wmesg = wmesg; if (flags & SLEEPQ_INTERRUPTIBLE) { td->td_intrval = 0; td->td_flags |= TDF_SINTR; } td->td_flags &= ~TDF_TIMEOUT; thread_unlock(td); } /* * Sets a timeout that will remove the current thread from the * specified sleep queue at the specified time if the thread has not * already been awakened. Flags are from C_* (callout) namespace. 
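 *
 * Sketch of a timed, uninterruptible sleep built from these
 * primitives, mirroring what _sleep() does in the simple case (the
 * wait message "example" is arbitrary):
 *
 *	sleepq_lock(wchan);
 *	sleepq_add(wchan, NULL, "example", SLEEPQ_SLEEP, 0);
 *	sleepq_set_timeout_sbt(wchan, SBT_1S, 0, 0);
 *	error = sleepq_timedwait(wchan, 0);	(0 or EWOULDBLOCK)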
*/ void sleepq_set_timeout_sbt(const void *wchan, sbintime_t sbt, sbintime_t pr, int flags) { struct sleepqueue_chain *sc __unused; struct thread *td; sbintime_t pr1; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_sleepqueue == NULL); MPASS(wchan != NULL); if (cold && td == &thread0) panic("timed sleep before timers are working"); KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx", td->td_tid, td, (uintmax_t)td->td_sleeptimo)); thread_lock(td); callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1); thread_unlock(td); callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1, sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC | C_DIRECT_EXEC); } /* * Return the number of actual sleepers for the specified queue. */ u_int sleepq_sleepcnt(const void *wchan, int queue) { struct sleepqueue *sq; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); return (sq->sq_blockedcnt[queue]); } static int sleepq_check_ast_sc_locked(struct thread *td, struct sleepqueue_chain *sc) { struct proc *p; int ret; mtx_assert(&sc->sc_lock, MA_OWNED); if ((td->td_pflags & TDP_WAKEUP) != 0) { td->td_pflags &= ~TDP_WAKEUP; thread_lock(td); return (EINTR); } /* * See if there are any pending signals or suspension requests for this * thread. If not, we can switch immediately. */ thread_lock(td); if (!td_ast_pending(td, TDA_SIG) && !td_ast_pending(td, TDA_SUSPEND)) return (0); thread_unlock(td); mtx_unlock_spin(&sc->sc_lock); p = td->td_proc; CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", (void *)td, (long)p->p_pid, td->td_name); PROC_LOCK(p); /* * Check for suspension first. Checking for signals and then * suspending could result in a missed signal, since a signal * can be delivered while this thread is suspended. */ ret = sig_ast_checksusp(td); if (ret != 0) { PROC_UNLOCK(p); mtx_lock_spin(&sc->sc_lock); thread_lock(td); return (ret); } ret = sig_ast_needsigchk(td); /* * Lock the per-process spinlock prior to dropping the * PROC_LOCK to avoid a signal delivery race. * PROC_LOCK, PROC_SLOCK, and thread_lock() are * currently held in tdsendsignal() and thread_single(). */ PROC_SLOCK(p); mtx_lock_spin(&sc->sc_lock); PROC_UNLOCK(p); thread_lock(td); PROC_SUNLOCK(p); return (ret); } /* * Marks the pending sleep of the current thread as interruptible and * makes an initial check for pending signals before putting a thread * to sleep. Enters and exits with the thread lock held. Thread lock * may have transitioned from the sleepq lock to a run lock. */ static int sleepq_catch_signals(const void *wchan, int pri) { struct thread *td; struct sleepqueue_chain *sc; struct sleepqueue *sq; int ret; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(wchan != NULL); td = curthread; ret = sleepq_check_ast_sc_locked(td, sc); THREAD_LOCK_ASSERT(td, MA_OWNED); mtx_assert(&sc->sc_lock, MA_OWNED); if (ret == 0) { /* * No pending signals and no suspension requests found. * Switch the thread off the cpu. */ sleepq_switch(wchan, pri); } else { /* * There were pending signals and this thread is still * on the sleep queue, remove it from the sleep queue. */ if (TD_ON_SLEEPQ(td)) { sq = sleepq_lookup(wchan); sleepq_remove_thread(sq, td); } MPASS(td->td_lock != &sc->sc_lock); mtx_unlock_spin(&sc->sc_lock); thread_unlock(td); } return (ret); } /* * Switches to another thread if we are still asleep on a sleep queue. 
* Returns with thread lock. */ static void sleepq_switch(const void *wchan, int pri) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; bool rtc_changed; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If we have a sleep queue, then we've already been woken up, so * just return. */ if (td->td_sleepqueue != NULL) { mtx_unlock_spin(&sc->sc_lock); thread_unlock(td); return; } /* * If TDF_TIMEOUT is set, then our sleep has been timed out * already but we are still on the sleep queue, so dequeue the * thread and return. * * Do the same if the real-time clock has been adjusted since this * thread calculated its timeout based on that clock. This handles * the following race: * - The Ts thread needs to sleep until an absolute real-clock time. * It copies the global rtc_generation into curthread->td_rtcgen, * reads the RTC, and calculates a sleep duration based on that time. * See umtxq_sleep() for an example. * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes * threads that are sleeping until an absolute real-clock time. * See tc_setclock() and the POSIX specification of clock_settime(). * - Ts reaches the code below. It holds the sleepqueue chain lock, * so Tc has finished waking, so this thread must test td_rtcgen. * (The declaration of td_rtcgen refers to this comment.) */ rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation; if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) { if (rtc_changed) { td->td_rtcgen = 0; } MPASS(TD_ON_SLEEPQ(td)); sq = sleepq_lookup(wchan); sleepq_remove_thread(sq, td); mtx_unlock_spin(&sc->sc_lock); thread_unlock(td); return; } #ifdef SLEEPQUEUE_PROFILING if (prof_enabled) sleepq_profile(td->td_wmesg); #endif MPASS(td->td_sleepqueue == NULL); sched_sleep(td, pri); thread_lock_set(td, &sc->sc_lock); SDT_PROBE0(sched, , , sleep); TD_SET_SLEEPING(td); mi_switch(SW_VOL | SWT_SLEEPQ); KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING")); CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); } /* * Check to see if we timed out. */ static inline int sleepq_check_timeout(void) { struct thread *td; int res; res = 0; td = curthread; if (td->td_sleeptimo != 0) { if (td->td_sleeptimo <= sbinuptime()) res = EWOULDBLOCK; td->td_sleeptimo = 0; } return (res); } /* * Check to see if we were awoken by a signal. */ static inline int sleepq_check_signals(void) { struct thread *td; td = curthread; KASSERT((td->td_flags & TDF_SINTR) == 0, ("thread %p still in interruptible sleep?", td)); return (td->td_intrval); } /* * Block the current thread until it is awakened from its sleep queue. */ void sleepq_wait(const void *wchan, int pri) { struct thread *td; td = curthread; MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); } /* * Block the current thread until it is awakened from its sleep queue * or it is interrupted by a signal. */ int sleepq_wait_sig(const void *wchan, int pri) { int rcatch; rcatch = sleepq_catch_signals(wchan, pri); if (rcatch) return (rcatch); return (sleepq_check_signals()); } /* * Block the current thread until it is awakened from its sleep queue * or it times out while waiting. 
*/ int sleepq_timedwait(const void *wchan, int pri) { struct thread *td; td = curthread; MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); return (sleepq_check_timeout()); } /* * Block the current thread until it is awakened from its sleep queue, * it is interrupted by a signal, or it times out waiting to be awakened. */ int sleepq_timedwait_sig(const void *wchan, int pri) { int rcatch, rvalt, rvals; rcatch = sleepq_catch_signals(wchan, pri); /* We must always call check_timeout() to clear sleeptimo. */ rvalt = sleepq_check_timeout(); rvals = sleepq_check_signals(); if (rcatch) return (rcatch); if (rvals) return (rvals); return (rvalt); } /* * Returns the type of sleepqueue given a waitchannel. */ int sleepq_type(const void *wchan) { struct sleepqueue *sq; int type; MPASS(wchan != NULL); sq = sleepq_lookup(wchan); if (sq == NULL) return (-1); type = sq->sq_type; return (type); } /* * Removes a thread from a sleep queue and makes it runnable. * * Requires the sc chain locked on entry. If SRQ_HOLD is specified it will * be locked on return. Returns without the thread lock held. */ static void sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri, int srqflags) { struct sleepqueue_chain *sc; bool drop; MPASS(td != NULL); MPASS(sq->sq_wchan != NULL); MPASS(td->td_wchan == sq->sq_wchan); sc = SC_LOOKUP(sq->sq_wchan); mtx_assert(&sc->sc_lock, MA_OWNED); /* * Avoid recursing on the chain lock. If the locks don't match we * need to acquire the thread lock which setrunnable will drop for * us. In this case we need to drop the chain lock afterwards. * * There is no race that will make td_lock equal to sc_lock because * we hold sc_lock. */ drop = false; if (!TD_IS_SLEEPING(td)) { thread_lock(td); drop = true; } else thread_lock_block_wait(td); /* Remove thread from the sleepq. */ sleepq_remove_thread(sq, td); /* If we're done with the sleepqueue release it. */ if ((srqflags & SRQ_HOLD) == 0 && drop) mtx_unlock_spin(&sc->sc_lock); /* Adjust priority if requested. */ MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX)); if (pri != 0 && td->td_priority > pri && PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, pri); /* * Note that thread td might not be sleeping if it is running * sleepq_catch_signals() on another CPU or is blocked on its * proc lock to check signals. There's no need to mark the * thread runnable in that case. */ if (TD_IS_SLEEPING(td)) { MPASS(!drop); TD_CLR_SLEEPING(td); setrunnable(td, srqflags); } else { MPASS(drop); thread_unlock(td); } } static void sleepq_remove_thread(struct sleepqueue *sq, struct thread *td) { struct sleepqueue_chain *sc __unused; MPASS(td != NULL); MPASS(sq->sq_wchan != NULL); MPASS(td->td_wchan == sq->sq_wchan); MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0); THREAD_LOCK_ASSERT(td, MA_OWNED); sc = SC_LOOKUP(sq->sq_wchan); mtx_assert(&sc->sc_lock, MA_OWNED); SDT_PROBE2(sched, , , wakeup, td, td->td_proc); /* Remove the thread from the queue. */ sq->sq_blockedcnt[td->td_sqqueue]--; TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq); /* * Get a sleep queue for this thread. If this is the last waiter, * use the queue itself and take it out of the chain, otherwise, * remove a queue from the free list. 
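 *
 * This is the inverse of the lending described in the comment at the
 * top of this file: every thread arrived with a spare sleep queue, so
 * every departing thread can take one with it, either the channel's
 * queue itself (last waiter) or one from the free list.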
*/ if (LIST_EMPTY(&sq->sq_free)) { td->td_sleepqueue = sq; #ifdef INVARIANTS sq->sq_wchan = NULL; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth--; #endif } else td->td_sleepqueue = LIST_FIRST(&sq->sq_free); LIST_REMOVE(td->td_sleepqueue, sq_hash); if ((td->td_flags & TDF_TIMEOUT) == 0 && td->td_sleeptimo != 0 && td->td_lock == &sc->sc_lock) { /* * We ignore the situation where timeout subsystem was * unable to stop our callout. The struct thread is * type-stable, the callout will use the correct * memory when running. The checks of the * td_sleeptimo value in this function and in * sleepq_timeout() ensure that the thread does not * get spurious wakeups, even if the callout was reset * or thread reused. * * We also cannot safely stop the callout if a scheduler * lock is held since softclock_thread() forces a lock * order of callout lock -> scheduler lock. The thread * lock will be a scheduler lock only if the thread is * preparing to go to sleep, so this is hopefully a rare * scenario. */ callout_stop(&td->td_slpcallout); } td->td_wmesg = NULL; td->td_wchan = NULL; td->td_flags &= ~(TDF_SINTR | TDF_TIMEOUT); CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, td->td_name); } void sleepq_remove_nested(struct thread *td) { struct sleepqueue_chain *sc; struct sleepqueue *sq; const void *wchan; MPASS(TD_ON_SLEEPQ(td)); wchan = td->td_wchan; sc = SC_LOOKUP(wchan); mtx_lock_spin(&sc->sc_lock); sq = sleepq_lookup(wchan); MPASS(sq != NULL); thread_lock(td); sleepq_remove_thread(sq, td); mtx_unlock_spin(&sc->sc_lock); /* Returns with the thread lock owned. */ } #ifdef INVARIANTS /* * UMA zone item deallocator. */ static void sleepq_dtor(void *mem, int size, void *arg) { struct sleepqueue *sq; int i; sq = mem; for (i = 0; i < NR_SLEEPQS; i++) { MPASS(TAILQ_EMPTY(&sq->sq_blocked[i])); MPASS(sq->sq_blockedcnt[i] == 0); } } #endif /* * UMA zone item initializer. */ static int sleepq_init(void *mem, int size, int flags) { struct sleepqueue *sq; int i; bzero(mem, size); sq = mem; for (i = 0; i < NR_SLEEPQS; i++) { TAILQ_INIT(&sq->sq_blocked[i]); sq->sq_blockedcnt[i] = 0; } LIST_INIT(&sq->sq_free); return (0); } /* * Find thread sleeping on a wait channel and resume it. */ -int +void sleepq_signal(const void *wchan, int flags, int pri, int queue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct threadqueue *head; struct thread *td, *besttd; CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) { if (flags & SLEEPQ_DROP) sleepq_release(wchan); - return (0); + return; } KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); head = &sq->sq_blocked[queue]; if (flags & SLEEPQ_UNFAIR) { /* * Find the most recently sleeping thread, but try to * skip threads still in process of context switch to * avoid spinning on the thread lock. */ sc = SC_LOOKUP(wchan); besttd = TAILQ_LAST_FAST(head, thread, td_slpq); while (besttd->td_lock != &sc->sc_lock) { td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq); if (td == NULL) break; besttd = td; } } else { /* * Find the highest priority thread on the queue. If there * is a tie, use the thread that first appears in the queue * as it has been sleeping the longest since threads are * always added to the tail of sleep queues. 
*/ besttd = td = TAILQ_FIRST(head); while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) { if (td->td_priority < besttd->td_priority) besttd = td; } } MPASS(besttd != NULL); sleepq_resume_thread(sq, besttd, pri, (flags & SLEEPQ_DROP) ? 0 : SRQ_HOLD); - return (0); } static bool match_any(struct thread *td __unused) { return (true); } /* * Resume all threads sleeping on a specified wait channel. */ -int +void sleepq_broadcast(const void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); - if (sq == NULL) - return (0); - KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), - ("%s: mismatch between sleep/wakeup and cv_*", __func__)); + if (sq != NULL) { + KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), + ("%s: mismatch between sleep/wakeup and cv_*", __func__)); - return (sleepq_remove_matching(sq, queue, match_any, pri)); + sleepq_remove_matching(sq, queue, match_any, pri); + } } /* * Resume threads on the sleep queue that match the given predicate. */ -int +void sleepq_remove_matching(struct sleepqueue *sq, int queue, bool (*matches)(struct thread *), int pri) { struct thread *td, *tdn; /* * The last thread will be given ownership of sq and may * re-enqueue itself before sleepq_resume_thread() returns, * so we must cache the "next" queue item at the beginning * of the final iteration. */ TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) { if (matches(td)) sleepq_resume_thread(sq, td, pri, SRQ_HOLD); } - - return (0); } /* * Time sleeping threads out. When the timeout expires, the thread is * removed from the sleep queue and made runnable if it is still asleep. */ static void sleepq_timeout(void *arg) { struct sleepqueue_chain *sc __unused; struct sleepqueue *sq; struct thread *td; const void *wchan; td = arg; CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); thread_lock(td); if (td->td_sleeptimo == 0 || td->td_sleeptimo > td->td_slpcallout.c_time) { /* * The thread does not want a timeout (yet). */ } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { /* * See if the thread is asleep and get the wait * channel if it is. */ wchan = td->td_wchan; sc = SC_LOOKUP(wchan); THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); sq = sleepq_lookup(wchan); MPASS(sq != NULL); td->td_flags |= TDF_TIMEOUT; sleepq_resume_thread(sq, td, 0, 0); return; } else if (TD_ON_SLEEPQ(td)) { /* * If the thread is on the SLEEPQ but isn't sleeping * yet, it can either be on another CPU in between * sleepq_add() and one of the sleepq_*wait*() * routines or it can be in sleepq_catch_signals(). */ td->td_flags |= TDF_TIMEOUT; } thread_unlock(td); } /* * Resumes a specific thread from the sleep queue associated with a specific * wait channel if it is on that queue. */ void sleepq_remove(struct thread *td, const void *wchan) { struct sleepqueue_chain *sc; struct sleepqueue *sq; /* * Look up the sleep queue for this wait channel, then re-check * that the thread is asleep on that channel, if it is not, then * bail. */ MPASS(wchan != NULL); sc = SC_LOOKUP(wchan); mtx_lock_spin(&sc->sc_lock); /* * We can not lock the thread here as it may be sleeping on a * different sleepq. However, holding the sleepq lock for this * wchan can guarantee that we do not miss a wakeup for this * channel. The asserts below will catch any false positives. 
*/ if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) { mtx_unlock_spin(&sc->sc_lock); return; } /* Thread is asleep on sleep queue sq, so wake it up. */ sq = sleepq_lookup(wchan); MPASS(sq != NULL); MPASS(td->td_wchan == wchan); sleepq_resume_thread(sq, td, 0, 0); } /* * Abort a thread as if an interrupt had occurred. Only abort * interruptible waits (unfortunately it isn't safe to abort others). * * Requires thread lock on entry, releases on return. */ -int +void sleepq_abort(struct thread *td, int intrval) { struct sleepqueue *sq; const void *wchan; THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_flags & TDF_SINTR); MPASS((intrval == 0 && (td->td_flags & TDF_SIGWAIT) != 0) || intrval == EINTR || intrval == ERESTART); /* * If the TDF_TIMEOUT flag is set, just leave. A * timeout is scheduled anyhow. */ if (td->td_flags & TDF_TIMEOUT) { thread_unlock(td); - return (0); + return; } CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); td->td_intrval = intrval; /* * If the thread has not slept yet it will find the signal in * sleepq_catch_signals() and call sleepq_resume_thread. Otherwise * we have to do it here. */ if (!TD_IS_SLEEPING(td)) { thread_unlock(td); - return (0); + return; } wchan = td->td_wchan; MPASS(wchan != NULL); sq = sleepq_lookup(wchan); MPASS(sq != NULL); /* Thread is asleep on sleep queue sq, so wake it up. */ sleepq_resume_thread(sq, td, 0, 0); - return (0); } void sleepq_chains_remove_matching(bool (*matches)(struct thread *)) { struct sleepqueue_chain *sc; struct sleepqueue *sq, *sq1; int i; for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) { if (LIST_EMPTY(&sc->sc_queues)) { continue; } mtx_lock_spin(&sc->sc_lock); LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) { for (i = 0; i < NR_SLEEPQS; ++i) sleepq_remove_matching(sq, i, matches, 0); } mtx_unlock_spin(&sc->sc_lock); } } /* * Prints the stacks of all threads presently sleeping on wchan/queue to * the sbuf sb. Sets count_stacks_printed to the number of stacks actually * printed. Typically, this will equal the number of threads sleeping on the * queue, but may be less if sb overflowed before all stacks were printed. */ #ifdef STACK int sleepq_sbuf_print_stacks(struct sbuf *sb, const void *wchan, int queue, int *count_stacks_printed) { struct thread *td, *td_next; struct sleepqueue *sq; struct stack **st; struct sbuf **td_infos; int i, stack_idx, error, stacks_to_allocate; bool finished; error = 0; finished = false; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); stacks_to_allocate = 10; for (i = 0; i < 3 && !finished ; i++) { /* We cannot malloc while holding the queue's spinlock, so * we do our mallocs now, and hope it is enough. If it * isn't, we will free these, drop the lock, malloc more, * and try again, up to a point. After that point we will * give up and report ENOMEM. We also cannot write to sb * during this time since the client may have set the * SBUF_AUTOEXTEND flag on their sbuf, which could cause a * malloc as we print to it. So we defer actually printing * to sb until after we drop the spinlock. */ /* Where we will store the stacks. */ st = malloc(sizeof(struct stack *) * stacks_to_allocate, M_TEMP, M_WAITOK); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) st[stack_idx] = stack_create(M_WAITOK); /* Where we will store the td name, tid, etc. 
*/ td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate, M_TEMP, M_WAITOK); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) td_infos[stack_idx] = sbuf_new(NULL, NULL, MAXCOMLEN + sizeof(struct thread *) * 2 + 40, SBUF_FIXEDLEN); sleepq_lock(wchan); sq = sleepq_lookup(wchan); if (sq == NULL) { /* This sleepq does not exist; exit and return ENOENT. */ error = ENOENT; finished = true; sleepq_release(wchan); goto loop_end; } stack_idx = 0; /* Save thread info */ TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, td_next) { if (stack_idx >= stacks_to_allocate) goto loop_end; /* Note the td_lock is equal to the sleepq_lock here. */ (void)stack_save_td(st[stack_idx], td); sbuf_printf(td_infos[stack_idx], "%d: %s %p", td->td_tid, td->td_name, td); ++stack_idx; } finished = true; sleepq_release(wchan); /* Print the stacks */ for (i = 0; i < stack_idx; i++) { sbuf_finish(td_infos[i]); sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i])); stack_sbuf_print(sb, st[i]); sbuf_putc(sb, '\n'); error = sbuf_error(sb); if (error == 0) *count_stacks_printed = stack_idx; } loop_end: if (!finished) sleepq_release(wchan); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) stack_destroy(st[stack_idx]); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) sbuf_delete(td_infos[stack_idx]); free(st, M_TEMP); free(td_infos, M_TEMP); stacks_to_allocate *= 10; } if (!finished && error == 0) error = ENOMEM; return (error); } #endif #ifdef SLEEPQUEUE_PROFILING #define SLEEPQ_PROF_LOCATIONS 1024 #define SLEEPQ_SBUFSIZE 512 struct sleepq_prof { LIST_ENTRY(sleepq_prof) sp_link; const char *sp_wmesg; long sp_count; }; LIST_HEAD(sqphead, sleepq_prof); struct sqphead sleepq_prof_free; struct sqphead sleepq_hash[SC_TABLESIZE]; static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS]; static struct mtx sleepq_prof_lock; MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN); static void sleepq_profile(const char *wmesg) { struct sleepq_prof *sp; mtx_lock_spin(&sleepq_prof_lock); if (prof_enabled == 0) goto unlock; LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link) if (sp->sp_wmesg == wmesg) goto done; sp = LIST_FIRST(&sleepq_prof_free); if (sp == NULL) goto unlock; sp->sp_wmesg = wmesg; LIST_REMOVE(sp, sp_link); LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link); done: sp->sp_count++; unlock: mtx_unlock_spin(&sleepq_prof_lock); return; } static void sleepq_prof_reset(void) { struct sleepq_prof *sp; int enabled; int i; mtx_lock_spin(&sleepq_prof_lock); enabled = prof_enabled; prof_enabled = 0; for (i = 0; i < SC_TABLESIZE; i++) LIST_INIT(&sleepq_hash[i]); LIST_INIT(&sleepq_prof_free); for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) { sp = &sleepq_profent[i]; sp->sp_wmesg = NULL; sp->sp_count = 0; LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link); } prof_enabled = enabled; mtx_unlock_spin(&sleepq_prof_lock); } static int enable_sleepq_prof(SYSCTL_HANDLER_ARGS) { int error, v; v = prof_enabled; error = sysctl_handle_int(oidp, &v, v, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == prof_enabled) return (0); if (v == 1) sleepq_prof_reset(); mtx_lock_spin(&sleepq_prof_lock); prof_enabled = !!v; mtx_unlock_spin(&sleepq_prof_lock); return (0); } static int reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS) { int error, v; v = 0; error = sysctl_handle_int(oidp, &v, 0, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == 0) return (0); sleepq_prof_reset(); return (0); } static int 
dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS) { struct sleepq_prof *sp; struct sbuf *sb; int enabled; int error; int i; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req); sbuf_cat(sb, "\nwmesg\tcount\n"); enabled = prof_enabled; mtx_lock_spin(&sleepq_prof_lock); prof_enabled = 0; mtx_unlock_spin(&sleepq_prof_lock); for (i = 0; i < SC_TABLESIZE; i++) { LIST_FOREACH(sp, &sleepq_hash[i], sp_link) { sbuf_printf(sb, "%s\t%ld\n", sp->sp_wmesg, sp->sp_count); } } mtx_lock_spin(&sleepq_prof_lock); prof_enabled = enabled; mtx_unlock_spin(&sleepq_prof_lock); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics"); SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, reset_sleepq_prof_stats, "I", "Reset sleepqueue profiling statistics"); SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling"); #endif #ifdef DDB DB_SHOW_COMMAND(sleepq, db_show_sleepqueue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; #ifdef INVARIANTS struct lock_object *lock; #endif struct thread *td; void *wchan; int i; if (!have_addr) return; /* * First, see if there is an active sleep queue for the wait channel * indicated by the address. */ wchan = (void *)addr; sc = SC_LOOKUP(wchan); LIST_FOREACH(sq, &sc->sc_queues, sq_hash) if (sq->sq_wchan == wchan) goto found; /* * Second, see if there is an active sleep queue at the address * indicated. */ for (i = 0; i < SC_TABLESIZE; i++) LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) { if (sq == (struct sleepqueue *)addr) goto found; } db_printf("Unable to locate a sleep queue via %p\n", (void *)addr); return; found: db_printf("Wait channel: %p\n", sq->sq_wchan); db_printf("Queue type: %d\n", sq->sq_type); #ifdef INVARIANTS if (sq->sq_lock) { lock = sq->sq_lock; db_printf("Associated Interlock: %p - (%s) %s\n", lock, LOCK_CLASS(lock)->lc_name, lock->lo_name); } #endif db_printf("Blocked threads:\n"); for (i = 0; i < NR_SLEEPQS; i++) { db_printf("\nQueue[%d]:\n", i); if (TAILQ_EMPTY(&sq->sq_blocked[i])) db_printf("\tempty\n"); else TAILQ_FOREACH(td, &sq->sq_blocked[i], td_slpq) { db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); } db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]); } } /* Alias 'show sleepqueue' to 'show sleepq'. */ DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue); #endif diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 5e3f0e2e117d..57c910d8fce0 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1,1341 +1,1340 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* For struct callout. */ #include /* For struct klist. */ #ifdef _KERNEL #include #endif #include #ifndef _KERNEL #include #endif #include #include #include #include #include #include #include /* XXX. */ #include #include #include #include #include #ifndef _KERNEL #include /* For structs itimerval, timeval. */ #else #include #include #endif #include #include #include #include #include /* Machine-dependent proc substruct. */ #ifdef _KERNEL #include #endif /* * One structure allocated per session. * * List of locks * (m) locked by s_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct session { u_int s_count; /* Ref cnt; pgrps in session - atomic. */ struct proc *s_leader; /* (m + e) Session leader. */ struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */ struct cdev_priv *s_ttydp; /* (m) Device of controlling tty. */ struct tty *s_ttyp; /* (e) Controlling tty. */ pid_t s_sid; /* (c) Session ID. */ /* (m) Setlogin() name: */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; struct mtx s_mtx; /* Mutex to protect members. */ }; /* * One structure allocated per process group. * * List of locks * (m) locked by pg_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ struct session *pg_session; /* (c) Pointer to session. */ struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ pid_t pg_id; /* (c) Process group id. */ struct mtx pg_mtx; /* Mutex to protect members */ int pg_flags; /* (m) PGRP_ flags */ struct sx pg_killsx; /* Mutual exclusion between group member * fork() and killpg() */ }; #define PGRP_ORPHANED 0x00000001 /* Group is orphaned */ /* * pargs, used to hold a copy of the command line, if it had a sane length. */ struct pargs { u_int ar_ref; /* Reference count. */ u_int ar_length; /* Length. */ u_char ar_args[1]; /* Arguments. */ }; /*- * Description of a process. 
* * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(CPU)" below, * which might be addressable only on a processor on which the process * is running. * * Below is a key of locks used to protect each member of struct proc. The * lock is indicated by a reference to a specific character in parens in the * associated comment. * * - not yet protected * a - only touched by curproc or parent during fork/wait * b - created at fork, never changes * (exception aiods switch vmspaces, but they are also * marked 'P_SYSTEM' so hopefully it will be left alone) * c - locked by proc mtx * d - locked by allproc_lock lock * e - locked by proctree_lock lock * f - session mtx * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * kx- only accessed by curthread and by debugger * l - the attaching proc or attaching proc parent * n - not locked, lazy * o - ktrace lock * q - td_contested lock * r - p_peers lock * s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9) * t - thread lock * u - process stat lock * w - process timer lock * x - created at fork, only changes during single threading in exec * y - created at first aio, doesn't change until exit or exec at which * point we are single-threaded and only curthread changes it * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held * for write access. */ struct cpuset; struct filecaps; struct filemon; struct kaioinfo; struct kaudit_record; struct kcov_info; struct kdtrace_proc; struct kdtrace_thread; struct kmsan_td; struct kq_timer_cb_data; struct mqueue_notifier; struct p_sched; struct proc; struct procdesc; struct racct; struct sbuf; struct sleepqueue; struct socket; struct td_sched; struct thread; struct trapframe; struct turnstile; struct vm_map; struct vm_map_entry; struct epoch_tracker; struct syscall_args { u_int code; u_int original_code; struct sysent *callp; register_t args[8]; }; /* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts * (with control info in the "previous" times), and are converted when * userland asks for rusage info. Backwards compatibility prevents putting * this directly in the user-visible rusage struct. * * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux. * Locking for td_rux: (t) for all fields. */ struct rusage_ext { uint64_t rux_runtime; /* (cu) Real time. */ uint64_t rux_uticks; /* (cu) Statclock hits in user mode. */ uint64_t rux_sticks; /* (cu) Statclock hits in sys mode. */ uint64_t rux_iticks; /* (cu) Statclock hits in intr mode. */ uint64_t rux_uu; /* (c) Previous user time in usec. */ uint64_t rux_su; /* (c) Previous sys time in usec. */ uint64_t rux_tu; /* (c) Previous total time in usec. */ }; /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. 
*/ struct thread { struct mtx *volatile td_lock; /* replaces sched lock */ struct proc *td_proc; /* (*) Associated process. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ TAILQ_ENTRY(thread) td_runq; /* (t) Run queue. */ union { TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */ struct thread *td_zombie; /* Zombie list linkage */ }; TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */ LIST_ENTRY(thread) td_hash; /* (d) Hash chain. */ struct cpuset *td_cpuset; /* (t) CPU affinity mask. */ struct domainset_ref td_domain; /* (a) NUMA policy */ struct seltd *td_sel; /* Select queue/channel. */ struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */ struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals u_char td_lend_user_pri; /* (t) Lend user pri. */ u_char td_allocdomain; /* (b) NUMA domain backing this struct thread. */ u_char td_base_ithread_pri; /* (t) Base ithread pri */ struct kmsan_td *td_kmsan; /* (k) KMSAN state */ /* Cleared during fork1(), thread_create(), or kthread_add(). */ #define td_startzero td_flags int td_flags; /* (t) TDF_* flags. */ int td_ast; /* (t) TDA_* indicators */ int td_inhibitors; /* (t) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_pflags2; /* (k) Private thread (TDP2_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ int td_sqqueue; /* (t) Sleepqueue queue blocked on. */ const void *td_wchan; /* (t) Sleep address. */ const char *td_wmesg; /* (t) Reason for sleep. */ volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */ u_char td_tsqueue; /* (t) Turnstile queue blocked on. */ u_char _td_pad0[2]; /* Available. */ int td_locks; /* (k) Debug: count of non-spin locks */ int td_rw_rlocks; /* (k) Count of rwlock read locks. */ int td_sx_slocks; /* (k) Count of sx shared locks. */ int td_lk_slocks; /* (k) Count of lockmgr shared locks. */ struct lock_object *td_wantedlock; /* (k) Lock we are contending on */ struct turnstile *td_blocked; /* (t) Lock thread is blocked on. */ const char *td_lockname; /* (t) Name of lock blocked on. */ LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ struct ucred *td_realucred; /* (k) Reference to credentials. */ struct ucred *td_ucred; /* (k) Used credentials, temporarily switchable. */ struct plimit *td_limit; /* (k) Resource limits. */ int td_slptick; /* (t) Time at sleep. */ int td_blktick; /* (t) Time spent blocked. */ int td_swvoltick; /* (t) Time at last SW_VOL switch. */ int td_swinvoltick; /* (t) Time at last SW_INVOL switch. */ u_int td_cow; /* (*) Number of copy-on-write faults */ struct rusage td_ru; /* (t) rusage information. */ struct rusage_ext td_rux; /* (t) Internal rusage information. */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ u_int td_pticks; /* (t) Statclock hits for profiling */ u_int td_sticks; /* (t) Statclock hits in system mode. */ u_int td_iticks; /* (t) Statclock hits in intr mode. */ u_int td_uticks; /* (t) Statclock hits in user mode. 
*/ int td_intrval; /* (t) Return value for sleepq. */ sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. */ char td_name[MAXCOMLEN + 1]; /* (*) Thread name. */ struct file *td_fpop; /* (k) file referencing cdev under op */ int td_dbgflags; /* (c) Userland debugger flags */ siginfo_t td_si; /* (c) For debugger or core file */ int td_ng_outbound; /* (k) Thread entered ng from above. */ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ u_int td_no_sleeping; /* (k) Sleeping disabled count. */ struct vnode *td_vp_reserved;/* (k) Preallocated vnode. */ void *td_su; /* (k) FFS SU private */ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ int td_rtcgen; /* (s) rtc_generation of abs. sleep */ int td_errno; /* (k) Error from last syscall. */ size_t td_vslock_sz; /* (k) amount of vslock-ed space */ struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */ long td_ucredref; /* (k) references on td_realucred */ #define td_endzero td_sigmask /* Copied during fork1(), thread_create(), or kthread_add(). */ #define td_startcopy td_endzero sigset_t td_sigmask; /* (c) Current signal mask. */ u_char td_rqindex; /* (t) Run queue index. */ u_char td_base_pri; /* (t) Thread base kernel priority. */ u_char td_priority; /* (t) Thread active priority. */ u_char td_pri_class; /* (t) Scheduling class. */ u_char td_user_pri; /* (t) User pri from estcpu and nice. */ u_char td_base_user_pri; /* (t) Base user pri */ uintptr_t td_rb_list; /* (k) Robust list head. */ uintptr_t td_rbp_list; /* (k) Robust priv list head. */ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ struct syscall_args td_sa; /* (kx) Syscall parameters. Copied on fork for child tracing. */ void *td_sigblock_ptr; /* (k) uptr for fast sigblock. */ uint32_t td_sigblock_val; /* (k) fast sigblock value read at td_sigblock_ptr on kern entry */ #define td_endcopy td_pcb /* * Fields that must be manually set in fork1(), thread_create(), kthread_add(), * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ enum td_states { TDS_INACTIVE = 0x0, TDS_INHIBITED, TDS_CAN_RUN, TDS_RUNQ, TDS_RUNNING } td_state; /* (t) thread state */ /* Note: td_state must be accessed using TD_{GET,SET}_STATE(). */ union { syscallarg_t tdu_retval[2]; off_t tdu_off; } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval u_int td_cowgen; /* (k) Generation of COW pointers. */ /* LP64 hole */ struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ u_short td_kstack_pages; /* (a) Size of the kstack. */ u_short td_kstack_domain; /* (a) Domain backing kstack KVA. */ volatile u_int td_critnest; /* (k*) Critical section nest level. */ struct mdthread td_md; /* (k) Any machine-dependent fields. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */ struct vnet *td_vnet; /* (k) Effective vnet. 
*/ const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */ struct trapframe *td_intr_frame;/* (k) Frame of the current irq */ struct proc *td_rfppwait_p; /* (k) The vforked child */ struct vm_page **td_ma; /* (k) uio pages held */ int td_ma_cnt; /* (k) size of *td_ma */ /* LP64 hole */ void *td_emuldata; /* Emulator state data */ int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ void *td_lkpi_task; /* LinuxKPI task struct pointer */ int td_pmcpend; void *td_remotereq; /* (c) dbg remote request. */ off_t td_ktr_io_lim; /* (k) limit for ktrace file size */ #ifdef EPOCH_TRACE SLIST_HEAD(, epoch_tracker) td_epochs; #endif }; struct thread0_storage { struct thread t0st_thread; uint64_t t0st_sched[10]; }; struct mtx *thread_lock_block(struct thread *); void thread_lock_block_wait(struct thread *); void thread_lock_set(struct thread *, struct mtx *); void thread_lock_unblock(struct thread *, struct mtx *); #define THREAD_LOCK_ASSERT(td, type) \ mtx_assert((td)->td_lock, (type)) #define THREAD_LOCK_BLOCKED_ASSERT(td, type) \ do { \ struct mtx *__m = (td)->td_lock; \ if (__m != &blocked_lock) \ mtx_assert(__m, (type)); \ } while (0) #ifdef INVARIANTS #define THREAD_LOCKPTR_ASSERT(td, lock) \ do { \ struct mtx *__m; \ __m = (td)->td_lock; \ KASSERT(__m == (lock), \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) \ do { \ struct mtx *__m; \ __m = (td)->td_lock; \ KASSERT(__m == (lock) || __m == &blocked_lock, \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define TD_LOCKS_INC(td) ((td)->td_locks++) #define TD_LOCKS_DEC(td) do { \ KASSERT(SCHEDULER_STOPPED() || (td)->td_locks > 0, \ ("Thread %p owns no locks", (td))); \ (td)->td_locks--; \ } while (0) #else #define THREAD_LOCKPTR_ASSERT(td, lock) #define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) #define TD_LOCKS_INC(td) #define TD_LOCKS_DEC(td) #endif /* * Flags kept in td_flags: * To change these you MUST have the scheduler lock. */ #define TDF_BORROWING 0x00000001 /* Thread is borrowing pri from another. */ #define TDF_INPANIC 0x00000002 /* Caused a panic, let it drive crashdump. */ #define TDF_INMEM 0x00000004 /* Thread's stack is in memory. */ #define TDF_SINTR 0x00000008 /* Sleep is interruptible. */ #define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */ #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ #define TDF_UNUSED11 0x00000040 /* Available */ #define TDF_SIGWAIT 0x00000080 /* Ignore ignored signals */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_UNUSED1 0x00000800 /* Available */ #define TDF_UNUSED2 0x00001000 /* Available */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_UNUSED3 0x00008000 /* Available */ #define TDF_UNUSED4 0x00010000 /* Available */ #define TDF_UNUSED5 0x00020000 /* Available */ #define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */ #define TDF_SERESTART 0x00080000 /* ERESTART on stop attempts. */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_SEINTR 0x00200000 /* EINTR on stop attempts. 
*/ #define TDF_UNUSED12 0x00400000 /* Available */ #define TDF_UNUSED6 0x00800000 /* Available */ #define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */ #define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */ #define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */ #define TDF_SCHED3 0x08000000 /* Reserved for scheduler private use */ #define TDF_UNUSED7 0x10000000 /* Available */ #define TDF_UNUSED8 0x20000000 /* Available */ #define TDF_UNUSED9 0x40000000 /* Available */ #define TDF_UNUSED10 0x80000000 /* Available */ enum { TDA_AST = 0, /* Special: call all non-flagged AST handlers */ TDA_OWEUPC, TDA_HWPMC, TDA_VFORK, TDA_ALRM, TDA_PROF, TDA_MAC, TDA_SCHED, TDA_UFS, TDA_GEOM, TDA_KQUEUE, TDA_RACCT, TDA_MOD1, /* For third party use, before signals are */ TAD_MOD2, /* processed .. */ TDA_SIG, TDA_KTRACE, TDA_SUSPEND, TDA_SIGSUSPEND, TDA_MOD3, /* .. and after */ TAD_MOD4, TDA_MAX, }; #define TDAI(tda) (1U << (tda)) #define td_ast_pending(td, tda) ((td->td_ast & TDAI(tda)) != 0) /* Userland debug flags */ #define TDB_SUSPEND 0x00000001 /* Thread is suspended by debugger */ #define TDB_XSIG 0x00000002 /* Thread is exchanging signal under trace */ #define TDB_USERWR 0x00000004 /* Debugger modified memory or registers */ #define TDB_SCE 0x00000008 /* Thread performs syscall enter */ #define TDB_SCX 0x00000010 /* Thread performs syscall exit */ #define TDB_EXEC 0x00000020 /* TDB_SCX from exec(2) family */ #define TDB_FORK 0x00000040 /* TDB_SCX from fork(2) that created new process */ #define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child only) */ #define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */ #define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */ #define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */ #define TDB_VFORK 0x00000800 /* vfork indicator for ptrace() */ #define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */ #define TDB_STEP 0x00002000 /* (x86) PSL_T set for PT_STEP */ #define TDB_SSWITCH 0x00004000 /* Suspended in ptracestop */ #define TDB_BOUNDARY 0x00008000 /* ptracestop() at boundary */ #define TDB_COREDUMPREQ 0x00010000 /* Coredump request */ #define TDB_SCREMOTEREQ 0x00020000 /* Remote syscall request */ /* * "Private" flags kept in td_pflags: * These are only written by curthread and thus need no locking. */ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ #define TDP_BUFNEED 0x00000008 /* Do not recurse into the buf flush */ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock acquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ #define TDP_SIGFASTBLOCK 0x00000100 /* Fast sigblock active */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. 
*/ #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ #define TDP_SCHED4 0x00008000 /* Reserved for scheduler private use */ #define TDP_GEOM 0x00010000 /* Settle GEOM before finishing syscall */ #define TDP_SOFTDEP 0x00020000 /* Stuck processing softdep worklist */ #define TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */ #define TDP_WAKEUP 0x00080000 /* Don't sleep in umtx cond_wait */ #define TDP_INBDFLUSH 0x00100000 /* Already in BO_BDFLUSH, do not recurse */ #define TDP_KTHREAD 0x00200000 /* This is an official kernel thread */ #define TDP_CALLCHAIN 0x00400000 /* Capture thread's callchain */ #define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */ #define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */ #define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ #define TDP_INTCPCALLOUT 0x20000000 /* used by netinet/tcp_timer.c */ #define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */ #define TDP_SIGFASTPENDING 0x80000000 /* Pending signal due to sigfastblock */ #define TDP2_SBPAGES 0x00000001 /* Owns sbusy on some pages */ #define TDP2_COMPAT32RB 0x00000002 /* compat32 ABI for robust lists */ #define TDP2_ACCT 0x00000004 /* Doing accounting */ /* * Reasons that the current thread can not be run yet. * More than one may apply. */ #define TDI_SUSPENDED 0x0001 /* On suspension queue. */ #define TDI_SLEEPING 0x0002 /* Actually asleep! (tricky). */ #define TDI_LOCK 0x0008 /* Stopped on a lock. */ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) #define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK) #define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT) #ifdef _KERNEL #define TD_GET_STATE(td) atomic_load_int(&(td)->td_state) #else #define TD_GET_STATE(td) ((td)->td_state) #endif #define TD_IS_RUNNING(td) (TD_GET_STATE(td) == TDS_RUNNING) #define TD_ON_RUNQ(td) (TD_GET_STATE(td) == TDS_RUNQ) #define TD_CAN_RUN(td) (TD_GET_STATE(td) == TDS_CAN_RUN) #define TD_IS_INHIBITED(td) (TD_GET_STATE(td) == TDS_INHIBITED) #define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED) #define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD) #define TD_CAN_ABORT(td) (TD_ON_SLEEPQ((td)) && \ ((td)->td_flags & TDF_SINTR) != 0) #define KTDSTATE(td) \ (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ ((td)->td_inhibitors & TDI_IWAIT) != 0 ? 
"iwait" : "yielding") #define TD_SET_INHIB(td, inhib) do { \ TD_SET_STATE(td, TDS_INHIBITED); \ (td)->td_inhibitors |= (inhib); \ } while (0) #define TD_CLR_INHIB(td, inhib) do { \ if (((td)->td_inhibitors & (inhib)) && \ (((td)->td_inhibitors &= ~(inhib)) == 0)) \ TD_SET_STATE(td, TDS_CAN_RUN); \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) #define TD_SET_LOCK(td) TD_SET_INHIB((td), TDI_LOCK) #define TD_SET_SUSPENDED(td) TD_SET_INHIB((td), TDI_SUSPENDED) #define TD_SET_IWAIT(td) TD_SET_INHIB((td), TDI_IWAIT) #define TD_SET_EXITING(td) TD_SET_INHIB((td), TDI_EXITING) #define TD_CLR_SLEEPING(td) TD_CLR_INHIB((td), TDI_SLEEPING) #define TD_CLR_LOCK(td) TD_CLR_INHIB((td), TDI_LOCK) #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) #ifdef _KERNEL #define TD_SET_STATE(td, state) atomic_store_int(&(td)->td_state, state) #else #define TD_SET_STATE(td, state) (td)->td_state = state #endif #define TD_SET_RUNNING(td) TD_SET_STATE(td, TDS_RUNNING) #define TD_SET_RUNQ(td) TD_SET_STATE(td, TDS_RUNQ) #define TD_SET_CAN_RUN(td) TD_SET_STATE(td, TDS_CAN_RUN) #define TD_SBDRY_INTR(td) \ (((td)->td_flags & (TDF_SEINTR | TDF_SERESTART)) != 0) #define TD_SBDRY_ERRNO(td) \ (((td)->td_flags & TDF_SEINTR) != 0 ? EINTR : ERESTART) /* * Process structure. */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. */ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ struct pwddesc *p_pd; /* (b) Cwd, chroot, jail, umask */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c) Resource limits. */ struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ int p_flag; /* (c) P_* flags. */ int p_flag2; /* (c) P2_* flags. */ enum p_states { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ PRS_ZOMBIE } p_state; /* (j/c) Process status. */ pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ struct proc *p_reaper; /* (e) My reaper. */ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants (if I am reaper). */ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of the same reaper. */ struct mtx p_mtx; /* (n) Lock for this struct. */ struct mtx p_statmtx; /* Lock for the stats */ struct mtx p_itimmtx; /* Lock for the virt/prof timers */ struct mtx p_profmtx; /* Lock for the profiling */ struct ksiginfo *p_ksi; /* Locked by parent proc lock */ sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */ #define p_siglist p_sigqueue.sq_signals pid_t p_oppid; /* (c + e) Real parent pid. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_vmspace struct vmspace *p_vmspace; /* (b) Address space. */ u_int p_swtick; /* (c) Tick when swapped in or out. */ u_int p_cowgen; /* (c) Generation of COW pointers. */ struct itimerval p_realtimer; /* (c) Alarm timer. */ struct rusage p_ru; /* (a) Exit information. 
*/ struct rusage_ext p_rux; /* (cu) Internal resource usage. */ struct rusage_ext p_crux; /* (c) Internal child resource usage. */ int p_profthreads; /* (c) Num threads in addupc_task. */ volatile int p_exitthreads; /* (j) Number of threads exiting */ int p_traceflag; /* (o) Kernel trace points. */ struct ktr_io_params *p_ktrioparms; /* (c + o) Params for ktrace. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ struct vnode *p_textdvp; /* (b) Dir containing textvp. */ char *p_binname; /* (b) Binary hardlink name. */ u_int p_lock; /* (c) Prevent exit. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ int p_sigparent; /* (c) Signal to parent on exit. */ int p_sig; /* (n) For core dump/debugger XXX. */ u_int p_ptevents; /* (c + e) ptrace() event mask. */ struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */ struct thread *p_singlethread;/* (c + j) If single threading this is it */ int p_suspcount; /* (j) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (j) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */ u_int p_treeflag; /* (e) P_TREE flags */ int p_pendingexits; /* (c) Count of pending thread exits. */ struct filemon *p_filemon; /* (c) filemon-specific data. */ int p_pdeathsig; /* (c) Signal from parent on exit. */ /* End area that is zeroed on creation. */ #define p_endzero p_magic /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_endzero u_int p_magic; /* (b) Magic number. */ int p_osrel; /* (x) osreldate for the binary (from ELF note, if any) */ uint32_t p_fctl0; /* (x) ABI feature control, ELF note */ char p_comm[MAXCOMLEN + 1]; /* (x) Process name. */ struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */ struct pargs *p_args; /* (c) Process arguments. */ rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ int p_fibnum; /* in this routing domain XXX MRT */ pid_t p_reapsubtree; /* (e) Pid of the direct child of the reaper which spawned our subtree. */ uint64_t p_elf_flags; /* (x) ELF flags */ void *p_elf_brandinfo; /* (x) Elf_Brandinfo, NULL for non ELF binaries. */ sbintime_t p_umtx_min_timeout; /* End area that is copied on creation. */ #define p_endcopy p_xexit u_int p_xexit; /* (c) Exit code. */ u_int p_xsig; /* (c) Stop/kill sig. */ struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ struct knlist *p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. */ struct proc *p_peers; /* (r) */ struct proc *p_leader; /* (b) */ void *p_emuldata; /* (c) Emulator state data. */ struct label *p_label; /* (*) Proc (not subject) MAC label. */ STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */ LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/ struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */ struct cv p_pwait; /* (*) wait cv for exit/exec. */ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ struct racct *p_racct; /* (b) Resource accounting. 
*/ int p_throttled; /* (c) Flag for racct pcpu throttling */ /* * An orphan is the child that has been re-parented to the * debugger as a result of attaching to it. Need to keep * track of them for parent to be able to collect the exit * status of what used to be children. */ LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */ LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */ TAILQ_HEAD(, kq_timer_cb_data) p_kqtim_stop; /* (c) */ LIST_ENTRY(proc) p_jaillist; /* (d) Jail process linkage. */ }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id #define NOCPU (-1) /* For when we aren't on a CPU. */ #define NOCPU_OLD (255) #define MAXCPU_OLD (254) #define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock) #define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock) #define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type)) #define PROC_STATLOCK(p) mtx_lock_spin(&(p)->p_statmtx) #define PROC_STATUNLOCK(p) mtx_unlock_spin(&(p)->p_statmtx) #define PROC_STATLOCK_ASSERT(p, type) mtx_assert(&(p)->p_statmtx, (type)) #define PROC_ITIMLOCK(p) mtx_lock_spin(&(p)->p_itimmtx) #define PROC_ITIMUNLOCK(p) mtx_unlock_spin(&(p)->p_itimmtx) #define PROC_ITIMLOCK_ASSERT(p, type) mtx_assert(&(p)->p_itimmtx, (type)) #define PROC_PROFLOCK(p) mtx_lock_spin(&(p)->p_profmtx) #define PROC_PROFUNLOCK(p) mtx_unlock_spin(&(p)->p_profmtx) #define PROC_PROFLOCK_ASSERT(p, type) mtx_assert(&(p)->p_profmtx, (type)) /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00000001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00000002 /* Has a controlling terminal. */ #define P_KPROC 0x00000004 /* Kernel process. */ #define P_UNUSED3 0x00000008 /* --available-- */ #define P_PPWAIT 0x00000010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00000020 /* Has started profiling. */ #define P_STOPPROF 0x00000040 /* Has thread requesting to stop profiling. */ #define P_HADTHREADS 0x00000080 /* Has had threads (no cleanup shortcuts) */ #define P_SUGID 0x00000100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00000200 /* System proc: no sigs or stats. */ #define P_SINGLE_EXIT 0x00000400 /* Threads suspending should exit, not wait. */ #define P_TRACED 0x00000800 /* Debugged process being traced. */ #define P_WAITED 0x00001000 /* Someone is waiting for us. */ #define P_WEXIT 0x00002000 /* Working on exiting. */ #define P_EXEC 0x00004000 /* Process called exec. */ #define P_WKILLED 0x00008000 /* Killed, go to kernel/user boundary ASAP. */ #define P_CONTINUED 0x00010000 /* Proc has continued from a stopped state. */ #define P_STOPPED_SIG 0x00020000 /* Stopped due to SIGSTOP/SIGTSTP. */ #define P_STOPPED_TRACE 0x00040000 /* Stopped because of tracing. */ #define P_STOPPED_SINGLE 0x00080000 /* Only 1 thread can continue (not to user). */ #define P_PROTECTED 0x00100000 /* Do not kill on memory overcommit. */ #define P_SIGEVENT 0x00200000 /* Process pending signals changed. */ #define P_SINGLE_BOUNDARY 0x00400000 /* Threads should suspend at user boundary. */ #define P_HWPMC 0x00800000 /* Process is using HWPMCs */ #define P_JAILED 0x01000000 /* Process is in jail. */ #define P_TOTAL_STOP 0x02000000 /* Stopped in stop_all_proc. */ #define P_INEXEC 0x04000000 /* Process is in execve(). */ #define P_STATCHILD 0x08000000 /* Child process stopped or exited. */ #define P_INMEM 0x10000000 /* Loaded into memory, always set. 
*/ #define P_UNUSED1 0x20000000 /* --available-- */ #define P_UNUSED2 0x40000000 /* --available-- */ #define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) #define P_KILLED(p) ((p)->p_flag & P_WKILLED) /* These flags are kept in p_flag2. */ #define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */ #define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */ #define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOPTRACE on exec(2). */ #define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */ #define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */ #define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */ #define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */ #define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */ #define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */ #define P2_PROTMAX_ENABLE 0x00000200 /* Force enable implied PROT_MAX. */ #define P2_PROTMAX_DISABLE 0x00000400 /* Force disable implied PROT_MAX. */ #define P2_STKGAP_DISABLE 0x00000800 /* Disable stack gap for MAP_STACK */ #define P2_STKGAP_DISABLE_EXEC 0x00001000 /* Stack gap disabled after exec */ #define P2_ITSTOPPED 0x00002000 #define P2_PTRACEREQ 0x00004000 /* Active ptrace req */ #define P2_NO_NEW_PRIVS 0x00008000 /* Ignore setuid */ #define P2_WXORX_DISABLE 0x00010000 /* WX mappings enabled */ #define P2_WXORX_ENABLE_EXEC 0x00020000 /* WXORX enabled after exec */ #define P2_WEXIT 0x00040000 /* exit just started, no external thread_single() is permitted */ #define P2_REAPKILLED 0x00080000 #define P2_MEMBAR_PRIVE 0x00100000 /* membar private expedited registered */ #define P2_MEMBAR_PRIVE_SYNCORE 0x00200000 /* membar private expedited sync core registered */ #define P2_MEMBAR_GLOBE 0x00400000 /* membar global expedited registered */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ #define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan list */ #define P_TREE_REAPER 0x00000004 /* Reaper of subtree */ #define P_TREE_GRPEXITED 0x00000008 /* exit1() done with job ctl */ /* * These were process status values (p_stat), now they are only used in * legacy conversion code. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ #define SWAIT 6 /* Waiting for interrupt. */ #define SLOCK 7 /* Blocked on a lock. */ #define P_MAGIC 0xbeefface #ifdef _KERNEL /* Types and flags for mi_switch(9). */ #define SW_TYPE_MASK 0xff /* First 8 bits are switch type */ #define SWT_OWEPREEMPT 1 /* Switching due to owepreempt. */ #define SWT_TURNSTILE 2 /* Turnstile contention. */ #define SWT_SLEEPQ 3 /* Sleepq wait. */ #define SWT_RELINQUISH 4 /* yield call. */ #define SWT_NEEDRESCHED 5 /* NEEDRESCHED was set. */ #define SWT_IDLE 6 /* Switching from the idle thread. */ #define SWT_IWAIT 7 /* Waiting for interrupts. */ #define SWT_SUSPEND 8 /* Thread suspended. */ #define SWT_REMOTEPREEMPT 9 /* Remote processor preempted. */ #define SWT_REMOTEWAKEIDLE 10 /* Remote processor preempted idle. */ #define SWT_BIND 11 /* Thread bound to a new CPU. */ #define SWT_COUNT 12 /* Number of switch types. */ /* Flags */ #define SW_VOL 0x0100 /* Voluntary switch. 
*/ #define SW_INVOL 0x0200 /* Involuntary switch. */ #define SW_PREEMPT 0x0400 /* The invol switch is a preemption */ /* How values for thread_single(). */ #define SINGLE_NO_EXIT 0 #define SINGLE_EXIT 1 #define SINGLE_BOUNDARY 2 #define SINGLE_ALLPROC 3 #define FOREACH_PROC_IN_SYSTEM(p) \ LIST_FOREACH((p), &allproc, p_list) #define FOREACH_THREAD_IN_PROC(p, td) \ TAILQ_FOREACH((td), &(p)->p_threads, td_plist) #define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads) /* * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit * in a pid_t, as it is used to represent "no process group". */ #define PID_MAX 99999 #define NO_PID (PID_MAX + 1) #define THREAD0_TID NO_PID extern pid_t pid_max; #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) /* Lock and unlock a process. */ #define PROC_LOCK(p) mtx_lock(&(p)->p_mtx) #define PROC_TRYLOCK(p) mtx_trylock(&(p)->p_mtx) #define PROC_UNLOCK(p) mtx_unlock(&(p)->p_mtx) #define PROC_LOCKED(p) mtx_owned(&(p)->p_mtx) #define PROC_WAIT_UNLOCKED(p) mtx_wait_unlocked(&(p)->p_mtx) #define PROC_LOCK_ASSERT(p, type) mtx_assert(&(p)->p_mtx, (type)) /* Lock and unlock a process group. */ #define PGRP_LOCK(pg) mtx_lock(&(pg)->pg_mtx) #define PGRP_UNLOCK(pg) mtx_unlock(&(pg)->pg_mtx) #define PGRP_LOCKED(pg) mtx_owned(&(pg)->pg_mtx) #define PGRP_LOCK_ASSERT(pg, type) mtx_assert(&(pg)->pg_mtx, (type)) #define PGRP_LOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_LOCK(pg); \ } while (0) #define PGRP_UNLOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_UNLOCK(pg); \ } while (0) /* Lock and unlock a session. */ #define SESS_LOCK(s) mtx_lock(&(s)->s_mtx) #define SESS_UNLOCK(s) mtx_unlock(&(s)->s_mtx) #define SESS_LOCKED(s) mtx_owned(&(s)->s_mtx) #define SESS_LOCK_ASSERT(s, type) mtx_assert(&(s)->s_mtx, (type)) /* * A non-zero p_lock prevents the process from exiting; it will sleep in exit1() * until the count reaches zero. * * PHOLD() asserts that the process (except the current process) is * not exiting and increments p_lock. * _PHOLD() is same as PHOLD(), it takes the process locked. */ #define PHOLD(p) do { \ PROC_LOCK(p); \ _PHOLD(p); \ PROC_UNLOCK(p); \ } while (0) #define _PHOLD(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \ ("PHOLD of exiting process %p", p)); \ (p)->p_lock++; \ } while (0) #define PROC_ASSERT_HELD(p) do { \ KASSERT((p)->p_lock > 0, ("process %p not held", p)); \ } while (0) #define PRELE(p) do { \ PROC_LOCK((p)); \ _PRELE((p)); \ PROC_UNLOCK((p)); \ } while (0) #define _PRELE(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ PROC_ASSERT_HELD(p); \ (--(p)->p_lock); \ if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0) \ wakeup(&(p)->p_lock); \ } while (0) #define PROC_ASSERT_NOT_HELD(p) do { \ KASSERT((p)->p_lock == 0, ("process %p held", p)); \ } while (0) #define PROC_UPDATE_COW(p) do { \ struct proc *_p = (p); \ PROC_LOCK_ASSERT((_p), MA_OWNED); \ atomic_store_int(&_p->p_cowgen, _p->p_cowgen + 1); \ } while (0) #define PROC_COW_CHANGECOUNT(td, p) ({ \ struct thread *_td = (td); \ struct proc *_p = (p); \ MPASS(_td == curthread); \ PROC_LOCK_ASSERT(_p, MA_OWNED); \ _p->p_cowgen - _td->td_cowgen; \ }) /* Control whether or not it is safe for curthread to sleep. 
*/ #define THREAD_NO_SLEEPING() do { \ curthread->td_no_sleeping++; \ MPASS(curthread->td_no_sleeping > 0); \ } while (0) #define THREAD_SLEEPING_OK() do { \ MPASS(curthread->td_no_sleeping > 0); \ curthread->td_no_sleeping--; \ } while (0) #define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0) #define THREAD_CONTENDS_ON_LOCK(lo) do { \ MPASS(curthread->td_wantedlock == NULL); \ curthread->td_wantedlock = lo; \ } while (0) #define THREAD_CONTENTION_DONE(lo) do { \ MPASS(curthread->td_wantedlock == lo); \ curthread->td_wantedlock = NULL; \ } while (0) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) #define PIDHASHLOCK(pid) (&pidhashtbl_lock[((pid) & pidhashlock)]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern struct sx *pidhashtbl_lock; extern u_long pidhash; extern u_long pidhashlock; #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; extern struct sx allproc_lock; extern int allproc_gen; extern struct sx proctree_lock; extern struct mtx ppeers_lock; extern struct mtx procid_lock; extern struct proc proc0; /* Process slot for swapper. */ extern struct thread0_storage thread0_st; /* Primary thread in proc0. */ #define thread0 (thread0_st.t0st_thread) extern struct vmspace vmspace0; /* VM space for proc0. */ extern int hogticks; /* Limit on kernel cpu hogs. */ extern int lastpid; extern int nprocs, maxproc; /* Current and max number of procs. */ extern int maxprocperuid; /* Max procs per uid. */ extern u_long ps_arg_cache_limit; LIST_HEAD(proclist, proc); TAILQ_HEAD(procqueue, proc); TAILQ_HEAD(threadqueue, thread); extern struct proclist allproc; /* List of all processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ extern struct uma_zone *proc_zone; extern struct uma_zone *pgrp_zone; struct proc *pfind(pid_t); /* Find process by id. */ struct proc *pfind_any(pid_t); /* Find (zombie) process by id. */ struct proc *pfind_any_locked(pid_t pid); /* Find process by id, locked. */ struct pgrp *pgfind(pid_t); /* Find process group by id. */ void pidhash_slockall(void); /* Shared lock all pid hash lists. */ void pidhash_sunlockall(void); /* Shared unlock all pid hash lists. */ struct fork_req { int fr_flags; int fr_pages; int *fr_pidp; struct proc **fr_procp; int *fr_pd_fd; int fr_pd_flags; struct filecaps *fr_pd_fcaps; int fr_flags2; #define FR2_DROPSIG_CAUGHT 0x00000001 /* Drop caught non-DFL signals */ #define FR2_SHARE_PATHS 0x00000002 /* Invert sense of RFFDG for paths */ #define FR2_KPROC 0x00000004 /* Create a kernel process */ }; /* * pget() flags. */ #define PGET_HOLD 0x00001 /* Hold the process. */ #define PGET_CANSEE 0x00002 /* Check against p_cansee(). */ #define PGET_CANDEBUG 0x00004 /* Check against p_candebug(). */ #define PGET_ISCURRENT 0x00008 /* Check that the found process is current. */ #define PGET_NOTWEXIT 0x00010 /* Check that the process is not in P_WEXIT. */ #define PGET_NOTINEXEC 0x00020 /* Check that the process is not in P_INEXEC. */ #define PGET_NOTID 0x00040 /* Do not assume tid if pid > PID_MAX. 
*/ #define PGET_WANTREAD (PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT) int pget(pid_t pid, int flags, struct proc **pp); /* ast_register() flags */ #define ASTR_ASTF_REQUIRED 0x0001 /* td_ast TDAI(TDA_X) flag set is required for call */ #define ASTR_TDP 0x0002 /* td_pflags flag set is required */ #define ASTR_KCLEAR 0x0004 /* call me on ast_kclear() */ #define ASTR_UNCOND 0x0008 /* call me always */ void ast(struct trapframe *framep); void ast_kclear(struct thread *td); void ast_register(int ast, int ast_flags, int tdp, void (*f)(struct thread *td, int asts)); void ast_deregister(int tda); void ast_sched_locked(struct thread *td, int tda); void ast_sched_mask(struct thread *td, int ast); void ast_sched(struct thread *td, int tda); void ast_unsched_locked(struct thread *td, int tda); struct thread *choosethread(void); int cr_bsd_visible(struct ucred *u1, struct ucred *u2); int cr_cansee(struct ucred *u1, struct ucred *u2); int cr_canseesocket(struct ucred *cred, struct socket *so); int cr_cansignal(struct ucred *cred, struct proc *proc, int signum); int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess); int enterthispgrp(struct proc *p, struct pgrp *pgrp); int fork1(struct thread *, struct fork_req *); void fork_exit(void (*)(void *, struct trapframe *), void *, struct trapframe *); void fork_return(struct thread *, struct trapframe *); int inferior(struct proc *p); void itimer_proc_continue(struct proc *p); void kqtimer_proc_continue(struct proc *p); void kern_proc_vmmap_resident(struct vm_map *map, struct vm_map_entry *entry, int *resident_count, bool *super); void kern_yield(int); -void kick_proc0(void); void killjobc(void); int leavepgrp(struct proc *p); int maybe_preempt(struct thread *td); void maybe_yield(void); void mi_switch(int flags); int p_candebug(struct thread *td, struct proc *p); int p_cansee(struct thread *td, struct proc *p); int p_cansched(struct thread *td, struct proc *p); int p_cansignal(struct thread *td, struct proc *p, int signum); int p_canwait(struct thread *td, struct proc *p); struct pargs *pargs_alloc(int len); void pargs_drop(struct pargs *pa); void pargs_hold(struct pargs *pa); void proc_add_orphan(struct proc *child, struct proc *parent); int proc_get_binpath(struct proc *p, char *binname, char **fullpath, char **freepath); int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); void procinit(void); int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); struct proc *proc_realparent(struct proc *child); void proc_reap(struct thread *td, struct proc *p, int *status, int options); void proc_reparent(struct proc *child, struct proc *newparent, bool set_oppid); void proc_set_p2_wexit(struct proc *p); void proc_set_traced(struct proc *p, bool stop); void proc_wkilled(struct proc *p); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); void pstats_free(struct pstats *ps); void proc_clear_orphan(struct proc *p); void reaper_abandon_children(struct proc *p, bool exiting); int securelevel_ge(struct ucred *cr, int level); int securelevel_gt(struct ucred *cr, int level); void sess_hold(struct session *); void sess_release(struct session *); void setrunnable(struct thread *, int); void setsugid(struct proc *p); bool should_yield(void); 
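/*
 * Editorial sketch, not part of this change: the hold/release pattern
 * described above.  pget() with PGET_HOLD returns with the process
 * held and the proc lock already dropped, so the hold is paired with
 * PRELE(), not PROC_UNLOCK().  "pid" and the work done while the
 * process is held are hypothetical placeholders.
 *
 *	struct proc *p;
 *	int error;
 *
 *	error = pget(pid, PGET_HOLD | PGET_NOTWEXIT, &p);
 *	if (error != 0)
 *		return (error);
 *	... inspect *p; the non-zero p_lock keeps it from exiting ...
 *	PRELE(p);
 *	return (0);
 */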
int sigonstack(size_t sp); void stopevent(struct proc *, u_int, u_int); struct thread *tdfind(lwpid_t, pid_t); void threadinit(void); void tidhash_add(struct thread *); void tidhash_remove(struct thread *); void cpu_idle(int); int cpu_idle_wakeup(int); extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. */ void cpu_switch(struct thread *, struct thread *, struct mtx *); void cpu_sync_core(void); void cpu_throw(struct thread *, struct thread *) __dead2; bool curproc_sigkilled(void); void userret(struct thread *, struct trapframe *); void cpu_exit(struct thread *); void exit1(struct thread *, int, int) __dead2; void cpu_copy_thread(struct thread *td, struct thread *td0); bool cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map); int cpu_fetch_syscall_args(struct thread *td); void cpu_fork(struct thread *, struct proc *, struct thread *, int); void cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *); int cpu_procctl(struct thread *td, int idtype, id_t id, int com, void *data); void cpu_set_syscall_retval(struct thread *, int); int cpu_set_upcall(struct thread *, void (*)(void *), void *, stack_t *); int cpu_set_user_tls(struct thread *, void *tls_base); void cpu_thread_alloc(struct thread *); void cpu_thread_clean(struct thread *); void cpu_thread_exit(struct thread *); void cpu_thread_free(struct thread *); struct thread *thread_alloc(int pages); int thread_check_susp(struct thread *td, bool sleep); void thread_cow_get_proc(struct thread *newtd, struct proc *p); void thread_cow_get(struct thread *newtd, struct thread *td); void thread_cow_free(struct thread *td); void thread_cow_update(struct thread *td); void thread_cow_synced(struct thread *td); int thread_create(struct thread *td, struct rtprio *rtp, int (*initialize_thread)(struct thread *, void *), void *thunk); void thread_exit(void) __dead2; void thread_free(struct thread *td); void thread_link(struct thread *td, struct proc *p); void thread_reap_barrier(void); int thread_recycle(struct thread *, int pages); int thread_single(struct proc *p, int how); void thread_single_end(struct proc *p, int how); void thread_stash(struct thread *td); void thread_stopped(struct proc *p); void childproc_stopped(struct proc *child, int reason); void childproc_continued(struct proc *child); void childproc_exited(struct proc *child); void thread_run_flash(struct thread *td); int thread_suspend_check(int how); bool thread_suspend_check_needed(void); void thread_suspend_switch(struct thread *, struct proc *p); void thread_suspend_one(struct thread *td); void thread_unlink(struct thread *td); void thread_unsuspend(struct proc *p); void thread_wait(struct proc *p); bool stop_all_proc_block(void); void stop_all_proc_unblock(void); void stop_all_proc(void); void resume_all_proc(void); static __inline int curthread_pflags_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags & flags); td->td_pflags |= flags; return (save); } static __inline void curthread_pflags_restore(int save) { curthread->td_pflags &= save; } static __inline int curthread_pflags2_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags2 & flags); td->td_pflags2 |= flags; return (save); } static __inline void curthread_pflags2_restore(int save) { curthread->td_pflags2 &= save; } static __inline __pure2 struct td_sched * td_get_sched(struct thread *td) { return ((struct td_sched *)&td[1]); } #define PROC_ID_PID 0 #define PROC_ID_GROUP 1 #define PROC_ID_SESSION 2 #define 
PROC_ID_REAP 3 void proc_id_set(int type, pid_t id); void proc_id_set_cond(int type, pid_t id); void proc_id_clear(int type, pid_t id); EVENTHANDLER_LIST_DECLARE(process_ctor); EVENTHANDLER_LIST_DECLARE(process_dtor); EVENTHANDLER_LIST_DECLARE(process_init); EVENTHANDLER_LIST_DECLARE(process_fini); EVENTHANDLER_LIST_DECLARE(process_exit); EVENTHANDLER_LIST_DECLARE(process_fork); EVENTHANDLER_LIST_DECLARE(process_exec); EVENTHANDLER_LIST_DECLARE(thread_ctor); EVENTHANDLER_LIST_DECLARE(thread_dtor); EVENTHANDLER_LIST_DECLARE(thread_init); #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ diff --git a/sys/sys/sleepqueue.h b/sys/sys/sleepqueue.h index 9e456141393b..f28a7f75a50c 100644 --- a/sys/sys/sleepqueue.h +++ b/sys/sys/sleepqueue.h @@ -1,122 +1,122 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2004 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_SLEEPQUEUE_H_ #define _SYS_SLEEPQUEUE_H_ /* * Sleep queue interface. Sleep/wakeup, condition variables, and sx * locks use a sleep queue for the queue of threads blocked on a sleep * channel. * * A thread calls sleepq_lock() to lock the sleep queue chain associated * with a given wait channel. A thread can then call sleepq_add() to * add itself onto a sleep queue and call one of the sleepq_wait() * functions to actually go to sleep. If a thread needs to abort a sleep * operation it should call sleepq_release() to unlock the associated sleep * queue chain lock. If the thread also needs to remove itself from a queue * it just enqueued itself on, it can use sleepq_remove() instead. * * If the thread only wishes to sleep for a limited amount of time, it can * call sleepq_set_timeout() after sleepq_add() to set up a timeout. It * should then use one of the sleepq_timedwait() functions to block. * * A thread is normally resumed from a sleep queue by either the * sleepq_signal() or sleepq_broadcast() functions. Sleepq_signal() wakes * the thread with the highest priority that is sleeping on the specified * wait channel. Sleepq_broadcast() wakes all threads that are sleeping * on the specified wait channel. A thread sleeping in an interruptible * sleep can be interrupted by calling sleepq_abort(). A thread can also * be removed from a specified sleep queue using the sleepq_remove() * function. Note that the sleep queue chain must first be locked via * sleepq_lock() before calling sleepq_abort(), sleepq_broadcast(), or * sleepq_signal(). * * Each thread allocates a sleep queue at thread creation via sleepq_alloc() * and releases it at thread destruction via sleepq_free(). Note that * a sleep queue is not tied to a specific thread and that the sleep queue * released at thread destruction may not be the same sleep queue that the * thread allocated when it was created. * * XXX: Some other parts of the kernel such as ithread sleeping may end up * using this interface as well (death to TDI_IWAIT!) */
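/*
 * Editorial sketch, not part of this change: the sleep/wakeup protocol
 * described above, roughly as sleep(9) and wakeup(9) use it.  The wait
 * channel "chan" and the condition test are hypothetical placeholders;
 * real sleepers normally pass their interlock to sleepq_add().
 *
 *	sleepq_lock(chan);
 *	if (!need_to_sleep()) {
 *		sleepq_release(chan);
 *		return;
 *	}
 *	sleepq_add(chan, NULL, "example", SLEEPQ_SLEEP, 0);
 *	sleepq_wait(chan, 0);
 *
 * The waker resumes one or all sleepers; with this change the wakeup
 * routines no longer return a value:
 *
 *	sleepq_lock(chan);
 *	sleepq_broadcast(chan, SLEEPQ_SLEEP, 0, 0);
 *	sleepq_release(chan);
 */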
struct lock_object; struct sleepqueue; struct thread; #ifdef _KERNEL #define SLEEPQ_TYPE 0x0ff /* Mask of sleep queue types. */ #define SLEEPQ_SLEEP 0x00 /* Used by sleep/wakeup. */ #define SLEEPQ_CONDVAR 0x01 /* Used for a cv. */ #define SLEEPQ_PAUSE 0x02 /* Used by pause. */ #define SLEEPQ_SX 0x03 /* Used by an sx lock. */ #define SLEEPQ_LK 0x04 /* Used by a lockmgr. */ #define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */ #define SLEEPQ_UNFAIR 0x200 /* Unfair wakeup order. */ #define SLEEPQ_DROP 0x400 /* Return without lock held. */ void init_sleepqueues(void); -int sleepq_abort(struct thread *td, int intrval); +void sleepq_abort(struct thread *td, int intrval); void sleepq_add(const void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue); struct sleepqueue *sleepq_alloc(void); -int sleepq_broadcast(const void *wchan, int flags, int pri, int queue); +void sleepq_broadcast(const void *wchan, int flags, int pri, int queue); void sleepq_chains_remove_matching(bool (*matches)(struct thread *)); void sleepq_free(struct sleepqueue *sq); void sleepq_lock(const void *wchan); struct sleepqueue *sleepq_lookup(const void *wchan); void sleepq_release(const void *wchan); void sleepq_remove(struct thread *td, const void *wchan); -int sleepq_remove_matching(struct sleepqueue *sq, int queue, +void sleepq_remove_matching(struct sleepqueue *sq, int queue, bool (*matches)(struct thread *), int pri); void sleepq_remove_nested(struct thread *td); -int sleepq_signal(const void *wchan, int flags, int pri, int queue); +void sleepq_signal(const void *wchan, int flags, int pri, int queue); void sleepq_set_timeout_sbt(const void *wchan, sbintime_t sbt, sbintime_t pr, int flags); #define sleepq_set_timeout(wchan, timo) \ sleepq_set_timeout_sbt((wchan), tick_sbt * (timo), 0, C_HARDCLOCK) u_int sleepq_sleepcnt(const void *wchan, int queue); int sleepq_timedwait(const void *wchan, int pri); int sleepq_timedwait_sig(const void *wchan, int pri); int sleepq_type(const void *wchan); void sleepq_wait(const void *wchan, int pri); int sleepq_wait_sig(const void *wchan, int pri); #ifdef STACK struct sbuf; int sleepq_sbuf_print_stacks(struct sbuf *sb, const void *wchan, int queue, int *count_stacks_printed); #endif #endif /* _KERNEL */ #endif /* !_SYS_SLEEPQUEUE_H_ */ diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 2627ee75dbff..5128c46a1d9f 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -1,846 +1,838 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved.
* * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include "opt_vm.h" #include "opt_kstack_pages.h" #include "opt_kstack_max_pages.h" #include "opt_kstack_usage_prof.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if VM_NRESERVLEVEL > 1 #define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER + \ PAGE_SHIFT) #elif VM_NRESERVLEVEL > 0 #define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT) #else #define KVA_KSTACK_QUANTUM_SHIFT (8 + PAGE_SHIFT) #endif #define KVA_KSTACK_QUANTUM (1ul << KVA_KSTACK_QUANTUM_SHIFT) /* * MPSAFE * * WARNING! This code calls vm_map_check_protection() which only checks * the associated vm_map_entry range. 
It does not determine whether the * contents of the memory are actually readable or writable. In most cases * just checking the vm_map_entry is sufficient within the kernel's address * space. */ bool kernacc(void *addr, int len, int rw) { boolean_t rv; vm_offset_t saddr, eaddr; vm_prot_t prot; KASSERT((rw & ~VM_PROT_ALL) == 0, ("illegal ``rw'' argument to kernacc (%x)\n", rw)); if ((vm_offset_t)addr + len > vm_map_max(kernel_map) || (vm_offset_t)addr + len < (vm_offset_t)addr) return (false); prot = rw; saddr = trunc_page((vm_offset_t)addr); eaddr = round_page((vm_offset_t)addr + len); vm_map_lock_read(kernel_map); rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); vm_map_unlock_read(kernel_map); return (rv == TRUE); } /* * MPSAFE * * WARNING! This code calls vm_map_check_protection() which only checks * the associated vm_map_entry range. It does not determine whether the * contents of the memory are actually readable or writable. vmapbuf(), * vm_fault_quick(), or copyin()/copyout()/su*()/fu*() functions should be * used in conjunction with this call. */ bool useracc(void *addr, int len, int rw) { boolean_t rv; vm_prot_t prot; vm_map_t map; KASSERT((rw & ~VM_PROT_ALL) == 0, ("illegal ``rw'' argument to useracc (%x)\n", rw)); prot = rw; map = &curproc->p_vmspace->vm_map; if ((vm_offset_t)addr + len > vm_map_max(map) || (vm_offset_t)addr + len < (vm_offset_t)addr) { return (false); } vm_map_lock_read(map); rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot); vm_map_unlock_read(map); return (rv == TRUE); } int vslock(void *addr, size_t len) { vm_offset_t end, last, start; vm_size_t npages; int error; last = (vm_offset_t)addr + len; start = trunc_page((vm_offset_t)addr); end = round_page(last); if (last < (vm_offset_t)addr || end < (vm_offset_t)addr) return (EINVAL); npages = atop(end - start); if (npages > vm_page_max_user_wired) return (ENOMEM); error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end, VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); if (error == KERN_SUCCESS) { curthread->td_vslock_sz += len; return (0); } /* * Return EFAULT on error to match copy{in,out}() behaviour * rather than returning ENOMEM like mlock() would. */ return (EFAULT); } void vsunlock(void *addr, size_t len) { /* Rely on the parameter sanity checks performed by vslock(). */ MPASS(curthread->td_vslock_sz >= len); curthread->td_vslock_sz -= len; (void)vm_map_unwire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); } /* * Pin the page contained within the given object at the given offset. If the * page is not resident, allocate and load it using the given object's pager. * Return the pinned page if successful; otherwise, return NULL. */ static vm_page_t vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) { vm_page_t m; vm_pindex_t pindex; pindex = OFF_TO_IDX(offset); (void)vm_page_grab_valid_unlocked(&m, object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); return (m); } /* * Return a CPU private mapping to the page at the given offset within the * given object. The page is pinned before it is mapped. */ struct sf_buf * vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset) { vm_page_t m; m = vm_imgact_hold_page(object, offset); if (m == NULL) return (NULL); sched_pin(); return (sf_buf_alloc(m, SFB_CPUPRIVATE)); } /* * Destroy the given CPU private mapping and unpin the page that it mapped.
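 *
 * A hedged sketch of the intended map/unmap pairing ("src" and "len"
 * are placeholders, error handling elided):
 *
 *	struct sf_buf *sf;
 *
 *	sf = vm_imgact_map_page(object, offset);
 *	if (sf != NULL) {
 *		memcpy((void *)sf_buf_kva(sf), src, len);
 *		vm_imgact_unmap_page(sf);
 *	}
 *
 * sf_buf_kva() yields the CPU-private KVA; the unmap below frees the
 * sf_buf, unpins the CPU, and unwires the page.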
*/ void vm_imgact_unmap_page(struct sf_buf *sf) { vm_page_t m; m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); vm_page_unwire(m, PQ_ACTIVE); } void vm_sync_icache(vm_map_t map, vm_offset_t va, vm_offset_t sz) { pmap_sync_icache(map->pmap, va, sz); } static vm_object_t kstack_object; static vm_object_t kstack_alt_object; static uma_zone_t kstack_cache; static int kstack_cache_size; static vmem_t *vmd_kstack_arena[MAXMEMDOM]; static vm_pindex_t vm_kstack_pindex(vm_offset_t ks, int npages); static vm_object_t vm_thread_kstack_size_to_obj(int npages); static int vm_thread_stack_back(vm_offset_t kaddr, vm_page_t ma[], int npages, int req_class, int domain); static int sysctl_kstack_cache_size(SYSCTL_HANDLER_ARGS) { int error, oldsize; oldsize = kstack_cache_size; error = sysctl_handle_int(oidp, arg1, arg2, req); if (error == 0 && req->newptr && oldsize != kstack_cache_size) uma_zone_set_maxcache(kstack_cache, kstack_cache_size); return (error); } SYSCTL_PROC(_vm, OID_AUTO, kstack_cache_size, CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &kstack_cache_size, 0, sysctl_kstack_cache_size, "IU", "Maximum number of cached kernel stacks"); /* * Allocate a virtual address range from a domain kstack arena, following * the specified NUMA policy. */ static vm_offset_t vm_thread_alloc_kstack_kva(vm_size_t size, int domain) { #ifndef __ILP32__ int rv; vmem_t *arena; vm_offset_t addr = 0; size = round_page(size); /* Allocate from the kernel arena for non-standard kstack sizes. */ if (size != ptoa(kstack_pages + KSTACK_GUARD_PAGES)) { arena = vm_dom[domain].vmd_kernel_arena; } else { arena = vmd_kstack_arena[domain]; } rv = vmem_alloc(arena, size, M_BESTFIT | M_NOWAIT, &addr); if (rv == ENOMEM) return (0); KASSERT(atop(addr - VM_MIN_KERNEL_ADDRESS) % (kstack_pages + KSTACK_GUARD_PAGES) == 0, ("%s: allocated kstack KVA not aligned to multiple of kstack size", __func__)); return (addr); #else return (kva_alloc(size)); #endif } /* * Release a region of kernel virtual memory * allocated from the kstack arena. */ static __noinline void vm_thread_free_kstack_kva(vm_offset_t addr, vm_size_t size, int domain) { vmem_t *arena; size = round_page(size); #ifdef __ILP32__ arena = kernel_arena; #else arena = vmd_kstack_arena[domain]; if (size != ptoa(kstack_pages + KSTACK_GUARD_PAGES)) { arena = vm_dom[domain].vmd_kernel_arena; } #endif vmem_free(arena, addr, size); } static vmem_size_t vm_thread_kstack_import_quantum(void) { #ifndef __ILP32__ /* * The kstack_quantum is larger than KVA_QUANTUM to account * for holes induced by guard pages. */ return (KVA_KSTACK_QUANTUM * (kstack_pages + KSTACK_GUARD_PAGES)); #else return (KVA_KSTACK_QUANTUM); #endif } /* * Import KVA from a parent arena into the kstack arena. Imports must be * a multiple of kernel stack pages + guard pages in size. * * Kstack VA allocations need to be aligned so that the linear KVA pindex * is divisible by the total number of kstack VA pages. This is necessary to * make vm_kstack_pindex work properly. * * We import a multiple of KVA_KSTACK_QUANTUM-sized region from the parent * arena. The actual size used by the kstack arena is one kstack smaller to * allow for the necessary alignment adjustments to be made. 
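 *
 * A worked example, assuming the common amd64 defaults of
 * kstack_pages == 4 and KSTACK_GUARD_PAGES == 1 (so kpages == 5):
 * if vmem_xalloc() below returns an address whose page index relative
 * to VM_MIN_KERNEL_ADDRESS is 1027, then rem == 1027 % 5 == 2 and the
 * address is bumped by (5 - 2) == 3 pages to index 1030, the next
 * multiple of 5. The one-kstack slack reserved above is what keeps
 * this bump inside the imported region.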
*/ static int vm_thread_kstack_arena_import(void *arena, vmem_size_t size, int flags, vmem_addr_t *addrp) { int error, rem; size_t kpages = kstack_pages + KSTACK_GUARD_PAGES; KASSERT(atop(size) % kpages == 0, ("%s: Size %jd is not a multiple of kstack pages (%d)", __func__, (intmax_t)size, (int)kpages)); error = vmem_xalloc(arena, vm_thread_kstack_import_quantum(), KVA_KSTACK_QUANTUM, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, addrp); if (error) { return (error); } rem = atop(*addrp - VM_MIN_KERNEL_ADDRESS) % kpages; if (rem != 0) { /* Bump addr to next aligned address */ *addrp = *addrp + (kpages - rem) * PAGE_SIZE; } return (0); } /* * Release KVA from the kstack arena back into the parent arena. Released * imports must be a multiple of kernel stack pages + guard pages in size. */ static void vm_thread_kstack_arena_release(void *arena, vmem_addr_t addr, vmem_size_t size) { int rem; size_t kpages __diagused = kstack_pages + KSTACK_GUARD_PAGES; KASSERT(size % kpages == 0, ("%s: Size %jd is not a multiple of kstack pages (%d)", __func__, (intmax_t)size, (int)kpages)); KASSERT((addr - VM_MIN_KERNEL_ADDRESS) % kpages == 0, ("%s: Address %p is not properly aligned (%p)", __func__, (void *)addr, (void *)VM_MIN_KERNEL_ADDRESS)); /* * If the address is not KVA_KSTACK_QUANTUM-aligned we have to decrement * it to account for the shift in vm_thread_kstack_arena_import. */ rem = addr % KVA_KSTACK_QUANTUM; if (rem) { KASSERT(rem <= ptoa(kpages), ("%s: rem > kpages (%d), (%d)", __func__, rem, (int)kpages)); addr -= rem; } vmem_xfree(arena, addr, vm_thread_kstack_import_quantum()); } /* * Create the kernel stack for a new thread. */ static vm_offset_t vm_thread_stack_create(struct domainset *ds, int pages) { vm_page_t ma[KSTACK_MAX_PAGES]; struct vm_domainset_iter di; int req = VM_ALLOC_NORMAL; vm_object_t obj; vm_offset_t ks; int domain, i; obj = vm_thread_kstack_size_to_obj(pages); if (vm_ndomains > 1) obj->domain.dr_policy = ds; vm_domainset_iter_page_init(&di, obj, 0, &domain, &req); do { /* * Get a kernel virtual address for this thread's kstack. */ ks = vm_thread_alloc_kstack_kva(ptoa(pages + KSTACK_GUARD_PAGES), domain); if (ks == 0) continue; ks += ptoa(KSTACK_GUARD_PAGES); /* * Allocate physical pages to back the stack. */ if (vm_thread_stack_back(ks, ma, pages, req, domain) != 0) { vm_thread_free_kstack_kva(ks - ptoa(KSTACK_GUARD_PAGES), ptoa(pages + KSTACK_GUARD_PAGES), domain); continue; } if (KSTACK_GUARD_PAGES != 0) { pmap_qremove(ks - ptoa(KSTACK_GUARD_PAGES), KSTACK_GUARD_PAGES); } for (i = 0; i < pages; i++) vm_page_valid(ma[i]); pmap_qenter(ks, ma, pages); return (ks); } while (vm_domainset_iter_page(&di, obj, &domain) == 0); return (0); } static __noinline void vm_thread_stack_dispose(vm_offset_t ks, int pages) { vm_page_t m; vm_pindex_t pindex; int i, domain; vm_object_t obj = vm_thread_kstack_size_to_obj(pages); pindex = vm_kstack_pindex(ks, pages); domain = vm_phys_domain(vtophys(ks)); pmap_qremove(ks, pages); VM_OBJECT_WLOCK(obj); for (i = 0; i < pages; i++) { m = vm_page_lookup(obj, pindex + i); if (m == NULL) panic("%s: kstack already missing?", __func__); KASSERT(vm_page_domain(m) == domain, ("%s: page %p domain mismatch, expected %d got %d", __func__, m, domain, vm_page_domain(m))); vm_page_xbusy_claim(m); vm_page_unwire_noq(m); vm_page_free(m); } VM_OBJECT_WUNLOCK(obj); kasan_mark((void *)ks, ptoa(pages), ptoa(pages), 0); vm_thread_free_kstack_kva(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE), ptoa(pages + KSTACK_GUARD_PAGES), domain); } /* * Allocate the kernel stack for a new thread.
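 *
 * The KVA handed back by vm_thread_stack_create() above is laid out as
 * follows (one guard page shown, matching a KSTACK_GUARD_PAGES == 1
 * configuration):
 *
 *	ks - ptoa(KSTACK_GUARD_PAGES)     ks              ks + ptoa(pages)
 *	|------ guard (unmapped) ------|----- wired stack pages -----|
 *
 * td_kstack is set to "ks", i.e. just past the guard. The function
 * returns 1 on success and 0 on failure, so a caller sketch is
 * "if (vm_thread_new(td, 0) == 0) return (ENOMEM);", where a pages
 * argument of 0 selects the default kstack_pages.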
*/ int vm_thread_new(struct thread *td, int pages) { vm_offset_t ks; u_short ks_domain; /* Bounds check */ if (pages <= 1) pages = kstack_pages; else if (pages > KSTACK_MAX_PAGES) pages = KSTACK_MAX_PAGES; ks = 0; if (pages == kstack_pages && kstack_cache != NULL) ks = (vm_offset_t)uma_zalloc(kstack_cache, M_NOWAIT); /* * Ensure that kstack objects can draw pages from any memory * domain. Otherwise a local memory shortage can block a process * swap-in. */ if (ks == 0) ks = vm_thread_stack_create(DOMAINSET_PREF(PCPU_GET(domain)), pages); if (ks == 0) return (0); ks_domain = vm_phys_domain(vtophys(ks)); KASSERT(ks_domain >= 0 && ks_domain < vm_ndomains, ("%s: invalid domain for kstack %p", __func__, (void *)ks)); td->td_kstack = ks; td->td_kstack_pages = pages; td->td_kstack_domain = ks_domain; return (1); } /* * Dispose of a thread's kernel stack. */ void vm_thread_dispose(struct thread *td) { vm_offset_t ks; int pages; pages = td->td_kstack_pages; ks = td->td_kstack; td->td_kstack = 0; td->td_kstack_pages = 0; td->td_kstack_domain = MAXMEMDOM; if (pages == kstack_pages) { kasan_mark((void *)ks, 0, ptoa(pages), KASAN_KSTACK_FREED); uma_zfree(kstack_cache, (void *)ks); } else { vm_thread_stack_dispose(ks, pages); } } /* * Calculate kstack pindex. * * Uses a non-identity mapping if guard pages are * active to avoid pindex holes in the kstack object. */ static vm_pindex_t vm_kstack_pindex(vm_offset_t ks, int kpages) { vm_pindex_t pindex = atop(ks - VM_MIN_KERNEL_ADDRESS); #ifdef __ILP32__ return (pindex); #else /* * Return the linear pindex if guard pages aren't active or if we are * allocating a non-standard kstack size. */ if (KSTACK_GUARD_PAGES == 0 || kpages != kstack_pages) { return (pindex); } KASSERT(pindex % (kpages + KSTACK_GUARD_PAGES) >= KSTACK_GUARD_PAGES, ("%s: Attempting to calculate kstack guard page pindex", __func__)); return (pindex - (pindex / (kpages + KSTACK_GUARD_PAGES) + 1) * KSTACK_GUARD_PAGES); #endif } /* * Allocate physical pages, following the specified NUMA policy, to back a * kernel stack. */ static int vm_thread_stack_back(vm_offset_t ks, vm_page_t ma[], int npages, int req_class, int domain) { vm_object_t obj = vm_thread_kstack_size_to_obj(npages); vm_pindex_t pindex; vm_page_t m; int n; pindex = vm_kstack_pindex(ks, npages); VM_OBJECT_WLOCK(obj); for (n = 0; n < npages;) { m = vm_page_grab(obj, pindex + n, VM_ALLOC_NOCREAT | VM_ALLOC_WIRED); if (m == NULL) { m = vm_page_alloc_domain(obj, pindex + n, domain, req_class | VM_ALLOC_WIRED); } if (m == NULL) break; ma[n++] = m; } if (n < npages) goto cleanup; VM_OBJECT_WUNLOCK(obj); return (0); cleanup: for (int i = 0; i < n; i++) { m = ma[i]; (void)vm_page_unwire_noq(m); vm_page_free(m); } VM_OBJECT_WUNLOCK(obj); return (ENOMEM); } static vm_object_t vm_thread_kstack_size_to_obj(int npages) { return (npages == kstack_pages ? 
kstack_object : kstack_alt_object); } static int kstack_import(void *arg, void **store, int cnt, int domain, int flags) { struct domainset *ds; int i; if (domain == UMA_ANYDOMAIN) ds = DOMAINSET_RR(); else ds = DOMAINSET_PREF(domain); for (i = 0; i < cnt; i++) { store[i] = (void *)vm_thread_stack_create(ds, kstack_pages); if (store[i] == NULL) break; } return (i); } static void kstack_release(void *arg, void **store, int cnt) { vm_offset_t ks; int i; for (i = 0; i < cnt; i++) { ks = (vm_offset_t)store[i]; vm_thread_stack_dispose(ks, kstack_pages); } } static void kstack_cache_init(void *null) { vm_size_t kstack_quantum; int domain; kstack_object = vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)); kstack_cache = uma_zcache_create("kstack_cache", kstack_pages * PAGE_SIZE, NULL, NULL, NULL, NULL, kstack_import, kstack_release, NULL, UMA_ZONE_FIRSTTOUCH); kstack_cache_size = imax(128, mp_ncpus * 4); uma_zone_set_maxcache(kstack_cache, kstack_cache_size); kstack_alt_object = vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)); kstack_quantum = vm_thread_kstack_import_quantum(); /* * Reduce size used by the kstack arena to allow for * alignment adjustments in vm_thread_kstack_arena_import. */ kstack_quantum -= (kstack_pages + KSTACK_GUARD_PAGES) * PAGE_SIZE; /* * Create the kstack_arena for each domain and set kernel_arena as * parent. */ for (domain = 0; domain < vm_ndomains; domain++) { vmd_kstack_arena[domain] = vmem_create("kstack arena", 0, 0, PAGE_SIZE, 0, M_WAITOK); KASSERT(vmd_kstack_arena[domain] != NULL, ("%s: failed to create domain %d kstack_arena", __func__, domain)); vmem_set_import(vmd_kstack_arena[domain], vm_thread_kstack_arena_import, vm_thread_kstack_arena_release, vm_dom[domain].vmd_kernel_arena, kstack_quantum); } } SYSINIT(vm_kstacks, SI_SUB_KMEM, SI_ORDER_ANY, kstack_cache_init, NULL); #ifdef KSTACK_USAGE_PROF /* * Track maximum stack used by a thread in kernel. */ static int max_kstack_used; SYSCTL_INT(_debug, OID_AUTO, max_kstack_used, CTLFLAG_RD, &max_kstack_used, 0, "Maximum stack depth used by a thread in kernel"); void intr_prof_stack_use(struct thread *td, struct trapframe *frame) { vm_offset_t stack_top; vm_offset_t current; int used, prev_used; /* * Testing for interrupted kernel mode isn't strictly * needed. It optimizes the execution, since interrupts from * usermode will have only the trap frame on the stack. */ if (TRAPF_USERMODE(frame)) return; stack_top = td->td_kstack + td->td_kstack_pages * PAGE_SIZE; current = (vm_offset_t)(uintptr_t)&stack_top; /* * Try to detect if interrupt is using kernel thread stack. * Hardware could use a dedicated stack for interrupt handling. */ if (stack_top <= current || current < td->td_kstack) return; used = stack_top - current; for (;;) { prev_used = max_kstack_used; if (prev_used >= used) break; if (atomic_cmpset_int(&max_kstack_used, prev_used, used)) break; } } #endif /* KSTACK_USAGE_PROF */ /* * Implement fork's actions on an address space. * Here we arrange for the address space to be copied or referenced, * allocate a user struct (pcb and kernel stack), then call the * machine-dependent layer to fill those in and make the new process * ready to run. The new process is set up so that it returns directly * to user mode to avoid stack copying and relocation problems. 
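 *
 * Schematically, the flag handling below (a summary, not an exhaustive
 * list of caller conventions):
 *
 *	(flags & RFPROC) == 0    no new process; without RFMEM the
 *	                         caller's shared vmspace is unshared
 *	                         (made COW), then cpu_fork() runs
 *	RFPROC | RFMEM           p2 shares p1's vmspace (refcounted)
 *	RFPROC alone             p2 takes vm2, the copy provided by the
 *	                         caller, and inherits SysV shared memory
 *	                         state via shmfork()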
*/ int vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2, struct vmspace *vm2, int flags) { struct proc *p1 = td->td_proc; struct domainset *dset; int error; if ((flags & RFPROC) == 0) { /* * Divorce the memory: if it is shared, what was shared * amongst the threads becomes copy-on-write locally. */ if ((flags & RFMEM) == 0) { error = vmspace_unshare(p1); if (error) return (error); } cpu_fork(td, p2, td2, flags); return (0); } if (flags & RFMEM) { p2->p_vmspace = p1->p_vmspace; refcount_acquire(&p1->p_vmspace->vm_refcnt); } dset = td2->td_domain.dr_policy; while (vm_page_count_severe_set(&dset->ds_mask)) { vm_wait_doms(&dset->ds_mask, 0); } if ((flags & RFMEM) == 0) { p2->p_vmspace = vm2; if (p1->p_vmspace->vm_shm) shmfork(p1, p2); } /* * cpu_fork will copy and update the pcb, set up the kernel stack, * and make the child ready to run. */ cpu_fork(td, p2, td2, flags); return (0); } /* * Called after a process has been wait(2)'ed upon and is being reaped. * The idea is to reclaim resources that we could not reclaim while * the process was still executing. */ void vm_waitproc(struct proc *p) { vmspace_exitfree(p); /* and clean out the vmspace */ } - -/* - * This used to kick the thread which faults in threads. - */ -void -kick_proc0(void) -{ -}
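The net effect of the sleepqueue.h and vm_glue.c hunks above: sleepq_abort(), sleepq_broadcast(), sleepq_remove_matching(), and sleepq_signal() no longer report whether they resumed a swapped-out thread, and kick_proc0() is gone, so wakeup paths lose the swapper handoff entirely. A hedged before/after sketch of a typical caller ("wchan" is a placeholder wait channel):

	/* Before: the return value told the caller to kick the swapper. */
	wakeup_swapper = sleepq_signal(wchan, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(wchan);
	if (wakeup_swapper)
		kick_proc0();

	/* After: the boolean and kick_proc0() no longer exist. */
	sleepq_signal(wchan, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(wchan);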