diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
index 70e208da428f..4159cef9ffba 100644
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -1,2696 +1,2695 @@
 /*-
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
  * Copyright (c) 2013-2021 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/proc.h>
 #include <sys/sglist.h>
 #include <sys/sleepqueue.h>
 #include <sys/refcount.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/rwlock.h>
 #include <sys/mman.h>
 #include <sys/stack.h>
 #include <sys/sysent.h>
 #include <sys/time.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 
 #include <machine/stdarg.h>
 
 #if defined(__i386__) || defined(__amd64__)
 #include <machine/md_var.h>
 #endif
 
 #include <linux/kobject.h>
 #include <linux/cpu.h>
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/cdev.h>
 #include <linux/file.h>
 #include <linux/sysfs.h>
 #include <linux/mm.h>
 #include <linux/io.h>
 #include <linux/vmalloc.h>
 #include <linux/netdevice.h>
 #include <linux/timer.h>
 #include <linux/interrupt.h>
 #include <linux/uaccess.h>
 #include <linux/list.h>
 #include <linux/kthread.h>
 #include <linux/kernel.h>
 #include <linux/compat.h>
 #include <linux/poll.h>
 #include <linux/smp.h>
 #include <linux/wait_bit.h>
 #include <linux/rcupdate.h>
 #include <linux/interval_tree.h>
 #include <linux/interval_tree_generic.h>
 
 #if defined(__i386__) || defined(__amd64__)
 #include <asm/smp.h>
 #endif
 
 SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "LinuxKPI parameters");
 
 int linuxkpi_debug;
 SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug, CTLFLAG_RWTUN,
     &linuxkpi_debug, 0, "Set to enable pr_debug() prints. Clear to disable.");
 
 int linuxkpi_warn_dump_stack = 0;
 SYSCTL_INT(_compat_linuxkpi, OID_AUTO, warn_dump_stack, CTLFLAG_RWTUN,
     &linuxkpi_warn_dump_stack, 0,
     "Set to enable stack traces from WARN_ON(). Clear to disable.");
 
 static struct timeval lkpi_net_lastlog;
 static int lkpi_net_curpps;
 static int lkpi_net_maxpps = 99;
 SYSCTL_INT(_compat_linuxkpi, OID_AUTO, net_ratelimit, CTLFLAG_RWTUN,
     &lkpi_net_maxpps, 0, "Limit number of LinuxKPI net messages per second.");
 
 MALLOC_DEFINE(M_KMALLOC, "lkpikmalloc", "Linux kmalloc compat");
 
 #include <linux/rbtree.h>
 /* Undo Linux compat changes. */
 #undef RB_ROOT
 #undef file
 #undef cdev
 #define	RB_ROOT(head)	(head)->rbh_root
 
 static void linux_destroy_dev(struct linux_cdev *);
 static void linux_cdev_deref(struct linux_cdev *ldev);
 static struct vm_area_struct *linux_cdev_handle_find(void *handle);
 
 cpumask_t cpu_online_mask;
 struct kobject linux_class_root;
 struct device linux_root_device;
 struct class linux_class_misc;
 struct list_head pci_drivers;
 struct list_head pci_devices;
 spinlock_t pci_lock;
 
 unsigned long linux_timer_hz_mask;
 
 wait_queue_head_t linux_bit_waitq;
 wait_queue_head_t linux_var_waitq;
 
 int
 panic_cmp(struct rb_node *one, struct rb_node *two)
 {
 	panic("no cmp");
 }
 
 RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);
 
 #define	START(node)	((node)->start)
 #define	LAST(node)	((node)->last)
 
 INTERVAL_TREE_DEFINE(struct interval_tree_node, rb, unsigned long,, START,
     LAST,, lkpi_interval_tree)
 
 int
 kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
 {
 	va_list tmp_va;
 	int len;
 	char *old;
 	char *name;
 	char dummy;
 
 	old = kobj->name;
 
 	if (old && fmt == NULL)
 		return (0);
 
 	/* compute length of string */
 	va_copy(tmp_va, args);
 	len = vsnprintf(&dummy, 0, fmt, tmp_va);
 	va_end(tmp_va);
 
 	/* account for zero termination */
 	len++;
 
 	/* check for error */
 	if (len < 1)
 		return (-EINVAL);
 
 	/* allocate memory for string */
 	name = kzalloc(len, GFP_KERNEL);
 	if (name == NULL)
 		return (-ENOMEM);
 	vsnprintf(name, len, fmt, args);
 	kobj->name = name;
 
 	/* free old string */
 	kfree(old);
 
 	/* filter new string */
 	for (; *name != '\0'; name++)
 		if (*name == '/')
 			*name = '!';
 	return (0);
 }
 
 int
 kobject_set_name(struct kobject *kobj, const char *fmt, ...)
 {
 	va_list args;
 	int error;
 
 	va_start(args, fmt);
 	error = kobject_set_name_vargs(kobj, fmt, args);
 	va_end(args);
 
 	return (error);
 }
 
 static int
 kobject_add_complete(struct kobject *kobj, struct kobject *parent)
 {
 	const struct kobj_type *t;
 	int error;
 
 	kobj->parent = parent;
 	error = sysfs_create_dir(kobj);
 	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
 		struct attribute **attr;
 		t = kobj->ktype;
 
 		for (attr = t->default_attrs; *attr != NULL; attr++) {
 			error = sysfs_create_file(kobj, *attr);
 			if (error)
 				break;
 		}
 		if (error)
 			sysfs_remove_dir(kobj);
 	}
 	return (error);
 }
 
 int
 kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
 {
 	va_list args;
 	int error;
 
 	va_start(args, fmt);
 	error = kobject_set_name_vargs(kobj, fmt, args);
 	va_end(args);
 	if (error)
 		return (error);
 
 	return kobject_add_complete(kobj, parent);
 }
 
 void
 linux_kobject_release(struct kref *kref)
 {
 	struct kobject *kobj;
 	char *name;
 
 	kobj = container_of(kref, struct kobject, kref);
 	sysfs_remove_dir(kobj);
 	name = kobj->name;
 	if (kobj->ktype && kobj->ktype->release)
 		kobj->ktype->release(kobj);
 	kfree(name);
 }
 
 static void
 linux_kobject_kfree(struct kobject *kobj)
 {
 	kfree(kobj);
 }
 
 static void
 linux_kobject_kfree_name(struct kobject *kobj)
 {
 	if (kobj) {
 		kfree(kobj->name);
 	}
 }
 
 const struct kobj_type linux_kfree_type = {
 	.release = linux_kobject_kfree
 };
 
 static ssize_t
 lkpi_kobj_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct kobj_attribute *ka =
 	    container_of(attr, struct kobj_attribute, attr);
 
 	if (ka->show == NULL)
 		return (-EIO);
 
 	return (ka->show(kobj, ka, buf));
 }
 
 static ssize_t
 lkpi_kobj_attr_store(struct kobject *kobj, struct attribute *attr,
     const char *buf, size_t count)
 {
 	struct kobj_attribute *ka =
 	    container_of(attr, struct kobj_attribute, attr);
 
 	if (ka->store == NULL)
 		return (-EIO);
 
 	return (ka->store(kobj, ka, buf, count));
 }
 
 const struct sysfs_ops kobj_sysfs_ops = {
 	.show	= lkpi_kobj_attr_show,
 	.store	= lkpi_kobj_attr_store,
 };
 
 static void
 linux_device_release(struct device *dev)
 {
 	pr_debug("linux_device_release: %s\n", dev_name(dev));
 	kfree(dev);
 }
 
 static ssize_t
 linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct class_attribute *dattr;
 	ssize_t error;
 
 	dattr = container_of(attr, struct class_attribute, attr);
 	error = -EIO;
 	if (dattr->show)
 		error = dattr->show(container_of(kobj, struct class, kobj),
 		    dattr, buf);
 	return (error);
 }
 
 static ssize_t
 linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
     size_t count)
 {
 	struct class_attribute *dattr;
 	ssize_t error;
 
 	dattr = container_of(attr, struct class_attribute, attr);
 	error = -EIO;
 	if (dattr->store)
 		error = dattr->store(container_of(kobj, struct class, kobj),
 		    dattr, buf, count);
 	return (error);
 }
 
 static void
 linux_class_release(struct kobject *kobj)
 {
 	struct class *class;
 
 	class = container_of(kobj, struct class, kobj);
 	if (class->class_release)
 		class->class_release(class);
 }
 
 static const struct sysfs_ops linux_class_sysfs = {
 	.show  = linux_class_show,
 	.store = linux_class_store,
 };
 
 const struct kobj_type linux_class_ktype = {
 	.release = linux_class_release,
 	.sysfs_ops = &linux_class_sysfs
 };
 
 static void
 linux_dev_release(struct kobject *kobj)
 {
 	struct device *dev;
 
 	dev = container_of(kobj, struct device, kobj);
 	/* This is the precedence defined by linux. */
 	if (dev->release)
 		dev->release(dev);
 	else if (dev->class && dev->class->dev_release)
 		dev->class->dev_release(dev);
 }
 
 static ssize_t
 linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct device_attribute *dattr;
 	ssize_t error;
 
 	dattr = container_of(attr, struct device_attribute, attr);
 	error = -EIO;
 	if (dattr->show)
 		error = dattr->show(container_of(kobj, struct device, kobj),
 		    dattr, buf);
 	return (error);
 }
 
 static ssize_t
 linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
     size_t count)
 {
 	struct device_attribute *dattr;
 	ssize_t error;
 
 	dattr = container_of(attr, struct device_attribute, attr);
 	error = -EIO;
 	if (dattr->store)
 		error = dattr->store(container_of(kobj, struct device, kobj),
 		    dattr, buf, count);
 	return (error);
 }
 
 static const struct sysfs_ops linux_dev_sysfs = {
 	.show  = linux_dev_show,
 	.store = linux_dev_store,
 };
 
 const struct kobj_type linux_dev_ktype = {
 	.release = linux_dev_release,
 	.sysfs_ops = &linux_dev_sysfs
 };
 
 struct device *
 device_create(struct class *class, struct device *parent, dev_t devt,
     void *drvdata, const char *fmt, ...)
 {
 	struct device *dev;
 	va_list args;
 
 	dev = kzalloc(sizeof(*dev), M_WAITOK);
 	dev->parent = parent;
 	dev->class = class;
 	dev->devt = devt;
 	dev->driver_data = drvdata;
 	dev->release = linux_device_release;
 	va_start(args, fmt);
 	kobject_set_name_vargs(&dev->kobj, fmt, args);
 	va_end(args);
 	device_register(dev);
 
 	return (dev);
 }
 
 int
 kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
     struct kobject *parent, const char *fmt, ...)
 {
 	va_list args;
 	int error;
 
 	kobject_init(kobj, ktype);
 	kobj->ktype = ktype;
 	kobj->parent = parent;
 	kobj->name = NULL;
 
 	va_start(args, fmt);
 	error = kobject_set_name_vargs(kobj, fmt, args);
 	va_end(args);
 	if (error)
 		return (error);
 	return kobject_add_complete(kobj, parent);
 }
 
 static void
 linux_kq_lock(void *arg)
 {
 	spinlock_t *s = arg;
 
 	spin_lock(s);
 }
 static void
 linux_kq_unlock(void *arg)
 {
 	spinlock_t *s = arg;
 
 	spin_unlock(s);
 }
 
 static void
 linux_kq_assert_lock(void *arg, int what)
 {
 #ifdef INVARIANTS
 	spinlock_t *s = arg;
 
 	if (what == LA_LOCKED)
 		mtx_assert(&s->m, MA_OWNED);
 	else
 		mtx_assert(&s->m, MA_NOTOWNED);
 #endif
 }
 
 static void
 linux_file_kqfilter_poll(struct linux_file *, int);
 
 struct linux_file *
 linux_file_alloc(void)
 {
 	struct linux_file *filp;
 
 	filp = kzalloc(sizeof(*filp), GFP_KERNEL);
 
 	/* set initial refcount */
 	filp->f_count = 1;
 
 	/* setup fields needed by kqueue support */
 	spin_lock_init(&filp->f_kqlock);
 	knlist_init(&filp->f_selinfo.si_note, &filp->f_kqlock,
 	    linux_kq_lock, linux_kq_unlock, linux_kq_assert_lock);
 
 	return (filp);
 }
 
 void
 linux_file_free(struct linux_file *filp)
 {
 	if (filp->_file == NULL) {
 		if (filp->f_op != NULL && filp->f_op->release != NULL)
 			filp->f_op->release(filp->f_vnode, filp);
 		if (filp->f_shmem != NULL)
 			vm_object_deallocate(filp->f_shmem);
 		kfree_rcu(filp, rcu);
 	} else {
 		/*
 		 * The close method of the character device or file
 		 * will free the linux_file structure:
 		 */
 		_fdrop(filp->_file, curthread);
 	}
 }
 
 static int
 linux_cdev_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
     vm_page_t *mres)
 {
 	struct vm_area_struct *vmap;
 
 	vmap = linux_cdev_handle_find(vm_obj->handle);
 
 	MPASS(vmap != NULL);
 	MPASS(vmap->vm_private_data == vm_obj->handle);
 
 	if (likely(vmap->vm_ops != NULL && offset < vmap->vm_len)) {
 		vm_paddr_t paddr = IDX_TO_OFF(vmap->vm_pfn) + offset;
 		vm_page_t page;
 
 		if (((*mres)->flags & PG_FICTITIOUS) != 0) {
 			/*
 			 * If the passed in result page is a fake
 			 * page, update it with the new physical
 			 * address.
 			 */
 			page = *mres;
 			vm_page_updatefake(page, paddr, vm_obj->memattr);
 		} else {
 			/*
 			 * Replace the passed in "mres" page with our
 			 * own fake page and free up the all of the
 			 * original pages.
 			 */
 			VM_OBJECT_WUNLOCK(vm_obj);
 			page = vm_page_getfake(paddr, vm_obj->memattr);
 			VM_OBJECT_WLOCK(vm_obj);
 
 			vm_page_replace(page, vm_obj, (*mres)->pindex, *mres);
 			*mres = page;
 		}
 		vm_page_valid(page);
 		return (VM_PAGER_OK);
 	}
 	return (VM_PAGER_FAIL);
 }
 
 static int
 linux_cdev_pager_populate(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type,
     vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
 {
 	struct vm_area_struct *vmap;
 	int err;
 
 	/* get VM area structure */
 	vmap = linux_cdev_handle_find(vm_obj->handle);
 	MPASS(vmap != NULL);
 	MPASS(vmap->vm_private_data == vm_obj->handle);
 
 	VM_OBJECT_WUNLOCK(vm_obj);
 
 	linux_set_current(curthread);
 
 	down_write(&vmap->vm_mm->mmap_sem);
 	if (unlikely(vmap->vm_ops == NULL)) {
 		err = VM_FAULT_SIGBUS;
 	} else {
 		struct vm_fault vmf;
 
 		/* fill out VM fault structure */
 		vmf.virtual_address = (void *)(uintptr_t)IDX_TO_OFF(pidx);
 		vmf.flags = (fault_type & VM_PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
 		vmf.pgoff = 0;
 		vmf.page = NULL;
 		vmf.vma = vmap;
 
 		vmap->vm_pfn_count = 0;
 		vmap->vm_pfn_pcount = &vmap->vm_pfn_count;
 		vmap->vm_obj = vm_obj;
 
 		err = vmap->vm_ops->fault(&vmf);
 
 		while (vmap->vm_pfn_count == 0 && err == VM_FAULT_NOPAGE) {
 			kern_yield(PRI_USER);
 			err = vmap->vm_ops->fault(&vmf);
 		}
 	}
 
 	/* translate return code */
 	switch (err) {
 	case VM_FAULT_OOM:
 		err = VM_PAGER_AGAIN;
 		break;
 	case VM_FAULT_SIGBUS:
 		err = VM_PAGER_BAD;
 		break;
 	case VM_FAULT_NOPAGE:
 		/*
 		 * By contract the fault handler will return having
 		 * busied all the pages itself. If pidx is already
 		 * found in the object, it will simply xbusy the first
 		 * page and return with vm_pfn_count set to 1.
 		 */
 		*first = vmap->vm_pfn_first;
 		*last = *first + vmap->vm_pfn_count - 1;
 		err = VM_PAGER_OK;
 		break;
 	default:
 		err = VM_PAGER_ERROR;
 		break;
 	}
 	up_write(&vmap->vm_mm->mmap_sem);
 	VM_OBJECT_WLOCK(vm_obj);
 	return (err);
 }
 
 static struct rwlock linux_vma_lock;
 static TAILQ_HEAD(, vm_area_struct) linux_vma_head =
     TAILQ_HEAD_INITIALIZER(linux_vma_head);
 
 static void
 linux_cdev_handle_free(struct vm_area_struct *vmap)
 {
 	/* Drop reference on vm_file */
 	if (vmap->vm_file != NULL)
 		fput(vmap->vm_file);
 
 	/* Drop reference on mm_struct */
 	mmput(vmap->vm_mm);
 
 	kfree(vmap);
 }
 
 static void
 linux_cdev_handle_remove(struct vm_area_struct *vmap)
 {
 	rw_wlock(&linux_vma_lock);
 	TAILQ_REMOVE(&linux_vma_head, vmap, vm_entry);
 	rw_wunlock(&linux_vma_lock);
 }
 
 static struct vm_area_struct *
 linux_cdev_handle_find(void *handle)
 {
 	struct vm_area_struct *vmap;
 
 	rw_rlock(&linux_vma_lock);
 	TAILQ_FOREACH(vmap, &linux_vma_head, vm_entry) {
 		if (vmap->vm_private_data == handle)
 			break;
 	}
 	rw_runlock(&linux_vma_lock);
 	return (vmap);
 }
 
 static int
 linux_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
 		      vm_ooffset_t foff, struct ucred *cred, u_short *color)
 {
 
 	MPASS(linux_cdev_handle_find(handle) != NULL);
 	*color = 0;
 	return (0);
 }
 
 static void
 linux_cdev_pager_dtor(void *handle)
 {
 	const struct vm_operations_struct *vm_ops;
 	struct vm_area_struct *vmap;
 
 	vmap = linux_cdev_handle_find(handle);
 	MPASS(vmap != NULL);
 
 	/*
 	 * Remove handle before calling close operation to prevent
 	 * other threads from reusing the handle pointer.
 	 */
 	linux_cdev_handle_remove(vmap);
 
 	down_write(&vmap->vm_mm->mmap_sem);
 	vm_ops = vmap->vm_ops;
 	if (likely(vm_ops != NULL))
 		vm_ops->close(vmap);
 	up_write(&vmap->vm_mm->mmap_sem);
 
 	linux_cdev_handle_free(vmap);
 }
 
 static struct cdev_pager_ops linux_cdev_pager_ops[2] = {
   {
 	/* OBJT_MGTDEVICE */
 	.cdev_pg_populate	= linux_cdev_pager_populate,
 	.cdev_pg_ctor	= linux_cdev_pager_ctor,
 	.cdev_pg_dtor	= linux_cdev_pager_dtor
   },
   {
 	/* OBJT_DEVICE */
 	.cdev_pg_fault	= linux_cdev_pager_fault,
 	.cdev_pg_ctor	= linux_cdev_pager_ctor,
 	.cdev_pg_dtor	= linux_cdev_pager_dtor
   },
 };
 
 int
 zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
     unsigned long size)
 {
 	vm_object_t obj;
 	vm_page_t m;
 
 	obj = vma->vm_obj;
 	if (obj == NULL || (obj->flags & OBJ_UNMANAGED) != 0)
 		return (-ENOTSUP);
 	VM_OBJECT_RLOCK(obj);
 	for (m = vm_page_find_least(obj, OFF_TO_IDX(address));
 	    m != NULL && m->pindex < OFF_TO_IDX(address + size);
 	    m = TAILQ_NEXT(m, listq))
 		pmap_remove_all(m);
 	VM_OBJECT_RUNLOCK(obj);
 	return (0);
 }
 
 static struct file_operations dummy_ldev_ops = {
 	/* XXXKIB */
 };
 
 static struct linux_cdev dummy_ldev = {
 	.ops = &dummy_ldev_ops,
 };
 
 #define	LDEV_SI_DTR	0x0001
 #define	LDEV_SI_REF	0x0002
 
 static void
 linux_get_fop(struct linux_file *filp, const struct file_operations **fop,
     struct linux_cdev **dev)
 {
 	struct linux_cdev *ldev;
 	u_int siref;
 
 	ldev = filp->f_cdev;
 	*fop = filp->f_op;
 	if (ldev != NULL) {
 		if (ldev->kobj.ktype == &linux_cdev_static_ktype) {
 			refcount_acquire(&ldev->refs);
 		} else {
 			for (siref = ldev->siref;;) {
 				if ((siref & LDEV_SI_DTR) != 0) {
 					ldev = &dummy_ldev;
 					*fop = ldev->ops;
 					siref = ldev->siref;
 					MPASS((ldev->siref & LDEV_SI_DTR) == 0);
 				} else if (atomic_fcmpset_int(&ldev->siref,
 				    &siref, siref + LDEV_SI_REF)) {
 					break;
 				}
 			}
 		}
 	}
 	*dev = ldev;
 }
 
 static void
 linux_drop_fop(struct linux_cdev *ldev)
 {
 
 	if (ldev == NULL)
 		return;
 	if (ldev->kobj.ktype == &linux_cdev_static_ktype) {
 		linux_cdev_deref(ldev);
 	} else {
 		MPASS(ldev->kobj.ktype == &linux_cdev_ktype);
 		MPASS((ldev->siref & ~LDEV_SI_DTR) != 0);
 		atomic_subtract_int(&ldev->siref, LDEV_SI_REF);
 	}
 }
 
 #define	OPW(fp,td,code) ({			\
 	struct file *__fpop;			\
 	__typeof(code) __retval;		\
 						\
 	__fpop = (td)->td_fpop;			\
 	(td)->td_fpop = (fp);			\
 	__retval = (code);			\
 	(td)->td_fpop = __fpop;			\
 	__retval;				\
 })
 
 static int
 linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td,
     struct file *file)
 {
 	struct linux_cdev *ldev;
 	struct linux_file *filp;
 	const struct file_operations *fop;
 	int error;
 
 	ldev = dev->si_drv1;
 
 	filp = linux_file_alloc();
 	filp->f_dentry = &filp->f_dentry_store;
 	filp->f_op = ldev->ops;
 	filp->f_mode = file->f_flag;
 	filp->f_flags = file->f_flag;
 	filp->f_vnode = file->f_vnode;
 	filp->_file = file;
 	refcount_acquire(&ldev->refs);
 	filp->f_cdev = ldev;
 
 	linux_set_current(td);
 	linux_get_fop(filp, &fop, &ldev);
 
 	if (fop->open != NULL) {
 		error = -fop->open(file->f_vnode, filp);
 		if (error != 0) {
 			linux_drop_fop(ldev);
 			linux_cdev_deref(filp->f_cdev);
 			kfree(filp);
 			return (error);
 		}
 	}
 
 	/* hold on to the vnode - used for fstat() */
 	vhold(filp->f_vnode);
 
 	/* release the file from devfs */
 	finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops);
 	linux_drop_fop(ldev);
 	return (ENXIO);
 }
 
 #define	LINUX_IOCTL_MIN_PTR 0x10000UL
 #define	LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX)
 
 static inline int
 linux_remap_address(void **uaddr, size_t len)
 {
 	uintptr_t uaddr_val = (uintptr_t)(*uaddr);
 
 	if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR &&
 	    uaddr_val < LINUX_IOCTL_MAX_PTR)) {
 		struct task_struct *pts = current;
 		if (pts == NULL) {
 			*uaddr = NULL;
 			return (1);
 		}
 
 		/* compute data offset */
 		uaddr_val -= LINUX_IOCTL_MIN_PTR;
 
 		/* check that length is within bounds */
 		if ((len > IOCPARM_MAX) ||
 		    (uaddr_val + len) > pts->bsd_ioctl_len) {
 			*uaddr = NULL;
 			return (1);
 		}
 
 		/* re-add kernel buffer address */
 		uaddr_val += (uintptr_t)pts->bsd_ioctl_data;
 
 		/* update address location */
 		*uaddr = (void *)uaddr_val;
 		return (1);
 	}
 	return (0);
 }
 
 int
 linux_copyin(const void *uaddr, void *kaddr, size_t len)
 {
 	if (linux_remap_address(__DECONST(void **, &uaddr), len)) {
 		if (uaddr == NULL)
 			return (-EFAULT);
 		memcpy(kaddr, uaddr, len);
 		return (0);
 	}
 	return (-copyin(uaddr, kaddr, len));
 }
 
 int
 linux_copyout(const void *kaddr, void *uaddr, size_t len)
 {
 	if (linux_remap_address(&uaddr, len)) {
 		if (uaddr == NULL)
 			return (-EFAULT);
 		memcpy(uaddr, kaddr, len);
 		return (0);
 	}
 	return (-copyout(kaddr, uaddr, len));
 }
 
 size_t
 linux_clear_user(void *_uaddr, size_t _len)
 {
 	uint8_t *uaddr = _uaddr;
 	size_t len = _len;
 
 	/* make sure uaddr is aligned before going into the fast loop */
 	while (((uintptr_t)uaddr & 7) != 0 && len > 7) {
 		if (subyte(uaddr, 0))
 			return (_len);
 		uaddr++;
 		len--;
 	}
 
 	/* zero 8 bytes at a time */
 	while (len > 7) {
 #ifdef __LP64__
 		if (suword64(uaddr, 0))
 			return (_len);
 #else
 		if (suword32(uaddr, 0))
 			return (_len);
 		if (suword32(uaddr + 4, 0))
 			return (_len);
 #endif
 		uaddr += 8;
 		len -= 8;
 	}
 
 	/* zero fill end, if any */
 	while (len > 0) {
 		if (subyte(uaddr, 0))
 			return (_len);
 		uaddr++;
 		len--;
 	}
 	return (0);
 }
 
 int
 linux_access_ok(const void *uaddr, size_t len)
 {
 	uintptr_t saddr;
 	uintptr_t eaddr;
 
 	/* get start and end address */
 	saddr = (uintptr_t)uaddr;
 	eaddr = (uintptr_t)uaddr + len;
 
 	/* verify addresses are valid for userspace */
 	return ((saddr == eaddr) ||
 	    (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS));
 }
 
 /*
  * This function should return either EINTR or ERESTART depending on
  * the signal type sent to this thread:
  */
 static int
 linux_get_error(struct task_struct *task, int error)
 {
 	/* check for signal type interrupt code */
 	if (error == EINTR || error == ERESTARTSYS || error == ERESTART) {
 		error = -linux_schedule_get_interrupt_value(task);
 		if (error == 0)
 			error = EINTR;
 	}
 	return (error);
 }
 
 static int
 linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
     const struct file_operations *fop, u_long cmd, caddr_t data,
     struct thread *td)
 {
 	struct task_struct *task = current;
 	unsigned size;
 	int error;
 
 	size = IOCPARM_LEN(cmd);
 	/* refer to logic in sys_ioctl() */
 	if (size > 0) {
 		/*
 		 * Setup hint for linux_copyin() and linux_copyout().
 		 *
 		 * Background: Linux code expects a user-space address
 		 * while FreeBSD supplies a kernel-space address.
 		 */
 		task->bsd_ioctl_data = data;
 		task->bsd_ioctl_len = size;
 		data = (void *)LINUX_IOCTL_MIN_PTR;
 	} else {
 		/* fetch user-space pointer */
 		data = *(void **)data;
 	}
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		/* try the compat IOCTL handler first */
 		if (fop->compat_ioctl != NULL) {
 			error = -OPW(fp, td, fop->compat_ioctl(filp,
 			    cmd, (u_long)data));
 		} else {
 			error = ENOTTY;
 		}
 
 		/* fallback to the regular IOCTL handler, if any */
 		if (error == ENOTTY && fop->unlocked_ioctl != NULL) {
 			error = -OPW(fp, td, fop->unlocked_ioctl(filp,
 			    cmd, (u_long)data));
 		}
 	} else
 #endif
 	{
 		if (fop->unlocked_ioctl != NULL) {
 			error = -OPW(fp, td, fop->unlocked_ioctl(filp,
 			    cmd, (u_long)data));
 		} else {
 			error = ENOTTY;
 		}
 	}
 	if (size > 0) {
 		task->bsd_ioctl_data = NULL;
 		task->bsd_ioctl_len = 0;
 	}
 
 	if (error == EWOULDBLOCK) {
 		/* update kqfilter status, if any */
 		linux_file_kqfilter_poll(filp,
 		    LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
 	} else {
 		error = linux_get_error(task, error);
 	}
 	return (error);
 }
 
 #define	LINUX_POLL_TABLE_NORMAL ((poll_table *)1)
 
 /*
  * This function atomically updates the poll wakeup state and returns
  * the previous state at the time of update.
  */
 static uint8_t
 linux_poll_wakeup_state(atomic_t *v, const uint8_t *pstate)
 {
 	int c, old;
 
 	c = v->counter;
 
 	while ((old = atomic_cmpxchg(v, c, pstate[c])) != c)
 		c = old;
 
 	return (c);
 }
 
 static int
 linux_poll_wakeup_callback(wait_queue_t *wq, unsigned int wq_state, int flags, void *key)
 {
 	static const uint8_t state[LINUX_FWQ_STATE_MAX] = {
 		[LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT, /* NOP */
 		[LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY, /* NOP */
 		[LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_READY,
 		[LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_READY, /* NOP */
 	};
 	struct linux_file *filp = container_of(wq, struct linux_file, f_wait_queue.wq);
 
 	switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) {
 	case LINUX_FWQ_STATE_QUEUED:
 		linux_poll_wakeup(filp);
 		return (1);
 	default:
 		return (0);
 	}
 }
 
 void
 linux_poll_wait(struct linux_file *filp, wait_queue_head_t *wqh, poll_table *p)
 {
 	static const uint8_t state[LINUX_FWQ_STATE_MAX] = {
 		[LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_NOT_READY,
 		[LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_NOT_READY, /* NOP */
 		[LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_QUEUED, /* NOP */
 		[LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_QUEUED,
 	};
 
 	/* check if we are called inside the select system call */
 	if (p == LINUX_POLL_TABLE_NORMAL)
 		selrecord(curthread, &filp->f_selinfo);
 
 	switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) {
 	case LINUX_FWQ_STATE_INIT:
 		/* NOTE: file handles can only belong to one wait-queue */
 		filp->f_wait_queue.wqh = wqh;
 		filp->f_wait_queue.wq.func = &linux_poll_wakeup_callback;
 		add_wait_queue(wqh, &filp->f_wait_queue.wq);
 		atomic_set(&filp->f_wait_queue.state, LINUX_FWQ_STATE_QUEUED);
 		break;
 	default:
 		break;
 	}
 }
 
 static void
 linux_poll_wait_dequeue(struct linux_file *filp)
 {
 	static const uint8_t state[LINUX_FWQ_STATE_MAX] = {
 		[LINUX_FWQ_STATE_INIT] = LINUX_FWQ_STATE_INIT,	/* NOP */
 		[LINUX_FWQ_STATE_NOT_READY] = LINUX_FWQ_STATE_INIT,
 		[LINUX_FWQ_STATE_QUEUED] = LINUX_FWQ_STATE_INIT,
 		[LINUX_FWQ_STATE_READY] = LINUX_FWQ_STATE_INIT,
 	};
 
 	seldrain(&filp->f_selinfo);
 
 	switch (linux_poll_wakeup_state(&filp->f_wait_queue.state, state)) {
 	case LINUX_FWQ_STATE_NOT_READY:
 	case LINUX_FWQ_STATE_QUEUED:
 	case LINUX_FWQ_STATE_READY:
 		remove_wait_queue(filp->f_wait_queue.wqh, &filp->f_wait_queue.wq);
 		break;
 	default:
 		break;
 	}
 }
 
 void
 linux_poll_wakeup(struct linux_file *filp)
 {
 	/* this function should be NULL-safe */
 	if (filp == NULL)
 		return;
 
 	selwakeup(&filp->f_selinfo);
 
 	spin_lock(&filp->f_kqlock);
 	filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ |
 	    LINUX_KQ_FLAG_NEED_WRITE;
 
 	/* make sure the "knote" gets woken up */
 	KNOTE_LOCKED(&filp->f_selinfo.si_note, 1);
 	spin_unlock(&filp->f_kqlock);
 }
 
 static void
 linux_file_kqfilter_detach(struct knote *kn)
 {
 	struct linux_file *filp = kn->kn_hook;
 
 	spin_lock(&filp->f_kqlock);
 	knlist_remove(&filp->f_selinfo.si_note, kn, 1);
 	spin_unlock(&filp->f_kqlock);
 }
 
 static int
 linux_file_kqfilter_read_event(struct knote *kn, long hint)
 {
 	struct linux_file *filp = kn->kn_hook;
 
 	mtx_assert(&filp->f_kqlock.m, MA_OWNED);
 
 	return ((filp->f_kqflags & LINUX_KQ_FLAG_NEED_READ) ? 1 : 0);
 }
 
 static int
 linux_file_kqfilter_write_event(struct knote *kn, long hint)
 {
 	struct linux_file *filp = kn->kn_hook;
 
 	mtx_assert(&filp->f_kqlock.m, MA_OWNED);
 
 	return ((filp->f_kqflags & LINUX_KQ_FLAG_NEED_WRITE) ? 1 : 0);
 }
 
 static struct filterops linux_dev_kqfiltops_read = {
 	.f_isfd = 1,
 	.f_detach = linux_file_kqfilter_detach,
 	.f_event = linux_file_kqfilter_read_event,
 };
 
 static struct filterops linux_dev_kqfiltops_write = {
 	.f_isfd = 1,
 	.f_detach = linux_file_kqfilter_detach,
 	.f_event = linux_file_kqfilter_write_event,
 };
 
 static void
 linux_file_kqfilter_poll(struct linux_file *filp, int kqflags)
 {
 	struct thread *td;
 	const struct file_operations *fop;
 	struct linux_cdev *ldev;
 	int temp;
 
 	if ((filp->f_kqflags & kqflags) == 0)
 		return;
 
 	td = curthread;
 
 	linux_get_fop(filp, &fop, &ldev);
 	/* get the latest polling state */
 	temp = OPW(filp->_file, td, fop->poll(filp, NULL));
 	linux_drop_fop(ldev);
 
 	spin_lock(&filp->f_kqlock);
 	/* clear kqflags */
 	filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ |
 	    LINUX_KQ_FLAG_NEED_WRITE);
 	/* update kqflags */
 	if ((temp & (POLLIN | POLLOUT)) != 0) {
 		if ((temp & POLLIN) != 0)
 			filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ;
 		if ((temp & POLLOUT) != 0)
 			filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE;
 
 		/* make sure the "knote" gets woken up */
 		KNOTE_LOCKED(&filp->f_selinfo.si_note, 0);
 	}
 	spin_unlock(&filp->f_kqlock);
 }
 
 static int
 linux_file_kqfilter(struct file *file, struct knote *kn)
 {
 	struct linux_file *filp;
 	struct thread *td;
 	int error;
 
 	td = curthread;
 	filp = (struct linux_file *)file->f_data;
 	filp->f_flags = file->f_flag;
 	if (filp->f_op->poll == NULL)
 		return (EINVAL);
 
 	spin_lock(&filp->f_kqlock);
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
 		filp->f_kqflags |= LINUX_KQ_FLAG_HAS_READ;
 		kn->kn_fop = &linux_dev_kqfiltops_read;
 		kn->kn_hook = filp;
 		knlist_add(&filp->f_selinfo.si_note, kn, 1);
 		error = 0;
 		break;
 	case EVFILT_WRITE:
 		filp->f_kqflags |= LINUX_KQ_FLAG_HAS_WRITE;
 		kn->kn_fop = &linux_dev_kqfiltops_write;
 		kn->kn_hook = filp;
 		knlist_add(&filp->f_selinfo.si_note, kn, 1);
 		error = 0;
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	spin_unlock(&filp->f_kqlock);
 
 	if (error == 0) {
 		linux_set_current(td);
 
 		/* update kqfilter status, if any */
 		linux_file_kqfilter_poll(filp,
 		    LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE);
 	}
 	return (error);
 }
 
 static int
 linux_file_mmap_single(struct file *fp, const struct file_operations *fop,
     vm_ooffset_t *offset, vm_size_t size, struct vm_object **object,
     int nprot, bool is_shared, struct thread *td)
 {
 	struct task_struct *task;
 	struct vm_area_struct *vmap;
 	struct mm_struct *mm;
 	struct linux_file *filp;
 	vm_memattr_t attr;
 	int error;
 
 	filp = (struct linux_file *)fp->f_data;
 	filp->f_flags = fp->f_flag;
 
 	if (fop->mmap == NULL)
 		return (EOPNOTSUPP);
 
 	linux_set_current(td);
 
 	/*
 	 * The same VM object might be shared by multiple processes
 	 * and the mm_struct is usually freed when a process exits.
 	 *
 	 * The atomic reference below makes sure the mm_struct is
 	 * available as long as the vmap is in the linux_vma_head.
 	 */
 	task = current;
 	mm = task->mm;
 	if (atomic_inc_not_zero(&mm->mm_users) == 0)
 		return (EINVAL);
 
 	vmap = kzalloc(sizeof(*vmap), GFP_KERNEL);
 	vmap->vm_start = 0;
 	vmap->vm_end = size;
 	vmap->vm_pgoff = *offset / PAGE_SIZE;
 	vmap->vm_pfn = 0;
 	vmap->vm_flags = vmap->vm_page_prot = (nprot & VM_PROT_ALL);
 	if (is_shared)
 		vmap->vm_flags |= VM_SHARED;
 	vmap->vm_ops = NULL;
 	vmap->vm_file = get_file(filp);
 	vmap->vm_mm = mm;
 
 	if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) {
 		error = linux_get_error(task, EINTR);
 	} else {
 		error = -OPW(fp, td, fop->mmap(filp, vmap));
 		error = linux_get_error(task, error);
 		up_write(&vmap->vm_mm->mmap_sem);
 	}
 
 	if (error != 0) {
 		linux_cdev_handle_free(vmap);
 		return (error);
 	}
 
 	attr = pgprot2cachemode(vmap->vm_page_prot);
 
 	if (vmap->vm_ops != NULL) {
 		struct vm_area_struct *ptr;
 		void *vm_private_data;
 		bool vm_no_fault;
 
 		if (vmap->vm_ops->open == NULL ||
 		    vmap->vm_ops->close == NULL ||
 		    vmap->vm_private_data == NULL) {
 			/* free allocated VM area struct */
 			linux_cdev_handle_free(vmap);
 			return (EINVAL);
 		}
 
 		vm_private_data = vmap->vm_private_data;
 
 		rw_wlock(&linux_vma_lock);
 		TAILQ_FOREACH(ptr, &linux_vma_head, vm_entry) {
 			if (ptr->vm_private_data == vm_private_data)
 				break;
 		}
 		/* check if there is an existing VM area struct */
 		if (ptr != NULL) {
 			/* check if the VM area structure is invalid */
 			if (ptr->vm_ops == NULL ||
 			    ptr->vm_ops->open == NULL ||
 			    ptr->vm_ops->close == NULL) {
 				error = ESTALE;
 				vm_no_fault = 1;
 			} else {
 				error = EEXIST;
 				vm_no_fault = (ptr->vm_ops->fault == NULL);
 			}
 		} else {
 			/* insert VM area structure into list */
 			TAILQ_INSERT_TAIL(&linux_vma_head, vmap, vm_entry);
 			error = 0;
 			vm_no_fault = (vmap->vm_ops->fault == NULL);
 		}
 		rw_wunlock(&linux_vma_lock);
 
 		if (error != 0) {
 			/* free allocated VM area struct */
 			linux_cdev_handle_free(vmap);
 			/* check for stale VM area struct */
 			if (error != EEXIST)
 				return (error);
 		}
 
 		/* check if there is no fault handler */
 		if (vm_no_fault) {
 			*object = cdev_pager_allocate(vm_private_data, OBJT_DEVICE,
 			    &linux_cdev_pager_ops[1], size, nprot, *offset,
 			    td->td_ucred);
 		} else {
 			*object = cdev_pager_allocate(vm_private_data, OBJT_MGTDEVICE,
 			    &linux_cdev_pager_ops[0], size, nprot, *offset,
 			    td->td_ucred);
 		}
 
 		/* check if allocating the VM object failed */
 		if (*object == NULL) {
 			if (error == 0) {
 				/* remove VM area struct from list */
 				linux_cdev_handle_remove(vmap);
 				/* free allocated VM area struct */
 				linux_cdev_handle_free(vmap);
 			}
 			return (EINVAL);
 		}
 	} else {
 		struct sglist *sg;
 
 		sg = sglist_alloc(1, M_WAITOK);
 		sglist_append_phys(sg,
 		    (vm_paddr_t)vmap->vm_pfn << PAGE_SHIFT, vmap->vm_len);
 
 		*object = vm_pager_allocate(OBJT_SG, sg, vmap->vm_len,
 		    nprot, 0, td->td_ucred);
 
 		linux_cdev_handle_free(vmap);
 
 		if (*object == NULL) {
 			sglist_free(sg);
 			return (EINVAL);
 		}
 	}
 
 	if (attr != VM_MEMATTR_DEFAULT) {
 		VM_OBJECT_WLOCK(*object);
 		vm_object_set_memattr(*object, attr);
 		VM_OBJECT_WUNLOCK(*object);
 	}
 	*offset = 0;
 	return (0);
 }
 
 struct cdevsw linuxcdevsw = {
 	.d_version = D_VERSION,
 	.d_fdopen = linux_dev_fdopen,
 	.d_name = "lkpidev",
 };
 
 static int
 linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct linux_file *filp;
 	const struct file_operations *fop;
 	struct linux_cdev *ldev;
 	ssize_t bytes;
 	int error;
 
 	error = 0;
 	filp = (struct linux_file *)file->f_data;
 	filp->f_flags = file->f_flag;
 	/* XXX no support for I/O vectors currently */
 	if (uio->uio_iovcnt != 1)
 		return (EOPNOTSUPP);
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	linux_set_current(td);
 	linux_get_fop(filp, &fop, &ldev);
 	if (fop->read != NULL) {
 		bytes = OPW(file, td, fop->read(filp,
 		    uio->uio_iov->iov_base,
 		    uio->uio_iov->iov_len, &uio->uio_offset));
 		if (bytes >= 0) {
 			uio->uio_iov->iov_base =
 			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
 			uio->uio_iov->iov_len -= bytes;
 			uio->uio_resid -= bytes;
 		} else {
 			error = linux_get_error(current, -bytes);
 		}
 	} else
 		error = ENXIO;
 
 	/* update kqfilter status, if any */
 	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ);
 	linux_drop_fop(ldev);
 
 	return (error);
 }
 
 static int
 linux_file_write(struct file *file, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct linux_file *filp;
 	const struct file_operations *fop;
 	struct linux_cdev *ldev;
 	ssize_t bytes;
 	int error;
 
 	filp = (struct linux_file *)file->f_data;
 	filp->f_flags = file->f_flag;
 	/* XXX no support for I/O vectors currently */
 	if (uio->uio_iovcnt != 1)
 		return (EOPNOTSUPP);
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	linux_set_current(td);
 	linux_get_fop(filp, &fop, &ldev);
 	if (fop->write != NULL) {
 		bytes = OPW(file, td, fop->write(filp,
 		    uio->uio_iov->iov_base,
 		    uio->uio_iov->iov_len, &uio->uio_offset));
 		if (bytes >= 0) {
 			uio->uio_iov->iov_base =
 			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
 			uio->uio_iov->iov_len -= bytes;
 			uio->uio_resid -= bytes;
 			error = 0;
 		} else {
 			error = linux_get_error(current, -bytes);
 		}
 	} else
 		error = ENXIO;
 
 	/* update kqfilter status, if any */
 	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE);
 
 	linux_drop_fop(ldev);
 
 	return (error);
 }
 
 static int
 linux_file_poll(struct file *file, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	struct linux_file *filp;
 	const struct file_operations *fop;
 	struct linux_cdev *ldev;
 	int revents;
 
 	filp = (struct linux_file *)file->f_data;
 	filp->f_flags = file->f_flag;
 	linux_set_current(td);
 	linux_get_fop(filp, &fop, &ldev);
 	if (fop->poll != NULL) {
 		revents = OPW(file, td, fop->poll(filp,
 		    LINUX_POLL_TABLE_NORMAL)) & events;
 	} else {
 		revents = 0;
 	}
 	linux_drop_fop(ldev);
 	return (revents);
 }
 
 static int
 linux_file_close(struct file *file, struct thread *td)
 {
 	struct linux_file *filp;
 	int (*release)(struct inode *, struct linux_file *);
 	const struct file_operations *fop;
 	struct linux_cdev *ldev;
 	int error;
 
 	filp = (struct linux_file *)file->f_data;
 
 	KASSERT(file_count(filp) == 0,
 	    ("File refcount(%d) is not zero", file_count(filp)));
 
 	if (td == NULL)
 		td = curthread;
 
 	error = 0;
 	filp->f_flags = file->f_flag;
 	linux_set_current(td);
 	linux_poll_wait_dequeue(filp);
 	linux_get_fop(filp, &fop, &ldev);
 	/*
 	 * Always use the real release function, if any, to avoid
 	 * leaking device resources:
 	 */
 	release = filp->f_op->release;
 	if (release != NULL)
 		error = -OPW(file, td, release(filp->f_vnode, filp));
 	funsetown(&filp->f_sigio);
 	if (filp->f_vnode != NULL)
 		vdrop(filp->f_vnode);
 	linux_drop_fop(ldev);
 	ldev = filp->f_cdev;
 	if (ldev != NULL)
 		linux_cdev_deref(ldev);
 	linux_synchronize_rcu(RCU_TYPE_REGULAR);
 	kfree(filp);
 
 	return (error);
 }
 
 static int
 linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
     struct thread *td)
 {
 	struct linux_file *filp;
 	const struct file_operations *fop;
 	struct linux_cdev *ldev;
 	struct fiodgname_arg *fgn;
 	const char *p;
 	int error, i;
 
 	error = 0;
 	filp = (struct linux_file *)fp->f_data;
 	filp->f_flags = fp->f_flag;
 	linux_get_fop(filp, &fop, &ldev);
 
 	linux_set_current(td);
 	switch (cmd) {
 	case FIONBIO:
 		break;
 	case FIOASYNC:
 		if (fop->fasync == NULL)
 			break;
 		error = -OPW(fp, td, fop->fasync(0, filp, fp->f_flag & FASYNC));
 		break;
 	case FIOSETOWN:
 		error = fsetown(*(int *)data, &filp->f_sigio);
 		if (error == 0) {
 			if (fop->fasync == NULL)
 				break;
 			error = -OPW(fp, td, fop->fasync(0, filp,
 			    fp->f_flag & FASYNC));
 		}
 		break;
 	case FIOGETOWN:
 		*(int *)data = fgetown(&filp->f_sigio);
 		break;
 	case FIODGNAME:
 #ifdef	COMPAT_FREEBSD32
 	case FIODGNAME_32:
 #endif
 		if (filp->f_cdev == NULL || filp->f_cdev->cdev == NULL) {
 			error = ENXIO;
 			break;
 		}
 		fgn = data;
 		p = devtoname(filp->f_cdev->cdev);
 		i = strlen(p) + 1;
 		if (i > fgn->len) {
 			error = EINVAL;
 			break;
 		}
 		error = copyout(p, fiodgname_buf_get_ptr(fgn, cmd), i);
 		break;
 	default:
 		error = linux_file_ioctl_sub(fp, filp, fop, cmd, data, td);
 		break;
 	}
 	linux_drop_fop(ldev);
 	return (error);
 }
 
 static int
 linux_file_mmap_sub(struct thread *td, vm_size_t objsize, vm_prot_t prot,
     vm_prot_t maxprot, int flags, struct file *fp,
     vm_ooffset_t *foff, const struct file_operations *fop, vm_object_t *objp)
 {
 	/*
 	 * Character devices do not provide private mappings
 	 * of any kind:
 	 */
 	if ((maxprot & VM_PROT_WRITE) == 0 &&
 	    (prot & VM_PROT_WRITE) != 0)
 		return (EACCES);
 	if ((flags & (MAP_PRIVATE | MAP_COPY)) != 0)
 		return (EINVAL);
 
 	return (linux_file_mmap_single(fp, fop, foff, objsize, objp,
 	    (int)prot, (flags & MAP_SHARED) ? true : false, td));
 }
 
 static int
 linux_file_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
     vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
     struct thread *td)
 {
 	struct linux_file *filp;
 	const struct file_operations *fop;
 	struct linux_cdev *ldev;
 	struct mount *mp;
 	struct vnode *vp;
 	vm_object_t object;
 	vm_prot_t maxprot;
 	int error;
 
 	filp = (struct linux_file *)fp->f_data;
 
 	vp = filp->f_vnode;
 	if (vp == NULL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Ensure that file and memory protections are
 	 * compatible.
 	 */
 	mp = vp->v_mount;
 	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
 		maxprot = VM_PROT_NONE;
 		if ((prot & VM_PROT_EXECUTE) != 0)
 			return (EACCES);
 	} else
 		maxprot = VM_PROT_EXECUTE;
 	if ((fp->f_flag & FREAD) != 0)
 		maxprot |= VM_PROT_READ;
 	else if ((prot & VM_PROT_READ) != 0)
 		return (EACCES);
 
 	/*
 	 * If we are sharing potential changes via MAP_SHARED and we
 	 * are trying to get write permission although we opened it
 	 * without asking for it, bail out.
 	 *
 	 * Note that most character devices always share mappings.
 	 *
 	 * Rely on linux_file_mmap_sub() to fail invalid MAP_PRIVATE
 	 * requests rather than doing it here.
 	 */
 	if ((flags & MAP_SHARED) != 0) {
 		if ((fp->f_flag & FWRITE) != 0)
 			maxprot |= VM_PROT_WRITE;
 		else if ((prot & VM_PROT_WRITE) != 0)
 			return (EACCES);
 	}
 	maxprot &= cap_maxprot;
 
 	linux_get_fop(filp, &fop, &ldev);
 	error = linux_file_mmap_sub(td, size, prot, maxprot, flags, fp,
 	    &foff, fop, &object);
 	if (error != 0)
 		goto out;
 
 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
 	    foff, FALSE, td);
 	if (error != 0)
 		vm_object_deallocate(object);
 out:
 	linux_drop_fop(ldev);
 	return (error);
 }
 
 static int
 linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
 {
 	struct linux_file *filp;
 	struct vnode *vp;
 	int error;
 
 	filp = (struct linux_file *)fp->f_data;
 	if (filp->f_vnode == NULL)
 		return (EOPNOTSUPP);
 
 	vp = filp->f_vnode;
 
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	error = VOP_STAT(vp, sb, curthread->td_ucred, NOCRED);
 	VOP_UNLOCK(vp);
 
 	return (error);
 }
 
 static int
 linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif,
     struct filedesc *fdp)
 {
 	struct linux_file *filp;
 	struct vnode *vp;
 	int error;
 
 	filp = fp->f_data;
 	vp = filp->f_vnode;
 	if (vp == NULL) {
 		error = 0;
 		kif->kf_type = KF_TYPE_DEV;
 	} else {
 		vref(vp);
 		FILEDESC_SUNLOCK(fdp);
 		error = vn_fill_kinfo_vnode(vp, kif);
 		vrele(vp);
 		kif->kf_type = KF_TYPE_VNODE;
 		FILEDESC_SLOCK(fdp);
 	}
 	return (error);
 }
 
 unsigned int
 linux_iminor(struct inode *inode)
 {
 	struct linux_cdev *ldev;
 
 	if (inode == NULL || inode->v_rdev == NULL ||
 	    inode->v_rdev->si_devsw != &linuxcdevsw)
 		return (-1U);
 	ldev = inode->v_rdev->si_drv1;
 	if (ldev == NULL)
 		return (-1U);
 
 	return (minor(ldev->dev));
 }
 
 struct fileops linuxfileops = {
 	.fo_read = linux_file_read,
 	.fo_write = linux_file_write,
 	.fo_truncate = invfo_truncate,
 	.fo_kqfilter = linux_file_kqfilter,
 	.fo_stat = linux_file_stat,
 	.fo_fill_kinfo = linux_file_fill_kinfo,
 	.fo_poll = linux_file_poll,
 	.fo_close = linux_file_close,
 	.fo_ioctl = linux_file_ioctl,
 	.fo_mmap = linux_file_mmap,
 	.fo_chmod = invfo_chmod,
 	.fo_chown = invfo_chown,
 	.fo_sendfile = invfo_sendfile,
 	.fo_flags = DFLAG_PASSABLE,
 };
 
 /*
  * Hash of vmmap addresses.  This is infrequently accessed and does not
  * need to be particularly large.  This is done because we must store the
  * caller's idea of the map size to properly unmap.
  */
 struct vmmap {
 	LIST_ENTRY(vmmap)	vm_next;
 	void 			*vm_addr;
 	unsigned long		vm_size;
 };
 
 struct vmmaphd {
 	struct vmmap *lh_first;
 };
 #define	VMMAP_HASH_SIZE	64
 #define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
 #define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
 static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
 static struct mtx vmmaplock;
 
 static void
 vmmap_add(void *addr, unsigned long size)
 {
 	struct vmmap *vmmap;
 
 	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
 	mtx_lock(&vmmaplock);
 	vmmap->vm_size = size;
 	vmmap->vm_addr = addr;
 	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
 	mtx_unlock(&vmmaplock);
 }
 
 static struct vmmap *
 vmmap_remove(void *addr)
 {
 	struct vmmap *vmmap;
 
 	mtx_lock(&vmmaplock);
 	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
 		if (vmmap->vm_addr == addr)
 			break;
 	if (vmmap)
 		LIST_REMOVE(vmmap, vm_next);
 	mtx_unlock(&vmmaplock);
 
 	return (vmmap);
 }
 
 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) || defined(__aarch64__) || defined(__riscv)
 void *
 _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
 {
 	void *addr;
 
 	addr = pmap_mapdev_attr(phys_addr, size, attr);
 	if (addr == NULL)
 		return (NULL);
 	vmmap_add(addr, size);
 
 	return (addr);
 }
 #endif
 
 void
 iounmap(void *addr)
 {
 	struct vmmap *vmmap;
 
 	vmmap = vmmap_remove(addr);
 	if (vmmap == NULL)
 		return;
 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) || defined(__aarch64__) || defined(__riscv)
 	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
 #endif
 	kfree(vmmap);
 }
 
 void *
 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
 {
 	vm_offset_t off;
 	size_t size;
 
 	size = count * PAGE_SIZE;
 	off = kva_alloc(size);
 	if (off == 0)
 		return (NULL);
 	vmmap_add((void *)off, size);
 	pmap_qenter(off, pages, count);
 
 	return ((void *)off);
 }
 
 void
 vunmap(void *addr)
 {
 	struct vmmap *vmmap;
 
 	vmmap = vmmap_remove(addr);
 	if (vmmap == NULL)
 		return;
 	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
 	kva_free((vm_offset_t)addr, vmmap->vm_size);
 	kfree(vmmap);
 }
 
 static char *
 devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap)
 {
 	unsigned int len;
 	char *p;
 	va_list aq;
 
 	va_copy(aq, ap);
 	len = vsnprintf(NULL, 0, fmt, aq);
 	va_end(aq);
 
 	if (dev != NULL)
 		p = devm_kmalloc(dev, len + 1, gfp);
 	else
 		p = kmalloc(len + 1, gfp);
 	if (p != NULL)
 		vsnprintf(p, len + 1, fmt, ap);
 
 	return (p);
 }
 
 char *
 kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
 {
 
 	return (devm_kvasprintf(NULL, gfp, fmt, ap));
 }
 
 char *
 lkpi_devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...)
 {
 	va_list ap;
 	char *p;
 
 	va_start(ap, fmt);
 	p = devm_kvasprintf(dev, gfp, fmt, ap);
 	va_end(ap);
 
 	return (p);
 }
 
 char *
 kasprintf(gfp_t gfp, const char *fmt, ...)
 {
 	va_list ap;
 	char *p;
 
 	va_start(ap, fmt);
 	p = kvasprintf(gfp, fmt, ap);
 	va_end(ap);
 
 	return (p);
 }
 
 static void
 linux_timer_callback_wrapper(void *context)
 {
 	struct timer_list *timer;
 
 	timer = context;
 
 	if (linux_set_current_flags(curthread, M_NOWAIT)) {
 		/* try again later */
 		callout_reset(&timer->callout, 1,
 		    &linux_timer_callback_wrapper, timer);
 		return;
 	}
 
 	timer->function(timer->data);
 }
 
 int
 mod_timer(struct timer_list *timer, int expires)
 {
 	int ret;
 
 	timer->expires = expires;
 	ret = callout_reset(&timer->callout,
 	    linux_timer_jiffies_until(expires),
 	    &linux_timer_callback_wrapper, timer);
 
 	MPASS(ret == 0 || ret == 1);
 
 	return (ret == 1);
 }
 
 void
 add_timer(struct timer_list *timer)
 {
 
 	callout_reset(&timer->callout,
 	    linux_timer_jiffies_until(timer->expires),
 	    &linux_timer_callback_wrapper, timer);
 }
 
 void
 add_timer_on(struct timer_list *timer, int cpu)
 {
 
 	callout_reset_on(&timer->callout,
 	    linux_timer_jiffies_until(timer->expires),
 	    &linux_timer_callback_wrapper, timer, cpu);
 }
 
 int
 del_timer(struct timer_list *timer)
 {
 
 	if (callout_stop(&(timer)->callout) == -1)
 		return (0);
 	return (1);
 }
 
 int
 del_timer_sync(struct timer_list *timer)
 {
 
 	if (callout_drain(&(timer)->callout) == -1)
 		return (0);
 	return (1);
 }
 
 /* greatest common divisor, Euclid equation */
 static uint64_t
 lkpi_gcd_64(uint64_t a, uint64_t b)
 {
 	uint64_t an;
 	uint64_t bn;
 
 	while (b != 0) {
 		an = b;
 		bn = a % b;
 		a = an;
 		b = bn;
 	}
 	return (a);
 }
 
 uint64_t lkpi_nsec2hz_rem;
 uint64_t lkpi_nsec2hz_div = 1000000000ULL;
 uint64_t lkpi_nsec2hz_max;
 
 uint64_t lkpi_usec2hz_rem;
 uint64_t lkpi_usec2hz_div = 1000000ULL;
 uint64_t lkpi_usec2hz_max;
 
 uint64_t lkpi_msec2hz_rem;
 uint64_t lkpi_msec2hz_div = 1000ULL;
 uint64_t lkpi_msec2hz_max;
 
 static void
 linux_timer_init(void *arg)
 {
 	uint64_t gcd;
 
 	/*
 	 * Compute an internal HZ value which can divide 2**32 to
 	 * avoid timer rounding problems when the tick value wraps
 	 * around 2**32:
 	 */
 	linux_timer_hz_mask = 1;
 	while (linux_timer_hz_mask < (unsigned long)hz)
 		linux_timer_hz_mask *= 2;
 	linux_timer_hz_mask--;
 
 	/* compute some internal constants */
 
 	lkpi_nsec2hz_rem = hz;
 	lkpi_usec2hz_rem = hz;
 	lkpi_msec2hz_rem = hz;
 
 	gcd = lkpi_gcd_64(lkpi_nsec2hz_rem, lkpi_nsec2hz_div);
 	lkpi_nsec2hz_rem /= gcd;
 	lkpi_nsec2hz_div /= gcd;
 	lkpi_nsec2hz_max = -1ULL / lkpi_nsec2hz_rem;
 
 	gcd = lkpi_gcd_64(lkpi_usec2hz_rem, lkpi_usec2hz_div);
 	lkpi_usec2hz_rem /= gcd;
 	lkpi_usec2hz_div /= gcd;
 	lkpi_usec2hz_max = -1ULL / lkpi_usec2hz_rem;
 
 	gcd = lkpi_gcd_64(lkpi_msec2hz_rem, lkpi_msec2hz_div);
 	lkpi_msec2hz_rem /= gcd;
 	lkpi_msec2hz_div /= gcd;
 	lkpi_msec2hz_max = -1ULL / lkpi_msec2hz_rem;
 }
 SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);
 
 void
 linux_complete_common(struct completion *c, int all)
 {
 	int wakeup_swapper;
 
 	sleepq_lock(c);
 	if (all) {
 		c->done = UINT_MAX;
 		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
 	} else {
 		if (c->done != UINT_MAX)
 			c->done++;
 		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
 	}
 	sleepq_release(c);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * Indefinite wait for done != 0 with or without signals.
  */
 int
 linux_wait_for_common(struct completion *c, int flags)
 {
 	struct task_struct *task;
 	int error;
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 
 	task = current;
 
 	if (flags != 0)
 		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
 	else
 		flags = SLEEPQ_SLEEP;
 	error = 0;
 	for (;;) {
 		sleepq_lock(c);
 		if (c->done)
 			break;
 		sleepq_add(c, NULL, "completion", flags, 0);
 		if (flags & SLEEPQ_INTERRUPTIBLE) {
 			DROP_GIANT();
 			error = -sleepq_wait_sig(c, 0);
 			PICKUP_GIANT();
 			if (error != 0) {
 				linux_schedule_save_interrupt_value(task, error);
 				error = -ERESTARTSYS;
 				goto intr;
 			}
 		} else {
 			DROP_GIANT();
 			sleepq_wait(c, 0);
 			PICKUP_GIANT();
 		}
 	}
 	if (c->done != UINT_MAX)
 		c->done--;
 	sleepq_release(c);
 
 intr:
 	return (error);
 }
 
 /*
  * Time limited wait for done != 0 with or without signals.
  */
 int
 linux_wait_for_timeout_common(struct completion *c, int timeout, int flags)
 {
 	struct task_struct *task;
 	int end = jiffies + timeout;
 	int error;
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 
 	task = current;
 
 	if (flags != 0)
 		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
 	else
 		flags = SLEEPQ_SLEEP;
 
 	for (;;) {
 		sleepq_lock(c);
 		if (c->done)
 			break;
 		sleepq_add(c, NULL, "completion", flags, 0);
 		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
 
 		DROP_GIANT();
 		if (flags & SLEEPQ_INTERRUPTIBLE)
 			error = -sleepq_timedwait_sig(c, 0);
 		else
 			error = -sleepq_timedwait(c, 0);
 		PICKUP_GIANT();
 
 		if (error != 0) {
 			/* check for timeout */
 			if (error == -EWOULDBLOCK) {
 				error = 0;	/* timeout */
 			} else {
 				/* signal happened */
 				linux_schedule_save_interrupt_value(task, error);
 				error = -ERESTARTSYS;
 			}
 			goto done;
 		}
 	}
 	if (c->done != UINT_MAX)
 		c->done--;
 	sleepq_release(c);
 
 	/* return how many jiffies are left */
 	error = linux_timer_jiffies_until(end);
 done:
 	return (error);
 }
 
 int
 linux_try_wait_for_completion(struct completion *c)
 {
 	int isdone;
 
 	sleepq_lock(c);
 	isdone = (c->done != 0);
 	if (c->done != 0 && c->done != UINT_MAX)
 		c->done--;
 	sleepq_release(c);
 	return (isdone);
 }
 
 int
 linux_completion_done(struct completion *c)
 {
 	int isdone;
 
 	sleepq_lock(c);
 	isdone = (c->done != 0);
 	sleepq_release(c);
 	return (isdone);
 }
 
 static void
 linux_cdev_deref(struct linux_cdev *ldev)
 {
 	if (refcount_release(&ldev->refs) &&
 	    ldev->kobj.ktype == &linux_cdev_ktype)
 		kfree(ldev);
 }
 
 static void
 linux_cdev_release(struct kobject *kobj)
 {
 	struct linux_cdev *cdev;
 	struct kobject *parent;
 
 	cdev = container_of(kobj, struct linux_cdev, kobj);
 	parent = kobj->parent;
 	linux_destroy_dev(cdev);
 	linux_cdev_deref(cdev);
 	kobject_put(parent);
 }
 
 static void
 linux_cdev_static_release(struct kobject *kobj)
 {
 	struct cdev *cdev;
 	struct linux_cdev *ldev;
 
 	ldev = container_of(kobj, struct linux_cdev, kobj);
 	cdev = ldev->cdev;
 	if (cdev != NULL) {
 		destroy_dev(cdev);
 		ldev->cdev = NULL;
 	}
 	kobject_put(kobj->parent);
 }
 
 int
 linux_cdev_device_add(struct linux_cdev *ldev, struct device *dev)
 {
 	int ret;
 
 	if (dev->devt != 0) {
 		/* Set parent kernel object. */
 		ldev->kobj.parent = &dev->kobj;
 
 		/*
 		 * Unlike Linux we require the kobject of the
 		 * character device structure to have a valid name
 		 * before calling this function:
 		 */
 		if (ldev->kobj.name == NULL)
 			return (-EINVAL);
 
 		ret = cdev_add(ldev, dev->devt, 1);
 		if (ret)
 			return (ret);
 	}
 	ret = device_add(dev);
 	if (ret != 0 && dev->devt != 0)
 		cdev_del(ldev);
 	return (ret);
 }
 
 void
 linux_cdev_device_del(struct linux_cdev *ldev, struct device *dev)
 {
 	device_del(dev);
 
 	if (dev->devt != 0)
 		cdev_del(ldev);
 }
 
 static void
 linux_destroy_dev(struct linux_cdev *ldev)
 {
 
 	if (ldev->cdev == NULL)
 		return;
 
 	MPASS((ldev->siref & LDEV_SI_DTR) == 0);
 	MPASS(ldev->kobj.ktype == &linux_cdev_ktype);
 
 	atomic_set_int(&ldev->siref, LDEV_SI_DTR);
 	while ((atomic_load_int(&ldev->siref) & ~LDEV_SI_DTR) != 0)
 		pause("ldevdtr", hz / 4);
 
 	destroy_dev(ldev->cdev);
 	ldev->cdev = NULL;
 }
 
 const struct kobj_type linux_cdev_ktype = {
 	.release = linux_cdev_release,
 };
 
 const struct kobj_type linux_cdev_static_ktype = {
 	.release = linux_cdev_static_release,
 };
 
 static void
 linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate)
 {
 	struct notifier_block *nb;
 	struct netdev_notifier_info ni;
 
 	nb = arg;
 	ni.ifp = ifp;
 	ni.dev = (struct net_device *)ifp;
 	if (linkstate == LINK_STATE_UP)
 		nb->notifier_call(nb, NETDEV_UP, &ni);
 	else
 		nb->notifier_call(nb, NETDEV_DOWN, &ni);
 }
 
 static void
 linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp)
 {
 	struct notifier_block *nb;
 	struct netdev_notifier_info ni;
 
 	nb = arg;
 	ni.ifp = ifp;
 	ni.dev = (struct net_device *)ifp;
 	nb->notifier_call(nb, NETDEV_REGISTER, &ni);
 }
 
 static void
 linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp)
 {
 	struct notifier_block *nb;
 	struct netdev_notifier_info ni;
 
 	nb = arg;
 	ni.ifp = ifp;
 	ni.dev = (struct net_device *)ifp;
 	nb->notifier_call(nb, NETDEV_UNREGISTER, &ni);
 }
 
 static void
 linux_handle_iflladdr_event(void *arg, struct ifnet *ifp)
 {
 	struct notifier_block *nb;
 	struct netdev_notifier_info ni;
 
 	nb = arg;
 	ni.ifp = ifp;
 	ni.dev = (struct net_device *)ifp;
 	nb->notifier_call(nb, NETDEV_CHANGEADDR, &ni);
 }
 
 static void
 linux_handle_ifaddr_event(void *arg, struct ifnet *ifp)
 {
 	struct notifier_block *nb;
 	struct netdev_notifier_info ni;
 
 	nb = arg;
 	ni.ifp = ifp;
 	ni.dev = (struct net_device *)ifp;
 	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, &ni);
 }
 
 int
 register_netdevice_notifier(struct notifier_block *nb)
 {
 
 	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
 	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
 	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
 	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
 	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
 	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
 	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
 	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);
 
 	return (0);
 }
 
 int
 register_inetaddr_notifier(struct notifier_block *nb)
 {
 
 	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
 	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
 	return (0);
 }
 
 int
 unregister_netdevice_notifier(struct notifier_block *nb)
 {
 
 	EVENTHANDLER_DEREGISTER(ifnet_link_event,
 	    nb->tags[NETDEV_UP]);
 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
 	    nb->tags[NETDEV_REGISTER]);
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 	    nb->tags[NETDEV_UNREGISTER]);
 	EVENTHANDLER_DEREGISTER(iflladdr_event,
 	    nb->tags[NETDEV_CHANGEADDR]);
 
 	return (0);
 }
 
 int
 unregister_inetaddr_notifier(struct notifier_block *nb)
 {
 
 	EVENTHANDLER_DEREGISTER(ifaddr_event,
 	    nb->tags[NETDEV_CHANGEIFADDR]);
 
 	return (0);
 }
 
 struct list_sort_thunk {
 	int (*cmp)(void *, struct list_head *, struct list_head *);
 	void *priv;
 };
 
 static inline int
 linux_le_cmp(void *priv, const void *d1, const void *d2)
 {
 	struct list_head *le1, *le2;
 	struct list_sort_thunk *thunk;
 
 	thunk = priv;
 	le1 = *(__DECONST(struct list_head **, d1));
 	le2 = *(__DECONST(struct list_head **, d2));
 	return ((thunk->cmp)(thunk->priv, le1, le2));
 }
 
 void
 list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
     struct list_head *a, struct list_head *b))
 {
 	struct list_sort_thunk thunk;
 	struct list_head **ar, *le;
 	size_t count, i;
 
 	count = 0;
 	list_for_each(le, head)
 		count++;
 	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
 	i = 0;
 	list_for_each(le, head)
 		ar[i++] = le;
 	thunk.cmp = cmp;
 	thunk.priv = priv;
 	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
 	INIT_LIST_HEAD(head);
 	for (i = 0; i < count; i++)
 		list_add_tail(ar[i], head);
 	free(ar, M_KMALLOC);
 }
 
 #if defined(__i386__) || defined(__amd64__)
 int
 linux_wbinvd_on_all_cpus(void)
 {
 
 	pmap_invalidate_cache();
 	return (0);
 }
 #endif
 
 int
 linux_on_each_cpu(void callback(void *), void *data)
 {
 
 	smp_rendezvous(smp_no_rendezvous_barrier, callback,
 	    smp_no_rendezvous_barrier, data);
 	return (0);
 }
 
 int
 linux_in_atomic(void)
 {
 
 	return ((curthread->td_pflags & TDP_NOFAULTING) != 0);
 }
 
 struct linux_cdev *
 linux_find_cdev(const char *name, unsigned major, unsigned minor)
 {
 	dev_t dev = MKDEV(major, minor);
 	struct cdev *cdev;
 
 	dev_lock();
 	LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) {
 		struct linux_cdev *ldev = cdev->si_drv1;
 		if (ldev->dev == dev &&
 		    strcmp(kobject_name(&ldev->kobj), name) == 0) {
 			break;
 		}
 	}
 	dev_unlock();
 
 	return (cdev != NULL ? cdev->si_drv1 : NULL);
 }
 
 int
 __register_chrdev(unsigned int major, unsigned int baseminor,
     unsigned int count, const char *name,
     const struct file_operations *fops)
 {
 	struct linux_cdev *cdev;
 	int ret = 0;
 	int i;
 
 	for (i = baseminor; i < baseminor + count; i++) {
 		cdev = cdev_alloc();
 		cdev->ops = fops;
 		kobject_set_name(&cdev->kobj, name);
 
 		ret = cdev_add(cdev, makedev(major, i), 1);
 		if (ret != 0)
 			break;
 	}
 	return (ret);
 }
 
 int
 __register_chrdev_p(unsigned int major, unsigned int baseminor,
     unsigned int count, const char *name,
     const struct file_operations *fops, uid_t uid,
     gid_t gid, int mode)
 {
 	struct linux_cdev *cdev;
 	int ret = 0;
 	int i;
 
 	for (i = baseminor; i < baseminor + count; i++) {
 		cdev = cdev_alloc();
 		cdev->ops = fops;
 		kobject_set_name(&cdev->kobj, name);
 
 		ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode);
 		if (ret != 0)
 			break;
 	}
 	return (ret);
 }
 
 void
 __unregister_chrdev(unsigned int major, unsigned int baseminor,
     unsigned int count, const char *name)
 {
 	struct linux_cdev *cdevp;
 	int i;
 
 	for (i = baseminor; i < baseminor + count; i++) {
 		cdevp = linux_find_cdev(name, major, i);
 		if (cdevp != NULL)
 			cdev_del(cdevp);
 	}
 }
 
 void
 linux_dump_stack(void)
 {
 #ifdef STACK
 	struct stack st;
 
-	stack_zero(&st);
 	stack_save(&st);
 	stack_print(&st);
 #endif
 }
 
 int
 linuxkpi_net_ratelimit(void)
 {
 
 	return (ppsratecheck(&lkpi_net_lastlog, &lkpi_net_curpps,
 	   lkpi_net_maxpps));
 }
 
 #if defined(__i386__) || defined(__amd64__)
 bool linux_cpu_has_clflush;
 #endif
 
 static void
 linux_compat_init(void *arg)
 {
 	struct sysctl_oid *rootoid;
 	int i;
 
 #if defined(__i386__) || defined(__amd64__)
 	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
 #endif
 	rw_init(&linux_vma_lock, "lkpi-vma-lock");
 
 	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
 	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
 	kobject_init(&linux_class_root, &linux_class_ktype);
 	kobject_set_name(&linux_class_root, "class");
 	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
 	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
 	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
 	kobject_set_name(&linux_root_device.kobj, "device");
 	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
 	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device",
 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "device");
 	linux_root_device.bsddev = root_bus;
 	linux_class_misc.name = "misc";
 	class_register(&linux_class_misc);
 	INIT_LIST_HEAD(&pci_drivers);
 	INIT_LIST_HEAD(&pci_devices);
 	spin_lock_init(&pci_lock);
 	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
 	for (i = 0; i < VMMAP_HASH_SIZE; i++)
 		LIST_INIT(&vmmaphead[i]);
 	init_waitqueue_head(&linux_bit_waitq);
 	init_waitqueue_head(&linux_var_waitq);
 
 	CPU_COPY(&all_cpus, &cpu_online_mask);
 }
 SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);
 
 static void
 linux_compat_uninit(void *arg)
 {
 	linux_kobject_kfree_name(&linux_class_root);
 	linux_kobject_kfree_name(&linux_root_device.kobj);
 	linux_kobject_kfree_name(&linux_class_misc.kobj);
 
 	mtx_destroy(&vmmaplock);
 	spin_lock_destroy(&pci_lock);
 	rw_destroy(&linux_vma_lock);
 }
 SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);
 
 /*
  * NOTE: Linux frequently uses "unsigned long" for pointer to integer
  * conversion and vice versa, where in FreeBSD "uintptr_t" would be
  * used. Assert these types have the same size, else some parts of the
  * LinuxKPI may not work like expected:
  */
 CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));
diff --git a/sys/dev/ocs_fc/ocs_os.c b/sys/dev/ocs_fc/ocs_os.c
index 4e4edea2e63f..bd55b6c0bdce 100644
--- a/sys/dev/ocs_fc/ocs_os.c
+++ b/sys/dev/ocs_fc/ocs_os.c
@@ -1,1045 +1,1044 @@
 /*-
  * Copyright (c) 2017 Broadcom. All rights reserved.
  * The term "Broadcom" refers to Broadcom Limited and/or its subsidiaries.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  *    this list of conditions and the following disclaimer in the documentation
  *    and/or other materials provided with the distribution.
  *
  * 3. Neither the name of the copyright holder nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /**
  * @file
  * Implementation of common BSD OS abstraction functions
  */
 
 #include "ocs.h"
 
 static MALLOC_DEFINE(M_OCS, "OCS", "OneCore Storage data");
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include <machine/bus.h>
 
 callout_func_t	__ocs_callout;
 
 uint32_t
 ocs_config_read32(ocs_os_handle_t os, uint32_t reg)
 {
 	return pci_read_config(os->dev, reg, 4);
 }
 
 uint16_t
 ocs_config_read16(ocs_os_handle_t os, uint32_t reg)
 {
 	return pci_read_config(os->dev, reg, 2);
 }
 
 uint8_t
 ocs_config_read8(ocs_os_handle_t os, uint32_t reg)
 {
 	return pci_read_config(os->dev, reg, 1);
 }
 
 void
 ocs_config_write8(ocs_os_handle_t os, uint32_t reg, uint8_t val)
 {
 	return pci_write_config(os->dev, reg, val, 1);
 }
 
 void
 ocs_config_write16(ocs_os_handle_t os, uint32_t reg, uint16_t val)
 {
 	return pci_write_config(os->dev, reg, val, 2);
 }
 
 void
 ocs_config_write32(ocs_os_handle_t os, uint32_t reg, uint32_t val)
 {
 	return pci_write_config(os->dev, reg, val, 4);
 }
 
 /**
  * @ingroup os
  * @brief Read a 32bit PCI register
  *
  * The SLI documentation uses the term "register set" to describe one or more
  * PCI BARs which form a logical address. For example, a 64-bit address uses
  * two BARs, and thus constitute a register set.
  *
  * @param ocs Pointer to the driver's context
  * @param rset Register Set to use
  * @param off Offset from the base address of the Register Set
  *
  * @return register value
  */
 uint32_t
 ocs_reg_read32(ocs_t *ocs, uint32_t rset, uint32_t off)
 {
 	ocs_pci_reg_t		*reg = NULL;
 
 	reg = &ocs->reg[rset];
 
 	return bus_space_read_4(reg->btag, reg->bhandle, off);
 }
 
 /**
  * @ingroup os
  * @brief Read a 16bit PCI register
  *
  * The SLI documentation uses the term "register set" to describe one or more
  * PCI BARs which form a logical address. For example, a 64-bit address uses
  * two BARs, and thus constitute a register set.
  *
  * @param ocs Pointer to the driver's context
  * @param rset Register Set to use
  * @param off Offset from the base address of the Register Set
  *
  * @return register value
  */
 uint16_t
 ocs_reg_read16(ocs_t *ocs, uint32_t rset, uint32_t off)
 {
 	ocs_pci_reg_t		*reg = NULL;
 
 	reg = &ocs->reg[rset];
 
 	return bus_space_read_2(reg->btag, reg->bhandle, off);
 }
 
 /**
  * @ingroup os
  * @brief Read a 8bit PCI register
  *
  * The SLI documentation uses the term "register set" to describe one or more
  * PCI BARs which form a logical address. For example, a 64-bit address uses
  * two BARs, and thus constitute a register set.
  *
  * @param ocs Pointer to the driver's context
  * @param rset Register Set to use
  * @param off Offset from the base address of the Register Set
  *
  * @return register value
  */
 uint8_t
 ocs_reg_read8(ocs_t *ocs, uint32_t rset, uint32_t off)
 {
 	ocs_pci_reg_t		*reg = NULL;
 
 	reg = &ocs->reg[rset];
 
 	return bus_space_read_1(reg->btag, reg->bhandle, off);
 }
 
 /**
  * @ingroup os
  * @brief Write a 32bit PCI register
  *
  * The SLI documentation uses the term "register set" to describe one or more
  * PCI BARs which form a logical address. For example, a 64-bit address uses
  * two BARs, and thus constitute a register set.
  *
  * @param ocs Pointer to the driver's context
  * @param rset Register Set to use
  * @param off Offset from the base address of the Register Set
  * @param val Value to write
  *
  * @return none
  */
 void
 ocs_reg_write32(ocs_t *ocs, uint32_t rset, uint32_t off, uint32_t val)
 {
 	ocs_pci_reg_t		*reg = NULL;
 
 	reg = &ocs->reg[rset];
 
 	return bus_space_write_4(reg->btag, reg->bhandle, off, val);
 }
 
 /**
  * @ingroup os
  * @brief Write a 16-bit PCI register
  *
  * The SLI documentation uses the term "register set" to describe one or more
  * PCI BARs which form a logical address. For example, a 64-bit address uses
  * two BARs, and thus constitute a register set.
  *
  * @param ocs Pointer to the driver's context
  * @param rset Register Set to use
  * @param off Offset from the base address of the Register Set
  * @param val Value to write
  *
  * @return none
  */
 void
 ocs_reg_write16(ocs_t *ocs, uint32_t rset, uint32_t off, uint16_t val)
 {
 	ocs_pci_reg_t		*reg = NULL;
 
 	reg = &ocs->reg[rset];
 
 	return bus_space_write_2(reg->btag, reg->bhandle, off, val);
 }
 
 /**
  * @ingroup os
  * @brief Write a 8-bit PCI register
  *
  * The SLI documentation uses the term "register set" to describe one or more
  * PCI BARs which form a logical address. For example, a 64-bit address uses
  * two BARs, and thus constitute a register set.
  *
  * @param ocs Pointer to the driver's context
  * @param rset Register Set to use
  * @param off Offset from the base address of the Register Set
  * @param val Value to write
  *
  * @return none
  */
 void
 ocs_reg_write8(ocs_t *ocs, uint32_t rset, uint32_t off, uint8_t val)
 {
 	ocs_pci_reg_t		*reg = NULL;
 
 	reg = &ocs->reg[rset];
 
 	return bus_space_write_1(reg->btag, reg->bhandle, off, val);
 }
 
 /**
  * @ingroup os
  * @brief Allocate host memory
  *
  * @param os OS handle
  * @param size number of bytes to allocate
  * @param flags additional options
  *
  * @return pointer to allocated memory, NULL otherwise
  */
 void *
 ocs_malloc(ocs_os_handle_t os, size_t size, int32_t flags)
 {
 	if ((flags & OCS_M_NOWAIT) == 0) {
 		flags |= M_WAITOK;
 	}
 
 #ifndef OCS_DEBUG_MEMORY
 	return malloc(size, M_OCS, flags);
 #else
 	char nameb[80];
 	long offset = 0;
 	void *addr = malloc(size, M_OCS, flags);
 
 	linker_ddb_search_symbol_name(__builtin_return_address(1), nameb, sizeof(nameb), &offset);
 	printf("A: %p %ld @ %s+%#lx\n", addr, size, nameb, offset);
 
 	return addr;
 #endif
 }
 
 /**
  * @ingroup os
  * @brief Free host memory
  *
  * @param os OS handle
  * @param addr pointer to memory
  * @param size bytes to free
  *
  * @note size ignored in BSD
  */
 void
 ocs_free(ocs_os_handle_t os, void *addr, size_t size)
 {
 #ifndef OCS_DEBUG_MEMORY
 	free(addr, M_OCS);
 #else
 	printf("F: %p %ld\n", addr, size);
 	free(addr, M_OCS);
 #endif
 }
 
 /**
  * @brief Callback function provided to bus_dmamap_load
  *
  * Function loads the physical / bus address into the DMA descriptor. The caller
  * can detect a mapping failure if a descriptor's phys element is zero.
  *
  * @param arg Argument provided to bus_dmamap_load is a ocs_dma_t
  * @param seg Array of DMA segment(s), each describing segment's address and length
  * @param nseg Number of elements in array
  * @param error Indicates success (0) or failure of mapping
  */
 static void
 ocs_dma_load(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 {
 	ocs_dma_t	*dma = arg;
 
 	if (error) {
 		printf("%s: error=%d\n", __func__, error);
 		dma->phys = 0;
 	} else {
 		dma->phys = seg->ds_addr;
 	}
 }
 
 /**
  * @ingroup os
  * @brief Free a DMA capable block of memory
  *
  * @param os Device abstraction
  * @param dma DMA descriptor for memory to be freed
  *
  * @return 0 if memory is de-allocated, -1 otherwise
  */
 int32_t
 ocs_dma_free(ocs_os_handle_t os, ocs_dma_t *dma)
 {
 	struct ocs_softc	*ocs = os;
 
 	if (!dma) {
 		device_printf(ocs->dev, "%s: bad parameter(s) dma=%p\n", __func__, dma);
 		return -1;
 	}
 
 	if (dma->size == 0) {
 		return 0;
 	}
 
 	if (dma->map) {
 		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTREAD |
 				BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(dma->tag, dma->map);
 	}
 
 	if (dma->virt) {
 		bus_dmamem_free(dma->tag, dma->virt, dma->map);
 		bus_dmamap_destroy(dma->tag, dma->map);
 	}
 	bus_dma_tag_destroy(dma->tag);
 
 	bzero(dma, sizeof(ocs_dma_t));
 
 	return 0;
 }
 
 /**
  * @ingroup os
  * @brief Allocate a DMA capable block of memory
  *
  * @param os Device abstraction
  * @param dma DMA descriptor containing results of memory allocation
  * @param size Size in bytes of desired allocation
  * @param align Alignment in bytes
  *
  * @return 0 on success, ENOMEM otherwise
  */
 int32_t
 ocs_dma_alloc(ocs_os_handle_t os, ocs_dma_t *dma, size_t size, size_t align)
 {
 	struct ocs_softc	*ocs = os;
 
 	if (!dma || !size) {
 		device_printf(ocs->dev, "%s bad parameter(s) dma=%p size=%zd\n",
 				__func__, dma, size);
 		return ENOMEM;
 	}
 
 	bzero(dma, sizeof(ocs_dma_t));
 
 	/* create a "tag" that describes the desired memory allocation */
 	if (bus_dma_tag_create(ocs->dmat, align, 0, BUS_SPACE_MAXADDR,
 				BUS_SPACE_MAXADDR, NULL, NULL,
 				size, 1, size, 0, NULL, NULL, &dma->tag)) {
 		device_printf(ocs->dev, "DMA tag allocation failed\n");
 		return ENOMEM;
 	}
 
 	dma->size = size;
 
 	/* allocate the memory */
 	if (bus_dmamem_alloc(dma->tag, &dma->virt, BUS_DMA_NOWAIT | BUS_DMA_COHERENT,
 				&dma->map)) {
 		device_printf(ocs->dev, "DMA memory allocation failed s=%zd a=%zd\n", size, align);
 		ocs_dma_free(ocs, dma);
 		return ENOMEM;
 	}
 
 	dma->alloc = dma->virt;
 
 	/* map virtual address to device visible address */
 	if (bus_dmamap_load(dma->tag, dma->map, dma->virt, dma->size, ocs_dma_load,
 				dma, 0)) {
 		device_printf(ocs->dev, "DMA memory load failed\n");
 		ocs_dma_free(ocs, dma);
 		return ENOMEM;
 	}
 
 	/* if the DMA map load callback fails, it sets the physical address to zero */
 	if (0 == dma->phys) {
 		device_printf(ocs->dev, "ocs_dma_load failed\n");
 		ocs_dma_free(ocs, dma);
 		return ENOMEM;
 	}
 
 	return 0;
 }
 
 /**
  * @ingroup os
  * @brief Synchronize the DMA buffer memory
  *
  * Ensures memory coherency between the CPU and device
  *
  * @param dma DMA descriptor of memory to synchronize
  * @param flags Describes direction of synchronization
  *   See BUS_DMA(9) for details
  *   - BUS_DMASYNC_PREWRITE
  *   - BUS_DMASYNC_POSTREAD
  */
 void
 ocs_dma_sync(ocs_dma_t *dma, uint32_t flags)
 {
 	bus_dmamap_sync(dma->tag, dma->map, flags);
 }
 
 int32_t
 ocs_dma_copy_in(ocs_dma_t *dma, void *buffer, uint32_t buffer_length)
 {
 	if (!dma)
 		return -1;
 	if (!buffer)
 		return -1;
 	if (buffer_length == 0)
 		return 0;
 	if (buffer_length > dma->size)
 		buffer_length = dma->size;
 	ocs_memcpy(dma->virt, buffer, buffer_length);
 	dma->len = buffer_length;
 	return buffer_length;
 }
 
 int32_t
 ocs_dma_copy_out(ocs_dma_t *dma, void *buffer, uint32_t buffer_length)
 {
 	if (!dma)
 		return -1;
 	if (!buffer)
 		return -1;
 	if (buffer_length == 0)
 		return 0;
 	if (buffer_length > dma->len)
 		buffer_length = dma->len;
 	ocs_memcpy(buffer, dma->virt, buffer_length);
 	return buffer_length;
 }
 
 /**
  * @ingroup os
  * @brief Initialize a lock
  *
  * @param lock lock to initialize
  * @param name string identifier for the lock
  */
 void
 ocs_lock_init(void *os, ocs_lock_t *lock, const char *name, ...)
 {
 	va_list ap;
 
 	va_start(ap, name);
 	ocs_vsnprintf(lock->name, MAX_LOCK_DESC_LEN, name, ap);
 	va_end(ap);
 
 	mtx_init(&lock->lock, lock->name, NULL, MTX_DEF);
 }
 
 /**
  * @brief Allocate a bit map
  *
  * For BSD, this is a simple character string
  *
  * @param n_bits number of bits in bit map
  *
  * @return pointer to the bit map, NULL on error
  */
 ocs_bitmap_t *
 ocs_bitmap_alloc(uint32_t n_bits)
 {
 
 	return malloc(bitstr_size(n_bits), M_OCS, M_ZERO | M_NOWAIT);
 }
 
 /**
  * @brief Free a bit map
  *
  * @param bitmap pointer to previously allocated bit map
  */
 void
 ocs_bitmap_free(ocs_bitmap_t *bitmap)
 {
 
 	free(bitmap, M_OCS);
 }
 
 /**
  * @brief find next unset bit and set it
  *
  * @param bitmap bit map to search
  * @param n_bits number of bits in map
  *
  * @return bit position or -1 if map is full
  */
 int32_t
 ocs_bitmap_find(ocs_bitmap_t *bitmap, uint32_t n_bits)
 {
 	int32_t		position = -1;
 
 	bit_ffc(bitmap, n_bits, &position);
 
 	if (-1 != position) {
 		bit_set(bitmap, position);
 	}
 
 	return position;
 }
 
 /**
  * @brief search for next (un)set bit
  *
  * @param bitmap bit map to search
  * @param set search for a set or unset bit
  * @param n_bits number of bits in map
  *
  * @return bit position or -1
  */
 int32_t
 ocs_bitmap_search(ocs_bitmap_t *bitmap, uint8_t set, uint32_t n_bits)
 {
 	int32_t		position;
 
 	if (!bitmap) {
 		return -1;
 	}
 
 	if (set) {
 		bit_ffs(bitmap, n_bits, &position);
 	} else {
 		bit_ffc(bitmap, n_bits, &position);
 	}
 
 	return position;
 }
 
 /**
  * @brief clear the specified bit
  *
  * @param bitmap pointer to bit map
  * @param bit bit number to clear
  */
 void
 ocs_bitmap_clear(ocs_bitmap_t *bitmap, uint32_t bit)
 {
 	bit_clear(bitmap, bit);
 }
 
 void _ocs_log(ocs_t *ocs, const char *func_name, int line, const char *fmt, ...)
 {
 	va_list ap;
 	char buf[256];
 	char *p = buf;
 
 	va_start(ap, fmt);
 
 	/* TODO: Add Current PID info here. */
 
 	p += snprintf(p, sizeof(buf) - (p - buf), "%s: ", DRV_NAME);
 	p += snprintf(p, sizeof(buf) - (p - buf), "%s:", func_name);
 	p += snprintf(p, sizeof(buf) - (p - buf), "%i:", line);
 	p += snprintf(p, sizeof(buf) - (p - buf), "%s:", (ocs != NULL) ? device_get_nameunit(ocs->dev) : "");
 	p += vsnprintf(p, sizeof(buf) - (p - buf), fmt, ap);
 
 	va_end(ap);
 
 	printf("%s", buf);
 }
 
 /**
  * @brief Common thread call function
  *
  * This is the common function called whenever a thread instantiated by ocs_thread_create() is started.
  * It captures the return value from the actual thread function and stashes it in the thread object, to
  * be later retrieved by ocs_thread_get_retval(), and calls kthread_exit(), the proscribed method to terminate
  * a thread.
  *
  * @param arg a pointer to the thread object
  *
  * @return none
  */
 
 static void
 ocs_thread_call_fctn(void *arg)
 {
 	ocs_thread_t *thread = arg;
 	thread->retval = (*thread->fctn)(thread->arg);
 	ocs_free(NULL, thread->name, ocs_strlen(thread->name+1));
 	kthread_exit();
 }
 
 /**
  * @brief Create a kernel thread
  *
  * Creates a kernel thread and optionally starts it.   If the thread is not immediately
  * started, ocs_thread_start() should be called at some later point.
  *
  * @param os OS handle
  * @param thread pointer to thread object
  * @param fctn function for thread to be begin executing
  * @param name text name to identify thread
  * @param arg application specific argument passed to thread function
  * @param start start option, OCS_THREAD_RUN will start the thread immediately,
  *			OCS_THREAD_CREATE will create but not start the thread
  *
  * @return returns 0 for success, a negative error code value for failure.
  */
 
 int32_t
 ocs_thread_create(ocs_os_handle_t os, ocs_thread_t *thread, ocs_thread_fctn fctn, const char *name, void *arg, ocs_thread_start_e start)
 {
 	int32_t rc = 0;
 
 	ocs_memset(thread, 0, sizeof(*thread));
 
 	thread->fctn = fctn;
 	thread->name = ocs_strdup(name);
 	if (thread->name == NULL) {
 		thread->name = "unknown";
 	}
 	thread->arg = arg;
 
 	ocs_atomic_set(&thread->terminate, 0);
 
 	rc = kthread_add(ocs_thread_call_fctn, thread, NULL, &thread->tcb, (start == OCS_THREAD_CREATE) ? RFSTOPPED : 0,
 		OCS_THREAD_DEFAULT_STACK_SIZE_PAGES, "%s", name);
 
 	return rc;
 }
 
 /**
  * @brief Start a thread
  *
  * Starts a thread that was created with OCS_THREAD_CREATE rather than OCS_THREAD_RUN
  *
  * @param thread pointer to thread object
  *
  * @return returns 0 for success, a negative error code value for failure.
  */
 
 int32_t ocs_thread_start(ocs_thread_t *thread)
 {
 
 	thread_lock(thread->tcb);
 	sched_add(thread->tcb, SRQ_BORING);
 	return 0;
 }
 
 /**
  * @brief return thread argument
  *
  * Returns a pointer to the thread's application specific argument
  *
  * @param mythread pointer to the thread object
  *
  * @return pointer to application specific argument
  */
 
 void *ocs_thread_get_arg(ocs_thread_t *mythread)
 {
 	return mythread->arg;
 }
 
 /**
  * @brief Request thread stop
  *
  * A stop request is made to the thread.  This is a voluntary call, the thread needs
  * to periodically query its terminate request using ocs_thread_terminate_requested()
  *
  * @param thread pointer to thread object
  *
  * @return returns 0 for success, a negative error code value for failure.
  */
 
 int32_t
 ocs_thread_terminate(ocs_thread_t *thread)
 {
 	ocs_atomic_set(&thread->terminate, 1);
 	return 0;
 }
 
 /**
  * @brief See if a terminate request has been made
  *
  * Check to see if a stop request has been made to the current thread.  This
  * function would be used by a thread to see if it should terminate.
  *
  * @return returns non-zero if a stop has been requested
  */
 
 int32_t ocs_thread_terminate_requested(ocs_thread_t *thread)
 {
 	return ocs_atomic_read(&thread->terminate);
 }
 
 /**
  * @brief Retrieve threads return value
  *
  * After a thread has terminated, it's return value may be retrieved with this function.
  *
  * @param thread pointer to thread object
  *
  * @return return value from thread function
  */
 
 int32_t
 ocs_thread_get_retval(ocs_thread_t *thread)
 {
 	return thread->retval;
 }
 
 /**
  * @brief Request that the currently running thread yield
  *
  * The currently running thread yields to the scheduler
  *
  * @param thread pointer to thread (ignored)
  *
  * @return none
  */
 
 void
 ocs_thread_yield(ocs_thread_t *thread) {
 	pause("thread yield", 1);
 }
 
 ocs_thread_t *
 ocs_thread_self(void)
 {
 	ocs_printf(">>> %s not implemented\n", __func__);
 	ocs_abort();
 }
 
 int32_t
 ocs_thread_setcpu(ocs_thread_t *thread, uint32_t cpu)
 {
 	ocs_printf(">>> %s not implemented\n", __func__);
 	return -1;
 }
 
 int32_t
 ocs_thread_getcpu(void)
 {
 	return curcpu;
 }
 
 int
 ocs_sem_init(ocs_sem_t *sem, int val, const char *name, ...)
 {
 	va_list ap;
 
 	va_start(ap, name);
 	ocs_vsnprintf(sem->name, sizeof(sem->name), name, ap);
 	va_end(ap);
 
 	sema_init(&sem->sem, val, sem->name);
 	return 0;
 }
 
 /**
  * @ingroup os
  * @brief  Copy user arguments in to kernel space for an ioctl
  * @par Description
  * This function is called at the beginning of an ioctl function
  * to copy the ioctl argument from user space to kernel space.
  *
  * BSD handles this for us - arg is already in kernel space,
  * so we just return it.
  *
  * @param os OS handle
  * @param arg The argument passed to the ioctl function
  * @param size The size of the structure pointed to by arg
  *
  * @return A pointer to a kernel space copy of the argument on
  *	success; NULL on failure
  */
 void *ocs_ioctl_preprocess(ocs_os_handle_t os, void *arg, size_t size)
 {
 	 return arg;
 }
 
 /**
  * @ingroup os
  * @brief  Copy results of an ioctl back to user space
  * @par Description
  * This function is called at the end of ioctl processing to
  * copy the argument back to user space.
  *
  * BSD handles this for us.
  *
  * @param os OS handle
  * @param arg The argument passed to the ioctl function
  * @param kern_ptr A pointer to the kernel space copy of the
  *		   argument
  * @param size The size of the structure pointed to by arg.
  *
  * @return Returns 0.
  */
 int32_t ocs_ioctl_postprocess(ocs_os_handle_t os, void *arg, void *kern_ptr, size_t size)
 {
 	return 0;
 }
 
 /**
  * @ingroup os
  * @brief  Free memory allocated by ocs_ioctl_preprocess
  * @par Description
  * This function is called in the event of an error in ioctl
  * processing.  For operating environments where ocs_ioctlpreprocess
  * allocates memory, this call frees the memory without copying
  * results back to user space.
  *
  * For BSD, because no memory was allocated in ocs_ioctl_preprocess,
  * nothing needs to be done here.
  *
  * @param os OS handle
  * @param kern_ptr A pointer to the kernel space copy of the
  *		   argument
  * @param size The size of the structure pointed to by arg.
  *
  * @return Returns nothing.
  */
 void ocs_ioctl_free(ocs_os_handle_t os, void *kern_ptr, size_t size)
 {
 	return;
 }
 
 void ocs_intr_disable(ocs_os_handle_t os)
 {
 }
 
 void ocs_intr_enable(ocs_os_handle_t os)
 {
 }
 
 void ocs_print_stack(void)
 {
 #if defined(STACK)
 	struct stack st;
 
-	stack_zero(&st);
 	stack_save(&st);
 	stack_print(&st);
 #endif
 }
 
 void ocs_abort(void)
 {
 	panic(">>> abort/panic\n");
 }
 
 const char *
 ocs_pci_model(uint16_t vendor, uint16_t device)
 {
 	switch (device) {
 	case PCI_PRODUCT_EMULEX_OCE16002:	return "OCE16002";
 	case PCI_PRODUCT_EMULEX_OCE1600_VF:	return "OCE1600_VF";
 	case PCI_PRODUCT_EMULEX_OCE50102:	return "OCE50102";
 	case PCI_PRODUCT_EMULEX_OCE50102_VF:	return "OCE50102_VR";
 	default:
 		break;
 	}
 
 	return "unknown";
 }
 
 void
 ocs_get_bus_dev_func(ocs_t *ocs, uint8_t* bus, uint8_t* dev, uint8_t* func)
 {
 	*bus = pci_get_bus(ocs->dev);
 	*dev = pci_get_slot(ocs->dev);
 	*func= pci_get_function(ocs->dev);
 }
 
 /**
  * @brief return CPU information
  *
  * This function populates the ocs_cpuinfo_t buffer with CPU information
  *
  * @param cpuinfo pointer to ocs_cpuinfo_t buffer
  *
  * @return returns 0 for success, a negative error code value for failure.
  */
 extern int mp_ncpus;
 int32_t
 ocs_get_cpuinfo(ocs_cpuinfo_t *cpuinfo)
 {
 	cpuinfo->num_cpus = mp_ncpus;
 	return 0;
 }
 
 uint32_t
 ocs_get_num_cpus(void)
 {
 	static ocs_cpuinfo_t cpuinfo;
 
 	if (cpuinfo.num_cpus == 0) {
 		ocs_get_cpuinfo(&cpuinfo);
 	}
 	return cpuinfo.num_cpus;
 }
 
 void
 __ocs_callout(void *t)
 {
 	ocs_timer_t *timer = t;
 
 	if (callout_pending(&timer->callout)) {
 		/* Callout was reset */
 		return;
 	}
 
 	if (!callout_active(&timer->callout)) {
 		/* Callout was stopped */
 		return;
 	}
 
 	callout_deactivate(&timer->callout);
 
 	if (timer->func) {
 		timer->func(timer->data);
 	}
 }
 
 int32_t
 ocs_setup_timer(ocs_os_handle_t os, ocs_timer_t *timer, void(*func)(void *arg), void *data, uint32_t timeout_ms)
 {
 	struct	timeval tv;
 	int	hz;
 
 	if (timer == NULL) {
 		ocs_log_err(NULL, "bad parameter\n");
 		return -1;
 	}
 
 	if (!mtx_initialized(&timer->lock)) {
 		mtx_init(&timer->lock, "ocs_timer", NULL, MTX_DEF);
 	}
 
 	callout_init_mtx(&timer->callout, &timer->lock, 0);
 
 	timer->func = func;
 	timer->data = data;
 
 	tv.tv_sec  = timeout_ms / 1000;
 	tv.tv_usec = (timeout_ms % 1000) * 1000;
 
 	hz = tvtohz(&tv);
 	if (hz < 0)
 		hz = INT32_MAX;
 	if (hz == 0)
 		hz = 1;
 
 	mtx_lock(&timer->lock);
 		callout_reset(&timer->callout, hz, __ocs_callout, timer);
 	mtx_unlock(&timer->lock);
 
 	return 0;
 }
 
 int32_t
 ocs_mod_timer(ocs_timer_t *timer, uint32_t timeout_ms)
 {
 	struct	timeval tv;
 	int	hz;
 
 	if (timer == NULL) {
 		ocs_log_err(NULL, "bad parameter\n");
 		return -1;
 	}
 
 	tv.tv_sec  = timeout_ms / 1000;
 	tv.tv_usec = (timeout_ms % 1000) * 1000;
 
 	hz = tvtohz(&tv);
 	if (hz < 0)
 		hz = INT32_MAX;
 	if (hz == 0)
 		hz = 1;
 
 	mtx_lock(&timer->lock);
 		callout_reset(&timer->callout, hz, __ocs_callout, timer);
 	mtx_unlock(&timer->lock);
 
 	return 0;
 }
 
 int32_t
 ocs_timer_pending(ocs_timer_t *timer)
 {
 	return callout_active(&timer->callout);
 }
 
 int32_t
 ocs_del_timer(ocs_timer_t *timer)
 {
 
 	mtx_lock(&timer->lock);
 		callout_stop(&timer->callout);
 	mtx_unlock(&timer->lock);
 
 	return 0;
 }
 
 char *
 ocs_strdup(const char *s)
 {
 	uint32_t l = strlen(s);
 	char *d;
 
 	d = ocs_malloc(NULL, l+1, OCS_M_NOWAIT);
 	if (d != NULL) {
 		ocs_strcpy(d, s);
 	}
 	return d;
 }
 
 void
 _ocs_assert(const char *cond, const char *filename, int linenum)
 {
 	const char *fn = strrchr(__FILE__, '/');
 
 	ocs_log_err(NULL, "%s(%d) assertion (%s) failed\n", (fn ? fn + 1 : filename), linenum, cond);
 	ocs_print_stack();
 	ocs_save_ddump_all(OCS_DDUMP_FLAGS_WQES|OCS_DDUMP_FLAGS_CQES|OCS_DDUMP_FLAGS_MQES, -1, TRUE);
 }
diff --git a/sys/dev/xen/debug/debug.c b/sys/dev/xen/debug/debug.c
index c4562c02ed03..bd1d6101f392 100644
--- a/sys/dev/xen/debug/debug.c
+++ b/sys/dev/xen/debug/debug.c
@@ -1,157 +1,156 @@
 /*
  * Copyright (c) 2015 Roger Pau Monné <roger.pau@citrix.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_stack.h"
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/smp.h>
 #include <sys/stack.h>
 #include <sys/sbuf.h>
 
 #include <xen/xen-os.h>
 #include <xen/xen_intr.h>
 #include <xen/hypervisor.h>
 
 /*
  * Xen debug device
  *
  * Handles the VIRQ_DEBUG interrupt and prints the backtrace of each
  * vCPU on the Xen console.
  */
 
 DPCPU_DEFINE(xen_intr_handle_t, xendebug_handler);
 static struct mtx lock;
 static struct sbuf *buf;
 
 static int
 xendebug_drain(void *arg, const char *str, int len)
 {
 
 	HYPERVISOR_console_write(__DECONST(char *, str), len);
 	return (len);
 }
 
 extern void
 stack_capture(struct stack *st, register_t rbp);
 
 static int
 xendebug_filter(void *arg __unused)
 {
 #if defined(STACK) && defined(DDB)
 	struct stack st;
 
-	stack_zero(&st);
 	stack_save(&st);
 
 	mtx_lock_spin(&lock);
 	sbuf_clear(buf);
 	xc_printf("Printing stack trace vCPU%d\n", PCPU_GET(vcpu_id));
 	stack_sbuf_print_ddb(buf, &st);
 	sbuf_finish(buf);
 	mtx_unlock_spin(&lock);
 #endif
 
 	return (FILTER_HANDLED);
 }
 
 static void
 xendebug_identify(driver_t *driver, device_t parent)
 {
 
 	KASSERT(xen_domain(),
 	    ("Trying to add Xen debug device to non-xen guest"));
 
 	if (!xen_has_percpu_evtchn())
 		return;
 
 	if (BUS_ADD_CHILD(parent, 0, "debug", 0) == NULL)
 		panic("Unable to add Xen debug device.");
 }
 
 static int
 xendebug_probe(device_t dev)
 {
 
 	device_set_desc(dev, "Xen debug handler");
 	return (BUS_PROBE_NOWILDCARD);
 }
 
 static int
 xendebug_attach(device_t dev)
 {
 	int i, error;
 
 	mtx_init(&lock, "xen-dbg", NULL, MTX_SPIN);
 	buf = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN);
 	if (buf == NULL)
 		panic("Unable to create sbuf for stack dump");
 	sbuf_set_drain(buf, xendebug_drain, NULL);
 
 	/* Bind an event channel to a VIRQ on each VCPU. */
 	CPU_FOREACH(i) {
 		error = xen_intr_bind_virq(dev, VIRQ_DEBUG, i, xendebug_filter,
 		    NULL, NULL, INTR_TYPE_TTY,
 		    DPCPU_ID_PTR(i, xendebug_handler));
 		if (error != 0) {
 			printf("Failed to bind VIRQ_DEBUG to vCPU %d: %d",
 			    i, error);
 			continue;
 		}
 		xen_intr_describe(DPCPU_ID_GET(i, xendebug_handler), "d%d", i);
 	}
 
 	return (0);
 }
 
 static device_method_t xendebug_methods[] = {
 	DEVMETHOD(device_identify, xendebug_identify),
 	DEVMETHOD(device_probe, xendebug_probe),
 	DEVMETHOD(device_attach, xendebug_attach),
 
 	DEVMETHOD_END
 };
 
 static driver_t xendebug_driver = {
 	"debug",
 	xendebug_methods,
 	0,
 };
 
 devclass_t xendebug_devclass;
 
 DRIVER_MODULE(xendebug, xenpv, xendebug_driver, xendebug_devclass, 0, 0);
 MODULE_DEPEND(xendebug, xenpv, 1, 1, 1);
diff --git a/sys/kern/subr_epoch.c b/sys/kern/subr_epoch.c
index 74524fb3f97e..100221cd62f9 100644
--- a/sys/kern/subr_epoch.c
+++ b/sys/kern/subr_epoch.c
@@ -1,1022 +1,1021 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/epoch.h>
 #include <sys/gtaskqueue.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/turnstile.h>
 #ifdef EPOCH_TRACE
 #include <machine/stdarg.h>
 #include <sys/stack.h>
 #include <sys/tree.h>
 #endif
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/uma.h>
 
 #include <ck_epoch.h>
 
 #ifdef __amd64__
 #define EPOCH_ALIGN CACHE_LINE_SIZE*2
 #else
 #define EPOCH_ALIGN CACHE_LINE_SIZE
 #endif
 
 TAILQ_HEAD (epoch_tdlist, epoch_tracker);
 typedef struct epoch_record {
 	ck_epoch_record_t er_record;
 	struct epoch_context er_drain_ctx;
 	struct epoch *er_parent;
 	volatile struct epoch_tdlist er_tdlist;
 	volatile uint32_t er_gen;
 	uint32_t er_cpuid;
 #ifdef INVARIANTS
 	/* Used to verify record ownership for non-preemptible epochs. */
 	struct thread *er_td;
 #endif
 } __aligned(EPOCH_ALIGN)     *epoch_record_t;
 
 struct epoch {
 	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
 	epoch_record_t e_pcpu_record;
 	int	e_in_use;
 	int	e_flags;
 	struct sx e_drain_sx;
 	struct mtx e_drain_mtx;
 	volatile int e_drain_count;
 	const char *e_name;
 };
 
 /* arbitrary --- needs benchmarking */
 #define MAX_ADAPTIVE_SPIN 100
 #define MAX_EPOCHS 64
 
 CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context));
 SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "epoch information");
 SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "epoch stats");
 
 /* Stats. */
 static counter_u64_t block_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, nblocked, CTLFLAG_RW,
     &block_count, "# of times a thread was in an epoch when epoch_wait was called");
 static counter_u64_t migrate_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, migrations, CTLFLAG_RW,
     &migrate_count, "# of times thread was migrated to another CPU in epoch_wait");
 static counter_u64_t turnstile_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, ncontended, CTLFLAG_RW,
     &turnstile_count, "# of times a thread was blocked on a lock in an epoch during an epoch_wait");
 static counter_u64_t switch_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, switches, CTLFLAG_RW,
     &switch_count, "# of times a thread voluntarily context switched in epoch_wait");
 static counter_u64_t epoch_call_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_calls, CTLFLAG_RW,
     &epoch_call_count, "# of times a callback was deferred");
 static counter_u64_t epoch_call_task_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_call_tasks, CTLFLAG_RW,
     &epoch_call_task_count, "# of times a callback task was run");
 
 TAILQ_HEAD (threadlist, thread);
 
 CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
     ck_epoch_entry_container)
 
 static struct epoch epoch_array[MAX_EPOCHS];
 
 DPCPU_DEFINE(struct grouptask, epoch_cb_task);
 DPCPU_DEFINE(int, epoch_cb_count);
 
 static __read_mostly int inited;
 __read_mostly epoch_t global_epoch;
 __read_mostly epoch_t global_epoch_preempt;
 
 static void epoch_call_task(void *context __unused);
 static 	uma_zone_t pcpu_zone_record;
 
 static struct sx epoch_sx;
 
 #define	EPOCH_LOCK() sx_xlock(&epoch_sx)
 #define	EPOCH_UNLOCK() sx_xunlock(&epoch_sx)
 
 static epoch_record_t
 epoch_currecord(epoch_t epoch)
 {
 
 	return (zpcpu_get(epoch->e_pcpu_record));
 }
 
 #ifdef EPOCH_TRACE
 struct stackentry {
 	RB_ENTRY(stackentry) se_node;
 	struct stack se_stack;
 };
 
 static int
 stackentry_compare(struct stackentry *a, struct stackentry *b)
 {
 
 	if (a->se_stack.depth > b->se_stack.depth)
 		return (1);
 	if (a->se_stack.depth < b->se_stack.depth)
 		return (-1);
 	for (int i = 0; i < a->se_stack.depth; i++) {
 		if (a->se_stack.pcs[i] > b->se_stack.pcs[i])
 			return (1);
 		if (a->se_stack.pcs[i] < b->se_stack.pcs[i])
 			return (-1);
 	}
 
 	return (0);
 }
 
 RB_HEAD(stacktree, stackentry) epoch_stacks = RB_INITIALIZER(&epoch_stacks);
 RB_GENERATE_STATIC(stacktree, stackentry, se_node, stackentry_compare);
 
 static struct mtx epoch_stacks_lock;
 MTX_SYSINIT(epochstacks, &epoch_stacks_lock, "epoch_stacks", MTX_DEF);
 
 static bool epoch_trace_stack_print = true;
 SYSCTL_BOOL(_kern_epoch, OID_AUTO, trace_stack_print, CTLFLAG_RWTUN,
     &epoch_trace_stack_print, 0, "Print stack traces on epoch reports");
 
 static void epoch_trace_report(const char *fmt, ...) __printflike(1, 2);
 static inline void
 epoch_trace_report(const char *fmt, ...)
 {
 	va_list ap;
 	struct stackentry se, *new;
 
-	stack_zero(&se.se_stack);	/* XXX: is it really needed? */
 	stack_save(&se.se_stack);
 
 	/* Tree is never reduced - go lockless. */
 	if (RB_FIND(stacktree, &epoch_stacks, &se) != NULL)
 		return;
 
 	new = malloc(sizeof(*new), M_STACK, M_NOWAIT);
 	if (new != NULL) {
 		bcopy(&se.se_stack, &new->se_stack, sizeof(struct stack));
 
 		mtx_lock(&epoch_stacks_lock);
 		new = RB_INSERT(stacktree, &epoch_stacks, new);
 		mtx_unlock(&epoch_stacks_lock);
 		if (new != NULL)
 			free(new, M_STACK);
 	}
 
 	va_start(ap, fmt);
 	(void)vprintf(fmt, ap);
 	va_end(ap);
 	if (epoch_trace_stack_print)
 		stack_print_ddb(&se.se_stack);
 }
 
 static inline void
 epoch_trace_enter(struct thread *td, epoch_t epoch, epoch_tracker_t et,
     const char *file, int line)
 {
 	epoch_tracker_t iet;
 
 	SLIST_FOREACH(iet, &td->td_epochs, et_tlink) {
 		if (iet->et_epoch != epoch)
 			continue;
 		epoch_trace_report("Recursively entering epoch %s "
 		    "at %s:%d, previously entered at %s:%d\n",
 		    epoch->e_name, file, line,
 		    iet->et_file, iet->et_line);
 	}
 	et->et_epoch = epoch;
 	et->et_file = file;
 	et->et_line = line;
 	et->et_flags = 0;
 	SLIST_INSERT_HEAD(&td->td_epochs, et, et_tlink);
 }
 
 static inline void
 epoch_trace_exit(struct thread *td, epoch_t epoch, epoch_tracker_t et,
     const char *file, int line)
 {
 
 	if (SLIST_FIRST(&td->td_epochs) != et) {
 		epoch_trace_report("Exiting epoch %s in a not nested order "
 		    "at %s:%d. Most recently entered %s at %s:%d\n",
 		    epoch->e_name,
 		    file, line,
 		    SLIST_FIRST(&td->td_epochs)->et_epoch->e_name,
 		    SLIST_FIRST(&td->td_epochs)->et_file,
 		    SLIST_FIRST(&td->td_epochs)->et_line);
 		/* This will panic if et is not anywhere on td_epochs. */
 		SLIST_REMOVE(&td->td_epochs, et, epoch_tracker, et_tlink);
 	} else
 		SLIST_REMOVE_HEAD(&td->td_epochs, et_tlink);
 	if (et->et_flags & ET_REPORT_EXIT)
 		printf("Td %p exiting epoch %s at %s:%d\n", td, epoch->e_name,
 		    file, line);
 }
 
 /* Used by assertions that check thread state before going to sleep. */
 void
 epoch_trace_list(struct thread *td)
 {
 	epoch_tracker_t iet;
 
 	SLIST_FOREACH(iet, &td->td_epochs, et_tlink)
 		printf("Epoch %s entered at %s:%d\n", iet->et_epoch->e_name,
 		    iet->et_file, iet->et_line);
 }
 
 void
 epoch_where_report(epoch_t epoch)
 {
 	epoch_record_t er;
 	struct epoch_tracker *tdwait;
 
 	MPASS(epoch != NULL);
 	MPASS((epoch->e_flags & EPOCH_PREEMPT) != 0);
 	MPASS(!THREAD_CAN_SLEEP());
 	critical_enter();
 	er = epoch_currecord(epoch);
 	TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
 		if (tdwait->et_td == curthread)
 			break;
 	critical_exit();
 	if (tdwait != NULL) {
 		tdwait->et_flags |= ET_REPORT_EXIT;
 		printf("Td %p entered epoch %s at %s:%d\n", curthread,
 		    epoch->e_name, tdwait->et_file, tdwait->et_line);
 	}
 }
 #endif /* EPOCH_TRACE */
 
 static void
 epoch_init(void *arg __unused)
 {
 	int cpu;
 
 	block_count = counter_u64_alloc(M_WAITOK);
 	migrate_count = counter_u64_alloc(M_WAITOK);
 	turnstile_count = counter_u64_alloc(M_WAITOK);
 	switch_count = counter_u64_alloc(M_WAITOK);
 	epoch_call_count = counter_u64_alloc(M_WAITOK);
 	epoch_call_task_count = counter_u64_alloc(M_WAITOK);
 
 	pcpu_zone_record = uma_zcreate("epoch_record pcpu",
 	    sizeof(struct epoch_record), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, UMA_ZONE_PCPU);
 	CPU_FOREACH(cpu) {
 		GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0,
 		    epoch_call_task, NULL);
 		taskqgroup_attach_cpu(qgroup_softirq,
 		    DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, NULL, NULL,
 		    "epoch call task");
 	}
 #ifdef EPOCH_TRACE
 	SLIST_INIT(&thread0.td_epochs);
 #endif
 	sx_init(&epoch_sx, "epoch-sx");
 	inited = 1;
 	global_epoch = epoch_alloc("Global", 0);
 	global_epoch_preempt = epoch_alloc("Global preemptible", EPOCH_PREEMPT);
 }
 SYSINIT(epoch, SI_SUB_EPOCH, SI_ORDER_FIRST, epoch_init, NULL);
 
 #if !defined(EARLY_AP_STARTUP)
 static void
 epoch_init_smp(void *dummy __unused)
 {
 	inited = 2;
 }
 SYSINIT(epoch_smp, SI_SUB_SMP + 1, SI_ORDER_FIRST, epoch_init_smp, NULL);
 #endif
 
 static void
 epoch_ctor(epoch_t epoch)
 {
 	epoch_record_t er;
 	int cpu;
 
 	epoch->e_pcpu_record = uma_zalloc_pcpu(pcpu_zone_record, M_WAITOK);
 	CPU_FOREACH(cpu) {
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 		bzero(er, sizeof(*er));
 		ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
 		TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
 		er->er_cpuid = cpu;
 		er->er_parent = epoch;
 	}
 }
 
 static void
 epoch_adjust_prio(struct thread *td, u_char prio)
 {
 
 	thread_lock(td);
 	sched_prio(td, prio);
 	thread_unlock(td);
 }
 
 epoch_t
 epoch_alloc(const char *name, int flags)
 {
 	epoch_t epoch;
 	int i;
 
 	MPASS(name != NULL);
 
 	if (__predict_false(!inited))
 		panic("%s called too early in boot", __func__);
 
 	EPOCH_LOCK();
 
 	/*
 	 * Find a free index in the epoch array. If no free index is
 	 * found, try to use the index after the last one.
 	 */
 	for (i = 0;; i++) {
 		/*
 		 * If too many epochs are currently allocated,
 		 * return NULL.
 		 */
 		if (i == MAX_EPOCHS) {
 			epoch = NULL;
 			goto done;
 		}
 		if (epoch_array[i].e_in_use == 0)
 			break;
 	}
 
 	epoch = epoch_array + i;
 	ck_epoch_init(&epoch->e_epoch);
 	epoch_ctor(epoch);
 	epoch->e_flags = flags;
 	epoch->e_name = name;
 	sx_init(&epoch->e_drain_sx, "epoch-drain-sx");
 	mtx_init(&epoch->e_drain_mtx, "epoch-drain-mtx", NULL, MTX_DEF);
 
 	/*
 	 * Set e_in_use last, because when this field is set the
 	 * epoch_call_task() function will start scanning this epoch
 	 * structure.
 	 */
 	atomic_store_rel_int(&epoch->e_in_use, 1);
 done:
 	EPOCH_UNLOCK();
 	return (epoch);
 }
 
 void
 epoch_free(epoch_t epoch)
 {
 #ifdef INVARIANTS
 	int cpu;
 #endif
 
 	EPOCH_LOCK();
 
 	MPASS(epoch->e_in_use != 0);
 
 	epoch_drain_callbacks(epoch);
 
 	atomic_store_rel_int(&epoch->e_in_use, 0);
 	/*
 	 * Make sure the epoch_call_task() function see e_in_use equal
 	 * to zero, by calling epoch_wait() on the global_epoch:
 	 */
 	epoch_wait(global_epoch);
 #ifdef INVARIANTS
 	CPU_FOREACH(cpu) {
 		epoch_record_t er;
 
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 
 		/*
 		 * Sanity check: none of the records should be in use anymore.
 		 * We drained callbacks above and freeing the pcpu records is
 		 * imminent.
 		 */
 		MPASS(er->er_td == NULL);
 		MPASS(TAILQ_EMPTY(&er->er_tdlist));
 	}
 #endif
 	uma_zfree_pcpu(pcpu_zone_record, epoch->e_pcpu_record);
 	mtx_destroy(&epoch->e_drain_mtx);
 	sx_destroy(&epoch->e_drain_sx);
 	memset(epoch, 0, sizeof(*epoch));
 
 	EPOCH_UNLOCK();
 }
 
 #define INIT_CHECK(epoch)					\
 	do {							\
 		if (__predict_false((epoch) == NULL))		\
 			return;					\
 	} while (0)
 
 void
 _epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
 {
 	struct epoch_record *er;
 	struct thread *td;
 
 	MPASS(cold || epoch != NULL);
 	td = curthread;
 	MPASS((vm_offset_t)et >= td->td_kstack &&
 	    (vm_offset_t)et + sizeof(struct epoch_tracker) <=
 	    td->td_kstack + td->td_kstack_pages * PAGE_SIZE);
 
 	INIT_CHECK(epoch);
 	MPASS(epoch->e_flags & EPOCH_PREEMPT);
 
 #ifdef EPOCH_TRACE
 	epoch_trace_enter(td, epoch, et, file, line);
 #endif
 	et->et_td = td;
 	THREAD_NO_SLEEPING();
 	critical_enter();
 	sched_pin();
 	et->et_old_priority = td->td_priority;
 	er = epoch_currecord(epoch);
 	/* Record-level tracking is reserved for non-preemptible epochs. */
 	MPASS(er->er_td == NULL);
 	TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link);
 	ck_epoch_begin(&er->er_record, &et->et_section);
 	critical_exit();
 }
 
 void
 epoch_enter(epoch_t epoch)
 {
 	epoch_record_t er;
 
 	MPASS(cold || epoch != NULL);
 	INIT_CHECK(epoch);
 	critical_enter();
 	er = epoch_currecord(epoch);
 #ifdef INVARIANTS
 	if (er->er_record.active == 0) {
 		MPASS(er->er_td == NULL);
 		er->er_td = curthread;
 	} else {
 		/* We've recursed, just make sure our accounting isn't wrong. */
 		MPASS(er->er_td == curthread);
 	}
 #endif
 	ck_epoch_begin(&er->er_record, NULL);
 }
 
 void
 _epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
 {
 	struct epoch_record *er;
 	struct thread *td;
 
 	INIT_CHECK(epoch);
 	td = curthread;
 	critical_enter();
 	sched_unpin();
 	THREAD_SLEEPING_OK();
 	er = epoch_currecord(epoch);
 	MPASS(epoch->e_flags & EPOCH_PREEMPT);
 	MPASS(et != NULL);
 	MPASS(et->et_td == td);
 #ifdef INVARIANTS
 	et->et_td = (void*)0xDEADBEEF;
 	/* Record-level tracking is reserved for non-preemptible epochs. */
 	MPASS(er->er_td == NULL);
 #endif
 	ck_epoch_end(&er->er_record, &et->et_section);
 	TAILQ_REMOVE(&er->er_tdlist, et, et_link);
 	er->er_gen++;
 	if (__predict_false(et->et_old_priority != td->td_priority))
 		epoch_adjust_prio(td, et->et_old_priority);
 	critical_exit();
 #ifdef EPOCH_TRACE
 	epoch_trace_exit(td, epoch, et, file, line);
 #endif
 }
 
 void
 epoch_exit(epoch_t epoch)
 {
 	epoch_record_t er;
 
 	INIT_CHECK(epoch);
 	er = epoch_currecord(epoch);
 	ck_epoch_end(&er->er_record, NULL);
 #ifdef INVARIANTS
 	MPASS(er->er_td == curthread);
 	if (er->er_record.active == 0)
 		er->er_td = NULL;
 #endif
 	critical_exit();
 }
 
 /*
  * epoch_block_handler_preempt() is a callback from the CK code when another
  * thread is currently in an epoch section.
  */
 static void
 epoch_block_handler_preempt(struct ck_epoch *global __unused,
     ck_epoch_record_t *cr, void *arg __unused)
 {
 	epoch_record_t record;
 	struct thread *td, *owner, *curwaittd;
 	struct epoch_tracker *tdwait;
 	struct turnstile *ts;
 	struct lock_object *lock;
 	int spincount, gen;
 	int locksheld __unused;
 
 	record = __containerof(cr, struct epoch_record, er_record);
 	td = curthread;
 	locksheld = td->td_locks;
 	spincount = 0;
 	counter_u64_add(block_count, 1);
 	/*
 	 * We lost a race and there's no longer any threads
 	 * on the CPU in an epoch section.
 	 */
 	if (TAILQ_EMPTY(&record->er_tdlist))
 		return;
 
 	if (record->er_cpuid != curcpu) {
 		/*
 		 * If the head of the list is running, we can wait for it
 		 * to remove itself from the list and thus save us the
 		 * overhead of a migration
 		 */
 		gen = record->er_gen;
 		thread_unlock(td);
 		/*
 		 * We can't actually check if the waiting thread is running
 		 * so we simply poll for it to exit before giving up and
 		 * migrating.
 		 */
 		do {
 			cpu_spinwait();
 		} while (!TAILQ_EMPTY(&record->er_tdlist) &&
 				 gen == record->er_gen &&
 				 spincount++ < MAX_ADAPTIVE_SPIN);
 		thread_lock(td);
 		/*
 		 * If the generation has changed we can poll again
 		 * otherwise we need to migrate.
 		 */
 		if (gen != record->er_gen)
 			return;
 		/*
 		 * Being on the same CPU as that of the record on which
 		 * we need to wait allows us access to the thread
 		 * list associated with that CPU. We can then examine the
 		 * oldest thread in the queue and wait on its turnstile
 		 * until it resumes and so on until a grace period
 		 * elapses.
 		 *
 		 */
 		counter_u64_add(migrate_count, 1);
 		sched_bind(td, record->er_cpuid);
 		/*
 		 * At this point we need to return to the ck code
 		 * to scan to see if a grace period has elapsed.
 		 * We can't move on to check the thread list, because
 		 * in the meantime new threads may have arrived that
 		 * in fact belong to a different epoch.
 		 */
 		return;
 	}
 	/*
 	 * Try to find a thread in an epoch section on this CPU
 	 * waiting on a turnstile. Otherwise find the lowest
 	 * priority thread (highest prio value) and drop our priority
 	 * to match to allow it to run.
 	 */
 	TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) {
 		/*
 		 * Propagate our priority to any other waiters to prevent us
 		 * from starving them. They will have their original priority
 		 * restore on exit from epoch_wait().
 		 */
 		curwaittd = tdwait->et_td;
 		if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) {
 			critical_enter();
 			thread_unlock(td);
 			thread_lock(curwaittd);
 			sched_prio(curwaittd, td->td_priority);
 			thread_unlock(curwaittd);
 			thread_lock(td);
 			critical_exit();
 		}
 		if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) &&
 		    ((ts = curwaittd->td_blocked) != NULL)) {
 			/*
 			 * We unlock td to allow turnstile_wait to reacquire
 			 * the thread lock. Before unlocking it we enter a
 			 * critical section to prevent preemption after we
 			 * reenable interrupts by dropping the thread lock in
 			 * order to prevent curwaittd from getting to run.
 			 */
 			critical_enter();
 			thread_unlock(td);
 
 			if (turnstile_lock(ts, &lock, &owner)) {
 				if (ts == curwaittd->td_blocked) {
 					MPASS(TD_IS_INHIBITED(curwaittd) &&
 					    TD_ON_LOCK(curwaittd));
 					critical_exit();
 					turnstile_wait(ts, owner,
 					    curwaittd->td_tsqueue);
 					counter_u64_add(turnstile_count, 1);
 					thread_lock(td);
 					return;
 				}
 				turnstile_unlock(ts, lock);
 			}
 			thread_lock(td);
 			critical_exit();
 			KASSERT(td->td_locks == locksheld,
 			    ("%d extra locks held", td->td_locks - locksheld));
 		}
 	}
 	/*
 	 * We didn't find any threads actually blocked on a lock
 	 * so we have nothing to do except context switch away.
 	 */
 	counter_u64_add(switch_count, 1);
 	mi_switch(SW_VOL | SWT_RELINQUISH);
 	/*
 	 * It is important the thread lock is dropped while yielding
 	 * to allow other threads to acquire the lock pointed to by
 	 * TDQ_LOCKPTR(td). Currently mi_switch() will unlock the
 	 * thread lock before returning. Else a deadlock like
 	 * situation might happen.
 	 */
 	thread_lock(td);
 }
 
 void
 epoch_wait_preempt(epoch_t epoch)
 {
 	struct thread *td;
 	int was_bound;
 	int old_cpu;
 	int old_pinned;
 	u_char old_prio;
 	int locks __unused;
 
 	MPASS(cold || epoch != NULL);
 	INIT_CHECK(epoch);
 	td = curthread;
 #ifdef INVARIANTS
 	locks = curthread->td_locks;
 	MPASS(epoch->e_flags & EPOCH_PREEMPT);
 	if ((epoch->e_flags & EPOCH_LOCKED) == 0)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "epoch_wait() can be long running");
 	KASSERT(!in_epoch(epoch), ("epoch_wait_preempt() called in the middle "
 	    "of an epoch section of the same epoch"));
 #endif
 	DROP_GIANT();
 	thread_lock(td);
 
 	old_cpu = PCPU_GET(cpuid);
 	old_pinned = td->td_pinned;
 	old_prio = td->td_priority;
 	was_bound = sched_is_bound(td);
 	sched_unbind(td);
 	td->td_pinned = 0;
 	sched_bind(td, old_cpu);
 
 	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt,
 	    NULL);
 
 	/* restore CPU binding, if any */
 	if (was_bound != 0) {
 		sched_bind(td, old_cpu);
 	} else {
 		/* get thread back to initial CPU, if any */
 		if (old_pinned != 0)
 			sched_bind(td, old_cpu);
 		sched_unbind(td);
 	}
 	/* restore pinned after bind */
 	td->td_pinned = old_pinned;
 
 	/* restore thread priority */
 	sched_prio(td, old_prio);
 	thread_unlock(td);
 	PICKUP_GIANT();
 	KASSERT(td->td_locks == locks,
 	    ("%d residual locks held", td->td_locks - locks));
 }
 
 static void
 epoch_block_handler(struct ck_epoch *g __unused, ck_epoch_record_t *c __unused,
     void *arg __unused)
 {
 	cpu_spinwait();
 }
 
 void
 epoch_wait(epoch_t epoch)
 {
 
 	MPASS(cold || epoch != NULL);
 	INIT_CHECK(epoch);
 	MPASS(epoch->e_flags == 0);
 	critical_enter();
 	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler, NULL);
 	critical_exit();
 }
 
 void
 epoch_call(epoch_t epoch, epoch_callback_t callback, epoch_context_t ctx)
 {
 	epoch_record_t er;
 	ck_epoch_entry_t *cb;
 
 	cb = (void *)ctx;
 
 	MPASS(callback);
 	/* too early in boot to have epoch set up */
 	if (__predict_false(epoch == NULL))
 		goto boottime;
 #if !defined(EARLY_AP_STARTUP)
 	if (__predict_false(inited < 2))
 		goto boottime;
 #endif
 
 	critical_enter();
 	*DPCPU_PTR(epoch_cb_count) += 1;
 	er = epoch_currecord(epoch);
 	ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
 	critical_exit();
 	return;
 boottime:
 	callback(ctx);
 }
 
 static void
 epoch_call_task(void *arg __unused)
 {
 	ck_stack_entry_t *cursor, *head, *next;
 	ck_epoch_record_t *record;
 	epoch_record_t er;
 	epoch_t epoch;
 	ck_stack_t cb_stack;
 	int i, npending, total;
 
 	ck_stack_init(&cb_stack);
 	critical_enter();
 	epoch_enter(global_epoch);
 	for (total = i = 0; i != MAX_EPOCHS; i++) {
 		epoch = epoch_array + i;
 		if (__predict_false(
 		    atomic_load_acq_int(&epoch->e_in_use) == 0))
 			continue;
 		er = epoch_currecord(epoch);
 		record = &er->er_record;
 		if ((npending = record->n_pending) == 0)
 			continue;
 		ck_epoch_poll_deferred(record, &cb_stack);
 		total += npending - record->n_pending;
 	}
 	epoch_exit(global_epoch);
 	*DPCPU_PTR(epoch_cb_count) -= total;
 	critical_exit();
 
 	counter_u64_add(epoch_call_count, total);
 	counter_u64_add(epoch_call_task_count, 1);
 
 	head = ck_stack_batch_pop_npsc(&cb_stack);
 	for (cursor = head; cursor != NULL; cursor = next) {
 		struct ck_epoch_entry *entry =
 		    ck_epoch_entry_container(cursor);
 
 		next = CK_STACK_NEXT(cursor);
 		entry->function(entry);
 	}
 }
 
 static int
 in_epoch_verbose_preempt(epoch_t epoch, int dump_onfail)
 {
 	epoch_record_t er;
 	struct epoch_tracker *tdwait;
 	struct thread *td;
 
 	MPASS(epoch != NULL);
 	MPASS((epoch->e_flags & EPOCH_PREEMPT) != 0);
 	td = curthread;
 	if (THREAD_CAN_SLEEP())
 		return (0);
 	critical_enter();
 	er = epoch_currecord(epoch);
 	TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
 		if (tdwait->et_td == td) {
 			critical_exit();
 			return (1);
 		}
 #ifdef INVARIANTS
 	if (dump_onfail) {
 		MPASS(td->td_pinned);
 		printf("cpu: %d id: %d\n", curcpu, td->td_tid);
 		TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
 			printf("td_tid: %d ", tdwait->et_td->td_tid);
 		printf("\n");
 	}
 #endif
 	critical_exit();
 	return (0);
 }
 
 #ifdef INVARIANTS
 static void
 epoch_assert_nocpu(epoch_t epoch, struct thread *td)
 {
 	epoch_record_t er;
 	int cpu;
 	bool crit;
 
 	crit = td->td_critnest > 0;
 
 	/* Check for a critical section mishap. */
 	CPU_FOREACH(cpu) {
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 		KASSERT(er->er_td != td,
 		    ("%s critical section in epoch '%s', from cpu %d",
 		    (crit ? "exited" : "re-entered"), epoch->e_name, cpu));
 	}
 }
 #else
 #define	epoch_assert_nocpu(e, td) do {} while (0)
 #endif
 
 int
 in_epoch_verbose(epoch_t epoch, int dump_onfail)
 {
 	epoch_record_t er;
 	struct thread *td;
 
 	if (__predict_false((epoch) == NULL))
 		return (0);
 	if ((epoch->e_flags & EPOCH_PREEMPT) != 0)
 		return (in_epoch_verbose_preempt(epoch, dump_onfail));
 
 	/*
 	 * The thread being in a critical section is a necessary
 	 * condition to be correctly inside a non-preemptible epoch,
 	 * so it's definitely not in this epoch.
 	 */
 	td = curthread;
 	if (td->td_critnest == 0) {
 		epoch_assert_nocpu(epoch, td);
 		return (0);
 	}
 
 	/*
 	 * The current cpu is in a critical section, so the epoch record will be
 	 * stable for the rest of this function.  Knowing that the record is not
 	 * active is sufficient for knowing whether we're in this epoch or not,
 	 * since it's a pcpu record.
 	 */
 	er = epoch_currecord(epoch);
 	if (er->er_record.active == 0) {
 		epoch_assert_nocpu(epoch, td);
 		return (0);
 	}
 
 	MPASS(er->er_td == td);
 	return (1);
 }
 
 int
 in_epoch(epoch_t epoch)
 {
 	return (in_epoch_verbose(epoch, 0));
 }
 
 static void
 epoch_drain_cb(struct epoch_context *ctx)
 {
 	struct epoch *epoch =
 	    __containerof(ctx, struct epoch_record, er_drain_ctx)->er_parent;
 
 	if (atomic_fetchadd_int(&epoch->e_drain_count, -1) == 1) {
 		mtx_lock(&epoch->e_drain_mtx);
 		wakeup(epoch);
 		mtx_unlock(&epoch->e_drain_mtx);
 	}
 }
 
 void
 epoch_drain_callbacks(epoch_t epoch)
 {
 	epoch_record_t er;
 	struct thread *td;
 	int was_bound;
 	int old_pinned;
 	int old_cpu;
 	int cpu;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 	    "epoch_drain_callbacks() may sleep!");
 
 	/* too early in boot to have epoch set up */
 	if (__predict_false(epoch == NULL))
 		return;
 #if !defined(EARLY_AP_STARTUP)
 	if (__predict_false(inited < 2))
 		return;
 #endif
 	DROP_GIANT();
 
 	sx_xlock(&epoch->e_drain_sx);
 	mtx_lock(&epoch->e_drain_mtx);
 
 	td = curthread;
 	thread_lock(td);
 	old_cpu = PCPU_GET(cpuid);
 	old_pinned = td->td_pinned;
 	was_bound = sched_is_bound(td);
 	sched_unbind(td);
 	td->td_pinned = 0;
 
 	CPU_FOREACH(cpu)
 		epoch->e_drain_count++;
 	CPU_FOREACH(cpu) {
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 		sched_bind(td, cpu);
 		epoch_call(epoch, &epoch_drain_cb, &er->er_drain_ctx);
 	}
 
 	/* restore CPU binding, if any */
 	if (was_bound != 0) {
 		sched_bind(td, old_cpu);
 	} else {
 		/* get thread back to initial CPU, if any */
 		if (old_pinned != 0)
 			sched_bind(td, old_cpu);
 		sched_unbind(td);
 	}
 	/* restore pinned after bind */
 	td->td_pinned = old_pinned;
 
 	thread_unlock(td);
 
 	while (epoch->e_drain_count != 0)
 		msleep(epoch, &epoch->e_drain_mtx, PZERO, "EDRAIN", 0);
 
 	mtx_unlock(&epoch->e_drain_mtx);
 	sx_xunlock(&epoch->e_drain_sx);
 
 	PICKUP_GIANT();
 }
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index 14a0205fc098..b189fe303e0f 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -1,757 +1,756 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004 The FreeBSD Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_kdb.h"
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cons.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/stack.h>
 #include <sys/sysctl.h>
 
 #include <machine/kdb.h>
 #include <machine/pcb.h>
 
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 
 u_char __read_frequently kdb_active = 0;
 static void *kdb_jmpbufp = NULL;
 struct kdb_dbbe *kdb_dbbe = NULL;
 static struct pcb kdb_pcb;
 struct pcb *kdb_thrctx = NULL;
 struct thread *kdb_thread = NULL;
 struct trapframe *kdb_frame = NULL;
 
 #ifdef BREAK_TO_DEBUGGER
 #define	KDB_BREAK_TO_DEBUGGER	1
 #else
 #define	KDB_BREAK_TO_DEBUGGER	0
 #endif
 
 #ifdef ALT_BREAK_TO_DEBUGGER
 #define	KDB_ALT_BREAK_TO_DEBUGGER	1
 #else
 #define	KDB_ALT_BREAK_TO_DEBUGGER	0
 #endif
 
 static int	kdb_break_to_debugger = KDB_BREAK_TO_DEBUGGER;
 static int	kdb_alt_break_to_debugger = KDB_ALT_BREAK_TO_DEBUGGER;
 
 KDB_BACKEND(null, NULL, NULL, NULL, NULL);
 
 static int kdb_sysctl_available(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_current(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_enter(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_panic(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_panic_str(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_trap(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS);
 static int kdb_sysctl_stack_overflow(SYSCTL_HANDLER_ARGS);
 
 static SYSCTL_NODE(_debug, OID_AUTO, kdb, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
     "KDB nodes");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, available,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_available, "A",
     "list of available KDB backends");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, current,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_current, "A",
     "currently selected KDB backend");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, enter,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_enter, "I",
     "set to enter the debugger");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, panic,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_panic, "I",
     "set to panic the kernel");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, panic_str,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_panic_str, "A",
     "trigger a kernel panic, using the provided string as the panic message");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, trap,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_trap, "I",
     "set to cause a page fault via data access");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_trap_code, "I",
     "set to cause a page fault via code access");
 
 SYSCTL_PROC(_debug_kdb, OID_AUTO, stack_overflow,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
     kdb_sysctl_stack_overflow, "I",
     "set to cause a stack overflow");
 
 SYSCTL_INT(_debug_kdb, OID_AUTO, break_to_debugger,
     CTLFLAG_RWTUN | CTLFLAG_SECURE,
     &kdb_break_to_debugger, 0, "Enable break to debugger");
 
 SYSCTL_INT(_debug_kdb, OID_AUTO, alt_break_to_debugger,
     CTLFLAG_RWTUN | CTLFLAG_SECURE,
     &kdb_alt_break_to_debugger, 0, "Enable alternative break to debugger");
 
 /*
  * Flag to indicate to debuggers why the debugger was entered.
  */
 const char * volatile kdb_why = KDB_WHY_UNSET;
 
 static int
 kdb_sysctl_available(SYSCTL_HANDLER_ARGS)
 {
 	struct kdb_dbbe **iter;
 	struct sbuf sbuf;
 	int error;
 
 	sbuf_new_for_sysctl(&sbuf, NULL, 64, req);
 	SET_FOREACH(iter, kdb_dbbe_set) {
 		if ((*iter)->dbbe_active == 0)
 			sbuf_printf(&sbuf, "%s ", (*iter)->dbbe_name);
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 static int
 kdb_sysctl_current(SYSCTL_HANDLER_ARGS)
 {
 	char buf[16];
 	int error;
 
 	if (kdb_dbbe != NULL)
 		strlcpy(buf, kdb_dbbe->dbbe_name, sizeof(buf));
 	else
 		*buf = '\0';
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (kdb_active)
 		return (EBUSY);
 	return (kdb_dbbe_select(buf));
 }
 
 static int
 kdb_sysctl_enter(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (kdb_active)
 		return (EBUSY);
 	kdb_enter(KDB_WHY_SYSCTL, "sysctl debug.kdb.enter");
 	return (0);
 }
 
 static int
 kdb_sysctl_panic(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	panic("kdb_sysctl_panic");
 	return (0);
 }
 
 static int
 kdb_sysctl_panic_str(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	static char buf[256]; /* static buffer to limit mallocs when panicing */
 
 	*buf = '\0';
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	panic("kdb_sysctl_panic: %s", buf);
 	return (0);
 }
 
 static int
 kdb_sysctl_trap(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	int *addr = (int *)0x10;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	return (*addr);
 }
 
 static int
 kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	void (*fp)(u_int, u_int, u_int) = (void *)0xdeadc0de;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	(*fp)(0x11111111, 0x22222222, 0x33333333);
 	return (0);
 }
 
 static void kdb_stack_overflow(volatile int *x)  __noinline;
 static void
 kdb_stack_overflow(volatile int *x)
 {
 
 	if (*x > 10000000)
 		return;
 	kdb_stack_overflow(x);
 	*x += PCPU_GET(cpuid) / 1000000;
 }
 
 static int
 kdb_sysctl_stack_overflow(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 	volatile int x;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error == 0) {
 		i = 0;
 		error = sysctl_handle_int(oidp, &i, 0, req);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	x = 0;
 	kdb_stack_overflow(&x);
 	return (0);
 }
 
 void
 kdb_panic(const char *msg)
 {
 
 	printf("KDB: panic\n");
 	panic("%s", msg);
 }
 
 void
 kdb_reboot(void)
 {
 
 	printf("KDB: reboot requested\n");
 	shutdown_nice(0);
 }
 
 /*
  * Solaris implements a new BREAK which is initiated by a character sequence
  * CR ~ ^b which is similar to a familiar pattern used on Sun servers by the
  * Remote Console.
  *
  * Note that this function may be called from almost anywhere, with interrupts
  * disabled and with unknown locks held, so it must not access data other than
  * its arguments.  Its up to the caller to ensure that the state variable is
  * consistent.
  */
 #define	KEY_CR		13	/* CR '\r' */
 #define	KEY_TILDE	126	/* ~ */
 #define	KEY_CRTLB	2	/* ^B */
 #define	KEY_CRTLP	16	/* ^P */
 #define	KEY_CRTLR	18	/* ^R */
 
 /* States of th KDB "alternate break sequence" detecting state machine. */
 enum {
 	KDB_ALT_BREAK_SEEN_NONE,
 	KDB_ALT_BREAK_SEEN_CR,
 	KDB_ALT_BREAK_SEEN_CR_TILDE,
 };
 
 int
 kdb_break(void)
 {
 
 	if (!kdb_break_to_debugger)
 		return (0);
 	kdb_enter(KDB_WHY_BREAK, "Break to debugger");
 	return (KDB_REQ_DEBUGGER);
 }
 
 static int
 kdb_alt_break_state(int key, int *state)
 {
 	int brk;
 
 	/* All states transition to KDB_ALT_BREAK_SEEN_CR on a CR. */
 	if (key == KEY_CR) {
 		*state = KDB_ALT_BREAK_SEEN_CR;
 		return (0);
 	}
 
 	brk = 0;
 	switch (*state) {
 	case KDB_ALT_BREAK_SEEN_CR:
 		*state = KDB_ALT_BREAK_SEEN_NONE;
 		if (key == KEY_TILDE)
 			*state = KDB_ALT_BREAK_SEEN_CR_TILDE;
 		break;
 	case KDB_ALT_BREAK_SEEN_CR_TILDE:
 		*state = KDB_ALT_BREAK_SEEN_NONE;
 		if (key == KEY_CRTLB)
 			brk = KDB_REQ_DEBUGGER;
 		else if (key == KEY_CRTLP)
 			brk = KDB_REQ_PANIC;
 		else if (key == KEY_CRTLR)
 			brk = KDB_REQ_REBOOT;
 		break;
 	case KDB_ALT_BREAK_SEEN_NONE:
 	default:
 		*state = KDB_ALT_BREAK_SEEN_NONE;
 		break;
 	}
 	return (brk);
 }
 
 static int
 kdb_alt_break_internal(int key, int *state, int force_gdb)
 {
 	int brk;
 
 	if (!kdb_alt_break_to_debugger)
 		return (0);
 	brk = kdb_alt_break_state(key, state);
 	switch (brk) {
 	case KDB_REQ_DEBUGGER:
 		if (force_gdb)
 			kdb_dbbe_select("gdb");
 		kdb_enter(KDB_WHY_BREAK, "Break to debugger");
 		break;
 
 	case KDB_REQ_PANIC:
 		if (force_gdb)
 			kdb_dbbe_select("gdb");
 		kdb_panic("Panic sequence on console");
 		break;
 
 	case KDB_REQ_REBOOT:
 		kdb_reboot();
 		break;
 	}
 	return (0);
 }
 
 int
 kdb_alt_break(int key, int *state)
 {
 
 	return (kdb_alt_break_internal(key, state, 0));
 }
 
 /*
  * This variation on kdb_alt_break() is used only by dcons, which has its own
  * configuration flag to force GDB use regardless of the global KDB
  * configuration.
  */
 int
 kdb_alt_break_gdb(int key, int *state)
 {
 
 	return (kdb_alt_break_internal(key, state, 1));
 }
 
 /*
  * Print a backtrace of the calling thread. The backtrace is generated by
  * the selected debugger, provided it supports backtraces. If no debugger
  * is selected or the current debugger does not support backtraces, this
  * function silently returns.
  */
 void
 kdb_backtrace(void)
 {
 
 	if (kdb_dbbe != NULL && kdb_dbbe->dbbe_trace != NULL) {
 		printf("KDB: stack backtrace:\n");
 		kdb_dbbe->dbbe_trace();
 	}
 #ifdef STACK
 	else {
 		struct stack st;
 
 		printf("KDB: stack backtrace:\n");
-		stack_zero(&st);
 		stack_save(&st);
 		stack_print_ddb(&st);
 	}
 #endif
 }
 
 /*
  * Similar to kdb_backtrace() except that it prints a backtrace of an
  * arbitrary thread rather than the calling thread.
  */
 void
 kdb_backtrace_thread(struct thread *td)
 {
 
 	if (kdb_dbbe != NULL && kdb_dbbe->dbbe_trace_thread != NULL) {
 		printf("KDB: stack backtrace of thread %d:\n", td->td_tid);
 		kdb_dbbe->dbbe_trace_thread(td);
 	}
 #ifdef STACK
 	else {
 		struct stack st;
 
 		printf("KDB: stack backtrace of thread %d:\n", td->td_tid);
 		if (stack_save_td(&st, td) == 0)
 			stack_print_ddb(&st);
 	}
 #endif
 }
 
 /*
  * Set/change the current backend.
  */
 int
 kdb_dbbe_select(const char *name)
 {
 	struct kdb_dbbe *be, **iter;
 
 	SET_FOREACH(iter, kdb_dbbe_set) {
 		be = *iter;
 		if (be->dbbe_active == 0 && strcmp(be->dbbe_name, name) == 0) {
 			kdb_dbbe = be;
 			return (0);
 		}
 	}
 	return (EINVAL);
 }
 
 /*
  * Enter the currently selected debugger. If a message has been provided,
  * it is printed first. If the debugger does not support the enter method,
  * it is entered by using breakpoint(), which enters the debugger through
  * kdb_trap().  The 'why' argument will contain a more mechanically usable
  * string than 'msg', and is relied upon by DDB scripting to identify the
  * reason for entering the debugger so that the right script can be run.
  */
 void
 kdb_enter(const char *why, const char *msg)
 {
 
 	if (kdb_dbbe != NULL && kdb_active == 0) {
 		if (msg != NULL)
 			printf("KDB: enter: %s\n", msg);
 		kdb_why = why;
 		breakpoint();
 		kdb_why = KDB_WHY_UNSET;
 	}
 }
 
 /*
  * Initialize the kernel debugger interface.
  */
 void
 kdb_init(void)
 {
 	struct kdb_dbbe *be, **iter;
 	int cur_pri, pri;
 
 	kdb_active = 0;
 	kdb_dbbe = NULL;
 	cur_pri = -1;
 	SET_FOREACH(iter, kdb_dbbe_set) {
 		be = *iter;
 		pri = (be->dbbe_init != NULL) ? be->dbbe_init() : -1;
 		be->dbbe_active = (pri >= 0) ? 0 : -1;
 		if (pri > cur_pri) {
 			cur_pri = pri;
 			kdb_dbbe = be;
 		}
 	}
 	if (kdb_dbbe != NULL) {
 		printf("KDB: debugger backends:");
 		SET_FOREACH(iter, kdb_dbbe_set) {
 			be = *iter;
 			if (be->dbbe_active == 0)
 				printf(" %s", be->dbbe_name);
 		}
 		printf("\n");
 		printf("KDB: current backend: %s\n",
 		    kdb_dbbe->dbbe_name);
 	}
 }
 
 /*
  * Handle contexts.
  */
 void *
 kdb_jmpbuf(jmp_buf new)
 {
 	void *old;
 
 	old = kdb_jmpbufp;
 	kdb_jmpbufp = new;
 	return (old);
 }
 
 void
 kdb_reenter(void)
 {
 
 	if (!kdb_active || kdb_jmpbufp == NULL)
 		return;
 
 	printf("KDB: reentering\n");
 	kdb_backtrace();
 	longjmp(kdb_jmpbufp, 1);
 	/* NOTREACHED */
 }
 
 void
 kdb_reenter_silent(void)
 {
 
 	if (!kdb_active || kdb_jmpbufp == NULL)
 		return;
 
 	longjmp(kdb_jmpbufp, 1);
 	/* NOTREACHED */
 }
 
 /*
  * Thread-related support functions.
  */
 struct pcb *
 kdb_thr_ctx(struct thread *thr)
 {
 #if defined(SMP) && defined(KDB_STOPPEDPCB)
 	struct pcpu *pc;
 #endif
 
 	if (thr == curthread)
 		return (&kdb_pcb);
 
 #if defined(SMP) && defined(KDB_STOPPEDPCB)
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)  {
 		if (pc->pc_curthread == thr &&
 		    CPU_ISSET(pc->pc_cpuid, &stopped_cpus))
 			return (KDB_STOPPEDPCB(pc));
 	}
 #endif
 	return (thr->td_pcb);
 }
 
 struct thread *
 kdb_thr_first(void)
 {
 	struct proc *p;
 	struct thread *thr;
 	u_int i;
 
 	/* This function may be called early. */
 	if (pidhashtbl == NULL)
 		return (&thread0);
 
 	for (i = 0; i <= pidhash; i++) {
 		LIST_FOREACH(p, &pidhashtbl[i], p_hash) {
 			thr = FIRST_THREAD_IN_PROC(p);
 			if (thr != NULL)
 				return (thr);
 		}
 	}
 	return (NULL);
 }
 
 struct thread *
 kdb_thr_from_pid(pid_t pid)
 {
 	struct proc *p;
 
 	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
 		if (p->p_pid == pid)
 			return (FIRST_THREAD_IN_PROC(p));
 	}
 	return (NULL);
 }
 
 struct thread *
 kdb_thr_lookup(lwpid_t tid)
 {
 	struct thread *thr;
 
 	thr = kdb_thr_first();
 	while (thr != NULL && thr->td_tid != tid)
 		thr = kdb_thr_next(thr);
 	return (thr);
 }
 
 struct thread *
 kdb_thr_next(struct thread *thr)
 {
 	struct proc *p;
 	u_int hash;
 
 	p = thr->td_proc;
 	thr = TAILQ_NEXT(thr, td_plist);
 	if (thr != NULL)
 		return (thr);
 	if (pidhashtbl == NULL)
 		return (NULL);
 	hash = p->p_pid & pidhash;
 	for (;;) {
 		p = LIST_NEXT(p, p_hash);
 		while (p == NULL) {
 			if (++hash > pidhash)
 				return (NULL);
 			p = LIST_FIRST(&pidhashtbl[hash]);
 		}
 		thr = FIRST_THREAD_IN_PROC(p);
 		if (thr != NULL)
 			return (thr);
 	}
 }
 
 int
 kdb_thr_select(struct thread *thr)
 {
 	if (thr == NULL)
 		return (EINVAL);
 	kdb_thread = thr;
 	kdb_thrctx = kdb_thr_ctx(thr);
 	return (0);
 }
 
 /*
  * Enter the debugger due to a trap.
  */
 int
 kdb_trap(int type, int code, struct trapframe *tf)
 {
 #ifdef SMP
 	cpuset_t other_cpus;
 #endif
 	struct kdb_dbbe *be;
 	register_t intr;
 	int handled;
 	int did_stop_cpus;
 
 	be = kdb_dbbe;
 	if (be == NULL || be->dbbe_trap == NULL)
 		return (0);
 
 	/* We reenter the debugger through kdb_reenter(). */
 	if (kdb_active)
 		return (0);
 
 	intr = intr_disable();
 
 	if (!SCHEDULER_STOPPED()) {
 #ifdef SMP
 		other_cpus = all_cpus;
 		CPU_ANDNOT(&other_cpus, &other_cpus, &stopped_cpus);
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 		stop_cpus_hard(other_cpus);
 #endif
 		curthread->td_stopsched = 1;
 		did_stop_cpus = 1;
 	} else
 		did_stop_cpus = 0;
 
 	kdb_active++;
 
 	kdb_frame = tf;
 
 	/* Let MD code do its thing first... */
 	kdb_cpu_trap(type, code);
 
 	makectx(tf, &kdb_pcb);
 	kdb_thr_select(curthread);
 
 	cngrab();
 
 	for (;;) {
 		handled = be->dbbe_trap(type, code);
 		if (be == kdb_dbbe)
 			break;
 		be = kdb_dbbe;
 		if (be == NULL || be->dbbe_trap == NULL)
 			break;
 		printf("Switching to %s back-end\n", be->dbbe_name);
 	}
 
 	cnungrab();
 
 	kdb_active--;
 
 	if (did_stop_cpus) {
 		curthread->td_stopsched = 0;
 #ifdef SMP
 		CPU_AND(&other_cpus, &other_cpus, &stopped_cpus);
 		restart_cpus(other_cpus);
 #endif
 	}
 
 	intr_restore(intr);
 
 	return (handled);
 }
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index db82450f0570..57f63d362a4f 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -1,3139 +1,3137 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2008 Isilon Systems, Inc.
  * Copyright (c) 2008 Ilya Maykov <ivmaykov@gmail.com>
  * Copyright (c) 1998 Berkeley Software Design, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
  */
 
 /*
  * Implementation of the `witness' lock verifier.  Originally implemented for
  * mutexes in BSD/OS.  Extended to handle generic lock objects and lock
  * classes in FreeBSD.
  */
 
 /*
  *	Main Entry: witness
  *	Pronunciation: 'wit-n&s
  *	Function: noun
  *	Etymology: Middle English witnesse, from Old English witnes knowledge,
  *	    testimony, witness, from 2wit
  *	Date: before 12th century
  *	1 : attestation of a fact or event : TESTIMONY
  *	2 : one that gives evidence; specifically : one who testifies in
  *	    a cause or before a judicial tribunal
  *	3 : one asked to be present at a transaction so as to be able to
  *	    testify to its having taken place
  *	4 : one who has personal knowledge of something
  *	5 a : something serving as evidence or proof : SIGN
  *	  b : public affirmation by word or example of usually
  *	      religious faith or conviction <the heroic witness to divine
  *	      life -- Pilot>
  *	6 capitalized : a member of the Jehovah's Witnesses 
  */
 
 /*
  * Special rules concerning Giant and lock orders:
  *
  * 1) Giant must be acquired before any other mutexes.  Stated another way,
  *    no other mutex may be held when Giant is acquired.
  *
  * 2) Giant must be released when blocking on a sleepable lock.
  *
  * This rule is less obvious, but is a result of Giant providing the same
  * semantics as spl().  Basically, when a thread sleeps, it must release
  * Giant.  When a thread blocks on a sleepable lock, it sleeps.  Hence rule
  * 2).
  *
  * 3) Giant may be acquired before or after sleepable locks.
  *
  * This rule is also not quite as obvious.  Giant may be acquired after
  * a sleepable lock because it is a non-sleepable lock and non-sleepable
  * locks may always be acquired while holding a sleepable lock.  The second
  * case, Giant before a sleepable lock, follows from rule 2) above.  Suppose
  * you have two threads T1 and T2 and a sleepable lock X.  Suppose that T1
  * acquires X and blocks on Giant.  Then suppose that T2 acquires Giant and
  * blocks on X.  When T2 blocks on X, T2 will release Giant allowing T1 to
  * execute.  Thus, acquiring Giant both before and after a sleepable lock
  * will not result in a lock order reversal.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_stack.h"
 #include "opt_witness.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/stack.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <machine/stdarg.h>
 
 #if !defined(DDB) && !defined(STACK)
 #error "DDB or STACK options are required for WITNESS"
 #endif
 
 /* Note that these traces do not work with KTR_ALQ. */
 #if 0
 #define	KTR_WITNESS	KTR_SUBSYS
 #else
 #define	KTR_WITNESS	0
 #endif
 
 #define	LI_RECURSEMASK	0x0000ffff	/* Recursion depth of lock instance. */
 #define	LI_EXCLUSIVE	0x00010000	/* Exclusive lock instance. */
 #define	LI_NORELEASE	0x00020000	/* Lock not allowed to be released. */
 #define	LI_SLEEPABLE	0x00040000	/* Lock may be held while sleeping. */
 
 #ifndef WITNESS_COUNT
 #define	WITNESS_COUNT 		1536
 #endif
 #define	WITNESS_HASH_SIZE	251	/* Prime, gives load factor < 2 */
 #define	WITNESS_PENDLIST	(512 + (MAXCPU * 4))
 
 /* Allocate 256 KB of stack data space */
 #define	WITNESS_LO_DATA_COUNT	2048
 
 /* Prime, gives load factor of ~2 at full load */
 #define	WITNESS_LO_HASH_SIZE	1021
 
 /*
  * XXX: This is somewhat bogus, as we assume here that at most 2048 threads
  * will hold LOCK_NCHILDREN locks.  We handle failure ok, and we should
  * probably be safe for the most part, but it's still a SWAG.
  */
 #define	LOCK_NCHILDREN	5
 #define	LOCK_CHILDCOUNT	2048
 
 #define	MAX_W_NAME	64
 
 #define	FULLGRAPH_SBUF_SIZE	512
 
 /*
  * These flags go in the witness relationship matrix and describe the
  * relationship between any two struct witness objects.
  */
 #define	WITNESS_UNRELATED        0x00    /* No lock order relation. */
 #define	WITNESS_PARENT           0x01    /* Parent, aka direct ancestor. */
 #define	WITNESS_ANCESTOR         0x02    /* Direct or indirect ancestor. */
 #define	WITNESS_CHILD            0x04    /* Child, aka direct descendant. */
 #define	WITNESS_DESCENDANT       0x08    /* Direct or indirect descendant. */
 #define	WITNESS_ANCESTOR_MASK    (WITNESS_PARENT | WITNESS_ANCESTOR)
 #define	WITNESS_DESCENDANT_MASK  (WITNESS_CHILD | WITNESS_DESCENDANT)
 #define	WITNESS_RELATED_MASK						\
 	(WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
 #define	WITNESS_REVERSAL         0x10    /* A lock order reversal has been
 					  * observed. */
 #define	WITNESS_RESERVED1        0x20    /* Unused flag, reserved. */
 #define	WITNESS_RESERVED2        0x40    /* Unused flag, reserved. */
 #define	WITNESS_LOCK_ORDER_KNOWN 0x80    /* This lock order is known. */
 
 /* Descendant to ancestor flags */
 #define	WITNESS_DTOA(x)	(((x) & WITNESS_RELATED_MASK) >> 2)
 
 /* Ancestor to descendant flags */
 #define	WITNESS_ATOD(x)	(((x) & WITNESS_RELATED_MASK) << 2)
 
 #define	WITNESS_INDEX_ASSERT(i)						\
 	MPASS((i) > 0 && (i) <= w_max_used_index && (i) < witness_count)
 
 static MALLOC_DEFINE(M_WITNESS, "Witness", "Witness");
 
 /*
  * Lock instances.  A lock instance is the data associated with a lock while
  * it is held by witness.  For example, a lock instance will hold the
  * recursion count of a lock.  Lock instances are held in lists.  Spin locks
  * are held in a per-cpu list while sleep locks are held in per-thread list.
  */
 struct lock_instance {
 	struct lock_object	*li_lock;
 	const char		*li_file;
 	int			li_line;
 	u_int			li_flags;
 };
 
 /*
  * A simple list type used to build the list of locks held by a thread
  * or CPU.  We can't simply embed the list in struct lock_object since a
  * lock may be held by more than one thread if it is a shared lock.  Locks
  * are added to the head of the list, so we fill up each list entry from
  * "the back" logically.  To ease some of the arithmetic, we actually fill
  * in each list entry the normal way (children[0] then children[1], etc.) but
  * when we traverse the list we read children[count-1] as the first entry
  * down to children[0] as the final entry.
  */
 struct lock_list_entry {
 	struct lock_list_entry	*ll_next;
 	struct lock_instance	ll_children[LOCK_NCHILDREN];
 	u_int			ll_count;
 };
 
 /*
  * The main witness structure. One of these per named lock type in the system
  * (for example, "vnode interlock").
  */
 struct witness {
 	char  			w_name[MAX_W_NAME];
 	uint32_t 		w_index;  /* Index in the relationship matrix */
 	struct lock_class	*w_class;
 	STAILQ_ENTRY(witness) 	w_list;		/* List of all witnesses. */
 	STAILQ_ENTRY(witness) 	w_typelist;	/* Witnesses of a type. */
 	struct witness		*w_hash_next; /* Linked list in hash buckets. */
 	const char		*w_file; /* File where last acquired */
 	uint32_t 		w_line; /* Line where last acquired */
 	uint32_t 		w_refcount;
 	uint16_t 		w_num_ancestors; /* direct/indirect
 						  * ancestor count */
 	uint16_t 		w_num_descendants; /* direct/indirect
 						    * descendant count */
 	int16_t 		w_ddb_level;
 	unsigned		w_displayed:1;
 	unsigned		w_reversed:1;
 };
 
 STAILQ_HEAD(witness_list, witness);
 
 /*
  * The witness hash table. Keys are witness names (const char *), elements are
  * witness objects (struct witness *).
  */
 struct witness_hash {
 	struct witness	*wh_array[WITNESS_HASH_SIZE];
 	uint32_t	wh_size;
 	uint32_t	wh_count;
 };
 
 /*
  * Key type for the lock order data hash table.
  */
 struct witness_lock_order_key {
 	uint16_t	from;
 	uint16_t	to;
 };
 
 struct witness_lock_order_data {
 	struct stack			wlod_stack;
 	struct witness_lock_order_key	wlod_key;
 	struct witness_lock_order_data	*wlod_next;
 };
 
 /*
  * The witness lock order data hash table. Keys are witness index tuples
  * (struct witness_lock_order_key), elements are lock order data objects
  * (struct witness_lock_order_data). 
  */
 struct witness_lock_order_hash {
 	struct witness_lock_order_data	*wloh_array[WITNESS_LO_HASH_SIZE];
 	u_int	wloh_size;
 	u_int	wloh_count;
 };
 
 struct witness_blessed {
 	const char	*b_lock1;
 	const char	*b_lock2;
 };
 
 struct witness_pendhelp {
 	const char		*wh_type;
 	struct lock_object	*wh_lock;
 };
 
 struct witness_order_list_entry {
 	const char		*w_name;
 	struct lock_class	*w_class;
 };
 
 /*
  * Returns 0 if one of the locks is a spin lock and the other is not.
  * Returns 1 otherwise.
  */
 static __inline int
 witness_lock_type_equal(struct witness *w1, struct witness *w2)
 {
 
 	return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
 		(w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
 }
 
 static __inline int
 witness_lock_order_key_equal(const struct witness_lock_order_key *a,
     const struct witness_lock_order_key *b)
 {
 
 	return (a->from == b->from && a->to == b->to);
 }
 
 static int	_isitmyx(struct witness *w1, struct witness *w2, int rmask,
 		    const char *fname);
 static void	adopt(struct witness *parent, struct witness *child);
 static int	blessed(struct witness *, struct witness *);
 static void	depart(struct witness *w);
 static struct witness	*enroll(const char *description,
 			    struct lock_class *lock_class);
 static struct lock_instance	*find_instance(struct lock_list_entry *list,
 				    const struct lock_object *lock);
 static int	isitmychild(struct witness *parent, struct witness *child);
 static int	isitmydescendant(struct witness *parent, struct witness *child);
 static void	itismychild(struct witness *parent, struct witness *child);
 static int	sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS);
 static int	sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS);
 static int	sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS);
 static int	sysctl_debug_witness_channel(SYSCTL_HANDLER_ARGS);
 static void	witness_add_fullgraph(struct sbuf *sb, struct witness *parent);
 #ifdef DDB
 static void	witness_ddb_compute_levels(void);
 static void	witness_ddb_display(int(*)(const char *fmt, ...));
 static void	witness_ddb_display_descendants(int(*)(const char *fmt, ...),
 		    struct witness *, int indent);
 static void	witness_ddb_display_list(int(*prnt)(const char *fmt, ...),
 		    struct witness_list *list);
 static void	witness_ddb_level_descendants(struct witness *parent, int l);
 static void	witness_ddb_list(struct thread *td);
 #endif
 static void	witness_enter_debugger(const char *msg);
 static void	witness_debugger(int cond, const char *msg);
 static void	witness_free(struct witness *m);
 static struct witness	*witness_get(void);
 static uint32_t	witness_hash_djb2(const uint8_t *key, uint32_t size);
 static struct witness	*witness_hash_get(const char *key);
 static void	witness_hash_put(struct witness *w);
 static void	witness_init_hash_tables(void);
 static void	witness_increment_graph_generation(void);
 static void	witness_lock_list_free(struct lock_list_entry *lle);
 static struct lock_list_entry	*witness_lock_list_get(void);
 static int	witness_lock_order_add(struct witness *parent,
 		    struct witness *child);
 static int	witness_lock_order_check(struct witness *parent,
 		    struct witness *child);
 static struct witness_lock_order_data	*witness_lock_order_get(
 					    struct witness *parent,
 					    struct witness *child);
 static void	witness_list_lock(struct lock_instance *instance,
 		    int (*prnt)(const char *fmt, ...));
 static int	witness_output(const char *fmt, ...) __printflike(1, 2);
 static int	witness_output_drain(void *arg __unused, const char *data,
 		    int len);
 static int	witness_voutput(const char *fmt, va_list ap) __printflike(1, 0);
 static void	witness_setflag(struct lock_object *lock, int flag, int set);
 
 FEATURE(witness, "kernel has witness(9) support");
 
 static SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
     "Witness Locking");
 
 /*
  * If set to 0, lock order checking is disabled.  If set to -1,
  * witness is completely disabled.  Otherwise witness performs full
  * lock order checking for all locks.  At runtime, lock order checking
  * may be toggled.  However, witness cannot be reenabled once it is
  * completely disabled.
  */
 static int witness_watch = 1;
 SYSCTL_PROC(_debug_witness, OID_AUTO, watch,
     CTLFLAG_RWTUN | CTLTYPE_INT | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_debug_witness_watch, "I",
     "witness is watching lock operations");
 
 #ifdef KDB
 /*
  * When KDB is enabled and witness_kdb is 1, it will cause the system
  * to drop into kdebug() when:
  *	- a lock hierarchy violation occurs
  *	- locks are held when going to sleep.
  */
 #ifdef WITNESS_KDB
 int	witness_kdb = 1;
 #else
 int	witness_kdb = 0;
 #endif
 SYSCTL_INT(_debug_witness, OID_AUTO, kdb, CTLFLAG_RWTUN, &witness_kdb, 0, "");
 #endif /* KDB */
 
 #if defined(DDB) || defined(KDB)
 /*
  * When DDB or KDB is enabled and witness_trace is 1, it will cause the system
  * to print a stack trace:
  *	- a lock hierarchy violation occurs
  *	- locks are held when going to sleep.
  */
 int	witness_trace = 1;
 SYSCTL_INT(_debug_witness, OID_AUTO, trace, CTLFLAG_RWTUN, &witness_trace, 0, "");
 #endif /* DDB || KDB */
 
 #ifdef WITNESS_SKIPSPIN
 int	witness_skipspin = 1;
 #else
 int	witness_skipspin = 0;
 #endif
 SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin, 0, "");
 
 int badstack_sbuf_size;
 
 int witness_count = WITNESS_COUNT;
 SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN, 
     &witness_count, 0, "");
 
 /*
  * Output channel for witness messages.  By default we print to the console.
  */
 enum witness_channel {
 	WITNESS_CONSOLE,
 	WITNESS_LOG,
 	WITNESS_NONE,
 };
 
 static enum witness_channel witness_channel = WITNESS_CONSOLE;
 SYSCTL_PROC(_debug_witness, OID_AUTO, output_channel,
     CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_debug_witness_channel, "A",
     "Output channel for warnings");
 
 /*
  * Call this to print out the relations between locks.
  */
 SYSCTL_PROC(_debug_witness, OID_AUTO, fullgraph,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_debug_witness_fullgraph, "A",
     "Show locks relation graphs");
 
 /*
  * Call this to print out the witness faulty stacks.
  */
 SYSCTL_PROC(_debug_witness, OID_AUTO, badstacks,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_debug_witness_badstacks, "A",
     "Show bad witness stacks");
 
 static struct mtx w_mtx;
 
 /* w_list */
 static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
 static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);
 
 /* w_typelist */
 static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
 static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);
 
 /* lock list */
 static struct lock_list_entry *w_lock_list_free = NULL;
 static struct witness_pendhelp pending_locks[WITNESS_PENDLIST];
 static u_int pending_cnt;
 
 static int w_free_cnt, w_spin_cnt, w_sleep_cnt;
 SYSCTL_INT(_debug_witness, OID_AUTO, free_cnt, CTLFLAG_RD, &w_free_cnt, 0, "");
 SYSCTL_INT(_debug_witness, OID_AUTO, spin_cnt, CTLFLAG_RD, &w_spin_cnt, 0, "");
 SYSCTL_INT(_debug_witness, OID_AUTO, sleep_cnt, CTLFLAG_RD, &w_sleep_cnt, 0,
     "");
 
 static struct witness *w_data;
 static uint8_t **w_rmatrix;
 static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];
 static struct witness_hash w_hash;	/* The witness hash table. */
 
 /* The lock order data hash */
 static struct witness_lock_order_data w_lodata[WITNESS_LO_DATA_COUNT];
 static struct witness_lock_order_data *w_lofree = NULL;
 static struct witness_lock_order_hash w_lohash;
 static int w_max_used_index = 0;
 static unsigned int w_generation = 0;
 static const char w_notrunning[] = "Witness not running\n";
 static const char w_stillcold[] = "Witness is still cold\n";
 #ifdef __i386__
 static const char w_notallowed[] = "The sysctl is disabled on the arch\n";
 #endif
 
 static struct witness_order_list_entry order_lists[] = {
 	/*
 	 * sx locks
 	 */
 	{ "proctree", &lock_class_sx },
 	{ "allproc", &lock_class_sx },
 	{ "allprison", &lock_class_sx },
 	{ NULL, NULL },
 	/*
 	 * Various mutexes
 	 */
 	{ "Giant", &lock_class_mtx_sleep },
 	{ "pipe mutex", &lock_class_mtx_sleep },
 	{ "sigio lock", &lock_class_mtx_sleep },
 	{ "process group", &lock_class_mtx_sleep },
 #ifdef	HWPMC_HOOKS
 	{ "pmc-sleep", &lock_class_mtx_sleep },
 #endif
 	{ "process lock", &lock_class_mtx_sleep },
 	{ "session", &lock_class_mtx_sleep },
 	{ "uidinfo hash", &lock_class_rw },
 	{ "time lock", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * umtx
 	 */
 	{ "umtx lock", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * Sockets
 	 */
 	{ "accept", &lock_class_mtx_sleep },
 	{ "so_snd", &lock_class_mtx_sleep },
 	{ "so_rcv", &lock_class_mtx_sleep },
 	{ "sellck", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * Routing
 	 */
 	{ "so_rcv", &lock_class_mtx_sleep },
 	{ "radix node head", &lock_class_rm },
 	{ "ifaddr", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * IPv4 multicast:
 	 * protocol locks before interface locks, after UDP locks.
 	 */
 	{ "in_multi_sx", &lock_class_sx },
 	{ "udpinp", &lock_class_rw },
 	{ "in_multi_list_mtx", &lock_class_mtx_sleep },
 	{ "igmp_mtx", &lock_class_mtx_sleep },
 	{ "if_addr_lock", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * IPv6 multicast:
 	 * protocol locks before interface locks, after UDP locks.
 	 */
 	{ "in6_multi_sx", &lock_class_sx },
 	{ "udpinp", &lock_class_rw },
 	{ "in6_multi_list_mtx", &lock_class_mtx_sleep },
 	{ "mld_mtx", &lock_class_mtx_sleep },
 	{ "if_addr_lock", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * UNIX Domain Sockets
 	 */
 	{ "unp_link_rwlock", &lock_class_rw },
 	{ "unp_list_lock", &lock_class_mtx_sleep },
 	{ "unp", &lock_class_mtx_sleep },
 	{ "so_snd", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * UDP/IP
 	 */
 	{ "udpinp", &lock_class_rw },
 	{ "udp", &lock_class_mtx_sleep },
 	{ "so_snd", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * TCP/IP
 	 */
 	{ "tcpinp", &lock_class_rw },
 	{ "tcp", &lock_class_mtx_sleep },
 	{ "so_snd", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * BPF
 	 */
 	{ "bpf global lock", &lock_class_sx },
 	{ "bpf cdev lock", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * NFS server
 	 */
 	{ "nfsd_mtx", &lock_class_mtx_sleep },
 	{ "so_snd", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 
 	/*
 	 * IEEE 802.11
 	 */
 	{ "802.11 com lock", &lock_class_mtx_sleep},
 	{ NULL, NULL },
 	/*
 	 * Network drivers
 	 */
 	{ "network driver", &lock_class_mtx_sleep},
 	{ NULL, NULL },
 
 	/*
 	 * Netgraph
 	 */
 	{ "ng_node", &lock_class_mtx_sleep },
 	{ "ng_worklist", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * CDEV
 	 */
 	{ "vm map (system)", &lock_class_mtx_sleep },
 	{ "vnode interlock", &lock_class_mtx_sleep },
 	{ "cdev", &lock_class_mtx_sleep },
 	{ "devthrd", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * VM
 	 */
 	{ "vm map (user)", &lock_class_sx },
 	{ "vm object", &lock_class_rw },
 	{ "vm page", &lock_class_mtx_sleep },
 	{ "pmap pv global", &lock_class_rw },
 	{ "pmap", &lock_class_mtx_sleep },
 	{ "pmap pv list", &lock_class_rw },
 	{ "vm page free queue", &lock_class_mtx_sleep },
 	{ "vm pagequeue", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * kqueue/VFS interaction
 	 */
 	{ "kqueue", &lock_class_mtx_sleep },
 	{ "struct mount mtx", &lock_class_mtx_sleep },
 	{ "vnode interlock", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * VFS namecache
 	 */
 	{ "ncvn", &lock_class_mtx_sleep },
 	{ "ncbuc", &lock_class_mtx_sleep },
 	{ "vnode interlock", &lock_class_mtx_sleep },
 	{ "ncneg", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * ZFS locking
 	 */
 	{ "dn->dn_mtx", &lock_class_sx },
 	{ "dr->dt.di.dr_mtx", &lock_class_sx },
 	{ "db->db_mtx", &lock_class_sx },
 	{ NULL, NULL },
 	/*
 	 * TCP log locks
 	 */
 	{ "TCP ID tree", &lock_class_rw },
 	{ "tcp log id bucket", &lock_class_mtx_sleep },
 	{ "tcpinp", &lock_class_rw },
 	{ "TCP log expireq", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * spin locks
 	 */
 #ifdef SMP
 	{ "ap boot", &lock_class_mtx_spin },
 #endif
 	{ "rm.mutex_mtx", &lock_class_mtx_spin },
 #ifdef __i386__
 	{ "cy", &lock_class_mtx_spin },
 #endif
 	{ "scc_hwmtx", &lock_class_mtx_spin },
 	{ "uart_hwmtx", &lock_class_mtx_spin },
 	{ "fast_taskqueue", &lock_class_mtx_spin },
 	{ "intr table", &lock_class_mtx_spin },
 	{ "process slock", &lock_class_mtx_spin },
 	{ "syscons video lock", &lock_class_mtx_spin },
 	{ "sleepq chain", &lock_class_mtx_spin },
 	{ "rm_spinlock", &lock_class_mtx_spin },
 	{ "turnstile chain", &lock_class_mtx_spin },
 	{ "turnstile lock", &lock_class_mtx_spin },
 	{ "sched lock", &lock_class_mtx_spin },
 	{ "td_contested", &lock_class_mtx_spin },
 	{ "callout", &lock_class_mtx_spin },
 	{ "entropy harvest mutex", &lock_class_mtx_spin },
 #ifdef SMP
 	{ "smp rendezvous", &lock_class_mtx_spin },
 #endif
 #ifdef __powerpc__
 	{ "tlb0", &lock_class_mtx_spin },
 #endif
 	{ NULL, NULL },
 	{ "sched lock", &lock_class_mtx_spin },
 #ifdef	HWPMC_HOOKS
 	{ "pmc-per-proc", &lock_class_mtx_spin },
 #endif
 	{ NULL, NULL },
 	/*
 	 * leaf locks
 	 */
 	{ "intrcnt", &lock_class_mtx_spin },
 	{ "icu", &lock_class_mtx_spin },
 #ifdef __i386__
 	{ "allpmaps", &lock_class_mtx_spin },
 	{ "descriptor tables", &lock_class_mtx_spin },
 #endif
 	{ "clk", &lock_class_mtx_spin },
 	{ "cpuset", &lock_class_mtx_spin },
 	{ "mprof lock", &lock_class_mtx_spin },
 	{ "zombie lock", &lock_class_mtx_spin },
 	{ "ALD Queue", &lock_class_mtx_spin },
 #if defined(__i386__) || defined(__amd64__)
 	{ "pcicfg", &lock_class_mtx_spin },
 	{ "NDIS thread lock", &lock_class_mtx_spin },
 #endif
 	{ "tw_osl_io_lock", &lock_class_mtx_spin },
 	{ "tw_osl_q_lock", &lock_class_mtx_spin },
 	{ "tw_cl_io_lock", &lock_class_mtx_spin },
 	{ "tw_cl_intr_lock", &lock_class_mtx_spin },
 	{ "tw_cl_gen_lock", &lock_class_mtx_spin },
 #ifdef	HWPMC_HOOKS
 	{ "pmc-leaf", &lock_class_mtx_spin },
 #endif
 	{ "blocked lock", &lock_class_mtx_spin },
 	{ NULL, NULL },
 	{ NULL, NULL }
 };
 
 /*
  * Pairs of locks which have been blessed.  Witness does not complain about
  * order problems with blessed lock pairs.  Please do not add an entry to the
  * table without an explanatory comment.
  */
 static struct witness_blessed blessed_list[] = {
 	/*
 	 * See the comment in ufs_dirhash.c.  Basically, a vnode lock serializes
 	 * both lock orders, so a deadlock cannot happen as a result of this
 	 * LOR.
 	 */
 	{ "dirhash",	"bufwait" },
 
 	/*
 	 * A UFS vnode may be locked in vget() while a buffer belonging to the
 	 * parent directory vnode is locked.
 	 */
 	{ "ufs",	"bufwait" },
 };
 
 /*
  * This global is set to 0 once it becomes safe to use the witness code.
  */
 static int witness_cold = 1;
 
 /*
  * This global is set to 1 once the static lock orders have been enrolled
  * so that a warning can be issued for any spin locks enrolled later.
  */
 static int witness_spin_warn = 0;
 
 /* Trim useless garbage from filenames. */
 static const char *
 fixup_filename(const char *file)
 {
 
 	if (file == NULL)
 		return (NULL);
 	while (strncmp(file, "../", 3) == 0)
 		file += 3;
 	return (file);
 }
 
 /*
  * Calculate the size of early witness structures.
  */
 int
 witness_startup_count(void)
 {
 	int sz;
 
 	sz = sizeof(struct witness) * witness_count;
 	sz += sizeof(*w_rmatrix) * (witness_count + 1);
 	sz += sizeof(*w_rmatrix[0]) * (witness_count + 1) *
 	    (witness_count + 1);
 
 	return (sz);
 }
 
 /*
  * The WITNESS-enabled diagnostic code.  Note that the witness code does
  * assume that the early boot is single-threaded at least until after this
  * routine is completed.
  */
 void
 witness_startup(void *mem)
 {
 	struct lock_object *lock;
 	struct witness_order_list_entry *order;
 	struct witness *w, *w1;
 	uintptr_t p;
 	int i;
 
 	p = (uintptr_t)mem;
 	w_data = (void *)p;
 	p += sizeof(struct witness) * witness_count;
 
 	w_rmatrix = (void *)p;
 	p += sizeof(*w_rmatrix) * (witness_count + 1);
 
 	for (i = 0; i < witness_count + 1; i++) {
 		w_rmatrix[i] = (void *)p;
 		p += sizeof(*w_rmatrix[i]) * (witness_count + 1);
 	}
 	badstack_sbuf_size = witness_count * 256;
 
 	/*
 	 * We have to release Giant before initializing its witness
 	 * structure so that WITNESS doesn't get confused.
 	 */
 	mtx_unlock(&Giant);
 	mtx_assert(&Giant, MA_NOTOWNED);
 
 	CTR1(KTR_WITNESS, "%s: initializing witness", __func__);
 	mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET |
 	    MTX_NOWITNESS | MTX_NOPROFILE);
 	for (i = witness_count - 1; i >= 0; i--) {
 		w = &w_data[i];
 		memset(w, 0, sizeof(*w));
 		w_data[i].w_index = i;	/* Witness index never changes. */
 		witness_free(w);
 	}
 	KASSERT(STAILQ_FIRST(&w_free)->w_index == 0,
 	    ("%s: Invalid list of free witness objects", __func__));
 
 	/* Witness with index 0 is not used to aid in debugging. */
 	STAILQ_REMOVE_HEAD(&w_free, w_list);
 	w_free_cnt--;
 
 	for (i = 0; i < witness_count; i++) {
 		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * 
 		    (witness_count + 1));
 	}
 
 	for (i = 0; i < LOCK_CHILDCOUNT; i++)
 		witness_lock_list_free(&w_locklistdata[i]);
 	witness_init_hash_tables();
 
 	/* First add in all the specified order lists. */
 	for (order = order_lists; order->w_name != NULL; order++) {
 		w = enroll(order->w_name, order->w_class);
 		if (w == NULL)
 			continue;
 		w->w_file = "order list";
 		for (order++; order->w_name != NULL; order++) {
 			w1 = enroll(order->w_name, order->w_class);
 			if (w1 == NULL)
 				continue;
 			w1->w_file = "order list";
 			itismychild(w, w1);
 			w = w1;
 		}
 	}
 	witness_spin_warn = 1;
 
 	/* Iterate through all locks and add them to witness. */
 	for (i = 0; pending_locks[i].wh_lock != NULL; i++) {
 		lock = pending_locks[i].wh_lock;
 		KASSERT(lock->lo_flags & LO_WITNESS,
 		    ("%s: lock %s is on pending list but not LO_WITNESS",
 		    __func__, lock->lo_name));
 		lock->lo_witness = enroll(pending_locks[i].wh_type,
 		    LOCK_CLASS(lock));
 	}
 
 	/* Mark the witness code as being ready for use. */
 	witness_cold = 0;
 
 	mtx_lock(&Giant);
 }
 
 void
 witness_init(struct lock_object *lock, const char *type)
 {
 	struct lock_class *class;
 
 	/* Various sanity checks. */
 	class = LOCK_CLASS(lock);
 	if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
 	    (class->lc_flags & LC_RECURSABLE) == 0)
 		kassert_panic("%s: lock (%s) %s can not be recursable",
 		    __func__, class->lc_name, lock->lo_name);
 	if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
 	    (class->lc_flags & LC_SLEEPABLE) == 0)
 		kassert_panic("%s: lock (%s) %s can not be sleepable",
 		    __func__, class->lc_name, lock->lo_name);
 	if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
 	    (class->lc_flags & LC_UPGRADABLE) == 0)
 		kassert_panic("%s: lock (%s) %s can not be upgradable",
 		    __func__, class->lc_name, lock->lo_name);
 
 	/*
 	 * If we shouldn't watch this lock, then just clear lo_witness.
 	 * Otherwise, if witness_cold is set, then it is too early to
 	 * enroll this lock, so defer it to witness_initialize() by adding
 	 * it to the pending_locks list.  If it is not too early, then enroll
 	 * the lock now.
 	 */
 	if (witness_watch < 1 || KERNEL_PANICKED() ||
 	    (lock->lo_flags & LO_WITNESS) == 0)
 		lock->lo_witness = NULL;
 	else if (witness_cold) {
 		pending_locks[pending_cnt].wh_lock = lock;
 		pending_locks[pending_cnt++].wh_type = type;
 		if (pending_cnt > WITNESS_PENDLIST)
 			panic("%s: pending locks list is too small, "
 			    "increase WITNESS_PENDLIST\n",
 			    __func__);
 	} else
 		lock->lo_witness = enroll(type, class);
 }
 
 void
 witness_destroy(struct lock_object *lock)
 {
 	struct lock_class *class;
 	struct witness *w;
 
 	class = LOCK_CLASS(lock);
 
 	if (witness_cold)
 		panic("lock (%s) %s destroyed while witness_cold",
 		    class->lc_name, lock->lo_name);
 
 	/* XXX: need to verify that no one holds the lock */
 	if ((lock->lo_flags & LO_WITNESS) == 0 || lock->lo_witness == NULL)
 		return;
 	w = lock->lo_witness;
 
 	mtx_lock_spin(&w_mtx);
 	MPASS(w->w_refcount > 0);
 	w->w_refcount--;
 
 	if (w->w_refcount == 0)
 		depart(w);
 	mtx_unlock_spin(&w_mtx);
 }
 
 #ifdef DDB
 static void
 witness_ddb_compute_levels(void)
 {
 	struct witness *w;
 
 	/*
 	 * First clear all levels.
 	 */
 	STAILQ_FOREACH(w, &w_all, w_list)
 		w->w_ddb_level = -1;
 
 	/*
 	 * Look for locks with no parents and level all their descendants.
 	 */
 	STAILQ_FOREACH(w, &w_all, w_list) {
 		/* If the witness has ancestors (is not a root), skip it. */
 		if (w->w_num_ancestors > 0)
 			continue;
 		witness_ddb_level_descendants(w, 0);
 	}
 }
 
 static void
 witness_ddb_level_descendants(struct witness *w, int l)
 {
 	int i;
 
 	if (w->w_ddb_level >= l)
 		return;
 
 	w->w_ddb_level = l;
 	l++;
 
 	for (i = 1; i <= w_max_used_index; i++) {
 		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
 			witness_ddb_level_descendants(&w_data[i], l);
 	}
 }
 
 static void
 witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...),
     struct witness *w, int indent)
 {
 	int i;
 
  	for (i = 0; i < indent; i++)
  		prnt(" ");
 	prnt("%s (type: %s, depth: %d, active refs: %d)",
 	     w->w_name, w->w_class->lc_name,
 	     w->w_ddb_level, w->w_refcount);
  	if (w->w_displayed) {
  		prnt(" -- (already displayed)\n");
  		return;
  	}
  	w->w_displayed = 1;
 	if (w->w_file != NULL && w->w_line != 0)
 		prnt(" -- last acquired @ %s:%d\n", fixup_filename(w->w_file),
 		    w->w_line);
 	else
 		prnt(" -- never acquired\n");
 	indent++;
 	WITNESS_INDEX_ASSERT(w->w_index);
 	for (i = 1; i <= w_max_used_index; i++) {
 		if (db_pager_quit)
 			return;
 		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
 			witness_ddb_display_descendants(prnt, &w_data[i],
 			    indent);
 	}
 }
 
 static void
 witness_ddb_display_list(int(*prnt)(const char *fmt, ...),
     struct witness_list *list)
 {
 	struct witness *w;
 
 	STAILQ_FOREACH(w, list, w_typelist) {
 		if (w->w_file == NULL || w->w_ddb_level > 0)
 			continue;
 
 		/* This lock has no anscestors - display its descendants. */
 		witness_ddb_display_descendants(prnt, w, 0);
 		if (db_pager_quit)
 			return;
 	}
 }
 
 static void
 witness_ddb_display(int(*prnt)(const char *fmt, ...))
 {
 	struct witness *w;
 
 	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
 	witness_ddb_compute_levels();
 
 	/* Clear all the displayed flags. */
 	STAILQ_FOREACH(w, &w_all, w_list)
 		w->w_displayed = 0;
 
 	/*
 	 * First, handle sleep locks which have been acquired at least
 	 * once.
 	 */
 	prnt("Sleep locks:\n");
 	witness_ddb_display_list(prnt, &w_sleep);
 	if (db_pager_quit)
 		return;
 
 	/*
 	 * Now do spin locks which have been acquired at least once.
 	 */
 	prnt("\nSpin locks:\n");
 	witness_ddb_display_list(prnt, &w_spin);
 	if (db_pager_quit)
 		return;
 
 	/*
 	 * Finally, any locks which have not been acquired yet.
 	 */
 	prnt("\nLocks which were never acquired:\n");
 	STAILQ_FOREACH(w, &w_all, w_list) {
 		if (w->w_file != NULL || w->w_refcount == 0)
 			continue;
 		prnt("%s (type: %s, depth: %d)\n", w->w_name,
 		    w->w_class->lc_name, w->w_ddb_level);
 		if (db_pager_quit)
 			return;
 	}
 }
 #endif /* DDB */
 
 int
 witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
 {
 
 	if (witness_watch == -1 || KERNEL_PANICKED())
 		return (0);
 
 	/* Require locks that witness knows about. */
 	if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL ||
 	    lock2->lo_witness == NULL)
 		return (EINVAL);
 
 	mtx_assert(&w_mtx, MA_NOTOWNED);
 	mtx_lock_spin(&w_mtx);
 
 	/*
 	 * If we already have either an explicit or implied lock order that
 	 * is the other way around, then return an error.
 	 */
 	if (witness_watch &&
 	    isitmydescendant(lock2->lo_witness, lock1->lo_witness)) {
 		mtx_unlock_spin(&w_mtx);
 		return (EDOOFUS);
 	}
 
 	/* Try to add the new order. */
 	CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
 	    lock2->lo_witness->w_name, lock1->lo_witness->w_name);
 	itismychild(lock1->lo_witness, lock2->lo_witness);
 	mtx_unlock_spin(&w_mtx);
 	return (0);
 }
 
 void
 witness_checkorder(struct lock_object *lock, int flags, const char *file,
     int line, struct lock_object *interlock)
 {
 	struct lock_list_entry *lock_list, *lle;
 	struct lock_instance *lock1, *lock2, *plock;
 	struct lock_class *class, *iclass;
 	struct witness *w, *w1;
 	struct thread *td;
 	int i, j;
 
 	if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL ||
 	    KERNEL_PANICKED())
 		return;
 
 	w = lock->lo_witness;
 	class = LOCK_CLASS(lock);
 	td = curthread;
 
 	if (class->lc_flags & LC_SLEEPLOCK) {
 		/*
 		 * Since spin locks include a critical section, this check
 		 * implicitly enforces a lock order of all sleep locks before
 		 * all spin locks.
 		 */
 		if (td->td_critnest != 0 && !kdb_active)
 			kassert_panic("acquiring blockable sleep lock with "
 			    "spinlock or critical section held (%s) %s @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 
 		/*
 		 * If this is the first lock acquired then just return as
 		 * no order checking is needed.
 		 */
 		lock_list = td->td_sleeplocks;
 		if (lock_list == NULL || lock_list->ll_count == 0)
 			return;
 	} else {
 		/*
 		 * If this is the first lock, just return as no order
 		 * checking is needed.  Avoid problems with thread
 		 * migration pinning the thread while checking if
 		 * spinlocks are held.  If at least one spinlock is held
 		 * the thread is in a safe path and it is allowed to
 		 * unpin it.
 		 */
 		sched_pin();
 		lock_list = PCPU_GET(spinlocks);
 		if (lock_list == NULL || lock_list->ll_count == 0) {
 			sched_unpin();
 			return;
 		}
 		sched_unpin();
 	}
 
 	/*
 	 * Check to see if we are recursing on a lock we already own.  If
 	 * so, make sure that we don't mismatch exclusive and shared lock
 	 * acquires.
 	 */
 	lock1 = find_instance(lock_list, lock);
 	if (lock1 != NULL) {
 		if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
 		    (flags & LOP_EXCLUSIVE) == 0) {
 			witness_output("shared lock of (%s) %s @ %s:%d\n",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 			witness_output("while exclusively locked from %s:%d\n",
 			    fixup_filename(lock1->li_file), lock1->li_line);
 			kassert_panic("excl->share");
 		}
 		if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
 		    (flags & LOP_EXCLUSIVE) != 0) {
 			witness_output("exclusive lock of (%s) %s @ %s:%d\n",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 			witness_output("while share locked from %s:%d\n",
 			    fixup_filename(lock1->li_file), lock1->li_line);
 			kassert_panic("share->excl");
 		}
 		return;
 	}
 
 	/* Warn if the interlock is not locked exactly once. */
 	if (interlock != NULL) {
 		iclass = LOCK_CLASS(interlock);
 		lock1 = find_instance(lock_list, interlock);
 		if (lock1 == NULL)
 			kassert_panic("interlock (%s) %s not locked @ %s:%d",
 			    iclass->lc_name, interlock->lo_name,
 			    fixup_filename(file), line);
 		else if ((lock1->li_flags & LI_RECURSEMASK) != 0)
 			kassert_panic("interlock (%s) %s recursed @ %s:%d",
 			    iclass->lc_name, interlock->lo_name,
 			    fixup_filename(file), line);
 	}
 
 	/*
 	 * Find the previously acquired lock, but ignore interlocks.
 	 */
 	plock = &lock_list->ll_children[lock_list->ll_count - 1];
 	if (interlock != NULL && plock->li_lock == interlock) {
 		if (lock_list->ll_count > 1)
 			plock =
 			    &lock_list->ll_children[lock_list->ll_count - 2];
 		else {
 			lle = lock_list->ll_next;
 
 			/*
 			 * The interlock is the only lock we hold, so
 			 * simply return.
 			 */
 			if (lle == NULL)
 				return;
 			plock = &lle->ll_children[lle->ll_count - 1];
 		}
 	}
 
 	/*
 	 * Try to perform most checks without a lock.  If this succeeds we
 	 * can skip acquiring the lock and return success.  Otherwise we redo
 	 * the check with the lock held to handle races with concurrent updates.
 	 */
 	w1 = plock->li_lock->lo_witness;
 	if (witness_lock_order_check(w1, w))
 		return;
 
 	mtx_lock_spin(&w_mtx);
 	if (witness_lock_order_check(w1, w)) {
 		mtx_unlock_spin(&w_mtx);
 		return;
 	}
 	witness_lock_order_add(w1, w);
 
 	/*
 	 * Check for duplicate locks of the same type.  Note that we only
 	 * have to check for this on the last lock we just acquired.  Any
 	 * other cases will be caught as lock order violations.
 	 */
 	if (w1 == w) {
 		i = w->w_index;
 		if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) &&
 		    !(w_rmatrix[i][i] & WITNESS_REVERSAL)) {
 		    w_rmatrix[i][i] |= WITNESS_REVERSAL;
 			w->w_reversed = 1;
 			mtx_unlock_spin(&w_mtx);
 			witness_output(
 			    "acquiring duplicate lock of same type: \"%s\"\n", 
 			    w->w_name);
 			witness_output(" 1st %s @ %s:%d\n", plock->li_lock->lo_name,
 			    fixup_filename(plock->li_file), plock->li_line);
 			witness_output(" 2nd %s @ %s:%d\n", lock->lo_name,
 			    fixup_filename(file), line);
 			witness_debugger(1, __func__);
 		} else
 			mtx_unlock_spin(&w_mtx);
 		return;
 	}
 	mtx_assert(&w_mtx, MA_OWNED);
 
 	/*
 	 * If we know that the lock we are acquiring comes after
 	 * the lock we most recently acquired in the lock order tree,
 	 * then there is no need for any further checks.
 	 */
 	if (isitmychild(w1, w))
 		goto out;
 
 	for (j = 0, lle = lock_list; lle != NULL; lle = lle->ll_next) {
 		for (i = lle->ll_count - 1; i >= 0; i--, j++) {
 			struct stack pstack;
 			bool pstackv, trace;
 
 			MPASS(j < LOCK_CHILDCOUNT * LOCK_NCHILDREN);
 			lock1 = &lle->ll_children[i];
 
 			/*
 			 * Ignore the interlock.
 			 */
 			if (interlock == lock1->li_lock)
 				continue;
 
 			/*
 			 * If this lock doesn't undergo witness checking,
 			 * then skip it.
 			 */
 			w1 = lock1->li_lock->lo_witness;
 			if (w1 == NULL) {
 				KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0,
 				    ("lock missing witness structure"));
 				continue;
 			}
 
 			/*
 			 * If we are locking Giant and this is a sleepable
 			 * lock, then skip it.
 			 */
 			if ((lock1->li_flags & LI_SLEEPABLE) != 0 &&
 			    lock == &Giant.lock_object)
 				continue;
 
 			/*
 			 * If we are locking a sleepable lock and this lock
 			 * is Giant, then skip it.
 			 */
 			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
 			    (flags & LOP_NOSLEEP) == 0 &&
 			    lock1->li_lock == &Giant.lock_object)
 				continue;
 
 			/*
 			 * If we are locking a sleepable lock and this lock
 			 * isn't sleepable, we want to treat it as a lock
 			 * order violation to enfore a general lock order of
 			 * sleepable locks before non-sleepable locks.
 			 */
 			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
 			    (flags & LOP_NOSLEEP) == 0 &&
 			    (lock1->li_flags & LI_SLEEPABLE) == 0)
 				goto reversal;
 
 			/*
 			 * If we are locking Giant and this is a non-sleepable
 			 * lock, then treat it as a reversal.
 			 */
 			if ((lock1->li_flags & LI_SLEEPABLE) == 0 &&
 			    lock == &Giant.lock_object)
 				goto reversal;
 
 			/*
 			 * Check the lock order hierarchy for a reveresal.
 			 */
 			if (!isitmydescendant(w, w1))
 				continue;
 		reversal:
 
 			/*
 			 * We have a lock order violation, check to see if it
 			 * is allowed or has already been yelled about.
 			 */
 
 			/* Bail if this violation is known */
 			if (w_rmatrix[w1->w_index][w->w_index] & WITNESS_REVERSAL)
 				goto out;
 
 			/* Record this as a violation */
 			w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL;
 			w_rmatrix[w->w_index][w1->w_index] |= WITNESS_REVERSAL;
 			w->w_reversed = w1->w_reversed = 1;
 			witness_increment_graph_generation();
 
 			/*
 			 * If the lock order is blessed, bail before logging
 			 * anything.  We don't look for other lock order
 			 * violations though, which may be a bug.
 			 */
 			if (blessed(w, w1))
 				goto out;
 
 			trace = atomic_load_int(&witness_trace);
 			if (trace) {
 				struct witness_lock_order_data *data;
 
 				pstackv = false;
 				data = witness_lock_order_get(w, w1);
 				if (data != NULL) {
 					stack_copy(&data->wlod_stack,
 					    &pstack);
 					pstackv = true;
 				}
 			}
 			mtx_unlock_spin(&w_mtx);
 
 #ifdef WITNESS_NO_VNODE
 			/*
 			 * There are known LORs between VNODE locks. They are
 			 * not an indication of a bug. VNODE locks are flagged
 			 * as such (LO_IS_VNODE) and we don't yell if the LOR
 			 * is between 2 VNODE locks.
 			 */
 			if ((lock->lo_flags & LO_IS_VNODE) != 0 &&
 			    (lock1->li_lock->lo_flags & LO_IS_VNODE) != 0)
 				return;
 #endif
 
 			/*
 			 * Ok, yell about it.
 			 */
 			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
 			    (flags & LOP_NOSLEEP) == 0 &&
 			    (lock1->li_flags & LI_SLEEPABLE) == 0)
 				witness_output(
 		"lock order reversal: (sleepable after non-sleepable)\n");
 			else if ((lock1->li_flags & LI_SLEEPABLE) == 0
 			    && lock == &Giant.lock_object)
 				witness_output(
 		"lock order reversal: (Giant after non-sleepable)\n");
 			else
 				witness_output("lock order reversal:\n");
 
 			/*
 			 * Try to locate an earlier lock with
 			 * witness w in our list.
 			 */
 			do {
 				lock2 = &lle->ll_children[i];
 				MPASS(lock2->li_lock != NULL);
 				if (lock2->li_lock->lo_witness == w)
 					break;
 				if (i == 0 && lle->ll_next != NULL) {
 					lle = lle->ll_next;
 					i = lle->ll_count - 1;
 					MPASS(i >= 0 && i < LOCK_NCHILDREN);
 				} else
 					i--;
 			} while (i >= 0);
 			if (i < 0) {
 				witness_output(" 1st %p %s (%s, %s) @ %s:%d\n",
 				    lock1->li_lock, lock1->li_lock->lo_name,
 				    w1->w_name, w1->w_class->lc_name,
 				    fixup_filename(lock1->li_file),
 				    lock1->li_line);
 				witness_output(" 2nd %p %s (%s, %s) @ %s:%d\n",
 				    lock, lock->lo_name, w->w_name,
 				    w->w_class->lc_name, fixup_filename(file),
 				    line);
 			} else {
 				struct witness *w2 = lock2->li_lock->lo_witness;
 
 				witness_output(" 1st %p %s (%s, %s) @ %s:%d\n",
 				    lock2->li_lock, lock2->li_lock->lo_name,
 				    w2->w_name, w2->w_class->lc_name,
 				    fixup_filename(lock2->li_file),
 				    lock2->li_line);
 				witness_output(" 2nd %p %s (%s, %s) @ %s:%d\n",
 				    lock1->li_lock, lock1->li_lock->lo_name,
 				    w1->w_name, w1->w_class->lc_name,
 				    fixup_filename(lock1->li_file),
 				    lock1->li_line);
 				witness_output(" 3rd %p %s (%s, %s) @ %s:%d\n", lock,
 				    lock->lo_name, w->w_name,
 				    w->w_class->lc_name, fixup_filename(file),
 				    line);
 			}
 			if (trace) {
 				char buf[64];
 				struct sbuf sb;
 
 				sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
 				sbuf_set_drain(&sb, witness_output_drain,
 				    NULL);
 
 				if (pstackv) {
 					sbuf_printf(&sb,
 				    "lock order %s -> %s established at:\n",
 					    w->w_name, w1->w_name);
 					stack_sbuf_print_flags(&sb, &pstack,
 					    M_NOWAIT, STACK_SBUF_FMT_LONG);
 				}
 
 				sbuf_printf(&sb,
 				    "lock order %s -> %s attempted at:\n",
 				    w1->w_name, w->w_name);
 				stack_save(&pstack);
 				stack_sbuf_print_flags(&sb, &pstack, M_NOWAIT,
 				    STACK_SBUF_FMT_LONG);
 
 				sbuf_finish(&sb);
 				sbuf_delete(&sb);
 			}
 			witness_enter_debugger(__func__);
 			return;
 		}
 	}
 
 	/*
 	 * If requested, build a new lock order.  However, don't build a new
 	 * relationship between a sleepable lock and Giant if it is in the
 	 * wrong direction.  The correct lock order is that sleepable locks
 	 * always come before Giant.
 	 */
 	if (flags & LOP_NEWORDER &&
 	    !(plock->li_lock == &Giant.lock_object &&
 	    (lock->lo_flags & LO_SLEEPABLE) != 0 &&
 	    (flags & LOP_NOSLEEP) == 0)) {
 		CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
 		    w->w_name, plock->li_lock->lo_witness->w_name);
 		itismychild(plock->li_lock->lo_witness, w);
 	}
 out:
 	mtx_unlock_spin(&w_mtx);
 }
 
 void
 witness_lock(struct lock_object *lock, int flags, const char *file, int line)
 {
 	struct lock_list_entry **lock_list, *lle;
 	struct lock_instance *instance;
 	struct witness *w;
 	struct thread *td;
 
 	if (witness_cold || witness_watch == -1 || lock->lo_witness == NULL ||
 	    KERNEL_PANICKED())
 		return;
 	w = lock->lo_witness;
 	td = curthread;
 
 	/* Determine lock list for this lock. */
 	if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK)
 		lock_list = &td->td_sleeplocks;
 	else
 		lock_list = PCPU_PTR(spinlocks);
 
 	/* Check to see if we are recursing on a lock we already own. */
 	instance = find_instance(*lock_list, lock);
 	if (instance != NULL) {
 		instance->li_flags++;
 		CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__,
 		    td->td_proc->p_pid, lock->lo_name,
 		    instance->li_flags & LI_RECURSEMASK);
 		instance->li_file = file;
 		instance->li_line = line;
 		return;
 	}
 
 	/* Update per-witness last file and line acquire. */
 	w->w_file = file;
 	w->w_line = line;
 
 	/* Find the next open lock instance in the list and fill it. */
 	lle = *lock_list;
 	if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
 		lle = witness_lock_list_get();
 		if (lle == NULL)
 			return;
 		lle->ll_next = *lock_list;
 		CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__,
 		    td->td_proc->p_pid, lle);
 		*lock_list = lle;
 	}
 	instance = &lle->ll_children[lle->ll_count++];
 	instance->li_lock = lock;
 	instance->li_line = line;
 	instance->li_file = file;
 	instance->li_flags = 0;
 	if ((flags & LOP_EXCLUSIVE) != 0)
 		instance->li_flags |= LI_EXCLUSIVE;
 	if ((lock->lo_flags & LO_SLEEPABLE) != 0 && (flags & LOP_NOSLEEP) == 0)
 		instance->li_flags |= LI_SLEEPABLE;
 	CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__,
 	    td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1);
 }
 
 void
 witness_upgrade(struct lock_object *lock, int flags, const char *file, int line)
 {
 	struct lock_instance *instance;
 	struct lock_class *class;
 
 	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
 	if (lock->lo_witness == NULL || witness_watch == -1 || KERNEL_PANICKED())
 		return;
 	class = LOCK_CLASS(lock);
 	if (witness_watch) {
 		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
 			kassert_panic(
 			    "upgrade of non-upgradable lock (%s) %s @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
 			kassert_panic(
 			    "upgrade of non-sleep lock (%s) %s @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 	}
 	instance = find_instance(curthread->td_sleeplocks, lock);
 	if (instance == NULL) {
 		kassert_panic("upgrade of unlocked lock (%s) %s @ %s:%d",
 		    class->lc_name, lock->lo_name,
 		    fixup_filename(file), line);
 		return;
 	}
 	if (witness_watch) {
 		if ((instance->li_flags & LI_EXCLUSIVE) != 0)
 			kassert_panic(
 			    "upgrade of exclusive lock (%s) %s @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		if ((instance->li_flags & LI_RECURSEMASK) != 0)
 			kassert_panic(
 			    "upgrade of recursed lock (%s) %s r=%d @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    instance->li_flags & LI_RECURSEMASK,
 			    fixup_filename(file), line);
 	}
 	instance->li_flags |= LI_EXCLUSIVE;
 }
 
 void
 witness_downgrade(struct lock_object *lock, int flags, const char *file,
     int line)
 {
 	struct lock_instance *instance;
 	struct lock_class *class;
 
 	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
 	if (lock->lo_witness == NULL || witness_watch == -1 || KERNEL_PANICKED())
 		return;
 	class = LOCK_CLASS(lock);
 	if (witness_watch) {
 		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
 			kassert_panic(
 			    "downgrade of non-upgradable lock (%s) %s @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
 			kassert_panic(
 			    "downgrade of non-sleep lock (%s) %s @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 	}
 	instance = find_instance(curthread->td_sleeplocks, lock);
 	if (instance == NULL) {
 		kassert_panic("downgrade of unlocked lock (%s) %s @ %s:%d",
 		    class->lc_name, lock->lo_name,
 		    fixup_filename(file), line);
 		return;
 	}
 	if (witness_watch) {
 		if ((instance->li_flags & LI_EXCLUSIVE) == 0)
 			kassert_panic(
 			    "downgrade of shared lock (%s) %s @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		if ((instance->li_flags & LI_RECURSEMASK) != 0)
 			kassert_panic(
 			    "downgrade of recursed lock (%s) %s r=%d @ %s:%d",
 			    class->lc_name, lock->lo_name,
 			    instance->li_flags & LI_RECURSEMASK,
 			    fixup_filename(file), line);
 	}
 	instance->li_flags &= ~LI_EXCLUSIVE;
 }
 
 void
 witness_unlock(struct lock_object *lock, int flags, const char *file, int line)
 {
 	struct lock_list_entry **lock_list, *lle;
 	struct lock_instance *instance;
 	struct lock_class *class;
 	struct thread *td;
 	register_t s;
 	int i, j;
 
 	if (witness_cold || lock->lo_witness == NULL || KERNEL_PANICKED())
 		return;
 	td = curthread;
 	class = LOCK_CLASS(lock);
 
 	/* Find lock instance associated with this lock. */
 	if (class->lc_flags & LC_SLEEPLOCK)
 		lock_list = &td->td_sleeplocks;
 	else
 		lock_list = PCPU_PTR(spinlocks);
 	lle = *lock_list;
 	for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next)
 		for (i = 0; i < (*lock_list)->ll_count; i++) {
 			instance = &(*lock_list)->ll_children[i];
 			if (instance->li_lock == lock)
 				goto found;
 		}
 
 	/*
 	 * When disabling WITNESS through witness_watch we could end up in
 	 * having registered locks in the td_sleeplocks queue.
 	 * We have to make sure we flush these queues, so just search for
 	 * eventual register locks and remove them.
 	 */
 	if (witness_watch > 0) {
 		kassert_panic("lock (%s) %s not locked @ %s:%d", class->lc_name,
 		    lock->lo_name, fixup_filename(file), line);
 		return;
 	} else {
 		return;
 	}
 found:
 
 	/* First, check for shared/exclusive mismatches. */
 	if ((instance->li_flags & LI_EXCLUSIVE) != 0 && witness_watch > 0 &&
 	    (flags & LOP_EXCLUSIVE) == 0) {
 		witness_output("shared unlock of (%s) %s @ %s:%d\n",
 		    class->lc_name, lock->lo_name, fixup_filename(file), line);
 		witness_output("while exclusively locked from %s:%d\n",
 		    fixup_filename(instance->li_file), instance->li_line);
 		kassert_panic("excl->ushare");
 	}
 	if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 &&
 	    (flags & LOP_EXCLUSIVE) != 0) {
 		witness_output("exclusive unlock of (%s) %s @ %s:%d\n",
 		    class->lc_name, lock->lo_name, fixup_filename(file), line);
 		witness_output("while share locked from %s:%d\n",
 		    fixup_filename(instance->li_file),
 		    instance->li_line);
 		kassert_panic("share->uexcl");
 	}
 	/* If we are recursed, unrecurse. */
 	if ((instance->li_flags & LI_RECURSEMASK) > 0) {
 		CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__,
 		    td->td_proc->p_pid, instance->li_lock->lo_name,
 		    instance->li_flags);
 		instance->li_flags--;
 		return;
 	}
 	/* The lock is now being dropped, check for NORELEASE flag */
 	if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) {
 		witness_output("forbidden unlock of (%s) %s @ %s:%d\n",
 		    class->lc_name, lock->lo_name, fixup_filename(file), line);
 		kassert_panic("lock marked norelease");
 	}
 
 	/* Otherwise, remove this item from the list. */
 	s = intr_disable();
 	CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__,
 	    td->td_proc->p_pid, instance->li_lock->lo_name,
 	    (*lock_list)->ll_count - 1);
 	for (j = i; j < (*lock_list)->ll_count - 1; j++)
 		(*lock_list)->ll_children[j] =
 		    (*lock_list)->ll_children[j + 1];
 	(*lock_list)->ll_count--;
 	intr_restore(s);
 
 	/*
 	 * In order to reduce contention on w_mtx, we want to keep always an
 	 * head object into lists so that frequent allocation from the 
 	 * free witness pool (and subsequent locking) is avoided.
 	 * In order to maintain the current code simple, when the head
 	 * object is totally unloaded it means also that we do not have
 	 * further objects in the list, so the list ownership needs to be
 	 * hand over to another object if the current head needs to be freed.
 	 */
 	if ((*lock_list)->ll_count == 0) {
 		if (*lock_list == lle) {
 			if (lle->ll_next == NULL)
 				return;
 		} else
 			lle = *lock_list;
 		*lock_list = lle->ll_next;
 		CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__,
 		    td->td_proc->p_pid, lle);
 		witness_lock_list_free(lle);
 	}
 }
 
 void
 witness_thread_exit(struct thread *td)
 {
 	struct lock_list_entry *lle;
 	int i, n;
 
 	lle = td->td_sleeplocks;
 	if (lle == NULL || KERNEL_PANICKED())
 		return;
 	if (lle->ll_count != 0) {
 		for (n = 0; lle != NULL; lle = lle->ll_next)
 			for (i = lle->ll_count - 1; i >= 0; i--) {
 				if (n == 0)
 					witness_output(
 		    "Thread %p exiting with the following locks held:\n", td);
 				n++;
 				witness_list_lock(&lle->ll_children[i],
 				    witness_output);
 				
 			}
 		kassert_panic(
 		    "Thread %p cannot exit while holding sleeplocks\n", td);
 	}
 	witness_lock_list_free(lle);
 }
 
 /*
  * Warn if any locks other than 'lock' are held.  Flags can be passed in to
  * exempt Giant and sleepable locks from the checks as well.  If any
  * non-exempt locks are held, then a supplied message is printed to the
  * output channel along with a list of the offending locks.  If indicated in the
  * flags then a failure results in a panic as well.
  */
 int
 witness_warn(int flags, struct lock_object *lock, const char *fmt, ...)
 {
 	struct lock_list_entry *lock_list, *lle;
 	struct lock_instance *lock1;
 	struct thread *td;
 	va_list ap;
 	int i, n;
 
 	if (witness_cold || witness_watch < 1 || KERNEL_PANICKED())
 		return (0);
 	n = 0;
 	td = curthread;
 	for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next)
 		for (i = lle->ll_count - 1; i >= 0; i--) {
 			lock1 = &lle->ll_children[i];
 			if (lock1->li_lock == lock)
 				continue;
 			if (flags & WARN_GIANTOK &&
 			    lock1->li_lock == &Giant.lock_object)
 				continue;
 			if (flags & WARN_SLEEPOK &&
 			    (lock1->li_flags & LI_SLEEPABLE) != 0)
 				continue;
 			if (n == 0) {
 				va_start(ap, fmt);
 				vprintf(fmt, ap);
 				va_end(ap);
 				printf(" with the following %slocks held:\n",
 				    (flags & WARN_SLEEPOK) != 0 ?
 				    "non-sleepable " : "");
 			}
 			n++;
 			witness_list_lock(lock1, printf);
 		}
 
 	/*
 	 * Pin the thread in order to avoid problems with thread migration.
 	 * Once that all verifies are passed about spinlocks ownership,
 	 * the thread is in a safe path and it can be unpinned.
 	 */
 	sched_pin();
 	lock_list = PCPU_GET(spinlocks);
 	if (lock_list != NULL && lock_list->ll_count != 0) {
 		sched_unpin();
 
 		/*
 		 * We should only have one spinlock and as long as
 		 * the flags cannot match for this locks class,
 		 * check if the first spinlock is the one curthread
 		 * should hold.
 		 */
 		lock1 = &lock_list->ll_children[lock_list->ll_count - 1];
 		if (lock_list->ll_count == 1 && lock_list->ll_next == NULL &&
 		    lock1->li_lock == lock && n == 0)
 			return (0);
 
 		va_start(ap, fmt);
 		vprintf(fmt, ap);
 		va_end(ap);
 		printf(" with the following %slocks held:\n",
 		    (flags & WARN_SLEEPOK) != 0 ?  "non-sleepable " : "");
 		n += witness_list_locks(&lock_list, printf);
 	} else
 		sched_unpin();
 	if (flags & WARN_PANIC && n)
 		kassert_panic("%s", __func__);
 	else
 		witness_debugger(n, __func__);
 	return (n);
 }
 
 const char *
 witness_file(struct lock_object *lock)
 {
 	struct witness *w;
 
 	if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL)
 		return ("?");
 	w = lock->lo_witness;
 	return (w->w_file);
 }
 
 int
 witness_line(struct lock_object *lock)
 {
 	struct witness *w;
 
 	if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL)
 		return (0);
 	w = lock->lo_witness;
 	return (w->w_line);
 }
 
 static struct witness *
 enroll(const char *description, struct lock_class *lock_class)
 {
 	struct witness *w;
 
 	MPASS(description != NULL);
 
 	if (witness_watch == -1 || KERNEL_PANICKED())
 		return (NULL);
 	if ((lock_class->lc_flags & LC_SPINLOCK)) {
 		if (witness_skipspin)
 			return (NULL);
 	} else if ((lock_class->lc_flags & LC_SLEEPLOCK) == 0) {
 		kassert_panic("lock class %s is not sleep or spin",
 		    lock_class->lc_name);
 		return (NULL);
 	}
 
 	mtx_lock_spin(&w_mtx);
 	w = witness_hash_get(description);
 	if (w)
 		goto found;
 	if ((w = witness_get()) == NULL)
 		return (NULL);
 	MPASS(strlen(description) < MAX_W_NAME);
 	strcpy(w->w_name, description);
 	w->w_class = lock_class;
 	w->w_refcount = 1;
 	STAILQ_INSERT_HEAD(&w_all, w, w_list);
 	if (lock_class->lc_flags & LC_SPINLOCK) {
 		STAILQ_INSERT_HEAD(&w_spin, w, w_typelist);
 		w_spin_cnt++;
 	} else if (lock_class->lc_flags & LC_SLEEPLOCK) {
 		STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist);
 		w_sleep_cnt++;
 	}
 
 	/* Insert new witness into the hash */
 	witness_hash_put(w);
 	witness_increment_graph_generation();
 	mtx_unlock_spin(&w_mtx);
 	return (w);
 found:
 	w->w_refcount++;
 	if (w->w_refcount == 1)
 		w->w_class = lock_class;
 	mtx_unlock_spin(&w_mtx);
 	if (lock_class != w->w_class)
 		kassert_panic(
 		    "lock (%s) %s does not match earlier (%s) lock",
 		    description, lock_class->lc_name,
 		    w->w_class->lc_name);
 	return (w);
 }
 
 static void
 depart(struct witness *w)
 {
 
 	MPASS(w->w_refcount == 0);
 	if (w->w_class->lc_flags & LC_SLEEPLOCK) {
 		w_sleep_cnt--;
 	} else {
 		w_spin_cnt--;
 	}
 	/*
 	 * Set file to NULL as it may point into a loadable module.
 	 */
 	w->w_file = NULL;
 	w->w_line = 0;
 	witness_increment_graph_generation();
 }
 
 static void
 adopt(struct witness *parent, struct witness *child)
 {
 	int pi, ci, i, j;
 
 	if (witness_cold == 0)
 		mtx_assert(&w_mtx, MA_OWNED);
 
 	/* If the relationship is already known, there's no work to be done. */
 	if (isitmychild(parent, child))
 		return;
 
 	/* When the structure of the graph changes, bump up the generation. */
 	witness_increment_graph_generation();
 
 	/*
 	 * The hard part ... create the direct relationship, then propagate all
 	 * indirect relationships.
 	 */
 	pi = parent->w_index;
 	ci = child->w_index;
 	WITNESS_INDEX_ASSERT(pi);
 	WITNESS_INDEX_ASSERT(ci);
 	MPASS(pi != ci);
 	w_rmatrix[pi][ci] |= WITNESS_PARENT;
 	w_rmatrix[ci][pi] |= WITNESS_CHILD;
 
 	/*
 	 * If parent was not already an ancestor of child,
 	 * then we increment the descendant and ancestor counters.
 	 */
 	if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) {
 		parent->w_num_descendants++;
 		child->w_num_ancestors++;
 	}
 
 	/* 
 	 * Find each ancestor of 'pi'. Note that 'pi' itself is counted as 
 	 * an ancestor of 'pi' during this loop.
 	 */
 	for (i = 1; i <= w_max_used_index; i++) {
 		if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && 
 		    (i != pi))
 			continue;
 
 		/* Find each descendant of 'i' and mark it as a descendant. */
 		for (j = 1; j <= w_max_used_index; j++) {
 			/* 
 			 * Skip children that are already marked as
 			 * descendants of 'i'.
 			 */
 			if (w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK)
 				continue;
 
 			/*
 			 * We are only interested in descendants of 'ci'. Note
 			 * that 'ci' itself is counted as a descendant of 'ci'.
 			 */
 			if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 && 
 			    (j != ci))
 				continue;
 			w_rmatrix[i][j] |= WITNESS_ANCESTOR;
 			w_rmatrix[j][i] |= WITNESS_DESCENDANT;
 			w_data[i].w_num_descendants++;
 			w_data[j].w_num_ancestors++;
 
 			/* 
 			 * Make sure we aren't marking a node as both an
 			 * ancestor and descendant. We should have caught 
 			 * this as a lock order reversal earlier.
 			 */
 			if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) &&
 			    (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) {
 				printf("witness rmatrix paradox! [%d][%d]=%d "
 				    "both ancestor and descendant\n",
 				    i, j, w_rmatrix[i][j]); 
 				kdb_backtrace();
 				printf("Witness disabled.\n");
 				witness_watch = -1;
 			}
 			if ((w_rmatrix[j][i] & WITNESS_ANCESTOR_MASK) &&
 			    (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) {
 				printf("witness rmatrix paradox! [%d][%d]=%d "
 				    "both ancestor and descendant\n",
 				    j, i, w_rmatrix[j][i]); 
 				kdb_backtrace();
 				printf("Witness disabled.\n");
 				witness_watch = -1;
 			}
 		}
 	}
 }
 
 static void
 itismychild(struct witness *parent, struct witness *child)
 {
 	int unlocked;
 
 	MPASS(child != NULL && parent != NULL);
 	if (witness_cold == 0)
 		mtx_assert(&w_mtx, MA_OWNED);
 
 	if (!witness_lock_type_equal(parent, child)) {
 		if (witness_cold == 0) {
 			unlocked = 1;
 			mtx_unlock_spin(&w_mtx);
 		} else {
 			unlocked = 0;
 		}
 		kassert_panic(
 		    "%s: parent \"%s\" (%s) and child \"%s\" (%s) are not "
 		    "the same lock type", __func__, parent->w_name,
 		    parent->w_class->lc_name, child->w_name,
 		    child->w_class->lc_name);
 		if (unlocked)
 			mtx_lock_spin(&w_mtx);
 	}
 	adopt(parent, child);
 }
 
 /*
  * Generic code for the isitmy*() functions. The rmask parameter is the
  * expected relationship of w1 to w2.
  */
 static int
 _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
 {
 	unsigned char r1, r2;
 	int i1, i2;
 
 	i1 = w1->w_index;
 	i2 = w2->w_index;
 	WITNESS_INDEX_ASSERT(i1);
 	WITNESS_INDEX_ASSERT(i2);
 	r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK;
 	r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK;
 
 	/* The flags on one better be the inverse of the flags on the other */
 	if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) ||
 	    (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) {
 		/* Don't squawk if we're potentially racing with an update. */
 		if (!mtx_owned(&w_mtx))
 			return (0);
 		printf("%s: rmatrix mismatch between %s (index %d) and %s "
 		    "(index %d): w_rmatrix[%d][%d] == %hhx but "
 		    "w_rmatrix[%d][%d] == %hhx\n",
 		    fname, w1->w_name, i1, w2->w_name, i2, i1, i2, r1,
 		    i2, i1, r2);
 		kdb_backtrace();
 		printf("Witness disabled.\n");
 		witness_watch = -1;
 	}
 	return (r1 & rmask);
 }
 
 /*
  * Checks if @child is a direct child of @parent.
  */
 static int
 isitmychild(struct witness *parent, struct witness *child)
 {
 
 	return (_isitmyx(parent, child, WITNESS_PARENT, __func__));
 }
 
 /*
  * Checks if @descendant is a direct or inderect descendant of @ancestor.
  */
 static int
 isitmydescendant(struct witness *ancestor, struct witness *descendant)
 {
 
 	return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK,
 	    __func__));
 }
 
 static int
 blessed(struct witness *w1, struct witness *w2)
 {
 	int i;
 	struct witness_blessed *b;
 
 	for (i = 0; i < nitems(blessed_list); i++) {
 		b = &blessed_list[i];
 		if (strcmp(w1->w_name, b->b_lock1) == 0) {
 			if (strcmp(w2->w_name, b->b_lock2) == 0)
 				return (1);
 			continue;
 		}
 		if (strcmp(w1->w_name, b->b_lock2) == 0)
 			if (strcmp(w2->w_name, b->b_lock1) == 0)
 				return (1);
 	}
 	return (0);
 }
 
 static struct witness *
 witness_get(void)
 {
 	struct witness *w;
 	int index;
 
 	if (witness_cold == 0)
 		mtx_assert(&w_mtx, MA_OWNED);
 
 	if (witness_watch == -1) {
 		mtx_unlock_spin(&w_mtx);
 		return (NULL);
 	}
 	if (STAILQ_EMPTY(&w_free)) {
 		witness_watch = -1;
 		mtx_unlock_spin(&w_mtx);
 		printf("WITNESS: unable to allocate a new witness object\n");
 		return (NULL);
 	}
 	w = STAILQ_FIRST(&w_free);
 	STAILQ_REMOVE_HEAD(&w_free, w_list);
 	w_free_cnt--;
 	index = w->w_index;
 	MPASS(index > 0 && index == w_max_used_index+1 &&
 	    index < witness_count);
 	bzero(w, sizeof(*w));
 	w->w_index = index;
 	if (index > w_max_used_index)
 		w_max_used_index = index;
 	return (w);
 }
 
 static void
 witness_free(struct witness *w)
 {
 
 	STAILQ_INSERT_HEAD(&w_free, w, w_list);
 	w_free_cnt++;
 }
 
 static struct lock_list_entry *
 witness_lock_list_get(void)
 {
 	struct lock_list_entry *lle;
 
 	if (witness_watch == -1)
 		return (NULL);
 	mtx_lock_spin(&w_mtx);
 	lle = w_lock_list_free;
 	if (lle == NULL) {
 		witness_watch = -1;
 		mtx_unlock_spin(&w_mtx);
 		printf("%s: witness exhausted\n", __func__);
 		return (NULL);
 	}
 	w_lock_list_free = lle->ll_next;
 	mtx_unlock_spin(&w_mtx);
 	bzero(lle, sizeof(*lle));
 	return (lle);
 }
 		
 static void
 witness_lock_list_free(struct lock_list_entry *lle)
 {
 
 	mtx_lock_spin(&w_mtx);
 	lle->ll_next = w_lock_list_free;
 	w_lock_list_free = lle;
 	mtx_unlock_spin(&w_mtx);
 }
 
 static struct lock_instance *
 find_instance(struct lock_list_entry *list, const struct lock_object *lock)
 {
 	struct lock_list_entry *lle;
 	struct lock_instance *instance;
 	int i;
 
 	for (lle = list; lle != NULL; lle = lle->ll_next)
 		for (i = lle->ll_count - 1; i >= 0; i--) {
 			instance = &lle->ll_children[i];
 			if (instance->li_lock == lock)
 				return (instance);
 		}
 	return (NULL);
 }
 
 static void
 witness_list_lock(struct lock_instance *instance,
     int (*prnt)(const char *fmt, ...))
 {
 	struct lock_object *lock;
 
 	lock = instance->li_lock;
 	prnt("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ?
 	    "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name);
 	if (lock->lo_witness->w_name != lock->lo_name)
 		prnt(" (%s)", lock->lo_witness->w_name);
 	prnt(" r = %d (%p) locked @ %s:%d\n",
 	    instance->li_flags & LI_RECURSEMASK, lock,
 	    fixup_filename(instance->li_file), instance->li_line);
 }
 
 static int
 witness_output(const char *fmt, ...)
 {
 	va_list ap;
 	int ret;
 
 	va_start(ap, fmt);
 	ret = witness_voutput(fmt, ap);
 	va_end(ap);
 	return (ret);
 }
 
 static int
 witness_voutput(const char *fmt, va_list ap)
 {
 	int ret;
 
 	ret = 0;
 	switch (witness_channel) {
 	case WITNESS_CONSOLE:
 		ret = vprintf(fmt, ap);
 		break;
 	case WITNESS_LOG:
 		vlog(LOG_NOTICE, fmt, ap);
 		break;
 	case WITNESS_NONE:
 		break;
 	}
 	return (ret);
 }
 
 #ifdef DDB
 static int
 witness_thread_has_locks(struct thread *td)
 {
 
 	if (td->td_sleeplocks == NULL)
 		return (0);
 	return (td->td_sleeplocks->ll_count != 0);
 }
 
 static int
 witness_proc_has_locks(struct proc *p)
 {
 	struct thread *td;
 
 	FOREACH_THREAD_IN_PROC(p, td) {
 		if (witness_thread_has_locks(td))
 			return (1);
 	}
 	return (0);
 }
 #endif
 
 int
 witness_list_locks(struct lock_list_entry **lock_list,
     int (*prnt)(const char *fmt, ...))
 {
 	struct lock_list_entry *lle;
 	int i, nheld;
 
 	nheld = 0;
 	for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
 		for (i = lle->ll_count - 1; i >= 0; i--) {
 			witness_list_lock(&lle->ll_children[i], prnt);
 			nheld++;
 		}
 	return (nheld);
 }
 
 /*
  * This is a bit risky at best.  We call this function when we have timed
  * out acquiring a spin lock, and we assume that the other CPU is stuck
  * with this lock held.  So, we go groveling around in the other CPU's
  * per-cpu data to try to find the lock instance for this spin lock to
  * see when it was last acquired.
  */
 void
 witness_display_spinlock(struct lock_object *lock, struct thread *owner,
     int (*prnt)(const char *fmt, ...))
 {
 	struct lock_instance *instance;
 	struct pcpu *pc;
 
 	if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU)
 		return;
 	pc = pcpu_find(owner->td_oncpu);
 	instance = find_instance(pc->pc_spinlocks, lock);
 	if (instance != NULL)
 		witness_list_lock(instance, prnt);
 }
 
 void
 witness_save(struct lock_object *lock, const char **filep, int *linep)
 {
 	struct lock_list_entry *lock_list;
 	struct lock_instance *instance;
 	struct lock_class *class;
 
 	/*
 	 * This function is used independently in locking code to deal with
 	 * Giant, SCHEDULER_STOPPED() check can be removed here after Giant
 	 * is gone.
 	 */
 	if (SCHEDULER_STOPPED())
 		return;
 	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
 	if (lock->lo_witness == NULL || witness_watch == -1 || KERNEL_PANICKED())
 		return;
 	class = LOCK_CLASS(lock);
 	if (class->lc_flags & LC_SLEEPLOCK)
 		lock_list = curthread->td_sleeplocks;
 	else {
 		if (witness_skipspin)
 			return;
 		lock_list = PCPU_GET(spinlocks);
 	}
 	instance = find_instance(lock_list, lock);
 	if (instance == NULL) {
 		kassert_panic("%s: lock (%s) %s not locked", __func__,
 		    class->lc_name, lock->lo_name);
 		return;
 	}
 	*filep = instance->li_file;
 	*linep = instance->li_line;
 }
 
 void
 witness_restore(struct lock_object *lock, const char *file, int line)
 {
 	struct lock_list_entry *lock_list;
 	struct lock_instance *instance;
 	struct lock_class *class;
 
 	/*
 	 * This function is used independently in locking code to deal with
 	 * Giant, SCHEDULER_STOPPED() check can be removed here after Giant
 	 * is gone.
 	 */
 	if (SCHEDULER_STOPPED())
 		return;
 	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
 	if (lock->lo_witness == NULL || witness_watch == -1 || KERNEL_PANICKED())
 		return;
 	class = LOCK_CLASS(lock);
 	if (class->lc_flags & LC_SLEEPLOCK)
 		lock_list = curthread->td_sleeplocks;
 	else {
 		if (witness_skipspin)
 			return;
 		lock_list = PCPU_GET(spinlocks);
 	}
 	instance = find_instance(lock_list, lock);
 	if (instance == NULL)
 		kassert_panic("%s: lock (%s) %s not locked", __func__,
 		    class->lc_name, lock->lo_name);
 	lock->lo_witness->w_file = file;
 	lock->lo_witness->w_line = line;
 	if (instance == NULL)
 		return;
 	instance->li_file = file;
 	instance->li_line = line;
 }
 
 void
 witness_assert(const struct lock_object *lock, int flags, const char *file,
     int line)
 {
 #ifdef INVARIANT_SUPPORT
 	struct lock_instance *instance;
 	struct lock_class *class;
 
 	if (lock->lo_witness == NULL || witness_watch < 1 || KERNEL_PANICKED())
 		return;
 	class = LOCK_CLASS(lock);
 	if ((class->lc_flags & LC_SLEEPLOCK) != 0)
 		instance = find_instance(curthread->td_sleeplocks, lock);
 	else if ((class->lc_flags & LC_SPINLOCK) != 0)
 		instance = find_instance(PCPU_GET(spinlocks), lock);
 	else {
 		kassert_panic("Lock (%s) %s is not sleep or spin!",
 		    class->lc_name, lock->lo_name);
 		return;
 	}
 	switch (flags) {
 	case LA_UNLOCKED:
 		if (instance != NULL)
 			kassert_panic("Lock (%s) %s locked @ %s:%d.",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		break;
 	case LA_LOCKED:
 	case LA_LOCKED | LA_RECURSED:
 	case LA_LOCKED | LA_NOTRECURSED:
 	case LA_SLOCKED:
 	case LA_SLOCKED | LA_RECURSED:
 	case LA_SLOCKED | LA_NOTRECURSED:
 	case LA_XLOCKED:
 	case LA_XLOCKED | LA_RECURSED:
 	case LA_XLOCKED | LA_NOTRECURSED:
 		if (instance == NULL) {
 			kassert_panic("Lock (%s) %s not locked @ %s:%d.",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 			break;
 		}
 		if ((flags & LA_XLOCKED) != 0 &&
 		    (instance->li_flags & LI_EXCLUSIVE) == 0)
 			kassert_panic(
 			    "Lock (%s) %s not exclusively locked @ %s:%d.",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		if ((flags & LA_SLOCKED) != 0 &&
 		    (instance->li_flags & LI_EXCLUSIVE) != 0)
 			kassert_panic(
 			    "Lock (%s) %s exclusively locked @ %s:%d.",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		if ((flags & LA_RECURSED) != 0 &&
 		    (instance->li_flags & LI_RECURSEMASK) == 0)
 			kassert_panic("Lock (%s) %s not recursed @ %s:%d.",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		if ((flags & LA_NOTRECURSED) != 0 &&
 		    (instance->li_flags & LI_RECURSEMASK) != 0)
 			kassert_panic("Lock (%s) %s recursed @ %s:%d.",
 			    class->lc_name, lock->lo_name,
 			    fixup_filename(file), line);
 		break;
 	default:
 		kassert_panic("Invalid lock assertion at %s:%d.",
 		    fixup_filename(file), line);
 	}
 #endif	/* INVARIANT_SUPPORT */
 }
 
 static void
 witness_setflag(struct lock_object *lock, int flag, int set)
 {
 	struct lock_list_entry *lock_list;
 	struct lock_instance *instance;
 	struct lock_class *class;
 
 	if (lock->lo_witness == NULL || witness_watch == -1 || KERNEL_PANICKED())
 		return;
 	class = LOCK_CLASS(lock);
 	if (class->lc_flags & LC_SLEEPLOCK)
 		lock_list = curthread->td_sleeplocks;
 	else {
 		if (witness_skipspin)
 			return;
 		lock_list = PCPU_GET(spinlocks);
 	}
 	instance = find_instance(lock_list, lock);
 	if (instance == NULL) {
 		kassert_panic("%s: lock (%s) %s not locked", __func__,
 		    class->lc_name, lock->lo_name);
 		return;
 	}
 
 	if (set)
 		instance->li_flags |= flag;
 	else
 		instance->li_flags &= ~flag;
 }
 
 void
 witness_norelease(struct lock_object *lock)
 {
 
 	witness_setflag(lock, LI_NORELEASE, 1);
 }
 
 void
 witness_releaseok(struct lock_object *lock)
 {
 
 	witness_setflag(lock, LI_NORELEASE, 0);
 }
 
 #ifdef DDB
 static void
 witness_ddb_list(struct thread *td)
 {
 
 	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
 	KASSERT(kdb_active, ("%s: not in the debugger", __func__));
 
 	if (witness_watch < 1)
 		return;
 
 	witness_list_locks(&td->td_sleeplocks, db_printf);
 
 	/*
 	 * We only handle spinlocks if td == curthread.  This is somewhat broken
 	 * if td is currently executing on some other CPU and holds spin locks
 	 * as we won't display those locks.  If we had a MI way of getting
 	 * the per-cpu data for a given cpu then we could use
 	 * td->td_oncpu to get the list of spinlocks for this thread
 	 * and "fix" this.
 	 *
 	 * That still wouldn't really fix this unless we locked the scheduler
 	 * lock or stopped the other CPU to make sure it wasn't changing the
 	 * list out from under us.  It is probably best to just not try to
 	 * handle threads on other CPU's for now.
 	 */
 	if (td == curthread && PCPU_GET(spinlocks) != NULL)
 		witness_list_locks(PCPU_PTR(spinlocks), db_printf);
 }
 
 DB_SHOW_COMMAND(locks, db_witness_list)
 {
 	struct thread *td;
 
 	if (have_addr)
 		td = db_lookup_thread(addr, true);
 	else
 		td = kdb_thread;
 	witness_ddb_list(td);
 }
 
 DB_SHOW_ALL_COMMAND(locks, db_witness_list_all)
 {
 	struct thread *td;
 	struct proc *p;
 
 	/*
 	 * It would be nice to list only threads and processes that actually
 	 * held sleep locks, but that information is currently not exported
 	 * by WITNESS.
 	 */
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (!witness_proc_has_locks(p))
 			continue;
 		FOREACH_THREAD_IN_PROC(p, td) {
 			if (!witness_thread_has_locks(td))
 				continue;
 			db_printf("Process %d (%s) thread %p (%d)\n", p->p_pid,
 			    p->p_comm, td, td->td_tid);
 			witness_ddb_list(td);
 			if (db_pager_quit)
 				return;
 		}
 	}
 }
 DB_SHOW_ALIAS(alllocks, db_witness_list_all)
 
 DB_SHOW_COMMAND(witness, db_witness_display)
 {
 
 	witness_ddb_display(db_printf);
 }
 #endif
 
 static void
 sbuf_print_witness_badstacks(struct sbuf *sb, size_t *oldidx)
 {
 	struct witness_lock_order_data *data1, *data2, *tmp_data1, *tmp_data2;
 	struct witness *tmp_w1, *tmp_w2, *w1, *w2;
 	int generation, i, j;
 
 	tmp_data1 = NULL;
 	tmp_data2 = NULL;
 	tmp_w1 = NULL;
 	tmp_w2 = NULL;
 
 	/* Allocate and init temporary storage space. */
 	tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
 	tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
 	tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, 
 	    M_WAITOK | M_ZERO);
 	tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, 
 	    M_WAITOK | M_ZERO);
 	stack_zero(&tmp_data1->wlod_stack);
 	stack_zero(&tmp_data2->wlod_stack);
 
 restart:
 	mtx_lock_spin(&w_mtx);
 	generation = w_generation;
 	mtx_unlock_spin(&w_mtx);
 	sbuf_printf(sb, "Number of known direct relationships is %d\n",
 	    w_lohash.wloh_count);
 	for (i = 1; i < w_max_used_index; i++) {
 		mtx_lock_spin(&w_mtx);
 		if (generation != w_generation) {
 			mtx_unlock_spin(&w_mtx);
 
 			/* The graph has changed, try again. */
 			*oldidx = 0;
 			sbuf_clear(sb);
 			goto restart;
 		}
 
 		w1 = &w_data[i];
 		if (w1->w_reversed == 0) {
 			mtx_unlock_spin(&w_mtx);
 			continue;
 		}
 
 		/* Copy w1 locally so we can release the spin lock. */
 		*tmp_w1 = *w1;
 		mtx_unlock_spin(&w_mtx);
 
 		if (tmp_w1->w_reversed == 0)
 			continue;
 		for (j = 1; j < w_max_used_index; j++) {
 			if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j)
 				continue;
 
 			mtx_lock_spin(&w_mtx);
 			if (generation != w_generation) {
 				mtx_unlock_spin(&w_mtx);
 
 				/* The graph has changed, try again. */
 				*oldidx = 0;
 				sbuf_clear(sb);
 				goto restart;
 			}
 
 			w2 = &w_data[j];
 			data1 = witness_lock_order_get(w1, w2);
 			data2 = witness_lock_order_get(w2, w1);
 
 			/*
 			 * Copy information locally so we can release the
 			 * spin lock.
 			 */
 			*tmp_w2 = *w2;
 
 			if (data1) {
 				stack_zero(&tmp_data1->wlod_stack);
 				stack_copy(&data1->wlod_stack,
 				    &tmp_data1->wlod_stack);
 			}
 			if (data2 && data2 != data1) {
 				stack_zero(&tmp_data2->wlod_stack);
 				stack_copy(&data2->wlod_stack,
 				    &tmp_data2->wlod_stack);
 			}
 			mtx_unlock_spin(&w_mtx);
 
 			if (blessed(tmp_w1, tmp_w2))
 				continue;
 
 			sbuf_printf(sb,
 	    "\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n",
 			    tmp_w1->w_name, tmp_w1->w_class->lc_name, 
 			    tmp_w2->w_name, tmp_w2->w_class->lc_name);
 			if (data1) {
 				sbuf_printf(sb,
 			"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
 				    tmp_w1->w_name, tmp_w1->w_class->lc_name, 
 				    tmp_w2->w_name, tmp_w2->w_class->lc_name);
 				stack_sbuf_print(sb, &tmp_data1->wlod_stack);
 				sbuf_printf(sb, "\n");
 			}
 			if (data2 && data2 != data1) {
 				sbuf_printf(sb,
 			"Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
 				    tmp_w2->w_name, tmp_w2->w_class->lc_name, 
 				    tmp_w1->w_name, tmp_w1->w_class->lc_name);
 				stack_sbuf_print(sb, &tmp_data2->wlod_stack);
 				sbuf_printf(sb, "\n");
 			}
 		}
 	}
 	mtx_lock_spin(&w_mtx);
 	if (generation != w_generation) {
 		mtx_unlock_spin(&w_mtx);
 
 		/*
 		 * The graph changed while we were printing stack data,
 		 * try again.
 		 */
 		*oldidx = 0;
 		sbuf_clear(sb);
 		goto restart;
 	}
 	mtx_unlock_spin(&w_mtx);
 
 	/* Free temporary storage space. */
 	free(tmp_data1, M_TEMP);
 	free(tmp_data2, M_TEMP);
 	free(tmp_w1, M_TEMP);
 	free(tmp_w2, M_TEMP);
 }
 
 static int
 sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf *sb;
 	int error;
 
 	if (witness_watch < 1) {
 		error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning));
 		return (error);
 	}
 	if (witness_cold) {
 		error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold));
 		return (error);
 	}
 	error = 0;
 	sb = sbuf_new(NULL, NULL, badstack_sbuf_size, SBUF_AUTOEXTEND);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	sbuf_print_witness_badstacks(sb, &req->oldidx);
 
 	sbuf_finish(sb);
 	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 	sbuf_delete(sb);
 
 	return (error);
 }
 
 #ifdef DDB
 static int
 sbuf_db_printf_drain(void *arg __unused, const char *data, int len)
 {
 
 	return (db_printf("%.*s", len, data));
 }
 
 DB_SHOW_COMMAND(badstacks, db_witness_badstacks)
 {
 	struct sbuf sb;
 	char buffer[128];
 	size_t dummy;
 
 	sbuf_new(&sb, buffer, sizeof(buffer), SBUF_FIXEDLEN);
 	sbuf_set_drain(&sb, sbuf_db_printf_drain, NULL);
 	sbuf_print_witness_badstacks(&sb, &dummy);
 	sbuf_finish(&sb);
 }
 #endif
 
 static int
 sysctl_debug_witness_channel(SYSCTL_HANDLER_ARGS)
 {
 	static const struct {
 		enum witness_channel channel;
 		const char *name;
 	} channels[] = {
 		{ WITNESS_CONSOLE, "console" },
 		{ WITNESS_LOG, "log" },
 		{ WITNESS_NONE, "none" },
 	};
 	char buf[16];
 	u_int i;
 	int error;
 
 	buf[0] = '\0';
 	for (i = 0; i < nitems(channels); i++)
 		if (witness_channel == channels[i].channel) {
 			snprintf(buf, sizeof(buf), "%s", channels[i].name);
 			break;
 		}
 
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	error = EINVAL;
 	for (i = 0; i < nitems(channels); i++)
 		if (strcmp(channels[i].name, buf) == 0) {
 			witness_channel = channels[i].channel;
 			error = 0;
 			break;
 		}
 	return (error);
 }
 
 static int
 sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS)
 {
 	struct witness *w;
 	struct sbuf *sb;
 	int error;
 
 #ifdef __i386__
 	error = SYSCTL_OUT(req, w_notallowed, sizeof(w_notallowed));
 	return (error);
 #endif
 
 	if (witness_watch < 1) {
 		error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning));
 		return (error);
 	}
 	if (witness_cold) {
 		error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold));
 		return (error);
 	}
 	error = 0;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sb = sbuf_new_for_sysctl(NULL, NULL, FULLGRAPH_SBUF_SIZE, req);
 	if (sb == NULL)
 		return (ENOMEM);
 	sbuf_printf(sb, "\n");
 
 	mtx_lock_spin(&w_mtx);
 	STAILQ_FOREACH(w, &w_all, w_list)
 		w->w_displayed = 0;
 	STAILQ_FOREACH(w, &w_all, w_list)
 		witness_add_fullgraph(sb, w);
 	mtx_unlock_spin(&w_mtx);
 
 	/*
 	 * Close the sbuf and return to userland.
 	 */
 	error = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (error);
 }
 
 static int
 sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS)
 {
 	int error, value;
 
 	value = witness_watch;
 	error = sysctl_handle_int(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value > 1 || value < -1 ||
 	    (witness_watch == -1 && value != witness_watch))
 		return (EINVAL);
 	witness_watch = value;
 	return (0);
 }
 
 static void
 witness_add_fullgraph(struct sbuf *sb, struct witness *w)
 {
 	int i;
 
 	if (w->w_displayed != 0 || (w->w_file == NULL && w->w_line == 0))
 		return;
 	w->w_displayed = 1;
 
 	WITNESS_INDEX_ASSERT(w->w_index);
 	for (i = 1; i <= w_max_used_index; i++) {
 		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) {
 			sbuf_printf(sb, "\"%s\",\"%s\"\n", w->w_name,
 			    w_data[i].w_name);
 			witness_add_fullgraph(sb, &w_data[i]);
 		}
 	}
 }
 
 /*
  * A simple hash function. Takes a key pointer and a key size. If size == 0,
  * interprets the key as a string and reads until the null
  * terminator. Otherwise, reads the first size bytes. Returns an unsigned 32-bit
  * hash value computed from the key.
  */
 static uint32_t
 witness_hash_djb2(const uint8_t *key, uint32_t size)
 {
 	unsigned int hash = 5381;
 	int i;
 
 	/* hash = hash * 33 + key[i] */
 	if (size)
 		for (i = 0; i < size; i++)
 			hash = ((hash << 5) + hash) + (unsigned int)key[i];
 	else
 		for (i = 0; key[i] != 0; i++)
 			hash = ((hash << 5) + hash) + (unsigned int)key[i];
 
 	return (hash);
 }
 
 /*
  * Initializes the two witness hash tables. Called exactly once from
  * witness_initialize().
  */
 static void
 witness_init_hash_tables(void)
 {
 	int i;
 
 	MPASS(witness_cold);
 
 	/* Initialize the hash tables. */
 	for (i = 0; i < WITNESS_HASH_SIZE; i++)
 		w_hash.wh_array[i] = NULL;
 
 	w_hash.wh_size = WITNESS_HASH_SIZE;
 	w_hash.wh_count = 0;
 
 	/* Initialize the lock order data hash. */
 	w_lofree = NULL;
 	for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) {
 		memset(&w_lodata[i], 0, sizeof(w_lodata[i]));
 		w_lodata[i].wlod_next = w_lofree;
 		w_lofree = &w_lodata[i];
 	}
 	w_lohash.wloh_size = WITNESS_LO_HASH_SIZE;
 	w_lohash.wloh_count = 0;
 	for (i = 0; i < WITNESS_LO_HASH_SIZE; i++)
 		w_lohash.wloh_array[i] = NULL;
 }
 
 static struct witness *
 witness_hash_get(const char *key)
 {
 	struct witness *w;
 	uint32_t hash;
 
 	MPASS(key != NULL);
 	if (witness_cold == 0)
 		mtx_assert(&w_mtx, MA_OWNED);
 	hash = witness_hash_djb2(key, 0) % w_hash.wh_size;
 	w = w_hash.wh_array[hash];
 	while (w != NULL) {
 		if (strcmp(w->w_name, key) == 0)
 			goto out;
 		w = w->w_hash_next;
 	}
 
 out:
 	return (w);
 }
 
 static void
 witness_hash_put(struct witness *w)
 {
 	uint32_t hash;
 
 	MPASS(w != NULL);
 	MPASS(w->w_name != NULL);
 	if (witness_cold == 0)
 		mtx_assert(&w_mtx, MA_OWNED);
 	KASSERT(witness_hash_get(w->w_name) == NULL,
 	    ("%s: trying to add a hash entry that already exists!", __func__));
 	KASSERT(w->w_hash_next == NULL,
 	    ("%s: w->w_hash_next != NULL", __func__));
 
 	hash = witness_hash_djb2(w->w_name, 0) % w_hash.wh_size;
 	w->w_hash_next = w_hash.wh_array[hash];
 	w_hash.wh_array[hash] = w;
 	w_hash.wh_count++;
 }
 
 static struct witness_lock_order_data *
 witness_lock_order_get(struct witness *parent, struct witness *child)
 {
 	struct witness_lock_order_data *data = NULL;
 	struct witness_lock_order_key key;
 	unsigned int hash;
 
 	MPASS(parent != NULL && child != NULL);
 	key.from = parent->w_index;
 	key.to = child->w_index;
 	WITNESS_INDEX_ASSERT(key.from);
 	WITNESS_INDEX_ASSERT(key.to);
 	if ((w_rmatrix[parent->w_index][child->w_index]
 	    & WITNESS_LOCK_ORDER_KNOWN) == 0)
 		goto out;
 
 	hash = witness_hash_djb2((const char*)&key,
 	    sizeof(key)) % w_lohash.wloh_size;
 	data = w_lohash.wloh_array[hash];
 	while (data != NULL) {
 		if (witness_lock_order_key_equal(&data->wlod_key, &key))
 			break;
 		data = data->wlod_next;
 	}
 
 out:
 	return (data);
 }
 
 /*
  * Verify that parent and child have a known relationship, are not the same,
  * and child is actually a child of parent.  This is done without w_mtx
  * to avoid contention in the common case.
  */
 static int
 witness_lock_order_check(struct witness *parent, struct witness *child)
 {
 
 	if (parent != child &&
 	    w_rmatrix[parent->w_index][child->w_index]
 	    & WITNESS_LOCK_ORDER_KNOWN &&
 	    isitmychild(parent, child))
 		return (1);
 
 	return (0);
 }
 
 static int
 witness_lock_order_add(struct witness *parent, struct witness *child)
 {
 	struct witness_lock_order_data *data = NULL;
 	struct witness_lock_order_key key;
 	unsigned int hash;
 
 	MPASS(parent != NULL && child != NULL);
 	key.from = parent->w_index;
 	key.to = child->w_index;
 	WITNESS_INDEX_ASSERT(key.from);
 	WITNESS_INDEX_ASSERT(key.to);
 	if (w_rmatrix[parent->w_index][child->w_index]
 	    & WITNESS_LOCK_ORDER_KNOWN)
 		return (1);
 
 	hash = witness_hash_djb2((const char*)&key,
 	    sizeof(key)) % w_lohash.wloh_size;
 	w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
 	data = w_lofree;
 	if (data == NULL)
 		return (0);
 	w_lofree = data->wlod_next;
 	data->wlod_next = w_lohash.wloh_array[hash];
 	data->wlod_key = key;
 	w_lohash.wloh_array[hash] = data;
 	w_lohash.wloh_count++;
-	stack_zero(&data->wlod_stack);
 	stack_save(&data->wlod_stack);
 	return (1);
 }
 
 /* Call this whenever the structure of the witness graph changes. */
 static void
 witness_increment_graph_generation(void)
 {
 
 	if (witness_cold == 0)
 		mtx_assert(&w_mtx, MA_OWNED);
 	w_generation++;
 }
 
 static int
 witness_output_drain(void *arg __unused, const char *data, int len)
 {
 
 	witness_output("%.*s", len, data);
 	return (len);
 }
 
 static void
 witness_debugger(int cond, const char *msg)
 {
 	char buf[32];
 	struct sbuf sb;
 	struct stack st;
 
 	if (!cond)
 		return;
 
 	if (witness_trace) {
 		sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
 		sbuf_set_drain(&sb, witness_output_drain, NULL);
 
-		stack_zero(&st);
 		stack_save(&st);
 		witness_output("stack backtrace:\n");
 		stack_sbuf_print_ddb(&sb, &st);
 
 		sbuf_finish(&sb);
 	}
 
 	witness_enter_debugger(msg);
 }
 
 static void
 witness_enter_debugger(const char *msg)
 {
 #ifdef KDB
 	if (witness_kdb)
 		kdb_enter(KDB_WHY_WITNESS, msg);
 #endif
 }