diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c index 3185e77aba46..2d384b6b9a60 100644 --- a/sys/kern/kern_physio.c +++ b/sys/kern/kern_physio.c @@ -1,213 +1,213 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1994 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. Modifications may be freely made to this file if the above conditions * are met. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int physio(struct cdev *dev, struct uio *uio, int ioflag) { struct cdevsw *csw; struct buf *pbuf; struct bio *bp; struct vm_page **pages; char *base, *sa; u_int iolen, poff; int error, i, npages, maxpages; vm_prot_t prot; csw = dev->si_devsw; npages = 0; sa = NULL; /* check if character device is being destroyed */ if (csw == NULL) return (ENXIO); /* XXX: sanity check */ - if(dev->si_iosize_max < PAGE_SIZE) { + if (dev->si_iosize_max < PAGE_SIZE) { printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n", devtoname(dev), dev->si_iosize_max); dev->si_iosize_max = DFLTPHYS; } /* * If the driver does not want I/O to be split, that means that we * need to reject any requests that will not fit into one buffer. */ if (dev->si_flags & SI_NOSPLIT && (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > maxphys || uio->uio_iovcnt > 1)) { /* * Tell the user why his I/O was rejected. */ if (uio->uio_resid > dev->si_iosize_max) uprintf("%s: request size=%zd > si_iosize_max=%d; " "cannot split request\n", devtoname(dev), uio->uio_resid, dev->si_iosize_max); if (uio->uio_resid > maxphys) uprintf("%s: request size=%zd > maxphys=%lu; " "cannot split request\n", devtoname(dev), uio->uio_resid, maxphys); if (uio->uio_iovcnt > 1) uprintf("%s: request vectors=%d > 1; " "cannot split request\n", devtoname(dev), uio->uio_iovcnt); return (EFBIG); } /* * Keep the process UPAGES from being swapped. Processes swapped * out while holding pbufs, used by swapper, may lead to deadlock. 
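 *
 * (Editorial aside, not part of the original file.)  Each pass of the
 * transfer loop below clamps the bio to
 *
 *	bp->bio_length = MIN(uio->uio_iov[i].iov_len,
 *	    MIN(dev->si_iosize_max, maxphys));
 *
 * so a request that passed the SI_NOSPLIT checks above is simply carved
 * into pieces: e.g. a 10 MB read against maxphys = 1 MB completes as
 * ten consecutive bios.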
*/ PHOLD(curproc); bp = g_alloc_bio(); if (uio->uio_segflg != UIO_USERSPACE) { pbuf = NULL; pages = NULL; } else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { pbuf = NULL; maxpages = btoc(MIN(uio->uio_resid, maxphys)) + 1; pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK); } else { pbuf = uma_zalloc(pbuf_zone, M_WAITOK); MPASS((pbuf->b_flags & B_MAXPHYS) != 0); sa = pbuf->b_data; maxpages = PBUF_PAGES; pages = pbuf->b_pages; } prot = VM_PROT_READ; if (uio->uio_rw == UIO_READ) prot |= VM_PROT_WRITE; /* Less backwards than it looks */ error = 0; for (i = 0; i < uio->uio_iovcnt; i++) { #ifdef RACCT if (racct_enable) { PROC_LOCK(curproc); if (uio->uio_rw == UIO_READ) { racct_add_force(curproc, RACCT_READBPS, uio->uio_iov[i].iov_len); racct_add_force(curproc, RACCT_READIOPS, 1); } else { racct_add_force(curproc, RACCT_WRITEBPS, uio->uio_iov[i].iov_len); racct_add_force(curproc, RACCT_WRITEIOPS, 1); } PROC_UNLOCK(curproc); } #endif /* RACCT */ while (uio->uio_iov[i].iov_len) { g_reset_bio(bp); if (uio->uio_rw == UIO_READ) { bp->bio_cmd = BIO_READ; curthread->td_ru.ru_inblock++; } else { bp->bio_cmd = BIO_WRITE; curthread->td_ru.ru_oublock++; } bp->bio_offset = uio->uio_offset; base = uio->uio_iov[i].iov_base; bp->bio_length = uio->uio_iov[i].iov_len; if (bp->bio_length > dev->si_iosize_max) bp->bio_length = dev->si_iosize_max; if (bp->bio_length > maxphys) bp->bio_length = maxphys; bp->bio_bcount = bp->bio_length; bp->bio_dev = dev; if (pages) { if ((npages = vm_fault_quick_hold_pages( &curproc->p_vmspace->vm_map, (vm_offset_t)base, bp->bio_length, prot, pages, maxpages)) < 0) { error = EFAULT; goto doerror; } poff = (vm_offset_t)base & PAGE_MASK; if (pbuf && sa) { pmap_qenter((vm_offset_t)sa, pages, npages); bp->bio_data = sa + poff; } else { bp->bio_ma = pages; bp->bio_ma_n = npages; bp->bio_ma_offset = poff; bp->bio_data = unmapped_buf; bp->bio_flags |= BIO_UNMAPPED; } } else bp->bio_data = base; csw->d_strategy(bp); if (uio->uio_rw == UIO_READ) biowait(bp, "physrd"); else biowait(bp, "physwr"); if (pages) { if (pbuf) pmap_qremove((vm_offset_t)sa, npages); vm_page_unhold_pages(pages, npages); } iolen = bp->bio_length - bp->bio_resid; if (iolen == 0 && !(bp->bio_flags & BIO_ERROR)) goto doerror; /* EOF */ uio->uio_iov[i].iov_len -= iolen; uio->uio_iov[i].iov_base = (char *)uio->uio_iov[i].iov_base + iolen; uio->uio_resid -= iolen; uio->uio_offset += iolen; if (bp->bio_flags & BIO_ERROR) { error = bp->bio_error; goto doerror; } } } doerror: if (pbuf) uma_zfree(pbuf_zone, pbuf); else if (pages) free(pages, M_DEVBUF); g_destroy_bio(bp); PRELE(curproc); return (error); } diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index ffb6ac196ba3..011e3f44a124 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,2985 +1,2985 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD * project, to make these variables more userfriendly. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_sysctl.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef DDB #include #include #endif #include #include #include #include static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); /* * The sysctllock protects the MIB tree. It also protects sysctl * contexts used with dynamic sysctls. The sysctl_register_oid() and * sysctl_unregister_oid() routines require the sysctllock to already * be held, so the sysctl_wlock() and sysctl_wunlock() routines are * provided for the few places in the kernel which need to use that * API rather than using the dynamic API. Use of the dynamic API is * strongly encouraged for most code. * * The sysctlmemlock is used to limit the amount of user memory wired for * sysctl requests. This is implemented by serializing any userland * sysctl requests larger than a single page via an exclusive lock. * * The sysctlstringlock is used to protect concurrent access to writable * string nodes in sysctl_handle_string(). 
 */
static struct rmlock sysctllock;
static struct sx __exclusive_cache_line sysctlmemlock;
static struct sx sysctlstringlock;

#define	SYSCTL_WLOCK()		rm_wlock(&sysctllock)
#define	SYSCTL_WUNLOCK()	rm_wunlock(&sysctllock)
#define	SYSCTL_RLOCK(tracker)	rm_rlock(&sysctllock, (tracker))
#define	SYSCTL_RUNLOCK(tracker)	rm_runlock(&sysctllock, (tracker))
#define	SYSCTL_WLOCKED()	rm_wowned(&sysctllock)
#define	SYSCTL_ASSERT_LOCKED()	rm_assert(&sysctllock, RA_LOCKED)
#define	SYSCTL_ASSERT_WLOCKED()	rm_assert(&sysctllock, RA_WLOCKED)
#define	SYSCTL_ASSERT_RLOCKED()	rm_assert(&sysctllock, RA_RLOCKED)
#define	SYSCTL_INIT()		rm_init_flags(&sysctllock, "sysctl lock", \
				    RM_SLEEPABLE)
#define	SYSCTL_SLEEP(ch, wmesg, timo) \
				rm_sleep(ch, &sysctllock, 0, wmesg, timo)

static int sysctl_root(SYSCTL_HANDLER_ARGS);

/* Root list */
struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children);

static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
    int recurse);
static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t);
static int sysctl_new_kernel(struct sysctl_req *, void *, size_t);

static struct sysctl_oid *
sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
{
	struct sysctl_oid *oidp;

	SYSCTL_ASSERT_LOCKED();
	SLIST_FOREACH(oidp, list, oid_link) {
		if (strcmp(oidp->oid_name, name) == 0) {
			return (oidp);
		}
	}
	return (NULL);
}

/*
 * Initialization of the MIB tree.
 *
 * Order by number in each list.
 */
void
sysctl_wlock(void)
{

	SYSCTL_WLOCK();
}

void
sysctl_wunlock(void)
{

	SYSCTL_WUNLOCK();
}

static int
sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intmax_t arg2,
    struct sysctl_req *req, struct rm_priotracker *tracker)
{
	int error;

	if (oid->oid_kind & CTLFLAG_DYN)
		atomic_add_int(&oid->oid_running, 1);
	if (tracker != NULL)
		SYSCTL_RUNLOCK(tracker);
	else
		SYSCTL_WUNLOCK();

	/*
	 * Treat set CTLFLAG_NEEDGIANT and unset CTLFLAG_MPSAFE flags the same,
	 * until we're ready to remove all traces of Giant from sysctl(9).
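	 *
	 * (Editor's illustration, not in the original file.)  Whether the
	 * Giant bracket below applies is decided at declaration time; a
	 * handler registered as
	 *
	 *	SYSCTL_PROC(_kern, OID_AUTO, demo,
	 *	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	 *	    NULL, 0, demo_handler, "I", "Giant-free example");
	 *
	 * runs without Giant ("demo"/"demo_handler" are hypothetical), while
	 * omitting CTLFLAG_MPSAFE, or setting CTLFLAG_NEEDGIANT, keeps the
	 * handler wrapped in mtx_lock(&Giant)/mtx_unlock(&Giant).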
*/ if ((oid->oid_kind & CTLFLAG_NEEDGIANT) || (!(oid->oid_kind & CTLFLAG_MPSAFE))) mtx_lock(&Giant); error = oid->oid_handler(oid, arg1, arg2, req); if ((oid->oid_kind & CTLFLAG_NEEDGIANT) || (!(oid->oid_kind & CTLFLAG_MPSAFE))) mtx_unlock(&Giant); KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); if (tracker != NULL) SYSCTL_RLOCK(tracker); else SYSCTL_WLOCK(); if (oid->oid_kind & CTLFLAG_DYN) { if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 && (oid->oid_kind & CTLFLAG_DYING) != 0) wakeup(&oid->oid_running); } return (error); } static void sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) { struct sysctl_req req; struct sysctl_oid *curr; char *penv = NULL; char path[96]; ssize_t rem = sizeof(path); ssize_t len; uint8_t data[512] __aligned(sizeof(uint64_t)); int size; int error; path[--rem] = 0; for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) { len = strlen(curr->oid_name); rem -= len; if (curr != oidp) rem -= 1; if (rem < 0) { printf("OID path exceeds %d bytes\n", (int)sizeof(path)); return; } memcpy(path + rem, curr->oid_name, len); if (curr != oidp) path[rem + len] = '.'; } memset(&req, 0, sizeof(req)); req.td = curthread; req.oldfunc = sysctl_old_kernel; req.newfunc = sysctl_new_kernel; req.lock = REQ_UNWIRED; switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_INT: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_UINT: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_LONG: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(long), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_ULONG: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(long), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S8: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int8_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S16: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int16_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S32: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int32_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S64: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int64_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U8: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint8_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U16: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint16_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U32: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint32_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U64: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint64_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_STRING: penv = kern_getenv(path + rem); if (penv == NULL) return; req.newlen = strlen(penv); req.newptr = penv; break; default: return; } error = sysctl_root_handler_locked(oidp, oidp->oid_arg1, oidp->oid_arg2, &req, NULL); if (error != 0) 
printf("Setting sysctl %s failed: %d\n", path + rem, error); if (penv != NULL) freeenv(penv); } /* * Locate the path to a given oid. Returns the length of the resulting path, * or -1 if the oid was not found. nodes must have room for CTL_MAXNAME * elements and be NULL initialized. */ static int sysctl_search_oid(struct sysctl_oid **nodes, struct sysctl_oid *needle) { int indx; SYSCTL_ASSERT_LOCKED(); indx = 0; while (indx < CTL_MAXNAME && indx >= 0) { if (nodes[indx] == NULL && indx == 0) nodes[indx] = SLIST_FIRST(&sysctl__children); else if (nodes[indx] == NULL) nodes[indx] = SLIST_FIRST(&nodes[indx - 1]->oid_children); else nodes[indx] = SLIST_NEXT(nodes[indx], oid_link); if (nodes[indx] == needle) return (indx + 1); if (nodes[indx] == NULL) { indx--; continue; } if ((nodes[indx]->oid_kind & CTLTYPE) == CTLTYPE_NODE) { indx++; continue; } } return (-1); } static void sysctl_warn_reuse(const char *func, struct sysctl_oid *leaf) { struct sysctl_oid *nodes[CTL_MAXNAME]; char buf[128]; struct sbuf sb; int rc, i; (void)sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); sbuf_printf(&sb, "%s: can't re-use a leaf (", __func__); memset(nodes, 0, sizeof(nodes)); rc = sysctl_search_oid(nodes, leaf); if (rc > 0) { for (i = 0; i < rc; i++) sbuf_printf(&sb, "%s%.*s", nodes[i]->oid_name, i != (rc - 1), "."); } else { sbuf_printf(&sb, "%s", leaf->oid_name); } sbuf_printf(&sb, ")!\n"); (void)sbuf_finish(&sb); } #ifdef SYSCTL_DEBUG static int sysctl_reuse_test(SYSCTL_HANDLER_ARGS) { struct rm_priotracker tracker; SYSCTL_RLOCK(&tracker); sysctl_warn_reuse(__func__, oidp); SYSCTL_RUNLOCK(&tracker); return (0); } SYSCTL_PROC(_sysctl, OID_AUTO, reuse_test, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_reuse_test, "-", ""); #endif void sysctl_register_oid(struct sysctl_oid *oidp) { struct sysctl_oid_list *parent = oidp->oid_parent; struct sysctl_oid *p; struct sysctl_oid *q; int oid_number; int timeout = 2; /* * First check if another oid with the same name already * exists in the parent's list. */ SYSCTL_ASSERT_WLOCKED(); p = sysctl_find_oidname(oidp->oid_name, parent); if (p != NULL) { if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { p->oid_refcnt++; return; } else { sysctl_warn_reuse(__func__, p); return; } } /* get current OID number */ oid_number = oidp->oid_number; #if (OID_AUTO >= 0) #error "OID_AUTO is expected to be a negative value" #endif /* * Any negative OID number qualifies as OID_AUTO. Valid OID * numbers should always be positive. * * NOTE: DO NOT change the starting value here, change it in * , and make sure it is at least 256 to * accommodate e.g. net.inet.raw as a static sysctl node. */ if (oid_number < 0) { static int newoid; /* * By decrementing the next OID number we spend less * time inserting the OIDs into a sorted list. */ if (--newoid < CTL_AUTO_START) newoid = 0x7fffffff; oid_number = newoid; } /* * Insert the OID into the parent's list sorted by OID number. 
*/ retry: q = NULL; SLIST_FOREACH(p, parent, oid_link) { /* check if the current OID number is in use */ if (oid_number == p->oid_number) { /* get the next valid OID number */ if (oid_number < CTL_AUTO_START || oid_number == 0x7fffffff) { /* wraparound - restart */ oid_number = CTL_AUTO_START; /* don't loop forever */ if (!timeout--) panic("sysctl: Out of OID numbers\n"); goto retry; } else { oid_number++; } } else if (oid_number < p->oid_number) break; q = p; } /* check for non-auto OID number collision */ if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START && oid_number >= CTL_AUTO_START) { printf("sysctl: OID number(%d) is already in use for '%s'\n", oidp->oid_number, oidp->oid_name); } /* update the OID number, if any */ oidp->oid_number = oid_number; if (q != NULL) SLIST_INSERT_AFTER(q, oidp, oid_link); else SLIST_INSERT_HEAD(parent, oidp, oid_link); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && #ifdef VIMAGE (oidp->oid_kind & CTLFLAG_VNET) == 0 && #endif (oidp->oid_kind & CTLFLAG_TUN) != 0 && (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { /* only fetch value once */ oidp->oid_kind |= CTLFLAG_NOFETCH; /* try to fetch value from kernel environment */ sysctl_load_tunable_by_oid_locked(oidp); } } void sysctl_register_disabled_oid(struct sysctl_oid *oidp) { /* * Mark the leaf as dormant if it's not to be immediately enabled. * We do not disable nodes as they can be shared between modules * and it is always safe to access a node. */ KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0, ("internal flag is set in oid_kind")); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) oidp->oid_kind |= CTLFLAG_DORMANT; sysctl_register_oid(oidp); } void sysctl_enable_oid(struct sysctl_oid *oidp) { SYSCTL_ASSERT_WLOCKED(); if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0, ("sysctl node is marked as dormant")); return; } KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) != 0, ("enabling already enabled sysctl oid")); oidp->oid_kind &= ~CTLFLAG_DORMANT; } void sysctl_unregister_oid(struct sysctl_oid *oidp) { struct sysctl_oid *p; int error; SYSCTL_ASSERT_WLOCKED(); if (oidp->oid_number == OID_AUTO) { error = EINVAL; } else { error = ENOENT; SLIST_FOREACH(p, oidp->oid_parent, oid_link) { if (p == oidp) { SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link); error = 0; break; } } } /* * This can happen when a module fails to register and is * being unloaded afterwards. It should not be a panic() * for normal use. */ if (error) { printf("%s: failed(%d) to unregister sysctl(%s)\n", __func__, error, oidp->oid_name); } } /* Initialize a new context to keep track of dynamically added sysctls. */ int sysctl_ctx_init(struct sysctl_ctx_list *c) { if (c == NULL) { return (EINVAL); } /* * No locking here, the caller is responsible for not adding * new nodes to a context until after this function has * returned. */ TAILQ_INIT(c); return (0); } /* Free the context, and destroy all dynamic oids registered in this context */ int sysctl_ctx_free(struct sysctl_ctx_list *clist) { struct sysctl_ctx_entry *e, *e1; int error; error = 0; /* * First perform a "dry run" to check if it's ok to remove oids. * XXX FIXME * XXX This algorithm is a hack. But I don't know any * XXX better solution for now... */ SYSCTL_WLOCK(); TAILQ_FOREACH(e, clist, link) { error = sysctl_remove_oid_locked(e->entry, 0, 0); if (error) break; } /* * Restore deregistered entries, either from the end, * or from the place where error occurred. 
* e contains the entry that was not unregistered */ if (error) e1 = TAILQ_PREV(e, sysctl_ctx_list, link); else e1 = TAILQ_LAST(clist, sysctl_ctx_list); while (e1 != NULL) { sysctl_register_oid(e1->entry); e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); } if (error) { SYSCTL_WUNLOCK(); return(EBUSY); } /* Now really delete the entries */ e = TAILQ_FIRST(clist); while (e != NULL) { e1 = TAILQ_NEXT(e, link); error = sysctl_remove_oid_locked(e->entry, 1, 0); if (error) panic("sysctl_remove_oid: corrupt tree, entry: %s", e->entry->oid_name); free(e, M_SYSCTLOID); e = e1; } SYSCTL_WUNLOCK(); return (error); } /* Add an entry to the context */ struct sysctl_ctx_entry * sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; SYSCTL_ASSERT_WLOCKED(); if (clist == NULL || oidp == NULL) return(NULL); e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); e->entry = oidp; TAILQ_INSERT_HEAD(clist, e, link); return (e); } /* Find an entry in the context */ struct sysctl_ctx_entry * sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; SYSCTL_ASSERT_WLOCKED(); if (clist == NULL || oidp == NULL) return(NULL); TAILQ_FOREACH(e, clist, link) { - if(e->entry == oidp) + if (e->entry == oidp) return(e); } return (e); } /* * Delete an entry from the context. * NOTE: this function doesn't free oidp! You have to remove it * with sysctl_remove_oid(). */ int sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; if (clist == NULL || oidp == NULL) return (EINVAL); SYSCTL_WLOCK(); e = sysctl_ctx_entry_find(clist, oidp); if (e != NULL) { TAILQ_REMOVE(clist, e, link); SYSCTL_WUNLOCK(); free(e, M_SYSCTLOID); return (0); } else { SYSCTL_WUNLOCK(); return (ENOENT); } } /* * Remove dynamically created sysctl trees. * oidp - top of the tree to be removed * del - if 0 - just deregister, otherwise free up entries as well * recurse - if != 0 traverse the subtree to be deleted */ int sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) { int error; SYSCTL_WLOCK(); error = sysctl_remove_oid_locked(oidp, del, recurse); SYSCTL_WUNLOCK(); return (error); } int sysctl_remove_name(struct sysctl_oid *parent, const char *name, int del, int recurse) { struct sysctl_oid *p, *tmp; int error; error = ENOENT; SYSCTL_WLOCK(); SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { if (strcmp(p->oid_name, name) == 0) { error = sysctl_remove_oid_locked(p, del, recurse); break; } } SYSCTL_WUNLOCK(); return (error); } static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) { struct sysctl_oid *p, *tmp; int error; SYSCTL_ASSERT_WLOCKED(); if (oidp == NULL) return(EINVAL); if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { printf("Warning: can't remove non-dynamic nodes (%s)!\n", oidp->oid_name); return (EINVAL); } /* * WARNING: normal method to do this should be through * sysctl_ctx_free(). Use recursing as the last resort * method to purge your sysctl tree of leftovers... * However, if some other code still references these nodes, * it will panic. 
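 *
 * (Editor's sketch of the supported consumer pattern; names are
 * hypothetical.)  Let a context own every dynamic oid and tear them
 * down in one call:
 *
 *	static struct sysctl_ctx_list demo_ctx;
 *	static int demo_val;
 *
 *	sysctl_ctx_init(&demo_ctx);
 *	SYSCTL_ADD_INT(&demo_ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
 *	    "demo_val", CTLFLAG_RW, &demo_val, 0, "demo value");
 *	...
 *	error = sysctl_ctx_free(&demo_ctx);
 *
 * On failure the dry run above re-registers whatever it had already
 * detached and returns EBUSY, leaving the tree as it found it.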
*/ if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oidp->oid_refcnt == 1) { SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(oidp), oid_link, tmp) { if (!recurse) { printf("Warning: failed attempt to " "remove oid %s with child %s\n", oidp->oid_name, p->oid_name); return (ENOTEMPTY); } error = sysctl_remove_oid_locked(p, del, recurse); if (error) return (error); } } } if (oidp->oid_refcnt > 1 ) { oidp->oid_refcnt--; } else { if (oidp->oid_refcnt == 0) { printf("Warning: bad oid_refcnt=%u (%s)!\n", oidp->oid_refcnt, oidp->oid_name); return (EINVAL); } sysctl_unregister_oid(oidp); if (del) { /* * Wait for all threads running the handler to drain. * This preserves the previous behavior when the * sysctl lock was held across a handler invocation, * and is necessary for module unload correctness. */ while (oidp->oid_running > 0) { oidp->oid_kind |= CTLFLAG_DYING; SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); } if (oidp->oid_descr) free(__DECONST(char *, oidp->oid_descr), M_SYSCTLOID); if (oidp->oid_label) free(__DECONST(char *, oidp->oid_label), M_SYSCTLOID); free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); free(oidp, M_SYSCTLOID); } } return (0); } /* * Create new sysctls at run time. * clist may point to a valid context initialized with sysctl_ctx_init(). */ struct sysctl_oid * sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int number, const char *name, int kind, void *arg1, intmax_t arg2, int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr, const char *label) { struct sysctl_oid *oidp; /* You have to hook up somewhere.. */ if (parent == NULL) return(NULL); /* Check if the node already exists, otherwise create it */ SYSCTL_WLOCK(); oidp = sysctl_find_oidname(name, parent); if (oidp != NULL) { if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { oidp->oid_refcnt++; /* Update the context */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); SYSCTL_WUNLOCK(); return (oidp); } else { sysctl_warn_reuse(__func__, oidp); SYSCTL_WUNLOCK(); return (NULL); } } oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); oidp->oid_parent = parent; SLIST_INIT(&oidp->oid_children); oidp->oid_number = number; oidp->oid_refcnt = 1; oidp->oid_name = strdup(name, M_SYSCTLOID); oidp->oid_handler = handler; oidp->oid_kind = CTLFLAG_DYN | kind; oidp->oid_arg1 = arg1; oidp->oid_arg2 = arg2; oidp->oid_fmt = fmt; if (descr != NULL) oidp->oid_descr = strdup(descr, M_SYSCTLOID); if (label != NULL) oidp->oid_label = strdup(label, M_SYSCTLOID); /* Update the context, if used */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); /* Register this oid */ sysctl_register_oid(oidp); SYSCTL_WUNLOCK(); return (oidp); } /* * Rename an existing oid. */ void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) { char *newname; char *oldname; newname = strdup(name, M_SYSCTLOID); SYSCTL_WLOCK(); oldname = __DECONST(char *, oidp->oid_name); oidp->oid_name = newname; SYSCTL_WUNLOCK(); free(oldname, M_SYSCTLOID); } /* * Reparent an existing oid. */ int sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent) { struct sysctl_oid *oidp; SYSCTL_WLOCK(); if (oid->oid_parent == parent) { SYSCTL_WUNLOCK(); return (0); } oidp = sysctl_find_oidname(oid->oid_name, parent); if (oidp != NULL) { SYSCTL_WUNLOCK(); return (EEXIST); } sysctl_unregister_oid(oid); oid->oid_parent = parent; oid->oid_number = OID_AUTO; sysctl_register_oid(oid); SYSCTL_WUNLOCK(); return (0); } /* * Register the kernel's oids on startup. 
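 *
 * (Editor's illustration.)  Statically declared oids compiled into the
 * kernel are emitted into the "sysctl_set" linker set and registered by
 * the SET_FOREACH() loop below at boot, e.g.:
 *
 *	static int demo_value = 42;
 *	SYSCTL_INT(_debug, OID_AUTO, demo, CTLFLAG_RW,
 *	    &demo_value, 0, "example of a statically declared oid");
 *
 * ("demo"/"demo_value" are hypothetical names.)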
 */
SET_DECLARE(sysctl_set, struct sysctl_oid);

static void
sysctl_register_all(void *arg)
{
	struct sysctl_oid **oidp;

	sx_init(&sysctlmemlock, "sysctl mem");
	sx_init(&sysctlstringlock, "sysctl string handler");
	SYSCTL_INIT();
	SYSCTL_WLOCK();
	SET_FOREACH(oidp, sysctl_set)
		sysctl_register_oid(*oidp);
	SYSCTL_WUNLOCK();
}
SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL);

/*
 * "Staff-functions"
 *
 * These functions implement a presently undocumented interface
 * used by the sysctl program to walk the tree, and get the type
 * so it can print the value.
 * This interface is under work and consideration, and should probably
 * be killed with a big axe by the first person who can find the time.
 * (be aware though, that the proper interface isn't as obvious as it
 * may seem, there are various conflicting requirements.
 *
 * {CTL_SYSCTL, CTL_SYSCTL_DEBUG}		printf the entire MIB-tree.
 * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...}		return the name of the "..."
 *						OID.
 * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...}		return the next OID, honoring
 *						CTLFLAG_SKIP.
 * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID}		return the OID of the name in
 *						"new"
 * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...}		return the kind & format info
 *						for the "..." OID.
 * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...}	return the description of the
 *						"..." OID.
 * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...}	return the aggregation label of
 *						the "..." OID.
 * {CTL_SYSCTL, CTL_SYSCTL_NEXTNOSKIP, ...}	return the next OID, ignoring
 *						CTLFLAG_SKIP.
 */

#ifdef SYSCTL_DEBUG
static void
sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
{
	int k;
	struct sysctl_oid *oidp;

	SYSCTL_ASSERT_LOCKED();
	SLIST_FOREACH(oidp, l, oid_link) {
		for (k = 0; k < i; k++)
			printf(" ");

		printf("%d %s ", oidp->oid_number, oidp->oid_name);

		printf("%c%c",
		    oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
		    oidp->oid_kind & CTLFLAG_WR ?
'W':' '); if (oidp->oid_handler) printf(" *Handler"); switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_NODE: printf(" Node\n"); if (!oidp->oid_handler) { sysctl_sysctl_debug_dump_node( SYSCTL_CHILDREN(oidp), i + 2); } break; case CTLTYPE_INT: printf(" Int\n"); break; case CTLTYPE_UINT: printf(" u_int\n"); break; case CTLTYPE_LONG: printf(" Long\n"); break; case CTLTYPE_ULONG: printf(" u_long\n"); break; case CTLTYPE_STRING: printf(" String\n"); break; case CTLTYPE_S8: printf(" int8_t\n"); break; case CTLTYPE_S16: printf(" int16_t\n"); break; case CTLTYPE_S32: printf(" int32_t\n"); break; case CTLTYPE_S64: printf(" int64_t\n"); break; case CTLTYPE_U8: printf(" uint8_t\n"); break; case CTLTYPE_U16: printf(" uint16_t\n"); break; case CTLTYPE_U32: printf(" uint32_t\n"); break; case CTLTYPE_U64: printf(" uint64_t\n"); break; case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; default: printf("\n"); } } } static int sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) { struct rm_priotracker tracker; int error; error = priv_check(req->td, PRIV_SYSCTL_DEBUG); if (error) return (error); SYSCTL_RLOCK(&tracker); sysctl_sysctl_debug_dump_node(&sysctl__children, 0); SYSCTL_RUNLOCK(&tracker); return (ENOENT); } SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", ""); #endif static int sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; int error; struct sysctl_oid *oid; struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; struct rm_priotracker tracker; char buf[10]; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); SYSCTL_RLOCK(&tracker); while (namelen) { if (!lsp) { snprintf(buf,sizeof(buf),"%d",*name); if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) error = SYSCTL_OUT(req, buf, strlen(buf)); if (error) goto out; namelen--; name++; continue; } lsp2 = NULL; SLIST_FOREACH(oid, lsp, oid_link) { if (oid->oid_number != *name) continue; if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) error = SYSCTL_OUT(req, oid->oid_name, strlen(oid->oid_name)); if (error) goto out; namelen--; name++; if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) break; if (oid->oid_handler) break; lsp2 = SYSCTL_CHILDREN(oid); break; } lsp = lsp2; } error = SYSCTL_OUT(req, "", 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } /* * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in * capability mode. */ static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, ""); enum sysctl_iter_action { ITER_SIBLINGS, /* Not matched, continue iterating siblings */ ITER_CHILDREN, /* Node has children we need to iterate over them */ ITER_FOUND, /* Matching node was found */ }; /* * Tries to find the next node for @name and @namelen. * * Returns next action to take. */ static enum sysctl_iter_action sysctl_sysctl_next_node(struct sysctl_oid *oidp, int *name, unsigned int namelen, bool honor_skip) { if ((oidp->oid_kind & CTLFLAG_DORMANT) != 0) return (ITER_SIBLINGS); if (honor_skip && (oidp->oid_kind & CTLFLAG_SKIP) != 0) return (ITER_SIBLINGS); if (namelen == 0) { /* * We have reached a node with a full name match and are * looking for the next oid in its children. * * For CTL_SYSCTL_NEXTNOSKIP we are done. * * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it * has a handler) and move on to the children. 
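	 *
	 * (Editor's sketch.)  Userland relies on exactly this iterator to
	 * walk the tree, sysctl(8)-style; error handling elided, with
	 * 'oid'/'len' holding the previous position:
	 *
	 *	int qoid[CTL_MAXNAME + 2] = { CTL_SYSCTL, CTL_SYSCTL_NEXT };
	 *	int next[CTL_MAXNAME];
	 *	size_t nextlen = sizeof(next);
	 *
	 *	memcpy(qoid + 2, oid, len * sizeof(int));
	 *	if (sysctl(qoid, len + 2, next, &nextlen, NULL, 0) == 0)
	 *		handle(next, nextlen / sizeof(int));
	 *
	 * ENOENT from this query is the conventional end-of-tree marker.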
*/ if (!honor_skip) return (ITER_FOUND); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return (ITER_FOUND); /* If node does not have an iterator, treat it as leaf */ if (oidp->oid_handler) return (ITER_FOUND); /* Report oid as a node to iterate */ return (ITER_CHILDREN); } /* * No match yet. Continue seeking the given name. * * We are iterating in order by oid_number, so skip oids lower * than the one we are looking for. * * When the current oid_number is higher than the one we seek, * that means we have reached the next oid in the sequence and * should return it. * * If the oid_number matches the name at this level then we * have to find a node to continue searching at the next level. */ if (oidp->oid_number < *name) return (ITER_SIBLINGS); if (oidp->oid_number > *name) { /* * We have reached the next oid. * * For CTL_SYSCTL_NEXTNOSKIP we are done. * * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it * has a handler) and move on to the children. */ if (!honor_skip) return (ITER_FOUND); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return (ITER_FOUND); /* If node does not have an iterator, treat it as leaf */ if (oidp->oid_handler) return (ITER_FOUND); return (ITER_CHILDREN); } /* match at a current level */ if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return (ITER_SIBLINGS); if (oidp->oid_handler) return (ITER_SIBLINGS); return (ITER_CHILDREN); } /* * Recursively walk the sysctl subtree at lsp until we find the given name. * Returns true and fills in next oid data in @next and @len if oid is found. */ static bool sysctl_sysctl_next_action(struct sysctl_oid_list *lsp, int *name, u_int namelen, int *next, int *len, int level, bool honor_skip) { struct sysctl_oid *oidp; bool success = false; enum sysctl_iter_action action; SYSCTL_ASSERT_LOCKED(); SLIST_FOREACH(oidp, lsp, oid_link) { action = sysctl_sysctl_next_node(oidp, name, namelen, honor_skip); if (action == ITER_SIBLINGS) continue; if (action == ITER_FOUND) { success = true; break; } KASSERT((action== ITER_CHILDREN), ("ret(%d)!=ITER_CHILDREN", action)); lsp = SYSCTL_CHILDREN(oidp); if (namelen == 0) { success = sysctl_sysctl_next_action(lsp, NULL, 0, next + 1, len, level + 1, honor_skip); } else { success = sysctl_sysctl_next_action(lsp, name + 1, namelen - 1, next + 1, len, level + 1, honor_skip); if (!success) { /* * We maintain the invariant that current node oid * is >= the oid provided in @name. * As there are no usable children at this node, * current node oid is strictly > than the requested * oid. * Hence, reduce namelen to 0 to allow for picking first * nodes/leafs in the next node in list. */ namelen = 0; } } if (success) break; } if (success) { *next = oidp->oid_number; if (level > *len) *len = level; } return (success); } static int sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; int len, error; bool success; struct sysctl_oid_list *lsp = &sysctl__children; struct rm_priotracker tracker; int next[CTL_MAXNAME]; len = 0; SYSCTL_RLOCK(&tracker); success = sysctl_sysctl_next_action(lsp, name, namelen, next, &len, 1, oidp->oid_number == CTL_SYSCTL_NEXT); SYSCTL_RUNLOCK(&tracker); if (!success) return (ENOENT); error = SYSCTL_OUT(req, next, len * sizeof (int)); return (error); } /* * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in * capability mode. 
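 *
 * (Editor's note for context.)  Enforcement itself happens in
 * sysctl_root(): once a process has entered capability mode, only nodes
 * carrying CTLFLAG_CAPRD may be read and only CTLFLAG_CAPWR nodes
 * written, so e.g. (hypothetical node name):
 *
 *	cap_enter();
 *	error = sysctlbyname("kern.demo", buf, &len, NULL, 0);
 *		- EPERM unless kern.demo was declared CTLFLAG_CAPRD
 *
 * The XXX above is that the meta-nodes still reveal the names of oids
 * such a process could never query.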
*/ static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, ""); static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXTNOSKIP, nextnoskip, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, ""); static int name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) { struct sysctl_oid *oidp; struct sysctl_oid_list *lsp = &sysctl__children; char *p; SYSCTL_ASSERT_LOCKED(); for (*len = 0; *len < CTL_MAXNAME;) { p = strsep(&name, "."); oidp = SLIST_FIRST(lsp); for (;; oidp = SLIST_NEXT(oidp, oid_link)) { if (oidp == NULL) return (ENOENT); if (strcmp(p, oidp->oid_name) == 0) break; } *oid++ = oidp->oid_number; (*len)++; if (name == NULL || *name == '\0') { if (oidpp) *oidpp = oidp; return (0); } if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) break; if (oidp->oid_handler) break; lsp = SYSCTL_CHILDREN(oidp); } return (ENOENT); } static int sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) { char *p; int error, oid[CTL_MAXNAME], len = 0; struct sysctl_oid *op = NULL; struct rm_priotracker tracker; char buf[32]; if (!req->newlen) return (ENOENT); if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ return (ENAMETOOLONG); p = buf; if (req->newlen >= sizeof(buf)) p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); error = SYSCTL_IN(req, p, req->newlen); if (error) { if (p != buf) free(p, M_SYSCTL); return (error); } p [req->newlen] = '\0'; SYSCTL_RLOCK(&tracker); error = name2oid(p, oid, &len, &op); SYSCTL_RUNLOCK(&tracker); if (p != buf) free(p, M_SYSCTL); if (error) return (error); error = SYSCTL_OUT(req, oid, len * sizeof *oid); return (error); } /* * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in * capability mode. */ SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); static int sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_fmt == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); if (error) goto out; error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, ""); static int sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_descr == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD | CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, ""); static int sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_label == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, 
	    oid->oid_label, strlen(oid->oid_label) + 1);
 out:
	SYSCTL_RUNLOCK(&tracker);
	return (error);
}

static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD |
    CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");

/*
 * Default "handler" functions.
 */

/*
 * Handle a bool.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */
int
sysctl_handle_bool(SYSCTL_HANDLER_ARGS)
{
	uint8_t temp;
	int error;

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	if (arg1)
		temp = *(bool *)arg1 ? 1 : 0;
	else
		temp = arg2 ? 1 : 0;

	error = SYSCTL_OUT(req, &temp, sizeof(temp));
	if (error || !req->newptr)
		return (error);

	if (!arg1)
		error = EPERM;
	else {
		error = SYSCTL_IN(req, &temp, sizeof(temp));
		if (!error)
			*(bool *)arg1 = temp ? 1 : 0;
	}
	return (error);
}

/*
 * Handle an int8_t, signed or unsigned.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */
int
sysctl_handle_8(SYSCTL_HANDLER_ARGS)
{
	int8_t tmpout;
	int error = 0;

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	if (arg1)
		tmpout = *(int8_t *)arg1;
	else
		tmpout = arg2;
	error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));

	if (error || !req->newptr)
		return (error);

	if (!arg1)
		error = EPERM;
	else
		error = SYSCTL_IN(req, arg1, sizeof(tmpout));
	return (error);
}

/*
 * Handle an int16_t, signed or unsigned.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */
int
sysctl_handle_16(SYSCTL_HANDLER_ARGS)
{
	int16_t tmpout;
	int error = 0;

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	if (arg1)
		tmpout = *(int16_t *)arg1;
	else
		tmpout = arg2;
	error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));

	if (error || !req->newptr)
		return (error);

	if (!arg1)
		error = EPERM;
	else
		error = SYSCTL_IN(req, arg1, sizeof(tmpout));
	return (error);
}

/*
 * Handle an int32_t, signed or unsigned.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */
int
sysctl_handle_32(SYSCTL_HANDLER_ARGS)
{
	int32_t tmpout;
	int error = 0;

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	if (arg1)
		tmpout = *(int32_t *)arg1;
	else
		tmpout = arg2;
	error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));

	if (error || !req->newptr)
		return (error);

	if (!arg1)
		error = EPERM;
	else
		error = SYSCTL_IN(req, arg1, sizeof(tmpout));
	return (error);
}

/*
 * Handle an int, signed or unsigned.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */
int
sysctl_handle_int(SYSCTL_HANDLER_ARGS)
{
	int tmpout, error = 0;

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	if (arg1)
		tmpout = *(int *)arg1;
	else
		tmpout = arg2;
	error = SYSCTL_OUT(req, &tmpout, sizeof(int));

	if (error || !req->newptr)
		return (error);

	if (!arg1)
		error = EPERM;
	else
		error = SYSCTL_IN(req, arg1, sizeof(int));
	return (error);
}

/*
 * Based on sysctl_handle_int() convert milliseconds into ticks.
 * Note: this is used by TCP.
 */
int
sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
{
	int error, s, tt;

	tt = *(int *)arg1;
	s = (int)((int64_t)tt * 1000 / hz);

	error = sysctl_handle_int(oidp, &s, 0, req);
	if (error || !req->newptr)
		return (error);

	tt = (int)((int64_t)s * hz / 1000);
	if (tt < 1)
		return (EINVAL);

	*(int *)arg1 = tt;
	return (0);
}

/*
 * Handle a long, signed or unsigned.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */
int
sysctl_handle_long(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	long tmplong;
#ifdef SCTL_MASK32
	int tmpint;
#endif

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
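	 *
	 * (Editor's aside.)  The snapshot-then-copyin shape above is also
	 * the base for unit-converting handlers; sysctl_msec_to_ticks()
	 * earlier is one instance, and a driver-private variant might look
	 * like this, with hypothetical conversion macros:
	 *
	 *	static int
	 *	demo_msec_sysctl(SYSCTL_HANDLER_ARGS)
	 *	{
	 *		int error, ms;
	 *
	 *		ms = DEMO_TICKS_TO_MSEC(*(int *)arg1);
	 *		error = sysctl_handle_int(oidp, &ms, 0, req);
	 *		if (error || req->newptr == NULL)
	 *			return (error);
	 *		*(int *)arg1 = DEMO_MSEC_TO_TICKS(ms);
	 *		return (0);
	 *	}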
*/ if (arg1) tmplong = *(long *)arg1; else tmplong = arg2; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { tmpint = tmplong; error = SYSCTL_OUT(req, &tmpint, sizeof(int)); } else #endif error = SYSCTL_OUT(req, &tmplong, sizeof(long)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; #ifdef SCTL_MASK32 else if (req->flags & SCTL_MASK32) { error = SYSCTL_IN(req, &tmpint, sizeof(int)); *(long *)arg1 = (long)tmpint; } #endif else error = SYSCTL_IN(req, arg1, sizeof(long)); return (error); } /* * Handle a 64 bit int, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_64(SYSCTL_HANDLER_ARGS) { int error = 0; uint64_t tmpout; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(uint64_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); return (error); } /* * Handle our generic '\0' terminated 'C' string. * Two cases: * a variable string: point arg1 at it, arg2 is max length. * a constant string: point arg1 at it, arg2 is zero. */ int sysctl_handle_string(SYSCTL_HANDLER_ARGS) { char *tmparg; size_t outlen; int error = 0, ro_string = 0; /* * If the sysctl isn't writable and isn't a preallocated tunable that * can be modified by kenv(2), microoptimise and treat it as a * read-only string. * A zero-length buffer indicates a fixed size read-only * string. In ddb, don't worry about trying to make a malloced * snapshot. */ if ((oidp->oid_kind & (CTLFLAG_WR | CTLFLAG_TUN)) == 0 || arg2 == 0 || kdb_active) { arg2 = strlen((char *)arg1) + 1; ro_string = 1; } if (req->oldptr != NULL) { if (ro_string) { tmparg = arg1; outlen = strlen(tmparg) + 1; } else { tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); sx_slock(&sysctlstringlock); memcpy(tmparg, arg1, arg2); sx_sunlock(&sysctlstringlock); outlen = strlen(tmparg) + 1; } error = SYSCTL_OUT(req, tmparg, outlen); if (!ro_string) free(tmparg, M_SYSCTLTMP); } else { if (!ro_string) sx_slock(&sysctlstringlock); outlen = strlen((char *)arg1) + 1; if (!ro_string) sx_sunlock(&sysctlstringlock); error = SYSCTL_OUT(req, NULL, outlen); } if (error || !req->newptr) return (error); if (req->newlen - req->newidx >= arg2 || req->newlen - req->newidx < 0) { error = EINVAL; } else if (req->newlen - req->newidx == 0) { sx_xlock(&sysctlstringlock); ((char *)arg1)[0] = '\0'; sx_xunlock(&sysctlstringlock); } else { arg2 = req->newlen - req->newidx; tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); error = SYSCTL_IN(req, tmparg, arg2); if (error) { free(tmparg, M_SYSCTLTMP); return (error); } sx_xlock(&sysctlstringlock); memcpy(arg1, tmparg, arg2); ((char *)arg1)[arg2] = '\0'; sx_xunlock(&sysctlstringlock); free(tmparg, M_SYSCTLTMP); req->newidx += arg2; } return (error); } /* * Handle any kind of opaque data. * arg1 points to it, arg2 is the size. */ int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) { int error, tries; u_int generation; struct sysctl_req req2; /* * Attempt to get a coherent snapshot, by using the thread * pre-emption counter updated from within mi_switch() to * determine if we were pre-empted during a bcopy() or * copyout(). Make 3 attempts at doing this before giving up. * If we encounter an error, stop immediately. 
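	 *
	 * (Editor's sketch.)  Consumers typically reach this handler
	 * through SYSCTL_OPAQUE(), exporting a whole struct by address and
	 * size:
	 *
	 *	static struct demo_stats stats;	- hypothetical struct
	 *	SYSCTL_OPAQUE(_debug, OID_AUTO, demo_stats, CTLFLAG_RD,
	 *	    &stats, sizeof(stats), "S,demo_stats", "demo statistics");
	 *
	 * The generation check below is best-effort only: it retries when
	 * the copying thread was preempted, it does not lock writers out.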
	 */
	tries = 0;
	req2 = *req;
retry:
	generation = curthread->td_generation;
	error = SYSCTL_OUT(req, arg1, arg2);
	if (error)
		return (error);
	tries++;
	if (generation != curthread->td_generation && tries < 3) {
		*req = req2;
		goto retry;
	}

	error = SYSCTL_IN(req, arg1, arg2);

	return (error);
}

/*
 * Based on sysctl_handle_int() convert microseconds to a sbintime.
 */
int
sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS)
{
	int error;
	int64_t tt;
	sbintime_t sb;

	tt = *(int64_t *)arg1;
	sb = sbttous(tt);

	error = sysctl_handle_64(oidp, &sb, 0, req);
	if (error || !req->newptr)
		return (error);

	tt = ustosbt(sb);
	*(int64_t *)arg1 = tt;

	return (0);
}

/*
 * Based on sysctl_handle_int() convert milliseconds to a sbintime.
 */
int
sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS)
{
	int error;
	int64_t tt;
	sbintime_t sb;

	tt = *(int64_t *)arg1;
	sb = sbttoms(tt);

	error = sysctl_handle_64(oidp, &sb, 0, req);
	if (error || !req->newptr)
		return (error);

	tt = mstosbt(sb);
	*(int64_t *)arg1 = tt;

	return (0);
}

/*
 * Convert seconds to a struct timeval.  Intended for use with
 * intervals and thus does not permit negative seconds.
 */
int
sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS)
{
	struct timeval *tv;
	int error, secs;

	tv = arg1;
	secs = tv->tv_sec;

	error = sysctl_handle_int(oidp, &secs, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	if (secs < 0)
		return (EINVAL);
	tv->tv_sec = secs;

	return (0);
}

/*
 * Transfer functions to/from kernel space.
 * XXX: rather untested at this point
 */
static int
sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
{
	size_t i = 0;

	if (req->oldptr) {
		i = l;
		if (req->oldlen <= req->oldidx)
			i = 0;
		else if (i > req->oldlen - req->oldidx)
			i = req->oldlen - req->oldidx;
		if (i > 0)
			bcopy(p, (char *)req->oldptr + req->oldidx, i);
	}
	req->oldidx += l;
	if (req->oldptr && i != l)
		return (ENOMEM);
	return (0);
}

static int
sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
{
	if (!req->newptr)
		return (0);
	if (req->newlen - req->newidx < l)
		return (EINVAL);
	bcopy((const char *)req->newptr + req->newidx, p, l);
	req->newidx += l;
	return (0);
}

int
kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
    size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags)
{
	int error = 0;
	struct sysctl_req req;

	bzero(&req, sizeof req);

	req.td = td;
	req.flags = flags;

	if (oldlenp) {
		req.oldlen = *oldlenp;
	}
	req.validlen = req.oldlen;

	if (old) {
		req.oldptr = old;
	}

	if (new != NULL) {
		req.newlen = newlen;
		req.newptr = new;
	}

	req.oldfunc = sysctl_old_kernel;
	req.newfunc = sysctl_new_kernel;
	req.lock = REQ_UNWIRED;

	error = sysctl_root(0, name, namelen, &req);

	if (req.lock == REQ_WIRED && req.validlen > 0)
		vsunlock(req.oldptr, req.validlen);

	if (error && error != ENOMEM)
		return (error);

	if (retval) {
		if (req.oldptr && req.oldidx > req.validlen)
			*retval = req.validlen;
		else
			*retval = req.oldidx;
	}
	return (error);
}

int
kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
    void *new, size_t newlen, size_t *retval, int flags)
{
	int oid[CTL_MAXNAME];
	size_t oidlen, plen;
	int error;

	oid[0] = CTL_SYSCTL;
	oid[1] = CTL_SYSCTL_NAME2OID;
	oidlen = sizeof(oid);

	error = kernel_sysctl(td, oid, 2, oid, &oidlen,
	    (void *)name, strlen(name), &plen, flags);
	if (error)
		return (error);

	error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp,
	    new, newlen, retval, flags);
	return (error);
}

/*
 * Transfer function to/from user space.
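 *
 * (Editor's illustration.)  The kernel-space pair above is reached via
 * kernel_sysctl()/kernel_sysctlbyname(), e.g. from kernel code:
 *
 *	char buf[MAXHOSTNAMELEN];
 *	char node[] = "kern.hostname";
 *	size_t len = sizeof(buf);
 *	int error;
 *
 *	error = kernel_sysctlbyname(curthread, node, buf, &len,
 *	    NULL, 0, NULL, 0);
 *
 * while the user-space pair that follows is what sysctl(2) installs.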
*/ static int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) { size_t i, len, origidx; int error; origidx = req->oldidx; req->oldidx += l; if (req->oldptr == NULL) return (0); /* * If we have not wired the user supplied buffer and we are currently * holding locks, drop a witness warning, as it's possible that * write operations to the user page can sleep. */ if (req->lock != REQ_WIRED) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "sysctl_old_user()"); i = l; len = req->validlen; if (len <= origidx) i = 0; else { if (i > len - origidx) i = len - origidx; if (req->lock == REQ_WIRED) { error = copyout_nofault(p, (char *)req->oldptr + origidx, i); } else error = copyout(p, (char *)req->oldptr + origidx, i); if (error != 0) return (error); } if (i < l) return (ENOMEM); return (0); } static int sysctl_new_user(struct sysctl_req *req, void *p, size_t l) { int error; if (!req->newptr) return (0); if (req->newlen - req->newidx < l) return (EINVAL); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "sysctl_new_user()"); error = copyin((const char *)req->newptr + req->newidx, p, l); req->newidx += l; return (error); } /* * Wire the user space destination buffer. If set to a value greater than * zero, the len parameter limits the maximum amount of wired memory. */ int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) { int ret; size_t wiredlen; wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; ret = 0; if (req->lock != REQ_WIRED && req->oldptr && req->oldfunc == sysctl_old_user) { if (wiredlen != 0) { ret = vslock(req->oldptr, wiredlen); if (ret != 0) { if (ret != ENOMEM) return (ret); wiredlen = 0; } } req->lock = REQ_WIRED; req->validlen = wiredlen; } return (0); } int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req) { struct sysctl_oid_list *lsp; struct sysctl_oid *oid; int indx; SYSCTL_ASSERT_LOCKED(); lsp = &sysctl__children; indx = 0; while (indx < CTL_MAXNAME) { SLIST_FOREACH(oid, lsp, oid_link) { if (oid->oid_number == name[indx]) break; } if (oid == NULL) return (ENOENT); indx++; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oid->oid_handler != NULL || indx == namelen) { *noid = oid; if (nindx != NULL) *nindx = indx; KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, ("%s found DYING node %p", __func__, oid)); return (0); } lsp = SYSCTL_CHILDREN(oid); } else if (indx == namelen) { if ((oid->oid_kind & CTLFLAG_DORMANT) != 0) return (ENOENT); *noid = oid; if (nindx != NULL) *nindx = indx; KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, ("%s found DYING node %p", __func__, oid)); return (0); } else { return (ENOTDIR); } } return (ENOENT); } /* * Traverse our tree, and find the right node, execute whatever it points * to, and return the resulting error code. */ static int sysctl_root(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error, indx, lvl; SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); if (error) goto out; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { /* * You can't call a sysctl when it's a node, but has * no handler. Inform the user that it's a node. * The indx may or may not be the same as namelen. */ if (oid->oid_handler == NULL) { error = EISDIR; goto out; } } /* Is this sysctl writable? 
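	 *
	 * (Editor's aside on sysctl_wire_old_buffer() above: a handler
	 * that must not fault while holding a lock wires the destination
	 * first, the classic shape being
	 *
	 *	error = sysctl_wire_old_buffer(req, 0);
	 *	if (error != 0)
	 *		return (error);
	 *	mtx_lock(&demo_lock);		- hypothetical lock/data
	 *	error = SYSCTL_OUT(req, &demo_data, sizeof(demo_data));
	 *	mtx_unlock(&demo_lock);
	 *	return (error);
	 *
	 * since SYSCTL_OUT then takes the copyout_nofault() path on the
	 * wired pages.)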
*/ if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) { error = EPERM; goto out; } KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); #ifdef CAPABILITY_MODE /* * If the process is in capability mode, then don't permit reading or * writing unless specifically granted for the node. */ if (IN_CAPABILITY_MODE(req->td)) { if ((req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) || (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))) { error = EPERM; goto out; } } #endif /* Is this sysctl sensitive to securelevels? */ if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; error = securelevel_gt(req->td->td_ucred, lvl); if (error) goto out; } /* Is this sysctl writable by only privileged users? */ if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { int priv; if (oid->oid_kind & CTLFLAG_PRISON) priv = PRIV_SYSCTL_WRITEJAIL; #ifdef VIMAGE else if ((oid->oid_kind & CTLFLAG_VNET) && prison_owns_vnet(req->td->td_ucred)) priv = PRIV_SYSCTL_WRITEJAIL; #endif else priv = PRIV_SYSCTL_WRITE; error = priv_check(req->td, priv); if (error) goto out; } if (!oid->oid_handler) { error = EINVAL; goto out; } if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { arg1 = (int *)arg1 + indx; arg2 -= indx; } else { arg1 = oid->oid_arg1; arg2 = oid->oid_arg2; } #ifdef MAC error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, req); if (error != 0) goto out; #endif #ifdef VIMAGE if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); #endif error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker); out: SYSCTL_RUNLOCK(&tracker); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sysctl_args { int *name; u_int namelen; void *old; size_t *oldlenp; void *new; size_t newlen; }; #endif int sys___sysctl(struct thread *td, struct sysctl_args *uap) { int error, i, name[CTL_MAXNAME]; size_t j; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, &name, uap->namelen * sizeof(int)); if (error) return (error); error = userland_sysctl(td, name, uap->namelen, uap->old, uap->oldlenp, 0, uap->new, uap->newlen, &j, 0); if (error && error != ENOMEM) return (error); if (uap->oldlenp) { i = copyout(&j, uap->oldlenp, sizeof(j)); if (i) return (i); } return (error); } int kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags, bool inkernel) { int oid[CTL_MAXNAME]; char namebuf[16]; char *name; size_t oidlen; int error; if (namelen > MAXPATHLEN || namelen == 0) return (EINVAL); name = namebuf; if (namelen > sizeof(namebuf)) name = malloc(namelen, M_SYSCTL, M_WAITOK); error = copyin(oname, name, namelen); if (error != 0) goto out; oid[0] = CTL_SYSCTL; oid[1] = CTL_SYSCTL_NAME2OID; oidlen = sizeof(oid); error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, namelen, retval, flags); if (error != 0) goto out; error = userland_sysctl(td, oid, *retval / sizeof(int), old, oldlenp, inkernel, new, newlen, retval, flags); out: if (namelen > sizeof(namebuf)) free(name, M_SYSCTL); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __sysctlbyname_args { const char *name; size_t namelen; void *old; size_t *oldlenp; void *new; size_t newlen; }; #endif int sys___sysctlbyname(struct thread *td, struct __sysctlbyname_args *uap) { size_t rv; int error; error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old, uap->oldlenp, uap->new, uap->newlen, &rv, 0, 0); if (error != 
0) return (error); if (uap->oldlenp != NULL) error = copyout(&rv, uap->oldlenp, sizeof(rv)); return (error); } /* * This is used from various compatibility syscalls too. That's why name * must be in kernel space. */ int userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, const void *new, size_t newlen, size_t *retval, int flags) { int error = 0, memlocked; struct sysctl_req req; bzero(&req, sizeof req); req.td = td; req.flags = flags; if (oldlenp) { if (inkernel) { req.oldlen = *oldlenp; } else { error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); if (error) return (error); } } req.validlen = req.oldlen; req.oldptr = old; if (new != NULL) { req.newlen = newlen; req.newptr = new; } req.oldfunc = sysctl_old_user; req.newfunc = sysctl_new_user; req.lock = REQ_UNWIRED; #ifdef KTRACE if (KTRPOINT(curthread, KTR_SYSCTL)) ktrsysctl(name, namelen); #endif memlocked = 0; if (req.oldptr && req.oldlen > 4 * PAGE_SIZE) { memlocked = 1; sx_xlock(&sysctlmemlock); } CURVNET_SET(TD_TO_VNET(td)); for (;;) { req.oldidx = 0; req.newidx = 0; error = sysctl_root(0, name, namelen, &req); if (error != EAGAIN) break; kern_yield(PRI_USER); } CURVNET_RESTORE(); if (req.lock == REQ_WIRED && req.validlen > 0) vsunlock(req.oldptr, req.validlen); if (memlocked) sx_xunlock(&sysctlmemlock); if (error && error != ENOMEM) return (error); if (retval) { if (req.oldptr && req.oldidx > req.validlen) *retval = req.validlen; else *retval = req.oldidx; } return (error); } /* * Drain into a sysctl struct. The user buffer should be wired if a page * fault would cause issue. */ static int sbuf_sysctl_drain(void *arg, const char *data, int len) { struct sysctl_req *req = arg; int error; error = SYSCTL_OUT(req, data, len); KASSERT(error >= 0, ("Got unexpected negative value %d", error)); return (error == 0 ? len : -error); } struct sbuf * sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, struct sysctl_req *req) { /* Supply a default buffer size if none given. 
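	 *
	 * (Editor's sketch of the intended use; names hypothetical.)  A
	 * handler built on this helper drains formatted output straight
	 * into the request instead of sizing a buffer up front:
	 *
	 *	static int
	 *	demo_sbuf_sysctl(SYSCTL_HANDLER_ARGS)
	 *	{
	 *		struct sbuf sb;
	 *		int error;
	 *
	 *		sbuf_new_for_sysctl(&sb, NULL, 128, req);
	 *		sbuf_printf(&sb, "line %d\n", 42);
	 *		error = sbuf_finish(&sb);
	 *		sbuf_delete(&sb);
	 *		return (error);
	 *	}
	 *
	 * Per the drain comment above, the caller should wire req first
	 * when a page fault mid-drain would be a problem.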
*/ if (buf == NULL && length == 0) length = 64; s = sbuf_new(s, buf, length, SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_set_drain(s, sbuf_sysctl_drain, req); return (s); } #ifdef DDB /* The current OID the debugger is working with */ static struct sysctl_oid *g_ddb_oid; /* The current flags specified by the user */ static int g_ddb_sysctl_flags; /* Check to see if the last sysctl printed */ static int g_ddb_sysctl_printed; static const int ctl_sign[CTLTYPE+1] = { [CTLTYPE_INT] = 1, [CTLTYPE_LONG] = 1, [CTLTYPE_S8] = 1, [CTLTYPE_S16] = 1, [CTLTYPE_S32] = 1, [CTLTYPE_S64] = 1, }; static const int ctl_size[CTLTYPE+1] = { [CTLTYPE_INT] = sizeof(int), [CTLTYPE_UINT] = sizeof(u_int), [CTLTYPE_LONG] = sizeof(long), [CTLTYPE_ULONG] = sizeof(u_long), [CTLTYPE_S8] = sizeof(int8_t), [CTLTYPE_S16] = sizeof(int16_t), [CTLTYPE_S32] = sizeof(int32_t), [CTLTYPE_S64] = sizeof(int64_t), [CTLTYPE_U8] = sizeof(uint8_t), [CTLTYPE_U16] = sizeof(uint16_t), [CTLTYPE_U32] = sizeof(uint32_t), [CTLTYPE_U64] = sizeof(uint64_t), }; #define DB_SYSCTL_NAME_ONLY 0x001 /* Compare with -N */ #define DB_SYSCTL_VALUE_ONLY 0x002 /* Compare with -n */ #define DB_SYSCTL_OPAQUE 0x004 /* Compare with -o */ #define DB_SYSCTL_HEX 0x008 /* Compare with -x */ #define DB_SYSCTL_SAFE_ONLY 0x100 /* Only simple types */ static const char db_sysctl_modifs[] = { 'N', 'n', 'o', 'x', }; static const int db_sysctl_modif_values[] = { DB_SYSCTL_NAME_ONLY, DB_SYSCTL_VALUE_ONLY, DB_SYSCTL_OPAQUE, DB_SYSCTL_HEX, }; /* Handlers considered safe to print while recursing */ static int (* const db_safe_handlers[])(SYSCTL_HANDLER_ARGS) = { sysctl_handle_bool, sysctl_handle_8, sysctl_handle_16, sysctl_handle_32, sysctl_handle_64, sysctl_handle_int, sysctl_handle_long, sysctl_handle_string, sysctl_handle_opaque, }; /* * Use in place of sysctl_old_kernel to print sysctl values. 
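* Values are rendered with db_printf() according to the OID's type and the requested modifier flags; the g_ddb_* globals above carry that context into the output routine.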
* * Compare to the output handling in show_var from sbin/sysctl/sysctl.c */ static int sysctl_old_ddb(struct sysctl_req *req, const void *ptr, size_t len) { const u_char *val, *p; const char *sep1; size_t intlen, slen; uintmax_t umv; intmax_t mv; int sign, ctltype, hexlen, xflag, error; /* Suppress false-positive GCC uninitialized variable warnings */ mv = 0; umv = 0; slen = len; val = p = ptr; if (ptr == NULL) { error = 0; goto out; } /* We are going to print */ g_ddb_sysctl_printed = 1; xflag = g_ddb_sysctl_flags & DB_SYSCTL_HEX; ctltype = (g_ddb_oid->oid_kind & CTLTYPE); sign = ctl_sign[ctltype]; intlen = ctl_size[ctltype]; switch (ctltype) { case CTLTYPE_NODE: case CTLTYPE_STRING: db_printf("%.*s", (int) len, (const char *) p); error = 0; goto out; case CTLTYPE_INT: case CTLTYPE_UINT: case CTLTYPE_LONG: case CTLTYPE_ULONG: case CTLTYPE_S8: case CTLTYPE_S16: case CTLTYPE_S32: case CTLTYPE_S64: case CTLTYPE_U8: case CTLTYPE_U16: case CTLTYPE_U32: case CTLTYPE_U64: hexlen = 2 + (intlen * CHAR_BIT + 3) / 4; sep1 = ""; while (len >= intlen) { switch (ctltype) { case CTLTYPE_INT: case CTLTYPE_UINT: umv = *(const u_int *)p; mv = *(const int *)p; break; case CTLTYPE_LONG: case CTLTYPE_ULONG: umv = *(const u_long *)p; mv = *(const long *)p; break; case CTLTYPE_S8: case CTLTYPE_U8: umv = *(const uint8_t *)p; mv = *(const int8_t *)p; break; case CTLTYPE_S16: case CTLTYPE_U16: umv = *(const uint16_t *)p; mv = *(const int16_t *)p; break; case CTLTYPE_S32: case CTLTYPE_U32: umv = *(const uint32_t *)p; mv = *(const int32_t *)p; break; case CTLTYPE_S64: case CTLTYPE_U64: umv = *(const uint64_t *)p; mv = *(const int64_t *)p; break; } db_printf("%s", sep1); if (xflag) db_printf("%#0*jx", hexlen, umv); else if (!sign) db_printf("%ju", umv); else if (g_ddb_oid->oid_fmt[1] == 'K') { /* Kelvins are currently unsupported. */ error = EOPNOTSUPP; goto out; } else db_printf("%jd", mv); sep1 = " "; len -= intlen; p += intlen; } error = 0; goto out; case CTLTYPE_OPAQUE: /* TODO: Support struct functions. */ /* FALLTHROUGH */ default: db_printf("Format:%s Length:%zu Dump:0x", g_ddb_oid->oid_fmt, len); while (len-- && (xflag || p < val + 16)) db_printf("%02x", *p++); if (!xflag && len > 16) db_printf("..."); error = 0; goto out; } out: req->oldidx += slen; return (error); } /* * Avoid setting new sysctl values from the debugger */ static int sysctl_new_ddb(struct sysctl_req *req, void *p, size_t l) { if (!req->newptr) return (0); /* Changing sysctls from the debugger is currently unsupported */ return (EPERM); } /* * Run a sysctl handler with the DDB oldfunc and newfunc attached. * Instead of copying any output to a buffer we'll dump it right to * the console. 
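* The request is faked up just enough for sysctl_root(): kdb_thread stands in for the current thread, and the DDB old/new functions above replace the userland copy routines.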
*/ static int db_sysctl(struct sysctl_oid *oidp, int *name, u_int namelen, void *old, size_t *oldlenp, size_t *retval, int flags) { struct sysctl_req req; int error; /* Setup the request */ bzero(&req, sizeof req); req.td = kdb_thread; req.oldfunc = sysctl_old_ddb; req.newfunc = sysctl_new_ddb; req.lock = REQ_UNWIRED; if (oldlenp) { req.oldlen = *oldlenp; } req.validlen = req.oldlen; if (old) { req.oldptr = old; } /* Setup our globals for sysctl_old_ddb */ g_ddb_oid = oidp; g_ddb_sysctl_flags = flags; g_ddb_sysctl_printed = 0; error = sysctl_root(0, name, namelen, &req); /* Reset globals */ g_ddb_oid = NULL; g_ddb_sysctl_flags = 0; if (retval) { if (req.oldptr && req.oldidx > req.validlen) *retval = req.validlen; else *retval = req.oldidx; } return (error); } /* * Show a sysctl's name */ static void db_show_oid_name(int *oid, size_t nlen) { struct sysctl_oid *oidp; int qoid[CTL_MAXNAME+2]; int error; qoid[0] = 0; memcpy(qoid + 2, oid, nlen * sizeof(int)); qoid[1] = 1; error = sysctl_find_oid(qoid, nlen + 2, &oidp, NULL, NULL); if (error) db_error("sysctl name oid"); error = db_sysctl(oidp, qoid, nlen + 2, NULL, NULL, NULL, 0); if (error) db_error("sysctl name"); } /* * Check to see if an OID is safe to print from ddb. */ static bool db_oid_safe(const struct sysctl_oid *oidp) { for (unsigned int i = 0; i < nitems(db_safe_handlers); ++i) { if (oidp->oid_handler == db_safe_handlers[i]) return (true); } return (false); } /* * Show a sysctl at a specific OID * Compare to the input handling in show_var from sbin/sysctl/sysctl.c */ static int db_show_oid(struct sysctl_oid *oidp, int *oid, size_t nlen, int flags) { int error, xflag, oflag, Nflag, nflag; size_t len; xflag = flags & DB_SYSCTL_HEX; oflag = flags & DB_SYSCTL_OPAQUE; nflag = flags & DB_SYSCTL_VALUE_ONLY; Nflag = flags & DB_SYSCTL_NAME_ONLY; if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_OPAQUE && (!xflag && !oflag)) return (0); if (Nflag) { db_show_oid_name(oid, nlen); error = 0; goto out; } if (!nflag) { db_show_oid_name(oid, nlen); db_printf(": "); } if ((flags & DB_SYSCTL_SAFE_ONLY) && !db_oid_safe(oidp)) { db_printf("Skipping, unsafe to print while recursing."); error = 0; goto out; } /* Try once, and ask about the size */ len = 0; error = db_sysctl(oidp, oid, nlen, NULL, NULL, &len, flags); if (error) goto out; if (!g_ddb_sysctl_printed) /* Lie about the size */ error = db_sysctl(oidp, oid, nlen, (void *) 1, &len, NULL, flags); out: db_printf("\n"); return (error); } /* * Show all sysctls under a specific OID * Compare to sysctl_all from sbin/sysctl/sysctl.c */ static int db_show_sysctl_all(int *oid, size_t len, int flags) { struct sysctl_oid *oidp; int name1[CTL_MAXNAME + 2], name2[CTL_MAXNAME + 2]; size_t l1, l2; name1[0] = CTL_SYSCTL; name1[1] = CTL_SYSCTL_NEXT; l1 = 2; if (len) { memcpy(name1 + 2, oid, len * sizeof(int)); l1 += len; } else { name1[2] = CTL_KERN; l1++; } for (;;) { int i, error; l2 = sizeof(name2); error = kernel_sysctl(kdb_thread, name1, l1, name2, &l2, NULL, 0, &l2, 0); if (error != 0) { if (error == ENOENT) return (0); else db_error("sysctl(next)"); } l2 /= sizeof(int); if (l2 < (unsigned int)len) return (0); for (i = 0; i < len; i++) if (name2[i] != oid[i]) return (0); /* Find the OID in question */ error = sysctl_find_oid(name2, l2, &oidp, NULL, NULL); if (error) return (error); i = db_show_oid(oidp, name2, l2, flags | DB_SYSCTL_SAFE_ONLY); if (db_pager_quit) return (0); memcpy(name1+2, name2, l2 * sizeof(int)); l1 = 2 + l2; } } /* * Show a sysctl by its user facing string */ static int db_sysctlbyname(char 
*name, int flags) { struct sysctl_oid *oidp; int oid[CTL_MAXNAME]; int error, nlen; error = name2oid(name, oid, &nlen, &oidp); if (error) { return (error); } if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { db_show_sysctl_all(oid, nlen, flags); } else { error = db_show_oid(oidp, oid, nlen, flags); } return (error); } static void db_sysctl_cmd_usage(void) { db_printf( " sysctl [/Nnox] <sysctl> \n" " \n" " <sysctl> The name of the sysctl to show. \n" " \n" " Show a sysctl by hooking into SYSCTL_IN and SYSCTL_OUT. \n" " This will work for most sysctls, but should not be used \n" " with sysctls that are known to malloc. \n" " \n" " While recursing any \"unsafe\" sysctls will be skipped. \n" " Call sysctl directly on the sysctl to try printing the \n" " skipped sysctl. This is unsafe and may make the ddb \n" " session unusable. \n" " \n" " Arguments: \n" " /N Display only the name of the sysctl. \n" " /n Display only the value of the sysctl. \n" " /o Display opaque values. \n" " /x Display the sysctl in hex. \n" " \n" "For example: \n" "sysctl vm.v_free_min \n" "vm.v_free_min: 12669 \n" ); } /* * Show a specific sysctl similar to sysctl(8). */ DB_FUNC(sysctl, db_sysctl_cmd, db_cmd_table, CS_OWN, NULL) { char name[TOK_STRING_SIZE]; int error, i, t, flags; /* Parse the modifiers */ t = db_read_token(); if (t == tSLASH || t == tMINUS) { t = db_read_token(); if (t != tIDENT) { db_printf("Bad modifier\n"); error = EINVAL; goto out; } db_strcpy(modif, db_tok_string); } else { db_unread_token(t); modif[0] = '\0'; } flags = 0; for (i = 0; i < nitems(db_sysctl_modifs); i++) { if (strchr(modif, db_sysctl_modifs[i])) { flags |= db_sysctl_modif_values[i]; } } /* Parse the sysctl names */ t = db_read_token(); if (t != tIDENT) { db_printf("Need sysctl name\n"); error = EINVAL; goto out; } /* Copy the name into a temporary buffer */ db_strcpy(name, db_tok_string); /* Ensure there is no trailing cruft */ t = db_read_token(); if (t != tEOL) { db_printf("Unexpected sysctl argument\n"); error = EINVAL; goto out; } error = db_sysctlbyname(name, flags); if (error == ENOENT) { db_printf("unknown oid: '%s'\n", db_tok_string); goto out; } else if (error) { db_printf("%s: error: %d\n", db_tok_string, error); goto out; } out: /* Ensure we eat all of our text */ db_flush_lex(); if (error == EINVAL) { db_sysctl_cmd_usage(); } } #endif /* DDB */ diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 7e328454c877..7ca67c390b91 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -1,1088 +1,1088 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * Modifications/enhancements: * Copyright (c) 1995 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer"); static uma_zone_t cluster_pbuf_zone; static void cluster_init(void *); static struct cluster_save *cluster_collectbufs(struct vnode *vp, struct vn_clusterw *vnc, struct buf *last_bp, int gbflags); static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, struct buf *fbp); static void cluster_callback(struct buf *); static int write_behind = 1; SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0, "Cluster write-behind; 0: disable, 1: enable, 2: backed off"); static int read_max = 64; SYSCTL_INT(_vfs, OID_AUTO, read_max, CTLFLAG_RW, &read_max, 0, "Cluster read-ahead max block count"); static int read_min = 1; SYSCTL_INT(_vfs, OID_AUTO, read_min, CTLFLAG_RW, &read_min, 0, "Cluster read min block count"); SYSINIT(cluster, SI_SUB_CPU, SI_ORDER_ANY, cluster_init, NULL); static void cluster_init(void *dummy) { cluster_pbuf_zone = pbuf_zsecond_create("clpbuf", nswbuf / 2); } /* * Read data to a buf, including read-ahead if we find this to be beneficial. * cluster_read replaces bread. */ int cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size, struct ucred *cred, long totread, int seqcount, int gbflags, struct buf **bpp) { struct buf *bp, *rbp, *reqbp; struct bufobj *bo; struct thread *td; daddr_t blkno, origblkno; int maxra, racluster; int error, ncontig; int i; error = 0; td = curthread; bo = &vp->v_bufobj; if (!unmapped_buf_allowed) gbflags &= ~GB_UNMAPPED; /* * Try to limit the amount of read-ahead by a few * ad-hoc parameters. This needs work!!! */ racluster = vp->v_mount->mnt_iosize_max / size; maxra = seqcount; maxra = min(read_max, maxra); maxra = min(nbuf/8, maxra); if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize) maxra = (filesize / size) - lblkno; /* * get the requested block */ error = getblkx(vp, lblkno, lblkno, size, 0, 0, gbflags, &bp); if (error != 0) { *bpp = NULL; return (error); } gbflags &= ~GB_NOSPARSE; origblkno = lblkno; *bpp = reqbp = bp; /* * if it is in the cache, then check to see if the reads have been * sequential. If they have, then try some read-ahead, otherwise * back-off on prospective read-aheads. */ if (bp->b_flags & B_CACHE) { if (!seqcount) { return 0; } else if ((bp->b_flags & B_RAM) == 0) { return 0; } else { bp->b_flags &= ~B_RAM; BO_RLOCK(bo); for (i = 1; i < maxra; i++) { /* * Stop if the buffer does not exist or it * is invalid (about to go away?) 
*/ rbp = gbincore(&vp->v_bufobj, lblkno+i); if (rbp == NULL || (rbp->b_flags & B_INVAL)) break; /* * Set another read-ahead mark so we know * to check again. (If we can lock the * buffer without waiting) */ if ((((i % racluster) == (racluster - 1)) || (i == (maxra - 1))) && (0 == BUF_LOCK(rbp, LK_EXCLUSIVE | LK_NOWAIT, NULL))) { rbp->b_flags |= B_RAM; BUF_UNLOCK(rbp); } } BO_RUNLOCK(bo); if (i >= maxra) { return 0; } lblkno += i; } reqbp = bp = NULL; /* * If it isn't in the cache, then get a chunk from * disk if sequential, otherwise just get the block. */ } else { off_t firstread = bp->b_offset; int nblks; long minread; KASSERT(bp->b_offset != NOOFFSET, ("cluster_read: no buffer offset")); ncontig = 0; /* * Adjust totread if needed */ minread = read_min * size; if (minread > totread) totread = minread; /* * Compute the total number of blocks that we should read * synchronously. */ if (firstread + totread > filesize) totread = filesize - firstread; nblks = howmany(totread, size); if (nblks > racluster) nblks = racluster; /* * Now compute the number of contiguous blocks. */ if (nblks > 1) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); /* * If this failed to map just do the original block. */ if (error || blkno == -1) ncontig = 0; } /* * If we have contiguous data available do a cluster * otherwise just read the requested block. */ if (ncontig) { /* Account for our first block. */ ncontig = min(ncontig + 1, nblks); if (ncontig < nblks) nblks = ncontig; bp = cluster_rbuild(vp, filesize, lblkno, blkno, size, nblks, gbflags, bp); lblkno += (bp->b_bufsize / size); } else { bp->b_flags |= B_RAM; bp->b_iocmd = BIO_READ; lblkno += 1; } } /* * handle the synchronous read so that it is available ASAP. */ if (bp) { if ((bp->b_flags & B_CLUSTER) == 0) { vfs_busy_pages(bp, 0); } bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL) BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); racct_add_buf(td->td_proc, bp, 0); PROC_UNLOCK(td->td_proc); } #endif /* RACCT */ td->td_ru.ru_inblock++; } /* * If we have been doing sequential I/O, then do some read-ahead. */ while (lblkno < (origblkno + maxra)) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); if (error) break; if (blkno == -1) break; /* * We could throttle ncontig here by maxra but we might as * well read the data if it is contiguous. We're throttled * by racluster anyway. */ if (ncontig) { ncontig = min(ncontig + 1, racluster); rbp = cluster_rbuild(vp, filesize, lblkno, blkno, size, ncontig, gbflags, NULL); lblkno += (rbp->b_bufsize / size); if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } } else { rbp = getblk(vp, lblkno, size, 0, 0, gbflags); lblkno += 1; if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } rbp->b_flags |= B_ASYNC | B_RAM; rbp->b_iocmd = BIO_READ; rbp->b_blkno = blkno; } if (rbp->b_flags & B_CACHE) { rbp->b_flags &= ~B_ASYNC; bqrelse(rbp); continue; } if ((rbp->b_flags & B_CLUSTER) == 0) { vfs_busy_pages(rbp, 0); } rbp->b_flags &= ~B_INVAL; rbp->b_ioflags &= ~BIO_ERROR; if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL) BUF_KERNPROC(rbp); rbp->b_iooffset = dbtob(rbp->b_blkno); bstrategy(rbp); #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); racct_add_buf(td->td_proc, rbp, 0); PROC_UNLOCK(td->td_proc); } #endif /* RACCT */ td->td_ru.ru_inblock++; } if (reqbp) { /* * Like bread, always brelse() the buffer when * returning an error. 
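* The caller sees *bpp == NULL in that case.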
*/ error = bufwait(reqbp); if (error != 0) { brelse(reqbp); *bpp = NULL; } } return (error); } /* * If blocks are contiguous on disk, use this to provide clustered * read ahead. We will read as many blocks as possible sequentially * and then parcel them up into logical blocks in the buffer hash table. */ static struct buf * cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, struct buf *fbp) { struct buf *bp, *tbp; daddr_t bn; off_t off; long tinc, tsize; int i, inc, j, k, toff; KASSERT(size == vp->v_mount->mnt_stat.f_iosize, ("cluster_rbuild: size %ld != f_iosize %jd\n", size, (intmax_t)vp->v_mount->mnt_stat.f_iosize)); /* * avoid a division */ while ((u_quad_t) size * (lbn + run) > filesize) { --run; } if (fbp) { tbp = fbp; tbp->b_iocmd = BIO_READ; } else { tbp = getblk(vp, lbn, size, 0, 0, gbflags); if (tbp->b_flags & B_CACHE) return tbp; tbp->b_flags |= B_ASYNC | B_RAM; tbp->b_iocmd = BIO_READ; } tbp->b_blkno = blkno; - if( (tbp->b_flags & B_MALLOC) || + if ( (tbp->b_flags & B_MALLOC) || ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) ) return tbp; bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT); if (bp == NULL) return tbp; MPASS((bp->b_flags & B_MAXPHYS) != 0); /* * We are synthesizing a buffer out of vm_page_t's, but * if the block size is not page aligned then the starting * address may not be either. Inherit the b_data offset * from the original buffer. */ bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO; if ((gbflags & GB_UNMAPPED) != 0) { bp->b_data = unmapped_buf; } else { bp->b_data = (char *)((vm_offset_t)bp->b_data | ((vm_offset_t)tbp->b_data & PAGE_MASK)); } bp->b_iocmd = BIO_READ; bp->b_iodone = cluster_callback; bp->b_blkno = blkno; bp->b_lblkno = lbn; bp->b_offset = tbp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("cluster_rbuild: no buffer offset")); pbgetvp(vp, bp); TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; inc = btodb(size); for (bn = blkno, i = 0; i < run; ++i, bn += inc) { if (i == 0) { vm_object_pip_add(tbp->b_bufobj->bo_object, tbp->b_npages); vfs_busy_pages_acquire(tbp); } else { if ((bp->b_npages * PAGE_SIZE) + round_page(size) > vp->v_mount->mnt_iosize_max) { break; } tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT | (gbflags & GB_UNMAPPED)); /* Don't wait around for locked bufs. */ if (tbp == NULL) break; /* * Stop scanning if the buffer is fully valid * (marked B_CACHE), or locked (may be doing a * background write), or if the buffer is not * VMIO backed. The clustering code can only deal * with VMIO-backed buffers. The bo lock is not * required for the BKGRDINPROG check since it * can not be set without the buf lock. */ if ((tbp->b_vflags & BV_BKGRDINPROG) || (tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { bqrelse(tbp); break; } /* * The buffer must be completely invalid in order to * take part in the cluster. If it is partially valid * then we stop. 
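* Each page in the range is sbusied and checked below; finding an already-valid page unwinds via clean_sbusy and ends the cluster scan.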
*/ off = tbp->b_offset; tsize = size; for (j = 0; tsize > 0; j++) { toff = off & PAGE_MASK; tinc = tsize; if (toff + tinc > PAGE_SIZE) tinc = PAGE_SIZE - toff; if (vm_page_trysbusy(tbp->b_pages[j]) == 0) break; if ((tbp->b_pages[j]->valid & vm_page_bits(toff, tinc)) != 0) { vm_page_sunbusy(tbp->b_pages[j]); break; } vm_object_pip_add(tbp->b_bufobj->bo_object, 1); off += tinc; tsize -= tinc; } if (tsize > 0) { clean_sbusy: vm_object_pip_wakeupn(tbp->b_bufobj->bo_object, j); for (k = 0; k < j; k++) vm_page_sunbusy(tbp->b_pages[k]); bqrelse(tbp); break; } /* * Set a read-ahead mark as appropriate */ if ((fbp && (i == 1)) || (i == (run - 1))) tbp->b_flags |= B_RAM; /* * Set the buffer up for an async read (XXX should * we do this only if we do not wind up brelse()ing?). * Set the block number if it isn't set, otherwise * if it is make sure it matches the block number we * expect. */ tbp->b_flags |= B_ASYNC; tbp->b_iocmd = BIO_READ; if (tbp->b_blkno == tbp->b_lblkno) { tbp->b_blkno = bn; } else if (tbp->b_blkno != bn) { goto clean_sbusy; } } /* * XXX fbp from caller may not be B_ASYNC, but we are going * to biodone() it in cluster_callback() anyway */ BUF_KERNPROC(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages-1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } if (vm_page_all_valid(m)) tbp->b_pages[j] = bogus_page; } /* * Don't inherit tbp->b_bufsize as it may be larger due to * a non-page-aligned size. Instead just aggregate using * 'size'. */ if (tbp->b_bcount != size) printf("warning: tbp->b_bcount wrong %ld vs %ld\n", tbp->b_bcount, size); if (tbp->b_bufsize != size) printf("warning: tbp->b_bufsize wrong %ld vs %ld\n", tbp->b_bufsize, size); bp->b_bcount += size; bp->b_bufsize += size; } /* * Fully valid pages in the cluster are already good and do not need * to be re-read from disk. Replace the page with bogus_page */ for (j = 0; j < bp->b_npages; j++) { if (vm_page_all_valid(bp->b_pages[j])) bp->b_pages[j] = bogus_page; } if (bp->b_bufsize > bp->b_kvasize) panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); if (buf_mapped(bp)) { pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); } return (bp); } /* * Cleanup after a clustered read or write. * This is complicated by the fact that any of the buffers might have * extra memory (if there were no empty buffer headers at allocbuf time) * that we will need to shift around. */ static void cluster_callback(struct buf *bp) { struct buf *nbp, *tbp; int error = 0; /* * Must propagate errors to all the components. */ if (bp->b_ioflags & BIO_ERROR) error = bp->b_error; if (buf_mapped(bp)) { pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); } /* * Move memory from the large cluster buffer into the component * buffers and mark IO as done on these. */ for (tbp = TAILQ_FIRST(&bp->b_cluster.cluster_head); tbp; tbp = nbp) { nbp = TAILQ_NEXT(&tbp->b_cluster, cluster_entry); if (error) { tbp->b_ioflags |= BIO_ERROR; tbp->b_error = error; } else { tbp->b_dirtyoff = tbp->b_dirtyend = 0; tbp->b_flags &= ~B_INVAL; tbp->b_ioflags &= ~BIO_ERROR; /* * XXX the bdwrite()/bqrelse() issued during * cluster building clears B_RELBUF (see bqrelse() * comment). If direct I/O was specified, we have * to restore it here to allow the buffer and VM * to be freed. 
*/ if (tbp->b_flags & B_DIRECT) tbp->b_flags |= B_RELBUF; } bufdone(tbp); } pbrelvp(bp); uma_zfree(cluster_pbuf_zone, bp); } /* * cluster_wbuild_wb: * * Implement the write-behind policy for cluster writes. * * write_behind = 0 write behind disabled * write_behind = 1 write behind normal (default) * write_behind = 2 write behind backed-off */ static __inline int cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len, int gbflags) { int r = 0; switch (write_behind) { case 2: if (start_lbn < len) break; start_lbn -= len; /* FALLTHROUGH */ case 1: r = cluster_wbuild(vp, size, start_lbn, len, gbflags); /* FALLTHROUGH */ default: /* FALLTHROUGH */ break; } return(r); } /* * Do clustered write for FFS. * * Four cases: * 1. Write is not sequential (write asynchronously) * Write is sequential: * 2. beginning of cluster - begin cluster * 3. middle of a cluster - add to cluster * 4. end of a cluster - asynchronously write cluster */ void cluster_write(struct vnode *vp, struct vn_clusterw *vnc, struct buf *bp, u_quad_t filesize, int seqcount, int gbflags) { daddr_t lbn; int maxclen, cursize; int lblocksize; int async; if (!unmapped_buf_allowed) gbflags &= ~GB_UNMAPPED; if (vp->v_type == VREG) { async = DOINGASYNC(vp); lblocksize = vp->v_mount->mnt_stat.f_iosize; } else { async = 0; lblocksize = bp->b_bufsize; } lbn = bp->b_lblkno; KASSERT(bp->b_offset != NOOFFSET, ("cluster_write: no buffer offset")); /* Initialize vnode to beginning of file. */ if (lbn == 0) vnc->v_lasta = vnc->v_clen = vnc->v_cstart = vnc->v_lastw = 0; if (vnc->v_clen == 0 || lbn != vnc->v_lastw + 1 || (bp->b_blkno != vnc->v_lasta + btodb(lblocksize))) { maxclen = vp->v_mount->mnt_iosize_max / lblocksize - 1; if (vnc->v_clen != 0) { /* * Next block is not sequential. * * If we are not writing at end of file, the process * seeked to another point in the file since its last * write, or we have reached our maximum cluster size, * then push the previous cluster. Otherwise try * reallocating to make it sequential. * * Change to algorithm: only push previous cluster if * it was sequential from the point of view of the * seqcount heuristic, otherwise leave the buffer * intact so we can potentially optimize the I/O * later on in the buf_daemon or update daemon * flush. */ cursize = vnc->v_lastw - vnc->v_cstart + 1; if ((u_quad_t)bp->b_offset + lblocksize != filesize || lbn != vnc->v_lastw + 1 || vnc->v_clen <= cursize) { if (!async && seqcount > 0) { cluster_wbuild_wb(vp, lblocksize, vnc->v_cstart, cursize, gbflags); } } else { struct buf **bpp, **endbp; struct cluster_save *buflist; buflist = cluster_collectbufs(vp, vnc, bp, gbflags); if (buflist == NULL) { /* * Cluster build failed so just write * it now. */ bawrite(bp); return; } endbp = &buflist->bs_children [buflist->bs_nchildren - 1]; if (VOP_REALLOCBLKS(vp, buflist)) { /* * Failed, push the previous cluster * if *really* writing sequentially * in the logical file (seqcount > 1), * otherwise delay it in the hopes that * the low level disk driver can * optimize the write ordering. */ for (bpp = buflist->bs_children; bpp < endbp; bpp++) brelse(*bpp); free(buflist, M_SEGMENT); if (seqcount > 1) { cluster_wbuild_wb(vp, lblocksize, vnc->v_cstart, cursize, gbflags); } } else { /* * Succeeded, keep building cluster. */ for (bpp = buflist->bs_children; bpp <= endbp; bpp++) bdwrite(*bpp); free(buflist, M_SEGMENT); vnc->v_lastw = lbn; vnc->v_lasta = bp->b_blkno; return; } } } /* * Consider beginning a cluster. 
If at end of file, make * cluster as large as possible, otherwise find size of * existing cluster. */ if (vp->v_type == VREG && (u_quad_t) bp->b_offset + lblocksize != filesize && bp->b_blkno == bp->b_lblkno && (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) != 0 || bp->b_blkno == -1)) { bawrite(bp); vnc->v_clen = 0; vnc->v_lasta = bp->b_blkno; vnc->v_cstart = lbn + 1; vnc->v_lastw = lbn; return; } vnc->v_clen = maxclen; if (!async && maxclen == 0) { /* I/O not contiguous */ vnc->v_cstart = lbn + 1; bawrite(bp); } else { /* Wait for rest of cluster */ vnc->v_cstart = lbn; bdwrite(bp); } } else if (lbn == vnc->v_cstart + vnc->v_clen) { /* * At end of cluster, write it out if seqcount tells us we * are operating sequentially, otherwise let the buf or * update daemon handle it. */ bdwrite(bp); if (seqcount > 1) { cluster_wbuild_wb(vp, lblocksize, vnc->v_cstart, vnc->v_clen + 1, gbflags); } vnc->v_clen = 0; vnc->v_cstart = lbn + 1; } else if (vm_page_count_severe()) { /* * We are low on memory, get it going NOW */ bawrite(bp); } else { /* * In the middle of a cluster, so just delay the I/O for now. */ bdwrite(bp); } vnc->v_lastw = lbn; vnc->v_lasta = bp->b_blkno; } /* * This is an awful lot like cluster_rbuild...wish they could be combined. * The last lbn argument is the current block on which I/O is being * performed. Check to see that it doesn't fall in the middle of * the current block (if last_bp == NULL). */ int cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, int gbflags) { struct buf *bp, *tbp; struct bufobj *bo; int i, j; int totalwritten = 0; int dbsize = btodb(size); if (!unmapped_buf_allowed) gbflags &= ~GB_UNMAPPED; bo = &vp->v_bufobj; while (len > 0) { /* * If the buffer is not delayed-write (i.e. dirty), or it * is delayed-write but either locked or inval, it cannot * partake in the clustered write. */ BO_LOCK(bo); if ((tbp = gbincore(&vp->v_bufobj, start_lbn)) == NULL || (tbp->b_vflags & BV_BKGRDINPROG)) { BO_UNLOCK(bo); ++start_lbn; --len; continue; } if (BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, BO_LOCKPTR(bo))) { ++start_lbn; --len; continue; } if ((tbp->b_flags & (B_INVAL | B_DELWRI)) != B_DELWRI) { BUF_UNLOCK(tbp); ++start_lbn; --len; continue; } bremfree(tbp); tbp->b_flags &= ~B_DONE; /* * Extra memory in the buffer, punt on this buffer. * XXX we could handle this in most cases, but we would * have to push the extra memory down to after our max * possible cluster size and then potentially pull it back * up if the cluster was terminated prematurely--too much * hassle. */ if (((tbp->b_flags & (B_CLUSTEROK | B_MALLOC | B_VMIO)) != (B_CLUSTEROK | B_VMIO)) || (tbp->b_bcount != tbp->b_bufsize) || (tbp->b_bcount != size) || (len == 1) || ((bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT)) == NULL)) { totalwritten += tbp->b_bufsize; bawrite(tbp); ++start_lbn; --len; continue; } MPASS((bp->b_flags & B_MAXPHYS) != 0); /* * We got a pbuf to make the cluster in, * so initialise it. */ TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; if (tbp->b_wcred != NOCRED) bp->b_wcred = crhold(tbp->b_wcred); bp->b_blkno = tbp->b_blkno; bp->b_lblkno = tbp->b_lblkno; bp->b_offset = tbp->b_offset; /* * We are synthesizing a buffer out of vm_page_t's, but * if the block size is not page aligned then the starting * address may not be either. Inherit the b_data offset * from the original buffer. 
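* For an unmapped cluster (GB_UNMAPPED) b_data stays at unmapped_buf instead.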
*/ if ((gbflags & GB_UNMAPPED) == 0 || (tbp->b_flags & B_VMIO) == 0) { bp->b_data = (char *)((vm_offset_t)bp->b_data | ((vm_offset_t)tbp->b_data & PAGE_MASK)); } else { bp->b_data = unmapped_buf; } bp->b_flags |= B_CLUSTER | (tbp->b_flags & (B_VMIO | B_NEEDCOMMIT)); bp->b_iodone = cluster_callback; pbgetvp(vp, bp); /* * From this location in the file, scan forward to see * if there are buffers with adjacent data that need to * be written as well. */ for (i = 0; i < len; ++i, ++start_lbn) { if (i != 0) { /* If not the first buffer */ /* * If the adjacent data is not even in core it * can't need to be written. */ BO_LOCK(bo); if ((tbp = gbincore(bo, start_lbn)) == NULL || (tbp->b_vflags & BV_BKGRDINPROG)) { BO_UNLOCK(bo); break; } /* * If it IS in core, but has different * characteristics, or is locked (which * means it could be undergoing a background * I/O or be in a weird state), then don't * cluster with it. */ if (BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, BO_LOCKPTR(bo))) break; if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK | B_INVAL | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_CLUSTEROK | (bp->b_flags & (B_VMIO | B_NEEDCOMMIT))) || tbp->b_wcred != bp->b_wcred) { BUF_UNLOCK(tbp); break; } /* * Check that the combined cluster * would make sense with regard to pages * and would not be too large */ if ((tbp->b_bcount != size) || ((bp->b_blkno + (dbsize * i)) != tbp->b_blkno) || ((tbp->b_npages + bp->b_npages) > (vp->v_mount->mnt_iosize_max / PAGE_SIZE))) { BUF_UNLOCK(tbp); break; } /* * Ok, it's passed all the tests, * so remove it from the free list * and mark it busy. We will use it. */ bremfree(tbp); tbp->b_flags &= ~B_DONE; } /* end of code for non-first buffers only */ /* * If the IO is via the VM then we do some * special VM hackery (yuck). Since the buffer's * block size may not be page-aligned it is possible * for a page to be shared between two buffers. We * have to get rid of the duplication when building * the cluster. */ if (tbp->b_flags & B_VMIO) { vm_page_t m; if (i == 0) { vfs_busy_pages_acquire(tbp); } else { /* if not first buffer */ for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; if (vm_page_trysbusy(m) == 0) { for (j--; j >= 0; j--) vm_page_sunbusy( tbp->b_pages[j]); bqrelse(tbp); goto finishcluster; } } } vm_object_pip_add(tbp->b_bufobj->bo_object, tbp->b_npages); for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages - 1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } } } bp->b_bcount += size; bp->b_bufsize += size; /* * If any of the clustered buffers have their * B_BARRIER flag set, transfer that request to * the cluster. */ bp->b_flags |= (tbp->b_flags & B_BARRIER); tbp->b_flags &= ~(B_DONE | B_BARRIER); tbp->b_flags |= B_ASYNC; tbp->b_ioflags &= ~BIO_ERROR; tbp->b_iocmd = BIO_WRITE; bundirty(tbp); reassignbuf(tbp); /* put on clean list */ bufobj_wref(tbp->b_bufobj); BUF_KERNPROC(tbp); buf_track(tbp, __func__); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); } finishcluster: if (buf_mapped(bp)) { pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); } if (bp->b_bufsize > bp->b_kvasize) panic( "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); totalwritten += bp->b_bufsize; bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bufsize; bawrite(bp); len -= i; } return totalwritten; } /* * Collect together all the buffers in a cluster. * Plus add one additional buffer. 
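* The children are blocks v_cstart through v_lastw, read with bread_gb(), plus last_bp appended at the end; each is mapped to its physical block with VOP_BMAP() so the list can be handed to VOP_REALLOCBLKS().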
*/ static struct cluster_save * cluster_collectbufs(struct vnode *vp, struct vn_clusterw *vnc, struct buf *last_bp, int gbflags) { struct cluster_save *buflist; struct buf *bp; daddr_t lbn; int i, j, len, error; len = vnc->v_lastw - vnc->v_cstart + 1; buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), M_SEGMENT, M_WAITOK); buflist->bs_nchildren = 0; buflist->bs_children = (struct buf **) (buflist + 1); for (lbn = vnc->v_cstart, i = 0; i < len; lbn++, i++) { error = bread_gb(vp, lbn, last_bp->b_bcount, NOCRED, gbflags, &bp); if (error != 0) { /* * If read fails, release collected buffers * and return failure. */ for (j = 0; j < i; j++) brelse(buflist->bs_children[j]); free(buflist, M_SEGMENT); return (NULL); } buflist->bs_children[i] = bp; if (bp->b_blkno == bp->b_lblkno) VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } buflist->bs_children[i] = bp = last_bp; if (bp->b_blkno == bp->b_lblkno) VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); buflist->bs_nchildren = i + 1; return (buflist); } void cluster_init_vn(struct vn_clusterw *vnc) { vnc->v_lasta = 0; vnc->v_clen = 0; vnc->v_cstart = 0; vnc->v_lastw = 0; } diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c index 3b968fd19bbd..65d52cc68bcd 100644 --- a/sys/kern/vfs_mountroot.c +++ b/sys/kern/vfs_mountroot.c @@ -1,1160 +1,1159 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2010 Marcel Moolenaar * Copyright (c) 1999-2004 Poul-Henning Kamp * Copyright (c) 1999 Michael Smith * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_rootdevname.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The root filesystem is detailed in the kernel environment variable * vfs.root.mountfrom, which is expected to be in the general format * * :[][ :[] ...] * vfsname := the name of a VFS known to the kernel and capable * of being mounted as root * path := disk device name or other data used by the filesystem * to locate its physical store * * If the environment variable vfs.root.mountfrom is a space separated list, * each list element is tried in turn and the root filesystem will be mounted * from the first one that succeeds. * * The environment variable vfs.root.mountfrom.options is a comma delimited * set of string mount options. These mount options must be parseable * by nmount() in the kernel. */ static int parse_mount(char **); static struct mntarg *parse_mountroot_options(struct mntarg *, const char *); static int sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS); static void vfs_mountroot_wait(void); static int vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev); /* * The vnode of the system's root (/ in the filesystem, without chroot * active.) */ struct vnode *rootvnode; /* * Mount of the system's /dev. */ struct mount *rootdevmp; char *rootdevnames[2] = {NULL, NULL}; struct mtx root_holds_mtx; MTX_SYSINIT(root_holds, &root_holds_mtx, "root_holds", MTX_DEF); static TAILQ_HEAD(, root_hold_token) root_holds = TAILQ_HEAD_INITIALIZER(root_holds); enum action { A_CONTINUE, A_PANIC, A_REBOOT, A_RETRY }; enum rh_flags { RH_FREE, RH_ALLOC, RH_ARG, }; static enum action root_mount_onfail = A_CONTINUE; static int root_mount_mddev; static int root_mount_complete; /* By default wait up to 3 seconds for devices to appear. 
*/ static int root_mount_timeout = 3; TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout); static int root_mount_always_wait = 0; SYSCTL_INT(_vfs, OID_AUTO, root_mount_always_wait, CTLFLAG_RDTUN, &root_mount_always_wait, 0, "Wait for root mount holds even if the root device already exists"); SYSCTL_PROC(_vfs, OID_AUTO, root_mount_hold, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_root_mount_hold, "A", "List of root mount hold tokens"); static int sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct root_hold_token *h; int error; sbuf_new(&sb, NULL, 256, SBUF_AUTOEXTEND | SBUF_INCLUDENUL); mtx_lock(&root_holds_mtx); TAILQ_FOREACH(h, &root_holds, list) { if (h != TAILQ_FIRST(&root_holds)) sbuf_putc(&sb, ' '); sbuf_printf(&sb, "%s", h->who); } mtx_unlock(&root_holds_mtx); error = sbuf_finish(&sb); if (error == 0) error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb)); sbuf_delete(&sb); return (error); } struct root_hold_token * root_mount_hold(const char *identifier) { struct root_hold_token *h; h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK); h->flags = RH_ALLOC; h->who = identifier; mtx_lock(&root_holds_mtx); TSHOLD("root mount"); TAILQ_INSERT_TAIL(&root_holds, h, list); mtx_unlock(&root_holds_mtx); return (h); } void root_mount_hold_token(const char *identifier, struct root_hold_token *h) { #ifdef INVARIANTS struct root_hold_token *t; #endif h->flags = RH_ARG; h->who = identifier; mtx_lock(&root_holds_mtx); #ifdef INVARIANTS TAILQ_FOREACH(t, &root_holds, list) { if (t == h) { panic("Duplicate mount hold by '%s' on %p", identifier, h); } } #endif TSHOLD("root mount"); TAILQ_INSERT_TAIL(&root_holds, h, list); mtx_unlock(&root_holds_mtx); } void root_mount_rel(struct root_hold_token *h) { if (h == NULL || h->flags == RH_FREE) return; mtx_lock(&root_holds_mtx); TAILQ_REMOVE(&root_holds, h, list); TSRELEASE("root mount"); wakeup(&root_holds); mtx_unlock(&root_holds_mtx); if (h->flags == RH_ALLOC) { free(h, M_DEVBUF); } else h->flags = RH_FREE; } int root_mounted(void) { /* No mutex is acquired here because int stores are atomic. */ return (root_mount_complete); } static void set_rootvnode(void) { if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode)) panic("set_rootvnode: Cannot find root vnode"); VOP_UNLOCK(rootvnode); pwd_set_rootvnode(); } static int vfs_mountroot_devfs(struct thread *td, struct mount **mpp) { struct vfsoptlist *opts; struct vfsconf *vfsp; struct mount *mp; int error; *mpp = NULL; if (rootdevmp != NULL) { /* * Already have /dev; this happens during rerooting. 
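* Busy the existing devfs instance and hand it back instead of mounting a second copy.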
*/ error = vfs_busy(rootdevmp, 0); if (error != 0) return (error); *mpp = rootdevmp; } else { vfsp = vfs_byname("devfs"); KASSERT(vfsp != NULL, ("Could not find devfs by name")); if (vfsp == NULL) return (ENOENT); mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred); error = VFS_MOUNT(mp); KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error)); if (error) return (error); error = VFS_STATFS(mp, &mp->mnt_stat); KASSERT(error == 0, ("VFS_STATFS(devfs) failed %d", error)); if (error) return (error); opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); TAILQ_INIT(opts); mp->mnt_opt = opts; mtx_lock(&mountlist_mtx); TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); *mpp = mp; rootdevmp = mp; vfs_op_exit(mp); } set_rootvnode(); error = kern_symlinkat(td, "/", AT_FDCWD, "dev", UIO_SYSSPACE); if (error) printf("kern_symlink /dev -> / returns %d\n", error); return (error); } static void vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs) { struct nameidata nd; struct mount *mporoot, *mpnroot; struct vnode *vp, *vporoot, *vpdevfs; char *fspath; int error; mpnroot = TAILQ_NEXT(mpdevfs, mnt_list); /* Shuffle the mountlist. */ mtx_lock(&mountlist_mtx); mporoot = TAILQ_FIRST(&mountlist); TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list); if (mporoot != mpdevfs) { TAILQ_REMOVE(&mountlist, mpnroot, mnt_list); TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list); } TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list); mtx_unlock(&mountlist_mtx); cache_purgevfs(mporoot); if (mporoot != mpdevfs) cache_purgevfs(mpdevfs); if (VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot)) panic("vfs_mountroot_shuffle: Cannot find root vnode"); VI_LOCK(vporoot); vporoot->v_iflag &= ~VI_MOUNT; vn_irflag_unset_locked(vporoot, VIRF_MOUNTPOINT); vporoot->v_mountedhere = NULL; VI_UNLOCK(vporoot); mporoot->mnt_flag &= ~MNT_ROOTFS; mporoot->mnt_vnodecovered = NULL; vput(vporoot); /* Set up the new rootvnode, and purge the cache */ mpnroot->mnt_vnodecovered = NULL; set_rootvnode(); cache_purgevfs(rootvnode->v_mount); if (mporoot != mpdevfs) { /* Remount old root under /.mount or /mnt */ fspath = "/.mount"; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); error = namei(&nd); if (error) { NDFREE(&nd, NDF_ONLY_PNBUF); fspath = "/mnt"; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); error = namei(&nd); } if (!error) { vp = nd.ni_vp; error = (vp->v_type == VDIR) ? 0 : ENOTDIR; if (!error) error = vinvalbuf(vp, V_SAVE, 0, 0); if (!error) { cache_purge(vp); mporoot->mnt_vnodecovered = vp; vp->v_mountedhere = mporoot; strlcpy(mporoot->mnt_stat.f_mntonname, fspath, MNAMELEN); VOP_UNLOCK(vp); } else vput(vp); } NDFREE(&nd, NDF_ONLY_PNBUF); if (error) printf("mountroot: unable to remount previous root " "under /.mount or /mnt (error %d)\n", error); } /* Remount devfs under /dev */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td); error = namei(&nd); if (!error) { vp = nd.ni_vp; error = (vp->v_type == VDIR) ? 
0 : ENOTDIR; if (!error) error = vinvalbuf(vp, V_SAVE, 0, 0); if (!error) { vpdevfs = mpdevfs->mnt_vnodecovered; if (vpdevfs != NULL) { cache_purge(vpdevfs); VI_LOCK(vpdevfs); vn_irflag_unset_locked(vpdevfs, VIRF_MOUNTPOINT); vpdevfs->v_mountedhere = NULL; VI_UNLOCK(vpdevfs); vrele(vpdevfs); } VI_LOCK(vp); mpdevfs->mnt_vnodecovered = vp; vn_irflag_set_locked(vp, VIRF_MOUNTPOINT); vp->v_mountedhere = mpdevfs; VI_UNLOCK(vp); VOP_UNLOCK(vp); } else vput(vp); } if (error) printf("mountroot: unable to remount devfs under /dev " "(error %d)\n", error); NDFREE(&nd, NDF_ONLY_PNBUF); if (mporoot == mpdevfs) { vfs_unbusy(mpdevfs); /* Unlink the no longer needed /dev/dev -> / symlink */ error = kern_funlinkat(td, AT_FDCWD, "/dev/dev", FD_NONE, UIO_SYSSPACE, 0, 0); if (error) printf("mountroot: unable to unlink /dev/dev " "(error %d)\n", error); } } /* * Configuration parser. */ /* Parser character classes. */ #define CC_WHITESPACE -1 #define CC_NONWHITESPACE -2 /* Parse errors. */ #define PE_EOF -1 #define PE_EOL -2 static __inline int parse_peek(char **conf) { return (**conf); } static __inline void parse_poke(char **conf, int c) { **conf = c; } static __inline void parse_advance(char **conf) { (*conf)++; } static int parse_skipto(char **conf, int mc) { int c, match; while (1) { c = parse_peek(conf); if (c == 0) return (PE_EOF); switch (mc) { case CC_WHITESPACE: match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0; break; case CC_NONWHITESPACE: if (c == '\n') return (PE_EOL); match = (c != ' ' && c != '\t') ? 1 : 0; break; default: match = (c == mc) ? 1 : 0; break; } if (match) break; parse_advance(conf); } return (0); } static int parse_token(char **conf, char **tok) { char *p; size_t len; int error; *tok = NULL; error = parse_skipto(conf, CC_NONWHITESPACE); if (error) return (error); p = *conf; error = parse_skipto(conf, CC_WHITESPACE); len = *conf - p; *tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO); bcopy(p, *tok, len); return (0); } static void parse_dir_ask_printenv(const char *var) { char *val; val = kern_getenv(var); if (val != NULL) { printf(" %s=%s\n", var, val); freeenv(val); } } static int parse_dir_ask(char **conf) { char name[80]; char *mnt; int error; vfs_mountroot_wait(); printf("\nLoader variables:\n"); parse_dir_ask_printenv("vfs.root.mountfrom"); parse_dir_ask_printenv("vfs.root.mountfrom.options"); printf("\nManual root filesystem specification:\n"); printf(" <fstype>:<device> [options]\n"); printf(" Mount <device> using filesystem <fstype>\n"); printf(" and with the specified (optional) option list.\n"); printf("\n"); printf(" eg. ufs:/dev/da0s1a\n"); printf(" zfs:zroot/ROOT/default\n"); printf(" cd9660:/dev/cd0 ro\n"); printf(" (which is equivalent to: "); printf("mount -t cd9660 -o ro /dev/cd0 /)\n"); printf("\n"); printf(" ? List valid disk boot devices\n"); printf(" . Yield 1 second (for background tasks)\n"); printf(" <empty line> Abort manual input\n"); do { error = EINVAL; printf("\nmountroot> "); cngets(name, sizeof(name), GETS_ECHO); if (name[0] == '\0') break; if (name[0] == '?' && name[1] == '\0') { printf("\nList of GEOM managed disk devices:\n "); g_dev_print(); continue; } if (name[0] == '.' 
&& name[1] == '\0') { pause("rmask", hz); continue; } mnt = name; error = parse_mount(&mnt); if (error == -1) printf("Invalid file system specification.\n"); } while (error != 0); return (error); } static int parse_dir_md(char **conf) { struct stat sb; struct thread *td; struct md_ioctl *mdio; char *path, *tok; int error, fd, len; td = curthread; error = parse_token(conf, &tok); if (error) return (error); len = strlen(tok); mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO); path = (void *)(mdio + 1); bcopy(tok, path, len); free(tok, M_TEMP); /* Get file status. */ error = kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &sb, NULL); if (error) goto out; /* Open /dev/mdctl so that we can attach/detach. */ error = kern_openat(td, AT_FDCWD, "/dev/" MDCTL_NAME, UIO_SYSSPACE, O_RDWR, 0); if (error) goto out; fd = td->td_retval[0]; mdio->md_version = MDIOVERSION; mdio->md_type = MD_VNODE; if (root_mount_mddev != -1) { mdio->md_unit = root_mount_mddev; (void)kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio); /* Ignore errors. We don't care. */ root_mount_mddev = -1; } mdio->md_file = (void *)(mdio + 1); mdio->md_options = MD_AUTOUNIT | MD_READONLY; mdio->md_mediasize = sb.st_size; mdio->md_unit = 0; error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio); if (error) goto out; if (mdio->md_unit > 9) { printf("rootmount: too many md units\n"); mdio->md_file = NULL; mdio->md_options = 0; mdio->md_mediasize = 0; error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio); /* Ignore errors. We don't care. */ error = ERANGE; goto out; } root_mount_mddev = mdio->md_unit; printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file); error = kern_close(td, fd); out: free(mdio, M_TEMP); return (error); } static int parse_dir_onfail(char **conf) { char *action; int error; error = parse_token(conf, &action); if (error) return (error); if (!strcmp(action, "continue")) root_mount_onfail = A_CONTINUE; else if (!strcmp(action, "panic")) root_mount_onfail = A_PANIC; else if (!strcmp(action, "reboot")) root_mount_onfail = A_REBOOT; else if (!strcmp(action, "retry")) root_mount_onfail = A_RETRY; else { printf("rootmount: %s: unknown action\n", action); error = EINVAL; } free(action, M_TEMP); return (0); } static int parse_dir_timeout(char **conf) { char *tok, *endtok; long secs; int error; error = parse_token(conf, &tok); if (error) return (error); secs = strtol(tok, &endtok, 0); error = (secs < 0 || *endtok != '\0') ? EINVAL : 0; if (!error) root_mount_timeout = secs; free(tok, M_TEMP); return (error); } static int parse_directive(char **conf) { char *dir; int error; error = parse_token(conf, &dir); if (error) return (error); if (strcmp(dir, ".ask") == 0) error = parse_dir_ask(conf); else if (strcmp(dir, ".md") == 0) error = parse_dir_md(conf); else if (strcmp(dir, ".onfail") == 0) error = parse_dir_onfail(conf); else if (strcmp(dir, ".timeout") == 0) error = parse_dir_timeout(conf); else { printf("mountroot: invalid directive `%s'\n", dir); /* Ignore the rest of the line. */ (void)parse_skipto(conf, '\n'); error = EINVAL; } free(dir, M_TEMP); return (error); } static int parse_mount_dev_present(const char *dev) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread); error = namei(&nd); if (!error) vput(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error != 0) ? 
0 : 1; } #define ERRMSGL 255 static int parse_mount(char **conf) { char *errmsg; struct mntarg *ma; char *dev, *fs, *opts, *tok; int delay, error, timeout; error = parse_token(conf, &tok); if (error) return (error); fs = tok; error = parse_skipto(&tok, ':'); if (error) { free(fs, M_TEMP); return (error); } parse_poke(&tok, '\0'); parse_advance(&tok); dev = tok; if (root_mount_mddev != -1) { /* Handle substitution for the md unit number. */ tok = strstr(dev, "md#"); if (tok != NULL) tok[2] = '0' + root_mount_mddev; } /* Parse options. */ error = parse_token(conf, &tok); opts = (error == 0) ? tok : NULL; printf("Trying to mount root from %s:%s [%s]...\n", fs, dev, (opts != NULL) ? opts : ""); errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO); if (vfs_byname(fs) == NULL) { strlcpy(errmsg, "unknown file system", ERRMSGL); error = ENOENT; goto out; } error = vfs_mountroot_wait_if_neccessary(fs, dev); if (error != 0) goto out; delay = hz / 10; timeout = root_mount_timeout * hz; for (;;) { ma = NULL; ma = mount_arg(ma, "fstype", fs, -1); ma = mount_arg(ma, "fspath", "/", -1); ma = mount_arg(ma, "from", dev, -1); ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL); ma = mount_arg(ma, "ro", NULL, 0); ma = parse_mountroot_options(ma, opts); error = kernel_mount(ma, MNT_ROOTFS); if (error == 0 || timeout <= 0) break; if (root_mount_timeout * hz == timeout || (bootverbose && timeout % hz == 0)) { printf("Mounting from %s:%s failed with error %d; " "retrying for %d more second%s\n", fs, dev, error, timeout / hz, (timeout / hz > 1) ? "s" : ""); } pause("rmretry", delay); timeout -= delay; } out: if (error) { printf("Mounting from %s:%s failed with error %d", fs, dev, error); if (errmsg[0] != '\0') printf(": %s", errmsg); printf(".\n"); } free(fs, M_TEMP); free(errmsg, M_TEMP); if (opts != NULL) free(opts, M_TEMP); /* kernel_mount can return -1 on error. */ return ((error < 0) ? EDOOFUS : error); } #undef ERRMSGL static int vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs) { struct mount *mp; char *conf; int error; root_mount_mddev = -1; retry: conf = sbuf_data(sb); mp = TAILQ_NEXT(mpdevfs, mnt_list); error = (mp == NULL) ? 0 : EDOOFUS; root_mount_onfail = A_CONTINUE; while (mp == NULL) { error = parse_skipto(&conf, CC_NONWHITESPACE); if (error == PE_EOL) { parse_advance(&conf); continue; } if (error < 0) break; switch (parse_peek(&conf)) { case '#': error = parse_skipto(&conf, '\n'); break; case '.': error = parse_directive(&conf); break; default: error = parse_mount(&conf); if (error == -1) { printf("mountroot: invalid file system " "specification.\n"); error = 0; } break; } if (error < 0) break; /* Ignore any trailing garbage on the line. */ if (parse_peek(&conf) != '\n') { printf("mountroot: advancing to next directive...\n"); (void)parse_skipto(&conf, '\n'); } mp = TAILQ_NEXT(mpdevfs, mnt_list); } if (mp != NULL) return (0); /* * We failed to mount (a new) root. 
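* Dispose of the failure as directed by the most recent .onfail directive.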
*/ switch (root_mount_onfail) { case A_CONTINUE: break; case A_PANIC: panic("mountroot: unable to (re-)mount root."); /* NOTREACHED */ case A_RETRY: goto retry; case A_REBOOT: kern_reboot(RB_NOSYNC); /* NOTREACHED */ } return (error); } static void vfs_mountroot_conf0(struct sbuf *sb) { char *s, *tok, *mnt, *opt; int error; sbuf_printf(sb, ".onfail panic\n"); sbuf_printf(sb, ".timeout %d\n", root_mount_timeout); if (boothowto & RB_ASKNAME) sbuf_printf(sb, ".ask\n"); #ifdef ROOTDEVNAME if (boothowto & RB_DFLTROOT) sbuf_printf(sb, "%s\n", ROOTDEVNAME); #endif if (boothowto & RB_CDROM) { sbuf_printf(sb, "cd9660:/dev/cd0 ro\n"); sbuf_printf(sb, ".timeout 0\n"); sbuf_printf(sb, "cd9660:/dev/cd1 ro\n"); sbuf_printf(sb, ".timeout %d\n", root_mount_timeout); } s = kern_getenv("vfs.root.mountfrom"); if (s != NULL) { opt = kern_getenv("vfs.root.mountfrom.options"); tok = s; error = parse_token(&tok, &mnt); while (!error) { sbuf_printf(sb, "%s %s\n", mnt, (opt != NULL) ? opt : ""); free(mnt, M_TEMP); error = parse_token(&tok, &mnt); } if (opt != NULL) freeenv(opt); freeenv(s); } if (rootdevnames[0] != NULL) sbuf_printf(sb, "%s\n", rootdevnames[0]); if (rootdevnames[1] != NULL) sbuf_printf(sb, "%s\n", rootdevnames[1]); #ifdef ROOTDEVNAME if (!(boothowto & RB_DFLTROOT)) sbuf_printf(sb, "%s\n", ROOTDEVNAME); #endif if (!(boothowto & RB_ASKNAME)) sbuf_printf(sb, ".ask\n"); } static int vfs_mountroot_readconf(struct thread *td, struct sbuf *sb) { static char buf[128]; struct nameidata nd; off_t ofs; ssize_t resid; int error, flags, len; NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf", td); flags = FREAD; error = vn_open(&nd, &flags, 0, NULL); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); ofs = 0; len = sizeof(buf) - 1; while (1) { error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) break; if (resid == len) break; buf[len - resid] = 0; sbuf_printf(sb, "%s", buf); ofs += len - resid; } VOP_UNLOCK(nd.ni_vp); vn_close(nd.ni_vp, FREAD, td->td_ucred, td); return (error); } static void vfs_mountroot_wait(void) { struct root_hold_token *h; struct timeval lastfail; int curfail; TSENTER(); curfail = 0; while (1) { g_waitidle(); mtx_lock(&root_holds_mtx); if (TAILQ_EMPTY(&root_holds)) { mtx_unlock(&root_holds_mtx); break; } if (ppsratecheck(&lastfail, &curfail, 1)) { printf("Root mount waiting for:"); TAILQ_FOREACH(h, &root_holds, list) printf(" %s", h->who); printf("\n"); } TSWAIT("root mount"); msleep(&root_holds, &root_holds_mtx, PZERO | PDROP, "roothold", hz); TSUNWAIT("root mount"); } TSEXIT(); } static int vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev) { int delay, timeout; /* * In case of ZFS and NFS we don't have a way to wait for * a specific device. Also do the wait if the user forced that * behaviour by setting vfs.root_mount_always_wait=1. */ if (strcmp(fs, "zfs") == 0 || strstr(fs, "nfs") != NULL || dev[0] == '\0' || root_mount_always_wait != 0) { vfs_mountroot_wait(); return (0); } /* * Otherwise, no point in waiting if the device is already there. * Note that we must wait for GEOM to finish reconfiguring itself, * e.g. for geom_part(4) to finish tasting. */ g_waitidle(); if (parse_mount_dev_present(dev)) return (0); /* * No luck. Let's wait. This code looks weird, but it is structured * this way to behave exactly as it did before. 
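* Poll every hz/10 ticks until the device shows up or root_mount_timeout seconds have elapsed.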
*/ vfs_mountroot_wait(); printf("mountroot: waiting for device %s...\n", dev); delay = hz / 10; timeout = root_mount_timeout * hz; do { pause("rmdev", delay); timeout -= delay; } while (timeout > 0 && !parse_mount_dev_present(dev)); if (timeout <= 0) return (ENODEV); return (0); } void vfs_mountroot(void) { struct mount *mp; struct sbuf *sb; struct thread *td; time_t timebase; int error; mtx_assert(&Giant, MA_NOTOWNED); TSENTER(); td = curthread; sb = sbuf_new_auto(); vfs_mountroot_conf0(sb); sbuf_finish(sb); error = vfs_mountroot_devfs(td, &mp); while (!error) { error = vfs_mountroot_parse(sb, mp); if (!error) { vfs_mountroot_shuffle(td, mp); sbuf_clear(sb); error = vfs_mountroot_readconf(td, sb); sbuf_finish(sb); } } sbuf_delete(sb); /* * Iterate over all currently mounted file systems and use * the time stamp found to check and/or initialize the RTC. * Call inittodr() only once and pass it the largest of the * timestamps we encounter. */ timebase = 0; mtx_lock(&mountlist_mtx); mp = TAILQ_FIRST(&mountlist); while (mp != NULL) { if (mp->mnt_time > timebase) timebase = mp->mnt_time; mp = TAILQ_NEXT(mp, mnt_list); } mtx_unlock(&mountlist_mtx); inittodr(timebase); /* Keep prison0's root in sync with the global rootvnode. */ mtx_lock(&prison0.pr_mtx); prison0.pr_root = rootvnode; vref(prison0.pr_root); mtx_unlock(&prison0.pr_mtx); mtx_lock(&root_holds_mtx); atomic_store_rel_int(&root_mount_complete, 1); wakeup(&root_mount_complete); mtx_unlock(&root_holds_mtx); EVENTHANDLER_INVOKE(mountroot); TSEXIT(); } static struct mntarg * parse_mountroot_options(struct mntarg *ma, const char *options) { char *p; char *name, *name_arg; char *val, *val_arg; char *opts; if (options == NULL || options[0] == '\0') return (ma); p = opts = strdup(options, M_MOUNT); if (opts == NULL) { return (ma); } while((name = strsep(&p, ",")) != NULL) { if (name[0] == '\0') break; val = strchr(name, '='); if (val != NULL) { *val = '\0'; ++val; } - if( strcmp(name, "rw") == 0 || - strcmp(name, "noro") == 0) { + if (strcmp(name, "rw") == 0 || strcmp(name, "noro") == 0) { /* * The first time we mount the root file system, * we need to mount 'ro', so we need to ignore * 'rw' and 'noro' mount options. */ continue; } name_arg = strdup(name, M_MOUNT); val_arg = NULL; if (val != NULL) val_arg = strdup(val, M_MOUNT); ma = mount_arg(ma, name_arg, val_arg, (val_arg != NULL ? -1 : 0)); } free(opts, M_MOUNT); return (ma); } diff --git a/sys/sys/buf_ring.h b/sys/sys/buf_ring.h index 48c7101aad97..9622503e1f96 100644 --- a/sys/sys/buf_ring.h +++ b/sys/sys/buf_ring.h @@ -1,370 +1,370 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007-2009 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef _SYS_BUF_RING_H_ #define _SYS_BUF_RING_H_ #include <machine/cpu.h> #ifdef DEBUG_BUFRING #include <sys/lock.h> #include <sys/mutex.h> #endif struct buf_ring { volatile uint32_t br_prod_head; volatile uint32_t br_prod_tail; int br_prod_size; int br_prod_mask; uint64_t br_drops; volatile uint32_t br_cons_head __aligned(CACHE_LINE_SIZE); volatile uint32_t br_cons_tail; int br_cons_size; int br_cons_mask; #ifdef DEBUG_BUFRING struct mtx *br_lock; #endif void *br_ring[0] __aligned(CACHE_LINE_SIZE); }; /* * multi-producer safe lock-free ring buffer enqueue * */ static __inline int buf_ring_enqueue(struct buf_ring *br, void *buf) { uint32_t prod_head, prod_next, cons_tail; #ifdef DEBUG_BUFRING int i; /* * Note: It is possible to encounter an mbuf that was removed * via drbr_peek(), and then re-added via drbr_putback(), * triggering a spurious panic. */ for (i = br->br_cons_head; i != br->br_prod_head; i = ((i + 1) & br->br_cons_mask)) - if(br->br_ring[i] == buf) + if (br->br_ring[i] == buf) panic("buf=%p already enqueued at %d prod=%d cons=%d", buf, i, br->br_prod_tail, br->br_cons_tail); #endif critical_enter(); do { prod_head = br->br_prod_head; prod_next = (prod_head + 1) & br->br_prod_mask; cons_tail = br->br_cons_tail; if (prod_next == cons_tail) { rmb(); if (prod_head == br->br_prod_head && cons_tail == br->br_cons_tail) { br->br_drops++; critical_exit(); return (ENOBUFS); } continue; } } while (!atomic_cmpset_acq_int(&br->br_prod_head, prod_head, prod_next)); #ifdef DEBUG_BUFRING if (br->br_ring[prod_head] != NULL) panic("dangling value in enqueue"); #endif br->br_ring[prod_head] = buf; /* * If there are other enqueues in progress * that preceded us, we need to wait for them * to complete. */ while (br->br_prod_tail != prod_head) cpu_spinwait(); atomic_store_rel_int(&br->br_prod_tail, prod_next); critical_exit(); return (0); } /* * multi-consumer safe dequeue * */ static __inline void * buf_ring_dequeue_mc(struct buf_ring *br) { uint32_t cons_head, cons_next; void *buf; critical_enter(); do { cons_head = br->br_cons_head; cons_next = (cons_head + 1) & br->br_cons_mask; if (cons_head == br->br_prod_tail) { critical_exit(); return (NULL); } } while (!atomic_cmpset_acq_int(&br->br_cons_head, cons_head, cons_next)); buf = br->br_ring[cons_head]; #ifdef DEBUG_BUFRING br->br_ring[cons_head] = NULL; #endif /* * If there are other dequeues in progress * that preceded us, we need to wait for them * to complete. */ while (br->br_cons_tail != cons_head) cpu_spinwait(); atomic_store_rel_int(&br->br_cons_tail, cons_next); critical_exit(); return (buf); } /* * single-consumer dequeue * use where dequeue is protected by a lock * e.g. a network driver's tx queue lock */ static __inline void * buf_ring_dequeue_sc(struct buf_ring *br) { uint32_t cons_head, cons_next; #ifdef PREFETCH_DEFINED uint32_t cons_next_next; #endif uint32_t prod_tail; void *buf; /* * This is a workaround to allow using buf_ring on ARM and ARM64. * ARM64TODO: Fix buf_ring in a generic way.
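 *
 * (Aside: a sketch of the single-consumer discipline this routine
 * assumes, with sc and hw_transmit() as hypothetical driver names.
 * Any number of threads may call buf_ring_enqueue() concurrently,
 * but all _sc routines must be serialized, e.g. by the driver's tx
 * queue lock:
 *
 *	mtx_lock(&sc->tx_lock);
 *	while ((m = buf_ring_dequeue_sc(sc->br)) != NULL)
 *		hw_transmit(sc, m);
 *	mtx_unlock(&sc->tx_lock);
 * )
 *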
* REMARKS: It is suspected that br_cons_head does not require a
 * load_acq operation, but this change was extensively tested and
 * confirmed to work. To be reviewed once again in FreeBSD-12.
 *
 * It prevents the following interleaving:
 *
 * Core(0) - buf_ring_enqueue()                  Core(1) - buf_ring_dequeue_sc()
 * ------------------------------                -------------------------------
 *
 *                                               cons_head = br->br_cons_head;
 * atomic_cmpset_acq_32(&br->br_prod_head, ...);
 *                                               buf = br->br_ring[cons_head];      <1>
 * br->br_ring[prod_head] = buf;
 * atomic_store_rel_32(&br->br_prod_tail, ...);
 *                                               prod_tail = br->br_prod_tail;
 *                                               if (cons_head == prod_tail)
 *                                                       return (NULL);
 *
 * <1> The load from br->br_ring[cons_head] on core 1 can be reordered
 *     (speculatively read) by the CPU.
 */ #if defined(__arm__) || defined(__aarch64__) cons_head = atomic_load_acq_32(&br->br_cons_head); #else cons_head = br->br_cons_head; #endif prod_tail = atomic_load_acq_32(&br->br_prod_tail); cons_next = (cons_head + 1) & br->br_cons_mask; #ifdef PREFETCH_DEFINED cons_next_next = (cons_head + 2) & br->br_cons_mask; #endif if (cons_head == prod_tail) return (NULL); #ifdef PREFETCH_DEFINED if (cons_next != prod_tail) { prefetch(br->br_ring[cons_next]); if (cons_next_next != prod_tail) prefetch(br->br_ring[cons_next_next]); } #endif br->br_cons_head = cons_next; buf = br->br_ring[cons_head]; #ifdef DEBUG_BUFRING br->br_ring[cons_head] = NULL; if (!mtx_owned(br->br_lock)) panic("lock not held on single consumer dequeue"); if (br->br_cons_tail != cons_head) panic("inconsistent list cons_tail=%d cons_head=%d", br->br_cons_tail, cons_head); #endif br->br_cons_tail = cons_next; return (buf); } /* * single-consumer advance after a peek * use where it is protected by a lock * e.g. a network driver's tx queue lock */ static __inline void buf_ring_advance_sc(struct buf_ring *br) { uint32_t cons_head, cons_next; uint32_t prod_tail; cons_head = br->br_cons_head; prod_tail = br->br_prod_tail; cons_next = (cons_head + 1) & br->br_cons_mask; if (cons_head == prod_tail) return; br->br_cons_head = cons_next; #ifdef DEBUG_BUFRING br->br_ring[cons_head] = NULL; #endif br->br_cons_tail = cons_next; } /* * Used to return a buffer (most likely already there) * to the top of the ring. The caller should *not* * have used any dequeue to pull it out of the ring * but instead should have used the peek() function. * This is normally used where the transmit queue * of a driver is full, and an mbuf must be returned. * Most likely what's in the ring buffer is what * is being put back (since it was not removed), but * sometimes the lower transmit function may have * done a pullup or other function that will have * changed it. As an optimization we always put it * back (since jhb says the store is probably cheaper); * if we have to do a multi-queue version we will need * the compare and an atomic.
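 *
 * (A sketch of that driver pattern, with hw_tx_try() as a hypothetical
 * transmit attempt that fails when the hardware queue is full:
 *
 *	m = buf_ring_peek(br);
 *	if (m == NULL)
 *		return;
 *	if (hw_tx_try(sc, m) == 0)
 *		buf_ring_advance_sc(br);	-- consumed by hardware
 *	else
 *		buf_ring_putback_sc(br, m);	-- full; leave it queued
 * )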
*/ static __inline void buf_ring_putback_sc(struct buf_ring *br, void *new) { KASSERT(br->br_cons_head != br->br_prod_tail, ("Buf-Ring has none in putback")); br->br_ring[br->br_cons_head] = new; } /* * return a pointer to the first entry in the ring * without modifying it, or NULL if the ring is empty * race-prone if not protected by a lock */ static __inline void * buf_ring_peek(struct buf_ring *br) { #ifdef DEBUG_BUFRING if ((br->br_lock != NULL) && !mtx_owned(br->br_lock)) panic("lock not held on single consumer dequeue"); #endif /* * I believe it is safe to not have a memory barrier here because we * control cons, and tail is at worst a lagging indicator, so in the * worst case we might return NULL immediately after a buffer has * been enqueued. */ if (br->br_cons_head == br->br_prod_tail) return (NULL); return (br->br_ring[br->br_cons_head]); } static __inline void * buf_ring_peek_clear_sc(struct buf_ring *br) { #ifdef DEBUG_BUFRING void *ret; if (!mtx_owned(br->br_lock)) panic("lock not held on single consumer dequeue"); #endif if (br->br_cons_head == br->br_prod_tail) return (NULL); #if defined(__arm__) || defined(__aarch64__) /* * The barrier is required on ARM and ARM64 to ensure that * br->br_ring[br->br_cons_head] will not be fetched before the above * condition is checked. * Without the barrier, it is possible that the buffer is fetched * before the enqueue puts the mbuf into br; the enqueue then updates * the array and br_prod_tail, the conditional check passes, and we * return the previously fetched (and now stale) buffer. */ atomic_thread_fence_acq(); #endif #ifdef DEBUG_BUFRING /* * Single consumer, i.e. cons_head will not move while we are * running, so atomic_swap_ptr() is not necessary here. */ ret = br->br_ring[br->br_cons_head]; br->br_ring[br->br_cons_head] = NULL; return (ret); #else return (br->br_ring[br->br_cons_head]); #endif } static __inline int buf_ring_full(struct buf_ring *br) { return (((br->br_prod_head + 1) & br->br_prod_mask) == br->br_cons_tail); } static __inline int buf_ring_empty(struct buf_ring *br) { return (br->br_cons_head == br->br_prod_tail); } static __inline int buf_ring_count(struct buf_ring *br) { return ((br->br_prod_size + br->br_prod_tail - br->br_cons_tail) & br->br_prod_mask); } struct buf_ring *buf_ring_alloc(int count, struct malloc_type *type, int flags, struct mtx *); void buf_ring_free(struct buf_ring *br, struct malloc_type *type); #endif diff --git a/sys/sys/efi.h b/sys/sys/efi.h index 5875e87b3595..0c0b52afc81d 100644 --- a/sys/sys/efi.h +++ b/sys/sys/efi.h @@ -1,282 +1,282 @@ /*- * Copyright (c) 2004 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_EFI_H_ #define _SYS_EFI_H_ #include <sys/types.h> #include <sys/uuid.h> #define EFI_PAGE_SHIFT 12 #define EFI_PAGE_SIZE (1 << EFI_PAGE_SHIFT) #define EFI_PAGE_MASK (EFI_PAGE_SIZE - 1) #define EFI_TABLE_SMBIOS \ {0xeb9d2d31,0x2d88,0x11d3,0x9a,0x16,{0x00,0x90,0x27,0x3f,0xc1,0x4d}} #define EFI_TABLE_SMBIOS3 \ {0xf2fd1544,0x9794,0x4a2c,0x99,0x2e,{0xe5,0xbb,0xcf,0x20,0xe3,0x94}} enum efi_reset { EFI_RESET_COLD = 0, EFI_RESET_WARM = 1, EFI_RESET_SHUTDOWN = 2, }; typedef uint16_t efi_char; typedef unsigned long efi_status; struct efi_cfgtbl { struct uuid ct_uuid; void *ct_data; }; struct efi_md { uint32_t md_type; #define EFI_MD_TYPE_NULL 0 #define EFI_MD_TYPE_CODE 1 /* Loader text. */ #define EFI_MD_TYPE_DATA 2 /* Loader data. */ #define EFI_MD_TYPE_BS_CODE 3 /* Boot services text. */ #define EFI_MD_TYPE_BS_DATA 4 /* Boot services data. */ #define EFI_MD_TYPE_RT_CODE 5 /* Runtime services text. */ #define EFI_MD_TYPE_RT_DATA 6 /* Runtime services data. */ #define EFI_MD_TYPE_FREE 7 /* Unused/free memory. */ #define EFI_MD_TYPE_BAD 8 /* Bad memory. */ #define EFI_MD_TYPE_RECLAIM 9 /* ACPI reclaimable memory. */ #define EFI_MD_TYPE_FIRMWARE 10 /* ACPI NV memory. */ #define EFI_MD_TYPE_IOMEM 11 /* Memory-mapped I/O. */ #define EFI_MD_TYPE_IOPORT 12 /* I/O port space. */ #define EFI_MD_TYPE_PALCODE 13 /* PAL. */ #define EFI_MD_TYPE_PERSISTENT 14 /* Persistent memory.
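 *
 * (Illustration: consumers walk the firmware memory map with the
 * efi_next_descriptor() macro defined below, stepping by the
 * firmware-reported descriptor size rather than sizeof(struct efi_md).
 * Here map, dsz, ndesc and the nruntime tally are all hypothetical
 * values for the sketch:
 *
 *	struct efi_md *p;
 *	int i;
 *
 *	for (i = 0, p = map; i < ndesc;
 *	    i++, p = efi_next_descriptor(p, dsz))
 *		if ((p->md_attr & EFI_MD_ATTR_RT) != 0)
 *			nruntime++;	-- e.g. count runtime ranges
 * )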
*/ uint32_t __pad; uint64_t md_phys; void *md_virt; uint64_t md_pages; uint64_t md_attr; #define EFI_MD_ATTR_UC 0x0000000000000001UL #define EFI_MD_ATTR_WC 0x0000000000000002UL #define EFI_MD_ATTR_WT 0x0000000000000004UL #define EFI_MD_ATTR_WB 0x0000000000000008UL #define EFI_MD_ATTR_UCE 0x0000000000000010UL #define EFI_MD_ATTR_WP 0x0000000000001000UL #define EFI_MD_ATTR_RP 0x0000000000002000UL #define EFI_MD_ATTR_XP 0x0000000000004000UL #define EFI_MD_ATTR_NV 0x0000000000008000UL #define EFI_MD_ATTR_MORE_RELIABLE \ 0x0000000000010000UL #define EFI_MD_ATTR_RO 0x0000000000020000UL #define EFI_MD_ATTR_RT 0x8000000000000000UL }; #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *)(ptr)) + (size))) struct efi_tm { uint16_t tm_year; /* 1998 - 20XX */ uint8_t tm_mon; /* 1 - 12 */ uint8_t tm_mday; /* 1 - 31 */ uint8_t tm_hour; /* 0 - 23 */ uint8_t tm_min; /* 0 - 59 */ uint8_t tm_sec; /* 0 - 59 */ uint8_t __pad1; uint32_t tm_nsec; /* 0 - 999,999,999 */ int16_t tm_tz; /* -1440 to 1440 or 2047 */ uint8_t tm_dst; uint8_t __pad2; }; struct efi_tmcap { uint32_t tc_res; /* 1e-6 parts per million */ uint32_t tc_prec; /* hertz */ uint8_t tc_stz; /* Set clears sub-second time */ }; struct efi_tblhdr { uint64_t th_sig; uint32_t th_rev; uint32_t th_hdrsz; uint32_t th_crc32; uint32_t __res; }; #ifdef _KERNEL #ifdef EFIABI_ATTR struct efi_rt { struct efi_tblhdr rt_hdr; efi_status (*rt_gettime)(struct efi_tm *, struct efi_tmcap *) EFIABI_ATTR; efi_status (*rt_settime)(struct efi_tm *) EFIABI_ATTR; efi_status (*rt_getwaketime)(uint8_t *, uint8_t *, struct efi_tm *) EFIABI_ATTR; efi_status (*rt_setwaketime)(uint8_t, struct efi_tm *) EFIABI_ATTR; efi_status (*rt_setvirtual)(u_long, u_long, uint32_t, struct efi_md *) EFIABI_ATTR; efi_status (*rt_cvtptr)(u_long, void **) EFIABI_ATTR; efi_status (*rt_getvar)(efi_char *, struct uuid *, uint32_t *, u_long *, void *) EFIABI_ATTR; efi_status (*rt_scanvar)(u_long *, efi_char *, struct uuid *) EFIABI_ATTR; efi_status (*rt_setvar)(efi_char *, struct uuid *, uint32_t, u_long, void *) EFIABI_ATTR; efi_status (*rt_gethicnt)(uint32_t *) EFIABI_ATTR; efi_status (*rt_reset)(enum efi_reset, efi_status, u_long, efi_char *) EFIABI_ATTR; }; #endif struct efi_systbl { struct efi_tblhdr st_hdr; #define EFI_SYSTBL_SIG 0x5453595320494249UL efi_char *st_fwvendor; uint32_t st_fwrev; uint32_t __pad; void *st_cin; void *st_cinif; void *st_cout; void *st_coutif; void *st_cerr; void *st_cerrif; uint64_t st_rt; void *st_bs; u_long st_entries; uint64_t st_cfgtbl; }; extern vm_paddr_t efi_systbl_phys; struct efirt_callinfo; /* Internal MD EFI functions */ int efi_arch_enter(void); void efi_arch_leave(void); vm_offset_t efi_phys_to_kva(vm_paddr_t); int efi_rt_arch_call(struct efirt_callinfo *); bool efi_create_1t1_map(struct efi_md *, int, int); void efi_destroy_1t1_map(void); struct efi_ops { /* * The EFI calls might be virtualized in some environments, requiring * FreeBSD to use a different interface (ie: hypercalls) in order to * access them. 
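 *
 * (Illustration: a hypothetical paravirtualized backend would supply
 * its own table and install it, leaving any unsupported slot NULL so
 * that the corresponding inline wrapper below returns ENXIO:
 *
 *	static const struct efi_ops hv_efi_ops = {
 *		.get_time = hv_get_time,
 *		.set_time = hv_set_time,
 *		.reset_system = hv_reset,
 *	};
 *	...
 *	active_efi_ops = &hv_efi_ops;
 * )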
*/ int (*rt_ok)(void); int (*get_table)(struct uuid *, void **); int (*get_time)(struct efi_tm *); int (*get_time_capabilities)(struct efi_tmcap *); int (*reset_system)(enum efi_reset); int (*set_time)(struct efi_tm *); int (*var_get)(uint16_t *, struct uuid *, uint32_t *, size_t *, void *); int (*var_nextname)(size_t *, uint16_t *, struct uuid *); int (*var_set)(uint16_t *, struct uuid *, uint32_t, size_t, void *); }; extern const struct efi_ops *active_efi_ops; /* Public MI EFI functions */ static inline int efi_rt_ok(void) { - if(active_efi_ops->rt_ok == NULL) + if (active_efi_ops->rt_ok == NULL) return (ENXIO); return (active_efi_ops->rt_ok()); } static inline int efi_get_table(struct uuid *uuid, void **ptr) { if (active_efi_ops->get_table == NULL) return (ENXIO); return (active_efi_ops->get_table(uuid, ptr)); } static inline int efi_get_time(struct efi_tm *tm) { if (active_efi_ops->get_time == NULL) return (ENXIO); return (active_efi_ops->get_time(tm)); } static inline int efi_get_time_capabilities(struct efi_tmcap *tmcap) { if (active_efi_ops->get_time_capabilities == NULL) return (ENXIO); return (active_efi_ops->get_time_capabilities(tmcap)); } static inline int efi_reset_system(enum efi_reset type) { if (active_efi_ops->reset_system == NULL) return (ENXIO); return (active_efi_ops->reset_system(type)); } static inline int efi_set_time(struct efi_tm *tm) { if (active_efi_ops->set_time == NULL) return (ENXIO); return (active_efi_ops->set_time(tm)); } static inline int efi_var_get(uint16_t *name, struct uuid *vendor, uint32_t *attrib, size_t *datasize, void *data) { if (active_efi_ops->var_get == NULL) return (ENXIO); return (active_efi_ops->var_get(name, vendor, attrib, datasize, data)); } static inline int efi_var_nextname(size_t *namesize, uint16_t *name, struct uuid *vendor) { if (active_efi_ops->var_nextname == NULL) return (ENXIO); return (active_efi_ops->var_nextname(namesize, name, vendor)); } static inline int efi_var_set(uint16_t *name, struct uuid *vendor, uint32_t attrib, size_t datasize, void *data) { if (active_efi_ops->var_set == NULL) return (ENXIO); return (active_efi_ops->var_set(name, vendor, attrib, datasize, data)); } int efi_status_to_errno(efi_status status); #endif /* _KERNEL */ #endif /* _SYS_EFI_H_ */ diff --git a/sys/sys/tree.h b/sys/sys/tree.h index eb5f244d8a12..bc01e4de910a 100644 --- a/sys/sys/tree.h +++ b/sys/sys/tree.h @@ -1,833 +1,833 @@ /* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */ /* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2002 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _SYS_TREE_H_ #define _SYS_TREE_H_ #include <sys/cdefs.h> /* * This file defines data structures for different types of trees: * splay trees and rank-balanced trees. * * A splay tree is a self-organizing data structure. Every operation * on the tree causes a splay to happen. The splay moves the requested * node to the root of the tree and partly rebalances it. * * This has the benefit that request locality causes faster lookups as * the requested nodes move to the top of the tree. On the other hand, * every lookup causes memory writes. * * The Balance Theorem bounds the total access time for m operations * and n inserts on an initially empty tree as O((m + n)lg n). The * amortized cost for a sequence of m accesses to a splay tree is O(lg n). * * A rank-balanced tree is a binary search tree with an integer * rank-difference as an attribute of each pointer from parent to child. * The sum of the rank-differences on any path from a node down to null is * the same, and defines the rank of that node. The rank of the null node * is -1. * * Different additional conditions define different sorts of balanced * trees, including "red-black" and "AVL" trees. The conditions * applied here are the "weak-AVL" conditions of Haeupler, Sen and Tarjan: * - every rank-difference is 1 or 2. * - the rank of any leaf is 1. * * For historical reasons, rank differences that are even are associated * with the color red (Rank-Even-Difference), and the child that a red edge * points to is called a red child. * * Every operation on a rank-balanced tree is bounded as O(lg n). * The maximum height of a rank-balanced tree is 2lg (n+1).
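 *
 * (A minimal usage sketch of the rank-balanced ("RB") interface below,
 * with all names hypothetical:
 *
 *	struct node {
 *		RB_ENTRY(node) entry;
 *		int key;
 *	};
 *
 *	static int
 *	nodecmp(struct node *a, struct node *b)
 *	{
 *		return (a->key < b->key ? -1 : a->key > b->key);
 *	}
 *
 *	RB_HEAD(nodetree, node) head = RB_INITIALIZER(&head);
 *	RB_GENERATE_STATIC(nodetree, node, entry, nodecmp)
 *
 * after which RB_INSERT(nodetree, &head, n), RB_FIND(), RB_FOREACH()
 * and friends operate on the tree in O(lg n) per operation.)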
*/ #define SPLAY_HEAD(name, type) \ struct name { \ struct type *sph_root; /* root of the tree */ \ } #define SPLAY_INITIALIZER(root) \ { NULL } #define SPLAY_INIT(root) do { \ (root)->sph_root = NULL; \ } while (/*CONSTCOND*/ 0) #define SPLAY_ENTRY(type) \ struct { \ struct type *spe_left; /* left element */ \ struct type *spe_right; /* right element */ \ } #define SPLAY_LEFT(elm, field) (elm)->field.spe_left #define SPLAY_RIGHT(elm, field) (elm)->field.spe_right #define SPLAY_ROOT(head) (head)->sph_root #define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) /* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ #define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ (head)->sph_root = tmp; \ } while (/*CONSTCOND*/ 0) #define SPLAY_ROTATE_LEFT(head, tmp, field) do { \ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ SPLAY_LEFT(tmp, field) = (head)->sph_root; \ (head)->sph_root = tmp; \ } while (/*CONSTCOND*/ 0) #define SPLAY_LINKLEFT(head, tmp, field) do { \ SPLAY_LEFT(tmp, field) = (head)->sph_root; \ tmp = (head)->sph_root; \ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ } while (/*CONSTCOND*/ 0) #define SPLAY_LINKRIGHT(head, tmp, field) do { \ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ tmp = (head)->sph_root; \ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ } while (/*CONSTCOND*/ 0) #define SPLAY_ASSEMBLE(head, node, left, right, field) do { \ SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \ SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ } while (/*CONSTCOND*/ 0) /* Generates prototypes and inline functions */ #define SPLAY_PROTOTYPE(name, type, field, cmp) \ void name##_SPLAY(struct name *, struct type *); \ void name##_SPLAY_MINMAX(struct name *, int); \ struct type *name##_SPLAY_INSERT(struct name *, struct type *); \ struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \ \ /* Finds the node with the same key as elm */ \ static __unused __inline struct type * \ name##_SPLAY_FIND(struct name *head, struct type *elm) \ { \ if (SPLAY_EMPTY(head)) \ return(NULL); \ name##_SPLAY(head, elm); \ if ((cmp)(elm, (head)->sph_root) == 0) \ return (head->sph_root); \ return (NULL); \ } \ \ static __unused __inline struct type * \ name##_SPLAY_NEXT(struct name *head, struct type *elm) \ { \ name##_SPLAY(head, elm); \ if (SPLAY_RIGHT(elm, field) != NULL) { \ elm = SPLAY_RIGHT(elm, field); \ while (SPLAY_LEFT(elm, field) != NULL) { \ elm = SPLAY_LEFT(elm, field); \ } \ } else \ elm = NULL; \ return (elm); \ } \ \ static __unused __inline struct type * \ name##_SPLAY_MIN_MAX(struct name *head, int val) \ { \ name##_SPLAY_MINMAX(head, val); \ return (SPLAY_ROOT(head)); \ } /* Main splay operation. 
* Moves node close to the key of elm to top */ #define SPLAY_GENERATE(name, type, field, cmp) \ struct type * \ name##_SPLAY_INSERT(struct name *head, struct type *elm) \ { \ if (SPLAY_EMPTY(head)) { \ SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ } else { \ int __comp; \ name##_SPLAY(head, elm); \ __comp = (cmp)(elm, (head)->sph_root); \ - if(__comp < 0) { \ + if (__comp < 0) { \ SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\ SPLAY_RIGHT(elm, field) = (head)->sph_root; \ SPLAY_LEFT((head)->sph_root, field) = NULL; \ } else if (__comp > 0) { \ SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\ SPLAY_LEFT(elm, field) = (head)->sph_root; \ SPLAY_RIGHT((head)->sph_root, field) = NULL; \ } else \ return ((head)->sph_root); \ } \ (head)->sph_root = (elm); \ return (NULL); \ } \ \ struct type * \ name##_SPLAY_REMOVE(struct name *head, struct type *elm) \ { \ struct type *__tmp; \ if (SPLAY_EMPTY(head)) \ return (NULL); \ name##_SPLAY(head, elm); \ if ((cmp)(elm, (head)->sph_root) == 0) { \ if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\ } else { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\ name##_SPLAY(head, elm); \ SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ } \ return (elm); \ } \ return (NULL); \ } \ \ void \ name##_SPLAY(struct name *head, struct type *elm) \ { \ struct type __node, *__left, *__right, *__tmp; \ int __comp; \ \ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ __left = __right = &__node; \ \ while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \ if (__comp < 0) { \ __tmp = SPLAY_LEFT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if ((cmp)(elm, __tmp) < 0){ \ SPLAY_ROTATE_RIGHT(head, __tmp, field); \ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKLEFT(head, __right, field); \ } else if (__comp > 0) { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if ((cmp)(elm, __tmp) > 0){ \ SPLAY_ROTATE_LEFT(head, __tmp, field); \ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKRIGHT(head, __left, field); \ } \ } \ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ } \ \ /* Splay with either the minimum or the maximum element \ * Used to find minimum or maximum element in tree. \ */ \ void name##_SPLAY_MINMAX(struct name *head, int __comp) \ { \ struct type __node, *__left, *__right, *__tmp; \ \ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ __left = __right = &__node; \ \ while (1) { \ if (__comp < 0) { \ __tmp = SPLAY_LEFT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if (__comp < 0){ \ SPLAY_ROTATE_RIGHT(head, __tmp, field); \ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKLEFT(head, __right, field); \ } else if (__comp > 0) { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if (__comp > 0) { \ SPLAY_ROTATE_LEFT(head, __tmp, field); \ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKRIGHT(head, __left, field); \ } \ } \ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ } #define SPLAY_NEGINF -1 #define SPLAY_INF 1 #define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) #define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) #define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) #define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) #define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? 
NULL \ : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) #define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \ : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) #define SPLAY_FOREACH(x, name, head) \ for ((x) = SPLAY_MIN(name, head); \ (x) != NULL; \ (x) = SPLAY_NEXT(name, head, x)) /* Macros that define a rank-balanced tree */ #define RB_HEAD(name, type) \ struct name { \ struct type *rbh_root; /* root of the tree */ \ } #define RB_INITIALIZER(root) \ { NULL } #define RB_INIT(root) do { \ (root)->rbh_root = NULL; \ } while (/*CONSTCOND*/ 0) #define RB_ENTRY(type) \ struct { \ struct type *rbe_left; /* left element */ \ struct type *rbe_right; /* right element */ \ struct type *rbe_parent; /* parent element */ \ } #define RB_LEFT(elm, field) (elm)->field.rbe_left #define RB_RIGHT(elm, field) (elm)->field.rbe_right /* * With the expectation that any object of struct type has an * address that is a multiple of 4, and that therefore the * 2 least significant bits of a pointer to struct type are * always zero, this implementation sets those bits to indicate * that the left or right child of the tree node is "red". */ #define RB_UP(elm, field) (elm)->field.rbe_parent #define RB_BITS(elm, field) (*(__uintptr_t *)&RB_UP(elm, field)) #define RB_RED_L ((__uintptr_t)1) #define RB_RED_R ((__uintptr_t)2) #define RB_RED_MASK ((__uintptr_t)3) #define RB_FLIP_LEFT(elm, field) (RB_BITS(elm, field) ^= RB_RED_L) #define RB_FLIP_RIGHT(elm, field) (RB_BITS(elm, field) ^= RB_RED_R) #define RB_RED_LEFT(elm, field) ((RB_BITS(elm, field) & RB_RED_L) != 0) #define RB_RED_RIGHT(elm, field) ((RB_BITS(elm, field) & RB_RED_R) != 0) #define RB_PARENT(elm, field) ((__typeof(RB_UP(elm, field))) \ (RB_BITS(elm, field) & ~RB_RED_MASK)) #define RB_ROOT(head) (head)->rbh_root #define RB_EMPTY(head) (RB_ROOT(head) == NULL) #define RB_SET_PARENT(dst, src, field) do { \ RB_BITS(dst, field) &= RB_RED_MASK; \ RB_BITS(dst, field) |= (__uintptr_t)src; \ } while (/*CONSTCOND*/ 0) #define RB_SET(elm, parent, field) do { \ RB_UP(elm, field) = parent; \ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ } while (/*CONSTCOND*/ 0) #define RB_COLOR(elm, field) (RB_PARENT(elm, field) == NULL ? 0 : \ RB_LEFT(RB_PARENT(elm, field), field) == elm ? \ RB_RED_LEFT(RB_PARENT(elm, field), field) : \ RB_RED_RIGHT(RB_PARENT(elm, field), field)) /* * Something to be invoked in a loop at the root of every modified subtree, * from the bottom up to the root, to update augmented node data. 
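 *
 * (For illustration: a consumer keeping a count of nodes in each
 * subtree could, hypothetically, define before its RB_GENERATE() line:
 *
 *	#define RB_AUGMENT(x)						\
 *		(x)->size = 1 + NODE_SIZE(RB_LEFT(x, entry)) +		\
 *		    NODE_SIZE(RB_RIGHT(x, entry))
 *
 * where NODE_SIZE() is a hypothetical helper returning 0 for NULL.
 * The default below is "break", which ends each update loop at once.)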
*/ #ifndef RB_AUGMENT #define RB_AUGMENT(x) break #endif #define RB_SWAP_CHILD(head, out, in, field) do { \ if (RB_PARENT(out, field) == NULL) \ RB_ROOT(head) = (in); \ else if ((out) == RB_LEFT(RB_PARENT(out, field), field)) \ RB_LEFT(RB_PARENT(out, field), field) = (in); \ else \ RB_RIGHT(RB_PARENT(out, field), field) = (in); \ } while (/*CONSTCOND*/ 0) #define RB_ROTATE_LEFT(head, elm, tmp, field) do { \ (tmp) = RB_RIGHT(elm, field); \ if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) { \ RB_SET_PARENT(RB_RIGHT(elm, field), elm, field); \ } \ RB_SET_PARENT(tmp, RB_PARENT(elm, field), field); \ RB_SWAP_CHILD(head, elm, tmp, field); \ RB_LEFT(tmp, field) = (elm); \ RB_SET_PARENT(elm, tmp, field); \ RB_AUGMENT(elm); \ } while (/*CONSTCOND*/ 0) #define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \ (tmp) = RB_LEFT(elm, field); \ if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) { \ RB_SET_PARENT(RB_LEFT(elm, field), elm, field); \ } \ RB_SET_PARENT(tmp, RB_PARENT(elm, field), field); \ RB_SWAP_CHILD(head, elm, tmp, field); \ RB_RIGHT(tmp, field) = (elm); \ RB_SET_PARENT(elm, tmp, field); \ RB_AUGMENT(elm); \ } while (/*CONSTCOND*/ 0) /* Generates prototypes and inline functions */ #define RB_PROTOTYPE(name, type, field, cmp) \ RB_PROTOTYPE_INTERNAL(name, type, field, cmp,) #define RB_PROTOTYPE_STATIC(name, type, field, cmp) \ RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static) #define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \ RB_PROTOTYPE_INSERT_COLOR(name, type, attr); \ RB_PROTOTYPE_REMOVE_COLOR(name, type, attr); \ RB_PROTOTYPE_INSERT(name, type, attr); \ RB_PROTOTYPE_REMOVE(name, type, attr); \ RB_PROTOTYPE_FIND(name, type, attr); \ RB_PROTOTYPE_NFIND(name, type, attr); \ RB_PROTOTYPE_NEXT(name, type, attr); \ RB_PROTOTYPE_PREV(name, type, attr); \ RB_PROTOTYPE_MINMAX(name, type, attr); \ RB_PROTOTYPE_REINSERT(name, type, attr); #define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) \ attr void name##_RB_INSERT_COLOR(struct name *, struct type *) #define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr) \ attr void name##_RB_REMOVE_COLOR(struct name *, \ struct type *, struct type *) #define RB_PROTOTYPE_REMOVE(name, type, attr) \ attr struct type *name##_RB_REMOVE(struct name *, struct type *) #define RB_PROTOTYPE_INSERT(name, type, attr) \ attr struct type *name##_RB_INSERT(struct name *, struct type *) #define RB_PROTOTYPE_FIND(name, type, attr) \ attr struct type *name##_RB_FIND(struct name *, struct type *) #define RB_PROTOTYPE_NFIND(name, type, attr) \ attr struct type *name##_RB_NFIND(struct name *, struct type *) #define RB_PROTOTYPE_NEXT(name, type, attr) \ attr struct type *name##_RB_NEXT(struct type *) #define RB_PROTOTYPE_PREV(name, type, attr) \ attr struct type *name##_RB_PREV(struct type *) #define RB_PROTOTYPE_MINMAX(name, type, attr) \ attr struct type *name##_RB_MINMAX(struct name *, int) #define RB_PROTOTYPE_REINSERT(name, type, attr) \ attr struct type *name##_RB_REINSERT(struct name *, struct type *) /* Main rb operation. 
* Moves node close to the key of elm to top */ #define RB_GENERATE(name, type, field, cmp) \ RB_GENERATE_INTERNAL(name, type, field, cmp,) #define RB_GENERATE_STATIC(name, type, field, cmp) \ RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static) #define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \ RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ RB_GENERATE_INSERT(name, type, field, cmp, attr) \ RB_GENERATE_REMOVE(name, type, field, attr) \ RB_GENERATE_FIND(name, type, field, cmp, attr) \ RB_GENERATE_NFIND(name, type, field, cmp, attr) \ RB_GENERATE_NEXT(name, type, field, attr) \ RB_GENERATE_PREV(name, type, field, attr) \ RB_GENERATE_MINMAX(name, type, field, attr) \ RB_GENERATE_REINSERT(name, type, field, cmp, attr) #define RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ attr void \ name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ { \ struct type *child, *parent; \ while ((parent = RB_PARENT(elm, field)) != NULL) { \ if (RB_LEFT(parent, field) == elm) { \ if (RB_RED_LEFT(parent, field)) { \ RB_FLIP_LEFT(parent, field); \ return; \ } \ RB_FLIP_RIGHT(parent, field); \ if (RB_RED_RIGHT(parent, field)) { \ elm = parent; \ continue; \ } \ if (!RB_RED_RIGHT(elm, field)) { \ RB_FLIP_LEFT(elm, field); \ RB_ROTATE_LEFT(head, elm, child, field);\ if (RB_RED_LEFT(child, field)) \ RB_FLIP_RIGHT(elm, field); \ else if (RB_RED_RIGHT(child, field)) \ RB_FLIP_LEFT(parent, field); \ elm = child; \ } \ RB_ROTATE_RIGHT(head, parent, elm, field); \ } else { \ if (RB_RED_RIGHT(parent, field)) { \ RB_FLIP_RIGHT(parent, field); \ return; \ } \ RB_FLIP_LEFT(parent, field); \ if (RB_RED_LEFT(parent, field)) { \ elm = parent; \ continue; \ } \ if (!RB_RED_LEFT(elm, field)) { \ RB_FLIP_RIGHT(elm, field); \ RB_ROTATE_RIGHT(head, elm, child, field);\ if (RB_RED_RIGHT(child, field)) \ RB_FLIP_LEFT(elm, field); \ else if (RB_RED_LEFT(child, field)) \ RB_FLIP_RIGHT(parent, field); \ elm = child; \ } \ RB_ROTATE_LEFT(head, parent, elm, field); \ } \ RB_BITS(elm, field) &= ~RB_RED_MASK; \ break; \ } \ } #define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ attr void \ name##_RB_REMOVE_COLOR(struct name *head, \ struct type *parent, struct type *elm) \ { \ struct type *sib; \ if (RB_LEFT(parent, field) == elm && \ RB_RIGHT(parent, field) == elm) { \ RB_BITS(parent, field) &= ~RB_RED_MASK; \ elm = parent; \ parent = RB_PARENT(elm, field); \ if (parent == NULL) \ return; \ } \ do { \ if (RB_LEFT(parent, field) == elm) { \ if (!RB_RED_LEFT(parent, field)) { \ RB_FLIP_LEFT(parent, field); \ return; \ } \ if (RB_RED_RIGHT(parent, field)) { \ RB_FLIP_RIGHT(parent, field); \ elm = parent; \ continue; \ } \ sib = RB_RIGHT(parent, field); \ if ((~RB_BITS(sib, field) & RB_RED_MASK) == 0) {\ RB_BITS(sib, field) &= ~RB_RED_MASK; \ elm = parent; \ continue; \ } \ RB_FLIP_RIGHT(sib, field); \ if (RB_RED_LEFT(sib, field)) \ RB_FLIP_LEFT(parent, field); \ else if (!RB_RED_RIGHT(sib, field)) { \ RB_FLIP_LEFT(parent, field); \ RB_ROTATE_RIGHT(head, sib, elm, field); \ if (RB_RED_RIGHT(elm, field)) \ RB_FLIP_LEFT(sib, field); \ if (RB_RED_LEFT(elm, field)) \ RB_FLIP_RIGHT(parent, field); \ RB_BITS(elm, field) |= RB_RED_MASK; \ sib = elm; \ } \ RB_ROTATE_LEFT(head, parent, sib, field); \ } else { \ if (!RB_RED_RIGHT(parent, field)) { \ RB_FLIP_RIGHT(parent, field); \ return; \ } \ if (RB_RED_LEFT(parent, field)) { \ RB_FLIP_LEFT(parent, field); \ elm = parent; \ continue; \ } \ sib = RB_LEFT(parent, field); \ if ((~RB_BITS(sib, field) & 
RB_RED_MASK) == 0) {\ RB_BITS(sib, field) &= ~RB_RED_MASK; \ elm = parent; \ continue; \ } \ RB_FLIP_LEFT(sib, field); \ if (RB_RED_RIGHT(sib, field)) \ RB_FLIP_RIGHT(parent, field); \ else if (!RB_RED_LEFT(sib, field)) { \ RB_FLIP_RIGHT(parent, field); \ RB_ROTATE_LEFT(head, sib, elm, field); \ if (RB_RED_LEFT(elm, field)) \ RB_FLIP_RIGHT(sib, field); \ if (RB_RED_RIGHT(elm, field)) \ RB_FLIP_LEFT(parent, field); \ RB_BITS(elm, field) |= RB_RED_MASK; \ sib = elm; \ } \ RB_ROTATE_RIGHT(head, parent, sib, field); \ } \ break; \ } while ((parent = RB_PARENT(elm, field)) != NULL); \ } #define RB_GENERATE_REMOVE(name, type, field, attr) \ attr struct type * \ name##_RB_REMOVE(struct name *head, struct type *elm) \ { \ struct type *child, *old, *parent, *right; \ \ old = elm; \ parent = RB_PARENT(elm, field); \ right = RB_RIGHT(elm, field); \ if (RB_LEFT(elm, field) == NULL) \ elm = child = right; \ else if (right == NULL) \ elm = child = RB_LEFT(elm, field); \ else { \ if ((child = RB_LEFT(right, field)) == NULL) { \ child = RB_RIGHT(right, field); \ RB_RIGHT(old, field) = child; \ parent = elm = right; \ } else { \ do \ elm = child; \ while ((child = RB_LEFT(elm, field)) != NULL); \ child = RB_RIGHT(elm, field); \ parent = RB_PARENT(elm, field); \ RB_LEFT(parent, field) = child; \ RB_SET_PARENT(RB_RIGHT(old, field), elm, field);\ } \ RB_SET_PARENT(RB_LEFT(old, field), elm, field); \ elm->field = old->field; \ } \ RB_SWAP_CHILD(head, old, elm, field); \ if (child != NULL) \ RB_SET_PARENT(child, parent, field); \ if (parent != NULL) \ name##_RB_REMOVE_COLOR(head, parent, child); \ while (parent != NULL) { \ RB_AUGMENT(parent); \ parent = RB_PARENT(parent, field); \ } \ return (old); \ } #define RB_GENERATE_INSERT(name, type, field, cmp, attr) \ /* Inserts a node into the RB tree */ \ attr struct type * \ name##_RB_INSERT(struct name *head, struct type *elm) \ { \ struct type *tmp; \ struct type *parent = NULL; \ int comp = 0; \ tmp = RB_ROOT(head); \ while (tmp) { \ parent = tmp; \ comp = (cmp)(elm, parent); \ if (comp < 0) \ tmp = RB_LEFT(tmp, field); \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ RB_SET(elm, parent, field); \ if (parent == NULL) \ RB_ROOT(head) = elm; \ else if (comp < 0) \ RB_LEFT(parent, field) = elm; \ else \ RB_RIGHT(parent, field) = elm; \ name##_RB_INSERT_COLOR(head, elm); \ while (elm != NULL) { \ RB_AUGMENT(elm); \ elm = RB_PARENT(elm, field); \ } \ return (NULL); \ } #define RB_GENERATE_FIND(name, type, field, cmp, attr) \ /* Finds the node with the same key as elm */ \ attr struct type * \ name##_RB_FIND(struct name *head, struct type *elm) \ { \ struct type *tmp = RB_ROOT(head); \ int comp; \ while (tmp) { \ comp = cmp(elm, tmp); \ if (comp < 0) \ tmp = RB_LEFT(tmp, field); \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ return (NULL); \ } #define RB_GENERATE_NFIND(name, type, field, cmp, attr) \ /* Finds the first node greater than or equal to the search key */ \ attr struct type * \ name##_RB_NFIND(struct name *head, struct type *elm) \ { \ struct type *tmp = RB_ROOT(head); \ struct type *res = NULL; \ int comp; \ while (tmp) { \ comp = cmp(elm, tmp); \ if (comp < 0) { \ res = tmp; \ tmp = RB_LEFT(tmp, field); \ } \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ return (res); \ } #define RB_GENERATE_NEXT(name, type, field, attr) \ /* ARGSUSED */ \ attr struct type * \ name##_RB_NEXT(struct type *elm) \ { \ if (RB_RIGHT(elm, field)) { \ elm = RB_RIGHT(elm, field); \ 
while (RB_LEFT(elm, field)) \ elm = RB_LEFT(elm, field); \ } else { \ if (RB_PARENT(elm, field) && \ (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ elm = RB_PARENT(elm, field); \ else { \ while (RB_PARENT(elm, field) && \ (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\ elm = RB_PARENT(elm, field); \ elm = RB_PARENT(elm, field); \ } \ } \ return (elm); \ } #define RB_GENERATE_PREV(name, type, field, attr) \ /* ARGSUSED */ \ attr struct type * \ name##_RB_PREV(struct type *elm) \ { \ if (RB_LEFT(elm, field)) { \ elm = RB_LEFT(elm, field); \ while (RB_RIGHT(elm, field)) \ elm = RB_RIGHT(elm, field); \ } else { \ if (RB_PARENT(elm, field) && \ (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ elm = RB_PARENT(elm, field); \ else { \ while (RB_PARENT(elm, field) && \ (elm == RB_LEFT(RB_PARENT(elm, field), field)))\ elm = RB_PARENT(elm, field); \ elm = RB_PARENT(elm, field); \ } \ } \ return (elm); \ } #define RB_GENERATE_MINMAX(name, type, field, attr) \ attr struct type * \ name##_RB_MINMAX(struct name *head, int val) \ { \ struct type *tmp = RB_ROOT(head); \ struct type *parent = NULL; \ while (tmp) { \ parent = tmp; \ if (val < 0) \ tmp = RB_LEFT(tmp, field); \ else \ tmp = RB_RIGHT(tmp, field); \ } \ return (parent); \ } #define RB_GENERATE_REINSERT(name, type, field, cmp, attr) \ attr struct type * \ name##_RB_REINSERT(struct name *head, struct type *elm) \ { \ struct type *cmpelm; \ if (((cmpelm = RB_PREV(name, head, elm)) != NULL && \ cmp(cmpelm, elm) >= 0) || \ ((cmpelm = RB_NEXT(name, head, elm)) != NULL && \ cmp(elm, cmpelm) >= 0)) { \ /* XXXLAS: Remove/insert is heavy handed. */ \ RB_REMOVE(name, head, elm); \ return (RB_INSERT(name, head, elm)); \ } \ return (NULL); \ } \ #define RB_NEGINF -1 #define RB_INF 1 #define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) #define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) #define RB_FIND(name, x, y) name##_RB_FIND(x, y) #define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) #define RB_NEXT(name, x, y) name##_RB_NEXT(y) #define RB_PREV(name, x, y) name##_RB_PREV(y) #define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) #define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) #define RB_REINSERT(name, x, y) name##_RB_REINSERT(x, y) #define RB_FOREACH(x, name, head) \ for ((x) = RB_MIN(name, head); \ (x) != NULL; \ (x) = name##_RB_NEXT(x)) #define RB_FOREACH_FROM(x, name, y) \ for ((x) = (y); \ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_SAFE(x, name, head, y) \ for ((x) = RB_MIN(name, head); \ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_REVERSE(x, name, head) \ for ((x) = RB_MAX(name, head); \ (x) != NULL; \ (x) = name##_RB_PREV(x)) #define RB_FOREACH_REVERSE_FROM(x, name, y) \ for ((x) = (y); \ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ for ((x) = RB_MAX(name, head); \ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ (x) = (y)) #endif /* _SYS_TREE_H_ */