diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 817cb9766bbf..33f168836370 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -1,3371 +1,3374 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
* * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_kstack_pages.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #endif SDT_PROVIDER_DEFINE(proc); MALLOC_DEFINE(M_SESSION, "session", "session header"); static MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); static void doenterpgrp(struct proc *, struct pgrp *); static void orphanpg(struct pgrp *pg); static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp); static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp); static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread); static void pgdelete(struct pgrp *); static int pgrp_init(void *mem, int size, int flags); static int proc_ctor(void *mem, int size, void *arg, int flags); static void proc_dtor(void *mem, int size, void *arg); static int proc_init(void *mem, int size, int flags); static void proc_fini(void *mem, int size); static void pargs_free(struct pargs *pa); /* * Other process lists */ struct pidhashhead *pidhashtbl; struct sx *pidhashtbl_lock; u_long pidhash; u_long pidhashlock; struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct sx __exclusive_cache_line allproc_lock; struct sx __exclusive_cache_line proctree_lock; struct mtx __exclusive_cache_line ppeers_lock; struct mtx __exclusive_cache_line procid_lock; uma_zone_t proc_zone; uma_zone_t pgrp_zone; /* * The offset of various fields in struct proc and struct thread. * These are used by kernel debuggers to enumerate kernel threads and * processes. 
*/ const int proc_off_p_pid = offsetof(struct proc, p_pid); const int proc_off_p_comm = offsetof(struct proc, p_comm); const int proc_off_p_list = offsetof(struct proc, p_list); const int proc_off_p_hash = offsetof(struct proc, p_hash); const int proc_off_p_threads = offsetof(struct proc, p_threads); const int thread_off_td_tid = offsetof(struct thread, td_tid); const int thread_off_td_name = offsetof(struct thread, td_name); const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu); const int thread_off_td_pcb = offsetof(struct thread, td_pcb); const int thread_off_td_plist = offsetof(struct thread, td_plist); EVENTHANDLER_LIST_DEFINE(process_ctor); EVENTHANDLER_LIST_DEFINE(process_dtor); EVENTHANDLER_LIST_DEFINE(process_init); EVENTHANDLER_LIST_DEFINE(process_fini); EVENTHANDLER_LIST_DEFINE(process_exit); EVENTHANDLER_LIST_DEFINE(process_fork); EVENTHANDLER_LIST_DEFINE(process_exec); int kstack_pages = KSTACK_PAGES; SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, "Kernel stack size in pages"); static int vmmap_skip_res_cnt = 0; SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW, &vmmap_skip_res_cnt, 0, "Skip calculation of the pages resident count in kern.proc.vmmap"); CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #ifdef COMPAT_FREEBSD32 CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE); #endif /* * Initialize global process hashing structures. */ void procinit(void) { u_long i; sx_init(&allproc_lock, "allproc"); sx_init(&proctree_lock, "proctree"); mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF); mtx_init(&procid_lock, "procid", NULL, MTX_DEF); LIST_INIT(&allproc); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); pidhashlock = (pidhash + 1) / 64; if (pidhashlock > 0) pidhashlock--; pidhashtbl_lock = malloc(sizeof(*pidhashtbl_lock) * (pidhashlock + 1), M_PROC, M_WAITOK | M_ZERO); for (i = 0; i < pidhashlock + 1; i++) sx_init_flags(&pidhashtbl_lock[i], "pidhash", SX_DUPOK); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); proc_zone = uma_zcreate("PROC", sched_sizeof_proc(), proc_ctor, proc_dtor, proc_init, proc_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); pgrp_zone = uma_zcreate("PGRP", sizeof(struct pgrp), NULL, NULL, pgrp_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uihashinit(); } /* * Prepare a proc for use. */ static int proc_ctor(void *mem, int size, void *arg, int flags) { struct proc *p; struct thread *td; p = (struct proc *)mem; #ifdef KDTRACE_HOOKS kdtrace_proc_ctor(p); #endif EVENTHANDLER_DIRECT_INVOKE(process_ctor, p); td = FIRST_THREAD_IN_PROC(p); if (td != NULL) { /* Make sure all thread constructors are executed */ EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); } return (0); } /* * Reclaim a proc after use. */ static void proc_dtor(void *mem, int size, void *arg) { struct proc *p; struct thread *td; /* INVARIANTS checks go here */ p = (struct proc *)mem; td = FIRST_THREAD_IN_PROC(p); if (td != NULL) { #ifdef INVARIANTS KASSERT((p->p_numthreads == 1), ("bad number of threads in exiting process")); KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr")); #endif /* Free all OSD associated to this thread. */ osd_thread_exit(td); td_softdep_cleanup(td); MPASS(td->td_su == NULL); /* Make sure all thread destructors are executed */ EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td); } EVENTHANDLER_DIRECT_INVOKE(process_dtor, p); #ifdef KDTRACE_HOOKS kdtrace_proc_dtor(p); #endif if (p->p_ksi != NULL) KASSERT(! 
KSI_ONQ(p->p_ksi), ("SIGCHLD queue")); } /* * Initialize type-stable parts of a proc (when newly created). */ static int proc_init(void *mem, int size, int flags) { struct proc *p; p = (struct proc *)mem; mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW); mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW); cv_init(&p->p_pwait, "ppwait"); TAILQ_INIT(&p->p_threads); /* all threads in proc */ EVENTHANDLER_DIRECT_INVOKE(process_init, p); p->p_stats = pstats_alloc(); p->p_pgrp = NULL; return (0); } /* * UMA should ensure that this function is never called. * Freeing a proc structure would violate type stability. */ static void proc_fini(void *mem, int size) { #ifdef notnow struct proc *p; p = (struct proc *)mem; EVENTHANDLER_DIRECT_INVOKE(process_fini, p); pstats_free(p->p_stats); thread_free(FIRST_THREAD_IN_PROC(p)); mtx_destroy(&p->p_mtx); if (p->p_ksi != NULL) ksiginfo_free(p->p_ksi); #else panic("proc reclaimed"); #endif } static int pgrp_init(void *mem, int size, int flags) { struct pgrp *pg; pg = mem; mtx_init(&pg->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); return (0); } /* * PID space management. * * These bitmaps are used by fork_findpid. */ bitstr_t bit_decl(proc_id_pidmap, PID_MAX); bitstr_t bit_decl(proc_id_grpidmap, PID_MAX); bitstr_t bit_decl(proc_id_sessidmap, PID_MAX); bitstr_t bit_decl(proc_id_reapmap, PID_MAX); static bitstr_t *proc_id_array[] = { proc_id_pidmap, proc_id_grpidmap, proc_id_sessidmap, proc_id_reapmap, }; void proc_id_set(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); mtx_lock(&procid_lock); KASSERT(bit_test(proc_id_array[type], id) == 0, ("bit %d already set in %d\n", id, type)); bit_set(proc_id_array[type], id); mtx_unlock(&procid_lock); } void proc_id_set_cond(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); if (bit_test(proc_id_array[type], id)) return; mtx_lock(&procid_lock); bit_set(proc_id_array[type], id); mtx_unlock(&procid_lock); } void proc_id_clear(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); mtx_lock(&procid_lock); KASSERT(bit_test(proc_id_array[type], id) != 0, ("bit %d not set in %d\n", id, type)); bit_clear(proc_id_array[type], id); mtx_unlock(&procid_lock); } /* * Is p an inferior of the current process? */ int inferior(struct proc *p) { sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); for (; p != curproc; p = proc_realparent(p)) { if (p->p_pid == 0) return (0); } return (1); } /* * Shared lock all the pid hash lists. */ void pidhash_slockall(void) { u_long i; for (i = 0; i < pidhashlock + 1; i++) sx_slock(&pidhashtbl_lock[i]); } /* * Shared unlock all the pid hash lists. */ void pidhash_sunlockall(void) { u_long i; for (i = 0; i < pidhashlock + 1; i++) sx_sunlock(&pidhashtbl_lock[i]); } /* * Similar to pfind_any(), this function finds zombies. */ struct proc * pfind_any_locked(pid_t pid) { struct proc *p; sx_assert(PIDHASHLOCK(pid), SX_LOCKED); LIST_FOREACH(p, PIDHASH(pid), p_hash) { if (p->p_pid == pid) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); p = NULL; } break; } } return (p); } /* * Locate a process by number. 
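 * A matching process is returned with its process lock (PROC_LOCK) held.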
* * By not returning processes in the PRS_NEW state, we allow callers to avoid * testing for that condition to avoid dereferencing p_ucred, et al. */ static __always_inline struct proc * _pfind(pid_t pid, bool zombie) { struct proc *p; p = curproc; if (p->p_pid == pid) { PROC_LOCK(p); return (p); } sx_slock(PIDHASHLOCK(pid)); LIST_FOREACH(p, PIDHASH(pid), p_hash) { if (p->p_pid == pid) { PROC_LOCK(p); if (p->p_state == PRS_NEW || (!zombie && p->p_state == PRS_ZOMBIE)) { PROC_UNLOCK(p); p = NULL; } break; } } sx_sunlock(PIDHASHLOCK(pid)); return (p); } struct proc * pfind(pid_t pid) { return (_pfind(pid, false)); } /* * Same as pfind but allow zombies. */ struct proc * pfind_any(pid_t pid) { return (_pfind(pid, true)); } /* * Locate a process group by number. * The caller must hold proctree_lock. */ struct pgrp * pgfind(pid_t pgid) { struct pgrp *pgrp; sx_assert(&proctree_lock, SX_LOCKED); LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) { if (pgrp->pg_id == pgid) { PGRP_LOCK(pgrp); return (pgrp); } } return (NULL); } /* * Locate process and do additional manipulations, depending on flags. */ int pget(pid_t pid, int flags, struct proc **pp) { struct proc *p; struct thread *td1; int error; p = curproc; if (p->p_pid == pid) { PROC_LOCK(p); } else { p = NULL; if (pid <= PID_MAX) { if ((flags & PGET_NOTWEXIT) == 0) p = pfind_any(pid); else p = pfind(pid); } else if ((flags & PGET_NOTID) == 0) { td1 = tdfind(pid, -1); if (td1 != NULL) p = td1->td_proc; } if (p == NULL) return (ESRCH); if ((flags & PGET_CANSEE) != 0) { error = p_cansee(curthread, p); if (error != 0) goto errout; } } if ((flags & PGET_CANDEBUG) != 0) { error = p_candebug(curthread, p); if (error != 0) goto errout; } if ((flags & PGET_ISCURRENT) != 0 && curproc != p) { error = EPERM; goto errout; } if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto errout; } if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) { /* * XXXRW: Not clear ESRCH is the right error during proc * execve(). */ error = ESRCH; goto errout; } if ((flags & PGET_HOLD) != 0) { _PHOLD(p); PROC_UNLOCK(p); } *pp = p; return (0); errout: PROC_UNLOCK(p); return (error); } /* * Create a new process group. * pgid must be equal to the pid of p. * Begin a new session if required. */ int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess) { sx_assert(&proctree_lock, SX_XLOCKED); KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL")); KASSERT(p->p_pid == pgid, ("enterpgrp: new pgrp and pid != pgid")); KASSERT(pgfind(pgid) == NULL, ("enterpgrp: pgrp with pgid exists")); KASSERT(!SESS_LEADER(p), ("enterpgrp: session leader attempted setpgrp")); if (sess != NULL) { /* * new session */ mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF); PROC_LOCK(p); p->p_flag &= ~P_CONTROLT; PROC_UNLOCK(p); PGRP_LOCK(pgrp); sess->s_leader = p; sess->s_sid = p->p_pid; proc_id_set(PROC_ID_SESSION, p->p_pid); refcount_init(&sess->s_count, 1); sess->s_ttyvp = NULL; sess->s_ttydp = NULL; sess->s_ttyp = NULL; bcopy(p->p_session->s_login, sess->s_login, sizeof(sess->s_login)); pgrp->pg_session = sess; KASSERT(p == curproc, ("enterpgrp: mksession and p != curproc")); } else { pgrp->pg_session = p->p_session; sess_hold(pgrp->pg_session); PGRP_LOCK(pgrp); } pgrp->pg_id = pgid; proc_id_set(PROC_ID_GROUP, p->p_pid); LIST_INIT(&pgrp->pg_members); pgrp->pg_flags = 0; /* * As we have an exclusive lock of proctree_lock, * this should not deadlock. 
*/ LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); SLIST_INIT(&pgrp->pg_sigiolst); PGRP_UNLOCK(pgrp); doenterpgrp(p, pgrp); return (0); } /* * Move p to an existing process group */ int enterthispgrp(struct proc *p, struct pgrp *pgrp) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED); KASSERT(pgrp->pg_session == p->p_session, ("%s: pgrp's session %p, p->p_session %p proc %p\n", __func__, pgrp->pg_session, p->p_session, p)); KASSERT(pgrp != p->p_pgrp, ("%s: p %p belongs to pgrp %p", __func__, p, pgrp)); doenterpgrp(p, pgrp); return (0); } /* * If true, any child of q which belongs to group pgrp, qualifies the * process group pgrp as not orphaned. */ static bool isjobproc(struct proc *q, struct pgrp *pgrp) { sx_assert(&proctree_lock, SX_LOCKED); return (q->p_pgrp != pgrp && q->p_pgrp->pg_session == pgrp->pg_session); } static struct proc * jobc_reaper(struct proc *p) { struct proc *pp; sx_assert(&proctree_lock, SA_LOCKED); for (pp = p;;) { pp = pp->p_reaper; if (pp->p_reaper == pp || (pp->p_treeflag & P_TREE_GRPEXITED) == 0) return (pp); } } static struct proc * jobc_parent(struct proc *p, struct proc *p_exiting) { struct proc *pp; sx_assert(&proctree_lock, SA_LOCKED); pp = proc_realparent(p); if (pp->p_pptr == NULL || pp == p_exiting || (pp->p_treeflag & P_TREE_GRPEXITED) == 0) return (pp); return (jobc_reaper(pp)); } static int pgrp_calc_jobc(struct pgrp *pgrp) { struct proc *q; int cnt; #ifdef INVARIANTS if (!mtx_owned(&pgrp->pg_mtx)) sx_assert(&proctree_lock, SA_LOCKED); #endif cnt = 0; LIST_FOREACH(q, &pgrp->pg_members, p_pglist) { if ((q->p_treeflag & P_TREE_GRPEXITED) != 0 || q->p_pptr == NULL) continue; if (isjobproc(jobc_parent(q, NULL), pgrp)) cnt++; } return (cnt); } /* * Move p to a process group */ static void doenterpgrp(struct proc *p, struct pgrp *pgrp) { struct pgrp *savepgrp; struct proc *pp; sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED); savepgrp = p->p_pgrp; pp = jobc_parent(p, NULL); PGRP_LOCK(pgrp); PGRP_LOCK(savepgrp); if (isjobproc(pp, savepgrp) && pgrp_calc_jobc(savepgrp) == 1) orphanpg(savepgrp); PROC_LOCK(p); LIST_REMOVE(p, p_pglist); p->p_pgrp = pgrp; PROC_UNLOCK(p); LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); if (isjobproc(pp, pgrp)) pgrp->pg_flags &= ~PGRP_ORPHANED; PGRP_UNLOCK(savepgrp); PGRP_UNLOCK(pgrp); if (LIST_EMPTY(&savepgrp->pg_members)) pgdelete(savepgrp); } /* * remove process from process group */ int leavepgrp(struct proc *p) { struct pgrp *savepgrp; sx_assert(&proctree_lock, SX_XLOCKED); savepgrp = p->p_pgrp; PGRP_LOCK(savepgrp); PROC_LOCK(p); LIST_REMOVE(p, p_pglist); p->p_pgrp = NULL; PROC_UNLOCK(p); PGRP_UNLOCK(savepgrp); if (LIST_EMPTY(&savepgrp->pg_members)) pgdelete(savepgrp); return (0); } /* * delete a process group */ static void pgdelete(struct pgrp *pgrp) { struct session *savesess; struct tty *tp; sx_assert(&proctree_lock, SX_XLOCKED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pgid. The proctree lock ensures that * new sigio structures will not be added after this point. 
*/ funsetownlst(&pgrp->pg_sigiolst); PGRP_LOCK(pgrp); tp = pgrp->pg_session->s_ttyp; LIST_REMOVE(pgrp, pg_hash); savesess = pgrp->pg_session; PGRP_UNLOCK(pgrp); /* Remove the reference to the pgrp before deallocating it. */ if (tp != NULL) { tty_lock(tp); tty_rel_pgrp(tp, pgrp); } proc_id_clear(PROC_ID_GROUP, pgrp->pg_id); uma_zfree(pgrp_zone, pgrp); sess_release(savesess); } static void fixjobc_kill(struct proc *p) { struct proc *q; struct pgrp *pgrp; sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); pgrp = p->p_pgrp; PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); /* * p no longer affects process group orphanage for children. * It is marked by the flag because p is only physically * removed from its process group on wait(2). */ MPASS((p->p_treeflag & P_TREE_GRPEXITED) == 0); p->p_treeflag |= P_TREE_GRPEXITED; /* * Check if exiting p orphans its own group. */ pgrp = p->p_pgrp; if (isjobproc(jobc_parent(p, NULL), pgrp)) { PGRP_LOCK(pgrp); if (pgrp_calc_jobc(pgrp) == 0) orphanpg(pgrp); PGRP_UNLOCK(pgrp); } /* * Check this process' children to see whether they qualify * their process groups after reparenting to reaper. */ LIST_FOREACH(q, &p->p_children, p_sibling) { pgrp = q->p_pgrp; PGRP_LOCK(pgrp); if (pgrp_calc_jobc(pgrp) == 0) { /* * We want to handle exactly the children that * has p as realparent. Then, when calculating * jobc_parent for children, we should ignore * P_TREE_GRPEXITED flag already set on p. */ if (jobc_parent(q, p) == p && isjobproc(p, pgrp)) orphanpg(pgrp); } else pgrp->pg_flags &= ~PGRP_ORPHANED; PGRP_UNLOCK(pgrp); } LIST_FOREACH(q, &p->p_orphans, p_orphan) { pgrp = q->p_pgrp; PGRP_LOCK(pgrp); if (pgrp_calc_jobc(pgrp) == 0) { if (isjobproc(p, pgrp)) orphanpg(pgrp); } else pgrp->pg_flags &= ~PGRP_ORPHANED; PGRP_UNLOCK(pgrp); } } void killjobc(void) { struct session *sp; struct tty *tp; struct proc *p; struct vnode *ttyvp; p = curproc; MPASS(p->p_flag & P_WEXIT); sx_assert(&proctree_lock, SX_LOCKED); if (SESS_LEADER(p)) { sp = p->p_session; /* * s_ttyp is not zero'd; we use this to indicate that * the session once had a controlling terminal. (for * logging and informational purposes) */ SESS_LOCK(sp); ttyvp = sp->s_ttyvp; tp = sp->s_ttyp; sp->s_ttyvp = NULL; sp->s_ttydp = NULL; sp->s_leader = NULL; SESS_UNLOCK(sp); /* * Signal foreground pgrp and revoke access to * controlling terminal if it has not been revoked * already. * * Because the TTY may have been revoked in the mean * time and could already have a new session associated * with it, make sure we don't send a SIGHUP to a * foreground process group that does not belong to this * session. */ if (tp != NULL) { tty_lock(tp); if (tp->t_session == sp) tty_signal_pgrp(tp, SIGHUP); tty_unlock(tp); } if (ttyvp != NULL) { sx_xunlock(&proctree_lock); if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) { VOP_REVOKE(ttyvp, REVOKEALL); VOP_UNLOCK(ttyvp); } devfs_ctty_unref(ttyvp); sx_xlock(&proctree_lock); } } fixjobc_kill(p); } /* * A process group has become orphaned, mark it as such for signal * delivery code. If there are any stopped processes in the group, * hang-up all process in that group. 
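 * Hang-up is done by delivering SIGHUP followed by SIGCONT to every member of the group.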
*/ static void orphanpg(struct pgrp *pg) { struct proc *p; PGRP_LOCK_ASSERT(pg, MA_OWNED); pg->pg_flags |= PGRP_ORPHANED; LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (P_SHOULDSTOP(p) == P_STOPPED_SIG) { PROC_UNLOCK(p); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); kern_psignal(p, SIGHUP); kern_psignal(p, SIGCONT); PROC_UNLOCK(p); } return; } PROC_UNLOCK(p); } } void sess_hold(struct session *s) { refcount_acquire(&s->s_count); } void sess_release(struct session *s) { if (refcount_release(&s->s_count)) { if (s->s_ttyp != NULL) { tty_lock(s->s_ttyp); tty_rel_sess(s->s_ttyp, s); } proc_id_clear(PROC_ID_SESSION, s->s_sid); mtx_destroy(&s->s_mtx); free(s, M_SESSION); } } #ifdef DDB static void db_print_pgrp_one(struct pgrp *pgrp, struct proc *p) { db_printf( " pid %d at %p pr %d pgrp %p e %d jc %d\n", p->p_pid, p, p->p_pptr == NULL ? -1 : p->p_pptr->p_pid, p->p_pgrp, (p->p_treeflag & P_TREE_GRPEXITED) != 0, p->p_pptr == NULL ? 0 : isjobproc(p->p_pptr, pgrp)); } DB_SHOW_COMMAND(pgrpdump, pgrpdump) { struct pgrp *pgrp; struct proc *p; int i; for (i = 0; i <= pgrphash; i++) { if (!LIST_EMPTY(&pgrphashtbl[i])) { db_printf("indx %d\n", i); LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) { db_printf( " pgrp %p, pgid %d, sess %p, sesscnt %d, mem %p\n", pgrp, (int)pgrp->pg_id, pgrp->pg_session, pgrp->pg_session->s_count, LIST_FIRST(&pgrp->pg_members)); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) db_print_pgrp_one(pgrp, p); } } } } #endif /* DDB */ /* * Calculate the kinfo_proc members which contain process-wide * informations. * Must be called with the target process locked. */ static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); kp->ki_estcpu = 0; kp->ki_pctcpu = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); kp->ki_pctcpu += sched_pctcpu(td); kp->ki_estcpu += sched_estcpu(td); thread_unlock(td); } } /* * Fill in any information that is common to all threads in the process. * Must be called with the target process locked. */ static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) { struct thread *td0; struct ucred *cred; struct sigacts *ps; struct timeval boottime; PROC_LOCK_ASSERT(p, MA_OWNED); kp->ki_structsize = sizeof(*kp); kp->ki_paddr = p; kp->ki_addr =/* p->p_addr; */0; /* XXX */ kp->ki_args = p->p_args; kp->ki_textvp = p->p_textvp; #ifdef KTRACE kp->ki_tracep = p->p_tracevp; kp->ki_traceflag = p->p_traceflag; #endif kp->ki_fd = p->p_fd; kp->ki_pd = p->p_pd; kp->ki_vmspace = p->p_vmspace; kp->ki_flag = p->p_flag; kp->ki_flag2 = p->p_flag2; cred = p->p_ucred; if (cred) { kp->ki_uid = cred->cr_uid; kp->ki_ruid = cred->cr_ruid; kp->ki_svuid = cred->cr_svuid; kp->ki_cr_flags = 0; if (cred->cr_flags & CRED_FLAG_CAPMODE) kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE; /* XXX bde doesn't like KI_NGROUPS */ if (cred->cr_ngroups > KI_NGROUPS) { kp->ki_ngroups = KI_NGROUPS; kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW; } else kp->ki_ngroups = cred->cr_ngroups; bcopy(cred->cr_groups, kp->ki_groups, kp->ki_ngroups * sizeof(gid_t)); kp->ki_rgid = cred->cr_rgid; kp->ki_svgid = cred->cr_svgid; /* If jailed(cred), emulate the old P_JAILED flag. */ if (jailed(cred)) { kp->ki_flag |= P_JAILED; /* If inside the jail, use 0 as a jail ID. 
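 * The prison's pr_id is reported only when the observer's prison differs from the target's.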
*/ if (cred->cr_prison != curthread->td_ucred->cr_prison) kp->ki_jid = cred->cr_prison->pr_id; } strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name, sizeof(kp->ki_loginclass)); } ps = p->p_sigacts; if (ps) { mtx_lock(&ps->ps_mtx); kp->ki_sigignore = ps->ps_sigignore; kp->ki_sigcatch = ps->ps_sigcatch; mtx_unlock(&ps->ps_mtx); } if (p->p_state != PRS_NEW && p->p_state != PRS_ZOMBIE && p->p_vmspace != NULL) { struct vmspace *vm = p->p_vmspace; kp->ki_size = vm->vm_map.size; kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/ FOREACH_THREAD_IN_PROC(p, td0) { if (!TD_IS_SWAPPED(td0)) kp->ki_rssize += td0->td_kstack_pages; } kp->ki_swrss = vm->vm_swrss; kp->ki_tsize = vm->vm_tsize; kp->ki_dsize = vm->vm_dsize; kp->ki_ssize = vm->vm_ssize; } else if (p->p_state == PRS_ZOMBIE) kp->ki_stat = SZOMB; if (kp->ki_flag & P_INMEM) kp->ki_sflag = PS_INMEM; else kp->ki_sflag = 0; /* Calculate legacy swtime as seconds since 'swtick'. */ kp->ki_swtime = (ticks - p->p_swtick) / hz; kp->ki_pid = p->p_pid; kp->ki_nice = p->p_nice; kp->ki_fibnum = p->p_fibnum; kp->ki_start = p->p_stats->p_start; getboottime(&boottime); timevaladd(&kp->ki_start, &boottime); PROC_STATLOCK(p); rufetch(p, &kp->ki_rusage); kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime); calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime); PROC_STATUNLOCK(p); calccru(p, &kp->ki_childutime, &kp->ki_childstime); /* Some callers want child times in a single value. */ kp->ki_childtime = kp->ki_childstime; timevaladd(&kp->ki_childtime, &kp->ki_childutime); FOREACH_THREAD_IN_PROC(p, td0) kp->ki_cow += td0->td_cow; if (p->p_comm[0] != '\0') strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm)); if (p->p_sysent && p->p_sysent->sv_name != NULL && p->p_sysent->sv_name[0] != '\0') strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul)); kp->ki_siglist = p->p_siglist; kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig); kp->ki_acflag = p->p_acflag; kp->ki_lock = p->p_lock; if (p->p_pptr) { kp->ki_ppid = p->p_oppid; if (p->p_flag & P_TRACED) kp->ki_tracer = p->p_pptr->p_pid; } } /* * Fill job-related process information. */ static void fill_kinfo_proc_pgrp(struct proc *p, struct kinfo_proc *kp) { struct tty *tp; struct session *sp; struct pgrp *pgrp; sx_assert(&proctree_lock, SA_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); pgrp = p->p_pgrp; if (pgrp == NULL) return; kp->ki_pgid = pgrp->pg_id; kp->ki_jobc = pgrp_calc_jobc(pgrp); sp = pgrp->pg_session; tp = NULL; if (sp != NULL) { kp->ki_sid = sp->s_sid; SESS_LOCK(sp); strlcpy(kp->ki_login, sp->s_login, sizeof(kp->ki_login)); if (sp->s_ttyvp) kp->ki_kiflag |= KI_CTTY; if (SESS_LEADER(p)) kp->ki_kiflag |= KI_SLEADER; tp = sp->s_ttyp; SESS_UNLOCK(sp); } if ((p->p_flag & P_CONTROLT) && tp != NULL) { kp->ki_tdev = tty_udev(tp); kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; if (tp->t_session) kp->ki_tsid = tp->t_session->s_sid; } else { kp->ki_tdev = NODEV; kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ } } /* * Fill in information that is thread specific. Must be called with * target process locked. If 'preferthread' is set, overwrite certain * process-related fields that are maintained for both threads and * processes. 
*/ static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread) { struct proc *p; p = td->td_proc; kp->ki_tdaddr = td; PROC_LOCK_ASSERT(p, MA_OWNED); if (preferthread) PROC_STATLOCK(p); thread_lock(td); if (td->td_wmesg != NULL) strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg)); else bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg)); if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >= sizeof(kp->ki_tdname)) { strlcpy(kp->ki_moretdname, td->td_name + sizeof(kp->ki_tdname) - 1, sizeof(kp->ki_moretdname)); } else { bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname)); } if (TD_ON_LOCK(td)) { kp->ki_kiflag |= KI_LOCKBLOCK; strlcpy(kp->ki_lockname, td->td_lockname, sizeof(kp->ki_lockname)); } else { kp->ki_kiflag &= ~KI_LOCKBLOCK; bzero(kp->ki_lockname, sizeof(kp->ki_lockname)); } if (p->p_state == PRS_NORMAL) { /* approximate. */ if (TD_ON_RUNQ(td) || TD_CAN_RUN(td) || TD_IS_RUNNING(td)) { kp->ki_stat = SRUN; } else if (P_SHOULDSTOP(p)) { kp->ki_stat = SSTOP; } else if (TD_IS_SLEEPING(td)) { kp->ki_stat = SSLEEP; } else if (TD_ON_LOCK(td)) { kp->ki_stat = SLOCK; } else { kp->ki_stat = SWAIT; } } else if (p->p_state == PRS_ZOMBIE) { kp->ki_stat = SZOMB; } else { kp->ki_stat = SIDL; } /* Things in the thread */ kp->ki_wchan = td->td_wchan; kp->ki_pri.pri_level = td->td_priority; kp->ki_pri.pri_native = td->td_base_pri; /* * Note: legacy fields; clamp at the old NOCPU value and/or * the maximum u_char CPU value. */ if (td->td_lastcpu == NOCPU) kp->ki_lastcpu_old = NOCPU_OLD; else if (td->td_lastcpu > MAXCPU_OLD) kp->ki_lastcpu_old = MAXCPU_OLD; else kp->ki_lastcpu_old = td->td_lastcpu; if (td->td_oncpu == NOCPU) kp->ki_oncpu_old = NOCPU_OLD; else if (td->td_oncpu > MAXCPU_OLD) kp->ki_oncpu_old = MAXCPU_OLD; else kp->ki_oncpu_old = td->td_oncpu; kp->ki_lastcpu = td->td_lastcpu; kp->ki_oncpu = td->td_oncpu; kp->ki_tdflags = td->td_flags; kp->ki_tid = td->td_tid; kp->ki_numthreads = p->p_numthreads; kp->ki_pcb = td->td_pcb; kp->ki_kstack = (void *)td->td_kstack; kp->ki_slptime = (ticks - td->td_slptick) / hz; kp->ki_pri.pri_class = td->td_pri_class; kp->ki_pri.pri_user = td->td_user_pri; if (preferthread) { rufetchtd(td, &kp->ki_rusage); kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime); kp->ki_pctcpu = sched_pctcpu(td); kp->ki_estcpu = sched_estcpu(td); kp->ki_cow = td->td_cow; } /* We can't get this anymore but ps etc never used it anyway. */ kp->ki_rqindex = 0; if (preferthread) kp->ki_siglist = td->td_siglist; kp->ki_sigmask = td->td_sigmask; thread_unlock(td); if (preferthread) PROC_STATUNLOCK(p); } /* * Fill in a kinfo_proc structure for the specified process. * Must be called with the target process locked. */ void fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp) { MPASS(FIRST_THREAD_IN_PROC(p) != NULL); bzero(kp, sizeof(*kp)); fill_kinfo_proc_pgrp(p,kp); fill_kinfo_proc_only(p, kp); fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0); fill_kinfo_aggregate(p, kp); } struct pstats * pstats_alloc(void) { return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK)); } /* * Copy parts of p_stats; zero the rest of p_stats (statistics). 
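 * The boundaries of the zeroed and copied ranges are given by the pstat_startzero/pstat_endzero
 * and pstat_startcopy/pstat_endcopy markers in struct pstats.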
*/ void pstats_fork(struct pstats *src, struct pstats *dst) { bzero(&dst->pstat_startzero, __rangeof(struct pstats, pstat_startzero, pstat_endzero)); bcopy(&src->pstat_startcopy, &dst->pstat_startcopy, __rangeof(struct pstats, pstat_startcopy, pstat_endcopy)); } void pstats_free(struct pstats *ps) { free(ps, M_SUBPROC); } #ifdef COMPAT_FREEBSD32 /* * This function is typically used to copy out the kernel address, so * it can be replaced by assignment of zero. */ static inline uint32_t ptr32_trim(const void *ptr) { uintptr_t uptr; uptr = (uintptr_t)ptr; return ((uptr > UINT_MAX) ? 0 : uptr); } #define PTRTRIM_CP(src,dst,fld) \ do { (dst).fld = ptr32_trim((src).fld); } while (0) static void freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32) { int i; bzero(ki32, sizeof(struct kinfo_proc32)); ki32->ki_structsize = sizeof(struct kinfo_proc32); CP(*ki, *ki32, ki_layout); PTRTRIM_CP(*ki, *ki32, ki_args); PTRTRIM_CP(*ki, *ki32, ki_paddr); PTRTRIM_CP(*ki, *ki32, ki_addr); PTRTRIM_CP(*ki, *ki32, ki_tracep); PTRTRIM_CP(*ki, *ki32, ki_textvp); PTRTRIM_CP(*ki, *ki32, ki_fd); PTRTRIM_CP(*ki, *ki32, ki_vmspace); PTRTRIM_CP(*ki, *ki32, ki_wchan); CP(*ki, *ki32, ki_pid); CP(*ki, *ki32, ki_ppid); CP(*ki, *ki32, ki_pgid); CP(*ki, *ki32, ki_tpgid); CP(*ki, *ki32, ki_sid); CP(*ki, *ki32, ki_tsid); CP(*ki, *ki32, ki_jobc); CP(*ki, *ki32, ki_tdev); CP(*ki, *ki32, ki_tdev_freebsd11); CP(*ki, *ki32, ki_siglist); CP(*ki, *ki32, ki_sigmask); CP(*ki, *ki32, ki_sigignore); CP(*ki, *ki32, ki_sigcatch); CP(*ki, *ki32, ki_uid); CP(*ki, *ki32, ki_ruid); CP(*ki, *ki32, ki_svuid); CP(*ki, *ki32, ki_rgid); CP(*ki, *ki32, ki_svgid); CP(*ki, *ki32, ki_ngroups); for (i = 0; i < KI_NGROUPS; i++) CP(*ki, *ki32, ki_groups[i]); CP(*ki, *ki32, ki_size); CP(*ki, *ki32, ki_rssize); CP(*ki, *ki32, ki_swrss); CP(*ki, *ki32, ki_tsize); CP(*ki, *ki32, ki_dsize); CP(*ki, *ki32, ki_ssize); CP(*ki, *ki32, ki_xstat); CP(*ki, *ki32, ki_acflag); CP(*ki, *ki32, ki_pctcpu); CP(*ki, *ki32, ki_estcpu); CP(*ki, *ki32, ki_slptime); CP(*ki, *ki32, ki_swtime); CP(*ki, *ki32, ki_cow); CP(*ki, *ki32, ki_runtime); TV_CP(*ki, *ki32, ki_start); TV_CP(*ki, *ki32, ki_childtime); CP(*ki, *ki32, ki_flag); CP(*ki, *ki32, ki_kiflag); CP(*ki, *ki32, ki_traceflag); CP(*ki, *ki32, ki_stat); CP(*ki, *ki32, ki_nice); CP(*ki, *ki32, ki_lock); CP(*ki, *ki32, ki_rqindex); CP(*ki, *ki32, ki_oncpu); CP(*ki, *ki32, ki_lastcpu); /* XXX TODO: wrap cpu value as appropriate */ CP(*ki, *ki32, ki_oncpu_old); CP(*ki, *ki32, ki_lastcpu_old); bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1); bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1); bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1); bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1); bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1); bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1); bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1); bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1); CP(*ki, *ki32, ki_tracer); CP(*ki, *ki32, ki_flag2); CP(*ki, *ki32, ki_fibnum); CP(*ki, *ki32, ki_cr_flags); CP(*ki, *ki32, ki_jid); CP(*ki, *ki32, ki_numthreads); CP(*ki, *ki32, ki_tid); CP(*ki, *ki32, ki_pri); freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage); freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch); PTRTRIM_CP(*ki, *ki32, ki_pcb); PTRTRIM_CP(*ki, *ki32, ki_kstack); PTRTRIM_CP(*ki, *ki32, ki_udata); PTRTRIM_CP(*ki, *ki32, ki_tdaddr); CP(*ki, *ki32, ki_sflag); CP(*ki, *ki32, ki_tdflags); } #endif static ssize_t kern_proc_out_size(struct 
proc *p, int flags) { ssize_t size = 0; PROC_LOCK_ASSERT(p, MA_OWNED); if ((flags & KERN_PROC_NOTHREADS) != 0) { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { size += sizeof(struct kinfo_proc32); } else #endif size += sizeof(struct kinfo_proc); } else { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) size += sizeof(struct kinfo_proc32) * p->p_numthreads; else #endif size += sizeof(struct kinfo_proc) * p->p_numthreads; } PROC_UNLOCK(p); return (size); } int kern_proc_out(struct proc *p, struct sbuf *sb, int flags) { struct thread *td; struct kinfo_proc ki; #ifdef COMPAT_FREEBSD32 struct kinfo_proc32 ki32; #endif int error; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS(FIRST_THREAD_IN_PROC(p) != NULL); error = 0; fill_kinfo_proc(p, &ki); if ((flags & KERN_PROC_NOTHREADS) != 0) { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) error = ENOMEM; } else #endif if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) error = ENOMEM; } else { FOREACH_THREAD_IN_PROC(p, td) { fill_kinfo_thread(td, &ki, 1); #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) error = ENOMEM; } else #endif if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) error = ENOMEM; if (error != 0) break; } } PROC_UNLOCK(p); return (error); } static int sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) { struct sbuf sb; struct kinfo_proc ki; int error, error2; if (req->oldptr == NULL) return (SYSCTL_OUT(req, 0, kern_proc_out_size(p, flags))); sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = kern_proc_out(p, &sb, flags); error2 = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0) return (error); else if (error2 != 0) return (error2); return (0); } int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg) { struct proc *p; int error, i, j; for (i = 0; i < pidhashlock + 1; i++) { sx_slock(&proctree_lock); sx_slock(&pidhashtbl_lock[i]); for (j = i; j <= pidhash; j += pidhashlock + 1) { LIST_FOREACH(p, &pidhashtbl[j], p_hash) { if (p->p_state == PRS_NEW) continue; error = cb(p, cbarg); PROC_LOCK_ASSERT(p, MA_NOTOWNED); if (error != 0) { sx_sunlock(&pidhashtbl_lock[i]); sx_sunlock(&proctree_lock); return (error); } } } sx_sunlock(&pidhashtbl_lock[i]); sx_sunlock(&proctree_lock); } return (0); } struct kern_proc_out_args { struct sysctl_req *req; int flags; int oid_number; int *name; }; static int sysctl_kern_proc_iterate(struct proc *p, void *origarg) { struct kern_proc_out_args *arg = origarg; int *name = arg->name; int oid_number = arg->oid_number; int flags = arg->flags; struct sysctl_req *req = arg->req; int error = 0; PROC_LOCK(p); KASSERT(p->p_ucred != NULL, ("process credential is NULL for non-NEW proc")); /* * Show a user only appropriate processes. */ if (p_cansee(curthread, p)) goto skip; /* * TODO - make more efficient (see notes below). * do by session. 
*/ switch (oid_number) { case KERN_PROC_GID: if (p->p_ucred->cr_gid != (gid_t)name[0]) goto skip; break; case KERN_PROC_PGRP: /* could do this by traversing pgrp */ if (p->p_pgrp == NULL || p->p_pgrp->pg_id != (pid_t)name[0]) goto skip; break; case KERN_PROC_RGID: if (p->p_ucred->cr_rgid != (gid_t)name[0]) goto skip; break; case KERN_PROC_SESSION: if (p->p_session == NULL || p->p_session->s_sid != (pid_t)name[0]) goto skip; break; case KERN_PROC_TTY: if ((p->p_flag & P_CONTROLT) == 0 || p->p_session == NULL) goto skip; /* XXX proctree_lock */ SESS_LOCK(p->p_session); if (p->p_session->s_ttyp == NULL || tty_udev(p->p_session->s_ttyp) != (dev_t)name[0]) { SESS_UNLOCK(p->p_session); goto skip; } SESS_UNLOCK(p->p_session); break; case KERN_PROC_UID: if (p->p_ucred->cr_uid != (uid_t)name[0]) goto skip; break; case KERN_PROC_RUID: if (p->p_ucred->cr_ruid != (uid_t)name[0]) goto skip; break; case KERN_PROC_PROC: break; default: break; } error = sysctl_out_proc(p, req, flags); PROC_LOCK_ASSERT(p, MA_NOTOWNED); return (error); skip: PROC_UNLOCK(p); return (0); } static int sysctl_kern_proc(SYSCTL_HANDLER_ARGS) { struct kern_proc_out_args iterarg; int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int flags, oid_number; int error = 0; oid_number = oidp->oid_number; if (oid_number != KERN_PROC_ALL && (oid_number & KERN_PROC_INC_THREAD) == 0) flags = KERN_PROC_NOTHREADS; else { flags = 0; oid_number &= ~KERN_PROC_INC_THREAD; } #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) flags |= KERN_PROC_MASK32; #endif if (oid_number == KERN_PROC_PID) { if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, 0); if (error) return (error); sx_slock(&proctree_lock); error = pget((pid_t)name[0], PGET_CANSEE, &p); if (error == 0) error = sysctl_out_proc(p, req, flags); sx_sunlock(&proctree_lock); return (error); } switch (oid_number) { case KERN_PROC_ALL: if (namelen != 0) return (EINVAL); break; case KERN_PROC_PROC: if (namelen != 0 && namelen != 1) return (EINVAL); break; default: if (namelen != 1) return (EINVAL); break; } if (req->oldptr == NULL) { /* overestimate by 5 procs */ error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5); if (error) return (error); } else { error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); } iterarg.flags = flags; iterarg.oid_number = oid_number; iterarg.req = req; iterarg.name = name; error = proc_iterate(sysctl_kern_proc_iterate, &iterarg); return (error); } struct pargs * pargs_alloc(int len) { struct pargs *pa; pa = malloc(sizeof(struct pargs) + len, M_PARGS, M_WAITOK); refcount_init(&pa->ar_ref, 1); pa->ar_length = len; return (pa); } static void pargs_free(struct pargs *pa) { free(pa, M_PARGS); } void pargs_hold(struct pargs *pa) { if (pa == NULL) return; refcount_acquire(&pa->ar_ref); } void pargs_drop(struct pargs *pa) { if (pa == NULL) return; if (refcount_release(&pa->ar_ref)) pargs_free(pa); } static int proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf, size_t len) { ssize_t n; /* * This may return a short read if the string is shorter than the chunk * and is aligned at the end of the page, and the following page is not * mapped. */ n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len); if (n <= 0) return (ENOMEM); return (0); } #define PROC_AUXV_MAX 256 /* Safety limit on auxv size. 
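   (the auxv scan fails with ENOEXEC if AT_NULL is not found within this many entries)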
*/ enum proc_vector_type { PROC_ARG, PROC_ENV, PROC_AUX, }; #ifdef COMPAT_FREEBSD32 static int get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct freebsd32_ps_strings pss; Elf32_Auxinfo aux; vm_offset_t vptr, ptr; uint32_t *proc_vector32; char **proc_vector; size_t vsize, size; int i, error; error = 0; if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss, sizeof(pss)) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: vptr = (vm_offset_t)PTRIN(pss.ps_argvstr); vsize = pss.ps_nargvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(int32_t); break; case PROC_ENV: vptr = (vm_offset_t)PTRIN(pss.ps_envstr); vsize = pss.ps_nenvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(int32_t); break; case PROC_AUX: vptr = (vm_offset_t)PTRIN(pss.ps_envstr) + (pss.ps_nenvstr + 1) * sizeof(int32_t); if (vptr % 4 != 0) return (ENOEXEC); for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; ptr += sizeof(aux); } if (aux.a_type != AT_NULL) return (ENOEXEC); vsize = i + 1; size = vsize * sizeof(aux); break; default: KASSERT(0, ("Wrong proc vector type: %d", type)); return (EINVAL); } proc_vector32 = malloc(size, M_TEMP, M_WAITOK); if (proc_readmem(td, p, vptr, proc_vector32, size) != size) { error = ENOMEM; goto done; } if (type == PROC_AUX) { *proc_vectorp = (char **)proc_vector32; *vsizep = vsize; return (0); } proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK); for (i = 0; i < (int)vsize; i++) proc_vector[i] = PTRIN(proc_vector32[i]); *proc_vectorp = proc_vector; *vsizep = vsize; done: free(proc_vector32, M_TEMP); return (error); } #endif static int get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct ps_strings pss; Elf_Auxinfo aux; vm_offset_t vptr, ptr; char **proc_vector; size_t vsize, size; int i; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) return (get_proc_vector32(td, p, proc_vectorp, vsizep, type)); #endif if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss, sizeof(pss)) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: vptr = (vm_offset_t)pss.ps_argvstr; vsize = pss.ps_nargvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(char *); break; case PROC_ENV: vptr = (vm_offset_t)pss.ps_envstr; vsize = pss.ps_nenvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(char *); break; case PROC_AUX: /* * The aux array is just above env array on the stack. Check * that the address is naturally aligned. */ vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1) * sizeof(char *); #if __ELF_WORD_SIZE == 64 if (vptr % sizeof(uint64_t) != 0) #else if (vptr % sizeof(uint32_t) != 0) #endif return (ENOEXEC); /* * We count the array size reading the aux vectors from the * stack until AT_NULL vector is returned. So (to keep the code * simple) we read the process stack twice: the first time here * to find the size and the second time when copying the vectors * to the allocated proc_vector. 
*/ for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; ptr += sizeof(aux); } /* * If the PROC_AUXV_MAX entries are iterated over, and we have * not reached AT_NULL, it is most likely we are reading wrong * data: either the process doesn't have auxv array or data has * been modified. Return the error in this case. */ if (aux.a_type != AT_NULL) return (ENOEXEC); vsize = i + 1; size = vsize * sizeof(aux); break; default: KASSERT(0, ("Wrong proc vector type: %d", type)); return (EINVAL); /* In case we are built without INVARIANTS. */ } proc_vector = malloc(size, M_TEMP, M_WAITOK); if (proc_readmem(td, p, vptr, proc_vector, size) != size) { free(proc_vector, M_TEMP); return (ENOMEM); } *proc_vectorp = proc_vector; *vsizep = vsize; return (0); } #define GET_PS_STRINGS_CHUNK_SZ 256 /* Chunk size (bytes) for ps_strings operations. */ static int get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb, enum proc_vector_type type) { size_t done, len, nchr, vsize; int error, i; char **proc_vector, *sptr; char pss_string[GET_PS_STRINGS_CHUNK_SZ]; PROC_ASSERT_HELD(p); /* * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes. */ nchr = 2 * (PATH_MAX + ARG_MAX); error = get_proc_vector(td, p, &proc_vector, &vsize, type); if (error != 0) return (error); for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) { /* * The program may have scribbled into its argv array, e.g. to * remove some arguments. If that has happened, break out * before trying to read from NULL. */ if (proc_vector[i] == NULL) break; for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) { error = proc_read_string(td, p, sptr, pss_string, sizeof(pss_string)); if (error != 0) goto done; len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ); if (done + len >= nchr) len = nchr - done - 1; sbuf_bcat(sb, pss_string, len); if (len != GET_PS_STRINGS_CHUNK_SZ) break; done += GET_PS_STRINGS_CHUNK_SZ; } sbuf_bcat(sb, "", 1); done += len + 1; } done: free(proc_vector, M_TEMP); return (error); } int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb) { return (get_ps_strings(curthread, p, sb, PROC_ARG)); } int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb) { return (get_ps_strings(curthread, p, sb, PROC_ENV)); } int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb) { size_t vsize, size; char **auxv; int error; error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX); if (error == 0) { #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) size = vsize * sizeof(Elf32_Auxinfo); else #endif size = vsize * sizeof(Elf_Auxinfo); if (sbuf_bcat(sb, auxv, size) != 0) error = ENOMEM; free(auxv, M_TEMP); } return (error); } /* * This sysctl allows a process to retrieve the argument list or process * title for another process without groping around in the address space * of the other process. It also allow a process to set its own "process * title to a string of its own choice. */ static int sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct pargs *newpa, *pa; struct proc *p; struct sbuf sb; int flags, error = 0, error2; pid_t pid; if (namelen != 1) return (EINVAL); p = curproc; pid = (pid_t)name[0]; if (pid == -1) { pid = p->p_pid; } /* * If the query is for this process and it is single-threaded, there * is nobody to modify pargs, thus we can just read. 
*/ if (pid == p->p_pid && p->p_numthreads == 1 && req->newptr == NULL && (pa = p->p_args) != NULL) return (SYSCTL_OUT(req, pa->ar_args, pa->ar_length)); flags = PGET_CANSEE; if (req->newptr != NULL) flags |= PGET_ISCURRENT; error = pget(pid, flags, &p); if (error) return (error); pa = p->p_args; if (pa != NULL) { pargs_hold(pa); PROC_UNLOCK(p); error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length); pargs_drop(pa); } else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) { _PHOLD(p); PROC_UNLOCK(p); sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getargv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); if (error == 0 && error2 != 0) error = error2; } else { PROC_UNLOCK(p); } if (error != 0 || req->newptr == NULL) return (error); if (req->newlen > ps_arg_cache_limit - sizeof(struct pargs)) return (ENOMEM); if (req->newlen == 0) { /* * Clear the argument pointer, so that we'll fetch arguments * with proc_getargv() until further notice. */ newpa = NULL; } else { newpa = pargs_alloc(req->newlen); error = SYSCTL_IN(req, newpa->ar_args, req->newlen); if (error != 0) { pargs_free(newpa); return (error); } } PROC_LOCK(p); pa = p->p_args; p->p_args = newpa; PROC_UNLOCK(p); pargs_drop(pa); return (0); } /* * This sysctl allows a process to retrieve environment of another process. */ static int sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct sbuf sb; int error, error2; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); if ((p->p_flag & P_SYSTEM) != 0) { PRELE(p); return (0); } sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getenvv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); return (error != 0 ? error : error2); } /* * This sysctl allows a process to retrieve ELF auxiliary vector of * another process. */ static int sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct sbuf sb; int error, error2; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); if ((p->p_flag & P_SYSTEM) != 0) { PRELE(p); return (0); } sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getauxv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); return (error != 0 ? error : error2); } /* * This sysctl allows a process to retrieve the path of the executable for * itself or another process. 
*/ static int sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS) { pid_t *pidp = (pid_t *)arg1; unsigned int arglen = arg2; struct proc *p; struct vnode *vp; char *retbuf, *freebuf; int error; if (arglen != 1) return (EINVAL); if (*pidp == -1) { /* -1 means this process */ p = req->td->td_proc; } else { error = pget(*pidp, PGET_CANSEE, &p); if (error != 0) return (error); } vp = p->p_textvp; if (vp == NULL) { if (*pidp != -1) PROC_UNLOCK(p); return (0); } vref(vp); if (*pidp != -1) PROC_UNLOCK(p); error = vn_fullpath(vp, &retbuf, &freebuf); vrele(vp); if (error) return (error); error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1); free(freebuf, M_TEMP); return (error); } static int sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS) { struct proc *p; char *sv_name; int *name; int namelen; int error; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; error = pget((pid_t)name[0], PGET_CANSEE, &p); if (error != 0) return (error); sv_name = p->p_sysent->sv_name; PROC_UNLOCK(p); return (sysctl_handle_string(oidp, sv_name, 0, req)); } #ifdef KINFO_OVMENTRY_SIZE CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE); #endif #ifdef COMPAT_FREEBSD7 static int sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS) { vm_map_entry_t entry, tmp_entry; unsigned int last_timestamp; char *fullpath, *freepath; struct kinfo_ovmentry *kve; struct vattr va; struct ucred *cred; int error, *name; struct vnode *vp; struct proc *p; vm_map_t map; struct vmspace *vm; name = (int *)arg1; error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); vm = vmspace_acquire_ref(p); if (vm == NULL) { PRELE(p); return (ESRCH); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK); map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { vm_object_t obj, tobj, lobj; vm_offset_t addr; if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; bzero(kve, sizeof(*kve)); kve->kve_structsize = sizeof(*kve); kve->kve_private_resident = 0; obj = entry->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); if (obj->shadow_count == 1) kve->kve_private_resident = obj->resident_page_count; } kve->kve_resident = 0; addr = entry->start; while (addr < entry->end) { if (pmap_extract(map->pmap, addr)) kve->kve_resident++; addr += PAGE_SIZE; } for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { if (tobj != obj) { VM_OBJECT_RLOCK(tobj); kve->kve_offset += tobj->backing_object_offset; } if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } kve->kve_start = (void*)entry->start; kve->kve_end = (void*)entry->end; kve->kve_offset += (off_t)entry->offset; if (entry->protection & VM_PROT_READ) kve->kve_protection |= KVME_PROT_READ; if (entry->protection & VM_PROT_WRITE) kve->kve_protection |= KVME_PROT_WRITE; if (entry->protection & VM_PROT_EXECUTE) kve->kve_protection |= KVME_PROT_EXEC; if (entry->eflags & MAP_ENTRY_COW) kve->kve_flags |= KVME_FLAG_COW; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) kve->kve_flags |= KVME_FLAG_NEEDS_COPY; if (entry->eflags & MAP_ENTRY_NOCOREDUMP) kve->kve_flags |= KVME_FLAG_NOCOREDUMP; last_timestamp = map->timestamp; vm_map_unlock_read(map); kve->kve_fileid = 0; kve->kve_fsid = 0; freepath = NULL; fullpath = ""; if (lobj) { kve->kve_type = vm_object_kvme_type(lobj, &vp); if (kve->kve_type == KVME_TYPE_MGTDEVICE) kve->kve_type = KVME_TYPE_UNKNOWN; if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); kve->kve_ref_count = obj->ref_count; kve->kve_shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(vp, &fullpath, 
&freepath); cred = curthread->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_fileid = va.va_fileid; /* truncate */ kve->kve_fsid = va.va_fsid; } vput(vp); } } else { kve->kve_type = KVME_TYPE_NONE; kve->kve_ref_count = 0; kve->kve_shadow_count = 0; } strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); if (freepath != NULL) free(freepath, M_TEMP); error = SYSCTL_OUT(req, kve, sizeof(*kve)); vm_map_lock_read(map); if (error) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); PRELE(p); free(kve, M_TEMP); return (error); } #endif /* COMPAT_FREEBSD7 */ #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif void kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry, int *resident_count, bool *super) { vm_object_t obj, tobj; vm_page_t m, m_adv; vm_offset_t addr; vm_paddr_t pa; vm_pindex_t pi, pi_adv, pindex; *super = false; *resident_count = 0; if (vmmap_skip_res_cnt) return; pa = 0; obj = entry->object.vm_object; addr = entry->start; m_adv = NULL; pi = OFF_TO_IDX(entry->offset); for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) { if (m_adv != NULL) { m = m_adv; } else { pi_adv = atop(entry->end - addr); pindex = pi; for (tobj = obj;; tobj = tobj->backing_object) { m = vm_page_find_least(tobj, pindex); if (m != NULL) { if (m->pindex == pindex) break; if (pi_adv > m->pindex - pindex) { pi_adv = m->pindex - pindex; m_adv = m; } } if (tobj->backing_object == NULL) goto next; pindex += OFF_TO_IDX(tobj-> backing_object_offset); } } m_adv = NULL; if (m->psind != 0 && addr + pagesizes[1] <= entry->end && (addr & (pagesizes[1] - 1)) == 0 && (pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) { *super = true; pi_adv = atop(pagesizes[1]); } else { /* * We do not test the found page on validity. * Either the page is busy and being paged in, * or it was invalidated. The first case * should be counted as resident, the second * is not so clear; we do account both. */ pi_adv = 1; } *resident_count += pi_adv; next:; } } /* * Must be called with the process locked and will return unlocked. 
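 * The process is held (_PHOLD) across the map walk and released with PRELE before returning.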
*/ int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { vm_map_entry_t entry, tmp_entry; struct vattr va; vm_map_t map; vm_object_t lobj, nobj, obj, tobj; char *fullpath, *freepath; struct kinfo_vmentry *kve; struct ucred *cred; struct vnode *vp; struct vmspace *vm; vm_offset_t addr; unsigned int last_timestamp; int error; bool guard, super; PROC_LOCK_ASSERT(p, MA_OWNED); _PHOLD(p); PROC_UNLOCK(p); vm = vmspace_acquire_ref(p); if (vm == NULL) { PRELE(p); return (ESRCH); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO); error = 0; map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; addr = entry->end; bzero(kve, sizeof(*kve)); obj = entry->object.vm_object; if (obj != NULL) { + if ((obj->flags & OBJ_ANON) != 0) + kve->kve_obj = (uintptr_t)obj; + for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { VM_OBJECT_RLOCK(tobj); kve->kve_offset += tobj->backing_object_offset; lobj = tobj; } if (obj->backing_object == NULL) kve->kve_private_resident = obj->resident_page_count; kern_proc_vmmap_resident(map, entry, &kve->kve_resident, &super); if (super) kve->kve_flags |= KVME_FLAG_SUPER; for (tobj = obj; tobj != NULL; tobj = nobj) { nobj = tobj->backing_object; if (tobj != obj && tobj != lobj) VM_OBJECT_RUNLOCK(tobj); } } else { lobj = NULL; } kve->kve_start = entry->start; kve->kve_end = entry->end; kve->kve_offset += entry->offset; if (entry->protection & VM_PROT_READ) kve->kve_protection |= KVME_PROT_READ; if (entry->protection & VM_PROT_WRITE) kve->kve_protection |= KVME_PROT_WRITE; if (entry->protection & VM_PROT_EXECUTE) kve->kve_protection |= KVME_PROT_EXEC; if (entry->eflags & MAP_ENTRY_COW) kve->kve_flags |= KVME_FLAG_COW; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) kve->kve_flags |= KVME_FLAG_NEEDS_COPY; if (entry->eflags & MAP_ENTRY_NOCOREDUMP) kve->kve_flags |= KVME_FLAG_NOCOREDUMP; if (entry->eflags & MAP_ENTRY_GROWS_UP) kve->kve_flags |= KVME_FLAG_GROWS_UP; if (entry->eflags & MAP_ENTRY_GROWS_DOWN) kve->kve_flags |= KVME_FLAG_GROWS_DOWN; if (entry->eflags & MAP_ENTRY_USER_WIRED) kve->kve_flags |= KVME_FLAG_USER_WIRED; guard = (entry->eflags & MAP_ENTRY_GUARD) != 0; last_timestamp = map->timestamp; vm_map_unlock_read(map); freepath = NULL; fullpath = ""; if (lobj != NULL) { kve->kve_type = vm_object_kvme_type(lobj, &vp); if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); kve->kve_ref_count = obj->ref_count; kve->kve_shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(vp, &fullpath, &freepath); kve->kve_vn_type = vntype_to_kinfo(vp->v_type); cred = curthread->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_vn_fileid = va.va_fileid; kve->kve_vn_fsid = va.va_fsid; kve->kve_vn_fsid_freebsd11 = kve->kve_vn_fsid; /* truncate */ kve->kve_vn_mode = MAKEIMODE(va.va_type, va.va_mode); kve->kve_vn_size = va.va_size; kve->kve_vn_rdev = va.va_rdev; kve->kve_vn_rdev_freebsd11 = kve->kve_vn_rdev; /* truncate */ kve->kve_status = KF_ATTR_VALID; } vput(vp); } } else { kve->kve_type = guard ? 
KVME_TYPE_GUARD : KVME_TYPE_NONE; kve->kve_ref_count = 0; kve->kve_shadow_count = 0; } strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); if (freepath != NULL) free(freepath, M_TEMP); /* Pack record size down */ if ((flags & KERN_VMMAP_PACK_KINFO) != 0) kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) + strlen(kve->kve_path) + 1; else kve->kve_structsize = sizeof(*kve); kve->kve_structsize = roundup(kve->kve_structsize, sizeof(uint64_t)); /* Halt filling and truncate rather than exceeding maxlen */ if (maxlen != -1 && maxlen < kve->kve_structsize) { error = 0; vm_map_lock_read(map); break; } else if (maxlen != -1) maxlen -= kve->kve_structsize; if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0) error = ENOMEM; vm_map_lock_read(map); if (error != 0) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); PRELE(p); free(kve, M_TEMP); return (error); } static int sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS) { struct proc *p; struct sbuf sb; int error, error2, *name; name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } #if defined(STACK) || defined(DDB) static int sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS) { struct kinfo_kstack *kkstp; int error, i, *name, numthreads; lwpid_t *lwpidarray; struct thread *td; struct stack *st; struct sbuf sb; struct proc *p; name = (int *)arg1; error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p); if (error != 0) return (error); kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK); st = stack_create(M_WAITOK); lwpidarray = NULL; PROC_LOCK(p); do { if (lwpidarray != NULL) { free(lwpidarray, M_TEMP); lwpidarray = NULL; } numthreads = p->p_numthreads; PROC_UNLOCK(p); lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP, M_WAITOK | M_ZERO); PROC_LOCK(p); } while (numthreads < p->p_numthreads); /* * XXXRW: During the below loop, execve(2) and countless other sorts * of changes could have taken place. Should we check to see if the * vmspace has been replaced, or the like, in order to prevent * giving a snapshot that spans, say, execve(2), with some threads * before and some after? Among other things, the credentials could * have changed, in which case the right to extract debug info might * no longer be assured. 
*/ i = 0; FOREACH_THREAD_IN_PROC(p, td) { KASSERT(i < numthreads, ("sysctl_kern_proc_kstack: numthreads")); lwpidarray[i] = td->td_tid; i++; } PROC_UNLOCK(p); numthreads = i; for (i = 0; i < numthreads; i++) { td = tdfind(lwpidarray[i], p->p_pid); if (td == NULL) { continue; } bzero(kkstp, sizeof(*kkstp)); (void)sbuf_new(&sb, kkstp->kkst_trace, sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN); thread_lock(td); kkstp->kkst_tid = td->td_tid; if (TD_IS_SWAPPED(td)) kkstp->kkst_state = KKST_STATE_SWAPPED; else if (stack_save_td(st, td) == 0) kkstp->kkst_state = KKST_STATE_STACKOK; else kkstp->kkst_state = KKST_STATE_RUNNING; thread_unlock(td); PROC_UNLOCK(p); stack_sbuf_print(&sb, st); sbuf_finish(&sb); sbuf_delete(&sb); error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp)); if (error) break; } PRELE(p); if (lwpidarray != NULL) free(lwpidarray, M_TEMP); stack_destroy(st); free(kkstp, M_TEMP); return (error); } #endif /* * This sysctl allows a process to retrieve the full list of groups from * itself or another process. */ static int sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS) { pid_t *pidp = (pid_t *)arg1; unsigned int arglen = arg2; struct proc *p; struct ucred *cred; int error; if (arglen != 1) return (EINVAL); if (*pidp == -1) { /* -1 means this process */ p = req->td->td_proc; PROC_LOCK(p); } else { error = pget(*pidp, PGET_CANSEE, &p); if (error != 0) return (error); } cred = crhold(p->p_ucred); PROC_UNLOCK(p); error = SYSCTL_OUT(req, cred->cr_groups, cred->cr_ngroups * sizeof(gid_t)); crfree(cred); return (error); } /* * This sysctl allows a process to retrieve or/and set the resource limit for * another process. */ static int sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct rlimit rlim; struct proc *p; u_int which; int flags, error; if (namelen != 2) return (EINVAL); which = (u_int)name[1]; if (which >= RLIM_NLIMITS) return (EINVAL); if (req->newptr != NULL && req->newlen != sizeof(rlim)) return (EINVAL); flags = PGET_HOLD | PGET_NOTWEXIT; if (req->newptr != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); /* * Retrieve limit. */ if (req->oldptr != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); } error = SYSCTL_OUT(req, &rlim, sizeof(rlim)); if (error != 0) goto errout; /* * Set limit. */ if (req->newptr != NULL) { error = SYSCTL_IN(req, &rlim, sizeof(rlim)); if (error == 0) error = kern_proc_setrlimit(curthread, p, which, &rlim); } errout: PRELE(p); return (error); } /* * This sysctl allows a process to retrieve ps_strings structure location of * another process. */ static int sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; vm_offset_t ps_strings; int error; #ifdef COMPAT_FREEBSD32 uint32_t ps_strings32; #endif if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 if ((req->flags & SCTL_MASK32) != 0) { /* * We return 0 if the 32 bit emulation request is for a 64 bit * process. */ ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ? PTROUT(p->p_sysent->sv_psstrings) : 0; PROC_UNLOCK(p); error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32)); return (error); } #endif ps_strings = p->p_sysent->sv_psstrings; PROC_UNLOCK(p); error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings)); return (error); } /* * This sysctl allows a process to retrieve umask of another process. 
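 *
 * A minimal userland sketch of querying it, for some pid of interest,
 * might look like the following (illustrative only; error handling and
 * headers omitted):
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_UMASK, pid };
 *	u_short cmask;
 *	size_t len = sizeof(cmask);
 *
 *	if (sysctl(mib, 4, &cmask, &len, NULL, 0) == 0)
 *		printf("umask %#o\n", (unsigned)cmask);
 *
 * Passing a pid of 0, or the caller's own pid, reports the current
 * process's cmask without a process lookup.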
*/ static int sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int error; u_short cmask; pid_t pid; if (namelen != 1) return (EINVAL); pid = (pid_t)name[0]; p = curproc; if (pid == p->p_pid || pid == 0) { cmask = p->p_pd->pd_cmask; goto out; } error = pget(pid, PGET_WANTREAD, &p); if (error != 0) return (error); cmask = p->p_pd->pd_cmask; PRELE(p); out: error = SYSCTL_OUT(req, &cmask, sizeof(cmask)); return (error); } /* * This sysctl allows a process to set and retrieve binary osreldate of * another process. */ static int sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int flags, error, osrel; if (namelen != 1) return (EINVAL); if (req->newptr != NULL && req->newlen != sizeof(osrel)) return (EINVAL); flags = PGET_HOLD | PGET_NOTWEXIT; if (req->newptr != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel)); if (error != 0) goto errout; if (req->newptr != NULL) { error = SYSCTL_IN(req, &osrel, sizeof(osrel)); if (error != 0) goto errout; if (osrel < 0) { error = EINVAL; goto errout; } p->p_osrel = osrel; } errout: PRELE(p); return (error); } static int sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct kinfo_sigtramp kst; const struct sysentvec *sv; int error; #ifdef COMPAT_FREEBSD32 struct kinfo_sigtramp32 kst32; #endif if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); sv = p->p_sysent; #ifdef COMPAT_FREEBSD32 if ((req->flags & SCTL_MASK32) != 0) { bzero(&kst32, sizeof(kst32)); if (SV_PROC_FLAG(p, SV_ILP32)) { if (sv->sv_sigcode_base != 0) { kst32.ksigtramp_start = sv->sv_sigcode_base; kst32.ksigtramp_end = sv->sv_sigcode_base + *sv->sv_szsigcode; } else { kst32.ksigtramp_start = sv->sv_psstrings - *sv->sv_szsigcode; kst32.ksigtramp_end = sv->sv_psstrings; } } PROC_UNLOCK(p); error = SYSCTL_OUT(req, &kst32, sizeof(kst32)); return (error); } #endif bzero(&kst, sizeof(kst)); if (sv->sv_sigcode_base != 0) { kst.ksigtramp_start = (char *)sv->sv_sigcode_base; kst.ksigtramp_end = (char *)sv->sv_sigcode_base + *sv->sv_szsigcode; } else { kst.ksigtramp_start = (char *)sv->sv_psstrings - *sv->sv_szsigcode; kst.ksigtramp_end = (char *)sv->sv_psstrings; } PROC_UNLOCK(p); error = SYSCTL_OUT(req, &kst, sizeof(kst)); return (error); } static int sysctl_kern_proc_sigfastblk(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; pid_t pid; struct proc *p; struct thread *td1; uintptr_t addr; #ifdef COMPAT_FREEBSD32 uint32_t addr32; #endif int error; if (namelen != 1 || req->newptr != NULL) return (EINVAL); pid = (pid_t)name[0]; error = pget(pid, PGET_HOLD | PGET_NOTWEXIT | PGET_CANDEBUG, &p); if (error != 0) return (error); PROC_LOCK(p); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { if (!SV_PROC_FLAG(p, SV_ILP32)) { error = EINVAL; goto errlocked; } } #endif if (pid <= PID_MAX) { td1 = FIRST_THREAD_IN_PROC(p); } else { FOREACH_THREAD_IN_PROC(p, td1) { if (td1->td_tid == pid) break; } } if (td1 == NULL) { error = ESRCH; goto errlocked; } /* * The access to the private thread flags. It is fine as far * as no out-of-thin-air values are read from td_pflags, and * usermode read of the td_sigblock_ptr is racy inherently, * since target process might have already changed it * meantime. 
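 *
 * In other words, the address returned below is only a snapshot; the
 * target thread may install or clear its sigfastblock pointer at any
 * time after the copyout.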
*/ if ((td1->td_pflags & TDP_SIGFASTBLOCK) != 0) addr = (uintptr_t)td1->td_sigblock_ptr; else error = ENOTTY; errlocked: _PRELE(p); PROC_UNLOCK(p); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { addr32 = addr; error = SYSCTL_OUT(req, &addr32, sizeof(addr32)); } else #endif error = SYSCTL_OUT(req, &addr, sizeof(addr)); return (error); } SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT| CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc", "Return entire process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Return process table, no threads"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_args, "Process argument list"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_env, "Process environment"); static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name, "Process syscall vector name (ABI type)"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD), sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, 
sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Return process table, including threads"); #ifdef COMPAT_FREEBSD7 static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries"); #endif static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries"); #if defined(STACK) || defined(DDB) static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks"); #endif static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit, "Process resource limits"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings, "Process ps_strings location"); static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask"); static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel, "Process binary osreldate"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp, "Process signal trampoline location"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGFASTBLK, sigfastblk, CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_sigfastblk, "Thread sigfastblock address"); int allproc_gen; /* * stop_all_proc() purpose is to stop all process which have usermode, * except current process for obvious reasons. This makes it somewhat * unreliable when invoked from multithreaded process. The service * must not be user-callable anyway. */ void stop_all_proc(void) { struct proc *cp, *p; int r, gen; bool restart, seen_stopped, seen_exiting, stopped_some; cp = curproc; allproc_loop: sx_xlock(&allproc_lock); gen = allproc_gen; seen_exiting = seen_stopped = stopped_some = restart = false; LIST_REMOVE(cp, p_list); LIST_INSERT_HEAD(&allproc, cp, p_list); for (;;) { p = LIST_NEXT(cp, p_list); if (p == NULL) break; LIST_REMOVE(cp, p_list); LIST_INSERT_AFTER(p, cp, p_list); PROC_LOCK(p); if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) { PROC_UNLOCK(p); continue; } if ((p->p_flag & P_WEXIT) != 0) { seen_exiting = true; PROC_UNLOCK(p); continue; } if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { /* * Stopped processes are tolerated when there * are no other processes which might continue * them. P_STOPPED_SINGLE but not * P_TOTAL_STOP process still has at least one * thread running. */ seen_stopped = true; PROC_UNLOCK(p); continue; } sx_xunlock(&allproc_lock); _PHOLD(p); r = thread_single(p, SINGLE_ALLPROC); if (r != 0) restart = true; else stopped_some = true; _PRELE(p); PROC_UNLOCK(p); sx_xlock(&allproc_lock); } /* Catch forked children we did not see in iteration. 
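 * The allproc_gen generation count was sampled before the scan, so a
 * mismatch below means processes were created while allproc_lock was
 * dropped and the whole pass must be retried.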
*/ if (gen != allproc_gen) restart = true; sx_xunlock(&allproc_lock); if (restart || stopped_some || seen_exiting || seen_stopped) { kern_yield(PRI_USER); goto allproc_loop; } } void resume_all_proc(void) { struct proc *cp, *p; cp = curproc; sx_xlock(&allproc_lock); again: LIST_REMOVE(cp, p_list); LIST_INSERT_HEAD(&allproc, cp, p_list); for (;;) { p = LIST_NEXT(cp, p_list); if (p == NULL) break; LIST_REMOVE(cp, p_list); LIST_INSERT_AFTER(p, cp, p_list); PROC_LOCK(p); if ((p->p_flag & P_TOTAL_STOP) != 0) { sx_xunlock(&allproc_lock); _PHOLD(p); thread_single_end(p, SINGLE_ALLPROC); _PRELE(p); PROC_UNLOCK(p); sx_xlock(&allproc_lock); } else { PROC_UNLOCK(p); } } /* Did the loop above missed any stopped process ? */ FOREACH_PROC_IN_SYSTEM(p) { /* No need for proc lock. */ if ((p->p_flag & P_TOTAL_STOP) != 0) goto again; } sx_xunlock(&allproc_lock); } /* #define TOTAL_STOP_DEBUG 1 */ #ifdef TOTAL_STOP_DEBUG volatile static int ap_resume; #include static int sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS) { int error, val; val = 0; ap_resume = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val != 0) { stop_all_proc(); syncer_suspend(); while (ap_resume == 0) ; syncer_resume(); resume_all_proc(); } return (0); } SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0, sysctl_debug_stop_all_proc, "I", ""); #endif diff --git a/sys/sys/user.h b/sys/sys/user.h index f882e7dcd699..14471c91572f 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -1,621 +1,633 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. * Copyright (c) 2007 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)user.h 8.2 (Berkeley) 9/23/93 * $FreeBSD$ */ #ifndef _SYS_USER_H_ #define _SYS_USER_H_ #include #ifndef _KERNEL /* stuff that *used* to be included by user.h, or is now needed */ #include #include #include #include #include #include #include #include #include #include /* XXX */ #include /* XXX */ #include /* XXX */ #include /* XXX */ #endif /* !_KERNEL */ #ifndef _SYS_RESOURCEVAR_H_ #include #endif #ifndef _SYS_SIGNALVAR_H_ #include #endif #ifndef _SYS_SOCKET_VAR_H_ #include #endif #include /* * KERN_PROC subtype ops return arrays of selected proc structure entries: * * This struct includes several arrays of spare space, with different arrays * for different standard C-types. When adding new variables to this struct, * the space for byte-aligned data should be taken from the ki_sparestring, * pointers from ki_spareptrs, word-aligned data from ki_spareints, and * doubleword-aligned data from ki_sparelongs. Make sure the space for new * variables come from the array which matches the size and alignment of * those variables on ALL hardware platforms, and then adjust the appropriate * KI_NSPARE_* value(s) to match. * * Always verify that sizeof(struct kinfo_proc) == KINFO_PROC_SIZE on all * platforms after you have added new variables. Note that if you change * the value of KINFO_PROC_SIZE, then many userland programs will stop * working until they are recompiled! * * Once you have added the new field, you will need to add code to initialize * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and * function kvm_proclist in lib/libkvm/kvm_proc.c . */ #define KI_NSPARE_INT 2 #define KI_NSPARE_LONG 12 #define KI_NSPARE_PTR 5 #ifndef _KERNEL #ifndef KINFO_PROC_SIZE #error "Unknown architecture" #endif #endif /* !_KERNEL */ #define WMESGLEN 8 /* size of returned wchan message */ #define LOCKNAMELEN 8 /* size of returned lock name */ #define TDNAMLEN 16 /* size of returned thread name */ #define COMMLEN 19 /* size of returned ki_comm name */ #define KI_EMULNAMELEN 16 /* size of returned ki_emul */ #define KI_NGROUPS 16 /* number of groups in ki_groups */ #define LOGNAMELEN 17 /* size of returned ki_login */ #define LOGINCLASSLEN 17 /* size of returned ki_loginclass */ #ifndef BURN_BRIDGES #define OCOMMLEN TDNAMLEN #define ki_ocomm ki_tdname #endif /* Flags for the process credential. */ #define KI_CRF_CAPABILITY_MODE 0x00000001 /* * Steal a bit from ki_cr_flags to indicate that the cred had more than * KI_NGROUPS groups. 
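 *
 * A consumer that needs the complete group list can test the bit and,
 * when it is set, fall back to the kern.proc.groups sysctl, e.g.
 * (illustrative only):
 *
 *	if ((kp->ki_cr_flags & KI_CRF_GRP_OVERFLOW) != 0)
 *		query_kern_proc_groups(kp->ki_pid);
 *
 * where query_kern_proc_groups() is a hypothetical userland wrapper
 * around the kern.proc.groups sysctl.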
*/ #define KI_CRF_GRP_OVERFLOW 0x80000000 struct kinfo_proc { int ki_structsize; /* size of this structure */ int ki_layout; /* reserved: layout identifier */ struct pargs *ki_args; /* address of command arguments */ struct proc *ki_paddr; /* address of proc */ struct user *ki_addr; /* kernel virtual addr of u-area */ struct vnode *ki_tracep; /* pointer to trace file */ struct vnode *ki_textvp; /* pointer to executable file */ struct filedesc *ki_fd; /* pointer to open file info */ struct vmspace *ki_vmspace; /* pointer to kernel vmspace struct */ const void *ki_wchan; /* sleep address */ pid_t ki_pid; /* Process identifier */ pid_t ki_ppid; /* parent process id */ pid_t ki_pgid; /* process group id */ pid_t ki_tpgid; /* tty process group id */ pid_t ki_sid; /* Process session ID */ pid_t ki_tsid; /* Terminal session ID */ short ki_jobc; /* job control counter */ short ki_spare_short1; /* unused (just here for alignment) */ uint32_t ki_tdev_freebsd11; /* controlling tty dev */ sigset_t ki_siglist; /* Signals arrived but not delivered */ sigset_t ki_sigmask; /* Current signal mask */ sigset_t ki_sigignore; /* Signals being ignored */ sigset_t ki_sigcatch; /* Signals being caught by user */ uid_t ki_uid; /* effective user id */ uid_t ki_ruid; /* Real user id */ uid_t ki_svuid; /* Saved effective user id */ gid_t ki_rgid; /* Real group id */ gid_t ki_svgid; /* Saved effective group id */ short ki_ngroups; /* number of groups */ short ki_spare_short2; /* unused (just here for alignment) */ gid_t ki_groups[KI_NGROUPS]; /* groups */ vm_size_t ki_size; /* virtual size */ segsz_t ki_rssize; /* current resident set size in pages */ segsz_t ki_swrss; /* resident set size before last swap */ segsz_t ki_tsize; /* text size (pages) XXX */ segsz_t ki_dsize; /* data size (pages) XXX */ segsz_t ki_ssize; /* stack size (pages) */ u_short ki_xstat; /* Exit status for wait & stop signal */ u_short ki_acflag; /* Accounting flags */ fixpt_t ki_pctcpu; /* %cpu for process during ki_swtime */ u_int ki_estcpu; /* Time averaged value of ki_cpticks */ u_int ki_slptime; /* Time since last blocked */ u_int ki_swtime; /* Time swapped in or out */ u_int ki_cow; /* number of copy-on-write faults */ u_int64_t ki_runtime; /* Real time in microsec */ struct timeval ki_start; /* starting time */ struct timeval ki_childtime; /* time used by process children */ long ki_flag; /* P_* flags */ long ki_kiflag; /* KI_* flags (below) */ int ki_traceflag; /* Kernel trace points */ char ki_stat; /* S* process status */ signed char ki_nice; /* Process "nice" value */ char ki_lock; /* Process lock (prevent swap) count */ char ki_rqindex; /* Run queue index */ u_char ki_oncpu_old; /* Which cpu we are on (legacy) */ u_char ki_lastcpu_old; /* Last cpu we were on (legacy) */ char ki_tdname[TDNAMLEN+1]; /* thread name */ char ki_wmesg[WMESGLEN+1]; /* wchan message */ char ki_login[LOGNAMELEN+1]; /* setlogin name */ char ki_lockname[LOCKNAMELEN+1]; /* lock name */ char ki_comm[COMMLEN+1]; /* command name */ char ki_emul[KI_EMULNAMELEN+1]; /* emulation name */ char ki_loginclass[LOGINCLASSLEN+1]; /* login class */ char ki_moretdname[MAXCOMLEN-TDNAMLEN+1]; /* more thread name */ /* * When adding new variables, take space for char-strings from the * front of ki_sparestrings, and ints from the end of ki_spareints. * That way the spare room from both arrays will remain contiguous. 
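 *
 * For example, adding a hypothetical 8-byte string field would take
 * space from the front of ki_sparestrings (shrinking it from 46 to 38),
 * while a new int would take the last slot of ki_spareints and drop
 * KI_NSPARE_INT by one, leaving sizeof(struct kinfo_proc), and hence
 * KINFO_PROC_SIZE, unchanged.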
*/ char ki_sparestrings[46]; /* spare string space */ int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */ uint64_t ki_tdev; /* controlling tty dev */ int ki_oncpu; /* Which cpu we are on */ int ki_lastcpu; /* Last cpu we were on */ int ki_tracer; /* Pid of tracing process */ int ki_flag2; /* P2_* flags */ int ki_fibnum; /* Default FIB number */ u_int ki_cr_flags; /* Credential flags */ int ki_jid; /* Process jail ID */ int ki_numthreads; /* XXXKSE number of threads in total */ lwpid_t ki_tid; /* XXXKSE thread id */ struct priority ki_pri; /* process priority */ struct rusage ki_rusage; /* process rusage statistics */ /* XXX - most fields in ki_rusage_ch are not (yet) filled in */ struct rusage ki_rusage_ch; /* rusage of children processes */ struct pcb *ki_pcb; /* kernel virtual addr of pcb */ void *ki_kstack; /* kernel virtual addr of stack */ void *ki_udata; /* User convenience pointer */ struct thread *ki_tdaddr; /* address of thread */ /* * When adding new variables, take space for pointers from the * front of ki_spareptrs, and longs from the end of ki_sparelongs. * That way the spare room from both arrays will remain contiguous. */ struct pwddesc *ki_pd; /* pointer to process paths info */ void *ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ long ki_sparelongs[KI_NSPARE_LONG]; /* spare room for growth */ long ki_sflag; /* PS_* flags */ long ki_tdflags; /* XXXKSE kthread flag */ }; void fill_kinfo_proc(struct proc *, struct kinfo_proc *); /* XXX - the following two defines are temporary */ #define ki_childstime ki_rusage_ch.ru_stime #define ki_childutime ki_rusage_ch.ru_utime /* * Legacy PS_ flag. This moved to p_flag but is maintained for * compatibility. */ #define PS_INMEM 0x00001 /* Loaded into memory. */ /* ki_sessflag values */ #define KI_CTTY 0x00000001 /* controlling tty vnode active */ #define KI_SLEADER 0x00000002 /* session leader */ #define KI_LOCKBLOCK 0x00000004 /* proc blocked on lock ki_lockname */ /* * This used to be the per-process structure containing data that * isn't needed in core when the process is swapped out, but now it * remains only for the benefit of a.out core dumps. */ struct user { struct pstats u_stats; /* *p_stats */ struct kinfo_proc u_kproc; /* eproc */ }; /* * The KERN_PROC_FILE sysctl allows a process to dump the file descriptor * array of another process. 
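 *
 * Each returned record carries its own length in kf_structsize, so a
 * reader walks the buffer record by record, roughly as below
 * (illustrative only; buf and len are assumed to come from a prior
 * sysctl call):
 *
 *	struct kinfo_file *kf;
 *	char *p;
 *
 *	for (p = buf; p < buf + len; p += kf->kf_structsize) {
 *		kf = (struct kinfo_file *)(void *)p;
 *		... inspect kf->kf_type, kf->kf_fd, kf->kf_path ...
 *	}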
*/ #define KF_ATTR_VALID 0x0001 #define KF_TYPE_NONE 0 #define KF_TYPE_VNODE 1 #define KF_TYPE_SOCKET 2 #define KF_TYPE_PIPE 3 #define KF_TYPE_FIFO 4 #define KF_TYPE_KQUEUE 5 /* was KF_TYPE_CRYPTO 6 */ #define KF_TYPE_MQUEUE 7 #define KF_TYPE_SHM 8 #define KF_TYPE_SEM 9 #define KF_TYPE_PTS 10 #define KF_TYPE_PROCDESC 11 #define KF_TYPE_DEV 12 #define KF_TYPE_EVENTFD 13 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 #define KF_VTYPE_VREG 1 #define KF_VTYPE_VDIR 2 #define KF_VTYPE_VBLK 3 #define KF_VTYPE_VCHR 4 #define KF_VTYPE_VLNK 5 #define KF_VTYPE_VSOCK 6 #define KF_VTYPE_VFIFO 7 #define KF_VTYPE_VBAD 8 #define KF_VTYPE_UNKNOWN 255 #define KF_FD_TYPE_CWD -1 /* Current working directory */ #define KF_FD_TYPE_ROOT -2 /* Root directory */ #define KF_FD_TYPE_JAIL -3 /* Jail directory */ #define KF_FD_TYPE_TRACE -4 /* Ktrace vnode */ #define KF_FD_TYPE_TEXT -5 /* Text vnode */ #define KF_FD_TYPE_CTTY -6 /* Controlling terminal */ #define KF_FLAG_READ 0x00000001 #define KF_FLAG_WRITE 0x00000002 #define KF_FLAG_APPEND 0x00000004 #define KF_FLAG_ASYNC 0x00000008 #define KF_FLAG_FSYNC 0x00000010 #define KF_FLAG_NONBLOCK 0x00000020 #define KF_FLAG_DIRECT 0x00000040 #define KF_FLAG_HASLOCK 0x00000080 #define KF_FLAG_SHLOCK 0x00000100 #define KF_FLAG_EXLOCK 0x00000200 #define KF_FLAG_NOFOLLOW 0x00000400 #define KF_FLAG_CREAT 0x00000800 #define KF_FLAG_TRUNC 0x00001000 #define KF_FLAG_EXCL 0x00002000 #define KF_FLAG_EXEC 0x00004000 /* * Old format. Has variable hidden padding due to alignment. * This is a compatibility hack for pre-build 7.1 packages. */ #if defined(__amd64__) #define KINFO_OFILE_SIZE 1328 #endif #if defined(__i386__) #define KINFO_OFILE_SIZE 1324 #endif struct kinfo_ofile { int kf_structsize; /* Size of kinfo_file. */ int kf_type; /* Descriptor type. */ int kf_fd; /* Array index. */ int kf_ref_count; /* Reference count. */ int kf_flags; /* Flags. */ /* XXX Hidden alignment padding here on amd64 */ off_t kf_offset; /* Seek location. */ int kf_vnode_type; /* Vnode type. */ int kf_sock_domain; /* Socket domain. */ int kf_sock_type; /* Socket type. */ int kf_sock_protocol; /* Socket protocol. */ char kf_path[PATH_MAX]; /* Path to file, if any. */ struct sockaddr_storage kf_sa_local; /* Socket address. */ struct sockaddr_storage kf_sa_peer; /* Peer address. */ }; #if defined(__amd64__) || defined(__i386__) /* * This size should never be changed. If you really need to, you must provide * backward ABI compatibility by allocating a new sysctl MIB that will return * the new structure. The current structure has to be returned by the current * sysctl MIB. See how it is done for the kinfo_ofile structure. */ #define KINFO_FILE_SIZE 1392 #endif struct kinfo_file { int kf_structsize; /* Variable size of record. */ int kf_type; /* Descriptor type. */ int kf_fd; /* Array index. */ int kf_ref_count; /* Reference count. */ int kf_flags; /* Flags. */ int kf_pad0; /* Round to 64 bit alignment. */ int64_t kf_offset; /* Seek location. */ union { struct { /* API compatiblity with FreeBSD < 12. */ int kf_vnode_type; int kf_sock_domain; int kf_sock_type; int kf_sock_protocol; struct sockaddr_storage kf_sa_local; struct sockaddr_storage kf_sa_peer; }; union { struct { /* Sendq size */ uint32_t kf_sock_sendq; /* Socket domain. */ int kf_sock_domain0; /* Socket type. */ int kf_sock_type0; /* Socket protocol. */ int kf_sock_protocol0; /* Socket address. */ struct sockaddr_storage kf_sa_local; /* Peer address. */ struct sockaddr_storage kf_sa_peer; /* Address of so_pcb. 
*/ uint64_t kf_sock_pcb; /* Address of inp_ppcb. */ uint64_t kf_sock_inpcb; /* Address of unp_conn. */ uint64_t kf_sock_unpconn; /* Send buffer state. */ uint16_t kf_sock_snd_sb_state; /* Receive buffer state. */ uint16_t kf_sock_rcv_sb_state; /* Recvq size. */ uint32_t kf_sock_recvq; } kf_sock; struct { /* Vnode type. */ int kf_file_type; /* Space for future use */ int kf_spareint[3]; uint64_t kf_spareint64[30]; /* Vnode filesystem id. */ uint64_t kf_file_fsid; /* File device. */ uint64_t kf_file_rdev; /* Global file id. */ uint64_t kf_file_fileid; /* File size. */ uint64_t kf_file_size; /* Vnode filesystem id, FreeBSD 11 compat. */ uint32_t kf_file_fsid_freebsd11; /* File device, FreeBSD 11 compat. */ uint32_t kf_file_rdev_freebsd11; /* File mode. */ uint16_t kf_file_mode; /* Round to 64 bit alignment. */ uint16_t kf_file_pad0; uint32_t kf_file_pad1; } kf_file; struct { uint32_t kf_spareint[4]; uint64_t kf_spareint64[32]; uint32_t kf_sem_value; uint16_t kf_sem_mode; } kf_sem; struct { uint32_t kf_spareint[4]; uint64_t kf_spareint64[32]; uint64_t kf_pipe_addr; uint64_t kf_pipe_peer; uint32_t kf_pipe_buffer_cnt; /* Round to 64 bit alignment. */ uint32_t kf_pipe_pad0[3]; } kf_pipe; struct { uint32_t kf_spareint[4]; uint64_t kf_spareint64[32]; uint32_t kf_pts_dev_freebsd11; uint32_t kf_pts_pad0; uint64_t kf_pts_dev; /* Round to 64 bit alignment. */ uint32_t kf_pts_pad1[4]; } kf_pts; struct { uint32_t kf_spareint[4]; uint64_t kf_spareint64[32]; pid_t kf_pid; } kf_proc; struct { uint64_t kf_eventfd_value; uint32_t kf_eventfd_flags; } kf_eventfd; } kf_un; }; uint16_t kf_status; /* Status flags. */ uint16_t kf_pad1; /* Round to 32 bit alignment. */ int _kf_ispare0; /* Space for more stuff. */ cap_rights_t kf_cap_rights; /* Capability rights. */ uint64_t _kf_cap_spare; /* Space for future cap_rights_t. */ /* Truncated before copyout in sysctl */ char kf_path[PATH_MAX]; /* Path to file, if any. */ }; /* * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of * another process as a series of entries. */ #define KVME_TYPE_NONE 0 #define KVME_TYPE_DEFAULT 1 #define KVME_TYPE_VNODE 2 #define KVME_TYPE_SWAP 3 #define KVME_TYPE_DEVICE 4 #define KVME_TYPE_PHYS 5 #define KVME_TYPE_DEAD 6 #define KVME_TYPE_SG 7 #define KVME_TYPE_MGTDEVICE 8 #define KVME_TYPE_GUARD 9 #define KVME_TYPE_UNKNOWN 255 #define KVME_PROT_READ 0x00000001 #define KVME_PROT_WRITE 0x00000002 #define KVME_PROT_EXEC 0x00000004 #define KVME_FLAG_COW 0x00000001 #define KVME_FLAG_NEEDS_COPY 0x00000002 #define KVME_FLAG_NOCOREDUMP 0x00000004 #define KVME_FLAG_SUPER 0x00000008 #define KVME_FLAG_GROWS_UP 0x00000010 #define KVME_FLAG_GROWS_DOWN 0x00000020 #define KVME_FLAG_USER_WIRED 0x00000040 #if defined(__amd64__) #define KINFO_OVMENTRY_SIZE 1168 #endif #if defined(__i386__) #define KINFO_OVMENTRY_SIZE 1128 #endif struct kinfo_ovmentry { int kve_structsize; /* Size of kinfo_vmmapentry. */ int kve_type; /* Type of map entry. */ void *kve_start; /* Starting address. */ void *kve_end; /* Finishing address. */ int kve_flags; /* Flags on map entry. */ int kve_resident; /* Number of resident pages. */ int kve_private_resident; /* Number of private pages. */ int kve_protection; /* Protection bitmask. */ int kve_ref_count; /* VM obj ref count. */ int kve_shadow_count; /* VM obj shadow count. */ char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ void *_kve_pspare[8]; /* Space for more stuff. 
*/ off_t kve_offset; /* Mapping offset in object */ uint64_t kve_fileid; /* inode number if vnode */ uint32_t kve_fsid; /* dev_t of vnode location */ int _kve_ispare[3]; /* Space for more stuff. */ }; #if defined(__amd64__) || defined(__i386__) #define KINFO_VMENTRY_SIZE 1160 #endif struct kinfo_vmentry { int kve_structsize; /* Variable size of record. */ int kve_type; /* Type of map entry. */ uint64_t kve_start; /* Starting address. */ uint64_t kve_end; /* Finishing address. */ uint64_t kve_offset; /* Mapping offset in object */ uint64_t kve_vn_fileid; /* inode number if vnode */ uint32_t kve_vn_fsid_freebsd11; /* dev_t of vnode location */ int kve_flags; /* Flags on map entry. */ int kve_resident; /* Number of resident pages. */ int kve_private_resident; /* Number of private pages. */ int kve_protection; /* Protection bitmask. */ int kve_ref_count; /* VM obj ref count. */ int kve_shadow_count; /* VM obj shadow count. */ int kve_vn_type; /* Vnode type. */ uint64_t kve_vn_size; /* File size. */ uint32_t kve_vn_rdev_freebsd11; /* Device id if device. */ uint16_t kve_vn_mode; /* File mode. */ uint16_t kve_status; /* Status flags. */ - uint64_t kve_vn_fsid; /* dev_t of vnode location */ + union { + uint64_t _kve_vn_fsid; /* dev_t of vnode location */ + uint64_t _kve_obj; /* handle of anon obj */ + } kve_type_spec; uint64_t kve_vn_rdev; /* Device id if device. */ int _kve_ispare[8]; /* Space for more stuff. */ /* Truncated before copyout in sysctl */ char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ }; +#define kve_vn_fsid kve_type_spec._kve_vn_fsid +#define kve_obj kve_type_spec._kve_obj /* * The "vm.objects" sysctl provides a list of all VM objects in the system * via an array of these entries. */ struct kinfo_vmobject { int kvo_structsize; /* Variable size of record. */ int kvo_type; /* Object type: KVME_TYPE_*. */ uint64_t kvo_size; /* Object size in pages. */ uint64_t kvo_vn_fileid; /* inode number if vnode. */ uint32_t kvo_vn_fsid_freebsd11; /* dev_t of vnode location. */ int kvo_ref_count; /* Reference count. */ int kvo_shadow_count; /* Shadow count. */ int kvo_memattr; /* Memory attribute. */ uint64_t kvo_resident; /* Number of resident pages. */ uint64_t kvo_active; /* Number of active pages. */ uint64_t kvo_inactive; /* Number of inactive pages. */ - uint64_t kvo_vn_fsid; - uint64_t _kvo_qspare[7]; - uint32_t _kvo_ispare[8]; + union { + uint64_t _kvo_vn_fsid; + uint64_t _kvo_backing_obj; /* Handle for the backing obj */ + } kvo_type_spec; /* Type-specific union */ + uint64_t kvo_me; /* Uniq handle for anon obj */ + uint64_t _kvo_qspare[6]; + uint32_t kvo_swapped; /* Number of swapped pages */ + uint32_t _kvo_ispare[7]; char kvo_path[PATH_MAX]; /* Pathname, if any. */ }; +#define kvo_vn_fsid kvo_type_spec._kvo_vn_fsid +#define kvo_backing_obj kvo_type_spec._kvo_backing_obj /* * The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of * another process as a series of entries. Each stack is represented by a * series of symbol names and offsets as generated by stack_sbuf_print(9). */ #define KKST_MAXLEN 1024 #define KKST_STATE_STACKOK 0 /* Stack is valid. */ #define KKST_STATE_SWAPPED 1 /* Stack swapped out. */ #define KKST_STATE_RUNNING 2 /* Stack ephemeral. */ #if defined(__amd64__) || defined(__i386__) #define KINFO_KSTACK_SIZE 1096 #endif struct kinfo_kstack { lwpid_t kkst_tid; /* ID of thread. */ int kkst_state; /* Validity of stack. */ char kkst_trace[KKST_MAXLEN]; /* String representing stack. */ int _kkst_ispare[16]; /* Space for more stuff. 
*/ }; struct kinfo_sigtramp { void *ksigtramp_start; void *ksigtramp_end; void *ksigtramp_spare[4]; }; #ifdef _KERNEL /* Flags for kern_proc_out function. */ #define KERN_PROC_NOTHREADS 0x1 #define KERN_PROC_MASK32 0x2 /* Flags for kern_proc_filedesc_out. */ #define KERN_FILEDESC_PACK_KINFO 0x00000001U /* Flags for kern_proc_vmmap_out. */ #define KERN_VMMAP_PACK_KINFO 0x00000001U struct sbuf; /* * The kern_proc out functions are helper functions to dump process * miscellaneous kinfo structures to sbuf. The main consumers are KERN_PROC * sysctls but they may also be used by other kernel subsystems. * * The functions manipulate the process locking state and expect the process * to be locked on enter. On return the process is unlocked. */ int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags); int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen); int kern_proc_out(struct proc *p, struct sbuf *sb, int flags); int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags); int vntype_to_kinfo(int vtype); void pack_kinfo(struct kinfo_file *kif); #endif /* !_KERNEL */ #endif diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 3789a0217252..888df04741b4 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1,3102 +1,3125 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1998 Matthew Dillon, * Copyright (c) 1994 John S. Dyson * Copyright (c) 1990 University of Utah. * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * New Swap System * Matthew Dillon * * Radix Bitmap 'blists'. * * - The new swapper uses the new radix bitmap code. 
This should scale * to arbitrarily small or arbitrarily large swap spaces and an almost * arbitrary degree of fragmentation. * * Features: * * - on the fly reallocation of swap during putpages. The new system * does not try to keep previously allocated swap blocks for dirty * pages. * * - on the fly deallocation of swap * * - No more garbage collection required. Unnecessarily allocated swap * blocks only exist for dirty vm_page_t's now and these are already * cycled (in a high-load system) by the pager. We also do on-the-fly * removal of invalidated swap blocks when a page is destroyed * or renamed. * * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * MAX_PAGEOUT_CLUSTER must be a power of 2 between 1 and 64. * The 64-page limit is due to the radix code (kern/subr_blist.c). */ #ifndef MAX_PAGEOUT_CLUSTER #define MAX_PAGEOUT_CLUSTER 32 #endif #if !defined(SWB_NPAGES) #define SWB_NPAGES MAX_PAGEOUT_CLUSTER #endif #define SWAP_META_PAGES PCTRIE_COUNT /* * A swblk structure maps each page index within a * SWAP_META_PAGES-aligned and sized range to the address of an * on-disk swap block (or SWAPBLK_NONE). The collection of these * mappings for an entire vm object is implemented as a pc-trie. */ struct swblk { vm_pindex_t p; daddr_t d[SWAP_META_PAGES]; }; static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data"); static struct mtx sw_dev_mtx; static TAILQ_HEAD(, swdevt) swtailq = TAILQ_HEAD_INITIALIZER(swtailq); static struct swdevt *swdevhd; /* Allocate from here next */ static int nswapdev; /* Number of swap devices */ int swap_pager_avail; static struct sx swdev_syscall_lock; /* serialize swap(on|off) */ static __exclusive_cache_line u_long swap_reserved; static u_long swap_total; static int sysctl_page_shift(SYSCTL_HANDLER_ARGS); static SYSCTL_NODE(_vm_stats, OID_AUTO, swap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "VM swap stats"); SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &swap_reserved, 0, sysctl_page_shift, "A", "Amount of swap storage needed to back all allocated anonymous memory."); SYSCTL_PROC(_vm, OID_AUTO, swap_total, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &swap_total, 0, sysctl_page_shift, "A", "Total amount of available swap storage."); static int overcommit = 0; SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0, "Configure virtual memory overcommit behavior. 
See tuning(7) " "for details."); static unsigned long swzone; SYSCTL_ULONG(_vm, OID_AUTO, swzone, CTLFLAG_RD, &swzone, 0, "Actual size of swap metadata zone"); static unsigned long swap_maxpages; SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0, "Maximum amount of swap supported"); static COUNTER_U64_DEFINE_EARLY(swap_free_deferred); SYSCTL_COUNTER_U64(_vm_stats_swap, OID_AUTO, free_deferred, CTLFLAG_RD, &swap_free_deferred, "Number of pages that deferred freeing swap space"); static COUNTER_U64_DEFINE_EARLY(swap_free_completed); SYSCTL_COUNTER_U64(_vm_stats_swap, OID_AUTO, free_completed, CTLFLAG_RD, &swap_free_completed, "Number of deferred frees completed"); /* bits from overcommit */ #define SWAP_RESERVE_FORCE_ON (1 << 0) #define SWAP_RESERVE_RLIMIT_ON (1 << 1) #define SWAP_RESERVE_ALLOW_NONWIRED (1 << 2) static int sysctl_page_shift(SYSCTL_HANDLER_ARGS) { uint64_t newval; u_long value = *(u_long *)arg1; newval = ((uint64_t)value) << PAGE_SHIFT; return (sysctl_handle_64(oidp, &newval, 0, req)); } static bool swap_reserve_by_cred_rlimit(u_long pincr, struct ucred *cred, int oc) { struct uidinfo *uip; u_long prev; uip = cred->cr_ruidinfo; prev = atomic_fetchadd_long(&uip->ui_vmsize, pincr); if ((oc & SWAP_RESERVE_RLIMIT_ON) != 0 && prev + pincr > lim_cur(curthread, RLIMIT_SWAP) && priv_check(curthread, PRIV_VM_SWAP_NORLIMIT) != 0) { prev = atomic_fetchadd_long(&uip->ui_vmsize, -pincr); KASSERT(prev >= pincr, ("negative vmsize for uid = %d\n", uip->ui_uid)); return (false); } return (true); } static void swap_release_by_cred_rlimit(u_long pdecr, struct ucred *cred) { struct uidinfo *uip; #ifdef INVARIANTS u_long prev; #endif uip = cred->cr_ruidinfo; #ifdef INVARIANTS prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr); KASSERT(prev >= pdecr, ("negative vmsize for uid = %d\n", uip->ui_uid)); #else atomic_subtract_long(&uip->ui_vmsize, pdecr); #endif } static void swap_reserve_force_rlimit(u_long pincr, struct ucred *cred) { struct uidinfo *uip; uip = cred->cr_ruidinfo; atomic_add_long(&uip->ui_vmsize, pincr); } bool swap_reserve(vm_ooffset_t incr) { return (swap_reserve_by_cred(incr, curthread->td_ucred)); } bool swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred) { u_long r, s, prev, pincr; #ifdef RACCT int error; #endif int oc; static int curfail; static struct timeval lastfail; KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__, (uintmax_t)incr)); #ifdef RACCT if (RACCT_ENABLED()) { PROC_LOCK(curproc); error = racct_add(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); if (error != 0) return (false); } #endif pincr = atop(incr); prev = atomic_fetchadd_long(&swap_reserved, pincr); r = prev + pincr; s = swap_total; oc = atomic_load_int(&overcommit); if (r > s && (oc & SWAP_RESERVE_ALLOW_NONWIRED) != 0) { s += vm_cnt.v_page_count - vm_cnt.v_free_reserved - vm_wire_count(); } if ((oc & SWAP_RESERVE_FORCE_ON) != 0 && r > s && priv_check(curthread, PRIV_VM_SWAP_NOQUOTA) != 0) { prev = atomic_fetchadd_long(&swap_reserved, -pincr); KASSERT(prev >= pincr, ("swap_reserved < incr on overcommit fail")); goto out_error; } if (!swap_reserve_by_cred_rlimit(pincr, cred, oc)) { prev = atomic_fetchadd_long(&swap_reserved, -pincr); KASSERT(prev >= pincr, ("swap_reserved < incr on overcommit fail")); goto out_error; } return (true); out_error: if (ppsratecheck(&lastfail, &curfail, 1)) { printf("uid %d, pid %d: swap reservation for %jd bytes failed\n", cred->cr_ruidinfo->ui_uid, curproc->p_pid, incr); } #ifdef RACCT if (RACCT_ENABLED()) { 
PROC_LOCK(curproc); racct_sub(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); } #endif return (false); } void swap_reserve_force(vm_ooffset_t incr) { u_long pincr; KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__, (uintmax_t)incr)); #ifdef RACCT if (RACCT_ENABLED()) { PROC_LOCK(curproc); racct_add_force(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); } #endif pincr = atop(incr); atomic_add_long(&swap_reserved, pincr); swap_reserve_force_rlimit(pincr, curthread->td_ucred); } void swap_release(vm_ooffset_t decr) { struct ucred *cred; PROC_LOCK(curproc); cred = curproc->p_ucred; swap_release_by_cred(decr, cred); PROC_UNLOCK(curproc); } void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) { u_long pdecr; #ifdef INVARIANTS u_long prev; #endif KASSERT((decr & PAGE_MASK) == 0, ("%s: decr: %ju & PAGE_MASK", __func__, (uintmax_t)decr)); pdecr = atop(decr); #ifdef INVARIANTS prev = atomic_fetchadd_long(&swap_reserved, -pdecr); KASSERT(prev >= pdecr, ("swap_reserved < decr")); #else atomic_subtract_long(&swap_reserved, pdecr); #endif swap_release_by_cred_rlimit(pdecr, cred); #ifdef RACCT if (racct_enable) racct_sub_cred(cred, RACCT_SWAP, decr); #endif } static int swap_pager_full = 2; /* swap space exhaustion (task killing) */ static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/ static struct mtx swbuf_mtx; /* to sync nsw_wcount_async */ static int nsw_wcount_async; /* limit async write buffers */ static int nsw_wcount_async_max;/* assigned maximum */ static int nsw_cluster_max; /* maximum VOP I/O allowed */ static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I", "Maximum running async swap ops"); static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, swap_fragmentation, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_fragmentation, "A", "Swap Fragmentation Info"); static struct sx sw_alloc_sx; /* * "named" and "unnamed" anon region objects. Try to reduce the overhead * of searching a named list by hashing it just a little. */ #define NOBJLISTS 8 #define NOBJLIST(handle) \ (&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)]) static struct pagerlst swap_pager_object_list[NOBJLISTS]; static uma_zone_t swwbuf_zone; static uma_zone_t swrbuf_zone; static uma_zone_t swblk_zone; static uma_zone_t swpctrie_zone; /* * pagerops for OBJT_SWAP - "swap pager". Some ops are also global procedure * calls hooked from other parts of the VM system and do not appear here. * (see vm/swap_pager.h). 
*/ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *); static void swap_pager_dealloc(vm_object_t object); static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); static void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after); static void swap_pager_init(void); static void swap_pager_unswapped(vm_page_t); static void swap_pager_swapoff(struct swdevt *sp); static void swap_pager_update_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end); static void swap_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end); struct pagerops swappagerops = { .pgo_init = swap_pager_init, /* early system initialization of pager */ .pgo_alloc = swap_pager_alloc, /* allocate an OBJT_SWAP object */ .pgo_dealloc = swap_pager_dealloc, /* deallocate an OBJT_SWAP object */ .pgo_getpages = swap_pager_getpages, /* pagein */ .pgo_getpages_async = swap_pager_getpages_async, /* pagein (async) */ .pgo_putpages = swap_pager_putpages, /* pageout */ .pgo_haspage = swap_pager_haspage, /* get backing store status for page */ .pgo_pageunswapped = swap_pager_unswapped, /* remove swap related to page */ .pgo_update_writecount = swap_pager_update_writecount, .pgo_release_writecount = swap_pager_release_writecount, }; /* * swap_*() routines are externally accessible. swp_*() routines are * internal. */ static int nswap_lowat = 128; /* in pages, swap_pager_almost_full warn */ static int nswap_hiwat = 512; /* in pages, swap_pager_almost_full warn */ SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &nsw_cluster_max, 0, "Maximum size of a swap block in pages"); static void swp_sizecheck(void); static void swp_pager_async_iodone(struct buf *bp); static bool swp_pager_swblk_empty(struct swblk *sb, int start, int limit); static void swp_pager_free_empty_swblk(vm_object_t, struct swblk *sb); static int swapongeom(struct vnode *); static int swaponvp(struct thread *, struct vnode *, u_long); static int swapoff_one(struct swdevt *sp, struct ucred *cred); /* * Swap bitmap functions */ static void swp_pager_freeswapspace(daddr_t blk, daddr_t npages); static daddr_t swp_pager_getswapspace(int *npages); /* * Metadata functions */ static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst, vm_pindex_t pindex, vm_pindex_t count); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_lookup(vm_object_t, vm_pindex_t); static void swp_pager_init_freerange(daddr_t *start, daddr_t *num) { *start = SWAPBLK_NONE; *num = 0; } static void swp_pager_update_freerange(daddr_t *start, daddr_t *num, daddr_t addr) { if (*start + *num == addr) { (*num)++; } else { swp_pager_freeswapspace(*start, *num); *start = addr; *num = 1; } } static void * swblk_trie_alloc(struct pctrie *ptree) { return (uma_zalloc(swpctrie_zone, M_NOWAIT | (curproc == pageproc ? 
M_USE_RESERVE : 0))); } static void swblk_trie_free(struct pctrie *ptree, void *node) { uma_zfree(swpctrie_zone, node); } PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc, swblk_trie_free); /* * SWP_SIZECHECK() - update swap_pager_full indication * * update the swap_pager_almost_full indication and warn when we are * about to run out of swap space, using lowat/hiwat hysteresis. * * Clear swap_pager_full ( task killing ) indication when lowat is met. * * No restrictions on call * This routine may not block. */ static void swp_sizecheck(void) { if (swap_pager_avail < nswap_lowat) { if (swap_pager_almost_full == 0) { printf("swap_pager: out of swap space\n"); swap_pager_almost_full = 1; } } else { swap_pager_full = 0; if (swap_pager_avail > nswap_hiwat) swap_pager_almost_full = 0; } } /* * SWAP_PAGER_INIT() - initialize the swap pager! * * Expected to be started from system init. NOTE: This code is run * before much else so be careful what you depend on. Most of the VM * system has yet to be initialized at this point. */ static void swap_pager_init(void) { /* * Initialize object lists */ int i; for (i = 0; i < NOBJLISTS; ++i) TAILQ_INIT(&swap_pager_object_list[i]); mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF); sx_init(&sw_alloc_sx, "swspsx"); sx_init(&swdev_syscall_lock, "swsysc"); } /* * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process * * Expected to be started from pageout process once, prior to entering * its main loop. */ void swap_pager_swap_init(void) { unsigned long n, n2; /* * Number of in-transit swap bp operations. Don't * exhaust the pbufs completely. Make sure we * initialize workable values (0 will work for hysteresis * but it isn't very efficient). * * The nsw_cluster_max is constrained by the bp->b_pages[] * array, which has maxphys / PAGE_SIZE entries, and our locally * defined MAX_PAGEOUT_CLUSTER. Also be aware that swap ops are * constrained by the swap device interleave stripe size. * * Currently we hardwire nsw_wcount_async to 4. This limit is * designed to prevent other I/O from having high latencies due to * our pageout I/O. The value 4 works well for one or two active swap * devices but is probably a little low if you have more. Even so, * a higher value would probably generate only a limited improvement * with three or four active swap devices since the system does not * typically have to pageout at extreme bandwidths. We will want * at least 2 per swap devices, and 4 is a pretty good value if you * have one NFS swap device due to the command/ack latency over NFS. * So it all works out pretty well. */ nsw_cluster_max = min(maxphys / PAGE_SIZE, MAX_PAGEOUT_CLUSTER); nsw_wcount_async = 4; nsw_wcount_async_max = nsw_wcount_async; mtx_init(&swbuf_mtx, "async swbuf mutex", NULL, MTX_DEF); swwbuf_zone = pbuf_zsecond_create("swwbuf", nswbuf / 4); swrbuf_zone = pbuf_zsecond_create("swrbuf", nswbuf / 2); /* * Initialize our zone, taking the user's requested size or * estimating the number we need based on the number of pages * in the system. */ n = maxswzone != 0 ? 
maxswzone / sizeof(struct swblk) : vm_cnt.v_page_count / 2; swpctrie_zone = uma_zcreate("swpctrie", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, 0); if (swpctrie_zone == NULL) panic("failed to create swap pctrie zone."); swblk_zone = uma_zcreate("swblk", sizeof(struct swblk), NULL, NULL, NULL, NULL, _Alignof(struct swblk) - 1, 0); if (swblk_zone == NULL) panic("failed to create swap blk zone."); n2 = n; do { if (uma_zone_reserve_kva(swblk_zone, n)) break; /* * if the allocation failed, try a zone two thirds the * size of the previous attempt. */ n -= ((n + 2) / 3); } while (n > 0); /* * Often uma_zone_reserve_kva() cannot reserve exactly the * requested size. Account for the difference when * calculating swap_maxpages. */ n = uma_zone_get_max(swblk_zone); if (n < n2) printf("Swap blk zone entries changed from %lu to %lu.\n", n2, n); /* absolute maximum we can handle assuming 100% efficiency */ swap_maxpages = n * SWAP_META_PAGES; swzone = n * sizeof(struct swblk); if (!uma_zone_reserve_kva(swpctrie_zone, n)) printf("Cannot reserve swap pctrie zone, " "reduce kern.maxswzone.\n"); } static vm_object_t swap_pager_alloc_init(void *handle, struct ucred *cred, vm_ooffset_t size, vm_ooffset_t offset) { vm_object_t object; if (cred != NULL) { if (!swap_reserve_by_cred(size, cred)) return (NULL); crhold(cred); } /* * The un_pager.swp.swp_blks trie is initialized by * vm_object_allocate() to ensure the correct order of * visibility to other threads. */ object = vm_object_allocate(OBJT_SWAP, OFF_TO_IDX(offset + PAGE_MASK + size)); object->un_pager.swp.writemappings = 0; object->handle = handle; if (cred != NULL) { object->cred = cred; object->charge = size; } return (object); } /* * SWAP_PAGER_ALLOC() - allocate a new OBJT_SWAP VM object and instantiate * its metadata structures. * * This routine is called from the mmap and fork code to create a new * OBJT_SWAP object. * * This routine must ensure that no live duplicate is created for * the named object request, which is protected against by * holding the sw_alloc_sx lock in case handle != NULL. */ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; if (handle != NULL) { /* * Reference existing named region or allocate new one. There * should not be a race here against swp_pager_meta_build() * as called from vm_page_remove() in regards to the lookup * of the handle. */ sx_xlock(&sw_alloc_sx); object = vm_pager_object_lookup(NOBJLIST(handle), handle); if (object == NULL) { object = swap_pager_alloc_init(handle, cred, size, offset); if (object != NULL) { TAILQ_INSERT_TAIL(NOBJLIST(object->handle), object, pager_object_list); } } sx_xunlock(&sw_alloc_sx); } else { object = swap_pager_alloc_init(handle, cred, size, offset); } return (object); } /* * SWAP_PAGER_DEALLOC() - remove swap metadata from object * * The swap backing for the object is destroyed. The code is * designed such that we can reinstantiate it later, but this * routine is typically called only when the entire object is * about to be destroyed. * * The object must be locked. */ static void swap_pager_dealloc(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_DEAD) != 0, ("dealloc of reachable obj")); /* * Remove from list right away so lookups will fail if we block for * pageout completion. 
*/ if ((object->flags & OBJ_ANON) == 0 && object->handle != NULL) { VM_OBJECT_WUNLOCK(object); sx_xlock(&sw_alloc_sx); TAILQ_REMOVE(NOBJLIST(object->handle), object, pager_object_list); sx_xunlock(&sw_alloc_sx); VM_OBJECT_WLOCK(object); } vm_object_pip_wait(object, "swpdea"); /* * Free all remaining metadata. We only bother to free it from * the swap meta data. We do not attempt to free swapblk's still * associated with vm_page_t's for this object. We do not care * if paging is still in progress on some objects. */ swp_pager_meta_free_all(object); object->handle = NULL; object->type = OBJT_DEAD; } /************************************************************************ * SWAP PAGER BITMAP ROUTINES * ************************************************************************/ /* * SWP_PAGER_GETSWAPSPACE() - allocate raw swap space * * Allocate swap for up to the requested number of pages. The * starting swap block number (a page index) is returned or * SWAPBLK_NONE if the allocation failed. * * Also has the side effect of advising that somebody made a mistake * when they configured swap and didn't configure enough. * * This routine may not sleep. * * We allocate in round-robin fashion from the configured devices. */ static daddr_t swp_pager_getswapspace(int *io_npages) { daddr_t blk; struct swdevt *sp; int mpages, npages; KASSERT(*io_npages >= 1, ("%s: npages not positive", __func__)); blk = SWAPBLK_NONE; mpages = *io_npages; npages = imin(BLIST_MAX_ALLOC, mpages); mtx_lock(&sw_dev_mtx); sp = swdevhd; while (!TAILQ_EMPTY(&swtailq)) { if (sp == NULL) sp = TAILQ_FIRST(&swtailq); if ((sp->sw_flags & SW_CLOSING) == 0) blk = blist_alloc(sp->sw_blist, &npages, mpages); if (blk != SWAPBLK_NONE) break; sp = TAILQ_NEXT(sp, sw_list); if (swdevhd == sp) { if (npages == 1) break; mpages = npages - 1; npages >>= 1; } } if (blk != SWAPBLK_NONE) { *io_npages = npages; blk += sp->sw_first; sp->sw_used += npages; swap_pager_avail -= npages; swp_sizecheck(); swdevhd = TAILQ_NEXT(sp, sw_list); } else { if (swap_pager_full != 2) { printf("swp_pager_getswapspace(%d): failed\n", *io_npages); swap_pager_full = 2; swap_pager_almost_full = 1; } swdevhd = NULL; } mtx_unlock(&sw_dev_mtx); return (blk); } static bool swp_pager_isondev(daddr_t blk, struct swdevt *sp) { return (blk >= sp->sw_first && blk < sp->sw_end); } static void swp_pager_strategy(struct buf *bp) { struct swdevt *sp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (swp_pager_isondev(bp->b_blkno, sp)) { mtx_unlock(&sw_dev_mtx); if ((sp->sw_flags & SW_UNMAPPED) != 0 && unmapped_buf_allowed) { bp->b_data = unmapped_buf; bp->b_offset = 0; } else { pmap_qenter((vm_offset_t)bp->b_data, &bp->b_pages[0], bp->b_bcount / PAGE_SIZE); } sp->sw_strategy(bp, sp); return; } } panic("Swapdev not found"); } /* * SWP_PAGER_FREESWAPSPACE() - free raw swap space * * This routine returns the specified swap blocks back to the bitmap. * * This routine may not sleep. */ static void swp_pager_freeswapspace(daddr_t blk, daddr_t npages) { struct swdevt *sp; if (npages == 0) return; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (swp_pager_isondev(blk, sp)) { sp->sw_used -= npages; /* * If we are attempting to stop swapping on * this device, we don't want to mark any * blocks free lest they be reused. 
*/ if ((sp->sw_flags & SW_CLOSING) == 0) { blist_free(sp->sw_blist, blk - sp->sw_first, npages); swap_pager_avail += npages; swp_sizecheck(); } mtx_unlock(&sw_dev_mtx); return; } } panic("Swapdev not found"); } /* * SYSCTL_SWAP_FRAGMENTATION() - produce raw swap space stats */ static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct swdevt *sp; const char *devname; int error; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (vn_isdisk(sp->sw_vp)) devname = devtoname(sp->sw_vp->v_rdev); else devname = "[file]"; sbuf_printf(&sbuf, "\nFree space on device %s:\n", devname); blist_stats(sp->sw_blist, &sbuf); } mtx_unlock(&sw_dev_mtx); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } /* * SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page * range within an object. * * This is a globally accessible routine. * * This routine removes swapblk assignments from swap metadata. * * The external callers of this routine typically have already destroyed * or renamed vm_page_t's associated with this range in the object so * we should be ok. * * The object must be locked. */ void swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) { swp_pager_meta_free(object, start, size); } /* * SWAP_PAGER_RESERVE() - reserve swap blocks in object * * Assigns swap blocks to the specified range within the object. The * swap blocks are not zeroed. Any previous swap assignment is destroyed. * * Returns 0 on success, -1 on failure. */ int swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) { daddr_t addr, blk, n_free, s_free; int i, j, n; swp_pager_init_freerange(&s_free, &n_free); VM_OBJECT_WLOCK(object); for (i = 0; i < size; i += n) { n = size - i; blk = swp_pager_getswapspace(&n); if (blk == SWAPBLK_NONE) { swp_pager_meta_free(object, start, i); VM_OBJECT_WUNLOCK(object); return (-1); } for (j = 0; j < n; ++j) { addr = swp_pager_meta_build(object, start + i + j, blk + j); if (addr != SWAPBLK_NONE) swp_pager_update_freerange(&s_free, &n_free, addr); } } swp_pager_freeswapspace(s_free, n_free); VM_OBJECT_WUNLOCK(object); return (0); } static bool swp_pager_xfer_source(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t pindex, daddr_t addr) { daddr_t dstaddr; KASSERT(srcobject->type == OBJT_SWAP, ("%s: Srcobject not swappable", __func__)); if (dstobject->type == OBJT_SWAP && swp_pager_meta_lookup(dstobject, pindex) != SWAPBLK_NONE) { /* Caller should destroy the source block. */ return (false); } /* * Destination has no swapblk and is not resident, transfer source. * swp_pager_meta_build() can sleep. */ VM_OBJECT_WUNLOCK(srcobject); dstaddr = swp_pager_meta_build(dstobject, pindex, addr); KASSERT(dstaddr == SWAPBLK_NONE, ("Unexpected destination swapblk")); VM_OBJECT_WLOCK(srcobject); return (true); } /* * SWAP_PAGER_COPY() - copy blocks from source pager to destination pager * and destroy the source. * * Copy any valid swapblks from the source to the destination. In * cases where both the source and destination have a valid swapblk, * we keep the destination's. * * This routine is allowed to sleep. It may sleep allocating metadata * indirectly through swp_pager_meta_build(). * * The source object contains no vm_page_t's (which is just as well) * * The source object is of type OBJT_SWAP. * * The source and destination objects must be locked. 
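 *
 * [Editor's illustrative sketch; not part of this change.]  The per-page
 * merge rule carried out through swp_pager_xfer_source() can be summarized
 * as follows, using the names from the surrounding code:
 *
 *	if (swp_pager_meta_lookup(dstobject, pindex) != SWAPBLK_NONE)
 *		keep the destination block and free the source block;
 *	else
 *		move the source block into dstobject via swp_pager_meta_build();
 *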
* Both object locks may temporarily be released. */ void swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t offset, int destroysource) { VM_OBJECT_ASSERT_WLOCKED(srcobject); VM_OBJECT_ASSERT_WLOCKED(dstobject); /* * If destroysource is set, we remove the source object from the * swap_pager internal queue now. */ if (destroysource && (srcobject->flags & OBJ_ANON) == 0 && srcobject->handle != NULL) { VM_OBJECT_WUNLOCK(srcobject); VM_OBJECT_WUNLOCK(dstobject); sx_xlock(&sw_alloc_sx); TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject, pager_object_list); sx_xunlock(&sw_alloc_sx); VM_OBJECT_WLOCK(dstobject); VM_OBJECT_WLOCK(srcobject); } /* * Transfer source to destination. */ swp_pager_meta_transfer(srcobject, dstobject, offset, dstobject->size); /* * Free left over swap blocks in source. * * We have to revert the type to OBJT_DEFAULT so we do not accidentally * double-remove the object from the swap queues. */ if (destroysource) { swp_pager_meta_free_all(srcobject); /* * Reverting the type is not necessary, the caller is going * to destroy srcobject directly, but I'm doing it here * for consistency since we've removed the object from its * queues. */ srcobject->type = OBJT_DEFAULT; } } /* * SWAP_PAGER_HASPAGE() - determine if we have good backing store for * the requested page. * * We determine whether good backing store exists for the requested * page and return TRUE if it does, FALSE if it doesn't. * * If TRUE, we also try to determine how much valid, contiguous backing * store exists before and after the requested page. */ static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after) { daddr_t blk, blk0; int i; VM_OBJECT_ASSERT_LOCKED(object); KASSERT(object->type == OBJT_SWAP, ("%s: object not swappable", __func__)); /* * do we have good backing store at the requested index ? */ blk0 = swp_pager_meta_lookup(object, pindex); if (blk0 == SWAPBLK_NONE) { if (before) *before = 0; if (after) *after = 0; return (FALSE); } /* * find backwards-looking contiguous good backing store */ if (before != NULL) { for (i = 1; i < SWB_NPAGES; i++) { if (i > pindex) break; blk = swp_pager_meta_lookup(object, pindex - i); if (blk != blk0 - i) break; } *before = i - 1; } /* * find forward-looking contiguous good backing store */ if (after != NULL) { for (i = 1; i < SWB_NPAGES; i++) { blk = swp_pager_meta_lookup(object, pindex + i); if (blk != blk0 + i) break; } *after = i - 1; } return (TRUE); } /* * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page * * This removes any associated swap backing store, whether valid or * not, from the page. * * This routine is typically called when a page is made dirty, at * which point any associated swap can be freed. MADV_FREE also * calls us in a special-case situation * * NOTE!!! If the page is clean and the swap was valid, the caller * should make the page dirty before calling this routine. This routine * does NOT change the m->dirty status of the page. Also: MADV_FREE * depends on it. * * This routine may not sleep. * * The object containing the page may be locked. */ static void swap_pager_unswapped(vm_page_t m) { struct swblk *sb; vm_object_t obj; /* * Handle enqueing deferred frees first. If we do not have the * object lock we wait for the page daemon to clear the space. */ obj = m->object; if (!VM_OBJECT_WOWNED(obj)) { VM_PAGE_OBJECT_BUSY_ASSERT(m); /* * The caller is responsible for synchronization but we * will harmlessly handle races. 
This is typically provided * by only calling unswapped() when a page transitions from * clean to dirty. */ if ((m->a.flags & (PGA_SWAP_SPACE | PGA_SWAP_FREE)) == PGA_SWAP_SPACE) { vm_page_aflag_set(m, PGA_SWAP_FREE); counter_u64_add(swap_free_deferred, 1); } return; } if ((m->a.flags & PGA_SWAP_FREE) != 0) counter_u64_add(swap_free_completed, 1); vm_page_aflag_clear(m, PGA_SWAP_FREE | PGA_SWAP_SPACE); /* * The meta data only exists if the object is OBJT_SWAP * and even then might not be allocated yet. */ KASSERT(m->object->type == OBJT_SWAP, ("Free object not swappable")); sb = SWAP_PCTRIE_LOOKUP(&m->object->un_pager.swp.swp_blks, rounddown(m->pindex, SWAP_META_PAGES)); if (sb == NULL) return; if (sb->d[m->pindex % SWAP_META_PAGES] == SWAPBLK_NONE) return; swp_pager_freeswapspace(sb->d[m->pindex % SWAP_META_PAGES], 1); sb->d[m->pindex % SWAP_META_PAGES] = SWAPBLK_NONE; swp_pager_free_empty_swblk(m->object, sb); } /* * swap_pager_getpages() - bring pages in from swap * * Attempt to page in the pages in array "ma" of length "count". The * caller may optionally specify that additional pages preceding and * succeeding the specified range be paged in. The number of such pages * is returned in the "rbehind" and "rahead" parameters, and they will * be in the inactive queue upon return. * * The pages in "ma" must be busied and will remain busied upon return. */ static int swap_pager_getpages_locked(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead) { struct buf *bp; vm_page_t bm, mpred, msucc, p; vm_pindex_t pindex; daddr_t blk; int i, maxahead, maxbehind, reqcount; VM_OBJECT_ASSERT_WLOCKED(object); reqcount = count; KASSERT(object->type == OBJT_SWAP, ("%s: object not swappable", __func__)); if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead)) { VM_OBJECT_WUNLOCK(object); return (VM_PAGER_FAIL); } KASSERT(reqcount - 1 <= maxahead, ("page count %d extends beyond swap block", reqcount)); /* * Do not transfer any pages other than those that are xbusied * when running during a split or collapse operation. This * prevents clustering from re-creating pages which are being * moved into another object. */ if ((object->flags & (OBJ_SPLIT | OBJ_DEAD)) != 0) { maxahead = reqcount - 1; maxbehind = 0; } /* * Clip the readahead and readbehind ranges to exclude resident pages. */ if (rahead != NULL) { *rahead = imin(*rahead, maxahead - (reqcount - 1)); pindex = ma[reqcount - 1]->pindex; msucc = TAILQ_NEXT(ma[reqcount - 1], listq); if (msucc != NULL && msucc->pindex - pindex - 1 < *rahead) *rahead = msucc->pindex - pindex - 1; } if (rbehind != NULL) { *rbehind = imin(*rbehind, maxbehind); pindex = ma[0]->pindex; mpred = TAILQ_PREV(ma[0], pglist, listq); if (mpred != NULL && pindex - mpred->pindex - 1 < *rbehind) *rbehind = pindex - mpred->pindex - 1; } bm = ma[0]; for (i = 0; i < count; i++) ma[i]->oflags |= VPO_SWAPINPROG; /* * Allocate readahead and readbehind pages. 
*/ if (rbehind != NULL) { for (i = 1; i <= *rbehind; i++) { p = vm_page_alloc(object, ma[0]->pindex - i, VM_ALLOC_NORMAL); if (p == NULL) break; p->oflags |= VPO_SWAPINPROG; bm = p; } *rbehind = i - 1; } if (rahead != NULL) { for (i = 0; i < *rahead; i++) { p = vm_page_alloc(object, ma[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL); if (p == NULL) break; p->oflags |= VPO_SWAPINPROG; } *rahead = i; } if (rbehind != NULL) count += *rbehind; if (rahead != NULL) count += *rahead; vm_object_pip_add(object, count); pindex = bm->pindex; blk = swp_pager_meta_lookup(object, pindex); KASSERT(blk != SWAPBLK_NONE, ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex)); VM_OBJECT_WUNLOCK(object); bp = uma_zalloc(swrbuf_zone, M_WAITOK); MPASS((bp->b_flags & B_MAXPHYS) != 0); /* Pages cannot leave the object while busy. */ for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) { MPASS(p->pindex == bm->pindex + i); bp->b_pages[i] = p; } bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_READ; bp->b_iodone = swp_pager_async_iodone; bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); bp->b_blkno = blk; bp->b_bcount = PAGE_SIZE * count; bp->b_bufsize = PAGE_SIZE * count; bp->b_npages = count; bp->b_pgbefore = rbehind != NULL ? *rbehind : 0; bp->b_pgafter = rahead != NULL ? *rahead : 0; VM_CNT_INC(v_swapin); VM_CNT_ADD(v_swappgsin, count); /* * perform the I/O. NOTE!!! bp cannot be considered valid after * this point because we automatically release it on completion. * Instead, we look at the one page we are interested in which we * still hold a lock on even through the I/O completion. * * The other pages in our ma[] array are also released on completion, * so we cannot assume they are valid anymore either. * * NOTE: b_blkno is destroyed by the call to swapdev_strategy */ BUF_KERNPROC(bp); swp_pager_strategy(bp); /* * Wait for the pages we want to complete. VPO_SWAPINPROG is always * cleared on completion. If an I/O error occurs, SWAPBLK_NONE * is set in the metadata for each page in the request. */ VM_OBJECT_WLOCK(object); /* This could be implemented more efficiently with aflags */ while ((ma[0]->oflags & VPO_SWAPINPROG) != 0) { ma[0]->oflags |= VPO_SWAPSLEEP; VM_CNT_INC(v_intrans); if (VM_OBJECT_SLEEP(object, &object->handle, PSWP, "swread", hz * 20)) { printf( "swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n", bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount); } } VM_OBJECT_WUNLOCK(object); /* * If we had an unrecoverable read error pages will not be valid. */ for (i = 0; i < reqcount; i++) if (ma[i]->valid != VM_PAGE_BITS_ALL) return (VM_PAGER_ERROR); return (VM_PAGER_OK); /* * A final note: in a low swap situation, we cannot deallocate swap * and mark a page dirty here because the caller is likely to mark * the page clean when we return, causing the page to possibly revert * to all-zero's later. */ } static int swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead) { VM_OBJECT_WLOCK(object); return (swap_pager_getpages_locked(object, ma, count, rbehind, rahead)); } /* * swap_pager_getpages_async(): * * Right now this is emulation of asynchronous operation on top of * swap_pager_getpages(). 
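 *
 * [Editor's sketch; not part of this change.]  A caller provides a
 * completion routine of type pgo_getpages_iodone_t (its signature is
 * inferred here from the call site below); because the current
 * implementation is synchronous, that routine runs before this function
 * returns.  A hypothetical invocation, normally reached through the
 * pager ops table, might look like:
 *
 *	static void
 *	example_iodone(void *arg, vm_page_t *ma, int count, int error)
 *	{
 *		/* "arg" is the opaque cookie passed below. */
 *		if (error != 0)
 *			printf("swap pagein of %d pages failed: %d\n",
 *			    count, error);
 *	}
 *
 *	(void)swap_pager_getpages_async(object, ma, count, &rbehind,
 *	    &rahead, example_iodone, cookie);
 *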
*/ static int swap_pager_getpages_async(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { int r, error; r = swap_pager_getpages(object, ma, count, rbehind, rahead); switch (r) { case VM_PAGER_OK: error = 0; break; case VM_PAGER_ERROR: error = EIO; break; case VM_PAGER_FAIL: error = EINVAL; break; default: panic("unhandled swap_pager_getpages() error %d", r); } (iodone)(arg, ma, count, error); return (r); } /* * swap_pager_putpages: * * Assign swap (if necessary) and initiate I/O on the specified pages. * * We support both OBJT_DEFAULT and OBJT_SWAP objects. DEFAULT objects * are automatically converted to SWAP objects. * * In a low memory situation we may block in VOP_STRATEGY(), but the new * vm_page reservation system coupled with properly written VFS devices * should ensure that no low-memory deadlock occurs. This is an area * which needs work. * * The parent has N vm_object_pip_add() references prior to * calling us and will remove references for rtvals[] that are * not set to VM_PAGER_PEND. We need to remove the rest on I/O * completion. * * The parent has soft-busy'd the pages it passes us and will unbusy * those whose rtvals[] entry is not set to VM_PAGER_PEND on return. * We need to unbusy the rest on I/O completion. */ static void swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, int flags, int *rtvals) { struct buf *bp; daddr_t addr, blk, n_free, s_free; vm_page_t mreq; int i, j, n; bool async; KASSERT(count == 0 || ma[0]->object == object, ("%s: object mismatch %p/%p", __func__, object, ma[0]->object)); /* * Step 1 * * Turn object into OBJT_SWAP. Force sync if not a pageout process. */ if (object->type != OBJT_SWAP) { addr = swp_pager_meta_build(object, 0, SWAPBLK_NONE); KASSERT(addr == SWAPBLK_NONE, ("unexpected object swap block")); } VM_OBJECT_WUNLOCK(object); async = curproc == pageproc && (flags & VM_PAGER_PUT_SYNC) == 0; swp_pager_init_freerange(&s_free, &n_free); /* * Step 2 * * Assign swap blocks and issue I/O. We reallocate swap on the fly. * The page is left dirty until the pageout operation completes * successfully. */ for (i = 0; i < count; i += n) { /* Maximum I/O size is limited by maximum swap block size. */ n = min(count - i, nsw_cluster_max); if (async) { mtx_lock(&swbuf_mtx); while (nsw_wcount_async == 0) msleep(&nsw_wcount_async, &swbuf_mtx, PVM, "swbufa", 0); nsw_wcount_async--; mtx_unlock(&swbuf_mtx); } /* Get a block of swap of size up to size n. 
*/ VM_OBJECT_WLOCK(object); blk = swp_pager_getswapspace(&n); if (blk == SWAPBLK_NONE) { VM_OBJECT_WUNLOCK(object); mtx_lock(&swbuf_mtx); if (++nsw_wcount_async == 1) wakeup(&nsw_wcount_async); mtx_unlock(&swbuf_mtx); for (j = 0; j < n; ++j) rtvals[i + j] = VM_PAGER_FAIL; continue; } for (j = 0; j < n; ++j) { mreq = ma[i + j]; vm_page_aflag_clear(mreq, PGA_SWAP_FREE); addr = swp_pager_meta_build(mreq->object, mreq->pindex, blk + j); if (addr != SWAPBLK_NONE) swp_pager_update_freerange(&s_free, &n_free, addr); MPASS(mreq->dirty == VM_PAGE_BITS_ALL); mreq->oflags |= VPO_SWAPINPROG; } VM_OBJECT_WUNLOCK(object); bp = uma_zalloc(swwbuf_zone, M_WAITOK); MPASS((bp->b_flags & B_MAXPHYS) != 0); if (async) bp->b_flags |= B_ASYNC; bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_WRITE; bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); bp->b_bcount = PAGE_SIZE * n; bp->b_bufsize = PAGE_SIZE * n; bp->b_blkno = blk; for (j = 0; j < n; j++) bp->b_pages[j] = ma[i + j]; bp->b_npages = n; /* * Must set dirty range for NFS to work. */ bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bcount; VM_CNT_INC(v_swapout); VM_CNT_ADD(v_swappgsout, bp->b_npages); /* * We unconditionally set rtvals[] to VM_PAGER_PEND so that we * can call the async completion routine at the end of a * synchronous I/O operation. Otherwise, our caller would * perform duplicate unbusy and wakeup operations on the page * and object, respectively. */ for (j = 0; j < n; j++) rtvals[i + j] = VM_PAGER_PEND; /* * asynchronous * * NOTE: b_blkno is destroyed by the call to swapdev_strategy. */ if (async) { bp->b_iodone = swp_pager_async_iodone; BUF_KERNPROC(bp); swp_pager_strategy(bp); continue; } /* * synchronous * * NOTE: b_blkno is destroyed by the call to swapdev_strategy. */ bp->b_iodone = bdone; swp_pager_strategy(bp); /* * Wait for the sync I/O to complete. */ bwait(bp, PVM, "swwrt"); /* * Now that we are through with the bp, we can call the * normal async completion, which frees everything up. */ swp_pager_async_iodone(bp); } swp_pager_freeswapspace(s_free, n_free); VM_OBJECT_WLOCK(object); } /* * swp_pager_async_iodone: * * Completion routine for asynchronous reads and writes from/to swap. * Also called manually by synchronous code to finish up a bp. * * This routine may not sleep. */ static void swp_pager_async_iodone(struct buf *bp) { int i; vm_object_t object = NULL; /* * Report error - unless we ran out of memory, in which case * we've already logged it in swapgeom_strategy(). */ if (bp->b_ioflags & BIO_ERROR && bp->b_error != ENOMEM) { printf( "swap_pager: I/O error - %s failed; blkno %ld," "size %ld, error %d\n", ((bp->b_iocmd == BIO_READ) ? "pagein" : "pageout"), (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error ); } /* * remove the mapping for kernel virtual */ if (buf_mapped(bp)) pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages); else bp->b_data = bp->b_kvabase; if (bp->b_npages) { object = bp->b_pages[0]->object; VM_OBJECT_WLOCK(object); } /* * cleanup pages. If an error occurs writing to swap, we are in * very serious trouble. If it happens to be a disk error, though, * we may be able to recover by reassigning the swap later on. So * in this case we remove the m->swapblk assignment for the page * but do not free it in the rlist. The errornous block(s) are thus * never reallocated as swap. Redirty the page and continue. 
*/ for (i = 0; i < bp->b_npages; ++i) { vm_page_t m = bp->b_pages[i]; m->oflags &= ~VPO_SWAPINPROG; if (m->oflags & VPO_SWAPSLEEP) { m->oflags &= ~VPO_SWAPSLEEP; wakeup(&object->handle); } /* We always have space after I/O, successful or not. */ vm_page_aflag_set(m, PGA_SWAP_SPACE); if (bp->b_ioflags & BIO_ERROR) { /* * If an error occurs I'd love to throw the swapblk * away without freeing it back to swapspace, so it * can never be used again. But I can't from an * interrupt. */ if (bp->b_iocmd == BIO_READ) { /* * NOTE: for reads, m->dirty will probably * be overridden by the original caller of * getpages so don't play cute tricks here. */ vm_page_invalid(m); } else { /* * If a write error occurs, reactivate page * so it doesn't clog the inactive list, * then finish the I/O. */ MPASS(m->dirty == VM_PAGE_BITS_ALL); /* PQ_UNSWAPPABLE? */ vm_page_activate(m); vm_page_sunbusy(m); } } else if (bp->b_iocmd == BIO_READ) { /* * NOTE: for reads, m->dirty will probably be * overridden by the original caller of getpages so * we cannot set them in order to free the underlying * swap in a low-swap situation. I don't think we'd * want to do that anyway, but it was an optimization * that existed in the old swapper for a time before * it got ripped out due to precisely this problem. */ KASSERT(!pmap_page_is_mapped(m), ("swp_pager_async_iodone: page %p is mapped", m)); KASSERT(m->dirty == 0, ("swp_pager_async_iodone: page %p is dirty", m)); vm_page_valid(m); if (i < bp->b_pgbefore || i >= bp->b_npages - bp->b_pgafter) vm_page_readahead_finish(m); } else { /* * For write success, clear the dirty * status, then finish the I/O ( which decrements the * busy count and possibly wakes waiter's up ). * A page is only written to swap after a period of * inactivity. Therefore, we do not expect it to be * reused. */ KASSERT(!pmap_page_is_write_mapped(m), ("swp_pager_async_iodone: page %p is not write" " protected", m)); vm_page_undirty(m); vm_page_deactivate_noreuse(m); vm_page_sunbusy(m); } } /* * adjust pip. NOTE: the original parent may still have its own * pip refs on the object. */ if (object != NULL) { vm_object_pip_wakeupn(object, bp->b_npages); VM_OBJECT_WUNLOCK(object); } /* * swapdev_strategy() manually sets b_vp and b_bufobj before calling * bstrategy(). Set them back to NULL now we're done with it, or we'll * trigger a KASSERT in relpbuf(). */ if (bp->b_vp) { bp->b_vp = NULL; bp->b_bufobj = NULL; } /* * release the physical I/O buffer */ if (bp->b_flags & B_ASYNC) { mtx_lock(&swbuf_mtx); if (++nsw_wcount_async == 1) wakeup(&nsw_wcount_async); mtx_unlock(&swbuf_mtx); } uma_zfree((bp->b_iocmd == BIO_READ) ? swrbuf_zone : swwbuf_zone, bp); } int swap_pager_nswapdev(void) { return (nswapdev); } static void swp_pager_force_dirty(vm_page_t m) { vm_page_dirty(m); swap_pager_unswapped(m); vm_page_launder(m); } +u_long +swap_pager_swapped_pages(vm_object_t object) +{ + struct swblk *sb; + vm_pindex_t pi; + u_long res; + int i; + + VM_OBJECT_ASSERT_LOCKED(object); + if (object->type != OBJT_SWAP) + return (0); + + for (res = 0, pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( + &object->un_pager.swp.swp_blks, pi)) != NULL; + pi = sb->p + SWAP_META_PAGES) { + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + res++; + } + } + return (res); +} + /* * swap_pager_swapoff_object: * * Page in all of the pages that have been paged out for an object * to a swap device. 
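 *
 * [Editor's note; not part of this change.]  The swap_pager_swapped_pages()
 * helper added above reports how many swap blocks are currently allocated
 * to an object and requires the object to be locked (a read lock is
 * sufficient, per VM_OBJECT_ASSERT_LOCKED).  A hypothetical caller:
 *
 *	VM_OBJECT_RLOCK(object);
 *	swapped = swap_pager_swapped_pages(object);
 *	VM_OBJECT_RUNLOCK(object);
 *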
*/ static void swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object) { struct swblk *sb; vm_page_t m; vm_pindex_t pi; daddr_t blk; int i, nv, rahead, rv; KASSERT(object->type == OBJT_SWAP, ("%s: Object not swappable", __func__)); for (pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( &object->un_pager.swp.swp_blks, pi)) != NULL; ) { if ((object->flags & OBJ_DEAD) != 0) { /* * Make sure that pending writes finish before * returning. */ vm_object_pip_wait(object, "swpoff"); swp_pager_meta_free_all(object); break; } for (i = 0; i < SWAP_META_PAGES; i++) { /* * Count the number of contiguous valid blocks. */ for (nv = 0; nv < SWAP_META_PAGES - i; nv++) { blk = sb->d[i + nv]; if (!swp_pager_isondev(blk, sp) || blk == SWAPBLK_NONE) break; } if (nv == 0) continue; /* * Look for a page corresponding to the first * valid block and ensure that any pending paging * operations on it are complete. If the page is valid, * mark it dirty and free the swap block. Try to batch * this operation since it may cause sp to be freed, * meaning that we must restart the scan. Avoid busying * valid pages since we may block forever on kernel * stack pages. */ m = vm_page_lookup(object, sb->p + i); if (m == NULL) { m = vm_page_alloc(object, sb->p + i, VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL); if (m == NULL) break; } else { if ((m->oflags & VPO_SWAPINPROG) != 0) { m->oflags |= VPO_SWAPSLEEP; VM_OBJECT_SLEEP(object, &object->handle, PSWP, "swpoff", 0); break; } if (vm_page_all_valid(m)) { do { swp_pager_force_dirty(m); } while (--nv > 0 && (m = vm_page_next(m)) != NULL && vm_page_all_valid(m) && (m->oflags & VPO_SWAPINPROG) == 0); break; } if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL)) break; } vm_object_pip_add(object, 1); rahead = SWAP_META_PAGES; rv = swap_pager_getpages_locked(object, &m, 1, NULL, &rahead); if (rv != VM_PAGER_OK) panic("%s: read from swap failed: %d", __func__, rv); vm_object_pip_wakeupn(object, 1); VM_OBJECT_WLOCK(object); vm_page_xunbusy(m); /* * The object lock was dropped so we must restart the * scan of this swap block. Pages paged in during this * iteration will be marked dirty in a future iteration. */ break; } if (i == SWAP_META_PAGES) pi = sb->p + SWAP_META_PAGES; } } /* * swap_pager_swapoff: * * Page in all of the pages that have been paged out to the * given device. The corresponding blocks in the bitmap must be * marked as allocated and the device must be flagged SW_CLOSING. * There may be no processes swapped out to the device. * * This routine may block. */ static void swap_pager_swapoff(struct swdevt *sp) { vm_object_t object; int retries; sx_assert(&swdev_syscall_lock, SA_XLOCKED); retries = 0; full_rescan: mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) { if (object->type != OBJT_SWAP) continue; mtx_unlock(&vm_object_list_mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); /* * Dead objects are eventually terminated on their own. */ if ((object->flags & OBJ_DEAD) != 0) goto next_obj; /* * Sync with fences placed after pctrie * initialization. We must not access pctrie below * unless we checked that our object is swap and not * dead. */ atomic_thread_fence_acq(); if (object->type != OBJT_SWAP) goto next_obj; swap_pager_swapoff_object(sp, object); next_obj: VM_OBJECT_WUNLOCK(object); mtx_lock(&vm_object_list_mtx); } mtx_unlock(&vm_object_list_mtx); if (sp->sw_used) { /* * Objects may be locked or paging to the device being * removed, so we will miss their pages and need to * make another pass. 
We have marked this device as * SW_CLOSING, so the activity should finish soon. */ retries++; if (retries > 100) { panic("swapoff: failed to locate %d swap blocks", sp->sw_used); } pause("swpoff", hz / 20); goto full_rescan; } EVENTHANDLER_INVOKE(swapoff, sp); } /************************************************************************ * SWAP META DATA * ************************************************************************ * * These routines manipulate the swap metadata stored in the * OBJT_SWAP object. * * Swap metadata is implemented with a global hash and not directly * linked into the object. Instead the object simply contains * appropriate tracking counters. */ /* * SWP_PAGER_SWBLK_EMPTY() - is a range of blocks free? */ static bool swp_pager_swblk_empty(struct swblk *sb, int start, int limit) { int i; MPASS(0 <= start && start <= limit && limit <= SWAP_META_PAGES); for (i = start; i < limit; i++) { if (sb->d[i] != SWAPBLK_NONE) return (false); } return (true); } /* * SWP_PAGER_FREE_EMPTY_SWBLK() - frees if a block is free * * Nothing is done if the block is still in use. */ static void swp_pager_free_empty_swblk(vm_object_t object, struct swblk *sb) { if (swp_pager_swblk_empty(sb, 0, SWAP_META_PAGES)) { SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); uma_zfree(swblk_zone, sb); } } /* * SWP_PAGER_META_BUILD() - add swap block to swap meta data for object * * We first convert the object to a swap object if it is a default * object. * * The specified swapblk is added to the object's swap metadata. If * the swapblk is not valid, it is freed instead. Any previously * assigned swapblk is returned. */ static daddr_t swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk) { static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted; struct swblk *sb, *sb1; vm_pindex_t modpi, rdpi; daddr_t prev_swapblk; int error, i; VM_OBJECT_ASSERT_WLOCKED(object); /* * Convert default object to swap object if necessary */ if (object->type != OBJT_SWAP) { pctrie_init(&object->un_pager.swp.swp_blks); /* * Ensure that swap_pager_swapoff()'s iteration over * object_list does not see a garbage pctrie. */ atomic_thread_fence_rel(); object->type = OBJT_SWAP; object->un_pager.swp.writemappings = 0; KASSERT((object->flags & OBJ_ANON) != 0 || object->handle == NULL, ("default pager %p with handle %p", object, object->handle)); } rdpi = rounddown(pindex, SWAP_META_PAGES); sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); if (sb == NULL) { if (swapblk == SWAPBLK_NONE) return (SWAPBLK_NONE); for (;;) { sb = uma_zalloc(swblk_zone, M_NOWAIT | (curproc == pageproc ? M_USE_RESERVE : 0)); if (sb != NULL) { sb->p = rdpi; for (i = 0; i < SWAP_META_PAGES; i++) sb->d[i] = SWAPBLK_NONE; if (atomic_cmpset_int(&swblk_zone_exhausted, 1, 0)) printf("swblk zone ok\n"); break; } VM_OBJECT_WUNLOCK(object); if (uma_zone_exhausted(swblk_zone)) { if (atomic_cmpset_int(&swblk_zone_exhausted, 0, 1)) printf("swap blk zone exhausted, " "increase kern.maxswzone\n"); vm_pageout_oom(VM_OOM_SWAPZ); pause("swzonxb", 10); } else uma_zwait(swblk_zone); VM_OBJECT_WLOCK(object); sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); if (sb != NULL) /* * Somebody swapped out a nearby page, * allocating swblk at the rdpi index, * while we dropped the object lock. 
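 *
 * [Editor's note; not part of this change.]  Each swblk bucket covers
 * SWAP_META_PAGES consecutive page indices: rdpi names the bucket and
 * modpi (computed below as pindex % SWAP_META_PAGES) selects the slot in
 * sb->d[].  For instance, if SWAP_META_PAGES were 32, pindex 37 would map
 * to the bucket at rdpi = 32, slot 5.
 *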
*/ goto allocated; } for (;;) { error = SWAP_PCTRIE_INSERT( &object->un_pager.swp.swp_blks, sb); if (error == 0) { if (atomic_cmpset_int(&swpctrie_zone_exhausted, 1, 0)) printf("swpctrie zone ok\n"); break; } VM_OBJECT_WUNLOCK(object); if (uma_zone_exhausted(swpctrie_zone)) { if (atomic_cmpset_int(&swpctrie_zone_exhausted, 0, 1)) printf("swap pctrie zone exhausted, " "increase kern.maxswzone\n"); vm_pageout_oom(VM_OOM_SWAPZ); pause("swzonxp", 10); } else uma_zwait(swpctrie_zone); VM_OBJECT_WLOCK(object); sb1 = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); if (sb1 != NULL) { uma_zfree(swblk_zone, sb); sb = sb1; goto allocated; } } } allocated: MPASS(sb->p == rdpi); modpi = pindex % SWAP_META_PAGES; /* Return prior contents of metadata. */ prev_swapblk = sb->d[modpi]; /* Enter block into metadata. */ sb->d[modpi] = swapblk; /* * Free the swblk if we end up with the empty page run. */ if (swapblk == SWAPBLK_NONE) swp_pager_free_empty_swblk(object, sb); return (prev_swapblk); } /* * SWP_PAGER_META_TRANSFER() - free a range of blocks in the srcobject's swap * metadata, or transfer it into dstobject. * * This routine will free swap metadata structures as they are cleaned * out. */ static void swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t pindex, vm_pindex_t count) { struct swblk *sb; daddr_t n_free, s_free; vm_pindex_t offset, last; int i, limit, start; VM_OBJECT_ASSERT_WLOCKED(srcobject); if (srcobject->type != OBJT_SWAP || count == 0) return; swp_pager_init_freerange(&s_free, &n_free); offset = pindex; last = pindex + count; for (;;) { sb = SWAP_PCTRIE_LOOKUP_GE(&srcobject->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL || sb->p >= last) break; start = pindex > sb->p ? pindex - sb->p : 0; limit = last - sb->p < SWAP_META_PAGES ? last - sb->p : SWAP_META_PAGES; for (i = start; i < limit; i++) { if (sb->d[i] == SWAPBLK_NONE) continue; if (dstobject == NULL || !swp_pager_xfer_source(srcobject, dstobject, sb->p + i - offset, sb->d[i])) { swp_pager_update_freerange(&s_free, &n_free, sb->d[i]); } sb->d[i] = SWAPBLK_NONE; } pindex = sb->p + SWAP_META_PAGES; if (swp_pager_swblk_empty(sb, 0, start) && swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) { SWAP_PCTRIE_REMOVE(&srcobject->un_pager.swp.swp_blks, sb->p); uma_zfree(swblk_zone, sb); } } swp_pager_freeswapspace(s_free, n_free); } /* * SWP_PAGER_META_FREE() - free a range of blocks in the object's swap metadata * * The requested range of blocks is freed, with any associated swap * returned to the swap bitmap. * * This routine will free swap metadata structures as they are cleaned * out. This routine does *NOT* operate on swap metadata associated * with resident pages. */ static void swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) { swp_pager_meta_transfer(object, NULL, pindex, count); } /* * SWP_PAGER_META_FREE_ALL() - destroy all swap metadata associated with object * * This routine locates and destroys all swap metadata associated with * an object. 
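 *
 * [Editor's sketch; not part of this change.]  As in the other free paths,
 * the implementation below batches contiguous blocks through the
 * s_free/n_free accumulator so that the swap bitmap is updated once per
 * run instead of once per block, roughly:
 *
 *	swp_pager_init_freerange(&s_free, &n_free);
 *	for (each allocated block addr in the object's metadata)
 *		swp_pager_update_freerange(&s_free, &n_free, addr);
 *	swp_pager_freeswapspace(s_free, n_free);	/* flush the final run */
 *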
*/ static void swp_pager_meta_free_all(vm_object_t object) { struct swblk *sb; daddr_t n_free, s_free; vm_pindex_t pindex; int i; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_SWAP) return; swp_pager_init_freerange(&s_free, &n_free); for (pindex = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( &object->un_pager.swp.swp_blks, pindex)) != NULL;) { pindex = sb->p + SWAP_META_PAGES; for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] == SWAPBLK_NONE) continue; swp_pager_update_freerange(&s_free, &n_free, sb->d[i]); } SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); uma_zfree(swblk_zone, sb); } swp_pager_freeswapspace(s_free, n_free); } /* * SWP_PAGER_METACTL() - misc control of swap meta data. * * This routine is capable of looking up, or removing swapblk * assignments in the swap meta data. It returns the swapblk being * looked-up, popped, or SWAPBLK_NONE if the block was invalid. * * When acting on a busy resident page and paging is in progress, we * have to wait until paging is complete but otherwise can act on the * busy page. */ static daddr_t swp_pager_meta_lookup(vm_object_t object, vm_pindex_t pindex) { struct swblk *sb; VM_OBJECT_ASSERT_LOCKED(object); /* * The meta data only exists if the object is OBJT_SWAP * and even then might not be allocated yet. */ KASSERT(object->type == OBJT_SWAP, ("Lookup object not swappable")); sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL) return (SWAPBLK_NONE); return (sb->d[pindex % SWAP_META_PAGES]); } /* * Returns the least page index which is greater than or equal to the * parameter pindex and for which there is a swap block allocated. * Returns object's size if the object's type is not swap or if there * are no allocated swap blocks for the object after the requested * pindex. */ vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) { struct swblk *sb; int i; VM_OBJECT_ASSERT_LOCKED(object); if (object->type != OBJT_SWAP) return (object->size); sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL) return (object->size); if (sb->p < pindex) { for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) return (sb->p + i); } sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, roundup(pindex, SWAP_META_PAGES)); if (sb == NULL) return (object->size); } for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) return (sb->p + i); } /* * We get here if a swblk is present in the trie but it * doesn't map any blocks. */ MPASS(0); return (object->size); } /* * System call swapon(name) enables swapping on device name, * which must be in the swdevsw. Return EBUSY * if already swapping on this device. */ #ifndef _SYS_SYSPROTO_H_ struct swapon_args { char *name; }; #endif /* * MPSAFE */ /* ARGSUSED */ int sys_swapon(struct thread *td, struct swapon_args *uap) { struct vattr attr; struct vnode *vp; struct nameidata nd; int error; error = priv_check(td, PRIV_SWAPON); if (error) return (error); sx_xlock(&swdev_syscall_lock); /* * Swap metadata may not fit in the KVM if we have physical * memory of >1GB. 
*/ if (swblk_zone == NULL) { error = ENOMEM; goto done; } NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name, td); error = namei(&nd); if (error) goto done; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vn_isdisk_error(vp, &error)) { error = swapongeom(vp); } else if (vp->v_type == VREG && (vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 && (error = VOP_GETATTR(vp, &attr, td->td_ucred)) == 0) { /* * Allow direct swapping to NFS regular files in the same * way that nfs_mountroot() sets up diskless swapping. */ error = swaponvp(td, vp, attr.va_size / DEV_BSIZE); } if (error) vrele(vp); done: sx_xunlock(&swdev_syscall_lock); return (error); } /* * Check that the total amount of swap currently configured does not * exceed half the theoretical maximum. If it does, print a warning * message. */ static void swapon_check_swzone(void) { /* recommend using no more than half that amount */ if (swap_total > swap_maxpages / 2) { printf("warning: total configured swap (%lu pages) " "exceeds maximum recommended amount (%lu pages).\n", swap_total, swap_maxpages / 2); printf("warning: increase kern.maxswzone " "or reduce amount of swap.\n"); } } static void swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags) { struct swdevt *sp, *tsp; daddr_t dvbase; /* * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks. * First chop nblks off to page-align it, then convert. * * sw->sw_nblks is in page-sized chunks now too. */ nblks &= ~(ctodb(1) - 1); nblks = dbtoc(nblks); sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO); sp->sw_blist = blist_create(nblks, M_WAITOK); sp->sw_vp = vp; sp->sw_id = id; sp->sw_dev = dev; sp->sw_nblks = nblks; sp->sw_used = 0; sp->sw_strategy = strategy; sp->sw_close = close; sp->sw_flags = flags; /* * Do not free the first blocks in order to avoid overwriting * any bsd label at the front of the partition */ blist_free(sp->sw_blist, howmany(BBSIZE, PAGE_SIZE), nblks - howmany(BBSIZE, PAGE_SIZE)); dvbase = 0; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(tsp, &swtailq, sw_list) { if (tsp->sw_end >= dvbase) { /* * We put one uncovered page between the devices * in order to definitively prevent any cross-device * I/O requests */ dvbase = tsp->sw_end + 1; } } sp->sw_first = dvbase; sp->sw_end = dvbase + nblks; TAILQ_INSERT_TAIL(&swtailq, sp, sw_list); nswapdev++; swap_pager_avail += nblks - howmany(BBSIZE, PAGE_SIZE); swap_total += nblks; swapon_check_swzone(); swp_sizecheck(); mtx_unlock(&sw_dev_mtx); EVENTHANDLER_INVOKE(swapon, sp); } /* * SYSCALL: swapoff(devname) * * Disable swapping on the given device. * * XXX: Badly designed system call: it should use a device index * rather than filename as specification. We keep sw_vp around * only to make this work. 
*/ #ifndef _SYS_SYSPROTO_H_ struct swapoff_args { char *name; }; #endif /* * MPSAFE */ /* ARGSUSED */ int sys_swapoff(struct thread *td, struct swapoff_args *uap) { struct vnode *vp; struct nameidata nd; struct swdevt *sp; int error; error = priv_check(td, PRIV_SWAPOFF); if (error) return (error); sx_xlock(&swdev_syscall_lock); NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name, td); error = namei(&nd); if (error) goto done; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_vp == vp) break; } mtx_unlock(&sw_dev_mtx); if (sp == NULL) { error = EINVAL; goto done; } error = swapoff_one(sp, td->td_ucred); done: sx_xunlock(&swdev_syscall_lock); return (error); } static int swapoff_one(struct swdevt *sp, struct ucred *cred) { u_long nblks; #ifdef MAC int error; #endif sx_assert(&swdev_syscall_lock, SA_XLOCKED); #ifdef MAC (void) vn_lock(sp->sw_vp, LK_EXCLUSIVE | LK_RETRY); error = mac_system_check_swapoff(cred, sp->sw_vp); (void) VOP_UNLOCK(sp->sw_vp); if (error != 0) return (error); #endif nblks = sp->sw_nblks; /* * We can turn off this swap device safely only if the * available virtual memory in the system will fit the amount * of data we will have to page back in, plus an epsilon so * the system doesn't become critically low on swap space. */ if (vm_free_count() + swap_pager_avail < nblks + nswap_lowat) return (ENOMEM); /* * Prevent further allocations on this device. */ mtx_lock(&sw_dev_mtx); sp->sw_flags |= SW_CLOSING; swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks); swap_total -= nblks; mtx_unlock(&sw_dev_mtx); /* * Page in the contents of the device and close it. */ swap_pager_swapoff(sp); sp->sw_close(curthread, sp); mtx_lock(&sw_dev_mtx); sp->sw_id = NULL; TAILQ_REMOVE(&swtailq, sp, sw_list); nswapdev--; if (nswapdev == 0) { swap_pager_full = 2; swap_pager_almost_full = 1; } if (swdevhd == sp) swdevhd = NULL; mtx_unlock(&sw_dev_mtx); blist_destroy(sp->sw_blist); free(sp, M_VMPGDATA); return (0); } void swapoff_all(void) { struct swdevt *sp, *spt; const char *devname; int error; sx_xlock(&swdev_syscall_lock); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH_SAFE(sp, &swtailq, sw_list, spt) { mtx_unlock(&sw_dev_mtx); if (vn_isdisk(sp->sw_vp)) devname = devtoname(sp->sw_vp->v_rdev); else devname = "[file]"; error = swapoff_one(sp, thread0.td_ucred); if (error != 0) { printf("Cannot remove swap device %s (error=%d), " "skipping.\n", devname, error); } else if (bootverbose) { printf("Swap device %s removed.\n", devname); } mtx_lock(&sw_dev_mtx); } mtx_unlock(&sw_dev_mtx); sx_xunlock(&swdev_syscall_lock); } void swap_pager_status(int *total, int *used) { *total = swap_total; *used = swap_total - swap_pager_avail - nswapdev * howmany(BBSIZE, PAGE_SIZE); } int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len) { struct swdevt *sp; const char *tmp_devname; int error, n; n = 0; error = ENOENT; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (n != name) { n++; continue; } xs->xsw_version = XSWDEV_VERSION; xs->xsw_dev = sp->sw_dev; xs->xsw_flags = sp->sw_flags; xs->xsw_nblks = sp->sw_nblks; xs->xsw_used = sp->sw_used; if (devname != NULL) { if (vn_isdisk(sp->sw_vp)) tmp_devname = devtoname(sp->sw_vp->v_rdev); else tmp_devname = "[file]"; strncpy(devname, tmp_devname, len); } error = 0; break; } mtx_unlock(&sw_dev_mtx); return (error); } #if defined(COMPAT_FREEBSD11) #define XSWDEV_VERSION_11 1 struct xswdev11 { u_int xsw_version; uint32_t xsw_dev; int xsw_flags; int xsw_nblks; int 
xsw_used; }; #endif #if defined(__amd64__) && defined(COMPAT_FREEBSD32) struct xswdev32 { u_int xsw_version; u_int xsw_dev1, xsw_dev2; int xsw_flags; int xsw_nblks; int xsw_used; }; #endif static int sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) { struct xswdev xs; #if defined(__amd64__) && defined(COMPAT_FREEBSD32) struct xswdev32 xs32; #endif #if defined(COMPAT_FREEBSD11) struct xswdev11 xs11; #endif int error; if (arg2 != 1) /* name length */ return (EINVAL); error = swap_dev_info(*(int *)arg1, &xs, NULL, 0); if (error != 0) return (error); #if defined(__amd64__) && defined(COMPAT_FREEBSD32) if (req->oldlen == sizeof(xs32)) { xs32.xsw_version = XSWDEV_VERSION; xs32.xsw_dev1 = xs.xsw_dev; xs32.xsw_dev2 = xs.xsw_dev >> 32; xs32.xsw_flags = xs.xsw_flags; xs32.xsw_nblks = xs.xsw_nblks; xs32.xsw_used = xs.xsw_used; error = SYSCTL_OUT(req, &xs32, sizeof(xs32)); return (error); } #endif #if defined(COMPAT_FREEBSD11) if (req->oldlen == sizeof(xs11)) { xs11.xsw_version = XSWDEV_VERSION_11; xs11.xsw_dev = xs.xsw_dev; /* truncation */ xs11.xsw_flags = xs.xsw_flags; xs11.xsw_nblks = xs.xsw_nblks; xs11.xsw_used = xs.xsw_used; error = SYSCTL_OUT(req, &xs11, sizeof(xs11)); return (error); } #endif error = SYSCTL_OUT(req, &xs, sizeof(xs)); return (error); } SYSCTL_INT(_vm, OID_AUTO, nswapdev, CTLFLAG_RD, &nswapdev, 0, "Number of swap devices"); SYSCTL_NODE(_vm, OID_AUTO, swap_info, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_vm_swap_info, "Swap statistics by device"); /* * Count the approximate swap usage in pages for a vmspace. The * shadowed or not yet copied on write swap blocks are not accounted. * The map must be locked. */ long vmspace_swap_count(struct vmspace *vmspace) { vm_map_t map; vm_map_entry_t cur; vm_object_t object; struct swblk *sb; vm_pindex_t e, pi; long count; int i; map = &vmspace->vm_map; count = 0; VM_MAP_ENTRY_FOREACH(cur, map) { if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) continue; object = cur->object.vm_object; if (object == NULL || object->type != OBJT_SWAP) continue; VM_OBJECT_RLOCK(object); if (object->type != OBJT_SWAP) goto unlock; pi = OFF_TO_IDX(cur->offset); e = pi + OFF_TO_IDX(cur->end - cur->start); for (;; pi = sb->p + SWAP_META_PAGES) { sb = SWAP_PCTRIE_LOOKUP_GE( &object->un_pager.swp.swp_blks, pi); if (sb == NULL || sb->p >= e) break; for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->p + i < e && sb->d[i] != SWAPBLK_NONE) count++; } } unlock: VM_OBJECT_RUNLOCK(object); } return (count); } /* * GEOM backend * * Swapping onto disk devices. * */ static g_orphan_t swapgeom_orphan; static struct g_class g_swap_class = { .name = "SWAP", .version = G_VERSION, .orphan = swapgeom_orphan, }; DECLARE_GEOM_CLASS(g_swap_class, g_class); static void swapgeom_close_ev(void *arg, int flags) { struct g_consumer *cp; cp = arg; g_access(cp, -1, -1, 0); g_detach(cp); g_destroy_consumer(cp); } /* * Add a reference to the g_consumer for an inflight transaction. */ static void swapgeom_acquire(struct g_consumer *cp) { mtx_assert(&sw_dev_mtx, MA_OWNED); cp->index++; } /* * Remove a reference from the g_consumer. Post a close event if all * references go away, since the function might be called from the * biodone context. 
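 *
 * [Editor's note; not part of this change.]  cp->index serves as a
 * reference count: one reference held while the consumer is active, plus
 * one per in-flight transaction.  The pairing used by the strategy and
 * completion paths is, in sketch form:
 *
 *	mtx_lock(&sw_dev_mtx);
 *	swapgeom_acquire(cp);		/* before issuing the bio */
 *	mtx_unlock(&sw_dev_mtx);
 *	...
 *	mtx_lock(&sw_dev_mtx);
 *	swapgeom_release(cp, sp);	/* from swapgeom_done() */
 *	mtx_unlock(&sw_dev_mtx);
 *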
*/ static void swapgeom_release(struct g_consumer *cp, struct swdevt *sp) { mtx_assert(&sw_dev_mtx, MA_OWNED); cp->index--; if (cp->index == 0) { if (g_post_event(swapgeom_close_ev, cp, M_NOWAIT, NULL) == 0) sp->sw_id = NULL; } } static void swapgeom_done(struct bio *bp2) { struct swdevt *sp; struct buf *bp; struct g_consumer *cp; bp = bp2->bio_caller2; cp = bp2->bio_from; bp->b_ioflags = bp2->bio_flags; if (bp2->bio_error) bp->b_ioflags |= BIO_ERROR; bp->b_resid = bp->b_bcount - bp2->bio_completed; bp->b_error = bp2->bio_error; bp->b_caller1 = NULL; bufdone(bp); sp = bp2->bio_caller1; mtx_lock(&sw_dev_mtx); swapgeom_release(cp, sp); mtx_unlock(&sw_dev_mtx); g_destroy_bio(bp2); } static void swapgeom_strategy(struct buf *bp, struct swdevt *sp) { struct bio *bio; struct g_consumer *cp; mtx_lock(&sw_dev_mtx); cp = sp->sw_id; if (cp == NULL) { mtx_unlock(&sw_dev_mtx); bp->b_error = ENXIO; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } swapgeom_acquire(cp); mtx_unlock(&sw_dev_mtx); if (bp->b_iocmd == BIO_WRITE) bio = g_new_bio(); else bio = g_alloc_bio(); if (bio == NULL) { mtx_lock(&sw_dev_mtx); swapgeom_release(cp, sp); mtx_unlock(&sw_dev_mtx); bp->b_error = ENOMEM; bp->b_ioflags |= BIO_ERROR; printf("swap_pager: cannot allocate bio\n"); bufdone(bp); return; } bp->b_caller1 = bio; bio->bio_caller1 = sp; bio->bio_caller2 = bp; bio->bio_cmd = bp->b_iocmd; bio->bio_offset = (bp->b_blkno - sp->sw_first) * PAGE_SIZE; bio->bio_length = bp->b_bcount; bio->bio_done = swapgeom_done; if (!buf_mapped(bp)) { bio->bio_ma = bp->b_pages; bio->bio_data = unmapped_buf; bio->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK; bio->bio_ma_n = bp->b_npages; bio->bio_flags |= BIO_UNMAPPED; } else { bio->bio_data = bp->b_data; bio->bio_ma = NULL; } g_io_request(bio, cp); return; } static void swapgeom_orphan(struct g_consumer *cp) { struct swdevt *sp; int destroy; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_id == cp) { sp->sw_flags |= SW_CLOSING; break; } } /* * Drop reference we were created with. Do directly since we're in a * special context where we don't have to queue the call to * swapgeom_close_ev(). */ cp->index--; destroy = ((sp != NULL) && (cp->index == 0)); if (destroy) sp->sw_id = NULL; mtx_unlock(&sw_dev_mtx); if (destroy) swapgeom_close_ev(cp, 0); } static void swapgeom_close(struct thread *td, struct swdevt *sw) { struct g_consumer *cp; mtx_lock(&sw_dev_mtx); cp = sw->sw_id; sw->sw_id = NULL; mtx_unlock(&sw_dev_mtx); /* * swapgeom_close() may be called from the biodone context, * where we cannot perform topology changes. Delegate the * work to the events thread. */ if (cp != NULL) g_waitfor_event(swapgeom_close_ev, cp, M_WAITOK, NULL); } static int swapongeom_locked(struct cdev *dev, struct vnode *vp) { struct g_provider *pp; struct g_consumer *cp; static struct g_geom *gp; struct swdevt *sp; u_long nblks; int error; pp = g_dev_getprovider(dev); if (pp == NULL) return (ENODEV); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { cp = sp->sw_id; if (cp != NULL && cp->provider == pp) { mtx_unlock(&sw_dev_mtx); return (EBUSY); } } mtx_unlock(&sw_dev_mtx); if (gp == NULL) gp = g_new_geomf(&g_swap_class, "swap"); cp = g_new_consumer(gp); cp->index = 1; /* Number of active I/Os, plus one for being active. 
*/ cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; g_attach(cp, pp); /* * XXX: Every time you think you can improve the margin for * footshooting, somebody depends on the ability to do so: * savecore(8) wants to write to our swapdev so we cannot * set an exclusive count :-( */ error = g_access(cp, 1, 1, 0); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } nblks = pp->mediasize / DEV_BSIZE; swaponsomething(vp, cp, nblks, swapgeom_strategy, swapgeom_close, dev2udev(dev), (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0); return (0); } static int swapongeom(struct vnode *vp) { int error; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_type != VCHR || VN_IS_DOOMED(vp)) { error = ENOENT; } else { g_topology_lock(); error = swapongeom_locked(vp->v_rdev, vp); g_topology_unlock(); } VOP_UNLOCK(vp); return (error); } /* * VNODE backend * * This is used mainly for network filesystem (read: probably only tested * with NFS) swapfiles. * */ static void swapdev_strategy(struct buf *bp, struct swdevt *sp) { struct vnode *vp2; bp->b_blkno = ctodb(bp->b_blkno - sp->sw_first); vp2 = sp->sw_id; vhold(vp2); if (bp->b_iocmd == BIO_WRITE) { if (bp->b_bufobj) bufobj_wdrop(bp->b_bufobj); bufobj_wref(&vp2->v_bufobj); } if (bp->b_bufobj != &vp2->v_bufobj) bp->b_bufobj = &vp2->v_bufobj; bp->b_vp = vp2; bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); return; } static void swapdev_close(struct thread *td, struct swdevt *sp) { VOP_CLOSE(sp->sw_vp, FREAD | FWRITE, td->td_ucred, td); vrele(sp->sw_vp); } static int swaponvp(struct thread *td, struct vnode *vp, u_long nblks) { struct swdevt *sp; int error; if (nblks == 0) return (ENXIO); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_id == vp) { mtx_unlock(&sw_dev_mtx); return (EBUSY); } } mtx_unlock(&sw_dev_mtx); (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef MAC error = mac_system_check_swapon(td->td_ucred, vp); if (error == 0) #endif error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, NULL); (void) VOP_UNLOCK(vp); if (error) return (error); swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, NODEV, 0); return (0); } static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS) { int error, new, n; new = nsw_wcount_async_max; error = sysctl_handle_int(oidp, &new, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (new > nswbuf / 2 || new < 1) return (EINVAL); mtx_lock(&swbuf_mtx); while (nsw_wcount_async_max != new) { /* * Adjust difference. If the current async count is too low, * we will need to sqeeze our update slowly in. Sleep with a * higher priority than getpbuf() to finish faster. 
*/ n = new - nsw_wcount_async_max; if (nsw_wcount_async + n >= 0) { nsw_wcount_async += n; nsw_wcount_async_max += n; wakeup(&nsw_wcount_async); } else { nsw_wcount_async_max -= nsw_wcount_async; nsw_wcount_async = 0; msleep(&nsw_wcount_async, &swbuf_mtx, PSWP, "swpsysctl", 0); } } mtx_unlock(&swbuf_mtx); return (0); } static void swap_pager_update_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end) { VM_OBJECT_WLOCK(object); KASSERT((object->flags & OBJ_ANON) == 0, ("Splittable object with writecount")); object->un_pager.swp.writemappings += (vm_ooffset_t)end - start; VM_OBJECT_WUNLOCK(object); } static void swap_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end) { VM_OBJECT_WLOCK(object); KASSERT((object->flags & OBJ_ANON) == 0, ("Splittable object with writecount")); object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start; VM_OBJECT_WUNLOCK(object); } diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index 80dca93fbf85..59ca2cc2cfdb 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -1,87 +1,88 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)swap_pager.h 7.1 (Berkeley) 12/5/90 * $FreeBSD$ */ #ifndef _VM_SWAP_PAGER_H_ #define _VM_SWAP_PAGER_H_ #include struct buf; struct swdevt; typedef void sw_strategy_t(struct buf *, struct swdevt *); typedef void sw_close_t(struct thread *, struct swdevt *); /* * Swap device table */ struct swdevt { int sw_flags; int sw_nblks; int sw_used; dev_t sw_dev; struct vnode *sw_vp; void *sw_id; __daddr_t sw_first; __daddr_t sw_end; struct blist *sw_blist; TAILQ_ENTRY(swdevt) sw_list; sw_strategy_t *sw_strategy; sw_close_t *sw_close; }; #define SW_UNMAPPED 0x01 #define SW_CLOSING 0x04 #ifdef _KERNEL extern int swap_pager_avail; struct xswdev; int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len); void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int); vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex); void swap_pager_freespace(vm_object_t, vm_pindex_t, vm_size_t); void swap_pager_swap_init(void); int swap_pager_nswapdev(void); int swap_pager_reserve(vm_object_t, vm_pindex_t, vm_size_t); void swap_pager_status(int *total, int *used); +u_long swap_pager_swapped_pages(vm_object_t object); void swapoff_all(void); #endif /* _KERNEL */ #endif /* _VM_SWAP_PAGER_H_ */ diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 35ea9cc45fae..1f5194464b69 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1,2860 +1,2871 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory object module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include +#include #include #include #include #include #include #include #include #include /* for curproc, pageproc */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int old_msync; SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, "Use old (insecure) msync behavior"); static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *allclean, boolean_t *eio); static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean); static void vm_object_backing_remove(vm_object_t object); /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given * page of memory exists within exactly one object. * * An object is only deallocated when all "references" * are given up. Only one "reference" to a given * region of an object should be writeable. * * Associated with each object is a list of all resident * memory pages belonging to that object; this list is * maintained by the "vm_page" module, and locked by the object's * lock. * * Each object also records a "pager" routine which is * used to retrieve (and store) pages to the proper backing * storage. In addition, objects may be backed by other * objects from which they were virtual-copied. 
* * The only items within the object structure which are * modified after time of creation are: * reference count locked by object's lock * pager routine locked by object's lock * */ struct object_q vm_object_list; struct mtx vm_object_list_mtx; /* lock for object list and count */ struct vm_object kernel_object_store; static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "VM object stats"); static COUNTER_U64_DEFINE_EARLY(object_collapses); SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD, &object_collapses, "VM object collapses"); static COUNTER_U64_DEFINE_EARLY(object_bypasses); SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD, &object_bypasses, "VM object bypasses"); static COUNTER_U64_DEFINE_EARLY(object_collapse_waits); SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapse_waits, CTLFLAG_RD, &object_collapse_waits, "Number of sleeps for collapse"); static uma_zone_t obj_zone; static int vm_object_zinit(void *mem, int size, int flags); #ifdef INVARIANTS static void vm_object_zdtor(void *mem, int size, void *arg); static void vm_object_zdtor(void *mem, int size, void *arg) { vm_object_t object; object = (vm_object_t)mem; KASSERT(object->ref_count == 0, ("object %p ref_count = %d", object, object->ref_count)); KASSERT(TAILQ_EMPTY(&object->memq), ("object %p has resident pages in its memq", object)); KASSERT(vm_radix_is_empty(&object->rtree), ("object %p has resident pages in its trie", object)); #if VM_NRESERVLEVEL > 0 KASSERT(LIST_EMPTY(&object->rvq), ("object %p has reservations", object)); #endif KASSERT(!vm_object_busied(object), ("object %p busy = %d", object, blockcount_read(&object->busy))); KASSERT(object->resident_page_count == 0, ("object %p resident_page_count = %d", object, object->resident_page_count)); KASSERT(object->shadow_count == 0, ("object %p shadow_count = %d", object, object->shadow_count)); KASSERT(object->type == OBJT_DEAD, ("object %p has non-dead type %d", object, object->type)); } #endif static int vm_object_zinit(void *mem, int size, int flags) { vm_object_t object; object = (vm_object_t)mem; rw_init_flags(&object->lock, "vm object", RW_DUPOK | RW_NEW); /* These are true for any object that has been freed */ object->type = OBJT_DEAD; vm_radix_init(&object->rtree); refcount_init(&object->ref_count, 0); blockcount_init(&object->paging_in_progress); blockcount_init(&object->busy); object->resident_page_count = 0; object->shadow_count = 0; object->flags = OBJ_DEAD; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); mtx_unlock(&vm_object_list_mtx); return (0); } static void _vm_object_allocate(objtype_t type, vm_pindex_t size, u_short flags, vm_object_t object, void *handle) { TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); object->type = type; if (type == OBJT_SWAP) pctrie_init(&object->un_pager.swp.swp_blks); /* * Ensure that swap_pager_swapoff() iteration over object_list * sees up to date type and pctrie head if it observed * non-dead object. 
*/ atomic_thread_fence_rel(); object->pg_color = 0; object->flags = flags; object->size = size; object->domain.dr_policy = NULL; object->generation = 1; object->cleangeneration = 1; refcount_init(&object->ref_count, 1); object->memattr = VM_MEMATTR_DEFAULT; object->cred = NULL; object->charge = 0; object->handle = handle; object->backing_object = NULL; object->backing_object_offset = (vm_ooffset_t) 0; #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif umtx_shm_object_init(object); } /* * vm_object_init: * * Initialize the VM objects module. */ void vm_object_init(void) { TAILQ_INIT(&vm_object_list); mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF); rw_init(&kernel_object->lock, "kernel vm object"); _vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), OBJ_UNMANAGED, kernel_object, NULL); #if VM_NRESERVLEVEL > 0 kernel_object->flags |= OBJ_COLORED; kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); #endif kernel_object->un_pager.phys.ops = &default_phys_pg_ops; /* * The lock portion of struct vm_object must be type stable due * to vm_pageout_fallback_object_lock locking a vm object * without holding any references to it. * * paging_in_progress is valid always. Lockless references to * the objects may acquire pip and then check OBJ_DEAD. */ obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL, #ifdef INVARIANTS vm_object_zdtor, #else NULL, #endif vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); vm_radix_zinit(); } void vm_object_clear_flag(vm_object_t object, u_short bits) { VM_OBJECT_ASSERT_WLOCKED(object); object->flags &= ~bits; } /* * Sets the default memory attribute for the specified object. Pages * that are allocated to this object are by default assigned this memory * attribute. * * Presently, this function must be called before any pages are allocated * to the object. In the future, this requirement may be relaxed for * "default" and "swap" objects. */ int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr) { VM_OBJECT_ASSERT_WLOCKED(object); switch (object->type) { case OBJT_DEFAULT: case OBJT_DEVICE: case OBJT_MGTDEVICE: case OBJT_PHYS: case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: if (!TAILQ_EMPTY(&object->memq)) return (KERN_FAILURE); break; case OBJT_DEAD: return (KERN_INVALID_ARGUMENT); default: panic("vm_object_set_memattr: object %p is of undefined type", object); } object->memattr = memattr; return (KERN_SUCCESS); } void vm_object_pip_add(vm_object_t object, short i) { if (i > 0) blockcount_acquire(&object->paging_in_progress, i); } void vm_object_pip_wakeup(vm_object_t object) { vm_object_pip_wakeupn(object, 1); } void vm_object_pip_wakeupn(vm_object_t object, short i) { if (i > 0) blockcount_release(&object->paging_in_progress, i); } /* * Atomically drop the object lock and wait for pip to drain. This protects * from sleep/wakeup races due to identity changes. The lock is not re-acquired * on return. 
*/ static void vm_object_pip_sleep(vm_object_t object, const char *waitid) { (void)blockcount_sleep(&object->paging_in_progress, &object->lock, waitid, PVM | PDROP); } void vm_object_pip_wait(vm_object_t object, const char *waitid) { VM_OBJECT_ASSERT_WLOCKED(object); blockcount_wait(&object->paging_in_progress, &object->lock, waitid, PVM); } void vm_object_pip_wait_unlocked(vm_object_t object, const char *waitid) { VM_OBJECT_ASSERT_UNLOCKED(object); blockcount_wait(&object->paging_in_progress, NULL, waitid, PVM); } /* * vm_object_allocate: * * Returns a new object with the given size. */ vm_object_t vm_object_allocate(objtype_t type, vm_pindex_t size) { vm_object_t object; u_short flags; switch (type) { case OBJT_DEAD: panic("vm_object_allocate: can't create OBJT_DEAD"); case OBJT_DEFAULT: case OBJT_SWAP: flags = OBJ_COLORED; break; case OBJT_DEVICE: case OBJT_SG: flags = OBJ_FICTITIOUS | OBJ_UNMANAGED; break; case OBJT_MGTDEVICE: flags = OBJ_FICTITIOUS; break; case OBJT_PHYS: flags = OBJ_UNMANAGED; break; case OBJT_VNODE: flags = 0; break; default: panic("vm_object_allocate: type %d is undefined", type); } object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK); _vm_object_allocate(type, size, flags, object, NULL); return (object); } /* * vm_object_allocate_anon: * * Returns a new default object of the given size and marked as * anonymous memory for special split/collapse handling. Color * to be initialized by the caller. */ vm_object_t vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object, struct ucred *cred, vm_size_t charge) { vm_object_t handle, object; if (backing_object == NULL) handle = NULL; else if ((backing_object->flags & OBJ_ANON) != 0) handle = backing_object->handle; else handle = backing_object; object = uma_zalloc(obj_zone, M_WAITOK); _vm_object_allocate(OBJT_DEFAULT, size, OBJ_ANON | OBJ_ONEMAPPING, object, handle); object->cred = cred; object->charge = cred != NULL ? charge : 0; return (object); } static void vm_object_reference_vnode(vm_object_t object) { u_int old; /* * vnode objects need the lock for the first reference * to serialize with vnode_object_deallocate(). */ if (!refcount_acquire_if_gt(&object->ref_count, 0)) { VM_OBJECT_RLOCK(object); old = refcount_acquire(&object->ref_count); if (object->type == OBJT_VNODE && old == 0) vref(object->handle); VM_OBJECT_RUNLOCK(object); } } /* * vm_object_reference: * * Acquires a reference to the given object. */ void vm_object_reference(vm_object_t object) { if (object == NULL) return; if (object->type == OBJT_VNODE) vm_object_reference_vnode(object); else refcount_acquire(&object->ref_count); KASSERT((object->flags & OBJ_DEAD) == 0, ("vm_object_reference: Referenced dead object.")); } /* * vm_object_reference_locked: * * Gets another reference to the given object. * * The object must be locked. */ void vm_object_reference_locked(vm_object_t object) { u_int old; VM_OBJECT_ASSERT_LOCKED(object); old = refcount_acquire(&object->ref_count); if (object->type == OBJT_VNODE && old == 0) vref(object->handle); KASSERT((object->flags & OBJ_DEAD) == 0, ("vm_object_reference: Referenced dead object.")); } /* * Handle deallocating an object of type OBJT_VNODE. */ static void vm_object_deallocate_vnode(vm_object_t object) { struct vnode *vp = (struct vnode *) object->handle; bool last; KASSERT(object->type == OBJT_VNODE, ("vm_object_deallocate_vnode: not a vnode object")); KASSERT(vp != NULL, ("vm_object_deallocate_vnode: missing vp")); /* Object lock to protect handle lookup. 
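vm_object_reference_vnode() above takes the lockless fast path with refcount_acquire_if_gt(), falling back to the object lock only when the count might be zero. A userspace analogue of an acquire-only-if-above-a-threshold helper, sketched with C11 atomics; this is an illustration of the idea, not the refcount(9) implementation:

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdio.h>

        /* Bump *cnt only if its current value is greater than n. */
        static bool
        acquire_if_gt(atomic_uint *cnt, unsigned int n)
        {
                unsigned int old = atomic_load(cnt);

                do {
                        if (old <= n)
                                return (false);
                } while (!atomic_compare_exchange_weak(cnt, &old, old + 1));
                return (true);
        }

        int
        main(void)
        {
                atomic_uint refs = 1;
                atomic_uint dead = 0;

                printf("%d\n", acquire_if_gt(&refs, 0)); /* 1: took a reference */
                printf("%u\n", (unsigned)refs);          /* now 2 */
                printf("%d\n", acquire_if_gt(&dead, 0)); /* 0: caller must lock */
                return (0);
        }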
*/ last = refcount_release(&object->ref_count); VM_OBJECT_RUNLOCK(object); if (!last) return; if (!umtx_shm_vnobj_persistent) umtx_shm_object_terminated(object); /* vrele may need the vnode lock. */ vrele(vp); } /* * We dropped a reference on an object and discovered that it had a * single remaining shadow. This is a sibling of the reference we * dropped. Attempt to collapse the sibling and backing object. */ static vm_object_t vm_object_deallocate_anon(vm_object_t backing_object) { vm_object_t object; /* Fetch the final shadow. */ object = LIST_FIRST(&backing_object->shadow_head); KASSERT(object != NULL && backing_object->shadow_count == 1, ("vm_object_anon_deallocate: ref_count: %d, shadow_count: %d", backing_object->ref_count, backing_object->shadow_count)); KASSERT((object->flags & (OBJ_TMPFS_NODE | OBJ_ANON)) == OBJ_ANON, ("invalid shadow object %p", object)); if (!VM_OBJECT_TRYWLOCK(object)) { /* * Prevent object from disappearing since we do not have a * reference. */ vm_object_pip_add(object, 1); VM_OBJECT_WUNLOCK(backing_object); VM_OBJECT_WLOCK(object); vm_object_pip_wakeup(object); } else VM_OBJECT_WUNLOCK(backing_object); /* * Check for a collapse/terminate race with the last reference holder. */ if ((object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) != 0 || !refcount_acquire_if_not_zero(&object->ref_count)) { VM_OBJECT_WUNLOCK(object); return (NULL); } backing_object = object->backing_object; if (backing_object != NULL && (backing_object->flags & OBJ_ANON) != 0) vm_object_collapse(object); VM_OBJECT_WUNLOCK(object); return (object); } /* * vm_object_deallocate: * * Release a reference to the specified object, * gained either through a vm_object_allocate * or a vm_object_reference call. When all references * are gone, storage associated with this object * may be relinquished. * * No object may be locked. */ void vm_object_deallocate(vm_object_t object) { vm_object_t temp; bool released; while (object != NULL) { /* * If the reference count goes to 0 we start calling * vm_object_terminate() on the object chain. A ref count * of 1 may be a special case depending on the shadow count * being 0 or 1. These cases require a write lock on the * object. */ if ((object->flags & OBJ_ANON) == 0) released = refcount_release_if_gt(&object->ref_count, 1); else released = refcount_release_if_gt(&object->ref_count, 2); if (released) return; if (object->type == OBJT_VNODE) { VM_OBJECT_RLOCK(object); if (object->type == OBJT_VNODE) { vm_object_deallocate_vnode(object); return; } VM_OBJECT_RUNLOCK(object); } VM_OBJECT_WLOCK(object); KASSERT(object->ref_count > 0, ("vm_object_deallocate: object deallocated too many times: %d", object->type)); /* * If this is not the final reference to an anonymous * object we may need to collapse the shadow chain. */ if (!refcount_release(&object->ref_count)) { if (object->ref_count > 1 || object->shadow_count == 0) { if ((object->flags & OBJ_ANON) != 0 && object->ref_count == 1) vm_object_set_flag(object, OBJ_ONEMAPPING); VM_OBJECT_WUNLOCK(object); return; } /* Handle collapsing last ref on anonymous objects. */ object = vm_object_deallocate_anon(object); continue; } /* * Handle the final reference to an object. We restart * the loop with the backing object to avoid recursion. 
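As the comment above notes, vm_object_deallocate() restarts its loop with the backing object instead of recursing when it drops the final reference. A simplified sketch of that iterate-down-the-chain release, ignoring locking, collapse, and the shadow-count special cases (names are illustrative):

        #include <stdio.h>
        #include <stdlib.h>

        struct obj {
                int ref_count;
                struct obj *backing;
        };

        /*
         * Drop a reference; when an object dies, continue with its backing
         * object in the same loop instead of recursing, so a long shadow
         * chain cannot overflow the stack.
         */
        static void
        obj_release(struct obj *o)
        {
                struct obj *backing;

                while (o != NULL) {
                        if (--o->ref_count > 0)
                                return;
                        backing = o->backing;   /* reference we inherit */
                        free(o);                /* "terminate" this object */
                        o = backing;
                }
        }

        int
        main(void)
        {
                struct obj *a = malloc(sizeof(*a)), *b = malloc(sizeof(*b));

                b->ref_count = 1; b->backing = NULL;
                a->ref_count = 1; a->backing = b;
                obj_release(a);         /* frees a, then b, iteratively */
                return (0);
        }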
*/ umtx_shm_object_terminated(object); temp = object->backing_object; if (temp != NULL) { KASSERT((object->flags & OBJ_TMPFS_NODE) == 0, ("shadowed tmpfs v_object 2 %p", object)); vm_object_backing_remove(object); } KASSERT((object->flags & OBJ_DEAD) == 0, ("vm_object_deallocate: Terminating dead object.")); vm_object_set_flag(object, OBJ_DEAD); vm_object_terminate(object); object = temp; } } /* * vm_object_destroy removes the object from the global object list * and frees the space for the object. */ void vm_object_destroy(vm_object_t object) { /* * Release the allocation charge. */ if (object->cred != NULL) { swap_release_by_cred(object->charge, object->cred); object->charge = 0; crfree(object->cred); object->cred = NULL; } /* * Free the space for the object. */ uma_zfree(obj_zone, object); } static void vm_object_backing_remove_locked(vm_object_t object) { vm_object_t backing_object; backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(backing_object); KASSERT((object->flags & OBJ_COLLAPSING) == 0, ("vm_object_backing_remove: Removing collapsing object.")); if ((object->flags & OBJ_SHADOWLIST) != 0) { LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; object->flags &= ~OBJ_SHADOWLIST; } object->backing_object = NULL; } static void vm_object_backing_remove(vm_object_t object) { vm_object_t backing_object; VM_OBJECT_ASSERT_WLOCKED(object); if ((object->flags & OBJ_SHADOWLIST) != 0) { backing_object = object->backing_object; VM_OBJECT_WLOCK(backing_object); vm_object_backing_remove_locked(object); VM_OBJECT_WUNLOCK(backing_object); } else object->backing_object = NULL; } static void vm_object_backing_insert_locked(vm_object_t object, vm_object_t backing_object) { VM_OBJECT_ASSERT_WLOCKED(object); if ((backing_object->flags & OBJ_ANON) != 0) { VM_OBJECT_ASSERT_WLOCKED(backing_object); LIST_INSERT_HEAD(&backing_object->shadow_head, object, shadow_list); backing_object->shadow_count++; object->flags |= OBJ_SHADOWLIST; } object->backing_object = backing_object; } static void vm_object_backing_insert(vm_object_t object, vm_object_t backing_object) { VM_OBJECT_ASSERT_WLOCKED(object); if ((backing_object->flags & OBJ_ANON) != 0) { VM_OBJECT_WLOCK(backing_object); vm_object_backing_insert_locked(object, backing_object); VM_OBJECT_WUNLOCK(backing_object); } else object->backing_object = backing_object; } /* * Insert an object into a backing_object's shadow list with an additional * reference to the backing_object added. */ static void vm_object_backing_insert_ref(vm_object_t object, vm_object_t backing_object) { VM_OBJECT_ASSERT_WLOCKED(object); if ((backing_object->flags & OBJ_ANON) != 0) { VM_OBJECT_WLOCK(backing_object); KASSERT((backing_object->flags & OBJ_DEAD) == 0, ("shadowing dead anonymous object")); vm_object_reference_locked(backing_object); vm_object_backing_insert_locked(object, backing_object); vm_object_clear_flag(backing_object, OBJ_ONEMAPPING); VM_OBJECT_WUNLOCK(backing_object); } else { vm_object_reference(backing_object); object->backing_object = backing_object; } } /* * Transfer a backing reference from backing_object to object. */ static void vm_object_backing_transfer(vm_object_t object, vm_object_t backing_object) { vm_object_t new_backing_object; /* * Note that the reference to backing_object->backing_object * moves from within backing_object to within object. 
*/ vm_object_backing_remove_locked(object); new_backing_object = backing_object->backing_object; if (new_backing_object == NULL) return; if ((new_backing_object->flags & OBJ_ANON) != 0) { VM_OBJECT_WLOCK(new_backing_object); vm_object_backing_remove_locked(backing_object); vm_object_backing_insert_locked(object, new_backing_object); VM_OBJECT_WUNLOCK(new_backing_object); } else { object->backing_object = new_backing_object; backing_object->backing_object = NULL; } } /* * Wait for a concurrent collapse to settle. */ static void vm_object_collapse_wait(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); while ((object->flags & OBJ_COLLAPSING) != 0) { vm_object_pip_wait(object, "vmcolwait"); counter_u64_add(object_collapse_waits, 1); } } /* * Waits for a backing object to clear a pending collapse and returns * it locked if it is an ANON object. */ static vm_object_t vm_object_backing_collapse_wait(vm_object_t object) { vm_object_t backing_object; VM_OBJECT_ASSERT_WLOCKED(object); for (;;) { backing_object = object->backing_object; if (backing_object == NULL || (backing_object->flags & OBJ_ANON) == 0) return (NULL); VM_OBJECT_WLOCK(backing_object); if ((backing_object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) == 0) break; VM_OBJECT_WUNLOCK(object); vm_object_pip_sleep(backing_object, "vmbckwait"); counter_u64_add(object_collapse_waits, 1); VM_OBJECT_WLOCK(object); } return (backing_object); } /* * vm_object_terminate_pages removes any remaining pageable pages * from the object and resets the object to an empty state. */ static void vm_object_terminate_pages(vm_object_t object) { vm_page_t p, p_next; VM_OBJECT_ASSERT_WLOCKED(object); /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them * from the object. Rather than incrementally removing each page from * the object, the page and object are reset to any empty state. */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); KASSERT(p->object == object && (p->ref_count & VPRC_OBJREF) != 0, ("vm_object_terminate_pages: page %p is inconsistent", p)); p->object = NULL; if (vm_page_drop(p, VPRC_OBJREF) == VPRC_OBJREF) { VM_CNT_INC(v_pfree); vm_page_free(p); } } /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were * modified by the preceding loop. */ if (object->resident_page_count != 0) { vm_radix_reclaim_allnodes(&object->rtree); TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) vdrop(object->handle); } } /* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * * The object must be locked. * This routine may block. */ void vm_object_terminate(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_DEAD) != 0, ("terminating non-dead obj %p", object)); KASSERT((object->flags & OBJ_COLLAPSING) == 0, ("terminating collapsing obj %p", object)); KASSERT(object->backing_object == NULL, ("terminating shadow obj %p", object)); /* * Wait for the pageout daemon and other current users to be * done with the object. Note that new paging_in_progress * users can come after this wait, but they must check * OBJ_DEAD flag set (without unlocking the object), and avoid * the object being terminated. 
*/ vm_object_pip_wait(object, "objtrm"); KASSERT(object->ref_count == 0, ("vm_object_terminate: object with references, ref_count=%d", object->ref_count)); if ((object->flags & OBJ_PG_DTOR) == 0) vm_object_terminate_pages(object); #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT || object->type == OBJT_SWAP, ("%s: non-swap obj %p has cred", __func__, object)); /* * Let the pager know object is dead. */ vm_pager_deallocate(object); VM_OBJECT_WUNLOCK(object); vm_object_destroy(object); } /* * Make the page read-only so that we can clear the object flags. However, if * this is a nosync mmap then the object is likely to stay dirty so do not * mess with the page and do not clear the object flags. Returns TRUE if the * page should be flushed, and FALSE otherwise. */ static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean) { vm_page_assert_busied(p); /* * If we have been asked to skip nosync pages and this is a * nosync page, skip it. Note that the object flags were not * cleared in this case so we do not have to set them. */ if ((flags & OBJPC_NOSYNC) != 0 && (p->a.flags & PGA_NOSYNC) != 0) { *allclean = FALSE; return (FALSE); } else { pmap_remove_write(p); return (p->dirty != 0); } } /* * vm_object_page_clean * * Clean all dirty pages in the specified range of object. Leaves page * on whatever queue it is currently on. If NOSYNC is set then do not * write out pages with PGA_NOSYNC set (originally comes from MAP_NOSYNC), * leaving the object dirty. * * For swap objects backing tmpfs regular files, do not flush anything, * but remove write protection on the mapped pages to update mtime through * mmaped writes. * * When stuffing pages asynchronously, allow clustering. XXX we need a * synchronous clustering mode implementation. * * Odd semantics: if start == end, we clean everything. * * The object must be locked. * * Returns FALSE if some page from the range was not written, as * reported by the pager, and TRUE otherwise. */ boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, int flags) { vm_page_t np, p; vm_pindex_t pi, tend, tstart; int curgeneration, n, pagerflags; boolean_t eio, res, allclean; VM_OBJECT_ASSERT_WLOCKED(object); if (!vm_object_mightbedirty(object) || object->resident_page_count == 0) return (TRUE); pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK; pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0; tstart = OFF_TO_IDX(start); tend = (end == 0) ? 
object->size : OFF_TO_IDX(end + PAGE_MASK); allclean = tstart == 0 && tend >= object->size; res = TRUE; rescan: curgeneration = object->generation; for (p = vm_page_find_least(object, tstart); p != NULL; p = np) { pi = p->pindex; if (pi >= tend) break; np = TAILQ_NEXT(p, listq); if (vm_page_none_valid(p)) continue; if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) { if (object->generation != curgeneration && (flags & OBJPC_SYNC) != 0) goto rescan; np = vm_page_find_least(object, pi); continue; } if (!vm_object_page_remove_write(p, flags, &allclean)) { vm_page_xunbusy(p); continue; } if (object->type == OBJT_VNODE) { n = vm_object_page_collect_flush(object, p, pagerflags, flags, &allclean, &eio); if (eio) { res = FALSE; allclean = FALSE; } if (object->generation != curgeneration && (flags & OBJPC_SYNC) != 0) goto rescan; /* * If the VOP_PUTPAGES() did a truncated write, so * that even the first page of the run is not fully * written, vm_pageout_flush() returns 0 as the run * length. Since the condition that caused truncated * write may be permanent, e.g. exhausted free space, * accepting n == 0 would cause an infinite loop. * * Forwarding the iterator leaves the unwritten page * behind, but there is not much we can do there if * filesystem refuses to write it. */ if (n == 0) { n = 1; allclean = FALSE; } } else { n = 1; vm_page_xunbusy(p); } np = vm_page_find_least(object, pi + n); } #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0); #endif /* * Leave updating cleangeneration for tmpfs objects to tmpfs * scan. It needs to update mtime, which happens for other * filesystems during page writeouts. */ if (allclean && object->type == OBJT_VNODE) object->cleangeneration = curgeneration; return (res); } static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *allclean, boolean_t *eio) { vm_page_t ma[vm_pageout_page_count], p_first, tp; int count, i, mreq, runlen; vm_page_lock_assert(p, MA_NOTOWNED); vm_page_assert_xbusied(p); VM_OBJECT_ASSERT_WLOCKED(object); count = 1; mreq = 0; for (tp = p; count < vm_pageout_page_count; count++) { tp = vm_page_next(tp); if (tp == NULL || vm_page_tryxbusy(tp) == 0) break; if (!vm_object_page_remove_write(tp, flags, allclean)) { vm_page_xunbusy(tp); break; } } for (p_first = p; count < vm_pageout_page_count; count++) { tp = vm_page_prev(p_first); if (tp == NULL || vm_page_tryxbusy(tp) == 0) break; if (!vm_object_page_remove_write(tp, flags, allclean)) { vm_page_xunbusy(tp); break; } p_first = tp; mreq++; } for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++) ma[i] = tp; vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio); return (runlen); } /* * Note that there is absolutely no sense in writing out * anonymous objects, so we track down the vnode object * to write out. * We invalidate (remove) all pages from the address space * for semantic correctness. * * If the backing object is a device object with unmanaged pages, then any * mappings to the specified range of pages must be removed before this * function is called. * * Note: certain anonymous maps, such as MAP_NOSYNC maps, * may start out with a NULL object. 
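vm_object_page_collect_flush() above grows a run of consecutive flushable pages forward and then backward from the starting page so a single pager call can write the whole cluster. A small standalone sketch of that run collection over a plain dirty bitmap (a hypothetical helper, not the kernel routine):

        #include <stdbool.h>
        #include <stdio.h>

        #define MAXRUN  8

        /*
         * Starting from index "i", extend forward and then backward over
         * adjacent dirty pages so one call can flush the whole run.
         */
        static int
        collect_run(const bool *dirty, int npages, int i, int *first)
        {
                int lo = i, hi = i;

                while (hi + 1 < npages && hi - lo + 1 < MAXRUN && dirty[hi + 1])
                        hi++;
                while (lo > 0 && hi - lo + 1 < MAXRUN && dirty[lo - 1])
                        lo--;
                *first = lo;
                return (hi - lo + 1);
        }

        int
        main(void)
        {
                bool dirty[] = { false, true, true, true, false, true };
                int first, n;

                n = collect_run(dirty, 6, 2, &first);
                printf("flush %d pages starting at %d\n", n, first); /* 3 at 1 */
                return (0);
        }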
*/ boolean_t vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, boolean_t syncio, boolean_t invalidate) { vm_object_t backing_object; struct vnode *vp; struct mount *mp; int error, flags, fsync_after; boolean_t res; if (object == NULL) return (TRUE); res = TRUE; error = 0; VM_OBJECT_WLOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_WLOCK(backing_object); offset += object->backing_object_offset; VM_OBJECT_WUNLOCK(object); object = backing_object; if (object->size < OFF_TO_IDX(offset + size)) size = IDX_TO_OFF(object->size) - offset; } /* * Flush pages if writing is allowed, invalidate them * if invalidation requested. Pages undergoing I/O * will be ignored by vm_object_page_remove(). * * We cannot lock the vnode and then wait for paging * to complete without deadlocking against vm_fault. * Instead we simply call vm_object_page_remove() and * allow it to block internally on a page-by-page * basis when it encounters pages undergoing async * I/O. */ if (object->type == OBJT_VNODE && vm_object_mightbedirty(object) != 0 && ((vp = object->handle)->v_vflag & VV_NOSYNC) == 0) { VM_OBJECT_WUNLOCK(object); (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (syncio && !invalidate && offset == 0 && atop(size) == object->size) { /* * If syncing the whole mapping of the file, * it is faster to schedule all the writes in * async mode, also allowing the clustering, * and then wait for i/o to complete. */ flags = 0; fsync_after = TRUE; } else { flags = (syncio || invalidate) ? OBJPC_SYNC : 0; flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0; fsync_after = FALSE; } VM_OBJECT_WLOCK(object); res = vm_object_page_clean(object, offset, offset + size, flags); VM_OBJECT_WUNLOCK(object); if (fsync_after) error = VOP_FSYNC(vp, MNT_WAIT, curthread); VOP_UNLOCK(vp); vn_finished_write(mp); if (error != 0) res = FALSE; VM_OBJECT_WLOCK(object); } if ((object->type == OBJT_VNODE || object->type == OBJT_DEVICE) && invalidate) { if (object->type == OBJT_DEVICE) /* * The option OBJPR_NOTMAPPED must be passed here * because vm_object_page_remove() cannot remove * unmanaged mappings. */ flags = OBJPR_NOTMAPPED; else if (old_msync) flags = 0; else flags = OBJPR_CLEANONLY; vm_object_page_remove(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + size + PAGE_MASK), flags); } VM_OBJECT_WUNLOCK(object); return (res); } /* * Determine whether the given advice can be applied to the object. Advice is * not applied to unmanaged pages since they never belong to page queues, and * since MADV_FREE is destructive, it can apply only to anonymous pages that * have been mapped at most once. */ static bool vm_object_advice_applies(vm_object_t object, int advice) { if ((object->flags & OBJ_UNMANAGED) != 0) return (false); if (advice != MADV_FREE) return (true); return ((object->flags & (OBJ_ONEMAPPING | OBJ_ANON)) == (OBJ_ONEMAPPING | OBJ_ANON)); } static void vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex, vm_size_t size) { if (advice == MADV_FREE && object->type == OBJT_SWAP) swap_pager_freespace(object, pindex, size); } /* * vm_object_madvise: * * Implements the madvise function at the object/page level. * * MADV_WILLNEED (any object) * * Activate the specified pages if they are resident. * * MADV_DONTNEED (any object) * * Deactivate the specified pages if they are resident. * * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects, * OBJ_ONEMAPPING only) * * Deactivate and clean the specified pages if they are * resident. 
This permits the process to reuse the pages * without faulting or the kernel to reclaim the pages * without I/O. */ void vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end, int advice) { vm_pindex_t tpindex; vm_object_t backing_object, tobject; vm_page_t m, tm; if (object == NULL) return; relookup: VM_OBJECT_WLOCK(object); if (!vm_object_advice_applies(object, advice)) { VM_OBJECT_WUNLOCK(object); return; } for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) { tobject = object; /* * If the next page isn't resident in the top-level object, we * need to search the shadow chain. When applying MADV_FREE, we * take care to release any swap space used to store * non-resident pages. */ if (m == NULL || pindex < m->pindex) { /* * Optimize a common case: if the top-level object has * no backing object, we can skip over the non-resident * range in constant time. */ if (object->backing_object == NULL) { tpindex = (m != NULL && m->pindex < end) ? m->pindex : end; vm_object_madvise_freespace(object, advice, pindex, tpindex - pindex); if ((pindex = tpindex) == end) break; goto next_page; } tpindex = pindex; do { vm_object_madvise_freespace(tobject, advice, tpindex, 1); /* * Prepare to search the next object in the * chain. */ backing_object = tobject->backing_object; if (backing_object == NULL) goto next_pindex; VM_OBJECT_WLOCK(backing_object); tpindex += OFF_TO_IDX(tobject->backing_object_offset); if (tobject != object) VM_OBJECT_WUNLOCK(tobject); tobject = backing_object; if (!vm_object_advice_applies(tobject, advice)) goto next_pindex; } while ((tm = vm_page_lookup(tobject, tpindex)) == NULL); } else { next_page: tm = m; m = TAILQ_NEXT(m, listq); } /* * If the page is not in a normal state, skip it. The page * can not be invalidated while the object lock is held. */ if (!vm_page_all_valid(tm) || vm_page_wired(tm)) goto next_pindex; KASSERT((tm->flags & PG_FICTITIOUS) == 0, ("vm_object_madvise: page %p is fictitious", tm)); KASSERT((tm->oflags & VPO_UNMANAGED) == 0, ("vm_object_madvise: page %p is not managed", tm)); if (vm_page_tryxbusy(tm) == 0) { if (object != tobject) VM_OBJECT_WUNLOCK(object); if (advice == MADV_WILLNEED) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(tm, PGA_REFERENCED); } vm_page_busy_sleep(tm, "madvpo", false); goto relookup; } vm_page_advise(tm, advice); vm_page_xunbusy(tm); vm_object_madvise_freespace(tobject, advice, tm->pindex, 1); next_pindex: if (tobject != object) VM_OBJECT_WUNLOCK(tobject); } VM_OBJECT_WUNLOCK(object); } /* * vm_object_shadow: * * Create a new object which is backed by the * specified existing object range. The source * object reference is deallocated. * * The new object and offset into that object * are returned in the source parameters. */ void vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length, struct ucred *cred, bool shared) { vm_object_t source; vm_object_t result; source = *object; /* * Don't create the new object if the old object isn't shared. * * If we hold the only reference we can guarantee that it won't * increase while we have the map locked. Otherwise the race is * harmless and we will end up with an extra shadow object that * will be collapsed later. */ if (source != NULL && source->ref_count == 1 && (source->flags & OBJ_ANON) != 0) return; /* * Allocate a new object with the given length. 
*/ result = vm_object_allocate_anon(atop(length), source, cred, length); /* * Store the offset into the source object, and fix up the offset into * the new object. */ result->backing_object_offset = *offset; if (shared || source != NULL) { VM_OBJECT_WLOCK(result); /* * The new object shadows the source object, adding a * reference to it. Our caller changes his reference * to point to the new object, removing a reference to * the source object. Net result: no change of * reference count, unless the caller needs to add one * more reference due to forking a shared map entry. */ if (shared) { vm_object_reference_locked(result); vm_object_clear_flag(result, OBJ_ONEMAPPING); } /* * Try to optimize the result object's page color when * shadowing in order to maintain page coloring * consistency in the combined shadowed object. */ if (source != NULL) { vm_object_backing_insert(result, source); result->domain = source->domain; #if VM_NRESERVLEVEL > 0 result->flags |= source->flags & OBJ_COLORED; result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER - 1)) - 1); #endif } VM_OBJECT_WUNLOCK(result); } /* * Return the new things */ *offset = 0; *object = result; } /* * vm_object_split: * * Split the pages in a map entry into a new object. This affords * easier removal of unused pages, and keeps object inheritance from * being a negative impact on memory usage. */ void vm_object_split(vm_map_entry_t entry) { vm_page_t m, m_busy, m_next; vm_object_t orig_object, new_object, backing_object; vm_pindex_t idx, offidxstart; vm_size_t size; orig_object = entry->object.vm_object; KASSERT((orig_object->flags & OBJ_ONEMAPPING) != 0, ("vm_object_split: Splitting object with multiple mappings.")); if ((orig_object->flags & OBJ_ANON) == 0) return; if (orig_object->ref_count <= 1) return; VM_OBJECT_WUNLOCK(orig_object); offidxstart = OFF_TO_IDX(entry->offset); size = atop(entry->end - entry->start); /* * If swap_pager_copy() is later called, it will convert new_object * into a swap object. */ new_object = vm_object_allocate_anon(size, orig_object, orig_object->cred, ptoa(size)); /* * We must wait for the orig_object to complete any in-progress * collapse so that the swap blocks are stable below. The * additional reference on backing_object by new object will * prevent further collapse operations until split completes. */ VM_OBJECT_WLOCK(orig_object); vm_object_collapse_wait(orig_object); /* * At this point, the new object is still private, so the order in * which the original and new objects are locked does not matter. */ VM_OBJECT_WLOCK(new_object); new_object->domain = orig_object->domain; backing_object = orig_object->backing_object; if (backing_object != NULL) { vm_object_backing_insert_ref(new_object, backing_object); new_object->backing_object_offset = orig_object->backing_object_offset + entry->offset; } if (orig_object->cred != NULL) { crhold(orig_object->cred); KASSERT(orig_object->charge >= ptoa(size), ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } /* * Mark the split operation so that swap_pager_getpages() knows * that the object is in transition. 
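vm_object_split() above computes offidxstart, the map entry's byte offset converted to pages, and the rename loop that follows gives each moved page the index m->pindex - offidxstart in the new object. A tiny worked example of that OFF_TO_IDX-style arithmetic, assuming a 4 KB page size:

        #include <stdio.h>

        #define PAGE_SHIFT      12
        #define PAGE_SIZE       (1UL << PAGE_SHIFT)
        #define OFF_TO_IDX(off) ((unsigned long)(off) >> PAGE_SHIFT)

        int
        main(void)
        {
                unsigned long entry_offset = 5 * PAGE_SIZE; /* entry starts 5 pages in */
                unsigned long pindex = 7;       /* page index in the original object */
                unsigned long offidxstart = OFF_TO_IDX(entry_offset);

                /* Index the same page gets in the object created by the split. */
                printf("new index = %lu\n", pindex - offidxstart);      /* 2 */
                return (0);
        }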
*/ vm_object_set_flag(orig_object, OBJ_SPLIT); m_busy = NULL; #ifdef INVARIANTS idx = 0; #endif retry: m = vm_page_find_least(orig_object, offidxstart); KASSERT(m == NULL || idx <= m->pindex - offidxstart, ("%s: object %p was repopulated", __func__, orig_object)); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); /* * We must wait for pending I/O to complete before we can * rename the page. * * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. */ if (vm_page_tryxbusy(m) == 0) { VM_OBJECT_WUNLOCK(new_object); vm_page_sleep_if_busy(m, "spltwt"); VM_OBJECT_WLOCK(new_object); goto retry; } /* * The page was left invalid. Likely placed there by * an incomplete fault. Just remove and ignore. */ if (vm_page_none_valid(m)) { if (vm_page_remove(m)) vm_page_free(m); continue; } /* vm_page_rename() will dirty the page. */ if (vm_page_rename(m, new_object, idx)) { vm_page_xunbusy(m); VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); vm_radix_wait(); VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } #if VM_NRESERVLEVEL > 0 /* * If some of the reservation's allocated pages remain with * the original object, then transferring the reservation to * the new object is neither particularly beneficial nor * particularly harmful as compared to leaving the reservation * with the original object. If, however, all of the * reservation's allocated pages are transferred to the new * object, then transferring the reservation is typically * beneficial. Determining which of these two cases applies * would be more costly than unconditionally renaming the * reservation. */ vm_reserv_rename(m, new_object, orig_object, offidxstart); #endif /* * orig_object's type may change while sleeping, so keep track * of the beginning of the busied range. */ if (orig_object->type != OBJT_SWAP) vm_page_xunbusy(m); else if (m_busy == NULL) m_busy = m; } if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); if (m_busy != NULL) TAILQ_FOREACH_FROM(m_busy, &new_object->memq, listq) vm_page_xunbusy(m_busy); } vm_object_clear_flag(orig_object, OBJ_SPLIT); VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); entry->object.vm_object = new_object; entry->offset = 0LL; vm_object_deallocate(orig_object); VM_OBJECT_WLOCK(new_object); } static vm_page_t vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p) { vm_object_t backing_object; VM_OBJECT_ASSERT_WLOCKED(object); backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(backing_object); KASSERT(p == NULL || p->object == object || p->object == backing_object, ("invalid ownership %p %p %p", p, object, backing_object)); /* The page is only NULL when rename fails. 
*/ if (p == NULL) { VM_OBJECT_WUNLOCK(object); VM_OBJECT_WUNLOCK(backing_object); vm_radix_wait(); } else { if (p->object == object) VM_OBJECT_WUNLOCK(backing_object); else VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmocol", false); } VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); return (TAILQ_FIRST(&backing_object->memq)); } static bool vm_object_scan_all_shadowed(vm_object_t object) { vm_object_t backing_object; vm_page_t p, pp; vm_pindex_t backing_offset_index, new_pindex, pi, ps; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; if ((backing_object->flags & OBJ_ANON) == 0) return (false); pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset); p = vm_page_find_least(backing_object, pi); ps = swap_pager_find_least(backing_object, pi); /* * Only check pages inside the parent object's range and * inside the parent object's mapping of the backing object. */ for (;; pi++) { if (p != NULL && p->pindex < pi) p = TAILQ_NEXT(p, listq); if (ps < pi) ps = swap_pager_find_least(backing_object, pi); if (p == NULL && ps >= backing_object->size) break; else if (p == NULL) pi = ps; else pi = MIN(p->pindex, ps); new_pindex = pi - backing_offset_index; if (new_pindex >= object->size) break; if (p != NULL) { /* * If the backing object page is busy a * grandparent or older page may still be * undergoing CoW. It is not safe to collapse * the backing object until it is quiesced. */ if (vm_page_tryxbusy(p) == 0) return (false); /* * We raced with the fault handler that left * newly allocated invalid page on the object * queue and retried. */ if (!vm_page_all_valid(p)) goto unbusy_ret; } /* * See if the parent has the page or if the parent's object * pager has the page. If the parent has the page but the page * is not valid, the parent's object pager must have the page. * * If this fails, the parent does not completely shadow the * object and we might as well give up now. */ pp = vm_page_lookup(object, new_pindex); /* * The valid check here is stable due to object lock * being required to clear valid and initiate paging. * Busy of p disallows fault handler to validate pp. 
*/ if ((pp == NULL || vm_page_none_valid(pp)) && !vm_pager_has_page(object, new_pindex, NULL, NULL)) goto unbusy_ret; if (p != NULL) vm_page_xunbusy(p); } return (true); unbusy_ret: if (p != NULL) vm_page_xunbusy(p); return (false); } static void vm_object_collapse_scan(vm_object_t object) { vm_object_t backing_object; vm_page_t next, p, pp; vm_pindex_t backing_offset_index, new_pindex; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); /* * Our scan */ for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); new_pindex = p->pindex - backing_offset_index; /* * Check for busy page */ if (vm_page_tryxbusy(p) == 0) { next = vm_object_collapse_scan_wait(object, p); continue; } KASSERT(object->backing_object == backing_object, ("vm_object_collapse_scan: backing object mismatch %p != %p", object->backing_object, backing_object)); KASSERT(p->object == backing_object, ("vm_object_collapse_scan: object mismatch %p != %p", p->object, backing_object)); if (p->pindex < backing_offset_index || new_pindex >= object->size) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); continue; } if (!vm_page_all_valid(p)) { KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); continue; } pp = vm_page_lookup(object, new_pindex); if (pp != NULL && vm_page_tryxbusy(pp) == 0) { vm_page_xunbusy(p); /* * The page in the parent is busy and possibly not * (yet) valid. Until its state is finalized by the * busy bit owner, we can't tell whether it shadows the * original page. */ next = vm_object_collapse_scan_wait(object, pp); continue; } if (pp != NULL && vm_page_none_valid(pp)) { /* * The page was invalid in the parent. Likely placed * there by an incomplete fault. Just remove and * ignore. p can replace it. */ if (vm_page_remove(pp)) vm_page_free(pp); pp = NULL; } if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL, NULL)) { /* * The page already exists in the parent OR swap exists * for this location in the parent. Leave the parent's * page alone. Destroy the original page from the * backing object. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); if (pp != NULL) vm_page_xunbusy(pp); continue; } /* * Page does not exist in parent, rename the page from the * backing object to the main object. * * If the page was mapped to a process, it can remain mapped * through the rename. vm_page_rename() will dirty the page. */ if (vm_page_rename(p, object, new_pindex)) { vm_page_xunbusy(p); next = vm_object_collapse_scan_wait(object, NULL); continue; } /* Use the old pindex to free the right page. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, new_pindex + backing_offset_index, 1); #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(p, object, backing_object, backing_offset_index); #endif vm_page_xunbusy(p); } return; } /* * vm_object_collapse: * * Collapse an object with the object backing it. * Pages in the backing object are moved into the * parent, and the backing object is deallocated. 
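vm_object_collapse() moves every page the parent is missing up from the backing object and then disposes of the backing object. A toy model of that merge over two small arrays, ignoring busy pages, swap blocks, offsets, and locking:

        #include <stdio.h>

        #define NPAGES  4

        /*
         * Toy collapse: for every slot the parent does not already fill,
         * pull the page up from the backing object; afterwards the backing
         * object holds nothing the parent still needs.
         */
        static void
        collapse(int parent[], int backing[], int n)
        {
                for (int i = 0; i < n; i++) {
                        if (parent[i] == 0 && backing[i] != 0) {
                                parent[i] = backing[i]; /* "rename" the page */
                                backing[i] = 0;
                        }
                }
        }

        int
        main(void)
        {
                int parent[NPAGES]  = { 10, 0, 0, 40 };  /* 0 == not resident */
                int backing[NPAGES] = { 1, 2, 3, 4 };

                collapse(parent, backing, NPAGES);
                for (int i = 0; i < NPAGES; i++)
                        printf("%d ", parent[i]);        /* 10 2 3 40 */
                printf("\n");
                return (0);
        }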
*/ void vm_object_collapse(vm_object_t object) { vm_object_t backing_object, new_backing_object; VM_OBJECT_ASSERT_WLOCKED(object); while (TRUE) { KASSERT((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON, ("collapsing invalid object")); /* * Wait for the backing_object to finish any pending * collapse so that the caller sees the shortest possible * shadow chain. */ backing_object = vm_object_backing_collapse_wait(object); if (backing_object == NULL) return; KASSERT(object->ref_count > 0 && object->ref_count > object->shadow_count, ("collapse with invalid ref %d or shadow %d count.", object->ref_count, object->shadow_count)); KASSERT((backing_object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0, ("vm_object_collapse: Backing object already collapsing.")); KASSERT((object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0, ("vm_object_collapse: object is already collapsing.")); /* * We know that we can either collapse the backing object if * the parent is the only reference to it, or (perhaps) have * the parent bypass the object if the parent happens to shadow * all the resident pages in the entire backing object. */ if (backing_object->ref_count == 1) { KASSERT(backing_object->shadow_count == 1, ("vm_object_collapse: shadow_count: %d", backing_object->shadow_count)); vm_object_pip_add(object, 1); vm_object_set_flag(object, OBJ_COLLAPSING); vm_object_pip_add(backing_object, 1); vm_object_set_flag(backing_object, OBJ_DEAD); /* * If there is exactly one reference to the backing * object, we can collapse it into the parent. */ vm_object_collapse_scan(object); #if VM_NRESERVLEVEL > 0 /* * Break any reservations from backing_object. */ if (__predict_false(!LIST_EMPTY(&backing_object->rvq))) vm_reserv_break_all(backing_object); #endif /* * Move the pager from backing_object to object. */ if (backing_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case * the backing_object's and object's locks are * released and reacquired. * Since swap_pager_copy() is being asked to * destroy backing_object, it will change the * type to OBJT_DEFAULT. */ swap_pager_copy( backing_object, object, OFF_TO_IDX(object->backing_object_offset), TRUE); } /* * Object now shadows whatever backing_object did. */ vm_object_clear_flag(object, OBJ_COLLAPSING); vm_object_backing_transfer(object, backing_object); object->backing_object_offset += backing_object->backing_object_offset; VM_OBJECT_WUNLOCK(object); vm_object_pip_wakeup(object); /* * Discard backing_object. * * Since the backing object has no pages, no pager left, * and no object references within it, all that is * necessary is to dispose of it. */ KASSERT(backing_object->ref_count == 1, ( "backing_object %p was somehow re-referenced during collapse!", backing_object)); vm_object_pip_wakeup(backing_object); (void)refcount_release(&backing_object->ref_count); vm_object_terminate(backing_object); counter_u64_add(object_collapses, 1); VM_OBJECT_WLOCK(object); } else { /* * If we do not entirely shadow the backing object, * there is nothing we can do so we give up. * * The object lock and backing_object lock must not * be dropped during this sequence. */ if (!vm_object_scan_all_shadowed(object)) { VM_OBJECT_WUNLOCK(backing_object); break; } /* * Make the parent shadow the next object in the * chain. Deallocating backing_object will not remove * it, since its reference count is at least 2. 
*/ vm_object_backing_remove_locked(object); new_backing_object = backing_object->backing_object; if (new_backing_object != NULL) { vm_object_backing_insert_ref(object, new_backing_object); object->backing_object_offset += backing_object->backing_object_offset; } /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish. */ (void)refcount_release(&backing_object->ref_count); KASSERT(backing_object->ref_count >= 1, ( "backing_object %p was somehow dereferenced during collapse!", backing_object)); VM_OBJECT_WUNLOCK(backing_object); counter_u64_add(object_bypasses, 1); } /* * Try again with this object's new backing object. */ } } /* * vm_object_page_remove: * * For the given object, either frees or invalidates each of the * specified pages. In general, a page is freed. However, if a page is * wired for any reason other than the existence of a managed, wired * mapping, then it may be invalidated but not removed from the object. * Pages are specified by the given range ["start", "end") and the option * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range * extends from "start" to the end of the object. If the option * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the * specified range are affected. If the option OBJPR_NOTMAPPED is * specified, then the pages within the specified range must have no * mappings. Otherwise, if this option is not specified, any mappings to * the specified pages are removed before the pages are freed or * invalidated. * * In general, this operation should only be performed on objects that * contain managed pages. There are, however, two exceptions. First, it * is performed on the kernel and kmem objects by vm_map_entry_delete(). * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device- * backed pages. In both of these cases, the option OBJPR_CLEANONLY must * not be specified and the option OBJPR_NOTMAPPED must be specified. * * The object must be locked. */ void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { vm_page_t p, next; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, ("vm_object_page_remove: illegal options for object %p", object)); if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); again: p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* * If the page is wired for any reason besides the existence * of managed, wired mappings, then it cannot be freed. For * example, fictitious pages, which represent device memory, * are inherently wired and cannot be freed. They can, * however, be invalidated if the option OBJPR_CLEANONLY is * not specified. 
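The removal loop here treats wired pages specially: they cannot be freed, only invalidated, and only when OBJPR_CLEANONLY is not set. A compact userspace sketch of that policy, with illustrative types rather than the kernel structures:

        #include <stdbool.h>
        #include <stdio.h>

        struct page {
                bool wired;
                bool valid;
                bool present;
        };

        /*
         * Remove a range of pages: free whatever can be freed, but a wired
         * page can only be invalidated, never taken away from the object.
         */
        static void
        page_remove(struct page *pg, int n, bool cleanonly)
        {
                for (int i = 0; i < n; i++) {
                        if (!pg[i].present)
                                continue;
                        if (pg[i].wired) {
                                if (!cleanonly)
                                        pg[i].valid = false;    /* invalidate, keep it */
                                continue;
                        }
                        pg[i].present = false;                  /* free it */
                }
        }

        int
        main(void)
        {
                struct page pg[] = {
                        { .wired = false, .valid = true, .present = true },
                        { .wired = true,  .valid = true, .present = true },
                };

                page_remove(pg, 2, false);
                printf("page0 present=%d, page1 present=%d valid=%d\n",
                    pg[0].present, pg[1].present, pg[1].valid);  /* 0, 1 0 */
                return (0);
        }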
*/ if (vm_page_tryxbusy(p) == 0) { vm_page_sleep_if_busy(p, "vmopar"); goto again; } if (vm_page_wired(p)) { wired: if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) pmap_remove_all(p); if ((options & OBJPR_CLEANONLY) == 0) { vm_page_invalid(p); vm_page_undirty(p); } vm_page_xunbusy(p); continue; } KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && !vm_page_none_valid(p)) { if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0 && !vm_page_try_remove_write(p)) goto wired; if (p->dirty != 0) { vm_page_xunbusy(p); continue; } } if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0 && !vm_page_try_remove_all(p)) goto wired; vm_page_free(p); } vm_object_pip_wakeup(object); if (object->type == OBJT_SWAP) { if (end == 0) end = object->size; swap_pager_freespace(object, start, end - start); } } /* * vm_object_page_noreuse: * * For the given object, attempt to move the specified pages to * the head of the inactive queue. This bypasses regular LRU * operation and allows the pages to be reused quickly under memory * pressure. If a page is wired for any reason, then it will not * be queued. Pages are specified by the range ["start", "end"). * As a special case, if "end" is zero, then the range extends from * "start" to the end of the object. * * This operation should only be performed on objects that * contain non-fictitious, managed pages. * * The object must be locked. */ void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t p, next; VM_OBJECT_ASSERT_LOCKED(object); KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0, ("vm_object_page_noreuse: illegal object %p", object)); if (object->resident_page_count == 0) return; p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); vm_page_deactivate_noreuse(p); } } /* * Populate the specified range of the object with valid pages. Returns * TRUE if the range is successfully populated and FALSE otherwise. * * Note: This function should be optimized to pass a larger array of * pages to vm_pager_get_pages() before it is applied to a non- * OBJT_DEVICE object. * * The object must be locked. */ boolean_t vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m; vm_pindex_t pindex; int rv; VM_OBJECT_ASSERT_WLOCKED(object); for (pindex = start; pindex < end; pindex++) { rv = vm_page_grab_valid(&m, object, pindex, VM_ALLOC_NORMAL); if (rv != VM_PAGER_OK) break; /* * Keep "m" busy because a subsequent iteration may unlock * the object. */ } if (pindex > start) { m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { vm_page_xunbusy(m); m = TAILQ_NEXT(m, listq); } } return (pindex == end); } /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining * regions of memory into a single object. * * returns TRUE if objects were combined. * * NOTE: Only works at the moment if the second object is NULL - * if it's not, which object do we lock first? 
* * Parameters: * prev_object First object to coalesce * prev_offset Offset into prev_object * prev_size Size of reference to prev_object * next_size Size of reference to the second object * reserved Indicator that extension region has * swap accounted for * * Conditions: * The object must *not* be locked. */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) { vm_pindex_t next_pindex; if (prev_object == NULL) return (TRUE); if ((prev_object->flags & OBJ_ANON) == 0) return (FALSE); VM_OBJECT_WLOCK(prev_object); /* * Try to collapse the object first. */ vm_object_collapse(prev_object); /* * Can't coalesce if: . more than one reference . paged out . shadows * another object . has a copy elsewhere (any of which mean that the * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->backing_object != NULL) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; next_pindex = OFF_TO_IDX(prev_offset) + prev_size; if (prev_object->ref_count > 1 && prev_object->size != next_pindex && (prev_object->flags & OBJ_ONEMAPPING) == 0) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } /* * Account for the charge. */ if (prev_object->cred != NULL) { /* * If prev_object was charged, then this mapping, * although not charged now, may become writable * later. Non-NULL cred in the object would prevent * swap reservation during enabling of the write * access, so reserve swap now. Failed reservation * cause allocation of the separate object for the map * entry, and swap reservation for this entry is * managed in appropriate time. */ if (!reserved && !swap_reserve_by_cred(ptoa(next_size), prev_object->cred)) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_object->charge += ptoa(next_size); } /* * Remove any pages that may still be in the object from a previous * deallocation. */ if (next_pindex < prev_object->size) { vm_object_page_remove(prev_object, next_pindex, next_pindex + next_size, 0); #if 0 if (prev_object->cred != NULL) { KASSERT(prev_object->charge >= ptoa(prev_object->size - next_pindex), ("object %p overcharged 1 %jx %jx", prev_object, (uintmax_t)next_pindex, (uintmax_t)next_size)); prev_object->charge -= ptoa(prev_object->size - next_pindex); } #endif } /* * Extend the object if necessary. */ if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; VM_OBJECT_WUNLOCK(prev_object); return (TRUE); } void vm_object_set_writeable_dirty(vm_object_t object) { /* Only set for vnodes & tmpfs */ if (object->type != OBJT_VNODE && (object->flags & OBJ_TMPFS_NODE) == 0) return; atomic_add_int(&object->generation, 1); } /* * vm_object_unwire: * * For each page offset within the specified range of the given object, * find the highest-level page in the shadow chain and unwire it. A page * must exist at every page offset, and the highest-level page must be * wired. */ void vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, uint8_t queue) { vm_object_t tobject, t1object; vm_page_t m, tm; vm_pindex_t end_pindex, pindex, tpindex; int depth, locked_depth; KASSERT((offset & PAGE_MASK) == 0, ("vm_object_unwire: offset is not page aligned")); KASSERT((length & PAGE_MASK) == 0, ("vm_object_unwire: length is not a multiple of PAGE_SIZE")); /* The wired count of a fictitious page never changes. 
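Wiring and unwiring of the kind vm_object_unwire() performs are normally driven from system calls such as mlock(2) and munlock(2), with map unwiring walking down the shadow chain of each entry. Below is a hedged, minimal userspace sketch of that path; the buffer size is arbitrary, nothing in it inspects kernel state, and mlock may require privilege or a raised RLIMIT_MEMLOCK.

        /*
         * Hedged sketch: mlock(2) wires the pages backing a range; munlock(2)
         * drives the unwire path that ends in vm_object_unwire().
         */
        #include <sys/mman.h>
        #include <err.h>
        #include <string.h>

        int
        main(void)
        {
                size_t len = 4 * 4096;
                char *p;

                p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                    MAP_ANON | MAP_PRIVATE, -1, 0);
                if (p == MAP_FAILED)
                        err(1, "mmap");

                /* Touch the pages so there is something to wire. */
                memset(p, 0, len);

                if (mlock(p, len) == -1)
                        err(1, "mlock");

                /* ... use the wired buffer ... */

                if (munlock(p, len) == -1)
                        err(1, "munlock");
                munmap(p, len);
                return (0);
        }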
*/ if ((object->flags & OBJ_FICTITIOUS) != 0) return; pindex = OFF_TO_IDX(offset); end_pindex = pindex + atop(length); again: locked_depth = 1; VM_OBJECT_RLOCK(object); m = vm_page_find_least(object, pindex); while (pindex < end_pindex) { if (m == NULL || pindex < m->pindex) { /* * The first object in the shadow chain doesn't * contain a page at the current index. Therefore, * the page must exist in a backing object. */ tobject = object; tpindex = pindex; depth = 0; do { tpindex += OFF_TO_IDX(tobject->backing_object_offset); tobject = tobject->backing_object; KASSERT(tobject != NULL, ("vm_object_unwire: missing page")); if ((tobject->flags & OBJ_FICTITIOUS) != 0) goto next_page; depth++; if (depth == locked_depth) { locked_depth++; VM_OBJECT_RLOCK(tobject); } } while ((tm = vm_page_lookup(tobject, tpindex)) == NULL); } else { tm = m; m = TAILQ_NEXT(m, listq); } if (vm_page_trysbusy(tm) == 0) { for (tobject = object; locked_depth >= 1; locked_depth--) { t1object = tobject->backing_object; if (tm->object != tobject) VM_OBJECT_RUNLOCK(tobject); tobject = t1object; } vm_page_busy_sleep(tm, "unwbo", true); goto again; } vm_page_unwire(tm, queue); vm_page_sunbusy(tm); next_page: pindex++; } /* Release the accumulated object locks. */ for (tobject = object; locked_depth >= 1; locked_depth--) { t1object = tobject->backing_object; VM_OBJECT_RUNLOCK(tobject); tobject = t1object; } } /* * Return the vnode for the given object, or NULL if none exists. * For tmpfs objects, the function may return NULL if there is * no vnode allocated at the time of the call. */ struct vnode * vm_object_vnode(vm_object_t object) { struct vnode *vp; VM_OBJECT_ASSERT_LOCKED(object); if (object->type == OBJT_VNODE) { vp = object->handle; KASSERT(vp != NULL, ("%s: OBJT_VNODE has no vnode", __func__)); } else if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0) { vp = object->un_pager.swp.swp_tmpfs; KASSERT(vp != NULL, ("%s: OBJT_TMPFS has no vnode", __func__)); } else { vp = NULL; } return (vp); } /* * Busy the vm object. This prevents new pages belonging to the object from * becoming busy. Existing pages persist as busy. Callers are responsible * for checking page state before proceeding. */ void vm_object_busy(vm_object_t obj) { VM_OBJECT_ASSERT_LOCKED(obj); blockcount_acquire(&obj->busy, 1); /* The fence is required to order loads of page busy. */ atomic_thread_fence_acq_rel(); } void vm_object_unbusy(vm_object_t obj) { blockcount_release(&obj->busy, 1); } void vm_object_busy_wait(vm_object_t obj, const char *wmesg) { VM_OBJECT_ASSERT_UNLOCKED(obj); (void)blockcount_sleep(&obj->busy, NULL, wmesg, PVM); } /* * Return the kvme type of the given object. * If vpp is not NULL, set it to the object's vm_object_vnode() or NULL. 
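The KVME_TYPE_* values produced by the function below are what userland sees in the kve_type field of kinfo_vmentry records, for example through procstat -v. The sketch that follows is a hedged illustration assuming the libutil helper kinfo_getvmmap(3) (link with -lutil); the output format is my own choice, not anything defined by this file.

        /*
         * Hedged sketch: dump each mapping's kvme type for a pid using
         * kinfo_getvmmap(3) from libutil (link with -lutil).
         */
        #include <sys/param.h>
        #include <sys/user.h>
        #include <err.h>
        #include <libutil.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <unistd.h>

        int
        main(int argc, char **argv)
        {
                struct kinfo_vmentry *kiv;
                int cnt, i;
                pid_t pid;

                pid = argc > 1 ? (pid_t)atoi(argv[1]) : getpid();
                kiv = kinfo_getvmmap(pid, &cnt);
                if (kiv == NULL)
                        err(1, "kinfo_getvmmap");
                for (i = 0; i < cnt; i++) {
                        /* kve_type corresponds to the KVME_TYPE_* constants. */
                        printf("0x%016jx-0x%016jx type %d %s\n",
                            (uintmax_t)kiv[i].kve_start,
                            (uintmax_t)kiv[i].kve_end,
                            kiv[i].kve_type, kiv[i].kve_path);
                }
                free(kiv);
                return (0);
        }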
*/ int vm_object_kvme_type(vm_object_t object, struct vnode **vpp) { VM_OBJECT_ASSERT_LOCKED(object); if (vpp != NULL) *vpp = vm_object_vnode(object); switch (object->type) { case OBJT_DEFAULT: return (KVME_TYPE_DEFAULT); case OBJT_VNODE: return (KVME_TYPE_VNODE); case OBJT_SWAP: if ((object->flags & OBJ_TMPFS_NODE) != 0) return (KVME_TYPE_VNODE); return (KVME_TYPE_SWAP); case OBJT_DEVICE: return (KVME_TYPE_DEVICE); case OBJT_PHYS: return (KVME_TYPE_PHYS); case OBJT_DEAD: return (KVME_TYPE_DEAD); case OBJT_SG: return (KVME_TYPE_SG); case OBJT_MGTDEVICE: return (KVME_TYPE_MGTDEVICE); default: return (KVME_TYPE_UNKNOWN); } } static int sysctl_vm_object_list(SYSCTL_HANDLER_ARGS) { struct kinfo_vmobject *kvo; char *fullpath, *freepath; struct vnode *vp; struct vattr va; vm_object_t obj; vm_page_t m; + u_long sp; int count, error; if (req->oldptr == NULL) { /* * If an old buffer has not been provided, generate an * estimate of the space needed for a subsequent call. */ mtx_lock(&vm_object_list_mtx); count = 0; TAILQ_FOREACH(obj, &vm_object_list, object_list) { if (obj->type == OBJT_DEAD) continue; count++; } mtx_unlock(&vm_object_list_mtx); return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) * count * 11 / 10)); } kvo = malloc(sizeof(*kvo), M_TEMP, M_WAITOK); error = 0; /* * VM objects are type stable and are never removed from the * list once added. This allows us to safely read obj->object_list * after reacquiring the VM object lock. */ mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(obj, &vm_object_list, object_list) { if (obj->type == OBJT_DEAD) continue; VM_OBJECT_RLOCK(obj); if (obj->type == OBJT_DEAD) { VM_OBJECT_RUNLOCK(obj); continue; } mtx_unlock(&vm_object_list_mtx); kvo->kvo_size = ptoa(obj->size); kvo->kvo_resident = obj->resident_page_count; kvo->kvo_ref_count = obj->ref_count; kvo->kvo_shadow_count = obj->shadow_count; kvo->kvo_memattr = obj->memattr; kvo->kvo_active = 0; kvo->kvo_inactive = 0; TAILQ_FOREACH(m, &obj->memq, listq) { /* * A page may belong to the object but be * dequeued and set to PQ_NONE while the * object lock is not held. This makes the * reads of m->queue below racy, and we do not * count pages set to PQ_NONE. However, this * sysctl is only meant to give an * approximation of the system anyway. */ if (m->a.queue == PQ_ACTIVE) kvo->kvo_active++; else if (m->a.queue == PQ_INACTIVE) kvo->kvo_inactive++; } kvo->kvo_vn_fileid = 0; kvo->kvo_vn_fsid = 0; kvo->kvo_vn_fsid_freebsd11 = 0; freepath = NULL; fullpath = ""; kvo->kvo_type = vm_object_kvme_type(obj, &vp); - if (vp != NULL) + if (vp != NULL) { vref(vp); + } else if ((obj->flags & OBJ_ANON) != 0) { + MPASS(kvo->kvo_type == KVME_TYPE_DEFAULT || + kvo->kvo_type == KVME_TYPE_SWAP); + kvo->kvo_me = (uintptr_t)obj; + /* tmpfs objs are reported as vnodes */ + kvo->kvo_backing_obj = (uintptr_t)obj->backing_object; + sp = swap_pager_swapped_pages(obj); + kvo->kvo_swapped = sp > UINT32_MAX ? 
UINT32_MAX : sp; + } VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) { kvo->kvo_vn_fileid = va.va_fileid; kvo->kvo_vn_fsid = va.va_fsid; kvo->kvo_vn_fsid_freebsd11 = va.va_fsid; /* truncate */ } vput(vp); } strlcpy(kvo->kvo_path, fullpath, sizeof(kvo->kvo_path)); if (freepath != NULL) free(freepath, M_TEMP); /* Pack record size down */ kvo->kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) + strlen(kvo->kvo_path) + 1; kvo->kvo_structsize = roundup(kvo->kvo_structsize, sizeof(uint64_t)); error = SYSCTL_OUT(req, kvo, kvo->kvo_structsize); mtx_lock(&vm_object_list_mtx); if (error) break; } mtx_unlock(&vm_object_list_mtx); free(kvo, M_TEMP); return (error); } SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject", "List of VM objects"); #include "opt_ddb.h" #ifdef DDB #include #include #include static int _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry) { vm_map_t tmpm; vm_map_entry_t tmpe; vm_object_t obj; if (map == 0) return 0; if (entry == 0) { VM_MAP_ENTRY_FOREACH(tmpe, map) { if (_vm_object_in_map(map, object, tmpe)) { return 1; } } } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { tmpm = entry->object.sub_map; VM_MAP_ENTRY_FOREACH(tmpe, tmpm) { if (_vm_object_in_map(tmpm, object, tmpe)) { return 1; } } } else if ((obj = entry->object.vm_object) != NULL) { for (; obj; obj = obj->backing_object) if (obj == object) { return 1; } } return 0; } static int vm_object_in_map(vm_object_t object) { struct proc *p; /* sx_slock(&allproc_lock); */ FOREACH_PROC_IN_SYSTEM(p) { if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) continue; if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { /* sx_sunlock(&allproc_lock); */ return 1; } } /* sx_sunlock(&allproc_lock); */ if (_vm_object_in_map(kernel_map, object, 0)) return 1; return 0; } DB_SHOW_COMMAND(vmochk, vm_object_check) { vm_object_t object; /* * make sure that internal objs are in a map somewhere * and none have zero ref counts. */ TAILQ_FOREACH(object, &vm_object_list, object_list) { if ((object->flags & OBJ_ANON) != 0) { if (object->ref_count == 0) { db_printf("vmochk: internal obj has zero ref count: %ld\n", (long)object->size); } if (!vm_object_in_map(object)) { db_printf( "vmochk: internal obj is not in a map: " "ref: %d, size: %lu: 0x%lx, backing_object: %p\n", object->ref_count, (u_long)object->size, (u_long)object->size, (void *)object->backing_object); } } if (db_pager_quit) return; } } /* * vm_object_print: [ debug ] */ DB_SHOW_COMMAND(object, vm_object_print_static) { /* XXX convert args. */ vm_object_t object = (vm_object_t)addr; boolean_t full = have_addr; vm_page_t p; /* XXX count is an (unused) arg. Avoid shadowing it. */ #define count was_count int count; if (object == NULL) return; db_iprintf( "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n", object, (int)object->type, (uintmax_t)object->size, object->resident_page_count, object->ref_count, object->flags, object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge); db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n", object->shadow_count, object->backing_object ? 
object->backing_object->ref_count : 0, object->backing_object, (uintmax_t)object->backing_object_offset); if (!full) return; db_indent += 2; count = 0; TAILQ_FOREACH(p, &object->memq, listq) { if (count == 0) db_iprintf("memory:="); else if (count == 6) { db_printf("\n"); db_iprintf(" ..."); count = 0; } else db_printf(","); count++; db_printf("(off=0x%jx,page=0x%jx)", (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p)); if (db_pager_quit) break; } if (count != 0) db_printf("\n"); db_indent -= 2; } /* XXX. */ #undef count /* XXX need this non-static entry for calling from vm_map_print. */ void vm_object_print( /* db_expr_t */ long addr, boolean_t have_addr, /* db_expr_t */ long count, char *modif) { vm_object_print_static(addr, have_addr, count, modif); } DB_SHOW_COMMAND(vmopag, vm_object_print_pages) { vm_object_t object; vm_pindex_t fidx; vm_paddr_t pa; vm_page_t m, prev_m; int rcount, nl, c; nl = 0; TAILQ_FOREACH(object, &vm_object_list, object_list) { db_printf("new object: %p\n", (void *)object); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; fidx = 0; pa = -1; TAILQ_FOREACH(m, &object->memq, listq) { if (m->pindex > 128) break; if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && prev_m->pindex + 1 != m->pindex) { if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; } } if (rcount && (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { ++rcount; continue; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } fidx = m->pindex; pa = VM_PAGE_TO_PHYS(m); rcount = 1; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } } } #endif /* DDB */
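The sysctl_vm_object_list() handler above exports one variable-length kinfo_vmobject record per object, packing each record down to kvo_structsize bytes before the SYSCTL_OUT. A hedged userspace sketch of walking that buffer follows; it assumes the vm.objects name from the SYSCTL_PROC declaration above and prints only a few of the fields the handler fills in, so treat it as an outline rather than a complete consumer.

        /*
         * Hedged sketch: read the vm.objects sysctl and walk its
         * variable-size kinfo_vmobject records using kvo_structsize.
         */
        #include <sys/param.h>
        #include <sys/sysctl.h>
        #include <sys/user.h>
        #include <err.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        int
        main(void)
        {
                struct kinfo_vmobject kvo;
                char *buf, *cp, *end;
                size_t len;

                /* First call sizes the buffer (the handler pads its estimate). */
                if (sysctlbyname("vm.objects", NULL, &len, NULL, 0) == -1)
                        err(1, "sysctlbyname size");
                buf = malloc(len);
                if (buf == NULL)
                        err(1, "malloc");
                if (sysctlbyname("vm.objects", buf, &len, NULL, 0) == -1)
                        err(1, "sysctlbyname data");

                for (cp = buf, end = buf + len; cp < end; cp += kvo.kvo_structsize) {
                        /* Records are truncated to kvo_structsize; copy what fits. */
                        memset(&kvo, 0, sizeof(kvo));
                        memcpy(&kvo, cp, MIN(sizeof(kvo), (size_t)(end - cp)));
                        if (kvo.kvo_structsize <= 0)
                                break;
                        printf("type %d size %ju bytes resident %d ref %d %s\n",
                            kvo.kvo_type, (uintmax_t)kvo.kvo_size,
                            kvo.kvo_resident, kvo.kvo_ref_count, kvo.kvo_path);
                }
                free(buf);
                return (0);
        }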