diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c
index 1e537cad43f4..50de0421922a 100644
--- a/sys/amd64/amd64/exec_machdep.c
+++ b/sys/amd64/amd64/exec_machdep.c
@@ -1,976 +1,977 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 2003 Peter Wemm.
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_kstack_pages.h"
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/reg.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #ifdef SMP
 #include <sys/smp.h>
 #endif
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/ucontext.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
+#include <vm/vm_map.h>
 
 #ifdef DDB
 #ifndef KDB
 #error KDB must be enabled in order for DDB to work!
 #endif
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 #endif
 
 #include <machine/vmparam.h>
 #include <machine/frame.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/proc.h>
 #include <machine/sigframe.h>
 #include <machine/specialreg.h>
 #include <machine/trap.h>
 
 _Static_assert(sizeof(mcontext_t) == 800, "mcontext_t size incorrect");
 _Static_assert(sizeof(ucontext_t) == 880, "ucontext_t size incorrect");
 _Static_assert(sizeof(siginfo_t) == 80, "siginfo_t size incorrect");
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored at top to call routine,
  * followed by call to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the frame pointer, it
  * returns to the user specified pc, psl.
  *
  * A struct sigframe is assembled in kernel memory for the current
  * thread, copied out to the user stack (together with any extended FPU
  * state), and the trapframe is then pointed at PROC_SIGCODE(p).
  *
  * Must be entered with the process lock and the sigacts mutex held.
  * Both are released around the copyout and re-acquired before returning.
  * If the copyout fails the process is killed with SIGILL.
  *
  *	catcher	- handler to be invoked in userland
  *	ksi	- signal information (number, code, address)
  *	mask	- signal mask to be recorded in the saved context
  */
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct sigframe sf, *sfp;
 	struct pcb *pcb;
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	char *sp;
 	struct trapframe *regs;
 	char *xfpusave;
 	size_t xfpusave_len;
 	int sig;
 	int oonstack;
 
 	td = curthread;
 	pcb = td->td_pcb;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	/* Remember whether we were already on the alternate stack. */
 	oonstack = sigonstack(regs->tf_rsp);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = td->td_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	/* The register part of the mcontext, from mc_rdi on, is the trapframe. */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
 	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
 	get_fpcontext(td, &sf.sf_uc.uc_mcontext, &xfpusave, &xfpusave_len);
 	update_pcb_bases(pcb);
 	sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
 	sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
 	bzero(sf.sf_uc.uc_mcontext.mc_spare,
 	    sizeof(sf.sf_uc.uc_mcontext.mc_spare));
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Start from the top of the alternate signal stack. */
 		sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/*
 		 * Stay on the current stack, 128 bytes below %rsp.
 		 * NOTE(review): presumably this skips the amd64 ABI red
 		 * zone -- confirm.
 		 */
 		sp = (char *)regs->tf_rsp - 128;
 	if (xfpusave != NULL) {
 		/* Reserve a 64-byte aligned area for the extended FPU state. */
 		sp -= xfpusave_len;
 		sp = (char *)((unsigned long)sp & ~0x3Ful);
 		sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
 	}
 	sp -= sizeof(struct sigframe);
 	/* Align to 16 bytes. */
 	sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);
 
 	/* Build the argument list for the signal handler. */
 	regs->tf_rdi = sig;			/* arg 1 in %rdi */
 	regs->tf_rdx = (register_t)&sfp->sf_uc;	/* arg 3 in %rdx */
 	bzero(&sf.sf_si, sizeof(sf.sf_si));
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		regs->tf_rsi = (register_t)&sfp->sf_si;	/* arg 2 in %rsi */
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill in POSIX parts */
 		sf.sf_si = ksi->ksi_info;
 		sf.sf_si.si_signo = sig; /* maybe a translated signal */
 		regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
 	} else {
 		/* Old FreeBSD-style arguments. */
 		regs->tf_rsi = ksi->ksi_code;	/* arg 2 in %rsi */
 		regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	/* Drop both locks before touching user memory. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
 	    (xfpusave != NULL && copyout(xfpusave,
 	    (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
 	    != 0)) {
 		uprintf("pid %d comm %s has trashed its stack, killing\n",
 		    p->p_pid, p->p_comm);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* The FPU state now lives in the frame; discard the thread's copy. */
 	fpstate_drop(td);
 	/* Redirect the return to userland to PROC_SIGCODE(p) on the new frame. */
 	regs->tf_rsp = (long)sfp;
 	regs->tf_rip = PROC_SIGCODE(p);
 	regs->tf_rflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_ss = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
 	/* Re-acquire the locks that were held on entry. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  *
  * Returns EJUSTRETURN on success.  Returns the copyin() error if the
  * context or the extended FPU state cannot be read, the set_fpcontext()
  * error if the FPU state is rejected, and EINVAL if mc_flags, %rflags,
  * %cs or the extended state length fail validation.  A bad %cs also
  * posts SIGBUS to the thread.
  */
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t uc;
 	struct pcb *pcb;
 	struct proc *p;
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	char *xfpustate;
 	size_t xfpustate_len;
 	long rflags;
 	int cs, error, ret;
 	ksiginfo_t ksi;
 
 	pcb = td->td_pcb;
 	p = td->td_proc;
 
 	/* Work on a kernel copy so userland cannot change it under us. */
 	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
 	if (error != 0) {
 		uprintf("pid %d (%s): sigreturn copyin failed\n",
 		    p->p_pid, td->td_name);
 		return (error);
 	}
 	ucp = &uc;
 	/* Reject flag bits outside _MC_FLAG_MASK. */
 	if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
 		uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
 		    td->td_name, ucp->uc_mcontext.mc_flags);
 		return (EINVAL);
 	}
 	regs = td->td_frame;
 	rflags = ucp->uc_mcontext.mc_rflags;
 	/*
 	 * Don't allow users to change privileged or reserved flags.
 	 */
 	if (!EFL_SECURE(rflags, regs->tf_rflags)) {
 		uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid,
 		    td->td_name, rflags);
 		return (EINVAL);
 	}
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 	cs = ucp->uc_mcontext.mc_cs;
 	if (!CS_SECURE(cs)) {
 		uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid,
 		    td->td_name, cs);
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_rip;
 		trapsignal(td, &ksi);
 		return (EINVAL);
 	}
 
 	if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
 		/* Bound the user-supplied length before copying anything in. */
 		xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
 		if (xfpustate_len > cpu_max_ext_state_size -
 		    sizeof(struct savefpu)) {
 			uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
 			    p->p_pid, td->td_name, xfpustate_len);
 			return (EINVAL);
 		}
 		xfpustate = (char *)fpu_save_area_alloc();
 		error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
 		    xfpustate, xfpustate_len);
 		if (error != 0) {
 			fpu_save_area_free((struct savefpu *)xfpustate);
 			uprintf(
 	"pid %d (%s): sigreturn copying xfpustate failed\n",
 			    p->p_pid, td->td_name);
 			return (error);
 		}
 	} else {
 		xfpustate = NULL;
 		xfpustate_len = 0;
 	}
 	ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
 	/* The temporary buffer is released on both success and failure. */
 	fpu_save_area_free((struct savefpu *)xfpustate);
 	if (ret != 0) {
 		uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
 		    p->p_pid, td->td_name, ret);
 		return (ret);
 	}
 	/* All checks passed: install the saved registers and segment bases. */
 	bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
 	update_pcb_bases(pcb);
 	pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
 	pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
 
 #if defined(COMPAT_43)
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	/* Restore the signal mask saved in the context. */
 	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 	return (EJUSTRETURN);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 compatibility entry point: forwards to sys_sigreturn() with
  * the argument structure reinterpreted as a struct sigreturn_args.
  */
 int
 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
 {
 	struct sigreturn_args *args;
 
 	args = (struct sigreturn_args *)uap;
 	return (sys_sigreturn(td, args));
 }
 #endif
 
 /*
  * Forget any hardware debug register state recorded in the given pcb.
  * Such state has no meaning for a newly exec'd process.  Does nothing
  * when the pcb is not flagged with PCB_DBREGS.
  */
 void
 x86_clear_dbregs(struct pcb *pcb)
 {
 	if ((pcb->pcb_flags & PCB_DBREGS) != 0) {
 		/* Wipe the saved copies. */
 		pcb->pcb_dr0 = 0;
 		pcb->pcb_dr1 = 0;
 		pcb->pcb_dr2 = 0;
 		pcb->pcb_dr3 = 0;
 		pcb->pcb_dr6 = 0;
 		pcb->pcb_dr7 = 0;
 
 		/*
 		 * When this is the pcb of the running thread the CPU
 		 * registers must be cleared as well, or they would leak
 		 * into whatever process runs next.
 		 */
 		if (pcb == curpcb)
 			reset_dbregs();
 
 		clear_pcb_flags(pcb, PCB_DBREGS);
 	}
 }
 
 /*
  * Reset registers to default values on exec.
  *
  *	td	- thread whose trapframe and pcb are reinitialised
  *	imgp	- image parameters; only entry_addr is read here
  *	stack	- initial user stack address, also passed in %rdi
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *regs;
 	struct pcb *pcb;
 	register_t saved_rflags;
 
 	regs = td->td_frame;
 	pcb = td->td_pcb;
 
 	/* Release a per-process LDT if the process has one. */
 	if (td->td_proc->p_md.md_ldt != NULL)
 		user_ldt_free(td);
 
 	/* Start the new image with zero %fs/%gs bases and no PCB_32BIT. */
 	update_pcb_bases(pcb);
 	pcb->pcb_fsbase = 0;
 	pcb->pcb_gsbase = 0;
 	clear_pcb_flags(pcb, PCB_32BIT);
 	pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
 
 	/* PSL_T is the only flag carried across the bzero below. */
 	saved_rflags = regs->tf_rflags & PSL_T;
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_rip = imgp->entry_addr;
 	/*
 	 * Leaves %rsp congruent to 8 modulo 16.
 	 * NOTE(review): presumably this mimics the stack as seen just
 	 * after a call instruction -- confirm against the ABI.
 	 */
 	regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
 	regs->tf_rdi = stack;		/* argv */
 	regs->tf_rflags = PSL_USER | saved_rflags;
 	regs->tf_ss = _udatasel;
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
 
 	x86_clear_dbregs(pcb);
 
 	/*
 	 * Drop the FP state if we hold it, so that the process gets a
 	 * clean FP state if it uses the FPU again.
 	 */
 	fpstate_drop(td);
 }
 
 /*
  * Export the general-purpose registers of a thread, taken from its
  * trapframe, into 'regs'.  Returns the result of fill_frame_regs().
  */
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 
 	return (fill_frame_regs(td->td_frame, regs));
 }
 
 int
 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 {
 
 	regs->r_r15 = tp->tf_r15;
 	regs->r_r14 = tp->tf_r14;
 	regs->r_r13 = tp->tf_r13;
 	regs->r_r12 = tp->tf_r12;
 	regs->r_r11 = tp->tf_r11;
 	regs->r_r10 = tp->tf_r10;
 	regs->r_r9  = tp->tf_r9;
 	regs->r_r8  = tp->tf_r8;
 	regs->r_rdi = tp->tf_rdi;
 	regs->r_rsi = tp->tf_rsi;
 	regs->r_rbp = tp->tf_rbp;
 	regs->r_rbx = tp->tf_rbx;
 	regs->r_rdx = tp->tf_rdx;
 	regs->r_rcx = tp->tf_rcx;
 	regs->r_rax = tp->tf_rax;
 	regs->r_rip = tp->tf_rip;
 	regs->r_cs = tp->tf_cs;
 	regs->r_rflags = tp->tf_rflags;
 	regs->r_rsp = tp->tf_rsp;
 	regs->r_ss = tp->tf_ss;
 	if (tp->tf_flags & TF_HASSEGS) {
 		regs->r_ds = tp->tf_ds;
 		regs->r_es = tp->tf_es;
 		regs->r_fs = tp->tf_fs;
 		regs->r_gs = tp->tf_gs;
 	} else {
 		regs->r_ds = 0;
 		regs->r_es = 0;
 		regs->r_fs = 0;
 		regs->r_gs = 0;
 	}
 	regs->r_err = 0;
 	regs->r_trapno = 0;
 	return (0);
 }
 
 /*
  * Install the register values in 'regs' into the trapframe of 'td'.
  * Only the low 32 bits of r_rflags are considered.  Returns EINVAL,
  * without modifying anything, when the requested flags or %cs fail the
  * EFL_SECURE()/CS_SECURE() checks; returns 0 otherwise.
  */
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 	register_t new_rflags;
 
 	frame = td->td_frame;
 	new_rflags = regs->r_rflags & 0xffffffff;
 
 	/* Validate before the first store so failure leaves no trace. */
 	if (!EFL_SECURE(new_rflags, frame->tf_rflags))
 		return (EINVAL);
 	if (!CS_SECURE(regs->r_cs))
 		return (EINVAL);
 
 	/* Instruction pointer, stack pointer, flags and their selectors. */
 	frame->tf_rip = regs->r_rip;
 	frame->tf_cs = regs->r_cs;
 	frame->tf_rflags = new_rflags;
 	frame->tf_rsp = regs->r_rsp;
 	frame->tf_ss = regs->r_ss;
 
 	/* General-purpose registers. */
 	frame->tf_rax = regs->r_rax;
 	frame->tf_rbx = regs->r_rbx;
 	frame->tf_rcx = regs->r_rcx;
 	frame->tf_rdx = regs->r_rdx;
 	frame->tf_rsi = regs->r_rsi;
 	frame->tf_rdi = regs->r_rdi;
 	frame->tf_rbp = regs->r_rbp;
 	frame->tf_r8  = regs->r_r8;
 	frame->tf_r9  = regs->r_r9;
 	frame->tf_r10 = regs->r_r10;
 	frame->tf_r11 = regs->r_r11;
 	frame->tf_r12 = regs->r_r12;
 	frame->tf_r13 = regs->r_r13;
 	frame->tf_r14 = regs->r_r14;
 	frame->tf_r15 = regs->r_r15;
 
 	/* Deliberately disabled: the data segment selectors are not set. */
 	if (0) {	/* XXXKIB */
 		frame->tf_ds = regs->r_ds;
 		frame->tf_es = regs->r_es;
 		frame->tf_fs = regs->r_fs;
 		frame->tf_gs = regs->r_gs;
 		frame->tf_flags = TF_HASSEGS;
 	}
 
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	return (0);
 }
 
 /* XXX check all this stuff! */
 /*
  * Externalize from sv_xmm: clear 'fpregs', then copy the FPU environment,
  * the eight x87 registers (10 bytes each) and the sixteen SSE registers
  * (16 bytes each) from the save area into it.
  */
 static void
 fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
 {
 	struct envxmm *src_env, *dst_env;
 	int reg;
 
 	src_env = &sv_xmm->sv_env;
 	dst_env = (struct envxmm *)&fpregs->fpr_env;
 
 	/* pcb -> fpregs; anything not copied below reads as zero */
 	bzero(fpregs, sizeof(*fpregs));
 
 	/* x87 control/status words and last instruction/operand pointers */
 	dst_env->en_cw = src_env->en_cw;
 	dst_env->en_sw = src_env->en_sw;
 	dst_env->en_tw = src_env->en_tw;
 	dst_env->en_opcode = src_env->en_opcode;
 	dst_env->en_rip = src_env->en_rip;
 	dst_env->en_rdp = src_env->en_rdp;
 
 	/* SSE control/status register and its mask */
 	dst_env->en_mxcsr = src_env->en_mxcsr;
 	dst_env->en_mxcsr_mask = src_env->en_mxcsr_mask;
 
 	/* x87 data registers */
 	for (reg = 0; reg != 8; reg++) {
 		bcopy(sv_xmm->sv_fp[reg].fp_acc.fp_bytes,
 		    fpregs->fpr_acc[reg], 10);
 	}
 
 	/* SSE data registers */
 	for (reg = 0; reg != 16; reg++) {
 		bcopy(sv_xmm->sv_xmm[reg].xmm_bytes,
 		    fpregs->fpr_xacc[reg], 16);
 	}
 }
 
 /*
  * Internalize from fpregs into sv_xmm.
  *
  * Copies the FPU environment, the eight x87 registers (10 bytes each)
  * and the sixteen SSE registers (16 bytes each) from 'fpregs' into the
  * save area 'sv_xmm'.
  *
  * 'fpregs' is caller-supplied, so the MXCSR value is filtered through
  * cpu_mxcsr_mask before it is stored.  Reserved MXCSR bits must never
  * reach a save area: restoring such an area faults in kernel mode.
  */
 static void
 set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
 {
 	struct envxmm *penv_xmm = &sv_xmm->sv_env;
 	struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 	int i;
 
 	/* fpregs -> pcb */
 	/* FPU control/status */
 	penv_xmm->en_cw = penv_fpreg->en_cw;
 	penv_xmm->en_sw = penv_fpreg->en_sw;
 	penv_xmm->en_tw = penv_fpreg->en_tw;
 	penv_xmm->en_opcode = penv_fpreg->en_opcode;
 	penv_xmm->en_rip = penv_fpreg->en_rip;
 	penv_xmm->en_rdp = penv_fpreg->en_rdp;
 	/* Strip MXCSR bits this CPU does not implement. */
 	penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr & cpu_mxcsr_mask;
 	penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask;
 
 	/* FPU registers */
 	for (i = 0; i < 8; ++i)
 		bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);
 
 	/* SSE registers */
 	for (i = 0; i < 16; ++i)
 		bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
 }
 
 /*
  * Externalize from td->pcb: export the user FPU state of 'td' into
  * 'fpregs'.  The thread must be the current one or be stopped or
  * suspended (asserted).  Always returns 0.
  */
 int
 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 	struct savefpu *usersave;
 
 	KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
 	    P_SHOULDSTOP(td->td_proc),
 	    ("not suspended thread %p", td));
 
 	/* fpugetregs() runs first, before the save area is read. */
 	fpugetregs(td);
 	usersave = get_pcb_user_save_td(td);
 	fill_fpregs_xmm(usersave, fpregs);
 	return (0);
 }
 
 /*
  * Internalize to td->pcb: load 'fpregs' into the user FPU save area of
  * 'td' and mark it initialised.  Both steps run inside one critical
  * section.  Always returns 0.
  */
 int
 set_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 	struct savefpu *usersave;
 
 	critical_enter();
 	usersave = get_pcb_user_save_td(td);
 	set_fpregs_xmm(fpregs, usersave);
 	fpuuserinited(td);
 	critical_exit();
 
 	return (0);
 }
 
 /*
  * Get machine context.
  *
  * Fills 'mcp' from the trapframe, pcb and FPU state of 'td'.  When
  * GET_MC_CLEAR_RET is set in 'flags', %rax and %rdx are reported as zero
  * and the carry flag is cleared in the reported %rflags.  No extended
  * FPU state is attached.  Always returns 0.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 {
 	struct trapframe *frame;
 	struct pcb *pcb;
 
 	frame = td->td_frame;
 	pcb = td->td_pcb;
 
 	/* sigonstack() is called with the current process locked. */
 	PROC_LOCK(curthread->td_proc);
 	mcp->mc_onstack = sigonstack(frame->tf_rsp);
 	PROC_UNLOCK(curthread->td_proc);
 
 	/* General-purpose registers that do not carry a return value. */
 	mcp->mc_rdi = frame->tf_rdi;
 	mcp->mc_rsi = frame->tf_rsi;
 	mcp->mc_rbp = frame->tf_rbp;
 	mcp->mc_rbx = frame->tf_rbx;
 	mcp->mc_rcx = frame->tf_rcx;
 	mcp->mc_r8  = frame->tf_r8;
 	mcp->mc_r9  = frame->tf_r9;
 	mcp->mc_r10 = frame->tf_r10;
 	mcp->mc_r11 = frame->tf_r11;
 	mcp->mc_r12 = frame->tf_r12;
 	mcp->mc_r13 = frame->tf_r13;
 	mcp->mc_r14 = frame->tf_r14;
 	mcp->mc_r15 = frame->tf_r15;
 
 	/* Return-value registers and the carry flag. */
 	if ((flags & GET_MC_CLEAR_RET) != 0) {
 		mcp->mc_rax = 0;
 		mcp->mc_rdx = 0;
 		mcp->mc_rflags = frame->tf_rflags;
 		mcp->mc_rflags &= ~PSL_C;
 	} else {
 		mcp->mc_rax = frame->tf_rax;
 		mcp->mc_rdx = frame->tf_rdx;
 		mcp->mc_rflags = frame->tf_rflags;
 	}
 
 	/* Instruction/stack pointers and segment selectors. */
 	mcp->mc_rip = frame->tf_rip;
 	mcp->mc_rsp = frame->tf_rsp;
 	mcp->mc_cs = frame->tf_cs;
 	mcp->mc_ss = frame->tf_ss;
 	mcp->mc_ds = frame->tf_ds;
 	mcp->mc_es = frame->tf_es;
 	mcp->mc_fs = frame->tf_fs;
 	mcp->mc_gs = frame->tf_gs;
 
 	mcp->mc_flags = frame->tf_flags;
 	mcp->mc_len = sizeof(*mcp);
 
 	/* Legacy FPU state only; no extended state is requested. */
 	get_fpcontext(td, mcp, NULL, NULL);
 	mcp->mc_xfpustate = 0;
 	mcp->mc_xfpustate_len = 0;
 
 	/* Segment bases, read after update_pcb_bases(). */
 	update_pcb_bases(pcb);
 	mcp->mc_fsbase = pcb->pcb_fsbase;
 	mcp->mc_gsbase = pcb->pcb_gsbase;
 
 	bzero(mcp->mc_spare, sizeof(mcp->mc_spare));
 	return (0);
 }
 
 /*
  * Set machine context.
  *
  * However, we don't set any but the user modifiable flags, and we won't
  * touch the cs selector.
  *
  * Returns 0 on success.  Returns EINVAL when mc_len or mc_flags are
  * invalid or the extended FPU state length is too large, and otherwise
  * whatever error copyin() or set_fpcontext() reports.  The trapframe is
  * only modified after the FPU state has been accepted.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct pcb *pcb;
 	struct trapframe *tp;
 	char *xfpustate;
 	long rflags;
 	int ret;
 
 	pcb = td->td_pcb;
 	tp = td->td_frame;
 	if (mcp->mc_len != sizeof(*mcp) ||
 	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
 		return (EINVAL);
 	/* Take PSL_USERCHANGE bits from the caller, the rest from the frame. */
 	rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
 	    (tp->tf_rflags & ~PSL_USERCHANGE);
 	if (mcp->mc_flags & _MC_HASFPXSTATE) {
 		/* Bound the user-supplied length before copying anything in. */
 		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 		    sizeof(struct savefpu))
 			return (EINVAL);
 		xfpustate = (char *)fpu_save_area_alloc();
 		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
 		    mcp->mc_xfpustate_len);
 		if (ret != 0) {
 			fpu_save_area_free((struct savefpu *)xfpustate);
 			return (ret);
 		}
 	} else
 		xfpustate = NULL;
 	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
 	/* The temporary buffer is released on both success and failure. */
 	fpu_save_area_free((struct savefpu *)xfpustate);
 	if (ret != 0)
 		return (ret);
 	tp->tf_r15 = mcp->mc_r15;
 	tp->tf_r14 = mcp->mc_r14;
 	tp->tf_r13 = mcp->mc_r13;
 	tp->tf_r12 = mcp->mc_r12;
 	tp->tf_r11 = mcp->mc_r11;
 	tp->tf_r10 = mcp->mc_r10;
 	tp->tf_r9  = mcp->mc_r9;
 	tp->tf_r8  = mcp->mc_r8;
 	tp->tf_rdi = mcp->mc_rdi;
 	tp->tf_rsi = mcp->mc_rsi;
 	tp->tf_rbp = mcp->mc_rbp;
 	tp->tf_rbx = mcp->mc_rbx;
 	tp->tf_rdx = mcp->mc_rdx;
 	tp->tf_rcx = mcp->mc_rcx;
 	tp->tf_rax = mcp->mc_rax;
 	tp->tf_rip = mcp->mc_rip;
 	tp->tf_rflags = rflags;
 	tp->tf_rsp = mcp->mc_rsp;
 	tp->tf_ss = mcp->mc_ss;
 	tp->tf_flags = mcp->mc_flags;
 	/* Data segment selectors are only taken from a TF_HASSEGS context. */
 	if (tp->tf_flags & TF_HASSEGS) {
 		tp->tf_ds = mcp->mc_ds;
 		tp->tf_es = mcp->mc_es;
 		tp->tf_fs = mcp->mc_fs;
 		tp->tf_gs = mcp->mc_gs;
 	}
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 	/* Segment bases are only taken when the context says it has them. */
 	if (mcp->mc_flags & _MC_HASBASES) {
 		pcb->pcb_fsbase = mcp->mc_fsbase;
 		pcb->pcb_gsbase = mcp->mc_gsbase;
 	}
 	return (0);
 }
 
 /*
  * Store the FPU state of 'td' in 'mcp'.
  *
  * The legacy save area is always copied into mc_fpstate.  When 'xfpusave'
  * is non-NULL the caller also wants the extended state: on return
  * *xfpusave points just past the thread's legacy save area and
  * *xfpusave_len holds its length, or they are NULL/0 when XSAVE is not
  * in use or there is no state beyond struct savefpu.
  */
 void
 get_fpcontext(struct thread *td, mcontext_t *mcp, char **xfpusave,
     size_t *xfpusave_len)
 {
 	struct savefpu *usersave;
 
 	mcp->mc_ownedfp = fpugetregs(td);
 	usersave = get_pcb_user_save_td(td);
 	bcopy(usersave, &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate));
 	mcp->mc_fpformat = fpuformat();
 
 	/* Caller is not interested in the extended state. */
 	if (xfpusave == NULL)
 		return;
 
 	if (use_xsave && cpu_max_ext_state_size > sizeof(struct savefpu)) {
 		mcp->mc_flags |= _MC_HASFPXSTATE;
 		mcp->mc_xfpustate_len =
 		    cpu_max_ext_state_size - sizeof(struct savefpu);
 		*xfpusave_len = mcp->mc_xfpustate_len;
 		/* The extended state directly follows the legacy area. */
 		*xfpusave = (char *)(usersave + 1);
 	} else {
 		*xfpusave = NULL;
 		*xfpusave_len = 0;
 	}
 }
 
 /*
  * Install the FPU state described by 'mcp' (and the optional extended
  * state buffer) into 'td'.
  *
  * Returns 0 when the context carries no FPU state (_MC_FPFMT_NODEV) or
  * after the thread's state was dropped (_MC_FPOWNED_NONE), the result of
  * fpusetregs() when state is supplied, and EINVAL for any other
  * mc_fpformat or mc_ownedfp value.
  */
 int
 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
     size_t xfpustate_len)
 {
 
 	if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 		return (0);
 	if (mcp->mc_fpformat != _MC_FPFMT_XMM)
 		return (EINVAL);
 
 	switch (mcp->mc_ownedfp) {
 	case _MC_FPOWNED_NONE:
 		/* We don't care what state is left in the FPU or PCB. */
 		fpstate_drop(td);
 		return (0);
 	case _MC_FPOWNED_FPU:
 	case _MC_FPOWNED_PCB:
 		return (fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate,
 		    xfpustate, xfpustate_len));
 	default:
 		return (EINVAL);
 	}
 }
 
 /*
  * Discard the user FPU state of 'td': release the FPU if this CPU
  * currently holds it on behalf of 'td', and clear the "FPU initialised"
  * flags in the thread's pcb.  The thread must not be using the FPU on
  * behalf of the kernel (asserted).
  */
 void
 fpstate_drop(struct thread *td)
 {
 
 	KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
 	critical_enter();
 	if (PCPU_GET(fpcurthread) == td)
 		fpudrop();
 	/*
 	 * XXX force a full drop of the fpu.  The above only drops it if we
 	 * owned it.
 	 *
 	 * XXX I don't much like fpugetuserregs()'s semantics of doing a full
 	 * drop.  Dropping only to the pcb matches fnsave's behaviour.
 	 * We only need to drop to !PCB_INITDONE in sendsig().  But
 	 * sendsig() is the only caller of fpugetuserregs()... perhaps we just
 	 * have too many layers.
 	 *
 	 * The flags are cleared on the pcb of the thread we were asked to
 	 * operate on, matching the checks above.  It need not be curthread.
 	 */
 	clear_pcb_flags(td->td_pcb,
 	    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
 	critical_exit();
 }
 
 /*
  * Export debug registers into 'dbregs'.  With a NULL thread the values
  * are read from the CPU; otherwise they come from the thread's pcb.
  * Only dr0-dr3, dr6 and dr7 carry data; every other slot is reported as
  * zero.  Always returns 0.
  */
 int
 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 {
 	struct pcb *pcb;
 	int slot;
 
 	/* Slots with no register behind them. */
 	dbregs->dr[4] = 0;
 	dbregs->dr[5] = 0;
 	for (slot = 8; slot <= 15; slot++)
 		dbregs->dr[slot] = 0;
 
 	if (td != NULL) {
 		pcb = td->td_pcb;
 		dbregs->dr[0] = pcb->pcb_dr0;
 		dbregs->dr[1] = pcb->pcb_dr1;
 		dbregs->dr[2] = pcb->pcb_dr2;
 		dbregs->dr[3] = pcb->pcb_dr3;
 		dbregs->dr[6] = pcb->pcb_dr6;
 		dbregs->dr[7] = pcb->pcb_dr7;
 	} else {
 		dbregs->dr[0] = rdr0();
 		dbregs->dr[1] = rdr1();
 		dbregs->dr[2] = rdr2();
 		dbregs->dr[3] = rdr3();
 		dbregs->dr[6] = rdr6();
 		dbregs->dr[7] = rdr7();
 	}
 	return (0);
 }
 
 int
 set_dbregs(struct thread *td, struct dbreg *dbregs)
 {
 	struct pcb *pcb;
 	int i;
 
 	if (td == NULL) {
 		load_dr0(dbregs->dr[0]);
 		load_dr1(dbregs->dr[1]);
 		load_dr2(dbregs->dr[2]);
 		load_dr3(dbregs->dr[3]);
 		load_dr6(dbregs->dr[6]);
 		load_dr7(dbregs->dr[7]);
 	} else {
 		/*
 		 * Don't let an illegal value for dr7 get set.  Specifically,
 		 * check for undefined settings.  Setting these bit patterns
 		 * result in undefined behaviour and can lead to an unexpected
 		 * TRCTRAP or a general protection fault right here.
 		 * Upper bits of dr6 and dr7 must not be set
 		 */
 		for (i = 0; i < 4; i++) {
 			if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 				return (EINVAL);
 			if (td->td_frame->tf_cs == _ucode32sel &&
 			    DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8)
 				return (EINVAL);
 		}
 		if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 ||
 		    (dbregs->dr[7] & 0xffffffff00000000ul) != 0)
 			return (EINVAL);
 
 		pcb = td->td_pcb;
 
 		/*
 		 * Don't let a process set a breakpoint that is not within the
 		 * process's address space.  If a process could do this, it
 		 * could halt the system by setting a breakpoint in the kernel
 		 * (if ddb was enabled).  Thus, we need to check to make sure
 		 * that no breakpoints are being enabled for addresses outside
 		 * process's address space.
 		 *
 		 * XXX - what about when the watched area of the user's
 		 * address space is written into from within the kernel
 		 * ... wouldn't that still cause a breakpoint to be generated
 		 * from within kernel mode?
 		 */
 
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 			/* dr0 is enabled */
 			if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 			/* dr1 is enabled */
 			if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 			/* dr2 is enabled */
 			if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 			/* dr3 is enabled */
 			if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		pcb->pcb_dr0 = dbregs->dr[0];
 		pcb->pcb_dr1 = dbregs->dr[1];
 		pcb->pcb_dr2 = dbregs->dr[2];
 		pcb->pcb_dr3 = dbregs->dr[3];
 		pcb->pcb_dr6 = dbregs->dr[6];
 		pcb->pcb_dr7 = dbregs->dr[7];
 
 		set_pcb_flags(pcb, PCB_DBREGS);
 	}
 
 	return (0);
 }
 
 /*
  * Clear the debug registers of the running CPU.  The control register
  * dr7 is written first; the address registers dr0-dr3 and the status
  * register dr6 follow.
  */
 void
 reset_dbregs(void)
 {
 
 	load_dr7(0);	/* Turn off the control bits first */
 	load_dr0(0);
 	load_dr1(0);
 	load_dr2(0);
 	load_dr3(0);
 	load_dr6(0);
 }
 
 /*
  * Decide whether a debug trap was caused by a hardware breakpoint on a
  * user-space address.
  *
  * Returns the number of breakpoints that triggered (> 0) if at least one
  * of them refers to an address below VM_MAXUSER_ADDRESS, and 0 otherwise.
  */
 int
 user_dbreg_trap(register_t dr6)
 {
 	caddr_t hit[4];		/* addresses of triggered breakpoints */
 	u_int64_t triggered;	/* breakpoint bits extracted from dr6 */
 	int nhit;		/* number of breakpoints that triggered */
 	int i;
 
 	/*
 	 * With no breakpoint bit set the trap was not caused by any of
 	 * the debug registers.
 	 */
 	triggered = dr6 & DBREG_DR6_BMASK;
 	if (triggered == 0)
 		return (0);
 
 	/*
 	 * If all GE and LE bits in dr7 are zero the trap cannot have
 	 * been caused by the hardware debug registers either.
 	 */
 	if ((rdr7() & 0x000000ff) == 0)
 		return (0);
 
 	/* Collect the address of every breakpoint that was hit. */
 	nhit = 0;
 	if ((triggered & 0x01) != 0)
 		hit[nhit++] = (caddr_t)rdr0();
 	if ((triggered & 0x02) != 0)
 		hit[nhit++] = (caddr_t)rdr1();
 	if ((triggered & 0x04) != 0)
 		hit[nhit++] = (caddr_t)rdr2();
 	if ((triggered & 0x08) != 0)
 		hit[nhit++] = (caddr_t)rdr3();
 
 	/* A single user-space address is enough to claim the trap. */
 	for (i = 0; i < nhit; i++) {
 		if (hit[i] < (caddr_t)VM_MAXUSER_ADDRESS)
 			return (nhit);
 	}
 
 	/* None of the breakpoints are in user space. */
 	return (0);
 }
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index 25fc8b10e903..abb498370f79 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -1,1018 +1,1018 @@
 /*-
  * Copyright (c) 2004 Tim J. Robbins
  * Copyright (c) 2003 Peter Wemm
  * Copyright (c) 2002 Doug Rabson
  * Copyright (c) 1998-1999 Andrew Gallatin
  * Copyright (c) 1994-1996 Søren Schmidt
  * All rights reserved.
  * Copyright (c) 2013, 2021 Dmitry Chagin <dchagin@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define	__ELF_WORD_SIZE	64
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/stddef.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #include <machine/trap.h>
 
 #include <x86/linux/linux_x86.h>
 #include <amd64/linux/linux.h>
 #include <amd64/linux/linux_proto.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_fork.h>
 #include <compat/linux/linux_ioctl.h>
 #include <compat/linux/linux_mib.h>
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_sysproto.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_vdso.h>
 
 #include <x86/linux/linux_x86_sigframe.h>
 
 MODULE_VERSION(linux64, 1);
 
 /*
  * Layout of the top of the Linux LA48 user address space, highest first:
  * the two-page vDSO, then one page for the native shared page, then the
  * stack with ps_strings at its very top.
  *
  * The size is parenthesized so that it stays a single operand wherever it
  * is expanded (e.g. next to '/', '%' or a cast).
  */
 #define	LINUX_VDSOPAGE_SIZE	(PAGE_SIZE * 2)
 #define	LINUX_VDSOPAGE_LA48	(VM_MAXUSER_ADDRESS_LA48 - \
 				    LINUX_VDSOPAGE_SIZE)
 #define	LINUX_SHAREDPAGE_LA48	(LINUX_VDSOPAGE_LA48 - PAGE_SIZE)
 				/*
 				 * PAGE_SIZE - the size
 				 * of the native SHAREDPAGE
 				 */
 #define	LINUX_USRSTACK_LA48	LINUX_SHAREDPAGE_LA48
 #define	LINUX_PS_STRINGS_LA48	(LINUX_USRSTACK_LA48 - \
 				    sizeof(struct ps_strings))
 
 /* Byte length of the embedded vDSO image; set by linux_vdso_install(). */
 static int linux_szsigcode;
 /* Object returned by linux_shared_page_init() that backs the vDSO pages. */
 static vm_object_t linux_vdso_obj;
 /* Kernel mapping of the vDSO pages; the image is copied and patched here. */
 static char *linux_vdso_mapping;
 /* Start and end markers of the vDSO image linked into this object. */
 extern char _binary_linux_vdso_so_o_start;
 extern char _binary_linux_vdso_so_o_end;
 /* User address the vDSO is fixed up for; exported as AT_SYSINFO_EHDR. */
 static vm_offset_t linux_vdso_base;
 
 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
 
 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
 
 static int	linux_copyout_strings(struct image_params *imgp,
 		    uintptr_t *stack_base);
 static int	linux_fixup_elf(uintptr_t *stack_base,
 		    struct image_params *iparams);
 static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(const void *param);
 static void	linux_vdso_deinstall(const void *param);
 static void	linux_vdso_reloc(char *mapping, Elf_Addr offset);
 static void	linux_set_syscall_retval(struct thread *td, int error);
 static int	linux_fetch_syscall_args(struct thread *td);
 static void	linux_exec_setregs(struct thread *td, struct image_params *imgp,
 		    uintptr_t stack);
 static void	linux_exec_sysvec_init(void *param);
 static int	linux_on_exec_vmspace(struct proc *p,
 		    struct image_params *imgp);
 static void	linux_set_fork_retval(struct thread *td);
 static int	linux_vsyscall(struct thread *td);
 
 LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
 LINUX_VDSO_SYM_CHAR(linux_platform);
 LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
 LINUX_VDSO_SYM_INTPTR(kern_tsc_selector);
 LINUX_VDSO_SYM_INTPTR(kern_cpu_selector);
 
 /*
  * Load the system call number and its six arguments from the saved user
  * registers into td->td_sa and pick the matching sysent entry.
  * Always returns 0.
  */
 static int
 linux_fetch_syscall_args(struct thread *td)
 {
 	struct trapframe *tf = td->td_frame;
 	struct syscall_args *sa = &td->td_sa;
 	struct proc *p = td->td_proc;
 
 	sa->code = tf->tf_rax;
 	sa->original_code = sa->code;
 
 	sa->args[0] = tf->tf_rdi;
 	sa->args[1] = tf->tf_rsi;
 	sa->args[2] = tf->tf_rdx;
 	sa->args[3] = tf->tf_rcx;
 	sa->args[4] = tf->tf_r8;
 	sa->args[5] = tf->tf_r9;
 
 	/* An out-of-range number is routed to the last table slot (nosys). */
 	if (sa->code < p->p_sysent->sv_size)
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 	else
 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
 
 	td->td_retval[0] = 0;
 	return (0);
 }
 
 static void
 linux_set_syscall_retval(struct thread *td, int error)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 
 	switch (error) {
 	case 0:
 		frame->tf_rax = td->td_retval[0];
 		frame->tf_r10 = frame->tf_rcx;
 		break;
 
 	case ERESTART:
 		/*
 		 * Reconstruct pc, we know that 'syscall' is 2 bytes,
 		 * lcall $X,y is 7 bytes, int 0x80 is 2 bytes.
 		 * We saved this in tf_err.
 		 *
 		 */
 		frame->tf_rip -= frame->tf_err;
 		frame->tf_r10 = frame->tf_rcx;
 		break;
 
 	case EJUSTRETURN:
 		break;
 
 	default:
 		frame->tf_rax = bsd_to_linux_errno(error);
 		frame->tf_r10 = frame->tf_rcx;
 		break;
 	}
 
 	/*
 	 * Differently from FreeBSD native ABI, on Linux only %rcx
 	 * and %r11 values are not preserved across the syscall.
 	 * Require full context restore to get all registers except
 	 * those two restored at return to usermode.
 	 *
 	 * XXX: Would be great to be able to avoid PCB_FULL_IRET
 	 *      for the error == 0 case.
 	 */
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 }
 
 /* Store 0 in the thread's saved %rax. */
 static void
 linux_set_fork_retval(struct thread *td)
 {
 
 	td->td_frame->tf_rax = 0;
 }
 
 /*
  * Build the Linux ELF auxiliary vector in a temporary kernel buffer and
  * copy all LINUX_AT_COUNT slots out to the user address 'base'.
  * imgp->auxargs is freed and cleared.  Returns the copyout() result.
  */
 static int
 linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
 {
 	Elf_Auxinfo *vec, *cur;
 	Elf_Auxargs *aux;
 	struct proc *p = imgp->proc;
 	int error, secure;
 
 	aux = (Elf64_Auxargs *)imgp->auxargs;
 	/* Zeroed, so slots past the last entry read as AT_NULL-style zeros. */
 	vec = malloc(LINUX_AT_COUNT * sizeof(*vec), M_TEMP,
 	    M_WAITOK | M_ZERO);
 	cur = vec;
 
 	secure = (p->p_flag & P_SUGID) != 0;
 
 	AUXARGS_ENTRY(cur, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
 	AUXARGS_ENTRY(cur, LINUX_AT_HWCAP, cpu_feature);
 	AUXARGS_ENTRY(cur, AT_PAGESZ, aux->pagesz);
 	AUXARGS_ENTRY(cur, LINUX_AT_CLKTCK, stclohz);
 	AUXARGS_ENTRY(cur, AT_PHDR, aux->phdr);
 	AUXARGS_ENTRY(cur, AT_PHENT, aux->phent);
 	AUXARGS_ENTRY(cur, AT_PHNUM, aux->phnum);
 	AUXARGS_ENTRY(cur, AT_BASE, aux->base);
 	AUXARGS_ENTRY(cur, AT_FLAGS, aux->flags);
 	AUXARGS_ENTRY(cur, AT_ENTRY, aux->entry);
 	AUXARGS_ENTRY(cur, AT_UID, p->p_ucred->cr_ruid);
 	AUXARGS_ENTRY(cur, AT_EUID, p->p_ucred->cr_svuid);
 	AUXARGS_ENTRY(cur, AT_GID, p->p_ucred->cr_rgid);
 	AUXARGS_ENTRY(cur, AT_EGID, p->p_ucred->cr_svgid);
 	AUXARGS_ENTRY(cur, LINUX_AT_SECURE, secure);
 	AUXARGS_ENTRY_PTR(cur, LINUX_AT_RANDOM, imgp->canary);
 	AUXARGS_ENTRY(cur, LINUX_AT_HWCAP2, 0);
 	/* Optional entries: only present when the exec supplied them. */
 	if (imgp->execpathp != 0) {
 		AUXARGS_ENTRY_PTR(cur, LINUX_AT_EXECFN, imgp->execpathp);
 	}
 	if (aux->execfd != -1) {
 		AUXARGS_ENTRY(cur, AT_EXECFD, aux->execfd);
 	}
 	AUXARGS_ENTRY(cur, LINUX_AT_PLATFORM, PTROUT(linux_platform));
 	AUXARGS_ENTRY(cur, AT_NULL, 0);
 
 	/* The ELF loader's argument block is no longer needed. */
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 	KASSERT(cur - vec <= LINUX_AT_COUNT, ("Too many auxargs"));
 
 	error = copyout(vec, (void *)base, sizeof(*vec) * LINUX_AT_COUNT);
 	free(vec, M_TEMP);
 	return (error);
 }
 
 /*
  * Push argc onto the new user stack: store it in the word just below
  * *stack_base and move *stack_base down to that word.
  * Returns EFAULT if the store to user memory fails, otherwise 0.
  */
 static int
 linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp)
 {
 	Elf_Addr *argcp;
 
 	argcp = (Elf64_Addr *)*stack_base - 1;
 	if (suword(argcp, (uint64_t)imgp->args->argc) == -1)
 		return (EFAULT);
 
 	*stack_base = (uintptr_t)argcp;
 	return (0);
 }
 
 /*
  * Copy strings out to the new process address space, constructing new arg
  * and env vector tables. Return a pointer to the base so that it can be used
  * as the initial stack pointer.
  *
  * Working downwards from ps_strings the stack receives: the executable
  * path (only when there are auxargs), the AT_RANDOM canary bytes, the
  * argument/environment strings, room for the auxiliary vector (only when
  * there are auxargs) and finally the argv[]/envp[] pointer tables.
  *
  * The initial stack pointer is stored through stack_base.  Returns 0 on
  * success, EFAULT when a word store to user memory fails, or the error
  * from copyout() / sv_copyout_auxargs.
  */
 static int
 linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base)
 {
 	int argc, envc, error;
 	char **vectp;
 	char *stringp;
 	uintptr_t destp, ustringp;
 	struct ps_strings *arginfo;
 	char canary[LINUX_AT_RANDOM_LEN];
 	size_t execpath_len;
 	struct proc *p;
 
 	p = imgp->proc;
 	arginfo = (struct ps_strings *)PROC_PS_STRINGS(p);
 	/* destp is the running "top of free space" cursor; it only moves down. */
 	destp = (uintptr_t)arginfo;
 
 	/* The path is only copied out when an auxiliary vector will refer to it. */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL) {
 		execpath_len = strlen(imgp->execpath) + 1;
 		destp -= execpath_len;
 		destp = rounddown2(destp, sizeof(void *));
 		imgp->execpathp = (void *)destp;
 		error = copyout(imgp->execpath, imgp->execpathp, execpath_len);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Prepare the canary for SSP. */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= roundup(sizeof(canary), sizeof(void *));
 	imgp->canary = (void *)destp;
 	error = copyout(canary, imgp->canary, sizeof(canary));
 	if (error != 0)
 		return (error);
 
 	/* Allocate room for the argument and environment strings. */
 	/* stringspace is what is left of ARG_MAX, so the difference is the used size. */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(void *));
 	ustringp = destp;
 
 	if (imgp->auxargs) {
 		/*
 		 * Allocate room on the stack for the ELF auxargs
 		 * array.  It has LINUX_AT_COUNT entries.
 		 */
 		destp -= LINUX_AT_COUNT * sizeof(Elf64_Auxinfo);
 		destp = rounddown2(destp, sizeof(void *));
 	}
 
 	vectp = (char **)destp;
 
 	/*
 	 * Allocate room for the argv[] and env vectors including the
 	 * terminating NULL pointers.
 	 */
 	vectp -= imgp->args->argc + 1 + imgp->args->envc + 1;
 
 	/*
 	 * Starting with 2.24, glibc depends on a 16-byte stack alignment.
 	 * One "long argc" will be prepended later.
 	 */
 	/* Leaves vectp at 8 modulo 16, so vectp - 8 (the argc word) is 16-byte aligned. */
 	vectp = (char **)((((uintptr_t)vectp + 8) & ~0xF) - 8);
 
 	/* vectp also becomes our initial stack base. */
 	*stack_base = (uintptr_t)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/* Copy out strings - arguments and environment. */
 	error = copyout(stringp, (void *)ustringp,
 	    ARG_MAX - imgp->args->stringspace);
 	if (error != 0)
 		return (error);
 
 	/* Fill in "ps_strings" struct for ps, w, etc. */
 	if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 ||
 	    suword(&arginfo->ps_nargvstr, argc) != 0)
 		return (EFAULT);
 
 	/* Fill in argument portion of vector table. */
 	/*
 	 * stringp walks the kernel copy of the strings while ustringp tracks
 	 * the same offset in the user copy, one NUL-terminated string per pass.
 	 */
 	for (; argc > 0; --argc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* A null vector table pointer separates the argp's from the envp's. */
 	if (suword(vectp++, 0) != 0)
 		return (EFAULT);
 
 	if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 ||
 	    suword(&arginfo->ps_nenvstr, envc) != 0)
 		return (EFAULT);
 
 	/* Fill in environment portion of vector table. */
 	for (; envc > 0; --envc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* The end of the vector table is a null pointer. */
 	if (suword(vectp, 0) != 0)
 		return (EFAULT);
 
 	/* The auxiliary vector goes right after the envp[] terminator. */
 	if (imgp->auxargs) {
 		vectp++;
 		error = imgp->sysent->sv_copyout_auxargs(imgp,
 		    (uintptr_t)vectp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Give a freshly exec'ed Linux image its initial machine state: 64-bit
  * mode, zeroed %fs/%gs bases, the Linux FPU control word, a cleared
  * trap frame that starts at the image entry point on 'stack', no debug
  * registers and no inherited FP state.
  */
 static void
 linux_exec_setregs(struct thread *td, struct image_params *imgp,
     uintptr_t stack)
 {
 	struct trapframe *tf = td->td_frame;
 	struct pcb *pcb = td->td_pcb;
 	register_t trace_bit;
 
 	if (td->td_proc->p_md.md_ldt != NULL)
 		user_ldt_free(td);
 
 	pcb->pcb_fsbase = 0;
 	pcb->pcb_gsbase = 0;
 	clear_pcb_flags(pcb, PCB_32BIT);
 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 
 	/* Only the single-step flag survives the wipe of the old frame. */
 	trace_bit = tf->tf_rflags & PSL_T;
 	bzero((char *)tf, sizeof(struct trapframe));
 
 	tf->tf_rip = imgp->entry_addr;
 	tf->tf_rsp = stack;
 	tf->tf_rflags = PSL_USER | trace_bit;
 
 	tf->tf_cs = _ucodesel;
 	tf->tf_ss = _udatasel;
 	tf->tf_ds = _udatasel;
 	tf->tf_es = _udatasel;
 	tf->tf_fs = _ufssel;
 	tf->tf_gs = _ugssel;
 	tf->tf_flags = TF_HASSEGS;
 
 	x86_clear_dbregs(pcb);
 
 	/*
 	 * Drop any FP state we hold so that the process starts from a
 	 * clean one the next time it touches the FPU.
 	 */
 	fpstate_drop(td);
 }
 
 /*
  * Copied from amd64/amd64/machdep.c
  *
  * XXX fpu state need? don't think so
  *
  * Return from a signal handler: read the l_ucontext whose user address is
  * in the saved %rbx, validate the %rflags and %cs it carries, install its
  * signal mask and load its registers into the thread's trap frame.
  *
  * Returns EJUSTRETURN on success, the copyin() error if the context cannot
  * be read, or EINVAL when %rflags or %cs fail validation (a bad %cs also
  * posts SIGBUS to the thread).
  */
 int
 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
 {
 	struct proc *p;
 	struct l_ucontext uc;
 	struct l_sigcontext *context;
 	struct trapframe *regs;
 	unsigned long rflags;
 	sigset_t bmask;
 	int error;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 	/* The context address comes from %rbx; 'args' is not consulted. */
 	error = copyin((void *)regs->tf_rbx, &uc, sizeof(uc));
 	if (error != 0)
 		return (error);
 
 	p = td->td_proc;
 	context = &uc.uc_mcontext;
 	rflags = context->sc_rflags;
 
 	/*
 	 * Don't allow users to change privileged or reserved flags.
 	 */
 	/*
 	 * XXX do allow users to change the privileged flag PSL_RF.
 	 * The cpu sets PSL_RF in tf_rflags for faults.  Debuggers
 	 * should sometimes set it there too.  tf_rflags is kept in
 	 * the signal context during signal handling and there is no
 	 * other place to remember it, so the PSL_RF bit may be
 	 * corrupted by the signal handler without us knowing.
 	 * Corruption of the PSL_RF bit at worst causes one more or
 	 * one less debugger trap, so allowing it is fairly harmless.
 	 */
 	if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
 		uprintf("pid %d comm %s linux mangled rflags %#lx\n",
 		    p->p_pid, p->p_comm, rflags);
 		return (EINVAL);
 	}
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 	if (!CS_SECURE(context->sc_cs)) {
 		uprintf("pid %d comm %s linux mangled cs %#x\n",
 		    p->p_pid, p->p_comm, context->sc_cs);
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_rip;
 		trapsignal(td, &ksi);
 		return (EINVAL);
 	}
 
 	/* Nothing in the thread is modified until both checks have passed. */
 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
 
 	regs->tf_rdi    = context->sc_rdi;
 	regs->tf_rsi    = context->sc_rsi;
 	regs->tf_rdx    = context->sc_rdx;
 	regs->tf_rbp    = context->sc_rbp;
 	regs->tf_rbx    = context->sc_rbx;
 	regs->tf_rcx    = context->sc_rcx;
 	regs->tf_rax    = context->sc_rax;
 	regs->tf_rip    = context->sc_rip;
 	regs->tf_rsp    = context->sc_rsp;
 	regs->tf_r8     = context->sc_r8;
 	regs->tf_r9     = context->sc_r9;
 	regs->tf_r10    = context->sc_r10;
 	regs->tf_r11    = context->sc_r11;
 	regs->tf_r12    = context->sc_r12;
 	regs->tf_r13    = context->sc_r13;
 	regs->tf_r14    = context->sc_r14;
 	regs->tf_r15    = context->sc_r15;
 	regs->tf_cs     = context->sc_cs;
 	regs->tf_err    = context->sc_err;
 	regs->tf_rflags = rflags;
 
 	/* Every register was replaced, so request a full restore on return. */
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	return (EJUSTRETURN);
 }
 
 /*
  * copied from amd64/amd64/machdep.c
  *
  * Send an interrupt to process.
  *
  * Builds a struct l_rt_sigframe holding the interrupted register state,
  * the signal mask and the Linux siginfo, copies it to the user stack (or
  * to the alternate signal stack) and redirects the thread to the vDSO
  * entry linux_rt_sigcode with the handler address in %rcx.
  *
  * Entered with the process lock and ps_mtx held (both are asserted); they
  * are dropped around the copyout and held again on return.  If the frame
  * cannot be written the process is terminated through sigexit().
  */
 static void
 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct l_rt_sigframe sf, *sfp;
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	caddr_t sp;
 	struct trapframe *regs;
 	int sig, code;
 	int oonstack, issiginfo;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = linux_translate_traps(ksi->ksi_signo, ksi->ksi_trapno);
 	psp = p->p_sigacts;
 	issiginfo = SIGISMEMBER(psp->ps_siginfo, sig);
 	code = ksi->ksi_code;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	/* Sampled before %rsp is redirected to the new frame below. */
 	oonstack = sigonstack(regs->tf_rsp);
 
 	LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u",
 	    catcher, sig, mask, code);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	bsd_to_linux_sigset(mask, &sf.sf_uc.uc_sigmask);
 	sf.sf_uc.uc_mcontext.sc_mask = sf.sf_uc.uc_sigmask;
 
 	sf.sf_uc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
 	sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
 	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
 
 	sf.sf_uc.uc_mcontext.sc_rdi    = regs->tf_rdi;
 	sf.sf_uc.uc_mcontext.sc_rsi    = regs->tf_rsi;
 	sf.sf_uc.uc_mcontext.sc_rdx    = regs->tf_rdx;
 	sf.sf_uc.uc_mcontext.sc_rbp    = regs->tf_rbp;
 	sf.sf_uc.uc_mcontext.sc_rbx    = regs->tf_rbx;
 	sf.sf_uc.uc_mcontext.sc_rcx    = regs->tf_rcx;
 	sf.sf_uc.uc_mcontext.sc_rax    = regs->tf_rax;
 	sf.sf_uc.uc_mcontext.sc_rip    = regs->tf_rip;
 	sf.sf_uc.uc_mcontext.sc_rsp    = regs->tf_rsp;
 	sf.sf_uc.uc_mcontext.sc_r8     = regs->tf_r8;
 	sf.sf_uc.uc_mcontext.sc_r9     = regs->tf_r9;
 	sf.sf_uc.uc_mcontext.sc_r10    = regs->tf_r10;
 	sf.sf_uc.uc_mcontext.sc_r11    = regs->tf_r11;
 	sf.sf_uc.uc_mcontext.sc_r12    = regs->tf_r12;
 	sf.sf_uc.uc_mcontext.sc_r13    = regs->tf_r13;
 	sf.sf_uc.uc_mcontext.sc_r14    = regs->tf_r14;
 	sf.sf_uc.uc_mcontext.sc_r15    = regs->tf_r15;
 	sf.sf_uc.uc_mcontext.sc_cs     = regs->tf_cs;
 	sf.sf_uc.uc_mcontext.sc_rflags = regs->tf_rflags;
 	sf.sf_uc.uc_mcontext.sc_err    = regs->tf_err;
 	sf.sf_uc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
 	sf.sf_uc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		sp = (caddr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
 	} else
 		/* NOTE(review): 128 presumably skips the ABI red zone below %rsp - confirm. */
 		sp = (caddr_t)regs->tf_rsp - 128;
 	sp -= sizeof(struct l_rt_sigframe);
 	/* Align to 16 bytes. */
 	sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul);
 
 	/* Both locks are released before user memory is touched. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/* Translate the signal. */
 	sig = bsd_to_linux_signal(sig);
 	/* Fill in POSIX parts. */
 	siginfo_to_lsiginfo(&ksi->ksi_info, &sf.sf_si, sig);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		uprintf("pid %d comm %s has trashed its stack, killing\n",
 		    p->p_pid, p->p_comm);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Build the argument list for the signal handler. */
 	regs->tf_rdi = sig;			/* arg 1 in %rdi */
 	regs->tf_rax = 0;
 	/* The siginfo and ucontext pointers are passed only to SA_SIGINFO-style handlers. */
 	if (issiginfo) {
 		regs->tf_rsi = (register_t)&sfp->sf_si;	/* arg 2 in %rsi */
 		regs->tf_rdx = (register_t)&sfp->sf_uc;	/* arg 3 in %rdx */
 	} else {
 		regs->tf_rsi = 0;
 		regs->tf_rdx = 0;
 	}
 	/* The handler address travels in %rcx; execution resumes at linux_rt_sigcode. */
 	regs->tf_rcx = (register_t)catcher;
 	regs->tf_rsp = (long)sfp;
 	regs->tf_rip = linux_rt_sigcode;
 	regs->tf_rflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	/* Re-take the locks in the order the caller held them. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * The vsyscall area starts 10 MB below the top of the address space and is
  * divided into LINUX_VSYSCALL_SZ-byte slots, one per emulated call.
  */
 #define	LINUX_VSYSCALL_START		(-10UL << 20)
 #define	LINUX_VSYSCALL_SZ		1024
 
 /* System call number for each slot, indexed by slot position. */
 const unsigned long linux_vsyscall_vector[] = {
 	LINUX_SYS_gettimeofday,
 	LINUX_SYS_linux_time,
 	LINUX_SYS_linux_getcpu,
 };
 
 /*
  * Emulate a call into the legacy Linux vsyscall area.
  *
  * Installed as the sv_trap hook of elf_linux_sysvec.  When the saved %rip
  * is the first byte of one of the LINUX_VSYSCALL_SZ-byte slots starting at
  * LINUX_VSYSCALL_START, the slot index selects a system call number from
  * linux_vsyscall_vector[], the caller's return address is popped from the
  * user stack and the call is dispatched through amd64_syscall().
  *
  * Returns 0 when the trap was handled here, EINVAL when %rip does not name
  * a known slot, or the copyin() error when the return address cannot be
  * read from the user stack.
  */
 static int
 linux_vsyscall(struct thread *td)
 {
 	struct trapframe *frame;
 	uint64_t retqaddr;
 	int code, traced;
 	int error;
 
 	frame = td->td_frame;
 
 	/* Check %rip for vsyscall area. */
 	if (__predict_true(frame->tf_rip < LINUX_VSYSCALL_START))
 		return (EINVAL);
 	/* Only the first byte of a slot is a valid entry point. */
 	if ((frame->tf_rip & (LINUX_VSYSCALL_SZ - 1)) != 0)
 		return (EINVAL);
 	code = (frame->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SZ;
 	if (code >= nitems(linux_vsyscall_vector))
 		return (EINVAL);
 
 	/*
 	 * vsyscall called as callq *(%rax), so we must
 	 * use return address from %rsp and also fixup %rsp.
 	 */
 	error = copyin((void *)frame->tf_rsp, &retqaddr, sizeof(retqaddr));
 	if (error != 0)
 		return (error);
 
 	frame->tf_rip = retqaddr;
 	frame->tf_rax = linux_vsyscall_vector[code];
 	frame->tf_rsp += 8;
 
 	/*
 	 * PSL_T is a bit of the saved %rflags, which lives in tf_rflags.
 	 * tf_flags holds trap frame bookkeeping (TF_HASSEGS and friends),
 	 * so testing it here would never report a single-stepped caller.
 	 */
 	traced = (frame->tf_rflags & PSL_T);
 
 	amd64_syscall(td, traced);
 
 	return (0);
 }
 
 /*
  * System vector for 64-bit Linux ELF binaries: the Linux syscall table,
  * the LA48 address-space layout constants, and the hooks from this file
  * for argument fetching, return values, signal delivery, exec-time stack
  * and register setup, vDSO mapping and vsyscall emulation.
  */
 struct sysentvec elf_linux_sysvec = {
 	.sv_size	= LINUX_SYS_MAXSYSCALL,
 	.sv_table	= linux_sysent,
 	.sv_fixup	= linux_fixup_elf,
 	.sv_sendsig	= linux_rt_sendsig,
 	.sv_sigcode	= &_binary_linux_vdso_so_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux ELF64",
 	.sv_coredump	= elf64_coredump,
 	.sv_elf_core_osabi = ELFOSABI_NONE,
 	.sv_elf_core_abi_vendor = LINUX_ABI_VENDOR,
 	.sv_elf_core_prepare_notes = linux64_prepare_notes,
 	.sv_imgact_try	= linux_exec_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS_LA48,
 	.sv_usrstack	= LINUX_USRSTACK_LA48,
 	.sv_psstrings	= LINUX_PS_STRINGS_LA48,
 	.sv_psstringssz	= sizeof(struct ps_strings),
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_auxargs = linux_copyout_auxargs,
 	.sv_copyout_strings = linux_copyout_strings,
 	.sv_setregs	= linux_exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN |
 	    SV_SIG_WAITNDQ | SV_TIMEKEEP,
 	.sv_set_syscall_retval = linux_set_syscall_retval,
 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_shared_page_base = LINUX_SHAREDPAGE_LA48,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
 	.sv_thread_detach = linux_thread_detach,
 	.sv_trap	= linux_vsyscall,
 	.sv_onexec	= linux_on_exec_vmspace,
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
 	.sv_set_fork_retval = linux_set_fork_retval,
 };
 
 /*
  * Exec hook: map the vDSO into the new address space and, only if that
  * worked, run the common Linux exec handling.  Returns the mapping error.
  */
 static int
 linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
 {
 	int rv;
 
 	rv = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base,
 	    LINUX_VDSOPAGE_SIZE, imgp);
 	if (rv != 0)
 		return (rv);
 
 	linux_on_exec(p, imgp);
 	return (0);
 }
 
 /*
  * linux_vdso_install() and linux_exec_sysvec_init() must be called
  * after exec_sysvec_init() which is SI_SUB_EXEC (SI_ORDER_ANY).
  *
  * Finishes setting up the sysentvec passed as 'param' and patches three
  * variables inside the kernel copy of the vDSO: the timekeep base address
  * and the TSC and CPU selector indices.  Each variable is located by
  * taking its vDSO symbol address relative to linux_vdso_base as an offset
  * into linux_vdso_mapping.
  */
 static void
 linux_exec_sysvec_init(void *param)
 {
 	l_uintptr_t *ktimekeep_base, *ktsc_selector;
 	struct sysentvec *sv;
 	ptrdiff_t tkoff;
 
 	sv = param;
 	amd64_lower_shared_page(sv);
 	/* Fill timekeep_base */
 	exec_sysvec_init(sv);
 
 	tkoff = kern_timekeep_base - linux_vdso_base;
 	ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
-	*ktimekeep_base = sv->sv_timekeep_base;
+	*ktimekeep_base = sv->sv_shared_page_base + sv->sv_timekeep_offset;
 
 	tkoff = kern_tsc_selector - linux_vdso_base;
 	ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
 	*ktsc_selector = linux_vdso_tsc_selector_idx();
 	if (bootverbose)
 		printf("Linux x86-64 vDSO tsc_selector: %lu\n", *ktsc_selector);
 
 	/* ktsc_selector is reused below as the pointer to the CPU selector. */
 	tkoff = kern_cpu_selector - linux_vdso_base;
 	ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
 	*ktsc_selector = linux_vdso_cpu_selector_idx();
 	if (bootverbose)
 		printf("Linux x86-64 vDSO cpu_selector: %lu\n", *ktsc_selector);
 }
 SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC + 1, SI_ORDER_ANY,
     linux_exec_sysvec_init, &elf_linux_sysvec);
 
 /*
  * Boot-time setup of the Linux vDSO: measure the embedded image, choose
  * its user address, fix the image up for that address, copy it into
  * freshly allocated shared pages and apply its relocations there.
  */
 static void
 linux_vdso_install(const void *param)
 {
 	char *image = &_binary_linux_vdso_so_o_start;
 	char *image_end = &_binary_linux_vdso_so_o_end;
 
 	/* The image has to fit into the pages reserved for it. */
 	linux_szsigcode = image_end - image;
 	MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
 
 	/* Sit one page lower when hw_lower_amd64_sharedpage is set. */
 	linux_vdso_base = LINUX_VDSOPAGE_LA48;
 	if (hw_lower_amd64_sharedpage != 0)
 		linux_vdso_base -= PAGE_SIZE;
 
 	__elfN(linux_vdso_fixup)(image, linux_vdso_base);
 
 	linux_vdso_obj = __elfN(linux_shared_page_init)(&linux_vdso_mapping,
 	    LINUX_VDSOPAGE_SIZE);
 	bcopy(image, linux_vdso_mapping, linux_szsigcode);
 
 	/* Relocations are applied to the copy, not to the linked-in image. */
 	linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
 }
 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC + 1, SI_ORDER_FIRST,
     linux_vdso_install, NULL);
 
 /*
  * Undo linux_vdso_install(): hand the vDSO object and its kernel mapping
  * back to linux_shared_page_fini().
  */
 static void
 linux_vdso_deinstall(const void *param)
 {
 	__elfN(linux_shared_page_fini)(linux_vdso_obj, linux_vdso_mapping,
 	    LINUX_VDSOPAGE_SIZE);
 }
 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
     linux_vdso_deinstall, NULL);
 
 /*
  * Apply the RELA relocations of the ELF image at 'mapping' for a load
  * address of 'offset'.  Only R_X86_64_RELATIVE entries are acted on;
  * REL sections, ifunc relocations and unknown types are just reported.
  * When the image has several RELA sections only the last one is used.
  */
 static void
 linux_vdso_reloc(char *mapping, Elf_Addr offset)
 {
 	const Elf_Ehdr *eh;
 	const Elf_Shdr *sh;
 	const Elf_Rela *rela;
 	Elf64_Addr *where, val;
 	Elf_Size rtype, symidx;
 	int nrela, s, r;
 
 	MPASS(offset != 0);
 
 	eh = (const Elf_Ehdr *)mapping;
 	sh = (const Elf_Shdr *)(mapping + eh->e_shoff);
 
 	/* Pass 1: locate the relocation table among the section headers. */
 	rela = NULL;
 	nrela = 0;
 	for (s = 0; s < eh->e_shnum; s++) {
 		if (sh[s].sh_type == SHT_REL) {
 			printf("Linux x86_64 vDSO: unexpected Rel section\n");
 		} else if (sh[s].sh_type == SHT_RELA) {
 			rela = (const Elf_Rela *)(mapping + sh[s].sh_offset);
 			nrela = sh[s].sh_size / sizeof(*rela);
 		}
 	}
 
 	/* Pass 2: process each entry of that table. */
 	for (r = 0; r < nrela; r++) {
 		where = (Elf_Addr *)(mapping + rela[r].r_offset);
 		rtype = ELF_R_TYPE(rela[r].r_info);
 		symidx = ELF_R_SYM(rela[r].r_info);
 
 		switch (rtype) {
 		case R_X86_64_NONE:	/* none */
 			break;
 
 		case R_X86_64_RELATIVE:	/* B + A */
 			val = offset + (Elf_Addr)rela[r].r_addend;
 			/* Skip the store when the word already holds the value. */
 			if (*where != val)
 				*where = val;
 			break;
 		case R_X86_64_IRELATIVE:
 			printf("Linux x86_64 vDSO: unexpected ifunc relocation, "
 			    "symbol index %ld\n", symidx);
 			break;
 		default:
 			printf("Linux x86_64 vDSO: unexpected relocation type %ld, "
 			    "symbol index %ld\n", rtype, symidx);
 		}
 	}
 }
 
 static char GNULINUX_ABI_VENDOR[] = "GNU";
 static int GNULINUX_ABI_DESC = 0;
 
 /*
  * Decode the descriptor of a GNU ABI note.  The first word must equal
  * GNULINUX_ABI_DESC; the next three are folded into *osrel with
  * LINUX_KERNVER() (see linux_mib.h), i.e. the Linux convention of
  * 	(version << 16) | (major << 8) | (minor)
  * Returns false, leaving *osrel untouched, when the first word differs.
  */
 static bool
 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
 {
 	const Elf32_Word *words;
 
 	/* The descriptor follows the note header and the padded name. */
 	words = (const Elf32_Word *)((uintptr_t)(note + 1) +
 	    roundup2(note->n_namesz, sizeof(Elf32_Addr)));
 
 	if (words[0] != GNULINUX_ABI_DESC)
 		return (false);
 
 	*osrel = LINUX_KERNVER(words[1], words[2], words[3]);
 	return (true);
 }
 
 /*
  * Brand note shared by all brands below: vendor string "GNU", a 16-byte
  * descriptor and note type 1.  BN_TRANSLATE_OSREL routes the descriptor
  * through linux_trans_osrel().
  */
 static Elf_Brandnote linux64_brandnote = {
 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
 	.hdr.n_descsz	= 16,
 	.hdr.n_type	= 1,
 	.vendor		= GNULINUX_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= linux_trans_osrel
 };
 
 /* Brand for binaries whose interpreter is /lib64/ld-linux-x86-64.so.2. */
 static Elf64_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib64/ld-linux-x86-64.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux64_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /* Same as above, for the interpreter path /lib64/ld-linux.so.2. */
 static Elf64_Brandinfo linux_glibc2brandshort = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib64/ld-linux.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux64_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /*
  * Brand for the interpreter /lib/ld-musl-x86_64.so.1; the only brand here
  * that also sets LINUX_BI_FUTEX_REQUEUE.
  */
 static Elf64_Brandinfo linux_muslbrand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib/ld-musl-x86_64.so.1",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux64_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE |
 			    LINUX_BI_FUTEX_REQUEUE
 };
 
 /* NULL-terminated list walked by linux64_elf_modevent(). */
 Elf64_Brandinfo *linux_brandlist[] = {
 	&linux_glibc2brand,
 	&linux_glibc2brandshort,
 	&linux_muslbrand,
 	NULL
 };
 
 /*
  * Module event handler.
  *
  * MOD_LOAD inserts every brand in linux_brandlist and, when all of them
  * went in, registers the Linux ioctl handlers and sets stclohz.
  * MOD_UNLOAD refuses with EBUSY while any brand is in use; otherwise it
  * removes the brands and unregisters the ioctl handlers.
  * Any other event yields EOPNOTSUPP.
  */
 static int
 linux64_elf_modevent(module_t mod, int type, void *data)
 {
 	struct linux_ioctl_handler **handler;
 	Elf64_Brandinfo **bi;
 	int rc;
 
 	rc = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		/* Try every brand even after one has failed. */
 		for (bi = linux_brandlist; *bi != NULL; bi++) {
 			if (elf64_insert_brand_entry(*bi) < 0)
 				rc = EINVAL;
 		}
 		if (rc != 0) {
 			printf("cannot insert Linux x86-64 ELF brand handler\n");
 			break;
 		}
 		SET_FOREACH(handler, linux_ioctl_handler_set)
 			linux_ioctl_register_handler(*handler);
 		stclohz = (stathz ? stathz : hz);
 		if (bootverbose)
 			printf("Linux x86-64 ELF exec handler installed\n");
 		break;
 
 	case MOD_UNLOAD:
 		/* Nothing is removed while any brand is still in use. */
 		for (bi = linux_brandlist; *bi != NULL; bi++) {
 			if (elf64_brand_inuse(*bi))
 				rc = EBUSY;
 		}
 		if (rc == 0) {
 			for (bi = linux_brandlist; *bi != NULL; bi++) {
 				if (elf64_remove_brand_entry(*bi) < 0)
 					rc = EINVAL;
 			}
 		}
 		if (rc != 0) {
 			printf("Could not deinstall Linux x86_64 ELF interpreter entry\n");
 			break;
 		}
 		SET_FOREACH(handler, linux_ioctl_handler_set)
 			linux_ioctl_unregister_handler(*handler);
 		if (bootverbose)
 			printf("Linux x86_64 ELF exec handler removed\n");
 		break;
 
 	default:
 		return (EOPNOTSUPP);
 	}
 
 	return (rc);
 }
 
 /*
  * Module descriptor: the module name, linux64_elf_modevent() as its event
  * handler, and 0 for the remaining field.
  */
 static moduledata_t linux64_elf_mod = {
 	"linux64elf",
 	linux64_elf_modevent,
 	0
 };
 
 /* Registered at SI_SUB_EXEC; depends on version 1 of linux_common. */
 DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
 MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1);
 FEATURE(linux64, "Linux 64bit support");
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index e9162f04d3f7..bd10d659979c 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -1,1168 +1,1168 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2004 Tim J. Robbins
  * Copyright (c) 2003 Peter Wemm
  * Copyright (c) 2002 Doug Rabson
  * Copyright (c) 1998-1999 Andrew Gallatin
  * Copyright (c) 1994-1996 Søren Schmidt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "opt_compat.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #ifndef COMPAT_FREEBSD32
 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
 #endif
 
 #define	__ELF_WORD_SIZE	32
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/stddef.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #include <machine/trap.h>
 
 #include <x86/linux/linux_x86.h>
 #include <amd64/linux32/linux.h>
 #include <amd64/linux32/linux32_proto.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_fork.h>
 #include <compat/linux/linux_ioctl.h>
 #include <compat/linux/linux_mib.h>
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_vdso.h>
 
 #include <x86/linux/linux_x86_sigframe.h>
 
 MODULE_VERSION(linux, 1);
 
 /*
  * Top of the 32-bit Linux user address space, from the highest address
  * downwards: the vDSO pages, then the shared page, then the user stack.
  */
 #define	LINUX32_MAXUSER		((1ul << 32) - PAGE_SIZE)
 /* Parenthesized so the macro is safe inside any larger expression. */
 #define	LINUX32_VDSOPAGE_SIZE	(PAGE_SIZE * 2)
 #define	LINUX32_VDSOPAGE	(LINUX32_MAXUSER - LINUX32_VDSOPAGE_SIZE)
 /* The PAGE_SIZE subtracted here is the size of the native SHAREDPAGE. */
 #define	LINUX32_SHAREDPAGE	(LINUX32_VDSOPAGE - PAGE_SIZE)
 #define	LINUX32_USRSTACK	LINUX32_SHAREDPAGE
 
 /* Size in bytes of the vDSO image; computed in linux_vdso_install(). */
 static int linux_szsigcode;
 /* VM object and kernel mapping obtained in linux_vdso_install(). */
 static vm_object_t linux_vdso_obj;
 static char *linux_vdso_mapping;
 /* Start and end markers of the vDSO image (defined outside this file). */
 extern char _binary_linux32_vdso_so_o_start;
 extern char _binary_linux32_vdso_so_o_end;
 /* User address of the vDSO; set to LINUX32_VDSOPAGE at install time. */
 static vm_offset_t linux_vdso_base;
 
 /* System call table referenced by elf_linux_sysvec. */
 extern struct sysent linux32_sysent[LINUX32_SYS_MAXSYSCALL];
 
 /* Linker set of ioctl handlers (un)registered by the module event handler. */
 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
 
 /* Forward declarations of the file-local helpers defined below. */
 static int	linux_fixup_elf(uintptr_t *stack_base,
 		    struct image_params *iparams);
 static int	linux_copyout_strings(struct image_params *imgp,
 		    uintptr_t *stack_base);
 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
 static void	linux_exec_setregs(struct thread *td,
 				   struct image_params *imgp, uintptr_t stack);
 static void	linux_exec_sysvec_init(void *param);
 static int	linux_on_exec_vmspace(struct proc *p,
 		    struct image_params *imgp);
 static void	linux32_fixlimit(struct rlimit *rl, int which);
 static bool	linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(const void *param);
 static void	linux_vdso_deinstall(const void *param);
 static void	linux_vdso_reloc(char *mapping, Elf_Addr offset);
 static void	linux32_set_fork_retval(struct thread *td);
 static void	linux32_set_syscall_retval(struct thread *td, int error);
 
 /*
  * 32-bit layout of the "ps_strings" record that linux_copyout_strings()
  * fills in on the user stack; pointers are stored as 32-bit values.
  */
 struct linux32_ps_strings {
 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
 	u_int ps_nargvstr;	/* the number of argument strings */
 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
 	u_int ps_nenvstr;	/* the number of environment strings */
 };
 /* The record sits immediately below the top of the user stack. */
 #define	LINUX32_PS_STRINGS	(LINUX32_USRSTACK - \
 				    sizeof(struct linux32_ps_strings))
 
 /*
  * vDSO symbols used by this file.
  * NOTE(review): presumably these macros declare variables that are bound to
  * the named symbols of the vDSO image -- confirm against linux_vdso.h.
  */
 LINUX_VDSO_SYM_INTPTR(__kernel_vsyscall);
 LINUX_VDSO_SYM_INTPTR(linux32_vdso_sigcode);
 LINUX_VDSO_SYM_INTPTR(linux32_vdso_rt_sigcode);
 LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
 LINUX_VDSO_SYM_INTPTR(kern_tsc_selector);
 LINUX_VDSO_SYM_INTPTR(kern_cpu_selector);
 LINUX_VDSO_SYM_CHAR(linux_platform);
 
 /*
  * Build the Linux ELF auxiliary vector for the image being executed in a
  * zeroed temporary kernel array of LINUX_AT_COUNT entries and copy the
  * whole array out to the user address 'base'.
  *
  * imgp->auxargs is consumed: it is freed and set to NULL.
  * Returns the copyout() result (0 on success).
  */
 static int
 linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
 {
 	Elf32_Auxargs *args;
 	Elf32_Auxinfo *argarray, *pos;
 	int error, issetugid;
 
 	args = (Elf32_Auxargs *)imgp->auxargs;
 	argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP,
 	    M_WAITOK | M_ZERO);
 
 	/* Reported to userland as AT_SECURE below. */
 	issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, __kernel_vsyscall);
 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
 
 	/*
 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
 	 * as it has appeared in the 2.4.0-rc7 first time.
 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
 	 * is not present.
 	 * Also see linux_times() implementation.
 	 */
 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid);
 	AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary));
 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP2, 0);
 	/* Optional entries: only present when the data is available. */
 	if (imgp->execpathp != 0)
 		AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp));
 	if (args->execfd != -1)
 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
 	AUXARGS_ENTRY(pos, AT_NULL, 0);
 
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 	/* Sanity check that the entries above fit the allocated array. */
 	KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs"));
 
 	/* The full array is copied; unused trailing entries are zero. */
 	error = copyout(argarray, (void *)base,
 	    sizeof(*argarray) * LINUX_AT_COUNT);
 	free(argarray, M_TEMP);
 	return (error);
 }
 
 /*
  * Store argc in the 32-bit slot directly below the current stack base and
  * move *stack_base down to that slot.  Returns EFAULT if the user store
  * fails, 0 otherwise.
  */
 static int
 linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp)
 {
 	Elf32_Addr *argcp;
 
 	argcp = (Elf32_Addr *)*stack_base - 1;
 	if (suword32(argcp, (uint32_t)imgp->args->argc) == -1)
 		return (EFAULT);
 
 	*stack_base = (uintptr_t)argcp;
 	return (0);
 }
 
 /*
  * Deliver a signal using a Linux rt signal frame (struct l_rt_sigframe).
  *
  * The frame is assembled in a kernel copy, written to the user stack (or
  * to the alternate signal stack when that applies) and the trapframe is
  * redirected to linux32_vdso_rt_sigcode with the handler address in %rdi.
  *
  * Entered with the process lock and ps_mtx held (both asserted); both are
  * dropped while the frame is built and re-acquired before returning.
  * If the frame cannot be copied out, the process is killed with SIGILL.
  */
 static void
 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct l_rt_sigframe *fp, frame;
 	int oonstack;
 	int sig;
 	int code;
 
 	sig = linux_translate_traps(ksi->ksi_signo, ksi->ksi_trapno);
 	code = ksi->ksi_code;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_rsp);
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the top of the alternate stack. */
 		fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
 	} else
 		/* Place the frame just below the current stack pointer. */
 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
 	mtx_unlock(&psp->ps_mtx);
 
 	/* Build the argument list for the signal handler. */
 	sig = bsd_to_linux_signal(sig);
 
 	bzero(&frame, sizeof(frame));
 
 	/* The pointers stored in the frame are user addresses inside *fp. */
 	frame.sf_sig = sig;
 	frame.sf_siginfo = PTROUT(&fp->sf_si);
 	frame.sf_ucontext = PTROUT(&fp->sf_uc);
 
 	/* Fill in POSIX parts. */
 	siginfo_to_lsiginfo(&ksi->ksi_info, &frame.sf_si, sig);
 
 	/*
 	 * Build the signal context to be used by sigreturn and libgcc unwind.
 	 */
 	frame.sf_uc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
 	frame.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
 	PROC_UNLOCK(p);
 
 	bsd_to_linux_sigset(mask, &frame.sf_uc.uc_sigmask);
 
 	/* Save the interrupted register state into the machine context. */
 	frame.sf_uc.uc_mcontext.sc_mask   = frame.sf_uc.uc_sigmask.__mask;
 	frame.sf_uc.uc_mcontext.sc_edi    = regs->tf_rdi;
 	frame.sf_uc.uc_mcontext.sc_esi    = regs->tf_rsi;
 	frame.sf_uc.uc_mcontext.sc_ebp    = regs->tf_rbp;
 	frame.sf_uc.uc_mcontext.sc_ebx    = regs->tf_rbx;
 	frame.sf_uc.uc_mcontext.sc_esp    = regs->tf_rsp;
 	frame.sf_uc.uc_mcontext.sc_edx    = regs->tf_rdx;
 	frame.sf_uc.uc_mcontext.sc_ecx    = regs->tf_rcx;
 	frame.sf_uc.uc_mcontext.sc_eax    = regs->tf_rax;
 	frame.sf_uc.uc_mcontext.sc_eip    = regs->tf_rip;
 	frame.sf_uc.uc_mcontext.sc_cs     = regs->tf_cs;
 	frame.sf_uc.uc_mcontext.sc_gs     = regs->tf_gs;
 	frame.sf_uc.uc_mcontext.sc_fs     = regs->tf_fs;
 	frame.sf_uc.uc_mcontext.sc_es     = regs->tf_es;
 	frame.sf_uc.uc_mcontext.sc_ds     = regs->tf_ds;
 	frame.sf_uc.uc_mcontext.sc_eflags = regs->tf_rflags;
 	frame.sf_uc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
 	frame.sf_uc.uc_mcontext.sc_ss     = regs->tf_ss;
 	frame.sf_uc.uc_mcontext.sc_err    = regs->tf_err;
 	frame.sf_uc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
 	frame.sf_uc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
 
 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Build context to run handler in. */
 	regs->tf_rsp = PTROUT(fp);
 	regs->tf_rip = linux32_vdso_rt_sigcode;
 	regs->tf_rdi = PTROUT(catcher);
 	regs->tf_rflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucode32sel;
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	/* Re-acquire the locks the caller expects to still hold. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * in u. to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  *
  * Handlers registered in ps_siginfo are handed to linux_rt_sendsig();
  * everything else gets a struct l_sigframe built here.  Entered with the
  * process lock and ps_mtx held (both asserted); both are dropped while the
  * frame is built and re-acquired before returning.
  */
 static void
 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct l_sigframe *fp, frame;
 	l_sigset_t lmask;
 	int oonstack;
 	int sig, code;
 
 	sig = linux_translate_traps(ksi->ksi_signo, ksi->ksi_trapno);
 	code = ksi->ksi_code;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		linux_rt_sendsig(catcher, ksi, mask);
 		return;
 	}
 
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_rsp);
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the top of the alternate stack. */
 		fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
 	} else
 		/* Place the frame just below the current stack pointer. */
 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/* Build the argument list for the signal handler. */
 	sig = bsd_to_linux_signal(sig);
 
 	bzero(&frame, sizeof(frame));
 
 	frame.sf_sig = sig;
 	/* The native mask is kept too; linux_sigreturn() restores from it. */
 	frame.sf_sigmask = *mask;
 	bsd_to_linux_sigset(mask, &lmask);
 
 	/* Build the signal context to be used by sigreturn. */
 	frame.sf_sc.sc_mask   = lmask.__mask;
 	frame.sf_sc.sc_gs     = regs->tf_gs;
 	frame.sf_sc.sc_fs     = regs->tf_fs;
 	frame.sf_sc.sc_es     = regs->tf_es;
 	frame.sf_sc.sc_ds     = regs->tf_ds;
 	frame.sf_sc.sc_edi    = regs->tf_rdi;
 	frame.sf_sc.sc_esi    = regs->tf_rsi;
 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
 	frame.sf_sc.sc_esp    = regs->tf_rsp;
 	frame.sf_sc.sc_edx    = regs->tf_rdx;
 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
 	frame.sf_sc.sc_eax    = regs->tf_rax;
 	frame.sf_sc.sc_eip    = regs->tf_rip;
 	frame.sf_sc.sc_cs     = regs->tf_cs;
 	frame.sf_sc.sc_eflags = regs->tf_rflags;
 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
 	frame.sf_sc.sc_ss     = regs->tf_ss;
 	frame.sf_sc.sc_err    = regs->tf_err;
 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
 
 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Build context to run handler in. */
 	regs->tf_rsp = PTROUT(fp);
 	regs->tf_rip = linux32_vdso_sigcode;
 	regs->tf_rdi = PTROUT(catcher);
 	regs->tf_rflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucode32sel;
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	/* Re-acquire the locks the caller expects to still hold. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * psl to gain improper privileges or to cause
  * a machine fault.
  *
  * Returns EFAULT if the frame cannot be read, EINVAL if the saved flags
  * or %cs fail the security checks, and EJUSTRETURN once the trapframe
  * has been restored.
  */
 int
 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
 {
 	struct l_sigframe frame;
 	struct trapframe *regs;
 	int eflags;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 
 	/*
 	 * The trampoline code hands us the sigframe.
 	 * It is unsafe to keep track of it ourselves, in the event that a
 	 * program jumps out of a signal handler.
 	 */
 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
 		return (EFAULT);
 
 	/* Check for security violations. */
 	eflags = frame.sf_sc.sc_eflags;
 	if (!EFL_SECURE(eflags, regs->tf_rflags))
 		return(EINVAL);
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
 		/* Post SIGBUS to the thread in addition to failing the call. */
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_rip;
 		trapsignal(td, &ksi);
 		return(EINVAL);
 	}
 
 	/* Restore the signal mask saved in the frame by linux_sendsig(). */
 	kern_sigprocmask(td, SIG_SETMASK, &frame.sf_sigmask, NULL, 0);
 
 	/* Restore signal context. */
 	regs->tf_rdi    = frame.sf_sc.sc_edi;
 	regs->tf_rsi    = frame.sf_sc.sc_esi;
 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
 	regs->tf_rdx    = frame.sf_sc.sc_edx;
 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
 	regs->tf_rax    = frame.sf_sc.sc_eax;
 	regs->tf_rip    = frame.sf_sc.sc_eip;
 	regs->tf_cs     = frame.sf_sc.sc_cs;
 	regs->tf_ds     = frame.sf_sc.sc_ds;
 	regs->tf_es     = frame.sf_sc.sc_es;
 	regs->tf_fs     = frame.sf_sc.sc_fs;
 	regs->tf_gs     = frame.sf_sc.sc_gs;
 	regs->tf_rflags = eflags;
 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
 	regs->tf_ss     = frame.sf_sc.sc_ss;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by rt_sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * psl to gain improper privileges or to cause
  * a machine fault.
  *
  * Returns EFAULT if the ucontext cannot be read, EINVAL if the saved flags
  * or %cs fail the security checks, and EJUSTRETURN once the trapframe
  * has been restored.
  */
 int
 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
 {
 	struct l_ucontext uc;
 	struct l_sigcontext *context;
 	sigset_t bmask;
 	l_stack_t *lss;
 	stack_t ss;
 	struct trapframe *regs;
 	int eflags;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 
 	/*
 	 * The trampoline code hands us the ucontext.
 	 * It is unsafe to keep track of it ourselves, in the event that a
 	 * program jumps out of a signal handler.
 	 */
 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
 		return (EFAULT);
 
 	context = &uc.uc_mcontext;
 
 	/* Check for security violations. */
 	eflags = context->sc_eflags;
 	if (!EFL_SECURE(eflags, regs->tf_rflags))
 		return(EINVAL);
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 	if (!CS_SECURE(context->sc_cs)) {
 		/* Post SIGBUS to the thread in addition to failing the call. */
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_rip;
 		trapsignal(td, &ksi);
 		return(EINVAL);
 	}
 
 	/* Convert the saved Linux mask to the native form and install it. */
 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
 
 	/*
 	 * Restore signal context
 	 */
 	regs->tf_gs	= context->sc_gs;
 	regs->tf_fs	= context->sc_fs;
 	regs->tf_es	= context->sc_es;
 	regs->tf_ds	= context->sc_ds;
 	regs->tf_rdi    = context->sc_edi;
 	regs->tf_rsi    = context->sc_esi;
 	regs->tf_rbp    = context->sc_ebp;
 	regs->tf_rbx    = context->sc_ebx;
 	regs->tf_rdx    = context->sc_edx;
 	regs->tf_rcx    = context->sc_ecx;
 	regs->tf_rax    = context->sc_eax;
 	regs->tf_rip    = context->sc_eip;
 	regs->tf_cs     = context->sc_cs;
 	regs->tf_rflags = eflags;
 	regs->tf_rsp    = context->sc_esp_at_signal;
 	regs->tf_ss     = context->sc_ss;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 
 	/*
 	 * call sigaltstack & ignore results..
 	 */
 	lss = &uc.uc_stack;
 	ss.ss_sp = PTRIN(lss->ss_sp);
 	ss.ss_size = lss->ss_size;
 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
 
 	(void)kern_sigaltstack(td, &ss, NULL);
 
 	return (EJUSTRETURN);
 }
 
 static int
 linux32_fetch_syscall_args(struct thread *td)
 {
 	struct proc *p;
 	struct trapframe *frame;
 	struct syscall_args *sa;
 
 	p = td->td_proc;
 	frame = td->td_frame;
 	sa = &td->td_sa;
 
 	sa->args[0] = frame->tf_rbx;
 	sa->args[1] = frame->tf_rcx;
 	sa->args[2] = frame->tf_rdx;
 	sa->args[3] = frame->tf_rsi;
 	sa->args[4] = frame->tf_rdi;
 	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
 	sa->code = frame->tf_rax;
 	sa->original_code = sa->code;
 
 	if (sa->code >= p->p_sysent->sv_size)
 		/* nosys */
 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
 	else
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 
 	td->td_retval[0] = 0;
 	td->td_retval[1] = frame->tf_rdx;
 
 	return (0);
 }
 
 static void
 linux32_set_syscall_retval(struct thread *td, int error)
 {
 	struct trapframe *frame = td->td_frame;
 
 	cpu_set_syscall_retval(td, error);
 
 	if (__predict_false(error != 0)) {
 		if (error != ERESTART && error != EJUSTRETURN)
 			frame->tf_rax = bsd_to_linux_errno(error);
 	}
 }
 
 /* Set the return value register (%rax) of the given thread to 0. */
 static void
 linux32_set_fork_retval(struct thread *td)
 {
 
 	td->td_frame->tf_rax = 0;
 }
 
 /*
  * Reset the thread's register state for a freshly exec'ed 32-bit image:
  * drop any user LDT, zero the FS/GS bases, clear the trapframe and point
  * it at the image entry with the given stack.
  * XXX copied from ia32_signal.c.
  */
 static void
 linux_exec_setregs(struct thread *td, struct image_params *imgp,
     uintptr_t stack)
 {
 	struct trapframe *tf;
 	struct pcb *pcb;
 	register_t trace_flag;
 
 	tf = td->td_frame;
 	pcb = td->td_pcb;
 
 	if (td->td_proc->p_md.md_ldt != NULL)
 		user_ldt_free(td);
 
 	critical_enter();
 	wrmsr(MSR_FSBASE, 0);
 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
 	pcb->pcb_fsbase = 0;
 	pcb->pcb_gsbase = 0;
 	critical_exit();
 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
 
 	/* Only the trace flag survives the wipe of the trapframe. */
 	trace_flag = tf->tf_rflags & PSL_T;
 	bzero(tf, sizeof(*tf));
 
 	/* Entry point, stack and flags. */
 	tf->tf_rip = imgp->entry_addr;
 	tf->tf_rsp = stack;
 	tf->tf_rflags = PSL_USER | trace_flag;
 
 	/* 32-bit user code and data segments. */
 	tf->tf_cs = _ucode32sel;
 	tf->tf_ss = _udatasel;
 	tf->tf_ds = _udatasel;
 	tf->tf_es = _udatasel;
 	tf->tf_fs = _ufssel;
 	tf->tf_gs = _ugssel;
 	tf->tf_flags = TF_HASSEGS;
 
 	tf->tf_rbx = (register_t)imgp->ps_strings;
 
 	x86_clear_dbregs(pcb);
 
 	fpstate_drop(td);
 
 	/* Do full restore on return so that we can change to a different %cs */
 	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
 }
 
 /*
  * Lay out the initial user stack for a new 32-bit Linux image.
  *
  * Working downwards from the ps_strings record the function places the
  * executable path (only when both execpath and auxargs are set), the
  * random canary, the argument and environment strings, room for the ELF
  * auxargs array (when auxargs are present) and finally the argv[]/envp[]
  * pointer vectors.  *stack_base is set to the start of the argv[] vector.
  *
  * Returns 0 on success, a copyout() error, or EFAULT when a 32-bit user
  * store fails.
  *
  * XXX copied from ia32_sysvec.c.
  */
 static int
 linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base)
 {
 	int argc, envc, error;
 	u_int32_t *vectp;
 	char *stringp;
 	uintptr_t destp, ustringp;
 	struct linux32_ps_strings *arginfo;
 	char canary[LINUX_AT_RANDOM_LEN];
 	size_t execpath_len;
 
 	arginfo = (struct linux32_ps_strings *)PROC_PS_STRINGS(imgp->proc);
 	destp = (uintptr_t)arginfo;
 
 	if (imgp->execpath != NULL && imgp->auxargs != NULL) {
 		execpath_len = strlen(imgp->execpath) + 1;
 		destp -= execpath_len;
 		destp = rounddown2(destp, sizeof(uint32_t));
 		imgp->execpathp = (void *)destp;
 		error = copyout(imgp->execpath, imgp->execpathp, execpath_len);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Prepare the canary for SSP. */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= roundup(sizeof(canary), sizeof(uint32_t));
 	imgp->canary = (void *)destp;
 	error = copyout(canary, imgp->canary, sizeof(canary));
 	if (error != 0)
 		return (error);
 
 	/* Allocate room for the argument and environment strings. */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(uint32_t));
 	ustringp = destp;
 
 	if (imgp->auxargs) {
 		/*
 		 * Allocate room on the stack for the ELF auxargs
 		 * array.  It has LINUX_AT_COUNT entries.
 		 */
 		destp -= LINUX_AT_COUNT * sizeof(Elf32_Auxinfo);
 		destp = rounddown2(destp, sizeof(uint32_t));
 	}
 
 	vectp = (uint32_t *)destp;
 
 	/*
 	 * Allocate room for the argv[] and env vectors including the
 	 * terminating NULL pointers.
 	 */
 	vectp -= imgp->args->argc + 1 + imgp->args->envc + 1;
 
 	/* vectp also becomes our initial stack base. */
 	*stack_base = (uintptr_t)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/* Copy out strings - arguments and environment. */
 	error = copyout(stringp, (void *)ustringp,
 	    ARG_MAX - imgp->args->stringspace);
 	if (error != 0)
 		return (error);
 
 	/* Fill in "ps_strings" struct for ps, w, etc. */
 	if (suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp) != 0 ||
 	    suword32(&arginfo->ps_nargvstr, argc) != 0)
 		return (EFAULT);
 
 	/* Fill in argument portion of vector table. */
 	for (; argc > 0; --argc) {
 		if (suword32(vectp++, ustringp) != 0)
 			return (EFAULT);
 		/*
 		 * Walk the kernel copy of the string to advance the user
 		 * address past it, including its terminating NUL.
 		 */
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* A null vector table pointer separates the argp's from the envp's. */
 	if (suword32(vectp++, 0) != 0)
 		return (EFAULT);
 
 	if (suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp) != 0 ||
 	    suword32(&arginfo->ps_nenvstr, envc) != 0)
 		return (EFAULT);
 
 	/* Fill in environment portion of vector table. */
 	for (; envc > 0; --envc) {
 		if (suword32(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* The end of the vector table is a null pointer. */
 	if (suword32(vectp, 0) != 0)
 		return (EFAULT);
 
 	if (imgp->auxargs) {
 		/* The auxargs array goes right after the envp[] terminator. */
 		vectp++;
 		error = imgp->sysent->sv_copyout_auxargs(imgp,
 		    (uintptr_t)vectp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /* compat.linux32 sysctl node and the tunables that live under it. */
 static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "32-bit Linux emulation");
 
 /*
  * Upper bounds applied by linux32_fixlimit() to the data, stack and
  * virtual memory resource limits; a value of 0 disables the clamp.
  */
 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
     &linux32_maxdsiz, 0, "");
 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
     &linux32_maxssiz, 0, "");
 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
     &linux32_maxvmem, 0, "");
 /* Not read in this part of the file; exported as a sysctl/tunable only. */
 bool linux32_emulate_i386 = false;
 SYSCTL_BOOL(_compat_linux32, OID_AUTO, emulate_i386, CTLFLAG_RWTUN,
     &linux32_emulate_i386, 0, "Emulate the real i386");
 
 static void
 linux32_fixlimit(struct rlimit *rl, int which)
 {
 
 	switch (which) {
 	case RLIMIT_DATA:
 		if (linux32_maxdsiz != 0) {
 			if (rl->rlim_cur > linux32_maxdsiz)
 				rl->rlim_cur = linux32_maxdsiz;
 			if (rl->rlim_max > linux32_maxdsiz)
 				rl->rlim_max = linux32_maxdsiz;
 		}
 		break;
 	case RLIMIT_STACK:
 		if (linux32_maxssiz != 0) {
 			if (rl->rlim_cur > linux32_maxssiz)
 				rl->rlim_cur = linux32_maxssiz;
 			if (rl->rlim_max > linux32_maxssiz)
 				rl->rlim_max = linux32_maxssiz;
 		}
 		break;
 	case RLIMIT_VMEM:
 		if (linux32_maxvmem != 0) {
 			if (rl->rlim_cur > linux32_maxvmem)
 				rl->rlim_cur = linux32_maxvmem;
 			if (rl->rlim_max > linux32_maxvmem)
 				rl->rlim_max = linux32_maxvmem;
 		}
 		break;
 	}
 }
 
 /*
  * System entry vector for 32-bit Linux ELF images.  It ties the syscall
  * table, signal delivery, stack setup and address-space layout defined in
  * this file together; the brand descriptors below point at it and
  * linux_exec_sysvec_init() finishes its initialization.
  */
 struct sysentvec elf_linux_sysvec = {
 	.sv_size	= LINUX32_SYS_MAXSYSCALL,
 	.sv_table	= linux32_sysent,
 	.sv_fixup	= linux_fixup_elf,
 	.sv_sendsig	= linux_sendsig,
 	.sv_sigcode	= &_binary_linux32_vdso_so_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux ELF32",
 	.sv_coredump	= elf32_coredump,
 	.sv_elf_core_osabi = ELFOSABI_NONE,
 	.sv_elf_core_abi_vendor = LINUX_ABI_VENDOR,
 	.sv_elf_core_prepare_notes = linux32_prepare_notes,
 	.sv_imgact_try	= linux_exec_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= LINUX32_MAXUSER,
 	.sv_usrstack	= LINUX32_USRSTACK,
 	.sv_psstrings	= LINUX32_PS_STRINGS,
 	.sv_psstringssz	= sizeof(struct linux32_ps_strings),
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_auxargs = linux_copyout_auxargs,
 	.sv_copyout_strings = linux_copyout_strings,
 	.sv_setregs	= linux_exec_setregs,
 	.sv_fixlimit	= linux32_fixlimit,
 	.sv_maxssiz	= &linux32_maxssiz,
 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP |
 	    SV_SIG_DISCIGN | SV_SIG_WAITNDQ | SV_TIMEKEEP,
 	.sv_set_syscall_retval = linux32_set_syscall_retval,
 	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	/* The shared page sits one page below the vDSO pages. */
 	.sv_shared_page_base = LINUX32_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
 	.sv_thread_detach = linux_thread_detach,
 	.sv_trap	= NULL,
 	.sv_onexec	= linux_on_exec_vmspace,
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
 	.sv_set_fork_retval = linux32_set_fork_retval,
 };
 
 /*
  * exec hook: map the vDSO into the new address space and, only if that
  * worked, run the common Linux exec handling.  Returns the mapping error.
  */
 static int
 linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
 {
 	int error;
 
 	error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base,
 	    LINUX32_VDSOPAGE_SIZE, imgp);
 	if (error != 0)
 		return (error);
 
 	linux_on_exec(p, imgp);
 	return (0);
 }
 
 /*
  * linux_vdso_install() and linux_exec_sysvec_init() must be called
  * after exec_sysvec_init() which is SI_SUB_EXEC (SI_ORDER_ANY).
  */
 /*
  * Finish initializing the sysentvec passed in 'param' and patch three
  * variables inside the kernel mapping of the vDSO: the timekeep base
  * address and the tsc and cpu selector indices.  Each variable is located
  * by its offset from linux_vdso_base.
  */
 static void
 linux_exec_sysvec_init(void *param)
 {
 	l_uintptr_t *ktimekeep_base, *ktsc_selector;
 	struct sysentvec *sv;
 	ptrdiff_t tkoff;
 
 	sv = param;
 	/* Fill timekeep_base */
 	exec_sysvec_init(sv);
 
 	tkoff = kern_timekeep_base - linux_vdso_base;
 	ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
-	*ktimekeep_base = sv->sv_timekeep_base;
+	*ktimekeep_base = sv->sv_shared_page_base + sv->sv_timekeep_offset;
 
 	tkoff = kern_tsc_selector - linux_vdso_base;
 	ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
 	*ktsc_selector = linux_vdso_tsc_selector_idx();
 	if (bootverbose)
 		printf("Linux i386 vDSO tsc_selector: %u\n", *ktsc_selector);
 
 	/* ktsc_selector is reused here as the pointer to kern_cpu_selector. */
 	tkoff = kern_cpu_selector - linux_vdso_base;
 	ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
 	*ktsc_selector = linux_vdso_cpu_selector_idx();
 	if (bootverbose)
 		printf("Linux i386 vDSO cpu_selector: %u\n", *ktsc_selector);
 }
 SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC + 1, SI_ORDER_ANY,
     linux_exec_sysvec_init, &elf_linux_sysvec);
 
 /*
  * Boot/load-time setup of the vDSO: record the image size, fix the image
  * up for its user address, allocate the kernel mapping, copy the image
  * into it and apply its relocations.
  */
 static void
 linux_vdso_install(const void *param)
 {
 	char *image = &_binary_linux32_vdso_so_o_start;
 
 	linux_szsigcode = &_binary_linux32_vdso_so_o_end - image;
 	MPASS(linux_szsigcode <= LINUX32_VDSOPAGE_SIZE);
 
 	linux_vdso_base = LINUX32_VDSOPAGE;
 	__elfN(linux_vdso_fixup)(image, linux_vdso_base);
 
 	linux_vdso_obj = __elfN(linux_shared_page_init)(&linux_vdso_mapping,
 	    LINUX32_VDSOPAGE_SIZE);
 	bcopy(image, linux_vdso_mapping, linux_szsigcode);
 
 	linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
 }
 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC + 1, SI_ORDER_FIRST,
     linux_vdso_install, NULL);
 
 /*
  * Counterpart of linux_vdso_install(): hand the vDSO object and its kernel
  * mapping back to the shared page teardown helper.
  */
 static void
 linux_vdso_deinstall(const void *param)
 {
 
 	__elfN(linux_shared_page_fini)(linux_vdso_obj,
 	    linux_vdso_mapping, LINUX32_VDSOPAGE_SIZE);
 }
 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
     linux_vdso_deinstall, NULL);
 
 /*
  * Apply the relocations of the vDSO image copy at 'mapping', using
  * 'offset' as the load base.
  *
  * The section headers are scanned for an SHT_REL table (the last one found
  * is used).  R_386_RELATIVE entries are resolved as base + addend,
  * R_386_NONE is skipped, and every other relocation type is only reported.
  */
 static void
 linux_vdso_reloc(char *mapping, Elf_Addr offset)
 {
 	const Elf_Shdr *shdr;
 	const Elf_Rel *rel;
 	const Elf_Ehdr *ehdr;
 	Elf32_Addr *where;
 	Elf_Size rtype, symidx;
 	Elf32_Addr addr, addend;
 	int i, relcnt;
 
 	MPASS(offset != 0);
 
 	/* No relocation table until an SHT_REL section has been seen. */
 	rel = NULL;
 	relcnt = 0;
 	ehdr = (const Elf_Ehdr *)mapping;
 	shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
 	for (i = 0; i < ehdr->e_shnum; i++)
 	{
 		switch (shdr[i].sh_type) {
 		case SHT_REL:
 			rel = (const Elf_Rel *)(mapping + shdr[i].sh_offset);
 			relcnt = shdr[i].sh_size / sizeof(*rel);
 			break;
 		case SHT_RELA:
 			printf("Linux i386 vDSO: unexpected Rela section\n");
 			break;
 		}
 	}
 
 	for (i = 0; i < relcnt; i++, rel++) {
 		where = (Elf32_Addr *)(mapping + rel->r_offset);
 		/* REL entries keep the addend in the relocated word itself. */
 		addend = *where;
 		rtype = ELF_R_TYPE(rel->r_info);
 		symidx = ELF_R_SYM(rel->r_info);
 
 		switch (rtype) {
 		case R_386_NONE:	/* none */
 			break;
 
 		case R_386_RELATIVE:	/* B + A */
 			addr = (Elf32_Addr)PTROUT(offset + addend);
 			if (*where != addr)
 				*where = addr;
 			break;
 
 		case R_386_IRELATIVE:
 			printf("Linux i386 vDSO: unexpected ifunc relocation, "
 			    "symbol index %ju\n", (uintmax_t)symidx);
 			break;
 		default:
 			/* The values are unsigned; print them with %ju. */
 			printf("Linux i386 vDSO: unexpected relocation type %ju, "
 			    "symbol index %ju\n", (uintmax_t)rtype,
 			    (uintmax_t)symidx);
 			break;
 		}
 	}
 }
 
 /* Vendor name and OS descriptor value expected in the ABI note. */
 static char GNU_ABI_VENDOR[] = "GNU";
 static int GNULINUX_ABI_DESC = 0;
 
 /*
  * Translate the ABI note of a Linux binary into an osrel value.
  *
  * The descriptor words follow the note header and the name, which is
  * padded to a multiple of sizeof(Elf32_Addr).  Word 0 must equal
  * GNULINUX_ABI_DESC; words 1..3 are fed to LINUX_KERNVER().
  * Returns true and stores *osrel on a match, false otherwise.
  */
 static bool
 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
 {
 	const Elf32_Word *desc;
 	uintptr_t descaddr;
 
 	descaddr = (uintptr_t)(note + 1) +
 	    roundup2(note->n_namesz, sizeof(Elf32_Addr));
 	desc = (const Elf32_Word *)descaddr;
 
 	if (desc[0] == GNULINUX_ABI_DESC) {
 		/*
 		 * For Linux we encode osrel using the Linux convention of
 		 * 	(version << 16) | (major << 8) | (minor)
 		 * See macro in linux_mib.h
 		 */
 		*osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]);
 		return (true);
 	}
 
 	return (false);
 }
 
 /*
  * Description of the ELF note shared by the brand descriptors below:
  * vendor "GNU", note type 1, with linux32_trans_osrel() as the osrel
  * translation callback.
  */
 static Elf_Brandnote linux32_brandnote = {
 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
 	.hdr.n_type	= 1,
 	.vendor		= GNU_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= linux32_trans_osrel
 };
 
 /*
  * Brand descriptors for 32-bit (EM_386) Linux binaries.  They differ only
  * in the interpreter path they match; the musl entry additionally sets
  * LINUX_BI_FUTEX_REQUEUE.
  */
 
 /* Interpreter /lib/ld-linux.so.1. */
 static Elf32_Brandinfo linux_brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib/ld-linux.so.1",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux32_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /* Interpreter /lib/ld-linux.so.2. */
 static Elf32_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib/ld-linux.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux32_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /* Interpreter /lib/ld-musl-i386.so.1. */
 static Elf32_Brandinfo linux_muslbrand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib/ld-musl-i386.so.1",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux32_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE |
 			    LINUX_BI_FUTEX_REQUEUE
 };
 
 /* NULL-terminated table walked by the module event handler below. */
 Elf32_Brandinfo *linux_brandlist[] = {
 	&linux_brand,
 	&linux_glibc2brand,
 	&linux_muslbrand,
 	NULL
 };
 
 /*
  * Module event handler for the 32-bit Linux ELF image activator.
  *
  * MOD_LOAD registers every brand in linux_brandlist and, if all of them
  * were accepted, the Linux ioctl handlers; it also initializes stclohz.
  * MOD_UNLOAD refuses with EBUSY while any brand is in use, otherwise it
  * removes the brands and unregisters the ioctl handlers.
  *
  * Returns 0 on success, EINVAL/EBUSY on failure and EOPNOTSUPP for any
  * other event type.
  */
 static int
 linux_elf_modevent(module_t mod, int type, void *data)
 {
 	Elf32_Brandinfo **brandinfo;
 	int error;
 	struct linux_ioctl_handler **lihp;
 
 	error = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
 		     ++brandinfo)
 			if (elf32_insert_brand_entry(*brandinfo) < 0)
 				error = EINVAL;
 		if (error == 0) {
 			SET_FOREACH(lihp, linux_ioctl_handler_set)
 				linux32_ioctl_register_handler(*lihp);
 			stclohz = (stathz ? stathz : hz);
 			if (bootverbose)
 				printf("Linux i386 ELF exec handler installed\n");
 		} else {
 			/*
 			 * Roll back: do not leave some brands registered
 			 * when the load as a whole is reported as failed.
 			 * Removing a brand that never made it into the
 			 * table only yields an error return, ignored here.
 			 */
 			for (brandinfo = &linux_brandlist[0];
 			     *brandinfo != NULL; ++brandinfo)
 				(void)elf32_remove_brand_entry(*brandinfo);
 			printf("cannot insert Linux i386 ELF brand handler\n");
 		}
 		break;
 	case MOD_UNLOAD:
 		/* First pass: refuse to unload while any brand is in use. */
 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
 		     ++brandinfo)
 			if (elf32_brand_inuse(*brandinfo))
 				error = EBUSY;
 		if (error == 0) {
 			for (brandinfo = &linux_brandlist[0];
 			     *brandinfo != NULL; ++brandinfo)
 				if (elf32_remove_brand_entry(*brandinfo) < 0)
 					error = EINVAL;
 		}
 		if (error == 0) {
 			SET_FOREACH(lihp, linux_ioctl_handler_set)
 				linux32_ioctl_unregister_handler(*lihp);
 			if (bootverbose)
 				printf("Linux i386 ELF exec handler removed\n");
 		} else
 			printf("Could not deinstall Linux i386 ELF interpreter entry\n");
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (error);
 }
 
 static moduledata_t linux_elf_mod = {
 	"linuxelf",
 	linux_elf_modevent,
 	0
 };
 
 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
 MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1);
 FEATURE(linux, "Linux 32bit support");
diff --git a/sys/arm/arm/exec_machdep.c b/sys/arm/arm/exec_machdep.c
index 56e6006c0767..2bf3efff7fe4 100644
--- a/sys/arm/arm/exec_machdep.c
+++ b/sys/arm/arm/exec_machdep.c
@@ -1,388 +1,388 @@
 /*	$NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Mark Brinicombe
  *	for the NetBSD Project.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
 
 #include <machine/asm.h>
 #include <machine/machdep.h>
 #include <machine/pcb.h>
 #include <machine/sysarch.h>
 #include <machine/vfp.h>
 #include <machine/vmparam.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 
 /*
  * Build-time guards: compilation fails if any of these structures changes
  * size.  mcontext_t/ucontext_t/siginfo_t are copied to and from userspace
  * by the signal code below.
  */
 _Static_assert(sizeof(mcontext_t) == 208, "mcontext_t size incorrect");
 _Static_assert(sizeof(ucontext_t) == 260, "ucontext_t size incorrect");
 _Static_assert(sizeof(siginfo_t) == 64, "siginfo_t size incorrect");
 
 /*
  * Reset the user register state of 'td' for a freshly exec'ed image.
  *
  * The whole trapframe is zeroed, then the user stack pointer is set to
  * 'stack' and both the user link register and the PC are pointed at the
  * image entry address.  The saved status register selects 32-bit user
  * mode, with the Thumb bit set when bit 0 of the entry address is set.
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	memset(frame, 0, sizeof(*frame));
 
 	frame->tf_pc = imgp->entry_addr;
 	frame->tf_usr_lr = imgp->entry_addr;
 	frame->tf_usr_sp = stack;
 	frame->tf_svc_lr = 0x77777777;
 
 	frame->tf_spsr = PSR_USR32_MODE;
 	if (((register_t)imgp->entry_addr & 1) != 0) {
 		/* Odd entry address: start in Thumb state. */
 		frame->tf_spsr |= PSR_T;
 	}
 }
 
 #ifdef VFP
 /*
  * Copy the VFP state of 'td' into '*vfp'.
  *
  * For the current thread the state is first stored into the pcb via
  * vfp_store() inside a critical section; for any other thread the caller
  * must have suspended it (asserted with MPASS).  '*vfp' is zeroed before
  * the register file and FPSCR are copied out of the pcb.
  */
 void
 get_vfpcontext(struct thread *td, mcontext_vfp_t *vfp)
 {
 	struct pcb *src;
 
 	src = td->td_pcb;
 	if (td != curthread) {
 		MPASS(TD_IS_SUSPENDED(td));
 	} else {
 		critical_enter();
 		vfp_store(&src->pcb_vfpstate, false);
 		critical_exit();
 	}
 
 	memset(vfp, 0, sizeof(*vfp));
 	vfp->mcv_fpscr = src->pcb_vfpstate.fpscr;
 	memcpy(vfp->mcv_reg, src->pcb_vfpstate.reg, sizeof(vfp->mcv_reg));
 }
 
 /*
  * Load the VFP state in '*vfp' into the pcb of 'td'.
  *
  * For the current thread vfp_discard() is called inside a critical
  * section before the pcb copy is overwritten; any other thread must be
  * suspended (asserted with MPASS).
  */
 void
 set_vfpcontext(struct thread *td, mcontext_vfp_t *vfp)
 {
 	struct pcb *dst;
 
 	dst = td->td_pcb;
 	if (td != curthread) {
 		MPASS(TD_IS_SUSPENDED(td));
 	} else {
 		critical_enter();
 		vfp_discard(td);
 		critical_exit();
 	}
 
 	dst->pcb_vfpstate.fpscr = vfp->mcv_fpscr;
 	memcpy(dst->pcb_vfpstate.reg, vfp->mcv_reg,
 	    sizeof(dst->pcb_vfpstate.reg));
 }
 #endif
 
 int
 arm_get_vfpstate(struct thread *td, void *args)
 {
 	int rv;
 	struct arm_get_vfpstate_args ua;
 	mcontext_vfp_t	mcontext_vfp;
 
 	rv = copyin(args, &ua, sizeof(ua));
 	if (rv != 0)
 		return (rv);
 	if (ua.mc_vfp_size != sizeof(mcontext_vfp_t))
 		return (EINVAL);
 #ifdef VFP
 	get_vfpcontext(td, &mcontext_vfp);
 #else
 	bzero(&mcontext_vfp, sizeof(mcontext_vfp));
 #endif
 
 	rv = copyout(&mcontext_vfp, ua.mc_vfp,  sizeof(mcontext_vfp));
 	if (rv != 0)
 		return (rv);
 	return (0);
 }
 
 /*
  * Get machine context.
  *
  * Fills mcp->__gregs from the trapframe of 'td'.  When GET_MC_CLEAR_RET
  * is set in 'clear_ret', r0 is reported as 0 and the carry flag is
  * cleared in the reported CPSR.  No VFP state is attached here:
  * mc_vfp_size/mc_vfp_ptr are cleared and the spare area is zeroed.
  * Always returns 0.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
 {
 	struct trapframe *frame;
 	__greg_t *regs;
 
 	frame = td->td_frame;
 	regs = mcp->__gregs;
 
 	if ((clear_ret & GET_MC_CLEAR_RET) == 0) {
 		regs[_REG_R0] = frame->tf_r0;
 		regs[_REG_CPSR] = frame->tf_spsr;
 	} else {
 		regs[_REG_R0] = 0;
 		regs[_REG_CPSR] = frame->tf_spsr & ~PSR_C;
 	}
 
 	/* r1-r12 map one to one. */
 	regs[_REG_R1] = frame->tf_r1;
 	regs[_REG_R2] = frame->tf_r2;
 	regs[_REG_R3] = frame->tf_r3;
 	regs[_REG_R4] = frame->tf_r4;
 	regs[_REG_R5] = frame->tf_r5;
 	regs[_REG_R6] = frame->tf_r6;
 	regs[_REG_R7] = frame->tf_r7;
 	regs[_REG_R8] = frame->tf_r8;
 	regs[_REG_R9] = frame->tf_r9;
 	regs[_REG_R10] = frame->tf_r10;
 	regs[_REG_R11] = frame->tf_r11;
 	regs[_REG_R12] = frame->tf_r12;
 
 	/* User-mode banked SP/LR and the interrupted PC. */
 	regs[_REG_SP] = frame->tf_usr_sp;
 	regs[_REG_LR] = frame->tf_usr_lr;
 	regs[_REG_PC] = frame->tf_pc;
 
 	mcp->mc_vfp_ptr = NULL;
 	mcp->mc_vfp_size = 0;
 	memset(&mcp->mc_spare, 0, sizeof(mcp->mc_spare));
 
 	return (0);
 }
 
 /*
  * Set machine context.
  *
  * Loads the general registers of 'td' from mcp->__gregs.  The supplied
  * CPSR must select 32-bit user mode and must not have the IRQ/FIQ mask
  * bits set, otherwise EINVAL is returned and nothing is changed.
  *
  * If the context carries a VFP save area (mc_vfp_size equals
  * sizeof(mcontext_vfp_t) and mc_vfp_ptr is non-NULL) it is copied in from
  * userspace before any register is modified, so a faulting copy (EFAULT)
  * leaves the trapframe untouched.  A context without a valid VFP area
  * leaves the VFP state alone.
  *
  * Returns 0 on success.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	mcontext_vfp_t mc_vfp, *vfp;
 	struct trapframe *tf = td->td_frame;
 	const __greg_t *gr = mcp->__gregs;
 	int spsr;
 
 	/*
 	 * Make sure the processor mode has not been tampered with and
 	 * interrupts have not been disabled.
 	 */
 	spsr = gr[_REG_CPSR];
 	if ((spsr & PSR_MODE) != PSR_USR32_MODE ||
 	    (spsr & (PSR_I | PSR_F)) != 0)
 		return (EINVAL);
 
 #ifdef WITNESS
 	/* Diagnostics only: a malformed VFP descriptor is reported, not fatal. */
 	if (mcp->mc_vfp_size != 0 && mcp->mc_vfp_size != sizeof(mc_vfp)) {
 		printf("%s: %s: Malformed mc_vfp_size: %d (0x%08X)\n",
 		    td->td_proc->p_comm, __func__,
 		    mcp->mc_vfp_size, mcp->mc_vfp_size);
 	} else if (mcp->mc_vfp_size != 0 && mcp->mc_vfp_ptr == NULL) {
 		printf("%s: %s: mc_vfp_size != 0 but mc_vfp_ptr == NULL\n",
 		    td->td_proc->p_comm, __func__);
 	}
 #endif
 
 	/* Fetch the VFP area first so a fault cannot leave a half-set frame. */
 	if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != NULL) {
 		if (copyin(mcp->mc_vfp_ptr, &mc_vfp, sizeof(mc_vfp)) != 0)
 			return (EFAULT);
 		vfp = &mc_vfp;
 	} else {
 		vfp = NULL;
 	}
 
 	tf->tf_r0 = gr[_REG_R0];
 	tf->tf_r1 = gr[_REG_R1];
 	tf->tf_r2 = gr[_REG_R2];
 	tf->tf_r3 = gr[_REG_R3];
 	tf->tf_r4 = gr[_REG_R4];
 	tf->tf_r5 = gr[_REG_R5];
 	tf->tf_r6 = gr[_REG_R6];
 	tf->tf_r7 = gr[_REG_R7];
 	tf->tf_r8 = gr[_REG_R8];
 	tf->tf_r9 = gr[_REG_R9];
 	tf->tf_r10 = gr[_REG_R10];
 	tf->tf_r11 = gr[_REG_R11];
 	tf->tf_r12 = gr[_REG_R12];
 	tf->tf_usr_sp = gr[_REG_SP];
 	tf->tf_usr_lr = gr[_REG_LR];
 	tf->tf_pc = gr[_REG_PC];
 	tf->tf_spsr = gr[_REG_CPSR];
 #ifdef VFP
 	if (vfp != NULL)
 		set_vfpcontext(td, vfp);
 #endif
 	return (0);
 }
 
 /*
  * Deliver a signal to the current thread by building a struct sigframe on
  * the user stack and redirecting the trapframe to the handler.
  *
  * catcher: user address of the signal handler.
  * ksi:     signal information; ksi_signo selects the signal and ksi_info
  *          is copied into the frame.
  * mask:    signal mask stored in the saved ucontext.
  *
  * Called with the process lock and ps_mtx held (asserted).  Both are
  * dropped around the copyout() to user memory and re-taken before
  * returning.  If the frame cannot be written the process is killed with
  * SIGILL.
  */
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td;
 	struct proc *p;
 	struct trapframe *tf;
 	struct sigframe *fp, frame;
 	struct sigacts *psp;
 	struct sysentvec *sysent;
 	int onstack;
 	int sig;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_usr_sp);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Start from the top of the alternate signal stack. */
 		fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		fp = (struct sigframe *)td->td_frame->tf_usr_sp;
 
 	/* make room on the stack */
 	fp--;
 
 	/* make the stack aligned */
 	fp = (struct sigframe *)STACKALIGN(fp);
 	/* Populate the siginfo frame. */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
 #ifdef VFP
 	/* The VFP area lives inside the frame; point the mcontext at it. */
 	get_vfpcontext(td, &frame.sf_vfp);
 	frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp);
 	frame.sf_uc.uc_mcontext.mc_vfp_ptr = &fp->sf_vfp;
 #else
 	frame.sf_uc.uc_mcontext.mc_vfp_size = 0;
 	frame.sf_uc.uc_mcontext.mc_vfp_ptr = NULL;
 #endif
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack = td->td_sigstk;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
 	/* Drop both locks before touching user memory. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in.  We invoke the handler
 	 * directly, only returning via the trampoline.  Note the
 	 * trampoline version numbers are coordinated with machine-
 	 * dependent code in libc.
 	 */
 
 	/* Handler arguments: (sig, siginfo *, ucontext *). */
 	tf->tf_r0 = sig;
 	tf->tf_r1 = (register_t)&fp->sf_si;
 	tf->tf_r2 = (register_t)&fp->sf_uc;
 
 	/* the trampoline uses r5 as the uc address */
 	tf->tf_r5 = (register_t)&fp->sf_uc;
 	tf->tf_pc = (register_t)catcher;
 	tf->tf_usr_sp = (register_t)fp;
 	/* The handler's return address (user LR) is the signal trampoline. */
 	sysent = p->p_sysent;
-	if (sysent->sv_sigcode_base != 0)
+	if (PROC_HAS_SHP(p))
 		tf->tf_usr_lr = (register_t)PROC_SIGCODE(p);
 	else
 		tf->tf_usr_lr = (register_t)(PROC_PS_STRINGS(p) -
 		    *(sysent->sv_szsigcode));
 	/* Set the mode to enter in the signal handler */
 #if __ARM_ARCH >= 7
 	if ((register_t)catcher & 1)
 		tf->tf_spsr |= PSR_T;
 	else
 		tf->tf_spsr &= ~PSR_T;
 #endif
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr,
 	    tf->tf_usr_sp);
 
 	/* Re-take the locks the caller expects to still hold. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * sigreturn(2): restore the register context and signal mask saved in the
  * user-supplied ucontext.
  *
  * Returns EFAULT if 'uap' is NULL or the ucontext cannot be copied in,
  * the error from set_mcontext() if the context is rejected, and
  * EJUSTRETURN on success.
  */
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t ctx;
 	int rv;
 
 	if (uap == NULL || copyin(uap->sigcntxp, &ctx, sizeof(ctx)) != 0)
 		return (EFAULT);
 
 	/* Registers first; a rejected context leaves the mask untouched. */
 	rv = set_mcontext(td, &ctx.uc_mcontext);
 	if (rv != 0)
 		return (rv);
 
 	kern_sigprocmask(td, SIG_SETMASK, &ctx.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
diff --git a/sys/arm64/arm64/exec_machdep.c b/sys/arm64/arm64/exec_machdep.c
index 49765e73f390..6109a866a2d0 100644
--- a/sys/arm64/arm64/exec_machdep.c
+++ b/sys/arm64/arm64/exec_machdep.c
@@ -1,644 +1,646 @@
 /*-
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/reg.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/ucontext.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
 
 #include <machine/armreg.h>
 #include <machine/kdb.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 
 #ifdef VFP
 #include <machine/vfp.h>
 #endif
 
 /*
  * Build-time guards: compilation fails if any of these structures changes
  * size.  They are copied to and from userspace by the signal code below.
  */
 _Static_assert(sizeof(mcontext_t) == 880, "mcontext_t size incorrect");
 _Static_assert(sizeof(ucontext_t) == 960, "ucontext_t size incorrect");
 _Static_assert(sizeof(siginfo_t) == 80, "siginfo_t size incorrect");
 
 /* Floating point helpers used by get_mcontext()/set_mcontext() below. */
 static void get_fpcontext(struct thread *td, mcontext_t *mcp);
 static void set_fpcontext(struct thread *td, mcontext_t *mcp);
 
 /*
  * Copy the general purpose registers of 'td' from its trapframe into
  * '*regs'.  Always returns 0.
  */
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *tf;
 
 	tf = td->td_frame;
 
 	memcpy(regs->x, tf->tf_x, sizeof(regs->x));
 	regs->lr = tf->tf_lr;
 	regs->sp = tf->tf_sp;
 	regs->elr = tf->tf_elr;
 	regs->spsr = tf->tf_spsr;
 
 #ifdef COMPAT_FREEBSD32
 	/*
 	 * A 64-bit debugger inspecting a 32-bit process expects the PC and
 	 * SPSR in x[15] and x[16] respectively.
 	 */
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		regs->x[15] = tf->tf_elr;
 		regs->x[16] = tf->tf_spsr;
 	}
 #endif
 	return (0);
 }
 
 /*
  * Load the general purpose registers of 'td' from '*regs'.
  *
  * sp, lr and x[] are copied verbatim.  Only the PSR_SETTABLE_32 /
  * PSR_SETTABLE_64 bits of the saved status register are taken from the
  * caller; all other SPSR bits keep their current value.  Always returns 0.
  */
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	frame->tf_sp = regs->sp;
 	frame->tf_lr = regs->lr;
 
 	memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x));
 
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		/*
 		 * We may be called for a 32bits process if we're using
 		 * a 64bits debugger. If so, get PC and SPSR from where
 		 * it put it.
 		 */
 		frame->tf_elr = regs->x[15];
 		frame->tf_spsr &= ~PSR_SETTABLE_32;
 		frame->tf_spsr |= regs->x[16] & PSR_SETTABLE_32;
 		/* Don't allow userspace to ask to continue single stepping.
 		 * The SPSR.SS field doesn't exist when the EL1 is AArch32.
 		 * As the SPSR.DIT field has moved in its place don't
 		 * allow userspace to set the SPSR.SS field.
 		 */
 	} else
 #endif
 	/* This block is the else-branch above, or unconditional without COMPAT_FREEBSD32. */
 	{
 		frame->tf_elr = regs->elr;
 		frame->tf_spsr &= ~PSR_SETTABLE_64;
 		frame->tf_spsr |= regs->spsr & PSR_SETTABLE_64;
 		/* Enable single stepping if userspace asked for it */
 		if ((frame->tf_spsr & PSR_SS) != 0) {
 			td->td_pcb->pcb_flags |= PCB_SINGLE_STEP;
 
 			WRITE_SPECIALREG(mdscr_el1,
 			    READ_SPECIALREG(mdscr_el1) | MDSCR_SS);
 			isb();
 		}
 	}
 	return (0);
 }
 
 /*
  * Copy the floating point registers of 'td' into '*regs'.
  *
  * If the thread has not started using the FPU (PCB_FP_STARTED clear), or
  * the kernel is built without VFP, '*regs' is zeroed instead.
  * Always returns 0.
  */
 int
 fill_fpregs(struct thread *td, struct fpreg *regs)
 {
 #ifdef VFP
 	struct pcb *thread_pcb;
 
 	thread_pcb = td->td_pcb;
 	if ((thread_pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		/*
 		 * The current thread may still have its state in the
 		 * hardware; save it to the pcb before copying from there.
 		 */
 		if (td == curthread)
 			vfp_save_state(td, thread_pcb);
 
 		KASSERT(thread_pcb->pcb_fpusaved == &thread_pcb->pcb_fpustate,
 		    ("Called fill_fpregs while the kernel is using the VFP"));
 		regs->fp_cr = thread_pcb->pcb_fpustate.vfp_fpcr;
 		regs->fp_sr = thread_pcb->pcb_fpustate.vfp_fpsr;
 		memcpy(regs->fp_q, thread_pcb->pcb_fpustate.vfp_regs,
 		    sizeof(regs->fp_q));
 		return (0);
 	}
 #endif
 	memset(regs, 0, sizeof(*regs));
 	return (0);
 }
 
 /*
  * Load the floating point registers of 'td' from '*regs' into its pcb.
  * Does nothing when the kernel is built without VFP.  Always returns 0.
  */
 int
 set_fpregs(struct thread *td, struct fpreg *regs)
 {
 #ifdef VFP
 	struct pcb *thread_pcb = td->td_pcb;
 
 	KASSERT(thread_pcb->pcb_fpusaved == &thread_pcb->pcb_fpustate,
 	    ("Called set_fpregs while the kernel is using the VFP"));
 
 	thread_pcb->pcb_fpustate.vfp_fpsr = regs->fp_sr;
 	thread_pcb->pcb_fpustate.vfp_fpcr = regs->fp_cr;
 	memcpy(thread_pcb->pcb_fpustate.vfp_regs, regs->fp_q,
 	    sizeof(regs->fp_q));
 #endif
 	return (0);
 }
 
 /*
  * Report the debug register state of 'td' in '*regs'.
  *
  * The debug architecture version and the breakpoint/watchpoint counts are
  * read from ID_AA64DFR0_EL1.  The register values themselves are filled
  * in only when the thread's monitor has DBGMON_ENABLED set; otherwise
  * they stay zero.  Always returns 0.
  */
 int
 fill_dbregs(struct thread *td, struct dbreg *regs)
 {
 	struct debug_monitor_state *mon;
 	uint8_t ver, brps, wrps;
 	int n;
 
 	memset(regs, 0, sizeof(*regs));
 
 	extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_DebugVer_SHIFT,
 	    &ver);
 	extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_BRPs_SHIFT,
 	    &brps);
 	extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_WRPs_SHIFT,
 	    &wrps);
 
 	/*
 	 * Both ID fields hold "count - 1".  Armv8-A permits 2-16 of each,
 	 * so the incremented value still fits in 8 bits.
 	 */
 	brps++;
 	wrps++;
 
 	regs->db_nwtpts = wrps;
 	regs->db_nbkpts = brps;
 	regs->db_debug_ver = ver;
 
 	mon = &td->td_pcb->pcb_dbg_regs;
 	if ((mon->dbg_flags & DBGMON_ENABLED) == 0)
 		return (0);
 
 	for (n = 0; n < brps; n++) {
 		regs->db_breakregs[n].dbr_ctrl = mon->dbg_bcr[n];
 		regs->db_breakregs[n].dbr_addr = mon->dbg_bvr[n];
 	}
 	for (n = 0; n < wrps; n++) {
 		regs->db_watchregs[n].dbw_ctrl = mon->dbg_wcr[n];
 		regs->db_watchregs[n].dbw_addr = mon->dbg_wvr[n];
 	}
 
 	return (0);
 }
 
 /*
  * Program the per-thread breakpoint and watchpoint state of 'td' from
  * '*regs'.
  *
  * Each entry is validated and sanitised before it is stored: addresses
  * must be below VM_MAXUSER_ADDRESS, reserved control bits are masked off,
  * and an enabled entry must target EL0 only.  Returns EINVAL on the first
  * entry that fails validation, 0 otherwise.
  *
  * NOTE(review): entries are written to the monitor as they are validated
  * and dbg_enable_count is reset up front, so an EINVAL return can leave
  * the monitor partially updated -- confirm callers tolerate this.
  */
 int
 set_dbregs(struct thread *td, struct dbreg *regs)
 {
 	struct debug_monitor_state *monitor;
 	uint64_t addr;
 	uint32_t ctrl;
 	int i;
 
 	monitor = &td->td_pcb->pcb_dbg_regs;
 	monitor->dbg_enable_count = 0;
 
 	for (i = 0; i < DBG_BRP_MAX; i++) {
 		addr = regs->db_breakregs[i].dbr_addr;
 		ctrl = regs->db_breakregs[i].dbr_ctrl;
 
 		/*
 		 * Don't let the user set a breakpoint on a kernel or
 		 * non-canonical user address.
 		 */
 		if (addr >= VM_MAXUSER_ADDRESS)
 			return (EINVAL);
 
 		/*
 		 * The lowest 2 bits are ignored, so record the effective
 		 * address.
 		 */
 		addr = rounddown2(addr, 4);
 
 		/*
 		 * Some control fields are ignored, and other bits reserved.
 		 * Only unlinked, address-matching breakpoints are supported.
 		 *
 		 * XXX: fields that appear unvalidated, such as BAS, have
 		 * constrained undefined behaviour. If the user mis-programs
 		 * these, there is no risk to the system.
 		 */
 		ctrl &= DBGBCR_EN | DBGBCR_PMC | DBGBCR_BAS;
 		if ((ctrl & DBGBCR_EN) != 0) {
 			/* Only target EL0. */
 			if ((ctrl & DBGBCR_PMC) != DBGBCR_PMC_EL0)
 				return (EINVAL);
 
 			monitor->dbg_enable_count++;
 		}
 
 		monitor->dbg_bvr[i] = addr;
 		monitor->dbg_bcr[i] = ctrl;
 	}
 
 	for (i = 0; i < DBG_WRP_MAX; i++) {
 		addr = regs->db_watchregs[i].dbw_addr;
 		ctrl = regs->db_watchregs[i].dbw_ctrl;
 
 		/*
 		 * Don't let the user set a watchpoint on a kernel or
 		 * non-canonical user address.
 		 */
 		if (addr >= VM_MAXUSER_ADDRESS)
 			return (EINVAL);
 
 		/*
 		 * Some control fields are ignored, and other bits reserved.
 		 * Only unlinked watchpoints are supported.
 		 */
 		ctrl &= DBGWCR_EN | DBGWCR_PAC | DBGWCR_LSC | DBGWCR_BAS |
 		    DBGWCR_MASK;
 
 		if ((ctrl & DBGWCR_EN) != 0) {
 			/* Only target EL0. */
 			if ((ctrl & DBGWCR_PAC) != DBGWCR_PAC_EL0)
 				return (EINVAL);
 
 			/* Must set at least one of the load/store bits. */
 			if ((ctrl & DBGWCR_LSC) == 0)
 				return (EINVAL);
 
 			/*
 			 * When specifying the address range with BAS, the MASK
 			 * field must be zero.
 			 */
 			if ((ctrl & DBGWCR_BAS) != DBGWCR_BAS &&
 			    (ctrl & DBGWCR_MASK) != 0)
 				return (EINVAL);
 
 			monitor->dbg_enable_count++;
 		}
 		monitor->dbg_wvr[i] = addr;
 		monitor->dbg_wcr[i] = ctrl;
 	}
 
 	/*
 	 * NOTE(review): the flag is only ever set here; when no entry is
 	 * enabled a previously set DBGMON_ENABLED is left as it was --
 	 * confirm that is intended.
 	 */
 	if (monitor->dbg_enable_count > 0)
 		monitor->dbg_flags |= DBGMON_ENABLED;
 
 	return (0);
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * Export the registers of 'td' in the 32-bit arm layout.
  *
  * r0-r12 come from x0-x12, SP and LR from x13 and x14, the PC from the
  * saved ELR and the CPSR from the saved SPSR.  Always returns 0.
  */
 int
 fill_regs32(struct thread *td, struct reg32 *regs)
 {
 	struct trapframe *frame;
 	int idx;
 
 	frame = td->td_frame;
 
 	regs->r_cpsr = frame->tf_spsr;
 	regs->r_pc = frame->tf_elr;
 	/* For arm32, SP is r13 and LR is r14 */
 	regs->r_lr = frame->tf_x[14];
 	regs->r_sp = frame->tf_x[13];
 	for (idx = 0; idx < 13; idx++) {
 		regs->r[idx] = frame->tf_x[idx];
 	}
 
 	return (0);
 }
 
 /*
  * Load the registers of 'td' from the 32-bit arm layout in '*regs'.
  *
  * r0-r12 go to x0-x12, SP and LR to x13 and x14, and the PC to the saved
  * ELR.  Only the PSR_SETTABLE_32 bits of the CPSR are accepted; every
  * other SPSR bit keeps its current value.  Always returns 0.
  */
 int
 set_regs32(struct thread *td, struct reg32 *regs)
 {
 	struct trapframe *frame;
 	int idx;
 
 	frame = td->td_frame;
 
 	for (idx = 0; idx < 13; idx++) {
 		frame->tf_x[idx] = regs->r[idx];
 	}
 	/* For arm32, SP is r13 and LR is r14 */
 	frame->tf_x[14] = regs->r_lr;
 	frame->tf_x[13] = regs->r_sp;
 	frame->tf_elr = regs->r_pc;
 
 	frame->tf_spsr &= ~PSR_SETTABLE_32;
 	frame->tf_spsr |= regs->r_cpsr & PSR_SETTABLE_32;
 
 	return (0);
 }
 
 /* XXX fill/set dbregs/fpregs are stubbed on 32-bit arm. */
 
 /*
  * Stub: reports an all-zero 32-bit floating point register set.
  * Always returns 0.
  */
 int
 fill_fpregs32(struct thread *td, struct fpreg32 *regs)
 {
 	bzero(regs, sizeof(*regs));
 	return (0);
 }
 
 /*
  * Stub: the supplied 32-bit floating point registers are ignored and
  * success (0) is returned.
  */
 int
 set_fpregs32(struct thread *td, struct fpreg32 *regs)
 {
 
 	return (0);
 }
 
 /*
  * Stub: reports an all-zero 32-bit debug register set.
  * Always returns 0.
  */
 int
 fill_dbregs32(struct thread *td, struct dbreg32 *regs)
 {
 	bzero(regs, sizeof(*regs));
 	return (0);
 }
 
 /*
  * Stub: the supplied 32-bit debug registers are ignored and success (0)
  * is returned.
  */
 int
 set_dbregs32(struct thread *td, struct dbreg32 *regs)
 {
 
 	return (0);
 }
 #endif
 
 /*
  * Reset the register state of 'td' for a freshly exec'ed image.
  *
  * The trapframe is zeroed; x0 receives the raw stack address, SP the
  * aligned one, and both LR and ELR point at the image entry address.
  * The thread pointer registers are cleared in the pcb and in hardware,
  * the VFP state (when built in) and debug registers are reset, and
  * ptrauth_exec() is called for the new image.
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *frame;
 	struct pcb *pcb;
 
 	frame = td->td_frame;
 	pcb = td->td_pcb;
 
 	memset(frame, 0, sizeof(*frame));
 	frame->tf_elr = imgp->entry_addr;
 	frame->tf_lr = imgp->entry_addr;
 	frame->tf_sp = STACKALIGN(stack);
 	frame->tf_x[0] = stack;
 
 	/* Forget the previous image's thread pointers. */
 	pcb->pcb_tpidr_el0 = 0;
 	pcb->pcb_tpidrro_el0 = 0;
 	WRITE_SPECIALREG(tpidrro_el0, 0);
 	WRITE_SPECIALREG(tpidr_el0, 0);
 
 #ifdef VFP
 	vfp_reset_state(td, pcb);
 #endif
 
 	/* Debug register state does not carry over to the new process. */
 	bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs));
 
 	/* Generate new pointer authentication keys */
 	ptrauth_exec(td);
 }
 
 /*
  * Sanity check these are the same size, they will be memcpy'd to and from:
  * trapframe.tf_x must match both gpregs.gp_x (get/set_mcontext) and
  * reg.x (fill/set_regs).
  */
 CTASSERT(sizeof(((struct trapframe *)0)->tf_x) ==
     sizeof((struct gpregs *)0)->gp_x);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_x) ==
     sizeof((struct reg *)0)->x);
 
 /*
  * Get machine context.
  *
  * Fills mcp->mc_gpregs from the trapframe of 'td' and then lets
  * get_fpcontext() add the floating point state.  When GET_MC_CLEAR_RET is
  * set in 'clear_ret', x0 is reported as 0 and the carry flag is cleared
  * in the reported SPSR.  Always returns 0.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 
 	if ((clear_ret & GET_MC_CLEAR_RET) == 0) {
 		mcp->mc_gpregs.gp_spsr = frame->tf_spsr;
 		mcp->mc_gpregs.gp_x[0] = frame->tf_x[0];
 	} else {
 		mcp->mc_gpregs.gp_spsr = frame->tf_spsr & ~PSR_C;
 		mcp->mc_gpregs.gp_x[0] = 0;
 	}
 
 	/* x0 was handled above; copy the remaining x registers in bulk. */
 	memcpy(&mcp->mc_gpregs.gp_x[1], &frame->tf_x[1],
 	    (nitems(mcp->mc_gpregs.gp_x) - 1) *
 	    sizeof(mcp->mc_gpregs.gp_x[1]));
 
 	mcp->mc_gpregs.gp_elr = frame->tf_elr;
 	mcp->mc_gpregs.gp_lr = frame->tf_lr;
 	mcp->mc_gpregs.gp_sp = frame->tf_sp;
 
 	get_fpcontext(td, mcp);
 
 	return (0);
 }
 
 /*
  * Set machine context.
  *
  * The supplied SPSR must select EL0t, must not select AArch32, and must
  * carry the same DAIF bits as the thread's current SPSR; otherwise EINVAL
  * is returned and nothing is changed.  On success the general registers
  * are loaded, single stepping is armed if the new SPSR has PSR_SS set,
  * and set_fpcontext() restores the floating point state.  Returns 0.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct trapframe *frame;
 	uint32_t new_spsr;
 
 	frame = td->td_frame;
 	new_spsr = mcp->mc_gpregs.gp_spsr;
 
 	if ((new_spsr & PSR_M_MASK) != PSR_M_EL0t)
 		return (EINVAL);
 	if ((new_spsr & PSR_AARCH32) != 0)
 		return (EINVAL);
 	if ((new_spsr & PSR_DAIF) != (frame->tf_spsr & PSR_DAIF))
 		return (EINVAL);
 
 	memcpy(frame->tf_x, mcp->mc_gpregs.gp_x, sizeof(frame->tf_x));
 	frame->tf_elr = mcp->mc_gpregs.gp_elr;
 	frame->tf_lr = mcp->mc_gpregs.gp_lr;
 	frame->tf_sp = mcp->mc_gpregs.gp_sp;
 	frame->tf_spsr = mcp->mc_gpregs.gp_spsr;
 
 	/* Arm hardware single stepping if the restored SPSR asks for it. */
 	if ((frame->tf_spsr & PSR_SS) != 0) {
 		td->td_pcb->pcb_flags |= PCB_SINGLE_STEP;
 
 		WRITE_SPECIALREG(mdscr_el1,
 		    READ_SPECIALREG(mdscr_el1) | MDSCR_SS);
 		isb();
 	}
 
 	set_fpcontext(td, mcp);
 
 	return (0);
 }
 
 /*
  * Add the floating point state of the current thread to '*mcp'.
  *
  * Nothing is written unless the thread has started using the FPU
  * (PCB_FP_STARTED).  Otherwise the state is saved to the pcb, copied into
  * mcp->mc_fpregs and _MC_FP_VALID is set in mc_flags.  The work is done
  * inside a critical section.  A no-op when built without VFP.
  */
 static void
 get_fpcontext(struct thread *td, mcontext_t *mcp)
 {
 #ifdef VFP
 	struct pcb *pcb;
 
 	critical_enter();
 
 	pcb = curthread->td_pcb;
 	if ((pcb->pcb_fpflags & PCB_FP_STARTED) == 0) {
 		critical_exit();
 		return;
 	}
 
 	/*
 	 * The registers may still be live in the hardware; save them to
 	 * the pcb so they can be copied from there.
 	 */
 	vfp_save_state(td, pcb);
 
 	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
 	    ("Called get_fpcontext while the kernel is using the VFP"));
 	KASSERT((pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
 	    ("Non-userspace FPU flags set in get_fpcontext"));
 
 	memcpy(mcp->mc_fpregs.fp_q, pcb->pcb_fpustate.vfp_regs,
 	    sizeof(mcp->mc_fpregs.fp_q));
 	mcp->mc_fpregs.fp_cr = pcb->pcb_fpustate.vfp_fpcr;
 	mcp->mc_fpregs.fp_sr = pcb->pcb_fpustate.vfp_fpsr;
 	mcp->mc_fpregs.fp_flags = pcb->pcb_fpflags;
 	mcp->mc_flags |= _MC_FP_VALID;
 
 	critical_exit();
 #endif
 }
 
 /*
  * Restore the floating point state of the current thread from '*mcp'.
  *
  * Nothing is changed unless _MC_FP_VALID is set in mc_flags.  Otherwise
  * the thread's current VFP state is discarded and the pcb copy is
  * overwritten; only the PCB_FP_USERMASK bits of fp_flags are accepted.
  * The work is done inside a critical section.  A no-op when built
  * without VFP.
  */
 static void
 set_fpcontext(struct thread *td, mcontext_t *mcp)
 {
 #ifdef VFP
 	struct pcb *pcb;
 
 	critical_enter();
 
 	if ((mcp->mc_flags & _MC_FP_VALID) == 0) {
 		critical_exit();
 		return;
 	}
 
 	pcb = curthread->td_pcb;
 
 	/* Whatever state the thread has now is about to be replaced. */
 	vfp_discard(td);
 
 	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
 	    ("Called set_fpcontext while the kernel is using the VFP"));
 
 	memcpy(pcb->pcb_fpustate.vfp_regs, mcp->mc_fpregs.fp_q,
 	    sizeof(mcp->mc_fpregs.fp_q));
 	pcb->pcb_fpustate.vfp_fpcr = mcp->mc_fpregs.fp_cr;
 	pcb->pcb_fpustate.vfp_fpsr = mcp->mc_fpregs.fp_sr;
 	pcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK;
 
 	critical_exit();
 #endif
 }
 
 /*
  * sigreturn(2): restore the register context and signal mask saved in the
  * user-supplied ucontext.
  *
  * Returns EFAULT if the ucontext cannot be copied in, the error from
  * set_mcontext() if the context is rejected, and EJUSTRETURN on success.
  */
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t ctx;
 	int rv;
 
 	if (copyin(uap->sigcntxp, &ctx, sizeof(ctx)) != 0)
 		return (EFAULT);
 
 	/* Registers first; a rejected context leaves the mask untouched. */
 	rv = set_mcontext(td, &ctx.uc_mcontext);
 	if (rv != 0)
 		return (rv);
 
 	kern_sigprocmask(td, SIG_SETMASK, &ctx.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * Deliver a signal to the current thread by building a struct sigframe on
  * the user stack and redirecting the trapframe to the signal trampoline.
  *
  * catcher: user address of the signal handler (passed to the trampoline
  *          in x8).
  * ksi:     signal information; ksi_signo selects the signal and ksi_info
  *          is copied into the frame.
  * mask:    signal mask stored in the saved ucontext.
  *
  * Called with the process lock and ps_mtx held (asserted).  Both are
  * dropped around the copyout() to user memory and re-taken before
  * returning.  If the frame cannot be written the process is killed with
  * SIGILL.
  */
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td;
 	struct proc *p;
 	struct trapframe *tf;
 	struct sigframe *fp, frame;
 	struct sigacts *psp;
 	int onstack, sig;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_sp);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Start from the top of the alternate signal stack. */
 		fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else {
 		fp = (struct sigframe *)td->td_frame->tf_sp;
 	}
 
 	/* Make room, keeping the stack aligned */
 	fp--;
 	fp = (struct sigframe *)STACKALIGN(fp);
 
 	/* Fill in the frame to copy out */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack = td->td_sigstk;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
 	/* Drop both locks before touching user memory. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Handler arguments (sig, siginfo *, ucontext *) go in x0-x2 and
 	 * the handler address in x8; execution resumes at PROC_SIGCODE(p)
 	 * with the stack pointer at the new frame.
 	 */
 	tf->tf_x[0] = sig;
 	tf->tf_x[1] = (register_t)&fp->sf_si;
 	tf->tf_x[2] = (register_t)&fp->sf_uc;
 	tf->tf_x[8] = (register_t)catcher;
 	tf->tf_sp = (register_t)fp;
 	tf->tf_elr = (register_t)PROC_SIGCODE(p);
 
 	/* Clear the single step flag while in the signal handler */
 	if ((td->td_pcb->pcb_flags & PCB_SINGLE_STEP) != 0) {
 		td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP;
 		WRITE_SPECIALREG(mdscr_el1,
 		    READ_SPECIALREG(mdscr_el1) & ~MDSCR_SS);
 		isb();
 	}
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr,
 	    tf->tf_sp);
 
 	/* Re-take the locks the caller expects to still hold. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
diff --git a/sys/arm64/arm64/freebsd32_machdep.c b/sys/arm64/arm64/freebsd32_machdep.c
index 85ed3b923bc0..9b62802efbc5 100644
--- a/sys/arm64/arm64/freebsd32_machdep.c
+++ b/sys/arm64/arm64/freebsd32_machdep.c
@@ -1,459 +1,459 @@
 /*-
  * Copyright (c) 2018 Olivier Houchard
  * Copyright (c) 2017 Nuxi, https://nuxi.nl/
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/exec.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/syscallsubr.h>
 #include <sys/ktr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <machine/armreg.h>
 #ifdef VFP
 #include <machine/vfp.h>
 #endif
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 
 /*
  * Compile-time checks on the sizes of the 32-bit context structures that
  * this file exchanges with 32-bit processes.
  */
 _Static_assert(sizeof(mcontext32_t) == 208, "mcontext32_t size incorrect");
 _Static_assert(sizeof(ucontext32_t) == 260, "ucontext32_t size incorrect");
 _Static_assert(sizeof(struct siginfo32) == 64, "struct siginfo32 size incorrect");
 
 /* Signal delivery routine for 32-bit processes; defined later in this file. */
 extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
 
 /*
  * The first two fields of a ucontext_t are the signal mask and the machine
  * context.  The next field is uc_link; we want to avoid destroying the link
  * when copying out contexts.
  */
 #define UC32_COPY_SIZE  offsetof(ucontext32_t, uc_link)
 
 /*
  * Stubs for machine dependent 32-bits system calls.
  */
 
 int
 freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap)
 {
 	int error;
 
 #define ARM_SYNC_ICACHE		0
 #define ARM_DRAIN_WRITEBUF	1
 #define ARM_SET_TP		2
 #define ARM_GET_TP		3
 #define ARM_GET_VFPSTATE	4
 
 	switch(uap->op) {
 	case ARM_SET_TP:
 		WRITE_SPECIALREG(tpidr_el0, uap->parms);
 		WRITE_SPECIALREG(tpidrro_el0, uap->parms);
 		return 0;
 	case ARM_SYNC_ICACHE:
 		{
 			struct {
 				uint32_t addr;
 				uint32_t size;
 			} args;
 
 			if ((error = copyin(uap->parms, &args, sizeof(args))) != 0)
 				return (error);
 			if ((uint64_t)args.addr + (uint64_t)args.size > 0xffffffff)
 				return (EINVAL);
 			cpu_icache_sync_range_checked(args.addr, args.size);
 			return 0;
 		}
 	case ARM_GET_VFPSTATE:
 		{
 			mcontext32_vfp_t mcontext_vfp;
 
 			struct {
 				uint32_t mc_vfp_size;
 				uint32_t mc_vfp;
 			} args;
 			if ((error = copyin(uap->parms, &args, sizeof(args))) != 0)
 				return (error);
 			if (args.mc_vfp_size != sizeof(mcontext_vfp))
 				return (EINVAL);
 #ifdef VFP
 			get_fpcontext32(td, &mcontext_vfp);
 #else
 			bzero(&mcontext_vfp, sizeof(mcontext_vfp));
 #endif
 			error = copyout(&mcontext_vfp,
 				(void *)(uintptr_t)args.mc_vfp,
 				sizeof(mcontext_vfp));
 			return error;
 		}
 	}
 
 	return (EINVAL);
 }
 
 #ifdef VFP
 /*
  * Fill *mcp with the thread's VFP state in the 32-bit mcontext layout.
  *
  * *mcp is always zeroed first; it is only populated when the thread has
  * PCB_FP_STARTED set.  The thread must be curthread, suspended, or belong
  * to a stopping process (asserted).
  */
 void
 get_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp)
 {
 	struct pcb *pcb;
 	int reg;
 
 	KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
 	    P_SHOULDSTOP(td->td_proc),
 	    ("not suspended thread %p", td));
 
 	pcb = td->td_pcb;
 	memset(mcp, 0, sizeof(*mcp));
 
 	/* Without PCB_FP_STARTED the zeroed context is the answer. */
 	if ((pcb->pcb_fpflags & PCB_FP_STARTED) == 0)
 		return;
 
 	/*
 	 * If we have just been running VFP instructions we will
 	 * need to save the state to memcpy it below.
 	 */
 	if (td == curthread)
 		vfp_save_state(td, pcb);
 
 	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
 	    ("Called get_fpcontext32 while the kernel is using the VFP"));
 	KASSERT((pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
 	    ("Non-userspace FPU flags set in get_fpcontext32"));
 
 	/* Each saved register is narrowed to 64 bits for the 32-bit layout. */
 	for (reg = 0; reg < 32; reg++) {
 		mcp->mcv_reg[reg] =
 		    (uint64_t)pcb->pcb_fpustate.vfp_regs[reg];
 	}
 
 	/*
 	 * NOTE(review): the arguments are passed as (fpcr, fpsr) while the
 	 * macro name reads "SR" before "CR" -- confirm the order against the
 	 * macro definition.
 	 */
 	mcp->mcv_fpscr = VFP_FPSCR_FROM_SRCR(pcb->pcb_fpustate.vfp_fpcr,
 	    pcb->pcb_fpustate.vfp_fpsr);
 }
 
 /*
  * Load a 32-bit VFP context into the thread's saved FPU state.
  *
  * The 32 data registers are copied into the pcb and the combined FPSCR
  * word from *mcp is split into the separate vfp_fpsr and vfp_fpcr fields.
  * The update runs inside a critical section; when td is the current
  * thread, vfp_discard() is called first.
  */
 void
 set_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp)
 {
 	struct pcb *pcb;
 	int i;
 
 	critical_enter();
 	pcb = td->td_pcb;
 	if (td == curthread)
 		vfp_discard(td);
 	for (i = 0; i < 32; i++)
 		pcb->pcb_fpustate.vfp_regs[i] = mcp->mcv_reg[i];
 	pcb->pcb_fpustate.vfp_fpsr = VFP_FPSR_FROM_FPSCR(mcp->mcv_fpscr);
 	/*
 	 * The control word must come from the FPCR extraction; using the
 	 * FPSR extraction here would store the status bits in vfp_fpcr.
 	 */
 	pcb->pcb_fpustate.vfp_fpcr = VFP_FPCR_FROM_FPSCR(mcp->mcv_fpscr);
 	critical_exit();
 }
 #endif
 
 /*
  * Capture the thread's trapframe into a 32-bit machine context.
  *
  * gregset[0..14] come from tf_x[0..14], gregset[15] from tf_elr and
  * gregset[16] from tf_spsr.  With GET_MC_CLEAR_RET in flags, gregset[0]
  * is reported as 0 and PSR_C is cleared in the saved status word.  The
  * VFP pointer/size and the spare words are zeroed.
  */
 static void
 get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags)
 {
 	struct trapframe *tf;
 	int clear_ret, reg;
 
 	tf = td->td_frame;
 	clear_ret = (flags & GET_MC_CLEAR_RET) != 0;
 
 	mcp->mc_gregset[0] = clear_ret ? 0 : tf->tf_x[0];
 	for (reg = 1; reg < 15; reg++)
 		mcp->mc_gregset[reg] = tf->tf_x[reg];
 	mcp->mc_gregset[15] = tf->tf_elr;
 	mcp->mc_gregset[16] = clear_ret ? (tf->tf_spsr & ~PSR_C) : tf->tf_spsr;
 
 	/* No VFP state is attached by this routine. */
 	mcp->mc_vfp_ptr = 0;
 	mcp->mc_vfp_size = 0;
 
 	memset(mcp->mc_spare, 0, sizeof(mcp->mc_spare));
 }
 
 /*
  * Install a 32-bit machine context into the thread's trapframe.
  *
  * Returns EINVAL when the supplied status word differs from the current
  * tf_spsr in any bit outside PSR_SETTABLE_32 | PSR_SS, EFAULT when the
  * optional VFP context cannot be copied in, and 0 otherwise.  Note that
  * the general registers have already been written by the time the VFP
  * context is fetched.
  */
 static int
 set_mcontext32(struct thread *td, mcontext32_t *mcp)
 {
 	struct trapframe *tf;
 	mcontext32_vfp_t mc_vfp;
 	uint32_t new_spsr;
 	int reg;
 
 	tf = td->td_frame;
 	new_spsr = mcp->mc_gregset[16];
 
 	/*
 	 * There is no PSR_SS in the 32-bit kernel so ignore it if it's set
 	 * as we will set it later if needed.
 	 */
 	if (((new_spsr ^ tf->tf_spsr) & ~(PSR_SETTABLE_32 | PSR_SS)) != 0)
 		return (EINVAL);
 
 	/* Take the settable bits from the caller, the rest from the frame. */
 	new_spsr = (new_spsr & PSR_SETTABLE_32) |
 	    (tf->tf_spsr & ~PSR_SETTABLE_32);
 
 	if ((td->td_dbgflags & TDB_STEP) != 0) {
 		new_spsr |= PSR_SS;
 		td->td_pcb->pcb_flags |= PCB_SINGLE_STEP;
 		WRITE_SPECIALREG(mdscr_el1,
 		    READ_SPECIALREG(mdscr_el1) | MDSCR_SS);
 	}
 
 	for (reg = 0; reg < 15; reg++)
 		tf->tf_x[reg] = mcp->mc_gregset[reg];
 	tf->tf_elr = mcp->mc_gregset[15];
 	tf->tf_spsr = new_spsr;
 
 #ifdef VFP
 	/* A VFP context is only honoured when pointer and size both match. */
 	if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != 0) {
 		if (copyin((void *)(uintptr_t)mcp->mc_vfp_ptr, &mc_vfp,
 		    sizeof(mc_vfp)) != 0)
 			return (EFAULT);
 		set_fpcontext32(td, &mc_vfp);
 	}
 #endif
 
 	return (0);
 }
 
 #define UC_COPY_SIZE	offsetof(ucontext32_t, uc_link)
 
 /*
  * Copy the calling thread's context out to uap->ucp.
  *
  * Only the leading UC_COPY_SIZE bytes (everything before uc_link) are
  * written to userspace.  Returns EINVAL for a NULL pointer, otherwise the
  * result of copyout().
  */
 int
 freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap)
 {
 	ucontext32_t uc;
 
 	if (uap->ucp == NULL)
 		return (EINVAL);
 
 	memset(&uc, 0, sizeof(uc));
 	get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
 
 	/* Read the signal mask under the process lock. */
 	PROC_LOCK(td->td_proc);
 	uc.uc_sigmask = td->td_sigmask;
 	PROC_UNLOCK(td->td_proc);
 
 	return (copyout(&uc, uap->ucp, UC_COPY_SIZE));
 }
 
 int
 freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap)
 {
 	ucontext32_t uc;
 	int ret;
 
 	if (uap->ucp == NULL)
 		ret = EINVAL;
 	else {
 		ret = copyin(uap->ucp, &uc, UC_COPY_SIZE);
 		if (ret == 0) {
 			ret = set_mcontext32(td, &uc.uc_mcontext);
 			if (ret == 0)
 				kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask,
 						NULL, 0);
 		}
 	}
 	return (ret);
 }
 
 /*
  * Return from a 32-bit signal handler: restore the machine context and
  * signal mask saved at uap->sigcntxp.
  *
  * Returns EFAULT when the context cannot be read, the error from
  * set_mcontext32() when the context is rejected, and EJUSTRETURN on
  * success.
  */
 int
 freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap)
 {
 	ucontext32_t uc;
 	int error;
 
 	if (uap == NULL)
 		return (EFAULT);
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
 		return (EFAULT);
 
 	/*
 	 * Propagate a rejected context to the caller rather than reporting
 	 * success for a context that was not installed.
 	 */
 	error = set_mcontext32(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	/* Restore signal mask. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * Save the calling thread's context to uap->oucp and then install the
  * context found at uap->ucp.
  *
  * Only the leading UC32_COPY_SIZE bytes of each context are transferred.
  * The signal mask from the new context is applied only after its machine
  * context has been accepted, matching freebsd32_setcontext().
  *
  * Returns EINVAL when either pointer is NULL, the copyout()/copyin()
  * error, or the result of set_mcontext32().
  */
 int
 freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap)
 {
 	ucontext32_t uc;
 	int ret;
 
 	if (uap->oucp == NULL || uap->ucp == NULL)
 		return (EINVAL);
 
 	/* Save the current context. */
 	bzero(&uc, sizeof(uc));
 	get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
 	PROC_LOCK(td->td_proc);
 	uc.uc_sigmask = td->td_sigmask;
 	PROC_UNLOCK(td->td_proc);
 	ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE);
 	if (ret != 0)
 		return (ret);
 
 	/* Fetch and install the new context. */
 	ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE);
 	if (ret != 0)
 		return (ret);
 	ret = set_mcontext32(td, &uc.uc_mcontext);
 	if (ret != 0)
 		return (ret);
 
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 	return (0);
 }
 
 /*
  * Deliver signal ksi to the current thread of a 32-bit process.
  *
  * A struct sigframe32 is built in kernel memory, copied to the user stack
  * (or the alternate signal stack when one is enabled for this signal and
  * not already in use), and the trapframe is rewritten so that the thread
  * resumes in the handler `catcher` with r0 = signal number, r1 = siginfo
  * pointer and r2/r5 = ucontext pointer.
  *
  * Called with the process lock and p_sigacts->ps_mtx held (asserted); both
  * are dropped around the copyout and re-acquired before returning.  If the
  * frame cannot be written the process is terminated with SIGILL.
  */
 void
 freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td;
 	struct proc *p;
 	struct trapframe *tf;
 	struct sigframe32 *fp, frame;
 	struct sigacts *psp;
 	struct siginfo32 siginfo;
 	struct sysentvec *sysent;
 	int onstack;
 	int sig;
 
 	siginfo_to_siginfo32(&ksi->ksi_info, &siginfo);
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	tf = td->td_frame;
 	/* x13 holds the 32-bit stack pointer. */
 	onstack = sigonstack(tf->tf_x[13]);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct sigframe32 *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		fp = (struct sigframe32 *)td->td_frame->tf_x[13];
 
 	/* make room on the stack */
 	fp--;
 
 	/* make the stack aligned */
 	fp = (struct sigframe32 *)((unsigned long)(fp) &~ (8 - 1));
 
 	/*
 	 * Zero the whole frame before filling it in: it is copied out to
 	 * userspace in full, and fields that are not explicitly set below
 	 * (and any padding) must not expose kernel stack contents.
 	 */
 	bzero(&frame, sizeof(frame));
 
 	/* Populate the siginfo frame. */
 	get_mcontext32(td, &frame.sf_uc.uc_mcontext, 0);
 #ifdef VFP
 	get_fpcontext32(td, &frame.sf_vfp);
 	frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp);
 	frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)(uintptr_t)&fp->sf_vfp;
 #else
 	frame.sf_uc.uc_mcontext.mc_vfp_size = 0;
 	frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)NULL;
 #endif
 	frame.sf_si = siginfo;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK )
 	    ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	frame.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp;
 	frame.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
 
 	/* Drop the locks while touching user memory. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in.  We invoke the handler
 	 * directly, only returning via the trampoline.  Note the
 	 * trampoline version numbers are coordinated with machine-
 	 * dependent code in libc.
 	 */
 
 	tf->tf_x[0] = sig;
 	tf->tf_x[1] = (register_t)&fp->sf_si;
 	tf->tf_x[2] = (register_t)&fp->sf_uc;
 
 	/* the trampoline uses r5 as the uc address */
 	tf->tf_x[5] = (register_t)&fp->sf_uc;
 	tf->tf_elr = (register_t)catcher;
 	tf->tf_x[13] = (register_t)fp;
 	sysent = p->p_sysent;
-	if (sysent->sv_sigcode_base != 0)
+	if (PROC_HAS_SHP(p))
 		tf->tf_x[14] = (register_t)PROC_SIGCODE(p);
 	else
 		tf->tf_x[14] = (register_t)(PROC_PS_STRINGS(p) -
 		    *(sysent->sv_szsigcode));
 	/* Set the mode to enter in the signal handler */
 	if ((register_t)catcher & 1)
 		tf->tf_spsr |= PSR_T;
 	else
 		tf->tf_spsr &= ~PSR_T;
 
 	/* Clear the single step flag while in the signal handler */
 	if ((td->td_pcb->pcb_flags & PCB_SINGLE_STEP) != 0) {
 		td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP;
 		WRITE_SPECIALREG(mdscr_el1,
 		    READ_SPECIALREG(mdscr_el1) & ~MDSCR_SS);
 		isb();
 	}
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_x[14],
 	    tf->tf_x[13]);
 
 	/* Re-take the locks the caller expects to hold on return. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 
 }
 
 #ifdef COMPAT_43
 /*
  * Counterpart of the osigreturn definition in kern_sig.c for !i386
  * platforms, matching what is connected to the FreeBSD/arm syscall.
  * The request is simply forwarded to nosys().
  */
 int
 ofreebsd32_sigreturn(struct thread *td, struct ofreebsd32_sigreturn_args *uap)
 {
 	struct nosys_args *nargs;
 
 	nargs = (struct nosys_args *)uap;
 	return (nosys(td, nargs));
 }
 #endif
diff --git a/sys/arm64/linux/linux_sysvec.c b/sys/arm64/linux/linux_sysvec.c
index 732eddf0c308..362917c3de31 100644
--- a/sys/arm64/linux/linux_sysvec.c
+++ b/sys/arm64/linux/linux_sysvec.c
@@ -1,811 +1,811 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1994-1996 Søren Schmidt
  * Copyright (c) 2018 Turing Robotic Industries Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cdefs.h>
 #include <sys/elf.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/stddef.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 
 #include <arm64/linux/linux.h>
 #include <arm64/linux/linux_proto.h>
 #include <compat/linux/linux_dtrace.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_fork.h>
 #include <compat/linux/linux_ioctl.h>
 #include <compat/linux/linux_mib.h>
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_vdso.h>
 
 #include <arm64/linux/linux_sigframe.h>
 
 #include <machine/md_var.h>
 
 #ifdef VFP
 #include <machine/vfp.h>
 #endif
 
 MODULE_VERSION(linux64elf, 1);
 
 /*
  * User address space layout for Linux processes, from the top down:
  * the vDSO (two pages), the shared page (one page, the size of the
  * native SHAREDPAGE), then the stack with ps_strings at its top.
  *
  * The size expression is parenthesized so that the macro expands safely
  * inside larger expressions (division, multiplication, ...).
  */
 #define	LINUX_VDSOPAGE_SIZE	(PAGE_SIZE * 2)
 #define	LINUX_VDSOPAGE		(VM_MAXUSER_ADDRESS - \
 				    LINUX_VDSOPAGE_SIZE)
 #define	LINUX_SHAREDPAGE	(LINUX_VDSOPAGE - PAGE_SIZE)
 				/*
 				 * PAGE_SIZE - the size
 				 * of the native SHAREDPAGE
 				 */
 #define	LINUX_USRSTACK		LINUX_SHAREDPAGE
 #define	LINUX_PS_STRINGS	(LINUX_USRSTACK - \
 				    sizeof(struct ps_strings))
 
 /* Size in bytes of the embedded vDSO image (end - start). */
 static int linux_szsigcode;
 /* VM object returned by the shared page initializer for the vDSO. */
 static vm_object_t linux_vdso_obj;
 /* Kernel mapping that the vDSO image is copied into and relocated in. */
 static char *linux_vdso_mapping;
 /* Bounds of the vDSO image linked into the kernel. */
 extern char _binary_linux_vdso_so_o_start;
 extern char _binary_linux_vdso_so_o_end;
 /* User address of the vDSO; set to LINUX_VDSOPAGE at install time. */
 static vm_offset_t linux_vdso_base;
 
 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
 
 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
 
 static int	linux_copyout_strings(struct image_params *imgp,
 		    uintptr_t *stack_base);
 static int	linux_elf_fixup(uintptr_t *stack_base,
 		    struct image_params *iparams);
 static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(const void *param);
 static void	linux_vdso_deinstall(const void *param);
 static void	linux_vdso_reloc(char *mapping, Elf_Addr offset);
 static void	linux_set_syscall_retval(struct thread *td, int error);
 static int	linux_fetch_syscall_args(struct thread *td);
 static void	linux_exec_setregs(struct thread *td, struct image_params *imgp,
 		    uintptr_t stack);
 static void	linux_exec_sysvec_init(void *param);
 static int	linux_on_exec_vmspace(struct proc *p,
 		    struct image_params *imgp);
 
 /* DTrace init */
 LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
 
 /* DTrace probes */
 LIN_SDT_PROBE_DEFINE0(sysvec, linux_exec_setregs, todo);
 LIN_SDT_PROBE_DEFINE0(sysvec, linux_copyout_auxargs, todo);
 LIN_SDT_PROBE_DEFINE0(sysvec, linux_elf_fixup, todo);
 
 LINUX_VDSO_SYM_CHAR(linux_platform);
 LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
 LINUX_VDSO_SYM_INTPTR(linux_vdso_sigcode);
 
 static int
 linux_fetch_syscall_args(struct thread *td)
 {
 	struct proc *p;
 	struct syscall_args *sa;
 	register_t *ap;
 
 	p = td->td_proc;
 	ap = td->td_frame->tf_x;
 	sa = &td->td_sa;
 
 	sa->code = td->td_frame->tf_x[8];
 	sa->original_code = sa->code;
 	/* LINUXTODO: generic syscall? */
 	if (sa->code >= p->p_sysent->sv_size)
 		sa->callp = &p->p_sysent->sv_table[0];
 	else
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 
 	if (sa->callp->sy_narg > nitems(sa->args))
 		panic("ARM64TODO: Could we have more than %zu args?",
 		    nitems(sa->args));
 	memcpy(sa->args, ap, nitems(sa->args) * sizeof(register_t));
 
 	td->td_retval[0] = 0;
 	return (0);
 }
 
 static void
 linux_set_syscall_retval(struct thread *td, int error)
 {
 
 	td->td_retval[1] = td->td_frame->tf_x[1];
 	cpu_set_syscall_retval(td, error);
 
 	if (__predict_false(error != 0)) {
 		if (error != ERESTART && error != EJUSTRETURN)
 			td->td_frame->tf_x[0] = bsd_to_linux_errno(error);
 	}
 }
 
 /*
  * Build the ELF auxiliary vector for a new Linux image and copy it out
  * to user address `base`.
  *
  * A zeroed array of LINUX_AT_COUNT entries is filled in, imgp->auxargs is
  * freed and cleared, and the whole array (LINUX_AT_COUNT entries) is
  * copied out.  Returns the result of copyout().
  */
 static int
 linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
 {
 	Elf_Auxargs *args;
 	Elf_Auxinfo *auxv, *cur;
 	struct proc *p;
 	int error, secure;
 
 	LIN_SDT_PROBE0(sysvec, linux_copyout_auxargs, todo);
 	p = imgp->proc;
 	args = (Elf64_Auxargs *)imgp->auxargs;
 
 	auxv = malloc(LINUX_AT_COUNT * sizeof(*auxv), M_TEMP,
 	    M_WAITOK | M_ZERO);
 	cur = auxv;
 
 	/* AT_SECURE is 1 when the process carries P_SUGID. */
 	secure = (p->p_flag & P_SUGID) != 0;
 
 	AUXARGS_ENTRY(cur, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
 	AUXARGS_ENTRY(cur, LINUX_AT_MINSIGSTKSZ, LINUX_MINSIGSTKSZ);
 	AUXARGS_ENTRY(cur, LINUX_AT_HWCAP, *imgp->sysent->sv_hwcap);
 	AUXARGS_ENTRY(cur, AT_PAGESZ, args->pagesz);
 	AUXARGS_ENTRY(cur, LINUX_AT_CLKTCK, stclohz);
 	AUXARGS_ENTRY(cur, AT_PHDR, args->phdr);
 	AUXARGS_ENTRY(cur, AT_PHENT, args->phent);
 	AUXARGS_ENTRY(cur, AT_PHNUM, args->phnum);
 	AUXARGS_ENTRY(cur, AT_BASE, args->base);
 	AUXARGS_ENTRY(cur, AT_FLAGS, args->flags);
 	AUXARGS_ENTRY(cur, AT_ENTRY, args->entry);
 	AUXARGS_ENTRY(cur, AT_UID, p->p_ucred->cr_ruid);
 	AUXARGS_ENTRY(cur, AT_EUID, p->p_ucred->cr_svuid);
 	AUXARGS_ENTRY(cur, AT_GID, p->p_ucred->cr_rgid);
 	AUXARGS_ENTRY(cur, AT_EGID, p->p_ucred->cr_svgid);
 	AUXARGS_ENTRY(cur, LINUX_AT_SECURE, secure);
 	AUXARGS_ENTRY_PTR(cur, LINUX_AT_RANDOM, imgp->canary);
 	AUXARGS_ENTRY(cur, LINUX_AT_HWCAP2, *imgp->sysent->sv_hwcap2);
 	if (imgp->execpathp != 0)
 		AUXARGS_ENTRY_PTR(cur, LINUX_AT_EXECFN, imgp->execpathp);
 	if (args->execfd != -1)
 		AUXARGS_ENTRY(cur, AT_EXECFD, args->execfd);
 	AUXARGS_ENTRY(cur, LINUX_AT_PLATFORM, PTROUT(linux_platform));
 	AUXARGS_ENTRY(cur, AT_NULL, 0);
 
 	/* The kernel-side auxargs are no longer needed once consumed. */
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 	KASSERT(cur - auxv <= LINUX_AT_COUNT, ("Too many auxargs"));
 
 	error = copyout(auxv, (void *)base, sizeof(*auxv) * LINUX_AT_COUNT);
 	free(auxv, M_TEMP);
 	return (error);
 }
 
 /*
  * Image fixup hook for Linux ELF binaries.  Nothing is adjusted here: the
  * function only fires its DTrace "todo" probe and reports success.
  */
 static int
 linux_elf_fixup(uintptr_t *stack_base, struct image_params *imgp)
 {
 
 	LIN_SDT_PROBE0(sysvec, linux_elf_fixup, todo);
 
 	return (0);
 }
 
 /*
  * Copy strings out to the new process address space, constructing new arg
  * and env vector tables. Return a pointer to the base so that it can be used
  * as the initial stack pointer.
  * LINUXTODO: deduplicate against other linuxulator archs
  */
 /*
  * Layout, walking downward from the ps_strings address: optional exec
  * path, the random canary, the argument/environment strings, optional
  * room for the auxiliary vector, and finally the argc/argv/envp vector
  * table whose (stack-aligned) start is returned through *stack_base.
  *
  * Returns 0 on success, the copyout() error, or EFAULT when a word store
  * to user memory fails.
  */
 static int
 linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base)
 {
 	char **vectp;
 	char *stringp;
 	uintptr_t destp, ustringp;
 	struct ps_strings *arginfo;
 	char canary[LINUX_AT_RANDOM_LEN];
 	size_t execpath_len;
 	struct proc *p;
 	int argc, envc, error;
 
 	p = imgp->proc;
 	arginfo = (struct ps_strings *)PROC_PS_STRINGS(p);
 	/* destp is the running "next free byte below" cursor. */
 	destp = (uintptr_t)arginfo;
 
 	/* The exec path is only copied out when auxargs will reference it. */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL) {
 		execpath_len = strlen(imgp->execpath) + 1;
 		destp -= execpath_len;
 		destp = rounddown2(destp, sizeof(void *));
 		imgp->execpathp = (void *)destp;
 		error = copyout(imgp->execpath, imgp->execpathp, execpath_len);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Prepare the canary for SSP. */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= roundup(sizeof(canary), sizeof(void *));
 	imgp->canary = (void *)destp;
 	error = copyout(canary, imgp->canary, sizeof(canary));
 	if (error != 0)
 		return (error);
 
 	/* Allocate room for the argument and environment strings. */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(void *));
 	ustringp = destp;
 
 	if (imgp->auxargs) {
 		/*
 		 * Allocate room on the stack for the ELF auxargs
 		 * array.  It has up to LINUX_AT_COUNT entries.
 		 */
 		destp -= LINUX_AT_COUNT * sizeof(Elf64_Auxinfo);
 		destp = rounddown2(destp, sizeof(void *));
 	}
 
 	vectp = (char **)destp;
 
 	/*
 	 * Allocate room for argc and the argv[] and env vectors including the
 	 * terminating NULL pointers.
 	 */
 	vectp -= 1 + imgp->args->argc + 1 + imgp->args->envc + 1;
 	vectp = (char **)STACKALIGN(vectp);
 
 	/* vectp also becomes our initial stack base. */
 	*stack_base = (uintptr_t)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/* Copy out strings - arguments and environment. */
 	error = copyout(stringp, (void *)ustringp,
 	    ARG_MAX - imgp->args->stringspace);
 	if (error != 0)
 		return (error);
 
 	/* Fill in "ps_strings" struct for ps, w, etc. */
 	if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 ||
 	    suword(&arginfo->ps_nargvstr, argc) != 0)
 		return (EFAULT);
 
 	/* The first word of the vector table is argc itself. */
 	if (suword(vectp++, argc) != 0)
 		return (EFAULT);
 
 	/* Fill in argument portion of vector table. */
 	for (; argc > 0; --argc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		/* Advance both cursors past this NUL-terminated string. */
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* A null vector table pointer separates the argp's from the envp's. */
 	if (suword(vectp++, 0) != 0)
 		return (EFAULT);
 
 	if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 ||
 	    suword(&arginfo->ps_nenvstr, envc) != 0)
 		return (EFAULT);
 
 	/* Fill in environment portion of vector table. */
 	for (; envc > 0; --envc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* The end of the vector table is a null pointer. */
 	if (suword(vectp, 0) != 0)
 		return (EFAULT);
 
 	/* The auxiliary vector follows directly after the envp terminator. */
 	if (imgp->auxargs) {
 		vectp++;
 		error = imgp->sysent->sv_copyout_auxargs(imgp,
 		    (uintptr_t)vectp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Reset registers to default values on exec.
  */
 /*
  * Give a freshly exec'ed Linux image its initial register state: a zeroed
  * trapframe with only sp, lr and the entry point set, cleared thread
  * pointer registers, reset VFP state (when VFP is configured) and cleared
  * debug registers.
  */
 static void
 linux_exec_setregs(struct thread *td, struct image_params *imgp,
     uintptr_t stack)
 {
 	struct trapframe *tf;
 	struct pcb *pcb;
 
 	tf = td->td_frame;
 	pcb = td->td_pcb;
 
 	/* LINUXTODO: validate */
 	LIN_SDT_PROBE0(sysvec, linux_exec_setregs, todo);
 
 	/*
 	 * glibc start.S registers function pointer in x0 with atexit;
 	 * x0 is left at zero by the memset below.
 	 */
 	memset(tf, 0, sizeof(*tf));
 	tf->tf_sp = stack;
 #if 0	/* LINUXTODO: See if this is used. */
 	tf->tf_lr = imgp->entry_addr;
 #else
 	tf->tf_lr = 0xffffffffffffffff;
 #endif
 	tf->tf_elr = imgp->entry_addr;
 
 	/* Clear the thread pointers, both the saved and the live copies. */
 	pcb->pcb_tpidr_el0 = 0;
 	pcb->pcb_tpidrro_el0 = 0;
 	WRITE_SPECIALREG(tpidrro_el0, 0);
 	WRITE_SPECIALREG(tpidr_el0, 0);
 
 #ifdef VFP
 	vfp_reset_state(td, pcb);
 #endif
 
 	/*
 	 * Clear debug register state. It is not applicable to the new process.
 	 */
 	bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs));
 }
 
 int
 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
 {
 	struct l_sigframe *frame;
 	ucontext_t uc;
 	struct trapframe *tf;
 	int error;
 
 	tf = td->td_frame;
 	frame = (struct l_sigframe *)tf->tf_sp;
 
 	if (copyin((void *)&frame->uc, &uc, sizeof(uc)))
 		return (EFAULT);
 
 	error = set_mcontext(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	/* Restore signal mask. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * Deliver signal ksi to the current thread of a Linux process.
  *
  * A struct l_sigframe is assembled in a zeroed heap buffer and copied to
  * the user stack (or the alternate signal stack when one is enabled for
  * this signal and not already in use).  The frame carries both the
  * Linux-format context seen by the handler and a native ucontext in its
  * `uc` member, which linux_rt_sigreturn() reads back.  The trapframe is
  * then rewritten so that the thread resumes in the vDSO signal trampoline
  * with x0 = Linux signal number, x1/x2 = siginfo/ucontext pointers (zero
  * unless the handler was installed with siginfo) and x8 = handler address.
  *
  * Called with the process lock and p_sigacts->ps_mtx held (asserted); both
  * are dropped before the frame is allocated and re-acquired before
  * returning.  If the frame cannot be written the process is terminated
  * with SIGILL.
  */
 static void
 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td;
 	struct proc *p;
 	struct trapframe *tf;
 	struct l_sigframe *fp, *frame;
 	struct l_fpsimd_context *fpsimd;
 	struct l_esr_context *esr;
 	l_stack_t uc_stack;
 	ucontext_t uc;
 	uint8_t *scr;
 	struct sigacts *psp;
 	int onstack, sig, issiginfo;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_sp);
 	issiginfo = SIGISMEMBER(psp->ps_siginfo, sig);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else {
 		fp = (struct l_sigframe *)td->td_frame->tf_sp;
 	}
 
 	/* Make room, keeping the stack aligned */
 	fp--;
 	fp = (struct l_sigframe *)STACKALIGN(fp);
 
 	/*
 	 * Both on-stack temporaries end up in the frame that is copied out
 	 * to userspace, so zero them first: fields not set below (and any
 	 * padding) must not expose kernel stack contents.
 	 */
 	bzero(&uc, sizeof(uc));
 	bzero(&uc_stack, sizeof(uc_stack));
 
 	get_mcontext(td, &uc.uc_mcontext, 0);
 	uc.uc_sigmask = *mask;
 
 	uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
 	uc_stack.ss_size = td->td_sigstk.ss_size;
 	uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Fill in the frame to copy out */
 	frame = malloc(sizeof(*frame), M_LINUX, M_WAITOK | M_ZERO);
 
 	memcpy(&frame->sf.sf_uc.uc_sc.regs, tf->tf_x, sizeof(tf->tf_x));
 	frame->sf.sf_uc.uc_sc.regs[30] = tf->tf_lr;
 	frame->sf.sf_uc.uc_sc.sp = tf->tf_sp;
 	/* The interrupted pc is the exception return address, not lr. */
 	frame->sf.sf_uc.uc_sc.pc = tf->tf_elr;
 	frame->sf.sf_uc.uc_sc.pstate = tf->tf_spsr;
 	frame->sf.sf_uc.uc_sc.fault_address = (register_t)ksi->ksi_addr;
 
 	/* Stack frame for unwinding */
 	frame->fp = tf->tf_x[29];
 	frame->lr = tf->tf_lr;
 
 	/* Translate the signal. */
 	sig = bsd_to_linux_signal(sig);
 	siginfo_to_lsiginfo(&ksi->ksi_info, &frame->sf.sf_si, sig);
 	bsd_to_linux_sigset(mask, &frame->sf.sf_uc.uc_sigmask);
 
 	/*
 	 * Prepare fpsimd & esr. Does not check sizes, as
 	 * __reserved is big enough.
 	 */
 	scr = (uint8_t *)&frame->sf.sf_uc.uc_sc.__reserved;
 #ifdef VFP
 	fpsimd = (struct l_fpsimd_context *) scr;
 	fpsimd->head.magic = L_FPSIMD_MAGIC;
 	fpsimd->head.size = sizeof(struct l_fpsimd_context);
 	fpsimd->fpsr = uc.uc_mcontext.mc_fpregs.fp_sr;
 	fpsimd->fpcr = uc.uc_mcontext.mc_fpregs.fp_cr;
 
 	memcpy(fpsimd->vregs, &uc.uc_mcontext.mc_fpregs.fp_q,
 	    sizeof(uc.uc_mcontext.mc_fpregs.fp_q));
 	scr += roundup(sizeof(struct l_fpsimd_context), 16);
 #endif
 	/* An ESR record is only emitted when a fault address is present. */
 	if (ksi->ksi_addr != 0) {
 		esr = (struct l_esr_context *) scr;
 		esr->head.magic = L_ESR_MAGIC;
 		esr->head.size = sizeof(struct l_esr_context);
 		esr->esr = tf->tf_esr;
 	}
 
 	memcpy(&frame->sf.sf_uc.uc_stack, &uc_stack, sizeof(uc_stack));
 	/* Native context, consumed by linux_rt_sigreturn(). */
 	memcpy(&frame->uc, &uc, sizeof(uc));
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		free(frame, M_LINUX);
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 	free(frame, M_LINUX);
 
 	tf->tf_x[0]= sig;
 	if (issiginfo) {
 		tf->tf_x[1] = (register_t)&fp->sf.sf_si;
 		tf->tf_x[2] = (register_t)&fp->sf.sf_uc;
 	} else {
 		tf->tf_x[1] = 0;
 		tf->tf_x[2] = 0;
 	}
 	tf->tf_x[8] = (register_t)catcher;
 	tf->tf_sp = (register_t)fp;
 	tf->tf_elr = (register_t)linux_vdso_sigcode;
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr,
 	    tf->tf_sp);
 
 	/* Re-take the locks the caller expects to hold on return. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * System entry vector for 64-bit Linux ELF processes: wires the Linux
  * syscall table, signal delivery, exec-time stack/auxv construction and
  * the address-space layout constants defined at the top of this file into
  * the generic exec and syscall machinery.
  */
 struct sysentvec elf_linux_sysvec = {
 	.sv_size	= LINUX_SYS_MAXSYSCALL,
 	.sv_table	= linux_sysent,
 	.sv_fixup	= linux_elf_fixup,
 	.sv_sendsig	= linux_rt_sendsig,
 	.sv_sigcode	= &_binary_linux_vdso_so_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux ELF64",
 	.sv_coredump	= elf64_coredump,
 	.sv_elf_core_osabi = ELFOSABI_NONE,
 	.sv_elf_core_abi_vendor = LINUX_ABI_VENDOR,
 	.sv_elf_core_prepare_notes = linux64_prepare_notes,
 	.sv_imgact_try	= linux_exec_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= LINUX_USRSTACK,
 	.sv_psstrings	= LINUX_PS_STRINGS,
 	.sv_psstringssz	= sizeof(struct ps_strings),
 	.sv_stackprot	= VM_PROT_READ | VM_PROT_WRITE,
 	.sv_copyout_auxargs = linux_copyout_auxargs,
 	.sv_copyout_strings = linux_copyout_strings,
 	.sv_setregs	= linux_exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN |
 	    SV_SIG_WAITNDQ | SV_TIMEKEEP,
 	.sv_set_syscall_retval = linux_set_syscall_retval,
 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_shared_page_base = LINUX_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
 	.sv_thread_detach = linux_thread_detach,
 	.sv_trap	= NULL,
 	.sv_hwcap	= &elf_hwcap,
 	.sv_hwcap2	= &elf_hwcap2,
 	.sv_onexec	= linux_on_exec_vmspace,
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
 };
 
 /*
  * Exec-time hook: map the Linux vDSO into the new address space and, only
  * if that succeeds, run the common linux_on_exec() processing.
  *
  * Returns 0 on success or the error from linux_map_vdso().
  */
 static int
 linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
 {
 	int error;
 
 	error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base,
 	    LINUX_VDSOPAGE_SIZE, imgp);
 	if (error != 0)
 		return (error);
 
 	linux_on_exec(p, imgp);
 	return (0);
 }
 
 /*
  * linux_vdso_install() and linux_exec_sysvec_init() must be called
  * after exec_sysvec_init() which is SI_SUB_EXEC (SI_ORDER_ANY).
  */
 /*
  * SYSINIT hook: run the generic exec_sysvec_init() on the sysentvec passed
  * in `param`, then patch the vDSO's kern_timekeep_base variable (located
  * through its offset from linux_vdso_base inside the kernel mapping) with
  * the timekeep address for that sysentvec.
  */
 static void
 linux_exec_sysvec_init(void *param)
 {
 	l_uintptr_t *ktimekeep_base;
 	struct sysentvec *sv;
 	ptrdiff_t tkoff;
 
 	sv = param;
 	/* Fill timekeep_base */
 	exec_sysvec_init(sv);
 
 	/* Translate the symbol's user address into the kernel mapping. */
 	tkoff = kern_timekeep_base - linux_vdso_base;
 	ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
-	*ktimekeep_base = sv->sv_timekeep_base;
+	*ktimekeep_base = sv->sv_shared_page_base + sv->sv_timekeep_offset;
 }
 SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC + 1, SI_ORDER_ANY,
     linux_exec_sysvec_init, &elf_linux_sysvec);
 
 /*
  * SYSINIT hook that prepares the Linux vDSO: records the image size and
  * user base address, fixes up the embedded image for that base, allocates
  * the backing object/mapping, copies the image into it and applies the
  * relocations.
  */
 static void
 linux_vdso_install(const void *param)
 {
 	char *image, *image_end;
 
 	image = &_binary_linux_vdso_so_o_start;
 	image_end = &_binary_linux_vdso_so_o_end;
 
 	/* The image has to fit in the pages reserved for it. */
 	linux_szsigcode = image_end - image;
 	MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
 
 	linux_vdso_base = LINUX_VDSOPAGE;
 
 	__elfN(linux_vdso_fixup)(image, linux_vdso_base);
 
 	linux_vdso_obj = __elfN(linux_shared_page_init)
 	    (&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
 	bcopy(image, linux_vdso_mapping, linux_szsigcode);
 
 	linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
 }
 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC + 1, SI_ORDER_FIRST,
     linux_vdso_install, NULL);
 
 /*
  * SYSUNINIT handler: pass the vDSO object and its kernel mapping, as set
  * up by linux_vdso_install(), to linux_shared_page_fini().
  */
 static void
 linux_vdso_deinstall(const void *param)
 {
 
 	__elfN(linux_shared_page_fini)(linux_vdso_obj,
 	    linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
 }
 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
     linux_vdso_deinstall, NULL);
 
 /*
  * Apply the relocations contained in the vDSO image at 'mapping'.
  *
  * The section headers are scanned for relocation sections: SHT_REL
  * sections are reported and ignored; for SHT_RELA the entries of the
  * last such section found are processed.  Only R_AARCH64_NONE and
  * R_AARCH64_RELATIVE are handled, any other relocation type is reported
  * and skipped.
  *
  * 'offset' is only sanity-checked here (it must be non-zero).
  * NOTE(review): R_AARCH64_RELATIVE is resolved against 'mapping', not
  * against 'offset' -- confirm that this is intended.
  */
 static void
 linux_vdso_reloc(char *mapping, Elf_Addr offset)
 {
 	Elf_Size rtype, symidx;
 	const Elf_Rela *rela;
 	const Elf_Shdr *shdr;
 	const Elf_Ehdr *ehdr;
 	Elf_Addr *where;
 	Elf_Addr addr, addend;
 	int i, relacnt;
 
 	MPASS(offset != 0);
 
 	/* Stays NULL/0 when the image has no SHT_RELA section. */
 	rela = NULL;
 	relacnt = 0;
 	ehdr = (const Elf_Ehdr *)mapping;
 	shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
 	for (i = 0; i < ehdr->e_shnum; i++) {
 		switch (shdr[i].sh_type) {
 		case SHT_REL:
 			printf("Linux Aarch64 vDSO: unexpected Rel section\n");
 			break;
 		case SHT_RELA:
 			rela = (const Elf_Rela *)(mapping + shdr[i].sh_offset);
 			relacnt = shdr[i].sh_size / sizeof(*rela);
 			break;
 		default:
 			break;
 		}
 	}
 
 	for (i = 0; i < relacnt; i++, rela++) {
 		where = (Elf_Addr *)(mapping + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 
 		switch (rtype) {
 		case R_AARCH64_NONE:	/* none */
 			break;
 
 		case R_AARCH64_RELATIVE:	/* B + A */
 			addr = (Elf_Addr)(mapping + addend);
 			/* Avoid writing when the slot is already correct. */
 			if (*where != addr)
 				*where = addr;
 			break;
 		default:
 			/* Both values are unsigned; print them as such. */
 			printf("Linux Aarch64 vDSO: unexpected relocation type %lu, "
 			    "symbol index %lu\n", (unsigned long)rtype,
 			    (unsigned long)symidx);
 			break;
 		}
 	}
 }
 
 /*
  * Vendor name used by linux64_brandnote and the first descriptor word
  * that linux_trans_osrel() requires.
  */
 static char GNU_ABI_VENDOR[] = "GNU";
 static int GNU_ABI_LINUX = 0;
 
 /* LINUXTODO: deduplicate */
 /*
  * trans_osrel callback of linux64_brandnote.  The note descriptor is
  * located after the note header and the name (padded to a multiple of
  * sizeof(Elf32_Addr)).  If its first word is GNU_ABI_LINUX, the next
  * three words are turned into *osrel via LINUX_KERNVER() and true is
  * returned; otherwise *osrel is left alone and false is returned.
  */
 static bool
 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
 {
 	const Elf32_Word *words;
 	uintptr_t descaddr;
 
 	descaddr = (uintptr_t)(note + 1) +
 	    roundup2(note->n_namesz, sizeof(Elf32_Addr));
 	words = (const Elf32_Word *)descaddr;
 
 	if (words[0] == GNU_ABI_LINUX) {
 		*osrel = LINUX_KERNVER(words[1], words[2], words[3]);
 		return (true);
 	}
 	return (false);
 }
 
 /*
  * Brand note description: note type 1, vendor name "GNU" and a 16-byte
  * descriptor, which linux_trans_osrel() translates (BN_TRANSLATE_OSREL).
  */
 static Elf_Brandnote linux64_brandnote = {
 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
 	.hdr.n_descsz	= 16,
 	.hdr.n_type	= 1,
 	.vendor		= GNU_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= linux_trans_osrel
 };
 
 /*
  * ELF brand entry for EM_AARCH64 Linux binaries; it ties the brand to
  * elf_linux_sysvec and to linux64_brandnote.
  *
  * NOTE(review): interp_path names the x86-64 dynamic linker although
  * .machine is EM_AARCH64 -- confirm whether this is intentional.
  */
 static Elf64_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_AARCH64,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib64/ld-linux-x86-64.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux64_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /*
  * NULL-terminated list of the brands that linux64_elf_modevent()
  * inserts on load and removes on unload.
  */
 Elf64_Brandinfo *linux_brandlist[] = {
 	&linux_glibc2brand,
 	NULL
 };
 
 /*
  * Module event handler.
  *
  * MOD_LOAD: insert every brand of linux_brandlist; if all insertions
  * succeeded, register the Linux ioctl handlers and set stclohz.
  * MOD_UNLOAD: fail with EBUSY if any brand is in use, otherwise remove
  * the brands and, on success, unregister the ioctl handlers.
  * Any other event yields EOPNOTSUPP.
  */
 static int
 linux64_elf_modevent(module_t mod, int type, void *data)
 {
 	Elf64_Brandinfo **bi;
 	struct linux_ioctl_handler **handler;
 	int rv;
 
 	rv = 0;
 	switch (type) {
 	case MOD_LOAD:
 		for (bi = &linux_brandlist[0]; *bi != NULL; bi++) {
 			if (elf64_insert_brand_entry(*bi) < 0)
 				rv = EINVAL;
 		}
 		if (rv != 0)
 			break;
 		SET_FOREACH(handler, linux_ioctl_handler_set)
 			linux_ioctl_register_handler(*handler);
 		stclohz = (stathz != 0) ? stathz : hz;
 		if (bootverbose)
 			printf("Linux arm64 ELF exec handler installed\n");
 		break;
 	case MOD_UNLOAD:
 		/* Refuse to unload while any brand is still in use. */
 		for (bi = &linux_brandlist[0]; *bi != NULL; bi++) {
 			if (elf64_brand_inuse(*bi))
 				rv = EBUSY;
 		}
 		if (rv == 0) {
 			for (bi = &linux_brandlist[0]; *bi != NULL; bi++) {
 				if (elf64_remove_brand_entry(*bi) < 0)
 					rv = EINVAL;
 			}
 		}
 		if (rv != 0) {
 			printf("Could not deinstall Linux arm64 ELF interpreter entry\n");
 			break;
 		}
 		SET_FOREACH(handler, linux_ioctl_handler_set)
 			linux_ioctl_unregister_handler(*handler);
 		if (bootverbose)
 			printf("Linux arm64 ELF exec handler removed\n");
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (rv);
 }
 
 /* Module description: name, event handler and a zero third member. */
 static moduledata_t linux64_elf_mod = {
 	"linux64elf",
 	linux64_elf_modevent,
 	0
 };
 
 /* Register the module; it depends on linux_common version 1. */
 DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
 MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1);
 FEATURE(linux64, "AArch64 Linux 64bit support");
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index 67dcaa35cae5..d1c44a4f9952 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -1,4086 +1,4086 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ffclock.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ktrace.h"
 
 #define __ELF_WORD_SIZE 32
 
 #ifdef COMPAT_FREEBSD11
 #define	_WANT_FREEBSD11_KEVENT
 #endif
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/capsicum.h>
 #include <sys/clock.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/imgact.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/file.h>		/* Must come after sys/malloc.h */
 #include <sys/imgact.h>
 #include <sys/mbuf.h>
 #include <sys/mman.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/procctl.h>
 #include <sys/ptrace.h>
 #include <sys/reboot.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/selinfo.h>
 #include <sys/eventvar.h>	/* Must come after sys/selinfo.h */
 #include <sys/pipe.h>		/* Must come after sys/selinfo.h */
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
 #include <sys/thr.h>
 #include <sys/timex.h>
 #include <sys/unistd.h>
 #include <sys/ucontext.h>
 #include <sys/vnode.h>
 #include <sys/wait.h>
 #include <sys/ipc.h>
 #include <sys/msg.h>
 #include <sys/sem.h>
 #include <sys/shm.h>
 #include <sys/timeffc.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #ifdef INET
 #include <netinet/in.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/elf.h>
 #ifdef __amd64__
 #include <machine/md_var.h>
 #endif
 
 #include <security/audit/audit.h>
 
 #include <compat/freebsd32/freebsd32_util.h>
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_ipc.h>
 #include <compat/freebsd32/freebsd32_misc.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 
 FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD");
 
 /*
  * 32-bit layout of the PT_IO descriptor as copied in from and out to a
  * 32-bit process by freebsd32_ptrace(); offsets, addresses and lengths
  * are 32 bits wide.
  */
 struct ptrace_io_desc32 {
 	int		piod_op;
 	uint32_t	piod_offs;
 	uint32_t	piod_addr;
 	uint32_t	piod_len;
 };
 
 /*
  * 32-bit counterpart of struct ptrace_sc_ret; filled in by
  * ptrace_sc_ret_to32().
  */
 struct ptrace_sc_ret32 {
 	uint32_t	sr_retval[2];
 	int		sr_error;
 };
 
 /*
  * 32-bit layout of the PT_VM_ENTRY argument as exchanged with a 32-bit
  * process by freebsd32_ptrace(); pve_path carries a 32-bit user pointer.
  */
 struct ptrace_vm_entry32 {
 	int		pve_entry;
 	int		pve_timestamp;
 	uint32_t	pve_start;
 	uint32_t	pve_end;
 	uint32_t	pve_offset;
 	u_int		pve_prot;
 	u_int		pve_pathlen;
 	int32_t		pve_fileid;
 	u_int		pve_fsid;
 	uint32_t	pve_path;
 };
 
 /*
  * Compile-time checks that the 32-bit compat structures have the
  * expected sizes.  Several structures have one size on amd64 and a
  * different one on the other architectures.
  */
 #ifdef __amd64__
 CTASSERT(sizeof(struct timeval32) == 8);
 CTASSERT(sizeof(struct timespec32) == 8);
 CTASSERT(sizeof(struct itimerval32) == 16);
 CTASSERT(sizeof(struct bintime32) == 12);
 #else
 CTASSERT(sizeof(struct timeval32) == 16);
 CTASSERT(sizeof(struct timespec32) == 16);
 CTASSERT(sizeof(struct itimerval32) == 32);
 CTASSERT(sizeof(struct bintime32) == 16);
 #endif
 CTASSERT(sizeof(struct ostatfs32) == 256);
 #ifdef __amd64__
 CTASSERT(sizeof(struct rusage32) == 72);
 #else
 CTASSERT(sizeof(struct rusage32) == 88);
 #endif
 CTASSERT(sizeof(struct sigaltstack32) == 12);
 #ifdef __amd64__
 CTASSERT(sizeof(struct kevent32) == 56);
 #else
 CTASSERT(sizeof(struct kevent32) == 64);
 #endif
 CTASSERT(sizeof(struct iovec32) == 8);
 CTASSERT(sizeof(struct msghdr32) == 28);
 #ifdef __amd64__
 CTASSERT(sizeof(struct stat32) == 208);
 CTASSERT(sizeof(struct freebsd11_stat32) == 96);
 #else
 CTASSERT(sizeof(struct stat32) == 224);
 CTASSERT(sizeof(struct freebsd11_stat32) == 120);
 #endif
 CTASSERT(sizeof(struct sigaction32) == 24);
 
 /* Forward declarations of file-local helpers. */
 static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count);
 static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count);
 static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id,
     int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp);
 
 /*
  * Convert the native struct rusage '*s' into its 32-bit form '*s32',
  * field by field: TV_CP for the two time values, CP for the counters.
  */
 void
 freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32)
 {
 
 	TV_CP(*s, *s32, ru_utime);
 	TV_CP(*s, *s32, ru_stime);
 	CP(*s, *s32, ru_maxrss);
 	CP(*s, *s32, ru_ixrss);
 	CP(*s, *s32, ru_idrss);
 	CP(*s, *s32, ru_isrss);
 	CP(*s, *s32, ru_minflt);
 	CP(*s, *s32, ru_majflt);
 	CP(*s, *s32, ru_nswap);
 	CP(*s, *s32, ru_inblock);
 	CP(*s, *s32, ru_oublock);
 	CP(*s, *s32, ru_msgsnd);
 	CP(*s, *s32, ru_msgrcv);
 	CP(*s, *s32, ru_nsignals);
 	CP(*s, *s32, ru_nvcsw);
 	CP(*s, *s32, ru_nivcsw);
 }
 
 /*
  * 32-bit wait4(2): call kern_wait() and copy the exit status and, if
  * requested, the resource usage (converted to struct rusage32) out to
  * the caller.  The first failing step determines the return value.
  */
 int
 freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap)
 {
 	struct rusage32 ru32;
 	struct rusage ru, *rup;
 	int error, status;
 
 	/* Only collect resource usage when the caller asked for it. */
 	rup = (uap->rusage != NULL) ? &ru : NULL;
 
 	error = kern_wait(td, uap->pid, &status, uap->options, rup);
 	if (error != 0)
 		return (error);
 
 	if (uap->status != NULL) {
 		error = copyout(&status, uap->status, sizeof(status));
 		if (error != 0)
 			return (error);
 	}
 	if (uap->rusage != NULL) {
 		freebsd32_rusage_out(&ru, &ru32);
 		error = copyout(&ru32, uap->rusage, sizeof(ru32));
 	}
 	return (error);
 }
 
 /*
  * 32-bit wait6(2): call kern_wait6() with the 64-bit id assembled from
  * uap->id, then copy out the status, the resource usage and the siginfo
  * (both converted to their 32-bit forms) for every non-NULL user
  * pointer.  Processing stops at the first failing copyout.
  */
 int
 freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap)
 {
 	struct __wrusage32 wru32;
 	struct __wrusage wru, *wrup;
 	struct siginfo32 si32;
 	struct __siginfo si, *sip;
 	int error, status;
 
 	wrup = (uap->wrusage != NULL) ? &wru : NULL;
 
 	sip = NULL;
 	if (uap->info != NULL) {
 		sip = &si;
 		bzero(sip, sizeof(*sip));
 	}
 
 	error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id),
 	    &status, uap->options, wrup, sip);
 	if (error != 0)
 		return (error);
 
 	if (uap->status != NULL) {
 		error = copyout(&status, uap->status, sizeof(status));
 		if (error != 0)
 			return (error);
 	}
 	if (uap->wrusage != NULL) {
 		freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self);
 		freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children);
 		error = copyout(&wru32, uap->wrusage, sizeof(wru32));
 		if (error != 0)
 			return (error);
 	}
 	if (uap->info != NULL) {
 		siginfo_to_siginfo32(&si, &si32);
 		error = copyout(&si32, uap->info, sizeof(si32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Fill the FreeBSD 4 32-bit statfs structure '*out' from '*in'.
  * 'out' is zeroed first; counters that are not copied with CP are
  * clamped to INT32_MAX, and the strings are copied with strlcpy()
  * bounded by the smaller of the old and new name lengths.
  */
 static void
 copy_statfs(struct statfs *in, struct ostatfs32 *out)
 {
 
 	/* presumably rescales the block counts of 'in' to fit INT32_MAX */
 	statfs_scale_blocks(in, INT32_MAX);
 	bzero(out, sizeof(*out));
 	CP(*in, *out, f_bsize);
 	out->f_iosize = MIN(in->f_iosize, INT32_MAX);
 	CP(*in, *out, f_blocks);
 	CP(*in, *out, f_bfree);
 	CP(*in, *out, f_bavail);
 	out->f_files = MIN(in->f_files, INT32_MAX);
 	out->f_ffree = MIN(in->f_ffree, INT32_MAX);
 	CP(*in, *out, f_fsid);
 	CP(*in, *out, f_owner);
 	CP(*in, *out, f_type);
 	CP(*in, *out, f_flags);
 	out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX);
 	out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX);
 	strlcpy(out->f_fstypename,
 	      in->f_fstypename, MFSNAMELEN);
 	strlcpy(out->f_mntonname,
 	      in->f_mntonname, min(MNAMELEN, FREEBSD4_OMNAMELEN));
 	out->f_syncreads = MIN(in->f_syncreads, INT32_MAX);
 	out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX);
 	strlcpy(out->f_mntfromname,
 	      in->f_mntfromname, min(MNAMELEN, FREEBSD4_OMNAMELEN));
 }
 #endif
 
 int
 freebsd32_getfsstat(struct thread *td, struct freebsd32_getfsstat_args *uap)
 {
 	size_t count;
 	int error;
 
 	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
 		return (EINVAL);
 	error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
 	    UIO_USERSPACE, uap->mode);
 	if (error == 0)
 		td->td_retval[0] = count;
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 flavour of the 32-bit getfsstat(2).  The statistics are
  * gathered into a kernel buffer sized for as many entries as fit into
  * the user buffer, converted one by one with copy_statfs() and copied
  * out.  On success the count from kern_getfsstat() is returned in
  * td_retval[0].
  */
 int
 freebsd4_freebsd32_getfsstat(struct thread *td,
     struct freebsd4_freebsd32_getfsstat_args *uap)
 {
 	struct statfs *kbuf, *cur;
 	struct ostatfs32 ostat;
 	size_t nfs, kbufsize, left;
 	int error;
 
 	nfs = uap->bufsize / sizeof(struct ostatfs32);
 	kbufsize = nfs * sizeof(struct statfs);
 	error = kern_getfsstat(td, &kbuf, kbufsize, &nfs, UIO_SYSSPACE,
 	    uap->mode);
 	if (kbufsize > 0) {
 		/* Stop converting as soon as a copyout fails. */
 		for (cur = kbuf, left = nfs; left > 0 && error == 0;
 		    cur++, uap->buf++, left--) {
 			copy_statfs(cur, &ostat);
 			error = copyout(&ostat, uap->buf, sizeof(ostat));
 		}
 		free(kbuf, M_STATFS);
 	}
 	if (error == 0)
 		td->td_retval[0] = nfs;
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_FREEBSD11
 /*
  * FreeBSD 11 flavour of the 32-bit getfsstat(2): forwards its arguments
  * unchanged to kern_freebsd11_getfsstat().
  */
 int
 freebsd11_freebsd32_getfsstat(struct thread *td,
     struct freebsd11_freebsd32_getfsstat_args *uap)
 {
 	return(kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize,
 	    uap->mode));
 }
 #endif
 
 int
 freebsd32_sigaltstack(struct thread *td,
 		      struct freebsd32_sigaltstack_args *uap)
 {
 	struct sigaltstack32 s32;
 	struct sigaltstack ss, oss, *ssp;
 	int error;
 
 	if (uap->ss != NULL) {
 		error = copyin(uap->ss, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		PTRIN_CP(s32, ss, ss_sp);
 		CP(s32, ss, ss_size);
 		CP(s32, ss, ss_flags);
 		ssp = &ss;
 	} else
 		ssp = NULL;
 	error = kern_sigaltstack(td, ssp, &oss);
 	if (error == 0 && uap->oss != NULL) {
 		PTROUT_CP(oss, s32, ss_sp);
 		CP(oss, s32, ss_size);
 		CP(oss, s32, ss_flags);
 		error = copyout(&s32, uap->oss, sizeof(s32));
 	}
 	return (error);
 }
 
 /*
  * Custom version of exec_copyin_args() so that we can translate
  * the pointers.
  *
  * 'argv' and 'envv' are user-space vectors of 32-bit pointers, each
  * terminated by a zero entry; 'envv' may be NULL, 'argv' may not
  * (EFAULT).  On failure after allocation the argument storage is
  * released again with exec_free_args().
  */
 int
 freebsd32_exec_copyin_args(struct image_args *args, const char *fname,
     enum uio_seg segflg, uint32_t *argv, uint32_t *envv)
 {
 	uint32_t *up, uptr32;
 	int error;
 
 	bzero(args, sizeof(*args));
 	if (argv == NULL)
 		return (EFAULT);
 
 	/*
 	 * Allocate demand-paged memory for the file name, argument, and
 	 * environment strings.
 	 */
 	error = exec_alloc_args(args);
 	if (error != 0)
 		return (error);
 
 	/* The file name goes in first. */
 	error = exec_args_add_fname(args, fname, segflg);
 	if (error != 0)
 		goto fail;
 
 	/* Then every argument string up to the terminating zero entry. */
 	for (up = argv;; up++) {
 		error = copyin(up, &uptr32, sizeof(uptr32));
 		if (error != 0)
 			goto fail;
 		if (uptr32 == 0)
 			break;
 		error = exec_args_add_arg(args, PTRIN(uptr32), UIO_USERSPACE);
 		if (error != 0)
 			goto fail;
 	}
 
 	/* Finally the environment strings, if a vector was supplied. */
 	if (envv != NULL) {
 		for (up = envv;; up++) {
 			error = copyin(up, &uptr32, sizeof(uptr32));
 			if (error != 0)
 				goto fail;
 			if (uptr32 == 0)
 				break;
 			error = exec_args_add_env(args, PTRIN(uptr32),
 			    UIO_USERSPACE);
 			if (error != 0)
 				goto fail;
 		}
 	}
 
 	return (0);
 
 fail:
 	exec_free_args(args);
 	return (error);
 }
 
 int
 freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap)
 {
 	struct image_args eargs;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0)
 		error = kern_execve(td, &eargs, NULL, oldvmspace);
 	post_execve(td, error, oldvmspace);
 	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
 	return (error);
 }
 
 int
 freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap)
 {
 	struct image_args eargs;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0) {
 		eargs.fd = uap->fd;
 		error = kern_execve(td, &eargs, NULL, oldvmspace);
 	}
 	post_execve(td, error, oldvmspace);
 	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
 	return (error);
 }
 
 /*
  * 32-bit mknodat(2): forwards to kern_mknodat() with the dev_t value
  * built from uap->dev by PAIR32TO64().
  */
 int
 freebsd32_mknodat(struct thread *td, struct freebsd32_mknodat_args *uap)
 {
 
 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->mode, PAIR32TO64(dev_t, uap->dev)));
 }
 
 /*
  * 32-bit mprotect(2).  On amd64, when i386_read_exec is set, a request
  * that includes PROT_READ also gets PROT_EXEC before kern_mprotect()
  * is called.
  */
 int
 freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap)
 {
 	int prot;
 
 	prot = uap->prot;
 #if defined(__amd64__)
 	if (i386_read_exec && (prot & PROT_READ) != 0)
 		prot |= PROT_EXEC;
 #endif
 	return (kern_mprotect(td, (uintptr_t)PTRIN(uap->addr), uap->len,
 	    prot));
 }
 
 /*
  * 32-bit mmap(2).  On amd64, when i386_read_exec is set, PROT_READ
  * implies PROT_EXEC.  The request is passed to kern_mmap() as a
  * struct mmap_req; the file offset is built from uap->pos by
  * PAIR32TO64().
  */
 int
 freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap)
 {
 	int prot;
 
 	prot = uap->prot;
 #if defined(__amd64__)
 	if (i386_read_exec && (prot & PROT_READ))
 		prot |= PROT_EXEC;
 #endif
 
 	return (kern_mmap(td, &(struct mmap_req){
 		.mr_hint = (uintptr_t)uap->addr,
 		.mr_len = uap->len,
 		.mr_prot = prot,
 		.mr_flags = uap->flags,
 		.mr_fd = uap->fd,
 		.mr_pos = PAIR32TO64(off_t, uap->pos),
 	    }));
 }
 
 #ifdef COMPAT_FREEBSD6
 /*
  * FreeBSD 6 flavour of the 32-bit mmap(2); the body performs the same
  * steps as freebsd32_mmap() on the freebsd6 argument structure.
  */
 int
 freebsd6_freebsd32_mmap(struct thread *td,
     struct freebsd6_freebsd32_mmap_args *uap)
 {
 	int prot;
 
 	prot = uap->prot;
 #if defined(__amd64__)
 	/* With i386_read_exec set, readable mappings are executable too. */
 	if (i386_read_exec && (prot & PROT_READ))
 		prot |= PROT_EXEC;
 #endif
 
 	return (kern_mmap(td, &(struct mmap_req){
 		.mr_hint = (uintptr_t)uap->addr,
 		.mr_len = uap->len,
 		.mr_prot = prot,
 		.mr_flags = uap->flags,
 		.mr_fd = uap->fd,
 		.mr_pos = PAIR32TO64(off_t, uap->pos),
 	    }));
 }
 #endif
 
 #ifdef COMPAT_43
 /*
  * COMPAT_43 flavour of the 32-bit mmap(2): forwards its arguments to
  * kern_ommap().
  */
 int
 ofreebsd32_mmap(struct thread *td, struct ofreebsd32_mmap_args *uap)
 {
 	return (kern_ommap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
 	    uap->flags, uap->fd, uap->pos));
 }
 #endif
 
 /*
  * 32-bit setitimer(2): convert the optional new timer value from its
  * 32-bit form, call kern_setitimer() and, if the caller supplied
  * uap->oitv, convert the previous value back and copy it out.
  */
 int
 freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap)
 {
 	struct itimerval itv, oitv, *itvp;
 	struct itimerval32 i32;
 	int error;
 
 	itvp = NULL;
 	if (uap->itv != NULL) {
 		error = copyin(uap->itv, &i32, sizeof(i32));
 		if (error != 0)
 			return (error);
 		TV_CP(i32, itv, it_interval);
 		TV_CP(i32, itv, it_value);
 		itvp = &itv;
 	}
 
 	error = kern_setitimer(td, uap->which, itvp, &oitv);
 	if (error != 0)
 		return (error);
 	if (uap->oitv == NULL)
 		return (0);
 
 	TV_CP(oitv, i32, it_interval);
 	TV_CP(oitv, i32, it_value);
 	return (copyout(&i32, uap->oitv, sizeof(i32)));
 }
 
 /*
  * 32-bit getitimer(2): fetch the timer with kern_getitimer() and, when
  * uap->itv is non-NULL, copy it out in its 32-bit form.
  */
 int
 freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap)
 {
 	struct itimerval itv;
 	struct itimerval32 i32;
 	int error;
 
 	error = kern_getitimer(td, uap->which, &itv);
 	if (error != 0)
 		return (error);
 	if (uap->itv == NULL)
 		return (0);
 
 	TV_CP(itv, i32, it_interval);
 	TV_CP(itv, i32, it_value);
 	return (copyout(&i32, uap->itv, sizeof(i32)));
 }
 
 /*
  * 32-bit select(2): convert the optional timeout from struct timeval32
  * and call kern_select() with a descriptor-set word size of 32 bits.
  */
 int
 freebsd32_select(struct thread *td, struct freebsd32_select_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval tv, *tvp;
 	int error;
 
 	tvp = NULL;
 	if (uap->tv != NULL) {
 		error = copyin(uap->tv, &tv32, sizeof(tv32));
 		if (error != 0)
 			return (error);
 		CP(tv32, tv, tv_sec);
 		CP(tv32, tv, tv_usec);
 		tvp = &tv;
 	}
 
 	/*
 	 * XXX Do pointers need PTRIN()?
 	 */
 	return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
 	    sizeof(int32_t) * 8));
 }
 
 /*
  * 32-bit pselect(2): convert the optional timeout from struct
  * timespec32 to a struct timeval, copy in the optional signal mask and
  * call kern_pselect() with a descriptor-set word size of 32 bits.
  */
 int
 freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts;
 	struct timeval tv, *tvp;
 	sigset_t set, *uset;
 	int error;
 
 	tvp = NULL;
 	if (uap->ts != NULL) {
 		error = copyin(uap->ts, &ts32, sizeof(ts32));
 		if (error != 0)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		TIMESPEC_TO_TIMEVAL(&tv, &ts);
 		tvp = &tv;
 	}
 
 	uset = NULL;
 	if (uap->sm != NULL) {
 		error = copyin(uap->sm, &set, sizeof(set));
 		if (error != 0)
 			return (error);
 		uset = &set;
 	}
 
 	/*
 	 * XXX Do pointers need PTRIN()?
 	 */
 	return (kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
 	    uset, sizeof(int32_t) * 8));
 }
 
 /*
  * Copy 'count' items into the destination list pointed to by uap->eventlist.
  *
  * k_copyout callback of freebsd32_kevent().  'arg' is the syscall
  * argument structure, 'kevp' the native events to convert.  Each event
  * is converted to struct kevent32, with the 64-bit 'data' and 'ext'
  * values split into two 32-bit halves in host byte order.  On success
  * uap->eventlist is advanced by 'count'.  Returns the copyout() status.
  */
 static int
 freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count)
 {
 	struct freebsd32_kevent_args *uap;
 	struct kevent32	ks32[KQ_NEVENTS];
 	uint64_t e;
 	int i, j, error;
 
 	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
 	uap = (struct freebsd32_kevent_args *)arg;
 
 	/*
 	 * The field assignments below do not necessarily cover every byte
 	 * of struct kevent32 (its size differs between architectures), so
 	 * clear the buffer first to avoid copying uninitialized kernel
 	 * stack contents out to userspace.
 	 */
 	bzero(ks32, sizeof(ks32));
 
 	for (i = 0; i < count; i++) {
 		CP(kevp[i], ks32[i], ident);
 		CP(kevp[i], ks32[i], filter);
 		CP(kevp[i], ks32[i], flags);
 		CP(kevp[i], ks32[i], fflags);
 #if BYTE_ORDER == LITTLE_ENDIAN
 		ks32[i].data1 = kevp[i].data;
 		ks32[i].data2 = kevp[i].data >> 32;
 #else
 		ks32[i].data1 = kevp[i].data >> 32;
 		ks32[i].data2 = kevp[i].data;
 #endif
 		PTROUT_CP(kevp[i], ks32[i], udata);
 		for (j = 0; j < nitems(kevp->ext); j++) {
 			e = kevp[i].ext[j];
 #if BYTE_ORDER == LITTLE_ENDIAN
 			ks32[i].ext64[2 * j] = e;
 			ks32[i].ext64[2 * j + 1] = e >> 32;
 #else
 			ks32[i].ext64[2 * j] = e >> 32;
 			ks32[i].ext64[2 * j + 1] = e;
 #endif
 		}
 	}
 	error = copyout(ks32, uap->eventlist, count * sizeof *ks32);
 	if (error == 0)
 		uap->eventlist += count;
 	return (error);
 }
 
 /*
  * Copy 'count' items from the list pointed to by uap->changelist.
  *
  * k_copyin callback of freebsd32_kevent().  The 32-bit events are read
  * into a local buffer, uap->changelist is advanced by 'count', and each
  * entry is converted into the native struct kevent at kevp[i].
  * Returns the copyin() status.
  */
 static int
 freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count)
 {
 	struct freebsd32_kevent_args *uap;
 	struct kevent32	ks32[KQ_NEVENTS];
 	uint64_t e;
 	int i, j, error;
 
 	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
 	uap = (struct freebsd32_kevent_args *)arg;
 
 	error = copyin(uap->changelist, ks32, count * sizeof *ks32);
 	if (error)
 		goto done;
 	uap->changelist += count;
 
 	for (i = 0; i < count; i++) {
 		CP(ks32[i], kevp[i], ident);
 		CP(ks32[i], kevp[i], filter);
 		CP(ks32[i], kevp[i], flags);
 		CP(ks32[i], kevp[i], fflags);
 		/* presumably joins the data1/data2 halves -- see PAIR32TO64 */
 		kevp[i].data = PAIR32TO64(uint64_t, ks32[i].data);
 		PTRIN_CP(ks32[i], kevp[i], udata);
 		/* Reassemble each 64-bit ext value from two 32-bit words. */
 		for (j = 0; j < nitems(kevp->ext); j++) {
 #if BYTE_ORDER == LITTLE_ENDIAN
 			e = ks32[i].ext64[2 * j + 1];
 			e <<= 32;
 			e += ks32[i].ext64[2 * j];
 #else
 			e = ks32[i].ext64[2 * j];
 			e <<= 32;
 			e += ks32[i].ext64[2 * j + 1];
 #endif
 			kevp[i].ext[j] = e;
 		}
 	}
 done:
 	return (error);
 }
 
 /*
  * 32-bit kevent(2): convert the optional timeout from struct
  * timespec32 and call kern_kevent() with copy callbacks that translate
  * between struct kevent32 and struct kevent.  With KTRACE, the change
  * list and (on success) the returned events are traced.
  */
 int
 freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	struct kevent_copyops k_ops = {
 		.arg = uap,
 		.k_copyout = freebsd32_kevent_copyout,
 		.k_copyin = freebsd32_kevent_copyin,
 	};
 #ifdef KTRACE
 	/* Saved because the copyout callback advances uap->eventlist. */
 	struct kevent32 *eventlist = uap->eventlist;
 #endif
 	int error;
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT_ARRAY))
 		ktrstructarray("kevent32", UIO_USERSPACE, uap->changelist,
 		    uap->nchanges, sizeof(struct kevent32));
 #endif
 	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
 	    &k_ops, tsp);
 #ifdef KTRACE
 	if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY))
 		ktrstructarray("kevent32", UIO_USERSPACE, eventlist,
 		    td->td_retval[0], sizeof(struct kevent32));
 #endif
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD11
 /*
  * Copy 'count' items into the destination list pointed to by
  * uap->eventlist, converting each native event to struct
  * freebsd11_kevent32.  k_copyout callback of
  * freebsd11_freebsd32_kevent(); on success uap->eventlist is advanced
  * by 'count'.  Returns the copyout() status.
  */
 static int
 freebsd32_kevent11_copyout(void *arg, struct kevent *kevp, int count)
 {
 	struct freebsd11_freebsd32_kevent_args *uap;
 	struct freebsd11_kevent32 ks32[KQ_NEVENTS];
 	int i, error;
 
 	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
 	uap = (struct freebsd11_freebsd32_kevent_args *)arg;
 
 	for (i = 0; i < count; i++) {
 		CP(kevp[i], ks32[i], ident);
 		CP(kevp[i], ks32[i], filter);
 		CP(kevp[i], ks32[i], flags);
 		CP(kevp[i], ks32[i], fflags);
 		CP(kevp[i], ks32[i], data);
 		PTROUT_CP(kevp[i], ks32[i], udata);
 	}
 	error = copyout(ks32, uap->eventlist, count * sizeof *ks32);
 	if (error == 0)
 		uap->eventlist += count;
 	return (error);
 }
 
 /*
  * Copy 'count' items from the list pointed to by uap->changelist.
  *
  * k_copyin callback of freebsd11_freebsd32_kevent().  Each struct
  * freebsd11_kevent32 is converted to a native struct kevent; the 'ext'
  * members, which are not filled from the old structure, are set to 0.
  * Returns the copyin() status.
  */
 static int
 freebsd32_kevent11_copyin(void *arg, struct kevent *kevp, int count)
 {
 	struct freebsd11_freebsd32_kevent_args *uap;
 	struct freebsd11_kevent32 ks32[KQ_NEVENTS];
 	int i, j, error;
 
 	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
 	uap = (struct freebsd11_freebsd32_kevent_args *)arg;
 
 	error = copyin(uap->changelist, ks32, count * sizeof *ks32);
 	if (error)
 		goto done;
 	uap->changelist += count;
 
 	for (i = 0; i < count; i++) {
 		CP(ks32[i], kevp[i], ident);
 		CP(ks32[i], kevp[i], filter);
 		CP(ks32[i], kevp[i], flags);
 		CP(ks32[i], kevp[i], fflags);
 		CP(ks32[i], kevp[i], data);
 		PTRIN_CP(ks32[i], kevp[i], udata);
 		for (j = 0; j < nitems(kevp->ext); j++)
 			kevp[i].ext[j] = 0;
 	}
 done:
 	return (error);
 }
 
 /*
  * FreeBSD 11 flavour of the 32-bit kevent(2): same flow as
  * freebsd32_kevent(), but using the freebsd11_kevent32 copy callbacks
  * and trace structure name.
  */
 int
 freebsd11_freebsd32_kevent(struct thread *td,
     struct freebsd11_freebsd32_kevent_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	struct kevent_copyops k_ops = {
 		.arg = uap,
 		.k_copyout = freebsd32_kevent11_copyout,
 		.k_copyin = freebsd32_kevent11_copyin,
 	};
 #ifdef KTRACE
 	/* Saved because the copyout callback advances uap->eventlist. */
 	struct freebsd11_kevent32 *eventlist = uap->eventlist;
 #endif
 	int error;
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT_ARRAY))
 		ktrstructarray("freebsd11_kevent32", UIO_USERSPACE,
 		    uap->changelist, uap->nchanges,
 		    sizeof(struct freebsd11_kevent32));
 #endif
 	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
 	    &k_ops, tsp);
 #ifdef KTRACE
 	if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY))
 		ktrstructarray("freebsd11_kevent32", UIO_USERSPACE,
 		    eventlist, td->td_retval[0],
 		    sizeof(struct freebsd11_kevent32));
 #endif
 	return (error);
 }
 #endif
 
 /*
  * 32-bit gettimeofday(2): if uap->tp is set, copy out the current time
  * from microtime() as a struct timeval32; if uap->tzp is set, copy out
  * an all-zero struct timezone.
  */
 int
 freebsd32_gettimeofday(struct thread *td,
 		       struct freebsd32_gettimeofday_args *uap)
 {
 	struct timeval atv;
 	struct timeval32 atv32;
 	struct timezone rtz;
 	int error;
 
 	if (uap->tp != NULL) {
 		microtime(&atv);
 		CP(atv, atv32, tv_sec);
 		CP(atv, atv32, tv_usec);
 		error = copyout(&atv32, uap->tp, sizeof (atv32));
 		if (error != 0)
 			return (error);
 	}
 	if (uap->tzp == NULL)
 		return (0);
 
 	rtz.tz_minuteswest = 0;
 	rtz.tz_dsttime = 0;
 	return (copyout(&rtz, uap->tzp, sizeof (rtz)));
 }
 
 int
 freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap)
 {
 	struct rusage32 s32;
 	struct rusage s;
 	int error;
 
 	error = kern_getrusage(td, uap->who, &s);
 	if (error == 0) {
 		freebsd32_rusage_out(&s, &s32);
 		error = copyout(&s32, uap->rusage, sizeof(s32));
 	}
 	return (error);
 }
 
 /*
  * Convert the native struct ptrace_lwpinfo '*pl' into its 32-bit form
  * '*pl32'.  The destination is zeroed first, the siginfo is converted
  * with siginfo_to_siginfo32() and the thread name is copied with a
  * bound of the destination buffer size.
  */
 static void
 ptrace_lwpinfo_to32(const struct ptrace_lwpinfo *pl,
     struct ptrace_lwpinfo32 *pl32)
 {
 
 	bzero(pl32, sizeof(*pl32));
 	pl32->pl_lwpid = pl->pl_lwpid;
 	pl32->pl_event = pl->pl_event;
 	pl32->pl_flags = pl->pl_flags;
 	pl32->pl_sigmask = pl->pl_sigmask;
 	pl32->pl_siglist = pl->pl_siglist;
 	siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo);
 	/* Bounded copy: never write past the end of pl32->pl_tdname. */
 	strlcpy(pl32->pl_tdname, pl->pl_tdname, sizeof(pl32->pl_tdname));
 	pl32->pl_child_pid = pl->pl_child_pid;
 	pl32->pl_syscall_code = pl->pl_syscall_code;
 	pl32->pl_syscall_narg = pl->pl_syscall_narg;
 }
 
 /*
  * Convert the native struct ptrace_sc_ret '*psr' into its 32-bit form
  * '*psr32'.  The destination is zeroed first; the two return values
  * are stored into uint32_t members.
  */
 static void
 ptrace_sc_ret_to32(const struct ptrace_sc_ret *psr,
     struct ptrace_sc_ret32 *psr32)
 {
 
 	bzero(psr32, sizeof(*psr32));
 	psr32->sr_retval[0] = psr->sr_retval[0];
 	psr32->sr_retval[1] = psr->sr_retval[1];
 	psr32->sr_error = psr->sr_error;
 }
 
 int
 freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap)
 {
 	union {
 		struct ptrace_io_desc piod;
 		struct ptrace_lwpinfo pl;
 		struct ptrace_vm_entry pve;
 		struct ptrace_coredump pc;
 		struct dbreg32 dbreg;
 		struct fpreg32 fpreg;
 		struct reg32 reg;
 		struct iovec vec;
 		register_t args[nitems(td->td_sa.args)];
 		struct ptrace_sc_ret psr;
 		int ptevents;
 	} r;
 	union {
 		struct ptrace_io_desc32 piod;
 		struct ptrace_lwpinfo32 pl;
 		struct ptrace_vm_entry32 pve;
 		struct ptrace_coredump32 pc;
 		uint32_t args[nitems(td->td_sa.args)];
 		struct ptrace_sc_ret32 psr;
 		struct iovec32 vec;
 	} r32;
 	void *addr;
 	int data, error, i;
 
 	if (!allow_ptrace)
 		return (ENOSYS);
 	error = 0;
 
 	AUDIT_ARG_PID(uap->pid);
 	AUDIT_ARG_CMD(uap->req);
 	AUDIT_ARG_VALUE(uap->data);
 	addr = &r;
 	data = uap->data;
 	switch (uap->req) {
 	case PT_GET_EVENT_MASK:
 	case PT_GET_SC_ARGS:
 	case PT_GET_SC_RET:
 		break;
 	case PT_LWPINFO:
 		if (uap->data > sizeof(r32.pl))
 			return (EINVAL);
 
 		/*
 		 * Pass size of native structure in 'data'.  Truncate
 		 * if necessary to avoid siginfo.
 		 */
 		data = sizeof(r.pl);
 		if (uap->data < offsetof(struct ptrace_lwpinfo32, pl_siginfo) +
 		    sizeof(struct siginfo32))
 			data = offsetof(struct ptrace_lwpinfo, pl_siginfo);
 		break;
 	case PT_GETREGS:
 		bzero(&r.reg, sizeof(r.reg));
 		break;
 	case PT_GETFPREGS:
 		bzero(&r.fpreg, sizeof(r.fpreg));
 		break;
 	case PT_GETDBREGS:
 		bzero(&r.dbreg, sizeof(r.dbreg));
 		break;
 	case PT_SETREGS:
 		error = copyin(uap->addr, &r.reg, sizeof(r.reg));
 		break;
 	case PT_SETFPREGS:
 		error = copyin(uap->addr, &r.fpreg, sizeof(r.fpreg));
 		break;
 	case PT_SETDBREGS:
 		error = copyin(uap->addr, &r.dbreg, sizeof(r.dbreg));
 		break;
 	case PT_GETREGSET:
 	case PT_SETREGSET:
 		error = copyin(uap->addr, &r32.vec, sizeof(r32.vec));
 		if (error != 0)
 			break;
 
 		r.vec.iov_len = r32.vec.iov_len;
 		r.vec.iov_base = PTRIN(r32.vec.iov_base);
 		break;
 	case PT_SET_EVENT_MASK:
 		if (uap->data != sizeof(r.ptevents))
 			error = EINVAL;
 		else
 			error = copyin(uap->addr, &r.ptevents, uap->data);
 		break;
 	case PT_IO:
 		error = copyin(uap->addr, &r32.piod, sizeof(r32.piod));
 		if (error)
 			break;
 		CP(r32.piod, r.piod, piod_op);
 		PTRIN_CP(r32.piod, r.piod, piod_offs);
 		PTRIN_CP(r32.piod, r.piod, piod_addr);
 		CP(r32.piod, r.piod, piod_len);
 		break;
 	case PT_VM_ENTRY:
 		error = copyin(uap->addr, &r32.pve, sizeof(r32.pve));
 		if (error)
 			break;
 
 		CP(r32.pve, r.pve, pve_entry);
 		CP(r32.pve, r.pve, pve_timestamp);
 		CP(r32.pve, r.pve, pve_start);
 		CP(r32.pve, r.pve, pve_end);
 		CP(r32.pve, r.pve, pve_offset);
 		CP(r32.pve, r.pve, pve_prot);
 		CP(r32.pve, r.pve, pve_pathlen);
 		CP(r32.pve, r.pve, pve_fileid);
 		CP(r32.pve, r.pve, pve_fsid);
 		PTRIN_CP(r32.pve, r.pve, pve_path);
 		break;
 	case PT_COREDUMP:
 		if (uap->data != sizeof(r32.pc))
 			error = EINVAL;
 		else
 			error = copyin(uap->addr, &r32.pc, uap->data);
 		CP(r32.pc, r.pc, pc_fd);
 		CP(r32.pc, r.pc, pc_flags);
 		r.pc.pc_limit = PAIR32TO64(off_t, r32.pc.pc_limit);
 		data = sizeof(r.pc);
 		break;
 	default:
 		addr = uap->addr;
 		break;
 	}
 	if (error)
 		return (error);
 
 	error = kern_ptrace(td, uap->req, uap->pid, addr, data);
 	if (error)
 		return (error);
 
 	switch (uap->req) {
 	case PT_VM_ENTRY:
 		CP(r.pve, r32.pve, pve_entry);
 		CP(r.pve, r32.pve, pve_timestamp);
 		CP(r.pve, r32.pve, pve_start);
 		CP(r.pve, r32.pve, pve_end);
 		CP(r.pve, r32.pve, pve_offset);
 		CP(r.pve, r32.pve, pve_prot);
 		CP(r.pve, r32.pve, pve_pathlen);
 		CP(r.pve, r32.pve, pve_fileid);
 		CP(r.pve, r32.pve, pve_fsid);
 		error = copyout(&r32.pve, uap->addr, sizeof(r32.pve));
 		break;
 	case PT_IO:
 		CP(r.piod, r32.piod, piod_len);
 		error = copyout(&r32.piod, uap->addr, sizeof(r32.piod));
 		break;
 	case PT_GETREGS:
 		error = copyout(&r.reg, uap->addr, sizeof(r.reg));
 		break;
 	case PT_GETFPREGS:
 		error = copyout(&r.fpreg, uap->addr, sizeof(r.fpreg));
 		break;
 	case PT_GETDBREGS:
 		error = copyout(&r.dbreg, uap->addr, sizeof(r.dbreg));
 		break;
 	case PT_GETREGSET:
 		r32.vec.iov_len = r.vec.iov_len;
 		error = copyout(&r32.vec, uap->addr, sizeof(r32.vec));
 		break;
 	case PT_GET_EVENT_MASK:
 		/* NB: The size in uap->data is validated in kern_ptrace(). */
 		error = copyout(&r.ptevents, uap->addr, uap->data);
 		break;
 	case PT_LWPINFO:
 		ptrace_lwpinfo_to32(&r.pl, &r32.pl);
 		error = copyout(&r32.pl, uap->addr, uap->data);
 		break;
 	case PT_GET_SC_ARGS:
 		for (i = 0; i < nitems(r.args); i++)
 			r32.args[i] = (uint32_t)r.args[i];
 		error = copyout(r32.args, uap->addr, MIN(uap->data,
 		    sizeof(r32.args)));
 		break;
 	case PT_GET_SC_RET:
 		ptrace_sc_ret_to32(&r.psr, &r32.psr);
 		error = copyout(&r32.psr, uap->addr, MIN(uap->data,
 		    sizeof(r32.psr)));
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Copy an array of 'iovcnt' 32-bit iovecs in from user space and build a
  * native struct uio describing the same user buffers.  The uio and its
  * iovec array live in a single M_IOV allocation which the caller frees.
  * On failure *uiop is left NULL and nothing remains allocated.
  *
  * Returns 0, EINVAL (too many iovecs or total length above INT_MAX) or
  * the error reported by copyin().
  */
 int
 freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop)
 {
 	struct iovec32 iov32;
 	struct iovec *iov;
 	struct uio *uio;
 	u_int iovlen;
 	int error, i;
 
 	*uiop = NULL;
 	if (iovcnt > UIO_MAXIOV)
 		return (EINVAL);
 
 	/* The iovec array is placed directly behind the uio header. */
 	iovlen = iovcnt * sizeof(struct iovec);
 	uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
 	iov = (struct iovec *)(uio + 1);
 
 	/* First pass: widen every user iovec. */
 	for (i = 0; i < iovcnt; i++) {
 		error = copyin(&iovp[i], &iov32, sizeof(struct iovec32));
 		if (error)
 			goto fail;
 		iov[i].iov_base = PTRIN(iov32.iov_base);
 		iov[i].iov_len = iov32.iov_len;
 	}
 
 	uio->uio_iov = iov;
 	uio->uio_iovcnt = iovcnt;
 	uio->uio_segflg = UIO_USERSPACE;
 	uio->uio_offset = -1;
 	uio->uio_resid = 0;
 
 	/* Second pass: total the lengths, refusing sums above INT_MAX. */
 	for (i = 0; i < iovcnt; i++) {
 		if (iov[i].iov_len > INT_MAX - uio->uio_resid) {
 			error = EINVAL;
 			goto fail;
 		}
 		uio->uio_resid += iov[i].iov_len;
 	}
 	*uiop = uio;
 	return (0);
 
 fail:
 	free(uio, M_IOV);
 	return (error);
 }
 
 /* readv for 32-bit processes: widen the iovec array, then call kern_readv(). */
 int
 freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap)
 {
 	struct uio *uio32;
 	int rv;
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &uio32);
 	if (rv == 0) {
 		rv = kern_readv(td, uap->fd, uio32);
 		free(uio32, M_IOV);
 	}
 	return (rv);
 }
 
 /* writev for 32-bit processes: widen the iovec array, then call kern_writev(). */
 int
 freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap)
 {
 	struct uio *uio32;
 	int rv;
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &uio32);
 	if (rv == 0) {
 		rv = kern_writev(td, uap->fd, uio32);
 		free(uio32, M_IOV);
 	}
 	return (rv);
 }
 
 int
 freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset));
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset));
 	free(auio, M_IOV);
 	return (error);
 }
 
 /*
  * Copy an array of 'iovcnt' 32-bit iovecs in from user space into a newly
  * allocated (M_IOV) array of native iovecs, returned through *iovp.
  *
  * 'error' is the value to return when iovcnt exceeds UIO_MAXIOV; it is
  * reused afterwards to carry copyin() results.  *iovp is NULL on failure.
  */
 int
 freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp,
     int error)
 {
 	struct iovec32 tmp32;
 	struct iovec *vec;
 	u_int idx;
 
 	*iovp = NULL;
 	if (iovcnt > UIO_MAXIOV)
 		return (error);
 
 	vec = malloc(iovcnt * sizeof(struct iovec), M_IOV, M_WAITOK);
 	for (idx = 0; idx < iovcnt; idx++) {
 		error = copyin(&iovp32[idx], &tmp32, sizeof(struct iovec32));
 		if (error != 0) {
 			free(vec, M_IOV);
 			return (error);
 		}
 		vec[idx].iov_base = PTRIN(tmp32.iov_base);
 		vec[idx].iov_len = tmp32.iov_len;
 	}
 	*iovp = vec;
 	return (0);
 }
 
 static int
 freebsd32_copyinmsghdr(const struct msghdr32 *msg32, struct msghdr *msg)
 {
 	struct msghdr32 m32;
 	int error;
 
 	error = copyin(msg32, &m32, sizeof(m32));
 	if (error)
 		return (error);
 	msg->msg_name = PTRIN(m32.msg_name);
 	msg->msg_namelen = m32.msg_namelen;
 	msg->msg_iov = PTRIN(m32.msg_iov);
 	msg->msg_iovlen = m32.msg_iovlen;
 	msg->msg_control = PTRIN(m32.msg_control);
 	msg->msg_controllen = m32.msg_controllen;
 	msg->msg_flags = m32.msg_flags;
 	return (0);
 }
 
 static int
 freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32)
 {
 	struct msghdr32 m32;
 	int error;
 
 	m32.msg_name = PTROUT(msg->msg_name);
 	m32.msg_namelen = msg->msg_namelen;
 	m32.msg_iov = PTROUT(msg->msg_iov);
 	m32.msg_iovlen = msg->msg_iovlen;
 	m32.msg_control = PTROUT(msg->msg_control);
 	m32.msg_controllen = msg->msg_controllen;
 	m32.msg_flags = msg->msg_flags;
 	error = copyout(&m32, msg32, sizeof(m32));
 	return (error);
 }
 
 /*
  * Alignment helpers for control messages laid out for a 32-bit process.
  *
  * FREEBSD32_ALIGNBYTES is the alignment mask: sizeof(int) - 1, except on
  * mips where sizeof(long) - 1 is used.
  */
 #ifndef __mips__
 #define FREEBSD32_ALIGNBYTES	(sizeof(int) - 1)
 #else
 #define FREEBSD32_ALIGNBYTES	(sizeof(long) - 1)
 #endif
 /* Round 'p' up to the next multiple of (FREEBSD32_ALIGNBYTES + 1). */
 #define FREEBSD32_ALIGN(p)	\
 	(((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES)
 /* Space taken by a control message with 'l' payload bytes: aligned header plus aligned payload. */
 #define	FREEBSD32_CMSG_SPACE(l)	\
 	(FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l))
 
 /* Address of the payload that follows the aligned header of 'cmsg'. */
 #define	FREEBSD32_CMSG_DATA(cmsg)	((unsigned char *)(cmsg) + \
 				 FREEBSD32_ALIGN(sizeof(struct cmsghdr)))
 
 /*
  * Convert, in place, the payload of a native control message into the
  * layout a 32-bit process expects.  Only the SOL_SOCKET timestamp
  * messages (SCM_TIMESTAMP, SCM_BINTIME, SCM_REALTIME, SCM_MONOTONIC) are
  * rewritten; every other message is left untouched.
  *
  * Returns the payload length after conversion, which is 'datalen' when
  * nothing was converted.
  */
 static size_t
 freebsd32_cmsg_convert(const struct cmsghdr *cm, void *data, socklen_t datalen)
 {
 	union {
 		struct timespec32 ts;
 		struct timeval32 tv;
 		struct bintime32 bt;
 	} tmp32;
 	union {
 		struct timespec ts;
 		struct timeval tv;
 		struct bintime bt;
 	} *in;
 	size_t copylen;
 
 	if (cm->cmsg_level != SOL_SOCKET)
 		return (datalen);
 
 	in = data;
 	switch (cm->cmsg_type) {
 	case SCM_TIMESTAMP:
 		TV_CP(*in, tmp32, tv);
 		copylen = sizeof(tmp32.tv);
 		break;
 	case SCM_BINTIME:
 		BT_CP(*in, tmp32, bt);
 		copylen = sizeof(tmp32.bt);
 		break;
 	case SCM_REALTIME:
 	case SCM_MONOTONIC:
 		TS_CP(*in, tmp32, ts);
 		copylen = sizeof(tmp32.ts);
 		break;
 	default:
 		/* Not a message whose layout differs; nothing to do. */
 		return (datalen);
 	}
 
 	KASSERT((datalen >= copylen), ("corrupted cmsghdr"));
 
 	/* Overwrite the native payload with its narrower 32-bit form. */
 	bcopy(&tmp32, data, copylen);
 	return (copylen);
 }
 
 /*
  * Copy the control messages held in the mbuf chain 'control' out to the
  * user buffer described by msg->msg_control / msg->msg_controllen, using
  * the 32-bit alignment rules (FREEBSD32_ALIGN) for the output layout.
  *
  * Each message payload is first passed through freebsd32_cmsg_convert().
  * On return msg->msg_controllen holds the number of bytes accounted for
  * in the user buffer.  When the user buffer is too small MSG_CTRUNC is
  * set in msg->msg_flags and m_dispose_extcontrolm() is called on the
  * mbuf being processed.
  *
  * Returns 0 or an error from copyout().
  */
 static int
 freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control)
 {
 	struct cmsghdr *cm;
 	void *data;
 	socklen_t clen, datalen, datalen_out, oldclen;
 	int error;
 	caddr_t ctlbuf;
 	int len, copylen;
 	struct mbuf *m;
 	error = 0;
 
 	/* 'len' tracks the room left in the user buffer. */
 	len    = msg->msg_controllen;
 	msg->msg_controllen = 0;
 
 	ctlbuf = msg->msg_control;
 	for (m = control; m != NULL && len > 0; m = m->m_next) {
 		cm = mtod(m, struct cmsghdr *);
 		clen = m->m_len;
 		while (cm != NULL) {
 			/*
 			 * NOTE(review): this only leaves the inner loop; the
 			 * outer loop goes on to the next mbuf and a later
 			 * copyout() result can overwrite the EINVAL.  Confirm
 			 * whether that is intended.
 			 */
 			if (sizeof(struct cmsghdr) > clen ||
 			    cm->cmsg_len > clen) {
 				error = EINVAL;
 				break;
 			}
 
 			data   = CMSG_DATA(cm);
 			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 			datalen_out = freebsd32_cmsg_convert(cm, data, datalen);
 
 			/*
 			 * Copy out the message header.  Preserve the native
 			 * message size in case we need to inspect the message
 			 * contents later.
 			 */
 			copylen = sizeof(struct cmsghdr);
 			if (len < copylen) {
 				msg->msg_flags |= MSG_CTRUNC;
 				m_dispose_extcontrolm(m);
 				goto exit;
 			}
 			/* Temporarily patch cmsg_len to the 32-bit value for the copyout. */
 			oldclen = cm->cmsg_len;
 			cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) +
 			    datalen_out;
 			error = copyout(cm, ctlbuf, copylen);
 			cm->cmsg_len = oldclen;
 			if (error != 0)
 				goto exit;
 
 			/*
 			 * NOTE(review): 'len' is a signed int and is reduced
 			 * by the aligned size, so it can drop below zero when
 			 * it lies between copylen and the aligned copylen.
 			 */
 			ctlbuf += FREEBSD32_ALIGN(copylen);
 			len    -= FREEBSD32_ALIGN(copylen);
 
 			copylen = datalen_out;
 			if (len < copylen) {
 				msg->msg_flags |= MSG_CTRUNC;
 				m_dispose_extcontrolm(m);
 				break;
 			}
 
 			/* Copy out the message data. */
 			error = copyout(data, ctlbuf, copylen);
 			if (error)
 				goto exit;
 
 			ctlbuf += FREEBSD32_ALIGN(copylen);
 			len    -= FREEBSD32_ALIGN(copylen);
 
 			/* Step to the next native message in this mbuf, if any. */
 			if (CMSG_SPACE(datalen) < clen) {
 				clen -= CMSG_SPACE(datalen);
 				cm = (struct cmsghdr *)
 				    ((caddr_t)cm + CMSG_SPACE(datalen));
 			} else {
 				clen = 0;
 				cm = NULL;
 			}
 
 			msg->msg_controllen +=
 			    FREEBSD32_CMSG_SPACE(datalen_out);
 		}
 	}
 	/* User buffer filled exactly while mbufs remain: report truncation. */
 	if (len == 0 && m != NULL) {
 		msg->msg_flags |= MSG_CTRUNC;
 		m_dispose_extcontrolm(m);
 	}
 
 exit:
 	return (error);
 }
 
 int
 freebsd32_recvmsg(struct thread *td, struct freebsd32_recvmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *uiov, *iov;
 	struct mbuf *control = NULL;
 	struct mbuf **controlp;
 	int error;
 
 	error = freebsd32_copyinmsghdr(uap->msg, &msg);
 	if (error)
 		return (error);
 	error = freebsd32_copyiniov((void *)msg.msg_iov, msg.msg_iovlen, &iov,
 	    EMSGSIZE);
 	if (error)
 		return (error);
 	msg.msg_flags = uap->flags;
 	uiov = msg.msg_iov;
 	msg.msg_iov = iov;
 
 	controlp = (msg.msg_control != NULL) ?  &control : NULL;
 	error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp);
 	if (error == 0) {
 		msg.msg_iov = uiov;
 
 		if (control != NULL)
 			error = freebsd32_copy_msg_out(&msg, control);
 		else
 			msg.msg_controllen = 0;
 
 		if (error == 0)
 			error = freebsd32_copyoutmsghdr(&msg, uap->msg);
 	}
 	free(iov, M_IOV);
 
 	if (control != NULL) {
 		if (error != 0)
 			m_dispose_extcontrolm(control);
 		m_freem(control);
 	}
 
 	return (error);
 }
 
 #ifdef COMPAT_43
 /* Old (COMPAT_43) recvmsg entry point for 32-bit processes: always fails with ENOSYS. */
 int
 ofreebsd32_recvmsg(struct thread *td, struct ofreebsd32_recvmsg_args *uap)
 {
 	return (ENOSYS);
 }
 #endif
 
 /*
  * Copy-in the array of control messages constructed using alignment
  * and padding suitable for a 32-bit environment and construct an
  * mbuf using alignment and padding suitable for a 64-bit kernel.
  * The alignment and padding are defined indirectly by CMSG_DATA(),
  * CMSG_SPACE() and CMSG_LEN().
  *
  * 'buf' and 'buflen' describe the user buffer.  Buffers larger than
  * MCLBYTES, malformed messages, and inputs whose re-aligned size would
  * exceed MCLBYTES are rejected with EINVAL.  On success the new
  * MT_CONTROL mbuf is stored in *mp and 0 is returned; *mp is not written
  * on failure.  The temporary kernel copy of the input is always freed.
  */
 static int
 freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen)
 {
 	struct cmsghdr *cm;
 	struct mbuf *m;
 	void *in, *in1, *md;
 	u_int msglen, outlen;
 	int error;
 
 	if (buflen > MCLBYTES)
 		return (EINVAL);
 
 	/* Work on a kernel copy so both passes see the same bytes. */
 	in = malloc(buflen, M_TEMP, M_WAITOK);
 	error = copyin(buf, in, buflen);
 	if (error != 0)
 		goto out;
 
 	/*
 	 * Make a pass over the input buffer to determine the amount of space
 	 * required for 64 bit-aligned copies of the control messages.
 	 */
 	in1 = in;
 	outlen = 0;
 	while (buflen > 0) {
 		if (buflen < sizeof(*cm)) {
 			error = EINVAL;
 			break;
 		}
 		cm = (struct cmsghdr *)in1;
 		if (cm->cmsg_len < FREEBSD32_ALIGN(sizeof(*cm))) {
 			error = EINVAL;
 			break;
 		}
 		/* 'msglen < cm->cmsg_len' catches wrap-around in the align. */
 		msglen = FREEBSD32_ALIGN(cm->cmsg_len);
 		if (msglen > buflen || msglen < cm->cmsg_len) {
 			error = EINVAL;
 			break;
 		}
 		buflen -= msglen;
 
 		in1 = (char *)in1 + msglen;
 		outlen += CMSG_ALIGN(sizeof(*cm)) +
 		    CMSG_ALIGN(msglen - FREEBSD32_ALIGN(sizeof(*cm)));
 	}
 	if (error == 0 && outlen > MCLBYTES) {
 		/*
 		 * XXXMJ This implies that the upper limit on 32-bit aligned
 		 * control messages is less than MCLBYTES, and so we are not
 		 * perfectly compatible.  However, there is no platform
 		 * guarantee that mbuf clusters larger than MCLBYTES can be
 		 * allocated.
 		 */
 		error = EINVAL;
 	}
 	if (error != 0)
 		goto out;
 
 	m = m_get2(outlen, M_WAITOK, MT_CONTROL, 0);
 	m->m_len = outlen;
 	md = mtod(m, void *);
 
 	/*
 	 * Make a second pass over input messages, copying them into the output
 	 * buffer.
 	 */
 	in1 = in;
 	while (outlen > 0) {
 		/* Copy the message header and align the length field. */
 		cm = md;
 		memcpy(cm, in1, sizeof(*cm));
 		msglen = cm->cmsg_len - FREEBSD32_ALIGN(sizeof(*cm));
 		cm->cmsg_len = CMSG_ALIGN(sizeof(*cm)) + msglen;
 
 		/* Copy the message body. */
 		in1 = (char *)in1 + FREEBSD32_ALIGN(sizeof(*cm));
 		md = (char *)md + CMSG_ALIGN(sizeof(*cm));
 		memcpy(md, in1, msglen);
 		/* Input advances by the 32-bit padding, output by the native padding. */
 		in1 = (char *)in1 + FREEBSD32_ALIGN(msglen);
 		md = (char *)md + CMSG_ALIGN(msglen);
 		KASSERT(outlen >= CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen),
 		    ("outlen %u underflow, msglen %u", outlen, msglen));
 		outlen -= CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen);
 	}
 
 	*mp = m;
 out:
 	free(in, M_TEMP);
 	return (error);
 }
 
 int
 freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	struct mbuf *control = NULL;
 	struct sockaddr *to = NULL;
 	int error;
 
 	error = freebsd32_copyinmsghdr(uap->msg, &msg);
 	if (error)
 		return (error);
 	error = freebsd32_copyiniov((void *)msg.msg_iov, msg.msg_iovlen, &iov,
 	    EMSGSIZE);
 	if (error)
 		return (error);
 	msg.msg_iov = iov;
 	if (msg.msg_name != NULL) {
 		error = getsockaddr(&to, msg.msg_name, msg.msg_namelen);
 		if (error) {
 			to = NULL;
 			goto out;
 		}
 		msg.msg_name = to;
 	}
 
 	if (msg.msg_control) {
 		if (msg.msg_controllen < sizeof(struct cmsghdr)) {
 			error = EINVAL;
 			goto out;
 		}
 
 		error = freebsd32_copyin_control(&control, msg.msg_control,
 		    msg.msg_controllen);
 		if (error)
 			goto out;
 
 		msg.msg_control = NULL;
 		msg.msg_controllen = 0;
 	}
 
 	error = kern_sendit(td, uap->s, &msg, uap->flags, control,
 	    UIO_USERSPACE);
 
 out:
 	free(iov, M_IOV);
 	if (to)
 		free(to, M_SONAME);
 	return (error);
 }
 
 #ifdef COMPAT_43
 /* Old (COMPAT_43) sendmsg entry point for 32-bit processes: always fails with ENOSYS. */
 int
 ofreebsd32_sendmsg(struct thread *td, struct ofreebsd32_sendmsg_args *uap)
 {
 	return (ENOSYS);
 }
 #endif
 
 
 /*
  * settimeofday for 32-bit processes.  The optional timeval is widened
  * from its 32-bit layout; the optional timezone is copied in unchanged.
  */
 int
 freebsd32_settimeofday(struct thread *td,
 		       struct freebsd32_settimeofday_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval tv, *tvp;
 	struct timezone tz, *tzp;
 	int error;
 
 	tvp = NULL;
 	tzp = NULL;
 
 	if (uap->tv) {
 		error = copyin(uap->tv, &tv32, sizeof(tv32));
 		if (error)
 			return (error);
 		CP(tv32, tv, tv_sec);
 		CP(tv32, tv, tv_usec);
 		tvp = &tv;
 	}
 	if (uap->tzp) {
 		error = copyin(uap->tzp, &tz, sizeof(tz));
 		if (error)
 			return (error);
 		tzp = &tz;
 	}
 	return (kern_settimeofday(td, tvp, tzp));
 }
 
 /*
  * utimes for 32-bit processes.  When 'tptr' is non-NULL the two 32-bit
  * timevals are widened into a kernel array; otherwise NULL is passed on.
  */
 int
 freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error, i;
 
 	sp = NULL;
 	if (uap->tptr != NULL) {
 		error = copyin(uap->tptr, s32, sizeof(s32));
 		if (error)
 			return (error);
 		for (i = 0; i < 2; i++) {
 			CP(s32[i], s[i], tv_sec);
 			CP(s32[i], s[i], tv_usec);
 		}
 		sp = s;
 	}
 	return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    sp, UIO_SYSSPACE));
 }
 
 /*
  * lutimes for 32-bit processes.  When 'tptr' is non-NULL the two 32-bit
  * timevals are widened into a kernel array; otherwise NULL is passed on.
  */
 int
 freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error, i;
 
 	sp = NULL;
 	if (uap->tptr != NULL) {
 		error = copyin(uap->tptr, s32, sizeof(s32));
 		if (error)
 			return (error);
 		for (i = 0; i < 2; i++) {
 			CP(s32[i], s[i], tv_sec);
 			CP(s32[i], s[i], tv_usec);
 		}
 		sp = s;
 	}
 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE));
 }
 
 /*
  * futimes for 32-bit processes.  When 'tptr' is non-NULL the two 32-bit
  * timevals are widened into a kernel array; otherwise NULL is passed on.
  */
 int
 freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error, i;
 
 	sp = NULL;
 	if (uap->tptr != NULL) {
 		error = copyin(uap->tptr, s32, sizeof(s32));
 		if (error)
 			return (error);
 		for (i = 0; i < 2; i++) {
 			CP(s32[i], s[i], tv_sec);
 			CP(s32[i], s[i], tv_usec);
 		}
 		sp = s;
 	}
 	return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE));
 }
 
 /*
  * futimesat for 32-bit processes.  When 'times' is non-NULL the two
  * 32-bit timevals are widened into a kernel array; otherwise NULL is
  * passed on.
  */
 int
 freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error, i;
 
 	sp = NULL;
 	if (uap->times != NULL) {
 		error = copyin(uap->times, s32, sizeof(s32));
 		if (error)
 			return (error);
 		for (i = 0; i < 2; i++) {
 			CP(s32[i], s[i], tv_sec);
 			CP(s32[i], s[i], tv_usec);
 		}
 		sp = s;
 	}
 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    sp, UIO_SYSSPACE));
 }
 
 /*
  * futimens for 32-bit processes.  When 'times' is non-NULL the two
  * 32-bit timespecs are widened into a kernel array; otherwise NULL is
  * passed on.
  */
 int
 freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap)
 {
 	struct timespec32 ts32[2];
 	struct timespec ts[2], *tsp;
 	int error, i;
 
 	tsp = NULL;
 	if (uap->times != NULL) {
 		error = copyin(uap->times, ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		for (i = 0; i < 2; i++) {
 			CP(ts32[i], ts[i], tv_sec);
 			CP(ts32[i], ts[i], tv_nsec);
 		}
 		tsp = ts;
 	}
 	return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE));
 }
 
 /*
  * utimensat for 32-bit processes.  When 'times' is non-NULL the two
  * 32-bit timespecs are widened into a kernel array; otherwise NULL is
  * passed on.
  */
 int
 freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap)
 {
 	struct timespec32 ts32[2];
 	struct timespec ts[2], *tsp;
 	int error, i;
 
 	tsp = NULL;
 	if (uap->times != NULL) {
 		error = copyin(uap->times, ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		for (i = 0; i < 2; i++) {
 			CP(ts32[i], ts[i], tv_sec);
 			CP(ts32[i], ts[i], tv_nsec);
 		}
 		tsp = ts;
 	}
 	return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    tsp, UIO_SYSSPACE, uap->flag));
 }
 
 int
 freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval delta, olddelta, *deltap;
 	int error;
 
 	if (uap->delta) {
 		error = copyin(uap->delta, &tv32, sizeof(tv32));
 		if (error)
 			return (error);
 		CP(tv32, delta, tv_sec);
 		CP(tv32, delta, tv_usec);
 		deltap = &delta;
 	} else
 		deltap = NULL;
 	error = kern_adjtime(td, deltap, &olddelta);
 	if (uap->olddelta && error == 0) {
 		CP(olddelta, tv32, tv_sec);
 		CP(olddelta, tv32, tv_usec);
 		error = copyout(&tv32, uap->olddelta, sizeof(tv32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 statfs for 32-bit processes: obtain the native statfs for
  * 'path', convert it with copy_statfs() and copy the result out.
  */
 int
 freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap)
 {
 	struct ostatfs32 s32;
 	struct statfs *sp;
 	int error;
 
 	sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sp);
 	if (error != 0)
 		goto done;
 	copy_statfs(sp, &s32);
 	error = copyout(&s32, uap->buf, sizeof(s32));
 done:
 	free(sp, M_STATFS);
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 fstatfs for 32-bit processes: obtain the native statfs for
  * 'fd', convert it with copy_statfs() and copy the result out.
  */
 int
 freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap)
 {
 	struct ostatfs32 s32;
 	struct statfs *sp;
 	int error;
 
 	sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sp);
 	if (error != 0)
 		goto done;
 	copy_statfs(sp, &s32);
 	error = copyout(&s32, uap->buf, sizeof(s32));
 done:
 	free(sp, M_STATFS);
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_FREEBSD4
 int
 freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap)
 {
 	struct ostatfs32 s32;
 	struct statfs *sp;
 	fhandle_t fh;
 	int error;
 
 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
 		return (error);
 	sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sp);
 	if (error == 0) {
 		copy_statfs(sp, &s32);
 		error = copyout(&s32, uap->buf, sizeof(s32));
 	}
 	free(sp, M_STATFS);
 	return (error);
 }
 #endif
 
 int
 freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap)
 {
 
 	return (kern_pread(td, uap->fd, uap->buf, uap->nbyte,
 	    PAIR32TO64(off_t, uap->offset)));
 }
 
 int
 freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap)
 {
 
 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte,
 	    PAIR32TO64(off_t, uap->offset)));
 }
 
 #ifdef COMPAT_43
 /*
  * Old (COMPAT_43) lseek for 32-bit processes: the offset argument is
  * passed to kern_lseek() as is, without PAIR32TO64() reassembly.
  */
 int
 ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 #endif
 
 int
 freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap)
 {
 	int error;
 	off_t pos;
 
 	error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset),
 	    uap->whence);
 	/* Expand the quad return into two parts for eax and edx */
 	pos = td->td_uretoff.tdu_off;
 	td->td_retval[RETVAL_LO] = pos & 0xffffffff;	/* %eax */
 	td->td_retval[RETVAL_HI] = pos >> 32;		/* %edx */
 	return error;
 }
 
 int
 freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap)
 {
 
 	return (kern_truncate(td, uap->path, UIO_USERSPACE,
 	    PAIR32TO64(off_t, uap->length)));
 }
 
 #ifdef COMPAT_43
 /*
  * Old (COMPAT_43) truncate for 32-bit processes: the length argument is
  * passed to kern_truncate() as is, without PAIR32TO64() reassembly.
  */
 int
 ofreebsd32_truncate(struct thread *td, struct ofreebsd32_truncate_args *uap)
 {
 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 }
 #endif
 
 int
 freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap)
 {
 
 	return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length)));
 }
 
 #ifdef COMPAT_43
 /*
  * Old (COMPAT_43) ftruncate for 32-bit processes: the length argument is
  * passed to kern_ftruncate() as is, without PAIR32TO64() reassembly.
  */
 int
 ofreebsd32_ftruncate(struct thread *td, struct ofreebsd32_ftruncate_args *uap)
 {
 	return (kern_ftruncate(td, uap->fd, uap->length));
 }
 
 /*
  * Old (COMPAT_43) getdirentries for 32-bit processes.  The native
  * argument block is filled in with a NULL basep so that the base offset
  * comes back through 'loff'; it is then narrowed to 32 bits and copied
  * out to the caller's basep.
  */
 int
 ofreebsd32_getdirentries(struct thread *td,
     struct ofreebsd32_getdirentries_args *uap)
 {
 	struct ogetdirentries_args ap;
 	long loff;
 	int32_t loff_cut;
 	int error;
 
 	ap.fd = uap->fd;
 	ap.buf = uap->buf;
 	ap.count = uap->count;
 	ap.basep = NULL;
 
 	error = kern_ogetdirentries(td, &ap, &loff);
 	if (error != 0)
 		return (error);
 
 	loff_cut = loff;
 	return (copyout(&loff_cut, uap->basep, sizeof(int32_t)));
 }
 #endif
 
 #if defined(COMPAT_FREEBSD11)
 int
 freebsd11_freebsd32_getdirentries(struct thread *td,
     struct freebsd11_freebsd32_getdirentries_args *uap)
 {
 	long base;
 	int32_t base32;
 	int error;
 
 	error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 	    &base, NULL);
 	if (error)
 		return (error);
 	if (uap->basep != NULL) {
 		base32 = base;
 		error = copyout(&base32, uap->basep, sizeof(int32_t));
 	}
 	return (error);
 }
 #endif /* COMPAT_FREEBSD11 */
 
 #ifdef COMPAT_FREEBSD6
 /* versions with the 'int pad' argument */
 int
 freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap)
 {
 
 	return (kern_pread(td, uap->fd, uap->buf, uap->nbyte,
 	    PAIR32TO64(off_t, uap->offset)));
 }
 
 int
 freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap)
 {
 
 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte,
 	    PAIR32TO64(off_t, uap->offset)));
 }
 
 /*
  * FreeBSD 6 lseek for 32-bit processes.  The 64-bit offset is rebuilt
  * from its two halves on the way in, and the 64-bit result left by
  * kern_lseek() is split across the two return-value slots on the way out.
  */
 int
 freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap)
 {
 	int error;
 	off_t pos;
 
 	error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset),
 	    uap->whence);
 	/*
 	 * Expand the quad return into two parts for eax and edx.  Read the
 	 * result through the off_t member of the return-value union, as
 	 * freebsd32_lseek() does, instead of casting the td_retval array to
 	 * an off_t pointer.
 	 */
 	pos = td->td_uretoff.tdu_off;
 	td->td_retval[RETVAL_LO] = pos & 0xffffffff;	/* %eax */
 	td->td_retval[RETVAL_HI] = pos >> 32;		/* %edx */
 	return (error);
 }
 
 int
 freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap)
 {
 
 	return (kern_truncate(td, uap->path, UIO_USERSPACE,
 	    PAIR32TO64(off_t, uap->length)));
 }
 
 int
 freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap)
 {
 
 	return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length)));
 }
 #endif /* COMPAT_FREEBSD6 */
 
 /*
  * 32-bit layout of the sendfile header/trailer descriptor.  The two
  * pointer members are held as 32-bit integers and converted to native
  * pointers by freebsd32_do_sendfile().
  */
 struct sf_hdtr32 {
 	uint32_t headers;	/* user address of the header iovec32 array */
 	int hdr_cnt;		/* number of header iovecs */
 	uint32_t trailers;	/* user address of the trailer iovec32 array */
 	int trl_cnt;		/* number of trailer iovecs */
 };
 
 /*
  * Common implementation of sendfile for 32-bit processes.
  *
  * The 64-bit offset is rebuilt from its two halves, the optional 32-bit
  * header/trailer descriptor is converted into native uios, and the work
  * is handed to the file's fo_sendfile() method.  When the caller supplied
  * 'sbytes', the byte count is copied out whether or not the transfer
  * succeeded; a failure of that copyout is deliberately not reported.
  *
  * 'compat' selects the FreeBSD 4 semantics (only when COMPAT_FREEBSD4 is
  * configured) in which nbytes also counted the header bytes.
  *
  * Returns 0 or an errno value.
  */
 static int
 freebsd32_do_sendfile(struct thread *td,
     struct freebsd32_sendfile_args *uap, int compat)
 {
 	struct sf_hdtr32 hdtr32;
 	struct sf_hdtr hdtr;
 	struct uio *hdr_uio, *trl_uio;
 	struct file *fp;
 	cap_rights_t rights;
 	struct iovec32 *iov32;
 	off_t offset, sbytes;
 	int error;
 
 	offset = PAIR32TO64(off_t, uap->offset);
 	if (offset < 0)
 		return (EINVAL);
 
 	hdr_uio = trl_uio = NULL;
 
 	/*
 	 * sbytes is copied out below even when fo_sendfile() fails.  Start
 	 * it at zero so that uninitialized kernel stack is never handed to
 	 * userland should the backend return without storing a count.
 	 */
 	sbytes = 0;
 
 	if (uap->hdtr != NULL) {
 		error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32));
 		if (error)
 			goto out;
 		PTRIN_CP(hdtr32, hdtr, headers);
 		CP(hdtr32, hdtr, hdr_cnt);
 		PTRIN_CP(hdtr32, hdtr, trailers);
 		CP(hdtr32, hdtr, trl_cnt);
 
 		if (hdtr.headers != NULL) {
 			iov32 = PTRIN(hdtr32.headers);
 			error = freebsd32_copyinuio(iov32,
 			    hdtr32.hdr_cnt, &hdr_uio);
 			if (error)
 				goto out;
 #ifdef COMPAT_FREEBSD4
 			/*
 			 * In FreeBSD < 5.0 the nbytes to send also included
 			 * the header.  If compat is specified subtract the
 			 * header size from nbytes.
 			 */
 			if (compat) {
 				if (uap->nbytes > hdr_uio->uio_resid)
 					uap->nbytes -= hdr_uio->uio_resid;
 				else
 					uap->nbytes = 0;
 			}
 #endif
 		}
 		if (hdtr.trailers != NULL) {
 			iov32 = PTRIN(hdtr32.trailers);
 			error = freebsd32_copyinuio(iov32,
 			    hdtr32.trl_cnt, &trl_uio);
 			if (error)
 				goto out;
 		}
 	}
 
 	AUDIT_ARG_FD(uap->fd);
 
 	if ((error = fget_read(td, uap->fd,
 	    cap_rights_init_one(&rights, CAP_PREAD), &fp)) != 0)
 		goto out;
 
 	error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset,
 	    uap->nbytes, &sbytes, uap->flags, td);
 	fdrop(fp, td);
 
 	/* Best effort: the transfer result takes precedence over this copy. */
 	if (uap->sbytes != NULL)
 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
 
 out:
 	if (hdr_uio)
 		free(hdr_uio, M_IOV);
 	if (trl_uio)
 		free(trl_uio, M_IOV);
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 sendfile for 32-bit processes: calls the common
  * implementation with compat set to 1.  The argument structure is cast to
  * the current one.
  */
 int
 freebsd4_freebsd32_sendfile(struct thread *td,
     struct freebsd4_freebsd32_sendfile_args *uap)
 {
 	return (freebsd32_do_sendfile(td,
 	    (struct freebsd32_sendfile_args *)uap, 1));
 }
 #endif
 
 /* sendfile for 32-bit processes: calls the common implementation with compat set to 0. */
 int
 freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap)
 {
 
 	return (freebsd32_do_sendfile(td, uap, 0));
 }
 
 /*
  * Fill a 32-bit struct stat32 from a native struct stat.  Every padding,
  * extension and spare field is explicitly zeroed.
  */
 static void
 copy_stat(struct stat *in, struct stat32 *out)
 {
 
 #ifndef __amd64__
 	/*
 	 * 32-bit architectures other than i386 have 64-bit time_t.  This
 	 * results in struct timespec32 with 12 bytes for tv_sec and tv_nsec,
 	 * and 4 bytes of padding.  Zero the padding holes in struct stat32
 	 * before the timestamps are filled in below.
 	 */
 	bzero(&out->st_atim, sizeof(out->st_atim));
 	bzero(&out->st_mtim, sizeof(out->st_mtim));
 	bzero(&out->st_ctim, sizeof(out->st_ctim));
 	bzero(&out->st_birthtim, sizeof(out->st_birthtim));
 #endif
 
 	/* Fields that carry no information are always zero. */
 	out->st_padding0 = 0;
 	out->st_padding1 = 0;
 #ifdef __STAT32_TIME_T_EXT
 	out->st_atim_ext = 0;
 	out->st_mtim_ext = 0;
 	out->st_ctim_ext = 0;
 	out->st_btim_ext = 0;
 #endif
 	bzero(out->st_spare, sizeof(out->st_spare));
 
 	/* Identity, type and ownership. */
 	CP(*in, *out, st_dev);
 	CP(*in, *out, st_ino);
 	CP(*in, *out, st_mode);
 	CP(*in, *out, st_nlink);
 	CP(*in, *out, st_uid);
 	CP(*in, *out, st_gid);
 	CP(*in, *out, st_rdev);
 
 	/* Timestamps. */
 	TS_CP(*in, *out, st_atim);
 	TS_CP(*in, *out, st_mtim);
 	TS_CP(*in, *out, st_ctim);
 	TS_CP(*in, *out, st_birthtim);
 
 	/* Size and miscellaneous attributes. */
 	CP(*in, *out, st_size);
 	CP(*in, *out, st_blocks);
 	CP(*in, *out, st_blksize);
 	CP(*in, *out, st_flags);
 	CP(*in, *out, st_gen);
 }
 
 #ifdef COMPAT_43
 static void
 copy_ostat(struct stat *in, struct ostat32 *out)
 {
 
 	bzero(out, sizeof(*out));
 	CP(*in, *out, st_dev);
 	CP(*in, *out, st_ino);
 	CP(*in, *out, st_mode);
 	CP(*in, *out, st_nlink);
 	CP(*in, *out, st_uid);
 	CP(*in, *out, st_gid);
 	CP(*in, *out, st_rdev);
 	out->st_size = MIN(in->st_size, INT32_MAX);
 	TS_CP(*in, *out, st_atim);
 	TS_CP(*in, *out, st_mtim);
 	TS_CP(*in, *out, st_ctim);
 	CP(*in, *out, st_blksize);
 	CP(*in, *out, st_blocks);
 	CP(*in, *out, st_flags);
 	CP(*in, *out, st_gen);
 }
 #endif
 
 #ifdef COMPAT_43
 /* Old (COMPAT_43) stat for 32-bit processes; the result uses the ostat32 layout. */
 int
 ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap)
 {
 	struct stat sb;
 	struct ostat32 sb32;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error == 0) {
 		copy_ostat(&sb, &sb32);
 		error = copyout(&sb32, uap->ub, sizeof (sb32));
 	}
 	return (error);
 }
 #endif
 
 int
 freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap)
 {
 	struct stat ub;
 	struct stat32 ub32;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error)
 		return (error);
 	copy_stat(&ub, &ub32);
 	error = copyout(&ub32, uap->sb, sizeof(ub32));
 	return (error);
 }
 
 #ifdef COMPAT_43
 int
 ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap)
 {
 	struct stat ub;
 	struct ostat32 ub32;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error)
 		return (error);
 	copy_ostat(&ub, &ub32);
 	error = copyout(&ub32, uap->sb, sizeof(ub32));
 	return (error);
 }
 #endif
 
 int
 freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap)
 {
 	struct stat ub;
 	struct stat32 ub32;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE,
 	    &ub, NULL);
 	if (error)
 		return (error);
 	copy_stat(&ub, &ub32);
 	error = copyout(&ub32, uap->buf, sizeof(ub32));
 	return (error);
 }
 
 #ifdef COMPAT_43
 /*
  * Old (COMPAT_43) lstat for 32-bit processes: like ofreebsd32_stat() but
  * with AT_SYMLINK_NOFOLLOW.
  */
 int
 ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap)
 {
 	struct stat sb;
 	struct ostat32 sb32;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error == 0) {
 		copy_ostat(&sb, &sb32);
 		error = copyout(&sb32, uap->ub, sizeof (sb32));
 	}
 	return (error);
 }
 #endif
 
 /*
  * fhstat for 32-bit processes: copy in the file handle, stat it, and
  * copy the result out in the stat32 layout.
  */
 int
 freebsd32_fhstat(struct thread *td, struct freebsd32_fhstat_args *uap)
 {
 	struct fhandle fh;
 	struct stat sb;
 	struct stat32 sb32;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error == 0)
 		error = kern_fhstat(td, fh, &sb);
 	if (error == 0) {
 		copy_stat(&sb, &sb32);
 		error = copyout(&sb32, uap->sb, sizeof (sb32));
 	}
 	return (error);
 }
 
 #if defined(COMPAT_FREEBSD11)
 extern int ino64_trunc_error;
 
 /*
  * Convert a native struct stat into a struct freebsd11_stat32.
  *
  * st_ino, st_nlink, st_dev and st_rdev are copied and then compared
  * against the source value.  When they differ, ino64_trunc_error selects
  * the outcome: for st_ino and st_nlink, 1 returns EOVERFLOW, 2 stores
  * UINT32_MAX (st_ino) or UINT16_MAX (st_nlink), and any other value keeps
  * the copied value; for st_dev and st_rdev only 1 (EOVERFLOW) is acted on.
  *
  * Returns 0 on success or EOVERFLOW as described above.
  */
 static int
 freebsd11_cvtstat32(struct stat *in, struct freebsd11_stat32 *out)
 {
 
 #ifndef __amd64__
 	/*
 	 * 32-bit architectures other than i386 have 64-bit time_t.  This
 	 * results in struct timespec32 with 12 bytes for tv_sec and tv_nsec,
 	 * and 4 bytes of padding.  Zero the padding holes in freebsd11_stat32.
 	 */
 	bzero(&out->st_atim, sizeof(out->st_atim));
 	bzero(&out->st_mtim, sizeof(out->st_mtim));
 	bzero(&out->st_ctim, sizeof(out->st_ctim));
 	bzero(&out->st_birthtim, sizeof(out->st_birthtim));
 #endif
 
 	/* Copy first, then detect a value that did not survive the copy. */
 	CP(*in, *out, st_ino);
 	if (in->st_ino != out->st_ino) {
 		switch (ino64_trunc_error) {
 		default:
 		case 0:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		case 2:
 			out->st_ino = UINT32_MAX;
 			break;
 		}
 	}
 	CP(*in, *out, st_nlink);
 	if (in->st_nlink != out->st_nlink) {
 		switch (ino64_trunc_error) {
 		default:
 		case 0:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		case 2:
 			out->st_nlink = UINT16_MAX;
 			break;
 		}
 	}
 	out->st_dev = in->st_dev;
 	if (out->st_dev != in->st_dev) {
 		switch (ino64_trunc_error) {
 		default:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		}
 	}
 	CP(*in, *out, st_mode);
 	CP(*in, *out, st_uid);
 	CP(*in, *out, st_gid);
 	out->st_rdev = in->st_rdev;
 	if (out->st_rdev != in->st_rdev) {
 		switch (ino64_trunc_error) {
 		default:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		}
 	}
 	TS_CP(*in, *out, st_atim);
 	TS_CP(*in, *out, st_mtim);
 	TS_CP(*in, *out, st_ctim);
 	CP(*in, *out, st_size);
 	CP(*in, *out, st_blocks);
 	CP(*in, *out, st_blksize);
 	CP(*in, *out, st_flags);
 	CP(*in, *out, st_gen);
 	TS_CP(*in, *out, st_birthtim);
 	out->st_lspare = 0;
 	/* Zero every byte of *out that lies after st_birthtim. */
 	bzero((char *)&out->st_birthtim + sizeof(out->st_birthtim),
 	    sizeof(*out) - offsetof(struct freebsd11_stat32,
 	    st_birthtim) - sizeof(out->st_birthtim));
 	return (0);
 }
 
 int
 freebsd11_freebsd32_stat(struct thread *td,
     struct freebsd11_freebsd32_stat_args *uap)
 {
 	struct stat sb;
 	struct freebsd11_stat32 sb32;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat32(&sb, &sb32);
 	if (error == 0)
 		error = copyout(&sb32, uap->ub, sizeof (sb32));
 	return (error);
 }
 
 int
 freebsd11_freebsd32_fstat(struct thread *td,
     struct freebsd11_freebsd32_fstat_args *uap)
 {
 	struct stat sb;
 	struct freebsd11_stat32 sb32;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &sb);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat32(&sb, &sb32);
 	if (error == 0)
 		error = copyout(&sb32, uap->sb, sizeof (sb32));
 	return (error);
 }
 
 int
 freebsd11_freebsd32_fstatat(struct thread *td,
     struct freebsd11_freebsd32_fstatat_args *uap)
 {
 	struct stat sb;
 	struct freebsd11_stat32 sb32;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat32(&sb, &sb32);
 	if (error == 0)
 		error = copyout(&sb32, uap->buf, sizeof (sb32));
 	return (error);
 }
 
 int
 freebsd11_freebsd32_lstat(struct thread *td,
     struct freebsd11_freebsd32_lstat_args *uap)
 {
 	struct stat sb;
 	struct freebsd11_stat32 sb32;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat32(&sb, &sb32);
 	if (error == 0)
 		error = copyout(&sb32, uap->ub, sizeof (sb32));
 	return (error);
 }
 
 int
 freebsd11_freebsd32_fhstat(struct thread *td,
     struct freebsd11_freebsd32_fhstat_args *uap)
 {
 	struct stat sb;
 	struct freebsd11_stat32 sb32;
 	struct fhandle fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
         if (error != 0)
                 return (error);
 	error = kern_fhstat(td, fh, &sb);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat32(&sb, &sb32);
 	if (error == 0)
 		error = copyout(&sb32, uap->sb, sizeof (sb32));
 	return (error);
 }
 
 static int
 freebsd11_cvtnstat32(struct stat *sb, struct nstat32 *nsb32)
 {
 	struct nstat nsb;
 	int error;
 
 	error = freebsd11_cvtnstat(sb, &nsb);
 	if (error != 0)
 		return (error);
 
 	bzero(nsb32, sizeof(*nsb32));
 	CP(nsb, *nsb32, st_dev);
 	CP(nsb, *nsb32, st_ino);
 	CP(nsb, *nsb32, st_mode);
 	CP(nsb, *nsb32, st_nlink);
 	CP(nsb, *nsb32, st_uid);
 	CP(nsb, *nsb32, st_gid);
 	CP(nsb, *nsb32, st_rdev);
 	CP(nsb, *nsb32, st_atim.tv_sec);
 	CP(nsb, *nsb32, st_atim.tv_nsec);
 	CP(nsb, *nsb32, st_mtim.tv_sec);
 	CP(nsb, *nsb32, st_mtim.tv_nsec);
 	CP(nsb, *nsb32, st_ctim.tv_sec);
 	CP(nsb, *nsb32, st_ctim.tv_nsec);
 	CP(nsb, *nsb32, st_size);
 	CP(nsb, *nsb32, st_blocks);
 	CP(nsb, *nsb32, st_blksize);
 	CP(nsb, *nsb32, st_flags);
 	CP(nsb, *nsb32, st_gen);
 	CP(nsb, *nsb32, st_birthtim.tv_sec);
 	CP(nsb, *nsb32, st_birthtim.tv_nsec);
 	return (0);
 }
 
 /*
  * FreeBSD 11 compat nstat(2) for 32-bit processes: stat uap->path
  * relative to the current directory (no lookup flags), convert the result
  * to a struct nstat32 and copy it out to uap->ub.
  *
  * Returns 0 on success, otherwise the error from kern_statat(),
  * freebsd11_cvtnstat32() or copyout().
  */
 int
 freebsd11_freebsd32_nstat(struct thread *td,
     struct freebsd11_freebsd32_nstat_args *uap)
 {
 	struct stat sb;
 	struct nstat32 nsb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtnstat32(&sb, &nsb);
 	/*
 	 * Copy out only after a successful conversion; on failure nsb has
 	 * not been filled in and must not be exposed to userland.
 	 */
 	if (error == 0)
 		error = copyout(&nsb, uap->ub, sizeof (nsb));
 	return (error);
 }
 
 int
 freebsd11_freebsd32_nlstat(struct thread *td,
     struct freebsd11_freebsd32_nlstat_args *uap)
 {
 	struct stat sb;
 	struct nstat32 nsb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtnstat32(&sb, &nsb);
 	if (error == 0)
 		error = copyout(&nsb, uap->ub, sizeof (nsb));
 	return (error);
 }
 
 int
 freebsd11_freebsd32_nfstat(struct thread *td,
     struct freebsd11_freebsd32_nfstat_args *uap)
 {
 	struct nstat32 nub;
 	struct stat ub;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtnstat32(&ub, &nub);
 	if (error == 0)
 		error = copyout(&nub, uap->sb, sizeof(nub));
 	return (error);
 }
 #endif
 
 int
 freebsd32___sysctl(struct thread *td, struct freebsd32___sysctl_args *uap)
 {
 	int error, name[CTL_MAXNAME];
 	size_t j, oldlen;
 	uint32_t tmp;
 
 	if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
 		return (EINVAL);
  	error = copyin(uap->name, name, uap->namelen * sizeof(int));
  	if (error)
 		return (error);
 	if (uap->oldlenp) {
 		error = fueword32(uap->oldlenp, &tmp);
 		oldlen = tmp;
 	} else {
 		oldlen = 0;
 	}
 	if (error != 0)
 		return (EFAULT);
 	error = userland_sysctl(td, name, uap->namelen,
 		uap->old, &oldlen, 1,
 		uap->new, uap->newlen, &j, SCTL_MASK32);
 	if (error)
 		return (error);
 	if (uap->oldlenp)
 		suword32(uap->oldlenp, j);
 	return (0);
 }
 
 /*
  * 32-bit __sysctlbyname(2) entry point.
  *
  * Fetches the optional 32-bit old length from uap->oldlenp, calls
  * kern___sysctlbyname() with SCTL_MASK32, and on success stores the
  * resulting length back through uap->oldlenp as a 32-bit word.
  *
  * Returns 0 on success, EFAULT when the old length cannot be read or
  * written, or the error from kern___sysctlbyname().
  */
 int
 freebsd32___sysctlbyname(struct thread *td,
     struct freebsd32___sysctlbyname_args *uap)
 {
 	size_t oldlen, rv;
 	int error;
 	uint32_t tmp;
 
 	oldlen = 0;
 	if (uap->oldlenp != NULL) {
 		if (fueword32(uap->oldlenp, &tmp) != 0)
 			return (EFAULT);
 		oldlen = tmp;
 	}
 	error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old,
 	    &oldlen, uap->new, uap->newlen, &rv, SCTL_MASK32, 1);
 	if (error != 0)
 		return (error);
 	/*
 	 * suword32() reports failure as a non-zero status rather than an
 	 * errno value, so translate it to EFAULT instead of returning it
 	 * directly as the syscall error.
 	 */
 	if (uap->oldlenp != NULL && suword32(uap->oldlenp, rv) != 0)
 		return (EFAULT);
 
 	return (0);
 }
 
 int
 freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
 {
 	uint32_t version;
 	int error;
 	struct jail j;
 
 	error = copyin(uap->jail, &version, sizeof(uint32_t));
 	if (error)
 		return (error);
 
 	switch (version) {
 	case 0:
 	{
 		/* FreeBSD single IPv4 jails. */
 		struct jail32_v0 j32_v0;
 
 		bzero(&j, sizeof(struct jail));
 		error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0));
 		if (error)
 			return (error);
 		CP(j32_v0, j, version);
 		PTRIN_CP(j32_v0, j, path);
 		PTRIN_CP(j32_v0, j, hostname);
 		j.ip4s = htonl(j32_v0.ip_number);	/* jail_v0 is host order */
 		break;
 	}
 
 	case 1:
 		/*
 		 * Version 1 was used by multi-IPv4 jail implementations
 		 * that never made it into the official kernel.
 		 */
 		return (EINVAL);
 
 	case 2:	/* JAIL_API_VERSION */
 	{
 		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
 		struct jail32 j32;
 
 		error = copyin(uap->jail, &j32, sizeof(struct jail32));
 		if (error)
 			return (error);
 		CP(j32, j, version);
 		PTRIN_CP(j32, j, path);
 		PTRIN_CP(j32, j, hostname);
 		PTRIN_CP(j32, j, jailname);
 		CP(j32, j, ip4s);
 		CP(j32, j, ip6s);
 		PTRIN_CP(j32, j, ip4);
 		PTRIN_CP(j32, j, ip6);
 		break;
 	}
 
 	default:
 		/* Sci-Fi jails are not supported, sorry. */
 		return (EINVAL);
 	}
 	return (kern_jail(td, &j));
 }
 
 /*
  * 32-bit jail_set(2) entry point: import the 32-bit iovec array as a uio
  * and hand it to kern_jail_set().
  */
 int
 freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap)
 {
 	struct uio *opts;
 	int rv;
 
 	/* Options come as name/value pairs, so the count must be even. */
 	if ((uap->iovcnt & 1) != 0)
 		return (EINVAL);
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &opts);
 	if (rv != 0)
 		return (rv);
 	rv = kern_jail_set(td, opts, uap->flags);
 	free(opts, M_IOV);
 	return (rv);
 }
 
 int
 freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap)
 {
 	struct iovec32 iov32;
 	struct uio *auio;
 	int error, i;
 
 	/* Check that we have an even number of iovecs. */
 	if (uap->iovcnt & 1)
 		return (EINVAL);
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_jail_get(td, auio, uap->flags);
 	if (error == 0)
 		for (i = 0; i < uap->iovcnt; i++) {
 			PTROUT_CP(auio->uio_iov[i], iov32, iov_base);
 			CP(auio->uio_iov[i], iov32, iov_len);
 			error = copyout(&iov32, uap->iovp + i, sizeof(iov32));
 			if (error != 0)
 				break;
 		}
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap)
 {
 	struct sigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->act) {
 		error = copyin(uap->act, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		CP(s32, sa, sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->sig, sap, &osa, 0);
 	if (error == 0 && uap->oact != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		CP(osa, s32, sa_mask);
 		error = copyout(&s32, uap->oact, sizeof(s32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 int
 freebsd4_freebsd32_sigaction(struct thread *td,
 			     struct freebsd4_freebsd32_sigaction_args *uap)
 {
 	struct sigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->act) {
 		error = copyin(uap->act, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		CP(s32, sa, sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4);
 	if (error == 0 && uap->oact != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		CP(osa, s32, sa_mask);
 		error = copyout(&s32, uap->oact, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_43
 /*
  * 32-bit layout of the old-style sigaction structure exchanged with
  * userland by ofreebsd32_sigaction().
  */
 struct osigaction32 {
 	uint32_t	sa_u;		/* handler, as a 32-bit user pointer */
 	osigset_t	sa_mask;	/* old-format signal mask */
 	int		sa_flags;	/* SA_* flags */
 };
 
 #define	ONSIG	32
 
 int
 ofreebsd32_sigaction(struct thread *td,
 			     struct ofreebsd32_sigaction_args *uap)
 {
 	struct osigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	if (uap->nsa) {
 		error = copyin(uap->nsa, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		OSIG2SIG(s32.sa_mask, sa.sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET);
 	if (error == 0 && uap->osa != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		SIG2OSIG(osa.sa_mask, s32.sa_mask);
 		error = copyout(&s32, uap->osa, sizeof(s32));
 	}
 	return (error);
 }
 
 /*
  * 32-bit layout of the sigvec structure exchanged with userland by
  * ofreebsd32_sigvec().
  */
 struct sigvec32 {
 	uint32_t	sv_handler;	/* handler, as a 32-bit user pointer */
 	int		sv_mask;	/* old-format signal mask */
 	int		sv_flags;	/* flags; SA_RESTART is toggled on the way in/out */
 };
 
 int
 ofreebsd32_sigvec(struct thread *td,
 			  struct ofreebsd32_sigvec_args *uap)
 {
 	struct sigvec32 vec;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	if (uap->nsv) {
 		error = copyin(uap->nsv, &vec, sizeof(vec));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(vec.sv_handler);
 		OSIG2SIG(vec.sv_mask, sa.sa_mask);
 		sa.sa_flags = vec.sv_flags;
 		sa.sa_flags ^= SA_RESTART;
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET);
 	if (error == 0 && uap->osv != NULL) {
 		vec.sv_handler = PTROUT(osa.sa_handler);
 		SIG2OSIG(osa.sa_mask, vec.sv_mask);
 		vec.sv_flags = osa.sa_flags;
 		vec.sv_flags &= ~SA_NOCLDWAIT;
 		vec.sv_flags ^= SA_RESTART;
 		error = copyout(&vec, uap->osv, sizeof(vec));
 	}
 	return (error);
 }
 
 /*
  * 32-bit layout of the sigstack structure exchanged with userland by
  * ofreebsd32_sigstack().
  */
 struct sigstack32 {
 	uint32_t	ss_sp;		/* stack pointer, as a 32-bit user pointer */
 	int		ss_onstack;	/* on-stack indicator */
 };
 
 int
 ofreebsd32_sigstack(struct thread *td,
 			    struct ofreebsd32_sigstack_args *uap)
 {
 	struct sigstack32 s32;
 	struct sigstack nss, oss;
 	int error = 0, unss;
 
 	if (uap->nss != NULL) {
 		error = copyin(uap->nss, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		nss.ss_sp = PTRIN(s32.ss_sp);
 		CP(s32, nss, ss_onstack);
 		unss = 1;
 	} else {
 		unss = 0;
 	}
 	oss.ss_sp = td->td_sigstk.ss_sp;
 	oss.ss_onstack = sigonstack(cpu_getstack(td));
 	if (unss) {
 		td->td_sigstk.ss_sp = nss.ss_sp;
 		td->td_sigstk.ss_size = 0;
 		td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK);
 		td->td_pflags |= TDP_ALTSTACK;
 	}
 	if (uap->oss != NULL) {
 		s32.ss_sp = PTROUT(oss.ss_sp);
 		CP(oss, s32, ss_onstack);
 		error = copyout(&s32, uap->oss, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 /*
  * 32-bit nanosleep(2): a relative sleep on CLOCK_REALTIME, implemented by
  * the shared clock_nanosleep helper.
  */
 int
 freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap)
 {
 	int rv;
 
 	rv = freebsd32_user_clock_nanosleep(td, CLOCK_REALTIME,
 	    TIMER_RELTIME, uap->rqtp, uap->rmtp);
 	return (rv);
 }
 
 /*
  * 32-bit clock_nanosleep(2): run the shared helper and pass its result
  * through kern_posix_error().
  */
 int
 freebsd32_clock_nanosleep(struct thread *td,
     struct freebsd32_clock_nanosleep_args *uap)
 {
 
 	return (kern_posix_error(td,
 	    freebsd32_user_clock_nanosleep(td, uap->clock_id, uap->flags,
 	    uap->rqtp, uap->rmtp)));
 }
 
 /*
  * Common body of the 32-bit nanosleep/clock_nanosleep entry points.
  *
  * The requested time is read from ua_rqtp and widened to a native
  * timespec.  The remaining time is written to ua_rmtp only when the sleep
  * ended with EINTR, ua_rmtp is non-NULL and the sleep was relative; a
  * failure of that copyout replaces the returned error.
  */
 static int
 freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id,
     int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp)
 {
 	struct timespec remain, request;
 	struct timespec32 remain32, request32;
 	int cperr, rv;
 
 	rv = copyin(ua_rqtp, &request32, sizeof(request32));
 	if (rv != 0)
 		return (rv);
 
 	CP(request32, request, tv_sec);
 	CP(request32, request, tv_nsec);
 
 	rv = kern_clock_nanosleep(td, clock_id, flags, &request, &remain);
 	if (rv != EINTR || ua_rmtp == NULL || (flags & TIMER_ABSTIME) != 0)
 		return (rv);
 
 	CP(remain, remain32, tv_sec);
 	CP(remain, remain32, tv_nsec);
 
 	cperr = copyout(&remain32, ua_rmtp, sizeof(remain32));
 	return (cperr != 0 ? cperr : rv);
 }
 
 int
 freebsd32_clock_gettime(struct thread *td,
 			struct freebsd32_clock_gettime_args *uap)
 {
 	struct timespec	ats;
 	struct timespec32 ats32;
 	int error;
 
 	error = kern_clock_gettime(td, uap->clock_id, &ats);
 	if (error == 0) {
 		CP(ats, ats32, tv_sec);
 		CP(ats, ats32, tv_nsec);
 		error = copyout(&ats32, uap->tp, sizeof(ats32));
 	}
 	return (error);
 }
 
 int
 freebsd32_clock_settime(struct thread *td,
 			struct freebsd32_clock_settime_args *uap)
 {
 	struct timespec	ats;
 	struct timespec32 ats32;
 	int error;
 
 	error = copyin(uap->tp, &ats32, sizeof(ats32));
 	if (error)
 		return (error);
 	CP(ats32, ats, tv_sec);
 	CP(ats32, ats, tv_nsec);
 
 	return (kern_clock_settime(td, uap->clock_id, &ats));
 }
 
 int
 freebsd32_clock_getres(struct thread *td,
 		       struct freebsd32_clock_getres_args *uap)
 {
 	struct timespec	ts;
 	struct timespec32 ts32;
 	int error;
 
 	if (uap->tp == NULL)
 		return (0);
 	error = kern_clock_getres(td, uap->clock_id, &ts);
 	if (error == 0) {
 		CP(ts, ts32, tv_sec);
 		CP(ts, ts32, tv_nsec);
 		error = copyout(&ts32, uap->tp, sizeof(ts32));
 	}
 	return (error);
 }
 
 /*
  * 32-bit ktimer_create(2): translate the optional sigevent32, create the
  * timer and copy its id to uap->timerid.  If the id cannot be copied out
  * the freshly created timer is deleted again.
  */
 int
 freebsd32_ktimer_create(struct thread *td,
     struct freebsd32_ktimer_create_args *uap)
 {
 	struct sigevent ev, *evp;
 	struct sigevent32 ev32;
 	int id, rv;
 
 	evp = NULL;
 	if (uap->evp != NULL) {
 		evp = &ev;
 		rv = copyin(uap->evp, &ev32, sizeof(ev32));
 		if (rv != 0)
 			return (rv);
 		rv = convert_sigevent32(&ev32, &ev);
 		if (rv != 0)
 			return (rv);
 	}
 	rv = kern_ktimer_create(td, uap->clock_id, evp, &id, -1);
 	if (rv != 0)
 		return (rv);
 
 	rv = copyout(&id, uap->timerid, sizeof(int));
 	if (rv != 0)
 		kern_ktimer_delete(td, id);
 	return (rv);
 }
 
 int
 freebsd32_ktimer_settime(struct thread *td,
     struct freebsd32_ktimer_settime_args *uap)
 {
 	struct itimerspec32 val32, oval32;
 	struct itimerspec val, oval, *ovalp;
 	int error;
 
 	error = copyin(uap->value, &val32, sizeof(val32));
 	if (error != 0)
 		return (error);
 	ITS_CP(val32, val);
 	ovalp = uap->ovalue != NULL ? &oval : NULL;
 	error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp);
 	if (error == 0 && uap->ovalue != NULL) {
 		ITS_CP(oval, oval32);
 		error = copyout(&oval32, uap->ovalue, sizeof(oval32));
 	}
 	return (error);
 }
 
 int
 freebsd32_ktimer_gettime(struct thread *td,
     struct freebsd32_ktimer_gettime_args *uap)
 {
 	struct itimerspec32 val32;
 	struct itimerspec val;
 	int error;
 
 	error = kern_ktimer_gettime(td, uap->timerid, &val);
 	if (error == 0) {
 		ITS_CP(val, val32);
 		error = copyout(&val32, uap->value, sizeof(val32));
 	}
 	return (error);
 }
 
 /*
  * 32-bit clock_getcpuclockid2(2): the id arrives split into two 32-bit
  * halves and is reassembled before the lookup; the resulting clock id is
  * copied out to uap->clock_id.
  */
 int
 freebsd32_clock_getcpuclockid2(struct thread *td,
     struct freebsd32_clock_getcpuclockid2_args *uap)
 {
 	clockid_t cid;
 	int rv;
 
 	rv = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id),
 	    uap->which, &cid);
 	if (rv != 0)
 		return (rv);
 	return (copyout(&cid, uap->clock_id, sizeof(clockid_t)));
 }
 
 int
 freebsd32_thr_new(struct thread *td,
 		  struct freebsd32_thr_new_args *uap)
 {
 	struct thr_param32 param32;
 	struct thr_param param;
 	int error;
 
 	if (uap->param_size < 0 ||
 	    uap->param_size > sizeof(struct thr_param32))
 		return (EINVAL);
 	bzero(&param, sizeof(struct thr_param));
 	bzero(&param32, sizeof(struct thr_param32));
 	error = copyin(uap->param, &param32, uap->param_size);
 	if (error != 0)
 		return (error);
 	param.start_func = PTRIN(param32.start_func);
 	param.arg = PTRIN(param32.arg);
 	param.stack_base = PTRIN(param32.stack_base);
 	param.stack_size = param32.stack_size;
 	param.tls_base = PTRIN(param32.tls_base);
 	param.tls_size = param32.tls_size;
 	param.child_tid = PTRIN(param32.child_tid);
 	param.parent_tid = PTRIN(param32.parent_tid);
 	param.flags = param32.flags;
 	param.rtp = PTRIN(param32.rtp);
 	param.spare[0] = PTRIN(param32.spare[0]);
 	param.spare[1] = PTRIN(param32.spare[1]);
 	param.spare[2] = PTRIN(param32.spare[2]);
 
 	return (kern_thr_new(td, &param));
 }
 
 int
 freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	int error;
 
 	error = 0;
 	tsp = NULL;
 	if (uap->timeout != NULL) {
 		error = copyin((const void *)uap->timeout, (void *)&ts32,
 		    sizeof(struct timespec32));
 		if (error != 0)
 			return (error);
 		ts.tv_sec = ts32.tv_sec;
 		ts.tv_nsec = ts32.tv_nsec;
 		tsp = &ts;
 	}
 	return (kern_thr_suspend(td, tsp));
 }
 
 /*
  * Fill *dst with the 32-bit representation of *src.  The destination is
  * zeroed first, so any field not assigned below reads as 0.
  */
 void
 siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst)
 {
 
 	bzero(dst, sizeof(*dst));
 
 	/* Signal identification. */
 	dst->si_signo = src->si_signo;
 	dst->si_errno = src->si_errno;
 	dst->si_code = src->si_code;
 
 	/* Sender and status. */
 	dst->si_pid = src->si_pid;
 	dst->si_uid = src->si_uid;
 	dst->si_status = src->si_status;
 
 	/* Address and value are narrowed to their 32-bit forms. */
 	dst->si_addr = (uintptr_t)src->si_addr;
 	dst->si_value.sival_int = src->si_value.sival_int;
 
 	/* Timer details. */
 	dst->si_timerid = src->si_timerid;
 	dst->si_overrun = src->si_overrun;
 }
 
 /*
  * Fallback declaration of the sigqueue argument structure, used only when
  * _FREEBSD32_SYSPROTO_H_ is not defined.
  */
 #ifndef _FREEBSD32_SYSPROTO_H_
 struct freebsd32_sigqueue_args {
         pid_t pid;
         int signum;
         /* union sigval32 */ int value;
 };
 #endif
 int
 freebsd32_sigqueue(struct thread *td, struct freebsd32_sigqueue_args *uap)
 {
 	union sigval sv;
 
 	/*
 	 * On 32-bit ABIs, sival_int and sival_ptr are the same.
 	 * On 64-bit little-endian ABIs, the low bits are the same.
 	 * In 64-bit big-endian ABIs, sival_int overlaps with
 	 * sival_ptr's HIGH bits.  We choose to support sival_int
 	 * rather than sival_ptr in this case as it seems to be
 	 * more common.
 	 */
 	bzero(&sv, sizeof(sv));
 	sv.sival_int = (uint32_t)(uint64_t)uap->value;
 
 	return (kern_sigqueue(td, uap->pid, uap->signum, &sv));
 }
 
 int
 freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts;
 	struct timespec *timeout;
 	sigset_t set;
 	ksiginfo_t ksi;
 	struct siginfo32 si32;
 	int error;
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		ts.tv_sec = ts32.tv_sec;
 		ts.tv_nsec = ts32.tv_nsec;
 		timeout = &ts;
 	} else
 		timeout = NULL;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, timeout);
 	if (error)
 		return (error);
 
 	if (uap->info) {
 		siginfo_to_siginfo32(&ksi.ksi_info, &si32);
 		error = copyout(&si32, uap->info, sizeof(struct siginfo32));
 	}
 
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 /*
  * MPSAFE
  */
 int
 freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap)
 {
 	ksiginfo_t ksi;
 	struct siginfo32 si32;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error)
 		return (error);
 
 	if (uap->info) {
 		siginfo_to_siginfo32(&ksi.ksi_info, &si32);
 		error = copyout(&si32, uap->info, sizeof(struct siginfo32));
 	}	
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 int
 freebsd32_cpuset_setid(struct thread *td,
     struct freebsd32_cpuset_setid_args *uap)
 {
 
 	return (kern_cpuset_setid(td, uap->which,
 	    PAIR32TO64(id_t, uap->id), uap->setid));
 }
 
 int
 freebsd32_cpuset_getid(struct thread *td,
     struct freebsd32_cpuset_getid_args *uap)
 {
 
 	return (kern_cpuset_getid(td, uap->level, uap->which,
 	    PAIR32TO64(id_t, uap->id), uap->setid));
 }
 
 /*
  * Copy a bit set of `size' bytes from the user address `u' into the
  * kernel buffer `k'.
  *
  * On big-endian builds the two 32-bit halves of every bitset word are
  * swapped after the copy; on other builds this is a plain copyin().
  * Returns 0 or the copyin() error.
  */
 static int
 copyin32_set(const void *u, void *k, size_t size)
 {
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 	int rv;
 	struct bitset *kb = k;
 	int *p;
 
 	rv = copyin(u, k, size);
 	if (rv != 0)
 		return (rv);
 
 	p = (int *)kb->__bits;
 	/* Loop through swapping words.
 	 * `size' is in bytes, we need bits. */
 	for (int i = 0; i < __bitset_words(size * 8); i++) {
 		int tmp = p[0];
 		p[0] = p[1];
 		p[1] = tmp;
 		p += 2;
 	}
 	return (0);
 #else
 	return (copyin(u, k, size));
 #endif
 }
 
 /*
  * Copy a bit set of `size' bytes from the kernel buffer `k' to the user
  * address `u'.
  *
  * On big-endian builds each bitset word is written as two 32-bit stores
  * with its halves swapped, mirroring copyin32_set(); on other builds this
  * is a plain copyout().  Returns 0, EFAULT if a store fails (big-endian
  * path), or the copyout() error.
  */
 static int
 copyout32_set(const void *k, void *u, size_t size)
 {
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 	const struct bitset *kb = k;
 	struct bitset *ub = u;
 	const int *kp = (const int *)kb->__bits;
 	int *up = (int *)ub->__bits;
 
 	/*
 	 * `size' is in bytes, we need bits.  Walk the set one word (two
 	 * 32-bit halves) at a time, advancing both cursors so that every
 	 * word is written, not just the first one.
 	 */
 	for (int i = 0; i < __bitset_words(size * 8); i++) {
 		if (suword32(up, kp[1]) != 0 ||
 		    suword32(up + 1, kp[0]) != 0)
 			return (EFAULT);
 		kp += 2;
 		up += 2;
 	}
 	return (0);
 #else
 	return (copyout(k, u, size));
 #endif
 }
 
 /*
  * Copy callbacks passed to the cpuset affinity/domain helpers by the
  * 32-bit entry points, selecting the 32-bit-aware copyin/copyout above.
  */
 static const struct cpuset_copy_cb cpuset_copy32_cb = {
 	.cpuset_copyin = copyin32_set,
 	.cpuset_copyout = copyout32_set
 };
 
 int
 freebsd32_cpuset_getaffinity(struct thread *td,
     struct freebsd32_cpuset_getaffinity_args *uap)
 {
 
 	return (user_cpuset_getaffinity(td, uap->level, uap->which,
 	    PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask,
 	    &cpuset_copy32_cb));
 }
 
 int
 freebsd32_cpuset_setaffinity(struct thread *td,
     struct freebsd32_cpuset_setaffinity_args *uap)
 {
 
 	return (user_cpuset_setaffinity(td, uap->level, uap->which,
 	    PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask,
 	    &cpuset_copy32_cb));
 }
 
 int
 freebsd32_cpuset_getdomain(struct thread *td,
     struct freebsd32_cpuset_getdomain_args *uap)
 {
 
 	return (kern_cpuset_getdomain(td, uap->level, uap->which,
 	    PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy,
 	    &cpuset_copy32_cb));
 }
 
 int
 freebsd32_cpuset_setdomain(struct thread *td,
     struct freebsd32_cpuset_setdomain_args *uap)
 {
 
 	return (kern_cpuset_setdomain(td, uap->level, uap->which,
 	    PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy,
 	    &cpuset_copy32_cb));
 }
 
 /*
  * 32-bit nmount(2): widen the flags, audit them, strip MNT_ROOTFS,
  * validate the iovec count, import the iovec array and call
  * vfs_donmount().
  */
 int
 freebsd32_nmount(struct thread *td,
     struct freebsd32_nmount_args /* {
     	struct iovec *iovp;
     	unsigned int iovcnt;
     	int flags;
     } */ *uap)
 {
 	struct uio *opts;
 	uint64_t mntflags;
 	int rv;
 
 	/*
 	 * Mount flags are 64 bits wide.  A 32-bit caller only passes in
 	 * 32 bits, but everything from here on handles 64-bit flags.
 	 */
 	mntflags = uap->flags;
 
 	AUDIT_ARG_FFLAGS(mntflags);
 
 	/*
 	 * Userland must not be able to set MNT_ROOTFS; only the kernel
 	 * sets it when mounting its root file system.  It still has to be
 	 * filtered here (rather than rejected) so that MNT_UPDATE on the
 	 * root file system keeps working.
 	 */
 	mntflags &= ~MNT_ROOTFS;
 
 	/*
 	 * The iovec count must be even and there must be at least two
 	 * options.
 	 */
 	if ((uap->iovcnt & 1) != 0 || uap->iovcnt < 4)
 		return (EINVAL);
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &opts);
 	if (rv != 0)
 		return (rv);
 	rv = vfs_donmount(td, mntflags, opts);
 
 	free(opts, M_IOV);
 	return (rv);
 }
 
 /*
  * Disabled (#if 0) skeleton for a 32-bit syscall wrapper.  The xxx/yyy/zzz
  * names are placeholders: copy the 32-bit argument in, translate it, call
  * the native kern_ function, then translate and copy the result out.
  * NOTE(review): p32 is never assigned in this skeleton, so the copyout
  * below is not usable as written.
  */
 #if 0
 int
 freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap)
 {
 	struct yyy32 *p32, s32;
 	struct yyy *p = NULL, s;
 	struct xxx_arg ap;
 	int error;
 
 	if (uap->zzz) {
 		error = copyin(uap->zzz, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		/* translate in */
 		p = &s;
 	}
 	error = kern_xxx(td, p);
 	if (error)
 		return (error);
 	if (uap->zzz) {
 		/* translate out */
 		error = copyout(&s32, p32, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 /*
  * Module event handler for 32-bit syscalls: forwards to the generic
  * handler using the freebsd32 sysent table.
  */
 int
 syscall32_module_handler(struct module *mod, int what, void *arg)
 {
 	int rv;
 
 	rv = kern_syscall_module_handler(freebsd32_sysent, mod, what, arg);
 	return (rv);
 }
 
 /*
  * Register a set of 32-bit syscalls: forwards to the generic helper using
  * the freebsd32 sysent table.
  */
 int
 syscall32_helper_register(struct syscall_helper_data *sd, int flags)
 {
 	int rv;
 
 	rv = kern_syscall_helper_register(freebsd32_sysent, sd, flags);
 	return (rv);
 }
 
 /*
  * Unregister a set of 32-bit syscalls: forwards to the generic helper
  * using the freebsd32 sysent table.
  */
 int
 syscall32_helper_unregister(struct syscall_helper_data *sd)
 {
 	int rv;
 
 	rv = kern_syscall_helper_unregister(freebsd32_sysent, sd);
 	return (rv);
 }
 
 /*
  * Lay out the initial user stack of a 32-bit image.
  *
  * Starting at the ps_strings structure and working towards lower
  * addresses, this places: the signal trampoline (only in the conditional
  * case below), the executable path (when both execpath and auxargs are
  * set), the SSP canary, the 32-bit pagesizes array, the argument and
  * environment strings, room for the ELF auxargs (when auxargs is set) and
  * finally the argv/envp pointer vectors.  The ps_strings fields are
  * filled in along the way.
  *
  * On success *stack_base receives the address of the argv vector and 0 is
  * returned.  Otherwise the copyout()/sv_copyout_auxargs() error, or
  * EFAULT for a failed suword32(), is returned.
  */
 int
 freebsd32_copyout_strings(struct image_params *imgp, uintptr_t *stack_base)
 {
 	struct sysentvec *sysent;
 	int argc, envc, i;
 	uint32_t *vectp;
 	char *stringp;
 	uintptr_t destp, ustringp;
 	struct freebsd32_ps_strings *arginfo;
 	char canary[sizeof(long) * 8];
 	int32_t pagesizes32[MAXPAGESIZES];
 	size_t execpath_len;
 	int error, szsigcode;
 
 	sysent = imgp->sysent;
 
 	arginfo = (struct freebsd32_ps_strings *)PROC_PS_STRINGS(imgp->proc);
 	imgp->ps_strings = arginfo;
 	destp =	(uintptr_t)arginfo;
 
 	/*
 	 * Install sigcode.
 	 */
-	if (sysent->sv_sigcode_base == 0) {
+	if (!PROC_HAS_SHP(imgp->proc)) {
 		szsigcode = *sysent->sv_szsigcode;
 		destp -= szsigcode;
 		destp = rounddown2(destp, sizeof(uint32_t));
 		error = copyout(sysent->sv_sigcode, (void *)destp,
 		    szsigcode);
 		if (error != 0)
 			return (error);
 	}
 
 	/*
 	 * Copy the image path for the rtld.
 	 */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL) {
 		execpath_len = strlen(imgp->execpath) + 1;
 		destp -= execpath_len;
 		imgp->execpathp = (void *)destp;
 		error = copyout(imgp->execpath, imgp->execpathp, execpath_len);
 		if (error != 0)
 			return (error);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= sizeof(canary);
 	imgp->canary = (void *)destp;
 	error = copyout(canary, imgp->canary, sizeof(canary));
 	if (error != 0)
 		return (error);
 	imgp->canarylen = sizeof(canary);
 
 	/*
 	 * Prepare the pagesizes array.
 	 */
 	for (i = 0; i < MAXPAGESIZES; i++)
 		pagesizes32[i] = (uint32_t)pagesizes[i];
 	destp -= sizeof(pagesizes32);
 	destp = rounddown2(destp, sizeof(uint32_t));
 	imgp->pagesizes = (void *)destp;
 	error = copyout(pagesizes32, imgp->pagesizes, sizeof(pagesizes32));
 	if (error != 0)
 		return (error);
 	imgp->pagesizeslen = sizeof(pagesizes32);
 
 	/*
 	 * Allocate room for the argument and environment strings.
 	 */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(uint32_t));
 	ustringp = destp;
 
 	if (imgp->auxargs) {
 		/*
 		 * Allocate room on the stack for the ELF auxargs
 		 * array.  It has up to AT_COUNT entries.
 		 */
 		destp -= AT_COUNT * sizeof(Elf32_Auxinfo);
 		destp = rounddown2(destp, sizeof(uint32_t));
 	}
 
 	vectp = (uint32_t *)destp;
 
 	/*
 	 * Allocate room for the argv[] and env vectors including the
 	 * terminating NULL pointers.
 	 */
 	vectp -= imgp->args->argc + 1 + imgp->args->envc + 1;
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	*stack_base = (uintptr_t)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	error = copyout(stringp, (void *)ustringp,
 	    ARG_MAX - imgp->args->stringspace);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	imgp->argv = vectp;
 	if (suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp) != 0 ||
 	    suword32(&arginfo->ps_nargvstr, argc) != 0)
 		return (EFAULT);
 
 	/*
 	 * Fill in argument portion of vector table.
 	 */
 	for (; argc > 0; --argc) {
 		if (suword32(vectp++, ustringp) != 0)
 			return (EFAULT);
 		/*
 		 * Walk the kernel copy of the string to find its length and
 		 * advance the user address past it and its terminator.
 		 */
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	if (suword32(vectp++, 0) != 0)
 		return (EFAULT);
 
 	imgp->envv = vectp;
 	if (suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp) != 0 ||
 	    suword32(&arginfo->ps_nenvstr, envc) != 0)
 		return (EFAULT);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		if (suword32(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* end of vector table is a null pointer */
 	if (suword32(vectp, 0) != 0)
 		return (EFAULT);
 
 	if (imgp->auxargs) {
 		/* The auxargs go immediately after the envp terminator. */
 		vectp++;
 		error = imgp->sysent->sv_copyout_auxargs(imgp,
 		    (uintptr_t)vectp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * 32-bit kldstat(2).  The caller's version field doubles as the size of
  * the structure it expects; only the two known 32-bit sizes are accepted.
  * The native result is translated into a zeroed struct kld_file_stat32
  * and exactly `version' bytes of it are copied out.
  */
 int
 freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap)
 {
 	struct kld_file_stat32 *kst32;
 	struct kld_file_stat *kst;
 	int rv, version;
 
 	rv = copyin(&uap->stat->version, &version, sizeof(version));
 	if (rv != 0)
 		return (rv);
 	if (!(version == sizeof(struct kld_file_stat_1_32) ||
 	    version == sizeof(struct kld_file_stat32)))
 		return (EINVAL);
 
 	kst = malloc(sizeof(*kst), M_TEMP, M_WAITOK | M_ZERO);
 	kst32 = malloc(sizeof(*kst32), M_TEMP, M_WAITOK | M_ZERO);
 	rv = kern_kldstat(td, uap->fileid, kst);
 	if (rv == 0) {
 		bcopy(&kst->name[0], &kst32->name[0], sizeof(kst->name));
 		CP(*kst, *kst32, refs);
 		CP(*kst, *kst32, id);
 		PTROUT_CP(*kst, *kst32, address);
 		CP(*kst, *kst32, size);
 		bcopy(&kst->pathname[0], &kst32->pathname[0],
 		    sizeof(kst->pathname));
 		kst32->version = version;
 		rv = copyout(kst32, uap->stat, version);
 	}
 	free(kst, M_TEMP);
 	free(kst32, M_TEMP);
 	return (rv);
 }
 
 int
 freebsd32_posix_fallocate(struct thread *td,
     struct freebsd32_posix_fallocate_args *uap)
 {
 	int error;
 
 	error = kern_posix_fallocate(td, uap->fd,
 	    PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len));
 	return (kern_posix_error(td, error));
 }
 
 int
 freebsd32_posix_fadvise(struct thread *td,
     struct freebsd32_posix_fadvise_args *uap)
 {
 	int error;
 
 	error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset),
 	    PAIR32TO64(off_t, uap->len), uap->advice);
 	return (kern_posix_error(td, error));
 }
 
 /*
  * Translate a 32-bit sigevent into the native structure.  Which fields
  * are copied depends on sigev_notify:
  *   SIGEV_NONE       nothing further
  *   SIGEV_THREAD_ID  thread id, then the same fields as SIGEV_SIGNAL
  *   SIGEV_SIGNAL     signal number and value pointer
  *   SIGEV_KEVENT     kqueue, kevent flags and value pointer
  * Any other notification type yields EINVAL; otherwise 0 is returned.
  */
 int
 convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig)
 {
 
 	CP(*sig32, *sig, sigev_notify);
 	switch (sig->sigev_notify) {
 	case SIGEV_NONE:
 		return (0);
 	case SIGEV_THREAD_ID:
 	case SIGEV_SIGNAL:
 		if (sig->sigev_notify == SIGEV_THREAD_ID) {
 			CP(*sig32, *sig, sigev_notify_thread_id);
 		}
 		CP(*sig32, *sig, sigev_signo);
 		PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr);
 		return (0);
 	case SIGEV_KEVENT:
 		CP(*sig32, *sig, sigev_notify_kqueue);
 		CP(*sig32, *sig, sigev_notify_kevent_flags);
 		PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr);
 		return (0);
 	default:
 		return (EINVAL);
 	}
 }
 
 /*
  * 32-bit procctl(2).
  *
  * Commands at or above PROC_PROCCTL_MD_MIN go straight to cpu_procctl().
  * For the others, the first switch prepares the kernel-side argument
  * (copying it in from uap->data where the command takes input), then
  * kern_procctl() is called, and the second switch copies results back out
  * for the commands that produce output.  Unknown commands yield EINVAL.
  */
 int
 freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
 {
 	void *data;
 	union {
 		struct procctl_reaper_status rs;
 		struct procctl_reaper_pids rp;
 		struct procctl_reaper_kill rk;
 	} x;
 	union {
 		struct procctl_reaper_pids32 rp;
 	} x32;
 	int error, error1, flags, signum;
 
 	if (uap->com >= PROC_PROCCTL_MD_MIN)
 		return (cpu_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id),
 		    uap->com, PTRIN(uap->data)));
 
 	/* Phase 1: set up `data' for the requested command. */
 	switch (uap->com) {
 	case PROC_ASLR_CTL:
 	case PROC_PROTMAX_CTL:
 	case PROC_SPROTECT:
 	case PROC_STACKGAP_CTL:
 	case PROC_TRACE_CTL:
 	case PROC_TRAPCAP_CTL:
 	case PROC_NO_NEW_PRIVS_CTL:
 	case PROC_WXMAP_CTL:
 		error = copyin(PTRIN(uap->data), &flags, sizeof(flags));
 		if (error != 0)
 			return (error);
 		data = &flags;
 		break;
 	case PROC_REAP_ACQUIRE:
 	case PROC_REAP_RELEASE:
 		/* These commands take no argument. */
 		if (uap->data != NULL)
 			return (EINVAL);
 		data = NULL;
 		break;
 	case PROC_REAP_STATUS:
 		data = &x.rs;
 		break;
 	case PROC_REAP_GETPIDS:
 		/* The only argument that needs 32-to-native translation. */
 		error = copyin(uap->data, &x32.rp, sizeof(x32.rp));
 		if (error != 0)
 			return (error);
 		CP(x32.rp, x.rp, rp_count);
 		PTRIN_CP(x32.rp, x.rp, rp_pids);
 		data = &x.rp;
 		break;
 	case PROC_REAP_KILL:
 		error = copyin(uap->data, &x.rk, sizeof(x.rk));
 		if (error != 0)
 			return (error);
 		data = &x.rk;
 		break;
 	case PROC_ASLR_STATUS:
 	case PROC_PROTMAX_STATUS:
 	case PROC_STACKGAP_STATUS:
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 	case PROC_NO_NEW_PRIVS_STATUS:
 	case PROC_WXMAP_STATUS:
 		data = &flags;
 		break;
 	case PROC_PDEATHSIG_CTL:
 		error = copyin(uap->data, &signum, sizeof(signum));
 		if (error != 0)
 			return (error);
 		data = &signum;
 		break;
 	case PROC_PDEATHSIG_STATUS:
 		data = &signum;
 		break;
 	default:
 		return (EINVAL);
 	}
 	error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id),
 	    uap->com, data);
 	/* Phase 2: copy results back for commands that return data. */
 	switch (uap->com) {
 	case PROC_REAP_STATUS:
 		if (error == 0)
 			error = copyout(&x.rs, uap->data, sizeof(x.rs));
 		break;
 	case PROC_REAP_KILL:
 		/*
 		 * Copied out even when kern_procctl() failed; the copyout
 		 * error is reported only if there was no earlier error.
 		 */
 		error1 = copyout(&x.rk, uap->data, sizeof(x.rk));
 		if (error == 0)
 			error = error1;
 		break;
 	case PROC_ASLR_STATUS:
 	case PROC_PROTMAX_STATUS:
 	case PROC_STACKGAP_STATUS:
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 	case PROC_NO_NEW_PRIVS_STATUS:
 	case PROC_WXMAP_STATUS:
 		if (error == 0)
 			error = copyout(&flags, uap->data, sizeof(flags));
 		break;
 	case PROC_PDEATHSIG_STATUS:
 		if (error == 0)
 			error = copyout(&signum, uap->data, sizeof(signum));
 		break;
 	}
 	return (error);
 }
 
 /*
  * 32-bit compat entry point for fcntl.
  *
  * The 32-bit argument has to be widened to long before it reaches
  * kern_fcntl_freebsd().  How it is widened depends on the command.
  */
 int
 freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap)
 {
 	long arg;
 
 	/* Default: plain conversion of the argument to long. */
 	arg = uap->arg;
 
 	switch (uap->cmd) {
 	case F_SETLK_REMOTE:
 	case F_SETLKW:
 	case F_SETLK:
 	case F_GETLK:
 	case F_SETFD:
 	case F_SETFL:
 	case F_OGETLK:
 	case F_OSETLK:
 	case F_OSETLKW:
 	case F_KINFO:
 		/*
 		 * These operations interpret the argument as flags or as
 		 * a pointer, so go through unsigned int first.
 		 */
 		arg = (unsigned int)(uap->arg);
 		break;
 	default:
 		break;
 	}
 	return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, arg));
 }
 
 int
 freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	sigset_t set, *ssp;
 	int error;
 
 	if (uap->ts != NULL) {
 		error = copyin(uap->ts, &ts32, sizeof(ts32));
 		if (error != 0)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 	if (uap->set != NULL) {
 		error = copyin(uap->set, &set, sizeof(set));
 		if (error != 0)
 			return (error);
 		ssp = &set;
 	} else
 		ssp = NULL;
 
 	return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp));
 }
 
 int
 freebsd32_sched_rr_get_interval(struct thread *td,
     struct freebsd32_sched_rr_get_interval_args *uap)
 {
 	struct timespec ts;
 	struct timespec32 ts32;
 	int error;
 
 	error = kern_sched_rr_get_interval(td, uap->pid, &ts);
 	if (error == 0) {
 		CP(ts, ts32, tv_sec);
 		CP(ts, ts32, tv_nsec);
 		error = copyout(&ts32, uap->interval, sizeof(ts32));
 	}
 	return (error);
 }
 
 /*
  * Fill the 32-bit timex structure *dst from the native structure *src,
  * one member at a time.  CP() takes (source, destination, member).
  */
 static void
 timex_to_32(struct timex32 *dst, struct timex *src)
 {
 	CP(*src, *dst, modes);
 	CP(*src, *dst, offset);
 	CP(*src, *dst, freq);
 	CP(*src, *dst, maxerror);
 	CP(*src, *dst, esterror);
 	CP(*src, *dst, status);
 	CP(*src, *dst, constant);
 	CP(*src, *dst, precision);
 	CP(*src, *dst, tolerance);
 	CP(*src, *dst, ppsfreq);
 	CP(*src, *dst, jitter);
 	CP(*src, *dst, shift);
 	CP(*src, *dst, stabil);
 	CP(*src, *dst, jitcnt);
 	CP(*src, *dst, calcnt);
 	CP(*src, *dst, errcnt);
 	CP(*src, *dst, stbcnt);
 }
 
 /*
  * Fill the native timex structure *dst from the 32-bit structure *src,
  * one member at a time.  CP() takes (source, destination, member).
  */
 static void
 timex_from_32(struct timex *dst, struct timex32 *src)
 {
 	CP(*src, *dst, modes);
 	CP(*src, *dst, offset);
 	CP(*src, *dst, freq);
 	CP(*src, *dst, maxerror);
 	CP(*src, *dst, esterror);
 	CP(*src, *dst, status);
 	CP(*src, *dst, constant);
 	CP(*src, *dst, precision);
 	CP(*src, *dst, tolerance);
 	CP(*src, *dst, ppsfreq);
 	CP(*src, *dst, jitter);
 	CP(*src, *dst, shift);
 	CP(*src, *dst, stabil);
 	CP(*src, *dst, jitcnt);
 	CP(*src, *dst, calcnt);
 	CP(*src, *dst, errcnt);
 	CP(*src, *dst, stbcnt);
 }
 
 int
 freebsd32_ntp_adjtime(struct thread *td, struct freebsd32_ntp_adjtime_args *uap)
 {
 	struct timex tx;
 	struct timex32 tx32;
 	int error, retval;
 
 	error = copyin(uap->tp, &tx32, sizeof(tx32));
 	if (error == 0) {
 		timex_from_32(&tx, &tx32);
 		error = kern_ntp_adjtime(td, &tx, &retval);
 		if (error == 0) {
 			timex_to_32(&tx32, &tx);
 			error = copyout(&tx32, uap->tp, sizeof(tx32));
 			if (error == 0)
 				td->td_retval[0] = retval;
 		}
 	}
 	return (error);
 }
 
 #ifdef FFCLOCK
 extern struct mtx ffclock_mtx;
 extern struct ffclock_estimate ffclock_estimate;
 extern int8_t ffclock_updated;
 
 /*
  * 32-bit compat entry point for ffclock_setestimate.
  *
  * Requires PRIV_CLOCK_SETTIME.  Reads a struct ffclock_estimate32 from
  * uap->cest, converts it to the native struct ffclock_estimate and
  * installs it as the global ffclock_estimate under ffclock_mtx, bumping
  * ffclock_updated.
  *
  * Returns 0 on success, or the error from priv_check()/copyin().
  */
 int
 freebsd32_ffclock_setestimate(struct thread *td,
     struct freebsd32_ffclock_setestimate_args *uap)
 {
 	struct ffclock_estimate cest;
 	struct ffclock_estimate32 cest32;
 	int error;
 
 	/* Reuse of PRIV_CLOCK_SETTIME. */
 	if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
 		return (error);
 
 	if ((error = copyin(uap->cest, &cest32,
 	    sizeof(struct ffclock_estimate32))) != 0)
 		return (error);
 
 	/*
 	 * CP() takes (source, destination, member).  The data flows from
 	 * the user-supplied 32-bit structure into the native one, so cest32
 	 * must be the source; with the arguments the other way round the
 	 * uninitialized stack copy would be installed as the estimate.
 	 */
 	CP(cest32.update_time, cest.update_time, sec);
 	memcpy(&cest.update_time.frac, &cest32.update_time.frac, sizeof(uint64_t));
 	CP(cest32, cest, update_ffcount);
 	CP(cest32, cest, leapsec_next);
 	CP(cest32, cest, period);
 	CP(cest32, cest, errb_abs);
 	CP(cest32, cest, errb_rate);
 	CP(cest32, cest, status);
 	CP(cest32, cest, leapsec_total);
 	CP(cest32, cest, leapsec);
 
 	mtx_lock(&ffclock_mtx);
 	memcpy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate));
 	ffclock_updated++;
 	mtx_unlock(&ffclock_mtx);
 	return (error);
 }
 
 /*
  * 32-bit compat entry point for ffclock_getestimate.
  *
  * Takes a snapshot of the global ffclock_estimate under ffclock_mtx,
  * converts it to struct ffclock_estimate32 and copies it to uap->cest.
  *
  * Returns 0 on success, or the error from copyout().
  */
 int
 freebsd32_ffclock_getestimate(struct thread *td,
     struct freebsd32_ffclock_getestimate_args *uap)
 {
 	struct ffclock_estimate cest;
 	struct ffclock_estimate32 cest32;
 	int error;
 
 	mtx_lock(&ffclock_mtx);
 	memcpy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
 	mtx_unlock(&ffclock_mtx);
 
 	/*
 	 * CP() takes (source, destination, member).  The data flows from
 	 * the native snapshot into the 32-bit structure that is copied out,
 	 * so cest must be the source; with the arguments the other way
 	 * round userland would receive uninitialized kernel stack contents.
 	 */
 	CP(cest.update_time, cest32.update_time, sec);
 	memcpy(&cest32.update_time.frac, &cest.update_time.frac, sizeof(uint64_t));
 	CP(cest, cest32, update_ffcount);
 	CP(cest, cest32, leapsec_next);
 	CP(cest, cest32, period);
 	CP(cest, cest32, errb_abs);
 	CP(cest, cest32, errb_rate);
 	CP(cest, cest32, status);
 	CP(cest, cest32, leapsec_total);
 	CP(cest, cest32, leapsec);
 
 	error = copyout(&cest32, uap->cest, sizeof(struct ffclock_estimate32));
 	return (error);
 }
 #else /* !FFCLOCK */
 /* Variant compiled when FFCLOCK is not defined: always returns ENOSYS. */
 int
 freebsd32_ffclock_setestimate(struct thread *td,
     struct freebsd32_ffclock_setestimate_args *uap)
 {
 	return (ENOSYS);
 }
 
 /* Variant compiled when FFCLOCK is not defined: always returns ENOSYS. */
 int
 freebsd32_ffclock_getestimate(struct thread *td,
     struct freebsd32_ffclock_getestimate_args *uap)
 {
 	return (ENOSYS);
 }
 #endif /* FFCLOCK */
 
 #ifdef COMPAT_43
 /*
  * Old (COMPAT_43) sethostid for 32-bit processes.
  *
  * Widens the supplied host id to long and stores it through the
  * CTL_KERN / KERN_HOSTID sysctl using kernel_sysctl().
  */
 int
 ofreebsd32_sethostid(struct thread *td, struct ofreebsd32_sethostid_args *uap)
 {
 	int mib[] = { CTL_KERN, KERN_HOSTID };
 	long newid = uap->hostid;
 
 	return (kernel_sysctl(td, mib, nitems(mib), NULL, NULL, &newid,
 	    sizeof(newid), NULL, 0));
 }
 #endif
diff --git a/sys/i386/i386/exec_machdep.c b/sys/i386/i386/exec_machdep.c
index ba85cf9756a9..29c06ff86ca8 100644
--- a/sys/i386/i386/exec_machdep.c
+++ b/sys/i386/i386/exec_machdep.c
@@ -1,1447 +1,1447 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 2018 The FreeBSD Foundation
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Portions of this software were developed by A. Joseph Koshy under
  * sponsorship from the FreeBSD Foundation and Google, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_kstack_pages.h"
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/ptrace.h>
 #include <sys/reg.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/ucontext.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 
 #ifdef DDB
 #ifndef KDB
 #error KDB must be enabled in order for DDB to work!
 #endif
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 #endif
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/pcb_ext.h>
 #include <machine/proc.h>
 #include <machine/sigframe.h>
 #include <machine/specialreg.h>
 #include <machine/sysarch.h>
 #include <machine/trap.h>
 
 static void fpstate_drop(struct thread *td);
 static void get_fpcontext(struct thread *td, mcontext_t *mcp,
     char *xfpusave, size_t xfpusave_len);
 static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
     char *xfpustate, size_t xfpustate_len);
 #ifdef COMPAT_43
 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
 #endif
 #ifdef COMPAT_FREEBSD4
 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
 #endif
 
 extern struct sysentvec elf32_freebsd_sysvec;
 
 _Static_assert(sizeof(mcontext_t) == 640, "mcontext_t size incorrect");
 _Static_assert(sizeof(ucontext_t) == 704, "ucontext_t size incorrect");
 _Static_assert(sizeof(siginfo_t) == 64, "siginfo_t size incorrect");
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored at top to call routine,
  * followed by call to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the frame pointer, it
  * returns to the user specified pc, psl.
  */
 #ifdef COMPAT_43
 /*
  * Deliver a signal using the old osigframe layout.
  *
  * Called by sendsig() for signals that are members of ps_osigset.
  * Builds a struct osigframe on the kernel stack, copies it to the user
  * stack (or to the alternate signal stack) and redirects the trapframe
  * to the signal trampoline.
  *
  * Locking: the process lock and ps_mtx must be held on entry.  Both are
  * released before the frame is copied out and are held again on return.
  * If the copyout fails the process is terminated with SIGILL.
  */
 static void
 osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct osigframe sf, *fp;
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int sig;
 	int oonstack;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the very top of the alternate stack. */
 		fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct osigframe));
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/* Place the frame directly below the interrupted %esp. */
 		fp = (struct osigframe *)regs->tf_esp - 1;
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
 	bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo));
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
 		sf.sf_siginfo.si_signo = sig;
 		sf.sf_siginfo.si_code = ksi->ksi_code;
 		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
 		sf.sf_addr = 0;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_arg2 = ksi->ksi_code;
 		sf.sf_addr = (register_t)ksi->ksi_addr;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	/* Drop both locks: the rest touches only local and user memory. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/* Save most if not all of trap frame. */
 	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
 	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
 	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
 	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
 	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
 	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
 	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
 	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
 	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
 	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
 	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
 	/* %gs is read with rgs() rather than from the trapframe. */
 	sf.sf_siginfo.si_sc.sc_gs = rgs();
 	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
 
 	/* Build the signal context to be used by osigreturn(). */
 	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
 	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
 	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
 	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
 	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
 	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
 	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 
 		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
 		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
 		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
 		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
 
 		/* Without VME, VIF/VIP come from the saved vm86_eflags. */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_siginfo.si_sc.sc_ps =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/* See sendsig() for comments. */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
 	}
 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
 	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Resume userland in the old-style trampoline with the new stack. */
 	regs->tf_esp = (int)fp;
-	if (p->p_sysent->sv_sigcode_base != 0) {
+	if (PROC_HAS_SHP(p)) {
 		regs->tf_eip = PROC_SIGCODE(p) + szsigcode -
 		    szosigcode;
 	} else {
 		/* a.out sysentvec does not use shared page */
 		regs->tf_eip = PROC_PS_STRINGS(p) - szosigcode;
 	}
 	regs->tf_eflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 	/* Re-take the locks the caller expects to hold on return. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 #endif /* COMPAT_43 */
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Deliver a signal using the FreeBSD 4 sigframe layout.
  *
  * Called by sendsig() for signals that are members of ps_freebsd4.
  * Builds a struct freebsd4_sigframe on the kernel stack, copies it to
  * the user stack (or to the alternate signal stack) and redirects the
  * trapframe to the signal trampoline.
  *
  * Locking: the process lock and ps_mtx must be held on entry.  Both are
  * released before the frame is copied out and are held again on return.
  * If the copyout fails the process is terminated with SIGILL.
  */
 static void
 freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct freebsd4_sigframe sf, *sfp;
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int sig;
 	int oonstack;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = td->td_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/* The trapframe is copied wholesale, starting at mc_fs. */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 	bzero(sf.sf_uc.uc_mcontext.mc_fpregs,
 	    sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
 	bzero(sf.sf_uc.uc_mcontext.__spare__,
 	    sizeof(sf.sf_uc.uc_mcontext.__spare__));
 	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the very top of the alternate stack. */
 		sfp = (struct freebsd4_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct freebsd4_sigframe));
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/* Place the frame directly below the interrupted %esp. */
 		sfp = (struct freebsd4_sigframe *)regs->tf_esp - 1;
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	bzero(&sf.sf_si, sizeof(sf.sf_si));
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill in POSIX parts */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = ksi->ksi_code;
 		sf.sf_si.si_addr = ksi->ksi_addr;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = ksi->ksi_code;
 		sf.sf_addr = (register_t)ksi->ksi_addr;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	/* Drop both locks: the rest touches only local and user memory. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		/* Without VME, VIF/VIP come from the saved vm86_eflags. */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
 	}
 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Resume userland in the FreeBSD 4 trampoline with the new stack. */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = PROC_SIGCODE(p) + szsigcode -
 	    szfreebsd4_sigcode;
 	regs->tf_eflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 	/* Re-take the locks the caller expects to hold on return. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 #endif	/* COMPAT_FREEBSD4 */
 
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct sigframe sf, *sfp;
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	char *sp;
 	struct trapframe *regs;
 	struct segment_descriptor *sdp;
 	char *xfpusave;
 	size_t xfpusave_len;
 	int sig;
 	int oonstack;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 #ifdef COMPAT_FREEBSD4
 	if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
 		freebsd4_sendsig(catcher, ksi, mask);
 		return;
 	}
 #endif
 #ifdef COMPAT_43
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		osendsig(catcher, ksi, mask);
 		return;
 	}
 #endif
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
 		xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
 		xfpusave = __builtin_alloca(xfpusave_len);
 	} else {
 		xfpusave_len = 0;
 		xfpusave = NULL;
 	}
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = td->td_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
 	get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
 	fpstate_drop(td);
 	/*
 	 * Unconditionally fill the fsbase and gsbase into the mcontext.
 	 */
 	sdp = &td->td_pcb->pcb_fsd;
 	sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 |
 	    sdp->sd_lobase;
 	sdp = &td->td_pcb->pcb_gsd;
 	sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
 	    sdp->sd_lobase;
 	bzero(sf.sf_uc.uc_mcontext.mc_spare2,
 	    sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		sp = (char *)regs->tf_esp - 128;
 	if (xfpusave != NULL) {
 		sp -= xfpusave_len;
 		sp = (char *)((unsigned int)sp & ~0x3F);
 		sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
 	}
 	sp -= sizeof(struct sigframe);
 
 	/* Align to 16 bytes. */
 	sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	bzero(&sf.sf_si, sizeof(sf.sf_si));
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill in POSIX parts */
 		sf.sf_si = ksi->ksi_info;
 		sf.sf_si.si_signo = sig; /* maybe a translated signal */
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = ksi->ksi_code;
 		sf.sf_addr = (register_t)ksi->ksi_addr;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
 	}
 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
 	    (xfpusave != NULL && copyout(xfpusave,
 	    (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
 	    != 0)) {
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = PROC_SIGCODE(p);
 	if (regs->tf_eip == 0)
 		regs->tf_eip = PROC_PS_STRINGS(p) - szsigcode;
 	regs->tf_eflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * System call to cleanup state after a signal has been taken.  Reset
  * signal mask and stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by context left by
  * sendsig. Check carefully to make sure that the user has not
  * modified the state to gain improper privileges.
  */
 #ifdef COMPAT_43
 /*
  * Return from a signal handler that was entered through osendsig().
  *
  * Reads the struct osigcontext at uap->sigcntxp, validates the requested
  * eflags and %cs, loads the saved registers into the thread's trapframe
  * and restores the on-stack flag and the signal mask.
  *
  * Returns EJUSTRETURN on success, the copyin() error if the context
  * cannot be read, or EINVAL if the context fails validation.
  */
 int
 osigreturn(struct thread *td, struct osigreturn_args *uap)
 {
 	struct osigcontext sc;
 	struct trapframe *regs;
 	struct osigcontext *scp;
 	int eflags, error;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 	/* Work on a kernel copy so userland cannot change it under us. */
 	error = copyin(uap->sigcntxp, &sc, sizeof(sc));
 	if (error != 0)
 		return (error);
 	scp = &sc;
 	eflags = scp->sc_ps;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (td->td_pcb->pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		/* Note: SIGBUS is posted but the restore below still runs. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 		}
 
 		/* Only the user-changeable flag bits are taken from sc_ps. */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		tf->tf_vm86_ds = scp->sc_ds;
 		tf->tf_vm86_es = scp->sc_es;
 		tf->tf_vm86_fs = scp->sc_fs;
 		tf->tf_vm86_gs = scp->sc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 			return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		if (!CS_SECURE(scp->sc_cs)) {
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_trapno = T_PROTFLT;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 			return (EINVAL);
 		}
 		regs->tf_ds = scp->sc_ds;
 		regs->tf_es = scp->sc_es;
 		regs->tf_fs = scp->sc_fs;
 	}
 
 	/* Restore remaining registers. */
 	regs->tf_eax = scp->sc_eax;
 	regs->tf_ebx = scp->sc_ebx;
 	regs->tf_ecx = scp->sc_ecx;
 	regs->tf_edx = scp->sc_edx;
 	regs->tf_esi = scp->sc_esi;
 	regs->tf_edi = scp->sc_edi;
 	regs->tf_cs = scp->sc_cs;
 	regs->tf_ss = scp->sc_ss;
 	regs->tf_isp = scp->sc_isp;
 	regs->tf_ebp = scp->sc_fp;
 	regs->tf_esp = scp->sc_sp;
 	regs->tf_eip = scp->sc_pc;
 	regs->tf_eflags = eflags;
 
 #if defined(COMPAT_43)
 	if (scp->sc_onstack & 1)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 	/* sc_mask is in the old mask format, hence SIGPROCMASK_OLD. */
 	kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL,
 	    SIGPROCMASK_OLD);
 	return (EJUSTRETURN);
 }
 #endif /* COMPAT_43 */
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Return from a signal handler that was entered through
  * freebsd4_sendsig().
  *
  * Reads the struct freebsd4_ucontext at uap->sigcntxp, validates the
  * requested eflags and %cs, copies the saved register set back into the
  * thread's trapframe and restores the on-stack flag and the signal mask.
  *
  * Returns EJUSTRETURN on success, the copyin() error if the context
  * cannot be read, or EINVAL if the context fails validation.
  */
 int
 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
 {
 	struct freebsd4_ucontext uc;
 	struct trapframe *regs;
 	struct freebsd4_ucontext *ucp;
 	int cs, eflags, error;
 	ksiginfo_t ksi;
 
 	/* Work on a kernel copy so userland cannot change it under us. */
 	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
 	if (error != 0)
 		return (error);
 	ucp = &uc;
 	regs = td->td_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (td->td_pcb->pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		/* Note: SIGBUS is posted but the restore below still runs. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 		}
 		/* Only the user-changeable flag bits come from mc_eflags. */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * Load the whole saved trapframe, then override eflags with
 		 * the filtered value and move the saved segment registers
 		 * into the vm86 slots.
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 			uprintf(
 			    "pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, eflags);
 			return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, cs);
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_trapno = T_PROTFLT;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 			return (EINVAL);
 		}
 
 		/* Validation passed: load the saved trapframe wholesale. */
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 #if defined(COMPAT_43)
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 	return (EJUSTRETURN);
 }
 #endif	/* COMPAT_FREEBSD4 */
 
 /*
  * Return from a signal handler: restore the machine context that was
  * saved in the user-supplied ucontext at uap->sigcntxp.
  *
  * The context is copied in and validated before anything is written to
  * the thread's trapframe.  Returns EJUSTRETURN on success so that the
  * restored frame is used as-is, the copyin() error if the context (or
  * the extended FPU state) cannot be read, and EINVAL when the context
  * fails one of the checks below.
  */
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t uc;
 	struct proc *p;
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	char *xfpustate;
 	size_t xfpustate_len;
 	int cs, eflags, error, ret;
 	ksiginfo_t ksi;
 
 	p = td->td_proc;
 
 	/* Work on a kernel copy so later checks cannot race with userland. */
 	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
 	if (error != 0)
 		return (error);
 	ucp = &uc;
 	/* Reject any mc_flags bit that is not covered by _MC_FLAG_MASK. */
 	if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
 		uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
 		    td->td_name, ucp->uc_mcontext.mc_flags);
 		return (EINVAL);
 	}
 	regs = td->td_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		/* The saved context asks to resume in vm86 mode. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (td->td_pcb->pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 			/*
 			 * Post SIGBUS; note that the restore below still
 			 * proceeds after the signal has been queued.
 			 */
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 		}
 
 		/*
 		 * Merge eflags: only the bits in the *_USERCHANGE mask are
 		 * taken from the user context, everything else is kept from
 		 * the current frame, and PSL_VM is forced on.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * Bulk-copy the saved registers starting at mc_fs over the
 		 * trapframe.  NOTE(review): this relies on the mcontext
 		 * register block having the same layout as struct
 		 * trapframe -- confirm against the structure definitions.
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		/*
 		 * Move the user's segment values into the vm86 slots of the
 		 * frame and put the ordinary user data selector into the
 		 * regular segment fields.
 		 */
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 			uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, eflags);
 			return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			uprintf("pid %d (%s): sigreturn cs = 0x%x\n",
 			    td->td_proc->p_pid, td->td_name, cs);
 			/* Report the bad selector as a protection fault. */
 			ksiginfo_init_trap(&ksi);
 			ksi.ksi_signo = SIGBUS;
 			ksi.ksi_code = BUS_OBJERR;
 			ksi.ksi_trapno = T_PROTFLT;
 			ksi.ksi_addr = (void *)regs->tf_eip;
 			trapsignal(td, &ksi);
 			return (EINVAL);
 		}
 
 		if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
 			/*
 			 * The context carries extended FPU state in a
 			 * separate user buffer.  Bound its length before
 			 * using it to size the on-stack copy.
 			 */
 			xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
 			if (xfpustate_len > cpu_max_ext_state_size -
 			    sizeof(union savefpu)) {
 				uprintf(
 			    "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
 				    p->p_pid, td->td_name, xfpustate_len);
 				return (EINVAL);
 			}
 			xfpustate = __builtin_alloca(xfpustate_len);
 			error = copyin(
 			    (const void *)uc.uc_mcontext.mc_xfpustate,
 			    xfpustate, xfpustate_len);
 			if (error != 0) {
 				uprintf(
 	"pid %d (%s): sigreturn copying xfpustate failed\n",
 				    p->p_pid, td->td_name);
 				return (error);
 			}
 		} else {
 			xfpustate = NULL;
 			xfpustate_len = 0;
 		}
 		/* Restore FPU state first; the frame is only written on success. */
 		ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
 		    xfpustate_len);
 		if (ret != 0)
 			return (ret);
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 #if defined(COMPAT_43)
 	/* Bit 0 of mc_onstack says whether the handler ran on the alt stack. */
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	/* Reinstate the signal mask that was saved in the context. */
 	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 	return (EJUSTRETURN);
 }
 
 /*
  * Forget any hardware debug register state recorded in the pcb.
  * Such state has no meaning for a freshly exec'd image.  Nothing is
  * done unless the pcb is flagged as using the debug registers.
  */
 void
 x86_clear_dbregs(struct pcb *pcb)
 {
 	if ((pcb->pcb_flags & PCB_DBREGS) != 0) {
 		pcb->pcb_dr0 = 0;
 		pcb->pcb_dr1 = 0;
 		pcb->pcb_dr2 = 0;
 		pcb->pcb_dr3 = 0;
 		pcb->pcb_dr6 = 0;
 		pcb->pcb_dr7 = 0;
 
 		/*
 		 * When this is the pcb of the running context the CPU's
 		 * registers must be reset as well, or they would leak
 		 * into whatever process runs next.
 		 */
 		if (curpcb == pcb)
 			reset_dbregs();
 
 		pcb->pcb_flags &= ~PCB_DBREGS;
 	}
 }
 
 #ifdef COMPAT_43
 /*
  * Install LDT slot 0 for the current thread as a present, 32-bit,
  * user-privilege segment of type SDT_MEMERA with a maximal
  * page-granular limit.  Its base is the lcall trampoline, located
  * sz_lcall_tramp bytes below the process's ps_strings address.
  * The result of i386_set_ldt() is not checked.
  */
 static void
 setup_priv_lcall_gate(struct proc *p)
 {
 	union descriptor seg;
 	struct i386_ldt_args ldt_args;
 	u_int tramp;
 
 	tramp = p->p_sysent->sv_psstrings - sz_lcall_tramp;
 
 	/* Describe the segment. */
 	bzero(&seg, sizeof(seg));
 	seg.sd.sd_lobase = tramp;
 	seg.sd.sd_hibase = tramp >> 24;
 	seg.sd.sd_lolimit = 0xffff;
 	seg.sd.sd_hilimit = 0xf;
 	seg.sd.sd_gran = 1;
 	seg.sd.sd_def32 = 1;
 	seg.sd.sd_p = 1;
 	seg.sd.sd_dpl = SEL_UPL;
 	seg.sd.sd_type = SDT_MEMERA;
 
 	/* Request exactly one descriptor, starting at slot 0. */
 	bzero(&ldt_args, sizeof(ldt_args));
 	ldt_args.num = 1;
 	ldt_args.start = 0;
 
 	i386_set_ldt(curthread, &ldt_args, &seg);
 }
 #endif
 
 /*
  * Reset registers to default values on exec.
  *
  * td is the thread performing the exec, imgp supplies the entry point
  * and ps_strings address of the new image, and stack is the initial
  * user stack pointer.  Besides the trapframe, this also resets %gs,
  * any private LDT, the fs/gs bases, the debug registers and the FPU
  * state.
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *regs;
 	struct pcb *pcb;
 	register_t saved_eflags;
 
 	regs = td->td_frame;
 	pcb = td->td_pcb;
 
 	/* Reset pcb->pcb_gs and %gs before possibly invalidating it. */
 	pcb->pcb_gs = _udatasel;
 	load_gs(_udatasel);
 
 	/*
 	 * Drop a private LDT left over from the old image.
 	 * NOTE(review): dt_lock is only released explicitly on the
 	 * no-LDT path, so user_ldt_free() presumably drops it itself --
 	 * confirm against its definition.
 	 */
 	mtx_lock_spin(&dt_lock);
 	if (td->td_proc->p_md.md_ldt != NULL)
 		user_ldt_free(td);
 	else
 		mtx_unlock_spin(&dt_lock);
 
 #ifdef COMPAT_43
 	/*
 	 * Install the lcall segment when the image's ps_strings address
 	 * differs from that of the native 32-bit ELF sysvec.
 	 */
 	if (td->td_proc->p_sysent->sv_psstrings !=
 	    elf32_freebsd_sysvec.sv_psstrings)
 		setup_priv_lcall_gate(td->td_proc);
 #endif
 
 	/*
 	 * Reset the fs and gs bases.  The values from the old address
 	 * space do not make sense for the new program.  In particular,
 	 * gsbase might be the TLS base for the old program but the new
 	 * program has no TLS now.
 	 */
 	set_fsbase(td, 0);
 	set_gsbase(td, 0);
 
 	/* Carry only the trace flag (PSL_T) over into the new image. */
 	saved_eflags = regs->tf_eflags & PSL_T;
 	/*
 	 * Zeroing the whole frame also makes sure edx is 0x0 on entry.
 	 * Linux binaries depend on it.
 	 */
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_eip = imgp->entry_addr;
 	regs->tf_esp = stack;
 	regs->tf_eflags = PSL_USER | saved_eflags;
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_cs = _ucodesel;
 
 	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 	regs->tf_ebx = (register_t)imgp->ps_strings;
 
 	x86_clear_dbregs(pcb);
 
 	pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
 
 	/*
 	 * Drop the FP state if we hold it, so that the process gets a
 	 * clean FP state if it uses the FPU again.
 	 */
 	fpstate_drop(td);
 }
 
 /*
  * Export the general-purpose register state of td into *regs.
  * %gs is taken from the pcb; every other register comes from the
  * thread's trapframe via fill_frame_regs(), whose result is returned.
  */
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 
 	regs->r_gs = td->td_pcb->pcb_gs;
 	return (fill_frame_regs(td->td_frame, regs));
 }
 
 /*
  * Copy the registers held in trapframe tp into *regs.  The error and
  * trap number fields of *regs are always reported as zero.  r_gs is
  * not touched here.  Always returns 0.
  */
 int
 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 {
 
 	/* Segment registers. */
 	regs->r_cs = tp->tf_cs;
 	regs->r_ss = tp->tf_ss;
 	regs->r_ds = tp->tf_ds;
 	regs->r_es = tp->tf_es;
 	regs->r_fs = tp->tf_fs;
 
 	/* General-purpose registers. */
 	regs->r_eax = tp->tf_eax;
 	regs->r_ebx = tp->tf_ebx;
 	regs->r_ecx = tp->tf_ecx;
 	regs->r_edx = tp->tf_edx;
 	regs->r_esi = tp->tf_esi;
 	regs->r_edi = tp->tf_edi;
 	regs->r_ebp = tp->tf_ebp;
 	regs->r_esp = tp->tf_esp;
 
 	/* Program counter and flags. */
 	regs->r_eip = tp->tf_eip;
 	regs->r_eflags = tp->tf_eflags;
 
 	/* Not taken from the frame. */
 	regs->r_trapno = 0;
 	regs->r_err = 0;
 
 	return (0);
 }
 
 /*
  * Load the register set *regs into thread td.  The new eflags and %cs
  * values are vetted with EFL_SECURE() and CS_SECURE() first; if either
  * check fails nothing is modified and EINVAL is returned.  %gs goes to
  * the pcb, everything else to the trapframe.  Returns 0 on success.
  */
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 
 	if (!EFL_SECURE(regs->r_eflags, frame->tf_eflags))
 		return (EINVAL);
 	if (!CS_SECURE(regs->r_cs))
 		return (EINVAL);
 
 	/* Segment registers. */
 	frame->tf_cs = regs->r_cs;
 	frame->tf_ss = regs->r_ss;
 	frame->tf_ds = regs->r_ds;
 	frame->tf_es = regs->r_es;
 	frame->tf_fs = regs->r_fs;
 	td->td_pcb->pcb_gs = regs->r_gs;
 
 	/* General-purpose registers. */
 	frame->tf_eax = regs->r_eax;
 	frame->tf_ebx = regs->r_ebx;
 	frame->tf_ecx = regs->r_ecx;
 	frame->tf_edx = regs->r_edx;
 	frame->tf_esi = regs->r_esi;
 	frame->tf_edi = regs->r_edi;
 	frame->tf_ebp = regs->r_ebp;
 	frame->tf_esp = regs->r_esp;
 
 	/* Program counter and flags. */
 	frame->tf_eip = regs->r_eip;
 	frame->tf_eflags = regs->r_eflags;
 
 	return (0);
 }
 
 /*
  * Export the user FPU state of td into *fpregs.  The thread must be
  * the current one or be stopped/suspended.  When cpu_fxsr is set the
  * saved XMM-format area is converted with npx_fill_fpregs_xmm();
  * otherwise the saved 387 area is copied out verbatim.
  * Always returns 0.
  */
 int
 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 
 	KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
 	    P_SHOULDSTOP(td->td_proc),
 	    ("not suspended thread %p", td));
 
 	npxgetregs(td);
 
 	if (!cpu_fxsr) {
 		bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
 		    sizeof(*fpregs));
 		return (0);
 	}
 
 	npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
 	    (struct save87 *)fpregs);
 	return (0);
 }
 
 /*
  * Load *fpregs into the user FPU save area of td and mark the user
  * FPU state as initialized.  The update runs inside a critical
  * section.  When cpu_fxsr is set the data is converted into the XMM
  * save format; otherwise it is copied into the 387 area as-is.
  * Always returns 0.
  */
 int
 set_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 
 	critical_enter();
 	if (!cpu_fxsr) {
 		bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
 		    sizeof(*fpregs));
 	} else {
 		npx_set_fpregs_xmm((struct save87 *)fpregs,
 		    &get_pcb_user_save_td(td)->sv_xmm);
 	}
 	npxuserinited(td);
 	critical_exit();
 
 	return (0);
 }
 
 /*
  * Get machine context.
  *
  * Capture the register state of td in *mcp.  When GET_MC_CLEAR_RET is
  * set in flags, eax and edx are reported as zero and the carry flag is
  * cleared in the saved eflags.  No extended FPU state is attached:
  * mc_flags, mc_xfpustate and mc_xfpustate_len are all left at zero.
  * Always returns 0.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 {
 	struct segment_descriptor *fsd, *gsd;
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 
 	/* sigonstack() is evaluated with the current process locked. */
 	PROC_LOCK(curthread->td_proc);
 	mcp->mc_onstack = sigonstack(frame->tf_esp);
 	PROC_UNLOCK(curthread->td_proc);
 
 	/* Segment registers; %gs lives in the pcb. */
 	mcp->mc_gs = td->td_pcb->pcb_gs;
 	mcp->mc_fs = frame->tf_fs;
 	mcp->mc_es = frame->tf_es;
 	mcp->mc_ds = frame->tf_ds;
 	mcp->mc_cs = frame->tf_cs;
 	mcp->mc_ss = frame->tf_ss;
 
 	/* Registers that are copied unconditionally. */
 	mcp->mc_edi = frame->tf_edi;
 	mcp->mc_esi = frame->tf_esi;
 	mcp->mc_ebp = frame->tf_ebp;
 	mcp->mc_isp = frame->tf_isp;
 	mcp->mc_ebx = frame->tf_ebx;
 	mcp->mc_ecx = frame->tf_ecx;
 	mcp->mc_eip = frame->tf_eip;
 	mcp->mc_esp = frame->tf_esp;
 
 	/* Return-value registers and carry, optionally cleared. */
 	if ((flags & GET_MC_CLEAR_RET) != 0) {
 		mcp->mc_eax = 0;
 		mcp->mc_edx = 0;
 		mcp->mc_eflags = frame->tf_eflags & ~PSL_C;
 	} else {
 		mcp->mc_eax = frame->tf_eax;
 		mcp->mc_edx = frame->tf_edx;
 		mcp->mc_eflags = frame->tf_eflags;
 	}
 
 	mcp->mc_len = sizeof(*mcp);
 
 	/* Legacy FPU state only: no extended-state buffer is passed. */
 	get_fpcontext(td, mcp, NULL, 0);
 
 	/* Reassemble the fs/gs bases from their split descriptor fields. */
 	fsd = &td->td_pcb->pcb_fsd;
 	mcp->mc_fsbase = fsd->sd_hibase << 24 | fsd->sd_lobase;
 	gsd = &td->td_pcb->pcb_gsd;
 	mcp->mc_gsbase = gsd->sd_hibase << 24 | gsd->sd_lobase;
 
 	mcp->mc_flags = 0;
 	mcp->mc_xfpustate = 0;
 	mcp->mc_xfpustate_len = 0;
 	bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
 
 	return (0);
 }
 
 /*
  * Set machine context.
  *
  * Install the register state described by *mcp into td.  Only the
  * eflags bits within PSL_USERCHANGE are taken from the context, and
  * the %cs selector is left alone.  The context is rejected with EINVAL
  * if its length or flags are unexpected or the advertised extended FPU
  * state is too large; a copyin() or set_fpcontext() failure is
  * returned as-is.  The trapframe is only written once the FPU state
  * has been restored successfully.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct trapframe *frame;
 	char *xstate;
 	int error, new_eflags;
 
 	frame = td->td_frame;
 
 	if (mcp->mc_len != sizeof(*mcp))
 		return (EINVAL);
 	if ((mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
 		return (EINVAL);
 
 	/* Merge user-changeable flag bits into the current eflags. */
 	new_eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
 	    (tp_eflags_keep(frame));
 
 	xstate = NULL;
 	if (mcp->mc_flags & _MC_HASFPXSTATE) {
 		/* Bound the length before using it to size the stack copy. */
 		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 		    sizeof(union savefpu))
 			return (EINVAL);
 		xstate = __builtin_alloca(mcp->mc_xfpustate_len);
 		error = copyin((void *)mcp->mc_xfpustate, xstate,
 		    mcp->mc_xfpustate_len);
 		if (error != 0)
 			return (error);
 	}
 
 	error = set_fpcontext(td, mcp, xstate, mcp->mc_xfpustate_len);
 	if (error != 0)
 		return (error);
 
 	/* Segment registers other than %cs; %gs lives in the pcb. */
 	frame->tf_ss = mcp->mc_ss;
 	frame->tf_ds = mcp->mc_ds;
 	frame->tf_es = mcp->mc_es;
 	frame->tf_fs = mcp->mc_fs;
 	td->td_pcb->pcb_gs = mcp->mc_gs;
 
 	/* General-purpose registers. */
 	frame->tf_eax = mcp->mc_eax;
 	frame->tf_ebx = mcp->mc_ebx;
 	frame->tf_ecx = mcp->mc_ecx;
 	frame->tf_edx = mcp->mc_edx;
 	frame->tf_esi = mcp->mc_esi;
 	frame->tf_edi = mcp->mc_edi;
 	frame->tf_ebp = mcp->mc_ebp;
 	frame->tf_esp = mcp->mc_esp;
 
 	/* Program counter and the merged flags. */
 	frame->tf_eip = mcp->mc_eip;
 	frame->tf_eflags = new_eflags;
 
 	return (0);
 }
 
 /*
  * Save the FPU state of td into the machine context *mcp and,
  * optionally, its extended state into the caller's buffer.
  *
  * mcp           receives mc_ownedfp (the result of npxgetregs()), the
  *               legacy save area in mc_fpstate and mc_fpformat.
  * xfpusave      kernel buffer for the extended state; unused when
  *               xfpusave_len is 0 or use_xsave is false.
  * xfpusave_len  size of that buffer in bytes.
  *
  * When extended state is stored, _MC_HASFPXSTATE is set in mc_flags and
  * mc_xfpustate_len is set to the number of bytes written.  If the
  * buffer is larger than the extended area
  * (cpu_max_ext_state_size - sizeof(union savefpu)), the surplus tail is
  * zeroed so that no uninitialized bytes are handed back to the caller.
  */
 static void
 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
     size_t xfpusave_len)
 {
 	size_t max_len, len;
 
 	mcp->mc_ownedfp = npxgetregs(td);
 	bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 	    sizeof(mcp->mc_fpstate));
 	mcp->mc_fpformat = npxformat();
 	if (!use_xsave || xfpusave_len == 0)
 		return;
 	max_len = cpu_max_ext_state_size - sizeof(union savefpu);
 	len = xfpusave_len;
 	if (len > max_len) {
 		/*
 		 * Clear the part of the buffer beyond the extended area.
 		 * This must use the original length: clamping first would
 		 * make the size zero and leave the tail untouched.
 		 */
 		bzero(xfpusave + max_len, len - max_len);
 		len = max_len;
 	}
 	mcp->mc_flags |= _MC_HASFPXSTATE;
 	mcp->mc_xfpustate_len = len;
 	/* The extended state directly follows the legacy save area. */
 	bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 }
 
 /*
  * Restore the FPU state of td from the machine context *mcp.
  *
  * A context with format _MC_FPFMT_NODEV carries no FPU state and is
  * accepted without doing anything.  Any format other than 387 or XMM,
  * or an unknown mc_ownedfp value, yields EINVAL.  _MC_FPOWNED_NONE
  * discards the thread's FPU state; _MC_FPOWNED_FPU and _MC_FPOWNED_PCB
  * load the saved state (plus the optional extended state in
  * xfpustate/xfpustate_len) and return the result of npxsetregs().
  */
 static int
 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
     size_t xfpustate_len)
 {
 
 	if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 		return (0);
 	if (mcp->mc_fpformat != _MC_FPFMT_387 &&
 	    mcp->mc_fpformat != _MC_FPFMT_XMM)
 		return (EINVAL);
 
 	if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 		/* We don't care what state is left in the FPU or PCB. */
 		fpstate_drop(td);
 		return (0);
 	}
 
 	if (mcp->mc_ownedfp != _MC_FPOWNED_FPU &&
 	    mcp->mc_ownedfp != _MC_FPOWNED_PCB)
 		return (EINVAL);
 
 	return (npxsetregs(td, (union savefpu *)&mcp->mc_fpstate,
 	    xfpustate, xfpustate_len));
 }
 
 /*
  * Discard the user FPU state of a thread so that it starts from a
  * clean state the next time it uses the FPU.  Asserts that the pcb of
  * td currently holds user (not kernel) FPU state.  The work is done
  * inside a critical section.
  *
  * NOTE(review): the init-done flags are cleared on curthread's pcb
  * rather than on td's.  This is only equivalent when td == curthread;
  * confirm that all callers satisfy that.
  */
 static void
 fpstate_drop(struct thread *td)
 {
 
 	KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
 	critical_enter();
 	/* Release the FPU itself if td is its current owner on this CPU. */
 	if (PCPU_GET(fpcurthread) == td)
 		npxdrop();
 	/*
 	 * XXX force a full drop of the npx.  The above only drops it if we
 	 * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
 	 *
 	 * XXX I don't much like npxgetregs()'s semantics of doing a full
 	 * drop.  Dropping only to the pcb matches fnsave's behaviour.
 	 * We only need to drop to !PCB_INITDONE in sendsig().  But
 	 * sendsig() is the only caller of npxgetregs()... perhaps we just
 	 * have too many layers.
 	 */
 	curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE |
 	    PCB_NPXUSERINITDONE);
 	critical_exit();
 }
 
 /*
  * Report debug register contents in *dbregs.  With td == NULL the
  * values are read from the CPU; otherwise they come from the copies
  * kept in the thread's pcb.  dr4 and dr5 are always reported as zero.
  * Always returns 0.
  */
 int
 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 {
 	struct pcb *pcb;
 
 	dbregs->dr[4] = 0;
 	dbregs->dr[5] = 0;
 
 	if (td != NULL) {
 		/* Saved per-thread values. */
 		pcb = td->td_pcb;
 		dbregs->dr[0] = pcb->pcb_dr0;
 		dbregs->dr[1] = pcb->pcb_dr1;
 		dbregs->dr[2] = pcb->pcb_dr2;
 		dbregs->dr[3] = pcb->pcb_dr3;
 		dbregs->dr[6] = pcb->pcb_dr6;
 		dbregs->dr[7] = pcb->pcb_dr7;
 		return (0);
 	}
 
 	/* Live hardware values. */
 	dbregs->dr[0] = rdr0();
 	dbregs->dr[1] = rdr1();
 	dbregs->dr[2] = rdr2();
 	dbregs->dr[3] = rdr3();
 	dbregs->dr[6] = rdr6();
 	dbregs->dr[7] = rdr7();
 	return (0);
 }
 
 int
 set_dbregs(struct thread *td, struct dbreg *dbregs)
 {
 	struct pcb *pcb;
 	int i;
 
 	if (td == NULL) {
 		load_dr0(dbregs->dr[0]);
 		load_dr1(dbregs->dr[1]);
 		load_dr2(dbregs->dr[2]);
 		load_dr3(dbregs->dr[3]);
 		load_dr6(dbregs->dr[6]);
 		load_dr7(dbregs->dr[7]);
 	} else {
 		/*
 		 * Don't let an illegal value for dr7 get set.	Specifically,
 		 * check for undefined settings.  Setting these bit patterns
 		 * result in undefined behaviour and can lead to an unexpected
 		 * TRCTRAP.
 		 */
 		for (i = 0; i < 4; i++) {
 			if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 				return (EINVAL);
 			if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02)
 				return (EINVAL);
 		}
 
 		pcb = td->td_pcb;
 
 		/*
 		 * Don't let a process set a breakpoint that is not within the
 		 * process's address space.  If a process could do this, it
 		 * could halt the system by setting a breakpoint in the kernel
 		 * (if ddb was enabled).  Thus, we need to check to make sure
 		 * that no breakpoints are being enabled for addresses outside
 		 * process's address space.
 		 *
 		 * XXX - what about when the watched area of the user's
 		 * address space is written into from within the kernel
 		 * ... wouldn't that still cause a breakpoint to be generated
 		 * from within kernel mode?
 		 */
 
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 			/* dr0 is enabled */
 			if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 			/* dr1 is enabled */
 			if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 			/* dr2 is enabled */
 			if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 			/* dr3 is enabled */
 			if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		pcb->pcb_dr0 = dbregs->dr[0];
 		pcb->pcb_dr1 = dbregs->dr[1];
 		pcb->pcb_dr2 = dbregs->dr[2];
 		pcb->pcb_dr3 = dbregs->dr[3];
 		pcb->pcb_dr6 = dbregs->dr[6];
 		pcb->pcb_dr7 = dbregs->dr[7];
 
 		pcb->pcb_flags |= PCB_DBREGS;
 	}
 
 	return (0);
 }
 
 /*
  * Decide whether a debug trap was caused by a hardware breakpoint on
  * a user-space address.  dr6 is the debug status value for the trap.
  * Returns the number of breakpoints that fired if at least one of
  * them is below VM_MAXUSER_ADDRESS, and 0 otherwise.
  */
 int
 user_dbreg_trap(register_t dr6)
 {
 	caddr_t hit_addr[4];	/* addresses of the breakpoints that fired */
 	u_int32_t hit;		/* breakpoint bits extracted from dr6 */
 	u_int32_t ctl;		/* current dr7 contents */
 	int count;		/* number of breakpoints that fired */
 	int k;
 
 	/* No breakpoint bit set: the debug registers did not cause this. */
 	hit = dr6 & DBREG_DR6_BMASK;
 	if (hit == 0)
 		return (0);
 
 	/*
 	 * If the low byte of dr7 is clear the trap couldn't have been
 	 * caused by the hardware debug registers either.
 	 */
 	ctl = rdr7();
 	if ((ctl & 0x000000ff) == 0)
 		return (0);
 
 	/* Collect the address of every breakpoint that is reported. */
 	count = 0;
 	if ((hit & 0x01) != 0)
 		hit_addr[count++] = (caddr_t)rdr0();
 	if ((hit & 0x02) != 0)
 		hit_addr[count++] = (caddr_t)rdr1();
 	if ((hit & 0x04) != 0)
 		hit_addr[count++] = (caddr_t)rdr2();
 	if ((hit & 0x08) != 0)
 		hit_addr[count++] = (caddr_t)rdr3();
 
 	/* One user-space address is enough to report the whole set. */
 	k = 0;
 	while (k < count) {
 		if (hit_addr[k] < (caddr_t)VM_MAXUSER_ADDRESS)
 			return (count);
 		k++;
 	}
 
 	/* None of the breakpoints are in user space. */
 	return (0);
 }
diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c
index 269ab3b7ab75..e964f475a393 100644
--- a/sys/i386/linux/linux_sysvec.c
+++ b/sys/i386/linux/linux_sysvec.c
@@ -1,1113 +1,1113 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1994-1996 Søren Schmidt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_aout.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/stddef.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/trap.h>
 
 #include <x86/linux/linux_x86.h>
 #include <i386/linux/linux.h>
 #include <i386/linux/linux_proto.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_fork.h>
 #include <compat/linux/linux_ioctl.h>
 #include <compat/linux/linux_mib.h>
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_vdso.h>
 
 #include <x86/linux/linux_x86_sigframe.h>
 
 MODULE_VERSION(linux, 1);
 
 /*
  * Layout of the top of the Linux process address space, from high to
  * low: the vDSO pages, one page for the native shared page, then the
  * user stack with ps_strings at its very top.
  *
  * Every expansion is parenthesized so that the macros stay correct
  * when used inside a larger expression.
  */
 #define	LINUX_VDSOPAGE_SIZE	(PAGE_SIZE * 2)
 #define	LINUX_VDSOPAGE		(VM_MAXUSER_ADDRESS - LINUX_VDSOPAGE_SIZE)
 #define	LINUX_SHAREDPAGE	(LINUX_VDSOPAGE - PAGE_SIZE)
 				/*
 				 * PAGE_SIZE - the size
 				 * of the native SHAREDPAGE
 				 */
 #define	LINUX_USRSTACK		LINUX_SHAREDPAGE
 #define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
 
 /*
  * File-scope state for the Linux vDSO.
  * NOTE(review): these are presumably filled in by the vDSO
  * install/sysvec-init code later in the file -- confirm there.
  */
 static int linux_szsigcode;
 static vm_object_t linux_vdso_obj;
 static char *linux_vdso_mapping;
 /* Presumably bracket the embedded vDSO image -- TODO confirm. */
 extern char _binary_linux_vdso_so_o_start;
 extern char _binary_linux_vdso_so_o_end;
 /* Exported to new processes as the LINUX_AT_SYSINFO_EHDR aux entry. */
 static vm_offset_t linux_vdso_base;
 
 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
 
 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
 
 /* Forward declarations of static functions defined in this file. */
 static int	linux_fixup(uintptr_t *stack_base,
 		    struct image_params *iparams);
 static int	linux_fixup_elf(uintptr_t *stack_base,
 		    struct image_params *iparams);
 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
 static void	linux_exec_setregs(struct thread *td,
 		    struct image_params *imgp, uintptr_t stack);
 static void	linux_exec_sysvec_init(void *param);
 static int	linux_on_exec_vmspace(struct proc *p,
 		    struct image_params *imgp);
 static int	linux_copyout_strings(struct image_params *imgp,
 		    uintptr_t *stack_base);
 static void	linux_set_fork_retval(struct thread *td);
 static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(const void *param);
 static void	linux_vdso_deinstall(const void *param);
 static void	linux_vdso_reloc(char *mapping, Elf_Addr offset);
 
 /*
  * Symbols used by this file's signal delivery and aux vector code.
  * NOTE(review): the LINUX_VDSO_SYM_* macros presumably declare
  * variables that are resolved against the vDSO image -- confirm
  * against their definition.
  */
 LINUX_VDSO_SYM_CHAR(linux_platform);
 LINUX_VDSO_SYM_INTPTR(__kernel_vsyscall);
 LINUX_VDSO_SYM_INTPTR(linux_vdso_sigcode);
 LINUX_VDSO_SYM_INTPTR(linux_vdso_rt_sigcode);
 LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
 LINUX_VDSO_SYM_INTPTR(kern_tsc_selector);
 LINUX_VDSO_SYM_INTPTR(kern_cpu_selector);
 
 /*
  * Finish the initial user stack by pushing three words below the
  * argument vector: the address of the environment vector, the address
  * of the argument vector, and argc (which ends up lowest).
  *
  * stack_base  in: address of argv[0]'s slot; out: address of the
  *             pushed argc word.
  * imgp        exec image parameters; only args->argc is used.
  *
  * Returns 0 on success.  Returns EFAULT, leaving *stack_base unchanged,
  * if any of the words cannot be stored to user memory, so that exec
  * fails instead of starting the image on a half-built stack.  This
  * matches the error handling of linux_fixup_elf().
  */
 static int
 linux_fixup(uintptr_t *stack_base, struct image_params *imgp)
 {
 	register_t *base, *argv, *envp;
 
 	base = (register_t *)*stack_base;
 	argv = base;
 	/* The environment vector follows argv[] and its NULL terminator. */
 	envp = base + (imgp->args->argc + 1);
 	base--;
 	if (suword(base, (intptr_t)envp) != 0)
 		return (EFAULT);
 	base--;
 	if (suword(base, (intptr_t)argv) != 0)
 		return (EFAULT);
 	base--;
 	if (suword(base, imgp->args->argc) != 0)
 		return (EFAULT);
 	*stack_base = (uintptr_t)base;
 	return (0);
 }
 
 /*
  * Build the ELF auxiliary vector for a new Linux process and copy it
  * out to user address base.
  *
  * The entries are assembled in a zeroed temporary array of
  * LINUX_AT_COUNT elements, and the full array (including unused
  * trailing slots) is copied out.  imgp->auxargs is consumed: it is
  * freed and set to NULL.  Returns the result of copyout().
  */
 static int
 linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
 {
 	Elf32_Auxargs *args;
 	Elf32_Auxinfo *argarray, *pos;
 	int error, issetugid;
 
 	/* Reported as LINUX_AT_SECURE: 1 if the process has P_SUGID set. */
 	issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
 	args = (Elf32_Auxargs *)imgp->auxargs;
 	argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP,
 	    M_WAITOK | M_ZERO);
 
 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, __kernel_vsyscall);
 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
 
 	/*
 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
 	 * as it has appeared in the 2.4.0-rc7 first time.
 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
 	 * is not present.
 	 * Also see linux_times() implementation.
 	 */
 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid);
 	/*
 	 * NOTE(review): AT_EUID/AT_EGID are filled from the saved ids
 	 * (cr_svuid/cr_svgid), not from the effective ids -- confirm this
 	 * is intended.
 	 */
 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
 	AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary);
 	/* Optional entries, present only when the data is available. */
 	if (imgp->execpathp != 0)
 		AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp);
 	if (args->execfd != -1)
 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
 	AUXARGS_ENTRY(pos, AT_NULL, 0);
 
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 	/* Sanity check that the entries above fit in the array. */
 	KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs"));
 
 	error = copyout(argarray, (void *)base,
 	    sizeof(*argarray) * LINUX_AT_COUNT);
 	free(argarray, M_TEMP);
 	return (error);
 }
 
 /*
  * Push argc onto the initial user stack, one word below *stack_base,
  * and move *stack_base down to that word.  Returns EFAULT, leaving
  * *stack_base unchanged, if the store to user memory fails;
  * returns 0 otherwise.
  */
 static int
 linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp)
 {
 	register_t *argc_slot;
 
 	argc_slot = (register_t *)*stack_base - 1;
 	if (suword(argc_slot, (register_t)imgp->args->argc) == -1)
 		return (EFAULT);
 
 	*stack_base = (uintptr_t)argc_slot;
 	return (0);
 }
 
 /*
  * Copied from kern/kern_exec.c
  *
  * Lay out the top of the new process's user stack and copy the exec
  * arguments to it.  Working downwards from the ps_strings structure
  * the stack receives: the executable path (only when both an execpath
  * and auxargs exist), the SSP canary, the argument and environment
  * strings, room for the ELF aux vector (only with auxargs), and the
  * argv[]/envp[] pointer vectors.
  *
  * On success *stack_base is set to the address of argv[0]'s slot and 0
  * is returned.  A failing copyout() returns its error, a failing
  * suword() returns EFAULT.
  */
 static int
 linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base)
 {
 	int argc, envc, error;
 	char **vectp;
 	char *stringp;
 	uintptr_t destp, ustringp;
 	struct ps_strings *arginfo;
 	char canary[LINUX_AT_RANDOM_LEN];
 	size_t execpath_len;
 	struct proc *p;
 
 	p = imgp->proc;
 	arginfo = (struct ps_strings *)PROC_PS_STRINGS(p);
 	/* destp is the running "next free byte" cursor; it only moves down. */
 	destp = (uintptr_t)arginfo;
 
 	if (imgp->execpath != NULL && imgp->auxargs != NULL) {
 		execpath_len = strlen(imgp->execpath) + 1;
 		destp -= execpath_len;
 		destp = rounddown2(destp, sizeof(void *));
 		imgp->execpathp = (void *)destp;
 		error = copyout(imgp->execpath, imgp->execpathp, execpath_len);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Prepare the canary for SSP. */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= roundup(sizeof(canary), sizeof(void *));
 	imgp->canary = (void *)destp;
 	error = copyout(canary, imgp->canary, sizeof(canary));
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Allocate room for the argument and environment strings.
 	 * ARG_MAX - stringspace is the number of string bytes in use.
 	 */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(void *));
 	ustringp = destp;
 
 	if (imgp->auxargs) {
 		/*
 		 * Allocate room on the stack for the ELF auxargs
 		 * array.  It has LINUX_AT_COUNT entries.
 		 */
 		destp -= LINUX_AT_COUNT * sizeof(Elf32_Auxinfo);
 		destp = rounddown2(destp, sizeof(void *));
 	}
 
 	vectp = (char **)destp;
 
 	/*
 	 * Allocate room for the argv[] and env vectors including the
 	 * terminating NULL pointers.
 	 */
 	vectp -= imgp->args->argc + 1 + imgp->args->envc + 1;
 
 	/* vectp also becomes our initial stack base. */
 	*stack_base = (uintptr_t)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/* Copy out strings - arguments and environment. */
 	error = copyout(stringp, (void *)ustringp,
 	    ARG_MAX - imgp->args->stringspace);
 	if (error != 0)
 		return (error);
 
 	/* Fill in "ps_strings" struct for ps, w, etc. */
 	if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 ||
 	    suword(&arginfo->ps_nargvstr, argc) != 0)
 		return (EFAULT);
 
 	/*
 	 * Fill in argument portion of vector table.  stringp walks the
 	 * kernel copy of the strings while ustringp tracks the matching
 	 * user address, so both advance past each string in lock-step.
 	 */
 	for (; argc > 0; --argc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* A null vector table pointer separates the argp's from the envp's. */
 	if (suword(vectp++, 0) != 0)
 		return (EFAULT);
 
 	if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 ||
 	    suword(&arginfo->ps_nenvstr, envc) != 0)
 		return (EFAULT);
 
 	/* Fill in environment portion of vector table. */
 	for (; envc > 0; --envc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* The end of the vector table is a null pointer. */
 	if (suword(vectp, 0) != 0)
 		return (EFAULT);
 
 	if (imgp->auxargs) {
 		/* The aux vector goes right after the envp[] terminator. */
 		vectp++;
 		error = imgp->sysent->sv_copyout_auxargs(imgp,
 		    (uintptr_t)vectp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Deliver a signal to the current thread using the Linux "rt" signal
  * frame (the SA_SIGINFO form).
  *
  * catcher  user address of the signal handler.
  * ksi      description of the signal being delivered.
  * mask     signal mask to be restored when the handler returns.
  *
  * Called with the process lock and the sigacts mutex held.  Both are
  * dropped while the frame is built and copied out and are re-acquired
  * before returning.  If the frame cannot be written to the user stack
  * the process is terminated with SIGILL.
  *
  * The trap number stored in the frame is translated from
  * ksi->ksi_trapno, as linux_sendsig() does; ksi_code is an si_code and
  * not a trap number, so it must not be fed to bsd_to_linux_trapcode().
  */
 static void
 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct l_rt_sigframe *fp, frame;
 	int sig;
 	int oonstack;
 
 	sig = linux_translate_traps(ksi->ksi_signo, ksi->ksi_trapno);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/*
 	 * Allocate space for the signal handler context: at the top of
 	 * the alternate stack when one is configured, not already in use
 	 * and requested for this signal, otherwise just below the current
 	 * user stack pointer.
 	 */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
 	} else
 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
 	mtx_unlock(&psp->ps_mtx);
 
 	/* Build the argument list for the signal handler. */
 	sig = bsd_to_linux_signal(sig);
 
 	bzero(&frame, sizeof(frame));
 
 	frame.sf_sig = sig;
 	frame.sf_siginfo = PTROUT(&fp->sf_si);
 	frame.sf_ucontext = PTROUT(&fp->sf_uc);
 
 	/* Fill in POSIX parts. */
 	siginfo_to_lsiginfo(&ksi->ksi_info, &frame.sf_si, sig);
 
 	/* Build the signal context to be used by sigreturn. */
 	frame.sf_uc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
 	frame.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
 	/* The process lock is released only after td_sigstk has been read. */
 	PROC_UNLOCK(p);
 
 	bsd_to_linux_sigset(mask, &frame.sf_uc.uc_sigmask);
 
 	frame.sf_uc.uc_mcontext.sc_mask   = frame.sf_uc.uc_sigmask.__mask;
 	frame.sf_uc.uc_mcontext.sc_gs     = rgs();
 	frame.sf_uc.uc_mcontext.sc_fs     = regs->tf_fs;
 	frame.sf_uc.uc_mcontext.sc_es     = regs->tf_es;
 	frame.sf_uc.uc_mcontext.sc_ds     = regs->tf_ds;
 	frame.sf_uc.uc_mcontext.sc_edi    = regs->tf_edi;
 	frame.sf_uc.uc_mcontext.sc_esi    = regs->tf_esi;
 	frame.sf_uc.uc_mcontext.sc_ebp    = regs->tf_ebp;
 	frame.sf_uc.uc_mcontext.sc_ebx    = regs->tf_ebx;
 	frame.sf_uc.uc_mcontext.sc_esp    = regs->tf_esp;
 	frame.sf_uc.uc_mcontext.sc_edx    = regs->tf_edx;
 	frame.sf_uc.uc_mcontext.sc_ecx    = regs->tf_ecx;
 	frame.sf_uc.uc_mcontext.sc_eax    = regs->tf_eax;
 	frame.sf_uc.uc_mcontext.sc_eip    = regs->tf_eip;
 	frame.sf_uc.uc_mcontext.sc_cs     = regs->tf_cs;
 	frame.sf_uc.uc_mcontext.sc_eflags = regs->tf_eflags;
 	frame.sf_uc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
 	frame.sf_uc.uc_mcontext.sc_ss     = regs->tf_ss;
 	frame.sf_uc.uc_mcontext.sc_err    = regs->tf_err;
 	frame.sf_uc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
 	frame.sf_uc.uc_mcontext.sc_trapno =
 	    bsd_to_linux_trapcode(ksi->ksi_trapno);
 
 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in: resume in the vDSO rt signal
 	 * trampoline with the stack pointing at the new frame and the
 	 * handler address in %edi.
 	 */
 	regs->tf_esp = PTROUT(fp);
 	regs->tf_eip = linux_vdso_rt_sigcode;
 	regs->tf_edi = PTROUT(catcher);
 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 	/* Re-acquire the locks the caller expects to hold on return. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * Deliver a signal to the current thread using the classic (non-RT)
  * Linux signal frame, struct l_sigframe.
  *
  * Handlers that are members of ps_siginfo (SA_SIGINFO) are forwarded to
  * linux_rt_sendsig().  Otherwise the interrupted register state and the
  * signal mask are saved in a frame that is copied out either just below
  * the current user %esp or to the top of the alternate signal stack, and
  * the trapframe is redirected to linux_vdso_sigcode with the handler
  * address in %edi.  The sigcode trampoline calls the handler and then
  * enters the sigreturn routine below, which restores the mask and the
  * registers saved here.
  *
  * The process lock and ps_mtx must be held on entry.  Both are released
  * while the frame is built and copied out, and are taken again before
  * returning.  A failed copyout ends in sigexit(td, SIGILL).
  */
 static void
 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct l_sigframe *fp, frame;
 	l_sigset_t lmask;
 	int sig;
 	int oonstack;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	psp = p->p_sigacts;
 	sig = linux_translate_traps(ksi->ksi_signo, ksi->ksi_trapno);
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		linux_rt_sendsig(catcher, ksi, mask);
 		return;
 	}
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Frame goes at the very top of the alternate stack. */
 		fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
 	} else
 		/* One frame below the interrupted stack pointer. */
 		fp = (struct l_sigframe *)regs->tf_esp - 1;
 	/* Locks are released before the frame is built and copied out. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/* Build the argument list for the signal handler. */
 	sig = bsd_to_linux_signal(sig);
 
 	bzero(&frame, sizeof(frame));
 
 	frame.sf_sig = sig;
 	/* Native mask; linux_sigreturn() hands it to kern_sigprocmask(). */
 	frame.sf_sigmask = *mask;
 	bsd_to_linux_sigset(mask, &lmask);
 
 	/* Build the signal context to be used by sigreturn. */
 	frame.sf_sc.sc_mask   = lmask.__mask;
 	frame.sf_sc.sc_gs     = rgs();
 	frame.sf_sc.sc_fs     = regs->tf_fs;
 	frame.sf_sc.sc_es     = regs->tf_es;
 	frame.sf_sc.sc_ds     = regs->tf_ds;
 	frame.sf_sc.sc_edi    = regs->tf_edi;
 	frame.sf_sc.sc_esi    = regs->tf_esi;
 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
 	frame.sf_sc.sc_esp    = regs->tf_esp;
 	frame.sf_sc.sc_edx    = regs->tf_edx;
 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
 	frame.sf_sc.sc_eax    = regs->tf_eax;
 	frame.sf_sc.sc_eip    = regs->tf_eip;
 	frame.sf_sc.sc_cs     = regs->tf_cs;
 	frame.sf_sc.sc_eflags = regs->tf_eflags;
 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
 	frame.sf_sc.sc_ss     = regs->tf_ss;
 	frame.sf_sc.sc_err    = regs->tf_err;
 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
 
 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Build context to run handler in. */
 	regs->tf_esp = PTROUT(fp);
 	regs->tf_eip = linux_vdso_sigcode;
 	regs->tf_edi = PTROUT(catcher);
 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 	/* Re-take the locks that were held on entry. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * psl to gain improper privileges or to cause
  * a machine fault.
  *
  * Return values:
  *	EFAULT		the frame at args->sfp could not be copied in.
  *	EINVAL		the saved eflags differ from the current ones in
  *			bits outside PSL_USERCHANGE, or the saved %cs is
  *			not a user-privilege selector (SIGBUS is also
  *			posted in the latter case).
  *	EJUSTRETURN	the trapframe was reloaded from the frame.
  */
 int
 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
 {
 	struct l_sigframe frame;
 	struct trapframe *regs;
 	int eflags;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 
 	/*
 	 * The trampoline code hands us the sigframe.
 	 * It is unsafe to keep track of it ourselves, in the event that a
 	 * program jumps out of a signal handler.
 	 */
 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
 		return (EFAULT);
 
 	/* Check for security violations. */
 	/* Only bits covered by PSL_USERCHANGE may differ from the live flags. */
 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 	eflags = frame.sf_sc.sc_eflags;
 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
 		return (EINVAL);
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
 		/* Report the bad selector as a protection fault at the current %eip. */
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_eip;
 		trapsignal(td, &ksi);
 		return (EINVAL);
 	}
 
 	/* Reinstate the signal mask that linux_sendsig() saved in the frame. */
 	kern_sigprocmask(td, SIG_SETMASK, &frame.sf_sigmask, NULL, 0);
 
 	/* Restore signal context. */
 	/* %gs was restored by the trampoline. */
 	regs->tf_fs     = frame.sf_sc.sc_fs;
 	regs->tf_es     = frame.sf_sc.sc_es;
 	regs->tf_ds     = frame.sf_sc.sc_ds;
 	regs->tf_edi    = frame.sf_sc.sc_edi;
 	regs->tf_esi    = frame.sf_sc.sc_esi;
 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
 	regs->tf_edx    = frame.sf_sc.sc_edx;
 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
 	regs->tf_eax    = frame.sf_sc.sc_eax;
 	regs->tf_eip    = frame.sf_sc.sc_eip;
 	regs->tf_cs     = frame.sf_sc.sc_cs;
 	regs->tf_eflags = eflags;
 	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
 	regs->tf_ss     = frame.sf_sc.sc_ss;
 
 	return (EJUSTRETURN);
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by rt_sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * psl to gain improper privileges or to cause
  * a machine fault.
  *
  * Return values:
  *	EFAULT		the ucontext at args->ucp could not be copied in.
  *	EINVAL		the saved eflags differ from the current ones in
  *			bits outside PSL_USERCHANGE, or the saved %cs is
  *			not a user-privilege selector (SIGBUS is also
  *			posted in the latter case).
  *	EJUSTRETURN	the trapframe was reloaded from the ucontext.
  */
 int
 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
 {
 	struct l_ucontext uc;
 	struct l_sigcontext *context;
 	sigset_t bmask;
 	l_stack_t *lss;
 	stack_t ss;
 	struct trapframe *regs;
 	int eflags;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 
 	/*
 	 * The trampoline code hands us the ucontext.
 	 * It is unsafe to keep track of it ourselves, in the event that a
 	 * program jumps out of a signal handler.
 	 */
 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
 		return (EFAULT);
 
 	context = &uc.uc_mcontext;
 
 	/* Check for security violations. */
 	/* Only bits covered by PSL_USERCHANGE may differ from the live flags. */
 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 	eflags = context->sc_eflags;
 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
 		return (EINVAL);
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
 	if (!CS_SECURE(context->sc_cs)) {
 		/* Report the bad selector as a protection fault at the current %eip. */
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_eip;
 		trapsignal(td, &ksi);
 		return (EINVAL);
 	}
 
 	/* The saved mask is in Linux format; convert before installing it. */
 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
 
 	/* Restore signal context. */
 	/* %gs was restored by the trampoline. */
 	regs->tf_fs     = context->sc_fs;
 	regs->tf_es     = context->sc_es;
 	regs->tf_ds     = context->sc_ds;
 	regs->tf_edi    = context->sc_edi;
 	regs->tf_esi    = context->sc_esi;
 	regs->tf_ebp    = context->sc_ebp;
 	regs->tf_ebx    = context->sc_ebx;
 	regs->tf_edx    = context->sc_edx;
 	regs->tf_ecx    = context->sc_ecx;
 	regs->tf_eax    = context->sc_eax;
 	regs->tf_eip    = context->sc_eip;
 	regs->tf_cs     = context->sc_cs;
 	regs->tf_eflags = eflags;
 	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
 	regs->tf_esp    = context->sc_esp_at_signal;
 	regs->tf_ss     = context->sc_ss;
 
 	/* Call sigaltstack & ignore results. */
 	lss = &uc.uc_stack;
 	ss.ss_sp = PTRIN(lss->ss_sp);
 	ss.ss_size = lss->ss_size;
 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
 
 	(void)kern_sigaltstack(td, &ss, NULL);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * Decode the system call number and its arguments from the trapframe
  * into td->td_sa.
  *
  * The number is read from %eax and the six arguments from %ebx, %ecx,
  * %edx, %esi, %edi and %ebp.  A number that is not below sv_size is
  * routed to the last entry of the system call table.  Always returns 0.
  */
 static int
 linux_fetch_syscall_args(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	struct trapframe *frame = td->td_frame;
 	struct syscall_args *sa = &td->td_sa;
 
 	/* System call number. */
 	sa->code = frame->tf_eax;
 	sa->original_code = sa->code;
 
 	/* Register-passed arguments. */
 	sa->args[0] = frame->tf_ebx;
 	sa->args[1] = frame->tf_ecx;
 	sa->args[2] = frame->tf_edx;
 	sa->args[3] = frame->tf_esi;
 	sa->args[4] = frame->tf_edi;
 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
 
 	/* Default return values. */
 	td->td_retval[0] = 0;
 	td->td_retval[1] = frame->tf_edx;
 
 	/* In-range numbers index the table directly; the rest hit nosys. */
 	if (sa->code < p->p_sysent->sv_size)
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 	else
 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
 
 	return (0);
 }
 
 static void
 linux_set_syscall_retval(struct thread *td, int error)
 {
 	struct trapframe *frame = td->td_frame;
 
 	cpu_set_syscall_retval(td, error);
 
 	if (__predict_false(error != 0)) {
 		if (error != ERESTART && error != EJUSTRETURN)
 			frame->tf_eax = bsd_to_linux_errno(error);
 	}
 }
 
 /* Set %eax in the thread's trapframe to 0 (the fork return value). */
 static void
 linux_set_fork_retval(struct thread *td)
 {
 
 	td->td_frame->tf_eax = 0;
 }
 
 /*
  * Register setup for a freshly exec'ed Linux image.
  *
  * The generic exec_setregs() runs first; the defaults it picks that
  * would confuse Linux binaries are then overridden.
  */
 static void
 linux_exec_setregs(struct thread *td, struct image_params *imgp,
     uintptr_t stack)
 {
 	struct pcb *linux_pcb;
 
 	linux_pcb = td->td_pcb;
 	exec_setregs(td, imgp, stack);
 
 	/* Override the initial NPX control word with the Linux value. */
 	linux_pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
 
 	/* Linux sets %gs to 0, we default to _udatasel. */
 	linux_pcb->pcb_gs = 0;
 	load_gs(0);
 }
 
 /*
  * System entry vector for "Linux a.out" images (SV_AOUT).
  *
  * Uses the native exec_copyout_strings and PS_STRINGS, has no core dump
  * handler, and does not set SV_SHP, so linux_on_exec_vmspace() does not
  * map the vDSO for these processes.
  */
 struct sysentvec linux_sysvec = {
 	.sv_size	= LINUX_SYS_MAXSYSCALL,
 	.sv_table	= linux_sysent,
 	.sv_fixup	= linux_fixup,
 	.sv_sendsig	= linux_sendsig,
 	.sv_sigcode	= &_binary_linux_vdso_so_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux a.out",
 	.sv_coredump	= NULL,
 	.sv_imgact_try	= linux_exec_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= LINUX_USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_psstringssz	= sizeof(struct ps_strings),
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= linux_exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32 |
 	    SV_SIG_DISCIGN | SV_SIG_WAITNDQ,
 	.sv_set_syscall_retval = linux_set_syscall_retval,
 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_schedtail	= linux_schedtail,
 	.sv_thread_detach = linux_thread_detach,
 	.sv_trap	= NULL,
 	.sv_onexec	= linux_on_exec_vmspace,
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
 	.sv_set_fork_retval = linux_set_fork_retval,
 };
 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
 
 /*
  * System entry vector for "Linux ELF32" images.
  *
  * Compared with linux_sysvec it sets SV_SHP and SV_TIMEKEEP, describes
  * a one-page shared page at LINUX_SHAREDPAGE, uses the Linux auxargs
  * and strings copyout routines, and writes core dumps through
  * elf32_coredump.  It is initialised by linux_exec_sysvec_init(), which
  * is registered with SYSINIT further down.
  */
 struct sysentvec elf_linux_sysvec = {
 	.sv_size	= LINUX_SYS_MAXSYSCALL,
 	.sv_table	= linux_sysent,
 	.sv_fixup	= linux_fixup_elf,
 	.sv_sendsig	= linux_sendsig,
 	.sv_sigcode	= &_binary_linux_vdso_so_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux ELF32",
 	.sv_coredump	= elf32_coredump,
 	.sv_elf_core_osabi = ELFOSABI_FREEBSD,
 	.sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR,
 	.sv_elf_core_prepare_notes = elf32_prepare_notes,
 	.sv_imgact_try	= linux_exec_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= LINUX_USRSTACK,
 	.sv_psstrings	= LINUX_PS_STRINGS,
 	.sv_psstringssz	= sizeof(struct ps_strings),
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_auxargs = linux_copyout_auxargs,
 	.sv_copyout_strings = linux_copyout_strings,
 	.sv_setregs	= linux_exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP |
 	    SV_SIG_DISCIGN | SV_SIG_WAITNDQ | SV_TIMEKEEP,
 	.sv_set_syscall_retval = linux_set_syscall_retval,
 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_shared_page_base = LINUX_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
 	.sv_thread_detach = linux_thread_detach,
 	.sv_trap	= NULL,
 	.sv_onexec	= linux_on_exec_vmspace,
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
 	.sv_set_fork_retval = linux_set_fork_retval,
 };
 
 /*
  * sv_onexec hook: map the vDSO into processes whose sysentvec has
  * SV_SHP set, then run linux_on_exec().
  *
  * Returns the linux_map_vdso() error, in which case linux_on_exec() is
  * not called, or 0 on success.
  */
 static int
 linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
 {
 	int error;
 
 	if (SV_PROC_FLAG(p, SV_SHP) != 0) {
 		error = linux_map_vdso(p, linux_vdso_obj,
 		    linux_vdso_base, LINUX_VDSOPAGE_SIZE, imgp);
 		if (error != 0)
 			return (error);
 	}
 	linux_on_exec(p, imgp);
 	return (0);
 }
 
 /*
  * Initialise the sysentvec passed in param and patch the timekeep base,
  * TSC selector and CPU selector values into the vDSO image.
  *
  * linux_vdso_install() and linux_exec_sysvec_init() must be called
  * after exec_sysvec_init() which is SI_SUB_EXEC (SI_ORDER_ANY).
  */
 static void
 linux_exec_sysvec_init(void *param)
 {
 	l_uintptr_t *ktimekeep_base, *ktsc_selector;
 	struct sysentvec *sv;
 	ptrdiff_t tkoff;
 
 	sv = param;
 	/* Fill timekeep_base */
 	exec_sysvec_init(sv);
 
 	/*
 	 * Each kern_* symbol is located by its offset from linux_vdso_base
 	 * and written through linux_vdso_mapping.
 	 */
 	tkoff = kern_timekeep_base - linux_vdso_base;
 	ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
-	*ktimekeep_base = sv->sv_timekeep_base;
+	*ktimekeep_base = sv->sv_shared_page_base + sv->sv_timekeep_offset;
 
 	tkoff = kern_tsc_selector - linux_vdso_base;
 	ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
 	*ktsc_selector = linux_vdso_tsc_selector_idx();
 	if (bootverbose)
 		printf("Linux i386 vDSO tsc_selector: %u\n", *ktsc_selector);
 
 	/* ktsc_selector is reused here to point at the cpu selector slot. */
 	tkoff = kern_cpu_selector - linux_vdso_base;
 	ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
 	*ktsc_selector = linux_vdso_cpu_selector_idx();
 	if (bootverbose)
 		printf("Linux i386 vDSO cpu_selector: %u\n", *ktsc_selector);
 }
 SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC + 1, SI_ORDER_ANY,
     linux_exec_sysvec_init, &elf_linux_sysvec);
 
 /*
  * Prepare the vDSO at boot/module load: record its size, fix up the
  * embedded image for LINUX_VDSOPAGE, allocate the shared page object,
  * copy the image into it and apply the relocations to the copy.
  */
 static void
 linux_vdso_install(const void *param)
 {
 	char *image, *image_end;
 
 	image = &_binary_linux_vdso_so_o_start;
 	image_end = &_binary_linux_vdso_so_o_end;
 
 	/* The whole image has to fit in the vDSO page(s). */
 	linux_szsigcode = image_end - image;
 	MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
 
 	linux_vdso_base = LINUX_VDSOPAGE;
 
 	__elfN(linux_vdso_fixup)(image, linux_vdso_base);
 
 	linux_vdso_obj = __elfN(linux_shared_page_init)
 	    (&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
 	bcopy(image, linux_vdso_mapping, linux_szsigcode);
 
 	linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
 }
 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC + 1, SI_ORDER_FIRST,
     linux_vdso_install, NULL);
 
 /*
  * Counterpart of linux_vdso_install(): hand the vDSO object and its
  * mapping back to linux_shared_page_fini().  The param argument is
  * unused.
  */
 static void
 linux_vdso_deinstall(const void *param)
 {
 
 	__elfN(linux_shared_page_fini)(linux_vdso_obj,
 	    linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
 }
 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
     linux_vdso_deinstall, NULL);
 
 /*
  * Apply the vDSO's REL relocations to the image at 'mapping', assuming
  * it will be visible to userspace at address 'offset'.
  *
  * Only R_386_RELATIVE entries are applied (base + addend, with the
  * addend read from the relocated word); R_386_NONE is ignored and
  * everything else, including any RELA section, is reported with
  * printf().  If the image carries several SHT_REL sections the last
  * one found is used.
  */
 static void
 linux_vdso_reloc(char *mapping, Elf_Addr offset)
 {
 	const Elf_Ehdr *ehdr;
 	const Elf_Shdr *shdr, *sec;
 	const Elf_Rel *rel;
 	Elf_Addr *where;
 	Elf_Addr addr, addend;
 	Elf_Size rtype, symidx;
 	int i, relcnt;
 
 	MPASS(offset != 0);
 
 	ehdr = (const Elf_Ehdr *)mapping;
 	shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
 
 	/* Locate the relocation table among the section headers. */
 	relcnt = 0;
 	for (i = 0; i < ehdr->e_shnum; i++) {
 		sec = &shdr[i];
 		if (sec->sh_type == SHT_REL) {
 			rel = (const Elf_Rel *)(mapping + sec->sh_offset);
 			relcnt = sec->sh_size / sizeof(*rel);
 		} else if (sec->sh_type == SHT_RELA) {
 			printf("Linux i386 vDSO: unexpected Rela section\n");
 		}
 	}
 
 	/* Walk the table and patch each relocated word in place. */
 	for (i = 0; i < relcnt; i++, rel++) {
 		where = (Elf_Addr *)(mapping + rel->r_offset);
 		addend = *where;
 		rtype = ELF_R_TYPE(rel->r_info);
 		symidx = ELF_R_SYM(rel->r_info);
 
 		if (rtype == R_386_NONE) {
 			/* none */
 			continue;
 		}
 		if (rtype == R_386_RELATIVE) {
 			/* B + A */
 			addr = (Elf_Addr)PTROUT(offset + addend);
 			if (*where != addr)
 				*where = addr;
 		} else if (rtype == R_386_IRELATIVE) {
 			printf("Linux i386 vDSO: unexpected ifunc relocation, "
 			    "symbol index %d\n", symidx);
 		} else {
 			printf("Linux i386 vDSO: unexpected relocation type %d, "
 			    "symbol index %d\n", rtype, symidx);
 		}
 	}
 }
 
 /* Vendor name and first descriptor word expected in the Linux ABI note. */
 static char GNU_ABI_VENDOR[] = "GNU";
 static int GNULINUX_ABI_DESC = 0;
 
 /*
  * Translate the descriptor of a GNU ABI note into an osrel value.
  *
  * The descriptor starts after the note header and the name, the latter
  * rounded up to sizeof(Elf32_Addr).  Returns false when its first word
  * is not GNULINUX_ABI_DESC; otherwise stores the version built from the
  * next three words in *osrel and returns true.
  */
 static bool
 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
 {
 	const Elf32_Word *desc;
 	uintptr_t where;
 
 	where = (uintptr_t)(note + 1) +
 	    roundup2(note->n_namesz, sizeof(Elf32_Addr));
 	desc = (const Elf32_Word *)where;
 
 	if (desc[0] == GNULINUX_ABI_DESC) {
 		/*
 		 * For Linux we encode osrel using the Linux convention of
 		 * 	(version << 16) | (major << 8) | (minor)
 		 * See macro in linux_mib.h
 		 */
 		*osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]);
 		return (true);
 	}
 
 	return (false);
 }
 
 /*
  * Description of the ELF note that brands a Linux binary: vendor "GNU",
  * note type 1, with the descriptor turned into an osrel value by
  * linux_trans_osrel().
  */
 static Elf_Brandnote linux_brandnote = {
 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
 	.hdr.n_type	= 1,
 	.vendor		= GNU_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= linux_trans_osrel
 };
 
 /* Brand entry for i386 Linux binaries interpreted by /lib/ld-linux.so.1. */
 static Elf32_Brandinfo linux_brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib/ld-linux.so.1",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /* Brand entry for i386 Linux binaries interpreted by /lib/ld-linux.so.2. */
 static Elf32_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib/ld-linux.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /*
  * Brand entry for i386 Linux binaries interpreted by
  * /lib/ld-musl-i386.so.1.  Unlike the other two brands it also sets
  * LINUX_BI_FUTEX_REQUEUE.
  */
 static Elf32_Brandinfo linux_muslbrand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= linux_emul_path,
 	.interp_path	= "/lib/ld-musl-i386.so.1",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE |
 			    LINUX_BI_FUTEX_REQUEUE
 };
 
 /*
  * NULL-terminated list of the brands that linux_elf_modevent() inserts
  * on MOD_LOAD and removes on MOD_UNLOAD.
  */
 Elf32_Brandinfo *linux_brandlist[] = {
 	&linux_brand,
 	&linux_glibc2brand,
 	&linux_muslbrand,
 	NULL
 };
 
 /*
  * Module event handler for the Linux ELF image activator.
  *
  * MOD_LOAD inserts every brand in linux_brandlist; if all insertions
  * succeed the ioctl handlers, /dev/shm support and the jail OSD hooks
  * are registered and stclohz is set, otherwise EINVAL is returned.
  * MOD_UNLOAD refuses with EBUSY while any brand is in use, returns
  * EINVAL if a brand cannot be removed, and otherwise undoes the
  * registrations.  Any other event yields EOPNOTSUPP.
  */
 static int
 linux_elf_modevent(module_t mod, int type, void *data)
 {
 	struct linux_ioctl_handler **lihp;
 	Elf32_Brandinfo **bip;
 	int error = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		/* Try every brand, even after one has failed. */
 		for (bip = linux_brandlist; *bip != NULL; bip++) {
 			if (elf32_insert_brand_entry(*bip) < 0)
 				error = EINVAL;
 		}
 		if (error != 0) {
 			printf("cannot insert Linux ELF brand handler\n");
 			break;
 		}
 		SET_FOREACH(lihp, linux_ioctl_handler_set) {
 			linux_ioctl_register_handler(*lihp);
 		}
 		linux_dev_shm_create();
 		linux_osd_jail_register();
 		stclohz = (stathz != 0) ? stathz : hz;
 		if (bootverbose)
 			printf("Linux ELF exec handler installed\n");
 		break;
 	case MOD_UNLOAD:
 		/* Any brand still in use blocks the unload. */
 		for (bip = linux_brandlist; *bip != NULL; bip++) {
 			if (elf32_brand_inuse(*bip))
 				error = EBUSY;
 		}
 		if (error == 0) {
 			for (bip = linux_brandlist; *bip != NULL; bip++) {
 				if (elf32_remove_brand_entry(*bip) < 0)
 					error = EINVAL;
 			}
 		}
 		if (error != 0) {
 			printf("Could not deinstall ELF interpreter entry\n");
 			break;
 		}
 		SET_FOREACH(lihp, linux_ioctl_handler_set) {
 			linux_ioctl_unregister_handler(*lihp);
 		}
 		linux_dev_shm_destroy();
 		linux_osd_jail_deregister();
 		if (bootverbose)
 			printf("Linux ELF exec handler removed\n");
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (error);
 }
 
 /*
  * Module descriptor: the "linuxelf" module with linux_elf_modevent() as
  * its event handler and no extra argument.
  */
 static moduledata_t linux_elf_mod = {
 	"linuxelf",
 	linux_elf_modevent,
 	0
 };
 
 /* Declared at SI_SUB_EXEC, SI_ORDER_ANY. */
 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
 FEATURE(linux, "Linux 32bit support");
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 964e9c999d60..c71b00337027 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1,2889 +1,2898 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2017 Dell EMC
  * Copyright (c) 2000-2001, 2003 David O'Brien
  * Copyright (c) 1995-1996 Søren Schmidt
  * Copyright (c) 1996 Peter Wemm
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/compressor.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mman.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/procfs.h>
 #include <sys/ptrace.h>
 #include <sys/racct.h>
 #include <sys/reg.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sf_buf.h>
 #include <sys/smp.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/vnode.h>
 #include <sys/syslog.h>
 #include <sys/eventhandler.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 
 #include <machine/elf.h>
 #include <machine/md_var.h>
 
 /* Rounding applied to a note's name size to find its descriptor. */
 #define ELF_NOTE_ROUNDSIZE	4
 /* Index into e_ident of the FreeBSD 3.x style brand string. */
 #define OLD_EI_BRAND	8
 
 /* Forward declarations of the file-local helpers. */
 static int __elfN(check_header)(const Elf_Ehdr *hdr);
 static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
     const char *interp, int32_t *osrel, uint32_t *fctl0);
 static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
     u_long *entry);
 static int __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset,
     caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot);
 static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
 static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note,
     int32_t *osrel);
 static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static bool __elfN(check_note)(struct image_params *imgp,
     Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0,
     uint32_t *fctl0);
 static vm_prot_t __elfN(trans_prot)(Elf_Word);
 static Elf_Word __elfN(untrans_prot)(vm_prot_t);
 static size_t __elfN(prepare_register_notes)(struct thread *td,
     struct note_info_list *list, struct thread *target_td);
 
 /* Root sysctl node for this word size: kern.elf32 or kern.elf64. */
 SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE),
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
 
 /* Brand of last resort; the default of -1 is not a value set by any brand here. */
 int __elfN(fallback_brand) = -1;
 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
     fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0,
     __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
 
 /* Legacy core dump policy, off by default. */
 static int elf_legacy_coredump = 0;
 SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, 
     &elf_legacy_coredump, 0,
     "include all and only RW pages in core dumps");
 
 /*
  * Non-executable stack: on by default on amd64, powerpc64, ARMv7 and
  * later, aarch64 and riscv; off by default elsewhere.
  */
 int __elfN(nxstack) =
 #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */ || \
     (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__) || \
     defined(__riscv)
 	1;
 #else
 	0;
 #endif
 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
     nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
     __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack");
 
 /* vDSO preloading: a tunable defaulting to 1 on amd64, fixed at 0 elsewhere. */
 #if defined(__amd64__)
 static int __elfN(vdso) = 1;
 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
     vdso, CTLFLAG_RWTUN, &__elfN(vdso), 0,
     __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable vdso preloading");
 #else
 static int __elfN(vdso) = 0;
 #endif
 
 /* Only built for the 32-bit ELF activator on amd64 and i386. */
 #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__))
 int i386_read_exec = 0;
 SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
     "enable execution from readable segments");
 #endif
 
 static u_long __elfN(pie_base) = ET_DYN_LOAD_ADDR;
 static int
 sysctl_pie_base(SYSCTL_HANDLER_ARGS)
 {
 	u_long val;
 	int error;
 
 	val = __elfN(pie_base);
 	error = sysctl_handle_long(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if ((val & PAGE_MASK) != 0)
 		return (EINVAL);
 	__elfN(pie_base) = val;
 	return (0);
 }
 SYSCTL_PROC(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, pie_base,
     CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
     sysctl_pie_base, "LU",
     "PIE load base without randomization");
 
 /* The "aslr" sub-node that groups the randomization knobs below. */
 SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
 #define	ASLR_NODE_OID	__CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr)
 
 /*
  * ASLR works properly on 64-bit machines, but there are still some
  * problems on 32-bit architectures.  For this reason ASLR is enabled
  * by default only for 64-bit executables.
  */
 static int __elfN(aslr_enabled) = __ELF_WORD_SIZE == 64;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
     &__elfN(aslr_enabled), 0,
     __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
     ": enable address map randomization");
 
 /*
  * Enable ASLR only for 64-bit PIE binaries by default.
  */
 static int __elfN(pie_aslr_enabled) = __ELF_WORD_SIZE == 64;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
     &__elfN(pie_aslr_enabled), 0,
     __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
     ": enable address map randomization for PIE binaries");
 
 /*
  * sbrk is deprecated and can be assumed to be unused in most cases.
  * This setting only matters when ASLR is enabled; leaving it off
  * allows the bss grow region to be utilized.
  */
 static int __elfN(aslr_honor_sbrk) = 0;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
     &__elfN(aslr_honor_sbrk), 0,
     __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");
 
 /* Stack address randomization, on by default. */
 static int __elfN(aslr_stack) = 1;
 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN,
     &__elfN(aslr_stack), 0,
     __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
     ": enable stack address randomization");
 
 /* sigfastblock for new processes, on by default. */
 static int __elfN(sigfastblock) = 1;
 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock,
     CTLFLAG_RWTUN, &__elfN(sigfastblock), 0,
     "enable sigfastblock for new processes");
 
 /* Simultaneously writable and executable mappings, allowed by default. */
 static bool __elfN(allow_wx) = true;
 SYSCTL_BOOL(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, allow_wx,
     CTLFLAG_RWTUN, &__elfN(allow_wx), 0,
     "Allow pages to be mapped simultaneously writable and executable");
 
 /* Registered brands; unused slots are NULL. */
 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
 
 /* True when address 'a' is a multiple of sizeof(t). */
 #define	aligned(a, t)	(rounddown2((u_long)(a), sizeof(t)) == (u_long)(a))
 
 /*
  * Description of the FreeBSD ABI tag note: vendor FREEBSD_ABI_VENDOR,
  * type NT_FREEBSD_ABI_TAG, with a single int32_t descriptor that
  * freebsd_trans_osrel() returns as the osrel value.
  */
 Elf_Brandnote __elfN(freebsd_brandnote) = {
 	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),
 	.hdr.n_descsz	= sizeof(int32_t),
 	.hdr.n_type	= NT_FREEBSD_ABI_TAG,
 	.vendor		= FREEBSD_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= __elfN(freebsd_trans_osrel)
 };
 
 /*
  * Read osrel from a FreeBSD ABI tag note.
  *
  * The descriptor follows the note header and the name, the latter
  * rounded up to ELF_NOTE_ROUNDSIZE; its first int32_t is stored in
  * *osrel.  Always returns true.
  */
 static bool
 __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
 {
 	uintptr_t desc;
 
 	desc = (uintptr_t)(note + 1) +
 	    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
 	*osrel = *(const int32_t *)desc;
 
 	return (true);
 }
 
 /* Vendor name and first descriptor word expected in the kFreeBSD note. */
 static const char GNU_ABI_VENDOR[] = "GNU";
 static int GNU_KFREEBSD_ABI_DESC = 3;
 
 /*
  * Description of the GNU ABI note checked by kfreebsd_trans_osrel():
  * vendor "GNU", type 1.
  */
 Elf_Brandnote __elfN(kfreebsd_brandnote) = {
 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
 	.hdr.n_type	= 1,
 	.vendor		= GNU_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= kfreebsd_trans_osrel
 };
 
 /*
  * Translate the descriptor of a GNU ABI note into a FreeBSD osrel.
  *
  * Returns false when the first descriptor word is not
  * GNU_KFREEBSD_ABI_DESC; otherwise combines the next three words into
  * *osrel and returns true.
  */
 static bool
 kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
 {
 	const Elf32_Word *desc;
 	uintptr_t where;
 
 	where = (uintptr_t)(note + 1) +
 	    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
 	desc = (const Elf32_Word *)where;
 
 	if (desc[0] == GNU_KFREEBSD_ABI_DESC) {
 		/*
 		 * Debian GNU/kFreeBSD embed the earliest compatible kernel
 		 * version (__FreeBSD_version: <major><two digit minor>Rxx)
 		 * in the LSB way.
 		 */
 		*osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];
 		return (true);
 	}
 
 	return (false);
 }
 
 /*
  * Register a brand in the first free slot of elf_brand_list.
  *
  * Returns 0 on success, or -1 (after printing a warning) when all
  * MAX_BRANDS slots are occupied.
  */
 int
 __elfN(insert_brand_entry)(Elf_Brandinfo *entry)
 {
 	int slot;
 
 	for (slot = 0; slot < MAX_BRANDS; slot++) {
 		if (elf_brand_list[slot] != NULL)
 			continue;
 		elf_brand_list[slot] = entry;
 		return (0);
 	}
 
 	printf("WARNING: %s: could not insert brandinfo entry: %p\n",
 	    __func__, entry);
 	return (-1);
 }
 
 /*
  * Unregister a brand by clearing the first elf_brand_list slot that
  * holds it.
  *
  * Returns 0 if the entry was found, -1 otherwise.
  */
 int
 __elfN(remove_brand_entry)(Elf_Brandinfo *entry)
 {
 	int slot;
 
 	for (slot = 0; slot < MAX_BRANDS; slot++) {
 		if (elf_brand_list[slot] == entry) {
 			elf_brand_list[slot] = NULL;
 			return (0);
 		}
 	}
 	return (-1);
 }
 
 /*
  * Report whether any process in the system currently runs under the
  * brand's sysentvec.  The process list is walked with allproc_lock
  * held shared.
  */
 bool
 __elfN(brand_inuse)(Elf_Brandinfo *entry)
 {
 	struct proc *p;
 	bool found;
 
 	found = false;
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		found = (p->p_sysent == entry->sysvec);
 		if (found)
 			break;
 	}
 	sx_sunlock(&allproc_lock);
 
 	return (found);
 }
 
 static Elf_Brandinfo *
 __elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
     int32_t *osrel, uint32_t *fctl0)
 {
 	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
 	Elf_Brandinfo *bi, *bi_m;
 	bool ret, has_fctl0;
 	int i, interp_name_len;
 
 	interp_name_len = interp != NULL ? strlen(interp) + 1 : 0;
 
 	/*
 	 * We support four types of branding -- (1) the ELF EI_OSABI field
 	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
 	 * branding w/in the ELF header, (3) path of the `interp_path'
 	 * field, and (4) the ".note.ABI-tag" ELF section.
 	 */
 
 	/* Look for an ".note.ABI-tag" ELF section */
 	bi_m = NULL;
 	for (i = 0; i < MAX_BRANDS; i++) {
 		bi = elf_brand_list[i];
 		if (bi == NULL)
 			continue;
 		if (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)
 			continue;
 		if (hdr->e_machine == bi->machine && (bi->flags &
 		    (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
 			has_fctl0 = false;
 			*fctl0 = 0;
 			*osrel = 0;
 			ret = __elfN(check_note)(imgp, bi->brand_note, osrel,
 			    &has_fctl0, fctl0);
 			/* Give brand a chance to veto check_note's guess */
 			if (ret && bi->header_supported) {
 				ret = bi->header_supported(imgp, osrel,
 				    has_fctl0 ? fctl0 : NULL);
 			}
 			/*
 			 * If note checker claimed the binary, but the
 			 * interpreter path in the image does not
 			 * match default one for the brand, try to
 			 * search for other brands with the same
 			 * interpreter.  Either there is better brand
 			 * with the right interpreter, or, failing
 			 * this, we return first brand which accepted
 			 * our note and, optionally, header.
 			 */
 			if (ret && bi_m == NULL && interp != NULL &&
 			    (bi->interp_path == NULL ||
 			    (strlen(bi->interp_path) + 1 != interp_name_len ||
 			    strncmp(interp, bi->interp_path, interp_name_len)
 			    != 0))) {
 				bi_m = bi;
 				ret = 0;
 			}
 			if (ret)
 				return (bi);
 		}
 	}
 	if (bi_m != NULL)
 		return (bi_m);
 
 	/* If the executable has a brand, search for it in the brand list. */
 	for (i = 0; i < MAX_BRANDS; i++) {
 		bi = elf_brand_list[i];
 		if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 ||
 		    (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0))
 			continue;
 		if (hdr->e_machine == bi->machine &&
 		    (hdr->e_ident[EI_OSABI] == bi->brand ||
 		    (bi->compat_3_brand != NULL &&
 		    strcmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
 		    bi->compat_3_brand) == 0))) {
 			/* Looks good, but give brand a chance to veto */
 			if (bi->header_supported == NULL ||
 			    bi->header_supported(imgp, NULL, NULL)) {
 				/*
 				 * Again, prefer strictly matching
 				 * interpreter path.
 				 */
 				if (interp_name_len == 0 &&
 				    bi->interp_path == NULL)
 					return (bi);
 				if (bi->interp_path != NULL &&
 				    strlen(bi->interp_path) + 1 ==
 				    interp_name_len && strncmp(interp,
 				    bi->interp_path, interp_name_len) == 0)
 					return (bi);
 				if (bi_m == NULL)
 					bi_m = bi;
 			}
 		}
 	}
 	if (bi_m != NULL)
 		return (bi_m);
 
 	/* No known brand, see if the header is recognized by any brand */
 	for (i = 0; i < MAX_BRANDS; i++) {
 		bi = elf_brand_list[i];
 		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY ||
 		    bi->header_supported == NULL)
 			continue;
 		if (hdr->e_machine == bi->machine) {
 			ret = bi->header_supported(imgp, NULL, NULL);
 			if (ret)
 				return (bi);
 		}
 	}
 
 	/* Lacking a known brand, search for a recognized interpreter. */
 	if (interp != NULL) {
 		for (i = 0; i < MAX_BRANDS; i++) {
 			bi = elf_brand_list[i];
 			if (bi == NULL || (bi->flags &
 			    (BI_BRAND_NOTE_MANDATORY | BI_BRAND_ONLY_STATIC))
 			    != 0)
 				continue;
 			if (hdr->e_machine == bi->machine &&
 			    bi->interp_path != NULL &&
 			    /* ELF image p_filesz includes terminating zero */
 			    strlen(bi->interp_path) + 1 == interp_name_len &&
 			    strncmp(interp, bi->interp_path, interp_name_len)
 			    == 0 && (bi->header_supported == NULL ||
 			    bi->header_supported(imgp, NULL, NULL)))
 				return (bi);
 		}
 	}
 
 	/* Lacking a recognized interpreter, try the default brand */
 	for (i = 0; i < MAX_BRANDS; i++) {
 		bi = elf_brand_list[i];
 		if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 ||
 		    (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0))
 			continue;
 		if (hdr->e_machine == bi->machine &&
 		    __elfN(fallback_brand) == bi->brand &&
 		    (bi->header_supported == NULL ||
 		    bi->header_supported(imgp, NULL, NULL)))
 			return (bi);
 	}
 	return (NULL);
 }
 
 static bool
 __elfN(phdr_in_zero_page)(const Elf_Ehdr *hdr)
 {
 	return (hdr->e_phoff <= PAGE_SIZE &&
 	    (u_int)hdr->e_phentsize * hdr->e_phnum <= PAGE_SIZE - hdr->e_phoff);
 }
 
 /*
  * Sanity-check an ELF header: magic, class, data encoding, versions and
  * program header entry size must match the target, and at least one
  * registered brand must handle the header's machine type.
  *
  * Returns 0 if the header is acceptable and ENOEXEC otherwise.
  */
 static int
 __elfN(check_header)(const Elf_Ehdr *hdr)
 {
 	Elf_Brandinfo *brand;
 	int slot;
 
 	if (!IS_ELF(*hdr))
 		return (ENOEXEC);
 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
 	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
 	    hdr->e_ident[EI_VERSION] != EV_CURRENT)
 		return (ENOEXEC);
 	if (hdr->e_phentsize != sizeof(Elf_Phdr) ||
 	    hdr->e_version != ELF_TARG_VER)
 		return (ENOEXEC);
 
 	/* Accept the header as soon as one brand claims this machine. */
 	for (slot = 0; slot < MAX_BRANDS; slot++) {
 		brand = elf_brand_list[slot];
 		if (brand != NULL && brand->machine == hdr->e_machine)
 			return (0);
 	}
 
 	return (ENOEXEC);
 }
 
 /*
  * Fill the byte range [start, end) of the user map.  The pages covering
  * the range are created if they are not mapped yet, and, when a backing
  * object is supplied, end - start bytes are copied out from the object
  * page that holds "offset".
  *
  * Returns KERN_SUCCESS, or KERN_FAILURE if the object page cannot be
  * mapped or the copyout fails.
  */
 static int
 __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
     vm_offset_t start, vm_offset_t end, vm_prot_t prot)
 {
 	struct sf_buf *sf;
 	vm_offset_t page_start, page_off;
 	int error;
 
 	/*
 	 * Create the page if it doesn't exist yet.  The result is ignored
 	 * on purpose.
 	 */
 	page_start = trunc_page(start);
 	vm_map_fixed(map, NULL, 0, page_start, round_page(end) - page_start,
 	    VM_PROT_ALL, VM_PROT_ALL, MAP_CHECK_EXCL);
 
 	/* Without a backing object there is nothing to copy. */
 	if (object == NULL)
 		return (KERN_SUCCESS);
 
 	/* Find the page from the underlying object and copy from it. */
 	sf = vm_imgact_map_page(object, offset);
 	if (sf == NULL)
 		return (KERN_FAILURE);
 	page_off = offset - trunc_page(offset);
 	error = copyout((caddr_t)sf_buf_kva(sf) + page_off, (caddr_t)start,
 	    end - start);
 	vm_imgact_unmap_page(sf);
 
 	return (error != 0 ? KERN_FAILURE : KERN_SUCCESS);
 }
 
 /*
  * Establish the mapping [start, end) in the user map, backed by "object"
  * at "offset" (or anonymous when object is NULL).
  *
  * Unaligned head and tail fragments are handled by map_partial().  The
  * page-aligned middle is mapped directly from the object when the file
  * offset is page aligned; otherwise anonymous memory is mapped and the
  * data is copied out page by page.
  *
  * Returns a KERN_* status.
  */
 static int
 __elfN(map_insert)(struct image_params *imgp, vm_map_t map, vm_object_t object,
     vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot,
     int cow)
 {
 	struct sf_buf *sf;
 	vm_offset_t pgoff;
 	vm_size_t chunk;
 	int error, lkstate, rv;
 
 	/* Leading fragment: advance start to the next page boundary. */
 	if (trunc_page(start) != start) {
 		rv = __elfN(map_partial)(map, object, offset, start,
 		    round_page(start), prot);
 		if (rv != KERN_SUCCESS)
 			return (rv);
 		offset += round_page(start) - start;
 		start = round_page(start);
 	}
 
 	/* Trailing fragment: pull end back to the previous page boundary. */
 	if (round_page(end) != end) {
 		rv = __elfN(map_partial)(map, object, offset +
 		    trunc_page(end) - start, trunc_page(end), end, prot);
 		if (rv != KERN_SUCCESS)
 			return (rv);
 		end = trunc_page(end);
 	}
 
 	/* Nothing page-sized is left between the fragments. */
 	if (start >= end)
 		return (KERN_SUCCESS);
 
 	if ((offset & PAGE_MASK) == 0) {
 		/* Page-aligned file offset: map the object itself. */
 		vm_object_reference(object);
 		rv = vm_map_fixed(map, object, offset, start, end - start,
 		    prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL |
 		    (object != NULL ? MAP_VN_EXEC : 0));
 		if (rv == KERN_SUCCESS) {
 			if (object != NULL) {
 				MPASS(imgp->vp->v_object == object);
 				VOP_SET_TEXT_CHECKED(imgp->vp);
 			}
 			return (KERN_SUCCESS);
 		}
 
 		/*
 		 * The mapping failed: release the reference taken above
 		 * with the vnode unlocked, then restore the previous lock
 		 * state.
 		 */
 		lkstate = VOP_ISLOCKED(imgp->vp);
 		VOP_UNLOCK(imgp->vp);
 		vm_object_deallocate(object);
 		vn_lock(imgp->vp, lkstate | LK_RETRY);
 		return (rv);
 	}
 
 	/*
 	 * The mapping is not page aligned.  This means that we have
 	 * to copy the data.
 	 */
 	rv = vm_map_fixed(map, NULL, 0, start, end - start,
 	    prot | VM_PROT_WRITE, VM_PROT_ALL, MAP_CHECK_EXCL);
 	if (rv != KERN_SUCCESS)
 		return (rv);
 	if (object == NULL)
 		return (KERN_SUCCESS);
 
 	while (start < end) {
 		sf = vm_imgact_map_page(object, offset);
 		if (sf == NULL)
 			return (KERN_FAILURE);
 
 		/* Copy up to the end of the current object page. */
 		pgoff = offset - trunc_page(offset);
 		chunk = end - start;
 		if (chunk > PAGE_SIZE - pgoff)
 			chunk = PAGE_SIZE - pgoff;
 		error = copyout((caddr_t)sf_buf_kva(sf) + pgoff,
 		    (caddr_t)start, chunk);
 		vm_imgact_unmap_page(sf);
 		if (error != 0)
 			return (KERN_FAILURE);
 
 		offset += chunk;
 		start += chunk;
 	}
 
 	return (KERN_SUCCESS);
 }
 
 /*
  * Map one loadable segment into the process address space.
  *
  * "offset" and "filsz" describe the segment's bytes in the file, "vmaddr"
  * and "memsz" its placement and size in memory, "prot" the requested
  * protection.  The file-backed part is mapped from the image object; any
  * memory beyond the file data is anonymous, and the file bytes that
  * share the first anonymous page are copied into it.
  *
  * Returns 0 on success, ENOEXEC for an inconsistent or truncated segment,
  * EINVAL if a mapping cannot be established, EIO if the object page
  * cannot be mapped for copying, or the copyout() error.
  */
 static int
 __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset,
     caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)
 {
 	struct sf_buf *sf;
 	vm_map_t map;
 	vm_object_t object;
 	vm_offset_t map_addr;
 	vm_ooffset_t file_addr;
 	size_t map_len, copy_len;
 	int cow, error, rv;
 
 	/*
 	 * Refuse a segment whose file size exceeds its memory size, or
 	 * whose file data reaches past the size recorded for the file:
 	 * mapping it would walk off the end of the file object.
 	 */
 	if (filsz > memsz ||
 	    (filsz != 0 && (off_t)filsz + offset > imgp->attr->va_size)) {
 		uprintf("elf_load_section: truncated ELF file\n");
 		return (ENOEXEC);
 	}
 
 	map = &imgp->proc->p_vmspace->vm_map;
 	object = imgp->object;
 	file_addr = trunc_page(offset);
 	map_addr = trunc_page((vm_offset_t)vmaddr);
 
 	/*
 	 * When the segment is larger in memory than in the file, the
 	 * anonymous mapping starts a page early and the initialized data
 	 * is copied into that first page, rather than clearing the tail
 	 * of an oversized file mapping.  Hence the file mapping is
 	 * truncated in that case and rounded up otherwise.
 	 */
 	map_len = 0;
 	if (filsz != 0) {
 		if (memsz > filsz)
 			map_len = trunc_page(offset + filsz) - file_addr;
 		else
 			map_len = round_page(offset + filsz) - file_addr;
 	}
 
 	if (map_len != 0) {
 		/* cow flags: don't dump readonly sections in core */
 		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT;
 		if ((prot & VM_PROT_WRITE) == 0)
 			cow |= MAP_DISABLE_COREDUMP;
 
 		rv = __elfN(map_insert)(imgp, map, object, file_addr,
 		    map_addr, map_addr + map_len, prot, cow);
 		if (rv != KERN_SUCCESS)
 			return (EINVAL);
 
 		/* The file mapping covers the whole segment. */
 		if (memsz == filsz)
 			return (0);
 	}
 
 	/*
 	 * The remaining file bytes, if any, go into the first page of the
 	 * anonymous part of the segment.
 	 */
 	copy_len = 0;
 	if (filsz != 0)
 		copy_len = (offset + filsz) - trunc_page(offset + filsz);
 	map_addr = trunc_page((vm_offset_t)vmaddr + filsz);
 	map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr;
 
 	if (map_len != 0) {
 		rv = __elfN(map_insert)(imgp, map, NULL, 0, map_addr,
 		    map_addr + map_len, prot, 0);
 		if (rv != KERN_SUCCESS)
 			return (EINVAL);
 	}
 
 	if (copy_len != 0) {
 		sf = vm_imgact_map_page(object, offset + filsz);
 		if (sf == NULL)
 			return (EIO);
 
 		/* Send the page fragment to user space. */
 		error = copyout((caddr_t)sf_buf_kva(sf), (caddr_t)map_addr,
 		    copy_len);
 		vm_imgact_unmap_page(sf);
 		if (error != 0)
 			return (error);
 	}
 
 	/*
 	 * Remove write access to the page if it was only granted by
 	 * map_insert to allow copyout.
 	 */
 	if ((prot & VM_PROT_WRITE) == 0)
 		vm_map_protect(map, trunc_page(map_addr), round_page(map_addr +
 		    map_len), prot, 0, VM_MAP_PROTECT_SET_PROT);
 
 	return (0);
 }
 
 /*
  * Load every non-empty PT_LOAD segment listed in the program headers,
  * relocated by "rbase".  When "base_addrp" is not NULL it receives the
  * page-truncated address of the first segment loaded (0 if none was).
  *
  * Returns 0 on success or the first error from load_section().
  */
 static int
 __elfN(load_sections)(struct image_params *imgp, const Elf_Ehdr *hdr,
     const Elf_Phdr *phdr, u_long rbase, u_long *base_addrp)
 {
 	const Elf_Phdr *ph;
 	vm_prot_t prot;
 	u_long base_addr;
 	bool have_base;
 	int error, idx;
 
 	ASSERT_VOP_LOCKED(imgp->vp, __func__);
 
 	have_base = false;
 	base_addr = 0;
 
 	for (idx = 0; idx < hdr->e_phnum; idx++) {
 		ph = &phdr[idx];
 		if (ph->p_type != PT_LOAD || ph->p_memsz == 0)
 			continue;
 
 		/* Loadable segment */
 		prot = __elfN(trans_prot)(ph->p_flags);
 		error = __elfN(load_section)(imgp, ph->p_offset,
 		    (caddr_t)(uintptr_t)ph->p_vaddr + rbase,
 		    ph->p_memsz, ph->p_filesz, prot);
 		if (error != 0)
 			return (error);
 
 		/* The first segment loaded establishes the base address. */
 		if (!have_base) {
 			base_addr = trunc_page(ph->p_vaddr + rbase);
 			have_base = true;
 		}
 	}
 
 	if (base_addrp != NULL)
 		*base_addrp = base_addr;
 
 	return (0);
 }
 
 /*
  * Load the file named "file" into the address space of process "p".
  * The file may be either a shared object (ET_DYN) or an executable
  * (ET_EXEC); any other ELF type is rejected with ENOEXEC.
  *
  * "addr" is an in/out reference parameter.  On entry it gives the
  * address at which a shared object should be loaded; it is ignored for
  * an executable.  On successful exit it holds the address where the
  * file was actually loaded.
  *
  * "entry" is an out-only reference parameter.  On successful exit it
  * holds the entry point of the loaded file.
  *
  * Returns 0 on success or an errno value; "addr" and "entry" are left
  * untouched on failure.
  */
 static int
 __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
 	u_long *entry)
 {
 	struct {
 		struct nameidata nd;
 		struct vattr attr;
 		struct image_params image_params;
 	} *scratch;
 	const Elf_Ehdr *hdr;
 	const Elf_Phdr *phdr;
 	struct nameidata *nd;
 	struct image_params *imgp;
 	u_long base_addr, rbase;
 	int error;
 
 #ifdef CAPABILITY_MODE
 	/*
 	 * XXXJA: This check can go away once we are sufficiently confident
 	 * that the checks in namei() are correct.
 	 */
 	if (IN_CAPABILITY_MODE(curthread))
 		return (ECAPMODE);
 #endif
 
 	/* One zeroed allocation holds all of the temporary state. */
 	scratch = malloc(sizeof(*scratch), M_TEMP, M_WAITOK | M_ZERO);
 	nd = &scratch->nd;
 	imgp = &scratch->image_params;
 
 	/* Initialize part of the common data. */
 	imgp->proc = p;
 	imgp->attr = &scratch->attr;
 
 	NDINIT(nd, LOOKUP, ISOPEN | FOLLOW | LOCKSHARED | LOCKLEAF,
 	    UIO_SYSSPACE, file);
 	error = namei(nd);
 	if (error != 0) {
 		/* Make sure the cleanup code does not touch a vnode. */
 		nd->ni_vp = NULL;
 		goto fail;
 	}
 	NDFREE_PNBUF(nd);
 	imgp->vp = nd->ni_vp;
 
 	/* Check permissions, modes, uid, etc on the file, and "open" it. */
 	error = exec_check_permissions(imgp);
 	if (error != 0)
 		goto fail;
 
 	error = exec_map_first_page(imgp);
 	if (error != 0)
 		goto fail;
 
 	imgp->object = nd->ni_vp->v_object;
 
 	hdr = (const Elf_Ehdr *)imgp->image_header;
 	error = __elfN(check_header)(hdr);
 	if (error != 0)
 		goto fail;
 
 	/* Only a shared object is relocated to the requested address. */
 	switch (hdr->e_type) {
 	case ET_DYN:
 		rbase = *addr;
 		break;
 	case ET_EXEC:
 		rbase = 0;
 		break;
 	default:
 		error = ENOEXEC;
 		goto fail;
 	}
 
 	/* Only support headers that fit within first page for now. */
 	if (!__elfN(phdr_in_zero_page)(hdr)) {
 		error = ENOEXEC;
 		goto fail;
 	}
 
 	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
 	if (!aligned(phdr, Elf_Addr)) {
 		error = ENOEXEC;
 		goto fail;
 	}
 
 	base_addr = 0;
 	error = __elfN(load_sections)(imgp, hdr, phdr, rbase, &base_addr);
 	if (error == 0) {
 		*addr = base_addr;
 		*entry = (unsigned long)hdr->e_entry + rbase;
 	}
 
 fail:
 	if (imgp->firstpage != NULL)
 		exec_unmap_first_page(imgp);
 
 	if (nd->ni_vp != NULL) {
 		if (imgp->textset)
 			VOP_UNSET_TEXT_CHECKED(nd->ni_vp);
 		vput(nd->ni_vp);
 	}
 	free(scratch, M_TEMP);
 
 	return (error);
 }
 
 /*
  * Select a randomized valid address in "map", between minv and maxv,
  * with the specified alignment.  The [minv, maxv) range must belong to
  * the map.  Note that this function only picks the address; it is up to
  * the caller to clamp maxv so that the final allocation length fits
  * into the map.
  *
  * The result is returned in *resp.  A non-zero return value (ENOEXEC)
  * means the arguments did not pass the sanity checks for overflow and
  * range correctness.
  */
 static int
 __CONCAT(rnd_, __elfN(base))(vm_map_t map, u_long minv, u_long maxv,
     u_int align, u_long *resp)
 {
 	u_long rnd, addr;
 
 	MPASS(vm_map_min(map) <= minv);
 
 	if (minv >= maxv || minv + align >= maxv || maxv > vm_map_max(map)) {
 		uprintf("Invalid ELF segments layout\n");
 		return (ENOEXEC);
 	}
 
 	/*
 	 * Add a random offset within the range to the aligned minimum,
 	 * align the sum down, and step back by one alignment unit if that
 	 * landed at or beyond maxv.
 	 */
 	arc4rand(&rnd, sizeof(rnd), 0);
 	addr = (roundup(minv, (u_long)align) + rnd % (maxv - minv)) &
 	    ~((u_long)align - 1);
 	if (addr >= maxv)
 		addr -= align;
 
 	KASSERT(addr >= minv,
 	    ("res %#lx < minv %#lx, maxv %#lx rbase %#lx",
 	    addr, minv, maxv, rnd));
 	KASSERT(addr < maxv,
 	    ("res %#lx > maxv %#lx, minv %#lx rbase %#lx",
 	    addr, maxv, minv, rnd));
 
 	*resp = addr;
 	return (0);
 }
 
 /*
  * Compute the text, data and total sizes of the image's loadable
  * segments (relocated by "et_dyn_addr"), check them against the process
  * and system limits, and record the text and data extents in the
  * process vmspace.
  *
  * Returns 0 on success, or ENOMEM after printing a diagnostic when a
  * limit is exceeded.
  */
 static int
 __elfN(enforce_limits)(struct image_params *imgp, const Elf_Ehdr *hdr,
     const Elf_Phdr *phdr, u_long et_dyn_addr)
 {
 	const Elf_Phdr *ph;
 	struct vmspace *vmspace;
 	const char *err_str;
 	u_long text_addr, text_size, data_addr, data_size, total_size;
 	u_long seg_start, seg_len;
 	int idx;
 
 	text_addr = text_size = 0;
 	data_addr = data_size = 0;
 	total_size = 0;
 
 	for (idx = 0; idx < hdr->e_phnum; idx++) {
 		ph = &phdr[idx];
 		if (ph->p_type != PT_LOAD || ph->p_memsz == 0)
 			continue;
 
 		/* Page-aligned extent occupied by this segment. */
 		seg_start = trunc_page(ph->p_vaddr + et_dyn_addr);
 		seg_len = round_page(ph->p_memsz +
 		    ph->p_vaddr + et_dyn_addr - seg_start);
 
 		/*
 		 * Make the largest executable segment the official
 		 * text segment and all others data.
 		 *
 		 * Note that obreak() assumes that data_addr + data_size == end
 		 * of data load area, and the ELF file format expects segments
 		 * to be sorted by address.  If multiple data segments exist,
 		 * the last one will be used.
 		 */
 		if ((ph->p_flags & PF_X) == 0 || seg_len <= text_size) {
 			data_size = seg_len;
 			data_addr = seg_start;
 		} else {
 			text_size = seg_len;
 			text_addr = seg_start;
 		}
 		total_size += seg_len;
 	}
 
 	/* With no data segment recorded, reuse the text segment. */
 	if (data_size == 0 && data_addr == 0) {
 		data_size = text_size;
 		data_addr = text_addr;
 	}
 
 	/*
 	 * Check limits.  It should be safe to check the
 	 * limits after loading the segments since we do
 	 * not actually fault in all the segments pages.
 	 */
 	err_str = NULL;
 	PROC_LOCK(imgp->proc);
 	if (data_size > lim_cur_proc(imgp->proc, RLIMIT_DATA))
 		err_str = "Data segment size exceeds process limit";
 	else if (text_size > maxtsiz)
 		err_str = "Text segment size exceeds system limit";
 	else if (total_size > lim_cur_proc(imgp->proc, RLIMIT_VMEM))
 		err_str = "Total segment size exceeds process limit";
 	else if (racct_set(imgp->proc, RACCT_DATA, data_size) != 0)
 		err_str = "Data segment size exceeds resource limit";
 	else if (racct_set(imgp->proc, RACCT_VMEM, total_size) != 0)
 		err_str = "Total segment size exceeds resource limit";
 	PROC_UNLOCK(imgp->proc);
 
 	if (err_str != NULL) {
 		uprintf("%s\n", err_str);
 		return (ENOMEM);
 	}
 
 	/* Sizes are stored in pages, addresses as is. */
 	vmspace = imgp->proc->p_vmspace;
 	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
 	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
 	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
 	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
 
 	return (0);
 }
 
 /*
  * Fetch the interpreter path named by the PT_INTERP program header.
  *
  * When the path lies within the first page of the image it is returned
  * as a pointer into the image header and *free_interpp is set to false.
  * Otherwise it is read from the vnode into a buffer allocated from
  * M_TEMP and *free_interpp is set to true, in which case the caller has
  * to free() it.
  *
  * Returns 0 on success, ENOEXEC for a malformed header, or the error
  * from vn_rdwr().
  */
 static int
 __elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr,
     char **interpp, bool *free_interpp)
 {
 	struct thread *td;
 	char *buf;
 	int error, len;
 
 	KASSERT(phdr->p_type == PT_INTERP,
 	    ("%s: p_type %u != PT_INTERP", __func__, phdr->p_type));
 	ASSERT_VOP_LOCKED(imgp->vp, __func__);
 
 	/* Path to interpreter */
 	if (phdr->p_filesz < 2 || phdr->p_filesz > MAXPATHLEN) {
 		uprintf("Invalid PT_INTERP\n");
 		return (ENOEXEC);
 	}
 	len = phdr->p_filesz;
 
 	/* Fast path: the whole string is inside the mapped first page. */
 	if (phdr->p_offset <= PAGE_SIZE &&
 	    len <= PAGE_SIZE - phdr->p_offset) {
 		buf = __DECONST(char *, imgp->image_header) + phdr->p_offset;
 		if (buf[len - 1] != '\0') {
 			uprintf("Invalid PT_INTERP\n");
 			return (ENOEXEC);
 		}
 
 		*interpp = buf;
 		*free_interpp = false;
 		return (0);
 	}
 
 	/*
 	 * The vnode lock might be needed by the pagedaemon to
 	 * clean pages owned by the vnode.  Do not allow sleep
 	 * waiting for memory with the vnode locked, instead
 	 * try non-sleepable allocation first, and if it
 	 * fails, go to the slow path where we drop the lock
 	 * and do M_WAITOK.  A text reference prevents
 	 * modifications to the vnode content.
 	 */
 	buf = malloc(len + 1, M_TEMP, M_NOWAIT);
 	if (buf == NULL) {
 		VOP_UNLOCK(imgp->vp);
 		buf = malloc(len + 1, M_TEMP, M_WAITOK);
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 	}
 
 	td = curthread;
 	error = vn_rdwr(UIO_READ, imgp->vp, buf, len, phdr->p_offset,
 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, NULL, td);
 	if (error != 0) {
 		free(buf, M_TEMP);
 		uprintf("i/o error PT_INTERP %d\n", error);
 		return (error);
 	}
 
 	/* The extra byte allocated above guarantees termination. */
 	buf[len] = '\0';
 
 	*interpp = buf;
 	*free_interpp = true;
 	return (0);
 }
 
 /*
  * Load the ELF interpreter "interp" for the image, trying in order:
  *   1. the path prefixed with the brand's emulation root, if any;
  *   2. the brand's replacement interpreter path, when the brand has one
  *      and either has no default interpreter path or "interp" equals it;
  *   3. the path exactly as given.
  * The first attempt that succeeds wins.
  *
  * "addr" and "entry" are passed through to load_file().  Returns 0 on
  * success; otherwise prints a diagnostic and returns the error from the
  * last attempt.
  */
 static int
 __elfN(load_interp)(struct image_params *imgp, const Elf_Brandinfo *brand_info,
     const char *interp, u_long *addr, u_long *entry)
 {
 	const char *emul;
 	char *path;
 	int error;
 
 	emul = brand_info->emul_path;
 	if (emul != NULL && emul[0] != '\0') {
 		path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 		snprintf(path, MAXPATHLEN, "%s%s", emul, interp);
 		error = __elfN(load_file)(imgp->proc, path, addr, entry);
 		free(path, M_TEMP);
 		if (error == 0)
 			return (0);
 	}
 
 	if (brand_info->interp_newpath != NULL &&
 	    (brand_info->interp_path == NULL ||
 	    strcmp(interp, brand_info->interp_path) == 0)) {
 		error = __elfN(load_file)(imgp->proc,
 		    brand_info->interp_newpath, addr, entry);
 		if (error == 0)
 			return (0);
 	}
 
 	error = __elfN(load_file)(imgp->proc, interp, addr, entry);
 	if (error != 0) {
 		uprintf("ELF interpreter %s not found, error %d\n", interp,
 		    error);
 	}
 	return (error);
 }
 
 /*
  * Impossible et_dyn_addr initial value indicating that the real base
  * must be calculated later with some randomization applied.  The image
  * activator compares et_dyn_addr against this sentinel and, when it
  * matches, replaces it with an address chosen by the rnd_*_base()
  * helper.
  */
 #define	ET_DYN_ADDR_RAND	1
 
 /*
  * ELF image activator.
  *
  * Validates the ELF and program headers found in imgp->image_header,
  * scans the program headers (PT_LOAD, PT_INTERP, PT_GNU_STACK, PT_PHDR),
  * selects a brand, decides on address space randomization, creates the
  * new vmspace, loads the segments, enforces the resource limits, loads
  * the interpreter if the image names one, maps the stack and finally
  * builds the Elf_Auxargs record stored in imgp->auxargs.
  *
  * Returns -1 when the image is not an ELF file this activator accepts
  * (bad header, or neither ET_EXEC nor ET_DYN).  Once the file has been
  * recognized as ELF, returns 0 on success or an errno value.
  *
  * The image vnode is temporarily unlocked in several places; every exit
  * through the "ret" label asserts that it is locked again.
  */
 static int
 __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
 {
 	struct thread *td;
 	const Elf_Ehdr *hdr;
 	const Elf_Phdr *phdr;
 	Elf_Auxargs *elf_auxargs;
 	struct vmspace *vmspace;
 	vm_map_t map;
 	char *interp;
 	Elf_Brandinfo *brand_info;
 	struct sysentvec *sv;
 	u_long addr, baddr, et_dyn_addr, entry, proghdr;
 	u_long maxalign, maxsalign, mapsz, maxv, maxv1, anon_loc;
 	uint32_t fctl0;
 	int32_t osrel;
 	bool free_interp;
 	int error, i, n;
 
 	hdr = (const Elf_Ehdr *)imgp->image_header;
 
 	/*
 	 * Do we have a valid ELF header ?
 	 *
 	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
 	 * if particular brand doesn't support it.
 	 */
 	if (__elfN(check_header)(hdr) != 0 ||
 	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
 		return (-1);
 
 	/*
 	 * From here on down, we return an errno, not -1, as we've
 	 * detected an ELF file.
 	 */
 
 	if (!__elfN(phdr_in_zero_page)(hdr)) {
 		uprintf("Program headers not in the first page\n");
 		return (ENOEXEC);
 	}
 	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 
 	if (!aligned(phdr, Elf_Addr)) {
 		uprintf("Unaligned program headers\n");
 		return (ENOEXEC);
 	}
 
 	n = error = 0;
 	baddr = 0;
 	osrel = 0;
 	fctl0 = 0;
 	entry = proghdr = 0;
 	interp = NULL;
 	free_interp = false;
 	td = curthread;
 
 	/*
 	 * Somewhat arbitrary, limit accepted max alignment for the
 	 * loadable segment to the max supported superpage size. Too
 	 * large alignment requests are not useful and are indicators
 	 * of corrupted or outright malicious binary.
 	 */
 	maxalign = PAGE_SIZE;
 	maxsalign = PAGE_SIZE * 1024;
 	for (i = MAXPAGESIZES - 1; i > 0; i--) {
 		if (pagesizes[i] > maxsalign)
 			maxsalign = pagesizes[i];
 	}
 
 	mapsz = 0;
 
 	/*
 	 * Scan the program headers once, collecting the values the later
 	 * steps need: base address, largest alignment, total mapping
 	 * size, program header address, interpreter path and stack
 	 * attributes.
 	 */
 	for (i = 0; i < hdr->e_phnum; i++) {
 		switch (phdr[i].p_type) {
 		case PT_LOAD:
 			/* The first PT_LOAD supplies the image base. */
 			if (n == 0)
 				baddr = phdr[i].p_vaddr;
 			if (!powerof2(phdr[i].p_align) ||
 			    phdr[i].p_align > maxsalign) {
 				uprintf("Invalid segment alignment\n");
 				error = ENOEXEC;
 				goto ret;
 			}
 			if (phdr[i].p_align > maxalign)
 				maxalign = phdr[i].p_align;
 			/* Detect wraparound of the running total. */
 			if (mapsz + phdr[i].p_memsz < mapsz) {
 				uprintf("Mapsize overflow\n");
 				error = ENOEXEC;
 				goto ret;
 			}
 			mapsz += phdr[i].p_memsz;
 			n++;
 
 			/*
 			 * If this segment contains the program headers,
 			 * remember their virtual address for the AT_PHDR
 			 * aux entry. Static binaries don't usually include
 			 * a PT_PHDR entry.
 			 */
 			if (phdr[i].p_offset == 0 &&
 			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize <=
 			    phdr[i].p_filesz)
 				proghdr = phdr[i].p_vaddr + hdr->e_phoff;
 			break;
 		case PT_INTERP:
 			/* Path to interpreter */
 			if (interp != NULL) {
 				uprintf("Multiple PT_INTERP headers\n");
 				error = ENOEXEC;
 				goto ret;
 			}
 			error = __elfN(get_interp)(imgp, &phdr[i], &interp,
 			    &free_interp);
 			if (error != 0)
 				goto ret;
 			break;
 		case PT_GNU_STACK:
 			if (__elfN(nxstack))
 				imgp->stack_prot =
 				    __elfN(trans_prot)(phdr[i].p_flags);
 			imgp->stack_sz = phdr[i].p_memsz;
 			break;
 		case PT_PHDR: 	/* Program header table info */
 			proghdr = phdr[i].p_vaddr;
 			break;
 		}
 	}
 
 	/* Pick the brand that will run this image. */
 	brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel, &fctl0);
 	if (brand_info == NULL) {
 		uprintf("ELF binary type \"%u\" not known.\n",
 		    hdr->e_ident[EI_OSABI]);
 		error = ENOEXEC;
 		goto ret;
 	}
 	sv = brand_info->sysvec;
 	et_dyn_addr = 0;
 	if (hdr->e_type == ET_DYN) {
 		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
 			uprintf("Cannot execute shared object\n");
 			error = ENOEXEC;
 			goto ret;
 		}
 		/*
 		 * Honour the base load address from the dso if it is
 		 * non-zero for some reason.
 		 */
 		if (baddr == 0) {
 			if ((sv->sv_flags & SV_ASLR) == 0 ||
 			    (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0)
 				et_dyn_addr = __elfN(pie_base);
 			else if ((__elfN(pie_aslr_enabled) &&
 			    (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) ||
 			    (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0)
 				et_dyn_addr = ET_DYN_ADDR_RAND;
 			else
 				et_dyn_addr = __elfN(pie_base);
 		}
 	}
 
 	/*
 	 * Avoid a possible deadlock if the current address space is destroyed
 	 * and that address space maps the locked vnode.  In the common case,
 	 * the locked vnode's v_usecount is decremented but remains greater
 	 * than zero.  Consequently, the vnode lock is not needed by vrele().
 	 * However, in cases where the vnode lock is external, such as nullfs,
 	 * v_usecount may become zero.
 	 *
 	 * The VV_TEXT flag prevents modifications to the executable while
 	 * the vnode is unlocked.
 	 */
 	VOP_UNLOCK(imgp->vp);
 
 	/*
 	 * Decide whether to enable randomization of user mappings.
 	 * First, reset user preferences for the setid binaries.
 	 * Then, account for the support of the randomization by the
 	 * ABI, by user preferences, and make special treatment for
 	 * PIE binaries.
 	 */
 	if (imgp->credential_setid) {
 		PROC_LOCK(imgp->proc);
 		imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE |
 		    P2_WXORX_DISABLE | P2_WXORX_ENABLE_EXEC);
 		PROC_UNLOCK(imgp->proc);
 	}
 	if ((sv->sv_flags & SV_ASLR) == 0 ||
 	    (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 ||
 	    (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) {
 		KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND,
 		    ("et_dyn_addr == RAND and !ASLR"));
 	} else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 ||
 	    (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) ||
 	    et_dyn_addr == ET_DYN_ADDR_RAND) {
 		imgp->map_flags |= MAP_ASLR;
 		/*
 		 * If user does not care about sbrk, utilize the bss
 		 * grow region for mappings as well.  We can select
 		 * the base for the image anywhere and still not suffer
 		 * from the fragmentation.
 		 */
 		if (!__elfN(aslr_honor_sbrk) ||
 		    (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0)
 			imgp->map_flags |= MAP_ASLR_IGNSTART;
 		if (__elfN(aslr_stack))
 			imgp->map_flags |= MAP_ASLR_STACK;
 	}
 
 	if ((!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0 &&
 	    (imgp->proc->p_flag2 & P2_WXORX_DISABLE) == 0) ||
 	    (imgp->proc->p_flag2 & P2_WXORX_ENABLE_EXEC) != 0)
 		imgp->map_flags |= MAP_WXORX;
 
 	/*
 	 * The result is not acted upon immediately: the steps below are
 	 * guarded by "error == 0", and the jump to "ret" happens only
 	 * after the vnode has been locked again.
 	 */
 	error = exec_new_vmspace(imgp, sv);
 
 	imgp->proc->p_sysent = sv;
 	imgp->proc->p_elf_brandinfo = brand_info;
 
 	vmspace = imgp->proc->p_vmspace;
 	map = &vmspace->vm_map;
 	maxv = sv->sv_usrstack;
 	if ((imgp->map_flags & MAP_ASLR_STACK) == 0)
 		maxv -= lim_max(td, RLIMIT_STACK);
 	if (error == 0 && mapsz >= maxv - vm_map_min(map)) {
 		uprintf("Excessive mapping size\n");
 		error = ENOEXEC;
 	}
 
 	/* Replace the sentinel with a randomized base for the image. */
 	if (error == 0 && et_dyn_addr == ET_DYN_ADDR_RAND) {
 		KASSERT((map->flags & MAP_ASLR) != 0,
 		    ("ET_DYN_ADDR_RAND but !MAP_ASLR"));
 		error = __CONCAT(rnd_, __elfN(base))(map,
 		    vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA),
 		    /* reserve half of the address space to interpreter */
 		    maxv / 2, maxalign, &et_dyn_addr);
 	}
 
 	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 	if (error != 0)
 		goto ret;
 
 	error = __elfN(load_sections)(imgp, hdr, phdr, et_dyn_addr, NULL);
 	if (error != 0)
 		goto ret;
 
 	error = __elfN(enforce_limits)(imgp, hdr, phdr, et_dyn_addr);
 	if (error != 0)
 		goto ret;
 
 	/*
 	 * We load the dynamic linker where a userland call
 	 * to mmap(0, ...) would put it.  The rationale behind this
 	 * calculation is that it leaves room for the heap to grow to
 	 * its maximum allowed size.
 	 */
 	addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td,
 	    RLIMIT_DATA));
 	if ((map->flags & MAP_ASLR) != 0) {
 		maxv1 = maxv / 2 + addr / 2;
 		error = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1,
 		    (MAXPAGESIZES > 1 && pagesizes[1] != 0) ?
 		    pagesizes[1] : pagesizes[0], &anon_loc);
 		if (error != 0)
 			goto ret;
 		map->anon_loc = anon_loc;
 	} else {
 		map->anon_loc = addr;
 	}
 
 	entry = (u_long)hdr->e_entry + et_dyn_addr;
 	imgp->entry_addr = entry;
 
 	if (interp != NULL) {
 		/* The interpreter is loaded with the image vnode unlocked. */
 		VOP_UNLOCK(imgp->vp);
 		if ((map->flags & MAP_ASLR) != 0) {
 			/* Assume that interpreter fits into 1/4 of AS */
 			maxv1 = maxv / 2 + addr / 2;
 			error = __CONCAT(rnd_, __elfN(base))(map, addr,
 			    maxv1, PAGE_SIZE, &addr);
 		}
 		if (error == 0) {
 			error = __elfN(load_interp)(imgp, brand_info, interp,
 			    &addr, &imgp->entry_addr);
 		}
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		if (error != 0)
 			goto ret;
 	} else
 		/* No interpreter: the base reported is the image's own. */
 		addr = et_dyn_addr;
 
 	error = exec_map_stack(imgp);
 	if (error != 0)
 		goto ret;
 
 	/*
 	 * Construct auxargs table (used by the copyout_auxargs routine)
 	 */
 	/*
 	 * Try a non-sleeping allocation first; if it fails, drop the vnode
 	 * lock around the sleeping allocation.
 	 */
 	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_NOWAIT);
 	if (elf_auxargs == NULL) {
 		VOP_UNLOCK(imgp->vp);
 		elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 	}
 	elf_auxargs->execfd = -1;
 	elf_auxargs->phdr = proghdr + et_dyn_addr;
 	elf_auxargs->phent = hdr->e_phentsize;
 	elf_auxargs->phnum = hdr->e_phnum;
 	elf_auxargs->pagesz = PAGE_SIZE;
 	elf_auxargs->base = addr;
 	elf_auxargs->flags = 0;
 	elf_auxargs->entry = entry;
 	elf_auxargs->hdr_eflags = hdr->e_flags;
 
 	imgp->auxargs = elf_auxargs;
 	imgp->interpreted = 0;
 	imgp->reloc_base = addr;
 	imgp->proc->p_osrel = osrel;
 	imgp->proc->p_fctl0 = fctl0;
 	imgp->proc->p_elf_flags = hdr->e_flags;
 
 ret:
 	/* Common exit: free the interpreter path if it was allocated. */
 	ASSERT_VOP_LOCKED(imgp->vp, "skipped relock");
 	if (free_interp)
 		free(interp, M_TEMP);
 	return (error);
 }
 
 /*
  * Name of the suword*() variant for the ELF word size being compiled:
  * "suword" with __ELF_WORD_SIZE pasted on.
  */
 #define	elf_suword __CONCAT(suword, __ELF_WORD_SIZE)
 
 /*
  * Build the ELF auxiliary vector for the new image and copy it out to
  * the user address "base".
  *
  * The entries are assembled in a zeroed temporary array of AT_COUNT
  * elements from the Elf_Auxargs record in imgp->auxargs, from other
  * image parameters and from the sysentvec.  imgp->auxargs is freed and
  * set to NULL.  The whole AT_COUNT-sized array is copied out, not just
  * the entries that were filled in.
  *
  * Returns the result of copyout().
  */
 int
 __elfN(freebsd_copyout_auxargs)(struct image_params *imgp, uintptr_t base)
 {
 	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
 	Elf_Auxinfo *argarray, *pos;
+	struct vmspace *vmspace;
 	int error;
 
 	argarray = pos = malloc(AT_COUNT * sizeof(*pos), M_TEMP,
 	    M_WAITOK | M_ZERO);
 
+	vmspace = imgp->proc->p_vmspace;
+
 	if (args->execfd != -1)
 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
 	AUXARGS_ENTRY(pos, AT_EHDRFLAGS, args->hdr_eflags);
 	if (imgp->execpathp != 0)
 		AUXARGS_ENTRY_PTR(pos, AT_EXECPATH, imgp->execpathp);
 	AUXARGS_ENTRY(pos, AT_OSRELDATE,
 	    imgp->proc->p_ucred->cr_prison->pr_osreldate);
 	if (imgp->canary != 0) {
 		AUXARGS_ENTRY_PTR(pos, AT_CANARY, imgp->canary);
 		AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
 	}
 	AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
 	if (imgp->pagesizes != 0) {
 		AUXARGS_ENTRY_PTR(pos, AT_PAGESIZES, imgp->pagesizes);
 		AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
 	}
 	/*
 	 * The timekeep, fxrng and vdso addresses below are formed from
 	 * the per-vmspace vm_shp_base plus an offset kept in the
 	 * sysentvec.
 	 */
-	if (imgp->sysent->sv_timekeep_base != 0) {
+	if ((imgp->sysent->sv_flags & SV_TIMEKEEP) != 0) {
 		AUXARGS_ENTRY(pos, AT_TIMEKEEP,
-		    imgp->sysent->sv_timekeep_base);
+		    vmspace->vm_shp_base + imgp->sysent->sv_timekeep_offset);
 	}
 	AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
 	    != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
 	    imgp->sysent->sv_stackprot);
 	if (imgp->sysent->sv_hwcap != NULL)
 		AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap);
 	if (imgp->sysent->sv_hwcap2 != NULL)
 		AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2);
 	AUXARGS_ENTRY(pos, AT_BSDFLAGS, __elfN(sigfastblock) ?
 	    ELF_BSDF_SIGFASTBLK : 0);
 	AUXARGS_ENTRY(pos, AT_ARGC, imgp->args->argc);
 	AUXARGS_ENTRY_PTR(pos, AT_ARGV, imgp->argv);
 	AUXARGS_ENTRY(pos, AT_ENVC, imgp->args->envc);
 	AUXARGS_ENTRY_PTR(pos, AT_ENVV, imgp->envv);
 	AUXARGS_ENTRY_PTR(pos, AT_PS_STRINGS, imgp->ps_strings);
-	if (imgp->sysent->sv_fxrng_gen_base != 0)
-		AUXARGS_ENTRY(pos, AT_FXRNG, imgp->sysent->sv_fxrng_gen_base);
-	if (imgp->sysent->sv_vdso_base != 0 && __elfN(vdso) != 0)
-		AUXARGS_ENTRY(pos, AT_KPRELOAD, imgp->sysent->sv_vdso_base);
+#ifdef RANDOM_FENESTRASX
+	if ((imgp->sysent->sv_flags & SV_RNG_SEED_VER) != 0) {
+		AUXARGS_ENTRY(pos, AT_FXRNG,
+		    vmspace->vm_shp_base + imgp->sysent->sv_fxrng_gen_offset);
+	}
+#endif
+	if ((imgp->sysent->sv_flags & SV_DSO_SIG) != 0 && __elfN(vdso) != 0) {
+		AUXARGS_ENTRY(pos, AT_KPRELOAD,
+		    vmspace->vm_shp_base + imgp->sysent->sv_vdso_offset);
+	}
 	AUXARGS_ENTRY(pos, AT_NULL, 0);
 
 	/* The Elf_Auxargs record is no longer needed once consumed. */
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 	KASSERT(pos - argarray <= AT_COUNT, ("Too many auxargs"));
 
 	error = copyout(argarray, (void *)base, sizeof(*argarray) * AT_COUNT);
 	free(argarray, M_TEMP);
 	return (error);
 }
 
 /*
  * Store the argument count in the user stack slot immediately below
  * *stack_base and move *stack_base down to that slot.
  *
  * Returns 0 on success or EFAULT if the store fails, in which case
  * *stack_base is left unchanged.
  */
 int
 __elfN(freebsd_fixup)(uintptr_t *stack_base, struct image_params *imgp)
 {
 	Elf_Addr *argc_slot;
 
 	argc_slot = (Elf_Addr *)*stack_base - 1;
 	if (elf_suword(argc_slot, imgp->args->argc) == -1)
 		return (EFAULT);
 
 	*stack_base = (uintptr_t)argc_slot;
 	return (0);
 }
 
 /*
  * Code for generating ELF core dumps.
  */
 
 /*
  * Callback type invoked by each_dumpable_segment() for every map entry
  * that is to be dumped; the second argument is caller-supplied state.
  */
 typedef void (*segment_callback)(vm_map_entry_t, void *);
 
 /*
  * Closure for cb_put_phdr().  Both fields are advanced by the callback
  * after each entry, so they always describe the next segment to emit.
  */
 struct phdr_closure {
 	Elf_Phdr *phdr;		/* Program header to fill in */
 	Elf_Off offset;		/* Offset of segment in core file */
 };
 
 /*
  * One pending core-file note.  Entries are queued on a note_info_list
  * while the notes are being sized and are consumed by __elfN(putnote)().
  *
  * A type of -1 marks an entry whose output function writes its data
  * directly, without a note header being generated for it.  When regset
  * is non-NULL the description is produced by regset->get() instead of
  * outfunc.
  */
 struct note_info {
 	int		type;		/* Note type. */
 	struct regset	*regset;	/* Register set. */
 	outfunc_t 	outfunc; 	/* Output function. */
 	void		*outarg;	/* Argument for the output function. */
 	size_t		outsize;	/* Output size. */
 	TAILQ_ENTRY(note_info) link;	/* Link to the next note info. */
 };
 
 /* List head type for queued notes. */
 TAILQ_HEAD(note_info_list, note_info);
 
 /*
  * Core compression settings defined elsewhere; compress_user_cores is
  * read as the compression method and compress_user_cores_level is handed
  * to compressor_init() in __elfN(coredump)().
  */
 extern int compress_user_cores;
 extern int compress_user_cores_level;
 
 /* Segment walking and header generation helpers. */
 static void cb_put_phdr(vm_map_entry_t, void *);
 static void cb_size_segment(vm_map_entry_t, void *);
 static void each_dumpable_segment(struct thread *, segment_callback, void *,
     int);
 static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t,
     struct note_info_list *, size_t, int);
 static void __elfN(putnote)(struct thread *td, struct note_info *, struct sbuf *);
 
 /*
  * Note output functions.  Each takes (arg, sb, sizep); in the functions
  * below a NULL sb requests only the size, returned through *sizep.
  */
 static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *);
 static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *);
 static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *);
 static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *);
 static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *);
 static void note_procstat_files(void *, struct sbuf *, size_t *);
 static void note_procstat_groups(void *, struct sbuf *, size_t *);
 static void note_procstat_osrel(void *, struct sbuf *, size_t *);
 static void note_procstat_rlimit(void *, struct sbuf *, size_t *);
 static void note_procstat_umask(void *, struct sbuf *, size_t *);
 static void note_procstat_vmmap(void *, struct sbuf *, size_t *);
 
 /*
  * Write callback handed to compressor_init(): forwards a block of
  * kernel-space data to core_write(), treating the opaque argument as
  * the coredump parameters.
  */
 static int
 core_compressed_write(void *base, size_t len, off_t offset, void *arg)
 {
 	struct coredump_params *cp = arg;
 
 	return (core_write(cp, base, len, offset, UIO_SYSSPACE, NULL));
 }
 
 /*
  * Write an ELF core file for td's process to vp.
  *
  * The dumpable segments are sized first, the ABI's
  * sv_elf_core_prepare_notes hook builds the note list, and the total
  * size is checked against limit.  The header and notes are then written
  * by __elfN(corehdr)() and each segment's contents by core_output(),
  * optionally through a compression stream.
  *
  * Returns 0 on success.  EFAULT is returned when resource accounting
  * refuses the dump, when the core would reach limit, or when the
  * compressor cannot be created; other errors come from the write path.
  */
 int
 __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
 {
 	struct ucred *cred = td->td_ucred;
 	int compm, error = 0;
 	struct sseg_closure seginfo;
 	struct note_info_list notelst;
 	struct coredump_params params;
 	struct note_info *ninfo;
 	void *hdr, *tmpbuf;
 	size_t hdrsize, notesz, coresize;
 
 	/* NULL so the common exit path can free these unconditionally. */
 	hdr = NULL;
 	tmpbuf = NULL;
 	TAILQ_INIT(&notelst);
 
 	/* Size the program segments. */
 	__elfN(size_segments)(td, &seginfo, flags);
 
 	/*
 	 * Collect info about the core file header area.
 	 *
 	 * One extra program header is reserved for the note segment, and a
 	 * single section header is added when the segment count has to be
 	 * stored out of line (see __elfN(puthdr)()).
 	 */
 	hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count);
 	if (seginfo.count + 1 >= PN_XNUM)
 		hdrsize += sizeof(Elf_Shdr);
 	td->td_proc->p_sysent->sv_elf_core_prepare_notes(td, &notelst, &notesz);
 	coresize = round_page(hdrsize + notesz) + seginfo.size;
 
 	/* Set up core dump parameters. */
 	params.offset = 0;
 	params.active_cred = cred;
 	params.file_cred = NOCRED;
 	params.td = td;
 	params.vp = vp;
 	params.comp = NULL;
 
 #ifdef RACCT
 	if (racct_enable) {
 		PROC_LOCK(td->td_proc);
 		error = racct_add(td->td_proc, RACCT_CORE, coresize);
 		PROC_UNLOCK(td->td_proc);
 		if (error != 0) {
 			/* Any accounting failure is reported as EFAULT. */
 			error = EFAULT;
 			goto done;
 		}
 	}
 #endif
 	if (coresize >= limit) {
 		error = EFAULT;
 		goto done;
 	}
 
 	/*
 	 * Create a compression stream if necessary.  When SVC_PT_COREDUMP
 	 * is set without SVC_NOCOMPRESS and no method is configured, gzip
 	 * is used.
 	 */
 	compm = compress_user_cores;
 	if ((flags & (SVC_PT_COREDUMP | SVC_NOCOMPRESS)) == SVC_PT_COREDUMP &&
 	    compm == 0)
 		compm = COMPRESS_GZIP;
 	if (compm != 0) {
 		params.comp = compressor_init(core_compressed_write,
 		    compm, CORE_BUF_SIZE,
 		    compress_user_cores_level, &params);
 		if (params.comp == NULL) {
 			error = EFAULT;
 			goto done;
 		}
 		/* Scratch buffer passed to core_output() for each segment. */
 		tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
         }
 
 	/*
 	 * Allocate memory for building the header, fill it up,
 	 * and write it out following the notes.
 	 */
 	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
 	error = __elfN(corehdr)(&params, seginfo.count, hdr, hdrsize, &notelst,
 	    notesz, flags);
 
 	/* Write the contents of all of the writable segments. */
 	if (error == 0) {
 		Elf_Phdr *php;
 		off_t offset;
 		int i;
 
 		/*
 		 * Skip the first program header: it describes the note
 		 * segment, which has already been written.
 		 */
 		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
 		offset = round_page(hdrsize + notesz);
 		for (i = 0; i < seginfo.count; i++) {
 			error = core_output((char *)(uintptr_t)php->p_vaddr,
 			    php->p_filesz, offset, &params, tmpbuf);
 			if (error != 0)
 				break;
 			offset += php->p_filesz;
 			php++;
 		}
 		if (error == 0 && params.comp != NULL)
 			error = compressor_flush(params.comp);
 	}
 	if (error) {
 		log(LOG_WARNING,
 		    "Failed to write core file for process %s (error %d)\n",
 		    curproc->p_comm, error);
 	}
 
 	/* Common exit: release everything acquired above. */
 done:
 	free(tmpbuf, M_TEMP);
 	if (params.comp != NULL)
 		compressor_fini(params.comp);
 	while ((ninfo = TAILQ_FIRST(&notelst)) != NULL) {
 		TAILQ_REMOVE(&notelst, ninfo, link);
 		free(ninfo, M_TEMP);
 	}
 	if (hdr != NULL)
 		free(hdr, M_TEMP);
 
 	return (error);
 }
 
 /*
  * each_dumpable_segment() callback: fill in one PT_LOAD program header
  * describing the given map entry, then advance the closure to the next
  * header slot and the next file offset.
  */
 static void
 cb_put_phdr(vm_map_entry_t entry, void *closure)
 {
 	struct phdr_closure *state;
 	Elf_Phdr *ph;
 
 	state = closure;
 	ph = state->phdr;
 
 	/* Segment contents always start on a page boundary in the file. */
 	state->offset = round_page(state->offset);
 
 	ph->p_type = PT_LOAD;
 	ph->p_offset = state->offset;
 	ph->p_vaddr = entry->start;
 	ph->p_paddr = 0;
 	ph->p_memsz = entry->end - entry->start;
 	ph->p_filesz = ph->p_memsz;
 	ph->p_align = PAGE_SIZE;
 	ph->p_flags = __elfN(untrans_prot)(entry->protection);
 
 	/* Step past what was just described. */
 	state->offset += ph->p_filesz;
 	state->phdr = ph + 1;
 }
 
 /*
  * each_dumpable_segment() callback: account for one more segment and
  * add its length to the running total kept in the closure.
  */
 static void
 cb_size_segment(vm_map_entry_t entry, void *closure)
 {
 	struct sseg_closure *totals = closure;
 
 	totals->size += entry->end - entry->start;
 	totals->count++;
 }
 
 /*
  * Count the dumpable segments of td's process and sum their sizes,
  * storing both results in *seginfo.
  */
 void
 __elfN(size_segments)(struct thread *td, struct sseg_closure *seginfo,
     int flags)
 {
 	/* Start from a clean tally; the callback only accumulates. */
 	seginfo->size = 0;
 	seginfo->count = 0;
 
 	each_dumpable_segment(td, cb_size_segment, seginfo, flags);
 }
 
 /*
  * For each dumpable segment in the process's memory map, call the given
  * function with a pointer to the map entry and some arbitrary
  * caller-supplied data.
  *
  * The map is read-locked for the duration of the walk, so the callback
  * runs with that lock held.  An entry is skipped when it fails the
  * protection filter (unless SVC_ALL is set), is a submap, is marked
  * MAP_ENTRY_NOCOREDUMP (unless SVC_ALL is set), has no VM object, or is
  * ultimately backed by an OBJ_FICTITIOUS object.
  */
 static void
 each_dumpable_segment(struct thread *td, segment_callback func, void *closure,
     int flags)
 {
 	struct proc *p = td->td_proc;
 	vm_map_t map = &p->p_vmspace->vm_map;
 	vm_map_entry_t entry;
 	vm_object_t backing_object, object;
 	bool ignore_entry;
 
 	vm_map_lock_read(map);
 	VM_MAP_ENTRY_FOREACH(entry, map) {
 		/*
 		 * Don't dump inaccessible mappings, deal with legacy
 		 * coredump mode.
 		 *
 		 * Note that read-only segments related to the elf binary
 		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
 		 * need to arbitrarily ignore such segments.
 		 */
 		if ((flags & SVC_ALL) == 0) {
 			if (elf_legacy_coredump) {
 				/* Legacy mode: only read+write mappings. */
 				if ((entry->protection & VM_PROT_RW) !=
 				    VM_PROT_RW)
 					continue;
 			} else {
 				/* Otherwise: any mapping with some access. */
 				if ((entry->protection & VM_PROT_ALL) == 0)
 					continue;
 			}
 		}
 
 		/*
 		 * Don't include memory segment in the coredump if
 		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
 		 * madvise(2).  Do not dump submaps (i.e. parts of the
 		 * kernel map).
 		 */
 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 			continue;
 		if ((entry->eflags & MAP_ENTRY_NOCOREDUMP) != 0 &&
 		    (flags & SVC_ALL) == 0)
 			continue;
 		if ((object = entry->object.vm_object) == NULL)
 			continue;
 
 		/*
 		 * Ignore memory-mapped devices and such things.
 		 *
 		 * Walk to the bottom of the backing-object chain, taking
 		 * the next object's read lock before dropping the current
 		 * one, and test the flags of the last object in the chain.
 		 */
 		VM_OBJECT_RLOCK(object);
 		while ((backing_object = object->backing_object) != NULL) {
 			VM_OBJECT_RLOCK(backing_object);
 			VM_OBJECT_RUNLOCK(object);
 			object = backing_object;
 		}
 		ignore_entry = (object->flags & OBJ_FICTITIOUS) != 0;
 		VM_OBJECT_RUNLOCK(object);
 		if (ignore_entry)
 			continue;
 
 		(*func)(entry, closure);
 	}
 	vm_map_unlock_read(map);
 }
 
 /*
  * Build the ELF/program headers in hdr and stream them, followed by all
  * queued notes, to the core file.  The output is padded up to a page
  * boundary so that the program segments start page-aligned.
  *
  * Returns the result of sbuf_finish() on the output stream.
  */
 static int
 __elfN(corehdr)(struct coredump_params *p, int numsegs, void *hdr,
     size_t hdrsize, struct note_info_list *notelst, size_t notesz,
     int flags)
 {
 	struct note_info *note;
 	struct sbuf *out;
 	int rv;
 
 	/* Start from a zeroed buffer and let puthdr populate it. */
 	bzero(hdr, hdrsize);
 	__elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz, flags);
 
 	/* Everything appended to the sbuf is drained to the core file. */
 	out = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN);
 	sbuf_set_drain(out, sbuf_drain_core_output, p);
 
 	/* One section spans the headers and every note. */
 	sbuf_start_section(out, NULL);
 	sbuf_bcat(out, hdr, hdrsize);
 	TAILQ_FOREACH(note, notelst, link)
 		__elfN(putnote)(p->td, note, out);
 	/* Pad the section to a page boundary for the segment data. */
 	sbuf_end_section(out, -1, PAGE_SIZE, 0);
 
 	rv = sbuf_finish(out);
 	sbuf_delete(out);
 	return (rv);
 }
 
 /*
  * Queue every note that goes into the core file on list and return the
  * accumulated size reported by the registration helpers through *sizep.
  *
  * Order: the process-wide NT_PRPSINFO note, then the per-thread notes
  * (the dumping thread first), then the NT_PROCSTAT_* notes.
  */
 void
 __elfN(prepare_notes)(struct thread *td, struct note_info_list *list,
     size_t *sizep)
 {
 	struct proc *p = td->td_proc;
 	struct thread *thr;
 	size_t total;
 
 	total = __elfN(register_note)(td, list, NT_PRPSINFO,
 	    __elfN(note_prpsinfo), p);
 
 	/*
 	 * The thread handed to us is the one that triggered the dump, so
 	 * its state goes first: debuggers then pick it as the initial
 	 * thread (LWP), which is most likely the one of interest.
 	 */
 	total += __elfN(prepare_register_notes)(td, list, td);
 	total += __elfN(register_note)(td, list, -1,
 	    __elfN(note_threadmd), td);
 
 	/* Remaining threads follow in list order; td is not repeated. */
 	for (thr = TAILQ_FIRST(&p->p_threads); thr != NULL;
 	    thr = TAILQ_NEXT(thr, td_plist)) {
 		if (thr == td)
 			continue;
 		total += __elfN(prepare_register_notes)(td, list, thr);
 		total += __elfN(register_note)(td, list, -1,
 		    __elfN(note_threadmd), thr);
 	}
 
 	/* Process-wide procstat notes. */
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_PROC,
 	    __elfN(note_procstat_proc), p);
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_FILES,
 	    note_procstat_files, p);
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_VMMAP,
 	    note_procstat_vmmap, p);
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_GROUPS,
 	    note_procstat_groups, p);
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_UMASK,
 	    note_procstat_umask, p);
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_RLIMIT,
 	    note_procstat_rlimit, p);
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_OSREL,
 	    note_procstat_osrel, p);
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_PSSTRINGS,
 	    __elfN(note_procstat_psstrings), p);
 	total += __elfN(register_note)(td, list, NT_PROCSTAT_AUXV,
 	    __elfN(note_procstat_auxv), p);
 
 	*sizep = total;
 }
 
 /*
  * Fill in the ELF header, the optional extended-numbering section
  * header, the PT_NOTE program header and one PT_LOAD program header per
  * dumpable segment in the buffer at hdr.
  *
  * Fields not assigned here are left as found; __elfN(corehdr)() zeroes
  * the buffer before calling in.
  */
 void
 __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs,
     size_t notesz, int flags)
 {
 	struct phdr_closure seg_state;
 	Elf_Brandinfo *bi;
 	Elf_Ehdr *eh;
 	Elf_Phdr *ph;
 	Elf_Shdr *sh0;
 
 	eh = hdr;
 	bi = td->td_proc->p_elf_brandinfo;
 
 	/* ELF identification bytes. */
 	eh->e_ident[EI_MAG0] = ELFMAG0;
 	eh->e_ident[EI_MAG1] = ELFMAG1;
 	eh->e_ident[EI_MAG2] = ELFMAG2;
 	eh->e_ident[EI_MAG3] = ELFMAG3;
 	eh->e_ident[EI_CLASS] = ELF_CLASS;
 	eh->e_ident[EI_DATA] = ELF_DATA;
 	eh->e_ident[EI_VERSION] = EV_CURRENT;
 	eh->e_ident[EI_OSABI] = td->td_proc->p_sysent->sv_elf_core_osabi;
 	eh->e_ident[EI_ABIVERSION] = 0;
 	eh->e_ident[EI_PAD] = 0;
 
 	/* Fixed header fields. */
 	eh->e_type = ET_CORE;
 	eh->e_machine = bi->machine;
 	eh->e_version = EV_CURRENT;
 	eh->e_entry = 0;
 	eh->e_phoff = sizeof(Elf_Ehdr);
 	eh->e_flags = td->td_proc->p_elf_flags;
 	eh->e_ehsize = sizeof(Elf_Ehdr);
 	eh->e_phentsize = sizeof(Elf_Phdr);
 	eh->e_shentsize = sizeof(Elf_Shdr);
 	eh->e_shstrndx = SHN_UNDEF;
 
 	if (numsegs + 1 >= PN_XNUM) {
 		/* Too many headers for e_phnum: use the escape value. */
 		eh->e_phnum = PN_XNUM;
 		eh->e_shnum = 1;
 
 		eh->e_shoff = eh->e_phoff +
 		    (numsegs + 1) * eh->e_phentsize;
 		KASSERT(eh->e_shoff == hdrsize - sizeof(Elf_Shdr),
 		    ("e_shoff: %zu, hdrsize - shdr: %zu",
 		     (size_t)eh->e_shoff, hdrsize - sizeof(Elf_Shdr)));
 
 		sh0 = (Elf_Shdr *)((char *)hdr + eh->e_shoff);
 		memset(sh0, 0, sizeof(*sh0));
 		/*
 		 * A special first section is used to hold large segment and
 		 * section counts.  This was proposed by Sun Microsystems in
 		 * Solaris and has been adopted by Linux; the standard ELF
 		 * tools are already familiar with the technique.
 		 *
 		 * See table 7-7 of the Solaris "Linker and Libraries Guide"
 		 * (or 12-7 depending on the version of the document) for more
 		 * details.
 		 */
 		sh0->sh_type = SHT_NULL;
 		sh0->sh_size = eh->e_shnum;
 		sh0->sh_link = eh->e_shstrndx;
 		sh0->sh_info = numsegs + 1;
 	} else {
 		/* The count (segments plus the note header) fits. */
 		eh->e_phnum = numsegs + 1;
 		eh->e_shnum = 0;
 	}
 
 	/*
 	 * Program headers: the note segment comes first, directly after
 	 * the headers in the file.
 	 */
 	ph = (Elf_Phdr *)((char *)hdr + eh->e_phoff);
 
 	/* The note segment. */
 	ph->p_type = PT_NOTE;
 	ph->p_offset = hdrsize;
 	ph->p_vaddr = 0;
 	ph->p_paddr = 0;
 	ph->p_filesz = notesz;
 	ph->p_memsz = 0;
 	ph->p_flags = PF_R;
 	ph->p_align = ELF_NOTE_ROUNDSIZE;
 
 	/* One PT_LOAD header per dumpable segment follows the note header. */
 	seg_state.phdr = ph + 1;
 	seg_state.offset = round_page(hdrsize + notesz);
 	each_dumpable_segment(td, cb_put_phdr, &seg_state, flags);
 }
 
 /*
  * Queue a note for one register set of target_td.
  *
  * The set is first asked for its size with a NULL buffer; nothing is
  * queued and 0 is returned when the query fails or reports an empty
  * set.  Otherwise the return value is the full size of the note (header,
  * padded vendor name and padded description).
  */
 static size_t
 __elfN(register_regset_note)(struct thread *td, struct note_info_list *list,
     struct regset *regset, struct thread *target_td)
 {
 	struct note_info *entry;
 	const char *vendor;
 	size_t descsz;
 
 	descsz = 0;
 	if (!regset->get(regset, target_td, NULL, &descsz))
 		return (0);
 	if (descsz == 0)
 		return (0);
 
 	entry = malloc(sizeof(*entry), M_TEMP, M_ZERO | M_WAITOK);
 	entry->type = regset->note;
 	entry->regset = regset;
 	entry->outarg = target_td;
 	entry->outsize = descsz;
 	TAILQ_INSERT_TAIL(list, entry, link);
 
 	vendor = td->td_proc->p_sysent->sv_elf_core_abi_vendor;
 	return (sizeof(Elf_Note) +			/* note header */
 	    roundup2(strlen(vendor) + 1, ELF_NOTE_ROUNDSIZE) +
 							/* note name */
 	    roundup2(descsz, ELF_NOTE_ROUNDSIZE));	/* note description */
 }
 
 /*
  * Queue a note produced by the output function out(arg, ...).
  *
  * The function is called once with a NULL sbuf to learn its output
  * size.  For type -1 that raw size is returned, since such entries are
  * written without a note header; otherwise the return value is the
  * full note size (header, padded vendor name and padded description).
  */
 size_t
 __elfN(register_note)(struct thread *td, struct note_info_list *list,
     int type, outfunc_t out, void *arg)
 {
 	struct note_info *entry;
 	const char *vendor;
 	size_t descsz;
 
 	vendor = td->td_proc->p_sysent->sv_elf_core_abi_vendor;
 
 	/* Sizing pass. */
 	descsz = 0;
 	out(arg, NULL, &descsz);
 
 	entry = malloc(sizeof(*entry), M_TEMP, M_ZERO | M_WAITOK);
 	entry->type = type;
 	entry->outfunc = out;
 	entry->outarg = arg;
 	entry->outsize = descsz;
 	TAILQ_INSERT_TAIL(list, entry, link);
 
 	if (type == -1)
 		return (descsz);
 
 	return (sizeof(Elf_Note) +			/* note header */
 	    roundup2(strlen(vendor) + 1, ELF_NOTE_ROUNDSIZE) +
 							/* note name */
 	    roundup2(descsz, ELF_NOTE_ROUNDSIZE));	/* note description */
 }
 
 /*
  * Copy len bytes from src to dst and zero-fill up to the next
  * ELF_NOTE_ROUNDSIZE boundary.  With a NULL dst nothing is written.
  *
  * Returns the padded length in either case.
  */
 static size_t
 append_note_data(const void *src, void *dst, size_t len)
 {
 	const size_t total = roundup2(len, ELF_NOTE_ROUNDSIZE);
 
 	if (dst == NULL)
 		return (total);
 
 	bcopy(src, dst, len);
 	bzero((char *)dst + len, total - len);
 	return (total);
 }
 
 /*
  * Lay out a complete FreeBSD-vendor note in dst: the note header, the
  * padded vendor name and size bytes of description copied from src.
  * When descp is non-NULL it receives the address of the description
  * inside dst.  With a NULL dst nothing is written and *descp is left
  * untouched.
  *
  * Returns the total size such a note occupies.
  */
 size_t
 __elfN(populate_note)(int type, void *src, void *dst, size_t size, void **descp)
 {
 	Elf_Note *nhdr;
 	char *cursor;
 
 	if (dst != NULL) {
 		nhdr = dst;
 		nhdr->n_namesz = sizeof(FREEBSD_ABI_VENDOR);
 		nhdr->n_descsz = size;
 		nhdr->n_type = type;
 
 		/* The name follows the header, the description the name. */
 		cursor = (char *)dst + sizeof(*nhdr);
 		cursor += append_note_data(FREEBSD_ABI_VENDOR, cursor,
 		    sizeof(FREEBSD_ABI_VENDOR));
 		append_note_data(src, cursor, size);
 		if (descp != NULL)
 			*descp = cursor;
 	}
 
 	return (sizeof(Elf_Note) +			/* note header */
 	    roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) +
 							/* note name */
 	    roundup2(size, ELF_NOTE_ROUNDSIZE));	/* note description */
 }
 
 /*
  * Append one queued note to sb.
  *
  * Entries of type -1 carry no note header: their output function is
  * simply invoked on sb.  For every other entry the note header and the
  * padded ABI vendor name are written, followed by the description,
  * which comes from regset->get() when the entry has a register set and
  * from the entry's output function otherwise.  A description that turns
  * out shorter than predicted is zero-padded to the predicted size.
  */
 static void
 __elfN(putnote)(struct thread *td, struct note_info *ninfo, struct sbuf *sb)
 {
 	Elf_Note note;
 	const struct sysentvec *sv;
 	ssize_t old_len, sect_len;
 	size_t new_len, descsz, i;
 
 	if (ninfo->type == -1) {
 		ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
 		return;
 	}
 
 	sv = td->td_proc->p_sysent;
 
 	/* n_namesz counts the vendor string including its NUL. */
 	note.n_namesz = strlen(sv->sv_elf_core_abi_vendor) + 1;
 	note.n_descsz = ninfo->outsize;
 	note.n_type = ninfo->type;
 
 	sbuf_bcat(sb, &note, sizeof(note));
 	/* Name section, padded to ELF_NOTE_ROUNDSIZE. */
 	sbuf_start_section(sb, &old_len);
 	sbuf_bcat(sb, sv->sv_elf_core_abi_vendor,
 	    strlen(sv->sv_elf_core_abi_vendor) + 1);
 	sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
 	if (note.n_descsz == 0)
 		return;
 	/* Description section, padded to ELF_NOTE_ROUNDSIZE. */
 	sbuf_start_section(sb, &old_len);
 	if (ninfo->regset != NULL) {
 		struct regset *regset = ninfo->regset;
 		void *buf;
 
 		/* Fetch the register set into a zeroed scratch buffer. */
 		buf = malloc(ninfo->outsize, M_TEMP, M_ZERO | M_WAITOK);
 		(void)regset->get(regset, ninfo->outarg, buf, &ninfo->outsize);
 		sbuf_bcat(sb, buf, ninfo->outsize);
 		free(buf, M_TEMP);
 	} else
 		ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
 	sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
 	if (sect_len < 0)
 		return;
 
 	/* Compare what was written against the size in the header. */
 	new_len = (size_t)sect_len;
 	descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE);
 	if (new_len < descsz) {
 		/*
 		 * It is expected that individual note emitters will correctly
 		 * predict their expected output size and fill up to that size
 		 * themselves, padding in a format-specific way if needed.
 		 * However, in case they don't, just do it here with zeros.
 		 */
 		for (i = 0; i < descsz - new_len; i++)
 			sbuf_putc(sb, 0);
 	} else if (new_len > descsz) {
 		/*
 		 * We can't always truncate sb -- we may have drained some
 		 * of it already.
 		 *
 		 * The condition below is necessarily false in this branch,
 		 * so the assertion always fires when assertions are enabled.
 		 */
 		KASSERT(new_len == descsz, ("%s: Note type %u changed as we "
 		    "read it (%zu > %zu).  Since it is longer than "
 		    "expected, this coredump's notes are corrupt.  THIS "
 		    "IS A BUG in the note_procstat routine for type %u.\n",
 		    __func__, (unsigned)note.n_type, new_len, descsz,
 		    (unsigned)note.n_type));
 	}
 }
 
 /*
  * Miscellaneous note out functions.
  */
 
 /*
  * Select the structure layouts used by the note emitters below.  When
  * this file is built for 32-bit ELF with COMPAT_FREEBSD32, the 32-bit
  * compat variants are used; otherwise the native types are.
  */
 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 
 typedef struct prstatus32 elf_prstatus_t;
 typedef struct prpsinfo32 elf_prpsinfo_t;
 typedef struct fpreg32 elf_prfpregset_t;
 typedef struct fpreg32 elf_fpregset_t;
 typedef struct reg32 elf_gregset_t;
 typedef struct thrmisc32 elf_thrmisc_t;
 typedef struct ptrace_lwpinfo32 elf_lwpinfo_t;
 /* Flag passed to kern_proc_out() by __elfN(note_procstat_proc)(). */
 #define ELF_KERN_PROC_MASK	KERN_PROC_MASK32
 typedef struct kinfo_proc32 elf_kinfo_proc_t;
 typedef uint32_t elf_ps_strings_t;
 #else
 typedef prstatus_t elf_prstatus_t;
 typedef prpsinfo_t elf_prpsinfo_t;
 typedef prfpregset_t elf_prfpregset_t;
 typedef prfpregset_t elf_fpregset_t;
 typedef gregset_t elf_gregset_t;
 typedef thrmisc_t elf_thrmisc_t;
 typedef struct ptrace_lwpinfo elf_lwpinfo_t;
 /* Flag passed to kern_proc_out() by __elfN(note_procstat_proc)(). */
 #define ELF_KERN_PROC_MASK	0
 typedef struct kinfo_proc elf_kinfo_proc_t;
 typedef vm_offset_t elf_ps_strings_t;
 #endif
 
 /*
  * Output function for the NT_PRPSINFO note.
  *
  * arg is the struct proc being dumped.  With a NULL sb only the size is
  * reported through *sizep.  Otherwise a zeroed prpsinfo structure is
  * filled in and appended to sb:
  *   - pr_fname is the process's p_comm;
  *   - pr_psargs is the argument vector with the NUL separators turned
  *     into spaces, taken from the cached p_args when present and from
  *     proc_getargv() otherwise, falling back to p_comm when no
  *     arguments could be obtained;
  *   - pr_pid is the process ID.
  */
 static void
 __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct sbuf sbarg;
 	size_t len;
 	ssize_t sblen;
 	char *cp, *end;
 	struct proc *p;
 	elf_prpsinfo_t *psinfo;
 	int error;
 
 	p = arg;
 	if (sb != NULL) {
 		KASSERT(*sizep == sizeof(*psinfo), ("invalid size"));
 		psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK);
 		psinfo->pr_version = PRPSINFO_VERSION;
 		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
 		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
 		PROC_LOCK(p);
 		if (p->p_args != NULL) {
 			/* Cached arguments: copy at most size - 1 bytes. */
 			len = sizeof(psinfo->pr_psargs) - 1;
 			if (len > p->p_args->ar_length)
 				len = p->p_args->ar_length;
 			memcpy(psinfo->pr_psargs, p->p_args->ar_args, len);
 			PROC_UNLOCK(p);
 			error = 0;
 		} else {
 			/*
 			 * No cached arguments: hold the process across the
 			 * unlocked proc_getargv() call, which writes into
 			 * pr_psargs through a fixed-length sbuf.
 			 */
 			_PHOLD(p);
 			PROC_UNLOCK(p);
 			sbuf_new(&sbarg, psinfo->pr_psargs,
 			    sizeof(psinfo->pr_psargs), SBUF_FIXEDLEN);
 			error = proc_getargv(curthread, p, &sbarg);
 			PRELE(p);
 			if (sbuf_finish(&sbarg) == 0) {
 				/*
 				 * One byte is dropped from the reported
 				 * length (presumably the last argument's
 				 * terminating NUL).  An empty or invalid
 				 * length must not be decremented: doing so
 				 * would wrap len to SIZE_MAX and send the
 				 * NUL-replacement loop below far outside
 				 * pr_psargs.
 				 */
 				sblen = sbuf_len(&sbarg);
 				len = sblen > 0 ? (size_t)sblen - 1 : 0;
 			} else {
 				len = sizeof(psinfo->pr_psargs) - 1;
 			}
 			sbuf_delete(&sbarg);
 		}
 		if (error != 0 || len == 0)
 			strlcpy(psinfo->pr_psargs, p->p_comm,
 			    sizeof(psinfo->pr_psargs));
 		else {
 			KASSERT(len < sizeof(psinfo->pr_psargs),
 			    ("len is too long: %zu vs %zu", len,
 			    sizeof(psinfo->pr_psargs)));
 			/*
 			 * Replace every NUL in the first len - 1 bytes
 			 * with a space.  memchr() is restarted from the
 			 * byte just rewritten, which no longer matches.
 			 */
 			cp = psinfo->pr_psargs;
 			end = cp + len - 1;
 			for (;;) {
 				cp = memchr(cp, '\0', end - cp);
 				if (cp == NULL)
 					break;
 				*cp = ' ';
 			}
 		}
 		psinfo->pr_pid = p->p_pid;
 		sbuf_bcat(sb, psinfo, sizeof(*psinfo));
 		free(psinfo, M_TEMP);
 	}
 	*sizep = sizeof(*psinfo);
 }
 
 /*
  * Regset "get" handler for NT_PRSTATUS.  With a NULL buf only the size
  * is reported; otherwise buf is cleared and filled with the thread's
  * status and general-purpose registers.  Always returns true and stores
  * the structure size in *sizep.
  */
 static bool
 __elfN(get_prstatus)(struct regset *rs, struct thread *td, void *buf,
     size_t *sizep)
 {
 	elf_prstatus_t *st;
 
 	if (buf == NULL) {
 		*sizep = sizeof(*st);
 		return (true);
 	}
 
 	KASSERT(*sizep == sizeof(*st), ("%s: invalid size",
 	    __func__));
 	st = buf;
 	memset(st, 0, *sizep);
 
 	/* Self-describing sizes and version. */
 	st->pr_version = PRSTATUS_VERSION;
 	st->pr_statussz = sizeof(elf_prstatus_t);
 	st->pr_gregsetsz = sizeof(elf_gregset_t);
 	st->pr_fpregsetsz = sizeof(elf_fpregset_t);
 	st->pr_osreldate = osreldate;
 
 	/* pr_pid carries the thread ID, not the process ID. */
 	st->pr_cursig = td->td_proc->p_sig;
 	st->pr_pid = td->td_tid;
 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
 	fill_regs32(td, &st->pr_reg);
 #else
 	fill_regs(td, &st->pr_reg);
 #endif
 
 	*sizep = sizeof(*st);
 	return (true);
 }
 
 /*
  * Regset "set" handler for NT_PRSTATUS: load the thread's general
  * registers from the pr_reg member of the supplied status structure.
  * Always returns true.
  */
 static bool
 __elfN(set_prstatus)(struct regset *rs, struct thread *td, void *buf,
     size_t size)
 {
 	elf_prstatus_t *st = buf;
 
 	KASSERT(size == sizeof(*st), ("%s: invalid size", __func__));
 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
 	set_regs32(td, &st->pr_reg);
 #else
 	set_regs(td, &st->pr_reg);
 #endif
 	return (true);
 }
 
 /*
  * Register set descriptor for the NT_PRSTATUS note, with both get and
  * set handlers.
  * NOTE(review): ELF_REGSET() presumably adds the descriptor to the
  * table walked through sv_regset_begin/sv_regset_end; its definition is
  * not visible here -- confirm.
  */
 static struct regset __elfN(regset_prstatus) = {
 	.note = NT_PRSTATUS,
 	.size = sizeof(elf_prstatus_t),
 	.get = __elfN(get_prstatus),
 	.set = __elfN(set_prstatus),
 };
 ELF_REGSET(__elfN(regset_prstatus));
 
 /*
  * Regset "get" handler for NT_FPREGSET.  With a NULL buf only the size
  * is reported; otherwise the thread's floating-point registers are
  * stored in buf.  Always returns true and stores the structure size in
  * *sizep.
  */
 static bool
 __elfN(get_fpregset)(struct regset *rs, struct thread *td, void *buf,
     size_t *sizep)
 {
 	elf_prfpregset_t *fpr;
 
 	if (buf == NULL) {
 		*sizep = sizeof(*fpr);
 		return (true);
 	}
 
 	KASSERT(*sizep == sizeof(*fpr), ("%s: invalid size",
 	    __func__));
 	fpr = buf;
 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
 	fill_fpregs32(td, fpr);
 #else
 	fill_fpregs(td, fpr);
 #endif
 
 	*sizep = sizeof(*fpr);
 	return (true);
 }
 
 /*
  * Regset "set" handler for NT_FPREGSET: load the thread's
  * floating-point registers from buf.  Always returns true.
  */
 static bool
 __elfN(set_fpregset)(struct regset *rs, struct thread *td, void *buf,
     size_t size)
 {
 	elf_prfpregset_t *fpr = buf;
 
 	KASSERT(size == sizeof(*fpr), ("%s: invalid size", __func__));
 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
 	set_fpregs32(td, fpr);
 #else
 	set_fpregs(td, fpr);
 #endif
 	return (true);
 }
 
 /*
  * Register set descriptor for the NT_FPREGSET note, with both get and
  * set handlers.  __elfN(prepare_register_notes)() also refers to it
  * directly when the ABI supplies no register set table.
  */
 static struct regset __elfN(regset_fpregset) = {
 	.note = NT_FPREGSET,
 	.size = sizeof(elf_prfpregset_t),
 	.get = __elfN(get_fpregset),
 	.set = __elfN(set_fpregset),
 };
 ELF_REGSET(__elfN(regset_fpregset));
 
 /*
  * Regset "get" handler for NT_THRMISC.  With a NULL buf only the size is
  * reported; otherwise buf is cleared and the thread's name is copied
  * into it.  Always returns true and stores the structure size in
  * *sizep.
  */
 static bool
 __elfN(get_thrmisc)(struct regset *rs, struct thread *td, void *buf,
     size_t *sizep)
 {
 	elf_thrmisc_t *misc;
 
 	if (buf == NULL) {
 		*sizep = sizeof(*misc);
 		return (true);
 	}
 
 	KASSERT(*sizep == sizeof(*misc),
 	    ("%s: invalid size", __func__));
 	misc = buf;
 	bzero(misc, sizeof(*misc));
 	strcpy(misc->pr_tname, td->td_name);
 
 	*sizep = sizeof(*misc);
 	return (true);
 }
 
 /*
  * Register set descriptor for the NT_THRMISC note.  Only a get handler
  * is provided; no set handler is defined for this note.
  */
 static struct regset __elfN(regset_thrmisc) = {
 	.note = NT_THRMISC,
 	.size = sizeof(elf_thrmisc_t),
 	.get = __elfN(get_thrmisc),
 };
 ELF_REGSET(__elfN(regset_thrmisc));
 
 /*
  * Regset "get" handler for NT_PTLWPINFO.  The note body is an int
  * holding the size of the lwpinfo structure, followed by the structure
  * itself.  With a NULL buf only the total size is reported.  Always
  * returns true and stores the total size in *sizep.
  */
 static bool
 __elfN(get_lwpinfo)(struct regset *rs, struct thread *td, void *buf,
     size_t *sizep)
 {
 	elf_lwpinfo_t info;
 	int structsize;
 	const size_t total = sizeof(structsize) + sizeof(info);
 
 	if (buf != NULL) {
 		KASSERT(*sizep == total, ("%s: invalid size", __func__));
 
 		/* Build the record locally, then copy it out in one go. */
 		bzero(&info, sizeof(info));
 		info.pl_lwpid = td->td_tid;
 		info.pl_sigmask = td->td_sigmask;
 		info.pl_siglist = td->td_siglist;
 		if (td->td_si.si_signo == 0) {
 			info.pl_event = PL_EVENT_NONE;
 		} else {
 			/* A signal is recorded for this thread. */
 			info.pl_event = PL_EVENT_SIGNAL;
 			info.pl_flags |= PL_FLAG_SI;
 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
 			siginfo_to_siginfo32(&td->td_si, &info.pl_siginfo);
 #else
 			info.pl_siginfo = td->td_si;
 #endif
 		}
 		strcpy(info.pl_tdname, td->td_name);
 		/* XXX TODO: supply more information in struct ptrace_lwpinfo*/
 
 		/* Size prefix first, record right behind it. */
 		structsize = sizeof(info);
 		memcpy(buf, &structsize, sizeof(structsize));
 		memcpy((char *)buf + sizeof(structsize), &info, sizeof(info));
 	}
 	*sizep = total;
 	return (true);
 }
 
 /*
  * Register set descriptor for the NT_PTLWPINFO note.  The size covers
  * the leading int size prefix plus the lwpinfo structure, matching what
  * __elfN(get_lwpinfo)() emits.  Only a get handler is provided.
  */
 static struct regset __elfN(regset_lwpinfo) = {
 	.note = NT_PTLWPINFO,
 	.size = sizeof(int) + sizeof(elf_lwpinfo_t),
 	.get = __elfN(get_lwpinfo),
 };
 ELF_REGSET(__elfN(regset_lwpinfo));
 
 static size_t
 __elfN(prepare_register_notes)(struct thread *td, struct note_info_list *list,
     struct thread *target_td)
 {
 	struct sysentvec *sv = td->td_proc->p_sysent;
 	struct regset **regsetp, **regset_end, *regset;
 	size_t size;
 
 	size = 0;
 
 	/* NT_PRSTATUS must be the first register set note. */
 	size += __elfN(register_regset_note)(td, list, &__elfN(regset_prstatus),
 	    target_td);
 
 	regsetp = sv->sv_regset_begin;
 	if (regsetp == NULL) {
 		/* XXX: This shouldn't be true for any FreeBSD ABIs. */
 		size += __elfN(register_regset_note)(td, list,
 		    &__elfN(regset_fpregset), target_td);
 		return (size);
 	}
 	regset_end = sv->sv_regset_end;
 	MPASS(regset_end != NULL);
 	for (; regsetp < regset_end; regsetp++) {
 		regset = *regsetp;
 		if (regset->note == NT_PRSTATUS)
 			continue;
 		size += __elfN(register_regset_note)(td, list, regset,
 		    target_td);
 	}
 	return (size);
 }
 
 /*
  * Output function for machine-dependent per-thread data, produced by
  * __elfN(dump_thread)().  With a NULL sb (or a zero predicted size)
  * dump_thread is called without a buffer and only the size is
  * reported; otherwise its output is collected in a scratch buffer and
  * appended to sb.  *sizep is updated to the size dump_thread reported.
  */
 static void
 __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct thread *td = arg;
 	void *scratch;
 	size_t produced;
 
 	/* A buffer is only needed when there is something to emit. */
 	scratch = NULL;
 	if (sb != NULL && *sizep != 0)
 		scratch = malloc(*sizep, M_TEMP, M_ZERO | M_WAITOK);
 
 	produced = 0;
 	__elfN(dump_thread)(td, scratch, &produced);
 	KASSERT(sb == NULL || *sizep == produced, ("invalid size"));
 
 	if (sb != NULL && produced != 0)
 		sbuf_bcat(sb, scratch, produced);
 	free(scratch, M_TEMP);
 	*sizep = produced;
 }
 
 #ifdef KINFO_PROC_SIZE
 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
 #endif
 
 /*
  * Output function for NT_PROCSTAT_PROC: an int holding the size of one
  * kinfo_proc record, followed by the records written by
  * kern_proc_out().  The size is computed as one record per thread of
  * the process.  With a NULL sb only the size is reported.
  */
 static void
 __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct proc *p = arg;
 	int structsize;
 	const size_t expected = sizeof(structsize) + p->p_numthreads *
 	    sizeof(elf_kinfo_proc_t);
 
 	if (sb == NULL) {
 		*sizep = expected;
 		return;
 	}
 
 	KASSERT(*sizep == expected, ("invalid size"));
 	structsize = sizeof(elf_kinfo_proc_t);
 	sbuf_bcat(sb, &structsize, sizeof(structsize));
 
 	/*
 	 * NOTE(review): the process lock is taken here but not released
 	 * in this function; presumably kern_proc_out() drops it -- confirm.
 	 */
 	sx_slock(&proctree_lock);
 	PROC_LOCK(p);
 	kern_proc_out(p, sb, ELF_KERN_PROC_MASK);
 	sx_sunlock(&proctree_lock);
 
 	*sizep = expected;
 }
 
 #ifdef KINFO_FILE_SIZE
 CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
 #endif
 
 /*
  * Output function for NT_PROCSTAT_FILES: an int holding
  * sizeof(struct kinfo_file), followed by the file descriptor records
  * written by kern_proc_filedesc_out().
  *
  * With a NULL sb the output is generated into a temporary sbuf whose
  * drain only counts bytes, and the count is returned through *sizep.
  * With a real sb the records are limited to the previously reported
  * size and the note is zero-padded up to that size if less was written.
  */
 static void
 note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct proc *p;
 	size_t size, sect_sz, i;
 	ssize_t start_len, sect_len;
 	int structsize, filedesc_flags;
 
 	/* Optionally ask for packed records. */
 	if (coredump_pack_fileinfo)
 		filedesc_flags = KERN_FILEDESC_PACK_KINFO;
 	else
 		filedesc_flags = 0;
 
 	p = arg;
 	structsize = sizeof(struct kinfo_file);
 	if (sb == NULL) {
 		/* Sizing pass: count the bytes, keep none of them. */
 		size = 0;
 		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
 		sbuf_set_drain(sb, sbuf_count_drain, &size);
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		/*
 		 * NOTE(review): no matching unlock is visible here;
 		 * presumably kern_proc_filedesc_out() drops the process
 		 * lock -- confirm.
 		 */
 		PROC_LOCK(p);
 		kern_proc_filedesc_out(p, sb, -1, filedesc_flags);
 		sbuf_finish(sb);
 		sbuf_delete(sb);
 		*sizep = size;
 	} else {
 		/* Track how much this note actually appends to sb. */
 		sbuf_start_section(sb, &start_len);
 
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		PROC_LOCK(p);
 		kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize),
 		    filedesc_flags);
 
 		sect_len = sbuf_end_section(sb, start_len, 0, 0);
 		if (sect_len < 0)
 			return;
 		sect_sz = sect_len;
 
 		KASSERT(sect_sz <= *sizep,
 		    ("kern_proc_filedesc_out did not respect maxlen; "
 		     "requested %zu, got %zu", *sizep - sizeof(structsize),
 		     sect_sz - sizeof(structsize)));
 
 		/*
 		 * Zero-fill up to the size reported by the sizing pass,
 		 * stopping early if the sbuf has entered an error state.
 		 */
 		for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++)
 			sbuf_putc(sb, 0);
 	}
 }
 
 #ifdef KINFO_VMENTRY_SIZE
 CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
 #endif
 
 /*
  * Output function for NT_PROCSTAT_VMMAP: an int holding
  * sizeof(struct kinfo_vmentry), followed by the map entry records
  * written by kern_proc_vmmap_out().
  *
  * With a NULL sb the output is generated into a temporary sbuf whose
  * drain only counts bytes, and the count is returned through *sizep.
  * With a real sb the records are limited to the previously reported
  * size; *sizep is not modified in that case.
  */
 static void
 note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct proc *p = arg;
 	struct sbuf *counter;
 	size_t counted;
 	int structsize, vmmap_flags;
 
 	/* Optionally ask for packed records. */
 	vmmap_flags = coredump_pack_vmmapinfo ? KERN_VMMAP_PACK_KINFO : 0;
 	structsize = sizeof(struct kinfo_vmentry);
 
 	/*
 	 * NOTE(review): neither path unlocks the process after
 	 * PROC_LOCK(); presumably kern_proc_vmmap_out() drops the lock --
 	 * confirm.
 	 */
 	if (sb != NULL) {
 		/* Emission pass. */
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		PROC_LOCK(p);
 		kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize),
 		    vmmap_flags);
 		return;
 	}
 
 	/* Sizing pass: count the bytes, keep none of them. */
 	counted = 0;
 	counter = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
 	sbuf_set_drain(counter, sbuf_count_drain, &counted);
 	sbuf_bcat(counter, &structsize, sizeof(structsize));
 	PROC_LOCK(p);
 	kern_proc_vmmap_out(p, counter, -1, vmmap_flags);
 	sbuf_finish(counter);
 	sbuf_delete(counter);
 	*sizep = counted;
 }
 
 /*
  * Output function for NT_PROCSTAT_GROUPS: an int holding sizeof(gid_t),
  * followed by the group list of the process credential.  With a NULL sb
  * only the size is reported.
  */
 static void
 note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct proc *p = arg;
 	size_t gidbytes;
 	int structsize;
 
 	gidbytes = p->p_ucred->cr_ngroups * sizeof(gid_t);
 	if (sb != NULL) {
 		KASSERT(*sizep == sizeof(structsize) + gidbytes,
 		    ("invalid size"));
 		structsize = sizeof(gid_t);
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		sbuf_bcat(sb, p->p_ucred->cr_groups, gidbytes);
 	}
 	*sizep = sizeof(structsize) + gidbytes;
 }
 
 /*
  * Output function for NT_PROCSTAT_UMASK: an int holding the size of the
  * mask field, followed by the process's pd_cmask value.  With a NULL sb
  * only the size is reported.
  */
 static void
 note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct proc *p = arg;
 	int structsize;
 	const size_t expected = sizeof(structsize) +
 	    sizeof(p->p_pd->pd_cmask);
 
 	if (sb != NULL) {
 		KASSERT(*sizep == expected, ("invalid size"));
 		structsize = sizeof(p->p_pd->pd_cmask);
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		sbuf_bcat(sb, &p->p_pd->pd_cmask, sizeof(p->p_pd->pd_cmask));
 	}
 	*sizep = expected;
 }
 
 /*
  * Output function for NT_PROCSTAT_RLIMIT: an int holding the size of
  * the whole limit array, followed by all RLIM_NLIMITS resource limits
  * of the process.  With a NULL sb only the size is reported.
  */
 static void
 note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct rlimit limits[RLIM_NLIMITS];
 	struct proc *p = arg;
 	int structsize, which;
 	const size_t expected = sizeof(structsize) + sizeof(limits);
 
 	if (sb != NULL) {
 		KASSERT(*sizep == expected, ("invalid size"));
 		structsize = sizeof(limits);
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 
 		/* Snapshot every limit under the process lock. */
 		PROC_LOCK(p);
 		for (which = 0; which < RLIM_NLIMITS; which++)
 			lim_rlimit_proc(p, which, &limits[which]);
 		PROC_UNLOCK(p);
 
 		sbuf_bcat(sb, limits, sizeof(limits));
 	}
 	*sizep = expected;
 }
 
 /*
  * Output function for NT_PROCSTAT_OSREL: an int holding the size of the
  * p_osrel field, followed by the process's p_osrel value.  With a NULL
  * sb only the size is reported.
  */
 static void
 note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct proc *p = arg;
 	int structsize;
 	const size_t expected = sizeof(structsize) + sizeof(p->p_osrel);
 
 	if (sb != NULL) {
 		KASSERT(*sizep == expected, ("invalid size"));
 		structsize = sizeof(p->p_osrel);
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel));
 	}
 	*sizep = expected;
 }
 
 /*
  * Output function for NT_PROCSTAT_PSSTRINGS: an int holding the size of
  * the address, followed by the value of PROC_PS_STRINGS() for the
  * process (converted with PTROUT() in the 32-bit compat build).  With a
  * NULL sb only the size is reported.
  */
 static void
 __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct proc *p = arg;
 	elf_ps_strings_t ps_strings;
 	int structsize;
 	const size_t expected = sizeof(structsize) + sizeof(ps_strings);
 
 	if (sb != NULL) {
 		KASSERT(*sizep == expected, ("invalid size"));
 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
 		ps_strings = PTROUT(PROC_PS_STRINGS(p));
 #else
 		ps_strings = PROC_PS_STRINGS(p);
 #endif
 		structsize = sizeof(ps_strings);
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		sbuf_bcat(sb, &ps_strings, sizeof(ps_strings));
 	}
 	*sizep = expected;
 }
 
 /*
  * Output function for NT_PROCSTAT_AUXV: an int holding
  * sizeof(Elf_Auxinfo), followed by the auxiliary vector written by
  * proc_getauxv().
  *
  * With a NULL sb the output is generated into a temporary sbuf whose
  * drain only counts bytes, and the count is returned through *sizep.
  * With a real sb the data is appended to it and *sizep is not modified.
  */
 static void
 __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep)
 {
 	struct proc *p;
 	size_t size;
 	int structsize;
 
 	p = arg;
 	/*
 	 * Set the prefix value before either path copies it, so the
 	 * sizing pass does not feed an uninitialized int to sbuf_bcat().
 	 */
 	structsize = sizeof(Elf_Auxinfo);
 	if (sb == NULL) {
 		/* Sizing pass: count the bytes, keep none of them. */
 		size = 0;
 		sb = sbuf_new(NULL, NULL, AT_COUNT * sizeof(Elf_Auxinfo),
 		    SBUF_FIXEDLEN);
 		sbuf_set_drain(sb, sbuf_count_drain, &size);
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		/* Hold the process while its auxv is fetched. */
 		PHOLD(p);
 		proc_getauxv(curthread, p, sb);
 		PRELE(p);
 		sbuf_finish(sb);
 		sbuf_delete(sb);
 		*sizep = size;
 	} else {
 		sbuf_bcat(sb, &structsize, sizeof(structsize));
 		PHOLD(p);
 		proc_getauxv(curthread, p, sb);
 		PRELE(p);
 	}
 }
 
 /*
  * Walk the notes of one PT_NOTE program header looking for a note whose
  * n_namesz, n_descsz and n_type equal those of checknote and whose name
  * matches note_vendor.  For each such note cb(note, cb_arg, &res) is
  * called; if cb returns true the walk stops and the value cb stored in
  * res is returned.  Returns false if pnote is NULL, the segment is larger
  * than PAGE_SIZE, reading it fails, a note is malformed, or no callback
  * ended the walk.
  *
  * The image vnode must be locked on entry (asserted below); it is locked
  * again on return even if the lock had to be dropped temporarily.
  */
 static bool
 __elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote,
     const char *note_vendor, const Elf_Phdr *pnote,
     bool (*cb)(const Elf_Note *, void *, bool *), void *cb_arg)
 {
 	const Elf_Note *note, *note0, *note_end;
 	const char *note_name;
 	char *buf;
 	int i, error;
 	bool res;
 
 	/* We need some limit, might as well use PAGE_SIZE. */
 	if (pnote == NULL || pnote->p_filesz > PAGE_SIZE)
 		return (false);
 	ASSERT_VOP_LOCKED(imgp->vp, "parse_notes");
 	/*
 	 * If the segment does not lie entirely within the first PAGE_SIZE
 	 * bytes of the file, read it into a temporary buffer; otherwise
 	 * parse it in place from imgp->image_header.
 	 */
 	if (pnote->p_offset > PAGE_SIZE ||
 	    pnote->p_filesz > PAGE_SIZE - pnote->p_offset) {
 		/*
 		 * Try a non-sleeping allocation first; if that fails, drop
 		 * the vnode lock around the sleeping allocation and retake it.
 		 */
 		buf = malloc(pnote->p_filesz, M_TEMP, M_NOWAIT);
 		if (buf == NULL) {
 			VOP_UNLOCK(imgp->vp);
 			buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK);
 			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		}
 		error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz,
 		    pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED,
 		    curthread->td_ucred, NOCRED, NULL, curthread);
 		if (error != 0) {
 			uprintf("i/o error PT_NOTE\n");
 			goto retf;
 		}
 		note = note0 = (const Elf_Note *)buf;
 		note_end = (const Elf_Note *)(buf + pnote->p_filesz);
 	} else {
 		note = note0 = (const Elf_Note *)(imgp->image_header +
 		    pnote->p_offset);
 		note_end = (const Elf_Note *)(imgp->image_header +
 		    pnote->p_offset + pnote->p_filesz);
 		/* Nothing was allocated; free(NULL) at 'ret' is harmless. */
 		buf = NULL;
 	}
 	/*
 	 * Examine at most 100 notes, and stop as soon as the cursor leaves
 	 * the [note0, note_end) range.
 	 */
 	for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
 		/* The note header must be aligned and fit before note_end. */
 		if (!aligned(note, Elf32_Addr) || (const char *)note_end -
 		    (const char *)note < sizeof(Elf_Note)) {
 			goto retf;
 		}
 		/* Cheap header comparison before looking at the name. */
 		if (note->n_namesz != checknote->n_namesz ||
 		    note->n_descsz != checknote->n_descsz ||
 		    note->n_type != checknote->n_type)
 			goto nextnote;
 		/* The name immediately follows the fixed-size note header. */
 		note_name = (const char *)(note + 1);
 		if (note_name + checknote->n_namesz >=
 		    (const char *)note_end || strncmp(note_vendor,
 		    note_name, checknote->n_namesz) != 0)
 			goto nextnote;
 
 		/* A true return from cb ends the walk; res was set by cb. */
 		if (cb(note, cb_arg, &res))
 			goto ret;
 nextnote:
 		/* Skip the header plus the rounded-up name and descriptor. */
 		note = (const Elf_Note *)((const char *)(note + 1) +
 		    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
 		    roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE));
 	}
 retf:
 	res = false;
 ret:
 	free(buf, M_TEMP);
 	return (res);
 }
 
 /* Callback argument consumed by brandnote_cb(). */
 struct brandnote_cb_arg {
 	Elf_Brandnote *brandnote;	/* brand note whose flags/trans_osrel are consulted */
 	int32_t *osrel;			/* handed to brandnote->trans_osrel() */
 };
 
 /*
  * Note callback for a matched brand note.
  *
  * If the brand note has BN_TRANSLATE_OSREL set and provides a
  * trans_osrel hook, the hook is invoked with the note and the osrel
  * pointer and its result is stored in *res; otherwise *res is true.
  * Always returns true.
  */
 static bool
 brandnote_cb(const Elf_Note *note, void *arg0, bool *res)
 {
 	struct brandnote_cb_arg *cbarg = arg0;
 	Elf_Brandnote *bn = cbarg->brandnote;
 
 	/*
 	 * Fetch the osreldate for binary from the ELF OSABI-note if
 	 * necessary.
 	 */
 	if ((bn->flags & BN_TRANSLATE_OSREL) != 0 && bn->trans_osrel != NULL)
 		*res = bn->trans_osrel(note, cbarg->osrel);
 	else
 		*res = true;
 
 	return (true);
 }
 
 /*
  * Note header template matched against when searching for the FreeBSD
  * feature control note: the name is FREEBSD_ABI_VENDOR (size includes
  * the terminating NUL), the descriptor is a single uint32_t, and the
  * type is NT_FREEBSD_FEATURE_CTL.
  */
 static Elf_Note fctl_note = {
 	.n_namesz = sizeof(FREEBSD_ABI_VENDOR),
 	.n_descsz = sizeof(uint32_t),
 	.n_type = NT_FREEBSD_FEATURE_CTL,
 };
 
 /* Callback argument consumed by note_fctl_cb(); both members are outputs. */
 struct fctl_cb_arg {
 	bool *has_fctl0;	/* set to true when a feature control note is seen */
 	uint32_t *fctl0;	/* receives the first word of the note descriptor */
 };
 
 /*
  * Note callback for a matched feature control note.
  *
  * Locates the descriptor, which follows the note header and the
  * rounded-up name, and copies its first word to *arg->fctl0 after
  * flagging *arg->has_fctl0.  Stores true in *res and returns true.
  */
 static bool
 note_fctl_cb(const Elf_Note *note, void *arg0, bool *res)
 {
 	struct fctl_cb_arg *out = arg0;
 	const Elf32_Word *words;
 
 	words = (const Elf32_Word *)((uintptr_t)(note + 1) +
 	    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE));
 	*out->has_fctl0 = true;
 	*out->fctl0 = words[0];
 	*res = true;
 	return (true);
 }
 
 /*
  * Look through the program headers (found via e_phoff in the image
  * header) for a PT_NOTE segment in which parse_notes() reports a match
  * for brandnote; brandnote_cb may fetch the osreldate into *osrel in
  * the process.  On the first such segment, every PT_NOTE segment is
  * additionally searched for the FreeBSD feature control note, stopping
  * at the first hit, and true is returned whether or not one was found.
  * Returns false if no segment matched brandnote.
  */
 static bool
 __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote,
     int32_t *osrel, bool *has_fctl0, uint32_t *fctl0)
 {
 	const Elf_Ehdr *ehdr = (const Elf_Ehdr *)imgp->image_header;
 	const Elf_Phdr *phtab =
 	    (const Elf_Phdr *)(imgp->image_header + ehdr->e_phoff);
 	struct brandnote_cb_arg brand_arg = {
 		.brandnote = brandnote,
 		.osrel = osrel,
 	};
 	struct fctl_cb_arg fctl_arg = {
 		.has_fctl0 = has_fctl0,
 		.fctl0 = fctl0,
 	};
 	int bi, fi;
 
 	for (bi = 0; bi < ehdr->e_phnum; bi++) {
 		if (phtab[bi].p_type != PT_NOTE)
 			continue;
 		if (!__elfN(parse_notes)(imgp, &brandnote->hdr,
 		    brandnote->vendor, &phtab[bi], brandnote_cb, &brand_arg))
 			continue;
 
 		/* Brand matched: pick up the feature control word, if any. */
 		for (fi = 0; fi < ehdr->e_phnum; fi++) {
 			if (phtab[fi].p_type != PT_NOTE)
 				continue;
 			if (__elfN(parse_notes)(imgp, &fctl_note,
 			    FREEBSD_ABI_VENDOR, &phtab[fi], note_fctl_cb,
 			    &fctl_arg))
 				break;
 		}
 		return (true);
 	}
 	return (false);
 }
 
 /*
  * Tell kern_execve.c about it, with a little help from the linker.
  *
  * The activator entry point is the exec_-prefixed imgact function for
  * this ELF word size, and the name is the string formed from "ELF"
  * followed by __ELF_WORD_SIZE.  EXEC_SET() performs the registration.
  */
 static struct execsw __elfN(execsw) = {
 	.ex_imgact = __CONCAT(exec_, __elfN(imgact)),
 	.ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
 };
 EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
 
 /*
  * Convert ELF segment permission flags (PF_R/PF_W/PF_X) into the
  * corresponding VM_PROT_* bits.  For 32-bit images on amd64/i386,
  * readable segments also become executable when i386_read_exec is set.
  */
 static vm_prot_t
 __elfN(trans_prot)(Elf_Word flags)
 {
 	vm_prot_t result = 0;
 
 	if ((flags & PF_R) != 0)
 		result |= VM_PROT_READ;
 	if ((flags & PF_W) != 0)
 		result |= VM_PROT_WRITE;
 	if ((flags & PF_X) != 0)
 		result |= VM_PROT_EXECUTE;
 #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__))
 	if (i386_read_exec && (flags & PF_R) != 0)
 		result |= VM_PROT_EXECUTE;
 #endif
 	return (result);
 }
 
 /*
  * Convert VM_PROT_* protection bits back into ELF segment permission
  * flags (PF_R/PF_W/PF_X).
  */
 static Elf_Word
 __elfN(untrans_prot)(vm_prot_t prot)
 {
 	Elf_Word result = 0;
 
 	if ((prot & VM_PROT_READ) != 0)
 		result |= PF_R;
 	if ((prot & VM_PROT_WRITE) != 0)
 		result |= PF_W;
 	if ((prot & VM_PROT_EXECUTE) != 0)
 		result |= PF_X;
 	return (result);
 }
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 5951883cdc62..2d46bc018173 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1,2055 +1,2054 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1993, David Greenman
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_ktrace.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/acct.h>
 #include <sys/asan.h>
 #include <sys/capsicum.h>
 #include <sys/compressor.h>
 #include <sys/eventhandler.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/reg.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/sf_buf.h>
 #include <sys/shm.h>
 #include <sys/signalvar.h>
 #include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/timers.h>
 #include <sys/umtxvar.h>
 #include <sys/vnode.h>
 #include <sys/wait.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 
 #ifdef	HWPMC_HOOKS
 #include <sys/pmckern.h>
 #endif
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 dtrace_execexit_func_t	dtrace_fasttrap_exec;
 #endif
 
 /*
  * Statically defined probes in the "proc" provider; do_execve() fires
  * exec with the file name, exec__failure with the error, and
  * exec__success with the file name.
  */
 SDT_PROVIDER_DECLARE(proc);
 SDT_PROBE_DEFINE1(proc, , , exec, "char *");
 SDT_PROBE_DEFINE1(proc, , , exec__failure, "int");
 SDT_PROBE_DEFINE1(proc, , , exec__success, "char *");
 
 /* Malloc type; do_execve() allocates the copied binary name from it. */
 MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
 
 /* Exported as the kern.coredump_pack_fileinfo tunable; enabled by default. */
 int coredump_pack_fileinfo = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
     &coredump_pack_fileinfo, 0,
     "Enable file path packing in 'procstat -f' coredump notes");
 
 /* Exported as the kern.coredump_pack_vmmapinfo tunable; enabled by default. */
 int coredump_pack_vmmapinfo = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN,
     &coredump_pack_vmmapinfo, 0,
     "Enable file path packing in 'procstat -v' coredump notes");
 
 /* Forward declarations for the sysctl handlers and do_execve() below. */
 static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
 static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
 static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
 static int do_execve(struct thread *td, struct image_args *args,
     struct mac *mac_p, struct vmspace *oldvmspace);
 
 /* XXX This should be vm_size_t. */
 SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD|
     CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_ps_strings, "LU",
     "Location of process' ps_strings structure");
 
 /* XXX This should be vm_size_t. */
 SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD|
     CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_usrstack, "LU",
     "Top of process stack");
 
 SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_MPSAFE,
     NULL, 0, sysctl_kern_stackprot, "I",
     "Stack memory permissions");
 
 /*
  * do_execve() caches the argument strings only when their length plus
  * sizeof(struct pargs) does not exceed this limit.
  */
 u_long ps_arg_cache_limit = PAGE_SIZE / 16;
 SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 
     &ps_arg_cache_limit, 0,
     "Process' command line characters cache limit");
 
 /*
  * When non-zero, do_execve() fails with ENOEXEC if the image's osrel
  * major version is higher than that of __FreeBSD_version.
  */
 static int disallow_high_osrel;
 SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW,
     &disallow_high_osrel, 0,
     "Disallow execution of binaries built for higher version of the world");
 
 /* Exported as the security.bsd.map_at_zero tunable; disabled by default. */
 static int map_at_zero = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0,
     "Permit processes to map an object at virtual address 0.");
 
 /* Exported as the kern.core_dump_can_intr tunable; enabled by default. */
 static int core_dump_can_intr = 1;
 SYSCTL_INT(_kern, OID_AUTO, core_dump_can_intr, CTLFLAG_RWTUN,
     &core_dump_can_intr, 0,
     "Core dumping interruptible with SIGKILL");
 
 /*
  * Sysctl handler reporting PROC_PS_STRINGS() of the calling process.
  * Requests flagged SCTL_MASK32 receive the value truncated to an
  * unsigned int; all others receive a vm_offset_t.
  */
 static int
 sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *self = curproc;
 	vm_offset_t addr;
 
 #ifdef SCTL_MASK32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		unsigned int addr32 = (unsigned int)PROC_PS_STRINGS(self);
 
 		return (SYSCTL_OUT(req, &addr32, sizeof(addr32)));
 	}
 #endif
 	addr = PROC_PS_STRINGS(self);
 	return (SYSCTL_OUT(req, &addr, sizeof(addr)));
 }
 
 /*
  * Sysctl handler reporting the page-rounded vm_stacktop of the calling
  * process' vmspace.  Requests flagged SCTL_MASK32 receive an unsigned
  * int computed from the truncated value; all others a vm_offset_t.
  */
 static int
 sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *self = curproc;
 	vm_offset_t top;
 
 #ifdef SCTL_MASK32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		unsigned int top32;
 
 		top32 = round_page((unsigned int)self->p_vmspace->vm_stacktop);
 		return (SYSCTL_OUT(req, &top32, sizeof(top32)));
 	}
 #endif
 	top = round_page(self->p_vmspace->vm_stacktop);
 	return (SYSCTL_OUT(req, &top, sizeof(top)));
 }
 
 /*
  * Sysctl handler reporting sv_stackprot from the sysentvec of the
  * calling process.
  */
 static int
 sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *self = curproc;
 
 	return (SYSCTL_OUT(req, &self->p_sysent->sv_stackprot,
 	    sizeof(self->p_sysent->sv_stackprot)));
 }
 
 /*
  * Table of image activators.  Each of the items is a pointer to a
  * `const struct execsw', hence the double pointer here.  do_execve()
  * walks the table until it reaches a NULL entry.
  */
 static const struct execsw **execsw;
 
 #ifndef _SYS_SYSPROTO_H_
 struct execve_args {
 	char    *fname;
 	char    **argv;
 	char    **envv;
 };
 #endif
 
 int
 sys_execve(struct thread *td, struct execve_args *uap)
 {
 	struct image_args args;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0)
 		error = kern_execve(td, &args, NULL, oldvmspace);
 	post_execve(td, error, oldvmspace);
 	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fexecve_args {
 	int	fd;
 	char	**argv;
 	char	**envv;
 };
 #endif
 int
 sys_fexecve(struct thread *td, struct fexecve_args *uap)
 {
 	struct image_args args;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = exec_copyin_args(&args, NULL, UIO_SYSSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0) {
 		args.fd = uap->fd;
 		error = kern_execve(td, &args, NULL, oldvmspace);
 	}
 	post_execve(td, error, oldvmspace);
 	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct __mac_execve_args {
 	char	*fname;
 	char	**argv;
 	char	**envv;
 	struct mac	*mac_p;
 };
 #endif
 
 /*
  * __mac_execve(2) entry point: identical to sys_execve() but forwards
  * uap->mac_p to kern_execve().  Returns ENOSYS when the kernel is built
  * without MAC.
  */
 int
 sys___mac_execve(struct thread *td, struct __mac_execve_args *uap)
 {
 #ifdef MAC
 	struct vmspace *saved_vm;
 	struct image_args iargs;
 	int rv;
 
 	if ((rv = pre_execve(td, &saved_vm)) != 0)
 		return (rv);
 
 	rv = exec_copyin_args(&iargs, uap->fname, UIO_USERSPACE,
 	    uap->argv, uap->envv);
 	if (rv == 0)
 		rv = kern_execve(td, &iargs, uap->mac_p, saved_vm);
 
 	post_execve(td, rv, saved_vm);
 	AUDIT_SYSCALL_EXIT(rv == EJUSTRETURN ? 0 : rv, td);
 	return (rv);
 #else
 	return (ENOSYS);
 #endif
 }
 
 int
 pre_execve(struct thread *td, struct vmspace **oldvmspace)
 {
 	struct proc *p;
 	int error;
 
 	KASSERT(td == curthread, ("non-current thread %p", td));
 	error = 0;
 	p = td->td_proc;
 	if ((p->p_flag & P_HADTHREADS) != 0) {
 		PROC_LOCK(p);
 		while (p->p_singlethr > 0) {
 			error = msleep(&p->p_singlethr, &p->p_mtx,
 			    PWAIT | PCATCH, "exec1t", 0);
 			if (error != 0) {
 				error = ERESTART;
 				goto unlock;
 			}
 		}
 		if (thread_single(p, SINGLE_BOUNDARY) != 0)
 			error = ERESTART;
 unlock:
 		PROC_UNLOCK(p);
 	}
 	KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0,
 	    ("nested execve"));
 	*oldvmspace = p->p_vmspace;
 	return (error);
 }
 
 void
 post_execve(struct thread *td, int error, struct vmspace *oldvmspace)
 {
 	struct proc *p;
 
 	KASSERT(td == curthread, ("non-current thread %p", td));
 	p = td->td_proc;
 	if ((p->p_flag & P_HADTHREADS) != 0) {
 		PROC_LOCK(p);
 		/*
 		 * If success, we upgrade to SINGLE_EXIT state to
 		 * force other threads to suicide.
 		 */
 		if (error == EJUSTRETURN)
 			thread_single(p, SINGLE_EXIT);
 		else
 			thread_single_end(p, SINGLE_BOUNDARY);
 		PROC_UNLOCK(p);
 	}
 	exec_cleanup(td, oldvmspace);
 }
 
 /*
  * kern_execve() does not always return to its caller: if things go
  * badly enough inside do_execve(), exit1() may be called instead.
  * Callers therefore must not do anything before the call that they
  * would later need to undo (e.g., allocating memory).
  *
  * An empty argument vector is rejected with EINVAL after freeing args.
  */
 int
 kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
     struct vmspace *oldvmspace)
 {
 
 	TSEXEC(td->td_proc->p_pid, args->begin_argv);
 	AUDIT_ARG_ARGV(args->begin_argv, args->argc,
 	    exec_args_get_begin_envv(args) - args->begin_argv);
 	AUDIT_ARG_ENVV(exec_args_get_begin_envv(args), args->envc,
 	    args->endp - exec_args_get_begin_envv(args));
 
 	/* Must have at least one argument. */
 	if (args->argc != 0)
 		return (do_execve(td, args, mac_p, oldvmspace));
 
 	exec_free_args(args);
 	return (EINVAL);
 }
 
 /*
  * Cancel a pending set-id credential change: clear credential_setid
  * and release the prepared new credential, if there is one.
  */
 static void
 execve_nosetid(struct image_params *imgp)
 {
 	imgp->credential_setid = false;
 	if (imgp->newcred == NULL)
 		return;
 	crfree(imgp->newcred);
 	imgp->newcred = NULL;
 }
 
 /*
  * In-kernel implementation of execve().  All arguments are assumed to be
  * userspace pointers from the passed thread.
  */
 static int
 do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
     struct vmspace *oldvmspace)
 {
 	struct proc *p = td->td_proc;
 	struct nameidata nd;
 	struct ucred *oldcred;
 	struct uidinfo *euip = NULL;
 	uintptr_t stack_base;
 	struct image_params image_params, *imgp;
 	struct vattr attr;
 	int (*img_first)(struct image_params *);
 	struct pargs *oldargs = NULL, *newargs = NULL;
 	struct sigacts *oldsigacts = NULL, *newsigacts = NULL;
 #ifdef KTRACE
 	struct ktr_io_params *kiop;
 #endif
 	struct vnode *oldtextvp, *newtextvp;
 	struct vnode *oldtextdvp, *newtextdvp;
 	char *oldbinname, *newbinname;
 	bool credential_changing;
 #ifdef MAC
 	struct label *interpvplabel = NULL;
 	bool will_transition;
 #endif
 #ifdef HWPMC_HOOKS
 	struct pmckern_procexec pe;
 #endif
 	int error, i, orig_osrel;
 	uint32_t orig_fctl0;
 	Elf_Brandinfo *orig_brandinfo;
 	size_t freepath_size;
 	static const char fexecv_proc_title[] = "(fexecv)";
 
 	imgp = &image_params;
 	oldtextvp = oldtextdvp = NULL;
 	newtextvp = newtextdvp = NULL;
 	newbinname = oldbinname = NULL;
 #ifdef KTRACE
 	kiop = NULL;
 #endif
 
 	/*
 	 * Lock the process and set the P_INEXEC flag to indicate that
 	 * it should be left alone until we're done here.  This is
 	 * necessary to avoid race conditions - e.g. in ptrace() -
 	 * that might allow a local user to illicitly obtain elevated
 	 * privileges.
 	 */
 	PROC_LOCK(p);
 	KASSERT((p->p_flag & P_INEXEC) == 0,
 	    ("%s(): process already has P_INEXEC flag", __func__));
 	p->p_flag |= P_INEXEC;
 	PROC_UNLOCK(p);
 
 	/*
 	 * Initialize part of the common data
 	 */
 	bzero(imgp, sizeof(*imgp));
 	imgp->proc = p;
 	imgp->attr = &attr;
 	imgp->args = args;
 	oldcred = p->p_ucred;
 	orig_osrel = p->p_osrel;
 	orig_fctl0 = p->p_fctl0;
 	orig_brandinfo = p->p_elf_brandinfo;
 
 #ifdef MAC
 	error = mac_execve_enter(imgp, mac_p);
 	if (error)
 		goto exec_fail;
 #endif
 
 	SDT_PROBE1(proc, , , exec, args->fname);
 
 interpret:
 	if (args->fname != NULL) {
 #ifdef CAPABILITY_MODE
 		/*
 		 * While capability mode can't reach this point via direct
 		 * path arguments to execve(), we also don't allow
 		 * interpreters to be used in capability mode (for now).
 		 * Catch indirect lookups and return a permissions error.
 		 */
 		if (IN_CAPABILITY_MODE(td)) {
 			error = ECAPMODE;
 			goto exec_fail;
 		}
 #endif
 
 		/*
 		 * Translate the file name. namei() returns a vnode
 		 * pointer in ni_vp among other things.
 		 */
 		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW |
 		    SAVENAME | AUDITVNODE1 | WANTPARENT, UIO_SYSSPACE,
 		    args->fname);
 
 		error = namei(&nd);
 		if (error)
 			goto exec_fail;
 
 		newtextvp = nd.ni_vp;
 		newtextdvp = nd.ni_dvp;
 		nd.ni_dvp = NULL;
 		newbinname = malloc(nd.ni_cnd.cn_namelen + 1, M_PARGS,
 		    M_WAITOK);
 		memcpy(newbinname, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen);
 		newbinname[nd.ni_cnd.cn_namelen] = '\0';
 		imgp->vp = newtextvp;
 
 		/*
 		 * Do the best to calculate the full path to the image file.
 		 */
 		if (args->fname[0] == '/') {
 			imgp->execpath = args->fname;
 		} else {
 			VOP_UNLOCK(imgp->vp);
 			freepath_size = MAXPATHLEN;
 			if (vn_fullpath_hardlink(newtextvp, newtextdvp,
 			    newbinname, nd.ni_cnd.cn_namelen, &imgp->execpath,
 			    &imgp->freepath, &freepath_size) != 0)
 				imgp->execpath = args->fname;
 			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		}
 	} else {
 		AUDIT_ARG_FD(args->fd);
 
 		/*
 		 * If the descriptors was not opened with O_PATH, then
 		 * we require that it was opened with O_EXEC or
 		 * O_RDONLY.  In either case, exec_check_permissions()
 		 * below checks _current_ file access mode regardless
 		 * of the permissions additionally checked at the
 		 * open(2).
 		 */
 		error = fgetvp_exec(td, args->fd, &cap_fexecve_rights,
 		    &newtextvp);
 		if (error != 0)
 			goto exec_fail;
 
 		if (vn_fullpath(newtextvp, &imgp->execpath,
 		    &imgp->freepath) != 0)
 			imgp->execpath = args->fname;
 		vn_lock(newtextvp, LK_SHARED | LK_RETRY);
 		AUDIT_ARG_VNODE1(newtextvp);
 		imgp->vp = newtextvp;
 	}
 
 	/*
 	 * Check file permissions.  Also 'opens' file and sets its vnode to
 	 * text mode.
 	 */
 	error = exec_check_permissions(imgp);
 	if (error)
 		goto exec_fail_dealloc;
 
 	imgp->object = imgp->vp->v_object;
 	if (imgp->object != NULL)
 		vm_object_reference(imgp->object);
 
 	error = exec_map_first_page(imgp);
 	if (error)
 		goto exec_fail_dealloc;
 
 	imgp->proc->p_osrel = 0;
 	imgp->proc->p_fctl0 = 0;
 	imgp->proc->p_elf_brandinfo = NULL;
 
 	/*
 	 * Implement image setuid/setgid.
 	 *
 	 * Determine new credentials before attempting image activators
 	 * so that it can be used by process_exec handlers to determine
 	 * credential/setid changes.
 	 *
 	 * Don't honor setuid/setgid if the filesystem prohibits it or if
 	 * the process is being traced.
 	 *
 	 * We disable setuid/setgid/etc in capability mode on the basis
 	 * that most setugid applications are not written with that
 	 * environment in mind, and will therefore almost certainly operate
 	 * incorrectly. In principle there's no reason that setugid
 	 * applications might not be useful in capability mode, so we may want
 	 * to reconsider this conservative design choice in the future.
 	 *
 	 * XXXMAC: For the time being, use NOSUID to also prohibit
 	 * transitions on the file system.
 	 */
 	credential_changing = false;
 	credential_changing |= (attr.va_mode & S_ISUID) &&
 	    oldcred->cr_uid != attr.va_uid;
 	credential_changing |= (attr.va_mode & S_ISGID) &&
 	    oldcred->cr_gid != attr.va_gid;
 #ifdef MAC
 	will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp,
 	    interpvplabel, imgp) != 0;
 	credential_changing |= will_transition;
 #endif
 
 	/* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */
 	if (credential_changing)
 		imgp->proc->p_pdeathsig = 0;
 
 	if (credential_changing &&
 #ifdef CAPABILITY_MODE
 	    ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) &&
 #endif
 	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
 	    (p->p_flag & P_TRACED) == 0) {
 		imgp->credential_setid = true;
 		VOP_UNLOCK(imgp->vp);
 		imgp->newcred = crdup(oldcred);
 		if (attr.va_mode & S_ISUID) {
 			euip = uifind(attr.va_uid);
 			change_euid(imgp->newcred, euip);
 		}
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		if (attr.va_mode & S_ISGID)
 			change_egid(imgp->newcred, attr.va_gid);
 		/*
 		 * Implement correct POSIX saved-id behavior.
 		 *
 		 * XXXMAC: Note that the current logic will save the
 		 * uid and gid if a MAC domain transition occurs, even
 		 * though maybe it shouldn't.
 		 */
 		change_svuid(imgp->newcred, imgp->newcred->cr_uid);
 		change_svgid(imgp->newcred, imgp->newcred->cr_gid);
 	} else {
 		/*
 		 * Implement correct POSIX saved-id behavior.
 		 *
 		 * XXX: It's not clear that the existing behavior is
 		 * POSIX-compliant.  A number of sources indicate that the
 		 * saved uid/gid should only be updated if the new ruid is
 		 * not equal to the old ruid, or the new euid is not equal
 		 * to the old euid and the new euid is not equal to the old
 		 * ruid.  The FreeBSD code always updates the saved uid/gid.
 		 * Also, this code uses the new (replaced) euid and egid as
 		 * the source, which may or may not be the right ones to use.
 		 */
 		if (oldcred->cr_svuid != oldcred->cr_uid ||
 		    oldcred->cr_svgid != oldcred->cr_gid) {
 			VOP_UNLOCK(imgp->vp);
 			imgp->newcred = crdup(oldcred);
 			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 			change_svuid(imgp->newcred, imgp->newcred->cr_uid);
 			change_svgid(imgp->newcred, imgp->newcred->cr_gid);
 		}
 	}
 	/* The new credentials are installed into the process later. */
 
 	/*
 	 *	If the current process has a special image activator it
 	 *	wants to try first, call it.   For example, emulating shell
 	 *	scripts differently.
 	 */
 	error = -1;
 	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
 		error = img_first(imgp);
 
 	/*
 	 *	Loop through the list of image activators, calling each one.
 	 *	An activator returns -1 if there is no match, 0 on success,
 	 *	and an error otherwise.
 	 */
 	for (i = 0; error == -1 && execsw[i]; ++i) {
 		if (execsw[i]->ex_imgact == NULL ||
 		    execsw[i]->ex_imgact == img_first) {
 			continue;
 		}
 		error = (*execsw[i]->ex_imgact)(imgp);
 	}
 
 	if (error) {
 		if (error == -1)
 			error = ENOEXEC;
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * Special interpreter operation, cleanup and loop up to try to
 	 * activate the interpreter.
 	 */
 	if (imgp->interpreted) {
 		exec_unmap_first_page(imgp);
 		/*
 		 * The text reference needs to be removed for scripts.
 		 * There is a short period before we determine that
 		 * something is a script where text reference is active.
 		 * The vnode lock is held over this entire period
 		 * so nothing should illegitimately be blocked.
 		 */
 		MPASS(imgp->textset);
 		VOP_UNSET_TEXT_CHECKED(newtextvp);
 		imgp->textset = false;
 		/* free name buffer and old vnode */
 #ifdef MAC
 		mac_execve_interpreter_enter(newtextvp, &interpvplabel);
 #endif
 		if (imgp->opened) {
 			VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td);
 			imgp->opened = false;
 		}
 		vput(newtextvp);
 		imgp->vp = newtextvp = NULL;
 		if (args->fname != NULL) {
 			if (newtextdvp != NULL) {
 				vrele(newtextdvp);
 				newtextdvp = NULL;
 			}
 			NDFREE_PNBUF(&nd);
 			free(newbinname, M_PARGS);
 			newbinname = NULL;
 		}
 		vm_object_deallocate(imgp->object);
 		imgp->object = NULL;
 		execve_nosetid(imgp);
 		imgp->execpath = NULL;
 		free(imgp->freepath, M_TEMP);
 		imgp->freepath = NULL;
 		/* set new name to that of the interpreter */
 		args->fname = imgp->interpreter_name;
 		goto interpret;
 	}
 
 	/*
 	 * NB: We unlock the vnode here because it is believed that none
 	 * of the sv_copyout_strings/sv_fixup operations require the vnode.
 	 */
 	VOP_UNLOCK(imgp->vp);
 
 	if (disallow_high_osrel &&
 	    P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) {
 		error = ENOEXEC;
 		uprintf("Osrel %d for image %s too high\n", p->p_osrel,
 		    imgp->execpath != NULL ? imgp->execpath : "<unresolved>");
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * Copy out strings (args and env) and initialize stack base.
 	 */
 	error = (*p->p_sysent->sv_copyout_strings)(imgp, &stack_base);
 	if (error != 0) {
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * Stack setup.
 	 */
 	error = (*p->p_sysent->sv_fixup)(&stack_base, imgp);
 	if (error != 0) {
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * For security and other reasons, the file descriptor table cannot be
 	 * shared after an exec.
 	 */
 	fdunshare(td);
 	pdunshare(td);
 	/* close files on exec */
 	fdcloseexec(td);
 
 	/*
 	 * Malloc things before we need locks.
 	 */
 	i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv;
 	/* Cache arguments if they fit inside our allowance */
 	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
 		newargs = pargs_alloc(i);
 		bcopy(imgp->args->begin_argv, newargs->ar_args, i);
 	}
 
 	/*
 	 * For security and other reasons, signal handlers cannot
 	 * be shared after an exec. The new process gets a copy of the old
 	 * handlers. In execsigs(), the new process will have its signals
 	 * reset.
 	 */
 	if (sigacts_shared(p->p_sigacts)) {
 		oldsigacts = p->p_sigacts;
 		newsigacts = sigacts_alloc();
 		sigacts_copy(newsigacts, oldsigacts);
 	}
 
 	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 
 	PROC_LOCK(p);
 	if (oldsigacts)
 		p->p_sigacts = newsigacts;
 	/* Stop profiling */
 	stopprofclock(p);
 
 	/* reset caught signals */
 	execsigs(p);
 
 	/* name this process - nameiexec(p, ndp) */
 	bzero(p->p_comm, sizeof(p->p_comm));
 	if (args->fname)
 		bcopy(nd.ni_cnd.cn_nameptr, p->p_comm,
 		    min(nd.ni_cnd.cn_namelen, MAXCOMLEN));
 	else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0)
 		bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title));
 	bcopy(p->p_comm, td->td_name, sizeof(td->td_name));
 #ifdef KTR
 	sched_clear_tdname(td);
 #endif
 
 	/*
 	 * mark as execed, wakeup the process that vforked (if any) and tell
 	 * it that it now has its own resources back
 	 */
 	p->p_flag |= P_EXEC;
 	if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0)
 		p->p_flag2 &= ~P2_NOTRACE;
 	if ((p->p_flag2 & P2_STKGAP_DISABLE_EXEC) == 0)
 		p->p_flag2 &= ~P2_STKGAP_DISABLE;
 	if (p->p_flag & P_PPWAIT) {
 		p->p_flag &= ~(P_PPWAIT | P_PPTRACE);
 		cv_broadcast(&p->p_pwait);
 		/* STOPs are no longer ignored, arrange for AST */
 		signotify(td);
 	}
 
 	if ((imgp->sysent->sv_setid_allowed != NULL &&
 	    !(*imgp->sysent->sv_setid_allowed)(td, imgp)) ||
 	    (p->p_flag2 & P2_NO_NEW_PRIVS) != 0)
 		execve_nosetid(imgp);
 
 	/*
 	 * Implement image setuid/setgid installation.
 	 */
 	if (imgp->credential_setid) {
 		/*
 		 * Turn off syscall tracing for set-id programs, except for
 		 * root.  Record any set-id flags first to make sure that
 		 * we do not regain any tracing during a possible block.
 		 */
 		setsugid(p);
 #ifdef KTRACE
 		kiop = ktrprocexec(p);
 #endif
 		/*
 		 * Close any file descriptors 0..2 that reference procfs,
 		 * then make sure file descriptors 0..2 are in use.
 		 *
 		 * Both fdsetugidsafety() and fdcheckstd() may call functions
 		 * taking sleepable locks, so temporarily drop our locks.
 		 */
 		PROC_UNLOCK(p);
 		VOP_UNLOCK(imgp->vp);
 		fdsetugidsafety(td);
 		error = fdcheckstd(td);
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		if (error != 0)
 			goto exec_fail_dealloc;
 		PROC_LOCK(p);
 #ifdef MAC
 		if (will_transition) {
 			mac_vnode_execve_transition(oldcred, imgp->newcred,
 			    imgp->vp, interpvplabel, imgp);
 		}
 #endif
 	} else {
 		if (oldcred->cr_uid == oldcred->cr_ruid &&
 		    oldcred->cr_gid == oldcred->cr_rgid)
 			p->p_flag &= ~P_SUGID;
 	}
 	/*
 	 * Set the new credentials.
 	 */
 	if (imgp->newcred != NULL) {
 		proc_set_cred(p, imgp->newcred);
 		crfree(oldcred);
 		oldcred = NULL;
 	}
 
 	/*
 	 * Store the vp for use in kern.proc.pathname.  This vnode was
 	 * referenced by namei() or by fexecve variant of fname handling.
 	 */
 	oldtextvp = p->p_textvp;
 	p->p_textvp = newtextvp;
 	oldtextdvp = p->p_textdvp;
 	p->p_textdvp = newtextdvp;
 	newtextdvp = NULL;
 	oldbinname = p->p_binname;
 	p->p_binname = newbinname;
 	newbinname = NULL;
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * Tell the DTrace fasttrap provider about the exec if it
 	 * has declared an interest.
 	 */
 	if (dtrace_fasttrap_exec)
 		dtrace_fasttrap_exec(p);
 #endif
 
 	/*
 	 * Notify others that we exec'd, and clear the P_INEXEC flag
 	 * as we're now a bona fide freshly-execed process.
 	 */
 	KNOTE_LOCKED(p->p_klist, NOTE_EXEC);
 	p->p_flag &= ~P_INEXEC;
 
 	/* clear "fork but no exec" flag, as we _are_ execing */
 	p->p_acflag &= ~AFORK;
 
 	/*
 	 * Free any previous argument cache and replace it with
 	 * the new argument cache, if any.
 	 */
 	oldargs = p->p_args;
 	p->p_args = newargs;
 	newargs = NULL;
 
 	PROC_UNLOCK(p);
 
 #ifdef	HWPMC_HOOKS
 	/*
 	 * Check if system-wide sampling is in effect or if the
 	 * current process is using PMCs.  If so, do exec() time
 	 * processing.  This processing needs to happen AFTER the
 	 * P_INEXEC flag is cleared.
 	 */
 	if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
 		VOP_UNLOCK(imgp->vp);
 		pe.pm_credentialschanged = credential_changing;
 		pe.pm_entryaddr = imgp->entry_addr;
 
 		PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe);
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 	}
 #endif
 
 	/* Set values passed into the program in registers. */
 	(*p->p_sysent->sv_setregs)(td, imgp, stack_base);
 
 	VOP_MMAPPED(imgp->vp);
 
 	SDT_PROBE1(proc, , , exec__success, args->fname);
 
 exec_fail_dealloc:
 	if (error != 0) {
 		p->p_osrel = orig_osrel;
 		p->p_fctl0 = orig_fctl0;
 		p->p_elf_brandinfo = orig_brandinfo;
 	}
 
 	if (imgp->firstpage != NULL)
 		exec_unmap_first_page(imgp);
 
 	if (imgp->vp != NULL) {
 		if (imgp->opened)
 			VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
 		if (imgp->textset)
 			VOP_UNSET_TEXT_CHECKED(imgp->vp);
 		if (error != 0)
 			vput(imgp->vp);
 		else
 			VOP_UNLOCK(imgp->vp);
 		if (args->fname != NULL)
 			NDFREE_PNBUF(&nd);
 		if (newtextdvp != NULL)
 			vrele(newtextdvp);
 		free(newbinname, M_PARGS);
 	}
 
 	if (imgp->object != NULL)
 		vm_object_deallocate(imgp->object);
 
 	free(imgp->freepath, M_TEMP);
 
 	if (error == 0) {
 		if (p->p_ptevents & PTRACE_EXEC) {
 			PROC_LOCK(p);
 			if (p->p_ptevents & PTRACE_EXEC)
 				td->td_dbgflags |= TDB_EXEC;
 			PROC_UNLOCK(p);
 		}
 	} else {
 exec_fail:
 		/* we're done here, clear P_INEXEC */
 		PROC_LOCK(p);
 		p->p_flag &= ~P_INEXEC;
 		PROC_UNLOCK(p);
 
 		SDT_PROBE1(proc, , , exec__failure, error);
 	}
 
 	if (imgp->newcred != NULL && oldcred != NULL)
 		crfree(imgp->newcred);
 
 #ifdef MAC
 	mac_execve_exit(imgp);
 	mac_execve_interpreter_exit(interpvplabel);
 #endif
 	exec_free_args(args);
 
 	/*
 	 * Handle deferred decrement of ref counts.
 	 */
 	if (oldtextvp != NULL)
 		vrele(oldtextvp);
 	if (oldtextdvp != NULL)
 		vrele(oldtextdvp);
 	free(oldbinname, M_PARGS);
 #ifdef KTRACE
 	ktr_io_params_free(kiop);
 #endif
 	pargs_drop(oldargs);
 	pargs_drop(newargs);
 	if (oldsigacts != NULL)
 		sigacts_free(oldsigacts);
 	if (euip != NULL)
 		uifree(euip);
 
 	if (error && imgp->vmspace_destroyed) {
 		/* sorry, no more process anymore. exit gracefully */
 		exec_cleanup(td, oldvmspace);
 		exit1(td, 0, SIGABRT);
 		/* NOT REACHED */
 	}
 
 #ifdef KTRACE
 	if (error == 0)
 		ktrprocctor(p);
 #endif
 
 	/*
 	 * We don't want cpu_set_syscall_retval() to overwrite any of
 	 * the register values put in place by exec_setregs().
 	 * Implementations of cpu_set_syscall_retval() will leave
 	 * registers unmodified when returning EJUSTRETURN.
 	 */
 	return (error == 0 ? EJUSTRETURN : error);
 }
 
 /*
  * Free the address space passed in as oldvmspace, but only when the
  * thread carries TDP_EXECVMSPC; the flag is cleared afterwards.
  */
 void
 exec_cleanup(struct thread *td, struct vmspace *oldvmspace)
 {
 	/* Nothing was set aside for this thread. */
 	if ((td->td_pflags & TDP_EXECVMSPC) == 0)
 		return;
 
 	KASSERT(td->td_proc->p_vmspace != oldvmspace,
 	    ("oldvmspace still used"));
 	vmspace_free(oldvmspace);
 	td->td_pflags &= ~TDP_EXECVMSPC;
 }
 
 /*
  * Grab page 0 of the image vnode's VM object (requested wired and not
  * busied) and map it into the kernel through an sf_buf.  The mapping's
  * address is left in imgp->image_header.  A first page that is already
  * mapped is released beforehand.
  *
  * Returns 0 on success, EACCES when the vnode has no VM object and EIO
  * when the page could not be grabbed.
  */
 int
 exec_map_first_page(struct image_params *imgp)
 {
 	vm_object_t obj;
 	vm_page_t page;
 	int rv;
 
 	if (imgp->firstpage != NULL)
 		exec_unmap_first_page(imgp);
 
 	obj = imgp->vp->v_object;
 	if (obj == NULL)
 		return (EACCES);
 #if VM_NRESERVLEVEL > 0
 	/* Give the object a color if it does not have one yet. */
 	if ((obj->flags & OBJ_COLORED) == 0) {
 		VM_OBJECT_WLOCK(obj);
 		vm_object_color(obj, 0);
 		VM_OBJECT_WUNLOCK(obj);
 	}
 #endif
 	rv = vm_page_grab_valid_unlocked(&page, obj, 0,
 	    VM_ALLOC_COUNT(VM_INITIAL_PAGEIN) | VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
 	if (rv != VM_PAGER_OK)
 		return (EIO);
 
 	imgp->firstpage = sf_buf_alloc(page, 0);
 	imgp->image_header = (char *)sf_buf_kva(imgp->firstpage);
 	return (0);
 }
 
 /*
  * Undo exec_map_first_page(): free the sf_buf mapping, forget it in imgp
  * and unwire the underlying page onto the active queue.  Does nothing
  * when no first page is mapped.
  */
 void
 exec_unmap_first_page(struct image_params *imgp)
 {
 	vm_page_t page;
 
 	if (imgp->firstpage == NULL)
 		return;
 
 	/* Look the page up before the sf_buf goes away. */
 	page = sf_buf_page(imgp->firstpage);
 	sf_buf_free(imgp->firstpage);
 	imgp->firstpage = NULL;
 	vm_page_unwire(page, PQ_ACTIVE);
 }
 
 /*
  * Call sigfastblock_clear() on the thread and umtx_exec() on its process.
  * NOTE(review): presumably installed as an sv_onexec_old hook, which
  * exec_new_vmspace() invokes on the outgoing ABI -- confirm against the
  * sysentvec definitions.
  */
 void
 exec_onexec_old(struct thread *td)
 {
 	sigfastblock_clear(td);
 	umtx_exec(td->td_proc);
 }
 
 /*
  * This is an optimization which removes the unmanaged shared page
  * mapping. In combination with pmap_remove_pages(), which cleans all
  * managed mappings in the process' vmspace pmap, no work will be left
  * for pmap_remove(min, max).
  */
 void
 exec_free_abi_mappings(struct proc *p)
 {
 	struct vmspace *vmspace;
-	struct sysentvec *sv;
 
 	vmspace = p->p_vmspace;
 	/* Leave the mapping alone unless we hold the only vmspace reference. */
 	if (refcount_load(&vmspace->vm_refcnt) != 1)
 		return;
 
 	/* Nothing to remove when the process has no shared page. */
-	sv = p->p_sysent;
-	if (sv->sv_shared_page_obj == NULL)
+	if (!PROC_HAS_SHP(p))
 		return;
 
 	/* Drop the shared page range from the pmap. */
-	pmap_remove(vmspace_pmap(vmspace), sv->sv_shared_page_base,
-	    sv->sv_shared_page_base + sv->sv_shared_page_len);
+	pmap_remove(vmspace_pmap(vmspace), vmspace->vm_shp_base,
+	    vmspace->vm_shp_base + p->p_sysent->sv_shared_page_len);
 }
 
 /*
  * Run down the current address space and install a new one.  Map the shared
  * page.
  */
 int
 exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
 {
 	int error;
 	struct proc *p = imgp->proc;
 	struct vmspace *vmspace = p->p_vmspace;
 	struct thread *td = curthread;
 	vm_object_t obj;
 	vm_offset_t sv_minuser;
 	vm_map_t map;
 
 	/*
 	 * Flag the old address space as gone before touching it; the exec
 	 * failure path tests this flag and terminates the process via
 	 * exit1() when it is set.
 	 */
 	imgp->vmspace_destroyed = true;
 	imgp->sysent = sv;
 
 	/* Run the currently installed ABI's sv_onexec_old hook, if any. */
 	if (p->p_sysent->sv_onexec_old != NULL)
 		p->p_sysent->sv_onexec_old(td);
 	itimers_exec(p);
 
 	EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);
 
 	/*
 	 * Blow away entire process VM, if address space not shared,
 	 * otherwise, create a new VM space so that other threads are
 	 * not disrupted
 	 */
 	map = &vmspace->vm_map;
 	/*
 	 * Unless map_at_zero is set, the lowest user address is never
 	 * allowed to be below PAGE_SIZE.
 	 */
 	if (map_at_zero)
 		sv_minuser = sv->sv_minuser;
 	else
 		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
 	if (refcount_load(&vmspace->vm_refcnt) == 1 &&
 	    vm_map_min(map) == sv_minuser &&
 	    vm_map_max(map) == sv->sv_maxuser &&
 	    cpu_exec_vmspace_reuse(p, map)) {
 		exec_free_abi_mappings(p);
 		shmexit(vmspace);
 		pmap_remove_pages(vmspace_pmap(vmspace));
 		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
 		/*
 		 * An exec terminates mlockall(MCL_FUTURE).
 		 * ASLR and W^X states must be re-evaluated.
 		 */
 		vm_map_lock(map);
 		vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
 		    MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX);
 		vm_map_unlock(map);
 	} else {
 		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
 		if (error)
 			return (error);
 		/* Re-read the process vmspace and map after vmspace_exec(). */
 		vmspace = p->p_vmspace;
 		map = &vmspace->vm_map;
 	}
 	map->flags |= imgp->map_flags;
 
 	/* Map a shared page */
 	obj = sv->sv_shared_page_obj;
 	if (obj != NULL) {
 		/* This reference is dropped again if the mapping fails. */
 		vm_object_reference(obj);
 		error = vm_map_fixed(map, obj, 0,
 		    sv->sv_shared_page_base, sv->sv_shared_page_len,
 		    VM_PROT_READ | VM_PROT_EXECUTE,
 		    VM_PROT_READ | VM_PROT_EXECUTE,
 		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
 		if (error != KERN_SUCCESS) {
 			vm_object_deallocate(obj);
 			return (vm_mmap_to_errno(error));
 		}
 		/* Remember in the vmspace where the shared page was mapped. */
+		vmspace->vm_shp_base = sv->sv_shared_page_base;
 	}
 
 	/* Finish with the new ABI's sv_onexec hook, if it provides one. */
 	return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0);
 }
 
 /*
  * Compute the stack size limit and map the main process stack.
  */
 int
 exec_map_stack(struct image_params *imgp)
 {
 	struct rlimit rlim_stack;
 	struct sysentvec *sv;
 	struct proc *p;
 	vm_map_t map;
 	struct vmspace *vmspace;
 	vm_offset_t stack_addr, stack_top;
 	u_long ssiz;
 	int error, find_space, stack_off;
 	vm_prot_t stack_prot;
 
 	p = imgp->proc;
 	sv = p->p_sysent;
 
 	if (imgp->stack_sz != 0) {
 		ssiz = trunc_page(imgp->stack_sz);
 		PROC_LOCK(p);
 		lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack);
 		PROC_UNLOCK(p);
 		if (ssiz > rlim_stack.rlim_max)
 			ssiz = rlim_stack.rlim_max;
 		if (ssiz > rlim_stack.rlim_cur) {
 			rlim_stack.rlim_cur = ssiz;
 			kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack);
 		}
 	} else if (sv->sv_maxssiz != NULL) {
 		ssiz = *sv->sv_maxssiz;
 	} else {
 		ssiz = maxssiz;
 	}
 
 	vmspace = p->p_vmspace;
 	map = &vmspace->vm_map;
 
 	stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ?
 	    imgp->stack_prot : sv->sv_stackprot;
 	if ((map->flags & MAP_ASLR_STACK) != 0) {
 		stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
 		    lim_max(curthread, RLIMIT_DATA));
 		find_space = VMFS_ANY_SPACE;
 	} else {
 		stack_addr = sv->sv_usrstack - ssiz;
 		find_space = VMFS_NO_SPACE;
 	}
 	error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz,
 	    sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL,
 	    MAP_STACK_GROWS_DOWN);
 	if (error != KERN_SUCCESS) {
 		uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x "
 		    "failed, mach error %d errno %d\n", (uintmax_t)ssiz,
 		    stack_prot, error, vm_mmap_to_errno(error));
 		return (vm_mmap_to_errno(error));
 	}
 
 	stack_top = stack_addr + ssiz;
 	if ((map->flags & MAP_ASLR_STACK) != 0) {
 		/* Randomize within the first page of the stack. */
 		arc4rand(&stack_off, sizeof(stack_off), 0);
 		stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *));
 	}
 
 	/*
 	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
 	 * are still used to enforce the stack rlimit on the process stack.
 	 */
 	vmspace->vm_maxsaddr = (char *)stack_addr;
 	vmspace->vm_stacktop = stack_top;
 	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
 
 	return (0);
 }
 
 /*
  * Copy out argument and environment strings from the old process address
  * space into the temporary string buffer.
  */
 int
 exec_copyin_args(struct image_args *args, const char *fname,
     enum uio_seg segflg, char **argv, char **envv)
 {
 	u_long arg, env;
 	int error;
 
 	bzero(args, sizeof(*args));
 	if (argv == NULL)
 		return (EFAULT);
 
 	/*
 	 * Allocate demand-paged memory for the file name, argument, and
 	 * environment strings.
 	 */
 	error = exec_alloc_args(args);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Copy the file name.
 	 */
 	error = exec_args_add_fname(args, fname, segflg);
 	if (error != 0)
 		goto err_exit;
 
 	/*
 	 * extract arguments first
 	 */
 	for (;;) {
 		error = fueword(argv++, &arg);
 		if (error == -1) {
 			error = EFAULT;
 			goto err_exit;
 		}
 		if (arg == 0)
 			break;
 		error = exec_args_add_arg(args, (char *)(uintptr_t)arg,
 		    UIO_USERSPACE);
 		if (error != 0)
 			goto err_exit;
 	}
 
 	/*
 	 * extract environment strings
 	 */
 	if (envv) {
 		for (;;) {
 			error = fueword(envv++, &env);
 			if (error == -1) {
 				error = EFAULT;
 				goto err_exit;
 			}
 			if (env == 0)
 				break;
 			error = exec_args_add_env(args,
 			    (char *)(uintptr_t)env, UIO_USERSPACE);
 			if (error != 0)
 				goto err_exit;
 		}
 	}
 
 	return (0);
 
 err_exit:
 	exec_free_args(args);
 	return (error);
 }
 
 /*
  * Bookkeeping for one preallocated exec argument buffer carved out of
  * exec_map.
  */
 struct exec_args_kva {
 	/* Kernel virtual address of the buffer (exec_map_entry_size bytes). */
 	vm_offset_t addr;
 	/* exec_args_gen value seen at preallocation or at the last release. */
 	u_int gen;
 	/* Linkage on exec_args_kva_freelist. */
 	SLIST_ENTRY(exec_args_kva) next;
 };
 
 /* Per-CPU slot holding at most one free buffer. */
 DPCPU_DEFINE_STATIC(struct exec_args_kva *, exec_args_kva);
 
 /* Global list of free buffers; manipulated under exec_args_kva_mtx. */
 static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist;
 static struct mtx exec_args_kva_mtx;
 /* Generation counter, incremented by the vm_lowmem handler. */
 static u_int exec_args_gen;
 
 /*
  * SYSINIT hook: set up the free list and its mutex, then preallocate
  * exec_map_entries argument buffers of exec_map_entry_size bytes each
  * from exec_map and put them on the free list.
  */
 static void
 exec_prealloc_args_kva(void *arg __unused)
 {
 	struct exec_args_kva *kva;
 	u_int n;
 
 	SLIST_INIT(&exec_args_kva_freelist);
 	mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF);
 
 	for (n = 0; n < exec_map_entries; n++) {
 		kva = malloc(sizeof(*kva), M_PARGS, M_WAITOK);
 		kva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size);
 		kva->gen = exec_args_gen;
 		SLIST_INSERT_HEAD(&exec_args_kva_freelist, kva, next);
 	}
 }
 SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL);
 
 /*
  * Obtain an argument buffer.  The current CPU's cached buffer is tried
  * first; otherwise one is taken from the global free list, sleeping on
  * the list until a buffer shows up.  The bookkeeping structure is handed
  * back through *cookie and the buffer's address is returned.
  */
 static vm_offset_t
 exec_alloc_args_kva(void **cookie)
 {
 	struct exec_args_kva *kva;
 
 	kva = (void *)atomic_readandclear_ptr(
 	    (uintptr_t *)DPCPU_PTR(exec_args_kva));
 	if (kva == NULL) {
 		mtx_lock(&exec_args_kva_mtx);
 		for (;;) {
 			kva = SLIST_FIRST(&exec_args_kva_freelist);
 			if (kva != NULL)
 				break;
 			(void)mtx_sleep(&exec_args_kva_freelist,
 			    &exec_args_kva_mtx, 0, "execkva", 0);
 		}
 		SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next);
 		mtx_unlock(&exec_args_kva_mtx);
 	}
 
 	kasan_mark((void *)kva->addr, exec_map_entry_size,
 	    exec_map_entry_size, 0);
 	*(struct exec_args_kva **)cookie = kva;
 	return (kva->addr);
 }
 
 /*
  * Give an argument buffer back.  When the buffer's recorded generation
  * differs from gen, MADV_FREE is applied to its range and the generation
  * is updated.  The buffer is then parked in the current CPU's slot if
  * that slot is empty, or else pushed onto the global free list, waking
  * one sleeper.
  */
 static void
 exec_release_args_kva(struct exec_args_kva *argkva, u_int gen)
 {
 	vm_offset_t start;
 
 	start = argkva->addr;
 	kasan_mark((void *)argkva->addr, 0, exec_map_entry_size,
 	    KASAN_EXEC_ARGS_FREED);
 
 	if (argkva->gen != gen) {
 		(void)vm_map_madvise(exec_map, start,
 		    start + exec_map_entry_size, MADV_FREE);
 		argkva->gen = gen;
 	}
 
 	/* Fast path: the per-CPU slot was empty and now holds the buffer. */
 	if (atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva),
 	    (uintptr_t)NULL, (uintptr_t)argkva))
 		return;
 
 	mtx_lock(&exec_args_kva_mtx);
 	SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
 	wakeup_one(&exec_args_kva_freelist);
 	mtx_unlock(&exec_args_kva_mtx);
 }
 
 /*
  * Release an argument buffer identified by the cookie that
  * exec_alloc_args_kva() stored (a struct exec_args_kva pointer), using
  * the current value of exec_args_gen as the generation.
  */
 static void
 exec_free_args_kva(void *cookie)
 {
 
 	exec_release_args_kva(cookie, exec_args_gen);
 }
 
 /*
  * vm_lowmem event handler.  Bumps the buffer generation and re-releases
  * every currently free buffer (global list and per-CPU slots) under the
  * new generation, which makes exec_release_args_kva() apply MADV_FREE
  * to each of them.
  */
 static void
 exec_args_kva_lowmem(void *arg __unused)
 {
 	SLIST_HEAD(, exec_args_kva) stolen;
 	struct exec_args_kva *kva;
 	u_int newgen;
 	int cpu;
 
 	newgen = atomic_fetchadd_int(&exec_args_gen, 1) + 1;
 
 	/*
 	 * Force an madvise of each KVA range. Any currently allocated ranges
 	 * will have MADV_FREE applied once they are freed.
 	 */
 	SLIST_INIT(&stolen);
 	mtx_lock(&exec_args_kva_mtx);
 	SLIST_SWAP(&stolen, &exec_args_kva_freelist, exec_args_kva);
 	mtx_unlock(&exec_args_kva_mtx);
 	for (;;) {
 		kva = SLIST_FIRST(&stolen);
 		if (kva == NULL)
 			break;
 		SLIST_REMOVE_HEAD(&stolen, next);
 		exec_release_args_kva(kva, newgen);
 	}
 
 	/* Same treatment for whatever sits in the per-CPU slots. */
 	CPU_FOREACH(cpu) {
 		kva = (void *)atomic_readandclear_ptr(
 		    (uintptr_t *)DPCPU_ID_PTR(cpu, exec_args_kva));
 		if (kva == NULL)
 			continue;
 		exec_release_args_kva(kva, newgen);
 	}
 }
 EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL,
     EVENTHANDLER_PRI_ANY);
 
 /*
  * Allocate temporary demand-paged, zero-filled memory for the file name,
  * argument, and environment strings.
  *
  * The buffer address is stored in args->buf and the allocator's cookie in
  * args->bufkva.  Always returns 0; exec_alloc_args_kva() sleeps until a
  * buffer is available rather than failing.
  */
 int
 exec_alloc_args(struct image_args *args)
 {
 
 	args->buf = (char *)exec_alloc_args_kva(&args->bufkva);
 	return (0);
 }
 
 /*
  * Release the resources attached to an image_args: the string buffer
  * obtained from exec_alloc_args() and the separately allocated
  * fname_buf.  Both pointers are reset, so calling this twice is
  * harmless.
  */
 void
 exec_free_args(struct image_args *args)
 {
 
 	if (args->buf != NULL) {
 		exec_free_args_kva(args->bufkva);
 		args->buf = NULL;
 	}
 
 	/* free(9) ignores a NULL pointer, so no guard is needed here. */
 	free(args->fname_buf, M_TEMP);
 	args->fname_buf = NULL;
 }
 
 /*
  * A set of functions to fill struct image args.
  *
  * NOTE: exec_args_add_fname() must be called (possibly with a NULL
  * fname) before the other functions.  All exec_args_add_arg() calls must
  * be made before any exec_args_add_env() calls.  exec_args_adjust_args()
  * may be called any time after exec_args_add_fname().
  *
  * exec_args_add_fname() - install path to be executed
  * exec_args_add_arg() - append an argument string
  * exec_args_add_env() - append an env string
  * exec_args_adjust_args() - adjust location of the argument list to
  *                           allow new arguments to be prepended
  */
 int
 exec_args_add_fname(struct image_args *args, const char *fname,
     enum uio_seg segflg)
 {
 	int error;
 	size_t length;
 
 	KASSERT(args->fname == NULL, ("fname already appended"));
 	KASSERT(args->endp == NULL, ("already appending to args"));
 
 	if (fname != NULL) {
 		args->fname = args->buf;
 		error = segflg == UIO_SYSSPACE ?
 		    copystr(fname, args->fname, PATH_MAX, &length) :
 		    copyinstr(fname, args->fname, PATH_MAX, &length);
 		if (error != 0)
 			return (error == ENAMETOOLONG ? E2BIG : error);
 	} else
 		length = 0;
 
 	/* Set up for _arg_*()/_env_*() */
 	args->endp = args->buf + length;
 	/* begin_argv must be set and kept updated */
 	args->begin_argv = args->endp;
 	KASSERT(exec_map_entry_size - length >= ARG_MAX,
 	    ("too little space remaining for arguments %zu < %zu",
 	    exec_map_entry_size - length, (size_t)ARG_MAX));
 	args->stringspace = ARG_MAX;
 
 	return (0);
 }
 
 static int
 exec_args_add_str(struct image_args *args, const char *str,
     enum uio_seg segflg, int *countp)
 {
 	int error;
 	size_t length;
 
 	KASSERT(args->endp != NULL, ("endp not initialized"));
 	KASSERT(args->begin_argv != NULL, ("begin_argp not initialized"));
 
 	error = (segflg == UIO_SYSSPACE) ?
 	    copystr(str, args->endp, args->stringspace, &length) :
 	    copyinstr(str, args->endp, args->stringspace, &length);
 	if (error != 0)
 		return (error == ENAMETOOLONG ? E2BIG : error);
 	args->stringspace -= length;
 	args->endp += length;
 	(*countp)++;
 
 	return (0);
 }
 
 /*
  * Append one argument string and count it in args->argc.  Arguments may
  * not be added once environment strings exist (asserted through
  * envc == 0).  Returns whatever exec_args_add_str() returns.
  */
 int
 exec_args_add_arg(struct image_args *args, const char *argp,
     enum uio_seg segflg)
 {
 
 	KASSERT(args->envc == 0, ("appending args after env"));
 
 	return (exec_args_add_str(args, argp, segflg, &args->argc));
 }
 
 /*
  * Append one environment string and count it in args->envc.  Returns
  * whatever exec_args_add_str() returns.
  */
 int
 exec_args_add_env(struct image_args *args, const char *envp,
     enum uio_seg segflg)
 {
 
 	/* The first environment string marks where the env area begins. */
 	if (args->envc == 0)
 		args->begin_envv = args->endp;
 
 	return (exec_args_add_str(args, envp, segflg, &args->envc));
 }
 
 /*
  * Reshape the head of the argument area: the first `consume' bytes at
  * begin_argv are discarded and the remaining strings are moved so that
  * they start `extend' bytes past begin_argv, leaving room for the caller
  * to prepend new arguments.  begin_envv (when environment strings exist),
  * endp and stringspace are adjusted by the net change, extend - consume.
  *
  * Returns 0 on success, or E2BIG when the net growth does not fit in the
  * remaining stringspace.
  */
 int
 exec_args_adjust_args(struct image_args *args, size_t consume, ssize_t extend)
 {
 	ssize_t offset;
 
 	KASSERT(args->endp != NULL, ("endp not initialized"));
 	KASSERT(args->begin_argv != NULL, ("begin_argp not initialized"));
 
 	offset = extend - consume;
 	if (args->stringspace < offset)
 		return (E2BIG);
 	/*
 	 * Only the bytes between begin_argv + consume and endp are live, so
 	 * move exactly that many.  Adding `consume' to the length instead of
 	 * subtracting it would read and write 2 * consume bytes past the end
 	 * of the string data.
 	 */
 	memmove(args->begin_argv + extend, args->begin_argv + consume,
 	    args->endp - (args->begin_argv + consume));
 	if (args->envc > 0)
 		args->begin_envv += offset;
 	args->endp += offset;
 	args->stringspace -= offset;
 	return (0);
 }
 
 /*
  * Return where the environment strings begin.  When there are none this
  * is the current end of the string area.
  */
 char *
 exec_args_get_begin_envv(struct image_args *args)
 {
 
 	KASSERT(args->endp != NULL, ("endp not initialized"));
 
 	return (args->envc > 0 ? args->begin_envv : args->endp);
 }
 
 /*
  * Copy strings out to the new process address space, constructing new arg
  * and env vector tables. Return a pointer to the base so that it can be used
  * as the initial stack pointer.
  */
 int
 exec_copyout_strings(struct image_params *imgp, uintptr_t *stack_base)
 {
 	int argc, envc;
 	char **vectp;
 	char *stringp;
 	uintptr_t destp, ustringp;
 	struct ps_strings *arginfo;
 	struct proc *p;
 	struct sysentvec *sysent;
 	size_t execpath_len;
 	int error, szsigcode;
 	char canary[sizeof(long) * 8];
 
 	p = imgp->proc;
 	sysent = p->p_sysent;
 
 	/*
 	 * destp is a cursor that starts at the ps_strings address and is
 	 * moved downwards as each item is placed below the previous one.
 	 */
 	destp =	PROC_PS_STRINGS(p);
 	arginfo = imgp->ps_strings = (void *)destp;
 
 	/*
 	 * Install sigcode.
 	 */
 	/*
 	 * NOTE(review): the copy is skipped whenever the base address tested
 	 * below is non-zero -- presumably because the sigcode is then
 	 * provided through the shared page; confirm.
 	 */
-	if (sysent->sv_sigcode_base == 0 && sysent->sv_szsigcode != NULL) {
+	if (sysent->sv_shared_page_base == 0 && sysent->sv_szsigcode != NULL) {
 		szsigcode = *(sysent->sv_szsigcode);
 		destp -= szsigcode;
 		destp = rounddown2(destp, sizeof(void *));
 		error = copyout(sysent->sv_sigcode, (void *)destp, szsigcode);
 		if (error != 0)
 			return (error);
 	}
 
 	/*
 	 * Copy the image path for the rtld.
 	 */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL) {
 		execpath_len = strlen(imgp->execpath) + 1;
 		destp -= execpath_len;
 		destp = rounddown2(destp, sizeof(void *));
 		imgp->execpathp = (void *)destp;
 		error = copyout(imgp->execpath, imgp->execpathp, execpath_len);
 		if (error != 0)
 			return (error);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= sizeof(canary);
 	imgp->canary = (void *)destp;
 	error = copyout(canary, imgp->canary, sizeof(canary));
 	if (error != 0)
 		return (error);
 	imgp->canarylen = sizeof(canary);
 
 	/*
 	 * Prepare the pagesizes array.
 	 */
 	imgp->pagesizeslen = sizeof(pagesizes[0]) * MAXPAGESIZES;
 	destp -= imgp->pagesizeslen;
 	destp = rounddown2(destp, sizeof(void *));
 	imgp->pagesizes = (void *)destp;
 	error = copyout(pagesizes, imgp->pagesizes, imgp->pagesizeslen);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Allocate room for the argument and environment strings.
 	 */
 	/* ARG_MAX - stringspace is the number of string bytes in use. */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(void *));
 	ustringp = destp;
 
 	if (imgp->auxargs) {
 		/*
 		 * Allocate room on the stack for the ELF auxargs
 		 * array.  It has up to AT_COUNT entries.
 		 */
 		destp -= AT_COUNT * sizeof(Elf_Auxinfo);
 		destp = rounddown2(destp, sizeof(void *));
 	}
 
 	vectp = (char **)destp;
 
 	/*
 	 * Allocate room for the argv[] and env vectors including the
 	 * terminating NULL pointers.
 	 */
 	vectp -= imgp->args->argc + 1 + imgp->args->envc + 1;
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	*stack_base = (uintptr_t)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	error = copyout(stringp, (void *)ustringp,
 	    ARG_MAX - imgp->args->stringspace);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	imgp->argv = vectp;
 	if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 ||
 	    suword32(&arginfo->ps_nargvstr, argc) != 0)
 		return (EFAULT);
 
 	/*
 	 * Fill in argument portion of vector table.
 	 */
 	for (; argc > 0; --argc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		/*
 		 * Step the kernel cursor (stringp) and the user cursor
 		 * (ustringp) past this string and its terminating NUL.
 		 */
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	if (suword(vectp++, 0) != 0)
 		return (EFAULT);
 
 	imgp->envv = vectp;
 	if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 ||
 	    suword32(&arginfo->ps_nenvstr, envc) != 0)
 		return (EFAULT);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		/* Same lockstep advance as for the argument strings. */
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* end of vector table is a null pointer */
 	if (suword(vectp, 0) != 0)
 		return (EFAULT);
 
 	if (imgp->auxargs) {
 		/* The aux vector goes right after the env vector's NULL. */
 		vectp++;
 		error = imgp->sysent->sv_copyout_auxargs(imgp,
 		    (uintptr_t)vectp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Check permissions of file to execute.
  *	Called with imgp->vp locked.
  *	Return 0 for success or error code on failure.
  */
 int
 exec_check_permissions(struct image_params *imgp)
 {
 	struct vnode *vp = imgp->vp;
 	struct vattr *attr = imgp->attr;
 	struct thread *td;
 	int error;
 
 	td = curthread;
 
 	/* Get file attributes */
 	error = VOP_GETATTR(vp, attr, td->td_ucred);
 	if (error)
 		return (error);
 
 #ifdef MAC
 	error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp);
 	if (error)
 		return (error);
 #endif
 
 	/*
 	 * 1) Check if file execution is disabled for the filesystem that
 	 *    this file resides on.
 	 * 2) Ensure that at least one execute bit is on. Otherwise, a
 	 *    privileged user will always succeed, and we don't want this
 	 *    to happen unless the file really is executable.
 	 * 3) Ensure that the file is a regular file.
 	 */
 	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
 	    (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 ||
 	    (attr->va_type != VREG))
 		return (EACCES);
 
 	/*
 	 * Zero length files can't be exec'd
 	 */
 	if (attr->va_size == 0)
 		return (ENOEXEC);
 
 	/*
 	 *  Check for execute permission to file based on current credentials.
 	 */
 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
 	if (error)
 		return (error);
 
 	/*
 	 * Check number of open-for-writes on the file and deny execution
 	 * if there are any.
 	 *
 	 * Add a text reference now so no one can write to the
 	 * executable while we're activating it.
 	 *
 	 * Remember if this was set before and unset it in case this is not
 	 * actually an executable image.
 	 */
 	error = VOP_SET_TEXT(vp);
 	if (error != 0)
 		return (error);
 	imgp->textset = true;
 
 	/*
 	 * Call filesystem specific open routine (which does nothing in the
 	 * general case).
 	 */
 	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
 	if (error == 0)
 		imgp->opened = true;
 	return (error);
 }
 
 /*
  * Exec handler registration: append execsw_arg to the NULL-terminated
  * execsw table by building a copy that is one slot larger and swapping
  * it in.  Always returns 0.
  */
 int
 exec_register(const struct execsw *execsw_arg)
 {
 	const struct execsw **newexecsw;
 	u_int i, nold;
 
 	/* Count the handlers already registered, if there is a table. */
 	nold = 0;
 	if (execsw != NULL) {
 		while (execsw[nold] != NULL)
 			nold++;
 	}
 
 	/* Old entries, the new slot and the trailing NULL. */
 	newexecsw = malloc((nold + 2) * sizeof(*newexecsw), M_TEMP, M_WAITOK);
 	for (i = 0; i < nold; i++)
 		newexecsw[i] = execsw[i];
 	newexecsw[nold] = execsw_arg;
 	newexecsw[nold + 1] = NULL;
 
 	if (execsw != NULL)
 		free(execsw, M_TEMP);
 	execsw = newexecsw;
 	return (0);
 }
 
 int
 exec_unregister(const struct execsw *execsw_arg)
 {
 	const struct execsw **es, **xs, **newexecsw;
 	int count = 1;
 
 	if (execsw == NULL)
 		panic("unregister with no handlers left?\n");
 
 	for (es = execsw; *es; es++) {
 		if (*es == execsw_arg)
 			break;
 	}
 	if (*es == NULL)
 		return (ENOENT);
 	for (es = execsw; *es; es++)
 		if (*es != execsw_arg)
 			count++;
 	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
 	xs = newexecsw;
 	for (es = execsw; *es; es++)
 		if (*es != execsw_arg)
 			*xs++ = *es;
 	*xs = NULL;
 	if (execsw)
 		free(execsw, M_TEMP);
 	execsw = newexecsw;
 	return (0);
 }
 
 /*
  * Write out a core segment to the compression stream.
  *
  * The segment [base, base + len) is read from user space in pieces of at
  * most CORE_BUF_SIZE bytes through the bounce buffer buf and fed to the
  * compressor.  Returns 0 on success (including for an empty segment) or
  * the first error reported by compressor_write().
  */
 static int
 compress_chunk(struct coredump_params *cp, char *base, char *buf, size_t len)
 {
 	size_t chunk_len;
 	int error;
 
 	/* Make the result well defined when there is nothing to write. */
 	error = 0;
 	while (len > 0) {
 		chunk_len = MIN(len, CORE_BUF_SIZE);
 
 		/*
 		 * We can get EFAULT error here.
 		 * In that case zero out the current chunk of the segment.
 		 */
 		error = copyin(base, buf, chunk_len);
 		if (error != 0)
 			bzero(buf, chunk_len);
 		error = compressor_write(cp->comp, buf, chunk_len);
 		if (error != 0)
 			break;
 		base += chunk_len;
 		len -= chunk_len;
 	}
 	return (error);
 }
 
 /*
  * Write len bytes starting at base to the core file vnode cp->vp at the
  * given file offset.  seg tells vn_rdwr_inchunks() which address space
  * base refers to; the number of bytes not written is stored through
  * resid.  Returns the result of vn_rdwr_inchunks().
  */
 int
 core_write(struct coredump_params *cp, const void *base, size_t len,
     off_t offset, enum uio_seg seg, size_t *resid)
 {
 
 	return (vn_rdwr_inchunks(UIO_WRITE, cp->vp, __DECONST(void *, base),
 	    len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED,
 	    cp->active_cred, cp->file_cred, resid, cp->td));
 }
 
 /*
  * Emit the user address range [base, base + len) into the core file at
  * the given offset.  base must be page-aligned.
  *
  * With compression enabled the range is handed to compress_chunk() using
  * tmpbuf as bounce buffer.  Otherwise the range is split into runs of
  * pages that either all fault in successfully or all fail to: successful
  * runs are written out, failed runs become holes by extending the file.
  *
  * Returns 0 on success (including for an empty range), EINTR when the
  * dump is interruptible and the process was killed, or the error of the
  * failing write/truncate.
  */
 int
 core_output(char *base, size_t len, off_t offset, struct coredump_params *cp,
     void *tmpbuf)
 {
 	vm_map_t map;
 	struct mount *mp;
 	size_t resid, runlen;
 	int error;
 	bool success;
 
 	KASSERT((uintptr_t)base % PAGE_SIZE == 0,
 	    ("%s: user address %p is not page-aligned", __func__, base));
 
 	if (cp->comp != NULL)
 		return (compress_chunk(cp, base, tmpbuf, len));
 
 	/* Make the result well defined when the loop body never runs. */
 	error = 0;
 	map = &cp->td->td_proc->p_vmspace->vm_map;
 	for (; len > 0; base += runlen, offset += runlen, len -= runlen) {
 		/*
 		 * Attempt to page in all virtual pages in the range.  If a
 		 * virtual page is not backed by the pager, it is represented as
 		 * a hole in the file.  This can occur with zero-filled
 		 * anonymous memory or truncated files, for example.
 		 */
 		for (runlen = 0; runlen < len; runlen += PAGE_SIZE) {
 			if (core_dump_can_intr && curproc_sigkilled())
 				return (EINTR);
 			error = vm_fault(map, (uintptr_t)base + runlen,
 			    VM_PROT_READ, VM_FAULT_NOFILL, NULL);
 			/*
 			 * The first page decides the kind of run; the run
 			 * ends at the first page with a different outcome.
 			 */
 			if (runlen == 0)
 				success = error == KERN_SUCCESS;
 			else if ((error == KERN_SUCCESS) != success)
 				break;
 		}
 
 		if (success) {
 			error = core_write(cp, base, runlen, offset,
 			    UIO_USERSPACE, &resid);
 			if (error != 0) {
 				if (error != EFAULT)
 					break;
 
 				/*
 				 * EFAULT may be returned if the user mapping
 				 * could not be accessed, e.g., because a mapped
 				 * file has been truncated.  Skip the page if no
 				 * progress was made, to protect against a
 				 * hypothetical scenario where vm_fault() was
 				 * successful but core_write() returns EFAULT
 				 * anyway.
 				 */
 				runlen -= resid;
 				if (runlen == 0) {
 					success = false;
 					runlen = PAGE_SIZE;
 				}
 			}
 		}
 		if (!success) {
 			/* Represent the run as a hole by growing the file. */
 			error = vn_start_write(cp->vp, &mp, V_WAIT);
 			if (error != 0)
 				break;
 			vn_lock(cp->vp, LK_EXCLUSIVE | LK_RETRY);
 			error = vn_truncate_locked(cp->vp, offset + runlen,
 			    false, cp->td->td_ucred);
 			VOP_UNLOCK(cp->vp);
 			vn_finished_write(mp);
 			if (error != 0)
 				break;
 		}
 	}
 	return (error);
 }
 
 /*
  * sbuf drain routine that sends the data to a core file, either through
  * the compressor or straight to the vnode at cp->offset.  Returns the
  * number of bytes consumed, or a negated errno on failure.
  */
 int
 sbuf_drain_core_output(void *arg, const char *data, int len)
 {
 	struct coredump_params *cp = arg;
 	struct proc *p = cp->td->td_proc;
 	int error, had_lock;
 
 	/*
 	 * Some kern_proc out routines that print to this sbuf may
 	 * call us with the process lock held. Draining with the
 	 * non-sleepable lock held is unsafe. The lock is needed for
 	 * those routines when dumping a live process. In our case we
 	 * can safely release the lock before draining and acquire
 	 * again after.
 	 */
 	had_lock = PROC_LOCKED(p);
 	if (had_lock)
 		PROC_UNLOCK(p);
 
 	if (cp->comp == NULL)
 		error = core_write(cp, __DECONST(void *, data), len, cp->offset,
 		    UIO_SYSSPACE, NULL);
 	else
 		error = compressor_write(cp->comp, __DECONST(char *, data),
 		    len);
 
 	if (had_lock)
 		PROC_LOCK(p);
 
 	if (error == 0) {
 		cp->offset += len;
 		return (len);
 	}
 	return (-error);
 }
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 3938bfe611b9..56bdb6aa3837 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -1,3557 +1,3557 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_ktrace.h"
 #include "opt_kstack_pages.h"
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/elf.h>
 #include <sys/eventhandler.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/loginclass.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/refcount.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysent.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/stack.h>
 #include <sys/stat.h>
 #include <sys/dtrace_bsd.h>
 #include <sys/sysctl.h>
 #include <sys/filedesc.h>
 #include <sys/tty.h>
 #include <sys/signalvar.h>
 #include <sys/sdt.h>
 #include <sys/sx.h>
 #include <sys/user.h>
 #include <sys/vnode.h>
 #include <sys/wait.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 
 #include <fs/devfs/devfs.h>
 
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_util.h>
 #endif
 
 /* Define the "proc" SDT provider. */
 SDT_PROVIDER_DEFINE(proc);
 
 /*
  * Malloc types.  M_PROC is static and therefore private to this file;
  * procinit() below uses it for the pid/pgrp hash tables and the pid hash
  * lock array.
  */
 MALLOC_DEFINE(M_SESSION, "session", "session header");
 static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
 MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
 
 /* Forward declarations of helpers that are private (static) to this file. */
 static void doenterpgrp(struct proc *, struct pgrp *);
 static void orphanpg(struct pgrp *pg);
 static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
 static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
 static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
     int preferthread);
 static void pgdelete(struct pgrp *);
 /* UMA zone callbacks registered in procinit(). */
 static int pgrp_init(void *mem, int size, int flags);
 static int proc_ctor(void *mem, int size, void *arg, int flags);
 static void proc_dtor(void *mem, int size, void *arg);
 static int proc_init(void *mem, int size, int flags);
 static void proc_fini(void *mem, int size);
 static void pargs_free(struct pargs *pa);
 
 /*
  * Other process lists
  *
  * The hash tables, the pid hash lock array and the two UMA zones are
  * all set up in procinit().
  */
 struct pidhashhead *pidhashtbl = NULL;	/* pid hash table */
 struct sx *pidhashtbl_lock;		/* array of pidhashlock + 1 sx locks */
 u_long pidhash;				/* filled in by hashinit() */
 u_long pidhashlock;			/* index of the last pid hash lock */
 struct pgrphashhead *pgrphashtbl;	/* process group hash table */
 u_long pgrphash;			/* filled in by hashinit() */
 struct proclist allproc = LIST_HEAD_INITIALIZER(allproc);
 struct sx __exclusive_cache_line allproc_lock;
 struct sx __exclusive_cache_line proctree_lock;
 struct mtx __exclusive_cache_line ppeers_lock;
 struct mtx __exclusive_cache_line procid_lock;	/* guards the id bitmaps */
 uma_zone_t proc_zone;			/* zone for struct proc */
 uma_zone_t pgrp_zone;			/* zone for struct pgrp */
 
 /*
  * The offset of various fields in struct proc and struct thread.
  * These are used by kernel debuggers to enumerate kernel threads and
  * processes.  Each constant holds the offsetof() value of the member
  * named in its suffix.
  */
 const int proc_off_p_pid = offsetof(struct proc, p_pid);
 const int proc_off_p_comm = offsetof(struct proc, p_comm);
 const int proc_off_p_list = offsetof(struct proc, p_list);
 const int proc_off_p_hash = offsetof(struct proc, p_hash);
 const int proc_off_p_threads = offsetof(struct proc, p_threads);
 const int thread_off_td_tid = offsetof(struct thread, td_tid);
 const int thread_off_td_name = offsetof(struct thread, td_name);
 const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu);
 const int thread_off_td_pcb = offsetof(struct thread, td_pcb);
 const int thread_off_td_plist = offsetof(struct thread, td_plist);
 
 /*
  * Event handler lists for process life-cycle events.  The ctor, dtor and
  * init lists are invoked directly from the UMA callbacks in this file
  * (proc_ctor(), proc_dtor() and proc_init()).
  */
 EVENTHANDLER_LIST_DEFINE(process_ctor);
 EVENTHANDLER_LIST_DEFINE(process_dtor);
 EVENTHANDLER_LIST_DEFINE(process_init);
 EVENTHANDLER_LIST_DEFINE(process_fini);
 EVENTHANDLER_LIST_DEFINE(process_exit);
 EVENTHANDLER_LIST_DEFINE(process_fork);
 EVENTHANDLER_LIST_DEFINE(process_exec);
 
 /* Exported read-only as the kern.kstack_pages sysctl. */
 int kstack_pages = KSTACK_PAGES;
 SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
     "Kernel stack size in pages");
 /* Read-write knob, kern.proc_vmmap_skip_resident_count; off by default. */
 static int vmmap_skip_res_cnt = 0;
 SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
     &vmmap_skip_res_cnt, 0,
     "Skip calculation of the pages resident count in kern.proc.vmmap");
 
 /*
  * Compile-time checks that the kinfo_proc structures have exactly the
  * size recorded in the corresponding KINFO_PROC*_SIZE constants.
  */
 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
 #ifdef COMPAT_FREEBSD32
 CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
 #endif
 
 /*
  * Set up the global process bookkeeping: the global locks, the pid and
  * process group hash tables, the array of pid hash locks, and the UMA
  * zones backing struct proc and struct pgrp.
  */
 void
 procinit(void)
 {
 	u_long idx, nlocks;
 
 	sx_init(&allproc_lock, "allproc");
 	sx_init(&proctree_lock, "proctree");
 	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
 	mtx_init(&procid_lock, "procid", NULL, MTX_DEF);
 
 	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
 
 	/* pidhashlock ends up as the index of the last lock in the array. */
 	pidhashlock = (pidhash + 1) / 64;
 	if (pidhashlock != 0)
 		pidhashlock--;
 	nlocks = pidhashlock + 1;
 	pidhashtbl_lock = malloc(sizeof(*pidhashtbl_lock) * nlocks,
 	    M_PROC, M_WAITOK | M_ZERO);
 	for (idx = 0; idx < nlocks; idx++)
 		sx_init_flags(&pidhashtbl_lock[idx], "pidhash", SX_DUPOK);
 
 	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
 
 	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
 	    proc_ctor, proc_dtor, proc_init, proc_fini,
 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	pgrp_zone = uma_zcreate("PGRP", sizeof(struct pgrp), NULL, NULL,
 	    pgrp_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 
 	uihashinit();
 }
 
 /*
  * UMA constructor for struct proc: run the DTrace hook (when compiled
  * in) and the process_ctor handlers, then the thread_ctor handlers for
  * the first thread if the proc already has one.  Always returns 0.
  */
 static int
 proc_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct proc *proc;
 	struct thread *first;
 
 	proc = mem;
 #ifdef KDTRACE_HOOKS
 	kdtrace_proc_ctor(proc);
 #endif
 	EVENTHANDLER_DIRECT_INVOKE(process_ctor, proc);
 
 	/* Make sure all thread constructors are executed */
 	first = FIRST_THREAD_IN_PROC(proc);
 	if (first != NULL) {
 		EVENTHANDLER_DIRECT_INVOKE(thread_ctor, first);
 	}
 	return (0);
 }
 
 /*
  * UMA destructor for struct proc.  Tears down per-thread state of the
  * first thread (if any), runs the thread and process destructor
  * handlers and finally checks that the SIGCHLD ksiginfo is not queued.
  */
 static void
 proc_dtor(void *mem, int size, void *arg)
 {
 	struct proc *proc;
 	struct thread *td;
 
 	/* INVARIANTS checks go here */
 	proc = mem;
 	td = FIRST_THREAD_IN_PROC(proc);
 	if (td != NULL) {
 #ifdef INVARIANTS
 		KASSERT((proc->p_numthreads == 1),
 		    ("bad number of threads in exiting process"));
 		KASSERT(STAILQ_EMPTY(&proc->p_ktr),
 		    ("proc_dtor: non-empty p_ktr"));
 #endif
 		/* Free all OSD associated to this thread. */
 		osd_thread_exit(td);
 		td_softdep_cleanup(td);
 		MPASS(td->td_su == NULL);
 
 		/* Make sure all thread destructors are executed */
 		EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
 	}
 	EVENTHANDLER_DIRECT_INVOKE(process_dtor, proc);
 #ifdef KDTRACE_HOOKS
 	kdtrace_proc_dtor(proc);
 #endif
 	/* A ksiginfo that exists must not still sit on a signal queue. */
 	KASSERT(proc->p_ksi == NULL || ! KSI_ONQ(proc->p_ksi),
 	    ("SIGCHLD queue"));
 }
 
 /*
  * UMA init callback for struct proc: set up the parts that stay valid
  * across reuse of the structure (mutexes, the p_pwait condition
  * variable, the thread list head and the stats block).  Always
  * returns 0.
  */
 static int
 proc_init(void *mem, int size, int flags)
 {
 	struct proc *proc;
 
 	proc = mem;
 
 	/* Locks and the wait channel. */
 	mtx_init(&proc->p_mtx, "process lock", NULL,
 	    MTX_DEF | MTX_DUPOK | MTX_NEW);
 	mtx_init(&proc->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&proc->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&proc->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&proc->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
 	cv_init(&proc->p_pwait, "ppwait");
 
 	/* All threads in the proc hang off this list. */
 	TAILQ_INIT(&proc->p_threads);
 	EVENTHANDLER_DIRECT_INVOKE(process_init, proc);
 	proc->p_stats = pstats_alloc();
 	proc->p_pgrp = NULL;
 	return (0);
 }
 
 /*
  * UMA fini callback for struct proc.  UMA should ensure that this
  * function is never called, since freeing a proc structure would
  * violate type stability; unless 'notnow' is defined it simply panics.
  */
 static void
 proc_fini(void *mem, int size)
 {
 #ifndef notnow
 	panic("proc reclaimed");
 #else
 	struct proc *p;
 
 	p = (struct proc *)mem;
 	EVENTHANDLER_DIRECT_INVOKE(process_fini, p);
 	pstats_free(p->p_stats);
 	thread_free(FIRST_THREAD_IN_PROC(p));
 	mtx_destroy(&p->p_mtx);
 	if (p->p_ksi != NULL)
 		ksiginfo_free(p->p_ksi);
 #endif
 }
 
 /*
  * UMA init callback for struct pgrp: initialize the process group
  * mutex.  Always returns 0.
  */
 static int
 pgrp_init(void *mem, int size, int flags)
 {
 	struct pgrp *grp = mem;
 
 	mtx_init(&grp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
 	return (0);
 }
 
 /*
  * PID space management.
  *
  * These bitmaps are used by fork_findpid.  Each map has PID_MAX bits
  * and is updated by proc_id_set(), proc_id_set_cond() and
  * proc_id_clear() under procid_lock.
  */
 bitstr_t bit_decl(proc_id_pidmap, PID_MAX);
 bitstr_t bit_decl(proc_id_grpidmap, PID_MAX);
 bitstr_t bit_decl(proc_id_sessidmap, PID_MAX);
 bitstr_t bit_decl(proc_id_reapmap, PID_MAX);
 
 /*
  * Lookup table indexed by the 'type' argument of the proc_id_*()
  * functions.  NOTE(review): the order presumably mirrors the PROC_ID_*
  * constants; confirm against their definition.
  */
 static bitstr_t *proc_id_array[] = {
 	proc_id_pidmap,
 	proc_id_grpidmap,
 	proc_id_sessidmap,
 	proc_id_reapmap,
 };
 
 /*
  * Mark 'id' as in use in the id bitmap selected by 'type'.  The bit
  * must not already be set (asserted under INVARIANTS).
  */
 void
 proc_id_set(int type, pid_t id)
 {
 	bitstr_t *map;
 
 	KASSERT(type >= 0 && type < nitems(proc_id_array),
 	    ("invalid type %d\n", type));
 	map = proc_id_array[type];
 
 	mtx_lock(&procid_lock);
 	KASSERT(bit_test(map, id) == 0,
 	    ("bit %d already set in %d\n", id, type));
 	bit_set(map, id);
 	mtx_unlock(&procid_lock);
 }
 
 /*
  * Mark 'id' as in use in the id bitmap selected by 'type' unless it is
  * already marked.  The initial test is done without procid_lock; the
  * lock is only taken when the bit has to be set.
  */
 void
 proc_id_set_cond(int type, pid_t id)
 {
 	bitstr_t *map;
 
 	KASSERT(type >= 0 && type < nitems(proc_id_array),
 	    ("invalid type %d\n", type));
 	map = proc_id_array[type];
 
 	if (!bit_test(map, id)) {
 		mtx_lock(&procid_lock);
 		bit_set(map, id);
 		mtx_unlock(&procid_lock);
 	}
 }
 
 /*
  * Release 'id' in the id bitmap selected by 'type'.  The bit must be
  * set on entry (asserted under INVARIANTS).
  */
 void
 proc_id_clear(int type, pid_t id)
 {
 	bitstr_t *map;
 
 	KASSERT(type >= 0 && type < nitems(proc_id_array),
 	    ("invalid type %d\n", type));
 	map = proc_id_array[type];
 
 	mtx_lock(&procid_lock);
 	KASSERT(bit_test(map, id) != 0,
 	    ("bit %d not set in %d\n", id, type));
 	bit_clear(map, id);
 	mtx_unlock(&procid_lock);
 }
 
 /*
  * Is p an inferior of the current process?
  *
  * Follows the real-parent chain upwards from p.  Returns 1 when
  * curproc is reached (including p == curproc) and 0 when the process
  * with pid 0 is reached first.  The caller holds proctree_lock and
  * the lock of p.
  */
 int
 inferior(struct proc *p)
 {
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	while (p != curproc) {
 		if (p->p_pid == 0)
 			return (0);
 		p = proc_realparent(p);
 	}
 	return (1);
 }
 
 /*
  * Take every pid hash lock in shared mode, in ascending index order.
  */
 void
 pidhash_slockall(void)
 {
 	u_long idx, nlocks;
 
 	nlocks = pidhashlock + 1;
 	for (idx = 0; idx < nlocks; idx++)
 		sx_slock(&pidhashtbl_lock[idx]);
 }
 
 /*
  * Drop the shared hold on every pid hash lock, in ascending index
  * order.
  */
 void
 pidhash_sunlockall(void)
 {
 	u_long idx, nlocks;
 
 	nlocks = pidhashlock + 1;
 	for (idx = 0; idx < nlocks; idx++)
 		sx_sunlock(&pidhashtbl_lock[idx]);
 }
 
 /*
  * Look up a process by pid with the pid hash lock already held by the
  * caller.  Like pfind_any(), zombies are returned.  The process is
  * returned locked; NULL is returned when no process has that pid or
  * when the match is still in the PRS_NEW state.
  */
 struct proc *
 pfind_any_locked(pid_t pid)
 {
 	struct proc *p;
 
 	sx_assert(PIDHASHLOCK(pid), SX_LOCKED);
 	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
 		if (p->p_pid != pid)
 			continue;
 		PROC_LOCK(p);
 		if (p->p_state != PRS_NEW)
 			return (p);
 		PROC_UNLOCK(p);
 		return (NULL);
 	}
 	return (NULL);
 }
 
 /*
  * Locate a process by number.
  *
  * Processes in the PRS_NEW state are never returned, so callers need
  * not test for that condition before dereferencing p_ucred, et al.
  * Zombies are returned only when 'zombie' is true.  A match is
  * returned with its process lock held.  When 'pid' is the current
  * process, curproc is locked and returned without a hash lookup.
  */
 static __always_inline struct proc *
 _pfind(pid_t pid, bool zombie)
 {
 	struct proc *p;
 
 	/* Fast path: the caller is asking about itself. */
 	if (curproc->p_pid == pid) {
 		p = curproc;
 		PROC_LOCK(p);
 		return (p);
 	}
 
 	sx_slock(PIDHASHLOCK(pid));
 	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
 		if (p->p_pid != pid)
 			continue;
 		PROC_LOCK(p);
 		if (p->p_state == PRS_NEW ||
 		    (p->p_state == PRS_ZOMBIE && !zombie)) {
 			PROC_UNLOCK(p);
 			p = NULL;
 		}
 		break;
 	}
 	sx_sunlock(PIDHASHLOCK(pid));
 	return (p);
 }
 
 /*
  * Locate a process by pid, ignoring zombies.  The process is returned
  * locked, or NULL if there is no match.
  */
 struct proc *
 pfind(pid_t pid)
 {
 	struct proc *p;
 
 	p = _pfind(pid, false);
 	return (p);
 }
 
 /*
  * Same as pfind(), but zombie processes are returned as well.
  */
 struct proc *
 pfind_any(pid_t pid)
 {
 	struct proc *p;
 
 	p = _pfind(pid, true);
 	return (p);
 }
 
 /*
  * Locate a process group by number.
  * The caller must hold proctree_lock.
  *
  * The group is returned with its pgrp lock held; NULL is returned if
  * no group has the given id.
  */
 struct pgrp *
 pgfind(pid_t pgid)
 {
 	struct pgrp *pgrp;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 
 	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
 		if (pgrp->pg_id == pgid)
 			break;
 	}
 	if (pgrp != NULL)
 		PGRP_LOCK(pgrp);
 	return (pgrp);
 }
 
 /*
  * Locate process and do additional manipulations, depending on flags.
  *
  * 'pid' is looked up as a process id when it is <= PID_MAX; a larger
  * value is handed to tdfind() as a thread id unless PGET_NOTID is set.
  * The remaining flags select checks and actions:
  *
  *	PGET_CANSEE	p_cansee() must succeed (skipped when 'pid' is
  *			the pid of curproc)
  *	PGET_CANDEBUG	p_candebug() must succeed
  *	PGET_ISCURRENT	the process must be curproc, else EPERM
  *	PGET_NOTWEXIT	zombies are not looked up and a process with
  *			P_WEXIT set yields ESRCH
  *	PGET_NOTINEXEC	a process with P_INEXEC set yields ESRCH
  *	PGET_HOLD	the process is held with _PHOLD() and unlocked
  *
  * Returns 0 and stores the process in *pp on success; the process is
  * then locked unless PGET_HOLD was given.  On failure an errno value is
  * returned, *pp is not written and the process, if one was found, is
  * unlocked.
  */
 int
 pget(pid_t pid, int flags, struct proc **pp)
 {
 	struct proc *p;
 	struct thread *td1;
 	int error;
 
 	p = curproc;
 	if (p->p_pid == pid) {
 		PROC_LOCK(p);
 	} else {
 		p = NULL;
 		if (pid <= PID_MAX) {
 			if ((flags & PGET_NOTWEXIT) == 0)
 				p = pfind_any(pid);
 			else
 				p = pfind(pid);
 		} else if ((flags & PGET_NOTID) == 0) {
 			/*
 			 * NOTE(review): this path relies on tdfind()
 			 * returning with the owning process locked;
 			 * confirm against tdfind().
 			 */
 			td1 = tdfind(pid, -1);
 			if (td1 != NULL)
 				p = td1->td_proc;
 		}
 		if (p == NULL)
 			return (ESRCH);
 		if ((flags & PGET_CANSEE) != 0) {
 			error = p_cansee(curthread, p);
 			if (error != 0)
 				goto errout;
 		}
 	}
 	/* From here on p is locked; every failure leaves via errout. */
 	if ((flags & PGET_CANDEBUG) != 0) {
 		error = p_candebug(curthread, p);
 		if (error != 0)
 			goto errout;
 	}
 	if ((flags & PGET_ISCURRENT) != 0 && curproc != p) {
 		error = EPERM;
 		goto errout;
 	}
 	if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) {
 		error = ESRCH;
 		goto errout;
 	}
 	if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) {
 		/*
 		 * XXXRW: Not clear ESRCH is the right error during proc
 		 * execve().
 		 */
 		error = ESRCH;
 		goto errout;
 	}
 	if ((flags & PGET_HOLD) != 0) {
 		/* Trade the process lock for a hold. */
 		_PHOLD(p);
 		PROC_UNLOCK(p);
 	}
 	*pp = p;
 	return (0);
 errout:
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Create a new process group.
  * pgid must be equal to the pid of p.
  * Begin a new session if required.
  *
  * 'pgrp' is the caller-provided, not yet initialized group structure.
  * When 'sess' is non-NULL it is initialized as a new session led by p
  * and the group is attached to it; otherwise the group joins p's
  * current session, on which an extra reference is taken.  The caller
  * holds proctree_lock exclusively.  Always returns 0.
  */
 int
 enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
 {
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 
 	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
 	KASSERT(p->p_pid == pgid,
 	    ("enterpgrp: new pgrp and pid != pgid"));
 	KASSERT(pgfind(pgid) == NULL,
 	    ("enterpgrp: pgrp with pgid exists"));
 	KASSERT(!SESS_LEADER(p),
 	    ("enterpgrp: session leader attempted setpgrp"));
 
 	if (sess != NULL) {
 		/*
 		 * new session
 		 */
 		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
 		/* p no longer has a controlling terminal. */
 		PROC_LOCK(p);
 		p->p_flag &= ~P_CONTROLT;
 		PROC_UNLOCK(p);
 		PGRP_LOCK(pgrp);
 		sess->s_leader = p;
 		sess->s_sid = p->p_pid;
 		proc_id_set(PROC_ID_SESSION, p->p_pid);
 		refcount_init(&sess->s_count, 1);
 		sess->s_ttyvp = NULL;
 		sess->s_ttydp = NULL;
 		sess->s_ttyp = NULL;
 		/* The login name is inherited from the old session. */
 		bcopy(p->p_session->s_login, sess->s_login,
 			    sizeof(sess->s_login));
 		pgrp->pg_session = sess;
 		KASSERT(p == curproc,
 		    ("enterpgrp: mksession and p != curproc"));
 	} else {
 		pgrp->pg_session = p->p_session;
 		sess_hold(pgrp->pg_session);
 		PGRP_LOCK(pgrp);
 	}
 	/* Both branches arrive here with the pgrp lock held. */
 	pgrp->pg_id = pgid;
 	proc_id_set(PROC_ID_GROUP, p->p_pid);
 	LIST_INIT(&pgrp->pg_members);
 	pgrp->pg_flags = 0;
 
 	/*
 	 * As we have an exclusive lock of proctree_lock,
 	 * this should not deadlock.
 	 */
 	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
 	SLIST_INIT(&pgrp->pg_sigiolst);
 	PGRP_UNLOCK(pgrp);
 
 	/* Finally move p out of its old group and into the new one. */
 	doenterpgrp(p, pgrp);
 
 	return (0);
 }
 
 /*
  * Move p to an existing process group
  *
  * The caller must hold proctree_lock exclusively and must not hold the
  * process lock, either process group lock or the session lock (all
  * asserted below).  pgrp must belong to p's session and must differ
  * from p's current group.  Always returns 0.
  */
 int
 enterthispgrp(struct proc *p, struct pgrp *pgrp)
 {
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
 	KASSERT(pgrp->pg_session == p->p_session,
 	    ("%s: pgrp's session %p, p->p_session %p proc %p\n",
 	    __func__, pgrp->pg_session, p->p_session, p));
 	KASSERT(pgrp != p->p_pgrp,
 	    ("%s: p %p belongs to pgrp %p", __func__, p, pgrp));
 
 	doenterpgrp(p, pgrp);
 
 	return (0);
 }
 
 /*
  * If true, any child of q which belongs to group pgrp, qualifies the
  * process group pgrp as not orphaned.
  */
 static bool
 isjobproc(struct proc *q, struct pgrp *pgrp)
 {
 	sx_assert(&proctree_lock, SX_LOCKED);
 
 	return (q->p_pgrp != pgrp &&
 	    q->p_pgrp->pg_session == pgrp->pg_session);
 }
 
 /*
  * Walk up the reaper chain starting at the reaper of p and return the
  * first process that is either its own reaper or does not have
  * P_TREE_GRPEXITED set.
  */
 static struct proc *
 jobc_reaper(struct proc *p)
 {
 	struct proc *pp;
 
 	sx_assert(&proctree_lock, SA_LOCKED);
 
 	pp = p;
 	do {
 		pp = pp->p_reaper;
 	} while (pp->p_reaper != pp &&
 	    (pp->p_treeflag & P_TREE_GRPEXITED) != 0);
 	return (pp);
 }
 
 /*
  * Return the process that acts as the parent of p for job control
  * accounting.  This is the real parent of p, unless that parent has a
  * parent of its own, is not 'p_exiting' and is already marked
  * P_TREE_GRPEXITED; in that case the result of jobc_reaper() on the
  * real parent is used instead.
  */
 static struct proc *
 jobc_parent(struct proc *p, struct proc *p_exiting)
 {
 	struct proc *pp;
 
 	sx_assert(&proctree_lock, SA_LOCKED);
 
 	pp = proc_realparent(p);
 	if (pp->p_pptr != NULL && pp != p_exiting &&
 	    (pp->p_treeflag & P_TREE_GRPEXITED) != 0)
 		return (jobc_reaper(pp));
 	return (pp);
 }
 
 /*
  * Count the members of pgrp that keep it from being orphaned: members
  * that are not marked P_TREE_GRPEXITED, still have a parent, and whose
  * job control parent satisfies isjobproc() for pgrp.  The caller holds
  * either the pgrp lock or proctree_lock.
  */
 static int
 pgrp_calc_jobc(struct pgrp *pgrp)
 {
 	struct proc *member;
 	int count;
 
 #ifdef INVARIANTS
 	if (!mtx_owned(&pgrp->pg_mtx))
 		sx_assert(&proctree_lock, SA_LOCKED);
 #endif
 
 	count = 0;
 	LIST_FOREACH(member, &pgrp->pg_members, p_pglist) {
 		if ((member->p_treeflag & P_TREE_GRPEXITED) == 0 &&
 		    member->p_pptr != NULL &&
 		    isjobproc(jobc_parent(member, NULL), pgrp))
 			count++;
 	}
 	return (count);
 }
 
 /*
  * Move p to a process group
  *
  * Common worker for enterpgrp() and enterthispgrp().  The caller holds
  * proctree_lock exclusively and none of the process, process group or
  * session locks.  The old group is deleted if p was its last member.
  */
 static void
 doenterpgrp(struct proc *p, struct pgrp *pgrp)
 {
 	struct pgrp *savepgrp;
 	struct proc *pp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
 
 	savepgrp = p->p_pgrp;
 	pp = jobc_parent(p, NULL);
 
 	/* Lock the new group first, then the old one. */
 	PGRP_LOCK(pgrp);
 	PGRP_LOCK(savepgrp);
 	/*
 	 * If p is the only member that currently qualifies the old group
 	 * as not orphaned, the group becomes orphaned once p leaves.
 	 */
 	if (isjobproc(pp, savepgrp) && pgrp_calc_jobc(savepgrp) == 1)
 		orphanpg(savepgrp);
 	PROC_LOCK(p);
 	LIST_REMOVE(p, p_pglist);
 	p->p_pgrp = pgrp;
 	PROC_UNLOCK(p);
 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
 	/* p may in turn qualify the new group as not orphaned. */
 	if (isjobproc(pp, pgrp))
 		pgrp->pg_flags &= ~PGRP_ORPHANED;
 	PGRP_UNLOCK(savepgrp);
 	PGRP_UNLOCK(pgrp);
 	if (LIST_EMPTY(&savepgrp->pg_members))
 		pgdelete(savepgrp);
 }
 
 /*
  * remove process from process group
  *
  * The caller holds proctree_lock exclusively.  p->p_pgrp is set to
  * NULL, and the group is deleted if p was its last member.  Always
  * returns 0.
  */
 int
 leavepgrp(struct proc *p)
 {
 	struct pgrp *savepgrp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	savepgrp = p->p_pgrp;
 	/* The pgrp lock is taken before the process lock. */
 	PGRP_LOCK(savepgrp);
 	PROC_LOCK(p);
 	LIST_REMOVE(p, p_pglist);
 	p->p_pgrp = NULL;
 	PROC_UNLOCK(p);
 	PGRP_UNLOCK(savepgrp);
 	if (LIST_EMPTY(&savepgrp->pg_members))
 		pgdelete(savepgrp);
 	return (0);
 }
 
 /*
  * delete a process group
  *
  * Unhooks pgrp from the sigio owners, the pgrp hash and the session's
  * tty, releases its id, frees the structure and drops the reference
  * the group held on its session.  The caller holds proctree_lock
  * exclusively and neither the pgrp lock nor the session lock.
  */
 static void
 pgdelete(struct pgrp *pgrp)
 {
 	struct session *savesess;
 	struct tty *tp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
 
 	/*
 	 * Reset any sigio structures pointing to us as a result of
 	 * F_SETOWN with our pgid.  The proctree lock ensures that
 	 * new sigio structures will not be added after this point.
 	 */
 	funsetownlst(&pgrp->pg_sigiolst);
 
 	PGRP_LOCK(pgrp);
 	tp = pgrp->pg_session->s_ttyp;
 	LIST_REMOVE(pgrp, pg_hash);
 	/* Remember the session; pgrp itself is freed below. */
 	savesess = pgrp->pg_session;
 	PGRP_UNLOCK(pgrp);
 
 	/* Remove the reference to the pgrp before deallocating it. */
 	if (tp != NULL) {
 		/*
 		 * NOTE(review): there is no matching tty_unlock() here;
 		 * tty_rel_pgrp() presumably drops the tty lock - confirm.
 		 */
 		tty_lock(tp);
 		tty_rel_pgrp(tp, pgrp);
 	}
 
 	proc_id_clear(PROC_ID_GROUP, pgrp->pg_id);
 	uma_zfree(pgrp_zone, pgrp);
 	sess_release(savesess);
 }
 
 
 /*
  * Adjust process group orphan state for the exit of p.
  *
  * Marks p with P_TREE_GRPEXITED, orphans p's own group if no
  * qualifying member is left, and then re-evaluates the groups of p's
  * children and orphans.  The caller holds proctree_lock and none of
  * the process, process group or session locks.
  */
 static void
 fixjobc_kill(struct proc *p)
 {
 	struct proc *q;
 	struct pgrp *pgrp;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	pgrp = p->p_pgrp;
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
 
 	/*
 	 * p no longer affects process group orphanage for children.
 	 * It is marked by the flag because p is only physically
 	 * removed from its process group on wait(2).
 	 */
 	MPASS((p->p_treeflag & P_TREE_GRPEXITED) == 0);
 	p->p_treeflag |= P_TREE_GRPEXITED;
 
 	/*
 	 * Check if exiting p orphans its own group.
 	 */
 	pgrp = p->p_pgrp;
 	if (isjobproc(jobc_parent(p, NULL), pgrp)) {
 		PGRP_LOCK(pgrp);
 		if (pgrp_calc_jobc(pgrp) == 0)
 			orphanpg(pgrp);
 		PGRP_UNLOCK(pgrp);
 	}
 
 	/*
 	 * Check this process' children to see whether they qualify
 	 * their process groups after reparenting to reaper.
 	 */
 	LIST_FOREACH(q, &p->p_children, p_sibling) {
 		pgrp = q->p_pgrp;
 		PGRP_LOCK(pgrp);
 		if (pgrp_calc_jobc(pgrp) == 0) {
 			/*
 			 * We want to handle exactly the children that
 			 * have p as realparent.  Then, when calculating
 			 * jobc_parent for children, we should ignore
 			 * P_TREE_GRPEXITED flag already set on p.
 			 */
 			if (jobc_parent(q, p) == p && isjobproc(p, pgrp))
 				orphanpg(pgrp);
 		} else
 			pgrp->pg_flags &= ~PGRP_ORPHANED;
 		PGRP_UNLOCK(pgrp);
 	}
 	/* Same evaluation for the processes on p's orphan list. */
 	LIST_FOREACH(q, &p->p_orphans, p_orphan) {
 		pgrp = q->p_pgrp;
 		PGRP_LOCK(pgrp);
 		if (pgrp_calc_jobc(pgrp) == 0) {
 			if (isjobproc(p, pgrp))
 				orphanpg(pgrp);
 		} else
 			pgrp->pg_flags &= ~PGRP_ORPHANED;
 		PGRP_UNLOCK(pgrp);
 	}
 }
 
 /*
  * Job control cleanup for the exiting current process.
  *
  * If curproc is a session leader, the session's controlling terminal
  * vnode and leader pointers are cleared, SIGHUP is sent to the
  * terminal's foreground process group (when the tty still belongs to
  * this session) and the controlling terminal vnode is revoked.  In all
  * cases fixjobc_kill() is then run for curproc.
  *
  * The caller holds proctree_lock; it is dropped and re-taken
  * exclusively around the vnode revoke.
  */
 void
 killjobc(void)
 {
 	struct session *sp;
 	struct tty *tp;
 	struct proc *p;
 	struct vnode *ttyvp;
 
 	p = curproc;
 	MPASS(p->p_flag & P_WEXIT);
 	sx_assert(&proctree_lock, SX_LOCKED);
 
 	if (SESS_LEADER(p)) {
 		sp = p->p_session;
 
 		/*
 		 * s_ttyp is not zero'd; we use this to indicate that
 		 * the session once had a controlling terminal. (for
 		 * logging and informational purposes)
 		 */
 		SESS_LOCK(sp);
 		ttyvp = sp->s_ttyvp;
 		tp = sp->s_ttyp;
 		sp->s_ttyvp = NULL;
 		sp->s_ttydp = NULL;
 		sp->s_leader = NULL;
 		SESS_UNLOCK(sp);
 
 		/*
 		 * Signal foreground pgrp and revoke access to
 		 * controlling terminal if it has not been revoked
 		 * already.
 		 *
 		 * Because the TTY may have been revoked in the mean
 		 * time and could already have a new session associated
 		 * with it, make sure we don't send a SIGHUP to a
 		 * foreground process group that does not belong to this
 		 * session.
 		 */
 
 		if (tp != NULL) {
 			tty_lock(tp);
 			if (tp->t_session == sp)
 				tty_signal_pgrp(tp, SIGHUP);
 			tty_unlock(tp);
 		}
 
 		if (ttyvp != NULL) {
 			/* The vnode operations run without proctree_lock. */
 			sx_xunlock(&proctree_lock);
 			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
 				VOP_REVOKE(ttyvp, REVOKEALL);
 				VOP_UNLOCK(ttyvp);
 			}
 			devfs_ctty_unref(ttyvp);
 			sx_xlock(&proctree_lock);
 		}
 	}
 	fixjobc_kill(p);
 }
 
 /*
  * A process group has become orphaned, mark it as such for signal
  * delivery code.  If there are any stopped processes in the group,
  * hang-up all process in that group.
  */
 static void
 orphanpg(struct pgrp *pg)
 {
 	struct proc *p;
 
 	PGRP_LOCK_ASSERT(pg, MA_OWNED);
 
 	pg->pg_flags |= PGRP_ORPHANED;
 
 	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 		PROC_LOCK(p);
 		if (P_SHOULDSTOP(p) == P_STOPPED_SIG) {
 			PROC_UNLOCK(p);
 			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 				PROC_LOCK(p);
 				kern_psignal(p, SIGHUP);
 				kern_psignal(p, SIGCONT);
 				PROC_UNLOCK(p);
 			}
 			return;
 		}
 		PROC_UNLOCK(p);
 	}
 }
 
 /*
  * Take an additional reference on session s; sess_release() drops it.
  */
 void
 sess_hold(struct session *s)
 {
 
 	refcount_acquire(&s->s_count);
 }
 
 /*
  * Drop a reference on session s.  When the last reference goes away
  * the session is detached from its tty (if any), its id is released
  * and the structure is destroyed and freed.
  */
 void
 sess_release(struct session *s)
 {
 
 	if (!refcount_release(&s->s_count))
 		return;
 
 	/* Last reference: tear the session down. */
 	if (s->s_ttyp != NULL) {
 		tty_lock(s->s_ttyp);
 		tty_rel_sess(s->s_ttyp, s);
 	}
 	proc_id_clear(PROC_ID_SESSION, s->s_sid);
 	mtx_destroy(&s->s_mtx);
 	free(s, M_SESSION);
 }
 
 #ifdef DDB
 
 /*
  * DDB helper: print one member p of process group pgrp.  The line
  * shows the pid, the proc address, the parent pid (-1 if there is no
  * parent), the proc's pgrp pointer, whether P_TREE_GRPEXITED is set,
  * and whether the parent satisfies isjobproc() for pgrp.
  */
 static void
 db_print_pgrp_one(struct pgrp *pgrp, struct proc *p)
 {
 	int exited, jobc, ppid;
 
 	if (p->p_pptr == NULL) {
 		ppid = -1;
 		jobc = 0;
 	} else {
 		ppid = p->p_pptr->p_pid;
 		jobc = isjobproc(p->p_pptr, pgrp);
 	}
 	exited = (p->p_treeflag & P_TREE_GRPEXITED) != 0;
 
 	db_printf(
 	    "    pid %d at %p pr %d pgrp %p e %d jc %d\n",
 	    p->p_pid, p, ppid, p->p_pgrp, exited, jobc);
 }
 
 /*
  * DDB "show pgrpdump": walk every non-empty bucket of the process
  * group hash and print each group followed by its members.
  */
 DB_SHOW_COMMAND(pgrpdump, pgrpdump)
 {
 	struct pgrp *pg;
 	struct proc *member;
 	int bucket;
 
 	for (bucket = 0; bucket <= pgrphash; bucket++) {
 		if (LIST_EMPTY(&pgrphashtbl[bucket]))
 			continue;
 		db_printf("indx %d\n", bucket);
 		LIST_FOREACH(pg, &pgrphashtbl[bucket], pg_hash) {
 			db_printf(
 		"  pgrp %p, pgid %d, sess %p, sesscnt %d, mem %p\n",
 			    pg, (int)pg->pg_id, pg->pg_session,
 			    pg->pg_session->s_count,
 			    LIST_FIRST(&pg->pg_members));
 			LIST_FOREACH(member, &pg->pg_members, p_pglist)
 				db_print_pgrp_one(pg, member);
 		}
 	}
 }
 #endif /* DDB */
 
 /*
  * Calculate the kinfo_proc members which contain process-wide
  * information: ki_pctcpu and ki_estcpu are reset and then summed over
  * all threads of the process.
  * Must be called with the target process locked.
  */
 static void
 fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
 {
 	struct thread *thr;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	kp->ki_pctcpu = 0;
 	kp->ki_estcpu = 0;
 	FOREACH_THREAD_IN_PROC(p, thr) {
 		/* Scheduler statistics are read under the thread lock. */
 		thread_lock(thr);
 		kp->ki_pctcpu += sched_pctcpu(thr);
 		kp->ki_estcpu += sched_estcpu(thr);
 		thread_unlock(thr);
 	}
 }
 
 /*
  * Fill in any information that is common to all threads in the process.
  * Must be called with the target process locked.
  *
  * Covers kernel pointers, flags, credentials, signal dispositions,
  * address space sizes, times and resource usage, names and parent
  * information.  Several fields are only written conditionally (and
  * ki_cow is accumulated), so kp is expected to be zeroed beforehand.
  */
 static void
 fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
 {
 	struct thread *td0;
 	struct ucred *cred;
 	struct sigacts *ps;
 	struct timeval boottime;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* Structure size and raw kernel pointers. */
 	kp->ki_structsize = sizeof(*kp);
 	kp->ki_paddr = p;
 	kp->ki_addr =/* p->p_addr; */0; /* XXX */
 	kp->ki_args = p->p_args;
 	kp->ki_textvp = p->p_textvp;
 #ifdef KTRACE
 	kp->ki_tracep = ktr_get_tracevp(p, false);
 	kp->ki_traceflag = p->p_traceflag;
 #endif
 	kp->ki_fd = p->p_fd;
 	kp->ki_pd = p->p_pd;
 	kp->ki_vmspace = p->p_vmspace;
 	kp->ki_flag = p->p_flag;
 	kp->ki_flag2 = p->p_flag2;
 	/* Credentials. */
 	cred = p->p_ucred;
 	if (cred) {
 		kp->ki_uid = cred->cr_uid;
 		kp->ki_ruid = cred->cr_ruid;
 		kp->ki_svuid = cred->cr_svuid;
 		kp->ki_cr_flags = 0;
 		if (cred->cr_flags & CRED_FLAG_CAPMODE)
 			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
 		/* XXX bde doesn't like KI_NGROUPS */
 		/* At most KI_NGROUPS groups fit; flag any truncation. */
 		if (cred->cr_ngroups > KI_NGROUPS) {
 			kp->ki_ngroups = KI_NGROUPS;
 			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
 		} else
 			kp->ki_ngroups = cred->cr_ngroups;
 		bcopy(cred->cr_groups, kp->ki_groups,
 		    kp->ki_ngroups * sizeof(gid_t));
 		kp->ki_rgid = cred->cr_rgid;
 		kp->ki_svgid = cred->cr_svgid;
 		/* If jailed(cred), emulate the old P_JAILED flag. */
 		if (jailed(cred)) {
 			kp->ki_flag |= P_JAILED;
 			/* If inside the jail, use 0 as a jail ID. */
 			if (cred->cr_prison != curthread->td_ucred->cr_prison)
 				kp->ki_jid = cred->cr_prison->pr_id;
 		}
 		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
 		    sizeof(kp->ki_loginclass));
 	}
 	/* Ignored and caught signal sets, read under the sigacts mutex. */
 	ps = p->p_sigacts;
 	if (ps) {
 		mtx_lock(&ps->ps_mtx);
 		kp->ki_sigignore = ps->ps_sigignore;
 		kp->ki_sigcatch = ps->ps_sigcatch;
 		mtx_unlock(&ps->ps_mtx);
 	}
 	/* Address space sizes, only for processes that are fully set up. */
 	if (p->p_state != PRS_NEW &&
 	    p->p_state != PRS_ZOMBIE &&
 	    p->p_vmspace != NULL) {
 		struct vmspace *vm = p->p_vmspace;
 
 		kp->ki_size = vm->vm_map.size;
 		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
 		/* Kernel stacks of non-swapped threads count as resident. */
 		FOREACH_THREAD_IN_PROC(p, td0) {
 			if (!TD_IS_SWAPPED(td0))
 				kp->ki_rssize += td0->td_kstack_pages;
 		}
 		kp->ki_swrss = vm->vm_swrss;
 		kp->ki_tsize = vm->vm_tsize;
 		kp->ki_dsize = vm->vm_dsize;
 		kp->ki_ssize = vm->vm_ssize;
 	} else if (p->p_state == PRS_ZOMBIE)
 		kp->ki_stat = SZOMB;
 	if (kp->ki_flag & P_INMEM)
 		kp->ki_sflag = PS_INMEM;
 	else
 		kp->ki_sflag = 0;
 	/* Calculate legacy swtime as seconds since 'swtick'. */
 	kp->ki_swtime = (ticks - p->p_swtick) / hz;
 	kp->ki_pid = p->p_pid;
 	kp->ki_nice = p->p_nice;
 	kp->ki_fibnum = p->p_fibnum;
 	/* The boot time is added to the recorded start time. */
 	kp->ki_start = p->p_stats->p_start;
 	getboottime(&boottime);
 	timevaladd(&kp->ki_start, &boottime);
 	/* Resource usage is sampled under the process stat lock. */
 	PROC_STATLOCK(p);
 	rufetch(p, &kp->ki_rusage);
 	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
 	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
 	PROC_STATUNLOCK(p);
 	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
 	/* Some callers want child times in a single value. */
 	kp->ki_childtime = kp->ki_childstime;
 	timevaladd(&kp->ki_childtime, &kp->ki_childutime);
 
 	/* Sum td_cow over all threads. */
 	FOREACH_THREAD_IN_PROC(p, td0)
 		kp->ki_cow += td0->td_cow;
 
 	/* Command and emulation names, copied only when non-empty. */
 	if (p->p_comm[0] != '\0')
 		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
 	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
 	    p->p_sysent->sv_name[0] != '\0')
 		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
 	kp->ki_siglist = p->p_siglist;
 	kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig);
 	kp->ki_acflag = p->p_acflag;
 	kp->ki_lock = p->p_lock;
 	if (p->p_pptr) {
 		/* ki_ppid comes from p_oppid, not from p_pptr. */
 		kp->ki_ppid = p->p_oppid;
 		/* For a traced process p_pptr is reported as the tracer. */
 		if (p->p_flag & P_TRACED)
 			kp->ki_tracer = p->p_pptr->p_pid;
 	}
 }
 
 /*
  * Fill job-related process information.
  *
  * Sets the process group id and job control count, the session id,
  * login name and session flags, and the controlling terminal fields.
  * Nothing is written if the process has no process group.  The caller
  * holds proctree_lock and the process lock.
  */
 static void
 fill_kinfo_proc_pgrp(struct proc *p, struct kinfo_proc *kp)
 {
 	struct tty *tp;
 	struct session *sp;
 	struct pgrp *pgrp;
 
 	sx_assert(&proctree_lock, SA_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	pgrp = p->p_pgrp;
 	if (pgrp == NULL)
 		return;
 
 	kp->ki_pgid = pgrp->pg_id;
 	kp->ki_jobc = pgrp_calc_jobc(pgrp);
 
 	sp = pgrp->pg_session;
 	tp = NULL;
 
 	if (sp != NULL) {
 		kp->ki_sid = sp->s_sid;
 		/* The remaining session fields are read under its lock. */
 		SESS_LOCK(sp);
 		strlcpy(kp->ki_login, sp->s_login, sizeof(kp->ki_login));
 		if (sp->s_ttyvp)
 			kp->ki_kiflag |= KI_CTTY;
 		if (SESS_LEADER(p))
 			kp->ki_kiflag |= KI_SLEADER;
 		tp = sp->s_ttyp;
 		SESS_UNLOCK(sp);
 	}
 
 	/* Terminal details only for a process with a controlling tty. */
 	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
 		kp->ki_tdev = tty_udev(tp);
 		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
 		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
 		if (tp->t_session)
 			kp->ki_tsid = tp->t_session->s_sid;
 	} else {
 		kp->ki_tdev = NODEV;
 		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
 	}
 }
 
 /*
  * Fill in information that is thread specific.  Must be called with
  * target process locked.  If 'preferthread' is set, overwrite certain
  * process-related fields that are maintained for both threads and
  * processes.
  *
  * With 'preferthread' set, ki_rusage, ki_runtime, ki_pctcpu, ki_estcpu,
  * ki_cow and ki_siglist are replaced by the values of thread 'td'.
  */
 static void
 fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
 {
 	struct proc *p;
 
 	p = td->td_proc;
 	kp->ki_tdaddr = td;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * The process stat lock is only taken for 'preferthread', the
 	 * case that fetches per-thread rusage below.  It is acquired
 	 * before the thread lock and released after it.
 	 */
 	if (preferthread)
 		PROC_STATLOCK(p);
 	thread_lock(td);
 	if (td->td_wmesg != NULL)
 		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
 	else
 		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
 	/*
 	 * A thread name that does not fit in ki_tdname continues in
 	 * ki_moretdname.
 	 */
 	if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >=
 	    sizeof(kp->ki_tdname)) {
 		strlcpy(kp->ki_moretdname,
 		    td->td_name + sizeof(kp->ki_tdname) - 1,
 		    sizeof(kp->ki_moretdname));
 	} else {
 		bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname));
 	}
 	/* Report the lock name when the thread is blocked on a lock. */
 	if (TD_ON_LOCK(td)) {
 		kp->ki_kiflag |= KI_LOCKBLOCK;
 		strlcpy(kp->ki_lockname, td->td_lockname,
 		    sizeof(kp->ki_lockname));
 	} else {
 		kp->ki_kiflag &= ~KI_LOCKBLOCK;
 		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
 	}
 
 	/* Derive the legacy state code from the process and thread state. */
 	if (p->p_state == PRS_NORMAL) { /* approximate. */
 		if (TD_ON_RUNQ(td) ||
 		    TD_CAN_RUN(td) ||
 		    TD_IS_RUNNING(td)) {
 			kp->ki_stat = SRUN;
 		} else if (P_SHOULDSTOP(p)) {
 			kp->ki_stat = SSTOP;
 		} else if (TD_IS_SLEEPING(td)) {
 			kp->ki_stat = SSLEEP;
 		} else if (TD_ON_LOCK(td)) {
 			kp->ki_stat = SLOCK;
 		} else {
 			kp->ki_stat = SWAIT;
 		}
 	} else if (p->p_state == PRS_ZOMBIE) {
 		kp->ki_stat = SZOMB;
 	} else {
 		kp->ki_stat = SIDL;
 	}
 
 	/* Things in the thread */
 	kp->ki_wchan = td->td_wchan;
 	kp->ki_pri.pri_level = td->td_priority;
 	kp->ki_pri.pri_native = td->td_base_pri;
 
 	/*
 	 * Note: legacy fields; clamp at the old NOCPU value and/or
 	 * the maximum u_char CPU value.
 	 */
 	if (td->td_lastcpu == NOCPU)
 		kp->ki_lastcpu_old = NOCPU_OLD;
 	else if (td->td_lastcpu > MAXCPU_OLD)
 		kp->ki_lastcpu_old = MAXCPU_OLD;
 	else
 		kp->ki_lastcpu_old = td->td_lastcpu;
 
 	if (td->td_oncpu == NOCPU)
 		kp->ki_oncpu_old = NOCPU_OLD;
 	else if (td->td_oncpu > MAXCPU_OLD)
 		kp->ki_oncpu_old = MAXCPU_OLD;
 	else
 		kp->ki_oncpu_old = td->td_oncpu;
 
 	/* The current fields carry the unclamped values. */
 	kp->ki_lastcpu = td->td_lastcpu;
 	kp->ki_oncpu = td->td_oncpu;
 	kp->ki_tdflags = td->td_flags;
 	kp->ki_tid = td->td_tid;
 	kp->ki_numthreads = p->p_numthreads;
 	kp->ki_pcb = td->td_pcb;
 	kp->ki_kstack = (void *)td->td_kstack;
 	/* Sleep time in seconds, from the tick recorded in td_slptick. */
 	kp->ki_slptime = (ticks - td->td_slptick) / hz;
 	kp->ki_pri.pri_class = td->td_pri_class;
 	kp->ki_pri.pri_user = td->td_user_pri;
 
 	if (preferthread) {
 		/* Per-thread values replace the process-wide ones. */
 		rufetchtd(td, &kp->ki_rusage);
 		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
 		kp->ki_pctcpu = sched_pctcpu(td);
 		kp->ki_estcpu = sched_estcpu(td);
 		kp->ki_cow = td->td_cow;
 	}
 
 	/* We can't get this anymore but ps etc never used it anyway. */
 	kp->ki_rqindex = 0;
 
 	if (preferthread)
 		kp->ki_siglist = td->td_siglist;
 	kp->ki_sigmask = td->td_sigmask;
 	thread_unlock(td);
 	if (preferthread)
 		PROC_STATUNLOCK(p);
 }
 
 /*
  * Build the kinfo_proc record for process 'p' in '*kp'.  The target
  * process must be locked by the caller.  The thread-specific fields are
  * taken from the first thread of the process.
  */
 void
 fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
 {
 	struct thread *td0;
 
 	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
 	td0 = FIRST_THREAD_IN_PROC(p);
 
 	/* Start from an all-zero record; the helpers fill in the rest. */
 	bzero(kp, sizeof(*kp));
 	fill_kinfo_proc_pgrp(p, kp);
 	fill_kinfo_proc_only(p, kp);
 	fill_kinfo_thread(td0, kp, 0);
 	fill_kinfo_aggregate(p, kp);
 }
 
 /*
  * Allocate a zero-filled pstats structure from M_SUBPROC.  The
  * allocation is made with M_WAITOK.
  */
 struct pstats *
 pstats_alloc(void)
 {
 	struct pstats *ps;
 
 	ps = malloc(sizeof(*ps), M_SUBPROC, M_WAITOK | M_ZERO);
 	return (ps);
 }
 
 /*
  * Copy parts of p_stats; zero the rest of p_stats (statistics).
  */
 void
 pstats_fork(struct pstats *src, struct pstats *dst)
 {
 
 	bzero(&dst->pstat_startzero,
 	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
 	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
 	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
 }
 
 /*
  * Release a pstats structure to the M_SUBPROC malloc type.
  */
 void
 pstats_free(struct pstats *ps)
 {
 
 	free(ps, M_SUBPROC);
 }
 
 #ifdef COMPAT_FREEBSD32
 
 /*
  * Squeeze a pointer into 32 bits for a 32-bit consumer.  A value that
  * does not fit (greater than UINT_MAX) is reported as zero rather than
  * truncated.  This function is typically used to copy out the kernel
  * address, so it can be replaced by assignment of zero.
  */
 static inline uint32_t
 ptr32_trim(const void *ptr)
 {
 	const uintptr_t addr = (uintptr_t)ptr;
 
 	if (addr > UINT_MAX)
 		return (0);
 	return ((uint32_t)addr);
 }
 
 /*
  * Copy pointer field 'fld' from 'src' to 'dst', passing the value
  * through ptr32_trim() so it fits the 32-bit destination.
  */
 #define PTRTRIM_CP(src,dst,fld) \
 	do { (dst).fld = ptr32_trim((src).fld); } while (0)
 
 /*
  * Convert the native kinfo_proc record '*ki' into the 32-bit layout
  * '*ki32'.  The destination is zeroed first, so any field not copied
  * below reads as zero.  Pointer-valued fields go through PTRTRIM_CP();
  * scalar fields are copied with CP(), timevals with TV_CP(), and the
  * rusage structures with freebsd32_rusage_out().
  */
 static void
 freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
 {
 	int i;
 
 	bzero(ki32, sizeof(struct kinfo_proc32));
 	/* The structure size reported is that of the 32-bit layout. */
 	ki32->ki_structsize = sizeof(struct kinfo_proc32);
 	CP(*ki, *ki32, ki_layout);
 	PTRTRIM_CP(*ki, *ki32, ki_args);
 	PTRTRIM_CP(*ki, *ki32, ki_paddr);
 	PTRTRIM_CP(*ki, *ki32, ki_addr);
 	PTRTRIM_CP(*ki, *ki32, ki_tracep);
 	PTRTRIM_CP(*ki, *ki32, ki_textvp);
 	PTRTRIM_CP(*ki, *ki32, ki_fd);
 	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
 	PTRTRIM_CP(*ki, *ki32, ki_wchan);
 	CP(*ki, *ki32, ki_pid);
 	CP(*ki, *ki32, ki_ppid);
 	CP(*ki, *ki32, ki_pgid);
 	CP(*ki, *ki32, ki_tpgid);
 	CP(*ki, *ki32, ki_sid);
 	CP(*ki, *ki32, ki_tsid);
 	CP(*ki, *ki32, ki_jobc);
 	CP(*ki, *ki32, ki_tdev);
 	CP(*ki, *ki32, ki_tdev_freebsd11);
 	CP(*ki, *ki32, ki_siglist);
 	CP(*ki, *ki32, ki_sigmask);
 	CP(*ki, *ki32, ki_sigignore);
 	CP(*ki, *ki32, ki_sigcatch);
 	CP(*ki, *ki32, ki_uid);
 	CP(*ki, *ki32, ki_ruid);
 	CP(*ki, *ki32, ki_svuid);
 	CP(*ki, *ki32, ki_rgid);
 	CP(*ki, *ki32, ki_svgid);
 	CP(*ki, *ki32, ki_ngroups);
 	/* All KI_NGROUPS slots are copied, independent of ki_ngroups. */
 	for (i = 0; i < KI_NGROUPS; i++)
 		CP(*ki, *ki32, ki_groups[i]);
 	CP(*ki, *ki32, ki_size);
 	CP(*ki, *ki32, ki_rssize);
 	CP(*ki, *ki32, ki_swrss);
 	CP(*ki, *ki32, ki_tsize);
 	CP(*ki, *ki32, ki_dsize);
 	CP(*ki, *ki32, ki_ssize);
 	CP(*ki, *ki32, ki_xstat);
 	CP(*ki, *ki32, ki_acflag);
 	CP(*ki, *ki32, ki_pctcpu);
 	CP(*ki, *ki32, ki_estcpu);
 	CP(*ki, *ki32, ki_slptime);
 	CP(*ki, *ki32, ki_swtime);
 	CP(*ki, *ki32, ki_cow);
 	CP(*ki, *ki32, ki_runtime);
 	TV_CP(*ki, *ki32, ki_start);
 	TV_CP(*ki, *ki32, ki_childtime);
 	CP(*ki, *ki32, ki_flag);
 	CP(*ki, *ki32, ki_kiflag);
 	CP(*ki, *ki32, ki_traceflag);
 	CP(*ki, *ki32, ki_stat);
 	CP(*ki, *ki32, ki_nice);
 	CP(*ki, *ki32, ki_lock);
 	CP(*ki, *ki32, ki_rqindex);
 	CP(*ki, *ki32, ki_oncpu);
 	CP(*ki, *ki32, ki_lastcpu);
 
 	/* XXX TODO: wrap cpu value as appropriate */
 	CP(*ki, *ki32, ki_oncpu_old);
 	CP(*ki, *ki32, ki_lastcpu_old);
 
 	/* Fixed-size character arrays are copied whole, terminator included. */
 	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
 	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
 	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
 	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
 	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
 	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
 	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
 	bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1);
 	CP(*ki, *ki32, ki_tracer);
 	CP(*ki, *ki32, ki_flag2);
 	CP(*ki, *ki32, ki_fibnum);
 	CP(*ki, *ki32, ki_cr_flags);
 	CP(*ki, *ki32, ki_jid);
 	CP(*ki, *ki32, ki_numthreads);
 	CP(*ki, *ki32, ki_tid);
 	CP(*ki, *ki32, ki_pri);
 	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
 	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
 	PTRTRIM_CP(*ki, *ki32, ki_pcb);
 	PTRTRIM_CP(*ki, *ki32, ki_kstack);
 	PTRTRIM_CP(*ki, *ki32, ki_udata);
 	PTRTRIM_CP(*ki, *ki32, ki_tdaddr);
 	CP(*ki, *ki32, ki_sflag);
 	CP(*ki, *ki32, ki_tdflags);
 }
 #endif
 
 static ssize_t
 kern_proc_out_size(struct proc *p, int flags)
 {
 	ssize_t size = 0;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if ((flags & KERN_PROC_NOTHREADS) != 0) {
 #ifdef COMPAT_FREEBSD32
 		if ((flags & KERN_PROC_MASK32) != 0) {
 			size += sizeof(struct kinfo_proc32);
 		} else
 #endif
 			size += sizeof(struct kinfo_proc);
 	} else {
 #ifdef COMPAT_FREEBSD32
 		if ((flags & KERN_PROC_MASK32) != 0)
 			size += sizeof(struct kinfo_proc32) * p->p_numthreads;
 		else
 #endif
 			size += sizeof(struct kinfo_proc) * p->p_numthreads;
 	}
 	PROC_UNLOCK(p);
 	return (size);
 }
 
 /*
  * Append the kinfo_proc record(s) of process 'p' to 'sb'.  With
  * KERN_PROC_NOTHREADS a single process record is written; otherwise
  * one record per thread.  KERN_PROC_MASK32 selects the 32-bit layout
  * on COMPAT_FREEBSD32 kernels.
  *
  * Called with the process locked; the process lock is dropped before
  * returning.  Returns 0, or ENOMEM if the sbuf refused a record.
  */
 int
 kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
 {
 	struct kinfo_proc ki;
 #ifdef COMPAT_FREEBSD32
 	struct kinfo_proc32 ki32;
 #endif
 	struct thread *td;
 	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
 
 	error = 0;
 	fill_kinfo_proc(p, &ki);
 	if ((flags & KERN_PROC_NOTHREADS) == 0) {
 		/* Per-thread output: refresh the thread fields each time. */
 		FOREACH_THREAD_IN_PROC(p, td) {
 			fill_kinfo_thread(td, &ki, 1);
 #ifdef COMPAT_FREEBSD32
 			if ((flags & KERN_PROC_MASK32) != 0) {
 				freebsd32_kinfo_proc_out(&ki, &ki32);
 				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
 					error = ENOMEM;
 			} else
 #endif
 			{
 				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
 					error = ENOMEM;
 			}
 			if (error != 0)
 				break;
 		}
 	} else {
 		/* Single record describing the whole process. */
 #ifdef COMPAT_FREEBSD32
 		if ((flags & KERN_PROC_MASK32) != 0) {
 			freebsd32_kinfo_proc_out(&ki, &ki32);
 			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
 				error = ENOMEM;
 		} else
 #endif
 		{
 			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
 				error = ENOMEM;
 		}
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Send the kinfo record(s) of process 'p' to the sysctl request 'req'.
  * When the request carries no output buffer only the required size is
  * reported.  The process is handed on locked to kern_proc_out_size()
  * or kern_proc_out(), both of which unlock it.
  */
 static int
 sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags)
 {
 	struct kinfo_proc ki;
 	struct sbuf sb;
 	int error, finish_error;
 
 	/* Size probe: no data is produced. */
 	if (req->oldptr == NULL) {
 		return (SYSCTL_OUT(req, 0, kern_proc_out_size(p, flags)));
 	}
 
 	/* 'ki' is used here only as the backing storage of the sbuf. */
 	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = kern_proc_out(p, &sb, flags);
 	finish_error = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 
 	/* An error from producing the data wins over one from finishing. */
 	return (error != 0 ? error : finish_error);
 }
 
 int
 proc_iterate(int (*cb)(struct proc *, void *), void *cbarg)
 {
 	struct proc *p;
 	int error, i, j;
 
 	for (i = 0; i < pidhashlock + 1; i++) {
 		sx_slock(&proctree_lock);
 		sx_slock(&pidhashtbl_lock[i]);
 		for (j = i; j <= pidhash; j += pidhashlock + 1) {
 			LIST_FOREACH(p, &pidhashtbl[j], p_hash) {
 				if (p->p_state == PRS_NEW)
 					continue;
 				error = cb(p, cbarg);
 				PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 				if (error != 0) {
 					sx_sunlock(&pidhashtbl_lock[i]);
 					sx_sunlock(&proctree_lock);
 					return (error);
 				}
 			}
 		}
 		sx_sunlock(&pidhashtbl_lock[i]);
 		sx_sunlock(&proctree_lock);
 	}
 	return (0);
 }
 
 /*
  * Arguments passed from sysctl_kern_proc() to sysctl_kern_proc_iterate()
  * through the opaque callback argument of proc_iterate().
  */
 struct kern_proc_out_args {
 	struct sysctl_req *req;	/* request that receives the output */
 	int flags;		/* KERN_PROC_* output flags */
 	int oid_number;		/* KERN_PROC_* selector being served */
 	int *name;		/* selector argument; name[0] is compared */
 };
 
 /*
  * proc_iterate() callback used by sysctl_kern_proc(): output the kinfo
  * record(s) of 'p' when the calling thread may see the process and it
  * matches the selector described by 'origarg' (a struct
  * kern_proc_out_args).
  *
  * The process is locked here.  On the output path sysctl_out_proc()
  * is expected to return with it unlocked (asserted); on the skip path
  * it is unlocked explicitly.  Returns 0 for a skipped process, otherwise
  * the result of sysctl_out_proc().
  */
 static int
 sysctl_kern_proc_iterate(struct proc *p, void *origarg)
 {
 	struct kern_proc_out_args *arg = origarg;
 	int *name = arg->name;
 	int oid_number = arg->oid_number;
 	int flags = arg->flags;
 	struct sysctl_req *req = arg->req;
 	int error = 0;
 
 	PROC_LOCK(p);
 
 	KASSERT(p->p_ucred != NULL,
 	    ("process credential is NULL for non-NEW proc"));
 	/*
 	 * Show a user only appropriate processes.
 	 */
 	if (p_cansee(curthread, p))
 		goto skip;
 	/*
 	 * TODO - make more efficient (see notes below).
 	 * do by session.
 	 */
 	switch (oid_number) {
 	case KERN_PROC_GID:
 		if (p->p_ucred->cr_gid != (gid_t)name[0])
 			goto skip;
 		break;
 
 	case KERN_PROC_PGRP:
 		/* could do this by traversing pgrp */
 		if (p->p_pgrp == NULL ||
 		    p->p_pgrp->pg_id != (pid_t)name[0])
 			goto skip;
 		break;
 
 	case KERN_PROC_RGID:
 		if (p->p_ucred->cr_rgid != (gid_t)name[0])
 			goto skip;
 		break;
 
 	case KERN_PROC_SESSION:
 		if (p->p_session == NULL ||
 		    p->p_session->s_sid != (pid_t)name[0])
 			goto skip;
 		break;
 
 	case KERN_PROC_TTY:
 		if ((p->p_flag & P_CONTROLT) == 0 ||
 		    p->p_session == NULL)
 			goto skip;
 		/* XXX proctree_lock */
 		/* The session lock covers the s_ttyp read and comparison. */
 		SESS_LOCK(p->p_session);
 		if (p->p_session->s_ttyp == NULL ||
 		    tty_udev(p->p_session->s_ttyp) !=
 		    (dev_t)name[0]) {
 			SESS_UNLOCK(p->p_session);
 			goto skip;
 		}
 		SESS_UNLOCK(p->p_session);
 		break;
 
 	case KERN_PROC_UID:
 		if (p->p_ucred->cr_uid != (uid_t)name[0])
 			goto skip;
 		break;
 
 	case KERN_PROC_RUID:
 		if (p->p_ucred->cr_ruid != (uid_t)name[0])
 			goto skip;
 		break;
 
 	case KERN_PROC_PROC:
 		break;
 
 	default:
 		/* Any other selector matches every visible process. */
 		break;
 	}
 	error = sysctl_out_proc(p, req, flags);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	return (error);
 skip:
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 /*
  * Sysctl handler producing kinfo_proc records.  The node's oid number
  * selects which processes are reported (KERN_PROC_*), optionally with
  * KERN_PROC_INC_THREAD to get one record per thread; KERN_PROC_ALL
  * always reports threads.  KERN_PROC_PID is served by a direct lookup,
  * every other selector by iterating over all processes.
  */
 static int
 sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
 {
 	struct kern_proc_out_args iterarg;
 	struct proc *p;
 	int *name;
 	u_int namelen;
 	int error, flags, oid_number;
 
 	name = (int *)arg1;
 	namelen = arg2;
 
 	oid_number = oidp->oid_number;
 	if (oid_number == KERN_PROC_ALL ||
 	    (oid_number & KERN_PROC_INC_THREAD) != 0) {
 		flags = 0;
 		oid_number &= ~KERN_PROC_INC_THREAD;
 	} else {
 		flags = KERN_PROC_NOTHREADS;
 	}
 #ifdef COMPAT_FREEBSD32
 	if (req->flags & SCTL_MASK32)
 		flags |= KERN_PROC_MASK32;
 #endif
 
 	/* Single process by pid: no iteration needed. */
 	if (oid_number == KERN_PROC_PID) {
 		if (namelen != 1)
 			return (EINVAL);
 		error = sysctl_wire_old_buffer(req, 0);
 		if (error != 0)
 			return (error);
 		sx_slock(&proctree_lock);
 		error = pget((pid_t)name[0], PGET_CANSEE, &p);
 		if (error == 0)
 			error = sysctl_out_proc(p, req, flags);
 		sx_sunlock(&proctree_lock);
 		return (error);
 	}
 
 	/* Check the number of selector arguments. */
 	if (oid_number == KERN_PROC_ALL) {
 		if (namelen != 0)
 			return (EINVAL);
 	} else if (oid_number == KERN_PROC_PROC) {
 		if (namelen > 1)
 			return (EINVAL);
 	} else if (namelen != 1) {
 		return (EINVAL);
 	}
 
 	if (req->oldptr == NULL) {
 		/* overestimate by 5 procs */
 		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
 	} else {
 		error = sysctl_wire_old_buffer(req, 0);
 	}
 	if (error != 0)
 		return (error);
 
 	iterarg.req = req;
 	iterarg.flags = flags;
 	iterarg.oid_number = oid_number;
 	iterarg.name = name;
 	return (proc_iterate(sysctl_kern_proc_iterate, &iterarg));
 }
 
 /*
  * Allocate a pargs structure with room for 'len' bytes of argument
  * data.  The result starts with a reference count of one and
  * ar_length set to 'len'; the argument bytes are not initialized.
  */
 struct pargs *
 pargs_alloc(int len)
 {
 	struct pargs *pa;
 
 	pa = malloc(sizeof(*pa) + len, M_PARGS, M_WAITOK);
 	pa->ar_length = len;
 	refcount_init(&pa->ar_ref, 1);
 	return (pa);
 }
 
 /*
  * Release a pargs structure to the M_PARGS malloc type.  The reference
  * count is not consulted here; see pargs_drop().
  */
 static void
 pargs_free(struct pargs *pa)
 {
 
 	free(pa, M_PARGS);
 }
 
 /*
  * Take an additional reference on 'pa'.  A NULL pointer is accepted
  * and ignored.
  */
 void
 pargs_hold(struct pargs *pa)
 {
 
 	if (pa != NULL)
 		refcount_acquire(&pa->ar_ref);
 }
 
 /*
  * Drop a reference on 'pa' and free the structure once the last
  * reference is gone.  A NULL pointer is accepted and ignored.
  */
 void
 pargs_drop(struct pargs *pa)
 {
 
 	if (pa != NULL && refcount_release(&pa->ar_ref))
 		pargs_free(pa);
 }
 
 /*
  * Read up to 'len' bytes at address 'sptr' of process 'p' into 'buf'.
  *
  * Returns 0 if at least one byte was read and ENOMEM otherwise.  After
  * a short read the unread tail of 'buf' is zeroed, so the caller never
  * sees stale buffer contents and always finds a NUL after the bytes
  * that were actually read.
  */
 static int
 proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
     size_t len)
 {
 	ssize_t n;
 
 	/*
 	 * This may return a short read if the string is shorter than the chunk
 	 * and is aligned at the end of the page, and the following page is not
 	 * mapped.
 	 */
 	n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len);
 	if (n <= 0)
 		return (ENOMEM);
 	/*
 	 * Do not leave whatever was in the buffer before visible past the
 	 * data read: callers scan the whole chunk for the terminator.
 	 */
 	if ((size_t)n < len)
 		memset(buf + n, 0, len - (size_t)n);
 	return (0);
 }
 
 #define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */
 
 /* Selects which vector of a process get_proc_vector() fetches. */
 enum proc_vector_type {
 	PROC_ARG,	/* argument string pointers (ps_argvstr) */
 	PROC_ENV,	/* environment string pointers (ps_envstr) */
 	PROC_AUX,	/* ELF auxiliary vector entries */
 };
 
 #ifdef COMPAT_FREEBSD32
 /*
  * 32-bit counterpart of get_proc_vector(): fetch the argument,
  * environment or auxiliary vector of process 'p' using the
  * freebsd32_ps_strings structure read from PROC_PS_STRINGS(p).
  *
  * For PROC_ARG and PROC_ENV the 32-bit pointers are widened into a
  * newly allocated array of char *; for PROC_AUX the raw array of
  * Elf32_Auxinfo entries is returned, cast to char **.  On success
  * *proc_vectorp holds an M_TEMP allocation and *vsizep the number of
  * entries.  Returns ENOMEM when target memory cannot be read in full,
  * ENOEXEC when the data looks invalid, EINVAL for an unknown type.
  */
 static int
 get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
     size_t *vsizep, enum proc_vector_type type)
 {
 	struct freebsd32_ps_strings pss;
 	Elf32_Auxinfo aux;
 	vm_offset_t vptr, ptr;
 	uint32_t *proc_vector32;
 	char **proc_vector;
 	size_t vsize, size;
 	int i, error;
 
 	error = 0;
 	if (proc_readmem(td, p, PROC_PS_STRINGS(p), &pss, sizeof(pss)) !=
 	    sizeof(pss))
 		return (ENOMEM);
 	switch (type) {
 	case PROC_ARG:
 		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
 		vsize = pss.ps_nargvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(int32_t);
 		break;
 	case PROC_ENV:
 		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
 		vsize = pss.ps_nenvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(int32_t);
 		break;
 	case PROC_AUX:
 		/* The aux array is located right after the env array. */
 		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
 		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
 		if (vptr % 4 != 0)
 			return (ENOEXEC);
 		/* Count entries up to AT_NULL, at most PROC_AUXV_MAX. */
 		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
 			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
 			    sizeof(aux))
 				return (ENOMEM);
 			if (aux.a_type == AT_NULL)
 				break;
 			ptr += sizeof(aux);
 		}
 		/* No terminator within the limit: treat the data as bogus. */
 		if (aux.a_type != AT_NULL)
 			return (ENOEXEC);
 		vsize = i + 1;
 		size = vsize * sizeof(aux);
 		break;
 	default:
 		KASSERT(0, ("Wrong proc vector type: %d", type));
 		return (EINVAL);
 	}
 	proc_vector32 = malloc(size, M_TEMP, M_WAITOK);
 	if (proc_readmem(td, p, vptr, proc_vector32, size) != size) {
 		error = ENOMEM;
 		goto done;
 	}
 	if (type == PROC_AUX) {
 		/* The raw buffer itself is handed to the caller. */
 		*proc_vectorp = (char **)proc_vector32;
 		*vsizep = vsize;
 		return (0);
 	}
 	/* Widen each 32-bit pointer; the 32-bit copy is freed below. */
 	proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
 	for (i = 0; i < (int)vsize; i++)
 		proc_vector[i] = PTRIN(proc_vector32[i]);
 	*proc_vectorp = proc_vector;
 	*vsizep = vsize;
 done:
 	free(proc_vector32, M_TEMP);
 	return (error);
 }
 #endif
 
 /*
  * Fetch the argument, environment or auxiliary vector of process 'p',
  * as selected by 'type', using the ps_strings structure read from
  * PROC_PS_STRINGS(p).  On COMPAT_FREEBSD32 kernels an SV_ILP32 process
  * is handled by get_proc_vector32().
  *
  * On success *proc_vectorp holds an M_TEMP allocation with the vector
  * as read from the target and *vsizep the number of entries.  Returns
  * ENOMEM when target memory cannot be read in full, ENOEXEC when the
  * data looks invalid, EINVAL for an unknown type.
  */
 static int
 get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
     size_t *vsizep, enum proc_vector_type type)
 {
 	struct ps_strings pss;
 	Elf_Auxinfo aux;
 	vm_offset_t vptr, ptr;
 	char **proc_vector;
 	size_t vsize, size;
 	int i;
 
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
 		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
 #endif
 	if (proc_readmem(td, p, PROC_PS_STRINGS(p), &pss, sizeof(pss)) !=
 	    sizeof(pss))
 		return (ENOMEM);
 	switch (type) {
 	case PROC_ARG:
 		vptr = (vm_offset_t)pss.ps_argvstr;
 		vsize = pss.ps_nargvstr;
 		/* The count comes from the target: bound it. */
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(char *);
 		break;
 	case PROC_ENV:
 		vptr = (vm_offset_t)pss.ps_envstr;
 		vsize = pss.ps_nenvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(char *);
 		break;
 	case PROC_AUX:
 		/*
 		 * The aux array is just above env array on the stack. Check
 		 * that the address is naturally aligned.
 		 */
 		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
 		    * sizeof(char *);
 #if __ELF_WORD_SIZE == 64
 		if (vptr % sizeof(uint64_t) != 0)
 #else
 		if (vptr % sizeof(uint32_t) != 0)
 #endif
 			return (ENOEXEC);
 		/*
 		 * We count the array size reading the aux vectors from the
 		 * stack until AT_NULL vector is returned.  So (to keep the code
 		 * simple) we read the process stack twice: the first time here
 		 * to find the size and the second time when copying the vectors
 		 * to the allocated proc_vector.
 		 */
 		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
 			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
 			    sizeof(aux))
 				return (ENOMEM);
 			if (aux.a_type == AT_NULL)
 				break;
 			ptr += sizeof(aux);
 		}
 		/*
 		 * If the PROC_AUXV_MAX entries are iterated over, and we have
 		 * not reached AT_NULL, it is most likely we are reading wrong
 		 * data: either the process doesn't have auxv array or data has
 		 * been modified. Return the error in this case.
 		 */
 		if (aux.a_type != AT_NULL)
 			return (ENOEXEC);
 		/* The count includes the terminating AT_NULL entry. */
 		vsize = i + 1;
 		size = vsize * sizeof(aux);
 		break;
 	default:
 		KASSERT(0, ("Wrong proc vector type: %d", type));
 		return (EINVAL); /* In case we are built without INVARIANTS. */
 	}
 	proc_vector = malloc(size, M_TEMP, M_WAITOK);
 	if (proc_readmem(td, p, vptr, proc_vector, size) != size) {
 		free(proc_vector, M_TEMP);
 		return (ENOMEM);
 	}
 	*proc_vectorp = proc_vector;
 	*vsizep = vsize;
 
 	return (0);
 }
 
 /* Chunk size (bytes) for ps_strings operations. */
 #define GET_PS_STRINGS_CHUNK_SZ	256
 
 /*
  * Append the strings of the argument or environment vector of process
  * 'p' (selected by 'type') to 'sb', each followed by a NUL byte.  The
  * strings are read from the target in chunks of
  * GET_PS_STRINGS_CHUNK_SZ bytes, and the total is capped at
  * 2 * (PATH_MAX + ARG_MAX) bytes.  The process must be held
  * (PROC_ASSERT_HELD).  Returns 0 or the error from fetching the
  * vector or reading a string.
  */
 static int
 get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
     enum proc_vector_type type)
 {
 	size_t done, len, nchr, vsize;
 	int error, i;
 	char **proc_vector, *sptr;
 	char pss_string[GET_PS_STRINGS_CHUNK_SZ];
 
 	PROC_ASSERT_HELD(p);
 
 	/*
 	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
 	 */
 	nchr = 2 * (PATH_MAX + ARG_MAX);
 
 	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
 	if (error != 0)
 		return (error);
 	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
 		/*
 		 * The program may have scribbled into its argv array, e.g. to
 		 * remove some arguments.  If that has happened, break out
 		 * before trying to read from NULL.
 		 */
 		if (proc_vector[i] == NULL)
 			break;
 		/* Copy one string, a chunk at a time. */
 		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
 			error = proc_read_string(td, p, sptr, pss_string,
 			    sizeof(pss_string));
 			if (error != 0)
 				goto done;
 			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
 			/* Clip to the cap, leaving room for the terminator. */
 			if (done + len >= nchr)
 				len = nchr - done - 1;
 			sbuf_bcat(sb, pss_string, len);
 			/* A chunk that is not full ends the string. */
 			if (len != GET_PS_STRINGS_CHUNK_SZ)
 				break;
 			done += GET_PS_STRINGS_CHUNK_SZ;
 		}
 		/* Terminate the string in the output. */
 		sbuf_bcat(sb, "", 1);
 		done += len + 1;
 	}
 done:
 	free(proc_vector, M_TEMP);
 	return (error);
 }
 
 /*
  * Append the argument strings of process 'p' to 'sb'; see
  * get_ps_strings().
  *
  * NOTE(review): the 'td' argument is not used; curthread is passed to
  * get_ps_strings() instead.
  */
 int
 proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 
 	return (get_ps_strings(curthread, p, sb, PROC_ARG));
 }
 
 /*
  * Append the environment strings of process 'p' to 'sb'; see
  * get_ps_strings().
  *
  * NOTE(review): the 'td' argument is not used; curthread is passed to
  * get_ps_strings() instead.
  */
 int
 proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 
 	return (get_ps_strings(curthread, p, sb, PROC_ENV));
 }
 
 int
 proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 	size_t vsize, size;
 	char **auxv;
 	int error;
 
 	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
 	if (error == 0) {
 #ifdef COMPAT_FREEBSD32
 		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
 			size = vsize * sizeof(Elf32_Auxinfo);
 		else
 #endif
 			size = vsize * sizeof(Elf_Auxinfo);
 		if (sbuf_bcat(sb, auxv, size) != 0)
 			error = ENOMEM;
 		free(auxv, M_TEMP);
 	}
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve the argument list or process
  * title for another process without groping around in the address space
  * of the other process.  It also allow a process to set its own "process 
  * title to a string of its own choice.
  */
 static int
 sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct pargs *newpa, *pa;
 	struct proc *p;
 	struct sbuf sb;
 	int flags, error = 0, error2;
 	pid_t pid;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	p = curproc;
 	pid = (pid_t)name[0];
 	if (pid == -1) {
 		pid = p->p_pid;
 	}
 
 	/*
 	 * If the query is for this process and it is single-threaded, there
 	 * is nobody to modify pargs, thus we can just read.
 	 */
 	if (pid == p->p_pid && p->p_numthreads == 1 && req->newptr == NULL &&
 	    (pa = p->p_args) != NULL)
 		return (SYSCTL_OUT(req, pa->ar_args, pa->ar_length));
 
 	flags = PGET_CANSEE;
 	if (req->newptr != NULL)
 		flags |= PGET_ISCURRENT;
 	error = pget(pid, flags, &p);
 	if (error)
 		return (error);
 
 	pa = p->p_args;
 	if (pa != NULL) {
 		pargs_hold(pa);
 		PROC_UNLOCK(p);
 		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
 		pargs_drop(pa);
 	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
 		_PHOLD(p);
 		PROC_UNLOCK(p);
 		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 		sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 		error = proc_getargv(curthread, p, &sb);
 		error2 = sbuf_finish(&sb);
 		PRELE(p);
 		sbuf_delete(&sb);
 		if (error == 0 && error2 != 0)
 			error = error2;
 	} else {
 		PROC_UNLOCK(p);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (req->newlen > ps_arg_cache_limit - sizeof(struct pargs))
 		return (ENOMEM);
 
 	if (req->newlen == 0) {
 		/*
 		 * Clear the argument pointer, so that we'll fetch arguments
 		 * with proc_getargv() until further notice.
 		 */
 		newpa = NULL;
 	} else {
 		newpa = pargs_alloc(req->newlen);
 		error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
 		if (error != 0) {
 			pargs_free(newpa);
 			return (error);
 		}
 	}
 	PROC_LOCK(p);
 	pa = p->p_args;
 	p->p_args = newpa;
 	PROC_UNLOCK(p);
 	pargs_drop(pa);
 	return (0);
 }
 
 /*
  * Sysctl handler returning the environment strings of the process
  * whose pid is given as the single name component.  A P_SYSTEM process
  * yields no data and no error.
  */
 static int
 sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sb;
 	struct proc *p;
 	int *name;
 	u_int namelen;
 	int error, finish_error;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	if ((p->p_flag & P_SYSTEM) != 0) {
 		PRELE(p);
 		return (0);
 	}
 
 	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = proc_getenvv(curthread, p, &sb);
 	finish_error = sbuf_finish(&sb);
 	PRELE(p);
 	sbuf_delete(&sb);
 
 	/* An error from reading wins over one from finishing the sbuf. */
 	if (error == 0)
 		error = finish_error;
 	return (error);
 }
 
 /*
  * Sysctl handler returning the ELF auxiliary vector of the process
  * whose pid is given as the single name component.  A P_SYSTEM process
  * yields no data and no error.
  */
 static int
 sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sb;
 	struct proc *p;
 	int *name;
 	u_int namelen;
 	int error, finish_error;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	if ((p->p_flag & P_SYSTEM) != 0) {
 		PRELE(p);
 		return (0);
 	}
 
 	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = proc_getauxv(curthread, p, &sb);
 	finish_error = sbuf_finish(&sb);
 	PRELE(p);
 	sbuf_delete(&sb);
 
 	/* An error from reading wins over one from finishing the sbuf. */
 	if (error == 0)
 		error = finish_error;
 	return (error);
 }
 
 /*
  * Look up the canonical executable path running in the specified process.
  * It tries to return the same hardlink name as was used for execve(2).
  * This allows the programs that modify their behavior based on their progname,
  * to operate correctly.
  *
  * Result is returned in retbuf, it must not be freed, similar to vn_fullpath()
  *   calling conventions.
  * binname is a pointer to temporary string buffer of length MAXPATHLEN,
  *   allocated and freed by caller.  It is only written here when the
  *   process has a recorded binary name, and is read afterwards, so the
  *   caller must pass it in holding a valid (e.g. empty) string.
  * freebuf should be freed by caller, from the M_TEMP malloc type.
  *
  * Must be called with the process locked; the process lock is dropped
  * before returning.
  */
 int
 proc_get_binpath(struct proc *p, char *binname, char **retbuf,
     char **freebuf)
 {
 	struct nameidata nd;
 	struct vnode *vp, *dvp;
 	size_t freepath_size;
 	int error;
 	bool do_fullpath;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	vp = p->p_textvp;
 	if (vp == NULL) {
 		/* No text vnode: report an empty path. */
 		PROC_UNLOCK(p);
 		*retbuf = "";
 		*freebuf = NULL;
 		return (0);
 	}
 	/* Reference the vnodes so they can be used once unlocked. */
 	vref(vp);
 	dvp = p->p_textdvp;
 	if (dvp != NULL)
 		vref(dvp);
 	if (p->p_binname != NULL)
 		strlcpy(binname, p->p_binname, MAXPATHLEN);
 	PROC_UNLOCK(p);
 
 	do_fullpath = true;
 	*freebuf = NULL;
 	/* Prefer the path built from the directory vnode and name. */
 	if (dvp != NULL && binname[0] != '\0') {
 		freepath_size = MAXPATHLEN;
 		if (vn_fullpath_hardlink(vp, dvp, binname, strlen(binname),
 		    retbuf, freebuf, &freepath_size) == 0) {
 			/*
 			 * Recheck the looked up path.  The binary
 			 * might have been renamed or replaced, in
 			 * which case we should not report old name.
 			 */
 			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, *retbuf);
 			error = namei(&nd);
 			if (error == 0) {
 				if (nd.ni_vp == vp)
 					do_fullpath = false;
 				vrele(nd.ni_vp);
 				NDFREE_PNBUF(&nd);
 			}
 		}
 	}
 	/* Fall back to a plain vn_fullpath() lookup of the text vnode. */
 	if (do_fullpath) {
 		free(*freebuf, M_TEMP);
 		*freebuf = NULL;
 		error = vn_fullpath(vp, retbuf, freebuf);
 	}
 	vrele(vp);
 	if (dvp != NULL)
 		vrele(dvp);
 	return (error);
 }
 
 /*
  * Sysctl handler returning the executable path of a process as a
  * NUL-terminated string.  The single name component is the pid, with
  * -1 standing for the process that issued the request.
  */
 static int
 sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	pid_t *pidp;
 	char *binname, *freebuf, *retbuf;
 	unsigned int arglen;
 	int error;
 
 	pidp = (pid_t *)arg1;
 	arglen = arg2;
 	if (arglen != 1)
 		return (EINVAL);
 
 	/* Scratch name buffer for proc_get_binpath(); starts out empty. */
 	binname = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 	binname[0] = '\0';
 	if (*pidp != -1) {
 		error = pget(*pidp, PGET_CANSEE, &p);
 	} else {
 		/* -1 means this process */
 		p = req->td->td_proc;
 		PROC_LOCK(p);
 		error = 0;
 	}
 
 	/* proc_get_binpath() takes the locked process and unlocks it. */
 	if (error == 0)
 		error = proc_get_binpath(p, binname, &retbuf, &freebuf);
 	free(binname, M_TEMP);
 	if (error == 0) {
 		error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
 		free(freebuf, M_TEMP);
 	}
 	return (error);
 }
 
 static int
 sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	char *sv_name;
 	int *name;
 	int namelen;
 	int error;
 
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_CANSEE, &p);
 	if (error != 0)
 		return (error);
 	sv_name = p->p_sysent->sv_name;
 	PROC_UNLOCK(p);
 	return (sysctl_handle_string(oidp, sv_name, 0, req));
 }
 
 /*
  * When KINFO_OVMENTRY_SIZE is defined, check at compile time that
  * struct kinfo_ovmentry has exactly that size.
  */
 #ifdef KINFO_OVMENTRY_SIZE
 CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
 #endif
 
 #ifdef COMPAT_FREEBSD7
 /*
  * Sysctl handler (COMPAT_FREEBSD7) returning one struct kinfo_ovmentry
  * per map entry of the process whose pid is the single name component.
  * Submap entries are skipped.
  *
  * The map read lock is dropped while each record is completed and
  * copied out; if the map timestamp changed in the meantime, the walk
  * resumes from the entry found by looking up the last address of the
  * entry just reported.
  */
 static int
 sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
 {
 	vm_map_entry_t entry, tmp_entry;
 	unsigned int last_timestamp, namelen;
 	char *fullpath, *freepath;
 	struct kinfo_ovmentry *kve;
 	struct vattr va;
 	struct ucred *cred;
 	int error, *name;
 	struct vnode *vp;
 	struct proc *p;
 	vm_map_t map;
 	struct vmspace *vm;
 
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	vm = vmspace_acquire_ref(p);
 	if (vm == NULL) {
 		PRELE(p);
 		return (ESRCH);
 	}
 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
 
 	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	VM_MAP_ENTRY_FOREACH(entry, map) {
 		vm_object_t obj, tobj, lobj;
 		vm_offset_t addr;
 
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 
 		bzero(kve, sizeof(*kve));
 		kve->kve_structsize = sizeof(*kve);
 
 		kve->kve_private_resident = 0;
 		obj = entry->object.vm_object;
 		if (obj != NULL) {
 			/* Released further down, after the counts are read. */
 			VM_OBJECT_RLOCK(obj);
 			if (obj->shadow_count == 1)
 				kve->kve_private_resident =
 				    obj->resident_page_count;
 		}
 		/* Count the pages of the entry that the pmap has mapped. */
 		kve->kve_resident = 0;
 		addr = entry->start;
 		while (addr < entry->end) {
 			if (pmap_extract(map->pmap, addr))
 				kve->kve_resident++;
 			addr += PAGE_SIZE;
 		}
 
 		/*
 		 * Walk the backing object chain, locking hand over hand and
 		 * summing the backing offsets.  'lobj' ends up as the last
 		 * object of the chain; 'obj' stays locked throughout.
 		 */
 		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
 			if (tobj != obj) {
 				VM_OBJECT_RLOCK(tobj);
 				kve->kve_offset += tobj->backing_object_offset;
 			}
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 			lobj = tobj;
 		}
 
 		kve->kve_start = (void*)entry->start;
 		kve->kve_end = (void*)entry->end;
 		kve->kve_offset += (off_t)entry->offset;
 
 		if (entry->protection & VM_PROT_READ)
 			kve->kve_protection |= KVME_PROT_READ;
 		if (entry->protection & VM_PROT_WRITE)
 			kve->kve_protection |= KVME_PROT_WRITE;
 		if (entry->protection & VM_PROT_EXECUTE)
 			kve->kve_protection |= KVME_PROT_EXEC;
 
 		if (entry->eflags & MAP_ENTRY_COW)
 			kve->kve_flags |= KVME_FLAG_COW;
 		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
 
 		/* Remember the map generation before dropping the lock. */
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
 
 		kve->kve_fileid = 0;
 		kve->kve_fsid = 0;
 		freepath = NULL;
 		fullpath = "";
 		if (lobj) {
 			kve->kve_type = vm_object_kvme_type(lobj, &vp);
 			if (kve->kve_type == KVME_TYPE_MGTDEVICE)
 				kve->kve_type = KVME_TYPE_UNKNOWN;
 			/* Reference the vnode before the object is unlocked. */
 			if (vp != NULL)
 				vref(vp);
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 
 			kve->kve_ref_count = obj->ref_count;
 			kve->kve_shadow_count = obj->shadow_count;
 			VM_OBJECT_RUNLOCK(obj);
 			if (vp != NULL) {
 				vn_fullpath(vp, &fullpath, &freepath);
 				cred = curthread->td_ucred;
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				if (VOP_GETATTR(vp, &va, cred) == 0) {
 					kve->kve_fileid = va.va_fileid;
 					/* truncate */
 					kve->kve_fsid = va.va_fsid;
 				}
 				/* Drops both the vnode lock and reference. */
 				vput(vp);
 			}
 		} else {
 			kve->kve_type = KVME_TYPE_NONE;
 			kve->kve_ref_count = 0;
 			kve->kve_shadow_count = 0;
 		}
 
 		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
 		if (freepath != NULL)
 			free(freepath, M_TEMP);
 
 		error = SYSCTL_OUT(req, kve, sizeof(*kve));
 		vm_map_lock_read(map);
 		if (error)
 			break;
 		/*
 		 * 'addr' equals entry->end here, so addr - 1 is the last
 		 * address of the entry that was just reported.
 		 */
 		if (last_timestamp != map->timestamp) {
 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
 			entry = tmp_entry;
 		}
 	}
 	vm_map_unlock_read(map);
 	vmspace_free(vm);
 	PRELE(p);
 	free(kve, M_TEMP);
 	return (error);
 }
 #endif	/* COMPAT_FREEBSD7 */
 
 /*
  * When KINFO_VMENTRY_SIZE is defined, check at compile time that
  * struct kinfo_vmentry has exactly that size.
  */
 #ifdef KINFO_VMENTRY_SIZE
 CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
 #endif
 
 /*
  * Count the resident pages backing map entry 'entry' of 'map'.
  *
  * *resident_count receives the number of pages found and *super is set
  * to true if pmap_mincore() reported a superpage mapping
  * (MINCORE_SUPER) for some part of the entry.  If vmmap_skip_res_cnt
  * is set, nothing is counted and the outputs stay 0 / false.
  *
  * For each page index the entry's object and then its backing objects
  * are searched.  When no object holds the exact page, the walk skips
  * ahead to the nearest following page found in the chain, or to the
  * end of the entry.
  */
 void
 kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
     int *resident_count, bool *super)
 {
 	vm_object_t obj, tobj;
 	vm_page_t m, m_adv;
 	vm_offset_t addr;
 	vm_paddr_t pa;
 	vm_pindex_t pi, pi_adv, pindex;
 
 	*super = false;
 	*resident_count = 0;
 	if (vmmap_skip_res_cnt)
 		return;
 
 	pa = 0;
 	obj = entry->object.vm_object;
 	addr = entry->start;
 	m_adv = NULL;
 	pi = OFF_TO_IDX(entry->offset);
 	/* Each iteration advances by pi_adv pages, set in the body. */
 	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
 		if (m_adv != NULL) {
 			/* The previous search already found this page. */
 			m = m_adv;
 		} else {
 			/* Default skip: to the end of the entry. */
 			pi_adv = atop(entry->end - addr);
 			pindex = pi;
 			for (tobj = obj;; tobj = tobj->backing_object) {
 				m = vm_page_find_least(tobj, pindex);
 				if (m != NULL) {
 					if (m->pindex == pindex)
 						break;
 					/* Remember the nearest later page. */
 					if (pi_adv > m->pindex - pindex) {
 						pi_adv = m->pindex - pindex;
 						m_adv = m;
 					}
 				}
 				if (tobj->backing_object == NULL)
 					goto next;
 				pindex += OFF_TO_IDX(tobj->
 				    backing_object_offset);
 			}
 		}
 		m_adv = NULL;
 		/*
 		 * Count a whole superpage when the page is flagged as part
 		 * of one, the superpage-sized and -aligned range fits in the
 		 * entry, and the pmap reports a superpage mapping.
 		 */
 		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
 		    (addr & (pagesizes[1] - 1)) == 0 &&
 		    (pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) {
 			*super = true;
 			pi_adv = atop(pagesizes[1]);
 		} else {
 			/*
 			 * We do not test the found page on validity.
 			 * Either the page is busy and being paged in,
 			 * or it was invalidated.  The first case
 			 * should be counted as resident, the second
 			 * is not so clear; we do account both.
 			 */
 			pi_adv = 1;
 		}
 		*resident_count += pi_adv;
 next:;
 	}
 }
 
 /*
  * Append one struct kinfo_vmentry record to sb for each entry of the
  * process's vm map; submap entries are skipped.
  *
  * Must be called with the process locked and will return unlocked.
  *
  * maxlen limits the number of bytes appended, -1 meaning no limit.  When
  * the next record would not fit, filling stops and 0 is returned.  With
  * KERN_VMMAP_PACK_KINFO set in flags each record is cut right after the
  * NUL terminating kve_path; in both modes kve_structsize is rounded up
  * to a multiple of sizeof(uint64_t).
  *
  * Returns 0 on success, ESRCH if no vmspace reference could be acquired,
  * or ENOMEM if sbuf_bcat() fails.
  */
 int
 kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
 {
 	vm_map_entry_t entry, tmp_entry;
 	struct vattr va;
 	vm_map_t map;
 	vm_object_t lobj, nobj, obj, tobj;
 	char *fullpath, *freepath;
 	struct kinfo_vmentry *kve;
 	struct ucred *cred;
 	struct vnode *vp;
 	struct vmspace *vm;
 	vm_offset_t addr;
 	unsigned int last_timestamp;
 	int error;
 	bool guard, super;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* Hold the process, then drop its lock for the rest of the walk. */
 	_PHOLD(p);
 	PROC_UNLOCK(p);
 	vm = vmspace_acquire_ref(p);
 	if (vm == NULL) {
 		PRELE(p);
 		return (ESRCH);
 	}
 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
 
 	error = 0;
 	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	VM_MAP_ENTRY_FOREACH(entry, map) {
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 
 		/*
 		 * Remember where this entry ends; used at the bottom of the
 		 * loop to find our place again if the map changed.
 		 */
 		addr = entry->end;
 		bzero(kve, sizeof(*kve));
 		obj = entry->object.vm_object;
 		if (obj != NULL) {
 			if ((obj->flags & OBJ_ANON) != 0)
 				kve->kve_obj = (uintptr_t)obj;
 
 			/*
 			 * Read-lock the whole backing chain, summing the
 			 * backing offsets; lobj ends up as the last object
 			 * of the chain.
 			 */
 			for (tobj = obj; tobj != NULL;
 			    tobj = tobj->backing_object) {
 				VM_OBJECT_RLOCK(tobj);
 				kve->kve_offset += tobj->backing_object_offset;
 				lobj = tobj;
 			}
 			if (obj->backing_object == NULL)
 				kve->kve_private_resident =
 				    obj->resident_page_count;
 			kern_proc_vmmap_resident(map, entry,
 			    &kve->kve_resident, &super);
 			if (super)
 				kve->kve_flags |= KVME_FLAG_SUPER;
 			/*
 			 * Unlock the intermediate objects only; obj and lobj
 			 * stay locked until they are read further down.
 			 */
 			for (tobj = obj; tobj != NULL; tobj = nobj) {
 				nobj = tobj->backing_object;
 				if (tobj != obj && tobj != lobj)
 					VM_OBJECT_RUNLOCK(tobj);
 			}
 		} else {
 			lobj = NULL;
 		}
 
 		kve->kve_start = entry->start;
 		kve->kve_end = entry->end;
 		kve->kve_offset += entry->offset;
 
 		if (entry->protection & VM_PROT_READ)
 			kve->kve_protection |= KVME_PROT_READ;
 		if (entry->protection & VM_PROT_WRITE)
 			kve->kve_protection |= KVME_PROT_WRITE;
 		if (entry->protection & VM_PROT_EXECUTE)
 			kve->kve_protection |= KVME_PROT_EXEC;
 
 		if (entry->eflags & MAP_ENTRY_COW)
 			kve->kve_flags |= KVME_FLAG_COW;
 		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
 		if (entry->eflags & MAP_ENTRY_GROWS_UP)
 			kve->kve_flags |= KVME_FLAG_GROWS_UP;
 		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
 			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
 		if (entry->eflags & MAP_ENTRY_USER_WIRED)
 			kve->kve_flags |= KVME_FLAG_USER_WIRED;
 
 		/* Sampled now because entry is not used while unlocked. */
 		guard = (entry->eflags & MAP_ENTRY_GUARD) != 0;
 
 		/*
 		 * The map lock is dropped for the vnode and sbuf work below;
 		 * the saved timestamp lets us notice a change afterwards.
 		 */
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
 
 		freepath = NULL;
 		fullpath = "";
 		if (lobj != NULL) {
 			kve->kve_type = vm_object_kvme_type(lobj, &vp);
 			/* Take our own vnode reference before unlocking. */
 			if (vp != NULL)
 				vref(vp);
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 
 			kve->kve_ref_count = obj->ref_count;
 			kve->kve_shadow_count = obj->shadow_count;
 			VM_OBJECT_RUNLOCK(obj);
 			if (vp != NULL) {
 				vn_fullpath(vp, &fullpath, &freepath);
 				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
 				cred = curthread->td_ucred;
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				if (VOP_GETATTR(vp, &va, cred) == 0) {
 					kve->kve_vn_fileid = va.va_fileid;
 					kve->kve_vn_fsid = va.va_fsid;
 					kve->kve_vn_fsid_freebsd11 =
 					    kve->kve_vn_fsid; /* truncate */
 					kve->kve_vn_mode =
 					    MAKEIMODE(va.va_type, va.va_mode);
 					kve->kve_vn_size = va.va_size;
 					kve->kve_vn_rdev = va.va_rdev;
 					kve->kve_vn_rdev_freebsd11 =
 					    kve->kve_vn_rdev; /* truncate */
 					kve->kve_status = KF_ATTR_VALID;
 				}
 				/* Drops both the vnode lock and our reference. */
 				vput(vp);
 			}
 		} else {
 			kve->kve_type = guard ? KVME_TYPE_GUARD :
 			    KVME_TYPE_NONE;
 			kve->kve_ref_count = 0;
 			kve->kve_shadow_count = 0;
 		}
 
 		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
 		if (freepath != NULL)
 			free(freepath, M_TEMP);
 
 		/* Pack record size down */
 		if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
 			kve->kve_structsize =
 			    offsetof(struct kinfo_vmentry, kve_path) +
 			    strlen(kve->kve_path) + 1;
 		else
 			kve->kve_structsize = sizeof(*kve);
 		kve->kve_structsize = roundup(kve->kve_structsize,
 		    sizeof(uint64_t));
 
 		/* Halt filling and truncate rather than exceeding maxlen */
 		if (maxlen != -1 && maxlen < kve->kve_structsize) {
 			error = 0;
 			/* Relock so the unlock after the loop is balanced. */
 			vm_map_lock_read(map);
 			break;
 		} else if (maxlen != -1)
 			maxlen -= kve->kve_structsize;
 
 		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
 			error = ENOMEM;
 		vm_map_lock_read(map);
 		if (error != 0)
 			break;
 		/*
 		 * The map changed while it was unlocked, so entry may be
 		 * stale.  Look up the last address of the range just
 		 * reported and continue the iteration from that entry.
 		 */
 		if (last_timestamp != map->timestamp) {
 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
 			entry = tmp_entry;
 		}
 	}
 	vm_map_unlock_read(map);
 	vmspace_free(vm);
 	PRELE(p);
 	free(kve, M_TEMP);
 	return (error);
 }
 
 static int
 sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	struct sbuf sb;
 	u_int namelen;
 	int error, error2, *name;
 
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	name = (int *)arg1;
 	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
 	if (error != 0) {
 		sbuf_delete(&sb);
 		return (error);
 	}
 	error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
 	error2 = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	return (error != 0 ? error : error2);
 }
 
 #if defined(STACK) || defined(DDB)
 /*
  * Sysctl handler that writes one struct kinfo_kstack to the request for
  * each thread of the process named by name[0].
  *
  * The thread ids are first collected under the process lock; each thread
  * is then looked up again by id, so threads that went away in between
  * are silently skipped.  Returns EINVAL for a wrong MIB length, the
  * pget() error, or the first SYSCTL_OUT() error.
  */
 static int
 sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
 {
 	struct kinfo_kstack *kkstp;
 	int error, i, *name, numthreads;
 	lwpid_t *lwpidarray;
 	struct thread *td;
 	struct stack *st;
 	struct sbuf sb;
 	struct proc *p;
 	u_int namelen;
 
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 
 	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
 	st = stack_create(M_WAITOK);
 
 	/*
 	 * Size the tid array.  The process lock is dropped around the
 	 * M_WAITOK allocation, so retry if the thread count grew meanwhile.
 	 */
 	lwpidarray = NULL;
 	PROC_LOCK(p);
 	do {
 		if (lwpidarray != NULL) {
 			free(lwpidarray, M_TEMP);
 			lwpidarray = NULL;
 		}
 		numthreads = p->p_numthreads;
 		PROC_UNLOCK(p);
 		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
 		    M_WAITOK | M_ZERO);
 		PROC_LOCK(p);
 	} while (numthreads < p->p_numthreads);
 
 	/*
 	 * XXXRW: During the below loop, execve(2) and countless other sorts
 	 * of changes could have taken place.  Should we check to see if the
 	 * vmspace has been replaced, or the like, in order to prevent
 	 * giving a snapshot that spans, say, execve(2), with some threads
 	 * before and some after?  Among other things, the credentials could
 	 * have changed, in which case the right to extract debug info might
 	 * no longer be assured.
 	 */
 	i = 0;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		KASSERT(i < numthreads,
 		    ("sysctl_kern_proc_kstack: numthreads"));
 		lwpidarray[i] = td->td_tid;
 		i++;
 	}
 	PROC_UNLOCK(p);
 	/* From here on numthreads is the number of ids actually recorded. */
 	numthreads = i;
 	for (i = 0; i < numthreads; i++) {
 		/*
 		 * NOTE(review): tdfind() appears to return with the process
 		 * locked on success; the PROC_UNLOCK() below pairs with it.
 		 */
 		td = tdfind(lwpidarray[i], p->p_pid);
 		if (td == NULL) {
 			continue;
 		}
 		bzero(kkstp, sizeof(*kkstp));
 		/* Fixed-length sbuf that formats straight into kkst_trace. */
 		(void)sbuf_new(&sb, kkstp->kkst_trace,
 		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
 		thread_lock(td);
 		kkstp->kkst_tid = td->td_tid;
 		if (TD_IS_SWAPPED(td))
 			kkstp->kkst_state = KKST_STATE_SWAPPED;
 		else if (stack_save_td(st, td) == 0)
 			kkstp->kkst_state = KKST_STATE_STACKOK;
 		else
 			kkstp->kkst_state = KKST_STATE_RUNNING;
 		thread_unlock(td);
 		PROC_UNLOCK(p);
 		stack_sbuf_print(&sb, st);
 		sbuf_finish(&sb);
 		sbuf_delete(&sb);
 		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
 		if (error)
 			break;
 	}
 	/* Release the process; pairs with the pget() call at the top. */
 	PRELE(p);
 	if (lwpidarray != NULL)
 		free(lwpidarray, M_TEMP);
 	stack_destroy(st);
 	free(kkstp, M_TEMP);
 	return (error);
 }
 #endif
 
 /*
  * This sysctl allows a process to retrieve the full list of groups from
  * itself or another process.
  */
 static int
 sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
 {
 	pid_t *pidp = (pid_t *)arg1;
 	unsigned int arglen = arg2;
 	struct proc *p;
 	struct ucred *cred;
 	int error;
 
 	if (arglen != 1)
 		return (EINVAL);
 	if (*pidp == -1) {	/* -1 means this process */
 		p = req->td->td_proc;
 		PROC_LOCK(p);
 	} else {
 		error = pget(*pidp, PGET_CANSEE, &p);
 		if (error != 0)
 			return (error);
 	}
 
 	cred = crhold(p->p_ucred);
 	PROC_UNLOCK(p);
 
 	error = SYSCTL_OUT(req, cred->cr_groups,
 	    cred->cr_ngroups * sizeof(gid_t));
 	crfree(cred);
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve or/and set the resource limit for
  * another process.
  */
 static int
 sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct rlimit rlim;
 	struct proc *p;
 	u_int which;
 	int flags, error;
 
 	if (namelen != 2)
 		return (EINVAL);
 
 	which = (u_int)name[1];
 	if (which >= RLIM_NLIMITS)
 		return (EINVAL);
 
 	if (req->newptr != NULL && req->newlen != sizeof(rlim))
 		return (EINVAL);
 
 	flags = PGET_HOLD | PGET_NOTWEXIT;
 	if (req->newptr != NULL)
 		flags |= PGET_CANDEBUG;
 	else
 		flags |= PGET_CANSEE;
 	error = pget((pid_t)name[0], flags, &p);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Retrieve limit.
 	 */
 	if (req->oldptr != NULL) {
 		PROC_LOCK(p);
 		lim_rlimit_proc(p, which, &rlim);
 		PROC_UNLOCK(p);
 	}
 	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
 	if (error != 0)
 		goto errout;
 
 	/*
 	 * Set limit.
 	 */
 	if (req->newptr != NULL) {
 		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
 		if (error == 0)
 			error = kern_proc_setrlimit(curthread, p, which, &rlim);
 	}
 
 errout:
 	PRELE(p);
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve ps_strings structure location of
  * another process.
  */
 static int
 sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	vm_offset_t ps_strings;
 	int error;
 #ifdef COMPAT_FREEBSD32
 	uint32_t ps_strings32;
 #endif
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
 	if (error != 0)
 		return (error);
 #ifdef COMPAT_FREEBSD32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		/*
 		 * We return 0 if the 32 bit emulation request is for a 64 bit
 		 * process.
 		 */
 		ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ?
 		    PTROUT(PROC_PS_STRINGS(p)) : 0;
 		PROC_UNLOCK(p);
 		error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32));
 		return (error);
 	}
 #endif
 	ps_strings = PROC_PS_STRINGS(p);
 	PROC_UNLOCK(p);
 	error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings));
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve umask of another process.
  */
 static int
 sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	int error;
 	u_short cmask;
 	pid_t pid;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	pid = (pid_t)name[0];
 	p = curproc;
 	if (pid == p->p_pid || pid == 0) {
 		cmask = p->p_pd->pd_cmask;
 		goto out;
 	}
 
 	error = pget(pid, PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 
 	cmask = p->p_pd->pd_cmask;
 	PRELE(p);
 out:
 	error = SYSCTL_OUT(req, &cmask, sizeof(cmask));
 	return (error);
 }
 
 /*
  * This sysctl allows a process to set and retrieve binary osreldate of
  * another process.
  */
 static int
 sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	int flags, error, osrel;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	if (req->newptr != NULL && req->newlen != sizeof(osrel))
 		return (EINVAL);
 
 	flags = PGET_HOLD | PGET_NOTWEXIT;
 	if (req->newptr != NULL)
 		flags |= PGET_CANDEBUG;
 	else
 		flags |= PGET_CANSEE;
 	error = pget((pid_t)name[0], flags, &p);
 	if (error != 0)
 		return (error);
 
 	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
 	if (error != 0)
 		goto errout;
 
 	if (req->newptr != NULL) {
 		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
 		if (error != 0)
 			goto errout;
 		if (osrel < 0) {
 			error = EINVAL;
 			goto errout;
 		}
 		p->p_osrel = osrel;
 	}
 errout:
 	PRELE(p);
 	return (error);
 }
 
 /*
  * Sysctl handler reporting the start and end addresses of the signal
  * trampoline of the process named by name[0].
  *
  * In the first branch the trampoline starts at PROC_SIGCODE(p) and its
  * length comes from sv_szsigcode, which is dereferenced as a pointer
  * unless SV_DSO_SIG is set, in which case the field value itself is the
  * length.  In the fallback branch the trampoline is the *sv_szsigcode
  * bytes directly below PROC_PS_STRINGS(p).
  *
  * For a request flagged SCTL_MASK32 a struct kinfo_sigtramp32 is
  * returned; it is left zeroed when the target is not an SV_ILP32
  * process.
  */
 static int
 sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	struct kinfo_sigtramp kst;
 	const struct sysentvec *sv;
 	int error;
 #ifdef COMPAT_FREEBSD32
 	struct kinfo_sigtramp32 kst32;
 #endif
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
 	if (error != 0)
 		return (error);
 	sv = p->p_sysent;
 #ifdef COMPAT_FREEBSD32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		/* Zero first so no stack bytes leak through the copy out. */
 		bzero(&kst32, sizeof(kst32));
 		if (SV_PROC_FLAG(p, SV_ILP32)) {
-			if (sv->sv_sigcode_base != 0) {
+			if (PROC_HAS_SHP(p)) {
 				kst32.ksigtramp_start = PROC_SIGCODE(p);
 				kst32.ksigtramp_end = kst32.ksigtramp_start +
 				    ((sv->sv_flags & SV_DSO_SIG) == 0 ?
 				    *sv->sv_szsigcode :
 				    (uintptr_t)sv->sv_szsigcode);
 			} else {
 				kst32.ksigtramp_start = PROC_PS_STRINGS(p) -
 				    *sv->sv_szsigcode;
 				kst32.ksigtramp_end = PROC_PS_STRINGS(p);
 			}
 		}
 		PROC_UNLOCK(p);
 		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
 		return (error);
 	}
 #endif
 	/* Native-size reply; same address computation as above. */
 	bzero(&kst, sizeof(kst));
-	if (sv->sv_sigcode_base != 0) {
+	if (PROC_HAS_SHP(p)) {
 		kst.ksigtramp_start = (char *)PROC_SIGCODE(p);
 		kst.ksigtramp_end = (char *)kst.ksigtramp_start +
 		    ((sv->sv_flags & SV_DSO_SIG) == 0 ? *sv->sv_szsigcode :
 		    (uintptr_t)sv->sv_szsigcode);
 	} else {
 		kst.ksigtramp_start = (char *)PROC_PS_STRINGS(p) -
 		    *sv->sv_szsigcode;
 		kst.ksigtramp_end = (char *)PROC_PS_STRINGS(p);
 	}
 	PROC_UNLOCK(p);
 	error = SYSCTL_OUT(req, &kst, sizeof(kst));
 	return (error);
 }
 
 /*
  * Read-only sysctl handler reporting td_sigblock_ptr of one thread.
  *
  * name[0] up to PID_MAX is taken as a process id and the first thread of
  * that process is used; a larger value is matched against the thread
  * ids of the process found by pget().
  *
  * Returns EINVAL for a wrong MIB length, an attempted write, or an ILP32
  * caller asking about a non-ILP32 process; ESRCH if no thread matches;
  * ENOTTY if the thread does not have TDP_SIGFASTBLOCK set.
  */
 static int
 sysctl_kern_proc_sigfastblk(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	pid_t pid;
 	struct proc *p;
 	struct thread *td1;
 	uintptr_t addr;
 #ifdef COMPAT_FREEBSD32
 	uint32_t addr32;
 #endif
 	int error;
 
 	if (namelen != 1 || req->newptr != NULL)
 		return (EINVAL);
 
 	pid = (pid_t)name[0];
 	error = pget(pid, PGET_HOLD | PGET_NOTWEXIT | PGET_CANDEBUG, &p);
 	if (error != 0)
 		return (error);
 
 	PROC_LOCK(p);
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32)) {
 		if (!SV_PROC_FLAG(p, SV_ILP32)) {
 			error = EINVAL;
 			goto errlocked;
 		}
 	}
 #endif
 	if (pid <= PID_MAX) {
 		td1 = FIRST_THREAD_IN_PROC(p);
 	} else {
 		/* td1 is NULL after the loop if no thread id matched. */
 		FOREACH_THREAD_IN_PROC(p, td1) {
 			if (td1->td_tid == pid)
 				break;
 		}
 	}
 	if (td1 == NULL) {
 		error = ESRCH;
 		goto errlocked;
 	}
 	/*
 	 * The access to the private thread flags.  It is fine as far
 	 * as no out-of-thin-air values are read from td_pflags, and
 	 * usermode read of the td_sigblock_ptr is racy inherently,
 	 * since target process might have already changed it
 	 * meantime.
 	 */
 	if ((td1->td_pflags & TDP_SIGFASTBLOCK) != 0)
 		addr = (uintptr_t)td1->td_sigblock_ptr;
 	else
 		error = ENOTTY;
 
 errlocked:
 	/* Common exit: drop the hold taken by pget() and the lock. */
 	_PRELE(p);
 	PROC_UNLOCK(p);
 	if (error != 0)
 		return (error);
 
 	/* addr is only read here, where error == 0 guarantees it was set. */
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32)) {
 		addr32 = addr;
 		error = SYSCTL_OUT(req, &addr32, sizeof(addr32));
 	} else
 #endif
 		error = SYSCTL_OUT(req, &addr, sizeof(addr));
 	return (error);
 }
 
 /*
  * Sysctl handler reporting the address space layout of the process
  * named by name[0]: user address bounds, text/data/stack addresses and
  * sizes, and the map flags translated to KMAP_FLAG_* bits.
  *
  * An ILP32 caller receives a struct kinfo_vm_layout32 with every field
  * narrowed to 32 bits, and gets EINVAL when the target is not an ILP32
  * process.  Other errors are EINVAL for a wrong MIB length, the pget()
  * error, or the SYSCTL_OUT() error.
  *
  * The vmspace reference taken here is released exactly once, on the
  * common exit path.
  */
 static int
 sysctl_kern_proc_vm_layout(SYSCTL_HANDLER_ARGS)
 {
 	struct kinfo_vm_layout kvm;
 	struct proc *p;
 	struct vmspace *vmspace;
 	int error, *name;
 
 	name = (int *)arg1;
 	if ((u_int)arg2 != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
 	if (error != 0)
 		return (error);
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32)) {
 		if (!SV_PROC_FLAG(p, SV_ILP32)) {
 			PROC_UNLOCK(p);
 			return (EINVAL);
 		}
 	}
 #endif
 	vmspace = vmspace_acquire_ref(p);
 	PROC_UNLOCK(p);
 
 	/* Zero first so unset fields and padding are not copied out. */
 	memset(&kvm, 0, sizeof(kvm));
 	kvm.kvm_min_user_addr = vm_map_min(&vmspace->vm_map);
 	kvm.kvm_max_user_addr = vm_map_max(&vmspace->vm_map);
 	kvm.kvm_text_addr = (uintptr_t)vmspace->vm_taddr;
 	kvm.kvm_text_size = vmspace->vm_tsize;
 	kvm.kvm_data_addr = (uintptr_t)vmspace->vm_daddr;
 	kvm.kvm_data_size = vmspace->vm_dsize;
 	kvm.kvm_stack_addr = (uintptr_t)vmspace->vm_maxsaddr;
 	kvm.kvm_stack_size = vmspace->vm_ssize;
 	if ((vmspace->vm_map.flags & MAP_WIREFUTURE) != 0)
 		kvm.kvm_map_flags |= KMAP_FLAG_WIREFUTURE;
 	if ((vmspace->vm_map.flags & MAP_ASLR) != 0)
 		kvm.kvm_map_flags |= KMAP_FLAG_ASLR;
 	if ((vmspace->vm_map.flags & MAP_ASLR_IGNSTART) != 0)
 		kvm.kvm_map_flags |= KMAP_FLAG_ASLR_IGNSTART;
 	if ((vmspace->vm_map.flags & MAP_WXORX) != 0)
 		kvm.kvm_map_flags |= KMAP_FLAG_WXORX;
 	if ((vmspace->vm_map.flags & MAP_ASLR_STACK) != 0)
 		kvm.kvm_map_flags |= KMAP_FLAG_ASLR_STACK;
 
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32)) {
 		struct kinfo_vm_layout32 kvm32;
 
 		memset(&kvm32, 0, sizeof(kvm32));
 		kvm32.kvm_min_user_addr = (uint32_t)kvm.kvm_min_user_addr;
 		kvm32.kvm_max_user_addr = (uint32_t)kvm.kvm_max_user_addr;
 		kvm32.kvm_text_addr = (uint32_t)kvm.kvm_text_addr;
 		kvm32.kvm_text_size = (uint32_t)kvm.kvm_text_size;
 		kvm32.kvm_data_addr = (uint32_t)kvm.kvm_data_addr;
 		kvm32.kvm_data_size = (uint32_t)kvm.kvm_data_size;
 		kvm32.kvm_stack_addr = (uint32_t)kvm.kvm_stack_addr;
 		kvm32.kvm_stack_size = (uint32_t)kvm.kvm_stack_size;
 		kvm32.kvm_map_flags = kvm.kvm_map_flags;
 		/*
 		 * Do not free the vmspace here: the out label below drops
 		 * the single reference for both the 32 and 64 bit paths.
 		 */
 		error = SYSCTL_OUT(req, &kvm32, sizeof(kvm32));
 		goto out;
 	}
 #endif
 
 	error = SYSCTL_OUT(req, &kvm, sizeof(kvm));
 #ifdef COMPAT_FREEBSD32
 out:
 #endif
 	vmspace_free(vmspace);
 	return (error);
 }
 
 /*
  * Registration of the kern.proc sysctl tree.  Every node below hangs off
  * _kern_proc and names the handler function that serves it.
  */
 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,  0,
     "Process table");
 
 SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
 	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
 	"Return entire process table");
 
 /* Process table selectors, all served by sysctl_kern_proc. */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Return process table, no threads");
 
 /* Per-process information nodes, each with its own handler. */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
 	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
 	sysctl_kern_proc_args, "Process argument list");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc_env, "Process environment");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
 	"Process syscall vector name (ABI type)");
 
 /*
  * The same process table selectors with KERN_PROC_INC_THREAD or'ed into
  * the node number, registered under "_td" names.
  */
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
 	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
 	"Return process table, including threads");
 
 /* Address space, stack, credential and ABI detail nodes. */
 #ifdef COMPAT_FREEBSD7
 static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
 #endif
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");
 
 #if defined(STACK) || defined(DDB)
 static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
 #endif
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
 	"Process resource limits");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
 	"Process ps_strings location");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
 	"Process binary osreldate");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
 	"Process signal trampoline location");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGFASTBLK, sigfastblk, CTLFLAG_RD |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_sigfastblk,
 	"Thread sigfastblock address");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_VM_LAYOUT, vm_layout, CTLFLAG_RD |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_vm_layout,
 	"Process virtual address space layout info");
 
 /*
  * Lock taken exclusively by stop_all_proc_block() and dropped by
  * stop_all_proc_unblock().
  */
 static struct sx stop_all_proc_blocker;
 SX_SYSINIT(stop_all_proc_blocker, &stop_all_proc_blocker, "sapblk");
 
 /*
  * Take the blocker lock exclusively.  Returns true when the lock was
  * obtained and false when sx_xlock_sig() reported an error.
  */
 bool
 stop_all_proc_block(void)
 {
 	int rv;
 
 	rv = sx_xlock_sig(&stop_all_proc_blocker);
 	return (rv == 0);
 }
 
 /*
  * Release the blocker lock taken by stop_all_proc_block().
  */
 void
 stop_all_proc_unblock(void)
 {
 
 	sx_xunlock(&stop_all_proc_blocker);
 }
 
 /*
  * Generation counter sampled by stop_all_proc() before its walk of
  * allproc and compared afterwards.
  * NOTE(review): presumably incremented wherever a process is added to
  * allproc -- confirm against the fork path.
  */
 int allproc_gen;
 
 /*
  * The purpose of stop_all_proc() is to stop all processes which have
  * usermode, except the current process for obvious reasons.  This makes
  * it somewhat unreliable when invoked from a multithreaded process.  The
  * service must not be user-callable anyway.
  *
  * Returns without doing anything if stop_all_proc_block() fails.
  * Otherwise the allproc list is rescanned until a complete pass finds
  * nothing left to stop, no exiting or single-stopped process, and no
  * change of allproc_gen.
  */
 void
 stop_all_proc(void)
 {
 	struct proc *cp, *p;
 	int r, gen;
 	bool restart, seen_stopped, seen_exiting, stopped_some;
 
 	if (!stop_all_proc_block())
 		return;
 
 	cp = curproc;
 allproc_loop:
 	sx_xlock(&allproc_lock);
 	gen = allproc_gen;
 	seen_exiting = seen_stopped = stopped_some = restart = false;
 	/*
 	 * The current process doubles as the list cursor: it is moved to
 	 * the head and then stepped past one process at a time, so the
 	 * position is kept even while allproc_lock is dropped below.
 	 */
 	LIST_REMOVE(cp, p_list);
 	LIST_INSERT_HEAD(&allproc, cp, p_list);
 	for (;;) {
 		p = LIST_NEXT(cp, p_list);
 		if (p == NULL)
 			break;
 		LIST_REMOVE(cp, p_list);
 		LIST_INSERT_AFTER(p, cp, p_list);
 		PROC_LOCK(p);
 		/* Kernel, system and already totally stopped: skip. */
 		if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/* Exiting processes are skipped but force another pass. */
 		if ((p->p_flag2 & P2_WEXIT) != 0) {
 			seen_exiting = true;
 			PROC_UNLOCK(p);
 			continue;
 		}
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			/*
 			 * Stopped processes are tolerated when there
 			 * are no other processes which might continue
 			 * them.  P_STOPPED_SINGLE but not
 			 * P_TOTAL_STOP process still has at least one
 			 * thread running.
 			 */
 			seen_stopped = true;
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/*
 		 * thread_single() is called with allproc_lock released and
 		 * the process both locked and held.
 		 */
 		sx_xunlock(&allproc_lock);
 		_PHOLD(p);
 		r = thread_single(p, SINGLE_ALLPROC);
 		if (r != 0)
 			restart = true;
 		else
 			stopped_some = true;
 		_PRELE(p);
 		PROC_UNLOCK(p);
 		sx_xlock(&allproc_lock);
 	}
 	/* Catch forked children we did not see in iteration. */
 	if (gen != allproc_gen)
 		restart = true;
 	sx_xunlock(&allproc_lock);
 	/* Any activity in this pass means another pass is needed. */
 	if (restart || stopped_some || seen_exiting || seen_stopped) {
 		kern_yield(PRI_USER);
 		goto allproc_loop;
 	}
 }
 
 /*
  * Undo stop_all_proc(): end the SINGLE_ALLPROC state of every process
  * that has P_TOTAL_STOP set, then release the blocker lock through
  * stop_all_proc_unblock().
  */
 void
 resume_all_proc(void)
 {
 	struct proc *cp, *p;
 
 	cp = curproc;
 	sx_xlock(&allproc_lock);
 again:
 	/*
 	 * The current process is used as the list cursor, so the position
 	 * is kept while allproc_lock is dropped inside the loop.
 	 */
 	LIST_REMOVE(cp, p_list);
 	LIST_INSERT_HEAD(&allproc, cp, p_list);
 	for (;;) {
 		p = LIST_NEXT(cp, p_list);
 		if (p == NULL)
 			break;
 		LIST_REMOVE(cp, p_list);
 		LIST_INSERT_AFTER(p, cp, p_list);
 		PROC_LOCK(p);
 		if ((p->p_flag & P_TOTAL_STOP) != 0) {
 			/*
 			 * thread_single_end() is called with allproc_lock
 			 * released and the process locked and held.
 			 */
 			sx_xunlock(&allproc_lock);
 			_PHOLD(p);
 			thread_single_end(p, SINGLE_ALLPROC);
 			_PRELE(p);
 			PROC_UNLOCK(p);
 			sx_xlock(&allproc_lock);
 		} else {
 			PROC_UNLOCK(p);
 		}
 	}
 	/* Did the loop above miss any stopped process? */
 	FOREACH_PROC_IN_SYSTEM(p) {
 		/* No need for proc lock. */
 		if ((p->p_flag & P_TOTAL_STOP) != 0)
 			goto again;
 	}
 	sx_xunlock(&allproc_lock);
 
 	stop_all_proc_unblock();
 }
 
 /* #define	TOTAL_STOP_DEBUG	1 */
 #ifdef TOTAL_STOP_DEBUG
 volatile static int ap_resume;
 #include <sys/mount.h>
 
 /*
  * Debug sysctl handler.  Writing a non-zero value stops all processes
  * and suspends the syncer, spins until ap_resume becomes non-zero, then
  * resumes the syncer and all processes.  ap_resume is cleared on every
  * call.
  */
 static int
 sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
 {
 	int req_val, rv;
 
 	ap_resume = 0;
 	req_val = 0;
 	rv = sysctl_handle_int(oidp, &req_val, 0, req);
 	if (rv != 0)
 		return (rv);
 	/* A plain read, or a write of zero, triggers nothing. */
 	if (req->newptr == NULL || req_val == 0)
 		return (0);
 
 	stop_all_proc();
 	syncer_suspend();
 	/* Busy-wait until ap_resume is set to a non-zero value. */
 	for (;;) {
 		if (ap_resume != 0)
 			break;
 	}
 	syncer_resume();
 	resume_all_proc();
 	return (0);
 }
 
 SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
     CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
     sysctl_debug_stop_all_proc, "I",
     "");
 #endif
diff --git a/sys/kern/kern_sharedpage.c b/sys/kern/kern_sharedpage.c
index 3aa5501eafdc..f90e90b0feaf 100644
--- a/sys/kern/kern_sharedpage.c
+++ b/sys/kern/kern_sharedpage.c
@@ -1,411 +1,394 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2010, 2012 Konstantin Belousov <kib@FreeBSD.org>
  * Copyright (c) 2015 The FreeBSD Foundation
  * All rights reserved.
  *
  * Portions of this software were developed by Konstantin Belousov
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/rwlock.h>
 #include <sys/stddef.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/vdso.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 
 /* Held exclusively around allocations from the shared page. */
 static struct sx shared_page_alloc_sx;
 /* VM object backing the shared page; created in shared_page_init(). */
 static vm_object_t shared_page_obj;
 /* Byte offset at which the next allocation search starts. */
 static int shared_page_free;
 /* Kernel virtual address at which the shared page is mapped. */
 char *shared_page_mapping;
 
 #ifdef RANDOM_FENESTRASX
 static struct vdso_fxrng_generation *fxrng_shpage_mapping;
 
 static bool fxrng_enabled = true;
 SYSCTL_BOOL(_debug, OID_AUTO, fxrng_vdso_enable, CTLFLAG_RWTUN, &fxrng_enabled,
     0, "Enable FXRNG VDSO");
 #endif
 
 /*
  * Copy size bytes from data into the kernel mapping of the shared page,
  * starting at byte offset base.  No bounds checking is done here.
  */
 void
 shared_page_write(int base, int size, const void *data)
 {
 	char *dst;
 
 	dst = shared_page_mapping + base;
 	bcopy(data, dst, size);
 }
 
 /*
  * Carve size bytes, aligned to align, out of the shared page.
  *
  * Returns the byte offset of the new area, or -1 when it does not fit.
  * Note that an area that would end exactly at the end of the backing
  * object is also rejected (the comparison is >=).  On success the
  * allocation cursor shared_page_free is advanced past the new area.
  */
 static int
 shared_page_alloc_locked(int size, int align)
 {
 	int start, end;
 
 	start = roundup(shared_page_free, align);
 	end = start + size;
 	if (end >= IDX_TO_OFF(shared_page_obj->size))
 		return (-1);
 	shared_page_free = end;
 	return (start);
 }
 
 /*
  * Locked wrapper around shared_page_alloc_locked(): reserve size bytes
  * with the given alignment and return the offset, or -1 on failure.
  */
 int
 shared_page_alloc(int size, int align)
 {
 	int off;
 
 	sx_xlock(&shared_page_alloc_sx);
 	off = shared_page_alloc_locked(size, align);
 	sx_xunlock(&shared_page_alloc_sx);
 
 	return (off);
 }
 
 /*
  * Reserve size bytes with the given alignment in the shared page and
  * copy data into the reserved area, all under the allocation lock.
  *
  * Returns the offset of the area, or -1 (nothing copied) when the
  * allocation fails.
  */
 int
 shared_page_fill(int size, int align, const void *data)
 {
 	int off;
 
 	sx_xlock(&shared_page_alloc_sx);
 	off = shared_page_alloc_locked(size, align);
 	if (off == -1) {
 		sx_xunlock(&shared_page_alloc_sx);
 		return (-1);
 	}
 	shared_page_write(off, size, data);
 	sx_xunlock(&shared_page_alloc_sx);
 
 	return (off);
 }
 
 /*
  * One-time setup of the shared page: initialize the allocation lock,
  * create a one-page OBJT_PHYS object, grab and validate its zeroed page 0,
  * and map that page into kernel virtual address space so that
  * shared_page_write() can fill it.
  */
 static void
 shared_page_init(void *dummy __unused)
 {
 	vm_page_t m;
 	vm_offset_t addr;
 
 	sx_init(&shared_page_alloc_sx, "shpsx");
 	shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE,
 	    VM_PROT_DEFAULT, 0, NULL);
 	/* The object write lock is held only around the page grab. */
 	VM_OBJECT_WLOCK(shared_page_obj);
 	m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_ZERO);
 	VM_OBJECT_WUNLOCK(shared_page_obj);
 	vm_page_valid(m);
 	vm_page_xunbusy(m);
 	/* Give the page a kernel mapping and remember its address. */
 	addr = kva_alloc(PAGE_SIZE);
 	pmap_qenter(addr, &m, 1);
 	shared_page_mapping = (char *)addr;
 }
 
 SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init,
     NULL);
 
 /*
  * Push the timehands update to the shared page.
  *
  * The lockless update scheme is similar to the one used to update the
  * in-kernel timehands, see sys/kern/kern_tc.c:tc_windup() (which
  * calls us after the timehands are updated).
  *
  * svtk carries the per-ABI bookkeeping: the offset of the timekeep area
  * in the shared page, the index of the current timehands slot and the
  * generation counter.
  */
 static void
 timehands_update(struct vdso_sv_tk *svtk)
 {
 	struct vdso_timehands th;
 	struct vdso_timekeep *tk;
 	uint32_t enabled, idx;
 
 	enabled = tc_fill_vdso_timehands(&th);
 	th.th_gen = 0;
 	/* Move to the next slot, wrapping around after VDSO_TH_NUM. */
 	idx = svtk->sv_timekeep_curr;
 	if (++idx >= VDSO_TH_NUM)
 		idx = 0;
 	svtk->sv_timekeep_curr = idx;
 	/* The generation counter skips zero when it wraps. */
 	if (++svtk->sv_timekeep_gen == 0)
 		svtk->sv_timekeep_gen = 1;
 
 	tk = (struct vdso_timekeep *)(shared_page_mapping +
 	    svtk->sv_timekeep_off);
 	/*
 	 * Zero the slot's generation before rewriting the slot, and publish
 	 * the new generation and the current index only afterwards, with
 	 * release semantics.
 	 * NOTE(review): presumably a reader treats th_gen == 0 as "slot
 	 * being updated" and retries -- confirm against the userspace reader.
 	 */
 	tk->tk_th[idx].th_gen = 0;
 	atomic_thread_fence_rel();
 	if (enabled)
 		tk->tk_th[idx] = th;
 	atomic_store_rel_32(&tk->tk_th[idx].th_gen, svtk->sv_timekeep_gen);
 	atomic_store_rel_32(&tk->tk_current, idx);
 
 	/*
 	 * The ordering of the assignment to tk_enabled relative to
 	 * the update of the vdso_timehands is not important.
 	 */
 	tk->tk_enabled = enabled;
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * 32-bit compat counterpart of timehands_update(): the same slot
  * rotation, generation handling and publish ordering, applied to the
  * vdso_timekeep32/vdso_timehands32 layouts.
  */
 static void
 timehands_update32(struct vdso_sv_tk *svtk)
 {
 	struct vdso_timehands32 th;
 	struct vdso_timekeep32 *tk;
 	uint32_t enabled, idx;
 
 	enabled = tc_fill_vdso_timehands32(&th);
 	th.th_gen = 0;
 	/* Move to the next slot, wrapping around after VDSO_TH_NUM. */
 	idx = svtk->sv_timekeep_curr;
 	if (++idx >= VDSO_TH_NUM)
 		idx = 0;
 	svtk->sv_timekeep_curr = idx;
 	/* The generation counter skips zero when it wraps. */
 	if (++svtk->sv_timekeep_gen == 0)
 		svtk->sv_timekeep_gen = 1;
 
 	tk = (struct vdso_timekeep32 *)(shared_page_mapping +
 	    svtk->sv_timekeep_off);
 	/* Zero the slot generation first; publish the new one last. */
 	tk->tk_th[idx].th_gen = 0;
 	atomic_thread_fence_rel();
 	if (enabled)
 		tk->tk_th[idx] = th;
 	atomic_store_rel_32(&tk->tk_th[idx].th_gen, svtk->sv_timekeep_gen);
 	atomic_store_rel_32(&tk->tk_current, idx);
 	tk->tk_enabled = enabled;
 }
 #endif
 
 /*
  * This is hackish, but it is the easiest way to avoid creating list
  * structures that need to be iterated over from the hardclock
  * interrupt context.
  */
 static struct vdso_sv_tk *host_svtk;
 #ifdef COMPAT_FREEBSD32
 static struct vdso_sv_tk *compat32_svtk;
 #endif
 
 /*
  * Push the current timehands to the shared page for every registered
  * timekeep area: the host one and, under COMPAT_FREEBSD32, the 32-bit
  * one.  Areas that have not been registered yet (NULL) are skipped.
  */
 void
 timekeep_push_vdso(void)
 {
 
 	if (host_svtk != NULL)
 		timehands_update(host_svtk);
 #ifdef COMPAT_FREEBSD32
 	if (compat32_svtk != NULL)
 		timehands_update32(compat32_svtk);
 #endif
 }
 
 /*
  * Allocate the native timekeep area in the shared page and the
  * bookkeeping structure that tracks it.
  *
  * The area holds a struct vdso_timekeep plus VDSO_TH_NUM timehands,
  * aligned to 16 bytes; its tk_ver field is set to VDSO_TK_VER_CURR.
  * Returns the zero-initialized bookkeeping structure with
  * sv_timekeep_off pointing at the area.
  */
 struct vdso_sv_tk *
 alloc_sv_tk(void)
 {
 	struct vdso_sv_tk *svtk;
 	uint32_t version;
 	int area_off;
 
 	svtk = malloc(sizeof(struct vdso_sv_tk), M_TEMP, M_WAITOK | M_ZERO);
 
 	area_off = shared_page_alloc(sizeof(struct vdso_timekeep) +
 	    sizeof(struct vdso_timehands) * VDSO_TH_NUM, 16);
 	KASSERT(area_off != -1, ("tk_base -1 for native"));
 
 	/* Stamp the layout version into the freshly reserved area. */
 	version = VDSO_TK_VER_CURR;
 	shared_page_write(area_off + offsetof(struct vdso_timekeep, tk_ver),
 	    sizeof(uint32_t), &version);
 
 	svtk->sv_timekeep_off = area_off;
 	timekeep_push_vdso();
 	return (svtk);
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * Allocate the 32-bit compat timekeep area in the shared page and the
  * bookkeeping structure that tracks it.
  *
  * The area holds a struct vdso_timekeep32 plus VDSO_TH_NUM 32-bit
  * timehands, aligned to 16 bytes; its tk_ver field is set to
  * VDSO_TK_VER_CURR.  Returns the zero-initialized bookkeeping structure
  * with sv_timekeep_off pointing at the area.
  */
 struct vdso_sv_tk *
 alloc_sv_tk_compat32(void)
 {
 	struct vdso_sv_tk *svtk;
 	uint32_t version;
 	int area_off;
 
 	svtk = malloc(sizeof(struct vdso_sv_tk), M_TEMP, M_WAITOK | M_ZERO);
 
 	area_off = shared_page_alloc(sizeof(struct vdso_timekeep32) +
 	    sizeof(struct vdso_timehands32) * VDSO_TH_NUM, 16);
 	KASSERT(area_off != -1, ("tk_base -1 for 32bit"));
 
 	/* Stamp the layout version into the freshly reserved area. */
 	version = VDSO_TK_VER_CURR;
 	shared_page_write(area_off + offsetof(struct vdso_timekeep32,
 	    tk_ver), sizeof(uint32_t), &version);
 
 	svtk->sv_timekeep_off = area_off;
 	timekeep_push_vdso();
 	return (svtk);
 }
 #endif
 
 #ifdef RANDOM_FENESTRASX
 /*
  * Publish the low 32 bits of the fxrng seed generation to the shared
  * page with release semantics.  Does nothing when the shared-page slot
  * has not been allocated yet or when fxrng_enabled is false.
  */
 void
 fxrng_push_seed_generation(uint64_t gen)
 {
 	uint32_t gen32;
 
 	if (fxrng_shpage_mapping != NULL && fxrng_enabled) {
 		KASSERT(gen < INT32_MAX,
 		    ("fxrng seed version shouldn't roll over a 32-bit counter "
 		     "for approximately 456,000 years"));
 		gen32 = (uint32_t)gen;
 		atomic_store_rel_32(&fxrng_shpage_mapping->fx_generation32,
 		    gen32);
 	}
 }
 
 /*
  * Reserve the shared-page slot for the fxrng seed generation, point
  * fxrng_shpage_mapping at it and initialize it.
  */
 static void
 alloc_sv_fxrng_generation(void)
 {
 	int base;
 
 	/*
 	 * Allocate a full cache line for the fxrng root generation (64-bit
 	 * counter, or truncated 32-bit counter on ILP32 userspace).  It is
 	 * important that the line is not shared with frequently dirtied data,
 	 * and the shared page allocator lacks a __read_mostly mechanism.
 	 * However, PAGE_SIZE is typically large relative to the amount of
 	 * stuff we've got in it so far, so maybe the possible waste isn't an
 	 * issue.
 	 */
 	base = shared_page_alloc(CACHE_LINE_SIZE, CACHE_LINE_SIZE);
 	KASSERT(base != -1, ("%s: base allocation failed", __func__));
 	fxrng_shpage_mapping = (void *)(shared_page_mapping + base);
 	/*
 	 * Assigning a compound literal sets fx_vdso_version and
 	 * zero-initializes every other member of the slot.
 	 */
 	*fxrng_shpage_mapping = (struct vdso_fxrng_generation) {
 		.fx_vdso_version = VDSO_FXRNG_VER_CURR,
 	};
 }
 #endif /* RANDOM_FENESTRASX */
 
 /*
  * Shared-page setup for one sysentvec, passed as param.
  *
  * Does nothing unless SV_SHP is set.  Otherwise it attaches the global
  * shared page object and, depending on the flags, copies the signal
  * trampoline (or vdso image when SV_DSO_SIG is set) into the shared page,
  * registers or reuses the timekeep area (SV_TIMEKEEP), and allocates or
  * reuses the fxrng generation slot (SV_RNG_SEED_VER), recording the
  * location of each in the sysentvec.  Panics if the trampoline/vdso
  * copy does not fit.
  */
 void
 exec_sysvec_init(void *param)
 {
 	struct sysentvec *sv;
-	vm_offset_t sb;
-#ifdef RANDOM_FENESTRASX
-	ptrdiff_t base;
-#endif
 	u_int flags;
 	int res;
 
 	sv = param;
 	flags = sv->sv_flags;
 	if ((flags & SV_SHP) == 0)
 		return;
 	MPASS(sv->sv_shared_page_obj == NULL);
 	MPASS(sv->sv_shared_page_base != 0);
 
 	sv->sv_shared_page_obj = shared_page_obj;
 	if ((flags & SV_ABI_MASK) == SV_ABI_FREEBSD) {
 		if ((flags & SV_DSO_SIG) != 0) {
-			sb = sv->sv_shared_page_base;
 			res = shared_page_fill((uintptr_t)sv->sv_szsigcode,
 			    16, sv->sv_sigcode);
 			if (res == -1)
-				panic("copying sigtramp to shared page");
-			sb += res;
-			sv->sv_vdso_base = sb;
-			sb += sv->sv_sigcodeoff;
-			sv->sv_sigcode_base = sb;
+				panic("copying vdso to shared page");
+			sv->sv_vdso_offset = res;
+			sv->sv_sigcode_offset = res + sv->sv_sigcodeoff;
 		} else {
-			sv->sv_sigcode_base = sv->sv_shared_page_base +
-			    shared_page_fill(*(sv->sv_szsigcode), 16,
-			    sv->sv_sigcode);
+			res = shared_page_fill(*(sv->sv_szsigcode),
+			    16, sv->sv_sigcode);
+			if (res == -1)
+				panic("copying sigtramp to shared page");
+			sv->sv_sigcode_offset = res;
 		}
 	}
 	if ((flags & SV_TIMEKEEP) != 0) {
 #ifdef COMPAT_FREEBSD32
 		if ((flags & SV_ILP32) != 0) {
 			if ((flags & SV_ABI_MASK) == SV_ABI_FREEBSD) {
 				KASSERT(compat32_svtk == NULL,
 				    ("Compat32 already registered"));
 				compat32_svtk = alloc_sv_tk_compat32();
 			} else {
 				KASSERT(compat32_svtk != NULL,
 				    ("Compat32 not registered"));
 			}
-			sv->sv_timekeep_base = sv->sv_shared_page_base +
-			    compat32_svtk->sv_timekeep_off;
+			sv->sv_timekeep_offset = compat32_svtk->sv_timekeep_off;
 		} else {
 #endif
 			if ((flags & SV_ABI_MASK) == SV_ABI_FREEBSD) {
 				KASSERT(host_svtk == NULL,
 				    ("Host already registered"));
 				host_svtk = alloc_sv_tk();
 			} else {
 				KASSERT(host_svtk != NULL,
 				    ("Host not registered"));
 			}
-			sv->sv_timekeep_base = sv->sv_shared_page_base +
-			    host_svtk->sv_timekeep_off;
+			sv->sv_timekeep_offset = host_svtk->sv_timekeep_off;
 #ifdef COMPAT_FREEBSD32
 		}
 #endif
 	}
 #ifdef RANDOM_FENESTRASX
 	if ((flags & (SV_ABI_MASK | SV_RNG_SEED_VER)) ==
 	    (SV_ABI_FREEBSD | SV_RNG_SEED_VER)) {
 		/*
 		 * Only allocate a single VDSO entry for multiple sysentvecs,
 		 * i.e., native and COMPAT32.
 		 */
 		if (fxrng_shpage_mapping == NULL)
 			alloc_sv_fxrng_generation();
-		base = (char *)fxrng_shpage_mapping - shared_page_mapping;
-		sv->sv_fxrng_gen_base = sv->sv_shared_page_base + base;
+		sv->sv_fxrng_gen_offset =
+		    (char *)fxrng_shpage_mapping - shared_page_mapping;
 	}
 #endif
 }
 
 /*
  * Make sv2 share the shared-page setup already done for sv.
  *
  * Both sysentvecs must have SV_SHP set and agree on the ABI, SV_TIMEKEEP,
  * SV_DSO_SIG and SV_RNG_SEED_VER flags (asserted with MPASS).  sv2 gets
  * sv's shared page object and the locations of the signal trampoline and
  * vdso; the timekeep and fxrng locations are propagated only for the
  * FreeBSD ABI.
  */
 void
 exec_sysvec_init_secondary(struct sysentvec *sv, struct sysentvec *sv2)
 {
 	MPASS((sv2->sv_flags & SV_ABI_MASK) == (sv->sv_flags & SV_ABI_MASK));
 	MPASS((sv2->sv_flags & SV_TIMEKEEP) == (sv->sv_flags & SV_TIMEKEEP));
 	MPASS((sv2->sv_flags & SV_SHP) != 0 && (sv->sv_flags & SV_SHP) != 0);
 	MPASS((sv2->sv_flags & SV_DSO_SIG) == (sv->sv_flags & SV_DSO_SIG));
 	MPASS((sv2->sv_flags & SV_RNG_SEED_VER) ==
 	    (sv->sv_flags & SV_RNG_SEED_VER));
 
 	sv2->sv_shared_page_obj = sv->sv_shared_page_obj;
-	sv2->sv_sigcode_base = sv2->sv_shared_page_base +
-	    (sv->sv_sigcode_base - sv->sv_shared_page_base);
-	if ((sv2->sv_flags & SV_DSO_SIG) != 0) {
-		sv2->sv_vdso_base = sv2->sv_shared_page_base +
-		    (sv->sv_vdso_base - sv->sv_shared_page_base);
-	}
+	sv2->sv_sigcode_offset = sv->sv_sigcode_offset;
+	sv2->sv_vdso_offset = sv->sv_vdso_offset;
 	if ((sv2->sv_flags & SV_ABI_MASK) != SV_ABI_FREEBSD)
 		return;
-	if ((sv2->sv_flags & SV_TIMEKEEP) != 0) {
-		sv2->sv_timekeep_base = sv2->sv_shared_page_base +
-		    (sv->sv_timekeep_base - sv->sv_shared_page_base);
-	}
-	if ((sv2->sv_flags & SV_RNG_SEED_VER) != 0) {
-		sv2->sv_fxrng_gen_base = sv2->sv_shared_page_base +
-		    (sv->sv_fxrng_gen_base - sv->sv_shared_page_base);
-	}
+	sv2->sv_timekeep_offset = sv->sv_timekeep_offset;
+	sv2->sv_fxrng_gen_offset = sv->sv_fxrng_gen_offset;
 }
diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c
index 2efe9a5fdca8..c518080ebad3 100644
--- a/sys/powerpc/powerpc/elf32_machdep.c
+++ b/sys/powerpc/powerpc/elf32_machdep.c
@@ -1,472 +1,474 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 
 #define __ELF_WORD_SIZE 32
 
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/namei.h>
 #include <sys/fcntl.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/jail.h>
 #include <sys/reg.h>
 #include <sys/smp.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/linker.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
 
 #include <machine/altivec.h>
 #include <machine/cpu.h>
 #include <machine/fpu.h>
 #include <machine/elf.h>
 #include <machine/md_var.h>
 
 #include <powerpc/powerpc/elf_common.c>
 
 #ifdef __powerpc64__
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_util.h>
 
 extern const char *freebsd32_syscallnames[];
 static void ppc32_fixlimit(struct rlimit *rl, int which);
 
 static SYSCTL_NODE(_compat, OID_AUTO, ppc32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "32-bit mode");
 
 #define PPC32_MAXDSIZ (1024*1024*1024)
 static u_long ppc32_maxdsiz = PPC32_MAXDSIZ;
 SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ppc32_maxdsiz,
              0, "");
 #define PPC32_MAXSSIZ (64*1024*1024)
 u_long ppc32_maxssiz = PPC32_MAXSSIZ;
 SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ppc32_maxssiz,
              0, "");
 #else
 static void ppc32_runtime_resolve(void);
 #endif
 
 /*
  * System entry vector for 32-bit FreeBSD ELF binaries on PowerPC.
  *
  * On a powerpc64 kernel the 32-bit compat variants are used (freebsd32
  * syscall table and names, 32-bit address limits, stack and ps_strings
  * layout, ppc32_setregs, ppc32_fixlimit); on a 32-bit kernel the native
  * ones are used instead.
  */
 struct sysentvec elf32_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 #ifdef __powerpc64__
 	.sv_table	= freebsd32_sysent,
 #else
 	.sv_table	= sysent,
 #endif
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode32,
 	.sv_szsigcode	= &szsigcode32,
 	.sv_name	= "FreeBSD ELF32",
 	.sv_coredump	= __elfN(coredump),
 	.sv_elf_core_osabi = ELFOSABI_FREEBSD,
 	.sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR,
 	.sv_elf_core_prepare_notes = __elfN(prepare_notes),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_stackprot	= VM_PROT_ALL,
 #ifdef __powerpc64__
 	.sv_maxuser	= VM_MAXUSER_ADDRESS32,
 	.sv_usrstack	= FREEBSD32_USRSTACK,
 	.sv_psstrings	= FREEBSD32_PS_STRINGS,
 	.sv_psstringssz	= sizeof(struct freebsd32_ps_strings),
 	.sv_copyout_strings = freebsd32_copyout_strings,
 	.sv_setregs	= ppc32_setregs,
 	.sv_syscallnames = freebsd32_syscallnames,
 	.sv_fixlimit	= ppc32_fixlimit,
 #else
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_psstringssz	= sizeof(struct ps_strings),
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_syscallnames = syscallnames,
 	.sv_fixlimit	= NULL,
 #endif
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_ASLR |
 			    SV_TIMEKEEP | SV_RNG_SEED_VER,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	/* The shared page is a single page at FREEBSD32_SHAREDPAGE. */
 	.sv_shared_page_base = FREEBSD32_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
 	.sv_trap	= NULL,
 	.sv_hwcap	= &cpu_features,
 	.sv_hwcap2	= &cpu_features2,
 	.sv_onexec_old	= exec_onexec_old,
 	.sv_onexit	= exit_onexit,
 	.sv_regset_begin = SET_BEGIN(__elfN(regset)),
 	.sv_regset_end  = SET_LIMIT(__elfN(regset)),
 };
 INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
 
 /*
  * Brand entry for 32-bit PowerPC FreeBSD ELF binaries whose interpreter
  * is /libexec/ld-elf.so.1.  On a powerpc64 kernel the interpreter is
  * additionally given the alternative path /libexec/ld-elf32.so.1.
  */
 static Elf32_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 #ifdef __powerpc64__
 	.interp_newpath	= "/libexec/ld-elf32.so.1",
 #else
 	.interp_newpath	= NULL,
 #endif
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t) elf32_insert_brand_entry,
     &freebsd_brand_info);
 
 /*
  * Brand entry for 32-bit PowerPC FreeBSD ELF binaries that name
  * /usr/libexec/ld-elf.so.1 as their interpreter.  It uses the same
  * sysentvec and brand note as freebsd_brand_info.
  */
 static Elf32_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&freebsd_brand_oinfo);
 
 void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase);
 
 /*
  * Emit the machine-dependent core dump notes for thread td.
  *
  * An NT_PPC_VMX note is produced when the PCB has PCB_VEC set and an
  * NT_PPC_VSX note when it has PCB_VSX set.  With dst == NULL nothing is
  * written and only the sizes are accumulated; otherwise the notes are
  * written back to back starting at dst.  The total size is stored in
  * *off.
  */
 void
 elf32_dump_thread(struct thread *td, void *dst, size_t *off)
 {
 	struct pcb *pcb;
 	uint64_t vshr[32];
 	char *out;
 	size_t total;
 	int i;
 
 	pcb = td->td_pcb;
 	out = dst;
 	total = 0;
 
 	if (pcb->pcb_flags & PCB_VEC) {
 		save_vec_nodrop(td);
 		if (out == NULL) {
 			total += elf32_populate_note(NT_PPC_VMX, NULL, NULL,
 			    sizeof(pcb->pcb_vec), NULL);
 		} else {
 			total += elf32_populate_note(NT_PPC_VMX,
 			    &pcb->pcb_vec, out + total,
 			    sizeof(pcb->pcb_vec), NULL);
 		}
 	}
 
 	if (pcb->pcb_flags & PCB_VSX) {
 		save_fpu_nodrop(td);
 		if (out == NULL) {
 			total += elf32_populate_note(NT_PPC_VSX, NULL, NULL,
 			    sizeof(vshr), NULL);
 		} else {
 			/*
 			 * Doubleword 0 of VSR0-VSR31 overlaps FPR0-FPR31 and
 			 * VSR32-VSR63 overlap VR0-VR31, so only doubleword 1
 			 * of VSR0-VSR31 is gathered for this note.
 			 */
 			for (i = 0; i < nitems(vshr); i++)
 				vshr[i] =
 				    *(uint64_t *)&pcb->pcb_fpu.fpr[i].vsr[2];
 			total += elf32_populate_note(NT_PPC_VSX,
 			    vshr, out + total,
 			    sizeof(vshr), NULL);
 		}
 	}
 
 	*off = total;
 }
 
 #ifndef __powerpc64__
 /* Report whether the relocation described by r_info is R_PPC_IRELATIVE. */
 bool
 elf_is_ifunc_reloc(Elf_Size r_info)
 {
 
 	if (ELF_R_TYPE(r_info) != R_PPC_IRELATIVE)
 		return (false);
 	return (true);
 }
 
 /*
  * Process one elf relocation with addend.
  *
  * data points at the relocation record and type selects its format; only
  * ELF_RELOC_RELA is handled, anything else panics.  Symbol values are
  * obtained through lookup().  The local argument is not referenced in
  * this function.
  *
  * Returns 0 on success, or -1 when a symbol lookup fails or the
  * relocation type is not one of those handled below.
  */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	Elf_Addr *where;
 	Elf_Half *hwhere;
 	Elf_Addr addr;
 	Elf_Addr addend, val;
 	Elf_Word rtype, symidx;
 	const Elf_Rela *rela;
 	int error;
 
 	switch (type) {
 	case ELF_RELOC_REL:
 		panic("PPC only supports RELA relocations");
 		break;
 	case ELF_RELOC_RELA:
 		rela = (const Elf_Rela *)data;
 		/* Word-sized and halfword-sized views of the same target. */
 		where = (Elf_Addr *) ((uintptr_t)relocbase + rela->r_offset);
 		hwhere = (Elf_Half *) ((uintptr_t)relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 		break;
 	default:
 		panic("elf_reloc: unknown relocation mode %d\n", type);
 	}
 
 	switch (rtype) {
 	case R_PPC_NONE:
 		break;
 
 	case R_PPC_ADDR32: /* word32 S + A */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		*where = elf_relocaddr(lf, addr + addend);
 			break;
 
 	case R_PPC_ADDR16_LO: /* #lo(S) */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		/*
 		 * addend values are sometimes relative to sections
 		 * (i.e. .rodata) in rela, where in reality they
 		 * are relative to relocbase. Detect this condition.
 		 */
 		if (addr > relocbase && addr <= (relocbase + addend))
 			addr = relocbase;
 		addr = elf_relocaddr(lf, addr + addend);
 		*hwhere = addr & 0xffff;
 		break;
 
 	case R_PPC_ADDR16_HA: /* #ha(S) */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		/*
 		 * addend values are sometimes relative to sections
 		 * (i.e. .rodata) in rela, where in reality they
 		 * are relative to relocbase. Detect this condition.
 		 */
 		if (addr > relocbase && addr <= (relocbase + addend))
 			addr = relocbase;
 		addr = elf_relocaddr(lf, addr + addend);
 		/* High half, plus one when bit 15 of the low half is set. */
 		*hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0))
 		    & 0xffff;
 		break;
 
 	case R_PPC_RELATIVE: /* word32 B + A */
 		*where = elf_relocaddr(lf, relocbase + addend);
 		break;
 
 	case R_PPC_JMP_SLOT: /* PLT jump slot entry */
 		/*
 		 * We currently only support Secure-PLT jump slots.
 		 * Given that we reject BSS-PLT modules during load, we
 		 * don't need to check again.
 		 * The method we are using here is equivalent to
 		 * LD_BIND_NOW.
 		 */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		*where = elf_relocaddr(lf, addr + addend);
 		break;
 
 	case R_PPC_IRELATIVE:
 		/* Call the function at B + A and store what it returns. */
 		addr = relocbase + addend;
 		val = ((Elf32_Addr (*)(void))addr)();
 		if (*where != val)
 			*where = val;
 		break;
 
 	default:
 		printf("kldload: unexpected relocation type %d, "
 		    "symbol index %d\n", (int)rtype, symidx);
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Apply the R_PPC_RELATIVE relocations found in the RELA table named by
  * the dynamic section dynp, using relocbase as the load base.  All other
  * relocation types in the table are skipped.
  */
 void
 elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase)
 {
 	Elf_Rela *rela = NULL, *relalim;
 	Elf_Addr relasz = 0;
 	Elf_Addr *where;
 
 	/*
 	 * Extract the rela/relasz values from the dynamic section
 	 */
 	for (; dynp->d_tag != DT_NULL; dynp++) {
 		switch (dynp->d_tag) {
 		case DT_RELA:
 			rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr);
 			break;
 		case DT_RELASZ:
 			relasz = dynp->d_un.d_val;
 			break;
 		}
 	}
 
 	/*
 	 * Relocate these values
 	 */
 	/* relasz is a byte count, hence the byte-pointer arithmetic. */
 	relalim = (Elf_Rela *)((caddr_t)rela + relasz);
 	for (; rela < relalim; rela++) {
 		if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE)
 			continue;
 		where = (Elf_Addr *)(relocbase + rela->r_offset);
 		*where = (Elf_Addr)(relocbase + rela->r_addend);
 	}
 }
 
 /*
  * Process one relocation by calling elf_reloc_internal() with its local
  * argument set to 0.  Returns that function's result.
  */
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
 }
 
 /*
  * Process one relocation by calling elf_reloc_internal() with its local
  * argument set to 1.  Returns that function's result.
  */
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup));
 }
 
 /*
  * Machine-dependent hook run when a linker file is loaded.  Always
  * returns 0.
  */
 int
 elf_cpu_load_file(linker_file_t lf)
 {
 
 	/* The kernel itself (file id 1) needs no cache sync. */
 	if (lf->id == 1)
 		return (0);
 
 	/* Any other module: sync the instruction cache over its image. */
 	__syncicache(lf->address, lf->size);
 	return (0);
 }
 
 /*
  * Machine-dependent hook run when a linker file is unloaded.  Nothing
  * needs to be done here; always returns 0.
  */
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
 
 /*
  * Canary that elf_cpu_parse_dynamic() installs in GOT slot 1 of
  * Secure-PLT modules.  It takes no arguments and never returns normally:
  * reaching it means something tried to use lazy binding.
  *
  * The definition uses an explicit (void) parameter list so that it is a
  * proper prototype and matches the forward declaration in this file.
  */
 static void
 ppc32_runtime_resolve(void)
 {
 
 	/*
 	 * Since we don't support lazy binding, panic immediately if anyone
 	 * manages to call the runtime resolver.
 	 */
 	panic("kldload: Runtime resolver was called unexpectedly!");
 }
 
 /*
  * Scan a module's dynamic section for PLT information.
  *
  * For each DT_PPC_GOT entry, the runtime-resolver canary is written into
  * slot 1 of that GOT and slot 2 is cleared.  A module that has a
  * DT_PLTGOT entry but no DT_PPC_GOT entry is treated as BSS-PLT and
  * rejected.
  *
  * Returns 0 when the module is acceptable, -1 otherwise.
  */
 int
 elf_cpu_parse_dynamic(caddr_t loadbase, Elf_Dyn *dynamic)
 {
 	Elf_Addr *got;
 	Elf_Dyn *entry;
 	bool saw_pltgot, saw_ppc_got;
 
 	saw_pltgot = false;
 	saw_ppc_got = false;
 
 	for (entry = dynamic; entry->d_tag != DT_NULL; entry++) {
 		if (entry->d_tag == DT_PPC_GOT) {
 			saw_ppc_got = true;
 			got = (Elf_Addr *)(loadbase + entry->d_un.d_ptr);
 			/* Install runtime resolver canary. */
 			got[1] = (Elf_Addr)ppc32_runtime_resolve;
 			got[2] = (Elf_Addr)0;
 		} else if (entry->d_tag == DT_PLTGOT) {
 			saw_pltgot = true;
 		}
 	}
 
 	if (!saw_pltgot || saw_ppc_got)
 		return (0);
 
 	printf("kldload: BSS-PLT modules are not supported.\n");
 	return (-1);
 }
 #endif
 
 #ifdef __powerpc64__
 static void
 ppc32_fixlimit(struct rlimit *rl, int which)
 {
 	switch (which) {
 	case RLIMIT_DATA:
 		if (ppc32_maxdsiz != 0) {
 			if (rl->rlim_cur > ppc32_maxdsiz)
 				rl->rlim_cur = ppc32_maxdsiz;
 			if (rl->rlim_max > ppc32_maxdsiz)
 				rl->rlim_max = ppc32_maxdsiz;
 		}
 		break;
 	case RLIMIT_STACK:
 		if (ppc32_maxssiz != 0) {
 			if (rl->rlim_cur > ppc32_maxssiz)
 				rl->rlim_cur = ppc32_maxssiz;
 			if (rl->rlim_max > ppc32_maxssiz)
 				rl->rlim_max = ppc32_maxssiz;
 		}
 		break;
 	}
 }
 #endif
diff --git a/sys/powerpc/powerpc/elf64_machdep.c b/sys/powerpc/powerpc/elf64_machdep.c
index 93b66461308d..a247a2c51ad6 100644
--- a/sys/powerpc/powerpc/elf64_machdep.c
+++ b/sys/powerpc/powerpc/elf64_machdep.c
@@ -1,465 +1,467 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/elf.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/namei.h>
 #include <sys/fcntl.h>
 #include <sys/reg.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/jail.h>
 #include <sys/smp.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/linker.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
 
 #include <machine/altivec.h>
 #include <machine/cpu.h>
 #include <machine/fpu.h>
 #include <machine/elf.h>
 #include <machine/md_var.h>
 
 #include <powerpc/powerpc/elf_common.c>
 
 static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp,
     uintptr_t stack);
 
 /*
  * System entry vector used by the ELFv1 brand entries in this file
  * (freebsd_brand_info_elfv1 and freebsd_brand_oinfo point at it).
  * Register setup goes through exec_setregs_funcdesc(), which unpacks
  * the ELFv1 function descriptor at the entry point.
  */
 struct sysentvec elf64_freebsd_sysvec_v1 = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode64,
 	.sv_szsigcode	= &szsigcode64,
 	.sv_name	= "FreeBSD ELF64",
 	.sv_coredump	= __elfN(coredump),
 	.sv_elf_core_osabi = ELFOSABI_FREEBSD,
 	.sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR,
 	.sv_elf_core_prepare_notes = __elfN(prepare_notes),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_psstringssz	= sizeof(struct ps_strings),
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs),
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs_funcdesc,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_ASLR |
 			    SV_TIMEKEEP | SV_RNG_SEED_VER,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
 	.sv_trap	= NULL,
 	.sv_hwcap	= &cpu_features,
 	.sv_hwcap2	= &cpu_features2,
 	.sv_onexec_old	= exec_onexec_old,
 	.sv_onexit	= exit_onexit,
 	.sv_regset_begin = SET_BEGIN(__elfN(regset)),
 	.sv_regset_end  = SET_LIMIT(__elfN(regset)),
 };
 
 /*
  * System entry vector used by the ELFv2 brand entry
  * (freebsd_brand_info_elfv2).  It differs from the v1 vector in its
  * name, in using plain exec_setregs() for register setup, and in its
  * signal trampoline, which ppc64_init_sysvecs() adjusts at boot.
  *
  * NOTE(review): unlike the v1 vector, SV_ASLR is not set in sv_flags
  * here -- confirm that this is intentional.
  */
 struct sysentvec elf64_freebsd_sysvec_v2 = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode64, /* Fixed up in ppc64_init_sysvecs(). */
 	.sv_szsigcode	= &szsigcode64,
 	.sv_name	= "FreeBSD ELF64 V2",
 	.sv_coredump	= __elfN(coredump),
 	.sv_elf_core_osabi = ELFOSABI_FREEBSD,
 	.sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR,
 	.sv_elf_core_prepare_notes = __elfN(prepare_notes),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_psstringssz	= sizeof(struct ps_strings),
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs),
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_LP64 | SV_SHP |
 			    SV_TIMEKEEP | SV_RNG_SEED_VER,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
 	.sv_trap	= NULL,
 	.sv_hwcap	= &cpu_features,
 	.sv_hwcap2	= &cpu_features2,
 	.sv_onexec_old	= exec_onexec_old,
 	.sv_onexit	= exit_onexit,
 	.sv_regset_begin = SET_BEGIN(__elfN(regset)),
 	.sv_regset_end  = SET_LIMIT(__elfN(regset)),
 };
 
 static boolean_t ppc64_elfv1_header_match(struct image_params *params,
     int32_t *, uint32_t *);
 static boolean_t ppc64_elfv2_header_match(struct image_params *params,
     int32_t *, uint32_t *);
 
 /*
  * Brand entry for FreeBSD EM_PPC64 binaries whose header is accepted by
  * ppc64_elfv1_header_match(); such images run under the v1 sysvec with
  * /libexec/ld-elf.so.1 as interpreter.
  */
 static Elf64_Brandinfo freebsd_brand_info_elfv1 = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec_v1,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
 	.header_supported = &ppc64_elfv1_header_match
 };
 
 SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t) elf64_insert_brand_entry,
     &freebsd_brand_info_elfv1);
 
 /*
  * Brand entry for FreeBSD EM_PPC64 binaries whose header is accepted by
  * ppc64_elfv2_header_match(); such images run under the v2 sysvec with
  * /libexec/ld-elf.so.1 as interpreter.
  */
 static Elf64_Brandinfo freebsd_brand_info_elfv2 = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec_v2,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
 	.header_supported = &ppc64_elfv2_header_match
 };
 
 SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t) elf64_insert_brand_entry,
     &freebsd_brand_info_elfv2);
 
 /*
  * Same as freebsd_brand_info_elfv1 except for the interpreter path,
  * which here is /usr/libexec/ld-elf.so.1.
  */
 static Elf64_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec_v1,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
 	.header_supported = &ppc64_elfv1_header_match
 };
 
 SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf64_insert_brand_entry,
 	&freebsd_brand_oinfo);
 
 void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase);
 
 /*
  * SYSINIT callback that initialises both 64-bit sysvecs.  The v2 vector
  * is initialised as the primary and the v1 vector as its secondary;
  * afterwards the v2 signal trampoline location and size are patched as
  * described below.  The argument is not used.
  */
 static void
 ppc64_init_sysvecs(void *arg)
 {
 	exec_sysvec_init(&elf64_freebsd_sysvec_v2);
 	exec_sysvec_init_secondary(&elf64_freebsd_sysvec_v2,
 	    &elf64_freebsd_sysvec_v1);
 	/*
 	 * Adjust elfv2 sigcode after elfv1 sysvec is initialized.
 	 * exec_sysvec_init_secondary() assumes secondary sysvecs use
 	 * identical signal code, and skips allocating a second copy.
 	 * Since the ELFv2 trampoline is a strict subset of the ELFv1 code,
-	 * we can work around this by adjusting the base address. This also
+	 * we can work around this by adjusting the offset. This also
 	 * avoids two copies of the trampoline code being allocated!
 	 */
-	elf64_freebsd_sysvec_v2.sv_sigcode_base +=
+	elf64_freebsd_sysvec_v2.sv_sigcode_offset +=
 	    (uintptr_t)sigcode64_elfv2 - (uintptr_t)&sigcode64;
 	elf64_freebsd_sysvec_v2.sv_szsigcode = &szsigcode64_elfv2;
 }
 SYSINIT(elf64_sysvec, SI_SUB_EXEC, SI_ORDER_ANY, ppc64_init_sysvecs, NULL);
 
 /*
  * Brand header filter for the ELFv1 entries: accept the image when the
  * low two bits of e_flags are 0 or 1.  osrel and fctl0 are unused.
  */
 static boolean_t
 ppc64_elfv1_header_match(struct image_params *params, int32_t *osrel __unused,
     uint32_t *fctl0 __unused)
 {
 	const Elf64_Ehdr *ehdr;
 
 	ehdr = (const Elf64_Ehdr *)params->image_header;
 
 	/* The masked value is in 0..3, so "<= 1" selects exactly 0 and 1. */
 	return ((ehdr->e_flags & 3) <= 1);
 }
 
 /*
  * Brand header filter for the ELFv2 entry: accept the image only when
  * the low two bits of e_flags equal 2.  osrel and fctl0 are unused.
  */
 static boolean_t
 ppc64_elfv2_header_match(struct image_params *params, int32_t *osrel __unused,
     uint32_t *fctl0 __unused)
 {
 	const Elf64_Ehdr *ehdr;
 
 	ehdr = (const Elf64_Ehdr *)params->image_header;
 
 	return ((ehdr->e_flags & 3) == 2);
 }
 
 /*
  * sv_setregs implementation for the ELFv1 sysvec.
  *
  * Performs the generic exec_setregs() setup and then reads the
  * three-word function descriptor found at imgp->entry_addr in user
  * memory, loading srr0, r2 and r11 from it (each biased by
  * imgp->reloc_base).
  *
  * The copyin() result is deliberately not acted upon, as before.  The
  * descriptor buffer is zero-initialised so that a failed or short copy
  * cannot place leftover kernel stack contents into user-visible
  * registers.
  */
 static void
 exec_setregs_funcdesc(struct thread *td, struct image_params *imgp,
     uintptr_t stack)
 {
 	struct trapframe *tf;
 	register_t entry_desc[3] = { 0, 0, 0 };
 
 	tf = trapframe(td);
 	exec_setregs(td, imgp, stack);
 
 	/*
 	 * For 64-bit ELFv1, we need to disentangle the function
 	 * descriptor
 	 *
 	 * 0. entry point
 	 * 1. TOC value (r2)
 	 * 2. Environment pointer (r11)
 	 */
 
 	(void)copyin((void *)imgp->entry_addr, entry_desc,
 	    sizeof(entry_desc));
 	tf->srr0 = entry_desc[0] + imgp->reloc_base;
 	tf->fixreg[2] = entry_desc[1] + imgp->reloc_base;
 	tf->fixreg[11] = entry_desc[2] + imgp->reloc_base;
 }
 
 /*
  * Produce the machine-specific per-thread core notes for td.
  *
  * An NT_PPC_VMX note is generated when the PCB has PCB_VEC set and an
  * NT_PPC_VSX note when it has PCB_VSX set; the relevant register state
  * is saved into the PCB first.  With a NULL dst the notes are not
  * written and elf64_populate_note() is called with NULL buffers
  * (presumably to obtain the note size only -- see that function).
  * The accumulated return values are stored in *off.
  */
 void
 elf64_dump_thread(struct thread *td, void *dst, size_t *off)
 {
 	struct pcb *pcb;
 	uint64_t dw1[32];
 	size_t total;
 	int i;
 
 	pcb = td->td_pcb;
 	total = 0;
 
 	if ((pcb->pcb_flags & PCB_VEC) != 0) {
 		save_vec_nodrop(td);
 		if (dst == NULL)
 			total += elf64_populate_note(NT_PPC_VMX, NULL, NULL,
 			    sizeof(pcb->pcb_vec), NULL);
 		else
 			total += elf64_populate_note(NT_PPC_VMX,
 			    &pcb->pcb_vec, (char *)dst + total,
 			    sizeof(pcb->pcb_vec), NULL);
 	}
 
 	if ((pcb->pcb_flags & PCB_VSX) != 0) {
 		save_fpu_nodrop(td);
 		if (dst == NULL) {
 			total += elf64_populate_note(NT_PPC_VSX, NULL, NULL,
 			    sizeof(dw1), NULL);
 		} else {
 			/*
 			 * Doubleword 0 of VSR0-VSR31 overlaps FPR0-FPR31
 			 * and VSR32-VSR63 overlap VR0-VR31, so only the
 			 * non-overlapping data is copied: doubleword 1 of
 			 * VSR0-VSR31.
 			 */
 			for (i = 0; i < nitems(dw1); i++)
 				dw1[i] =
 				    *(uint64_t *)&pcb->pcb_fpu.fpr[i].vsr[2];
 			total += elf64_populate_note(NT_PPC_VSX,
 			    dw1, (char *)dst + total,
 			    sizeof(dw1), NULL);
 		}
 	}
 
 	*off = total;
 }
 
 /*
  * Report whether the relocation described by r_info has type
  * R_PPC_IRELATIVE.
  */
 bool
 elf_is_ifunc_reloc(Elf_Size r_info)
 {
 
 	if (ELF_R_TYPE(r_info) != R_PPC_IRELATIVE)
 		return (false);
 	return (true);
 }
 
 /*
  * Process one elf relocation with addend.
  *
  * lf		linker file being relocated
  * relocbase	base added to r_offset (and to the addend where the
  *		relocation type calls for it)
  * data		pointer to the relocation record; only Elf_Rela records
  *		(type == ELF_RELOC_RELA) are supported, anything else panics
  * local	unused here
  * lookup	symbol resolver; called as lookup(lf, symidx, 1, &addr)
  *
  * Returns 0 on success and -1 when a symbol lookup fails or the
  * relocation type is not handled.
  */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	Elf_Addr *where;
 	Elf_Addr addr;
 	Elf_Addr addend, val;
 	Elf_Word rtype, symidx;
 	const Elf_Rela *rela;
 	int error;
 
 	switch (type) {
 	case ELF_RELOC_REL:
 		panic("PPC only supports RELA relocations");
 		break;
 	case ELF_RELOC_RELA:
 		rela = (const Elf_Rela *)data;
 		where = (Elf_Addr *) (relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 		break;
 	default:
 		panic("elf_reloc: unknown relocation mode %d\n", type);
 	}
 
 	switch (rtype) {
 	case R_PPC_NONE:
 		break;
 
 	case R_PPC64_ADDR64:	/* doubleword64 S + A */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		addr += addend;
 		*where = addr;
 		break;
 
 	case R_PPC_RELATIVE:	/* doubleword64 B + A */
 		*where = elf_relocaddr(lf, relocbase + addend);
 		break;
 
 	case R_PPC_JMP_SLOT:	/* function descriptor copy */
 		/*
 		 * A failed lookup leaves addr unset; bail out rather than
 		 * copying from (or storing) an indeterminate address.
 		 */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 #if !defined(_CALL_ELF) || _CALL_ELF == 1
 		memcpy(where, (Elf_Addr *)addr, 3*sizeof(Elf_Addr));
 #else
 		*where = addr;
 #endif
 		__asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory");
 		break;
 
 	case R_PPC_IRELATIVE:
 		/* Call the resolver at B + A and store what it returns. */
 		addr = relocbase + addend;
 		val = ((Elf64_Addr (*)(void))addr)();
 		if (*where != val)
 			*where = val;
 		break;
 
 	default:
 		printf("kldload: unexpected relocation type %d, "
 		    "symbol index %d\n", (int)rtype, symidx);
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Apply the R_PPC_RELATIVE relocations listed in a dynamic section.
  *
  * The RELA table address (DT_RELA, biased by relocbase) and its size in
  * bytes (DT_RELASZ) are taken from dynp, which is scanned up to DT_NULL.
  * Every R_PPC_RELATIVE record then has relocbase + r_addend stored at
  * relocbase + r_offset; records of any other type are skipped.
  */
 void
 elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase)
 {
 	Elf_Rela *cur, *end;
 	Elf_Addr tblsize;
 	Elf_Addr *target;
 
 	cur = NULL;
 	tblsize = 0;
 
 	/* Locate the RELA table and its size. */
 	while (dynp->d_tag != DT_NULL) {
 		if (dynp->d_tag == DT_RELA)
 			cur = (Elf_Rela *)(relocbase + dynp->d_un.d_ptr);
 		else if (dynp->d_tag == DT_RELASZ)
 			tblsize = dynp->d_un.d_val;
 		dynp++;
 	}
 
 	/* Walk the table and fix up the relative entries. */
 	end = (Elf_Rela *)((caddr_t)cur + tblsize);
 	while (cur < end) {
 		if (ELF_R_TYPE(cur->r_info) == R_PPC_RELATIVE) {
 			target = (Elf_Addr *)(relocbase + cur->r_offset);
 			*target = (Elf_Addr)(relocbase + cur->r_addend);
 		}
 		cur++;
 	}
 }
 
 /*
  * Process one relocation record; forwards to elf_reloc_internal() with
  * the "local" argument set to 0 and returns its result.
  */
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 	int rv;
 
 	rv = elf_reloc_internal(lf, relocbase, data, type, 0, lookup);
 	return (rv);
 }
 
 /*
  * Process one relocation record; forwards to elf_reloc_internal() with
  * the "local" argument set to 1 and returns its result.
  */
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 	int rv;
 
 	rv = elf_reloc_internal(lf, relocbase, data, type, 1, lookup);
 	return (rv);
 }
 
 /*
  * Machine-dependent step performed on a linker file.  Linker file id 1
  * (the kernel, per the historical comment here) is left untouched; for
  * any other file the instruction cache is synchronised over
  * [lf->address, lf->address + lf->size).  Always returns 0.
  */
 int
 elf_cpu_load_file(linker_file_t lf)
 {
 	/* Nothing to do for the kernel itself. */
 	if (lf->id == 1)
 		return (0);
 
 	__syncicache(lf->address, lf->size);
 	return (0);
 }
 
 /*
  * Machine-dependent unload step for a linker file.  Nothing needs to be
  * undone here, so the argument is unused and 0 is always returned.
  */
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
 
 /*
  * Machine-dependent inspection of a module's dynamic section.  This
  * implementation performs no checks: both arguments are unused and 0 is
  * always returned.
  */
 int
 elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused)
 {
 
 	return (0);
 }
diff --git a/sys/powerpc/powerpc/elf_common.c b/sys/powerpc/powerpc/elf_common.c
index c7460848b89a..2ee7fd96b94d 100644
--- a/sys/powerpc/powerpc/elf_common.c
+++ b/sys/powerpc/powerpc/elf_common.c
@@ -1,104 +1,107 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2019 Justin Hibbits
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Copy the ELF auxiliary vector for a new image out to user address
  * "base".
  *
  * Images whose p_osrel is at least P_OSREL_POWERPC_NEW_AUX_ARGS, or is
  * 0 (unknown), are handed to the generic __elfN(freebsd_copyout_auxargs).
  * Everything else gets the old layout built here from the AT_OLD_*
  * tags: a zeroed array of AT_OLD_COUNT entries is filled in, imgp->auxargs
  * is freed and cleared, and the whole array is copied out.
  *
  * Returns the result of the generic routine or of copyout().
  */
 static int
 __elfN(powerpc_copyout_auxargs)(struct image_params *imgp, uintptr_t base)
 {
 	Elf_Auxargs *args;
 	Elf_Auxinfo *argarray, *pos;
+	struct vmspace *vmspace;
 	int error;
 
 	/*
 	 * XXX If we can't find image's OSREL, assume it uses the new auxv
 	 * format.
 	 *
 	 * This is especially important for rtld, which is not tagged. Using
 	 * direct exec mode with new (ELFv2) binaries that expect the new auxv
 	 * format would result in crashes otherwise.
 	 *
 	 * Unfortunately, this may break direct exec'ing old binaries,
 	 * but it seems better to correctly support new binaries by default,
 	 * considering the transition to ELFv2 happened quite some time
 	 * ago. If needed, a sysctl may be added to allow old auxv format to
 	 * be used when OSREL is not found.
 	 */
 	if (imgp->proc->p_osrel >= P_OSREL_POWERPC_NEW_AUX_ARGS ||
 	    imgp->proc->p_osrel == 0)
 		return (__elfN(freebsd_copyout_auxargs)(imgp, base));
 
 	args = (Elf_Auxargs *)imgp->auxargs;
 	/* M_ZERO: entries that are never filled in are copied out as zero. */
 	argarray = pos = malloc(AT_OLD_COUNT * sizeof(*pos), M_TEMP,
 	    M_WAITOK | M_ZERO);
 
+	vmspace = imgp->proc->p_vmspace;
+
 	if (args->execfd != -1)
 		AUXARGS_ENTRY(pos, AT_OLD_EXECFD, args->execfd);
 	AUXARGS_ENTRY(pos, AT_OLD_PHDR, args->phdr);
 	AUXARGS_ENTRY(pos, AT_OLD_PHENT, args->phent);
 	AUXARGS_ENTRY(pos, AT_OLD_PHNUM, args->phnum);
 	AUXARGS_ENTRY(pos, AT_OLD_PAGESZ, args->pagesz);
 	AUXARGS_ENTRY(pos, AT_OLD_FLAGS, args->flags);
 	AUXARGS_ENTRY(pos, AT_OLD_ENTRY, args->entry);
 	AUXARGS_ENTRY(pos, AT_OLD_BASE, args->base);
 	AUXARGS_ENTRY(pos, AT_OLD_EHDRFLAGS, args->hdr_eflags);
 	if (imgp->execpathp != 0)
 		AUXARGS_ENTRY_PTR(pos, AT_OLD_EXECPATH, imgp->execpathp);
 	AUXARGS_ENTRY(pos, AT_OLD_OSRELDATE,
 	    imgp->proc->p_ucred->cr_prison->pr_osreldate);
 	if (imgp->canary != 0) {
 		AUXARGS_ENTRY_PTR(pos, AT_OLD_CANARY, imgp->canary);
 		AUXARGS_ENTRY(pos, AT_OLD_CANARYLEN, imgp->canarylen);
 	}
 	AUXARGS_ENTRY(pos, AT_OLD_NCPUS, mp_ncpus);
 	if (imgp->pagesizes != 0) {
 		AUXARGS_ENTRY_PTR(pos, AT_OLD_PAGESIZES, imgp->pagesizes);
 		AUXARGS_ENTRY(pos, AT_OLD_PAGESIZESLEN, imgp->pagesizeslen);
 	}
-	if (imgp->sysent->sv_timekeep_base != 0) {
+	if ((imgp->sysent->sv_flags & SV_TIMEKEEP) != 0) {
 		AUXARGS_ENTRY(pos, AT_OLD_TIMEKEEP,
-		    imgp->sysent->sv_timekeep_base);
+		    vmspace->vm_shp_base + imgp->sysent->sv_timekeep_offset);
 	}
 	/*
 	 * Use the image's own stack protection only when the sysvec has a
 	 * shared page object and the image specified a non-zero value;
 	 * otherwise fall back to the sysvec default.
 	 */
 	AUXARGS_ENTRY(pos, AT_OLD_STACKPROT, imgp->sysent->sv_shared_page_obj
 	    != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
 	    imgp->sysent->sv_stackprot);
 	if (imgp->sysent->sv_hwcap != NULL)
 		AUXARGS_ENTRY(pos, AT_OLD_HWCAP, *imgp->sysent->sv_hwcap);
 	if (imgp->sysent->sv_hwcap2 != NULL)
 		AUXARGS_ENTRY(pos, AT_OLD_HWCAP2, *imgp->sysent->sv_hwcap2);
 	AUXARGS_ENTRY(pos, AT_OLD_NULL, 0);
 
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 	KASSERT(pos - argarray <= AT_OLD_COUNT, ("Too many auxargs"));
 
 	error = copyout(argarray, (void *)base, sizeof(*argarray) * AT_OLD_COUNT);
 	free(argarray, M_TEMP);
 	return (error);
 }
diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c
index 000892bdf295..c06c13a86368 100644
--- a/sys/powerpc/powerpc/exec_machdep.c
+++ b/sys/powerpc/powerpc/exec_machdep.c
@@ -1,1299 +1,1302 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause AND BSD-2-Clause-FreeBSD
  *
  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
  * Copyright (C) 1995, 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /*-
  * Copyright (C) 2001 Benno Rice
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *	$NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_fpu_emu.h"
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/cons.h>
 #include <sys/cpu.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/reg.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/syscall.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/ucontext.h>
 #include <sys/uio.h>
 
 #include <machine/altivec.h>
 #include <machine/cpu.h>
 #include <machine/elf.h>
 #include <machine/fpu.h>
 #include <machine/pcb.h>
 #include <machine/sigframe.h>
 #include <machine/trap.h>
 #include <machine/vmparam.h>
 
+#include <vm/vm.h>
+#include <vm/vm_param.h>
 #include <vm/pmap.h>
+#include <vm/vm_map.h>
 
 #ifdef FPU_EMU
 #include <powerpc/fpu/fpu_extern.h>
 #endif
 
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_util.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 
 /*
  * User context layout used for 32-bit (COMPAT_FREEBSD32) processes.
  * Pointer-like members (uc_link) are carried as 32-bit integers; the
  * overall size is pinned by a _Static_assert later in this file.
  */
 typedef struct __ucontext32 {
 	sigset_t		uc_sigmask;
 	mcontext32_t		uc_mcontext;
 	uint32_t		uc_link;
 	struct sigaltstack32    uc_stack;
 	uint32_t		uc_flags;
 	uint32_t		__spare__[4];
 } ucontext32_t;
 
 /*
  * Signal frame copied out to the user stack by sendsig() for 32-bit
  * processes: the saved user context followed by the signal information.
  */
 struct sigframe32 {
 	ucontext32_t		sf_uc;
 	struct siginfo32	sf_si;
 };
 
 static int	grab_mcontext32(struct thread *td, mcontext32_t *, int flags);
 #endif
 
 static int	grab_mcontext(struct thread *, mcontext_t *, int);
 
 static void	cleanup_power_extras(struct thread *);
 
 #ifdef __powerpc64__
 extern struct sysentvec elf64_freebsd_sysvec_v2;
 #endif
 
 #ifdef __powerpc64__
 _Static_assert(sizeof(mcontext_t) == 1392, "mcontext_t size incorrect");
 _Static_assert(sizeof(ucontext_t) == 1472, "ucontext_t size incorrect");
 _Static_assert(sizeof(siginfo_t) == 80, "siginfo_t size incorrect");
 #ifdef COMPAT_FREEBSD32
 _Static_assert(sizeof(mcontext32_t) == 1224, "mcontext32_t size incorrect");
 _Static_assert(sizeof(ucontext32_t) == 1280, "ucontext32_t size incorrect");
 _Static_assert(sizeof(struct siginfo32) == 64, "struct siginfo32 size incorrect");
 #endif /* COMPAT_FREEBSD32 */
 #else /* powerpc */
 _Static_assert(sizeof(mcontext_t) == 1224, "mcontext_t size incorrect");
 _Static_assert(sizeof(ucontext_t) == 1280, "ucontext_t size incorrect");
 _Static_assert(sizeof(siginfo_t) == 64, "siginfo_t size incorrect");
 #endif
 
 /*
  * Deliver a signal to the current thread by building a signal frame on
  * the user stack and redirecting the trapframe to the signal trampoline.
  *
  * catcher	user handler address (placed in lr)
  * ksi		signal information; ksi_info.si_signo/si_addr are filled here
  * mask		signal mask saved into the frame's ucontext
  *
  * Must be called with the process lock and the sigacts mutex held.  Both
  * are dropped around the copyout() of the frame and re-acquired before
  * returning.  If the frame cannot be copied out the process is killed
  * with SIGILL.
  *
  * With COMPAT_FREEBSD32, SV_ILP32 processes get a struct sigframe32;
  * everything else gets a native struct sigframe.
  */
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct trapframe *tf;
 	struct sigacts *psp;
 	struct sigframe sf;
 	struct thread *td;
 	struct proc *p;
 	#ifdef COMPAT_FREEBSD32
 	struct siginfo32 siginfo32;
 	struct sigframe32 sf32;
 	#endif
 	size_t sfpsize;
 	caddr_t sfp, usfp;
 	register_t sp;
 	int oonstack, rndfsize;
 	int sig;
 	int code;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	tf = td->td_frame;
 
 	/*
 	 * Fill siginfo structure.
 	 */
 	ksi->ksi_info.si_signo = ksi->ksi_signo;
 	ksi->ksi_info.si_addr =
 	    (void *)((tf->exc == EXC_DSI || tf->exc == EXC_DSE) ?
 	    tf->dar : tf->srr0);
 
 	#ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(p, SV_ILP32)) {
 		siginfo_to_siginfo32(&ksi->ksi_info, &siginfo32);
 		sig = siginfo32.si_signo;
 		code = siginfo32.si_code;
 		sfp = (caddr_t)&sf32;
 		sfpsize = sizeof(sf32);
 		rndfsize = roundup(sizeof(sf32), 16);
 		sp = (uint32_t)tf->fixreg[1];
 		oonstack = sigonstack(sp);
 
 		/*
 		 * Save user context
 		 */
 
 		memset(&sf32, 0, sizeof(sf32));
 		grab_mcontext32(td, &sf32.sf_uc.uc_mcontext, 0);
 
 		sf32.sf_uc.uc_sigmask = *mask;
 		sf32.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp;
 		sf32.sf_uc.uc_stack.ss_size = (uint32_t)td->td_sigstk.ss_size;
 		sf32.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 		    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 
 		sf32.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	} else {
 	#endif
 		sig = ksi->ksi_signo;
 		code = ksi->ksi_code;
 		sfp = (caddr_t)&sf;
 		sfpsize = sizeof(sf);
 		#ifdef __powerpc64__
 		/*
 		 * 64-bit PPC defines a 288 byte scratch region
 		 * below the stack.
 		 */
 		rndfsize = 288 + roundup(sizeof(sf), 48);
 		#else
 		rndfsize = roundup(sizeof(sf), 16);
 		#endif
 		sp = tf->fixreg[1];
 		oonstack = sigonstack(sp);
 
 		/*
 		 * Save user context
 		 */
 
 		memset(&sf, 0, sizeof(sf));
 		grab_mcontext(td, &sf.sf_uc.uc_mcontext, 0);
 
 		sf.sf_uc.uc_sigmask = *mask;
 		sf.sf_uc.uc_stack = td->td_sigstk;
 		sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 		    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 
 		sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	#ifdef COMPAT_FREEBSD32
 	}
 	#endif
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	     catcher, sig);
 
 	/*
 	 * Allocate and validate space for the signal handler context.
 	 * The frame goes on the alternate stack when one is configured,
 	 * we are not already on it, and the signal asked for it; otherwise
 	 * it goes below the current stack pointer.  Either way the address
 	 * is aligned down to 16 bytes.
 	 */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		usfp = (void *)(((uintptr_t)td->td_sigstk.ss_sp +
 		   td->td_sigstk.ss_size - rndfsize) & ~0xFul);
 	} else {
 		usfp = (void *)((sp - rndfsize) & ~0xFul);
 	}
 
 	/*
 	 * Set Floating Point facility to "Ignore Exceptions Mode" so signal
 	 * handler can run.
 	 */
 	if (td->td_pcb->pcb_flags & PCB_FPU)
 		tf->srr1 = tf->srr1 & ~(PSL_FE0 | PSL_FE1);
 
 	/*
 	 * Set up the registers to return to sigcode.
 	 *
 	 *   r1/sp - sigframe ptr
 	 *   lr    - sig function, dispatched to by blrl in trampoline
 	 *   r3    - sig number
 	 *   r4    - SIGINFO ? &siginfo : exception code
 	 *   r5    - user context
 	 *   srr0  - trampoline function addr
 	 */
 	tf->lr = (register_t)catcher;
 	tf->fixreg[1] = (register_t)usfp;
 	tf->fixreg[FIRSTARG] = sig;
 	#ifdef COMPAT_FREEBSD32
 	tf->fixreg[FIRSTARG+2] = (register_t)usfp +
 	    ((SV_PROC_FLAG(p, SV_ILP32)) ?
 	    offsetof(struct sigframe32, sf_uc) :
 	    offsetof(struct sigframe, sf_uc));
 	#else
 	tf->fixreg[FIRSTARG+2] = (register_t)usfp +
 	    offsetof(struct sigframe, sf_uc);
 	#endif
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/*
 		 * Signal handler installed with SA_SIGINFO.
 		 */
 		#ifdef COMPAT_FREEBSD32
 		if (SV_PROC_FLAG(p, SV_ILP32)) {
 			tf->fixreg[FIRSTARG+1] = (register_t)usfp +
 			    offsetof(struct sigframe32, sf_si);
 			sf32.sf_si = siginfo32;
 		} else  {
 		#endif
 			tf->fixreg[FIRSTARG+1] = (register_t)usfp +
 			    offsetof(struct sigframe, sf_si);
 			sf.sf_si = ksi->ksi_info;
 		#ifdef COMPAT_FREEBSD32
 		}
 		#endif
 	} else {
 		/*
 		 * Old FreeBSD-style arguments.
 		 *
 		 * NOTE(review): si_addr above also treats EXC_DSE as a data
 		 * fault, while this path only checks EXC_DSI -- confirm
 		 * whether the difference is intended.
 		 */
 		tf->fixreg[FIRSTARG+1] = code;
 		tf->fixreg[FIRSTARG+3] = (tf->exc == EXC_DSI) ?
 		    tf->dar : tf->srr0;
 	}
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	tf->srr0 = (register_t)PROC_SIGCODE(p);
 
 	/*
 	 * copy the frame out to userland.
 	 */
 	if (copyout(sfp, usfp, sfpsize) != 0) {
 		/*
 		 * Process has trashed its stack. Kill it.
 		 */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td,
 	     tf->srr0, tf->fixreg[1]);
 
 	/* Restore the locking state the caller expects. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t uc;
 	int error;
 
 	CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp);
 
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) {
 		CTR1(KTR_SIG, "sigreturn: efault td=%p", td);
 		return (EFAULT);
 	}
 
 	error = set_mcontext(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Save FPU state if needed. User may have changed it on
 	 * signal handler
 	 */
 	if (uc.uc_mcontext.mc_srr1 & PSL_FP)
 		save_fpu(td);
 
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x",
 	     td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]);
 
 	return (EJUSTRETURN);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 compatibility entry point: reinterprets the argument block
  * as struct sigreturn_args and defers to sys_sigreturn().
  */
 int
 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
 {
 	struct sigreturn_args *args;
 
 	args = (struct sigreturn_args *)uap;
 	return (sys_sigreturn(td, args));
 }
 #endif
 
 /*
  * Build a minimal PCB from a trapframe.  kdb_trap() calls this so that
  * a backtrace can begin at the function that entered the debugger: the
  * context lives in the trapframe, but the tracer works from a PCB.
  * Only what a backtrace needs is filled in -- the stack pointer (r1)
  * and the interrupted pc (srr0, stored as the PCB's lr).
  */
 void
 makectx(struct trapframe *tf, struct pcb *pcb)
 {
 
 	pcb->pcb_sp = tf->fixreg[1];
 	pcb->pcb_lr = tf->srr0;
 }
 
 /*
  * get_mcontext/sendsig helper routine that doesn't touch the
  * proc lock
  *
  * Fills *mcp from td's trapframe and PCB: the trapframe is copied
  * verbatim, r3/r4 are cleared when GET_MC_CLEAR_RET is requested, and
  * the FP, VSX and Altivec register state is added when the matching
  * PCB flags are set.  Always returns 0.
  */
 static int
 grab_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 {
 	struct pcb *pcb;
 	int i;
 
 	pcb = td->td_pcb;
 
 	memset(mcp, 0, sizeof(mcontext_t));
 
 	mcp->mc_vers = _MC_VERSION;
 	mcp->mc_flags = 0;
 	memcpy(&mcp->mc_frame, td->td_frame, sizeof(struct trapframe));
 	if (flags & GET_MC_CLEAR_RET) {
 		mcp->mc_gpr[3] = 0;
 		mcp->mc_gpr[4] = 0;
 	}
 
 	/*
 	 * This assumes that floating-point context is *not* lazy,
 	 * so if the thread has used FP there would have been a
 	 * FP-unavailable exception that would have set things up
 	 * correctly.
 	 */
 	if (pcb->pcb_flags & PCB_FPREGS) {
 		if (pcb->pcb_flags & PCB_FPU) {
 			KASSERT(td == curthread,
 				("get_mcontext: fp save not curthread"));
 			/* Flush the live FPU state into the PCB first. */
 			critical_enter();
 			save_fpu(td);
 			critical_exit();
 		}
 		mcp->mc_flags |= _MC_FP_VALID;
 		memcpy(&mcp->mc_fpscr, &pcb->pcb_fpu.fpscr, sizeof(double));
 		for (i = 0; i < 32; i++)
 			memcpy(&mcp->mc_fpreg[i], &pcb->pcb_fpu.fpr[i].fpr,
 			    sizeof(double));
 	}
 
 	/*
 	 * VSX: copy one doubleword per register, starting at vsr[2].
 	 * NOTE(review): no mc_flags bit is set for this data -- confirm
 	 * how consumers detect that mc_vsxfpreg is valid.
 	 */
 	if (pcb->pcb_flags & PCB_VSX) {
 		for (i = 0; i < 32; i++)
 			memcpy(&mcp->mc_vsxfpreg[i],
 			    &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double));
 	}
 
 	/*
 	 * Repeat for Altivec context
 	 */
 
 	if (pcb->pcb_flags & PCB_VEC) {
 		KASSERT(td == curthread,
 			("get_mcontext: fp save not curthread"));
 		critical_enter();
 		save_vec(td);
 		critical_exit();
 		mcp->mc_flags |= _MC_AV_VALID;
 		mcp->mc_vscr  = pcb->pcb_vec.vscr;
 		mcp->mc_vrsave =  pcb->pcb_vec.vrsave;
 		memcpy(mcp->mc_avec, pcb->pcb_vec.vr, sizeof(mcp->mc_avec));
 	}
 
 	mcp->mc_len = sizeof(*mcp);
 
 	return (0);
 }
 
 /*
  * Capture the machine context of td into *mcp, then record in mc_onstack
  * whether the thread's stack pointer lies on its alternate signal stack.
  * Returns the result of grab_mcontext(); mc_onstack is only filled in
  * when that call succeeds.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 {
 	int rv;
 
 	rv = grab_mcontext(td, mcp, flags);
 	if (rv != 0)
 		return (rv);
 
 	/* sigonstack() is consulted with the process lock held. */
 	PROC_LOCK(curthread->td_proc);
 	mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]);
 	PROC_UNLOCK(curthread->td_proc);
 
 	return (0);
 }
 
 /*
  * Install the user-supplied machine context *mcp on thread td.
  *
  * Returns EINVAL when the context's version or length does not match this
  * kernel's mcontext_t, or when it would change MSR bits covered by
  * psl_userstatic; returns 0 otherwise.  The thread's TLS register is kept
  * across the trapframe copy, FP and VSX are forced off in the saved MSR,
  * and the FP and Altivec register images in the PCB are reloaded when the
  * context flags mark them valid.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct pcb *pcb;
 	struct trapframe *tf;
 	register_t tls;
 	int i;
 
 	pcb = td->td_pcb;
 	tf = td->td_frame;
 
 	/* Reject contexts that were not laid out as this mcontext_t. */
 	if (mcp->mc_vers != _MC_VERSION || mcp->mc_len != sizeof(*mcp))
 		return (EINVAL);
 
 	/*
 	 * Don't let the user change privileged MSR bits.
 	 *
 	 * psl_userstatic is used here to mask off any bits that can
 	 * legitimately vary between user contexts (Floating point
 	 * exception control and any facilities that we are using the
 	 * "enable on first use" pattern with.)
 	 *
 	 * All other bits are required to match psl_userset(32).
 	 *
 	 * Remember to update the platform cpu_init code when implementing
 	 * support for a new conditional facility!
 	 */
 	if ((mcp->mc_srr1 & psl_userstatic) != (tf->srr1 & psl_userstatic)) {
 		return (EINVAL);
 	}
 
 	/* Copy trapframe, preserving TLS pointer across context change */
 	if (SV_PROC_FLAG(td->td_proc, SV_LP64))
 		tls = tf->fixreg[13];
 	else
 		tls = tf->fixreg[2];
 	memcpy(tf, mcp->mc_frame, sizeof(mcp->mc_frame));
 	if (SV_PROC_FLAG(td->td_proc, SV_LP64))
 		tf->fixreg[13] = tls;
 	else
 		tf->fixreg[2] = tls;
 
 	/*
 	 * Force the FPU back off to ensure the new context will not bypass
 	 * the enable_fpu() setup code accidentally.
 	 *
 	 * This prevents an issue where a process that uses floating point
 	 * inside a signal handler could end up in a state where the MSR
 	 * did not match pcb_flags.
 	 *
 	 * Additionally, ensure VSX is disabled as well, as it is illegal
 	 * to leave it turned on when FP or VEC are off.
 	 */
 	tf->srr1 &= ~(PSL_FP | PSL_VSX);
 	pcb->pcb_flags &= ~(PCB_FPU | PCB_VSX);
 
 	if (mcp->mc_flags & _MC_FP_VALID) {
 		/* enable_fpu() will happen lazily on a fault */
 		pcb->pcb_flags |= PCB_FPREGS;
 		memcpy(&pcb->pcb_fpu.fpscr, &mcp->mc_fpscr, sizeof(double));
 		/* Zero first so the parts not supplied by mcp are defined. */
 		bzero(pcb->pcb_fpu.fpr, sizeof(pcb->pcb_fpu.fpr));
 		for (i = 0; i < 32; i++) {
 			memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i],
 			    sizeof(double));
 			memcpy(&pcb->pcb_fpu.fpr[i].vsr[2],
 			    &mcp->mc_vsxfpreg[i], sizeof(double));
 		}
 	}
 
 	if (mcp->mc_flags & _MC_AV_VALID) {
 		/* Turn the vector unit on first if this thread lacks it. */
 		if ((pcb->pcb_flags & PCB_VEC) != PCB_VEC) {
 			critical_enter();
 			enable_vec(td);
 			critical_exit();
 		}
 		pcb->pcb_vec.vscr = mcp->mc_vscr;
 		pcb->pcb_vec.vrsave = mcp->mc_vrsave;
 		memcpy(pcb->pcb_vec.vr, mcp->mc_avec, sizeof(mcp->mc_avec));
 	} else {
 		/* No vector state supplied: drop vector use for the thread. */
 		tf->srr1 &= ~PSL_VEC;
 		pcb->pcb_flags &= ~PCB_VEC;
 	}
 
 	return (0);
 }
 
 /*
  * Reset per-thread POWER state that is not governed by the MSR and so
  * has to be torn down by hand.  Nothing happens unless td is the thread
  * currently running on this CPU.
  *
  * Handled here:
  * DSCR -- Data stream control register (PowerISA 2.06+)
  * FSCR -- Facility Status and Control Register (PowerISA 2.07+)
  * FPSCR -- through cleanup_fpscr(), when PCB_FPU is set
  */
 static void
 cleanup_power_extras(struct thread *td)
 {
 	uint32_t flags;
 
 	if (td == curthread) {
 		flags = td->td_pcb->pcb_flags;
 
 		/* Registers the MSR does not manage for us. */
 		if ((flags & PCB_CFSCR) != 0)
 			mtspr(SPR_FSCR, 0);
 		if ((flags & PCB_CDSCR) != 0)
 			mtspr(SPR_DSCRP, 0);
 
 		if ((flags & PCB_FPU) != 0)
 			cleanup_fpscr();
 	}
 }
 
 /*
  * Ensure the PCB has been updated in preparation for copying a thread.
  *
  * This is needed because normally this only happens during switching tasks,
  * but when we are cloning a thread, we need the updated state before doing
  * the actual copy, so the new thread inherits the current state instead of
  * the state at the last task switch.
  *
  * td must be curthread (asserted below); the values are read from the
  * special-purpose registers of the running CPU.
  *
  * Keep this in sync with the assembly code in cpu_switch()!
  */
 void
 cpu_save_thread_regs(struct thread *td)
 {
 	uint32_t pcb_flags;
 	struct pcb *pcb;
 
 	KASSERT(td == curthread,
 	    ("cpu_save_thread_regs: td is not curthread"));
 
 	pcb = td->td_pcb;
 
 	/* Snapshot the flags once; every test below uses this copy. */
 	pcb_flags = pcb->pcb_flags;
 
 #if defined(__powerpc64__)
 	/* Are *any* FSCR flags in use? */
 	if (pcb_flags & PCB_CFSCR) {
 		pcb->pcb_fscr = mfspr(SPR_FSCR);
 
 		/* Save only the facilities FSCR reports as enabled. */
 		if (pcb->pcb_fscr & FSCR_EBB) {
 			pcb->pcb_ebb.ebbhr = mfspr(SPR_EBBHR);
 			pcb->pcb_ebb.ebbrr = mfspr(SPR_EBBRR);
 			pcb->pcb_ebb.bescr = mfspr(SPR_BESCR);
 		}
 		if (pcb->pcb_fscr & FSCR_LM) {
 			pcb->pcb_lm.lmrr = mfspr(SPR_LMRR);
 			pcb->pcb_lm.lmser = mfspr(SPR_LMSER);
 		}
 		if (pcb->pcb_fscr & FSCR_TAR)
 			pcb->pcb_tar = mfspr(SPR_TAR);
 	}
 
 	/*
 	 * This is outside of the PCB_CFSCR check because it can be set
 	 * independently when running on POWER7/POWER8.
 	 */
 	if (pcb_flags & PCB_CDSCR)
 		pcb->pcb_dscr = mfspr(SPR_DSCRP);
 #endif
 
 #if defined(__SPE__)
 	/*
 	 * On E500v2, single-precision scalar instructions and access to
 	 * SPEFSCR may be used without PSL_VEC turned on, as long as they
 	 * limit themselves to the low word of the registers.
 	 *
 	 * As such, we need to unconditionally save SPEFSCR, even though
 	 * it is also updated in save_vec_nodrop().
 	 */
 	pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR);
 #endif
 
 	/* The _nodrop variants are used for both register files. */
 	if (pcb_flags & PCB_FPU)
 		save_fpu_nodrop(td);
 
 	if (pcb_flags & PCB_VEC)
 		save_vec_nodrop(td);
 }
 
 /*
  * Set up the user registers of td for a freshly exec'd image: clear the
  * trapframe, establish the initial stack pointer, load the _start()
  * arguments, point the PC at the image entry and reset per-thread PCB
  * state.
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe	*frame;
 	register_t		nargs;
 
 	frame = trapframe(td);
 	bzero(frame, sizeof(*frame));
 
 	/* Reserve the initial frame below `stack`, keeping 16-byte alignment. */
 #ifdef __powerpc64__
 	frame->fixreg[1] = -roundup(-stack + 48, 16);
 #else
 	frame->fixreg[1] = -roundup(-stack + 8, 16);
 #endif
 
 	/*
 	 * Set up arguments for _start():
 	 *	_start(argc, argv, envp, obj, cleanup, ps_strings);
 	 *
 	 * Notes:
 	 *	- obj and cleanup are the auxiliary and termination
 	 *	  vectors.  They are fixed up by ld.elf_so.
 	 *	- ps_strings is a NetBSD extension, and will be
 	 *	  ignored by executables which are strictly
 	 *	  compliant with the SVR4 ABI.
 	 */
 
 	/* The argument count is the first word on the user stack. */
 	nargs = fuword((void *)stack);
 
 	frame->fixreg[3] = nargs;
 	frame->fixreg[4] = stack + sizeof(register_t);
 	frame->fixreg[5] = stack + (2 + nargs)*sizeof(register_t);
 	frame->fixreg[6] = 0;				/* auxiliary vector */
 	frame->fixreg[7] = 0;				/* termination vector */
 	frame->fixreg[8] = (register_t)imgp->ps_strings; /* NetBSD extension */
 
 	frame->srr0 = imgp->entry_addr;
 #ifdef __powerpc64__
 	/* On 64-bit the entry address is additionally placed in r12. */
 	frame->fixreg[12] = imgp->entry_addr;
 #endif
 	frame->srr1 = psl_userset | PSL_FE_DFLT;
 
 	/* Drop leftover facility state, then forget all PCB flags. */
 	cleanup_power_extras(td);
 	td->td_pcb->pcb_flags = 0;
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * 32-bit counterpart of exec_setregs(): prepare the user registers of td
  * for a freshly exec'd 32-bit image.  Stack words are 32 bits wide and
  * the MSR is seeded from psl_userset32.
  */
 void
 ppc32_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe	*frame;
 	uint32_t		nargs;
 
 	frame = trapframe(td);
 	bzero(frame, sizeof(*frame));
 	frame->fixreg[1] = -roundup(-stack + 8, 16);
 
 	/* The argument count is the first 32-bit word on the user stack. */
 	nargs = fuword32((void *)stack);
 
 	frame->fixreg[3] = nargs;
 	frame->fixreg[4] = stack + sizeof(uint32_t);
 	frame->fixreg[5] = stack + (2 + nargs)*sizeof(uint32_t);
 	frame->fixreg[6] = 0;				/* auxiliary vector */
 	frame->fixreg[7] = 0;				/* termination vector */
 	frame->fixreg[8] = (register_t)imgp->ps_strings; /* NetBSD extension */
 
 	frame->srr0 = imgp->entry_addr;
 	frame->srr1 = psl_userset32 | PSL_FE_DFLT;
 
 	/* Drop leftover facility state, then forget all PCB flags. */
 	cleanup_power_extras(td);
 	td->td_pcb->pcb_flags = 0;
 }
 #endif
 
 /*
  * Export the general-purpose register state of td: the leading
  * sizeof(struct reg) bytes of the thread's trapframe are copied verbatim
  * into *regs.  Always returns 0.
  */
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 
 	memcpy(regs, td->td_frame, sizeof(*regs));
 	return (0);
 }
 
 /*
  * Debug-register read hook.  Neither argument is used; the call always
  * fails with ENOSYS.
  */
 int
 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 {
 	/* No debug registers on PowerPC */
 	return (ENOSYS);
 }
 
 /*
  * Export the floating-point state of td into *fpregs.  If the PCB holds
  * no FP register image (PCB_FPREGS clear) the result is all zeroes;
  * otherwise FPSCR and the 32 FP registers are copied from the PCB.
  * Always returns 0.
  */
 int
 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 	struct pcb *pcb;
 	int n;
 
 	pcb = td->td_pcb;
 
 	if ((pcb->pcb_flags & PCB_FPREGS) == 0) {
 		memset(fpregs, 0, sizeof(struct fpreg));
 		return (0);
 	}
 
 	memcpy(&fpregs->fpscr, &pcb->pcb_fpu.fpscr, sizeof(double));
 	for (n = 0; n < 32; n++) {
 		memcpy(&fpregs->fpreg[n], &pcb->pcb_fpu.fpr[n].fpr,
 		    sizeof(double));
 	}
 
 	return (0);
 }
 
 /*
  * Load general-purpose register state into td: sizeof(struct reg) bytes
  * from *regs overwrite the start of the thread's trapframe.  Always
  * returns 0.
  */
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 
 	memcpy(td->td_frame, regs, sizeof(*regs));
 	return (0);
 }
 
 /*
  * Debug-register write hook.  Neither argument is used; the call always
  * fails with ENOSYS.
  */
 int
 set_dbregs(struct thread *td, struct dbreg *dbregs)
 {
 	/* No debug registers on PowerPC */
 	return (ENOSYS);
 }
 
 /*
  * Load floating-point state into td's PCB from *fpregs and mark the PCB
  * as holding an FP register image (PCB_FPREGS).  Only the PCB copy is
  * written here.  Always returns 0.
  */
 int
 set_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 	struct pcb *pcb;
 	int n;
 
 	pcb = td->td_pcb;
 	pcb->pcb_flags |= PCB_FPREGS;
 
 	memcpy(&pcb->pcb_fpu.fpscr, &fpregs->fpscr, sizeof(double));
 	for (n = 0; n < 32; n++)
 		memcpy(&pcb->pcb_fpu.fpr[n].fpr, &fpregs->fpreg[n],
 		    sizeof(double));
 
 	return (0);
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * Load a 32-bit register set into td's trapframe: the 32 fixed registers
  * plus LR, CR, XER, CTR and the program counter (stored in srr0).
  * Always returns 0.
  */
 int
 set_regs32(struct thread *td, struct reg32 *regs)
 {
 	struct trapframe *frame;
 	int n;
 
 	frame = td->td_frame;
 
 	frame->srr0 = regs->pc;
 	frame->ctr = regs->ctr;
 	frame->xer = regs->xer;
 	frame->cr = regs->cr;
 	frame->lr = regs->lr;
 	for (n = 0; n < 32; n++) {
 		frame->fixreg[n] = regs->fixreg[n];
 	}
 
 	return (0);
 }
 
 /*
  * Export td's trapframe as a 32-bit register set: the 32 fixed registers
  * plus LR, CR, XER, CTR and the program counter (taken from srr0).
  * Always returns 0.
  */
 int
 fill_regs32(struct thread *td, struct reg32 *regs)
 {
 	struct trapframe *frame;
 	int n;
 
 	frame = td->td_frame;
 
 	regs->pc = frame->srr0;
 	regs->ctr = frame->ctr;
 	regs->xer = frame->xer;
 	regs->cr = frame->cr;
 	regs->lr = frame->lr;
 	for (n = 0; n < 32; n++) {
 		regs->fixreg[n] = frame->fixreg[n];
 	}
 
 	return (0);
 }
 
 /*
  * 32-bit flavour of grab_mcontext(): gather the native context into a
  * temporary mcontext_t and convert it field by field into *mcp.  Returns
  * the grab_mcontext() result; *mcp is written only on success.
  */
 static int
 grab_mcontext32(struct thread *td, mcontext32_t *mcp, int flags)
 {
 	mcontext_t wide;
 	int error, n;
 
 	error = grab_mcontext(td, &wide, flags);
 	if (error == 0) {
 		mcp->mc_vers = wide.mc_vers;
 		mcp->mc_flags = wide.mc_flags;
 		mcp->mc_onstack = wide.mc_onstack;
 		mcp->mc_len = wide.mc_len;
 		memcpy(mcp->mc_avec, wide.mc_avec, sizeof(wide.mc_avec));
 		memcpy(mcp->mc_av, wide.mc_av, sizeof(wide.mc_av));
 		/* Frame words are assigned one by one, not block-copied. */
 		for (n = 0; n < 42; n++)
 			mcp->mc_frame[n] = wide.mc_frame[n];
 		memcpy(mcp->mc_fpreg, wide.mc_fpreg, sizeof(wide.mc_fpreg));
 		memcpy(mcp->mc_vsxfpreg, wide.mc_vsxfpreg,
 		    sizeof(wide.mc_vsxfpreg));
 	}
 
 	return (error);
 }
 
 /*
  * 32-bit flavour of get_mcontext(): capture the context with
  * grab_mcontext32() and, on success, record in mc_onstack whether the
  * thread's stack pointer lies on its alternate signal stack.
  */
 static int
 get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags)
 {
 	int rv;
 
 	rv = grab_mcontext32(td, mcp, flags);
 	if (rv != 0)
 		return (rv);
 
 	/* sigonstack() is consulted with the process lock held. */
 	PROC_LOCK(curthread->td_proc);
 	mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]);
 	PROC_UNLOCK(curthread->td_proc);
 
 	return (0);
 }
 
 /*
  * 32-bit flavour of set_mcontext(): widen *mcp into a native mcontext_t
  * and hand it to set_mcontext(), whose result is returned.
  */
 static int
 set_mcontext32(struct thread *td, mcontext32_t *mcp)
 {
 	mcontext_t wide;
 	int n;
 
 	wide.mc_vers = mcp->mc_vers;
 	wide.mc_flags = mcp->mc_flags;
 	wide.mc_onstack = mcp->mc_onstack;
 	wide.mc_len = mcp->mc_len;
 	memcpy(wide.mc_avec, mcp->mc_avec, sizeof(wide.mc_avec));
 	memcpy(wide.mc_av, mcp->mc_av, sizeof(wide.mc_av));
 	for (n = 0; n < 42; n++)
 		wide.mc_frame[n] = mcp->mc_frame[n];
 	/*
 	 * Merge in the upper 32 bits of the thread's current srr1.  This
 	 * has to follow the frame copy above.
 	 */
 	wide.mc_srr1 |= (td->td_frame->srr1 & 0xFFFFFFFF00000000ULL);
 	memcpy(wide.mc_fpreg, mcp->mc_fpreg, sizeof(wide.mc_fpreg));
 	memcpy(wide.mc_vsxfpreg, mcp->mc_vsxfpreg, sizeof(wide.mc_vsxfpreg));
 
 	return (set_mcontext(td, &wide));
 }
 #endif
 
 #ifdef COMPAT_FREEBSD32
 /*
  * 32-bit sigreturn system call: copy the ucontext32_t at uap->sigcntxp in
  * from user space, install its machine context on td and restore the
  * signal mask it carries.
  *
  * Returns EFAULT if the copyin fails, the set_mcontext32() error if the
  * context is rejected, and EJUSTRETURN on success.
  */
 int
 freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap)
 {
 	ucontext32_t uc;
 	int error;
 
 	CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp);
 
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) {
 		CTR1(KTR_SIG, "sigreturn: efault td=%p", td);
 		return (EFAULT);
 	}
 
 	error = set_mcontext32(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Save FPU state if needed. User may have changed it on
 	 * signal handler
 	 */
 	if (uc.uc_mcontext.mc_srr1 & PSL_FP)
 		save_fpu(td);
 
 	/* Reinstate the signal mask stored in the context. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x",
 	     td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * The first two fields of a ucontext_t are the signal mask and the machine
  * context.  The next field is uc_link; we want to avoid destroying the link
  * when copying out contexts.
  */
 #define	UC32_COPY_SIZE	offsetof(ucontext32_t, uc_link)
 
 /*
  * 32-bit getcontext system call: snapshot the calling thread's machine
  * context and signal mask and copy the first UC32_COPY_SIZE bytes of the
  * resulting ucontext32_t out to uap->ucp.
  *
  * Returns EINVAL for a NULL ucp, otherwise the copyout() result.
  */
 int
 freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap)
 {
 	ucontext32_t uc;
 
 	if (uap->ucp == NULL)
 		return (EINVAL);
 
 	bzero(&uc, sizeof(uc));
 	get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
 
 	/* The signal mask is read with the process lock held. */
 	PROC_LOCK(td->td_proc);
 	uc.uc_sigmask = td->td_sigmask;
 	PROC_UNLOCK(td->td_proc);
 
 	return (copyout(&uc, uap->ucp, UC32_COPY_SIZE));
 }
 
 int
 freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap)
 {
 	ucontext32_t uc;
 	int ret;	
 
 	if (uap->ucp == NULL)
 		ret = EINVAL;
 	else {
 		ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE);
 		if (ret == 0) {
 			ret = set_mcontext32(td, &uc.uc_mcontext);
 			if (ret == 0) {
 				kern_sigprocmask(td, SIG_SETMASK,
 				    &uc.uc_sigmask, NULL, 0);
 			}
 		}
 	}
 	return (ret == 0 ? EJUSTRETURN : ret);
 }
 
 int
 freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap)
 {
 	ucontext32_t uc;
 	int ret;
 
 	if (uap->oucp == NULL || uap->ucp == NULL)
 		ret = EINVAL;
 	else {
 		bzero(&uc, sizeof(uc));
 		get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
 		PROC_LOCK(td->td_proc);
 		uc.uc_sigmask = td->td_sigmask;
 		PROC_UNLOCK(td->td_proc);
 		ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE);
 		if (ret == 0) {
 			ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE);
 			if (ret == 0) {
 				ret = set_mcontext32(td, &uc.uc_mcontext);
 				if (ret == 0) {
 					kern_sigprocmask(td, SIG_SETMASK,
 					    &uc.uc_sigmask, NULL, 0);
 				}
 			}
 		}
 	}
 	return (ret == 0 ? EJUSTRETURN : ret);
 }
 
 #endif
 
 void
 cpu_set_syscall_retval(struct thread *td, int error)
 {
 	struct proc *p;
 	struct trapframe *tf;
 	int fixup;
 
 	if (error == EJUSTRETURN)
 		return;
 
 	p = td->td_proc;
 	tf = td->td_frame;
 
 	if (tf->fixreg[0] == SYS___syscall &&
 	    (SV_PROC_FLAG(p, SV_ILP32))) {
 		int code = tf->fixreg[FIRSTARG + 1];
 		fixup = (
 #if defined(COMPAT_FREEBSD6) && defined(SYS_freebsd6_lseek)
 		    code != SYS_freebsd6_lseek &&
 #endif
 		    code != SYS_lseek) ?  1 : 0;
 	} else
 		fixup = 0;
 
 	switch (error) {
 	case 0:
 		if (fixup) {
 			/*
 			 * 64-bit return, 32-bit syscall. Fixup byte order
 			 */
 			tf->fixreg[FIRSTARG] = 0;
 			tf->fixreg[FIRSTARG + 1] = td->td_retval[0];
 		} else {
 			tf->fixreg[FIRSTARG] = td->td_retval[0];
 			tf->fixreg[FIRSTARG + 1] = td->td_retval[1];
 		}
 		tf->cr &= ~0x10000000;		/* Unset summary overflow */
 		break;
 	case ERESTART:
 		/*
 		 * Set user's pc back to redo the system call.
 		 */
 		tf->srr0 -= 4;
 		break;
 	default:
 		tf->fixreg[FIRSTARG] = error;
 		tf->cr |= 0x10000000;		/* Set summary overflow */
 		break;
 	}
 }
 
 /*
  * Threading functions
  */
 /*
  * Machine-dependent thread exit hook: hands td to
  * cleanup_power_extras() and does nothing else.
  */
 void
 cpu_thread_exit(struct thread *td)
 {
 	cleanup_power_extras(td);
 }
 
 /*
  * Machine-dependent thread clean hook.  The body is empty: no work is
  * done here.
  */
 void
 cpu_thread_clean(struct thread *td)
 {
 }
 
 /*
  * Lay out the machine-dependent areas at the top of td's kernel stack:
  * the PCB goes at the highest suitably masked address that still leaves
  * room for a struct pcb, and the trapframe sits immediately below it.
  */
 void
 cpu_thread_alloc(struct thread *td)
 {
 	struct pcb *pcb;
 
 	/*
 	 * NOTE(review): the mask ~0x2f clears bits 0-3 and bit 5 but not
 	 * bit 4, so it only guarantees 16-byte alignment.  ~0x3f (64-byte
 	 * alignment) may have been intended -- confirm, and check any
 	 * other code computing the PCB address, before changing it.
 	 */
 	pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
 	    sizeof(struct pcb)) & ~0x2fUL);
 	td->td_pcb = pcb;
 	/* One trapframe-sized slot directly below the PCB. */
 	td->td_frame = (struct trapframe *)pcb - 1;
 }
 
 /*
  * Machine-dependent thread free hook.  The body is empty: no work is
  * done here.
  */
 void
 cpu_thread_free(struct thread *td)
 {
 }
 
 int
 cpu_set_user_tls(struct thread *td, void *tls_base)
 {
 
 	if (SV_PROC_FLAG(td->td_proc, SV_LP64))
 		td->td_frame->fixreg[13] = (register_t)tls_base + 0x7010;
 	else
 		td->td_frame->fixreg[2] = (register_t)tls_base + 0x7008;
 	return (0);
 }
 
 /*
  * Initialise the machine-dependent state of new thread td as a copy of
  * td0: PCB and trapframe are duplicated, the copy's return registers are
  * cleared, and a call frame is built below the trapframe so the thread
  * first runs fork_trampoline with fork_return(td, tf) as its target.
  */
 void
 cpu_copy_thread(struct thread *td, struct thread *td0)
 {
 	struct pcb *pcb2;
 	struct trapframe *tf;
 	struct callframe *cf;
 
 	/* Ensure td0 pcb is up to date. */
 	if (td0 == curthread)
 		cpu_save_thread_regs(td0);
 
 	pcb2 = td->td_pcb;
 
 	/* Copy the upcall pcb */
 	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
 
 	/* Create a stack for the new thread */
 	tf = td->td_frame;
 	bcopy(td0->td_frame, tf, sizeof(struct trapframe));
 	/* Zero return values and clear the summary-overflow bit in CR. */
 	tf->fixreg[FIRSTARG] = 0;
 	tf->fixreg[FIRSTARG + 1] = 0;
 	tf->cr &= ~0x10000000;
 
 	/* Set registers for trampoline to user mode. */
 	cf = (struct callframe *)tf - 1;
 	memset(cf, 0, sizeof(struct callframe));
 	cf->cf_func = (register_t)fork_return;
 	cf->cf_arg0 = (register_t)td;
 	cf->cf_arg1 = (register_t)tf;
 
 	pcb2->pcb_sp = (register_t)cf;
 	/*
 	 * With 64-bit ELFv1 the symbol is read as a descriptor: word 0 is
 	 * used as the code address and word 1 as the TOC.  Otherwise the
 	 * symbol address itself is the code address.
 	 */
 	#if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1)
 	pcb2->pcb_lr = ((register_t *)fork_trampoline)[0];
 	pcb2->pcb_toc = ((register_t *)fork_trampoline)[1];
 	#else
 	pcb2->pcb_lr = (register_t)fork_trampoline;
 	pcb2->pcb_context[0] = pcb2->pcb_lr;
 	#endif
 	pcb2->pcb_cpu.aim.usr_vsid = 0;
 #ifdef __SPE__
 	pcb2->pcb_vec.vscr = SPEFSCR_DFLT;
 #endif
 
 	/* Setup to release spin count in fork_exit(). */
 	td->td_md.md_spinlock_count = 1;
 	td->td_md.md_saved_msr = psl_kernset;
 }
 
 /*
  * Prepare td to begin executing `entry(arg)` in user mode on the given
  * stack.
  *
  * The trapframe is cleared, the stack pointer is set near the top of
  * `stack` (32-byte aligned, with room for the initial frame), `arg` is
  * placed in r3 and the PC/MSR are set for the process ABI.  For 64-bit
  * processes not using the ELFv2 sysvec, `entry` is treated as a function
  * descriptor that is read from user memory.  PCB flags are reset and
  * td_retval[] is primed with entry/0.
  */
 void
 cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
     stack_t *stack)
 {
 	struct trapframe *tf;
 	uintptr_t sp;
 
 	tf = td->td_frame;
 	/* align stack and alloc space for frame ptr and saved LR */
 	#ifdef __powerpc64__
 	sp = ((uintptr_t)stack->ss_sp + stack->ss_size - 48) &
 	    ~0x1f;
 	#else
 	sp = ((uintptr_t)stack->ss_sp + stack->ss_size - 8) &
 	    ~0x1f;
 	#endif
 	bzero(tf, sizeof(struct trapframe));
 
 	tf->fixreg[1] = (register_t)sp;
 	tf->fixreg[3] = (register_t)arg;
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		tf->srr0 = (register_t)entry;
 		#ifdef __powerpc64__
 		tf->srr1 = psl_userset32 | PSL_FE_DFLT;
 		#else
 		tf->srr1 = psl_userset | PSL_FE_DFLT;
 		#endif
 	} else {
 	    #ifdef __powerpc64__
 		if (td->td_proc->p_sysent == &elf64_freebsd_sysvec_v2) {
 			tf->srr0 = (register_t)entry;
 			/* ELFv2 ABI requires that the global entry point be in r12. */
 			tf->fixreg[12] = (register_t)entry;
 		}
 		else {
 			register_t entry_desc[3];
 
 			/*
 			 * `entry` is a user pointer to a three-word function
 			 * descriptor.  If it cannot be read, substitute an
 			 * all-zero descriptor: loading whatever was left on
 			 * the kernel stack into user-visible registers would
 			 * leak kernel data.
 			 */
 			if (copyin((void *)entry, entry_desc,
 			    sizeof(entry_desc)) != 0)
 				bzero(entry_desc, sizeof(entry_desc));
 			tf->srr0 = entry_desc[0];
 			tf->fixreg[2] = entry_desc[1];
 			tf->fixreg[11] = entry_desc[2];
 		}
 		tf->srr1 = psl_userset | PSL_FE_DFLT;
 	    #endif
 	}
 
 	td->td_pcb->pcb_flags = 0;
 #ifdef __SPE__
 	td->td_pcb->pcb_vec.vscr = SPEFSCR_DFLT;
 #endif
 
 	td->td_retval[0] = (register_t)entry;
 	td->td_retval[1] = 0;
 }
 
 static int
 emulate_mfspr(int spr, int reg, struct trapframe *frame){
 	struct thread *td;
 
 	td = curthread;
 
 	if (spr == SPR_DSCR || spr == SPR_DSCRP) {
 		if (!(cpu_features2 & PPC_FEATURE2_DSCR))
 			return (SIGILL);
 		// If DSCR was never set, get the default DSCR
 		if ((td->td_pcb->pcb_flags & PCB_CDSCR) == 0)
 			td->td_pcb->pcb_dscr = mfspr(SPR_DSCRP);
 
 		frame->fixreg[reg] = td->td_pcb->pcb_dscr;
 		frame->srr0 += 4;
 		return (0);
 	} else
 		return (SIGILL);
 }
 
 static int
 emulate_mtspr(int spr, int reg, struct trapframe *frame){
 	struct thread *td;
 
 	td = curthread;
 
 	if (spr == SPR_DSCR || spr == SPR_DSCRP) {
 		if (!(cpu_features2 & PPC_FEATURE2_DSCR))
 			return (SIGILL);
 		td->td_pcb->pcb_flags |= PCB_CDSCR;
 		td->td_pcb->pcb_dscr = frame->fixreg[reg];
 		mtspr(SPR_DSCRP, frame->fixreg[reg]);
 		frame->srr0 += 4;
 		return (0);
 	} else
 		return (SIGILL);
 }
 
 #define XFX 0xFC0007FF
 /*
  * Extract the SPR number from an mfspr/mtspr instruction word.
  *
  * The 10-bit SPR field is stored with its halves swapped: instruction
  * bits 16-20 hold the low five bits of the SPR number and bits 11-15
  * hold the high five bits.
  */
 static int
 ppc_instr_spr(uint32_t instr)
 {
 
 	return (((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0));
 }
 
 /*
  * Try to emulate the user instruction at frame->srr0.
  *
  * Handled directly: mfpvr, mfspr/mtspr (through emulate_mfspr() and
  * emulate_mtspr()) and the sync family.  With FPU_EMU, anything else is
  * offered to fpu_emulate().  An instruction that is still unrecognised
  * gets one retry per address after an icache sync.
  *
  * Returns 0 when the instruction was handled or should be retried,
  * otherwise the signal to deliver.
  */
 int
 ppc_instr_emulate(struct trapframe *frame, struct thread *td)
 {
 	struct pcb *pcb;
 	uint32_t instr;
 	int reg, sig;
 	int rs, spr;
 
 	instr = fuword32((void *)frame->srr0);
 	sig = SIGILL;
 
 	if ((instr & 0xfc1fffff) == 0x7c1f42a6) {	/* mfpvr */
 		reg = (instr & ~0xfc1fffff) >> 21;
 		frame->fixreg[reg] = mfpvr();
 		frame->srr0 += 4;
 		return (0);
 	} else if ((instr & XFX) == 0x7c0002a6) {	/* mfspr */
 		rs = (instr &  0x3e00000) >> 21;
 		/*
 		 * Decode the full SPR number.  Looking only at the low
 		 * half would let unrelated SPRs alias the ones emulated.
 		 */
 		spr = ppc_instr_spr(instr);
 		return emulate_mfspr(spr, rs, frame);
 	} else if ((instr & XFX) == 0x7c0003a6) {	/* mtspr */
 		rs = (instr &  0x3e00000) >> 21;
 		spr = ppc_instr_spr(instr);
 		return emulate_mtspr(spr, rs, frame);
 	} else if ((instr & 0xfc000ffe) == 0x7c0004ac) {	/* various sync */
 		powerpc_sync(); /* Do a heavy-weight sync */
 		frame->srr0 += 4;
 		return (0);
 	}
 
 	pcb = td->td_pcb;
 #ifdef FPU_EMU
 	/* Give the emulator an FP image to work on, saving live state first. */
 	if (!(pcb->pcb_flags & PCB_FPREGS)) {
 		bzero(&pcb->pcb_fpu, sizeof(pcb->pcb_fpu));
 		pcb->pcb_flags |= PCB_FPREGS;
 	} else if (pcb->pcb_flags & PCB_FPU)
 		save_fpu(td);
 	sig = fpu_emulate(frame, &pcb->pcb_fpu);
 	if ((sig == 0 || sig == SIGFPE) && pcb->pcb_flags & PCB_FPU)
 		enable_fpu(td);
 #endif
 	if (sig == SIGILL) {
 		if (pcb->pcb_lastill != frame->srr0) {
 			/* Allow a second chance, in case of cache sync issues. */
 			sig = 0;
 			pmap_sync_icache(PCPU_GET(curpmap), frame->srr0, 4);
 			pcb->pcb_lastill = frame->srr0;
 		}
 	}
 
 	return (sig);
 }
diff --git a/sys/riscv/riscv/exec_machdep.c b/sys/riscv/riscv/exec_machdep.c
index d45e8b808f74..ab79c0384eb3 100644
--- a/sys/riscv/riscv/exec_machdep.c
+++ b/sys/riscv/riscv/exec_machdep.c
@@ -1,429 +1,429 @@
 /*-
  * Copyright (c) 2014 Andrew Turner
  * Copyright (c) 2015-2017 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/reg.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/ucontext.h>
 
 #include <machine/cpu.h>
 #include <machine/kdb.h>
 #include <machine/pcb.h>
 #include <machine/pte.h>
 #include <machine/riscvreg.h>
 #include <machine/sbi.h>
 #include <machine/trap.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 
 #ifdef FPE
 #include <machine/fpe.h>
 #endif
 
 static void get_fpcontext(struct thread *td, mcontext_t *mcp);
 static void set_fpcontext(struct thread *td, mcontext_t *mcp);
 
 _Static_assert(sizeof(mcontext_t) == 864, "mcontext_t size incorrect");
 _Static_assert(sizeof(ucontext_t) == 936, "ucontext_t size incorrect");
 _Static_assert(sizeof(siginfo_t) == 80, "siginfo_t size incorrect");
 
 /*
  * Export td's general-purpose registers from its trapframe into *regs:
  * the t/s/a register arrays, ra, sp, gp, tp, sepc and sstatus.  Always
  * returns 0.
  */
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *tf;
 
 	tf = td->td_frame;
 
 	memcpy(regs->a, tf->tf_a, sizeof(regs->a));
 	memcpy(regs->s, tf->tf_s, sizeof(regs->s));
 	memcpy(regs->t, tf->tf_t, sizeof(regs->t));
 
 	regs->ra = tf->tf_ra;
 	regs->sp = tf->tf_sp;
 	regs->gp = tf->tf_gp;
 	regs->tp = tf->tf_tp;
 	regs->sepc = tf->tf_sepc;
 	regs->sstatus = tf->tf_sstatus;
 
 	return (0);
 }
 
 /*
  * Load general-purpose registers from *regs into td's trapframe: the
  * t/s/a register arrays, ra, sp, gp, tp and sepc.  sstatus is not
  * written.  Always returns 0.
  */
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *tf;
 
 	tf = td->td_frame;
 
 	memcpy(tf->tf_a, regs->a, sizeof(tf->tf_a));
 	memcpy(tf->tf_s, regs->s, sizeof(tf->tf_s));
 	memcpy(tf->tf_t, regs->t, sizeof(tf->tf_t));
 
 	tf->tf_ra = regs->ra;
 	tf->tf_sp = regs->sp;
 	tf->tf_gp = regs->gp;
 	tf->tf_tp = regs->tp;
 	tf->tf_sepc = regs->sepc;
 
 	return (0);
 }
 
 /*
  * Export td's floating-point registers into *regs.  When the kernel is
  * built with FPE and the thread has started using FP, the register file
  * and fcsr are copied from the PCB; in every other case *regs is zeroed.
  * Always returns 0.
  */
 int
 fill_fpregs(struct thread *td, struct fpreg *regs)
 {
 #ifdef FPE
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 
 	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		/*
 		 * If we have just been running FPE instructions we will
 		 * need to save the state to memcpy it below.
 		 */
 		if (td == curthread)
 			fpe_state_save(td);
 
 		memcpy(regs->fp_x, pcb->pcb_x, sizeof(regs->fp_x));
 		regs->fp_fcsr = pcb->pcb_fcsr;
 	} else
 #endif
 		/* Also the entire body when FPE is not configured. */
 		memset(regs, 0, sizeof(*regs));
 
 	return (0);
 }
 
 /*
  * Load floating-point registers from *regs into td's PCB.  With FPE
  * configured this also marks FP as started for the thread and sets the
  * FS field of the saved sstatus to "clean"; without FPE it is a no-op.
  * Always returns 0.
  */
 int
 set_fpregs(struct thread *td, struct fpreg *regs)
 {
 #ifdef FPE
 	struct trapframe *tf;
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	tf = td->td_frame;
 
 	pcb->pcb_fcsr = regs->fp_fcsr;
 	memcpy(pcb->pcb_x, regs->fp_x, sizeof(regs->fp_x));
 	pcb->pcb_fpflags |= PCB_FP_STARTED;
 
 	/* Replace the FS field with FS_CLEAN. */
 	tf->tf_sstatus = (tf->tf_sstatus & ~SSTATUS_FS_MASK) |
 	    SSTATUS_FS_CLEAN;
 #endif
 
 	return (0);
 }
 
 /*
  * Debug-register read hook.  Neither argument is used; calling this
  * panics the kernel.
  */
 int
 fill_dbregs(struct thread *td, struct dbreg *regs)
 {
 
 	panic("fill_dbregs");
 }
 
 /*
  * Debug-register write hook.  Neither argument is used; calling this
  * panics the kernel.
  */
 int
 set_dbregs(struct thread *td, struct dbreg *regs)
 {
 
 	panic("set_dbregs");
 }
 
 /*
  * Set up td's user registers for a freshly exec'd image: the trapframe
  * is cleared, a0 receives the raw stack address, sp its aligned form,
  * and both ra and sepc are pointed at the image entry.  The thread's
  * "FP started" flag is cleared.
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	memset(frame, 0, sizeof(*frame));
 
 	frame->tf_sepc = imgp->entry_addr;
 	frame->tf_ra = imgp->entry_addr;
 	frame->tf_sp = STACKALIGN(stack);
 	frame->tf_a[0] = stack;
 
 	td->td_pcb->pcb_fpflags &= ~PCB_FP_STARTED;
 }
 
 /* Sanity check that these are the same size; they are memcpy'd to and from. */
 CTASSERT(sizeof(((struct trapframe *)0)->tf_a) ==
     sizeof((struct gpregs *)0)->gp_a);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_s) ==
     sizeof((struct gpregs *)0)->gp_s);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_t) ==
     sizeof((struct gpregs *)0)->gp_t);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_a) ==
     sizeof((struct reg *)0)->a);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_s) ==
     sizeof((struct reg *)0)->s);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_t) ==
     sizeof((struct reg *)0)->t);
 
 /*
  * Capture td's machine context into *mcp: general-purpose registers from
  * the trapframe, then floating-point state through get_fpcontext().
  * With GET_MC_CLEAR_RET set in clear_ret, a0 and t0 are zeroed in the
  * captured copy.  Always returns 0.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
 {
 	struct trapframe *frame;
 	struct gpregs *gp;
 
 	frame = td->td_frame;
 	gp = &mcp->mc_gpregs;
 
 	memcpy(gp->gp_t, frame->tf_t, sizeof(gp->gp_t));
 	memcpy(gp->gp_s, frame->tf_s, sizeof(gp->gp_s));
 	memcpy(gp->gp_a, frame->tf_a, sizeof(gp->gp_a));
 
 	if ((clear_ret & GET_MC_CLEAR_RET) != 0) {
 		gp->gp_a[0] = 0;
 		gp->gp_t[0] = 0; /* clear syscall error */
 	}
 
 	gp->gp_ra = frame->tf_ra;
 	gp->gp_sp = frame->tf_sp;
 	gp->gp_gp = frame->tf_gp;
 	gp->gp_tp = frame->tf_tp;
 	gp->gp_sepc = frame->tf_sepc;
 	gp->gp_sstatus = frame->tf_sstatus;
 
 	get_fpcontext(td, mcp);
 
 	return (0);
 }
 
 /*
  * Install the machine context *mcp on td.  Returns EINVAL if the
  * context would alter sstatus bits outside the permitted set, and 0
  * once the registers and floating-point state are in place.  The tp
  * register is not taken from the context.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct trapframe *frame;
 	struct gpregs *gp;
 
 	frame = td->td_frame;
 	gp = &mcp->mc_gpregs;
 
 	/*
 	 * Permit changes to the USTATUS bits of SSTATUS.
 	 *
 	 * Ignore writes to read-only bits (SD, XS).
 	 *
 	 * Ignore writes to the FS field as set_fpcontext() will set
 	 * it explicitly.
 	 */
 	if (((gp->gp_sstatus ^ frame->tf_sstatus) &
 	    ~(SSTATUS_SD | SSTATUS_XS_MASK | SSTATUS_FS_MASK | SSTATUS_UPIE |
 	    SSTATUS_UIE)) != 0)
 		return (EINVAL);
 
 	memcpy(frame->tf_t, gp->gp_t, sizeof(frame->tf_t));
 	memcpy(frame->tf_s, gp->gp_s, sizeof(frame->tf_s));
 	memcpy(frame->tf_a, gp->gp_a, sizeof(frame->tf_a));
 
 	frame->tf_ra = gp->gp_ra;
 	frame->tf_sp = gp->gp_sp;
 	frame->tf_gp = gp->gp_gp;
 	frame->tf_sepc = gp->gp_sepc;
 	frame->tf_sstatus = gp->gp_sstatus;
 
 	/* Must follow the sstatus store: it rewrites the FS field. */
 	set_fpcontext(td, mcp);
 
 	return (0);
 }
 
 /*
  * Copy the current thread's floating-point state into *mcp and flag it
  * with _MC_FP_VALID, provided the thread has started using FP.  Compiles
  * to an empty function without FPE.  td must own the current PCB
  * (asserted below).
  */
 static void
 get_fpcontext(struct thread *td, mcontext_t *mcp)
 {
 #ifdef FPE
 	struct pcb *curpcb;
 
 	/* Save and copy inside one critical section. */
 	critical_enter();
 
 	curpcb = curthread->td_pcb;
 
 	KASSERT(td->td_pcb == curpcb, ("Invalid fpe pcb"));
 
 	if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		/*
 		 * If we have just been running FPE instructions we will
 		 * need to save the state to memcpy it below.
 		 */
 		fpe_state_save(td);
 
 		KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
 		    ("Non-userspace FPE flags set in get_fpcontext"));
 		memcpy(mcp->mc_fpregs.fp_x, curpcb->pcb_x,
 		    sizeof(mcp->mc_fpregs.fp_x));
 		mcp->mc_fpregs.fp_fcsr = curpcb->pcb_fcsr;
 		mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags;
 		/* ORs into mc_flags; other bits already there are kept. */
 		mcp->mc_flags |= _MC_FP_VALID;
 	}
 
 	critical_exit();
 #endif
 }
 
 /*
  * Apply the floating-point part of *mcp.  The FS field of td's saved
  * sstatus is first set to "off"; with FPE configured and _MC_FP_VALID
  * set in the context, the registers, fcsr and user-visible FP flags are
  * loaded into the current thread's PCB and FS becomes "clean".
  */
 static void
 set_fpcontext(struct thread *td, mcontext_t *mcp)
 {
 #ifdef FPE
 	struct pcb *curpcb;
 #endif
 
 	/* Default to FP off; re-enabled below when valid state is supplied. */
 	td->td_frame->tf_sstatus &= ~SSTATUS_FS_MASK;
 	td->td_frame->tf_sstatus |= SSTATUS_FS_OFF;
 
 #ifdef FPE
 	critical_enter();
 
 	if ((mcp->mc_flags & _MC_FP_VALID) != 0) {
 		/* NOTE(review): writes curthread's PCB, not td->td_pcb -- presumably td == curthread; confirm. */
 		curpcb = curthread->td_pcb;
 		/* FPE usage is enabled, override registers. */
 		memcpy(curpcb->pcb_x, mcp->mc_fpregs.fp_x,
 		    sizeof(mcp->mc_fpregs.fp_x));
 		curpcb->pcb_fcsr = mcp->mc_fpregs.fp_fcsr;
 		/* Only flag bits within PCB_FP_USERMASK are accepted. */
 		curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK;
 		td->td_frame->tf_sstatus |= SSTATUS_FS_CLEAN;
 	}
 
 	critical_exit();
 #endif
 }
 
 /*
  * sigreturn system call: read the ucontext_t at uap->sigcntxp from user
  * space, install its machine context on td and restore its signal mask.
  *
  * Returns EFAULT if the copyin fails, the set_mcontext() error if the
  * context is rejected, and EJUSTRETURN on success.
  */
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t uc;
 	int rv;
 
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0)
 		return (EFAULT);
 
 	rv = set_mcontext(td, &uc.uc_mcontext);
 	if (rv == 0) {
 		/* Restore signal mask. */
 		kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 		rv = EJUSTRETURN;
 	}
 
 	return (rv);
 }
 
 /*
  * Deliver a signal to the current thread: build a sigframe (siginfo plus
  * ucontext) on the user stack or the alternate signal stack, then
  * redirect the trapframe so that the thread resumes in `catcher` with
  * a0 = signal number, a1 = &siginfo, a2 = &ucontext and ra pointing at
  * the signal trampoline code.
  *
  * Entered with the process lock and ps_mtx held (asserted); both are
  * dropped around the copyout and re-taken before returning.  If the
  * frame cannot be written the thread is terminated via sigexit(SIGILL).
  */
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct sigframe *fp, frame;
 	struct sysentvec *sysent;
 	struct trapframe *tf;
 	struct sigacts *psp;
 	struct thread *td;
 	struct proc *p;
 	int onstack;
 	int sig;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_sp);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 	} else {
 		fp = (struct sigframe *)td->td_frame->tf_sp;
 	}
 
 	/* Make room, keeping the stack aligned */
 	fp--;
 	fp = (struct sigframe *)STACKALIGN(fp);
 
 	/* Fill in the frame to copy out */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack = td->td_sigstk;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
 	/* Both locks are released before touching user memory. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Handler arguments; the pointers refer to the user-space copy. */
 	tf->tf_a[0] = sig;
 	tf->tf_a[1] = (register_t)&fp->sf_si;
 	tf->tf_a[2] = (register_t)&fp->sf_uc;
 
 	tf->tf_sepc = (register_t)catcher;
 	tf->tf_sp = (register_t)fp;
 
 	/* The handler returns into the signal trampoline. */
 	sysent = p->p_sysent;
-	if (sysent->sv_sigcode_base != 0)
+	if (PROC_HAS_SHP(p))
 		tf->tf_ra = (register_t)PROC_SIGCODE(p);
 	else
 		tf->tf_ra = (register_t)(PROC_PS_STRINGS(p) -
 		    *(sysent->sv_szsigcode));
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_sepc,
 	    tf->tf_sp);
 
 	/* Re-take the locks the caller expects to still hold. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
diff --git a/sys/sys/exec.h b/sys/sys/exec.h
index 8e62876deb81..dd7a99475cbd 100644
--- a/sys/sys/exec.h
+++ b/sys/sys/exec.h
@@ -1,161 +1,163 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)exec.h	8.3 (Berkeley) 1/21/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_EXEC_H_
 #define _SYS_EXEC_H_
 
 /*
  * Before ps_args existed, the following structure, found at the top of
  * the user stack of each user process, was used by ps(1) to locate
  * environment and argv strings.  Normally ps_argvstr points to the
  * argv vector, and ps_nargvstr is the same as the program's argc. The
  * fields ps_envstr and ps_nenvstr are the equivalent for the environment.
  *
  * Programs should now use setproctitle(3) to change ps output.
  * setproctitle() always informs the kernel with sysctl and sets the
  * pointers in ps_strings.  The kern.proc.args sysctl first tries p_args.
  * If p_args is NULL, it then falls back to reading ps_strings and following
  * the pointers.
  */
 struct ps_strings {
 	char	**ps_argvstr;	/* first of 0 or more argument strings */
 	unsigned int ps_nargvstr; /* the number of argument strings */
 	char	**ps_envstr;	/* first of 0 or more environment strings */
 	unsigned int ps_nenvstr; /* the number of environment strings */
 };
 
 /* Coredump output parameters. */
 struct coredump_params {
 	off_t		offset;
 	struct ucred	*active_cred;
 	struct ucred	*file_cred;
 	struct thread	*td;
 	struct vnode	*vp;
 	struct compressor *comp;
 };
 
 struct image_params;
 
 /*
  * Descriptor for one executable image activator: the activation routine,
  * which is handed the image_params of the exec in progress, and a name.
  * Instances are passed to exec_register()/exec_unregister().
  */
 struct execsw {
 	int (*ex_imgact)(struct image_params *);
 	const char *ex_name;
 };
 
 #include <machine/exec.h>
 
 #ifdef _KERNEL
 #include <sys/cdefs.h>
 
 /*
  * Address of ps_strings structure (in user space).
  * Prefer the kern.ps_strings or kern.proc.ps_strings sysctls to this constant.
  */
 #define	PS_STRINGS	(USRSTACK - sizeof(struct ps_strings))
 #define	PROC_PS_STRINGS(p)	\
 	((p)->p_vmspace->vm_stacktop - (p)->p_sysent->sv_psstringssz)
 
 /*
  * Address of signal trampoline (in user space).
- * This assumes that the sigcode resides in the shared page, which is true
- * in all cases, except for a.out binaries.
+ * This assumes that the sigcode resides in the shared page.
  */
 #define PROC_SIGCODE(p)		\
-	((p)->p_sysent->sv_sigcode_base)
+	((p)->p_vmspace->vm_shp_base + (p)->p_sysent->sv_sigcode_offset)
+
+#define PROC_HAS_SHP(p)		\
+	((p)->p_sysent->sv_shared_page_obj != NULL)
 
 int exec_map_first_page(struct image_params *);        
 void exec_unmap_first_page(struct image_params *);       
 
 int exec_register(const struct execsw *);
 int exec_unregister(const struct execsw *);
 
 enum uio_seg;
 
 #define   CORE_BUF_SIZE   (16 * 1024)
 
 int core_write(struct coredump_params *, const void *, size_t, off_t,
     enum uio_seg, size_t *);
 int core_output(char *, size_t, off_t, struct coredump_params *, void *);
 int sbuf_drain_core_output(void *, const char *, int);
 
 extern int coredump_pack_fileinfo;
 extern int coredump_pack_vmmapinfo;
 
 /*
  * note: name##_mod cannot be const storage because the
  * linker_file_sysinit() function modifies _file in the
  * moduledata_t.
  */
 
 #include <sys/module.h>
 
 /*
  * EXEC_SET(name, execsw_arg):
  *
  * Emit the module glue for an image activator.  This defines a module
  * event handler name_modevent() that calls exec_register() on MOD_LOAD
  * and exec_unregister() on MOD_UNLOAD with the struct execsw passed as
  * module data, returns EOPNOTSUPP for any other event, and prints a
  * diagnostic prefixed with the module name when either call fails.  It
  * then defines the moduledata_t name_mod pointing at execsw_arg and
  * declares the module at SI_SUB_EXEC/SI_ORDER_ANY.
  */
 #define EXEC_SET(name, execsw_arg) \
 	static int __CONCAT(name,_modevent)(module_t mod, int type, \
 	    void *data) \
 	{ \
 		struct execsw *exec = (struct execsw *)data; \
 		int error = 0; \
 		switch (type) { \
 		case MOD_LOAD: \
 			/* printf(#name " module loaded\n"); */ \
 			error = exec_register(exec); \
 			if (error) \
 				printf(__XSTRING(name) " register failed\n"); \
 			break; \
 		case MOD_UNLOAD: \
 			/* printf(#name " module unloaded\n"); */ \
 			error = exec_unregister(exec); \
 			if (error) \
 				printf(__XSTRING(name) " unregister failed\n");\
 			break; \
 		default: \
 			error = EOPNOTSUPP; \
 			break; \
 		} \
 		return error; \
 	} \
 	static moduledata_t __CONCAT(name,_mod) = { \
 		__XSTRING(name), \
 		__CONCAT(name,_modevent), \
 		(void *)& execsw_arg \
 	}; \
 	DECLARE_MODULE_TIED(name, __CONCAT(name,_mod), SI_SUB_EXEC, \
 	    SI_ORDER_ANY)
 #endif
 
 #endif
diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h
index f677050db769..a77feb5bcbf7 100644
--- a/sys/sys/sysent.h
+++ b/sys/sys/sysent.h
@@ -1,345 +1,345 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1988, 1991 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SYSENT_H_
 #define	_SYS_SYSENT_H_
 
 #include <bsm/audit.h>
 
 struct rlimit;
 struct sysent;
 struct thread;
 struct ksiginfo;
 struct syscall_args;
 
 enum systrace_probe_t {
 	SYSTRACE_ENTRY,
 	SYSTRACE_RETURN,
 };
 
 typedef	int	sy_call_t(struct thread *, void *);
 
 typedef	void	(*systrace_probe_func_t)(struct syscall_args *,
 		    enum systrace_probe_t, int);
 typedef	void	(*systrace_args_func_t)(int, void *, uint64_t *, int *);
 
 #ifdef _KERNEL
 extern systrace_probe_func_t	systrace_probe_func;
 extern bool			systrace_enabled;
 
 #ifdef KDTRACE_HOOKS
 #define	SYSTRACE_ENABLED()	(systrace_enabled)
 #else
 #define SYSTRACE_ENABLED()	(0)
 #endif
 #endif /* _KERNEL */
 
 struct sysent {			/* system call table */
 	sy_call_t *sy_call;	/* implementing function */
 	systrace_args_func_t sy_systrace_args_func;
 				/* optional argument conversion function. */
 	u_int8_t sy_narg;	/* number of arguments */
 	u_int8_t sy_flags;	/* General flags for system calls. */
 	au_event_t sy_auevent;	/* audit event associated with syscall */
 	u_int32_t sy_entry;	/* DTrace entry ID for systrace. */
 	u_int32_t sy_return;	/* DTrace return ID for systrace. */
 	u_int32_t sy_thrcnt;
 };
 
 /*
  * A system call is permitted in capability mode.
  */
 #define	SYF_CAPENABLED	0x00000001
 
 #define	SY_THR_FLAGMASK	0x7
 #define	SY_THR_STATIC	0x1
 #define	SY_THR_DRAINING	0x2
 #define	SY_THR_ABSENT	0x4
 #define	SY_THR_INCR	0x8
 
 #ifdef KLD_MODULE
 #define	SY_THR_STATIC_KLD	0
 #else
 #define	SY_THR_STATIC_KLD	SY_THR_STATIC
 #endif
 
 struct image_params;
 struct proc;
 struct __sigset;
 struct trapframe;
 struct vnode;
 struct note_info_list;
 
 struct sysentvec {
 	int		sv_size;	/* number of entries */
 	struct sysent	*sv_table;	/* pointer to sysent */
 	int		(*sv_fixup)(uintptr_t *, struct image_params *);
 					/* stack fixup function */
 	void		(*sv_sendsig)(void (*)(int), struct ksiginfo *, struct __sigset *);
 			    		/* send signal */
 	const char 	*sv_sigcode;	/* start of sigtramp code */
 	int 		*sv_szsigcode;	/* size of sigtramp code */
 	int		sv_sigcodeoff;
 	char		*sv_name;	/* name of binary type */
 	int		(*sv_coredump)(struct thread *, struct vnode *, off_t, int);
 					/* function to dump core, or NULL */
 	int		sv_elf_core_osabi;
 	const char	*sv_elf_core_abi_vendor;
 	void		(*sv_elf_core_prepare_notes)(struct thread *,
 			    struct note_info_list *, size_t *);
 	int		(*sv_imgact_try)(struct image_params *);
 	int		(*sv_copyout_auxargs)(struct image_params *,
 			    uintptr_t);
 	int		sv_minsigstksz;	/* minimum signal stack size */
 	vm_offset_t	sv_minuser;	/* VM_MIN_ADDRESS */
 	vm_offset_t	sv_maxuser;	/* VM_MAXUSER_ADDRESS */
 	vm_offset_t	sv_usrstack;	/* USRSTACK */
 	vm_offset_t	sv_psstrings;	/* PS_STRINGS */
 	size_t		sv_psstringssz;	/* PS_STRINGS size */
 	int		sv_stackprot;	/* vm protection for stack */
 	int		(*sv_copyout_strings)(struct image_params *,
 			    uintptr_t *);
 	void		(*sv_setregs)(struct thread *, struct image_params *,
 			    uintptr_t);
 	void		(*sv_fixlimit)(struct rlimit *, int);
 	u_long		*sv_maxssiz;
 	u_int		sv_flags;
 	void		(*sv_set_syscall_retval)(struct thread *, int);
 	int		(*sv_fetch_syscall_args)(struct thread *);
 	const char	**sv_syscallnames;
-	vm_offset_t	sv_timekeep_base;
+	vm_offset_t	sv_timekeep_offset;
 	vm_offset_t	sv_shared_page_base;
 	vm_offset_t	sv_shared_page_len;
-	vm_offset_t	sv_sigcode_base;
+	vm_offset_t	sv_sigcode_offset;
 	void		*sv_shared_page_obj;
-	vm_offset_t	sv_vdso_base;
+	vm_offset_t	sv_vdso_offset;
 	void		(*sv_schedtail)(struct thread *);
 	void		(*sv_thread_detach)(struct thread *);
 	int		(*sv_trap)(struct thread *);
 	u_long		*sv_hwcap;	/* Value passed in AT_HWCAP. */
 	u_long		*sv_hwcap2;	/* Value passed in AT_HWCAP2. */
 	const char	*(*sv_machine_arch)(struct proc *);
-	vm_offset_t	sv_fxrng_gen_base;
+	vm_offset_t	sv_fxrng_gen_offset;
 	void		(*sv_onexec_old)(struct thread *td);
 	int		(*sv_onexec)(struct proc *, struct image_params *);
 	void		(*sv_onexit)(struct proc *);
 	void		(*sv_ontdexit)(struct thread *td);
 	int		(*sv_setid_allowed)(struct thread *td,
 			    struct image_params *imgp);
 	void		(*sv_set_fork_retval)(struct thread *);
 					/* Only used on x86 */
 	struct regset	**sv_regset_begin;
 	struct regset	**sv_regset_end;
 };
 
 #define	SV_ILP32	0x000100	/* 32-bit executable. */
 #define	SV_LP64		0x000200	/* 64-bit executable. */
 #define	SV_IA32		0x004000	/* Intel 32-bit executable. */
 #define	SV_AOUT		0x008000	/* a.out executable. */
 #define	SV_SHP		0x010000	/* Shared page. */
 #define	SV_AVAIL1	0x020000	/* Unused */
 #define	SV_TIMEKEEP	0x040000	/* Shared page timehands. */
 #define	SV_ASLR		0x080000	/* ASLR allowed. */
 #define	SV_RNG_SEED_VER	0x100000	/* random(4) reseed generation. */
 #define	SV_SIG_DISCIGN	0x200000	/* Do not discard ignored signals */
 #define	SV_SIG_WAITNDQ	0x400000	/* Wait does not dequeue SIGCHLD */
 #define	SV_DSO_SIG	0x800000	/* Signal trampoline packed in dso */
 
 #define	SV_ABI_MASK	0xff
 #define	SV_PROC_FLAG(p, x)	((p)->p_sysent->sv_flags & (x))
 #define	SV_PROC_ABI(p)		((p)->p_sysent->sv_flags & SV_ABI_MASK)
 #define	SV_CURPROC_FLAG(x)	SV_PROC_FLAG(curproc, x)
 #define	SV_CURPROC_ABI()	SV_PROC_ABI(curproc)
 /* same as ELFOSABI_XXX, to prevent header pollution */
 #define	SV_ABI_LINUX	3
 #define	SV_ABI_FREEBSD 	9
 #define	SV_ABI_UNDEF	255
 
 /* sv_coredump flags */
 #define	SVC_PT_COREDUMP	0x00000001	/* dump requested by ptrace(2) */
 #define	SVC_NOCOMPRESS	0x00000002	/* disable compression. */
 #define	SVC_ALL		0x00000004	/* dump everything */
 
 #ifdef _KERNEL
 extern struct sysentvec aout_sysvec;
 extern struct sysent sysent[];
 extern const char *syscallnames[];
 
 struct nosys_args {
 	register_t dummy;
 };
 
 int	nosys(struct thread *, struct nosys_args *);
 
 #define	NO_SYSCALL (-1)
 
 struct module;
 
 struct syscall_module_data {
 	int	(*chainevh)(struct module *, int, void *); /* next handler */
 	void	*chainarg;		/* arg for next event handler */
 	int	*offset;		/* offset into sysent */
 	struct sysent *new_sysent;	/* new sysent */
 	struct sysent old_sysent;	/* old sysent */
 	int	flags;			/* flags for syscall_register */
 };
 
 /* separate initialization vector so it can be used in a substructure */
 #define SYSENT_INIT_VALS(_syscallname) {			\
 	.sy_narg = (sizeof(struct _syscallname ## _args )	\
 	    / sizeof(register_t)),				\
 	.sy_call = (sy_call_t *)&sys_##_syscallname,		\
 	.sy_auevent = SYS_AUE_##_syscallname,			\
 	.sy_systrace_args_func = NULL,				\
 	.sy_entry = 0,						\
 	.sy_return = 0,						\
 	.sy_flags = 0,						\
 	.sy_thrcnt = 0						\
 }							
 
 #define	MAKE_SYSENT(syscallname)				\
 static struct sysent syscallname##_sysent = SYSENT_INIT_VALS(syscallname);
 
 /*
  * Define a static struct sysent named <syscallname>_sysent for a compat
  * system call whose handler function is named exactly "syscallname"
  * (no sys_ prefix).  The argument count is derived from the size of
  * struct <syscallname>_args in units of register_t.
  *
  * Designated initializers are required here: struct sysent starts with
  * sy_call and sy_systrace_args_func, so a positional list of
  * (narg, call, auevent) would store each value in the wrong member.
  * Members not named are zero-initialized.
  */
 #define	MAKE_SYSENT_COMPAT(syscallname)				\
 static struct sysent syscallname##_sysent = {			\
 	.sy_narg = (sizeof(struct syscallname ## _args )	\
 	    / sizeof(register_t)),				\
 	.sy_call = (sy_call_t *)& syscallname,			\
 	.sy_auevent = SYS_AUE_##syscallname			\
 }
 
 /*
  * Declare a module that installs a system call through
  * syscall_module_handler().  The struct syscall_module_data initializer
  * is positional and follows that struct's member order: chainevh = evh,
  * chainarg = arg, offset, new_sysent, and an old_sysent whose leading
  * members are given zero/NULL/AUE_NULL values; the trailing flags
  * member is left zero-initialized.  The module is named "sys/<name>".
  */
 #define SYSCALL_MODULE(name, offset, new_sysent, evh, arg)	\
 static struct syscall_module_data name##_syscall_mod = {	\
 	evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL }	\
 };								\
 								\
 static moduledata_t name##_mod = {				\
 	"sys/" #name,						\
 	syscall_module_handler,					\
 	&name##_syscall_mod					\
 };								\
 DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE)
 
 #define	SYSCALL_MODULE_HELPER(syscallname)			\
 static int syscallname##_syscall = SYS_##syscallname;		\
 MAKE_SYSENT(syscallname);					\
 SYSCALL_MODULE(syscallname,					\
     & syscallname##_syscall, & syscallname##_sysent,		\
     NULL, NULL)
 
 /*
  * Evaluates to non-zero when the native sysent[] slot for SYS_<syscallname>
  * holds a handler other than the lkmnosys/lkmressys placeholders.
  */
 #define	SYSCALL_MODULE_PRESENT(syscallname)				\
 	(sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys &&	\
 	sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys)
 
 /*
  * Syscall registration helpers with resource allocation handling.
  */
 struct syscall_helper_data {
 	struct sysent new_sysent;
 	struct sysent old_sysent;
 	int syscall_no;
 	int registered;
 };
 /*
  * Initializer for one struct syscall_helper_data entry describing the
  * system call whose handler is sys_<syscallname>: fills new_sysent
  * (argument count from struct <syscallname>_args, handler, audit event,
  * flags) and syscall_no; old_sysent and registered start out zeroed.
  */
 #define SYSCALL_INIT_HELPER_F(syscallname, flags) {		\
     .new_sysent = {						\
 	.sy_narg = (sizeof(struct syscallname ## _args )	\
 	    / sizeof(register_t)),				\
 	.sy_call = (sy_call_t *)& sys_ ## syscallname,		\
 	.sy_auevent = SYS_AUE_##syscallname,			\
 	.sy_flags = (flags)					\
     },								\
     .syscall_no = SYS_##syscallname				\
 }
 /*
  * Same as SYSCALL_INIT_HELPER_F, except that the handler function is
  * named exactly <syscallname> rather than sys_<syscallname>.
  */
 #define SYSCALL_INIT_HELPER_COMPAT_F(syscallname, flags) {	\
     .new_sysent = {						\
 	.sy_narg = (sizeof(struct syscallname ## _args )	\
 	    / sizeof(register_t)),				\
 	.sy_call = (sy_call_t *)& syscallname,			\
 	.sy_auevent = SYS_AUE_##syscallname,			\
 	.sy_flags = (flags)					\
     },								\
     .syscall_no = SYS_##syscallname				\
 }
 #define SYSCALL_INIT_HELPER(syscallname)			\
     SYSCALL_INIT_HELPER_F(syscallname, 0)
 #define SYSCALL_INIT_HELPER_COMPAT(syscallname)			\
     SYSCALL_INIT_HELPER_COMPAT_F(syscallname, 0)
 #define SYSCALL_INIT_LAST {					\
     .syscall_no = NO_SYSCALL					\
 }
 
 int	syscall_module_handler(struct module *mod, int what, void *arg);
 int	syscall_helper_register(struct syscall_helper_data *sd, int flags);
 int	syscall_helper_unregister(struct syscall_helper_data *sd);
 /* Implementation, exposed for COMPAT code */
 int	kern_syscall_register(struct sysent *sysents, int *offset,
 	    struct sysent *new_sysent, struct sysent *old_sysent, int flags);
 int	kern_syscall_deregister(struct sysent *sysents, int offset,
 	    const struct sysent *old_sysent);
 int	kern_syscall_module_handler(struct sysent *sysents,
 	    struct module *mod, int what, void *arg);
 int	kern_syscall_helper_register(struct sysent *sysents,
 	    struct syscall_helper_data *sd, int flags);
 int	kern_syscall_helper_unregister(struct sysent *sysents,
 	    struct syscall_helper_data *sd);
 
 struct proc;
 const char *syscallname(struct proc *p, u_int code);
 
 /* Special purpose system call functions. */
 struct nosys_args;
 
 int	lkmnosys(struct thread *, struct nosys_args *);
 int	lkmressys(struct thread *, struct nosys_args *);
 
 int	syscall_thread_enter(struct thread *td, struct sysent *se);
 void	syscall_thread_exit(struct thread *td, struct sysent *se);
 
 int shared_page_alloc(int size, int align);
 int shared_page_fill(int size, int align, const void *data);
 void shared_page_write(int base, int size, const void *data);
 void exec_sysvec_init(void *param);
 void exec_sysvec_init_secondary(struct sysentvec *sv, struct sysentvec *sv2);
 void exec_inittk(void);
 
 void exit_onexit(struct proc *p);
 void exec_free_abi_mappings(struct proc *p);
 void exec_onexec_old(struct thread *td);
 
 #define INIT_SYSENTVEC(name, sv)					\
     SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY,				\
 	(sysinit_cfunc_t)exec_sysvec_init, sv);
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_SYSENT_H_ */
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 04310e42218f..a7efa00da1e2 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1,5365 +1,5366 @@
 /*-
  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *	Virtual memory mapping module.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/elf.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/vmmeter.h>
 #include <sys/mman.h>
 #include <sys/vnode.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/file.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/shm.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vnode_pager.h>
 #include <vm/swap_pager.h>
 #include <vm/uma.h>
 
 /*
  *	Virtual memory maps provide for the mapping, protection,
  *	and sharing of virtual memory objects.  In addition,
  *	this module provides for an efficient virtual copy of
  *	memory from one map to another.
  *
  *	Synchronization is required prior to most operations.
  *
  *	Maps consist of an ordered doubly-linked list of simple
  *	entries; a self-adjusting binary search tree of these
  *	entries is used to speed up lookups.
  *
  *	Since portions of maps are specified by start/end addresses,
  *	which may not align with existing map entries, all
  *	routines merely "clip" entries to these start/end values.
  *	[That is, an entry is split into two, bordering at a
  *	start or end value.]  Note that these clippings may not
  *	always be necessary (as the two resulting entries are then
  *	not changed); however, the clipping is done for convenience.
  *
  *	As mentioned above, virtual copy operations are performed
  *	by copying VM object references from one map to
  *	another, and then marking both regions as copy-on-write.
  */
 
 static struct mtx map_sleep_mtx;
 static uma_zone_t mapentzone;
 static uma_zone_t kmapentzone;
 static uma_zone_t vmspace_zone;
 static int vmspace_zinit(void *mem, int size, int flags);
 static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
     vm_offset_t max);
 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
 static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
 static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
     vm_map_entry_t gap_entry);
 static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
 #ifdef INVARIANTS
 static void vmspace_zdtor(void *mem, int size, void *arg);
 #endif
 static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
     vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max,
     int cow);
 static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
     vm_offset_t failed_addr);
 
 /*
  * True when map entry "e" carries a credential itself, or when it has a
  * backing object that carries a credential and the entry is not marked
  * MAP_ENTRY_NEEDS_COPY.
  */
 #define	ENTRY_CHARGED(e) ((e)->cred != NULL || \
     ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
      !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))
 
 /* 
  * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
  * stable.
  */
 #define PROC_VMSPACE_LOCK(p) do { } while (0)
 #define PROC_VMSPACE_UNLOCK(p) do { } while (0)
 
 /*
  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
  *
  *	Clamps the starting and ending region addresses to the valid
  *	range of the map rather than failing: "start" is raised to
  *	vm_map_min(map), "end" is lowered to vm_map_max(map), and if
  *	"start" still exceeds "end" it is set to "end".  Both "start"
  *	and "end" are assigned to, so they must be modifiable lvalues.
  */
 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
 		{					\
 		if (start < vm_map_min(map))		\
 			start = vm_map_min(map);	\
 		if (end > vm_map_max(map))		\
 			end = vm_map_max(map);		\
 		if (start > end)			\
 			start = end;			\
 		}
 
 #ifndef UMA_MD_SMALL_ALLOC
 
 /*
  * Allocate a new slab for kernel map entries.  The kernel map may be locked or
  * unlocked, depending on whether the request is coming from the kernel map or a
  * submap.  This function allocates a virtual address range directly from the
  * kernel map instead of the kmem_* layer to avoid recursion on the kernel map
  * lock and also to avoid triggering allocator recursion in the vmem boundary
  * tag allocator.
  */
 static void *
 kmapent_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 	vm_offset_t addr;
 	int error, locked;
 
 	/* Tag the slab; kmapent_free() only releases slabs with this flag. */
 	*pflag = UMA_SLAB_PRIV;
 
 	/* Lock the kernel map only if it is not already locked. */
 	if (!(locked = vm_map_locked(kernel_map)))
 		vm_map_lock(kernel_map);
 	addr = vm_map_findspace(kernel_map, vm_map_min(kernel_map), bytes);
 	/* A range that wraps or extends past the map end means no space. */
 	if (addr + bytes < addr || addr + bytes > vm_map_max(kernel_map))
 		panic("%s: kernel map is exhausted", __func__);
 	error = vm_map_insert(kernel_map, NULL, 0, addr, addr + bytes,
 	    VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
 	if (error != KERN_SUCCESS)
 		panic("%s: vm_map_insert() failed: %d", __func__, error);
 	if (!locked)
 		vm_map_unlock(kernel_map);
 	/*
 	 * Back the new range with pages.  Of the caller's wait flags only
 	 * M_ZERO is passed through; the request is always M_NOWAIT and
 	 * M_USE_RESERVE.
 	 */
 	error = kmem_back_domain(domain, kernel_object, addr, bytes, M_NOWAIT |
 	    M_USE_RESERVE | (wait & M_ZERO));
 	if (error == KERN_SUCCESS) {
 		return ((void *)addr);
 	} else {
 		/* Backing failed: undo the map insertion and report failure. */
 		if (!locked)
 			vm_map_lock(kernel_map);
 		vm_map_delete(kernel_map, addr, bytes);
 		if (!locked)
 			vm_map_unlock(kernel_map);
 		return (NULL);
 	}
 }
 
 /*
  * Release a slab of kernel map entries: unback its pages and remove the
  * address range from the kernel map.  Slabs that do not carry
  * UMA_SLAB_PRIV are left alone.
  */
 static void
 kmapent_free(void *item, vm_size_t size, uint8_t pflag)
 {
 	vm_offset_t start, end;
 	int rv __diagused;
 
 	if ((pflag & UMA_SLAB_PRIV) != 0) {
 		start = (vm_offset_t)item;
 		end = start + size;
 		kmem_unback(kernel_object, start, size);
 		rv = vm_map_remove(kernel_map, start, end);
 		KASSERT(rv == KERN_SUCCESS,
 		    ("%s: vm_map_remove failed: %d", __func__, rv));
 	}
 	/* Otherwise: XXX leaked */
 }
 
 /*
  * The worst-case upper bound on the number of kernel map entries that may be
  * created before the zone must be replenished in _vm_map_unlock().
  */
 #define	KMAPENT_RESERVE		1
 
 #endif /* !UMA_MD_SMALL_ALLOC */
 
 /*
  *	vm_map_startup:
  *
  *	Initialize the vm_map module.  Must be called before any other vm_map
  *	routines.
  *
  *	User map and entry structures are allocated from the general purpose
  *	memory pool.  Kernel maps are statically defined.  Kernel map entries
  *	require special handling to avoid recursion; see the comments above
  *	kmapent_alloc() and in vm_map_entry_create().
  */
 void
 vm_map_startup(void)
 {
 	mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
 
 	/*
 	 * Disable the use of per-CPU buckets: map entry allocation is
 	 * serialized by the kernel map lock.
 	 */
 	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 	    UMA_ZONE_VM | UMA_ZONE_NOBUCKET);
 #ifndef UMA_MD_SMALL_ALLOC
 	/* Reserve an extra map entry for use when replenishing the reserve. */
 	uma_zone_reserve(kmapentzone, KMAPENT_RESERVE + 1);
 	uma_prealloc(kmapentzone, KMAPENT_RESERVE + 1);
 	/* Route slab allocation and release through the kmapent_* hooks. */
 	uma_zone_set_allocf(kmapentzone, kmapent_alloc);
 	uma_zone_set_freef(kmapentzone, kmapent_free);
 #endif
 
 	/* Zone for map entries that do not come from kmapentzone. */
 	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	/*
 	 * Zone for vmspace structures.  vmspace_zinit() is the init hook;
 	 * the destructor, which only performs assertions, is installed
 	 * solely under INVARIANTS.
 	 */
 	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
 #ifdef INVARIANTS
 	    vmspace_zdtor,
 #else
 	    NULL,
 #endif
 	    vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 }
 
 /*
  * Init hook for vmspace_zone: zero the embedded vm_map and set up the
  * map's mutex and sx lock as well as the pmap lock.  Always returns 0.
  */
 static int
 vmspace_zinit(void *mem, int size, int flags)
 {
 	struct vmspace *vmsp;
 
 	vmsp = mem;
 	memset(&vmsp->vm_map, 0, sizeof(vmsp->vm_map));
 	mtx_init(&vmsp->vm_map.system_mtx, "vm map (system)", NULL,
 	    MTX_DUPOK | MTX_DEF);
 	sx_init(&vmsp->vm_map.lock, "vm map (user)");
 	PMAP_LOCK_INIT(vmspace_pmap(vmsp));
 	return (0);
 }
 
 #ifdef INVARIANTS
 static void
 vmspace_zdtor(void *mem, int size, void *arg)
 {
 	struct vmspace *vm;
 
 	vm = (struct vmspace *)mem;
 	KASSERT(vm->vm_map.nentries == 0,
 	    ("vmspace %p nentries == %d on free", vm, vm->vm_map.nentries));
 	KASSERT(vm->vm_map.size == 0,
 	    ("vmspace %p size == %ju on free", vm, (uintmax_t)vm->vm_map.size));
 }
 #endif	/* INVARIANTS */
 
 /*
  * Allocate a vmspace structure, including a vm_map and pmap,
  * and initialize those structures.  The refcnt is set to 1.
  */
 struct vmspace *
 vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
 {
 	struct vmspace *newvm;
 	pmap_t pmap;
 
 	newvm = uma_zalloc(vmspace_zone, M_WAITOK);
 	KASSERT(newvm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL"));
 
 	/* If the pmap cannot be set up, hand the vmspace back to the zone. */
 	pmap = vmspace_pmap(newvm);
 	if (pinit(pmap) == 0) {
 		uma_zfree(vmspace_zone, newvm);
 		return (NULL);
 	}
 	CTR1(KTR_VM, "vmspace_alloc: %p", newvm);
 	_vm_map_init(&newvm->vm_map, pmap, min, max);
 	refcount_init(&newvm->vm_refcnt, 1);
 
 	/* No SysV shm state yet; sizes and segment addresses start at zero. */
 	newvm->vm_shm = NULL;
 	newvm->vm_swrss = 0;
 	newvm->vm_tsize = 0;
 	newvm->vm_dsize = 0;
 	newvm->vm_ssize = 0;
 	newvm->vm_taddr = 0;
 	newvm->vm_daddr = 0;
 	newvm->vm_maxsaddr = 0;
 	return (newvm);
 }
 
 #ifdef RACCT
 static void
 vmspace_container_reset(struct proc *p)
 {
 
 	PROC_LOCK(p);
 	racct_set(p, RACCT_DATA, 0);
 	racct_set(p, RACCT_STACK, 0);
 	racct_set(p, RACCT_RSS, 0);
 	racct_set(p, RACCT_MEMLOCK, 0);
 	racct_set(p, RACCT_VMEM, 0);
 	PROC_UNLOCK(p);
 }
 #endif
 
 static inline void
 vmspace_dofree(struct vmspace *vm)
 {
 
 	CTR1(KTR_VM, "vmspace_free: %p", vm);
 
 	/*
 	 * Make sure any SysV shm is freed, it might not have been in
 	 * exit1().
 	 */
 	shmexit(vm);
 
 	/*
 	 * Lock the map, to wait out all other references to it.
 	 * Delete all of the mappings and pages they hold, then call
 	 * the pmap module to reclaim anything left.
 	 */
 	(void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
 	    vm_map_max(&vm->vm_map));
 
 	pmap_release(vmspace_pmap(vm));
 	vm->vm_map.pmap = NULL;
 	uma_zfree(vmspace_zone, vm);
 }
 
 /*
  * Drop one reference on a vmspace; the holder of the last reference
  * destroys it.
  */
 void
 vmspace_free(struct vmspace *vm)
 {
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 	    "vmspace_free() called");
 
 	if (!refcount_release(&vm->vm_refcnt))
 		return;
 	vmspace_dofree(vm);
 }
 
 /*
  * Detach the vmspace pointer from a process and drop the reference it
  * represented.  The detached vmspace is expected to be vmspace0.
  */
 void
 vmspace_exitfree(struct proc *p)
 {
 	struct vmspace *detached;
 
 	PROC_VMSPACE_LOCK(p);
 	detached = p->p_vmspace;
 	p->p_vmspace = NULL;
 	PROC_VMSPACE_UNLOCK(p);
 
 	KASSERT(detached == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
 	vmspace_free(detached);
 }
 
 /*
  * Release the exiting thread's process reference on its vmspace and
  * leave the process attached to vmspace0.  If this drops the last
  * reference, the vmspace's pages are removed and the vmspace is freed
  * here.  With RACCT, the process's memory resource accounting is reset
  * when racct is enabled.
  */
 void
 vmspace_exit(struct thread *td)
 {
 	struct vmspace *vm;
 	struct proc *p;
 	bool released;
 
 	p = td->td_proc;
 	vm = p->p_vmspace;
 
 	/*
 	 * Prepare to release the vmspace reference.  The thread that releases
 	 * the last reference is responsible for tearing down the vmspace.
 	 * However, threads not releasing the final reference must switch to the
 	 * kernel's vmspace0 before the decrement so that the subsequent pmap
 	 * deactivation does not modify a freed vmspace.
 	 */
 	/* Reference for vmspace0, which p->p_vmspace ends up pointing at. */
 	refcount_acquire(&vmspace0.vm_refcnt);
 	if (!(released = refcount_release_if_last(&vm->vm_refcnt))) {
 		if (p->p_vmspace != &vmspace0) {
 			PROC_VMSPACE_LOCK(p);
 			p->p_vmspace = &vmspace0;
 			PROC_VMSPACE_UNLOCK(p);
 			pmap_activate(td);
 		}
 		/* Others may have dropped theirs meanwhile; we can be last. */
 		released = refcount_release(&vm->vm_refcnt);
 	}
 	if (released) {
 		/*
 		 * pmap_remove_pages() expects the pmap to be active, so switch
 		 * back first if necessary.
 		 */
 		if (p->p_vmspace != vm) {
 			PROC_VMSPACE_LOCK(p);
 			p->p_vmspace = vm;
 			PROC_VMSPACE_UNLOCK(p);
 			pmap_activate(td);
 		}
 		pmap_remove_pages(vmspace_pmap(vm));
 		/* Move to vmspace0 for good before the old vmspace is freed. */
 		PROC_VMSPACE_LOCK(p);
 		p->p_vmspace = &vmspace0;
 		PROC_VMSPACE_UNLOCK(p);
 		pmap_activate(td);
 		vmspace_dofree(vm);
 	}
 #ifdef RACCT
 	if (racct_enable)
 		vmspace_container_reset(p);
 #endif
 }
 
 /* Acquire reference to vmspace owned by another process. */
 
 /*
  * Returns the vmspace of process "p" with an additional reference, or NULL
  * if the process has no vmspace, its reference count already dropped to
  * zero, or p_vmspace no longer matches after the reference was taken.
  */
 struct vmspace *
 vmspace_acquire_ref(struct proc *p)
 {
 	struct vmspace *vm;
 
 	PROC_VMSPACE_LOCK(p);
 	vm = p->p_vmspace;
 	if (vm != NULL && refcount_acquire_if_not_zero(&vm->vm_refcnt)) {
 		if (vm == p->p_vmspace) {
 			PROC_VMSPACE_UNLOCK(p);
 			return (vm);
 		}
 		/* The process switched vmspaces; give the reference back. */
 		PROC_VMSPACE_UNLOCK(p);
 		vmspace_free(vm);
 		return (NULL);
 	}
 	PROC_VMSPACE_UNLOCK(p);
 	return (NULL);
 }
 
 /*
  * Switch between vmspaces in an AIO kernel process.
  *
  * The new vmspace is either the vmspace of a user process obtained
  * from an active AIO request or the initial vmspace of the AIO kernel
  * process (when it is idling).  Because user processes will block to
  * drain any active AIO requests before proceeding in exit() or
  * execve(), the reference count for vmspaces from AIO requests can
  * never be 0.  Similarly, AIO kernel processes hold an extra
  * reference on their initial vmspace for the life of the process.  As
  * a result, the 'newvm' vmspace always has a non-zero reference
  * count.  This permits an additional reference on 'newvm' to be
  * acquired via a simple atomic increment rather than the loop in
  * vmspace_acquire_ref() above.
  */
 void
 vmspace_switch_aio(struct vmspace *newvm)
 {
 	struct vmspace *prev;
 
 	/* XXX: Need some way to assert that this is an aio daemon. */
 
 	KASSERT(refcount_load(&newvm->vm_refcnt) > 0,
 	    ("vmspace_switch_aio: newvm unreferenced"));
 
 	prev = curproc->p_vmspace;
 	if (prev != newvm) {
 		/* Install the new address space and take a reference on it. */
 		curproc->p_vmspace = newvm;
 		refcount_acquire(&newvm->vm_refcnt);
 
 		/* Make the new mapping the active one. */
 		pmap_activate(curthread);
 
 		/* Finally drop the reference on the previous vmspace. */
 		vmspace_free(prev);
 	}
 }
 
 /*
  * Acquire the map's lock exclusively (the mutex for a system map, the sx
  * lock otherwise) and advance the map's timestamp.
  */
 void
 _vm_map_lock(vm_map_t map, const char *file, int line)
 {
 
 	if (!map->system_map)
 		sx_xlock_(&map->lock, file, line);
 	else
 		mtx_lock_flags_(&map->system_mtx, 0, file, line);
 	map->timestamp++;
 }
 
 /*
  * For an entry flagged MAP_ENTRY_VN_EXEC, set ("add" true) or unset ("add"
  * false) the text state of the vnode that backs the entry's object.
  * Entries without that flag are ignored.
  */
 void
 vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add)
 {
 	vm_object_t object;
 	struct vnode *vp;
 	bool vp_held;
 
 	if ((entry->eflags & MAP_ENTRY_VN_EXEC) == 0)
 		return;
 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
 	    ("Submap with execs"));
 
 	object = entry->object.vm_object;
 	KASSERT(object != NULL, ("No object for text, entry %p", entry));
 	if ((object->flags & OBJ_ANON) == 0) {
 		KASSERT(object->backing_object == NULL,
 		    ("non-anon object %p shadows", object));
 	} else {
 		/* For an anonymous object, continue with its handle. */
 		object = object->handle;
 	}
 	KASSERT(object != NULL, ("No content object for text, entry %p obj %p",
 	    entry, entry->object.vm_object));
 
 	/*
 	 * The backing object is mostly left unlocked: the entry being
 	 * processed references it, so it cannot go away.
 	 */
 	vm_pager_getvp(object, &vp, &vp_held);
 	if (vp == NULL)
 		return;
 	if (!add) {
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		VOP_UNSET_TEXT_CHECKED(vp);
 		VOP_UNLOCK(vp);
 	} else {
 		VOP_SET_TEXT_CHECKED(vp);
 	}
 	if (vp_held)
 		vdrop(vp);
 }
 
 /*
  * Use a different name for this vm_map_entry field when its use
  * is not consistent with its use as part of an ordered search tree.
  */
 #define defer_next right
 
 /*
  * Process the map entries queued on the current thread's deferred list:
  * release write counts, unset vnode text state, and deallocate each entry.
  */
 static void
 vm_map_process_deferred(void)
 {
 	struct thread *td;
 	vm_map_entry_t entry, next;
 	vm_object_t object;
 
 	/* Detach the whole list from the thread before walking it. */
 	td = curthread;
 	entry = td->td_map_def_user;
 	td->td_map_def_user = NULL;
 
 	for (; entry != NULL; entry = next) {
 		/* Save the link first; the entry is deallocated below. */
 		next = entry->defer_next;
 		MPASS((entry->eflags & (MAP_ENTRY_WRITECNT |
 		    MAP_ENTRY_VN_EXEC)) != (MAP_ENTRY_WRITECNT |
 		    MAP_ENTRY_VN_EXEC));
 		if ((entry->eflags & MAP_ENTRY_WRITECNT) != 0) {
 			/*
 			 * Drop the object's writemappings and, possibly,
 			 * the vnode's v_writecount.
 			 */
 			KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
 			    ("Submap with writecount"));
 			object = entry->object.vm_object;
 			KASSERT(object != NULL, ("No object for writecount"));
 			vm_pager_release_writecount(object, entry->start,
 			    entry->end);
 		}
 		vm_map_entry_set_vnode_text(entry, false);
 		vm_map_entry_deallocate(entry, FALSE);
 	}
 }
 
 #ifdef INVARIANTS
 /*
  * Assert that the caller owns the map's lock: the system mutex for a
  * system map, or the sx lock held exclusively otherwise.
  */
 static void
 _vm_map_assert_locked(vm_map_t map, const char *file, int line)
 {
 
 	if (map->system_map)
 		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
 	else
 		sx_assert_(&map->lock, SA_XLOCKED, file, line);
 }
 
 #define	VM_MAP_ASSERT_LOCKED(map) \
     _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)
 
 /* Levels for the debug.vmmap_check tunable below. */
 enum { VMMAP_CHECK_NONE, VMMAP_CHECK_UNLOCK, VMMAP_CHECK_ALL };
 #ifdef DIAGNOSTIC
 static int enable_vmmap_check = VMMAP_CHECK_UNLOCK;
 #else
 static int enable_vmmap_check = VMMAP_CHECK_NONE;
 #endif
 SYSCTL_INT(_debug, OID_AUTO, vmmap_check, CTLFLAG_RWTUN,
     &enable_vmmap_check, 0, "Enable vm map consistency checking");
 
 static void _vm_map_assert_consistent(vm_map_t map, int check);
 
 #define VM_MAP_ASSERT_CONSISTENT(map) \
     _vm_map_assert_consistent(map, VMMAP_CHECK_ALL)
 #ifdef DIAGNOSTIC
 /*
  * Run the consistency check at unlock time only once the number of updates
  * exceeds the number of entries, then restart the update count.  The macro
  * argument is parenthesized wherever it is dereferenced so that any pointer
  * expression may be passed safely.
  */
 #define VM_MAP_UNLOCK_CONSISTENT(map) do {				\
 	if ((map)->nupdates > (map)->nentries) {			\
 		_vm_map_assert_consistent(map, VMMAP_CHECK_UNLOCK);	\
 		(map)->nupdates = 0;					\
 	}								\
 } while (0)
 #else
 #define VM_MAP_UNLOCK_CONSISTENT(map)
 #endif
 #else
 #define	VM_MAP_ASSERT_LOCKED(map)
 #define VM_MAP_ASSERT_CONSISTENT(map)
 #define VM_MAP_UNLOCK_CONSISTENT(map)
 #endif /* INVARIANTS */
 
 /*
  * Release the exclusive lock on the map.  For a user map, the current
  * thread's deferred map entries are processed after the lock is dropped.
  */
 void
 _vm_map_unlock(vm_map_t map, const char *file, int line)
 {
 
 	VM_MAP_UNLOCK_CONSISTENT(map);
 	if (map->system_map) {
 #ifndef UMA_MD_SMALL_ALLOC
 		/*
 		 * vm_map_entry_create() sets MAP_REPLENISH on the kernel map
 		 * after dipping into the map entry reserve; refill the zone
 		 * and clear the flag before the lock is released.
 		 */
 		if (map == kernel_map && (map->flags & MAP_REPLENISH) != 0) {
 			uma_prealloc(kmapentzone, 1);
 			map->flags &= ~MAP_REPLENISH;
 		}
 #endif
 		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
 	} else {
 		sx_xunlock_(&map->lock, file, line);
 		vm_map_process_deferred();
 	}
 }
 
 /*
  * Acquire the map for reading.  A system map has only its mutex, so that is
  * taken; any other map takes the sx lock in shared mode.
  */
 void
 _vm_map_lock_read(vm_map_t map, const char *file, int line)
 {
 
 	if (!map->system_map)
 		sx_slock_(&map->lock, file, line);
 	else
 		mtx_lock_flags_(&map->system_mtx, 0, file, line);
 }
 
 /*
  * Release a read lock on the map.  For a user map, the current thread's
  * deferred map entries are processed once the shared lock is dropped.
  */
 void
 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
 {
 
 	if (!map->system_map) {
 		sx_sunlock_(&map->lock, file, line);
 		vm_map_process_deferred();
 		return;
 	}
 	KASSERT((map->flags & MAP_REPLENISH) == 0,
 	    ("%s: MAP_REPLENISH leaked", __func__));
 	mtx_unlock_flags_(&map->system_mtx, 0, file, line);
 }
 
 /*
  * Try to acquire the map's lock exclusively without blocking.  Returns 1
  * and advances the map's timestamp on success; returns 0 otherwise.
  */
 int
 _vm_map_trylock(vm_map_t map, const char *file, int line)
 {
 	int acquired;
 
 	if (map->system_map)
 		acquired = mtx_trylock_flags_(&map->system_mtx, 0, file,
 		    line);
 	else
 		acquired = sx_try_xlock_(&map->lock, file, line);
 	if (!acquired)
 		return (0);
 	map->timestamp++;
 	return (1);
 }
 
 /*
  * Try to acquire the map for reading without blocking.  Returns 1 if the
  * lock was obtained and 0 otherwise.
  */
 int
 _vm_map_trylock_read(vm_map_t map, const char *file, int line)
 {
 	int acquired;
 
 	if (map->system_map)
 		acquired = mtx_trylock_flags_(&map->system_mtx, 0, file,
 		    line);
 	else
 		acquired = sx_try_slock_(&map->lock, file, line);
 	return (acquired ? 1 : 0);
 }
 
 /*
  *	_vm_map_lock_upgrade:	[ internal use only ]
  *
  *	Tries to upgrade a read (shared) lock on the specified map to a write
  *	(exclusive) lock.  Returns the value "0" if the upgrade succeeds and a
  *	non-zero value if the upgrade fails.  If the upgrade fails, the map is
  *	returned without a read or write lock held.
  *
  *	Requires that the map be read locked.
  */
 int
 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
 {
 	unsigned int saved_timestamp;
 
 	if (map->system_map) {
 		/* A system map's mutex is already exclusive. */
 		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
 	} else if (!sx_try_upgrade_(&map->lock, file, line)) {
 		/*
 		 * The in-place upgrade failed.  Drop the read lock and take
 		 * the write lock instead; the upgrade still counts as a
 		 * success if the timestamp did not change in between.
 		 */
 		saved_timestamp = map->timestamp;
 		sx_sunlock_(&map->lock, file, line);
 		vm_map_process_deferred();
 		sx_xlock_(&map->lock, file, line);
 		if (saved_timestamp != map->timestamp) {
 			sx_xunlock_(&map->lock, file, line);
 			return (1);
 		}
 	}
 	map->timestamp++;
 	return (0);
 }
 
 /*
  * Downgrade an exclusive lock on the map to a shared one.  A system map has
  * only its mutex, so there the lock is merely asserted to be owned.
  */
 void
 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
 {
 
 	if (!map->system_map) {
 		VM_MAP_UNLOCK_CONSISTENT(map);
 		sx_downgrade_(&map->lock, file, line);
 		return;
 	}
 	KASSERT((map->flags & MAP_REPLENISH) == 0,
 	    ("%s: MAP_REPLENISH leaked", __func__));
 	mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
 }
 
 /*
  *	vm_map_locked:
  *
  *	Returns a non-zero value if the caller holds a write (exclusive) lock
  *	on the specified map and the value "0" otherwise.
  */
 int
 vm_map_locked(vm_map_t map)
 {
 
 	/* A system map is protected by its mutex, any other map by the sx. */
 	if (map->system_map)
 		return (mtx_owned(&map->system_mtx));
 	return (sx_xlocked(&map->lock));
 }
 
 /*
  *	_vm_map_unlock_and_wait:
  *
  *	Atomically releases the lock on the specified map and puts the calling
  *	thread to sleep.  The calling thread will remain asleep until either
  *	vm_map_wakeup() is performed on the map or the specified timeout is
  *	exceeded.
  *
  *	WARNING!  This function does not perform deferred deallocations of
  *	objects and map	entries.  Therefore, the calling thread is expected to
  *	reacquire the map lock after reawakening and later perform an ordinary
  *	unlock operation, such as vm_map_unlock(), before completing its
  *	operation on the map.
  */
 int
 _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
 {
 
 	VM_MAP_UNLOCK_CONSISTENT(map);
 	/*
 	 * Take map_sleep_mtx before dropping the map lock.  vm_map_wakeup()
 	 * acquires and releases the same mutex before calling wakeup(), so
 	 * a wakeup cannot be lost between the unlock and the msleep() below.
 	 */
 	mtx_lock(&map_sleep_mtx);
 	if (map->system_map) {
 		KASSERT((map->flags & MAP_REPLENISH) == 0,
 		    ("%s: MAP_REPLENISH leaked", __func__));
 		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
 	} else {
 		sx_xunlock_(&map->lock, file, line);
 	}
 	/*
 	 * Sleep on &map->root, the channel that vm_map_wakeup() signals.
 	 * NOTE(review): PDROP presumably leaves map_sleep_mtx released on
 	 * return, as nothing here unlocks it afterwards -- see msleep(9).
 	 */
 	return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
 	    timo));
 }
 
 /*
  *	vm_map_wakeup:
  *
  *	Awaken any threads that have slept on the map using
  *	vm_map_unlock_and_wait().
  */
 void
 vm_map_wakeup(vm_map_t map)
 {
 
 	/*
 	 * Acquire and release map_sleep_mtx to prevent a wakeup()
 	 * from being performed (and lost) between the map unlock
 	 * and the msleep() in _vm_map_unlock_and_wait().
 	 */
 	mtx_lock(&map_sleep_mtx);
 	mtx_unlock(&map_sleep_mtx);
 	/* &map->root is the channel _vm_map_unlock_and_wait() sleeps on. */
 	wakeup(&map->root);
 }
 
 /*
  * Increment the map's busy count.  The map lock must be held by the caller.
  */
 void
 vm_map_busy(vm_map_t map)
 {
 
 	VM_MAP_ASSERT_LOCKED(map);
 	++map->busy;
 }
 
 /*
  * Decrement the map's busy count.  When it reaches zero and a waiter has
  * set MAP_BUSY_WAKEUP, clear that flag and wake the threads sleeping on
  * &map->busy.  The map lock must be held by the caller.
  */
 void
 vm_map_unbusy(vm_map_t map)
 {
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT(map->busy, ("vm_map_unbusy: not busy"));
 	--map->busy;
 	if (map->busy != 0 || (map->flags & MAP_BUSY_WAKEUP) == 0)
 		return;
 	vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
 	wakeup(&map->busy);
 }
 
 /*
  * Sleep until the map's busy count drops to zero, requesting a wakeup via
  * MAP_BUSY_WAKEUP before each sleep, then advance the map's timestamp.
  * The map lock must be held by the caller.
  */
 void
 vm_map_wait_busy(vm_map_t map)
 {
 
 	VM_MAP_ASSERT_LOCKED(map);
 	while (map->busy != 0) {
 		vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
 		if (!map->system_map) {
 			sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
 		} else {
 			msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
 		}
 	}
 	map->timestamp++;
 }
 
 /*
  * Return the resident page count reported by the vmspace's pmap.
  */
 long
 vmspace_resident_count(struct vmspace *vmspace)
 {
 
 	return (pmap_resident_count(vmspace_pmap(vmspace)));
 }
 
 /*
  * Initialize an existing vm_map structure
  * such as that in the vmspace structure.
  */
 static void
 _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
 {
 
 	/*
 	 * Set up the header entry of an empty map: both links point back at
 	 * the header itself, its "end" holds the map's minimum address and
 	 * its "start" holds the maximum.
 	 */
 	map->header.eflags = MAP_ENTRY_HEADER;
 	map->header.start = max;
 	map->header.end = min;
 	map->header.right = &map->header;
 	map->header.left = &map->header;
 	map->root = NULL;
 
 	/* Remaining per-map state. */
 	map->pmap = pmap;
 	map->system_map = 0;
 	map->needs_wakeup = FALSE;
 	map->flags = 0;
 	map->busy = 0;
 	map->timestamp = 0;
 	map->anon_loc = 0;
 #ifdef DIAGNOSTIC
 	map->nupdates = 0;
 #endif
 }
 
 /*
  * Initialize a vm_map structure together with its locks: the fields are
  * set by _vm_map_init(), then both the system mutex and the sx lock are
  * initialized.
  */
 void
 vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
 {
 
 	_vm_map_init(map, pmap, min, max);
 	mtx_init(&map->system_mtx, "vm map (system)", NULL,
 	    MTX_DEF | MTX_DUPOK);
 	sx_init(&map->lock, "vm map (user)");
 }
 
 /*
  *	vm_map_entry_dispose:	[ internal use only ]
  *
  *	Inverse of vm_map_entry_create.
  */
 static void
 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
 {
 
 	/* Entries of system maps come from kmapentzone, others from mapentzone. */
 	if (map->system_map)
 		uma_zfree(kmapentzone, entry);
 	else
 		uma_zfree(mapentzone, entry);
 }
 
 /*
  *	vm_map_entry_create:	[ internal use only ]
  *
  *	Allocates a VM map entry for insertion.
  *	No entry fields are filled in.
  */
 static vm_map_entry_t
 vm_map_entry_create(vm_map_t map)
 {
 	vm_map_entry_t new_entry;
 
 #ifndef UMA_MD_SMALL_ALLOC
 	if (map == kernel_map) {
 		VM_MAP_ASSERT_LOCKED(map);
 
 		/*
 		 * A new slab of kernel map entries cannot be allocated at this
 		 * point because the kernel map has not yet been updated to
 		 * reflect the caller's request.  Therefore, we allocate a new
 		 * map entry, dipping into the reserve if necessary, and set a
 		 * flag indicating that the reserve must be replenished before
 		 * the map is unlocked.
 		 */
 		new_entry = uma_zalloc(kmapentzone, M_NOWAIT | M_NOVM);
 		if (new_entry == NULL) {
 			new_entry = uma_zalloc(kmapentzone,
 			    M_NOWAIT | M_NOVM | M_USE_RESERVE);
 			/* _vm_map_unlock() refills the zone and clears this. */
 			kernel_map->flags |= MAP_REPLENISH;
 		}
 	} else
 #endif
 	/*
 	 * Other system maps allocate from kmapentzone with M_NOWAIT; all
 	 * remaining maps allocate from mapentzone with M_WAITOK.
 	 */
 	if (map->system_map) {
 		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
 	} else {
 		new_entry = uma_zalloc(mapentzone, M_WAITOK);
 	}
 	KASSERT(new_entry != NULL,
 	    ("vm_map_entry_create: kernel resources exhausted"));
 	return (new_entry);
 }
 
 /*
  *	vm_map_entry_set_behavior:
  *
  *	Set the expected access behavior, either normal, random, or
  *	sequential.
  */
 static inline void
 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
 {
 
 	/* Replace only the behavior bits; every other flag is preserved. */
 	entry->eflags = (behavior & MAP_ENTRY_BEHAV_MASK) |
 	    (entry->eflags & ~MAP_ENTRY_BEHAV_MASK);
 }
 
 /*
  *	vm_map_entry_max_free_{left,right}:
  *
  *	Compute the size of the largest free gap between two entries,
  *	one the root of a tree and the other the ancestor of that root
  *	that is the least or greatest ancestor found on the search path.
  */
 static inline vm_size_t
 vm_map_entry_max_free_left(vm_map_entry_t root, vm_map_entry_t left_ancestor)
 {
 
 	/* No left child: the gap reaches back to the ancestor's end. */
 	if (root->left == left_ancestor)
 		return (root->start - left_ancestor->end);
 	return (root->left->max_free);
 }
 
 static inline vm_size_t
 vm_map_entry_max_free_right(vm_map_entry_t root, vm_map_entry_t right_ancestor)
 {
 
 	/* No right child: the gap reaches forward to the ancestor's start. */
 	if (root->right == right_ancestor)
 		return (right_ancestor->start - root->end);
 	return (root->right->max_free);
 }
 
 /*
  *	vm_map_entry_{pred,succ}:
  *
  *	Find the {predecessor, successor} of the entry by taking one step
  *	in the appropriate direction and backtracking as much as necessary.
  *	vm_map_entry_succ is defined in vm_map.h.
  */
 static inline vm_map_entry_t
 vm_map_entry_pred(vm_map_entry_t entry)
 {
 	vm_map_entry_t prior;
 
 	prior = entry->left;
 	/* A left pointer that is a thread already names the predecessor. */
 	if (prior->right->start >= entry->start)
 		return (prior);
 	/* Otherwise descend along right pointers until one leads to entry. */
 	do {
 		prior = prior->right;
 	} while (prior->right != entry);
 	return (prior);
 }
 
 /*
  * Return the larger of two sizes.
  */
 static inline vm_size_t
 vm_size_max(vm_size_t a, vm_size_t b)
 {
 
 	if (a > b)
 		return (a);
 	return (b);
 }
 
 /*
  * One leftward step of the top-down splay.  Sets y to root->left; if y is
  * a real child (y != llist) and "test" holds, rotates right so that y
  * becomes the root.  Then the root's max_free is overwritten with the
  * max_free of its right side, the root is pushed onto rlist (linked through
  * its left pointer), and root becomes the remaining left child, or NULL if
  * there is none.  The root, y and rlist arguments are assigned to and must
  * be lvalues; the arguments are evaluated more than once.
  */
 #define SPLAY_LEFT_STEP(root, y, llist, rlist, test) do {		\
 	vm_map_entry_t z;						\
 	vm_size_t max_free;						\
 									\
 	/*								\
 	 * Infer root->right->max_free == root->max_free when		\
 	 * y->max_free < root->max_free || root->max_free == 0.		\
 	 * Otherwise, look right to find it.				\
 	 */								\
 	y = root->left;							\
 	max_free = root->max_free;					\
 	KASSERT(max_free == vm_size_max(				\
 	    vm_map_entry_max_free_left(root, llist),			\
 	    vm_map_entry_max_free_right(root, rlist)),			\
 	    ("%s: max_free invariant fails", __func__));		\
 	if (max_free - 1 < vm_map_entry_max_free_left(root, llist))	\
 		max_free = vm_map_entry_max_free_right(root, rlist);	\
 	if (y != llist && (test)) {					\
 		/* Rotate right and make y root. */			\
 		z = y->right;						\
 		if (z != root) {					\
 			root->left = z;					\
 			y->right = root;				\
 			if (max_free < y->max_free)			\
 			    root->max_free = max_free =			\
 			    vm_size_max(max_free, z->max_free);		\
 		} else if (max_free < y->max_free)			\
 			root->max_free = max_free =			\
 			    vm_size_max(max_free, root->start - y->end);\
 		root = y;						\
 		y = root->left;						\
 	}								\
 	/* Copy right->max_free.  Put root on rlist. */			\
 	root->max_free = max_free;					\
 	KASSERT(max_free == vm_map_entry_max_free_right(root, rlist),	\
 	    ("%s: max_free not copied from right", __func__));		\
 	root->left = rlist;						\
 	rlist = root;							\
 	root = y != llist ? y : NULL;					\
 } while (0)
 
 /*
  * One rightward step of the top-down splay; the mirror image of
  * SPLAY_LEFT_STEP.  Sets y to root->right; if y is a real child
  * (y != rlist) and "test" holds, rotates left so that y becomes the root.
  * Then the root's max_free is overwritten with the max_free of its left
  * side, the root is pushed onto llist (linked through its right pointer),
  * and root becomes the remaining right child, or NULL if there is none.
  * The root, y and llist arguments are assigned to and must be lvalues; the
  * arguments are evaluated more than once.
  */
 #define SPLAY_RIGHT_STEP(root, y, llist, rlist, test) do {		\
 	vm_map_entry_t z;						\
 	vm_size_t max_free;						\
 									\
 	/*								\
 	 * Infer root->left->max_free == root->max_free when		\
 	 * y->max_free < root->max_free || root->max_free == 0.		\
 	 * Otherwise, look left to find it.				\
 	 */								\
 	y = root->right;						\
 	max_free = root->max_free;					\
 	KASSERT(max_free == vm_size_max(				\
 	    vm_map_entry_max_free_left(root, llist),			\
 	    vm_map_entry_max_free_right(root, rlist)),			\
 	    ("%s: max_free invariant fails", __func__));		\
 	if (max_free - 1 < vm_map_entry_max_free_right(root, rlist))	\
 		max_free = vm_map_entry_max_free_left(root, llist);	\
 	if (y != rlist && (test)) {					\
 		/* Rotate left and make y root. */			\
 		z = y->left;						\
 		if (z != root) {					\
 			root->right = z;				\
 			y->left = root;					\
 			if (max_free < y->max_free)			\
 			    root->max_free = max_free =			\
 			    vm_size_max(max_free, z->max_free);		\
 		} else if (max_free < y->max_free)			\
 			root->max_free = max_free =			\
 			    vm_size_max(max_free, y->start - root->end);\
 		root = y;						\
 		y = root->right;					\
 	}								\
 	/* Copy left->max_free.  Put root on llist. */			\
 	root->max_free = max_free;					\
 	KASSERT(max_free == vm_map_entry_max_free_left(root, llist),	\
 	    ("%s: max_free not copied from left", __func__));		\
 	root->right = llist;						\
 	llist = root;							\
 	root = y != rlist ? y : NULL;					\
 } while (0)
 
 /*
  * Walk down the tree until we find addr or a gap where addr would go, breaking
  * off left and right subtrees of nodes less than, or greater than addr.  Treat
  * subtrees with root->max_free < length as empty trees.  llist and rlist are
  * the two sides in reverse order (bottom-up), with llist linked by the right
  * pointer and rlist linked by the left pointer in the vm_map_entry, and both
  * lists terminated by &map->header.  This function, and the subsequent call to
  * vm_map_splay_merge_{left,right,pred,succ}, rely on the start and end address
  * values in &map->header.
  */
 static __always_inline vm_map_entry_t
 vm_map_splay_split(vm_map_t map, vm_offset_t addr, vm_size_t length,
     vm_map_entry_t *llist, vm_map_entry_t *rlist)
 {
 	vm_map_entry_t left, right, root, y;
 
 	/* Both spines start out empty, i.e. they consist of the header only. */
 	left = right = &map->header;
 	root = map->root;
 	/* Stop on an empty subtree or one without a gap of "length" bytes. */
 	while (root != NULL && root->max_free >= length) {
 		KASSERT(left->end <= root->start &&
 		    root->end <= right->start,
 		    ("%s: root not within tree bounds", __func__));
 		if (addr < root->start) {
 			SPLAY_LEFT_STEP(root, y, left, right,
 			    y->max_free >= length && addr < y->start);
 		} else if (addr >= root->end) {
 			SPLAY_RIGHT_STEP(root, y, left, right,
 			    y->max_free >= length && addr >= y->end);
 		} else
 			break;	/* addr lies within root. */
 	}
 	/*
 	 * Hand back the two spines.  The return value is the entry at which
 	 * the walk stopped, or NULL if it ran off the tree.
 	 */
 	*llist = left;
 	*rlist = right;
 	return (root);
 }
 
 /*
  * Break up the right subtree of root, if any, by stepping left repeatedly
  * and pushing each visited node onto *rlist, so that the list then starts
  * with the leftmost node of that subtree.  If root has no right child
  * (root->right == *rlist), *rlist is left untouched.
  */
 static __always_inline void
 vm_map_splay_findnext(vm_map_entry_t root, vm_map_entry_t *rlist)
 {
 	vm_map_entry_t hi, right, y;
 
 	right = *rlist;
 	/* A right pointer equal to the list head is a thread, not a child. */
 	hi = root->right == right ? NULL : root->right;
 	if (hi == NULL)
 		return;
 	do
 		SPLAY_LEFT_STEP(hi, y, root, right, true);
 	while (hi != NULL);
 	*rlist = right;
 }
 
 /*
  * Break up the left subtree of root, if any, by stepping right repeatedly
  * and pushing each visited node onto *llist, so that the list then starts
  * with the rightmost node of that subtree.  If root has no left child
  * (root->left == *llist), *llist is left untouched.
  */
 static __always_inline void
 vm_map_splay_findprev(vm_map_entry_t root, vm_map_entry_t *llist)
 {
 	vm_map_entry_t left, lo, y;
 
 	left = *llist;
 	/* A left pointer equal to the list head is a thread, not a child. */
 	lo = root->left == left ? NULL : root->left;
 	if (lo == NULL)
 		return;
 	do
 		SPLAY_RIGHT_STEP(lo, y, left, root, true);
 	while (lo != NULL);
 	*llist = left;
 }
 
 /*
  * Exchange the two map entry pointers that "a" and "b" point to.
  */
 static inline void
 vm_map_entry_swap(vm_map_entry_t *a, vm_map_entry_t *b)
 {
 	vm_map_entry_t saved;
 
 	saved = *a;
 	*a = *b;
 	*b = saved;
 }
 
 /*
  * Walk back up the two spines, flip the pointers and set max_free.  The
  * subtrees of the root go at the bottom of llist and rlist.
  */
 /*
  * header:   terminator of llist.
  * root:     entry whose left pointer receives the rebuilt spine.
  * tail:     value stored in the right pointer of the first llist node.
  * max_free: max_free of what hangs below the first llist node.
  * llist:    reversed left spine, linked through right pointers; the caller
  *           must ensure it is not the header, as the loop body runs at
  *           least once.
  *
  * Returns the max_free value of the last node processed.
  */
 static vm_size_t
 vm_map_splay_merge_left_walk(vm_map_entry_t header, vm_map_entry_t root,
     vm_map_entry_t tail, vm_size_t max_free, vm_map_entry_t llist)
 {
 	do {
 		/*
 		 * The max_free values of the children of llist are in
 		 * llist->max_free and max_free.  Update with the
 		 * max value.
 		 */
 		llist->max_free = max_free =
 		    vm_size_max(llist->max_free, max_free);
 		/*
 		 * Point llist->right at the node below it and advance to
 		 * the next node up the spine; tail then names the node
 		 * just processed.
 		 */
 		vm_map_entry_swap(&llist->right, &tail);
 		vm_map_entry_swap(&tail, &llist);
 	} while (llist != header);
 	root->left = tail;
 	return (max_free);
 }
 
 /*
  * When llist is known to be the predecessor of root.
  */
 static inline vm_size_t
 vm_map_splay_merge_pred(vm_map_entry_t header, vm_map_entry_t root,
     vm_map_entry_t llist)
 {
 	vm_size_t max_free;
 
 	/* The gap between root and its predecessor. */
 	max_free = root->start - llist->end;
 	if (llist == header) {
 		/* Empty left side: thread root and the header together. */
 		root->left = header;
 		header->right = root;
 		return (max_free);
 	}
 	return (vm_map_splay_merge_left_walk(header, root, root, max_free,
 	    llist));
 }
 
 /*
  * When llist may or may not be the predecessor of root.
  */
 static inline vm_size_t
 vm_map_splay_merge_left(vm_map_entry_t header, vm_map_entry_t root,
     vm_map_entry_t llist)
 {
 	vm_map_entry_t tail;
 	vm_size_t max_free;
 
 	max_free = vm_map_entry_max_free_left(root, llist);
 	if (llist == header)
 		return (max_free);
 	/* Without a left child, the spine is threaded back to root itself. */
 	tail = root->left == llist ? root : root->left;
 	return (vm_map_splay_merge_left_walk(header, root, tail, max_free,
 	    llist));
 }
 
 /*
  * Mirror image of vm_map_splay_merge_left_walk(): walk back up the reversed
  * right spine "rlist" (linked through left pointers and terminated by
  * "header"), restore the left pointers, update max_free along the way, and
  * store the rebuilt spine in root->right.  The caller must ensure rlist is
  * not the header, as the loop body runs at least once.
  *
  * Returns the max_free value of the last node processed.
  */
 static vm_size_t
 vm_map_splay_merge_right_walk(vm_map_entry_t header, vm_map_entry_t root,
     vm_map_entry_t tail, vm_size_t max_free, vm_map_entry_t rlist)
 {
 	do {
 		/*
 		 * The max_free values of the children of rlist are in
 		 * rlist->max_free and max_free.  Update with the
 		 * max value.
 		 */
 		rlist->max_free = max_free =
 		    vm_size_max(rlist->max_free, max_free);
 		/*
 		 * Point rlist->left at the node below it and advance to
 		 * the next node up the spine; tail then names the node
 		 * just processed.
 		 */
 		vm_map_entry_swap(&rlist->left, &tail);
 		vm_map_entry_swap(&tail, &rlist);
 	} while (rlist != header);
 	root->right = tail;
 	return (max_free);
 }
 
 /*
  * When rlist is known to be the successor of root.
  */
 static inline vm_size_t
 vm_map_splay_merge_succ(vm_map_entry_t header, vm_map_entry_t root,
     vm_map_entry_t rlist)
 {
 	vm_size_t max_free;
 
 	/* The gap between root and the entry that follows it. */
 	max_free = rlist->start - root->end;
 	if (rlist == header) {
 		/* Empty right side: thread root and the header together. */
 		root->right = header;
 		header->left = root;
 		return (max_free);
 	}
 	return (vm_map_splay_merge_right_walk(header, root, root, max_free,
 	    rlist));
 }
 
 /*
  * When rlist may or may not be the successor of root.
  */
 static inline vm_size_t
 vm_map_splay_merge_right(vm_map_entry_t header, vm_map_entry_t root,
     vm_map_entry_t rlist)
 {
 	vm_map_entry_t tail;
 	vm_size_t max_free;
 
 	max_free = vm_map_entry_max_free_right(root, rlist);
 	if (rlist == header)
 		return (max_free);
 	/* Without a right child, the spine is threaded back to root itself. */
 	tail = root->right == rlist ? root : root->right;
 	return (vm_map_splay_merge_right_walk(header, root, tail, max_free,
 	    rlist));
 }
 
 /*
  *	vm_map_splay:
  *
  *	The Sleator and Tarjan top-down splay algorithm with the
  *	following variation.  Max_free must be computed bottom-up, so
  *	on the downward pass, maintain the left and right spines in
  *	reverse order.  Then, make a second pass up each side to fix
  *	the pointers and compute max_free.  The time bound is O(log n)
  *	amortized.
  *
  *	The tree is threaded, which means that there are no null pointers.
  *	When a node has no left child, its left pointer points to its
  *	predecessor, which is the last ancestor on the search path from the root
  *	where the search branched right.  Likewise, when a node has no right
  *	child, its right pointer points to its successor.  The map header node
  *	is the predecessor of the first map entry, and the successor of the
  *	last.
  *
  *	The new root is the vm_map_entry containing "addr", or else an
  *	adjacent entry (lower if possible) if addr is not in the tree.
  *
  *	The map must be locked, and leaves it so.
  *
  *	Returns: the new root.
  */
 static vm_map_entry_t
 vm_map_splay(vm_map_t map, vm_offset_t addr)
 {
 	vm_map_entry_t header, llist, rlist, root;
 	vm_size_t max_free_left, max_free_right;
 
 	header = &map->header;
 	/* A length of 0 means that no subtree is skipped during the split. */
 	root = vm_map_splay_split(map, addr, 0, &llist, &rlist);
 	if (root != NULL) {
 		/* addr lies within root; reattach both spines around it. */
 		max_free_left = vm_map_splay_merge_left(header, root, llist);
 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
 	} else if (llist != header) {
 		/*
 		 * Recover the greatest node in the left
 		 * subtree and make it the root.
 		 */
 		root = llist;
 		llist = root->right;
 		max_free_left = vm_map_splay_merge_left(header, root, llist);
 		max_free_right = vm_map_splay_merge_succ(header, root, rlist);
 	} else if (rlist != header) {
 		/*
 		 * Recover the least node in the right
 		 * subtree and make it the root.
 		 */
 		root = rlist;
 		rlist = root->left;
 		max_free_left = vm_map_splay_merge_pred(header, root, llist);
 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
 	} else {
 		/* There is no root. */
 		return (NULL);
 	}
 	root->max_free = vm_size_max(max_free_left, max_free_right);
 	map->root = root;
 	VM_MAP_ASSERT_CONSISTENT(map);
 	return (root);
 }
 
 /*
  *	vm_map_entry_{un,}link:
  *
  *	Insert/remove entries from maps.  On linking, if new entry clips
  *	existing entry, trim existing entry to avoid overlap, and manage
  *	offsets.  On unlinking, merge disappearing entry with neighbor, if
  *	called for, and manage offsets.  Callers should not modify fields in
  *	entries already mapped.
  */
 static void
 vm_map_entry_link(vm_map_t map, vm_map_entry_t entry)
 {
 	vm_map_entry_t header, llist, rlist, root;
 	vm_size_t max_free_left, max_free_right;
 
 	CTR3(KTR_VM,
 	    "vm_map_entry_link: map %p, nentries %d, entry %p", map,
 	    map->nentries, entry);
 	VM_MAP_ASSERT_LOCKED(map);
 	map->nentries++;
 	header = &map->header;
 	/* Split the tree around the new entry's start address. */
 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
 	if (root == NULL) {
 		/*
 		 * The new entry does not overlap any existing entry in the
 		 * map, so it becomes the new root of the map tree.
 		 */
 		max_free_left = vm_map_splay_merge_pred(header, entry, llist);
 		max_free_right = vm_map_splay_merge_succ(header, entry, rlist);
 	} else if (entry->start == root->start) {
 		/*
 		 * The new entry is a clone of root, with only the end field
 		 * changed.  The root entry will be shrunk to abut the new
 		 * entry, and will be the right child of the new root entry in
 		 * the modified map.
 		 */
 		KASSERT(entry->end < root->end,
 		    ("%s: clip_start not within entry", __func__));
 		vm_map_splay_findprev(root, &llist);
 		/* Advance root's offset by the size of the range it gives up. */
 		root->offset += entry->end - root->start;
 		root->start = entry->end;
 		max_free_left = vm_map_splay_merge_pred(header, entry, llist);
 		max_free_right = root->max_free = vm_size_max(
 		    vm_map_splay_merge_pred(entry, root, entry),
 		    vm_map_splay_merge_right(header, root, rlist));
 	} else {
 		/*
 		 * The new entry is a clone of root, with only the start field
 		 * changed.  The root entry will be shrunk to abut the new
 		 * entry, and will be the left child of the new root entry in
 		 * the modified map.
 		 */
 		/*
 		 * NOTE(review): the message below says "clip_start" although
 		 * this branch checks the end address; possibly a copy of the
 		 * message above -- confirm against the callers.
 		 */
 		KASSERT(entry->end == root->end,
 		    ("%s: clip_start not within entry", __func__));
 		vm_map_splay_findnext(root, &rlist);
 		/* The new entry begins part way into root's range. */
 		entry->offset += entry->start - root->start;
 		root->end = entry->start;
 		max_free_left = root->max_free = vm_size_max(
 		    vm_map_splay_merge_left(header, root, llist),
 		    vm_map_splay_merge_succ(entry, root, entry));
 		max_free_right = vm_map_splay_merge_succ(header, entry, rlist);
 	}
 	entry->max_free = vm_size_max(max_free_left, max_free_right);
 	map->root = entry;
 	VM_MAP_ASSERT_CONSISTENT(map);
 }
 
 /* Selects what vm_map_entry_unlink() does with the removed entry's range. */
 enum unlink_merge_type {
 	/* Remove the entry and leave its neighbors unchanged. */
 	UNLINK_MERGE_NONE,
 	/* The following entry takes over the removed entry's start and offset. */
 	UNLINK_MERGE_NEXT
 };
 
 /*
  * Remove "entry" from the map's tree and decrement the entry count.  With
  * UNLINK_MERGE_NEXT, the following entry is first extended backwards to
  * take over the removed entry's start and offset.  The entry itself is not
  * freed here.
  */
 static void
 vm_map_entry_unlink(vm_map_t map, vm_map_entry_t entry,
     enum unlink_merge_type op)
 {
 	vm_map_entry_t header, llist, rlist, root;
 	vm_size_t max_free_left, max_free_right;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	header = &map->header;
 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
 	KASSERT(root != NULL,
 	    ("vm_map_entry_unlink: unlink object not mapped"));
 
 	/* Bring the neighbors of root to the heads of the two lists. */
 	vm_map_splay_findprev(root, &llist);
 	vm_map_splay_findnext(root, &rlist);
 	if (op == UNLINK_MERGE_NEXT) {
 		rlist->start = root->start;
 		rlist->offset = root->offset;
 	}
 	/* Choose a neighbor as the new root, preferring the lower one. */
 	if (llist != header) {
 		root = llist;
 		llist = root->right;
 		max_free_left = vm_map_splay_merge_left(header, root, llist);
 		max_free_right = vm_map_splay_merge_succ(header, root, rlist);
 	} else if (rlist != header) {
 		root = rlist;
 		rlist = root->left;
 		max_free_left = vm_map_splay_merge_pred(header, root, llist);
 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
 	} else {
 		/* The map is now empty; the header links back to itself. */
 		header->left = header->right = header;
 		root = NULL;
 	}
 	if (root != NULL)
 		root->max_free = vm_size_max(max_free_left, max_free_right);
 	map->root = root;
 	VM_MAP_ASSERT_CONSISTENT(map);
 	map->nentries--;
 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
 	    map->nentries, entry);
 }
 
 /*
  *	vm_map_entry_resize:
  *
  *	Resize a vm_map_entry, recompute the amount of free space that
  *	follows it and propagate that value up the tree.
  *
  *	The map must be locked, and leaves it so.
  */
 static void
 vm_map_entry_resize(vm_map_t map, vm_map_entry_t entry, vm_size_t grow_amount)
 {
 	vm_map_entry_t header, llist, rlist, root;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	header = &map->header;
 	/* Bring the entry that contains entry->start to the root. */
 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
 	KASSERT(root != NULL, ("%s: resize object not mapped", __func__));
 	vm_map_splay_findnext(root, &rlist);
 	/*
 	 * Extend the entry before the spines are merged back, so that the
 	 * gap to the following entry is computed from the new end address.
 	 */
 	entry->end += grow_amount;
 	root->max_free = vm_size_max(
 	    vm_map_splay_merge_left(header, root, llist),
 	    vm_map_splay_merge_succ(header, root, rlist));
 	map->root = root;
 	VM_MAP_ASSERT_CONSISTENT(map);
 	CTR4(KTR_VM, "%s: map %p, nentries %d, entry %p",
 	    __func__, map, map->nentries, entry);
 }
 
 /*
  *	vm_map_lookup_entry:	[ internal use only ]
  *
  *	Finds the map entry containing (or
  *	immediately preceding) the specified address
  *	in the given map; the entry is returned
  *	in the "entry" parameter.  The boolean
  *	result indicates whether the address is
  *	actually contained in the map.
  */
 boolean_t
 vm_map_lookup_entry(
 	vm_map_t map,
 	vm_offset_t address,
 	vm_map_entry_t *entry)	/* OUT */
 {
 	vm_map_entry_t cur, header, lbound, ubound;
 	boolean_t locked;
 
 	/*
 	 * If the map is empty, then the map entry immediately preceding
 	 * "address" is the map's header.
 	 */
 	header = &map->header;
 	cur = map->root;
 	if (cur == NULL) {
 		*entry = header;
 		return (FALSE);
 	}
 	/* Fast path: the current root already contains "address". */
 	if (address >= cur->start && cur->end > address) {
 		*entry = cur;
 		return (TRUE);
 	}
 	/*
 	 * Splay only if the map is already write locked or the read lock
 	 * can be upgraded without blocking.
 	 */
 	if ((locked = vm_map_locked(map)) ||
 	    sx_try_upgrade(&map->lock)) {
 		/*
 		 * Splay requires a write lock on the map.  However, it only
 		 * restructures the binary search tree; it does not otherwise
 		 * change the map.  Thus, the map's timestamp need not change
 		 * on a temporary upgrade.
 		 */
 		cur = vm_map_splay(map, address);
 		/* Undo the temporary upgrade taken above. */
 		if (!locked) {
 			VM_MAP_UNLOCK_CONSISTENT(map);
 			sx_downgrade(&map->lock);
 		}
 
 		/*
 		 * If "address" is contained within a map entry, the new root
 		 * is that map entry.  Otherwise, the new root is a map entry
 		 * immediately before or after "address".
 		 */
 		if (address < cur->start) {
 			*entry = header;
 			return (FALSE);
 		}
 		*entry = cur;
 		return (address < cur->end);
 	}
 	/*
 	 * Since the map is only locked for read access, perform a
 	 * standard binary search tree lookup for "address".
 	 */
 	/*
 	 * lbound and ubound track the nearest entries known to lie below
 	 * and above "address".  Following a pointer that leads back to one
 	 * of them means a thread was taken and the search has left the tree.
 	 */
 	lbound = ubound = header;
 	for (;;) {
 		if (address < cur->start) {
 			ubound = cur;
 			cur = cur->left;
 			if (cur == lbound)
 				break;
 		} else if (cur->end <= address) {
 			lbound = cur;
 			cur = cur->right;
 			if (cur == ubound)
 				break;
 		} else {
 			*entry = cur;
 			return (TRUE);
 		}
 	}
 	/* Not mapped: report the closest entry below "address". */
 	*entry = lbound;
 	return (FALSE);
 }
 
 /*
  *	vm_map_insert:
  *
  *	Inserts the given whole VM object into the target
  *	map at the specified address range.  The object's
  *	size should match that of the address range.
  *
  *	Requires that the map be locked, and leaves it so.
  *
  *	If object is non-NULL, ref count must be bumped by caller
  *	prior to making call to account for the new entry.
  *
  *	"cow" carries the MAP_* request flags; they are translated below
  *	into MAP_ENTRY_* flags for the new entry.
  *
  *	Returns:
  *	  KERN_SUCCESS		 the range is now mapped, either by a new
  *				 entry or by extending the previous one.
  *	  KERN_INVALID_ADDRESS	 the range is empty or not valid for the map.
  *	  KERN_PROTECTION_FAILURE the map has MAP_WXORX set and "prot" asks
  *				 for both write and execute.
  *	  KERN_NO_SPACE		 the range overlaps an existing entry.
  *	  KERN_INVALID_ARGUMENT	 bad guard request or bad split boundary.
  *	  KERN_RESOURCE_SHORTAGE swap_reserve() failed.
  */
 int
 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
     vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow)
 {
 	vm_map_entry_t new_entry, next_entry, prev_entry;
 	struct ucred *cred;
 	vm_eflags_t protoeflags;
 	vm_inherit_t inheritance;
 	u_long bdry;
 	u_int bidx;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT(object != kernel_object ||
 	    (cow & MAP_COPY_ON_WRITE) == 0,
 	    ("vm_map_insert: kernel object and COW"));
 	KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0 ||
 	    (cow & MAP_SPLIT_BOUNDARY_MASK) != 0,
 	    ("vm_map_insert: paradoxical MAP_NOFAULT request, obj %p cow %#x",
 	    object, cow));
 	KASSERT((prot & ~max) == 0,
 	    ("prot %#x is not subset of max_prot %#x", prot, max));
 
 	/*
 	 * Check that the start and end points are not bogus.
 	 */
 	if (start == end || !vm_map_range_valid(map, start, end))
 		return (KERN_INVALID_ADDRESS);
 
 	/*
 	 * A map flagged MAP_WXORX refuses any mapping that is requested
 	 * both writable and executable.
 	 */
 	if ((map->flags & MAP_WXORX) != 0 && (prot & (VM_PROT_WRITE |
 	    VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE))
 		return (KERN_PROTECTION_FAILURE);
 
 	/*
 	 * Find the entry prior to the proposed starting address; if it's part
 	 * of an existing entry, this range is bogus.
 	 */
 	if (vm_map_lookup_entry(map, start, &prev_entry))
 		return (KERN_NO_SPACE);
 
 	/*
 	 * Assert that the next entry doesn't overlap the end point.
 	 */
 	next_entry = vm_map_entry_succ(prev_entry);
 	if (next_entry->start < end)
 		return (KERN_NO_SPACE);
 
 	/*
 	 * A guard request must come without a backing object and with
 	 * an empty maximum protection.
 	 */
 	if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
 	    max != VM_PROT_NONE))
 		return (KERN_INVALID_ARGUMENT);
 
 	/*
 	 * Translate the MAP_* request flags in "cow" into the MAP_ENTRY_*
 	 * flags and inheritance that the new entry will carry.
 	 */
 	protoeflags = 0;
 	if (cow & MAP_COPY_ON_WRITE)
 		protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
 	if (cow & MAP_NOFAULT)
 		protoeflags |= MAP_ENTRY_NOFAULT;
 	if (cow & MAP_DISABLE_SYNCER)
 		protoeflags |= MAP_ENTRY_NOSYNC;
 	if (cow & MAP_DISABLE_COREDUMP)
 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
 	if (cow & MAP_STACK_GROWS_DOWN)
 		protoeflags |= MAP_ENTRY_GROWS_DOWN;
 	if (cow & MAP_STACK_GROWS_UP)
 		protoeflags |= MAP_ENTRY_GROWS_UP;
 	if (cow & MAP_WRITECOUNT)
 		protoeflags |= MAP_ENTRY_WRITECNT;
 	if (cow & MAP_VN_EXEC)
 		protoeflags |= MAP_ENTRY_VN_EXEC;
 	if ((cow & MAP_CREATE_GUARD) != 0)
 		protoeflags |= MAP_ENTRY_GUARD;
 	if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
 		protoeflags |= MAP_ENTRY_STACK_GAP_DN;
 	if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
 		protoeflags |= MAP_ENTRY_STACK_GAP_UP;
 	if (cow & MAP_INHERIT_SHARE)
 		inheritance = VM_INHERIT_SHARE;
 	else
 		inheritance = VM_INHERIT_DEFAULT;
 	if ((cow & MAP_SPLIT_BOUNDARY_MASK) != 0) {
 		/* This magically ignores index 0, for usual page size. */
 		bidx = (cow & MAP_SPLIT_BOUNDARY_MASK) >>
 		    MAP_SPLIT_BOUNDARY_SHIFT;
 		if (bidx >= MAXPAGESIZES)
 			return (KERN_INVALID_ARGUMENT);
 		/* Both ends must be aligned to the requested page size. */
 		bdry = pagesizes[bidx] - 1;
 		if ((start & bdry) != 0 || (end & bdry) != 0)
 			return (KERN_INVALID_ARGUMENT);
 		protoeflags |= bidx << MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
 	}
 
 	/*
 	 * Swap accounting.  No credential is attached when the request is
 	 * MAP_ACC_NO_CHARGE, MAP_NOFAULT or MAP_CREATE_GUARD.  Otherwise,
 	 * if the caller states the charge was already made
 	 * (MAP_ACC_CHARGED), or the mapping is writable and either needs a
 	 * copy or has no object, the current thread's credential is
 	 * recorded, reserving swap first unless MAP_ACC_CHARGED is set.
 	 */
 	cred = NULL;
 	if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
 		goto charged;
 	if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
 	    ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
 		if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start))
 			return (KERN_RESOURCE_SHORTAGE);
 		KASSERT(object == NULL ||
 		    (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
 		    object->cred == NULL,
 		    ("overcommit: vm_map_insert o %p", object));
 		cred = curthread->td_ucred;
 	}
 
 charged:
 	/* Expand the kernel pmap, if necessary. */
 	if (map == kernel_map && end > kernel_vm_end)
 		pmap_growkernel(end);
 	if (object != NULL) {
 		/*
 		 * OBJ_ONEMAPPING must be cleared unless this mapping
 		 * is trivially proven to be the only mapping for any
 		 * of the object's pages.  (Object granularity
 		 * reference counting is insufficient to recognize
 		 * aliases with precision.)
 		 */
 		if ((object->flags & OBJ_ANON) != 0) {
 			VM_OBJECT_WLOCK(object);
 			if (object->ref_count > 1 || object->shadow_count != 0)
 				vm_object_clear_flag(object, OBJ_ONEMAPPING);
 			VM_OBJECT_WUNLOCK(object);
 		}
 	} else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) ==
 	    protoeflags &&
 	    (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP |
 	    MAP_VN_EXEC)) == 0 &&
 	    prev_entry->end == start && (prev_entry->cred == cred ||
 	    (prev_entry->object.vm_object != NULL &&
 	    prev_entry->object.vm_object->cred == cred)) &&
 	    vm_object_coalesce(prev_entry->object.vm_object,
 	    prev_entry->offset,
 	    (vm_size_t)(prev_entry->end - prev_entry->start),
 	    (vm_size_t)(end - prev_entry->end), cred != NULL &&
 	    (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) {
 		/*
 		 * We were able to extend the object.  Determine if we
 		 * can extend the previous map entry to include the
 		 * new range as well.
 		 */
 		if (prev_entry->inheritance == inheritance &&
 		    prev_entry->protection == prot &&
 		    prev_entry->max_protection == max &&
 		    prev_entry->wired_count == 0) {
 			KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) ==
 			    0, ("prev_entry %p has incoherent wiring",
 			    prev_entry));
 			/* Guard entries do not count toward map->size. */
 			if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
 				map->size += end - prev_entry->end;
 			vm_map_entry_resize(map, prev_entry,
 			    end - prev_entry->end);
 			vm_map_try_merge_entries(map, prev_entry, next_entry);
 			return (KERN_SUCCESS);
 		}
 
 		/*
 		 * If we can extend the object but cannot extend the
 		 * map entry, we have to create a new map entry.  We
 		 * must bump the ref count on the extended object to
 		 * account for it.  object may be NULL.
 		 */
 		object = prev_entry->object.vm_object;
 		offset = prev_entry->offset +
 		    (prev_entry->end - prev_entry->start);
 		vm_object_reference(object);
 		if (cred != NULL && object != NULL && object->cred != NULL &&
 		    !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
 			/* Object already accounts for this uid. */
 			cred = NULL;
 		}
 	}
 	/* The new entry keeps its own reference on the credential. */
 	if (cred != NULL)
 		crhold(cred);
 
 	/*
 	 * Create a new entry
 	 */
 	new_entry = vm_map_entry_create(map);
 	new_entry->start = start;
 	new_entry->end = end;
 	new_entry->cred = NULL;
 
 	new_entry->eflags = protoeflags;
 	new_entry->object.vm_object = object;
 	new_entry->offset = offset;
 
 	new_entry->inheritance = inheritance;
 	new_entry->protection = prot;
 	new_entry->max_protection = max;
 	new_entry->wired_count = 0;
 	new_entry->wiring_thread = NULL;
 	new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT;
 	new_entry->next_read = start;
 
 	/*
 	 * The credential is stored only after checking, with the entry's
 	 * cred still NULL, that the entry is not already charged.
 	 */
 	KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry),
 	    ("overcommit: vm_map_insert leaks vm_map %p", new_entry));
 	new_entry->cred = cred;
 
 	/*
 	 * Insert the new entry into the list
 	 */
 	vm_map_entry_link(map, new_entry);
 	if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
 		map->size += new_entry->end - new_entry->start;
 
 	/*
 	 * Try to coalesce the new entry with both the previous and next
 	 * entries in the list.  Previously, we only attempted to coalesce
 	 * with the previous entry when object is NULL.  Here, we handle the
 	 * other cases, which are less common.
 	 */
 	vm_map_try_merge_entries(map, prev_entry, new_entry);
 	vm_map_try_merge_entries(map, new_entry, next_entry);
 
 	/* Optionally pre-populate the pmap for the new range. */
 	if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) {
 		vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset),
 		    end - start, cow & MAP_PREFAULT_PARTIAL);
 	}
 
 	return (KERN_SUCCESS);
 }
 
 /*
  *	vm_map_findspace:
  *
  *	Find the first fit (lowest VM address) for "length" free bytes
  *	beginning at address >= start in the given map.
  *
  *	In a vm_map_entry, "max_free" is the maximum amount of
  *	contiguous free space between an entry in its subtree and a
  *	neighbor of that entry.  This allows finding a free region in
  *	one path down the tree, so O(log n) amortized with splay
  *	trees.
  *
  *	The map must be locked, and leaves it so.
  *
  *	Returns: starting address if sufficient space,
  *		 vm_map_max(map)-length+1 if insufficient space.
  *
  *	Callers in this file detect failure by testing whether the
  *	returned address plus "length" exceeds vm_map_max(map).
  */
 vm_offset_t
 vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length)
 {
 	vm_map_entry_t header, llist, rlist, root, y;
 	vm_size_t left_length, max_free_left, max_free_right;
 	vm_offset_t gap_end;
 
 	VM_MAP_ASSERT_LOCKED(map);
 
 	/*
 	 * Request must fit within min/max VM address and must avoid
 	 * address wrap.
 	 */
 	start = MAX(start, vm_map_min(map));
 	if (start >= vm_map_max(map) || length > vm_map_max(map) - start)
 		return (vm_map_max(map) - length + 1);
 
 	/* Empty tree means wide open address space. */
 	if (map->root == NULL)
 		return (start);
 
 	/*
 	 * After splay_split, if start is within an entry, push it to the start
 	 * of the following gap.  If rlist is at the end of the gap containing
 	 * start, save the end of that gap in gap_end to see if the gap is big
 	 * enough; otherwise set gap_end to start, which makes the gap check
 	 * below fail for any nonzero length, and move directly to a search
 	 * of the right subtree.
 	 */
 	header = &map->header;
 	root = vm_map_splay_split(map, start, length, &llist, &rlist);
 	gap_end = rlist->start;
 	if (root != NULL) {
 		/* "start" fell inside an entry; continue from its end. */
 		start = root->end;
 		if (root->right != rlist)
 			gap_end = start;
 		max_free_left = vm_map_splay_merge_left(header, root, llist);
 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
 	} else if (rlist != header) {
 		/* No entry holds "start"; root the tree at rlist. */
 		root = rlist;
 		rlist = root->left;
 		max_free_left = vm_map_splay_merge_pred(header, root, llist);
 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
 	} else {
 		/* rlist is the header; root the tree at llist instead. */
 		root = llist;
 		llist = root->right;
 		max_free_left = vm_map_splay_merge_left(header, root, llist);
 		max_free_right = vm_map_splay_merge_succ(header, root, rlist);
 	}
 	root->max_free = vm_size_max(max_free_left, max_free_right);
 	map->root = root;
 	VM_MAP_ASSERT_CONSISTENT(map);
 	if (length <= gap_end - start)
 		return (start);
 
 	/* With max_free, can immediately tell if no solution. */
 	if (root->right == header || length > root->right->max_free)
 		return (vm_map_max(map) - length + 1);
 
 	/*
 	 * Splay for the least large-enough gap in the right subtree.
 	 */
 	llist = rlist = header;
 	for (left_length = 0;;
 	    left_length = vm_map_entry_max_free_left(root, llist)) {
 		if (length <= left_length)
 			SPLAY_LEFT_STEP(root, y, llist, rlist,
 			    length <= vm_map_entry_max_free_left(y, llist));
 		else
 			SPLAY_RIGHT_STEP(root, y, llist, rlist,
 			    length > vm_map_entry_max_free_left(y, root));
 		if (root == NULL)
 			break;
 	}
 	/*
 	 * Reassemble the tree with llist as the new root; the gap that
 	 * was found begins at that entry's end, which is returned below.
 	 */
 	root = llist;
 	llist = root->right;
 	max_free_left = vm_map_splay_merge_left(header, root, llist);
 	if (rlist == header) {
 		root->max_free = vm_size_max(max_free_left,
 		    vm_map_splay_merge_succ(header, root, rlist));
 	} else {
 		y = rlist;
 		rlist = y->left;
 		y->max_free = vm_size_max(
 		    vm_map_splay_merge_pred(root, y, root),
 		    vm_map_splay_merge_right(header, y, rlist));
 		root->max_free = vm_size_max(max_free_left, y->max_free);
 	}
 	map->root = root;
 	VM_MAP_ASSERT_CONSISTENT(map);
 	return (root->end);
 }
 
 /*
  *	vm_map_fixed:
  *
  *	Create a mapping of "length" bytes at exactly "start".  Unless
  *	MAP_CHECK_EXCL is set in "cow", whatever is currently mapped in the
  *	range is deleted first.  Stack requests (MAP_STACK_GROWS_DOWN or
  *	MAP_STACK_GROWS_UP) are handed to vm_map_stack_locked(); all other
  *	requests go to vm_map_insert().
  *
  *	The map is locked for the duration of the call.  Returns the KERN_*
  *	status of the last step that was attempted.
  */
 int
 vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
     vm_offset_t start, vm_size_t length, vm_prot_t prot,
     vm_prot_t max, int cow)
 {
 	vm_offset_t end;
 	int rv;
 	bool is_stack;
 
 	end = start + length;
 	is_stack = (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0;
 	KASSERT(!is_stack || object == NULL,
 	    ("vm_map_fixed: non-NULL backing object for stack"));
 
 	vm_map_lock(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
 
 	/* Clear the range first unless the caller demanded exclusivity. */
 	rv = KERN_SUCCESS;
 	if ((cow & MAP_CHECK_EXCL) == 0)
 		rv = vm_map_delete(map, start, end);
 
 	if (rv == KERN_SUCCESS) {
 		if (is_stack)
 			rv = vm_map_stack_locked(map, start, length, sgrowsiz,
 			    prot, max, cow);
 		else
 			rv = vm_map_insert(map, object, offset, start, end,
 			    prot, max, cow);
 	}
 
 	vm_map_unlock(map);
 	return (rv);
 }
 
 /*
  * Sizes of the ASLR randomization window, counted in pages of size
  * pagesizes[pidx].  vm_map_find() indexes these tables with pidx, which is 1
  * when a second page size is configured and the request is for
  * VMFS_SUPER_SPACE or VMFS_OPTIMAL_SPACE, and 0 otherwise.  The _64 table
  * is chosen when the map extends beyond MAP_32BIT_MAX_ADDR and the
  * caller's max_addr is either 0 or also beyond it; the _32 table is
  * chosen in all other cases.
  */
 static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
 static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
 
 /*
  * vm.cluster_anon: run-time policy read by clustering_anon_allowed().
  * The meaning of each value is given in the sysctl description below.
  */
 static int cluster_anon = 1;
 SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
     &cluster_anon, 0,
     "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always");
 
 /*
  * Report whether a request with address hint "addr" may be clustered with
  * existing anonymous mappings, according to the cluster_anon setting:
  * 0 never, 1 only when no hint was given (addr == 0), anything else always.
  */
 static bool
 clustering_anon_allowed(vm_offset_t addr)
 {
 	int mode;
 
 	/* Read the tunable once so a single value decides the answer. */
 	mode = cluster_anon;
 	if (mode == 0)
 		return (false);
 	if (mode == 1)
 		return (addr == 0);
 	return (true);
 }
 
 /*
  * vm.aslr_restarts: read-only counter that vm_map_find() increments each
  * time it gives up on its first placement attempt and starts a second try.
  */
 static long aslr_restarts;
 SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD,
     &aslr_restarts, 0,
     "Number of aslr failures");
 
 /*
  * Starting from the free region at "*addr", look for "length" bytes of free
  * space whose start satisfies the requested alignment.  The search is
  * address-ordered and first-fit, optionally bounded above by "max_addr"
  * (0 means no bound).  An "alignment" of zero asks for the alignment that
  * pmap_align_superpage() derives from the (object, offset) pair, so that
  * superpage mappings can be used as much as possible.
  *
  * Returns KERN_SUCCESS with the chosen address in "*addr", or KERN_NO_SPACE.
  *
  * The map must be locked, and on entry "*addr" must already have at least
  * "length" bytes of free space.
  */
 static int
 vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr,
     vm_offset_t alignment)
 {
 	vm_offset_t aligned, candidate;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	candidate = *addr;
 	KASSERT(candidate == vm_map_findspace(map, candidate, length),
 	    ("caller failed to provide space %#jx at address %p",
 	     (uintmax_t)length, (void *)candidate));
 	do {
 		/*
 		 * Loop invariant: "*addr" equals "candidate" and has at
 		 * least "length" bytes of free space.
 		 */
 		if (alignment != 0)
 			*addr = roundup2(*addr, alignment);
 		else
 			pmap_align_superpage(object, offset, addr, length);
 		aligned = *addr;
 
 		/*
 		 * If aligning left the address where it was, the known
 		 * free space is still good.
 		 */
 		if (aligned == candidate)
 			break;
 
 		/*
 		 * Aligning upward wrapped around.  The wrapped value might
 		 * be a valid address, so vm_map_findspace() cannot be
 		 * trusted to reject it.
 		 */
 		if (aligned < candidate)
 			return (KERN_NO_SPACE);
 
 		candidate = vm_map_findspace(map, aligned, length);
 		*addr = candidate;
 		if (candidate + length > vm_map_max(map))
 			return (KERN_NO_SPACE);
 		if (max_addr != 0 && candidate + length > max_addr)
 			return (KERN_NO_SPACE);
 
 		/*
 		 * When the search hands back the aligned address itself,
 		 * it is both aligned and free, and the loop ends.
 		 */
 	} while (candidate != aligned);
 	return (KERN_SUCCESS);
 }
 
 /*
  * Find "length" free bytes at or above "*addr" and then align the result
  * with vm_map_alignspace(), passing no object.  "max_addr" of 0 means no
  * upper bound.  Returns KERN_NO_SPACE if the initial search fails;
  * otherwise returns whatever vm_map_alignspace() returns.  "*addr" is
  * updated in either case.
  */
 int
 vm_map_find_aligned(vm_map_t map, vm_offset_t *addr, vm_size_t length,
     vm_offset_t max_addr, vm_offset_t alignment)
 {
 	vm_offset_t found;
 
 	/* XXXKIB ASLR eh ? */
 	found = vm_map_findspace(map, *addr, length);
 	*addr = found;
 	if (found + length > vm_map_max(map))
 		return (KERN_NO_SPACE);
 	if (max_addr != 0 && found + length > max_addr)
 		return (KERN_NO_SPACE);
 	return (vm_map_alignspace(map, NULL, 0, addr, length, max_addr,
 	    alignment));
 }
 
 /*
  *	vm_map_find finds an unallocated region in the target address
  *	map with the given length.  The search is defined to be
  *	first-fit from the specified address; the region found is
  *	returned in the same parameter.
  *
  *	If object is non-NULL, ref count must be bumped by caller
  *	prior to making call to account for the new entry.
  *
  *	"find_space" is one of the VMFS_* values.  When its bits above the
  *	low byte are nonzero, they give the base-2 logarithm of the required
  *	alignment.  With VMFS_NO_SPACE no search is done and the mapping is
  *	placed at "*addr"; if MAP_REMAP is also set in "cow", the existing
  *	range is validated and deleted first.
  *
  *	"max_addr", when nonzero, is an upper bound on the end of the
  *	region.
  *
  *	This function takes and releases the map lock itself.  It returns
  *	a KERN_* status; on success for a clustered anonymous request,
  *	map->anon_loc is advanced to the end of the new mapping.
  */
 int
 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 	    vm_offset_t *addr,	/* IN/OUT */
 	    vm_size_t length, vm_offset_t max_addr, int find_space,
 	    vm_prot_t prot, vm_prot_t max, int cow)
 {
 	vm_offset_t alignment, curr_min_addr, min_addr;
 	int gap, pidx, rv, try;
 	bool cluster, en_aslr, update_anon;
 
 	KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
 	    object == NULL,
 	    ("vm_map_find: non-NULL backing object for stack"));
 	MPASS((cow & MAP_REMAP) == 0 || (find_space == VMFS_NO_SPACE &&
 	    (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0));
 	/*
 	 * VMFS_OPTIMAL_SPACE is only honored for an object that has
 	 * OBJ_COLORED set; otherwise it is treated as VMFS_ANY_SPACE.
 	 */
 	if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
 	    (object->flags & OBJ_COLORED) == 0))
 		find_space = VMFS_ANY_SPACE;
 	if (find_space >> 8 != 0) {
 		KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
 		alignment = (vm_offset_t)1 << (find_space >> 8);
 	} else
 		alignment = 0;
 	en_aslr = (map->flags & MAP_ASLR) != 0;
 	/*
 	 * "update_anon" remembers the initial clustering decision for the
 	 * whole call, while "cluster" may be switched off along the way.
 	 */
 	update_anon = cluster = clustering_anon_allowed(*addr) &&
 	    (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 &&
 	    find_space != VMFS_NO_SPACE && object == NULL &&
 	    (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP |
 	    MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE;
 	curr_min_addr = min_addr = *addr;
 	if (en_aslr && min_addr == 0 && !cluster &&
 	    find_space != VMFS_NO_SPACE &&
 	    (map->flags & MAP_ASLR_IGNSTART) != 0)
 		curr_min_addr = min_addr = vm_map_min(map);
 	try = 0;
 	vm_map_lock(map);
 	if (cluster) {
 		/* Start at the recorded anonymous location, if there is one. */
 		curr_min_addr = map->anon_loc;
 		if (curr_min_addr == 0)
 			cluster = false;
 	}
 	if (find_space != VMFS_NO_SPACE) {
 		KASSERT(find_space == VMFS_ANY_SPACE ||
 		    find_space == VMFS_OPTIMAL_SPACE ||
 		    find_space == VMFS_SUPER_SPACE ||
 		    alignment != 0, ("unexpected VMFS flag"));
 again:
 		/*
 		 * When creating an anonymous mapping, try clustering
 		 * with an existing anonymous mapping first.
 		 *
 		 * We make up to two attempts to find address space
 		 * for a given find_space value. The first attempt may
 		 * apply randomization or may cluster with an existing
 		 * anonymous mapping. If this first attempt fails,
 		 * perform a first-fit search of the available address
 		 * space.
 		 *
 		 * If all tries failed, and find_space is
 		 * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE.
 		 * Again enable clustering and randomization.
 		 */
 		try++;
 		MPASS(try <= 2);
 
 		if (try == 2) {
 			/*
 			 * Second try: we failed either to find a
 			 * suitable region for randomizing the
 			 * allocation, or to cluster with an existing
 			 * mapping.  Retry with free run.
 			 */
 			curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ?
 			    vm_map_min(map) : min_addr;
 			atomic_add_long(&aslr_restarts, 1);
 		}
 
 		if (try == 1 && en_aslr && !cluster) {
 			/*
 			 * Find space for allocation, including
 			 * gap needed for later randomization.
 			 */
 			pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 &&
 			    (find_space == VMFS_SUPER_SPACE || find_space ==
 			    VMFS_OPTIMAL_SPACE) ? 1 : 0;
 			gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR &&
 			    (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ?
 			    aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx];
 			*addr = vm_map_findspace(map, curr_min_addr,
 			    length + gap * pagesizes[pidx]);
 			if (*addr + length + gap * pagesizes[pidx] >
 			    vm_map_max(map))
 				goto again;
 			/* And randomize the start address. */
 			*addr += (arc4random() % gap) * pagesizes[pidx];
 			if (max_addr != 0 && *addr + length > max_addr)
 				goto again;
 		} else {
 			*addr = vm_map_findspace(map, curr_min_addr, length);
 			if (*addr + length > vm_map_max(map) ||
 			    (max_addr != 0 && *addr + length > max_addr)) {
 				/*
 				 * A failed clustering attempt gets one
 				 * retry without clustering; any other
 				 * failure here is final.
 				 */
 				if (cluster) {
 					cluster = false;
 					MPASS(try == 1);
 					goto again;
 				}
 				rv = KERN_NO_SPACE;
 				goto done;
 			}
 		}
 
 		if (find_space != VMFS_ANY_SPACE &&
 		    (rv = vm_map_alignspace(map, object, offset, addr, length,
 		    max_addr, alignment)) != KERN_SUCCESS) {
 			if (find_space == VMFS_OPTIMAL_SPACE) {
 				/* Start over with the weaker request. */
 				find_space = VMFS_ANY_SPACE;
 				curr_min_addr = min_addr;
 				cluster = update_anon;
 				try = 0;
 				goto again;
 			}
 			goto done;
 		}
 	} else if ((cow & MAP_REMAP) != 0) {
 		if (!vm_map_range_valid(map, *addr, *addr + length)) {
 			rv = KERN_INVALID_ADDRESS;
 			goto done;
 		}
 		rv = vm_map_delete(map, *addr, *addr + length);
 		if (rv != KERN_SUCCESS)
 			goto done;
 	}
 	if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
 		rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot,
 		    max, cow);
 	} else {
 		rv = vm_map_insert(map, object, offset, *addr, *addr + length,
 		    prot, max, cow);
 	}
 	/* Remember where the next clustered anonymous mapping may go. */
 	if (rv == KERN_SUCCESS && update_anon)
 		map->anon_loc = *addr + length;
 done:
 	vm_map_unlock(map);
 	return (rv);
 }
 
 /*
  *	vm_map_find_min() wraps vm_map_find() for callers that supply
  *	both a hint (*addr) and a true lower bound (min_addr).  The hint
  *	is where the search starts, not the lowest acceptable address.
  *
  *	The first pass asks vm_map_find() to allocate starting at the
  *	hint.  If that pass fails and min_addr lies below the hint, one
  *	more pass is made starting at min_addr.  The status of the last
  *	pass is returned.
  */
 int
 vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
     vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
     int cow)
 {
 	vm_offset_t first_hint;
 	int rv;
 
 	first_hint = *addr;
 	rv = vm_map_find(map, object, offset, addr, length, max_addr,
 	    find_space, prot, max, cow);
 	if (rv != KERN_SUCCESS && min_addr < first_hint) {
 		/* Second pass: restart the search from the lower bound. */
 		*addr = min_addr;
 		rv = vm_map_find(map, object, offset, addr, length, max_addr,
 		    find_space, prot, max, cow);
 	}
 	return (rv);
 }
 
 /*
  * Entry flags that rule out merging: an entry with any of these bits set
  * must never be merged with another entry.
  */
 #define	MAP_ENTRY_NOMERGE_MASK						\
 	(MAP_ENTRY_GROWS_DOWN |						\
 	    MAP_ENTRY_GROWS_UP |					\
 	    MAP_ENTRY_IN_TRANSITION |					\
 	    MAP_ENTRY_IS_SUB_MAP |					\
 	    MAP_ENTRY_VN_EXEC)
 
 /*
  * Report whether two entries can be folded into one: "prev" must end where
  * "entry" starts, both must reference the same object (with contiguous
  * offsets when there is an object), and their flags, protections,
  * inheritance, wiring count and credential must all match.
  */
 static bool
 vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry)
 {
 
 	KASSERT((prev->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 ||
 	    (entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0,
 	    ("vm_map_mergeable_neighbors: neither %p nor %p are mergeable",
 	    prev, entry));
 
 	/* The address ranges must be adjacent. */
 	if (prev->end != entry->start)
 		return (false);
 
 	/* Same backing object, and contiguous within it if present. */
 	if (prev->object.vm_object != entry->object.vm_object)
 		return (false);
 	if (prev->object.vm_object != NULL &&
 	    prev->offset + (prev->end - prev->start) != entry->offset)
 		return (false);
 
 	/* All remaining attributes must be identical. */
 	if (prev->eflags != entry->eflags)
 		return (false);
 	if (prev->protection != entry->protection ||
 	    prev->max_protection != entry->max_protection)
 		return (false);
 	if (prev->inheritance != entry->inheritance)
 		return (false);
 	if (prev->wired_count != entry->wired_count)
 		return (false);
 	return (prev->cred == entry->cred);
 }
 
 /*
  * Release what a merged-away entry still holds (its object reference and
  * its credential reference, if any) and then dispose of the entry itself.
  */
 static void
 vm_map_merged_neighbor_dispose(vm_map_t map, vm_map_entry_t entry)
 {
 	vm_object_t obj;
 	struct ucred *cred;
 
 	/*
 	 * For a vnode-backed object, vm_object_deallocate() ends up in
 	 * vrele().  Because other references to the vnode remain, vrele()
 	 * does not take the vnode lock, so holding the map lock here cannot
 	 * produce a lock-order reversal against the vnode lock.
 	 *
 	 * object->un_pager.vnp.writemappings counts virtual page mappings,
 	 * so it is deliberately left unchanged when the entry goes away.
 	 */
 	obj = entry->object.vm_object;
 	if (obj != NULL)
 		vm_object_deallocate(obj);
 
 	cred = entry->cred;
 	if (cred != NULL)
 		crfree(cred);
 
 	vm_map_entry_dispose(map, entry);
 }
 
 /*
  *	vm_map_try_merge_entries:
  *
  *	Check whether "prev_entry" can be merged into "entry", and if so,
  *	unlink the predecessor from the map and dispose of it.  "entry"
  *	stays valid and may end up covering a larger range.
  *
  *	The map must be locked.
  */
 void
 vm_map_try_merge_entries(vm_map_t map, vm_map_entry_t prev_entry,
     vm_map_entry_t entry)
 {
 
 	VM_MAP_ASSERT_LOCKED(map);
 
 	/* An entry carrying a no-merge flag is never merged into. */
 	if ((entry->eflags & MAP_ENTRY_NOMERGE_MASK) != 0)
 		return;
 	if (!vm_map_mergeable_neighbors(prev_entry, entry))
 		return;
 
 	vm_map_entry_unlink(map, prev_entry, UNLINK_MERGE_NEXT);
 	vm_map_merged_neighbor_dispose(map, prev_entry);
 }
 
 /*
  *	vm_map_entry_back:
  *
  *	Give an object-less map entry a new anonymous backing object that
  *	is sized to the entry.  The entry's credential is passed to the
  *	object allocator and then cleared from the entry.
  */
 static inline void
 vm_map_entry_back(vm_map_entry_t entry)
 {
 	vm_offset_t span;
 
 	KASSERT(entry->object.vm_object == NULL,
 	    ("map entry %p has backing object", entry));
 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
 	    ("map entry %p is a submap", entry));
 
 	span = entry->end - entry->start;
 	entry->object.vm_object = vm_object_allocate_anon(atop(span), NULL,
 	    entry->cred, span);
 	entry->offset = 0;
 	entry->cred = NULL;
 }
 
 /*
  *	vm_map_entry_charge_object
  *
  *	Make sure the entry's charge lives on a backing object.  An entry
  *	with no object gets one created, except in a system map or for a
  *	guard entry.  An entry that already has an object, does not need
  *	a copy, and holds a credential hands that credential, together
  *	with a charge equal to the entry's size, over to the object.
  */
 static inline void
 vm_map_entry_charge_object(vm_map_t map, vm_map_entry_t entry)
 {
 	vm_object_t obj;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
 	    ("map entry %p is a submap", entry));
 
 	obj = entry->object.vm_object;
 	if (obj == NULL) {
 		if (!map->system_map &&
 		    (entry->eflags & MAP_ENTRY_GUARD) == 0)
 			vm_map_entry_back(entry);
 		return;
 	}
 
 	/* Nothing to transfer for needs-copy or uncharged entries. */
 	if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
 	    entry->cred == NULL)
 		return;
 
 	VM_OBJECT_WLOCK(obj);
 	KASSERT(obj->cred == NULL,
 	    ("OVERCOMMIT: %s: both cred e %p", __func__, entry));
 	obj->cred = entry->cred;
 	obj->charge = entry->end - entry->start;
 	VM_OBJECT_WUNLOCK(obj);
 	entry->cred = NULL;
 }
 
 /*
  *	vm_map_entry_clone
  *
  *	Produce a copy of a map entry for use by the clipping routines.
  *	The copy takes its own references on the credential and, unless
  *	the entry is a submap, on the backing object.
  */
 static vm_map_entry_t
 vm_map_entry_clone(vm_map_t map, vm_map_entry_t entry)
 {
 	vm_map_entry_t copy;
 
 	VM_MAP_ASSERT_LOCKED(map);
 
 	/*
 	 * Set up the backing object before splitting, so that the two
 	 * halves do not each create a separate object later on.
 	 */
 	vm_map_entry_charge_object(map, entry);
 
 	/* Duplicate every field, then account for the shared references. */
 	copy = vm_map_entry_create(map);
 	*copy = *entry;
 	if (copy->cred != NULL)
 		crhold(copy->cred);
 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 		return (copy);
 
 	vm_object_reference(copy->object.vm_object);
 	vm_map_entry_set_vnode_text(copy, true);
 	/*
 	 * For a MAP_ENTRY_WRITECNT entry, the object's
 	 * un_pager.vnp.writemappings is intentionally left alone: the
 	 * virtual pages are only redistributed between the clipped
 	 * entries, so their total does not change.
 	 */
 	return (copy);
 }
 
 /*
  *	vm_map_clip_start:	[ internal use only ]
  *
  *	Ensure that the given entry begins at or after "startaddr",
  *	splitting it in two when the address falls strictly inside it.
  *
  *	Returns KERN_INVALID_ARGUMENT if the entry has a split boundary
  *	and "startaddr" is not aligned to that page size; otherwise
  *	returns KERN_SUCCESS.
  */
 static int
 vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t startaddr)
 {
 	vm_map_entry_t front;
 	int bidx;
 
 	if (!map->system_map)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "%s: map %p entry %p start 0x%jx", __func__, map, entry,
 		    (uintmax_t)startaddr);
 
 	/* No clip is needed if the entry already starts late enough. */
 	if (entry->start >= startaddr)
 		return (KERN_SUCCESS);
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT(entry->end > startaddr && entry->start < startaddr,
 	    ("%s: invalid clip of entry %p", __func__, entry));
 
 	/* An entry with a split boundary may only be cut on that boundary. */
 	bidx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
 	    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
 	if (bidx != 0 && (startaddr & (pagesizes[bidx] - 1)) != 0)
 		return (KERN_INVALID_ARGUMENT);
 
 	/*
 	 * The clone becomes the front piece, ending at "startaddr", and is
 	 * linked in BEFORE the original, so that the original entry has the
 	 * specified starting address.
 	 */
 	front = vm_map_entry_clone(map, entry);
 	front->end = startaddr;
 	vm_map_entry_link(map, front);
 	return (KERN_SUCCESS);
 }
 
 /*
  *	vm_map_lookup_clip_start:
  *
  *	Locate the entry at or just after "start".  If "start" falls
  *	inside an entry, that entry is clipped so it begins at "start".
  *	On success, "*res_entry" is the entry at or after "start" and
  *	"*prev_entry" is the entry before it.  A clip failure is returned
  *	unchanged and "*res_entry" is not written.
  */
 static int
 vm_map_lookup_clip_start(vm_map_t map, vm_offset_t start,
     vm_map_entry_t *res_entry, vm_map_entry_t *prev_entry)
 {
 	vm_map_entry_t hit;
 	int rv;
 
 	if (!map->system_map)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "%s: map %p start 0x%jx prev %p", __func__, map,
 		    (uintmax_t)start, prev_entry);
 
 	/* "start" is in a gap: the successor is the answer as it stands. */
 	if (!vm_map_lookup_entry(map, start, prev_entry)) {
 		*res_entry = vm_map_entry_succ(*prev_entry);
 		return (KERN_SUCCESS);
 	}
 
 	/* "start" is inside an entry: clip it and report its new neighbor. */
 	hit = *prev_entry;
 	rv = vm_map_clip_start(map, hit, start);
 	if (rv == KERN_SUCCESS) {
 		*prev_entry = vm_map_entry_pred(hit);
 		*res_entry = hit;
 	}
 	return (rv);
 }
 
 /*
  *	vm_map_clip_end:	[ internal use only ]
  *
  *	Ensure that the given entry ends at or before "endaddr",
  *	splitting it in two when the address falls strictly inside it.
  *
  *	Returns KERN_INVALID_ARGUMENT if the entry has a split boundary
  *	and "endaddr" is not aligned to that page size; otherwise
  *	returns KERN_SUCCESS.
  */
 static int
 vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t endaddr)
 {
 	vm_map_entry_t back;
 	int bidx;
 
 	if (!map->system_map)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "%s: map %p entry %p end 0x%jx", __func__, map, entry,
 		    (uintmax_t)endaddr);
 
 	/* No clip is needed if the entry already ends early enough. */
 	if (entry->end <= endaddr)
 		return (KERN_SUCCESS);
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT(entry->start < endaddr && entry->end > endaddr,
 	    ("%s: invalid clip of entry %p", __func__, entry));
 
 	/* An entry with a split boundary may only be cut on that boundary. */
 	bidx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
 	    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
 	if (bidx != 0 && (endaddr & (pagesizes[bidx] - 1)) != 0)
 		return (KERN_INVALID_ARGUMENT);
 
 	/*
 	 * The clone becomes the back piece, starting at "endaddr", and is
 	 * linked in AFTER the original, so that the original entry has the
 	 * specified ending address.
 	 */
 	back = vm_map_entry_clone(map, entry);
 	back->start = endaddr;
 	vm_map_entry_link(map, back);
 
 	return (KERN_SUCCESS);
 }
 
 /*
  *	vm_map_submap:		[ kernel use only ]
  *
  *	Mark the given range as handled by a subordinate map.
  *
  *	The range must have been created with vm_map_find, and nothing
  *	else may have been done to it before vm_map_submap is called.
  *
  *	Only a limited number of operations can be performed
  *	within this range after calling vm_map_submap:
  *		vm_fault
  *	[Don't try vm_map_copy!]
  *
  *	To remove a submapping, first remove the range from the superior
  *	map, and then destroy the submap (if desired).  [Better yet,
  *	don't try it.]
  *
  *	Returns KERN_SUCCESS, KERN_INVALID_ARGUMENT when the range is not
  *	covered by a single suitable entry, or the error from clipping.
  */
 int
 vm_map_submap(
 	vm_map_t map,
 	vm_offset_t start,
 	vm_offset_t end,
 	vm_map_t submap)
 {
 	vm_map_entry_t entry;
 	int rv;
 
 	/* Flag the submap up front; the flag is cleared again on failure. */
 	vm_map_lock(submap);
 	submap->flags |= MAP_IS_SUB_MAP;
 	vm_map_unlock(submap);
 
 	rv = KERN_INVALID_ARGUMENT;
 	vm_map_lock(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
 
 	/*
 	 * The whole range must sit inside one entry that is not
 	 * copy-on-write and has no backing object yet.
 	 */
 	if (vm_map_lookup_entry(map, start, &entry) && entry->end >= end &&
 	    (entry->eflags & MAP_ENTRY_COW) == 0 &&
 	    entry->object.vm_object == NULL) {
 		rv = vm_map_clip_start(map, entry, start);
 		if (rv == KERN_SUCCESS)
 			rv = vm_map_clip_end(map, entry, end);
 		if (rv == KERN_SUCCESS) {
 			entry->object.sub_map = submap;
 			entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
 		}
 	}
 	vm_map_unlock(map);
 
 	if (rv != KERN_SUCCESS) {
 		vm_map_lock(submap);
 		submap->flags &= ~MAP_IS_SUB_MAP;
 		vm_map_unlock(submap);
 	}
 	return (rv);
 }
 
 /*
  * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified.
  * vm_map_pmap_enter() uses it as the initial value of its page-index
  * threshold.
  */
 #define	MAX_INIT_PT	96
 
 /*
  *	vm_map_pmap_enter:
  *
  *	Preload the specified map's pmap with mappings to the specified
  *	object's memory-resident pages.  No further physical pages are
  *	allocated, and no further virtual pages are retrieved from secondary
  *	storage.  If the specified flags include MAP_PREFAULT_PARTIAL, then a
  *	limited number of page mappings are created at the low-end of the
  *	specified address range.  (For this purpose, a superpage mapping
  *	counts as one page mapping.)  Otherwise, all resident pages within
  *	the specified address range are mapped.
  *
  *	Nothing is done when "prot" grants neither read nor execute access
  *	or when "object" is NULL.  "addr" is the virtual address that
  *	corresponds to page index "pindex" of the object, and "size" is the
  *	length of the range in bytes.
  */
 static void
 vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags)
 {
 	vm_offset_t start;
 	vm_page_t p, p_start;
 	vm_pindex_t mask, psize, threshold, tmpidx;
 
 	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
 		return;
 	/*
 	 * Device and scatter/gather objects are handed to
 	 * pmap_object_init_pt() under the object write lock.  The type is
 	 * tested once without the lock and again after acquiring it; if it
 	 * no longer matches, the write lock is downgraded and the generic
 	 * path below is taken.  All other objects only need the read lock.
 	 */
 	if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
 		VM_OBJECT_WLOCK(object);
 		if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
 			pmap_object_init_pt(map->pmap, addr, object, pindex,
 			    size);
 			VM_OBJECT_WUNLOCK(object);
 			return;
 		}
 		VM_OBJECT_LOCK_DOWNGRADE(object);
 	} else
 		VM_OBJECT_RLOCK(object);
 
 	/*
 	 * Clamp the page count so that the range does not run past the end
 	 * of the object; bail out if it starts beyond the end.
 	 */
 	psize = atop(size);
 	if (psize + pindex > object->size) {
 		if (pindex >= object->size) {
 			VM_OBJECT_RUNLOCK(object);
 			return;
 		}
 		psize = object->size - pindex;
 	}
 
 	/*
 	 * "p_start"/"start" describe the current run of fully valid pages
 	 * that has not been entered into the pmap yet; p_start == NULL means
 	 * that no run is open.
 	 */
 	start = 0;
 	p_start = NULL;
 	threshold = MAX_INIT_PT;
 
 	p = vm_page_find_least(object, pindex);
 	/*
 	 * Assert: the variable p is either (1) the page with the
 	 * least pindex greater than or equal to the parameter pindex
 	 * or (2) NULL.
 	 */
 	for (;
 	     p != NULL && (tmpidx = p->pindex - pindex) < psize;
 	     p = TAILQ_NEXT(p, listq)) {
 		/*
 		 * don't allow an madvise to blow away our really
 		 * free pages allocating pv entries.
 		 *
 		 * For MAP_PREFAULT_PARTIAL, stop once the page index reaches
 		 * the threshold.  In both cases psize is truncated so that
 		 * the final pmap_enter_object() call below ends at this page.
 		 */
 		if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
 		    vm_page_count_severe()) ||
 		    ((flags & MAP_PREFAULT_PARTIAL) != 0 &&
 		    tmpidx >= threshold)) {
 			psize = tmpidx;
 			break;
 		}
 		if (vm_page_all_valid(p)) {
 			/* Open a new run if none is in progress. */
 			if (p_start == NULL) {
 				start = addr + ptoa(tmpidx);
 				p_start = p;
 			}
 			/*
 			 * Jump ahead if a superpage mapping is possible.
 			 * The threshold is raised by the number of pages
 			 * skipped, so the superpage counts as one mapping.
 			 */
 			if (p->psind > 0 && ((addr + ptoa(tmpidx)) &
 			    (pagesizes[p->psind] - 1)) == 0) {
 				mask = atop(pagesizes[p->psind]) - 1;
 				if (tmpidx + mask < psize &&
 				    vm_page_ps_test(p, PS_ALL_VALID, NULL)) {
 					p += mask;
 					threshold += mask;
 				}
 			}
 		} else if (p_start != NULL) {
 			/* A not fully valid page ends the current run. */
 			pmap_enter_object(map->pmap, start, addr +
 			    ptoa(tmpidx), p_start, prot);
 			p_start = NULL;
 		}
 	}
 	/* Enter the run, if any, that was still open when the loop ended. */
 	if (p_start != NULL)
 		pmap_enter_object(map->pmap, start, addr + ptoa(psize),
 		    p_start, prot);
 	VM_OBJECT_RUNLOCK(object);
 }
 
 /*
  *	vm_map_protect:
  *
  *	Sets the protection and/or the maximum protection of the
  *	specified address region in the target map.
  *
  *	"flags" selects what is changed: VM_MAP_PROTECT_SET_PROT applies
  *	"new_prot" and VM_MAP_PROTECT_SET_MAXPROT applies "new_maxprot".
  *
  *	Returns:
  *	KERN_SUCCESS		on success, or when the range is empty;
  *	KERN_OUT_OF_BOUNDS	when both values are being set and
  *				"new_prot" is not a subset of "new_maxprot";
  *	KERN_PROTECTION_FAILURE	when the map has MAP_WXORX set and write
  *				plus execute is requested, or when a
  *				requested value exceeds an entry's current
  *				maximum protection;
  *	KERN_INVALID_ARGUMENT	when the range contains a submap entry;
  *	KERN_RESOURCE_SHORTAGE	when swap space could not be reserved;
  *	or the error returned by a failed entry clip.
  */
 int
 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
     vm_prot_t new_prot, vm_prot_t new_maxprot, int flags)
 {
 	vm_map_entry_t entry, first_entry, in_tran, prev_entry;
 	vm_object_t obj;
 	struct ucred *cred;
 	vm_prot_t old_prot;
 	int rv;
 
 	if (start == end)
 		return (KERN_SUCCESS);
 
 	if ((flags & (VM_MAP_PROTECT_SET_PROT | VM_MAP_PROTECT_SET_MAXPROT)) ==
 	    (VM_MAP_PROTECT_SET_PROT | VM_MAP_PROTECT_SET_MAXPROT) &&
 	    (new_prot & new_maxprot) != new_prot)
 		return (KERN_OUT_OF_BOUNDS);
 
 	/*
 	 * The whole validation is restarted from here whenever the map lock
 	 * had to be dropped to wait for an in-transition entry.
 	 */
 again:
 	in_tran = NULL;
 	vm_map_lock(map);
 
 	if ((map->flags & MAP_WXORX) != 0 &&
 	    (flags & VM_MAP_PROTECT_SET_PROT) != 0 &&
 	    (new_prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE |
 	    VM_PROT_EXECUTE)) {
 		vm_map_unlock(map);
 		return (KERN_PROTECTION_FAILURE);
 	}
 
 	/*
 	 * Ensure that we are not concurrently wiring pages.  vm_map_wire() may
 	 * need to fault pages into the map and will drop the map lock while
 	 * doing so, and the VM object may end up in an inconsistent state if we
 	 * update the protection on the map entry in between faults.
 	 */
 	vm_map_wait_busy(map);
 
 	VM_MAP_RANGE_CHECK(map, start, end);
 
 	if (!vm_map_lookup_entry(map, start, &first_entry))
 		first_entry = vm_map_entry_succ(first_entry);
 
 	/*
 	 * Make a first pass to check for protection violations.
 	 * Guard entries are skipped; no entry is modified in this pass.
 	 */
 	for (entry = first_entry; entry->start < end;
 	    entry = vm_map_entry_succ(entry)) {
 		if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
 			continue;
 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) {
 			vm_map_unlock(map);
 			return (KERN_INVALID_ARGUMENT);
 		}
 		/*
 		 * A value that is not being set is replaced by the entry's
 		 * current value so that the subset test below passes for it.
 		 */
 		if ((flags & VM_MAP_PROTECT_SET_PROT) == 0)
 			new_prot = entry->protection;
 		if ((flags & VM_MAP_PROTECT_SET_MAXPROT) == 0)
 			new_maxprot = entry->max_protection;
 		if ((new_prot & entry->max_protection) != new_prot ||
 		    (new_maxprot & entry->max_protection) != new_maxprot) {
 			vm_map_unlock(map);
 			return (KERN_PROTECTION_FAILURE);
 		}
 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0)
 			in_tran = entry;
 	}
 
 	/*
 	 * Postpone the operation until all in-transition map entries have
 	 * stabilized.  An in-transition entry might already have its pages
 	 * wired and wired_count incremented, but not yet have its
 	 * MAP_ENTRY_USER_WIRED flag set.  In which case, we would fail to call
 	 * vm_fault_copy_entry() in the final loop below.
 	 */
 	if (in_tran != NULL) {
 		in_tran->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 		vm_map_unlock_and_wait(map, 0);
 		goto again;
 	}
 
 	/*
 	 * Before changing the protections, try to reserve swap space for any
 	 * private (i.e., copy-on-write) mappings that are transitioning from
 	 * read-only to read/write access.  If a reservation fails, break out
 	 * of this loop early and let the next loop simplify the entries, since
 	 * some may now be mergeable.
 	 */
 	rv = vm_map_clip_start(map, first_entry, start);
 	if (rv != KERN_SUCCESS) {
 		vm_map_unlock(map);
 		return (rv);
 	}
 	for (entry = first_entry; entry->start < end;
 	    entry = vm_map_entry_succ(entry)) {
 		rv = vm_map_clip_end(map, entry, end);
 		if (rv != KERN_SUCCESS) {
 			vm_map_unlock(map);
 			return (rv);
 		}
 
 		/*
 		 * No reservation is needed unless write access is being
 		 * added to an entry that is neither charged nor a guard.
 		 */
 		if ((flags & VM_MAP_PROTECT_SET_PROT) == 0 ||
 		    ((new_prot & ~entry->protection) & VM_PROT_WRITE) == 0 ||
 		    ENTRY_CHARGED(entry) ||
 		    (entry->eflags & MAP_ENTRY_GUARD) != 0)
 			continue;
 
 		cred = curthread->td_ucred;
 		obj = entry->object.vm_object;
 
 		/*
 		 * Without an object, or with a pending copy, the charge is
 		 * for the entry's own size and is recorded on the entry.
 		 */
 		if (obj == NULL ||
 		    (entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0) {
 			if (!swap_reserve(entry->end - entry->start)) {
 				rv = KERN_RESOURCE_SHORTAGE;
 				end = entry->end;
 				break;
 			}
 			crhold(cred);
 			entry->cred = cred;
 			continue;
 		}
 
 		/* Only objects flagged OBJ_SWAP are charged. */
 		VM_OBJECT_WLOCK(obj);
 		if ((obj->flags & OBJ_SWAP) == 0) {
 			VM_OBJECT_WUNLOCK(obj);
 			continue;
 		}
 
 		/*
 		 * Charge for the whole object allocation now, since
 		 * we cannot distinguish between non-charged and
 		 * charged clipped mapping of the same object later.
 		 */
 		KASSERT(obj->charge == 0,
 		    ("vm_map_protect: object %p overcharged (entry %p)",
 		    obj, entry));
 		if (!swap_reserve(ptoa(obj->size))) {
 			VM_OBJECT_WUNLOCK(obj);
 			rv = KERN_RESOURCE_SHORTAGE;
 			end = entry->end;
 			break;
 		}
 
 		crhold(cred);
 		obj->cred = cred;
 		obj->charge = ptoa(obj->size);
 		VM_OBJECT_WUNLOCK(obj);
 	}
 
 	/*
 	 * If enough swap space was available, go back and fix up protections.
 	 * Otherwise, just simplify entries, since some may have been modified.
 	 * [Note that clipping is not necessary the second time.]
 	 *
 	 * The merge attempt is part of the loop's increment expression, so
 	 * it also runs for entries that are skipped with "continue".
 	 */
 	for (prev_entry = vm_map_entry_pred(first_entry), entry = first_entry;
 	    entry->start < end;
 	    vm_map_try_merge_entries(map, prev_entry, entry),
 	    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
 		if (rv != KERN_SUCCESS ||
 		    (entry->eflags & MAP_ENTRY_GUARD) != 0)
 			continue;
 
 		old_prot = entry->protection;
 
 		/*
 		 * Lowering the maximum also clips the current protection;
 		 * an explicitly requested protection then overrides it.
 		 */
 		if ((flags & VM_MAP_PROTECT_SET_MAXPROT) != 0) {
 			entry->max_protection = new_maxprot;
 			entry->protection = new_maxprot & old_prot;
 		}
 		if ((flags & VM_MAP_PROTECT_SET_PROT) != 0)
 			entry->protection = new_prot;
 
 		/*
 		 * For user wired map entries, the normal lazy evaluation of
 		 * write access upgrades through soft page faults is
 		 * undesirable.  Instead, immediately copy any pages that are
 		 * copy-on-write and enable write access in the physical map.
 		 */
 		if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0 &&
 		    (entry->protection & VM_PROT_WRITE) != 0 &&
 		    (old_prot & VM_PROT_WRITE) == 0)
 			vm_fault_copy_entry(map, map, entry, entry, NULL);
 
 		/*
 		 * When restricting access, update the physical map.  Worry
 		 * about copy-on-write here: for MAP_ENTRY_COW entries write
 		 * permission is masked out of what is passed to the pmap.
 		 */
 		if ((old_prot & ~entry->protection) != 0) {
 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
 							VM_PROT_ALL)
 			pmap_protect(map->pmap, entry->start,
 			    entry->end,
 			    entry->protection & MASK(entry));
 #undef	MASK
 		}
 	}
 	/* Try to merge the last processed entry with its successor. */
 	vm_map_try_merge_entries(map, prev_entry, entry);
 	vm_map_unlock(map);
 	return (rv);
 }
 
 /*
  *	vm_map_madvise:
  *
  *	This routine traverses a process's map handling the madvise
  *	system call.  Advisories are classified as either those affecting
  *	the vm_map_entry structure, or those affecting the underlying
  *	objects.
  *
  *	Returns 0 on success or for an empty range, EINVAL for an
  *	unrecognized "behav", or the errno translation of a failed
  *	entry clip.
  */
 int
 vm_map_madvise(
 	vm_map_t map,
 	vm_offset_t start,
 	vm_offset_t end,
 	int behav)
 {
 	vm_map_entry_t entry, prev_entry;
 	int rv;
 	bool modify_map;
 
 	/*
 	 * Some madvise calls directly modify the vm_map_entry, in which case
 	 * we need to use an exclusive lock on the map and we need to perform
 	 * various clipping operations.  Otherwise we only need a read-lock
 	 * on the map.
 	 */
 	switch(behav) {
 	case MADV_NORMAL:
 	case MADV_SEQUENTIAL:
 	case MADV_RANDOM:
 	case MADV_NOSYNC:
 	case MADV_AUTOSYNC:
 	case MADV_NOCORE:
 	case MADV_CORE:
 		if (start == end)
 			return (0);
 		modify_map = true;
 		vm_map_lock(map);
 		break;
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
 	case MADV_FREE:
 		if (start == end)
 			return (0);
 		modify_map = false;
 		vm_map_lock_read(map);
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	/*
 	 * Locate starting entry and clip if necessary.
 	 */
 	VM_MAP_RANGE_CHECK(map, start, end);
 
 	if (modify_map) {
 		/*
 		 * madvise behaviors that are implemented in the vm_map_entry.
 		 *
 		 * We clip the vm_map_entry so that behavioral changes are
 		 * limited to the specified address range.
 		 */
 		rv = vm_map_lookup_clip_start(map, start, &entry, &prev_entry);
 		if (rv != KERN_SUCCESS) {
 			vm_map_unlock(map);
 			return (vm_mmap_to_errno(rv));
 		}
 
 		for (; entry->start < end; prev_entry = entry,
 		    entry = vm_map_entry_succ(entry)) {
 			/* Submap entries are left untouched. */
 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 				continue;
 
 			rv = vm_map_clip_end(map, entry, end);
 			if (rv != KERN_SUCCESS) {
 				vm_map_unlock(map);
 				return (vm_mmap_to_errno(rv));
 			}
 
 			switch (behav) {
 			case MADV_NORMAL:
 				vm_map_entry_set_behavior(entry,
 				    MAP_ENTRY_BEHAV_NORMAL);
 				break;
 			case MADV_SEQUENTIAL:
 				vm_map_entry_set_behavior(entry,
 				    MAP_ENTRY_BEHAV_SEQUENTIAL);
 				break;
 			case MADV_RANDOM:
 				vm_map_entry_set_behavior(entry,
 				    MAP_ENTRY_BEHAV_RANDOM);
 				break;
 			case MADV_NOSYNC:
 				entry->eflags |= MAP_ENTRY_NOSYNC;
 				break;
 			case MADV_AUTOSYNC:
 				entry->eflags &= ~MAP_ENTRY_NOSYNC;
 				break;
 			case MADV_NOCORE:
 				entry->eflags |= MAP_ENTRY_NOCOREDUMP;
 				break;
 			case MADV_CORE:
 				entry->eflags &= ~MAP_ENTRY_NOCOREDUMP;
 				break;
 			default:
 				break;
 			}
 			/* The updated entry may now match its predecessor. */
 			vm_map_try_merge_entries(map, prev_entry, entry);
 		}
 		/* Finally, try the last entry against its successor. */
 		vm_map_try_merge_entries(map, prev_entry, entry);
 		vm_map_unlock(map);
 	} else {
 		vm_pindex_t pstart, pend;
 
 		/*
 		 * madvise behaviors that are implemented in the underlying
 		 * vm_object.
 		 *
 		 * Since we don't clip the vm_map_entry, we have to clip
 		 * the vm_object pindex and count.
 		 */
 		if (!vm_map_lookup_entry(map, start, &entry))
 			entry = vm_map_entry_succ(entry);
 		for (; entry->start < end;
 		    entry = vm_map_entry_succ(entry)) {
 			vm_offset_t useEnd, useStart;
 
 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 				continue;
 
 			/*
 			 * MADV_FREE would otherwise rewind time to
 			 * the creation of the shadow object.  Because
 			 * we hold the VM map read-locked, neither the
 			 * entry's object nor the presence of a
 			 * backing object can change.
 			 */
 			if (behav == MADV_FREE &&
 			    entry->object.vm_object != NULL &&
 			    entry->object.vm_object->backing_object != NULL)
 				continue;
 
 			/*
 			 * [pstart, pend) is the object page-index range and
 			 * [useStart, useEnd) the address range covered by
 			 * this entry, both trimmed to the requested range.
 			 */
 			pstart = OFF_TO_IDX(entry->offset);
 			pend = pstart + atop(entry->end - entry->start);
 			useStart = entry->start;
 			useEnd = entry->end;
 
 			if (entry->start < start) {
 				pstart += atop(start - entry->start);
 				useStart = start;
 			}
 			if (entry->end > end) {
 				pend -= atop(entry->end - end);
 				useEnd = end;
 			}
 
 			if (pstart >= pend)
 				continue;
 
 			/*
 			 * Perform the pmap_advise() before clearing
 			 * PGA_REFERENCED in vm_page_advise().  Otherwise, a
 			 * concurrent pmap operation, such as pmap_remove(),
 			 * could clear a reference in the pmap and set
 			 * PGA_REFERENCED on the page before the pmap_advise()
 			 * had completed.  Consequently, the page would appear
 			 * referenced based upon an old reference that
 			 * occurred before this pmap_advise() ran.
 			 */
 			if (behav == MADV_DONTNEED || behav == MADV_FREE)
 				pmap_advise(map->pmap, useStart, useEnd,
 				    behav);
 
 			vm_object_madvise(entry->object.vm_object, pstart,
 			    pend, behav);
 
 			/*
 			 * Pre-populate paging structures in the
 			 * WILLNEED case.  For wired entries, the
 			 * paging structures are already populated.
 			 */
 			if (behav == MADV_WILLNEED &&
 			    entry->wired_count == 0) {
 				vm_map_pmap_enter(map,
 				    useStart,
 				    entry->protection,
 				    entry->object.vm_object,
 				    pstart,
 				    ptoa(pend - pstart),
 				    MAP_PREFAULT_MADVISE
 				);
 			}
 		}
 		vm_map_unlock_read(map);
 	}
 	return (0);
 }
 
 /*
  *	vm_map_inherit:
  *
  *	Sets the inheritance of the specified address
  *	range in the target map.  Inheritance
  *	affects how the map will be shared with
  *	child maps at the time of vmspace_fork.
  *
  *	Returns KERN_INVALID_ARGUMENT for an unknown inheritance value, or
  *	when VM_INHERIT_COPY is requested for a range containing an entry
  *	with a split boundary; returns the clip error if clipping the
  *	range fails; returns KERN_SUCCESS otherwise, including for an
  *	empty range.
  */
 int
 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
 	       vm_inherit_t new_inheritance)
 {
 	vm_map_entry_t entry, lentry, prev_entry, start_entry;
 	int rv;
 
 	switch (new_inheritance) {
 	case VM_INHERIT_NONE:
 	case VM_INHERIT_COPY:
 	case VM_INHERIT_SHARE:
 	case VM_INHERIT_ZERO:
 		break;
 	default:
 		return (KERN_INVALID_ARGUMENT);
 	}
 	if (start == end)
 		return (KERN_SUCCESS);
 	vm_map_lock(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
 	/* Clip both ends up front so that every entry lies within the range. */
 	rv = vm_map_lookup_clip_start(map, start, &start_entry, &prev_entry);
 	if (rv != KERN_SUCCESS)
 		goto unlock;
 	if (vm_map_lookup_entry(map, end - 1, &lentry)) {
 		rv = vm_map_clip_end(map, lentry, end);
 		if (rv != KERN_SUCCESS)
 			goto unlock;
 	}
 	if (new_inheritance == VM_INHERIT_COPY) {
 		/*
 		 * Validation pass only.  prev_entry must not be advanced
 		 * here: the update loop below relies on it still being the
 		 * entry that precedes start_entry, so that the first entry
 		 * of the range can be merged with its predecessor.
 		 */
 		for (entry = start_entry; entry->start < end;
 		    entry = vm_map_entry_succ(entry)) {
 			if ((entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
 			    != 0) {
 				rv = KERN_INVALID_ARGUMENT;
 				goto unlock;
 			}
 		}
 	}
 	for (entry = start_entry; entry->start < end; prev_entry = entry,
 	    entry = vm_map_entry_succ(entry)) {
 		KASSERT(entry->end <= end, ("non-clipped entry %p end %jx %jx",
 		    entry, (uintmax_t)entry->end, (uintmax_t)end));
 		/* Guard entries are never switched to VM_INHERIT_ZERO. */
 		if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
 		    new_inheritance != VM_INHERIT_ZERO)
 			entry->inheritance = new_inheritance;
 		vm_map_try_merge_entries(map, prev_entry, entry);
 	}
 	/* Try to merge the last updated entry with its successor. */
 	vm_map_try_merge_entries(map, prev_entry, entry);
 unlock:
 	vm_map_unlock(map);
 	return (rv);
 }
 
 /*
  *	vm_map_entry_in_transition:
  *
  *	Wait, with the map lock released, on behalf of the in-transition
  *	entry "in_entry", then take the map lock again.  When the map
  *	timestamp shows that nobody else modified the map in the meantime,
  *	"in_entry" is returned unchanged.  Otherwise the map is searched
  *	again at the later of "in_start" and the old entry's start address:
  *	the entry containing that address is returned if there is one;
  *	failing that, the following entry is returned when "holes_ok" is
  *	set, else "*io_end" is set to the lookup address and NULL is
  *	returned.
  */
 static vm_map_entry_t
 vm_map_entry_in_transition(vm_map_t map, vm_offset_t in_start,
     vm_offset_t *io_end, bool holes_ok, vm_map_entry_t in_entry)
 {
 	vm_map_entry_t found;
 	vm_offset_t addr;
 	u_int stamp;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT((in_entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
 	    ("not in-tranition map entry %p", in_entry));
 
 	/*
 	 * The entry has not been clipped yet, so it may begin before
 	 * in_start; remember the larger of the two addresses.
 	 */
 	addr = in_entry->start;
 	if (in_start > addr)
 		addr = in_start;
 
 	in_entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 	stamp = map->timestamp;
 
 	/*
 	 * The result of the wait is deliberately ignored.  (Allow
 	 * interruption of user wiring/unwiring?)
 	 */
 	(void)vm_map_unlock_and_wait(map, 0);
 	vm_map_lock(map);
 	if (map->timestamp == stamp + 1)
 		return (in_entry);
 
 	/*
 	 * The map was modified while it was unlocked, so the old entry may
 	 * have been clipped, merged, or deleted.  Look it up again.
 	 */
 	if (vm_map_lookup_entry(map, addr, &found))
 		return (found);
 	if (holes_ok)
 		return (vm_map_entry_succ(found));
 	*io_end = addr;
 	return (NULL);
 }
 
 /*
  *	vm_map_unwire:
  *
  *	Implements both kernel and user unwiring.
  *
  *	"flags" may contain VM_MAP_WIRE_USER to request user unwiring and
  *	VM_MAP_WIRE_HOLESOK to tolerate unmapped holes in the range.
  *
  *	Returns KERN_SUCCESS on success or for an empty range,
  *	KERN_INVALID_ADDRESS when a hole is found and holes are not
  *	permitted, KERN_INVALID_ARGUMENT when a system unwiring hits an
  *	entry that is not system wired, or the error from a failed entry
  *	clip.
  */
 int
 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags)
 {
 	vm_map_entry_t entry, first_entry, next_entry, prev_entry;
 	int rv;
 	bool holes_ok, need_wakeup, user_unwire;
 
 	if (start == end)
 		return (KERN_SUCCESS);
 	holes_ok = (flags & VM_MAP_WIRE_HOLESOK) != 0;
 	user_unwire = (flags & VM_MAP_WIRE_USER) != 0;
 	vm_map_lock(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
 		if (holes_ok)
 			first_entry = vm_map_entry_succ(first_entry);
 		else {
 			vm_map_unlock(map);
 			return (KERN_INVALID_ADDRESS);
 		}
 	}
 
 	/*
 	 * First pass: clip the entries to the range and mark each of them
 	 * as in transition and owned by this thread.  On failure "end" is
 	 * truncated where possible so that the second pass only visits the
 	 * entries handled so far.
 	 */
 	rv = KERN_SUCCESS;
 	for (entry = first_entry; entry->start < end; entry = next_entry) {
 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 			/*
 			 * We have not yet clipped the entry.
 			 */
 			next_entry = vm_map_entry_in_transition(map, start,
 			    &end, holes_ok, entry);
 			if (next_entry == NULL) {
 				if (entry == first_entry) {
 					vm_map_unlock(map);
 					return (KERN_INVALID_ADDRESS);
 				}
 				rv = KERN_INVALID_ADDRESS;
 				break;
 			}
 			/*
 			 * first_entry is only trusted while the entry waited
 			 * on was the first one; otherwise it is looked up
 			 * again before the second pass.
 			 */
 			first_entry = (entry == first_entry) ?
 			    next_entry : NULL;
 			continue;
 		}
 		rv = vm_map_clip_start(map, entry, start);
 		if (rv != KERN_SUCCESS)
 			break;
 		rv = vm_map_clip_end(map, entry, end);
 		if (rv != KERN_SUCCESS)
 			break;
 
 		/*
 		 * Mark the entry in case the map lock is released.  (See
 		 * above.)
 		 */
 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
 		    entry->wiring_thread == NULL,
 		    ("owned map entry %p", entry));
 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 		entry->wiring_thread = curthread;
 		next_entry = vm_map_entry_succ(entry);
 		/*
 		 * Check the map for holes in the specified region.
 		 * If holes_ok, skip this check.
 		 */
 		if (!holes_ok &&
 		    entry->end < end && next_entry->start > entry->end) {
 			end = entry->end;
 			rv = KERN_INVALID_ADDRESS;
 			break;
 		}
 		/*
 		 * If system unwiring, require that the entry is system wired.
 		 */
 		if (!user_unwire &&
 		    vm_map_entry_system_wired_count(entry) == 0) {
 			end = entry->end;
 			rv = KERN_INVALID_ARGUMENT;
 			break;
 		}
 	}
 
 	/*
 	 * Second pass: perform the unwiring if the first pass succeeded,
 	 * and in any case release the in-transition marks set above.
 	 */
 	need_wakeup = false;
 	if (first_entry == NULL &&
 	    !vm_map_lookup_entry(map, start, &first_entry)) {
 		KASSERT(holes_ok, ("vm_map_unwire: lookup failed"));
 		prev_entry = first_entry;
 		entry = vm_map_entry_succ(first_entry);
 	} else {
 		prev_entry = vm_map_entry_pred(first_entry);
 		entry = first_entry;
 	}
 	for (; entry->start < end;
 	    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
 		/*
 		 * If holes_ok was specified, an empty
 		 * space in the unwired region could have been mapped
 		 * while the map lock was dropped for draining
 		 * MAP_ENTRY_IN_TRANSITION.  Moreover, another thread
 		 * could be simultaneously wiring this new mapping
 		 * entry.  Detect these cases and skip any entries
 		 * marked as in transition not by us.
 		 *
 		 * Another way to get an entry not marked with
 		 * MAP_ENTRY_IN_TRANSITION is after failed clipping,
 		 * which set rv to KERN_INVALID_ARGUMENT.
 		 */
 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
 		    entry->wiring_thread != curthread) {
 			KASSERT(holes_ok || rv == KERN_INVALID_ARGUMENT,
 			    ("vm_map_unwire: !HOLESOK and new/changed entry"));
 			continue;
 		}
 
 		/*
 		 * A user unwiring only affects entries that are user wired;
 		 * a system unwiring applies to every entry.
 		 */
 		if (rv == KERN_SUCCESS && (!user_unwire ||
 		    (entry->eflags & MAP_ENTRY_USER_WIRED))) {
 			if (entry->wired_count == 1)
 				vm_map_entry_unwire(map, entry);
 			else
 				entry->wired_count--;
 			if (user_unwire)
 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 		}
 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
 		    ("vm_map_unwire: in-transition flag missing %p", entry));
 		KASSERT(entry->wiring_thread == curthread,
 		    ("vm_map_unwire: alien wire %p", entry));
 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
 		entry->wiring_thread = NULL;
 		/* Remember to wake up threads waiting for this entry. */
 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
 			need_wakeup = true;
 		}
 		vm_map_try_merge_entries(map, prev_entry, entry);
 	}
 	vm_map_try_merge_entries(map, prev_entry, entry);
 	vm_map_unlock(map);
 	if (need_wakeup)
 		vm_map_wakeup(map);
 	return (rv);
 }
 
 /*
  * Atomically subtract "npages" from the global vm_user_wire_count counter.
  */
 static void
 vm_map_wire_user_count_sub(u_long npages)
 {
 	atomic_subtract_long(&vm_user_wire_count, npages);
 }
 
 /*
  * Atomically add "npages" to the global vm_user_wire_count counter, unless
  * the sum would exceed vm_page_max_user_wired.  Returns true when the
  * counter was updated and false when the limit would have been exceeded,
  * in which case the counter is left unchanged.
  */
 static bool
 vm_map_wire_user_count_add(u_long npages)
 {
 	u_long cur;
 
 	cur = vm_user_wire_count;
 	for (;;) {
 		if (npages + cur > vm_page_max_user_wired)
 			return (false);
 		/*
 		 * Retry with the refreshed value of "cur" for as long as
 		 * the compare-and-set fails.
 		 */
 		if (atomic_fcmpset_long(&vm_user_wire_count, &cur,
 		    npages + cur))
 			return (true);
 	}
 }
 
 /*
  *	vm_map_wire_entry_failure:
  *
  *	Clean up after a failed attempt to wire "entry".  "failed_addr" is
  *	the address at which wiring stopped; the part of the entry below
  *	it, if any, is unwired again, and the entry's wired_count is set
  *	to -1 to flag the failure.
  *
  *	The map should be locked.
  */
 static void
 vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
     vm_offset_t failed_addr)
 {
 	vm_size_t wired_len;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 &&
 	    entry->wired_count == 1,
 	    ("vm_map_wire_entry_failure: entry %p isn't being wired", entry));
 	KASSERT(failed_addr < entry->end,
 	    ("vm_map_wire_entry_failure: entry %p was fully wired", entry));
 
 	/*
 	 * Pages at the start of the entry that were wired successfully
 	 * must be unwired, both in the pmap and in the backing object.
 	 */
 	if (entry->start < failed_addr) {
 		wired_len = failed_addr - entry->start;
 		pmap_unwire(map->pmap, entry->start, failed_addr);
 		vm_object_unwire(entry->object.vm_object, entry->offset,
 		    wired_len, PQ_ACTIVE);
 	}
 
 	/*
 	 * An out-of-range wired_count records that wiring this entry
 	 * failed.
 	 */
 	entry->wired_count = -1;
 }
 
 /*
  *	vm_map_wire:
  *
  *	Convenience wrapper that takes the map lock, calls
  *	vm_map_wire_locked() with the same arguments, releases the lock
  *	and passes the result through.
  */
 int
 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
 {
 	int error;
 
 	vm_map_lock(map);
 	error = vm_map_wire_locked(map, start, end, flags);
 	vm_map_unlock(map);
 
 	return (error);
 }
 
 /*
  *	vm_map_wire_locked:
  *
  *	Implements both kernel and user wiring.  The caller must hold the
  *	map lock.  The lock is held again on return, but it may be dropped
  *	and reacquired in between: while waiting for in-transition entries
  *	and while pages are faulted in.
  *
  *	"flags" may contain VM_MAP_WIRE_USER to request user wiring,
  *	VM_MAP_WIRE_HOLESOK to tolerate holes and inaccessible entries in
  *	the range, and VM_MAP_WIRE_WRITE to additionally require write
  *	access to each entry.
  *
  *	Returns KERN_SUCCESS on success or for an empty range,
  *	KERN_INVALID_ADDRESS for a hole or an inaccessible entry when
  *	holes are not permitted, KERN_RESOURCE_SHORTAGE when the user
  *	wiring limit would be exceeded, or the error from a failed entry
  *	clip or from vm_fault().  On failure, wirings performed by this
  *	call are undone before returning.
  */
 int
 vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
 {
 	vm_map_entry_t entry, first_entry, next_entry, prev_entry;
 	vm_offset_t faddr, saved_end, saved_start;
 	u_long incr, npages;
 	u_int bidx, last_timestamp;
 	int rv;
 	bool holes_ok, need_wakeup, user_wire;
 	vm_prot_t prot;
 
 	VM_MAP_ASSERT_LOCKED(map);
 
 	if (start == end)
 		return (KERN_SUCCESS);
 	prot = 0;
 	if (flags & VM_MAP_WIRE_WRITE)
 		prot |= VM_PROT_WRITE;
 	holes_ok = (flags & VM_MAP_WIRE_HOLESOK) != 0;
 	user_wire = (flags & VM_MAP_WIRE_USER) != 0;
 	VM_MAP_RANGE_CHECK(map, start, end);
 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
 		if (holes_ok)
 			first_entry = vm_map_entry_succ(first_entry);
 		else
 			return (KERN_INVALID_ADDRESS);
 	}
 	/*
 	 * First pass: clip each entry to the range, mark it as in
 	 * transition and owned by this thread, and wire it.  On failure
 	 * "end" is truncated where possible so that the cleanup pass at
 	 * "done" only visits the entries handled so far.
 	 */
 	for (entry = first_entry; entry->start < end; entry = next_entry) {
 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 			/*
 			 * We have not yet clipped the entry.
 			 */
 			next_entry = vm_map_entry_in_transition(map, start,
 			    &end, holes_ok, entry);
 			if (next_entry == NULL) {
 				if (entry == first_entry)
 					return (KERN_INVALID_ADDRESS);
 				rv = KERN_INVALID_ADDRESS;
 				goto done;
 			}
 			/*
 			 * first_entry is only trusted while the entry waited
 			 * on was the first one; otherwise it is looked up
 			 * again at "done".
 			 */
 			first_entry = (entry == first_entry) ?
 			    next_entry : NULL;
 			continue;
 		}
 		rv = vm_map_clip_start(map, entry, start);
 		if (rv != KERN_SUCCESS)
 			goto done;
 		rv = vm_map_clip_end(map, entry, end);
 		if (rv != KERN_SUCCESS)
 			goto done;
 
 		/*
 		 * Mark the entry in case the map lock is released.  (See
 		 * above.)
 		 */
 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
 		    entry->wiring_thread == NULL,
 		    ("owned map entry %p", entry));
 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 		entry->wiring_thread = curthread;
 		/*
 		 * Entries that are neither readable nor executable, or that
 		 * lack the requested access, are not wired.  They are
 		 * flagged as skipped, which is an error unless holes are
 		 * permitted.
 		 */
 		if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0
 		    || (entry->protection & prot) != prot) {
 			entry->eflags |= MAP_ENTRY_WIRE_SKIPPED;
 			if (!holes_ok) {
 				end = entry->end;
 				rv = KERN_INVALID_ADDRESS;
 				goto done;
 			}
 		} else if (entry->wired_count == 0) {
 			/* First wiring of this entry: fault its pages in. */
 			entry->wired_count++;
 
 			npages = atop(entry->end - entry->start);
 			if (user_wire && !vm_map_wire_user_count_add(npages)) {
 				vm_map_wire_entry_failure(map, entry,
 				    entry->start);
 				end = entry->end;
 				rv = KERN_RESOURCE_SHORTAGE;
 				goto done;
 			}
 
 			/*
 			 * Release the map lock, relying on the in-transition
 			 * mark.  Mark the map busy for fork.
 			 */
 			saved_start = entry->start;
 			saved_end = entry->end;
 			last_timestamp = map->timestamp;
 			/*
 			 * Step through the entry in units of its split
 			 * boundary page size (index 0 when none is set).
 			 */
 			bidx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
 			    >> MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
 			incr =  pagesizes[bidx];
 			vm_map_busy(map);
 			vm_map_unlock(map);
 
 			for (faddr = saved_start; faddr < saved_end;
 			    faddr += incr) {
 				/*
 				 * Simulate a fault to get the page and enter
 				 * it into the physical map.
 				 */
 				rv = vm_fault(map, faddr, VM_PROT_NONE,
 				    VM_FAULT_WIRE, NULL);
 				if (rv != KERN_SUCCESS)
 					break;
 			}
 			vm_map_lock(map);
 			vm_map_unbusy(map);
 			if (last_timestamp + 1 != map->timestamp) {
 				/*
 				 * Look again for the entry because the map was
 				 * modified while it was unlocked.  The entry
 				 * may have been clipped, but NOT merged or
 				 * deleted.
 				 */
 				if (!vm_map_lookup_entry(map, saved_start,
 				    &next_entry))
 					KASSERT(false,
 					    ("vm_map_wire: lookup failed"));
 				first_entry = (entry == first_entry) ?
 				    next_entry : NULL;
 				/*
 				 * Walk the pieces of the original entry; on
 				 * exit "entry" is the piece that ends at
 				 * saved_end or later.
 				 */
 				for (entry = next_entry; entry->end < saved_end;
 				    entry = vm_map_entry_succ(entry)) {
 					/*
 					 * In case of failure, handle entries
 					 * that were not fully wired here;
 					 * fully wired entries are handled
 					 * later.
 					 */
 					if (rv != KERN_SUCCESS &&
 					    faddr < entry->end)
 						vm_map_wire_entry_failure(map,
 						    entry, faddr);
 				}
 			}
 			if (rv != KERN_SUCCESS) {
 				vm_map_wire_entry_failure(map, entry, faddr);
 				if (user_wire)
 					vm_map_wire_user_count_sub(npages);
 				end = entry->end;
 				goto done;
 			}
 		} else if (!user_wire ||
 			   (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
 			/*
 			 * Already wired: just take another reference, unless
 			 * this is a user wiring of an entry that is already
 			 * user wired.
 			 */
 			entry->wired_count++;
 		}
 		/*
 		 * Check the map for holes in the specified region.
 		 * If holes_ok was specified, skip this check.
 		 */
 		next_entry = vm_map_entry_succ(entry);
 		if (!holes_ok &&
 		    entry->end < end && next_entry->start > entry->end) {
 			end = entry->end;
 			rv = KERN_INVALID_ADDRESS;
 			goto done;
 		}
 	}
 	rv = KERN_SUCCESS;
 done:
 	/*
 	 * Second pass: finish the wiring on success or roll it back on
 	 * failure, and in any case release the in-transition marks.
 	 */
 	need_wakeup = false;
 	if (first_entry == NULL &&
 	    !vm_map_lookup_entry(map, start, &first_entry)) {
 		KASSERT(holes_ok, ("vm_map_wire: lookup failed"));
 		prev_entry = first_entry;
 		entry = vm_map_entry_succ(first_entry);
 	} else {
 		prev_entry = vm_map_entry_pred(first_entry);
 		entry = first_entry;
 	}
 	for (; entry->start < end;
 	    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
 		/*
 		 * If holes_ok was specified, an empty
 		 * space in the unwired region could have been mapped
 		 * while the map lock was dropped for faulting in the
 		 * pages or draining MAP_ENTRY_IN_TRANSITION.
 		 * Moreover, another thread could be simultaneously
 		 * wiring this new mapping entry.  Detect these cases
 		 * and skip any entries marked as in transition not by us.
 		 *
 		 * Another way to get an entry not marked with
 		 * MAP_ENTRY_IN_TRANSITION is after failed clipping,
 		 * which set rv to KERN_INVALID_ARGUMENT.
 		 */
 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
 		    entry->wiring_thread != curthread) {
 			KASSERT(holes_ok || rv == KERN_INVALID_ARGUMENT,
 			    ("vm_map_wire: !HOLESOK and new/changed entry"));
 			continue;
 		}
 
 		if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) {
 			/* do nothing */
 		} else if (rv == KERN_SUCCESS) {
 			if (user_wire)
 				entry->eflags |= MAP_ENTRY_USER_WIRED;
 		} else if (entry->wired_count == -1) {
 			/*
 			 * Wiring failed on this entry.  Thus, unwiring is
 			 * unnecessary.
 			 */
 			entry->wired_count = 0;
 		} else if (!user_wire ||
 		    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
 			/*
 			 * Undo the wiring.  Wiring succeeded on this entry
 			 * but failed on a later entry.
 			 */
 			if (entry->wired_count == 1) {
 				vm_map_entry_unwire(map, entry);
 				if (user_wire)
 					vm_map_wire_user_count_sub(
 					    atop(entry->end - entry->start));
 			} else
 				entry->wired_count--;
 		}
 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
 		    ("vm_map_wire: in-transition flag missing %p", entry));
 		KASSERT(entry->wiring_thread == curthread,
 		    ("vm_map_wire: alien wire %p", entry));
 		entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION |
 		    MAP_ENTRY_WIRE_SKIPPED);
 		entry->wiring_thread = NULL;
 		/* Remember to wake up threads waiting for this entry. */
 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
 			need_wakeup = true;
 		}
 		vm_map_try_merge_entries(map, prev_entry, entry);
 	}
 	vm_map_try_merge_entries(map, prev_entry, entry);
 	if (need_wakeup)
 		vm_map_wakeup(map);
 	return (rv);
 }
 
 /*
  * vm_map_sync
  *
  * Push any dirty cached pages in the address range to their pager.
  * If syncio is TRUE, dirty pages are written synchronously.
  * If invalidate is TRUE, any cached pages are freed as well.
  *
  * If the size of the region from start to end is zero, we are
  * supposed to flush all modified pages within the region containing
  * start.  Unfortunately, a region can be split or coalesced with
  * neighboring regions, making it difficult to determine what the
  * original region was.  Therefore, we approximate this requirement by
  * flushing the current region containing start.
  *
  * Returns KERN_INVALID_ADDRESS if any part of the specified range is
  * not mapped.  When invalidate is set, returns KERN_INVALID_ARGUMENT
  * if the range contains a user-wired entry, or an entry with a split
  * boundary for which start or end is not aligned to that boundary's
  * page size.  Returns KERN_FAILURE if vm_object_sync() failed for any
  * object, and KERN_SUCCESS otherwise.
  *
  * The map read lock is taken here and is dropped around each call to
  * vm_object_sync().
  */
 int
 vm_map_sync(
 	vm_map_t map,
 	vm_offset_t start,
 	vm_offset_t end,
 	boolean_t syncio,
 	boolean_t invalidate)
 {
 	vm_map_entry_t entry, first_entry, next_entry;
 	vm_size_t size;
 	vm_object_t object;
 	vm_ooffset_t offset;
 	unsigned int last_timestamp;
 	int bdry_idx;
 	boolean_t failed;
 
 	vm_map_lock_read(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
 		vm_map_unlock_read(map);
 		return (KERN_INVALID_ADDRESS);
 	} else if (start == end) {
 		/* Zero-length request: use the whole entry holding start. */
 		start = first_entry->start;
 		end = first_entry->end;
 	}
 
 	/*
 	 * Make a first pass to check for user-wired memory, holes,
 	 * and partial invalidation of largepage mappings.
 	 */
 	for (entry = first_entry; entry->start < end; entry = next_entry) {
 		if (invalidate) {
 			if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) {
 				vm_map_unlock_read(map);
 				return (KERN_INVALID_ARGUMENT);
 			}
 			bdry_idx = (entry->eflags &
 			    MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
 			    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
 			if (bdry_idx != 0 &&
 			    ((start & (pagesizes[bdry_idx] - 1)) != 0 ||
 			    (end & (pagesizes[bdry_idx] - 1)) != 0)) {
 				vm_map_unlock_read(map);
 				return (KERN_INVALID_ARGUMENT);
 			}
 		}
 		next_entry = vm_map_entry_succ(entry);
 		/*
 		 * The range continues past this entry, so the next entry
 		 * must begin exactly where this one ends.
 		 */
 		if (end > entry->end &&
 		    entry->end != next_entry->start) {
 			vm_map_unlock_read(map);
 			return (KERN_INVALID_ADDRESS);
 		}
 	}
 
 	/* Drop the pmap mappings for the range before touching the objects. */
 	if (invalidate)
 		pmap_remove(map->pmap, start, end);
 	failed = FALSE;
 
 	/*
 	 * Make a second pass, cleaning/uncaching pages from the indicated
 	 * objects as we go.
 	 */
 	for (entry = first_entry; entry->start < end;) {
 		/*
 		 * offset and size describe the part of this entry that
 		 * begins at the current start and lies within the range.
 		 */
 		offset = entry->offset + (start - entry->start);
 		size = (end <= entry->end ? end : entry->end) - start;
 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) {
 			vm_map_t smap;
 			vm_map_entry_t tentry;
 			vm_size_t tsize;
 
 			/*
 			 * Translate through the submap: use the submap
 			 * entry found for offset, clamp size to what that
 			 * entry covers, and recompute the object offset.
 			 */
 			smap = entry->object.sub_map;
 			vm_map_lock_read(smap);
 			(void) vm_map_lookup_entry(smap, offset, &tentry);
 			tsize = tentry->end - offset;
 			if (tsize < size)
 				size = tsize;
 			object = tentry->object.vm_object;
 			offset = tentry->offset + (offset - tentry->start);
 			vm_map_unlock_read(smap);
 		} else {
 			object = entry->object.vm_object;
 		}
 		/*
 		 * Hold a reference on the object across the unlocked
 		 * vm_object_sync() call, and remember the map timestamp so
 		 * that a change to the map can be detected afterwards.
 		 */
 		vm_object_reference(object);
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
 		if (!vm_object_sync(object, offset, size, syncio, invalidate))
 			failed = TRUE;
 		start += size;
 		vm_object_deallocate(object);
 		vm_map_lock_read(map);
 		/*
 		 * If the timestamp is unchanged, simply step to the next
 		 * entry.  Otherwise look up the entry for the new start; if
 		 * the lookup succeeds, continue with the entry it returned.
 		 * NOTE(review): on lookup failure this presumably relies on
 		 * vm_map_lookup_entry() leaving the preceding entry in
 		 * "entry" -- confirm.
 		 */
 		if (last_timestamp == map->timestamp ||
 		    !vm_map_lookup_entry(map, start, &entry))
 			entry = vm_map_entry_succ(entry);
 	}
 
 	vm_map_unlock_read(map);
 	return (failed ? KERN_FAILURE : KERN_SUCCESS);
 }
 
 /*
  *	vm_map_entry_unwire:	[ internal use only ]
  *
  *	Drop every wiring of the given entry so that its address range
  *	becomes pageable again.  For a user-wired entry the size of the
  *	entry, in pages, is passed to vm_map_wire_user_count_sub().
  *
  *	The caller must hold the map lock and the entry must currently
  *	be wired; both conditions are asserted.
  */
 static void
 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
 {
 	vm_size_t len;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT(entry->wired_count > 0,
 	    ("vm_map_entry_unwire: entry %p isn't wired", entry));
 
 	len = entry->end - entry->start;
 	if (entry->eflags & MAP_ENTRY_USER_WIRED)
 		vm_map_wire_user_count_sub(atop(len));
 
 	/* Unwire in the pmap first, then in the backing object. */
 	pmap_unwire(map->pmap, entry->start, entry->end);
 	vm_object_unwire(entry->object.vm_object, entry->offset, len,
 	    PQ_ACTIVE);
 
 	entry->wired_count = 0;
 }
 
 /*
  * Release the object reference held by a map entry (unless the entry
  * refers to a submap) and return the entry to the zone it belongs to:
  * kmapentzone when system_map is set, mapentzone otherwise.
  */
 static void
 vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
 {
 
 	if (!(entry->eflags & MAP_ENTRY_IS_SUB_MAP))
 		vm_object_deallocate(entry->object.vm_object);
 
 	if (system_map)
 		uma_zfree(kmapentzone, entry);
 	else
 		uma_zfree(mapentzone, entry);
 }
 
 /*
  *	vm_map_entry_delete:	[ internal use only ]
  *
  *	Deallocate the given entry from the target map.
  *
  *	The entry is unlinked from the map.  Guard entries are freed
  *	right away and do not affect map->size.  For every other entry,
  *	map->size is reduced by the entry's size and any swap charge
  *	held through entry->cred is released.  For an anonymous or
  *	kernel object the pages backing the range may also be removed
  *	and the object shrunk (see below).  Entries of system maps are
  *	freed immediately; all others are pushed onto the current
  *	thread's td_map_def_user list for later disposal.
  */
 static void
 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
 {
 	vm_object_t object;
 	vm_pindex_t offidxstart, offidxend, size1;
 	vm_size_t size;
 
 	vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE);
 	object = entry->object.vm_object;
 
 	/* Guard entries carry no object, credential or accounted size. */
 	if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
 		MPASS(entry->cred == NULL);
 		MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
 		MPASS(object == NULL);
 		vm_map_entry_deallocate(entry, map->system_map);
 		return;
 	}
 
 	size = entry->end - entry->start;
 	map->size -= size;
 
 	/* Give back the swap reservation charged to the entry itself. */
 	if (entry->cred != NULL) {
 		swap_release_by_cred(size, entry->cred);
 		crfree(entry->cred);
 	}
 
 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || object == NULL) {
 		entry->object.vm_object = NULL;
 	} else if ((object->flags & OBJ_ANON) != 0 ||
 	    object == kernel_object) {
 		KASSERT(entry->cred == NULL || object->cred == NULL ||
 		    (entry->eflags & MAP_ENTRY_NEEDS_COPY),
 		    ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
 		/* Page index range of the object covered by this entry. */
 		offidxstart = OFF_TO_IDX(entry->offset);
 		offidxend = offidxstart + atop(size);
 		VM_OBJECT_WLOCK(object);
 		/*
 		 * Only when other references to the object remain, and the
 		 * object is either flagged OBJ_ONEMAPPING or is the kernel
 		 * object, are the pages of this range removed here.
 		 */
 		if (object->ref_count != 1 &&
 		    ((object->flags & OBJ_ONEMAPPING) != 0 ||
 		    object == kernel_object)) {
 			vm_object_collapse(object);
 
 			/*
 			 * The option OBJPR_NOTMAPPED can be passed here
 			 * because vm_map_delete() already performed
 			 * pmap_remove() on the only mapping to this range
 			 * of pages.
 			 */
 			vm_object_page_remove(object, offidxstart, offidxend,
 			    OBJPR_NOTMAPPED);
 			/*
 			 * If the removed range reaches the end of the
 			 * object, truncate the object to the start of the
 			 * range and release the swap charge for the pages
 			 * that were cut off.
 			 */
 			if (offidxend >= object->size &&
 			    offidxstart < object->size) {
 				size1 = object->size;
 				object->size = offidxstart;
 				if (object->cred != NULL) {
 					size1 -= object->size;
 					KASSERT(object->charge >= ptoa(size1),
 					    ("object %p charge < 0", object));
 					swap_release_by_cred(ptoa(size1),
 					    object->cred);
 					object->charge -= ptoa(size1);
 				}
 			}
 		}
 		VM_OBJECT_WUNLOCK(object);
 	}
 	if (map->system_map)
 		vm_map_entry_deallocate(entry, TRUE);
 	else {
 		/*
 		 * Defer the final deallocation by chaining the entry onto
 		 * the current thread's deferred list.
 		 */
 		entry->defer_next = curthread->td_map_def_user;
 		curthread->td_map_def_user = entry;
 	}
 }
 
 /*
  *	vm_map_delete:	[ internal use only ]
  *
  *	Deallocates the given address range from the target
  *	map.
  *
  *	The map must be locked by the caller (asserted).  The lock may
  *	be dropped and reacquired while waiting on an entry that is in
  *	transition or, on a map whose pmap is not the kernel pmap, has a
  *	non-zero system wired count.
  *
  *	Returns KERN_SUCCESS when start == end or when the loop ran to
  *	completion.  Otherwise the error from vm_map_lookup_clip_start()
  *	or vm_map_clip_end() is returned; entries processed before the
  *	failure have already been deleted.
  */
 int
 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
 {
 	vm_map_entry_t entry, next_entry, scratch_entry;
 	int rv;
 
 	VM_MAP_ASSERT_LOCKED(map);
 
 	if (start == end)
 		return (KERN_SUCCESS);
 
 	/*
 	 * Find the start of the region, and clip it.
 	 * Step through all entries in this region.
 	 */
 	rv = vm_map_lookup_clip_start(map, start, &entry, &scratch_entry);
 	if (rv != KERN_SUCCESS)
 		return (rv);
 	for (; entry->start < end; entry = next_entry) {
 		/*
 		 * Wait for wiring or unwiring of an entry to complete.
 		 * Also wait for any system wirings to disappear on
 		 * user maps.
 		 */
 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
 		    (vm_map_pmap(map) != kernel_pmap &&
 		    vm_map_entry_system_wired_count(entry) != 0)) {
 			unsigned int last_timestamp;
 			vm_offset_t saved_start;
 
 			/*
 			 * Remember where we were, ask to be woken, and
 			 * sleep with the map unlocked.
 			 */
 			saved_start = entry->start;
 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 			last_timestamp = map->timestamp;
 			(void) vm_map_unlock_and_wait(map, 0);
 			vm_map_lock(map);
 			if (last_timestamp + 1 != map->timestamp) {
 				/*
 				 * Look again for the entry because the map was
 				 * modified while it was unlocked.
 				 * Specifically, the entry may have been
 				 * clipped, merged, or deleted.
 				 */
 				rv = vm_map_lookup_clip_start(map, saved_start,
 				    &next_entry, &scratch_entry);
 				if (rv != KERN_SUCCESS)
 					break;
 			} else
 				next_entry = entry;
 			/* Re-examine the (possibly new) entry from the top. */
 			continue;
 		}
 
 		/* XXXKIB or delete to the upper superpage boundary ? */
 		rv = vm_map_clip_end(map, entry, end);
 		if (rv != KERN_SUCCESS)
 			break;
 		/* Fetch the successor now; the entry is deleted below. */
 		next_entry = vm_map_entry_succ(entry);
 
 		/*
 		 * Unwire before removing addresses from the pmap; otherwise,
 		 * unwiring will put the entries back in the pmap.
 		 */
 		if (entry->wired_count != 0)
 			vm_map_entry_unwire(map, entry);
 
 		/*
 		 * Remove mappings for the pages, but only if the
 		 * mappings could exist.  For instance, it does not
 		 * make sense to call pmap_remove() for guard entries.
 		 */
 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 ||
 		    entry->object.vm_object != NULL)
 			pmap_remove(map->pmap, entry->start, entry->end);
 
 		/*
 		 * If the entry ends exactly at anon_loc, pull anon_loc back
 		 * to the start of the entry being removed.
 		 */
 		if (entry->end == map->anon_loc)
 			map->anon_loc = entry->start;
 
 		/*
 		 * Delete the entry only after removing all pmap
 		 * entries pointing to its pages.  (Otherwise, its
 		 * page frames may be reallocated, and any modify bits
 		 * will be set in the wrong object!)
 		 */
 		vm_map_entry_delete(map, entry);
 	}
 	return (rv);
 }
 
 /*
  *	vm_map_remove:
  *
  *	Exported wrapper around vm_map_delete().  Takes the map lock,
  *	applies VM_MAP_RANGE_CHECK to [start, end), deletes the range
  *	and releases the lock.  The status of vm_map_delete() is
  *	returned unchanged.
  */
 int
 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
 {
 	int rv;
 
 	vm_map_lock(map);
 
 	VM_MAP_RANGE_CHECK(map, start, end);
 	rv = vm_map_delete(map, start, end);
 
 	vm_map_unlock(map);
 
 	return (rv);
 }
 
 /*
  *	vm_map_check_protection:
  *
  *	Return TRUE if every address in [start, end) is covered by a map
  *	entry whose protection includes all bits of "protection", and
  *	FALSE if the range contains a hole or an entry lacking one of
  *	the requested bits.
  *
  *	WARNING!  Only the map entries are examined.  Whether the
  *	contents of the region are actually accessible is not checked,
  *	and should not be; for example, a smaller file might be mapped
  *	into a larger address space.
  *
  *	NOTE!  This code is also called by munmap().
  *
  *	The map must be locked.  A read lock is sufficient.
  */
 boolean_t
 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
 			vm_prot_t protection)
 {
 	vm_map_entry_t cur;
 
 	/* The first address must itself be mapped. */
 	if (!vm_map_lookup_entry(map, start, &cur))
 		return (FALSE);
 
 	/*
 	 * Walk the entries in address order.  After each entry, "start"
 	 * moves to that entry's end, so a following entry that begins
 	 * beyond it reveals a hole.
 	 */
 	for (; start < end; start = cur->end, cur = vm_map_entry_succ(cur)) {
 		if (start < cur->start ||
 		    (cur->protection & protection) != protection)
 			return (FALSE);
 	}
 	return (TRUE);
 }
 
 /*
  *
  *	vm_map_copy_swap_object:
  *
  *	Copies a swap-backed object from an existing map entry to a
  *	new one.  Carries forward the swap charge.  May change the
  *	src object on return.
  *
  *	src_entry	entry whose object is being shared
  *	dst_entry	entry that receives a reference to that object
  *	size		size in bytes of the entries
  *	fork_charge	accumulator; incremented by size for each
  *			entry that ends up holding a new charge here
  */
 static void
 vm_map_copy_swap_object(vm_map_entry_t src_entry, vm_map_entry_t dst_entry,
     vm_offset_t size, vm_ooffset_t *fork_charge)
 {
 	vm_object_t src_object;
 	struct ucred *cred;
 	int charged;
 
 	src_object = src_entry->object.vm_object;
 	/*
 	 * Sample the charge state first; the source object and its
 	 * credential may both change below.
 	 */
 	charged = ENTRY_CHARGED(src_entry);
 	if ((src_object->flags & OBJ_ANON) != 0) {
 		VM_OBJECT_WLOCK(src_object);
 		vm_object_collapse(src_object);
 		if ((src_object->flags & OBJ_ONEMAPPING) != 0) {
 			/* The split may install a new object in src_entry. */
 			vm_object_split(src_entry);
 			src_object = src_entry->object.vm_object;
 		}
 		/* Reference for dst_entry; the object is now shared. */
 		vm_object_reference_locked(src_object);
 		vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
 		VM_OBJECT_WUNLOCK(src_object);
 	} else
 		vm_object_reference(src_object);
 	/*
 	 * A source entry that holds a credential and is not yet marked
 	 * needs-copy hands its credential and charge over to the object.
 	 */
 	if (src_entry->cred != NULL &&
 	    !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
 		KASSERT(src_object->cred == NULL,
 		    ("OVERCOMMIT: vm_map_copy_anon_entry: cred %p",
 		     src_object));
 		src_object->cred = src_entry->cred;
 		src_object->charge = size;
 	}
 	dst_entry->object.vm_object = src_object;
 	if (charged) {
 		/*
 		 * Charge the destination entry to the current thread's
 		 * credential.  If the source entry was not already marked
 		 * needs-copy, it receives a fresh credential reference and
 		 * is charged as well.
 		 */
 		cred = curthread->td_ucred;
 		crhold(cred);
 		dst_entry->cred = cred;
 		*fork_charge += size;
 		if (!(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
 			crhold(cred);
 			src_entry->cred = cred;
 			*fork_charge += size;
 		}
 	}
 }
 
 /*
  *	vm_map_copy_entry:
  *
  *	Copies the contents of the source entry to the destination
  *	entry.  The entries *must* be aligned properly.
  *
  *	dst_map must be locked (asserted).  Nothing is done if either
  *	entry refers to a submap.  Unless the source is both wired and
  *	writable, the copy is set up lazily: both entries are marked
  *	copy-on-write/needs-copy and share the source object.  A wired,
  *	writable source is copied at once through vm_fault_copy_entry().
  *
  *	fork_charge accumulates the swap charge created for the copy.
  */
 static void
 vm_map_copy_entry(
 	vm_map_t src_map,
 	vm_map_t dst_map,
 	vm_map_entry_t src_entry,
 	vm_map_entry_t dst_entry,
 	vm_ooffset_t *fork_charge)
 {
 	vm_object_t src_object;
 	vm_map_entry_t fake_entry;
 	vm_offset_t size;
 
 	VM_MAP_ASSERT_LOCKED(dst_map);
 
 	/* Submap entries are not copied. */
 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
 		return;
 
 	if (src_entry->wired_count == 0 ||
 	    (src_entry->protection & VM_PROT_WRITE) == 0) {
 		/*
 		 * If the source entry is marked needs_copy, it is already
 		 * write-protected.
 		 */
 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 &&
 		    (src_entry->protection & VM_PROT_WRITE) != 0) {
 			pmap_protect(src_map->pmap,
 			    src_entry->start,
 			    src_entry->end,
 			    src_entry->protection & ~VM_PROT_WRITE);
 		}
 
 		/*
 		 * Make a copy of the object.
 		 */
 		size = src_entry->end - src_entry->start;
 		if ((src_object = src_entry->object.vm_object) != NULL) {
 			if ((src_object->flags & OBJ_SWAP) != 0) {
 				vm_map_copy_swap_object(src_entry, dst_entry,
 				    size, fork_charge);
 				/* May have split/collapsed, reload obj. */
 				src_object = src_entry->object.vm_object;
 			} else {
 				/* Other objects are simply shared. */
 				vm_object_reference(src_object);
 				dst_entry->object.vm_object = src_object;
 			}
 			/* Both sides now copy on the first write. */
 			src_entry->eflags |= MAP_ENTRY_COW |
 			    MAP_ENTRY_NEEDS_COPY;
 			dst_entry->eflags |= MAP_ENTRY_COW |
 			    MAP_ENTRY_NEEDS_COPY;
 			dst_entry->offset = src_entry->offset;
 			if (src_entry->eflags & MAP_ENTRY_WRITECNT) {
 				/*
 				 * MAP_ENTRY_WRITECNT cannot
 				 * indicate write reference from
 				 * src_entry, since the entry is
 				 * marked as needs copy.  Allocate a
 				 * fake entry that is used to
 				 * decrement object->un_pager writecount
 				 * at the appropriate time.  Attach
 				 * fake_entry to the deferred list.
 				 */
 				fake_entry = vm_map_entry_create(dst_map);
 				fake_entry->eflags = MAP_ENTRY_WRITECNT;
 				src_entry->eflags &= ~MAP_ENTRY_WRITECNT;
 				vm_object_reference(src_object);
 				fake_entry->object.vm_object = src_object;
 				fake_entry->start = src_entry->start;
 				fake_entry->end = src_entry->end;
 				fake_entry->defer_next =
 				    curthread->td_map_def_user;
 				curthread->td_map_def_user = fake_entry;
 			}
 
 			pmap_copy(dst_map->pmap, src_map->pmap,
 			    dst_entry->start, dst_entry->end - dst_entry->start,
 			    src_entry->start);
 		} else {
 			/*
 			 * The source has no object yet.  The destination
 			 * starts without one too, but is charged to the
 			 * current thread's credential if the source entry
 			 * holds a credential.
 			 */
 			dst_entry->object.vm_object = NULL;
 			dst_entry->offset = 0;
 			if (src_entry->cred != NULL) {
 				dst_entry->cred = curthread->td_ucred;
 				crhold(dst_entry->cred);
 				*fork_charge += size;
 			}
 		}
 	} else {
 		/*
 		 * We don't want to make writeable wired pages copy-on-write.
 		 * Immediately copy these pages into the new map by simulating
 		 * page faults.  The new pages are pageable.
 		 */
 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
 		    fork_charge);
 	}
 }
 
 /*
  * vmspace_map_entry_forked:
  * Account for one entry that was inherited or copied into the
  * newly-forked vmspace vm2.  Guard entries are ignored.  The entry's
  * size is added to the new map's size, and then to exactly one of the
  * stack, data or text size counters: stack if the entry grows up or
  * down, otherwise data or text if the entry starts inside the parent's
  * (vm1) data or text region, clipped to the end of that region.  The
  * values for vm_dsize and vm_tsize are approximate (and mostly-obsolete
  * ideas in the face of mmap(2) et al.)
  */
 static void
 vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
     vm_map_entry_t entry)
 {
 	vm_size_t len;
 	vm_offset_t seg_start, seg_end, clipped_end;
 
 	if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
 		return;
 
 	len = entry->end - entry->start;
 	vm2->vm_map.size += len;
 
 	/* Stack entries are counted in full. */
 	if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
 		vm2->vm_ssize += btoc(len);
 		return;
 	}
 
 	/* Data region of the parent. */
 	seg_start = (vm_offset_t)vm1->vm_daddr;
 	seg_end = (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize);
 	if (entry->start >= seg_start && entry->start < seg_end) {
 		clipped_end = MIN(entry->end, seg_end);
 		vm2->vm_dsize += btoc(clipped_end - entry->start);
 		return;
 	}
 
 	/* Text region of the parent. */
 	seg_start = (vm_offset_t)vm1->vm_taddr;
 	seg_end = (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize);
 	if (entry->start >= seg_start && entry->start < seg_end) {
 		clipped_end = MIN(entry->end, seg_end);
 		vm2->vm_tsize += btoc(clipped_end - entry->start);
 	}
 }
 
 /*
  * vmspace_fork:
  * Create a new process vmspace structure and vm_map
  * based on those of an existing process.  The new map
  * is based on the old map, according to the inheritance
  * values on the regions in that map.
  *
  * XXX It might be worth coalescing the entries added to the new vmspace.
  *
  * The source map must not be locked.
  *
  * Returns the new vmspace, or NULL if vmspace_alloc() or
  * pmap_vmspace_copy() fails.  *fork_charge is incremented by the sizes
  * of the entries for which a new swap charge is created (see
  * vm_map_copy_entry() and the VM_INHERIT_ZERO case).
  */
 struct vmspace *
 vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
 {
 	struct vmspace *vm2;
 	vm_map_t new_map, old_map;
 	vm_map_entry_t new_entry, old_entry;
 	vm_object_t object;
 	int error, locked __diagused;
 	vm_inherit_t inh;
 
 	old_map = &vm1->vm_map;
 	/* Copy immutable fields of vm1 to vm2. */
 	vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map),
 	    pmap_pinit);
 	if (vm2 == NULL)
 		return (NULL);
 
 	vm2->vm_taddr = vm1->vm_taddr;
 	vm2->vm_daddr = vm1->vm_daddr;
 	vm2->vm_maxsaddr = vm1->vm_maxsaddr;
 	vm2->vm_stacktop = vm1->vm_stacktop;
+	vm2->vm_shp_base = vm1->vm_shp_base;
 	vm_map_lock(old_map);
 	if (old_map->busy)
 		vm_map_wait_busy(old_map);
 	new_map = &vm2->vm_map;
 	locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
 	KASSERT(locked, ("vmspace_fork: lock failed"));
 
 	error = pmap_vmspace_copy(new_map->pmap, old_map->pmap);
 	if (error != 0) {
 		/*
 		 * Same inlined unlock sequence as at the end of this
 		 * function, followed by disposal of the new vmspace.
 		 */
 		sx_xunlock(&old_map->lock);
 		sx_xunlock(&new_map->lock);
 		vm_map_process_deferred();
 		vmspace_free(vm2);
 		return (NULL);
 	}
 
 	/* The child inherits the ASLR/W^X related map state. */
 	new_map->anon_loc = old_map->anon_loc;
 	new_map->flags |= old_map->flags & (MAP_ASLR | MAP_ASLR_IGNSTART |
 	    MAP_ASLR_STACK | MAP_WXORX);
 
 	VM_MAP_ENTRY_FOREACH(old_entry, old_map) {
 		if ((old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 			panic("vm_map_fork: encountered a submap");
 
 		/*
 		 * Guard entries that are inherited at all are always
 		 * handled by the VM_INHERIT_COPY case.
 		 */
 		inh = old_entry->inheritance;
 		if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
 		    inh != VM_INHERIT_NONE)
 			inh = VM_INHERIT_COPY;
 
 		switch (inh) {
 		case VM_INHERIT_NONE:
 			break;
 
 		case VM_INHERIT_SHARE:
 			/*
 			 * Clone the entry, creating the shared object if
 			 * necessary.
 			 */
 			object = old_entry->object.vm_object;
 			if (object == NULL) {
 				vm_map_entry_back(old_entry);
 				object = old_entry->object.vm_object;
 			}
 
 			/*
 			 * Add the reference before calling vm_object_shadow
 			 * to ensure that a shadow object is created.
 			 */
 			vm_object_reference(object);
 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 				vm_object_shadow(&old_entry->object.vm_object,
 				    &old_entry->offset,
 				    old_entry->end - old_entry->start,
 				    old_entry->cred,
 				    /* Transfer the second reference too. */
 				    true);
 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 				old_entry->cred = NULL;
 
 				/*
 				 * As in vm_map_merged_neighbor_dispose(),
 				 * the vnode lock will not be acquired in
 				 * this call to vm_object_deallocate().
 				 */
 				vm_object_deallocate(object);
 				object = old_entry->object.vm_object;
 			} else {
 				VM_OBJECT_WLOCK(object);
 				vm_object_clear_flag(object, OBJ_ONEMAPPING);
 				/*
 				 * Move the entry's credential and charge to
 				 * the object that is about to be shared.
 				 */
 				if (old_entry->cred != NULL) {
 					KASSERT(object->cred == NULL,
 					    ("vmspace_fork both cred"));
 					object->cred = old_entry->cred;
 					object->charge = old_entry->end -
 					    old_entry->start;
 					old_entry->cred = NULL;
 				}
 
 				/*
 				 * Assert the correct state of the vnode
 				 * v_writecount while the object is locked, to
 				 * not relock it later for the assertion
 				 * correctness.
 				 */
 				if (old_entry->eflags & MAP_ENTRY_WRITECNT &&
 				    object->type == OBJT_VNODE) {
 					KASSERT(((struct vnode *)object->
 					    handle)->v_writecount > 0,
 					    ("vmspace_fork: v_writecount %p",
 					    object));
 					KASSERT(object->un_pager.vnp.
 					    writemappings > 0,
 					    ("vmspace_fork: vnp.writecount %p",
 					    object));
 				}
 				VM_OBJECT_WUNLOCK(object);
 			}
 
 			/*
 			 * Clone the entry, referencing the shared object.
 			 * Wiring state is not inherited by the child.
 			 */
 			new_entry = vm_map_entry_create(new_map);
 			*new_entry = *old_entry;
 			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
 			    MAP_ENTRY_IN_TRANSITION);
 			new_entry->wiring_thread = NULL;
 			new_entry->wired_count = 0;
 			if (new_entry->eflags & MAP_ENTRY_WRITECNT) {
 				vm_pager_update_writecount(object,
 				    new_entry->start, new_entry->end);
 			}
 			vm_map_entry_set_vnode_text(new_entry, true);
 
 			/*
 			 * Insert the entry into the new map -- we know we're
 			 * inserting at the end of the new map.
 			 */
 			vm_map_entry_link(new_map, new_entry);
 			vmspace_map_entry_forked(vm1, vm2, new_entry);
 
 			/*
 			 * Update the physical map
 			 */
 			pmap_copy(new_map->pmap, old_map->pmap,
 			    new_entry->start,
 			    (old_entry->end - old_entry->start),
 			    old_entry->start);
 			break;
 
 		case VM_INHERIT_COPY:
 			/*
 			 * Clone the entry and link into the map.
 			 */
 			new_entry = vm_map_entry_create(new_map);
 			*new_entry = *old_entry;
 			/*
 			 * Copied entry is COW over the old object.
 			 * The object and credential are filled in by
 			 * vm_map_copy_entry() below.
 			 */
 			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
 			    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_WRITECNT);
 			new_entry->wiring_thread = NULL;
 			new_entry->wired_count = 0;
 			new_entry->object.vm_object = NULL;
 			new_entry->cred = NULL;
 			vm_map_entry_link(new_map, new_entry);
 			vmspace_map_entry_forked(vm1, vm2, new_entry);
 			vm_map_copy_entry(old_map, new_map, old_entry,
 			    new_entry, fork_charge);
 			vm_map_entry_set_vnode_text(new_entry, true);
 			break;
 
 		case VM_INHERIT_ZERO:
 			/*
 			 * Create a new anonymous mapping entry modelled from
 			 * the old one.
 			 */
 			new_entry = vm_map_entry_create(new_map);
 			memset(new_entry, 0, sizeof(*new_entry));
 
 			new_entry->start = old_entry->start;
 			new_entry->end = old_entry->end;
 			new_entry->eflags = old_entry->eflags &
 			    ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
 			    MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC |
 			    MAP_ENTRY_SPLIT_BOUNDARY_MASK);
 			new_entry->protection = old_entry->protection;
 			new_entry->max_protection = old_entry->max_protection;
 			new_entry->inheritance = VM_INHERIT_ZERO;
 
 			vm_map_entry_link(new_map, new_entry);
 			vmspace_map_entry_forked(vm1, vm2, new_entry);
 
 			/*
 			 * The new entry has no object; charge its full size
 			 * to the current thread's credential.
 			 */
 			new_entry->cred = curthread->td_ucred;
 			crhold(new_entry->cred);
 			*fork_charge += (new_entry->end - new_entry->start);
 
 			break;
 		}
 	}
 	/*
 	 * Use inlined vm_map_unlock() to postpone handling the deferred
 	 * map entries, which cannot be done until both old_map and
 	 * new_map locks are released.
 	 */
 	sx_xunlock(&old_map->lock);
 	sx_xunlock(&new_map->lock);
 	vm_map_process_deferred();
 
 	return (vm2);
 }
 
 /*
  * Create a process's stack for exec_new_vmspace().  This function is never
  * asked to wire the newly created stack, which is why MAP_WIREFUTURE must
  * not be set on the map.
  *
  * The initial stack size is the smaller of max_ssize and sgrowsiz.  With
  * the map locked, KERN_NO_SPACE is returned if adding that size to the
  * current map size would exceed RLIMIT_VMEM; otherwise the result of
  * vm_map_stack_locked() is returned.
  */
 int
 vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
     vm_prot_t prot, vm_prot_t max, int cow)
 {
 	vm_size_t growsize, init_ssize;
 	rlim_t vmemlim;
 	int rv;
 
 	MPASS((map->flags & MAP_WIREFUTURE) == 0);
 
 	growsize = sgrowsiz;
 	init_ssize = growsize;
 	if (max_ssize < growsize)
 		init_ssize = max_ssize;
 
 	vm_map_lock(map);
 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
 	if (map->size + init_ssize > vmemlim) {
 		/* If we would blow our VMEM resource limit, no go */
 		rv = KERN_NO_SPACE;
 	} else {
 		rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize,
 		    prot, max, cow);
 	}
 	vm_map_unlock(map);
 
 	return (rv);
 }
 
 /*
  * Number of guard pages kept between a growable stack and the rest of
  * its reserved range.  Exported as the security.bsd.stack_guard_page
  * sysctl.  vm_map_stack_locked() multiplies this value by PAGE_SIZE and
  * records the result in the stack's gap entry.
  */
 static int stack_guard_page = 1;
 SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
     &stack_guard_page, 0,
     "Specifies the number of guard pages for a stack that grows");
 
 /*
  * Reserve the range [addrbos, addrbos + max_ssize) for a stack.  An
  * initial stack entry is inserted at the end of the range that matches
  * the growth direction, and the rest of the range is covered by a guard
  * ("gap") entry into which the stack may later grow.
  *
  *	addrbos		lowest address of the reserved range
  *	max_ssize	total size of the reserved range
  *	growsize	requested size of the initial mapping
  *	prot, max	protections for the initial stack entry
  *	cow		vm_map_insert() flags; must include exactly one
  *			of MAP_STACK_GROWS_DOWN and MAP_STACK_GROWS_UP
  *
  * Returns KERN_INVALID_ADDRESS for an empty or invalid range,
  * KERN_INVALID_ARGUMENT if the guard size is not smaller than
  * max_ssize, KERN_NO_SPACE if the range is already (partly) in use, and
  * otherwise the result of the vm_map_insert() calls.
  *
  * NOTE(review): the name and the caller in vm_map_stack() indicate that
  * the map is locked by the caller -- confirm for other callers.
  */
 static int
 vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
     vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
 {
 	vm_map_entry_t new_entry, prev_entry;
 	vm_offset_t bot, gap_bot, gap_top, top;
 	vm_size_t init_ssize, sgp;
 	int orient, rv;
 
 	/*
 	 * The stack orientation is piggybacked with the cow argument.
 	 * Extract it into orient and mask the cow argument so that we
 	 * don't pass it around further.
 	 */
 	orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
 	KASSERT(orient != 0, ("No stack grow direction"));
 	KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
 	    ("bi-dir stack"));
 
 	if (max_ssize == 0 ||
 	    !vm_map_range_valid(map, addrbos, addrbos + max_ssize))
 		return (KERN_INVALID_ADDRESS);
 	/*
 	 * Size of the guard in bytes; zero when the stack gap has been
 	 * disabled for the current process.
 	 */
 	sgp = ((curproc->p_flag2 & P2_STKGAP_DISABLE) != 0 ||
 	    (curproc->p_fctl0 & NT_FREEBSD_FCTL_STKGAP_DISABLE) != 0) ? 0 :
 	    (vm_size_t)stack_guard_page * PAGE_SIZE;
 	if (sgp >= max_ssize)
 		return (KERN_INVALID_ARGUMENT);
 
 	/* The initial mapping must leave room for the guard. */
 	init_ssize = growsize;
 	if (max_ssize < init_ssize + sgp)
 		init_ssize = max_ssize - sgp;
 
 	/* If addr is already mapped, no go */
 	if (vm_map_lookup_entry(map, addrbos, &prev_entry))
 		return (KERN_NO_SPACE);
 
 	/*
 	 * If we can't accommodate max_ssize in the current mapping, no go.
 	 */
 	if (vm_map_entry_succ(prev_entry)->start < addrbos + max_ssize)
 		return (KERN_NO_SPACE);
 
 	/*
 	 * We initially map a stack of only init_ssize.  We will grow as
 	 * needed later.  Depending on the orientation of the stack (i.e.
 	 * the grow direction) we either map at the top of the range, the
 	 * bottom of the range or in the middle.
 	 *
 	 * Note: we would normally expect prot and max to be VM_PROT_ALL,
 	 * and cow to be 0.  Possibly we should eliminate these as input
 	 * parameters, and just pass these values here in the insert call.
 	 */
 	if (orient == MAP_STACK_GROWS_DOWN) {
 		bot = addrbos + max_ssize - init_ssize;
 		top = bot + init_ssize;
 		gap_bot = addrbos;
 		gap_top = bot;
 	} else /* if (orient == MAP_STACK_GROWS_UP) */ {
 		bot = addrbos;
 		top = bot + init_ssize;
 		gap_bot = top;
 		gap_top = addrbos + max_ssize;
 	}
 	rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
 	if (rv != KERN_SUCCESS)
 		return (rv);
 	new_entry = vm_map_entry_succ(prev_entry);
 	KASSERT(new_entry->end == top || new_entry->start == bot,
 	    ("Bad entry start/end for new stack entry"));
 	KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
 	    (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
 	    ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
 	KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
 	    (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
 	    ("new entry lacks MAP_ENTRY_GROWS_UP"));
 	/* The initial mapping filled the whole range: no gap entry. */
 	if (gap_bot == gap_top)
 		return (KERN_SUCCESS);
 	rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
 	    VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
 	    MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
 	if (rv == KERN_SUCCESS) {
 		/*
 		 * Gap can never successfully handle a fault, so
 		 * read-ahead logic is never used for it.  Re-use
 		 * next_read of the gap entry to store
 		 * stack_guard_page for vm_map_growstack().
 		 */
 		if (orient == MAP_STACK_GROWS_DOWN)
 			vm_map_entry_pred(new_entry)->next_read = sgp;
 		else
 			vm_map_entry_succ(new_entry)->next_read = sgp;
 	} else {
 		/* Undo the stack mapping if the gap could not be created. */
 		(void)vm_map_delete(map, bot, top);
 	}
 	return (rv);
 }
 
 /*
  * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
  * successfully grow the stack.
  *
  *	map		map containing the stack
  *	addr		address that the stack must grow to cover
  *	gap_entry	entry containing addr, or NULL to have it looked
  *			up here
  *
  * KERN_SUCCESS is also returned when the entry containing addr is not
  * a guard entry, i.e. no growth is needed.  KERN_FAILURE is returned
  * when addr is unmapped, when the gap has no matching adjacent stack
  * entry, or when growth is disallowed for this process/map.
  * KERN_NO_SPACE is returned when the growth would exceed the gap (less
  * its guard) or a resource limit.
  *
  * NOTE(review): the lock upgrade/downgrade below implies that the map
  * is read-locked on entry and on return -- confirm against callers.
  */
 static int
 vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
 {
 	vm_map_entry_t stack_entry;
 	struct proc *p;
 	struct vmspace *vm;
 	struct ucred *cred;
 	vm_offset_t gap_end, gap_start, grow_start;
 	vm_size_t grow_amount, guard, max_grow;
 	rlim_t lmemlim, stacklim, vmemlim;
 	int rv, rv1 __diagused;
 	bool gap_deleted, grow_down, is_procstack;
 #ifdef notyet
 	uint64_t limit;
 #endif
 #ifdef RACCT
 	int error __diagused;
 #endif
 
 	p = curproc;
 	vm = p->p_vmspace;
 
 	/*
 	 * Disallow stack growth when the access is performed by a
 	 * debugger or AIO daemon.  The reason is that the wrong
 	 * resource limits are applied.
 	 */
 	if (p != initproc && (map != &p->p_vmspace->vm_map ||
 	    p->p_textvp == NULL))
 		return (KERN_FAILURE);
 
 	MPASS(!map->system_map);
 
 	lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
 	stacklim = lim_cur(curthread, RLIMIT_STACK);
 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
 retry:
 	/* If addr is not in a hole for a stack grow area, no need to grow. */
 	if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
 		return (KERN_FAILURE);
 	if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
 		return (KERN_SUCCESS);
 	/*
 	 * Locate the stack entry adjacent to the gap and compute how far
 	 * it has to grow, in whole pages, to cover addr.
 	 */
 	if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
 		stack_entry = vm_map_entry_succ(gap_entry);
 		if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
 		    stack_entry->start != gap_entry->end)
 			return (KERN_FAILURE);
 		grow_amount = round_page(stack_entry->start - addr);
 		grow_down = true;
 	} else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
 		stack_entry = vm_map_entry_pred(gap_entry);
 		if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
 		    stack_entry->end != gap_entry->start)
 			return (KERN_FAILURE);
 		grow_amount = round_page(addr + 1 - stack_entry->end);
 		grow_down = false;
 	} else {
 		return (KERN_FAILURE);
 	}
 	/*
 	 * The guard size was stored in the gap entry's next_read field
 	 * when the stack was created.  The guard part of the gap is not
 	 * available for growth.
 	 */
 	guard = ((curproc->p_flag2 & P2_STKGAP_DISABLE) != 0 ||
 	    (curproc->p_fctl0 & NT_FREEBSD_FCTL_STKGAP_DISABLE) != 0) ? 0 :
 	    gap_entry->next_read;
 	max_grow = gap_entry->end - gap_entry->start;
 	if (guard > max_grow)
 		return (KERN_NO_SPACE);
 	max_grow -= guard;
 	if (grow_amount > max_grow)
 		return (KERN_NO_SPACE);
 
 	/*
 	 * If this is the main process stack, see if we're over the stack
 	 * limit.
 	 */
 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
 	    addr < (vm_offset_t)vm->vm_stacktop;
 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
 		return (KERN_NO_SPACE);
 
 #ifdef RACCT
 	if (racct_enable) {
 		PROC_LOCK(p);
 		if (is_procstack && racct_set(p, RACCT_STACK,
 		    ctob(vm->vm_ssize) + grow_amount)) {
 			PROC_UNLOCK(p);
 			return (KERN_NO_SPACE);
 		}
 		PROC_UNLOCK(p);
 	}
 #endif
 
 	/*
 	 * Grow in multiples of sgrowsiz where possible, but never beyond
 	 * the usable part of the gap or, for the process stack, beyond
 	 * the stack limit.
 	 */
 	grow_amount = roundup(grow_amount, sgrowsiz);
 	if (grow_amount > max_grow)
 		grow_amount = max_grow;
 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
 		grow_amount = trunc_page((vm_size_t)stacklim) -
 		    ctob(vm->vm_ssize);
 	}
 
 #ifdef notyet
 	PROC_LOCK(p);
 	limit = racct_get_available(p, RACCT_STACK);
 	PROC_UNLOCK(p);
 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
 		grow_amount = limit - ctob(vm->vm_ssize);
 #endif
 
 	/*
 	 * With MAP_WIREFUTURE set the new pages will be wired below, so
 	 * check the locked-memory limit first.
 	 */
 	if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
 		if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
 			rv = KERN_NO_SPACE;
 			goto out;
 		}
 #ifdef RACCT
 		if (racct_enable) {
 			PROC_LOCK(p);
 			if (racct_set(p, RACCT_MEMLOCK,
 			    ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
 				PROC_UNLOCK(p);
 				rv = KERN_NO_SPACE;
 				goto out;
 			}
 			PROC_UNLOCK(p);
 		}
 #endif
 	}
 
 	/* If we would blow our VMEM resource limit, no go */
 	if (map->size + grow_amount > vmemlim) {
 		rv = KERN_NO_SPACE;
 		goto out;
 	}
 #ifdef RACCT
 	if (racct_enable) {
 		PROC_LOCK(p);
 		if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
 			PROC_UNLOCK(p);
 			rv = KERN_NO_SPACE;
 			goto out;
 		}
 		PROC_UNLOCK(p);
 	}
 #endif
 
 	/*
 	 * If the upgrade fails, take the read lock again, forget the
 	 * cached gap entry and redo all of the checks from the top.
 	 */
 	if (vm_map_lock_upgrade(map)) {
 		gap_entry = NULL;
 		vm_map_lock_read(map);
 		goto retry;
 	}
 
 	if (grow_down) {
 		/*
 		 * Shrink (or delete) the gap entry from its top and insert
 		 * a new grows-down entry in the freed range.  The gap is
 		 * restored if the insertion fails.
 		 */
 		grow_start = gap_entry->end - grow_amount;
 		if (gap_entry->start + grow_amount == gap_entry->end) {
 			gap_start = gap_entry->start;
 			gap_end = gap_entry->end;
 			vm_map_entry_delete(map, gap_entry);
 			gap_deleted = true;
 		} else {
 			MPASS(gap_entry->start < gap_entry->end - grow_amount);
 			vm_map_entry_resize(map, gap_entry, -grow_amount);
 			gap_deleted = false;
 		}
 		rv = vm_map_insert(map, NULL, 0, grow_start,
 		    grow_start + grow_amount,
 		    stack_entry->protection, stack_entry->max_protection,
 		    MAP_STACK_GROWS_DOWN);
 		if (rv != KERN_SUCCESS) {
 			if (gap_deleted) {
 				rv1 = vm_map_insert(map, NULL, 0, gap_start,
 				    gap_end, VM_PROT_NONE, VM_PROT_NONE,
 				    MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
 				MPASS(rv1 == KERN_SUCCESS);
 			} else
 				vm_map_entry_resize(map, gap_entry,
 				    grow_amount);
 		}
 	} else {
 		/*
 		 * Growing up extends the existing stack entry in place:
 		 * reserve swap against the credential of the entry or of
 		 * its object, grow the object, then move the boundary
 		 * between the stack entry and the gap.
 		 */
 		grow_start = stack_entry->end;
 		cred = stack_entry->cred;
 		if (cred == NULL && stack_entry->object.vm_object != NULL)
 			cred = stack_entry->object.vm_object->cred;
 		if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred))
 			rv = KERN_NO_SPACE;
 		/* Grow the underlying object if applicable. */
 		else if (stack_entry->object.vm_object == NULL ||
 		    vm_object_coalesce(stack_entry->object.vm_object,
 		    stack_entry->offset,
 		    (vm_size_t)(stack_entry->end - stack_entry->start),
 		    grow_amount, cred != NULL)) {
 			if (gap_entry->start + grow_amount == gap_entry->end) {
 				vm_map_entry_delete(map, gap_entry);
 				vm_map_entry_resize(map, stack_entry,
 				    grow_amount);
 			} else {
 				gap_entry->start += grow_amount;
 				stack_entry->end += grow_amount;
 			}
 			map->size += grow_amount;
 			rv = KERN_SUCCESS;
 		} else
 			rv = KERN_FAILURE;
 	}
 	if (rv == KERN_SUCCESS && is_procstack)
 		vm->vm_ssize += btoc(grow_amount);
 
 	/*
 	 * Heed the MAP_WIREFUTURE flag if it was set for this process.
 	 */
 	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
 		rv = vm_map_wire_locked(map, grow_start,
 		    grow_start + grow_amount,
 		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 	}
 	vm_map_lock_downgrade(map);
 
 out:
 #ifdef RACCT
 	/*
 	 * On failure, reset the resource accounting values that may have
 	 * been raised above to the current actual usage.
 	 */
 	if (racct_enable && rv != KERN_SUCCESS) {
 		PROC_LOCK(p);
 		error = racct_set(p, RACCT_VMEM, map->size);
 		KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
 		if (!old_mlock) {
 			error = racct_set(p, RACCT_MEMLOCK,
 			    ptoa(pmap_wired_count(map->pmap)));
 			KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed"));
 		}
 	    	error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize));
 		KASSERT(error == 0, ("decreasing RACCT_STACK failed"));
 		PROC_UNLOCK(p);
 	}
 #endif
 
 	return (rv);
 }
 
 /*
  * Replace the vmspace of process "p" for exec.  A new vmspace is
  * allocated with the bounds "minuser"/"maxuser" and installed in the
  * process; the previous vmspace is not released by this function.
  * TDP_EXECVMSPC is set on the current thread to record the switch.
  *
  * Returns 0 on success, or ENOMEM if the allocation fails.
  */
 int
 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
 {
 	struct vmspace *fresh, *prev;
 
 	prev = p->p_vmspace;
 	KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0,
 	    ("vmspace_exec recursed"));
 
 	fresh = vmspace_alloc(minuser, maxuser, pmap_pinit);
 	if (fresh == NULL)
 		return (ENOMEM);
 
 	/* Carry the swap-time resident set size over to the new space. */
 	fresh->vm_swrss = prev->vm_swrss;
 
 	/*
 	 * The old vmspace is deliberately not run down here; the goal
 	 * is to let the other processes that are still using it do
 	 * that eventually.  There is little or no chance of blocking
 	 * at this point, but the structure is kept for future changes.
 	 */
 	PROC_VMSPACE_LOCK(p);
 	p->p_vmspace = fresh;
 	PROC_VMSPACE_UNLOCK(p);
 
 	/* Only switch the active pmap when we are the target process. */
 	if (curthread->td_proc == p)
 		pmap_activate(curthread);
 	curthread->td_pflags |= TDP_EXECVMSPC;
 	return (0);
 }
 
 /*
  * Give process "p" a private copy of its vmspace, forcing COW.
  * Called by rfork for the (RFMEM|RFPROC) == 0 case.
  *
  * Returns 0 on success (including when the vmspace is already
  * unshared), or ENOMEM if the copy or its swap reservation fails.
  */
 int
 vmspace_unshare(struct proc *p)
 {
 	struct vmspace *copy, *shared;
 	vm_ooffset_t charge;
 
 	shared = p->p_vmspace;
 
 	/*
 	 * Nothing to do when we hold the only reference.  The caller
 	 * must guarantee that the reference count cannot concurrently
 	 * transition 1 -> 2.
 	 */
 	if (refcount_load(&shared->vm_refcnt) == 1)
 		return (0);
 
 	charge = 0;
 	copy = vmspace_fork(shared, &charge);
 	if (copy == NULL)
 		return (ENOMEM);
 
 	/* Reserve swap for the copy against the process credentials. */
 	if (!swap_reserve_by_cred(charge, p->p_ucred)) {
 		vmspace_free(copy);
 		return (ENOMEM);
 	}
 
 	PROC_VMSPACE_LOCK(p);
 	p->p_vmspace = copy;
 	PROC_VMSPACE_UNLOCK(p);
 	if (curthread->td_proc == p)
 		pmap_activate(curthread);
 
 	/* Drop this process's reference on the formerly shared space. */
 	vmspace_free(shared);
 	return (0);
 }
 
 /*
  *	vm_map_lookup:
  *
  *	Finds the VM object, offset, and
  *	protection for a given virtual address in the
  *	specified map, assuming a page fault of the
  *	type specified.
  *
  *	Leaves the map in question locked for read; return
  *	values are guaranteed until a vm_map_lookup_done
  *	call is performed.  Note that the map argument
  *	is in/out; the returned map must be used in
  *	the call to vm_map_lookup_done.
  *
  *	A handle (out_entry) is returned for use in
  *	vm_map_lookup_done, to make that fast.
  *
  *	If a lookup is requested with "write protection"
  *	specified, the map may be changed to perform virtual
  *	copying operations, although the data referenced will
  *	remain the same.
  *
  *	Return values:
  *	KERN_SUCCESS		 lookup succeeded; the map is read-locked.
  *	KERN_INVALID_ADDRESS	 no map entry contains vaddr.
  *	KERN_PROTECTION_FAILURE	 the entry's protection does not permit
  *				 the requested access.
  *	KERN_RESOURCE_SHORTAGE	 swap could not be reserved for a copy.
  *
  *	On every failure return the map lock has been released.
  */
 int
 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
 	      vm_offset_t vaddr,
 	      vm_prot_t fault_typea,
 	      vm_map_entry_t *out_entry,	/* OUT */
 	      vm_object_t *object,		/* OUT */
 	      vm_pindex_t *pindex,		/* OUT */
 	      vm_prot_t *out_prot,		/* OUT */
 	      boolean_t *wired)			/* OUT */
 {
 	vm_map_entry_t entry;
 	vm_map_t map = *var_map;
 	vm_prot_t prot;
 	vm_prot_t fault_type;
 	vm_object_t eobject;
 	vm_size_t size;
 	struct ucred *cred;
 
 /* Restart point used whenever the map lock has been dropped. */
 RetryLookup:
 
 	vm_map_lock_read(map);
 
 /* Restart point used when the read lock is still held. */
 RetryLookupLocked:
 	/*
 	 * Lookup the faulting address.
 	 */
 	if (!vm_map_lookup_entry(map, vaddr, out_entry)) {
 		vm_map_unlock_read(map);
 		return (KERN_INVALID_ADDRESS);
 	}
 
 	entry = *out_entry;
 
 	/*
 	 * Handle submaps.  The lookup continues in the submap, which is
 	 * also reported back to the caller through *var_map.
 	 */
 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 		vm_map_t old_map = map;
 
 		*var_map = map = entry->object.sub_map;
 		vm_map_unlock_read(old_map);
 		goto RetryLookup;
 	}
 
 	/*
 	 * Check whether this task is allowed to have this page.
 	 */
 	prot = entry->protection;
 	if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
 		fault_typea &= ~VM_PROT_FAULT_LOOKUP;
 		/*
 		 * A fault on a stack gap guard entry in a non-kernel map
 		 * first tries to grow the stack; on success the lookup
 		 * is repeated with the lock still held.
 		 */
 		if (prot == VM_PROT_NONE && map != kernel_map &&
 		    (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
 		    (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
 		    MAP_ENTRY_STACK_GAP_UP)) != 0 &&
 		    vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
 			goto RetryLookupLocked;
 	}
 	fault_type = fault_typea & VM_PROT_ALL;
 	if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
 		vm_map_unlock_read(map);
 		return (KERN_PROTECTION_FAILURE);
 	}
 	/* A writable entry must not be both user-wired and needs-copy. */
 	KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags &
 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) !=
 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY),
 	    ("entry %p flags %x", entry, entry->eflags));
 	/*
 	 * VM_PROT_COPY is refused when the entry can never be made
 	 * writable and is not a copy-on-write mapping.
 	 */
 	if ((fault_typea & VM_PROT_COPY) != 0 &&
 	    (entry->max_protection & VM_PROT_WRITE) == 0 &&
 	    (entry->eflags & MAP_ENTRY_COW) == 0) {
 		vm_map_unlock_read(map);
 		return (KERN_PROTECTION_FAILURE);
 	}
 
 	/*
 	 * If this page is not pageable, we have to get it for all possible
 	 * accesses.
 	 */
 	*wired = (entry->wired_count != 0);
 	if (*wired)
 		fault_type = entry->protection;
 	size = entry->end - entry->start;
 
 	/*
 	 * If the entry was copy-on-write, we either ...
 	 */
 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 		/*
 		 * If we want to write the page, we may as well handle that
 		 * now since we've got the map locked.
 		 *
 		 * If we don't need to write the page, we just demote the
 		 * permissions allowed.
 		 */
 		if ((fault_type & VM_PROT_WRITE) != 0 ||
 		    (fault_typea & VM_PROT_COPY) != 0) {
 			/*
 			 * Make a new object, and place it in the object
 			 * chain.  Note that no new references have appeared
 			 * -- one just moved from the map to the new
 			 * object.
 			 */
 			/* A failed upgrade restarts the whole lookup. */
 			if (vm_map_lock_upgrade(map))
 				goto RetryLookup;
 
 			if (entry->cred == NULL) {
 				/*
 				 * The debugger owner is charged for
 				 * the memory.
 				 */
 				cred = curthread->td_ucred;
 				crhold(cred);
 				if (!swap_reserve_by_cred(size, cred)) {
 					crfree(cred);
 					vm_map_unlock(map);
 					return (KERN_RESOURCE_SHORTAGE);
 				}
 				entry->cred = cred;
 			}
 			/* Remember the old object to detect a no-op shadow. */
 			eobject = entry->object.vm_object;
 			vm_object_shadow(&entry->object.vm_object,
 			    &entry->offset, size, entry->cred, false);
 			if (eobject == entry->object.vm_object) {
 				/*
 				 * The object was not shadowed.
 				 */
 				swap_release_by_cred(size, entry->cred);
 				crfree(entry->cred);
 			}
 			entry->cred = NULL;
 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 
 			vm_map_lock_downgrade(map);
 		} else {
 			/*
 			 * We're attempting to read a copy-on-write page --
 			 * don't allow writes.
 			 */
 			prot &= ~VM_PROT_WRITE;
 		}
 	}
 
 	/*
 	 * Create an object if necessary.
 	 */
 	if (entry->object.vm_object == NULL && !map->system_map) {
 		if (vm_map_lock_upgrade(map))
 			goto RetryLookup;
 		entry->object.vm_object = vm_object_allocate_anon(atop(size),
 		    NULL, entry->cred, size);
 		entry->offset = 0;
 		entry->cred = NULL;
 		vm_map_lock_downgrade(map);
 	}
 
 	/*
 	 * Return the object/offset from this entry.  If the entry was
 	 * copy-on-write or empty, it has been fixed up.
 	 */
 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
 	*object = entry->object.vm_object;
 
 	*out_prot = prot;
 	return (KERN_SUCCESS);
 }
 
 /*
  *	vm_map_lookup_locked:
  *
  *	Lookup the faulting address.  A version of vm_map_lookup that
  *	returns KERN_FAILURE instead of blocking on map lock or memory
  *	allocation.  This function takes and releases no map lock itself.
  *
  *	Returns KERN_INVALID_ADDRESS when no entry contains vaddr,
  *	KERN_PROTECTION_FAILURE when the access is not permitted,
  *	KERN_FAILURE for submaps, write faults on needs-copy entries and
  *	entries that would need an object created, and KERN_SUCCESS
  *	otherwise.
  */
 int
 vm_map_lookup_locked(vm_map_t *var_map,		/* IN/OUT */
 		     vm_offset_t vaddr,
 		     vm_prot_t fault_typea,
 		     vm_map_entry_t *out_entry,	/* OUT */
 		     vm_object_t *object,	/* OUT */
 		     vm_pindex_t *pindex,	/* OUT */
 		     vm_prot_t *out_prot,	/* OUT */
 		     boolean_t *wired)		/* OUT */
 {
 	vm_map_entry_t ent;
 	vm_map_t lmap;
 	vm_prot_t allowed, wanted;
 
 	lmap = *var_map;
 	wanted = fault_typea;
 
 	/* Find the entry containing the faulting address. */
 	if (!vm_map_lookup_entry(lmap, vaddr, out_entry))
 		return (KERN_INVALID_ADDRESS);
 	ent = *out_entry;
 
 	/* Submaps are not followed by this variant. */
 	if ((ent->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 		return (KERN_FAILURE);
 
 	/* Every requested access bit must be granted by the entry. */
 	allowed = ent->protection;
 	wanted &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
 	if ((wanted & ~allowed) != 0)
 		return (KERN_PROTECTION_FAILURE);
 
 	/*
 	 * A wired (non-pageable) entry has to be obtained for all
 	 * accesses that it permits.
 	 */
 	*wired = (ent->wired_count != 0);
 	if (*wired)
 		wanted = ent->protection;
 
 	if ((ent->eflags & MAP_ENTRY_NEEDS_COPY) != 0) {
 		/* Resolving a write to a needs-copy entry is not done here. */
 		if ((wanted & VM_PROT_WRITE) != 0)
 			return (KERN_FAILURE);
 		/* Reading a copy-on-write page: do not hand out write access. */
 		allowed &= ~VM_PROT_WRITE;
 	}
 
 	/* Creating a backing object is not done here either. */
 	if (ent->object.vm_object == NULL && !lmap->system_map)
 		return (KERN_FAILURE);
 
 	/* Report the object, page index within it, and protection. */
 	*pindex = OFF_TO_IDX((vaddr - ent->start) + ent->offset);
 	*object = ent->object.vm_object;
 	*out_prot = allowed;
 	return (KERN_SUCCESS);
 }
 
 /*
  *	vm_map_lookup_done:
  *
  *	Finishes a lookup started by vm_map_lookup by dropping the
  *	read lock on "map".  The "entry" handle is part of the
  *	interface but is not consulted here.
  */
 void
 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
 {
 
 	/* Release the read lock on the main-level map. */
 	vm_map_unlock_read(map);
 }
 
 /* Out-of-line wrapper that returns vm_map_max() for "map". */
 vm_offset_t
 vm_map_max_KBI(const struct vm_map *map)
 {
 	vm_offset_t maxaddr;
 
 	maxaddr = vm_map_max(map);
 	return (maxaddr);
 }
 
 /* Out-of-line wrapper that returns vm_map_min() for "map". */
 vm_offset_t
 vm_map_min_KBI(const struct vm_map *map)
 {
 	vm_offset_t minaddr;
 
 	minaddr = vm_map_min(map);
 	return (minaddr);
 }
 
 /* Out-of-line accessor for the physical map attached to "map". */
 pmap_t
 vm_map_pmap_KBI(vm_map_t map)
 {
 	pmap_t physmap;
 
 	physmap = map->pmap;
 	return (physmap);
 }
 
 /* Out-of-line wrapper that returns vm_map_range_valid() for the range. */
 bool
 vm_map_range_valid_KBI(vm_map_t map, vm_offset_t start, vm_offset_t end)
 {
 	bool valid;
 
 	valid = vm_map_range_valid(map, start, end);
 	return (valid);
 }
 
 #ifdef INVARIANTS
 /*
  * Assert the structural invariants of "map": entries are ordered and
  * non-empty, each entry can be found by searching from the root, and
  * each entry's max_free matches the value recomputed from its
  * neighbors.  The checks run only when enable_vmmap_check equals
  * "check"; under DIAGNOSTIC the map's nupdates counter is incremented
  * on every call regardless.
  */
 static void
 _vm_map_assert_consistent(vm_map_t map, int check)
 {
 	vm_map_entry_t entry, prev;
 	vm_map_entry_t cur, header, lbound, ubound;
 	vm_size_t max_left, max_right;
 
 #ifdef DIAGNOSTIC
 	++map->nupdates;
 #endif
 	if (enable_vmmap_check != check)
 		return;
 
 	header = prev = &map->header;
 	VM_MAP_ENTRY_FOREACH(entry, map) {
 		/* Entries must not overlap and must be in address order. */
 		KASSERT(prev->end <= entry->start,
 		    ("map %p prev->end = %jx, start = %jx", map,
 		    (uintmax_t)prev->end, (uintmax_t)entry->start));
 		/* Entries must cover a non-empty range. */
 		KASSERT(entry->start < entry->end,
 		    ("map %p start = %jx, end = %jx", map,
 		    (uintmax_t)entry->start, (uintmax_t)entry->end));
 		/* The left link, unless it is the header, starts lower. */
 		KASSERT(entry->left == header ||
 		    entry->left->start < entry->start,
 		    ("map %p left->start = %jx, start = %jx", map,
 		    (uintmax_t)entry->left->start, (uintmax_t)entry->start));
 		/* The right link, unless it is the header, starts higher. */
 		KASSERT(entry->right == header ||
 		    entry->start < entry->right->start,
 		    ("map %p start = %jx, right->start = %jx", map,
 		    (uintmax_t)entry->start, (uintmax_t)entry->right->start));
 		/*
 		 * Search for this entry from the root, tracking the
 		 * nearest bounds passed on the way down; the search
 		 * must terminate at the entry itself.
 		 */
 		cur = map->root;
 		lbound = ubound = header;
 		for (;;) {
 			if (entry->start < cur->start) {
 				ubound = cur;
 				cur = cur->left;
 				KASSERT(cur != lbound,
 				    ("map %p cannot find %jx",
 				    map, (uintmax_t)entry->start));
 			} else if (cur->end <= entry->start) {
 				lbound = cur;
 				cur = cur->right;
 				KASSERT(cur != ubound,
 				    ("map %p cannot find %jx",
 				    map, (uintmax_t)entry->start));
 			} else {
 				KASSERT(cur == entry,
 				    ("map %p cannot find %jx",
 				    map, (uintmax_t)entry->start));
 				break;
 			}
 		}
 		/* The cached max_free must match the recomputed value. */
 		max_left = vm_map_entry_max_free_left(entry, lbound);
 		max_right = vm_map_entry_max_free_right(entry, ubound);
 		KASSERT(entry->max_free == vm_size_max(max_left, max_right),
 		    ("map %p max = %jx, max_left = %jx, max_right = %jx", map,
 		    (uintmax_t)entry->max_free,
 		    (uintmax_t)max_left, (uintmax_t)max_right));
 		prev = entry;
 	}
 	/*
 	 * Final ordering check of the last entry visited against the
 	 * value "entry" holds once the loop ends.
 	 * NOTE(review): presumably that is the map header -- confirm
 	 * against VM_MAP_ENTRY_FOREACH.
 	 */
 	KASSERT(prev->end <= entry->start,
 	    ("map %p prev->end = %jx, start = %jx", map,
 	    (uintmax_t)prev->end, (uintmax_t)entry->start));
 }
 #endif
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <sys/kernel.h>
 
 #include <ddb/ddb.h>
 
 /*
  * DDB helper: print a summary line for "map" followed by one record
  * per map entry.  Submap entries recurse into the submap, and other
  * entries print their backing object via vm_object_print(); in both
  * cases the nested dump is skipped when the previous entry referenced
  * the same submap/object.
  */
 static void
 vm_map_print(vm_map_t map)
 {
 	vm_map_entry_t entry, prev;
 
 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
 	    (void *)map,
 	    (void *)map->pmap, map->nentries, map->timestamp);
 
 	db_indent += 2;
 	/* "prev" trails the iteration so repeated objects print once. */
 	prev = &map->header;
 	VM_MAP_ENTRY_FOREACH(entry, map) {
 		db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
 		    (void *)entry, (void *)entry->start, (void *)entry->end,
 		    entry->eflags);
 		{
 			/* Names indexed by the entry's inheritance value. */
 			static const char * const inheritance_name[4] =
 			{"share", "copy", "none", "donate_copy"};
 
 			db_iprintf(" prot=%x/%x/%s",
 			    entry->protection,
 			    entry->max_protection,
 			    inheritance_name[(int)(unsigned char)
 			    entry->inheritance]);
 			if (entry->wired_count != 0)
 				db_printf(", wired");
 		}
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 			db_printf(", share=%p, offset=0x%jx\n",
 			    (void *)entry->object.sub_map,
 			    (uintmax_t)entry->offset);
 			/* Dump the submap unless the previous entry shared it. */
 			if (prev == &map->header ||
 			    prev->object.sub_map !=
 				entry->object.sub_map) {
 				db_indent += 2;
 				vm_map_print((vm_map_t)entry->object.sub_map);
 				db_indent -= 2;
 			}
 		} else {
 			if (entry->cred != NULL)
 				db_printf(", ruid %d", entry->cred->cr_ruid);
 			db_printf(", object=%p, offset=0x%jx",
 			    (void *)entry->object.vm_object,
 			    (uintmax_t)entry->offset);
 			if (entry->object.vm_object && entry->object.vm_object->cred)
 				db_printf(", obj ruid %d charge %jx",
 				    entry->object.vm_object->cred->cr_ruid,
 				    (uintmax_t)entry->object.vm_object->charge);
 			if (entry->eflags & MAP_ENTRY_COW)
 				db_printf(", copy (%s)",
 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
 			db_printf("\n");
 
 			/* Dump the object unless the previous entry shared it. */
 			if (prev == &map->header ||
 			    prev->object.vm_object !=
 				entry->object.vm_object) {
 				db_indent += 2;
 				vm_object_print((db_expr_t)(intptr_t)
 						entry->object.vm_object,
 						0, 0, (char *)0);
 				db_indent -= 2;
 			}
 		}
 		prev = entry;
 	}
 	db_indent -= 2;
 }
 
 /* DDB "show map <addr>": dump the vm_map located at the given address. */
 DB_SHOW_COMMAND(map, map)
 {
 
 	if (have_addr) {
 		vm_map_print((vm_map_t)addr);
 		return;
 	}
 	/* An address is mandatory for this command. */
 	db_printf("usage: show map <addr>\n");
 }
 
 DB_SHOW_COMMAND(procvm, procvm)
 {
 	struct proc *p;
 
 	if (have_addr) {
 		p = db_lookup_proc(addr);
 	} else {
 		p = curproc;
 	}
 
 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
 	    (void *)vmspace_pmap(p->p_vmspace));
 
 	vm_map_print((vm_map_t)&p->p_vmspace->vm_map);
 }
 
 #endif /* DDB */
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 8f318b34e601..2ac54a39a57b 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -1,537 +1,538 @@
 /*-
  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vm_map.h	8.9 (Berkeley) 5/17/95
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  * $FreeBSD$
  */
 
 /*
  *	Virtual memory map module definitions.
  */
 #ifndef	_VM_MAP_
 #define	_VM_MAP_
 
 #include <sys/lock.h>
 #include <sys/sx.h>
 #include <sys/_mutex.h>
 
 /*
  *	Types defined:
  *
  *	vm_map_t		the high-level address map data structure.
  *	vm_map_entry_t		an entry in an address map.
  */
 
 typedef u_char vm_flags_t;
 typedef u_int vm_eflags_t;
 
 /*
  *	Objects which live in maps may be either VM objects, or
  *	another map (called a "sharing map") which denotes read-write
  *	sharing with other maps.
  */
 union vm_map_object {
 	struct vm_object *vm_object;	/* a VM object */
 	struct vm_map *sub_map;		/* another map; used when the entry
 					   has MAP_ENTRY_IS_SUB_MAP set */
 };
 
 /*
  *	Address map entries consist of start and end addresses,
  *	a VM object (or sharing map) and offset into that object,
  *	and user-exported inheritance and protection information.
  *	Also included is control information for virtual copy operations.
  */
 struct vm_map_entry {
 	struct vm_map_entry *left;	/* left child or previous entry */
 	struct vm_map_entry *right;	/* right child or next entry */
 	vm_offset_t start;		/* start address */
 	vm_offset_t end;		/* end address */
 	vm_offset_t next_read;		/* vaddr of the next sequential read */
 	vm_size_t max_free;		/* max free space in subtree */
 	union vm_map_object object;	/* object I point to */
 	vm_ooffset_t offset;		/* offset into object */
 	vm_eflags_t eflags;		/* map entry flags (MAP_ENTRY_*) */
 	vm_prot_t protection;		/* protection code */
 	vm_prot_t max_protection;	/* maximum protection */
 	vm_inherit_t inheritance;	/* inheritance */
 	uint8_t read_ahead;		/* pages in the read-ahead window */
 	int wired_count;		/* can be paged if = 0 */
 	struct ucred *cred;		/* tmp storage for creator ref */
 	/* NOTE(review): presumably the thread wiring this entry -- confirm */
 	struct thread *wiring_thread;
 };
 
 #define	MAP_ENTRY_NOSYNC		0x00000001
 #define	MAP_ENTRY_IS_SUB_MAP		0x00000002
 #define	MAP_ENTRY_COW			0x00000004
 #define	MAP_ENTRY_NEEDS_COPY		0x00000008
 #define	MAP_ENTRY_NOFAULT		0x00000010
 #define	MAP_ENTRY_USER_WIRED		0x00000020
 
 #define	MAP_ENTRY_BEHAV_NORMAL		0x00000000	/* default behavior */
 #define	MAP_ENTRY_BEHAV_SEQUENTIAL	0x00000040	/* expect sequential
 							   access */
 #define	MAP_ENTRY_BEHAV_RANDOM		0x00000080	/* expect random
 							   access */
 #define	MAP_ENTRY_BEHAV_RESERVED	0x000000c0	/* future use */
 #define	MAP_ENTRY_BEHAV_MASK		0x000000c0
 #define	MAP_ENTRY_IN_TRANSITION		0x00000100	/* entry being
 							   changed */
 #define	MAP_ENTRY_NEEDS_WAKEUP		0x00000200	/* waiters in
 							   transition */
 #define	MAP_ENTRY_NOCOREDUMP		0x00000400	/* don't include in
 							   a core */
 #define	MAP_ENTRY_VN_EXEC		0x00000800	/* text vnode mapping */
 #define	MAP_ENTRY_GROWS_DOWN		0x00001000	/* top-down stacks */
 #define	MAP_ENTRY_GROWS_UP		0x00002000	/* bottom-up stacks */
 
 #define	MAP_ENTRY_WIRE_SKIPPED		0x00004000
 #define	MAP_ENTRY_WRITECNT		0x00008000	/* tracked writeable
 							   mapping */
 #define	MAP_ENTRY_GUARD			0x00010000
 #define	MAP_ENTRY_STACK_GAP_DN		0x00020000
 #define	MAP_ENTRY_STACK_GAP_UP		0x00040000
 #define	MAP_ENTRY_HEADER		0x00080000
 
 #define	MAP_ENTRY_SPLIT_BOUNDARY_MASK	0x00300000
 
 #define	MAP_ENTRY_SPLIT_BOUNDARY_SHIFT	20
 
 #ifdef	_KERNEL
 /* Extract the MAP_ENTRY_BEHAV_* bits from the entry's flags. */
 static __inline u_char
 vm_map_entry_behavior(vm_map_entry_t entry)
 {
 	vm_eflags_t behav;
 
 	behav = entry->eflags & MAP_ENTRY_BEHAV_MASK;
 	return (behav);
 }
 
 /* Return 1 when the entry has MAP_ENTRY_USER_WIRED set, otherwise 0. */
 static __inline int
 vm_map_entry_user_wired_count(vm_map_entry_t entry)
 {
 	return ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0 ? 1 : 0);
 }
 
 /* The entry's wired count with the user wiring, if any, subtracted. */
 static __inline int
 vm_map_entry_system_wired_count(vm_map_entry_t entry)
 {
 	int user_wirings;
 
 	user_wirings = vm_map_entry_user_wired_count(entry);
 	return (entry->wired_count - user_wirings);
 }
 #endif	/* _KERNEL */
 
 /*
  *	A map is a set of map entries.  These map entries are
  *	organized as a threaded binary search tree.  Both structures
  *	are ordered based upon the start and end addresses contained
  *	within each map entry.  The largest gap between an entry in a
  *	subtree and one of its neighbors is saved in the max_free
  *	field, and that field is updated when the tree is
  *	restructured.
  *
  *	Sleator and Tarjan's top-down splay algorithm is employed to
  *	control height imbalance in the binary search tree.
  *
  *	The map's min offset value is stored in map->header.end, and
  *	its max offset value is stored in map->header.start.  These
  *	values act as sentinels for any forward or backward address
  *	scan of the list.  The right and left fields of the map
  *	header point to the first and last map entries.  The map
  *	header has a special value for the eflags field,
  *	MAP_ENTRY_HEADER, that is set initially, is never changed,
  *	and prevents an eflags match of the header with any other map
  *	entry.
  *
  *	List of locks
  *	(c)	const until freed
  */
 struct vm_map {
 	struct vm_map_entry header;	/* List of entries */
 	struct sx lock;			/* Lock for map data */
 	/* NOTE(review): presumably the lock used by system maps -- confirm */
 	struct mtx system_mtx;
 	int nentries;			/* Number of entries */
 	vm_size_t size;			/* virtual size */
 	u_int timestamp;		/* Version number */
 	/* NOTE(review): presumably set while threads wait on the map -- confirm */
 	u_char needs_wakeup;
 	u_char system_map;		/* (c) Am I a system map? */
 	vm_flags_t flags;		/* flags for this vm_map */
 	vm_map_entry_t root;		/* Root of a binary search tree */
 	pmap_t pmap;			/* (c) Physical map */
 	/* NOTE(review): purpose not visible in this header -- confirm */
 	vm_offset_t anon_loc;
 	/* NOTE(review): presumably the vm_map_busy()/vm_map_unbusy() count -- confirm */
 	int busy;
 #ifdef DIAGNOSTIC
 	int nupdates;			/* consistency-check call counter */
 #endif
 };
 
 /*
  * vm_flags_t values
  */
 #define MAP_WIREFUTURE		0x01	/* wire all future pages */
 #define	MAP_BUSY_WAKEUP		0x02	/* thread(s) waiting on busy state */
 #define	MAP_IS_SUB_MAP		0x04	/* has parent */
 #define	MAP_ASLR		0x08	/* enabled ASLR */
 #define	MAP_ASLR_IGNSTART	0x10	/* ASLR ignores data segment */
 #define	MAP_REPLENISH		0x20	/* kmapent zone needs to be refilled */
 #define	MAP_WXORX		0x40	/* enforce W^X */
 #define	MAP_ASLR_STACK		0x80	/* stack location is randomized */
 
 #ifdef	_KERNEL
 #if defined(KLD_MODULE) && !defined(KLD_TIED)
 #define	vm_map_max(map)		vm_map_max_KBI((map))
 #define	vm_map_min(map)		vm_map_min_KBI((map))
 #define	vm_map_pmap(map)	vm_map_pmap_KBI((map))
 #define	vm_map_range_valid(map, start, end)	\
 	vm_map_range_valid_KBI((map), (start), (end))
 #else
 static __inline vm_offset_t
 vm_map_max(const struct vm_map *map)
 {
 
 	return (map->header.start);
 }
 
 static __inline vm_offset_t
 vm_map_min(const struct vm_map *map)
 {
 
 	return (map->header.end);
 }
 
 /* Return the physical map attached to "map". */
 static __inline pmap_t
 vm_map_pmap(vm_map_t map)
 {
 	pmap_t physmap;
 
 	physmap = map->pmap;
 	return (physmap);
 }
 
 /*
  * Update the map's flags: first OR in the bits of "set", then remove
  * the bits of "clear".  A bit present in both ends up cleared.
  */
 static __inline void
 vm_map_modflags(vm_map_t map, vm_flags_t set, vm_flags_t clear)
 {
 	vm_flags_t updated;
 
 	updated = map->flags | set;
 	updated &= ~clear;
 	map->flags = updated;
 }
 
 /*
  * Report whether [start, end] is well-formed (end not below start)
  * and lies within the map's min and max offsets.
  */
 static inline bool
 vm_map_range_valid(vm_map_t map, vm_offset_t start, vm_offset_t end)
 {
 	return (start <= end && start >= vm_map_min(map) &&
 	    end <= vm_map_max(map));
 }
 
 #endif	/* KLD_MODULE */
 #endif	/* _KERNEL */
 
 /*
  * Shareable process virtual address space.
  *
  * List of locks
  *	(c)	const until freed
  */
 struct vmspace {
 	struct vm_map vm_map;	/* VM address map */
 	struct shmmap_state *vm_shm;	/* SYS5 shared memory private data XXX */
 	/* vm_swrss is copied into the replacement vmspace by vmspace_exec(). */
 	segsz_t vm_swrss;	/* resident set size before last swap */
 	segsz_t vm_tsize;	/* text size (pages) XXX */
 	segsz_t vm_dsize;	/* data size (pages) XXX */
 	/* vm_ssize is increased when the process stack is grown. */
 	segsz_t vm_ssize;	/* stack size (pages) */
 	caddr_t vm_taddr;	/* (c) user virtual address of text */
 	caddr_t vm_daddr;	/* (c) user virtual address of data */
 	caddr_t vm_maxsaddr;	/* user VA at max stack growth */
 	vm_offset_t vm_stacktop; /* top of the stack, may not be page-aligned */
+	vm_offset_t vm_shp_base; /* shared page address */
 	/* A count of 1 lets vmspace_unshare() return without copying. */
 	u_int vm_refcnt;	/* number of references */
 	/*
 	 * Keep the PMAP last, so that CPU-specific variations of that
 	 * structure on a single architecture don't result in offset
 	 * variations of the machine-independent fields in the vmspace.
 	 */
 	struct pmap vm_pmap;	/* private physical map */
 };
 
 #ifdef	_KERNEL
 /* Return a pointer to the pmap embedded in "vmspace". */
 static __inline pmap_t
 vmspace_pmap(struct vmspace *vmspace)
 {
 	pmap_t embedded;
 
 	embedded = &vmspace->vm_pmap;
 	return (embedded);
 }
 #endif	/* _KERNEL */
 
 #ifdef	_KERNEL
 /*
  *	Macros:		vm_map_lock, etc.
  *	Function:
  *		Perform locking on the data portion of a map.  Note that
  *		these macros mimic procedure calls returning void.  The
  *		semicolon is supplied by the user of these macros, not
  *		by the macros themselves.  The macros can safely be used
  *		as unbraced elements in a higher level statement.
  */
 
 void _vm_map_lock(vm_map_t map, const char *file, int line);
 void _vm_map_unlock(vm_map_t map, const char *file, int line);
 int _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line);
 void _vm_map_lock_read(vm_map_t map, const char *file, int line);
 void _vm_map_unlock_read(vm_map_t map, const char *file, int line);
 int _vm_map_trylock(vm_map_t map, const char *file, int line);
 int _vm_map_trylock_read(vm_map_t map, const char *file, int line);
 int _vm_map_lock_upgrade(vm_map_t map, const char *file, int line);
 void _vm_map_lock_downgrade(vm_map_t map, const char *file, int line);
 int vm_map_locked(vm_map_t map);
 void vm_map_wakeup(vm_map_t map);
 void vm_map_busy(vm_map_t map);
 void vm_map_unbusy(vm_map_t map);
 void vm_map_wait_busy(vm_map_t map);
 vm_offset_t vm_map_max_KBI(const struct vm_map *map);
 vm_offset_t vm_map_min_KBI(const struct vm_map *map);
 pmap_t vm_map_pmap_KBI(vm_map_t map);
 bool vm_map_range_valid_KBI(vm_map_t map, vm_offset_t start, vm_offset_t end);
 
 #define	vm_map_lock(map)	_vm_map_lock(map, LOCK_FILE, LOCK_LINE)
 #define	vm_map_unlock(map)	_vm_map_unlock(map, LOCK_FILE, LOCK_LINE)
 #define	vm_map_unlock_and_wait(map, timo)	\
 			_vm_map_unlock_and_wait(map, timo, LOCK_FILE, LOCK_LINE)
 #define	vm_map_lock_read(map)	_vm_map_lock_read(map, LOCK_FILE, LOCK_LINE)
 #define	vm_map_unlock_read(map)	_vm_map_unlock_read(map, LOCK_FILE, LOCK_LINE)
 #define	vm_map_trylock(map)	_vm_map_trylock(map, LOCK_FILE, LOCK_LINE)
 #define	vm_map_trylock_read(map)	\
 			_vm_map_trylock_read(map, LOCK_FILE, LOCK_LINE)
 #define	vm_map_lock_upgrade(map)	\
 			_vm_map_lock_upgrade(map, LOCK_FILE, LOCK_LINE)
 #define	vm_map_lock_downgrade(map)	\
 			_vm_map_lock_downgrade(map, LOCK_FILE, LOCK_LINE)
 
 long vmspace_resident_count(struct vmspace *vmspace);
 #endif	/* _KERNEL */
 
 /*
  * Copy-on-write flags for vm_map operations
  */
 #define	MAP_INHERIT_SHARE	0x00000001
 #define	MAP_COPY_ON_WRITE	0x00000002
 #define	MAP_NOFAULT		0x00000004
 #define	MAP_PREFAULT		0x00000008
 #define	MAP_PREFAULT_PARTIAL	0x00000010
 #define	MAP_DISABLE_SYNCER	0x00000020
 #define	MAP_CHECK_EXCL		0x00000040
 #define	MAP_CREATE_GUARD	0x00000080
 #define	MAP_DISABLE_COREDUMP	0x00000100
 #define	MAP_PREFAULT_MADVISE	0x00000200    /* from (user) madvise request */
 #define	MAP_WRITECOUNT		0x00000400
 #define	MAP_REMAP		0x00000800
 #define	MAP_STACK_GROWS_DOWN	0x00001000
 #define	MAP_STACK_GROWS_UP	0x00002000
 #define	MAP_ACC_CHARGED		0x00004000
 #define	MAP_ACC_NO_CHARGE	0x00008000
 #define	MAP_CREATE_STACK_GAP_UP	0x00010000
 #define	MAP_CREATE_STACK_GAP_DN	0x00020000
 #define	MAP_VN_EXEC		0x00040000
 #define	MAP_SPLIT_BOUNDARY_MASK	0x00180000
 
 #define	MAP_SPLIT_BOUNDARY_SHIFT 19
 
 /*
  * vm_fault option flags
  */
 #define	VM_FAULT_NORMAL	0x00	/* Nothing special */
 #define	VM_FAULT_WIRE	0x01	/* Wire the mapped page */
 #define	VM_FAULT_DIRTY	0x02	/* Dirty the page; use w/VM_PROT_COPY */
 #define	VM_FAULT_NOFILL	0x04	/* Fail if the pager doesn't have a copy */
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must
  * account for the map entry's "read_ahead" field being defined as an uint8_t.
  */
 #define	VM_FAULT_READ_AHEAD_MIN		7
 #define	VM_FAULT_READ_AHEAD_INIT	15
 #define	VM_FAULT_READ_AHEAD_MAX		min(atop(maxphys) - 1, UINT8_MAX)
 
 /*
  * The following "find_space" options are supported by vm_map_find().
  *
  * For VMFS_ALIGNED_SPACE, the desired alignment is specified to
  * the macro argument as log base 2 of the desired alignment.
  */
 #define	VMFS_NO_SPACE		0	/* don't find; use the given range */
 #define	VMFS_ANY_SPACE		1	/* find a range with any alignment */
 #define	VMFS_OPTIMAL_SPACE	2	/* find a range with optimal alignment*/
 #define	VMFS_SUPER_SPACE	3	/* find a superpage-aligned range */
 #define	VMFS_ALIGNED_SPACE(x)	((x) << 8) /* find a range with fixed alignment */
 
 /*
  * vm_map_wire and vm_map_unwire option flags
  */
 #define VM_MAP_WIRE_SYSTEM	0	/* wiring in a kernel map */
 #define VM_MAP_WIRE_USER	1	/* wiring in a user map */
 
 #define VM_MAP_WIRE_NOHOLES	0	/* region must not have holes */
 #define VM_MAP_WIRE_HOLESOK	2	/* region may have holes */
 
 #define VM_MAP_WIRE_WRITE	4	/* Validate writable. */
 
 /*
  * Callback type used by vm_map_entry_read_succ() to fetch map entries.
  * It is called as reader(token, addr, dest) to copy the map entry found at
  * 'addr' into '*dest'; 'token' is passed through unchanged from the caller.
  * A zero return value is treated as a failed copy.
  */
 typedef int vm_map_entry_reader(void *token, vm_map_entry_t addr, 
     vm_map_entry_t dest);
 
 #ifndef _KERNEL
 /*
  * Advance a private copy of a map entry to that entry's successor.
  *
  * On entry '*clone' holds a copy of some map entry.  Entries are never
  * dereferenced directly; each one is fetched by calling
  * reader(token, addr, clone), which copies the entry at 'addr' into
  * '*clone' and returns zero on failure.
  *
  * The entry at clone->right is read first.  If its 'left' link leads to an
  * entry that starts above the original start address, 'left' links are
  * followed until one leads to an entry with the original start address;
  * the last entry visited before that one is the successor.
  *
  * On success '*clone' is a copy of the successor and the successor's
  * address is returned.  NULL is returned as soon as any read fails.
  *
  * This function is made available to user-space code that needs to traverse
  * map entries.
  */
 static inline vm_map_entry_t
 vm_map_entry_read_succ(void *token, struct vm_map_entry *const clone,
     vm_map_entry_reader reader)
 {
 	vm_map_entry_t lchild, succ;
 	vm_offset_t orig_start;
 
 	/* Remember where we started; '*clone' is about to be overwritten. */
 	orig_start = clone->start;
 	succ = clone->right;
 	if (!reader(token, succ, clone))
 		return (NULL);
 
 	/* Peek at the left link of the candidate successor. */
 	lchild = clone->left;
 	if (!reader(token, lchild, clone))
 		return (NULL);
 
 	if (clone->start > orig_start) {
 		/* Keep following left links until one leads back to the start. */
 		for (;;) {
 			succ = lchild;
 			lchild = clone->left;
 			if (!reader(token, lchild, clone))
 				return (NULL);
 			if (clone->start == orig_start)
 				break;
 		}
 	}
 
 	/* Reload the successor so that '*clone' describes it on return. */
 	if (!reader(token, succ, clone))
 		return (NULL);
 	return (succ);
 }
 #endif				/* ! _KERNEL */
 
 #ifdef _KERNEL
 /* Kernel-only vm_map interface (declarations only). */
 boolean_t vm_map_check_protection(vm_map_t, vm_offset_t, vm_offset_t,
     vm_prot_t);
 int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t);
 int vm_map_find(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *,
     vm_size_t, vm_offset_t, int, vm_prot_t, vm_prot_t, int);
 int vm_map_find_min(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *,
     vm_size_t, vm_offset_t, vm_offset_t, int, vm_prot_t, vm_prot_t, int);
 int vm_map_find_aligned(vm_map_t map, vm_offset_t *addr, vm_size_t length,
     vm_offset_t max_addr, vm_offset_t alignment);
 int vm_map_fixed(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t,
     vm_size_t, vm_prot_t, vm_prot_t, int);
 vm_offset_t vm_map_findspace(vm_map_t, vm_offset_t, vm_size_t);
 int vm_map_inherit(vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t);
 void vm_map_init(vm_map_t, pmap_t, vm_offset_t, vm_offset_t);
 int vm_map_insert(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t,
     vm_offset_t, vm_prot_t, vm_prot_t, int);
 int vm_map_lookup(vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *,
     vm_object_t *, vm_pindex_t *, vm_prot_t *, boolean_t *);
 int vm_map_lookup_locked(vm_map_t *, vm_offset_t, vm_prot_t,
     vm_map_entry_t *, vm_object_t *, vm_pindex_t *, vm_prot_t *,
     boolean_t *);
 void vm_map_lookup_done(vm_map_t, vm_map_entry_t);
 boolean_t vm_map_lookup_entry(vm_map_t, vm_offset_t, vm_map_entry_t *);
 
 /*
  * Return the entry referenced by the 'right' link of the map's header
  * entry.  VM_MAP_ENTRY_FOREACH uses this as the starting point of an
  * iteration.
  */
 static inline vm_map_entry_t
 vm_map_entry_first(vm_map_t map)
 {
 	vm_map_entry_t first;
 
 	first = map->header.right;
 	return (first);
 }
 
 /*
  * Return the successor of 'entry'.
  *
  * The candidate is entry->right.  If the candidate's 'left' link leads to
  * an entry that does not start above 'entry', the candidate itself is
  * returned.  Otherwise 'left' links are followed until one points back at
  * 'entry', and the entry holding that link is returned.
  */
 static inline vm_map_entry_t
 vm_map_entry_succ(vm_map_entry_t entry)
 {
 	vm_map_entry_t succ;
 
 	succ = entry->right;
 	if (succ->left->start <= entry->start)
 		return (succ);
 	do {
 		succ = succ->left;
 	} while (succ->left != entry);
 	return (succ);
 }
 
 /*
  * Loop header that walks the entries of 'map' in successor order: 'it'
  * starts at vm_map_entry_first(map), is advanced with vm_map_entry_succ(),
  * and the loop ends when 'it' reaches the address of the map's header entry.
  * Both arguments are expanded more than once, so they should be free of
  * side effects.
  */
 #define VM_MAP_ENTRY_FOREACH(it, map)		\
 	for ((it) = vm_map_entry_first(map);	\
 	    (it) != &(map)->header;		\
 	    (it) = vm_map_entry_succ(it))
 
 /*
  * Two independent flag bits.
  * NOTE(review): presumably these are the values accepted by the 'flags'
  * argument of vm_map_protect(), selecting which of new_prot/new_maxprot is
  * applied -- confirm against the implementation.
  */
 #define	VM_MAP_PROTECT_SET_PROT		0x0001
 #define	VM_MAP_PROTECT_SET_MAXPROT	0x0002
 
 /* Further kernel-only vm_map declarations. */
 int vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
     vm_prot_t new_prot, vm_prot_t new_maxprot, int flags);
 int vm_map_remove(vm_map_t, vm_offset_t, vm_offset_t);
 void vm_map_try_merge_entries(vm_map_t map, vm_map_entry_t prev,
     vm_map_entry_t entry);
 void vm_map_startup(void);
 int vm_map_submap(vm_map_t, vm_offset_t, vm_offset_t, vm_map_t);
 int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
 int vm_map_madvise(vm_map_t, vm_offset_t, vm_offset_t, int);
 int vm_map_stack(vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
 int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags);
 int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags);
 int vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags);
 long vmspace_swap_count(struct vmspace *vmspace);
 void vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add);
 #endif				/* _KERNEL */
 #endif				/* _VM_MAP_ */