Index: head/sys/amd64/amd64/elf_machdep.c
===================================================================
--- head/sys/amd64/amd64/elf_machdep.c (revision 343963)
+++ head/sys/amd64/amd64/elf_machdep.c (revision 343964)
@@ -1,330 +1,331 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright 1996-1998 John D. Polstra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

struct sysentvec elf64_freebsd_sysvec = {
        .sv_size = SYS_MAXSYSCALL,
        .sv_table = sysent,
        .sv_errsize = 0,
        .sv_errtbl = NULL,
        .sv_transtrap = NULL,
        .sv_fixup = __elfN(freebsd_fixup),
        .sv_sendsig = sendsig,
        .sv_sigcode = sigcode,
        .sv_szsigcode = &szsigcode,
        .sv_name = "FreeBSD ELF64",
        .sv_coredump = __elfN(coredump),
        .sv_imgact_try = NULL,
        .sv_minsigstksz = MINSIGSTKSZ,
        .sv_pagesize = PAGE_SIZE,
        .sv_minuser = VM_MIN_ADDRESS,
        .sv_maxuser = VM_MAXUSER_ADDRESS,
        .sv_usrstack = USRSTACK,
        .sv_psstrings = PS_STRINGS,
        .sv_stackprot = VM_PROT_ALL,
        .sv_copyout_strings = exec_copyout_strings,
        .sv_setregs = exec_setregs,
        .sv_fixlimit = NULL,
        .sv_maxssiz = NULL,
-       .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP,
+       .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP |
+           SV_TIMEKEEP,
        .sv_set_syscall_retval = cpu_set_syscall_retval,
        .sv_fetch_syscall_args = cpu_fetch_syscall_args,
        .sv_syscallnames = syscallnames,
        .sv_shared_page_base = SHAREDPAGE,
        .sv_shared_page_len = PAGE_SIZE,
        .sv_schedtail = NULL,
        .sv_thread_detach = NULL,
        .sv_trap = NULL,
};
INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);

void
amd64_lower_shared_page(struct sysentvec *sv)
{
        if (hw_lower_amd64_sharedpage != 0) {
                sv->sv_maxuser -= PAGE_SIZE;
                sv->sv_shared_page_base -= PAGE_SIZE;
                sv->sv_usrstack -= PAGE_SIZE;
                sv->sv_psstrings -= PAGE_SIZE;
        }
}

/*
 * Do this fixup before INIT_SYSENTVEC (SI_ORDER_ANY) because the latter
 * uses the value of sv_shared_page_base.
*/ SYSINIT(elf64_sysvec_fixup, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) amd64_lower_shared_page, &elf64_freebsd_sysvec); static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); static Elf64_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld-kfreebsd-x86-64.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &kfreebsd_brand_info); void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf64_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf64_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_X86_64_IRELATIVE); } /* Process one elf relocation with addend. 
*/ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { Elf64_Addr *where, val; Elf32_Addr *where32, val32; Elf_Addr addr; Elf_Addr addend; Elf_Size rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); /* Addend is 32 bit on 32 bit relocs */ switch (rtype) { case R_X86_64_PC32: case R_X86_64_32S: case R_X86_64_PLT32: addend = *(Elf32_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } switch (rtype) { case R_X86_64_NONE: /* none */ break; case R_X86_64_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); val = addr + addend; if (error != 0) return -1; if (*where != val) *where = val; break; case R_X86_64_PC32: /* S + A - P */ case R_X86_64_PLT32: /* L + A - P, L is PLT location for the symbol, which we treat as S */ error = lookup(lf, symidx, 1, &addr); where32 = (Elf32_Addr *)where; val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where); if (error != 0) return -1; if (*where32 != val32) *where32 = val32; break; case R_X86_64_32S: /* S + A sign extend */ error = lookup(lf, symidx, 1, &addr); val32 = (Elf32_Addr)(addr + addend); where32 = (Elf32_Addr *)where; if (error != 0) return -1; if (*where32 != val32) *where32 = val32; break; case R_X86_64_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return (-1); break; case R_X86_64_GLOB_DAT: /* S */ case R_X86_64_JMP_SLOT: /* XXX need addend + offset */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_X86_64_RELATIVE: /* B + A */ addr = relocbase + addend; val = addr; if (*where != val) *where = val; break; case R_X86_64_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %ld\n", rtype); return (-1); } return (0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/arm/arm/elf_machdep.c =================================================================== --- head/sys/arm/arm/elf_machdep.c (revision 343963) +++ head/sys/arm/arm/elf_machdep.c (revision 343964) @@ -1,320 +1,320 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif static boolean_t elf32_arm_abi_supported(struct image_params *); u_long elf_hwcap; u_long elf_hwcap2; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = #if __ARM_ARCH >= 6 - SV_SHP | SV_TIMEKEEP | + SV_ASLR | SV_SHP | SV_TIMEKEEP | #endif SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_hwcap2 = &elf_hwcap2, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static boolean_t elf32_arm_abi_supported(struct image_params *imgp) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; /* * When configured for EABI, FreeBSD supports EABI vesions 4 and 5. 
*/ if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary (rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (FALSE); } return (TRUE); } void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { #ifdef VFP mcontext_vfp_t vfp; if (dst != NULL) { get_vfpcontext(td, &vfp); *off = elf32_populate_note(NT_ARM_VFP, &vfp, dst, sizeof(vfp), NULL); } else *off = elf32_populate_note(NT_ARM_VFP, NULL, NULL, sizeof(vfp), NULL); #endif } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* * It is possible for the compiler to emit relocations for unaligned data. * We handle this situation with these inlines. */ #define RELOC_ALIGNED_P(x) \ (((uintptr_t)(x) & (sizeof(void *) - 1)) == 0) static __inline Elf_Addr load_ptr(Elf_Addr *where) { Elf_Addr res; if (RELOC_ALIGNED_P(where)) return *where; memcpy(&res, where, sizeof(res)); return (res); } static __inline void store_ptr(Elf_Addr *where, Elf_Addr val) { if (RELOC_ALIGNED_P(where)) *where = val; else memcpy(where, &val, sizeof(val)); } #undef RELOC_ALIGNED_P /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = load_ptr(where); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_ARM_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (load_ptr(where) != addr) store_ptr(where, addr); } return (0); } switch (rtype) { case R_ARM_NONE: /* none */ break; case R_ARM_ABS32: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; store_ptr(where, addr + load_ptr(where)); break; case R_ARM_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_ARM_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error == 0) { store_ptr(where, addr); return (0); } return (-1); case R_ARM_RELATIVE: break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* * The pmap code does not do an icache sync upon establishing executable * mappings in the kernel pmap. It's an optimization based on the fact * that kernel memory allocations always have EXECUTABLE protection even * when the memory isn't going to hold executable code. The only time * kernel memory holding instructions does need a sync is after loading * a kernel module, and that's when this function gets called. 
* * This syncs data and instruction caches after loading a module. We * don't worry about the kernel itself (lf->id is 1) as locore.S did * that on entry. Even if data cache maintenance was done by IO code, * the relocation fixup process creates dirty cache entries that we must * write back before doing icache sync. The instruction cache sync also * invalidates the branch predictor cache on platforms that have one. */ if (lf->id == 1) return (0); #if __ARM_ARCH >= 6 dcache_wb_pou((vm_offset_t)lf->address, (vm_size_t)lf->size); icache_inv_all(); #else cpu_dcache_wb_range((vm_offset_t)lf->address, (vm_size_t)lf->size); cpu_l2cache_wb_range((vm_offset_t)lf->address, (vm_size_t)lf->size); cpu_icache_sync_range((vm_offset_t)lf->address, (vm_size_t)lf->size); #endif return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- head/sys/compat/freebsd32/freebsd32_misc.c (revision 343963) +++ head/sys/compat/freebsd32/freebsd32_misc.c (revision 343964) @@ -1,3474 +1,3477 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ktrace.h" #define __ELF_WORD_SIZE 32 #ifdef COMPAT_FREEBSD11 #define _WANT_FREEBSD11_KEVENT #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef INET #include #endif #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #endif #include #include #include #include #include #include #include FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD"); #ifdef __amd64__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); CTASSERT(sizeof(struct bintime32) == 12); #endif CTASSERT(sizeof(struct statfs32) == 256); #ifdef __amd64__ CTASSERT(sizeof(struct rusage32) == 72); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); #ifdef __amd64__ CTASSERT(sizeof(struct kevent32) == 56); #else CTASSERT(sizeof(struct kevent32) == 64); #endif CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); #ifdef __amd64__ CTASSERT(sizeof(struct stat32) == 208); CTASSERT(sizeof(struct freebsd11_stat32) == 96); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct wrusage32 wru32; struct __wrusage wru, *wrup; struct siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, 
uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct statfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN)); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct statfs32 stat32; size_t count, size, copycount; int error; count = uap->bufsize / sizeof(struct statfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (size > 0) { sp = buf; copycount = count; while (copycount > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; copycount--; } free(buf, M_STATFS); } if (error == 0) td->td_retval[0] = count; return (error); } #endif #ifdef COMPAT_FREEBSD10 int freebsd10_freebsd32_pipe(struct thread *td, struct freebsd10_freebsd32_pipe_args *uap) { return (freebsd10_pipe(td, (struct freebsd10_pipe_args*)uap)); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, const char *fname, enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv) { char *argp, *envp; u_int32_t *p32, arg; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. 
*/ error = exec_args_add_fname(args, fname, segflg); if (error != 0) goto err_exit; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = exec_args_add_arg(args, argp, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = exec_args_add_env(args, envp, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL); post_execve(td, error, oldvmspace); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL); } post_execve(td, error, oldvmspace); return (error); } int freebsd32_mknodat(struct thread *td, struct freebsd32_mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, PAIR32TO64(dev_t, uap->dev))); } int freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ) != 0) prot |= PROT_EXEC; #endif return (kern_mprotect(td, (uintptr_t)PTRIN(uap->addr), uap->len, prot)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos))); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos))); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread 
*td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; uint64_t e; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); #if BYTE_ORDER == LITTLE_ENDIAN ks32[i].data1 = kevp[i].data; ks32[i].data2 = kevp[i].data >> 32; #else ks32[i].data1 = kevp[i].data >> 32; ks32[i].data2 = kevp[i].data; #endif PTROUT_CP(kevp[i], ks32[i], udata); for (j = 0; j < nitems(kevp->ext); j++) { e = kevp[i].ext[j]; #if BYTE_ORDER == LITTLE_ENDIAN ks32[i].ext64[2 * j] = e; ks32[i].ext64[2 * j + 1] = e >> 32; #else ks32[i].ext64[2 * j] = e >> 32; ks32[i].ext64[2 * j + 1] = e; #endif } } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. 
*/ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; uint64_t e; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); kevp[i].data = PAIR32TO64(uint64_t, ks32[i].data); PTRIN_CP(ks32[i], kevp[i], udata); for (j = 0; j < nitems(kevp->ext); j++) { #if BYTE_ORDER == LITTLE_ENDIAN e = ks32[i].ext64[2 * j + 1]; e <<= 32; e += ks32[i].ext64[2 * j]; #else e = ks32[i].ext64[2 * j]; e <<= 32; e += ks32[i].ext64[2 * j + 1]; #endif kevp[i].ext[j] = e; } } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent_copyout, .k_copyin = freebsd32_kevent_copyin, }; #ifdef KTRACE struct kevent32 *eventlist = uap->eventlist; #endif int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32", UIO_USERSPACE, uap->changelist, uap->nchanges, sizeof(struct kevent32)); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32", UIO_USERSPACE, eventlist, td->td_retval[0], sizeof(struct kevent32)); #endif return (error); } #ifdef COMPAT_FREEBSD11 static int freebsd32_kevent11_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd11_freebsd32_kevent_args *uap; struct kevent32_freebsd11 ks32[KQ_NEVENTS]; int i, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. 
*/ static int freebsd32_kevent11_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd11_freebsd32_kevent_args *uap; struct kevent32_freebsd11 ks32[KQ_NEVENTS]; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); for (j = 0; j < nitems(kevp->ext); j++) kevp[i].ext[j] = 0; } done: return (error); } int freebsd11_freebsd32_kevent(struct thread *td, struct freebsd11_freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent11_copyout, .k_copyin = freebsd32_kevent11_copyin, }; #ifdef KTRACE struct kevent32_freebsd11 *eventlist = uap->eventlist; #endif int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32_freebsd11", UIO_USERSPACE, uap->changelist, uap->nchanges, sizeof(struct kevent32_freebsd11)); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32_freebsd11", UIO_USERSPACE, eventlist, td->td_retval[0], sizeof(struct kevent32_freebsd11)); #endif return (error); } #endif int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error == 0) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = 
kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #ifndef __mips__ #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #else #define FREEBSD32_ALIGNBYTES (sizeof(long) - 1) #endif #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static size_t freebsd32_cmsg_convert(const struct cmsghdr *cm, void *data, socklen_t datalen) { size_t copylen; union { struct timespec32 ts; struct timeval32 tv; struct bintime32 bt; } tmp32; union { struct timespec ts; struct timeval tv; struct bintime bt; } *in; in = data; copylen = 0; switch (cm->cmsg_level) { case SOL_SOCKET: switch (cm->cmsg_type) { case SCM_TIMESTAMP: TV_CP(*in, tmp32, tv); copylen = sizeof(tmp32.tv); break; case SCM_BINTIME: BT_CP(*in, tmp32, bt); copylen = sizeof(tmp32.bt); break; case SCM_REALTIME: case SCM_MONOTONIC: TS_CP(*in, tmp32, ts); copylen = sizeof(tmp32.ts); break; default: break; } default: break; } if (copylen == 0) return (datalen); KASSERT((datalen >= copylen), ("corrupted cmsghdr")); bcopy(&tmp32, data, copylen); 
return (copylen); } static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen, datalen_out, oldclen; int error; caddr_t ctlbuf; int len, maxlen, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; maxlen = msg->msg_controllen; msg->msg_controllen = 0; ctlbuf = msg->msg_control; for (m = control; m != NULL && len > 0; m = m->m_next) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; datalen_out = freebsd32_cmsg_convert(cm, data, datalen); /* * Copy out the message header. Preserve the native * message size in case we need to inspect the message * contents later. */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); goto exit; } oldclen = cm->cmsg_len; cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen_out; error = copyout(cm, ctlbuf, copylen); cm->cmsg_len = oldclen; if (error != 0) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); copylen = datalen_out; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); break; } /* Copy out the message data. */ error = copyout(data, ctlbuf, copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } msg->msg_controllen += FREEBSD32_ALIGN(sizeof(*cm)) + datalen_out; } } if (len == 0 && m != NULL) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); } exit: return (error); } int freebsd32_recvmsg(td, uap) struct thread *td; struct freebsd32_recvmsg_args /* { int s; struct msghdr32 *msg; int flags; } */ *uap; { struct msghdr msg; struct msghdr32 m32; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) { if (error != 0) m_dispose_extcontrolm(control); m_freem(control); } return (error); } /* * Copy-in the array of control messages constructed using alignment * and padding suitable for a 32-bit environment and construct an * mbuf using alignment and padding suitable for a 64-bit kernel. * The alignment and padding are defined indirectly by CMSG_DATA(), * CMSG_SPACE() and CMSG_LEN(). */ static int freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen) { struct mbuf *m; void *md; u_int idx, len, msglen; int error; buflen = FREEBSD32_ALIGN(buflen); if (buflen > MCLBYTES) return (EINVAL); /* * Iterate over the buffer and get the length of each message * in there. This has 32-bit alignment and padding. 
Use it to * determine the length of these messages when using 64-bit * alignment and padding. */ idx = 0; len = 0; while (idx < buflen) { error = copyin(buf + idx, &msglen, sizeof(msglen)); if (error) return (error); if (msglen < sizeof(struct cmsghdr)) return (EINVAL); msglen = FREEBSD32_ALIGN(msglen); if (idx + msglen > buflen) return (EINVAL); idx += msglen; msglen += CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); len += CMSG_ALIGN(msglen); } if (len > MCLBYTES) return (EINVAL); m = m_get(M_WAITOK, MT_CONTROL); if (len > MLEN) MCLGET(m, M_WAITOK); m->m_len = len; md = mtod(m, void *); while (buflen > 0) { error = copyin(buf, md, sizeof(struct cmsghdr)); if (error) break; msglen = *(u_int *)md; msglen = FREEBSD32_ALIGN(msglen); /* Modify the message length to account for alignment. */ *(u_int *)md = msglen + CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); md = (char *)md + CMSG_ALIGN(sizeof(struct cmsghdr)); buf += FREEBSD32_ALIGN(sizeof(struct cmsghdr)); buflen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); msglen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); if (msglen > 0) { error = copyin(buf, md, msglen); if (error) break; md = (char *)md + CMSG_ALIGN(msglen); buf += msglen; buflen -= msglen; } } if (error) m_free(m); else *mp = m; return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct msghdr32 m32; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = freebsd32_copyin_control(&control, msg.msg_control, msg.msg_controllen); if (error) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } int freebsd32_recvfrom(struct thread *td, struct freebsd32_recvfrom_args *uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen, sizeof(msg.msg_namelen)); if (error) return (error); } else { msg.msg_namelen = 0; } msg.msg_name = PTRIN(uap->from); msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(uap->buf); aiov.iov_len = uap->len; msg.msg_control = NULL; msg.msg_flags = uap->flags; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL); if (error == 0 && uap->fromlenaddr) error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr), sizeof (msg.msg_namelen)); return (error); } int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int 
freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap)
{
        struct timeval32 s32[2];
        struct timeval s[2], *sp;
        int error;

        if (uap->tptr != NULL) {
                error = copyin(uap->tptr, s32, sizeof(s32));
                if (error)
                        return (error);
                CP(s32[0], s[0], tv_sec);
                CP(s32[0], s[0], tv_usec);
                CP(s32[1], s[1], tv_sec);
                CP(s32[1], s[1], tv_usec);
                sp = s;
        } else
                sp = NULL;
        return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
            sp, UIO_SYSSPACE));
}

int
freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap)
{
        struct timeval32 s32[2];
        struct timeval s[2], *sp;
        int error;

        if (uap->tptr != NULL) {
                error = copyin(uap->tptr, s32, sizeof(s32));
                if (error)
                        return (error);
                CP(s32[0], s[0], tv_sec);
                CP(s32[0], s[0], tv_usec);
                CP(s32[1], s[1], tv_sec);
                CP(s32[1], s[1], tv_usec);
                sp = s;
        } else
                sp = NULL;
        return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE));
}

int
freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap)
{
        struct timeval32 s32[2];
        struct timeval s[2], *sp;
        int error;

        if (uap->tptr != NULL) {
                error = copyin(uap->tptr, s32, sizeof(s32));
                if (error)
                        return (error);
                CP(s32[0], s[0], tv_sec);
                CP(s32[0], s[0], tv_usec);
                CP(s32[1], s[1], tv_sec);
                CP(s32[1], s[1], tv_usec);
                sp = s;
        } else
                sp = NULL;
        return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE));
}

int
freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap)
{
        struct timeval32 s32[2];
        struct timeval s[2], *sp;
        int error;

        if (uap->times != NULL) {
                error = copyin(uap->times, s32, sizeof(s32));
                if (error)
                        return (error);
                CP(s32[0], s[0], tv_sec);
                CP(s32[0], s[0], tv_usec);
                CP(s32[1], s[1], tv_sec);
                CP(s32[1], s[1], tv_usec);
                sp = s;
        } else
                sp = NULL;
        return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
            sp, UIO_SYSSPACE));
}

int
freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap)
{
        struct timespec32 ts32[2];
        struct timespec ts[2], *tsp;
        int error;

        if (uap->times != NULL) {
                error = copyin(uap->times, ts32, sizeof(ts32));
                if (error)
                        return (error);
                CP(ts32[0], ts[0], tv_sec);
                CP(ts32[0], ts[0], tv_nsec);
                CP(ts32[1], ts[1], tv_sec);
                CP(ts32[1], ts[1], tv_nsec);
                tsp = ts;
        } else
                tsp = NULL;
        return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE));
}

int
freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap)
{
        struct timespec32 ts32[2];
        struct timespec ts[2], *tsp;
        int error;

        if (uap->times != NULL) {
                error = copyin(uap->times, ts32, sizeof(ts32));
                if (error)
                        return (error);
                CP(ts32[0], ts[0], tv_sec);
                CP(ts32[0], ts[0], tv_nsec);
                CP(ts32[1], ts[1], tv_sec);
                CP(ts32[1], ts[1], tv_nsec);
                tsp = ts;
        } else
                tsp = NULL;
        return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
            tsp, UIO_SYSSPACE, uap->flag));
}

int
freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap)
{
        struct timeval32 tv32;
        struct timeval delta, olddelta, *deltap;
        int error;

        if (uap->delta) {
                error = copyin(uap->delta, &tv32, sizeof(tv32));
                if (error)
                        return (error);
                CP(tv32, delta, tv_sec);
                CP(tv32, delta, tv_usec);
                deltap = &delta;
        } else
                deltap = NULL;
        error = kern_adjtime(td, deltap, &olddelta);
        if (uap->olddelta && error == 0) {
                CP(olddelta, tv32, tv_sec);
                CP(olddelta, tv32, tv_usec);
                error = copyout(&tv32, uap->olddelta, sizeof(tv32));
        }
        return (error);
}

#ifdef COMPAT_FREEBSD4
int
freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap)
{
        struct statfs32 s32;
        struct statfs *sp;
        int error;

        sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
        error = kern_statfs(td, uap->path, UIO_USERSPACE, sp);
        if (error == 0)
{ copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = td->td_uretoff.tdu_off; td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif #if defined(COMPAT_FREEBSD11) int freebsd11_freebsd32_getdirentries(struct thread *td, struct freebsd11_freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } int freebsd11_freebsd32_getdents(struct thread *td, struct freebsd11_freebsd32_getdents_args *uap) { struct freebsd11_freebsd32_getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return (freebsd11_freebsd32_getdirentries(td, &ap)); } #endif /* COMPAT_FREEBSD11 */ #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, 
uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. 
*/ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } AUDIT_ARG_FD(uap->fd); if ((error = fget_read(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_padding0 = 0; out->st_padding1 = 0; #ifdef __STAT32_TIME_T_EXT out->st_atim_ext = 0; out->st_mtim_ext = 0; out->st_ctim_ext = 0; out->st_btim_ext = 0; #endif bzero(out->st_spare, sizeof(out->st_spare)); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { bzero(out, sizeof(*out)); CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); out->st_size = MIN(in->st_size, INT32_MAX); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub, NULL); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; struct ostat32 sb32; 
int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fhstat(struct thread *td, struct freebsd32_fhstat_args *uap) { struct stat sb; struct stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #if defined(COMPAT_FREEBSD11) extern int ino64_trunc_error; static int freebsd11_cvtstat32(struct stat *in, struct freebsd11_stat32 *out) { CP(*in, *out, st_ino); if (in->st_ino != out->st_ino) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_ino = UINT32_MAX; break; } } CP(*in, *out, st_nlink); if (in->st_nlink != out->st_nlink) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_nlink = UINT16_MAX; break; } } out->st_dev = in->st_dev; if (out->st_dev != in->st_dev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } CP(*in, *out, st_mode); CP(*in, *out, st_uid); CP(*in, *out, st_gid); out->st_rdev = in->st_rdev; if (out->st_rdev != in->st_rdev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_lspare = 0; bzero((char *)&out->st_birthtim + sizeof(out->st_birthtim), sizeof(*out) - offsetof(struct freebsd11_stat32, st_birthtim) - sizeof(out->st_birthtim)); return (0); } int freebsd11_freebsd32_stat(struct thread *td, struct freebsd11_freebsd32_stat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstat(struct thread *td, struct freebsd11_freebsd32_fstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_fstat(td, uap->fd, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstatat(struct thread *td, struct freebsd11_freebsd32_fstatat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->buf, sizeof (sb32)); return (error); } int freebsd11_freebsd32_lstat(struct thread *td, struct freebsd11_freebsd32_lstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fhstat(struct thread *td, struct freebsd11_freebsd32_fhstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; 
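freebsd11_cvtstat32() above keys its overflow handling off ino64_trunc_error: 0 truncates silently, 1 fails with EOVERFLOW, and 2 clamps the value to the old field's maximum. A minimal standalone sketch of that policy follows; the function and parameter names are illustrative, not kernel code.

/*
 * Sketch of the truncation policy used by freebsd11_cvtstat32() above.
 * trunc_error mirrors ino64_trunc_error: 0 truncate, 1 EOVERFLOW, 2 clamp.
 */
#include <errno.h>
#include <stdint.h>

int
narrow_ino(uint64_t ino64, uint32_t *ino32, int trunc_error)
{
        *ino32 = (uint32_t)ino64;
        if (*ino32 == ino64)
                return (0);             /* value fits, nothing to do */
        switch (trunc_error) {
        case 1:
                return (EOVERFLOW);     /* refuse to hand back a wrong inode */
        case 2:
                *ino32 = UINT32_MAX;    /* clamp to a recognizable sentinel */
                return (0);
        default:
                return (0);             /* silently truncated */
        }
}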
struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #endif int freebsd32___sysctl(struct thread *td, struct freebsd32___sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; uint32_t tmp; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { oldlen = 0; } if (error != 0) return (EFAULT); error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error) return (error); if (uap->oldlenp) suword32(uap->oldlenp, j); return (0); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = htonl(j32_v0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. 
*/ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { u_int32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } int ofreebsd32_sigprocmask(struct thread *td, struct ofreebsd32_sigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD); SIG2OSIG(oset, td->td_retval[0]); return (error); } int ofreebsd32_sigpending(struct thread *td, struct ofreebsd32_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t siglist; PROC_LOCK(p); siglist = p->p_siglist; SIGSETOR(siglist, td->td_siglist); PROC_UNLOCK(p); SIG2OSIG(siglist, td->td_retval[0]); return (0); } struct sigvec32 { u_int32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, 
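The ofreebsd32_sigaction()/sigprocmask()/sigpending()/sigvec() handlers above shuttle masks through OSIG2SIG()/SIG2OSIG() because the 4.3BSD-era interfaces carry the signal mask as a single 32-bit word. A rough illustration of that translation, using stand-in types rather than the signalvar.h macros:

/*
 * Illustrative old-style signal mask translation: only the first word of
 * the modern multi-word set survives the round trip.  Types are stand-ins.
 */
#include <stdint.h>
#include <string.h>

typedef uint32_t osigset_demo_t;
struct sigset_demo { uint32_t bits[4]; };       /* stand-in for sigset_t */

void
osig_to_sig(osigset_demo_t osig, struct sigset_demo *set)
{
        memset(set, 0, sizeof(*set));   /* SIGEMPTYSET()-style clear */
        set->bits[0] = osig;            /* signals 1..32 only */
}

osigset_demo_t
sig_to_osig(const struct sigset_demo *set)
{
        return (set->bits[0]);          /* higher signals are dropped */
}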
uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } int ofreebsd32_sigblock(struct thread *td, struct ofreebsd32_sigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsetmask(struct thread *td, struct ofreebsd32_sigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsuspend(struct thread *td, struct ofreebsd32_sigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } struct sigstack32 { u_int32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { return (freebsd32_user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, uap->rqtp, uap->rmtp)); } int freebsd32_clock_nanosleep(struct thread *td, struct freebsd32_clock_nanosleep_args *uap) { int error; error = freebsd32_user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, uap->rmtp); return (kern_posix_error(td, error)); } static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error; error = copyin(ua_rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); if (ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0 && !useracc(ua_rmtp, sizeof(rmt32), VM_PROT_WRITE)) return (EFAULT); error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { int error2; CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, ua_rmtp, sizeof(rmt32)); if (error2) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, 
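The nanosleep and clock_* wrappers above widen a 32-bit timespec on the way in and narrow the remainder on the way out, reporting a remainder only for an interrupted relative sleep. A hedged userland sketch of that round trip; the 32-bit field widths are illustrative, not the exact compat layout.

/*
 * Sketch of the timespec widening/narrowing done by
 * freebsd32_user_clock_nanosleep() above, expressed over nanosleep(2).
 */
#include <errno.h>
#include <stdint.h>
#include <time.h>

struct timespec32_demo {
        int32_t tv_sec;         /* stand-in for the narrower compat layout */
        int32_t tv_nsec;
};

int
nanosleep32_demo(const struct timespec32_demo *rqt32,
    struct timespec32_demo *rmt32)
{
        struct timespec rqt, rmt;
        int error;

        rqt.tv_sec = rqt32->tv_sec;     /* widen the request */
        rqt.tv_nsec = rqt32->tv_nsec;
        error = nanosleep(&rqt, &rmt);
        if (error != 0 && errno == EINTR && rmt32 != NULL) {
                rmt32->tv_sec = (int32_t)rmt.tv_sec;    /* narrow remainder */
                rmt32->tv_nsec = (int32_t)rmt.tv_nsec;
        }
        return (error == 0 ? 0 : errno);
}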
uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_ktimer_create(struct thread *td, struct freebsd32_ktimer_create_args *uap) { struct sigevent32 ev32; struct sigevent ev, *evp; int error, id; if (uap->evp == NULL) { evp = NULL; } else { evp = &ev; error = copyin(uap->evp, &ev32, sizeof(ev32)); if (error != 0) return (error); error = convert_sigevent32(&ev32, &ev); if (error != 0) return (error); } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int freebsd32_ktimer_settime(struct thread *td, struct freebsd32_ktimer_settime_args *uap) { struct itimerspec32 val32, oval32; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val32, sizeof(val32)); if (error != 0) return (error); ITS_CP(val32, val); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) { ITS_CP(oval, oval32); error = copyout(&oval32, uap->ovalue, sizeof(oval32)); } return (error); } int freebsd32_ktimer_gettime(struct thread *td, struct freebsd32_ktimer_gettime_args *uap) { struct itimerspec32 val32; struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) { ITS_CP(val, val32); error = copyout(&val32, uap->value, sizeof(val32)); } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, 
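freebsd32_clock_getcpuclockid2() above, like the posix_fallocate/fadvise wrappers later in this file, rebuilds a 64-bit argument from the two 32-bit words the ILP32 syscall ABI delivers, via PAIR32TO64(). The sketch below shows that reassembly; the field names and the little-endian word order are assumptions, since the real macro picks the order from the target's byte order.

/*
 * Illustrative reassembly of a 64-bit value passed as a 32-bit pair,
 * as PAIR32TO64(id_t, uap->id) does above.
 */
#include <stdint.h>
#include <stdio.h>

struct pair32 {
        uint32_t lo;    /* low 32 bits on a little-endian target */
        uint32_t hi;    /* high 32 bits */
};

#define PAIR32TO64_DEMO(type, p) \
        ((type)(p).lo | ((type)(p).hi << 32))

int
main(void)
{
        struct pair32 off = { .lo = 0x89abcdefU, .hi = 0x01234567U };

        printf("offset = %#llx\n",
            (unsigned long long)PAIR32TO64_DEMO(uint64_t, off));
        return (0);
}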
sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } #ifndef _FREEBSD32_SYSPROTO_H_ struct freebsd32_sigqueue_args { pid_t pid; int signum; /* union sigval32 */ int value; }; #endif int freebsd32_sigqueue(struct thread *td, struct freebsd32_sigqueue_args *uap) { union sigval sv; /* * On 32-bit ABIs, sival_int and sival_ptr are the same. * On 64-bit little-endian ABIs, the low bits are the same. * In 64-bit big-endian ABIs, sival_int overlaps with * sival_ptr's HIGH bits. We choose to support sival_int * rather than sival_ptr in this case as it seems to be * more common. */ bzero(&sv, sizeof(sv)); sv.sival_int = uap->value; return (kern_sigqueue(td, uap->pid, uap->signum, &sv)); } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { return (kern_cpuset_setid(td, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { return (kern_cpuset_getid(td, uap->level, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { return (kern_cpuset_getaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { return (kern_cpuset_setaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask)); } int freebsd32_cpuset_getdomain(struct thread *td, struct freebsd32_cpuset_getdomain_args *uap) { return (kern_cpuset_getdomain(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy)); } int freebsd32_cpuset_setdomain(struct thread *td, struct freebsd32_cpuset_setdomain_args *uap) { return (kern_cpuset_setdomain(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy)); } int freebsd32_nmount(struct thread *td, struct 
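The comment in freebsd32_sigqueue() above explains why the compat layer forwards sival_int rather than widening a pointer: in a 64-bit process the 32-bit integer member shares storage with only one half of sival_ptr, and which half depends on byte order. A small illustrative program (a stand-in union, not the system sigval):

/*
 * Demonstration of the union sigval overlap described above.  On a 64-bit
 * little-endian host the integer lands in the low half of the pointer
 * representation; on big-endian it lands in the high half.
 */
#include <stdint.h>
#include <stdio.h>

union sigval_demo {
        int     sival_int;
        void    *sival_ptr;
};

int
main(void)
{
        union sigval_demo sv = { .sival_ptr = 0 };

        sv.sival_int = 0x1234;
        printf("sival_ptr bits: %#llx\n",
            (unsigned long long)(uintptr_t)sv.sival_ptr);
        return (0);
}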
freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap) { struct uio *auio; uint64_t flags; int error; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_module_handler(struct module *mod, int what, void *arg) { return (kern_syscall_module_handler(freebsd32_sysent, mod, what, arg)); } int syscall32_helper_register(struct syscall_helper_data *sd, int flags) { return (kern_syscall_helper_register(freebsd32_sysent, sd, flags)); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { return (kern_syscall_helper_unregister(freebsd32_sysent, sd)); } register_t * freebsd32_copyout_strings(struct image_params *imgp) { int argc, envc, i; u_int32_t *vectp; char *stringp; uintptr_t destp; u_int32_t *stack_base; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent-> sv_psstrings; if (imgp->proc->p_sysent->sv_sigcode_base == 0) szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); else szsigcode = 0; destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(uint32_t)); copyout(imgp->proc->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. 
*/ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; destp -= sizeof(pagesizes32); destp = rounddown2(destp, sizeof(uint32_t)); imgp->pagesizes = destp; copyout(pagesizes32, (void *)destp, sizeof(pagesizes32)); imgp->pagesizeslen = sizeof(pagesizes32); destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); vectp = (uint32_t *)destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. */ vectp -= howmany(AT_COUNT * sizeof(Elf32_Auxinfo), sizeof(*vectp)); } /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat *stat; struct kld32_file_stat *stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld32_file_stat_1) && version != sizeof(struct kld32_file_stat)) return (EINVAL); stat = malloc(sizeof(*stat), M_TEMP, M_WAITOK | M_ZERO); stat32 = malloc(sizeof(*stat32), M_TEMP, M_WAITOK | M_ZERO); error = kern_kldstat(td, uap->fileid, stat); if (error == 0) { bcopy(&stat->name[0], &stat32->name[0], sizeof(stat->name)); CP(*stat, *stat32, refs); CP(*stat, *stat32, id); PTROUT_CP(*stat, *stat32, address); CP(*stat, *stat32, size); bcopy(&stat->pathname[0], &stat32->pathname[0], sizeof(stat->pathname)); stat32->version = version; error = copyout(stat32, uap->stat, version); } free(stat, M_TEMP); free(stat32, M_TEMP); return (error); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)); return (kern_posix_error(td, error)); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice); return (kern_posix_error(td, error)); } int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) { CP(*sig32, *sig, sigev_notify); switch (sig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_THREAD_ID: CP(*sig32, *sig, sigev_notify_thread_id); /* FALLTHROUGH */ case SIGEV_SIGNAL: CP(*sig32, *sig, sigev_signo); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); 
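The procctl(2) switch that follows gains PROC_ASLR_CTL and PROC_ASLR_STATUS cases, which move a plain int in and out just like the TRACE and TRAPCAP controls, and imgact_elf.c later in this change adds the kern.elf<N>.aslr.{enable,pie_enable,honor_sbrk} knobs. A hedged userland sketch that reads the global knob and queries per-process status; the PROC_ASLR_ACTIVE flag tested here is taken from procctl(2) and is an assumption, not something shown in this diff.

/*
 * Query the global ASLR knob and this process's ASLR status.
 * kern.elf32.aslr.enable is the analogous knob for 32-bit images.
 */
#include <sys/types.h>
#include <sys/procctl.h>
#include <sys/sysctl.h>
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        int enabled, status;
        size_t len = sizeof(enabled);

        if (sysctlbyname("kern.elf64.aslr.enable", &enabled, &len,
            NULL, 0) == 0)
                printf("kern.elf64.aslr.enable = %d\n", enabled);

        if (procctl(P_PID, getpid(), PROC_ASLR_STATUS, &status) == 0)
                printf("ASLR %sactive for this process\n",
                    (status & PROC_ASLR_ACTIVE) != 0 ? "" : "not ");
        return (0);
}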
break; case SIGEV_KEVENT: CP(*sig32, *sig, sigev_notify_kqueue); CP(*sig32, *sig, sigev_notify_kevent_flags); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; union { struct procctl_reaper_pids32 rp; } x32; int error, error1, flags, signum; switch (uap->com) { + case PROC_ASLR_CTL: case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); if (error != 0) return (error); CP(x32.rp, x.rp, rp_count); PTRIN_CP(x32.rp, x.rp, rp_pids); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; break; case PROC_PDEATHSIG_CTL: error = copyin(uap->data, &signum, sizeof(signum)); if (error != 0) return (error); data = &signum; break; case PROC_PDEATHSIG_STATUS: data = &signum; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; case PROC_PDEATHSIG_STATUS: if (error == 0) error = copyout(&signum, uap->data, sizeof(signum)); break; } return (error); } int freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap) { long tmp; switch (uap->cmd) { /* * Do unsigned conversion for arg when operation * interprets it as flags or pointer. 
*/ case F_SETLK_REMOTE: case F_SETLKW: case F_SETLK: case F_GETLK: case F_SETFD: case F_SETFL: case F_OGETLK: case F_OSETLK: case F_OSETLKW: tmp = (unsigned int)(uap->arg); break; default: tmp = uap->arg; break; } return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, tmp)); } int freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; sigset_t set, *ssp; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; if (uap->set != NULL) { error = copyin(uap->set, &set, sizeof(set)); if (error != 0) return (error); ssp = &set; } else ssp = NULL; return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); } int freebsd32_sched_rr_get_interval(struct thread *td, struct freebsd32_sched_rr_get_interval_args *uap) { struct timespec ts; struct timespec32 ts32; int error; error = kern_sched_rr_get_interval(td, uap->pid, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->interval, sizeof(ts32)); } return (error); } Index: head/sys/compat/ia32/ia32_sysvec.c =================================================================== --- head/sys/compat/ia32/ia32_sysvec.c (revision 343963) +++ head/sys/compat/ia32/ia32_sysvec.c (revision 343964) @@ -1,234 +1,234 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Doug Rabson * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct ia32_mcontext) == 640); CTASSERT(sizeof(struct ia32_ucontext) == 704); CTASSERT(sizeof(struct ia32_sigframe) == 800); CTASSERT(sizeof(struct siginfo32) == 64); #ifdef COMPAT_FREEBSD4 CTASSERT(sizeof(struct ia32_mcontext4) == 260); CTASSERT(sizeof(struct ia32_ucontext4) == 324); CTASSERT(sizeof(struct ia32_sigframe4) == 408); #endif extern const char *freebsd32_syscallnames[]; static SYSCTL_NODE(_compat, OID_AUTO, ia32, CTLFLAG_RW, 0, "ia32 mode"); static u_long ia32_maxdsiz = IA32_MAXDSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ia32_maxdsiz, 0, ""); u_long ia32_maxssiz = IA32_MAXSSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ia32_maxssiz, 0, ""); static u_long ia32_maxvmem = IA32_MAXVMEM; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxvmem, CTLFLAG_RWTUN, &ia32_maxvmem, 0, ""); struct sysentvec ia32_freebsd_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = ia32_sigcode, .sv_szsigcode = &sz_ia32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = IA32_PAGE_SIZE, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, - .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | + .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec); static Elf32_Brandinfo ia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_info); static Elf32_Brandinfo ia32_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_oinfo); static Elf32_Brandinfo kia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = 
&ia32_freebsd_sysvec, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kia32_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } void ia32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ia32_maxdsiz != 0) { if (rl->rlim_cur > ia32_maxdsiz) rl->rlim_cur = ia32_maxdsiz; if (rl->rlim_max > ia32_maxdsiz) rl->rlim_max = ia32_maxdsiz; } break; case RLIMIT_STACK: if (ia32_maxssiz != 0) { if (rl->rlim_cur > ia32_maxssiz) rl->rlim_cur = ia32_maxssiz; if (rl->rlim_max > ia32_maxssiz) rl->rlim_max = ia32_maxssiz; } break; case RLIMIT_VMEM: if (ia32_maxvmem != 0) { if (rl->rlim_cur > ia32_maxvmem) rl->rlim_cur = ia32_maxvmem; if (rl->rlim_max > ia32_maxvmem) rl->rlim_max = ia32_maxvmem; } break; } } Index: head/sys/i386/i386/elf_machdep.c =================================================================== --- head/sys/i386/i386/elf_machdep.c (revision 343963) +++ head/sys/i386/i386/elf_machdep.c (revision 343964) @@ -1,297 +1,297 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP | - SV_TIMEKEEP, + .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | + SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); static Elf32_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kfreebsd_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { npxgetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_386_IRELATIVE); } #define ERI_LOCAL 0x0001 /* Process one elf relocation with addend. 
*/ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup, int flags) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if ((flags & ERI_LOCAL) != 0) { if (rtype == R_386_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } return (0); } switch (rtype) { case R_386_NONE: /* none */ break; case R_386_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend; if (*where != addr) *where = addr; break; case R_386_PC32: /* S + A - P */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend - (Elf_Addr)where; if (*where != addr) *where = addr; break; case R_386_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_386_GLOB_DAT: /* S */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_386_RELATIVE: break; case R_386_IRELATIVE: addr = relocbase + addend; addr = ((Elf_Addr (*)(void))addr)(); if (*where != addr) *where = addr; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup, 0)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup, ERI_LOCAL)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/kern/imgact_elf.c =================================================================== --- head/sys/kern/imgact_elf.c (revision 343963) +++ head/sys/kern/imgact_elf.c (revision 343964) @@ -1,2537 +1,2663 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2017 Dell EMC * Copyright (c) 2000-2001, 2003 David O'Brien * Copyright (c) 1995-1996 Søren Schmidt * Copyright (c) 1996 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ELF_NOTE_ROUNDSIZE 4 #define OLD_EI_BRAND 8 static int __elfN(check_header)(const Elf_Ehdr *hdr); static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int interp_name_len, int32_t *osrel, uint32_t *fctl0); static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry, size_t pagesize); static int __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, size_t pagesize); static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel); static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel); static boolean_t __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel, uint32_t *fctl0); static vm_prot_t __elfN(trans_prot)(Elf_Word); static Elf_Word __elfN(untrans_prot)(vm_prot_t); SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0, ""); #define CORE_BUF_SIZE (16 * 1024) int __elfN(fallback_brand) = -1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort"); static int elf_legacy_coredump = 0; SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, &elf_legacy_coredump, 0, "include all and only RW pages in core dumps"); int __elfN(nxstack) = #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */ || \ (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__) || \ defined(__riscv) 1; #else 0; #endif SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, nxstack, CTLFLAG_RW, &__elfN(nxstack), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack"); #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) int i386_read_exec = 0; SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif +SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW, 0, + ""); +#define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, 
__ELF_WORD_SIZE), _aslr) + +static int __elfN(aslr_enabled) = 0; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN, + &__elfN(aslr_enabled), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) + ": enable address map randomization"); + +static int __elfN(pie_aslr_enabled) = 0; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN, + &__elfN(pie_aslr_enabled), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) + ": enable address map randomization for PIE binaries"); + +static int __elfN(aslr_honor_sbrk) = 1; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, + &__elfN(aslr_honor_sbrk), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); + static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define trunc_page_ps(va, ps) rounddown2(va, ps) #define round_page_ps(va, ps) roundup2(va, ps) #define aligned(a, t) (trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a)) static const char FREEBSD_ABI_VENDOR[] = "FreeBSD"; Elf_Brandnote __elfN(freebsd_brandnote) = { .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), .hdr.n_descsz = sizeof(int32_t), .hdr.n_type = NT_FREEBSD_ABI_TAG, .vendor = FREEBSD_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = __elfN(freebsd_trans_osrel) }; static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) { uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); *osrel = *(const int32_t *)(p); return (true); } static const char GNU_ABI_VENDOR[] = "GNU"; static int GNU_KFREEBSD_ABI_DESC = 3; Elf_Brandnote __elfN(kfreebsd_brandnote) = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = kfreebsd_trans_osrel }; static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); desc = (const Elf32_Word *)p; if (desc[0] != GNU_KFREEBSD_ABI_DESC) return (false); /* * Debian GNU/kFreeBSD embed the earliest compatible kernel version * (__FreeBSD_version: Rxx) in the LSB way. */ *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3]; return (true); } int __elfN(insert_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == NULL) { elf_brand_list[i] = entry; break; } } if (i == MAX_BRANDS) { printf("WARNING: %s: could not insert brandinfo entry: %p\n", __func__, entry); return (-1); } return (0); } int __elfN(remove_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == entry) { elf_brand_list[i] = NULL; break; } } if (i == MAX_BRANDS) return (-1); return (0); } int __elfN(brand_inuse)(Elf_Brandinfo *entry) { struct proc *p; int rval = FALSE; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sysent == entry->sysvec) { rval = TRUE; break; } } sx_sunlock(&allproc_lock); return (rval); } static Elf_Brandinfo * __elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int interp_name_len, int32_t *osrel, uint32_t *fctl0) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; Elf_Brandinfo *bi, *bi_m; boolean_t ret; int i; /* * We support four types of branding -- (1) the ELF EI_OSABI field * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string * branding w/in the ELF header, (3) path of the `interp_path' * field, and (4) the ".note.ABI-tag" ELF section. 
*/ /* Look for an ".note.ABI-tag" ELF section */ bi_m = NULL; for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL) continue; if (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0) continue; if (hdr->e_machine == bi->machine && (bi->flags & (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) { ret = __elfN(check_note)(imgp, bi->brand_note, osrel, fctl0); /* Give brand a chance to veto check_note's guess */ if (ret && bi->header_supported) ret = bi->header_supported(imgp); /* * If note checker claimed the binary, but the * interpreter path in the image does not * match default one for the brand, try to * search for other brands with the same * interpreter. Either there is better brand * with the right interpreter, or, failing * this, we return first brand which accepted * our note and, optionally, header. */ if (ret && bi_m == NULL && interp != NULL && (bi->interp_path == NULL || (strlen(bi->interp_path) + 1 != interp_name_len || strncmp(interp, bi->interp_path, interp_name_len) != 0))) { bi_m = bi; ret = 0; } if (ret) return (bi); } } if (bi_m != NULL) return (bi_m); /* If the executable has a brand, search for it in the brand list. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 || (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)) continue; if (hdr->e_machine == bi->machine && (hdr->e_ident[EI_OSABI] == bi->brand || (bi->compat_3_brand != NULL && strcmp((const char *)&hdr->e_ident[OLD_EI_BRAND], bi->compat_3_brand) == 0))) { /* Looks good, but give brand a chance to veto */ if (bi->header_supported == NULL || bi->header_supported(imgp)) { /* * Again, prefer strictly matching * interpreter path. */ if (interp_name_len == 0 && bi->interp_path == NULL) return (bi); if (bi->interp_path != NULL && strlen(bi->interp_path) + 1 == interp_name_len && strncmp(interp, bi->interp_path, interp_name_len) == 0) return (bi); if (bi_m == NULL) bi_m = bi; } } } if (bi_m != NULL) return (bi_m); /* No known brand, see if the header is recognized by any brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY || bi->header_supported == NULL) continue; if (hdr->e_machine == bi->machine) { ret = bi->header_supported(imgp); if (ret) return (bi); } } /* Lacking a known brand, search for a recognized interpreter. 
*/ if (interp != NULL) { for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & (BI_BRAND_NOTE_MANDATORY | BI_BRAND_ONLY_STATIC)) != 0) continue; if (hdr->e_machine == bi->machine && bi->interp_path != NULL && /* ELF image p_filesz includes terminating zero */ strlen(bi->interp_path) + 1 == interp_name_len && strncmp(interp, bi->interp_path, interp_name_len) == 0 && (bi->header_supported == NULL || bi->header_supported(imgp))) return (bi); } } /* Lacking a recognized interpreter, try the default brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 || (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)) continue; if (hdr->e_machine == bi->machine && __elfN(fallback_brand) == bi->brand && (bi->header_supported == NULL || bi->header_supported(imgp))) return (bi); } return (NULL); } static int __elfN(check_header)(const Elf_Ehdr *hdr) { Elf_Brandinfo *bi; int i; if (!IS_ELF(*hdr) || hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA || hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_phentsize != sizeof(Elf_Phdr) || hdr->e_version != ELF_TARG_VER) return (ENOEXEC); /* * Make sure we have at least one brand for this machine. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi != NULL && bi->machine == hdr->e_machine) break; } if (i == MAX_BRANDS) return (ENOEXEC); return (0); } static int __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot) { struct sf_buf *sf; int error; vm_offset_t off; /* * Create the page if it doesn't exist yet. Ignore errors. */ vm_map_fixed(map, NULL, 0, trunc_page(start), round_page(end) - trunc_page(start), VM_PROT_ALL, VM_PROT_ALL, MAP_CHECK_EXCL); /* * Find the page from the underlying object. */ if (object != NULL) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, end - start); vm_imgact_unmap_page(sf); if (error != 0) return (KERN_FAILURE); } return (KERN_SUCCESS); } static int __elfN(map_insert)(struct image_params *imgp, vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow) { struct sf_buf *sf; vm_offset_t off; vm_size_t sz; int error, locked, rv; if (start != trunc_page(start)) { rv = __elfN(map_partial)(map, object, offset, start, round_page(start), prot); if (rv != KERN_SUCCESS) return (rv); offset += round_page(start) - start; start = round_page(start); } if (end != round_page(end)) { rv = __elfN(map_partial)(map, object, offset + trunc_page(end) - start, trunc_page(end), end, prot); if (rv != KERN_SUCCESS) return (rv); end = trunc_page(end); } if (start >= end) return (KERN_SUCCESS); if ((offset & PAGE_MASK) != 0) { /* * The mapping is not page aligned. This means that we have * to copy the data. 
*/ rv = vm_map_fixed(map, NULL, 0, start, end - start, prot | VM_PROT_WRITE, VM_PROT_ALL, MAP_CHECK_EXCL); if (rv != KERN_SUCCESS) return (rv); if (object == NULL) return (KERN_SUCCESS); for (; start < end; start += sz) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); sz = end - start; if (sz > PAGE_SIZE - off) sz = PAGE_SIZE - off; error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, sz); vm_imgact_unmap_page(sf); if (error != 0) return (KERN_FAILURE); offset += sz; } } else { vm_object_reference(object); rv = vm_map_fixed(map, object, offset, start, end - start, prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL); if (rv != KERN_SUCCESS) { locked = VOP_ISLOCKED(imgp->vp); VOP_UNLOCK(imgp->vp, 0); vm_object_deallocate(object); vn_lock(imgp->vp, locked | LK_RETRY); return (rv); } } return (KERN_SUCCESS); } static int __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, size_t pagesize) { struct sf_buf *sf; size_t map_len; vm_map_t map; vm_object_t object; vm_offset_t off, map_addr; int error, rv, cow; size_t copy_len; vm_ooffset_t file_addr; /* * It's necessary to fail if the filsz + offset taken from the * header is greater than the actual file pager object's size. * If we were to allow this, then the vm_map_find() below would * walk right off the end of the file object and into the ether. * * While I'm here, might as well check for something else that * is invalid: filsz cannot be greater than memsz. */ if ((filsz != 0 && (off_t)filsz + offset > imgp->attr->va_size) || filsz > memsz) { uprintf("elf_load_section: truncated ELF file\n"); return (ENOEXEC); } object = imgp->object; map = &imgp->proc->p_vmspace->vm_map; map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize); file_addr = trunc_page_ps(offset, pagesize); /* * We have two choices. We can either clear the data in the last page * of an oversized mapping, or we can start the anon mapping a page * early and copy the initialized data into that first page. We * choose the second. */ if (filsz == 0) map_len = 0; else if (memsz > filsz) map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr; else map_len = round_page_ps(offset + filsz, pagesize) - file_addr; if (map_len != 0) { /* cow flags: don't dump readonly sections in core */ cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP); rv = __elfN(map_insert)(imgp, map, object, file_addr, /* file offset */ map_addr, /* virtual start */ map_addr + map_len,/* virtual end */ prot, cow); if (rv != KERN_SUCCESS) return (EINVAL); /* we can stop now if we've covered it all */ if (memsz == filsz) return (0); } /* * We have to get the remaining bit of the file into the first part * of the oversized map segment. This is normally because the .data * segment in the file is extended to provide bss. It's a neat idea * to try and save a page, but it's a pain in the behind to implement. */ copy_len = filsz == 0 ? 0 : (offset + filsz) - trunc_page_ps(offset + filsz, pagesize); map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize); map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) - map_addr; /* This had damn well better be true! 
*/ if (map_len != 0) { rv = __elfN(map_insert)(imgp, map, NULL, 0, map_addr, map_addr + map_len, prot, 0); if (rv != KERN_SUCCESS) return (EINVAL); } if (copy_len != 0) { sf = vm_imgact_map_page(object, offset + filsz); if (sf == NULL) return (EIO); /* send the page fragment to user space */ off = trunc_page_ps(offset + filsz, pagesize) - trunc_page(offset + filsz); error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)map_addr, copy_len); vm_imgact_unmap_page(sf); if (error != 0) return (error); } /* * Remove write access to the page if it was only granted by map_insert * to allow copyout. */ if ((prot & VM_PROT_WRITE) == 0) vm_map_protect(map, trunc_page(map_addr), round_page(map_addr + map_len), prot, FALSE); return (0); } /* * Load the file "file" into memory. It may be either a shared object * or an executable. * * The "addr" reference parameter is in/out. On entry, it specifies * the address where a shared object should be loaded. If the file is * an executable, this value is ignored. On exit, "addr" specifies * where the file was actually loaded. * * The "entry" reference parameter is out only. On exit, it specifies * the entry point for the loaded file. */ static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry, size_t pagesize) { struct { struct nameidata nd; struct vattr attr; struct image_params image_params; } *tempdata; const Elf_Ehdr *hdr = NULL; const Elf_Phdr *phdr = NULL; struct nameidata *nd; struct vattr *attr; struct image_params *imgp; vm_prot_t prot; u_long rbase; u_long base_addr = 0; int error, i, numsegs; #ifdef CAPABILITY_MODE /* * XXXJA: This check can go away once we are sufficiently confident * that the checks in namei() are correct. */ if (IN_CAPABILITY_MODE(curthread)) return (ECAPMODE); #endif tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK); nd = &tempdata->nd; attr = &tempdata->attr; imgp = &tempdata->image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->attr = attr; imgp->firstpage = NULL; imgp->image_header = NULL; imgp->object = NULL; imgp->execlabel = NULL; NDINIT(nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_SYSSPACE, file, curthread); if ((error = namei(nd)) != 0) { nd->ni_vp = NULL; goto fail; } NDFREE(nd, NDF_ONLY_PNBUF); imgp->vp = nd->ni_vp; /* * Check permissions, modes, uid, etc on the file, and "open" it. */ error = exec_check_permissions(imgp); if (error) goto fail; error = exec_map_first_page(imgp); if (error) goto fail; /* * Also make certain that the interpreter stays the same, so set * its VV_TEXT flag, too. 
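The mapping arithmetic in load_section() above splits a segment whose p_memsz exceeds p_filesz into whole pages mapped from the file plus a trailing partial page that is copied into the start of the anonymous (bss) mapping. A standalone sketch of that arithmetic, assuming 4 KB pages; the segment values below are invented for illustration:

#include <stdio.h>

#define PAGE_SZ         4096UL                  /* assumed page size */
#define trunc_pg(x)     ((x) & ~(PAGE_SZ - 1))
#define round_pg(x)     (((x) + PAGE_SZ - 1) & ~(PAGE_SZ - 1))

int
main(void)
{
        unsigned long offset = 0x12345;         /* p_offset */
        unsigned long vaddr = 0x401345;         /* p_vaddr + et_dyn_addr */
        unsigned long filsz = 0x8200;           /* p_filesz */
        unsigned long memsz = 0x20000;          /* p_memsz, includes bss */

        unsigned long file_addr = trunc_pg(offset);
        unsigned long map_addr = trunc_pg(vaddr);
        /* memsz > filsz: map only whole pages of initialized data. */
        unsigned long map_len = trunc_pg(offset + filsz) - file_addr;
        /* Partial page of file data is copied into the anon mapping. */
        unsigned long copy_len = (offset + filsz) - trunc_pg(offset + filsz);
        unsigned long anon_addr = trunc_pg(vaddr + filsz);
        unsigned long anon_len = round_pg(vaddr + memsz) - anon_addr;

        printf("map   %#lx + %#lx from file offset %#lx\n",
            map_addr, map_len, file_addr);
        printf("anon  %#lx + %#lx, copy_len %#lx\n",
            anon_addr, anon_len, copy_len);
        return (0);
}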
*/ VOP_SET_TEXT(nd->ni_vp); imgp->object = nd->ni_vp->v_object; hdr = (const Elf_Ehdr *)imgp->image_header; if ((error = __elfN(check_header)(hdr)) != 0) goto fail; if (hdr->e_type == ET_DYN) rbase = *addr; else if (hdr->e_type == ET_EXEC) rbase = 0; else { error = ENOEXEC; goto fail; } /* Only support headers that fit within first page for now */ if ((hdr->e_phoff > PAGE_SIZE) || (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) { error = ENOEXEC; goto fail; } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { error = ENOEXEC; goto fail; } for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) { /* Loadable segment */ prot = __elfN(trans_prot)(phdr[i].p_flags); error = __elfN(load_section)(imgp, phdr[i].p_offset, (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase, phdr[i].p_memsz, phdr[i].p_filesz, prot, pagesize); if (error != 0) goto fail; /* * Establish the base address if this is the * first segment. */ if (numsegs == 0) base_addr = trunc_page(phdr[i].p_vaddr + rbase); numsegs++; } } *addr = base_addr; *entry = (unsigned long)hdr->e_entry + rbase; fail: if (imgp->firstpage) exec_unmap_first_page(imgp); if (nd->ni_vp) vput(nd->ni_vp); free(tempdata, M_TEMP); return (error); } +static u_long +__CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv, + u_int align) +{ + u_long rbase, res; + + MPASS(vm_map_min(map) <= minv); + MPASS(maxv <= vm_map_max(map)); + MPASS(minv < maxv); + MPASS(minv + align < maxv); + arc4rand(&rbase, sizeof(rbase), 0); + res = roundup(minv, (u_long)align) + rbase % (maxv - minv); + res &= ~((u_long)align - 1); + if (res >= maxv) + res -= align; + KASSERT(res >= minv, + ("res %#lx < minv %#lx, maxv %#lx rbase %#lx", + res, minv, maxv, rbase)); + KASSERT(res < maxv, + ("res %#lx > maxv %#lx, minv %#lx rbase %#lx", + res, maxv, minv, rbase)); + return (res); +} + +/* + * Impossible et_dyn_addr initial value indicating that the real base + * must be calculated later with some randomization applied. + */ +#define ET_DYN_ADDR_RAND 1 + static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) { struct thread *td; const Elf_Ehdr *hdr; const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs; struct vmspace *vmspace; + vm_map_t map; const char *err_str, *newinterp; char *interp, *interp_buf, *path; Elf_Brandinfo *brand_info; struct sysentvec *sv; vm_prot_t prot; u_long text_size, data_size, total_size, text_addr, data_addr; u_long seg_size, seg_addr, addr, baddr, et_dyn_addr, entry, proghdr; + u_long maxalign, mapsz, maxv, maxv1; uint32_t fctl0; int32_t osrel; int error, i, n, interp_name_len, have_interp; hdr = (const Elf_Ehdr *)imgp->image_header; /* * Do we have a valid ELF header ? * * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later * if particular brand doesn't support it. */ if (__elfN(check_header)(hdr) != 0 || (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)) return (-1); /* * From here on down, we return an errno, not -1, as we've * detected an ELF file. 
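The new rnd_*_base() helper above picks a random, suitably aligned address inside [minv, maxv). A user-space sketch of the same computation; arc4random_buf(3) stands in for the kernel's arc4rand(), align must be a power of two, and the range used in main() is made up:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t
rnd_base(uint64_t minv, uint64_t maxv, uint64_t align)
{
        uint64_t rbase, res;

        assert(minv < maxv && minv + align < maxv);
        arc4random_buf(&rbase, sizeof(rbase));
        /* Round minv up, add a random offset, then floor to alignment. */
        res = ((minv + align - 1) & ~(align - 1)) + rbase % (maxv - minv);
        res &= ~(align - 1);
        if (res >= maxv)
                res -= align;
        assert(res >= minv && res < maxv);
        return (res);
}

int
main(void)
{
        uint64_t base = rnd_base(0x200000, 0x40000000, 0x200000);

        printf("randomized base: %#jx\n", (uintmax_t)base);
        return (0);
}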
*/ if ((hdr->e_phoff > PAGE_SIZE) || (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) { /* Only support headers in first page for now */ uprintf("Program headers not in the first page\n"); return (ENOEXEC); } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { uprintf("Unaligned program headers\n"); return (ENOEXEC); } n = error = 0; baddr = 0; osrel = 0; fctl0 = 0; text_size = data_size = total_size = text_addr = data_addr = 0; entry = proghdr = 0; interp_name_len = 0; err_str = newinterp = NULL; interp = interp_buf = NULL; td = curthread; + maxalign = PAGE_SIZE; + mapsz = 0; for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: if (n == 0) baddr = phdr[i].p_vaddr; + if (phdr[i].p_align > maxalign) + maxalign = phdr[i].p_align; + mapsz += phdr[i].p_memsz; n++; break; case PT_INTERP: /* Path to interpreter */ if (phdr[i].p_filesz < 2 || phdr[i].p_filesz > MAXPATHLEN) { uprintf("Invalid PT_INTERP\n"); error = ENOEXEC; goto ret; } if (interp != NULL) { uprintf("Multiple PT_INTERP headers\n"); error = ENOEXEC; goto ret; } interp_name_len = phdr[i].p_filesz; if (phdr[i].p_offset > PAGE_SIZE || interp_name_len > PAGE_SIZE - phdr[i].p_offset) { VOP_UNLOCK(imgp->vp, 0); interp_buf = malloc(interp_name_len + 1, M_TEMP, M_WAITOK); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); error = vn_rdwr(UIO_READ, imgp->vp, interp_buf, interp_name_len, phdr[i].p_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, NULL, td); if (error != 0) { uprintf("i/o error PT_INTERP %d\n", error); goto ret; } interp_buf[interp_name_len] = '\0'; interp = interp_buf; } else { interp = __DECONST(char *, imgp->image_header) + phdr[i].p_offset; if (interp[interp_name_len - 1] != '\0') { uprintf("Invalid PT_INTERP\n"); error = ENOEXEC; goto ret; } } break; case PT_GNU_STACK: if (__elfN(nxstack)) imgp->stack_prot = __elfN(trans_prot)(phdr[i].p_flags); imgp->stack_sz = phdr[i].p_memsz; break; } } brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len, &osrel, &fctl0); if (brand_info == NULL) { uprintf("ELF binary type \"%u\" not known.\n", hdr->e_ident[EI_OSABI]); error = ENOEXEC; goto ret; } + sv = brand_info->sysvec; et_dyn_addr = 0; if (hdr->e_type == ET_DYN) { if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) { uprintf("Cannot execute shared object\n"); error = ENOEXEC; goto ret; } /* * Honour the base load address from the dso if it is * non-zero for some reason. */ - if (baddr == 0) - et_dyn_addr = ET_DYN_LOAD_ADDR; + if (baddr == 0) { + if ((sv->sv_flags & SV_ASLR) == 0 || + (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) + et_dyn_addr = ET_DYN_LOAD_ADDR; + else if ((__elfN(pie_aslr_enabled) && + (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) || + (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0) + et_dyn_addr = ET_DYN_ADDR_RAND; + else + et_dyn_addr = ET_DYN_LOAD_ADDR; + } } - sv = brand_info->sysvec; if (interp != NULL && brand_info->interp_newpath != NULL) newinterp = brand_info->interp_newpath; /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. * * The VV_TEXT flag prevents modifications to the executable while * the vnode is unlocked. 
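For an ET_DYN image with a zero base address, the code above chooses between the fixed ET_DYN_LOAD_ADDR and the ET_DYN_ADDR_RAND marker depending on SV_ASLR, the NT_FREEBSD_FCTL_ASLR_DISABLE feature-control bit, the __elfN(pie_aslr_enabled) knob, and the per-process P2_ASLR_* flags. A condensed, illustrative predicate (not kernel code) capturing that order of precedence:

#include <stdbool.h>
#include <stdio.h>

static bool
pie_base_is_randomized(bool sv_aslr, bool fctl_aslr_disable,
    bool pie_aslr_enabled, bool p2_aslr_disable, bool p2_aslr_enable)
{
        if (!sv_aslr || fctl_aslr_disable)
                return (false);         /* ABI or the binary opted out */
        if (pie_aslr_enabled && !p2_aslr_disable)
                return (true);          /* global PIE knob, no opt-out */
        return (p2_aslr_enable);        /* per-process P2_ASLR_ENABLE */
}

int
main(void)
{
        printf("%d\n", pie_base_is_randomized(true, false, true,
            false, false));             /* 1: PIE knob enables ASLR */
        printf("%d\n", pie_base_is_randomized(true, true, true,
            false, false));             /* 0: binary carries opt-out note */
        return (0);
}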
*/ VOP_UNLOCK(imgp->vp, 0); + /* + * Decide whether to enable randomization of user mappings. + * First, reset user preferences for the setid binaries. + * Then, account for the support of the randomization by the + * ABI, by user preferences, and make special treatment for + * PIE binaries. + */ + if (imgp->credential_setid) { + PROC_LOCK(imgp->proc); + imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE); + PROC_UNLOCK(imgp->proc); + } + if ((sv->sv_flags & SV_ASLR) == 0 || + (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 || + (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) { + KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND, + ("et_dyn_addr == RAND and !ASLR")); + } else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 || + (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) || + et_dyn_addr == ET_DYN_ADDR_RAND) { + imgp->map_flags |= MAP_ASLR; + /* + * If user does not care about sbrk, utilize the bss + * grow region for mappings as well. We can select + * the base for the image anywere and still not suffer + * from the fragmentation. + */ + if (!__elfN(aslr_honor_sbrk) || + (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0) + imgp->map_flags |= MAP_ASLR_IGNSTART; + } + error = exec_new_vmspace(imgp, sv); + vmspace = imgp->proc->p_vmspace; + map = &vmspace->vm_map; + imgp->proc->p_sysent = sv; + maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK); + if (et_dyn_addr == ET_DYN_ADDR_RAND) { + KASSERT((map->flags & MAP_ASLR) != 0, + ("ET_DYN_ADDR_RAND but !MAP_ASLR")); + et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map, + vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA), + /* reserve half of the address space to interpreter */ + maxv / 2, 1UL << flsl(maxalign)); + } + vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) goto ret; for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: /* Loadable segment */ if (phdr[i].p_memsz == 0) break; prot = __elfN(trans_prot)(phdr[i].p_flags); error = __elfN(load_section)(imgp, phdr[i].p_offset, (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr, phdr[i].p_memsz, phdr[i].p_filesz, prot, sv->sv_pagesize); if (error != 0) goto ret; /* * If this segment contains the program headers, * remember their virtual address for the AT_PHDR * aux entry. Static binaries don't usually include * a PT_PHDR entry. */ if (phdr[i].p_offset == 0 && hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize <= phdr[i].p_filesz) proghdr = phdr[i].p_vaddr + hdr->e_phoff + et_dyn_addr; seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr); seg_size = round_page(phdr[i].p_memsz + phdr[i].p_vaddr + et_dyn_addr - seg_addr); /* * Make the largest executable segment the official * text segment and all others data. * * Note that obreak() assumes that data_addr + * data_size == end of data load area, and the ELF * file format expects segments to be sorted by * address. If multiple data segments exist, the * last one will be used. */ if (phdr[i].p_flags & PF_X && text_size < seg_size) { text_size = seg_size; text_addr = seg_addr; } else { data_size = seg_size; data_addr = seg_addr; } total_size += seg_size; break; case PT_PHDR: /* Program header table info */ proghdr = phdr[i].p_vaddr + et_dyn_addr; break; default: break; } } if (data_addr == 0 && data_size == 0) { data_addr = text_addr; data_size = text_size; } entry = (u_long)hdr->e_entry + et_dyn_addr; /* * Check limits. It should be safe to check the * limits after loading the segments since we do * not actually fault in all the segments pages. 
*/ PROC_LOCK(imgp->proc); if (data_size > lim_cur_proc(imgp->proc, RLIMIT_DATA)) err_str = "Data segment size exceeds process limit"; else if (text_size > maxtsiz) err_str = "Text segment size exceeds system limit"; else if (total_size > lim_cur_proc(imgp->proc, RLIMIT_VMEM)) err_str = "Total segment size exceeds process limit"; else if (racct_set(imgp->proc, RACCT_DATA, data_size) != 0) err_str = "Data segment size exceeds resource limit"; else if (racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) err_str = "Total segment size exceeds resource limit"; if (err_str != NULL) { PROC_UNLOCK(imgp->proc); uprintf("%s\n", err_str); error = ENOMEM; goto ret; } - vmspace = imgp->proc->p_vmspace; vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr; /* * We load the dynamic linker where a userland call * to mmap(0, ...) would put it. The rationale behind this * calculation is that it leaves room for the heap to grow to * its maximum allowed size. */ addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td, RLIMIT_DATA)); + if ((map->flags & MAP_ASLR) != 0) { + maxv1 = maxv / 2 + addr / 2; + MPASS(maxv1 >= addr); /* No overflow */ + map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1, + MAXPAGESIZES > 1 ? pagesizes[1] : pagesizes[0]); + } else { + map->anon_loc = addr; + } PROC_UNLOCK(imgp->proc); imgp->entry_addr = entry; if (interp != NULL) { have_interp = FALSE; VOP_UNLOCK(imgp->vp, 0); + if ((map->flags & MAP_ASLR) != 0) { + /* Assume that interpeter fits into 1/4 of AS */ + maxv1 = maxv / 2 + addr / 2; + MPASS(maxv1 >= addr); /* No overflow */ + addr = __CONCAT(rnd_, __elfN(base))(map, addr, + maxv1, PAGE_SIZE); + } if (brand_info->emul_path != NULL && brand_info->emul_path[0] != '\0') { path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); snprintf(path, MAXPATHLEN, "%s%s", brand_info->emul_path, interp); error = __elfN(load_file)(imgp->proc, path, &addr, &imgp->entry_addr, sv->sv_pagesize); free(path, M_TEMP); if (error == 0) have_interp = TRUE; } if (!have_interp && newinterp != NULL && (brand_info->interp_path == NULL || strcmp(interp, brand_info->interp_path) == 0)) { error = __elfN(load_file)(imgp->proc, newinterp, &addr, &imgp->entry_addr, sv->sv_pagesize); if (error == 0) have_interp = TRUE; } if (!have_interp) { error = __elfN(load_file)(imgp->proc, interp, &addr, &imgp->entry_addr, sv->sv_pagesize); } vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) { uprintf("ELF interpreter %s not found, error %d\n", interp, error); goto ret; } } else addr = et_dyn_addr; /* * Construct auxargs table (used by the fixup routine) */ elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK); elf_auxargs->execfd = -1; elf_auxargs->phdr = proghdr; elf_auxargs->phent = hdr->e_phentsize; elf_auxargs->phnum = hdr->e_phnum; elf_auxargs->pagesz = PAGE_SIZE; elf_auxargs->base = addr; elf_auxargs->flags = 0; elf_auxargs->entry = entry; elf_auxargs->hdr_eflags = hdr->e_flags; imgp->auxargs = elf_auxargs; imgp->interpreted = 0; imgp->reloc_base = addr; imgp->proc->p_osrel = osrel; imgp->proc->p_fctl0 = fctl0; imgp->proc->p_elf_machine = hdr->e_machine; imgp->proc->p_elf_flags = hdr->e_flags; ret: free(interp_buf, M_TEMP); return (error); } #define suword __CONCAT(suword, __ELF_WORD_SIZE) int __elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp) { Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs; Elf_Auxinfo *argarray, *pos; Elf_Addr *base, 
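Both randomized placements above cap the range with maxv1 = maxv / 2 + addr / 2 rather than (addr + maxv) / 2, so the midpoint computation cannot wrap. A tiny demonstration, with values exaggerated purely to show the wrap:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t addr = 0xfffffffff0000000ULL;
        uint64_t maxv = 0xfffffffffff00000ULL;

        /* Naive midpoint overflows 64 bits and wraps below addr. */
        printf("naive: %#jx\n", (uintmax_t)((addr + maxv) / 2));
        /* Halving first keeps the result between addr and maxv. */
        printf("safe : %#jx\n", (uintmax_t)(addr / 2 + maxv / 2));
        return (0);
}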
*auxbase; int error; base = (Elf_Addr *)*stack_base; auxbase = base + imgp->args->argc + 1 + imgp->args->envc + 1; argarray = pos = malloc(AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_EHDRFLAGS, args->hdr_eflags); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp); AUXARGS_ENTRY(pos, AT_OSRELDATE, imgp->proc->p_ucred->cr_prison->pr_osreldate); if (imgp->canary != 0) { AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary); AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen); } AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus); if (imgp->pagesizes != 0) { AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes); AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen); } if (imgp->sysent->sv_timekeep_base != 0) { AUXARGS_ENTRY(pos, AT_TIMEKEEP, imgp->sysent->sv_timekeep_base); } AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : imgp->sysent->sv_stackprot); if (imgp->sysent->sv_hwcap != NULL) AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap); if (imgp->sysent->sv_hwcap2 != NULL) AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= AT_COUNT, ("Too many auxargs")); error = copyout(argarray, auxbase, sizeof(*argarray) * AT_COUNT); free(argarray, M_TEMP); if (error != 0) return (error); base--; if (suword(base, imgp->args->argc) == -1) return (EFAULT); *stack_base = (register_t *)base; return (0); } /* * Code for generating ELF core dumps. */ typedef void (*segment_callback)(vm_map_entry_t, void *); /* Closure for cb_put_phdr(). */ struct phdr_closure { Elf_Phdr *phdr; /* Program header to fill in */ Elf_Off offset; /* Offset of segment in core file */ }; /* Closure for cb_size_segment(). */ struct sseg_closure { int count; /* Count of writable segments. */ size_t size; /* Total size of all writable segments. */ }; typedef void (*outfunc_t)(void *, struct sbuf *, size_t *); struct note_info { int type; /* Note type. */ outfunc_t outfunc; /* Output function. */ void *outarg; /* Argument for the output function. */ size_t outsize; /* Output size. */ TAILQ_ENTRY(note_info) link; /* Link to the next note info. */ }; TAILQ_HEAD(note_info_list, note_info); /* Coredump output parameters. 
*/ struct coredump_params { off_t offset; struct ucred *active_cred; struct ucred *file_cred; struct thread *td; struct vnode *vp; struct compressor *comp; }; extern int compress_user_cores; extern int compress_user_cores_level; static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static int core_write(struct coredump_params *, const void *, size_t, off_t, enum uio_seg); static void each_dumpable_segment(struct thread *, segment_callback, void *); static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t, struct note_info_list *, size_t); static void __elfN(prepare_notes)(struct thread *, struct note_info_list *, size_t *); static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t); static void __elfN(putnote)(struct note_info *, struct sbuf *); static size_t register_note(struct note_info_list *, int, outfunc_t, void *); static int sbuf_drain_core_output(void *, const char *, int); static int sbuf_drain_count(void *arg, const char *data, int len); static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *); static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *); static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *); static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *); static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *); static void __elfN(note_ptlwpinfo)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *); static void note_procstat_files(void *, struct sbuf *, size_t *); static void note_procstat_groups(void *, struct sbuf *, size_t *); static void note_procstat_osrel(void *, struct sbuf *, size_t *); static void note_procstat_rlimit(void *, struct sbuf *, size_t *); static void note_procstat_umask(void *, struct sbuf *, size_t *); static void note_procstat_vmmap(void *, struct sbuf *, size_t *); /* * Write out a core segment to the compression stream. */ static int compress_chunk(struct coredump_params *p, char *base, char *buf, u_int len) { u_int chunk_len; int error; while (len > 0) { chunk_len = MIN(len, CORE_BUF_SIZE); /* * We can get EFAULT error here. * In that case zero out the current chunk of the segment. */ error = copyin(base, buf, chunk_len); if (error != 0) bzero(buf, chunk_len); error = compressor_write(p->comp, buf, chunk_len); if (error != 0) break; base += chunk_len; len -= chunk_len; } return (error); } static int core_compressed_write(void *base, size_t len, off_t offset, void *arg) { return (core_write((struct coredump_params *)arg, base, len, offset, UIO_SYSSPACE)); } static int core_write(struct coredump_params *p, const void *base, size_t len, off_t offset, enum uio_seg seg) { return (vn_rdwr_inchunks(UIO_WRITE, p->vp, __DECONST(void *, base), len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, p->active_cred, p->file_cred, NULL, p->td)); } static int core_output(void *base, size_t len, off_t offset, struct coredump_params *p, void *tmpbuf) { int error; if (p->comp != NULL) return (compress_chunk(p, base, tmpbuf, len)); /* * EFAULT is a non-fatal error that we can get, for example, * if the segment is backed by a file but extends beyond its * end. 
*/ error = core_write(p, base, len, offset, UIO_USERSPACE); if (error == EFAULT) { log(LOG_WARNING, "Failed to fully fault in a core file segment " "at VA %p with size 0x%zx to be written at offset 0x%jx " "for process %s\n", base, len, offset, curproc->p_comm); /* * Write a "real" zero byte at the end of the target region * in the case this is the last segment. * The intermediate space will be implicitly zero-filled. */ error = core_write(p, zero_region, 1, offset + len - 1, UIO_SYSSPACE); } return (error); } /* * Drain into a core file. */ static int sbuf_drain_core_output(void *arg, const char *data, int len) { struct coredump_params *p; int error, locked; p = (struct coredump_params *)arg; /* * Some kern_proc out routines that print to this sbuf may * call us with the process lock held. Draining with the * non-sleepable lock held is unsafe. The lock is needed for * those routines when dumping a live process. In our case we * can safely release the lock before draining and acquire * again after. */ locked = PROC_LOCKED(p->td->td_proc); if (locked) PROC_UNLOCK(p->td->td_proc); if (p->comp != NULL) error = compressor_write(p->comp, __DECONST(char *, data), len); else error = core_write(p, __DECONST(void *, data), len, p->offset, UIO_SYSSPACE); if (locked) PROC_LOCK(p->td->td_proc); if (error != 0) return (-error); p->offset += len; return (len); } /* * Drain into a counter. */ static int sbuf_drain_count(void *arg, const char *data __unused, int len) { size_t *sizep; sizep = (size_t *)arg; *sizep += len; return (len); } int __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) { struct ucred *cred = td->td_ucred; int error = 0; struct sseg_closure seginfo; struct note_info_list notelst; struct coredump_params params; struct note_info *ninfo; void *hdr, *tmpbuf; size_t hdrsize, notesz, coresize; hdr = NULL; tmpbuf = NULL; TAILQ_INIT(¬elst); /* Size the program segments. */ seginfo.count = 0; seginfo.size = 0; each_dumpable_segment(td, cb_size_segment, &seginfo); /* * Collect info about the core file header area. */ hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count); if (seginfo.count + 1 >= PN_XNUM) hdrsize += sizeof(Elf_Shdr); __elfN(prepare_notes)(td, ¬elst, ¬esz); coresize = round_page(hdrsize + notesz) + seginfo.size; /* Set up core dump parameters. */ params.offset = 0; params.active_cred = cred; params.file_cred = NOCRED; params.td = td; params.vp = vp; params.comp = NULL; #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); error = racct_add(td->td_proc, RACCT_CORE, coresize); PROC_UNLOCK(td->td_proc); if (error != 0) { error = EFAULT; goto done; } } #endif if (coresize >= limit) { error = EFAULT; goto done; } /* Create a compression stream if necessary. */ if (compress_user_cores != 0) { params.comp = compressor_init(core_compressed_write, compress_user_cores, CORE_BUF_SIZE, compress_user_cores_level, ¶ms); if (params.comp == NULL) { error = EFAULT; goto done; } tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); } /* * Allocate memory for building the header, fill it up, * and write it out following the notes. */ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); error = __elfN(corehdr)(¶ms, seginfo.count, hdr, hdrsize, ¬elst, notesz); /* Write the contents of all of the writable segments. 
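core_output() above avoids writing a run of zeroes for a faulting segment by writing one real zero byte at the segment's last offset, letting the file system keep the intermediate space as a hole that reads back as zeroes. The same trick from user space; the file name is arbitrary:

#include <sys/stat.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        const char zero = 0;
        off_t region_end = 16 * 1024 * 1024;    /* pretend 16 MB segment */
        struct stat st;
        int fd;

        fd = open("sparse.core", O_RDWR | O_CREAT | O_TRUNC, 0600);
        if (fd == -1)
                return (1);
        /* One byte at the end extends the file; the rest is a hole. */
        if (pwrite(fd, &zero, 1, region_end - 1) != 1)
                return (1);
        if (fstat(fd, &st) == 0)
                printf("size %jd, blocks %jd\n",
                    (intmax_t)st.st_size, (intmax_t)st.st_blocks);
        close(fd);
        return (0);
}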
*/ if (error == 0) { Elf_Phdr *php; off_t offset; int i; php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1; offset = round_page(hdrsize + notesz); for (i = 0; i < seginfo.count; i++) { error = core_output((caddr_t)(uintptr_t)php->p_vaddr, php->p_filesz, offset, ¶ms, tmpbuf); if (error != 0) break; offset += php->p_filesz; php++; } if (error == 0 && params.comp != NULL) error = compressor_flush(params.comp); } if (error) { log(LOG_WARNING, "Failed to write core file for process %s (error %d)\n", curproc->p_comm, error); } done: free(tmpbuf, M_TEMP); if (params.comp != NULL) compressor_fini(params.comp); while ((ninfo = TAILQ_FIRST(¬elst)) != NULL) { TAILQ_REMOVE(¬elst, ninfo, link); free(ninfo, M_TEMP); } if (hdr != NULL) free(hdr, M_TEMP); return (error); } /* * A callback for each_dumpable_segment() to write out the segment's * program header entry. */ static void cb_put_phdr(vm_map_entry_t entry, void *closure) { struct phdr_closure *phc = (struct phdr_closure *)closure; Elf_Phdr *phdr = phc->phdr; phc->offset = round_page(phc->offset); phdr->p_type = PT_LOAD; phdr->p_offset = phc->offset; phdr->p_vaddr = entry->start; phdr->p_paddr = 0; phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; phdr->p_align = PAGE_SIZE; phdr->p_flags = __elfN(untrans_prot)(entry->protection); phc->offset += phdr->p_filesz; phc->phdr++; } /* * A callback for each_dumpable_segment() to gather information about * the number of segments and their total size. */ static void cb_size_segment(vm_map_entry_t entry, void *closure) { struct sseg_closure *ssc = (struct sseg_closure *)closure; ssc->count++; ssc->size += entry->end - entry->start; } /* * For each writable segment in the process's memory map, call the given * function with a pointer to the map entry and some arbitrary * caller-supplied data. */ static void each_dumpable_segment(struct thread *td, segment_callback func, void *closure) { struct proc *p = td->td_proc; vm_map_t map = &p->p_vmspace->vm_map; vm_map_entry_t entry; vm_object_t backing_object, object; boolean_t ignore_entry; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { /* * Don't dump inaccessible mappings, deal with legacy * coredump mode. * * Note that read-only segments related to the elf binary * are marked MAP_ENTRY_NOCOREDUMP now so we no longer * need to arbitrarily ignore such segments. */ if (elf_legacy_coredump) { if ((entry->protection & VM_PROT_RW) != VM_PROT_RW) continue; } else { if ((entry->protection & VM_PROT_ALL) == 0) continue; } /* * Dont include memory segment in the coredump if * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in * madvise(2). Do not dump submaps (i.e. parts of the * kernel map). */ if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP)) continue; if ((object = entry->object.vm_object) == NULL) continue; /* Ignore memory-mapped devices and such things. */ VM_OBJECT_RLOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_RLOCK(backing_object); VM_OBJECT_RUNLOCK(object); object = backing_object; } ignore_entry = object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && object->type != OBJT_VNODE && object->type != OBJT_PHYS; VM_OBJECT_RUNLOCK(object); if (ignore_entry) continue; (*func)(entry, closure); } vm_map_unlock_read(map); } /* * Write the core file header to the file, including padding up to * the page boundary. 
*/ static int __elfN(corehdr)(struct coredump_params *p, int numsegs, void *hdr, size_t hdrsize, struct note_info_list *notelst, size_t notesz) { struct note_info *ninfo; struct sbuf *sb; int error; /* Fill in the header. */ bzero(hdr, hdrsize); __elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz); sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_core_output, p); sbuf_start_section(sb, NULL); sbuf_bcat(sb, hdr, hdrsize); TAILQ_FOREACH(ninfo, notelst, link) __elfN(putnote)(ninfo, sb); /* Align up to a page boundary for the program segments. */ sbuf_end_section(sb, -1, PAGE_SIZE, 0); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } static void __elfN(prepare_notes)(struct thread *td, struct note_info_list *list, size_t *sizep) { struct proc *p; struct thread *thr; size_t size; p = td->td_proc; size = 0; size += register_note(list, NT_PRPSINFO, __elfN(note_prpsinfo), p); /* * To have the debugger select the right thread (LWP) as the initial * thread, we dump the state of the thread passed to us in td first. * This is the thread that causes the core dump and thus likely to * be the right thread one wants to have selected in the debugger. */ thr = td; while (thr != NULL) { size += register_note(list, NT_PRSTATUS, __elfN(note_prstatus), thr); size += register_note(list, NT_FPREGSET, __elfN(note_fpregset), thr); size += register_note(list, NT_THRMISC, __elfN(note_thrmisc), thr); size += register_note(list, NT_PTLWPINFO, __elfN(note_ptlwpinfo), thr); size += register_note(list, -1, __elfN(note_threadmd), thr); thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) : TAILQ_NEXT(thr, td_plist); if (thr == td) thr = TAILQ_NEXT(thr, td_plist); } size += register_note(list, NT_PROCSTAT_PROC, __elfN(note_procstat_proc), p); size += register_note(list, NT_PROCSTAT_FILES, note_procstat_files, p); size += register_note(list, NT_PROCSTAT_VMMAP, note_procstat_vmmap, p); size += register_note(list, NT_PROCSTAT_GROUPS, note_procstat_groups, p); size += register_note(list, NT_PROCSTAT_UMASK, note_procstat_umask, p); size += register_note(list, NT_PROCSTAT_RLIMIT, note_procstat_rlimit, p); size += register_note(list, NT_PROCSTAT_OSREL, note_procstat_osrel, p); size += register_note(list, NT_PROCSTAT_PSSTRINGS, __elfN(note_procstat_psstrings), p); size += register_note(list, NT_PROCSTAT_AUXV, __elfN(note_procstat_auxv), p); *sizep = size; } static void __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs, size_t notesz) { Elf_Ehdr *ehdr; Elf_Phdr *phdr; Elf_Shdr *shdr; struct phdr_closure phc; ehdr = (Elf_Ehdr *)hdr; ehdr->e_ident[EI_MAG0] = ELFMAG0; ehdr->e_ident[EI_MAG1] = ELFMAG1; ehdr->e_ident[EI_MAG2] = ELFMAG2; ehdr->e_ident[EI_MAG3] = ELFMAG3; ehdr->e_ident[EI_CLASS] = ELF_CLASS; ehdr->e_ident[EI_DATA] = ELF_DATA; ehdr->e_ident[EI_VERSION] = EV_CURRENT; ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD; ehdr->e_ident[EI_ABIVERSION] = 0; ehdr->e_ident[EI_PAD] = 0; ehdr->e_type = ET_CORE; ehdr->e_machine = td->td_proc->p_elf_machine; ehdr->e_version = EV_CURRENT; ehdr->e_entry = 0; ehdr->e_phoff = sizeof(Elf_Ehdr); ehdr->e_flags = td->td_proc->p_elf_flags; ehdr->e_ehsize = sizeof(Elf_Ehdr); ehdr->e_phentsize = sizeof(Elf_Phdr); ehdr->e_shentsize = sizeof(Elf_Shdr); ehdr->e_shstrndx = SHN_UNDEF; if (numsegs + 1 < PN_XNUM) { ehdr->e_phnum = numsegs + 1; ehdr->e_shnum = 0; } else { ehdr->e_phnum = PN_XNUM; ehdr->e_shnum = 1; ehdr->e_shoff = ehdr->e_phoff + (numsegs + 1) * ehdr->e_phentsize; KASSERT(ehdr->e_shoff == hdrsize - sizeof(Elf_Shdr), ("e_shoff: %zu, 
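puthdr() above builds the ET_CORE file header and escapes large segment counts through PN_XNUM. A user-space sketch of the same e_phnum handling for a 64-bit core; fill_core_ehdr() is a made-up helper and the count passed in main() is arbitrary:

#include <elf.h>
#include <stdio.h>
#include <string.h>

#ifndef PN_XNUM
#define PN_XNUM 0xffff          /* e_phnum escape value */
#endif

static void
fill_core_ehdr(Elf64_Ehdr *ehdr, int numsegs)
{
        memset(ehdr, 0, sizeof(*ehdr));
        memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
        ehdr->e_ident[EI_CLASS] = ELFCLASS64;
        ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
        ehdr->e_ident[EI_VERSION] = EV_CURRENT;
        ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
        ehdr->e_type = ET_CORE;
        ehdr->e_machine = EM_X86_64;
        ehdr->e_version = EV_CURRENT;
        ehdr->e_phoff = sizeof(Elf64_Ehdr);
        ehdr->e_ehsize = sizeof(Elf64_Ehdr);
        ehdr->e_phentsize = sizeof(Elf64_Phdr);
        ehdr->e_shentsize = sizeof(Elf64_Shdr);
        ehdr->e_shstrndx = SHN_UNDEF;
        /* One PT_NOTE header plus one PT_LOAD per dumped segment. */
        if (numsegs + 1 < PN_XNUM) {
                ehdr->e_phnum = numsegs + 1;
                ehdr->e_shnum = 0;
        } else {
                /* Real count goes into section 0's sh_info (not shown). */
                ehdr->e_phnum = PN_XNUM;
                ehdr->e_shnum = 1;
                ehdr->e_shoff = ehdr->e_phoff +
                    (numsegs + 1) * ehdr->e_phentsize;
        }
}

int
main(void)
{
        Elf64_Ehdr ehdr;

        fill_core_ehdr(&ehdr, 12);
        printf("e_phnum %u e_shnum %u\n", ehdr.e_phnum, ehdr.e_shnum);
        return (0);
}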
hdrsize - shdr: %zu", (size_t)ehdr->e_shoff, hdrsize - sizeof(Elf_Shdr))); shdr = (Elf_Shdr *)((char *)hdr + ehdr->e_shoff); memset(shdr, 0, sizeof(*shdr)); /* * A special first section is used to hold large segment and * section counts. This was proposed by Sun Microsystems in * Solaris and has been adopted by Linux; the standard ELF * tools are already familiar with the technique. * * See table 7-7 of the Solaris "Linker and Libraries Guide" * (or 12-7 depending on the version of the document) for more * details. */ shdr->sh_type = SHT_NULL; shdr->sh_size = ehdr->e_shnum; shdr->sh_link = ehdr->e_shstrndx; shdr->sh_info = numsegs + 1; } /* * Fill in the program header entries. */ phdr = (Elf_Phdr *)((char *)hdr + ehdr->e_phoff); /* The note segement. */ phdr->p_type = PT_NOTE; phdr->p_offset = hdrsize; phdr->p_vaddr = 0; phdr->p_paddr = 0; phdr->p_filesz = notesz; phdr->p_memsz = 0; phdr->p_flags = PF_R; phdr->p_align = ELF_NOTE_ROUNDSIZE; phdr++; /* All the writable segments from the program. */ phc.phdr = phdr; phc.offset = round_page(hdrsize + notesz); each_dumpable_segment(td, cb_put_phdr, &phc); } static size_t register_note(struct note_info_list *list, int type, outfunc_t out, void *arg) { struct note_info *ninfo; size_t size, notesize; size = 0; out(arg, NULL, &size); ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK); ninfo->type = type; ninfo->outfunc = out; ninfo->outarg = arg; ninfo->outsize = size; TAILQ_INSERT_TAIL(list, ninfo, link); if (type == -1) return (size); notesize = sizeof(Elf_Note) + /* note header */ roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static size_t append_note_data(const void *src, void *dst, size_t len) { size_t padded_len; padded_len = roundup2(len, ELF_NOTE_ROUNDSIZE); if (dst != NULL) { bcopy(src, dst, len); bzero((char *)dst + len, padded_len - len); } return (padded_len); } size_t __elfN(populate_note)(int type, void *src, void *dst, size_t size, void **descp) { Elf_Note *note; char *buf; size_t notesize; buf = dst; if (buf != NULL) { note = (Elf_Note *)buf; note->n_namesz = sizeof(FREEBSD_ABI_VENDOR); note->n_descsz = size; note->n_type = type; buf += sizeof(*note); buf += append_note_data(FREEBSD_ABI_VENDOR, buf, sizeof(FREEBSD_ABI_VENDOR)); append_note_data(src, buf, size); if (descp != NULL) *descp = buf; } notesize = sizeof(Elf_Note) + /* note header */ roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static void __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb) { Elf_Note note; ssize_t old_len, sect_len; size_t new_len, descsz, i; if (ninfo->type == -1) { ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); return; } note.n_namesz = sizeof(FREEBSD_ABI_VENDOR); note.n_descsz = ninfo->outsize; note.n_type = ninfo->type; sbuf_bcat(sb, ¬e, sizeof(note)); sbuf_start_section(sb, &old_len); sbuf_bcat(sb, FREEBSD_ABI_VENDOR, sizeof(FREEBSD_ABI_VENDOR)); sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (note.n_descsz == 0) return; sbuf_start_section(sb, &old_len); ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (sect_len < 0) return; new_len = (size_t)sect_len; descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE); if (new_len < descsz) { /* * It is expected that individual note emitters will correctly * predict their expected output size and fill 
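register_note() above sizes each note as a note header plus the vendor name and the descriptor, each padded to ELF_NOTE_ROUNDSIZE (4 bytes). The same arithmetic stand-alone; the descriptor size is an arbitrary unaligned example:

#include <elf.h>
#include <stddef.h>
#include <stdio.h>

#define NOTE_ROUNDSIZE  4
#define roundup2(x, y)  (((x) + ((y) - 1)) & ~((y) - 1))

int
main(void)
{
        const char vendor[] = "FreeBSD";        /* 8 bytes with NUL */
        size_t descsz = 13;                     /* unaligned payload */
        size_t notesz;

        notesz = sizeof(Elf64_Nhdr) +                   /* 12: header */
            roundup2(sizeof(vendor), NOTE_ROUNDSIZE) +  /*  8: name   */
            roundup2(descsz, NOTE_ROUNDSIZE);           /* 16: desc   */
        printf("note record: %zu bytes\n", notesz);     /* 36 */
        return (0);
}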
up to that size * themselves, padding in a format-specific way if needed. * However, in case they don't, just do it here with zeros. */ for (i = 0; i < descsz - new_len; i++) sbuf_putc(sb, 0); } else if (new_len > descsz) { /* * We can't always truncate sb -- we may have drained some * of it already. */ KASSERT(new_len == descsz, ("%s: Note type %u changed as we " "read it (%zu > %zu). Since it is longer than " "expected, this coredump's notes are corrupt. THIS " "IS A BUG in the note_procstat routine for type %u.\n", __func__, (unsigned)note.n_type, new_len, descsz, (unsigned)note.n_type)); } } /* * Miscellaneous note out functions. */ #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 #include #include typedef struct prstatus32 elf_prstatus_t; typedef struct prpsinfo32 elf_prpsinfo_t; typedef struct fpreg32 elf_prfpregset_t; typedef struct fpreg32 elf_fpregset_t; typedef struct reg32 elf_gregset_t; typedef struct thrmisc32 elf_thrmisc_t; #define ELF_KERN_PROC_MASK KERN_PROC_MASK32 typedef struct kinfo_proc32 elf_kinfo_proc_t; typedef uint32_t elf_ps_strings_t; #else typedef prstatus_t elf_prstatus_t; typedef prpsinfo_t elf_prpsinfo_t; typedef prfpregset_t elf_prfpregset_t; typedef prfpregset_t elf_fpregset_t; typedef gregset_t elf_gregset_t; typedef thrmisc_t elf_thrmisc_t; #define ELF_KERN_PROC_MASK 0 typedef struct kinfo_proc elf_kinfo_proc_t; typedef vm_offset_t elf_ps_strings_t; #endif static void __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep) { struct sbuf sbarg; size_t len; char *cp, *end; struct proc *p; elf_prpsinfo_t *psinfo; int error; p = (struct proc *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*psinfo), ("invalid size")); psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK); psinfo->pr_version = PRPSINFO_VERSION; psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t); strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname)); PROC_LOCK(p); if (p->p_args != NULL) { len = sizeof(psinfo->pr_psargs) - 1; if (len > p->p_args->ar_length) len = p->p_args->ar_length; memcpy(psinfo->pr_psargs, p->p_args->ar_args, len); PROC_UNLOCK(p); error = 0; } else { _PHOLD(p); PROC_UNLOCK(p); sbuf_new(&sbarg, psinfo->pr_psargs, sizeof(psinfo->pr_psargs), SBUF_FIXEDLEN); error = proc_getargv(curthread, p, &sbarg); PRELE(p); if (sbuf_finish(&sbarg) == 0) len = sbuf_len(&sbarg) - 1; else len = sizeof(psinfo->pr_psargs) - 1; sbuf_delete(&sbarg); } if (error || len == 0) strlcpy(psinfo->pr_psargs, p->p_comm, sizeof(psinfo->pr_psargs)); else { KASSERT(len < sizeof(psinfo->pr_psargs), ("len is too long: %zu vs %zu", len, sizeof(psinfo->pr_psargs))); cp = psinfo->pr_psargs; end = cp + len - 1; for (;;) { cp = memchr(cp, '\0', end - cp); if (cp == NULL) break; *cp = ' '; } } psinfo->pr_pid = p->p_pid; sbuf_bcat(sb, psinfo, sizeof(*psinfo)); free(psinfo, M_TEMP); } *sizep = sizeof(*psinfo); } static void __elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_prstatus_t *status; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*status), ("invalid size")); status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK); status->pr_version = PRSTATUS_VERSION; status->pr_statussz = sizeof(elf_prstatus_t); status->pr_gregsetsz = sizeof(elf_gregset_t); status->pr_fpregsetsz = sizeof(elf_fpregset_t); status->pr_osreldate = osreldate; status->pr_cursig = td->td_proc->p_sig; status->pr_pid = td->td_tid; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_regs32(td, &status->pr_reg); #else fill_regs(td, &status->pr_reg); #endif 
sbuf_bcat(sb, status, sizeof(*status)); free(status, M_TEMP); } *sizep = sizeof(*status); } static void __elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_prfpregset_t *fpregset; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*fpregset), ("invalid size")); fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_fpregs32(td, fpregset); #else fill_fpregs(td, fpregset); #endif sbuf_bcat(sb, fpregset, sizeof(*fpregset)); free(fpregset, M_TEMP); } *sizep = sizeof(*fpregset); } static void __elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_thrmisc_t thrmisc; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(thrmisc), ("invalid size")); bzero(&thrmisc._pad, sizeof(thrmisc._pad)); strcpy(thrmisc.pr_tname, td->td_name); sbuf_bcat(sb, &thrmisc, sizeof(thrmisc)); } *sizep = sizeof(thrmisc); } static void __elfN(note_ptlwpinfo)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; size_t size; int structsize; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 struct ptrace_lwpinfo32 pl; #else struct ptrace_lwpinfo pl; #endif td = (struct thread *)arg; size = sizeof(structsize) + sizeof(pl); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(pl); sbuf_bcat(sb, &structsize, sizeof(structsize)); bzero(&pl, sizeof(pl)); pl.pl_lwpid = td->td_tid; pl.pl_event = PL_EVENT_NONE; pl.pl_sigmask = td->td_sigmask; pl.pl_siglist = td->td_siglist; if (td->td_si.si_signo != 0) { pl.pl_event = PL_EVENT_SIGNAL; pl.pl_flags |= PL_FLAG_SI; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 siginfo_to_siginfo32(&td->td_si, &pl.pl_siginfo); #else pl.pl_siginfo = td->td_si; #endif } strcpy(pl.pl_tdname, td->td_name); /* XXX TODO: supply more information in struct ptrace_lwpinfo*/ sbuf_bcat(sb, &pl, sizeof(pl)); } *sizep = size; } /* * Allow for MD specific notes, as well as any MD * specific preparations for writing MI notes. 
*/ static void __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; void *buf; size_t size; td = (struct thread *)arg; size = *sizep; if (size != 0 && sb != NULL) buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK); else buf = NULL; size = 0; __elfN(dump_thread)(td, buf, &size); KASSERT(sb == NULL || *sizep == size, ("invalid size")); if (size != 0 && sb != NULL) sbuf_bcat(sb, buf, size); free(buf, M_TEMP); *sizep = size; } #ifdef KINFO_PROC_SIZE CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #endif static void __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + p->p_numthreads * sizeof(elf_kinfo_proc_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(elf_kinfo_proc_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_out(p, sb, ELF_KERN_PROC_MASK); } *sizep = size; } #ifdef KINFO_FILE_SIZE CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); #endif static void note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size, sect_sz, i; ssize_t start_len, sect_len; int structsize, filedesc_flags; if (coredump_pack_fileinfo) filedesc_flags = KERN_FILEDESC_PACK_KINFO; else filedesc_flags = 0; p = (struct proc *)arg; structsize = sizeof(struct kinfo_file); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, -1, filedesc_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { sbuf_start_section(sb, &start_len); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize), filedesc_flags); sect_len = sbuf_end_section(sb, start_len, 0, 0); if (sect_len < 0) return; sect_sz = sect_len; KASSERT(sect_sz <= *sizep, ("kern_proc_filedesc_out did not respect maxlen; " "requested %zu, got %zu", *sizep - sizeof(structsize), sect_sz - sizeof(structsize))); for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++) sbuf_putc(sb, 0); } } #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif static void note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize, vmmap_flags; if (coredump_pack_vmmapinfo) vmmap_flags = KERN_VMMAP_PACK_KINFO; else vmmap_flags = 0; p = (struct proc *)arg; structsize = sizeof(struct kinfo_vmentry); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb, -1, vmmap_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize), vmmap_flags); } } static void note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(gid_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups * sizeof(gid_t)); } *sizep = size; } static void note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep) { 
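The procstat notes above are generated twice: once into a counting drain (sbuf_drain_count) to learn the size, then into the real core stream, padded out to that size. A user-space sketch of the measure-then-emit pattern; the sink and generator here are stand-ins, not kernel interfaces:

#include <stdio.h>
#include <string.h>

typedef void (*sink_t)(void *arg, const char *data, size_t len);

static void
count_sink(void *arg, const char *data, size_t len)
{
        (void)data;
        *(size_t *)arg += len;          /* pass 1: only accumulate size */
}

static void
stdout_sink(void *arg, const char *data, size_t len)
{
        (void)arg;
        fwrite(data, 1, len, stdout);   /* pass 2: real output */
}

static void
emit_note_body(sink_t sink, void *arg)
{
        const char *rows[] = { "fd 0 tty\n", "fd 1 tty\n", "fd 2 log\n" };

        for (size_t i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
                sink(arg, rows[i], strlen(rows[i]));
}

int
main(void)
{
        size_t size = 0;

        emit_note_body(count_sink, &size);
        printf("note body will be %zu bytes\n", size);
        emit_note_body(stdout_sink, NULL);
        return (0);
}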
struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(p->p_fd->fd_cmask); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_fd->fd_cmask); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_fd->fd_cmask, sizeof(p->p_fd->fd_cmask)); } *sizep = size; } static void note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; struct rlimit rlim[RLIM_NLIMITS]; size_t size; int structsize, i; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(rlim); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(rlim); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); for (i = 0; i < RLIM_NLIMITS; i++) lim_rlimit_proc(p, i, &rlim[i]); PROC_UNLOCK(p); sbuf_bcat(sb, rlim, sizeof(rlim)); } *sizep = size; } static void note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(p->p_osrel); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_osrel); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel)); } *sizep = size; } static void __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; elf_ps_strings_t ps_strings; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(ps_strings); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(ps_strings); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 ps_strings = PTROUT(p->p_sysent->sv_psstrings); #else ps_strings = p->p_sysent->sv_psstrings; #endif sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &ps_strings, sizeof(ps_strings)); } *sizep = size; } static void __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { structsize = sizeof(Elf_Auxinfo); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); } } static boolean_t __elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote, const char *note_vendor, const Elf_Phdr *pnote, boolean_t (*cb)(const Elf_Note *, void *, boolean_t *), void *cb_arg) { const Elf_Note *note, *note0, *note_end; const char *note_name; char *buf; int i, error; boolean_t res; /* We need some limit, might as well use PAGE_SIZE. 
*/ if (pnote == NULL || pnote->p_filesz > PAGE_SIZE) return (FALSE); ASSERT_VOP_LOCKED(imgp->vp, "parse_notes"); if (pnote->p_offset > PAGE_SIZE || pnote->p_filesz > PAGE_SIZE - pnote->p_offset) { VOP_UNLOCK(imgp->vp, 0); buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz, pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL, curthread); if (error != 0) { uprintf("i/o error PT_NOTE\n"); goto retf; } note = note0 = (const Elf_Note *)buf; note_end = (const Elf_Note *)(buf + pnote->p_filesz); } else { note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset); note_end = (const Elf_Note *)(imgp->image_header + pnote->p_offset + pnote->p_filesz); buf = NULL; } for (i = 0; i < 100 && note >= note0 && note < note_end; i++) { if (!aligned(note, Elf32_Addr) || (const char *)note_end - (const char *)note < sizeof(Elf_Note)) { goto retf; } if (note->n_namesz != checknote->n_namesz || note->n_descsz != checknote->n_descsz || note->n_type != checknote->n_type) goto nextnote; note_name = (const char *)(note + 1); if (note_name + checknote->n_namesz >= (const char *)note_end || strncmp(note_vendor, note_name, checknote->n_namesz) != 0) goto nextnote; if (cb(note, cb_arg, &res)) goto ret; nextnote: note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) + roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE)); } retf: res = FALSE; ret: free(buf, M_TEMP); return (res); } struct brandnote_cb_arg { Elf_Brandnote *brandnote; int32_t *osrel; }; static boolean_t brandnote_cb(const Elf_Note *note, void *arg0, boolean_t *res) { struct brandnote_cb_arg *arg; arg = arg0; /* * Fetch the osreldate for binary from the ELF OSABI-note if * necessary. */ *res = (arg->brandnote->flags & BN_TRANSLATE_OSREL) != 0 && arg->brandnote->trans_osrel != NULL ? arg->brandnote->trans_osrel(note, arg->osrel) : TRUE; return (TRUE); } static Elf_Note fctl_note = { .n_namesz = sizeof(FREEBSD_ABI_VENDOR), .n_descsz = sizeof(uint32_t), .n_type = NT_FREEBSD_FEATURE_CTL, }; struct fctl_cb_arg { uint32_t *fctl0; }; static boolean_t note_fctl_cb(const Elf_Note *note, void *arg0, boolean_t *res) { struct fctl_cb_arg *arg; const Elf32_Word *desc; uintptr_t p; arg = arg0; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); desc = (const Elf32_Word *)p; *arg->fctl0 = desc[0]; return (TRUE); } /* * Try to find the appropriate ABI-note section for checknote, fetch * the osreldate and feature control flags for binary from the ELF * OSABI-note. Only the first page of the image is searched, the same * as for headers. 
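parse_notes() above steps through a PT_NOTE segment by advancing past the note header plus the 4-byte-rounded name and descriptor. A user-space walker over a fabricated one-note buffer using the same advance formula; the note contents are invented:

#include <elf.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define roundup2(x, y)  (((x) + ((y) - 1)) & ~((y) - 1))

int
main(void)
{
        unsigned char buf[64];
        Elf64_Nhdr nh;
        uint32_t osrel = 1300000;       /* fabricated version payload */
        size_t off, len;

        /* Build one note: name "FreeBSD\0" (8 bytes), 4-byte descriptor. */
        memset(buf, 0, sizeof(buf));
        nh.n_namesz = 8;
        nh.n_descsz = 4;
        nh.n_type = 1;
        memcpy(buf, &nh, sizeof(nh));
        memcpy(buf + sizeof(nh), "FreeBSD", 8);
        memcpy(buf + sizeof(nh) + 8, &osrel, 4);
        len = sizeof(nh) + 8 + 4;

        for (off = 0; off + sizeof(Elf64_Nhdr) <= len;) {
                Elf64_Nhdr n;

                memcpy(&n, buf + off, sizeof(n));
                printf("note '%s' type %u descsz %u\n",
                    (const char *)buf + off + sizeof(n),
                    n.n_type, n.n_descsz);
                /* Advance exactly as parse_notes() does. */
                off += sizeof(n) +
                    roundup2(n.n_namesz, 4) + roundup2(n.n_descsz, 4);
        }
        return (0);
}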
*/ static boolean_t __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote, int32_t *osrel, uint32_t *fctl0) { const Elf_Phdr *phdr; const Elf_Ehdr *hdr; struct brandnote_cb_arg b_arg; struct fctl_cb_arg f_arg; int i, j; hdr = (const Elf_Ehdr *)imgp->image_header; phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); b_arg.brandnote = brandnote; b_arg.osrel = osrel; f_arg.fctl0 = fctl0; for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_NOTE && __elfN(parse_notes)(imgp, &brandnote->hdr, brandnote->vendor, &phdr[i], brandnote_cb, &b_arg)) { for (j = 0; j < hdr->e_phnum; j++) { if (phdr[j].p_type == PT_NOTE && __elfN(parse_notes)(imgp, &fctl_note, FREEBSD_ABI_VENDOR, &phdr[j], note_fctl_cb, &f_arg)) break; } return (TRUE); } } return (FALSE); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw __elfN(execsw) = { .ex_imgact = __CONCAT(exec_, __elfN(imgact)), .ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) }; EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw)); static vm_prot_t __elfN(trans_prot)(Elf_Word flags) { vm_prot_t prot; prot = 0; if (flags & PF_X) prot |= VM_PROT_EXECUTE; if (flags & PF_W) prot |= VM_PROT_WRITE; if (flags & PF_R) prot |= VM_PROT_READ; #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) if (i386_read_exec && (flags & PF_R)) prot |= VM_PROT_EXECUTE; #endif return (prot); } static Elf_Word __elfN(untrans_prot)(vm_prot_t prot) { Elf_Word flags; flags = 0; if (prot & VM_PROT_EXECUTE) flags |= PF_X; if (prot & VM_PROT_READ) flags |= PF_R; if (prot & VM_PROT_WRITE) flags |= PF_W; return (flags); } Index: head/sys/kern/kern_exec.c =================================================================== --- head/sys/kern/kern_exec.c (revision 343963) +++ head/sys/kern/kern_exec.c (revision 343964) @@ -1,1830 +1,1835 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
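trans_prot() above converts ELF segment flags into vm_prot_t bits (with the optional i386 read-implies-exec quirk). A user-space analogue that targets mmap(2) protection bits instead:

#include <sys/mman.h>
#include <elf.h>
#include <stdio.h>

static int
phdr_flags_to_prot(Elf64_Word flags)
{
        int prot = 0;

        if (flags & PF_X)
                prot |= PROT_EXEC;
        if (flags & PF_W)
                prot |= PROT_WRITE;
        if (flags & PF_R)
                prot |= PROT_READ;
        return (prot);
}

int
main(void)
{
        printf("PF_R|PF_X -> %#x\n", phdr_flags_to_prot(PF_R | PF_X));
        return (0);
}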
*/ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exec, "char *"); SDT_PROBE_DEFINE1(proc, , , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, , , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); int coredump_pack_fileinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, &coredump_pack_fileinfo, 0, "Enable file path packing in 'procstat -f' coredump notes"); int coredump_pack_vmmapinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, &coredump_pack_vmmapinfo, 0, "Enable file path packing in 'procstat -v' coredump notes"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_ps_strings, "LU", ""); /* XXX This should be vm_size_t. 
*/ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_usrstack, "LU", ""); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_stackprot, "I", ""); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, ""); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); EVENTHANDLER_LIST_DECLARE(process_exec); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_psstrings; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, sizeof(p->p_sysent->sv_psstrings)); return error; } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_usrstack; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, sizeof(p->p_sysent->sv_usrstack)); return error; } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. 
*/ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(struct thread *td, struct execve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL); post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; } #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL); } post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p); post_execve(td, error, oldvmspace); return (error); #else return (ENOSYS); #endif } int pre_execve(struct thread *td, struct vmspace **oldvmspace) { struct proc *p; int error; KASSERT(td == curthread, ("non-current thread %p", td)); error = 0; p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY) != 0) error = ERESTART; PROC_UNLOCK(p); } KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); *oldvmspace = p->p_vmspace; return (error); } void post_execve(struct thread *td, int error, struct vmspace *oldvmspace) { struct proc *p; KASSERT(td == curthread, ("non-current thread %p", td)); p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == EJUSTRETURN) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } } /* * XXX: kern_execve has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p) { AUDIT_ARG_ARGV(args->begin_argv, args->argc, exec_args_get_begin_envv(args) - args->begin_argv); AUDIT_ARG_ENVV(exec_args_get_begin_envv(args), args->envc, args->endp - exec_args_get_begin_envv(args)); return (do_execve(td, args, mac_p)); } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. 
*/ static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p) { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *oldcred; struct uidinfo *euip = NULL; register_t *stack_base; int error, i; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct vnode *tracevp = NULL; struct ucred *tracecred = NULL; #endif struct vnode *oldtextvp = NULL, *newtextvp; int credential_changing; int textset; #ifdef MAC struct label *interpvplabel = NULL; int will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif /* * Translate the file name. namei() returns a vnode pointer * in ni_vp among other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. */ if (args->fname != NULL) { NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp); if (error) goto exec_fail; vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); /* * Set VV_TEXT now so no one can write to the executable while we're * activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ textset = VOP_IS_TEXT(imgp->vp); VOP_SET_TEXT(imgp->vp); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; imgp->proc->p_fctl0 = 0; /* * Implement image setuid/setgid. * * Determine new credentials before attempting image activators * so that it can be used by process_exec handlers to determine * credential/setid changes. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. 
* * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = 0; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp); credential_changing |= will_transition; #endif /* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */ if (credential_changing) imgp->proc->p_pdeathsig = 0; if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { imgp->credential_setid = true; VOP_UNLOCK(imgp->vp, 0); imgp->newcred = crdup(oldcred); if (attr.va_mode & S_ISUID) { euip = uifind(attr.va_uid); change_euid(imgp->newcred, euip); } vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (attr.va_mode & S_ISGID) change_egid(imgp->newcred, attr.va_gid); /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } else { /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { VOP_UNLOCK(imgp->vp, 0); imgp->newcred = crdup(oldcred); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } } /* The new credentials are installed into the process later. */ /* * Do the best to calculate the full path to the image file. */ if (args->fname != NULL && args->fname[0] == '/') imgp->execpath = args->fname; else { VOP_UNLOCK(imgp->vp, 0); if (vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); } /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. 
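 *
 * A minimal activator, sketched here for illustration only (my_imgact,
 * MY_MAGIC and MY_MAGLEN are hypothetical names, not part of this file),
 * follows that contract:
 *
 *	static int
 *	my_imgact(struct image_params *imgp)
 *	{
 *		if (memcmp(imgp->image_header, MY_MAGIC, MY_MAGLEN) != 0)
 *			return (-1);		not ours, let the next one try
 *		... set up imgp->entry_addr, imgp->interpreted, etc. ...
 *		return (0);
 *	}
 *	static struct execsw my_execsw = {
 *		.ex_imgact = my_imgact,
 *		.ex_name = "my-format"
 *	};
 *	EXEC_SET(my_format, my_execsw);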
*/ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) { if (textset == 0) VOP_UNSET_TEXT(imgp->vp); error = ENOEXEC; } goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * VV_TEXT needs to be unset for scripts. There is a short * period before we determine that something is a script where * VV_TEXT will be set. The vnode lock is held over this * entire period so nothing should illegitimately be blocked. */ VOP_UNSET_TEXT(imgp->vp); /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = 0; } vput(newtextvp); vm_object_deallocate(imgp->object); imgp->object = NULL; imgp->credential_setid = false; if (imgp->newcred != NULL) { crfree(imgp->newcred); imgp->newcred = NULL; } imgp->execpath = NULL; free(imgp->freepath, M_TEMP); imgp->freepath = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp, 0); if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* ABI enforces the use of Capsicum. Switch into capabilities mode. */ if (SV_PROC_FLAG(p, SV_CAPSICUM)) sys_cap_enter(td, NULL); /* * Copy out strings (args and env) and initialize stack base */ if (p->p_sysent->sv_copyout_strings) stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); else stack_base = exec_copyout_strings(imgp); /* * If custom stack fixup routine present for this process * let it do the stack setup. * Else stuff argument count as first item on stack */ if (p->p_sysent->sv_fixup != NULL) error = (*p->p_sysent->sv_fixup)(&stack_base, imgp); else error = suword(--stack_base, imgp->args->argc) == 0 ? 0 : EFAULT; if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } if (args->fdp != NULL) { /* Install a brand new file descriptor table. */ fdinstall_remapped(td, args->fdp); args->fdp = NULL; } else { /* * Keep on using the existing file descriptor table. For * security and other reasons, the file descriptor table * cannot be shared after an exec. */ fdunshare(td); /* close files on exec */ fdcloseexec(td); } /* * Malloc things before we need locks. */ i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. 
*/ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); /* STOPs are no longer ignored, arrange for AST */ signotify(td); } /* * Implement image setuid/setgid installation. */ if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE if (p->p_tracecred != NULL && priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED)) ktrprocexec(p, &tracecred, &tracevp); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); fdsetugidsafety(td); error = fdcheckstd(td); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto exec_fail_dealloc; PROC_LOCK(p); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, imgp->newcred, imgp->vp, interpvplabel, imgp); } #endif } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; } /* * Set the new credentials. */ if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); crfree(oldcred); oldcred = NULL; } /* * Store the vp for use in procfs. This vnode was referenced by namei * or fgetvp_exec. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; PROC_UNLOCK(p); #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. 
*/ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { VOP_UNLOCK(imgp->vp, 0); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } #endif /* Set values passed into the program in registers. */ if (p->p_sysent->sv_setregs) (*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base); else exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base); vfs_mark_atime(imgp->vp, td->td_ucred); SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (args->fname) NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp, 0); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { if (p->p_ptevents & PTRACE_EXEC) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_EXEC) td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); } /* * Stop the process here if its stop event mask has * the S_EXEC bit set. */ STOPEVENT(p, S_EXEC, 0); } else { exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); } if (imgp->newcred != NULL && oldcred != NULL) crfree(imgp->newcred); #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); /* * Handle deferred decrement of ref counts. */ if (oldtextvp != NULL) vrele(oldtextvp); #ifdef KTRACE if (tracevp != NULL) vrele(tracevp); if (tracecred != NULL) crfree(tracecred); #endif pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); if (euip != NULL) uifree(euip); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif /* * We don't want cpu_set_syscall_retval() to overwrite any of * the register values put in place by exec_setregs(). * Implementations of cpu_set_syscall_retval() will leave * registers unmodified when returning EJUSTRETURN. */ return (error == 0 ? 
EJUSTRETURN : error); } int exec_map_first_page(struct image_params *imgp) { int rv, i, after, initial_pagein; vm_page_t ma[VM_INITIAL_PAGEIN]; vm_object_t object; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); VM_OBJECT_WLOCK(object); #if VM_NRESERVLEVEL > 0 vm_object_color(object, 0); #endif ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); if (ma[0]->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(ma[0]); if (!vm_pager_has_page(object, 0, NULL, &after)) { vm_page_lock(ma[0]); vm_page_free(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); return (EIO); } initial_pagein = min(after, VM_INITIAL_PAGEIN); KASSERT(initial_pagein <= object->size, ("%s: initial_pagein %d object->size %ju", __func__, initial_pagein, (uintmax_t )object->size)); for (i = 1; i < initial_pagein; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) break; if (!vm_page_tryxbusy(ma[i])) break; } else { ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL); if (ma[i] == NULL) break; } } initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { for (i = 0; i < initial_pagein; i++) { vm_page_lock(ma[i]); vm_page_free(ma[i]); vm_page_unlock(ma[i]); } VM_OBJECT_WUNLOCK(object); return (EIO); } vm_page_xunbusy(ma[0]); for (i = 1; i < initial_pagein; i++) vm_page_readahead_finish(ma[i]); } vm_page_lock(ma[0]); vm_page_hold(ma[0]); vm_page_activate(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(struct image_params *imgp) { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } } /* * Destroy old address space, and allocate a new stack. * The new stack is only sgrowsiz large because it is grown * automatically on a page fault. */ int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; u_long ssiz; imgp->vmspace_destroyed = 1; imgp->sysent = sv; /* May be called with Giant held */ EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser) { shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); - /* An exec terminates mlockall(MCL_FUTURE). */ + /* + * An exec terminates mlockall(MCL_FUTURE), ASLR state + * must be re-evaluated. 
+ */ vm_map_lock(map); - vm_map_modflags(map, 0, MAP_WIREFUTURE); + vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | + MAP_ASLR_IGNSTART); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } + map->flags |= imgp->map_flags; /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error != KERN_SUCCESS) { vm_object_deallocate(obj); return (vm_mmap_to_errno(error)); } } /* Allocate a new stack */ if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } stack_addr = sv->sv_usrstack - ssiz; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error != KERN_SUCCESS) return (vm_mmap_to_errno(error)); /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, const char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long arg, env; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ error = exec_args_add_fname(args, fname, segflg); if (error != 0) goto err_exit; /* * extract arguments first */ for (;;) { error = fueword(argv++, &arg); if (error == -1) { error = EFAULT; goto err_exit; } if (arg == 0) break; error = exec_args_add_arg(args, (char *)(uintptr_t)arg, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &env); if (error == -1) { error = EFAULT; goto err_exit; } if (env == 0) break; error = exec_args_add_env(args, (char *)(uintptr_t)env, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } int exec_copyin_data_fds(struct thread *td, struct image_args *args, const void *data, size_t datalen, const int *fds, size_t fdslen) { struct filedesc *ofdp; const char *p; int *kfds; int error; memset(args, '\0', sizeof(*args)); ofdp = td->td_proc->p_fd; if (datalen >= ARG_MAX || fdslen > ofdp->fd_lastfile + 1) return (E2BIG); error = exec_alloc_args(args); if (error != 0) return (error); args->begin_argv = args->buf; args->stringspace = ARG_MAX; if (datalen > 0) { /* * Argument buffer has been provided. Copy it into the * kernel as a single string and add a terminating null * byte. 
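 *
 * Worked example, added for illustration (values are hypothetical): for
 * data = "sh\0-c\0ls" with datalen = 8, the copyin() below is followed by
 * an appended terminating NUL, the buffer becomes "sh\0-c\0ls\0", and the
 * NUL-counting loop yields argc = 3.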
*/ error = copyin(data, args->begin_argv, datalen); if (error != 0) goto err_exit; args->begin_argv[datalen] = '\0'; args->endp = args->begin_argv + datalen + 1; args->stringspace -= datalen + 1; /* * Traditional argument counting. Count the number of * null bytes. */ for (p = args->begin_argv; p < args->endp; ++p) if (*p == '\0') ++args->argc; } else { /* No argument buffer provided. */ args->endp = args->begin_argv; } /* Create new file descriptor table. */ kfds = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK); error = copyin(fds, kfds, fdslen * sizeof(int)); if (error != 0) { free(kfds, M_TEMP); goto err_exit; } error = fdcopy_remapped(ofdp, kfds, fdslen, &args->fdp); free(kfds, M_TEMP); if (error != 0) goto err_exit; return (0); err_exit: exec_free_args(args); return (error); } struct exec_args_kva { vm_offset_t addr; u_int gen; SLIST_ENTRY(exec_args_kva) next; }; DPCPU_DEFINE_STATIC(struct exec_args_kva *, exec_args_kva); static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist; static struct mtx exec_args_kva_mtx; static u_int exec_args_gen; static void exec_prealloc_args_kva(void *arg __unused) { struct exec_args_kva *argkva; u_int i; SLIST_INIT(&exec_args_kva_freelist); mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF); for (i = 0; i < exec_map_entries; i++) { argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK); argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size); argkva->gen = exec_args_gen; SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); } } SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL); static vm_offset_t exec_alloc_args_kva(void **cookie) { struct exec_args_kva *argkva; argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_PTR(exec_args_kva)); if (argkva == NULL) { mtx_lock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL) (void)mtx_sleep(&exec_args_kva_freelist, &exec_args_kva_mtx, 0, "execkva", 0); SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next); mtx_unlock(&exec_args_kva_mtx); } *(struct exec_args_kva **)cookie = argkva; return (argkva->addr); } static void exec_release_args_kva(struct exec_args_kva *argkva, u_int gen) { vm_offset_t base; base = argkva->addr; if (argkva->gen != gen) { (void)vm_map_madvise(exec_map, base, base + exec_map_entry_size, MADV_FREE); argkva->gen = gen; } if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva), (uintptr_t)NULL, (uintptr_t)argkva)) { mtx_lock(&exec_args_kva_mtx); SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); wakeup_one(&exec_args_kva_freelist); mtx_unlock(&exec_args_kva_mtx); } } static void exec_free_args_kva(void *cookie) { exec_release_args_kva(cookie, exec_args_gen); } static void exec_args_kva_lowmem(void *arg __unused) { SLIST_HEAD(, exec_args_kva) head; struct exec_args_kva *argkva; u_int gen; int i; gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1; /* * Force an madvise of each KVA range. Any currently allocated ranges * will have MADV_FREE applied once they are freed. 
*/ SLIST_INIT(&head); mtx_lock(&exec_args_kva_mtx); SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva); mtx_unlock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&head)) != NULL) { SLIST_REMOVE_HEAD(&head, next); exec_release_args_kva(argkva, gen); } CPU_FOREACH(i) { argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva)); if (argkva != NULL) exec_release_args_kva(argkva, gen); } } EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL, EVENTHANDLER_PRI_ANY); /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. */ int exec_alloc_args(struct image_args *args) { args->buf = (char *)exec_alloc_args_kva(&args->bufkva); return (0); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { exec_free_args_kva(args->bufkva); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } if (args->fdp != NULL) fdescfree_remapped(args->fdp); } /* * A set to functions to fill struct image args. * * NOTE: exec_args_add_fname() must be called (possibly with a NULL * fname) before the other functions. All exec_args_add_arg() calls must * be made before any exec_args_add_env() calls. exec_args_adjust_args() * may be called any time after exec_args_add_fname(). * * exec_args_add_fname() - install path to be executed * exec_args_add_arg() - append an argument string * exec_args_add_env() - append an env string * exec_args_adjust_args() - adjust location of the argument list to * allow new arguments to be prepended */ int exec_args_add_fname(struct image_args *args, const char *fname, enum uio_seg segflg) { int error; size_t length; KASSERT(args->fname == NULL, ("fname already appended")); KASSERT(args->endp == NULL, ("already appending to args")); if (fname != NULL) { args->fname = args->buf; error = segflg == UIO_SYSSPACE ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) return (error == ENAMETOOLONG ? E2BIG : error); } else length = 0; /* Set up for _arg_*()/_env_*() */ args->endp = args->buf + length; /* begin_argv must be set and kept updated */ args->begin_argv = args->endp; KASSERT(exec_map_entry_size - length >= ARG_MAX, ("too little space remaining for arguments %zu < %zu", exec_map_entry_size - length, (size_t)ARG_MAX)); args->stringspace = ARG_MAX; return (0); } static int exec_args_add_str(struct image_args *args, const char *str, enum uio_seg segflg, int *countp) { int error; size_t length; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); error = (segflg == UIO_SYSSPACE) ? copystr(str, args->endp, args->stringspace, &length) : copyinstr(str, args->endp, args->stringspace, &length); if (error != 0) return (error == ENAMETOOLONG ? 
E2BIG : error); args->stringspace -= length; args->endp += length; (*countp)++; return (0); } int exec_args_add_arg(struct image_args *args, const char *argp, enum uio_seg segflg) { KASSERT(args->envc == 0, ("appending args after env")); return (exec_args_add_str(args, argp, segflg, &args->argc)); } int exec_args_add_env(struct image_args *args, const char *envp, enum uio_seg segflg) { if (args->envc == 0) args->begin_envv = args->endp; return (exec_args_add_str(args, envp, segflg, &args->envc)); } int exec_args_adjust_args(struct image_args *args, size_t consume, ssize_t extend) { ssize_t offset; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); offset = extend - consume; if (args->stringspace < offset) return (E2BIG); memmove(args->begin_argv + extend, args->begin_argv + consume, args->endp - args->begin_argv + consume); if (args->envc > 0) args->begin_envv += offset; args->endp += offset; args->stringspace -= offset; return (0); } char * exec_args_get_begin_envv(struct image_args *args) { KASSERT(args->endp != NULL, ("endp not initialized")); if (args->envc > 0) return (args->begin_envv); return (args->endp); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ register_t * exec_copyout_strings(struct image_params *imgp) { int argc, envc; char **vectp; char *stringp; uintptr_t destp; register_t *stack_base; struct ps_strings *arginfo; struct proc *p; size_t execpath_len; int szsigcode, szps; char canary[sizeof(long) * 8]; szps = sizeof(pagesizes[0]) * MAXPAGESIZES; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; szsigcode = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; if (p->p_sysent->sv_sigcode_base == 0) { if (p->p_sysent->sv_szsigcode != NULL) szsigcode = *(p->p_sysent->sv_szsigcode); } destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ destp -= szps; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = destp; copyout(pagesizes, (void *)destp, szps); imgp->pagesizeslen = szps; destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); vectp = (char **)destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. */ vectp -= howmany(AT_COUNT * sizeof(Elf_Auxinfo), sizeof(*vectp)); } /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. 
*/ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int exec_check_permissions(struct image_params *imgp) { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error, writecount; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) return (error); if (writecount != 0) return (ETXTBSY); /* * Call filesystem specific open routine (which does nothing in the * general case). 
*/ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = 1; return (error); } /* * Exec handler registration */ int exec_register(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; u_int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } Index: head/sys/kern/kern_fork.c =================================================================== --- head/sys/kern/kern_fork.c (revision 343963) +++ head/sys/kern/kern_fork.c (revision 343964) @@ -1,1122 +1,1123 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_fork_func_t dtrace_fasttrap_fork; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int"); #ifndef _SYS_SYSPROTO_H_ struct fork_args { int dummy; }; #endif EVENTHANDLER_LIST_DECLARE(process_fork); /* ARGSUSED */ int sys_fork(struct thread *td, struct fork_args *uap) { struct fork_req fr; int error, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC; fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } /* ARGUSED */ int sys_pdfork(struct thread *td, struct pdfork_args *uap) { struct fork_req fr; int error, fd, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFPROCDESC; fr.fr_pidp = &pid; fr.fr_pd_fd = &fd; fr.fr_pd_flags = uap->flags; /* * It is necessary to return fd by reference because 0 is a valid file * descriptor number, and the child needs to be able to distinguish * itself from the parent using the return value. */ error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; error = copyout(&fd, uap->fdp, sizeof(fd)); } return (error); } /* ARGSUSED */ int sys_vfork(struct thread *td, struct vfork_args *uap) { struct fork_req fr; int error, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } int sys_rfork(struct thread *td, struct rfork_args *uap) { struct fork_req fr; int error, pid; /* Don't allow kernel-only flags. */ if ((uap->flags & RFKERNELONLY) != 0) return (EINVAL); AUDIT_ARG_FFLAGS(uap->flags); bzero(&fr, sizeof(fr)); fr.fr_flags = uap->flags; fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } int nprocs = 1; /* process 0 */ int lastpid = 0; SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, "Last used PID"); /* * Random component to lastpid generation. We mix in a random factor to make * it a little harder to predict. We sanity check the modulus value to avoid * doing it in critical paths. Don't let it be too small or we pointlessly * waste randomness entropy, and don't let it be impossibly large. Using a * modulus that is too big causes a LOT more process table scans and slows * down fork processing as the pidchecked caching is defeated. 
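 *
 * Worked example (illustrative numbers only): with the sysctl set as
 * "sysctl kern.randompid=1000", fork_findpid() below starts its search at
 *
 *	trypid = lastpid + 1 + arc4random() % 1000
 *
 * i.e. somewhere in [lastpid + 1, lastpid + 1000]; writing the special
 * value 1 instead makes the handler pick a modulus in [100, 1123] once,
 * at sysctl time.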
*/ static int randompid = 0; static int sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) { int error, pid; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error != 0) return(error); sx_xlock(&allproc_lock); pid = randompid; error = sysctl_handle_int(oidp, &pid, 0, req); if (error == 0 && req->newptr != NULL) { if (pid == 0) randompid = 0; else if (pid == 1) /* generate a random PID modulus between 100 and 1123 */ randompid = 100 + arc4random() % 1024; else if (pid < 0 || pid > pid_max - 100) /* out of range */ randompid = pid_max - 100; else if (pid < 100) /* Make it reasonable */ randompid = 100; else randompid = pid; } sx_xunlock(&allproc_lock); return (error); } SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_randompid, "I", "Random PID modulus. Special values: 0: disable, 1: choose random value"); extern bitstr_t proc_id_pidmap; extern bitstr_t proc_id_grpidmap; extern bitstr_t proc_id_sessidmap; extern bitstr_t proc_id_reapmap; /* * Find an unused process ID * * If RFHIGHPID is set (used during system boot), do not allocate * low-numbered pids. */ static int fork_findpid(int flags) { pid_t result; int trypid; trypid = lastpid + 1; if (flags & RFHIGHPID) { if (trypid < 10) trypid = 10; } else { if (randompid) trypid += arc4random() % randompid; } mtx_lock(&procid_lock); retry: /* * If the process ID prototype has wrapped around, * restart somewhat above 0, as the low-numbered procs * tend to include daemons that don't exit. */ if (trypid >= pid_max) { trypid = trypid % pid_max; if (trypid < 100) trypid += 100; } bit_ffc_at(&proc_id_pidmap, trypid, pid_max, &result); if (result == -1) { trypid = 100; goto retry; } if (bit_test(&proc_id_grpidmap, result) || bit_test(&proc_id_sessidmap, result) || bit_test(&proc_id_reapmap, result)) { trypid = result + 1; goto retry; } /* * RFHIGHPID does not mess with the lastpid counter during boot. */ if ((flags & RFHIGHPID) == 0) lastpid = result; bit_set(&proc_id_pidmap, result); mtx_unlock(&procid_lock); return (result); } static int fork_norfproc(struct thread *td, int flags) { int error; struct proc *p1; KASSERT((flags & RFPROC) == 0, ("fork_norfproc called with RFPROC set")); p1 = td->td_proc; if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && (flags & (RFCFDG | RFFDG))) { PROC_LOCK(p1); if (thread_single(p1, SINGLE_BOUNDARY)) { PROC_UNLOCK(p1); return (ERESTART); } PROC_UNLOCK(p1); } error = vm_forkproc(td, NULL, NULL, NULL, flags); if (error) goto fail; /* * Close all file descriptors. */ if (flags & RFCFDG) { struct filedesc *fdtmp; fdtmp = fdinit(td->td_proc->p_fd, false); fdescfree(td); p1->p_fd = fdtmp; } /* * Unshare file descriptors (from parent). 
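 *
 * Usage note, added for illustration: this path runs when RFPROC is not
 * set, so the flags act on the calling process itself; e.g. rfork(RFFDG)
 * merely unshares the table as below, while rfork(RFCFDG) replaced it
 * with a fresh, empty one in the block above.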
*/ if (flags & RFFDG) fdunshare(td); fail: if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && (flags & (RFCFDG | RFFDG))) { PROC_LOCK(p1); thread_single_end(p1, SINGLE_BOUNDARY); PROC_UNLOCK(p1); } return (error); } static void do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2, struct vmspace *vm2, struct file *fp_procdesc) { struct proc *p1, *pptr; int trypid; struct filedesc *fd; struct filedesc_to_leader *fdtol; struct sigacts *newsigacts; sx_assert(&allproc_lock, SX_XLOCKED); p1 = td->td_proc; trypid = fork_findpid(fr->fr_flags); p2->p_state = PRS_NEW; /* protect against others */ p2->p_pid = trypid; AUDIT_ARG_PID(p2->p_pid); LIST_INSERT_HEAD(&allproc, p2, p_list); allproc_gen++; sx_xlock(PIDHASHLOCK(p2->p_pid)); LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); sx_xunlock(PIDHASHLOCK(p2->p_pid)); PROC_LOCK(p2); PROC_LOCK(p1); sx_xunlock(&allproc_lock); bcopy(&p1->p_startcopy, &p2->p_startcopy, __rangeof(struct proc, p_startcopy, p_endcopy)); pargs_hold(p2->p_args); PROC_UNLOCK(p1); bzero(&p2->p_startzero, __rangeof(struct proc, p_startzero, p_endzero)); /* Tell the prison that we exist. */ prison_proc_hold(p2->p_ucred->cr_prison); PROC_UNLOCK(p2); tidhash_add(td2); /* * Malloc things while we don't hold any locks. */ if (fr->fr_flags & RFSIGSHARE) newsigacts = NULL; else newsigacts = sigacts_alloc(); /* * Copy filedesc. */ if (fr->fr_flags & RFCFDG) { fd = fdinit(p1->p_fd, false); fdtol = NULL; } else if (fr->fr_flags & RFFDG) { fd = fdcopy(p1->p_fd); fdtol = NULL; } else { fd = fdshare(p1->p_fd); if (p1->p_fdtol == NULL) p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL, p1->p_leader); if ((fr->fr_flags & RFTHREAD) != 0) { /* * Shared file descriptor table, and shared * process leaders. */ fdtol = p1->p_fdtol; FILEDESC_XLOCK(p1->p_fd); fdtol->fdl_refcount++; FILEDESC_XUNLOCK(p1->p_fd); } else { /* * Shared file descriptor table, and different * process leaders. */ fdtol = filedesc_to_leader_alloc(p1->p_fdtol, p1->p_fd, p2); } } /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ PROC_LOCK(p2); PROC_LOCK(p1); bzero(&td2->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); bcopy(&td->td_startcopy, &td2->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name)); td2->td_sigstk = td->td_sigstk; td2->td_flags = TDF_INMEM; td2->td_lend_user_pri = PRI_MAX; #ifdef VIMAGE td2->td_vnet = NULL; td2->td_vnet_lpush = NULL; #endif /* * Allow the scheduler to initialize the child. */ thread_lock(td); sched_fork(td, td2); thread_unlock(td); /* * Duplicate sub-structures as needed. * Increase reference counts on shared objects. 
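 *
 * Illustrative aside (the procctl(2) names are assumed from the interface
 * that accompanies this change and are not shown in this file): the
 * P2_ASLR_* bits copied into the child just below are normally set by
 * something like
 *
 *	int v = PROC_ASLR_FORCE_DISABLE;
 *	procctl(P_PID, getpid(), PROC_ASLR_CTL, &v);
 *
 * so that a per-process ASLR override survives fork().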
*/ p2->p_flag = P_INMEM; - p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP); + p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE | + P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP); p2->p_swtick = ticks; if (p1->p_flag & P_PROFIL) startprofclock(p2); if (fr->fr_flags & RFSIGSHARE) { p2->p_sigacts = sigacts_hold(p1->p_sigacts); } else { sigacts_copy(newsigacts, p1->p_sigacts); p2->p_sigacts = newsigacts; } if (fr->fr_flags & RFTSIGZMB) p2->p_sigparent = RFTSIGNUM(fr->fr_flags); else if (fr->fr_flags & RFLINUXTHPN) p2->p_sigparent = SIGUSR1; else p2->p_sigparent = SIGCHLD; p2->p_textvp = p1->p_textvp; p2->p_fd = fd; p2->p_fdtol = fdtol; if (p1->p_flag2 & P2_INHERIT_PROTECTED) { p2->p_flag |= P_PROTECTED; p2->p_flag2 |= P2_INHERIT_PROTECTED; } /* * p_limit is copy-on-write. Bump its refcount. */ lim_fork(p1, p2); thread_cow_get_proc(td2, p2); pstats_fork(p1->p_stats, p2->p_stats); PROC_UNLOCK(p1); PROC_UNLOCK(p2); /* Bump references to the text vnode (for procfs). */ if (p2->p_textvp) vrefact(p2->p_textvp); /* * Set up linkage for kernel based threading. */ if ((fr->fr_flags & RFTHREAD) != 0) { mtx_lock(&ppeers_lock); p2->p_peers = p1->p_peers; p1->p_peers = p2; p2->p_leader = p1->p_leader; mtx_unlock(&ppeers_lock); PROC_LOCK(p1->p_leader); if ((p1->p_leader->p_flag & P_WEXIT) != 0) { PROC_UNLOCK(p1->p_leader); /* * The task leader is exiting, so process p1 is * going to be killed shortly. Since p1 obviously * isn't dead yet, we know that the leader is either * sending SIGKILL's to all the processes in this * task or is sleeping waiting for all the peers to * exit. We let p1 complete the fork, but we need * to go ahead and kill the new process p2 since * the task leader may not get a chance to send * SIGKILL to it. We leave it on the list so that * the task leader will wait for this new process * to commit suicide. */ PROC_LOCK(p2); kern_psignal(p2, SIGKILL); PROC_UNLOCK(p2); } else PROC_UNLOCK(p1->p_leader); } else { p2->p_peers = NULL; p2->p_leader = p2; } sx_xlock(&proctree_lock); PGRP_LOCK(p1->p_pgrp); PROC_LOCK(p2); PROC_LOCK(p1); /* * Preserve some more flags in subprocess. P_PROFIL has already * been preserved. */ p2->p_flag |= p1->p_flag & P_SUGID; td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING; SESS_LOCK(p1->p_session); if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) p2->p_flag |= P_CONTROLT; SESS_UNLOCK(p1->p_session); if (fr->fr_flags & RFPPWAIT) p2->p_flag |= P_PPWAIT; p2->p_pgrp = p1->p_pgrp; LIST_INSERT_AFTER(p1, p2, p_pglist); PGRP_UNLOCK(p1->p_pgrp); LIST_INIT(&p2->p_children); LIST_INIT(&p2->p_orphans); callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0); /* * If PF_FORK is set, the child process inherits the * procfs ioctl flags from its parent. */ if (p1->p_pfsflags & PF_FORK) { p2->p_stops = p1->p_stops; p2->p_pfsflags = p1->p_pfsflags; } /* * This begins the section where we must prevent the parent * from being swapped. */ _PHOLD(p1); PROC_UNLOCK(p1); /* * Attach the new process to its parent. * * If RFNOWAIT is set, the newly created process becomes a child * of init. This effectively disassociates the child from the * parent. */ if ((fr->fr_flags & RFNOWAIT) != 0) { pptr = p1->p_reaper; p2->p_reaper = pptr; } else { p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ? 
p1 : p1->p_reaper; pptr = p1; } p2->p_pptr = pptr; p2->p_oppid = pptr->p_pid; LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); LIST_INIT(&p2->p_reaplist); LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling); if (p2->p_reaper == p1 && p1 != initproc) { p2->p_reapsubtree = p2->p_pid; proc_id_set_cond(PROC_ID_REAP, p2->p_pid); } sx_xunlock(&proctree_lock); /* Inform accounting that we have forked. */ p2->p_acflag = AFORK; PROC_UNLOCK(p2); #ifdef KTRACE ktrprocfork(p1, p2); #endif /* * Finish creating the child process. It will return via a different * execution path later. (ie: directly into user mode) */ vm_forkproc(td, p2, td2, vm2, fr->fr_flags); if (fr->fr_flags == (RFFDG | RFPROC)) { VM_CNT_INC(v_forks); VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { VM_CNT_INC(v_vforks); VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else if (p1 == &proc0) { VM_CNT_INC(v_kthreads); VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else { VM_CNT_INC(v_rforks); VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } /* * Associate the process descriptor with the process before anything * can happen that might cause that process to need the descriptor. * However, don't do this until after fork(2) can no longer fail. */ if (fr->fr_flags & RFPROCDESC) procdesc_new(p2, fr->fr_pd_flags); /* * Both processes are set up, now check if any loadable modules want * to adjust anything. */ EVENTHANDLER_DIRECT_INVOKE(process_fork, p1, p2, fr->fr_flags); /* * Set the child start time and mark the process as being complete. */ PROC_LOCK(p2); PROC_LOCK(p1); microuptime(&p2->p_stats->p_start); PROC_SLOCK(p2); p2->p_state = PRS_NORMAL; PROC_SUNLOCK(p2); #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the new process so that any * tracepoints inherited from the parent can be removed. We have to do * this only after p_state is PRS_NORMAL since the fasttrap module will * use pfind() later on. */ if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork) dtrace_fasttrap_fork(p1, p2); #endif if (fr->fr_flags & RFPPWAIT) { td->td_pflags |= TDP_RFPPWAIT; td->td_rfppwait_p = p2; td->td_dbgflags |= TDB_VFORK; } PROC_UNLOCK(p2); /* * Tell any interested parties about the new process. */ knote_fork(p1->p_klist, p2->p_pid); /* * Now can be swapped. */ _PRELE(p1); PROC_UNLOCK(p1); SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags); if (fr->fr_flags & RFPROCDESC) { procdesc_finit(p2->p_procdesc, fp_procdesc); fdrop(fp_procdesc, td); } /* * Speculative check for PTRACE_FORK. PTRACE_FORK is not * synced with forks in progress so it is OK if we miss it * if being set atm. */ if ((p1->p_ptevents & PTRACE_FORK) != 0) { sx_xlock(&proctree_lock); PROC_LOCK(p2); /* * p1->p_ptevents & p1->p_pptr are protected by both * process and proctree locks for modifications, * so owning proctree_lock allows the race-free read. */ if ((p1->p_ptevents & PTRACE_FORK) != 0) { /* * Arrange for debugger to receive the fork event. * * We can report PL_FLAG_FORKED regardless of * P_FOLLOWFORK settings, but it does not make a sense * for runaway child. 
*/ td->td_dbgflags |= TDB_FORK; td->td_dbg_forked = p2->p_pid; td2->td_dbgflags |= TDB_STOPATFORK; proc_set_traced(p2, true); CTR2(KTR_PTRACE, "do_fork: attaching to new child pid %d: oppid %d", p2->p_pid, p2->p_oppid); proc_reparent(p2, p1->p_pptr, false); } PROC_UNLOCK(p2); sx_xunlock(&proctree_lock); } racct_proc_fork_done(p2); if ((fr->fr_flags & RFSTOPPED) == 0) { if (fr->fr_pidp != NULL) *fr->fr_pidp = p2->p_pid; /* * If RFSTOPPED not requested, make child runnable and * add to run queue. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); thread_unlock(td2); } else { *fr->fr_procp = p2; } } void fork_rfppwait(struct thread *td) { struct proc *p, *p2; MPASS(td->td_pflags & TDP_RFPPWAIT); p = td->td_proc; /* * Preserve synchronization semantics of vfork. If * waiting for child to exec or exit, fork set * P_PPWAIT on child, and there we sleep on our proc * (in case of exit). * * Do it after the ptracestop() above is finished, to * not block our debugger until child execs or exits * to finish vfork wait. */ td->td_pflags &= ~TDP_RFPPWAIT; p2 = td->td_rfppwait_p; again: PROC_LOCK(p2); while (p2->p_flag & P_PPWAIT) { PROC_LOCK(p); if (thread_suspend_check_needed()) { PROC_UNLOCK(p2); thread_suspend_check(0); PROC_UNLOCK(p); goto again; } else { PROC_UNLOCK(p); } cv_timedwait(&p2->p_pwait, &p2->p_mtx, hz); } PROC_UNLOCK(p2); if (td->td_dbgflags & TDB_VFORK) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_VFORK) ptracestop(td, SIGTRAP, NULL); td->td_dbgflags &= ~TDB_VFORK; PROC_UNLOCK(p); } } int fork1(struct thread *td, struct fork_req *fr) { struct proc *p1, *newproc; struct thread *td2; struct vmspace *vm2; struct file *fp_procdesc; vm_ooffset_t mem_charged; int error, nprocs_new, ok; static int curfail; static struct timeval lastfail; int flags, pages; flags = fr->fr_flags; pages = fr->fr_pages; if ((flags & RFSTOPPED) != 0) MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL); else MPASS(fr->fr_procp == NULL); /* Check for the undefined or unimplemented flags. */ if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) return (EINVAL); /* Signal value requires RFTSIGZMB. */ if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0) return (EINVAL); /* Can't copy and clear. */ if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) return (EINVAL); /* Check the validity of the signal number. */ if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) return (EINVAL); if ((flags & RFPROCDESC) != 0) { /* Can't not create a process yet get a process descriptor. */ if ((flags & RFPROC) == 0) return (EINVAL); /* Must provide a place to put a procdesc if creating one. */ if (fr->fr_pd_fd == NULL) return (EINVAL); /* Check if we are using supported flags. */ if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0) return (EINVAL); } p1 = td->td_proc; /* * Here we don't create a new process, but we divorce * certain parts of a process from itself. */ if ((flags & RFPROC) == 0) { if (fr->fr_procp != NULL) *fr->fr_procp = NULL; else if (fr->fr_pidp != NULL) *fr->fr_pidp = 0; return (fork_norfproc(td, flags)); } fp_procdesc = NULL; newproc = NULL; vm2 = NULL; /* * Increment the nprocs resource before allocations occur. * Although process entries are dynamically created, we still * keep a global limit on the maximum number we will * create. There are hard-limits as to the number of processes * that can run, established by the KVA and memory usage for * the process data. * * Don't allow a nonprivileged user to use the last ten * processes; don't let root exceed the limit. 
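 *
 * The flag validation at the top of fork1() is what userland observes
 * from rfork(2); as a small sketch (error handling trimmed), asking to
 * both copy and clear the descriptor table is rejected, while
 * RFPROC | RFFDG is the plain fork(2) case counted as v_forks above:
 *
 *	#include <sys/types.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	pid_t pid;
 *
 *	if (rfork(RFFDG | RFCFDG) == -1 && errno == EINVAL)
 *		printf("copy and clear of the fd table together is invalid\n");
 *	pid = rfork(RFPROC | RFFDG);
 *	if (pid == 0)
 *		_exit(0);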
*/ nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1; if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred, PRIV_MAXPROC) != 0) || nprocs_new >= maxproc) { error = EAGAIN; sx_xlock(&allproc_lock); if (ppsratecheck(&lastfail, &curfail, 1)) { printf("maxproc limit exceeded by uid %u (pid %d); " "see tuning(7) and login.conf(5)\n", td->td_ucred->cr_ruid, p1->p_pid); } sx_xunlock(&allproc_lock); goto fail2; } /* * If required, create a process descriptor in the parent first; we * will abandon it if something goes wrong. We don't finit() until * later. */ if (flags & RFPROCDESC) { error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd, fr->fr_pd_flags, fr->fr_pd_fcaps); if (error != 0) goto fail2; } mem_charged = 0; if (pages == 0) pages = kstack_pages; /* Allocate new proc. */ newproc = uma_zalloc(proc_zone, M_WAITOK); td2 = FIRST_THREAD_IN_PROC(newproc); if (td2 == NULL) { td2 = thread_alloc(pages); if (td2 == NULL) { error = ENOMEM; goto fail2; } proc_linkup(newproc, td2); } else { if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) { if (td2->td_kstack != 0) vm_thread_dispose(td2); if (!thread_alloc_stack(td2, pages)) { error = ENOMEM; goto fail2; } } } if ((flags & RFMEM) == 0) { vm2 = vmspace_fork(p1->p_vmspace, &mem_charged); if (vm2 == NULL) { error = ENOMEM; goto fail2; } if (!swap_reserve(mem_charged)) { /* * The swap reservation failed. The accounting * from the entries of the copied vm2 will be * subtracted in vmspace_free(), so force the * reservation there. */ swap_reserve_force(mem_charged); error = ENOMEM; goto fail2; } } else vm2 = NULL; /* * XXX: This is ugly; when we copy resource usage, we need to bump * per-cred resource counters. */ proc_set_cred_init(newproc, crhold(td->td_ucred)); /* * Initialize resource accounting for the child process. */ error = racct_proc_fork(p1, newproc); if (error != 0) { error = EAGAIN; goto fail1; } #ifdef MAC mac_proc_init(newproc); #endif newproc->p_klist = knlist_alloc(&newproc->p_mtx); STAILQ_INIT(&newproc->p_ktr); sx_xlock(&allproc_lock); /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit. * * XXXRW: Can we avoid privilege here if it's not needed? */ error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT); if (error == 0) ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0); else { ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPROC)); } if (ok) { do_fork(td, fr, newproc, td2, vm2, fp_procdesc); return (0); } error = EAGAIN; sx_xunlock(&allproc_lock); #ifdef MAC mac_proc_destroy(newproc); #endif racct_proc_exit(newproc); fail1: crfree(newproc->p_ucred); newproc->p_ucred = NULL; fail2: if (vm2 != NULL) vmspace_free(vm2); uma_zfree(proc_zone, newproc); if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) { fdclose(td, fp_procdesc, *fr->fr_pd_fd); fdrop(fp_procdesc, td); } atomic_add_int(&nprocs, -1); pause("fork", hz / 2); return (error); } /* * Handle the return of a child process from fork1(). This function * is called from the MD fork_trampoline() entry point. 
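 *
 * The same entry path is taken by kernel processes made with
 * kproc_create(9): the function passed there becomes the callout
 * invoked below, and it is expected to finish with kproc_exit(9)
 * rather than return, which the premature-return check below catches.
 * A module-side sketch (example_main and example_proc are placeholder
 * names; the kproc_create() call would sit in the module's init path):
 *
 *	#include <sys/param.h>
 *	#include <sys/systm.h>
 *	#include <sys/kernel.h>
 *	#include <sys/proc.h>
 *	#include <sys/kthread.h>
 *
 *	static struct proc *example_proc;
 *
 *	static void
 *	example_main(void *arg)
 *	{
 *
 *		pause("examp", 10 * hz);
 *		kproc_exit(0);
 *	}
 *
 *	(void)kproc_create(example_main, NULL, &example_proc, 0, 0,
 *	    "example");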
*/ void fork_exit(void (*callout)(void *, struct trapframe *), void *arg, struct trapframe *frame) { struct proc *p; struct thread *td; struct thread *dtd; td = curthread; p = td->td_proc; KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)", td, td_get_sched(td), p->p_pid, td->td_name); sched_fork_exit(td); /* * Processes normally resume in mi_switch() after being * cpu_switch()'ed to, but when children start up they arrive here * instead, so we must do much the same things as mi_switch() would. */ if ((dtd = PCPU_GET(deadthread))) { PCPU_SET(deadthread, NULL); thread_stash(dtd); } thread_unlock(td); /* * cpu_fork_kthread_handler intercepts this function call to * have this call a non-return function to stay in kernel mode. * initproc has its own fork handler, but it does return. */ KASSERT(callout != NULL, ("NULL callout in fork_exit")); callout(arg, frame); /* * Check if a kernel thread misbehaved and returned from its main * function. */ if (p->p_flag & P_KPROC) { printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", td->td_name, p->p_pid); kthread_exit(); } mtx_assert(&Giant, MA_NOTOWNED); if (p->p_sysent->sv_schedtail != NULL) (p->p_sysent->sv_schedtail)(td); td->td_pflags &= ~TDP_FORKING; } /* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. This function is passed in to fork_exit() * as the first parameter and is called when returning to a new * userland process. */ void fork_return(struct thread *td, struct trapframe *frame) { struct proc *p; p = td->td_proc; if (td->td_dbgflags & TDB_STOPATFORK) { PROC_LOCK(p); if ((p->p_flag & P_TRACED) != 0) { /* * Inform the debugger if one is still present. */ td->td_dbgflags |= TDB_CHILD | TDB_SCX | TDB_FSTP; ptracestop(td, SIGSTOP, NULL); td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX); } else { /* * ... otherwise clear the request. */ td->td_dbgflags &= ~TDB_STOPATFORK; } PROC_UNLOCK(p); } else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) { /* * This is the start of a new thread in a traced * process. Report a system call exit event. */ PROC_LOCK(p); td->td_dbgflags |= TDB_SCX; _STOPEVENT(p, S_SCX, td->td_sa.code); if ((p->p_ptevents & PTRACE_SCX) != 0 || (td->td_dbgflags & TDB_BORN) != 0) ptracestop(td, SIGTRAP, NULL); td->td_dbgflags &= ~(TDB_SCX | TDB_BORN); PROC_UNLOCK(p); } userret(td, frame); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(SYS_fork, 0, 0); #endif } Index: head/sys/kern/kern_procctl.c =================================================================== --- head/sys/kern/kern_procctl.c (revision 343963) +++ head/sys/kern/kern_procctl.c (revision 343964) @@ -1,662 +1,734 @@ /*- * Copyright (c) 2014 John Baldwin * Copyright (c) 2014, 2016 The FreeBSD Foundation * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include +#include +#include + static int protect_setchild(struct thread *td, struct proc *p, int flags) { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0) return (0); if (flags & PPROT_SET) { p->p_flag |= P_PROTECTED; if (flags & PPROT_INHERIT) p->p_flag2 |= P2_INHERIT_PROTECTED; } else { p->p_flag &= ~P_PROTECTED; p->p_flag2 &= ~P2_INHERIT_PROTECTED; } return (1); } static int protect_setchildren(struct thread *td, struct proc *top, int flags) { struct proc *p; int ret; p = top; ret = 0; sx_assert(&proctree_lock, SX_LOCKED); for (;;) { ret |= protect_setchild(td, p, flags); PROC_UNLOCK(p); /* * If this process has children, descend to them next, * otherwise do any siblings, and if done with this level, * follow back up the tree (but not past top). */ if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) { PROC_LOCK(p); return (ret); } if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } PROC_LOCK(p); } } static int protect_set(struct thread *td, struct proc *p, int flags) { int error, ret; switch (PPROT_OP(flags)) { case PPROT_SET: case PPROT_CLEAR: break; default: return (EINVAL); } if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0) return (EINVAL); error = priv_check(td, PRIV_VM_MADV_PROTECT); if (error) return (error); if (flags & PPROT_DESCEND) ret = protect_setchildren(td, p, flags); else ret = protect_setchild(td, p, flags); if (ret == 0) return (EPERM); return (0); } static int reap_acquire(struct thread *td, struct proc *p) { sx_assert(&proctree_lock, SX_XLOCKED); if (p != curproc) return (EPERM); if ((p->p_treeflag & P_TREE_REAPER) != 0) return (EBUSY); p->p_treeflag |= P_TREE_REAPER; /* * We do not reattach existing children and the whole tree * under them to us, since p->p_reaper already seen them. 
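 *
 * From userland the reaper facility handled here is reached through
 * procctl(2); a minimal sketch (error handling omitted) that makes the
 * caller the reaper of its subtree and prints the status fields filled
 * in by reap_status() below:
 *
 *	#include <sys/procctl.h>
 *	#include <sys/types.h>
 *	#include <sys/wait.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	struct procctl_reaper_status rs;
 *
 *	procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL);
 *	procctl(P_PID, getpid(), PROC_REAP_STATUS, &rs);
 *	printf("reaper %d: %u children, %u descendants\n",
 *	    (int)rs.rs_reaper, rs.rs_children, rs.rs_descendants);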
*/ return (0); } static int reap_release(struct thread *td, struct proc *p) { sx_assert(&proctree_lock, SX_XLOCKED); if (p != curproc) return (EPERM); if (p == initproc) return (EINVAL); if ((p->p_treeflag & P_TREE_REAPER) == 0) return (EINVAL); reaper_abandon_children(p, false); return (0); } static int reap_status(struct thread *td, struct proc *p, struct procctl_reaper_status *rs) { struct proc *reap, *p2, *first_p; sx_assert(&proctree_lock, SX_LOCKED); bzero(rs, sizeof(*rs)); if ((p->p_treeflag & P_TREE_REAPER) == 0) { reap = p->p_reaper; } else { reap = p; rs->rs_flags |= REAPER_STATUS_OWNED; } if (reap == initproc) rs->rs_flags |= REAPER_STATUS_REALINIT; rs->rs_reaper = reap->p_pid; rs->rs_descendants = 0; rs->rs_children = 0; if (!LIST_EMPTY(&reap->p_reaplist)) { first_p = LIST_FIRST(&reap->p_children); if (first_p == NULL) first_p = LIST_FIRST(&reap->p_reaplist); rs->rs_pid = first_p->p_pid; LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) { if (proc_realparent(p2) == reap) rs->rs_children++; rs->rs_descendants++; } } else { rs->rs_pid = -1; } return (0); } static int reap_getpids(struct thread *td, struct proc *p, struct procctl_reaper_pids *rp) { struct proc *reap, *p2; struct procctl_reaper_pidinfo *pi, *pip; u_int i, n; int error; sx_assert(&proctree_lock, SX_LOCKED); PROC_UNLOCK(p); reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p; n = i = 0; error = 0; LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) n++; sx_unlock(&proctree_lock); if (rp->rp_count < n) n = rp->rp_count; pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK); sx_slock(&proctree_lock); LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) { if (i == n) break; pip = &pi[i]; bzero(pip, sizeof(*pip)); pip->pi_pid = p2->p_pid; pip->pi_subtree = p2->p_reapsubtree; pip->pi_flags = REAPER_PIDINFO_VALID; if (proc_realparent(p2) == reap) pip->pi_flags |= REAPER_PIDINFO_CHILD; if ((p2->p_treeflag & P_TREE_REAPER) != 0) pip->pi_flags |= REAPER_PIDINFO_REAPER; i++; } sx_sunlock(&proctree_lock); error = copyout(pi, rp->rp_pids, i * sizeof(*pi)); free(pi, M_TEMP); sx_slock(&proctree_lock); PROC_LOCK(p); return (error); } static void reap_kill_proc(struct thread *td, struct proc *p2, ksiginfo_t *ksi, struct procctl_reaper_kill *rk, int *error) { int error1; PROC_LOCK(p2); error1 = p_cansignal(td, p2, rk->rk_sig); if (error1 == 0) { pksignal(p2, rk->rk_sig, ksi); rk->rk_killed++; *error = error1; } else if (*error == ESRCH) { rk->rk_fpid = p2->p_pid; *error = error1; } PROC_UNLOCK(p2); } struct reap_kill_tracker { struct proc *parent; TAILQ_ENTRY(reap_kill_tracker) link; }; TAILQ_HEAD(reap_kill_tracker_head, reap_kill_tracker); static void reap_kill_sched(struct reap_kill_tracker_head *tracker, struct proc *p2) { struct reap_kill_tracker *t; t = malloc(sizeof(struct reap_kill_tracker), M_TEMP, M_WAITOK); t->parent = p2; TAILQ_INSERT_TAIL(tracker, t, link); } static int reap_kill(struct thread *td, struct proc *p, struct procctl_reaper_kill *rk) { struct proc *reap, *p2; ksiginfo_t ksi; struct reap_kill_tracker_head tracker; struct reap_kill_tracker *t; int error; sx_assert(&proctree_lock, SX_LOCKED); if (IN_CAPABILITY_MODE(td)) return (ECAPMODE); if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG || (rk->rk_flags & ~(REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) != 0 || (rk->rk_flags & (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) == (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) return (EINVAL); PROC_UNLOCK(p); reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? 
p->p_reaper : p; ksiginfo_init(&ksi); ksi.ksi_signo = rk->rk_sig; ksi.ksi_code = SI_USER; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; error = ESRCH; rk->rk_killed = 0; rk->rk_fpid = -1; if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) { for (p2 = LIST_FIRST(&reap->p_children); p2 != NULL; p2 = LIST_NEXT(p2, p_sibling)) { reap_kill_proc(td, p2, &ksi, rk, &error); /* * Do not end the loop on error, signal * everything we can. */ } } else { TAILQ_INIT(&tracker); reap_kill_sched(&tracker, reap); while ((t = TAILQ_FIRST(&tracker)) != NULL) { MPASS((t->parent->p_treeflag & P_TREE_REAPER) != 0); TAILQ_REMOVE(&tracker, t, link); for (p2 = LIST_FIRST(&t->parent->p_reaplist); p2 != NULL; p2 = LIST_NEXT(p2, p_reapsibling)) { if (t->parent == reap && (rk->rk_flags & REAPER_KILL_SUBTREE) != 0 && p2->p_reapsubtree != rk->rk_subtree) continue; if ((p2->p_treeflag & P_TREE_REAPER) != 0) reap_kill_sched(&tracker, p2); reap_kill_proc(td, p2, &ksi, rk, &error); } free(t, M_TEMP); } } PROC_LOCK(p); return (error); } static int trace_ctl(struct thread *td, struct proc *p, int state) { PROC_LOCK_ASSERT(p, MA_OWNED); /* * Ktrace changes p_traceflag from or to zero under the * process lock, so the test does not need to acquire ktrace * mutex. */ if ((p->p_flag & P_TRACED) != 0 || p->p_traceflag != 0) return (EBUSY); switch (state) { case PROC_TRACE_CTL_ENABLE: if (td->td_proc != p) return (EPERM); p->p_flag2 &= ~(P2_NOTRACE | P2_NOTRACE_EXEC); break; case PROC_TRACE_CTL_DISABLE_EXEC: p->p_flag2 |= P2_NOTRACE_EXEC | P2_NOTRACE; break; case PROC_TRACE_CTL_DISABLE: if ((p->p_flag2 & P2_NOTRACE_EXEC) != 0) { KASSERT((p->p_flag2 & P2_NOTRACE) != 0, ("dandling P2_NOTRACE_EXEC")); if (td->td_proc != p) return (EPERM); p->p_flag2 &= ~P2_NOTRACE_EXEC; } else { p->p_flag2 |= P2_NOTRACE; } break; default: return (EINVAL); } return (0); } static int trace_status(struct thread *td, struct proc *p, int *data) { if ((p->p_flag2 & P2_NOTRACE) != 0) { KASSERT((p->p_flag & P_TRACED) == 0, ("%d traced but tracing disabled", p->p_pid)); *data = -1; } else if ((p->p_flag & P_TRACED) != 0) { *data = p->p_pptr->p_pid; } else { *data = 0; } return (0); } static int trapcap_ctl(struct thread *td, struct proc *p, int state) { PROC_LOCK_ASSERT(p, MA_OWNED); switch (state) { case PROC_TRAPCAP_CTL_ENABLE: p->p_flag2 |= P2_TRAPCAP; break; case PROC_TRAPCAP_CTL_DISABLE: p->p_flag2 &= ~P2_TRAPCAP; break; default: return (EINVAL); } return (0); } static int trapcap_status(struct thread *td, struct proc *p, int *data) { *data = (p->p_flag2 & P2_TRAPCAP) != 0 ? 
PROC_TRAPCAP_CTL_ENABLE : PROC_TRAPCAP_CTL_DISABLE; return (0); } +static int +aslr_ctl(struct thread *td, struct proc *p, int state) +{ + + PROC_LOCK_ASSERT(p, MA_OWNED); + + switch (state) { + case PROC_ASLR_FORCE_ENABLE: + p->p_flag2 &= ~P2_ASLR_DISABLE; + p->p_flag2 |= P2_ASLR_ENABLE; + break; + case PROC_ASLR_FORCE_DISABLE: + p->p_flag2 |= P2_ASLR_DISABLE; + p->p_flag2 &= ~P2_ASLR_ENABLE; + break; + case PROC_ASLR_NOFORCE: + p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE); + break; + default: + return (EINVAL); + } + return (0); +} + +static int +aslr_status(struct thread *td, struct proc *p, int *data) +{ + struct vmspace *vm; + int d; + + switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) { + case 0: + d = PROC_ASLR_NOFORCE; + break; + case P2_ASLR_ENABLE: + d = PROC_ASLR_FORCE_ENABLE; + break; + case P2_ASLR_DISABLE: + d = PROC_ASLR_FORCE_DISABLE; + break; + } + if ((p->p_flag & P_WEXIT) == 0) { + _PHOLD(p); + PROC_UNLOCK(p); + vm = vmspace_acquire_ref(p); + if (vm != NULL && (vm->vm_map.flags & MAP_ASLR) != 0) { + d |= PROC_ASLR_ACTIVE; + vmspace_free(vm); + } + PROC_LOCK(p); + _PRELE(p); + } + *data = d; + return (0); +} + #ifndef _SYS_SYSPROTO_H_ struct procctl_args { idtype_t idtype; id_t id; int com; void *data; }; #endif /* ARGSUSED */ int sys_procctl(struct thread *td, struct procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; int error, error1, flags, signum; switch (uap->com) { + case PROC_ASLR_CTL: case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: error = copyin(uap->data, &flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x.rp, sizeof(x.rp)); if (error != 0) return (error); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; break; case PROC_PDEATHSIG_CTL: error = copyin(uap->data, &signum, sizeof(signum)); if (error != 0) return (error); data = &signum; break; case PROC_PDEATHSIG_STATUS: data = &signum; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, uap->id, uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; case PROC_PDEATHSIG_STATUS: if (error == 0) error = copyout(&signum, uap->data, sizeof(signum)); break; } return (error); } static int kern_procctl_single(struct thread *td, struct proc *p, int com, void *data) { PROC_LOCK_ASSERT(p, MA_OWNED); switch (com) { + case PROC_ASLR_CTL: + return (aslr_ctl(td, p, *(int *)data)); + case PROC_ASLR_STATUS: + return (aslr_status(td, p, data)); case PROC_SPROTECT: return (protect_set(td, p, *(int *)data)); case PROC_REAP_ACQUIRE: return (reap_acquire(td, p)); case PROC_REAP_RELEASE: return (reap_release(td, p)); case PROC_REAP_STATUS: return (reap_status(td, p, data)); case PROC_REAP_GETPIDS: return (reap_getpids(td, p, data)); case PROC_REAP_KILL: return 
(reap_kill(td, p, data)); case PROC_TRACE_CTL: return (trace_ctl(td, p, *(int *)data)); case PROC_TRACE_STATUS: return (trace_status(td, p, data)); case PROC_TRAPCAP_CTL: return (trapcap_ctl(td, p, *(int *)data)); case PROC_TRAPCAP_STATUS: return (trapcap_status(td, p, data)); default: return (EINVAL); } } int kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data) { struct pgrp *pg; struct proc *p; int error, first_error, ok; int signum; bool tree_locked; switch (com) { + case PROC_ASLR_CTL: + case PROC_ASLR_STATUS: case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: case PROC_REAP_STATUS: case PROC_REAP_GETPIDS: case PROC_REAP_KILL: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: case PROC_PDEATHSIG_CTL: case PROC_PDEATHSIG_STATUS: if (idtype != P_PID) return (EINVAL); } switch (com) { case PROC_PDEATHSIG_CTL: signum = *(int *)data; p = td->td_proc; if ((id != 0 && id != p->p_pid) || (signum != 0 && !_SIG_VALID(signum))) return (EINVAL); PROC_LOCK(p); p->p_pdeathsig = signum; PROC_UNLOCK(p); return (0); case PROC_PDEATHSIG_STATUS: p = td->td_proc; if (id != 0 && id != p->p_pid) return (EINVAL); PROC_LOCK(p); *(int *)data = p->p_pdeathsig; PROC_UNLOCK(p); return (0); } switch (com) { case PROC_SPROTECT: case PROC_REAP_STATUS: case PROC_REAP_GETPIDS: case PROC_REAP_KILL: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: sx_slock(&proctree_lock); tree_locked = true; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: sx_xlock(&proctree_lock); tree_locked = true; break; + case PROC_ASLR_CTL: + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: tree_locked = false; break; default: return (EINVAL); } switch (idtype) { case P_PID: p = pfind(id); if (p == NULL) { error = ESRCH; break; } error = p_cansee(td, p); if (error == 0) error = kern_procctl_single(td, p, com, data); PROC_UNLOCK(p); break; case P_PGID: /* * Attempt to apply the operation to all members of the * group. Ignore processes in the group that can't be * seen. Ignore errors so long as at least one process is * able to complete the request successfully. */ pg = pgfind(id); if (pg == NULL) { error = ESRCH; break; } PGRP_UNLOCK(pg); ok = 0; first_error = 0; LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) { PROC_UNLOCK(p); continue; } error = kern_procctl_single(td, p, com, data); PROC_UNLOCK(p); if (error == 0) ok = 1; else if (first_error == 0) first_error = error; } if (ok) error = 0; else if (first_error != 0) error = first_error; else /* * Was not able to see any processes in the * process group. */ error = ESRCH; break; default: error = EINVAL; break; } if (tree_locked) sx_unlock(&proctree_lock); return (error); } Index: head/sys/sys/imgact.h =================================================================== --- head/sys/sys/imgact.h (revision 343963) +++ head/sys/sys/imgact.h (revision 343964) @@ -1,123 +1,124 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_IMGACT_H_ #define _SYS_IMGACT_H_ #include #include #define MAXSHELLCMDLEN PAGE_SIZE struct ucred; struct image_args { char *buf; /* pointer to string buffer */ void *bufkva; /* cookie for string buffer KVA */ char *begin_argv; /* beginning of argv in buf */ char *begin_envv; /* (interal use only) beginning of envv in buf, * access with exec_args_get_begin_envv(). */ char *endp; /* current `end' pointer of arg & env strings */ char *fname; /* pointer to filename of executable (system space) */ char *fname_buf; /* pointer to optional malloc(M_TEMP) buffer */ int stringspace; /* space left in arg & env buffer */ int argc; /* count of argument strings */ int envc; /* count of environment strings */ int fd; /* file descriptor of the executable */ struct filedesc *fdp; /* new file descriptor table */ }; struct image_params { struct proc *proc; /* our process struct */ struct label *execlabel; /* optional exec label */ struct vnode *vp; /* pointer to vnode of file to exec */ struct vm_object *object; /* The vm object for this vp */ struct vattr *attr; /* attributes of file */ const char *image_header; /* head of file to exec */ unsigned long entry_addr; /* entry address of target executable */ unsigned long reloc_base; /* load address of image */ char vmspace_destroyed; /* flag - we've blown away original vm space */ #define IMGACT_SHELL 0x1 #define IMGACT_BINMISC 0x2 unsigned char interpreted; /* mask of interpreters that have run */ char opened; /* flag - we have opened executable vnode */ char *interpreter_name; /* name of the interpreter */ void *auxargs; /* ELF Auxinfo structure pointer */ struct sf_buf *firstpage; /* first page that we mapped */ unsigned long ps_strings; /* PS_STRINGS for BSD/OS binaries */ struct image_args *args; /* system call arguments */ struct sysentvec *sysent; /* system entry vector */ char *execpath; unsigned long execpathp; char *freepath; unsigned long canary; int canarylen; unsigned long pagesizes; int pagesizeslen; vm_prot_t stack_prot; u_long stack_sz; struct ucred *newcred; /* new credentials if changing */ bool credential_setid; /* true if becoming setid */ + u_int map_flags; }; #ifdef _KERNEL struct sysentvec; struct thread; struct vmspace; int exec_alloc_args(struct image_args *); int exec_args_add_arg(struct image_args *args, const char *argp, enum uio_seg segflg); int exec_args_add_env(struct image_args *args, const char *envp, enum uio_seg segflg); int exec_args_add_fname(struct image_args *args, const char *fname, enum uio_seg segflg); int exec_args_adjust_args(struct image_args *args, size_t 
consume, ssize_t extend); char *exec_args_get_begin_envv(struct image_args *args); int exec_check_permissions(struct image_params *); register_t *exec_copyout_strings(struct image_params *); void exec_free_args(struct image_args *); int exec_new_vmspace(struct image_params *, struct sysentvec *); void exec_setregs(struct thread *, struct image_params *, u_long); int exec_shell_imgact(struct image_params *); int exec_copyin_args(struct image_args *, const char *, enum uio_seg, char **, char **); int exec_copyin_data_fds(struct thread *, struct image_args *, const void *, size_t, const int *, size_t); int pre_execve(struct thread *td, struct vmspace **oldvmspace); void post_execve(struct thread *td, int error, struct vmspace *oldvmspace); #endif #endif /* !_SYS_IMGACT_H_ */ Index: head/sys/sys/proc.h =================================================================== --- head/sys/sys/proc.h (revision 343963) +++ head/sys/sys/proc.h (revision 343964) @@ -1,1183 +1,1186 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD$ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* For struct callout. */ #include /* For struct klist. */ #include #ifndef _KERNEL #include #endif #include #include #include #include #include #include #include /* XXX. */ #include #include #include #include #include #ifndef _KERNEL #include /* For structs itimerval, timeval. */ #else #include #include #endif #include #include #include #include #include /* Machine-dependent proc substruct. */ #ifdef _KERNEL #include #endif /* * One structure allocated per session. 
* * List of locks * (m) locked by s_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct session { u_int s_count; /* Ref cnt; pgrps in session - atomic. */ struct proc *s_leader; /* (m + e) Session leader. */ struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */ struct cdev_priv *s_ttydp; /* (m) Device of controlling tty. */ struct tty *s_ttyp; /* (e) Controlling tty. */ pid_t s_sid; /* (c) Session ID. */ /* (m) Setlogin() name: */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; struct mtx s_mtx; /* Mutex to protect members. */ }; /* * One structure allocated per process group. * * List of locks * (m) locked by pg_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ struct session *pg_session; /* (c) Pointer to session. */ struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ pid_t pg_id; /* (c) Process group id. */ int pg_jobc; /* (m) Job control process count. */ struct mtx pg_mtx; /* Mutex to protect members */ }; /* * pargs, used to hold a copy of the command line, if it had a sane length. */ struct pargs { u_int ar_ref; /* Reference count. */ u_int ar_length; /* Length. */ u_char ar_args[1]; /* Arguments. */ }; /*- * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(CPU)" below, * which might be addressable only on a processor on which the process * is running. * * Below is a key of locks used to protect each member of struct proc. The * lock is indicated by a reference to a specific character in parens in the * associated comment. * * - not yet protected * a - only touched by curproc or parent during fork/wait * b - created at fork, never changes * (exception aiods switch vmspaces, but they are also * marked 'P_SYSTEM' so hopefully it will be left alone) * c - locked by proc mtx * d - locked by allproc_lock lock * e - locked by proctree_lock lock * f - session mtx * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * kx- only accessed by curthread and by debugger * l - the attaching proc or attaching proc parent * m - Giant * n - not locked, lazy * o - ktrace lock * q - td_contested lock * r - p_peers lock * s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9) * t - thread lock * u - process stat lock * w - process timer lock * x - created at fork, only changes during single threading in exec * y - created at first aio, doesn't change until exit or exec at which * point we are single-threaded and only curthread changes it * z - zombie threads lock * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held * for write access. 
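 *
 * For instance, p_pptr is marked (c + e): a reader may take just the
 * proc mutex or just proctree_lock, while an update, normally done
 * through proc_reparent(), needs both. Schematically, with "parent"
 * and "new_parent" as stand-ins:
 *
 *	Read access, under either lock:
 *		sx_slock(&proctree_lock);
 *		parent = p->p_pptr;
 *		sx_sunlock(&proctree_lock);
 *
 *	Write access, under both:
 *		sx_xlock(&proctree_lock);
 *		PROC_LOCK(p);
 *		p->p_pptr = new_parent;
 *		PROC_UNLOCK(p);
 *		sx_xunlock(&proctree_lock);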
*/ struct cpuset; struct filecaps; struct filemon; struct kaioinfo; struct kaudit_record; struct kcov_info; struct kdtrace_proc; struct kdtrace_thread; struct mqueue_notifier; struct nlminfo; struct p_sched; struct proc; struct procdesc; struct racct; struct sbuf; struct sleepqueue; struct socket; struct syscall_args; struct td_sched; struct thread; struct trapframe; struct turnstile; struct vm_map; struct vm_map_entry; struct epoch_tracker; /* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts * (with control info in the "previous" times), and are converted when * userland asks for rusage info. Backwards compatibility prevents putting * this directly in the user-visible rusage struct. * * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux. * Locking for td_rux: (t) for all fields. */ struct rusage_ext { uint64_t rux_runtime; /* (cu) Real time. */ uint64_t rux_uticks; /* (cu) Statclock hits in user mode. */ uint64_t rux_sticks; /* (cu) Statclock hits in sys mode. */ uint64_t rux_iticks; /* (cu) Statclock hits in intr mode. */ uint64_t rux_uu; /* (c) Previous user time in usec. */ uint64_t rux_su; /* (c) Previous sys time in usec. */ uint64_t rux_tu; /* (c) Previous total time in usec. */ }; /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. */ struct thread { struct mtx *volatile td_lock; /* replaces sched lock */ struct proc *td_proc; /* (*) Associated process. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ TAILQ_ENTRY(thread) td_runq; /* (t) Run queue. */ TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */ TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */ LIST_ENTRY(thread) td_hash; /* (d) Hash chain. */ struct cpuset *td_cpuset; /* (t) CPU affinity mask. */ struct domainset_ref td_domain; /* (a) NUMA policy */ struct seltd *td_sel; /* Select queue/channel. */ struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */ struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals u_char td_lend_user_pri; /* (t) Lend user pri. */ /* Cleared during fork1() */ #define td_startzero td_epochnest u_char td_epochnest; /* (k) Epoch nest counter. */ int td_flags; /* (t) TDF_* flags. */ int td_inhibitors; /* (t) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ int td_sqqueue; /* (t) Sleepqueue queue blocked on. */ void *td_wchan; /* (t) Sleep address. */ const char *td_wmesg; /* (t) Reason for sleep. */ volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */ u_char td_tsqueue; /* (t) Turnstile queue blocked on. */ short td_locks; /* (k) Debug: count of non-spin locks */ short td_rw_rlocks; /* (k) Count of rwlock read locks. */ short td_sx_slocks; /* (k) Count of sx shared locks. */ short td_lk_slocks; /* (k) Count of lockmgr shared locks. */ short td_stopsched; /* (k) Scheduler stopped. */ struct turnstile *td_blocked; /* (t) Lock thread is blocked on. */ const char *td_lockname; /* (t) Name of lock blocked on. 
*/ LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ struct ucred *td_ucred; /* (k) Reference to credentials. */ struct plimit *td_limit; /* (k) Resource limits. */ int td_slptick; /* (t) Time at sleep. */ int td_blktick; /* (t) Time spent blocked. */ int td_swvoltick; /* (t) Time at last SW_VOL switch. */ int td_swinvoltick; /* (t) Time at last SW_INVOL switch. */ u_int td_cow; /* (*) Number of copy-on-write faults */ struct rusage td_ru; /* (t) rusage information. */ struct rusage_ext td_rux; /* (t) Internal rusage information. */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ u_int td_pticks; /* (t) Statclock hits for profiling */ u_int td_sticks; /* (t) Statclock hits in system mode. */ u_int td_iticks; /* (t) Statclock hits in intr mode. */ u_int td_uticks; /* (t) Statclock hits in user mode. */ int td_intrval; /* (t) Return value for sleepq. */ sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. */ char td_name[MAXCOMLEN + 1]; /* (*) Thread name. */ struct file *td_fpop; /* (k) file referencing cdev under op */ int td_dbgflags; /* (c) Userland debugger flags */ siginfo_t td_si; /* (c) For debugger or core file */ int td_ng_outbound; /* (k) Thread entered ng from above. */ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ u_int td_vp_reserv; /* (k) Count of reserved vnodes. */ int td_no_sleeping; /* (k) Sleeping disabled count. */ void *td_su; /* (k) FFS SU private */ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ int td_rtcgen; /* (s) rtc_generation of abs. sleep */ size_t td_vslock_sz; /* (k) amount of vslock-ed space */ struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ #define td_startcopy td_endzero sigset_t td_sigmask; /* (c) Current signal mask. */ u_char td_rqindex; /* (t) Run queue index. */ u_char td_base_pri; /* (t) Thread base kernel priority. */ u_char td_priority; /* (t) Thread active priority. */ u_char td_pri_class; /* (t) Scheduling class. */ u_char td_user_pri; /* (t) User pri from estcpu and nice. */ u_char td_base_user_pri; /* (t) Base user pri */ u_char td_pre_epoch_prio; /* (k) User pri on entry to epoch */ uintptr_t td_rb_list; /* (k) Robust list head. */ uintptr_t td_rbp_list; /* (k) Robust priv list head. */ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ struct syscall_args td_sa; /* (kx) Syscall parameters. Copied on fork for child tracing. */ #define td_endcopy td_pcb /* * Fields that must be manually set in fork1() or create_thread() * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ enum { TDS_INACTIVE = 0x0, TDS_INHIBITED, TDS_CAN_RUN, TDS_RUNQ, TDS_RUNNING } td_state; /* (t) thread state */ union { register_t tdu_retval[2]; off_t tdu_off; } td_uretoff; /* (k) Syscall aux returns. 
*/ #define td_retval td_uretoff.tdu_retval u_int td_cowgen; /* (k) Generation of COW pointers. */ /* LP64 hole */ struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ struct vm_object *td_kstack_obj;/* (a) Kstack object. */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ int td_kstack_pages; /* (a) Size of the kstack. */ volatile u_int td_critnest; /* (k*) Critical section nest level. */ struct mdthread td_md; /* (k) Any machine-dependent fields. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */ int td_errno; /* Error returned by last syscall. */ /* LP64 hole */ struct vnet *td_vnet; /* (k) Effective vnet. */ const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */ struct trapframe *td_intr_frame;/* (k) Frame of the current irq */ struct proc *td_rfppwait_p; /* (k) The vforked child */ struct vm_page **td_ma; /* (k) uio pages held */ int td_ma_cnt; /* (k) size of *td_ma */ /* LP64 hole */ void *td_emuldata; /* Emulator state data */ int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ void *td_lkpi_task; /* LinuxKPI task struct pointer */ struct epoch_tracker *td_et; /* (k) compat KPI spare tracker */ int td_pmcpend; }; struct thread0_storage { struct thread t0st_thread; uint64_t t0st_sched[10]; }; struct mtx *thread_lock_block(struct thread *); void thread_lock_unblock(struct thread *, struct mtx *); void thread_lock_set(struct thread *, struct mtx *); #define THREAD_LOCK_ASSERT(td, type) \ do { \ struct mtx *__m = (td)->td_lock; \ if (__m != &blocked_lock) \ mtx_assert(__m, (type)); \ } while (0) #ifdef INVARIANTS #define THREAD_LOCKPTR_ASSERT(td, lock) \ do { \ struct mtx *__m = (td)->td_lock; \ KASSERT((__m == &blocked_lock || __m == (lock)), \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define TD_LOCKS_INC(td) ((td)->td_locks++) #define TD_LOCKS_DEC(td) do { \ KASSERT(SCHEDULER_STOPPED_TD(td) || (td)->td_locks > 0, \ ("thread %p owns no locks", (td))); \ (td)->td_locks--; \ } while (0) #else #define THREAD_LOCKPTR_ASSERT(td, lock) #define TD_LOCKS_INC(td) #define TD_LOCKS_DEC(td) #endif /* * Flags kept in td_flags: * To change these you MUST have the scheduler lock. */ #define TDF_BORROWING 0x00000001 /* Thread is borrowing pri from another. */ #define TDF_INPANIC 0x00000002 /* Caused a panic, let it drive crashdump. */ #define TDF_INMEM 0x00000004 /* Thread's stack is in memory. */ #define TDF_SINTR 0x00000008 /* Sleep is interruptible. */ #define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */ #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ #define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */ #define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ #define TDF_UNUSED12 0x00001000 /* --available-- */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ #define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. 
*/ #define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */ #define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */ #define TDF_SERESTART 0x00080000 /* ERESTART on stop attempts. */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_SEINTR 0x00200000 /* EINTR on stop attempts. */ #define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */ #define TDF_UNUSED23 0x00800000 /* --available-- */ #define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */ #define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */ #define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */ #define TDF_SCHED3 0x08000000 /* Reserved for scheduler private use */ #define TDF_ALRMPEND 0x10000000 /* Pending SIGVTALRM needs to be posted. */ #define TDF_PROFPEND 0x20000000 /* Pending SIGPROF needs to be posted. */ #define TDF_MACPEND 0x40000000 /* AST-based MAC event pending. */ /* Userland debug flags */ #define TDB_SUSPEND 0x00000001 /* Thread is suspended by debugger */ #define TDB_XSIG 0x00000002 /* Thread is exchanging signal under trace */ #define TDB_USERWR 0x00000004 /* Debugger modified memory or registers */ #define TDB_SCE 0x00000008 /* Thread performs syscall enter */ #define TDB_SCX 0x00000010 /* Thread performs syscall exit */ #define TDB_EXEC 0x00000020 /* TDB_SCX from exec(2) family */ #define TDB_FORK 0x00000040 /* TDB_SCX from fork(2) that created new process */ #define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child only) */ #define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */ #define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */ #define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */ #define TDB_VFORK 0x00000800 /* vfork indicator for ptrace() */ #define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */ #define TDB_STEP 0x00002000 /* (x86) PSL_T set for PT_STEP */ /* * "Private" flags kept in td_pflags: * These are only written by curthread and thus need no locking. */ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ #define TDP_BUFNEED 0x00000008 /* Do not recurse into the buf flush */ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock acquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ #define TDP_UNUSED9 0x00000100 /* --available-- */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. 
*/ #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ #define TDP_SCHED4 0x00008000 /* Reserved for scheduler private use */ #define TDP_GEOM 0x00010000 /* Settle GEOM before finishing syscall */ #define TDP_SOFTDEP 0x00020000 /* Stuck processing softdep worklist */ #define TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */ #define TDP_WAKEUP 0x00080000 /* Don't sleep in umtx cond_wait */ #define TDP_INBDFLUSH 0x00100000 /* Already in BO_BDFLUSH, do not recurse */ #define TDP_KTHREAD 0x00200000 /* This is an official kernel thread */ #define TDP_CALLCHAIN 0x00400000 /* Capture thread's callchain */ #define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */ #define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */ #define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ #define TDP_FORKING 0x20000000 /* Thread is being created through fork() */ #define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */ /* * Reasons that the current thread can not be run yet. * More than one may apply. */ #define TDI_SUSPENDED 0x0001 /* On suspension queue. */ #define TDI_SLEEPING 0x0002 /* Actually asleep! (tricky). */ #define TDI_SWAPPED 0x0004 /* Stack not in mem. Bad juju if run. */ #define TDI_LOCK 0x0008 /* Stopped on a lock. */ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) #define TD_IS_SWAPPED(td) ((td)->td_inhibitors & TDI_SWAPPED) #define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK) #define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT) #define TD_IS_RUNNING(td) ((td)->td_state == TDS_RUNNING) #define TD_ON_RUNQ(td) ((td)->td_state == TDS_RUNQ) #define TD_CAN_RUN(td) ((td)->td_state == TDS_CAN_RUN) #define TD_IS_INHIBITED(td) ((td)->td_state == TDS_INHIBITED) #define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED) #define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD) #define KTDSTATE(td) \ (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ ((td)->td_inhibitors & TDI_IWAIT) != 0 ? 
"iwait" : "yielding") #define TD_SET_INHIB(td, inhib) do { \ (td)->td_state = TDS_INHIBITED; \ (td)->td_inhibitors |= (inhib); \ } while (0) #define TD_CLR_INHIB(td, inhib) do { \ if (((td)->td_inhibitors & (inhib)) && \ (((td)->td_inhibitors &= ~(inhib)) == 0)) \ (td)->td_state = TDS_CAN_RUN; \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) #define TD_SET_SWAPPED(td) TD_SET_INHIB((td), TDI_SWAPPED) #define TD_SET_LOCK(td) TD_SET_INHIB((td), TDI_LOCK) #define TD_SET_SUSPENDED(td) TD_SET_INHIB((td), TDI_SUSPENDED) #define TD_SET_IWAIT(td) TD_SET_INHIB((td), TDI_IWAIT) #define TD_SET_EXITING(td) TD_SET_INHIB((td), TDI_EXITING) #define TD_CLR_SLEEPING(td) TD_CLR_INHIB((td), TDI_SLEEPING) #define TD_CLR_SWAPPED(td) TD_CLR_INHIB((td), TDI_SWAPPED) #define TD_CLR_LOCK(td) TD_CLR_INHIB((td), TDI_LOCK) #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) #define TD_SET_RUNNING(td) (td)->td_state = TDS_RUNNING #define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ #define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN #define TD_SBDRY_INTR(td) \ (((td)->td_flags & (TDF_SEINTR | TDF_SERESTART)) != 0) #define TD_SBDRY_ERRNO(td) \ (((td)->td_flags & TDF_SEINTR) != 0 ? EINTR : ERESTART) /* * Process structure. */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. */ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c) Resource limits. */ struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ int p_flag; /* (c) P_* flags. */ int p_flag2; /* (c) P2_* flags. */ enum { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ PRS_ZOMBIE } p_state; /* (j/c) Process status. */ pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ struct proc *p_reaper; /* (e) My reaper. */ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants (if I am reaper). */ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of the same reaper. */ struct mtx p_mtx; /* (n) Lock for this struct. */ struct mtx p_statmtx; /* Lock for the stats */ struct mtx p_itimmtx; /* Lock for the virt/prof timers */ struct mtx p_profmtx; /* Lock for the profiling */ struct ksiginfo *p_ksi; /* Locked by parent proc lock */ sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */ #define p_siglist p_sigqueue.sq_signals pid_t p_oppid; /* (c + e) Real parent pid. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_vmspace struct vmspace *p_vmspace; /* (b) Address space. */ u_int p_swtick; /* (c) Tick when swapped in or out. */ u_int p_cowgen; /* (c) Generation of COW pointers. */ struct itimerval p_realtimer; /* (c) Alarm timer. */ struct rusage p_ru; /* (a) Exit information. */ struct rusage_ext p_rux; /* (cu) Internal resource usage. */ struct rusage_ext p_crux; /* (c) Internal child resource usage. 
*/ int p_profthreads; /* (c) Num threads in addupc_task. */ volatile int p_exitthreads; /* (j) Number of threads exiting */ int p_traceflag; /* (o) Kernel trace points. */ struct vnode *p_tracevp; /* (c + o) Trace to vnode. */ struct ucred *p_tracecred; /* (o) Credentials to trace with. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ u_int p_lock; /* (c) Proclock (prevent swap) count. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ int p_sigparent; /* (c) Signal to parent on exit. */ int p_sig; /* (n) For core dump/debugger XXX. */ u_long p_code; /* (n) For core dump/debugger XXX. */ u_int p_stops; /* (c) Stop event bitmask. */ u_int p_stype; /* (c) Stop event type. */ char p_step; /* (c) Process is stopped. */ u_char p_pfsflags; /* (c) Procfs flags. */ u_int p_ptevents; /* (c + e) ptrace() event mask. */ struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */ struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */ struct thread *p_singlethread;/* (c + j) If single threading this is it */ int p_suspcount; /* (j) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (j) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */ u_int p_treeflag; /* (e) P_TREE flags */ int p_pendingexits; /* (c) Count of pending thread exits. */ struct filemon *p_filemon; /* (c) filemon-specific data. */ int p_pdeathsig; /* (c) Signal from parent on exit. */ /* End area that is zeroed on creation. */ #define p_endzero p_magic /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_endzero u_int p_magic; /* (b) Magic number. */ int p_osrel; /* (x) osreldate for the binary (from ELF note, if any) */ uint32_t p_fctl0; /* (x) ABI feature control, ELF note */ char p_comm[MAXCOMLEN + 1]; /* (x) Process name. */ struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */ struct pargs *p_args; /* (c) Process arguments. */ rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ int p_fibnum; /* in this routing domain XXX MRT */ pid_t p_reapsubtree; /* (e) Pid of the direct child of the reaper which spawned our subtree. */ uint16_t p_elf_machine; /* (x) ELF machine type */ uint64_t p_elf_flags; /* (x) ELF flags */ /* End area that is copied on creation. */ #define p_endcopy p_xexit u_int p_xexit; /* (c) Exit code. */ u_int p_xsig; /* (c) Stop/kill sig. */ struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ struct knlist *p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. */ struct proc *p_peers; /* (r) */ struct proc *p_leader; /* (b) */ void *p_emuldata; /* (c) Emulator state data. */ struct label *p_label; /* (*) Proc (not subject) MAC label. */ STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */ LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/ struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */ struct cv p_pwait; /* (*) wait cv for exit/exec. */ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ struct racct *p_racct; /* (b) Resource accounting. 
*/ int p_throttled; /* (c) Flag for racct pcpu throttling */ /* * An orphan is the child that has been re-parented to the * debugger as a result of attaching to it. Need to keep * track of them for parent to be able to collect the exit * status of what used to be children. */ LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */ LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */ }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id #define NOCPU (-1) /* For when we aren't on a CPU. */ #define NOCPU_OLD (255) #define MAXCPU_OLD (254) #define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock) #define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock) #define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type)) #define PROC_STATLOCK(p) mtx_lock_spin(&(p)->p_statmtx) #define PROC_STATUNLOCK(p) mtx_unlock_spin(&(p)->p_statmtx) #define PROC_STATLOCK_ASSERT(p, type) mtx_assert(&(p)->p_statmtx, (type)) #define PROC_ITIMLOCK(p) mtx_lock_spin(&(p)->p_itimmtx) #define PROC_ITIMUNLOCK(p) mtx_unlock_spin(&(p)->p_itimmtx) #define PROC_ITIMLOCK_ASSERT(p, type) mtx_assert(&(p)->p_itimmtx, (type)) #define PROC_PROFLOCK(p) mtx_lock_spin(&(p)->p_profmtx) #define PROC_PROFUNLOCK(p) mtx_unlock_spin(&(p)->p_profmtx) #define PROC_PROFLOCK_ASSERT(p, type) mtx_assert(&(p)->p_profmtx, (type)) /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00002 /* Has a controlling terminal. */ #define P_KPROC 0x00004 /* Kernel process. */ #define P_UNUSED3 0x00008 /* --available-- */ #define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00020 /* Has started profiling. */ #define P_STOPPROF 0x00040 /* Has thread requesting to stop profiling. */ #define P_HADTHREADS 0x00080 /* Has had threads (no cleanup shortcuts) */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ #define P_SINGLE_EXIT 0x00400 /* Threads suspending should exit, not wait. */ #define P_TRACED 0x00800 /* Debugged process being traced. */ #define P_WAITED 0x01000 /* Someone is waiting for us. */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ #define P_WKILLED 0x08000 /* Killed, go to kernel/user boundary ASAP. */ #define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */ #define P_STOPPED_SIG 0x20000 /* Stopped due to SIGSTOP/SIGTSTP. */ #define P_STOPPED_TRACE 0x40000 /* Stopped because of tracing. */ #define P_STOPPED_SINGLE 0x80000 /* Only 1 thread can continue (not to user). */ #define P_PROTECTED 0x100000 /* Do not kill on memory overcommit. */ #define P_SIGEVENT 0x200000 /* Process pending signals changed. */ #define P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */ #define P_HWPMC 0x800000 /* Process is using HWPMCs */ #define P_JAILED 0x1000000 /* Process is in jail. */ #define P_TOTAL_STOP 0x2000000 /* Stopped in stop_all_proc. */ #define P_INEXEC 0x4000000 /* Process is in execve(). */ #define P_STATCHILD 0x8000000 /* Child process stopped or exited. */ #define P_INMEM 0x10000000 /* Loaded into memory. */ #define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */ #define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */ #define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. 
*/ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) #define P_KILLED(p) ((p)->p_flag & P_WKILLED) /* These flags are kept in p_flag2. */ #define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */ #define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */ #define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOPTRACE on exec(2). */ #define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */ #define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */ #define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */ +#define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */ +#define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */ +#define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ #define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan list */ #define P_TREE_REAPER 0x00000004 /* Reaper of subtree */ /* * These were process status values (p_stat), now they are only used in * legacy conversion code. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ #define SWAIT 6 /* Waiting for interrupt. */ #define SLOCK 7 /* Blocked on a lock. */ #define P_MAGIC 0xbeefface #ifdef _KERNEL /* Types and flags for mi_switch(). */ #define SW_TYPE_MASK 0xff /* First 8 bits are switch type */ #define SWT_NONE 0 /* Unspecified switch. */ #define SWT_PREEMPT 1 /* Switching due to preemption. */ #define SWT_OWEPREEMPT 2 /* Switching due to owepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ #define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ #define SWT_RELINQUISH 6 /* yield call. */ #define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */ #define SWT_IDLE 8 /* Switching from the idle thread. */ #define SWT_IWAIT 9 /* Waiting for interrupts. */ #define SWT_SUSPEND 10 /* Thread suspended. */ #define SWT_REMOTEPREEMPT 11 /* Remote processor preempted. */ #define SWT_REMOTEWAKEIDLE 12 /* Remote processor preempted idle. */ #define SWT_COUNT 13 /* Number of switch types. */ /* Flags */ #define SW_VOL 0x0100 /* Voluntary switch. */ #define SW_INVOL 0x0200 /* Involuntary switch. */ #define SW_PREEMPT 0x0400 /* The invol switch is a preemption */ /* How values for thread_single(). */ #define SINGLE_NO_EXIT 0 #define SINGLE_EXIT 1 #define SINGLE_BOUNDARY 2 #define SINGLE_ALLPROC 3 #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PARGS); MALLOC_DECLARE(M_PGRP); MALLOC_DECLARE(M_SESSION); MALLOC_DECLARE(M_SUBPROC); #endif #define FOREACH_PROC_IN_SYSTEM(p) \ LIST_FOREACH((p), &allproc, p_list) #define FOREACH_THREAD_IN_PROC(p, td) \ TAILQ_FOREACH((td), &(p)->p_threads, td_plist) #define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads) /* * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit * in a pid_t, as it is used to represent "no process group". 
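 */

/*
 * Illustrative sketch: one way a procctl(2) handler might map the
 * PROC_ASLR_FORCE_* control values (added to sys/sys/procctl.h by this
 * change) onto the new P2_ASLR_ENABLE/P2_ASLR_DISABLE bits above.  The
 * helper name is hypothetical and the caller is assumed to hold the proc
 * lock, which protects p_flag2 ("(c)").
 */
static int
aslr_ctl_sketch(struct proc *p, int state)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	switch (state) {
	case PROC_ASLR_FORCE_ENABLE:
		/* Request randomization regardless of the global default. */
		p->p_flag2 &= ~P2_ASLR_DISABLE;
		p->p_flag2 |= P2_ASLR_ENABLE;
		break;
	case PROC_ASLR_FORCE_DISABLE:
		/* Request a fully predictable layout. */
		p->p_flag2 |= P2_ASLR_DISABLE;
		p->p_flag2 &= ~P2_ASLR_ENABLE;
		break;
	case PROC_ASLR_NOFORCE:
		/* Fall back to the system-wide policy. */
		p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

/*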
*/ #define PID_MAX 99999 #define NO_PID 100000 extern pid_t pid_max; #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) #define STOPEVENT(p, e, v) do { \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ "checking stopevent %d", (e)); \ if ((p)->p_stops & (e)) { \ PROC_LOCK(p); \ stopevent((p), (e), (v)); \ PROC_UNLOCK(p); \ } \ } while (0) #define _STOPEVENT(p, e, v) do { \ PROC_LOCK_ASSERT(p, MA_OWNED); \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, \ "checking stopevent %d", (e)); \ if ((p)->p_stops & (e)) \ stopevent((p), (e), (v)); \ } while (0) /* Lock and unlock a process. */ #define PROC_LOCK(p) mtx_lock(&(p)->p_mtx) #define PROC_TRYLOCK(p) mtx_trylock(&(p)->p_mtx) #define PROC_UNLOCK(p) mtx_unlock(&(p)->p_mtx) #define PROC_LOCKED(p) mtx_owned(&(p)->p_mtx) #define PROC_LOCK_ASSERT(p, type) mtx_assert(&(p)->p_mtx, (type)) /* Lock and unlock a process group. */ #define PGRP_LOCK(pg) mtx_lock(&(pg)->pg_mtx) #define PGRP_UNLOCK(pg) mtx_unlock(&(pg)->pg_mtx) #define PGRP_LOCKED(pg) mtx_owned(&(pg)->pg_mtx) #define PGRP_LOCK_ASSERT(pg, type) mtx_assert(&(pg)->pg_mtx, (type)) #define PGRP_LOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_LOCK(pg); \ } while (0) #define PGRP_UNLOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_UNLOCK(pg); \ } while (0) /* Lock and unlock a session. */ #define SESS_LOCK(s) mtx_lock(&(s)->s_mtx) #define SESS_UNLOCK(s) mtx_unlock(&(s)->s_mtx) #define SESS_LOCKED(s) mtx_owned(&(s)->s_mtx) #define SESS_LOCK_ASSERT(s, type) mtx_assert(&(s)->s_mtx, (type)) /* * Non-zero p_lock ensures that: * - exit1() is not performed until p_lock reaches zero; * - the process' threads stack are not swapped out if they are currently * not (P_INMEM). * * PHOLD() asserts that the process (except the current process) is * not exiting, increments p_lock and swaps threads stacks into memory, * if needed. * _PHOLD() is same as PHOLD(), it takes the process locked. * _PHOLD_LITE() also takes the process locked, but comparing with * _PHOLD(), it only guarantees that exit1() is not executed, * faultin() is not called. */ #define PHOLD(p) do { \ PROC_LOCK(p); \ _PHOLD(p); \ PROC_UNLOCK(p); \ } while (0) #define _PHOLD(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \ ("PHOLD of exiting process %p", p)); \ (p)->p_lock++; \ if (((p)->p_flag & P_INMEM) == 0) \ faultin((p)); \ } while (0) #define _PHOLD_LITE(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \ ("PHOLD of exiting process %p", p)); \ (p)->p_lock++; \ } while (0) #define PROC_ASSERT_HELD(p) do { \ KASSERT((p)->p_lock > 0, ("process %p not held", p)); \ } while (0) #define PRELE(p) do { \ PROC_LOCK((p)); \ _PRELE((p)); \ PROC_UNLOCK((p)); \ } while (0) #define _PRELE(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ PROC_ASSERT_HELD(p); \ (--(p)->p_lock); \ if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0) \ wakeup(&(p)->p_lock); \ } while (0) #define PROC_ASSERT_NOT_HELD(p) do { \ KASSERT((p)->p_lock == 0, ("process %p held", p)); \ } while (0) #define PROC_UPDATE_COW(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ (p)->p_cowgen++; \ } while (0) /* Check whether a thread is safe to be swapped out. */ #define thread_safetoswapout(td) ((td)->td_flags & TDF_CANSWAP) /* Control whether or not it is safe for curthread to sleep. 
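 */

/*
 * Illustrative sketch: typical use of the PHOLD()/PRELE() pair documented
 * above.  The hold prevents the target from completing exit1() and keeps
 * its thread stacks resident while it is examined without the proc lock
 * held; inspect_proc_sketch() is a hypothetical helper.
 */
static void
inspect_proc_sketch(struct proc *p)
{

	PHOLD(p);	/* Acquires and drops the proc lock internally. */
	/* ... safely dereference p->p_vmspace, p->p_comm, and so on ... */
	PRELE(p);	/* Drop the hold; wakes a waiting exit1() if needed. */
}

/*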
*/ #define THREAD_NO_SLEEPING() ((curthread)->td_no_sleeping++) #define THREAD_SLEEPING_OK() ((curthread)->td_no_sleeping--) #define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) #define PIDHASHLOCK(pid) (&pidhashtbl_lock[((pid) & pidhashlock)]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern struct sx *pidhashtbl_lock; extern u_long pidhash; extern u_long pidhashlock; #define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash]) extern LIST_HEAD(tidhashhead, thread) *tidhashtbl; extern u_long tidhash; extern struct rwlock tidhash_lock; #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; extern struct sx allproc_lock; extern int allproc_gen; extern struct sx zombproc_lock; extern struct sx proctree_lock; extern struct mtx ppeers_lock; extern struct mtx procid_lock; extern struct proc proc0; /* Process slot for swapper. */ extern struct thread0_storage thread0_st; /* Primary thread in proc0. */ #define thread0 (thread0_st.t0st_thread) extern struct vmspace vmspace0; /* VM space for proc0. */ extern int hogticks; /* Limit on kernel cpu hogs. */ extern int lastpid; extern int nprocs, maxproc; /* Current and max number of procs. */ extern int maxprocperuid; /* Max procs per uid. */ extern u_long ps_arg_cache_limit; LIST_HEAD(proclist, proc); TAILQ_HEAD(procqueue, proc); TAILQ_HEAD(threadqueue, thread); extern struct proclist allproc; /* List of all processes. */ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ extern struct uma_zone *proc_zone; struct proc *pfind(pid_t); /* Find process by id. */ struct proc *pfind_any(pid_t); /* Find (zombie) process by id. */ struct pgrp *pgfind(pid_t); /* Find process group by id. */ struct proc *zpfind(pid_t); /* Find zombie process by id. */ struct fork_req { int fr_flags; int fr_pages; int *fr_pidp; struct proc **fr_procp; int *fr_pd_fd; int fr_pd_flags; struct filecaps *fr_pd_fcaps; }; /* * pget() flags. */ #define PGET_HOLD 0x00001 /* Hold the process. */ #define PGET_CANSEE 0x00002 /* Check against p_cansee(). */ #define PGET_CANDEBUG 0x00004 /* Check against p_candebug(). */ #define PGET_ISCURRENT 0x00008 /* Check that the found process is current. */ #define PGET_NOTWEXIT 0x00010 /* Check that the process is not in P_WEXIT. */ #define PGET_NOTINEXEC 0x00020 /* Check that the process is not in P_INEXEC. */ #define PGET_NOTID 0x00040 /* Do not assume tid if pid > PID_MAX. 
*/ #define PGET_WANTREAD (PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT) int pget(pid_t pid, int flags, struct proc **pp); void ast(struct trapframe *framep); struct thread *choosethread(void); int cr_cansee(struct ucred *u1, struct ucred *u2); int cr_canseesocket(struct ucred *cred, struct socket *so); int cr_canseeothergids(struct ucred *u1, struct ucred *u2); int cr_canseeotheruids(struct ucred *u1, struct ucred *u2); int cr_canseejailproc(struct ucred *u1, struct ucred *u2); int cr_cansignal(struct ucred *cred, struct proc *proc, int signum); int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess); int enterthispgrp(struct proc *p, struct pgrp *pgrp); void faultin(struct proc *p); void fixjobc(struct proc *p, struct pgrp *pgrp, int entering); int fork1(struct thread *, struct fork_req *); void fork_rfppwait(struct thread *); void fork_exit(void (*)(void *, struct trapframe *), void *, struct trapframe *); void fork_return(struct thread *, struct trapframe *); int inferior(struct proc *p); void kern_proc_vmmap_resident(struct vm_map *map, struct vm_map_entry *entry, int *resident_count, bool *super); void kern_yield(int); void kick_proc0(void); void killjobc(void); int leavepgrp(struct proc *p); int maybe_preempt(struct thread *td); void maybe_yield(void); void mi_switch(int flags, struct thread *newtd); int p_candebug(struct thread *td, struct proc *p); int p_cansee(struct thread *td, struct proc *p); int p_cansched(struct thread *td, struct proc *p); int p_cansignal(struct thread *td, struct proc *p, int signum); int p_canwait(struct thread *td, struct proc *p); struct pargs *pargs_alloc(int len); void pargs_drop(struct pargs *pa); void pargs_hold(struct pargs *pa); int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); void procinit(void); int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); struct proc *proc_realparent(struct proc *child); void proc_reap(struct thread *td, struct proc *p, int *status, int options); void proc_reparent(struct proc *child, struct proc *newparent, bool set_oppid); void proc_set_traced(struct proc *p, bool stop); void proc_wkilled(struct proc *p); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); void pstats_free(struct pstats *ps); void reaper_abandon_children(struct proc *p, bool exiting); int securelevel_ge(struct ucred *cr, int level); int securelevel_gt(struct ucred *cr, int level); void sess_hold(struct session *); void sess_release(struct session *); int setrunnable(struct thread *); void setsugid(struct proc *p); int should_yield(void); int sigonstack(size_t sp); void stopevent(struct proc *, u_int, u_int); struct thread *tdfind(lwpid_t, pid_t); void threadinit(void); void tidhash_add(struct thread *); void tidhash_remove(struct thread *); void cpu_idle(int); int cpu_idle_wakeup(int); extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. 
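 */

/*
 * Illustrative sketch: using the pget() helper declared above to resolve a
 * pid for debugging-style access.  PGET_WANTREAD combines PGET_HOLD,
 * PGET_CANDEBUG and PGET_NOTWEXIT, so on success the process should come
 * back held (not locked) and is released with PRELE(); the function name
 * here is hypothetical.
 */
static int
debug_target_sketch(pid_t pid)
{
	struct proc *p;
	int error;

	error = pget(pid, PGET_WANTREAD, &p);
	if (error != 0)
		return (error);
	/* ... operate on the held, unlocked process ... */
	PRELE(p);
	return (0);
}

/*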
*/ void cpu_switch(struct thread *, struct thread *, struct mtx *); void cpu_throw(struct thread *, struct thread *) __dead2; void unsleep(struct thread *); void userret(struct thread *, struct trapframe *); void cpu_exit(struct thread *); void exit1(struct thread *, int, int) __dead2; void cpu_copy_thread(struct thread *td, struct thread *td0); int cpu_fetch_syscall_args(struct thread *td); void cpu_fork(struct thread *, struct proc *, struct thread *, int); void cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *); void cpu_set_syscall_retval(struct thread *, int); void cpu_set_upcall(struct thread *, void (*)(void *), void *, stack_t *); int cpu_set_user_tls(struct thread *, void *tls_base); void cpu_thread_alloc(struct thread *); void cpu_thread_clean(struct thread *); void cpu_thread_exit(struct thread *); void cpu_thread_free(struct thread *); void cpu_thread_swapin(struct thread *); void cpu_thread_swapout(struct thread *); struct thread *thread_alloc(int pages); int thread_alloc_stack(struct thread *, int pages); void thread_cow_get_proc(struct thread *newtd, struct proc *p); void thread_cow_get(struct thread *newtd, struct thread *td); void thread_cow_free(struct thread *td); void thread_cow_update(struct thread *td); int thread_create(struct thread *td, struct rtprio *rtp, int (*initialize_thread)(struct thread *, void *), void *thunk); void thread_exit(void) __dead2; void thread_free(struct thread *td); void thread_link(struct thread *td, struct proc *p); void thread_reap(void); int thread_single(struct proc *p, int how); void thread_single_end(struct proc *p, int how); void thread_stash(struct thread *td); void thread_stopped(struct proc *p); void childproc_stopped(struct proc *child, int reason); void childproc_continued(struct proc *child); void childproc_exited(struct proc *child); int thread_suspend_check(int how); bool thread_suspend_check_needed(void); void thread_suspend_switch(struct thread *, struct proc *p); void thread_suspend_one(struct thread *td); void thread_unlink(struct thread *td); void thread_unsuspend(struct proc *p); void thread_wait(struct proc *p); struct thread *thread_find(struct proc *p, lwpid_t tid); void stop_all_proc(void); void resume_all_proc(void); static __inline int curthread_pflags_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags & flags); td->td_pflags |= flags; return (save); } static __inline void curthread_pflags_restore(int save) { curthread->td_pflags &= save; } static __inline __pure2 struct td_sched * td_get_sched(struct thread *td) { return ((struct td_sched *)&td[1]); } extern void (*softdep_ast_cleanup)(struct thread *); static __inline void td_softdep_cleanup(struct thread *td) { if (td->td_su != NULL && softdep_ast_cleanup != NULL) softdep_ast_cleanup(td); } #define PROC_ID_PID 0 #define PROC_ID_GROUP 1 #define PROC_ID_SESSION 2 #define PROC_ID_REAP 3 void proc_id_set(int type, pid_t id); void proc_id_set_cond(int type, pid_t id); void proc_id_clear(int type, pid_t id); #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ Index: head/sys/sys/procctl.h =================================================================== --- head/sys/sys/procctl.h (revision 343963) +++ head/sys/sys/procctl.h (revision 343964) @@ -1,126 +1,133 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Hudson River Trading LLC * Copyright (c) 2014, 2016 The FreeBSD Foundation * Written by: John H. Baldwin * All rights reserved. 
* * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PROCCTL_H_ #define _SYS_PROCCTL_H_ #ifndef _KERNEL #include #include #endif #define PROC_SPROTECT 1 /* set protected state */ #define PROC_REAP_ACQUIRE 2 /* reaping enable */ #define PROC_REAP_RELEASE 3 /* reaping disable */ #define PROC_REAP_STATUS 4 /* reaping status */ #define PROC_REAP_GETPIDS 5 /* get descendants */ #define PROC_REAP_KILL 6 /* kill descendants */ #define PROC_TRACE_CTL 7 /* en/dis ptrace and coredumps */ #define PROC_TRACE_STATUS 8 /* query tracing status */ #define PROC_TRAPCAP_CTL 9 /* trap capability errors */ #define PROC_TRAPCAP_STATUS 10 /* query trap capability status */ #define PROC_PDEATHSIG_CTL 11 /* set parent death signal */ #define PROC_PDEATHSIG_STATUS 12 /* get parent death signal */ +#define PROC_ASLR_CTL 13 /* en/dis ASLR */ +#define PROC_ASLR_STATUS 14 /* query ASLR status */ /* Operations for PROC_SPROTECT (passed in integer arg). */ #define PPROT_OP(x) ((x) & 0xf) #define PPROT_SET 1 #define PPROT_CLEAR 2 /* Flags for PROC_SPROTECT (ORed in with operation). */ #define PPROT_FLAGS(x) ((x) & ~0xf) #define PPROT_DESCEND 0x10 #define PPROT_INHERIT 0x20 /* Result of PREAP_STATUS (returned by value). 
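 */

/*
 * Minimal userland sketch of the new ASLR commands: force-disable ASLR for
 * the calling process (the setting is consulted at subsequent execve()) and
 * query the resulting state.  PROC_ASLR_FORCE_DISABLE and PROC_ASLR_ACTIVE
 * are defined further down in this header; the program itself is
 * illustrative and not part of the header.
 */
#include <sys/procctl.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int state;

	state = PROC_ASLR_FORCE_DISABLE;
	if (procctl(P_PID, getpid(), PROC_ASLR_CTL, &state) == -1)
		err(1, "PROC_ASLR_CTL");

	if (procctl(P_PID, getpid(), PROC_ASLR_STATUS, &state) == -1)
		err(1, "PROC_ASLR_STATUS");
	printf("aslr is %sactive\n",
	    (state & PROC_ASLR_ACTIVE) != 0 ? "" : "not ");
	return (0);
}

/*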
*/ struct procctl_reaper_status { u_int rs_flags; u_int rs_children; u_int rs_descendants; pid_t rs_reaper; pid_t rs_pid; u_int rs_pad0[15]; }; /* struct procctl_reaper_status rs_flags */ #define REAPER_STATUS_OWNED 0x00000001 #define REAPER_STATUS_REALINIT 0x00000002 struct procctl_reaper_pidinfo { pid_t pi_pid; pid_t pi_subtree; u_int pi_flags; u_int pi_pad0[15]; }; #define REAPER_PIDINFO_VALID 0x00000001 #define REAPER_PIDINFO_CHILD 0x00000002 #define REAPER_PIDINFO_REAPER 0x00000004 struct procctl_reaper_pids { u_int rp_count; u_int rp_pad0[15]; struct procctl_reaper_pidinfo *rp_pids; }; struct procctl_reaper_kill { int rk_sig; /* in - signal to send */ u_int rk_flags; /* in - REAPER_KILL flags */ pid_t rk_subtree; /* in - subtree, if REAPER_KILL_SUBTREE */ u_int rk_killed; /* out - count of processes successfully killed */ pid_t rk_fpid; /* out - first failed pid for which error is returned */ u_int rk_pad0[15]; }; #define REAPER_KILL_CHILDREN 0x00000001 #define REAPER_KILL_SUBTREE 0x00000002 #define PROC_TRACE_CTL_ENABLE 1 #define PROC_TRACE_CTL_DISABLE 2 #define PROC_TRACE_CTL_DISABLE_EXEC 3 #define PROC_TRAPCAP_CTL_ENABLE 1 #define PROC_TRAPCAP_CTL_DISABLE 2 + +#define PROC_ASLR_FORCE_ENABLE 1 +#define PROC_ASLR_FORCE_DISABLE 2 +#define PROC_ASLR_NOFORCE 3 +#define PROC_ASLR_ACTIVE 0x80000000 #ifndef _KERNEL __BEGIN_DECLS int procctl(idtype_t, id_t, int, void *); __END_DECLS #endif #endif /* !_SYS_PROCCTL_H_ */ Index: head/sys/sys/sysent.h =================================================================== --- head/sys/sys/sysent.h (revision 343963) +++ head/sys/sys/sysent.h (revision 343964) @@ -1,320 +1,321 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _SYS_SYSENT_H_ #define _SYS_SYSENT_H_ #include struct rlimit; struct sysent; struct thread; struct ksiginfo; struct syscall_args; enum systrace_probe_t { SYSTRACE_ENTRY, SYSTRACE_RETURN, }; typedef int sy_call_t(struct thread *, void *); typedef void (*systrace_probe_func_t)(struct syscall_args *, enum systrace_probe_t, int); typedef void (*systrace_args_func_t)(int, void *, uint64_t *, int *); #ifdef _KERNEL extern bool systrace_enabled; #endif extern systrace_probe_func_t systrace_probe_func; struct sysent { /* system call table */ int sy_narg; /* number of arguments */ sy_call_t *sy_call; /* implementing function */ au_event_t sy_auevent; /* audit event associated with syscall */ systrace_args_func_t sy_systrace_args_func; /* optional argument conversion function. */ u_int32_t sy_entry; /* DTrace entry ID for systrace. */ u_int32_t sy_return; /* DTrace return ID for systrace. */ u_int32_t sy_flags; /* General flags for system calls. */ u_int32_t sy_thrcnt; }; /* * A system call is permitted in capability mode. */ #define SYF_CAPENABLED 0x00000001 #define SY_THR_FLAGMASK 0x7 #define SY_THR_STATIC 0x1 #define SY_THR_DRAINING 0x2 #define SY_THR_ABSENT 0x4 #define SY_THR_INCR 0x8 #ifdef KLD_MODULE #define SY_THR_STATIC_KLD 0 #else #define SY_THR_STATIC_KLD SY_THR_STATIC #endif struct image_params; struct __sigset; struct trapframe; struct vnode; struct sysentvec { int sv_size; /* number of entries */ struct sysent *sv_table; /* pointer to sysent */ int sv_errsize; /* size of errno translation table */ const int *sv_errtbl; /* errno translation table */ int (*sv_transtrap)(int, int); /* translate trap-to-signal mapping */ int (*sv_fixup)(register_t **, struct image_params *); /* stack fixup function */ void (*sv_sendsig)(void (*)(int), struct ksiginfo *, struct __sigset *); /* send signal */ char *sv_sigcode; /* start of sigtramp code */ int *sv_szsigcode; /* size of sigtramp code */ char *sv_name; /* name of binary type */ int (*sv_coredump)(struct thread *, struct vnode *, off_t, int); /* function to dump core, or NULL */ int (*sv_imgact_try)(struct image_params *); int sv_minsigstksz; /* minimum signal stack size */ int sv_pagesize; /* pagesize */ vm_offset_t sv_minuser; /* VM_MIN_ADDRESS */ vm_offset_t sv_maxuser; /* VM_MAXUSER_ADDRESS */ vm_offset_t sv_usrstack; /* USRSTACK */ vm_offset_t sv_psstrings; /* PS_STRINGS */ int sv_stackprot; /* vm protection for stack */ register_t *(*sv_copyout_strings)(struct image_params *); void (*sv_setregs)(struct thread *, struct image_params *, u_long); void (*sv_fixlimit)(struct rlimit *, int); u_long *sv_maxssiz; u_int sv_flags; void (*sv_set_syscall_retval)(struct thread *, int); int (*sv_fetch_syscall_args)(struct thread *); const char **sv_syscallnames; vm_offset_t sv_timekeep_base; vm_offset_t sv_shared_page_base; vm_offset_t sv_shared_page_len; vm_offset_t sv_sigcode_base; void *sv_shared_page_obj; void (*sv_schedtail)(struct thread *); void (*sv_thread_detach)(struct thread *); int (*sv_trap)(struct thread *); u_long *sv_hwcap; /* Value passed in AT_HWCAP. */ u_long *sv_hwcap2; /* Value passed in AT_HWCAP2. */ }; #define SV_ILP32 0x000100 /* 32-bit executable. */ #define SV_LP64 0x000200 /* 64-bit executable. */ #define SV_IA32 0x004000 /* Intel 32-bit executable. */ #define SV_AOUT 0x008000 /* a.out executable. */ #define SV_SHP 0x010000 /* Shared page. */ #define SV_CAPSICUM 0x020000 /* Force cap_enter() on startup. */ #define SV_TIMEKEEP 0x040000 /* Shared page timehands. 
*/ +#define SV_ASLR 0x080000 /* ASLR allowed. */ #define SV_ABI_MASK 0xff #define SV_ABI_ERRNO(p, e) ((p)->p_sysent->sv_errsize <= 0 ? e : \ ((e) >= (p)->p_sysent->sv_errsize ? -1 : (p)->p_sysent->sv_errtbl[e])) #define SV_PROC_FLAG(p, x) ((p)->p_sysent->sv_flags & (x)) #define SV_PROC_ABI(p) ((p)->p_sysent->sv_flags & SV_ABI_MASK) #define SV_CURPROC_FLAG(x) SV_PROC_FLAG(curproc, x) #define SV_CURPROC_ABI() SV_PROC_ABI(curproc) /* same as ELFOSABI_XXX, to prevent header pollution */ #define SV_ABI_LINUX 3 #define SV_ABI_FREEBSD 9 #define SV_ABI_CLOUDABI 17 #define SV_ABI_UNDEF 255 #ifdef _KERNEL extern struct sysentvec aout_sysvec; extern struct sysent sysent[]; extern const char *syscallnames[]; #define NO_SYSCALL (-1) struct module; struct syscall_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ int *offset; /* offset into sysent */ struct sysent *new_sysent; /* new sysent */ struct sysent old_sysent; /* old sysent */ int flags; /* flags for syscall_register */ }; /* separate initialization vector so it can be used in a substructure */ #define SYSENT_INIT_VALS(_syscallname) { \ .sy_narg = (sizeof(struct _syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)&sys_##_syscallname, \ .sy_auevent = SYS_AUE_##_syscallname, \ .sy_systrace_args_func = NULL, \ .sy_entry = 0, \ .sy_return = 0, \ .sy_flags = 0, \ .sy_thrcnt = 0 \ } #define MAKE_SYSENT(syscallname) \ static struct sysent syscallname##_sysent = SYSENT_INIT_VALS(syscallname); #define MAKE_SYSENT_COMPAT(syscallname) \ static struct sysent syscallname##_sysent = { \ (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ (sy_call_t *)& syscallname, \ SYS_AUE_##syscallname \ } #define SYSCALL_MODULE(name, offset, new_sysent, evh, arg) \ static struct syscall_module_data name##_syscall_mod = { \ evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL } \ }; \ \ static moduledata_t name##_mod = { \ "sys/" #name, \ syscall_module_handler, \ &name##_syscall_mod \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE) #define SYSCALL_MODULE_HELPER(syscallname) \ static int syscallname##_syscall = SYS_##syscallname; \ MAKE_SYSENT(syscallname); \ SYSCALL_MODULE(syscallname, \ & syscallname##_syscall, & syscallname##_sysent, \ NULL, NULL) #define SYSCALL_MODULE_PRESENT(syscallname) \ (sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys && \ sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys) /* * Syscall registration helpers with resource allocation handling. 
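 */

/*
 * Illustrative sketch: how the new SV_ASLR capability bit might be combined
 * with the per-process P2_ASLR_* overrides from sys/sys/proc.h when deciding
 * whether a freshly exec'ed image gets a randomized address space.  The
 * helper name and the aslr_enabled argument (standing in for a global
 * policy knob) are hypothetical; only the flag tests come from definitions
 * in this change.
 */
static bool
aslr_active_sketch(struct proc *p, int aslr_enabled)
{

	/* The ABI must advertise ASLR support at all. */
	if (SV_PROC_FLAG(p, SV_ASLR) == 0)
		return (false);
	/* A per-process force-disable always wins. */
	if ((p->p_flag2 & P2_ASLR_DISABLE) != 0)
		return (false);
	/* A per-process force-enable overrides the global default. */
	if ((p->p_flag2 & P2_ASLR_ENABLE) != 0)
		return (true);
	return (aslr_enabled != 0);
}

/*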
*/ struct syscall_helper_data { struct sysent new_sysent; struct sysent old_sysent; int syscall_no; int registered; }; #define SYSCALL_INIT_HELPER_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& sys_ ## syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER_COMPAT_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER(syscallname) \ SYSCALL_INIT_HELPER_F(syscallname, 0) #define SYSCALL_INIT_HELPER_COMPAT(syscallname) \ SYSCALL_INIT_HELPER_COMPAT_F(syscallname, 0) #define SYSCALL_INIT_LAST { \ .syscall_no = NO_SYSCALL \ } int syscall_module_handler(struct module *mod, int what, void *arg); int syscall_helper_register(struct syscall_helper_data *sd, int flags); int syscall_helper_unregister(struct syscall_helper_data *sd); /* Implementation, exposed for COMPAT code */ int kern_syscall_register(struct sysent *sysents, int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags); int kern_syscall_deregister(struct sysent *sysents, int offset, const struct sysent *old_sysent); int kern_syscall_module_handler(struct sysent *sysents, struct module *mod, int what, void *arg); int kern_syscall_helper_register(struct sysent *sysents, struct syscall_helper_data *sd, int flags); int kern_syscall_helper_unregister(struct sysent *sysents, struct syscall_helper_data *sd); struct proc; const char *syscallname(struct proc *p, u_int code); /* Special purpose system call functions. */ struct nosys_args; int lkmnosys(struct thread *, struct nosys_args *); int lkmressys(struct thread *, struct nosys_args *); int _syscall_thread_enter(struct thread *td, struct sysent *se); void _syscall_thread_exit(struct thread *td, struct sysent *se); static inline int syscall_thread_enter(struct thread *td, struct sysent *se) { if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0)) return (0); return (_syscall_thread_enter(td, se)); } static inline void syscall_thread_exit(struct thread *td, struct sysent *se) { if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0)) return; _syscall_thread_exit(td, se); } int shared_page_alloc(int size, int align); int shared_page_fill(int size, int align, const void *data); void shared_page_write(int base, int size, const void *data); void exec_sysvec_init(void *param); void exec_inittk(void); #define INIT_SYSENTVEC(name, sv) \ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ (sysinit_cfunc_t)exec_sysvec_init, sv); #endif /* _KERNEL */ #endif /* !_SYS_SYSENT_H_ */ Index: head/sys/vm/vm_map.c =================================================================== --- head/sys/vm/vm_map.c (revision 343963) +++ head/sys/vm/vm_map.c (revision 343964) @@ -1,4449 +1,4555 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory mapping module. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Virtual memory maps provide for the mapping, protection, * and sharing of virtual memory objects. In addition, * this module provides for an efficient virtual copy of * memory from one map to another. * * Synchronization is required prior to most operations. * * Maps consist of an ordered doubly-linked list of simple * entries; a self-adjusting binary search tree of these * entries is used to speed up lookups. * * Since portions of maps are specified by start/end addresses, * which may not align with existing map entries, all * routines merely "clip" entries to these start/end values. * [That is, an entry is split into two, bordering at a * start or end value.] Note that these clippings may not * always be necessary (as the two resulting entries are then * not changed); however, the clipping is done for convenience. 
* * As mentioned above, virtual copy operations are performed * by copying VM object references from one map to * another, and then marking both regions as copy-on-write. */ static struct mtx map_sleep_mtx; static uma_zone_t mapentzone; static uma_zone_t kmapentzone; static uma_zone_t mapzone; static uma_zone_t vmspace_zone; static int vmspace_zinit(void *mem, int size, int flags); static int vm_map_zinit(void *mem, int ize, int flags); static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max); static int vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr, vm_offset_t alignment); static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map); static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry); static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry); static int vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry); static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags); #ifdef INVARIANTS static void vm_map_zdtor(void *mem, int size, void *arg); static void vmspace_zdtor(void *mem, int size, void *arg); #endif static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow); static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, vm_offset_t failed_addr); #define ENTRY_CHARGED(e) ((e)->cred != NULL || \ ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \ !((e)->eflags & MAP_ENTRY_NEEDS_COPY))) /* * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type * stable. */ #define PROC_VMSPACE_LOCK(p) do { } while (0) #define PROC_VMSPACE_UNLOCK(p) do { } while (0) /* * VM_MAP_RANGE_CHECK: [ internal use only ] * * Asserts that the starting and ending region * addresses fall within the valid range of the map. */ #define VM_MAP_RANGE_CHECK(map, start, end) \ { \ if (start < vm_map_min(map)) \ start = vm_map_min(map); \ if (end > vm_map_max(map)) \ end = vm_map_max(map); \ if (start > end) \ start = end; \ } /* * vm_map_startup: * * Initialize the vm_map module. Must be called before * any other vm_map routines. * * Map and entry structures are allocated from the general * purpose memory pool with some exceptions: * * - The kernel map and kmem submap are allocated statically. * - Kernel map entries are allocated out of a static pool. * * These restrictions are necessary since malloc() uses the * maps and requires map entries. 
*/ void vm_map_startup(void) { mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF); mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL, #ifdef INVARIANTS vm_map_zdtor, #else NULL, #endif vm_map_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_prealloc(mapzone, MAX_KMAP); kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MTXCLASS | UMA_ZONE_VM); mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL, #ifdef INVARIANTS vmspace_zdtor, #else NULL, #endif vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); } static int vmspace_zinit(void *mem, int size, int flags) { struct vmspace *vm; vm = (struct vmspace *)mem; vm->vm_map.pmap = NULL; (void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags); PMAP_LOCK_INIT(vmspace_pmap(vm)); return (0); } static int vm_map_zinit(void *mem, int size, int flags) { vm_map_t map; map = (vm_map_t)mem; memset(map, 0, sizeof(*map)); mtx_init(&map->system_mtx, "vm map (system)", NULL, MTX_DEF | MTX_DUPOK); sx_init(&map->lock, "vm map (user)"); return (0); } #ifdef INVARIANTS static void vmspace_zdtor(void *mem, int size, void *arg) { struct vmspace *vm; vm = (struct vmspace *)mem; vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg); } static void vm_map_zdtor(void *mem, int size, void *arg) { vm_map_t map; map = (vm_map_t)mem; KASSERT(map->nentries == 0, ("map %p nentries == %d on free.", map, map->nentries)); KASSERT(map->size == 0, ("map %p size == %lu on free.", map, (unsigned long)map->size)); } #endif /* INVARIANTS */ /* * Allocate a vmspace structure, including a vm_map and pmap, * and initialize those structures. The refcnt is set to 1. * * If 'pinit' is NULL then the embedded pmap is initialized via pmap_pinit(). */ struct vmspace * vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit) { struct vmspace *vm; vm = uma_zalloc(vmspace_zone, M_WAITOK); KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL")); if (!pinit(vmspace_pmap(vm))) { uma_zfree(vmspace_zone, vm); return (NULL); } CTR1(KTR_VM, "vmspace_alloc: %p", vm); _vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max); vm->vm_refcnt = 1; vm->vm_shm = NULL; vm->vm_swrss = 0; vm->vm_tsize = 0; vm->vm_dsize = 0; vm->vm_ssize = 0; vm->vm_taddr = 0; vm->vm_daddr = 0; vm->vm_maxsaddr = 0; return (vm); } #ifdef RACCT static void vmspace_container_reset(struct proc *p) { PROC_LOCK(p); racct_set(p, RACCT_DATA, 0); racct_set(p, RACCT_STACK, 0); racct_set(p, RACCT_RSS, 0); racct_set(p, RACCT_MEMLOCK, 0); racct_set(p, RACCT_VMEM, 0); PROC_UNLOCK(p); } #endif static inline void vmspace_dofree(struct vmspace *vm) { CTR1(KTR_VM, "vmspace_free: %p", vm); /* * Make sure any SysV shm is freed, it might not have been in * exit1(). */ shmexit(vm); /* * Lock the map, to wait out all other references to it. * Delete all of the mappings and pages they hold, then call * the pmap module to reclaim anything left. 
*/ (void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), vm_map_max(&vm->vm_map)); pmap_release(vmspace_pmap(vm)); vm->vm_map.pmap = NULL; uma_zfree(vmspace_zone, vm); } void vmspace_free(struct vmspace *vm) { WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmspace_free() called"); if (vm->vm_refcnt == 0) panic("vmspace_free: attempt to free already freed vmspace"); if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1) vmspace_dofree(vm); } void vmspace_exitfree(struct proc *p) { struct vmspace *vm; PROC_VMSPACE_LOCK(p); vm = p->p_vmspace; p->p_vmspace = NULL; PROC_VMSPACE_UNLOCK(p); KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace")); vmspace_free(vm); } void vmspace_exit(struct thread *td) { int refcnt; struct vmspace *vm; struct proc *p; /* * Release user portion of address space. * This releases references to vnodes, * which could cause I/O if the file has been unlinked. * Need to do this early enough that we can still sleep. * * The last exiting process to reach this point releases as * much of the environment as it can. vmspace_dofree() is the * slower fallback in case another process had a temporary * reference to the vmspace. */ p = td->td_proc; vm = p->p_vmspace; atomic_add_int(&vmspace0.vm_refcnt, 1); refcnt = vm->vm_refcnt; do { if (refcnt > 1 && p->p_vmspace != &vmspace0) { /* Switch now since other proc might free vmspace */ PROC_VMSPACE_LOCK(p); p->p_vmspace = &vmspace0; PROC_VMSPACE_UNLOCK(p); pmap_activate(td); } } while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt - 1)); if (refcnt == 1) { if (p->p_vmspace != vm) { /* vmspace not yet freed, switch back */ PROC_VMSPACE_LOCK(p); p->p_vmspace = vm; PROC_VMSPACE_UNLOCK(p); pmap_activate(td); } pmap_remove_pages(vmspace_pmap(vm)); /* Switch now since this proc will free vmspace */ PROC_VMSPACE_LOCK(p); p->p_vmspace = &vmspace0; PROC_VMSPACE_UNLOCK(p); pmap_activate(td); vmspace_dofree(vm); } #ifdef RACCT if (racct_enable) vmspace_container_reset(p); #endif } /* Acquire reference to vmspace owned by another process. */ struct vmspace * vmspace_acquire_ref(struct proc *p) { struct vmspace *vm; int refcnt; PROC_VMSPACE_LOCK(p); vm = p->p_vmspace; if (vm == NULL) { PROC_VMSPACE_UNLOCK(p); return (NULL); } refcnt = vm->vm_refcnt; do { if (refcnt <= 0) { /* Avoid 0->1 transition */ PROC_VMSPACE_UNLOCK(p); return (NULL); } } while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt + 1)); if (vm != p->p_vmspace) { PROC_VMSPACE_UNLOCK(p); vmspace_free(vm); return (NULL); } PROC_VMSPACE_UNLOCK(p); return (vm); } /* * Switch between vmspaces in an AIO kernel process. * * The AIO kernel processes switch to and from a user process's * vmspace while performing an I/O operation on behalf of a user * process. The new vmspace is either the vmspace of a user process * obtained from an active AIO request or the initial vmspace of the * AIO kernel process (when it is idling). Because user processes * will block to drain any active AIO requests before proceeding in * exit() or execve(), the vmspace reference count for these vmspaces * can never be 0. This allows for a much simpler implementation than * the loop in vmspace_acquire_ref() above. Similarly, AIO kernel * processes hold an extra reference on their initial vmspace for the * life of the process so that this guarantee is true for any vmspace * passed as 'newvm'. */ void vmspace_switch_aio(struct vmspace *newvm) { struct vmspace *oldvm; /* XXX: Need some way to assert that this is an aio daemon. 
*/ KASSERT(newvm->vm_refcnt > 0, ("vmspace_switch_aio: newvm unreferenced")); oldvm = curproc->p_vmspace; if (oldvm == newvm) return; /* * Point to the new address space and refer to it. */ curproc->p_vmspace = newvm; atomic_add_int(&newvm->vm_refcnt, 1); /* Activate the new mapping. */ pmap_activate(curthread); /* Remove the daemon's reference to the old address space. */ KASSERT(oldvm->vm_refcnt > 1, ("vmspace_switch_aio: oldvm dropping last reference")); vmspace_free(oldvm); } void _vm_map_lock(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_lock_flags_(&map->system_mtx, 0, file, line); else sx_xlock_(&map->lock, file, line); map->timestamp++; } static void vm_map_process_deferred(void) { struct thread *td; vm_map_entry_t entry, next; vm_object_t object; td = curthread; entry = td->td_map_def_user; td->td_map_def_user = NULL; while (entry != NULL) { next = entry->next; if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) { /* * Decrement the object's writemappings and * possibly the vnode's v_writecount. */ KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0, ("Submap with writecount")); object = entry->object.vm_object; KASSERT(object != NULL, ("No object for writecount")); vnode_pager_release_writecount(object, entry->start, entry->end); } vm_map_entry_deallocate(entry, FALSE); entry = next; } } void _vm_map_unlock(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_unlock_flags_(&map->system_mtx, 0, file, line); else { sx_xunlock_(&map->lock, file, line); vm_map_process_deferred(); } } void _vm_map_lock_read(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_lock_flags_(&map->system_mtx, 0, file, line); else sx_slock_(&map->lock, file, line); } void _vm_map_unlock_read(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_unlock_flags_(&map->system_mtx, 0, file, line); else { sx_sunlock_(&map->lock, file, line); vm_map_process_deferred(); } } int _vm_map_trylock(vm_map_t map, const char *file, int line) { int error; error = map->system_map ? !mtx_trylock_flags_(&map->system_mtx, 0, file, line) : !sx_try_xlock_(&map->lock, file, line); if (error == 0) map->timestamp++; return (error == 0); } int _vm_map_trylock_read(vm_map_t map, const char *file, int line) { int error; error = map->system_map ? !mtx_trylock_flags_(&map->system_mtx, 0, file, line) : !sx_try_slock_(&map->lock, file, line); return (error == 0); } /* * _vm_map_lock_upgrade: [ internal use only ] * * Tries to upgrade a read (shared) lock on the specified map to a write * (exclusive) lock. Returns the value "0" if the upgrade succeeds and a * non-zero value if the upgrade fails. If the upgrade fails, the map is * returned without a read or write lock held. * * Requires that the map be read locked. */ int _vm_map_lock_upgrade(vm_map_t map, const char *file, int line) { unsigned int last_timestamp; if (map->system_map) { mtx_assert_(&map->system_mtx, MA_OWNED, file, line); } else { if (!sx_try_upgrade_(&map->lock, file, line)) { last_timestamp = map->timestamp; sx_sunlock_(&map->lock, file, line); vm_map_process_deferred(); /* * If the map's timestamp does not change while the * map is unlocked, then the upgrade succeeds. 
*/ sx_xlock_(&map->lock, file, line); if (last_timestamp != map->timestamp) { sx_xunlock_(&map->lock, file, line); return (1); } } } map->timestamp++; return (0); } void _vm_map_lock_downgrade(vm_map_t map, const char *file, int line) { if (map->system_map) { mtx_assert_(&map->system_mtx, MA_OWNED, file, line); } else sx_downgrade_(&map->lock, file, line); } /* * vm_map_locked: * * Returns a non-zero value if the caller holds a write (exclusive) lock * on the specified map and the value "0" otherwise. */ int vm_map_locked(vm_map_t map) { if (map->system_map) return (mtx_owned(&map->system_mtx)); else return (sx_xlocked(&map->lock)); } #ifdef INVARIANTS static void _vm_map_assert_locked(vm_map_t map, const char *file, int line) { if (map->system_map) mtx_assert_(&map->system_mtx, MA_OWNED, file, line); else sx_assert_(&map->lock, SA_XLOCKED, file, line); } #define VM_MAP_ASSERT_LOCKED(map) \ _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE) #else #define VM_MAP_ASSERT_LOCKED(map) #endif /* * _vm_map_unlock_and_wait: * * Atomically releases the lock on the specified map and puts the calling * thread to sleep. The calling thread will remain asleep until either * vm_map_wakeup() is performed on the map or the specified timeout is * exceeded. * * WARNING! This function does not perform deferred deallocations of * objects and map entries. Therefore, the calling thread is expected to * reacquire the map lock after reawakening and later perform an ordinary * unlock operation, such as vm_map_unlock(), before completing its * operation on the map. */ int _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line) { mtx_lock(&map_sleep_mtx); if (map->system_map) mtx_unlock_flags_(&map->system_mtx, 0, file, line); else sx_xunlock_(&map->lock, file, line); return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", timo)); } /* * vm_map_wakeup: * * Awaken any threads that have slept on the map using * vm_map_unlock_and_wait(). */ void vm_map_wakeup(vm_map_t map) { /* * Acquire and release map_sleep_mtx to prevent a wakeup() * from being performed (and lost) between the map unlock * and the msleep() in _vm_map_unlock_and_wait(). */ mtx_lock(&map_sleep_mtx); mtx_unlock(&map_sleep_mtx); wakeup(&map->root); } void vm_map_busy(vm_map_t map) { VM_MAP_ASSERT_LOCKED(map); map->busy++; } void vm_map_unbusy(vm_map_t map) { VM_MAP_ASSERT_LOCKED(map); KASSERT(map->busy, ("vm_map_unbusy: not busy")); if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) { vm_map_modflags(map, 0, MAP_BUSY_WAKEUP); wakeup(&map->busy); } } void vm_map_wait_busy(vm_map_t map) { VM_MAP_ASSERT_LOCKED(map); while (map->busy) { vm_map_modflags(map, MAP_BUSY_WAKEUP, 0); if (map->system_map) msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0); else sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0); } map->timestamp++; } long vmspace_resident_count(struct vmspace *vmspace) { return pmap_resident_count(vmspace_pmap(vmspace)); } /* * vm_map_create: * * Creates and returns a new empty VM map with * the given physical map structure, and having * the given lower and upper address bounds. */ vm_map_t vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) { vm_map_t result; result = uma_zalloc(mapzone, M_WAITOK); CTR1(KTR_VM, "vm_map_create: %p", result); _vm_map_init(result, pmap, min, max); return (result); } /* * Initialize an existing vm_map structure * such as that in the vmspace structure. 
*/ static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) { map->header.next = map->header.prev = &map->header; map->header.eflags = MAP_ENTRY_HEADER; map->needs_wakeup = FALSE; map->system_map = 0; map->pmap = pmap; map->header.end = min; map->header.start = max; map->flags = 0; map->root = NULL; map->timestamp = 0; map->busy = 0; + map->anon_loc = 0; } void vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) { _vm_map_init(map, pmap, min, max); mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK); sx_init(&map->lock, "user map"); } /* * vm_map_entry_dispose: [ internal use only ] * * Inverse of vm_map_entry_create. */ static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry) { uma_zfree(map->system_map ? kmapentzone : mapentzone, entry); } /* * vm_map_entry_create: [ internal use only ] * * Allocates a VM map entry for insertion. * No entry fields are filled in. */ static vm_map_entry_t vm_map_entry_create(vm_map_t map) { vm_map_entry_t new_entry; if (map->system_map) new_entry = uma_zalloc(kmapentzone, M_NOWAIT); else new_entry = uma_zalloc(mapentzone, M_WAITOK); if (new_entry == NULL) panic("vm_map_entry_create: kernel resources exhausted"); return (new_entry); } /* * vm_map_entry_set_behavior: * * Set the expected access behavior, either normal, random, or * sequential. */ static inline void vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior) { entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) | (behavior & MAP_ENTRY_BEHAV_MASK); } /* * vm_map_entry_set_max_free: * * Set the max_free field in a vm_map_entry. */ static inline void vm_map_entry_set_max_free(vm_map_entry_t entry) { entry->max_free = entry->adj_free; if (entry->left != NULL && entry->left->max_free > entry->max_free) entry->max_free = entry->left->max_free; if (entry->right != NULL && entry->right->max_free > entry->max_free) entry->max_free = entry->right->max_free; } /* * vm_map_entry_splay: * * The Sleator and Tarjan top-down splay algorithm with the * following variation. Max_free must be computed bottom-up, so * on the downward pass, maintain the left and right spines in * reverse order. Then, make a second pass up each side to fix * the pointers and compute max_free. The time bound is O(log n) * amortized. * * The new root is the vm_map_entry containing "addr", or else an * adjacent entry (lower or higher) if addr is not in the tree. * * The map must be locked, and leaves it so. * * Returns: the new root. */ static vm_map_entry_t vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root) { vm_map_entry_t llist, rlist; vm_map_entry_t ltree, rtree; vm_map_entry_t y; /* Special case of empty tree. */ if (root == NULL) return (root); /* * Pass One: Splay down the tree until we find addr or a NULL * pointer where addr would go. llist and rlist are the two * sides in reverse order (bottom-up), with llist linked by * the right pointer and rlist linked by the left pointer in * the vm_map_entry. Wait until Pass Two to set max_free on * the two spines. */ llist = NULL; rlist = NULL; for (;;) { /* root is never NULL in here. */ if (addr < root->start) { y = root->left; if (y == NULL) break; if (addr < y->start && y->left != NULL) { /* Rotate right and put y on rlist. */ root->left = y->right; y->right = root; vm_map_entry_set_max_free(root); root = y->left; y->left = rlist; rlist = y; } else { /* Put root on rlist. 
*/ root->left = rlist; rlist = root; root = y; } } else if (addr >= root->end) { y = root->right; if (y == NULL) break; if (addr >= y->end && y->right != NULL) { /* Rotate left and put y on llist. */ root->right = y->left; y->left = root; vm_map_entry_set_max_free(root); root = y->right; y->right = llist; llist = y; } else { /* Put root on llist. */ root->right = llist; llist = root; root = y; } } else break; } /* * Pass Two: Walk back up the two spines, flip the pointers * and set max_free. The subtrees of the root go at the * bottom of llist and rlist. */ ltree = root->left; while (llist != NULL) { y = llist->right; llist->right = ltree; vm_map_entry_set_max_free(llist); ltree = llist; llist = y; } rtree = root->right; while (rlist != NULL) { y = rlist->left; rlist->left = rtree; vm_map_entry_set_max_free(rlist); rtree = rlist; rlist = y; } /* * Final assembly: add ltree and rtree as subtrees of root. */ root->left = ltree; root->right = rtree; vm_map_entry_set_max_free(root); return (root); } /* * vm_map_entry_{un,}link: * * Insert/remove entries from maps. */ static void vm_map_entry_link(vm_map_t map, vm_map_entry_t after_where, vm_map_entry_t entry) { CTR4(KTR_VM, "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map, map->nentries, entry, after_where); VM_MAP_ASSERT_LOCKED(map); KASSERT(after_where->end <= entry->start, ("vm_map_entry_link: prev end %jx new start %jx overlap", (uintmax_t)after_where->end, (uintmax_t)entry->start)); KASSERT(entry->end <= after_where->next->start, ("vm_map_entry_link: new end %jx next start %jx overlap", (uintmax_t)entry->end, (uintmax_t)after_where->next->start)); map->nentries++; entry->prev = after_where; entry->next = after_where->next; entry->next->prev = entry; after_where->next = entry; if (after_where != &map->header) { if (after_where != map->root) vm_map_entry_splay(after_where->start, map->root); entry->right = after_where->right; entry->left = after_where; after_where->right = NULL; after_where->adj_free = entry->start - after_where->end; vm_map_entry_set_max_free(after_where); } else { entry->right = map->root; entry->left = NULL; } entry->adj_free = entry->next->start - entry->end; vm_map_entry_set_max_free(entry); map->root = entry; } static void vm_map_entry_unlink(vm_map_t map, vm_map_entry_t entry) { vm_map_entry_t next, prev, root; VM_MAP_ASSERT_LOCKED(map); if (entry != map->root) vm_map_entry_splay(entry->start, map->root); if (entry->left == NULL) root = entry->right; else { root = vm_map_entry_splay(entry->start, entry->left); root->right = entry->right; root->adj_free = entry->next->start - root->end; vm_map_entry_set_max_free(root); } map->root = root; prev = entry->prev; next = entry->next; next->prev = prev; prev->next = next; map->nentries--; CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, map->nentries, entry); } /* * vm_map_entry_resize_free: * * Recompute the amount of free space following a vm_map_entry * and propagate that value up the tree. Call this function after * resizing a map entry in-place, that is, without a call to * vm_map_entry_link() or _unlink(). * * The map must be locked, and leaves it so. */ static void vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry) { /* * Using splay trees without parent pointers, propagating * max_free up the tree is done by moving the entry to the * root and making the change there. 
*/ if (entry != map->root) map->root = vm_map_entry_splay(entry->start, map->root); entry->adj_free = entry->next->start - entry->end; vm_map_entry_set_max_free(entry); } /* * vm_map_lookup_entry: [ internal use only ] * * Finds the map entry containing (or * immediately preceding) the specified address * in the given map; the entry is returned * in the "entry" parameter. The boolean * result indicates whether the address is * actually contained in the map. */ boolean_t vm_map_lookup_entry( vm_map_t map, vm_offset_t address, vm_map_entry_t *entry) /* OUT */ { vm_map_entry_t cur; boolean_t locked; /* * If the map is empty, then the map entry immediately preceding * "address" is the map's header. */ cur = map->root; if (cur == NULL) *entry = &map->header; else if (address >= cur->start && cur->end > address) { *entry = cur; return (TRUE); } else if ((locked = vm_map_locked(map)) || sx_try_upgrade(&map->lock)) { /* * Splay requires a write lock on the map. However, it only * restructures the binary search tree; it does not otherwise * change the map. Thus, the map's timestamp need not change * on a temporary upgrade. */ map->root = cur = vm_map_entry_splay(address, cur); if (!locked) sx_downgrade(&map->lock); /* * If "address" is contained within a map entry, the new root * is that map entry. Otherwise, the new root is a map entry * immediately before or after "address". */ if (address >= cur->start) { *entry = cur; if (cur->end > address) return (TRUE); } else *entry = cur->prev; } else /* * Since the map is only locked for read access, perform a * standard binary search tree lookup for "address". */ for (;;) { if (address < cur->start) { if (cur->left == NULL) { *entry = cur->prev; break; } cur = cur->left; } else if (cur->end > address) { *entry = cur; return (TRUE); } else { if (cur->right == NULL) { *entry = cur; break; } cur = cur->right; } } return (FALSE); } /* * vm_map_insert: * * Inserts the given whole VM object into the target * map at the specified address range. The object's * size should match that of the address range. * * Requires that the map be locked, and leaves it so. * * If object is non-NULL, ref count must be bumped by caller * prior to making call to account for the new entry. */ int vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow) { vm_map_entry_t new_entry, prev_entry, temp_entry; struct ucred *cred; vm_eflags_t protoeflags; vm_inherit_t inheritance; VM_MAP_ASSERT_LOCKED(map); KASSERT(object != kernel_object || (cow & MAP_COPY_ON_WRITE) == 0, ("vm_map_insert: kernel object and COW")); KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0, ("vm_map_insert: paradoxical MAP_NOFAULT request")); KASSERT((prot & ~max) == 0, ("prot %#x is not subset of max_prot %#x", prot, max)); /* * Check that the start and end points are not bogus. */ if (start < vm_map_min(map) || end > vm_map_max(map) || start >= end) return (KERN_INVALID_ADDRESS); /* * Find the entry prior to the proposed starting address; if it's part * of an existing entry, this range is bogus. */ if (vm_map_lookup_entry(map, start, &temp_entry)) return (KERN_NO_SPACE); prev_entry = temp_entry; /* * Assert that the next entry doesn't overlap the end point. 
*/ if (prev_entry->next->start < end) return (KERN_NO_SPACE); if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL || max != VM_PROT_NONE)) return (KERN_INVALID_ARGUMENT); protoeflags = 0; if (cow & MAP_COPY_ON_WRITE) protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; if (cow & MAP_NOFAULT) protoeflags |= MAP_ENTRY_NOFAULT; if (cow & MAP_DISABLE_SYNCER) protoeflags |= MAP_ENTRY_NOSYNC; if (cow & MAP_DISABLE_COREDUMP) protoeflags |= MAP_ENTRY_NOCOREDUMP; if (cow & MAP_STACK_GROWS_DOWN) protoeflags |= MAP_ENTRY_GROWS_DOWN; if (cow & MAP_STACK_GROWS_UP) protoeflags |= MAP_ENTRY_GROWS_UP; if (cow & MAP_VN_WRITECOUNT) protoeflags |= MAP_ENTRY_VN_WRITECNT; if ((cow & MAP_CREATE_GUARD) != 0) protoeflags |= MAP_ENTRY_GUARD; if ((cow & MAP_CREATE_STACK_GAP_DN) != 0) protoeflags |= MAP_ENTRY_STACK_GAP_DN; if ((cow & MAP_CREATE_STACK_GAP_UP) != 0) protoeflags |= MAP_ENTRY_STACK_GAP_UP; if (cow & MAP_INHERIT_SHARE) inheritance = VM_INHERIT_SHARE; else inheritance = VM_INHERIT_DEFAULT; cred = NULL; if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) goto charged; if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) && ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) { if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start)) return (KERN_RESOURCE_SHORTAGE); KASSERT(object == NULL || (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 || object->cred == NULL, ("overcommit: vm_map_insert o %p", object)); cred = curthread->td_ucred; } charged: /* Expand the kernel pmap, if necessary. */ if (map == kernel_map && end > kernel_vm_end) pmap_growkernel(end); if (object != NULL) { /* * OBJ_ONEMAPPING must be cleared unless this mapping * is trivially proven to be the only mapping for any * of the object's pages. (Object granularity * reference counting is insufficient to recognize * aliases with precision.) */ VM_OBJECT_WLOCK(object); if (object->ref_count > 1 || object->shadow_count != 0) vm_object_clear_flag(object, OBJ_ONEMAPPING); VM_OBJECT_WUNLOCK(object); } else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) == protoeflags && (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 && prev_entry->end == start && (prev_entry->cred == cred || (prev_entry->object.vm_object != NULL && prev_entry->object.vm_object->cred == cred)) && vm_object_coalesce(prev_entry->object.vm_object, prev_entry->offset, (vm_size_t)(prev_entry->end - prev_entry->start), (vm_size_t)(end - prev_entry->end), cred != NULL && (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) { /* * We were able to extend the object. Determine if we * can extend the previous map entry to include the * new range as well. */ if (prev_entry->inheritance == inheritance && prev_entry->protection == prot && prev_entry->max_protection == max && prev_entry->wired_count == 0) { KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) == 0, ("prev_entry %p has incoherent wiring", prev_entry)); if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0) map->size += end - prev_entry->end; prev_entry->end = end; vm_map_entry_resize_free(map, prev_entry); vm_map_simplify_entry(map, prev_entry); return (KERN_SUCCESS); } /* * If we can extend the object but cannot extend the * map entry, we have to create a new map entry. We * must bump the ref count on the extended object to * account for it. object may be NULL. 
*/ object = prev_entry->object.vm_object; offset = prev_entry->offset + (prev_entry->end - prev_entry->start); vm_object_reference(object); if (cred != NULL && object != NULL && object->cred != NULL && !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { /* Object already accounts for this uid. */ cred = NULL; } } if (cred != NULL) crhold(cred); /* * Create a new entry */ new_entry = vm_map_entry_create(map); new_entry->start = start; new_entry->end = end; new_entry->cred = NULL; new_entry->eflags = protoeflags; new_entry->object.vm_object = object; new_entry->offset = offset; new_entry->inheritance = inheritance; new_entry->protection = prot; new_entry->max_protection = max; new_entry->wired_count = 0; new_entry->wiring_thread = NULL; new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT; new_entry->next_read = start; KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry), ("overcommit: vm_map_insert leaks vm_map %p", new_entry)); new_entry->cred = cred; /* * Insert the new entry into the list */ vm_map_entry_link(map, prev_entry, new_entry); if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0) map->size += new_entry->end - new_entry->start; /* * Try to coalesce the new entry with both the previous and next * entries in the list. Previously, we only attempted to coalesce * with the previous entry when object is NULL. Here, we handle the * other cases, which are less common. */ vm_map_simplify_entry(map, new_entry); if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) { vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset), end - start, cow & MAP_PREFAULT_PARTIAL); } return (KERN_SUCCESS); } /* * vm_map_findspace: * * Find the first fit (lowest VM address) for "length" free bytes * beginning at address >= start in the given map. * * In a vm_map_entry, "adj_free" is the amount of free space * adjacent (higher address) to this entry, and "max_free" is the * maximum amount of contiguous free space in its subtree. This * allows finding a free region in one path down the tree, so * O(log n) amortized with splay trees. * * The map must be locked, and leaves it so. * * Returns: 0 on success, and starting address in *addr, * 1 if insufficient space. */ int vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length, vm_offset_t *addr) /* OUT */ { vm_map_entry_t entry; vm_offset_t st; /* * Request must fit within min/max VM address and must avoid * address wrap. */ start = MAX(start, vm_map_min(map)); if (start + length > vm_map_max(map) || start + length < start) return (1); /* Empty tree means wide open address space. */ if (map->root == NULL) { *addr = start; return (0); } /* * After splay, if start comes before root node, then there * must be a gap from start to the root. */ map->root = vm_map_entry_splay(start, map->root); if (start + length <= map->root->start) { *addr = start; return (0); } /* * Root is the last node that might begin its gap before * start, and this is the last comparison where address * wrap might be a problem. */ st = (start > map->root->end) ? start : map->root->end; if (length <= map->root->end + map->root->adj_free - st) { *addr = st; return (0); } /* With max_free, can immediately tell if no solution. */ entry = map->root->right; if (entry == NULL || length > entry->max_free) return (1); /* * Search the right subtree in the order: left subtree, root, * right subtree (first fit). The previous splay implies that * all regions in the right subtree have addresses > start. 
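 *
 * [Editor's aside, not part of this change: the loop below walks the tree
 * exactly as described here. The self-contained sketch that follows uses a
 * hypothetical "struct gap_node" with only the adj_free/max_free fields and
 * concrete numbers, to show how max_free lets the search skip whole
 * subtrees.]
 */
#if 0	/* illustrative userspace sketch only, not part of the build */
#include <stdint.h>
#include <stdio.h>

struct gap_node {
	uint64_t end;		/* end of the mapped region */
	uint64_t adj_free;	/* free bytes immediately after "end" */
	uint64_t max_free;	/* largest adj_free in this subtree */
	struct gap_node *left, *right;
};

/* First fit: left subtree, then this node, then right subtree. */
static uint64_t
first_fit(struct gap_node *n, uint64_t length)
{
	while (n != NULL) {
		if (n->left != NULL && n->left->max_free >= length)
			n = n->left;
		else if (n->adj_free >= length)
			return (n->end);	/* free run starts after n */
		else
			n = n->right;
	}
	return (0);			/* no free run is large enough */
}

int
main(void)
{
	/* Gaps of 4 KB, 64 KB and 8 KB; max_free precomputed bottom-up. */
	struct gap_node a = { 0x10000, 0x1000,  0x1000,  NULL, NULL };
	struct gap_node c = { 0x40000, 0x10000, 0x10000, NULL, NULL };
	struct gap_node b = { 0x20000, 0x2000,  0x10000, &a, &c };

	/* A 12 KB request skips a and b and lands after c, at 0x40000. */
	printf("0x%jx\n", (uintmax_t)first_fit(&b, 0x3000));
	return (0);
}
#endif
/*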
*/ while (entry != NULL) { if (entry->left != NULL && entry->left->max_free >= length) entry = entry->left; else if (entry->adj_free >= length) { *addr = entry->end; return (0); } else entry = entry->right; } /* Can't get here, so panic if we do. */ panic("vm_map_findspace: max_free corrupt"); } int vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_size_t length, vm_prot_t prot, vm_prot_t max, int cow) { vm_offset_t end; int result; end = start + length; KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || object == NULL, ("vm_map_fixed: non-NULL backing object for stack")); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if ((cow & MAP_CHECK_EXCL) == 0) vm_map_delete(map, start, end); if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { result = vm_map_stack_locked(map, start, length, sgrowsiz, prot, max, cow); } else { result = vm_map_insert(map, object, offset, start, end, prot, max, cow); } vm_map_unlock(map); return (result); } +static const int aslr_pages_rnd_64[2] = {0x1000, 0x10}; +static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; + +static int cluster_anon = 1; +SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, + &cluster_anon, 0, + "Cluster anonymous mappings"); + +static long aslr_restarts; +SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, + &aslr_restarts, 0, + "Number of aslr failures"); + +#define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) + /* * Searches for the specified amount of free space in the given map with the * specified alignment. Performs an address-ordered, first-fit search from * the given address "*addr", with an optional upper bound "max_addr". If the * parameter "alignment" is zero, then the alignment is computed from the * given (object, offset) pair so as to enable the greatest possible use of * superpage mappings. Returns KERN_SUCCESS and the address of the free space * in "*addr" if successful. Otherwise, returns KERN_NO_SPACE. * * The map must be locked. Initially, there must be at least "length" bytes * of free space at the given address. */ static int vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr, vm_offset_t alignment) { vm_offset_t aligned_addr, free_addr; VM_MAP_ASSERT_LOCKED(map); free_addr = *addr; KASSERT(!vm_map_findspace(map, free_addr, length, addr) && free_addr == *addr, ("caller provided insufficient free space")); for (;;) { /* * At the start of every iteration, the free space at address * "*addr" is at least "length" bytes. */ if (alignment == 0) pmap_align_superpage(object, offset, addr, length); else if ((*addr & (alignment - 1)) != 0) { *addr &= ~(alignment - 1); *addr += alignment; } aligned_addr = *addr; if (aligned_addr == free_addr) { /* * Alignment did not change "*addr", so "*addr" must * still provide sufficient free space. */ return (KERN_SUCCESS); } /* * Test for address wrap on "*addr". A wrapped "*addr" could * be a valid address, in which case vm_map_findspace() cannot * be relied upon to fail. */ if (aligned_addr < free_addr || vm_map_findspace(map, aligned_addr, length, addr) || (max_addr != 0 && *addr + length > max_addr)) return (KERN_NO_SPACE); free_addr = *addr; if (free_addr == aligned_addr) { /* * If a successful call to vm_map_findspace() did not * change "*addr", then "*addr" must still be aligned * and provide sufficient free space. 
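 *
 * [Editor's aside, not part of this change: a worked example of the
 * power-of-two adjustment performed by the loop above; the helper below is
 * hypothetical and mirrors the "*addr &= ~(alignment - 1); *addr +=
 * alignment;" step.]
 */
#if 0	/* illustrative userspace sketch only, not part of the build */
#include <stdint.h>
#include <stdio.h>

static uint64_t
round_up(uint64_t addr, uint64_t alignment)
{
	/* Only misaligned addresses are bumped to the next boundary. */
	if ((addr & (alignment - 1)) != 0) {
		addr &= ~(alignment - 1);
		addr += alignment;
	}
	return (addr);
}

int
main(void)
{
	printf("0x%jx\n", (uintmax_t)round_up(0x12345, 0x1000)); /* 0x13000 */
	printf("0x%jx\n", (uintmax_t)round_up(0x13000, 0x1000)); /* unchanged */
	return (0);
}
#endif
/*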
*/ return (KERN_SUCCESS); } } } /* * vm_map_find finds an unallocated region in the target address * map with the given length. The search is defined to be * first-fit from the specified address; the region found is * returned in the same parameter. * * If object is non-NULL, ref count must be bumped by caller * prior to making call to account for the new entry. */ int vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, /* IN/OUT */ vm_size_t length, vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, int cow) { - vm_offset_t alignment, min_addr; - int rv; + vm_offset_t alignment, curr_min_addr, min_addr; + int gap, pidx, rv, try; + bool cluster, en_aslr, update_anon; KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || object == NULL, ("vm_map_find: non-NULL backing object for stack")); MPASS((cow & MAP_REMAP) == 0 || (find_space == VMFS_NO_SPACE && (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0)); if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || (object->flags & OBJ_COLORED) == 0)) find_space = VMFS_ANY_SPACE; if (find_space >> 8 != 0) { KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); alignment = (vm_offset_t)1 << (find_space >> 8); } else alignment = 0; + en_aslr = (map->flags & MAP_ASLR) != 0; + update_anon = cluster = cluster_anon != 0 && + (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && + find_space != VMFS_NO_SPACE && object == NULL && + (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | + MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE; + curr_min_addr = min_addr = *addr; + if (en_aslr && min_addr == 0 && !cluster && + find_space != VMFS_NO_SPACE && + (map->flags & MAP_ASLR_IGNSTART) != 0) + curr_min_addr = min_addr = vm_map_min(map); + try = 0; vm_map_lock(map); + if (cluster) { + curr_min_addr = map->anon_loc; + if (curr_min_addr == 0) + cluster = false; + } if (find_space != VMFS_NO_SPACE) { KASSERT(find_space == VMFS_ANY_SPACE || find_space == VMFS_OPTIMAL_SPACE || find_space == VMFS_SUPER_SPACE || alignment != 0, ("unexpected VMFS flag")); - min_addr = *addr; again: - if (vm_map_findspace(map, min_addr, length, addr) || + /* + * When creating an anonymous mapping, try clustering + * with an existing anonymous mapping first. + * + * We make up to two attempts to find address space + * for a given find_space value. The first attempt may + * apply randomization or may cluster with an existing + * anonymous mapping. If this first attempt fails, + * perform a first-fit search of the available address + * space. + * + * If all tries failed, and find_space is + * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE. + * Again enable clustering and randomization. + */ + try++; + MPASS(try <= 2); + + if (try == 2) { + /* + * Second try: we failed either to find a + * suitable region for randomizing the + * allocation, or to cluster with an existing + * mapping. Retry with free run. + */ + curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ? + vm_map_min(map) : min_addr; + atomic_add_long(&aslr_restarts, 1); + } + + if (try == 1 && en_aslr && !cluster) { + /* + * Find space for allocation, including + * gap needed for later randomization. + */ + pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 && + (find_space == VMFS_SUPER_SPACE || find_space == + VMFS_OPTIMAL_SPACE) ? 1 : 0; + gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR && + (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ? 
+ aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx]; + if (vm_map_findspace(map, curr_min_addr, length + + gap * pagesizes[pidx], addr) || + (max_addr != 0 && *addr + length > max_addr)) + goto again; + /* And randomize the start address. */ + *addr += (arc4random() % gap) * pagesizes[pidx]; + } else if (vm_map_findspace(map, curr_min_addr, length, addr) || (max_addr != 0 && *addr + length > max_addr)) { + if (cluster) { + cluster = false; + MPASS(try == 1); + goto again; + } rv = KERN_NO_SPACE; goto done; } + if (find_space != VMFS_ANY_SPACE && (rv = vm_map_alignspace(map, object, offset, addr, length, max_addr, alignment)) != KERN_SUCCESS) { if (find_space == VMFS_OPTIMAL_SPACE) { find_space = VMFS_ANY_SPACE; + curr_min_addr = min_addr; + cluster = update_anon; + try = 0; goto again; } goto done; } } else if ((cow & MAP_REMAP) != 0) { if (*addr < vm_map_min(map) || *addr + length > vm_map_max(map) || *addr + length <= length) { rv = KERN_INVALID_ADDRESS; goto done; } vm_map_delete(map, *addr, *addr + length); } if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot, max, cow); } else { rv = vm_map_insert(map, object, offset, *addr, *addr + length, prot, max, cow); } + if (rv == KERN_SUCCESS && update_anon) + map->anon_loc = *addr + length; done: vm_map_unlock(map); return (rv); } /* * vm_map_find_min() is a variant of vm_map_find() that takes an * additional parameter (min_addr) and treats the given address * (*addr) differently. Specifically, it treats *addr as a hint * and not as the minimum address where the mapping is created. * * This function works in two phases. First, it tries to * allocate above the hint. If that fails and the hint is * greater than min_addr, it performs a second pass, replacing * the hint with min_addr as the minimum address for the * allocation. */ int vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, int cow) { vm_offset_t hint; int rv; hint = *addr; for (;;) { rv = vm_map_find(map, object, offset, addr, length, max_addr, find_space, prot, max, cow); if (rv == KERN_SUCCESS || min_addr >= hint) return (rv); *addr = hint = min_addr; } } /* * A map entry with any of the following flags set must not be merged with * another entry. */ #define MAP_ENTRY_NOMERGE_MASK (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP | \ MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP) static bool vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry) { KASSERT((prev->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 || (entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0, ("vm_map_mergeable_neighbors: neither %p nor %p are mergeable", prev, entry)); return (prev->end == entry->start && prev->object.vm_object == entry->object.vm_object && (prev->object.vm_object == NULL || prev->offset + (prev->end - prev->start) == entry->offset) && prev->eflags == entry->eflags && prev->protection == entry->protection && prev->max_protection == entry->max_protection && prev->inheritance == entry->inheritance && prev->wired_count == entry->wired_count && prev->cred == entry->cred); } static void vm_map_merged_neighbor_dispose(vm_map_t map, vm_map_entry_t entry) { /* * If the backing object is a vnode object, vm_object_deallocate() * calls vrele(). However, vrele() does not lock the vnode because * the vnode has additional references. 
Thus, the map lock can be * kept without causing a lock-order reversal with the vnode lock. * * Since we count the number of virtual page mappings in * object->un_pager.vnp.writemappings, the writemappings value * should not be adjusted when the entry is disposed of. */ if (entry->object.vm_object != NULL) vm_object_deallocate(entry->object.vm_object); if (entry->cred != NULL) crfree(entry->cred); vm_map_entry_dispose(map, entry); } /* * vm_map_simplify_entry: * * Simplify the given map entry by merging with either neighbor. This * routine also has the ability to merge with both neighbors. * * The map must be locked. * * This routine guarantees that the passed entry remains valid (though * possibly extended). When merging, this routine may delete one or * both neighbors. */ void vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) { vm_map_entry_t next, prev; if ((entry->eflags & MAP_ENTRY_NOMERGE_MASK) != 0) return; prev = entry->prev; if (vm_map_mergeable_neighbors(prev, entry)) { vm_map_entry_unlink(map, prev); entry->start = prev->start; entry->offset = prev->offset; if (entry->prev != &map->header) vm_map_entry_resize_free(map, entry->prev); vm_map_merged_neighbor_dispose(map, prev); } next = entry->next; if (vm_map_mergeable_neighbors(entry, next)) { vm_map_entry_unlink(map, next); entry->end = next->end; vm_map_entry_resize_free(map, entry); vm_map_merged_neighbor_dispose(map, next); } } /* * vm_map_clip_start: [ internal use only ] * * Asserts that the given entry begins at or after * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_start(map, entry, startaddr) \ { \ if (startaddr > entry->start) \ _vm_map_clip_start(map, entry, startaddr); \ } /* * This routine is called only when it is known that * the entry must be split. */ static void _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) { vm_map_entry_t new_entry; VM_MAP_ASSERT_LOCKED(map); KASSERT(entry->end > start && entry->start < start, ("_vm_map_clip_start: invalid clip of entry %p", entry)); /* * Split off the front portion -- note that we must insert the new * entry BEFORE this one, so that this entry has the specified * starting address. */ vm_map_simplify_entry(map, entry); /* * If there is no object backing this entry, we might as well create * one now. If we defer it, an object can get created after the map * is clipped, and individual objects will be created for the split-up * map. This is a bit of a hack, but is also about the best place to * put this improvement. 
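 *
 * [Editor's aside, not part of this change: the sketch below illustrates
 * the ASLR arithmetic used by vm_map_find() earlier in this patch, where
 * the first attempt reserves "length + gap * pagesizes[pidx]" bytes of free
 * space and then advances the start by "(arc4random() % gap) *
 * pagesizes[pidx]". The base address and sizes are made-up example values.]
 */
#if 0	/* illustrative userspace sketch only, not part of the build */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	uint64_t pagesize = 4096;		/* pagesizes[0] */
	uint64_t gap = 0x1000;			/* aslr_pages_rnd_64[0] */
	uint64_t length = 1 << 20;		/* a 1 MB request */
	uint64_t base = 0x800000000ULL;		/* what first fit returned */
	uint64_t addr = base + (arc4random() % gap) * pagesize;

	/* 0x1000 pages of 4 KB give a 16 MB window for the random slide. */
	printf("map %ju bytes at 0x%jx (window %ju MB)\n",
	    (uintmax_t)length, (uintmax_t)addr,
	    (uintmax_t)(gap * pagesize >> 20));
	return (0);
}
#endif
/*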
*/ if (entry->object.vm_object == NULL && !map->system_map && (entry->eflags & MAP_ENTRY_GUARD) == 0) { vm_object_t object; object = vm_object_allocate(OBJT_DEFAULT, atop(entry->end - entry->start)); entry->object.vm_object = object; entry->offset = 0; if (entry->cred != NULL) { object->cred = entry->cred; object->charge = entry->end - entry->start; entry->cred = NULL; } } else if (entry->object.vm_object != NULL && ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && entry->cred != NULL) { VM_OBJECT_WLOCK(entry->object.vm_object); KASSERT(entry->object.vm_object->cred == NULL, ("OVERCOMMIT: vm_entry_clip_start: both cred e %p", entry)); entry->object.vm_object->cred = entry->cred; entry->object.vm_object->charge = entry->end - entry->start; VM_OBJECT_WUNLOCK(entry->object.vm_object); entry->cred = NULL; } new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->end = start; entry->offset += (start - entry->start); entry->start = start; if (new_entry->cred != NULL) crhold(entry->cred); vm_map_entry_link(map, entry->prev, new_entry); if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); /* * The object->un_pager.vnp.writemappings for the * object of MAP_ENTRY_VN_WRITECNT type entry shall be * kept as is here. The virtual pages are * re-distributed among the clipped entries, so the sum is * left the same. */ } } /* * vm_map_clip_end: [ internal use only ] * * Asserts that the given entry ends at or before * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_end(map, entry, endaddr) \ { \ if ((endaddr) < (entry->end)) \ _vm_map_clip_end((map), (entry), (endaddr)); \ } /* * This routine is called only when it is known that * the entry must be split. */ static void _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) { vm_map_entry_t new_entry; VM_MAP_ASSERT_LOCKED(map); KASSERT(entry->start < end && entry->end > end, ("_vm_map_clip_end: invalid clip of entry %p", entry)); /* * If there is no object backing this entry, we might as well create * one now. If we defer it, an object can get created after the map * is clipped, and individual objects will be created for the split-up * map. This is a bit of a hack, but is also about the best place to * put this improvement. 
*/ if (entry->object.vm_object == NULL && !map->system_map && (entry->eflags & MAP_ENTRY_GUARD) == 0) { vm_object_t object; object = vm_object_allocate(OBJT_DEFAULT, atop(entry->end - entry->start)); entry->object.vm_object = object; entry->offset = 0; if (entry->cred != NULL) { object->cred = entry->cred; object->charge = entry->end - entry->start; entry->cred = NULL; } } else if (entry->object.vm_object != NULL && ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && entry->cred != NULL) { VM_OBJECT_WLOCK(entry->object.vm_object); KASSERT(entry->object.vm_object->cred == NULL, ("OVERCOMMIT: vm_entry_clip_end: both cred e %p", entry)); entry->object.vm_object->cred = entry->cred; entry->object.vm_object->charge = entry->end - entry->start; VM_OBJECT_WUNLOCK(entry->object.vm_object); entry->cred = NULL; } /* * Create a new entry and insert it AFTER the specified entry */ new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->start = entry->end = end; new_entry->offset += (end - entry->start); if (new_entry->cred != NULL) crhold(entry->cred); vm_map_entry_link(map, entry, new_entry); if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); } } /* * vm_map_submap: [ kernel use only ] * * Mark the given range as handled by a subordinate map. * * This range must have been created with vm_map_find, * and no other operations may have been performed on this * range prior to calling vm_map_submap. * * Only a limited number of operations can be performed * within this rage after calling vm_map_submap: * vm_fault * [Don't try vm_map_copy!] * * To remove a submapping, one must first remove the * range from the superior map, and then destroy the * submap (if desired). [Better yet, don't try it.] */ int vm_map_submap( vm_map_t map, vm_offset_t start, vm_offset_t end, vm_map_t submap) { vm_map_entry_t entry; - int result = KERN_INVALID_ARGUMENT; + int result; + result = KERN_INVALID_ARGUMENT; + + vm_map_lock(submap); + submap->flags |= MAP_IS_SUB_MAP; + vm_map_unlock(submap); + vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else entry = entry->next; vm_map_clip_end(map, entry, end); if ((entry->start == start) && (entry->end == end) && ((entry->eflags & MAP_ENTRY_COW) == 0) && (entry->object.vm_object == NULL)) { entry->object.sub_map = submap; entry->eflags |= MAP_ENTRY_IS_SUB_MAP; result = KERN_SUCCESS; } vm_map_unlock(map); + if (result != KERN_SUCCESS) { + vm_map_lock(submap); + submap->flags &= ~MAP_IS_SUB_MAP; + vm_map_unlock(submap); + } return (result); } /* * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified */ #define MAX_INIT_PT 96 /* * vm_map_pmap_enter: * * Preload the specified map's pmap with mappings to the specified * object's memory-resident pages. No further physical pages are * allocated, and no further virtual pages are retrieved from secondary * storage. If the specified flags include MAP_PREFAULT_PARTIAL, then a * limited number of page mappings are created at the low-end of the * specified address range. (For this purpose, a superpage mapping * counts as one page mapping.) Otherwise, all resident pages within * the specified address range are mapped. 
*/ static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags) { vm_offset_t start; vm_page_t p, p_start; vm_pindex_t mask, psize, threshold, tmpidx; if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL) return; VM_OBJECT_RLOCK(object); if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { VM_OBJECT_RUNLOCK(object); VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { pmap_object_init_pt(map->pmap, addr, object, pindex, size); VM_OBJECT_WUNLOCK(object); return; } VM_OBJECT_LOCK_DOWNGRADE(object); } psize = atop(size); if (psize + pindex > object->size) { if (object->size < pindex) { VM_OBJECT_RUNLOCK(object); return; } psize = object->size - pindex; } start = 0; p_start = NULL; threshold = MAX_INIT_PT; p = vm_page_find_least(object, pindex); /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. */ for (; p != NULL && (tmpidx = p->pindex - pindex) < psize; p = TAILQ_NEXT(p, listq)) { /* * don't allow an madvise to blow away our really * free pages allocating pv entries. */ if (((flags & MAP_PREFAULT_MADVISE) != 0 && vm_page_count_severe()) || ((flags & MAP_PREFAULT_PARTIAL) != 0 && tmpidx >= threshold)) { psize = tmpidx; break; } if (p->valid == VM_PAGE_BITS_ALL) { if (p_start == NULL) { start = addr + ptoa(tmpidx); p_start = p; } /* Jump ahead if a superpage mapping is possible. */ if (p->psind > 0 && ((addr + ptoa(tmpidx)) & (pagesizes[p->psind] - 1)) == 0) { mask = atop(pagesizes[p->psind]) - 1; if (tmpidx + mask < psize && vm_page_ps_test(p, PS_ALL_VALID, NULL)) { p += mask; threshold += mask; } } } else if (p_start != NULL) { pmap_enter_object(map->pmap, start, addr + ptoa(tmpidx), p_start, prot); p_start = NULL; } } if (p_start != NULL) pmap_enter_object(map->pmap, start, addr + ptoa(psize), p_start, prot); VM_OBJECT_RUNLOCK(object); } /* * vm_map_protect: * * Sets the protection of the specified address * region in the target map. If "set_max" is * specified, the maximum protection is to be set; * otherwise, only the current protection is affected. */ int vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_prot_t new_prot, boolean_t set_max) { vm_map_entry_t current, entry; vm_object_t obj; struct ucred *cred; vm_prot_t old_prot; if (start == end) return (KERN_SUCCESS); vm_map_lock(map); /* * Ensure that we are not concurrently wiring pages. vm_map_wire() may * need to fault pages into the map and will drop the map lock while * doing so, and the VM object may end up in an inconsistent state if we * update the protection on the map entry in between faults. */ vm_map_wait_busy(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else { entry = entry->next; } /* * Make a first pass to check for protection violations. */ for (current = entry; current->start < end; current = current->next) { if ((current->eflags & MAP_ENTRY_GUARD) != 0) continue; if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } if ((new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); return (KERN_PROTECTION_FAILURE); } } /* * Do an accounting pass for private read-only mappings that * now will do cow due to allowed write (e.g. 
debugger sets * breakpoint on text segment) */ for (current = entry; current->start < end; current = current->next) { vm_map_clip_end(map, current, end); if (set_max || ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 || ENTRY_CHARGED(current) || (current->eflags & MAP_ENTRY_GUARD) != 0) { continue; } cred = curthread->td_ucred; obj = current->object.vm_object; if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) { if (!swap_reserve(current->end - current->start)) { vm_map_unlock(map); return (KERN_RESOURCE_SHORTAGE); } crhold(cred); current->cred = cred; continue; } VM_OBJECT_WLOCK(obj); if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) { VM_OBJECT_WUNLOCK(obj); continue; } /* * Charge for the whole object allocation now, since * we cannot distinguish between non-charged and * charged clipped mapping of the same object later. */ KASSERT(obj->charge == 0, ("vm_map_protect: object %p overcharged (entry %p)", obj, current)); if (!swap_reserve(ptoa(obj->size))) { VM_OBJECT_WUNLOCK(obj); vm_map_unlock(map); return (KERN_RESOURCE_SHORTAGE); } crhold(cred); obj->cred = cred; obj->charge = ptoa(obj->size); VM_OBJECT_WUNLOCK(obj); } /* * Go back and fix up protections. [Note that clipping is not * necessary the second time.] */ for (current = entry; current->start < end; current = current->next) { if ((current->eflags & MAP_ENTRY_GUARD) != 0) continue; old_prot = current->protection; if (set_max) current->protection = (current->max_protection = new_prot) & old_prot; else current->protection = new_prot; /* * For user wired map entries, the normal lazy evaluation of * write access upgrades through soft page faults is * undesirable. Instead, immediately copy any pages that are * copy-on-write and enable write access in the physical map. */ if ((current->eflags & MAP_ENTRY_USER_WIRED) != 0 && (current->protection & VM_PROT_WRITE) != 0 && (old_prot & VM_PROT_WRITE) == 0) vm_fault_copy_entry(map, map, current, current, NULL); /* * When restricting access, update the physical map. Worry * about copy-on-write here. */ if ((old_prot & ~current->protection) != 0) { #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \ VM_PROT_ALL) pmap_protect(map->pmap, current->start, current->end, current->protection & MASK(current)); #undef MASK } vm_map_simplify_entry(map, current); } vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_madvise: * * This routine traverses a processes map handling the madvise * system call. Advisories are classified as either those effecting * the vm_map_entry structure, or those effecting the underlying * objects. */ int vm_map_madvise( vm_map_t map, vm_offset_t start, vm_offset_t end, int behav) { vm_map_entry_t current, entry; bool modify_map; /* * Some madvise calls directly modify the vm_map_entry, in which case * we need to use an exclusive lock on the map and we need to perform * various clipping operations. Otherwise we only need a read-lock * on the map. */ switch(behav) { case MADV_NORMAL: case MADV_SEQUENTIAL: case MADV_RANDOM: case MADV_NOSYNC: case MADV_AUTOSYNC: case MADV_NOCORE: case MADV_CORE: if (start == end) return (0); modify_map = true; vm_map_lock(map); break; case MADV_WILLNEED: case MADV_DONTNEED: case MADV_FREE: if (start == end) return (0); modify_map = false; vm_map_lock_read(map); break; default: return (EINVAL); } /* * Locate starting entry and clip if necessary. 
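 *
 * [Editor's aside, not part of this change: a userspace view of the two
 * classes of advice distinguished above. MADV_NOCORE only toggles
 * MAP_ENTRY_NOCOREDUMP on the map entry (exclusive map lock), while
 * MADV_WILLNEED is applied to the backing object (read lock) and may
 * prefault pages via vm_map_pmap_enter(). A minimal sketch, not a complete
 * test:]
 */
#if 0	/* illustrative userspace sketch only, not part of the build */
#include <sys/mman.h>
#include <err.h>

int
main(void)
{
	size_t len = 1 << 20;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	/* Entry-level advice: handled with the map exclusively locked. */
	if (madvise(p, len, MADV_NOCORE) != 0)
		err(1, "madvise(MADV_NOCORE)");

	/* Object-level advice: handled with the map read-locked. */
	if (madvise(p, len, MADV_WILLNEED) != 0)
		err(1, "madvise(MADV_WILLNEED)");
	return (0);
}
#endif
/*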
*/ VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { if (modify_map) vm_map_clip_start(map, entry, start); } else { entry = entry->next; } if (modify_map) { /* * madvise behaviors that are implemented in the vm_map_entry. * * We clip the vm_map_entry so that behavioral changes are * limited to the specified address range. */ for (current = entry; current->start < end; current = current->next) { if (current->eflags & MAP_ENTRY_IS_SUB_MAP) continue; vm_map_clip_end(map, current, end); switch (behav) { case MADV_NORMAL: vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL); break; case MADV_SEQUENTIAL: vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL); break; case MADV_RANDOM: vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM); break; case MADV_NOSYNC: current->eflags |= MAP_ENTRY_NOSYNC; break; case MADV_AUTOSYNC: current->eflags &= ~MAP_ENTRY_NOSYNC; break; case MADV_NOCORE: current->eflags |= MAP_ENTRY_NOCOREDUMP; break; case MADV_CORE: current->eflags &= ~MAP_ENTRY_NOCOREDUMP; break; default: break; } vm_map_simplify_entry(map, current); } vm_map_unlock(map); } else { vm_pindex_t pstart, pend; /* * madvise behaviors that are implemented in the underlying * vm_object. * * Since we don't clip the vm_map_entry, we have to clip * the vm_object pindex and count. */ for (current = entry; current->start < end; current = current->next) { vm_offset_t useEnd, useStart; if (current->eflags & MAP_ENTRY_IS_SUB_MAP) continue; pstart = OFF_TO_IDX(current->offset); pend = pstart + atop(current->end - current->start); useStart = current->start; useEnd = current->end; if (current->start < start) { pstart += atop(start - current->start); useStart = start; } if (current->end > end) { pend -= atop(current->end - end); useEnd = end; } if (pstart >= pend) continue; /* * Perform the pmap_advise() before clearing * PGA_REFERENCED in vm_page_advise(). Otherwise, a * concurrent pmap operation, such as pmap_remove(), * could clear a reference in the pmap and set * PGA_REFERENCED on the page before the pmap_advise() * had completed. Consequently, the page would appear * referenced based upon an old reference that * occurred before this pmap_advise() ran. */ if (behav == MADV_DONTNEED || behav == MADV_FREE) pmap_advise(map->pmap, useStart, useEnd, behav); vm_object_madvise(current->object.vm_object, pstart, pend, behav); /* * Pre-populate paging structures in the * WILLNEED case. For wired entries, the * paging structures are already populated. */ if (behav == MADV_WILLNEED && current->wired_count == 0) { vm_map_pmap_enter(map, useStart, current->protection, current->object.vm_object, pstart, ptoa(pend - pstart), MAP_PREFAULT_MADVISE ); } } vm_map_unlock_read(map); } return (0); } /* * vm_map_inherit: * * Sets the inheritance of the specified address * range in the target map. Inheritance * affects how the map will be shared with * child maps at the time of vmspace_fork. 
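 *
 * [Editor's aside, not part of this change: VM_INHERIT_ZERO, handled by the
 * routine below, is requested from userspace with minherit(2) and
 * INHERIT_ZERO; after fork(2) the child's copy of the range reads as
 * zeroes. A minimal sketch:]
 */
#if 0	/* illustrative userspace sketch only, not part of the build */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 4096;
	char *p;
	pid_t pid;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	p[0] = 'X';

	if (minherit(p, len, INHERIT_ZERO) != 0)
		err(1, "minherit");

	pid = fork();
	if (pid == -1)
		err(1, "fork");
	if (pid == 0) {
		/* The child sees a freshly zero-filled page, not 'X'. */
		printf("child: %d\n", p[0]);
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent: %c\n", p[0]);
	return (0);
}
#endif
/*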
*/ int vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_inherit_t new_inheritance) { vm_map_entry_t entry; vm_map_entry_t temp_entry; switch (new_inheritance) { case VM_INHERIT_NONE: case VM_INHERIT_COPY: case VM_INHERIT_SHARE: case VM_INHERIT_ZERO: break; default: return (KERN_INVALID_ARGUMENT); } if (start == end) return (KERN_SUCCESS); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; vm_map_clip_start(map, entry, start); } else entry = temp_entry->next; while (entry->start < end) { vm_map_clip_end(map, entry, end); if ((entry->eflags & MAP_ENTRY_GUARD) == 0 || new_inheritance != VM_INHERIT_ZERO) entry->inheritance = new_inheritance; vm_map_simplify_entry(map, entry); entry = entry->next; } vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_unwire: * * Implements both kernel and user unwiring. */ int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) { vm_map_entry_t entry, first_entry, tmp_entry; vm_offset_t saved_start; unsigned int last_timestamp; int rv; boolean_t need_wakeup, result, user_unwire; if (start == end) return (KERN_SUCCESS); user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) first_entry = first_entry->next; else { vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } } last_timestamp = map->timestamp; entry = first_entry; while (entry->start < end) { if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { /* * We have not yet clipped the entry. */ saved_start = (start >= entry->start) ? start : entry->start; entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; if (vm_map_unlock_and_wait(map, 0)) { /* * Allow interruption of user unwiring? */ } vm_map_lock(map); if (last_timestamp+1 != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. * Specifically, the entry may have been * clipped, merged, or deleted. */ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) tmp_entry = tmp_entry->next; else { if (saved_start == start) { /* * First_entry has been deleted. */ vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } end = saved_start; rv = KERN_INVALID_ADDRESS; goto done; } } if (entry == first_entry) first_entry = tmp_entry; else first_entry = NULL; entry = tmp_entry; } last_timestamp = map->timestamp; continue; } vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); /* * Mark the entry in case the map lock is released. (See * above.) */ KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && entry->wiring_thread == NULL, ("owned map entry %p", entry)); entry->eflags |= MAP_ENTRY_IN_TRANSITION; entry->wiring_thread = curthread; /* * Check the map for holes in the specified region. * If VM_MAP_WIRE_HOLESOK was specified, skip this check. */ if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && (entry->end < end && entry->next->start > entry->end)) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; } /* * If system unwiring, require that the entry is system wired. 
*/ if (!user_unwire && vm_map_entry_system_wired_count(entry) == 0) { end = entry->end; rv = KERN_INVALID_ARGUMENT; goto done; } entry = entry->next; } rv = KERN_SUCCESS; done: need_wakeup = FALSE; if (first_entry == NULL) { result = vm_map_lookup_entry(map, start, &first_entry); if (!result && (flags & VM_MAP_WIRE_HOLESOK)) first_entry = first_entry->next; else KASSERT(result, ("vm_map_unwire: lookup failed")); } for (entry = first_entry; entry->start < end; entry = entry->next) { /* * If VM_MAP_WIRE_HOLESOK was specified, an empty * space in the unwired region could have been mapped * while the map lock was dropped for draining * MAP_ENTRY_IN_TRANSITION. Moreover, another thread * could be simultaneously wiring this new mapping * entry. Detect these cases and skip any entries * marked as in transition by us. */ if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || entry->wiring_thread != curthread) { KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, ("vm_map_unwire: !HOLESOK and new/changed entry")); continue; } if (rv == KERN_SUCCESS && (!user_unwire || (entry->eflags & MAP_ENTRY_USER_WIRED))) { if (user_unwire) entry->eflags &= ~MAP_ENTRY_USER_WIRED; if (entry->wired_count == 1) vm_map_entry_unwire(map, entry); else entry->wired_count--; } KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, ("vm_map_unwire: in-transition flag missing %p", entry)); KASSERT(entry->wiring_thread == curthread, ("vm_map_unwire: alien wire %p", entry)); entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; entry->wiring_thread = NULL; if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; need_wakeup = TRUE; } vm_map_simplify_entry(map, entry); } vm_map_unlock(map); if (need_wakeup) vm_map_wakeup(map); return (rv); } /* * vm_map_wire_entry_failure: * * Handle a wiring failure on the given entry. * * The map should be locked. */ static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, vm_offset_t failed_addr) { VM_MAP_ASSERT_LOCKED(map); KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 && entry->wired_count == 1, ("vm_map_wire_entry_failure: entry %p isn't being wired", entry)); KASSERT(failed_addr < entry->end, ("vm_map_wire_entry_failure: entry %p was fully wired", entry)); /* * If any pages at the start of this entry were successfully wired, * then unwire them. */ if (failed_addr > entry->start) { pmap_unwire(map->pmap, entry->start, failed_addr); vm_object_unwire(entry->object.vm_object, entry->offset, failed_addr - entry->start, PQ_ACTIVE); } /* * Assign an out-of-range value to represent the failure to wire this * entry. */ entry->wired_count = -1; } /* * vm_map_wire: * * Implements both kernel and user wiring. */ int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) { vm_map_entry_t entry, first_entry, tmp_entry; vm_offset_t faddr, saved_end, saved_start; unsigned int last_timestamp; int rv; boolean_t need_wakeup, result, user_wire; vm_prot_t prot; if (start == end) return (KERN_SUCCESS); prot = 0; if (flags & VM_MAP_WIRE_WRITE) prot |= VM_PROT_WRITE; user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) first_entry = first_entry->next; else { vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } } last_timestamp = map->timestamp; entry = first_entry; while (entry->start < end) { if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { /* * We have not yet clipped the entry. 
*/ saved_start = (start >= entry->start) ? start : entry->start; entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; if (vm_map_unlock_and_wait(map, 0)) { /* * Allow interruption of user wiring? */ } vm_map_lock(map); if (last_timestamp + 1 != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. * Specifically, the entry may have been * clipped, merged, or deleted. */ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) tmp_entry = tmp_entry->next; else { if (saved_start == start) { /* * first_entry has been deleted. */ vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } end = saved_start; rv = KERN_INVALID_ADDRESS; goto done; } } if (entry == first_entry) first_entry = tmp_entry; else first_entry = NULL; entry = tmp_entry; } last_timestamp = map->timestamp; continue; } vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); /* * Mark the entry in case the map lock is released. (See * above.) */ KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && entry->wiring_thread == NULL, ("owned map entry %p", entry)); entry->eflags |= MAP_ENTRY_IN_TRANSITION; entry->wiring_thread = curthread; if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || (entry->protection & prot) != prot) { entry->eflags |= MAP_ENTRY_WIRE_SKIPPED; if ((flags & VM_MAP_WIRE_HOLESOK) == 0) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; } goto next_entry; } if (entry->wired_count == 0) { entry->wired_count++; saved_start = entry->start; saved_end = entry->end; /* * Release the map lock, relying on the in-transition * mark. Mark the map busy for fork. */ vm_map_busy(map); vm_map_unlock(map); faddr = saved_start; do { /* * Simulate a fault to get the page and enter * it into the physical map. */ if ((rv = vm_fault(map, faddr, VM_PROT_NONE, VM_FAULT_WIRE)) != KERN_SUCCESS) break; } while ((faddr += PAGE_SIZE) < saved_end); vm_map_lock(map); vm_map_unbusy(map); if (last_timestamp + 1 != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. The entry * may have been clipped, but NOT merged or * deleted. */ result = vm_map_lookup_entry(map, saved_start, &tmp_entry); KASSERT(result, ("vm_map_wire: lookup failed")); if (entry == first_entry) first_entry = tmp_entry; else first_entry = NULL; entry = tmp_entry; while (entry->end < saved_end) { /* * In case of failure, handle entries * that were not fully wired here; * fully wired entries are handled * later. */ if (rv != KERN_SUCCESS && faddr < entry->end) vm_map_wire_entry_failure(map, entry, faddr); entry = entry->next; } } last_timestamp = map->timestamp; if (rv != KERN_SUCCESS) { vm_map_wire_entry_failure(map, entry, faddr); end = entry->end; goto done; } } else if (!user_wire || (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { entry->wired_count++; } /* * Check the map for holes in the specified region. * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 
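 *
 * [Editor's aside, not part of this change: user wiring as implemented
 * above is normally reached through mlock(2), and user unwiring through
 * munlock(2). A minimal sketch; mlock() may fail with ENOMEM if the
 * RLIMIT_MEMLOCK limit is small.]
 */
#if 0	/* illustrative userspace sketch only, not part of the build */
#include <sys/mman.h>
#include <err.h>
#include <string.h>

int
main(void)
{
	size_t len = 1 << 20;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	/* Wires the range: the pages are faulted in and stay resident. */
	if (mlock(p, len) != 0)
		err(1, "mlock");
	memset(p, 0, len);

	/* Unwires the range again. */
	if (munlock(p, len) != 0)
		err(1, "munlock");
	return (0);
}
#endif
/*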
*/ next_entry: if ((flags & VM_MAP_WIRE_HOLESOK) == 0 && entry->end < end && entry->next->start > entry->end) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; } entry = entry->next; } rv = KERN_SUCCESS; done: need_wakeup = FALSE; if (first_entry == NULL) { result = vm_map_lookup_entry(map, start, &first_entry); if (!result && (flags & VM_MAP_WIRE_HOLESOK)) first_entry = first_entry->next; else KASSERT(result, ("vm_map_wire: lookup failed")); } for (entry = first_entry; entry->start < end; entry = entry->next) { /* * If VM_MAP_WIRE_HOLESOK was specified, an empty * space in the unwired region could have been mapped * while the map lock was dropped for faulting in the * pages or draining MAP_ENTRY_IN_TRANSITION. * Moreover, another thread could be simultaneously * wiring this new mapping entry. Detect these cases * and skip any entries marked as in transition not by us. */ if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || entry->wiring_thread != curthread) { KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, ("vm_map_wire: !HOLESOK and new/changed entry")); continue; } if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) goto next_entry_done; if (rv == KERN_SUCCESS) { if (user_wire) entry->eflags |= MAP_ENTRY_USER_WIRED; } else if (entry->wired_count == -1) { /* * Wiring failed on this entry. Thus, unwiring is * unnecessary. */ entry->wired_count = 0; } else if (!user_wire || (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { /* * Undo the wiring. Wiring succeeded on this entry * but failed on a later entry. */ if (entry->wired_count == 1) vm_map_entry_unwire(map, entry); else entry->wired_count--; } next_entry_done: KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, ("vm_map_wire: in-transition flag missing %p", entry)); KASSERT(entry->wiring_thread == curthread, ("vm_map_wire: alien wire %p", entry)); entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_WIRE_SKIPPED); entry->wiring_thread = NULL; if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; need_wakeup = TRUE; } vm_map_simplify_entry(map, entry); } vm_map_unlock(map); if (need_wakeup) vm_map_wakeup(map); return (rv); } /* * vm_map_sync * * Push any dirty cached pages in the address range to their pager. * If syncio is TRUE, dirty pages are written synchronously. * If invalidate is TRUE, any cached pages are freed as well. * * If the size of the region from start to end is zero, we are * supposed to flush all modified pages within the region containing * start. Unfortunately, a region can be split or coalesced with * neighboring regions, making it difficult to determine what the * original region was. Therefore, we approximate this requirement by * flushing the current region containing start. * * Returns an error if any part of the specified range is not mapped. */ int vm_map_sync( vm_map_t map, vm_offset_t start, vm_offset_t end, boolean_t syncio, boolean_t invalidate) { vm_map_entry_t current; vm_map_entry_t entry; vm_size_t size; vm_object_t object; vm_ooffset_t offset; unsigned int last_timestamp; boolean_t failed; vm_map_lock_read(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &entry)) { vm_map_unlock_read(map); return (KERN_INVALID_ADDRESS); } else if (start == end) { start = entry->start; end = entry->end; } /* * Make a first pass to check for user-wired memory and holes. 
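 *
 * [Editor's aside, not part of this change: msync(2) is the usual
 * userspace path into this routine; MS_SYNC/MS_ASYNC select "syncio" and
 * MS_INVALIDATE selects "invalidate". The path name below is just an
 * example.]
 */
#if 0	/* illustrative userspace sketch only, not part of the build */
#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 4096;
	char *p;
	int fd;

	fd = open("/tmp/msync-demo", O_RDWR | O_CREAT, 0600);
	if (fd == -1)
		err(1, "open");
	if (ftruncate(fd, (off_t)len) != 0)
		err(1, "ftruncate");

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memcpy(p, "hello", 5);

	/* Synchronously push the dirty page back to the file. */
	if (msync(p, len, MS_SYNC) != 0)
		err(1, "msync");
	return (0);
}
#endif
/*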
*/ for (current = entry; current->start < end; current = current->next) { if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) { vm_map_unlock_read(map); return (KERN_INVALID_ARGUMENT); } if (end > current->end && current->end != current->next->start) { vm_map_unlock_read(map); return (KERN_INVALID_ADDRESS); } } if (invalidate) pmap_remove(map->pmap, start, end); failed = FALSE; /* * Make a second pass, cleaning/uncaching pages from the indicated * objects as we go. */ for (current = entry; current->start < end;) { offset = current->offset + (start - current->start); size = (end <= current->end ? end : current->end) - start; if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { vm_map_t smap; vm_map_entry_t tentry; vm_size_t tsize; smap = current->object.sub_map; vm_map_lock_read(smap); (void) vm_map_lookup_entry(smap, offset, &tentry); tsize = tentry->end - offset; if (tsize < size) size = tsize; object = tentry->object.vm_object; offset = tentry->offset + (offset - tentry->start); vm_map_unlock_read(smap); } else { object = current->object.vm_object; } vm_object_reference(object); last_timestamp = map->timestamp; vm_map_unlock_read(map); if (!vm_object_sync(object, offset, size, syncio, invalidate)) failed = TRUE; start += size; vm_object_deallocate(object); vm_map_lock_read(map); if (last_timestamp == map->timestamp || !vm_map_lookup_entry(map, start, &current)) current = current->next; } vm_map_unlock_read(map); return (failed ? KERN_FAILURE : KERN_SUCCESS); } /* * vm_map_entry_unwire: [ internal use only ] * * Make the region specified by this entry pageable. * * The map in question should be locked. * [This is the reason for this routine's existence.] */ static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) { VM_MAP_ASSERT_LOCKED(map); KASSERT(entry->wired_count > 0, ("vm_map_entry_unwire: entry %p isn't wired", entry)); pmap_unwire(map->pmap, entry->start, entry->end); vm_object_unwire(entry->object.vm_object, entry->offset, entry->end - entry->start, PQ_ACTIVE); entry->wired_count = 0; } static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) vm_object_deallocate(entry->object.vm_object); uma_zfree(system_map ? kmapentzone : mapentzone, entry); } /* * vm_map_entry_delete: [ internal use only ] * * Deallocate the given entry from the target map. 
*/ static void vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry) { vm_object_t object; vm_pindex_t offidxstart, offidxend, count, size1; vm_size_t size; vm_map_entry_unlink(map, entry); object = entry->object.vm_object; if ((entry->eflags & MAP_ENTRY_GUARD) != 0) { MPASS(entry->cred == NULL); MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0); MPASS(object == NULL); vm_map_entry_deallocate(entry, map->system_map); return; } size = entry->end - entry->start; map->size -= size; if (entry->cred != NULL) { swap_release_by_cred(size, entry->cred); crfree(entry->cred); } if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && (object != NULL)) { KASSERT(entry->cred == NULL || object->cred == NULL || (entry->eflags & MAP_ENTRY_NEEDS_COPY), ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry)); count = atop(size); offidxstart = OFF_TO_IDX(entry->offset); offidxend = offidxstart + count; VM_OBJECT_WLOCK(object); if (object->ref_count != 1 && ((object->flags & (OBJ_NOSPLIT | OBJ_ONEMAPPING)) == OBJ_ONEMAPPING || object == kernel_object)) { vm_object_collapse(object); /* * The option OBJPR_NOTMAPPED can be passed here * because vm_map_delete() already performed * pmap_remove() on the only mapping to this range * of pages. */ vm_object_page_remove(object, offidxstart, offidxend, OBJPR_NOTMAPPED); if (object->type == OBJT_SWAP) swap_pager_freespace(object, offidxstart, count); if (offidxend >= object->size && offidxstart < object->size) { size1 = object->size; object->size = offidxstart; if (object->cred != NULL) { size1 -= object->size; KASSERT(object->charge >= ptoa(size1), ("object %p charge < 0", object)); swap_release_by_cred(ptoa(size1), object->cred); object->charge -= ptoa(size1); } } } VM_OBJECT_WUNLOCK(object); } else entry->object.vm_object = NULL; if (map->system_map) vm_map_entry_deallocate(entry, TRUE); else { entry->next = curthread->td_map_def_user; curthread->td_map_def_user = entry; } } /* * vm_map_delete: [ internal use only ] * * Deallocates the given address range from the target * map. */ int vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) { vm_map_entry_t entry; vm_map_entry_t first_entry; VM_MAP_ASSERT_LOCKED(map); if (start == end) return (KERN_SUCCESS); /* * Find the start of the region, and clip it */ if (!vm_map_lookup_entry(map, start, &first_entry)) entry = first_entry->next; else { entry = first_entry; vm_map_clip_start(map, entry, start); } /* * Step through all entries in this region */ while (entry->start < end) { vm_map_entry_t next; /* * Wait for wiring or unwiring of an entry to complete. * Also wait for any system wirings to disappear on * user maps. */ if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 || (vm_map_pmap(map) != kernel_pmap && vm_map_entry_system_wired_count(entry) != 0)) { unsigned int last_timestamp; vm_offset_t saved_start; vm_map_entry_t tmp_entry; saved_start = entry->start; entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; last_timestamp = map->timestamp; (void) vm_map_unlock_and_wait(map, 0); vm_map_lock(map); if (last_timestamp + 1 != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. * Specifically, the entry may have been * clipped, merged, or deleted. 
*/ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) entry = tmp_entry->next; else { entry = tmp_entry; vm_map_clip_start(map, entry, saved_start); } } continue; } vm_map_clip_end(map, entry, end); next = entry->next; /* * Unwire before removing addresses from the pmap; otherwise, * unwiring will put the entries back in the pmap. */ if (entry->wired_count != 0) vm_map_entry_unwire(map, entry); /* * Remove mappings for the pages, but only if the * mappings could exist. For instance, it does not * make sense to call pmap_remove() for guard entries. */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || entry->object.vm_object != NULL) pmap_remove(map->pmap, entry->start, entry->end); + if (entry->end == map->anon_loc) + map->anon_loc = entry->start; + /* * Delete the entry only after removing all pmap * entries pointing to its pages. (Otherwise, its * page frames may be reallocated, and any modify bits * will be set in the wrong object!) */ vm_map_entry_delete(map, entry); entry = next; } return (KERN_SUCCESS); } /* * vm_map_remove: * * Remove the given address range from the target map. * This is the exported form of vm_map_delete. */ int vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) { int result; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); result = vm_map_delete(map, start, end); vm_map_unlock(map); return (result); } /* * vm_map_check_protection: * * Assert that the target map allows the specified privilege on the * entire address region given. The entire region must be allocated. * * WARNING! This code does not and should not check whether the * contents of the region is accessible. For example a smaller file * might be mapped into a larger address space. * * NOTE! This code is also called by munmap(). * * The map must be locked. A read lock is sufficient. */ boolean_t vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_prot_t protection) { vm_map_entry_t entry; vm_map_entry_t tmp_entry; if (!vm_map_lookup_entry(map, start, &tmp_entry)) return (FALSE); entry = tmp_entry; while (start < end) { /* * No holes allowed! */ if (start < entry->start) return (FALSE); /* * Check protection associated with entry. */ if ((entry->protection & protection) != protection) return (FALSE); /* go to next entry */ start = entry->end; entry = entry->next; } return (TRUE); } /* * vm_map_copy_entry: * * Copies the contents of the source entry to the destination * entry. The entries *must* be aligned properly. */ static void vm_map_copy_entry( vm_map_t src_map, vm_map_t dst_map, vm_map_entry_t src_entry, vm_map_entry_t dst_entry, vm_ooffset_t *fork_charge) { vm_object_t src_object; vm_map_entry_t fake_entry; vm_offset_t size; struct ucred *cred; int charged; VM_MAP_ASSERT_LOCKED(dst_map); if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP) return; if (src_entry->wired_count == 0 || (src_entry->protection & VM_PROT_WRITE) == 0) { /* * If the source entry is marked needs_copy, it is already * write-protected. */ if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 && (src_entry->protection & VM_PROT_WRITE) != 0) { pmap_protect(src_map->pmap, src_entry->start, src_entry->end, src_entry->protection & ~VM_PROT_WRITE); } /* * Make a copy of the object. 
*/ size = src_entry->end - src_entry->start; if ((src_object = src_entry->object.vm_object) != NULL) { VM_OBJECT_WLOCK(src_object); charged = ENTRY_CHARGED(src_entry); if (src_object->handle == NULL && (src_object->type == OBJT_DEFAULT || src_object->type == OBJT_SWAP)) { vm_object_collapse(src_object); if ((src_object->flags & (OBJ_NOSPLIT | OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) { vm_object_split(src_entry); src_object = src_entry->object.vm_object; } } vm_object_reference_locked(src_object); vm_object_clear_flag(src_object, OBJ_ONEMAPPING); if (src_entry->cred != NULL && !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { KASSERT(src_object->cred == NULL, ("OVERCOMMIT: vm_map_copy_entry: cred %p", src_object)); src_object->cred = src_entry->cred; src_object->charge = size; } VM_OBJECT_WUNLOCK(src_object); dst_entry->object.vm_object = src_object; if (charged) { cred = curthread->td_ucred; crhold(cred); dst_entry->cred = cred; *fork_charge += size; if (!(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { crhold(cred); src_entry->cred = cred; *fork_charge += size; } } src_entry->eflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; dst_entry->eflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; dst_entry->offset = src_entry->offset; if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) { /* * MAP_ENTRY_VN_WRITECNT cannot * indicate write reference from * src_entry, since the entry is * marked as needs copy. Allocate a * fake entry that is used to * decrement object->un_pager.vnp.writecount * at the appropriate time. Attach * fake_entry to the deferred list. */ fake_entry = vm_map_entry_create(dst_map); fake_entry->eflags = MAP_ENTRY_VN_WRITECNT; src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT; vm_object_reference(src_object); fake_entry->object.vm_object = src_object; fake_entry->start = src_entry->start; fake_entry->end = src_entry->end; fake_entry->next = curthread->td_map_def_user; curthread->td_map_def_user = fake_entry; } pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, dst_entry->end - dst_entry->start, src_entry->start); } else { dst_entry->object.vm_object = NULL; dst_entry->offset = 0; if (src_entry->cred != NULL) { dst_entry->cred = curthread->td_ucred; crhold(dst_entry->cred); *fork_charge += size; } } } else { /* * We don't want to make writeable wired pages copy-on-write. * Immediately copy these pages into the new map by simulating * page faults. The new pages are pageable. */ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry, fork_charge); } } /* * vmspace_map_entry_forked: * Update the newly-forked vmspace each time a map entry is inherited * or copied. The values for vm_dsize and vm_tsize are approximate * (and mostly-obsolete ideas in the face of mmap(2) et al.) 
*/ static void vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2, vm_map_entry_t entry) { vm_size_t entrysize; vm_offset_t newend; if ((entry->eflags & MAP_ENTRY_GUARD) != 0) return; entrysize = entry->end - entry->start; vm2->vm_map.size += entrysize; if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) { vm2->vm_ssize += btoc(entrysize); } else if (entry->start >= (vm_offset_t)vm1->vm_daddr && entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) { newend = MIN(entry->end, (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)); vm2->vm_dsize += btoc(newend - entry->start); } else if (entry->start >= (vm_offset_t)vm1->vm_taddr && entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) { newend = MIN(entry->end, (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)); vm2->vm_tsize += btoc(newend - entry->start); } } /* * vmspace_fork: * Create a new process vmspace structure and vm_map * based on those of an existing process. The new map * is based on the old map, according to the inheritance * values on the regions in that map. * * XXX It might be worth coalescing the entries added to the new vmspace. * * The source map must not be locked. */ struct vmspace * vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) { struct vmspace *vm2; vm_map_t new_map, old_map; vm_map_entry_t new_entry, old_entry; vm_object_t object; int locked; vm_inherit_t inh; old_map = &vm1->vm_map; /* Copy immutable fields of vm1 to vm2. */ vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map), pmap_pinit); if (vm2 == NULL) return (NULL); vm2->vm_taddr = vm1->vm_taddr; vm2->vm_daddr = vm1->vm_daddr; vm2->vm_maxsaddr = vm1->vm_maxsaddr; vm_map_lock(old_map); if (old_map->busy) vm_map_wait_busy(old_map); new_map = &vm2->vm_map; locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */ KASSERT(locked, ("vmspace_fork: lock failed")); + new_map->anon_loc = old_map->anon_loc; old_entry = old_map->header.next; while (old_entry != &old_map->header) { if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) panic("vm_map_fork: encountered a submap"); inh = old_entry->inheritance; if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 && inh != VM_INHERIT_NONE) inh = VM_INHERIT_COPY; switch (inh) { case VM_INHERIT_NONE: break; case VM_INHERIT_SHARE: /* * Clone the entry, creating the shared object if necessary. */ object = old_entry->object.vm_object; if (object == NULL) { object = vm_object_allocate(OBJT_DEFAULT, atop(old_entry->end - old_entry->start)); old_entry->object.vm_object = object; old_entry->offset = 0; if (old_entry->cred != NULL) { object->cred = old_entry->cred; object->charge = old_entry->end - old_entry->start; old_entry->cred = NULL; } } /* * Add the reference before calling vm_object_shadow * to insure that a shadow object is created. */ vm_object_reference(object); if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { vm_object_shadow(&old_entry->object.vm_object, &old_entry->offset, old_entry->end - old_entry->start); old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; /* Transfer the second reference too. */ vm_object_reference( old_entry->object.vm_object); /* * As in vm_map_simplify_entry(), the * vnode lock will not be acquired in * this call to vm_object_deallocate(). 
*/ vm_object_deallocate(object); object = old_entry->object.vm_object; } VM_OBJECT_WLOCK(object); vm_object_clear_flag(object, OBJ_ONEMAPPING); if (old_entry->cred != NULL) { KASSERT(object->cred == NULL, ("vmspace_fork both cred")); object->cred = old_entry->cred; object->charge = old_entry->end - old_entry->start; old_entry->cred = NULL; } /* * Assert the correct state of the vnode * v_writecount while the object is locked, to * not relock it later for the assertion * correctness. */ if (old_entry->eflags & MAP_ENTRY_VN_WRITECNT && object->type == OBJT_VNODE) { KASSERT(((struct vnode *)object->handle)-> v_writecount > 0, ("vmspace_fork: v_writecount %p", object)); KASSERT(object->un_pager.vnp.writemappings > 0, ("vmspace_fork: vnp.writecount %p", object)); } VM_OBJECT_WUNLOCK(object); /* * Clone the entry, referencing the shared object. */ new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION); new_entry->wiring_thread = NULL; new_entry->wired_count = 0; if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) { vnode_pager_update_writecount(object, new_entry->start, new_entry->end); } /* * Insert the entry into the new map -- we know we're * inserting at the end of the new map. */ vm_map_entry_link(new_map, new_map->header.prev, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); /* * Update the physical map */ pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, (old_entry->end - old_entry->start), old_entry->start); break; case VM_INHERIT_COPY: /* * Clone the entry and link into the map. */ new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; /* * Copied entry is COW over the old object. */ new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT); new_entry->wiring_thread = NULL; new_entry->wired_count = 0; new_entry->object.vm_object = NULL; new_entry->cred = NULL; vm_map_entry_link(new_map, new_map->header.prev, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); vm_map_copy_entry(old_map, new_map, old_entry, new_entry, fork_charge); break; case VM_INHERIT_ZERO: /* * Create a new anonymous mapping entry modelled from * the old one. */ new_entry = vm_map_entry_create(new_map); memset(new_entry, 0, sizeof(*new_entry)); new_entry->start = old_entry->start; new_entry->end = old_entry->end; new_entry->eflags = old_entry->eflags & ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT); new_entry->protection = old_entry->protection; new_entry->max_protection = old_entry->max_protection; new_entry->inheritance = VM_INHERIT_ZERO; vm_map_entry_link(new_map, new_map->header.prev, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); new_entry->cred = curthread->td_ucred; crhold(new_entry->cred); *fork_charge += (new_entry->end - new_entry->start); break; } old_entry = old_entry->next; } /* * Use inlined vm_map_unlock() to postpone handling the deferred * map entries, which cannot be done until both old_map and * new_map locks are released. */ sx_xunlock(&old_map->lock); sx_xunlock(&new_map->lock); vm_map_process_deferred(); return (vm2); } /* * Create a process's stack for exec_new_vmspace(). This function is never * asked to wire the newly created stack. */ int vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_prot_t prot, vm_prot_t max, int cow) { vm_size_t growsize, init_ssize; rlim_t vmemlim; int rv; MPASS((map->flags & MAP_WIREFUTURE) == 0); growsize = sgrowsiz; init_ssize = (max_ssize < growsize) ? 
max_ssize : growsize; vm_map_lock(map); vmemlim = lim_cur(curthread, RLIMIT_VMEM); /* If we would blow our VMEM resource limit, no go */ if (map->size + init_ssize > vmemlim) { rv = KERN_NO_SPACE; goto out; } rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot, max, cow); out: vm_map_unlock(map); return (rv); } static int stack_guard_page = 1; SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN, &stack_guard_page, 0, "Specifies the number of guard pages for a stack that grows"); static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow) { vm_map_entry_t new_entry, prev_entry; vm_offset_t bot, gap_bot, gap_top, top; vm_size_t init_ssize, sgp; int orient, rv; /* * The stack orientation is piggybacked with the cow argument. * Extract it into orient and mask the cow argument so that we * don't pass it around further. */ orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP); KASSERT(orient != 0, ("No stack grow direction")); KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP), ("bi-dir stack")); if (addrbos < vm_map_min(map) || addrbos + max_ssize > vm_map_max(map) || addrbos + max_ssize <= addrbos) return (KERN_INVALID_ADDRESS); sgp = (vm_size_t)stack_guard_page * PAGE_SIZE; if (sgp >= max_ssize) return (KERN_INVALID_ARGUMENT); init_ssize = growsize; if (max_ssize < init_ssize + sgp) init_ssize = max_ssize - sgp; /* If addr is already mapped, no go */ if (vm_map_lookup_entry(map, addrbos, &prev_entry)) return (KERN_NO_SPACE); /* * If we can't accommodate max_ssize in the current mapping, no go. */ if (prev_entry->next->start < addrbos + max_ssize) return (KERN_NO_SPACE); /* * We initially map a stack of only init_ssize. We will grow as * needed later. Depending on the orientation of the stack (i.e. * the grow direction) we either map at the top of the range, the * bottom of the range or in the middle. * * Note: we would normally expect prot and max to be VM_PROT_ALL, * and cow to be 0. Possibly we should eliminate these as input * parameters, and just pass these values here in the insert call. */ if (orient == MAP_STACK_GROWS_DOWN) { bot = addrbos + max_ssize - init_ssize; top = bot + init_ssize; gap_bot = addrbos; gap_top = bot; } else /* if (orient == MAP_STACK_GROWS_UP) */ { bot = addrbos; top = bot + init_ssize; gap_bot = top; gap_top = addrbos + max_ssize; } rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow); if (rv != KERN_SUCCESS) return (rv); new_entry = prev_entry->next; KASSERT(new_entry->end == top || new_entry->start == bot, ("Bad entry start/end for new stack entry")); KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 || (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0, ("new entry lacks MAP_ENTRY_GROWS_DOWN")); KASSERT((orient & MAP_STACK_GROWS_UP) == 0 || (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0, ("new entry lacks MAP_ENTRY_GROWS_UP")); rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE, VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ? MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP)); if (rv != KERN_SUCCESS) (void)vm_map_delete(map, bot, top); return (rv); } /* * Attempts to grow a vm stack entry. Returns KERN_SUCCESS if we * successfully grow the stack. 
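 *
 * Worked example (hypothetical numbers, for illustration only): with
 * 4 KB pages, the default stack_guard_page of 1 and a remaining stack
 * gap entry of 1 MB, the code below computes
 *
 *	guard    = 1 * PAGE_SIZE      = 4 KB
 *	max_grow = (gap size) - guard = 1 MB - 4 KB
 *
 * so a fault that needs more than 1 MB - 4 KB of additional stack is
 * refused with KERN_NO_SPACE, and the guard-sized tail of the gap is
 * never converted into stack pages.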
*/ static int vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry) { vm_map_entry_t stack_entry; struct proc *p; struct vmspace *vm; struct ucred *cred; vm_offset_t gap_end, gap_start, grow_start; size_t grow_amount, guard, max_grow; rlim_t lmemlim, stacklim, vmemlim; int rv, rv1; bool gap_deleted, grow_down, is_procstack; #ifdef notyet uint64_t limit; #endif #ifdef RACCT int error; #endif p = curproc; vm = p->p_vmspace; /* * Disallow stack growth when the access is performed by a * debugger or AIO daemon. The reason is that the wrong * resource limits are applied. */ if (map != &p->p_vmspace->vm_map || p->p_textvp == NULL) return (KERN_FAILURE); MPASS(!map->system_map); guard = stack_guard_page * PAGE_SIZE; lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK); stacklim = lim_cur(curthread, RLIMIT_STACK); vmemlim = lim_cur(curthread, RLIMIT_VMEM); retry: /* If addr is not in a hole for a stack grow area, no need to grow. */ if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry)) return (KERN_FAILURE); if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0) return (KERN_SUCCESS); if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) { stack_entry = gap_entry->next; if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 || stack_entry->start != gap_entry->end) return (KERN_FAILURE); grow_amount = round_page(stack_entry->start - addr); grow_down = true; } else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) { stack_entry = gap_entry->prev; if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 || stack_entry->end != gap_entry->start) return (KERN_FAILURE); grow_amount = round_page(addr + 1 - stack_entry->end); grow_down = false; } else { return (KERN_FAILURE); } max_grow = gap_entry->end - gap_entry->start; if (guard > max_grow) return (KERN_NO_SPACE); max_grow -= guard; if (grow_amount > max_grow) return (KERN_NO_SPACE); /* * If this is the main process stack, see if we're over the stack * limit. 
*/ is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr && addr < (vm_offset_t)p->p_sysent->sv_usrstack; if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) return (KERN_NO_SPACE); #ifdef RACCT if (racct_enable) { PROC_LOCK(p); if (is_procstack && racct_set(p, RACCT_STACK, ctob(vm->vm_ssize) + grow_amount)) { PROC_UNLOCK(p); return (KERN_NO_SPACE); } PROC_UNLOCK(p); } #endif grow_amount = roundup(grow_amount, sgrowsiz); if (grow_amount > max_grow) grow_amount = max_grow; if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) { grow_amount = trunc_page((vm_size_t)stacklim) - ctob(vm->vm_ssize); } #ifdef notyet PROC_LOCK(p); limit = racct_get_available(p, RACCT_STACK); PROC_UNLOCK(p); if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit)) grow_amount = limit - ctob(vm->vm_ssize); #endif if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) { if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) { rv = KERN_NO_SPACE; goto out; } #ifdef RACCT if (racct_enable) { PROC_LOCK(p); if (racct_set(p, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap)) + grow_amount)) { PROC_UNLOCK(p); rv = KERN_NO_SPACE; goto out; } PROC_UNLOCK(p); } #endif } /* If we would blow our VMEM resource limit, no go */ if (map->size + grow_amount > vmemlim) { rv = KERN_NO_SPACE; goto out; } #ifdef RACCT if (racct_enable) { PROC_LOCK(p); if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) { PROC_UNLOCK(p); rv = KERN_NO_SPACE; goto out; } PROC_UNLOCK(p); } #endif if (vm_map_lock_upgrade(map)) { gap_entry = NULL; vm_map_lock_read(map); goto retry; } if (grow_down) { grow_start = gap_entry->end - grow_amount; if (gap_entry->start + grow_amount == gap_entry->end) { gap_start = gap_entry->start; gap_end = gap_entry->end; vm_map_entry_delete(map, gap_entry); gap_deleted = true; } else { MPASS(gap_entry->start < gap_entry->end - grow_amount); gap_entry->end -= grow_amount; vm_map_entry_resize_free(map, gap_entry); gap_deleted = false; } rv = vm_map_insert(map, NULL, 0, grow_start, grow_start + grow_amount, stack_entry->protection, stack_entry->max_protection, MAP_STACK_GROWS_DOWN); if (rv != KERN_SUCCESS) { if (gap_deleted) { rv1 = vm_map_insert(map, NULL, 0, gap_start, gap_end, VM_PROT_NONE, VM_PROT_NONE, MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN); MPASS(rv1 == KERN_SUCCESS); } else { gap_entry->end += grow_amount; vm_map_entry_resize_free(map, gap_entry); } } } else { grow_start = stack_entry->end; cred = stack_entry->cred; if (cred == NULL && stack_entry->object.vm_object != NULL) cred = stack_entry->object.vm_object->cred; if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred)) rv = KERN_NO_SPACE; /* Grow the underlying object if applicable. */ else if (stack_entry->object.vm_object == NULL || vm_object_coalesce(stack_entry->object.vm_object, stack_entry->offset, (vm_size_t)(stack_entry->end - stack_entry->start), (vm_size_t)grow_amount, cred != NULL)) { if (gap_entry->start + grow_amount == gap_entry->end) vm_map_entry_delete(map, gap_entry); else gap_entry->start += grow_amount; stack_entry->end += grow_amount; map->size += grow_amount; vm_map_entry_resize_free(map, stack_entry); rv = KERN_SUCCESS; } else rv = KERN_FAILURE; } if (rv == KERN_SUCCESS && is_procstack) vm->vm_ssize += btoc(grow_amount); /* * Heed the MAP_WIREFUTURE flag if it was set for this process. 
*/ if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) { vm_map_unlock(map); vm_map_wire(map, grow_start, grow_start + grow_amount, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); vm_map_lock_read(map); } else vm_map_lock_downgrade(map); out: #ifdef RACCT if (racct_enable && rv != KERN_SUCCESS) { PROC_LOCK(p); error = racct_set(p, RACCT_VMEM, map->size); KASSERT(error == 0, ("decreasing RACCT_VMEM failed")); if (!old_mlock) { error = racct_set(p, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap))); KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed")); } error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize)); KASSERT(error == 0, ("decreasing RACCT_STACK failed")); PROC_UNLOCK(p); } #endif return (rv); } /* * Unshare the specified VM space for exec. If other processes are * mapped to it, then create a new one. The new vmspace is null. */ int vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser) { struct vmspace *oldvmspace = p->p_vmspace; struct vmspace *newvmspace; KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0, ("vmspace_exec recursed")); newvmspace = vmspace_alloc(minuser, maxuser, pmap_pinit); if (newvmspace == NULL) return (ENOMEM); newvmspace->vm_swrss = oldvmspace->vm_swrss; /* * This code is written like this for prototype purposes. The * goal is to avoid running down the vmspace here, but let the * other process's that are still using the vmspace to finally * run it down. Even though there is little or no chance of blocking * here, it is a good idea to keep this form for future mods. */ PROC_VMSPACE_LOCK(p); p->p_vmspace = newvmspace; PROC_VMSPACE_UNLOCK(p); if (p == curthread->td_proc) pmap_activate(curthread); curthread->td_pflags |= TDP_EXECVMSPC; return (0); } /* * Unshare the specified VM space for forcing COW. This * is called by rfork, for the (RFMEM|RFPROC) == 0 case. */ int vmspace_unshare(struct proc *p) { struct vmspace *oldvmspace = p->p_vmspace; struct vmspace *newvmspace; vm_ooffset_t fork_charge; if (oldvmspace->vm_refcnt == 1) return (0); fork_charge = 0; newvmspace = vmspace_fork(oldvmspace, &fork_charge); if (newvmspace == NULL) return (ENOMEM); if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) { vmspace_free(newvmspace); return (ENOMEM); } PROC_VMSPACE_LOCK(p); p->p_vmspace = newvmspace; PROC_VMSPACE_UNLOCK(p); if (p == curthread->td_proc) pmap_activate(curthread); vmspace_free(oldvmspace); return (0); } /* * vm_map_lookup: * * Finds the VM object, offset, and * protection for a given virtual address in the * specified map, assuming a page fault of the * type specified. * * Leaves the map in question locked for read; return * values are guaranteed until a vm_map_lookup_done * call is performed. Note that the map argument * is in/out; the returned map must be used in * the call to vm_map_lookup_done. * * A handle (out_entry) is returned for use in * vm_map_lookup_done, to make that fast. * * If a lookup is requested with "write protection" * specified, the map may be changed to perform virtual * copying operations, although the data referenced will * remain the same. 
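 *
 * A sketch of the expected calling sequence (illustration only, using
 * the prototypes from vm_map.h; the fault handling in the middle is
 * elided):
 *
 *	vm_map_entry_t entry;
 *	vm_object_t object;
 *	vm_pindex_t pindex;
 *	vm_prot_t prot;
 *	boolean_t wired;
 *
 *	rv = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry, &object,
 *	    &pindex, &prot, &wired);
 *	if (rv == KERN_SUCCESS) {
 *		(resolve the fault against object/pindex here)
 *		vm_map_lookup_done(map, entry);
 *	}
 *
 * Because the map argument is in/out, the map returned by the lookup,
 * not necessarily the one originally passed in, must be handed to
 * vm_map_lookup_done().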
*/ int vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ vm_offset_t vaddr, vm_prot_t fault_typea, vm_map_entry_t *out_entry, /* OUT */ vm_object_t *object, /* OUT */ vm_pindex_t *pindex, /* OUT */ vm_prot_t *out_prot, /* OUT */ boolean_t *wired) /* OUT */ { vm_map_entry_t entry; vm_map_t map = *var_map; vm_prot_t prot; vm_prot_t fault_type = fault_typea; vm_object_t eobject; vm_size_t size; struct ucred *cred; RetryLookup: vm_map_lock_read(map); RetryLookupLocked: /* * Lookup the faulting address. */ if (!vm_map_lookup_entry(map, vaddr, out_entry)) { vm_map_unlock_read(map); return (KERN_INVALID_ADDRESS); } entry = *out_entry; /* * Handle submaps. */ if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { vm_map_t old_map = map; *var_map = map = entry->object.sub_map; vm_map_unlock_read(old_map); goto RetryLookup; } /* * Check whether this task is allowed to have this page. */ prot = entry->protection; if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) { fault_typea &= ~VM_PROT_FAULT_LOOKUP; if (prot == VM_PROT_NONE && map != kernel_map && (entry->eflags & MAP_ENTRY_GUARD) != 0 && (entry->eflags & (MAP_ENTRY_STACK_GAP_DN | MAP_ENTRY_STACK_GAP_UP)) != 0 && vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS) goto RetryLookupLocked; } fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) { vm_map_unlock_read(map); return (KERN_PROTECTION_FAILURE); } KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags & (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) != (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY), ("entry %p flags %x", entry, entry->eflags)); if ((fault_typea & VM_PROT_COPY) != 0 && (entry->max_protection & VM_PROT_WRITE) == 0 && (entry->eflags & MAP_ENTRY_COW) == 0) { vm_map_unlock_read(map); return (KERN_PROTECTION_FAILURE); } /* * If this page is not pageable, we have to get it for all possible * accesses. */ *wired = (entry->wired_count != 0); if (*wired) fault_type = entry->protection; size = entry->end - entry->start; /* * If the entry was copy-on-write, we either ... */ if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { /* * If we want to write the page, we may as well handle that * now since we've got the map locked. * * If we don't need to write the page, we just demote the * permissions allowed. */ if ((fault_type & VM_PROT_WRITE) != 0 || (fault_typea & VM_PROT_COPY) != 0) { /* * Make a new object, and place it in the object * chain. Note that no new references have appeared * -- one just moved from the map to the new * object. */ if (vm_map_lock_upgrade(map)) goto RetryLookup; if (entry->cred == NULL) { /* * The debugger owner is charged for * the memory. */ cred = curthread->td_ucred; crhold(cred); if (!swap_reserve_by_cred(size, cred)) { crfree(cred); vm_map_unlock(map); return (KERN_RESOURCE_SHORTAGE); } entry->cred = cred; } vm_object_shadow(&entry->object.vm_object, &entry->offset, size); entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; eobject = entry->object.vm_object; if (eobject->cred != NULL) { /* * The object was not shadowed. */ swap_release_by_cred(size, entry->cred); crfree(entry->cred); entry->cred = NULL; } else if (entry->cred != NULL) { VM_OBJECT_WLOCK(eobject); eobject->cred = entry->cred; eobject->charge = size; VM_OBJECT_WUNLOCK(eobject); entry->cred = NULL; } vm_map_lock_downgrade(map); } else { /* * We're attempting to read a copy-on-write page -- * don't allow writes. */ prot &= ~VM_PROT_WRITE; } } /* * Create an object if necessary. 
*/ if (entry->object.vm_object == NULL && !map->system_map) { if (vm_map_lock_upgrade(map)) goto RetryLookup; entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, atop(size)); entry->offset = 0; if (entry->cred != NULL) { VM_OBJECT_WLOCK(entry->object.vm_object); entry->object.vm_object->cred = entry->cred; entry->object.vm_object->charge = size; VM_OBJECT_WUNLOCK(entry->object.vm_object); entry->cred = NULL; } vm_map_lock_downgrade(map); } /* * Return the object/offset from this entry. If the entry was * copy-on-write or empty, it has been fixed up. */ *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); *object = entry->object.vm_object; *out_prot = prot; return (KERN_SUCCESS); } /* * vm_map_lookup_locked: * * Lookup the faulting address. A version of vm_map_lookup that returns * KERN_FAILURE instead of blocking on map lock or memory allocation. */ int vm_map_lookup_locked(vm_map_t *var_map, /* IN/OUT */ vm_offset_t vaddr, vm_prot_t fault_typea, vm_map_entry_t *out_entry, /* OUT */ vm_object_t *object, /* OUT */ vm_pindex_t *pindex, /* OUT */ vm_prot_t *out_prot, /* OUT */ boolean_t *wired) /* OUT */ { vm_map_entry_t entry; vm_map_t map = *var_map; vm_prot_t prot; vm_prot_t fault_type = fault_typea; /* * Lookup the faulting address. */ if (!vm_map_lookup_entry(map, vaddr, out_entry)) return (KERN_INVALID_ADDRESS); entry = *out_entry; /* * Fail if the entry refers to a submap. */ if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) return (KERN_FAILURE); /* * Check whether this task is allowed to have this page. */ prot = entry->protection; fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; if ((fault_type & prot) != fault_type) return (KERN_PROTECTION_FAILURE); /* * If this page is not pageable, we have to get it for all possible * accesses. */ *wired = (entry->wired_count != 0); if (*wired) fault_type = entry->protection; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { /* * Fail if the entry was copy-on-write for a write fault. */ if (fault_type & VM_PROT_WRITE) return (KERN_FAILURE); /* * We're attempting to read a copy-on-write page -- * don't allow writes. */ prot &= ~VM_PROT_WRITE; } /* * Fail if an object should be created. */ if (entry->object.vm_object == NULL && !map->system_map) return (KERN_FAILURE); /* * Return the object/offset from this entry. If the entry was * copy-on-write or empty, it has been fixed up. */ *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); *object = entry->object.vm_object; *out_prot = prot; return (KERN_SUCCESS); } /* * vm_map_lookup_done: * * Releases locks acquired by a vm_map_lookup * (according to the handle returned by that lookup). 
*/ void vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry) { /* * Unlock the main-level map */ vm_map_unlock_read(map); } vm_offset_t vm_map_max_KBI(const struct vm_map *map) { return (vm_map_max(map)); } vm_offset_t vm_map_min_KBI(const struct vm_map *map) { return (vm_map_min(map)); } pmap_t vm_map_pmap_KBI(vm_map_t map) { return (map->pmap); } #include "opt_ddb.h" #ifdef DDB #include #include static void vm_map_print(vm_map_t map) { vm_map_entry_t entry; db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", (void *)map, (void *)map->pmap, map->nentries, map->timestamp); db_indent += 2; for (entry = map->header.next; entry != &map->header; entry = entry->next) { db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n", (void *)entry, (void *)entry->start, (void *)entry->end, entry->eflags); { static char *inheritance_name[4] = {"share", "copy", "none", "donate_copy"}; db_iprintf(" prot=%x/%x/%s", entry->protection, entry->max_protection, inheritance_name[(int)(unsigned char)entry->inheritance]); if (entry->wired_count != 0) db_printf(", wired"); } if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { db_printf(", share=%p, offset=0x%jx\n", (void *)entry->object.sub_map, (uintmax_t)entry->offset); if ((entry->prev == &map->header) || (entry->prev->object.sub_map != entry->object.sub_map)) { db_indent += 2; vm_map_print((vm_map_t)entry->object.sub_map); db_indent -= 2; } } else { if (entry->cred != NULL) db_printf(", ruid %d", entry->cred->cr_ruid); db_printf(", object=%p, offset=0x%jx", (void *)entry->object.vm_object, (uintmax_t)entry->offset); if (entry->object.vm_object && entry->object.vm_object->cred) db_printf(", obj ruid %d charge %jx", entry->object.vm_object->cred->cr_ruid, (uintmax_t)entry->object.vm_object->charge); if (entry->eflags & MAP_ENTRY_COW) db_printf(", copy (%s)", (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); db_printf("\n"); if ((entry->prev == &map->header) || (entry->prev->object.vm_object != entry->object.vm_object)) { db_indent += 2; vm_object_print((db_expr_t)(intptr_t) entry->object.vm_object, 0, 0, (char *)0); db_indent -= 2; } } } db_indent -= 2; } DB_SHOW_COMMAND(map, map) { if (!have_addr) { db_printf("usage: show map \n"); return; } vm_map_print((vm_map_t)addr); } DB_SHOW_COMMAND(procvm, procvm) { struct proc *p; if (have_addr) { p = db_lookup_proc(addr); } else { p = curproc; } db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, (void *)vmspace_pmap(p->p_vmspace)); vm_map_print((vm_map_t)&p->p_vmspace->vm_map); } #endif /* DDB */ Index: head/sys/vm/vm_map.h =================================================================== --- head/sys/vm/vm_map.h (revision 343963) +++ head/sys/vm/vm_map.h (revision 343964) @@ -1,425 +1,429 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vm_map.h 8.9 (Berkeley) 5/17/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Virtual memory map module definitions. */ #ifndef _VM_MAP_ #define _VM_MAP_ #include #include #include /* * Types defined: * * vm_map_t the high-level address map data structure. * vm_map_entry_t an entry in an address map. */ typedef u_char vm_flags_t; typedef u_int vm_eflags_t; /* * Objects which live in maps may be either VM objects, or * another map (called a "sharing map") which denotes read-write * sharing with other maps. */ union vm_map_object { struct vm_object *vm_object; /* object object */ struct vm_map *sub_map; /* belongs to another map */ }; /* * Address map entries consist of start and end addresses, * a VM object (or sharing map) and offset into that object, * and user-exported inheritance and protection information. * Also included is control information for virtual copy operations. 
*/ struct vm_map_entry { struct vm_map_entry *prev; /* previous entry */ struct vm_map_entry *next; /* next entry */ struct vm_map_entry *left; /* left child in binary search tree */ struct vm_map_entry *right; /* right child in binary search tree */ vm_offset_t start; /* start address */ vm_offset_t end; /* end address */ vm_offset_t next_read; /* vaddr of the next sequential read */ vm_size_t adj_free; /* amount of adjacent free space */ vm_size_t max_free; /* max free space in subtree */ union vm_map_object object; /* object I point to */ vm_ooffset_t offset; /* offset into object */ vm_eflags_t eflags; /* map entry flags */ vm_prot_t protection; /* protection code */ vm_prot_t max_protection; /* maximum protection */ vm_inherit_t inheritance; /* inheritance */ uint8_t read_ahead; /* pages in the read-ahead window */ int wired_count; /* can be paged if = 0 */ struct ucred *cred; /* tmp storage for creator ref */ struct thread *wiring_thread; }; #define MAP_ENTRY_NOSYNC 0x0001 #define MAP_ENTRY_IS_SUB_MAP 0x0002 #define MAP_ENTRY_COW 0x0004 #define MAP_ENTRY_NEEDS_COPY 0x0008 #define MAP_ENTRY_NOFAULT 0x0010 #define MAP_ENTRY_USER_WIRED 0x0020 #define MAP_ENTRY_BEHAV_NORMAL 0x0000 /* default behavior */ #define MAP_ENTRY_BEHAV_SEQUENTIAL 0x0040 /* expect sequential access */ #define MAP_ENTRY_BEHAV_RANDOM 0x0080 /* expect random access */ #define MAP_ENTRY_BEHAV_RESERVED 0x00C0 /* future use */ #define MAP_ENTRY_BEHAV_MASK 0x00C0 #define MAP_ENTRY_IN_TRANSITION 0x0100 /* entry being changed */ #define MAP_ENTRY_NEEDS_WAKEUP 0x0200 /* waiters in transition */ #define MAP_ENTRY_NOCOREDUMP 0x0400 /* don't include in a core */ #define MAP_ENTRY_GROWS_DOWN 0x1000 /* Top-down stacks */ #define MAP_ENTRY_GROWS_UP 0x2000 /* Bottom-up stacks */ #define MAP_ENTRY_WIRE_SKIPPED 0x4000 #define MAP_ENTRY_VN_WRITECNT 0x8000 /* writeable vnode mapping */ #define MAP_ENTRY_GUARD 0x10000 #define MAP_ENTRY_STACK_GAP_DN 0x20000 #define MAP_ENTRY_STACK_GAP_UP 0x40000 #define MAP_ENTRY_HEADER 0x80000 #ifdef _KERNEL static __inline u_char vm_map_entry_behavior(vm_map_entry_t entry) { return (entry->eflags & MAP_ENTRY_BEHAV_MASK); } static __inline int vm_map_entry_user_wired_count(vm_map_entry_t entry) { if (entry->eflags & MAP_ENTRY_USER_WIRED) return (1); return (0); } static __inline int vm_map_entry_system_wired_count(vm_map_entry_t entry) { return (entry->wired_count - vm_map_entry_user_wired_count(entry)); } #endif /* _KERNEL */ /* * A map is a set of map entries. These map entries are * organized both as a binary search tree and as a doubly-linked * list. Both structures are ordered based upon the start and * end addresses contained within each map entry. * * Sleator and Tarjan's top-down splay algorithm is employed to * control height imbalance in the binary search tree. * * The map's min offset value is stored in map->header.end, and * its max offset value is stored in map->header.start. These * values act as sentinels for any forward or backward address * scan of the list. The map header has a special value for the * eflags field, MAP_ENTRY_HEADER, that is set initially, is * never changed, and prevents an eflags match of the header * with any other map entry. 
* * List of locks * (c) const until freed */ struct vm_map { struct vm_map_entry header; /* List of entries */ struct sx lock; /* Lock for map data */ struct mtx system_mtx; int nentries; /* Number of entries */ vm_size_t size; /* virtual size */ u_int timestamp; /* Version number */ u_char needs_wakeup; u_char system_map; /* (c) Am I a system map? */ vm_flags_t flags; /* flags for this vm_map */ vm_map_entry_t root; /* Root of a binary search tree */ pmap_t pmap; /* (c) Physical map */ + vm_offset_t anon_loc; int busy; }; /* * vm_flags_t values */ #define MAP_WIREFUTURE 0x01 /* wire all future pages */ #define MAP_BUSY_WAKEUP 0x02 +#define MAP_IS_SUB_MAP 0x04 /* has parent */ +#define MAP_ASLR 0x08 /* enabled ASLR */ +#define MAP_ASLR_IGNSTART 0x10 #ifdef _KERNEL #if defined(KLD_MODULE) && !defined(KLD_TIED) #define vm_map_max(map) vm_map_max_KBI((map)) #define vm_map_min(map) vm_map_min_KBI((map)) #define vm_map_pmap(map) vm_map_pmap_KBI((map)) #else static __inline vm_offset_t vm_map_max(const struct vm_map *map) { return (map->header.start); } static __inline vm_offset_t vm_map_min(const struct vm_map *map) { return (map->header.end); } static __inline pmap_t vm_map_pmap(vm_map_t map) { return (map->pmap); } static __inline void vm_map_modflags(vm_map_t map, vm_flags_t set, vm_flags_t clear) { map->flags = (map->flags | set) & ~clear; } #endif /* KLD_MODULE */ #endif /* _KERNEL */ /* * Shareable process virtual address space. * * List of locks * (c) const until freed */ struct vmspace { struct vm_map vm_map; /* VM address map */ struct shmmap_state *vm_shm; /* SYS5 shared memory private data XXX */ segsz_t vm_swrss; /* resident set size before last swap */ segsz_t vm_tsize; /* text size (pages) XXX */ segsz_t vm_dsize; /* data size (pages) XXX */ segsz_t vm_ssize; /* stack size (pages) */ caddr_t vm_taddr; /* (c) user virtual address of text */ caddr_t vm_daddr; /* (c) user virtual address of data */ caddr_t vm_maxsaddr; /* user VA at max stack growth */ volatile int vm_refcnt; /* number of references */ /* * Keep the PMAP last, so that CPU-specific variations of that * structure on a single architecture don't result in offset * variations of the machine-independent fields in the vmspace. */ struct pmap vm_pmap; /* private physical map */ }; #ifdef _KERNEL static __inline pmap_t vmspace_pmap(struct vmspace *vmspace) { return &vmspace->vm_pmap; } #endif /* _KERNEL */ #ifdef _KERNEL /* * Macros: vm_map_lock, etc. * Function: * Perform locking on the data portion of a map. Note that * these macros mimic procedure calls returning void. The * semicolon is supplied by the user of these macros, not * by the macros themselves. The macros can safely be used * as unbraced elements in a higher level statement. 
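 *
 * For illustration (not part of this change), the "unbraced element"
 * property means a caller may safely write
 *
 *	if (exclusive)
 *		vm_map_lock(map);
 *	else
 *		vm_map_lock_read(map);
 *
 * where "exclusive" is a hypothetical flag; a naive multi-statement
 * macro would break such unbraced if/else bodies.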
*/ void _vm_map_lock(vm_map_t map, const char *file, int line); void _vm_map_unlock(vm_map_t map, const char *file, int line); int _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line); void _vm_map_lock_read(vm_map_t map, const char *file, int line); void _vm_map_unlock_read(vm_map_t map, const char *file, int line); int _vm_map_trylock(vm_map_t map, const char *file, int line); int _vm_map_trylock_read(vm_map_t map, const char *file, int line); int _vm_map_lock_upgrade(vm_map_t map, const char *file, int line); void _vm_map_lock_downgrade(vm_map_t map, const char *file, int line); int vm_map_locked(vm_map_t map); void vm_map_wakeup(vm_map_t map); void vm_map_busy(vm_map_t map); void vm_map_unbusy(vm_map_t map); void vm_map_wait_busy(vm_map_t map); vm_offset_t vm_map_max_KBI(const struct vm_map *map); vm_offset_t vm_map_min_KBI(const struct vm_map *map); pmap_t vm_map_pmap_KBI(vm_map_t map); #define vm_map_lock(map) _vm_map_lock(map, LOCK_FILE, LOCK_LINE) #define vm_map_unlock(map) _vm_map_unlock(map, LOCK_FILE, LOCK_LINE) #define vm_map_unlock_and_wait(map, timo) \ _vm_map_unlock_and_wait(map, timo, LOCK_FILE, LOCK_LINE) #define vm_map_lock_read(map) _vm_map_lock_read(map, LOCK_FILE, LOCK_LINE) #define vm_map_unlock_read(map) _vm_map_unlock_read(map, LOCK_FILE, LOCK_LINE) #define vm_map_trylock(map) _vm_map_trylock(map, LOCK_FILE, LOCK_LINE) #define vm_map_trylock_read(map) \ _vm_map_trylock_read(map, LOCK_FILE, LOCK_LINE) #define vm_map_lock_upgrade(map) \ _vm_map_lock_upgrade(map, LOCK_FILE, LOCK_LINE) #define vm_map_lock_downgrade(map) \ _vm_map_lock_downgrade(map, LOCK_FILE, LOCK_LINE) long vmspace_resident_count(struct vmspace *vmspace); #endif /* _KERNEL */ /* XXX: number of kernel maps to statically allocate */ #define MAX_KMAP 10 /* * Copy-on-write flags for vm_map operations */ #define MAP_INHERIT_SHARE 0x0001 #define MAP_COPY_ON_WRITE 0x0002 #define MAP_NOFAULT 0x0004 #define MAP_PREFAULT 0x0008 #define MAP_PREFAULT_PARTIAL 0x0010 #define MAP_DISABLE_SYNCER 0x0020 #define MAP_CHECK_EXCL 0x0040 #define MAP_CREATE_GUARD 0x0080 #define MAP_DISABLE_COREDUMP 0x0100 #define MAP_PREFAULT_MADVISE 0x0200 /* from (user) madvise request */ #define MAP_VN_WRITECOUNT 0x0400 #define MAP_REMAP 0x0800 #define MAP_STACK_GROWS_DOWN 0x1000 #define MAP_STACK_GROWS_UP 0x2000 #define MAP_ACC_CHARGED 0x4000 #define MAP_ACC_NO_CHARGE 0x8000 #define MAP_CREATE_STACK_GAP_UP 0x10000 #define MAP_CREATE_STACK_GAP_DN 0x20000 /* * vm_fault option flags */ #define VM_FAULT_NORMAL 0 /* Nothing special */ #define VM_FAULT_WIRE 1 /* Wire the mapped page */ #define VM_FAULT_DIRTY 2 /* Dirty the page; use w/VM_PROT_COPY */ /* * Initially, mappings are slightly sequential. The maximum window size must * account for the map entry's "read_ahead" field being defined as an uint8_t. */ #define VM_FAULT_READ_AHEAD_MIN 7 #define VM_FAULT_READ_AHEAD_INIT 15 #define VM_FAULT_READ_AHEAD_MAX min(atop(MAXPHYS) - 1, UINT8_MAX) /* * The following "find_space" options are supported by vm_map_find(). * * For VMFS_ALIGNED_SPACE, the desired alignment is specified to * the macro argument as log base 2 of the desired alignment. 
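 *
 * For example (illustration only), a caller that needs a 64 KB-aligned
 * range passes VMFS_ALIGNED_SPACE(16), since 2^16 == 64 KB;
 * VMFS_SUPER_SPACE below is the corresponding request for superpage
 * alignment.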
*/ #define VMFS_NO_SPACE 0 /* don't find; use the given range */ #define VMFS_ANY_SPACE 1 /* find a range with any alignment */ #define VMFS_OPTIMAL_SPACE 2 /* find a range with optimal alignment*/ #define VMFS_SUPER_SPACE 3 /* find a superpage-aligned range */ #define VMFS_ALIGNED_SPACE(x) ((x) << 8) /* find a range with fixed alignment */ /* * vm_map_wire and vm_map_unwire option flags */ #define VM_MAP_WIRE_SYSTEM 0 /* wiring in a kernel map */ #define VM_MAP_WIRE_USER 1 /* wiring in a user map */ #define VM_MAP_WIRE_NOHOLES 0 /* region must not have holes */ #define VM_MAP_WIRE_HOLESOK 2 /* region may have holes */ #define VM_MAP_WIRE_WRITE 4 /* Validate writable. */ #ifdef _KERNEL boolean_t vm_map_check_protection (vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t); vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t); int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t); int vm_map_find(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, vm_offset_t, int, vm_prot_t, vm_prot_t, int); int vm_map_find_min(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, vm_offset_t, vm_offset_t, int, vm_prot_t, vm_prot_t, int); int vm_map_fixed(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int); int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *); int vm_map_inherit (vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t); void vm_map_init(vm_map_t, pmap_t, vm_offset_t, vm_offset_t); int vm_map_insert (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t, vm_prot_t, vm_prot_t, int); int vm_map_lookup (vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *, vm_pindex_t *, vm_prot_t *, boolean_t *); int vm_map_lookup_locked(vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *, vm_pindex_t *, vm_prot_t *, boolean_t *); void vm_map_lookup_done (vm_map_t, vm_map_entry_t); boolean_t vm_map_lookup_entry (vm_map_t, vm_offset_t, vm_map_entry_t *); int vm_map_protect (vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t, boolean_t); int vm_map_remove (vm_map_t, vm_offset_t, vm_offset_t); void vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry); void vm_map_startup (void); int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t, vm_map_t); int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t); int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int); int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int); int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags); int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags); long vmspace_swap_count(struct vmspace *vmspace); #endif /* _KERNEL */ #endif /* _VM_MAP_ */ Index: head/usr.bin/proccontrol/proccontrol.c =================================================================== --- head/usr.bin/proccontrol/proccontrol.c (revision 343963) +++ head/usr.bin/proccontrol/proccontrol.c (revision 343964) @@ -1,180 +1,208 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include enum { + MODE_ASLR, MODE_INVALID, MODE_TRACE, MODE_TRAPCAP, }; static pid_t str2pid(const char *str) { pid_t res; char *tail; res = strtol(str, &tail, 0); if (*tail != '\0') { warnx("non-numeric pid"); return (-1); } return (res); } static void __dead2 usage(void) { - fprintf(stderr, "Usage: proccontrol -m (trace|trapcap) [-q] " + fprintf(stderr, "Usage: proccontrol -m (aslr|trace|trapcap) [-q] " "[-s (enable|disable)] [-p pid | command]\n"); exit(1); } int main(int argc, char *argv[]) { int arg, ch, error, mode; pid_t pid; bool enable, do_command, query; mode = MODE_INVALID; enable = true; pid = -1; query = false; while ((ch = getopt(argc, argv, "m:qs:p:")) != -1) { switch (ch) { case 'm': - if (strcmp(optarg, "trace") == 0) + if (strcmp(optarg, "aslr") == 0) + mode = MODE_ASLR; + else if (strcmp(optarg, "trace") == 0) mode = MODE_TRACE; else if (strcmp(optarg, "trapcap") == 0) mode = MODE_TRAPCAP; else usage(); break; case 's': if (strcmp(optarg, "enable") == 0) enable = true; else if (strcmp(optarg, "disable") == 0) enable = false; else usage(); break; case 'p': pid = str2pid(optarg); break; case 'q': query = true; break; case '?': default: usage(); break; } } argc -= optind; argv += optind; do_command = argc != 0; if (do_command) { if (pid != -1 || query) usage(); pid = getpid(); } else if (pid == -1) { pid = getpid(); } if (query) { switch (mode) { + case MODE_ASLR: + error = procctl(P_PID, pid, PROC_ASLR_STATUS, &arg); + break; case MODE_TRACE: error = procctl(P_PID, pid, PROC_TRACE_STATUS, &arg); break; case MODE_TRAPCAP: error = procctl(P_PID, pid, PROC_TRAPCAP_STATUS, &arg); break; default: usage(); break; } if (error != 0) err(1, "procctl status"); switch (mode) { + case MODE_ASLR: + switch (arg & ~PROC_ASLR_ACTIVE) { + case PROC_ASLR_FORCE_ENABLE: + printf("force enabled"); + break; + case PROC_ASLR_FORCE_DISABLE: + printf("force disabled"); + break; + case PROC_ASLR_NOFORCE: + printf("not forced"); + break; + } + if ((arg & PROC_ASLR_ACTIVE) != 0) + printf(", active\n"); + else + printf(", not active\n"); + break; case MODE_TRACE: if (arg == -1) printf("disabled\n"); else if (arg == 0) printf("enabled, no debugger\n"); else printf("enabled, traced by %d\n", arg); break; case MODE_TRAPCAP: switch (arg) { case PROC_TRAPCAP_CTL_ENABLE: printf("enabled\n"); break; case PROC_TRAPCAP_CTL_DISABLE: printf("disabled\n"); break; } break; } } else { switch (mode) { + case MODE_ASLR: + arg = enable ? 
PROC_ASLR_FORCE_ENABLE : + PROC_ASLR_FORCE_DISABLE; + error = procctl(P_PID, pid, PROC_ASLR_CTL, &arg); + break; case MODE_TRACE: arg = enable ? PROC_TRACE_CTL_ENABLE : PROC_TRACE_CTL_DISABLE; error = procctl(P_PID, pid, PROC_TRACE_CTL, &arg); break; case MODE_TRAPCAP: arg = enable ? PROC_TRAPCAP_CTL_ENABLE : PROC_TRAPCAP_CTL_DISABLE; error = procctl(P_PID, pid, PROC_TRAPCAP_CTL, &arg); break; default: usage(); break; } if (error != 0) err(1, "procctl ctl"); if (do_command) { error = execvp(argv[0], argv); err(1, "exec"); } } exit(0); }
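
For reference, the query path added above maps directly onto procctl(2). The following is a minimal standalone sketch, not part of the commit; the include list and the choice to inspect the calling process itself are assumptions. It reports its own ASLR state the same way the MODE_ASLR branch does:

#include <sys/types.h>
#include <sys/procctl.h>
#include <sys/wait.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int arg;

	/* Same request the MODE_ASLR query path issues for an arbitrary pid. */
	if (procctl(P_PID, getpid(), PROC_ASLR_STATUS, &arg) != 0)
		err(1, "procctl(PROC_ASLR_STATUS)");

	/* Low bits carry the control setting, PROC_ASLR_ACTIVE the outcome. */
	switch (arg & ~PROC_ASLR_ACTIVE) {
	case PROC_ASLR_FORCE_ENABLE:
		printf("force enabled");
		break;
	case PROC_ASLR_FORCE_DISABLE:
		printf("force disabled");
		break;
	case PROC_ASLR_NOFORCE:
		printf("not forced");
		break;
	}
	printf((arg & PROC_ASLR_ACTIVE) != 0 ? ", active\n" : ", not active\n");
	return (0);
}

From the shell, the equivalent operations with the new mode are "proccontrol -m aslr -q -p pid" to query a process and "proccontrol -m aslr -s disable command" to run a command with ASLR force-disabled, per the usage string above.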