diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c
index d2682afec8b0..90fcce5732d1 100644
--- a/sys/amd64/amd64/db_trace.c
+++ b/sys/amd64/amd64/db_trace.c
@@ -1,393 +1,393 @@
/*-
 * Mach Operating System
 * Copyright (c) 1991,1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
+#include
#include
#include
#include
#include
#include
#include
-#include
#include
#include
#include
#include
#include
#include
#include
#include

static db_varfcn_t db_frame;
static db_varfcn_t db_frame_seg;

CTASSERT(sizeof(struct dbreg) == sizeof(((struct pcpu *)NULL)->pc_dbreg));

/*
 * Machine register set.
 */
#define	DB_OFFSET(x)	(db_expr_t *)offsetof(struct trapframe, x)
struct db_variable db_regs[] = {
	{ "cs",		DB_OFFSET(tf_cs),	db_frame_seg },
	{ "ds",		DB_OFFSET(tf_ds),	db_frame_seg },
	{ "es",		DB_OFFSET(tf_es),	db_frame_seg },
	{ "fs",		DB_OFFSET(tf_fs),	db_frame_seg },
	{ "gs",		DB_OFFSET(tf_gs),	db_frame_seg },
	{ "ss",		DB_OFFSET(tf_ss),	db_frame_seg },
	{ "rax",	DB_OFFSET(tf_rax),	db_frame },
	{ "rcx",	DB_OFFSET(tf_rcx),	db_frame },
	{ "rdx",	DB_OFFSET(tf_rdx),	db_frame },
	{ "rbx",	DB_OFFSET(tf_rbx),	db_frame },
	{ "rsp",	DB_OFFSET(tf_rsp),	db_frame },
	{ "rbp",	DB_OFFSET(tf_rbp),	db_frame },
	{ "rsi",	DB_OFFSET(tf_rsi),	db_frame },
	{ "rdi",	DB_OFFSET(tf_rdi),	db_frame },
	{ "r8",		DB_OFFSET(tf_r8),	db_frame },
	{ "r9",		DB_OFFSET(tf_r9),	db_frame },
	{ "r10",	DB_OFFSET(tf_r10),	db_frame },
	{ "r11",	DB_OFFSET(tf_r11),	db_frame },
	{ "r12",	DB_OFFSET(tf_r12),	db_frame },
	{ "r13",	DB_OFFSET(tf_r13),	db_frame },
	{ "r14",	DB_OFFSET(tf_r14),	db_frame },
	{ "r15",	DB_OFFSET(tf_r15),	db_frame },
	{ "rip",	DB_OFFSET(tf_rip),	db_frame },
	{ "rflags",	DB_OFFSET(tf_rflags),	db_frame },
};
struct db_variable *db_eregs = db_regs + nitems(db_regs);

static int
db_frame_seg(struct db_variable *vp, db_expr_t *valuep, int op)
{
	uint16_t *reg;

	if (kdb_frame == NULL)
		return (0);

	reg = (uint16_t *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep);
	if (op == DB_VAR_GET)
		*valuep = *reg;
	else
		*reg = *valuep;
	return (1);
}

static int
db_frame(struct db_variable *vp, db_expr_t *valuep, int op)
{
	long *reg;

	if (kdb_frame == NULL)
		return (0);

	reg = (long *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep);
	if (op == DB_VAR_GET)
		*valuep = *reg;
	else
		*reg = *valuep;
	return (1);
}

#define	NORMAL		0
#define	TRAP		1
#define	INTERRUPT	2
#define	SYSCALL		3

static void db_nextframe(struct amd64_frame **, db_addr_t *, struct thread *);
static void db_print_stack_entry(const char *, db_addr_t, void *);
static void decode_syscall(int, struct thread *);

static void
db_print_stack_entry(const char *name, db_addr_t callpc, void
*frame) { db_printf("%s() at ", name != NULL ? name : "??"); db_printsym(callpc, DB_STGY_PROC); if (frame != NULL) db_printf("/frame 0x%lx", (register_t)frame); db_printf("\n"); } static void decode_syscall(int number, struct thread *td) { struct proc *p; c_db_sym_t sym; db_expr_t diff; sy_call_t *f; const char *symname; db_printf(" (%d", number); p = (td != NULL) ? td->td_proc : NULL; if (p != NULL && 0 <= number && number < p->p_sysent->sv_size) { f = p->p_sysent->sv_table[number].sy_call; sym = db_search_symbol((db_addr_t)f, DB_STGY_ANY, &diff); if (sym != DB_SYM_NULL && diff == 0) { db_symbol_values(sym, &symname, NULL); db_printf(", %s, %s", p->p_sysent->sv_name, symname); } } db_printf(")"); } /* * Figure out the next frame up in the call stack. */ static void db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td) { struct trapframe *tf; int frame_type; long rip, rsp, rbp; db_expr_t offset; c_db_sym_t sym; const char *name; rip = db_get_value((long) &(*fp)->f_retaddr, 8, FALSE); rbp = db_get_value((long) &(*fp)->f_frame, 8, FALSE); /* * Figure out frame type. We look at the address just before * the saved instruction pointer as the saved EIP is after the * call function, and if the function being called is marked as * dead (such as panic() at the end of dblfault_handler()), then * the instruction at the saved EIP will be part of a different * function (syscall() in this example) rather than the one that * actually made the call. */ frame_type = NORMAL; sym = db_search_symbol(rip - 1, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); if (name != NULL) { if (strcmp(name, "calltrap") == 0 || strcmp(name, "fork_trampoline") == 0 || strcmp(name, "mchk_calltrap") == 0 || strcmp(name, "nmi_calltrap") == 0 || strcmp(name, "Xdblfault") == 0) frame_type = TRAP; else if (strncmp(name, "Xatpic_intr", 11) == 0 || strncmp(name, "Xapic_isr", 9) == 0 || strcmp(name, "Xxen_intr_upcall") == 0 || strcmp(name, "Xtimerint") == 0 || strcmp(name, "Xipi_intr_bitmap_handler") == 0 || strcmp(name, "Xcpustop") == 0 || strcmp(name, "Xcpususpend") == 0 || strcmp(name, "Xrendezvous") == 0) frame_type = INTERRUPT; else if (strcmp(name, "Xfast_syscall") == 0 || strcmp(name, "Xfast_syscall_pti") == 0 || strcmp(name, "fast_syscall_common") == 0) frame_type = SYSCALL; #ifdef COMPAT_FREEBSD32 else if (strcmp(name, "Xint0x80_syscall") == 0) frame_type = SYSCALL; #endif } /* * Normal frames need no special processing. */ if (frame_type == NORMAL) { *ip = (db_addr_t) rip; *fp = (struct amd64_frame *) rbp; return; } db_print_stack_entry(name, rip, &(*fp)->f_frame); /* * Point to base of trapframe which is just above the * current frame. 
*/ tf = (struct trapframe *)((long)*fp + 16); if (INKERNEL((long) tf)) { rsp = tf->tf_rsp; rip = tf->tf_rip; rbp = tf->tf_rbp; switch (frame_type) { case TRAP: db_printf("--- trap %#r", tf->tf_trapno); break; case SYSCALL: db_printf("--- syscall"); decode_syscall(tf->tf_rax, td); break; case INTERRUPT: db_printf("--- interrupt"); break; default: panic("The moon has moved again."); } db_printf(", rip = %#lr, rsp = %#lr, rbp = %#lr ---\n", rip, rsp, rbp); } *ip = (db_addr_t) rip; *fp = (struct amd64_frame *) rbp; } static int __nosanitizeaddress __nosanitizememory db_backtrace(struct thread *td, struct trapframe *tf, struct amd64_frame *frame, db_addr_t pc, register_t sp, int count) { struct amd64_frame *actframe; const char *name; db_expr_t offset; c_db_sym_t sym; boolean_t first; if (count == -1) count = 1024; first = TRUE; while (count-- && !db_pager_quit) { sym = db_search_symbol(pc, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); /* * Attempt to determine a (possibly fake) frame that gives * the caller's pc. It may differ from `frame' if the * current function never sets up a standard frame or hasn't * set one up yet or has just discarded one. The last two * cases can be guessed fairly reliably for code generated * by gcc. The first case is too much trouble to handle in * general because the amount of junk on the stack depends * on the pc (the special handling of "calltrap", etc. in * db_nextframe() works because the `next' pc is special). */ actframe = frame; if (first) { first = FALSE; if (sym == C_DB_SYM_NULL && sp != 0) { /* * If a symbol couldn't be found, we've probably * jumped to a bogus location, so try and use * the return address to find our caller. */ db_print_stack_entry(name, pc, NULL); pc = db_get_value(sp, 8, FALSE); if (db_search_symbol(pc, DB_STGY_PROC, &offset) == C_DB_SYM_NULL) break; continue; } else if (tf != NULL) { int instr; instr = db_get_value(pc, 4, FALSE); if ((instr & 0xffffffff) == 0xe5894855) { /* pushq %rbp; movq %rsp, %rbp */ actframe = (void *)(tf->tf_rsp - 8); } else if ((instr & 0xffffff) == 0xe58948) { /* movq %rsp, %rbp */ actframe = (void *)tf->tf_rsp; if (tf->tf_rbp == 0) { /* Fake frame better. */ frame = actframe; } } else if ((instr & 0xff) == 0xc3) { /* ret */ actframe = (void *)(tf->tf_rsp - 8); } else if (offset == 0) { /* Probably an assembler symbol. */ actframe = (void *)(tf->tf_rsp - 8); } } else if (name != NULL && strcmp(name, "fork_trampoline") == 0) { /* * Don't try to walk back on a stack for a * process that hasn't actually been run yet. */ db_print_stack_entry(name, pc, actframe); break; } } db_print_stack_entry(name, pc, actframe); if (actframe != frame) { /* `frame' belongs to caller. */ pc = (db_addr_t) db_get_value((long)&actframe->f_retaddr, 8, FALSE); continue; } db_nextframe(&frame, &pc, td); if (INKERNEL((long)pc) && !INKERNEL((long)frame)) { sym = db_search_symbol(pc, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); db_print_stack_entry(name, pc, frame); break; } if (!INKERNEL((long) frame)) { break; } } return (0); } void db_trace_self(void) { struct amd64_frame *frame; db_addr_t callpc; register_t rbp; __asm __volatile("movq %%rbp,%0" : "=r" (rbp)); frame = (struct amd64_frame *)rbp; callpc = (db_addr_t)db_get_value((long)&frame->f_retaddr, 8, FALSE); frame = frame->f_frame; db_backtrace(curthread, NULL, frame, callpc, 0, -1); } int db_trace_thread(struct thread *thr, int count) { struct pcb *ctx; struct trapframe *tf; ctx = kdb_thr_ctx(thr); tf = thr == kdb_thread ? 
kdb_frame : NULL; return (db_backtrace(thr, tf, (struct amd64_frame *)ctx->pcb_rbp, ctx->pcb_rip, ctx->pcb_rsp, count)); } void db_md_list_watchpoints(void) { dbreg_list_watchpoints(); } diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c index 176c770a3731..b5acce216fe6 100644 --- a/sys/amd64/amd64/gdb_machdep.c +++ b/sys/amd64/amd64/gdb_machdep.c @@ -1,202 +1,202 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include void * gdb_cpu_getreg(int regnum, size_t *regsz) { static uint32_t _kcodesel = GSEL(GCODE_SEL, SEL_KPL); static uint32_t _kdatasel = GSEL(GDATA_SEL, SEL_KPL); *regsz = gdb_cpu_regsz(regnum); if (kdb_thread == curthread) { switch (regnum) { case GDB_REG_RAX: return (&kdb_frame->tf_rax); case GDB_REG_RCX: return (&kdb_frame->tf_rcx); case GDB_REG_RDX: return (&kdb_frame->tf_rdx); case GDB_REG_RSI: return (&kdb_frame->tf_rsi); case GDB_REG_RDI: return (&kdb_frame->tf_rdi); case GDB_REG_R8: return (&kdb_frame->tf_r8); case GDB_REG_R9: return (&kdb_frame->tf_r9); case GDB_REG_R10: return (&kdb_frame->tf_r10); case GDB_REG_R11: return (&kdb_frame->tf_r11); case GDB_REG_RFLAGS: return (&kdb_frame->tf_rflags); case GDB_REG_CS: return (&kdb_frame->tf_cs); case GDB_REG_SS: return (&kdb_frame->tf_ss); } } switch (regnum) { case GDB_REG_RBX: return (&kdb_thrctx->pcb_rbx); case GDB_REG_RBP: return (&kdb_thrctx->pcb_rbp); case GDB_REG_RSP: return (&kdb_thrctx->pcb_rsp); case GDB_REG_R12: return (&kdb_thrctx->pcb_r12); case GDB_REG_R13: return (&kdb_thrctx->pcb_r13); case GDB_REG_R14: return (&kdb_thrctx->pcb_r14); case GDB_REG_R15: return (&kdb_thrctx->pcb_r15); case GDB_REG_PC: return (&kdb_thrctx->pcb_rip); case GDB_REG_CS: return (&_kcodesel); case GDB_REG_SS: return (&_kdatasel); } return (NULL); } void gdb_cpu_setreg(int regnum, void *val) { register_t regval = *(register_t *)val; /* * Write registers to the trapframe and pcb, if applicable. * Some scratch registers are not tracked by the pcb. 
*/ if (kdb_thread == curthread) { switch (regnum) { case GDB_REG_RAX: kdb_frame->tf_rax = regval; break; case GDB_REG_RBX: kdb_frame->tf_rbx = regval; break; case GDB_REG_RCX: kdb_frame->tf_rcx = regval; break; case GDB_REG_RDX: kdb_frame->tf_rdx = regval; break; case GDB_REG_RSI: kdb_frame->tf_rsi = regval; break; case GDB_REG_RDI: kdb_frame->tf_rdi = regval; break; case GDB_REG_RBP: kdb_frame->tf_rbp = regval; break; case GDB_REG_RSP: kdb_frame->tf_rsp = regval; break; case GDB_REG_R8: kdb_frame->tf_r8 = regval; break; case GDB_REG_R9: kdb_frame->tf_r9 = regval; break; case GDB_REG_R10: kdb_frame->tf_r10 = regval; break; case GDB_REG_R11: kdb_frame->tf_r11 = regval; break; case GDB_REG_R12: kdb_frame->tf_r12 = regval; break; case GDB_REG_R13: kdb_frame->tf_r13 = regval; break; case GDB_REG_R14: kdb_frame->tf_r14 = regval; break; case GDB_REG_R15: kdb_frame->tf_r15 = regval; break; case GDB_REG_PC: kdb_frame->tf_rip = regval; break; } } switch (regnum) { case GDB_REG_RBX: kdb_thrctx->pcb_rbx = regval; break; case GDB_REG_RBP: kdb_thrctx->pcb_rbp = regval; break; case GDB_REG_RSP: kdb_thrctx->pcb_rsp = regval; break; case GDB_REG_R12: kdb_thrctx->pcb_r12 = regval; break; case GDB_REG_R13: kdb_thrctx->pcb_r13 = regval; break; case GDB_REG_R14: kdb_thrctx->pcb_r14 = regval; break; case GDB_REG_R15: kdb_thrctx->pcb_r15 = regval; break; case GDB_REG_PC: kdb_thrctx->pcb_rip = regval; break; } } int gdb_cpu_signal(int type, int code) { switch (type & ~T_USER) { case T_BPTFLT: return (SIGTRAP); case T_ARITHTRAP: return (SIGFPE); case T_PROTFLT: return (SIGSEGV); case T_TRCTRAP: return (SIGTRAP); case T_PAGEFLT: return (SIGSEGV); case T_DIVIDE: return (SIGFPE); case T_NMI: return (SIGTRAP); case T_FPOPFLT: return (SIGILL); case T_TSSFLT: return (SIGSEGV); case T_SEGNPFLT: return (SIGSEGV); case T_STKFLT: return (SIGSEGV); case T_XMMFLT: return (SIGFPE); } return (SIGEMT); } void gdb_cpu_stop_reason(int type, int code) { uintmax_t val; val = 0; if (type == T_TRCTRAP) { /* NB: 'code' contains the value of dr6 at the trap. */ if ((code & DBREG_DR6_B(0)) != 0) { val = rdr0(); } if ((code & DBREG_DR6_B(1)) != 0) { val = rdr1(); } if ((code & DBREG_DR6_B(2)) != 0) { val = rdr2(); } if ((code & DBREG_DR6_B(3)) != 0) { val = rdr3(); } /* * TODO: validate the bits in DR7 to differentiate between a * watchpoint trap and a hardware breakpoint trap (currently * unsupported). */ if (val != 0) { gdb_tx_str("watch:"); gdb_tx_varhex(val); gdb_tx_char(';'); } } } void * gdb_begin_write(void) { return (disable_wp() ? &gdb_begin_write : NULL); } void gdb_end_write(void *arg) { restore_wp(arg != NULL); } diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index e49dcaa576e8..eaf592c9b947 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1,2835 +1,2835 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_pci.h" #include "opt_platform.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_ATPIC #include #else #include #endif #include #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); /* * The PTI trampoline stack needs enough space for a hardware trapframe and a * couple of scratch registers, as well as the trapframe left behind after an * iret fault. 
*/ CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) - offsetof(struct pti_frame, pti_rip)); extern u_int64_t hammer_time(u_int64_t, u_int64_t); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Preload data parse function */ static caddr_t native_parse_preload_data(u_int64_t); /* Native function to fetch and parse the e820 map */ static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); /* Default init_ops implementation. */ struct init_ops init_ops = { .parse_preload_data = native_parse_preload_data, .early_clock_source_init = i8254_init, .early_delay = i8254_delay, .parse_memmap = native_parse_memmap, }; /* * Physical address of the EFI System Table. Stashed from the metadata hints * passed into the kernel and used by the EFI code to call runtime services. */ vm_paddr_t efi_systbl_phys; /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; long Maxmem = 0; long realmem = 0; struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_idt; struct pcpu *__pcpu; struct pcpu temp_bsp_pcpu; struct mtx icu_lock; struct mem_range_softc mem_range_softc; struct mtx dt_lock; /* lock for GDT and LDT */ void (*vmm_resume_p)(void); bool efi_boot; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_free_count())) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. 
*/ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / 1048576); #ifdef DEV_PCI if (bootverbose && intel_graphics_stolen_base != 0) printf("intel stolen mem: base %#jx size %ju MB\n", (uintmax_t)intel_graphics_stolen_base, (uintmax_t)intel_graphics_stolen_size / 1024 / 1024); #endif /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } static void late_ifunc_resolve(void *dummy __unused) { link_elf_late_ireloc(); } SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct pcb *pcb; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; pcb = td->td_pcb; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); update_pcb_bases(pcb); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, sizeof(sf.sf_uc.uc_mcontext.mc_spare)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned long)sp & ~0x3Ful); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. 
*/ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = p->p_sysent->sv_sigcode_base; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct pcb *pcb; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; long rflags; int cs, error, ret; ksiginfo_t ksi; pcb = td->td_pcb; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) { uprintf("pid %d (%s): sigreturn copyin failed\n", p->p_pid, td->td_name); return (error); } ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(rflags, regs->tf_rflags)) { uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid, td->td_name, rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) { uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); update_pcb_bases(pcb); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ void x86_clear_dbregs(struct pcb *pcb) { if ((pcb->pcb_flags & PCB_DBREGS) == 0) return; pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running CPU, * otherwise they will end up affecting the next * process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs; struct pcb *pcb; register_t saved_rflags; regs = td->td_frame; pcb = td->td_pcb; if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); update_pcb_bases(pcb); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; saved_rflags = regs->tf_rflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; regs->tf_rdi = stack; /* argv */ regs->tf_rflags = PSL_USER | saved_rflags; regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; x86_clear_dbregs(pcb); /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { register_t cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them. 
*/ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); } /* * Initialize amd64 and configure to run kernel */ /* * Initialize segments & interrupt table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[DBLFAULT_STACK_SIZE] __aligned(16); static char mce0_stack[MCE_STACK_SIZE] __aligned(16); static char nmi0_stack[NMI_STACK_SIZE] __aligned(16); static char dbg0_stack[DBG_STACK_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); /* * Software prototypes -- in more palatable form. * * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same * slots as corresponding segments for i386 kernel. */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GNULL2_SEL 1 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 8 64 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1, .ssd_type = SDT_SYSTSS, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Actually, the TSS is a system descriptor which is double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 LDT Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 12 LDT Descriptor, double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; _Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT"); void setidt(int idx, inthand_t *func, int typ, int dpl, int ist) { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = 
((uintptr_t)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), IDTVEC(div_pti), IDTVEC(bpt_pti), IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti), IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti), IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti), IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti), IDTVEC(xmm_pti), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti), #endif IDTVEC(fast_syscall), IDTVEC(fast_syscall32), IDTVEC(fast_syscall_pti); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { struct { uint16_t limit; uint64_t base; } __packed idtr, gdtr; uint16_t ldt, tr; __asm __volatile("sidt %0" : "=m" (idtr)); db_printf("idtr\t0x%016lx/%04x\n", (u_long)idtr.base, (u_int)idtr.limit); __asm __volatile("sgdt %0" : "=m" (gdtr)); db_printf("gdtr\t0x%016lx/%04x\n", (u_long)gdtr.base, (u_int)gdtr.limit); __asm __volatile("sldt %0" : "=r" (ldt)); db_printf("ldtr\t0x%04x\n", ldt); __asm __volatile("str %0" : "=r" (tr)); db_printf("tr\t0x%04x\n", tr); db_printf("cr0\t0x%016lx\n", rcr0()); db_printf("cr2\t0x%016lx\n", rcr2()); db_printf("cr3\t0x%016lx\n", rcr3()); db_printf("cr4\t0x%016lx\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016lx\n", rxcr(0)); db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t%016lx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT)); db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%016lx\n", rdr0()); db_printf("dr1\t0x%016lx\n", rdr1()); db_printf("dr2\t0x%016lx\n", rdr2()); db_printf("dr3\t0x%016lx\n", rdr3()); db_printf("dr6\t0x%016lx\n", rdr6()); db_printf("dr7\t0x%016lx\n", rdr7()); } #endif void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct soft_segment_descriptor *ssd; struct 
system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } u_int basemem; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. * * NB: physmap_idx points to the next free slot. */ insert_idx = physmap_idx; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYS_AVAIL_ENTRIES) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } void bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, vm_paddr_t *physmap, int *physmap_idx) { struct bios_smap *smap, *smapend; smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) continue; if (!add_physmap_entry(smap->base, smap->length, physmap, physmap_idx)) break; } } static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, int *physmap_idx) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode", "PersistentMemory" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. 
 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	if (boothowto & RB_VERBOSE)
		printf("%23s %12s %12s %8s %4s\n",
		    "Type", "Physical", "Virtual", "#Pages", "Attr");

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (boothowto & RB_VERBOSE) {
			if (p->md_type < nitems(types))
				type = types[p->md_type];
			else
				type = "<INVALID>";
			printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
			    p->md_virt, p->md_pages);
			if (p->md_attr & EFI_MD_ATTR_UC)
				printf("UC ");
			if (p->md_attr & EFI_MD_ATTR_WC)
				printf("WC ");
			if (p->md_attr & EFI_MD_ATTR_WT)
				printf("WT ");
			if (p->md_attr & EFI_MD_ATTR_WB)
				printf("WB ");
			if (p->md_attr & EFI_MD_ATTR_UCE)
				printf("UCE ");
			if (p->md_attr & EFI_MD_ATTR_WP)
				printf("WP ");
			if (p->md_attr & EFI_MD_ATTR_RP)
				printf("RP ");
			if (p->md_attr & EFI_MD_ATTR_XP)
				printf("XP ");
			if (p->md_attr & EFI_MD_ATTR_NV)
				printf("NV ");
			if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
				printf("MORE_RELIABLE ");
			if (p->md_attr & EFI_MD_ATTR_RO)
				printf("RO ");
			if (p->md_attr & EFI_MD_ATTR_RT)
				printf("RUNTIME");
			printf("\n");
		}

		switch (p->md_type) {
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_FREE:
			/*
			 * We're allowed to use any entry with these types.
			 */
			break;
		default:
			continue;
		}

		if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE),
		    physmap, physmap_idx))
			break;
	}
}

static void
native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap;
	struct efi_map_header *efihdr;
	u_int32_t size;

	/*
	 * Memory map from INT 15:E820.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes smap.
	 */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	smap = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (efihdr == NULL && smap == NULL)
		panic("No BIOS smap or EFI map info from loader!");

	if (efihdr != NULL) {
		add_efi_map_entries(efihdr, physmap, physmap_idx);
		strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
	} else {
		size = *((u_int32_t *)smap - 1);
		bios_add_smap_entries(smap, size, physmap, physmap_idx);
		strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
	}
}

#define	PAGES_PER_GB	(1024 * 1024 * 1024 / PAGE_SIZE)

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
	int i, physmap_idx, pa_indx, da_indx;
	vm_paddr_t pa, physmap[PHYS_AVAIL_ENTRIES];
	u_long physmem_start, physmem_tunable, memtest;
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size;
	int page_counter;

	/*
	 * Tell the physical memory allocator about pages used to store
	 * the kernel and preloaded data.  See kmem_bootstrap_free().
*/ vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first)); bzero(physmap, sizeof(physmap)); physmap_idx = 0; init_ops.parse_memmap(kmdp, physmap, &physmap_idx); physmap_idx -= 2; /* * Find the 'base memory' segment for SMP */ basemem = 0; for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] <= 0xA0000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0 || basemem > 640) { if (bootverbose) printf( "Memory map doesn't contain a basemem segment, faking it"); basemem = 640; } /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * The boot memory test is disabled by default, as it takes a * significant amount of time on large-memory systems, and is * unfriendly to virtual machines as it unnecessarily touches all * pages. * * A general name is used as the code may be extended to support * additional tests beyond the current "page present" test. */ memtest = 0; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); /* * Don't allow MAXMEM or hw.physmem to extend the amount of memory * in the system. */ if (Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. * * XXX Some BIOSes corrupt low 64KB between suspend and resume. * By default, mask off the first 16 pages unless we appear to be * running in a VM. */ physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); if (physmap[0] < physmem_start) { if (physmem_start < PAGE_SIZE) physmap[0] = PAGE_SIZE; else if (physmem_start >= physmap[1]) physmap[0] = round_page(physmap[1] - PAGE_SIZE); else physmap[0] = round_page(physmem_start); } pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ page_counter = 0; if (memtest != 0) printf("Testing system memory"); for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= (vm_paddr_t)kernphys && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * Print a "." every GB to show we're making * progress. 
*/ page_counter++; if ((page_counter % PAGES_PER_GB) == 0) printf("."); /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ENTRIES) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == PHYS_AVAIL_ENTRIES) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); if (memtest != 0) printf("\n"); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. 
*/ msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]); } static caddr_t native_parse_preload_data(u_int64_t modulep) { caddr_t kmdp; char *envp; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); if (envp != NULL) envp += KERNBASE; init_static_kenv(envp, 0); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end, 0); #endif efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); return (kmdp); } static void amd64_kdb_init(void) { kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* Set up the fast syscall stuff */ void amd64_conf_fast_syscall(void) { uint64_t msr; msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) : (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC); } void amd64_bsp_pcpu_init1(struct pcpu *pc) { struct user_segment_descriptor *gdt; PCPU_SET(prvspace, pc); gdt = *PCPU_PTR(gdt); PCPU_SET(curthread, &thread0); PCPU_SET(tssp, PCPU_PTR(common_tss)); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK); PCPU_SET(smp_tlb_gen, 1); } void amd64_bsp_pcpu_init2(uint64_t rsp0) { PCPU_SET(rsp0, rsp0); PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) + PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful); PCPU_SET(curpcb, thread0.td_pcb); } void amd64_bsp_ist_init(struct pcpu *pc) { struct nmi_pcpu *np; struct amd64tss *tssp; tssp = &pc->pc_common_tss; /* doublefault stack space, runs on ist1 */ np = ((struct nmi_pcpu *)&dblfault_stack[sizeof(dblfault_stack)]) - 1; np->np_pcpu = (register_t)pc; tssp->tss_ist1 = (long)np; /* * NMI stack, runs on ist2. The pcpu pointer is stored just * above the start of the ist2 stack. */ np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t)pc; tssp->tss_ist2 = (long)np; /* * MC# stack, runs on ist3. The pcpu pointer is stored just * above the start of the ist3 stack. */ np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1; np->np_pcpu = (register_t)pc; tssp->tss_ist3 = (long)np; /* * DB# stack, runs on ist4. */ np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1; np->np_pcpu = (register_t)pc; tssp->tss_ist4 = (long)np; } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; struct xstate_hdr *xhdr; uint64_t cr3, rsp0; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t *pde; char *env; struct user_segment_descriptor *gdt; struct region_descriptor r_gdt; size_t kstack0_sz; int late_console; TSRAW(&thread0, TS_ENTER, __func__, NULL); /* * Calculate kernphys by inspecting page table created by loader. 
* The assumptions: * - kernel is mapped at KERNBASE, backed by contiguous phys memory * aligned at 2M, below 4G (the latter is important for AP startup) * - there is a 2M hole at KERNBASE * - kernel is mapped with 2M superpages * - all participating memory, i.e. kernel, modules, metadata, * page table is accessible by pre-created 1:1 mapping * (right now loader creates 1:1 mapping for lower 4G, and all * memory is from there) * - there is a usable memory block right after the end of the * mapped kernel and all modules/metadata, pointed to by * physfree, for early allocations */ cr3 = rcr3(); pml4e = (pml4_entry_t *)(cr3 & ~PAGE_MASK) + pmap_pml4e_index( (vm_offset_t)hammer_time); pdpe = (pdp_entry_t *)(*pml4e & ~PAGE_MASK) + pmap_pdpe_index( (vm_offset_t)hammer_time); pde = (pd_entry_t *)(*pdpe & ~PAGE_MASK) + pmap_pde_index( (vm_offset_t)hammer_time); kernphys = (vm_paddr_t)(*pde & ~PDRMASK) - (vm_paddr_t)(((vm_offset_t)hammer_time - KERNBASE) & ~PDRMASK); /* Fix-up for 2M hole */ physfree += kernphys; kernphys += NBPDR; kmdp = init_ops.parse_preload_data(modulep); efi_boot = preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP) != NULL; if (!efi_boot) { /* Tell the bios to warmboot next time */ atomic_store_short((u_short *)0x472, 0x1234); } physfree += ucode_load_bsp(physfree + KERNBASE); physfree = roundup2(physfree, PAGE_SIZE); identify_cpu1(); identify_hypervisor(); identify_cpu_fixup_bsp(); identify_cpu2(); initializecpucache(); /* * Check for pti, pcid, and invpcid before ifuncs are * resolved, to correctly select the implementation for * pmap_activate_sw_mode(). */ pti = pti_get_default(); TUNABLE_INT_FETCH("vm.pmap.pti", &pti); TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled); if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) { invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID) != 0; } else { pmap_pcid_enabled = 0; } link_elf_ireloc(kmdp); /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); /* Init basic tunables, hz etc */ init_param1(); thread0.td_kstack = physfree - kernphys + KERNSTART; thread0.td_kstack_pages = kstack_pages; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; bzero((void *)thread0.td_kstack, kstack0_sz); physfree += kstack0_sz; /* * Initialize enough of thread0 for delayed invalidation to * work very early. Rely on thread0.td_base_pri * zero-initialization, it is reset to PVM at proc0_init(). */ pmap_thread_init_invl_gen(&thread0); pc = &temp_bsp_pcpu; pcpu_init(pc, 0, sizeof(struct pcpu)); gdt = &temp_bsp_pcpu.pc_gdt[0]; /* * make gdt memory segments */ for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long)gdt; lgdt(&r_gdt); wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ dpcpu_init((void *)(physfree - kernphys + KERNSTART), 0); physfree += DPCPU_SIZE; amd64_bsp_pcpu_init1(pc); /* Non-late cninit() and printf() can be moved up to here. */ /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) 
properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3); setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) : &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); #endif #ifdef XENHVM setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) : &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); /* * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4) * transition). * Once bootblocks have updated, we can test directly for * efi_systbl != NULL here... */ if (efi_boot) vty_set_preferred(VTY_VT); TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable); TUNABLE_INT_FETCH("machdep.mitigations.ibrs.disable", &hw_ibrs_disable); TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable); TUNABLE_INT_FETCH("machdep.mitigations.ssb.disable", &hw_ssb_disable); TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush", &syscall_ret_l1d_flush_mode); TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable); TUNABLE_INT_FETCH("machdep.mitigations.mds.disable", &hw_mds_disable); TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable); TUNABLE_INT_FETCH("machdep.mitigations.rndgs.enable", &x86_rngds_mitg_enable); finishidentcpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ amd64_bsp_ist_init(pc); /* Set the IO permission bitmap (empty due to tss seg limit) */ pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); amd64_conf_fast_syscall(); /* * We initialize the PCB pointer early so that exception * handlers will work. Also set up td_critnest to short-cut * the page fault handler. 
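 *
 * (Illustrative aside, not part of the change: raising td_critnest this
 * early means a page fault taken before the VM system is up can be
 * reported as fatal instead of recursing into vm_fault().  A hedged,
 * simplified sketch of the kind of guard the fault path can apply, not
 * the literal trap.c code, with trap_fatal()/eva standing in for the
 * handler's own error path:
 *
 *	if (td->td_critnest != 0)
 *		trap_fatal(frame, eva);
 * )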
*/ cpu_max_ext_state_size = sizeof(struct savefpu); set_top_of_stack_td(&thread0); thread0.td_pcb = get_pcb_td(&thread0); thread0.td_critnest = 1; /* * The console and kdb should be initialized even earlier than here, * but some console drivers don't work until after getmemsize(). * Default to late console initialization to support these drivers. * This loses mainly printf()s in getmemsize() and early debugging. */ late_console = 1; TUNABLE_INT_FETCH("debug.late_console", &late_console); if (!late_console) { cninit(); amd64_kdb_init(); } getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ #ifdef DEV_PCI /* This call might adjust phys_avail[]. */ pci_early_quirks(); #endif if (late_console) cninit(); /* * Dump the boot metadata. We have to wait for cninit() since console * output is required. If it's grossly incorrect the kernel will never * make it this far. */ if (getenv_is_true("debug.dump_modinfo_at_boot")) preload_dump(); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); #endif #else #error "have you forgotten the isa device?" #endif if (late_console) amd64_kdb_init(); msgbufinit(msgbufp, msgbufsize); fpuinit(); /* * Reinitialize thread0's stack base now that the xsave area size is * known. Set up thread0's pcb save area after fpuinit calculated fpu * save area size. Zero out the extended state header in fpu save area. */ set_top_of_stack_td(&thread0); thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0); bzero(thread0.td_pcb->pcb_save, cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } /* make an initial tss so cpu can get interrupt stack on syscall! */ rsp0 = thread0.td_md.md_stack_base; /* Ensure the stack is aligned to 16 bytes */ rsp0 &= ~0xFul; PCPU_PTR(common_tss)->tss_rsp0 = rsp0; amd64_bsp_pcpu_init2(rsp0); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); _ufssel = GSEL(GUFS32_SEL, SEL_UPL); _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_frame = &proc0_tf; env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); kcsan_cpu_init(0); #ifdef FDT x86_init_fdt(); #endif thread0.td_critnest = 0; kasan_init(); kmsan_init(); TSEXIT(); /* Location of kernel stack for locore */ return (thread0.td_md.md_stack_base); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. 
*/ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); static int efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct efi_map_header *efihdr; caddr_t kmdp; uint32_t efisize; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr == NULL) return (0); efisize = *((uint32_t *)efihdr - 1); return (SYSCTL_OUT(req, efihdr, efisize)); } SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map"); void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; critical_enter(); } else td->td_md.md_spinlock_count++; } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { critical_exit(); intr_restore(flags); } } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_r12 = tf->tf_r12; pcb->pcb_r13 = tf->tf_r13; pcb->pcb_r14 = tf->tf_r14; pcb->pcb_r15 = tf->tf_r15; pcb->pcb_rbp = tf->tf_rbp; pcb->pcb_rbx = tf->tf_rbx; pcb->pcb_rip = tf->tf_rip; pcb->pcb_rsp = tf->tf_rsp; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } int ptrace_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if ((td->td_frame->tf_rflags & PSL_T) == 0) { td->td_frame->tf_rflags |= PSL_T; td->td_dbgflags |= TDB_STEP; } return (0); } int ptrace_clear_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); td->td_frame->tf_rflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; tp = td->td_frame; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; if (tp->tf_flags & TF_HASSEGS) { regs->r_ds = tp->tf_ds; regs->r_es = tp->tf_es; regs->r_fs = tp->tf_fs; regs->r_gs = tp->tf_gs; } else { regs->r_ds = 0; regs->r_es = 0; regs->r_fs = 0; regs->r_gs = 0; } regs->r_err = 0; regs->r_trapno = 0; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; register_t rflags; tp = td->td_frame; rflags = regs->r_rflags & 0xffffffff; if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; if (0) { /* XXXKIB */ tp->tf_ds = regs->r_ds; tp->tf_es = regs->r_es; tp->tf_fs = regs->r_fs; tp->tf_gs = regs->r_gs; tp->tf_flags = TF_HASSEGS; } set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } /* XXX check all this stuff! 
*/ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); fpugetregs(td); fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { critical_enter(); set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); fpuuserinited(td); critical_exit(); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; struct trapframe *tp; pcb = td->td_pcb; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; mcp->mc_rflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; mcp->mc_rflags &= ~PSL_C; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_ds = tp->tf_ds; mcp->mc_es = tp->tf_es; mcp->mc_fs = tp->tf_fs; mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); update_pcb_bases(pcb); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); return (0); } /* * Set machine context. 
* * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tp; char *xfpustate; long rflags; int ret; pcb = td->td_pcb; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; tp->tf_r14 = mcp->mc_r14; tp->tf_r13 = mcp->mc_r13; tp->tf_r12 = mcp->mc_r12; tp->tf_r11 = mcp->mc_r11; tp->tf_r10 = mcp->mc_r10; tp->tf_r9 = mcp->mc_r9; tp->tf_r8 = mcp->mc_r8; tp->tf_rdi = mcp->mc_rdi; tp->tf_rsi = mcp->mc_rsi; tp->tf_rbp = mcp->mc_rbp; tp->tf_rbx = mcp->mc_rbx; tp->tf_rdx = mcp->mc_rdx; tp->tf_rcx = mcp->mc_rcx; tp->tf_rax = mcp->mc_rax; tp->tf_rip = mcp->mc_rip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; tp->tf_flags = mcp->mc_flags; if (tp->tf_flags & TF_HASSEGS) { tp->tf_ds = mcp->mc_ds; tp->tf_es = mcp->mc_es; tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } set_pcb_flags(pcb, PCB_FULL_IRET); if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; mcp->mc_ownedfp = fpugetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(struct savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); if (PCPU_GET(fpcurthread) == td) fpudrop(); /* * XXX force a full drop of the fpu. The above only drops it if we * owned it. * * XXX I don't much like fpugetuserregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of fpugetuserregs()... perhaps we just * have too many layers. 
*/ clear_pcb_flags(curthread->td_pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[8] = 0; dbregs->dr[9] = 0; dbregs->dr[10] = 0; dbregs->dr[11] = 0; dbregs->dr[12] = 0; dbregs->dr[13] = 0; dbregs->dr[14] = 0; dbregs->dr[15] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP or a general protection fault right here. * Upper bits of dr6 and dr7 must not be set */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (td->td_frame->tf_cs == _ucode32sel && DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) return (EINVAL); } if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || (dbregs->dr[7] & 0xffffffff00000000ul) != 0) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; set_pcb_flags(pcb, PCB_DBREGS); } return (0); } void reset_dbregs(void) { load_dr7(0); /* Turn off the control bits first */ load_dr0(0); load_dr1(0); load_dr2(0); load_dr3(0); load_dr6(0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
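 *
 * Reading aid for the bit tests that follow: DR6 bits B0..B3 (0x1, 0x2,
 * 0x4, 0x8) report which of DR0..DR3 fired, and the low byte of DR7
 * carries the L0/G0 .. L3/G3 enable pairs.  For example:
 *
 *	(dr6 & DBREG_DR6_BMASK) == 0x04    only DR2 triggered
 *	(dr7 & 0xff) == 0                  no breakpoint enabled at all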
*/ int user_dbreg_trap(register_t dr6) { u_int64_t dr7; u_int64_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; bp = dr6 & DBREG_DR6_BMASK; if (bp == 0) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } /* * The pcb_flags is only modified by current thread, or by other threads * when current thread is stopped. However, current thread may change it * from the interrupt context in cpu_switch(), or in the trap handler. * When we read-modify-write pcb_flags from C sources, compiler may generate * code that is not atomic regarding the interrupt handler. If a trap or * interrupt happens and any flag is modified from the handler, it can be * clobbered with the cached value later. Therefore, we implement setting * and clearing flags with single-instruction functions, which do not race * with possible modification of the flags from the trap or interrupt context, * because traps and interrupts are executed only on instruction boundary. */ void set_pcb_flags_raw(struct pcb *pcb, const u_int flags) { __asm __volatile("orl %1,%0" : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) : "cc", "memory"); } /* * The support for RDFSBASE, WRFSBASE and similar instructions for %gs * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into * pcb if user space modified the bases. We must save on the context * switch or if the return to usermode happens through the doreti. * * Tracking of both events is performed by the pcb flag PCB_FULL_IRET, * which have a consequence that the base MSRs must be saved each time * the PCB_FULL_IRET flag is set. We disable interrupts to sync with * context switches. */ static void set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags) { register_t r; if (curpcb == pcb && (flags & PCB_FULL_IRET) != 0 && (pcb->pcb_flags & PCB_FULL_IRET) == 0) { r = intr_disable(); if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) { if (rfs() == _ufssel) pcb->pcb_fsbase = rdfsbase(); if (rgs() == _ugssel) pcb->pcb_gsbase = rdmsr(MSR_KGSBASE); } set_pcb_flags_raw(pcb, flags); intr_restore(r); } else { set_pcb_flags_raw(pcb, flags); } } DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int)) { return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ? set_pcb_flags_fsgsbase : set_pcb_flags_raw); } void clear_pcb_flags(struct pcb *pcb, const u_int flags) { __asm __volatile("andl %1,%0" : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) : "cc", "memory"); } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. 
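 *
 * Hedged usage sketch: from the ddb prompt these are reached through
 * the generic "call" command, along the lines of (port and value are
 * made-up examples):
 *
 *	db> call outb_(0x80, 0x42)
 *	db> call inb_(0x60)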
*/ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ #undef memset #undef memmove #undef memcpy void *memset_std(void *buf, int c, size_t len); void *memset_erms(void *buf, int c, size_t len); void *memmove_std(void * _Nonnull dst, const void * _Nonnull src, size_t len); void *memmove_erms(void * _Nonnull dst, const void * _Nonnull src, size_t len); void *memcpy_std(void * _Nonnull dst, const void * _Nonnull src, size_t len); void *memcpy_erms(void * _Nonnull dst, const void * _Nonnull src, size_t len); #ifdef KCSAN /* * These fail to build as ifuncs when used with KCSAN. */ void * memset(void *buf, int c, size_t len) { return (memset_std(buf, c, len)); } void * memmove(void * _Nonnull dst, const void * _Nonnull src, size_t len) { return (memmove_std(dst, src, len)); } void * memcpy(void * _Nonnull dst, const void * _Nonnull src, size_t len) { return (memcpy_std(dst, src, len)); } #else DEFINE_IFUNC(, void *, memset, (void *, int, size_t)) { return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? memset_erms : memset_std); } DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull, size_t)) { return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? memmove_erms : memmove_std); } DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t)) { return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? memcpy_erms : memcpy_std); } #endif void pagezero_std(void *addr); void pagezero_erms(void *addr); DEFINE_IFUNC(, void , pagezero, (void *)) { return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? pagezero_erms : pagezero_std); } diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c index 0725936cf84d..ae041a325cba 100644 --- a/sys/amd64/ia32/ia32_reg.c +++ b/sys/amd64/ia32/ia32_reg.c @@ -1,270 +1,271 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) int fill_regs32(struct thread *td, struct reg32 *regs) { struct trapframe *tp; tp = td->td_frame; if (tp->tf_flags & TF_HASSEGS) { regs->r_gs = tp->tf_gs; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; } else { regs->r_gs = _ugssel; regs->r_fs = _ufssel; regs->r_es = _udatasel; regs->r_ds = _udatasel; } regs->r_edi = tp->tf_rdi; regs->r_esi = tp->tf_rsi; regs->r_ebp = tp->tf_rbp; regs->r_ebx = tp->tf_rbx; regs->r_edx = tp->tf_rdx; regs->r_ecx = tp->tf_rcx; regs->r_eax = tp->tf_rax; regs->r_eip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_rflags; regs->r_esp = tp->tf_rsp; regs->r_ss = tp->tf_ss; regs->r_err = 0; regs->r_trapno = 0; return (0); } int set_regs32(struct thread *td, struct reg32 *regs) { struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_gs = regs->r_gs; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); tp->tf_flags = TF_HASSEGS; tp->tf_rdi = regs->r_edi; tp->tf_rsi = regs->r_esi; tp->tf_rbp = regs->r_ebp; tp->tf_rbx = regs->r_ebx; tp->tf_rdx = regs->r_edx; tp->tf_rcx = regs->r_ecx; tp->tf_rax = regs->r_eax; tp->tf_rip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_rflags = regs->r_eflags; tp->tf_rsp = regs->r_esp; tp->tf_ss = regs->r_ss; return (0); } int fill_fpregs32(struct thread *td, struct fpreg32 *regs) { struct savefpu *sv_fpu; struct save87 *sv_87; struct env87 *penv_87; struct envxmm *penv_xmm; struct fpacc87 *fx_reg; int i, st; uint64_t mantissa; uint16_t tw, exp; uint8_t ab_tw; bzero(regs, sizeof(*regs)); sv_87 = (struct save87 *)regs; penv_87 = &sv_87->sv_env; fpugetregs(td); sv_fpu = get_pcb_user_save_td(td); penv_xmm = &sv_fpu->sv_env; /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; /* * XXX for en_fip/fcs/foo/fos, check if the fxsave format * uses the old-style layout for 32 bit user apps. If so, * read the ip and operand segment registers from there. * For now, use the process's %cs/%ds. */ penv_87->en_fip = penv_xmm->en_rip; penv_87->en_fcs = td->td_frame->tf_cs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_rdp; /* Entry into the kernel always sets TF_HASSEGS */ penv_87->en_fos = td->td_frame->tf_ds; /* * FPU registers and tags. * For ST(i), i = fpu_reg - top; we start with fpu_reg=7. */ st = 7 - ((penv_xmm->en_sw >> 11) & 7); ab_tw = penv_xmm->en_tw; tw = 0; for (i = 0x80; i != 0; i >>= 1) { sv_87->sv_ac[st] = sv_fpu->sv_fp[st].fp_acc; tw <<= 2; if ((ab_tw & i) != 0) { /* Non-empty - we need to check ST(i) */ fx_reg = &sv_fpu->sv_fp[st].fp_acc; /* The first 64 bits contain the mantissa. */ mantissa = *((uint64_t *)fx_reg->fp_bytes); /* * The final 16 bits contain the sign bit and the exponent. * Mask the sign bit since it is of no consequence to these * tests. 
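 *
 * For reference while reading the classification below, the two-bit
 * architectural x87 tag encoding being rebuilt is:
 *
 *	00  valid
 *	01  zero
 *	10  special (denormal, infinity, NaN)
 *	11  empty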
*/ exp = *((uint16_t *)&fx_reg->fp_bytes[8]) & 0x7fff; if (exp == 0) { if (mantissa == 0) tw |= 1; /* Zero */ else tw |= 2; /* Denormal */ } else if (exp == 0x7fff) tw |= 2; /* Infinity or NaN */ } else tw |= 3; /* Empty */ st = (st - 1) & 7; } penv_87->en_tw = tw; return (0); } int set_fpregs32(struct thread *td, struct fpreg32 *regs) { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; struct savefpu *sv_fpu = get_pcb_user_save_td(td); struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_rip = penv_87->en_fip; /* penv_87->en_fcs and en_fos ignored, see above */ penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_rdp = penv_87->en_foo; /* FPU registers and tags */ penv_xmm->en_tw = 0; for (i = 0; i < 8; ++i) { sv_fpu->sv_fp[i].fp_acc = sv_87->sv_ac[i]; if ((penv_87->en_tw & (3 << i * 2)) != (3 << i * 2)) penv_xmm->en_tw |= 1 << i; } for (i = 8; i < 16; ++i) bzero(&sv_fpu->sv_fp[i].fp_acc, sizeof(sv_fpu->sv_fp[i].fp_acc)); fpuuserinited(td); return (0); } int fill_dbregs32(struct thread *td, struct dbreg32 *regs) { struct dbreg dr; int err, i; err = fill_dbregs(td, &dr); for (i = 0; i < 8; i++) regs->dr[i] = dr.dr[i]; return (err); } int set_dbregs32(struct thread *td, struct dbreg32 *regs) { struct dbreg dr; int i; for (i = 0; i < 8; i++) dr.dr[i] = regs->dr[i]; for (i = 8; i < 16; i++) dr.dr[i] = 0; return (set_dbregs(td, &dr)); } diff --git a/sys/amd64/include/db_machdep.h b/sys/amd64/include/db_machdep.h index c88c281ff9a6..102475f660a4 100644 --- a/sys/amd64/include/db_machdep.h +++ b/sys/amd64/include/db_machdep.h @@ -1,86 +1,85 @@ /*- * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. * * $FreeBSD$ */ #ifndef _MACHINE_DB_MACHDEP_H_ #define _MACHINE_DB_MACHDEP_H_ #include -#include #include typedef vm_offset_t db_addr_t; /* address - unsigned */ typedef long db_expr_t; /* expression - signed */ #define PC_REGS() ((db_addr_t)kdb_thrctx->pcb_rip) #define BKPT_INST 0xcc /* breakpoint instruction */ #define BKPT_SIZE (1) /* size of breakpoint inst */ #define BKPT_SET(inst) (BKPT_INST) #define BKPT_SKIP \ do { \ kdb_frame->tf_rip += 1; \ kdb_thrctx->pcb_rip += 1; \ } while(0) #define FIXUP_PC_AFTER_BREAK \ do { \ kdb_frame->tf_rip -= 1; \ kdb_thrctx->pcb_rip -= 1; \ } while(0); #define db_clear_single_step kdb_cpu_clear_singlestep #define db_set_single_step kdb_cpu_set_singlestep /* * The debug exception type is copied from %dr6 to 'code' and used to * disambiguate single step traps. Watchpoints have no special support. 
* Our hardware breakpoints are not well integrated with ddb and are too * different from watchpoints. ddb treats them as unknown traps with * unknown addresses and doesn't turn them off while it is running. */ #define IS_BREAKPOINT_TRAP(type, code) ((type) == T_BPTFLT) #define IS_SSTEP_TRAP(type, code) \ ((type) == T_TRCTRAP && (code) & DBREG_DR6_BS) #define IS_WATCHPOINT_TRAP(type, code) 0 #define I_CALL 0xe8 #define I_CALLI 0xff #define i_calli(ins) (((ins)&0xff) == I_CALLI && ((ins)&0x3800) == 0x1000) #define I_RET 0xc3 #define I_IRET 0xcf #define i_rex(ins) (((ins) & 0xff) == 0x41 || ((ins) & 0xff) == 0x43) #define inst_trap_return(ins) (((ins)&0xff) == I_IRET) #define inst_return(ins) (((ins)&0xff) == I_RET) #define inst_call(ins) (((ins)&0xff) == I_CALL || i_calli(ins) || \ (i_calli((ins) >> 8) && i_rex(ins))) #define inst_load(ins) 0 #define inst_store(ins) 0 #endif /* !_MACHINE_DB_MACHDEP_H_ */ diff --git a/sys/amd64/linux/linux_ptrace.c b/sys/amd64/linux/linux_ptrace.c index 9386ba1e6ae3..3afaded3a203 100644 --- a/sys/amd64/linux/linux_ptrace.c +++ b/sys/amd64/linux/linux_ptrace.c @@ -1,611 +1,610 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 Edward Tomasz Napierala * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #define LINUX_PTRACE_TRACEME 0 #define LINUX_PTRACE_PEEKTEXT 1 #define LINUX_PTRACE_PEEKDATA 2 #define LINUX_PTRACE_PEEKUSER 3 #define LINUX_PTRACE_POKETEXT 4 #define LINUX_PTRACE_POKEDATA 5 #define LINUX_PTRACE_POKEUSER 6 #define LINUX_PTRACE_CONT 7 #define LINUX_PTRACE_KILL 8 #define LINUX_PTRACE_SINGLESTEP 9 #define LINUX_PTRACE_GETREGS 12 #define LINUX_PTRACE_SETREGS 13 #define LINUX_PTRACE_GETFPREGS 14 #define LINUX_PTRACE_SETFPREGS 15 #define LINUX_PTRACE_ATTACH 16 #define LINUX_PTRACE_DETACH 17 #define LINUX_PTRACE_SYSCALL 24 #define LINUX_PTRACE_SETOPTIONS 0x4200 #define LINUX_PTRACE_GETSIGINFO 0x4202 #define LINUX_PTRACE_GETREGSET 0x4204 #define LINUX_PTRACE_SEIZE 0x4206 #define LINUX_PTRACE_GET_SYSCALL_INFO 0x420e #define LINUX_PTRACE_EVENT_EXIT 6 #define LINUX_PTRACE_O_TRACESYSGOOD 1 #define LINUX_PTRACE_O_TRACEFORK 2 #define LINUX_PTRACE_O_TRACEVFORK 4 #define LINUX_PTRACE_O_TRACECLONE 8 #define LINUX_PTRACE_O_TRACEEXEC 16 #define LINUX_PTRACE_O_TRACEVFORKDONE 32 #define LINUX_PTRACE_O_TRACEEXIT 64 #define LINUX_PTRACE_O_TRACESECCOMP 128 #define LINUX_PTRACE_O_EXITKILL 1048576 #define LINUX_PTRACE_O_SUSPEND_SECCOMP 2097152 #define LINUX_NT_PRSTATUS 1 #define LINUX_PTRACE_O_MASK (LINUX_PTRACE_O_TRACESYSGOOD | \ LINUX_PTRACE_O_TRACEFORK | LINUX_PTRACE_O_TRACEVFORK | \ LINUX_PTRACE_O_TRACECLONE | LINUX_PTRACE_O_TRACEEXEC | \ LINUX_PTRACE_O_TRACEVFORKDONE | LINUX_PTRACE_O_TRACEEXIT | \ LINUX_PTRACE_O_TRACESECCOMP | LINUX_PTRACE_O_EXITKILL | \ LINUX_PTRACE_O_SUSPEND_SECCOMP) static int map_signum(int lsig, int *bsigp) { int bsig; if (lsig == 0) { *bsigp = 0; return (0); } if (lsig < 0 || lsig > LINUX_SIGRTMAX) return (EINVAL); bsig = linux_to_bsd_signal(lsig); if (bsig == SIGSTOP) bsig = 0; *bsigp = bsig; return (0); } int linux_ptrace_status(struct thread *td, pid_t pid, int status) { struct ptrace_lwpinfo lwpinfo; struct linux_pemuldata *pem; register_t saved_retval; int error; saved_retval = td->td_retval[0]; error = kern_ptrace(td, PT_LWPINFO, pid, &lwpinfo, sizeof(lwpinfo)); td->td_retval[0] = saved_retval; if (error != 0) { linux_msg(td, "PT_LWPINFO failed with error %d", error); return (status); } pem = pem_find(td->td_proc); KASSERT(pem != NULL, ("%s: proc emuldata not found.\n", __func__)); LINUX_PEM_SLOCK(pem); if ((pem->ptrace_flags & LINUX_PTRACE_O_TRACESYSGOOD) && lwpinfo.pl_flags & PL_FLAG_SCE) status |= (LINUX_SIGTRAP | 0x80) << 8; if ((pem->ptrace_flags & LINUX_PTRACE_O_TRACESYSGOOD) && lwpinfo.pl_flags & PL_FLAG_SCX) status |= (LINUX_SIGTRAP | 0x80) << 8; if ((pem->ptrace_flags & LINUX_PTRACE_O_TRACEEXIT) && lwpinfo.pl_flags & PL_FLAG_EXITED) status |= (LINUX_SIGTRAP | LINUX_PTRACE_EVENT_EXIT << 8) << 8; LINUX_PEM_SUNLOCK(pem); return (status); } struct linux_pt_reg { l_ulong r15; l_ulong r14; l_ulong r13; l_ulong r12; l_ulong rbp; l_ulong rbx; l_ulong r11; l_ulong r10; l_ulong r9; l_ulong r8; l_ulong rax; l_ulong rcx; l_ulong rdx; l_ulong rsi; l_ulong rdi; l_ulong orig_rax; l_ulong rip; l_ulong cs; l_ulong eflags; l_ulong rsp; l_ulong ss; }; /* * Translate amd64 ptrace registers between Linux and FreeBSD formats. * The translation is pretty straighforward, for all registers but * orig_rax on Linux side and r_trapno and r_err in FreeBSD. 
*/ static void map_regs_to_linux(struct reg *b_reg, struct linux_pt_reg *l_reg) { l_reg->r15 = b_reg->r_r15; l_reg->r14 = b_reg->r_r14; l_reg->r13 = b_reg->r_r13; l_reg->r12 = b_reg->r_r12; l_reg->rbp = b_reg->r_rbp; l_reg->rbx = b_reg->r_rbx; l_reg->r11 = b_reg->r_r11; l_reg->r10 = b_reg->r_r10; l_reg->r9 = b_reg->r_r9; l_reg->r8 = b_reg->r_r8; l_reg->rax = b_reg->r_rax; l_reg->rcx = b_reg->r_rcx; l_reg->rdx = b_reg->r_rdx; l_reg->rsi = b_reg->r_rsi; l_reg->rdi = b_reg->r_rdi; l_reg->orig_rax = b_reg->r_rax; l_reg->rip = b_reg->r_rip; l_reg->cs = b_reg->r_cs; l_reg->eflags = b_reg->r_rflags; l_reg->rsp = b_reg->r_rsp; l_reg->ss = b_reg->r_ss; } static void map_regs_from_linux(struct reg *b_reg, struct linux_pt_reg *l_reg) { b_reg->r_r15 = l_reg->r15; b_reg->r_r14 = l_reg->r14; b_reg->r_r13 = l_reg->r13; b_reg->r_r12 = l_reg->r12; b_reg->r_r11 = l_reg->r11; b_reg->r_r10 = l_reg->r10; b_reg->r_r9 = l_reg->r9; b_reg->r_r8 = l_reg->r8; b_reg->r_rdi = l_reg->rdi; b_reg->r_rsi = l_reg->rsi; b_reg->r_rbp = l_reg->rbp; b_reg->r_rbx = l_reg->rbx; b_reg->r_rdx = l_reg->rdx; b_reg->r_rcx = l_reg->rcx; b_reg->r_rax = l_reg->rax; /* * XXX: Are zeroes the right thing to put here? */ b_reg->r_trapno = 0; b_reg->r_fs = 0; b_reg->r_gs = 0; b_reg->r_err = 0; b_reg->r_es = 0; b_reg->r_ds = 0; b_reg->r_rip = l_reg->rip; b_reg->r_cs = l_reg->cs; b_reg->r_rflags = l_reg->eflags; b_reg->r_rsp = l_reg->rsp; b_reg->r_ss = l_reg->ss; } static int linux_ptrace_peek(struct thread *td, pid_t pid, void *addr, void *data) { int error; error = kern_ptrace(td, PT_READ_I, pid, addr, 0); if (error == 0) error = copyout(td->td_retval, data, sizeof(l_int)); else if (error == ENOMEM) error = EIO; td->td_retval[0] = error; return (error); } static int linux_ptrace_peekuser(struct thread *td, pid_t pid, void *addr, void *data) { linux_msg(td, "PTRACE_PEEKUSER not implemented; returning EINVAL"); return (EINVAL); } static int linux_ptrace_pokeuser(struct thread *td, pid_t pid, void *addr, void *data) { linux_msg(td, "PTRACE_POKEUSER not implemented; returning EINVAL"); return (EINVAL); } static int linux_ptrace_setoptions(struct thread *td, pid_t pid, l_ulong data) { struct linux_pemuldata *pem; int mask; mask = 0; if (data & ~LINUX_PTRACE_O_MASK) { linux_msg(td, "unknown ptrace option %lx set; " "returning EINVAL", data & ~LINUX_PTRACE_O_MASK); return (EINVAL); } pem = pem_find(td->td_proc); KASSERT(pem != NULL, ("%s: proc emuldata not found.\n", __func__)); /* * PTRACE_O_EXITKILL is ignored, we do that by default. */ LINUX_PEM_XLOCK(pem); if (data & LINUX_PTRACE_O_TRACESYSGOOD) { pem->ptrace_flags |= LINUX_PTRACE_O_TRACESYSGOOD; } else { pem->ptrace_flags &= ~LINUX_PTRACE_O_TRACESYSGOOD; } LINUX_PEM_XUNLOCK(pem); if (data & LINUX_PTRACE_O_TRACEFORK) mask |= PTRACE_FORK; if (data & LINUX_PTRACE_O_TRACEVFORK) mask |= PTRACE_VFORK; if (data & LINUX_PTRACE_O_TRACECLONE) mask |= PTRACE_VFORK; if (data & LINUX_PTRACE_O_TRACEEXEC) mask |= PTRACE_EXEC; if (data & LINUX_PTRACE_O_TRACEVFORKDONE) mask |= PTRACE_VFORK; /* XXX: Close enough? 
*/ if (data & LINUX_PTRACE_O_TRACEEXIT) { pem->ptrace_flags |= LINUX_PTRACE_O_TRACEEXIT; } else { pem->ptrace_flags &= ~LINUX_PTRACE_O_TRACEEXIT; } return (kern_ptrace(td, PT_SET_EVENT_MASK, pid, &mask, sizeof(mask))); } static int linux_ptrace_getsiginfo(struct thread *td, pid_t pid, l_ulong data) { struct ptrace_lwpinfo lwpinfo; l_siginfo_t l_siginfo; int error, sig; error = kern_ptrace(td, PT_LWPINFO, pid, &lwpinfo, sizeof(lwpinfo)); if (error != 0) { linux_msg(td, "PT_LWPINFO failed with error %d", error); return (error); } if ((lwpinfo.pl_flags & PL_FLAG_SI) == 0) { error = EINVAL; linux_msg(td, "no PL_FLAG_SI, returning %d", error); return (error); } sig = bsd_to_linux_signal(lwpinfo.pl_siginfo.si_signo); memset(&l_siginfo, 0, sizeof(l_siginfo)); siginfo_to_lsiginfo(&lwpinfo.pl_siginfo, &l_siginfo, sig); error = copyout(&l_siginfo, (void *)data, sizeof(l_siginfo)); return (error); } static int linux_ptrace_getregs(struct thread *td, pid_t pid, void *data) { struct ptrace_lwpinfo lwpinfo; struct reg b_reg; struct linux_pt_reg l_reg; int error; error = kern_ptrace(td, PT_GETREGS, pid, &b_reg, 0); if (error != 0) return (error); map_regs_to_linux(&b_reg, &l_reg); error = kern_ptrace(td, PT_LWPINFO, pid, &lwpinfo, sizeof(lwpinfo)); if (error != 0) { linux_msg(td, "PT_LWPINFO failed with error %d", error); return (error); } if (lwpinfo.pl_flags & PL_FLAG_SCE) { /* * The strace(1) utility depends on RAX being set to -ENOSYS * on syscall entry; otherwise it loops printing those: * * [ Process PID=928 runs in 64 bit mode. ] * [ Process PID=928 runs in x32 mode. ] */ l_reg.rax = -38; /* -ENOSYS */ /* * Undo the mangling done in exception.S:fast_syscall_common(). */ l_reg.r10 = l_reg.rcx; } error = copyout(&l_reg, (void *)data, sizeof(l_reg)); return (error); } static int linux_ptrace_setregs(struct thread *td, pid_t pid, void *data) { struct reg b_reg; struct linux_pt_reg l_reg; int error; error = copyin(data, &l_reg, sizeof(l_reg)); if (error != 0) return (error); map_regs_from_linux(&b_reg, &l_reg); error = kern_ptrace(td, PT_SETREGS, pid, &b_reg, 0); return (error); } static int linux_ptrace_getregset_prstatus(struct thread *td, pid_t pid, l_ulong data) { struct ptrace_lwpinfo lwpinfo; struct reg b_reg; struct linux_pt_regset l_regset; struct iovec iov; struct pcb *pcb; size_t len; int error; error = copyin((const void *)data, &iov, sizeof(iov)); if (error != 0) { linux_msg(td, "copyin error %d", error); return (error); } error = kern_ptrace(td, PT_GETREGS, pid, &b_reg, 0); if (error != 0) return (error); pcb = td->td_pcb; if (td == curthread) update_pcb_bases(pcb); bsd_to_linux_regset(&b_reg, &l_regset); l_regset.fs_base = pcb->pcb_fsbase; l_regset.gs_base = pcb->pcb_gsbase; error = kern_ptrace(td, PT_LWPINFO, pid, &lwpinfo, sizeof(lwpinfo)); if (error != 0) { linux_msg(td, "PT_LWPINFO failed with error %d", error); return (error); } if (lwpinfo.pl_flags & PL_FLAG_SCE) { /* * Undo the mangling done in exception.S:fast_syscall_common(). */ l_regset.r10 = l_regset.rcx; } if (lwpinfo.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)) { /* * In Linux, the syscall number - passed to the syscall * as rax - is preserved in orig_rax; rax gets overwritten * with syscall return value. 
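 *
 * Hedged sketch of the tracer-side view this emulates (Linux ptrace
 * API, not FreeBSD's): at a syscall stop a Linux tracer typically reads
 *
 *	struct user_regs_struct regs;
 *	ptrace(PTRACE_GETREGS, pid, NULL, &regs);
 *	long num = regs.orig_rax;	the syscall number
 *	long ret = regs.rax;		-ENOSYS at entry, result at exit
 *
 * which is why orig_rax has to be filled in from pl_syscall_code here.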
*/ l_regset.orig_rax = lwpinfo.pl_syscall_code; } len = MIN(iov.iov_len, sizeof(l_regset)); error = copyout(&l_regset, (void *)iov.iov_base, len); if (error != 0) { linux_msg(td, "copyout error %d", error); return (error); } iov.iov_len -= len; error = copyout(&iov, (void *)data, sizeof(iov)); if (error != 0) { linux_msg(td, "iov copyout error %d", error); return (error); } return (error); } static int linux_ptrace_getregset(struct thread *td, pid_t pid, l_ulong addr, l_ulong data) { switch (addr) { case LINUX_NT_PRSTATUS: return (linux_ptrace_getregset_prstatus(td, pid, data)); default: linux_msg(td, "PTRACE_GETREGSET request %ld not implemented; " "returning EINVAL", addr); return (EINVAL); } } static int linux_ptrace_seize(struct thread *td, pid_t pid, l_ulong addr, l_ulong data) { linux_msg(td, "PTRACE_SEIZE not implemented; returning EINVAL"); return (EINVAL); } static int linux_ptrace_get_syscall_info(struct thread *td, pid_t pid, l_ulong addr, l_ulong data) { linux_msg(td, "PTRACE_GET_SYSCALL_INFO not implemented; returning EINVAL"); return (EINVAL); } int linux_ptrace(struct thread *td, struct linux_ptrace_args *uap) { void *addr; pid_t pid; int error, sig; pid = (pid_t)uap->pid; addr = (void *)uap->addr; switch (uap->req) { case LINUX_PTRACE_TRACEME: error = kern_ptrace(td, PT_TRACE_ME, 0, 0, 0); break; case LINUX_PTRACE_PEEKTEXT: case LINUX_PTRACE_PEEKDATA: error = linux_ptrace_peek(td, pid, addr, (void *)uap->data); if (error != 0) goto out; /* * Linux expects this syscall to read 64 bits, not 32. */ error = linux_ptrace_peek(td, pid, (void *)(uap->addr + 4), (void *)(uap->data + 4)); break; case LINUX_PTRACE_PEEKUSER: error = linux_ptrace_peekuser(td, pid, addr, (void *)uap->data); break; case LINUX_PTRACE_POKETEXT: case LINUX_PTRACE_POKEDATA: error = kern_ptrace(td, PT_WRITE_D, pid, addr, uap->data); if (error != 0) goto out; /* * Linux expects this syscall to write 64 bits, not 32. 
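 *
 * Worked example of the split done below (little-endian): poking the
 * 64-bit value 0x1122334455667788 at "addr" turns into two 32-bit
 * PT_WRITE_D operations, 0x55667788 at addr and 0x11223344 at addr + 4.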
*/ error = kern_ptrace(td, PT_WRITE_D, pid, (void *)(uap->addr + 4), uap->data >> 32); break; case LINUX_PTRACE_POKEUSER: error = linux_ptrace_pokeuser(td, pid, addr, (void *)uap->data); break; case LINUX_PTRACE_CONT: error = map_signum(uap->data, &sig); if (error != 0) break; error = kern_ptrace(td, PT_CONTINUE, pid, (void *)1, sig); break; case LINUX_PTRACE_KILL: error = kern_ptrace(td, PT_KILL, pid, addr, uap->data); break; case LINUX_PTRACE_SINGLESTEP: error = map_signum(uap->data, &sig); if (error != 0) break; error = kern_ptrace(td, PT_STEP, pid, (void *)1, sig); break; case LINUX_PTRACE_GETREGS: error = linux_ptrace_getregs(td, pid, (void *)uap->data); break; case LINUX_PTRACE_SETREGS: error = linux_ptrace_setregs(td, pid, (void *)uap->data); break; case LINUX_PTRACE_ATTACH: error = kern_ptrace(td, PT_ATTACH, pid, addr, uap->data); break; case LINUX_PTRACE_DETACH: error = map_signum(uap->data, &sig); if (error != 0) break; error = kern_ptrace(td, PT_DETACH, pid, (void *)1, sig); break; case LINUX_PTRACE_SYSCALL: error = map_signum(uap->data, &sig); if (error != 0) break; error = kern_ptrace(td, PT_SYSCALL, pid, (void *)1, sig); break; case LINUX_PTRACE_SETOPTIONS: error = linux_ptrace_setoptions(td, pid, uap->data); break; case LINUX_PTRACE_GETSIGINFO: error = linux_ptrace_getsiginfo(td, pid, uap->data); break; case LINUX_PTRACE_GETREGSET: error = linux_ptrace_getregset(td, pid, uap->addr, uap->data); break; case LINUX_PTRACE_SEIZE: error = linux_ptrace_seize(td, pid, uap->addr, uap->data); break; case LINUX_PTRACE_GET_SYSCALL_INFO: error = linux_ptrace_get_syscall_info(td, pid, uap->addr, uap->data); break; default: linux_msg(td, "ptrace(%ld, ...) not implemented; " "returning EINVAL", uap->req); error = EINVAL; break; } out: if (error == EBUSY) error = ESRCH; return (error); } diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c index 3a32d1eadb37..5801fac7f126 100644 --- a/sys/amd64/linux32/linux32_machdep.c +++ b/sys/amd64/linux32/linux32_machdep.c @@ -1,746 +1,746 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru); struct l_old_select_argv { l_int nfds; l_uintptr_t readfds; l_uintptr_t writefds; l_uintptr_t exceptfds; l_uintptr_t timeout; } __packed; static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) { lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; lru->ru_maxrss = ru->ru_maxrss; lru->ru_ixrss = ru->ru_ixrss; lru->ru_idrss = ru->ru_idrss; lru->ru_isrss = ru->ru_isrss; lru->ru_minflt = ru->ru_minflt; lru->ru_majflt = ru->ru_majflt; lru->ru_nswap = ru->ru_nswap; lru->ru_inblock = ru->ru_inblock; lru->ru_oublock = ru->ru_oublock; lru->ru_msgsnd = ru->ru_msgsnd; lru->ru_msgrcv = ru->ru_msgrcv; lru->ru_nsignals = ru->ru_nsignals; lru->ru_nvcsw = ru->ru_nvcsw; lru->ru_nivcsw = ru->ru_nivcsw; } int linux_copyout_rusage(struct rusage *ru, void *uaddr) { struct l_rusage lru; bsd_to_linux_rusage(ru, &lru); return (copyout(&lru, uaddr, sizeof(struct l_rusage))); } int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, args->envp); free(path, M_TEMP); if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 
0 : error, td); return (error); } CTASSERT(sizeof(struct l_iovec32) == 8); int linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) { struct l_iovec32 iov32; struct iovec *iov; struct uio *uio; uint32_t iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, int error) { struct l_iovec32 iov32; struct iovec *iov; uint32_t iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return(0); } int linux_readv(struct thread *td, struct linux_readv_args *uap) { struct uio *auio; int error; error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int linux_writev(struct thread *td, struct linux_writev_args *uap) { struct uio *auio; int error; error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } struct l_ipc_kludge { l_uintptr_t msgp; l_long msgtyp; } __packed; int linux_ipc(struct thread *td, struct linux_ipc_args *args) { switch (args->what & 0xFFFF) { case LINUX_SEMOP: { struct linux_semop_args a; a.semid = args->arg1; a.tsops = PTRIN(args->ptr); a.nsops = args->arg2; return (linux_semop(td, &a)); } case LINUX_SEMGET: { struct linux_semget_args a; a.key = args->arg1; a.nsems = args->arg2; a.semflg = args->arg3; return (linux_semget(td, &a)); } case LINUX_SEMCTL: { struct linux_semctl_args a; int error; a.semid = args->arg1; a.semnum = args->arg2; a.cmd = args->arg3; error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg)); if (error) return (error); return (linux_semctl(td, &a)); } case LINUX_MSGSND: { struct linux_msgsnd_args a; a.msqid = args->arg1; a.msgp = PTRIN(args->ptr); a.msgsz = args->arg2; a.msgflg = args->arg3; return (linux_msgsnd(td, &a)); } case LINUX_MSGRCV: { struct linux_msgrcv_args a; a.msqid = args->arg1; a.msgsz = args->arg2; a.msgflg = args->arg3; if ((args->what >> 16) == 0) { struct l_ipc_kludge tmp; int error; if (args->ptr == 0) return (EINVAL); error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp)); if (error) return (error); a.msgp = PTRIN(tmp.msgp); a.msgtyp = tmp.msgtyp; } else { a.msgp = PTRIN(args->ptr); a.msgtyp = args->arg5; } return (linux_msgrcv(td, &a)); } case LINUX_MSGGET: { struct linux_msgget_args a; a.key = args->arg1; a.msgflg = args->arg2; return (linux_msgget(td, &a)); } case LINUX_MSGCTL: { struct linux_msgctl_args a; a.msqid = args->arg1; a.cmd = args->arg2; a.buf = 
PTRIN(args->ptr); return (linux_msgctl(td, &a)); } case LINUX_SHMAT: { struct linux_shmat_args a; l_uintptr_t addr; int error; a.shmid = args->arg1; a.shmaddr = PTRIN(args->ptr); a.shmflg = args->arg2; error = linux_shmat(td, &a); if (error != 0) return (error); addr = td->td_retval[0]; error = copyout(&addr, PTRIN(args->arg3), sizeof(addr)); td->td_retval[0] = 0; return (error); } case LINUX_SHMDT: { struct linux_shmdt_args a; a.shmaddr = PTRIN(args->ptr); return (linux_shmdt(td, &a)); } case LINUX_SHMGET: { struct linux_shmget_args a; a.key = args->arg1; a.size = args->arg2; a.shmflg = args->arg3; return (linux_shmget(td, &a)); } case LINUX_SHMCTL: { struct linux_shmctl_args a; a.shmid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_shmctl(td, &a)); } default: break; } return (EINVAL); } int linux_old_select(struct thread *td, struct linux_old_select_args *args) { struct l_old_select_argv linux_args; struct linux_select_args newsel; int error; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); newsel.nfds = linux_args.nfds; newsel.readfds = PTRIN(linux_args.readfds); newsel.writefds = PTRIN(linux_args.writefds); newsel.exceptfds = PTRIN(linux_args.exceptfds); newsel.timeout = PTRIN(linux_args.timeout); return (linux_select(td, &newsel)); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct l_user_desc info; struct pcb *pcb; int error; error = copyin(desc, &info, sizeof(struct l_user_desc)); if (error) { linux_msg(td, "set_cloned_tls copyin info failed!"); } else { /* We might copy out the entry_number as GUGS32_SEL. */ info.entry_number = GUGS32_SEL; error = copyout(&info, desc, sizeof(struct l_user_desc)); if (error) linux_msg(td, "set_cloned_tls copyout info failed!"); pcb = td->td_pcb; update_pcb_bases(pcb); pcb->pcb_gsbase = (register_t)info.base_addr; td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); } return (error); } int linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_rsp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent do. 
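 * Roughly: for clone(2) the parent receives the new thread's id as its
 * return value, while the child resumes here with %rax forced to 0
 * below (and %rsp switched to the new stack when one was supplied),
 * the same convention fork(2) uses to let the child identify itself.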
*/ td->td_frame->tf_rax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * PAGE_SIZE)); } int linux_mmap(struct thread *td, struct linux_mmap_args *args) { int error; struct l_mmap_argv linux_args; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); return (linux_mmap_common(td, linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, (uint32_t)linux_args.pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; if (args->level < 0 || args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_sigaction(struct thread *td, struct linux_sigaction_args *args) { l_osigaction_t osa; l_sigaction_t act, oact; int error; if (args->nsa != NULL) { error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); if (error) return (error); act.lsa_handler = osa.lsa_handler; act.lsa_flags = osa.lsa_flags; act.lsa_restorer = osa.lsa_restorer; LINUX_SIGEMPTYSET(act.lsa_mask); act.lsa_mask.__mask = osa.lsa_mask; } error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, args->osa ? &oact : NULL); if (args->osa != NULL && !error) { osa.lsa_handler = oact.lsa_handler; osa.lsa_flags = oact.lsa_flags; osa.lsa_restorer = oact.lsa_restorer; osa.lsa_mask = oact.lsa_mask.__mask; error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); } return (error); } /* * Linux has two extra args, restart and oldmask. We don't use these, * but it seems that "restart" is actually a context pointer that * enables the signal to happen with a different register set. 
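 * For reference, the old i386 entry point is commonly quoted as
 * sigsuspend(int restart, unsigned long oldmask, old_sigset_t mask);
 * only the final 'mask' argument is consumed below.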
*/ int linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) { sigset_t sigmask; l_sigset_t mask; LINUX_SIGEMPTYSET(mask); mask.__mask = args->mask; linux_to_bsd_sigset(&mask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) { struct timeval atv; l_timeval atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); atv32.tv_sec = atv.tv_sec; atv32.tv_usec = atv.tv_usec; error = copyout(&atv32, uap->tp, sizeof(atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = 0; rtz.tz_dsttime = 0; error = copyout(&rtz, uap->tzp, sizeof(rtz)); } return (error); } int linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) { l_timeval atv32; struct timeval atv, *tvp; struct timezone atz, *tzp; int error; if (uap->tp) { error = copyin(uap->tp, &atv32, sizeof(atv32)); if (error) return (error); atv.tv_sec = atv32.tv_sec; atv.tv_usec = atv32.tv_usec; tvp = &atv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &atz, sizeof(atz)); if (error) return (error); tzp = &atz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) { struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error != 0) return (error); if (uap->rusage != NULL) error = linux_copyout_rusage(&s, uap->rusage); return (error); } int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; struct pcb *pcb; int error; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); /* * Semantics of Linux version: every thread in the system has array * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. * This syscall loads one of the selected TLS decriptors with a value * and also loads GDT descriptors 6, 7 and 8 with the content of * the per-thread descriptors. * * Semantics of FreeBSD version: I think we can ignore that Linux has * three per-thread descriptors and use just the first one. * The tls_array[] is used only in [gs]et_thread_area() syscalls and * for loading the GDT descriptors. We use just one GDT descriptor * for TLS, so we will load just one. * * XXX: This doesn't work when a user space process tries to use more * than one TLS segment. Comment in the Linux source says wine might * do this. */ /* * GLIBC reads current %gs and call set_thread_area() with it. * We should let GUDATA_SEL and GUGS32_SEL proceed as well because * we use these segments. */ switch (info.entry_number) { case GUGS32_SEL: case GUDATA_SEL: case 6: case -1: info.entry_number = GUGS32_SEL; break; default: return (EINVAL); } /* * We have to copy out the GDT entry we use. * * XXX: What if a user space program does not check the return value * and tries to use 6, 7 or 8? 
*/ error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); pcb = td->td_pcb; update_pcb_bases(pcb); pcb->pcb_gsbase = (register_t)info.base_addr; update_gdt_gsbase(td, info.base_addr); return (0); } void bsd_to_linux_regset32(struct reg32 *b_reg, struct linux_pt_regset32 *l_regset) { l_regset->ebx = b_reg->r_ebx; l_regset->ecx = b_reg->r_ecx; l_regset->edx = b_reg->r_edx; l_regset->esi = b_reg->r_esi; l_regset->edi = b_reg->r_edi; l_regset->ebp = b_reg->r_ebp; l_regset->eax = b_reg->r_eax; l_regset->ds = b_reg->r_ds; l_regset->es = b_reg->r_es; l_regset->fs = b_reg->r_fs; l_regset->gs = b_reg->r_gs; l_regset->orig_eax = b_reg->r_eax; l_regset->eip = b_reg->r_eip; l_regset->cs = b_reg->r_cs; l_regset->eflags = b_reg->r_eflags; l_regset->esp = b_reg->r_esp; l_regset->ss = b_reg->r_ss; } int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xchgl_smap : futex_xchgl_nosmap); } int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_addl_smap : futex_addl_nosmap); } int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_orl_smap : futex_orl_nosmap); } int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_andl_smap : futex_andl_nosmap); } int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xorl_smap : futex_xorl_nosmap); } diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index 0348c3335409..fbf11ec84084 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -1,2722 +1,2722 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_bhyve_snapshot.h" #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vmm_stat.h" #include "vmm_ktr.h" #include "vmm_ioport.h" #include "vatpic.h" #include "vlapic.h" #include "vlapic_priv.h" #include "x86.h" #include "vmcb.h" #include "svm.h" #include "svm_softc.h" #include "svm_msr.h" #include "npt.h" SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); /* * SVM CPUID function 0x8000_000A, edx bit decoding. */ #define AMD_CPUID_SVM_NP BIT(0) /* Nested paging or RVI */ #define AMD_CPUID_SVM_LBR BIT(1) /* Last branch virtualization */ #define AMD_CPUID_SVM_SVML BIT(2) /* SVM lock */ #define AMD_CPUID_SVM_NRIP_SAVE BIT(3) /* Next RIP is saved */ #define AMD_CPUID_SVM_TSC_RATE BIT(4) /* TSC rate control. */ #define AMD_CPUID_SVM_VMCB_CLEAN BIT(5) /* VMCB state caching */ #define AMD_CPUID_SVM_FLUSH_BY_ASID BIT(6) /* Flush by ASID */ #define AMD_CPUID_SVM_DECODE_ASSIST BIT(7) /* Decode assist */ #define AMD_CPUID_SVM_PAUSE_INC BIT(10) /* Pause intercept filter. */ #define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */ #define AMD_CPUID_SVM_AVIC BIT(13) /* AVIC present */ #define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \ VMCB_CACHE_IOPM | \ VMCB_CACHE_I | \ VMCB_CACHE_TPR | \ VMCB_CACHE_CR2 | \ VMCB_CACHE_CR | \ VMCB_CACHE_DR | \ VMCB_CACHE_DT | \ VMCB_CACHE_SEG | \ VMCB_CACHE_NP) static uint32_t vmcb_clean = VMCB_CACHE_DEFAULT; SYSCTL_INT(_hw_vmm_svm, OID_AUTO, vmcb_clean, CTLFLAG_RDTUN, &vmcb_clean, 0, NULL); static MALLOC_DEFINE(M_SVM, "svm", "svm"); static MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic"); static uint32_t svm_feature = ~0U; /* AMD SVM features. */ SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, features, CTLFLAG_RDTUN, &svm_feature, 0, "SVM features advertised by CPUID.8000000AH:EDX"); static int disable_npf_assist; SYSCTL_INT(_hw_vmm_svm, OID_AUTO, disable_npf_assist, CTLFLAG_RWTUN, &disable_npf_assist, 0, NULL); /* Maximum ASIDs supported by the processor */ static uint32_t nasid; SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RDTUN, &nasid, 0, "Number of ASIDs supported by this processor"); /* Current ASID generation for each host cpu */ static struct asid asid[MAXCPU]; /* * SVM host state saved area of size 4KB for each core. 
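 * Each CPU points the processor at its own area via
 * wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave[curcpu])) in svm_enable(); the
 * hardware uses it to hold host state across VMRUN/#VMEXIT.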
*/ static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); static VMM_STAT_AMD(VCPU_EXITINTINFO, "VM exits during event delivery"); static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "Events pending at VM entry"); static VMM_STAT_AMD(VMEXIT_VINTR, "VM exits due to interrupt window"); static int svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val); static __inline int flush_by_asid(void) { return (svm_feature & AMD_CPUID_SVM_FLUSH_BY_ASID); } static __inline int decode_assist(void) { return (svm_feature & AMD_CPUID_SVM_DECODE_ASSIST); } static void svm_disable(void *arg __unused) { uint64_t efer; efer = rdmsr(MSR_EFER); efer &= ~EFER_SVM; wrmsr(MSR_EFER, efer); } /* * Disable SVM on all CPUs. */ static int svm_modcleanup(void) { smp_rendezvous(NULL, svm_disable, NULL, NULL); return (0); } /* * Verify that all the features required by bhyve are available. */ static int check_svm_features(void) { u_int regs[4]; /* CPUID Fn8000_000A is for SVM */ do_cpuid(0x8000000A, regs); svm_feature &= regs[3]; /* * The number of ASIDs can be configured to be less than what is * supported by the hardware but not more. */ if (nasid == 0 || nasid > regs[1]) nasid = regs[1]; KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %#x", nasid)); /* bhyve requires the Nested Paging feature */ if (!(svm_feature & AMD_CPUID_SVM_NP)) { printf("SVM: Nested Paging feature not available.\n"); return (ENXIO); } /* bhyve requires the NRIP Save feature */ if (!(svm_feature & AMD_CPUID_SVM_NRIP_SAVE)) { printf("SVM: NRIP Save feature not available.\n"); return (ENXIO); } return (0); } static void svm_enable(void *arg __unused) { uint64_t efer; efer = rdmsr(MSR_EFER); efer |= EFER_SVM; wrmsr(MSR_EFER, efer); wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave[curcpu])); } /* * Return 1 if SVM is enabled on this processor and 0 otherwise. */ static int svm_available(void) { uint64_t msr; /* Section 15.4 Enabling SVM from APM2. */ if ((amd_feature2 & AMDID2_SVM) == 0) { printf("SVM: not available.\n"); return (0); } msr = rdmsr(MSR_VM_CR); if ((msr & VM_CR_SVMDIS) != 0) { printf("SVM: disabled by BIOS.\n"); return (0); } return (1); } static int svm_modinit(int ipinum) { int error, cpu; if (!svm_available()) return (ENXIO); error = check_svm_features(); if (error) return (error); vmcb_clean &= VMCB_CACHE_DEFAULT; for (cpu = 0; cpu < MAXCPU; cpu++) { /* * Initialize the host ASIDs to their "highest" valid values. * * The next ASID allocation will rollover both 'gen' and 'num' * and start off the sequence at {1,1}. 
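 * Worked example: with gen = ~0UL and num = nasid - 1, the first
 * allocation in svm_pmap_activate() sees ++num == nasid, resets num
 * to 1, and ++gen wraps to 0 and is bumped to 1, giving {1,1}.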
*/ asid[cpu].gen = ~0UL; asid[cpu].num = nasid - 1; } svm_msr_init(); svm_npt_init(ipinum); /* Enable SVM on all CPUs */ smp_rendezvous(NULL, svm_enable, NULL, NULL); return (0); } static void svm_modresume(void) { svm_enable(NULL); } #ifdef BHYVE_SNAPSHOT int svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset) { int error; struct vmcb_ctrl *ctrl; ctrl = svm_get_vmcb_ctrl(sc, vcpu); ctrl->tsc_offset = offset; svm_set_dirty(sc, vcpu, VMCB_CACHE_I); VCPU_CTR1(sc->vm, vcpu, "tsc offset changed to %#lx", offset); error = vm_set_tsc_offset(sc->vm, vcpu, offset); return (error); } #endif /* Pentium compatible MSRs */ #define MSR_PENTIUM_START 0 #define MSR_PENTIUM_END 0x1FFF /* AMD 6th generation and Intel compatible MSRs */ #define MSR_AMD6TH_START 0xC0000000UL #define MSR_AMD6TH_END 0xC0001FFFUL /* AMD 7th and 8th generation compatible MSRs */ #define MSR_AMD7TH_START 0xC0010000UL #define MSR_AMD7TH_END 0xC0011FFFUL /* * Get the index and bit position for a MSR in permission bitmap. * Two bits are used for each MSR: lower bit for read and higher bit for write. */ static int svm_msr_index(uint64_t msr, int *index, int *bit) { uint32_t base, off; *index = -1; *bit = (msr % 4) * 2; base = 0; if (msr >= MSR_PENTIUM_START && msr <= MSR_PENTIUM_END) { *index = msr / 4; return (0); } base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1); if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) { off = (msr - MSR_AMD6TH_START); *index = (off + base) / 4; return (0); } base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1); if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) { off = (msr - MSR_AMD7TH_START); *index = (off + base) / 4; return (0); } return (EINVAL); } /* * Allow vcpu to read or write the 'msr' without trapping into the hypervisor. */ static void svm_msr_perm(uint8_t *perm_bitmap, uint64_t msr, bool read, bool write) { int index, bit, error; error = svm_msr_index(msr, &index, &bit); KASSERT(error == 0, ("%s: invalid msr %#lx", __func__, msr)); KASSERT(index >= 0 && index < SVM_MSR_BITMAP_SIZE, ("%s: invalid index %d for msr %#lx", __func__, index, msr)); KASSERT(bit >= 0 && bit <= 6, ("%s: invalid bit position %d " "msr %#lx", __func__, bit, msr)); if (read) perm_bitmap[index] &= ~(1UL << bit); if (write) perm_bitmap[index] &= ~(2UL << bit); } static void svm_msr_rw_ok(uint8_t *perm_bitmap, uint64_t msr) { svm_msr_perm(perm_bitmap, msr, true, true); } static void svm_msr_rd_ok(uint8_t *perm_bitmap, uint64_t msr) { svm_msr_perm(perm_bitmap, msr, true, false); } static __inline int svm_get_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask) { struct vmcb_ctrl *ctrl; KASSERT(idx >=0 && idx < 5, ("invalid intercept index %d", idx)); ctrl = svm_get_vmcb_ctrl(sc, vcpu); return (ctrl->intercept[idx] & bitmask ? 
1 : 0); } static __inline void svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask, int enabled) { struct vmcb_ctrl *ctrl; uint32_t oldval; KASSERT(idx >=0 && idx < 5, ("invalid intercept index %d", idx)); ctrl = svm_get_vmcb_ctrl(sc, vcpu); oldval = ctrl->intercept[idx]; if (enabled) ctrl->intercept[idx] |= bitmask; else ctrl->intercept[idx] &= ~bitmask; if (ctrl->intercept[idx] != oldval) { svm_set_dirty(sc, vcpu, VMCB_CACHE_I); VCPU_CTR3(sc->vm, vcpu, "intercept[%d] modified " "from %#x to %#x", idx, oldval, ctrl->intercept[idx]); } } static __inline void svm_disable_intercept(struct svm_softc *sc, int vcpu, int off, uint32_t bitmask) { svm_set_intercept(sc, vcpu, off, bitmask, 0); } static __inline void svm_enable_intercept(struct svm_softc *sc, int vcpu, int off, uint32_t bitmask) { svm_set_intercept(sc, vcpu, off, bitmask, 1); } static void vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, uint64_t msrpm_base_pa, uint64_t np_pml4) { struct vmcb_ctrl *ctrl; struct vmcb_state *state; uint32_t mask; int n; ctrl = svm_get_vmcb_ctrl(sc, vcpu); state = svm_get_vmcb_state(sc, vcpu); ctrl->iopm_base_pa = iopm_base_pa; ctrl->msrpm_base_pa = msrpm_base_pa; /* Enable nested paging */ ctrl->np_enable = 1; ctrl->n_cr3 = np_pml4; /* * Intercept accesses to the control registers that are not shadowed * in the VMCB - i.e. all except cr0, cr2, cr3, cr4 and cr8. */ for (n = 0; n < 16; n++) { mask = (BIT(n) << 16) | BIT(n); if (n == 0 || n == 2 || n == 3 || n == 4 || n == 8) svm_disable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask); else svm_enable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask); } /* * Intercept everything when tracing guest exceptions otherwise * just intercept machine check exception. */ if (vcpu_trace_exceptions(sc->vm, vcpu)) { for (n = 0; n < 32; n++) { /* * Skip unimplemented vectors in the exception bitmap. */ if (n == 2 || n == 9) { continue; } svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(n)); } } else { svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC)); } /* Intercept various events (for e.g. I/O, MSR and CPUID accesses) */ svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IO); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_MSR); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_CPUID); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INTR); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INIT); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_NMI); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SMI); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SHUTDOWN); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_FERR_FREEZE); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVD); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVLPGA); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MONITOR); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MWAIT); /* * Intercept SVM instructions since AMD enables them in guests otherwise. * Non-intercepted VMMCALL causes #UD, skip it. 
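 * The instructions that are intercepted below are reflected back into
 * the guest as #UD by svm_vmexit() (see the vm_inject_ud() case),
 * which is what a guest without SVM support would see anyway.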
*/ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMLOAD); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMSAVE); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_STGI); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_ICEBP); /* * From section "Canonicalization and Consistency Checks" in APMv2 * the VMRUN intercept bit must be set to pass the consistency check. */ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN); /* * The ASID will be set to a non-zero value just before VMRUN. */ ctrl->asid = 0; /* * Section 15.21.1, Interrupt Masking in EFLAGS * Section 15.21.2, Virtualizing APIC.TPR * * This must be set for %rflag and %cr8 isolation of guest and host. */ ctrl->v_intr_masking = 1; /* Enable Last Branch Record aka LBR for debugging */ ctrl->lbr_virt_en = 1; state->dbgctl = BIT(0); /* EFER_SVM must always be set when the guest is executing */ state->efer = EFER_SVM; /* Set up the PAT to power-on state */ state->g_pat = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); /* Set up DR6/7 to power-on state */ state->dr6 = DBREG_DR6_RESERVED1; state->dr7 = DBREG_DR7_RESERVED1; } /* * Initialize a virtual machine. */ static void * svm_init(struct vm *vm, pmap_t pmap) { struct svm_softc *svm_sc; struct svm_vcpu *vcpu; vm_paddr_t msrpm_pa, iopm_pa, pml4_pa; int i; uint16_t maxcpus; svm_sc = malloc(sizeof (*svm_sc), M_SVM, M_WAITOK | M_ZERO); if (((uintptr_t)svm_sc & PAGE_MASK) != 0) panic("malloc of svm_softc not aligned on page boundary"); svm_sc->msr_bitmap = contigmalloc(SVM_MSR_BITMAP_SIZE, M_SVM, M_WAITOK, 0, ~(vm_paddr_t)0, PAGE_SIZE, 0); if (svm_sc->msr_bitmap == NULL) panic("contigmalloc of SVM MSR bitmap failed"); svm_sc->iopm_bitmap = contigmalloc(SVM_IO_BITMAP_SIZE, M_SVM, M_WAITOK, 0, ~(vm_paddr_t)0, PAGE_SIZE, 0); if (svm_sc->iopm_bitmap == NULL) panic("contigmalloc of SVM IO bitmap failed"); svm_sc->vm = vm; svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pmltop); /* * Intercept read and write accesses to all MSRs. */ memset(svm_sc->msr_bitmap, 0xFF, SVM_MSR_BITMAP_SIZE); /* * Access to the following MSRs is redirected to the VMCB when the * guest is executing. Therefore it is safe to allow the guest to * read/write these MSRs directly without hypervisor involvement. */ svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_GSBASE); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_FSBASE); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_KGSBASE); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_STAR); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_LSTAR); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_CSTAR); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SF_MASK); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_CS_MSR); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_ESP_MSR); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_EIP_MSR); svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_PAT); svm_msr_rd_ok(svm_sc->msr_bitmap, MSR_TSC); /* * Intercept writes to make sure that the EFER_SVM bit is not cleared. */ svm_msr_rd_ok(svm_sc->msr_bitmap, MSR_EFER); /* Intercept access to all I/O ports. 
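 * The I/O permission bitmap holds one bit per port and a set bit
 * forces a #VMEXIT on access, so filling it with 0xFF below traps
 * every IN/OUT the guest executes.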
*/ memset(svm_sc->iopm_bitmap, 0xFF, SVM_IO_BITMAP_SIZE); iopm_pa = vtophys(svm_sc->iopm_bitmap); msrpm_pa = vtophys(svm_sc->msr_bitmap); pml4_pa = svm_sc->nptp; maxcpus = vm_get_maxcpus(svm_sc->vm); for (i = 0; i < maxcpus; i++) { vcpu = svm_get_vcpu(svm_sc, i); vcpu->nextrip = ~0; vcpu->lastcpu = NOCPU; vcpu->vmcb_pa = vtophys(&vcpu->vmcb); vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa); svm_msr_guest_init(svm_sc, i); } return (svm_sc); } /* * Collateral for a generic SVM VM-exit. */ static void vm_exit_svm(struct vm_exit *vme, uint64_t code, uint64_t info1, uint64_t info2) { vme->exitcode = VM_EXITCODE_SVM; vme->u.svm.exitcode = code; vme->u.svm.exitinfo1 = info1; vme->u.svm.exitinfo2 = info2; } static int svm_cpl(struct vmcb_state *state) { /* * From APMv2: * "Retrieve the CPL from the CPL field in the VMCB, not * from any segment DPL" */ return (state->cpl); } static enum vm_cpu_mode svm_vcpu_mode(struct vmcb *vmcb) { struct vmcb_segment seg; struct vmcb_state *state; int error; state = &vmcb->state; if (state->efer & EFER_LMA) { error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); KASSERT(error == 0, ("%s: vmcb_seg(cs) error %d", __func__, error)); /* * Section 4.8.1 for APM2, check if Code Segment has * Long attribute set in descriptor. */ if (seg.attrib & VMCB_CS_ATTRIB_L) return (CPU_MODE_64BIT); else return (CPU_MODE_COMPATIBILITY); } else if (state->cr0 & CR0_PE) { return (CPU_MODE_PROTECTED); } else { return (CPU_MODE_REAL); } } static enum vm_paging_mode svm_paging_mode(uint64_t cr0, uint64_t cr4, uint64_t efer) { if ((cr0 & CR0_PG) == 0) return (PAGING_MODE_FLAT); if ((cr4 & CR4_PAE) == 0) return (PAGING_MODE_32); if (efer & EFER_LME) return (PAGING_MODE_64); else return (PAGING_MODE_PAE); } /* * ins/outs utility routines */ static uint64_t svm_inout_str_index(struct svm_regctx *regs, int in) { uint64_t val; val = in ? regs->sctx_rdi : regs->sctx_rsi; return (val); } static uint64_t svm_inout_str_count(struct svm_regctx *regs, int rep) { uint64_t val; val = rep ? regs->sctx_rcx : 1; return (val); } static void svm_inout_str_seginfo(struct svm_softc *svm_sc, int vcpu, int64_t info1, int in, struct vm_inout_str *vis) { int error, s; if (in) { vis->seg_name = VM_REG_GUEST_ES; } else { /* The segment field has standard encoding */ s = (info1 >> 10) & 0x7; vis->seg_name = vm_segment_name(s); } error = svm_getdesc(svm_sc, vcpu, vis->seg_name, &vis->seg_desc); KASSERT(error == 0, ("%s: svm_getdesc error %d", __func__, error)); } static int svm_inout_str_addrsize(uint64_t info1) { uint32_t size; size = (info1 >> 7) & 0x7; switch (size) { case 1: return (2); /* 16 bit */ case 2: return (4); /* 32 bit */ case 4: return (8); /* 64 bit */ default: panic("%s: invalid size encoding %d", __func__, size); } } static void svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging) { struct vmcb_state *state; state = &vmcb->state; paging->cr3 = state->cr3; paging->cpl = svm_cpl(state); paging->cpu_mode = svm_vcpu_mode(vmcb); paging->paging_mode = svm_paging_mode(state->cr0, state->cr4, state->efer); } #define UNHANDLED 0 /* * Handle guest I/O intercept. */ static int svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) { struct vmcb_ctrl *ctrl; struct vmcb_state *state; struct svm_regctx *regs; struct vm_inout_str *vis; uint64_t info1; int inout_string; state = svm_get_vmcb_state(svm_sc, vcpu); ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); regs = svm_get_guest_regctx(svm_sc, vcpu); info1 = ctrl->exitinfo1; inout_string = info1 & BIT(2) ? 
1 : 0; /* * The effective segment number in EXITINFO1[12:10] is populated * only if the processor has the DecodeAssist capability. * * XXX this is not specified explicitly in APMv2 but can be verified * empirically. */ if (inout_string && !decode_assist()) return (UNHANDLED); vmexit->exitcode = VM_EXITCODE_INOUT; vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0; vmexit->u.inout.string = inout_string; vmexit->u.inout.rep = (info1 & BIT(3)) ? 1 : 0; vmexit->u.inout.bytes = (info1 >> 4) & 0x7; vmexit->u.inout.port = (uint16_t)(info1 >> 16); vmexit->u.inout.eax = (uint32_t)(state->rax); if (inout_string) { vmexit->exitcode = VM_EXITCODE_INOUT_STR; vis = &vmexit->u.inout_str; svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &vis->paging); vis->rflags = state->rflags; vis->cr0 = state->cr0; vis->index = svm_inout_str_index(regs, vmexit->u.inout.in); vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep); vis->addrsize = svm_inout_str_addrsize(info1); svm_inout_str_seginfo(svm_sc, vcpu, info1, vmexit->u.inout.in, vis); } return (UNHANDLED); } static int npf_fault_type(uint64_t exitinfo1) { if (exitinfo1 & VMCB_NPF_INFO1_W) return (VM_PROT_WRITE); else if (exitinfo1 & VMCB_NPF_INFO1_ID) return (VM_PROT_EXECUTE); else return (VM_PROT_READ); } static bool svm_npf_emul_fault(uint64_t exitinfo1) { if (exitinfo1 & VMCB_NPF_INFO1_ID) { return (false); } if (exitinfo1 & VMCB_NPF_INFO1_GPT) { return (false); } if ((exitinfo1 & VMCB_NPF_INFO1_GPA) == 0) { return (false); } return (true); } static void svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) { struct vm_guest_paging *paging; struct vmcb_segment seg; struct vmcb_ctrl *ctrl; char *inst_bytes; int error, inst_len; ctrl = &vmcb->ctrl; paging = &vmexit->u.inst_emul.paging; vmexit->exitcode = VM_EXITCODE_INST_EMUL; vmexit->u.inst_emul.gpa = gpa; vmexit->u.inst_emul.gla = VIE_INVALID_GLA; svm_paging_info(vmcb, paging); error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error)); switch(paging->cpu_mode) { case CPU_MODE_REAL: vmexit->u.inst_emul.cs_base = seg.base; vmexit->u.inst_emul.cs_d = 0; break; case CPU_MODE_PROTECTED: case CPU_MODE_COMPATIBILITY: vmexit->u.inst_emul.cs_base = seg.base; /* * Section 4.8.1 of APM2, Default Operand Size or D bit. */ vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? 1 : 0; break; default: vmexit->u.inst_emul.cs_base = 0; vmexit->u.inst_emul.cs_d = 0; break; } /* * Copy the instruction bytes into 'vie' if available. */ if (decode_assist() && !disable_npf_assist) { inst_len = ctrl->inst_len; inst_bytes = ctrl->inst_bytes; } else { inst_len = 0; inst_bytes = NULL; } vie_init(&vmexit->u.inst_emul.vie, inst_bytes, inst_len); } #ifdef KTR static const char * intrtype_to_str(int intr_type) { switch (intr_type) { case VMCB_EVENTINJ_TYPE_INTR: return ("hwintr"); case VMCB_EVENTINJ_TYPE_NMI: return ("nmi"); case VMCB_EVENTINJ_TYPE_INTn: return ("swintr"); case VMCB_EVENTINJ_TYPE_EXCEPTION: return ("exception"); default: panic("%s: unknown intr_type %d", __func__, intr_type); } } #endif /* * Inject an event to vcpu as described in section 15.20, "Event injection". 
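 * From the assignments below: the vector occupies bits 7:0, the event
 * type starts at bit 8, VMCB_EVENTINJ_VALID/VMCB_EVENTINJ_EC_VALID
 * flag validity, and the 32-bit error code is stored in bits 63:32.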
*/ static void svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector, uint32_t error, bool ec_valid) { struct vmcb_ctrl *ctrl; ctrl = svm_get_vmcb_ctrl(sc, vcpu); KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event already pending %#lx", __func__, ctrl->eventinj)); KASSERT(vector >=0 && vector <= 255, ("%s: invalid vector %d", __func__, vector)); switch (intr_type) { case VMCB_EVENTINJ_TYPE_INTR: case VMCB_EVENTINJ_TYPE_NMI: case VMCB_EVENTINJ_TYPE_INTn: break; case VMCB_EVENTINJ_TYPE_EXCEPTION: if (vector >= 0 && vector <= 31 && vector != 2) break; /* FALLTHROUGH */ default: panic("%s: invalid intr_type/vector: %d/%d", __func__, intr_type, vector); } ctrl->eventinj = vector | (intr_type << 8) | VMCB_EVENTINJ_VALID; if (ec_valid) { ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID; ctrl->eventinj |= (uint64_t)error << 32; VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %#x", intrtype_to_str(intr_type), vector, error); } else { VCPU_CTR2(sc->vm, vcpu, "Injecting %s at vector %d", intrtype_to_str(intr_type), vector); } } static void svm_update_virqinfo(struct svm_softc *sc, int vcpu) { struct vm *vm; struct vlapic *vlapic; struct vmcb_ctrl *ctrl; vm = sc->vm; vlapic = vm_lapic(vm, vcpu); ctrl = svm_get_vmcb_ctrl(sc, vcpu); /* Update %cr8 in the emulated vlapic */ vlapic_set_cr8(vlapic, ctrl->v_tpr); /* Virtual interrupt injection is not used. */ KASSERT(ctrl->v_intr_vector == 0, ("%s: invalid " "v_intr_vector %d", __func__, ctrl->v_intr_vector)); } static void svm_save_intinfo(struct svm_softc *svm_sc, int vcpu) { struct vmcb_ctrl *ctrl; uint64_t intinfo; ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); intinfo = ctrl->exitintinfo; if (!VMCB_EXITINTINFO_VALID(intinfo)) return; /* * From APMv2, Section "Intercepts during IDT interrupt delivery" * * If a #VMEXIT happened during event delivery then record the event * that was being delivered. 
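 * Example scenario (illustrative): an interrupt being injected takes a
 * nested page fault while the IDT entry or stack is touched; the
 * interrupt lands in EXITINTINFO and vm_exit_intinfo() below queues it
 * so it is re-injected on a later VM entry rather than lost.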
*/ VCPU_CTR2(svm_sc->vm, vcpu, "SVM:Pending INTINFO(0x%lx), vector=%d.\n", intinfo, VMCB_EXITINTINFO_VECTOR(intinfo)); vmm_stat_incr(svm_sc->vm, vcpu, VCPU_EXITINTINFO, 1); vm_exit_intinfo(svm_sc->vm, vcpu, intinfo); } #ifdef INVARIANTS static __inline int vintr_intercept_enabled(struct svm_softc *sc, int vcpu) { return (svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR)); } #endif static __inline void enable_intr_window_exiting(struct svm_softc *sc, int vcpu) { struct vmcb_ctrl *ctrl; ctrl = svm_get_vmcb_ctrl(sc, vcpu); if (ctrl->v_irq && ctrl->v_intr_vector == 0) { KASSERT(ctrl->v_ign_tpr, ("%s: invalid v_ign_tpr", __func__)); KASSERT(vintr_intercept_enabled(sc, vcpu), ("%s: vintr intercept should be enabled", __func__)); return; } VCPU_CTR0(sc->vm, vcpu, "Enable intr window exiting"); ctrl->v_irq = 1; ctrl->v_ign_tpr = 1; ctrl->v_intr_vector = 0; svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); } static __inline void disable_intr_window_exiting(struct svm_softc *sc, int vcpu) { struct vmcb_ctrl *ctrl; ctrl = svm_get_vmcb_ctrl(sc, vcpu); if (!ctrl->v_irq && ctrl->v_intr_vector == 0) { KASSERT(!vintr_intercept_enabled(sc, vcpu), ("%s: vintr intercept should be disabled", __func__)); return; } VCPU_CTR0(sc->vm, vcpu, "Disable intr window exiting"); ctrl->v_irq = 0; ctrl->v_intr_vector = 0; svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR); svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); } static int svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val) { struct vmcb_ctrl *ctrl; int oldval, newval; ctrl = svm_get_vmcb_ctrl(sc, vcpu); oldval = ctrl->intr_shadow; newval = val ? 1 : 0; if (newval != oldval) { ctrl->intr_shadow = newval; VCPU_CTR1(sc->vm, vcpu, "Setting intr_shadow to %d", newval); } return (0); } static int svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val) { struct vmcb_ctrl *ctrl; ctrl = svm_get_vmcb_ctrl(sc, vcpu); *val = ctrl->intr_shadow; return (0); } /* * Once an NMI is injected it blocks delivery of further NMIs until the handler * executes an IRET. The IRET intercept is enabled when an NMI is injected to * to track when the vcpu is done handling the NMI. */ static int nmi_blocked(struct svm_softc *sc, int vcpu) { int blocked; blocked = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET); return (blocked); } static void enable_nmi_blocking(struct svm_softc *sc, int vcpu) { KASSERT(!nmi_blocked(sc, vcpu), ("vNMI already blocked")); VCPU_CTR0(sc->vm, vcpu, "vNMI blocking enabled"); svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET); } static void clear_nmi_blocking(struct svm_softc *sc, int vcpu) { int error; KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked")); VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared"); /* * When the IRET intercept is cleared the vcpu will attempt to execute * the "iret" when it runs next. However, it is possible to inject * another NMI into the vcpu before the "iret" has actually executed. * * For e.g. if the "iret" encounters a #NPF when accessing the stack * it will trap back into the hypervisor. If an NMI is pending for * the vcpu it will be injected into the guest. * * XXX this needs to be fixed */ svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET); /* * Set 'intr_shadow' to prevent an NMI from being injected on the * immediate VMRUN. 
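 * The shadow only has to cover the "iret" itself: svm_inj_interrupts()
 * checks intr_shadow and holds back a pending NMI for that entry.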
*/ error = svm_modify_intr_shadow(sc, vcpu, 1); KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error)); } #define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL static int svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval, bool *retu) { struct vm_exit *vme; struct vmcb_state *state; uint64_t changed, lma, oldval; int error; state = svm_get_vmcb_state(sc, vcpu); oldval = state->efer; VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %#lx/%#lx", oldval, newval); newval &= ~0xFE; /* clear the Read-As-Zero (RAZ) bits */ changed = oldval ^ newval; if (newval & EFER_MBZ_BITS) goto gpf; /* APMv2 Table 14-5 "Long-Mode Consistency Checks" */ if (changed & EFER_LME) { if (state->cr0 & CR0_PG) goto gpf; } /* EFER.LMA = EFER.LME & CR0.PG */ if ((newval & EFER_LME) != 0 && (state->cr0 & CR0_PG) != 0) lma = EFER_LMA; else lma = 0; if ((newval & EFER_LMA) != lma) goto gpf; if (newval & EFER_NXE) { if (!vm_cpuid_capability(sc->vm, vcpu, VCC_NO_EXECUTE)) goto gpf; } /* * XXX bhyve does not enforce segment limits in 64-bit mode. Until * this is fixed flag guest attempt to set EFER_LMSLE as an error. */ if (newval & EFER_LMSLE) { vme = vm_exitinfo(sc->vm, vcpu); vm_exit_svm(vme, VMCB_EXIT_MSR, 1, 0); *retu = true; return (0); } if (newval & EFER_FFXSR) { if (!vm_cpuid_capability(sc->vm, vcpu, VCC_FFXSR)) goto gpf; } if (newval & EFER_TCE) { if (!vm_cpuid_capability(sc->vm, vcpu, VCC_TCE)) goto gpf; } error = svm_setreg(sc, vcpu, VM_REG_GUEST_EFER, newval); KASSERT(error == 0, ("%s: error %d updating efer", __func__, error)); return (0); gpf: vm_inject_gp(sc->vm, vcpu); return (0); } static int emulate_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) { int error; if (lapic_msr(num)) error = lapic_wrmsr(sc->vm, vcpu, num, val, retu); else if (num == MSR_EFER) error = svm_write_efer(sc, vcpu, val, retu); else error = svm_wrmsr(sc, vcpu, num, val, retu); return (error); } static int emulate_rdmsr(struct svm_softc *sc, int vcpu, u_int num, bool *retu) { struct vmcb_state *state; struct svm_regctx *ctx; uint64_t result; int error; if (lapic_msr(num)) error = lapic_rdmsr(sc->vm, vcpu, num, &result, retu); else error = svm_rdmsr(sc, vcpu, num, &result, retu); if (error == 0) { state = svm_get_vmcb_state(sc, vcpu); ctx = svm_get_guest_regctx(sc, vcpu); state->rax = result & 0xffffffff; ctx->sctx_rdx = result >> 32; } return (error); } #ifdef KTR static const char * exit_reason_to_str(uint64_t reason) { int i; static char reasonbuf[32]; static const struct { int reason; const char *str; } reasons[] = { { .reason = VMCB_EXIT_INVALID, .str = "invalvmcb" }, { .reason = VMCB_EXIT_SHUTDOWN, .str = "shutdown" }, { .reason = VMCB_EXIT_NPF, .str = "nptfault" }, { .reason = VMCB_EXIT_PAUSE, .str = "pause" }, { .reason = VMCB_EXIT_HLT, .str = "hlt" }, { .reason = VMCB_EXIT_CPUID, .str = "cpuid" }, { .reason = VMCB_EXIT_IO, .str = "inout" }, { .reason = VMCB_EXIT_MC, .str = "mchk" }, { .reason = VMCB_EXIT_INTR, .str = "extintr" }, { .reason = VMCB_EXIT_NMI, .str = "nmi" }, { .reason = VMCB_EXIT_VINTR, .str = "vintr" }, { .reason = VMCB_EXIT_MSR, .str = "msr" }, { .reason = VMCB_EXIT_IRET, .str = "iret" }, { .reason = VMCB_EXIT_MONITOR, .str = "monitor" }, { .reason = VMCB_EXIT_MWAIT, .str = "mwait" }, { .reason = VMCB_EXIT_VMRUN, .str = "vmrun" }, { .reason = VMCB_EXIT_VMMCALL, .str = "vmmcall" }, { .reason = VMCB_EXIT_VMLOAD, .str = "vmload" }, { .reason = VMCB_EXIT_VMSAVE, .str = "vmsave" }, { .reason = VMCB_EXIT_STGI, .str = "stgi" }, { .reason = VMCB_EXIT_CLGI, .str = "clgi" }, { .reason = 
VMCB_EXIT_SKINIT, .str = "skinit" }, { .reason = VMCB_EXIT_ICEBP, .str = "icebp" }, { .reason = VMCB_EXIT_INVD, .str = "invd" }, { .reason = VMCB_EXIT_INVLPGA, .str = "invlpga" }, }; for (i = 0; i < nitems(reasons); i++) { if (reasons[i].reason == reason) return (reasons[i].str); } snprintf(reasonbuf, sizeof(reasonbuf), "%#lx", reason); return (reasonbuf); } #endif /* KTR */ /* * From section "State Saved on Exit" in APMv2: nRIP is saved for all #VMEXITs * that are due to instruction intercepts as well as MSR and IOIO intercepts * and exceptions caused by INT3, INTO and BOUND instructions. * * Return 1 if the nRIP is valid and 0 otherwise. */ static int nrip_valid(uint64_t exitcode) { switch (exitcode) { case 0x00 ... 0x0F: /* read of CR0 through CR15 */ case 0x10 ... 0x1F: /* write of CR0 through CR15 */ case 0x20 ... 0x2F: /* read of DR0 through DR15 */ case 0x30 ... 0x3F: /* write of DR0 through DR15 */ case 0x43: /* INT3 */ case 0x44: /* INTO */ case 0x45: /* BOUND */ case 0x65 ... 0x7C: /* VMEXIT_CR0_SEL_WRITE ... VMEXIT_MSR */ case 0x80 ... 0x8D: /* VMEXIT_VMRUN ... VMEXIT_XSETBV */ return (1); default: return (0); } } static int svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) { struct vmcb *vmcb; struct vmcb_state *state; struct vmcb_ctrl *ctrl; struct svm_regctx *ctx; uint64_t code, info1, info2, val; uint32_t eax, ecx, edx; int error, errcode_valid, handled, idtvec, reflect; bool retu; ctx = svm_get_guest_regctx(svm_sc, vcpu); vmcb = svm_get_vmcb(svm_sc, vcpu); state = &vmcb->state; ctrl = &vmcb->ctrl; handled = 0; code = ctrl->exitcode; info1 = ctrl->exitinfo1; info2 = ctrl->exitinfo2; vmexit->exitcode = VM_EXITCODE_BOGUS; vmexit->rip = state->rip; vmexit->inst_length = nrip_valid(code) ? ctrl->nrip - state->rip : 0; vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1); /* * #VMEXIT(INVALID) needs to be handled early because the VMCB is * in an inconsistent state and can trigger assertions that would * never happen otherwise. */ if (code == VMCB_EXIT_INVALID) { vm_exit_svm(vmexit, code, info1, info2); return (0); } KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event " "injection valid bit is set %#lx", __func__, ctrl->eventinj)); KASSERT(vmexit->inst_length >= 0 && vmexit->inst_length <= 15, ("invalid inst_length %d: code (%#lx), info1 (%#lx), info2 (%#lx)", vmexit->inst_length, code, info1, info2)); svm_update_virqinfo(svm_sc, vcpu); svm_save_intinfo(svm_sc, vcpu); switch (code) { case VMCB_EXIT_IRET: /* * Restart execution at "iret" but with the intercept cleared. */ vmexit->inst_length = 0; clear_nmi_blocking(svm_sc, vcpu); handled = 1; break; case VMCB_EXIT_VINTR: /* interrupt window exiting */ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_VINTR, 1); handled = 1; break; case VMCB_EXIT_INTR: /* external interrupt */ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1); handled = 1; break; case VMCB_EXIT_NMI: /* external NMI */ handled = 1; break; case 0x40 ... 0x5F: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1); reflect = 1; idtvec = code - 0x40; switch (idtvec) { case IDT_MC: /* * Call the machine check handler by hand. Also don't * reflect the machine check back into the guest. 
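 * "By hand" means executing 'int $18' below, i.e. raising vector 18
 * (#MC) so the host's own machine check handler runs immediately.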
*/ reflect = 0; VCPU_CTR0(svm_sc->vm, vcpu, "Vectoring to MCE handler"); __asm __volatile("int $18"); break; case IDT_PF: error = svm_setreg(svm_sc, vcpu, VM_REG_GUEST_CR2, info2); KASSERT(error == 0, ("%s: error %d updating cr2", __func__, error)); /* fallthru */ case IDT_NP: case IDT_SS: case IDT_GP: case IDT_AC: case IDT_TS: errcode_valid = 1; break; case IDT_DF: errcode_valid = 1; info1 = 0; break; case IDT_BP: case IDT_OF: case IDT_BR: /* * The 'nrip' field is populated for INT3, INTO and * BOUND exceptions and this also implies that * 'inst_length' is non-zero. * * Reset 'inst_length' to zero so the guest %rip at * event injection is identical to what it was when * the exception originally happened. */ VCPU_CTR2(svm_sc->vm, vcpu, "Reset inst_length from %d " "to zero before injecting exception %d", vmexit->inst_length, idtvec); vmexit->inst_length = 0; /* fallthru */ default: errcode_valid = 0; info1 = 0; break; } KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) " "when reflecting exception %d into guest", vmexit->inst_length, idtvec)); if (reflect) { /* Reflect the exception back into the guest */ VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception " "%d/%#x into the guest", idtvec, (int)info1); error = vm_inject_exception(svm_sc->vm, vcpu, idtvec, errcode_valid, info1, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", __func__, error)); } handled = 1; break; case VMCB_EXIT_MSR: /* MSR access. */ eax = state->rax; ecx = ctx->sctx_rcx; edx = ctx->sctx_rdx; retu = false; if (info1) { vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); val = (uint64_t)edx << 32 | eax; VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %#x val %#lx", ecx, val); if (emulate_wrmsr(svm_sc, vcpu, ecx, val, &retu)) { vmexit->exitcode = VM_EXITCODE_WRMSR; vmexit->u.msr.code = ecx; vmexit->u.msr.wval = val; } else if (!retu) { handled = 1; } else { KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, ("emulate_wrmsr retu with bogus exitcode")); } } else { VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %#x", ecx); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1); if (emulate_rdmsr(svm_sc, vcpu, ecx, &retu)) { vmexit->exitcode = VM_EXITCODE_RDMSR; vmexit->u.msr.code = ecx; } else if (!retu) { handled = 1; } else { KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, ("emulate_rdmsr retu with bogus exitcode")); } } break; case VMCB_EXIT_IO: handled = svm_handle_io(svm_sc, vcpu, vmexit); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1); break; case VMCB_EXIT_CPUID: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1); handled = x86_emulate_cpuid(svm_sc->vm, vcpu, &state->rax, &ctx->sctx_rbx, &ctx->sctx_rcx, &ctx->sctx_rdx); break; case VMCB_EXIT_HLT: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1); vmexit->exitcode = VM_EXITCODE_HLT; vmexit->u.hlt.rflags = state->rflags; break; case VMCB_EXIT_PAUSE: vmexit->exitcode = VM_EXITCODE_PAUSE; vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1); break; case VMCB_EXIT_NPF: /* EXITINFO2 contains the faulting guest physical address */ if (info1 & VMCB_NPF_INFO1_RSV) { VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with " "reserved bits set: info1(%#lx) info2(%#lx)", info1, info2); } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) { vmexit->exitcode = VM_EXITCODE_PAGING; vmexit->u.paging.gpa = info2; vmexit->u.paging.fault_type = npf_fault_type(info1); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_NESTED_FAULT, 1); VCPU_CTR3(svm_sc->vm, vcpu, "nested page fault " "on gpa %#lx/%#lx at rip %#lx", info2, info1, state->rip); } else if (svm_npf_emul_fault(info1)) { svm_handle_inst_emul(vmcb, info2, vmexit); 
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INST_EMUL, 1); VCPU_CTR3(svm_sc->vm, vcpu, "inst_emul fault " "for gpa %#lx/%#lx at rip %#lx", info2, info1, state->rip); } break; case VMCB_EXIT_MONITOR: vmexit->exitcode = VM_EXITCODE_MONITOR; break; case VMCB_EXIT_MWAIT: vmexit->exitcode = VM_EXITCODE_MWAIT; break; case VMCB_EXIT_SHUTDOWN: case VMCB_EXIT_VMRUN: case VMCB_EXIT_VMMCALL: case VMCB_EXIT_VMLOAD: case VMCB_EXIT_VMSAVE: case VMCB_EXIT_STGI: case VMCB_EXIT_CLGI: case VMCB_EXIT_SKINIT: case VMCB_EXIT_ICEBP: case VMCB_EXIT_INVD: case VMCB_EXIT_INVLPGA: vm_inject_ud(svm_sc->vm, vcpu); handled = 1; break; default: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1); break; } VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d", handled ? "handled" : "unhandled", exit_reason_to_str(code), vmexit->rip, vmexit->inst_length); if (handled) { vmexit->rip += vmexit->inst_length; vmexit->inst_length = 0; state->rip = vmexit->rip; } else { if (vmexit->exitcode == VM_EXITCODE_BOGUS) { /* * If this VM exit was not claimed by anybody then * treat it as a generic SVM exit. */ vm_exit_svm(vmexit, code, info1, info2); } else { /* * The exitcode and collateral have been populated. * The VM exit will be processed further in userland. */ } } return (handled); } static void svm_inj_intinfo(struct svm_softc *svm_sc, int vcpu) { uint64_t intinfo; if (!vm_entry_intinfo(svm_sc->vm, vcpu, &intinfo)) return; KASSERT(VMCB_EXITINTINFO_VALID(intinfo), ("%s: entry intinfo is not " "valid: %#lx", __func__, intinfo)); svm_eventinject(svm_sc, vcpu, VMCB_EXITINTINFO_TYPE(intinfo), VMCB_EXITINTINFO_VECTOR(intinfo), VMCB_EXITINTINFO_EC(intinfo), VMCB_EXITINTINFO_EC_VALID(intinfo)); vmm_stat_incr(svm_sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1); VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %#lx", intinfo); } /* * Inject event to virtual cpu. */ static void svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) { struct vmcb_ctrl *ctrl; struct vmcb_state *state; struct svm_vcpu *vcpustate; uint8_t v_tpr; int vector, need_intr_window; int extint_pending; state = svm_get_vmcb_state(sc, vcpu); ctrl = svm_get_vmcb_ctrl(sc, vcpu); vcpustate = svm_get_vcpu(sc, vcpu); need_intr_window = 0; if (vcpustate->nextrip != state->rip) { ctrl->intr_shadow = 0; VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking " "cleared due to rip change: %#lx/%#lx", vcpustate->nextrip, state->rip); } /* * Inject pending events or exceptions for this vcpu. * * An event might be pending because the previous #VMEXIT happened * during event delivery (i.e. ctrl->exitintinfo). * * An event might also be pending because an exception was injected * by the hypervisor (e.g. #PF during instruction emulation). */ svm_inj_intinfo(sc, vcpu); /* NMI event has priority over interrupts. */ if (vm_nmi_pending(sc->vm, vcpu)) { if (nmi_blocked(sc, vcpu)) { /* * Can't inject another NMI if the guest has not * yet executed an "iret" after the last NMI. */ VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due " "to NMI-blocking"); } else if (ctrl->intr_shadow) { /* * Can't inject an NMI if the vcpu is in an intr_shadow. */ VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due to " "interrupt shadow"); need_intr_window = 1; goto done; } else if (ctrl->eventinj & VMCB_EVENTINJ_VALID) { /* * If there is already an exception/interrupt pending * then defer the NMI until after that. */ VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to " "eventinj %#lx", ctrl->eventinj); /* * Use self-IPI to trigger a VM-exit as soon as * possible after the event injection is completed. 
* * This works only if the external interrupt exiting * is at a lower priority than the event injection. * * Although not explicitly specified in APMv2 the * relative priorities were verified empirically. */ ipi_cpu(curcpu, IPI_AST); /* XXX vmm_ipinum? */ } else { vm_nmi_clear(sc->vm, vcpu); /* Inject NMI, vector number is not used */ svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_NMI, IDT_NMI, 0, false); /* virtual NMI blocking is now in effect */ enable_nmi_blocking(sc, vcpu); VCPU_CTR0(sc->vm, vcpu, "Injecting vNMI"); } } extint_pending = vm_extint_pending(sc->vm, vcpu); if (!extint_pending) { if (!vlapic_pending_intr(vlapic, &vector)) goto done; KASSERT(vector >= 16 && vector <= 255, ("invalid vector %d from local APIC", vector)); } else { /* Ask the legacy pic for a vector to inject */ vatpic_pending_intr(sc->vm, &vector); KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d from INTR", vector)); } /* * If the guest has disabled interrupts or is in an interrupt shadow * then we cannot inject the pending interrupt. */ if ((state->rflags & PSL_I) == 0) { VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " "rflags %#lx", vector, state->rflags); need_intr_window = 1; goto done; } if (ctrl->intr_shadow) { VCPU_CTR1(sc->vm, vcpu, "Cannot inject vector %d due to " "interrupt shadow", vector); need_intr_window = 1; goto done; } if (ctrl->eventinj & VMCB_EVENTINJ_VALID) { VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " "eventinj %#lx", vector, ctrl->eventinj); need_intr_window = 1; goto done; } svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false); if (!extint_pending) { vlapic_intr_accepted(vlapic, vector); } else { vm_extint_clear(sc->vm, vcpu); vatpic_intr_accepted(sc->vm, vector); } /* * Force a VM-exit as soon as the vcpu is ready to accept another * interrupt. This is done because the PIC might have another vector * that it wants to inject. Also, if the APIC has a pending interrupt * that was preempted by the ExtInt then it allows us to inject the * APIC vector as soon as possible. */ need_intr_window = 1; done: /* * The guest can modify the TPR by writing to %CR8. In guest mode * the processor reflects this write to V_TPR without hypervisor * intervention. * * The guest can also modify the TPR by writing to it via the memory * mapped APIC page. In this case, the write will be emulated by the * hypervisor. For this reason V_TPR must be updated before every * VMRUN. */ v_tpr = vlapic_get_cr8(vlapic); KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr)); if (ctrl->v_tpr != v_tpr) { VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x", ctrl->v_tpr, v_tpr); ctrl->v_tpr = v_tpr; svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR); } if (need_intr_window) { /* * We use V_IRQ in conjunction with the VINTR intercept to * trap into the hypervisor as soon as a virtual interrupt * can be delivered. * * Since injected events are not subject to intercept checks * we need to ensure that the V_IRQ is not actually going to * be delivered on VM entry. The KASSERT below enforces this. 
*/ KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 || (state->rflags & PSL_I) == 0 || ctrl->intr_shadow, ("Bogus intr_window_exiting: eventinj (%#lx), " "intr_shadow (%u), rflags (%#lx)", ctrl->eventinj, ctrl->intr_shadow, state->rflags)); enable_intr_window_exiting(sc, vcpu); } else { disable_intr_window_exiting(sc, vcpu); } } static __inline void restore_host_tss(void) { struct system_segment_descriptor *tss_sd; /* * The TSS descriptor was in use prior to launching the guest so it * has been marked busy. * * 'ltr' requires the descriptor to be marked available so change the * type to "64-bit available TSS". */ tss_sd = PCPU_GET(tss); tss_sd->sd_type = SDT_SYSTSS; ltr(GSEL(GPROC0_SEL, SEL_KPL)); } static void svm_pmap_activate(struct svm_softc *sc, int vcpuid, pmap_t pmap) { struct svm_vcpu *vcpustate; struct vmcb_ctrl *ctrl; long eptgen; int cpu; bool alloc_asid; cpu = curcpu; CPU_SET_ATOMIC(cpu, &pmap->pm_active); smr_enter(pmap->pm_eptsmr); vcpustate = svm_get_vcpu(sc, vcpuid); ctrl = svm_get_vmcb_ctrl(sc, vcpuid); /* * The TLB entries associated with the vcpu's ASID are not valid * if either of the following conditions is true: * * 1. The vcpu's ASID generation is different than the host cpu's * ASID generation. This happens when the vcpu migrates to a new * host cpu. It can also happen when the number of vcpus executing * on a host cpu is greater than the number of ASIDs available. * * 2. The pmap generation number is different than the value cached in * the 'vcpustate'. This happens when the host invalidates pages * belonging to the guest. * * asidgen eptgen Action * mismatch mismatch * 0 0 (a) * 0 1 (b1) or (b2) * 1 0 (c) * 1 1 (d) * * (a) There is no mismatch in eptgen or ASID generation and therefore * no further action is needed. * * (b1) If the cpu supports FlushByAsid then the vcpu's ASID is * retained and the TLB entries associated with this ASID * are flushed by VMRUN. * * (b2) If the cpu does not support FlushByAsid then a new ASID is * allocated. * * (c) A new ASID is allocated. * * (d) A new ASID is allocated. */ alloc_asid = false; eptgen = atomic_load_long(&pmap->pm_eptgen); ctrl->tlb_ctrl = VMCB_TLB_FLUSH_NOTHING; if (vcpustate->asid.gen != asid[cpu].gen) { alloc_asid = true; /* (c) and (d) */ } else if (vcpustate->eptgen != eptgen) { if (flush_by_asid()) ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST; /* (b1) */ else alloc_asid = true; /* (b2) */ } else { /* * This is the common case (a). */ KASSERT(!alloc_asid, ("ASID allocation not necessary")); KASSERT(ctrl->tlb_ctrl == VMCB_TLB_FLUSH_NOTHING, ("Invalid VMCB tlb_ctrl: %#x", ctrl->tlb_ctrl)); } if (alloc_asid) { if (++asid[cpu].num >= nasid) { asid[cpu].num = 1; if (++asid[cpu].gen == 0) asid[cpu].gen = 1; /* * If this cpu does not support "flush-by-asid" * then flush the entire TLB on a generation * bump. Subsequent ASID allocation in this * generation can be done without a TLB flush. */ if (!flush_by_asid()) ctrl->tlb_ctrl = VMCB_TLB_FLUSH_ALL; } vcpustate->asid.gen = asid[cpu].gen; vcpustate->asid.num = asid[cpu].num; ctrl->asid = vcpustate->asid.num; svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID); /* * If this cpu supports "flush-by-asid" then the TLB * was not flushed after the generation bump. The TLB * is flushed selectively after every new ASID allocation. 
*/ if (flush_by_asid()) ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST; } vcpustate->eptgen = eptgen; KASSERT(ctrl->asid != 0, ("Guest ASID must be non-zero")); KASSERT(ctrl->asid == vcpustate->asid.num, ("ASID mismatch: %u/%u", ctrl->asid, vcpustate->asid.num)); } static void svm_pmap_deactivate(pmap_t pmap) { smr_exit(pmap->pm_eptsmr); CPU_CLR_ATOMIC(curcpu, &pmap->pm_active); } static __inline void disable_gintr(void) { __asm __volatile("clgi"); } static __inline void enable_gintr(void) { __asm __volatile("stgi"); } static __inline void svm_dr_enter_guest(struct svm_regctx *gctx) { /* Save host control debug registers. */ gctx->host_dr7 = rdr7(); gctx->host_debugctl = rdmsr(MSR_DEBUGCTLMSR); /* * Disable debugging in DR7 and DEBUGCTL to avoid triggering * exceptions in the host based on the guest DRx values. The * guest DR6, DR7, and DEBUGCTL are saved/restored in the * VMCB. */ load_dr7(0); wrmsr(MSR_DEBUGCTLMSR, 0); /* Save host debug registers. */ gctx->host_dr0 = rdr0(); gctx->host_dr1 = rdr1(); gctx->host_dr2 = rdr2(); gctx->host_dr3 = rdr3(); gctx->host_dr6 = rdr6(); /* Restore guest debug registers. */ load_dr0(gctx->sctx_dr0); load_dr1(gctx->sctx_dr1); load_dr2(gctx->sctx_dr2); load_dr3(gctx->sctx_dr3); } static __inline void svm_dr_leave_guest(struct svm_regctx *gctx) { /* Save guest debug registers. */ gctx->sctx_dr0 = rdr0(); gctx->sctx_dr1 = rdr1(); gctx->sctx_dr2 = rdr2(); gctx->sctx_dr3 = rdr3(); /* * Restore host debug registers. Restore DR7 and DEBUGCTL * last. */ load_dr0(gctx->host_dr0); load_dr1(gctx->host_dr1); load_dr2(gctx->host_dr2); load_dr3(gctx->host_dr3); load_dr6(gctx->host_dr6); wrmsr(MSR_DEBUGCTLMSR, gctx->host_debugctl); load_dr7(gctx->host_dr7); } /* * Start vcpu with specified RIP. */ static int svm_run(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { struct svm_regctx *gctx; struct svm_softc *svm_sc; struct svm_vcpu *vcpustate; struct vmcb_state *state; struct vmcb_ctrl *ctrl; struct vm_exit *vmexit; struct vlapic *vlapic; struct vm *vm; uint64_t vmcb_pa; int handled; uint16_t ldt_sel; svm_sc = arg; vm = svm_sc->vm; vcpustate = svm_get_vcpu(svm_sc, vcpu); state = svm_get_vmcb_state(svm_sc, vcpu); ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); vmexit = vm_exitinfo(vm, vcpu); vlapic = vm_lapic(vm, vcpu); gctx = svm_get_guest_regctx(svm_sc, vcpu); vmcb_pa = svm_sc->vcpu[vcpu].vmcb_pa; if (vcpustate->lastcpu != curcpu) { /* * Force new ASID allocation by invalidating the generation. */ vcpustate->asid.gen = 0; /* * Invalidate the VMCB state cache by marking all fields dirty. */ svm_set_dirty(svm_sc, vcpu, 0xffffffff); /* * XXX * Setting 'vcpustate->lastcpu' here is a bit premature because * we may return from this function without actually executing * the VMRUN instruction. This could happen if a rendezvous * or an AST is pending on the first time through the loop. * * This works for now but any new side-effects of vcpu * migration should take this case into account. */ vcpustate->lastcpu = curcpu; vmm_stat_incr(vm, vcpu, VCPU_MIGRATIONS, 1); } svm_msr_guest_enter(svm_sc, vcpu); /* Update Guest RIP */ state->rip = rip; do { /* * Disable global interrupts to guarantee atomicity during * loading of guest state. This includes not only the state * loaded by the "vmrun" instruction but also software state * maintained by the hypervisor: suspended and rendezvous * state, NPT generation number, vlapic interrupts etc.
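* On SVM this is achieved with the CLGI/STGI instructions, wrapped by disable_gintr() and enable_gintr() above.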
*/ disable_gintr(); if (vcpu_suspended(evinfo)) { enable_gintr(); vm_exit_suspended(vm, vcpu, state->rip); break; } if (vcpu_rendezvous_pending(evinfo)) { enable_gintr(); vm_exit_rendezvous(vm, vcpu, state->rip); break; } if (vcpu_reqidle(evinfo)) { enable_gintr(); vm_exit_reqidle(vm, vcpu, state->rip); break; } /* The scheduler has asked us to give up the cpu. */ if (vcpu_should_yield(vm, vcpu)) { enable_gintr(); vm_exit_astpending(vm, vcpu, state->rip); break; } if (vcpu_debugged(vm, vcpu)) { enable_gintr(); vm_exit_debug(vm, vcpu, state->rip); break; } /* * #VMEXIT resumes the host with the guest LDTR, so * save the current LDT selector so it can be restored * after an exit. The userspace hypervisor probably * doesn't use an LDT, but save and restore it to be * safe. */ ldt_sel = sldt(); svm_inj_interrupts(svm_sc, vcpu, vlapic); /* * Check the pmap generation and the ASID generation to * ensure that the vcpu does not use stale TLB mappings. */ svm_pmap_activate(svm_sc, vcpu, pmap); ctrl->vmcb_clean = vmcb_clean & ~vcpustate->dirty; vcpustate->dirty = 0; VCPU_CTR1(vm, vcpu, "vmcb clean %#x", ctrl->vmcb_clean); /* Launch Virtual Machine. */ VCPU_CTR1(vm, vcpu, "Resume execution at %#lx", state->rip); svm_dr_enter_guest(gctx); svm_launch(vmcb_pa, gctx, get_pcpu()); svm_dr_leave_guest(gctx); svm_pmap_deactivate(pmap); /* * The host GDTR and IDTR are saved by VMRUN and restored * automatically on #VMEXIT. However, the host TSS needs * to be restored explicitly. */ restore_host_tss(); /* Restore host LDTR. */ lldt(ldt_sel); /* #VMEXIT disables interrupts so re-enable them here. */ enable_gintr(); /* Update 'nextrip' */ vcpustate->nextrip = state->rip; /* Handle #VMEXIT and if required return to user space. */ handled = svm_vmexit(svm_sc, vcpu, vmexit); } while (handled); svm_msr_guest_exit(svm_sc, vcpu); return (0); } static void svm_cleanup(void *arg) { struct svm_softc *sc = arg; contigfree(sc->iopm_bitmap, SVM_IO_BITMAP_SIZE, M_SVM); contigfree(sc->msr_bitmap, SVM_MSR_BITMAP_SIZE, M_SVM); free(sc, M_SVM); } static register_t * swctx_regptr(struct svm_regctx *regctx, int reg) { switch (reg) { case VM_REG_GUEST_RBX: return (&regctx->sctx_rbx); case VM_REG_GUEST_RCX: return (&regctx->sctx_rcx); case VM_REG_GUEST_RDX: return (&regctx->sctx_rdx); case VM_REG_GUEST_RDI: return (&regctx->sctx_rdi); case VM_REG_GUEST_RSI: return (&regctx->sctx_rsi); case VM_REG_GUEST_RBP: return (&regctx->sctx_rbp); case VM_REG_GUEST_R8: return (&regctx->sctx_r8); case VM_REG_GUEST_R9: return (&regctx->sctx_r9); case VM_REG_GUEST_R10: return (&regctx->sctx_r10); case VM_REG_GUEST_R11: return (&regctx->sctx_r11); case VM_REG_GUEST_R12: return (&regctx->sctx_r12); case VM_REG_GUEST_R13: return (&regctx->sctx_r13); case VM_REG_GUEST_R14: return (&regctx->sctx_r14); case VM_REG_GUEST_R15: return (&regctx->sctx_r15); case VM_REG_GUEST_DR0: return (&regctx->sctx_dr0); case VM_REG_GUEST_DR1: return (&regctx->sctx_dr1); case VM_REG_GUEST_DR2: return (&regctx->sctx_dr2); case VM_REG_GUEST_DR3: return (&regctx->sctx_dr3); default: return (NULL); } } static int svm_getreg(void *arg, int vcpu, int ident, uint64_t *val) { struct svm_softc *svm_sc; register_t *reg; svm_sc = arg; if (ident == VM_REG_GUEST_INTR_SHADOW) { return (svm_get_intr_shadow(svm_sc, vcpu, val)); } if (vmcb_read(svm_sc, vcpu, ident, val) == 0) { return (0); } reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); if (reg != NULL) { *val = *reg; return (0); } VCPU_CTR1(svm_sc->vm, vcpu, "svm_getreg: unknown register %#x", ident); return (EINVAL); } static int svm_setreg(void *arg, int vcpu, int ident,
uint64_t val) { struct svm_softc *svm_sc; register_t *reg; svm_sc = arg; if (ident == VM_REG_GUEST_INTR_SHADOW) { return (svm_modify_intr_shadow(svm_sc, vcpu, val)); } /* Do not permit user write access to VMCB fields by offset. */ if (!VMCB_ACCESS_OK(ident)) { if (vmcb_write(svm_sc, vcpu, ident, val) == 0) { return (0); } } reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); if (reg != NULL) { *reg = val; return (0); } if (ident == VM_REG_GUEST_ENTRY_INST_LENGTH) { /* Ignore. */ return (0); } /* * XXX deal with CR3 and invalidate TLB entries tagged with the * vcpu's ASID. This needs to be treated differently depending on * whether 'running' is true/false. */ VCPU_CTR1(svm_sc->vm, vcpu, "svm_setreg: unknown register %#x", ident); return (EINVAL); } static int svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) { return (vmcb_getdesc(arg, vcpu, reg, desc)); } static int svm_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) { return (vmcb_setdesc(arg, vcpu, reg, desc)); } #ifdef BHYVE_SNAPSHOT static int svm_snapshot_reg(void *arg, int vcpu, int ident, struct vm_snapshot_meta *meta) { int ret; uint64_t val; if (meta->op == VM_SNAPSHOT_SAVE) { ret = svm_getreg(arg, vcpu, ident, &val); if (ret != 0) goto done; SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); } else if (meta->op == VM_SNAPSHOT_RESTORE) { SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); ret = svm_setreg(arg, vcpu, ident, val); if (ret != 0) goto done; } else { ret = EINVAL; goto done; } done: return (ret); } #endif static int svm_setcap(void *arg, int vcpu, int type, int val) { struct svm_softc *sc; int error; sc = arg; error = 0; switch (type) { case VM_CAP_HALT_EXIT: svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_HLT, val); break; case VM_CAP_PAUSE_EXIT: svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PAUSE, val); break; case VM_CAP_UNRESTRICTED_GUEST: /* Unrestricted guest execution cannot be disabled in SVM */ if (val == 0) error = EINVAL; break; default: error = ENOENT; break; } return (error); } static int svm_getcap(void *arg, int vcpu, int type, int *retval) { struct svm_softc *sc; int error; sc = arg; error = 0; switch (type) { case VM_CAP_HALT_EXIT: *retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_HLT); break; case VM_CAP_PAUSE_EXIT: *retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PAUSE); break; case VM_CAP_UNRESTRICTED_GUEST: *retval = 1; /* unrestricted guest is always enabled */ break; default: error = ENOENT; break; } return (error); } static struct vmspace * svm_vmspace_alloc(vm_offset_t min, vm_offset_t max) { return (svm_npt_alloc(min, max)); } static void svm_vmspace_free(struct vmspace *vmspace) { svm_npt_free(vmspace); } static struct vlapic * svm_vlapic_init(void *arg, int vcpuid) { struct svm_softc *svm_sc; struct vlapic *vlapic; svm_sc = arg; vlapic = malloc(sizeof(struct vlapic), M_SVM_VLAPIC, M_WAITOK | M_ZERO); vlapic->vm = svm_sc->vm; vlapic->vcpuid = vcpuid; vlapic->apic_page = (struct LAPIC *)&svm_sc->apic_page[vcpuid]; vlapic_init(vlapic); return (vlapic); } static void svm_vlapic_cleanup(void *arg, struct vlapic *vlapic) { vlapic_cleanup(vlapic); free(vlapic, M_SVM_VLAPIC); } #ifdef BHYVE_SNAPSHOT static int svm_snapshot(void *arg, struct vm_snapshot_meta *meta) { /* struct svm_softc is AMD's representation for SVM softc */ struct svm_softc *sc; struct svm_vcpu *vcpu; struct vmcb *vmcb; uint64_t val; int i; int ret; sc = arg; KASSERT(sc != NULL, ("%s: arg was NULL", __func__)); SNAPSHOT_VAR_OR_LEAVE(sc->nptp, 
meta, ret, done); for (i = 0; i < VM_MAXCPU; i++) { vcpu = &sc->vcpu[i]; vmcb = &vcpu->vmcb; /* VMCB fields for virtual cpu i */ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.v_tpr, meta, ret, done); val = vmcb->ctrl.v_tpr; SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); vmcb->ctrl.v_tpr = val; SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.asid, meta, ret, done); val = vmcb->ctrl.np_enable; SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); vmcb->ctrl.np_enable = val; val = vmcb->ctrl.intr_shadow; SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); vmcb->ctrl.intr_shadow = val; SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.tlb_ctrl, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad1, sizeof(vmcb->state.pad1), meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cpl, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad2, sizeof(vmcb->state.pad2), meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.efer, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad3, sizeof(vmcb->state.pad3), meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr4, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr3, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr0, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr7, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr6, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rflags, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rip, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad4, sizeof(vmcb->state.pad4), meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rsp, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad5, sizeof(vmcb->state.pad5), meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rax, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.star, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.lstar, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cstar, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sfmask, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.kernelgsbase, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_cs, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_esp, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_eip, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr2, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad6, sizeof(vmcb->state.pad6), meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.g_pat, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dbgctl, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_from, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_to, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_from, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_to, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad7, sizeof(vmcb->state.pad7), meta, ret, done); /* Snapshot swctx for virtual cpu i */ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbp, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rcx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdi, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rsi, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r8, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r9, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r10, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r11, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r12, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r13, meta, ret, done); 
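/* Remaining guest GPRs (r14/r15), then the guest and host debug register copies kept in the software context. */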
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r14, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r15, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr0, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr1, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr2, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr3, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr0, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr1, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr2, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr3, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr6, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr7, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_debugctl, meta, ret, done); /* Restore other svm_vcpu struct fields */ /* Restore NEXTRIP field */ SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done); /* Restore lastcpu field */ SNAPSHOT_VAR_OR_LEAVE(vcpu->lastcpu, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->dirty, meta, ret, done); /* Restore EPTGEN field - EPT is Extended Page Table */ SNAPSHOT_VAR_OR_LEAVE(vcpu->eptgen, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.gen, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.num, meta, ret, done); /* Set all caches dirty */ if (meta->op == VM_SNAPSHOT_RESTORE) { svm_set_dirty(sc, i, VMCB_CACHE_ASID); svm_set_dirty(sc, i, VMCB_CACHE_IOPM); svm_set_dirty(sc, i, VMCB_CACHE_I); svm_set_dirty(sc, i, VMCB_CACHE_TPR); svm_set_dirty(sc, i, VMCB_CACHE_CR2); svm_set_dirty(sc, i, VMCB_CACHE_CR); svm_set_dirty(sc, i, VMCB_CACHE_DT); svm_set_dirty(sc, i, VMCB_CACHE_SEG); svm_set_dirty(sc, i, VMCB_CACHE_NP); } } if (meta->op == VM_SNAPSHOT_RESTORE) flush_by_asid(); done: return (ret); } static int svm_vmcx_snapshot(void *arg, struct vm_snapshot_meta *meta, int vcpu) { struct vmcb *vmcb; struct svm_softc *sc; int err, running, hostcpu; sc = (struct svm_softc *)arg; err = 0; KASSERT(arg != NULL, ("%s: arg was NULL", __func__)); vmcb = svm_get_vmcb(sc, vcpu); running = vcpu_is_running(sc->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) { printf("%s: %s%d is running", __func__, vm_name(sc->vm), vcpu); return (EINVAL); } err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR0, meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR2, meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR3, meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR4, meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DR7, meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RAX, meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RSP, meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RIP, meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RFLAGS, meta); /* Guest segments */ /* ES */ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_ES, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_ES, meta); /* CS */ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CS, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_CS, meta); /* SS */ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_SS, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_SS, meta); /* DS */ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DS, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_DS, meta); /* FS */ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_FS, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_FS, meta); /* GS */ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_GS, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GS, meta); /* TR */ err
+= svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_TR, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_TR, meta); /* LDTR */ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_LDTR, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_LDTR, meta); /* EFER */ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_EFER, meta); /* IDTR and GDTR */ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_IDTR, meta); err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GDTR, meta); /* Specific AMD registers */ err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_SYSENTER_CS, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_SYSENTER_ESP, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_SYSENTER_EIP, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_NPT_BASE, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_CR_INTERCEPT, 4), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_DR_INTERCEPT, 4), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_EXC_INTERCEPT, 4), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_INST1_INTERCEPT, 4), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_INST2_INTERCEPT, 4), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_TLB_CTRL, 4), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_EXITINFO1, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_EXITINFO2, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_EXITINTINFO, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_VIRQ, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_GUEST_PAT, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_AVIC_BAR, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_AVIC_PAGE, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_AVIC_LT, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_AVIC_PT, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_IO_PERM, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_MSR_PERM, 8), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_ASID, 4), meta); err += vmcb_snapshot_any(sc, vcpu, VMCB_ACCESS(VMCB_OFF_EXIT_REASON, 8), meta); err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_INTR_SHADOW, meta); return (err); } static int svm_restore_tsc(void *arg, int vcpu, uint64_t offset) { int err; err = svm_set_tsc_offset(arg, vcpu, offset); return (err); } #endif const struct vmm_ops vmm_ops_amd = { .modinit = svm_modinit, .modcleanup = svm_modcleanup, .modresume = svm_modresume, .init = svm_init, .run = svm_run, .cleanup = svm_cleanup, .getreg = svm_getreg, .setreg = svm_setreg, .getdesc = svm_getdesc, .setdesc = svm_setdesc, .getcap = svm_getcap, .setcap = svm_setcap, .vmspace_alloc = svm_vmspace_alloc, .vmspace_free = svm_vmspace_free, .vlapic_init = svm_vlapic_init, .vlapic_cleanup = svm_vlapic_cleanup, #ifdef BHYVE_SNAPSHOT .snapshot = svm_snapshot, .vmcx_snapshot = svm_vmcx_snapshot, .restore_tsc = svm_restore_tsc, #endif }; diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 7017e4b9077c..e7ced1b2c3fa 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -1,4216 +1,4216 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * Copyright (c) 2018 Joyent, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include "opt_bhyve_snapshot.h" #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vmm_host.h" #include "vmm_ioport.h" #include "vmm_ktr.h" #include "vmm_stat.h" #include "vatpic.h" #include "vlapic.h" #include "vlapic_priv.h" #include "ept.h" #include "vmx_cpufunc.h" #include "vmx.h" #include "vmx_msr.h" #include "x86.h" #include "vmx_controls.h" #define PINBASED_CTLS_ONE_SETTING \ (PINBASED_EXTINT_EXITING | \ PINBASED_NMI_EXITING | \ PINBASED_VIRTUAL_NMI) #define PINBASED_CTLS_ZERO_SETTING 0 #define PROCBASED_CTLS_WINDOW_SETTING \ (PROCBASED_INT_WINDOW_EXITING | \ PROCBASED_NMI_WINDOW_EXITING) #define PROCBASED_CTLS_ONE_SETTING \ (PROCBASED_SECONDARY_CONTROLS | \ PROCBASED_MWAIT_EXITING | \ PROCBASED_MONITOR_EXITING | \ PROCBASED_IO_EXITING | \ PROCBASED_MSR_BITMAPS | \ PROCBASED_CTLS_WINDOW_SETTING | \ PROCBASED_CR8_LOAD_EXITING | \ PROCBASED_CR8_STORE_EXITING) #define PROCBASED_CTLS_ZERO_SETTING \ (PROCBASED_CR3_LOAD_EXITING | \ PROCBASED_CR3_STORE_EXITING | \ PROCBASED_IO_BITMAPS) #define PROCBASED_CTLS2_ONE_SETTING PROCBASED2_ENABLE_EPT #define PROCBASED_CTLS2_ZERO_SETTING 0 #define VM_EXIT_CTLS_ONE_SETTING \ (VM_EXIT_SAVE_DEBUG_CONTROLS | \ VM_EXIT_HOST_LMA | \ VM_EXIT_SAVE_EFER | \ VM_EXIT_LOAD_EFER | \ VM_EXIT_ACKNOWLEDGE_INTERRUPT) #define VM_EXIT_CTLS_ZERO_SETTING 0 #define VM_ENTRY_CTLS_ONE_SETTING \ (VM_ENTRY_LOAD_DEBUG_CONTROLS | \ VM_ENTRY_LOAD_EFER) #define VM_ENTRY_CTLS_ZERO_SETTING \ (VM_ENTRY_INTO_SMM | \ VM_ENTRY_DEACTIVATE_DUAL_MONITOR) #define HANDLED 1 #define UNHANDLED 0 static MALLOC_DEFINE(M_VMX, "vmx", "vmx"); static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); int vmxon_enabled[MAXCPU]; static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; static uint32_t exit_ctls, entry_ctls; static uint64_t cr0_ones_mask, cr0_zeros_mask; SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr0_ones_mask, CTLFLAG_RD, &cr0_ones_mask, 0, NULL); 
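/* * cr0_ones_mask and cr0_zeros_mask hold the CR0 bits that must be 1 and 0, respectively, while in VMX operation. They are derived from MSR_VMX_CR0_FIXED0/FIXED1 in vmx_modinit() and applied by vmx_fix_cr0() as (cr0 | cr0_ones_mask) & ~cr0_zeros_mask. */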
SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr0_zeros_mask, CTLFLAG_RD, &cr0_zeros_mask, 0, NULL); static uint64_t cr4_ones_mask, cr4_zeros_mask; SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr4_ones_mask, CTLFLAG_RD, &cr4_ones_mask, 0, NULL); SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr4_zeros_mask, CTLFLAG_RD, &cr4_zeros_mask, 0, NULL); static int vmx_initialized; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD, &vmx_initialized, 0, "Intel VMX initialized"); /* * Optional capabilities */ static SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); static int cap_halt_exit; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, halt_exit, CTLFLAG_RD, &cap_halt_exit, 0, "HLT triggers a VM-exit"); static int cap_pause_exit; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit, 0, "PAUSE triggers a VM-exit"); static int cap_rdpid; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdpid, CTLFLAG_RD, &cap_rdpid, 0, "Guests are allowed to use RDPID"); static int cap_rdtscp; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdtscp, CTLFLAG_RD, &cap_rdtscp, 0, "Guests are allowed to use RDTSCP"); static int cap_unrestricted_guest; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, unrestricted_guest, CTLFLAG_RD, &cap_unrestricted_guest, 0, "Unrestricted guests"); static int cap_monitor_trap; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, monitor_trap, CTLFLAG_RD, &cap_monitor_trap, 0, "Monitor trap flag"); static int cap_invpcid; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, invpcid, CTLFLAG_RD, &cap_invpcid, 0, "Guests are allowed to use INVPCID"); static int tpr_shadowing; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, CTLFLAG_RD, &tpr_shadowing, 0, "TPR shadowing support"); static int virtual_interrupt_delivery; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD, &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support"); static int posted_interrupts; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, posted_interrupts, CTLFLAG_RD, &posted_interrupts, 0, "APICv posted interrupt support"); static int pirvec = -1; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD, &pirvec, 0, "APICv posted interrupt vector"); static struct unrhdr *vpid_unr; static u_int vpid_alloc_failed; SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, &vpid_alloc_failed, 0, NULL); int guest_l1d_flush; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD, &guest_l1d_flush, 0, NULL); int guest_l1d_flush_sw; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD, &guest_l1d_flush_sw, 0, NULL); static struct msr_entry msr_load_list[1] __aligned(16); /* * The definitions of SDT probes for VMX. 
*/ SDT_PROBE_DEFINE3(vmm, vmx, exit, entry, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE4(vmm, vmx, exit, taskswitch, "struct vmx *", "int", "struct vm_exit *", "struct vm_task_switch *"); SDT_PROBE_DEFINE4(vmm, vmx, exit, craccess, "struct vmx *", "int", "struct vm_exit *", "uint64_t"); SDT_PROBE_DEFINE4(vmm, vmx, exit, rdmsr, "struct vmx *", "int", "struct vm_exit *", "uint32_t"); SDT_PROBE_DEFINE5(vmm, vmx, exit, wrmsr, "struct vmx *", "int", "struct vm_exit *", "uint32_t", "uint64_t"); SDT_PROBE_DEFINE3(vmm, vmx, exit, halt, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, mtrap, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, pause, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, intrwindow, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE4(vmm, vmx, exit, interrupt, "struct vmx *", "int", "struct vm_exit *", "uint32_t"); SDT_PROBE_DEFINE3(vmm, vmx, exit, nmiwindow, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, inout, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, cpuid, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE5(vmm, vmx, exit, exception, "struct vmx *", "int", "struct vm_exit *", "uint32_t", "int"); SDT_PROBE_DEFINE5(vmm, vmx, exit, nestedfault, "struct vmx *", "int", "struct vm_exit *", "uint64_t", "uint64_t"); SDT_PROBE_DEFINE4(vmm, vmx, exit, mmiofault, "struct vmx *", "int", "struct vm_exit *", "uint64_t"); SDT_PROBE_DEFINE3(vmm, vmx, exit, eoi, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, apicaccess, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE4(vmm, vmx, exit, apicwrite, "struct vmx *", "int", "struct vm_exit *", "struct vlapic *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, xsetbv, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE3(vmm, vmx, exit, vminsn, "struct vmx *", "int", "struct vm_exit *"); SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown, "struct vmx *", "int", "struct vm_exit *", "uint32_t"); SDT_PROBE_DEFINE4(vmm, vmx, exit, return, "struct vmx *", "int", "struct vm_exit *", "int"); /* * Use the last page below 4GB as the APIC access address. This address is * occupied by the boot firmware so it is guaranteed that it will not conflict * with a page in system memory. 
*/ #define APIC_ACCESS_ADDRESS 0xFFFFF000 static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); static void vmx_inject_pir(struct vlapic *vlapic); #ifdef BHYVE_SNAPSHOT static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now); #endif static inline bool host_has_rdpid(void) { return ((cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0); } static inline bool host_has_rdtscp(void) { return ((amd_feature & AMDID_RDTSCP) != 0); } #ifdef KTR static const char * exit_reason_to_str(int reason) { static char reasonbuf[32]; switch (reason) { case EXIT_REASON_EXCEPTION: return "exception"; case EXIT_REASON_EXT_INTR: return "extint"; case EXIT_REASON_TRIPLE_FAULT: return "triplefault"; case EXIT_REASON_INIT: return "init"; case EXIT_REASON_SIPI: return "sipi"; case EXIT_REASON_IO_SMI: return "iosmi"; case EXIT_REASON_SMI: return "smi"; case EXIT_REASON_INTR_WINDOW: return "intrwindow"; case EXIT_REASON_NMI_WINDOW: return "nmiwindow"; case EXIT_REASON_TASK_SWITCH: return "taskswitch"; case EXIT_REASON_CPUID: return "cpuid"; case EXIT_REASON_GETSEC: return "getsec"; case EXIT_REASON_HLT: return "hlt"; case EXIT_REASON_INVD: return "invd"; case EXIT_REASON_INVLPG: return "invlpg"; case EXIT_REASON_RDPMC: return "rdpmc"; case EXIT_REASON_RDTSC: return "rdtsc"; case EXIT_REASON_RSM: return "rsm"; case EXIT_REASON_VMCALL: return "vmcall"; case EXIT_REASON_VMCLEAR: return "vmclear"; case EXIT_REASON_VMLAUNCH: return "vmlaunch"; case EXIT_REASON_VMPTRLD: return "vmptrld"; case EXIT_REASON_VMPTRST: return "vmptrst"; case EXIT_REASON_VMREAD: return "vmread"; case EXIT_REASON_VMRESUME: return "vmresume"; case EXIT_REASON_VMWRITE: return "vmwrite"; case EXIT_REASON_VMXOFF: return "vmxoff"; case EXIT_REASON_VMXON: return "vmxon"; case EXIT_REASON_CR_ACCESS: return "craccess"; case EXIT_REASON_DR_ACCESS: return "draccess"; case EXIT_REASON_INOUT: return "inout"; case EXIT_REASON_RDMSR: return "rdmsr"; case EXIT_REASON_WRMSR: return "wrmsr"; case EXIT_REASON_INVAL_VMCS: return "invalvmcs"; case EXIT_REASON_INVAL_MSR: return "invalmsr"; case EXIT_REASON_MWAIT: return "mwait"; case EXIT_REASON_MTF: return "mtf"; case EXIT_REASON_MONITOR: return "monitor"; case EXIT_REASON_PAUSE: return "pause"; case EXIT_REASON_MCE_DURING_ENTRY: return "mce-during-entry"; case EXIT_REASON_TPR: return "tpr"; case EXIT_REASON_APIC_ACCESS: return "apic-access"; case EXIT_REASON_GDTR_IDTR: return "gdtridtr"; case EXIT_REASON_LDTR_TR: return "ldtrtr"; case EXIT_REASON_EPT_FAULT: return "eptfault"; case EXIT_REASON_EPT_MISCONFIG: return "eptmisconfig"; case EXIT_REASON_INVEPT: return "invept"; case EXIT_REASON_RDTSCP: return "rdtscp"; case EXIT_REASON_VMX_PREEMPT: return "vmxpreempt"; case EXIT_REASON_INVVPID: return "invvpid"; case EXIT_REASON_WBINVD: return "wbinvd"; case EXIT_REASON_XSETBV: return "xsetbv"; case EXIT_REASON_APIC_WRITE: return "apic-write"; default: snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason); return (reasonbuf); } } #endif /* KTR */ static int vmx_allow_x2apic_msrs(struct vmx *vmx) { int i, error; error = 0; /* * Allow readonly access to the following x2APIC MSRs from the guest. 
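* Read-only here means reads are permitted through the MSR bitmap (guest_msr_ro()) while guest writes to these registers still cause a VM-exit.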
*/ error += guest_msr_ro(vmx, MSR_APIC_ID); error += guest_msr_ro(vmx, MSR_APIC_VERSION); error += guest_msr_ro(vmx, MSR_APIC_LDR); error += guest_msr_ro(vmx, MSR_APIC_SVR); for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_ISR0 + i); for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i); for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i); error += guest_msr_ro(vmx, MSR_APIC_ESR); error += guest_msr_ro(vmx, MSR_APIC_LVT_TIMER); error += guest_msr_ro(vmx, MSR_APIC_LVT_THERMAL); error += guest_msr_ro(vmx, MSR_APIC_LVT_PCINT); error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT0); error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT1); error += guest_msr_ro(vmx, MSR_APIC_LVT_ERROR); error += guest_msr_ro(vmx, MSR_APIC_ICR_TIMER); error += guest_msr_ro(vmx, MSR_APIC_DCR_TIMER); error += guest_msr_ro(vmx, MSR_APIC_ICR); /* * Allow TPR, EOI and SELF_IPI MSRs to be read and written by the guest. * * These registers get special treatment described in the section * "Virtualizing MSR-Based APIC Accesses". */ error += guest_msr_rw(vmx, MSR_APIC_TPR); error += guest_msr_rw(vmx, MSR_APIC_EOI); error += guest_msr_rw(vmx, MSR_APIC_SELF_IPI); return (error); } u_long vmx_fix_cr0(u_long cr0) { return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask); } u_long vmx_fix_cr4(u_long cr4) { return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask); } static void vpid_free(int vpid) { if (vpid < 0 || vpid > 0xffff) panic("vpid_free: invalid vpid %d", vpid); /* * VPIDs [0,VM_MAXCPU] are special and are not allocated from * the unit number allocator. */ if (vpid > VM_MAXCPU) free_unr(vpid_unr, vpid); } static void vpid_alloc(uint16_t *vpid, int num) { int i, x; if (num <= 0 || num > VM_MAXCPU) panic("invalid number of vpids requested: %d", num); /* * If the "enable vpid" execution control is not enabled then the * VPID is required to be 0 for all vcpus. */ if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) { for (i = 0; i < num; i++) vpid[i] = 0; return; } /* * Allocate a unique VPID for each vcpu from the unit number allocator. */ for (i = 0; i < num; i++) { x = alloc_unr(vpid_unr); if (x == -1) break; else vpid[i] = x; } if (i < num) { atomic_add_int(&vpid_alloc_failed, 1); /* * If the unit number allocator does not have enough unique * VPIDs then we need to allocate from the [1,VM_MAXCPU] range. * * These VPIDs may not be unique across VMs but this does not * affect correctness because the combined mappings are also * tagged with the EP4TA which is unique for each VM. * * It is still sub-optimal because the invvpid will invalidate * combined mappings for a particular VPID across all EP4TAs. */ while (i-- > 0) vpid_free(vpid[i]); for (i = 0; i < num; i++) vpid[i] = i + 1; } } static void vpid_init(void) { /* * VPID 0 is required when the "enable VPID" execution control is * disabled. * * VPIDs [1,VM_MAXCPU] are used as the "overflow namespace" when the * unit number allocator does not have sufficient unique VPIDs to * satisfy the allocation. * * The remaining VPIDs are managed by the unit number allocator. */ vpid_unr = new_unrhdr(VM_MAXCPU + 1, 0xffff, NULL); } static void vmx_disable(void *arg __unused) { struct invvpid_desc invvpid_desc = { 0 }; struct invept_desc invept_desc = { 0 }; if (vmxon_enabled[curcpu]) { /* * See sections 25.3.3.3 and 25.3.3.4 in Intel Vol 3b. * * VMXON or VMXOFF are not required to invalidate any TLB * caching structures. This prevents potential retention of * cached information in the TLB between distinct VMX episodes.
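* Hence the explicit all-context INVVPID and INVEPT below, issued before executing VMXOFF.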
*/ invvpid(INVVPID_TYPE_ALL_CONTEXTS, invvpid_desc); invept(INVEPT_TYPE_ALL_CONTEXTS, invept_desc); vmxoff(); } load_cr4(rcr4() & ~CR4_VMXE); } static int vmx_modcleanup(void) { if (pirvec >= 0) lapic_ipi_free(pirvec); if (vpid_unr != NULL) { delete_unrhdr(vpid_unr); vpid_unr = NULL; } if (nmi_flush_l1d_sw == 1) nmi_flush_l1d_sw = 0; smp_rendezvous(NULL, vmx_disable, NULL, NULL); return (0); } static void vmx_enable(void *arg __unused) { int error; uint64_t feature_control; feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 || (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) { wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | IA32_FEATURE_CONTROL_VMX_EN | IA32_FEATURE_CONTROL_LOCK); } load_cr4(rcr4() | CR4_VMXE); *(uint32_t *)vmxon_region[curcpu] = vmx_revision(); error = vmxon(vmxon_region[curcpu]); if (error == 0) vmxon_enabled[curcpu] = 1; } static void vmx_modresume(void) { if (vmxon_enabled[curcpu]) vmxon(vmxon_region[curcpu]); } static int vmx_modinit(int ipinum) { int error; uint64_t basic, fixed0, fixed1, feature_control; uint32_t tmp, procbased2_vid_bits; /* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */ if (!(cpu_feature2 & CPUID2_VMX)) { printf("vmx_modinit: processor does not support VMX " "operation\n"); return (ENXIO); } /* * Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits * are set (bits 0 and 2 respectively). */ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 1 && (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) { printf("vmx_modinit: VMX operation disabled by BIOS\n"); return (ENXIO); } /* * Verify capabilities MSR_VMX_BASIC: * - bit 54 indicates support for INS/OUTS decoding */ basic = rdmsr(MSR_VMX_BASIC); if ((basic & (1UL << 54)) == 0) { printf("vmx_modinit: processor does not support desired basic " "capabilities\n"); return (EINVAL); } /* Check support for primary processor-based VM-execution controls */ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_CTLS_ONE_SETTING, PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls); if (error) { printf("vmx_modinit: processor does not support desired " "primary processor-based controls\n"); return (error); } /* Clear the processor-based ctl bits that are set on demand */ procbased_ctls &= ~PROCBASED_CTLS_WINDOW_SETTING; /* Check support for secondary processor-based VM-execution controls */ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, PROCBASED_CTLS2_ONE_SETTING, PROCBASED_CTLS2_ZERO_SETTING, &procbased_ctls2); if (error) { printf("vmx_modinit: processor does not support desired " "secondary processor-based controls\n"); return (error); } /* Check support for VPID */ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_VPID, 0, &tmp); if (error == 0) procbased_ctls2 |= PROCBASED2_ENABLE_VPID; /* Check support for pin-based VM-execution controls */ error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_CTLS_ONE_SETTING, PINBASED_CTLS_ZERO_SETTING, &pinbased_ctls); if (error) { printf("vmx_modinit: processor does not support desired " "pin-based controls\n"); return (error); } /* Check support for VM-exit controls */ error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS, VM_EXIT_CTLS_ONE_SETTING, VM_EXIT_CTLS_ZERO_SETTING, &exit_ctls); if (error) { printf("vmx_modinit: processor does not support desired " "exit controls\n"); return (error); } /* 
Check support for VM-entry controls */ error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS, VM_ENTRY_CTLS_ONE_SETTING, VM_ENTRY_CTLS_ZERO_SETTING, &entry_ctls); if (error) { printf("vmx_modinit: processor does not support desired " "entry controls\n"); return (error); } /* * Check support for optional features by testing them * as individual bits */ cap_halt_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_HLT_EXITING, 0, &tmp) == 0); cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_PROCBASED_CTLS, PROCBASED_MTF, 0, &tmp) == 0); cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_PAUSE_EXITING, 0, &tmp) == 0); /* * Check support for RDPID and/or RDTSCP. * * Support a pass-through-based implementation of these via the * "enable RDTSCP" VM-execution control and the "RDTSC exiting" * VM-execution control. * * The "enable RDTSCP" VM-execution control applies to both RDPID * and RDTSCP (see SDM volume 3, section 25.3, "Changes to * Instruction Behavior in VMX Non-root operation"); this is why * only this VM-execution control needs to be enabled in order to * enable passing through whichever of RDPID and/or RDTSCP are * supported by the host. * * The "RDTSC exiting" VM-execution control applies to both RDTSC * and RDTSCP (again, per SDM volume 3, section 25.3), and is * already set up for RDTSC and RDTSCP pass-through by the current * implementation of RDTSC. * * Although RDPID and RDTSCP are optional capabilities, since there * does not currently seem to be a use case for enabling/disabling * these via libvmmapi, choose not to support this and, instead, * just statically always enable or always disable this support * across all vCPUs on all VMs. (Note that there may be some * complications to providing this functionality, e.g., the MSR * bitmap is currently per-VM rather than per-vCPU while the * capability API wants to be able to control capabilities on a * per-vCPU basis). */ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_RDTSCP, 0, &tmp); cap_rdpid = error == 0 && host_has_rdpid(); cap_rdtscp = error == 0 && host_has_rdtscp(); if (cap_rdpid || cap_rdtscp) procbased_ctls2 |= PROCBASED2_ENABLE_RDTSCP; cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, PROCBASED2_UNRESTRICTED_GUEST, 0, &tmp) == 0); cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, &tmp) == 0); /* * Check support for TPR shadow. */ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0, &tmp); if (error == 0) { tpr_shadowing = 1; TUNABLE_INT_FETCH("hw.vmm.vmx.use_tpr_shadowing", &tpr_shadowing); } if (tpr_shadowing) { procbased_ctls |= PROCBASED_USE_TPR_SHADOW; procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING; procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING; } /* * Check support for virtual interrupt delivery. 
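* All four of the secondary controls grouped below must be settable, and TPR shadowing must already be enabled, before virtual interrupt delivery is turned on.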
*/ procbased2_vid_bits = (PROCBASED2_VIRTUALIZE_APIC_ACCESSES | PROCBASED2_VIRTUALIZE_X2APIC_MODE | PROCBASED2_APIC_REGISTER_VIRTUALIZATION | PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY); error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, procbased2_vid_bits, 0, &tmp); if (error == 0 && tpr_shadowing) { virtual_interrupt_delivery = 1; TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid", &virtual_interrupt_delivery); } if (virtual_interrupt_delivery) { procbased_ctls |= PROCBASED_USE_TPR_SHADOW; procbased_ctls2 |= procbased2_vid_bits; procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE; /* * Check for Posted Interrupts only if Virtual Interrupt * Delivery is enabled. */ error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_POSTED_INTERRUPT, 0, &tmp); if (error == 0) { pirvec = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) : &IDTVEC(justreturn)); if (pirvec < 0) { if (bootverbose) { printf("vmx_modinit: unable to " "allocate posted interrupt " "vector\n"); } } else { posted_interrupts = 1; TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_pir", &posted_interrupts); } } } if (posted_interrupts) pinbased_ctls |= PINBASED_POSTED_INTERRUPT; /* Initialize EPT */ error = ept_init(ipinum); if (error) { printf("vmx_modinit: ept initialization failed (%d)\n", error); return (error); } guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0; TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush); /* * L1D cache flush is enabled. Use IA32_FLUSH_CMD MSR when * available. Otherwise fall back to the software flush * method which loads enough data from the kernel text to * flush existing L1D content, both on VMX entry and on NMI * return. */ if (guest_l1d_flush) { if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) { guest_l1d_flush_sw = 1; TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw", &guest_l1d_flush_sw); } if (guest_l1d_flush_sw) { if (nmi_flush_l1d_sw <= 1) nmi_flush_l1d_sw = 1; } else { msr_load_list[0].index = MSR_IA32_FLUSH_CMD; msr_load_list[0].val = IA32_FLUSH_CMD_L1D; } } /* * Stash the cr0 and cr4 bits that must be fixed to 0 or 1 */ fixed0 = rdmsr(MSR_VMX_CR0_FIXED0); fixed1 = rdmsr(MSR_VMX_CR0_FIXED1); cr0_ones_mask = fixed0 & fixed1; cr0_zeros_mask = ~fixed0 & ~fixed1; /* * CR0_PE and CR0_PG can be set to zero in VMX non-root operation * if unrestricted guest execution is allowed. */ if (cap_unrestricted_guest) cr0_ones_mask &= ~(CR0_PG | CR0_PE); /* * Do not allow the guest to set CR0_NW or CR0_CD. 
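* These bits end up in cr0_zeros_mask, so vmx_fix_cr0() masks them off whenever it is used to sanitize a CR0 value.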
*/ cr0_zeros_mask |= (CR0_NW | CR0_CD); fixed0 = rdmsr(MSR_VMX_CR4_FIXED0); fixed1 = rdmsr(MSR_VMX_CR4_FIXED1); cr4_ones_mask = fixed0 & fixed1; cr4_zeros_mask = ~fixed0 & ~fixed1; vpid_init(); vmx_msr_init(); /* enable VMX operation */ smp_rendezvous(NULL, vmx_enable, NULL, NULL); vmx_initialized = 1; return (0); } static void vmx_trigger_hostintr(int vector) { uintptr_t func; struct gate_descriptor *gd; gd = &idt[vector]; KASSERT(vector >= 32 && vector <= 255, ("vmx_trigger_hostintr: " "invalid vector %d", vector)); KASSERT(gd->gd_p == 1, ("gate descriptor for vector %d not present", vector)); KASSERT(gd->gd_type == SDT_SYSIGT, ("gate descriptor for vector %d " "has invalid type %d", vector, gd->gd_type)); KASSERT(gd->gd_dpl == SEL_KPL, ("gate descriptor for vector %d " "has invalid dpl %d", vector, gd->gd_dpl)); KASSERT(gd->gd_selector == GSEL(GCODE_SEL, SEL_KPL), ("gate descriptor " "for vector %d has invalid selector %d", vector, gd->gd_selector)); KASSERT(gd->gd_ist == 0, ("gate descriptor for vector %d has invalid " "IST %d", vector, gd->gd_ist)); func = ((long)gd->gd_hioffset << 16 | gd->gd_looffset); vmx_call_isr(func); } static int vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial) { int error, mask_ident, shadow_ident; uint64_t mask_value; if (which != 0 && which != 4) panic("vmx_setup_cr_shadow: unknown cr%d", which); if (which == 0) { mask_ident = VMCS_CR0_MASK; mask_value = cr0_ones_mask | cr0_zeros_mask; shadow_ident = VMCS_CR0_SHADOW; } else { mask_ident = VMCS_CR4_MASK; mask_value = cr4_ones_mask | cr4_zeros_mask; shadow_ident = VMCS_CR4_SHADOW; } error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value); if (error) return (error); error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident), initial); if (error) return (error); return (0); } #define vmx_setup_cr0_shadow(vmcs,init) vmx_setup_cr_shadow(0, (vmcs), (init)) #define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init)) static void * vmx_init(struct vm *vm, pmap_t pmap) { uint16_t vpid[VM_MAXCPU]; int i, error; struct vmx *vmx; struct vmcs *vmcs; uint32_t exc_bitmap; uint16_t maxcpus; vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO); if ((uintptr_t)vmx & PAGE_MASK) { panic("malloc of struct vmx not aligned on %d byte boundary", PAGE_SIZE); } vmx->vm = vm; vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pmltop)); /* * Clean up EPTP-tagged guest physical and combined mappings * * VMX transitions are not required to invalidate any guest physical * mappings. So, it may be possible for stale guest physical mappings * to be present in the processor TLBs. * * Combined mappings for this EP4TA are also invalidated for all VPIDs. */ ept_invalidate_mappings(vmx->eptp); msr_bitmap_initialize(vmx->msr_bitmap); /* * It is safe to allow direct access to MSR_GSBASE and MSR_FSBASE. * The guest FSBASE and GSBASE are saved and restored during * vm-exit and vm-entry respectively. The host FSBASE and GSBASE are * always restored from the vmcs host state area on vm-exit. * * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in * how they are saved/restored so can be directly accessed by the * guest. * * MSR_EFER is saved and restored in the guest VMCS area on a * VM exit and entry respectively. It is also restored from the * host VMCS area on a VM exit. * * The TSC MSR is exposed read-only. Writes are disallowed as * that will impact the host TSC. 
If the guest does a write * the "use TSC offsetting" execution control is enabled and the * difference between the host TSC and the guest TSC is written * into the TSC offset in the VMCS. * * Guest TSC_AUX support is enabled if any of guest RDPID and/or * guest RDTSCP support are enabled (since, as per Table 2-2 in SDM * volume 4, TSC_AUX is supported if any of RDPID and/or RDTSCP are * supported). If guest TSC_AUX support is enabled, TSC_AUX is * exposed read-only so that the VMM can do one fewer MSR read per * exit than if this register were exposed read-write; the guest * restore value can be updated during guest writes (expected to be * rare) instead of during all exits (common). */ if (guest_msr_rw(vmx, MSR_GSBASE) || guest_msr_rw(vmx, MSR_FSBASE) || guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) || guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) || guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) || guest_msr_rw(vmx, MSR_EFER) || guest_msr_ro(vmx, MSR_TSC) || ((cap_rdpid || cap_rdtscp) && guest_msr_ro(vmx, MSR_TSC_AUX))) panic("vmx_init: error setting guest msr access"); vpid_alloc(vpid, VM_MAXCPU); if (virtual_interrupt_delivery) { error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE, APIC_ACCESS_ADDRESS); /* XXX this should really return an error to the caller */ KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error)); } maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { vmcs = &vmx->vmcs[i]; vmcs->identifier = vmx_revision(); error = vmclear(vmcs); if (error != 0) { panic("vmx_init: vmclear error %d on vcpu %d\n", error, i); } vmx_msr_guest_init(vmx, i); error = vmcs_init(vmcs); KASSERT(error == 0, ("vmcs_init error %d", error)); VMPTRLD(vmcs); error = 0; error += vmwrite(VMCS_HOST_RSP, (u_long)&vmx->ctx[i]); error += vmwrite(VMCS_EPTP, vmx->eptp); error += vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls); error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls); error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2); error += vmwrite(VMCS_EXIT_CTLS, exit_ctls); error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls); error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap)); error += vmwrite(VMCS_VPID, vpid[i]); if (guest_l1d_flush && !guest_l1d_flush_sw) { vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract( (vm_offset_t)&msr_load_list[0])); vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT, nitems(msr_load_list)); vmcs_write(VMCS_EXIT_MSR_STORE, 0); vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0); } /* exception bitmap */ if (vcpu_trace_exceptions(vm, i)) exc_bitmap = 0xffffffff; else exc_bitmap = 1 << IDT_MC; error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap); vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); if (tpr_shadowing) { error += vmwrite(VMCS_VIRTUAL_APIC, vtophys(&vmx->apic_page[i])); } if (virtual_interrupt_delivery) { error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); error += vmwrite(VMCS_EOI_EXIT0, 0); error += vmwrite(VMCS_EOI_EXIT1, 0); error += vmwrite(VMCS_EOI_EXIT2, 0); error += vmwrite(VMCS_EOI_EXIT3, 0); } if (posted_interrupts) { error += vmwrite(VMCS_PIR_VECTOR, pirvec); error += vmwrite(VMCS_PIR_DESC, vtophys(&vmx->pir_desc[i])); } VMCLEAR(vmcs); KASSERT(error == 0, ("vmx_init: error customizing the vmcs")); vmx->cap[i].set = 0; vmx->cap[i].set |= cap_rdpid != 0 ? 1 << VM_CAP_RDPID : 0; vmx->cap[i].set |= cap_rdtscp != 0 ? 
1 << VM_CAP_RDTSCP : 0; vmx->cap[i].proc_ctls = procbased_ctls; vmx->cap[i].proc_ctls2 = procbased_ctls2; vmx->cap[i].exc_bitmap = exc_bitmap; vmx->state[i].nextrip = ~0; vmx->state[i].lastcpu = NOCPU; vmx->state[i].vpid = vpid[i]; /* * Set up the CR0/4 shadows, and init the read shadow * to the power-on register value from the Intel Sys Arch. * CR0 - 0x60000010 * CR4 - 0 */ error = vmx_setup_cr0_shadow(vmcs, 0x60000010); if (error != 0) panic("vmx_setup_cr0_shadow %d", error); error = vmx_setup_cr4_shadow(vmcs, 0); if (error != 0) panic("vmx_setup_cr4_shadow %d", error); vmx->ctx[i].pmap = pmap; } return (vmx); } static int vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) { int handled; handled = x86_emulate_cpuid(vm, vcpu, (uint64_t *)&vmxctx->guest_rax, (uint64_t *)&vmxctx->guest_rbx, (uint64_t *)&vmxctx->guest_rcx, (uint64_t *)&vmxctx->guest_rdx); return (handled); } static __inline void vmx_run_trace(struct vmx *vmx, int vcpu) { #ifdef KTR VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip()); #endif } static __inline void vmx_exit_trace(struct vmx *vmx, int vcpu, uint64_t rip, uint32_t exit_reason, int handled) { #ifdef KTR VCPU_CTR3(vmx->vm, vcpu, "%s %s vmexit at 0x%0lx", handled ? "handled" : "unhandled", exit_reason_to_str(exit_reason), rip); #endif } static __inline void vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) { #ifdef KTR VCPU_CTR1(vmx->vm, vcpu, "astpending vmexit at 0x%0lx", rip); #endif } static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done"); /* * Invalidate guest mappings identified by its vpid from the TLB. */ static __inline void vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running) { struct vmxstate *vmxstate; struct invvpid_desc invvpid_desc; vmxstate = &vmx->state[vcpu]; if (vmxstate->vpid == 0) return; if (!running) { /* * Set the 'lastcpu' to an invalid host cpu. * * This will invalidate TLB entries tagged with the vcpu's * vpid the next time it runs via vmx_set_pcpu_defaults(). */ vmxstate->lastcpu = NOCPU; return; } KASSERT(curthread->td_critnest > 0, ("%s: vcpu %d running outside " "critical section", __func__, vcpu)); /* * Invalidate all mappings tagged with 'vpid' * * We do this because this vcpu was executing on a different host * cpu when it last ran. We do not track whether it invalidated * mappings associated with its 'vpid' during that run. So we must * assume that the mappings associated with 'vpid' on 'curcpu' are * stale and invalidate them. * * Note that we incur this penalty only when the scheduler chooses to * move the thread associated with this vcpu between host cpus. * * Note also that this will invalidate mappings tagged with 'vpid' * for "all" EP4TAs. */ if (atomic_load_long(&pmap->pm_eptgen) == vmx->eptgen[curcpu]) { invvpid_desc._res1 = 0; invvpid_desc._res2 = 0; invvpid_desc.vpid = vmxstate->vpid; invvpid_desc.linear_addr = 0; invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc); vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_DONE, 1); } else { /* * The invvpid can be skipped if an invept is going to * be performed before entering the guest. The invept * will invalidate combined mappings tagged with * 'vmx->eptp' for all vpids. 
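* That pending invept is what the eptgen comparison above detects: the per-cpu copy in 'vmx->eptgen[curcpu]' lags 'pmap->pm_eptgen' whenever an EPT invalidation is still outstanding.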
*/ vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_SAVED, 1); } } static void vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) { struct vmxstate *vmxstate; vmxstate = &vmx->state[vcpu]; if (vmxstate->lastcpu == curcpu) return; vmxstate->lastcpu = curcpu; vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); vmx_invvpid(vmx, vcpu, pmap, 1); } /* * We depend on 'procbased_ctls' to have the Interrupt Window Exiting bit set. */ CTASSERT((PROCBASED_CTLS_ONE_SETTING & PROCBASED_INT_WINDOW_EXITING) != 0); static void __inline vmx_set_int_window_exiting(struct vmx *vmx, int vcpu) { if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) == 0) { vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting"); } } static void __inline vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu) { KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0, ("intr_window_exiting not set: %#x", vmx->cap[vcpu].proc_ctls)); vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); } static void __inline vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu) { if ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) == 0) { vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting"); } } static void __inline vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) { KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0, ("nmi_window_exiting not set %#x", vmx->cap[vcpu].proc_ctls)); vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); } int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) { int error; if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) { vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting"); } error = vmwrite(VMCS_TSC_OFFSET, offset); #ifdef BHYVE_SNAPSHOT if (error == 0) error = vm_set_tsc_offset(vmx->vm, vcpu, offset); #endif return (error); } #define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \ VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) #define HWINTR_BLOCKING (VMCS_INTERRUPTIBILITY_STI_BLOCKING | \ VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) static void vmx_inject_nmi(struct vmx *vmx, int vcpu) { uint32_t gi, info; gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); KASSERT((gi & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest " "interruptibility-state %#x", gi)); info = vmcs_read(VMCS_ENTRY_INTR_INFO); KASSERT((info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid " "VM-entry interruption information %#x", info)); /* * Inject the virtual NMI. The vector must be the NMI IDT entry * or the VMCS entry check will fail. 
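* IDT_NMI is vector 2; the interruption type below is set to NMI and the valid bit is set.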
*/ info = IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID; vmcs_write(VMCS_ENTRY_INTR_INFO, info); VCPU_CTR0(vmx->vm, vcpu, "Injecting vNMI"); /* Clear the request */ vm_nmi_clear(vmx->vm, vcpu); } static void vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, uint64_t guestrip) { int vector, need_nmi_exiting, extint_pending; uint64_t rflags, entryinfo; uint32_t gi, info; if (vmx->state[vcpu].nextrip != guestrip) { gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); if (gi & HWINTR_BLOCKING) { VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking " "cleared due to rip change: %#lx/%#lx", vmx->state[vcpu].nextrip, guestrip); gi &= ~HWINTR_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); } } if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry " "intinfo is not valid: %#lx", __func__, entryinfo)); info = vmcs_read(VMCS_ENTRY_INTR_INFO); KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject " "pending exception: %#lx/%#x", __func__, entryinfo, info)); info = entryinfo; vector = info & 0xff; if (vector == IDT_BP || vector == IDT_OF) { /* * VT-x requires #BP and #OF to be injected as software * exceptions. */ info &= ~VMCS_INTR_T_MASK; info |= VMCS_INTR_T_SWEXCEPTION; } if (info & VMCS_INTR_DEL_ERRCODE) vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32); vmcs_write(VMCS_ENTRY_INTR_INFO, info); } if (vm_nmi_pending(vmx->vm, vcpu)) { /* * If there are no conditions blocking NMI injection then * inject it directly here otherwise enable "NMI window * exiting" to inject it as soon as we can. * * We also check for STI_BLOCKING because some implementations * don't allow NMI injection in this case. If we are running * on a processor that doesn't have this restriction it will * immediately exit and the NMI will be injected in the * "NMI window exiting" handler. */ need_nmi_exiting = 1; gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) { info = vmcs_read(VMCS_ENTRY_INTR_INFO); if ((info & VMCS_INTR_VALID) == 0) { vmx_inject_nmi(vmx, vcpu); need_nmi_exiting = 0; } else { VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI " "due to VM-entry intr info %#x", info); } } else { VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to " "Guest Interruptibility-state %#x", gi); } if (need_nmi_exiting) vmx_set_nmi_window_exiting(vmx, vcpu); } extint_pending = vm_extint_pending(vmx->vm, vcpu); if (!extint_pending && virtual_interrupt_delivery) { vmx_inject_pir(vlapic); return; } /* * If interrupt-window exiting is already in effect then don't bother * checking for pending interrupts. This is just an optimization and * not needed for correctness. */ if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0) { VCPU_CTR0(vmx->vm, vcpu, "Skip interrupt injection due to " "pending int_window_exiting"); return; } if (!extint_pending) { /* Ask the local apic for a vector to inject */ if (!vlapic_pending_intr(vlapic, &vector)) return; /* * From the Intel SDM, Volume 3, Section "Maskable * Hardware Interrupts": * - maskable interrupt vectors [16,255] can be delivered * through the local APIC. */ KASSERT(vector >= 16 && vector <= 255, ("invalid vector %d from local APIC", vector)); } else { /* Ask the legacy pic for a vector to inject */ vatpic_pending_intr(vmx->vm, &vector); /* * From the Intel SDM, Volume 3, Section "Maskable * Hardware Interrupts": * - maskable interrupt vectors [0,255] can be delivered * through the INTR pin. 
*/ KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d from INTR", vector)); } /* Check RFLAGS.IF and the interruptibility state of the guest */ rflags = vmcs_read(VMCS_GUEST_RFLAGS); if ((rflags & PSL_I) == 0) { VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " "rflags %#lx", vector, rflags); goto cantinject; } gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); if (gi & HWINTR_BLOCKING) { VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " "Guest Interruptibility-state %#x", vector, gi); goto cantinject; } info = vmcs_read(VMCS_ENTRY_INTR_INFO); if (info & VMCS_INTR_VALID) { /* * This is expected and could happen for multiple reasons: * - A vectoring VM-entry was aborted due to astpending * - A VM-exit happened during event injection. * - An exception was injected above. * - An NMI was injected above or after "NMI window exiting" */ VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " "VM-entry intr info %#x", vector, info); goto cantinject; } /* Inject the interrupt */ info = VMCS_INTR_T_HWINTR | VMCS_INTR_VALID; info |= vector; vmcs_write(VMCS_ENTRY_INTR_INFO, info); if (!extint_pending) { /* Update the Local APIC ISR */ vlapic_intr_accepted(vlapic, vector); } else { vm_extint_clear(vmx->vm, vcpu); vatpic_intr_accepted(vmx->vm, vector); /* * After we accepted the current ExtINT the PIC may * have posted another one. If that is the case, set * the Interrupt Window Exiting execution control so * we can inject that one too. * * Also, interrupt window exiting allows us to inject any * pending APIC vector that was preempted by the ExtINT * as soon as possible. This applies both for the software * emulated vlapic and the hardware assisted virtual APIC. */ vmx_set_int_window_exiting(vmx, vcpu); } VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector); return; cantinject: /* * Set the Interrupt Window Exiting execution control so we can inject * the interrupt as soon as blocking condition goes away. */ vmx_set_int_window_exiting(vmx, vcpu); } /* * If the Virtual NMIs execution control is '1' then the logical processor * tracks virtual-NMI blocking in the Guest Interruptibility-state field of * the VMCS. An IRET instruction in VMX non-root operation will remove any * virtual-NMI blocking. * * This unblocking occurs even if the IRET causes a fault. In this case the * hypervisor needs to restore virtual-NMI blocking before resuming the guest. */ static void vmx_restore_nmi_blocking(struct vmx *vmx, int vcpuid) { uint32_t gi; VCPU_CTR0(vmx->vm, vcpuid, "Restore Virtual-NMI blocking"); gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); } static void vmx_clear_nmi_blocking(struct vmx *vmx, int vcpuid) { uint32_t gi; VCPU_CTR0(vmx->vm, vcpuid, "Clear Virtual-NMI blocking"); gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); gi &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); } static void vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) { uint32_t gi; gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING, ("NMI blocking is not in effect %#x", gi)); } static int vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { struct vmxctx *vmxctx; uint64_t xcrval; const struct xsave_limits *limits; vmxctx = &vmx->ctx[vcpu]; limits = vmm_get_xsave_limits(); /* * Note that the processor raises a GP# fault on its own if * xsetbv is executed for CPL != 0, so we do not have to * emulate that fault here. 
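 *
 * The checks that follow mirror the architectural fault conditions
 * for XSETBV: a non-zero XCR index, bits outside the permitted XCR0
 * mask, a clear x87 bit, AVX without SSE, AVX-512 without AVX and
 * mismatched MPX BNDREGS/BNDCSR bits all cause #GP to be injected
 * into the guest, while attempting XSETBV without XSAVE enabled
 * yields #UD.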
*/ /* Only xcr0 is supported. */ if (vmxctx->guest_rcx != 0) { vm_inject_gp(vmx->vm, vcpu); return (HANDLED); } /* We only handle xcr0 if both the host and guest have XSAVE enabled. */ if (!limits->xsave_enabled || !(vmcs_read(VMCS_GUEST_CR4) & CR4_XSAVE)) { vm_inject_ud(vmx->vm, vcpu); return (HANDLED); } xcrval = vmxctx->guest_rdx << 32 | (vmxctx->guest_rax & 0xffffffff); if ((xcrval & ~limits->xcr0_allowed) != 0) { vm_inject_gp(vmx->vm, vcpu); return (HANDLED); } if (!(xcrval & XFEATURE_ENABLED_X87)) { vm_inject_gp(vmx->vm, vcpu); return (HANDLED); } /* AVX (YMM_Hi128) requires SSE. */ if (xcrval & XFEATURE_ENABLED_AVX && (xcrval & XFEATURE_AVX) != XFEATURE_AVX) { vm_inject_gp(vmx->vm, vcpu); return (HANDLED); } /* * AVX512 requires base AVX (YMM_Hi128) as well as OpMask, * ZMM_Hi256, and Hi16_ZMM. */ if (xcrval & XFEATURE_AVX512 && (xcrval & (XFEATURE_AVX512 | XFEATURE_AVX)) != (XFEATURE_AVX512 | XFEATURE_AVX)) { vm_inject_gp(vmx->vm, vcpu); return (HANDLED); } /* * Intel MPX requires both bound register state flags to be * set. */ if (((xcrval & XFEATURE_ENABLED_BNDREGS) != 0) != ((xcrval & XFEATURE_ENABLED_BNDCSR) != 0)) { vm_inject_gp(vmx->vm, vcpu); return (HANDLED); } /* * This runs "inside" vmrun() with the guest's FPU state, so * modifying xcr0 directly modifies the guest's xcr0, not the * host's. */ load_xcr(0, xcrval); return (HANDLED); } static uint64_t vmx_get_guest_reg(struct vmx *vmx, int vcpu, int ident) { const struct vmxctx *vmxctx; vmxctx = &vmx->ctx[vcpu]; switch (ident) { case 0: return (vmxctx->guest_rax); case 1: return (vmxctx->guest_rcx); case 2: return (vmxctx->guest_rdx); case 3: return (vmxctx->guest_rbx); case 4: return (vmcs_read(VMCS_GUEST_RSP)); case 5: return (vmxctx->guest_rbp); case 6: return (vmxctx->guest_rsi); case 7: return (vmxctx->guest_rdi); case 8: return (vmxctx->guest_r8); case 9: return (vmxctx->guest_r9); case 10: return (vmxctx->guest_r10); case 11: return (vmxctx->guest_r11); case 12: return (vmxctx->guest_r12); case 13: return (vmxctx->guest_r13); case 14: return (vmxctx->guest_r14); case 15: return (vmxctx->guest_r15); default: panic("invalid vmx register %d", ident); } } static void vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval) { struct vmxctx *vmxctx; vmxctx = &vmx->ctx[vcpu]; switch (ident) { case 0: vmxctx->guest_rax = regval; break; case 1: vmxctx->guest_rcx = regval; break; case 2: vmxctx->guest_rdx = regval; break; case 3: vmxctx->guest_rbx = regval; break; case 4: vmcs_write(VMCS_GUEST_RSP, regval); break; case 5: vmxctx->guest_rbp = regval; break; case 6: vmxctx->guest_rsi = regval; break; case 7: vmxctx->guest_rdi = regval; break; case 8: vmxctx->guest_r8 = regval; break; case 9: vmxctx->guest_r9 = regval; break; case 10: vmxctx->guest_r10 = regval; break; case 11: vmxctx->guest_r11 = regval; break; case 12: vmxctx->guest_r12 = regval; break; case 13: vmxctx->guest_r13 = regval; break; case 14: vmxctx->guest_r14 = regval; break; case 15: vmxctx->guest_r15 = regval; break; default: panic("invalid vmx register %d", ident); } } static int vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) { uint64_t crval, regval; /* We only handle mov to %cr0 at this time */ if ((exitqual & 0xf0) != 0x00) return (UNHANDLED); regval = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf); vmcs_write(VMCS_CR0_SHADOW, regval); crval = regval | cr0_ones_mask; crval &= ~cr0_zeros_mask; vmcs_write(VMCS_GUEST_CR0, crval); if (regval & CR0_PG) { uint64_t efer, entry_ctls; /* * If CR0.PG is 1 and EFER.LME is 1 
then EFER.LMA and * the "IA-32e mode guest" bit in VM-entry control must be * equal. */ efer = vmcs_read(VMCS_GUEST_IA32_EFER); if (efer & EFER_LME) { efer |= EFER_LMA; vmcs_write(VMCS_GUEST_IA32_EFER, efer); entry_ctls = vmcs_read(VMCS_ENTRY_CTLS); entry_ctls |= VM_ENTRY_GUEST_LMA; vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); } } return (HANDLED); } static int vmx_emulate_cr4_access(struct vmx *vmx, int vcpu, uint64_t exitqual) { uint64_t crval, regval; /* We only handle mov to %cr4 at this time */ if ((exitqual & 0xf0) != 0x00) return (UNHANDLED); regval = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf); vmcs_write(VMCS_CR4_SHADOW, regval); crval = regval | cr4_ones_mask; crval &= ~cr4_zeros_mask; vmcs_write(VMCS_GUEST_CR4, crval); return (HANDLED); } static int vmx_emulate_cr8_access(struct vmx *vmx, int vcpu, uint64_t exitqual) { struct vlapic *vlapic; uint64_t cr8; int regnum; /* We only handle mov %cr8 to/from a register at this time. */ if ((exitqual & 0xe0) != 0x00) { return (UNHANDLED); } vlapic = vm_lapic(vmx->vm, vcpu); regnum = (exitqual >> 8) & 0xf; if (exitqual & 0x10) { cr8 = vlapic_get_cr8(vlapic); vmx_set_guest_reg(vmx, vcpu, regnum, cr8); } else { cr8 = vmx_get_guest_reg(vmx, vcpu, regnum); vlapic_set_cr8(vlapic, cr8); } return (HANDLED); } /* * From section "Guest Register State" in the Intel SDM: CPL = SS.DPL */ static int vmx_cpl(void) { uint32_t ssar; ssar = vmcs_read(VMCS_GUEST_SS_ACCESS_RIGHTS); return ((ssar >> 5) & 0x3); } static enum vm_cpu_mode vmx_cpu_mode(void) { uint32_t csar; if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LMA) { csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS); if (csar & 0x2000) return (CPU_MODE_64BIT); /* CS.L = 1 */ else return (CPU_MODE_COMPATIBILITY); } else if (vmcs_read(VMCS_GUEST_CR0) & CR0_PE) { return (CPU_MODE_PROTECTED); } else { return (CPU_MODE_REAL); } } static enum vm_paging_mode vmx_paging_mode(void) { uint64_t cr4; if (!(vmcs_read(VMCS_GUEST_CR0) & CR0_PG)) return (PAGING_MODE_FLAT); cr4 = vmcs_read(VMCS_GUEST_CR4); if (!(cr4 & CR4_PAE)) return (PAGING_MODE_32); if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LME) { if (!(cr4 & CR4_LA57)) return (PAGING_MODE_64); return (PAGING_MODE_64_LA57); } else return (PAGING_MODE_PAE); } static uint64_t inout_str_index(struct vmx *vmx, int vcpuid, int in) { uint64_t val; int error; enum vm_reg_name reg; reg = in ? 
VM_REG_GUEST_RDI : VM_REG_GUEST_RSI; error = vmx_getreg(vmx, vcpuid, reg, &val); KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error)); return (val); } static uint64_t inout_str_count(struct vmx *vmx, int vcpuid, int rep) { uint64_t val; int error; if (rep) { error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val); KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error)); } else { val = 1; } return (val); } static int inout_str_addrsize(uint32_t inst_info) { uint32_t size; size = (inst_info >> 7) & 0x7; switch (size) { case 0: return (2); /* 16 bit */ case 1: return (4); /* 32 bit */ case 2: return (8); /* 64 bit */ default: panic("%s: invalid size encoding %d", __func__, size); } } static void inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in, struct vm_inout_str *vis) { int error, s; if (in) { vis->seg_name = VM_REG_GUEST_ES; } else { s = (inst_info >> 15) & 0x7; vis->seg_name = vm_segment_name(s); } error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc); KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error)); } static void vmx_paging_info(struct vm_guest_paging *paging) { paging->cr3 = vmcs_guest_cr3(); paging->cpl = vmx_cpl(); paging->cpu_mode = vmx_cpu_mode(); paging->paging_mode = vmx_paging_mode(); } static void vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla) { struct vm_guest_paging *paging; uint32_t csar; paging = &vmexit->u.inst_emul.paging; vmexit->exitcode = VM_EXITCODE_INST_EMUL; vmexit->inst_length = 0; vmexit->u.inst_emul.gpa = gpa; vmexit->u.inst_emul.gla = gla; vmx_paging_info(paging); switch (paging->cpu_mode) { case CPU_MODE_REAL: vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE); vmexit->u.inst_emul.cs_d = 0; break; case CPU_MODE_PROTECTED: case CPU_MODE_COMPATIBILITY: vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE); csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS); vmexit->u.inst_emul.cs_d = SEG_DESC_DEF32(csar); break; default: vmexit->u.inst_emul.cs_base = 0; vmexit->u.inst_emul.cs_d = 0; break; } vie_init(&vmexit->u.inst_emul.vie, NULL, 0); } static int ept_fault_type(uint64_t ept_qual) { int fault_type; if (ept_qual & EPT_VIOLATION_DATA_WRITE) fault_type = VM_PROT_WRITE; else if (ept_qual & EPT_VIOLATION_INST_FETCH) fault_type = VM_PROT_EXECUTE; else fault_type= VM_PROT_READ; return (fault_type); } static bool ept_emulation_fault(uint64_t ept_qual) { int read, write; /* EPT fault on an instruction fetch doesn't make sense here */ if (ept_qual & EPT_VIOLATION_INST_FETCH) return (false); /* EPT fault must be a read fault or a write fault */ read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0; write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0; if ((read | write) == 0) return (false); /* * The EPT violation must have been caused by accessing a * guest-physical address that is a translation of a guest-linear * address. */ if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 || (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) { return (false); } return (true); } static __inline int apic_access_virtualization(struct vmx *vmx, int vcpuid) { uint32_t proc_ctls2; proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) ? 1 : 0); } static __inline int x2apic_virtualization(struct vmx *vmx, int vcpuid) { uint32_t proc_ctls2; proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE) ? 
1 : 0); } static int vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, uint64_t qual) { int error, handled, offset; uint32_t *apic_regs, vector; bool retu; handled = HANDLED; offset = APIC_WRITE_OFFSET(qual); if (!apic_access_virtualization(vmx, vcpuid)) { /* * In general there should not be any APIC write VM-exits * unless APIC-access virtualization is enabled. * * However self-IPI virtualization can legitimately trigger * an APIC-write VM-exit so treat it specially. */ if (x2apic_virtualization(vmx, vcpuid) && offset == APIC_OFFSET_SELF_IPI) { apic_regs = (uint32_t *)(vlapic->apic_page); vector = apic_regs[APIC_OFFSET_SELF_IPI / 4]; vlapic_self_ipi_handler(vlapic, vector); return (HANDLED); } else return (UNHANDLED); } switch (offset) { case APIC_OFFSET_ID: vlapic_id_write_handler(vlapic); break; case APIC_OFFSET_LDR: vlapic_ldr_write_handler(vlapic); break; case APIC_OFFSET_DFR: vlapic_dfr_write_handler(vlapic); break; case APIC_OFFSET_SVR: vlapic_svr_write_handler(vlapic); break; case APIC_OFFSET_ESR: vlapic_esr_write_handler(vlapic); break; case APIC_OFFSET_ICR_LOW: retu = false; error = vlapic_icrlo_write_handler(vlapic, &retu); if (error != 0 || retu) handled = UNHANDLED; break; case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: vlapic_lvt_write_handler(vlapic, offset); break; case APIC_OFFSET_TIMER_ICR: vlapic_icrtmr_write_handler(vlapic); break; case APIC_OFFSET_TIMER_DCR: vlapic_dcr_write_handler(vlapic); break; default: handled = UNHANDLED; break; } return (handled); } static bool apic_access_fault(struct vmx *vmx, int vcpuid, uint64_t gpa) { if (apic_access_virtualization(vmx, vcpuid) && (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE)) return (true); else return (false); } static int vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) { uint64_t qual; int access_type, offset, allowed; if (!apic_access_virtualization(vmx, vcpuid)) return (UNHANDLED); qual = vmexit->u.vmx.exit_qualification; access_type = APIC_ACCESS_TYPE(qual); offset = APIC_ACCESS_OFFSET(qual); allowed = 0; if (access_type == 0) { /* * Read data access to the following registers is expected. */ switch (offset) { case APIC_OFFSET_APR: case APIC_OFFSET_PPR: case APIC_OFFSET_RRR: case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_CCR: allowed = 1; break; default: break; } } else if (access_type == 1) { /* * Write data access to the following registers is expected. */ switch (offset) { case APIC_OFFSET_VER: case APIC_OFFSET_APR: case APIC_OFFSET_PPR: case APIC_OFFSET_RRR: case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_CCR: allowed = 1; break; default: break; } } if (allowed) { vmexit_inst_emul(vmexit, DEFAULT_APIC_BASE + offset, VIE_INVALID_GLA); } /* * Regardless of whether the APIC-access is allowed this handler * always returns UNHANDLED: * - if the access is allowed then it is handled by emulating the * instruction that caused the VM-exit (outside the critical section) * - if the access is not allowed then it will be converted to an * exitcode of VM_EXITCODE_VMX and will be dealt with in userland. 
*/ return (UNHANDLED); } static enum task_switch_reason vmx_task_switch_reason(uint64_t qual) { int reason; reason = (qual >> 30) & 0x3; switch (reason) { case 0: return (TSR_CALL); case 1: return (TSR_IRET); case 2: return (TSR_JMP); case 3: return (TSR_IDT_GATE); default: panic("%s: invalid reason %d", __func__, reason); } } static int emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) { int error; if (lapic_msr(num)) error = lapic_wrmsr(vmx->vm, vcpuid, num, val, retu); else error = vmx_wrmsr(vmx, vcpuid, num, val, retu); return (error); } static int emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) { struct vmxctx *vmxctx; uint64_t result; uint32_t eax, edx; int error; if (lapic_msr(num)) error = lapic_rdmsr(vmx->vm, vcpuid, num, &result, retu); else error = vmx_rdmsr(vmx, vcpuid, num, &result, retu); if (error == 0) { eax = result; vmxctx = &vmx->ctx[vcpuid]; error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax); KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error)); edx = result >> 32; error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RDX, edx); KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error)); } return (error); } static int vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { int error, errcode, errcode_valid, handled, in; struct vmxctx *vmxctx; struct vlapic *vlapic; struct vm_inout_str *vis; struct vm_task_switch *ts; uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; uint32_t intr_type, intr_vec, reason; uint64_t exitintinfo, qual, gpa; bool retu; CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0); CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_NMI_EXITING) != 0); handled = UNHANDLED; vmxctx = &vmx->ctx[vcpu]; qual = vmexit->u.vmx.exit_qualification; reason = vmexit->u.vmx.exit_reason; vmexit->exitcode = VM_EXITCODE_BOGUS; vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1); SDT_PROBE3(vmm, vmx, exit, entry, vmx, vcpu, vmexit); /* * VM-entry failures during or after loading guest state. * * These VM-exits are uncommon but must be handled specially * as most VM-exit fields are not populated as usual. */ if (__predict_false(reason == EXIT_REASON_MCE_DURING_ENTRY)) { VCPU_CTR0(vmx->vm, vcpu, "Handling MCE during VM-entry"); __asm __volatile("int $18"); return (1); } /* * VM exits that can be triggered during event delivery need to * be handled specially by re-injecting the event if the IDT * vectoring information field's valid bit is set. * * See "Information for VM Exits During Event Delivery" in Intel SDM * for details. */ idtvec_info = vmcs_idt_vectoring_info(); if (idtvec_info & VMCS_IDT_VEC_VALID) { idtvec_info &= ~(1 << 12); /* clear undefined bit */ exitintinfo = idtvec_info; if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { idtvec_err = vmcs_idt_vectoring_err(); exitintinfo |= (uint64_t)idtvec_err << 32; } error = vm_exit_intinfo(vmx->vm, vcpu, exitintinfo); KASSERT(error == 0, ("%s: vm_set_intinfo error %d", __func__, error)); /* * If 'virtual NMIs' are being used and the VM-exit * happened while injecting an NMI during the previous * VM-entry, then clear "blocking by NMI" in the * Guest Interruptibility-State so the NMI can be * reinjected on the subsequent VM-entry. * * However, if the NMI was being delivered through a task * gate, then the new task must start execution with NMIs * blocked so don't clear NMI blocking in this case. 
*/ intr_type = idtvec_info & VMCS_INTR_T_MASK; if (intr_type == VMCS_INTR_T_NMI) { if (reason != EXIT_REASON_TASK_SWITCH) vmx_clear_nmi_blocking(vmx, vcpu); else vmx_assert_nmi_blocking(vmx, vcpu); } /* * Update VM-entry instruction length if the event being * delivered was a software interrupt or software exception. */ if (intr_type == VMCS_INTR_T_SWINTR || intr_type == VMCS_INTR_T_PRIV_SWEXCEPTION || intr_type == VMCS_INTR_T_SWEXCEPTION) { vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); } } switch (reason) { case EXIT_REASON_TASK_SWITCH: ts = &vmexit->u.task_switch; ts->tsssel = qual & 0xffff; ts->reason = vmx_task_switch_reason(qual); ts->ext = 0; ts->errcode_valid = 0; vmx_paging_info(&ts->paging); /* * If the task switch was due to a CALL, JMP, IRET, software * interrupt (INT n) or software exception (INT3, INTO), * then the saved %rip references the instruction that caused * the task switch. The instruction length field in the VMCS * is valid in this case. * * In all other cases (e.g., NMI, hardware exception) the * saved %rip is one that would have been saved in the old TSS * had the task switch completed normally so the instruction * length field is not needed in this case and is explicitly * set to 0. */ if (ts->reason == TSR_IDT_GATE) { KASSERT(idtvec_info & VMCS_IDT_VEC_VALID, ("invalid idtvec_info %#x for IDT task switch", idtvec_info)); intr_type = idtvec_info & VMCS_INTR_T_MASK; if (intr_type != VMCS_INTR_T_SWINTR && intr_type != VMCS_INTR_T_SWEXCEPTION && intr_type != VMCS_INTR_T_PRIV_SWEXCEPTION) { /* Task switch triggered by external event */ ts->ext = 1; vmexit->inst_length = 0; if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { ts->errcode_valid = 1; ts->errcode = vmcs_idt_vectoring_err(); } } } vmexit->exitcode = VM_EXITCODE_TASK_SWITCH; SDT_PROBE4(vmm, vmx, exit, taskswitch, vmx, vcpu, vmexit, ts); VCPU_CTR4(vmx->vm, vcpu, "task switch reason %d, tss 0x%04x, " "%s errcode 0x%016lx", ts->reason, ts->tsssel, ts->ext ? 
"external" : "internal", ((uint64_t)ts->errcode << 32) | ts->errcode_valid); break; case EXIT_REASON_CR_ACCESS: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1); SDT_PROBE4(vmm, vmx, exit, craccess, vmx, vcpu, vmexit, qual); switch (qual & 0xf) { case 0: handled = vmx_emulate_cr0_access(vmx, vcpu, qual); break; case 4: handled = vmx_emulate_cr4_access(vmx, vcpu, qual); break; case 8: handled = vmx_emulate_cr8_access(vmx, vcpu, qual); break; } break; case EXIT_REASON_RDMSR: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1); retu = false; ecx = vmxctx->guest_rcx; VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx); SDT_PROBE4(vmm, vmx, exit, rdmsr, vmx, vcpu, vmexit, ecx); error = emulate_rdmsr(vmx, vcpu, ecx, &retu); if (error) { vmexit->exitcode = VM_EXITCODE_RDMSR; vmexit->u.msr.code = ecx; } else if (!retu) { handled = HANDLED; } else { /* Return to userspace with a valid exitcode */ KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, ("emulate_rdmsr retu with bogus exitcode")); } break; case EXIT_REASON_WRMSR: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1); retu = false; eax = vmxctx->guest_rax; ecx = vmxctx->guest_rcx; edx = vmxctx->guest_rdx; VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx", ecx, (uint64_t)edx << 32 | eax); SDT_PROBE5(vmm, vmx, exit, wrmsr, vmx, vmexit, vcpu, ecx, (uint64_t)edx << 32 | eax); error = emulate_wrmsr(vmx, vcpu, ecx, (uint64_t)edx << 32 | eax, &retu); if (error) { vmexit->exitcode = VM_EXITCODE_WRMSR; vmexit->u.msr.code = ecx; vmexit->u.msr.wval = (uint64_t)edx << 32 | eax; } else if (!retu) { handled = HANDLED; } else { /* Return to userspace with a valid exitcode */ KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, ("emulate_wrmsr retu with bogus exitcode")); } break; case EXIT_REASON_HLT: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1); SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_HLT; vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS); if (virtual_interrupt_delivery) vmexit->u.hlt.intr_status = vmcs_read(VMCS_GUEST_INTR_STATUS); else vmexit->u.hlt.intr_status = 0; break; case EXIT_REASON_MTF: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1); SDT_PROBE3(vmm, vmx, exit, mtrap, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_MTRAP; vmexit->inst_length = 0; break; case EXIT_REASON_PAUSE: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1); SDT_PROBE3(vmm, vmx, exit, pause, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_PAUSE; break; case EXIT_REASON_INTR_WINDOW: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1); SDT_PROBE3(vmm, vmx, exit, intrwindow, vmx, vcpu, vmexit); vmx_clear_int_window_exiting(vmx, vcpu); return (1); case EXIT_REASON_EXT_INTR: /* * External interrupts serve only to cause VM exits and allow * the host interrupt handler to run. * * If this external interrupt triggers a virtual interrupt * to a VM, then that state will be recorded by the * host interrupt handler in the VM's softc. We will inject * this virtual interrupt during the subsequent VM enter. */ intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); SDT_PROBE4(vmm, vmx, exit, interrupt, vmx, vcpu, vmexit, intr_info); /* * XXX: Ignore this exit if VMCS_INTR_VALID is not set. * This appears to be a bug in VMware Fusion? */ if (!(intr_info & VMCS_INTR_VALID)) return (1); KASSERT((intr_info & VMCS_INTR_VALID) != 0 && (intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR, ("VM exit interruption info invalid: %#x", intr_info)); vmx_trigger_hostintr(intr_info & 0xff); /* * This is special. 
We want to treat this as an 'handled' * VM-exit but not increment the instruction pointer. */ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1); return (1); case EXIT_REASON_NMI_WINDOW: SDT_PROBE3(vmm, vmx, exit, nmiwindow, vmx, vcpu, vmexit); /* Exit to allow the pending virtual NMI to be injected */ if (vm_nmi_pending(vmx->vm, vcpu)) vmx_inject_nmi(vmx, vcpu); vmx_clear_nmi_window_exiting(vmx, vcpu); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1); return (1); case EXIT_REASON_INOUT: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1); vmexit->exitcode = VM_EXITCODE_INOUT; vmexit->u.inout.bytes = (qual & 0x7) + 1; vmexit->u.inout.in = in = (qual & 0x8) ? 1 : 0; vmexit->u.inout.string = (qual & 0x10) ? 1 : 0; vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0; vmexit->u.inout.port = (uint16_t)(qual >> 16); vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax); if (vmexit->u.inout.string) { inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO); vmexit->exitcode = VM_EXITCODE_INOUT_STR; vis = &vmexit->u.inout_str; vmx_paging_info(&vis->paging); vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS); vis->cr0 = vmcs_read(VMCS_GUEST_CR0); vis->index = inout_str_index(vmx, vcpu, in); vis->count = inout_str_count(vmx, vcpu, vis->inout.rep); vis->addrsize = inout_str_addrsize(inst_info); inout_str_seginfo(vmx, vcpu, inst_info, in, vis); } SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit); break; case EXIT_REASON_CPUID: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1); SDT_PROBE3(vmm, vmx, exit, cpuid, vmx, vcpu, vmexit); handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx); break; case EXIT_REASON_EXCEPTION: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1); intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); KASSERT((intr_info & VMCS_INTR_VALID) != 0, ("VM exit interruption info invalid: %#x", intr_info)); intr_vec = intr_info & 0xff; intr_type = intr_info & VMCS_INTR_T_MASK; /* * If Virtual NMIs control is 1 and the VM-exit is due to a * fault encountered during the execution of IRET then we must * restore the state of "virtual-NMI blocking" before resuming * the guest. * * See "Resuming Guest Software after Handling an Exception". * See "Information for VM Exits Due to Vectored Events". */ if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && (intr_vec != IDT_DF) && (intr_info & EXIT_QUAL_NMIUDTI) != 0) vmx_restore_nmi_blocking(vmx, vcpu); /* * The NMI has already been handled in vmx_exit_handle_nmi(). */ if (intr_type == VMCS_INTR_T_NMI) return (1); /* * Call the machine check handler by hand. Also don't reflect * the machine check back into the guest. */ if (intr_vec == IDT_MC) { VCPU_CTR0(vmx->vm, vcpu, "Vectoring to MCE handler"); __asm __volatile("int $18"); return (1); } /* * If the hypervisor has requested user exits for * debug exceptions, bounce them out to userland. */ if (intr_type == VMCS_INTR_T_SWEXCEPTION && intr_vec == IDT_BP && (vmx->cap[vcpu].set & (1 << VM_CAP_BPT_EXIT))) { vmexit->exitcode = VM_EXITCODE_BPT; vmexit->u.bpt.inst_length = vmexit->inst_length; vmexit->inst_length = 0; break; } if (intr_vec == IDT_PF) { error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual); KASSERT(error == 0, ("%s: vmxctx_setreg(cr2) error %d", __func__, error)); } /* * Software exceptions exhibit trap-like behavior. This in * turn requires populating the VM-entry instruction length * so that the %rip in the trap frame is past the INT3/INTO * instruction. 
*/ if (intr_type == VMCS_INTR_T_SWEXCEPTION) vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); /* Reflect all other exceptions back into the guest */ errcode_valid = errcode = 0; if (intr_info & VMCS_INTR_DEL_ERRCODE) { errcode_valid = 1; errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE); } VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into " "the guest", intr_vec, errcode); SDT_PROBE5(vmm, vmx, exit, exception, vmx, vcpu, vmexit, intr_vec, errcode); error = vm_inject_exception(vmx->vm, vcpu, intr_vec, errcode_valid, errcode, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", __func__, error)); return (1); case EXIT_REASON_EPT_FAULT: /* * If 'gpa' lies within the address space allocated to * memory then this must be a nested page fault otherwise * this must be an instruction that accesses MMIO space. */ gpa = vmcs_gpa(); if (vm_mem_allocated(vmx->vm, vcpu, gpa) || apic_access_fault(vmx, vcpu, gpa)) { vmexit->exitcode = VM_EXITCODE_PAGING; vmexit->inst_length = 0; vmexit->u.paging.gpa = gpa; vmexit->u.paging.fault_type = ept_fault_type(qual); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); SDT_PROBE5(vmm, vmx, exit, nestedfault, vmx, vcpu, vmexit, gpa, qual); } else if (ept_emulation_fault(qual)) { vmexit_inst_emul(vmexit, gpa, vmcs_gla()); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1); SDT_PROBE4(vmm, vmx, exit, mmiofault, vmx, vcpu, vmexit, gpa); } /* * If Virtual NMIs control is 1 and the VM-exit is due to an * EPT fault during the execution of IRET then we must restore * the state of "virtual-NMI blocking" before resuming. * * See description of "NMI unblocking due to IRET" in * "Exit Qualification for EPT Violations". */ if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && (qual & EXIT_QUAL_NMIUDTI) != 0) vmx_restore_nmi_blocking(vmx, vcpu); break; case EXIT_REASON_VIRTUALIZED_EOI: vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI; vmexit->u.ioapic_eoi.vector = qual & 0xFF; SDT_PROBE3(vmm, vmx, exit, eoi, vmx, vcpu, vmexit); vmexit->inst_length = 0; /* trap-like */ break; case EXIT_REASON_APIC_ACCESS: SDT_PROBE3(vmm, vmx, exit, apicaccess, vmx, vcpu, vmexit); handled = vmx_handle_apic_access(vmx, vcpu, vmexit); break; case EXIT_REASON_APIC_WRITE: /* * APIC-write VM exit is trap-like so the %rip is already * pointing to the next instruction. 
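 *
 * Zeroing inst_length below keeps the common "handled" path at the
 * bottom of vmx_exit_process() from advancing the already-updated
 * %rip by another instruction.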
*/ vmexit->inst_length = 0; vlapic = vm_lapic(vmx->vm, vcpu); SDT_PROBE4(vmm, vmx, exit, apicwrite, vmx, vcpu, vmexit, vlapic); handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual); break; case EXIT_REASON_XSETBV: SDT_PROBE3(vmm, vmx, exit, xsetbv, vmx, vcpu, vmexit); handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit); break; case EXIT_REASON_MONITOR: SDT_PROBE3(vmm, vmx, exit, monitor, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_MONITOR; break; case EXIT_REASON_MWAIT: SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_MWAIT; break; case EXIT_REASON_TPR: vlapic = vm_lapic(vmx->vm, vcpu); vlapic_sync_tpr(vlapic); vmexit->inst_length = 0; handled = HANDLED; break; case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: case EXIT_REASON_VMXOFF: case EXIT_REASON_VMXON: SDT_PROBE3(vmm, vmx, exit, vminsn, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_VMINSN; break; default: SDT_PROBE4(vmm, vmx, exit, unknown, vmx, vcpu, vmexit, reason); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); break; } if (handled) { /* * It is possible that control is returned to userland * even though we were able to handle the VM exit in the * kernel. * * In such a case we want to make sure that the userland * restarts guest execution at the instruction *after* * the one we just processed. Therefore we update the * guest rip in the VMCS and in 'vmexit'. */ vmexit->rip += vmexit->inst_length; vmexit->inst_length = 0; vmcs_write(VMCS_GUEST_RIP, vmexit->rip); } else { if (vmexit->exitcode == VM_EXITCODE_BOGUS) { /* * If this VM exit was not claimed by anybody then * treat it as a generic VMX exit. */ vmexit->exitcode = VM_EXITCODE_VMX; vmexit->u.vmx.status = VM_SUCCESS; vmexit->u.vmx.inst_type = 0; vmexit->u.vmx.inst_error = 0; } else { /* * The exitcode and collateral have been populated. * The VM exit will be processed further in userland. */ } } SDT_PROBE4(vmm, vmx, exit, return, vmx, vcpu, vmexit, handled); return (handled); } static __inline void vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit) { KASSERT(vmxctx->inst_fail_status != VM_SUCCESS, ("vmx_exit_inst_error: invalid inst_fail_status %d", vmxctx->inst_fail_status)); vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_VMX; vmexit->u.vmx.status = vmxctx->inst_fail_status; vmexit->u.vmx.inst_error = vmcs_instruction_error(); vmexit->u.vmx.exit_reason = ~0; vmexit->u.vmx.exit_qualification = ~0; switch (rc) { case VMX_VMRESUME_ERROR: case VMX_VMLAUNCH_ERROR: vmexit->u.vmx.inst_type = rc; break; default: panic("vm_exit_inst_error: vmx_enter_guest returned %d", rc); } } /* * If the NMI-exiting VM execution control is set to '1' then an NMI in * non-root operation causes a VM-exit. NMI blocking is in effect so it is * sufficient to simply vector to the NMI handler via a software interrupt. * However, this must be done before maskable interrupts are enabled * otherwise the "iret" issued by an interrupt handler will incorrectly * clear NMI blocking. 
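 *
 * The KASSERT on PSL_I at the top of vmx_exit_handle_nmi() enforces
 * this ordering: vmx_run() calls it in the window between the VM
 * exit and enable_intr().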
*/ static __inline void vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) { uint32_t intr_info; KASSERT((read_rflags() & PSL_I) == 0, ("interrupts enabled")); if (vmexit->u.vmx.exit_reason != EXIT_REASON_EXCEPTION) return; intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); KASSERT((intr_info & VMCS_INTR_VALID) != 0, ("VM exit interruption info invalid: %#x", intr_info)); if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) { KASSERT((intr_info & 0xff) == IDT_NMI, ("VM exit due " "to NMI has invalid vector: %#x", intr_info)); VCPU_CTR0(vmx->vm, vcpuid, "Vectoring to NMI handler"); __asm __volatile("int $2"); } } static __inline void vmx_dr_enter_guest(struct vmxctx *vmxctx) { register_t rflags; /* Save host control debug registers. */ vmxctx->host_dr7 = rdr7(); vmxctx->host_debugctl = rdmsr(MSR_DEBUGCTLMSR); /* * Disable debugging in DR7 and DEBUGCTL to avoid triggering * exceptions in the host based on the guest DRx values. The * guest DR7 and DEBUGCTL are saved/restored in the VMCS. */ load_dr7(0); wrmsr(MSR_DEBUGCTLMSR, 0); /* * Disable single stepping the kernel to avoid corrupting the * guest DR6. A debugger might still be able to corrupt the * guest DR6 by setting a breakpoint after this point and then * single stepping. */ rflags = read_rflags(); vmxctx->host_tf = rflags & PSL_T; write_rflags(rflags & ~PSL_T); /* Save host debug registers. */ vmxctx->host_dr0 = rdr0(); vmxctx->host_dr1 = rdr1(); vmxctx->host_dr2 = rdr2(); vmxctx->host_dr3 = rdr3(); vmxctx->host_dr6 = rdr6(); /* Restore guest debug registers. */ load_dr0(vmxctx->guest_dr0); load_dr1(vmxctx->guest_dr1); load_dr2(vmxctx->guest_dr2); load_dr3(vmxctx->guest_dr3); load_dr6(vmxctx->guest_dr6); } static __inline void vmx_dr_leave_guest(struct vmxctx *vmxctx) { /* Save guest debug registers. */ vmxctx->guest_dr0 = rdr0(); vmxctx->guest_dr1 = rdr1(); vmxctx->guest_dr2 = rdr2(); vmxctx->guest_dr3 = rdr3(); vmxctx->guest_dr6 = rdr6(); /* * Restore host debug registers. Restore DR7, DEBUGCTL, and * PSL_T last. 
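 *
 * Restoring DR0-DR3 and DR6 before DR7/DEBUGCTL presumably avoids
 * briefly arming breakpoints with leftover guest addresses, and
 * PSL_T is put back last so the restore sequence itself is not
 * single-stepped.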
*/ load_dr0(vmxctx->host_dr0); load_dr1(vmxctx->host_dr1); load_dr2(vmxctx->host_dr2); load_dr3(vmxctx->host_dr3); load_dr6(vmxctx->host_dr6); wrmsr(MSR_DEBUGCTLMSR, vmxctx->host_debugctl); load_dr7(vmxctx->host_dr7); write_rflags(read_rflags() | vmxctx->host_tf); } static __inline void vmx_pmap_activate(struct vmx *vmx, pmap_t pmap) { long eptgen; int cpu; cpu = curcpu; CPU_SET_ATOMIC(cpu, &pmap->pm_active); smr_enter(pmap->pm_eptsmr); eptgen = atomic_load_long(&pmap->pm_eptgen); if (eptgen != vmx->eptgen[cpu]) { vmx->eptgen[cpu] = eptgen; invept(INVEPT_TYPE_SINGLE_CONTEXT, (struct invept_desc){ .eptp = vmx->eptp, ._res = 0 }); } } static __inline void vmx_pmap_deactivate(struct vmx *vmx, pmap_t pmap) { smr_exit(pmap->pm_eptsmr); CPU_CLR_ATOMIC(curcpu, &pmap->pm_active); } static int vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { int rc, handled, launched; struct vmx *vmx; struct vm *vm; struct vmxctx *vmxctx; struct vmcs *vmcs; struct vm_exit *vmexit; struct vlapic *vlapic; uint32_t exit_reason; struct region_descriptor gdtr, idtr; uint16_t ldt_sel; vmx = arg; vm = vmx->vm; vmcs = &vmx->vmcs[vcpu]; vmxctx = &vmx->ctx[vcpu]; vlapic = vm_lapic(vm, vcpu); vmexit = vm_exitinfo(vm, vcpu); launched = 0; KASSERT(vmxctx->pmap == pmap, ("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap)); vmx_msr_guest_enter(vmx, vcpu); VMPTRLD(vmcs); /* * XXX * We do this every time because we may setup the virtual machine * from a different process than the one that actually runs it. * * If the life of a virtual machine was spent entirely in the context * of a single process we could do this once in vmx_init(). */ vmcs_write(VMCS_HOST_CR3, rcr3()); vmcs_write(VMCS_GUEST_RIP, rip); vmx_set_pcpu_defaults(vmx, vcpu, pmap); do { KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch " "%#lx/%#lx", __func__, vmcs_guest_rip(), rip)); handled = UNHANDLED; /* * Interrupts are disabled from this point on until the * guest starts executing. This is done for the following * reasons: * * If an AST is asserted on this thread after the check below, * then the IPI_AST notification will not be lost, because it * will cause a VM exit due to external interrupt as soon as * the guest state is loaded. * * A posted interrupt after 'vmx_inject_interrupts()' will * not be "lost" because it will be held pending in the host * APIC because interrupts are disabled. The pending interrupt * will be recognized as soon as the guest state is loaded. * * The same reasoning applies to the IPI generated by * pmap_invalidate_ept(). */ disable_intr(); vmx_inject_interrupts(vmx, vcpu, vlapic, rip); /* * Check for vcpu suspension after injecting events because * vmx_inject_interrupts() can suspend the vcpu due to a * triple fault. */ if (vcpu_suspended(evinfo)) { enable_intr(); vm_exit_suspended(vmx->vm, vcpu, rip); break; } if (vcpu_rendezvous_pending(evinfo)) { enable_intr(); vm_exit_rendezvous(vmx->vm, vcpu, rip); break; } if (vcpu_reqidle(evinfo)) { enable_intr(); vm_exit_reqidle(vmx->vm, vcpu, rip); break; } if (vcpu_should_yield(vm, vcpu)) { enable_intr(); vm_exit_astpending(vmx->vm, vcpu, rip); vmx_astpending_trace(vmx, vcpu, rip); handled = HANDLED; break; } if (vcpu_debugged(vm, vcpu)) { enable_intr(); vm_exit_debug(vmx->vm, vcpu, rip); break; } /* * If TPR Shadowing is enabled, the TPR Threshold * must be updated right before entering the guest. 
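 *
 * The threshold is loaded from the vlapic's current CR8 so that,
 * once the guest lowers its task priority below that value, the
 * processor takes a TPR-below-threshold exit (EXIT_REASON_TPR,
 * handled above via vlapic_sync_tpr()) and any interrupt previously
 * masked by the priority can be reevaluated.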
*/ if (tpr_shadowing && !virtual_interrupt_delivery) { if ((vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0) { vmcs_write(VMCS_TPR_THRESHOLD, vlapic_get_cr8(vlapic)); } } /* * VM exits restore the base address but not the * limits of GDTR and IDTR. The VMCS only stores the * base address, so VM exits set the limits to 0xffff. * Save and restore the full GDTR and IDTR to restore * the limits. * * The VMCS does not save the LDTR at all, and VM * exits clear LDTR as if a NULL selector were loaded. * The userspace hypervisor probably doesn't use a * LDT, but save and restore it to be safe. */ sgdt(&gdtr); sidt(&idtr); ldt_sel = sldt(); /* * The TSC_AUX MSR must be saved/restored while interrupts * are disabled so that it is not possible for the guest * TSC_AUX MSR value to be overwritten by the resume * portion of the IPI_SUSPEND codepath. This is why the * transition of this MSR is handled separately from those * handled by vmx_msr_guest_{enter,exit}(), which are ok to * be transitioned with preemption disabled but interrupts * enabled. * * These vmx_msr_guest_{enter,exit}_tsc_aux() calls can be * anywhere in this loop so long as they happen with * interrupts disabled. This location is chosen for * simplicity. */ vmx_msr_guest_enter_tsc_aux(vmx, vcpu); vmx_dr_enter_guest(vmxctx); /* * Mark the EPT as active on this host CPU and invalidate * EPTP-tagged TLB entries if required. */ vmx_pmap_activate(vmx, pmap); vmx_run_trace(vmx, vcpu); rc = vmx_enter_guest(vmxctx, vmx, launched); vmx_pmap_deactivate(vmx, pmap); vmx_dr_leave_guest(vmxctx); vmx_msr_guest_exit_tsc_aux(vmx, vcpu); bare_lgdt(&gdtr); lidt(&idtr); lldt(ldt_sel); /* Collect some information for VM exit processing */ vmexit->rip = rip = vmcs_guest_rip(); vmexit->inst_length = vmexit_instruction_length(); vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason(); vmexit->u.vmx.exit_qualification = vmcs_exit_qualification(); /* Update 'nextrip' */ vmx->state[vcpu].nextrip = rip; if (rc == VMX_GUEST_VMEXIT) { vmx_exit_handle_nmi(vmx, vcpu, vmexit); enable_intr(); handled = vmx_exit_process(vmx, vcpu, vmexit); } else { enable_intr(); vmx_exit_inst_error(vmxctx, rc, vmexit); } launched = 1; vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled); rip = vmexit->rip; } while (handled); /* * If a VM exit has been handled then the exitcode must be BOGUS * If a VM exit is not handled then the exitcode must not be BOGUS */ if ((handled && vmexit->exitcode != VM_EXITCODE_BOGUS) || (!handled && vmexit->exitcode == VM_EXITCODE_BOGUS)) { panic("Mismatch between handled (%d) and exitcode (%d)", handled, vmexit->exitcode); } if (!handled) vmm_stat_incr(vm, vcpu, VMEXIT_USERSPACE, 1); VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d", vmexit->exitcode); VMCLEAR(vmcs); vmx_msr_guest_exit(vmx, vcpu); return (0); } static void vmx_cleanup(void *arg) { int i; struct vmx *vmx = arg; uint16_t maxcpus; if (apic_access_virtualization(vmx, 0)) vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE); maxcpus = vm_get_maxcpus(vmx->vm); for (i = 0; i < maxcpus; i++) vpid_free(vmx->state[i].vpid); free(vmx, M_VMX); return; } static register_t * vmxctx_regptr(struct vmxctx *vmxctx, int reg) { switch (reg) { case VM_REG_GUEST_RAX: return (&vmxctx->guest_rax); case VM_REG_GUEST_RBX: return (&vmxctx->guest_rbx); case VM_REG_GUEST_RCX: return (&vmxctx->guest_rcx); case VM_REG_GUEST_RDX: return (&vmxctx->guest_rdx); case VM_REG_GUEST_RSI: return (&vmxctx->guest_rsi); case VM_REG_GUEST_RDI: return (&vmxctx->guest_rdi); case VM_REG_GUEST_RBP: return 
(&vmxctx->guest_rbp); case VM_REG_GUEST_R8: return (&vmxctx->guest_r8); case VM_REG_GUEST_R9: return (&vmxctx->guest_r9); case VM_REG_GUEST_R10: return (&vmxctx->guest_r10); case VM_REG_GUEST_R11: return (&vmxctx->guest_r11); case VM_REG_GUEST_R12: return (&vmxctx->guest_r12); case VM_REG_GUEST_R13: return (&vmxctx->guest_r13); case VM_REG_GUEST_R14: return (&vmxctx->guest_r14); case VM_REG_GUEST_R15: return (&vmxctx->guest_r15); case VM_REG_GUEST_CR2: return (&vmxctx->guest_cr2); case VM_REG_GUEST_DR0: return (&vmxctx->guest_dr0); case VM_REG_GUEST_DR1: return (&vmxctx->guest_dr1); case VM_REG_GUEST_DR2: return (&vmxctx->guest_dr2); case VM_REG_GUEST_DR3: return (&vmxctx->guest_dr3); case VM_REG_GUEST_DR6: return (&vmxctx->guest_dr6); default: break; } return (NULL); } static int vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval) { register_t *regp; if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { *retval = *regp; return (0); } else return (EINVAL); } static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val) { register_t *regp; if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { *regp = val; return (0); } else return (EINVAL); } static int vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval) { uint64_t gi; int error; error = vmcs_getreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi); *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; return (error); } static int vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val) { struct vmcs *vmcs; uint64_t gi; int error, ident; /* * Forcing the vcpu into an interrupt shadow is not supported. */ if (val) { error = EINVAL; goto done; } vmcs = &vmx->vmcs[vcpu]; ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY); error = vmcs_getreg(vmcs, running, ident, &gi); if (error == 0) { gi &= ~HWINTR_BLOCKING; error = vmcs_setreg(vmcs, running, ident, gi); } done: VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val, error ? "failed" : "succeeded"); return (error); } static int vmx_shadow_reg(int reg) { int shreg; shreg = -1; switch (reg) { case VM_REG_GUEST_CR0: shreg = VMCS_CR0_SHADOW; break; case VM_REG_GUEST_CR4: shreg = VMCS_CR4_SHADOW; break; default: break; } return (shreg); } static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) { int running, hostcpu; struct vmx *vmx = arg; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); if (reg == VM_REG_GUEST_INTR_SHADOW) return (vmx_get_intr_shadow(vmx, vcpu, running, retval)); if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0) return (0); return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval)); } static int vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) { int error, hostcpu, running, shadow; uint64_t ctls; pmap_t pmap; struct vmx *vmx = arg; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu); if (reg == VM_REG_GUEST_INTR_SHADOW) return (vmx_modify_intr_shadow(vmx, vcpu, running, val)); if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0) return (0); /* Do not permit user write access to VMCS fields by offset. */ if (reg < 0) return (EINVAL); error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val); if (error == 0) { /* * If the "load EFER" VM-entry control is 1 then the * value of EFER.LMA must be identical to "IA-32e mode guest" * bit in the VM-entry control. 
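 *
 * Keeping the two in sync matters because a VM entry whose "IA-32e
 * mode guest" control disagrees with the EFER.LMA value being loaded
 * fails the guest-state checks and the entry is aborted.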
*/ if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 && (reg == VM_REG_GUEST_EFER)) { vmcs_getreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls); if (val & EFER_LMA) ctls |= VM_ENTRY_GUEST_LMA; else ctls &= ~VM_ENTRY_GUEST_LMA; vmcs_setreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(VMCS_ENTRY_CTLS), ctls); } shadow = vmx_shadow_reg(reg); if (shadow > 0) { /* * Store the unmodified value in the shadow */ error = vmcs_setreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(shadow), val); } if (reg == VM_REG_GUEST_CR3) { /* * Invalidate the guest vcpu's TLB mappings to emulate * the behavior of updating %cr3. * * XXX the processor retains global mappings when %cr3 * is updated but vmx_invvpid() does not. */ pmap = vmx->ctx[vcpu].pmap; vmx_invvpid(vmx, vcpu, pmap, running); } } return (error); } static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) { int hostcpu, running; struct vmx *vmx = arg; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu); return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc)); } static int vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) { int hostcpu, running; struct vmx *vmx = arg; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu); return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc)); } static int vmx_getcap(void *arg, int vcpu, int type, int *retval) { struct vmx *vmx = arg; int vcap; int ret; ret = ENOENT; vcap = vmx->cap[vcpu].set; switch (type) { case VM_CAP_HALT_EXIT: if (cap_halt_exit) ret = 0; break; case VM_CAP_PAUSE_EXIT: if (cap_pause_exit) ret = 0; break; case VM_CAP_MTRAP_EXIT: if (cap_monitor_trap) ret = 0; break; case VM_CAP_RDPID: if (cap_rdpid) ret = 0; break; case VM_CAP_RDTSCP: if (cap_rdtscp) ret = 0; break; case VM_CAP_UNRESTRICTED_GUEST: if (cap_unrestricted_guest) ret = 0; break; case VM_CAP_ENABLE_INVPCID: if (cap_invpcid) ret = 0; break; case VM_CAP_BPT_EXIT: ret = 0; break; default: break; } if (ret == 0) *retval = (vcap & (1 << type)) ? 1 : 0; return (ret); } static int vmx_setcap(void *arg, int vcpu, int type, int val) { struct vmx *vmx = arg; struct vmcs *vmcs = &vmx->vmcs[vcpu]; uint32_t baseval; uint32_t *pptr; int error; int flag; int reg; int retval; retval = ENOENT; pptr = NULL; switch (type) { case VM_CAP_HALT_EXIT: if (cap_halt_exit) { retval = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_HLT_EXITING; reg = VMCS_PRI_PROC_BASED_CTLS; } break; case VM_CAP_MTRAP_EXIT: if (cap_monitor_trap) { retval = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_MTF; reg = VMCS_PRI_PROC_BASED_CTLS; } break; case VM_CAP_PAUSE_EXIT: if (cap_pause_exit) { retval = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_PAUSE_EXITING; reg = VMCS_PRI_PROC_BASED_CTLS; } break; case VM_CAP_RDPID: case VM_CAP_RDTSCP: if (cap_rdpid || cap_rdtscp) /* * Choose not to support enabling/disabling * RDPID/RDTSCP via libvmmapi since, as per the * discussion in vmx_modinit(), RDPID/RDTSCP are * either always enabled or always disabled. 
*/ error = EOPNOTSUPP; break; case VM_CAP_UNRESTRICTED_GUEST: if (cap_unrestricted_guest) { retval = 0; pptr = &vmx->cap[vcpu].proc_ctls2; baseval = *pptr; flag = PROCBASED2_UNRESTRICTED_GUEST; reg = VMCS_SEC_PROC_BASED_CTLS; } break; case VM_CAP_ENABLE_INVPCID: if (cap_invpcid) { retval = 0; pptr = &vmx->cap[vcpu].proc_ctls2; baseval = *pptr; flag = PROCBASED2_ENABLE_INVPCID; reg = VMCS_SEC_PROC_BASED_CTLS; } break; case VM_CAP_BPT_EXIT: retval = 0; /* Don't change the bitmap if we are tracing all exceptions. */ if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { pptr = &vmx->cap[vcpu].exc_bitmap; baseval = *pptr; flag = (1 << IDT_BP); reg = VMCS_EXCEPTION_BITMAP; } break; default: break; } if (retval) return (retval); if (pptr != NULL) { if (val) { baseval |= flag; } else { baseval &= ~flag; } VMPTRLD(vmcs); error = vmwrite(reg, baseval); VMCLEAR(vmcs); if (error) return (error); /* * Update optional stored flags, and record * setting */ *pptr = baseval; } if (val) { vmx->cap[vcpu].set |= (1 << type); } else { vmx->cap[vcpu].set &= ~(1 << type); } return (0); } static struct vmspace * vmx_vmspace_alloc(vm_offset_t min, vm_offset_t max) { return (ept_vmspace_alloc(min, max)); } static void vmx_vmspace_free(struct vmspace *vmspace) { ept_vmspace_free(vmspace); } struct vlapic_vtx { struct vlapic vlapic; struct pir_desc *pir_desc; struct vmx *vmx; u_int pending_prio; }; #define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4)) #define VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg) \ do { \ VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d", \ level ? "level" : "edge", vector); \ VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]); \ VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]); \ VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]); \ VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]); \ VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\ } while (0) /* * vlapic->ops handlers that utilize the APICv hardware assist described in * Chapter 29 of the Intel SDM. */ static int vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level) { struct vlapic_vtx *vlapic_vtx; struct pir_desc *pir_desc; uint64_t mask; int idx, notify = 0; vlapic_vtx = (struct vlapic_vtx *)vlapic; pir_desc = vlapic_vtx->pir_desc; /* * Keep track of interrupt requests in the PIR descriptor. This is * because the virtual APIC page pointed to by the VMCS cannot be * modified if the vcpu is running. */ idx = vector / 64; mask = 1UL << (vector % 64); atomic_set_long(&pir_desc->pir[idx], mask); /* * A notification is required whenever the 'pending' bit makes a * transition from 0->1. * * Even if the 'pending' bit is already asserted, notification about * the incoming interrupt may still be necessary. For example, if a * vCPU is HLTed with a high PPR, a low priority interrupt would cause * the 0->1 'pending' transition with a notification, but the vCPU * would ignore the interrupt for the time being. The same vCPU would * need to then be notified if a high-priority interrupt arrived which * satisfied the PPR. * * The priorities of interrupts injected while 'pending' is asserted * are tracked in a custom bitfield 'pending_prio'. Should the * to-be-injected interrupt exceed the priorities already present, the * notification is sent. The priorities recorded in 'pending_prio' are * cleared whenever the 'pending' bit makes another 0->1 transition. 
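 *
 * Illustrative example (hypothetical values): a vCPU sits in HLT
 * with PPR 0xd0.  A vector 0x42 interrupt arrives: 'pending' goes
 * 0->1 and a notification is sent, but the vCPU keeps waiting since
 * the vector's priority class is below the PPR.  A later vector 0xe2
 * interrupt finds 'pending' already set; its priority class bit
 * (1 << (0xe0 >> 4)) is not yet present in 'pending_prio' and is
 * higher than anything recorded there, so a second notification is
 * sent and the halted vCPU can now be woken.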
*/ if (atomic_cmpset_long(&pir_desc->pending, 0, 1) != 0) { notify = 1; vlapic_vtx->pending_prio = 0; } else { const u_int old_prio = vlapic_vtx->pending_prio; const u_int prio_bit = VPR_PRIO_BIT(vector & APIC_TPR_INT); if ((old_prio & prio_bit) == 0 && prio_bit > old_prio) { atomic_set_int(&vlapic_vtx->pending_prio, prio_bit); notify = 1; } } VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector, level, "vmx_set_intr_ready"); return (notify); } static int vmx_pending_intr(struct vlapic *vlapic, int *vecptr) { struct vlapic_vtx *vlapic_vtx; struct pir_desc *pir_desc; struct LAPIC *lapic; uint64_t pending, pirval; uint32_t ppr, vpr; int i; /* * This function is only expected to be called from the 'HLT' exit * handler which does not care about the vector that is pending. */ KASSERT(vecptr == NULL, ("vmx_pending_intr: vecptr must be NULL")); vlapic_vtx = (struct vlapic_vtx *)vlapic; pir_desc = vlapic_vtx->pir_desc; pending = atomic_load_acq_long(&pir_desc->pending); if (!pending) { /* * While a virtual interrupt may have already been * processed the actual delivery maybe pending the * interruptibility of the guest. Recognize a pending * interrupt by reevaluating virtual interrupts * following Section 29.2.1 in the Intel SDM Volume 3. */ struct vm_exit *vmexit; uint8_t rvi, ppr; vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); KASSERT(vmexit->exitcode == VM_EXITCODE_HLT, ("vmx_pending_intr: exitcode not 'HLT'")); rvi = vmexit->u.hlt.intr_status & APIC_TPR_INT; lapic = vlapic->apic_page; ppr = lapic->ppr & APIC_TPR_INT; if (rvi > ppr) { return (1); } return (0); } /* * If there is an interrupt pending then it will be recognized only * if its priority is greater than the processor priority. * * Special case: if the processor priority is zero then any pending * interrupt will be recognized. */ lapic = vlapic->apic_page; ppr = lapic->ppr & APIC_TPR_INT; if (ppr == 0) return (1); VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d", lapic->ppr); vpr = 0; for (i = 3; i >= 0; i--) { pirval = pir_desc->pir[i]; if (pirval != 0) { vpr = (i * 64 + flsl(pirval) - 1) & APIC_TPR_INT; break; } } /* * If the highest-priority pending interrupt falls short of the * processor priority of this vCPU, ensure that 'pending_prio' does not * have any stale bits which would preclude a higher-priority interrupt * from incurring a notification later. 
*/ if (vpr <= ppr) { const u_int prio_bit = VPR_PRIO_BIT(vpr); const u_int old = vlapic_vtx->pending_prio; if (old > prio_bit && (old & prio_bit) == 0) { vlapic_vtx->pending_prio = prio_bit; } return (0); } return (1); } static void vmx_intr_accepted(struct vlapic *vlapic, int vector) { panic("vmx_intr_accepted: not expected to be called"); } static void vmx_set_tmr(struct vlapic *vlapic, int vector, bool level) { struct vlapic_vtx *vlapic_vtx; struct vmx *vmx; struct vmcs *vmcs; uint64_t mask, val; KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); KASSERT(!vcpu_is_running(vlapic->vm, vlapic->vcpuid, NULL), ("vmx_set_tmr: vcpu cannot be running")); vlapic_vtx = (struct vlapic_vtx *)vlapic; vmx = vlapic_vtx->vmx; vmcs = &vmx->vmcs[vlapic->vcpuid]; mask = 1UL << (vector % 64); VMPTRLD(vmcs); val = vmcs_read(VMCS_EOI_EXIT(vector)); if (level) val |= mask; else val &= ~mask; vmcs_write(VMCS_EOI_EXIT(vector), val); VMCLEAR(vmcs); } static void vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) { struct vmx *vmx; struct vmcs *vmcs; uint32_t proc_ctls; int vcpuid; vcpuid = vlapic->vcpuid; vmx = ((struct vlapic_vtx *)vlapic)->vmx; vmcs = &vmx->vmcs[vcpuid]; proc_ctls = vmx->cap[vcpuid].proc_ctls; proc_ctls &= ~PROCBASED_USE_TPR_SHADOW; proc_ctls |= PROCBASED_CR8_LOAD_EXITING; proc_ctls |= PROCBASED_CR8_STORE_EXITING; vmx->cap[vcpuid].proc_ctls = proc_ctls; VMPTRLD(vmcs); vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); VMCLEAR(vmcs); } static void vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) { struct vmx *vmx; struct vmcs *vmcs; uint32_t proc_ctls2; int vcpuid, error; vcpuid = vlapic->vcpuid; vmx = ((struct vlapic_vtx *)vlapic)->vmx; vmcs = &vmx->vmcs[vcpuid]; proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0, ("%s: invalid proc_ctls2 %#x", __func__, proc_ctls2)); proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES; proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE; vmx->cap[vcpuid].proc_ctls2 = proc_ctls2; VMPTRLD(vmcs); vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2); VMCLEAR(vmcs); if (vlapic->vcpuid == 0) { /* * The nested page table mappings are shared by all vcpus * so unmap the APIC access page just once. */ error = vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE); KASSERT(error == 0, ("%s: vm_unmap_mmio error %d", __func__, error)); /* * The MSR bitmap is shared by all vcpus so modify it only * once in the context of vcpu 0. */ error = vmx_allow_x2apic_msrs(vmx); KASSERT(error == 0, ("%s: vmx_allow_x2apic_msrs error %d", __func__, error)); } } static void vmx_post_intr(struct vlapic *vlapic, int hostcpu) { ipi_cpu(hostcpu, pirvec); } /* * Transfer the pending interrupts in the PIR descriptor to the IRR * in the virtual APIC page. 
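A simplified sketch of the copy performed by vmx_inject_pir() below (the real struct LAPIC pads every register to 16 bytes, so the flat array here is an assumption made only for illustration): each 64-bit PIR word covers 64 vectors and therefore feeds two consecutive 32-bit IRR registers.

struct irr_regs {
	unsigned int irr[8];		/* irr0..irr7, 32 vectors each */
};

static void
pir_word_to_irr(struct irr_regs *irr, int word, unsigned long val)
{
	irr->irr[word * 2]     |= (unsigned int)val;		/* vectors 64*word + 0..31 */
	irr->irr[word * 2 + 1] |= (unsigned int)(val >> 32);	/* vectors 64*word + 32..63 */
}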
*/ static void vmx_inject_pir(struct vlapic *vlapic) { struct vlapic_vtx *vlapic_vtx; struct pir_desc *pir_desc; struct LAPIC *lapic; uint64_t val, pirval; int rvi, pirbase = -1; uint16_t intr_status_old, intr_status_new; vlapic_vtx = (struct vlapic_vtx *)vlapic; pir_desc = vlapic_vtx->pir_desc; if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) { VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: " "no posted interrupt pending"); return; } pirval = 0; pirbase = -1; lapic = vlapic->apic_page; val = atomic_readandclear_long(&pir_desc->pir[0]); if (val != 0) { lapic->irr0 |= val; lapic->irr1 |= val >> 32; pirbase = 0; pirval = val; } val = atomic_readandclear_long(&pir_desc->pir[1]); if (val != 0) { lapic->irr2 |= val; lapic->irr3 |= val >> 32; pirbase = 64; pirval = val; } val = atomic_readandclear_long(&pir_desc->pir[2]); if (val != 0) { lapic->irr4 |= val; lapic->irr5 |= val >> 32; pirbase = 128; pirval = val; } val = atomic_readandclear_long(&pir_desc->pir[3]); if (val != 0) { lapic->irr6 |= val; lapic->irr7 |= val >> 32; pirbase = 192; pirval = val; } VLAPIC_CTR_IRR(vlapic, "vmx_inject_pir"); /* * Update RVI so the processor can evaluate pending virtual * interrupts on VM-entry. * * It is possible for pirval to be 0 here, even though the * pending bit has been set. The scenario is: * CPU-Y is sending a posted interrupt to CPU-X, which * is running a guest and processing posted interrupts in h/w. * CPU-X will eventually exit and the state seen in s/w is * the pending bit set, but no PIR bits set. * * CPU-X CPU-Y * (vm running) (host running) * rx posted interrupt * CLEAR pending bit * SET PIR bit * READ/CLEAR PIR bits * SET pending bit * (vm exit) * pending bit set, PIR 0 */ if (pirval != 0) { rvi = pirbase + flsl(pirval) - 1; intr_status_old = vmcs_read(VMCS_GUEST_INTR_STATUS); intr_status_new = (intr_status_old & 0xFF00) | rvi; if (intr_status_new > intr_status_old) { vmcs_write(VMCS_GUEST_INTR_STATUS, intr_status_new); VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: " "guest_intr_status changed from 0x%04x to 0x%04x", intr_status_old, intr_status_new); } } } static struct vlapic * vmx_vlapic_init(void *arg, int vcpuid) { struct vmx *vmx; struct vlapic *vlapic; struct vlapic_vtx *vlapic_vtx; vmx = arg; vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO); vlapic->vm = vmx->vm; vlapic->vcpuid = vcpuid; vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid]; vlapic_vtx = (struct vlapic_vtx *)vlapic; vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid]; vlapic_vtx->vmx = vmx; if (tpr_shadowing) { vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_ts; } if (virtual_interrupt_delivery) { vlapic->ops.set_intr_ready = vmx_set_intr_ready; vlapic->ops.pending_intr = vmx_pending_intr; vlapic->ops.intr_accepted = vmx_intr_accepted; vlapic->ops.set_tmr = vmx_set_tmr; vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_vid; } if (posted_interrupts) vlapic->ops.post_intr = vmx_post_intr; vlapic_init(vlapic); return (vlapic); } static void vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic) { vlapic_cleanup(vlapic); free(vlapic, M_VLAPIC); } #ifdef BHYVE_SNAPSHOT static int vmx_snapshot(void *arg, struct vm_snapshot_meta *meta) { struct vmx *vmx; struct vmxctx *vmxctx; int i; int ret; vmx = arg; KASSERT(vmx != NULL, ("%s: arg was NULL", __func__)); for (i = 0; i < VM_MAXCPU; i++) { SNAPSHOT_BUF_OR_LEAVE(vmx->guest_msrs[i], sizeof(vmx->guest_msrs[i]), meta, ret, done); vmxctx = &vmx->ctx[i]; SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdi, meta, ret, done); 
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rsi, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rcx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r8, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r9, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rax, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbp, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r10, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r11, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r12, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r13, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r14, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r15, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_cr2, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr0, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr1, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr2, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr3, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr6, meta, ret, done); } done: return (ret); } static int vmx_vmcx_snapshot(void *arg, struct vm_snapshot_meta *meta, int vcpu) { struct vmcs *vmcs; struct vmx *vmx; int err, run, hostcpu; vmx = (struct vmx *)arg; err = 0; KASSERT(arg != NULL, ("%s: arg was NULL", __func__)); vmcs = &vmx->vmcs[vcpu]; run = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (run && hostcpu != curcpu) { printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu); return (EINVAL); } err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR0, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR3, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR4, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_DR7, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RSP, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RIP, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RFLAGS, meta); /* Guest segments */ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_ES, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_ES, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CS, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_CS, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_SS, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_SS, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_DS, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_DS, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_FS, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_FS, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_GS, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_GS, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_TR, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_TR, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_LDTR, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_LDTR, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_EFER, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_IDTR, meta); err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_GDTR, meta); /* Guest page tables */ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE0, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE1, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE2, meta); err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE3, meta); /* Other guest state */ err += 
vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_CS, meta); err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_ESP, meta); err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_EIP, meta); err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_INTERRUPTIBILITY, meta); err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_ACTIVITY, meta); err += vmcs_snapshot_any(vmcs, run, VMCS_ENTRY_CTLS, meta); err += vmcs_snapshot_any(vmcs, run, VMCS_EXIT_CTLS, meta); return (err); } static int vmx_restore_tsc(void *arg, int vcpu, uint64_t offset) { struct vmcs *vmcs; struct vmx *vmx = (struct vmx *)arg; int error, running, hostcpu; KASSERT(arg != NULL, ("%s: arg was NULL", __func__)); vmcs = &vmx->vmcs[vcpu]; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) { printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu); return (EINVAL); } if (!running) VMPTRLD(vmcs); error = vmx_set_tsc_offset(vmx, vcpu, offset); if (!running) VMCLEAR(vmcs); return (error); } #endif const struct vmm_ops vmm_ops_intel = { .modinit = vmx_modinit, .modcleanup = vmx_modcleanup, .modresume = vmx_modresume, .init = vmx_init, .run = vmx_run, .cleanup = vmx_cleanup, .getreg = vmx_getreg, .setreg = vmx_setreg, .getdesc = vmx_getdesc, .setdesc = vmx_setdesc, .getcap = vmx_getcap, .setcap = vmx_setcap, .vmspace_alloc = vmx_vmspace_alloc, .vmspace_free = vmx_vmspace_free, .vlapic_init = vmx_vlapic_init, .vlapic_cleanup = vmx_vlapic_cleanup, #ifdef BHYVE_SNAPSHOT .snapshot = vmx_snapshot, .vmcx_snapshot = vmx_vmcx_snapshot, .restore_tsc = vmx_restore_tsc, #endif }; diff --git a/sys/arm/arm/debug_monitor.c b/sys/arm/arm/debug_monitor.c index 55b5f70b2397..b73249bedcf1 100644 --- a/sys/arm/arm/debug_monitor.c +++ b/sys/arm/arm/debug_monitor.c @@ -1,1070 +1,1070 @@ /* * Copyright (c) 2015 Juniper Networks Inc. * All rights reserved. * * Developed by Semihalf. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include +#include #include #include #include #include #include #include #include #include -#include #include #include #include enum dbg_t { DBG_TYPE_BREAKPOINT = 0, DBG_TYPE_WATCHPOINT = 1, }; struct dbg_wb_conf { enum dbg_t type; enum dbg_access_t access; db_addr_t address; db_expr_t size; u_int slot; }; static int dbg_reset_state(void); static int dbg_setup_breakpoint(db_expr_t, db_expr_t, u_int); static int dbg_remove_breakpoint(u_int); static u_int dbg_find_slot(enum dbg_t, db_expr_t); static boolean_t dbg_check_slot_free(enum dbg_t, u_int); static int dbg_remove_xpoint(struct dbg_wb_conf *); static int dbg_setup_xpoint(struct dbg_wb_conf *); static int dbg_capable_var; /* Indicates that machine is capable of using HW watchpoints/breakpoints */ static uint32_t dbg_model; /* Debug Arch. Model */ static boolean_t dbg_ossr; /* OS Save and Restore implemented */ static uint32_t dbg_watchpoint_num; static uint32_t dbg_breakpoint_num; /* ID_DFR0 - Debug Feature Register 0 */ #define ID_DFR0_CP_DEBUG_M_SHIFT 0 #define ID_DFR0_CP_DEBUG_M_MASK (0xF << ID_DFR0_CP_DEBUG_M_SHIFT) #define ID_DFR0_CP_DEBUG_M_NS (0x0) /* Not supported */ #define ID_DFR0_CP_DEBUG_M_V6 (0x2) /* v6 Debug arch. CP14 access */ #define ID_DFR0_CP_DEBUG_M_V6_1 (0x3) /* v6.1 Debug arch. CP14 access */ #define ID_DFR0_CP_DEBUG_M_V7 (0x4) /* v7 Debug arch. CP14 access */ #define ID_DFR0_CP_DEBUG_M_V7_1 (0x5) /* v7.1 Debug arch. CP14 access */ /* DBGDIDR - Debug ID Register */ #define DBGDIDR_WRPS_SHIFT 28 #define DBGDIDR_WRPS_MASK (0xF << DBGDIDR_WRPS_SHIFT) #define DBGDIDR_WRPS_NUM(reg) \ ((((reg) & DBGDIDR_WRPS_MASK) >> DBGDIDR_WRPS_SHIFT) + 1) #define DBGDIDR_BRPS_SHIFT 24 #define DBGDIDR_BRPS_MASK (0xF << DBGDIDR_BRPS_SHIFT) #define DBGDIDR_BRPS_NUM(reg) \ ((((reg) & DBGDIDR_BRPS_MASK) >> DBGDIDR_BRPS_SHIFT) + 1) /* DBGPRSR - Device Powerdown and Reset Status Register */ #define DBGPRSR_PU (1 << 0) /* Powerup status */ /* DBGOSLSR - OS Lock Status Register */ #define DBGOSLSR_OSLM0 (1 << 0) /* DBGOSDLR - OS Double Lock Register */ #define DBGPRSR_DLK (1 << 0) /* OS Double Lock set */ /* DBGDSCR - Debug Status and Control Register */ #define DBGSCR_MDBG_EN (1 << 15) /* Monitor debug-mode enable */ /* DBGWVR - Watchpoint Value Register */ #define DBGWVR_ADDR_MASK (~0x3U) /* Watchpoints/breakpoints control register bitfields */ #define DBG_WB_CTRL_LEN_1 (0x1 << 5) #define DBG_WB_CTRL_LEN_2 (0x3 << 5) #define DBG_WB_CTRL_LEN_4 (0xf << 5) #define DBG_WB_CTRL_LEN_8 (0xff << 5) #define DBG_WB_CTRL_LEN_MASK(x) ((x) & (0xff << 5)) #define DBG_WB_CTRL_EXEC (0x0 << 3) #define DBG_WB_CTRL_LOAD (0x1 << 3) #define DBG_WB_CTRL_STORE (0x2 << 3) #define DBG_WB_CTRL_ACCESS_MASK(x) ((x) & (0x3 << 3)) /* Common for breakpoint and watchpoint */ #define DBG_WB_CTRL_PL1 (0x1 << 1) #define DBG_WB_CTRL_PL0 (0x2 << 1) #define DBG_WB_CTRL_PLX_MASK(x) ((x) & (0x3 << 1)) #define DBG_WB_CTRL_E (0x1 << 0) /* * Watchpoint/breakpoint helpers */ #define DBG_BKPT_BT_SLOT 0 /* Slot for branch taken */ #define DBG_BKPT_BNT_SLOT 1 /* Slot for branch not taken */ #define OP2_SHIFT 4 /* Opc2 numbers for coprocessor instructions */ #define DBG_WB_BVR 4 #define DBG_WB_BCR 5 #define DBG_WB_WVR 6 #define DBG_WB_WCR 7 #define DBG_REG_BASE_BVR (DBG_WB_BVR << OP2_SHIFT) #define DBG_REG_BASE_BCR (DBG_WB_BCR << OP2_SHIFT) #define DBG_REG_BASE_WVR (DBG_WB_WVR << OP2_SHIFT) #define DBG_REG_BASE_WCR (DBG_WB_WCR << OP2_SHIFT) #define DBG_WB_READ(cn, cm, op2, val) do { \ __asm __volatile("mrc 
p14, 0, %0, " #cn "," #cm "," #op2 : "=r" (val)); \ } while (0) #define DBG_WB_WRITE(cn, cm, op2, val) do { \ __asm __volatile("mcr p14, 0, %0, " #cn "," #cm "," #op2 :: "r" (val)); \ } while (0) #define READ_WB_REG_CASE(op2, m, val) \ case (((op2) << OP2_SHIFT) + m): \ DBG_WB_READ(c0, c ## m, op2, val); \ break #define WRITE_WB_REG_CASE(op2, m, val) \ case (((op2) << OP2_SHIFT) + m): \ DBG_WB_WRITE(c0, c ## m, op2, val); \ break #define SWITCH_CASES_READ_WB_REG(op2, val) \ READ_WB_REG_CASE(op2, 0, val); \ READ_WB_REG_CASE(op2, 1, val); \ READ_WB_REG_CASE(op2, 2, val); \ READ_WB_REG_CASE(op2, 3, val); \ READ_WB_REG_CASE(op2, 4, val); \ READ_WB_REG_CASE(op2, 5, val); \ READ_WB_REG_CASE(op2, 6, val); \ READ_WB_REG_CASE(op2, 7, val); \ READ_WB_REG_CASE(op2, 8, val); \ READ_WB_REG_CASE(op2, 9, val); \ READ_WB_REG_CASE(op2, 10, val); \ READ_WB_REG_CASE(op2, 11, val); \ READ_WB_REG_CASE(op2, 12, val); \ READ_WB_REG_CASE(op2, 13, val); \ READ_WB_REG_CASE(op2, 14, val); \ READ_WB_REG_CASE(op2, 15, val) #define SWITCH_CASES_WRITE_WB_REG(op2, val) \ WRITE_WB_REG_CASE(op2, 0, val); \ WRITE_WB_REG_CASE(op2, 1, val); \ WRITE_WB_REG_CASE(op2, 2, val); \ WRITE_WB_REG_CASE(op2, 3, val); \ WRITE_WB_REG_CASE(op2, 4, val); \ WRITE_WB_REG_CASE(op2, 5, val); \ WRITE_WB_REG_CASE(op2, 6, val); \ WRITE_WB_REG_CASE(op2, 7, val); \ WRITE_WB_REG_CASE(op2, 8, val); \ WRITE_WB_REG_CASE(op2, 9, val); \ WRITE_WB_REG_CASE(op2, 10, val); \ WRITE_WB_REG_CASE(op2, 11, val); \ WRITE_WB_REG_CASE(op2, 12, val); \ WRITE_WB_REG_CASE(op2, 13, val); \ WRITE_WB_REG_CASE(op2, 14, val); \ WRITE_WB_REG_CASE(op2, 15, val) static uint32_t dbg_wb_read_reg(int reg, int n) { uint32_t val; val = 0; switch (reg + n) { SWITCH_CASES_READ_WB_REG(DBG_WB_WVR, val); SWITCH_CASES_READ_WB_REG(DBG_WB_WCR, val); SWITCH_CASES_READ_WB_REG(DBG_WB_BVR, val); SWITCH_CASES_READ_WB_REG(DBG_WB_BCR, val); default: db_printf( "trying to read from CP14 reg. using wrong opc2 %d\n", reg >> OP2_SHIFT); } return (val); } static void dbg_wb_write_reg(int reg, int n, uint32_t val) { switch (reg + n) { SWITCH_CASES_WRITE_WB_REG(DBG_WB_WVR, val); SWITCH_CASES_WRITE_WB_REG(DBG_WB_WCR, val); SWITCH_CASES_WRITE_WB_REG(DBG_WB_BVR, val); SWITCH_CASES_WRITE_WB_REG(DBG_WB_BCR, val); default: db_printf( "trying to write to CP14 reg. using wrong opc2 %d\n", reg >> OP2_SHIFT); } isb(); } static __inline boolean_t dbg_capable(void) { return (atomic_cmpset_int(&dbg_capable_var, 0, 0) == 0); } boolean_t kdb_cpu_pc_is_singlestep(db_addr_t pc) { /* * XXX: If the platform fails to enable its debug arch. * there will be no stepping capabilities */ if (!dbg_capable()) return (FALSE); if (dbg_find_slot(DBG_TYPE_BREAKPOINT, pc) != ~0U) return (TRUE); return (FALSE); } void kdb_cpu_set_singlestep(void) { db_expr_t inst; db_addr_t pc, brpc; uint32_t wcr; u_int i; if (!dbg_capable()) return; /* * Disable watchpoints, e.g. stepping over watched instruction will * trigger break exception instead of single-step exception and locks * CPU on that instruction for ever. 
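As an aside on the dbg_wb_read_reg()/dbg_wb_write_reg() helpers earlier in this file: CRm and opc2 are immediates in the mrc/mcr encoding, so every slot needs its own instruction, which is why the macros expand to a 16-way switch keyed on ((opc2 << OP2_SHIFT) + slot). A small illustration of that key (not kernel code):

#define OP2_SHIFT	4

static void
decode_key(int key, int *opc2, int *crm)
{
	*opc2 = key >> OP2_SHIFT;	/* 4 = BVR, 5 = BCR, 6 = WVR, 7 = WCR */
	*crm = key & 0xf;		/* slot number, i.e. c0..c15 */
}

/* DBG_REG_BASE_WVR + 3 == (6 << 4) + 3 == 0x63 -> "mrc p14, 0, <Rt>, c0, c3, 6" */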
*/ for (i = 0; i < dbg_watchpoint_num; i++) { wcr = dbg_wb_read_reg(DBG_REG_BASE_WCR, i); if ((wcr & DBG_WB_CTRL_E) != 0) { dbg_wb_write_reg(DBG_REG_BASE_WCR, i, (wcr & ~DBG_WB_CTRL_E)); } } pc = PC_REGS(); inst = db_get_value(pc, sizeof(pc), FALSE); if (inst_branch(inst) || inst_call(inst) || inst_return(inst)) { brpc = branch_taken(inst, pc); dbg_setup_breakpoint(brpc, INSN_SIZE, DBG_BKPT_BT_SLOT); } pc = next_instr_address(pc, 0); dbg_setup_breakpoint(pc, INSN_SIZE, DBG_BKPT_BNT_SLOT); } void kdb_cpu_clear_singlestep(void) { uint32_t wvr, wcr; u_int i; if (!dbg_capable()) return; dbg_remove_breakpoint(DBG_BKPT_BT_SLOT); dbg_remove_breakpoint(DBG_BKPT_BNT_SLOT); /* Restore all watchpoints */ for (i = 0; i < dbg_watchpoint_num; i++) { wcr = dbg_wb_read_reg(DBG_REG_BASE_WCR, i); wvr = dbg_wb_read_reg(DBG_REG_BASE_WVR, i); /* Watchpoint considered not empty if address value is not 0 */ if ((wvr & DBGWVR_ADDR_MASK) != 0) { dbg_wb_write_reg(DBG_REG_BASE_WCR, i, (wcr | DBG_WB_CTRL_E)); } } } int kdb_cpu_set_watchpoint(vm_offset_t addr, size_t size, int access) { enum dbg_access_t dbg_access; switch (access) { case KDB_DBG_ACCESS_R: dbg_access = HW_WATCHPOINT_R; break; case KDB_DBG_ACCESS_W: dbg_access = HW_WATCHPOINT_W; break; case KDB_DBG_ACCESS_RW: dbg_access = HW_WATCHPOINT_RW; break; default: return (EINVAL); } return (dbg_setup_watchpoint(addr, size, (enum dbg_access_t)access)); } int kdb_cpu_clr_watchpoint(vm_offset_t addr, size_t size) { return (dbg_remove_watchpoint(addr, size)); } int dbg_setup_watchpoint(db_expr_t addr, db_expr_t size, enum dbg_access_t access) { struct dbg_wb_conf conf; if (access == HW_BREAKPOINT_X) { db_printf("Invalid access type for watchpoint: %d\n", access); return (EINVAL); } conf.address = addr; conf.size = size; conf.access = access; conf.type = DBG_TYPE_WATCHPOINT; return (dbg_setup_xpoint(&conf)); } int dbg_remove_watchpoint(db_expr_t addr, db_expr_t size __unused) { struct dbg_wb_conf conf; conf.address = addr; conf.type = DBG_TYPE_WATCHPOINT; return (dbg_remove_xpoint(&conf)); } static int dbg_setup_breakpoint(db_expr_t addr, db_expr_t size, u_int slot) { struct dbg_wb_conf conf; conf.address = addr; conf.size = size; conf.access = HW_BREAKPOINT_X; conf.type = DBG_TYPE_BREAKPOINT; conf.slot = slot; return (dbg_setup_xpoint(&conf)); } static int dbg_remove_breakpoint(u_int slot) { struct dbg_wb_conf conf; /* Slot already cleared. 
Don't recurse */ if (dbg_check_slot_free(DBG_TYPE_BREAKPOINT, slot)) return (0); conf.slot = slot; conf.type = DBG_TYPE_BREAKPOINT; return (dbg_remove_xpoint(&conf)); } static const char * dbg_watchtype_str(uint32_t type) { switch (type) { case DBG_WB_CTRL_EXEC: return ("execute"); case DBG_WB_CTRL_STORE: return ("write"); case DBG_WB_CTRL_LOAD: return ("read"); case DBG_WB_CTRL_LOAD | DBG_WB_CTRL_STORE: return ("read/write"); default: return ("invalid"); } } static int dbg_watchtype_len(uint32_t len) { switch (len) { case DBG_WB_CTRL_LEN_1: return (1); case DBG_WB_CTRL_LEN_2: return (2); case DBG_WB_CTRL_LEN_4: return (4); case DBG_WB_CTRL_LEN_8: return (8); default: return (0); } } void dbg_show_watchpoint(void) { uint32_t wcr, len, type; uint32_t addr; boolean_t is_enabled; int i; if (!dbg_capable()) { db_printf("Architecture does not support HW " "breakpoints/watchpoints\n"); return; } db_printf("\nhardware watchpoints:\n"); db_printf(" watch status type len address symbol\n"); db_printf(" ----- -------- ---------- --- ---------- ------------------\n"); for (i = 0; i < dbg_watchpoint_num; i++) { wcr = dbg_wb_read_reg(DBG_REG_BASE_WCR, i); if ((wcr & DBG_WB_CTRL_E) != 0) is_enabled = TRUE; else is_enabled = FALSE; type = DBG_WB_CTRL_ACCESS_MASK(wcr); len = DBG_WB_CTRL_LEN_MASK(wcr); addr = dbg_wb_read_reg(DBG_REG_BASE_WVR, i) & DBGWVR_ADDR_MASK; db_printf(" %-5d %-8s %10s %3d 0x%08x ", i, is_enabled ? "enabled" : "disabled", is_enabled ? dbg_watchtype_str(type) : "", is_enabled ? dbg_watchtype_len(len) : 0, addr); db_printsym((db_addr_t)addr, DB_STGY_ANY); db_printf("\n"); } } static boolean_t dbg_check_slot_free(enum dbg_t type, u_int slot) { uint32_t cr, vr; uint32_t max; switch(type) { case DBG_TYPE_BREAKPOINT: max = dbg_breakpoint_num; cr = DBG_REG_BASE_BCR; vr = DBG_REG_BASE_BVR; break; case DBG_TYPE_WATCHPOINT: max = dbg_watchpoint_num; cr = DBG_REG_BASE_WCR; vr = DBG_REG_BASE_WVR; break; default: db_printf("%s: Unsupported event type %d\n", __func__, type); return (FALSE); } if (slot >= max) { db_printf("%s: Invalid slot number %d, max %d\n", __func__, slot, max - 1); return (FALSE); } if ((dbg_wb_read_reg(cr, slot) & DBG_WB_CTRL_E) == 0 && (dbg_wb_read_reg(vr, slot) & DBGWVR_ADDR_MASK) == 0) return (TRUE); return (FALSE); } static u_int dbg_find_free_slot(enum dbg_t type) { u_int max, i; switch(type) { case DBG_TYPE_BREAKPOINT: max = dbg_breakpoint_num; break; case DBG_TYPE_WATCHPOINT: max = dbg_watchpoint_num; break; default: db_printf("Unsupported debug type\n"); return (~0U); } for (i = 0; i < max; i++) { if (dbg_check_slot_free(type, i)) return (i); } return (~0U); } static u_int dbg_find_slot(enum dbg_t type, db_expr_t addr) { uint32_t reg_addr, reg_ctrl; u_int max, i; switch(type) { case DBG_TYPE_BREAKPOINT: max = dbg_breakpoint_num; reg_addr = DBG_REG_BASE_BVR; reg_ctrl = DBG_REG_BASE_BCR; break; case DBG_TYPE_WATCHPOINT: max = dbg_watchpoint_num; reg_addr = DBG_REG_BASE_WVR; reg_ctrl = DBG_REG_BASE_WCR; break; default: db_printf("Unsupported debug type\n"); return (~0U); } for (i = 0; i < max; i++) { if ((dbg_wb_read_reg(reg_addr, i) == addr) && ((dbg_wb_read_reg(reg_ctrl, i) & DBG_WB_CTRL_E) != 0)) return (i); } return (~0U); } static __inline boolean_t dbg_monitor_is_enabled(void) { return ((cp14_dbgdscrint_get() & DBGSCR_MDBG_EN) != 0); } static int dbg_enable_monitor(void) { uint32_t dbg_dscr; /* Already enabled? 
Just return */ if (dbg_monitor_is_enabled()) return (0); dbg_dscr = cp14_dbgdscrint_get(); switch (dbg_model) { case ID_DFR0_CP_DEBUG_M_V6: case ID_DFR0_CP_DEBUG_M_V6_1: /* fall through */ cp14_dbgdscr_v6_set(dbg_dscr | DBGSCR_MDBG_EN); break; case ID_DFR0_CP_DEBUG_M_V7: /* fall through */ case ID_DFR0_CP_DEBUG_M_V7_1: cp14_dbgdscr_v7_set(dbg_dscr | DBGSCR_MDBG_EN); break; default: break; } isb(); /* Verify that Monitor mode is set */ if (dbg_monitor_is_enabled()) return (0); return (ENXIO); } static int dbg_setup_xpoint(struct dbg_wb_conf *conf) { struct pcpu *pcpu; struct dbreg *d; const char *typestr; uint32_t cr_size, cr_priv, cr_access; uint32_t reg_ctrl, reg_addr, ctrl, addr; boolean_t is_bkpt; u_int cpu; u_int i; if (!dbg_capable()) return (ENXIO); is_bkpt = (conf->type == DBG_TYPE_BREAKPOINT); typestr = is_bkpt ? "breakpoint" : "watchpoint"; if (is_bkpt) { if (dbg_breakpoint_num == 0) { db_printf("Breakpoints not supported on this architecture\n"); return (ENXIO); } i = conf->slot; if (!dbg_check_slot_free(DBG_TYPE_BREAKPOINT, i)) { /* * This should never happen. If it does it means that * there is an erroneus scenario somewhere. Still, it can * be done but let's inform the user. */ db_printf("ERROR: Breakpoint already set. Replacing...\n"); } } else { i = dbg_find_free_slot(DBG_TYPE_WATCHPOINT); if (i == ~0U) { db_printf("Can not find slot for %s, max %d slots supported\n", typestr, dbg_watchpoint_num); return (EBUSY); } } /* Kernel access only */ cr_priv = DBG_WB_CTRL_PL1; switch(conf->size) { case 1: cr_size = DBG_WB_CTRL_LEN_1; break; case 2: cr_size = DBG_WB_CTRL_LEN_2; break; case 4: cr_size = DBG_WB_CTRL_LEN_4; break; case 8: cr_size = DBG_WB_CTRL_LEN_8; break; default: db_printf("Unsupported address size for %s: %zu\n", typestr, conf->size); return (EINVAL); } if (is_bkpt) { cr_access = DBG_WB_CTRL_EXEC; reg_ctrl = DBG_REG_BASE_BCR; reg_addr = DBG_REG_BASE_BVR; /* Always unlinked BKPT */ ctrl = (cr_size | cr_access | cr_priv | DBG_WB_CTRL_E); } else { switch(conf->access) { case HW_WATCHPOINT_R: cr_access = DBG_WB_CTRL_LOAD; break; case HW_WATCHPOINT_W: cr_access = DBG_WB_CTRL_STORE; break; case HW_WATCHPOINT_RW: cr_access = DBG_WB_CTRL_LOAD | DBG_WB_CTRL_STORE; break; default: db_printf("Unsupported access type for %s: %d\n", typestr, conf->access); return (EINVAL); } reg_ctrl = DBG_REG_BASE_WCR; reg_addr = DBG_REG_BASE_WVR; ctrl = (cr_size | cr_access | cr_priv | DBG_WB_CTRL_E); } addr = conf->address; dbg_wb_write_reg(reg_addr, i, addr); dbg_wb_write_reg(reg_ctrl, i, ctrl); /* * Save watchpoint settings for all CPUs. * We don't need to do the same with breakpoints since HW breakpoints * are only used to perform single stepping. 
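For concreteness, the control word that dbg_setup_xpoint() above builds for a 4-byte, kernel-only, write-access watchpoint works out (from the defines earlier in this file) to:

    DBG_WB_CTRL_LEN_4 | DBG_WB_CTRL_STORE | DBG_WB_CTRL_PL1 | DBG_WB_CTRL_E
  = (0xf << 5)        | (0x2 << 3)        | (0x1 << 1)      | 0x1
  = 0x1f3

and this is the value written to the slot's WCR and mirrored into each CPU's saved dbg_wcr[].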
*/ if (!is_bkpt) { CPU_FOREACH(cpu) { pcpu = pcpu_find(cpu); /* Fill out the settings for watchpoint */ d = (struct dbreg *)pcpu->pc_dbreg; d->dbg_wvr[i] = addr; d->dbg_wcr[i] = ctrl; /* Skip update command for the current CPU */ if (cpu != PCPU_GET(cpuid)) pcpu->pc_dbreg_cmd = PC_DBREG_CMD_LOAD; } } /* Ensure all data is written before waking other CPUs */ atomic_thread_fence_rel(); return (0); } static int dbg_remove_xpoint(struct dbg_wb_conf *conf) { struct pcpu *pcpu; struct dbreg *d; uint32_t reg_ctrl, reg_addr, addr; boolean_t is_bkpt; u_int cpu; u_int i; if (!dbg_capable()) return (ENXIO); is_bkpt = (conf->type == DBG_TYPE_BREAKPOINT); addr = conf->address; if (is_bkpt) { i = conf->slot; reg_ctrl = DBG_REG_BASE_BCR; reg_addr = DBG_REG_BASE_BVR; } else { i = dbg_find_slot(DBG_TYPE_WATCHPOINT, addr); if (i == ~0U) { db_printf("Can not find watchpoint for address 0%x\n", addr); return (EINVAL); } reg_ctrl = DBG_REG_BASE_WCR; reg_addr = DBG_REG_BASE_WVR; } dbg_wb_write_reg(reg_ctrl, i, 0); dbg_wb_write_reg(reg_addr, i, 0); /* * Save watchpoint settings for all CPUs. * We don't need to do the same with breakpoints since HW breakpoints * are only used to perform single stepping. */ if (!is_bkpt) { CPU_FOREACH(cpu) { pcpu = pcpu_find(cpu); /* Fill out the settings for watchpoint */ d = (struct dbreg *)pcpu->pc_dbreg; d->dbg_wvr[i] = 0; d->dbg_wcr[i] = 0; /* Skip update command for the current CPU */ if (cpu != PCPU_GET(cpuid)) pcpu->pc_dbreg_cmd = PC_DBREG_CMD_LOAD; } /* Ensure all data is written before waking other CPUs */ atomic_thread_fence_rel(); } return (0); } static __inline uint32_t dbg_get_debug_model(void) { uint32_t dbg_m; dbg_m = ((cpuinfo.id_dfr0 & ID_DFR0_CP_DEBUG_M_MASK) >> ID_DFR0_CP_DEBUG_M_SHIFT); return (dbg_m); } static __inline boolean_t dbg_get_ossr(void) { switch (dbg_model) { case ID_DFR0_CP_DEBUG_M_V7: if ((cp14_dbgoslsr_get() & DBGOSLSR_OSLM0) != 0) return (TRUE); return (FALSE); case ID_DFR0_CP_DEBUG_M_V7_1: return (TRUE); default: return (FALSE); } } static __inline boolean_t dbg_arch_supported(void) { uint32_t dbg_didr; switch (dbg_model) { case ID_DFR0_CP_DEBUG_M_V6: case ID_DFR0_CP_DEBUG_M_V6_1: dbg_didr = cp14_dbgdidr_get(); /* * read-all-zeroes is used by QEMU * to indicate that ARMv6 debug support * is not implemented. Real hardware has at * least version bits set */ if (dbg_didr == 0) return (FALSE); return (TRUE); case ID_DFR0_CP_DEBUG_M_V7: case ID_DFR0_CP_DEBUG_M_V7_1: /* fall through */ return (TRUE); default: /* We only support valid v6.x/v7.x modes through CP14 */ return (FALSE); } } static __inline uint32_t dbg_get_wrp_num(void) { uint32_t dbg_didr; dbg_didr = cp14_dbgdidr_get(); return (DBGDIDR_WRPS_NUM(dbg_didr)); } static __inline uint32_t dgb_get_brp_num(void) { uint32_t dbg_didr; dbg_didr = cp14_dbgdidr_get(); return (DBGDIDR_BRPS_NUM(dbg_didr)); } static int dbg_reset_state(void) { u_int cpuid; size_t i; int err; cpuid = PCPU_GET(cpuid); err = 0; switch (dbg_model) { case ID_DFR0_CP_DEBUG_M_V6: case ID_DFR0_CP_DEBUG_M_V6_1: /* fall through */ /* * Arch needs monitor mode selected and enabled * to be able to access breakpoint/watchpoint registers. */ err = dbg_enable_monitor(); if (err != 0) return (err); goto vectr_clr; case ID_DFR0_CP_DEBUG_M_V7: /* Is core power domain powered up? */ if ((cp14_dbgprsr_get() & DBGPRSR_PU) == 0) err = ENXIO; if (err != 0) break; if (dbg_ossr) goto vectr_clr; break; case ID_DFR0_CP_DEBUG_M_V7_1: /* Is double lock set? 
*/ if ((cp14_dbgosdlr_get() & DBGPRSR_DLK) != 0) err = ENXIO; break; default: break; } if (err != 0) { db_printf("Debug facility locked (CPU%d)\n", cpuid); return (err); } /* * DBGOSLAR is always implemented for v7.1 Debug Arch. however is * optional for v7 (depends on OS save and restore support). */ if (((dbg_model & ID_DFR0_CP_DEBUG_M_V7_1) != 0) || dbg_ossr) { /* * Clear OS lock. * Writing any other value than 0xC5ACCESS will unlock. */ cp14_dbgoslar_set(0); isb(); } vectr_clr: /* * After reset we must ensure that DBGVCR has a defined value. * Disable all vector catch events. Safe to use - required in all * implementations. */ cp14_dbgvcr_set(0); isb(); /* * We have limited number of {watch,break}points, each consists of * two registers: * - wcr/bcr regsiter configurates corresponding {watch,break}point * behaviour * - wvr/bvr register keeps address we are hunting for * * Reset all breakpoints and watchpoints. */ for (i = 0; i < dbg_watchpoint_num; ++i) { dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0); dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0); } for (i = 0; i < dbg_breakpoint_num; ++i) { dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0); dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0); } return (0); } void dbg_monitor_init(void) { int err; /* Fetch ARM Debug Architecture model */ dbg_model = dbg_get_debug_model(); if (!dbg_arch_supported()) { db_printf("ARM Debug Architecture not supported\n"); return; } if (bootverbose) { db_printf("ARM Debug Architecture %s\n", (dbg_model == ID_DFR0_CP_DEBUG_M_V6) ? "v6" : (dbg_model == ID_DFR0_CP_DEBUG_M_V6_1) ? "v6.1" : (dbg_model == ID_DFR0_CP_DEBUG_M_V7) ? "v7" : (dbg_model == ID_DFR0_CP_DEBUG_M_V7_1) ? "v7.1" : "unknown"); } /* Do we have OS Save and Restore mechanism? */ dbg_ossr = dbg_get_ossr(); /* Find out many breakpoints and watchpoints we can use */ dbg_watchpoint_num = dbg_get_wrp_num(); dbg_breakpoint_num = dgb_get_brp_num(); if (bootverbose) { db_printf("%d watchpoints and %d breakpoints supported\n", dbg_watchpoint_num, dbg_breakpoint_num); } err = dbg_reset_state(); if (err == 0) { err = dbg_enable_monitor(); if (err == 0) { atomic_set_int(&dbg_capable_var, 1); return; } } db_printf("HW Breakpoints/Watchpoints not enabled on CPU%d\n", PCPU_GET(cpuid)); } CTASSERT(sizeof(struct dbreg) == sizeof(((struct pcpu *)NULL)->pc_dbreg)); void dbg_monitor_init_secondary(void) { u_int cpuid; int err; /* * This flag is set on the primary CPU * and its meaning is valid for other CPUs too. */ if (!dbg_capable()) return; cpuid = PCPU_GET(cpuid); err = dbg_reset_state(); if (err != 0) { /* * Something is very wrong. * WPs/BPs will not work correctly on this CPU. */ KASSERT(0, ("%s: Failed to reset Debug Architecture " "state on CPU%d", __func__, cpuid)); /* Disable HW debug capabilities for all CPUs */ atomic_set_int(&dbg_capable_var, 0); return; } err = dbg_enable_monitor(); if (err != 0) { KASSERT(0, ("%s: Failed to enable Debug Monitor" " on CPU%d", __func__, cpuid)); atomic_set_int(&dbg_capable_var, 0); } } void dbg_resume_dbreg(void) { struct dbreg *d; u_int i; /* * This flag is set on the primary CPU * and its meaning is valid for other CPUs too. 
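The cross-CPU handoff around dbg_resume_dbreg() relies on the release fence issued in dbg_setup_xpoint()/dbg_remove_xpoint() pairing with the acquire fence below. A plain C11 sketch of the same publish/consume shape (the kernel uses atomic(9) fences and per-CPU data; the ordering here is rearranged slightly to match the standard fence idiom):

#include <stdatomic.h>

struct shadow {
	unsigned long wvr, wcr;			/* saved watchpoint value/control */
	atomic_int cmd;				/* 0 = none, 1 = load */
};

static void
publish(struct shadow *s, unsigned long addr, unsigned long ctrl)
{
	s->wvr = addr;
	s->wcr = ctrl;
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&s->cmd, 1, memory_order_relaxed);
}

static void
consume(struct shadow *s, unsigned long *addr, unsigned long *ctrl)
{
	if (atomic_load_explicit(&s->cmd, memory_order_relaxed) != 1)
		return;
	atomic_thread_fence(memory_order_acquire);
	*addr = s->wvr;
	*ctrl = s->wcr;
	atomic_store_explicit(&s->cmd, 0, memory_order_relaxed);
}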
*/ if (!dbg_capable()) return; atomic_thread_fence_acq(); switch (PCPU_GET(dbreg_cmd)) { case PC_DBREG_CMD_LOAD: d = (struct dbreg *)PCPU_PTR(dbreg); /* Restore watchpoints */ for (i = 0; i < dbg_watchpoint_num; i++) { dbg_wb_write_reg(DBG_REG_BASE_WVR, i, d->dbg_wvr[i]); dbg_wb_write_reg(DBG_REG_BASE_WCR, i, d->dbg_wcr[i]); } PCPU_SET(dbreg_cmd, PC_DBREG_CMD_NONE); break; } } diff --git a/sys/arm/arm/machdep_kdb.c b/sys/arm/arm/machdep_kdb.c index 4dedb72edb22..b1f04c0832a1 100644 --- a/sys/arm/arm/machdep_kdb.c +++ b/sys/arm/arm/machdep_kdb.c @@ -1,144 +1,144 @@ /* $NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $ */ /*- * Copyright (c) 2004 Olivier Houchard * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include +#include #include #include -#include #ifdef DDB #include DB_SHOW_COMMAND(cp15, db_show_cp15) { u_int reg; reg = cp15_midr_get(); db_printf("Cpu ID: 0x%08x\n", reg); reg = cp15_ctr_get(); db_printf("Current Cache Lvl ID: 0x%08x\n",reg); reg = cp15_sctlr_get(); db_printf("Ctrl: 0x%08x\n",reg); reg = cp15_actlr_get(); db_printf("Aux Ctrl: 0x%08x\n",reg); reg = cp15_id_pfr0_get(); db_printf("Processor Feat 0: 0x%08x\n", reg); reg = cp15_id_pfr1_get(); db_printf("Processor Feat 1: 0x%08x\n", reg); reg = cp15_id_dfr0_get(); db_printf("Debug Feat 0: 0x%08x\n", reg); reg = cp15_id_afr0_get(); db_printf("Auxiliary Feat 0: 0x%08x\n", reg); reg = cp15_id_mmfr0_get(); db_printf("Memory Model Feat 0: 0x%08x\n", reg); reg = cp15_id_mmfr1_get(); db_printf("Memory Model Feat 1: 0x%08x\n", reg); reg = cp15_id_mmfr2_get(); db_printf("Memory Model Feat 2: 0x%08x\n", reg); reg = cp15_id_mmfr3_get(); db_printf("Memory Model Feat 3: 0x%08x\n", reg); reg = cp15_ttbr_get(); db_printf("TTB0: 0x%08x\n", reg); } DB_SHOW_COMMAND(vtop, db_show_vtop) { u_int reg; if (have_addr) { cp15_ats1cpr_set(addr); reg = cp15_par_get(); db_printf("Physical address reg: 0x%08x\n",reg); } else db_printf("show vtop \n"); } #endif /* DDB */ int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(&tf->tf_r0, regs->r, sizeof(regs->r)); regs->r_sp = tf->tf_usr_sp; regs->r_lr = tf->tf_usr_lr; regs->r_pc = tf->tf_pc; regs->r_cpsr = tf->tf_spsr; return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { bzero(regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(regs->r, &tf->tf_r0, sizeof(regs->r)); tf->tf_usr_sp = regs->r_sp; tf->tf_usr_lr = regs->r_lr; tf->tf_pc = regs->r_pc; tf->tf_spsr &= ~PSR_FLAGS; tf->tf_spsr |= regs->r_cpsr & PSR_FLAGS; return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { bzero(regs, sizeof(*regs)); return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { return (0); } diff --git a/sys/arm/include/reg.h b/sys/arm/include/reg.h index ab70ae128d39..8aee07c9b480 100644 --- a/sys/arm/include/reg.h +++ b/sys/arm/include/reg.h @@ -1,42 +1,33 @@ /* $NetBSD: reg.h,v 1.2 2001/02/23 21:23:52 reinoud Exp $ */ /* $FreeBSD$ */ #ifndef MACHINE_REG_H #define MACHINE_REG_H struct reg { unsigned int r[13]; unsigned int r_sp; unsigned int r_lr; unsigned int r_pc; unsigned int r_cpsr; }; struct fp_extended_precision { u_int32_t fp_exponent; u_int32_t fp_mantissa_hi; u_int32_t fp_mantissa_lo; }; typedef struct fp_extended_precision fp_reg_t; struct fpreg { unsigned int fpr_fpsr; fp_reg_t fpr[8]; }; struct dbreg { #define ARM_WR_MAX 16 /* Maximum number of watchpoint registers */ unsigned int dbg_wcr[ARM_WR_MAX]; /* Watchpoint Control Registers */ unsigned int dbg_wvr[ARM_WR_MAX]; /* Watchpoint Value Registers */ }; -#ifdef _KERNEL -int fill_regs(struct thread *, struct reg *); -int set_regs(struct thread *, struct reg *); -int fill_fpregs(struct thread *, struct fpreg *); -int set_fpregs(struct thread *, struct fpreg *); -int fill_dbregs(struct thread *, struct dbreg *); -int set_dbregs(struct thread *, struct dbreg *); -#endif - #endif /* !MACHINE_REG_H */ diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index bce3baf8e1ec..ee6f5157f5f3 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -1,1511 
+1,1511 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_acpi.h" #include "opt_platform.h" #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #ifdef VFP #include #endif #ifdef DEV_ACPI #include #include #endif #ifdef FDT #include #include #endif static void get_fpcontext(struct thread *td, mcontext_t *mcp); static void set_fpcontext(struct thread *td, mcontext_t *mcp); enum arm64_bus arm64_bus_method = ARM64_BUS_NONE; struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; int early_boot = 1; int cold = 1; static int boot_el; struct kva_md_info kmi; int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */ int has_pan; /* * Physical address of the EFI System Table. Stashed from the metadata hints * passed into the kernel and used by the EFI code to call runtime services. */ vm_paddr_t efi_systbl_phys; static struct efi_map_header *efihdr; /* pagezero_* implementations are provided in support.S */ void pagezero_simple(void *); void pagezero_cache(void *); /* pagezero_simple is default pagezero */ void (*pagezero)(void *p) = pagezero_simple; int (*apei_nmi)(void); static void pan_setup(void) { uint64_t id_aa64mfr1; id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE) has_pan = 1; } void pan_enable(void) { /* * The LLVM integrated assembler doesn't understand the PAN * PSTATE field. Because of this we need to manually create * the instruction in an asm block. This is equivalent to: * msr pan, #1 * * This sets the PAN bit, stopping the kernel from accessing * memory when userspace can also access it unless the kernel * uses the userspace load/store instructions. 
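The constant in the .inst below decodes as follows (assuming the standard A64 "MSR (immediate)" layout, where CRm in bits [11:8] carries the 4-bit immediate):

    0xd500409f		msr pan, #0	(op1 = 0, op2 = 4, CRm = 0)
  | (0x1 << 8)		sets CRm to 1
    ----------
    0xd500419f		msr pan, #1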
*/ if (has_pan) { WRITE_SPECIALREG(sctlr_el1, READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN); __asm __volatile(".inst 0xd500409f | (0x1 << 8)"); } } bool has_hyp(void) { return (boot_el == 2); } static void cpu_startup(void *dummy) { vm_paddr_t size; int i; printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem), ptoa((uintmax_t)realmem) / 1024 / 1024); if (bootverbose) { printf("Physical memory chunk(s):\n"); for (i = 0; phys_avail[i + 1] != 0; i += 2) { size = phys_avail[i + 1] - phys_avail[i]; printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[i], (uintmax_t)phys_avail[i + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / 1024 / 1024); undef_init(); install_cpu_errata(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); static void late_ifunc_resolve(void *dummy __unused) { link_elf_late_ireloc(); } SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); int cpu_idle_wakeup(int cpu) { return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sp = frame->tf_sp; regs->lr = frame->tf_lr; regs->elr = frame->tf_elr; regs->spsr = frame->tf_spsr; memcpy(regs->x, frame->tf_x, sizeof(regs->x)); #ifdef COMPAT_FREEBSD32 /* * We may be called here for a 32bits process, if we're using a * 64bits debugger. If so, put PC and SPSR where it expects it. */ if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { regs->x[15] = frame->tf_elr; regs->x[16] = frame->tf_spsr; } #endif return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sp = regs->sp; frame->tf_lr = regs->lr; frame->tf_elr = regs->elr; frame->tf_spsr &= ~PSR_FLAGS; frame->tf_spsr |= regs->spsr & PSR_FLAGS; memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { /* * We may be called for a 32bits process if we're using * a 64bits debugger. If so, get PC and SPSR from where * it put it. */ frame->tf_elr = regs->x[15]; frame->tf_spsr = regs->x[16] & PSR_FLAGS; } #endif return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. 
*/ if (td == curthread) vfp_save_state(td, pcb); KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, ("Called fill_fpregs while the kernel is using the VFP")); memcpy(regs->fp_q, pcb->pcb_fpustate.vfp_regs, sizeof(regs->fp_q)); regs->fp_cr = pcb->pcb_fpustate.vfp_fpcr; regs->fp_sr = pcb->pcb_fpustate.vfp_fpsr; } else #endif memset(regs, 0, sizeof(*regs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, ("Called set_fpregs while the kernel is using the VFP")); memcpy(pcb->pcb_fpustate.vfp_regs, regs->fp_q, sizeof(regs->fp_q)); pcb->pcb_fpustate.vfp_fpcr = regs->fp_cr; pcb->pcb_fpustate.vfp_fpsr = regs->fp_sr; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { struct debug_monitor_state *monitor; int i; uint8_t debug_ver, nbkpts, nwtpts; memset(regs, 0, sizeof(*regs)); extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_DebugVer_SHIFT, &debug_ver); extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_BRPs_SHIFT, &nbkpts); extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_WRPs_SHIFT, &nwtpts); /* * The BRPs field contains the number of breakpoints - 1. Armv8-A * allows the hardware to provide 2-16 breakpoints so this won't * overflow an 8 bit value. The same applies to the WRPs field. */ nbkpts++; nwtpts++; regs->db_debug_ver = debug_ver; regs->db_nbkpts = nbkpts; regs->db_nwtpts = nwtpts; monitor = &td->td_pcb->pcb_dbg_regs; if ((monitor->dbg_flags & DBGMON_ENABLED) != 0) { for (i = 0; i < nbkpts; i++) { regs->db_breakregs[i].dbr_addr = monitor->dbg_bvr[i]; regs->db_breakregs[i].dbr_ctrl = monitor->dbg_bcr[i]; } for (i = 0; i < nwtpts; i++) { regs->db_watchregs[i].dbw_addr = monitor->dbg_wvr[i]; regs->db_watchregs[i].dbw_ctrl = monitor->dbg_wcr[i]; } } return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { struct debug_monitor_state *monitor; uint64_t addr; uint32_t ctrl; int count; int i; monitor = &td->td_pcb->pcb_dbg_regs; count = 0; monitor->dbg_enable_count = 0; for (i = 0; i < DBG_BRP_MAX; i++) { addr = regs->db_breakregs[i].dbr_addr; ctrl = regs->db_breakregs[i].dbr_ctrl; /* * Don't let the user set a breakpoint on a kernel or * non-canonical user address. */ if (addr >= VM_MAXUSER_ADDRESS) return (EINVAL); /* * The lowest 2 bits are ignored, so record the effective * address. */ addr = rounddown2(addr, 4); /* * Some control fields are ignored, and other bits reserved. * Only unlinked, address-matching breakpoints are supported. * * XXX: fields that appear unvalidated, such as BAS, have * constrained undefined behaviour. If the user mis-programs * these, there is no risk to the system. */ ctrl &= DBG_BCR_EN | DBG_BCR_PMC | DBG_BCR_BAS; if ((ctrl & DBG_BCR_EN) != 0) { /* Only target EL0. */ if ((ctrl & DBG_BCR_PMC) != DBG_BCR_PMC_EL0) return (EINVAL); monitor->dbg_enable_count++; } monitor->dbg_bvr[i] = addr; monitor->dbg_bcr[i] = ctrl; } for (i = 0; i < DBG_WRP_MAX; i++) { addr = regs->db_watchregs[i].dbw_addr; ctrl = regs->db_watchregs[i].dbw_ctrl; /* * Don't let the user set a watchpoint on a kernel or * non-canonical user address. */ if (addr >= VM_MAXUSER_ADDRESS) return (EINVAL); /* * Some control fields are ignored, and other bits reserved. * Only unlinked watchpoints are supported. */ ctrl &= DBG_WCR_EN | DBG_WCR_PAC | DBG_WCR_LSC | DBG_WCR_BAS | DBG_WCR_MASK; if ((ctrl & DBG_WCR_EN) != 0) { /* Only target EL0. */ if ((ctrl & DBG_WCR_PAC) != DBG_WCR_PAC_EL0) return (EINVAL); /* Must set at least one of the load/store bits. 
*/ if ((ctrl & DBG_WCR_LSC) == 0) return (EINVAL); /* * When specifying the address range with BAS, the MASK * field must be zero. */ if ((ctrl & DBG_WCR_BAS) != DBG_WCR_BAS_MASK && (ctrl & DBG_WCR_MASK) != 0) return (EINVAL); monitor->dbg_enable_count++; } monitor->dbg_wvr[i] = addr; monitor->dbg_wcr[i] = ctrl; } if (monitor->dbg_enable_count > 0) monitor->dbg_flags |= DBGMON_ENABLED; return (0); } #ifdef COMPAT_FREEBSD32 int fill_regs32(struct thread *td, struct reg32 *regs) { int i; struct trapframe *tf; tf = td->td_frame; for (i = 0; i < 13; i++) regs->r[i] = tf->tf_x[i]; /* For arm32, SP is r13 and LR is r14 */ regs->r_sp = tf->tf_x[13]; regs->r_lr = tf->tf_x[14]; regs->r_pc = tf->tf_elr; regs->r_cpsr = tf->tf_spsr; return (0); } int set_regs32(struct thread *td, struct reg32 *regs) { int i; struct trapframe *tf; tf = td->td_frame; for (i = 0; i < 13; i++) tf->tf_x[i] = regs->r[i]; /* For arm 32, SP is r13 an LR is r14 */ tf->tf_x[13] = regs->r_sp; tf->tf_x[14] = regs->r_lr; tf->tf_elr = regs->r_pc; tf->tf_spsr = regs->r_cpsr; return (0); } /* XXX fill/set dbregs/fpregs are stubbed on 32-bit arm. */ int fill_fpregs32(struct thread *td, struct fpreg32 *regs) { memset(regs, 0, sizeof(*regs)); return (0); } int set_fpregs32(struct thread *td, struct fpreg32 *regs) { return (0); } int fill_dbregs32(struct thread *td, struct dbreg32 *regs) { memset(regs, 0, sizeof(*regs)); return (0); } int set_dbregs32(struct thread *td, struct dbreg32 *regs) { return (0); } #endif int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_elr = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_spsr |= PSR_SS; td->td_pcb->pcb_flags |= PCB_SINGLE_STEP; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf = td->td_frame; struct pcb *pcb = td->td_pcb; memset(tf, 0, sizeof(struct trapframe)); tf->tf_x[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_lr = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; td->td_pcb->pcb_tpidr_el0 = 0; td->td_pcb->pcb_tpidrro_el0 = 0; WRITE_SPECIALREG(tpidrro_el0, 0); WRITE_SPECIALREG(tpidr_el0, 0); #ifdef VFP vfp_reset_state(td, pcb); #endif /* * Clear debug register state. It is not applicable to the new process. 
*/ bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs)); } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct gpregs *)0)->gp_x); CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct reg *)0)->x); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_x[0] = 0; mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; mcp->mc_gpregs.gp_spsr = tf->tf_spsr; } memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_lr = tf->tf_lr; mcp->mc_gpregs.gp_elr = tf->tf_elr; get_fpcontext(td, mcp); return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; uint32_t spsr; spsr = mcp->mc_gpregs.gp_spsr; if ((spsr & PSR_M_MASK) != PSR_M_EL0t || (spsr & PSR_AARCH32) != 0 || (spsr & PSR_DAIF) != (td->td_frame->tf_spsr & PSR_DAIF)) return (EINVAL); memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_lr = mcp->mc_gpregs.gp_lr; tf->tf_elr = mcp->mc_gpregs.gp_elr; tf->tf_spsr = mcp->mc_gpregs.gp_spsr; set_fpcontext(td, mcp); return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, curpcb); KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate, ("Called get_fpcontext while the kernel is using the VFP")); KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Non-userspace FPU flags set in get_fpcontext")); memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_fpustate.vfp_regs, sizeof(mcp->mc_fpregs.fp_q)); mcp->mc_fpregs.fp_cr = curpcb->pcb_fpustate.vfp_fpcr; mcp->mc_fpregs.fp_sr = curpcb->pcb_fpustate.vfp_fpsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* * Discard any vfp state for the current thread, we * are about to override it. */ vfp_discard(td); KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate, ("Called set_fpcontext while the kernel is using the VFP")); memcpy(curpcb->pcb_fpustate.vfp_regs, mcp->mc_fpregs.fp_q, sizeof(mcp->mc_fpregs.fp_q)); curpcb->pcb_fpustate.vfp_fpcr = mcp->mc_fpregs.fp_cr; curpcb->pcb_fpustate.vfp_fpsr = mcp->mc_fpregs.fp_sr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "dsb sy \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* We should have shutdown by now, if not enter a low power sleep */ intr_disable(); while (1) { __asm __volatile("wfi"); } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* ARM64TODO TBD */ } /* Get current clock frequency for the given CPU ID. 
*/ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { struct pcpu *pc; pc = pcpu_find(cpu_id); if (pc == NULL || rate == NULL) return (EINVAL); if (pc->pc_clock == 0) return (EOPNOTSUPP); *rate = pc->pc_clock; return (0); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; pcpu->pc_mpidr = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t daif; td = curthread; if (td->td_md.md_spinlock_count == 0) { daif = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = daif; critical_enter(); } else td->td_md.md_spinlock_count++; } void spinlock_exit(void) { struct thread *td; register_t daif; td = curthread; daif = td->td_md.md_saved_daif; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { critical_exit(); intr_restore(daif); } } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; int error; if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { int i; for (i = 0; i < nitems(pcb->pcb_x); i++) pcb->pcb_x[i] = tf->tf_x[i]; /* NB: pcb_lr is the PC, see PC_REGS() in db_machdep.h */ pcb->pcb_lr = tf->tf_elr; pcb->pcb_sp = tf->tf_sp; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int onstack, sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ bzero(&frame, sizeof(frame)); get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack = td->td_sigstk; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ? (onstack ? SS_ONSTACK : 0) : SS_DISABLE; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. 
*/ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_x[0]= sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_kstack_pages = KSTACK_PAGES; thread0.td_pcb = (struct pcb *)(thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; /* * Unmask SError exceptions. They are used to signal a RAS failure, * or other hardware error. */ serror_enable(); } typedef struct { uint32_t type; uint64_t phys_start; uint64_t virt_start; uint64_t num_pages; uint64_t attr; } EFI_MEMORY_DESCRIPTOR; typedef void (*efi_map_entry_cb)(struct efi_md *); static void foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb) { struct efi_md *map, *p; size_t efisz; int ndesc, i; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { cb(p); } } static void exclude_efi_map_entry(struct efi_md *p) { switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: physmem_exclude_region(p->md_phys, p->md_pages * PAGE_SIZE, EXFLAG_NOALLOC); } } static void exclude_efi_map_entries(struct efi_map_header *efihdr) { foreach_efi_map_entry(efihdr, exclude_efi_map_entry); } static void add_efi_map_entry(struct efi_md *p) { switch (p->md_type) { case EFI_MD_TYPE_RT_DATA: /* * Runtime data will be excluded after the DMAP * region is created to stop it from being added * to phys_avail. */ case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. 
*/ physmem_hardware_region(p->md_phys, p->md_pages * PAGE_SIZE); break; } } static void add_efi_map_entries(struct efi_map_header *efihdr) { foreach_efi_map_entry(efihdr, add_efi_map_entry); } static void print_efi_map_entry(struct efi_md *p) { const char *type; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode", "PersistentMemory" }; if (p->md_type < nitems(types)) type = types[p->md_type]; else type = ""; printf("%23s %012lx %012lx %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_NV) printf("NV "); if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) printf("MORE_RELIABLE "); if (p->md_attr & EFI_MD_ATTR_RO) printf("RO "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } static void print_efi_map_entries(struct efi_map_header *efihdr) { printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); foreach_efi_map_entry(efihdr, print_efi_map_entry); } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == 0) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (dtbp == (vm_offset_t)NULL) { #ifndef TSLOG printf("ERROR loading DTB\n"); #endif return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); parse_fdt_bootargs(); } #endif static bool bus_probe(void) { bool has_acpi, has_fdt; char *order, *env; has_acpi = has_fdt = false; #ifdef FDT has_fdt = (OF_peer(0) != 0); #endif #ifdef DEV_ACPI has_acpi = (AcpiOsGetRootPointer() != 0); #endif env = kern_getenv("kern.cfg.order"); if (env != NULL) { order = env; while (order != NULL) { if (has_acpi && strncmp(order, "acpi", 4) == 0 && (order[4] == ',' || order[4] == '\0')) { arm64_bus_method = ARM64_BUS_ACPI; break; } if (has_fdt && strncmp(order, "fdt", 3) == 0 && (order[3] == ',' || order[3] == '\0')) { arm64_bus_method = ARM64_BUS_FDT; break; } order = strchr(order, ','); } freeenv(env); /* If we set the bus method it is valid */ if (arm64_bus_method != ARM64_BUS_NONE) return (true); } /* If no order or an invalid order was set use the default */ if (arm64_bus_method == ARM64_BUS_NONE) { if (has_fdt) arm64_bus_method = ARM64_BUS_FDT; else if (has_acpi) arm64_bus_method = ARM64_BUS_ACPI; } /* * If no option was set the default is valid, otherwise we are * setting one to get cninit() working, then calling panic to tell * the user about the invalid bus setup. 
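 *
 * For example, with the loader tunable
 *
 *	kern.cfg.order="acpi,fdt"
 *
 * a machine whose firmware publishes an ACPI root pointer matches the
 * leading "acpi" token and selects ARM64_BUS_ACPI.  If the tunable only
 * names methods that are unavailable, arm64_bus_method is left at
 * ARM64_BUS_NONE, the fallback below picks whichever of FDT or ACPI is
 * present, and bus_probe() returns false so the caller can report the
 * bad setting once the console is up.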
*/ return (env == NULL); } static void cache_setup(void) { int dczva_line_shift; uint32_t dczid_el0; identify_cache(READ_SPECIALREG(ctr_el0)); dczid_el0 = READ_SPECIALREG(dczid_el0); /* Check if dc zva is not prohibited */ if (dczid_el0 & DCZID_DZP) dczva_line_size = 0; else { /* Same as with above calculations */ dczva_line_shift = DCZID_BS_SIZE(dczid_el0); dczva_line_size = sizeof(int) << dczva_line_shift; /* Change pagezero function */ pagezero = pagezero_cache; } } int memory_mapping_mode(vm_paddr_t pa) { struct efi_md *map, *p; size_t efisz; int ndesc, i; if (efihdr == NULL) return (VM_MEMATTR_WRITE_BACK); /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return (VM_MEMATTR_WRITE_BACK); ndesc = efihdr->memory_size / efihdr->descriptor_size; for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (pa < p->md_phys || pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE) continue; if (p->md_type == EFI_MD_TYPE_IOMEM || p->md_type == EFI_MD_TYPE_IOPORT) return (VM_MEMATTR_DEVICE); else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 || p->md_type == EFI_MD_TYPE_RECLAIM) return (VM_MEMATTR_WRITE_BACK); else if ((p->md_attr & EFI_MD_ATTR_WT) != 0) return (VM_MEMATTR_WRITE_THROUGH); else if ((p->md_attr & EFI_MD_ATTR_WC) != 0) return (VM_MEMATTR_WRITE_COMBINING); break; } return (VM_MEMATTR_DEVICE); } void initarm(struct arm64_bootparams *abp) { struct efi_fb *efifb; struct pcpu *pcpup; char *env; #ifdef FDT struct mem_region mem_regions[FDT_MEM_REGIONS]; int mem_regions_sz; phandle_t root; char dts_version[255]; #endif vm_offset_t lastaddr; caddr_t kmdp; bool valid; TSRAW(&thread0, TS_ENTER, __func__, NULL); boot_el = abp->boot_el; /* Parse loader or FDT boot parametes. Determine last used address. */ lastaddr = parse_boot_param(abp); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); identify_cpu(0); update_special_regs(0); link_elf_ireloc(kmdp); try_load_dtb(kmdp); efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); /* Load the physical memory ranges */ efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) add_efi_map_entries(efihdr); #ifdef FDT else { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); physmem_hardware_regions(mem_regions, mem_regions_sz); } if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0) physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); #endif /* Exclude the EFI framebuffer from our view of physical memory. */ efifb = (struct efi_fb *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_FB); if (efifb != NULL) physmem_exclude_region(efifb->fb_addr, efifb->fb_size, EXFLAG_NOALLOC); /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. 
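 *
 * Roughly speaking, x18 is the live per-CPU pointer consulted by
 * PCPU_GET() and friends, while tpidr_el1 keeps a copy that the
 * exception entry path can reload before touching per-CPU data,
 * presumably with something like:
 *
 *	mrs	x18, tpidr_el1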
*/ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); PCPU_SET(midr, get_midr()); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); pan_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt, KERNBASE - abp->kern_delta, lastaddr - KERNBASE); /* Exclude entries needed in the DMAP region, but not phys_avail */ if (efihdr != NULL) exclude_efi_map_entries(efihdr); physmem_init_kernel_globals(); devmap_bootstrap(0, NULL); valid = bus_probe(); cninit(); set_ttbr0(abp->kern_ttbr0); cpu_tlb_flushID(); if (!valid) panic("Invalid bus configuration: %s", kern_getenv("kern.cfg.order")); /* * Dump the boot metadata. We have to wait for cninit() since console * output is required. If it's grossly incorrect the kernel will never * make it this far. */ if (getenv_is_true("debug.dump_modinfo_at_boot")) preload_dump(); init_proc0(abp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); dbg_init(); kdb_init(); pan_enable(); kcsan_cpu_init(0); env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); #ifdef FDT if (arm64_bus_method == ARM64_BUS_FDT) { root = OF_finddevice("/"); if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) { if (strcmp(LINUX_DTS_VERSION, dts_version) != 0) printf("WARNING: DTB version is %s while kernel expects %s, " "please update the DTB in the ESP\n", dts_version, LINUX_DTS_VERSION); } else { printf("WARNING: Cannot find freebsd,dts-version property, " "cannot check DTB compliance\n"); } } #endif if (boothowto & RB_VERBOSE) { if (efihdr != NULL) print_efi_map_entries(efihdr); physmem_print_tables(); } early_boot = 0; TSEXIT(); } void dbg_init(void) { /* Clear OS lock */ WRITE_SPECIALREG(oslar_el1, 0); /* This permits DDB to use debug registers for watchpoints. */ dbg_monitor_init(); /* TODO: Eventually will need to initialize debug registers here.
*/ } #ifdef DDB #include DB_SHOW_COMMAND(specialregs, db_show_spregs) { #define PRINT_REG(reg) \ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) PRINT_REG(actlr_el1); PRINT_REG(afsr0_el1); PRINT_REG(afsr1_el1); PRINT_REG(aidr_el1); PRINT_REG(amair_el1); PRINT_REG(ccsidr_el1); PRINT_REG(clidr_el1); PRINT_REG(contextidr_el1); PRINT_REG(cpacr_el1); PRINT_REG(csselr_el1); PRINT_REG(ctr_el0); PRINT_REG(currentel); PRINT_REG(daif); PRINT_REG(dczid_el0); PRINT_REG(elr_el1); PRINT_REG(esr_el1); PRINT_REG(far_el1); #if 0 /* ARM64TODO: Enable VFP before reading floating-point registers */ PRINT_REG(fpcr); PRINT_REG(fpsr); #endif PRINT_REG(id_aa64afr0_el1); PRINT_REG(id_aa64afr1_el1); PRINT_REG(id_aa64dfr0_el1); PRINT_REG(id_aa64dfr1_el1); PRINT_REG(id_aa64isar0_el1); PRINT_REG(id_aa64isar1_el1); PRINT_REG(id_aa64pfr0_el1); PRINT_REG(id_aa64pfr1_el1); PRINT_REG(id_afr0_el1); PRINT_REG(id_dfr0_el1); PRINT_REG(id_isar0_el1); PRINT_REG(id_isar1_el1); PRINT_REG(id_isar2_el1); PRINT_REG(id_isar3_el1); PRINT_REG(id_isar4_el1); PRINT_REG(id_isar5_el1); PRINT_REG(id_mmfr0_el1); PRINT_REG(id_mmfr1_el1); PRINT_REG(id_mmfr2_el1); PRINT_REG(id_mmfr3_el1); #if 0 /* Missing from llvm */ PRINT_REG(id_mmfr4_el1); #endif PRINT_REG(id_pfr0_el1); PRINT_REG(id_pfr1_el1); PRINT_REG(isr_el1); PRINT_REG(mair_el1); PRINT_REG(midr_el1); PRINT_REG(mpidr_el1); PRINT_REG(mvfr0_el1); PRINT_REG(mvfr1_el1); PRINT_REG(mvfr2_el1); PRINT_REG(revidr_el1); PRINT_REG(sctlr_el1); PRINT_REG(sp_el0); PRINT_REG(spsel); PRINT_REG(spsr_el1); PRINT_REG(tcr_el1); PRINT_REG(tpidr_el0); PRINT_REG(tpidr_el1); PRINT_REG(tpidrro_el0); PRINT_REG(ttbr0_el1); PRINT_REG(ttbr1_el1); PRINT_REG(vbar_el1); #undef PRINT_REG } DB_SHOW_COMMAND(vtop, db_show_vtop) { uint64_t phys; if (have_addr) { phys = arm64_address_translate_s1e1r(addr); db_printf("EL1 physical address reg (read): 0x%016lx\n", phys); phys = arm64_address_translate_s1e1w(addr); db_printf("EL1 physical address reg (write): 0x%016lx\n", phys); phys = arm64_address_translate_s1e0r(addr); db_printf("EL0 physical address reg (read): 0x%016lx\n", phys); phys = arm64_address_translate_s1e0w(addr); db_printf("EL0 physical address reg (write): 0x%016lx\n", phys); } else db_printf("show vtop \n"); } #endif diff --git a/sys/arm64/include/reg.h b/sys/arm64/include/reg.h index 9cfc5ea1d437..d7c3354bfd2f 100644 --- a/sys/arm64/include/reg.h +++ b/sys/arm64/include/reg.h @@ -1,106 +1,86 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2014-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_REG_H_ #define _MACHINE_REG_H_ struct reg { uint64_t x[30]; uint64_t lr; uint64_t sp; uint64_t elr; uint32_t spsr; }; struct reg32 { unsigned int r[13]; unsigned int r_sp; unsigned int r_lr; unsigned int r_pc; unsigned int r_cpsr; }; struct fpreg { __uint128_t fp_q[32]; uint32_t fp_sr; uint32_t fp_cr; }; struct fpreg32 { int dummy; }; struct dbreg { uint8_t db_debug_ver; uint8_t db_nbkpts; uint8_t db_nwtpts; uint8_t db_pad[5]; struct { uint64_t dbr_addr; uint32_t dbr_ctrl; uint32_t dbr_pad; } db_breakregs[16]; struct { uint64_t dbw_addr; uint32_t dbw_ctrl; uint32_t dbw_pad; } db_watchregs[16]; }; struct dbreg32 { int dummy; }; #define __HAVE_REG32 -#ifdef _KERNEL -/* - * XXX these interfaces are MI, so they should be declared in a MI place. - */ -int fill_regs(struct thread *, struct reg *); -int set_regs(struct thread *, struct reg *); -int fill_fpregs(struct thread *, struct fpreg *); -int set_fpregs(struct thread *, struct fpreg *); -int fill_dbregs(struct thread *, struct dbreg *); -int set_dbregs(struct thread *, struct dbreg *); -#ifdef COMPAT_FREEBSD32 -int fill_regs32(struct thread *, struct reg32 *); -int set_regs32(struct thread *, struct reg32 *); -int fill_fpregs32(struct thread *, struct fpreg32 *); -int set_fpregs32(struct thread *, struct fpreg32 *); -int fill_dbregs32(struct thread *, struct dbreg32 *); -int set_dbregs32(struct thread *, struct dbreg32 *); -#endif -#endif - #endif /* !_MACHINE_REG_H_ */ diff --git a/sys/arm64/linux/linux_machdep.c b/sys/arm64/linux/linux_machdep.c index 9ccda3c3d299..fae5bd11653b 100644 --- a/sys/arm64/linux/linux_machdep.c +++ b/sys/arm64/linux/linux_machdep.c @@ -1,148 +1,147 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 Turing Robotic Industries Inc. * Copyright (c) 2000 Marcel Moolenaar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #include -#include - #include #include #include #include #include #include #include #include /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /* DTrace probes */ LIN_SDT_PROBE_DEFINE0(machdep, linux_mmap2, todo); /* * LINUXTODO: deduplicate; linux_execve is common across archs, except that on * amd64 compat linuxulator it calls freebsd32_exec_copyin_args. */ int linux_execve(struct thread *td, struct linux_execve_args *uap) { struct image_args eargs; char *path; int error; if (!LUSECONVPATH(td)) { error = exec_copyin_args(&eargs, uap->path, UIO_USERSPACE, uap->argp, uap->envp); } else { LCONVPATHEXIST(td, uap->path, &path); error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, uap->envp); LFREEPATH(path); } if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_sp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent does. */ td->td_frame->tf_x[0] = 0; return (0); } /* LINUXTODO: deduplicate arm64 linux_mmap2 */ int linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) { LIN_SDT_PROBE0(machdep, linux_mmap2, todo); return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, uap->flags, uap->fd, uap->pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_set_cloned_tls(struct thread *td, void *desc) { if ((uint64_t)desc >= VM_MAXUSER_ADDRESS) return (EPERM); return (cpu_set_user_tls(td, desc)); } void bsd_to_linux_regset(struct reg *b_reg, struct linux_pt_regset *l_regset) { KASSERT(sizeof(l_regset->x) == sizeof(b_reg->x) + sizeof(l_ulong), ("%s: size mismatch\n", __func__)); memcpy(l_regset->x, b_reg->x, sizeof(b_reg->x)); l_regset->x[30] = b_reg->lr; l_regset->sp = b_reg->sp; l_regset->pc = b_reg->elr; l_regset->cpsr = b_reg->spsr; } diff --git a/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c b/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c index 502273b73157..f92070d8088d 100644 --- a/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c +++ b/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c @@ -1,1841 +1,1841 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include +#include #include #include #include #include -#include #include #include #include #include #ifdef __i386__ #define r_rax r_eax #define r_rbx r_ebx #define r_rip r_eip #define r_rflags r_eflags #define r_rsp r_esp #define r_rbp r_ebp #endif /* * Lossless User-Land Tracing on x86 * --------------------------------- * * The execution of most instructions is not dependent on the address; for * these instructions it is sufficient to copy them into the user process's * address space and execute them. To effectively single-step an instruction * in user-land, we copy out the following sequence of instructions to scratch * space in the user thread's ulwp_t structure. * * We then set the program counter (%eip or %rip) to point to this scratch * space. Once execution resumes, the original instruction is executed and * then control flow is redirected to what was originally the subsequent * instruction. If the kernel attemps to deliver a signal while single- * stepping, the signal is deferred and the program counter is moved into the * second sequence of instructions. The second sequence ends in a trap into * the kernel where the deferred signal is then properly handled and delivered. * * For instructions whose execute is position dependent, we perform simple * emulation. These instructions are limited to control transfer * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle * of %rip-relative addressing that means that almost any instruction can be * position dependent. For all the details on how we emulate generic * instructions included %rip-relative instructions, see the code in * fasttrap_pid_probe() below where we handle instructions of type * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing). */ #define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) #define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) #define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) #define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) #define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) #define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) #define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) #define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) #define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) #define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) #define FASTTRAP_REX_B(rex) ((rex) & 1) #define FASTTRAP_REX(w, r, x, b) \ (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) /* * Single-byte op-codes. 
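 *
 * For example, FASTTRAP_JE (0x74) is the opcode of the short conditional
 * branch "je rel8": the two-byte sequence 74 0e at address pc transfers
 * control to pc + 2 + 0x0e when ZF is set.  fasttrap_tracepoint_init()
 * records such a branch as FASTTRAP_T_JCC with
 * ftt_dest = pc + ftt_size + (int8_t)instr[start + 1], and
 * fasttrap_pid_probe() later chooses taken vs. not taken from the saved
 * flags register instead of single-stepping the instruction.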
*/ #define FASTTRAP_PUSHL_EBP 0x55 #define FASTTRAP_JO 0x70 #define FASTTRAP_JNO 0x71 #define FASTTRAP_JB 0x72 #define FASTTRAP_JAE 0x73 #define FASTTRAP_JE 0x74 #define FASTTRAP_JNE 0x75 #define FASTTRAP_JBE 0x76 #define FASTTRAP_JA 0x77 #define FASTTRAP_JS 0x78 #define FASTTRAP_JNS 0x79 #define FASTTRAP_JP 0x7a #define FASTTRAP_JNP 0x7b #define FASTTRAP_JL 0x7c #define FASTTRAP_JGE 0x7d #define FASTTRAP_JLE 0x7e #define FASTTRAP_JG 0x7f #define FASTTRAP_NOP 0x90 #define FASTTRAP_MOV_EAX 0xb8 #define FASTTRAP_MOV_ECX 0xb9 #define FASTTRAP_RET16 0xc2 #define FASTTRAP_RET 0xc3 #define FASTTRAP_LOOPNZ 0xe0 #define FASTTRAP_LOOPZ 0xe1 #define FASTTRAP_LOOP 0xe2 #define FASTTRAP_JCXZ 0xe3 #define FASTTRAP_CALL 0xe8 #define FASTTRAP_JMP32 0xe9 #define FASTTRAP_JMP8 0xeb #define FASTTRAP_INT3 0xcc #define FASTTRAP_INT 0xcd #define FASTTRAP_2_BYTE_OP 0x0f #define FASTTRAP_GROUP5_OP 0xff /* * Two-byte op-codes (second byte only). */ #define FASTTRAP_0F_JO 0x80 #define FASTTRAP_0F_JNO 0x81 #define FASTTRAP_0F_JB 0x82 #define FASTTRAP_0F_JAE 0x83 #define FASTTRAP_0F_JE 0x84 #define FASTTRAP_0F_JNE 0x85 #define FASTTRAP_0F_JBE 0x86 #define FASTTRAP_0F_JA 0x87 #define FASTTRAP_0F_JS 0x88 #define FASTTRAP_0F_JNS 0x89 #define FASTTRAP_0F_JP 0x8a #define FASTTRAP_0F_JNP 0x8b #define FASTTRAP_0F_JL 0x8c #define FASTTRAP_0F_JGE 0x8d #define FASTTRAP_0F_JLE 0x8e #define FASTTRAP_0F_JG 0x8f #define FASTTRAP_EFLAGS_OF 0x800 #define FASTTRAP_EFLAGS_DF 0x400 #define FASTTRAP_EFLAGS_SF 0x080 #define FASTTRAP_EFLAGS_ZF 0x040 #define FASTTRAP_EFLAGS_AF 0x010 #define FASTTRAP_EFLAGS_PF 0x004 #define FASTTRAP_EFLAGS_CF 0x001 /* * Instruction prefixes. */ #define FASTTRAP_PREFIX_OPERAND 0x66 #define FASTTRAP_PREFIX_ADDRESS 0x67 #define FASTTRAP_PREFIX_CS 0x2E #define FASTTRAP_PREFIX_DS 0x3E #define FASTTRAP_PREFIX_ES 0x26 #define FASTTRAP_PREFIX_FS 0x64 #define FASTTRAP_PREFIX_GS 0x65 #define FASTTRAP_PREFIX_SS 0x36 #define FASTTRAP_PREFIX_LOCK 0xF0 #define FASTTRAP_PREFIX_REP 0xF3 #define FASTTRAP_PREFIX_REPNE 0xF2 #define FASTTRAP_NOREG 0xff /* * Map between instruction register encodings and the kernel constants which * correspond to indicies into struct regs. */ #ifdef __amd64 static const uint8_t regmap[16] = { REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, }; #else static const uint8_t regmap[8] = { EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI }; #endif static ulong_t fasttrap_getreg(struct reg *, uint_t); static uint64_t fasttrap_anarg(struct reg *rp, int function_entry, int argno) { uint64_t value = 0; int shift = function_entry ? 1 : 0; #ifdef __amd64 if (curproc->p_model == DATAMODEL_LP64) { uintptr_t *stack; /* * In 64-bit mode, the first six arguments are stored in * registers. 
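 *  Arguments past the sixth live on the stack, above the
 * return address when we stop on the function entry point.  For
 * example, with function_entry set, argno 6 (the seventh argument) is
 * fetched from stack[6 - 6 + 1], i.e. 8(%rsp), because stack[0] still
 * holds the return address pushed by the call instruction.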
*/ if (argno < 6) switch (argno) { case 0: return (rp->r_rdi); case 1: return (rp->r_rsi); case 2: return (rp->r_rdx); case 3: return (rp->r_rcx); case 4: return (rp->r_r8); case 5: return (rp->r_r9); } stack = (uintptr_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fulword(&stack[argno - 6 + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); } else { #endif uint32_t *stack = (uint32_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fuword32(&stack[argno + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); #ifdef __amd64 } #endif return (value); } /*ARGSUSED*/ int fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, fasttrap_probe_type_t type) { uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; size_t len = FASTTRAP_MAX_INSTR_SIZE; size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); uint_t start = 0; int rmindex, size; uint8_t seg, rex = 0; /* * Read the instruction at the given address out of the process's * address space. We don't have to worry about a debugger * changing this instruction before we overwrite it with our trap * instruction since P_PR_LOCK is set. Since instructions can span * pages, we potentially read the instruction in two parts. If the * second part fails, we just zero out that part of the instruction. */ if (uread(p, &instr[0], first, pc) != 0) return (-1); if (len > first && uread(p, &instr[first], len - first, pc + first) != 0) { bzero(&instr[first], len - first); len = first; } /* * If the disassembly fails, then we have a malformed instruction. */ if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) return (-1); /* * Make sure the disassembler isn't completely broken. */ ASSERT(-1 <= rmindex && rmindex < size); /* * If the computed size is greater than the number of bytes read, * then it was a malformed instruction possibly because it fell on a * page boundary and the subsequent page was missing or because of * some malicious user. */ if (size > len) return (-1); tp->ftt_size = (uint8_t)size; tp->ftt_segment = FASTTRAP_SEG_NONE; /* * Find the start of the instruction's opcode by processing any * legacy prefixes. */ for (;;) { seg = 0; switch (instr[start]) { case FASTTRAP_PREFIX_SS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_GS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_FS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_ES: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_DS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_CS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_OPERAND: case FASTTRAP_PREFIX_ADDRESS: case FASTTRAP_PREFIX_LOCK: case FASTTRAP_PREFIX_REP: case FASTTRAP_PREFIX_REPNE: if (seg != 0) { /* * It's illegal for an instruction to specify * two segment prefixes -- give up on this * illegal instruction. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE) return (-1); tp->ftt_segment = seg; } start++; continue; } break; } #ifdef __amd64 /* * Identify the REX prefix on 64-bit processes. */ if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) rex = instr[start++]; #endif /* * Now that we're pretty sure that the instruction is okay, copy the * valid part to the tracepoint. 
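 *  The code below then refines the default
 * FASTTRAP_T_COMMON (copy out and single-step) into a specific type for
 * control-transfer instructions and a few other common cases.  For
 * example, the five-byte "call rel32" encoding e8 10 00 00 00 at address
 * pc becomes FASTTRAP_T_CALL with ftt_code = 0 and
 * ftt_dest = pc + 5 + 0x10, which lets fasttrap_pid_probe() emulate both
 * the push of the return address and the transfer of control.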
*/ bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); tp->ftt_type = FASTTRAP_T_COMMON; if (instr[start] == FASTTRAP_2_BYTE_OP) { switch (instr[start + 1]) { case FASTTRAP_0F_JO: case FASTTRAP_0F_JNO: case FASTTRAP_0F_JB: case FASTTRAP_0F_JAE: case FASTTRAP_0F_JE: case FASTTRAP_0F_JNE: case FASTTRAP_0F_JBE: case FASTTRAP_0F_JA: case FASTTRAP_0F_JS: case FASTTRAP_0F_JNS: case FASTTRAP_0F_JP: case FASTTRAP_0F_JNP: case FASTTRAP_0F_JL: case FASTTRAP_0F_JGE: case FASTTRAP_0F_JLE: case FASTTRAP_0F_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 2]; break; } } else if (instr[start] == FASTTRAP_GROUP5_OP) { uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); if (reg == 2 || reg == 4) { uint_t i, sz; if (reg == 2) tp->ftt_type = FASTTRAP_T_CALL; else tp->ftt_type = FASTTRAP_T_JMP; if (mod == 3) tp->ftt_code = 2; else tp->ftt_code = 1; ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * See AMD x86-64 Architecture Programmer's Manual * Volume 3, Section 1.2.7, Table 1-12, and * Appendix A.3.1, Table A-15. */ if (mod != 3 && rm == 4) { uint8_t sib = instr[start + 2]; uint_t index = FASTTRAP_SIB_INDEX(sib); uint_t base = FASTTRAP_SIB_BASE(sib); tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); tp->ftt_index = (index == 4) ? FASTTRAP_NOREG : regmap[index | (FASTTRAP_REX_X(rex) << 3)]; tp->ftt_base = (mod == 0 && base == 5) ? FASTTRAP_NOREG : regmap[base | (FASTTRAP_REX_B(rex) << 3)]; i = 3; sz = mod == 1 ? 1 : 4; } else { /* * In 64-bit mode, mod == 0 and r/m == 5 * denotes %rip-relative addressing; in 32-bit * mode, the base register isn't used. In both * modes, there is a 32-bit operand. */ if (mod == 0 && rm == 5) { #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) tp->ftt_base = REG_RIP; else #endif tp->ftt_base = FASTTRAP_NOREG; sz = 4; } else { uint8_t base = rm | (FASTTRAP_REX_B(rex) << 3); tp->ftt_base = regmap[base]; sz = mod == 1 ? 1 : mod == 2 ? 
4 : 0; } tp->ftt_index = FASTTRAP_NOREG; i = 2; } if (sz == 1) { tp->ftt_dest = *(int8_t *)&instr[start + i]; } else if (sz == 4) { /* LINTED - alignment */ tp->ftt_dest = *(int32_t *)&instr[start + i]; } else { tp->ftt_dest = 0; } } } else { switch (instr[start]) { case FASTTRAP_RET: tp->ftt_type = FASTTRAP_T_RET; break; case FASTTRAP_RET16: tp->ftt_type = FASTTRAP_T_RET16; /* LINTED - alignment */ tp->ftt_dest = *(uint16_t *)&instr[start + 1]; break; case FASTTRAP_JO: case FASTTRAP_JNO: case FASTTRAP_JB: case FASTTRAP_JAE: case FASTTRAP_JE: case FASTTRAP_JNE: case FASTTRAP_JBE: case FASTTRAP_JA: case FASTTRAP_JS: case FASTTRAP_JNS: case FASTTRAP_JP: case FASTTRAP_JNP: case FASTTRAP_JL: case FASTTRAP_JGE: case FASTTRAP_JLE: case FASTTRAP_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_LOOPNZ: case FASTTRAP_LOOPZ: case FASTTRAP_LOOP: tp->ftt_type = FASTTRAP_T_LOOP; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_JCXZ: tp->ftt_type = FASTTRAP_T_JCXZ; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_CALL: tp->ftt_type = FASTTRAP_T_CALL; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; tp->ftt_code = 0; break; case FASTTRAP_JMP32: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; break; case FASTTRAP_JMP8: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_PUSHL_EBP: if (start == 0) tp->ftt_type = FASTTRAP_T_PUSHL_EBP; break; case FASTTRAP_NOP: #ifdef __amd64 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * On amd64 we have to be careful not to confuse a nop * (actually xchgl %eax, %eax) with an instruction using * the same opcode, but that does something different * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). */ if (FASTTRAP_REX_B(rex) == 0) #endif tp->ftt_type = FASTTRAP_T_NOP; break; case FASTTRAP_INT3: /* * The pid provider shares the int3 trap with debugger * breakpoints so we can't instrument them. */ ASSERT(instr[start] == FASTTRAP_INSTR); return (-1); case FASTTRAP_INT: /* * Interrupts seem like they could be traced with * no negative implications, but it's possible that * a thread could be redirected by the trap handling * code which would eventually return to the * instruction after the interrupt. If the interrupt * were in our scratch space, the subsequent * instruction might be overwritten before we return. * Accordingly we refuse to instrument any interrupt. */ return (-1); } } #ifdef __amd64 if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { /* * If the process is 64-bit and the instruction type is still * FASTTRAP_T_COMMON -- meaning we're going to copy it out an * execute it -- we need to watch for %rip-relative * addressing mode. See the portion of fasttrap_pid_probe() * below where we handle tracepoints with type * FASTTRAP_T_COMMON for how we emulate instructions that * employ %rip-relative addressing. */ if (rmindex != -1) { uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); ASSERT(rmindex > start); if (mod == 0 && rm == 5) { /* * We need to be sure to avoid other * registers used by this instruction. 
While * the reg field may determine the op code * rather than denoting a register, assuming * that it denotes a register is always safe. * We leave the REX field intact and use * whatever value's there for simplicity. */ if (reg != 0) { tp->ftt_ripmode = FASTTRAP_RIP_1 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 0; } else { tp->ftt_ripmode = FASTTRAP_RIP_2 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 1; } tp->ftt_modrm = tp->ftt_instr[rmindex]; tp->ftt_instr[rmindex] = FASTTRAP_MODRM(2, reg, rm); } } } #endif return (0); } int fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) { fasttrap_instr_t instr = FASTTRAP_INSTR; if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) return (-1); return (0); } int fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) { uint8_t instr; /* * Distinguish between read or write failures and a changed * instruction. */ if (uread(p, &instr, 1, tp->ftt_pc) != 0) return (0); if (instr != FASTTRAP_INSTR) return (0); if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) return (-1); return (0); } #ifdef __amd64 static uintptr_t fasttrap_fulword_noerr(const void *uaddr) { uintptr_t ret; if ((ret = fasttrap_fulword(uaddr)) != -1) return (ret); return (0); } #endif static uint32_t fasttrap_fuword32_noerr(const void *uaddr) { uint32_t ret; if ((ret = fasttrap_fuword32(uaddr)) != -1) return (ret); return (0); } static void fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid, uintptr_t new_pc) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket; fasttrap_id_t *id; struct rm_priotracker tracker; rm_rlock(&fasttrap_tp_lock, &tracker); bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * Don't sweat it if we can't find the tracepoint again; unlike * when we're in fasttrap_pid_probe(), finding the tracepoint here * is not essential to the correct execution of the process. */ if (tp == NULL) { rm_runlock(&fasttrap_tp_lock, &tracker); return; } for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { /* * If there's a branch that could act as a return site, we * need to trace it, and check here if the program counter is * external to the function. 
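 *  A ret always fires the return probe; for any other
 * traced branch the unsigned comparison
 * new_pc - ftp_faddr < ftp_fsize holds only while the destination still
 * lies inside [ftp_faddr, ftp_faddr + ftp_fsize), in which case the
 * probe is skipped.  For example, a jmp implementing a tail call lands
 * outside that range (the subtraction wraps or exceeds the function
 * size), so it is reported as a return.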
*/ if (tp->ftt_type != FASTTRAP_T_RET && tp->ftt_type != FASTTRAP_T_RET16 && new_pc - id->fti_probe->ftp_faddr < id->fti_probe->ftp_fsize) continue; dtrace_probe(id->fti_probe->ftp_id, pc - id->fti_probe->ftp_faddr, rp->r_rax, rp->r_rbx, 0, 0); } rm_runlock(&fasttrap_tp_lock, &tracker); } static void fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr) { ksiginfo_t ksi; ksiginfo_init(&ksi); ksi.ksi_signo = SIGSEGV; ksi.ksi_code = SEGV_MAPERR; ksi.ksi_addr = (caddr_t)addr; PROC_LOCK(p); (void)tdksignal(t, SIGSEGV, &ksi); PROC_UNLOCK(p); } #ifdef __amd64 static void fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc, uintptr_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uintptr_t *stack = (uintptr_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; if (x < 6) argv[i] = (&rp->r_rdi)[x]; else argv[i] = fasttrap_fulword_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } #endif static void fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc, uint32_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uint32_t *stack = (uint32_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; argv[i] = fasttrap_fuword32_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } static int fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr) { proc_t *p = curproc; #ifdef __i386__ struct segment_descriptor *desc; #else struct user_segment_descriptor *desc; #endif uint16_t sel = 0, ndx, type; uintptr_t limit; switch (tp->ftt_segment) { case FASTTRAP_SEG_CS: sel = rp->r_cs; break; case FASTTRAP_SEG_DS: sel = rp->r_ds; break; case FASTTRAP_SEG_ES: sel = rp->r_es; break; case FASTTRAP_SEG_FS: sel = rp->r_fs; break; case FASTTRAP_SEG_GS: sel = rp->r_gs; break; case FASTTRAP_SEG_SS: sel = rp->r_ss; break; } /* * Make sure the given segment register specifies a user priority * selector rather than a kernel selector. */ if (ISPL(sel) != SEL_UPL) return (-1); ndx = IDXSEL(sel); /* * Check the bounds and grab the descriptor out of the specified * descriptor table. */ if (ISLDT(sel)) { #ifdef __i386__ if (ndx > p->p_md.md_ldt->ldt_len) return (-1); desc = (struct segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #else if (ndx > max_ldt_segment) return (-1); desc = (struct user_segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #endif } else { if (ndx >= NGDT) return (-1); #ifdef __i386__ desc = &gdt[ndx].sd; #else desc = PCPU_PTR(gdt)[ndx]; #endif } /* * The descriptor must have user privilege level and it must be * present in memory. */ if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1) return (-1); type = desc->sd_type; /* * If the S bit in the type field is not set, this descriptor can * only be used in system context. */ if ((type & 0x10) != 0x10) return (-1); limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1); if (tp->ftt_segment == FASTTRAP_SEG_CS) { /* * The code/data bit and readable bit must both be set. */ if ((type & 0xa) != 0xa) return (-1); if (*addr > limit) return (-1); } else { /* * The code/data bit must be clear. */ if ((type & 0x8) != 0) return (-1); /* * If the expand-down bit is clear, we just check the limit as * it would naturally be applied. Otherwise, we need to check * that the address is the range [limit + 1 .. 0xffff] or * [limit + 1 ... 0xffffffff] depending on if the default * operand size bit is set. 
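 *
 * For example, an expand-down stack segment with a byte-granular limit
 * of 0x0fff treats offsets 0x0000-0x0fff as out of bounds and only
 * accepts offsets above the limit, up to the 16-bit or 32-bit maximum
 * selected by that bit.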
*/ if ((type & 0x4) == 0) { if (*addr > limit) return (-1); } else if (desc->sd_def32) { if (*addr < limit + 1 || 0xffff < *addr) return (-1); } else { if (*addr < limit + 1 || 0xffffffff < *addr) return (-1); } } *addr += USD_GETBASE(desc); return (0); } int fasttrap_pid_probe(struct trapframe *tf) { struct reg reg, *rp; proc_t *p = curproc, *pp; struct rm_priotracker tracker; uint64_t gen; uintptr_t pc; uintptr_t new_pc = 0; fasttrap_bucket_t *bucket; fasttrap_tracepoint_t *tp, tp_local; pid_t pid; dtrace_icookie_t cookie; uint_t is_enabled = 0; fill_frame_regs(tf, ®); rp = ® pc = rp->r_rip - 1; /* * It's possible that a user (in a veritable orgy of bad planning) * could redirect this thread's flow of control before it reached the * return probe fasttrap. In this case we need to kill the process * since it's in a unrecoverable state. */ if (curthread->t_dtrace_step) { ASSERT(curthread->t_dtrace_on); fasttrap_sigtrap(p, curthread, pc); return (0); } /* * Clear all user tracing flags. */ curthread->t_dtrace_ft = 0; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef __amd64 curthread->t_dtrace_regv = 0; #endif /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ pp = p; sx_slock(&proctree_lock); while (pp->p_vmspace == pp->p_pptr->p_vmspace) pp = pp->p_pptr; pid = pp->p_pid; if (pp != p) { PROC_LOCK(pp); if ((pp->p_flag & P_WEXIT) != 0) { /* * This can happen if the child was created with * rfork(2). Userspace tracing cannot work reliably in * such a scenario, but we can at least try. */ PROC_UNLOCK(pp); sx_sunlock(&proctree_lock); return (-1); } _PHOLD_LITE(pp); PROC_UNLOCK(pp); } sx_sunlock(&proctree_lock); rm_rlock(&fasttrap_tp_lock, &tracker); bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * Lookup the tracepoint that the process just hit. */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * If we couldn't find a matching tracepoint, either a tracepoint has * been inserted without using the pid ioctl interface (see * fasttrap_ioctl), or somehow we have mislaid this tracepoint. */ if (tp == NULL) { rm_runlock(&fasttrap_tp_lock, &tracker); gen = atomic_load_acq_64(&pp->p_fasttrap_tp_gen); if (pp != p) PRELE(pp); if (curthread->t_fasttrap_tp_gen != gen) { /* * At least one tracepoint associated with this PID has * been removed from the table since #BP was raised. * Speculate that we hit a tracepoint that has since * been removed, and retry the instruction. */ curthread->t_fasttrap_tp_gen = gen; #ifdef __amd64 tf->tf_rip = pc; #else tf->tf_eip = pc; #endif return (0); } return (-1); } if (pp != p) PRELE(pp); /* * Set the program counter to the address of the traced instruction * so that it looks right in ustack() output. */ rp->r_rip = pc; if (tp->ftt_ids != NULL) { fasttrap_id_t *id; #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. 
*/ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); } else { uintptr_t t[5]; fasttrap_usdt_args64(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } } else { #endif uintptr_t s0, s1, s2, s3, s4, s5; uint32_t *stack = (uint32_t *)rp->r_rsp; /* * In 32-bit mode, all arguments are passed on the * stack. If this is a function entry probe, we need * to skip the first entry on the stack as it * represents the return address rather than a * parameter to the function. */ s0 = fasttrap_fuword32_noerr(&stack[0]); s1 = fasttrap_fuword32_noerr(&stack[1]); s2 = fasttrap_fuword32_noerr(&stack[2]); s3 = fasttrap_fuword32_noerr(&stack[3]); s4 = fasttrap_fuword32_noerr(&stack[4]); s5 = fasttrap_fuword32_noerr(&stack[5]); for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, s1, s2, s3, s4, s5); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, s0, s1, s2, s3, s4); } else { uint32_t t[5]; fasttrap_usdt_args32(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } #ifdef __amd64 } #endif } /* * We're about to do a bunch of work so we cache a local copy of * the tracepoint to emulate the instruction, and then find the * tracepoint again later if we need to light up any return probes. */ tp_local = *tp; rm_runlock(&fasttrap_tp_lock, &tracker); tp = &tp_local; /* * Set the program counter to appear as though the traced instruction * had completely executed. This ensures that fasttrap_getreg() will * report the expected value for REG_RIP. */ rp->r_rip = pc + tp->ftt_size; /* * If there's an is-enabled probe connected to this tracepoint it * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' * instruction that was placed there by DTrace when the binary was * linked. As this probe is, in fact, enabled, we need to stuff 1 * into %eax or %rax. Accordingly, we can bypass all the instruction * emulation logic since we know the inevitable result. It's possible * that a user could construct a scenario where the 'is-enabled' * probe was on some other instruction, but that would be a rather * exotic way to shoot oneself in the foot. */ if (is_enabled) { rp->r_rax = 1; new_pc = rp->r_rip; goto done; } /* * We emulate certain types of instructions to ensure correctness * (in the case of position dependent instructions) or optimize * common cases. 
The rest we have the thread execute back in user- * land. */ switch (tp->ftt_type) { case FASTTRAP_T_RET: case FASTTRAP_T_RET16: { uintptr_t dst = 0; uintptr_t addr = 0; int ret = 0; /* * We have to emulate _every_ facet of the behavior of a ret * instruction including what happens if the load from %esp * fails; in that case, we send a SIGSEGV. */ #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { ret = dst = fasttrap_fulword((void *)rp->r_rsp); addr = rp->r_rsp + sizeof (uintptr_t); } else { #endif uint32_t dst32; ret = dst32 = fasttrap_fuword32((void *)rp->r_rsp); dst = dst32; addr = rp->r_rsp + sizeof (uint32_t); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } if (tp->ftt_type == FASTTRAP_T_RET16) addr += tp->ftt_dest; rp->r_rsp = addr; new_pc = dst; break; } case FASTTRAP_T_JCC: { uint_t taken = 0; switch (tp->ftt_code) { case FASTTRAP_JO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0; break; case FASTTRAP_JNO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0; break; case FASTTRAP_JB: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0; break; case FASTTRAP_JAE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0; break; case FASTTRAP_JE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JNE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JBE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 || (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JA: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 && (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0; break; case FASTTRAP_JNS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0; break; case FASTTRAP_JP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0; break; case FASTTRAP_JNP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0; break; case FASTTRAP_JL: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JGE: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JLE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 || ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JG: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_LOOP: { uint_t taken = 0; #ifdef __amd64 greg_t cx = rp->r_rcx--; #else greg_t cx = rp->r_ecx--; #endif switch (tp->ftt_code) { case FASTTRAP_LOOPNZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && cx != 0; break; case FASTTRAP_LOOPZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 && cx != 0; break; case FASTTRAP_LOOP: taken = (cx != 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_JCXZ: { #ifdef __amd64 greg_t cx = rp->r_rcx; #else greg_t cx = rp->r_ecx; #endif if (cx == 0) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_PUSHL_EBP: { int ret = 0; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { rp->r_rsp -= sizeof (uintptr_t); ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp); } else { #endif rp->r_rsp -= sizeof (uint32_t); ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = 
pc; break; } new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_NOP: new_pc = pc + tp->ftt_size; break; case FASTTRAP_T_JMP: case FASTTRAP_T_CALL: if (tp->ftt_code == 0) { new_pc = tp->ftt_dest; } else { uintptr_t value, addr = tp->ftt_dest; if (tp->ftt_base != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_base); if (tp->ftt_index != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_index) << tp->ftt_scale; if (tp->ftt_code == 1) { /* * If there's a segment prefix for this * instruction, we'll need to check permissions * and bounds on the given selector, and adjust * the address accordingly. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE && fasttrap_do_seg(tp, rp, &addr) != 0) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { #endif if ((value = fasttrap_fulword((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value; #ifdef __amd64 } else { uint32_t value32; addr = (uintptr_t)(uint32_t)addr; if ((value32 = fasttrap_fuword32((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value32; } #endif } else { new_pc = addr; } } /* * If this is a call instruction, we need to push the return * address onto the stack. If this fails, we send the process * a SIGSEGV and reset the pc to emulate what would happen if * this instruction weren't traced. */ if (tp->ftt_type == FASTTRAP_T_CALL) { int ret = 0; uintptr_t addr = 0, pcps; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { addr = rp->r_rsp - sizeof (uintptr_t); pcps = pc + tp->ftt_size; ret = fasttrap_sulword((void *)addr, pcps); } else { #endif addr = rp->r_rsp - sizeof (uint32_t); pcps = (uint32_t)(pc + tp->ftt_size); ret = fasttrap_suword32((void *)addr, pcps); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } rp->r_rsp = addr; } break; case FASTTRAP_T_COMMON: { uintptr_t addr; #if defined(__amd64) uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; #else uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; #endif uint_t i = 0; fasttrap_scrspace_t *scrspace; scrspace = fasttrap_scraddr(curthread, tp->ftt_proc); if (scrspace == NULL) { /* * We failed to allocate scratch space for this thread. * Try to write the original instruction back out and * reset the pc. */ if (fasttrap_copyout(tp->ftt_instr, (void *)pc, tp->ftt_size)) fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } addr = scrspace->ftss_addr; /* * Generic Instruction Tracing * --------------------------- * * This is the layout of the scratch space in the user-land * thread structure for our generated instructions. * * 32-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp ftt_size> 5 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 37 * * 64-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 46 * * The %pc is set to a, and curthread->t_dtrace_astpc is set * to b. If we encounter a signal on the way out of the * kernel, trap() will set %pc to curthread->t_dtrace_astpc * so that we execute the original instruction and re-enter * the kernel rather than redirecting to the next instruction. * * If there are return probes (so we know that we're going to * need to reenter the kernel after executing the original * instruction), the scratch space will just contain the * original instruction followed by an interrupt -- the same * data as at b. 
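 *
 * The 32-bit jmp at the end of region a is encoded with the displacement
 * pc - addr - 5: the branch is assembled at scratch offset ftt_size, so
 * its target (pc + ftt_size) minus the address of the byte after the
 * five-byte jmp (addr + ftt_size + 5) leaves pc - addr - 5, and the size
 * of the traced instruction cancels out of the arithmetic.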
* * %rip-relative Addressing * ------------------------ * * There's a further complication in 64-bit mode due to %rip- * relative addressing. While this is clearly a beneficial * architectural decision for position independent code, it's * hard not to see it as a personal attack against the pid * provider since before there was a relatively small set of * instructions to emulate; with %rip-relative addressing, * almost every instruction can potentially depend on the * address at which it's executed. Rather than emulating * the broad spectrum of instructions that can now be * position dependent, we emulate jumps and others as in * 32-bit mode, and take a different tack for instructions * using %rip-relative addressing. * * For every instruction that uses the ModRM byte, the * in-kernel disassembler reports its location. We use the * ModRM byte to identify that an instruction uses * %rip-relative addressing and to see what other registers * the instruction uses. To emulate those instructions, * we modify the instruction to be %rax-relative rather than * %rip-relative (or %rcx-relative if the instruction uses * %rax; or %r8- or %r9-relative if the REX.B is present so * we don't have to rewrite the REX prefix). We then load * the value that %rip would have been into the scratch * register and generate an instruction to reset the scratch * register back to its original value. The instruction * sequence looks like this: * * 64-mode %rip-relative bytes * ------------------------ ----- * a: <= 15 * movq $, % 6 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * 52 * * We set curthread->t_dtrace_regv so that upon receiving * a signal we can reset the value of the scratch register. */ ASSERT(tp->ftt_size <= FASTTRAP_MAX_INSTR_SIZE); curthread->t_dtrace_scrpc = addr; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; #ifdef __amd64 if (tp->ftt_ripmode != 0) { greg_t *reg = NULL; ASSERT(p->p_model == DATAMODEL_LP64); ASSERT(tp->ftt_ripmode & (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); /* * If this was a %rip-relative instruction, we change * it to be either a %rax- or %rcx-relative * instruction (depending on whether those registers * are used as another operand; or %r8- or %r9- * relative depending on the value of REX.B). We then * set that register and generate a movq instruction * to reset the value. */ if (tp->ftt_ripmode & FASTTRAP_RIP_X) scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); else scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); if (tp->ftt_ripmode & FASTTRAP_RIP_1) scratch[i++] = FASTTRAP_MOV_EAX; else scratch[i++] = FASTTRAP_MOV_ECX; switch (tp->ftt_ripmode) { case FASTTRAP_RIP_1: reg = &rp->r_rax; curthread->t_dtrace_reg = REG_RAX; break; case FASTTRAP_RIP_2: reg = &rp->r_rcx; curthread->t_dtrace_reg = REG_RCX; break; case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: reg = &rp->r_r8; curthread->t_dtrace_reg = REG_R8; break; case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: reg = &rp->r_r9; curthread->t_dtrace_reg = REG_R9; break; } /* LINTED - alignment */ *(uint64_t *)&scratch[i] = *reg; curthread->t_dtrace_regv = *reg; *reg = pc + tp->ftt_size; i += sizeof (uint64_t); } #endif /* * Generate the branch instruction to what would have * normally been the subsequent instruction. In 32-bit mode, * this is just a relative branch; in 64-bit mode this is a * %rip-relative branch that loads the 64-bit pc value * immediately after the jmp instruction. 
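 *  Concretely, the 64-bit sequence emitted below is
 *
 *	ff 25 00 00 00 00	jmpq	*0x0(%rip)
 *	<8-byte absolute target>
 *
 * where 0xff is FASTTRAP_GROUP5_OP and 0x25 is FASTTRAP_MODRM(0, 4, 5),
 * so the processor fetches the branch target from the quadword that
 * immediately follows the instruction.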
*/ #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { scratch[i++] = FASTTRAP_GROUP5_OP; scratch[i++] = FASTTRAP_MODRM(0, 4, 5); /* LINTED - alignment */ *(uint32_t *)&scratch[i] = 0; i += sizeof (uint32_t); /* LINTED - alignment */ *(uint64_t *)&scratch[i] = pc + tp->ftt_size; i += sizeof (uint64_t); } else { #endif /* * Set up the jmp to the next instruction; note that * the size of the traced instruction cancels out. */ scratch[i++] = FASTTRAP_JMP32; /* LINTED - alignment */ *(uint32_t *)&scratch[i] = pc - addr - 5; i += sizeof (uint32_t); #ifdef __amd64 } #endif curthread->t_dtrace_astpc = addr + i; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; scratch[i++] = FASTTRAP_INT; scratch[i++] = T_DTRACE_RET; ASSERT(i <= sizeof (scratch)); if (fasttrap_copyout(scratch, (char *)addr, i)) { fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } if (tp->ftt_retids != NULL) { curthread->t_dtrace_step = 1; curthread->t_dtrace_ret = 1; new_pc = curthread->t_dtrace_astpc; } else { new_pc = curthread->t_dtrace_scrpc; } curthread->t_dtrace_pc = pc; curthread->t_dtrace_npc = pc + tp->ftt_size; curthread->t_dtrace_on = 1; break; } default: panic("fasttrap: mishandled an instruction"); } done: /* * If there were no return probes when we first found the tracepoint, * we should feel no obligation to honor any return probes that were * subsequently enabled -- they'll just have to wait until the next * time around. */ if (tp->ftt_retids != NULL) { /* * We need to wait until the results of the instruction are * apparent before invoking any return probes. If this * instruction was emulated we can just call * fasttrap_return_common(); if it needs to be executed, we * need to wait until the user thread returns to the kernel. */ if (tp->ftt_type != FASTTRAP_T_COMMON) { /* * Set the program counter to the address of the traced * instruction so that it looks right in ustack() * output. We had previously set it to the end of the * instruction to simplify %rip-relative addressing. */ rp->r_rip = pc; fasttrap_return_common(rp, pc, pid, new_pc); } else { ASSERT(curthread->t_dtrace_ret != 0); ASSERT(curthread->t_dtrace_pc == pc); ASSERT(curthread->t_dtrace_scrpc != 0); ASSERT(new_pc == curthread->t_dtrace_astpc); } } rp->r_rip = new_pc; PROC_LOCK(p); proc_write_regs(curthread, rp); PROC_UNLOCK(p); return (0); } int fasttrap_return_probe(struct trapframe *tf) { struct reg reg, *rp; proc_t *p = curproc; uintptr_t pc = curthread->t_dtrace_pc; uintptr_t npc = curthread->t_dtrace_npc; fill_frame_regs(tf, ®); rp = ® curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef illumos /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ while (p->p_flag & SVFORK) { p = p->p_parent; } #endif /* * We set rp->r_rip to the address of the traced instruction so * that it appears to dtrace_probe() that we're on the original * instruction. 
*/ rp->r_rip = pc; fasttrap_return_common(rp, pc, p->p_pid, npc); return (0); } /*ARGSUSED*/ uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 1, argno)); } /*ARGSUSED*/ uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 0, argno)); } static ulong_t fasttrap_getreg(struct reg *rp, uint_t reg) { #ifdef __amd64 switch (reg) { case REG_R15: return (rp->r_r15); case REG_R14: return (rp->r_r14); case REG_R13: return (rp->r_r13); case REG_R12: return (rp->r_r12); case REG_R11: return (rp->r_r11); case REG_R10: return (rp->r_r10); case REG_R9: return (rp->r_r9); case REG_R8: return (rp->r_r8); case REG_RDI: return (rp->r_rdi); case REG_RSI: return (rp->r_rsi); case REG_RBP: return (rp->r_rbp); case REG_RBX: return (rp->r_rbx); case REG_RDX: return (rp->r_rdx); case REG_RCX: return (rp->r_rcx); case REG_RAX: return (rp->r_rax); case REG_TRAPNO: return (rp->r_trapno); case REG_ERR: return (rp->r_err); case REG_RIP: return (rp->r_rip); case REG_CS: return (rp->r_cs); case REG_RFL: return (rp->r_rflags); case REG_RSP: return (rp->r_rsp); case REG_SS: return (rp->r_ss); case REG_FS: return (rp->r_fs); case REG_GS: return (rp->r_gs); case REG_DS: return (rp->r_ds); case REG_ES: return (rp->r_es); case REG_FSBASE: return (rdmsr(MSR_FSBASE)); case REG_GSBASE: return (rdmsr(MSR_GSBASE)); } panic("dtrace: illegal register constant"); /*NOTREACHED*/ #else #define _NGREG 19 if (reg >= _NGREG) panic("dtrace: illegal register constant"); return (((greg_t *)&rp->r_gs)[reg]); #endif } diff --git a/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c b/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c index 033258e34062..b26b15a58070 100644 --- a/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c @@ -1,395 +1,394 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include "regset.h" /* * Wee need some reasonable default to prevent backtrace code * from wandering too far */ #define MAX_FUNCTION_SIZE 0x10000 #define MAX_PROLOGUE_SIZE 0x100 #define MAX_USTACK_DEPTH 2048 uint8_t dtrace_fuword8_nocheck(void *); uint16_t dtrace_fuword16_nocheck(void *); uint32_t dtrace_fuword32_nocheck(void *); uint64_t dtrace_fuword64_nocheck(void *); void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { struct unwind_state state; int scp_offset; int depth; depth = 0; if (intrpc != 0) { pcstack[depth++] = (pc_t) intrpc; } aframes++; state.fp = (uintptr_t)__builtin_frame_address(0); state.pc = (uintptr_t)dtrace_getpcstack; while (depth < pcstack_limit) { if (!unwind_frame(curthread, &state)) break; if (!INKERNEL(state.pc)) break; /* * NB: Unlike some other architectures, we don't need to * explicitly insert cpu_dtrace_caller as it appears in the * normal kernel stack trace rather than a special trap frame. */ if (aframes > 0) { aframes--; } else { pcstack[depth++] = state.pc; } } for (; depth < pcstack_limit; depth++) { pcstack[depth] = 0; } } static int dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, uintptr_t fp) { volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; int ret = 0; uintptr_t oldfp = fp; ASSERT(pcstack == NULL || pcstack_limit > 0); while (pc != 0) { /* * We limit the number of times we can go around this * loop to account for a circular stack. */ if (ret++ >= MAX_USTACK_DEPTH) { *flags |= CPU_DTRACE_BADSTACK; cpu_core[curcpu].cpuc_dtrace_illval = fp; break; } if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) break; } if (fp == 0) break; pc = dtrace_fuword64((void *)(fp + offsetof(struct arm64_frame, f_retaddr))); fp = dtrace_fuword64((void *)fp); if (fp == oldfp) { *flags |= CPU_DTRACE_BADSTACK; cpu_core[curcpu].cpuc_dtrace_illval = fp; break; } /* * ARM64TODO: * This workaround might not be necessary. It needs to be * revised and removed from all architectures if found * unwanted. Leaving the original x86 comment for reference. * * This is totally bogus: if we faulted, we're going to clear * the fault and break. This is to deal with the apparently * broken Java stacks on x86. */ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; break; } oldfp = fp; } return (ret); } void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { proc_t *p = curproc; struct trapframe *tf; uintptr_t pc, fp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; int n; if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. */ if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->tf_elr; fp = tf->tf_x[29]; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { /* * In an entry probe. The frame pointer has not yet been * pushed (that happens in the function prologue). The * best approach is to add the current pc as a missing top * of stack and back the pc up to the caller, which is stored * at the current stack pointer address since the call * instruction puts it there right before the branch. 
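dtrace_getustack_common() above bounds the walk and checks for a self-referential frame pointer so a corrupt or circular user stack cannot spin the probe forever. Below is a self-contained userland model of that loop, using a hypothetical fake_frame record in place of real user memory; WALK_LIMIT mirrors MAX_USTACK_DEPTH.

#include <stdint.h>
#include <stdio.h>

struct fake_frame {
	struct fake_frame *fp;	/* saved frame pointer (x29 on arm64) */
	uintptr_t	lr;	/* saved return address */
};

#define	WALK_LIMIT	2048

static int
walk_frames(const struct fake_frame *fp, uintptr_t *pcs, int limit)
{
	const struct fake_frame *old = fp;
	int n = 0;

	while (fp != NULL && n < limit && n < WALK_LIMIT) {
		pcs[n++] = fp->lr;
		fp = fp->fp;
		if (fp == old)		/* frame points at itself: give up */
			break;
		old = fp;
	}
	return (n);
}

int
main(void)
{
	struct fake_frame f2 = { NULL, 0x3000 };
	struct fake_frame f1 = { &f2, 0x2000 };
	struct fake_frame f0 = { &f1, 0x1000 };
	uintptr_t pcs[8];
	int i, n;

	n = walk_frames(&f0, pcs, 8);
	for (i = 0; i < n; i++)
		printf("frame %d: pc %#lx\n", i, (unsigned long)pcs[i]);
	return (0);
}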
*/ *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->tf_lr; } n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp); ASSERT(n >= 0); ASSERT(n <= pcstack_limit); pcstack += n; pcstack_limit -= n; zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } int dtrace_getustackdepth(void) { printf("IMPLEMENT ME: %s\n", __func__); return (0); } void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { printf("IMPLEMENT ME: %s\n", __func__); } /*ARGSUSED*/ uint64_t dtrace_getarg(int arg, int aframes) { printf("IMPLEMENT ME: %s\n", __func__); return (0); } int dtrace_getstackdepth(int aframes) { struct unwind_state state; int scp_offset; int depth; bool done; depth = 1; done = false; state.fp = (uintptr_t)__builtin_frame_address(0); state.pc = (uintptr_t)dtrace_getstackdepth; do { done = !unwind_frame(curthread, &state); if (!INKERNEL(state.pc) || !INKERNEL(state.fp)) break; depth++; } while (!done); if (depth < aframes) return (0); else return (depth - aframes); } ulong_t dtrace_getreg(struct trapframe *rp, uint_t reg) { printf("IMPLEMENT ME: %s\n", __func__); return (0); } static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) { if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = uaddr; return (0); } return (1); } void dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(uaddr, kaddr, size); } void dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(kaddr, uaddr, size); } void dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(uaddr, kaddr, size, flags); } void dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(kaddr, uaddr, size, flags); } uint8_t dtrace_fuword8(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword8_nocheck(uaddr)); } uint16_t dtrace_fuword16(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword16_nocheck(uaddr)); } uint32_t dtrace_fuword32(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword32_nocheck(uaddr)); } uint64_t dtrace_fuword64(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword64_nocheck(uaddr)); } diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_isa.c b/sys/cddl/dev/dtrace/amd64/dtrace_isa.c index 07a4103bd716..71b448a99c1c 100644 --- a/sys/cddl/dev/dtrace/amd64/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/amd64/dtrace_isa.c @@ -1,739 +1,738 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include "regset.h" uint8_t dtrace_fuword8_nocheck(void *); uint16_t dtrace_fuword16_nocheck(void *); uint32_t dtrace_fuword32_nocheck(void *); uint64_t dtrace_fuword64_nocheck(void *); int dtrace_ustackdepth_max = 2048; void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { struct thread *td; int depth = 0; register_t rbp; struct amd64_frame *frame; vm_offset_t callpc; pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller; if (intrpc != 0) pcstack[depth++] = (pc_t) intrpc; aframes++; __asm __volatile("movq %%rbp,%0" : "=r" (rbp)); frame = (struct amd64_frame *)rbp; td = curthread; while (depth < pcstack_limit) { if (!kstack_contains(curthread, (vm_offset_t)frame, sizeof(*frame))) break; callpc = frame->f_retaddr; if (!INKERNEL(callpc)) break; if (aframes > 0) { aframes--; if ((aframes == 0) && (caller != 0)) { pcstack[depth++] = caller; } } else { pcstack[depth++] = callpc; } if ((vm_offset_t)frame->f_frame <= (vm_offset_t)frame) break; frame = frame->f_frame; } for (; depth < pcstack_limit; depth++) { pcstack[depth] = 0; } } static int dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, uintptr_t sp) { uintptr_t oldsp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; int ret = 0; ASSERT(pcstack == NULL || pcstack_limit > 0); ASSERT(dtrace_ustackdepth_max > 0); while (pc != 0) { /* * We limit the number of times we can go around this * loop to account for a circular stack. */ if (ret++ >= dtrace_ustackdepth_max) { *flags |= CPU_DTRACE_BADSTACK; cpu_core[curcpu].cpuc_dtrace_illval = sp; break; } if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) break; } if (sp == 0) break; oldsp = sp; pc = dtrace_fuword64((void *)(sp + offsetof(struct amd64_frame, f_retaddr))); sp = dtrace_fuword64((void *)sp); if (sp == oldsp) { *flags |= CPU_DTRACE_BADSTACK; cpu_core[curcpu].cpuc_dtrace_illval = sp; break; } /* * This is totally bogus: if we faulted, we're going to clear * the fault and break. This is to deal with the apparently * broken Java stacks on x86. */ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; break; } } return (ret); } void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { proc_t *p = curproc; struct trapframe *tf; uintptr_t pc, sp, fp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; int n; if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. 
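The kernel-stack walk in dtrace_getpcstack() here is the classic %rbp-chain traversal. The following userland sketch does the same walk on its own stack; it assumes the conventional amd64 frame layout (saved %rbp followed by the return address) and only works when frame pointers are kept, e.g. cc -O0 -fno-omit-frame-pointer.

#include <stdint.h>
#include <stdio.h>

struct frame {
	struct frame	*f_frame;	/* caller's frame pointer */
	uintptr_t	 f_retaddr;	/* return address into the caller */
};

static void
print_backtrace(int limit)
{
	struct frame *fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp != NULL && depth < limit) {
		printf("#%d %p\n", depth++, (void *)fp->f_retaddr);
		if (fp->f_frame <= fp)	/* callers must sit at higher addresses */
			break;
		fp = fp->f_frame;
	}
}

static void level2(void) { print_backtrace(16); }
static void level1(void) { level2(); }

int
main(void)
{
	level1();
	return (0);
}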
*/ if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->tf_rip; fp = tf->tf_rbp; sp = tf->tf_rsp; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { /* * In an entry probe. The frame pointer has not yet been * pushed (that happens in the function prologue). The * best approach is to add the current pc as a missing top * of stack and back the pc up to the caller, which is stored * at the current stack pointer address since the call * instruction puts it there right before the branch. */ *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) return; pc = dtrace_fuword64((void *) sp); } n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp); ASSERT(n >= 0); ASSERT(n <= pcstack_limit); pcstack += n; pcstack_limit -= n; zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } int dtrace_getustackdepth(void) { proc_t *p = curproc; struct trapframe *tf; uintptr_t pc, fp, sp; int n = 0; if (p == NULL || (tf = curthread->td_frame) == NULL) return (0); if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) return (-1); pc = tf->tf_rip; fp = tf->tf_rbp; sp = tf->tf_rsp; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { /* * In an entry probe. The frame pointer has not yet been * pushed (that happens in the function prologue). The * best approach is to add the current pc as a missing top * of stack and back the pc up to the caller, which is stored * at the current stack pointer address since the call * instruction puts it there right before the branch. */ pc = dtrace_fuword64((void *) sp); n++; } n += dtrace_getustack_common(NULL, 0, pc, fp); return (n); } void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { proc_t *p = curproc; struct trapframe *tf; uintptr_t pc, sp, fp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; #ifdef notyet /* XXX signal stack */ uintptr_t oldcontext; size_t s1, s2; #endif if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. */ if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->tf_rip; sp = tf->tf_rsp; fp = tf->tf_rbp; #ifdef notyet /* XXX signal stack */ oldcontext = lwp->lwp_oldcontext; s1 = sizeof (struct xframe) + 2 * sizeof (long); s2 = s1 + sizeof (siginfo_t); #endif if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { *pcstack++ = (uint64_t)pc; *fpstack++ = 0; pcstack_limit--; if (pcstack_limit <= 0) return; pc = dtrace_fuword64((void *)sp); } while (pc != 0) { *pcstack++ = (uint64_t)pc; *fpstack++ = fp; pcstack_limit--; if (pcstack_limit <= 0) break; if (fp == 0) break; #ifdef notyet /* XXX signal stack */ if (oldcontext == sp + s1 || oldcontext == sp + s2) { ucontext_t *ucp = (ucontext_t *)oldcontext; greg_t *gregs = ucp->uc_mcontext.gregs; sp = dtrace_fulword(&gregs[REG_FP]); pc = dtrace_fulword(&gregs[REG_PC]); oldcontext = dtrace_fulword(&ucp->uc_link); } else #endif /* XXX */ { pc = dtrace_fuword64((void *)(fp + offsetof(struct amd64_frame, f_retaddr))); fp = dtrace_fuword64((void *)fp); } /* * This is totally bogus: if we faulted, we're going to clear * the fault and break. This is to deal with the apparently * broken Java stacks on x86. 
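The CPU_DTRACE_ENTRY fixup above — record the current pc, then take the caller's pc from *sp because the prologue has not pushed the frame pointer yet — can be isolated into a small helper. read_user64() below is only a stand-in for dtrace_fuword64(); the caller would continue the normal frame walk afterwards.

#include <stdint.h>

extern uint64_t read_user64(const void *uaddr);	/* stand-in */

static int
entry_probe_fixup(uint64_t *pcstack, int limit, uint64_t *pc, uint64_t sp)
{
	int n = 0;

	if (limit <= 0)
		return (0);
	pcstack[n++] = *pc;				/* the probed function */
	*pc = read_user64((const void *)(uintptr_t)sp);	/* its caller */
	return (n);
}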
*/ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; break; } } zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } /*ARGSUSED*/ uint64_t dtrace_getarg(int arg, int aframes) { uintptr_t val; struct amd64_frame *fp = (struct amd64_frame *)dtrace_getfp(); uintptr_t *stack; int i; /* * A total of 6 arguments are passed via registers; any argument with * index of 5 or lower is therefore in a register. */ int inreg = 5; for (i = 1; i <= aframes; i++) { fp = fp->f_frame; if (P2ROUNDUP(fp->f_retaddr, 16) == (long)dtrace_invop_callsite) { /* * In the case of amd64, we will use the pointer to the * regs structure that was pushed when we took the * trap. To get this structure, we must increment * beyond the frame structure, and then again beyond * the calling RIP stored in dtrace_invop(). If the * argument that we're seeking is passed on the stack, * we'll pull the true stack pointer out of the saved * registers and decrement our argument by the number * of arguments passed in registers; if the argument * we're seeking is passed in registers, we can just * load it directly. */ struct trapframe *tf = (struct trapframe *)&fp[1]; if (arg <= inreg) { switch (arg) { case 0: stack = (uintptr_t *)&tf->tf_rdi; break; case 1: stack = (uintptr_t *)&tf->tf_rsi; break; case 2: stack = (uintptr_t *)&tf->tf_rdx; break; case 3: stack = (uintptr_t *)&tf->tf_rcx; break; case 4: stack = (uintptr_t *)&tf->tf_r8; break; case 5: stack = (uintptr_t *)&tf->tf_r9; break; } arg = 0; } else { stack = (uintptr_t *)(tf->tf_rsp); arg -= inreg; } goto load; } } /* * We know that we did not come through a trap to get into * dtrace_probe() -- the provider simply called dtrace_probe() * directly. As this is the case, we need to shift the argument * that we're looking for: the probe ID is the first argument to * dtrace_probe(), so the argument n will actually be found where * one would expect to find argument (n + 1). */ arg++; if (arg <= inreg) { /* * This shouldn't happen. If the argument is passed in a * register then it should have been, well, passed in a * register... */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } arg -= (inreg + 1); stack = (uintptr_t *)&fp[1]; load: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); val = stack[arg]; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); return (val); } int dtrace_getstackdepth(int aframes) { int depth = 0; struct amd64_frame *frame; vm_offset_t rbp; aframes++; rbp = dtrace_getfp(); frame = (struct amd64_frame *)rbp; depth++; for(;;) { if (!kstack_contains(curthread, (vm_offset_t)frame, sizeof(*frame))) break; depth++; if (frame->f_frame <= frame) break; frame = frame->f_frame; } if (depth < aframes) return 0; else return depth - aframes; } ulong_t dtrace_getreg(struct trapframe *rp, uint_t reg) { /* This table is dependent on reg.d. */ int regmap[] = { REG_GS, /* 0 GS */ REG_FS, /* 1 FS */ REG_ES, /* 2 ES */ REG_DS, /* 3 DS */ REG_RDI, /* 4 EDI */ REG_RSI, /* 5 ESI */ REG_RBP, /* 6 EBP, REG_FP */ REG_RSP, /* 7 ESP */ REG_RBX, /* 8 EBX, REG_R1 */ REG_RDX, /* 9 EDX */ REG_RCX, /* 10 ECX */ REG_RAX, /* 11 EAX, REG_R0 */ REG_TRAPNO, /* 12 TRAPNO */ REG_ERR, /* 13 ERR */ REG_RIP, /* 14 EIP, REG_PC */ REG_CS, /* 15 CS */ REG_RFL, /* 16 EFL, REG_PS */ REG_RSP, /* 17 UESP, REG_SP */ REG_SS /* 18 SS */ }; #ifdef illumos if (reg <= SS) { #else /* !illumos */ if (reg <= GS) { #endif if (reg >= sizeof (regmap) / sizeof (int)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } reg = regmap[reg]; } else { /* This is dependent on reg.d. 
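The register/stack split in dtrace_getarg() follows the SysV amd64 calling convention: the first six integer arguments travel in registers, the rest sit on the stack above the return address. A small sketch of that mapping; the offsets are as seen at function entry, before any prologue runs.

#include <stdint.h>
#include <stdio.h>

static const char *argreg[] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };

static void
describe_arg(int arg)
{
	if (arg <= 5)
		printf("arg%d: register %%%s\n", arg, argreg[arg]);
	else
		printf("arg%d: memory at %%rsp + %d\n", arg,
		    (int)((arg - 6 + 1) * sizeof(uint64_t)));
}

int
main(void)
{
	int i;

	for (i = 0; i < 9; i++)
		describe_arg(i);
	return (0);
}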
*/ #ifdef illumos reg -= SS + 1; #else /* !illumos */ reg -= GS + 1; #endif } switch (reg) { case REG_RDI: return (rp->tf_rdi); case REG_RSI: return (rp->tf_rsi); case REG_RDX: return (rp->tf_rdx); case REG_RCX: return (rp->tf_rcx); case REG_R8: return (rp->tf_r8); case REG_R9: return (rp->tf_r9); case REG_RAX: return (rp->tf_rax); case REG_RBX: return (rp->tf_rbx); case REG_RBP: return (rp->tf_rbp); case REG_R10: return (rp->tf_r10); case REG_R11: return (rp->tf_r11); case REG_R12: return (rp->tf_r12); case REG_R13: return (rp->tf_r13); case REG_R14: return (rp->tf_r14); case REG_R15: return (rp->tf_r15); case REG_DS: return (rp->tf_ds); case REG_ES: return (rp->tf_es); case REG_FS: return (rp->tf_fs); case REG_GS: return (rp->tf_gs); case REG_TRAPNO: return (rp->tf_trapno); case REG_ERR: return (rp->tf_err); case REG_RIP: return (rp->tf_rip); case REG_CS: return (rp->tf_cs); case REG_SS: return (rp->tf_ss); case REG_RFL: return (rp->tf_rflags); case REG_RSP: return (rp->tf_rsp); default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } } static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) { ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr); if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = uaddr; return (0); } return (1); } void dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(uaddr, kaddr, size); } void dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(kaddr, uaddr, size); } void dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(uaddr, kaddr, size, flags); } void dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(kaddr, uaddr, size, flags); } uint8_t dtrace_fuword8(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword8_nocheck(uaddr)); } uint16_t dtrace_fuword16(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword16_nocheck(uaddr)); } uint32_t dtrace_fuword32(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword32_nocheck(uaddr)); } uint64_t dtrace_fuword64(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword64_nocheck(uaddr)); } /* * ifunc resolvers for SMAP support */ void dtrace_copy_nosmap(uintptr_t, uintptr_t, size_t); void dtrace_copy_smap(uintptr_t, uintptr_t, size_t); DEFINE_IFUNC(, void, dtrace_copy, (uintptr_t, uintptr_t, size_t)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 
dtrace_copy_smap : dtrace_copy_nosmap); } void dtrace_copystr_nosmap(uintptr_t, uintptr_t, size_t, volatile uint16_t *); void dtrace_copystr_smap(uintptr_t, uintptr_t, size_t, volatile uint16_t *); DEFINE_IFUNC(, void, dtrace_copystr, (uintptr_t, uintptr_t, size_t, volatile uint16_t *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? dtrace_copystr_smap : dtrace_copystr_nosmap); } uintptr_t dtrace_fulword_nosmap(void *); uintptr_t dtrace_fulword_smap(void *); DEFINE_IFUNC(, uintptr_t, dtrace_fulword, (void *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? dtrace_fulword_smap : dtrace_fulword_nosmap); } uint8_t dtrace_fuword8_nocheck_nosmap(void *); uint8_t dtrace_fuword8_nocheck_smap(void *); DEFINE_IFUNC(, uint8_t, dtrace_fuword8_nocheck, (void *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? dtrace_fuword8_nocheck_smap : dtrace_fuword8_nocheck_nosmap); } uint16_t dtrace_fuword16_nocheck_nosmap(void *); uint16_t dtrace_fuword16_nocheck_smap(void *); DEFINE_IFUNC(, uint16_t, dtrace_fuword16_nocheck, (void *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? dtrace_fuword16_nocheck_smap : dtrace_fuword16_nocheck_nosmap); } uint32_t dtrace_fuword32_nocheck_nosmap(void *); uint32_t dtrace_fuword32_nocheck_smap(void *); DEFINE_IFUNC(, uint32_t, dtrace_fuword32_nocheck, (void *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? dtrace_fuword32_nocheck_smap : dtrace_fuword32_nocheck_nosmap); } uint64_t dtrace_fuword64_nocheck_nosmap(void *); uint64_t dtrace_fuword64_nocheck_smap(void *); DEFINE_IFUNC(, uint64_t, dtrace_fuword64_nocheck, (void *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? dtrace_fuword64_nocheck_smap : dtrace_fuword64_nocheck_nosmap); } diff --git a/sys/cddl/dev/dtrace/arm/dtrace_isa.c b/sys/cddl/dev/dtrace/arm/dtrace_isa.c index 9bac37669a84..ede352e6b873 100644 --- a/sys/cddl/dev/dtrace/arm/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/arm/dtrace_isa.c @@ -1,263 +1,262 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include "regset.h" /* * Wee need some reasonable default to prevent backtrace code * from wandering too far */ #define MAX_FUNCTION_SIZE 0x10000 #define MAX_PROLOGUE_SIZE 0x100 uint8_t dtrace_fuword8_nocheck(void *); uint16_t dtrace_fuword16_nocheck(void *); uint32_t dtrace_fuword32_nocheck(void *); uint64_t dtrace_fuword64_nocheck(void *); void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { struct unwind_state state; register_t sp; int scp_offset; int depth = 0; if (intrpc != 0) pcstack[depth++] = (pc_t) intrpc; aframes++; __asm __volatile("mov %0, sp" : "=&r" (sp)); state.registers[FP] = (uint32_t)__builtin_frame_address(0); state.registers[SP] = sp; state.registers[LR] = (uint32_t)__builtin_return_address(0); state.registers[PC] = (uint32_t)dtrace_getpcstack; while (depth < pcstack_limit) { int done; done = unwind_stack_one(&state, 1); /* * NB: Unlike some other architectures, we don't need to * explicitly insert cpu_dtrace_caller as it appears in the * normal kernel stack trace rather than a special trap frame. */ if (aframes > 0) { aframes--; } else { pcstack[depth++] = state.registers[PC]; } if (done) break; } for (; depth < pcstack_limit; depth++) { pcstack[depth] = 0; } } void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { printf("IMPLEMENT ME: %s\n", __func__); } int dtrace_getustackdepth(void) { printf("IMPLEMENT ME: %s\n", __func__); return (0); } void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { printf("IMPLEMENT ME: %s\n", __func__); } /*ARGSUSED*/ uint64_t dtrace_getarg(int arg, int aframes) { /* struct arm_frame *fp = (struct arm_frame *)dtrace_getfp();*/ return (0); } int dtrace_getstackdepth(int aframes) { struct unwind_state state; register_t sp; int scp_offset; int done = 0; int depth = 1; __asm __volatile("mov %0, sp" : "=&r" (sp)); state.registers[FP] = (uint32_t)__builtin_frame_address(0); state.registers[SP] = sp; state.registers[LR] = (uint32_t)__builtin_return_address(0); state.registers[PC] = (uint32_t)dtrace_getstackdepth; do { done = unwind_stack_one(&state, 1); depth++; } while (!done); if (depth < aframes) return 0; else return depth - aframes; } ulong_t dtrace_getreg(struct trapframe *rp, uint_t reg) { printf("IMPLEMENT ME: %s\n", __func__); return (0); } static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) { if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = uaddr; return (0); } return (1); } void dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(uaddr, kaddr, size); } void dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(kaddr, uaddr, size); } void dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(uaddr, kaddr, size, flags); } void dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(kaddr, uaddr, size, flags); } uint8_t dtrace_fuword8(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { 
DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword8_nocheck(uaddr)); } uint16_t dtrace_fuword16(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword16_nocheck(uaddr)); } uint32_t dtrace_fuword32(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword32_nocheck(uaddr)); } uint64_t dtrace_fuword64(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword64_nocheck(uaddr)); } diff --git a/sys/cddl/dev/dtrace/mips/dtrace_isa.c b/sys/cddl/dev/dtrace/mips/dtrace_isa.c index c09225c734b9..8d65012dadf0 100644 --- a/sys/cddl/dev/dtrace/mips/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/mips/dtrace_isa.c @@ -1,725 +1,724 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include "regset.h" #ifdef __mips_n64 #define MIPS_IS_VALID_KERNELADDR(reg) ((((reg) & 3) == 0) && \ ((vm_offset_t)(reg) >= MIPS_XKPHYS_START)) #else #define MIPS_IS_VALID_KERNELADDR(reg) ((((reg) & 3) == 0) && \ ((vm_offset_t)(reg) >= MIPS_KSEG0_START)) #endif /* * Wee need some reasonable default to prevent backtrace code * from wandering too far */ #define MAX_FUNCTION_SIZE 0x10000 #define MAX_PROLOGUE_SIZE 0x100 uint8_t dtrace_fuword8_nocheck(void *); uint16_t dtrace_fuword16_nocheck(void *); uint32_t dtrace_fuword32_nocheck(void *); uint64_t dtrace_fuword64_nocheck(void *); static int dtrace_next_frame(register_t *pc, register_t *sp, register_t *args, int *valid_args); static int dtrace_next_uframe(register_t *pc, register_t *sp, register_t *ra); void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { int depth = 0; vm_offset_t callpc; pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller; register_t sp, ra, pc; if (intrpc != 0) pcstack[depth++] = (pc_t) intrpc; aframes++; sp = (register_t)(intptr_t)__builtin_frame_address(0); ra = (register_t)(intptr_t)__builtin_return_address(0); __asm __volatile( "jal 99f\n" "nop\n" "99:\n" "move %0, $31\n" /* get ra */ "move $31, %1\n" /* restore ra */ : "=r" (pc) : "r" (ra)); while (depth < pcstack_limit) { callpc = pc; if (aframes > 0) { aframes--; if ((aframes == 0) && (caller != 0)) { pcstack[depth++] = caller; } } else { pcstack[depth++] = callpc; } if (dtrace_next_frame(&pc, &sp, NULL, NULL) < 0) break; } for (; depth < pcstack_limit; depth++) { pcstack[depth] = 0; } } void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { proc_t *p = curproc; struct trapframe *tf; register_t sp, ra, pc; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. 
*/ if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = (uint64_t)tf->pc; sp = (uint64_t)tf->sp; ra = (uint64_t)tf->ra; *pcstack++ = (uint64_t)tf->pc; /* * Unwind, and unwind, and unwind */ while (1) { if (dtrace_next_uframe(&pc, &sp, &ra) < 0) break; *pcstack++ = pc; pcstack_limit--; if (pcstack_limit <= 0) break; } zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } int dtrace_getustackdepth(void) { int n = 0; proc_t *p = curproc; struct trapframe *tf; register_t sp, ra, pc; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; if (*flags & CPU_DTRACE_FAULT) return (0); if (p == NULL || (tf = curthread->td_frame) == NULL) return (0); pc = (uint64_t)tf->pc; sp = (uint64_t)tf->sp; ra = (uint64_t)tf->ra; n++; /* * Unwind, and unwind, and unwind */ while (1) { if (dtrace_next_uframe(&pc, &sp, &ra) < 0) break; n++; } return (n); } void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { printf("IMPLEMENT ME: %s\n", __func__); } /*ARGSUSED*/ uint64_t dtrace_getarg(int arg, int aframes) { int i; register_t sp, ra, pc; /* XXX: Fix this ugly code */ register_t args[8]; int valid[8]; sp = (register_t)(intptr_t)__builtin_frame_address(0); ra = (register_t)(intptr_t)__builtin_return_address(0); __asm __volatile( "jal 99f\n" "nop\n" "99:\n" "move %0, $31\n" /* get ra */ "move $31, %1\n" /* restore ra */ : "=r" (pc) : "r" (ra)); for (i = 0; i <= aframes + 1; i++) { if (dtrace_next_frame(&pc, &sp, args, valid) < 0) { printf("%s: stack ends at frame #%d\n", __func__, i); return (0); } } if (arg < 8) { if (valid[arg]) return (args[arg]); else printf("%s: request arg%d is not valid\n", __func__, arg); } return (0); } int dtrace_getstackdepth(int aframes) { register_t sp, ra, pc; int depth = 0; sp = (register_t)(intptr_t)__builtin_frame_address(0); ra = (register_t)(intptr_t)__builtin_return_address(0); __asm __volatile( "jal 99f\n" "nop\n" "99:\n" "move %0, $31\n" /* get ra */ "move $31, %1\n" /* restore ra */ : "=r" (pc) : "r" (ra)); for (;;) { if (dtrace_next_frame(&pc, &sp, NULL, NULL) < 0) break; depth++; } if (depth < aframes) return 0; else return depth - aframes; } ulong_t dtrace_getreg(struct trapframe *rp, uint_t reg) { return (0); } static int dtrace_next_frame(register_t *pc, register_t *sp, register_t *args, int *valid_args) { InstFmt i; /* * Arrays for a0..a3 registers and flags if content * of these registers is valid, e.g. obtained from the stack */ uintptr_t va; unsigned instr, mask; unsigned int frames = 0; int more, stksize; register_t ra = 0; int arg, r; vm_offset_t addr; /* * Invalidate arguments values */ if (valid_args) { for (r = 0; r < 8; r++) valid_args[r] = 0; } /* Jump here after a nonstandard (interrupt handler) frame */ stksize = 0; if (frames++ > 100) { /* return breaks stackframe-size heuristics with gcc -O2 */ goto error; /* XXX */ } /* check for bad SP: could foul up next frame */ if (!MIPS_IS_VALID_KERNELADDR(*sp)) { goto error; } /* check for bad PC */ if (!MIPS_IS_VALID_KERNELADDR(*pc)) { goto error; } /* * Find the beginning of the current subroutine by scanning * backwards from the current PC for the end of the previous * subroutine. 
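The MIPS unwinders here recognize a handful of instruction patterns by masking opcode bits: "[d]addiu sp,sp,-X" marks a prologue and yields the frame size, "jr ra" marks the end of the previous function. Wrapping those same masks in a tiny decoder shows what they match; the helper itself is illustrative only (the real code additionally tracks sw/sd of saved registers).

#include <stdint.h>

#define	MIPS_JR_RA	0x03e00008u

/* returns 1 for a prologue sp adjustment, -1 for "jr ra", 0 otherwise */
static int
mips_classify_insn(uint32_t insn, int *stksize)
{
	if ((insn & 0xffff8000u) == 0x27bd8000u ||	/* addiu sp,sp,-X */
	    (insn & 0xffff8000u) == 0x67bd8000u) {	/* daddiu sp,sp,-X */
		*stksize = -(int16_t)(insn & 0xffffu);	/* frame size in bytes */
		return (1);
	}
	if (insn == MIPS_JR_RA)
		return (-1);
	return (0);
}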
*/ va = *pc - sizeof(int); while (1) { instr = kdbpeek((int *)va); /* [d]addiu sp,sp,-X */ if (((instr & 0xffff8000) == 0x27bd8000) || ((instr & 0xffff8000) == 0x67bd8000)) break; /* jr ra */ if (instr == 0x03e00008) { /* skip over branch-delay slot instruction */ va += 2 * sizeof(int); break; } va -= sizeof(int); } /* skip over nulls which might separate .o files */ while ((instr = kdbpeek((int *)va)) == 0) va += sizeof(int); /* scan forwards to find stack size and any saved registers */ stksize = 0; more = 3; mask = 0; for (; more; va += sizeof(int), more = (more == 3) ? 3 : more - 1) { /* stop if hit our current position */ if (va >= *pc) break; instr = kdbpeek((int *)va); i.word = instr; switch (i.JType.op) { case OP_SPECIAL: switch (i.RType.func) { case OP_JR: case OP_JALR: more = 2; /* stop after next instruction */ break; case OP_SYSCALL: case OP_BREAK: more = 1; /* stop now */ }; break; case OP_BCOND: case OP_J: case OP_JAL: case OP_BEQ: case OP_BNE: case OP_BLEZ: case OP_BGTZ: more = 2; /* stop after next instruction */ break; case OP_COP0: case OP_COP1: case OP_COP2: case OP_COP3: switch (i.RType.rs) { case OP_BCx: case OP_BCy: more = 2; /* stop after next instruction */ }; break; case OP_SW: /* look for saved registers on the stack */ if (i.IType.rs != 29) break; /* only restore the first one */ if (mask & (1 << i.IType.rt)) break; mask |= (1 << i.IType.rt); addr = (vm_offset_t)(*sp + (short)i.IType.imm); switch (i.IType.rt) { case 4:/* a0 */ case 5:/* a1 */ case 6:/* a2 */ case 7:/* a3 */ #if defined(__mips_n64) || defined(__mips_n32) case 8:/* a4 */ case 9:/* a5 */ case 10:/* a6 */ case 11:/* a7 */ #endif arg = i.IType.rt - 4; if (args) args[arg] = kdbpeek((int*)addr); if (valid_args) valid_args[arg] = 1; break; case 31: /* ra */ ra = kdbpeek((int *)addr); } break; case OP_SD: /* look for saved registers on the stack */ if (i.IType.rs != 29) break; /* only restore the first one */ if (mask & (1 << i.IType.rt)) break; mask |= (1 << i.IType.rt); addr = (vm_offset_t)(*sp + (short)i.IType.imm); switch (i.IType.rt) { case 4:/* a0 */ case 5:/* a1 */ case 6:/* a2 */ case 7:/* a3 */ #if defined(__mips_n64) || defined(__mips_n32) case 8:/* a4 */ case 9:/* a5 */ case 10:/* a6 */ case 11:/* a7 */ #endif arg = i.IType.rt - 4; if (args) args[arg] = kdbpeekd((int *)addr); if (valid_args) valid_args[arg] = 1; break; case 31: /* ra */ ra = kdbpeekd((int *)addr); } break; case OP_ADDI: case OP_ADDIU: case OP_DADDI: case OP_DADDIU: /* look for stack pointer adjustment */ if (i.IType.rs != 29 || i.IType.rt != 29) break; stksize = -((short)i.IType.imm); } } if (!MIPS_IS_VALID_KERNELADDR(ra)) return (-1); *pc = ra; *sp += stksize; #if defined(__mips_o32) /* * For MIPS32 fill out arguments 5..8 from the stack */ for (arg = 4; arg < 8; arg++) { addr = (vm_offset_t)(*sp + arg*sizeof(register_t)); if (args) args[arg] = kdbpeekd((int *)addr); if (valid_args) valid_args[arg] = 1; } #endif return (0); error: return (-1); } static int dtrace_next_uframe(register_t *pc, register_t *sp, register_t *ra) { int offset, registers_on_stack; uint32_t opcode, mask; register_t function_start; int stksize; InstFmt i; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; registers_on_stack = 0; mask = 0; function_start = 0; offset = 0; stksize = 0; while (offset < MAX_FUNCTION_SIZE) { opcode = dtrace_fuword32((void *)(vm_offset_t)(*pc - offset)); if (*flags & CPU_DTRACE_FAULT) goto fault; /* [d]addiu sp, sp, -X*/ if (((opcode & 0xffff8000) == 0x27bd8000) || ((opcode & 0xffff8000) == 
0x67bd8000)) { function_start = *pc - offset; registers_on_stack = 1; break; } /* lui gp, X */ if ((opcode & 0xffff8000) == 0x3c1c0000) { /* * Function might start with this instruction * Keep an eye on "jr ra" and sp correction * with positive value further on */ function_start = *pc - offset; } if (function_start) { /* * Stop looking further. Possible end of * function instruction: it means there is no * stack modifications, sp is unchanged */ /* [d]addiu sp,sp,X */ if (((opcode & 0xffff8000) == 0x27bd0000) || ((opcode & 0xffff8000) == 0x67bd0000)) break; if (opcode == 0x03e00008) break; } offset += sizeof(int); } if (!function_start) return (-1); if (registers_on_stack) { offset = 0; while ((offset < MAX_PROLOGUE_SIZE) && ((function_start + offset) < *pc)) { i.word = dtrace_fuword32((void *)(vm_offset_t)(function_start + offset)); switch (i.JType.op) { case OP_SW: /* look for saved registers on the stack */ if (i.IType.rs != 29) break; /* only restore the first one */ if (mask & (1 << i.IType.rt)) break; mask |= (1 << i.IType.rt); if (i.IType.rt == 31) *ra = dtrace_fuword32((void *)(vm_offset_t)(*sp + (short)i.IType.imm)); break; case OP_SD: /* look for saved registers on the stack */ if (i.IType.rs != 29) break; /* only restore the first one */ if (mask & (1 << i.IType.rt)) break; mask |= (1 << i.IType.rt); /* ra */ if (i.IType.rt == 31) *ra = dtrace_fuword64((void *)(vm_offset_t)(*sp + (short)i.IType.imm)); break; case OP_ADDI: case OP_ADDIU: case OP_DADDI: case OP_DADDIU: /* look for stack pointer adjustment */ if (i.IType.rs != 29 || i.IType.rt != 29) break; stksize = -((short)i.IType.imm); } offset += sizeof(int); if (*flags & CPU_DTRACE_FAULT) goto fault; } } /* * We reached the end of backtrace */ if (*pc == *ra) return (-1); *pc = *ra; *sp += stksize; return (0); fault: /* * We just got lost in backtrace, no big deal */ *flags &= ~CPU_DTRACE_FAULT; return (-1); } static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) { if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = uaddr; return (0); } return (1); } void dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(uaddr, kaddr, size); } void dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(kaddr, uaddr, size); } void dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(uaddr, kaddr, size, flags); } void dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(kaddr, uaddr, size, flags); } uint8_t dtrace_fuword8(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword8_nocheck(uaddr)); } uint16_t dtrace_fuword16(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword16_nocheck(uaddr)); } uint32_t dtrace_fuword32(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return 
(dtrace_fuword32_nocheck(uaddr)); } uint64_t dtrace_fuword64(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword64_nocheck(uaddr)); } diff --git a/sys/cddl/dev/dtrace/powerpc/dtrace_isa.c b/sys/cddl/dev/dtrace/powerpc/dtrace_isa.c index a188eafa777d..cce1c907b5d8 100644 --- a/sys/cddl/dev/dtrace/powerpc/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/powerpc/dtrace_isa.c @@ -1,692 +1,691 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2012,2013 Justin Hibbits * * $FreeBSD$ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include "regset.h" /* Offset to the LR Save word (ppc32) */ #define RETURN_OFFSET 4 /* Offset to LR Save word (ppc64). CR Save area sits between back chain and LR */ #define RETURN_OFFSET64 16 #ifdef __powerpc64__ #define OFFSET 4 /* Account for the TOC reload slot */ #define FRAME_OFFSET 48 #else #define OFFSET 0 #define FRAME_OFFSET 8 #endif #define INKERNEL(x) (((x) <= VM_MAX_KERNEL_ADDRESS && \ (x) >= VM_MIN_KERNEL_ADDRESS) || \ (PMAP_HAS_DMAP && (x) >= DMAP_BASE_ADDRESS && \ (x) <= DMAP_MAX_ADDRESS)) static __inline int dtrace_sp_inkernel(uintptr_t sp) { struct trapframe *frame; vm_offset_t callpc; /* Not within the kernel, or not aligned. */ if (!INKERNEL(sp) || (sp & 0xf) != 0) return (0); #ifdef __powerpc64__ callpc = *(vm_offset_t *)(sp + RETURN_OFFSET64); #else callpc = *(vm_offset_t *)(sp + RETURN_OFFSET); #endif if ((callpc & 3) || (callpc < 0x100)) return (0); /* * trapexit() and asttrapexit() are sentinels * for kernel stack tracing. */ if (callpc + OFFSET == (vm_offset_t) &trapexit || callpc + OFFSET == (vm_offset_t) &asttrapexit) { frame = (struct trapframe *)(sp + FRAME_OFFSET); return ((frame->srr1 & PSL_PR) == 0); } return (1); } static __inline void dtrace_next_sp_pc(uintptr_t sp, uintptr_t *nsp, uintptr_t *pc) { vm_offset_t callpc; struct trapframe *frame; #ifdef __powerpc64__ callpc = *(vm_offset_t *)(sp + RETURN_OFFSET64); #else callpc = *(vm_offset_t *)(sp + RETURN_OFFSET); #endif /* * trapexit() and asttrapexit() are sentinels * for kernel stack tracing. 
*/ if ((callpc + OFFSET == (vm_offset_t) &trapexit || callpc + OFFSET == (vm_offset_t) &asttrapexit)) { /* Access the trap frame */ frame = (struct trapframe *)(sp + FRAME_OFFSET); if (nsp != NULL) *nsp = frame->fixreg[1]; if (pc != NULL) *pc = frame->srr0; return; } if (nsp != NULL) *nsp = *(uintptr_t *)sp; if (pc != NULL) *pc = callpc; } void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { int depth = 0; uintptr_t osp, sp; vm_offset_t callpc; pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller; osp = PAGE_SIZE; if (intrpc != 0) pcstack[depth++] = (pc_t) intrpc; aframes++; sp = (uintptr_t)__builtin_frame_address(0); while (depth < pcstack_limit) { if (sp <= osp) break; if (!dtrace_sp_inkernel(sp)) break; osp = sp; dtrace_next_sp_pc(osp, &sp, &callpc); if (aframes > 0) { aframes--; if ((aframes == 0) && (caller != 0)) { pcstack[depth++] = caller; } } else { pcstack[depth++] = callpc; } } for (; depth < pcstack_limit; depth++) { pcstack[depth] = 0; } } static int dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, uintptr_t sp) { proc_t *p = curproc; int ret = 0; ASSERT(pcstack == NULL || pcstack_limit > 0); while (pc != 0) { ret++; if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) break; } if (sp == 0) break; if (SV_PROC_FLAG(p, SV_ILP32)) { pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET)); sp = dtrace_fuword32((void *)sp); } else { pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64)); sp = dtrace_fuword64((void *)sp); } } return (ret); } void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { proc_t *p = curproc; struct trapframe *tf; uintptr_t pc, sp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; int n; if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. */ if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->srr0; sp = tf->fixreg[1]; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { /* * In an entry probe. The frame pointer has not yet been * pushed (that happens in the function prologue). The * best approach is to add the current pc as a missing top * of stack and back the pc up to the caller, which is stored * at the current stack pointer address since the call * instruction puts it there right before the branch. */ *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->lr; } n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); ASSERT(n >= 0); ASSERT(n <= pcstack_limit); pcstack += n; pcstack_limit -= n; zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } int dtrace_getustackdepth(void) { proc_t *p = curproc; struct trapframe *tf; uintptr_t pc, sp; int n = 0; if (p == NULL || (tf = curthread->td_frame) == NULL) return (0); if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) return (-1); pc = tf->srr0; sp = tf->fixreg[1]; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { /* * In an entry probe. The frame pointer has not yet been * pushed (that happens in the function prologue). The * best approach is to add the current pc as a missing top * of stack and back the pc up to the caller, which is stored * at the current stack pointer address since the call * instruction puts it there right before the branch. 
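One step of the powerpc stack walk reads the saved LR from a fixed offset within the frame and follows the back chain stored at the frame base: 4 bytes above the chain on ppc32, 16 on ppc64 where the CR save word sits in between. Stripped of the trapexit()/asttrapexit() sentinel handling, that step looks like the sketch below; it is a model, not the kernel function, and assumes sp points at a valid frame.

#include <stdint.h>

#ifdef __powerpc64__
#define	LR_SAVE_OFFSET	16
#else
#define	LR_SAVE_OFFSET	4
#endif

static void
ppc_next_frame(uintptr_t sp, uintptr_t *next_sp, uintptr_t *callpc)
{
	*callpc = *(uintptr_t *)(sp + LR_SAVE_OFFSET);	/* saved LR */
	*next_sp = *(uintptr_t *)sp;			/* back chain */
}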
*/ if (SV_PROC_FLAG(p, SV_ILP32)) { pc = dtrace_fuword32((void *) sp); } else pc = dtrace_fuword64((void *) sp); n++; } n += dtrace_getustack_common(NULL, 0, pc, sp); return (n); } void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { proc_t *p = curproc; struct trapframe *tf; uintptr_t pc, sp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; #ifdef notyet /* XXX signal stack */ uintptr_t oldcontext; size_t s1, s2; #endif if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. */ if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->srr0; sp = tf->fixreg[1]; #ifdef notyet /* XXX signal stack */ oldcontext = lwp->lwp_oldcontext; s1 = sizeof (struct xframe) + 2 * sizeof (long); s2 = s1 + sizeof (siginfo_t); #endif if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { *pcstack++ = (uint64_t)pc; *fpstack++ = 0; pcstack_limit--; if (pcstack_limit <= 0) return; if (SV_PROC_FLAG(p, SV_ILP32)) { pc = dtrace_fuword32((void *)sp); } else { pc = dtrace_fuword64((void *)sp); } } while (pc != 0) { *pcstack++ = (uint64_t)pc; *fpstack++ = sp; pcstack_limit--; if (pcstack_limit <= 0) break; if (sp == 0) break; #ifdef notyet /* XXX signal stack */ if (oldcontext == sp + s1 || oldcontext == sp + s2) { ucontext_t *ucp = (ucontext_t *)oldcontext; greg_t *gregs = ucp->uc_mcontext.gregs; sp = dtrace_fulword(&gregs[REG_FP]); pc = dtrace_fulword(&gregs[REG_PC]); oldcontext = dtrace_fulword(&ucp->uc_link); } else #endif /* XXX */ { if (SV_PROC_FLAG(p, SV_ILP32)) { pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET)); sp = dtrace_fuword32((void *)sp); } else { pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64)); sp = dtrace_fuword64((void *)sp); } } /* * This is totally bogus: if we faulted, we're going to clear * the fault and break. This is to deal with the apparently * broken Java stacks on x86. */ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; break; } } zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } /*ARGSUSED*/ uint64_t dtrace_getarg(int arg, int aframes) { uintptr_t val; uintptr_t *fp = (uintptr_t *)__builtin_frame_address(0); uintptr_t *stack; int i; /* * A total of 8 arguments are passed via registers; any argument with * index of 7 or lower is therefore in a register. */ int inreg = 7; for (i = 1; i <= aframes; i++) { fp = (uintptr_t *)*fp; /* * On ppc32 trapexit() is the immediately following label. On * ppc64 AIM trapexit() follows a nop. */ #ifdef __powerpc64__ if ((long)(fp[2]) + 4 == (long)trapexit) { #else if ((long)(fp[1]) == (long)trapexit) { #endif /* * In the case of powerpc, we will use the pointer to the regs * structure that was pushed when we took the trap. To get this * structure, we must increment beyond the frame structure. If the * argument that we're seeking is passed on the stack, we'll pull * the true stack pointer out of the saved registers and decrement * our argument by the number of arguments passed in registers; if * the argument we're seeking is passed in regsiters, we can just * load it directly. 
*/ #ifdef __powerpc64__ struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 48); #else struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 8); #endif if (arg <= inreg) { stack = &rp->fixreg[3]; } else { stack = (uintptr_t *)(rp->fixreg[1]); arg -= inreg; } goto load; } } /* * We know that we did not come through a trap to get into * dtrace_probe() -- the provider simply called dtrace_probe() * directly. As this is the case, we need to shift the argument * that we're looking for: the probe ID is the first argument to * dtrace_probe(), so the argument n will actually be found where * one would expect to find argument (n + 1). */ arg++; if (arg <= inreg) { /* * This shouldn't happen. If the argument is passed in a * register then it should have been, well, passed in a * register... */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } arg -= (inreg + 1); stack = fp + 2; load: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); val = stack[arg]; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); return (val); } int dtrace_getstackdepth(int aframes) { int depth = 0; uintptr_t osp, sp; vm_offset_t callpc; osp = PAGE_SIZE; sp = (uintptr_t)__builtin_frame_address(0); for(;;) { if (sp <= osp) break; if (!dtrace_sp_inkernel(sp)) break; depth++; osp = sp; dtrace_next_sp_pc(sp, &sp, NULL); } if (depth < aframes) return (0); return (depth - aframes); } ulong_t dtrace_getreg(struct trapframe *rp, uint_t reg) { if (reg < 32) return (rp->fixreg[reg]); switch (reg) { case 32: return (rp->lr); case 33: return (rp->cr); case 34: return (rp->xer); case 35: return (rp->ctr); case 36: return (rp->srr0); case 37: return (rp->srr1); case 38: return (rp->exc); default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } } static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) { ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr); if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = uaddr; return (0); } return (1); } void dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) if (copyin((const void *)uaddr, (void *)kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; } } void dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) { if (copyout((const void *)kaddr, (void *)uaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; } } } void dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { size_t actual; int error; if (dtrace_copycheck(uaddr, kaddr, size)) { error = copyinstr((const void *)uaddr, (void *)kaddr, size, &actual); /* ENAMETOOLONG is not a fault condition. */ if (error && error != ENAMETOOLONG) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; } } } /* * The bulk of this function could be replaced to match dtrace_copyinstr() * if we ever implement a copyoutstr(). 
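 * A hypothetical sketch of that shape, mirroring dtrace_copyinstr()
 * above; copyoutstr() does not exist in this tree, so the call below is
 * illustrative only:
 *     error = copyoutstr((const void *)kaddr, (void *)uaddr, size, &actual);
 *     if (error != 0 && error != ENAMETOOLONG)
 *             set CPU_DTRACE_BADADDR and record uaddr in cpuc_dtrace_illval;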
*/ void dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { size_t len; if (dtrace_copycheck(uaddr, kaddr, size)) { len = strlen((const char *)kaddr); if (len > size) len = size; if (copyout((const void *)kaddr, (void *)uaddr, len)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; } } } uint8_t dtrace_fuword8(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (fubyte(uaddr)); } uint16_t dtrace_fuword16(void *uaddr) { uint16_t ret = 0; if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) { if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; } } return ret; } uint32_t dtrace_fuword32(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (fuword32(uaddr)); } uint64_t dtrace_fuword64(void *uaddr) { uint64_t ret = 0; if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) { if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; } } return ret; } uintptr_t dtrace_fulword(void *uaddr) { uintptr_t ret = 0; if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) { if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; } } return ret; } diff --git a/sys/cddl/dev/dtrace/riscv/dtrace_isa.c b/sys/cddl/dev/dtrace/riscv/dtrace_isa.c index f8663b5bfeab..d42299abcd35 100644 --- a/sys/cddl/dev/dtrace/riscv/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/riscv/dtrace_isa.c @@ -1,396 +1,395 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2016 Ruslan Bukin * * $FreeBSD$ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include "regset.h" /* * Wee need some reasonable default to prevent backtrace code * from wandering too far */ #define MAX_FUNCTION_SIZE 0x10000 #define MAX_PROLOGUE_SIZE 0x100 #define MAX_USTACK_DEPTH 2048 uint8_t dtrace_fuword8_nocheck(void *); uint16_t dtrace_fuword16_nocheck(void *); uint32_t dtrace_fuword32_nocheck(void *); uint64_t dtrace_fuword64_nocheck(void *); void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { struct unwind_state state; int scp_offset; register_t sp; int depth; depth = 0; if (intrpc != 0) { pcstack[depth++] = (pc_t) intrpc; } aframes++; __asm __volatile("mv %0, sp" : "=&r" (sp)); state.fp = (uintptr_t)__builtin_frame_address(0); state.sp = sp; state.pc = (uintptr_t)dtrace_getpcstack; while (depth < pcstack_limit) { if (!unwind_frame(curthread, &state)) break; if (!INKERNEL(state.pc) || !INKERNEL(state.fp)) break; /* * NB: Unlike some other architectures, we don't need to * explicitly insert cpu_dtrace_caller as it appears in the * normal kernel stack trace rather than a special trap frame. */ if (aframes > 0) { aframes--; } else { pcstack[depth++] = state.pc; } } for (; depth < pcstack_limit; depth++) { pcstack[depth] = 0; } } static int dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, uintptr_t fp) { volatile uint16_t *flags; uintptr_t oldfp; int ret; oldfp = fp; ret = 0; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; ASSERT(pcstack == NULL || pcstack_limit > 0); while (pc != 0) { /* * We limit the number of times we can go around this * loop to account for a circular stack. */ if (ret++ >= MAX_USTACK_DEPTH) { *flags |= CPU_DTRACE_BADSTACK; cpu_core[curcpu].cpuc_dtrace_illval = fp; break; } if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) break; } if (fp == 0) break; pc = dtrace_fuword64((void *)(fp + offsetof(struct riscv_frame, f_retaddr))); fp = dtrace_fuword64((void *)fp); if (fp == oldfp) { *flags |= CPU_DTRACE_BADSTACK; cpu_core[curcpu].cpuc_dtrace_illval = fp; break; } oldfp = fp; } return (ret); } void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { volatile uint16_t *flags; struct trapframe *tf; uintptr_t pc, sp, fp; proc_t *p; int n; p = curproc; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. */ if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->tf_sepc; sp = tf->tf_sp; fp = tf->tf_s[0]; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { /* * In an entry probe. The frame pointer has not yet been * pushed (that happens in the function prologue). The * best approach is to add the current pc as a missing top * of stack and back the pc up to the caller, which is stored * at the current stack pointer address since the call * instruction puts it there right before the branch. 
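 * (On RISC-V the trap frame already carries the caller's address in the
 * return-address register, so the code below takes it from tf->tf_ra
 * rather than reading it back from the user stack.)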
*/ *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) return; pc = tf->tf_ra; } n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp); ASSERT(n >= 0); ASSERT(n <= pcstack_limit); pcstack += n; pcstack_limit -= n; zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } int dtrace_getustackdepth(void) { printf("IMPLEMENT ME: %s\n", __func__); return (0); } void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { printf("IMPLEMENT ME: %s\n", __func__); } /*ARGSUSED*/ uint64_t dtrace_getarg(int arg, int aframes) { printf("IMPLEMENT ME: %s\n", __func__); return (0); } int dtrace_getstackdepth(int aframes) { struct unwind_state state; int scp_offset; register_t sp; int depth; bool done; depth = 1; done = false; __asm __volatile("mv %0, sp" : "=&r" (sp)); state.fp = (uintptr_t)__builtin_frame_address(0); state.sp = sp; state.pc = (uintptr_t)dtrace_getstackdepth; do { done = !unwind_frame(curthread, &state); if (!INKERNEL(state.pc) || !INKERNEL(state.fp)) break; depth++; } while (!done); if (depth < aframes) return (0); else return (depth - aframes); } ulong_t dtrace_getreg(struct trapframe *rp, uint_t reg) { printf("IMPLEMENT ME: %s\n", __func__); return (0); } static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) { if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = uaddr; return (0); } return (1); } void dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(uaddr, kaddr, size); } void dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(kaddr, uaddr, size); } void dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(uaddr, kaddr, size, flags); } void dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(kaddr, uaddr, size, flags); } uint8_t dtrace_fuword8(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword8_nocheck(uaddr)); } uint16_t dtrace_fuword16(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword16_nocheck(uaddr)); } uint32_t dtrace_fuword32(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword32_nocheck(uaddr)); } uint64_t dtrace_fuword64(void *uaddr) { if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword64_nocheck(uaddr)); } diff --git a/sys/compat/linux/linux_elf.c b/sys/compat/linux/linux_elf.c index 077e0c93c884..9192388b347a 100644 --- a/sys/compat/linux/linux_elf.c +++ b/sys/compat/linux/linux_elf.c @@ -1,294 +1,295 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2021 Edward Tomasz Napierala * Copyright (c) 2018 Chuck Tuffli * Copyright (c) 2017 Dell EMC * Copyright (c) 2000 David O'Brien * Copyright (c) 1995-1996 
Søren Schmidt * Copyright (c) 1996 Peter Wemm * All rights reserved. * * This software was developed by the University of Cambridge Computer * Laboratory as part of the CHERI for Hypervisors and Operating Systems * (CHaOS) project, funded by EPSRC grant EP/V000292/1. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #if __ELF_WORD_SIZE == 32 #define linux_pt_regset linux_pt_regset32 #define bsd_to_linux_regset bsd_to_linux_regset32 #include #else #include #endif #include #include #include /* This adds "linux32_" and "linux64_" prefixes. */ #define __linuxN(x) __CONCAT(__CONCAT(__CONCAT(linux,__ELF_WORD_SIZE),_),x) #define LINUX_NT_AUXV 6 static void __linuxN(note_fpregset)(void *, struct sbuf *, size_t *); static void __linuxN(note_prpsinfo)(void *, struct sbuf *, size_t *); static void __linuxN(note_prstatus)(void *, struct sbuf *, size_t *); static void __linuxN(note_threadmd)(void *, struct sbuf *, size_t *); static void __linuxN(note_nt_auxv)(void *, struct sbuf *, size_t *); void __linuxN(prepare_notes)(struct thread *td, struct note_info_list *list, size_t *sizep) { struct proc *p; struct thread *thr; size_t size; p = td->td_proc; size = 0; /* * To have the debugger select the right thread (LWP) as the initial * thread, we dump the state of the thread passed to us in td first. * This is the thread that causes the core dump and thus likely to * be the right thread one wants to have selected in the debugger. */ thr = td; while (thr != NULL) { size += __elfN(register_note)(td, list, NT_PRSTATUS, __linuxN(note_prstatus), thr); size += __elfN(register_note)(td, list, NT_PRPSINFO, __linuxN(note_prpsinfo), p); size += __elfN(register_note)(td, list, LINUX_NT_AUXV, __linuxN(note_nt_auxv), p); size += __elfN(register_note)(td, list, NT_FPREGSET, __linuxN(note_fpregset), thr); size += __elfN(register_note)(td, list, -1, __linuxN(note_threadmd), thr); thr = thr == td ? 
TAILQ_FIRST(&p->p_threads) : TAILQ_NEXT(thr, td_plist); if (thr == td) thr = TAILQ_NEXT(thr, td_plist); } *sizep = size; } typedef struct linux_elf_prstatus linux_elf_prstatus_t; #if __ELF_WORD_SIZE == 32 typedef struct prpsinfo32 linux_elf_prpsinfo_t; typedef struct fpreg32 linux_elf_prfpregset_t; #else typedef prpsinfo_t linux_elf_prpsinfo_t; typedef prfpregset_t linux_elf_prfpregset_t; #endif static void __linuxN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep) { struct sbuf sbarg; size_t len; char *cp, *end; struct proc *p; linux_elf_prpsinfo_t *psinfo; int error; p = arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*psinfo), ("invalid size")); psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK); psinfo->pr_version = PRPSINFO_VERSION; psinfo->pr_psinfosz = sizeof(linux_elf_prpsinfo_t); strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname)); PROC_LOCK(p); if (p->p_args != NULL) { len = sizeof(psinfo->pr_psargs) - 1; if (len > p->p_args->ar_length) len = p->p_args->ar_length; memcpy(psinfo->pr_psargs, p->p_args->ar_args, len); PROC_UNLOCK(p); error = 0; } else { _PHOLD(p); PROC_UNLOCK(p); sbuf_new(&sbarg, psinfo->pr_psargs, sizeof(psinfo->pr_psargs), SBUF_FIXEDLEN); error = proc_getargv(curthread, p, &sbarg); PRELE(p); if (sbuf_finish(&sbarg) == 0) len = sbuf_len(&sbarg) - 1; else len = sizeof(psinfo->pr_psargs) - 1; sbuf_delete(&sbarg); } if (error || len == 0) strlcpy(psinfo->pr_psargs, p->p_comm, sizeof(psinfo->pr_psargs)); else { KASSERT(len < sizeof(psinfo->pr_psargs), ("len is too long: %zu vs %zu", len, sizeof(psinfo->pr_psargs))); cp = psinfo->pr_psargs; end = cp + len - 1; for (;;) { cp = memchr(cp, '\0', end - cp); if (cp == NULL) break; *cp = ' '; } } psinfo->pr_pid = p->p_pid; sbuf_bcat(sb, psinfo, sizeof(*psinfo)); free(psinfo, M_TEMP); } *sizep = sizeof(*psinfo); } static void __linuxN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; linux_elf_prstatus_t *status; #if __ELF_WORD_SIZE == 32 struct reg32 pr_reg; #else struct reg pr_reg; #endif td = arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*status), ("invalid size")); status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK); /* * XXX: Some fields missing. */ status->pr_cursig = td->td_proc->p_sig; status->pr_pid = td->td_tid; #if __ELF_WORD_SIZE == 32 fill_regs32(td, &pr_reg); #else fill_regs(td, &pr_reg); #endif bsd_to_linux_regset(&pr_reg, &status->pr_reg); sbuf_bcat(sb, status, sizeof(*status)); free(status, M_TEMP); } *sizep = sizeof(*status); } static void __linuxN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; linux_elf_prfpregset_t *fpregset; td = arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*fpregset), ("invalid size")); fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK); #if __ELF_WORD_SIZE == 32 fill_fpregs32(td, fpregset); #else fill_fpregs(td, fpregset); #endif sbuf_bcat(sb, fpregset, sizeof(*fpregset)); free(fpregset, M_TEMP); } *sizep = sizeof(*fpregset); } /* * Allow for MD specific notes, as well as any MD * specific preparations for writing MI notes. 
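 * As with the note callbacks above, this appears to be invoked twice:
 * once with sb == NULL so that only the note size is reported through
 * *sizep, and once with a live sbuf to actually emit the payload.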
*/ static void __linuxN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; void *buf; size_t size; td = arg; size = *sizep; if (size != 0 && sb != NULL) buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK); else buf = NULL; size = 0; __elfN(dump_thread)(td, buf, &size); KASSERT(sb == NULL || *sizep == size, ("invalid size")); if (size != 0 && sb != NULL) sbuf_bcat(sb, buf, size); free(buf, M_TEMP); *sizep = size; } static void __linuxN(note_nt_auxv)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; p = arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, LINUX_AT_COUNT * sizeof(Elf_Auxinfo), SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_count_drain, &size); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); } } diff --git a/sys/ddb/db_run.c b/sys/ddb/db_run.c index af99eb757609..c9ea87fca79c 100644 --- a/sys/ddb/db_run.c +++ b/sys/ddb/db_run.c @@ -1,414 +1,415 @@ /*- * SPDX-License-Identifier: MIT-CMU * * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Author: David B. Golub, Carnegie Mellon University * Date: 7/90 */ /* * Commands to run process. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include #include #include #include #include #define STEP_ONCE 1 #define STEP_RETURN 2 #define STEP_CALLT 3 #define STEP_CONTINUE 4 #define STEP_INVISIBLE 5 #define STEP_COUNT 6 static int db_run_mode = STEP_CONTINUE; static bool db_sstep_multiple; static bool db_sstep_print; static int db_loop_count; static int db_call_depth; int db_inst_count; int db_load_count; int db_store_count; #ifdef SOFTWARE_SSTEP db_breakpoint_t db_not_taken_bkpt = 0; db_breakpoint_t db_taken_bkpt = 0; #endif #ifndef db_set_single_step void db_set_single_step(void); #endif #ifndef db_clear_single_step void db_clear_single_step(void); #endif #ifndef db_pc_is_singlestep static bool db_pc_is_singlestep(db_addr_t pc) { #ifdef SOFTWARE_SSTEP if ((db_not_taken_bkpt != 0 && pc == db_not_taken_bkpt->address) || (db_taken_bkpt != 0 && pc == db_taken_bkpt->address)) return (true); #endif return (false); } #endif bool db_stop_at_pc(int type, int code, bool *is_breakpoint, bool *is_watchpoint) { db_addr_t pc; db_breakpoint_t bkpt; *is_breakpoint = IS_BREAKPOINT_TRAP(type, code); *is_watchpoint = IS_WATCHPOINT_TRAP(type, code); pc = PC_REGS(); if (db_pc_is_singlestep(pc)) *is_breakpoint = false; db_clear_single_step(); db_clear_breakpoints(); db_clear_watchpoints(); #ifdef FIXUP_PC_AFTER_BREAK if (*is_breakpoint) { /* * Breakpoint trap. Fix up the PC if the * machine requires it. */ FIXUP_PC_AFTER_BREAK pc = PC_REGS(); } #endif /* * Now check for a breakpoint at this address. */ bkpt = db_find_breakpoint_here(pc); if (bkpt) { if (--bkpt->count == 0) { bkpt->count = bkpt->init_count; *is_breakpoint = true; return (true); /* stop here */ } return (false); /* continue the countdown */ } else if (*is_breakpoint) { #ifdef BKPT_SKIP BKPT_SKIP; #endif } *is_breakpoint = false; /* might be a breakpoint, but not ours */ /* * If not stepping, then silently ignore single-step traps * (except for clearing the single-step-flag above). * * If stepping, then abort if the trap type is unexpected. * Breakpoints owned by us are expected and were handled above. * Single-steps are expected and are handled below. All others * are unexpected. * * Only do either of these if the MD layer claims to classify * single-step traps unambiguously (by defining IS_SSTEP_TRAP). * Otherwise, fall through to the bad historical behaviour * given by turning unexpected traps into expected traps: if not * stepping, then expect only breakpoints and stop, and if * stepping, then expect only single-steps and step. 
*/ #ifdef IS_SSTEP_TRAP if (db_run_mode == STEP_CONTINUE && IS_SSTEP_TRAP(type, code)) return (false); if (db_run_mode != STEP_CONTINUE && !IS_SSTEP_TRAP(type, code)) { printf("Stepping aborted\n"); return (true); } #endif if (db_run_mode == STEP_INVISIBLE) { db_run_mode = STEP_CONTINUE; return (false); /* continue */ } if (db_run_mode == STEP_COUNT) { return (false); /* continue */ } if (db_run_mode == STEP_ONCE) { if (--db_loop_count > 0) { if (db_sstep_print) { db_printf("\t\t"); db_print_loc_and_inst(pc); } return (false); /* continue */ } } if (db_run_mode == STEP_RETURN) { /* continue until matching return */ db_expr_t ins; ins = db_get_value(pc, sizeof(int), false); if (!inst_trap_return(ins) && (!inst_return(ins) || --db_call_depth != 0)) { if (db_sstep_print) { if (inst_call(ins) || inst_return(ins)) { int i; db_printf("[after %6d] ", db_inst_count); for (i = db_call_depth; --i > 0; ) db_printf(" "); db_print_loc_and_inst(pc); } } if (inst_call(ins)) db_call_depth++; return (false); /* continue */ } } if (db_run_mode == STEP_CALLT) { /* continue until call or return */ db_expr_t ins; ins = db_get_value(pc, sizeof(int), false); if (!inst_call(ins) && !inst_return(ins) && !inst_trap_return(ins)) { return (false); /* continue */ } } return (true); } void db_restart_at_pc(bool watchpt) { db_addr_t pc = PC_REGS(); if ((db_run_mode == STEP_COUNT) || ((db_run_mode == STEP_ONCE) && db_sstep_multiple) || (db_run_mode == STEP_RETURN) || (db_run_mode == STEP_CALLT)) { /* * We are about to execute this instruction, * so count it now. */ #ifdef SOFTWARE_SSTEP db_expr_t ins = #endif db_get_value(pc, sizeof(int), false); db_inst_count++; db_load_count += inst_load(ins); db_store_count += inst_store(ins); #ifdef SOFTWARE_SSTEP /* XXX works on mips, but... */ if (inst_branch(ins) || inst_call(ins)) { ins = db_get_value(next_instr_address(pc,1), sizeof(int), false); db_inst_count++; db_load_count += inst_load(ins); db_store_count += inst_store(ins); } #endif /* SOFTWARE_SSTEP */ } if (db_run_mode == STEP_CONTINUE) { if (watchpt || db_find_breakpoint_here(pc)) { /* * Step over breakpoint/watchpoint. */ db_run_mode = STEP_INVISIBLE; db_set_single_step(); } else { db_set_breakpoints(); db_set_watchpoints(); } } else { db_set_single_step(); } } #ifdef SOFTWARE_SSTEP /* * Software implementation of single-stepping. * If your machine does not have a trace mode * similar to the vax or sun ones you can use * this implementation, done for the mips. * Just define the above conditional and provide * the functions/macros defined below. * * extern bool * inst_branch(), returns true if the instruction might branch * extern unsigned * branch_taken(), return the address the instruction might * branch to * db_getreg_val(); return the value of a user register, * as indicated in the hardware instruction * encoding, e.g. 8 for r8 * * next_instr_address(pc,bd) returns the address of the first * instruction following the one at "pc", * which is either in the taken path of * the branch (bd==1) or not. This is * for machines (mips) with branch delays. * * A single-step may involve at most 2 breakpoints - * one for branch-not-taken and one for branch taken. * If one of these addresses does not already have a breakpoint, * we allocate a breakpoint and save it here. * These breakpoints are deleted on return. */ void db_set_single_step(void) { db_addr_t pc = PC_REGS(), brpc; unsigned inst; /* * User was stopped at pc, e.g. the instruction * at pc was not executed. 
*/ inst = db_get_value(pc, sizeof(int), false); if (inst_branch(inst) || inst_call(inst) || inst_return(inst)) { brpc = branch_taken(inst, pc); if (brpc != pc) { /* self-branches are hopeless */ db_taken_bkpt = db_set_temp_breakpoint(brpc); } pc = next_instr_address(pc, 1); } pc = next_instr_address(pc, 0); db_not_taken_bkpt = db_set_temp_breakpoint(pc); } void db_clear_single_step(void) { if (db_not_taken_bkpt != 0) { db_delete_temp_breakpoint(db_not_taken_bkpt); db_not_taken_bkpt = 0; } if (db_taken_bkpt != 0) { db_delete_temp_breakpoint(db_taken_bkpt); db_taken_bkpt = 0; } } #endif /* SOFTWARE_SSTEP */ /* single-step */ /*ARGSUSED*/ void db_single_step_cmd(db_expr_t addr, bool have_addr, db_expr_t count, char *modif) { bool print = false; if (count == -1) count = 1; if (modif[0] == 'p') print = true; db_run_mode = STEP_ONCE; db_loop_count = count; db_sstep_multiple = (count != 1); db_sstep_print = print; db_inst_count = 0; db_load_count = 0; db_store_count = 0; db_cmd_loop_done = 1; } /* trace and print until call/return */ /*ARGSUSED*/ void db_trace_until_call_cmd(db_expr_t addr, bool have_addr, db_expr_t count, char *modif) { bool print = false; if (modif[0] == 'p') print = true; db_run_mode = STEP_CALLT; db_sstep_print = print; db_inst_count = 0; db_load_count = 0; db_store_count = 0; db_cmd_loop_done = 1; } /*ARGSUSED*/ void db_trace_until_matching_cmd(db_expr_t addr, bool have_addr, db_expr_t count, char *modif) { bool print = false; if (modif[0] == 'p') print = true; db_run_mode = STEP_RETURN; db_call_depth = 1; db_sstep_print = print; db_inst_count = 0; db_load_count = 0; db_store_count = 0; db_cmd_loop_done = 1; } /* continue */ /*ARGSUSED*/ void db_continue_cmd(db_expr_t addr, bool have_addr, db_expr_t count, char *modif) { if (modif[0] == 'c') db_run_mode = STEP_COUNT; else db_run_mode = STEP_CONTINUE; db_inst_count = 0; db_load_count = 0; db_store_count = 0; db_cmd_loop_done = 1; } diff --git a/sys/fs/procfs/procfs_dbregs.c b/sys/fs/procfs/procfs_dbregs.c index 8d2206d2995c..acc0f7f8c5ac 100644 --- a/sys/fs/procfs/procfs_dbregs.c +++ b/sys/fs/procfs/procfs_dbregs.c @@ -1,135 +1,133 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1999 Brian Scott Dean, brdean@unx.sas.com. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry under the following copyrights and conditions: * * Copyright (c) 1993 Jan-Simon Pendry * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)procfs_regs.c 8.4 (Berkeley) 6/15/94 * * From: * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $ * $FreeBSD$ */ #include #include #include #include #include #include #include #include -#include - #include #include #ifdef COMPAT_FREEBSD32 #include #include /* * PROC(write, dbregs, td2, &r) becomes * proc_write_dbregs(td2, &r) or * proc_write_dbregs32(td2, &r32) * * UIOMOVE_FROMBUF(r, uio) becomes * uiomove_frombuf(&r, sizeof(r), uio) or * uiomove_frombuf(&r32, sizeof(r32), uio) */ #define PROC(d, w, t, r) wrap32 ? \ proc_ ## d ## _ ## w ## 32(t, r ## 32) : \ proc_ ## d ## _ ## w(t, r) #define UIOMOVE_FROMBUF(k, u) wrap32 ? \ uiomove_frombuf(& k ## 32, sizeof(k ## 32), u) : \ uiomove_frombuf(& k, sizeof(k), u) #else #define PROC(d, w, t, r) proc_ ## d ## _ ## w(t, r) #define UIOMOVE_FROMBUF(k, u) uiomove_frombuf(& k, sizeof(k), u) #endif int procfs_doprocdbregs(PFS_FILL_ARGS) { int error; struct dbreg r; struct thread *td2; #ifdef COMPAT_FREEBSD32 struct dbreg32 r32; int wrap32 = 0; #endif if (uio->uio_offset != 0) return (0); PROC_LOCK(p); PROC_ASSERT_HELD(p); if (p_candebug(td, p) != 0) { PROC_UNLOCK(p); return (EPERM); } td2 = FIRST_THREAD_IN_PROC(p); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { if (SV_PROC_FLAG(td2->td_proc, SV_ILP32) == 0) { PROC_UNLOCK(p); return (EINVAL); } wrap32 = 1; memset(&r32, 0, sizeof(r32)); } else #endif memset(&r, 0, sizeof(r)); error = PROC(read, dbregs, td2, &r); if (error == 0) { PROC_UNLOCK(p); error = UIOMOVE_FROMBUF(r, uio); PROC_LOCK(p); } if (error == 0 && uio->uio_rw == UIO_WRITE) { if (!P_SHOULDSTOP(p)) /* XXXKSE should be P_TRACED? */ error = EBUSY; else /* XXXKSE: */ error = PROC(write, dbregs, td2, &r); } PROC_UNLOCK(p); return (error); } diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c index 9675030df3c0..1118eb68dcbc 100644 --- a/sys/fs/procfs/procfs_fpregs.c +++ b/sys/fs/procfs/procfs_fpregs.c @@ -1,133 +1,131 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 Jan-Simon Pendry * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)procfs_fpregs.c 8.2 (Berkeley) 6/15/94 * * From: * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $ * $FreeBSD$ */ #include #include #include #include #include #include #include #include -#include - #include #include #ifdef COMPAT_FREEBSD32 #include #include /* * PROC(write, fpregs, td2, &r) becomes * proc_write_fpregs(td2, &r) or * proc_write_fpregs32(td2, &r32) * * UIOMOVE_FROMBUF(r, uio) becomes * uiomove_frombuf(&r, sizeof(r), uio) or * uiomove_frombuf(&r32, sizeof(r32), uio) */ #define PROC(d, w, t, r) wrap32 ? \ proc_ ## d ## _ ## w ## 32(t, r ## 32) : \ proc_ ## d ## _ ## w(t, r) #define UIOMOVE_FROMBUF(k, u) wrap32 ? \ uiomove_frombuf(& k ## 32, sizeof(k ## 32), u) : \ uiomove_frombuf(& k, sizeof(k), u) #else #define PROC(d, w, t, r) proc_ ## d ## _ ## w(t, r) #define UIOMOVE_FROMBUF(k, u) uiomove_frombuf(& k, sizeof(k), u) #endif int procfs_doprocfpregs(PFS_FILL_ARGS) { int error; struct fpreg r; struct thread *td2; #ifdef COMPAT_FREEBSD32 struct fpreg32 r32; int wrap32 = 0; #endif if (uio->uio_offset != 0) return (0); PROC_LOCK(p); PROC_ASSERT_HELD(p); if (p_candebug(td, p)) { PROC_UNLOCK(p); return (EPERM); } if (!P_SHOULDSTOP(p)) { PROC_UNLOCK(p); return (EBUSY); } td2 = FIRST_THREAD_IN_PROC(p); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { if (SV_PROC_FLAG(td2->td_proc, SV_ILP32) == 0) { PROC_UNLOCK(p); return (EINVAL); } wrap32 = 1; memset(&r32, 0, sizeof(r32)); } else #endif memset(&r, 0, sizeof(r)); error = PROC(read, fpregs, td2, &r); if (error == 0) { PROC_UNLOCK(p); error = UIOMOVE_FROMBUF(r, uio); PROC_LOCK(p); } if (error == 0 && uio->uio_rw == UIO_WRITE) { if (!P_SHOULDSTOP(p)) error = EBUSY; else /* XXXKSE: */ error = PROC(write, fpregs, td2, &r); } PROC_UNLOCK(p); return (error); } diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c index 032141a9e32e..703dad64a182 100644 --- a/sys/fs/procfs/procfs_regs.c +++ b/sys/fs/procfs/procfs_regs.c @@ -1,133 +1,131 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 Jan-Simon Pendry * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)procfs_regs.c 8.4 (Berkeley) 6/15/94 * * From: * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $ * $FreeBSD$ */ #include #include #include #include #include #include #include #include -#include - #include #include #ifdef COMPAT_FREEBSD32 #include #include /* * PROC(write, regs, td2, &r) becomes * proc_write_regs(td2, &r) or * proc_write_regs32(td2, &r32) * * UIOMOVE_FROMBUF(r, uio) becomes * uiomove_frombuf(&r, sizeof(r), uio) or * uiomove_frombuf(&r32, sizeof(r32), uio) */ #define PROC(d, w, t, r) wrap32 ? \ proc_ ## d ## _ ## w ## 32(t, r ## 32) : \ proc_ ## d ## _ ## w(t, r) #define UIOMOVE_FROMBUF(k, u) wrap32 ? \ uiomove_frombuf(& k ## 32, sizeof(k ## 32), u) : \ uiomove_frombuf(& k, sizeof(k), u) #else #define PROC(d, w, t, r) proc_ ## d ## _ ## w(t, r) #define UIOMOVE_FROMBUF(k, u) uiomove_frombuf(& k, sizeof(k), u) #endif int procfs_doprocregs(PFS_FILL_ARGS) { int error; struct reg r; struct thread *td2; #ifdef COMPAT_FREEBSD32 struct reg32 r32; int wrap32 = 0; #endif if (uio->uio_offset != 0) return (0); PROC_LOCK(p); PROC_ASSERT_HELD(p); if (p_candebug(td, p)) { PROC_UNLOCK(p); return (EPERM); } if (!P_SHOULDSTOP(p)) { PROC_UNLOCK(p); return (EBUSY); } td2 = FIRST_THREAD_IN_PROC(p); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { if ((SV_PROC_FLAG(td2->td_proc, SV_ILP32)) == 0) { PROC_UNLOCK(p); return (EINVAL); } wrap32 = 1; memset(&r32, 0, sizeof(r32)); } else #endif memset(&r, 0, sizeof(r)); error = PROC(read, regs, td2, &r); if (error == 0) { PROC_UNLOCK(p); error = UIOMOVE_FROMBUF(r, uio); PROC_LOCK(p); } if (error == 0 && uio->uio_rw == UIO_WRITE) { if (!P_SHOULDSTOP(p)) error = EBUSY; else /* XXXKSE: */ error = PROC(write, regs, td2, &r); } PROC_UNLOCK(p); return (error); } diff --git a/sys/i386/i386/db_trace.c b/sys/i386/i386/db_trace.c index 80ef0fe715d8..529b94b76cc4 100644 --- a/sys/i386/i386/db_trace.c +++ b/sys/i386/i386/db_trace.c @@ -1,620 +1,620 @@ /*- * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
* * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include +#include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include static db_varfcn_t db_esp; static db_varfcn_t db_frame; static db_varfcn_t db_frame_seg; static db_varfcn_t db_gs; static db_varfcn_t db_ss; /* * Machine register set. */ #define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x) struct db_variable db_regs[] = { { "cs", DB_OFFSET(tf_cs), db_frame_seg }, { "ds", DB_OFFSET(tf_ds), db_frame_seg }, { "es", DB_OFFSET(tf_es), db_frame_seg }, { "fs", DB_OFFSET(tf_fs), db_frame_seg }, { "gs", NULL, db_gs }, { "ss", NULL, db_ss }, { "eax", DB_OFFSET(tf_eax), db_frame }, { "ecx", DB_OFFSET(tf_ecx), db_frame }, { "edx", DB_OFFSET(tf_edx), db_frame }, { "ebx", DB_OFFSET(tf_ebx), db_frame }, { "esp", NULL, db_esp }, { "ebp", DB_OFFSET(tf_ebp), db_frame }, { "esi", DB_OFFSET(tf_esi), db_frame }, { "edi", DB_OFFSET(tf_edi), db_frame }, { "eip", DB_OFFSET(tf_eip), db_frame }, { "efl", DB_OFFSET(tf_eflags), db_frame }, }; struct db_variable *db_eregs = db_regs + nitems(db_regs); static __inline int get_esp(struct trapframe *tf) { return (TF_HAS_STACKREGS(tf) ? tf->tf_esp : (intptr_t)&tf->tf_esp); } static int db_frame(struct db_variable *vp, db_expr_t *valuep, int op) { int *reg; if (kdb_frame == NULL) return (0); reg = (int *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep); if (op == DB_VAR_GET) *valuep = *reg; else *reg = *valuep; return (1); } static int db_frame_seg(struct db_variable *vp, db_expr_t *valuep, int op) { struct trapframe_vm86 *tfp; int off; uint16_t *reg; if (kdb_frame == NULL) return (0); off = (intptr_t)vp->valuep; if (kdb_frame->tf_eflags & PSL_VM) { tfp = (void *)kdb_frame; switch ((intptr_t)vp->valuep) { case (intptr_t)DB_OFFSET(tf_cs): reg = (uint16_t *)&tfp->tf_cs; break; case (intptr_t)DB_OFFSET(tf_ds): reg = (uint16_t *)&tfp->tf_vm86_ds; break; case (intptr_t)DB_OFFSET(tf_es): reg = (uint16_t *)&tfp->tf_vm86_es; break; case (intptr_t)DB_OFFSET(tf_fs): reg = (uint16_t *)&tfp->tf_vm86_fs; break; } } else reg = (uint16_t *)((uintptr_t)kdb_frame + off); if (op == DB_VAR_GET) *valuep = *reg; else *reg = *valuep; return (1); } static int db_esp(struct db_variable *vp, db_expr_t *valuep, int op) { if (kdb_frame == NULL) return (0); if (op == DB_VAR_GET) *valuep = get_esp(kdb_frame); else if (TF_HAS_STACKREGS(kdb_frame)) kdb_frame->tf_esp = *valuep; return (1); } static int db_gs(struct db_variable *vp, db_expr_t *valuep, int op) { struct trapframe_vm86 *tfp; if (kdb_frame != NULL && kdb_frame->tf_eflags & PSL_VM) { tfp = (void *)kdb_frame; if (op == DB_VAR_GET) *valuep = tfp->tf_vm86_gs; else tfp->tf_vm86_gs = *valuep; return (1); } if (op == DB_VAR_GET) *valuep = rgs(); else load_gs(*valuep); return (1); } static int db_ss(struct db_variable *vp, db_expr_t *valuep, int op) { if (kdb_frame == NULL) return (0); if (op == DB_VAR_GET) *valuep = TF_HAS_STACKREGS(kdb_frame) ? 
kdb_frame->tf_ss : rss(); else if (TF_HAS_STACKREGS(kdb_frame)) kdb_frame->tf_ss = *valuep; return (1); } #define NORMAL 0 #define TRAP 1 #define INTERRUPT 2 #define SYSCALL 3 #define DOUBLE_FAULT 4 static void db_nextframe(struct i386_frame **, db_addr_t *, struct thread *); static int db_numargs(struct i386_frame *); static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t, void *); static void decode_syscall(int, struct thread *); /* * Figure out how many arguments were passed into the frame at "fp". */ static int db_numargs(fp) struct i386_frame *fp; { char *argp; int inst; int args; argp = (char *)db_get_value((int)&fp->f_retaddr, 4, false); /* * XXX etext is wrong for LKMs. We should attempt to interpret * the instruction at the return address in all cases. This * may require better fault handling. */ if (argp < btext || argp >= etext) { args = -1; } else { retry: inst = db_get_value((int)argp, 4, false); if ((inst & 0xff) == 0x59) /* popl %ecx */ args = 1; else if ((inst & 0xffff) == 0xc483) /* addl $Ibs, %esp */ args = ((inst >> 16) & 0xff) / 4; else if ((inst & 0xf8ff) == 0xc089) { /* movl %eax, %Reg */ argp += 2; goto retry; } else args = -1; } return (args); } static void db_print_stack_entry(name, narg, argnp, argp, callpc, frame) const char *name; int narg; char **argnp; int *argp; db_addr_t callpc; void *frame; { int n = narg >= 0 ? narg : 5; db_printf("%s(", name); while (n) { if (argnp) db_printf("%s=", *argnp++); db_printf("%r", db_get_value((int)argp, 4, false)); argp++; if (--n != 0) db_printf(","); } if (narg < 0) db_printf(",..."); db_printf(") at "); db_printsym(callpc, DB_STGY_PROC); if (frame != NULL) db_printf("/frame 0x%r", (register_t)frame); db_printf("\n"); } static void decode_syscall(int number, struct thread *td) { struct proc *p; c_db_sym_t sym; db_expr_t diff; sy_call_t *f; const char *symname; db_printf(" (%d", number); p = (td != NULL) ? td->td_proc : NULL; if (p != NULL && 0 <= number && number < p->p_sysent->sv_size) { f = p->p_sysent->sv_table[number].sy_call; sym = db_search_symbol((db_addr_t)f, DB_STGY_ANY, &diff); if (sym != DB_SYM_NULL && diff == 0) { db_symbol_values(sym, &symname, NULL); db_printf(", %s, %s", p->p_sysent->sv_name, symname); } } db_printf(")"); } /* * Figure out the next frame up in the call stack. */ static void db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td) { struct trapframe *tf; int frame_type; int eip, esp, ebp; db_expr_t offset; c_db_sym_t sym; const char *name; eip = db_get_value((int) &(*fp)->f_retaddr, 4, false); ebp = db_get_value((int) &(*fp)->f_frame, 4, false); /* * Figure out frame type. We look at the address just before * the saved instruction pointer as the saved EIP is after the * call function, and if the function being called is marked as * dead (such as panic() at the end of dblfault_handler()), then * the instruction at the saved EIP will be part of a different * function (syscall() in this example) rather than the one that * actually made the call. 
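 * For illustration: when a function's final instruction is a call to a
 * function that never returns (e.g. "call panic" at the end of
 * dblfault_handler()), the saved return address already points at the
 * first byte of the following function, so the lookup below uses
 * (eip - 1) to attribute the frame to the function that actually made
 * the call.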
*/ frame_type = NORMAL; if (eip >= PMAP_TRM_MIN_ADDRESS) { sym = db_search_symbol(eip - 1 - setidt_disp, DB_STGY_ANY, &offset); } else { sym = db_search_symbol(eip - 1, DB_STGY_ANY, &offset); } db_symbol_values(sym, &name, NULL); if (name != NULL) { if (strcmp(name, "calltrap") == 0 || strcmp(name, "fork_trampoline") == 0) frame_type = TRAP; else if (strncmp(name, "Xatpic_intr", 11) == 0 || strncmp(name, "Xapic_isr", 9) == 0) { frame_type = INTERRUPT; } else if (strcmp(name, "Xlcall_syscall") == 0 || strcmp(name, "Xint0x80_syscall") == 0) frame_type = SYSCALL; else if (strcmp(name, "dblfault_handler") == 0) frame_type = DOUBLE_FAULT; else if (strcmp(name, "Xtimerint") == 0 || strcmp(name, "Xxen_intr_upcall") == 0) frame_type = INTERRUPT; else if (strcmp(name, "Xcpustop") == 0 || strcmp(name, "Xrendezvous") == 0 || strcmp(name, "Xipi_intr_bitmap_handler") == 0) { /* No arguments. */ frame_type = INTERRUPT; } } /* * Normal frames need no special processing. */ if (frame_type == NORMAL) { *ip = (db_addr_t) eip; *fp = (struct i386_frame *) ebp; return; } db_print_stack_entry(name, 0, 0, 0, eip, &(*fp)->f_frame); /* * For a double fault, we have to snag the values from the * previous TSS since a double fault uses a task gate to * switch to a known good state. */ if (frame_type == DOUBLE_FAULT) { esp = PCPU_GET(common_tssp)->tss_esp; eip = PCPU_GET(common_tssp)->tss_eip; ebp = PCPU_GET(common_tssp)->tss_ebp; db_printf( "--- trap 0x17, eip = %#r, esp = %#r, ebp = %#r ---\n", eip, esp, ebp); *ip = (db_addr_t) eip; *fp = (struct i386_frame *) ebp; return; } /* * Point to base of trapframe which is just above the current * frame. Pointer to it was put into %ebp by the kernel entry * code. */ tf = (struct trapframe *)(*fp)->f_frame; /* * This can be the case for e.g. fork_trampoline, last frame * of a kernel thread stack. */ if (tf == NULL) { *ip = 0; *fp = 0; db_printf("--- kthread start\n"); return; } esp = get_esp(tf); eip = tf->tf_eip; ebp = tf->tf_ebp; switch (frame_type) { case TRAP: db_printf("--- trap %#r", tf->tf_trapno); break; case SYSCALL: db_printf("--- syscall"); decode_syscall(tf->tf_eax, td); break; case INTERRUPT: db_printf("--- interrupt"); break; default: panic("The moon has moved again."); } db_printf(", eip = %#r, esp = %#r, ebp = %#r ---\n", eip, esp, ebp); /* * Detect the last (trap) frame on the kernel stack, where we * entered kernel from usermode. Terminate tracing in this * case. */ switch (frame_type) { case TRAP: case INTERRUPT: if (!TRAPF_USERMODE(tf)) break; /* FALLTHROUGH */ case SYSCALL: ebp = 0; eip = 0; break; } *ip = (db_addr_t) eip; *fp = (struct i386_frame *) ebp; } static int db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame, db_addr_t pc, register_t sp, int count) { struct i386_frame *actframe; #define MAXNARG 16 char *argnames[MAXNARG], **argnp = NULL; const char *name; int *argp; db_expr_t offset; c_db_sym_t sym; int instr, narg; bool first; if (db_segsize(tf) == 16) { db_printf( "--- 16-bit%s, cs:eip = %#x:%#x, ss:esp = %#x:%#x, ebp = %#x, tf = %p ---\n", (tf->tf_eflags & PSL_VM) ? " (vm86)" : "", tf->tf_cs, tf->tf_eip, TF_HAS_STACKREGS(tf) ? tf->tf_ss : rss(), TF_HAS_STACKREGS(tf) ? tf->tf_esp : (intptr_t)&tf->tf_esp, tf->tf_ebp, tf); return (0); } /* 'frame' can be null initially. Just print the pc then. 
*/ if (frame == NULL) goto out; /* * If an indirect call via an invalid pointer caused a trap, * %pc contains the invalid address while the return address * of the unlucky caller has been saved by CPU on the stack * just before the trap frame. In this case, try to recover * the caller's address so that the first frame is assigned * to the right spot in the right function, for that is where * the failure actually happened. * * This trick depends on the fault address stashed in tf_err * by trap_fatal() before entering KDB. */ if (kdb_frame && pc == kdb_frame->tf_err) { /* * Find where the trap frame actually ends. * It won't contain tf_esp or tf_ss unless crossing rings. */ if (TF_HAS_STACKREGS(kdb_frame)) instr = (int)(kdb_frame + 1); else instr = (int)&kdb_frame->tf_esp; pc = db_get_value(instr, 4, false); } if (count == -1) count = 1024; first = true; while (count-- && !db_pager_quit) { sym = db_search_symbol(pc, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); /* * Attempt to determine a (possibly fake) frame that gives * the caller's pc. It may differ from `frame' if the * current function never sets up a standard frame or hasn't * set one up yet or has just discarded one. The last two * cases can be guessed fairly reliably for code generated * by gcc. The first case is too much trouble to handle in * general because the amount of junk on the stack depends * on the pc (the special handling of "calltrap", etc. in * db_nextframe() works because the `next' pc is special). */ actframe = frame; if (first) { first = false; if (sym == C_DB_SYM_NULL && sp != 0) { /* * If a symbol couldn't be found, we've probably * jumped to a bogus location, so try and use * the return address to find our caller. */ db_print_stack_entry(name, 0, 0, 0, pc, NULL); pc = db_get_value(sp, 4, false); if (db_search_symbol(pc, DB_STGY_PROC, &offset) == C_DB_SYM_NULL) break; continue; } else if (tf != NULL) { instr = db_get_value(pc, 4, false); if ((instr & 0xffffff) == 0x00e58955) { /* pushl %ebp; movl %esp, %ebp */ actframe = (void *)(get_esp(tf) - 4); } else if ((instr & 0xffff) == 0x0000e589) { /* movl %esp, %ebp */ actframe = (void *)get_esp(tf); if (tf->tf_ebp == 0) { /* Fake frame better. */ frame = actframe; } } else if ((instr & 0xff) == 0x000000c3) { /* ret */ actframe = (void *)(get_esp(tf) - 4); } else if (offset == 0) { /* Probably an assembler symbol. */ actframe = (void *)(get_esp(tf) - 4); } } else if (strcmp(name, "fork_trampoline") == 0) { /* * Don't try to walk back on a stack for a * process that hasn't actually been run yet. */ db_print_stack_entry(name, 0, 0, 0, pc, actframe); break; } } argp = &actframe->f_arg0; narg = MAXNARG; if (sym != NULL && db_sym_numargs(sym, &narg, argnames)) { argnp = argnames; } else { narg = db_numargs(frame); } db_print_stack_entry(name, narg, argnp, argp, pc, actframe); if (actframe != frame) { /* `frame' belongs to caller. */ pc = (db_addr_t) db_get_value((int)&actframe->f_retaddr, 4, false); continue; } db_nextframe(&frame, &pc, td); out: /* * 'frame' can be null here, either because it was initially * null or because db_nextframe() found no frame. * db_nextframe() may also have found a non-kernel frame. * !INKERNEL() classifies both. Stop tracing if either, * after printing the pc if it is the kernel. 
*/ if (frame == NULL || frame <= actframe) { if (pc != 0) { sym = db_search_symbol(pc, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); db_print_stack_entry(name, 0, 0, 0, pc, frame); } break; } } return (0); } void db_trace_self(void) { struct i386_frame *frame; db_addr_t callpc; register_t ebp; __asm __volatile("movl %%ebp,%0" : "=r" (ebp)); frame = (struct i386_frame *)ebp; callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, false); frame = frame->f_frame; db_backtrace(curthread, NULL, frame, callpc, 0, -1); } int db_trace_thread(struct thread *thr, int count) { struct pcb *ctx; struct trapframe *tf; ctx = kdb_thr_ctx(thr); tf = thr == kdb_thread ? kdb_frame : NULL; return (db_backtrace(thr, tf, (struct i386_frame *)ctx->pcb_ebp, ctx->pcb_eip, ctx->pcb_esp, count)); } void db_md_list_watchpoints(void) { dbreg_list_watchpoints(); } diff --git a/sys/i386/i386/gdb_machdep.c b/sys/i386/i386/gdb_machdep.c index 549c6de7ba1b..766f38f181f8 100644 --- a/sys/i386/i386/gdb_machdep.c +++ b/sys/i386/i386/gdb_machdep.c @@ -1,152 +1,152 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #include #include #include #include -#include #include #include #include void * gdb_cpu_getreg(int regnum, size_t *regsz) { static uint32_t _kcodesel = GSEL(GCODE_SEL, SEL_KPL); static uint32_t _kdatasel = GSEL(GDATA_SEL, SEL_KPL); static uint32_t _kprivsel = GSEL(GPRIV_SEL, SEL_KPL); *regsz = gdb_cpu_regsz(regnum); if (kdb_thread == curthread) { switch (regnum) { case 0: return (&kdb_frame->tf_eax); case 1: return (&kdb_frame->tf_ecx); case 2: return (&kdb_frame->tf_edx); case 9: return (&kdb_frame->tf_eflags); case 10: return (&kdb_frame->tf_cs); case 12: return (&kdb_frame->tf_ds); case 13: return (&kdb_frame->tf_es); case 14: return (&kdb_frame->tf_fs); } } switch (regnum) { case 3: return (&kdb_thrctx->pcb_ebx); case 4: return (&kdb_thrctx->pcb_esp); case 5: return (&kdb_thrctx->pcb_ebp); case 6: return (&kdb_thrctx->pcb_esi); case 7: return (&kdb_thrctx->pcb_edi); case 8: return (&kdb_thrctx->pcb_eip); case 10: return (&_kcodesel); case 11: return (&_kdatasel); case 12: return (&_kdatasel); case 13: return (&_kdatasel); case 14: return (&_kprivsel); case 15: return (&kdb_thrctx->pcb_gs); } return (NULL); } void gdb_cpu_setreg(int regnum, void *val) { switch (regnum) { case GDB_REG_PC: kdb_thrctx->pcb_eip = *(register_t *)val; if (kdb_thread == curthread) kdb_frame->tf_eip = *(register_t *)val; } } int gdb_cpu_signal(int type, int code) { switch (type & ~T_USER) { case T_BPTFLT: return (SIGTRAP); case T_ARITHTRAP: return (SIGFPE); case T_PROTFLT: return (SIGSEGV); case T_TRCTRAP: return (SIGTRAP); case T_PAGEFLT: return (SIGSEGV); case T_DIVIDE: return (SIGFPE); case T_NMI: return (SIGTRAP); case T_FPOPFLT: return (SIGILL); case T_TSSFLT: return (SIGSEGV); case T_SEGNPFLT: return (SIGSEGV); case T_STKFLT: return (SIGSEGV); case T_XMMFLT: return (SIGFPE); } return (SIGEMT); } void gdb_cpu_stop_reason(int type, int code) { uintmax_t val; val = 0; if (type == T_TRCTRAP) { /* NB: 'code' contains the value of dr6 at the trap. */ if ((code & DBREG_DR6_B(0)) != 0) { val = rdr0(); } if ((code & DBREG_DR6_B(1)) != 0) { val = rdr1(); } if ((code & DBREG_DR6_B(2)) != 0) { val = rdr2(); } if ((code & DBREG_DR6_B(3)) != 0) { val = rdr3(); } /* * TODO: validate the bits in DR7 to differentiate between a * watchpoint trap and a hardware breakpoint trap (currently * unsupported). */ if (val != 0) { gdb_tx_str("watch:"); gdb_tx_varhex(val); gdb_tx_char(';'); } } } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 869ee9958d25..1366939cda6e 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1,3260 +1,3260 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2018 The FreeBSD Foundation * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atpic.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); register_t init386(int first); void dblfault_handler(void); void identify_cpu(void); static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; static int above4g_allow = 1; static int above24g_allow = 0; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; extern char start_exceptions[], end_exceptions[]; extern struct sysentvec elf32_freebsd_sysvec; /* Default init_ops implementation. */ struct init_ops init_ops = { .early_clock_source_init = i8254_init, .early_delay = i8254_delay, }; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_free_count())) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. 
*/ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. 
*/ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. 
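	 * Without hardware VME support the virtual interrupt bits
	 * (PSL_VIF/PSL_VIP) are maintained in software in vm86_eflags,
	 * which is why they are merged into the saved eflags below.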
*/ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); /* Allocate space for the signal handler context. 
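	 * The frame is carved out below the user %esp (skipping 128 bytes)
	 * or at the top of the alternate signal stack: any extended FPU
	 * state is placed first on a 64-byte boundary, then the sigframe
	 * itself is aligned down to 16 bytes.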
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned int)sp & ~0x3F); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; if (regs->tf_eip == 0) regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. 
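	 * In practice this means user-supplied eflags changes are filtered
	 * through EFL_SECURE() and the %cs selector is vetted with
	 * CS_SECURE() before anything is copied back into the trapframe.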
* * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = &sc; eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. 
*/ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. 
*/ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) { uprintf( "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #ifdef COMPAT_43 static void setup_priv_lcall_gate(struct proc *p) { struct i386_ldt_args uap; union descriptor desc; u_int lcall_addr; bzero(&uap, sizeof(uap)); uap.start = 0; uap.num = 1; lcall_addr = p->p_sysent->sv_psstrings - sz_lcall_tramp; bzero(&desc, sizeof(desc)); desc.sd.sd_type = SDT_MEMERA; desc.sd.sd_dpl = SEL_UPL; desc.sd.sd_p = 1; desc.sd.sd_def32 = 1; desc.sd.sd_gran = 1; desc.sd.sd_lolimit = 0xffff; desc.sd.sd_hilimit = 0xf; desc.sd.sd_lobase = lcall_addr; desc.sd.sd_hibase = lcall_addr >> 24; i386_set_ldt(curthread, &uap, &desc); } #endif /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ void x86_clear_dbregs(struct pcb *pcb) { if ((pcb->pcb_flags & PCB_DBREGS) == 0) return; pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running CPU, * otherwise they will end up affecting the next * process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Reset registers to default values on exec. 
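	 * Segment registers are reloaded with the user selectors, eflags are
	 * reset to PSL_USER, the old image's LDT and fs/gs bases are
	 * discarded, and the debug registers and FPU state are cleared.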
*/ void exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs; struct pcb *pcb; register_t saved_eflags; regs = td->td_frame; pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); #ifdef COMPAT_43 if (td->td_proc->p_sysent->sv_psstrings != elf32_freebsd_sysvec.sv_psstrings) setup_priv_lcall_gate(td->td_proc); #endif /* * Reset the fs and gs bases. The values from the old address * space do not make sense for the new program. In particular, * gsbase might be the TLS base for the old program but the new * program has no TLS now. */ set_fsbase(td, 0); set_gsbase(td, 0); /* Make sure edx is 0x0 on entry. Linux binaries depend on it. */ saved_eflags = regs->tf_eflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | saved_eflags; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = (register_t)imgp->ps_strings; x86_clear_dbregs(pcb); pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; struct mtx dt_lock; /* lock for GDT and LDT */ union descriptor gdt0[NGDT]; /* initial global descriptor table */ union descriptor *gdt = gdt0; /* global descriptor table */ union descriptor *ldt; /* local descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static struct i386tss *dblfault_tss; static char *dblfault_stack; static struct i386tss common_tss0; vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. 
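	 * Each soft_segment_descriptor below is packed into a hardware
	 * segment descriptor by ssdtosd() when the GDT is built in init386().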
* * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = 0, .ssd_limit = sizeof(union descriptor) * NLDT - 1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = 0, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = 0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface 
(Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; size_t setidt_disp; void setidt(int idx, inthand_t *func, int typ, int dpl, int selec) { uintptr_t off; off = func != NULL ? (uintptr_t)func + setidt_disp : 0; setidt_nodisp(idx, off, typ, dpl, selec); } void setidt_nodisp(int idx, uintptr_t off, int typ, int dpl, int selec) { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = off; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((u_int)off) >> 16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func, func_trm; bool trm; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { if (ip->gd_type == SDT_SYSTASKGT) { db_printf("%3d\t\n", idx); } else { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func >= PMAP_TRM_MIN_ADDRESS) { func_trm = func; func -= setidt_disp; trm = true; } else trm = false; if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); if (trm) db_printf(" (trampoline %#x)", func_trm); db_printf("\n"); } } ip++; } } /* Show privileged registers. 
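	 * Reached from the DDB prompt as "show sysregs"; dumps the descriptor
	 * table registers, control registers, and selected MSRs when the CPU
	 * advertises the corresponding features.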
*/ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016llx\n", rxcr(0)); if (amd_feature & (AMDID_NX | AMDID_LM)) db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t0x%016llx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); if (((cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) || cpu_vendor_id == CPU_VENDOR_HYGON) db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR)); if (cpu_feature & CPUID_PAT) db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%08x\n", rdr0()); db_printf("dr1\t0x%08x\n", rdr1()); db_printf("dr2\t0x%08x\n", rdr2()); db_printf("dr3\t0x%08x\n", rdr3()); db_printf("dr6\t0x%08x\n", rdr6()); db_printf("dr7\t0x%08x\n", rdr7()); } DB_SHOW_COMMAND(frame, db_show_frame) { struct trapframe *frame; frame = have_addr ? (struct trapframe *)addr : curthread->td_frame; printf("ss %#x esp %#x efl %#x cs %#x eip %#x\n", frame->tf_ss, frame->tf_esp, frame->tf_eflags, frame->tf_cs, frame->tf_eip); printf("err %#x trapno %d\n", frame->tf_err, frame->tf_trapno); printf("ds %#x es %#x fs %#x\n", frame->tf_ds, frame->tf_es, frame->tf_fs); printf("eax %#x ecx %#x edx %#x ebx %#x\n", frame->tf_eax, frame->tf_ecx, frame->tf_edx, frame->tf_ebx); printf("ebp %#x esi %#x edi %#x\n", frame->tf_ebp, frame->tf_esi, frame->tf_edi); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { uint64_t lim, ign; int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); lim = 0x100000000; /* 4G */ if (pae_mode && above4g_allow) lim = above24g_allow ? -1ULL : 0x600000000; /* 24G */ if (base >= lim) { printf("%uK of memory above %uGB ignored, pae %d " "above4g_allow %d above24g_allow %d\n", (u_int)(length / 1024), (u_int)(lim >> 30), pae_mode, above4g_allow, above24g_allow); return (1); } if (base + length >= lim) { ign = base + length - lim; length -= ign; printf("%uK of memory above %uGB ignored, pae %d " "above4g_allow %d above24g_allow %d\n", (u_int)(ign / 1024), (u_int)(lim >> 30), pae_mode, above4g_allow, above24g_allow); } /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. 
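	 * physmap[] holds base/bound pairs: entry i occupies physmap[2i]
	 * (start address) and physmap[2i + 1] (end address), so coalescing
	 * with a neighbouring region only requires moving one endpoint.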
*/ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYS_AVAIL_ENTRIES) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); return (add_physmap_entry(smap->base, smap->length, physmap, physmap_idxp)); } static void add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap, int *physmap_idxp) { struct bios_smap *smap, *smapend; u_int32_t smapsize; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, physmap_idxp)) break; } static void basemem_setup(void) { if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } pmap_basemem_setup(basemem); } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long memtest; vm_paddr_t physmap[PHYS_AVAIL_ENTRIES]; quad_t dcons_addr, dcons_size, physmem_tunable; int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase; caddr_t kmdp; has_smap = 0; bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Tell the physical memory allocator about pages used to store * the kernel and preloaded data. See kmem_bootstrap_free(). */ vm_phys_early_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first)); TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow); TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow); /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. 
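	 * The loader passes the BIOS INT 15h/E820 map in module metadata
	 * (MODINFOMD_SMAP); only if that is absent does the kernel fall back
	 * to issuing the BIOS calls itself through VM86.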
*/ physmap_idx = 0; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { add_smap_entries(smapbase, physmap, &physmap_idx); has_smap = 1; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, PMAP_MAP_LOW + ptoa(1)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ alloc_ap_trampoline(physmap, &physmap_idx); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". 
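	 * (For instance, a machine whose RAM ends exactly at the 4GB boundary
	 * would get Maxmem = atop(0x100000000) = 0x100000 pages.)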
We may adjust this * based on ``hw.physmem'' and the results of the memory test. * * This is especially confusing when it is much larger than the * memory size and is displayed as "realmem". */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * The boot memory test is disabled by default, as it takes a * significant amount of time on large-memory systems, and is * unfriendly to virtual machines as it unnecessarily touches all * pages. * * A general name is used as the code may be extended to support * additional tests beyond the current "page present" test. */ memtest = 0; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ ptr = (int *)pmap_cmap3(pa, PG_V | PG_RW | PG_N); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. 
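	 * (phys_avail[] follows the same convention as physmap[]: even
	 * indices hold chunk start addresses, odd indices hold the first
	 * byte past the chunk.)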
* If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ENTRIES) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == PHYS_AVAIL_ENTRIES) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } pmap_cmap3(0, 0); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } static void i386_kdb_init(void) { #ifdef DDB db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab, 0); #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } static void fixup_idt(void) { struct gate_descriptor *ip; uintptr_t off; int x; for (x = 0; x < NIDT; x++) { ip = &idt[x]; if (ip->gd_type != SDT_SYS386IGT && ip->gd_type != SDT_SYS386TGT) continue; off = ip->gd_looffset + (((u_int)ip->gd_hioffset) << 16); KASSERT(off >= (uintptr_t)start_exceptions && off < (uintptr_t)end_exceptions, ("IDT[%d] type %d off %#x", x, ip->gd_type, off)); off += setidt_disp; MPASS(off >= PMAP_TRM_MIN_ADDRESS && off < PMAP_TRM_MAX_ADDRESS); ip->gd_looffset = off; ip->gd_hioffset = off >> 16; } } static void i386_setidt1(void) { int x; /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), 
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif } static void i386_setidt2(void) { setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } #if defined(DEV_ISA) && !defined(DEV_ATPIC) static void i386_setidt3(void) { setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } #endif register_t init386(int first) { struct region_descriptor r_gdt, r_idt; /* table descriptors */ int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; struct xstate_hdr *xhdr; caddr_t kmdp; vm_offset_t addend; size_t ucode_len; int late_console; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = TD0_KSTACK_PAGES; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); if (bootinfo.bi_modulep) { metadata_missing = 0; addend = (vm_paddr_t)bootinfo.bi_modulep < KERNBASE ? PMAP_MAP_LOW : 0; preload_metadata = (caddr_t)bootinfo.bi_modulep + addend; preload_bootstrap_relocate(addend); } else { metadata_missing = 1; } if (bootinfo.bi_envp != 0) { addend = (vm_paddr_t)bootinfo.bi_envp < KERNBASE ? PMAP_MAP_LOW : 0; init_static_kenv((char *)bootinfo.bi_envp + addend, 0); } else { init_static_kenv(NULL, 0); } /* * Re-evaluate CPU features if we loaded a microcode update. */ ucode_len = ucode_load_bsp(first); if (ucode_len != 0) { identify_cpu(); first = roundup2(first + ucode_len, PAGE_SIZE); } identify_hypervisor(); /* Init basic tunables, hz etc */ init_param1(); /* Set bootmethod to BIOS: it's the only supported on i386. */ strlcpy(bootmethod, "BIOS", sizeof(bootmethod)); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int)pc; gdt_segs[GPROC0_SEL].ssd_base = (int)&common_tss0; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt0[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt0[0]) - 1; r_gdt.rd_base = (int)gdt0; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa, pa); dpcpu_init((void *)first, 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* Non-late cninit() and printf() can be moved up to here. */ /* * Initialize mutexes. 
* * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); i386_setidt1(); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); finishidentcpu(); /* Final stage of CPU initialization */ i386_setidt2(); pmap_set_nx(); initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); /* Initialize the tss (except for the final esp0) early for vm86. */ common_tss0.tss_esp0 = thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE - VM86_STACK_SPACE; common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); common_tss0.tss_ioopt = sizeof(struct i386tss) << 16; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); /* Initialize the PIC early for vm86 calls. */ #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ i386_setidt3(); #endif #endif /* * The console and kdb should be initialized even earlier than here, * but some console drivers don't work until after getmemsize(). * Default to late console initialization to support these drivers. * This loses mainly printf()s in getmemsize() and early debugging. */ late_console = 1; TUNABLE_INT_FETCH("debug.late_console", &late_console); if (!late_console) { cninit(); i386_kdb_init(); } kmdp = preload_search_by_type("elf kernel"); link_elf_ireloc(kmdp); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ if (late_console) cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); if (late_console) i386_kdb_init(); msgbufinit(msgbufp, msgbufsize); npxinit(true); /* * Set up thread0 pcb after npxinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } PCPU_SET(curpcb, thread0.td_pcb); /* Move esp0 in the tss to its final place. 
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ common_tss0.tss_esp0 = (vm_offset_t)thread0.td_pcb - VM86_STACK_SPACE; PCPU_SET(kesp0, common_tss0.tss_esp0); gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; /* clear busy bit */ ltr(gsel_tss); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_cr3 = pmap_get_kcr3(); thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((register_t)thread0.td_pcb); } static void machdep_init_trampoline(void) { struct region_descriptor r_gdt, r_idt; struct i386tss *tss; char *copyout_buf, *trampoline, *tramp_stack_base; int x; gdt = pmap_trm_alloc(sizeof(union descriptor) * NGDT * mp_ncpus, M_NOWAIT | M_ZERO); bcopy(gdt0, gdt, sizeof(union descriptor) * NGDT); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int)gdt; lgdt(&r_gdt); tss = pmap_trm_alloc(sizeof(struct i386tss) * mp_ncpus, M_NOWAIT | M_ZERO); bcopy(&common_tss0, tss, sizeof(struct i386tss)); gdt[GPROC0_SEL].sd.sd_lobase = (int)tss; gdt[GPROC0_SEL].sd.sd_hibase = (u_int)tss >> 24; gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tssp, tss); ltr(GSEL(GPROC0_SEL, SEL_KPL)); trampoline = pmap_trm_alloc(end_exceptions - start_exceptions, M_NOWAIT); bcopy(start_exceptions, trampoline, end_exceptions - start_exceptions); tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT); PCPU_SET(trampstk, (uintptr_t)tramp_stack_base + TRAMP_STACK_SZ - VM86_STACK_SPACE); tss[0].tss_esp0 = PCPU_GET(trampstk); idt = pmap_trm_alloc(sizeof(idt0), M_NOWAIT | M_ZERO); bcopy(idt0, idt, sizeof(idt0)); /* Re-initialize new IDT since the handlers were relocated */ setidt_disp = trampoline - start_exceptions; fixup_idt(); r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1; r_idt.rd_base = (int)idt; lidt(&r_idt); /* dblfault TSS */ dblfault_tss = pmap_trm_alloc(sizeof(struct i386tss), M_NOWAIT | M_ZERO); dblfault_stack = pmap_trm_alloc(PAGE_SIZE, M_NOWAIT); dblfault_tss->tss_esp = dblfault_tss->tss_esp0 = dblfault_tss->tss_esp1 = dblfault_tss->tss_esp2 = (int)dblfault_stack + PAGE_SIZE; dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 = dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss->tss_cr3 = pmap_get_kcr3(); dblfault_tss->tss_eip = (int)dblfault_handler; dblfault_tss->tss_eflags = PSL_KERNEL; dblfault_tss->tss_ds = dblfault_tss->tss_es = dblfault_tss->tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss->tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL); gdt[GPANIC_SEL].sd.sd_lobase = (int)dblfault_tss; gdt[GPANIC_SEL].sd.sd_hibase = (u_int)dblfault_tss >> 24; /* make ldt memory segments */ ldt = pmap_trm_alloc(sizeof(union descriptor) * NLDT, M_NOWAIT | M_ZERO); gdt[GLDT_SEL].sd.sd_lobase = (int)ldt; gdt[GLDT_SEL].sd.sd_hibase = (u_int)ldt >> 24; ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < nitems(ldt_segs); x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT); PCPU_SET(copyout_buf, copyout_buf); copyout_init_tramp(); } 
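As an aside for readers of this patch (an illustrative sketch only, not code from the tree): machdep_init_trampoline() above repeatedly splits a 32-bit linear address across the low/high base fields of an i386 descriptor (sd_lobase/sd_hibase for the relocated TSS, LDT and double-fault TSS), and fixup_idt() does the inverse for gate offsets (gd_looffset/gd_hioffset) before adding setidt_disp. The following minimal, standalone C sketch shows that packing and unpacking; the struct and field names (demo_seg, demo_gate, etc.) are hypothetical stand-ins that merely mirror the kernel's layout, not the real descriptor definitions.

#include <assert.h>
#include <stdint.h>

/*
 * Hypothetical mirror of the i386 segment-descriptor base split:
 * bits 0..23 of the base live in the low field, bits 24..31 in the high one.
 */
struct demo_seg {
	uint32_t lobase : 24;
	uint32_t hibase : 8;
};

/*
 * Hypothetical mirror of an interrupt-gate offset split:
 * bits 0..15 in the low field, bits 16..31 in the high field.
 */
struct demo_gate {
	uint16_t looffset;
	uint16_t hioffset;
};

static void
demo_set_base(struct demo_seg *sd, uint32_t base)
{
	sd->lobase = base & 0xffffff;	/* low 24 bits of the base */
	sd->hibase = base >> 24;	/* high 8 bits of the base */
}

static uint32_t
demo_gate_offset(const struct demo_gate *gd)
{
	/* Reassemble the 32-bit handler address from the two halves. */
	return (gd->looffset + ((uint32_t)gd->hioffset << 16));
}

int
main(void)
{
	struct demo_seg sd;
	struct demo_gate gd = { .looffset = 0x1234, .hioffset = 0xc0a0 };

	demo_set_base(&sd, 0xc0123456);
	assert(sd.lobase == 0x123456 && sd.hibase == 0xc0);
	assert(demo_gate_offset(&gd) == 0xc0a01234);
	return (0);
}

Relocating a handler into the trampoline region therefore means reassembling the offset, adding the displacement, and re-splitting it, which is exactly the gd_looffset = off; gd_hioffset = off >> 16; pair in fixup_idt() earlier in this file.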
SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_SECOND, machdep_init_trampoline, NULL); #ifdef COMPAT_43 static void i386_setup_lcall_gate(void) { struct sysentvec *sv; struct user_segment_descriptor desc; u_int lcall_addr; sv = &elf32_freebsd_sysvec; lcall_addr = (uintptr_t)sv->sv_psstrings - sz_lcall_tramp; bzero(&desc, sizeof(desc)); desc.sd_type = SDT_MEMERA; desc.sd_dpl = SEL_UPL; desc.sd_p = 1; desc.sd_def32 = 1; desc.sd_gran = 1; desc.sd_lolimit = 0xffff; desc.sd_hilimit = 0xf; desc.sd_lobase = lcall_addr; desc.sd_hibase = lcall_addr >> 24; bcopy(&desc, &ldt[LSYS5CALLS_SEL], sizeof(desc)); } SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, i386_setup_lcall_gate, NULL); #endif void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; critical_enter(); } else td->td_md.md_spinlock_count++; } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { critical_exit(); intr_restore(flags); } } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct region_descriptor r_idt; struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = (vm_offset_t)pmap_trm_alloc(PAGE_SIZE * 3, M_NOWAIT | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); tmp = round_page(tmp); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor *) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; r_idt.rd_limit = sizeof(idt0) - 1; lidt(&r_idt); /* SMP machines do not need the F00F hack. */ idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. 
The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; pcb->pcb_gs = rgs(); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if ((td->td_frame->tf_eflags & PSL_T) == 0) { td->td_frame->tf_eflags |= PSL_T; td->td_dbgflags |= TDB_STEP; } return (0); } int ptrace_clear_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); td->td_frame->tf_eflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; regs->r_err = 0; regs->r_trapno = 0; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); npxgetregs(td); if (cpu_fxsr) npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm, (struct save87 *)fpregs); else bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { critical_enter(); if (cpu_fxsr) npx_set_fpregs_xmm((struct save87 *)fpregs, &get_pcb_user_save_td(td)->sv_xmm); else bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87, sizeof(*fpregs)); npxuserinited(td); critical_exit(); return (0); } /* * Get machine context. 
*/ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; char *xfpustate; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; mcp->mc_ownedfp = npxgetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(union savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. 
*/ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { error = npxsetregs(td, (union savefpu *)&mcp->mc_fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); if (PCPU_GET(fpcurthread) == td) npxdrop(); /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
*/ int user_dbreg_trap(register_t dr6) { u_int32_t dr7; u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; bp = dr6 & DBREG_DR6_BMASK; if (bp == 0) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/i386/include/db_machdep.h b/sys/i386/include/db_machdep.h index 42f0f19d29a9..83daab106b82 100644 --- a/sys/i386/include/db_machdep.h +++ b/sys/i386/include/db_machdep.h @@ -1,90 +1,89 @@ /*- * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. * * $FreeBSD$ */ #ifndef _MACHINE_DB_MACHDEP_H_ #define _MACHINE_DB_MACHDEP_H_ #include -#include #include typedef vm_offset_t db_addr_t; /* address - unsigned */ typedef int db_expr_t; /* expression - signed */ #define PC_REGS() ((db_addr_t)(kdb_frame->tf_eflags & PSL_VM ? 
\ (kdb_frame->tf_eip & 0xffff) + \ ((kdb_frame->tf_cs & 0xffff) << 4) : \ kdb_frame->tf_eip)) #define BKPT_INST 0xcc /* breakpoint instruction */ #define BKPT_SIZE (1) /* size of breakpoint inst */ #define BKPT_SET(inst) (BKPT_INST) #define BKPT_SKIP \ do { \ kdb_frame->tf_eip += 1; \ kdb_thrctx->pcb_eip += 1; \ } while(0) #define FIXUP_PC_AFTER_BREAK \ do { \ kdb_frame->tf_eip -= 1; \ kdb_thrctx->pcb_eip -= 1; \ } while(0); #define db_clear_single_step kdb_cpu_clear_singlestep #define db_set_single_step kdb_cpu_set_singlestep /* * The debug exception type is copied from %dr6 to 'code' and used to * disambiguate single step traps. Watchpoints have no special support. * Our hardware breakpoints are not well integrated with ddb and are too * different from watchpoints. ddb treats them as unknown traps with * unknown addresses and doesn't turn them off while it is running. */ #define IS_BREAKPOINT_TRAP(type, code) ((type) == T_BPTFLT) #define IS_SSTEP_TRAP(type, code) \ ((type) == T_TRCTRAP && (code) & DBREG_DR6_BS) #define IS_WATCHPOINT_TRAP(type, code) 0 #define I_CALL 0xe8 #define I_CALLI 0xff #define I_RET 0xc3 #define I_IRET 0xcf #define inst_trap_return(ins) (((ins)&0xff) == I_IRET) #define inst_return(ins) (((ins)&0xff) == I_RET) #define inst_call(ins) (((ins)&0xff) == I_CALL || \ (((ins)&0xff) == I_CALLI && \ ((ins)&0x3800) == 0x1000)) #define inst_load(ins) 0 #define inst_store(ins) 0 int db_segsize(struct trapframe *tfp); #endif /* !_MACHINE_DB_MACHDEP_H_ */ diff --git a/sys/i386/linux/linux_ptrace.c b/sys/i386/linux/linux_ptrace.c index 0c93d0266b87..d9e0ed50c750 100644 --- a/sys/i386/linux/linux_ptrace.c +++ b/sys/i386/linux/linux_ptrace.c @@ -1,474 +1,473 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 Alexander Kabaev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include -#include #include #include #include /* * Linux ptrace requests numbers. Mostly identical to FreeBSD, * except for MD ones and PT_ATTACH/PT_DETACH. 
*/ #define PTRACE_TRACEME 0 #define PTRACE_PEEKTEXT 1 #define PTRACE_PEEKDATA 2 #define PTRACE_PEEKUSR 3 #define PTRACE_POKETEXT 4 #define PTRACE_POKEDATA 5 #define PTRACE_POKEUSR 6 #define PTRACE_CONT 7 #define PTRACE_KILL 8 #define PTRACE_SINGLESTEP 9 #define PTRACE_ATTACH 16 #define PTRACE_DETACH 17 #define LINUX_PTRACE_SYSCALL 24 #define PTRACE_GETREGS 12 #define PTRACE_SETREGS 13 #define PTRACE_GETFPREGS 14 #define PTRACE_SETFPREGS 15 #define PTRACE_GETFPXREGS 18 #define PTRACE_SETFPXREGS 19 #define PTRACE_SETOPTIONS 21 /* * Linux keeps debug registers at the following * offset in the user struct */ #define LINUX_DBREG_OFFSET 252 #define LINUX_DBREG_SIZE (8*sizeof(l_int)) static __inline int map_signum(int signum) { signum = linux_to_bsd_signal(signum); return ((signum == SIGSTOP)? 0 : signum); } struct linux_pt_reg { l_long ebx; l_long ecx; l_long edx; l_long esi; l_long edi; l_long ebp; l_long eax; l_int xds; l_int xes; l_int xfs; l_int xgs; l_long orig_eax; l_long eip; l_int xcs; l_long eflags; l_long esp; l_int xss; }; /* * Translate i386 ptrace registers between Linux and FreeBSD formats. * The translation is pretty straighforward, for all registers, but * orig_eax on Linux side and r_trapno and r_err in FreeBSD */ static void map_regs_to_linux(struct reg *bsd_r, struct linux_pt_reg *linux_r) { linux_r->ebx = bsd_r->r_ebx; linux_r->ecx = bsd_r->r_ecx; linux_r->edx = bsd_r->r_edx; linux_r->esi = bsd_r->r_esi; linux_r->edi = bsd_r->r_edi; linux_r->ebp = bsd_r->r_ebp; linux_r->eax = bsd_r->r_eax; linux_r->xds = bsd_r->r_ds; linux_r->xes = bsd_r->r_es; linux_r->xfs = bsd_r->r_fs; linux_r->xgs = bsd_r->r_gs; linux_r->orig_eax = bsd_r->r_eax; linux_r->eip = bsd_r->r_eip; linux_r->xcs = bsd_r->r_cs; linux_r->eflags = bsd_r->r_eflags; linux_r->esp = bsd_r->r_esp; linux_r->xss = bsd_r->r_ss; } static void map_regs_from_linux(struct reg *bsd_r, struct linux_pt_reg *linux_r) { bsd_r->r_ebx = linux_r->ebx; bsd_r->r_ecx = linux_r->ecx; bsd_r->r_edx = linux_r->edx; bsd_r->r_esi = linux_r->esi; bsd_r->r_edi = linux_r->edi; bsd_r->r_ebp = linux_r->ebp; bsd_r->r_eax = linux_r->eax; bsd_r->r_ds = linux_r->xds; bsd_r->r_es = linux_r->xes; bsd_r->r_fs = linux_r->xfs; bsd_r->r_gs = linux_r->xgs; bsd_r->r_eip = linux_r->eip; bsd_r->r_cs = linux_r->xcs; bsd_r->r_eflags = linux_r->eflags; bsd_r->r_esp = linux_r->esp; bsd_r->r_ss = linux_r->xss; } struct linux_pt_fpreg { l_long cwd; l_long swd; l_long twd; l_long fip; l_long fcs; l_long foo; l_long fos; l_long st_space[2*10]; }; static void map_fpregs_to_linux(struct fpreg *bsd_r, struct linux_pt_fpreg *linux_r) { linux_r->cwd = bsd_r->fpr_env[0]; linux_r->swd = bsd_r->fpr_env[1]; linux_r->twd = bsd_r->fpr_env[2]; linux_r->fip = bsd_r->fpr_env[3]; linux_r->fcs = bsd_r->fpr_env[4]; linux_r->foo = bsd_r->fpr_env[5]; linux_r->fos = bsd_r->fpr_env[6]; bcopy(bsd_r->fpr_acc, linux_r->st_space, sizeof(linux_r->st_space)); } static void map_fpregs_from_linux(struct fpreg *bsd_r, struct linux_pt_fpreg *linux_r) { bsd_r->fpr_env[0] = linux_r->cwd; bsd_r->fpr_env[1] = linux_r->swd; bsd_r->fpr_env[2] = linux_r->twd; bsd_r->fpr_env[3] = linux_r->fip; bsd_r->fpr_env[4] = linux_r->fcs; bsd_r->fpr_env[5] = linux_r->foo; bsd_r->fpr_env[6] = linux_r->fos; bcopy(bsd_r->fpr_acc, linux_r->st_space, sizeof(bsd_r->fpr_acc)); } struct linux_pt_fpxreg { l_ushort cwd; l_ushort swd; l_ushort twd; l_ushort fop; l_long fip; l_long fcs; l_long foo; l_long fos; l_long mxcsr; l_long reserved; l_long st_space[32]; l_long xmm_space[32]; l_long padding[56]; }; static int 
linux_proc_read_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0) return (EIO); bcopy(&get_pcb_user_save_td(td)->sv_xmm, fpxregs, sizeof(*fpxregs)); return (0); } static int linux_proc_write_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0) return (EIO); bcopy(fpxregs, &get_pcb_user_save_td(td)->sv_xmm, sizeof(*fpxregs)); return (0); } int linux_ptrace(struct thread *td, struct linux_ptrace_args *uap) { union { struct linux_pt_reg reg; struct linux_pt_fpreg fpreg; struct linux_pt_fpxreg fpxreg; } r; union { struct reg bsd_reg; struct fpreg bsd_fpreg; struct dbreg bsd_dbreg; } u; void *addr; pid_t pid; int error, req; error = 0; /* by default, just copy data intact */ req = uap->req; pid = (pid_t)uap->pid; addr = (void *)uap->addr; switch (req) { case PTRACE_TRACEME: case PTRACE_POKETEXT: case PTRACE_POKEDATA: case PTRACE_KILL: error = kern_ptrace(td, req, pid, addr, uap->data); break; case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: { /* need to preserve return value */ int rval = td->td_retval[0]; error = kern_ptrace(td, req, pid, addr, 0); if (error == 0) error = copyout(td->td_retval, (void *)uap->data, sizeof(l_int)); td->td_retval[0] = rval; break; } case PTRACE_DETACH: error = kern_ptrace(td, PT_DETACH, pid, (void *)1, map_signum(uap->data)); break; case PTRACE_SINGLESTEP: case PTRACE_CONT: error = kern_ptrace(td, req, pid, (void *)1, map_signum(uap->data)); break; case PTRACE_ATTACH: error = kern_ptrace(td, PT_ATTACH, pid, addr, uap->data); break; case PTRACE_GETREGS: /* Linux is using data where FreeBSD is using addr */ error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0); if (error == 0) { map_regs_to_linux(&u.bsd_reg, &r.reg); error = copyout(&r.reg, (void *)uap->data, sizeof(r.reg)); } break; case PTRACE_SETREGS: /* Linux is using data where FreeBSD is using addr */ error = copyin((void *)uap->data, &r.reg, sizeof(r.reg)); if (error == 0) { map_regs_from_linux(&u.bsd_reg, &r.reg); error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0); } break; case PTRACE_GETFPREGS: /* Linux is using data where FreeBSD is using addr */ error = kern_ptrace(td, PT_GETFPREGS, pid, &u.bsd_fpreg, 0); if (error == 0) { map_fpregs_to_linux(&u.bsd_fpreg, &r.fpreg); error = copyout(&r.fpreg, (void *)uap->data, sizeof(r.fpreg)); } break; case PTRACE_SETFPREGS: /* Linux is using data where FreeBSD is using addr */ error = copyin((void *)uap->data, &r.fpreg, sizeof(r.fpreg)); if (error == 0) { map_fpregs_from_linux(&u.bsd_fpreg, &r.fpreg); error = kern_ptrace(td, PT_SETFPREGS, pid, &u.bsd_fpreg, 0); } break; case PTRACE_SETFPXREGS: error = copyin((void *)uap->data, &r.fpxreg, sizeof(r.fpxreg)); if (error) break; /* FALL THROUGH */ case PTRACE_GETFPXREGS: { struct proc *p; struct thread *td2; if (sizeof(struct linux_pt_fpxreg) != sizeof(struct savexmm)) { static int once = 0; if (!once) { printf("linux: savexmm != linux_pt_fpxreg\n"); once = 1; } error = EIO; break; } if ((p = pfind(uap->pid)) == NULL) { error = ESRCH; break; } /* Exiting processes can't be debugged. */ if ((p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto fail; } if ((error = p_candebug(td, p)) != 0) goto fail; /* System processes can't be debugged. */ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } /* not being traced... 
*/ if ((p->p_flag & P_TRACED) == 0) { error = EPERM; goto fail; } /* not being traced by YOU */ if (p->p_pptr != td->td_proc) { error = EBUSY; goto fail; } /* not currently stopped */ if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) { error = EBUSY; goto fail; } if (req == PTRACE_GETFPXREGS) { _PHOLD(p); /* may block */ td2 = FIRST_THREAD_IN_PROC(p); error = linux_proc_read_fpxregs(td2, &r.fpxreg); _PRELE(p); PROC_UNLOCK(p); if (error == 0) error = copyout(&r.fpxreg, (void *)uap->data, sizeof(r.fpxreg)); } else { /* clear dangerous bits exactly as Linux does*/ r.fpxreg.mxcsr &= 0xffbf; _PHOLD(p); /* may block */ td2 = FIRST_THREAD_IN_PROC(p); error = linux_proc_write_fpxregs(td2, &r.fpxreg); _PRELE(p); PROC_UNLOCK(p); } break; fail: PROC_UNLOCK(p); break; } case PTRACE_PEEKUSR: case PTRACE_POKEUSR: { error = EIO; /* check addr for alignment */ if (uap->addr < 0 || uap->addr & (sizeof(l_int) - 1)) break; /* * Allow Linux programs to access register values in * user struct. We simulate this through PT_GET/SETREGS * as necessary. */ if (uap->addr < sizeof(struct linux_pt_reg)) { error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0); if (error != 0) break; map_regs_to_linux(&u.bsd_reg, &r.reg); if (req == PTRACE_PEEKUSR) { error = copyout((char *)&r.reg + uap->addr, (void *)uap->data, sizeof(l_int)); break; } *(l_int *)((char *)&r.reg + uap->addr) = (l_int)uap->data; map_regs_from_linux(&u.bsd_reg, &r.reg); error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0); } /* * Simulate debug registers access */ if (uap->addr >= LINUX_DBREG_OFFSET && uap->addr <= LINUX_DBREG_OFFSET + LINUX_DBREG_SIZE) { error = kern_ptrace(td, PT_GETDBREGS, pid, &u.bsd_dbreg, 0); if (error != 0) break; uap->addr -= LINUX_DBREG_OFFSET; if (req == PTRACE_PEEKUSR) { error = copyout((char *)&u.bsd_dbreg + uap->addr, (void *)uap->data, sizeof(l_int)); break; } *(l_int *)((char *)&u.bsd_dbreg + uap->addr) = uap->data; error = kern_ptrace(td, PT_SETDBREGS, pid, &u.bsd_dbreg, 0); } break; } case LINUX_PTRACE_SYSCALL: /* fall through */ default: printf("linux: ptrace(%u, ...) not implemented\n", (unsigned int)uap->req); error = EINVAL; break; } return (error); } diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index ac0427a74818..5b888766daea 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1,2699 +1,2700 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2017 Dell EMC * Copyright (c) 2000-2001, 2003 David O'Brien * Copyright (c) 1995-1996 Søren Schmidt * Copyright (c) 1996 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ELF_NOTE_ROUNDSIZE 4 #define OLD_EI_BRAND 8 static int __elfN(check_header)(const Elf_Ehdr *hdr); static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int32_t *osrel, uint32_t *fctl0); static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry); static int __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot); static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel); static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel); static boolean_t __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel, boolean_t *has_fctl0, uint32_t *fctl0); static vm_prot_t __elfN(trans_prot)(Elf_Word); static Elf_Word __elfN(untrans_prot)(vm_prot_t); SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW | CTLFLAG_MPSAFE, 0, ""); int __elfN(fallback_brand) = -1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort"); static int elf_legacy_coredump = 0; SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, &elf_legacy_coredump, 0, "include all and only RW pages in core dumps"); int __elfN(nxstack) = #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */ || \ (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__) || \ defined(__riscv) 1; #else 0; #endif SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, nxstack, CTLFLAG_RW, &__elfN(nxstack), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack"); #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) int i386_read_exec = 0; SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif static u_long __elfN(pie_base) = ET_DYN_LOAD_ADDR; static int sysctl_pie_base(SYSCTL_HANDLER_ARGS) { u_long val; int error; val = __elfN(pie_base); error = sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if ((val & PAGE_MASK) != 0) return (EINVAL); __elfN(pie_base) = val; return (0); } SYSCTL_PROC(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, pie_base, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_pie_base, "LU", "PIE load base without randomization"); SYSCTL_NODE(__CONCAT(_kern_elf, 
__ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, ""); #define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr) static int __elfN(aslr_enabled) = 0; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN, &__elfN(aslr_enabled), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable address map randomization"); static int __elfN(pie_aslr_enabled) = 0; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN, &__elfN(pie_aslr_enabled), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable address map randomization for PIE binaries"); static int __elfN(aslr_honor_sbrk) = 1; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, &__elfN(aslr_honor_sbrk), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); static int __elfN(aslr_stack_gap) = 3; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack_gap, CTLFLAG_RW, &__elfN(aslr_stack_gap), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": maximum percentage of main stack to waste on a random gap"); static int __elfN(sigfastblock) = 1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock, CTLFLAG_RWTUN, &__elfN(sigfastblock), 0, "enable sigfastblock for new processes"); static bool __elfN(allow_wx) = true; SYSCTL_BOOL(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, allow_wx, CTLFLAG_RWTUN, &__elfN(allow_wx), 0, "Allow pages to be mapped simultaneously writable and executable"); static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) Elf_Brandnote __elfN(freebsd_brandnote) = { .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), .hdr.n_descsz = sizeof(int32_t), .hdr.n_type = NT_FREEBSD_ABI_TAG, .vendor = FREEBSD_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = __elfN(freebsd_trans_osrel) }; static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) { uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); *osrel = *(const int32_t *)(p); return (true); } static const char GNU_ABI_VENDOR[] = "GNU"; static int GNU_KFREEBSD_ABI_DESC = 3; Elf_Brandnote __elfN(kfreebsd_brandnote) = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = kfreebsd_trans_osrel }; static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); desc = (const Elf32_Word *)p; if (desc[0] != GNU_KFREEBSD_ABI_DESC) return (false); /* * Debian GNU/kFreeBSD embed the earliest compatible kernel version * (__FreeBSD_version: Rxx) in the LSB way. 
*/ *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3]; return (true); } int __elfN(insert_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == NULL) { elf_brand_list[i] = entry; break; } } if (i == MAX_BRANDS) { printf("WARNING: %s: could not insert brandinfo entry: %p\n", __func__, entry); return (-1); } return (0); } int __elfN(remove_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == entry) { elf_brand_list[i] = NULL; break; } } if (i == MAX_BRANDS) return (-1); return (0); } int __elfN(brand_inuse)(Elf_Brandinfo *entry) { struct proc *p; int rval = FALSE; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sysent == entry->sysvec) { rval = TRUE; break; } } sx_sunlock(&allproc_lock); return (rval); } static Elf_Brandinfo * __elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int32_t *osrel, uint32_t *fctl0) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; Elf_Brandinfo *bi, *bi_m; boolean_t ret, has_fctl0; int i, interp_name_len; interp_name_len = interp != NULL ? strlen(interp) + 1 : 0; /* * We support four types of branding -- (1) the ELF EI_OSABI field * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string * branding w/in the ELF header, (3) path of the `interp_path' * field, and (4) the ".note.ABI-tag" ELF section. */ /* Look for an ".note.ABI-tag" ELF section */ bi_m = NULL; for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL) continue; if (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0) continue; if (hdr->e_machine == bi->machine && (bi->flags & (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) { has_fctl0 = false; *fctl0 = 0; *osrel = 0; ret = __elfN(check_note)(imgp, bi->brand_note, osrel, &has_fctl0, fctl0); /* Give brand a chance to veto check_note's guess */ if (ret && bi->header_supported) { ret = bi->header_supported(imgp, osrel, has_fctl0 ? fctl0 : NULL); } /* * If note checker claimed the binary, but the * interpreter path in the image does not * match default one for the brand, try to * search for other brands with the same * interpreter. Either there is better brand * with the right interpreter, or, failing * this, we return first brand which accepted * our note and, optionally, header. */ if (ret && bi_m == NULL && interp != NULL && (bi->interp_path == NULL || (strlen(bi->interp_path) + 1 != interp_name_len || strncmp(interp, bi->interp_path, interp_name_len) != 0))) { bi_m = bi; ret = 0; } if (ret) return (bi); } } if (bi_m != NULL) return (bi_m); /* If the executable has a brand, search for it in the brand list. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 || (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)) continue; if (hdr->e_machine == bi->machine && (hdr->e_ident[EI_OSABI] == bi->brand || (bi->compat_3_brand != NULL && strcmp((const char *)&hdr->e_ident[OLD_EI_BRAND], bi->compat_3_brand) == 0))) { /* Looks good, but give brand a chance to veto */ if (bi->header_supported == NULL || bi->header_supported(imgp, NULL, NULL)) { /* * Again, prefer strictly matching * interpreter path. 
*/ if (interp_name_len == 0 && bi->interp_path == NULL) return (bi); if (bi->interp_path != NULL && strlen(bi->interp_path) + 1 == interp_name_len && strncmp(interp, bi->interp_path, interp_name_len) == 0) return (bi); if (bi_m == NULL) bi_m = bi; } } } if (bi_m != NULL) return (bi_m); /* No known brand, see if the header is recognized by any brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY || bi->header_supported == NULL) continue; if (hdr->e_machine == bi->machine) { ret = bi->header_supported(imgp, NULL, NULL); if (ret) return (bi); } } /* Lacking a known brand, search for a recognized interpreter. */ if (interp != NULL) { for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & (BI_BRAND_NOTE_MANDATORY | BI_BRAND_ONLY_STATIC)) != 0) continue; if (hdr->e_machine == bi->machine && bi->interp_path != NULL && /* ELF image p_filesz includes terminating zero */ strlen(bi->interp_path) + 1 == interp_name_len && strncmp(interp, bi->interp_path, interp_name_len) == 0 && (bi->header_supported == NULL || bi->header_supported(imgp, NULL, NULL))) return (bi); } } /* Lacking a recognized interpreter, try the default brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 || (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)) continue; if (hdr->e_machine == bi->machine && __elfN(fallback_brand) == bi->brand && (bi->header_supported == NULL || bi->header_supported(imgp, NULL, NULL))) return (bi); } return (NULL); } static bool __elfN(phdr_in_zero_page)(const Elf_Ehdr *hdr) { return (hdr->e_phoff <= PAGE_SIZE && (u_int)hdr->e_phentsize * hdr->e_phnum <= PAGE_SIZE - hdr->e_phoff); } static int __elfN(check_header)(const Elf_Ehdr *hdr) { Elf_Brandinfo *bi; int i; if (!IS_ELF(*hdr) || hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA || hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_phentsize != sizeof(Elf_Phdr) || hdr->e_version != ELF_TARG_VER) return (ENOEXEC); /* * Make sure we have at least one brand for this machine. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi != NULL && bi->machine == hdr->e_machine) break; } if (i == MAX_BRANDS) return (ENOEXEC); return (0); } static int __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot) { struct sf_buf *sf; int error; vm_offset_t off; /* * Create the page if it doesn't exist yet. Ignore errors. */ vm_map_fixed(map, NULL, 0, trunc_page(start), round_page(end) - trunc_page(start), VM_PROT_ALL, VM_PROT_ALL, MAP_CHECK_EXCL); /* * Find the page from the underlying object. 
*/ if (object != NULL) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, end - start); vm_imgact_unmap_page(sf); if (error != 0) return (KERN_FAILURE); } return (KERN_SUCCESS); } static int __elfN(map_insert)(struct image_params *imgp, vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow) { struct sf_buf *sf; vm_offset_t off; vm_size_t sz; int error, locked, rv; if (start != trunc_page(start)) { rv = __elfN(map_partial)(map, object, offset, start, round_page(start), prot); if (rv != KERN_SUCCESS) return (rv); offset += round_page(start) - start; start = round_page(start); } if (end != round_page(end)) { rv = __elfN(map_partial)(map, object, offset + trunc_page(end) - start, trunc_page(end), end, prot); if (rv != KERN_SUCCESS) return (rv); end = trunc_page(end); } if (start >= end) return (KERN_SUCCESS); if ((offset & PAGE_MASK) != 0) { /* * The mapping is not page aligned. This means that we have * to copy the data. */ rv = vm_map_fixed(map, NULL, 0, start, end - start, prot | VM_PROT_WRITE, VM_PROT_ALL, MAP_CHECK_EXCL); if (rv != KERN_SUCCESS) return (rv); if (object == NULL) return (KERN_SUCCESS); for (; start < end; start += sz) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); sz = end - start; if (sz > PAGE_SIZE - off) sz = PAGE_SIZE - off; error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, sz); vm_imgact_unmap_page(sf); if (error != 0) return (KERN_FAILURE); offset += sz; } } else { vm_object_reference(object); rv = vm_map_fixed(map, object, offset, start, end - start, prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL | (object != NULL ? MAP_VN_EXEC : 0)); if (rv != KERN_SUCCESS) { locked = VOP_ISLOCKED(imgp->vp); VOP_UNLOCK(imgp->vp); vm_object_deallocate(object); vn_lock(imgp->vp, locked | LK_RETRY); return (rv); } else if (object != NULL) { MPASS(imgp->vp->v_object == object); VOP_SET_TEXT_CHECKED(imgp->vp); } } return (KERN_SUCCESS); } static int __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot) { struct sf_buf *sf; size_t map_len; vm_map_t map; vm_object_t object; vm_offset_t map_addr; int error, rv, cow; size_t copy_len; vm_ooffset_t file_addr; /* * It's necessary to fail if the filsz + offset taken from the * header is greater than the actual file pager object's size. * If we were to allow this, then the vm_map_find() below would * walk right off the end of the file object and into the ether. * * While I'm here, might as well check for something else that * is invalid: filsz cannot be greater than memsz. */ if ((filsz != 0 && (off_t)filsz + offset > imgp->attr->va_size) || filsz > memsz) { uprintf("elf_load_section: truncated ELF file\n"); return (ENOEXEC); } object = imgp->object; map = &imgp->proc->p_vmspace->vm_map; map_addr = trunc_page((vm_offset_t)vmaddr); file_addr = trunc_page(offset); /* * We have two choices. We can either clear the data in the last page * of an oversized mapping, or we can start the anon mapping a page * early and copy the initialized data into that first page. We * choose the second. 
*/ if (filsz == 0) map_len = 0; else if (memsz > filsz) map_len = trunc_page(offset + filsz) - file_addr; else map_len = round_page(offset + filsz) - file_addr; if (map_len != 0) { /* cow flags: don't dump readonly sections in core */ cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP); rv = __elfN(map_insert)(imgp, map, object, file_addr, map_addr, map_addr + map_len, prot, cow); if (rv != KERN_SUCCESS) return (EINVAL); /* we can stop now if we've covered it all */ if (memsz == filsz) return (0); } /* * We have to get the remaining bit of the file into the first part * of the oversized map segment. This is normally because the .data * segment in the file is extended to provide bss. It's a neat idea * to try and save a page, but it's a pain in the behind to implement. */ copy_len = filsz == 0 ? 0 : (offset + filsz) - trunc_page(offset + filsz); map_addr = trunc_page((vm_offset_t)vmaddr + filsz); map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr; /* This had damn well better be true! */ if (map_len != 0) { rv = __elfN(map_insert)(imgp, map, NULL, 0, map_addr, map_addr + map_len, prot, 0); if (rv != KERN_SUCCESS) return (EINVAL); } if (copy_len != 0) { sf = vm_imgact_map_page(object, offset + filsz); if (sf == NULL) return (EIO); /* send the page fragment to user space */ error = copyout((caddr_t)sf_buf_kva(sf), (caddr_t)map_addr, copy_len); vm_imgact_unmap_page(sf); if (error != 0) return (error); } /* * Remove write access to the page if it was only granted by map_insert * to allow copyout. */ if ((prot & VM_PROT_WRITE) == 0) vm_map_protect(map, trunc_page(map_addr), round_page(map_addr + map_len), prot, 0, VM_MAP_PROTECT_SET_PROT); return (0); } static int __elfN(load_sections)(struct image_params *imgp, const Elf_Ehdr *hdr, const Elf_Phdr *phdr, u_long rbase, u_long *base_addrp) { vm_prot_t prot; u_long base_addr; bool first; int error, i; ASSERT_VOP_LOCKED(imgp->vp, __func__); base_addr = 0; first = true; for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0) continue; /* Loadable segment */ prot = __elfN(trans_prot)(phdr[i].p_flags); error = __elfN(load_section)(imgp, phdr[i].p_offset, (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase, phdr[i].p_memsz, phdr[i].p_filesz, prot); if (error != 0) return (error); /* * Establish the base address if this is the first segment. */ if (first) { base_addr = trunc_page(phdr[i].p_vaddr + rbase); first = false; } } if (base_addrp != NULL) *base_addrp = base_addr; return (0); } /* * Load the file "file" into memory. It may be either a shared object * or an executable. * * The "addr" reference parameter is in/out. On entry, it specifies * the address where a shared object should be loaded. If the file is * an executable, this value is ignored. On exit, "addr" specifies * where the file was actually loaded. * * The "entry" reference parameter is out only. On exit, it specifies * the entry point for the loaded file. */ static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry) { struct { struct nameidata nd; struct vattr attr; struct image_params image_params; } *tempdata; const Elf_Ehdr *hdr = NULL; const Elf_Phdr *phdr = NULL; struct nameidata *nd; struct vattr *attr; struct image_params *imgp; u_long rbase; u_long base_addr = 0; int error; #ifdef CAPABILITY_MODE /* * XXXJA: This check can go away once we are sufficiently confident * that the checks in namei() are correct. 
*/ if (IN_CAPABILITY_MODE(curthread)) return (ECAPMODE); #endif tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK | M_ZERO); nd = &tempdata->nd; attr = &tempdata->attr; imgp = &tempdata->image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->attr = attr; NDINIT(nd, LOOKUP, ISOPEN | FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, file, curthread); if ((error = namei(nd)) != 0) { nd->ni_vp = NULL; goto fail; } NDFREE(nd, NDF_ONLY_PNBUF); imgp->vp = nd->ni_vp; /* * Check permissions, modes, uid, etc on the file, and "open" it. */ error = exec_check_permissions(imgp); if (error) goto fail; error = exec_map_first_page(imgp); if (error) goto fail; imgp->object = nd->ni_vp->v_object; hdr = (const Elf_Ehdr *)imgp->image_header; if ((error = __elfN(check_header)(hdr)) != 0) goto fail; if (hdr->e_type == ET_DYN) rbase = *addr; else if (hdr->e_type == ET_EXEC) rbase = 0; else { error = ENOEXEC; goto fail; } /* Only support headers that fit within first page for now */ if (!__elfN(phdr_in_zero_page)(hdr)) { error = ENOEXEC; goto fail; } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { error = ENOEXEC; goto fail; } error = __elfN(load_sections)(imgp, hdr, phdr, rbase, &base_addr); if (error != 0) goto fail; *addr = base_addr; *entry = (unsigned long)hdr->e_entry + rbase; fail: if (imgp->firstpage) exec_unmap_first_page(imgp); if (nd->ni_vp) { if (imgp->textset) VOP_UNSET_TEXT_CHECKED(nd->ni_vp); vput(nd->ni_vp); } free(tempdata, M_TEMP); return (error); } static u_long __CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv, u_int align) { u_long rbase, res; MPASS(vm_map_min(map) <= minv); MPASS(maxv <= vm_map_max(map)); MPASS(minv < maxv); MPASS(minv + align < maxv); arc4rand(&rbase, sizeof(rbase), 0); res = roundup(minv, (u_long)align) + rbase % (maxv - minv); res &= ~((u_long)align - 1); if (res >= maxv) res -= align; KASSERT(res >= minv, ("res %#lx < minv %#lx, maxv %#lx rbase %#lx", res, minv, maxv, rbase)); KASSERT(res < maxv, ("res %#lx > maxv %#lx, minv %#lx rbase %#lx", res, maxv, minv, rbase)); return (res); } static int __elfN(enforce_limits)(struct image_params *imgp, const Elf_Ehdr *hdr, const Elf_Phdr *phdr, u_long et_dyn_addr) { struct vmspace *vmspace; const char *err_str; u_long text_size, data_size, total_size, text_addr, data_addr; u_long seg_size, seg_addr; int i; err_str = NULL; text_size = data_size = total_size = text_addr = data_addr = 0; for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0) continue; seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr); seg_size = round_page(phdr[i].p_memsz + phdr[i].p_vaddr + et_dyn_addr - seg_addr); /* * Make the largest executable segment the official * text segment and all others data. * * Note that obreak() assumes that data_addr + data_size == end * of data load area, and the ELF file format expects segments * to be sorted by address. If multiple data segments exist, * the last one will be used. */ if ((phdr[i].p_flags & PF_X) != 0 && text_size < seg_size) { text_size = seg_size; text_addr = seg_addr; } else { data_size = seg_size; data_addr = seg_addr; } total_size += seg_size; } if (data_addr == 0 && data_size == 0) { data_addr = text_addr; data_size = text_size; } /* * Check limits. It should be safe to check the * limits after loading the segments since we do * not actually fault in all the segments pages. 
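The rnd_base() helper above reduces to a few lines of arithmetic: round the lower bound up to the requested alignment, add a random displacement within the [minv, maxv) window, re-align, and pull the result back under the upper bound if the sum overshot it. A user-space sketch of just that arithmetic, with the random bits passed in explicitly (illustrative values, not the kernel code):

#include <stdio.h>
#include <stdint.h>

/* Same shape as the rnd_base() helper above; purely illustrative. */
static uint64_t
rnd_base_sketch(uint64_t minv, uint64_t maxv, uint64_t align, uint64_t rbase)
{
	uint64_t res;

	/* roundup(minv, align) plus a random displacement in the window. */
	res = ((minv + align - 1) / align) * align + rbase % (maxv - minv);
	res &= ~(align - 1);		/* keep the requested alignment */
	if (res >= maxv)		/* the sum may land at or above maxv */
		res -= align;
	return (res);
}

int
main(void)
{

	/* Hypothetical window: [16 MiB, 1 GiB), 2 MiB alignment. */
	printf("randomized base: %#jx\n", (uintmax_t)rnd_base_sketch(
	    (uint64_t)16 << 20, (uint64_t)1 << 30, (uint64_t)2 << 20,
	    0x123456789abcdefULL));
	return (0);
}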
*/ PROC_LOCK(imgp->proc); if (data_size > lim_cur_proc(imgp->proc, RLIMIT_DATA)) err_str = "Data segment size exceeds process limit"; else if (text_size > maxtsiz) err_str = "Text segment size exceeds system limit"; else if (total_size > lim_cur_proc(imgp->proc, RLIMIT_VMEM)) err_str = "Total segment size exceeds process limit"; else if (racct_set(imgp->proc, RACCT_DATA, data_size) != 0) err_str = "Data segment size exceeds resource limit"; else if (racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) err_str = "Total segment size exceeds resource limit"; PROC_UNLOCK(imgp->proc); if (err_str != NULL) { uprintf("%s\n", err_str); return (ENOMEM); } vmspace = imgp->proc->p_vmspace; vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr; return (0); } static int __elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr, char **interpp, bool *free_interpp) { struct thread *td; char *interp; int error, interp_name_len; KASSERT(phdr->p_type == PT_INTERP, ("%s: p_type %u != PT_INTERP", __func__, phdr->p_type)); ASSERT_VOP_LOCKED(imgp->vp, __func__); td = curthread; /* Path to interpreter */ if (phdr->p_filesz < 2 || phdr->p_filesz > MAXPATHLEN) { uprintf("Invalid PT_INTERP\n"); return (ENOEXEC); } interp_name_len = phdr->p_filesz; if (phdr->p_offset > PAGE_SIZE || interp_name_len > PAGE_SIZE - phdr->p_offset) { /* * The vnode lock might be needed by the pagedaemon to * clean pages owned by the vnode. Do not allow sleep * waiting for memory with the vnode locked, instead * try non-sleepable allocation first, and if it * fails, go to the slow path where we drop the lock * and do M_WAITOK. A text reference prevents * modifications to the vnode content.
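To make the accounting rule in enforce_limits() above concrete (the largest executable PT_LOAD becomes the official text segment, every other segment is counted as data, and the last data segment wins), here is a tiny user-space sketch over hypothetical page-rounded segment sizes:

#include <stdio.h>
#include <stdint.h>

struct seg { uint64_t size; int executable; };

int
main(void)
{
	/* Hypothetical page-rounded PT_LOAD segments, in file order. */
	struct seg segs[] = {
		{ 0x1000, 1 },		/* small executable stub */
		{ 0x80000, 1 },		/* the real text */
		{ 0x6000, 0 },		/* data + bss */
	};
	uint64_t text_size = 0, data_size = 0, total_size = 0;
	size_t i;

	for (i = 0; i < sizeof(segs) / sizeof(segs[0]); i++) {
		if (segs[i].executable && text_size < segs[i].size)
			text_size = segs[i].size;	/* biggest X wins */
		else
			data_size = segs[i].size;	/* last one wins */
		total_size += segs[i].size;
	}
	printf("text %#jx data %#jx total %#jx\n", (uintmax_t)text_size,
	    (uintmax_t)data_size, (uintmax_t)total_size);
	return (0);
}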
*/ interp = malloc(interp_name_len + 1, M_TEMP, M_NOWAIT); if (interp == NULL) { VOP_UNLOCK(imgp->vp); interp = malloc(interp_name_len + 1, M_TEMP, M_WAITOK); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } error = vn_rdwr(UIO_READ, imgp->vp, interp, interp_name_len, phdr->p_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, NULL, td); if (error != 0) { free(interp, M_TEMP); uprintf("i/o error PT_INTERP %d\n", error); return (error); } interp[interp_name_len] = '\0'; *interpp = interp; *free_interpp = true; return (0); } interp = __DECONST(char *, imgp->image_header) + phdr->p_offset; if (interp[interp_name_len - 1] != '\0') { uprintf("Invalid PT_INTERP\n"); return (ENOEXEC); } *interpp = interp; *free_interpp = false; return (0); } static int __elfN(load_interp)(struct image_params *imgp, const Elf_Brandinfo *brand_info, const char *interp, u_long *addr, u_long *entry) { char *path; int error; if (brand_info->emul_path != NULL && brand_info->emul_path[0] != '\0') { path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); snprintf(path, MAXPATHLEN, "%s%s", brand_info->emul_path, interp); error = __elfN(load_file)(imgp->proc, path, addr, entry); free(path, M_TEMP); if (error == 0) return (0); } if (brand_info->interp_newpath != NULL && (brand_info->interp_path == NULL || strcmp(interp, brand_info->interp_path) == 0)) { error = __elfN(load_file)(imgp->proc, brand_info->interp_newpath, addr, entry); if (error == 0) return (0); } error = __elfN(load_file)(imgp->proc, interp, addr, entry); if (error == 0) return (0); uprintf("ELF interpreter %s not found, error %d\n", interp, error); return (error); } /* * Impossible et_dyn_addr initial value indicating that the real base * must be calculated later with some randomization applied. */ #define ET_DYN_ADDR_RAND 1 static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) { struct thread *td; const Elf_Ehdr *hdr; const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs; struct vmspace *vmspace; vm_map_t map; char *interp; Elf_Brandinfo *brand_info; struct sysentvec *sv; u_long addr, baddr, et_dyn_addr, entry, proghdr; u_long maxalign, mapsz, maxv, maxv1; uint32_t fctl0; int32_t osrel; bool free_interp; int error, i, n; hdr = (const Elf_Ehdr *)imgp->image_header; /* * Do we have a valid ELF header ? * * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later * if particular brand doesn't support it. */ if (__elfN(check_header)(hdr) != 0 || (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)) return (-1); /* * From here on down, we return an errno, not -1, as we've * detected an ELF file. */ if (!__elfN(phdr_in_zero_page)(hdr)) { uprintf("Program headers not in the first page\n"); return (ENOEXEC); } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { uprintf("Unaligned program headers\n"); return (ENOEXEC); } n = error = 0; baddr = 0; osrel = 0; fctl0 = 0; entry = proghdr = 0; interp = NULL; free_interp = false; td = curthread; maxalign = PAGE_SIZE; mapsz = 0; for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: if (n == 0) baddr = phdr[i].p_vaddr; if (phdr[i].p_align > maxalign) maxalign = phdr[i].p_align; mapsz += phdr[i].p_memsz; n++; /* * If this segment contains the program headers, * remember their virtual address for the AT_PHDR * aux entry. Static binaries don't usually include * a PT_PHDR entry. 
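The check described in the comment above (p_offset == 0 and the whole header table fitting inside p_filesz) is how the kernel recovers an AT_PHDR value for static binaries that ship no PT_PHDR entry: the headers' virtual address is then just the segment's p_vaddr plus e_phoff. A hedged stand-alone sketch of that derivation with made-up header fields:

#include <stdio.h>
#include <stdint.h>

struct phdr_sketch { uint64_t p_offset, p_vaddr, p_filesz; };

int
main(void)
{
	/* Hypothetical values from an ELF executable's headers. */
	uint64_t e_phoff = 64, e_phnum = 11, e_phentsize = 56;
	struct phdr_sketch load = { 0, 0x200000, 0x1000 };	/* first PT_LOAD */
	uint64_t rbase = 0;	/* relocation base; nonzero for ET_DYN */
	uint64_t at_phdr = 0;

	if (load.p_offset == 0 &&
	    e_phoff + e_phnum * e_phentsize <= load.p_filesz)
		at_phdr = load.p_vaddr + rbase + e_phoff;
	printf("AT_PHDR = %#jx\n", (uintmax_t)at_phdr);
	return (0);
}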
*/ if (phdr[i].p_offset == 0 && hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize <= phdr[i].p_filesz) proghdr = phdr[i].p_vaddr + hdr->e_phoff; break; case PT_INTERP: /* Path to interpreter */ if (interp != NULL) { uprintf("Multiple PT_INTERP headers\n"); error = ENOEXEC; goto ret; } error = __elfN(get_interp)(imgp, &phdr[i], &interp, &free_interp); if (error != 0) goto ret; break; case PT_GNU_STACK: if (__elfN(nxstack)) imgp->stack_prot = __elfN(trans_prot)(phdr[i].p_flags); imgp->stack_sz = phdr[i].p_memsz; break; case PT_PHDR: /* Program header table info */ proghdr = phdr[i].p_vaddr; break; } } brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel, &fctl0); if (brand_info == NULL) { uprintf("ELF binary type \"%u\" not known.\n", hdr->e_ident[EI_OSABI]); error = ENOEXEC; goto ret; } sv = brand_info->sysvec; et_dyn_addr = 0; if (hdr->e_type == ET_DYN) { if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) { uprintf("Cannot execute shared object\n"); error = ENOEXEC; goto ret; } /* * Honour the base load address from the dso if it is * non-zero for some reason. */ if (baddr == 0) { if ((sv->sv_flags & SV_ASLR) == 0 || (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) et_dyn_addr = __elfN(pie_base); else if ((__elfN(pie_aslr_enabled) && (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) || (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0) et_dyn_addr = ET_DYN_ADDR_RAND; else et_dyn_addr = __elfN(pie_base); } } /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. * * The VV_TEXT flag prevents modifications to the executable while * the vnode is unlocked. */ VOP_UNLOCK(imgp->vp); /* * Decide whether to enable randomization of user mappings. * First, reset user preferences for the setid binaries. * Then, account for the support of the randomization by the * ABI, by user preferences, and make special treatment for * PIE binaries. */ if (imgp->credential_setid) { PROC_LOCK(imgp->proc); imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE); PROC_UNLOCK(imgp->proc); } if ((sv->sv_flags & SV_ASLR) == 0 || (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 || (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) { KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND, ("et_dyn_addr == RAND and !ASLR")); } else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 || (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) || et_dyn_addr == ET_DYN_ADDR_RAND) { imgp->map_flags |= MAP_ASLR; /* * If user does not care about sbrk, utilize the bss * grow region for mappings as well. We can select * the base for the image anywhere and still not suffer * from the fragmentation.
*/ if (!__elfN(aslr_honor_sbrk) || (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0) imgp->map_flags |= MAP_ASLR_IGNSTART; } if (!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0) imgp->map_flags |= MAP_WXORX; error = exec_new_vmspace(imgp, sv); vmspace = imgp->proc->p_vmspace; map = &vmspace->vm_map; imgp->proc->p_sysent = sv; imgp->proc->p_elf_brandinfo = brand_info; maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK); if (et_dyn_addr == ET_DYN_ADDR_RAND) { KASSERT((map->flags & MAP_ASLR) != 0, ("ET_DYN_ADDR_RAND but !MAP_ASLR")); et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map, vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA), /* reserve half of the address space to interpreter */ maxv / 2, 1UL << flsl(maxalign)); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto ret; error = __elfN(load_sections)(imgp, hdr, phdr, et_dyn_addr, NULL); if (error != 0) goto ret; error = __elfN(enforce_limits)(imgp, hdr, phdr, et_dyn_addr); if (error != 0) goto ret; entry = (u_long)hdr->e_entry + et_dyn_addr; /* * We load the dynamic linker where a userland call * to mmap(0, ...) would put it. The rationale behind this * calculation is that it leaves room for the heap to grow to * its maximum allowed size. */ addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td, RLIMIT_DATA)); if ((map->flags & MAP_ASLR) != 0) { maxv1 = maxv / 2 + addr / 2; MPASS(maxv1 >= addr); /* No overflow */ map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1, (MAXPAGESIZES > 1 && pagesizes[1] != 0) ? pagesizes[1] : pagesizes[0]); } else { map->anon_loc = addr; } imgp->entry_addr = entry; if (interp != NULL) { VOP_UNLOCK(imgp->vp); if ((map->flags & MAP_ASLR) != 0) { /* Assume that interpreter fits into 1/4 of AS */ maxv1 = maxv / 2 + addr / 2; MPASS(maxv1 >= addr); /* No overflow */ addr = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1, PAGE_SIZE); } error = __elfN(load_interp)(imgp, brand_info, interp, &addr, &imgp->entry_addr); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto ret; } else addr = et_dyn_addr; /* * Construct auxargs table (used by the copyout_auxargs routine) */ elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_NOWAIT); if (elf_auxargs == NULL) { VOP_UNLOCK(imgp->vp); elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } elf_auxargs->execfd = -1; elf_auxargs->phdr = proghdr + et_dyn_addr; elf_auxargs->phent = hdr->e_phentsize; elf_auxargs->phnum = hdr->e_phnum; elf_auxargs->pagesz = PAGE_SIZE; elf_auxargs->base = addr; elf_auxargs->flags = 0; elf_auxargs->entry = entry; elf_auxargs->hdr_eflags = hdr->e_flags; imgp->auxargs = elf_auxargs; imgp->interpreted = 0; imgp->reloc_base = addr; imgp->proc->p_osrel = osrel; imgp->proc->p_fctl0 = fctl0; imgp->proc->p_elf_flags = hdr->e_flags; ret: if (free_interp) free(interp, M_TEMP); return (error); } #define elf_suword __CONCAT(suword, __ELF_WORD_SIZE) int __elfN(freebsd_copyout_auxargs)(struct image_params *imgp, uintptr_t base) { Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs; Elf_Auxinfo *argarray, *pos; int error; argarray = pos = malloc(AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, 
AT_EHDRFLAGS, args->hdr_eflags); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, AT_EXECPATH, imgp->execpathp); AUXARGS_ENTRY(pos, AT_OSRELDATE, imgp->proc->p_ucred->cr_prison->pr_osreldate); if (imgp->canary != 0) { AUXARGS_ENTRY_PTR(pos, AT_CANARY, imgp->canary); AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen); } AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus); if (imgp->pagesizes != 0) { AUXARGS_ENTRY_PTR(pos, AT_PAGESIZES, imgp->pagesizes); AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen); } if (imgp->sysent->sv_timekeep_base != 0) { AUXARGS_ENTRY(pos, AT_TIMEKEEP, imgp->sysent->sv_timekeep_base); } AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : imgp->sysent->sv_stackprot); if (imgp->sysent->sv_hwcap != NULL) AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap); if (imgp->sysent->sv_hwcap2 != NULL) AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2); AUXARGS_ENTRY(pos, AT_BSDFLAGS, __elfN(sigfastblock) ? ELF_BSDF_SIGFASTBLK : 0); AUXARGS_ENTRY(pos, AT_ARGC, imgp->args->argc); AUXARGS_ENTRY_PTR(pos, AT_ARGV, imgp->argv); AUXARGS_ENTRY(pos, AT_ENVC, imgp->args->envc); AUXARGS_ENTRY_PTR(pos, AT_ENVV, imgp->envv); AUXARGS_ENTRY_PTR(pos, AT_PS_STRINGS, imgp->ps_strings); if (imgp->sysent->sv_fxrng_gen_base != 0) AUXARGS_ENTRY(pos, AT_FXRNG, imgp->sysent->sv_fxrng_gen_base); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * AT_COUNT); free(argarray, M_TEMP); return (error); } int __elfN(freebsd_fixup)(uintptr_t *stack_base, struct image_params *imgp) { Elf_Addr *base; base = (Elf_Addr *)*stack_base; base--; if (elf_suword(base, imgp->args->argc) == -1) return (EFAULT); *stack_base = (uintptr_t)base; return (0); } /* * Code for generating ELF core dumps. */ typedef void (*segment_callback)(vm_map_entry_t, void *); /* Closure for cb_put_phdr(). */ struct phdr_closure { Elf_Phdr *phdr; /* Program header to fill in */ Elf_Off offset; /* Offset of segment in core file */ }; struct note_info { int type; /* Note type. */ outfunc_t outfunc; /* Output function. */ void *outarg; /* Argument for the output function. */ size_t outsize; /* Output size. */ TAILQ_ENTRY(note_info) link; /* Link to the next note info. 
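Conceptually, the AUXARGS_ENTRY() calls in __elfN(freebsd_copyout_auxargs)() above just append tag/value pairs to an array that is later copied out to the new process and terminated with AT_NULL. The sketch below builds such a vector in user space; the AT_* numbers follow the common SysV ABI assignment and should be checked against sys/elf_common.h rather than trusted from here:

#include <stdio.h>
#include <stdint.h>

/* Stand-in for Elf_Auxinfo: a tag and a value. */
struct auxv_entry { long a_type; uint64_t a_val; };

/* Common SysV ABI tag numbers; verify against the real headers. */
#define AT_NULL		0
#define AT_PHDR		3
#define AT_PHENT	4
#define AT_PHNUM	5
#define AT_PAGESZ	6
#define AT_BASE		7
#define AT_ENTRY	9

#define PUSH(pos, type, val)						\
	do { (pos)->a_type = (type); (pos)->a_val = (val); (pos)++; } while (0)

int
main(void)
{
	struct auxv_entry auxv[8], *pos = auxv, *p;

	/* Hypothetical values for a freshly mapped image. */
	PUSH(pos, AT_PHDR, 0x200040);
	PUSH(pos, AT_PHENT, 56);
	PUSH(pos, AT_PHNUM, 11);
	PUSH(pos, AT_PAGESZ, 4096);
	PUSH(pos, AT_BASE, 0x200300000);	/* interpreter load base */
	PUSH(pos, AT_ENTRY, 0x201000);
	PUSH(pos, AT_NULL, 0);			/* terminator */

	for (p = auxv; p->a_type != AT_NULL; p++)
		printf("a_type %ld a_val %#jx\n", p->a_type,
		    (uintmax_t)p->a_val);
	return (0);
}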
*/ }; TAILQ_HEAD(note_info_list, note_info); extern int compress_user_cores; extern int compress_user_cores_level; static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static void each_dumpable_segment(struct thread *, segment_callback, void *, int); static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t, struct note_info_list *, size_t, int); static void __elfN(putnote)(struct thread *td, struct note_info *, struct sbuf *); static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *); static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *); static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *); static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *); static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *); static void __elfN(note_ptlwpinfo)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *); static void note_procstat_files(void *, struct sbuf *, size_t *); static void note_procstat_groups(void *, struct sbuf *, size_t *); static void note_procstat_osrel(void *, struct sbuf *, size_t *); static void note_procstat_rlimit(void *, struct sbuf *, size_t *); static void note_procstat_umask(void *, struct sbuf *, size_t *); static void note_procstat_vmmap(void *, struct sbuf *, size_t *); static int core_compressed_write(void *base, size_t len, off_t offset, void *arg) { return (core_write((struct coredump_params *)arg, base, len, offset, UIO_SYSSPACE, NULL)); } int __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) { struct ucred *cred = td->td_ucred; int compm, error = 0; struct sseg_closure seginfo; struct note_info_list notelst; struct coredump_params params; struct note_info *ninfo; void *hdr, *tmpbuf; size_t hdrsize, notesz, coresize; hdr = NULL; tmpbuf = NULL; TAILQ_INIT(&notelst); /* Size the program segments. */ __elfN(size_segments)(td, &seginfo, flags); /* * Collect info about the core file header area. */ hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count); if (seginfo.count + 1 >= PN_XNUM) hdrsize += sizeof(Elf_Shdr); td->td_proc->p_sysent->sv_elf_core_prepare_notes(td, &notelst, &notesz); coresize = round_page(hdrsize + notesz) + seginfo.size; /* Set up core dump parameters. */ params.offset = 0; params.active_cred = cred; params.file_cred = NOCRED; params.td = td; params.vp = vp; params.comp = NULL; #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); error = racct_add(td->td_proc, RACCT_CORE, coresize); PROC_UNLOCK(td->td_proc); if (error != 0) { error = EFAULT; goto done; } } #endif if (coresize >= limit) { error = EFAULT; goto done; } /* Create a compression stream if necessary. */ compm = compress_user_cores; if ((flags & (SVC_PT_COREDUMP | SVC_NOCOMPRESS)) == SVC_PT_COREDUMP && compm == 0) compm = COMPRESS_GZIP; if (compm != 0) { params.comp = compressor_init(core_compressed_write, compm, CORE_BUF_SIZE, compress_user_cores_level, &params); if (params.comp == NULL) { error = EFAULT; goto done; } tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); } /* * Allocate memory for building the header, fill it up, * and write it out following the notes.
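The header sizing above is simple but worth seeing in isolation: one ELF header, one PT_NOTE program header plus one PT_LOAD header per dumpable segment, an extra section header once the count would overflow e_phnum, and everything rounded up to a page before the segment payload starts. A user-space sketch with hypothetical counts (PN_XNUM is 0xffff per the ELF spec):

#include <stdio.h>
#include <elf.h>	/* Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr */

#define PAGE_MASK	((size_t)0xfff)		/* assumed 4 KiB pages */
#define round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)
#ifndef PN_XNUM
#define PN_XNUM		0xffff
#endif

int
main(void)
{
	size_t nsegs = 37;		/* hypothetical dumpable segments */
	size_t notesz = 0x3200;		/* hypothetical total note bytes */
	size_t segbytes = (size_t)64 << 20;	/* hypothetical payload */
	size_t hdrsize, coresize;

	hdrsize = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * (1 + nsegs);
	if (nsegs + 1 >= PN_XNUM)
		hdrsize += sizeof(Elf64_Shdr);	/* section header 0 */
	coresize = round_page(hdrsize + notesz) + segbytes;
	printf("hdrsize %zu coresize %zu\n", hdrsize, coresize);
	return (0);
}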
*/ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); error = __elfN(corehdr)(&params, seginfo.count, hdr, hdrsize, &notelst, notesz, flags); /* Write the contents of all of the writable segments. */ if (error == 0) { Elf_Phdr *php; off_t offset; int i; php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1; offset = round_page(hdrsize + notesz); for (i = 0; i < seginfo.count; i++) { error = core_output((char *)(uintptr_t)php->p_vaddr, php->p_filesz, offset, &params, tmpbuf); if (error != 0) break; offset += php->p_filesz; php++; } if (error == 0 && params.comp != NULL) error = compressor_flush(params.comp); } if (error) { log(LOG_WARNING, "Failed to write core file for process %s (error %d)\n", curproc->p_comm, error); } done: free(tmpbuf, M_TEMP); if (params.comp != NULL) compressor_fini(params.comp); while ((ninfo = TAILQ_FIRST(&notelst)) != NULL) { TAILQ_REMOVE(&notelst, ninfo, link); free(ninfo, M_TEMP); } if (hdr != NULL) free(hdr, M_TEMP); return (error); } /* * A callback for each_dumpable_segment() to write out the segment's * program header entry. */ static void cb_put_phdr(vm_map_entry_t entry, void *closure) { struct phdr_closure *phc = (struct phdr_closure *)closure; Elf_Phdr *phdr = phc->phdr; phc->offset = round_page(phc->offset); phdr->p_type = PT_LOAD; phdr->p_offset = phc->offset; phdr->p_vaddr = entry->start; phdr->p_paddr = 0; phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; phdr->p_align = PAGE_SIZE; phdr->p_flags = __elfN(untrans_prot)(entry->protection); phc->offset += phdr->p_filesz; phc->phdr++; } /* * A callback for each_dumpable_segment() to gather information about * the number of segments and their total size. */ static void cb_size_segment(vm_map_entry_t entry, void *closure) { struct sseg_closure *ssc = (struct sseg_closure *)closure; ssc->count++; ssc->size += entry->end - entry->start; } void __elfN(size_segments)(struct thread *td, struct sseg_closure *seginfo, int flags) { seginfo->count = 0; seginfo->size = 0; each_dumpable_segment(td, cb_size_segment, seginfo, flags); } /* * For each writable segment in the process's memory map, call the given * function with a pointer to the map entry and some arbitrary * caller-supplied data. */ static void each_dumpable_segment(struct thread *td, segment_callback func, void *closure, int flags) { struct proc *p = td->td_proc; vm_map_t map = &p->p_vmspace->vm_map; vm_map_entry_t entry; vm_object_t backing_object, object; bool ignore_entry; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { /* * Don't dump inaccessible mappings, deal with legacy * coredump mode. * * Note that read-only segments related to the elf binary * are marked MAP_ENTRY_NOCOREDUMP now so we no longer * need to arbitrarily ignore such segments. */ if ((flags & SVC_ALL) == 0) { if (elf_legacy_coredump) { if ((entry->protection & VM_PROT_RW) != VM_PROT_RW) continue; } else { if ((entry->protection & VM_PROT_ALL) == 0) continue; } } /* * Don't include a memory segment in the coredump if * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in * madvise(2). Do not dump submaps (i.e. parts of the * kernel map). */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) continue; if ((entry->eflags & MAP_ENTRY_NOCOREDUMP) != 0 && (flags & SVC_ALL) == 0) continue; if ((object = entry->object.vm_object) == NULL) continue; /* Ignore memory-mapped devices and such things.
*/ VM_OBJECT_RLOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_RLOCK(backing_object); VM_OBJECT_RUNLOCK(object); object = backing_object; } ignore_entry = (object->flags & OBJ_FICTITIOUS) != 0; VM_OBJECT_RUNLOCK(object); if (ignore_entry) continue; (*func)(entry, closure); } vm_map_unlock_read(map); } /* * Write the core file header to the file, including padding up to * the page boundary. */ static int __elfN(corehdr)(struct coredump_params *p, int numsegs, void *hdr, size_t hdrsize, struct note_info_list *notelst, size_t notesz, int flags) { struct note_info *ninfo; struct sbuf *sb; int error; /* Fill in the header. */ bzero(hdr, hdrsize); __elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz, flags); sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_core_output, p); sbuf_start_section(sb, NULL); sbuf_bcat(sb, hdr, hdrsize); TAILQ_FOREACH(ninfo, notelst, link) __elfN(putnote)(p->td, ninfo, sb); /* Align up to a page boundary for the program segments. */ sbuf_end_section(sb, -1, PAGE_SIZE, 0); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } void __elfN(prepare_notes)(struct thread *td, struct note_info_list *list, size_t *sizep) { struct proc *p; struct thread *thr; size_t size; p = td->td_proc; size = 0; size += __elfN(register_note)(td, list, NT_PRPSINFO, __elfN(note_prpsinfo), p); /* * To have the debugger select the right thread (LWP) as the initial * thread, we dump the state of the thread passed to us in td first. * This is the thread that causes the core dump and thus likely to * be the right thread one wants to have selected in the debugger. */ thr = td; while (thr != NULL) { size += __elfN(register_note)(td, list, NT_PRSTATUS, __elfN(note_prstatus), thr); size += __elfN(register_note)(td, list, NT_FPREGSET, __elfN(note_fpregset), thr); size += __elfN(register_note)(td, list, NT_THRMISC, __elfN(note_thrmisc), thr); size += __elfN(register_note)(td, list, NT_PTLWPINFO, __elfN(note_ptlwpinfo), thr); size += __elfN(register_note)(td, list, -1, __elfN(note_threadmd), thr); thr = thr == td ? 
TAILQ_FIRST(&p->p_threads) : TAILQ_NEXT(thr, td_plist); if (thr == td) thr = TAILQ_NEXT(thr, td_plist); } size += __elfN(register_note)(td, list, NT_PROCSTAT_PROC, __elfN(note_procstat_proc), p); size += __elfN(register_note)(td, list, NT_PROCSTAT_FILES, note_procstat_files, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_VMMAP, note_procstat_vmmap, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_GROUPS, note_procstat_groups, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_UMASK, note_procstat_umask, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_RLIMIT, note_procstat_rlimit, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_OSREL, note_procstat_osrel, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_PSSTRINGS, __elfN(note_procstat_psstrings), p); size += __elfN(register_note)(td, list, NT_PROCSTAT_AUXV, __elfN(note_procstat_auxv), p); *sizep = size; } void __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs, size_t notesz, int flags) { Elf_Ehdr *ehdr; Elf_Phdr *phdr; Elf_Shdr *shdr; struct phdr_closure phc; Elf_Brandinfo *bi; ehdr = (Elf_Ehdr *)hdr; bi = td->td_proc->p_elf_brandinfo; ehdr->e_ident[EI_MAG0] = ELFMAG0; ehdr->e_ident[EI_MAG1] = ELFMAG1; ehdr->e_ident[EI_MAG2] = ELFMAG2; ehdr->e_ident[EI_MAG3] = ELFMAG3; ehdr->e_ident[EI_CLASS] = ELF_CLASS; ehdr->e_ident[EI_DATA] = ELF_DATA; ehdr->e_ident[EI_VERSION] = EV_CURRENT; ehdr->e_ident[EI_OSABI] = td->td_proc->p_sysent->sv_elf_core_osabi; ehdr->e_ident[EI_ABIVERSION] = 0; ehdr->e_ident[EI_PAD] = 0; ehdr->e_type = ET_CORE; ehdr->e_machine = bi->machine; ehdr->e_version = EV_CURRENT; ehdr->e_entry = 0; ehdr->e_phoff = sizeof(Elf_Ehdr); ehdr->e_flags = td->td_proc->p_elf_flags; ehdr->e_ehsize = sizeof(Elf_Ehdr); ehdr->e_phentsize = sizeof(Elf_Phdr); ehdr->e_shentsize = sizeof(Elf_Shdr); ehdr->e_shstrndx = SHN_UNDEF; if (numsegs + 1 < PN_XNUM) { ehdr->e_phnum = numsegs + 1; ehdr->e_shnum = 0; } else { ehdr->e_phnum = PN_XNUM; ehdr->e_shnum = 1; ehdr->e_shoff = ehdr->e_phoff + (numsegs + 1) * ehdr->e_phentsize; KASSERT(ehdr->e_shoff == hdrsize - sizeof(Elf_Shdr), ("e_shoff: %zu, hdrsize - shdr: %zu", (size_t)ehdr->e_shoff, hdrsize - sizeof(Elf_Shdr))); shdr = (Elf_Shdr *)((char *)hdr + ehdr->e_shoff); memset(shdr, 0, sizeof(*shdr)); /* * A special first section is used to hold large segment and * section counts. This was proposed by Sun Microsystems in * Solaris and has been adopted by Linux; the standard ELF * tools are already familiar with the technique. * * See table 7-7 of the Solaris "Linker and Libraries Guide" * (or 12-7 depending on the version of the document) for more * details. */ shdr->sh_type = SHT_NULL; shdr->sh_size = ehdr->e_shnum; shdr->sh_link = ehdr->e_shstrndx; shdr->sh_info = numsegs + 1; } /* * Fill in the program header entries. */ phdr = (Elf_Phdr *)((char *)hdr + ehdr->e_phoff); /* The note segment. */ phdr->p_type = PT_NOTE; phdr->p_offset = hdrsize; phdr->p_vaddr = 0; phdr->p_paddr = 0; phdr->p_filesz = notesz; phdr->p_memsz = 0; phdr->p_flags = PF_R; phdr->p_align = ELF_NOTE_ROUNDSIZE; phdr++; /* All the writable segments from the program.
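The PN_XNUM escape hatch used in puthdr() above is the part most likely to surprise: when the number of program headers will not fit in the 16-bit e_phnum field, e_phnum is set to the sentinel PN_XNUM and the true counts are stashed in section header 0 (sh_info for program headers, sh_size for section headers). A compact sketch of just that rule, assuming the Elf64 types from elf.h:

#include <stdio.h>
#include <string.h>
#include <elf.h>	/* Elf64_Ehdr, Elf64_Shdr, SHT_NULL */

#ifndef PN_XNUM
#define PN_XNUM		0xffff
#endif

static void
set_counts(Elf64_Ehdr *ehdr, Elf64_Shdr *shdr0, size_t nphdrs)
{

	if (nphdrs < PN_XNUM) {
		ehdr->e_phnum = nphdrs;
		ehdr->e_shnum = 0;
	} else {
		/* Overflow: the real counts live in section header 0. */
		ehdr->e_phnum = PN_XNUM;
		ehdr->e_shnum = 1;
		memset(shdr0, 0, sizeof(*shdr0));
		shdr0->sh_type = SHT_NULL;
		shdr0->sh_info = nphdrs;	/* true program header count */
		shdr0->sh_size = ehdr->e_shnum;	/* true section header count */
	}
}

int
main(void)
{
	Elf64_Ehdr ehdr = { { 0 } };
	Elf64_Shdr shdr0;

	set_counts(&ehdr, &shdr0, 70000);	/* hypothetical huge core */
	printf("e_phnum %u sh_info %u\n", ehdr.e_phnum,
	    (unsigned)shdr0.sh_info);
	return (0);
}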
*/ phc.phdr = phdr; phc.offset = round_page(hdrsize + notesz); each_dumpable_segment(td, cb_put_phdr, &phc, flags); } size_t __elfN(register_note)(struct thread *td, struct note_info_list *list, int type, outfunc_t out, void *arg) { const struct sysentvec *sv; struct note_info *ninfo; size_t size, notesize; sv = td->td_proc->p_sysent; size = 0; out(arg, NULL, &size); ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK); ninfo->type = type; ninfo->outfunc = out; ninfo->outarg = arg; ninfo->outsize = size; TAILQ_INSERT_TAIL(list, ninfo, link); if (type == -1) return (size); notesize = sizeof(Elf_Note) + /* note header */ roundup2(strlen(sv->sv_elf_core_abi_vendor) + 1, ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static size_t append_note_data(const void *src, void *dst, size_t len) { size_t padded_len; padded_len = roundup2(len, ELF_NOTE_ROUNDSIZE); if (dst != NULL) { bcopy(src, dst, len); bzero((char *)dst + len, padded_len - len); } return (padded_len); } size_t __elfN(populate_note)(int type, void *src, void *dst, size_t size, void **descp) { Elf_Note *note; char *buf; size_t notesize; buf = dst; if (buf != NULL) { note = (Elf_Note *)buf; note->n_namesz = sizeof(FREEBSD_ABI_VENDOR); note->n_descsz = size; note->n_type = type; buf += sizeof(*note); buf += append_note_data(FREEBSD_ABI_VENDOR, buf, sizeof(FREEBSD_ABI_VENDOR)); append_note_data(src, buf, size); if (descp != NULL) *descp = buf; } notesize = sizeof(Elf_Note) + /* note header */ roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static void __elfN(putnote)(struct thread *td, struct note_info *ninfo, struct sbuf *sb) { Elf_Note note; const struct sysentvec *sv; ssize_t old_len, sect_len; size_t new_len, descsz, i; if (ninfo->type == -1) { ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); return; } sv = td->td_proc->p_sysent; note.n_namesz = strlen(sv->sv_elf_core_abi_vendor) + 1; note.n_descsz = ninfo->outsize; note.n_type = ninfo->type; sbuf_bcat(sb, &note, sizeof(note)); sbuf_start_section(sb, &old_len); sbuf_bcat(sb, sv->sv_elf_core_abi_vendor, strlen(sv->sv_elf_core_abi_vendor) + 1); sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (note.n_descsz == 0) return; sbuf_start_section(sb, &old_len); ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (sect_len < 0) return; new_len = (size_t)sect_len; descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE); if (new_len < descsz) { /* * It is expected that individual note emitters will correctly * predict their expected output size and fill up to that size * themselves, padding in a format-specific way if needed. * However, in case they don't, just do it here with zeros. */ for (i = 0; i < descsz - new_len; i++) sbuf_putc(sb, 0); } else if (new_len > descsz) { /* * We can't always truncate sb -- we may have drained some * of it already. */ KASSERT(new_len == descsz, ("%s: Note type %u changed as we " "read it (%zu > %zu). Since it is longer than " "expected, this coredump's notes are corrupt. THIS " "IS A BUG in the note_procstat routine for type %u.\n", __func__, (unsigned)note.n_type, new_len, descsz, (unsigned)note.n_type)); } } /* * Miscellaneous note out functions.
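Both register_note() and populate_note() above size a note record the same way: a fixed note header, then the vendor name padded to 4 bytes, then the descriptor padded to 4 bytes (ELF_NOTE_ROUNDSIZE is 4 on FreeBSD). A stand-alone sketch of that formula with a hypothetical vendor string and payload size:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define NOTE_ALIGN	4	/* stands in for ELF_NOTE_ROUNDSIZE */
#define roundup2(x, a)	(((x) + ((size_t)(a) - 1)) & ~((size_t)(a) - 1))

/* Minimal stand-in for Elf_Note: name size, descriptor size, type. */
struct note_hdr { uint32_t n_namesz, n_descsz, n_type; };

int
main(void)
{
	const char *vendor = "FreeBSD";	/* namesz counts the trailing NUL */
	size_t descsz = 0x78;		/* hypothetical payload size */
	size_t notesize;

	notesize = sizeof(struct note_hdr) +
	    roundup2(strlen(vendor) + 1, NOTE_ALIGN) +	/* padded name */
	    roundup2(descsz, NOTE_ALIGN);		/* padded descriptor */
	printf("on-disk note record: %zu bytes\n", notesize);
	return (0);
}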
*/ #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 #include #include typedef struct prstatus32 elf_prstatus_t; typedef struct prpsinfo32 elf_prpsinfo_t; typedef struct fpreg32 elf_prfpregset_t; typedef struct fpreg32 elf_fpregset_t; typedef struct reg32 elf_gregset_t; typedef struct thrmisc32 elf_thrmisc_t; #define ELF_KERN_PROC_MASK KERN_PROC_MASK32 typedef struct kinfo_proc32 elf_kinfo_proc_t; typedef uint32_t elf_ps_strings_t; #else typedef prstatus_t elf_prstatus_t; typedef prpsinfo_t elf_prpsinfo_t; typedef prfpregset_t elf_prfpregset_t; typedef prfpregset_t elf_fpregset_t; typedef gregset_t elf_gregset_t; typedef thrmisc_t elf_thrmisc_t; #define ELF_KERN_PROC_MASK 0 typedef struct kinfo_proc elf_kinfo_proc_t; typedef vm_offset_t elf_ps_strings_t; #endif static void __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep) { struct sbuf sbarg; size_t len; char *cp, *end; struct proc *p; elf_prpsinfo_t *psinfo; int error; p = arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*psinfo), ("invalid size")); psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK); psinfo->pr_version = PRPSINFO_VERSION; psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t); strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname)); PROC_LOCK(p); if (p->p_args != NULL) { len = sizeof(psinfo->pr_psargs) - 1; if (len > p->p_args->ar_length) len = p->p_args->ar_length; memcpy(psinfo->pr_psargs, p->p_args->ar_args, len); PROC_UNLOCK(p); error = 0; } else { _PHOLD(p); PROC_UNLOCK(p); sbuf_new(&sbarg, psinfo->pr_psargs, sizeof(psinfo->pr_psargs), SBUF_FIXEDLEN); error = proc_getargv(curthread, p, &sbarg); PRELE(p); if (sbuf_finish(&sbarg) == 0) len = sbuf_len(&sbarg) - 1; else len = sizeof(psinfo->pr_psargs) - 1; sbuf_delete(&sbarg); } if (error || len == 0) strlcpy(psinfo->pr_psargs, p->p_comm, sizeof(psinfo->pr_psargs)); else { KASSERT(len < sizeof(psinfo->pr_psargs), ("len is too long: %zu vs %zu", len, sizeof(psinfo->pr_psargs))); cp = psinfo->pr_psargs; end = cp + len - 1; for (;;) { cp = memchr(cp, '\0', end - cp); if (cp == NULL) break; *cp = ' '; } } psinfo->pr_pid = p->p_pid; sbuf_bcat(sb, psinfo, sizeof(*psinfo)); free(psinfo, M_TEMP); } *sizep = sizeof(*psinfo); } static void __elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_prstatus_t *status; td = arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*status), ("invalid size")); status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK); status->pr_version = PRSTATUS_VERSION; status->pr_statussz = sizeof(elf_prstatus_t); status->pr_gregsetsz = sizeof(elf_gregset_t); status->pr_fpregsetsz = sizeof(elf_fpregset_t); status->pr_osreldate = osreldate; status->pr_cursig = td->td_proc->p_sig; status->pr_pid = td->td_tid; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_regs32(td, &status->pr_reg); #else fill_regs(td, &status->pr_reg); #endif sbuf_bcat(sb, status, sizeof(*status)); free(status, M_TEMP); } *sizep = sizeof(*status); } static void __elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_prfpregset_t *fpregset; td = arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*fpregset), ("invalid size")); fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_fpregs32(td, fpregset); #else fill_fpregs(td, fpregset); #endif sbuf_bcat(sb, fpregset, sizeof(*fpregset)); free(fpregset, M_TEMP); } *sizep = sizeof(*fpregset); } static void __elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep) { 
struct thread *td; elf_thrmisc_t thrmisc; td = arg; if (sb != NULL) { KASSERT(*sizep == sizeof(thrmisc), ("invalid size")); bzero(&thrmisc, sizeof(thrmisc)); strcpy(thrmisc.pr_tname, td->td_name); sbuf_bcat(sb, &thrmisc, sizeof(thrmisc)); } *sizep = sizeof(thrmisc); } static void __elfN(note_ptlwpinfo)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; size_t size; int structsize; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 struct ptrace_lwpinfo32 pl; #else struct ptrace_lwpinfo pl; #endif td = arg; size = sizeof(structsize) + sizeof(pl); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(pl); sbuf_bcat(sb, &structsize, sizeof(structsize)); bzero(&pl, sizeof(pl)); pl.pl_lwpid = td->td_tid; pl.pl_event = PL_EVENT_NONE; pl.pl_sigmask = td->td_sigmask; pl.pl_siglist = td->td_siglist; if (td->td_si.si_signo != 0) { pl.pl_event = PL_EVENT_SIGNAL; pl.pl_flags |= PL_FLAG_SI; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 siginfo_to_siginfo32(&td->td_si, &pl.pl_siginfo); #else pl.pl_siginfo = td->td_si; #endif } strcpy(pl.pl_tdname, td->td_name); /* XXX TODO: supply more information in struct ptrace_lwpinfo*/ sbuf_bcat(sb, &pl, sizeof(pl)); } *sizep = size; } /* * Allow for MD specific notes, as well as any MD * specific preparations for writing MI notes. */ static void __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; void *buf; size_t size; td = (struct thread *)arg; size = *sizep; if (size != 0 && sb != NULL) buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK); else buf = NULL; size = 0; __elfN(dump_thread)(td, buf, &size); KASSERT(sb == NULL || *sizep == size, ("invalid size")); if (size != 0 && sb != NULL) sbuf_bcat(sb, buf, size); free(buf, M_TEMP); *sizep = size; } #ifdef KINFO_PROC_SIZE CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #endif static void __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; size = sizeof(structsize) + p->p_numthreads * sizeof(elf_kinfo_proc_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(elf_kinfo_proc_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); sx_slock(&proctree_lock); PROC_LOCK(p); kern_proc_out(p, sb, ELF_KERN_PROC_MASK); sx_sunlock(&proctree_lock); } *sizep = size; } #ifdef KINFO_FILE_SIZE CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); #endif static void note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size, sect_sz, i; ssize_t start_len, sect_len; int structsize, filedesc_flags; if (coredump_pack_fileinfo) filedesc_flags = KERN_FILEDESC_PACK_KINFO; else filedesc_flags = 0; p = arg; structsize = sizeof(struct kinfo_file); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_count_drain, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, -1, filedesc_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { sbuf_start_section(sb, &start_len); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize), filedesc_flags); sect_len = sbuf_end_section(sb, start_len, 0, 0); if (sect_len < 0) return; sect_sz = sect_len; KASSERT(sect_sz <= *sizep, ("kern_proc_filedesc_out did not respect maxlen; " "requested %zu, got %zu", *sizep - sizeof(structsize), sect_sz - sizeof(structsize))); for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++) 
sbuf_putc(sb, 0); } } #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif static void note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize, vmmap_flags; if (coredump_pack_vmmapinfo) vmmap_flags = KERN_VMMAP_PACK_KINFO; else vmmap_flags = 0; p = arg; structsize = sizeof(struct kinfo_vmentry); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_count_drain, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb, -1, vmmap_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize), vmmap_flags); } } static void note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(gid_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups * sizeof(gid_t)); } *sizep = size; } static void note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; size = sizeof(structsize) + sizeof(p->p_pd->pd_cmask); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_pd->pd_cmask); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_pd->pd_cmask, sizeof(p->p_pd->pd_cmask)); } *sizep = size; } static void note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; struct rlimit rlim[RLIM_NLIMITS]; size_t size; int structsize, i; p = arg; size = sizeof(structsize) + sizeof(rlim); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(rlim); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); for (i = 0; i < RLIM_NLIMITS; i++) lim_rlimit_proc(p, i, &rlim[i]); PROC_UNLOCK(p); sbuf_bcat(sb, rlim, sizeof(rlim)); } *sizep = size; } static void note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; size = sizeof(structsize) + sizeof(p->p_osrel); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_osrel); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel)); } *sizep = size; } static void __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; elf_ps_strings_t ps_strings; size_t size; int structsize; p = arg; size = sizeof(structsize) + sizeof(ps_strings); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(ps_strings); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 ps_strings = PTROUT(p->p_sysent->sv_psstrings); #else ps_strings = p->p_sysent->sv_psstrings; #endif sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &ps_strings, sizeof(ps_strings)); } *sizep = size; } static void __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, AT_COUNT * sizeof(Elf_Auxinfo), SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_count_drain, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); sbuf_finish(sb); sbuf_delete(sb); 
*sizep = size; } else { structsize = sizeof(Elf_Auxinfo); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); } } static boolean_t __elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote, const char *note_vendor, const Elf_Phdr *pnote, boolean_t (*cb)(const Elf_Note *, void *, boolean_t *), void *cb_arg) { const Elf_Note *note, *note0, *note_end; const char *note_name; char *buf; int i, error; boolean_t res; /* We need some limit, might as well use PAGE_SIZE. */ if (pnote == NULL || pnote->p_filesz > PAGE_SIZE) return (FALSE); ASSERT_VOP_LOCKED(imgp->vp, "parse_notes"); if (pnote->p_offset > PAGE_SIZE || pnote->p_filesz > PAGE_SIZE - pnote->p_offset) { buf = malloc(pnote->p_filesz, M_TEMP, M_NOWAIT); if (buf == NULL) { VOP_UNLOCK(imgp->vp); buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz, pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL, curthread); if (error != 0) { uprintf("i/o error PT_NOTE\n"); goto retf; } note = note0 = (const Elf_Note *)buf; note_end = (const Elf_Note *)(buf + pnote->p_filesz); } else { note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset); note_end = (const Elf_Note *)(imgp->image_header + pnote->p_offset + pnote->p_filesz); buf = NULL; } for (i = 0; i < 100 && note >= note0 && note < note_end; i++) { if (!aligned(note, Elf32_Addr) || (const char *)note_end - (const char *)note < sizeof(Elf_Note)) { goto retf; } if (note->n_namesz != checknote->n_namesz || note->n_descsz != checknote->n_descsz || note->n_type != checknote->n_type) goto nextnote; note_name = (const char *)(note + 1); if (note_name + checknote->n_namesz >= (const char *)note_end || strncmp(note_vendor, note_name, checknote->n_namesz) != 0) goto nextnote; if (cb(note, cb_arg, &res)) goto ret; nextnote: note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) + roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE)); } retf: res = FALSE; ret: free(buf, M_TEMP); return (res); } struct brandnote_cb_arg { Elf_Brandnote *brandnote; int32_t *osrel; }; static boolean_t brandnote_cb(const Elf_Note *note, void *arg0, boolean_t *res) { struct brandnote_cb_arg *arg; arg = arg0; /* * Fetch the osreldate for binary from the ELF OSABI-note if * necessary. */ *res = (arg->brandnote->flags & BN_TRANSLATE_OSREL) != 0 && arg->brandnote->trans_osrel != NULL ? arg->brandnote->trans_osrel(note, arg->osrel) : TRUE; return (TRUE); } static Elf_Note fctl_note = { .n_namesz = sizeof(FREEBSD_ABI_VENDOR), .n_descsz = sizeof(uint32_t), .n_type = NT_FREEBSD_FEATURE_CTL, }; struct fctl_cb_arg { boolean_t *has_fctl0; uint32_t *fctl0; }; static boolean_t note_fctl_cb(const Elf_Note *note, void *arg0, boolean_t *res) { struct fctl_cb_arg *arg; const Elf32_Word *desc; uintptr_t p; arg = arg0; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); desc = (const Elf32_Word *)p; *arg->has_fctl0 = TRUE; *arg->fctl0 = desc[0]; *res = TRUE; return (TRUE); } /* * Try to find the appropriate ABI-note section for checknote, fetch * the osreldate and feature control flags for binary from the ELF * OSABI-note. Only the first page of the image is searched, the same * as for headers. 
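parse_notes() above walks the raw PT_NOTE contents by hand, advancing by the same header-plus-padded-name-plus-padded-descriptor stride that the writer used, and refusing to step past the end of the buffer. The following user-space sketch builds one hypothetical note and walks it back with equivalent bounds checks (illustrative only; the field names merely mimic Elf_Note):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define NOTE_ALIGN	4
#define roundup2(x, a)	(((x) + ((size_t)(a) - 1)) & ~((size_t)(a) - 1))

struct note_hdr { uint32_t n_namesz, n_descsz, n_type; };

int
main(void)
{
	unsigned char buf[64] = { 0 };
	const unsigned char *p, *end;
	size_t len = 0;
	/* Build one hypothetical note: name "Sketch", 4-byte descriptor. */
	struct note_hdr hdr = { 7, 4, 1 };

	memcpy(buf + len, &hdr, sizeof(hdr));
	len += sizeof(hdr);
	memcpy(buf + len, "Sketch", 7);
	len += roundup2(7, NOTE_ALIGN);
	memset(buf + len, 0xab, 4);
	len += roundup2(4, NOTE_ALIGN);

	/* Walk it back, never trusting namesz/descsz to stay in bounds. */
	for (p = buf, end = buf + len;
	    (size_t)(end - p) >= sizeof(struct note_hdr);) {
		struct note_hdr n;
		size_t stride;

		memcpy(&n, p, sizeof(n));
		stride = sizeof(n) + roundup2(n.n_namesz, NOTE_ALIGN) +
		    roundup2(n.n_descsz, NOTE_ALIGN);
		if (stride > (size_t)(end - p))
			break;		/* truncated or corrupt note */
		printf("type %u name \"%s\" descsz %u\n", n.n_type,
		    (const char *)(p + sizeof(n)), n.n_descsz);
		p += stride;
	}
	return (0);
}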
*/ static boolean_t __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote, int32_t *osrel, boolean_t *has_fctl0, uint32_t *fctl0) { const Elf_Phdr *phdr; const Elf_Ehdr *hdr; struct brandnote_cb_arg b_arg; struct fctl_cb_arg f_arg; int i, j; hdr = (const Elf_Ehdr *)imgp->image_header; phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); b_arg.brandnote = brandnote; b_arg.osrel = osrel; f_arg.has_fctl0 = has_fctl0; f_arg.fctl0 = fctl0; for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_NOTE && __elfN(parse_notes)(imgp, &brandnote->hdr, brandnote->vendor, &phdr[i], brandnote_cb, &b_arg)) { for (j = 0; j < hdr->e_phnum; j++) { if (phdr[j].p_type == PT_NOTE && __elfN(parse_notes)(imgp, &fctl_note, FREEBSD_ABI_VENDOR, &phdr[j], note_fctl_cb, &f_arg)) break; } return (TRUE); } } return (FALSE); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw __elfN(execsw) = { .ex_imgact = __CONCAT(exec_, __elfN(imgact)), .ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) }; EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw)); static vm_prot_t __elfN(trans_prot)(Elf_Word flags) { vm_prot_t prot; prot = 0; if (flags & PF_X) prot |= VM_PROT_EXECUTE; if (flags & PF_W) prot |= VM_PROT_WRITE; if (flags & PF_R) prot |= VM_PROT_READ; #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) if (i386_read_exec && (flags & PF_R)) prot |= VM_PROT_EXECUTE; #endif return (prot); } static Elf_Word __elfN(untrans_prot)(vm_prot_t prot) { Elf_Word flags; flags = 0; if (prot & VM_PROT_EXECUTE) flags |= PF_X; if (prot & VM_PROT_READ) flags |= PF_R; if (prot & VM_PROT_WRITE) flags |= PF_W; return (flags); } void __elfN(stackgap)(struct image_params *imgp, uintptr_t *stack_base) { uintptr_t range, rbase, gap; int pct; pct = __elfN(aslr_stack_gap); if (pct == 0) return; if (pct > 50) pct = 50; range = imgp->eff_stack_sz * pct / 100; arc4rand(&rbase, sizeof(rbase), 0); gap = rbase % range; gap &= ~(sizeof(u_long) - 1); *stack_base -= gap; } diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index e6564ae48ced..b6959dc18f50 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,2033 +1,2032 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif -#include - #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exec, "char *"); SDT_PROBE_DEFINE1(proc, , , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, , , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); int coredump_pack_fileinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, &coredump_pack_fileinfo, 0, "Enable file path packing in 'procstat -f' coredump notes"); int coredump_pack_vmmapinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, &coredump_pack_vmmapinfo, 0, "Enable file path packing in 'procstat -v' coredump notes"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_ps_strings, "LU", "Location of process' ps_strings structure"); /* XXX This should be vm_size_t. 
*/ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_usrstack, "LU", "Top of process stack"); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_stackprot, "I", "Stack memory permissions"); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, "Process' command line characters cache limit"); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_psstrings; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, sizeof(p->p_sysent->sv_psstrings)); return error; } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_usrstack; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, sizeof(p->p_sysent->sv_usrstack)); return error; } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. */ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(struct thread *td, struct execve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; }; #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL, oldvmspace); } post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 
0 : error, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); #else return (ENOSYS); #endif } int pre_execve(struct thread *td, struct vmspace **oldvmspace) { struct proc *p; int error; KASSERT(td == curthread, ("non-current thread %p", td)); error = 0; p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY) != 0) error = ERESTART; PROC_UNLOCK(p); } KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); *oldvmspace = p->p_vmspace; return (error); } void post_execve(struct thread *td, int error, struct vmspace *oldvmspace) { struct proc *p; KASSERT(td == curthread, ("non-current thread %p", td)); p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == EJUSTRETURN) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } exec_cleanup(td, oldvmspace); } /* * kern_execve() has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace) { AUDIT_ARG_ARGV(args->begin_argv, args->argc, exec_args_get_begin_envv(args) - args->begin_argv); AUDIT_ARG_ENVV(exec_args_get_begin_envv(args), args->envc, args->endp - exec_args_get_begin_envv(args)); return (do_execve(td, args, mac_p, oldvmspace)); } static void execve_nosetid(struct image_params *imgp) { imgp->credential_setid = false; if (imgp->newcred != NULL) { crfree(imgp->newcred); imgp->newcred = NULL; } } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. */ static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace) { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *oldcred; struct uidinfo *euip = NULL; uintptr_t stack_base; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct ktr_io_params *kiop; #endif struct vnode *oldtextvp = NULL, *newtextvp; int credential_changing; #ifdef MAC struct label *interpvplabel = NULL; int will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif int error, i, orig_osrel; uint32_t orig_fctl0; Elf_Brandinfo *orig_brandinfo; static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; #ifdef KTRACE kiop = NULL; #endif /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. 
in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; orig_osrel = p->p_osrel; orig_fctl0 = p->p_fctl0; orig_brandinfo = p->p_elf_brandinfo; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif /* * Translate the file name. namei() returns a vnode pointer * in ni_vp among other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. */ if (args->fname != NULL) { NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW | SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp); if (error) goto exec_fail; vn_lock(newtextvp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions. Also 'opens' file and sets its vnode to * text mode. */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; imgp->proc->p_fctl0 = 0; imgp->proc->p_elf_brandinfo = NULL; /* * Implement image setuid/setgid. * * Determine new credentials before attempting image activators * so that it can be used by process_exec handlers to determine * credential/setid changes. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = 0; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp); credential_changing |= will_transition; #endif /* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. 
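 *
 * For context, the parent-death signal being cleared here is the one a
 * process asks for with procctl(2); a minimal userspace sketch (signal
 * choice arbitrary, error handling trimmed) looks roughly like:
 *
 *	#include <sys/procctl.h>
 *	#include <err.h>
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int sig = SIGTERM;
 *
 *		// Ask for SIGTERM when the parent exits; a later execve() of
 *		// a set-id image resets this back to 0, as below.
 *		if (procctl(P_PID, getpid(), PROC_PDEATHSIG_CTL, &sig) == -1)
 *			err(1, "procctl");
 *		pause();
 *	}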
*/ if (credential_changing) imgp->proc->p_pdeathsig = 0; if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { imgp->credential_setid = true; VOP_UNLOCK(imgp->vp); imgp->newcred = crdup(oldcred); if (attr.va_mode & S_ISUID) { euip = uifind(attr.va_uid); change_euid(imgp->newcred, euip); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (attr.va_mode & S_ISGID) change_egid(imgp->newcred, attr.va_gid); /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } else { /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { VOP_UNLOCK(imgp->vp); imgp->newcred = crdup(oldcred); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } } /* The new credentials are installed into the process later. */ /* * Do the best to calculate the full path to the image file. */ if (args->fname != NULL && args->fname[0] == '/') imgp->execpath = args->fname; else { VOP_UNLOCK(imgp->vp); if (vn_fullpath(imgp->vp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) error = ENOEXEC; goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * The text reference needs to be removed for scripts. * There is a short period before we determine that * something is a script where text reference is active. * The vnode lock is held over this entire period * so nothing should illegitimately be blocked. 
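 *
 * (This is the path that makes "#!" scripts work: for a hypothetical
 * /tmp/hello.sh whose first line is "#!/bin/sh", the shell image activator
 * sets imgp->interpreter_name to "/bin/sh", the code below restarts at
 * "interpret:", and the script path is handed to the interpreter as an
 * argument.  A plain userspace call is enough to exercise it:
 *
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		char *argv[] = { "hello.sh", NULL };
 *		char *envv[] = { NULL };
 *
 *		execve("/tmp/hello.sh", argv, envv);
 *		return (1);		// only reached on error
 *	}
 *
 * ...and effectively ends up running /bin/sh with /tmp/hello.sh as its
 * argument.)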
*/ MPASS(imgp->textset); VOP_UNSET_TEXT_CHECKED(newtextvp); imgp->textset = false; /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = 0; } vput(newtextvp); vm_object_deallocate(imgp->object); imgp->object = NULL; execve_nosetid(imgp); imgp->execpath = NULL; free(imgp->freepath, M_TEMP); imgp->freepath = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp); if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* ABI enforces the use of Capsicum. Switch into capabilities mode. */ if (SV_PROC_FLAG(p, SV_CAPSICUM)) sys_cap_enter(td, NULL); /* * Copy out strings (args and env) and initialize stack base. */ error = (*p->p_sysent->sv_copyout_strings)(imgp, &stack_base); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * Stack setup. */ error = (*p->p_sysent->sv_fixup)(&stack_base, imgp); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } if (args->fdp != NULL) { /* Install a brand new file descriptor table. */ fdinstall_remapped(td, args->fdp); args->fdp = NULL; } else { /* * Keep on using the existing file descriptor table. For * security and other reasons, the file descriptor table * cannot be shared after an exec. */ fdunshare(td); pdunshare(td); /* close files on exec */ fdcloseexec(td); } /* * Malloc things before we need locks. */ i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. 
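 *
 * The visible effect of that reset is the standard POSIX exec behaviour:
 * a caught signal reverts to SIG_DFL in the new image, while SIG_IGN is
 * preserved.  A small userspace sketch (signals chosen arbitrarily):
 *
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	static void handler(int sig) { (void)sig; }
 *
 *	int
 *	main(void)
 *	{
 *		signal(SIGUSR1, handler);	// reset to SIG_DFL by execsigs()
 *		signal(SIGUSR2, SIG_IGN);	// still ignored after the exec
 *		execl("/bin/sh", "sh", "-c",
 *		    "kill -USR2 $$; echo USR2 ignored; kill -USR1 $$",
 *		    (char *)NULL);
 *		return (1);
 *	}
 *
 * The exec'ed shell survives the ignored SIGUSR2 but is terminated by
 * SIGUSR1, whose handler did not survive the exec.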
*/ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if ((p->p_flag2 & P2_STKGAP_DISABLE_EXEC) == 0) p->p_flag2 &= ~P2_STKGAP_DISABLE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); /* STOPs are no longer ignored, arrange for AST */ signotify(td); } if ((imgp->sysent->sv_setid_allowed != NULL && !(*imgp->sysent->sv_setid_allowed)(td, imgp)) || (p->p_flag2 & P2_NO_NEW_PRIVS) != 0) execve_nosetid(imgp); /* * Implement image setuid/setgid installation. */ if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE kiop = ktrprocexec(p); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp); fdsetugidsafety(td); error = fdcheckstd(td); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto exec_fail_dealloc; PROC_LOCK(p); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, imgp->newcred, imgp->vp, interpvplabel, imgp); } #endif } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; } /* * Set the new credentials. */ if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); crfree(oldcred); oldcred = NULL; } /* * Store the vp for use in procfs. This vnode was referenced by namei * or fgetvp_exec. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; PROC_UNLOCK(p); #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. 
*/ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { VOP_UNLOCK(imgp->vp); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } #endif /* Set values passed into the program in registers. */ (*p->p_sysent->sv_setregs)(td, imgp, stack_base); VOP_MMAPPED(imgp->vp); SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: if (error != 0) { p->p_osrel = orig_osrel; p->p_fctl0 = orig_fctl0; p->p_elf_brandinfo = orig_brandinfo; } if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (args->fname) NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (imgp->textset) VOP_UNSET_TEXT_CHECKED(imgp->vp); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { if (p->p_ptevents & PTRACE_EXEC) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_EXEC) td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); } } else { exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); } if (imgp->newcred != NULL && oldcred != NULL) crfree(imgp->newcred); #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); /* * Handle deferred decrement of ref counts. */ if (oldtextvp != NULL) vrele(oldtextvp); #ifdef KTRACE ktr_io_params_free(kiop); #endif pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); if (euip != NULL) uifree(euip); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exec_cleanup(td, oldvmspace); exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif /* * We don't want cpu_set_syscall_retval() to overwrite any of * the register values put in place by exec_setregs(). * Implementations of cpu_set_syscall_retval() will leave * registers unmodified when returning EJUSTRETURN. */ return (error == 0 ? EJUSTRETURN : error); } void exec_cleanup(struct thread *td, struct vmspace *oldvmspace) { if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(td->td_proc->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } } int exec_map_first_page(struct image_params *imgp) { vm_object_t object; vm_page_t m; int error; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); #if VM_NRESERVLEVEL > 0 if ((object->flags & OBJ_COLORED) == 0) { VM_OBJECT_WLOCK(object); vm_object_color(object, 0); VM_OBJECT_WUNLOCK(object); } #endif error = vm_page_grab_valid_unlocked(&m, object, 0, VM_ALLOC_COUNT(VM_INITIAL_PAGEIN) | VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); if (error != VM_PAGER_OK) return (EIO); imgp->firstpage = sf_buf_alloc(m, 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(struct image_params *imgp) { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_unwire(m, PQ_ACTIVE); } } void exec_onexec_old(struct thread *td) { sigfastblock_clear(td); umtx_exec(td->td_proc); } /* * Destroy old address space, and allocate a new stack. 
* The new stack is only sgrowsiz large because it is grown * automatically on a page fault. */ int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; struct thread *td = curthread; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; vm_prot_t stack_prot; u_long ssiz; imgp->vmspace_destroyed = 1; imgp->sysent = sv; if (p->p_sysent->sv_onexec_old != NULL) p->p_sysent->sv_onexec_old(td); itimers_exec(p); EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (refcount_load(&vmspace->vm_refcnt) == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser && cpu_exec_vmspace_reuse(p, map)) { shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); /* * An exec terminates mlockall(MCL_FUTURE). * ASLR and W^X states must be re-evaluated. */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | MAP_ASLR_IGNSTART | MAP_WXORX); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } map->flags |= imgp->map_flags; /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error != KERN_SUCCESS) { vm_object_deallocate(obj); return (vm_mmap_to_errno(error)); } } /* Allocate a new stack */ if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } imgp->eff_stack_sz = lim_cur(curthread, RLIMIT_STACK); if (ssiz < imgp->eff_stack_sz) imgp->eff_stack_sz = ssiz; stack_addr = sv->sv_usrstack - ssiz; stack_prot = obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, stack_prot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error != KERN_SUCCESS) { uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x " "failed mach error %d errno %d\n", (uintmax_t)ssiz, stack_prot, error, vm_mmap_to_errno(error)); return (vm_mmap_to_errno(error)); } /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. 
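 *
 * The loops below simply walk the caller's NULL-terminated pointer arrays
 * with fueword(), so the matching userspace side is the familiar
 * (arguments here are arbitrary):
 *
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		char *argv[] = { "ls", "-l", "/tmp", NULL };	// NULL stops the argv walk
 *		char *envv[] = { "LANG=C", NULL };		// NULL stops the envv walk
 *
 *		execve("/bin/ls", argv, envv);
 *		return (1);
 *	}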
*/ int exec_copyin_args(struct image_args *args, const char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long arg, env; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ error = exec_args_add_fname(args, fname, segflg); if (error != 0) goto err_exit; /* * extract arguments first */ for (;;) { error = fueword(argv++, &arg); if (error == -1) { error = EFAULT; goto err_exit; } if (arg == 0) break; error = exec_args_add_arg(args, (char *)(uintptr_t)arg, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &env); if (error == -1) { error = EFAULT; goto err_exit; } if (env == 0) break; error = exec_args_add_env(args, (char *)(uintptr_t)env, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } int exec_copyin_data_fds(struct thread *td, struct image_args *args, const void *data, size_t datalen, const int *fds, size_t fdslen) { struct filedesc *ofdp; const char *p; int *kfds; int error; memset(args, '\0', sizeof(*args)); ofdp = td->td_proc->p_fd; if (datalen >= ARG_MAX || fdslen >= ofdp->fd_nfiles) return (E2BIG); error = exec_alloc_args(args); if (error != 0) return (error); args->begin_argv = args->buf; args->stringspace = ARG_MAX; if (datalen > 0) { /* * Argument buffer has been provided. Copy it into the * kernel as a single string and add a terminating null * byte. */ error = copyin(data, args->begin_argv, datalen); if (error != 0) goto err_exit; args->begin_argv[datalen] = '\0'; args->endp = args->begin_argv + datalen + 1; args->stringspace -= datalen + 1; /* * Traditional argument counting. Count the number of * null bytes. */ for (p = args->begin_argv; p < args->endp; ++p) if (*p == '\0') ++args->argc; } else { /* No argument buffer provided. */ args->endp = args->begin_argv; } /* Create new file descriptor table. 
*/ kfds = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK); error = copyin(fds, kfds, fdslen * sizeof(int)); if (error != 0) { free(kfds, M_TEMP); goto err_exit; } error = fdcopy_remapped(ofdp, kfds, fdslen, &args->fdp); free(kfds, M_TEMP); if (error != 0) goto err_exit; return (0); err_exit: exec_free_args(args); return (error); } struct exec_args_kva { vm_offset_t addr; u_int gen; SLIST_ENTRY(exec_args_kva) next; }; DPCPU_DEFINE_STATIC(struct exec_args_kva *, exec_args_kva); static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist; static struct mtx exec_args_kva_mtx; static u_int exec_args_gen; static void exec_prealloc_args_kva(void *arg __unused) { struct exec_args_kva *argkva; u_int i; SLIST_INIT(&exec_args_kva_freelist); mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF); for (i = 0; i < exec_map_entries; i++) { argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK); argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size); argkva->gen = exec_args_gen; SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); } } SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL); static vm_offset_t exec_alloc_args_kva(void **cookie) { struct exec_args_kva *argkva; argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_PTR(exec_args_kva)); if (argkva == NULL) { mtx_lock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL) (void)mtx_sleep(&exec_args_kva_freelist, &exec_args_kva_mtx, 0, "execkva", 0); SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next); mtx_unlock(&exec_args_kva_mtx); } kasan_mark((void *)argkva->addr, exec_map_entry_size, exec_map_entry_size, 0); *(struct exec_args_kva **)cookie = argkva; return (argkva->addr); } static void exec_release_args_kva(struct exec_args_kva *argkva, u_int gen) { vm_offset_t base; base = argkva->addr; kasan_mark((void *)argkva->addr, 0, exec_map_entry_size, KASAN_EXEC_ARGS_FREED); if (argkva->gen != gen) { (void)vm_map_madvise(exec_map, base, base + exec_map_entry_size, MADV_FREE); argkva->gen = gen; } if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva), (uintptr_t)NULL, (uintptr_t)argkva)) { mtx_lock(&exec_args_kva_mtx); SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); wakeup_one(&exec_args_kva_freelist); mtx_unlock(&exec_args_kva_mtx); } } static void exec_free_args_kva(void *cookie) { exec_release_args_kva(cookie, exec_args_gen); } static void exec_args_kva_lowmem(void *arg __unused) { SLIST_HEAD(, exec_args_kva) head; struct exec_args_kva *argkva; u_int gen; int i; gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1; /* * Force an madvise of each KVA range. Any currently allocated ranges * will have MADV_FREE applied once they are freed. */ SLIST_INIT(&head); mtx_lock(&exec_args_kva_mtx); SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva); mtx_unlock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&head)) != NULL) { SLIST_REMOVE_HEAD(&head, next); exec_release_args_kva(argkva, gen); } CPU_FOREACH(i) { argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva)); if (argkva != NULL) exec_release_args_kva(argkva, gen); } } EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL, EVENTHANDLER_PRI_ANY); /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. 
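 *
 * The exec_args_kva machinery above amounts to "try a lock-free cached
 * slot first, then fall back to a mutex-protected free list and sleep if
 * it is empty".  A stripped-down userspace sketch of that pattern, with a
 * single cache slot standing in for the per-CPU pointers and the free
 * list assumed to be seeded at startup (as exec_prealloc_args_kva() does):
 *
 *	#include <pthread.h>
 *	#include <stdatomic.h>
 *
 *	#define NBUFS	4
 *
 *	static _Atomic(void *) cached;			// fast-path slot
 *	static void *freelist[NBUFS];
 *	static int nfree;
 *	static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
 *	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
 *
 *	static void *
 *	buf_alloc(void)
 *	{
 *		void *p = atomic_exchange(&cached, NULL);
 *
 *		if (p != NULL)
 *			return (p);			// like the DPCPU hit
 *		pthread_mutex_lock(&mtx);
 *		while (nfree == 0)
 *			pthread_cond_wait(&cv, &mtx);	// like msleep("execkva")
 *		p = freelist[--nfree];
 *		pthread_mutex_unlock(&mtx);
 *		return (p);
 *	}
 *
 *	static void
 *	buf_free(void *p)
 *	{
 *		void *want = NULL;
 *
 *		// Park it in the fast-path slot if that slot is empty...
 *		if (atomic_compare_exchange_strong(&cached, &want, p))
 *			return;
 *		// ...otherwise back onto the free list, waking one sleeper.
 *		pthread_mutex_lock(&mtx);
 *		freelist[nfree++] = p;
 *		pthread_cond_signal(&cv);
 *		pthread_mutex_unlock(&mtx);
 *	}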
*/ int exec_alloc_args(struct image_args *args) { args->buf = (char *)exec_alloc_args_kva(&args->bufkva); return (0); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { exec_free_args_kva(args->bufkva); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } if (args->fdp != NULL) fdescfree_remapped(args->fdp); } /* * A set to functions to fill struct image args. * * NOTE: exec_args_add_fname() must be called (possibly with a NULL * fname) before the other functions. All exec_args_add_arg() calls must * be made before any exec_args_add_env() calls. exec_args_adjust_args() * may be called any time after exec_args_add_fname(). * * exec_args_add_fname() - install path to be executed * exec_args_add_arg() - append an argument string * exec_args_add_env() - append an env string * exec_args_adjust_args() - adjust location of the argument list to * allow new arguments to be prepended */ int exec_args_add_fname(struct image_args *args, const char *fname, enum uio_seg segflg) { int error; size_t length; KASSERT(args->fname == NULL, ("fname already appended")); KASSERT(args->endp == NULL, ("already appending to args")); if (fname != NULL) { args->fname = args->buf; error = segflg == UIO_SYSSPACE ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) return (error == ENAMETOOLONG ? E2BIG : error); } else length = 0; /* Set up for _arg_*()/_env_*() */ args->endp = args->buf + length; /* begin_argv must be set and kept updated */ args->begin_argv = args->endp; KASSERT(exec_map_entry_size - length >= ARG_MAX, ("too little space remaining for arguments %zu < %zu", exec_map_entry_size - length, (size_t)ARG_MAX)); args->stringspace = ARG_MAX; return (0); } static int exec_args_add_str(struct image_args *args, const char *str, enum uio_seg segflg, int *countp) { int error; size_t length; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); error = (segflg == UIO_SYSSPACE) ? copystr(str, args->endp, args->stringspace, &length) : copyinstr(str, args->endp, args->stringspace, &length); if (error != 0) return (error == ENAMETOOLONG ? 
E2BIG : error); args->stringspace -= length; args->endp += length; (*countp)++; return (0); } int exec_args_add_arg(struct image_args *args, const char *argp, enum uio_seg segflg) { KASSERT(args->envc == 0, ("appending args after env")); return (exec_args_add_str(args, argp, segflg, &args->argc)); } int exec_args_add_env(struct image_args *args, const char *envp, enum uio_seg segflg) { if (args->envc == 0) args->begin_envv = args->endp; return (exec_args_add_str(args, envp, segflg, &args->envc)); } int exec_args_adjust_args(struct image_args *args, size_t consume, ssize_t extend) { ssize_t offset; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); offset = extend - consume; if (args->stringspace < offset) return (E2BIG); memmove(args->begin_argv + extend, args->begin_argv + consume, args->endp - args->begin_argv + consume); if (args->envc > 0) args->begin_envv += offset; args->endp += offset; args->stringspace -= offset; return (0); } char * exec_args_get_begin_envv(struct image_args *args) { KASSERT(args->endp != NULL, ("endp not initialized")); if (args->envc > 0) return (args->begin_envv); return (args->endp); } void exec_stackgap(struct image_params *imgp, uintptr_t *dp) { if (imgp->sysent->sv_stackgap == NULL || (imgp->proc->p_fctl0 & (NT_FREEBSD_FCTL_ASLR_DISABLE | NT_FREEBSD_FCTL_ASG_DISABLE)) != 0 || (imgp->map_flags & MAP_ASLR) == 0) return; imgp->sysent->sv_stackgap(imgp, dp); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ int exec_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc; char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; struct proc *p; size_t execpath_len; int error, szsigcode, szps; char canary[sizeof(long) * 8]; szps = sizeof(pagesizes[0]) * MAXPAGESIZES; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; szsigcode = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; imgp->ps_strings = arginfo; if (p->p_sysent->sv_sigcode_base == 0) { if (p->p_sysent->sv_szsigcode != NULL) szsigcode = *(p->p_sysent->sv_szsigcode); } destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); error = copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode); if (error != 0) return (error); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ destp -= szps; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = (void *)destp; error = copyout(pagesizes, imgp->pagesizes, szps); if (error != 0) return (error); imgp->pagesizeslen = szps; /* * Allocate room for the argument and environment strings. 
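 *
 * (rounddown2() below is the usual power-of-two truncation from
 * <sys/param.h>, x & ~(align - 1), so for example
 * rounddown2(0x7fffdf37, 8) == 0x7fffdf30; it is what keeps destp
 * pointer-aligned after each variable-sized copy above.)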
*/ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; exec_stackgap(imgp, &destp); if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. */ destp -= AT_COUNT * sizeof(Elf_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* * Fill in "ps_strings" struct for ps, w, etc. */ imgp->argv = vectp; if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* a null vector table pointer separates the argp's from the envp's */ if (suword(vectp++, 0) != 0) return (EFAULT); imgp->envv = vectp; if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* end of vector table is a null pointer */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int exec_check_permissions(struct image_params *imgp) { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. * * Add a text reference now so no one can write to the * executable while we're activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ error = VOP_SET_TEXT(vp); if (error != 0) return (error); imgp->textset = true; /* * Call filesystem specific open routine (which does nothing in the * general case). 
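 *
 * From userspace the checks above surface as ordinary errno values.  A
 * throwaway sketch (file names arbitrary, cleanup omitted):
 *
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		char *argv[] = { "x", NULL }, *envv[] = { NULL };
 *
 *		// No execute bit set -> EACCES from the va_mode test.
 *		close(open("/tmp/noexec.demo", O_CREAT | O_TRUNC | O_WRONLY, 0644));
 *		execve("/tmp/noexec.demo", argv, envv);
 *		printf("no x bit:   errno %d (EACCES %d)\n", errno, EACCES);
 *
 *		// Executable but zero length -> ENOEXEC from the va_size test.
 *		close(open("/tmp/empty.demo", O_CREAT | O_TRUNC | O_WRONLY, 0755));
 *		execve("/tmp/empty.demo", argv, envv);
 *		printf("empty file: errno %d (ENOEXEC %d)\n", errno, ENOEXEC);
 *		return (0);
 *	}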
*/ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = 1; return (error); } /* * Exec handler registration */ int exec_register(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; u_int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } /* * Write out a core segment to the compression stream. */ static int compress_chunk(struct coredump_params *cp, char *base, char *buf, u_int len) { u_int chunk_len; int error; while (len > 0) { chunk_len = MIN(len, CORE_BUF_SIZE); /* * We can get EFAULT error here. * In that case zero out the current chunk of the segment. */ error = copyin(base, buf, chunk_len); if (error != 0) bzero(buf, chunk_len); error = compressor_write(cp->comp, buf, chunk_len); if (error != 0) break; base += chunk_len; len -= chunk_len; } return (error); } int core_write(struct coredump_params *cp, const void *base, size_t len, off_t offset, enum uio_seg seg, size_t *resid) { return (vn_rdwr_inchunks(UIO_WRITE, cp->vp, __DECONST(void *, base), len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, cp->active_cred, cp->file_cred, resid, cp->td)); } int core_output(char *base, size_t len, off_t offset, struct coredump_params *cp, void *tmpbuf) { vm_map_t map; struct mount *mp; size_t resid, runlen; int error; bool success; KASSERT((uintptr_t)base % PAGE_SIZE == 0, ("%s: user address %p is not page-aligned", __func__, base)); if (cp->comp != NULL) return (compress_chunk(cp, base, tmpbuf, len)); map = &cp->td->td_proc->p_vmspace->vm_map; for (; len > 0; base += runlen, offset += runlen, len -= runlen) { /* * Attempt to page in all virtual pages in the range. If a * virtual page is not backed by the pager, it is represented as * a hole in the file. This can occur with zero-filled * anonymous memory or truncated files, for example. */ for (runlen = 0; runlen < len; runlen += PAGE_SIZE) { error = vm_fault(map, (uintptr_t)base + runlen, VM_PROT_READ, VM_FAULT_NOFILL, NULL); if (runlen == 0) success = error == KERN_SUCCESS; else if ((error == KERN_SUCCESS) != success) break; } if (success) { error = core_write(cp, base, runlen, offset, UIO_USERSPACE, &resid); if (error != 0) { if (error != EFAULT) break; /* * EFAULT may be returned if the user mapping * could not be accessed, e.g., because a mapped * file has been truncated. Skip the page if no * progress was made, to protect against a * hypothetical scenario where vm_fault() was * successful but core_write() returns EFAULT * anyway. 
*/ runlen -= resid; if (runlen == 0) { success = false; runlen = PAGE_SIZE; } } } if (!success) { error = vn_start_write(cp->vp, &mp, V_WAIT); if (error != 0) break; vn_lock(cp->vp, LK_EXCLUSIVE | LK_RETRY); error = vn_truncate_locked(cp->vp, offset + runlen, false, cp->td->td_ucred); VOP_UNLOCK(cp->vp); vn_finished_write(mp); if (error != 0) break; } } return (error); } /* * Drain into a core file. */ int sbuf_drain_core_output(void *arg, const char *data, int len) { struct coredump_params *cp; struct proc *p; int error, locked; cp = arg; p = cp->td->td_proc; /* * Some kern_proc out routines that print to this sbuf may * call us with the process lock held. Draining with the * non-sleepable lock held is unsafe. The lock is needed for * those routines when dumping a live process. In our case we * can safely release the lock before draining and acquire * again after. */ locked = PROC_LOCKED(p); if (locked) PROC_UNLOCK(p); if (cp->comp != NULL) error = compressor_write(cp->comp, __DECONST(char *, data), len); else error = core_write(cp, __DECONST(void *, data), len, cp->offset, UIO_SYSSPACE, NULL); if (locked) PROC_LOCK(p); if (error != 0) return (-error); cp->offset += len; return (len); } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index b56c33203906..1b97424c58ca 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -1,1470 +1,1469 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1994, Sean Eric Fagan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include - #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #endif /* * Functions implemented using PROC_ACTION(): * * proc_read_regs(proc, regs) * Get the current user-visible register set from the process * and copy it into the regs structure (). * The process is stopped at the time read_regs is called. * * proc_write_regs(proc, regs) * Update the current register set from the passed in regs * structure. Take care to avoid clobbering special CPU * registers or privileged bits in the PSL. * Depending on the architecture this may have fix-up work to do, * especially if the IAR or PCW are modified. * The process is stopped at the time write_regs is called. * * proc_read_fpregs, proc_write_fpregs * deal with the floating point register set, otherwise as above. * * proc_read_dbregs, proc_write_dbregs * deal with the processor debug register set, otherwise as above. * * proc_sstep(proc) * Arrange for the process to trap after executing a single instruction. */ #define PROC_ACTION(action) do { \ int error; \ \ PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); \ if ((td->td_proc->p_flag & P_INMEM) == 0) \ error = EIO; \ else \ error = (action); \ return (error); \ } while (0) int proc_read_regs(struct thread *td, struct reg *regs) { PROC_ACTION(fill_regs(td, regs)); } int proc_write_regs(struct thread *td, struct reg *regs) { PROC_ACTION(set_regs(td, regs)); } int proc_read_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(fill_dbregs(td, dbregs)); } int proc_write_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(set_dbregs(td, dbregs)); } /* * Ptrace doesn't support fpregs at all, and there are no security holes * or translations for fpregs, so we can just copy them. */ int proc_read_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(fill_fpregs(td, fpregs)); } int proc_write_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(set_fpregs(td, fpregs)); } #ifdef COMPAT_FREEBSD32 /* For 32 bit binaries, we need to expose the 32 bit regs layouts. */ int proc_read_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(fill_regs32(td, regs32)); } int proc_write_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(set_regs32(td, regs32)); } int proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(fill_dbregs32(td, dbregs32)); } int proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(set_dbregs32(td, dbregs32)); } int proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(fill_fpregs32(td, fpregs32)); } int proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(set_fpregs32(td, fpregs32)); } #endif int proc_sstep(struct thread *td) { PROC_ACTION(ptrace_single_step(td)); } int proc_rwmem(struct proc *p, struct uio *uio) { vm_map_t map; vm_offset_t pageno; /* page number */ vm_prot_t reqprot; int error, fault_flags, page_offset, writing; /* * Assert that someone has locked this vmspace. (Should be * curthread but we can't assert that.) This keeps the process * from exiting out from under us until this operation completes. */ PROC_ASSERT_HELD(p); PROC_LOCK_ASSERT(p, MA_NOTOWNED); /* * The map we want... 
*/ map = &p->p_vmspace->vm_map; /* * If we are writing, then we request vm_fault() to create a private * copy of each page. Since these copies will not be writeable by the * process, we must explicity request that they be dirtied. */ writing = uio->uio_rw == UIO_WRITE; reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ; fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL; /* * Only map in one page at a time. We don't have to, but it * makes things easier. This way is trivial - right? */ do { vm_offset_t uva; u_int len; vm_page_t m; uva = (vm_offset_t)uio->uio_offset; /* * Get the page number of this segment. */ pageno = trunc_page(uva); page_offset = uva - pageno; /* * How many bytes to copy */ len = min(PAGE_SIZE - page_offset, uio->uio_resid); /* * Fault and hold the page on behalf of the process. */ error = vm_fault(map, pageno, reqprot, fault_flags, &m); if (error != KERN_SUCCESS) { if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; else error = EFAULT; break; } /* * Now do the i/o move. */ error = uiomove_fromphys(&m, page_offset, len, uio); /* Make the I-cache coherent for breakpoints. */ if (writing && error == 0) { vm_map_lock_read(map); if (vm_map_check_protection(map, pageno, pageno + PAGE_SIZE, VM_PROT_EXECUTE)) vm_sync_icache(map, uva, len); vm_map_unlock_read(map); } /* * Release the page. */ vm_page_unwire(m, PQ_ACTIVE); } while (error == 0 && uio->uio_resid > 0); return (error); } static ssize_t proc_iop(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len, enum uio_rw rw) { struct iovec iov; struct uio uio; ssize_t slen; MPASS(len < SSIZE_MAX); slen = (ssize_t)len; iov.iov_base = (caddr_t)buf; iov.iov_len = len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = va; uio.uio_resid = slen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = rw; uio.uio_td = td; proc_rwmem(p, &uio); if (uio.uio_resid == slen) return (-1); return (slen - uio.uio_resid); } ssize_t proc_readmem(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len) { return (proc_iop(td, p, va, buf, len, UIO_READ)); } ssize_t proc_writemem(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len) { return (proc_iop(td, p, va, buf, len, UIO_WRITE)); } static int ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve) { struct vattr vattr; vm_map_t map; vm_map_entry_t entry; vm_object_t obj, tobj, lobj; struct vmspace *vm; struct vnode *vp; char *freepath, *fullpath; u_int pathlen; int error, index; error = 0; obj = NULL; vm = vmspace_acquire_ref(p); map = &vm->vm_map; vm_map_lock_read(map); do { KASSERT((map->header.eflags & MAP_ENTRY_IS_SUB_MAP) == 0, ("Submap in map header")); index = 0; VM_MAP_ENTRY_FOREACH(entry, map) { if (index >= pve->pve_entry && (entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) break; index++; } if (index < pve->pve_entry) { error = EINVAL; break; } if (entry == &map->header) { error = ENOENT; break; } /* We got an entry. */ pve->pve_entry = index + 1; pve->pve_timestamp = map->timestamp; pve->pve_start = entry->start; pve->pve_end = entry->end - 1; pve->pve_offset = entry->offset; pve->pve_prot = entry->protection; /* Backing object's path needed? 
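 *
 * Yes, whenever the debugger asks for it; the usual (illustrative)
 * consumer walks the map like this, with pid already stopped under
 * ptrace:
 *
 *	#include <sys/types.h>
 *	#include <sys/ptrace.h>
 *	#include <limits.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	static void
 *	dump_map(pid_t pid)
 *	{
 *		struct ptrace_vm_entry pve;
 *		char path[PATH_MAX];
 *
 *		memset(&pve, 0, sizeof(pve));	// pve_entry = 0: first entry
 *		for (;;) {
 *			pve.pve_path = path;
 *			pve.pve_pathlen = sizeof(path);
 *			if (ptrace(PT_VM_ENTRY, pid, (caddr_t)&pve, 0) == -1)
 *				break;		// ENOENT once entries run out
 *			printf("%#jx-%#jx %s\n", (uintmax_t)pve.pve_start,
 *			    (uintmax_t)pve.pve_end,
 *			    pve.pve_pathlen != 0 ? path : "-");
 *		}
 *	}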
*/ if (pve->pve_pathlen == 0) break; pathlen = pve->pve_pathlen; pve->pve_pathlen = 0; obj = entry->object.vm_object; if (obj != NULL) VM_OBJECT_RLOCK(obj); } while (0); vm_map_unlock_read(map); pve->pve_fsid = VNOVAL; pve->pve_fileid = VNOVAL; if (error == 0 && obj != NULL) { lobj = obj; for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; pve->pve_offset += tobj->backing_object_offset; } vp = vm_object_vnode(lobj); if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { freepath = NULL; fullpath = NULL; vn_fullpath(vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &vattr, td->td_ucred) == 0) { pve->pve_fileid = vattr.va_fileid; pve->pve_fsid = vattr.va_fsid; } vput(vp); if (fullpath != NULL) { pve->pve_pathlen = strlen(fullpath) + 1; if (pve->pve_pathlen <= pathlen) { error = copyout(fullpath, pve->pve_path, pve->pve_pathlen); } else error = ENAMETOOLONG; } if (freepath != NULL) free(freepath, M_TEMP); } } vmspace_free(vm); if (error == 0) CTR3(KTR_PTRACE, "PT_VM_ENTRY: pid %d, entry %d, start %p", p->p_pid, pve->pve_entry, pve->pve_start); return (error); } /* * Process debugging system call. */ #ifndef _SYS_SYSPROTO_H_ struct ptrace_args { int req; pid_t pid; caddr_t addr; int data; }; #endif int sys_ptrace(struct thread *td, struct ptrace_args *uap) { /* * XXX this obfuscation is to reduce stack usage, but the register * structs may be too large to put on the stack anyway. */ union { struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct ptrace_coredump pc; struct dbreg dbreg; struct fpreg fpreg; struct reg reg; char args[sizeof(td->td_sa.args)]; struct ptrace_sc_ret psr; int ptevents; } r; void *addr; int error = 0; AUDIT_ARG_PID(uap->pid); AUDIT_ARG_CMD(uap->req); AUDIT_ARG_VALUE(uap->data); addr = &r; switch (uap->req) { case PT_GET_EVENT_MASK: case PT_LWPINFO: case PT_GET_SC_ARGS: case PT_GET_SC_RET: break; case PT_GETREGS: bzero(&r.reg, sizeof(r.reg)); break; case PT_GETFPREGS: bzero(&r.fpreg, sizeof(r.fpreg)); break; case PT_GETDBREGS: bzero(&r.dbreg, sizeof(r.dbreg)); break; case PT_SETREGS: error = copyin(uap->addr, &r.reg, sizeof(r.reg)); break; case PT_SETFPREGS: error = copyin(uap->addr, &r.fpreg, sizeof(r.fpreg)); break; case PT_SETDBREGS: error = copyin(uap->addr, &r.dbreg, sizeof(r.dbreg)); break; case PT_SET_EVENT_MASK: if (uap->data != sizeof(r.ptevents)) error = EINVAL; else error = copyin(uap->addr, &r.ptevents, uap->data); break; case PT_IO: error = copyin(uap->addr, &r.piod, sizeof(r.piod)); break; case PT_VM_ENTRY: error = copyin(uap->addr, &r.pve, sizeof(r.pve)); break; case PT_COREDUMP: if (uap->data != sizeof(r.pc)) error = EINVAL; else error = copyin(uap->addr, &r.pc, uap->data); break; default: addr = uap->addr; break; } if (error) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data); if (error) return (error); switch (uap->req) { case PT_VM_ENTRY: error = copyout(&r.pve, uap->addr, sizeof(r.pve)); break; case PT_IO: error = copyout(&r.piod, uap->addr, sizeof(r.piod)); break; case PT_GETREGS: error = copyout(&r.reg, uap->addr, sizeof(r.reg)); break; case PT_GETFPREGS: error = copyout(&r.fpreg, uap->addr, sizeof(r.fpreg)); break; case PT_GETDBREGS: error = copyout(&r.dbreg, uap->addr, sizeof(r.dbreg)); break; case PT_GET_EVENT_MASK: /* NB: The size in uap->data is validated in kern_ptrace(). 
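 *
 * The matching (illustrative) debugger-side calls pass exactly
 * sizeof(int) in data, as that check requires:
 *
 *	#include <sys/types.h>
 *	#include <sys/ptrace.h>
 *
 *	static void
 *	trace_forks_too(pid_t pid)	// pid: an attached, stopped debuggee
 *	{
 *		int events;
 *
 *		ptrace(PT_GET_EVENT_MASK, pid, (caddr_t)&events, sizeof(events));
 *		events |= PTRACE_FORK | PTRACE_LWP;
 *		ptrace(PT_SET_EVENT_MASK, pid, (caddr_t)&events, sizeof(events));
 *	}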
*/ error = copyout(&r.ptevents, uap->addr, uap->data); break; case PT_LWPINFO: /* NB: The size in uap->data is validated in kern_ptrace(). */ error = copyout(&r.pl, uap->addr, uap->data); break; case PT_GET_SC_ARGS: error = copyout(r.args, uap->addr, MIN(uap->data, sizeof(r.args))); break; case PT_GET_SC_RET: error = copyout(&r.psr, uap->addr, MIN(uap->data, sizeof(r.psr))); break; } return (error); } #ifdef COMPAT_FREEBSD32 /* * PROC_READ(regs, td2, addr); * becomes either: * proc_read_regs(td2, addr); * or * proc_read_regs32(td2, addr); * .. except this is done at runtime. There is an additional * complication in that PROC_WRITE disallows 32 bit consumers * from writing to 64 bit address space targets. */ #define PROC_READ(w, t, a) wrap32 ? \ proc_read_ ## w ## 32(t, a) : \ proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) wrap32 ? \ (safe ? proc_write_ ## w ## 32(t, a) : EINVAL ) : \ proc_write_ ## w (t, a) #else #define PROC_READ(w, t, a) proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) proc_write_ ## w (t, a) #endif void proc_set_traced(struct proc *p, bool stop) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); p->p_flag |= P_TRACED; if (stop) p->p_flag2 |= P2_PTRACE_FSTP; p->p_ptevents = PTRACE_DEFAULT; } void ptrace_unsuspend(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_TRACE | P_STOPPED_SIG | P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); itimer_proc_continue(p); kqtimer_proc_continue(p); } static int proc_can_ptrace(struct thread *td, struct proc *p) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_flag & P_WEXIT) != 0) return (ESRCH); if ((error = p_cansee(td, p)) != 0) return (error); if ((error = p_candebug(td, p)) != 0) return (error); /* not being traced... */ if ((p->p_flag & P_TRACED) == 0) return (EPERM); /* not being traced by YOU */ if (p->p_pptr != td->td_proc) return (EBUSY); /* not currently stopped */ if ((p->p_flag & P_STOPPED_TRACE) == 0 || p->p_suspcount != p->p_numthreads || (p->p_flag & P_WAITED) == 0) return (EBUSY); return (0); } static struct thread * ptrace_sel_coredump_thread(struct proc *p) { struct thread *td2; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS((p->p_flag & P_STOPPED_TRACE) != 0); FOREACH_THREAD_IN_PROC(p, td2) { if ((td2->td_dbgflags & TDB_SSWITCH) != 0) return (td2); } return (NULL); } int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) { struct iovec iov; struct uio uio; struct proc *curp, *p, *pp; struct thread *td2 = NULL, *td3; struct ptrace_io_desc *piod = NULL; struct ptrace_lwpinfo *pl; struct ptrace_sc_ret *psr; struct file *fp; struct ptrace_coredump *pc; struct thr_coredump_req *tcq; int error, num, tmp; lwpid_t tid = 0, *buf; #ifdef COMPAT_FREEBSD32 int wrap32 = 0, safe = 0; #endif bool proctree_locked, p2_req_set; curp = td->td_proc; proctree_locked = false; p2_req_set = false; /* Lock proctree before locking the process. 
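 *
 * For orientation, the caller-side sequence that drives the request
 * dispatch below is typically (illustrative, error handling omitted):
 *
 *	#include <sys/types.h>
 *	#include <sys/ptrace.h>
 *	#include <sys/wait.h>
 *
 *	static void
 *	poke(pid_t pid)
 *	{
 *		int status;
 *
 *		ptrace(PT_ATTACH, pid, NULL, 0);	// target stops with SIGSTOP
 *		waitpid(pid, &status, 0);		// wait for that stop
 *		// ... PT_GETREGS / PT_IO / PT_VM_ENTRY requests go here ...
 *		ptrace(PT_DETACH, pid, (caddr_t)1, 0);	// resume where it stopped
 *	}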
*/ switch (req) { case PT_TRACE_ME: case PT_ATTACH: case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_FOLLOW_FORK: case PT_LWP_EVENTS: case PT_GET_EVENT_MASK: case PT_SET_EVENT_MASK: case PT_DETACH: case PT_GET_SC_ARGS: sx_xlock(&proctree_lock); proctree_locked = true; break; default: break; } if (req == PT_TRACE_ME) { p = td->td_proc; PROC_LOCK(p); } else { if (pid <= PID_MAX) { if ((p = pfind(pid)) == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } } else { td2 = tdfind(pid, -1); if (td2 == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } p = td2->td_proc; tid = pid; pid = p->p_pid; } } AUDIT_ARG_PROCESS(p); if ((p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto fail; } if ((error = p_cansee(td, p)) != 0) goto fail; if ((error = p_candebug(td, p)) != 0) goto fail; /* * System processes can't be debugged. */ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } if (tid == 0) { if ((p->p_flag & P_STOPPED_TRACE) != 0) { KASSERT(p->p_xthread != NULL, ("NULL p_xthread")); td2 = p->p_xthread; } else { td2 = FIRST_THREAD_IN_PROC(p); } tid = td2->td_tid; } #ifdef COMPAT_FREEBSD32 /* * Test if we're a 32 bit client and what the target is. * Set the wrap controls accordingly. */ if (SV_CURPROC_FLAG(SV_ILP32)) { if (SV_PROC_FLAG(td2->td_proc, SV_ILP32)) safe = 1; wrap32 = 1; } #endif /* * Permissions check */ switch (req) { case PT_TRACE_ME: /* * Always legal, when there is a parent process which * could trace us. Otherwise, reject. */ if ((p->p_flag & P_TRACED) != 0) { error = EBUSY; goto fail; } if (p->p_pptr == initproc) { error = EPERM; goto fail; } break; case PT_ATTACH: /* Self */ if (p == td->td_proc) { error = EINVAL; goto fail; } /* Already traced */ if (p->p_flag & P_TRACED) { error = EBUSY; goto fail; } /* Can't trace an ancestor if you're being traced. */ if (curp->p_flag & P_TRACED) { for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) { if (pp == p) { error = EINVAL; goto fail; } } } /* OK */ break; case PT_CLEARSTEP: /* Allow thread to clear single step for itself */ if (td->td_tid == tid) break; /* FALLTHROUGH */ default: /* * Check for ptrace eligibility before waiting for * holds to drain. */ error = proc_can_ptrace(td, p); if (error != 0) goto fail; /* * Block parallel ptrace requests. Most important, do * not allow other thread in debugger to continue the * debuggee until coredump finished. */ while ((p->p_flag2 & P2_PTRACEREQ) != 0) { if (proctree_locked) sx_xunlock(&proctree_lock); error = msleep(&p->p_flag2, &p->p_mtx, PPAUSE | PCATCH | (proctree_locked ? PDROP : 0), "pptrace", 0); if (proctree_locked) { sx_xlock(&proctree_lock); PROC_LOCK(p); } if (error == 0 && td2->td_proc != p) error = ESRCH; if (error == 0) error = proc_can_ptrace(td, p); if (error != 0) goto fail; } /* Ok */ break; } /* * Keep this process around and request parallel ptrace() * request to wait until we finish this request. 
*/ MPASS((p->p_flag2 & P2_PTRACEREQ) == 0); p->p_flag2 |= P2_PTRACEREQ; p2_req_set = true; _PHOLD(p); /* * Actually do the requests */ td->td_retval[0] = 0; switch (req) { case PT_TRACE_ME: /* set my trace flag and "owner" so it can read/write me */ proc_set_traced(p, false); if (p->p_flag & P_PPWAIT) p->p_flag |= P_PPTRACE; CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid); break; case PT_ATTACH: /* security check done above */ /* * It would be nice if the tracing relationship was separate * from the parent relationship but that would require * another set of links in the proc struct or for "wait" * to scan the entire proc table. To make life easier, * we just re-parent the process we're trying to trace. * The old parent is remembered so we can put things back * on a "detach". */ proc_set_traced(p, true); proc_reparent(p, td->td_proc, false); CTR2(KTR_PTRACE, "PT_ATTACH: pid %d, oppid %d", p->p_pid, p->p_oppid); sx_xunlock(&proctree_lock); proctree_locked = false; MPASS(p->p_xthread == NULL); MPASS((p->p_flag & P_STOPPED_TRACE) == 0); /* * If already stopped due to a stop signal, clear the * existing stop before triggering a traced SIGSTOP. */ if ((p->p_flag & P_STOPPED_SIG) != 0) { PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_SIG | P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); } kern_psignal(p, SIGSTOP); break; case PT_CLEARSTEP: CTR2(KTR_PTRACE, "PT_CLEARSTEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_clear_single_step(td2); break; case PT_SETSTEP: CTR2(KTR_PTRACE, "PT_SETSTEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_single_step(td2); break; case PT_SUSPEND: CTR2(KTR_PTRACE, "PT_SUSPEND: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_SUSPEND; thread_lock(td2); td2->td_flags |= TDF_NEEDSUSPCHK; thread_unlock(td2); break; case PT_RESUME: CTR2(KTR_PTRACE, "PT_RESUME: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags &= ~TDB_SUSPEND; break; case PT_FOLLOW_FORK: CTR3(KTR_PTRACE, "PT_FOLLOW_FORK: pid %d %s -> %s", p->p_pid, p->p_ptevents & PTRACE_FORK ? "enabled" : "disabled", data ? "enabled" : "disabled"); if (data) p->p_ptevents |= PTRACE_FORK; else p->p_ptevents &= ~PTRACE_FORK; break; case PT_LWP_EVENTS: CTR3(KTR_PTRACE, "PT_LWP_EVENTS: pid %d %s -> %s", p->p_pid, p->p_ptevents & PTRACE_LWP ? "enabled" : "disabled", data ? 
"enabled" : "disabled"); if (data) p->p_ptevents |= PTRACE_LWP; else p->p_ptevents &= ~PTRACE_LWP; break; case PT_GET_EVENT_MASK: if (data != sizeof(p->p_ptevents)) { error = EINVAL; break; } CTR2(KTR_PTRACE, "PT_GET_EVENT_MASK: pid %d mask %#x", p->p_pid, p->p_ptevents); *(int *)addr = p->p_ptevents; break; case PT_SET_EVENT_MASK: if (data != sizeof(p->p_ptevents)) { error = EINVAL; break; } tmp = *(int *)addr; if ((tmp & ~(PTRACE_EXEC | PTRACE_SCE | PTRACE_SCX | PTRACE_FORK | PTRACE_LWP | PTRACE_VFORK)) != 0) { error = EINVAL; break; } CTR3(KTR_PTRACE, "PT_SET_EVENT_MASK: pid %d mask %#x -> %#x", p->p_pid, p->p_ptevents, tmp); p->p_ptevents = tmp; break; case PT_GET_SC_ARGS: CTR1(KTR_PTRACE, "PT_GET_SC_ARGS: pid %d", p->p_pid); if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) == 0 #ifdef COMPAT_FREEBSD32 || (wrap32 && !safe) #endif ) { error = EINVAL; break; } bzero(addr, sizeof(td2->td_sa.args)); bcopy(td2->td_sa.args, addr, td2->td_sa.callp->sy_narg * sizeof(register_t)); break; case PT_GET_SC_RET: if ((td2->td_dbgflags & (TDB_SCX)) == 0 #ifdef COMPAT_FREEBSD32 || (wrap32 && !safe) #endif ) { error = EINVAL; break; } psr = addr; bzero(psr, sizeof(*psr)); psr->sr_error = td2->td_errno; if (psr->sr_error == 0) { psr->sr_retval[0] = td2->td_retval[0]; psr->sr_retval[1] = td2->td_retval[1]; } CTR4(KTR_PTRACE, "PT_GET_SC_RET: pid %d error %d retval %#lx,%#lx", p->p_pid, psr->sr_error, psr->sr_retval[0], psr->sr_retval[1]); break; case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_DETACH: /* Zero means do not send any signal */ if (data < 0 || data > _SIG_MAXSIG) { error = EINVAL; break; } switch (req) { case PT_STEP: CTR3(KTR_PTRACE, "PT_STEP: tid %d (pid %d), sig = %d", td2->td_tid, p->p_pid, data); error = ptrace_single_step(td2); if (error) goto out; break; case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: if (addr != (void *)1) { error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr); if (error) goto out; } switch (req) { case PT_TO_SCE: p->p_ptevents |= PTRACE_SCE; CTR4(KTR_PTRACE, "PT_TO_SCE: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_TO_SCX: p->p_ptevents |= PTRACE_SCX; CTR4(KTR_PTRACE, "PT_TO_SCX: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_SYSCALL: p->p_ptevents |= PTRACE_SYSCALL; CTR4(KTR_PTRACE, "PT_SYSCALL: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_CONTINUE: CTR3(KTR_PTRACE, "PT_CONTINUE: pid %d, PC = %#lx, sig = %d", p->p_pid, (u_long)(uintfptr_t)addr, data); break; } break; case PT_DETACH: /* * Clear P_TRACED before reparenting * a detached process back to its original * parent. Otherwise the debugee will be set * as an orphan of the debugger. */ p->p_flag &= ~(P_TRACED | P_WAITED); /* * Reset the process parent. 
*/ if (p->p_oppid != p->p_pptr->p_pid) { PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); pp = proc_realparent(p); proc_reparent(p, pp, false); if (pp == initproc) p->p_sigparent = SIGCHLD; CTR3(KTR_PTRACE, "PT_DETACH: pid %d reparented to pid %d, sig %d", p->p_pid, pp->p_pid, data); } else { CTR2(KTR_PTRACE, "PT_DETACH: pid %d, sig %d", p->p_pid, data); } p->p_ptevents = 0; FOREACH_THREAD_IN_PROC(p, td3) { if ((td3->td_dbgflags & TDB_FSTP) != 0) { sigqueue_delete(&td3->td_sigqueue, SIGSTOP); } td3->td_dbgflags &= ~(TDB_XSIG | TDB_FSTP | TDB_SUSPEND); } if ((p->p_flag2 & P2_PTRACE_FSTP) != 0) { sigqueue_delete(&p->p_sigqueue, SIGSTOP); p->p_flag2 &= ~P2_PTRACE_FSTP; } /* should we send SIGCHLD? */ /* childproc_continued(p); */ break; } sx_xunlock(&proctree_lock); proctree_locked = false; sendsig: MPASS(!proctree_locked); /* * Clear the pending event for the thread that just * reported its event (p_xthread). This may not be * the thread passed to PT_CONTINUE, PT_STEP, etc. if * the debugger is resuming a different thread. * * Deliver any pending signal via the reporting thread. */ MPASS(p->p_xthread != NULL); p->p_xthread->td_dbgflags &= ~TDB_XSIG; p->p_xthread->td_xsig = data; p->p_xthread = NULL; p->p_xsig = data; /* * P_WKILLED is insurance that a PT_KILL/SIGKILL * always works immediately, even if another thread is * unsuspended first and attempts to handle a * different signal or if the POSIX.1b style signal * queue cannot accommodate any new signals. */ if (data == SIGKILL) proc_wkilled(p); /* * Unsuspend all threads. To leave a thread * suspended, use PT_SUSPEND to suspend it before * continuing the process. */ ptrace_unsuspend(p); break; case PT_WRITE_I: case PT_WRITE_D: td2->td_dbgflags |= TDB_USERWR; PROC_UNLOCK(p); error = 0; if (proc_writemem(td, p, (off_t)(uintptr_t)addr, &data, sizeof(int)) != sizeof(int)) error = ENOMEM; else CTR3(KTR_PTRACE, "PT_WRITE: pid %d: %p <= %#x", p->p_pid, addr, data); PROC_LOCK(p); break; case PT_READ_I: case PT_READ_D: PROC_UNLOCK(p); error = tmp = 0; if (proc_readmem(td, p, (off_t)(uintptr_t)addr, &tmp, sizeof(int)) != sizeof(int)) error = ENOMEM; else CTR3(KTR_PTRACE, "PT_READ: pid %d: %p >= %#x", p->p_pid, addr, tmp); td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_IO: piod = addr; iov.iov_base = piod->piod_addr; iov.iov_len = piod->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs; uio.uio_resid = piod->piod_len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_USERSPACE; uio.uio_td = td; switch (piod->piod_op) { case PIOD_READ_D: case PIOD_READ_I: CTR3(KTR_PTRACE, "PT_IO: pid %d: READ (%p, %#x)", p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid); uio.uio_rw = UIO_READ; break; case PIOD_WRITE_D: case PIOD_WRITE_I: CTR3(KTR_PTRACE, "PT_IO: pid %d: WRITE (%p, %#x)", p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid); td2->td_dbgflags |= TDB_USERWR; uio.uio_rw = UIO_WRITE; break; default: error = EINVAL; goto out; } PROC_UNLOCK(p); error = proc_rwmem(p, &uio); piod->piod_len -= uio.uio_resid; PROC_LOCK(p); break; case PT_KILL: CTR1(KTR_PTRACE, "PT_KILL: pid %d", p->p_pid); data = SIGKILL; goto sendsig; /* in PT_CONTINUE above */ case PT_SETREGS: CTR2(KTR_PTRACE, "PT_SETREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(regs, td2, addr); break; case PT_GETREGS: CTR2(KTR_PTRACE, "PT_GETREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(regs, td2, addr); break; case PT_SETFPREGS: CTR2(KTR_PTRACE, "PT_SETFPREGS: tid %d (pid %d)", 
td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(fpregs, td2, addr); break; case PT_GETFPREGS: CTR2(KTR_PTRACE, "PT_GETFPREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(fpregs, td2, addr); break; case PT_SETDBREGS: CTR2(KTR_PTRACE, "PT_SETDBREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(dbregs, td2, addr); break; case PT_GETDBREGS: CTR2(KTR_PTRACE, "PT_GETDBREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(dbregs, td2, addr); break; case PT_LWPINFO: if (data <= 0 || data > sizeof(*pl)) { error = EINVAL; break; } pl = addr; bzero(pl, sizeof(*pl)); pl->pl_lwpid = td2->td_tid; pl->pl_event = PL_EVENT_NONE; pl->pl_flags = 0; if (td2->td_dbgflags & TDB_XSIG) { pl->pl_event = PL_EVENT_SIGNAL; if (td2->td_si.si_signo != 0 && data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo)){ pl->pl_flags |= PL_FLAG_SI; pl->pl_siginfo = td2->td_si; } } if (td2->td_dbgflags & TDB_SCE) pl->pl_flags |= PL_FLAG_SCE; else if (td2->td_dbgflags & TDB_SCX) pl->pl_flags |= PL_FLAG_SCX; if (td2->td_dbgflags & TDB_EXEC) pl->pl_flags |= PL_FLAG_EXEC; if (td2->td_dbgflags & TDB_FORK) { pl->pl_flags |= PL_FLAG_FORKED; pl->pl_child_pid = td2->td_dbg_forked; if (td2->td_dbgflags & TDB_VFORK) pl->pl_flags |= PL_FLAG_VFORKED; } else if ((td2->td_dbgflags & (TDB_SCX | TDB_VFORK)) == TDB_VFORK) pl->pl_flags |= PL_FLAG_VFORK_DONE; if (td2->td_dbgflags & TDB_CHILD) pl->pl_flags |= PL_FLAG_CHILD; if (td2->td_dbgflags & TDB_BORN) pl->pl_flags |= PL_FLAG_BORN; if (td2->td_dbgflags & TDB_EXIT) pl->pl_flags |= PL_FLAG_EXITED; pl->pl_sigmask = td2->td_sigmask; pl->pl_siglist = td2->td_siglist; strcpy(pl->pl_tdname, td2->td_name); if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) != 0) { pl->pl_syscall_code = td2->td_sa.code; pl->pl_syscall_narg = td2->td_sa.callp->sy_narg; } else { pl->pl_syscall_code = 0; pl->pl_syscall_narg = 0; } CTR6(KTR_PTRACE, "PT_LWPINFO: tid %d (pid %d) event %d flags %#x child pid %d syscall %d", td2->td_tid, p->p_pid, pl->pl_event, pl->pl_flags, pl->pl_child_pid, pl->pl_syscall_code); break; case PT_GETNUMLWPS: CTR2(KTR_PTRACE, "PT_GETNUMLWPS: pid %d: %d threads", p->p_pid, p->p_numthreads); td->td_retval[0] = p->p_numthreads; break; case PT_GETLWPLIST: CTR3(KTR_PTRACE, "PT_GETLWPLIST: pid %d: data %d, actual %d", p->p_pid, data, p->p_numthreads); if (data <= 0) { error = EINVAL; break; } num = imin(p->p_numthreads, data); PROC_UNLOCK(p); buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; buf[tmp++] = td2->td_tid; } PROC_UNLOCK(p); error = copyout(buf, addr, tmp * sizeof(lwpid_t)); free(buf, M_TEMP); if (!error) td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_VM_TIMESTAMP: CTR2(KTR_PTRACE, "PT_VM_TIMESTAMP: pid %d: timestamp %d", p->p_pid, p->p_vmspace->vm_map.timestamp); td->td_retval[0] = p->p_vmspace->vm_map.timestamp; break; case PT_VM_ENTRY: PROC_UNLOCK(p); error = ptrace_vm_entry(td, p, addr); PROC_LOCK(p); break; case PT_COREDUMP: pc = addr; CTR2(KTR_PTRACE, "PT_COREDUMP: pid %d, fd %d", p->p_pid, pc->pc_fd); if ((pc->pc_flags & ~(PC_COMPRESS | PC_ALL)) != 0) { error = EINVAL; break; } PROC_UNLOCK(p); tcq = malloc(sizeof(*tcq), M_TEMP, M_WAITOK | M_ZERO); fp = NULL; error = fget_write(td, pc->pc_fd, &cap_write_rights, &fp); if (error != 0) goto coredump_cleanup_nofp; if (fp->f_type != DTYPE_VNODE || fp->f_vnode->v_type != VREG) { error = EPIPE; goto coredump_cleanup; } PROC_LOCK(p); error 
= proc_can_ptrace(td, p); if (error != 0) goto coredump_cleanup_locked; td2 = ptrace_sel_coredump_thread(p); if (td2 == NULL) { error = EBUSY; goto coredump_cleanup_locked; } KASSERT((td2->td_dbgflags & TDB_COREDUMPRQ) == 0, ("proc %d tid %d req coredump", p->p_pid, td2->td_tid)); tcq->tc_vp = fp->f_vnode; tcq->tc_limit = pc->pc_limit == 0 ? OFF_MAX : pc->pc_limit; tcq->tc_flags = SVC_PT_COREDUMP; if ((pc->pc_flags & PC_COMPRESS) == 0) tcq->tc_flags |= SVC_NOCOMPRESS; if ((pc->pc_flags & PC_ALL) != 0) tcq->tc_flags |= SVC_ALL; td2->td_coredump = tcq; td2->td_dbgflags |= TDB_COREDUMPRQ; thread_run_flash(td2); while ((td2->td_dbgflags & TDB_COREDUMPRQ) != 0) msleep(p, &p->p_mtx, PPAUSE, "crdmp", 0); error = tcq->tc_error; coredump_cleanup_locked: PROC_UNLOCK(p); coredump_cleanup: fdrop(fp, td); coredump_cleanup_nofp: free(tcq, M_TEMP); PROC_LOCK(p); break; default: #ifdef __HAVE_PTRACE_MACHDEP if (req >= PT_FIRSTMACH) { PROC_UNLOCK(p); error = cpu_ptrace(td2, req, addr, data); PROC_LOCK(p); } else #endif /* Unknown request. */ error = EINVAL; break; } out: /* Drop our hold on this process now that the request has completed. */ _PRELE(p); fail: if (p2_req_set) { if ((p->p_flag2 & P2_PTRACEREQ) != 0) wakeup(&p->p_flag2); p->p_flag2 &= ~P2_PTRACEREQ; } PROC_UNLOCK(p); if (proctree_locked) sx_xunlock(&proctree_lock); return (error); } #undef PROC_READ #undef PROC_WRITE diff --git a/sys/mips/include/md_var.h b/sys/mips/include/md_var.h index 8462b1beb22e..b322c4d6323a 100644 --- a/sys/mips/include/md_var.h +++ b/sys/mips/include/md_var.h @@ -1,87 +1,87 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: src/sys/i386/include/md_var.h,v 1.35 2000/02/20 20:51:23 bsd * JNPR: md_var.h,v 1.4 2006/10/16 12:30:34 katta * $FreeBSD$ */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ -#include +#include /* * Miscellaneous machine-dependent declarations. 
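Stepping back to the PT_COREDUMP handler that closes out the ptrace switch above: from userland it is driven with a struct ptrace_coredump describing an already-open file. A hedged sketch follows; the path argument and the 0600 mode are arbitrary choices for the illustration.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
request_coredump(pid_t pid, const char *path)
{
    struct ptrace_coredump pc;
    int error, fd;

    fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600);
    if (fd == -1)
        return (-1);

    memset(&pc, 0, sizeof(pc));
    pc.pc_fd = fd;
    pc.pc_flags = PC_ALL;   /* PC_ALL and PC_COMPRESS are the flags accepted above */
    pc.pc_limit = 0;        /* 0 is treated as "no limit" (OFF_MAX) in the handler */

    error = ptrace(PT_COREDUMP, pid, (caddr_t)&pc, sizeof(pc));
    close(fd);
    return (error);
}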
*/ extern long Maxmem; extern char cpu_board[]; extern char cpu_model[]; extern char sigcode[]; extern int szsigcode; #if defined(__mips_n32) || defined(__mips_n64) extern char sigcode32[]; extern int szsigcode32; #endif extern vm_offset_t kstack0; extern vm_offset_t kernel_kseg0_end; uint32_t MipsFPID(void); void MipsSaveCurFPState(struct thread *); void fork_trampoline(void); uintptr_t MipsEmulateBranch(struct trapframe *, uintptr_t, int, uintptr_t); void MipsSwitchFPState(struct thread *, struct trapframe *); int is_cacheable_mem(vm_paddr_t addr); void mips_wait(void); #define MIPS_DEBUG 0 #if MIPS_DEBUG #define MIPS_DEBUG_PRINT(fmt, args...) printf("%s: " fmt "\n" , __FUNCTION__ , ## args) #else #define MIPS_DEBUG_PRINT(fmt, args...) #endif void mips_vector_init(void); void mips_cpu_init(void); void mips_pcpu0_init(void); void mips_proc0_init(void); void mips_postboot_fixup(void); void cpu_identify(void); void cpu_switch_set_userlocal(void) __asm(__STRING(cpu_switch_set_userlocal)); extern int busdma_swi_pending; void busdma_swi(void); struct dumperinfo; int minidumpsys(struct dumperinfo *); #endif /* !_MACHINE_MD_VAR_H_ */ diff --git a/sys/mips/include/reg.h b/sys/mips/include/reg.h index a1f5aa4da23c..440b791bffc7 100644 --- a/sys/mips/include/reg.h +++ b/sys/mips/include/reg.h @@ -1,109 +1,93 @@ /* $OpenBSD: reg.h,v 1.1 1998/01/28 11:14:53 pefo Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988 University of Utah. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and Ralph Campbell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: reg.h 1.1 90/07/09 * @(#)reg.h 8.2 (Berkeley) 1/11/94 * JNPR: reg.h,v 1.6 2006/09/15 12:52:34 katta * $FreeBSD$ */ #ifndef _MACHINE_REG_H_ #define _MACHINE_REG_H_ /* * Location of the users' stored registers relative to ZERO. * must be visible to assembly code. 
*/ #include /* * Register set accessible via /proc/$pid/reg */ struct reg { register_t r_regs[NUMSAVEREGS]; /* numbered as above */ }; struct fpreg { f_register_t r_regs[NUMFPREGS]; }; /* * Placeholder. */ struct dbreg { unsigned long junk; }; #ifdef __LP64__ /* Must match struct trapframe */ struct reg32 { uint32_t r_regs[NUMSAVEREGS]; }; struct fpreg32 { int32_t r_regs[NUMFPREGS]; }; struct dbreg32 { uint32_t junk; }; #define __HAVE_REG32 #endif -#ifdef _KERNEL -int fill_fpregs(struct thread *, struct fpreg *); -int fill_regs(struct thread *, struct reg *); -int set_fpregs(struct thread *, struct fpreg *); -int set_regs(struct thread *, struct reg *); -int fill_dbregs(struct thread *, struct dbreg *); -int set_dbregs(struct thread *, struct dbreg *); -#endif - #ifdef COMPAT_FREEBSD32 -struct image_params; - -int fill_regs32(struct thread *, struct reg32 *); -int set_regs32(struct thread *, struct reg32 *); -int fill_fpregs32(struct thread *, struct fpreg32 *); -int set_fpregs32(struct thread *, struct fpreg32 *); - #define fill_dbregs32(td, reg) 0 #define set_dbregs32(td, reg) 0 #endif #endif /* !_MACHINE_REG_H_ */ diff --git a/sys/mips/mips/freebsd32_machdep.c b/sys/mips/mips/freebsd32_machdep.c index e30753e7a59c..eb5f82c572ce 100644 --- a/sys/mips/mips/freebsd32_machdep.c +++ b/sys/mips/mips/freebsd32_machdep.c @@ -1,488 +1,488 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Juli Mallett * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Based on nwhitehorn's COMPAT_FREEBSD32 support code for PowerPC64. 
*/ #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include static int get_mcontext32(struct thread *, mcontext32_t *, int); static int set_mcontext32(struct thread *, mcontext32_t *); static void freebsd32_sendsig(sig_t, ksiginfo_t *, sigset_t *); extern const char *freebsd32_syscallnames[]; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = freebsd32_sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = ((vm_offset_t)0x80000000), .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_RNG_SEED_VER, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); int set_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; for (i = 0; i < NUMSAVEREGS; i++) r.r_regs[i] = regs->r_regs[i]; return (set_regs(td, &r)); } int fill_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; int error; error = fill_regs(td, &r); if (error != 0) return (error); for (i = 0; i < NUMSAVEREGS; i++) regs->r_regs[i] = r.r_regs[i]; return (0); } int set_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; for (i = 0; i < NUMFPREGS; i++) fp.r_regs[i] = fpregs->r_regs[i]; return (set_fpregs(td, &fp)); } int fill_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; int error; error = fill_fpregs(td, &fp); if (error != 0) return (error); for (i = 0; i < NUMFPREGS; i++) fpregs->r_regs[i] = fp.r_regs[i]; return (0); } static int get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { mcontext_t mcp64; unsigned i; int error; error = get_mcontext(td, &mcp64, flags); if (error != 0) return (error); mcp->mc_onstack = mcp64.mc_onstack; mcp->mc_pc = mcp64.mc_pc; for (i = 0; i < 32; i++) mcp->mc_regs[i] = mcp64.mc_regs[i]; mcp->sr = mcp64.sr; mcp->mullo = mcp64.mullo; mcp->mulhi = mcp64.mulhi; mcp->mc_fpused = mcp64.mc_fpused; for (i = 0; i < 33; i++) mcp->mc_fpregs[i] = mcp64.mc_fpregs[i]; mcp->mc_fpc_eir = mcp64.mc_fpc_eir; mcp->mc_tls = 
(int32_t)(intptr_t)mcp64.mc_tls; return (0); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { mcontext_t mcp64; unsigned i; mcp64.mc_onstack = mcp->mc_onstack; mcp64.mc_pc = mcp->mc_pc; for (i = 0; i < 32; i++) mcp64.mc_regs[i] = mcp->mc_regs[i]; mcp64.sr = mcp->sr; mcp64.mullo = mcp->mullo; mcp64.mulhi = mcp->mulhi; mcp64.mc_fpused = mcp->mc_fpused; for (i = 0; i < 33; i++) mcp64.mc_fpregs[i] = mcp->mc_fpregs[i]; mcp64.mc_fpc_eir = mcp->mc_fpc_eir; mcp64.mc_tls = (void *)(intptr_t)mcp->mc_tls; return (set_mcontext(td, &mcp64)); } int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); #if 0 CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); #endif return (EJUSTRETURN); } /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? EJUSTRETURN : ret); } #define UCONTEXT_MAGIC 0xACEDBADE /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. 
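The UC32_COPY_SIZE definition a few lines up works purely because of field order: everything the get/set/swap-context paths need lies in front of uc_link, so copying offsetof(ucontext32_t, uc_link) bytes moves the signal mask and machine context while leaving the caller's uc_link (and anything after it) alone. A small illustration with stand-in types rather than the real ucontext32_t:

#include <stddef.h>
#include <string.h>

/* Stand-ins for the real types; only the field order matters here. */
struct mcontext_like { int regs[8]; };
struct ucontext_like {
    unsigned int          uc_sigmask;   /* copied */
    struct mcontext_like  uc_mcontext;  /* copied */
    struct ucontext_like *uc_link;      /* deliberately not copied */
    int                   uc_rest[4];
};

#define UC_COPY_SIZE    offsetof(struct ucontext_like, uc_link)

static void
copy_context(struct ucontext_like *dst, const struct ucontext_like *src)
{
    /* dst->uc_link and everything after it keep their old values. */
    memcpy(dst, src, UC_COPY_SIZE);
}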
*/ static void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct fpreg32 fpregs; struct reg32 regs; struct sigacts *psp; struct sigframe32 sf, *sfp; int sig; int oonstack; unsigned i; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); fill_regs32(td, ®s); oonstack = sigonstack(td->td_frame->sp); /* save user context */ bzero(&sf, sizeof sf); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack.ss_sp = (int32_t)(intptr_t)td->td_sigstk.ss_sp; sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_uc.uc_stack.ss_flags = td->td_sigstk.ss_flags; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs.r_regs[PC]; sf.sf_uc.uc_mcontext.mullo = regs.r_regs[MULLO]; sf.sf_uc.uc_mcontext.mulhi = regs.r_regs[MULHI]; sf.sf_uc.uc_mcontext.mc_tls = (int32_t)(intptr_t)td->td_md.md_tls; sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ for (i = 1; i < 32; i++) sf.sf_uc.uc_mcontext.mc_regs[i] = regs.r_regs[i]; sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); fill_fpregs32(td, &fpregs); for (i = 0; i < 33; i++) sf.sf_uc.uc_mcontext.mc_fpregs[i] = fpregs.r_regs[i]; } /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe32 *)(((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); } else sfp = (struct sigframe32 *)((vm_offset_t)(td->td_frame->sp - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); /* Build the argument list for the signal handler. */ td->td_frame->a0 = sig; td->td_frame->a2 = (register_t)(intptr_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ td->td_frame->a1 = (register_t)(intptr_t)&sfp->sf_si; /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = td->td_frame->badvaddr; } else { /* Old FreeBSD-style arguments. */ td->td_frame->a1 = ksi->ksi_code; td->td_frame->a3 = td->td_frame->badvaddr; /* sf.sf_ahu.sf_handler = catcher; */ } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(struct sigframe32)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); sigexit(td, SIGILL); } td->td_frame->pc = (register_t)(intptr_t)catcher; td->td_frame->t9 = (register_t)(intptr_t)catcher; td->td_frame->sp = (register_t)(intptr_t)sfp; /* * Signal trampoline code is at base of user stack. */ td->td_frame->ra = (register_t)(intptr_t)FREEBSD32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap) { int error; int32_t tlsbase; switch (uap->op) { case MIPS_SET_TLS: td->td_md.md_tls = (void *)(intptr_t)uap->parms; /* * If there is an user local register implementation (ULRI) * update it as well. Add the TLS and TCB offsets so the * value in this register is adjusted like in the case of the * rdhwr trap() instruction handler. 
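The MIPS_SET_TLS / MIPS_GET_TLS cases handled just above are reached from userland through sysarch(2). A minimal sketch, assuming only the standard <machine/sysarch.h> interface and a caller-provided TCB pointer:

#include <machine/sysarch.h>
#include <stddef.h>

static int
set_tls_base(void *tcb)
{
    /* The kernel, not the caller, adds the TLS/TCB offsets before */
    /* loading the user-local register, as described in the comment above. */
    return (sysarch(MIPS_SET_TLS, tcb));
}

static void *
get_tls_base(void)
{
    void *tcb;

    if (sysarch(MIPS_GET_TLS, &tcb) != 0)
        return (NULL);
    return (tcb);
}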
*/ if (cpuinfo.userlocal_reg == true) { mips_wr_userlocal((unsigned long)(uap->parms + td->td_proc->p_md.md_tls_tcb_offset)); } return (0); case MIPS_GET_TLS: tlsbase = (int32_t)(intptr_t)td->td_md.md_tls; error = copyout(&tlsbase, uap->parms, sizeof(tlsbase)); return (error); default: break; } return (EINVAL); } void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } diff --git a/sys/mips/mips/gdb_machdep.c b/sys/mips/mips/gdb_machdep.c index 130a78f9eb87..4b999613d7f6 100644 --- a/sys/mips/mips/gdb_machdep.c +++ b/sys/mips/mips/gdb_machdep.c @@ -1,191 +1,190 @@ /* $NetBSD: kgdb_machdep.c,v 1.11 2005/12/24 22:45:35 perry Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND BSD-2-Clause-NetBSD * * Copyright (c) 2004 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1996 Matthias Pfaller. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Matthias Pfaller. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * JNPR: gdb_machdep.c,v 1.1 2007/08/09 12:25:25 katta * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include -#include #include #include void * gdb_cpu_getreg(int regnum, size_t *regsz) { *regsz = gdb_cpu_regsz(regnum); if (kdb_thread == curthread) { register_t *zero_ptr = &kdb_frame->zero; return zero_ptr + regnum; } switch (regnum) { /* * S0..S7 */ case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23: return (&kdb_thrctx->pcb_context[PCB_REG_S0 + regnum - 16]); case 28: return (&kdb_thrctx->pcb_context[PCB_REG_GP]); case 29: return (&kdb_thrctx->pcb_context[PCB_REG_SP]); case 30: return (&kdb_thrctx->pcb_context[PCB_REG_S8]); case 31: return (&kdb_thrctx->pcb_context[PCB_REG_RA]); case 37: return (&kdb_thrctx->pcb_context[PCB_REG_PC]); } return (NULL); } void gdb_cpu_setreg(int regnum, void *val) { switch (regnum) { case GDB_REG_PC: kdb_thrctx->pcb_context[10] = *(register_t *)val; if (kdb_thread == curthread) kdb_frame->pc = *(register_t *)val; } } int gdb_cpu_signal(int entry, int code) { switch (entry) { case T_TLB_MOD: case T_TLB_MOD+T_USER: case T_TLB_LD_MISS: case T_TLB_ST_MISS: case T_TLB_LD_MISS+T_USER: case T_TLB_ST_MISS+T_USER: case T_ADDR_ERR_LD: /* misaligned access */ case T_ADDR_ERR_ST: /* misaligned access */ case T_BUS_ERR_LD_ST: /* BERR asserted to CPU */ case T_ADDR_ERR_LD+T_USER: /* misaligned or kseg access */ case T_ADDR_ERR_ST+T_USER: /* misaligned or kseg access */ case T_BUS_ERR_IFETCH+T_USER: /* BERR asserted to CPU */ case T_BUS_ERR_LD_ST+T_USER: /* BERR asserted to CPU */ return (SIGSEGV); case T_BREAK: case T_BREAK+T_USER: return (SIGTRAP); case T_RES_INST+T_USER: case T_COP_UNUSABLE+T_USER: return (SIGILL); case T_FPE+T_USER: case T_OVFLOW+T_USER: return (SIGFPE); default: return (SIGEMT); } } diff --git a/sys/mips/mips/machdep.c b/sys/mips/mips/machdep.c index fd0f83e5df98..ec0f3f31c254 100644 --- a/sys/mips/mips/machdep.c +++ b/sys/mips/mips/machdep.c @@ -1,579 +1,580 @@ /* $OpenBSD: machdep.c,v 1.33 1998/09/15 10:58:54 pefo Exp $ */ /* tracked to 1.38 */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988 University of Utah. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, The Mach Operating System project at * Carnegie-Mellon University and Ralph Campbell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 8.3 (Berkeley) 1/12/94 * Id: machdep.c,v 1.33 1998/09/15 10:58:54 pefo Exp * JNPR: machdep.c,v 1.11.2.3 2007/08/29 12:24:49 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_md.h" #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #include #include #define BOOTINFO_DEBUG 0 char machine[] = "mips"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); char cpu_model[80]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "Machine model"); char cpu_board[80]; SYSCTL_STRING(_hw, OID_AUTO, board, CTLFLAG_RD, cpu_board, 0, "Machine board"); int cold = 1; long realmem = 0; long Maxmem = 0; int cpu_clock = MIPS_DEFAULT_HZ; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &cpu_clock, 0, "CPU instruction clock rate"); int clocks_running = 0; vm_offset_t kstack0; /* * Each entry in the pcpu_space[] array is laid out in the following manner: * struct pcpu for cpu 'n' pcpu_space[n] * boot stack for cpu 'n' pcpu_space[n] + PAGE_SIZE * 2 - CALLFRAME_SIZ * * Note that the boot stack grows downwards and we assume that we never * use enough stack space to trample over the 'struct pcpu' that is at * the beginning of the array. * * The array is aligned on a (PAGE_SIZE * 2) boundary so that the 'struct pcpu' * is always in the even page frame of the wired TLB entry on SMP kernels. * * The array is in the .data section so that the stack does not get zeroed out * when the .bss section is zeroed. */ char pcpu_space[MAXCPU][PAGE_SIZE * 2] \ __aligned(PAGE_SIZE * 2) __section(".data"); struct pcpu *pcpup = (struct pcpu *)pcpu_space; vm_paddr_t physmem_desc[PHYS_AVAIL_COUNT]; #ifdef UNIMPLEMENTED struct platform platform; #endif static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); struct kva_md_info kmi; int cpucfg; /* Value of processor config register */ int num_tlbentries = 64; /* Size of the CPU tlb */ int cputype; extern char MipsException[], MipsExceptionEnd[]; /* TLB miss handler address and end */ extern char MipsTLBMiss[], MipsTLBMissEnd[]; /* Cache error handler */ extern char MipsCache[], MipsCacheEnd[]; /* MIPS wait skip region */ extern char MipsWaitStart[], MipsWaitEnd[]; extern char edata[], end[]; u_int32_t bootdev; struct bootinfo bootinfo; /* * First kseg0 address available for use. By default it's equal to &end. * But in some cases there might be additional data placed right after * _end by loader or ELF trampoline. 
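The pcpu_space[] layout described a little above fixes two addresses per CPU: the struct pcpu at the bottom of its two-page slot, and the boot stack growing down from just under the top of the slot, less one call frame. Restated as a pair of hypothetical helpers (not part of this change, and assuming the CALLFRAME_SIZ macro referenced in that comment is visible here), the arithmetic is:

/* Hypothetical helpers restating the pcpu_space[] layout above. */
static inline struct pcpu *
bootcpu_pcpu(int n)
{
    return ((struct pcpu *)pcpu_space[n]);
}

static inline vm_offset_t
bootcpu_stack_top(int n)
{
    /* The boot stack grows down from the top of the slot, minus a call frame. */
    return ((vm_offset_t)pcpu_space[n] + PAGE_SIZE * 2 - CALLFRAME_SIZ);
}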
*/ vm_offset_t kernel_kseg0_end = (vm_offset_t)&end; static void cpu_startup(void *dummy) { if (boothowto & RB_VERBOSE) bootverbose++; cpu_identify(); printf("real memory = %ju (%juK bytes)\n", ptoa((uintmax_t)realmem), ptoa((uintmax_t)realmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08jx - 0x%08jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size1, (uintmax_t)size1 / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%juMB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / 1048576); cpu_init_interrupts(); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); } /* * Shutdown the CPU as much as possible */ void cpu_reset(void) { platform_reset(); } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* TBD */ } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) ; } SYSCTL_STRUCT(_machdep, OID_AUTO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, "Bootinfo struct: kernel filename, BIOS harddisk geometry, etc"); /* * Initialize per cpu data structures, include curthread. */ void mips_pcpu0_init() { /* Initialize pcpu info of cpu-zero */ pcpu_init(PCPU_ADDR(0), 0, sizeof(struct pcpu)); PCPU_SET(curthread, &thread0); } /* * Initialize mips and configure to run kernel */ void mips_proc0_init(void) { #ifdef SMP if (platform_processor_id() != 0) panic("BSP must be processor number 0"); #endif proc_linkup0(&proc0, &thread0); KASSERT((kstack0 & PAGE_MASK) == 0, ("kstack0 is not aligned on a page boundary: 0x%0lx", (long)kstack0)); thread0.td_kstack = kstack0; thread0.td_kstack_pages = KSTACK_PAGES; /* * Do not use cpu_thread_alloc to initialize these fields * thread0 is the only thread that has kstack located in KSEG0 * while cpu_thread_alloc handles kstack allocated in KSEG2. */ thread0.td_pcb = (struct pcb *)(thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE) - 1; thread0.td_frame = &thread0.td_pcb->pcb_regs; /* Steal memory for the dynamic per-cpu area. */ dpcpu_init((void *)pmap_steal_memory(DPCPU_SIZE), 0); PCPU_SET(curpcb, thread0.td_pcb); /* * There is no need to initialize md_upte array for thread0 as it's * located in .bss section and should be explicitly zeroed during * kernel initialization. */ } void cpu_initclocks(void) { platform_initclocks(); cpu_initclocks_bsp(); } /* * Initialize the hardware exception vectors, and the jump table used to * call locore cache and TLB management functions, based on the kind * of CPU the kernel is running on. */ void mips_vector_init(void) { /* * Make sure that the Wait region logic is not been * changed */ if (MipsWaitEnd - MipsWaitStart != 16) panic("startup: MIPS wait region not correct"); /* * Copy down exception vector code. 
*/ if (MipsTLBMissEnd - MipsTLBMiss > 0x80) panic("startup: UTLB code too large"); if (MipsCacheEnd - MipsCache > 0x80) panic("startup: Cache error code too large"); bcopy(MipsTLBMiss, (void *)MIPS_UTLB_MISS_EXC_VEC, MipsTLBMissEnd - MipsTLBMiss); /* * XXXRW: Why don't we install the XTLB handler for all 64-bit * architectures? */ #if defined(__mips_n64) || defined(CPU_RMI) || defined(CPU_NLM) || defined(CPU_BERI) /* Fake, but sufficient, for the 32-bit with 64-bit hardware addresses */ bcopy(MipsTLBMiss, (void *)MIPS_XTLB_MISS_EXC_VEC, MipsTLBMissEnd - MipsTLBMiss); #endif bcopy(MipsException, (void *)MIPS_GEN_EXC_VEC, MipsExceptionEnd - MipsException); bcopy(MipsCache, (void *)MIPS_CACHE_ERR_EXC_VEC, MipsCacheEnd - MipsCache); /* * Clear out the I and D caches. */ mips_icache_sync_all(); mips_dcache_wbinv_all(); /* * Mask all interrupts. Each interrupt will be enabled * when handler is installed for it */ set_intr_mask(0); /* Clear BEV in SR so we start handling our own exceptions */ mips_wr_status(mips_rd_status() & ~MIPS_SR_BEV); } /* * Fix kernel_kseg0_end address in case trampoline placed debug sympols * data there */ void mips_postboot_fixup(void) { /* * We store u_long sized objects into the reload area, so the array * must be so aligned. The standard allows any alignment for char data. */ _Alignas(_Alignof(u_long)) static char fake_preload[256]; caddr_t preload_ptr = (caddr_t)&fake_preload[0]; size_t size = 0; #define PRELOAD_PUSH_VALUE(type, value) do { \ *(type *)(preload_ptr + size) = (value); \ size += sizeof(type); \ } while (0); /* * Provide kernel module file information */ PRELOAD_PUSH_VALUE(uint32_t, MODINFO_NAME); PRELOAD_PUSH_VALUE(uint32_t, strlen("kernel") + 1); strcpy((char*)(preload_ptr + size), "kernel"); size += strlen("kernel") + 1; size = roundup(size, sizeof(u_long)); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_TYPE); PRELOAD_PUSH_VALUE(uint32_t, strlen("elf kernel") + 1); strcpy((char*)(preload_ptr + size), "elf kernel"); size += strlen("elf kernel") + 1; size = roundup(size, sizeof(u_long)); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_ADDR); PRELOAD_PUSH_VALUE(uint32_t, sizeof(vm_offset_t)); PRELOAD_PUSH_VALUE(vm_offset_t, KERNLOADADDR); size = roundup(size, sizeof(u_long)); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_SIZE); PRELOAD_PUSH_VALUE(uint32_t, sizeof(size_t)); PRELOAD_PUSH_VALUE(size_t, (size_t)&end - KERNLOADADDR); size = roundup(size, sizeof(u_long)); /* End marker */ PRELOAD_PUSH_VALUE(uint32_t, 0); PRELOAD_PUSH_VALUE(uint32_t, 0); #undef PRELOAD_PUSH_VALUE KASSERT((size < sizeof(fake_preload)), ("fake preload size is more thenallocated")); preload_metadata = (void *)fake_preload; #ifdef DDB Elf_Size *trampoline_data = (Elf_Size*)kernel_kseg0_end; Elf_Size symtabsize = 0; vm_offset_t ksym_start; vm_offset_t ksym_end; if (trampoline_data[0] == SYMTAB_MAGIC) { symtabsize = trampoline_data[1]; kernel_kseg0_end += 2 * sizeof(Elf_Size); /* start of .symtab */ ksym_start = kernel_kseg0_end; kernel_kseg0_end += symtabsize; /* end of .strtab */ ksym_end = kernel_kseg0_end; db_fetch_ksymtab(ksym_start, ksym_end, 0); } #endif } #ifdef SMP void mips_pcpu_tlb_init(struct pcpu *pcpu) { vm_paddr_t pa; pt_entry_t pte; /* * Map the pcpu structure at the virtual address 'pcpup'. * We use a wired tlb index to do this one-time mapping. */ pa = vtophys(pcpu); pte = PTE_D | PTE_V | PTE_G | PTE_C_CACHE; tlb_insert_wired(PCPU_TLB_ENTRY, (vm_offset_t)pcpup, TLBLO_PA_TO_PFN(pa) | pte, TLBLO_PA_TO_PFN(pa + PAGE_SIZE) | pte); } #endif /* * Initialise a struct pcpu. 
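The PRELOAD_PUSH_VALUE stream built in mips_postboot_fixup() above is a flat sequence of records: a uint32_t tag, a uint32_t payload length, the payload itself, with the running offset padded up to u_long alignment, and a zero tag/length pair as terminator. A minimal reader sketch for that format — illustrative only; the real consumers are the kernel's preload_* routines:

#include <sys/param.h>
#include <stdint.h>
#include <string.h>

static const void *
fake_preload_find(const char *base, uint32_t want_tag)
{
    uint32_t tag, len;
    size_t off = 0;

    for (;;) {
        memcpy(&tag, base + off, sizeof(tag));
        memcpy(&len, base + off + sizeof(tag), sizeof(len));
        if (tag == 0 && len == 0)
            return (NULL);              /* end marker */
        if (tag == want_tag)
            return (base + off + 2 * sizeof(uint32_t));
        off += 2 * sizeof(uint32_t) + len;
        off = roundup(off, sizeof(u_long));
    }
}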
*/ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_next_asid = 1; pcpu->pc_asid_generation = 1; pcpu->pc_self = pcpu; #ifdef SMP if ((vm_offset_t)pcpup >= VM_MIN_KERNEL_ADDRESS && (vm_offset_t)pcpup <= VM_MAX_KERNEL_ADDRESS) { mips_pcpu_tlb_init(pcpu); } #endif } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on mips */ return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on mips */ return (ENOSYS); } void spinlock_enter(void) { struct thread *td; register_t intr; td = curthread; if (td->td_md.md_spinlock_count == 0) { intr = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_intr = intr; critical_enter(); } else td->td_md.md_spinlock_count++; } void spinlock_exit(void) { struct thread *td; register_t intr; td = curthread; intr = td->td_md.md_saved_intr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { critical_exit(); intr_restore(intr); } } /* * call platform specific code to halt (until next interrupt) for the idle loop */ void cpu_idle(int busy) { KASSERT((mips_rd_status() & MIPS_SR_INT_IE) != 0, ("interrupts disabled in idle process.")); KASSERT((mips_rd_status() & MIPS_INT_MASK) != 0, ("all interrupts masked in idle process.")); if (!busy) { critical_enter(); cpu_idleclock(); } mips_wait(); if (!busy) { cpu_activeclock(); critical_exit(); } } int cpu_idle_wakeup(int cpu) { return (0); } int is_cacheable_mem(vm_paddr_t pa) { int i; for (i = 0; physmem_desc[i + 1] != 0; i += 2) { if (pa >= physmem_desc[i] && pa < physmem_desc[i + 1]) return (1); } return (0); } diff --git a/sys/mips/mips/pm_machdep.c b/sys/mips/mips/pm_machdep.c index 8d8cf4e1023d..7a9db29b6ff6 100644 --- a/sys/mips/mips/pm_machdep.c +++ b/sys/mips/mips/pm_machdep.c @@ -1,517 +1,517 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * from: src/sys/i386/i386/machdep.c,v 1.385.2.3 2000/05/10 02:04:46 obrien * JNPR: pm_machdep.c,v 1.9.2.1 2007/08/16 15:59:10 girish */ #include __FBSDID("$FreeBSD$"); #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #define UCONTEXT_MAGIC 0xACEDBADE /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct trapframe *regs; struct sigacts *psp; struct sigframe sf, *sfp; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->sp); /* save user context */ bzero(&sf, sizeof(struct sigframe)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs->pc; sf.sf_uc.uc_mcontext.mullo = regs->mullo; sf.sf_uc.uc_mcontext.mulhi = regs->mulhi; sf.sf_uc.uc_mcontext.mc_tls = td->td_md.md_tls; sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ bcopy((void *)®s->ast, (void *)&sf.sf_uc.uc_mcontext.mc_regs[1], sizeof(sf.sf_uc.uc_mcontext.mc_regs) - sizeof(register_t)); sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); bcopy((void *)&td->td_frame->f0, (void *)sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); } /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe)) & ~(STACK_ALIGN - 1)); } else sfp = (struct sigframe *)((vm_offset_t)(regs->sp - sizeof(struct sigframe)) & ~(STACK_ALIGN - 1)); /* Build the argument list for the signal handler. */ regs->a0 = sig; regs->a2 = (register_t)(intptr_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->a1 = (register_t)(intptr_t)&sfp->sf_si; /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; } else { /* Old FreeBSD-style arguments. */ regs->a1 = ksi->ksi_code; regs->a3 = (uintptr_t)ksi->ksi_addr; /* sf.sf_ahu.sf_handler = catcher; */ } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); sigexit(td, SIGILL); } regs->pc = (register_t)(intptr_t)catcher; regs->t9 = (register_t)(intptr_t)catcher; regs->sp = (register_t)(intptr_t)sfp; /* * Signal trampoline code is at base of user stack. 
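The a0/a1/a2 assignments above are the kernel half of the SA_SIGINFO calling convention; the userland half is the ordinary three-argument handler, with nothing MIPS-specific about it. For reference, standard sigaction(2) usage that matches the frame being built here (SIGSEGV is just an arbitrary example signal):

#include <signal.h>
#include <string.h>

static void
on_signal(int sig, siginfo_t *si, void *ucp)
{
    /* sig arrives via a0, si points at sf_si (a1), ucp at sf_uc (a2). */
    (void)sig;
    (void)si;
    (void)ucp;
}

static void
install_handler(void)
{
    struct sigaction sa;

    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = on_signal;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    (void)sigaction(SIGSEGV, &sa, NULL);
}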
*/ regs->ra = (register_t)(intptr_t)PS_STRINGS - *(p->p_sysent->sv_szsigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc as specified by * context left by sendsig. */ int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; int error; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->pc = (register_t) addr; return 0; } static int ptrace_read_int(struct thread *td, uintptr_t addr, int *v) { if (proc_readmem(td, td->td_proc, addr, v, sizeof(*v)) != sizeof(*v)) return (EFAULT); return (0); } static int ptrace_write_int(struct thread *td, uintptr_t addr, int v) { if (proc_writemem(td, td->td_proc, addr, &v, sizeof(v)) != sizeof(v)) return (EFAULT); return (0); } int ptrace_single_step(struct thread *td) { uintptr_t va; struct trapframe *locr0 = td->td_frame; int error; int bpinstr = MIPS_BREAK_SSTEP; int curinstr; struct proc *p; p = td->td_proc; PROC_UNLOCK(p); /* * Fetch what's at the current location. */ error = ptrace_read_int(td, locr0->pc, &curinstr); if (error) goto out; CTR3(KTR_PTRACE, "ptrace_single_step: tid %d, current instr at %#lx: %#08x", td->td_tid, locr0->pc, curinstr); /* compute next address after current location */ if (locr0->cause & MIPS_CR_BR_DELAY) { va = MipsEmulateBranch(locr0, locr0->pc, locr0->fsr, (uintptr_t)&curinstr); } else { va = locr0->pc + 4; } if (td->td_md.md_ss_addr) { printf("SS %s (%d): breakpoint already set at %p (va %p)\n", p->p_comm, p->p_pid, (void *)td->td_md.md_ss_addr, (void *)va); /* XXX */ error = EFAULT; goto out; } td->td_md.md_ss_addr = va; /* * Fetch what's at the current location. */ error = ptrace_read_int(td, (off_t)va, &td->td_md.md_ss_instr); if (error) goto out; /* * Store breakpoint instruction at the "next" location now. */ error = ptrace_write_int(td, va, bpinstr); /* * The sync'ing of I & D caches is done by proc_rwmem() * through proc_writemem(). 
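From the debugger's point of view all of this sits behind PT_STEP: the kernel reads the current instruction, lets MipsEmulateBranch() pick the real successor when the PC is in a branch delay slot, and plants MIPS_BREAK_SSTEP at whichever address executes next. A hedged userland sketch of issuing the request:

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <signal.h>

static int
single_step(pid_t pid)
{
    int status;

    /* addr == (caddr_t)1: resume at the current PC; data == 0: no signal. */
    if (ptrace(PT_STEP, pid, (caddr_t)1, 0) == -1)
        return (-1);
    /* The target reports SIGTRAP once the planted breakpoint fires. */
    if (waitpid(pid, &status, 0) == -1)
        return (-1);
    return (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP ? 0 : -1);
}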
*/ out: PROC_LOCK(p); if (error == 0) CTR3(KTR_PTRACE, "ptrace_single_step: tid %d, break set at %#lx: (%#08x)", td->td_tid, va, td->td_md.md_ss_instr); return (error); } void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_context[PCB_REG_RA] = tf->ra; pcb->pcb_context[PCB_REG_PC] = tf->pc; pcb->pcb_context[PCB_REG_SP] = tf->sp; } int fill_regs(struct thread *td, struct reg *regs) { memcpy(regs, td->td_frame, sizeof(struct reg)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *f; register_t sr; f = (struct trapframe *) td->td_frame; /* * Don't allow the user to change SR */ sr = f->sr; memcpy(td->td_frame, regs, sizeof(struct reg)); f->sr = sr; return (0); } int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->sp); PROC_UNLOCK(curthread->td_proc); bcopy((void *)&td->td_frame->zero, (void *)&mcp->mc_regs, sizeof(mcp->mc_regs)); mcp->mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (mcp->mc_fpused) { bcopy((void *)&td->td_frame->f0, (void *)&mcp->mc_fpregs, sizeof(mcp->mc_fpregs)); } if (flags & GET_MC_CLEAR_RET) { mcp->mc_regs[V0] = 0; mcp->mc_regs[V1] = 0; mcp->mc_regs[A3] = 0; } mcp->mc_pc = td->td_frame->pc; mcp->mullo = td->td_frame->mullo; mcp->mulhi = td->td_frame->mulhi; mcp->mc_tls = td->td_md.md_tls; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; tp = td->td_frame; bcopy((void *)&mcp->mc_regs, (void *)&td->td_frame->zero, sizeof(mcp->mc_regs)); td->td_md.md_flags = mcp->mc_fpused & MDTD_FPUSED; if (mcp->mc_fpused) { bcopy((void *)&mcp->mc_fpregs, (void *)&td->td_frame->f0, sizeof(mcp->mc_fpregs)); } td->td_frame->pc = mcp->mc_pc; td->td_frame->mullo = mcp->mullo; td->td_frame->mulhi = mcp->mulhi; td->td_md.md_tls = mcp->mc_tls; /* Dont let user to set any bits in status and cause registers. */ return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); memcpy(fpregs, &td->td_frame->f0, sizeof(struct fpreg)); fpregs->r_regs[FIR_NUM] = cpuinfo.fpu_id; return 0; } int set_fpregs(struct thread *td, struct fpreg *fpregs) { if (PCPU_GET(fpcurthread) == td) PCPU_SET(fpcurthread, (struct thread *)0); memcpy(&td->td_frame->f0, fpregs, sizeof(struct fpreg)); return 0; } /* * Clear registers on exec * $sp is set to the stack pointer passed in. $pc is set to the entry * point given by the exec_package passed in, as is $t9 (used for PIC * code by the MIPS elf abi). */ void exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { bzero((caddr_t)td->td_frame, sizeof(struct trapframe)); td->td_frame->sp = ((register_t)stack) & ~(STACK_ALIGN - 1); /* * If we're running o32 or n32 programs but have 64-bit registers, * GCC may use stack-relative addressing near the top of user * address space that, due to sign extension, will yield an * invalid address. For instance, if sp is 0x7fffff00 then GCC * might do something like this to load a word from 0x7ffffff0: * * addu sp, sp, 32768 * lw t0, -32528(sp) * * On systems with 64-bit registers, sp is sign-extended to * 0xffffffff80007f00 and the load is instead done from * 0xffffffff7ffffff0. * * To prevent this, we subtract 64K from the stack pointer here * for processes with 32-bit pointers. 
*/ #if defined(__mips_n32) || defined(__mips_n64) if (!SV_PROC_FLAG(td->td_proc, SV_LP64)) td->td_frame->sp -= 65536; #endif td->td_frame->pc = imgp->entry_addr & ~3; td->td_frame->t9 = imgp->entry_addr & ~3; /* abicall req */ td->td_frame->sr = MIPS_SR_KSU_USER | MIPS_SR_EXL | MIPS_SR_INT_IE | (mips_rd_status() & MIPS_SR_INT_MASK); #if defined(__mips_n32) || defined(__mips_n64) td->td_frame->sr |= MIPS_SR_PX; #endif #if defined(__mips_n64) if (SV_PROC_FLAG(td->td_proc, SV_LP64)) td->td_frame->sr |= MIPS_SR_UX; td->td_frame->sr |= MIPS_SR_KX; #endif /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) * and/or bits in other standard MIPS registers (if CPU-Specific) * that are needed. */ /* * Set up arguments for the rtld-capable crt0: * a0 stack pointer * a1 rtld cleanup (filled in by dynamic loader) * a2 rtld object (filled in by dynamic loader) * a3 ps_strings */ td->td_frame->a0 = (register_t) stack; td->td_frame->a1 = 0; td->td_frame->a2 = 0; td->td_frame->a3 = (register_t)imgp->ps_strings; td->td_md.md_flags &= ~MDTD_FPUSED; if (PCPU_GET(fpcurthread) == td) PCPU_SET(fpcurthread, (struct thread *)0); td->td_md.md_ss_addr = 0; td->td_md.md_tls = NULL; #ifdef COMPAT_FREEBSD32 if (!SV_PROC_FLAG(td->td_proc, SV_LP64)) td->td_proc->p_md.md_tls_tcb_offset = TLS_TP_OFFSET + TLS_TCB_SIZE32; else #endif td->td_proc->p_md.md_tls_tcb_offset = TLS_TP_OFFSET + TLS_TCB_SIZE; } int ptrace_clear_single_step(struct thread *td) { struct proc *p; int error; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); if (!td->td_md.md_ss_addr) return EINVAL; /* * Restore original instruction and clear BP */ PROC_UNLOCK(p); CTR3(KTR_PTRACE, "ptrace_clear_single_step: tid %d, restore instr at %#lx: %#08x", td->td_tid, td->td_md.md_ss_addr, td->td_md.md_ss_instr); error = ptrace_write_int(td, td->td_md.md_ss_addr, td->td_md.md_ss_instr); PROC_LOCK(p); /* The sync'ing of I & D caches is done by proc_rwmem(). */ if (error != 0) { log(LOG_ERR, "SS %s %d: can't restore instruction at %p: %x\n", p->p_comm, p->p_pid, (void *)td->td_md.md_ss_addr, td->td_md.md_ss_instr); } td->td_md.md_ss_addr = 0; return 0; } diff --git a/sys/mips/nlm/cms.c b/sys/mips/nlm/cms.c index 32ee6e1c9546..4b6ad7c95db4 100644 --- a/sys/mips/nlm/cms.c +++ b/sys/mips/nlm/cms.c @@ -1,498 +1,497 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2003-2011 Netlogic Microsystems (Netlogic). All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY Netlogic Microsystems ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * NETLOGIC_BSD */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MSGRNG_NSTATIONS 1024 /* * Keep track of our message ring handler threads, each core has a * different message station. Ideally we will need to start a few * message handling threads every core, and wake them up depending on * load */ struct msgring_thread { struct thread *thread; /* msgring handler threads */ int needed; /* thread needs to wake up */ }; static struct msgring_thread msgring_threads[XLP_MAX_CORES * XLP_MAX_THREADS]; static struct proc *msgring_proc; /* all threads are under a proc */ /* * The device drivers can register a handler for the messages sent * from a station (corresponding to the device). */ struct tx_stn_handler { msgring_handler action; void *arg; }; static struct tx_stn_handler msgmap[MSGRNG_NSTATIONS]; static struct mtx msgmap_lock; uint32_t xlp_msg_thread_mask; static int xlp_msg_threads_per_core = XLP_MAX_THREADS; static void create_msgring_thread(int hwtid); static int msgring_process_fast_intr(void *arg); /* Debug counters */ static int msgring_nintr[XLP_MAX_CORES * XLP_MAX_THREADS]; static int msgring_wakeup_sleep[XLP_MAX_CORES * XLP_MAX_THREADS]; static int msgring_wakeup_nosleep[XLP_MAX_CORES * XLP_MAX_THREADS]; static int fmn_msgcount[XLP_MAX_CORES * XLP_MAX_THREADS][4]; static int fmn_loops[XLP_MAX_CORES * XLP_MAX_THREADS]; /* Whether polled driver implementation */ static int polled = 0; /* We do only i/o device credit setup here. CPU credit setup is now * moved to xlp_msgring_cpu_init() so that the credits get setup * only if the CPU exists. xlp_msgring_cpu_init() gets called from * platform_init_ap; and this makes it easy for us to setup CMS * credits for various types of XLP chips, with varying number of * cpu's and cores. 
*/ static void xlp_cms_credit_setup(int credit) { uint64_t cmspcibase, cmsbase, pcibase; uint32_t devoffset; int dev, fn, maxqid; int src, qid, i; for (i = 0; i < XLP_MAX_NODES; i++) { cmspcibase = nlm_get_cms_pcibase(i); if (!nlm_dev_exists(XLP_IO_CMS_OFFSET(i))) continue; cmsbase = nlm_get_cms_regbase(i); maxqid = nlm_read_reg(cmspcibase, XLP_PCI_DEVINFO_REG0); for (dev = 0; dev < 8; dev++) { for (fn = 0; fn < 8; fn++) { devoffset = XLP_HDR_OFFSET(i, 0, dev, fn); if (nlm_dev_exists(devoffset) == 0) continue; pcibase = nlm_pcicfg_base(devoffset); src = nlm_qidstart(pcibase); if (src == 0) continue; #if 0 /* Debug */ printf("Setup CMS credits for queues "); printf("[%d to %d] from src %d\n", 0, maxqid, src); #endif for (qid = 0; qid < maxqid; qid++) nlm_cms_setup_credits(cmsbase, qid, src, credit); } } } } void xlp_msgring_cpu_init(int node, int cpu, int credit) { uint64_t cmspcibase = nlm_get_cms_pcibase(node); uint64_t cmsbase = nlm_get_cms_regbase(node); int qid, maxqid, src; maxqid = nlm_read_reg(cmspcibase, XLP_PCI_DEVINFO_REG0); /* cpu credit setup is done only from thread-0 of each core */ if((cpu % 4) == 0) { src = cpu << 2; /* each thread has 4 vc's */ for (qid = 0; qid < maxqid; qid++) nlm_cms_setup_credits(cmsbase, qid, src, credit); } } /* * Drain out max_messages for the buckets set in the bucket mask. * Use max_msgs = 0 to drain out all messages. */ int xlp_handle_msg_vc(u_int vcmask, int max_msgs) { struct nlm_fmn_msg msg; int srcid = 0, size = 0, code = 0; struct tx_stn_handler *he; uint32_t mflags, status; int n_msgs = 0, vc, m, hwtid; u_int msgmask; hwtid = nlm_cpuid(); for (;;) { /* check if VC empty */ mflags = nlm_save_flags_cop2(); status = nlm_read_c2_msgstatus1(); nlm_restore_flags(mflags); msgmask = ((status >> 24) & 0xf) ^ 0xf; msgmask &= vcmask; if (msgmask == 0) break; m = 0; for (vc = 0; vc < 4; vc++) { if ((msgmask & (1 << vc)) == 0) continue; mflags = nlm_save_flags_cop2(); status = nlm_fmn_msgrcv(vc, &srcid, &size, &code, &msg); nlm_restore_flags(mflags); if (status != 0) /* no msg or error */ continue; if (srcid < 0 || srcid >= 1024) { printf("[%s]: bad src id %d\n", __func__, srcid); continue; } he = &msgmap[srcid]; if(he->action != NULL) (he->action)(vc, size, code, srcid, &msg, he->arg); #if 0 else printf("[%s]: No Handler for msg from stn %d," " vc=%d, size=%d, msg0=%jx, droppinge\n", __func__, srcid, vc, size, (uintmax_t)msg.msg[0]); #endif fmn_msgcount[hwtid][vc] += 1; m++; /* msgs handled in this iter */ } if (m == 0) break; /* nothing done in this iter */ n_msgs += m; if (max_msgs > 0 && n_msgs >= max_msgs) break; } return (n_msgs); } static void xlp_discard_msg_vc(u_int vcmask) { struct nlm_fmn_msg msg; int srcid = 0, size = 0, code = 0, vc; uint32_t mflags, status; for (vc = 0; vc < 4; vc++) { for (;;) { mflags = nlm_save_flags_cop2(); status = nlm_fmn_msgrcv(vc, &srcid, &size, &code, &msg); nlm_restore_flags(mflags); /* break if there is no msg or error */ if (status != 0) break; } } } void xlp_cms_enable_intr(int node, int cpu, int type, int watermark) { uint64_t cmsbase; int i, qid; cmsbase = nlm_get_cms_regbase(node); for (i = 0; i < 4; i++) { qid = (i + (cpu * 4)) & 0x7f; nlm_cms_per_queue_level_intr(cmsbase, qid, type, watermark); nlm_cms_per_queue_timer_intr(cmsbase, qid, 0x1, 0); } } static int msgring_process_fast_intr(void *arg) { struct msgring_thread *mthd; struct thread *td; int cpu; cpu = nlm_cpuid(); mthd = &msgring_threads[cpu]; msgring_nintr[cpu]++; td = mthd->thread; /* clear pending interrupts */ nlm_write_c0_eirr(1ULL << 
IRQ_MSGRING); /* wake up the target thread */ mthd->needed = 1; thread_lock(td); if (TD_AWAITING_INTR(td)) { msgring_wakeup_sleep[cpu]++; TD_CLR_IWAIT(td); sched_add(td, SRQ_INTR); } else { thread_unlock(td); msgring_wakeup_nosleep[cpu]++; } return (FILTER_HANDLED); } static void msgring_process(void * arg) { volatile struct msgring_thread *mthd; struct thread *td; uint32_t mflags, msgstatus1; int hwtid, nmsgs; hwtid = (intptr_t)arg; mthd = &msgring_threads[hwtid]; td = mthd->thread; KASSERT(curthread == td, ("%s:msg_ithread and proc linkage out of sync", __func__)); /* First bind this thread to the right CPU */ thread_lock(td); sched_bind(td, xlp_hwtid_to_cpuid[hwtid]); thread_unlock(td); if (hwtid != nlm_cpuid()) printf("Misscheduled hwtid %d != cpuid %d\n", hwtid, nlm_cpuid()); xlp_discard_msg_vc(0xf); xlp_msgring_cpu_init(nlm_nodeid(), nlm_cpuid(), CMS_DEFAULT_CREDIT); if (polled == 0) { mflags = nlm_save_flags_cop2(); nlm_fmn_cpu_init(IRQ_MSGRING, 0, 0, 0, 0, 0); nlm_restore_flags(mflags); xlp_cms_enable_intr(nlm_nodeid(), nlm_cpuid(), 0x2, 0); /* clear pending interrupts. * they will get re-raised if still valid */ nlm_write_c0_eirr(1ULL << IRQ_MSGRING); } /* start processing messages */ for (;;) { atomic_store_rel_int(&mthd->needed, 0); nmsgs = xlp_handle_msg_vc(0xf, 0); /* sleep */ if (polled == 0) { /* clear VC-pend bits */ mflags = nlm_save_flags_cop2(); msgstatus1 = nlm_read_c2_msgstatus1(); msgstatus1 |= (0xf << 16); nlm_write_c2_msgstatus1(msgstatus1); nlm_restore_flags(mflags); thread_lock(td); if (mthd->needed) { thread_unlock(td); continue; } sched_class(td, PRI_ITHD); TD_SET_IWAIT(td); mi_switch(SW_VOL); } else pause("wmsg", 1); fmn_loops[hwtid]++; } } static void create_msgring_thread(int hwtid) { struct msgring_thread *mthd; struct thread *td; int error; mthd = &msgring_threads[hwtid]; error = kproc_kthread_add(msgring_process, (void *)(uintptr_t)hwtid, &msgring_proc, &td, RFSTOPPED, 2, "msgrngproc", "msgthr%d", hwtid); if (error) panic("kproc_kthread_add() failed with %d", error); mthd->thread = td; thread_lock(td); sched_class(td, PRI_ITHD); sched_add(td, SRQ_INTR); } int register_msgring_handler(int startb, int endb, msgring_handler action, void *arg) { int i; if (bootverbose) printf("Register handler %d-%d %p(%p)\n", startb, endb, action, arg); KASSERT(startb >= 0 && startb <= endb && endb < MSGRNG_NSTATIONS, ("Invalid value for bucket range %d,%d", startb, endb)); mtx_lock_spin(&msgmap_lock); for (i = startb; i <= endb; i++) { KASSERT(msgmap[i].action == NULL, ("Bucket %d already used [action %p]", i, msgmap[i].action)); msgmap[i].action = action; msgmap[i].arg = arg; } mtx_unlock_spin(&msgmap_lock); return (0); } /* * Initialize the messaging subsystem. * * Message Stations are shared among all threads in a cpu core, this * has to be called once from every core which is online. 
*/ static void xlp_msgring_config(void *arg) { void *cookie; unsigned int thrmask, mask; int i; /* used polled handler for Ax silion */ if (nlm_is_xlp8xx_ax()) polled = 1; /* Don't poll on all threads, if polled */ if (polled) xlp_msg_threads_per_core -= 1; mtx_init(&msgmap_lock, "msgring", NULL, MTX_SPIN); if (xlp_threads_per_core < xlp_msg_threads_per_core) xlp_msg_threads_per_core = xlp_threads_per_core; thrmask = ((1 << xlp_msg_threads_per_core) - 1); mask = 0; for (i = 0; i < XLP_MAX_CORES; i++) { mask <<= XLP_MAX_THREADS; mask |= thrmask; } xlp_msg_thread_mask = xlp_hw_thread_mask & mask; #if 0 printf("CMS Message handler thread mask %#jx\n", (uintmax_t)xlp_msg_thread_mask); #endif xlp_cms_credit_setup(CMS_DEFAULT_CREDIT); create_msgring_thread(0); cpu_establish_hardintr("msgring", msgring_process_fast_intr, NULL, NULL, IRQ_MSGRING, INTR_TYPE_NET, &cookie); } /* * Start message ring processing threads on other CPUs, after SMP start */ static void start_msgring_threads(void *arg) { int hwt; for (hwt = 1; hwt < XLP_MAX_CORES * XLP_MAX_THREADS; hwt++) { if ((xlp_msg_thread_mask & (1 << hwt)) == 0) continue; create_msgring_thread(hwt); } } SYSINIT(xlp_msgring_config, SI_SUB_DRIVERS, SI_ORDER_FIRST, xlp_msgring_config, NULL); SYSINIT(start_msgring_threads, SI_SUB_SMP, SI_ORDER_MIDDLE, start_msgring_threads, NULL); /* * DEBUG support, XXX: static buffer, not locked */ static int sys_print_debug(SYSCTL_HANDLER_ARGS) { struct sbuf sb; int error, i; sbuf_new_for_sysctl(&sb, NULL, 64, req); sbuf_printf(&sb, "\nID vc0 vc1 vc2 vc3 loops\n"); for (i = 0; i < 32; i++) { if ((xlp_hw_thread_mask & (1 << i)) == 0) continue; sbuf_printf(&sb, "%2d: %8d %8d %8d %8d %8d\n", i, fmn_msgcount[i][0], fmn_msgcount[i][1], fmn_msgcount[i][2], fmn_msgcount[i][3], fmn_loops[i]); } error = sbuf_finish(&sb); sbuf_delete(&sb); return (error); } SYSCTL_PROC(_debug, OID_AUTO, msgring, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 0, 0, sys_print_debug, "A", "msgring debug info"); diff --git a/sys/mips/nlm/dev/net/xlpge.c b/sys/mips/nlm/dev/net/xlpge.c index e9dadf83c7eb..20fe16048212 100644 --- a/sys/mips/nlm/dev/net/xlpge.c +++ b/sys/mips/nlm/dev/net/xlpge.c @@ -1,1542 +1,1541 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2012 Broadcom Corporation * All Rights Reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define __RMAN_RESOURCE_VISIBLE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include /* for DELAY */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #include "miibus_if.h" #include #include /*#define XLP_DRIVER_LOOPBACK*/ static struct nae_port_config nae_port_config[64]; int poe_cl_tbl[MAX_POE_CLASSES] = { 0x0, 0x249249, 0x492492, 0x6db6db, 0x924924, 0xb6db6d, 0xdb6db6, 0xffffff }; /* #define DUMP_PACKET */ static uint64_t nlm_paddr_ld(uint64_t paddr) { uint64_t xkaddr = 0x9800000000000000 | paddr; return (nlm_load_dword_daddr(xkaddr)); } struct nlm_xlp_portdata ifp_ports[64]; static uma_zone_t nl_tx_desc_zone; /* This implementation will register the following tree of device * registration: * pcibus * | * xlpnae (1 instance - virtual entity) * | * xlpge * (18 sgmii / 4 xaui / 2 interlaken instances) * | * miibus */ static int nlm_xlpnae_probe(device_t); static int nlm_xlpnae_attach(device_t); static int nlm_xlpnae_detach(device_t); static int nlm_xlpnae_suspend(device_t); static int nlm_xlpnae_resume(device_t); static int nlm_xlpnae_shutdown(device_t); static device_method_t nlm_xlpnae_methods[] = { /* Methods from the device interface */ DEVMETHOD(device_probe, nlm_xlpnae_probe), DEVMETHOD(device_attach, nlm_xlpnae_attach), DEVMETHOD(device_detach, nlm_xlpnae_detach), DEVMETHOD(device_suspend, nlm_xlpnae_suspend), DEVMETHOD(device_resume, nlm_xlpnae_resume), DEVMETHOD(device_shutdown, nlm_xlpnae_shutdown), DEVMETHOD(bus_driver_added, bus_generic_driver_added), DEVMETHOD_END }; static driver_t nlm_xlpnae_driver = { "xlpnae", nlm_xlpnae_methods, sizeof(struct nlm_xlpnae_softc) }; static devclass_t nlm_xlpnae_devclass; static int nlm_xlpge_probe(device_t); static int nlm_xlpge_attach(device_t); static int nlm_xlpge_detach(device_t); static int nlm_xlpge_suspend(device_t); static int nlm_xlpge_resume(device_t); static int nlm_xlpge_shutdown(device_t); /* mii override functions */ static int nlm_xlpge_mii_read(device_t, int, int); static int nlm_xlpge_mii_write(device_t, int, int, int); static void nlm_xlpge_mii_statchg(device_t); static device_method_t nlm_xlpge_methods[] = { /* Methods from the device interface */ DEVMETHOD(device_probe, nlm_xlpge_probe), DEVMETHOD(device_attach, nlm_xlpge_attach), DEVMETHOD(device_detach, nlm_xlpge_detach), DEVMETHOD(device_suspend, nlm_xlpge_suspend), DEVMETHOD(device_resume, nlm_xlpge_resume), DEVMETHOD(device_shutdown, nlm_xlpge_shutdown), /* Methods from the nexus bus needed for explicitly * probing children when driver is loaded as a kernel module */ 
DEVMETHOD(miibus_readreg, nlm_xlpge_mii_read), DEVMETHOD(miibus_writereg, nlm_xlpge_mii_write), DEVMETHOD(miibus_statchg, nlm_xlpge_mii_statchg), /* Terminate method list */ DEVMETHOD_END }; static driver_t nlm_xlpge_driver = { "xlpge", nlm_xlpge_methods, sizeof(struct nlm_xlpge_softc) }; static devclass_t nlm_xlpge_devclass; DRIVER_MODULE(xlpnae, pci, nlm_xlpnae_driver, nlm_xlpnae_devclass, 0, 0); DRIVER_MODULE(xlpge, xlpnae, nlm_xlpge_driver, nlm_xlpge_devclass, 0, 0); DRIVER_MODULE(miibus, xlpge, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(pci, xlpnae, 1, 1, 1); MODULE_DEPEND(xlpnae, xlpge, 1, 1, 1); MODULE_DEPEND(xlpge, ether, 1, 1, 1); MODULE_DEPEND(xlpge, miibus, 1, 1, 1); #define SGMII_RCV_CONTEXT_WIDTH 8 /* prototypes */ static void nlm_xlpge_msgring_handler(int vc, int size, int code, int srcid, struct nlm_fmn_msg *msg, void *data); static void nlm_xlpge_submit_rx_free_desc(struct nlm_xlpge_softc *sc, int num); static void nlm_xlpge_init(void *addr); static void nlm_xlpge_port_disable(struct nlm_xlpge_softc *sc); static void nlm_xlpge_port_enable(struct nlm_xlpge_softc *sc); /* globals */ int dbg_on = 1; int cntx2port[524]; static __inline void atomic_incr_long(unsigned long *addr) { atomic_add_long(addr, 1); } /* * xlpnae driver implementation */ static int nlm_xlpnae_probe(device_t dev) { if (pci_get_vendor(dev) != PCI_VENDOR_NETLOGIC || pci_get_device(dev) != PCI_DEVICE_ID_NLM_NAE) return (ENXIO); return (BUS_PROBE_DEFAULT); } static void nlm_xlpnae_print_frin_desc_carving(struct nlm_xlpnae_softc *sc) { int intf; uint32_t value; int start, size; /* XXXJC: use max_ports instead of 20 ? */ for (intf = 0; intf < 20; intf++) { nlm_write_nae_reg(sc->base, NAE_FREE_IN_FIFO_CFG, (0x80000000 | intf)); value = nlm_read_nae_reg(sc->base, NAE_FREE_IN_FIFO_CFG); size = 2 * ((value >> 20) & 0x3ff); start = 2 * ((value >> 8) & 0x1ff); } } static void nlm_config_egress(struct nlm_xlpnae_softc *sc, int nblock, int context_base, int hwport, int max_channels) { int offset, num_channels; uint32_t data; num_channels = sc->portcfg[hwport].num_channels; data = (2048 << 12) | (hwport << 4) | 1; nlm_write_nae_reg(sc->base, NAE_TX_IF_BURSTMAX_CMD, data); data = ((context_base + num_channels - 1) << 22) | (context_base << 12) | (hwport << 4) | 1; nlm_write_nae_reg(sc->base, NAE_TX_DDR_ACTVLIST_CMD, data); config_egress_fifo_carvings(sc->base, hwport, context_base, num_channels, max_channels, sc->portcfg); config_egress_fifo_credits(sc->base, hwport, context_base, num_channels, max_channels, sc->portcfg); data = nlm_read_nae_reg(sc->base, NAE_DMA_TX_CREDIT_TH); data |= (1 << 25) | (1 << 24); nlm_write_nae_reg(sc->base, NAE_DMA_TX_CREDIT_TH, data); for (offset = 0; offset < num_channels; offset++) { nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD1, NAE_DRR_QUANTA); data = (hwport << 15) | ((context_base + offset) << 5); if (sc->cmplx_type[nblock] == ILC) data |= (offset << 20); nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD0, data | 1); nlm_write_nae_reg(sc->base, NAE_TX_SCHED_MAP_CMD0, data); } } static int xlpnae_get_maxchannels(struct nlm_xlpnae_softc *sc) { int maxchans = 0; int i; for (i = 0; i < sc->max_ports; i++) { if (sc->portcfg[i].type == UNKNOWN) continue; maxchans += sc->portcfg[i].num_channels; } return (maxchans); } static void nlm_setup_interface(struct nlm_xlpnae_softc *sc, int nblock, int port, uint32_t cur_flow_base, uint32_t flow_mask, int max_channels, int context) { uint64_t nae_base = sc->base; int mtu = 1536; /* XXXJC: don't hard code */ uint32_t ucore_mask; if 
(sc->cmplx_type[nblock] == XAUIC) nlm_config_xaui(nae_base, nblock, mtu, mtu, sc->portcfg[port].vlan_pri_en); nlm_config_freein_fifo_uniq_cfg(nae_base, port, sc->portcfg[port].free_desc_sizes); nlm_config_ucore_iface_mask_cfg(nae_base, port, sc->portcfg[port].ucore_mask); nlm_program_flow_cfg(nae_base, port, cur_flow_base, flow_mask); if (sc->cmplx_type[nblock] == SGMIIC) nlm_configure_sgmii_interface(nae_base, nblock, port, mtu, 0); nlm_config_egress(sc, nblock, context, port, max_channels); nlm_nae_init_netior(nae_base, sc->nblocks); nlm_nae_open_if(nae_base, nblock, sc->cmplx_type[nblock], port, sc->portcfg[port].free_desc_sizes); /* XXXJC: check mask calculation */ ucore_mask = (1 << sc->nucores) - 1; nlm_nae_init_ucore(nae_base, port, ucore_mask); } static void nlm_setup_interfaces(struct nlm_xlpnae_softc *sc) { uint64_t nae_base; uint32_t cur_slot, cur_slot_base; uint32_t cur_flow_base, port, flow_mask; int max_channels; int i, context; cur_slot = 0; cur_slot_base = 0; cur_flow_base = 0; nae_base = sc->base; flow_mask = nlm_get_flow_mask(sc->total_num_ports); /* calculate max_channels */ max_channels = xlpnae_get_maxchannels(sc); port = 0; context = 0; for (i = 0; i < sc->max_ports; i++) { if (sc->portcfg[i].type == UNKNOWN) continue; nlm_setup_interface(sc, sc->portcfg[i].block, i, cur_flow_base, flow_mask, max_channels, context); cur_flow_base += sc->per_port_num_flows; context += sc->portcfg[i].num_channels; } } static void nlm_xlpnae_init(int node, struct nlm_xlpnae_softc *sc) { uint64_t nae_base; uint32_t ucoremask = 0; uint32_t val; int i; nae_base = sc->base; nlm_nae_flush_free_fifo(nae_base, sc->nblocks); nlm_deflate_frin_fifo_carving(nae_base, sc->max_ports); nlm_reset_nae(node); for (i = 0; i < sc->nucores; i++) /* XXXJC: code repeated below */ ucoremask |= (0x1 << i); printf("Loading 0x%x ucores with microcode\n", ucoremask); nlm_ucore_load_all(nae_base, ucoremask, 1); val = nlm_set_device_frequency(node, DFS_DEVICE_NAE, sc->freq); printf("Setup NAE frequency to %dMHz\n", val); nlm_mdio_reset_all(nae_base); printf("Initialze SGMII PCS for blocks 0x%x\n", sc->sgmiimask); nlm_sgmii_pcs_init(nae_base, sc->sgmiimask); printf("Initialze XAUI PCS for blocks 0x%x\n", sc->xauimask); nlm_xaui_pcs_init(nae_base, sc->xauimask); /* clear NETIOR soft reset */ nlm_write_nae_reg(nae_base, NAE_LANE_CFG_SOFTRESET, 0x0); /* Disable RX enable bit in RX_CONFIG */ val = nlm_read_nae_reg(nae_base, NAE_RX_CONFIG); val &= 0xfffffffe; nlm_write_nae_reg(nae_base, NAE_RX_CONFIG, val); if (nlm_is_xlp8xx_ax() == 0) { val = nlm_read_nae_reg(nae_base, NAE_TX_CONFIG); val &= ~(1 << 3); nlm_write_nae_reg(nae_base, NAE_TX_CONFIG, val); } nlm_setup_poe_class_config(nae_base, MAX_POE_CLASSES, sc->ncontexts, poe_cl_tbl); nlm_setup_vfbid_mapping(nae_base); nlm_setup_flow_crc_poly(nae_base, sc->flow_crc_poly); nlm_setup_rx_cal_cfg(nae_base, sc->max_ports, sc->portcfg); /* note: xlp8xx Ax does not have Tx Calendering */ if (!nlm_is_xlp8xx_ax()) nlm_setup_tx_cal_cfg(nae_base, sc->max_ports, sc->portcfg); nlm_setup_interfaces(sc); nlm_config_poe(sc->poe_base, sc->poedv_base); if (sc->hw_parser_en) nlm_enable_hardware_parser(nae_base); if (sc->prepad_en) nlm_prepad_enable(nae_base, sc->prepad_size); if (sc->ieee_1588_en) nlm_setup_1588_timer(sc->base, sc->portcfg); } static void nlm_xlpnae_update_pde(void *dummy __unused) { struct nlm_xlpnae_softc *sc; uint32_t dv[NUM_WORDS_PER_DV]; device_t dev; int vec; dev = devclass_get_device(devclass_find("xlpnae"), 0); sc = device_get_softc(dev); 
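The nlm_xlpnae_update_pde() routine that these two statements open runs from a SYSINIT after SMP start, so it has no device_t argument of its own and locates the single xlpnae instance through the devclass API before reprogramming the POE distribution registers immediately below. A minimal, hypothetical sketch of that lookup-from-SYSINIT pattern follows; the "foo" driver name and its softc are stand-ins, not part of this driver, and only stock newbus calls are used.

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bus.h>

struct foo_softc {
	uint64_t	regbase;	/* whatever state the late update needs */
};

static void
foo_late_update(void *dummy __unused)
{
	device_t dev;
	struct foo_softc *sc;

	/* Unit 0 of the "foo" devclass; NULL if it never attached. */
	dev = devclass_get_device(devclass_find("foo"), 0);
	if (dev == NULL)
		return;
	sc = device_get_softc(dev);

	/* quiesce, reprogram, re-enable -- the POE_DISTR_EN sequence below */
	(void)sc;
}
SYSINIT(foo_late_update, SI_SUB_SMP, SI_ORDER_ANY, foo_late_update, NULL);

The same disable/rewrite/enable ordering is what the surrounding code applies to POE_DISTR_EN so that distribution vectors are never consumed half-updated.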
nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 0); for (vec = 0; vec < NUM_DIST_VEC; vec++) { if (nlm_get_poe_distvec(vec, dv) != 0) continue; nlm_write_poe_distvec(sc->poedv_base, vec, dv); } nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 1); } SYSINIT(nlm_xlpnae_update_pde, SI_SUB_SMP, SI_ORDER_ANY, nlm_xlpnae_update_pde, NULL); /* configuration common for sgmii, xaui, ilaken goes here */ static void nlm_setup_portcfg(struct nlm_xlpnae_softc *sc, struct xlp_nae_ivars *naep, int block, int port) { int i; uint32_t ucore_mask = 0; struct xlp_block_ivars *bp; struct xlp_port_ivars *p; bp = &(naep->block_ivars[block]); p = &(bp->port_ivars[port & 0x3]); sc->portcfg[port].node = p->node; sc->portcfg[port].block = p->block; sc->portcfg[port].port = p->port; sc->portcfg[port].type = p->type; sc->portcfg[port].mdio_bus = p->mdio_bus; sc->portcfg[port].phy_addr = p->phy_addr; sc->portcfg[port].loopback_mode = p->loopback_mode; sc->portcfg[port].num_channels = p->num_channels; if (p->free_desc_sizes != MCLBYTES) { printf("[%d, %d] Error: free_desc_sizes %d != %d\n", block, port, p->free_desc_sizes, MCLBYTES); return; } sc->portcfg[port].free_desc_sizes = p->free_desc_sizes; for (i = 0; i < sc->nucores; i++) /* XXXJC: configure this */ ucore_mask |= (0x1 << i); sc->portcfg[port].ucore_mask = ucore_mask; sc->portcfg[port].vlan_pri_en = p->vlan_pri_en; sc->portcfg[port].num_free_descs = p->num_free_descs; sc->portcfg[port].iface_fifo_size = p->iface_fifo_size; sc->portcfg[port].rxbuf_size = p->rxbuf_size; sc->portcfg[port].rx_slots_reqd = p->rx_slots_reqd; sc->portcfg[port].tx_slots_reqd = p->tx_slots_reqd; sc->portcfg[port].pseq_fifo_size = p->pseq_fifo_size; sc->portcfg[port].stg2_fifo_size = p->stg2_fifo_size; sc->portcfg[port].eh_fifo_size = p->eh_fifo_size; sc->portcfg[port].frout_fifo_size = p->frout_fifo_size; sc->portcfg[port].ms_fifo_size = p->ms_fifo_size; sc->portcfg[port].pkt_fifo_size = p->pkt_fifo_size; sc->portcfg[port].pktlen_fifo_size = p->pktlen_fifo_size; sc->portcfg[port].max_stg2_offset = p->max_stg2_offset; sc->portcfg[port].max_eh_offset = p->max_eh_offset; sc->portcfg[port].max_frout_offset = p->max_frout_offset; sc->portcfg[port].max_ms_offset = p->max_ms_offset; sc->portcfg[port].max_pmem_offset = p->max_pmem_offset; sc->portcfg[port].stg1_2_credit = p->stg1_2_credit; sc->portcfg[port].stg2_eh_credit = p->stg2_eh_credit; sc->portcfg[port].stg2_frout_credit = p->stg2_frout_credit; sc->portcfg[port].stg2_ms_credit = p->stg2_ms_credit; sc->portcfg[port].ieee1588_inc_intg = p->ieee1588_inc_intg; sc->portcfg[port].ieee1588_inc_den = p->ieee1588_inc_den; sc->portcfg[port].ieee1588_inc_num = p->ieee1588_inc_num; sc->portcfg[port].ieee1588_userval = p->ieee1588_userval; sc->portcfg[port].ieee1588_ptpoff = p->ieee1588_ptpoff; sc->portcfg[port].ieee1588_tmr1 = p->ieee1588_tmr1; sc->portcfg[port].ieee1588_tmr2 = p->ieee1588_tmr2; sc->portcfg[port].ieee1588_tmr3 = p->ieee1588_tmr3; sc->total_free_desc += sc->portcfg[port].free_desc_sizes; sc->total_num_ports++; } static int nlm_xlpnae_attach(device_t dev) { struct xlp_nae_ivars *nae_ivars; struct nlm_xlpnae_softc *sc; device_t tmpd; uint32_t dv[NUM_WORDS_PER_DV]; int port, i, j, nchan, nblock, node, qstart, qnum; int offset, context, txq_base, rxvcbase; uint64_t poe_pcibase, nae_pcibase; node = pci_get_slot(dev) / 8; nae_ivars = &xlp_board_info.nodes[node].nae_ivars; sc = device_get_softc(dev); sc->xlpnae_dev = dev; sc->node = nae_ivars->node; sc->base = nlm_get_nae_regbase(sc->node); sc->poe_base = nlm_get_poe_regbase(sc->node); 
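Throughout this attach path a single global port index is folded into a (block, port-within-block) pair and back again: nlm_setup_portcfg() above indexes the ivars with port & 0x3, and the xlpge children later rebuild the global index as block * 4 + port. A small standalone sketch of that arithmetic, assuming four ports per NAE complex as those expressions imply:

/* Sketch of the global-port <-> (block, lane) mapping used in this file;
 * 4 ports per complex is an assumption read off the "& 0x3" and
 * "block * 4 + port" expressions, not a value taken from the headers. */
#define SKETCH_PORTS_PER_BLOCK	4

static inline int
port_to_block(int gport)
{
	return (gport / SKETCH_PORTS_PER_BLOCK);	/* which NAE complex */
}

static inline int
port_to_lane(int gport)
{
	return (gport & (SKETCH_PORTS_PER_BLOCK - 1));	/* port within it */
}

static inline int
block_lane_to_gport(int block, int lane)
{
	return (block * SKETCH_PORTS_PER_BLOCK + lane);
}

For example, global port 13 resolves to block 3, lane 1, and block_lane_to_gport(3, 1) returns 13; the 8xx case with five blocks caps max_ports at 18 because block 4 carries only two ports, as the attach code below notes.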
sc->poedv_base = nlm_get_poedv_regbase(sc->node); sc->portcfg = nae_port_config; sc->blockmask = nae_ivars->blockmask; sc->ilmask = nae_ivars->ilmask; sc->xauimask = nae_ivars->xauimask; sc->sgmiimask = nae_ivars->sgmiimask; sc->nblocks = nae_ivars->nblocks; sc->freq = nae_ivars->freq; /* flow table generation is done by CRC16 polynomial */ sc->flow_crc_poly = nae_ivars->flow_crc_poly; sc->hw_parser_en = nae_ivars->hw_parser_en; sc->prepad_en = nae_ivars->prepad_en; sc->prepad_size = nae_ivars->prepad_size; sc->ieee_1588_en = nae_ivars->ieee_1588_en; nae_pcibase = nlm_get_nae_pcibase(sc->node); sc->ncontexts = nlm_read_reg(nae_pcibase, XLP_PCI_DEVINFO_REG5); sc->nucores = nlm_num_uengines(nae_pcibase); for (nblock = 0; nblock < sc->nblocks; nblock++) { sc->cmplx_type[nblock] = nae_ivars->block_ivars[nblock].type; sc->portmask[nblock] = nae_ivars->block_ivars[nblock].portmask; } for (i = 0; i < sc->ncontexts; i++) cntx2port[i] = 18; /* 18 is an invalid port */ if (sc->nblocks == 5) sc->max_ports = 18; /* 8xx has a block 4 with 2 ports */ else sc->max_ports = sc->nblocks * PORTS_PER_CMPLX; for (i = 0; i < sc->max_ports; i++) sc->portcfg[i].type = UNKNOWN; /* Port Not Present */ /* * Now setup all internal fifo carvings based on * total number of ports in the system */ sc->total_free_desc = 0; sc->total_num_ports = 0; port = 0; context = 0; txq_base = nlm_qidstart(nae_pcibase); rxvcbase = txq_base + sc->ncontexts; for (i = 0; i < sc->nblocks; i++) { uint32_t portmask; if ((nae_ivars->blockmask & (1 << i)) == 0) { port += 4; continue; } portmask = nae_ivars->block_ivars[i].portmask; for (j = 0; j < PORTS_PER_CMPLX; j++, port++) { if ((portmask & (1 << j)) == 0) continue; nlm_setup_portcfg(sc, nae_ivars, i, port); nchan = sc->portcfg[port].num_channels; for (offset = 0; offset < nchan; offset++) cntx2port[context + offset] = port; sc->portcfg[port].txq = txq_base + context; sc->portcfg[port].rxfreeq = rxvcbase + port; context += nchan; } } poe_pcibase = nlm_get_poe_pcibase(sc->node); sc->per_port_num_flows = nlm_poe_max_flows(poe_pcibase) / sc->total_num_ports; /* zone for P2P descriptors */ nl_tx_desc_zone = uma_zcreate("NL Tx Desc", sizeof(struct xlpge_tx_desc), NULL, NULL, NULL, NULL, NAE_CACHELINE_SIZE, 0); /* NAE FMN messages have CMS src station id's in the * range of qstart to qnum. */ qstart = nlm_qidstart(nae_pcibase); qnum = nlm_qnum(nae_pcibase); if (register_msgring_handler(qstart, qstart + qnum - 1, nlm_xlpge_msgring_handler, sc)) { panic("Couldn't register NAE msgring handler\n"); } /* POE FMN messages have CMS src station id's in the * range of qstart to qnum. 
*/ qstart = nlm_qidstart(poe_pcibase); qnum = nlm_qnum(poe_pcibase); if (register_msgring_handler(qstart, qstart + qnum - 1, nlm_xlpge_msgring_handler, sc)) { panic("Couldn't register POE msgring handler\n"); } nlm_xlpnae_init(node, sc); for (i = 0; i < sc->max_ports; i++) { char desc[32]; int block, port; if (sc->portcfg[i].type == UNKNOWN) continue; block = sc->portcfg[i].block; port = sc->portcfg[i].port; tmpd = device_add_child(dev, "xlpge", i); device_set_ivars(tmpd, &(nae_ivars->block_ivars[block].port_ivars[port])); sprintf(desc, "XLP NAE Port %d,%d", block, port); device_set_desc_copy(tmpd, desc); } nlm_setup_iface_fifo_cfg(sc->base, sc->max_ports, sc->portcfg); nlm_setup_rx_base_config(sc->base, sc->max_ports, sc->portcfg); nlm_setup_rx_buf_config(sc->base, sc->max_ports, sc->portcfg); nlm_setup_freein_fifo_cfg(sc->base, sc->portcfg); nlm_program_nae_parser_seq_fifo(sc->base, sc->max_ports, sc->portcfg); nlm_xlpnae_print_frin_desc_carving(sc); bus_generic_probe(dev); bus_generic_attach(dev); /* * Enable only boot cpu at this point, full distribution comes * only after SMP is started */ nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 0); nlm_calc_poe_distvec(0x1, 0, 0, 0, 0x1 << XLPGE_RX_VC, dv); nlm_write_poe_distvec(sc->poedv_base, 0, dv); nlm_write_poe_reg(sc->poe_base, POE_DISTR_EN, 1); return (0); } static int nlm_xlpnae_detach(device_t dev) { /* TODO - free zone here */ return (0); } static int nlm_xlpnae_suspend(device_t dev) { return (0); } static int nlm_xlpnae_resume(device_t dev) { return (0); } static int nlm_xlpnae_shutdown(device_t dev) { return (0); } /* * xlpge driver implementation */ static void nlm_xlpge_mac_set_rx_mode(struct nlm_xlpge_softc *sc) { if (sc->if_flags & IFF_PROMISC) { if (sc->type == SGMIIC) nlm_nae_setup_rx_mode_sgmii(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 1 /* promisc */); else nlm_nae_setup_rx_mode_xaui(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 1 /* promisc */); } else { if (sc->type == SGMIIC) nlm_nae_setup_rx_mode_sgmii(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 0 /* promisc */); else nlm_nae_setup_rx_mode_xaui(sc->base_addr, sc->block, sc->port, sc->type, 1 /* broadcast */, 1/* multicast */, 0 /* pause */, 0 /* promisc */); } } static int nlm_xlpge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct mii_data *mii; struct nlm_xlpge_softc *sc; struct ifreq *ifr; int error; sc = ifp->if_softc; error = 0; ifr = (struct ifreq *)data; switch (command) { case SIOCSIFFLAGS: XLPGE_LOCK(sc); sc->if_flags = ifp->if_flags; if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) nlm_xlpge_init(sc); else nlm_xlpge_port_enable(sc); nlm_xlpge_mac_set_rx_mode(sc); sc->link = NLM_LINK_UP; } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) nlm_xlpge_port_disable(sc); sc->link = NLM_LINK_DOWN; } XLPGE_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: if (sc->mii_bus != NULL) { mii = device_get_softc(sc->mii_bus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int xlpge_tx(struct ifnet *ifp, struct mbuf *mbuf_chain) { struct nlm_fmn_msg msg; struct xlpge_tx_desc *p2p; struct nlm_xlpge_softc *sc; struct mbuf *m; vm_paddr_t paddr; int fbid, dst, pos, err; int ret = 0, tx_msgstatus, retries; err = 0; if (mbuf_chain == NULL) return (0); sc = 
ifp->if_softc; p2p = NULL; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING) || ifp->if_drv_flags & IFF_DRV_OACTIVE) { err = ENXIO; goto fail; } /* free a few in coming messages on the fb vc */ xlp_handle_msg_vc(1 << XLPGE_FB_VC, 2); /* vfb id table is setup to map cpu to vc 3 of the cpu */ fbid = nlm_cpuid(); dst = sc->txq; pos = 0; p2p = uma_zalloc(nl_tx_desc_zone, M_NOWAIT); if (p2p == NULL) { printf("alloc fail\n"); err = ENOBUFS; goto fail; } for (m = mbuf_chain; m != NULL; m = m->m_next) { vm_offset_t buf = (vm_offset_t) m->m_data; int len = m->m_len; int frag_sz; uint64_t desc; /*printf("m_data = %p len %d\n", m->m_data, len); */ while (len) { if (pos == XLP_NTXFRAGS - 3) { device_printf(sc->xlpge_dev, "packet defrag %d\n", m_length(mbuf_chain, NULL)); err = ENOBUFS; /* TODO fix error */ goto fail; } paddr = vtophys(buf); frag_sz = PAGE_SIZE - (buf & PAGE_MASK); if (len < frag_sz) frag_sz = len; desc = nae_tx_desc(P2D_NEOP, 0, 127, frag_sz, paddr); p2p->frag[pos] = htobe64(desc); pos++; len -= frag_sz; buf += frag_sz; } } KASSERT(pos != 0, ("Zero-length mbuf chain?\n")); /* Make the last one P2D EOP */ p2p->frag[pos-1] |= htobe64((uint64_t)P2D_EOP << 62); /* stash useful pointers in the desc */ p2p->frag[XLP_NTXFRAGS-3] = 0xf00bad; p2p->frag[XLP_NTXFRAGS-2] = (uintptr_t)p2p; p2p->frag[XLP_NTXFRAGS-1] = (uintptr_t)mbuf_chain; paddr = vtophys(p2p); msg.msg[0] = nae_tx_desc(P2P, 0, fbid, pos, paddr); for (retries = 16; retries > 0; retries--) { ret = nlm_fmn_msgsend(dst, 1, FMN_SWCODE_NAE, &msg); if (ret == 0) return (0); } fail: if (ret != 0) { tx_msgstatus = nlm_read_c2_txmsgstatus(); if ((tx_msgstatus >> 24) & 0x1) device_printf(sc->xlpge_dev, "Transmit queue full - "); if ((tx_msgstatus >> 3) & 0x1) device_printf(sc->xlpge_dev, "ECC error - "); if ((tx_msgstatus >> 2) & 0x1) device_printf(sc->xlpge_dev, "Pending Sync - "); if ((tx_msgstatus >> 1) & 0x1) device_printf(sc->xlpge_dev, "Insufficient input queue credits - "); if (tx_msgstatus & 0x1) device_printf(sc->xlpge_dev, "Insufficient output queue credits - "); } device_printf(sc->xlpge_dev, "Send failed! 
err = %d\n", err); if (p2p) uma_zfree(nl_tx_desc_zone, p2p); m_freem(mbuf_chain); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (err); } static int nlm_xlpge_gmac_config_speed(struct nlm_xlpge_softc *sc) { struct mii_data *mii; if (sc->type == XAUIC || sc->type == ILC) return (0); if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); mii_pollstat(mii); } return (0); } static void nlm_xlpge_port_disable(struct nlm_xlpge_softc *sc) { struct ifnet *ifp; ifp = sc->xlpge_if; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; callout_stop(&sc->xlpge_callout); nlm_mac_disable(sc->base_addr, sc->block, sc->type, sc->port); } static void nlm_mii_pollstat(void *arg) { struct nlm_xlpge_softc *sc = (struct nlm_xlpge_softc *)arg; struct mii_data *mii = NULL; if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); KASSERT(mii != NULL, ("mii ptr is NULL")); mii_pollstat(mii); callout_reset(&sc->xlpge_callout, hz, nlm_mii_pollstat, sc); } } static void nlm_xlpge_port_enable(struct nlm_xlpge_softc *sc) { if ((sc->type != SGMIIC) && (sc->type != XAUIC)) return; nlm_mac_enable(sc->base_addr, sc->block, sc->type, sc->port); nlm_mii_pollstat((void *)sc); } static void nlm_xlpge_init(void *addr) { struct nlm_xlpge_softc *sc; struct ifnet *ifp; struct mii_data *mii = NULL; sc = (struct nlm_xlpge_softc *)addr; ifp = sc->xlpge_if; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; if (sc->mii_bus) { mii = device_get_softc(sc->mii_bus); mii_mediachg(mii); } nlm_xlpge_gmac_config_speed(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; nlm_xlpge_port_enable(sc); /* start the callout */ callout_reset(&sc->xlpge_callout, hz, nlm_mii_pollstat, sc); } /* * Read the MAC address from FDT or board eeprom. */ static void xlpge_read_mac_addr(struct nlm_xlpge_softc *sc) { xlpge_get_macaddr(sc->dev_addr); /* last octet is port specific */ sc->dev_addr[5] += (sc->block * 4) + sc->port; if (sc->type == SGMIIC) nlm_nae_setup_mac_addr_sgmii(sc->base_addr, sc->block, sc->port, sc->type, sc->dev_addr); else if (sc->type == XAUIC) nlm_nae_setup_mac_addr_xaui(sc->base_addr, sc->block, sc->port, sc->type, sc->dev_addr); } static int xlpge_mediachange(struct ifnet *ifp) { return (0); } static void xlpge_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct nlm_xlpge_softc *sc; struct mii_data *md; md = NULL; sc = ifp->if_softc; if (sc->mii_bus) md = device_get_softc(sc->mii_bus); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->link == NLM_LINK_DOWN) return; if (md != NULL) ifmr->ifm_active = md->mii_media.ifm_cur->ifm_media; ifmr->ifm_status |= IFM_ACTIVE; } static int nlm_xlpge_ifinit(struct nlm_xlpge_softc *sc) { struct ifnet *ifp; device_t dev; int port = sc->block * 4 + sc->port; dev = sc->xlpge_dev; ifp = sc->xlpge_if = if_alloc(IFT_ETHER); /*(sc->network_sc)->ifp_ports[port].xlpge_if = ifp;*/ ifp_ports[port].xlpge_if = ifp; if (ifp == NULL) { device_printf(dev, "cannot if_alloc()\n"); return (ENOSPC); } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; sc->if_flags = ifp->if_flags; /*ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_VLAN_HWTAGGING;*/ ifp->if_capabilities = 0; ifp->if_capenable = ifp->if_capabilities; ifp->if_ioctl = nlm_xlpge_ioctl; ifp->if_init = nlm_xlpge_init ; ifp->if_hwassist = 0; ifp->if_snd.ifq_drv_maxlen = NLM_XLPGE_TXQ_SIZE; /* TODO: make this a sysint */ IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); 
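In the xlpge_tx() transmit path earlier in this file, an mbuf's data is not handed to the NAE in one piece: vtophys() only yields a physical address valid within a single page, so each buffer is carved at page boundaries into separate P2D fragments before the descriptors are built. A minimal sketch of just that carving loop is below; it assumes kernel context (PAGE_SIZE and PAGE_MASK via <sys/param.h>, vtophys() from the machine-dependent pmap headers), and emit_frag() is a placeholder for the nae_tx_desc()/htobe64() step, not a function in this driver.

#include <sys/param.h>
#include <vm/vm.h>

/* Split [buf, buf + len) so that every fragment stays inside one page. */
static void
carve_at_page_boundaries(vm_offset_t buf, int len,
    void (*emit_frag)(vm_paddr_t pa, int sz))
{
	int frag_sz;

	while (len > 0) {
		frag_sz = PAGE_SIZE - (buf & PAGE_MASK); /* room left in page */
		if (len < frag_sz)
			frag_sz = len;			 /* final short piece */
		emit_frag(vtophys(buf), frag_sz);
		buf += frag_sz;
		len -= frag_sz;
	}
}

A page-aligned buffer produces full-page fragments until the tail; an unaligned one first emits the remainder of its starting page, which is exactly the frag_sz computation xlpge_tx() uses before building each descriptor.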
ifmedia_init(&sc->xlpge_mii.mii_media, 0, xlpge_mediachange, xlpge_mediastatus); ifmedia_add(&sc->xlpge_mii.mii_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->xlpge_mii.mii_media, IFM_ETHER | IFM_AUTO); sc->xlpge_mii.mii_media.ifm_media = sc->xlpge_mii.mii_media.ifm_cur->ifm_media; xlpge_read_mac_addr(sc); ether_ifattach(ifp, sc->dev_addr); /* override if_transmit : per ifnet(9), do it after if_attach */ ifp->if_transmit = xlpge_tx; return (0); } static int nlm_xlpge_probe(device_t dev) { return (BUS_PROBE_DEFAULT); } static void * get_buf(void) { struct mbuf *m_new; uint64_t *md; #ifdef INVARIANTS vm_paddr_t temp1, temp2; #endif if ((m_new = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR)) == NULL) return (NULL); m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; KASSERT(((uintptr_t)m_new->m_data & (NAE_CACHELINE_SIZE - 1)) == 0, ("m_new->m_data is not cacheline aligned")); md = (uint64_t *)m_new->m_data; md[0] = (intptr_t)m_new; /* Back Ptr */ md[1] = 0xf00bad; m_adj(m_new, NAE_CACHELINE_SIZE); #ifdef INVARIANTS temp1 = vtophys((vm_offset_t) m_new->m_data); temp2 = vtophys((vm_offset_t) m_new->m_data + 1536); KASSERT((temp1 + 1536) == temp2, ("Alloced buffer is not contiguous")); #endif return ((void *)m_new->m_data); } static void nlm_xlpge_mii_init(device_t dev, struct nlm_xlpge_softc *sc) { int error; error = mii_attach(dev, &sc->mii_bus, sc->xlpge_if, xlpge_mediachange, xlpge_mediastatus, BMSR_DEFCAPMASK, sc->phy_addr, MII_OFFSET_ANY, 0); if (error) { device_printf(dev, "attaching PHYs failed\n"); sc->mii_bus = NULL; } if (sc->mii_bus != NULL) { /* enable MDIO interrupts in the PHY */ /* XXXJC: TODO */ } } static int xlpge_stats_sysctl(SYSCTL_HANDLER_ARGS) { struct nlm_xlpge_softc *sc; uint32_t val; int reg, field; sc = arg1; field = arg2; reg = SGMII_STATS_MLR(sc->block, sc->port) + field; val = nlm_read_nae_reg(sc->base_addr, reg); return (sysctl_handle_int(oidp, &val, 0, req)); } static void nlm_xlpge_setup_stats_sysctl(device_t dev, struct nlm_xlpge_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); #define XLPGE_STAT(name, offset, desc) \ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, name, \ CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, \ sc, offset, xlpge_stats_sysctl, "IU", desc) XLPGE_STAT("tr127", nlm_sgmii_stats_tr127, "TxRx 64 - 127 Bytes"); XLPGE_STAT("tr255", nlm_sgmii_stats_tr255, "TxRx 128 - 255 Bytes"); XLPGE_STAT("tr511", nlm_sgmii_stats_tr511, "TxRx 256 - 511 Bytes"); XLPGE_STAT("tr1k", nlm_sgmii_stats_tr1k, "TxRx 512 - 1023 Bytes"); XLPGE_STAT("trmax", nlm_sgmii_stats_trmax, "TxRx 1024 - 1518 Bytes"); XLPGE_STAT("trmgv", nlm_sgmii_stats_trmgv, "TxRx 1519 - 1522 Bytes"); XLPGE_STAT("rbyt", nlm_sgmii_stats_rbyt, "Rx Bytes"); XLPGE_STAT("rpkt", nlm_sgmii_stats_rpkt, "Rx Packets"); XLPGE_STAT("rfcs", nlm_sgmii_stats_rfcs, "Rx FCS Error"); XLPGE_STAT("rmca", nlm_sgmii_stats_rmca, "Rx Multicast Packets"); XLPGE_STAT("rbca", nlm_sgmii_stats_rbca, "Rx Broadcast Packets"); XLPGE_STAT("rxcf", nlm_sgmii_stats_rxcf, "Rx Control Frames"); XLPGE_STAT("rxpf", nlm_sgmii_stats_rxpf, "Rx Pause Frames"); XLPGE_STAT("rxuo", nlm_sgmii_stats_rxuo, "Rx Unknown Opcode"); XLPGE_STAT("raln", nlm_sgmii_stats_raln, "Rx Alignment Errors"); XLPGE_STAT("rflr", nlm_sgmii_stats_rflr, "Rx Framelength Errors"); XLPGE_STAT("rcde", nlm_sgmii_stats_rcde, "Rx Code Errors"); XLPGE_STAT("rcse", nlm_sgmii_stats_rcse, "Rx Carrier Sense Errors"); XLPGE_STAT("rund", 
nlm_sgmii_stats_rund, "Rx Undersize Packet Errors"); XLPGE_STAT("rovr", nlm_sgmii_stats_rovr, "Rx Oversize Packet Errors"); XLPGE_STAT("rfrg", nlm_sgmii_stats_rfrg, "Rx Fragments"); XLPGE_STAT("rjbr", nlm_sgmii_stats_rjbr, "Rx Jabber"); XLPGE_STAT("tbyt", nlm_sgmii_stats_tbyt, "Tx Bytes"); XLPGE_STAT("tpkt", nlm_sgmii_stats_tpkt, "Tx Packets"); XLPGE_STAT("tmca", nlm_sgmii_stats_tmca, "Tx Multicast Packets"); XLPGE_STAT("tbca", nlm_sgmii_stats_tbca, "Tx Broadcast Packets"); XLPGE_STAT("txpf", nlm_sgmii_stats_txpf, "Tx Pause Frame"); XLPGE_STAT("tdfr", nlm_sgmii_stats_tdfr, "Tx Deferral Packets"); XLPGE_STAT("tedf", nlm_sgmii_stats_tedf, "Tx Excessive Deferral Pkts"); XLPGE_STAT("tscl", nlm_sgmii_stats_tscl, "Tx Single Collisions"); XLPGE_STAT("tmcl", nlm_sgmii_stats_tmcl, "Tx Multiple Collisions"); XLPGE_STAT("tlcl", nlm_sgmii_stats_tlcl, "Tx Late Collision Pkts"); XLPGE_STAT("txcl", nlm_sgmii_stats_txcl, "Tx Excessive Collisions"); XLPGE_STAT("tncl", nlm_sgmii_stats_tncl, "Tx Total Collisions"); XLPGE_STAT("tjbr", nlm_sgmii_stats_tjbr, "Tx Jabber Frames"); XLPGE_STAT("tfcs", nlm_sgmii_stats_tfcs, "Tx FCS Errors"); XLPGE_STAT("txcf", nlm_sgmii_stats_txcf, "Tx Control Frames"); XLPGE_STAT("tovr", nlm_sgmii_stats_tovr, "Tx Oversize Frames"); XLPGE_STAT("tund", nlm_sgmii_stats_tund, "Tx Undersize Frames"); XLPGE_STAT("tfrg", nlm_sgmii_stats_tfrg, "Tx Fragments"); #undef XLPGE_STAT } static int nlm_xlpge_attach(device_t dev) { struct xlp_port_ivars *pv; struct nlm_xlpge_softc *sc; int port; pv = device_get_ivars(dev); sc = device_get_softc(dev); sc->xlpge_dev = dev; sc->mii_bus = NULL; sc->block = pv->block; sc->node = pv->node; sc->port = pv->port; sc->type = pv->type; sc->xlpge_if = NULL; sc->phy_addr = pv->phy_addr; sc->mdio_bus = pv->mdio_bus; sc->portcfg = nae_port_config; sc->hw_parser_en = pv->hw_parser_en; /* default settings */ sc->speed = NLM_SGMII_SPEED_10; sc->duplexity = NLM_SGMII_DUPLEX_FULL; sc->link = NLM_LINK_DOWN; sc->flowctrl = NLM_FLOWCTRL_DISABLED; sc->network_sc = device_get_softc(device_get_parent(dev)); sc->base_addr = sc->network_sc->base; sc->prepad_en = sc->network_sc->prepad_en; sc->prepad_size = sc->network_sc->prepad_size; callout_init(&sc->xlpge_callout, 1); XLPGE_LOCK_INIT(sc, device_get_nameunit(dev)); port = (sc->block*4)+sc->port; sc->nfree_desc = nae_port_config[port].num_free_descs; sc->txq = nae_port_config[port].txq; sc->rxfreeq = nae_port_config[port].rxfreeq; nlm_xlpge_submit_rx_free_desc(sc, sc->nfree_desc); if (sc->hw_parser_en) nlm_enable_hardware_parser_per_port(sc->base_addr, sc->block, sc->port); nlm_xlpge_ifinit(sc); ifp_ports[port].xlpge_sc = sc; nlm_xlpge_mii_init(dev, sc); nlm_xlpge_setup_stats_sysctl(dev, sc); return (0); } static int nlm_xlpge_detach(device_t dev) { return (0); } static int nlm_xlpge_suspend(device_t dev) { return (0); } static int nlm_xlpge_resume(device_t dev) { return (0); } static int nlm_xlpge_shutdown(device_t dev) { return (0); } /* * miibus function with custom implementation */ static int nlm_xlpge_mii_read(device_t dev, int phyaddr, int regidx) { struct nlm_xlpge_softc *sc; int val; sc = device_get_softc(dev); if (sc->type == SGMIIC) val = nlm_gmac_mdio_read(sc->base_addr, sc->mdio_bus, BLOCK_7, LANE_CFG, phyaddr, regidx); else val = 0xffff; return (val); } static int nlm_xlpge_mii_write(device_t dev, int phyaddr, int regidx, int val) { struct nlm_xlpge_softc *sc; sc = device_get_softc(dev); if (sc->type == SGMIIC) nlm_gmac_mdio_write(sc->base_addr, sc->mdio_bus, BLOCK_7, LANE_CFG, phyaddr, regidx, val); return 
(0); } static void nlm_xlpge_mii_statchg(device_t dev) { struct nlm_xlpge_softc *sc; struct mii_data *mii; char *speed, *duplexity; sc = device_get_softc(dev); if (sc->mii_bus == NULL) return; mii = device_get_softc(sc->mii_bus); if (mii->mii_media_status & IFM_ACTIVE) { if (IFM_SUBTYPE(mii->mii_media_active) == IFM_10_T) { sc->speed = NLM_SGMII_SPEED_10; speed = "10Mbps"; } else if (IFM_SUBTYPE(mii->mii_media_active) == IFM_100_TX) { sc->speed = NLM_SGMII_SPEED_100; speed = "100Mbps"; } else { /* default to 1G */ sc->speed = NLM_SGMII_SPEED_1000; speed = "1Gbps"; } if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) { sc->duplexity = NLM_SGMII_DUPLEX_FULL; duplexity = "full"; } else { sc->duplexity = NLM_SGMII_DUPLEX_HALF; duplexity = "half"; } printf("Port [%d, %d] setup with speed=%s duplex=%s\n", sc->block, sc->port, speed, duplexity); nlm_nae_setup_mac(sc->base_addr, sc->block, sc->port, 0, 1, 1, sc->speed, sc->duplexity); } } /* * xlpge support function implementations */ static void nlm_xlpge_release_mbuf(uint64_t paddr) { uint64_t mag, desc, mbuf; paddr += (XLP_NTXFRAGS - 3) * sizeof(uint64_t); mag = nlm_paddr_ld(paddr); desc = nlm_paddr_ld(paddr + sizeof(uint64_t)); mbuf = nlm_paddr_ld(paddr + 2 * sizeof(uint64_t)); if (mag != 0xf00bad) { /* somebody else packet Error - FIXME in intialization */ printf("cpu %d: ERR Tx packet paddr %jx, mag %jx, desc %jx mbuf %jx\n", nlm_cpuid(), (uintmax_t)paddr, (uintmax_t)mag, (intmax_t)desc, (uintmax_t)mbuf); return; } m_freem((struct mbuf *)(uintptr_t)mbuf); uma_zfree(nl_tx_desc_zone, (void *)(uintptr_t)desc); } static void nlm_xlpge_rx(struct nlm_xlpge_softc *sc, int port, vm_paddr_t paddr, int len) { struct ifnet *ifp; struct mbuf *m; vm_offset_t temp; unsigned long mag; int prepad_size; ifp = sc->xlpge_if; temp = nlm_paddr_ld(paddr - NAE_CACHELINE_SIZE); mag = nlm_paddr_ld(paddr - NAE_CACHELINE_SIZE + sizeof(uint64_t)); m = (struct mbuf *)(intptr_t)temp; if (mag != 0xf00bad) { /* somebody else packet Error - FIXME in intialization */ printf("cpu %d: ERR Rx packet paddr %jx, temp %p, mag %lx\n", nlm_cpuid(), (uintmax_t)paddr, (void *)temp, mag); return; } m->m_pkthdr.rcvif = ifp; #ifdef DUMP_PACKET { int i = 0, j = 64; unsigned char *buf = (char *)m->m_data; printf("(cpu_%d: nlge_rx, !RX_COPY) Rx Packet: length=%d\n", nlm_cpuid(), len); if (len < j) j = len; if (sc->prepad_en) j += ((sc->prepad_size + 1) * 16); for (i = 0; i < j; i++) { if (i && (i % 16) == 0) printf("\n"); printf("%02x ", buf[i]); } printf("\n"); } #endif if (sc->prepad_en) { prepad_size = ((sc->prepad_size + 1) * 16); m->m_data += prepad_size; m->m_pkthdr.len = m->m_len = (len - prepad_size); } else m->m_pkthdr.len = m->m_len = len; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); #ifdef XLP_DRIVER_LOOPBACK if (port == 16 || port == 17) (*ifp->if_input)(ifp, m); else xlpge_tx(ifp, m); #else (*ifp->if_input)(ifp, m); #endif } void nlm_xlpge_submit_rx_free_desc(struct nlm_xlpge_softc *sc, int num) { int i, size, ret, n; struct nlm_fmn_msg msg; void *ptr; for(i = 0; i < num; i++) { memset(&msg, 0, sizeof(msg)); ptr = get_buf(); if (!ptr) { device_printf(sc->xlpge_dev, "Cannot allocate mbuf\n"); break; } msg.msg[0] = vtophys(ptr); if (msg.msg[0] == 0) { printf("Bad ptr for %p\n", ptr); break; } size = 1; n = 0; while (1) { /* on success returns 1, else 0 */ ret = nlm_fmn_msgsend(sc->rxfreeq, size, 0, &msg); if (ret == 0) break; if (n++ > 10000) { printf("Too many credit fails for send free desc\n"); break; } } } } void nlm_xlpge_msgring_handler(int vc, int size, int code, int 
src_id, struct nlm_fmn_msg *msg, void *data) { uint64_t phys_addr; struct nlm_xlpnae_softc *sc; struct nlm_xlpge_softc *xlpge_sc; struct ifnet *ifp; uint32_t context; uint32_t port = 0; uint32_t length; sc = (struct nlm_xlpnae_softc *)data; KASSERT(sc != NULL, ("Null sc in msgring handler")); if (size == 1) { /* process transmit complete */ phys_addr = msg->msg[0] & 0xffffffffffULL; /* context is SGMII_RCV_CONTEXT_NUM + three bit vlan type * or vlan priority */ context = (msg->msg[0] >> 40) & 0x3fff; port = cntx2port[context]; if (port >= XLP_MAX_PORTS) { printf("%s:%d Bad port %d (context=%d)\n", __func__, __LINE__, port, context); return; } ifp = ifp_ports[port].xlpge_if; xlpge_sc = ifp_ports[port].xlpge_sc; nlm_xlpge_release_mbuf(phys_addr); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } else if (size > 1) { /* Recieve packet */ phys_addr = msg->msg[1] & 0xffffffffc0ULL; length = (msg->msg[1] >> 40) & 0x3fff; length -= MAC_CRC_LEN; /* context is SGMII_RCV_CONTEXT_NUM + three bit vlan type * or vlan priority */ context = (msg->msg[1] >> 54) & 0x3ff; port = cntx2port[context]; if (port >= XLP_MAX_PORTS) { printf("%s:%d Bad port %d (context=%d)\n", __func__, __LINE__, port, context); return; } ifp = ifp_ports[port].xlpge_if; xlpge_sc = ifp_ports[port].xlpge_sc; nlm_xlpge_rx(xlpge_sc, port, phys_addr, length); /* return back a free descriptor to NA */ nlm_xlpge_submit_rx_free_desc(xlpge_sc, 1); } } diff --git a/sys/powerpc/aim/aim_machdep.c b/sys/powerpc/aim/aim_machdep.c index 784207e9a70d..d582489d9f7e 100644 --- a/sys/powerpc/aim/aim_machdep.c +++ b/sys/powerpc/aim/aim_machdep.c @@ -1,790 +1,789 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef __powerpc64__ #include #endif #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #ifdef __powerpc64__ #include "mmu_oea64.h" #endif #ifndef __powerpc64__ struct bat battable[16]; #endif int radix_mmu = 0; #ifndef __powerpc64__ /* Bits for running on 64-bit systems in 32-bit mode. */ extern void *testppc64, *testppc64size; extern void *restorebridge, *restorebridgesize; extern void *rfid_patch, *rfi_patch1, *rfi_patch2; extern void *trapcode64; extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; #endif extern void *rstcode, *rstcodeend; extern void *trapcode, *trapcodeend; extern void *hypertrapcode, *hypertrapcodeend; extern void *generictrap, *generictrap64; extern void *alitrap, *aliend; extern void *dsitrap, *dsiend; extern void *decrint, *decrsize; extern void *extint, *extsize; extern void *dblow, *dbend; extern void *imisstrap, *imisssize; extern void *dlmisstrap, *dlmisssize; extern void *dsmisstrap, *dsmisssize; extern void *ap_pcpu; extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr); extern void __restartkernel_virtual(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr); void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, uint32_t mdp_cookie); void aim_cpu_init(vm_offset_t toc); void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, uint32_t mdp_cookie) { register_t scratch; /* * If running from an FDT, make sure we are in real mode to avoid * tromping on firmware page tables. Everything in the kernel assumes * 1:1 mappings out of firmware, so this won't break anything not * already broken. This doesn't work if there is live OF, since OF * may internally use non-1:1 mappings. 
*/ if (ofentry == 0) mtmsr(mfmsr() & ~(PSL_IR | PSL_DR)); #ifdef __powerpc64__ /* * Relocate to high memory so that the kernel * can execute from the direct map. * * If we are in virtual mode already, use a special entry point * that sets up a temporary DMAP to execute from until we can * properly set up the MMU. */ if ((vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS) { if (mfmsr() & PSL_DR) { __restartkernel_virtual(fdt, 0, ofentry, mdp, mdp_cookie, DMAP_BASE_ADDRESS, mfmsr()); } else { __restartkernel(fdt, 0, ofentry, mdp, mdp_cookie, DMAP_BASE_ADDRESS, mfmsr()); } } #endif /* Various very early CPU fix ups */ switch (mfpvr() >> 16) { /* * PowerPC 970 CPUs have a misfeature requested by Apple that * makes them pretend they have a 32-byte cacheline. Turn this * off before we measure the cacheline size. */ case IBM970: case IBM970FX: case IBM970MP: case IBM970GX: scratch = mfspr(SPR_HID5); scratch &= ~HID5_970_DCBZ_SIZE_HI; mtspr(SPR_HID5, scratch); break; #ifdef __powerpc64__ case IBMPOWER7: case IBMPOWER7PLUS: case IBMPOWER8: case IBMPOWER8E: case IBMPOWER8NVL: case IBMPOWER9: /* XXX: get from ibm,slb-size in device tree */ n_slbs = 32; break; #endif } } void aim_cpu_init(vm_offset_t toc) { size_t trap_offset, trapsize; vm_offset_t trap; register_t msr; uint8_t *cache_check; int cacheline_warn; #ifndef __powerpc64__ register_t scratch; int ppc64; #endif trap_offset = 0; cacheline_warn = 0; /* General setup for AIM CPUs */ psl_kernset = PSL_EE | PSL_ME | PSL_IR | PSL_DR | PSL_RI; #ifdef __powerpc64__ psl_kernset |= PSL_SF; if (mfmsr() & PSL_HV) psl_kernset |= PSL_HV; #if BYTE_ORDER == LITTLE_ENDIAN psl_kernset |= PSL_LE; #endif #endif psl_userset = psl_kernset | PSL_PR; #ifdef __powerpc64__ psl_userset32 = psl_userset & ~PSL_SF; #endif /* * Zeroed bits in this variable signify that the value of the bit * in its position is allowed to vary between userspace contexts. * * All other bits are required to be identical for every userspace * context. The actual *value* of the bit is determined by * psl_userset and/or psl_userset32, and is not allowed to change. * * Remember to update this set when implementing support for * *conditionally* enabling a processor facility. Failing to do * this will cause swapcontext() in userspace to break when a * process uses a conditionally-enabled facility. * * When *unconditionally* implementing support for a processor * facility, update psl_userset / psl_userset32 instead. * * See the access control check in set_mcontext(). */ psl_userstatic = ~(PSL_VSX | PSL_VEC | PSL_FP | PSL_FE0 | PSL_FE1); /* * Mask bits from the SRR1 that aren't really the MSR: * Bits 1-4, 10-15 (ppc32), 33-36, 42-47 (ppc64) */ psl_userstatic &= ~0x783f0000UL; /* * Initialize the interrupt tables and figure out our cache line * size and whether or not we need the 64-bit bridge code. */ /* * Disable translation in case the vector area hasn't been * mapped (G5). Note that no OFW calls can be made until * translation is re-enabled. */ msr = mfmsr(); mtmsr((msr & ~(PSL_IR | PSL_DR)) | PSL_RI); /* * Measure the cacheline size using dcbz * * Use EXC_PGM as a playground. We are about to overwrite it * anyway, we know it exists, and we know it is cache-aligned. 
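The dcbz-based probe described in the comment above can be mimicked in user space to make the scan logic concrete. Below is a minimal sketch, assuming a 64-byte line and using memset as a stand-in for dcbz (which zeroes exactly one cache line on PowerPC); the probe buffer and sizes are illustrative and are not the kernel's EXC_PGM area.

#include <stdio.h>
#include <string.h>

/*
 * Stand-in for the kernel probe: poison a region with 0xff, zero one
 * "cache line" at its start (dcbz in the real code, memset here), then
 * scan for the first byte that is still 0xff.
 */
static int
measure_cacheline(unsigned char *probe, size_t line_assumed)
{
	int i;

	memset(probe, 0xff, 0x100);
	memset(probe, 0x00, line_assumed);	/* dcbz 0,probe on PowerPC */
	for (i = 0; i < 0x100 && probe[i] == 0; i++)
		continue;
	return (i);
}

int
main(void)
{
	unsigned char probe[0x100];

	printf("cache line size: %d\n", measure_cacheline(probe, 64));
	return (0);
}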
*/ cache_check = (void *)EXC_PGM; for (cacheline_size = 0; cacheline_size < 0x100; cacheline_size++) cache_check[cacheline_size] = 0xff; __asm __volatile("dcbz 0,%0":: "r" (cache_check) : "memory"); /* Find the first byte dcbz did not zero to get the cache line size */ for (cacheline_size = 0; cacheline_size < 0x100 && cache_check[cacheline_size] == 0; cacheline_size++); /* Work around psim bug */ if (cacheline_size == 0) { cacheline_warn = 1; cacheline_size = 32; } #ifndef __powerpc64__ /* * Figure out whether we need to use the 64 bit PMAP. This works by * executing an instruction that is only legal on 64-bit PPC (mtmsrd), * and setting ppc64 = 0 if that causes a trap. */ ppc64 = 1; bcopy(&testppc64, (void *)EXC_PGM, (size_t)&testppc64size); __syncicache((void *)EXC_PGM, (size_t)&testppc64size); __asm __volatile("\ mfmsr %0; \ mtsprg2 %1; \ \ mtmsrd %0; \ mfsprg2 %1;" : "=r"(scratch), "=r"(ppc64)); if (ppc64) cpu_features |= PPC_FEATURE_64; /* * Now copy restorebridge into all the handlers, if necessary, * and set up the trap tables. */ if (cpu_features & PPC_FEATURE_64) { /* Patch the two instances of rfi -> rfid */ bcopy(&rfid_patch,&rfi_patch1,4); #ifdef KDB /* rfi_patch2 is at the end of dbleave */ bcopy(&rfid_patch,&rfi_patch2,4); #endif } #else /* powerpc64 */ cpu_features |= PPC_FEATURE_64; #endif trapsize = (size_t)&trapcodeend - (size_t)&trapcode; /* * Copy generic handler into every possible trap. Special cases will get * different ones in a minute. */ for (trap = EXC_RST; trap < EXC_LAST; trap += 0x20) bcopy(&trapcode, (void *)trap, trapsize); #ifndef __powerpc64__ if (cpu_features & PPC_FEATURE_64) { /* * Copy a code snippet to restore 32-bit bridge mode * to the top of every non-generic trap handler */ trap_offset += (size_t)&restorebridgesize; bcopy(&restorebridge, (void *)EXC_RST, trap_offset); bcopy(&restorebridge, (void *)EXC_DSI, trap_offset); bcopy(&restorebridge, (void *)EXC_ALI, trap_offset); bcopy(&restorebridge, (void *)EXC_PGM, trap_offset); bcopy(&restorebridge, (void *)EXC_MCHK, trap_offset); bcopy(&restorebridge, (void *)EXC_TRC, trap_offset); bcopy(&restorebridge, (void *)EXC_BPT, trap_offset); } else { /* * Use an IBAT and a DBAT to map the bottom 256M segment. * * It is very important to do it *now* to avoid taking a * fault in .text / .data before the MMU is bootstrapped, * because until then, the translation data has not been * copied over from OpenFirmware, so our DSI/ISI will fail * to find a match. 
*/ battable[0x0].batl = BATL(0x00000000, BAT_M, BAT_PP_RW); battable[0x0].batu = BATU(0x00000000, BAT_BL_256M, BAT_Vs); __asm (".balign 32; \n" "mtibatu 0,%0; mtibatl 0,%1; isync; \n" "mtdbatu 0,%0; mtdbatl 0,%1; isync" :: "r"(battable[0].batu), "r"(battable[0].batl)); } #else trapsize = (size_t)&hypertrapcodeend - (size_t)&hypertrapcode; bcopy(&hypertrapcode, (void *)(EXC_HEA + trap_offset), trapsize); bcopy(&hypertrapcode, (void *)(EXC_HMI + trap_offset), trapsize); bcopy(&hypertrapcode, (void *)(EXC_HVI + trap_offset), trapsize); bcopy(&hypertrapcode, (void *)(EXC_SOFT_PATCH + trap_offset), trapsize); #endif bcopy(&rstcode, (void *)(EXC_RST + trap_offset), (size_t)&rstcodeend - (size_t)&rstcode); #ifdef KDB bcopy(&dblow, (void *)(EXC_MCHK + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_PGM + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_TRC + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_BPT + trap_offset), (size_t)&dbend - (size_t)&dblow); #endif bcopy(&alitrap, (void *)(EXC_ALI + trap_offset), (size_t)&aliend - (size_t)&alitrap); bcopy(&dsitrap, (void *)(EXC_DSI + trap_offset), (size_t)&dsiend - (size_t)&dsitrap); /* Set address of generictrap for self-reloc calculations */ *((void **)TRAP_GENTRAP) = &generictrap; #ifdef __powerpc64__ /* Set TOC base so that the interrupt code can get at it */ *((void **)TRAP_ENTRY) = &generictrap; *((register_t *)TRAP_TOCBASE) = toc; #else /* Set branch address for trap code */ if (cpu_features & PPC_FEATURE_64) *((void **)TRAP_ENTRY) = &generictrap64; else *((void **)TRAP_ENTRY) = &generictrap; *((void **)TRAP_TOCBASE) = _GLOBAL_OFFSET_TABLE_; /* G2-specific TLB miss helper handlers */ bcopy(&imisstrap, (void *)EXC_IMISS, (size_t)&imisssize); bcopy(&dlmisstrap, (void *)EXC_DLMISS, (size_t)&dlmisssize); bcopy(&dsmisstrap, (void *)EXC_DSMISS, (size_t)&dsmisssize); #endif __syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD); /* * Restore MSR */ mtmsr(msr); /* Warn if cachline size was not determined */ if (cacheline_warn == 1) { printf("WARNING: cacheline size undetermined, setting to 32\n"); } /* * Initialise virtual memory. Use BUS_PROBE_GENERIC priority * in case the platform module had a better idea of what we * should do. */ if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) { radix_mmu = 0; TUNABLE_INT_FETCH("radix_mmu", &radix_mmu); if (radix_mmu) pmap_mmu_install(MMU_TYPE_RADIX, BUS_PROBE_GENERIC); else pmap_mmu_install(MMU_TYPE_G5, BUS_PROBE_GENERIC); } else if (cpu_features & PPC_FEATURE_64) pmap_mmu_install(MMU_TYPE_G5, BUS_PROBE_GENERIC); else pmap_mmu_install(MMU_TYPE_OEA, BUS_PROBE_GENERIC); } /* * Shutdown the CPU as much as possible. */ void cpu_halt(void) { OF_exit(); } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_SE; return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_SE; return (0); } void kdb_cpu_clear_singlestep(void) { kdb_frame->srr1 &= ~PSL_SE; } void kdb_cpu_set_singlestep(void) { kdb_frame->srr1 |= PSL_SE; } /* * Initialise a struct pcpu. */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { #ifdef __powerpc64__ /* Copy the SLB contents from the current CPU */ memcpy(pcpu->pc_aim.slb, PCPU_GET(aim.slb), sizeof(pcpu->pc_aim.slb)); #endif } /* Return 0 on handled success, otherwise signal number. 
*/ int cpu_machine_check(struct thread *td, struct trapframe *frame, int *ucode) { #ifdef __powerpc64__ /* * This block is 64-bit CPU specific currently. Punt running in 32-bit * mode on 64-bit CPUs. */ /* Check if the important information is in DSISR */ if ((frame->srr1 & SRR1_MCHK_DATA) != 0) { printf("Machine check, DSISR: %016lx\n", frame->cpu.aim.dsisr); /* SLB multi-hit is recoverable. */ if ((frame->cpu.aim.dsisr & DSISR_MC_SLB_MULTIHIT) != 0) return (0); if ((frame->cpu.aim.dsisr & (DSISR_MC_DERAT_MULTIHIT | DSISR_MC_TLB_MULTIHIT)) != 0) { pmap_tlbie_all(); return (0); } /* TODO: Add other machine check recovery procedures. */ } else { if ((frame->srr1 & SRR1_MCHK_IFETCH_M) == SRR1_MCHK_IFETCH_SLBMH) return (0); } #endif *ucode = BUS_OBJERR; return (SIGBUS); } #ifndef __powerpc64__ uint64_t va_to_vsid(pmap_t pm, vm_offset_t va) { return ((pm->pm_sr[(uintptr_t)va >> ADDR_SR_SHFT]) & SR_VSID_MASK); } #endif /* * These functions need to provide addresses that both (a) work in real mode * (or whatever mode/circumstances the kernel is in in early boot (now)) and * (b) can still, in principle, work once the kernel is going. Because these * rely on existing mappings/real mode, unmap is a no-op. */ vm_offset_t pmap_early_io_map(vm_paddr_t pa, vm_size_t size) { KASSERT(!pmap_bootstrapped, ("Not available after PMAP started!")); /* * If we have the MMU up in early boot, assume it is 1:1. Otherwise, * try to get the address in a memory region compatible with the * direct map for efficiency later. */ if (mfmsr() & PSL_DR) return (pa); else return (DMAP_BASE_ADDRESS + pa); } void pmap_early_io_unmap(vm_offset_t va, vm_size_t size) { KASSERT(!pmap_bootstrapped, ("Not available after PMAP started!")); } /* From p3-53 of the MPC7450 RISC Microprocessor Family Reference Manual */ void flush_disable_caches(void) { register_t msr; register_t msscr0; register_t cache_reg; volatile uint32_t *memp; uint32_t temp; int i; int x; msr = mfmsr(); powerpc_sync(); mtmsr(msr & ~(PSL_EE | PSL_DR)); msscr0 = mfspr(SPR_MSSCR0); msscr0 &= ~MSSCR0_L2PFE; mtspr(SPR_MSSCR0, msscr0); powerpc_sync(); isync(); /* 7e00066c: dssall */ __asm__ __volatile__(".long 0x7e00066c; sync"); powerpc_sync(); isync(); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); /* Lock the L1 Data cache. */ mtspr(SPR_LDSTCR, mfspr(SPR_LDSTCR) | 0xFF); powerpc_sync(); isync(); mtspr(SPR_LDSTCR, 0); /* * Perform this in two stages: Flush the cache starting in RAM, then do it * from ROM. 
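The RAM pass that follows simply loads one word from every cache line of a 4 MB window and flushes each line with dcbf. Here is a portable sketch of the same walk under those assumptions; the 32-byte line size and 4 MB span match the MPC7450 loop, and the dcbf itself is left as a comment because it is PowerPC-only.

#include <stdlib.h>

#define LINE_SIZE	32			/* assumed L1 line size */

/*
 * Walk a region one cache line at a time, loading a word from each line.
 * In the kernel loop every load is followed by "dcbf 0,%0" so the line
 * just brought in is flushed back out; that instruction is omitted here
 * to keep the sketch portable.
 */
static void
touch_every_line(volatile unsigned int *p, size_t nlines)
{
	unsigned int sink = 0;
	size_t i;

	for (i = 0; i < nlines; i++) {
		sink += *p;
		p += LINE_SIZE / sizeof(*p);
	}
	(void)sink;
}

int
main(void)
{
	size_t nlines = 128 * 1024;		/* 4 MB at 32 bytes per line */
	volatile unsigned int *buf;

	buf = malloc(nlines * LINE_SIZE);
	if (buf != NULL)
		touch_every_line(buf, nlines);
	free((void *)buf);
	return (0);
}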
*/ memp = (volatile uint32_t *)0x00000000; for (i = 0; i < 128 * 1024; i++) { temp = *memp; __asm__ __volatile__("dcbf 0,%0" :: "r"(memp)); memp += 32/sizeof(*memp); } memp = (volatile uint32_t *)0xfff00000; x = 0xfe; for (; x != 0xff;) { mtspr(SPR_LDSTCR, x); for (i = 0; i < 128; i++) { temp = *memp; __asm__ __volatile__("dcbf 0,%0" :: "r"(memp)); memp += 32/sizeof(*memp); } x = ((x << 1) | 1) & 0xff; } mtspr(SPR_LDSTCR, 0); cache_reg = mfspr(SPR_L2CR); if (cache_reg & L2CR_L2E) { cache_reg &= ~(L2CR_L2IO_7450 | L2CR_L2DO_7450); mtspr(SPR_L2CR, cache_reg); powerpc_sync(); mtspr(SPR_L2CR, cache_reg | L2CR_L2HWF); while (mfspr(SPR_L2CR) & L2CR_L2HWF) ; /* Busy wait for cache to flush */ powerpc_sync(); cache_reg &= ~L2CR_L2E; mtspr(SPR_L2CR, cache_reg); powerpc_sync(); mtspr(SPR_L2CR, cache_reg | L2CR_L2I); powerpc_sync(); while (mfspr(SPR_L2CR) & L2CR_L2I) ; /* Busy wait for L2 cache invalidate */ powerpc_sync(); } cache_reg = mfspr(SPR_L3CR); if (cache_reg & L3CR_L3E) { cache_reg &= ~(L3CR_L3IO | L3CR_L3DO); mtspr(SPR_L3CR, cache_reg); powerpc_sync(); mtspr(SPR_L3CR, cache_reg | L3CR_L3HWF); while (mfspr(SPR_L3CR) & L3CR_L3HWF) ; /* Busy wait for cache to flush */ powerpc_sync(); cache_reg &= ~L3CR_L3E; mtspr(SPR_L3CR, cache_reg); powerpc_sync(); mtspr(SPR_L3CR, cache_reg | L3CR_L3I); powerpc_sync(); while (mfspr(SPR_L3CR) & L3CR_L3I) ; /* Busy wait for L3 cache invalidate */ powerpc_sync(); } mtspr(SPR_HID0, mfspr(SPR_HID0) & ~HID0_DCE); powerpc_sync(); isync(); mtmsr(msr); } #ifndef __powerpc64__ void mpc745x_sleep() { static u_quad_t timebase = 0; static register_t sprgs[4]; static register_t srrs[2]; jmp_buf resetjb; struct thread *fputd; struct thread *vectd; register_t hid0; register_t msr; register_t saved_msr; ap_pcpu = pcpup; PCPU_SET(restore, &resetjb); saved_msr = mfmsr(); fputd = PCPU_GET(fputhread); vectd = PCPU_GET(vecthread); if (fputd != NULL) save_fpu(fputd); if (vectd != NULL) save_vec(vectd); if (setjmp(resetjb) == 0) { sprgs[0] = mfspr(SPR_SPRG0); sprgs[1] = mfspr(SPR_SPRG1); sprgs[2] = mfspr(SPR_SPRG2); sprgs[3] = mfspr(SPR_SPRG3); srrs[0] = mfspr(SPR_SRR0); srrs[1] = mfspr(SPR_SRR1); timebase = mftb(); powerpc_sync(); flush_disable_caches(); hid0 = mfspr(SPR_HID0); hid0 = (hid0 & ~(HID0_DOZE | HID0_NAP)) | HID0_SLEEP; powerpc_sync(); isync(); msr = mfmsr() | PSL_POW; mtspr(SPR_HID0, hid0); powerpc_sync(); while (1) mtmsr(msr); } /* XXX: The mttb() means this *only* works on single-CPU systems. */ mttb(timebase); PCPU_SET(curthread, curthread); PCPU_SET(curpcb, curthread->td_pcb); pmap_activate(curthread); powerpc_sync(); mtspr(SPR_SPRG0, sprgs[0]); mtspr(SPR_SPRG1, sprgs[1]); mtspr(SPR_SPRG2, sprgs[2]); mtspr(SPR_SPRG3, sprgs[3]); mtspr(SPR_SRR0, srrs[0]); mtspr(SPR_SRR1, srrs[1]); mtmsr(saved_msr); if (fputd == curthread) enable_fpu(curthread); if (vectd == curthread) enable_vec(curthread); powerpc_sync(); } #endif diff --git a/sys/powerpc/booke/booke_machdep.c b/sys/powerpc/booke/booke_machdep.c index 09396d854045..5db1a17fc243 100644 --- a/sys/powerpc/booke/booke_machdep.c +++ b/sys/powerpc/booke/booke_machdep.c @@ -1,473 +1,472 @@ /*- * Copyright (C) 2006-2012 Semihalf * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif extern unsigned char _etext[]; extern unsigned char _edata[]; extern unsigned char __bss_start[]; extern unsigned char __sbss_start[]; extern unsigned char __sbss_end[]; extern unsigned char _end[]; extern vm_offset_t __endkernel; extern vm_paddr_t kernload; /* * Bootinfo is passed to us by legacy loaders. Save the address of the * structure to handle backward compatibility. */ uint32_t *bootinfo; void print_kernel_section_addr(void); void print_kenv(void); uintptr_t booke_init(u_long, u_long); void ivor_setup(void); extern void *interrupt_vector_base; extern void *int_critical_input; extern void *int_machine_check; extern void *int_data_storage; extern void *int_instr_storage; extern void *int_external_input; extern void *int_alignment; extern void *int_fpu; extern void *int_program; extern void *int_syscall; extern void *int_decrementer; extern void *int_fixed_interval_timer; extern void *int_watchdog; extern void *int_data_tlb_error; extern void *int_inst_tlb_error; extern void *int_debug; extern void *int_debug_ed; extern void *int_vec; extern void *int_vecast; #ifdef __SPE__ extern void *int_spe_fpdata; extern void *int_spe_fpround; #endif #ifdef HWPMC_HOOKS extern void *int_performance_counter; #endif #define SET_TRAP(ivor, handler) \ KASSERT(((uintptr_t)(&handler) & ~0xffffUL) == \ ((uintptr_t)(&interrupt_vector_base) & ~0xffffUL), \ ("Handler " #handler " too far from interrupt vector base")); \ mtspr(ivor, (uintptr_t)(&handler) & 0xffffUL); uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t, vm_offset_t, void *mdp, uint32_t mdp_cookie); void booke_cpu_init(void); void booke_cpu_init(void) { cpu_features |= PPC_FEATURE_BOOKE; psl_kernset = PSL_CE | PSL_ME | PSL_EE; #ifdef __powerpc64__ psl_kernset |= PSL_CM; #endif psl_userset = psl_kernset | PSL_PR; #ifdef __powerpc64__ psl_userset32 = psl_userset & ~PSL_CM; #endif /* * Zeroed bits in this variable signify that the value of the bit * in its position is allowed to vary between userspace contexts. * * All other bits are required to be identical for every userspace * context. The actual *value* of the bit is determined by * psl_userset and/or psl_userset32, and is not allowed to change. * * Remember to update this set when implementing support for * *conditionally* enabling a processor facility. 
Failing to do * this will cause swapcontext() in userspace to break when a * process uses a conditionally-enabled facility. * * When *unconditionally* implementing support for a processor * facility, update psl_userset / psl_userset32 instead. * * See the access control check in set_mcontext(). */ psl_userstatic = ~(PSL_VEC | PSL_FP | PSL_FE0 | PSL_FE1); pmap_mmu_install(MMU_TYPE_BOOKE, BUS_PROBE_GENERIC); } void ivor_setup(void) { mtspr(SPR_IVPR, ((uintptr_t)&interrupt_vector_base) & ~0xffffUL); SET_TRAP(SPR_IVOR0, int_critical_input); SET_TRAP(SPR_IVOR1, int_machine_check); SET_TRAP(SPR_IVOR2, int_data_storage); SET_TRAP(SPR_IVOR3, int_instr_storage); SET_TRAP(SPR_IVOR4, int_external_input); SET_TRAP(SPR_IVOR5, int_alignment); SET_TRAP(SPR_IVOR6, int_program); SET_TRAP(SPR_IVOR8, int_syscall); SET_TRAP(SPR_IVOR10, int_decrementer); SET_TRAP(SPR_IVOR11, int_fixed_interval_timer); SET_TRAP(SPR_IVOR12, int_watchdog); SET_TRAP(SPR_IVOR13, int_data_tlb_error); SET_TRAP(SPR_IVOR14, int_inst_tlb_error); SET_TRAP(SPR_IVOR15, int_debug); #ifdef HWPMC_HOOKS SET_TRAP(SPR_IVOR35, int_performance_counter); #endif switch ((mfpvr() >> 16) & 0xffff) { case FSL_E6500: SET_TRAP(SPR_IVOR32, int_vec); SET_TRAP(SPR_IVOR33, int_vecast); /* FALLTHROUGH */ case FSL_E500mc: case FSL_E5500: SET_TRAP(SPR_IVOR7, int_fpu); SET_TRAP(SPR_IVOR15, int_debug_ed); break; case FSL_E500v1: case FSL_E500v2: SET_TRAP(SPR_IVOR32, int_vec); #ifdef __SPE__ SET_TRAP(SPR_IVOR33, int_spe_fpdata); SET_TRAP(SPR_IVOR34, int_spe_fpround); #endif break; } #ifdef __powerpc64__ /* Set 64-bit interrupt mode. */ mtspr(SPR_EPCR, mfspr(SPR_EPCR) | EPCR_ICM); #endif } static int booke_check_for_fdt(uint32_t arg1, vm_offset_t *dtbp) { void *ptr; int fdt_size; if (arg1 % 8 != 0) return (-1); ptr = (void *)pmap_early_io_map(arg1, PAGE_SIZE); if (fdt_check_header(ptr) != 0) return (-1); /* * Read FDT total size from the header of FDT. * This for sure hits within first page which is * already mapped. */ fdt_size = fdt_totalsize((void *)ptr); /* * Ok, arg1 points to FDT, so we need to map it in. * First, unmap this page and then map FDT again with full size */ pmap_early_io_unmap((vm_offset_t)ptr, PAGE_SIZE); ptr = (void *)pmap_early_io_map(arg1, fdt_size); *dtbp = (vm_offset_t)ptr; return (0); } uintptr_t booke_init(u_long arg1, u_long arg2) { uintptr_t ret; void *mdp; vm_offset_t dtbp, end; end = (uintptr_t)_end; dtbp = (vm_offset_t)NULL; /* Set up TLB initially */ bootinfo = NULL; bzero(__sbss_start, __sbss_end - __sbss_start); bzero(__bss_start, _end - __bss_start); tlb1_init(); /* * Handle the various ways we can get loaded and started: * - FreeBSD's loader passes the pointer to the metadata * in arg1, with arg2 undefined. arg1 has a value that's * relative to the kernel's link address (i.e. larger * than 0xc0000000). * - Juniper's loader passes the metadata pointer in arg2 * and sets arg1 to zero. This is to signal that the * loader maps the kernel and starts it at its link * address (unlike the FreeBSD loader). * - U-Boot passes the standard argc and argv parameters * in arg1 and arg2 (resp). arg1 is between 1 and some * relatively small number, such as 64K. arg2 is the * physical address of the argv vector. * - ePAPR loaders pass an FDT blob in r3 (arg1) and the magic hex * string 0x45504150 ('EPAP') in r6 (which has been lost by now). * r4 (arg2) is supposed to be set to zero, but is not always. 
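booke_check_for_fdt() above needs only two facts from the blob before remapping it in full: that the header is valid and how large the whole tree claims to be. A minimal user-space sketch of that check follows, assuming the standalone libfdt library (fdt_check_header(), fdt_totalsize(), fdt_create_empty_tree()) is available and linked in; the buffer size is arbitrary.

#include <stdio.h>
#include <libfdt.h>

/* Validate an FDT blob header and report the total size it records. */
static int
check_fdt_blob(const void *blob)
{
	if (fdt_check_header(blob) != 0)
		return (-1);
	printf("FDT total size: %u bytes\n", (unsigned)fdt_totalsize(blob));
	return (0);
}

int
main(void)
{
	char buf[256];

	/* Build a trivially valid blob so the check has something to read. */
	if (fdt_create_empty_tree(buf, sizeof(buf)) != 0)
		return (1);
	return (check_fdt_blob(buf) != 0);
}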
*/ if (arg1 == 0) /* Juniper loader */ mdp = (void *)arg2; else if (booke_check_for_fdt(arg1, &dtbp) == 0) { /* ePAPR */ end = roundup(end, 8); memmove((void *)end, (void *)dtbp, fdt_totalsize((void *)dtbp)); dtbp = end; end += fdt_totalsize((void *)dtbp); __endkernel = end; mdp = NULL; } else if (arg1 > (uintptr_t)kernload) /* FreeBSD loader */ mdp = (void *)arg1; else /* U-Boot */ mdp = NULL; /* Default to 32 byte cache line size. */ switch ((mfpvr()) >> 16) { case FSL_E500mc: case FSL_E5500: case FSL_E6500: cacheline_size = 64; break; } /* * Last element is a magic cookie that indicates that the metadata * pointer is meaningful. */ ret = powerpc_init(dtbp, 0, 0, mdp, (mdp == NULL) ? 0 : 0xfb5d104d); /* Enable caches */ booke_enable_l1_cache(); booke_enable_l2_cache(); booke_enable_bpred(); return (ret); } #define RES_GRANULE cacheline_size extern uintptr_t tlb0_miss_locks[]; /* Initialise a struct pcpu. */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { pcpu->pc_booke.tid_next = TID_MIN; #ifdef SMP uintptr_t *ptr; int words_per_gran = RES_GRANULE / sizeof(uintptr_t); ptr = &tlb0_miss_locks[cpuid * words_per_gran]; pcpu->pc_booke.tlb_lock = ptr; *ptr = TLB_UNLOCKED; *(ptr + 1) = 0; /* recurse counter */ #endif } /* Shutdown the CPU as much as possible. */ void cpu_halt(void) { mtmsr(mfmsr() & ~(PSL_CE | PSL_EE | PSL_ME | PSL_DE)); while (1) ; } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_DE; tf->cpu.booke.dbcr0 |= (DBCR0_IDM | DBCR0_IC); return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_DE; tf->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); return (0); } void kdb_cpu_clear_singlestep(void) { register_t r; r = mfspr(SPR_DBCR0); mtspr(SPR_DBCR0, r & ~DBCR0_IC); kdb_frame->srr1 &= ~PSL_DE; } void kdb_cpu_set_singlestep(void) { register_t r; r = mfspr(SPR_DBCR0); mtspr(SPR_DBCR0, r | DBCR0_IC | DBCR0_IDM); kdb_frame->srr1 |= PSL_DE; } diff --git a/sys/powerpc/fpu/fpu_add.c b/sys/powerpc/fpu/fpu_add.c index 6dd594ca0ec9..fdab0d8872ab 100644 --- a/sys/powerpc/fpu/fpu_add.c +++ b/sys/powerpc/fpu/fpu_add.c @@ -1,228 +1,227 @@ /* $NetBSD: fpu_add.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fpu_add.c 8.1 (Berkeley) 6/11/93 */ /* * Perform an FPU add (return x + y). * * To subtract, negate y and call add. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include -#include #include #include struct fpn * fpu_add(struct fpemu *fe) { struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r; u_int r0, r1, r2, r3; int rd; /* * Put the `heavier' operand on the right (see fpu_emu.h). * Then we will have one of the following cases, taken in the * following order: * * - y = NaN. Implied: if only one is a signalling NaN, y is. * The result is y. * - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN * case was taken care of earlier). * If x = -y, the result is NaN. Otherwise the result * is y (an Inf of whichever sign). * - y is 0. Implied: x = 0. * If x and y differ in sign (one positive, one negative), * the result is +0 except when rounding to -Inf. If same: * +0 + +0 = +0; -0 + -0 = -0. * - x is 0. Implied: y != 0. * Result is y. * - other. Implied: both x and y are numbers. * Do addition a la Hennessey & Patterson. */ DPRINTF(FPE_REG, ("fpu_add:\n")); DUMPFPN(FPE_REG, x); DUMPFPN(FPE_REG, y); DPRINTF(FPE_REG, ("=>\n")); ORDER(x, y); if (ISNAN(y)) { fe->fe_cx |= FPSCR_VXSNAN; DUMPFPN(FPE_REG, y); return (y); } if (ISINF(y)) { if (ISINF(x) && x->fp_sign != y->fp_sign) { fe->fe_cx |= FPSCR_VXISI; return (fpu_newnan(fe)); } DUMPFPN(FPE_REG, y); return (y); } rd = ((fe->fe_fpscr) & FPSCR_RN); if (ISZERO(y)) { if (rd != FP_RM) /* only -0 + -0 gives -0 */ y->fp_sign &= x->fp_sign; else /* any -0 operand gives -0 */ y->fp_sign |= x->fp_sign; DUMPFPN(FPE_REG, y); return (y); } if (ISZERO(x)) { DUMPFPN(FPE_REG, y); return (y); } /* * We really have two numbers to add, although their signs may * differ. Make the exponents match, by shifting the smaller * number right (e.g., 1.011 => 0.1011) and increasing its * exponent (2^3 => 2^4). Note that we do not alter the exponents * of x and y here. */ r = &fe->fe_f3; r->fp_class = FPC_NUM; if (x->fp_exp == y->fp_exp) { r->fp_exp = x->fp_exp; r->fp_sticky = 0; } else { if (x->fp_exp < y->fp_exp) { /* * Try to avoid subtract case iii (see below). * This also guarantees that x->fp_sticky = 0. */ SWAP(x, y); } /* now x->fp_exp > y->fp_exp */ r->fp_exp = x->fp_exp; r->fp_sticky = fpu_shr(y, x->fp_exp - y->fp_exp); } r->fp_sign = x->fp_sign; if (x->fp_sign == y->fp_sign) { FPU_DECL_CARRY /* * The signs match, so we simply add the numbers. The result * may be `supernormal' (as big as 1.111...1 + 1.111...1, or * 11.111...0). If so, a single bit shift-right will fix it * (but remember to adjust the exponent). 
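The signs-match path described just below adds the mantissa words with explicit carries and then repairs a "supernormal" sum with one right shift and an exponent bump. The following two-word toy version is purely illustrative: the toy_fp type, its word layout, and the carry detection are stand-ins for the kernel's four-word FP_NMANT representation and its FPU_ADD* macros.

#include <stdio.h>
#include <stdint.h>

struct toy_fp {
	uint32_t mant[2];	/* mant[0] is the most significant word */
	int	 exp;
};

/*
 * Add two mantissas of equal exponent, propagating the carry between
 * words; if the sum carries out of the top word ("supernormal"), shift
 * the result right one bit and bump the exponent.
 */
static void
toy_add(struct toy_fp *r, const struct toy_fp *x, const struct toy_fp *y)
{
	uint64_t lo, hi;

	lo = (uint64_t)x->mant[1] + y->mant[1];
	hi = (uint64_t)x->mant[0] + y->mant[0] + (lo >> 32);
	r->mant[1] = (uint32_t)lo;
	r->mant[0] = (uint32_t)hi;
	r->exp = x->exp;			/* exponents assumed equal */
	if (hi >> 32) {				/* carry out: 1x.xxx... */
		r->mant[1] = (r->mant[1] >> 1) | (r->mant[0] << 31);
		r->mant[0] = (uint32_t)(hi >> 1);
		r->exp++;
	}
}

int
main(void)
{
	struct toy_fp a = { { 0x90000000, 0x00000001 }, 0 };
	struct toy_fp b = { { 0x90000000, 0x00000001 }, 0 };
	struct toy_fp r;

	toy_add(&r, &a, &b);
	printf("mant %08x %08x exp %d\n", r.mant[0], r.mant[1], r.exp);
	return (0);
}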
*/ /* r->fp_mant = x->fp_mant + y->fp_mant */ FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]); FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]); FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]); FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]); if ((r->fp_mant[0] = r0) >= FP_2) { (void) fpu_shr(r, 1); r->fp_exp++; } } else { FPU_DECL_CARRY /* * The signs differ, so things are rather more difficult. * H&P would have us negate the negative operand and add; * this is the same as subtracting the negative operand. * This is quite a headache. Instead, we will subtract * y from x, regardless of whether y itself is the negative * operand. When this is done one of three conditions will * hold, depending on the magnitudes of x and y: * case i) |x| > |y|. The result is just x - y, * with x's sign, but it may need to be normalized. * case ii) |x| = |y|. The result is 0 (maybe -0) * so must be fixed up. * case iii) |x| < |y|. We goofed; the result should * be (y - x), with the same sign as y. * We could compare |x| and |y| here and avoid case iii, * but that would take just as much work as the subtract. * We can tell case iii has occurred by an overflow. * * N.B.: since x->fp_exp >= y->fp_exp, x->fp_sticky = 0. */ /* r->fp_mant = x->fp_mant - y->fp_mant */ FPU_SET_CARRY(y->fp_sticky); FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]); FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]); FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]); FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]); if (r0 < FP_2) { /* cases i and ii */ if ((r0 | r1 | r2 | r3) == 0) { /* case ii */ r->fp_class = FPC_ZERO; r->fp_sign = rd == FP_RM; return (r); } } else { /* * Oops, case iii. This can only occur when the * exponents were equal, in which case neither * x nor y have sticky bits set. Flip the sign * (to y's sign) and negate the result to get y - x. */ #ifdef DIAGNOSTIC if (x->fp_exp != y->fp_exp || r->fp_sticky) panic("fpu_add"); #endif r->fp_sign = y->fp_sign; FPU_SUBS(r3, 0, r3); FPU_SUBCS(r2, 0, r2); FPU_SUBCS(r1, 0, r1); FPU_SUBC(r0, 0, r0); } r->fp_mant[3] = r3; r->fp_mant[2] = r2; r->fp_mant[1] = r1; r->fp_mant[0] = r0; if (r0 < FP_1) fpu_norm(r); } DUMPFPN(FPE_REG, r); return (r); } diff --git a/sys/powerpc/fpu/fpu_compare.c b/sys/powerpc/fpu/fpu_compare.c index 48d7c488ce8a..633d32dcc717 100644 --- a/sys/powerpc/fpu/fpu_compare.c +++ b/sys/powerpc/fpu/fpu_compare.c @@ -1,164 +1,163 @@ /* $NetBSD: fpu_compare.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fpu_compare.c 8.1 (Berkeley) 6/11/93 */ /* * FCMPU and FCMPO instructions. * * These rely on the fact that our internal wide format is achieved by * adding zero bits to the end of narrower mantissas. */ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include /* * Perform a compare instruction (with or without unordered exception). * This updates the fcc field in the fsr. * * If either operand is NaN, the result is unordered. For ordered, this * causes an NV exception. Everything else is ordered: * |Inf| > |numbers| > |0|. * We already arranged for fp_class(Inf) > fp_class(numbers) > fp_class(0), * so we get this directly. Note, however, that two zeros compare equal * regardless of sign, while everything else depends on sign. * * Incidentally, two Infs of the same sign compare equal (per the 80387 * manual---it would be nice if the SPARC documentation were more * complete). */ void fpu_compare(struct fpemu *fe, int ordered) { struct fpn *a, *b, *r; int cc; a = &fe->fe_f1; b = &fe->fe_f2; r = &fe->fe_f3; if (ISNAN(a) || ISNAN(b)) { /* * In any case, we already got an exception for signalling * NaNs; here we may replace that one with an identical * exception, but so what?. */ cc = FPSCR_FU; if (ISSNAN(a) || ISSNAN(b)) cc |= FPSCR_VXSNAN; if (ordered) { if (fe->fe_fpscr & FPSCR_VE || ISQNAN(a) || ISQNAN(b)) cc |= FPSCR_VXVC; } goto done; } /* * Must handle both-zero early to avoid sign goofs. Otherwise, * at most one is 0, and if the signs differ we are done. */ if (ISZERO(a) && ISZERO(b)) { cc = FPSCR_FE; goto done; } if (a->fp_sign) { /* a < 0 (or -0) */ if (!b->fp_sign) { /* b >= 0 (or if a = -0, b > 0) */ cc = FPSCR_FL; goto done; } } else { /* a > 0 (or +0) */ if (b->fp_sign) { /* b <= -0 (or if a = +0, b < 0) */ cc = FPSCR_FG; goto done; } } /* * Now the signs are the same (but may both be negative). All * we have left are these cases: * * |a| < |b| [classes or values differ] * |a| > |b| [classes or values differ] * |a| == |b| [classes and values identical] * * We define `diff' here to expand these as: * * |a| < |b|, a,b >= 0: a < b => FSR_CC_LT * |a| < |b|, a,b < 0: a > b => FSR_CC_GT * |a| > |b|, a,b >= 0: a > b => FSR_CC_GT * |a| > |b|, a,b < 0: a < b => FSR_CC_LT */ #define opposite_cc(cc) ((cc) == FPSCR_FL ? FPSCR_FG : FPSCR_FL) #define diff(magnitude) (a->fp_sign ? 
opposite_cc(magnitude) : (magnitude)) if (a->fp_class < b->fp_class) { /* |a| < |b| */ cc = diff(FPSCR_FL); goto done; } if (a->fp_class > b->fp_class) { /* |a| > |b| */ cc = diff(FPSCR_FG); goto done; } /* now none can be 0: only Inf and numbers remain */ if (ISINF(a)) { /* |Inf| = |Inf| */ cc = FPSCR_FE; goto done; } fpu_sub(fe); if (ISZERO(r)) cc = FPSCR_FE; else if (r->fp_sign) cc = FPSCR_FL; else cc = FPSCR_FG; done: fe->fe_cx = cc; } diff --git a/sys/powerpc/fpu/fpu_div.c b/sys/powerpc/fpu/fpu_div.c index 5b625f640f13..7536d4aaa69e 100644 --- a/sys/powerpc/fpu/fpu_div.c +++ b/sys/powerpc/fpu/fpu_div.c @@ -1,294 +1,293 @@ /* $NetBSD: fpu_div.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fpu_div.c 8.1 (Berkeley) 6/11/93 */ /* * Perform an FPU divide (return x / y). */ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include /* * Division of normal numbers is done as follows: * * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e. * If X and Y are the mantissas (1.bbbb's), the quotient is then: * * q = (X / Y) * 2^((x exponent) - (y exponent)) * * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y) * will be in [0.5,2.0). Moreover, it will be less than 1.0 if and only * if X < Y. In that case, it will have to be shifted left one bit to * become a normal number, and the exponent decremented. 
Thus, the * desired exponent is: * * left_shift = x->fp_mant < y->fp_mant; * result_exp = x->fp_exp - y->fp_exp - left_shift; * * The quotient mantissa X/Y can then be computed one bit at a time * using the following algorithm: * * Q = 0; -- Initial quotient. * R = X; -- Initial remainder, * if (left_shift) -- but fixed up in advance. * R *= 2; * for (bit = FP_NMANT; --bit >= 0; R *= 2) { * if (R >= Y) { * Q |= 1 << bit; * R -= Y; * } * } * * The subtraction R -= Y always removes the uppermost bit from R (and * can sometimes remove additional lower-order 1 bits); this proof is * left to the reader. * * This loop correctly calculates the guard and round bits since they are * included in the expanded internal representation. The sticky bit * is to be set if and only if any other bits beyond guard and round * would be set. From the above it is obvious that this is true if and * only if the remainder R is nonzero when the loop terminates. * * Examining the loop above, we can see that the quotient Q is built * one bit at a time ``from the top down''. This means that we can * dispense with the multi-word arithmetic and just build it one word * at a time, writing each result word when it is done. * * Furthermore, since X and Y are both in [1.0,2.0), we know that, * initially, R >= Y. (Recall that, if X < Y, R is set to X * 2 and * is therefore at in [2.0,4.0).) Thus Q is sure to have bit FP_NMANT-1 * set, and R can be set initially to either X - Y (when X >= Y) or * 2X - Y (when X < Y). In addition, comparing R and Y is difficult, * so we will simply calculate R - Y and see if that underflows. * This leads to the following revised version of the algorithm: * * R = X; * bit = FP_1; * D = R - Y; * if (D >= 0) { * result_exp = x->fp_exp - y->fp_exp; * R = D; * q = bit; * bit >>= 1; * } else { * result_exp = x->fp_exp - y->fp_exp - 1; * q = 0; * } * R <<= 1; * do { * D = R - Y; * if (D >= 0) { * q |= bit; * R = D; * } * R <<= 1; * } while ((bit >>= 1) != 0); * Q[0] = q; * for (i = 1; i < 4; i++) { * q = 0, bit = 1 << 31; * do { * D = R - Y; * if (D >= 0) { * q |= bit; * R = D; * } * R <<= 1; * } while ((bit >>= 1) != 0); * Q[i] = q; * } * * This can be refined just a bit further by moving the `R <<= 1' * calculations to the front of the do-loops and eliding the first one. * The process can be terminated immediately whenever R becomes 0, but * this is relatively rare, and we do not bother. */ struct fpn * fpu_div(struct fpemu *fe) { struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; u_int q, bit; u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3; FPU_DECL_CARRY /* * Since divide is not commutative, we cannot just use ORDER. * Check either operand for NaN first; if there is at least one, * order the signalling one (if only one) onto the right, then * return it. Otherwise we have the following cases: * * Inf / Inf = NaN, plus NV exception * Inf / num = Inf [i.e., return x] * Inf / 0 = Inf [i.e., return x] * 0 / Inf = 0 [i.e., return x] * 0 / num = 0 [i.e., return x] * 0 / 0 = NaN, plus NV exception * num / Inf = 0 * num / num = num (do the divide) * num / 0 = Inf, plus DZ exception */ DPRINTF(FPE_REG, ("fpu_div:\n")); DUMPFPN(FPE_REG, x); DUMPFPN(FPE_REG, y); DPRINTF(FPE_REG, ("=>\n")); if (ISNAN(x) || ISNAN(y)) { ORDER(x, y); fe->fe_cx |= FPSCR_VXSNAN; DUMPFPN(FPE_REG, y); return (y); } /* * Need to split the following out cause they generate different * exceptions. 
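The shift-and-subtract loop spelled out in the comment above builds the quotient one bit at a time from the top down. Below is a one-word toy version of that loop, assuming 32-bit mantissas with the leading "1" in bit 31; the kernel runs the same loop across four words and additionally adjusts result_exp when X < Y, which this sketch leaves out.

#include <stdio.h>
#include <stdint.h>

/*
 * Restoring division of fixed-point mantissas: for each quotient bit,
 * try R - Y; if it does not underflow, set the bit and keep the
 * difference, then shift R left for the next bit.  Any leftover
 * remainder sets the sticky bit.
 */
static uint32_t
toy_divide(uint32_t x, uint32_t y, uint32_t *sticky)
{
	uint64_t r = x;
	uint32_t q = 0, bit;

	for (bit = 1u << 31; bit != 0; bit >>= 1) {
		if (r >= y) {
			q |= bit;
			r -= y;
		}
		r <<= 1;
	}
	*sticky = (r != 0);
	return (q);
}

int
main(void)
{
	uint32_t sticky;
	uint32_t q = toy_divide(0xC0000000u, 0x80000000u, &sticky);

	/* 1.5 / 1.0 with the binary point after bit 31: expect 0xc0000000 */
	printf("q = %08x sticky %u\n", q, sticky);
	return (0);
}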
*/ if (ISINF(x)) { if (x->fp_class == y->fp_class) { fe->fe_cx |= FPSCR_VXIDI; return (fpu_newnan(fe)); } DUMPFPN(FPE_REG, x); return (x); } if (ISZERO(x)) { fe->fe_cx |= FPSCR_ZX; if (x->fp_class == y->fp_class) { fe->fe_cx |= FPSCR_VXZDZ; return (fpu_newnan(fe)); } DUMPFPN(FPE_REG, x); return (x); } /* all results at this point use XOR of operand signs */ x->fp_sign ^= y->fp_sign; if (ISINF(y)) { x->fp_class = FPC_ZERO; DUMPFPN(FPE_REG, x); return (x); } if (ISZERO(y)) { fe->fe_cx = FPSCR_ZX; x->fp_class = FPC_INF; DUMPFPN(FPE_REG, x); return (x); } /* * Macros for the divide. See comments at top for algorithm. * Note that we expand R, D, and Y here. */ #define SUBTRACT /* D = R - Y */ \ FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \ FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0) #define NONNEGATIVE /* D >= 0 */ \ ((int)d0 >= 0) #ifdef FPU_SHL1_BY_ADD #define SHL1 /* R <<= 1 */ \ FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \ FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0) #else #define SHL1 \ r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \ r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1 #endif #define LOOP /* do ... while (bit >>= 1) */ \ do { \ SHL1; \ SUBTRACT; \ if (NONNEGATIVE) { \ q |= bit; \ r0 = d0, r1 = d1, r2 = d2, r3 = d3; \ } \ } while ((bit >>= 1) != 0) #define WORD(r, i) /* calculate r->fp_mant[i] */ \ q = 0; \ bit = 1 << 31; \ LOOP; \ (x)->fp_mant[i] = q /* Setup. Note that we put our result in x. */ r0 = x->fp_mant[0]; r1 = x->fp_mant[1]; r2 = x->fp_mant[2]; r3 = x->fp_mant[3]; y0 = y->fp_mant[0]; y1 = y->fp_mant[1]; y2 = y->fp_mant[2]; y3 = y->fp_mant[3]; bit = FP_1; SUBTRACT; if (NONNEGATIVE) { x->fp_exp -= y->fp_exp; r0 = d0, r1 = d1, r2 = d2, r3 = d3; q = bit; bit >>= 1; } else { x->fp_exp -= y->fp_exp + 1; q = 0; } LOOP; x->fp_mant[0] = q; WORD(x, 1); WORD(x, 2); WORD(x, 3); x->fp_sticky = r0 | r1 | r2 | r3; DUMPFPN(FPE_REG, x); return (x); } diff --git a/sys/powerpc/fpu/fpu_emu.c b/sys/powerpc/fpu/fpu_emu.c index 6c1b91fb9725..c8c1f0b4b7ee 100644 --- a/sys/powerpc/fpu/fpu_emu.c +++ b/sys/powerpc/fpu/fpu_emu.c @@ -1,791 +1,790 @@ /* $NetBSD: fpu_emu.c,v 1.14 2005/12/11 12:18:42 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)fpu.c 8.1 (Berkeley) 6/11/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include -#include #include #include #include static SYSCTL_NODE(_hw, OID_AUTO, fpu_emu, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "FPU emulator"); #define FPU_EMU_EVCNT_DECL(name) \ static u_int fpu_emu_evcnt_##name; \ SYSCTL_INT(_hw_fpu_emu, OID_AUTO, evcnt_##name, CTLFLAG_RD, \ &fpu_emu_evcnt_##name, 0, "") #define FPU_EMU_EVCNT_INCR(name) fpu_emu_evcnt_##name++ FPU_EMU_EVCNT_DECL(stfiwx); FPU_EMU_EVCNT_DECL(fpstore); FPU_EMU_EVCNT_DECL(fpload); FPU_EMU_EVCNT_DECL(fcmpu); FPU_EMU_EVCNT_DECL(frsp); FPU_EMU_EVCNT_DECL(fctiw); FPU_EMU_EVCNT_DECL(fcmpo); FPU_EMU_EVCNT_DECL(mtfsb1); FPU_EMU_EVCNT_DECL(fnegabs); FPU_EMU_EVCNT_DECL(mcrfs); FPU_EMU_EVCNT_DECL(mtfsb0); FPU_EMU_EVCNT_DECL(fmr); FPU_EMU_EVCNT_DECL(mtfsfi); FPU_EMU_EVCNT_DECL(fnabs); FPU_EMU_EVCNT_DECL(fabs); FPU_EMU_EVCNT_DECL(mffs); FPU_EMU_EVCNT_DECL(mtfsf); FPU_EMU_EVCNT_DECL(fctid); FPU_EMU_EVCNT_DECL(fcfid); FPU_EMU_EVCNT_DECL(fdiv); FPU_EMU_EVCNT_DECL(fsub); FPU_EMU_EVCNT_DECL(fadd); FPU_EMU_EVCNT_DECL(fsqrt); FPU_EMU_EVCNT_DECL(fsel); FPU_EMU_EVCNT_DECL(fpres); FPU_EMU_EVCNT_DECL(fmul); FPU_EMU_EVCNT_DECL(frsqrte); FPU_EMU_EVCNT_DECL(fmulsub); FPU_EMU_EVCNT_DECL(fmuladd); FPU_EMU_EVCNT_DECL(fnmsub); FPU_EMU_EVCNT_DECL(fnmadd); /* FPSR exception masks */ #define FPSR_EX_MSK (FPSCR_VX|FPSCR_OX|FPSCR_UX|FPSCR_ZX| \ FPSCR_XX|FPSCR_VXSNAN|FPSCR_VXISI|FPSCR_VXIDI| \ FPSCR_VXZDZ|FPSCR_VXIMZ|FPSCR_VXVC|FPSCR_VXSOFT|\ FPSCR_VXSQRT|FPSCR_VXCVI) #define FPSR_EX (FPSCR_VE|FPSCR_OE|FPSCR_UE|FPSCR_ZE|FPSCR_XE) #define FPSR_EXOP (FPSR_EX_MSK&(~FPSR_EX)) int fpe_debug = 0; #ifdef DEBUG vm_offset_t opc_disasm(vm_offset_t, int); /* * Dump a `fpn' structure. */ void fpu_dumpfpn(struct fpn *fp) { static const char *class[] = { "SNAN", "QNAN", "ZERO", "NUM", "INF" }; printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2], fp->fp_sign ? '-' : ' ', fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2], fp->fp_mant[3], fp->fp_exp); } #endif /* * fpu_execute returns the following error numbers (0 = no error): */ #define FPE 1 /* take a floating point exception */ #define NOTFPU 2 /* not an FPU instruction */ #define FAULT 3 /* * Emulate a floating-point instruction. * Return zero for success, else signal number. * (Typically: zero, SIGFPE, SIGILL, SIGSEGV) */ int fpu_emulate(struct trapframe *frame, struct fpu *fpf) { union instr insn; struct fpemu fe; int sig; /* initialize insn.is_datasize to tell it is *not* initialized */ fe.fe_fpstate = fpf; fe.fe_cx = 0; /* always set this (to avoid a warning) */ if (copyin((void *) (frame->srr0), &insn.i_int, sizeof (insn.i_int))) { #ifdef DEBUG printf("fpu_emulate: fault reading opcode\n"); #endif return SIGSEGV; } DPRINTF(FPE_EX, ("fpu_emulate: emulating insn %x at %p\n", insn.i_int, (void *)frame->srr0)); if ((insn.i_any.i_opcd == OPC_TWI) || ((insn.i_any.i_opcd == OPC_integer_31) && (insn.i_x.i_xo == OPC31_TW))) { /* Check for the two trap insns. 
*/ DPRINTF(FPE_EX, ("fpu_emulate: SIGTRAP\n")); return (SIGTRAP); } sig = 0; switch (fpu_execute(frame, &fe, &insn)) { case 0: DPRINTF(FPE_EX, ("fpu_emulate: success\n")); frame->srr0 += 4; break; case FPE: DPRINTF(FPE_EX, ("fpu_emulate: SIGFPE\n")); sig = SIGFPE; break; case FAULT: DPRINTF(FPE_EX, ("fpu_emulate: SIGSEGV\n")); sig = SIGSEGV; break; case NOTFPU: default: DPRINTF(FPE_EX, ("fpu_emulate: SIGILL\n")); #ifdef DEBUG if (fpe_debug & FPE_EX) { printf("fpu_emulate: illegal insn %x at %p:", insn.i_int, (void *) (frame->srr0)); opc_disasm(frame->srr0, insn.i_int); } #endif sig = SIGILL; #ifdef DEBUG if (fpe_debug & FPE_EX) kdb_enter(KDB_WHY_UNSET, "illegal instruction"); #endif break; } return (sig); } /* * Execute an FPU instruction (one that runs entirely in the FPU; not * FBfcc or STF, for instance). On return, fe->fe_fs->fs_fsr will be * modified to reflect the setting the hardware would have left. * * Note that we do not catch all illegal opcodes, so you can, for instance, * multiply two integers this way. */ int fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn) { struct fpn *fp; union instr instr = *insn; int *a; vm_offset_t addr; int ra, rb, rc, rt, type, mask, fsr, cx, bf, setcr; unsigned int cond; struct fpu *fs; /* Setup work. */ fp = NULL; fs = fe->fe_fpstate; fe->fe_fpscr = ((int *)&fs->fpscr)[1]; /* * On PowerPC all floating point values are stored in registers * as doubles, even when used for single precision operations. */ type = FTYPE_DBL; cond = instr.i_any.i_rc; setcr = 0; bf = 0; /* XXX gcc */ #if defined(DDB) && defined(DEBUG) if (fpe_debug & FPE_EX) { vm_offset_t loc = tf->srr0; printf("Trying to emulate: %p ", (void *)loc); opc_disasm(loc, instr.i_int); } #endif /* * `Decode' and execute instruction. */ if ((instr.i_any.i_opcd >= OPC_LFS && instr.i_any.i_opcd <= OPC_STFDU) || instr.i_any.i_opcd == OPC_integer_31) { /* * Handle load/store insns: * * Convert to/from single if needed, calculate addr, * and update index reg if needed. 
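/*
 * A minimal sketch of the effective-address rule the load/store
 * handling below follows for both the D-form (displacement) and the
 * X-form (indexed) FP instructions.  All names here are illustrative,
 * not taken from this file: "second" stands for the displacement or
 * the (rB) value, and the update forms require rA != 0.
 */
static long
fp_ea_sketch(const long *gpr, int ra, long second, int update, int *valid)
{
	long ea;

	ea = second;			/* displacement or (rB) */
	if (ra != 0)
		ea += gpr[ra];		/* rA == 0 means "no base register" */
	*valid = !(update && ra == 0);	/* update with rA == 0 is not an FPU op */
	return (ea);
}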
*/ double buf; size_t size = sizeof(float); int store, update; cond = 0; /* ld/st never set condition codes */ if (instr.i_any.i_opcd == OPC_integer_31) { if (instr.i_x.i_xo == OPC31_STFIWX) { FPU_EMU_EVCNT_INCR(stfiwx); /* Store as integer */ ra = instr.i_x.i_ra; rb = instr.i_x.i_rb; DPRINTF(FPE_INSN, ("reg %d has %jx reg %d has %jx\n", ra, (uintmax_t)tf->fixreg[ra], rb, (uintmax_t)tf->fixreg[rb])); addr = tf->fixreg[rb]; if (ra != 0) addr += tf->fixreg[ra]; rt = instr.i_x.i_rt; a = (int *)&fs->fpr[rt].fpr; DPRINTF(FPE_INSN, ("fpu_execute: Store INT %x at %p\n", a[1], (void *)addr)); if (copyout(&a[1], (void *)addr, sizeof(int))) return (FAULT); return (0); } if ((instr.i_x.i_xo & OPC31_FPMASK) != OPC31_FPOP) /* Not an indexed FP load/store op */ return (NOTFPU); store = (instr.i_x.i_xo & 0x80); if (instr.i_x.i_xo & 0x40) size = sizeof(double); else type = FTYPE_SNG; update = (instr.i_x.i_xo & 0x20); /* calculate EA of load/store */ ra = instr.i_x.i_ra; rb = instr.i_x.i_rb; DPRINTF(FPE_INSN, ("reg %d has %jx reg %d has %jx\n", ra, (uintmax_t)tf->fixreg[ra], rb, (uintmax_t)tf->fixreg[rb])); addr = tf->fixreg[rb]; if (ra != 0) addr += tf->fixreg[ra]; rt = instr.i_x.i_rt; } else { store = instr.i_d.i_opcd & 0x4; if (instr.i_d.i_opcd & 0x2) size = sizeof(double); else type = FTYPE_SNG; update = instr.i_d.i_opcd & 0x1; /* calculate EA of load/store */ ra = instr.i_d.i_ra; addr = instr.i_d.i_d; DPRINTF(FPE_INSN, ("reg %d has %jx displ %jx\n", ra, (uintmax_t)tf->fixreg[ra], (uintmax_t)addr)); if (ra != 0) addr += tf->fixreg[ra]; rt = instr.i_d.i_rt; } if (update && ra == 0) return (NOTFPU); if (store) { /* Store */ FPU_EMU_EVCNT_INCR(fpstore); if (type != FTYPE_DBL) { DPRINTF(FPE_INSN, ("fpu_execute: Store SNG at %p\n", (void *)addr)); fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL, rt); fpu_implode(fe, fp, type, (void *)&buf); if (copyout(&buf, (void *)addr, size)) return (FAULT); } else { DPRINTF(FPE_INSN, ("fpu_execute: Store DBL at %p\n", (void *)addr)); if (copyout(&fs->fpr[rt].fpr, (void *)addr, size)) return (FAULT); } } else { /* Load */ FPU_EMU_EVCNT_INCR(fpload); DPRINTF(FPE_INSN, ("fpu_execute: Load from %p\n", (void *)addr)); if (copyin((const void *)addr, &fs->fpr[rt].fpr, size)) return (FAULT); if (type != FTYPE_DBL) { fpu_explode(fe, fp = &fe->fe_f1, type, rt); fpu_implode(fe, fp, FTYPE_DBL, (u_int *)&fs->fpr[rt].fpr); } } if (update) tf->fixreg[ra] = addr; /* Complete. */ return (0); #ifdef notyet } else if (instr.i_any.i_opcd == OPC_load_st_62) { /* These are 64-bit extensions */ return (NOTFPU); #endif } else if (instr.i_any.i_opcd == OPC_sp_fp_59 || instr.i_any.i_opcd == OPC_dp_fp_63) { if (instr.i_any.i_opcd == OPC_dp_fp_63 && !(instr.i_a.i_xo & OPC63M_MASK)) { /* Format X */ rt = instr.i_x.i_rt; ra = instr.i_x.i_ra; rb = instr.i_x.i_rb; /* One of the special opcodes.... */ switch (instr.i_x.i_xo) { case OPC63_FCMPU: FPU_EMU_EVCNT_INCR(fcmpu); DPRINTF(FPE_INSN, ("fpu_execute: FCMPU\n")); rt >>= 2; fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rb); fpu_compare(fe, 0); /* Make sure we do the condition regs. */ cond = 0; /* N.B.: i_rs is already left shifted by two. */ bf = instr.i_x.i_rs & 0xfc; setcr = 1; break; case OPC63_FRSP: /* * Convert to single: * * PowerPC uses this to round a double * precision value to single precision, * but values in registers are always * stored in double precision format. 
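/*
 * A sketch of the value semantics of frsp that the FRSP case below
 * emulates: round to single precision, then keep the result in the
 * register's double-precision format.  Illustrative only -- it leans
 * on host floating point and assumes the host rounding mode matches
 * FPSCR[RN]; the emulator instead goes through fpu_implode() and
 * fpu_explode().
 */
static double
frsp_sketch(double x)
{
	return ((double)(float)x);
}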
*/ FPU_EMU_EVCNT_INCR(frsp); DPRINTF(FPE_INSN, ("fpu_execute: FRSP\n")); fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL, rb); fpu_implode(fe, fp, FTYPE_SNG, (u_int *)&fs->fpr[rt].fpr); fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG, rt); type = FTYPE_DBL; break; case OPC63_FCTIW: case OPC63_FCTIWZ: FPU_EMU_EVCNT_INCR(fctiw); DPRINTF(FPE_INSN, ("fpu_execute: FCTIW\n")); fpu_explode(fe, fp = &fe->fe_f1, type, rb); type = FTYPE_INT; break; case OPC63_FCMPO: FPU_EMU_EVCNT_INCR(fcmpo); DPRINTF(FPE_INSN, ("fpu_execute: FCMPO\n")); rt >>= 2; fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rb); fpu_compare(fe, 1); /* Make sure we do the condition regs. */ cond = 0; /* N.B.: i_rs is already left shifted by two. */ bf = instr.i_x.i_rs & 0xfc; setcr = 1; break; case OPC63_MTFSB1: FPU_EMU_EVCNT_INCR(mtfsb1); DPRINTF(FPE_INSN, ("fpu_execute: MTFSB1\n")); fe->fe_fpscr |= (~(FPSCR_VX|FPSR_EX) & (1<<(31-rt))); break; case OPC63_FNEG: FPU_EMU_EVCNT_INCR(fnegabs); DPRINTF(FPE_INSN, ("fpu_execute: FNEGABS\n")); memcpy(&fs->fpr[rt].fpr, &fs->fpr[rb].fpr, sizeof(double)); a = (int *)&fs->fpr[rt].fpr; *a ^= (1U << 31); break; case OPC63_MCRFS: FPU_EMU_EVCNT_INCR(mcrfs); DPRINTF(FPE_INSN, ("fpu_execute: MCRFS\n")); cond = 0; rt &= 0x1c; ra &= 0x1c; /* Extract the bits we want */ mask = (fe->fe_fpscr >> (28 - ra)) & 0xf; /* Clear the bits we copied. */ fe->fe_cx = (FPSR_EX_MSK | (0xf << (28 - ra))); fe->fe_fpscr &= fe->fe_cx; /* Now shove them in the right part of cr */ tf->cr &= ~(0xf << (28 - rt)); tf->cr |= (mask << (28 - rt)); break; case OPC63_MTFSB0: FPU_EMU_EVCNT_INCR(mtfsb0); DPRINTF(FPE_INSN, ("fpu_execute: MTFSB0\n")); fe->fe_fpscr &= ((FPSCR_VX|FPSR_EX) & ~(1<<(31-rt))); break; case OPC63_FMR: FPU_EMU_EVCNT_INCR(fmr); DPRINTF(FPE_INSN, ("fpu_execute: FMR\n")); memcpy(&fs->fpr[rt].fpr, &fs->fpr[rb].fpr, sizeof(double)); break; case OPC63_MTFSFI: FPU_EMU_EVCNT_INCR(mtfsfi); DPRINTF(FPE_INSN, ("fpu_execute: MTFSFI\n")); rb >>= 1; rt &= 0x1c; /* Already left-shifted 4 */ fe->fe_cx = rb << (28 - rt); mask = 0xf<<(28 - rt); fe->fe_fpscr = (fe->fe_fpscr & ~mask) | fe->fe_cx; /* XXX weird stuff about OX, FX, FEX, and VX should be handled */ break; case OPC63_FNABS: FPU_EMU_EVCNT_INCR(fnabs); DPRINTF(FPE_INSN, ("fpu_execute: FABS\n")); memcpy(&fs->fpr[rt].fpr, &fs->fpr[rb].fpr, sizeof(double)); a = (int *)&fs->fpr[rt].fpr; *a |= (1U << 31); break; case OPC63_FABS: FPU_EMU_EVCNT_INCR(fabs); DPRINTF(FPE_INSN, ("fpu_execute: FABS\n")); memcpy(&fs->fpr[rt].fpr, &fs->fpr[rb].fpr, sizeof(double)); a = (int *)&fs->fpr[rt].fpr; *a &= ~(1U << 31); break; case OPC63_MFFS: FPU_EMU_EVCNT_INCR(mffs); DPRINTF(FPE_INSN, ("fpu_execute: MFFS\n")); memcpy(&fs->fpr[rt].fpr, &fs->fpscr, sizeof(fs->fpscr)); break; case OPC63_MTFSF: FPU_EMU_EVCNT_INCR(mtfsf); DPRINTF(FPE_INSN, ("fpu_execute: MTFSF\n")); if ((rt = instr.i_xfl.i_flm) == -1) mask = -1; else { mask = 0; /* Convert 1 bit -> 4 bits */ for (ra = 0; ra < 8; ra ++) if (rt & (1<fpr[rt].fpr; fe->fe_cx = mask & a[1]; fe->fe_fpscr = (fe->fe_fpscr&~mask) | (fe->fe_cx); /* XXX weird stuff about OX, FX, FEX, and VX should be handled */ break; case OPC63_FCTID: case OPC63_FCTIDZ: FPU_EMU_EVCNT_INCR(fctid); DPRINTF(FPE_INSN, ("fpu_execute: FCTID\n")); fpu_explode(fe, fp = &fe->fe_f1, type, rb); type = FTYPE_LNG; break; case OPC63_FCFID: FPU_EMU_EVCNT_INCR(fcfid); DPRINTF(FPE_INSN, ("fpu_execute: FCFID\n")); type = FTYPE_LNG; fpu_explode(fe, fp = &fe->fe_f1, type, rb); type = FTYPE_DBL; break; default: return (NOTFPU); break; } } else { /* Format A */ rt = 
instr.i_a.i_frt; ra = instr.i_a.i_fra; rb = instr.i_a.i_frb; rc = instr.i_a.i_frc; /* * All arithmetic operations work on registers, which * are stored as doubles. */ type = FTYPE_DBL; switch ((unsigned int)instr.i_a.i_xo) { case OPC59_FDIVS: FPU_EMU_EVCNT_INCR(fdiv); DPRINTF(FPE_INSN, ("fpu_execute: FDIV\n")); fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rb); fp = fpu_div(fe); break; case OPC59_FSUBS: FPU_EMU_EVCNT_INCR(fsub); DPRINTF(FPE_INSN, ("fpu_execute: FSUB\n")); fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rb); fp = fpu_sub(fe); break; case OPC59_FADDS: FPU_EMU_EVCNT_INCR(fadd); DPRINTF(FPE_INSN, ("fpu_execute: FADD\n")); fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rb); fp = fpu_add(fe); break; case OPC59_FSQRTS: FPU_EMU_EVCNT_INCR(fsqrt); DPRINTF(FPE_INSN, ("fpu_execute: FSQRT\n")); fpu_explode(fe, &fe->fe_f1, type, rb); fp = fpu_sqrt(fe); break; case OPC63M_FSEL: FPU_EMU_EVCNT_INCR(fsel); DPRINTF(FPE_INSN, ("fpu_execute: FSEL\n")); a = (int *)&fe->fe_fpstate->fpr[ra].fpr; if ((*a & 0x80000000) && (*a & 0x7fffffff)) /* fra < 0 */ rc = rb; DPRINTF(FPE_INSN, ("f%d => f%d\n", rc, rt)); memcpy(&fs->fpr[rt].fpr, &fs->fpr[rc].fpr, sizeof(double)); break; case OPC59_FRES: FPU_EMU_EVCNT_INCR(fpres); DPRINTF(FPE_INSN, ("fpu_execute: FPRES\n")); fpu_explode(fe, &fe->fe_f1, type, rb); fp = fpu_sqrt(fe); /* now we've gotta overwrite the dest reg */ *((int *)&fe->fe_fpstate->fpr[rt].fpr) = 1; fpu_explode(fe, &fe->fe_f1, FTYPE_INT, rt); fpu_div(fe); break; case OPC59_FMULS: FPU_EMU_EVCNT_INCR(fmul); DPRINTF(FPE_INSN, ("fpu_execute: FMUL\n")); fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rc); fp = fpu_mul(fe); break; case OPC63M_FRSQRTE: /* Reciprocal sqrt() estimate */ FPU_EMU_EVCNT_INCR(frsqrte); DPRINTF(FPE_INSN, ("fpu_execute: FRSQRTE\n")); fpu_explode(fe, &fe->fe_f1, type, rb); fp = fpu_sqrt(fe); fe->fe_f2 = *fp; /* now we've gotta overwrite the dest reg */ *((int *)&fe->fe_fpstate->fpr[rt].fpr) = 1; fpu_explode(fe, &fe->fe_f1, FTYPE_INT, rt); fpu_div(fe); break; case OPC59_FMSUBS: FPU_EMU_EVCNT_INCR(fmulsub); DPRINTF(FPE_INSN, ("fpu_execute: FMULSUB\n")); fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rc); fp = fpu_mul(fe); fe->fe_f1 = *fp; fpu_explode(fe, &fe->fe_f2, type, rb); fp = fpu_sub(fe); break; case OPC59_FMADDS: FPU_EMU_EVCNT_INCR(fmuladd); DPRINTF(FPE_INSN, ("fpu_execute: FMULADD\n")); fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rc); fp = fpu_mul(fe); fe->fe_f1 = *fp; fpu_explode(fe, &fe->fe_f2, type, rb); fp = fpu_add(fe); break; case OPC59_FNMSUBS: FPU_EMU_EVCNT_INCR(fnmsub); DPRINTF(FPE_INSN, ("fpu_execute: FNMSUB\n")); fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rc); fp = fpu_mul(fe); fe->fe_f1 = *fp; fpu_explode(fe, &fe->fe_f2, type, rb); fp = fpu_sub(fe); /* Negate */ fp->fp_sign ^= 1; break; case OPC59_FNMADDS: FPU_EMU_EVCNT_INCR(fnmadd); DPRINTF(FPE_INSN, ("fpu_execute: FNMADD\n")); fpu_explode(fe, &fe->fe_f1, type, ra); fpu_explode(fe, &fe->fe_f2, type, rc); fp = fpu_mul(fe); fe->fe_f1 = *fp; fpu_explode(fe, &fe->fe_f2, type, rb); fp = fpu_add(fe); /* Negate */ fp->fp_sign ^= 1; break; default: return (NOTFPU); break; } /* If the instruction was single precision, round */ if (!(instr.i_any.i_opcd & 0x4)) { fpu_implode(fe, fp, FTYPE_SNG, (u_int *)&fs->fpr[rt].fpr); fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG, rt); } } } else { return (NOTFPU); } /* * ALU operation is complete. 
Collapse the result and then check * for exceptions. If we got any, and they are enabled, do not * alter the destination register, just stop with an exception. * Otherwise set new current exceptions and accrue. */ if (fp) fpu_implode(fe, fp, type, (u_int *)&fs->fpr[rt].fpr); cx = fe->fe_cx; fsr = fe->fe_fpscr; if (cx != 0) { fsr &= ~FPSCR_FX; if ((cx^fsr)&FPSR_EX_MSK) fsr |= FPSCR_FX; mask = fsr & FPSR_EX; mask <<= (25-3); if (cx & mask) fsr |= FPSCR_FEX; if (cx & FPSCR_FPRF) { /* Need to replace CC */ fsr &= ~FPSCR_FPRF; } if (cx & (FPSR_EXOP)) fsr |= FPSCR_VX; fsr |= cx; DPRINTF(FPE_INSN, ("fpu_execute: cx %x, fsr %x\n", cx, fsr)); } if (cond) { cond = fsr & 0xf0000000; /* Isolate condition codes */ cond >>= 28; /* Move fpu condition codes to cr[1] */ tf->cr &= (0x0f000000); tf->cr |= (cond<<24); DPRINTF(FPE_INSN, ("fpu_execute: cr[1] <= %x\n", cond)); } if (setcr) { cond = fsr & FPSCR_FPCC; /* Isolate condition codes */ cond <<= 16; /* Move fpu condition codes to cr[1] */ tf->cr &= ~(0xf0000000>>bf); tf->cr |= (cond>>bf); DPRINTF(FPE_INSN, ("fpu_execute: cr[%d] (cr=%jx) <= %x\n", bf/4, (uintmax_t)tf->cr, cond)); } ((int *)&fs->fpscr)[1] = fsr; if (fsr & FPSCR_FEX) return(FPE); return (0); /* success */ } diff --git a/sys/powerpc/fpu/fpu_implode.c b/sys/powerpc/fpu/fpu_implode.c index 69a58bf79315..8ccf4b2f8243 100644 --- a/sys/powerpc/fpu/fpu_implode.c +++ b/sys/powerpc/fpu/fpu_implode.c @@ -1,459 +1,458 @@ /* $NetBSD: fpu_implode.c,v 1.6 2005/12/11 12:18:42 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)fpu_implode.c 8.1 (Berkeley) 6/11/93 */ /* * FPU subroutines: `implode' internal format numbers into the machine's * `packed binary' format. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include -#include #include #include #include #include static int round(struct fpemu *, struct fpn *); static int toinf(struct fpemu *, int); /* * Round a number (algorithm from Motorola MC68882 manual, modified for * our internal format). Set inexact exception if rounding is required. * Return true iff we rounded up. * * After rounding, we discard the guard and round bits by shifting right * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky). * This saves effort later. * * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's * responsibility to fix this if necessary. */ static int round(struct fpemu *fe, struct fpn *fp) { u_int m0, m1, m2, m3; int gr, s; FPU_DECL_CARRY; m0 = fp->fp_mant[0]; m1 = fp->fp_mant[1]; m2 = fp->fp_mant[2]; m3 = fp->fp_mant[3]; gr = m3 & 3; s = fp->fp_sticky; /* mant >>= FP_NG */ m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG)); m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG)); m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG)); m0 >>= FP_NG; if ((gr | s) == 0) /* result is exact: no rounding needed */ goto rounddown; fe->fe_cx |= FPSCR_XX|FPSCR_FI; /* inexact */ /* Go to rounddown to round down; break to round up. */ switch ((fe->fe_fpscr) & FPSCR_RN) { case FP_RN: default: /* * Round only if guard is set (gr & 2). If guard is set, * but round & sticky both clear, then we want to round * but have a tie, so round to even, i.e., add 1 iff odd. */ if ((gr & 2) == 0) goto rounddown; if ((gr & 1) || fp->fp_sticky || (m3 & 1)) break; goto rounddown; case FP_RZ: /* Round towards zero, i.e., down. */ goto rounddown; case FP_RM: /* Round towards -Inf: up if negative, down if positive. */ if (fp->fp_sign) break; goto rounddown; case FP_RP: /* Round towards +Inf: up if positive, down otherwise. */ if (!fp->fp_sign) break; goto rounddown; } /* Bump low bit of mantissa, with carry. */ fe->fe_cx |= FPSCR_FR; FPU_ADDS(m3, m3, 1); FPU_ADDCS(m2, m2, 0); FPU_ADDCS(m1, m1, 0); FPU_ADDC(m0, m0, 0); fp->fp_mant[0] = m0; fp->fp_mant[1] = m1; fp->fp_mant[2] = m2; fp->fp_mant[3] = m3; return (1); rounddown: fp->fp_mant[0] = m0; fp->fp_mant[1] = m1; fp->fp_mant[2] = m2; fp->fp_mant[3] = m3; return (0); } /* * For overflow: return true if overflow is to go to +/-Inf, according * to the sign of the overflowing result. If false, overflow is to go * to the largest magnitude value instead. */ static int toinf(struct fpemu *fe, int sign) { int inf; /* look at rounding direction */ switch ((fe->fe_fpscr) & FPSCR_RN) { default: case FP_RN: /* the nearest value is always Inf */ inf = 1; break; case FP_RZ: /* toward 0 => never towards Inf */ inf = 0; break; case FP_RP: /* toward +Inf iff positive */ inf = sign == 0; break; case FP_RM: /* toward -Inf iff negative */ inf = sign; break; } if (inf) fe->fe_cx |= FPSCR_OX; return (inf); } /* * fpn -> int (int value returned as return value). * * N.B.: this conversion always rounds towards zero (this is a peculiarity * of the SPARC instruction set). */ u_int fpu_ftoi(struct fpemu *fe, struct fpn *fp) { u_int i; int sign, exp; sign = fp->fp_sign; switch (fp->fp_class) { case FPC_ZERO: return (0); case FPC_NUM: /* * If exp >= 2^32, overflow. Otherwise shift value right * into last mantissa word (this will not exceed 0xffffffff), * shifting any guard and round bits out into the sticky * bit. 
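/*
 * A sketch of the FP_RN (round-to-nearest-even) decision that round()
 * above derives from the guard/round bits ("gr") and the sticky bit.
 * Illustrative only: returns nonzero when the mantissa should be
 * bumped, mirroring the (gr & 2), (gr & 1), sticky and low-bit tests.
 */
static int
rn_round_up_sketch(unsigned int gr, unsigned int sticky, unsigned int lsb)
{
	if ((gr & 2) == 0)			/* guard clear: truncate */
		return (0);
	return ((gr & 1) != 0 || sticky != 0 || (lsb & 1) != 0);
}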
Then ``round'' towards zero, i.e., just set an * inexact exception if sticky is set (see round()). * If the result is > 0x80000000, or is positive and equals * 0x80000000, overflow; otherwise the last fraction word * is the result. */ if ((exp = fp->fp_exp) >= 32) break; /* NB: the following includes exp < 0 cases */ if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0) fe->fe_cx |= FPSCR_UX; i = fp->fp_mant[3]; if (i >= ((u_int)0x80000000 + sign)) break; return (sign ? -i : i); default: /* Inf, qNaN, sNaN */ break; } /* overflow: replace any inexact exception with invalid */ fe->fe_cx |= FPSCR_VXCVI; return (0x7fffffff + sign); } /* * fpn -> extended int (high bits of int value returned as return value). * * N.B.: this conversion always rounds towards zero (this is a peculiarity * of the SPARC instruction set). */ u_int fpu_ftox(struct fpemu *fe, struct fpn *fp, u_int *res) { u_int64_t i; int sign, exp; sign = fp->fp_sign; switch (fp->fp_class) { case FPC_ZERO: res[1] = 0; return (0); case FPC_NUM: /* * If exp >= 2^64, overflow. Otherwise shift value right * into last mantissa word (this will not exceed 0xffffffffffffffff), * shifting any guard and round bits out into the sticky * bit. Then ``round'' towards zero, i.e., just set an * inexact exception if sticky is set (see round()). * If the result is > 0x8000000000000000, or is positive and equals * 0x8000000000000000, overflow; otherwise the last fraction word * is the result. */ if ((exp = fp->fp_exp) >= 64) break; /* NB: the following includes exp < 0 cases */ if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0) fe->fe_cx |= FPSCR_UX; i = ((u_int64_t)fp->fp_mant[2]<<32)|fp->fp_mant[3]; if (i >= ((u_int64_t)0x8000000000000000LL + sign)) break; return (sign ? -i : i); default: /* Inf, qNaN, sNaN */ break; } /* overflow: replace any inexact exception with invalid */ fe->fe_cx |= FPSCR_VXCVI; return (0x7fffffffffffffffLL + sign); } /* * fpn -> single (32 bit single returned as return value). * We assume <= 29 bits in a single-precision fraction (1.f part). */ u_int fpu_ftos(struct fpemu *fe, struct fpn *fp) { u_int sign = fp->fp_sign << 31; int exp; #define SNG_EXP(e) ((e) << SNG_FRACBITS) /* makes e an exponent */ #define SNG_MASK (SNG_EXP(1) - 1) /* mask for fraction */ /* Take care of non-numbers first. */ if (ISNAN(fp)) { /* * Preserve upper bits of NaN, per SPARC V8 appendix N. * Note that fp->fp_mant[0] has the quiet bit set, * even if it is classified as a signalling NaN. */ (void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS); exp = SNG_EXP_INFNAN; goto done; } if (ISINF(fp)) return (sign | SNG_EXP(SNG_EXP_INFNAN)); if (ISZERO(fp)) return (sign); /* * Normals (including subnormals). Drop all the fraction bits * (including the explicit ``implied'' 1 bit) down into the * single-precision range. If the number is subnormal, move * the ``implied'' 1 into the explicit range as well, and shift * right to introduce leading zeroes. Rounding then acts * differently for normals and subnormals: the largest subnormal * may round to the smallest normal (1.0 x 2^minexp), or may * remain subnormal. In the latter case, signal an underflow * if the result was inexact or if underflow traps are enabled. * * Rounding a normal, on the other hand, always produces another * normal (although either way the result might be too big for * single precision, and cause an overflow). If rounding a * normal produces 2.0 in the fraction, we need not adjust that * fraction at all, since both 1.0 and 2.0 are zero under the * fraction mask. 
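/*
 * A sketch of the final packing fpu_ftos() performs once the fraction
 * has been shifted into the single-precision field and rounded.
 * Illustrative only; 23 mirrors SNG_FRACBITS, and "sign" is 0 or 1.
 */
static unsigned int
pack_single_sketch(unsigned int sign, unsigned int exp, unsigned int frac)
{
	return ((sign << 31) | (exp << 23) | (frac & ((1u << 23) - 1)));
}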
* * Note that the guard and round bits vanish from the number after * rounding. */ if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */ /* -NG for g,r; -SNG_FRACBITS-exp for fraction */ (void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp); if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) return (sign | SNG_EXP(1) | 0); if ((fe->fe_cx & FPSCR_FI) || (fe->fe_fpscr & FPSCR_UX)) fe->fe_cx |= FPSCR_UX; return (sign | SNG_EXP(0) | fp->fp_mant[3]); } /* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */ (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS); #ifdef DIAGNOSTIC if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0) panic("fpu_ftos"); #endif if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2)) exp++; if (exp >= SNG_EXP_INFNAN) { /* overflow to inf or to max single */ if (toinf(fe, sign)) return (sign | SNG_EXP(SNG_EXP_INFNAN)); return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK); } done: /* phew, made it */ return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK)); } /* * fpn -> double (32 bit high-order result returned; 32-bit low order result * left in res[1]). Assumes <= 61 bits in double precision fraction. * * This code mimics fpu_ftos; see it for comments. */ u_int fpu_ftod(struct fpemu *fe, struct fpn *fp, u_int *res) { u_int sign = fp->fp_sign << 31; int exp; #define DBL_EXP(e) ((e) << (DBL_FRACBITS & 31)) #define DBL_MASK (DBL_EXP(1) - 1) if (ISNAN(fp)) { (void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS); exp = DBL_EXP_INFNAN; goto done; } if (ISINF(fp)) { sign |= DBL_EXP(DBL_EXP_INFNAN); goto zero; } if (ISZERO(fp)) { zero: res[1] = 0; return (sign); } if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) { (void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp); if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) { res[1] = 0; return (sign | DBL_EXP(1) | 0); } if ((fe->fe_cx & FPSCR_FI) || (fe->fe_fpscr & FPSCR_UX)) fe->fe_cx |= FPSCR_UX; exp = 0; goto done; } (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS); if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2)) exp++; if (exp >= DBL_EXP_INFNAN) { fe->fe_cx |= FPSCR_OX | FPSCR_UX; if (toinf(fe, sign)) { res[1] = 0; return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0); } res[1] = ~0; return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK); } done: res[1] = fp->fp_mant[3]; return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK)); } /* * Implode an fpn, writing the result into the given space. */ void fpu_implode(struct fpemu *fe, struct fpn *fp, int type, u_int *space) { switch (type) { case FTYPE_LNG: space[0] = fpu_ftox(fe, fp, space); DPRINTF(FPE_REG, ("fpu_implode: long %x %x\n", space[0], space[1])); break; case FTYPE_INT: space[0] = 0; space[1] = fpu_ftoi(fe, fp); DPRINTF(FPE_REG, ("fpu_implode: int %x\n", space[1])); break; case FTYPE_SNG: space[0] = fpu_ftos(fe, fp); DPRINTF(FPE_REG, ("fpu_implode: single %x\n", space[0])); break; case FTYPE_DBL: space[0] = fpu_ftod(fe, fp, space); DPRINTF(FPE_REG, ("fpu_implode: double %x %x\n", space[0], space[1])); break; break; default: panic("fpu_implode: invalid type %d", type); } } diff --git a/sys/powerpc/fpu/fpu_mul.c b/sys/powerpc/fpu/fpu_mul.c index 609b261492a4..336ddaee9607 100644 --- a/sys/powerpc/fpu/fpu_mul.c +++ b/sys/powerpc/fpu/fpu_mul.c @@ -1,241 +1,240 @@ /* $NetBSD: fpu_mul.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ /* * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. 
* * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fpu_mul.c 8.1 (Berkeley) 6/11/93 */ /* * Perform an FPU multiply (return x * y). */ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include /* * The multiplication algorithm for normal numbers is as follows: * * The fraction of the product is built in the usual stepwise fashion. * Each step consists of shifting the accumulator right one bit * (maintaining any guard bits) and, if the next bit in y is set, * adding the multiplicand (x) to the accumulator. Then, in any case, * we advance one bit leftward in y. Algorithmically: * * A = 0; * for (bit = 0; bit < FP_NMANT; bit++) { * sticky |= A & 1, A >>= 1; * if (Y & (1 << bit)) * A += X; * } * * (X and Y here represent the mantissas of x and y respectively.) * The resultant accumulator (A) is the product's mantissa. It may * be as large as 11.11111... in binary and hence may need to be * shifted right, but at most one bit. * * Since we do not have efficient multiword arithmetic, we code the * accumulator as four separate words, just like any other mantissa. * We use local variables in the hope that this is faster than memory. * We keep x->fp_mant in locals for the same reason. * * In the algorithm above, the bits in y are inspected one at a time. * We will pick them up 32 at a time and then deal with those 32, one * at a time. Note, however, that we know several things about y: * * - the guard and round bits at the bottom are sure to be zero; * * - often many low bits are zero (y is often from a single or double * precision source); * * - bit FP_NMANT-1 is set, and FP_1*2 fits in a word. * * We can also test for 32-zero-bits swiftly. 
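/*
 * A standalone sketch of the shift-and-add loop described above,
 * reduced to one 32-bit word instead of the four-word mantissa.
 * Illustrative only; assumes x < 0x80000000 so the accumulator (which
 * stays below 2*x) cannot wrap.  Bits shifted off the bottom are
 * collected in *sticky, just as the SHR1 step does in the real code.
 */
static unsigned int
mul_frac_sketch(unsigned int x, unsigned int y, unsigned int *sticky)
{
	unsigned int a, bit;

	a = 0;
	*sticky = 0;
	for (bit = 1; bit != 0; bit <<= 1) {
		*sticky |= a & 1;		/* remember the dropped bit */
		a >>= 1;
		if (y & bit)
			a += x;			/* add multiplicand for this bit */
	}
	return (a);
}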
In this case, the center * part of the loop---setting sticky, shifting A, and not adding---will * run 32 times without adding X to A. We can do a 32-bit shift faster * by simply moving words. Since zeros are common, we optimize this case. * Furthermore, since A is initially zero, we can omit the shift as well * until we reach a nonzero word. */ struct fpn * fpu_mul(struct fpemu *fe) { struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m; int sticky; FPU_DECL_CARRY; /* * Put the `heavier' operand on the right (see fpu_emu.h). * Then we will have one of the following cases, taken in the * following order: * * - y = NaN. Implied: if only one is a signalling NaN, y is. * The result is y. * - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN * case was taken care of earlier). * If x = 0, the result is NaN. Otherwise the result * is y, with its sign reversed if x is negative. * - x = 0. Implied: y is 0 or number. * The result is 0 (with XORed sign as usual). * - other. Implied: both x and y are numbers. * The result is x * y (XOR sign, multiply bits, add exponents). */ DPRINTF(FPE_REG, ("fpu_mul:\n")); DUMPFPN(FPE_REG, x); DUMPFPN(FPE_REG, y); DPRINTF(FPE_REG, ("=>\n")); ORDER(x, y); if (ISNAN(y)) { y->fp_sign ^= x->fp_sign; fe->fe_cx |= FPSCR_VXSNAN; DUMPFPN(FPE_REG, y); return (y); } if (ISINF(y)) { if (ISZERO(x)) { fe->fe_cx |= FPSCR_VXIMZ; return (fpu_newnan(fe)); } y->fp_sign ^= x->fp_sign; DUMPFPN(FPE_REG, y); return (y); } if (ISZERO(x)) { x->fp_sign ^= y->fp_sign; DUMPFPN(FPE_REG, x); return (x); } /* * Setup. In the code below, the mask `m' will hold the current * mantissa byte from y. The variable `bit' denotes the bit * within m. We also define some macros to deal with everything. */ x3 = x->fp_mant[3]; x2 = x->fp_mant[2]; x1 = x->fp_mant[1]; x0 = x->fp_mant[0]; sticky = a3 = a2 = a1 = a0 = 0; #define ADD /* A += X */ \ FPU_ADDS(a3, a3, x3); \ FPU_ADDCS(a2, a2, x2); \ FPU_ADDCS(a1, a1, x1); \ FPU_ADDC(a0, a0, x0) #define SHR1 /* A >>= 1, with sticky */ \ sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \ a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1 #define SHR32 /* A >>= 32, with sticky */ \ sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0 #define STEP /* each 1-bit step of the multiplication */ \ SHR1; if (bit & m) { ADD; }; bit <<= 1 /* * We are ready to begin. The multiply loop runs once for each * of the four 32-bit words. Some words, however, are special. * As noted above, the low order bits of Y are often zero. Even * if not, the first loop can certainly skip the guard bits. * The last word of y has its highest 1-bit in position FP_NMANT-1, * so we stop the loop when we move past that bit. */ if ((m = y->fp_mant[3]) == 0) { /* SHR32; */ /* unneeded since A==0 */ } else { bit = 1 << FP_NG; do { STEP; } while (bit != 0); } if ((m = y->fp_mant[2]) == 0) { SHR32; } else { bit = 1; do { STEP; } while (bit != 0); } if ((m = y->fp_mant[1]) == 0) { SHR32; } else { bit = 1; do { STEP; } while (bit != 0); } m = y->fp_mant[0]; /* definitely != 0 */ bit = 1; do { STEP; } while (bit <= m); /* * Done with mantissa calculation. Get exponent and handle * 11.111...1 case, then put result in place. We reuse x since * it already has the right class (FP_NUM). 
*/ m = x->fp_exp + y->fp_exp; if (a0 >= FP_2) { SHR1; m++; } x->fp_sign ^= y->fp_sign; x->fp_exp = m; x->fp_sticky = sticky; x->fp_mant[3] = a3; x->fp_mant[2] = a2; x->fp_mant[1] = a1; x->fp_mant[0] = a0; DUMPFPN(FPE_REG, x); return (x); } diff --git a/sys/powerpc/fpu/fpu_sqrt.c b/sys/powerpc/fpu/fpu_sqrt.c index e1f5bec9006f..5a95d7eae115 100644 --- a/sys/powerpc/fpu/fpu_sqrt.c +++ b/sys/powerpc/fpu/fpu_sqrt.c @@ -1,417 +1,416 @@ /* $NetBSD: fpu_sqrt.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fpu_sqrt.c 8.1 (Berkeley) 6/11/93 */ /* * Perform an FPU square root (return sqrt(x)). */ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include /* * Our task is to calculate the square root of a floating point number x0. 
* This number x normally has the form: * * exp * x = mant * 2 (where 1 <= mant < 2 and exp is an integer) * * This can be left as it stands, or the mantissa can be doubled and the * exponent decremented: * * exp-1 * x = (2 * mant) * 2 (where 2 <= 2 * mant < 4) * * If the exponent `exp' is even, the square root of the number is best * handled using the first form, and is by definition equal to: * * exp/2 * sqrt(x) = sqrt(mant) * 2 * * If exp is odd, on the other hand, it is convenient to use the second * form, giving: * * (exp-1)/2 * sqrt(x) = sqrt(2 * mant) * 2 * * In the first case, we have * * 1 <= mant < 2 * * and therefore * * sqrt(1) <= sqrt(mant) < sqrt(2) * * while in the second case we have * * 2 <= 2*mant < 4 * * and therefore * * sqrt(2) <= sqrt(2*mant) < sqrt(4) * * so that in any case, we are sure that * * sqrt(1) <= sqrt(n * mant) < sqrt(4), n = 1 or 2 * * or * * 1 <= sqrt(n * mant) < 2, n = 1 or 2. * * This root is therefore a properly formed mantissa for a floating * point number. The exponent of sqrt(x) is either exp/2 or (exp-1)/2 * as above. This leaves us with the problem of finding the square root * of a fixed-point number in the range [1..4). * * Though it may not be instantly obvious, the following square root * algorithm works for any integer x of an even number of bits, provided * that no overflows occur: * * let q = 0 * for k = NBITS-1 to 0 step -1 do -- for each digit in the answer... * x *= 2 -- multiply by radix, for next digit * if x >= 2q + 2^k then -- if adding 2^k does not * x -= 2q + 2^k -- exceed the correct root, * q += 2^k -- add 2^k and adjust x * fi * done * sqrt = q / 2^(NBITS/2) -- (and any remainder is in x) * * If NBITS is odd (so that k is initially even), we can just add another * zero bit at the top of x. Doing so means that q is not going to acquire * a 1 bit in the first trip around the loop (since x0 < 2^NBITS). If the * final value in x is not needed, or can be off by a factor of 2, this is * equivalant to moving the `x *= 2' step to the bottom of the loop: * * for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done * * and the result q will then be sqrt(x0) * 2^floor(NBITS / 2). * (Since the algorithm is destructive on x, we will call x's initial * value, for which q is some power of two times its square root, x0.) * * If we insert a loop invariant y = 2q, we can then rewrite this using * C notation as: * * q = y = 0; x = x0; * for (k = NBITS; --k >= 0;) { * #if (NBITS is even) * x *= 2; * #endif * t = y + (1 << k); * if (x >= t) { * x -= t; * q += 1 << k; * y += 1 << (k + 1); * } * #if (NBITS is odd) * x *= 2; * #endif * } * * If x0 is fixed point, rather than an integer, we can simply alter the * scale factor between q and sqrt(x0). As it happens, we can easily arrange * for the scale factor to be 2**0 or 1, so that sqrt(x0) == q. * * In our case, however, x0 (and therefore x, y, q, and t) are multiword * integers, which adds some complication. But note that q is built one * bit at a time, from the top down, and is not used itself in the loop * (we use 2q as held in y instead). This means we can build our answer * in an integer, one word at a time, which saves a bit of work. Also, * since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are * `new' bits in y and we can set them with an `or' operation rather than * a full-blown multiword add. * * We are almost done, except for one snag. We must prove that none of our * intermediate calculations can overflow. 
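/*
 * A compact standalone illustration of the same digit-by-digit idea
 * derived above (trial-subtract a term built from the partial root),
 * in the more familiar plain-integer form rather than the multiword
 * fixed-point form used here.  Illustrative only -- this is the
 * textbook binary integer square root, not the loop fpu_sqrt() itself
 * runs.
 */
static unsigned int
isqrt_sketch(unsigned int x)
{
	unsigned int q, bit, t;

	q = 0;
	bit = 1u << 30;			/* highest even power of two */
	while (bit > x)
		bit >>= 2;
	while (bit != 0) {
		t = q + bit;
		if (x >= t) {		/* the trial term fits: accept the bit */
			x -= t;
			q = (q >> 1) + bit;
		} else
			q >>= 1;
		bit >>= 2;
	}
	return (q);			/* floor of the square root */
}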
We know that x0 is in [1..4) * and therefore the square root in q will be in [1..2), but what about x, * y, and t? * * We know that y = 2q at the beginning of each loop. (The relation only * fails temporarily while y and q are being updated.) Since q < 2, y < 4. * The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6, and. * Furthermore, we can prove with a bit of work that x never exceeds y by * more than 2, so that even after doubling, 0 <= x < 8. (This is left as * an exercise to the reader, mostly because I have become tired of working * on this comment.) * * If our floating point mantissas (which are of the form 1.frac) occupy * B+1 bits, our largest intermediary needs at most B+3 bits, or two extra. * In fact, we want even one more bit (for a carry, to avoid compares), or * three extra. There is a comment in fpu_emu.h reminding maintainers of * this, so we have some justification in assuming it. */ struct fpn * fpu_sqrt(struct fpemu *fe) { struct fpn *x = &fe->fe_f1; u_int bit, q, tt; u_int x0, x1, x2, x3; u_int y0, y1, y2, y3; u_int d0, d1, d2, d3; int e; FPU_DECL_CARRY; /* * Take care of special cases first. In order: * * sqrt(NaN) = NaN * sqrt(+0) = +0 * sqrt(-0) = -0 * sqrt(x < 0) = NaN (including sqrt(-Inf)) * sqrt(+Inf) = +Inf * * Then all that remains are numbers with mantissas in [1..2). */ DPRINTF(FPE_REG, ("fpu_sqer:\n")); DUMPFPN(FPE_REG, x); DPRINTF(FPE_REG, ("=>\n")); if (ISNAN(x)) { fe->fe_cx |= FPSCR_VXSNAN; DUMPFPN(FPE_REG, x); return (x); } if (ISZERO(x)) { fe->fe_cx |= FPSCR_ZX; x->fp_class = FPC_INF; DUMPFPN(FPE_REG, x); return (x); } if (x->fp_sign) { fe->fe_cx |= FPSCR_VXSQRT; return (fpu_newnan(fe)); } if (ISINF(x)) { DUMPFPN(FPE_REG, x); return (x); } /* * Calculate result exponent. As noted above, this may involve * doubling the mantissa. We will also need to double x each * time around the loop, so we define a macro for this here, and * we break out the multiword mantissa. */ #ifdef FPU_SHL1_BY_ADD #define DOUBLE_X { \ FPU_ADDS(x3, x3, x3); FPU_ADDCS(x2, x2, x2); \ FPU_ADDCS(x1, x1, x1); FPU_ADDC(x0, x0, x0); \ } #else #define DOUBLE_X { \ x0 = (x0 << 1) | (x1 >> 31); x1 = (x1 << 1) | (x2 >> 31); \ x2 = (x2 << 1) | (x3 >> 31); x3 <<= 1; \ } #endif #if (FP_NMANT & 1) != 0 # define ODD_DOUBLE DOUBLE_X # define EVEN_DOUBLE /* nothing */ #else # define ODD_DOUBLE /* nothing */ # define EVEN_DOUBLE DOUBLE_X #endif x0 = x->fp_mant[0]; x1 = x->fp_mant[1]; x2 = x->fp_mant[2]; x3 = x->fp_mant[3]; e = x->fp_exp; if (e & 1) /* exponent is odd; use sqrt(2mant) */ DOUBLE_X; /* THE FOLLOWING ASSUMES THAT RIGHT SHIFT DOES SIGN EXTENSION */ x->fp_exp = e >> 1; /* calculates (e&1 ? (e-1)/2 : e/2 */ /* * Now calculate the mantissa root. Since x is now in [1..4), * we know that the first trip around the loop will definitely * set the top bit in q, so we can do that manually and start * the loop at the next bit down instead. We must be sure to * double x correctly while doing the `known q=1.0'. * * We do this one mantissa-word at a time, as noted above, to * save work. To avoid `(1U << 31) << 1', we also do the top bit * outside of each per-word loop. * * The calculation `t = y + bit' breaks down into `t0 = y0, ..., * t3 = y3, t? |= bit' for the appropriate word. Since the bit * is always a `new' one, this means that three of the `t?'s are * just the corresponding `y?'; we use `#define's here for this. * The variable `tt' holds the actual `t?' variable. 
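/*
 * A sketch of the two-word trial subtraction used by the q1/q2/q3
 * loops below: compute d = x - t across words with a borrow and use
 * the sign of the high word to decide whether x >= t.  Illustrative
 * only; assumes the high words stay below 2^31, which the mantissa
 * layout discussed above guarantees for the real code.
 */
static int
trial_sub_sketch(unsigned int *x0, unsigned int *x1, unsigned int t0,
    unsigned int t1)
{
	unsigned int d0, d1;

	d1 = *x1 - t1;
	d0 = *x0 - t0 - (*x1 < t1);	/* propagate the borrow */
	if ((int)d0 >= 0) {		/* x >= t: keep the difference */
		*x0 = d0;
		*x1 = d1;
		return (1);
	}
	return (0);			/* x < t: leave x unchanged */
}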
*/ /* calculate q0 */ #define t0 tt bit = FP_1; EVEN_DOUBLE; /* if (x >= (t0 = y0 | bit)) { */ /* always true */ q = bit; x0 -= bit; y0 = bit << 1; /* } */ ODD_DOUBLE; while ((bit >>= 1) != 0) { /* for remaining bits in q0 */ EVEN_DOUBLE; t0 = y0 | bit; /* t = y + bit */ if (x0 >= t0) { /* if x >= t then */ x0 -= t0; /* x -= t */ q |= bit; /* q += bit */ y0 |= bit << 1; /* y += bit << 1 */ } ODD_DOUBLE; } x->fp_mant[0] = q; #undef t0 /* calculate q1. note (y0&1)==0. */ #define t0 y0 #define t1 tt q = 0; y1 = 0; bit = 1 << 31; EVEN_DOUBLE; t1 = bit; FPU_SUBS(d1, x1, t1); FPU_SUBC(d0, x0, t0); /* d = x - t */ if ((int)d0 >= 0) { /* if d >= 0 (i.e., x >= t) then */ x0 = d0, x1 = d1; /* x -= t */ q = bit; /* q += bit */ y0 |= 1; /* y += bit << 1 */ } ODD_DOUBLE; while ((bit >>= 1) != 0) { /* for remaining bits in q1 */ EVEN_DOUBLE; /* as before */ t1 = y1 | bit; FPU_SUBS(d1, x1, t1); FPU_SUBC(d0, x0, t0); if ((int)d0 >= 0) { x0 = d0, x1 = d1; q |= bit; y1 |= bit << 1; } ODD_DOUBLE; } x->fp_mant[1] = q; #undef t1 /* calculate q2. note (y1&1)==0; y0 (aka t0) is fixed. */ #define t1 y1 #define t2 tt q = 0; y2 = 0; bit = 1 << 31; EVEN_DOUBLE; t2 = bit; FPU_SUBS(d2, x2, t2); FPU_SUBCS(d1, x1, t1); FPU_SUBC(d0, x0, t0); if ((int)d0 >= 0) { x0 = d0, x1 = d1, x2 = d2; q = bit; y1 |= 1; /* now t1, y1 are set in concrete */ } ODD_DOUBLE; while ((bit >>= 1) != 0) { EVEN_DOUBLE; t2 = y2 | bit; FPU_SUBS(d2, x2, t2); FPU_SUBCS(d1, x1, t1); FPU_SUBC(d0, x0, t0); if ((int)d0 >= 0) { x0 = d0, x1 = d1, x2 = d2; q |= bit; y2 |= bit << 1; } ODD_DOUBLE; } x->fp_mant[2] = q; #undef t2 /* calculate q3. y0, t0, y1, t1 all fixed; y2, t2, almost done. */ #define t2 y2 #define t3 tt q = 0; y3 = 0; bit = 1 << 31; EVEN_DOUBLE; t3 = bit; FPU_SUBS(d3, x3, t3); FPU_SUBCS(d2, x2, t2); FPU_SUBCS(d1, x1, t1); FPU_SUBC(d0, x0, t0); if ((int)d0 >= 0) { x0 = d0, x1 = d1, x2 = d2; x3 = d3; q = bit; y2 |= 1; } ODD_DOUBLE; while ((bit >>= 1) != 0) { EVEN_DOUBLE; t3 = y3 | bit; FPU_SUBS(d3, x3, t3); FPU_SUBCS(d2, x2, t2); FPU_SUBCS(d1, x1, t1); FPU_SUBC(d0, x0, t0); if ((int)d0 >= 0) { x0 = d0, x1 = d1, x2 = d2; x3 = d3; q |= bit; y3 |= bit << 1; } ODD_DOUBLE; } x->fp_mant[3] = q; /* * The result, which includes guard and round bits, is exact iff * x is now zero; any nonzero bits in x represent sticky bits. */ x->fp_sticky = x0 | x1 | x2 | x3; DUMPFPN(FPE_REG, x); return (x); } diff --git a/sys/powerpc/fpu/fpu_subr.c b/sys/powerpc/fpu/fpu_subr.c index fc817a7ee587..fbe1bc70aa5a 100644 --- a/sys/powerpc/fpu/fpu_subr.c +++ b/sys/powerpc/fpu/fpu_subr.c @@ -1,222 +1,221 @@ /* $NetBSD: fpu_subr.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ /* * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fpu_subr.c 8.1 (Berkeley) 6/11/93 */ /* * FPU subroutines. */ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include /* * Shift the given number right rsh bits. Any bits that `fall off' will get * shoved into the sticky field; we return the resulting sticky. Note that * shifting NaNs is legal (this will never shift all bits out); a NaN's * sticky field is ignored anyway. */ int fpu_shr(struct fpn *fp, int rsh) { u_int m0, m1, m2, m3, s; int lsh; #ifdef DIAGNOSTIC if (rsh <= 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp))) panic("fpu_rightshift 1"); #endif m0 = fp->fp_mant[0]; m1 = fp->fp_mant[1]; m2 = fp->fp_mant[2]; m3 = fp->fp_mant[3]; /* If shifting all the bits out, take a shortcut. */ if (rsh >= FP_NMANT) { #ifdef DIAGNOSTIC if ((m0 | m1 | m2 | m3) == 0) panic("fpu_rightshift 2"); #endif fp->fp_mant[0] = 0; fp->fp_mant[1] = 0; fp->fp_mant[2] = 0; fp->fp_mant[3] = 0; #ifdef notdef if ((m0 | m1 | m2 | m3) == 0) fp->fp_class = FPC_ZERO; else #endif fp->fp_sticky = 1; return (1); } /* Squish out full words. */ s = fp->fp_sticky; if (rsh >= 32 * 3) { s |= m3 | m2 | m1; m3 = m0, m2 = 0, m1 = 0, m0 = 0; } else if (rsh >= 32 * 2) { s |= m3 | m2; m3 = m1, m2 = m0, m1 = 0, m0 = 0; } else if (rsh >= 32) { s |= m3; m3 = m2, m2 = m1, m1 = m0, m0 = 0; } /* Handle any remaining partial word. */ if ((rsh &= 31) != 0) { lsh = 32 - rsh; s |= m3 << lsh; m3 = (m3 >> rsh) | (m2 << lsh); m2 = (m2 >> rsh) | (m1 << lsh); m1 = (m1 >> rsh) | (m0 << lsh); m0 >>= rsh; } fp->fp_mant[0] = m0; fp->fp_mant[1] = m1; fp->fp_mant[2] = m2; fp->fp_mant[3] = m3; fp->fp_sticky = s; return (s); } /* * Force a number to be normal, i.e., make its fraction have all zero * bits before FP_1, then FP_1, then all 1 bits. This is used for denorms * and (sometimes) for intermediate results. * * Internally, this may use a `supernormal' -- a number whose fp_mant * is greater than or equal to 2.0 -- so as a side effect you can hand it * a supernormal and it will fix it (provided fp->fp_mant[3] == 0). */ void fpu_norm(struct fpn *fp) { u_int m0, m1, m2, m3, top, sup, nrm; int lsh, rsh, exp; exp = fp->fp_exp; m0 = fp->fp_mant[0]; m1 = fp->fp_mant[1]; m2 = fp->fp_mant[2]; m3 = fp->fp_mant[3]; /* Handle severe subnormals with 32-bit moves. 
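/*
 * A sketch of the sticky-preserving right shift that fpu_shr() above
 * implements over the four-word mantissa, reduced to one 32-bit word.
 * Illustrative only; valid for 0 < rsh < 32.  Any bits that fall off
 * the bottom collapse into a single sticky indication.
 */
static unsigned int
shr_sticky_sketch(unsigned int *m, int rsh)
{
	unsigned int lost;

	lost = *m & ((1u << rsh) - 1);	/* bits about to fall off */
	*m >>= rsh;
	return (lost != 0);		/* nonzero -> sticky */
}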
*/ if (m0 == 0) { if (m1) m0 = m1, m1 = m2, m2 = m3, m3 = 0, exp -= 32; else if (m2) m0 = m2, m1 = m3, m2 = 0, m3 = 0, exp -= 2 * 32; else if (m3) m0 = m3, m1 = 0, m2 = 0, m3 = 0, exp -= 3 * 32; else { fp->fp_class = FPC_ZERO; return; } } /* Now fix any supernormal or remaining subnormal. */ nrm = FP_1; sup = nrm << 1; if (m0 >= sup) { /* * We have a supernormal number. We need to shift it right. * We may assume m3==0. */ for (rsh = 1, top = m0 >> 1; top >= sup; rsh++) /* XXX slow */ top >>= 1; exp += rsh; lsh = 32 - rsh; m3 = m2 << lsh; m2 = (m2 >> rsh) | (m1 << lsh); m1 = (m1 >> rsh) | (m0 << lsh); m0 = top; } else if (m0 < nrm) { /* * We have a regular denorm (a subnormal number), and need * to shift it left. */ for (lsh = 1, top = m0 << 1; top < nrm; lsh++) /* XXX slow */ top <<= 1; exp -= lsh; rsh = 32 - lsh; m0 = top | (m1 >> rsh); m1 = (m1 << lsh) | (m2 >> rsh); m2 = (m2 << lsh) | (m3 >> rsh); m3 <<= lsh; } fp->fp_exp = exp; fp->fp_mant[0] = m0; fp->fp_mant[1] = m1; fp->fp_mant[2] = m2; fp->fp_mant[3] = m3; } /* * Concoct a `fresh' Quiet NaN per Appendix N. * As a side effect, we set NV (invalid) for the current exceptions. */ struct fpn * fpu_newnan(struct fpemu *fe) { struct fpn *fp; fe->fe_cx |= FPSCR_VXSNAN; fp = &fe->fe_f3; fp->fp_class = FPC_QNAN; fp->fp_sign = 0; fp->fp_mant[0] = FP_1 - 1; fp->fp_mant[1] = fp->fp_mant[2] = fp->fp_mant[3] = ~0; DUMPFPN(FPE_REG, fp); return (fp); } diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c index 341352fdd589..a99aeb0a7444 100644 --- a/sys/powerpc/powerpc/elf32_machdep.c +++ b/sys/powerpc/powerpc/elf32_machdep.c @@ -1,469 +1,469 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #ifdef __powerpc64__ #include #include extern const char *freebsd32_syscallnames[]; static void ppc32_fixlimit(struct rlimit *rl, int which); static SYSCTL_NODE(_compat, OID_AUTO, ppc32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "32-bit mode"); #define PPC32_MAXDSIZ (1024*1024*1024) static u_long ppc32_maxdsiz = PPC32_MAXDSIZ; SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ppc32_maxdsiz, 0, ""); #define PPC32_MAXSSIZ (64*1024*1024) u_long ppc32_maxssiz = PPC32_MAXSSIZ; SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ppc32_maxssiz, 0, ""); #else static void ppc32_runtime_resolve(void); #endif struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, #ifdef __powerpc64__ .sv_table = freebsd32_sysent, #else .sv_table = sysent, #endif .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs), .sv_sendsig = sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_stackprot = VM_PROT_ALL, #ifdef __powerpc64__ .sv_maxuser = VM_MAXUSER_ADDRESS32, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ppc32_setregs, .sv_syscallnames = freebsd32_syscallnames, .sv_fixlimit = ppc32_fixlimit, #else .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_syscallnames = syscallnames, .sv_fixlimit = NULL, #endif .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_ASLR | SV_TIMEKEEP | SV_RNG_SEED_VER, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, #ifdef __powerpc64__ .interp_newpath = "/libexec/ld-elf32.so.1", #else .interp_newpath = NULL, #endif .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); void 
elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; uint64_t vshr[32]; uint64_t *vsr_dw1; int vsr_idx; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf32_populate_note(NT_PPC_VMX, &pcb->pcb_vec, (char *)dst + len, sizeof(pcb->pcb_vec), NULL); } else len += elf32_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } if (pcb->pcb_flags & PCB_VSX) { save_fpu_nodrop(td); if (dst != NULL) { /* * Doubleword 0 of VSR0-VSR31 overlap with FPR0-FPR31 and * VSR32-VSR63 overlap with VR0-VR31, so we only copy * the non-overlapping data, which is doubleword 1 of VSR0-VSR31. */ for (vsr_idx = 0; vsr_idx < nitems(vshr); vsr_idx++) { vsr_dw1 = (uint64_t *)&pcb->pcb_fpu.fpr[vsr_idx].vsr[2]; vshr[vsr_idx] = *vsr_dw1; } len += elf32_populate_note(NT_PPC_VSX, vshr, (char *)dst + len, sizeof(vshr), NULL); } else len += elf32_populate_note(NT_PPC_VSX, NULL, NULL, sizeof(vshr), NULL); } *off = len; } #ifndef __powerpc64__ bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_PPC_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Half *hwhere; Elf_Addr addr; Elf_Addr addend, val; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) ((uintptr_t)relocbase + rela->r_offset); hwhere = (Elf_Half *) ((uintptr_t)relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC_ADDR32: /* word32 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); *where = elf_relocaddr(lf, addr + addend); break; case R_PPC_ADDR16_LO: /* #lo(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = addr & 0xffff; break; case R_PPC_ADDR16_HA: /* #ha(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0)) & 0xffff; break; case R_PPC_RELATIVE: /* word32 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; case R_PPC_JMP_SLOT: /* PLT jump slot entry */ /* * We currently only support Secure-PLT jump slots. * Given that we reject BSS-PLT modules during load, we * don't need to check again. * The method we are using here is equivilent to * LD_BIND_NOW. 
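/*
 * A sketch of the #lo()/#ha() split used by the R_PPC_ADDR16_LO and
 * R_PPC_ADDR16_HA cases above.  Illustrative only: #ha() carries the
 * bias so that (#ha(x) << 16) plus the sign-extended #lo(x) adds back
 * up to x.
 */
static unsigned short
ppc_lo_sketch(unsigned int addr)
{
	return (addr & 0xffff);
}

static unsigned short
ppc_ha_sketch(unsigned int addr)
{
	/* +1 compensates for the borrow that sign-extending #lo() introduces */
	return (((addr >> 16) + ((addr & 0x8000) ? 1 : 0)) & 0xffff);
}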
*/ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); *where = elf_relocaddr(lf, addr + addend); break; case R_PPC_IRELATIVE: addr = relocbase + addend; val = ((Elf32_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %d, " "symbol index %d\n", (int)rtype, symidx); return (-1); } return (0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = NULL, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } static void ppc32_runtime_resolve() { /* * Since we don't support lazy binding, panic immediately if anyone * manages to call the runtime resolver. */ panic("kldload: Runtime resolver was called unexpectedly!"); } int elf_cpu_parse_dynamic(caddr_t loadbase, Elf_Dyn *dynamic) { Elf_Dyn *dp; bool has_plt = false; bool secure_plt = false; Elf_Addr *got; for (dp = dynamic; dp->d_tag != DT_NULL; dp++) { switch (dp->d_tag) { case DT_PPC_GOT: secure_plt = true; got = (Elf_Addr *)(loadbase + dp->d_un.d_ptr); /* Install runtime resolver canary. */ got[1] = (Elf_Addr)ppc32_runtime_resolve; got[2] = (Elf_Addr)0; break; case DT_PLTGOT: has_plt = true; break; } } if (has_plt && !secure_plt) { printf("kldload: BSS-PLT modules are not supported.\n"); return (-1); } return (0); } #endif #ifdef __powerpc64__ static void ppc32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ppc32_maxdsiz != 0) { if (rl->rlim_cur > ppc32_maxdsiz) rl->rlim_cur = ppc32_maxdsiz; if (rl->rlim_max > ppc32_maxdsiz) rl->rlim_max = ppc32_maxdsiz; } break; case RLIMIT_STACK: if (ppc32_maxssiz != 0) { if (rl->rlim_cur > ppc32_maxssiz) rl->rlim_cur = ppc32_maxssiz; if (rl->rlim_max > ppc32_maxssiz) rl->rlim_max = ppc32_maxssiz; } break; } } #endif diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c index 869fad96e2bd..d90071f13650 100644 --- a/sys/powerpc/powerpc/exec_machdep.c +++ b/sys/powerpc/powerpc/exec_machdep.c @@ -1,1212 +1,1212 @@ /*- * SPDX-License-Identifier: BSD-4-Clause AND BSD-2-Clause-FreeBSD * * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_fpu_emu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #ifdef FPU_EMU #include #endif #ifdef COMPAT_FREEBSD32 #include #include #include typedef struct __ucontext32 { sigset_t uc_sigmask; mcontext32_t uc_mcontext; uint32_t uc_link; struct sigaltstack32 uc_stack; uint32_t uc_flags; uint32_t __spare__[4]; } ucontext32_t; struct sigframe32 { ucontext32_t sf_uc; struct siginfo32 sf_si; }; static int grab_mcontext32(struct thread *td, mcontext32_t *, int flags); #endif static int grab_mcontext(struct thread *, mcontext_t *, int); static void cleanup_power_extras(struct thread *); #ifdef __powerpc64__ extern struct sysentvec elf64_freebsd_sysvec_v2; #endif void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigacts *psp; struct sigframe sf; struct thread *td; struct proc *p; #ifdef COMPAT_FREEBSD32 struct siginfo32 siginfo32; struct sigframe32 sf32; #endif size_t sfpsize; caddr_t sfp, usfp; register_t sp; int oonstack, rndfsize; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; /* * Fill siginfo structure. */ ksi->ksi_info.si_signo = ksi->ksi_signo; ksi->ksi_info.si_addr = (void *)((tf->exc == EXC_DSI || tf->exc == EXC_DSE) ? tf->dar : tf->srr0); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32)) { siginfo_to_siginfo32(&ksi->ksi_info, &siginfo32); sig = siginfo32.si_signo; code = siginfo32.si_code; sfp = (caddr_t)&sf32; sfpsize = sizeof(sf32); rndfsize = roundup(sizeof(sf32), 16); sp = (uint32_t)tf->fixreg[1]; oonstack = sigonstack(sp); /* * Save user context */ memset(&sf32, 0, sizeof(sf32)); grab_mcontext32(td, &sf32.sf_uc.uc_mcontext, 0); sf32.sf_uc.uc_sigmask = *mask; sf32.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp; sf32.sf_uc.uc_stack.ss_size = (uint32_t)td->td_sigstk.ss_size; sf32.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf32.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; } else { #endif sig = ksi->ksi_signo; code = ksi->ksi_code; sfp = (caddr_t)&sf; sfpsize = sizeof(sf); #ifdef __powerpc64__ /* * 64-bit PPC defines a 288 byte scratch region * below the stack. */ rndfsize = 288 + roundup(sizeof(sf), 48); #else rndfsize = roundup(sizeof(sf), 16); #endif sp = tf->fixreg[1]; oonstack = sigonstack(sp); /* * Save user context */ memset(&sf, 0, sizeof(sf)); grab_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; #ifdef COMPAT_FREEBSD32 } #endif CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* * Allocate and validate space for the signal handler context. 
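 * The frame is placed on the alternate signal stack when one is
 * enabled and the signal is marked to use it; otherwise it is carved
 * out below the current stack pointer.  In both cases the frame is
 * aligned down to a 16-byte boundary.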
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { usfp = (void *)(((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - rndfsize) & ~0xFul); } else { usfp = (void *)((sp - rndfsize) & ~0xFul); } /* * Set Floating Point facility to "Ignore Exceptions Mode" so signal * handler can run. */ if (td->td_pcb->pcb_flags & PCB_FPU) tf->srr1 = tf->srr1 & ~(PSL_FE0 | PSL_FE1); /* * Set up the registers to return to sigcode. * * r1/sp - sigframe ptr * lr - sig function, dispatched to by blrl in trampoline * r3 - sig number * r4 - SIGINFO ? &siginfo : exception code * r5 - user context * srr0 - trampoline function addr */ tf->lr = (register_t)catcher; tf->fixreg[1] = (register_t)usfp; tf->fixreg[FIRSTARG] = sig; #ifdef COMPAT_FREEBSD32 tf->fixreg[FIRSTARG+2] = (register_t)usfp + ((SV_PROC_FLAG(p, SV_ILP32)) ? offsetof(struct sigframe32, sf_uc) : offsetof(struct sigframe, sf_uc)); #else tf->fixreg[FIRSTARG+2] = (register_t)usfp + offsetof(struct sigframe, sf_uc); #endif if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* * Signal handler installed with SA_SIGINFO. */ #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32)) { sf32.sf_si = siginfo32; tf->fixreg[FIRSTARG+1] = (register_t)usfp + offsetof(struct sigframe32, sf_si); sf32.sf_si = siginfo32; } else { #endif tf->fixreg[FIRSTARG+1] = (register_t)usfp + offsetof(struct sigframe, sf_si); sf.sf_si = ksi->ksi_info; #ifdef COMPAT_FREEBSD32 } #endif } else { /* Old FreeBSD-style arguments. */ tf->fixreg[FIRSTARG+1] = code; tf->fixreg[FIRSTARG+3] = (tf->exc == EXC_DSI) ? tf->dar : tf->srr0; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); tf->srr0 = (register_t)p->p_sysent->sv_sigcode_base; /* * copy the frame out to userland. */ if (copyout(sfp, usfp, sfpsize) != 0) { /* * Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); } CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->srr0, tf->fixreg[1]); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); /* * Save FPU state if needed. User may have changed it on * signal handler */ if (uc.uc_mcontext.mc_srr1 & PSL_FP) save_fpu(td); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
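 * Here that means only the saved program counter (stored as the PCB
 * link register) and the stack pointer, which is all the unwinder
 * needs to walk the frames.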
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_lr = tf->srr0; pcb->pcb_sp = tf->fixreg[1]; } /* * get_mcontext/sendsig helper routine that doesn't touch the * proc lock */ static int grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; int i; pcb = td->td_pcb; memset(mcp, 0, sizeof(mcontext_t)); mcp->mc_vers = _MC_VERSION; mcp->mc_flags = 0; memcpy(&mcp->mc_frame, td->td_frame, sizeof(struct trapframe)); if (flags & GET_MC_CLEAR_RET) { mcp->mc_gpr[3] = 0; mcp->mc_gpr[4] = 0; } /* * This assumes that floating-point context is *not* lazy, * so if the thread has used FP there would have been a * FP-unavailable exception that would have set things up * correctly. */ if (pcb->pcb_flags & PCB_FPREGS) { if (pcb->pcb_flags & PCB_FPU) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_fpu(td); critical_exit(); } mcp->mc_flags |= _MC_FP_VALID; memcpy(&mcp->mc_fpscr, &pcb->pcb_fpu.fpscr, sizeof(double)); for (i = 0; i < 32; i++) memcpy(&mcp->mc_fpreg[i], &pcb->pcb_fpu.fpr[i].fpr, sizeof(double)); } if (pcb->pcb_flags & PCB_VSX) { for (i = 0; i < 32; i++) memcpy(&mcp->mc_vsxfpreg[i], &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double)); } /* * Repeat for Altivec context */ if (pcb->pcb_flags & PCB_VEC) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_vec(td); critical_exit(); mcp->mc_flags |= _MC_AV_VALID; mcp->mc_vscr = pcb->pcb_vec.vscr; mcp->mc_vrsave = pcb->pcb_vec.vrsave; memcpy(mcp->mc_avec, pcb->pcb_vec.vr, sizeof(mcp->mc_avec)); } mcp->mc_len = sizeof(*mcp); return (0); } int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { int error; error = grab_mcontext(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tf; register_t tls; int i; pcb = td->td_pcb; tf = td->td_frame; if (mcp->mc_vers != _MC_VERSION || mcp->mc_len != sizeof(*mcp)) return (EINVAL); /* * Don't let the user change privileged MSR bits. * * psl_userstatic is used here to mask off any bits that can * legitimately vary between user contexts (Floating point * exception control and any facilities that we are using the * "enable on first use" pattern with.) * * All other bits are required to match psl_userset(32). * * Remember to update the platform cpu_init code when implementing * support for a new conditional facility! */ if ((mcp->mc_srr1 & psl_userstatic) != (tf->srr1 & psl_userstatic)) { return (EINVAL); } /* Copy trapframe, preserving TLS pointer across context change */ if (SV_PROC_FLAG(td->td_proc, SV_LP64)) tls = tf->fixreg[13]; else tls = tf->fixreg[2]; memcpy(tf, mcp->mc_frame, sizeof(mcp->mc_frame)); if (SV_PROC_FLAG(td->td_proc, SV_LP64)) tf->fixreg[13] = tls; else tf->fixreg[2] = tls; /* * Force the FPU back off to ensure the new context will not bypass * the enable_fpu() setup code accidentally. * * This prevents an issue where a process that uses floating point * inside a signal handler could end up in a state where the MSR * did not match pcb_flags. * * Additionally, ensure VSX is disabled as well, as it is illegal * to leave it turned on when FP or VEC are off. 
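 * The register contents themselves are reloaded lazily: the first FP
 * or VSX instruction executed under the new context traps, and
 * enable_fpu() restores the state saved into the PCB below.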
*/ tf->srr1 &= ~(PSL_FP | PSL_VSX); pcb->pcb_flags &= ~(PCB_FPU | PCB_VSX); if (mcp->mc_flags & _MC_FP_VALID) { /* enable_fpu() will happen lazily on a fault */ pcb->pcb_flags |= PCB_FPREGS; memcpy(&pcb->pcb_fpu.fpscr, &mcp->mc_fpscr, sizeof(double)); bzero(pcb->pcb_fpu.fpr, sizeof(pcb->pcb_fpu.fpr)); for (i = 0; i < 32; i++) { memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i], sizeof(double)); memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], &mcp->mc_vsxfpreg[i], sizeof(double)); } } if (mcp->mc_flags & _MC_AV_VALID) { if ((pcb->pcb_flags & PCB_VEC) != PCB_VEC) { critical_enter(); enable_vec(td); critical_exit(); } pcb->pcb_vec.vscr = mcp->mc_vscr; pcb->pcb_vec.vrsave = mcp->mc_vrsave; memcpy(pcb->pcb_vec.vr, mcp->mc_avec, sizeof(mcp->mc_avec)); } else { tf->srr1 &= ~PSL_VEC; pcb->pcb_flags &= ~PCB_VEC; } return (0); } /* * Clean up extra POWER state. Some per-process registers and states are not * managed by the MSR, so must be cleaned up explicitly on thread exit. * * Currently this includes: * DSCR -- Data stream control register (PowerISA 2.06+) * FSCR -- Facility Status and Control Register (PowerISA 2.07+) */ static void cleanup_power_extras(struct thread *td) { uint32_t pcb_flags; if (td != curthread) return; pcb_flags = td->td_pcb->pcb_flags; /* Clean up registers not managed by MSR. */ if (pcb_flags & PCB_CFSCR) mtspr(SPR_FSCR, 0); if (pcb_flags & PCB_CDSCR) mtspr(SPR_DSCRP, 0); if (pcb_flags & PCB_FPU) cleanup_fpscr(); } /* * Set set up registers on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf; register_t argc; tf = trapframe(td); bzero(tf, sizeof *tf); #ifdef __powerpc64__ tf->fixreg[1] = -roundup(-stack + 48, 16); #else tf->fixreg[1] = -roundup(-stack + 8, 16); #endif /* * Set up arguments for _start(): * _start(argc, argv, envp, obj, cleanup, ps_strings); * * Notes: * - obj and cleanup are the auxilliary and termination * vectors. They are fixed up by ld.elf_so. * - ps_strings is a NetBSD extention, and will be * ignored by executables which are strictly * compliant with the SVR4 ABI. 
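 * The stack pointer passed in points at argc; argv and envp follow
 * it as arrays of register-sized words, which is how r4 and r5 are
 * derived below.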
*/ /* Collect argc from the user stack */ argc = fuword((void *)stack); tf->fixreg[3] = argc; tf->fixreg[4] = stack + sizeof(register_t); tf->fixreg[5] = stack + (2 + argc)*sizeof(register_t); tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)imgp->ps_strings; /* NetBSD extension */ tf->srr0 = imgp->entry_addr; #ifdef __powerpc64__ tf->fixreg[12] = imgp->entry_addr; #endif tf->srr1 = psl_userset | PSL_FE_DFLT; cleanup_power_extras(td); td->td_pcb->pcb_flags = 0; } #ifdef COMPAT_FREEBSD32 void ppc32_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf; uint32_t argc; tf = trapframe(td); bzero(tf, sizeof *tf); tf->fixreg[1] = -roundup(-stack + 8, 16); argc = fuword32((void *)stack); tf->fixreg[3] = argc; tf->fixreg[4] = stack + sizeof(uint32_t); tf->fixreg[5] = stack + (2 + argc)*sizeof(uint32_t); tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)imgp->ps_strings; /* NetBSD extension */ tf->srr0 = imgp->entry_addr; tf->srr1 = psl_userset32 | PSL_FE_DFLT; cleanup_power_extras(td); td->td_pcb->pcb_flags = 0; } #endif int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(regs, tf, sizeof(struct reg)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; int i; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPREGS) == 0) memset(fpregs, 0, sizeof(struct fpreg)); else { memcpy(&fpregs->fpscr, &pcb->pcb_fpu.fpscr, sizeof(double)); for (i = 0; i < 32; i++) memcpy(&fpregs->fpreg[i], &pcb->pcb_fpu.fpr[i].fpr, sizeof(double)); } return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(tf, regs, sizeof(struct reg)); return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; int i; pcb = td->td_pcb; pcb->pcb_flags |= PCB_FPREGS; memcpy(&pcb->pcb_fpu.fpscr, &fpregs->fpscr, sizeof(double)); for (i = 0; i < 32; i++) { memcpy(&pcb->pcb_fpu.fpr[i].fpr, &fpregs->fpreg[i], sizeof(double)); } return (0); } #ifdef COMPAT_FREEBSD32 int set_regs32(struct thread *td, struct reg32 *regs) { struct trapframe *tf; int i; tf = td->td_frame; for (i = 0; i < 32; i++) tf->fixreg[i] = regs->fixreg[i]; tf->lr = regs->lr; tf->cr = regs->cr; tf->xer = regs->xer; tf->ctr = regs->ctr; tf->srr0 = regs->pc; return (0); } int fill_regs32(struct thread *td, struct reg32 *regs) { struct trapframe *tf; int i; tf = td->td_frame; for (i = 0; i < 32; i++) regs->fixreg[i] = tf->fixreg[i]; regs->lr = tf->lr; regs->cr = tf->cr; regs->xer = tf->xer; regs->ctr = tf->ctr; regs->pc = tf->srr0; return (0); } static int grab_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { mcontext_t mcp64; int i, error; error = grab_mcontext(td, &mcp64, flags); if (error != 0) return (error); mcp->mc_vers = mcp64.mc_vers; mcp->mc_flags = mcp64.mc_flags; mcp->mc_onstack = mcp64.mc_onstack; mcp->mc_len = mcp64.mc_len; memcpy(mcp->mc_avec,mcp64.mc_avec,sizeof(mcp64.mc_avec)); memcpy(mcp->mc_av,mcp64.mc_av,sizeof(mcp64.mc_av)); for (i = 0; i < 42; i++) mcp->mc_frame[i] = mcp64.mc_frame[i]; memcpy(mcp->mc_fpreg,mcp64.mc_fpreg,sizeof(mcp64.mc_fpreg)); 
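	/*
	 * The floating-point and VSX register blocks have the same
	 * layout in the 32-bit and 64-bit mcontext and are copied
	 * whole; the trapframe words are copied individually above
	 * since their element size differs between the two ABIs.
	 */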
memcpy(mcp->mc_vsxfpreg,mcp64.mc_vsxfpreg,sizeof(mcp64.mc_vsxfpreg)); return (0); } static int get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { int error; error = grab_mcontext32(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { mcontext_t mcp64; int i, error; mcp64.mc_vers = mcp->mc_vers; mcp64.mc_flags = mcp->mc_flags; mcp64.mc_onstack = mcp->mc_onstack; mcp64.mc_len = mcp->mc_len; memcpy(mcp64.mc_avec,mcp->mc_avec,sizeof(mcp64.mc_avec)); memcpy(mcp64.mc_av,mcp->mc_av,sizeof(mcp64.mc_av)); for (i = 0; i < 42; i++) mcp64.mc_frame[i] = mcp->mc_frame[i]; mcp64.mc_srr1 |= (td->td_frame->srr1 & 0xFFFFFFFF00000000ULL); memcpy(mcp64.mc_fpreg,mcp->mc_fpreg,sizeof(mcp64.mc_fpreg)); memcpy(mcp64.mc_vsxfpreg,mcp->mc_vsxfpreg,sizeof(mcp64.mc_vsxfpreg)); error = set_mcontext(td, &mcp64); return (error); } #endif #ifdef COMPAT_FREEBSD32 int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (error); /* * Save FPU state if needed. User may have changed it on * signal handler */ if (uc.uc_mcontext.mc_srr1 & PSL_FP) save_fpu(td); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? 
EJUSTRETURN : ret); } #endif void cpu_set_syscall_retval(struct thread *td, int error) { struct proc *p; struct trapframe *tf; int fixup; if (error == EJUSTRETURN) return; p = td->td_proc; tf = td->td_frame; if (tf->fixreg[0] == SYS___syscall && (SV_PROC_FLAG(p, SV_ILP32))) { int code = tf->fixreg[FIRSTARG + 1]; fixup = ( #if defined(COMPAT_FREEBSD6) && defined(SYS_freebsd6_lseek) code != SYS_freebsd6_lseek && #endif code != SYS_lseek) ? 1 : 0; } else fixup = 0; switch (error) { case 0: if (fixup) { /* * 64-bit return, 32-bit syscall. Fixup byte order */ tf->fixreg[FIRSTARG] = 0; tf->fixreg[FIRSTARG + 1] = td->td_retval[0]; } else { tf->fixreg[FIRSTARG] = td->td_retval[0]; tf->fixreg[FIRSTARG + 1] = td->td_retval[1]; } tf->cr &= ~0x10000000; /* Unset summary overflow */ break; case ERESTART: /* * Set user's pc back to redo the system call. */ tf->srr0 -= 4; break; default: tf->fixreg[FIRSTARG] = error; tf->cr |= 0x10000000; /* Set summary overflow */ break; } } /* * Threading functions */ void cpu_thread_exit(struct thread *td) { cleanup_power_extras(td); } void cpu_thread_clean(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~0x2fUL); td->td_pcb = pcb; td->td_frame = (struct trapframe *)pcb - 1; } void cpu_thread_free(struct thread *td) { } int cpu_set_user_tls(struct thread *td, void *tls_base) { if (SV_PROC_FLAG(td->td_proc, SV_LP64)) td->td_frame->fixreg[13] = (register_t)tls_base + 0x7010; else td->td_frame->fixreg[2] = (register_t)tls_base + 0x7008; return (0); } void cpu_copy_thread(struct thread *td, struct thread *td0) { struct pcb *pcb2; struct trapframe *tf; struct callframe *cf; pcb2 = td->td_pcb; /* Copy the upcall pcb */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); /* Create a stack for the new thread */ tf = td->td_frame; bcopy(td0->td_frame, tf, sizeof(struct trapframe)); tf->fixreg[FIRSTARG] = 0; tf->fixreg[FIRSTARG + 1] = 0; tf->cr &= ~0x10000000; /* Set registers for trampoline to user mode. */ cf = (struct callframe *)tf - 1; memset(cf, 0, sizeof(struct callframe)); cf->cf_func = (register_t)fork_return; cf->cf_arg0 = (register_t)td; cf->cf_arg1 = (register_t)tf; pcb2->pcb_sp = (register_t)cf; #if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1) pcb2->pcb_lr = ((register_t *)fork_trampoline)[0]; pcb2->pcb_toc = ((register_t *)fork_trampoline)[1]; #else pcb2->pcb_lr = (register_t)fork_trampoline; pcb2->pcb_context[0] = pcb2->pcb_lr; #endif pcb2->pcb_cpu.aim.usr_vsid = 0; #ifdef __SPE__ pcb2->pcb_vec.vscr = SPEFSCR_DFLT; #endif /* Setup to release spin count in fork_exit(). 
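 * The new thread starts out as if spinlock_enter() had been called
 * once, so fork_exit() can drop the count and restore the kernel MSR
 * with interrupts enabled.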
*/ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_msr = psl_kernset; } void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf; uintptr_t sp; tf = td->td_frame; /* align stack and alloc space for frame ptr and saved LR */ #ifdef __powerpc64__ sp = ((uintptr_t)stack->ss_sp + stack->ss_size - 48) & ~0x1f; #else sp = ((uintptr_t)stack->ss_sp + stack->ss_size - 8) & ~0x1f; #endif bzero(tf, sizeof(struct trapframe)); tf->fixreg[1] = (register_t)sp; tf->fixreg[3] = (register_t)arg; if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { tf->srr0 = (register_t)entry; #ifdef __powerpc64__ tf->srr1 = psl_userset32 | PSL_FE_DFLT; #else tf->srr1 = psl_userset | PSL_FE_DFLT; #endif } else { #ifdef __powerpc64__ if (td->td_proc->p_sysent == &elf64_freebsd_sysvec_v2) { tf->srr0 = (register_t)entry; /* ELFv2 ABI requires that the global entry point be in r12. */ tf->fixreg[12] = (register_t)entry; } else { register_t entry_desc[3]; (void)copyin((void *)entry, entry_desc, sizeof(entry_desc)); tf->srr0 = entry_desc[0]; tf->fixreg[2] = entry_desc[1]; tf->fixreg[11] = entry_desc[2]; } tf->srr1 = psl_userset | PSL_FE_DFLT; #endif } td->td_pcb->pcb_flags = 0; #ifdef __SPE__ td->td_pcb->pcb_vec.vscr = SPEFSCR_DFLT; #endif td->td_retval[0] = (register_t)entry; td->td_retval[1] = 0; } static int emulate_mfspr(int spr, int reg, struct trapframe *frame){ struct thread *td; td = curthread; if (spr == SPR_DSCR || spr == SPR_DSCRP) { if (!(cpu_features2 & PPC_FEATURE2_DSCR)) return (SIGILL); // If DSCR was never set, get the default DSCR if ((td->td_pcb->pcb_flags & PCB_CDSCR) == 0) td->td_pcb->pcb_dscr = mfspr(SPR_DSCRP); frame->fixreg[reg] = td->td_pcb->pcb_dscr; frame->srr0 += 4; return (0); } else return (SIGILL); } static int emulate_mtspr(int spr, int reg, struct trapframe *frame){ struct thread *td; td = curthread; if (spr == SPR_DSCR || spr == SPR_DSCRP) { if (!(cpu_features2 & PPC_FEATURE2_DSCR)) return (SIGILL); td->td_pcb->pcb_flags |= PCB_CDSCR; td->td_pcb->pcb_dscr = frame->fixreg[reg]; mtspr(SPR_DSCRP, frame->fixreg[reg]); frame->srr0 += 4; return (0); } else return (SIGILL); } #define XFX 0xFC0007FF int ppc_instr_emulate(struct trapframe *frame, struct thread *td) { struct pcb *pcb; uint32_t instr; int reg, sig; int rs, spr; instr = fuword32((void *)frame->srr0); sig = SIGILL; if ((instr & 0xfc1fffff) == 0x7c1f42a6) { /* mfpvr */ reg = (instr & ~0xfc1fffff) >> 21; frame->fixreg[reg] = mfpvr(); frame->srr0 += 4; return (0); } else if ((instr & XFX) == 0x7c0002a6) { /* mfspr */ rs = (instr & 0x3e00000) >> 21; spr = (instr & 0x1ff800) >> 16; return emulate_mfspr(spr, rs, frame); } else if ((instr & XFX) == 0x7c0003a6) { /* mtspr */ rs = (instr & 0x3e00000) >> 21; spr = (instr & 0x1ff800) >> 16; return emulate_mtspr(spr, rs, frame); } else if ((instr & 0xfc000ffe) == 0x7c0004ac) { /* various sync */ powerpc_sync(); /* Do a heavy-weight sync */ frame->srr0 += 4; return (0); } pcb = td->td_pcb; #ifdef FPU_EMU if (!(pcb->pcb_flags & PCB_FPREGS)) { bzero(&pcb->pcb_fpu, sizeof(pcb->pcb_fpu)); pcb->pcb_flags |= PCB_FPREGS; } else if (pcb->pcb_flags & PCB_FPU) save_fpu(td); sig = fpu_emulate(frame, &pcb->pcb_fpu); if ((sig == 0 || sig == SIGFPE) && pcb->pcb_flags & PCB_FPU) enable_fpu(td); #endif if (sig == SIGILL) { if (pcb->pcb_lastill != frame->srr0) { /* Allow a second chance, in case of cache sync issues. 
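 * Sync the instruction cache for the faulting word and retry once;
 * only if the same address faults again is SIGILL delivered.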
*/ sig = 0; pmap_sync_icache(PCPU_GET(curpmap), frame->srr0, 4); pcb->pcb_lastill = frame->srr0; } } return (sig); } diff --git a/sys/powerpc/powerpc/gdb_machdep.c b/sys/powerpc/powerpc/gdb_machdep.c index a7f1de512e31..e334bd46375c 100644 --- a/sys/powerpc/powerpc/gdb_machdep.c +++ b/sys/powerpc/powerpc/gdb_machdep.c @@ -1,127 +1,126 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include -#include #include #include #include #include #include extern vm_offset_t __startkernel; void * gdb_cpu_getreg(int regnum, size_t *regsz) { *regsz = gdb_cpu_regsz(regnum); if (kdb_thread == curthread) { if (regnum == 0 || (regnum >= 2 && regnum <= 31)) return (kdb_frame->fixreg + regnum); if (regnum == 64) return (&kdb_frame->srr0); if (regnum == 67) return (&kdb_frame->lr); } if (regnum == 1) return (&kdb_thrctx->pcb_sp); if (regnum == 2 && *regsz == 8) return (&kdb_thrctx->pcb_toc); if (regnum >= 12 && regnum <= 31) return (kdb_thrctx->pcb_context + (regnum - 12)); if (regnum == 64) return (&kdb_thrctx->pcb_lr); return (NULL); } void gdb_cpu_setreg(int regnum, void *val) { switch (regnum) { case GDB_REG_PC: break; } } int gdb_cpu_signal(int vector, int dummy __unused) { #if defined(BOOKE) if (vector == EXC_DEBUG || vector == EXC_PGM) return (SIGTRAP); #else if (vector == EXC_TRC || vector == EXC_RUNMODETRC) return (SIGTRAP); #endif return (SIGEMT); } void gdb_cpu_do_offsets(void) { /* * On PowerPC, .text starts at KERNBASE + SIZEOF_HEADERS and * text segment at KERNBASE - SIZEOF_HEADERS. * On PowerPC64, .text starts at KERNBASE and text segment at * KERNBASE - 0x100. * In both cases, the text segment offset is aligned to 64KB. * * The __startkernel variable holds the relocated KERNBASE offset. * Thus, as long as SIZEOF_HEADERS doesn't get bigger than 0x100 * (which would lead to other issues), aligning __startkernel to * 64KB gives the text segment offset. * * TODO: Add DataSeg to response. On PowerPC64 all sections reside * in a single LOAD segment, but on PowerPC modifiable data reside * in a separate segment, that GDB should also relocate. 
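 * For now only TextSeg is reported, using __startkernel rounded down
 * to the 64KB alignment described above.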
*/ gdb_tx_begin(0); gdb_tx_str("TextSeg="); gdb_tx_varhex(__startkernel & ~0xffff); gdb_tx_end(); } diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c index f35469f4051c..622af17b3305 100644 --- a/sys/powerpc/powerpc/machdep.c +++ b/sys/powerpc/powerpc/machdep.c @@ -1,850 +1,850 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef __powerpc64__ #include #endif #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include int cold = 1; #ifdef __powerpc64__ int cacheline_size = 128; #else int cacheline_size = 32; #endif #ifdef __powerpc64__ int hw_direct_map = -1; #else int hw_direct_map = 1; #endif #ifdef BOOKE extern vm_paddr_t kernload; #endif extern void *ap_pcpu; struct pcpu __pcpu[MAXCPU] __aligned(PAGE_SIZE); static char init_kenv[2048]; static struct trapframe frame0; char machine[] = "powerpc"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size, CTLFLAG_RD, &cacheline_size, 0, ""); uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t); static void fake_preload_metadata(void); long Maxmem = 0; long realmem = 0; /* Default MSR values set in the AIM/Book-E early startup code */ register_t psl_kernset; register_t psl_userset; register_t psl_userstatic; #ifdef __powerpc64__ register_t psl_userset32; #endif struct kva_md_info kmi; static void cpu_startup(void *dummy) { /* * Initialise the decrementer-based clock. */ decr_init(); /* * Good {morning,afternoon,evening,night}. */ cpu_setup(PCPU_GET(cpuid)); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)physmem), ptoa((uintmax_t)physmem) / 1048576); realmem = physmem; if (bootverbose) printf("available KVA = %zu (%zu MB)\n", virtual_end - virtual_avail, (virtual_end - virtual_avail) / 1048576); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size1 = phys_avail[indx + 1] - phys_avail[indx]; #ifdef __powerpc64__ printf("0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", #else printf("0x%09jx - 0x%09jx, %ju bytes (%ju pages)\n", #endif (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size1, (uintmax_t)size1 / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. 
*/ bufinit(); vm_pager_bufferinit(); } extern vm_offset_t __startkernel, __endkernel; extern unsigned char __bss_start[]; extern unsigned char __sbss_start[]; extern unsigned char __sbss_end[]; extern unsigned char _end[]; void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, uint32_t mdp_cookie); void aim_cpu_init(vm_offset_t toc); void booke_cpu_init(void); #ifdef DDB static void load_external_symtab(void); #endif uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, uint32_t mdp_cookie) { struct pcpu *pc; struct cpuref bsp; vm_offset_t startkernel, endkernel; char *env; void *kmdp = NULL; bool ofw_bootargs = false; #ifdef DDB bool symbols_provided = false; vm_offset_t ksym_start; vm_offset_t ksym_end; vm_offset_t ksym_sz; #endif /* First guess at start/end kernel positions */ startkernel = __startkernel; endkernel = __endkernel; /* * If the metadata pointer cookie is not set to the magic value, * the number in mdp should be treated as nonsense. */ if (mdp_cookie != 0xfb5d104d) mdp = NULL; #if !defined(BOOKE) /* * On BOOKE the BSS is already cleared and some variables * initialized. Do not wipe them out. */ bzero(__sbss_start, __sbss_end - __sbss_start); bzero(__bss_start, _end - __bss_start); #endif cpu_feature_setup(); #ifdef AIM aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie); #endif /* * At this point, we are executing in our correct memory space. * Book-E started there, and AIM has done an rfi and restarted * execution from _start. * * We may still be in real mode, however. If we are running out of * the direct map on 64 bit, this is possible to do. */ /* * Parse metadata if present and fetch parameters. Must be done * before console is inited so cninit gets the right value of * boothowto. */ if (mdp != NULL) { /* * Starting up from loader. * * Full metadata has been provided, but we need to figure * out the correct address to relocate it to. */ char *envp = NULL; uintptr_t md_offset = 0; vm_paddr_t kernelstartphys, kernelendphys; #ifdef AIM if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS) md_offset = DMAP_BASE_ADDRESS; #else /* BOOKE */ md_offset = VM_MIN_KERNEL_ADDRESS - kernload; #endif preload_metadata = mdp; if (md_offset > 0) { /* Translate phys offset into DMAP offset. */ preload_metadata += md_offset; preload_bootstrap_relocate(md_offset); } kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); if (envp != NULL) envp += md_offset; init_static_kenv(envp, 0); if (fdt == 0) { fdt = MD_FETCH(kmdp, MODINFOMD_DTBP, uintptr_t); if (fdt != 0) fdt += md_offset; } kernelstartphys = MD_FETCH(kmdp, MODINFO_ADDR, vm_offset_t); /* kernelstartphys is already relocated. */ kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); if (kernelendphys != 0) kernelendphys += md_offset; endkernel = ulmax(endkernel, kernelendphys); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); ksym_sz = *(Elf_Size*)ksym_start; db_fetch_ksymtab(ksym_start, ksym_end, md_offset); /* Symbols provided by loader. */ symbols_provided = true; #endif } } else { /* * Self-loading kernel, we have to fake up metadata. * * Since we are creating the metadata from the final * memory space, we don't need to call * preload_boostrap_relocate(). 
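 * fake_preload_metadata() below builds a minimal module list holding
 * just the kernel's name, type, load address and size.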
*/ fake_preload_metadata(); kmdp = preload_search_by_type("elf kernel"); init_static_kenv(init_kenv, sizeof(init_kenv)); ofw_bootargs = true; } /* Store boot environment state */ OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry); /* * Init params/tunables that can be overridden by the loader */ init_param1(); /* * Start initializing proc0 and thread0. */ proc_linkup0(&proc0, &thread0); thread0.td_frame = &frame0; #ifdef __powerpc64__ __asm __volatile("mr 13,%0" :: "r"(&thread0)); #else __asm __volatile("mr 2,%0" :: "r"(&thread0)); #endif /* * Init mutexes, which we use heavily in PMAP */ mutex_init(); /* * Install the OF client interface */ OF_bootstrap(); #ifdef DDB if (!symbols_provided && hw_direct_map) load_external_symtab(); #endif if (ofw_bootargs) ofw_parse_bootargs(); /* * Initialize the console before printing anything. */ cninit(); #ifdef AIM aim_cpu_init(toc); #else /* BOOKE */ booke_cpu_init(); /* Make sure the kernel icache is valid before we go too much further */ __syncicache((caddr_t)startkernel, endkernel - startkernel); #endif /* * Choose a platform module so we can get the physical memory map. */ platform_probe_and_attach(); /* * Set up per-cpu data for the BSP now that the platform can tell * us which that is. */ if (platform_smp_get_bsp(&bsp) != 0) bsp.cr_cpuid = 0; pc = &__pcpu[bsp.cr_cpuid]; __asm __volatile("mtsprg 0, %0" :: "r"(pc)); pcpu_init(pc, bsp.cr_cpuid, sizeof(struct pcpu)); pc->pc_curthread = &thread0; thread0.td_oncpu = bsp.cr_cpuid; pc->pc_cpuid = bsp.cr_cpuid; pc->pc_hwref = bsp.cr_hwref; /* * Init KDB */ kdb_init(); /* * Bring up MMU */ pmap_mmu_init(); link_elf_ireloc(kmdp); pmap_bootstrap(startkernel, endkernel); mtmsr(psl_kernset & ~PSL_EE); /* * Initialize params/tunables that are derived from memsize */ init_param2(physmem); /* * Grab booted kernel's name */ env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Finish setting up thread0. */ thread0.td_pcb = (struct pcb *) ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~15UL); bzero((void *)thread0.td_pcb, sizeof(struct pcb)); pc->pc_curpcb = thread0.td_pcb; /* Initialise the message buffer. */ msgbufinit(msgbufp, msgbufsize); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif return (((uintptr_t)thread0.td_pcb - (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL); } #ifdef DDB /* * On powernv and some booke systems, we might not have symbols loaded via * loader. However, if the user passed the kernel in as the initrd as well, * we can manually load it via reinterpreting the initrd copy of the kernel. * * In the BOOKE case, we don't actually have a DMAP yet, so we have to use * temporary maps to inspect the memory, but write DMAP addresses to the * configuration variables. 
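 * The image is located through the /chosen properties
 * linux,initrd-start and linux,initrd-end, and its ELF symbol and
 * string tables are handed to the debugger via the ksymtab
 * variables.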
*/ static void load_external_symtab(void) { phandle_t chosen; vm_paddr_t start, end; pcell_t cell[2]; ssize_t size; u_char *kernelimg; /* Temporary map */ u_char *kernelimg_final; /* Final location */ int i; Elf_Ehdr *ehdr; Elf_Phdr *phdr; Elf_Shdr *shdr; vm_offset_t ksym_start, ksym_sz, kstr_start, kstr_sz, ksym_start_final, kstr_start_final; if (!hw_direct_map) return; chosen = OF_finddevice("/chosen"); if (chosen <= 0) return; if (!OF_hasprop(chosen, "linux,initrd-start") || !OF_hasprop(chosen, "linux,initrd-end")) return; size = OF_getencprop(chosen, "linux,initrd-start", cell, sizeof(cell)); if (size == 4) start = cell[0]; else if (size == 8) start = (uint64_t)cell[0] << 32 | cell[1]; else return; size = OF_getencprop(chosen, "linux,initrd-end", cell, sizeof(cell)); if (size == 4) end = cell[0]; else if (size == 8) end = (uint64_t)cell[0] << 32 | cell[1]; else return; if (!(end - start > 0)) return; kernelimg_final = (u_char *) PHYS_TO_DMAP(start); #ifdef AIM kernelimg = kernelimg_final; #else /* BOOKE */ kernelimg = (u_char *)pmap_early_io_map(start, PAGE_SIZE); #endif ehdr = (Elf_Ehdr *)kernelimg; if (!IS_ELF(*ehdr)) { #ifdef BOOKE pmap_early_io_unmap(start, PAGE_SIZE); #endif return; } #ifdef BOOKE pmap_early_io_unmap(start, PAGE_SIZE); kernelimg = (u_char *)pmap_early_io_map(start, (end - start)); #endif phdr = (Elf_Phdr *)(kernelimg + ehdr->e_phoff); shdr = (Elf_Shdr *)(kernelimg + ehdr->e_shoff); ksym_start = 0; ksym_sz = 0; ksym_start_final = 0; kstr_start = 0; kstr_sz = 0; kstr_start_final = 0; for (i = 0; i < ehdr->e_shnum; i++) { if (shdr[i].sh_type == SHT_SYMTAB) { ksym_start = (vm_offset_t)(kernelimg + shdr[i].sh_offset); ksym_start_final = (vm_offset_t) (kernelimg_final + shdr[i].sh_offset); ksym_sz = (vm_offset_t)(shdr[i].sh_size); kstr_start = (vm_offset_t)(kernelimg + shdr[shdr[i].sh_link].sh_offset); kstr_start_final = (vm_offset_t) (kernelimg_final + shdr[shdr[i].sh_link].sh_offset); kstr_sz = (vm_offset_t) (shdr[shdr[i].sh_link].sh_size); } } if (ksym_start != 0 && kstr_start != 0 && ksym_sz != 0 && kstr_sz != 0 && ksym_start < kstr_start) { /* * We can't use db_fetch_ksymtab() here, because we need to * feed in DMAP addresses that are not mapped yet on booke. * * Write the variables directly, where db_init() will pick * them up later, after the DMAP is up. */ ksymtab = ksym_start_final; ksymtab_size = ksym_sz; kstrtab = kstr_start_final; ksymtab_relbase = (__startkernel - KERNBASE); } #ifdef BOOKE pmap_early_io_unmap(start, (end - start)); #endif }; #endif /* * When not being loaded from loader, we need to create our own metadata * so we can interact with the kernel linker. */ static void fake_preload_metadata(void) { /* We depend on dword alignment here. */ static uint32_t fake_preload[36] __aligned(8); int i = 0; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i], "kernel"); /* ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */ i += 2; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf kernel") + 1; strcpy((char*)&fake_preload[i], "elf kernel"); /* ['e' 'l' 'f' ' '] ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] 
*/ i += 3; #ifdef __powerpc64__ /* Padding -- Fields start on u_long boundaries */ fake_preload[i++] = 0; #endif fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); *(vm_offset_t *)&fake_preload[i] = (vm_offset_t)(__startkernel); i += (sizeof(vm_offset_t) / 4); fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(vm_offset_t); *(vm_offset_t *)&fake_preload[i] = (vm_offset_t)(__endkernel) - (vm_offset_t)(__startkernel); i += (sizeof(vm_offset_t) / 4); /* * MODINFOMD_SSYM and MODINFOMD_ESYM cannot be provided here, * as the memory comes from outside the loaded ELF sections. * * If the symbols are being provided by other means (MFS), the * tables will be loaded into the debugger directly. */ /* Null field at end to mark end of data. */ fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void*)fake_preload; } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { register_t addr, off; /* * Align the address to a cacheline and adjust the length * accordingly. Then round the length to a multiple of the * cacheline for easy looping. */ addr = (uintptr_t)ptr; off = addr & (cacheline_size - 1); addr -= off; len = roundup2(len + off, cacheline_size); while (len > 0) { __asm __volatile ("dcbf 0,%0" :: "r"(addr)); __asm __volatile ("sync"); addr += cacheline_size; len -= cacheline_size; } } int ptrace_set_pc(struct thread *td, unsigned long addr) { struct trapframe *tf; tf = td->td_frame; tf->srr0 = (register_t)addr; return (0); } void spinlock_enter(void) { struct thread *td; register_t msr; td = curthread; if (td->td_md.md_spinlock_count == 0) { nop_prio_mhigh(); msr = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_msr = msr; critical_enter(); } else td->td_md.md_spinlock_count++; } void spinlock_exit(void) { struct thread *td; register_t msr; td = curthread; msr = td->td_md.md_saved_msr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { critical_exit(); intr_restore(msr); nop_prio_medium(); } } /* * Simple ddb(4) command/hack to view any SPR on the running CPU. * Uses a trivial asm function to perform the mfspr, and rewrites the mfspr * instruction each time. * XXX: Since it uses code modification, it won't work if the kernel code pages * are marked RO. */ extern register_t get_spr(int); #ifdef DDB DB_SHOW_COMMAND(spr, db_show_spr) { register_t spr; volatile uint32_t *p; int sprno, saved_sprno; if (!have_addr) return; saved_sprno = sprno = (intptr_t) addr; sprno = ((sprno & 0x3e0) >> 5) | ((sprno & 0x1f) << 5); p = (uint32_t *)(void *)&get_spr; #ifdef __powerpc64__ #if defined(_CALL_ELF) && _CALL_ELF == 2 /* Account for ELFv2 function prologue. */ p += 2; #else p = *(volatile uint32_t * volatile *)p; #endif #endif *p = (*p & ~0x001ff800) | (sprno << 11); __syncicache(__DEVOLATILE(uint32_t *, p), cacheline_size); spr = get_spr(sprno); db_printf("SPR %d(%x): %lx\n", saved_sprno, saved_sprno, (unsigned long)spr); } DB_SHOW_COMMAND(frame, db_show_frame) { struct trapframe *tf; long reg; int i; tf = have_addr ? (struct trapframe *)addr : curthread->td_frame; /* * Everything casts through long to simplify the printing. * 'long' is native register size anyway. 
*/ db_printf("trap frame %p\n", tf); for (i = 0; i < nitems(tf->fixreg); i++) { reg = tf->fixreg[i]; db_printf(" r%d:\t%#lx (%ld)\n", i, reg, reg); } reg = tf->lr; db_printf(" lr:\t%#lx\n", reg); reg = tf->cr; db_printf(" cr:\t%#lx\n", reg); reg = tf->xer; db_printf(" xer:\t%#lx\n", reg); reg = tf->ctr; db_printf(" ctr:\t%#lx (%ld)\n", reg, reg); reg = tf->srr0; db_printf(" srr0:\t%#lx\n", reg); reg = tf->srr1; db_printf(" srr1:\t%#lx\n", reg); reg = tf->exc; db_printf(" exc:\t%#lx\n", reg); reg = tf->dar; db_printf(" dar:\t%#lx\n", reg); #ifdef AIM reg = tf->cpu.aim.dsisr; db_printf(" dsisr:\t%#lx\n", reg); #else reg = tf->cpu.booke.esr; db_printf(" esr:\t%#lx\n", reg); reg = tf->cpu.booke.dbcr0; db_printf(" dbcr0:\t%#lx\n", reg); #endif } #endif /* __stack_chk_fail_local() is called in secure-plt (32-bit). */ #if !defined(__powerpc64__) extern void __stack_chk_fail(void); void __stack_chk_fail_local(void); void __stack_chk_fail_local(void) { __stack_chk_fail(); } #endif diff --git a/sys/riscv/include/reg.h b/sys/riscv/include/reg.h index 9e94b4a5768d..003e696e30e3 100644 --- a/sys/riscv/include/reg.h +++ b/sys/riscv/include/reg.h @@ -1,73 +1,61 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _MACHINE_REG_H_ #define _MACHINE_REG_H_ struct reg { uint64_t ra; /* return address */ uint64_t sp; /* stack pointer */ uint64_t gp; /* global pointer */ uint64_t tp; /* thread pointer */ uint64_t t[7]; /* temporaries */ uint64_t s[12]; /* saved registers */ uint64_t a[8]; /* function arguments */ uint64_t sepc; /* exception program counter */ uint64_t sstatus; /* status register */ }; struct fpreg { uint64_t fp_x[32][2]; /* Floating point registers */ uint64_t fp_fcsr; /* Floating point control reg */ }; struct dbreg { int dummy; }; -#ifdef _KERNEL -/* - * XXX these interfaces are MI, so they should be declared in a MI place. - */ -int fill_regs(struct thread *, struct reg *); -int set_regs(struct thread *, struct reg *); -int fill_fpregs(struct thread *, struct fpreg *); -int set_fpregs(struct thread *, struct fpreg *); -int fill_dbregs(struct thread *, struct dbreg *); -int set_dbregs(struct thread *, struct dbreg *); -#endif - #endif /* !_MACHINE_REG_H_ */ diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c index b7fb142a59b4..ad57e8339c7d 100644 --- a/sys/riscv/riscv/machdep.c +++ b/sys/riscv/riscv/machdep.c @@ -1,981 +1,981 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2015-2017 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #ifdef FPE #include #endif #ifdef FDT #include #include #include #endif static void get_fpcontext(struct thread *td, mcontext_t *mcp); static void set_fpcontext(struct thread *td, mcontext_t *mcp); struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; int early_boot = 1; int cold = 1; #define DTB_SIZE_MAX (1024 * 1024) vm_paddr_t physmap[PHYS_AVAIL_ENTRIES]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ #define BOOT_HART_INVALID 0xffffffff uint32_t boot_hart = BOOT_HART_INVALID; /* The hart we booted on. */ cpuset_t all_harts; extern int *end; static char static_kenv[PAGE_SIZE]; static void cpu_startup(void *dummy) { sbi_print_version(); identify_cpu(); printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem), ptoa((uintmax_t)realmem) / (1024 * 1024)); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / (1024 * 1024)); if (bootverbose) devmap_print_table(); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sepc = frame->tf_sepc; regs->sstatus = frame->tf_sstatus; regs->ra = frame->tf_ra; regs->sp = frame->tf_sp; regs->gp = frame->tf_gp; regs->tp = frame->tf_tp; memcpy(regs->t, frame->tf_t, sizeof(regs->t)); memcpy(regs->s, frame->tf_s, sizeof(regs->s)); memcpy(regs->a, frame->tf_a, sizeof(regs->a)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sepc = regs->sepc; frame->tf_ra = regs->ra; frame->tf_sp = regs->sp; frame->tf_gp = regs->gp; frame->tf_tp = regs->tp; memcpy(frame->tf_t, regs->t, sizeof(frame->tf_t)); memcpy(frame->tf_s, regs->s, sizeof(frame->tf_s)); memcpy(frame->tf_a, regs->a, sizeof(frame->tf_a)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef FPE struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running FPE instructions we will * need to save the state to memcpy it below. 
*/ if (td == curthread) fpe_state_save(td); memcpy(regs->fp_x, pcb->pcb_x, sizeof(regs->fp_x)); regs->fp_fcsr = pcb->pcb_fcsr; } else #endif memset(regs, 0, sizeof(*regs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef FPE struct trapframe *frame; struct pcb *pcb; frame = td->td_frame; pcb = td->td_pcb; memcpy(pcb->pcb_x, regs->fp_x, sizeof(regs->fp_x)); pcb->pcb_fcsr = regs->fp_fcsr; pcb->pcb_fpflags |= PCB_FP_STARTED; frame->tf_sstatus &= ~SSTATUS_FS_MASK; frame->tf_sstatus |= SSTATUS_FS_CLEAN; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_sepc = addr; return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (EOPNOTSUPP); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (EOPNOTSUPP); } void exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf; struct pcb *pcb; tf = td->td_frame; pcb = td->td_pcb; memset(tf, 0, sizeof(struct trapframe)); tf->tf_a[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_ra = imgp->entry_addr; tf->tf_sepc = imgp->entry_addr; pcb->pcb_fpflags &= ~PCB_FP_STARTED; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct gpregs *)0)->gp_a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct gpregs *)0)->gp_s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct gpregs *)0)->gp_t); CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct reg *)0)->a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct reg *)0)->s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct reg *)0)->t); /* Support for FDT configurations only. */ CTASSERT(FDT); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; memcpy(mcp->mc_gpregs.gp_t, tf->tf_t, sizeof(mcp->mc_gpregs.gp_t)); memcpy(mcp->mc_gpregs.gp_s, tf->tf_s, sizeof(mcp->mc_gpregs.gp_s)); memcpy(mcp->mc_gpregs.gp_a, tf->tf_a, sizeof(mcp->mc_gpregs.gp_a)); if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_a[0] = 0; mcp->mc_gpregs.gp_t[0] = 0; /* clear syscall error */ } mcp->mc_gpregs.gp_ra = tf->tf_ra; mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_gp = tf->tf_gp; mcp->mc_gpregs.gp_tp = tf->tf_tp; mcp->mc_gpregs.gp_sepc = tf->tf_sepc; mcp->mc_gpregs.gp_sstatus = tf->tf_sstatus; get_fpcontext(td, mcp); return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf; tf = td->td_frame; /* * Permit changes to the USTATUS bits of SSTATUS. * * Ignore writes to read-only bits (SD, XS). * * Ignore writes to the FS field as set_fpcontext() will set * it explicitly. 
*/ if (((mcp->mc_gpregs.gp_sstatus ^ tf->tf_sstatus) & ~(SSTATUS_SD | SSTATUS_XS_MASK | SSTATUS_FS_MASK | SSTATUS_UPIE | SSTATUS_UIE)) != 0) return (EINVAL); memcpy(tf->tf_t, mcp->mc_gpregs.gp_t, sizeof(tf->tf_t)); memcpy(tf->tf_s, mcp->mc_gpregs.gp_s, sizeof(tf->tf_s)); memcpy(tf->tf_a, mcp->mc_gpregs.gp_a, sizeof(tf->tf_a)); tf->tf_ra = mcp->mc_gpregs.gp_ra; tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_gp = mcp->mc_gpregs.gp_gp; tf->tf_sepc = mcp->mc_gpregs.gp_sepc; tf->tf_sstatus = mcp->mc_gpregs.gp_sstatus; set_fpcontext(td, mcp); return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef FPE struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; KASSERT(td->td_pcb == curpcb, ("Invalid fpe pcb")); if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running FPE instructions we will * need to save the state to memcpy it below. */ fpe_state_save(td); KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Non-userspace FPE flags set in get_fpcontext")); memcpy(mcp->mc_fpregs.fp_x, curpcb->pcb_x, sizeof(mcp->mc_fpregs.fp_x)); mcp->mc_fpregs.fp_fcsr = curpcb->pcb_fcsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef FPE struct pcb *curpcb; #endif td->td_frame->tf_sstatus &= ~SSTATUS_FS_MASK; td->td_frame->tf_sstatus |= SSTATUS_FS_OFF; #ifdef FPE critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* FPE usage is enabled, override registers. */ memcpy(curpcb->pcb_x, mcp->mc_fpregs.fp_x, sizeof(mcp->mc_fpregs.fp_x)); curpcb->pcb_fcsr = mcp->mc_fpregs.fp_fcsr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK; td->td_frame->tf_sstatus |= SSTATUS_FS_CLEAN; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "fence \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* * Try to power down using the HSM SBI extension and fall back to a * simple wfi loop. */ intr_disable(); if (sbi_probe_extension(SBI_EXT_ID_HSM) != 0) sbi_hsm_hart_stop(); for (;;) __asm __volatile("wfi"); /* NOTREACHED */ } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; register_t reg; td = curthread; if (td->td_md.md_spinlock_count == 0) { reg = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_sstatus_ie = reg; critical_enter(); } else td->td_md.md_spinlock_count++; } void spinlock_exit(void) { struct thread *td; register_t sstatus_ie; td = curthread; sstatus_ie = td->td_md.md_saved_sstatus_ie; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { critical_exit(); intr_restore(sstatus_ie); } } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; int error; if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); /* Restore signal mask. 
*/ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { memcpy(pcb->pcb_s, tf->tf_s, sizeof(tf->tf_s)); pcb->pcb_ra = tf->tf_sepc; pcb->pcb_sp = tf->tf_sp; pcb->pcb_gp = tf->tf_gp; pcb->pcb_tp = tf->tf_tp; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe *fp, frame; struct sysentvec *sysent; struct trapframe *tf; struct sigacts *psp; struct thread *td; struct proc *p; int onstack; int sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ bzero(&frame, sizeof(frame)); get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack = td->td_sigstk; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ? (onstack ? SS_ONSTACK : 0) : SS_DISABLE; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_a[0] = sig; tf->tf_a[1] = (register_t)&fp->sf_si; tf->tf_a[2] = (register_t)&fp->sf_uc; tf->tf_sepc = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_ra = (register_t)sysent->sv_sigcode_base; else tf->tf_ra = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_sepc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup; pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_kstack_pages = KSTACK_PAGES; thread0.td_pcb = (struct pcb *)(thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. 
*/ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static void cache_setup(void) { /* TODO */ dcache_line_size = 0; icache_line_size = 0; idcache_line_size = 0; } /* * Fake up a boot descriptor table. */ static void fake_preload_metadata(struct riscv_bootparams *rvbp) { static uint32_t fake_preload[48]; vm_offset_t lastaddr; size_t fake_size, dtb_size; #define PRELOAD_PUSH_VALUE(type, value) do { \ *(type *)((char *)fake_preload + fake_size) = (value); \ fake_size += sizeof(type); \ } while (0) #define PRELOAD_PUSH_STRING(str) do { \ uint32_t ssize; \ ssize = strlen(str) + 1; \ PRELOAD_PUSH_VALUE(uint32_t, ssize); \ strcpy(((char *)fake_preload + fake_size), str); \ fake_size += ssize; \ fake_size = roundup(fake_size, sizeof(u_long)); \ } while (0) fake_size = 0; lastaddr = (vm_offset_t)&end; PRELOAD_PUSH_VALUE(uint32_t, MODINFO_NAME); PRELOAD_PUSH_STRING("kernel"); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_TYPE); PRELOAD_PUSH_STRING("elf kernel"); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_ADDR); PRELOAD_PUSH_VALUE(uint32_t, sizeof(vm_offset_t)); PRELOAD_PUSH_VALUE(uint64_t, KERNBASE); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_SIZE); PRELOAD_PUSH_VALUE(uint32_t, sizeof(size_t)); PRELOAD_PUSH_VALUE(uint64_t, (size_t)((vm_offset_t)&end - KERNBASE)); /* Copy the DTB to KVA space. */ lastaddr = roundup(lastaddr, sizeof(int)); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_METADATA | MODINFOMD_DTBP); PRELOAD_PUSH_VALUE(uint32_t, sizeof(vm_offset_t)); PRELOAD_PUSH_VALUE(vm_offset_t, lastaddr); dtb_size = fdt_totalsize(rvbp->dtbp_virt); memmove((void *)lastaddr, (const void *)rvbp->dtbp_virt, dtb_size); lastaddr = roundup(lastaddr + dtb_size, sizeof(int)); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_METADATA | MODINFOMD_KERNEND); PRELOAD_PUSH_VALUE(uint32_t, sizeof(vm_offset_t)); PRELOAD_PUSH_VALUE(vm_offset_t, lastaddr); PRELOAD_PUSH_VALUE(uint32_t, MODINFO_METADATA | MODINFOMD_HOWTO); PRELOAD_PUSH_VALUE(uint32_t, sizeof(int)); PRELOAD_PUSH_VALUE(int, RB_VERBOSE); /* End marker */ PRELOAD_PUSH_VALUE(uint32_t, 0); PRELOAD_PUSH_VALUE(uint32_t, 0); preload_metadata = (caddr_t)fake_preload; /* Check if bootloader clobbered part of the kernel with the DTB. 
*/ KASSERT(rvbp->dtbp_phys + dtb_size <= rvbp->kern_phys || rvbp->dtbp_phys >= rvbp->kern_phys + (lastaddr - KERNBASE), ("FDT (%lx-%lx) and kernel (%lx-%lx) overlap", rvbp->dtbp_phys, rvbp->dtbp_phys + dtb_size, rvbp->kern_phys, rvbp->kern_phys + (lastaddr - KERNBASE))); KASSERT(fake_size < sizeof(fake_preload), ("Too many fake_preload items")); if (boothowto & RB_VERBOSE) printf("FDT phys (%lx-%lx), kernel phys (%lx-%lx)\n", rvbp->dtbp_phys, rvbp->dtbp_phys + dtb_size, rvbp->kern_phys, rvbp->kern_phys + (lastaddr - KERNBASE)); } #ifdef FDT static void parse_fdt_bootargs(void) { char bootargs[512]; bootargs[sizeof(bootargs) - 1] = '\0'; if (fdt_get_chosen_bootargs(bootargs, sizeof(bootargs) - 1) == 0) { boothowto |= boot_parse_cmdline(bootargs); } } #endif static vm_offset_t parse_metadata(void) { caddr_t kmdp; vm_offset_t lastaddr; #ifdef DDB vm_offset_t ksym_start, ksym_end; #endif char *kern_envp; /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); KASSERT(kmdp != NULL, ("No preload metadata found!")); /* Read the boot metadata */ boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); if (kern_envp != NULL) init_static_kenv(kern_envp, 0); else init_static_kenv(static_kenv, sizeof(static_kenv)); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif #ifdef FDT try_load_dtb(kmdp); if (kern_envp == NULL) parse_fdt_bootargs(); #endif return (lastaddr); } void initriscv(struct riscv_bootparams *rvbp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; struct pcpu *pcpup; int mem_regions_sz; vm_offset_t lastaddr; vm_size_t kernlen; #ifdef FDT phandle_t chosen; uint32_t hart; #endif char *env; TSRAW(&thread0, TS_ENTER, __func__, NULL); /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* Set the pcpu pointer */ __asm __volatile("mv tp, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Initialize SBI interface. */ sbi_init(); /* Parse the boot metadata. */ if (rvbp->modulep != 0) { preload_metadata = (caddr_t)rvbp->modulep; } else { fake_preload_metadata(rvbp); } lastaddr = parse_metadata(); #ifdef FDT /* * Look for the boot hart ID. This was either passed in directly from * the SBI firmware and handled by locore, or was stored in the device * tree by an earlier boot stage. */ chosen = OF_finddevice("/chosen"); if (OF_getencprop(chosen, "boot-hartid", &hart, sizeof(hart)) != -1) { boot_hart = hart; } #endif if (boot_hart == BOOT_HART_INVALID) { panic("Boot hart ID was not properly set"); } pcpup->pc_hart = boot_hart; #ifdef FDT /* * Exclude reserved memory specified by the device tree. Typically, * this contains an entry for memory used by the runtime SBI firmware. */ if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0) { physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); } /* Grab physical memory regions information from device tree. 
*/ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) { panic("Cannot get physical memory regions"); } physmem_hardware_regions(mem_regions, mem_regions_sz); #endif /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ kernlen = (lastaddr - KERNBASE); pmap_bootstrap(rvbp->kern_l1pt, rvbp->kern_phys, kernlen); #ifdef FDT /* * XXX: Exclude the lowest 2MB of physical memory, if it hasn't been * already, as this area is assumed to contain the SBI firmware. This * is a little fragile, but it is consistent with the platforms we * support so far. * * TODO: remove this when the all regular booting methods properly * report their reserved memory in the device tree. */ if (mem_regions[0].mr_start == physmap[0]) { physmem_exclude_region(mem_regions[0].mr_start, L2_SIZE, EXFLAG_NODUMP | EXFLAG_NOALLOC); } #endif physmem_init_kernel_globals(); /* Establish static device mappings */ devmap_bootstrap(0, NULL); cninit(); /* * Dump the boot metadata. We have to wait for cninit() since console * output is required. If it's grossly incorrect the kernel will never * make it this far. */ if (getenv_is_true("debug.dump_modinfo_at_boot")) preload_dump(); init_proc0(rvbp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); kdb_init(); env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); if (boothowto & RB_VERBOSE) physmem_print_tables(); early_boot = 0; TSEXIT(); } diff --git a/sys/arm64/include/reg.h b/sys/sys/reg.h similarity index 70% copy from sys/arm64/include/reg.h copy to sys/sys/reg.h index 9cfc5ea1d437..910ea802029e 100644 --- a/sys/arm64/include/reg.h +++ b/sys/sys/reg.h @@ -1,106 +1,68 @@ /*- - * Copyright (c) 2014 Andrew Turner + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 2014,2019 Andrew Turner * Copyright (c) 2014-2015 The FreeBSD Foundation - * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 + * ("CTSRD"), as part of the DARPA CRASH research programme. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ -#ifndef _MACHINE_REG_H_ -#define _MACHINE_REG_H_ - -struct reg { - uint64_t x[30]; - uint64_t lr; - uint64_t sp; - uint64_t elr; - uint32_t spsr; -}; - -struct reg32 { - unsigned int r[13]; - unsigned int r_sp; - unsigned int r_lr; - unsigned int r_pc; - unsigned int r_cpsr; -}; - -struct fpreg { - __uint128_t fp_q[32]; - uint32_t fp_sr; - uint32_t fp_cr; -}; - -struct fpreg32 { - int dummy; -}; - -struct dbreg { - uint8_t db_debug_ver; - uint8_t db_nbkpts; - uint8_t db_nwtpts; - uint8_t db_pad[5]; - - struct { - uint64_t dbr_addr; - uint32_t dbr_ctrl; - uint32_t dbr_pad; - } db_breakregs[16]; - struct { - uint64_t dbw_addr; - uint32_t dbw_ctrl; - uint32_t dbw_pad; - } db_watchregs[16]; -}; +#ifndef _SYS_REG_H_ +#define _SYS_REG_H_ -struct dbreg32 { - int dummy; -}; - -#define __HAVE_REG32 +#include #ifdef _KERNEL -/* - * XXX these interfaces are MI, so they should be declared in a MI place. - */ int fill_regs(struct thread *, struct reg *); int set_regs(struct thread *, struct reg *); int fill_fpregs(struct thread *, struct fpreg *); int set_fpregs(struct thread *, struct fpreg *); int fill_dbregs(struct thread *, struct dbreg *); int set_dbregs(struct thread *, struct dbreg *); #ifdef COMPAT_FREEBSD32 int fill_regs32(struct thread *, struct reg32 *); int set_regs32(struct thread *, struct reg32 *); +#ifndef fill_fpregs32 int fill_fpregs32(struct thread *, struct fpreg32 *); +#endif +#ifndef set_fpregs32 int set_fpregs32(struct thread *, struct fpreg32 *); +#endif +#ifndef fill_dbregs32 int fill_dbregs32(struct thread *, struct dbreg32 *); +#endif +#ifndef set_dbregs32 int set_dbregs32(struct thread *, struct dbreg32 *); #endif #endif +#endif -#endif /* !_MACHINE_REG_H_ */ +#endif diff --git a/sys/x86/include/reg.h b/sys/x86/include/reg.h index 6bfe7aaedd06..7be3e24850c3 100644 --- a/sys/x86/include/reg.h +++ b/sys/x86/include/reg.h @@ -1,269 +1,253 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)reg.h 5.5 (Berkeley) 1/18/91 * $FreeBSD$ */ #ifndef _MACHINE_REG_H_ #define _MACHINE_REG_H_ #include #ifdef __i386__ /* * Indices for registers in `struct trapframe' and `struct regs'. * * This interface is deprecated. In the kernel, it is only used in FPU * emulators to convert from register numbers encoded in instructions to * register values. Everything else just accesses the relevant struct * members. In userland, debuggers tend to abuse this interface since * they don't understand that `struct regs' is a struct. I hope they have * stopped accessing the registers in the trap frame via PT_{READ,WRITE}_U * and we can stop supporting the user area soon. */ #define tFS (0) #define tES (1) #define tDS (2) #define tEDI (3) #define tESI (4) #define tEBP (5) #define tISP (6) #define tEBX (7) #define tEDX (8) #define tECX (9) #define tEAX (10) #define tERR (12) #define tEIP (13) #define tCS (14) #define tEFLAGS (15) #define tESP (16) #define tSS (17) /* * Indices for registers in `struct regs' only. * * Some registers live in the pcb and are only in an "array" with the * other registers in application interfaces that copy all the registers * to or from a `struct regs'. */ #define tGS (18) #endif /* __i386__ */ /* Rename the structs below depending on the machine architecture. */ #ifdef __i386__ #define __reg32 reg #define __fpreg32 fpreg #define __dbreg32 dbreg #else #define __reg32 reg32 #define __reg64 reg #define __fpreg32 fpreg32 #define __fpreg64 fpreg #define __dbreg32 dbreg32 #define __dbreg64 dbreg #define __HAVE_REG32 #endif /* * Register set accessible via /proc/$pid/regs and PT_{SET,GET}REGS. */ struct __reg32 { __uint32_t r_fs; __uint32_t r_es; __uint32_t r_ds; __uint32_t r_edi; __uint32_t r_esi; __uint32_t r_ebp; __uint32_t r_isp; __uint32_t r_ebx; __uint32_t r_edx; __uint32_t r_ecx; __uint32_t r_eax; __uint32_t r_trapno; __uint32_t r_err; __uint32_t r_eip; __uint32_t r_cs; __uint32_t r_eflags; __uint32_t r_esp; __uint32_t r_ss; __uint32_t r_gs; }; struct __reg64 { __int64_t r_r15; __int64_t r_r14; __int64_t r_r13; __int64_t r_r12; __int64_t r_r11; __int64_t r_r10; __int64_t r_r9; __int64_t r_r8; __int64_t r_rdi; __int64_t r_rsi; __int64_t r_rbp; __int64_t r_rbx; __int64_t r_rdx; __int64_t r_rcx; __int64_t r_rax; __uint32_t r_trapno; __uint16_t r_fs; __uint16_t r_gs; __uint32_t r_err; __uint16_t r_es; __uint16_t r_ds; __int64_t r_rip; __int64_t r_cs; __int64_t r_rflags; __int64_t r_rsp; __int64_t r_ss; }; /* * Register set accessible via /proc/$pid/fpregs. * * XXX should get struct from fpu.h. Here we give a slightly * simplified struct. This may be too much detail. Perhaps * an array of unsigned longs is best. */ struct __fpreg32 { __uint32_t fpr_env[7]; __uint8_t fpr_acc[8][10]; __uint32_t fpr_ex_sw; __uint8_t fpr_pad[64]; }; struct __fpreg64 { __uint64_t fpr_env[4]; __uint8_t fpr_acc[8][16]; __uint8_t fpr_xacc[16][16]; __uint64_t fpr_spare[12]; }; /* * Register set accessible via PT_GETXMMREGS (i386). 
*/ struct xmmreg { /* * XXX should get struct from npx.h. Here we give a slightly * simplified struct. This may be too much detail. Perhaps * an array of unsigned longs is best. */ __uint32_t xmm_env[8]; __uint8_t xmm_acc[8][16]; __uint8_t xmm_reg[8][16]; __uint8_t xmm_pad[224]; }; /* * Register set accessible via /proc/$pid/dbregs. */ struct __dbreg32 { __uint32_t dr[8]; /* debug registers */ /* Index 0-3: debug address registers */ /* Index 4-5: reserved */ /* Index 6: debug status */ /* Index 7: debug control */ }; struct __dbreg64 { __uint64_t dr[16]; /* debug registers */ /* Index 0-3: debug address registers */ /* Index 4-5: reserved */ /* Index 6: debug status */ /* Index 7: debug control */ /* Index 8-15: reserved */ }; #define DBREG_DR6_RESERVED1 0xffff0ff0 #define DBREG_DR6_BMASK 0x000f #define DBREG_DR6_B(i) (1 << (i)) #define DBREG_DR6_BD 0x2000 #define DBREG_DR6_BS 0x4000 #define DBREG_DR6_BT 0x8000 #define DBREG_DR7_RESERVED1 0x0400 #define DBREG_DR7_LOCAL_ENABLE 0x01 #define DBREG_DR7_GLOBAL_ENABLE 0x02 #define DBREG_DR7_LEN_1 0x00 /* 1 byte length */ #define DBREG_DR7_LEN_2 0x01 #define DBREG_DR7_LEN_4 0x03 #define DBREG_DR7_LEN_8 0x02 #define DBREG_DR7_EXEC 0x00 /* break on execute */ #define DBREG_DR7_WRONLY 0x01 /* break on write */ #define DBREG_DR7_RDWR 0x03 /* break on read or write */ #define DBREG_DR7_MASK(i) \ ((__u_register_t)(0xf) << ((i) * 4 + 16) | 0x3 << (i) * 2) #define DBREG_DR7_SET(i, len, access, enable) \ ((__u_register_t)((len) << 2 | (access)) << ((i) * 4 + 16) | \ (enable) << (i) * 2) #define DBREG_DR7_GD 0x2000 #define DBREG_DR7_ENABLED(d, i) (((d) & 0x3 << (i) * 2) != 0) #define DBREG_DR7_ACCESS(d, i) ((d) >> ((i) * 4 + 16) & 0x3) #define DBREG_DR7_LEN(d, i) ((d) >> ((i) * 4 + 18) & 0x3) #define DBREG_DRX(d,x) ((d)->dr[(x)]) /* reference dr0 - dr7 by register number */ #undef __reg32 #undef __reg64 #undef __fpreg32 #undef __fpreg64 #undef __dbreg32 #undef __dbreg64 #ifdef _KERNEL -struct thread; - /* * XXX these interfaces are MI, so they should be declared in a MI place. */ -int fill_regs(struct thread *, struct reg *); int fill_frame_regs(struct trapframe *, struct reg *); -int set_regs(struct thread *, struct reg *); -int fill_fpregs(struct thread *, struct fpreg *); -int set_fpregs(struct thread *, struct fpreg *); -int fill_dbregs(struct thread *, struct dbreg *); -int set_dbregs(struct thread *, struct dbreg *); -#ifdef COMPAT_FREEBSD32 -int fill_regs32(struct thread *, struct reg32 *); -int set_regs32(struct thread *, struct reg32 *); -int fill_fpregs32(struct thread *, struct fpreg32 *); -int set_fpregs32(struct thread *, struct fpreg32 *); -int fill_dbregs32(struct thread *, struct dbreg32 *); -int set_dbregs32(struct thread *, struct dbreg32 *); -#endif #endif #endif /* !_MACHINE_REG_H_ */ diff --git a/sys/x86/x86/dbreg.c b/sys/x86/x86/dbreg.c index ef30cc2d614e..3361807c6226 100644 --- a/sys/x86/x86/dbreg.c +++ b/sys/x86/x86/dbreg.c @@ -1,302 +1,303 @@ /*- * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include "opt_ddb.h" #include #include #include +#include #include #include #include #include #include #include #include #define NDBREGS 4 #ifdef __amd64__ #define MAXWATCHSIZE 8 #else #define MAXWATCHSIZE 4 #endif /* * Set a watchpoint in the debug register denoted by 'watchnum'. */ static void dbreg_set_watchreg(int watchnum, vm_offset_t watchaddr, vm_size_t size, int access, struct dbreg *d) { int len; MPASS(watchnum >= 0 && watchnum < NDBREGS); /* size must be 1 for an execution breakpoint */ if (access == DBREG_DR7_EXEC) size = 1; /* * we can watch a 1, 2, or 4 byte sized location */ switch (size) { case 1: len = DBREG_DR7_LEN_1; break; case 2: len = DBREG_DR7_LEN_2; break; case 4: len = DBREG_DR7_LEN_4; break; #if MAXWATCHSIZE >= 8 case 8: len = DBREG_DR7_LEN_8; break; #endif default: return; } /* clear the bits we are about to affect */ d->dr[7] &= ~DBREG_DR7_MASK(watchnum); /* set drN register to the address, N=watchnum */ DBREG_DRX(d, watchnum) = watchaddr; /* enable the watchpoint */ d->dr[7] |= DBREG_DR7_SET(watchnum, len, access, DBREG_DR7_GLOBAL_ENABLE); } /* * Remove a watchpoint from the debug register denoted by 'watchnum'. */ static void dbreg_clr_watchreg(int watchnum, struct dbreg *d) { MPASS(watchnum >= 0 && watchnum < NDBREGS); d->dr[7] &= ~DBREG_DR7_MASK(watchnum); DBREG_DRX(d, watchnum) = 0; } /* * Sync the debug registers. Other cores will read these values from the PCPU * area when they resume. See amd64_db_resume_dbreg() below. */ static void dbreg_sync(struct dbreg *dp) { #ifdef __amd64__ struct pcpu *pc; int cpu, c; cpu = PCPU_GET(cpuid); CPU_FOREACH(c) { if (c == cpu) continue; pc = pcpu_find(c); memcpy(pc->pc_dbreg, dp, sizeof(*dp)); pc->pc_dbreg_cmd = PC_DBREG_CMD_LOAD; } #endif } int dbreg_set_watchpoint(vm_offset_t addr, vm_size_t size, int access) { struct dbreg *d; int avail, i, wsize; #ifdef __amd64__ d = (struct dbreg *)PCPU_PTR(dbreg); #else /* debug registers aren't stored in PCPU on i386. */ struct dbreg d_temp; d = &d_temp; #endif /* Validate the access type */ if (access != DBREG_DR7_EXEC && access != DBREG_DR7_WRONLY && access != DBREG_DR7_RDWR) return (EINVAL); fill_dbregs(NULL, d); /* * Check if there are enough available registers to cover the desired * area. */ avail = 0; for (i = 0; i < NDBREGS; i++) { if (!DBREG_DR7_ENABLED(d->dr[7], i)) avail++; } if (avail * MAXWATCHSIZE < size) return (EBUSY); for (i = 0; i < NDBREGS && size > 0; i++) { if (!DBREG_DR7_ENABLED(d->dr[7], i)) { if ((size >= 8 || (avail == 1 && size > 4)) && MAXWATCHSIZE == 8) wsize = 8; else if (size > 2) wsize = 4; else wsize = size; dbreg_set_watchreg(i, addr, wsize, access, d); addr += wsize; size -= wsize; avail--; } } set_dbregs(NULL, d); dbreg_sync(d); return (0); } int dbreg_clr_watchpoint(vm_offset_t addr, vm_size_t size) { struct dbreg *d; int i; #ifdef __amd64__ d = (struct dbreg *)PCPU_PTR(dbreg); #else /* debug registers aren't stored in PCPU on i386. 
*/ struct dbreg d_temp; d = &d_temp; #endif fill_dbregs(NULL, d); for (i = 0; i < NDBREGS; i++) { if (DBREG_DR7_ENABLED(d->dr[7], i)) { if (DBREG_DRX((d), i) >= addr && DBREG_DRX((d), i) < addr + size) dbreg_clr_watchreg(i, d); } } set_dbregs(NULL, d); dbreg_sync(d); return (0); } #ifdef DDB static const char * watchtype_str(int type) { switch (type) { case DBREG_DR7_EXEC: return ("execute"); case DBREG_DR7_RDWR: return ("read/write"); case DBREG_DR7_WRONLY: return ("write"); default: return ("invalid"); } } void dbreg_list_watchpoints(void) { struct dbreg d; int i, len, type; fill_dbregs(NULL, &d); db_printf("\nhardware watchpoints:\n"); db_printf(" watch status type len address\n"); db_printf(" ----- -------- ---------- --- ----------\n"); for (i = 0; i < NDBREGS; i++) { if (DBREG_DR7_ENABLED(d.dr[7], i)) { type = DBREG_DR7_ACCESS(d.dr[7], i); len = DBREG_DR7_LEN(d.dr[7], i); db_printf(" %-5d %-8s %10s %3d ", i, "enabled", watchtype_str(type), len + 1); db_printsym((db_addr_t)DBREG_DRX(&d, i), DB_STGY_ANY); db_printf("\n"); } else { db_printf(" %-5d disabled\n", i); } } } #endif #ifdef __amd64__ /* Sync debug registers when resuming from debugger. */ void amd64_db_resume_dbreg(void) { struct dbreg *d; switch (PCPU_GET(dbreg_cmd)) { case PC_DBREG_CMD_LOAD: d = (struct dbreg *)PCPU_PTR(dbreg); set_dbregs(NULL, d); PCPU_SET(dbreg_cmd, PC_DBREG_CMD_NONE); break; } } #endif int kdb_cpu_set_watchpoint(vm_offset_t addr, vm_size_t size, int access) { /* Convert the KDB access type */ switch (access) { case KDB_DBG_ACCESS_W: access = DBREG_DR7_WRONLY; break; case KDB_DBG_ACCESS_RW: access = DBREG_DR7_RDWR; break; case KDB_DBG_ACCESS_R: /* FALLTHROUGH: read-only not supported */ default: return (EINVAL); } return (dbreg_set_watchpoint(addr, size, access)); } int kdb_cpu_clr_watchpoint(vm_offset_t addr, vm_size_t size) { return (dbreg_clr_watchpoint(addr, size)); }
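The hunks above carry the core of this diff: the MI prototypes for fill_regs(), set_regs(), fill_fpregs(), set_fpregs(), fill_dbregs() and set_dbregs() now live once in the new sys/sys/reg.h (which appears to pull in the MD <machine/reg.h> for the structure layouts) instead of being repeated in each architecture's machine/reg.h, as the removed "XXX these interfaces are MI" comments had long suggested. A minimal sketch of an MI consumer under that layout follows; the wrapper and its name are hypothetical and not part of this commit, only the prototypes and struct reg come from the headers shown above:

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/reg.h>	/* MI prototypes; brings in <machine/reg.h> for struct reg */

/* Hypothetical helper: copy a stopped thread's register set into 'r'. */
static int
example_fetch_regs(struct thread *td, struct reg *r)
{

	/* fill_regs() is implemented per-MD, e.g. in riscv machdep.c above. */
	return (fill_regs(td, r));
}

A caller like this previously had to rely on each architecture re-declaring the same six functions in its own machine/reg.h; with the shared header the declarations can no longer drift apart between platforms.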