Index: stable/12/sys/amd64/amd64/trap.c
===================================================================
--- stable/12/sys/amd64/amd64/trap.c	(revision 349015)
+++ stable/12/sys/amd64/amd64/trap.c	(revision 349016)
@@ -1,1210 +1,1231 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (C) 1994, David Greenman
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the University of Utah, and William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * AMD64 Trap and System call handling
  */
 
 #include "opt_clock.h"
 #include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_isa.h"
 #include "opt_kdb.h"
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/pioctl.h>
 #include <sys/ptrace.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/uio.h>
 #include <sys/vmmeter.h>
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DEFINE( , , page_fault, all);
 PMC_SOFT_DEFINE( , , page_fault, read);
 PMC_SOFT_DEFINE( , , page_fault, write);
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/intr_machdep.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #include <machine/stack.h>
 #include <machine/trap.h>
 #include <machine/tss.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 #endif
 
 extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg),
     IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32),
     IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall);
 
 void __noinline trap(struct trapframe *frame);
 void trap_check(struct trapframe *frame);
 void dblfault_handler(struct trapframe *frame);
 
 static int trap_pfault(struct trapframe *, int);
 static void trap_fatal(struct trapframe *, vm_offset_t);
+#ifdef KDTRACE_HOOKS
+static bool trap_user_dtrace(struct trapframe *,
+    int (**hook)(struct trapframe *));
+#endif
 
 #define MAX_TRAP_MSG		32
 static char *trap_msg[] = {
 	"",					/*  0 unused */
 	"privileged instruction fault",		/*  1 T_PRIVINFLT */
 	"",					/*  2 unused */
 	"breakpoint instruction fault",		/*  3 T_BPTFLT */
 	"",					/*  4 unused */
 	"",					/*  5 unused */
 	"arithmetic trap",			/*  6 T_ARITHTRAP */
 	"",					/*  7 unused */
 	"",					/*  8 unused */
 	"general protection fault",		/*  9 T_PROTFLT */
 	"debug exception",			/* 10 T_TRCTRAP */
 	"",					/* 11 unused */
 	"page fault",				/* 12 T_PAGEFLT */
 	"",					/* 13 unused */
 	"alignment fault",			/* 14 T_ALIGNFLT */
 	"",					/* 15 unused */
 	"",					/* 16 unused */
 	"",					/* 17 unused */
 	"integer divide fault",			/* 18 T_DIVIDE */
 	"non-maskable interrupt trap",		/* 19 T_NMI */
 	"overflow trap",			/* 20 T_OFLOW */
 	"FPU bounds check fault",		/* 21 T_BOUND */
 	"FPU device not available",		/* 22 T_DNA */
 	"double fault",				/* 23 T_DOUBLEFLT */
 	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
 	"invalid TSS fault",			/* 25 T_TSSFLT */
 	"segment not present fault",		/* 26 T_SEGNPFLT */
 	"stack fault",				/* 27 T_STKFLT */
 	"machine check trap",			/* 28 T_MCHK */
 	"SIMD floating-point exception",	/* 29 T_XMMFLT */
 	"reserved (unknown) fault",		/* 30 T_RESERVED */
 	"",					/* 31 unused (reserved) */
 	"DTrace pid return trap",		/* 32 T_DTRACE_RET */
 };
 
 static int prot_fault_translation;
 SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN,
     &prot_fault_translation, 0,
     "Select signal to deliver on protection fault");
 static int uprintf_signal;
 SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN,
     &uprintf_signal, 0,
     "Print debugging information on trap signal to ctty");
 
 /*
  * Control L1D flush on return from NMI.
  *
  * The tunable can be set to the following values:
  * 0 - only enable flush on return from NMI if required by vmm.ko (default)
  * >1 - always flush on return from NMI.
  *
  * Post-boot, the sysctl indicates if flushing is currently enabled.
  */
 int nmi_flush_l1d_sw;
 SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN,
     &nmi_flush_l1d_sw, 0,
     "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist");
 
 /*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
  * routines that prepare a suitable stack frame, and restore this
  * frame after the exception has been processed.
  */
 
 void
 trap(struct trapframe *frame)
 {
 	ksiginfo_t ksi;
 	struct thread *td;
 	struct proc *p;
 	register_t addr, dr6;
 	int signo, ucode;
 	u_int type;
 
 	td = curthread;
 	p = td->td_proc;
 	signo = 0;
 	ucode = 0;
 	addr = 0;
 	dr6 = 0;
 
 	VM_CNT_INC(v_trap);
 	type = frame->tf_trapno;
 
 #ifdef SMP
 	/* Handler for NMI IPIs used for stopping CPUs. */
 	if (type == T_NMI && ipi_nmi_handler() == 0)
 		return;
 #endif
 
 #ifdef KDB
 	if (kdb_active) {
 		kdb_reenter();
 		return;
 	}
 #endif
 
 	if (type == T_RESERVED) {
 		trap_fatal(frame, 0);
 		return;
 	}
 
 	if (type == T_NMI) {
 #ifdef HWPMC_HOOKS
 		/*
 		 * CPU PMCs interrupt using an NMI.  If the PMC module is
 		 * active, pass the 'rip' value to the PMC module's interrupt
 		 * handler.  A non-zero return value from the handler means that
 		 * the NMI was consumed by it and we can return immediately.
 		 */
 		if (pmc_intr != NULL &&
 		    (*pmc_intr)(frame) != 0)
 			return;
 #endif
 
 #ifdef STACK
 		if (stack_nmi_handler(frame) != 0)
 			return;
 #endif
 	}
 
 	if ((frame->tf_rflags & PSL_I) == 0) {
 		/*
 		 * Buggy application or kernel code has disabled
 		 * interrupts and then trapped.  Enabling interrupts
 		 * now is wrong, but it is better than running with
 		 * interrupts disabled until they are accidentally
 		 * enabled later.
 		 */
 		if (TRAPF_USERMODE(frame))
 			uprintf(
 			    "pid %ld (%s): trap %d with interrupts disabled\n",
 			    (long)curproc->p_pid, curthread->td_name, type);
 		else if (type != T_NMI && type != T_BPTFLT &&
 		    type != T_TRCTRAP) {
 			/*
 			 * XXX not quite right, since this may be for a
 			 * multiple fault in user mode.
 			 */
 			printf("kernel trap %d with interrupts disabled\n",
 			    type);
 
 			/*
 			 * We shouldn't enable interrupts while holding a
 			 * spin lock.
 			 */
 			if (td->td_md.md_spinlock_count == 0)
 				enable_intr();
 		}
 	}
 
 	if (TRAPF_USERMODE(frame)) {
 		/* user trap */
 
 		td->td_pticks = 0;
 		td->td_frame = frame;
 		addr = frame->tf_rip;
 		if (td->td_cowgen != p->p_cowgen)
 			thread_cow_update(td);
 
 		switch (type) {
 		case T_PRIVINFLT:	/* privileged instruction fault */
 			signo = SIGILL;
 			ucode = ILL_PRVOPC;
 			break;
 
 		case T_BPTFLT:		/* bpt instruction fault */
-			enable_intr();
 #ifdef KDTRACE_HOOKS
-			if (dtrace_pid_probe_ptr != NULL &&
-			    dtrace_pid_probe_ptr(frame) == 0)
+			if (trap_user_dtrace(frame, &dtrace_pid_probe_ptr))
 				return;
+#else
+			enable_intr();
 #endif
 			signo = SIGTRAP;
 			ucode = TRAP_BRKPT;
 			break;
 
 		case T_TRCTRAP:		/* debug exception */
 			enable_intr();
 			signo = SIGTRAP;
 			ucode = TRAP_TRACE;
 			dr6 = rdr6();
 			if ((dr6 & DBREG_DR6_BS) != 0) {
 				PROC_LOCK(td->td_proc);
 				if ((td->td_dbgflags & TDB_STEP) != 0) {
 					td->td_frame->tf_rflags &= ~PSL_T;
 					td->td_dbgflags &= ~TDB_STEP;
 				}
 				PROC_UNLOCK(td->td_proc);
 			}
 			break;
 
 		case T_ARITHTRAP:	/* arithmetic trap */
 			ucode = fputrap_x87();
 			if (ucode == -1)
 				return;
 			signo = SIGFPE;
 			break;
 
 		case T_PROTFLT:		/* general protection fault */
 			signo = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 		case T_STKFLT:		/* stack fault */
 		case T_SEGNPFLT:	/* segment not present fault */
 			signo = SIGBUS;
 			ucode = BUS_ADRERR;
 			break;
 		case T_TSSFLT:		/* invalid TSS fault */
 			signo = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 		case T_ALIGNFLT:
 			signo = SIGBUS;
 			ucode = BUS_ADRALN;
 			break;
 		case T_DOUBLEFLT:	/* double fault */
 		default:
 			signo = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 
 		case T_PAGEFLT:		/* page fault */
 			/*
 			 * Can the emulator take care of this trap?
 			 */
 			if (*p->p_sysent->sv_trap != NULL &&
 			    (*p->p_sysent->sv_trap)(td) == 0)
 				return;
 
 			addr = frame->tf_addr;
 			signo = trap_pfault(frame, TRUE);
 			if (signo == -1)
 				return;
 			if (signo == 0)
 				goto userret;
 			if (signo == SIGSEGV) {
 				ucode = SEGV_MAPERR;
 			} else if (prot_fault_translation == 0) {
 				/*
 				 * Autodetect.  This check also covers
 				 * the images without the ABI-tag ELF
 				 * note.
 				 */
 				if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
 				    p->p_osrel >= P_OSREL_SIGSEGV) {
 					signo = SIGSEGV;
 					ucode = SEGV_ACCERR;
 				} else {
 					signo = SIGBUS;
 					ucode = T_PAGEFLT;
 				}
 			} else if (prot_fault_translation == 1) {
 				/*
 				 * Always compat mode.
 				 */
 				signo = SIGBUS;
 				ucode = T_PAGEFLT;
 			} else {
 				/*
 				 * Always SIGSEGV mode.
 				 */
 				signo = SIGSEGV;
 				ucode = SEGV_ACCERR;
 			}
 			break;
 
 		case T_DIVIDE:		/* integer divide fault */
 			ucode = FPE_INTDIV;
 			signo = SIGFPE;
 			break;
 
 #ifdef DEV_ISA
 		case T_NMI:
 			nmi_handle_intr(type, frame);
 			return;
 #endif
 
 		case T_OFLOW:		/* integer overflow fault */
 			ucode = FPE_INTOVF;
 			signo = SIGFPE;
 			break;
 
 		case T_BOUND:		/* bounds check fault */
 			ucode = FPE_FLTSUB;
 			signo = SIGFPE;
 			break;
 
 		case T_DNA:
 			/* transparent fault (due to context switch "late") */
 			KASSERT(PCB_USER_FPU(td->td_pcb),
 			    ("kernel FPU ctx has leaked"));
 			fpudna();
 			return;
 
 		case T_FPOPFLT:		/* FPU operand fetch fault */
 			ucode = ILL_COPROC;
 			signo = SIGILL;
 			break;
 
 		case T_XMMFLT:		/* SIMD floating-point exception */
 			ucode = fputrap_sse();
 			if (ucode == -1)
 				return;
 			signo = SIGFPE;
 			break;
 #ifdef KDTRACE_HOOKS
 		case T_DTRACE_RET:
-			enable_intr();
-			if (dtrace_return_probe_ptr != NULL)
-				dtrace_return_probe_ptr(frame);
+			(void)trap_user_dtrace(frame, &dtrace_return_probe_ptr);
 			return;
 #endif
 		}
 	} else {
 		/* kernel trap */
 
 		KASSERT(cold || td->td_ucred != NULL,
 		    ("kernel trap doesn't have ucred"));
 		switch (type) {
 		case T_PAGEFLT:			/* page fault */
 			(void) trap_pfault(frame, FALSE);
 			return;
 
 		case T_DNA:
 			if (PCB_USER_FPU(td->td_pcb))
 				panic("Unregistered use of FPU in kernel");
 			fpudna();
 			return;
 
 		case T_ARITHTRAP:	/* arithmetic trap */
 		case T_XMMFLT:		/* SIMD floating-point exception */
 		case T_FPOPFLT:		/* FPU operand fetch fault */
 			/*
 			 * For now, supporting kernel handler
 			 * registration for FPU traps is overkill.
 			 */
 			trap_fatal(frame, 0);
 			return;
 
 		case T_STKFLT:		/* stack fault */
 		case T_PROTFLT:		/* general protection fault */
 		case T_SEGNPFLT:	/* segment not present fault */
 			if (td->td_intr_nesting_level != 0)
 				break;
 
 			/*
 			 * Invalid segment selectors and out of bounds
 			 * %rip's and %rsp's can be set up in user mode.
 			 * This causes a fault in kernel mode when the
 			 * kernel tries to return to user mode.  We want
 			 * to get this fault so that we can fix the
 			 * problem here and not have to check all the
 			 * selectors and pointers when the user changes
 			 * them.
 			 *
 			 * In case of PTI, the IRETQ faulted while the
 			 * kernel used the pti stack, and exception
 			 * frame records %rsp value pointing to that
 			 * stack.  If we return normally to
 			 * doreti_iret_fault, the trapframe is
 			 * reconstructed on pti stack, and calltrap()
 			 * called on it as well.  Due to the very
 			 * limited pti stack size, kernel does not
 			 * survive for too long.  Switch to the normal
 			 * thread stack for the trap handling.
 			 *
 			 * Magic '5' is the number of qwords occupied by
 			 * the hardware trap frame.
 			 */
 			if (frame->tf_rip == (long)doreti_iret) {
 				frame->tf_rip = (long)doreti_iret_fault;
 				if ((PCPU_GET(curpmap)->pm_ucr3 !=
 				    PMAP_NO_CR3) &&
 				    (frame->tf_rsp == (uintptr_t)PCPU_GET(
 				    pti_rsp0) - 5 * sizeof(register_t))) {
 					frame->tf_rsp = PCPU_GET(rsp0) - 5 *
 					    sizeof(register_t);
 				}
 				return;
 			}
 			if (frame->tf_rip == (long)ld_ds) {
 				frame->tf_rip = (long)ds_load_fault;
 				return;
 			}
 			if (frame->tf_rip == (long)ld_es) {
 				frame->tf_rip = (long)es_load_fault;
 				return;
 			}
 			if (frame->tf_rip == (long)ld_fs) {
 				frame->tf_rip = (long)fs_load_fault;
 				return;
 			}
 			if (frame->tf_rip == (long)ld_gs) {
 				frame->tf_rip = (long)gs_load_fault;
 				return;
 			}
 			if (frame->tf_rip == (long)ld_gsbase) {
 				frame->tf_rip = (long)gsbase_load_fault;
 				return;
 			}
 			if (frame->tf_rip == (long)ld_fsbase) {
 				frame->tf_rip = (long)fsbase_load_fault;
 				return;
 			}
 			if (curpcb->pcb_onfault != NULL) {
 				frame->tf_rip = (long)curpcb->pcb_onfault;
 				return;
 			}
 			break;
 
 		case T_TSSFLT:
 			/*
 			 * PSL_NT can be set in user mode and isn't cleared
 			 * automatically when the kernel is entered.  This
 			 * causes a TSS fault when the kernel attempts to
 			 * `iret' because the TSS link is uninitialized.  We
 			 * want to get this fault so that we can fix the
 			 * problem here and not every time the kernel is
 			 * entered.
 			 */
 			if (frame->tf_rflags & PSL_NT) {
 				frame->tf_rflags &= ~PSL_NT;
 				return;
 			}
 			break;
 
 		case T_TRCTRAP:	 /* debug exception */
 			/* Clear any pending debug events. */
 			dr6 = rdr6();
 			load_dr6(0);
 
 			/*
 			 * Ignore debug register exceptions due to
 			 * accesses in the user's address space, which
 			 * can happen under several conditions such as
 			 * if a user sets a watchpoint on a buffer and
 			 * then passes that buffer to a system call.
 			 * We still want to get TRCTRAPS for addresses
 			 * in kernel space because that is useful when
 			 * debugging the kernel.
 			 */
 			if (user_dbreg_trap(dr6))
 				return;
 
 			/*
 			 * Malicious user code can configure a debug
 			 * register watchpoint to trap on data access
 			 * to the top of stack and then execute 'pop
 			 * %ss; int 3'.  Due to exception deferral for
 			 * 'pop %ss', the CPU will not interrupt 'int
 			 * 3' to raise the DB# exception for the debug
 			 * register but will postpone the DB# until
 			 * execution of the first instruction of the
 			 * BP# handler (in kernel mode).  Normally the
 			 * previous check would ignore DB# exceptions
 			 * for watchpoints on user addresses raised in
 			 * kernel mode.  However, some CPU errata
 			 * include cases where DB# exceptions do not
 			 * properly set bits in %dr6, e.g. Haswell
 			 * HSD23 and Skylake-X SKZ24.
 			 *
 			 * A deferred DB# can also be raised on the
 			 * first instructions of system call entry
 			 * points or single-step traps via similar use
 			 * of 'pop %ss' or 'mov xxx, %ss'.
 			 */
 			if (pti) {
 				if (frame->tf_rip ==
 				    (uintptr_t)IDTVEC(fast_syscall_pti) ||
 #ifdef COMPAT_FREEBSD32
 				    frame->tf_rip ==
 				    (uintptr_t)IDTVEC(int0x80_syscall_pti) ||
 #endif
 				    frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti))
 					return;
 			} else {
 				if (frame->tf_rip ==
 				    (uintptr_t)IDTVEC(fast_syscall) ||
 #ifdef COMPAT_FREEBSD32
 				    frame->tf_rip ==
 				    (uintptr_t)IDTVEC(int0x80_syscall) ||
 #endif
 				    frame->tf_rip == (uintptr_t)IDTVEC(bpt))
 					return;
 			}
 			if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) ||
 			    /* Needed for AMD. */
 			    frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32))
 				return;
 			/*
 			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
 			 */
 		case T_BPTFLT:
 			/*
 			 * If KDB is enabled, let it handle the debugger trap.
 			 * Otherwise, debugger traps "can't happen".
 			 */
 #ifdef KDB
 			if (kdb_trap(type, dr6, frame))
 				return;
 #endif
 			break;
 
 #ifdef DEV_ISA
 		case T_NMI:
 			nmi_handle_intr(type, frame);
 			return;
 #endif
 		}
 
 		trap_fatal(frame, 0);
 		return;
 	}
 
 	/* Translate fault for emulators (e.g. Linux) */
 	if (*p->p_sysent->sv_transtrap != NULL)
 		signo = (*p->p_sysent->sv_transtrap)(signo, type);
 
 	ksiginfo_init_trap(&ksi);
 	ksi.ksi_signo = signo;
 	ksi.ksi_code = ucode;
 	ksi.ksi_trapno = type;
 	ksi.ksi_addr = (void *)addr;
 	if (uprintf_signal) {
 		uprintf("pid %d comm %s: signal %d err %lx code %d type %d "
 		    "addr 0x%lx rsp 0x%lx rip 0x%lx "
 		    "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
 		    p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type,
 		    addr, frame->tf_rsp, frame->tf_rip,
 		    fubyte((void *)(frame->tf_rip + 0)),
 		    fubyte((void *)(frame->tf_rip + 1)),
 		    fubyte((void *)(frame->tf_rip + 2)),
 		    fubyte((void *)(frame->tf_rip + 3)),
 		    fubyte((void *)(frame->tf_rip + 4)),
 		    fubyte((void *)(frame->tf_rip + 5)),
 		    fubyte((void *)(frame->tf_rip + 6)),
 		    fubyte((void *)(frame->tf_rip + 7)));
 	}
 	KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled"));
 	trapsignal(td, &ksi);
 
 userret:
 	userret(td, frame);
 	KASSERT(PCB_USER_FPU(td->td_pcb),
 	    ("Return from trap with kernel FPU ctx leaked"));
 }
 
 /*
  * Filter out DTrace-induced faults before handing the frame to trap().
  * This function cannot be instrumented, so it cannot generate such
  * faults itself.
  */
 void
 trap_check(struct trapframe *frame)
 {
 
 #ifdef KDTRACE_HOOKS
 	/* A non-zero return from the DTrace hook means trap() is skipped. */
 	if (dtrace_trap_func != NULL) {
 		if ((*dtrace_trap_func)(frame, frame->tf_trapno) != 0)
 			return;
 	}
 #endif
 	trap(frame);
 }
 
 static bool
 trap_is_smap(struct trapframe *frame)
 {
 
 	/*
 	 * A page fault on a userspace address is classified as
 	 * SMAP-induced if:
 	 * - SMAP is supported;
 	 * - kernel mode accessed present data page;
 	 * - rflags.AC was cleared.
 	 * Kernel must never access user space with rflags.AC cleared
 	 * if SMAP is enabled.
 	 */
 	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 &&
 	    (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) ==
 	    PGEX_P && (frame->tf_rflags & PSL_AC) == 0);
 }
 
 static bool
 trap_is_pti(struct trapframe *frame)
 {
 
 	return (PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 &&
 	    pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W |
 	    PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) &&
 	    (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK) ==
 	    (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK));
 }
 
 static int
 trap_pfault(struct trapframe *frame, int usermode)
 {
 	struct thread *td;
 	struct proc *p;
 	vm_map_t map;
 	vm_offset_t va;
 	int rv;
 	vm_prot_t ftype;
 	vm_offset_t eva;
 
 	td = curthread;
 	p = td->td_proc;
 	eva = frame->tf_addr;
 
 	if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
 		/*
 		 * Due to both processor errata and lazy TLB invalidation when
 		 * access restrictions are removed from virtual pages, memory
 		 * accesses that are allowed by the physical mapping layer may
 		 * nonetheless cause one spurious page fault per virtual page. 
 		 * When the thread is executing a "no faulting" section that
 		 * is bracketed by vm_fault_{disable,enable}_pagefaults(),
 		 * every page fault is treated as a spurious page fault,
 		 * unless it accesses the same virtual address as the most
 		 * recent page fault within the same "no faulting" section.
 		 */
 		if (td->td_md.md_spurflt_addr != eva ||
 		    (td->td_pflags & TDP_RESETSPUR) != 0) {
 			/*
 			 * Do nothing to the TLB.  A stale TLB entry is
 			 * flushed automatically by a page fault.
 			 */
 			td->td_md.md_spurflt_addr = eva;
 			td->td_pflags &= ~TDP_RESETSPUR;
 			return (0);
 		}
 	} else {
 		/*
 		 * If we get a page fault while in a critical section, then
 		 * it is most likely a fatal kernel page fault.  The kernel
 		 * is already going to panic trying to get a sleep lock to
 		 * do the VM lookup, so just consider it a fatal trap so the
 		 * kernel can print out a useful trap message and even get
 		 * to the debugger.
 		 *
 		 * If we get a page fault while holding a non-sleepable
 		 * lock, then it is most likely a fatal kernel page fault.
 		 * If WITNESS is enabled, then it's going to whine about
 		 * bogus LORs with various VM locks, so just skip to the
 		 * fatal trap handling directly.
 		 */
 		if (td->td_critnest != 0 ||
 		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
 		    "Kernel page fault") != 0) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}
 	}
 	va = trunc_page(eva);
 	if (va >= VM_MIN_KERNEL_ADDRESS) {
 		/*
 		 * Don't allow user-mode faults in kernel address space.
 		 */
 		if (usermode)
 			return (SIGSEGV);
 
 		map = kernel_map;
 	} else {
 		map = &p->p_vmspace->vm_map;
 
 		/*
 		 * When accessing a usermode address, kernel must be
 		 * ready to accept the page fault, and provide a
 		 * handling routine.  Since accessing the address
 		 * without the handler is a bug, do not try to handle
 		 * it normally, and panic immediately.
 		 *
 		 * If SMAP is enabled, filter SMAP faults also,
 		 * because illegal access might occur to the mapped
 		 * user address, causing infinite loop.
 		 */
 		if (!usermode && (td->td_intr_nesting_level != 0 ||
 		    trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}
 	}
 
 	/*
 	 * If the trap was caused by errant bits in the PTE then panic.
 	 */
 	if (frame->tf_err & PGEX_RSV) {
 		trap_fatal(frame, eva);
 		return (-1);
 	}
 
 	/*
 	 * User-mode protection key violation (PKU).  May happen
 	 * either from usermode or from kernel if copyin accessed
 	 * key-protected mapping.
 	 */
 	if ((frame->tf_err & PGEX_PK) != 0) {
 		if (eva > VM_MAXUSER_ADDRESS) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}
 		rv = KERN_PROTECTION_FAILURE;
 		goto after_vmfault;
 	}
 
 	/*
 	 * If nx protection of the usermode portion of kernel page
 	 * tables caused trap, panic.
 	 */
 	if (usermode && trap_is_pti(frame))
 		panic("PTI: pid %d comm %s tf_err %#lx", p->p_pid,
 		    p->p_comm, frame->tf_err);
 
 	/*
 	 * PGEX_I is defined only if the execute disable bit capability is
 	 * supported and enabled.
 	 */
 	if (frame->tf_err & PGEX_W)
 		ftype = VM_PROT_WRITE;
 	else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
 		ftype = VM_PROT_EXECUTE;
 	else
 		ftype = VM_PROT_READ;
 
 	/* Fault in the page. */
 	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 	if (rv == KERN_SUCCESS) {
 #ifdef HWPMC_HOOKS
 		if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
 			PMC_SOFT_CALL_TF( , , page_fault, all, frame);
 			if (ftype == VM_PROT_READ)
 				PMC_SOFT_CALL_TF( , , page_fault, read,
 				    frame);
 			else
 				PMC_SOFT_CALL_TF( , , page_fault, write,
 				    frame);
 		}
 #endif
 		return (0);
 	}
 after_vmfault:
 	if (!usermode) {
 		if (td->td_intr_nesting_level == 0 &&
 		    curpcb->pcb_onfault != NULL) {
 			frame->tf_rip = (long)curpcb->pcb_onfault;
 			return (0);
 		}
 		trap_fatal(frame, eva);
 		return (-1);
 	}
 	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
 }
 
 /*
  * Print a description of an unrecoverable trap on the console and panic.
  *
  * frame - trap frame of the faulting context.
  * eva   - faulting virtual address; only printed for T_PAGEFLT.
  *
  * When KDB is compiled in and debugger_on_trap is set, the debugger is
  * given the trap first; if it reports the trap as handled this function
  * returns instead of panicking.
  */
 static void
 trap_fatal(struct trapframe *frame, vm_offset_t eva)
 {
 	int code, ss;
 	u_int type;
 	struct soft_segment_descriptor softseg;
 	char *msg;
 #ifdef KDB
 	bool handled;
 #endif
 
 	code = frame->tf_err;
 	type = frame->tf_trapno;
 	/* Decode this CPU's GDT entry for the faulting code segment. */
 	sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)],
 	    &softseg);
 
 	if (type <= MAX_TRAP_MSG)
 		msg = trap_msg[type];
 	else
 		msg = "UNKNOWN";
 	printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
 	    TRAPF_USERMODE(frame) ? "user" : "kernel");
 #ifdef SMP
 	/* two separate prints in case of a trap on an unmapped page */
 	printf("cpuid = %d; ", PCPU_GET(cpuid));
 	printf("apic id = %02x\n", PCPU_GET(apic_id));
 #endif
 	if (type == T_PAGEFLT) {
 		printf("fault virtual address	= 0x%lx\n", eva);
 		printf("fault code		= %s %s %s%s%s, %s\n",
 			code & PGEX_U ? "user" : "supervisor",
 			code & PGEX_W ? "write" : "read",
 			code & PGEX_I ? "instruction" : "data",
 			code & PGEX_PK ? " prot key" : "",
 			code & PGEX_SGX ? " SGX" : "",
 			code & PGEX_RSV ? "reserved bits in PTE" :
 			code & PGEX_P ? "protection violation" : "page not present");
 	}
 	printf("instruction pointer	= 0x%lx:0x%lx\n",
 	       frame->tf_cs & 0xffff, frame->tf_rip);
 	ss = frame->tf_ss & 0xffff;
 	printf("stack pointer	        = 0x%x:0x%lx\n", ss, frame->tf_rsp);
 	printf("frame pointer	        = 0x%x:0x%lx\n", ss, frame->tf_rbp);
 	printf("code segment		= base 0x%lx, limit 0x%lx, type 0x%x\n",
 	       softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
 	printf("			= DPL %d, pres %d, long %d, def32 %d, gran %d\n",
 	       softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
 	       softseg.ssd_gran);
 	printf("processor eflags	= ");
 	if (frame->tf_rflags & PSL_T)
 		printf("trace trap, ");
 	if (frame->tf_rflags & PSL_I)
 		printf("interrupt enabled, ");
 	if (frame->tf_rflags & PSL_NT)
 		printf("nested task, ");
 	if (frame->tf_rflags & PSL_RF)
 		printf("resume, ");
 	printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
 	printf("current process		= %d (%s)\n",
 	    curproc->p_pid, curthread->td_name);
 
 #ifdef KDB
 	if (debugger_on_trap) {
 		/* Tell KDB why it is being entered, then reset the reason. */
 		kdb_why = KDB_WHY_TRAP;
 		handled = kdb_trap(type, 0, frame);
 		kdb_why = KDB_WHY_UNSET;
 		if (handled)
 			return;
 	}
 #endif
 	printf("trap number		= %d\n", type);
 	if (type <= MAX_TRAP_MSG)
 		panic("%s", trap_msg[type]);
 	else
 		panic("unknown/reserved trap");
 }
+
+#ifdef KDTRACE_HOOKS
+/*
+ * Invoke a userspace DTrace hook.  The hook pointer is cleared when no
+ * userspace probes are enabled, so we must synchronize with DTrace to ensure
+ * that a trapping thread is able to call the hook before it is cleared.
+ */
+static bool
+trap_user_dtrace(struct trapframe *frame, int (**hookp)(struct trapframe *))
+{
+	int (*probe)(struct trapframe *);
+
+	/* Read the hook pointer once, before interrupts are enabled. */
+	probe = (int (*)(struct trapframe *))atomic_load_ptr(hookp);
+	enable_intr();
+	if (probe == NULL)
+		return (false);
+	/* Report true only when the hook returned 0. */
+	return (probe(frame) == 0);
+}
+#endif
 
 /*
  * Double fault handler. Called when a fault occurs while writing
  * a frame for a trap/exception onto the stack. This usually occurs
  * when the stack overflows (such is the case with infinite recursion,
  * for example).
  */
 void
 dblfault_handler(struct trapframe *frame)
 {
 #ifdef KDTRACE_HOOKS
 	if (dtrace_doubletrap_func != NULL)
 		(*dtrace_doubletrap_func)();
 #endif
 	/*
 	 * Dump the register state before panicking.  The arguments are
 	 * listed in the same order as the labels in the format string.
 	 */
 	printf("\nFatal double fault\n"
 	    "rip %#lx rsp %#lx rbp %#lx\n"
 	    "rax %#lx rdx %#lx rbx %#lx\n"
 	    "rcx %#lx rsi %#lx rdi %#lx\n"
 	    "r8 %#lx r9 %#lx r10 %#lx\n"
 	    "r11 %#lx r12 %#lx r13 %#lx\n"
 	    "r14 %#lx r15 %#lx rflags %#lx\n"
 	    "cs %#lx ss %#lx ds %#hx es %#hx fs %#hx gs %#hx\n"
 	    "fsbase %#lx gsbase %#lx kgsbase %#lx\n",
 	    frame->tf_rip, frame->tf_rsp, frame->tf_rbp,
 	    frame->tf_rax, frame->tf_rdx, frame->tf_rbx,
 	    frame->tf_rcx, frame->tf_rsi, frame->tf_rdi,
 	    frame->tf_r8, frame->tf_r9, frame->tf_r10,
 	    frame->tf_r11, frame->tf_r12, frame->tf_r13,
 	    frame->tf_r14, frame->tf_r15, frame->tf_rflags,
 	    frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es,
 	    frame->tf_fs, frame->tf_gs,
 	    rdmsr(MSR_FSBASE), rdmsr(MSR_GSBASE), rdmsr(MSR_KGSBASE));
 #ifdef SMP
 	/* two separate prints in case of a trap on an unmapped page */
 	printf("cpuid = %d; ", PCPU_GET(cpuid));
 	printf("apic id = %02x\n", PCPU_GET(apic_id));
 #endif
 	panic("double fault");
 }
 
 /*
  * Slow path of system call argument fetching.  Handles the indirect
  * SYS_syscall/SYS___syscall forms, syscall numbers outside the table,
  * and calls whose arguments do not all fit in registers.
  * Returns 0 on success or the error reported by copyin().
  */
 static int __noinline
 cpu_fetch_syscall_args_fallback(struct thread *td, struct syscall_args *sa)
 {
 	struct trapframe *tf;
 	struct proc *proc;
 	register_t *regargs;
 	caddr_t stackargs;
 	int error, nregs, skip;
 
 	proc = td->td_proc;
 	tf = td->td_frame;
 	skip = 0;
 	nregs = NARGREGS;
 
 	sa->code = tf->tf_rax;
 
 	if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
 		/* Indirect call: the real number is in the first argument. */
 		sa->code = tf->tf_rdi;
 		skip = 1;
 		nregs = NARGREGS - 1;
 	}
 	if (proc->p_sysent->sv_mask)
 		sa->code &= proc->p_sysent->sv_mask;
 
 	/* Out-of-range numbers are routed to table entry 0. */
 	sa->callp = (sa->code >= proc->p_sysent->sv_size) ?
 	    &proc->p_sysent->sv_table[0] :
 	    &proc->p_sysent->sv_table[sa->code];
 
 	sa->narg = sa->callp->sy_narg;
 	KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!"));
 	regargs = &tf->tf_rdi + skip;
 	memcpy(sa->args, regargs, sizeof(sa->args[0]) * NARGREGS);
 	if (sa->narg > nregs) {
 		/* Remaining arguments are copied in from the user stack. */
 		stackargs = (caddr_t)tf->tf_rsp + sizeof(register_t);
 		error = copyin(stackargs, &sa->args[nregs],
 		    (sa->narg - nregs) * sizeof(sa->args[0]));
 		if (__predict_false(error != 0))
 			return (error);
 	}
 
 	td->td_retval[0] = 0;
 	td->td_retval[1] = tf->tf_rdx;
 
 	return (0);
 }
 
 /*
  * Fetch the system call number and arguments for td from its trap
  * frame into td->td_sa.  Indirect syscalls, out-of-range numbers and
  * calls with more than NARGREGS arguments are delegated to
  * cpu_fetch_syscall_args_fallback().
  */
 int
 cpu_fetch_syscall_args(struct thread *td)
 {
 	struct syscall_args *sa;
 	struct trapframe *tf;
 	struct proc *proc;
 
 	sa = &td->td_sa;
 	tf = td->td_frame;
 	proc = td->td_proc;
 
 	sa->code = tf->tf_rax;
 
 	if (__predict_false(sa->code == SYS_syscall ||
 	    sa->code == SYS___syscall ||
 	    sa->code >= proc->p_sysent->sv_size)) {
 		return (cpu_fetch_syscall_args_fallback(td, sa));
 	}
 
 	/* The table lookup uses the unmasked number; the mask is applied after. */
 	sa->callp = &proc->p_sysent->sv_table[sa->code];
 	sa->narg = sa->callp->sy_narg;
 	KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!"));
 
 	if (proc->p_sysent->sv_mask)
 		sa->code &= proc->p_sysent->sv_mask;
 
 	if (__predict_false(sa->narg > NARGREGS)) {
 		return (cpu_fetch_syscall_args_fallback(td, sa));
 	}
 
 	/* All arguments are in registers: copy them out of the frame. */
 	memcpy(sa->args, &tf->tf_rdi, sizeof(sa->args[0]) * NARGREGS);
 
 	td->td_retval[0] = 0;
 	td->td_retval[1] = tf->tf_rdx;
 
 	return (0);
 }
 
 #include "../../kern/subr_syscall.c"
 
 static void (*syscall_ret_l1d_flush)(void);
 int syscall_ret_l1d_flush_mode;
 
 /*
  * Issue the L1D flush command by writing IA32_FLUSH_CMD_L1D to the
  * MSR_IA32_FLUSH_CMD model-specific register.
  */
 static void
 flush_l1d_hw(void)
 {
 
 	wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
 }
 
 /*
  * Call the configured L1D flush routine, if any, when a system call
  * returns with an error other than the ones listed below.
  */
 static void __inline
 amd64_syscall_ret_flush_l1d_inline(int error)
 {
 	void (*flush)(void);
 
 	switch (error) {
 	case 0:
 	case EEXIST:
 	case EAGAIN:
 	case EXDEV:
 	case ENOENT:
 	case ENOTCONN:
 	case EINPROGRESS:
 		/* Success and these errors never trigger a flush. */
 		return;
 	default:
 		break;
 	}
 	/* Read the pointer once so the NULL check and the call agree. */
 	flush = syscall_ret_l1d_flush;
 	if (flush != NULL)
 		flush();
 }
 
 /*
  * Non-inline, externally visible wrapper around
  * amd64_syscall_ret_flush_l1d_inline(); error is passed through
  * unchanged.
  */
 void
 amd64_syscall_ret_flush_l1d(int error)
 {
 
 	amd64_syscall_ret_flush_l1d_inline(error);
 }
 
 /*
  * Select the flush routine stored in syscall_ret_l1d_flush from the
  * current value of syscall_ret_l1d_flush_mode:
  *   0 - none; 1 - hardware if available, else software;
  *   2 - hardware only; 3 - software only.
  * Any other value is replaced by mode 1.
  */
 void
 amd64_syscall_ret_flush_l1d_recalc(void)
 {
 	bool have_hw_flush;
 
 	have_hw_flush = (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0;
 	for (;;) {
 		switch (syscall_ret_l1d_flush_mode) {
 		case 0:
 			syscall_ret_l1d_flush = NULL;
 			return;
 		case 1:
 			if (have_hw_flush)
 				syscall_ret_l1d_flush = flush_l1d_hw;
 			else
 				syscall_ret_l1d_flush = flush_l1d_sw_abi;
 			return;
 		case 2:
 			if (have_hw_flush)
 				syscall_ret_l1d_flush = flush_l1d_hw;
 			else
 				syscall_ret_l1d_flush = NULL;
 			return;
 		case 3:
 			syscall_ret_l1d_flush = flush_l1d_sw_abi;
 			return;
 		default:
 			/* Unknown mode: switch to mode 1 and evaluate again. */
 			syscall_ret_l1d_flush_mode = 1;
 			break;
 		}
 	}
 }
 
 /*
  * Sysctl handler for machdep.syscall_ret_flush_l1d.
  *
  * Exports the current syscall_ret_l1d_flush_mode.  If
  * sysctl_handle_int() fails or no new value was supplied
  * (req->newptr == NULL), its result is returned and the mode is left
  * unchanged.  Otherwise the new value is stored and the flush routine
  * is recalculated.
  */
 static int
 machdep_syscall_ret_flush_l1d(SYSCTL_HANDLER_ARGS)
 {
 	int error, val;
 
 	val = syscall_ret_l1d_flush_mode;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	syscall_ret_l1d_flush_mode = val;
 	amd64_syscall_ret_flush_l1d_recalc();
 	return (0);
 }
 SYSCTL_PROC(_machdep, OID_AUTO, syscall_ret_flush_l1d, CTLTYPE_INT |
     CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
     machdep_syscall_ret_flush_l1d, "I",
     "Flush L1D on syscall return with error (0 - off, 1 - on, "
     "2 - use hw only, 3 - use sw only)");
 
 
 /*
  * System call handler for native binaries.  The trap frame is already
  * set up by the assembler trampoline and a pointer to it is saved in
  * td_frame.
  */
 void
 amd64_syscall(struct thread *td, int traced)
 {
 	int error;
 	ksiginfo_t ksi;
 
 #ifdef DIAGNOSTIC
 	/* A system call must originate from a user-mode frame. */
 	if (!TRAPF_USERMODE(td->td_frame)) {
 		panic("syscall");
 		/* NOT REACHED */
 	}
 #endif
 	error = syscallenter(td);
 
 	/*
 	 * Traced syscall.
 	 * Clear the trace flag in the saved rflags and report the step
 	 * to the thread as SIGTRAP with code TRAP_TRACE at the return %rip.
 	 */
 	if (__predict_false(traced)) {
 		td->td_frame->tf_rflags &= ~PSL_T;
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGTRAP;
 		ksi.ksi_code = TRAP_TRACE;
 		ksi.ksi_addr = (void *)td->td_frame->tf_rip;
 		trapsignal(td, &ksi);
 	}
 
 	/* Sanity checks on per-thread state before returning to user mode. */
 	KASSERT(PCB_USER_FPU(td->td_pcb),
 	    ("System call %s returning with kernel FPU ctx leaked",
 	     syscallname(td->td_proc, td->td_sa.code)));
 	KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
 	    ("System call %s returning with mangled pcb_save",
 	     syscallname(td->td_proc, td->td_sa.code)));
 	KASSERT(td->td_md.md_invl_gen.gen == 0,
 	    ("System call %s returning with leaked invl_gen %lu",
 	    syscallname(td->td_proc, td->td_sa.code),
 	    td->td_md.md_invl_gen.gen));
 
 	syscallret(td, error);
 
 	/*
 	 * If the user-supplied value of %rip is not a canonical
 	 * address, then some CPUs will trigger a ring 0 #GP during
 	 * the sysret instruction.  However, the fault handler would
 	 * execute in ring 0 with the user's %gs and %rsp which would
 	 * not be safe.  Instead, use the full return path which
 	 * catches the problem safely.
 	 */
 	if (__predict_false(td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS))
 		set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 
 	/* Flush L1D if configured and the syscall's error value calls for it. */
 	amd64_syscall_ret_flush_l1d_inline(error);
 }
Index: stable/12/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c
===================================================================
--- stable/12/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c	(revision 349015)
+++ stable/12/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c	(revision 349016)
@@ -1,2678 +1,2664 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  *
  * Portions Copyright 2010 The FreeBSD Foundation
  *
  * $FreeBSD$
  */
 
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*
  * Copyright (c) 2015, Joyent, Inc. All rights reserved.
  */
 
 #include <sys/atomic.h>
 #include <sys/errno.h>
 #include <sys/stat.h>
 #include <sys/modctl.h>
 #include <sys/conf.h>
 #include <sys/systm.h>
 #ifdef illumos
 #include <sys/ddi.h>
 #endif
 #include <sys/sunddi.h>
 #include <sys/cpuvar.h>
 #include <sys/kmem.h>
 #ifdef illumos
 #include <sys/strsubr.h>
 #endif
 #include <sys/fasttrap.h>
 #include <sys/fasttrap_impl.h>
 #include <sys/fasttrap_isa.h>
 #include <sys/dtrace.h>
 #include <sys/dtrace_impl.h>
 #include <sys/sysmacros.h>
 #include <sys/proc.h>
 #include <sys/policy.h>
 #ifdef illumos
 #include <util/qsort.h>
 #endif
 #include <sys/mutex.h>
 #include <sys/kernel.h>
 #ifndef illumos
 #include <sys/dtrace_bsd.h>
 #include <sys/eventhandler.h>
 #include <sys/rmlock.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/u8_textprep.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_param.h>
 
 #include <cddl/dev/dtrace/dtrace_cddl.h>
 #endif
 
 /*
  * User-Land Trap-Based Tracing
  * ----------------------------
  *
  * The fasttrap provider allows DTrace consumers to instrument any user-level
  * instruction to gather data; this includes probes with semantic
  * significance like entry and return as well as simple offsets into the
  * function. While the specific techniques used are very ISA specific, the
  * methodology is generalizable to any architecture.
  *
  *
  * The General Methodology
  * -----------------------
  *
  * With the primary goal of tracing every user-land instruction and the
  * limitation that we can't trust user space so don't want to rely on much
  * information there, we begin by replacing the instructions we want to trace
  * with trap instructions. Each instruction we overwrite is saved into a hash
  * table keyed by process ID and pc address. When we enter the kernel due to
  * this trap instruction, we need the effects of the replaced instruction to
  * appear to have occurred before we proceed with the user thread's
  * execution.
  *
  * Each user level thread is represented by a ulwp_t structure which is
  * always easily accessible through a register. The most basic way to produce
  * the effects of the instruction we replaced is to copy that instruction out
  * to a bit of scratch space reserved in the user thread's ulwp_t structure
  * (a sort of kernel-private thread local storage), set the PC to that
  * scratch space and single step. When we reenter the kernel after single
  * stepping the instruction we must then adjust the PC to point to what would
  * normally be the next instruction. Of course, special care must be taken
  * for branches and jumps, but these represent such a small fraction of any
  * instruction set that writing the code to emulate these in the kernel is
  * not too difficult.
  *
  * Return probes may require several tracepoints to trace every return site,
  * and, conversely, each tracepoint may activate several probes (the entry
  * and offset 0 probes, for example). To solve this multiplexing problem,
  * tracepoints contain lists of probes to activate and probes contain lists
  * of tracepoints to enable. If a probe is activated, it adds its ID to
  * existing tracepoints or creates new ones as necessary.
  *
  * Most probes are activated _before_ the instruction is executed, but return
  * probes are activated _after_ the effects of the last instruction of the
  * function are visible. Return probes must be fired _after_ we have
  * single-stepped the instruction whereas all other probes are fired
  * beforehand.
  *
  *
  * Lock Ordering
  * -------------
  *
  * The lock ordering below -- both internally and with respect to the DTrace
  * framework -- is a little tricky and bears some explanation. Each provider
  * has a lock (ftp_mtx) that protects its members including reference counts
  * for enabled probes (ftp_rcount), consumers actively creating probes
  * (ftp_ccount) and USDT consumers (ftp_mcount); all three prevent a provider
  * from being freed. A provider is looked up by taking the bucket lock for the
  * provider hash table, and is returned with its lock held. The provider lock
  * may be taken in functions invoked by the DTrace framework, but may not be
  * held while calling functions in the DTrace framework.
  *
  * To ensure consistency over multiple calls to the DTrace framework, the
  * creation lock (ftp_cmtx) should be held. Naturally, the creation lock may
  * not be taken when holding the provider lock as that would create a cyclic
  * lock ordering. In situations where one would naturally take the provider
  * lock and then the creation lock, we instead up a reference count to prevent
  * the provider from disappearing, drop the provider lock, and acquire the
  * creation lock.
  *
  * Briefly:
  * 	bucket lock before provider lock
  *	DTrace before provider lock
  *	creation lock before DTrace
  *	never hold the provider lock and creation lock simultaneously
  */
 
 /* Character-device entry points wired into fasttrap_cdevsw below. */
 static d_open_t fasttrap_open;
 static d_ioctl_t fasttrap_ioctl;
 
 /* Device switch for the "fasttrap" device: only open and ioctl are set. */
 static struct cdevsw fasttrap_cdevsw = {
 	.d_version	= D_VERSION,
 	.d_open		= fasttrap_open,
 	.d_ioctl	= fasttrap_ioctl,
 	.d_name		= "fasttrap",
 };
 static struct cdev *fasttrap_cdev;
 static dtrace_meta_provider_id_t fasttrap_meta_id;
 
 /*
  * State for the asynchronous provider cleanup (see
  * fasttrap_pid_cleanup_cb() and fasttrap_pid_cleanup()).
  *
  * fasttrap_cleanup_mtx is held around accesses to the work and drain flags.
  * fasttrap_cleanup_work is set by fasttrap_pid_cleanup() and cleared by the
  * cleanup loop at the start of each pass.  A nonzero fasttrap_cleanup_drain
  * lets the cleanup loop terminate once no provider is pending; its address
  * is also the channel the loop wakes on exit.  fasttrap_cleanup_cv is used
  * only as a sleep/wakeup channel address.
  *
  * NOTE(review): fasttrap_cleanup_proc is presumably the handle of the
  * dedicated cleanup process; its use is not visible in this part of the file.
  */
 static struct proc *fasttrap_cleanup_proc;
 static struct mtx fasttrap_cleanup_mtx;
 static uint_t fasttrap_cleanup_work, fasttrap_cleanup_drain, fasttrap_cleanup_cv;
 
 /*
  * Generation count on modifications to the global tracepoint lookup table.
  */
 static volatile uint64_t fasttrap_mod_gen;
 
 /*
  * When the fasttrap provider is loaded, fasttrap_max is set to either
  * FASTTRAP_MAX_DEFAULT, or the value for fasttrap-max-probes in the
  * fasttrap.conf file (Illumos), or the value provided in the loader.conf (FreeBSD).
  * Each time a probe is created, fasttrap_total is incremented by the number
  * of tracepoints that may be associated with that probe; fasttrap_total is capped
  * at fasttrap_max.
  */
 #define	FASTTRAP_MAX_DEFAULT		250000
 static uint32_t fasttrap_max = FASTTRAP_MAX_DEFAULT;
 static uint32_t fasttrap_total;
 
 /*
  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
  */
 
 #define	FASTTRAP_TPOINTS_DEFAULT_SIZE	0x4000
 #define	FASTTRAP_PROVIDERS_DEFAULT_SIZE	0x100
 #define	FASTTRAP_PROCS_DEFAULT_SIZE	0x100
 
 #define	FASTTRAP_PID_NAME		"pid"
 
 fasttrap_hash_t			fasttrap_tpoints;
 static fasttrap_hash_t		fasttrap_provs;
 static fasttrap_hash_t		fasttrap_procs;
 
 static uint64_t			fasttrap_pid_count;	/* pid ref count */
 static kmutex_t			fasttrap_count_mtx;	/* lock on ref count */
 
 #define	FASTTRAP_ENABLE_FAIL	1
 #define	FASTTRAP_ENABLE_PARTIAL	2
 
 static int fasttrap_tracepoint_enable(proc_t *, fasttrap_probe_t *, uint_t);
 static void fasttrap_tracepoint_disable(proc_t *, fasttrap_probe_t *, uint_t);
 
 static fasttrap_provider_t *fasttrap_provider_lookup(pid_t, const char *,
     const dtrace_pattr_t *);
 static void fasttrap_provider_retire(pid_t, const char *, int);
 static void fasttrap_provider_free(fasttrap_provider_t *);
 
 static fasttrap_proc_t *fasttrap_proc_lookup(pid_t);
 static void fasttrap_proc_release(fasttrap_proc_t *);
 
 #ifndef illumos
 static void fasttrap_thread_dtor(void *, struct thread *);
 #endif
 
 /*
  * Bucket index into fasttrap_provs: the string hash of the provider name
  * plus the pid, reduced with the table's mask.
  */
 #define	FASTTRAP_PROVS_INDEX(pid, name) \
 	((fasttrap_hash_str(name) + (pid)) & fasttrap_provs.fth_mask)
 
 /* Bucket index into fasttrap_procs: the pid reduced with the table's mask. */
 #define	FASTTRAP_PROCS_INDEX(pid) ((pid) & fasttrap_procs.fth_mask)
 
 #ifndef illumos
 struct rmlock fasttrap_tp_lock;
 static eventhandler_tag fasttrap_thread_dtor_tag;
 #endif
 
 static unsigned long tpoints_hash_size = FASTTRAP_TPOINTS_DEFAULT_SIZE;
 
 #ifdef __FreeBSD__
 SYSCTL_DECL(_kern_dtrace);
 SYSCTL_NODE(_kern_dtrace, OID_AUTO, fasttrap, CTLFLAG_RD, 0, "DTrace fasttrap parameters");
 SYSCTL_UINT(_kern_dtrace_fasttrap, OID_AUTO, max_probes, CTLFLAG_RWTUN, &fasttrap_max,
     FASTTRAP_MAX_DEFAULT, "Maximum number of fasttrap probes");
 SYSCTL_ULONG(_kern_dtrace_fasttrap, OID_AUTO, tpoints_hash_size, CTLFLAG_RDTUN, &tpoints_hash_size,
     FASTTRAP_TPOINTS_DEFAULT_SIZE, "Size of the tracepoint hash table");
 #endif
 
 /*
  * Return the 1-based position of the most significant set bit of i, or 0
  * when i has no bits set (so 1 -> 1, 2 -> 2, 3 -> 2, 0x8000 -> 16).
  */
 static int
 fasttrap_highbit(ulong_t i)
 {
 	int pos, width;
 
 	if (i == 0)
 		return (0);
 
 	pos = 1;
 #ifdef _LP64
 	/* Only a 64-bit ulong_t can have anything above bit 31. */
 	if ((i >> 32) != 0) {
 		pos += 32;
 		i >>= 32;
 	}
 #endif
 	/*
 	 * Binary search over the remaining 32 bits: at each step, if the
 	 * upper half of the current window is populated, discard the lower
 	 * half and account for its width.
 	 */
 	for (width = 16; width != 0; width >>= 1) {
 		if ((i >> width) != 0) {
 			pos += width;
 			i >>= width;
 		}
 	}
 	return (pos);
 }
 
 /*
  * Hash a NUL-terminated string.  Each character is folded in by shifting
  * the running value left four bits and adding the character; whenever the
  * top nibble becomes non-zero it is XORed back into bits 4-7 and cleared,
  * so the result always fits in 28 bits.  The empty string hashes to 0.
  */
 static uint_t
 fasttrap_hash_str(const char *p)
 {
 	const char *cp;
 	unsigned int top;
 	uint_t hash;
 
 	hash = 0;
 	for (cp = p; *cp != '\0'; cp++) {
 		hash = (hash << 4) + *cp;
 		top = hash & 0xf0000000;
 		if (top != 0)
 			hash ^= top >> 24;
 		hash &= ~top;
 	}
 	return (hash);
 }
 
 /*
  * Send SIGTRAP with code TRAP_DTRACE to thread t of process p, reporting pc
  * as the signal address.  The process lock is taken around the delivery.
  */
 void
 fasttrap_sigtrap(proc_t *p, kthread_t *t, uintptr_t pc)
 {
 	ksiginfo_t siginfo;
 
 	ksiginfo_init(&siginfo);
 	siginfo.ksi_addr = (caddr_t)pc;
 	siginfo.ksi_code = TRAP_DTRACE;
 	siginfo.ksi_signo = SIGTRAP;
 
 	PROC_LOCK(p);
 	(void)tdsendsignal(p, t, SIGTRAP, &siginfo);
 	PROC_UNLOCK(p);
 }
 
 #ifndef illumos
 /*
  * Obtain a chunk of scratch space in the address space of the target process.
  */
 fasttrap_scrspace_t *
 fasttrap_scraddr(struct thread *td, fasttrap_proc_t *fprc)
 {
 	fasttrap_scrblock_t *blk;
 	fasttrap_scrspace_t *chunk;
 	struct proc *owner;
 	vm_offset_t base;
 	int idx, rv;
 
 	/* A thread that already owns a chunk simply gets it back. */
 	if (td->t_dtrace_sscr != NULL)
 		return ((fasttrap_scrspace_t *)td->t_dtrace_sscr);
 
 	chunk = NULL;
 	owner = td->td_proc;
 
 	mutex_enter(&fprc->ftpc_mtx);
 	if (LIST_EMPTY(&fprc->ftpc_fscr)) {
 		/*
 		 * The free list is exhausted: map a fresh scratch block
 		 * into the traced process and record it on ftpc_scrblks.
 		 */
 		base = 0;
 		rv = vm_map_find(&owner->p_vmspace->vm_map, NULL, 0, &base,
 		    FASTTRAP_SCRBLOCK_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL,
 		    VM_PROT_ALL, 0);
 		if (rv != KERN_SUCCESS) {
 			/* Mapping failed; the caller sees NULL. */
 			mutex_exit(&fprc->ftpc_mtx);
 			return (chunk);
 		}
 
 		blk = malloc(sizeof(*blk), M_SOLARIS, M_WAITOK);
 		blk->ftsb_addr = base;
 		LIST_INSERT_HEAD(&fprc->ftpc_scrblks, blk, ftsb_next);
 
 		/*
 		 * Split the block into FASTTRAP_SCRSPACE_SIZE pieces and
 		 * queue each one on the free list.
 		 */
 		for (idx = 0;
 		    idx < FASTTRAP_SCRBLOCK_SIZE / FASTTRAP_SCRSPACE_SIZE;
 		    idx++) {
 			chunk = malloc(sizeof(*chunk), M_SOLARIS, M_WAITOK);
 			chunk->ftss_addr = base +
 			    idx * FASTTRAP_SCRSPACE_SIZE;
 			LIST_INSERT_HEAD(&fprc->ftpc_fscr, chunk,
 			    ftss_next);
 		}
 	}
 
 	/*
 	 * Move the head of the free list to the allocated list and bind it
 	 * to td; fasttrap_thread_dtor() returns it to the free list.
 	 */
 	chunk = LIST_FIRST(&fprc->ftpc_fscr);
 	LIST_REMOVE(chunk, ftss_next);
 	LIST_INSERT_HEAD(&fprc->ftpc_ascr, chunk, ftss_next);
 	td->t_dtrace_sscr = chunk;
 
 	mutex_exit(&fprc->ftpc_mtx);
 
 	return (chunk);
 }
 
 /*
  * Return any allocated per-thread scratch space chunks back to the process'
  * free list.
  */
 static void
 fasttrap_thread_dtor(void *arg __unused, struct thread *td)
 {
 	fasttrap_bucket_t *bkt;
 	fasttrap_proc_t *fproc;
 	fasttrap_scrspace_t *chunk;
 	pid_t pid;
 
 	/* Nothing to hand back if the thread never took scratch space. */
 	if (td->t_dtrace_sscr == NULL)
 		return;
 
 	pid = td->td_proc->p_pid;
 	bkt = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)];
 
 	/*
 	 * Find the fasttrap process handle for this pid in its hash bucket.
 	 * If there is none, just drop the bucket lock and leave.
 	 */
 	mutex_enter(&bkt->ftb_mtx);
 	fproc = bkt->ftb_data;
 	while (fproc != NULL && fproc->ftpc_pid != pid)
 		fproc = fproc->ftpc_next;
 	if (fproc == NULL) {
 		mutex_exit(&bkt->ftb_mtx);
 		return;
 	}
 
 	/* Take the handle's lock before letting go of the bucket. */
 	mutex_enter(&fproc->ftpc_mtx);
 	mutex_exit(&bkt->ftb_mtx);
 
 	/* Move the thread's chunk from wherever it is linked to the free list. */
 	chunk = (fasttrap_scrspace_t *)td->t_dtrace_sscr;
 	LIST_REMOVE(chunk, ftss_next);
 	LIST_INSERT_HEAD(&fproc->ftpc_fscr, chunk, ftss_next);
 
 	mutex_exit(&fproc->ftpc_mtx);
 }
 #endif
 
 /*
  * This function ensures that no threads are actively using the memory
  * associated with probes that were formerly live.
  */
 static void
 fasttrap_mod_barrier(uint64_t gen)
 {
 	int cpuidx;
 
 	/*
 	 * A barrier has already been imposed for any generation older than
 	 * the current one, so there is nothing to do for those.
 	 */
 	if (fasttrap_mod_gen > gen)
 		return;
 
 	fasttrap_mod_gen++;
 
 	/*
 	 * Acquire and immediately release the lock(s) in exclusive mode; the
 	 * acquisition itself is the barrier.
 	 */
 #ifndef illumos
 	rm_wlock(&fasttrap_tp_lock);
 	rm_wunlock(&fasttrap_tp_lock);
 #else
 	CPU_FOREACH(cpuidx) {
 		mutex_enter(&fasttrap_cpuc_pid_lock[cpuidx]);
 		mutex_exit(&fasttrap_cpuc_pid_lock[cpuidx]);
 	}
 #endif
 }
 
 /*
  * This function performs asynchronous cleanup of fasttrap providers. The
  * Solaris implementation of this mechanism uses a timeout that's activated in
  * fasttrap_pid_cleanup(), but this doesn't work in FreeBSD: one may sleep while
  * holding the DTrace mutexes, but it is unsafe to sleep in a callout handler.
  * Thus we use a dedicated process to perform the cleanup when requested.
  */
 /*ARGSUSED*/
 static void
 fasttrap_pid_cleanup_cb(void *data)
 {
 	fasttrap_provider_t **fpp, *fp;
 	fasttrap_bucket_t *bucket;
 	dtrace_provider_id_t provid;
 	int i, later = 0, rval;
 
 	/*
 	 * Keep making passes until a drain has been requested and the most
 	 * recent pass left no provider pending ("later" == 0).
 	 */
 	mtx_lock(&fasttrap_cleanup_mtx);
 	while (!fasttrap_cleanup_drain || later > 0) {
 		/*
 		 * Consume the work request before dropping the mutex so that
 		 * a request posted during this pass is noticed afterwards.
 		 */
 		fasttrap_cleanup_work = 0;
 		mtx_unlock(&fasttrap_cleanup_mtx);
 
 		later = 0;
 
 		/*
 		 * Iterate over all the providers trying to remove the marked
 		 * ones. If a provider is marked but not retired, we just
 		 * have to take a crack at removing it -- it's no big deal if
 		 * we can't.
 		 */
 		for (i = 0; i < fasttrap_provs.fth_nent; i++) {
 			bucket = &fasttrap_provs.fth_table[i];
 			mutex_enter(&bucket->ftb_mtx);
 			/*
 			 * fpp always points at the link that references the
 			 * current provider, so unlinking is *fpp = next.
 			 */
 			fpp = (fasttrap_provider_t **)&bucket->ftb_data;
 
 			while ((fp = *fpp) != NULL) {
 				if (!fp->ftp_marked) {
 					fpp = &fp->ftp_next;
 					continue;
 				}
 
 				mutex_enter(&fp->ftp_mtx);
 
 				/*
 				 * If this provider has consumers actively
 				 * creating probes (ftp_ccount) or is a USDT
 				 * provider (ftp_mcount), we can't unregister
 				 * or even condense.
 				 *
 				 * fpp is deliberately not advanced here: the
 				 * mark is cleared, so the next iteration
 				 * takes the !ftp_marked branch above and
 				 * steps past this provider.
 				 */
 				if (fp->ftp_ccount != 0 ||
 				    fp->ftp_mcount != 0) {
 					mutex_exit(&fp->ftp_mtx);
 					fp->ftp_marked = 0;
 					continue;
 				}
 
 				if (!fp->ftp_retired || fp->ftp_rcount != 0)
 					fp->ftp_marked = 0;
 
 				mutex_exit(&fp->ftp_mtx);
 
 				/*
 				 * If we successfully unregister this
 				 * provider we can remove it from the hash
 				 * chain and free the memory. If our attempt
 				 * to unregister fails and this is a retired
 				 * provider, increment our flag to try again
 				 * pretty soon. If we've consumed more than
 				 * half of our total permitted number of
 				 * probes call dtrace_condense() to try to
 				 * clean out the unenabled probes.
 				 */
 				provid = fp->ftp_provid;
 				if ((rval = dtrace_unregister(provid)) != 0) {
 					if (fasttrap_total > fasttrap_max / 2)
 						(void) dtrace_condense(provid);
 
 					if (rval == EAGAIN)
 						fp->ftp_marked = 1;
 
 					later += fp->ftp_marked;
 					fpp = &fp->ftp_next;
 				} else {
 					*fpp = fp->ftp_next;
 					fasttrap_provider_free(fp);
 				}
 			}
 			mutex_exit(&bucket->ftb_mtx);
 		}
 		mtx_lock(&fasttrap_cleanup_mtx);
 
 		/*
 		 * If we were unable to retire a provider, try again after a
 		 * second. This situation can occur in certain circumstances
 		 * where providers cannot be unregistered even though they have
 		 * no probes enabled because of an execution of dtrace -l or
 		 * something similar.
 		 *
 		 * The same one-second pause is taken when new work was posted
 		 * during the pass or a drain is in progress; otherwise sleep
 		 * on fasttrap_cleanup_cv until fasttrap_pid_cleanup() issues
 		 * a wakeup on it.
 		 */
 		if (later > 0 || fasttrap_cleanup_work ||
 		    fasttrap_cleanup_drain) {
 			mtx_unlock(&fasttrap_cleanup_mtx);
 			pause("ftclean", hz);
 			mtx_lock(&fasttrap_cleanup_mtx);
 		} else
 			mtx_sleep(&fasttrap_cleanup_cv, &fasttrap_cleanup_mtx,
 			    0, "ftcl", 0);
 	}
 
 	/*
 	 * Wake up the thread in fasttrap_unload() now that we're done.
 	 */
 	wakeup(&fasttrap_cleanup_drain);
 	mtx_unlock(&fasttrap_cleanup_mtx);
 
 	kthread_exit();
 }
 
 /*
  * Activates the asynchronous cleanup mechanism.
  */
 static void
 fasttrap_pid_cleanup(void)
 {
 
 	mtx_lock(&fasttrap_cleanup_mtx);
 	/*
 	 * Post a request and kick the cleanup loop, unless a request is
 	 * already outstanding.
 	 */
 	if (fasttrap_cleanup_work == 0) {
 		fasttrap_cleanup_work = 1;
 		wakeup(&fasttrap_cleanup_cv);
 	}
 	mtx_unlock(&fasttrap_cleanup_mtx);
 }
 
 /*
  * This is called from cfork() via dtrace_fasttrap_fork(). The child
  * process's address space is (roughly) a copy of the parent process's so
  * we have to remove all the instrumentation we had previously enabled in the
  * parent.
  */
 static void
 fasttrap_fork(proc_t *p, proc_t *cp)
 {
 #ifndef illumos
 	fasttrap_scrblock_t *scrblk;
 	fasttrap_proc_t *fprc = NULL;
 #endif
 	pid_t ppid = p->p_pid;
 	int i;
 
 	/*
 	 * p is the forking (current) process and cp is the child.  On
 	 * FreeBSD p's lock is asserted held on entry; both process locks are
 	 * dropped while the child is cleaned up and are re-acquired before
 	 * returning.
 	 */
 	ASSERT(curproc == p);
 #ifdef illumos
 	ASSERT(p->p_proc_flag & P_PR_LOCK);
 #else
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 #endif
 #ifdef illumos
 	ASSERT(p->p_dtrace_count > 0);
 #else
 	/*
 	 * This check is purposely here instead of in kern_fork.c because,
 	 * for legal reasons, we cannot include the dtrace_cddl.h header
 	 * inside kern_fork.c and insert if-clause there.
 	 */
 	if (p->p_dtrace_count == 0 && p->p_dtrace_helpers == NULL)
 		return;
 #endif
 
 	ASSERT(cp->p_dtrace_count == 0);
 
 	/*
 	 * This would be simpler and faster if we maintained per-process
 	 * hash tables of enabled tracepoints. It could, however, potentially
 	 * slow down execution of a tracepoint since we'd need to go
 	 * through two levels of indirection. In the future, we should
 	 * consider either maintaining per-process ancillary lists of
 	 * enabled tracepoints or hanging a pointer to a per-process hash
 	 * table of enabled tracepoints off the proc structure.
 	 */
 
 	/*
 	 * We don't have to worry about the child process disappearing
 	 * because we're in fork().
 	 */
 #ifdef illumos
 	mtx_lock_spin(&cp->p_slock);
 	sprlock_proc(cp);
 	mtx_unlock_spin(&cp->p_slock);
 #else
 	/*
 	 * fasttrap_tracepoint_remove() expects the child process to be
 	 * unlocked and the VM then expects curproc to be unlocked.
 	 */
 	_PHOLD(cp);
 	PROC_UNLOCK(cp);
 	PROC_UNLOCK(p);
 	if (p->p_dtrace_count == 0)
 		goto dup_helpers;
 #endif
 
 	/*
 	 * Iterate over every tracepoint looking for ones that belong to the
 	 * parent process, and remove each from the child process.
 	 */
 	for (i = 0; i < fasttrap_tpoints.fth_nent; i++) {
 		fasttrap_tracepoint_t *tp;
 		fasttrap_bucket_t *bucket = &fasttrap_tpoints.fth_table[i];
 
 		mutex_enter(&bucket->ftb_mtx);
 		for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 			if (tp->ftt_pid == ppid &&
 			    tp->ftt_proc->ftpc_acount != 0) {
 				int ret = fasttrap_tracepoint_remove(cp, tp);
 				ASSERT(ret == 0);
 
 				/*
 				 * The count of active providers can only be
 				 * decremented (i.e. to zero) during exec,
 				 * exit, and removal of a meta provider so it
 				 * should be impossible to drop the count
 				 * mid-fork.
 				 */
 				ASSERT(tp->ftt_proc->ftpc_acount != 0);
 #ifndef illumos
 				/* Remember the handle for the unmap below. */
 				fprc = tp->ftt_proc;
 #endif
 			}
 		}
 		mutex_exit(&bucket->ftb_mtx);
 	}
 
 #ifdef illumos
 	mutex_enter(&cp->p_lock);
 	sprunlock(cp);
 #else
 	/*
 	 * Unmap any scratch space inherited from the parent's address
 	 * space.  This depends only on the parent's fasttrap process handle,
 	 * not on the bucket being scanned, so it is done once after the scan
 	 * rather than once per hash bucket.
 	 */
 	if (fprc != NULL) {
 		mutex_enter(&fprc->ftpc_mtx);
 		LIST_FOREACH(scrblk, &fprc->ftpc_scrblks, ftsb_next) {
 			vm_map_remove(&cp->p_vmspace->vm_map,
 			    scrblk->ftsb_addr,
 			    scrblk->ftsb_addr + FASTTRAP_SCRBLOCK_SIZE);
 		}
 		mutex_exit(&fprc->ftpc_mtx);
 	}
 
 dup_helpers:
 	if (p->p_dtrace_helpers != NULL)
 		dtrace_helpers_duplicate(p, cp);
 	PROC_LOCK(p);
 	PROC_LOCK(cp);
 	_PRELE(cp);
 #endif
 }
 
 /*
  * This is called from proc_exit() or from exec_common() if p_dtrace_probes
  * is set on the proc structure to indicate that there is a pid provider
  * associated with this process.
  */
 static void
 fasttrap_exec_exit(proc_t *p)
 {
 #ifndef illumos
 	struct thread *thr;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	_PHOLD(p);
 	/*
 	 * Thread structures may be recycled, so kdtrace_thread_ctor cannot
 	 * be relied upon to leave t_dtrace_sscr zeroed.  Clear it for every
 	 * thread of the process here instead.
 	 */
 	FOREACH_THREAD_IN_PROC(p, thr) {
 		thr->t_dtrace_sscr = NULL;
 	}
 	PROC_UNLOCK(p);
 #else
 	ASSERT(p == curproc);
 #endif
 
 	/*
 	 * Retire this process's pid provider.  User-land static probes are
 	 * dealt with by the meta-provider remove entry point, not here.
 	 */
 	fasttrap_provider_retire(p->p_pid, FASTTRAP_PID_NAME, 0);
 #ifndef illumos
 	if (p->p_dtrace_helpers != NULL)
 		dtrace_helpers_destroy(p);
 	/* Return with the process locked again and the hold dropped. */
 	PROC_LOCK(p);
 	_PRELE(p);
 #endif
 }
 
 
 /*ARGSUSED*/
 /*
  * Provide callback for the pid provider.  Intentionally empty: neither
  * argument is used and no probes are created here.
  */
 static void
 fasttrap_pid_provide(void *arg, dtrace_probedesc_t *desc)
 {
 	/*
 	 * There are no "default" pid probes.
 	 */
 }
 
 /*
  * Enable tracepoint number "index" of "probe" in process p.
  *
  * Returns 0 when the probe's id was attached to an already-active tracepoint
  * or a new tracepoint was installed successfully, FASTTRAP_ENABLE_PARTIAL
  * when the new tracepoint was added to the hash table but
  * fasttrap_tracepoint_install() failed (the caller must still disable it),
  * and FASTTRAP_ENABLE_FAIL when fasttrap_tracepoint_init() failed and
  * nothing was added.
  */
 static int
 fasttrap_tracepoint_enable(proc_t *p, fasttrap_probe_t *probe, uint_t index)
 {
 	fasttrap_tracepoint_t *tp, *new_tp = NULL;
 	fasttrap_bucket_t *bucket;
 	fasttrap_id_t *id;
 	pid_t pid;
 	uintptr_t pc;
 
 	ASSERT(index < probe->ftp_ntps);
 
 	pid = probe->ftp_pid;
 	pc = probe->ftp_tps[index].fit_tp->ftt_pc;
 	id = &probe->ftp_tps[index].fit_id;
 
 	ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid);
 
 #ifdef illumos
 	ASSERT(!(p->p_flag & SVFORK));
 #endif
 
 	/*
 	 * Before we make any modifications, make sure we've imposed a barrier
 	 * on the generation in which this probe was last modified.
 	 */
 	fasttrap_mod_barrier(probe->ftp_gen);
 
 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
 
 	/*
 	 * If the tracepoint has already been enabled, just add our id to the
 	 * list of interested probes. This may be our second time through
 	 * this path in which case we'll have constructed the tracepoint we'd
 	 * like to install. If we can't find a match, and have an allocated
 	 * tracepoint ready to go, enable that one now.
 	 *
 	 * A tracepoint whose process is defunct is also considered defunct.
 	 */
 again:
 	mutex_enter(&bucket->ftb_mtx);
 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 		/*
 		 * Note that it's safe to access the active count on the
 		 * associated proc structure because we know that at least one
 		 * provider (this one) will still be around throughout this
 		 * operation.
 		 */
 		if (tp->ftt_pid != pid || tp->ftt_pc != pc ||
 		    tp->ftt_proc->ftpc_acount == 0)
 			continue;
 
 		/*
 		 * Now that we've found a matching tracepoint, it would be
 		 * a decent idea to confirm that the tracepoint is still
 		 * enabled and the trap instruction hasn't been overwritten.
 		 * Since this is a little hairy, we'll punt for now.
 		 */
 
 		/*
 		 * This can't be the first interested probe. We don't have
 		 * to worry about another thread being in the midst of
 		 * deleting this tracepoint (which would be the only valid
 		 * reason for a tracepoint to have no interested probes)
 		 * since we're holding P_PR_LOCK for this process.
 		 */
 		ASSERT(tp->ftt_ids != NULL || tp->ftt_retids != NULL);
 
 		/*
 		 * Push our id on the front of the matching list.  The id's
 		 * next pointer is written, and a producer barrier issued,
 		 * before the list head is updated.
 		 */
 		switch (id->fti_ptype) {
 		case DTFTP_ENTRY:
 		case DTFTP_OFFSETS:
 		case DTFTP_IS_ENABLED:
 			id->fti_next = tp->ftt_ids;
 			membar_producer();
 			tp->ftt_ids = id;
 			membar_producer();
 			break;
 
 		case DTFTP_RETURN:
 		case DTFTP_POST_OFFSETS:
 			id->fti_next = tp->ftt_retids;
 			membar_producer();
 			tp->ftt_retids = id;
 			membar_producer();
 			break;
 
 		default:
 			ASSERT(0);
 		}
 
 		mutex_exit(&bucket->ftb_mtx);
 
 		/*
 		 * A tracepoint prepared on an earlier pass was not needed
 		 * after all; reset its id lists so it is left unused.
 		 */
 		if (new_tp != NULL) {
 			new_tp->ftt_ids = NULL;
 			new_tp->ftt_retids = NULL;
 		}
 
 		return (0);
 	}
 
 	/*
 	 * If we have a good tracepoint ready to go, install it now while
 	 * we have the lock held and no one can screw with us.
 	 */
 	if (new_tp != NULL) {
 		int rc = 0;
 
 		new_tp->ftt_next = bucket->ftb_data;
 		membar_producer();
 		bucket->ftb_data = new_tp;
 		membar_producer();
 		mutex_exit(&bucket->ftb_mtx);
 
 		/*
 		 * Activate the tracepoint in the ISA-specific manner.
 		 * If this fails, we need to report the failure, but
 		 * indicate that this tracepoint must still be disabled
 		 * by calling fasttrap_tracepoint_disable().
 		 */
 		if (fasttrap_tracepoint_install(p, new_tp) != 0)
 			rc = FASTTRAP_ENABLE_PARTIAL;
 
 		/*
 		 * Increment the count of the number of tracepoints active in
 		 * the victim process.
 		 */
 #ifdef illumos
 		ASSERT(p->p_proc_flag & P_PR_LOCK);
 #endif
 		p->p_dtrace_count++;
 
 		return (rc);
 	}
 
 	mutex_exit(&bucket->ftb_mtx);
 
 	/*
 	 * Initialize the tracepoint that's been preallocated with the probe.
 	 */
 	new_tp = probe->ftp_tps[index].fit_tp;
 
 	ASSERT(new_tp->ftt_pid == pid);
 	ASSERT(new_tp->ftt_pc == pc);
 	ASSERT(new_tp->ftt_proc == probe->ftp_prov->ftp_proc);
 	ASSERT(new_tp->ftt_ids == NULL);
 	ASSERT(new_tp->ftt_retids == NULL);
 
 	/*
 	 * The new tracepoint is not yet in the hash table, so its id list
 	 * is set up here without barriers.
 	 */
 	switch (id->fti_ptype) {
 	case DTFTP_ENTRY:
 	case DTFTP_OFFSETS:
 	case DTFTP_IS_ENABLED:
 		id->fti_next = NULL;
 		new_tp->ftt_ids = id;
 		break;
 
 	case DTFTP_RETURN:
 	case DTFTP_POST_OFFSETS:
 		id->fti_next = NULL;
 		new_tp->ftt_retids = id;
 		break;
 
 	default:
 		ASSERT(0);
 	}
 
 	/* Record the process's data model from its SV_LP64 sysent flag. */
 #ifdef __FreeBSD__
 	if (SV_PROC_FLAG(p, SV_LP64))
 		p->p_model = DATAMODEL_LP64;
 	else
 		p->p_model = DATAMODEL_ILP32;
 #endif
 
 	/*
 	 * If the ISA-dependent initialization goes to plan, go back to the
 	 * beginning and try to install this freshly made tracepoint.
 	 */
 	if (fasttrap_tracepoint_init(p, new_tp, pc, id->fti_ptype) == 0)
 		goto again;
 
 	/* Initialization failed: detach our id again and report failure. */
 	new_tp->ftt_ids = NULL;
 	new_tp->ftt_retids = NULL;
 
 	return (FASTTRAP_ENABLE_FAIL);
 }
 
 /*
  * Detach tracepoint number "index" of "probe" from the active tracepoint at
  * the same pid/pc.  If other ids remain on that tracepoint, only this probe's
  * id is unlinked; otherwise the trap instruction is removed from process p
  * (when p is not NULL) and the tracepoint is taken out of the hash table.
  * In both cases the probe is stamped with the current modification
  * generation.
  */
 static void
 fasttrap_tracepoint_disable(proc_t *p, fasttrap_probe_t *probe, uint_t index)
 {
 	fasttrap_bucket_t *bucket;
 	fasttrap_provider_t *provider = probe->ftp_prov;
 	fasttrap_tracepoint_t **pp, *tp;
 	fasttrap_id_t *id, **idp = NULL;
 	pid_t pid;
 	uintptr_t pc;
 
 	ASSERT(index < probe->ftp_ntps);
 
 	pid = probe->ftp_pid;
 	pc = probe->ftp_tps[index].fit_tp->ftt_pc;
 	id = &probe->ftp_tps[index].fit_id;
 
 	ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid);
 
 	/*
 	 * Find the tracepoint and make sure that our id is one of the
 	 * ones registered with it.
 	 */
 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
 	mutex_enter(&bucket->ftb_mtx);
 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 		if (tp->ftt_pid == pid && tp->ftt_pc == pc &&
 		    tp->ftt_proc == provider->ftp_proc)
 			break;
 	}
 
 	/*
 	 * If we somehow lost this tracepoint, we're in a world of hurt.
 	 */
 	ASSERT(tp != NULL);
 
 	/* Select the id list (entry/offset vs. return) this id lives on. */
 	switch (id->fti_ptype) {
 	case DTFTP_ENTRY:
 	case DTFTP_OFFSETS:
 	case DTFTP_IS_ENABLED:
 		ASSERT(tp->ftt_ids != NULL);
 		idp = &tp->ftt_ids;
 		break;
 
 	case DTFTP_RETURN:
 	case DTFTP_POST_OFFSETS:
 		ASSERT(tp->ftt_retids != NULL);
 		idp = &tp->ftt_retids;
 		break;
 
 	default:
 		ASSERT(0);
 	}
 
 	/* Walk the list to the link that references this probe's id. */
 	while ((*idp)->fti_probe != probe) {
 		idp = &(*idp)->fti_next;
 		ASSERT(*idp != NULL);
 	}
 
 	/* Unlink the id. */
 	id = *idp;
 	*idp = id->fti_next;
 	membar_producer();
 
 	ASSERT(id->fti_probe == probe);
 
 	/*
 	 * If there are other registered enablings of this tracepoint, we're
 	 * all done, but if this was the last probe associated with
 	 * this tracepoint, we need to remove and free it.
 	 */
 	if (tp->ftt_ids != NULL || tp->ftt_retids != NULL) {
 
 		/*
 		 * If the current probe's tracepoint is in use, swap it
 		 * for an unused tracepoint.
 		 */
 		if (tp == probe->ftp_tps[index].fit_tp) {
 			fasttrap_probe_t *tmp_probe;
 			fasttrap_tracepoint_t **tmp_tp;
 			uint_t tmp_index;
 
 			/*
 			 * Pick the probe at the head of whichever id list
 			 * is non-empty as the swap partner.
 			 */
 			if (tp->ftt_ids != NULL) {
 				tmp_probe = tp->ftt_ids->fti_probe;
 				/* LINTED - alignment */
 				tmp_index = FASTTRAP_ID_INDEX(tp->ftt_ids);
 				tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp;
 			} else {
 				tmp_probe = tp->ftt_retids->fti_probe;
 				/* LINTED - alignment */
 				tmp_index = FASTTRAP_ID_INDEX(tp->ftt_retids);
 				tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp;
 			}
 
 			ASSERT(*tmp_tp != NULL);
 			ASSERT(*tmp_tp != probe->ftp_tps[index].fit_tp);
 			ASSERT((*tmp_tp)->ftt_ids == NULL);
 			ASSERT((*tmp_tp)->ftt_retids == NULL);
 
 			probe->ftp_tps[index].fit_tp = *tmp_tp;
 			*tmp_tp = tp;
 		}
 
 		mutex_exit(&bucket->ftb_mtx);
 
 		/*
 		 * Tag the modified probe with the generation in which it was
 		 * changed.
 		 */
 		probe->ftp_gen = fasttrap_mod_gen;
 		return;
 	}
 
 	mutex_exit(&bucket->ftb_mtx);
 
 	/*
 	 * We can't safely remove the tracepoint from the set of active
 	 * tracepoints until we've actually removed the fasttrap instruction
 	 * from the process's text. We can, however, operate on this
 	 * tracepoint secure in the knowledge that no other thread is going to
 	 * be looking at it since we hold P_PR_LOCK on the process if it's
 	 * live or we hold the provider lock on the process if it's dead and
 	 * gone.
 	 */
 
 	/*
 	 * We only need to remove the actual instruction if we're looking
 	 * at an existing process
 	 */
 	if (p != NULL) {
 		/*
 		 * If we fail to restore the instruction we need to kill
 		 * this process since it's in a completely unrecoverable
 		 * state.
 		 */
 		if (fasttrap_tracepoint_remove(p, tp) != 0)
 			fasttrap_sigtrap(p, NULL, pc);
 
 		/*
 		 * Decrement the count of the number of tracepoints active
 		 * in the victim process.
 		 */
 #ifdef illumos
 		ASSERT(p->p_proc_flag & P_PR_LOCK);
 #endif
 		p->p_dtrace_count--;
 
 		/* Bump the process's tracepoint generation (release order). */
 		atomic_add_rel_64(&p->p_fasttrap_tp_gen, 1);
 	}
 
 	/*
 	 * Remove the probe from the hash table of active tracepoints.
 	 */
 	mutex_enter(&bucket->ftb_mtx);
 	pp = (fasttrap_tracepoint_t **)&bucket->ftb_data;
 	ASSERT(*pp != NULL);
 	while (*pp != tp) {
 		pp = &(*pp)->ftt_next;
 		ASSERT(*pp != NULL);
 	}
 
 	*pp = tp->ftt_next;
 	membar_producer();
 
 	mutex_exit(&bucket->ftb_mtx);
 
 	/*
 	 * Tag the modified probe with the generation in which it was changed.
 	 */
 	probe->ftp_gen = fasttrap_mod_gen;
 }
 
 /*
  * Take a reference on the fasttrap probe hooks.  The first reference
  * (fasttrap_pid_count going from 0 to 1) installs fasttrap_pid_probe and
  * fasttrap_return_probe in the dtrace hook pointers; the count is protected
  * by fasttrap_count_mtx.
  */
 static void
 fasttrap_enable_callbacks(void)
 {
 	/*
 	 * We don't have to play the rw lock game here because we're
 	 * providing something rather than taking something away --
 	 * we can be sure that no threads have tried to follow this
 	 * function pointer yet.
 	 */
 	mutex_enter(&fasttrap_count_mtx);
 	if (fasttrap_pid_count == 0) {
 		ASSERT(dtrace_pid_probe_ptr == NULL);
 		ASSERT(dtrace_return_probe_ptr == NULL);
 		dtrace_pid_probe_ptr = &fasttrap_pid_probe;
 		dtrace_return_probe_ptr = &fasttrap_return_probe;
 	}
 	/* Whether just installed or already present, the hooks must be ours. */
 	ASSERT(dtrace_pid_probe_ptr == &fasttrap_pid_probe);
 	ASSERT(dtrace_return_probe_ptr == &fasttrap_return_probe);
 	fasttrap_pid_count++;
 	mutex_exit(&fasttrap_count_mtx);
 }
 
 /*
  * Drop a reference on the fasttrap probe hooks.  When the last reference
  * goes away (fasttrap_pid_count reaches 0) the dtrace hook pointers are
  * reset to NULL; the count is protected by fasttrap_count_mtx.
  */
 static void
 fasttrap_disable_callbacks(void)
 {
-#ifdef illumos
-	ASSERT(MUTEX_HELD(&cpu_lock));
-#endif
-
-
 	mutex_enter(&fasttrap_count_mtx);
 	ASSERT(fasttrap_pid_count > 0);
 	fasttrap_pid_count--;
 	if (fasttrap_pid_count == 0) {
-#ifdef illumos
-		cpu_t *cur, *cpu = CPU;
-
-		for (cur = cpu->cpu_next_onln; cur != cpu;
-		    cur = cur->cpu_next_onln) {
-			rw_enter(&cur->cpu_ft_lock, RW_WRITER);
-		}
-#endif
+		/*
+		 * Synchronize with the breakpoint handler, which is careful to
+		 * enable interrupts only after loading the hook pointer.
+		 */
+		dtrace_sync();
 		dtrace_pid_probe_ptr = NULL;
 		dtrace_return_probe_ptr = NULL;
-#ifdef illumos
-		for (cur = cpu->cpu_next_onln; cur != cpu;
-		    cur = cur->cpu_next_onln) {
-			rw_exit(&cur->cpu_ft_lock);
-		}
-#endif
 	}
 	mutex_exit(&fasttrap_count_mtx);
 }
 
 /*
  * DTrace provider "enable" hook (installed as dtps_enable in pid_pops and
  * usdt_pops).
  *
  * arg  - unused.
  * id   - DTrace probe id; must equal probe->ftp_id.
  * parg - the fasttrap_probe_t being enabled.
  *
  * The provider's ftp_rcount is always incremented, even when the probe ends
  * up not being enabled.  If the provider is retired or the target process
  * cannot be held, the function returns without enabling anything.  Otherwise
  * a reference is taken on the trap hooks and every tracepoint of the probe is
  * enabled; if any tracepoint fails, the ones enabled so far are disabled
  * again, the hook reference is dropped and ftp_enabled stays 0.
  */
 /*ARGSUSED*/
 static void
 fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
 {
 	fasttrap_probe_t *probe = parg;
 	proc_t *p = NULL;
 	int i, rc;
 
 	ASSERT(probe != NULL);
 	ASSERT(!probe->ftp_enabled);
 	ASSERT(id == probe->ftp_id);
 #ifdef illumos
 	ASSERT(MUTEX_HELD(&cpu_lock));
 #endif
 
 	/*
 	 * Increment the count of enabled probes on this probe's provider;
 	 * the provider can't go away while the probe still exists. We
 	 * must increment this even if we aren't able to properly enable
 	 * this probe.
 	 */
 	mutex_enter(&probe->ftp_prov->ftp_mtx);
 	probe->ftp_prov->ftp_rcount++;
 	mutex_exit(&probe->ftp_prov->ftp_mtx);
 
 	/*
 	 * If this probe's provider is retired (meaning it was valid in a
 	 * previously exec'ed incarnation of this address space), bail out. The
 	 * provider can't go away while we're in this code path.
 	 */
 	if (probe->ftp_prov->ftp_retired)
 		return;
 
 	/*
 	 * If we can't find the process, it may be that we're in the context of
 	 * a fork in which the traced process is being born and we're copying
 	 * USDT probes. Otherwise, the process is gone so bail.
 	 */
 #ifdef illumos
 	if ((p = sprlock(probe->ftp_pid)) == NULL) {
 		if ((curproc->p_flag & SFORKING) == 0)
 			return;
 
 		mutex_enter(&pidlock);
 		p = prfind(probe->ftp_pid);
 
 		if (p == NULL) {
 			/*
 			 * So it's not that the target process is being born,
 			 * it's that it isn't there at all (and we simply
 			 * happen to be forking).  Anyway, we know that the
 			 * target is definitely gone, so bail out.
 			 */
 			mutex_exit(&pidlock);
 			/*
 			 * This function returns void, so a plain return is
 			 * required here (it used to be "return (0)").
 			 */
 			return;
 		}
 
 		/*
 		 * Confirm that curproc is indeed forking the process in which
 		 * we're trying to enable probes.
 		 */
 		ASSERT(p->p_parent == curproc);
 		ASSERT(p->p_stat == SIDL);
 
 		mutex_enter(&p->p_lock);
 		mutex_exit(&pidlock);
 
 		sprlock_proc(p);
 	}
 
 	ASSERT(!(p->p_flag & SVFORK));
 	mutex_exit(&p->p_lock);
 #else
 	if (pget(probe->ftp_pid, PGET_HOLD | PGET_NOTWEXIT, &p) != 0)
 		return;
 #endif
 
 	/*
 	 * We have to enable the trap entry point before any user threads have
 	 * the chance to execute the trap instruction we're about to place
 	 * in their process's text.
 	 */
 	fasttrap_enable_callbacks();
 
 	/*
 	 * Enable all the tracepoints and add this probe's id to each
 	 * tracepoint's list of active probes.
 	 */
 	for (i = 0; i < probe->ftp_ntps; i++) {
 		if ((rc = fasttrap_tracepoint_enable(p, probe, i)) != 0) {
 			/*
 			 * If enabling the tracepoint failed completely,
 			 * we don't have to disable it; if the failure
 			 * was only partial we must disable it.
 			 */
 			if (rc == FASTTRAP_ENABLE_FAIL)
 				i--;
 			else
 				ASSERT(rc == FASTTRAP_ENABLE_PARTIAL);
 
 			/*
 			 * Back up and pull out all the tracepoints we've
 			 * created so far for this probe.
 			 */
 			while (i >= 0) {
 				fasttrap_tracepoint_disable(p, probe, i);
 				i--;
 			}
 
 			/* Release the hold taken on the process above. */
 #ifdef illumos
 			mutex_enter(&p->p_lock);
 			sprunlock(p);
 #else
 			PRELE(p);
 #endif
 
 			/*
 			 * Since we're not actually enabling this probe,
 			 * drop our reference on the trap table entry.
 			 */
 			fasttrap_disable_callbacks();
 			return;
 		}
 	}
 
 	/* Every tracepoint is in place; release the hold on the process. */
 #ifdef illumos
 	mutex_enter(&p->p_lock);
 	sprunlock(p);
 #else
 	PRELE(p);
 #endif
 
 	probe->ftp_enabled = 1;
 }
 
 /*
  * DTrace provider "disable" hook (dtps_disable in pid_pops and usdt_pops).
  *
  * Disables every tracepoint of a fully enabled probe, drops the provider's
  * ftp_rcount reference, possibly marks the provider and kicks
  * fasttrap_pid_cleanup(), and, if the probe had been enabled, clears
  * ftp_enabled and releases the reference on the trap hooks.
  */
 /*ARGSUSED*/
 static void
 fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg)
 {
 	fasttrap_probe_t *probe = parg;
 	fasttrap_provider_t *provider = probe->ftp_prov;
 	proc_t *p;
 	int tpi, do_cleanup = 0;
 
 	ASSERT(id == probe->ftp_id);
 
 	mutex_enter(&provider->ftp_mtx);
 
 	/*
 	 * The hold on the process can only fail if the process is dead and
 	 * gone.  In that case the provider lock alone is our point of mutual
 	 * exclusion against other DTrace consumers disabling this probe.
 	 */
 	if (pget(probe->ftp_pid, PGET_HOLD | PGET_NOTWEXIT, &p) != 0)
 		p = NULL;
 
 	/*
 	 * Only a fully enabled probe has tracepoints to tear down.
 	 */
 	if (probe->ftp_enabled) {
 		for (tpi = 0; tpi < probe->ftp_ntps; tpi++)
 			fasttrap_tracepoint_disable(p, probe, tpi);
 	}
 
 	ASSERT(provider->ftp_rcount > 0);
 	provider->ftp_rcount--;
 
 	if (!provider->ftp_marked) {
 		if (p != NULL) {
 			/*
 			 * Live process: a retired provider is marked so that
 			 * cleanup gets a chance to remove some of its probes,
 			 * even if it cannot be removed entirely.
 			 */
 			if (provider->ftp_retired)
 				do_cleanup = provider->ftp_marked = 1;
 		} else if (provider->ftp_rcount == 0) {
 			/*
 			 * Dead process: we were only waiting for the last
 			 * probe to be disabled before freeing the provider.
 			 */
 			do_cleanup = provider->ftp_marked = 1;
 		}
 	}
 	mutex_exit(&provider->ftp_mtx);
 
 	if (do_cleanup)
 		fasttrap_pid_cleanup();
 
 #ifdef __FreeBSD__
 	if (p != NULL)
 		PRELE(p);
 #endif
 	if (probe->ftp_enabled) {
 		probe->ftp_enabled = 0;
 
 #ifdef illumos
 		ASSERT(MUTEX_HELD(&cpu_lock));
 #endif
 		fasttrap_disable_callbacks();
 	}
 }
 
 /*
  * DTrace provider "getargdesc" hook (dtps_getargdesc in pid_pops and
  * usdt_pops).
  *
  * On entry desc->dtargd_ndx selects the argument.  If the provider is retired
  * or the index is not below probe->ftp_nargs, dtargd_ndx is set to
  * DTRACE_ARGNONE and both type strings are left empty.  Otherwise
  * dtargd_native receives the native type name (looked up through ftp_argmap
  * when one is set) and, if the probe has translated types, dtargd_xlate
  * receives the translated type name.
  *
  * ftp_ntypes and ftp_xtypes are walked as consecutive NUL-terminated
  * strings, one per argument.
  */
 /*ARGSUSED*/
 static void
 fasttrap_pid_getargdesc(void *arg, dtrace_id_t id, void *parg,
     dtrace_argdesc_t *desc)
 {
 	fasttrap_probe_t *probe = parg;
 	char *str;
 	int i, ndx;
 
 	desc->dtargd_native[0] = '\0';
 	desc->dtargd_xlate[0] = '\0';
 
 	if (probe->ftp_prov->ftp_retired != 0 ||
 	    desc->dtargd_ndx >= probe->ftp_nargs) {
 		desc->dtargd_ndx = DTRACE_ARGNONE;
 		return;
 	}
 
 	/* The native type is indexed through the remapping table, if any. */
 	ndx = (probe->ftp_argmap != NULL) ?
 	    probe->ftp_argmap[desc->dtargd_ndx] : desc->dtargd_ndx;
 
 	str = probe->ftp_ntypes;
 	for (i = 0; i < ndx; i++) {
 		str += strlen(str) + 1;
 	}
 
 	/*
 	 * strcpy() writes strlen(str) + 1 bytes, so the string itself (not
 	 * the string starting one byte further in) must be shorter than the
 	 * destination buffer.
 	 */
 	ASSERT(strlen(str) < sizeof (desc->dtargd_native));
 	(void) strcpy(desc->dtargd_native, str);
 
 	if (probe->ftp_xtypes == NULL)
 		return;
 
 	/* The translated type is indexed by the caller's index directly. */
 	str = probe->ftp_xtypes;
 	for (i = 0; i < desc->dtargd_ndx; i++) {
 		str += strlen(str) + 1;
 	}
 
 	ASSERT(strlen(str) < sizeof (desc->dtargd_xlate));
 	(void) strcpy(desc->dtargd_xlate, str);
 }
 
 /*
  * DTrace provider "destroy" hook (dtps_destroy in pid_pops and usdt_pops).
  *
  * Gives the probe's tracepoints back to the global fasttrap_total budget,
  * then frees each tracepoint structure and finally the probe itself.  The
  * probe must already be disabled.
  */
 /*ARGSUSED*/
 static void
 fasttrap_pid_destroy(void *arg, dtrace_id_t id, void *parg)
 {
 	fasttrap_probe_t *probe = parg;
 	int tpi;
 
 	ASSERT(probe != NULL);
 	ASSERT(!probe->ftp_enabled);
 	ASSERT(fasttrap_total >= probe->ftp_ntps);
 
 	atomic_add_32(&fasttrap_total, -probe->ftp_ntps);
 
 	/*
 	 * A probe modified in the current or the previous generation goes
 	 * through fasttrap_mod_barrier() before its memory is released.
 	 */
 	if (probe->ftp_gen + 1 >= fasttrap_mod_gen)
 		fasttrap_mod_barrier(probe->ftp_gen);
 
 	for (tpi = 0; tpi < probe->ftp_ntps; tpi++) {
 		kmem_free(probe->ftp_tps[tpi].fit_tp,
 		    sizeof (fasttrap_tracepoint_t));
 	}
 
 	/* The probe was sized to hold exactly ftp_ntps trailing entries. */
 	kmem_free(probe, offsetof(fasttrap_probe_t, ftp_tps[probe->ftp_ntps]));
 }
 
 
 /*
  * Stability attributes for the pid provider.  fasttrap_add_probe() passes
  * this table to fasttrap_provider_lookup(), which also uses its address to
  * choose pid_pops over usdt_pops.
  *
  * NOTE(review): the five rows presumably line up with the dtpa_provider,
  * dtpa_mod, dtpa_func, dtpa_name and dtpa_args members of dtrace_pattr_t, in
  * that order -- confirm against the structure declaration.
  */
 static const dtrace_pattr_t pid_attr = {
 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
 };
 
 /*
  * Provider operations registered for pid providers; chosen in
  * fasttrap_provider_lookup() when the caller's attributes are &pid_attr.
  * Identical to usdt_pops except that argument values are fetched with
  * fasttrap_pid_getarg().
  */
 static dtrace_pops_t pid_pops = {
 	.dtps_provide =		fasttrap_pid_provide,
 	.dtps_provide_module =	NULL,
 	.dtps_enable =		fasttrap_pid_enable,
 	.dtps_disable =		fasttrap_pid_disable,
 	.dtps_suspend =		NULL,
 	.dtps_resume =		NULL,
 	.dtps_getargdesc =	fasttrap_pid_getargdesc,
 	.dtps_getargval =	fasttrap_pid_getarg,
 	.dtps_usermode =	NULL,
 	.dtps_destroy =		fasttrap_pid_destroy
 };
 
 /*
  * Provider operations registered for every provider whose attributes are not
  * &pid_attr (see fasttrap_provider_lookup()).  Identical to pid_pops except
  * that argument values are fetched with fasttrap_usdt_getarg().
  */
 static dtrace_pops_t usdt_pops = {
 	.dtps_provide =		fasttrap_pid_provide,
 	.dtps_provide_module =	NULL,
 	.dtps_enable =		fasttrap_pid_enable,
 	.dtps_disable =		fasttrap_pid_disable,
 	.dtps_suspend =		NULL,
 	.dtps_resume =		NULL,
 	.dtps_getargdesc =	fasttrap_pid_getargdesc,
 	.dtps_getargval =	fasttrap_usdt_getarg,
 	.dtps_usermode =	NULL,
 	.dtps_destroy =		fasttrap_pid_destroy
 };
 
 static fasttrap_proc_t *
 fasttrap_proc_lookup(pid_t pid)
 {
 	fasttrap_bucket_t *bucket;
 	fasttrap_proc_t *fprc, *new_fprc;
 
 
 	bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)];
 	mutex_enter(&bucket->ftb_mtx);
 
 	for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) {
 		if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) {
 			mutex_enter(&fprc->ftpc_mtx);
 			mutex_exit(&bucket->ftb_mtx);
 			fprc->ftpc_rcount++;
 			atomic_inc_64(&fprc->ftpc_acount);
 			ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount);
 			mutex_exit(&fprc->ftpc_mtx);
 
 			return (fprc);
 		}
 	}
 
 	/*
 	 * Drop the bucket lock so we don't try to perform a sleeping
 	 * allocation under it.
 	 */
 	mutex_exit(&bucket->ftb_mtx);
 
 	new_fprc = kmem_zalloc(sizeof (fasttrap_proc_t), KM_SLEEP);
 	new_fprc->ftpc_pid = pid;
 	new_fprc->ftpc_rcount = 1;
 	new_fprc->ftpc_acount = 1;
 #ifndef illumos
 	mutex_init(&new_fprc->ftpc_mtx, "fasttrap proc mtx", MUTEX_DEFAULT,
 	    NULL);
 #endif
 
 	mutex_enter(&bucket->ftb_mtx);
 
 	/*
 	 * Take another lap through the list to make sure a proc hasn't
 	 * been created for this pid while we weren't under the bucket lock.
 	 */
 	for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) {
 		if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) {
 			mutex_enter(&fprc->ftpc_mtx);
 			mutex_exit(&bucket->ftb_mtx);
 			fprc->ftpc_rcount++;
 			atomic_inc_64(&fprc->ftpc_acount);
 			ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount);
 			mutex_exit(&fprc->ftpc_mtx);
 
 			kmem_free(new_fprc, sizeof (fasttrap_proc_t));
 
 			return (fprc);
 		}
 	}
 
 	new_fprc->ftpc_next = bucket->ftb_data;
 	bucket->ftb_data = new_fprc;
 
 	mutex_exit(&bucket->ftb_mtx);
 
 	return (new_fprc);
 }
 
 /*
  * Drop one reference on a fasttrap_proc_t.  When the last reference goes
  * away the per-thread scratch-space bookkeeping is freed (non-illumos only),
  * the entry is unlinked from its fasttrap_procs bucket and its memory is
  * released.
  */
 static void
 fasttrap_proc_release(fasttrap_proc_t *proc)
 {
 	fasttrap_bucket_t *bucket;
 	fasttrap_proc_t *cur, **link;
 	pid_t pid = proc->ftpc_pid;
 #ifndef illumos
 	fasttrap_scrblock_t *scrblk, *scrblktmp;
 	fasttrap_scrspace_t *scrspc, *scrspctmp;
 	struct proc *p;
 	struct thread *td;
 #endif
 
 	mutex_enter(&proc->ftpc_mtx);
 
 	ASSERT(proc->ftpc_rcount != 0);
 	ASSERT(proc->ftpc_acount <= proc->ftpc_rcount);
 
 	proc->ftpc_rcount--;
 	if (proc->ftpc_rcount != 0) {
 		/* Other references remain; nothing more to do. */
 		mutex_exit(&proc->ftpc_mtx);
 		return;
 	}
 
 #ifndef illumos
 	/*
 	 * Last reference: tear down everything that tracks per-thread
 	 * scratch space for this process.
 	 */
 	LIST_FOREACH_SAFE(scrblk, &proc->ftpc_scrblks, ftsb_next,
 	    scrblktmp) {
 		LIST_REMOVE(scrblk, ftsb_next);
 		free(scrblk, M_SOLARIS);
 	}
 	LIST_FOREACH_SAFE(scrspc, &proc->ftpc_fscr, ftss_next, scrspctmp) {
 		LIST_REMOVE(scrspc, ftss_next);
 		free(scrspc, M_SOLARIS);
 	}
 	LIST_FOREACH_SAFE(scrspc, &proc->ftpc_ascr, ftss_next, scrspctmp) {
 		LIST_REMOVE(scrspc, ftss_next);
 		free(scrspc, M_SOLARIS);
 	}
 
 	/* If the process still exists, clear every thread's t_dtrace_sscr. */
 	p = pfind(pid);
 	if (p != NULL) {
 		FOREACH_THREAD_IN_PROC(p, td)
 			td->t_dtrace_sscr = NULL;
 		PROC_UNLOCK(p);
 	}
 #endif
 
 	mutex_exit(&proc->ftpc_mtx);
 
 	/*
 	 * No live provider may still be associated with this process by the
 	 * time the last reference is dropped.
 	 */
 	ASSERT(proc->ftpc_acount == 0);
 
 	bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)];
 	mutex_enter(&bucket->ftb_mtx);
 
 	/* Walk the chain until 'link' is the pointer that refers to proc. */
 	for (link = (fasttrap_proc_t **)&bucket->ftb_data;
 	    (cur = *link) != NULL && cur != proc; link = &cur->ftpc_next)
 		continue;
 
 	/*
 	 * Something strange has happened if the proc is not on the chain.
 	 */
 	ASSERT(cur != NULL);
 
 	*link = cur->ftpc_next;
 
 	mutex_exit(&bucket->ftb_mtx);
 
 	kmem_free(cur, sizeof (fasttrap_proc_t));
 }
 
 /*
  * Lookup a fasttrap-managed provider based on its name and associated pid.
  * If the pattr argument is non-NULL, this function instantiates the provider
  * if it doesn't exist otherwise it returns NULL. The provider is returned
  * with its lock held.
  */
 static fasttrap_provider_t *
 fasttrap_provider_lookup(pid_t pid, const char *name,
     const dtrace_pattr_t *pattr)
 {
 	fasttrap_provider_t *fp, *new_fp = NULL;
 	fasttrap_bucket_t *bucket;
 	char provname[DTRACE_PROVNAMELEN];
 	proc_t *p;
 	cred_t *cred;
 
 	ASSERT(strlen(name) < sizeof (fp->ftp_name));
 	ASSERT(pattr != NULL);
 
 	bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)];
 	mutex_enter(&bucket->ftb_mtx);
 
 	/*
 	 * Take a lap through the list and return the match if we find it.
 	 */
 	for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) {
 		if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 &&
 		    !fp->ftp_retired) {
 			mutex_enter(&fp->ftp_mtx);
 			mutex_exit(&bucket->ftb_mtx);
 			return (fp);
 		}
 	}
 
 	/*
 	 * Drop the bucket lock so we don't try to perform a sleeping
 	 * allocation under it.
 	 */
 	mutex_exit(&bucket->ftb_mtx);
 
 	/*
 	 * Make sure the process exists, isn't a child created as the result
 	 * of a vfork(2), and isn't a zombie (but may be in fork).
 	 */
 	if ((p = pfind(pid)) == NULL)
 		return (NULL);
 
 	/*
 	 * Increment p_dtrace_probes so that the process knows to inform us
 	 * when it exits or execs. fasttrap_provider_free() decrements this
 	 * when we're done with this provider.
 	 */
 	p->p_dtrace_probes++;
 
 	/*
 	 * Grab the credentials for this process so we have
 	 * something to pass to dtrace_register().
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	crhold(p->p_ucred);
 	cred = p->p_ucred;
 	PROC_UNLOCK(p);
 
 	new_fp = kmem_zalloc(sizeof (fasttrap_provider_t), KM_SLEEP);
 	new_fp->ftp_pid = pid;
 	new_fp->ftp_proc = fasttrap_proc_lookup(pid);
 #ifndef illumos
 	mutex_init(&new_fp->ftp_mtx, "provider mtx", MUTEX_DEFAULT, NULL);
 	mutex_init(&new_fp->ftp_cmtx, "lock on creating", MUTEX_DEFAULT, NULL);
 #endif
 
 	ASSERT(new_fp->ftp_proc != NULL);
 
 	mutex_enter(&bucket->ftb_mtx);
 
 	/*
 	 * Take another lap through the list to make sure a provider hasn't
 	 * been created for this pid while we weren't under the bucket lock.
 	 */
 	for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) {
 		if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 &&
 		    !fp->ftp_retired) {
 			mutex_enter(&fp->ftp_mtx);
 			mutex_exit(&bucket->ftb_mtx);
 			fasttrap_provider_free(new_fp);
 			crfree(cred);
 			return (fp);
 		}
 	}
 
 	(void) strcpy(new_fp->ftp_name, name);
 
 	/*
 	 * Fail and return NULL if either the provider name is too long
 	 * or we fail to register this new provider with the DTrace
 	 * framework. Note that this is the only place we ever construct
 	 * the full provider name -- we keep it in pieces in the provider
 	 * structure.
 	 */
 	if (snprintf(provname, sizeof (provname), "%s%u", name, (uint_t)pid) >=
 	    sizeof (provname) ||
 	    dtrace_register(provname, pattr,
 	    DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER, cred,
 	    pattr == &pid_attr ? &pid_pops : &usdt_pops, new_fp,
 	    &new_fp->ftp_provid) != 0) {
 		mutex_exit(&bucket->ftb_mtx);
 		fasttrap_provider_free(new_fp);
 		crfree(cred);
 		return (NULL);
 	}
 
 	new_fp->ftp_next = bucket->ftb_data;
 	bucket->ftb_data = new_fp;
 
 	mutex_enter(&new_fp->ftp_mtx);
 	mutex_exit(&bucket->ftb_mtx);
 
 	crfree(cred);
 	return (new_fp);
 }
 
 /*
  * Release everything owned by a provider: its hold on the associated
  * fasttrap_proc_t, its locks (non-illumos), its memory, and finally the
  * p_dtrace_probes count it contributed to the process, if that process can
  * still be found.
  */
 static void
 fasttrap_provider_free(fasttrap_provider_t *provider)
 {
 	fasttrap_proc_t *fprc = provider->ftp_proc;
 	pid_t pid = provider->ftp_pid;
 	proc_t *p;
 
 	/*
 	 * Nobody may still be using this provider: no enabled probes, no
 	 * consumers creating probes, and no meta provider references.
 	 */
 	ASSERT(provider->ftp_rcount == 0);
 	ASSERT(provider->ftp_ccount == 0);
 	ASSERT(provider->ftp_mcount == 0);
 
 	/*
 	 * A provider that was never retired still counts as active on its
 	 * proc structure, so that count has to be dropped explicitly here.
 	 */
 	if (!provider->ftp_retired) {
 		atomic_dec_64(&fprc->ftpc_acount);
 		ASSERT(fprc->ftpc_acount < fprc->ftpc_rcount);
 	}
 
 	fasttrap_proc_release(fprc);
 
 #ifndef illumos
 	mutex_destroy(&provider->ftp_mtx);
 	mutex_destroy(&provider->ftp_cmtx);
 #endif
 	kmem_free(provider, sizeof (fasttrap_provider_t));
 
 	/*
 	 * Decrement p_dtrace_probes on the process whose provider we are
 	 * freeing.  We don't have to worry about clobbering someone else's
 	 * modifications to it because we have locked the bucket that
 	 * corresponds to this process's hash chain in the provider hash
 	 * table.  It is fine if the process can no longer be found.
 	 */
 	p = pfind(pid);
 	if (p != NULL) {
 		p->p_dtrace_probes--;
 #ifndef illumos
 		PROC_UNLOCK(p);
 #endif
 	}
 }
 
 /*
  * Retire the non-retired provider registered for (pid, name), if there is
  * one.  When mprov is non-zero the call first drops one meta-provider
  * reference (ftp_mcount) and only goes on to retire the provider once that
  * count reaches zero.
  */
 static void
 fasttrap_provider_retire(pid_t pid, const char *name, int mprov)
 {
 	fasttrap_provider_t *fp;
 	fasttrap_bucket_t *bucket;
 	dtrace_provider_id_t provid;
 
 	ASSERT(strlen(name) < sizeof (fp->ftp_name));
 
 	bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)];
 	mutex_enter(&bucket->ftb_mtx);
 
 	/* Skip every entry that is not a live provider for (pid, name). */
 	fp = bucket->ftb_data;
 	while (fp != NULL && (fp->ftp_pid != pid ||
 	    strcmp(fp->ftp_name, name) != 0 || fp->ftp_retired))
 		fp = fp->ftp_next;
 
 	if (fp == NULL) {
 		mutex_exit(&bucket->ftb_mtx);
 		return;
 	}
 
 	mutex_enter(&fp->ftp_mtx);
 	ASSERT(!mprov || fp->ftp_mcount > 0);
 	if (mprov) {
 		fp->ftp_mcount--;
 		if (fp->ftp_mcount != 0) {
 			/* Other meta-provider references remain. */
 			mutex_exit(&fp->ftp_mtx);
 			mutex_exit(&bucket->ftb_mtx);
 			return;
 		}
 	}
 
 	/*
 	 * Three things happen to the provider here: its proc loses one
 	 * active count, it is flagged retired (we are done with it), and it
 	 * is marked (the post-processing step should try to remove it).
 	 * Once the proc's active count reaches zero -- as it will during
 	 * exit or exec -- the proc and its providers become defunct.
 	 *
 	 * Lock ordering: the bucket lock has to be taken before the provider
 	 * lock for the lookup above, but the provider lock has to be dropped
 	 * before calling into the DTrace framework, because callbacks
 	 * invoked from the framework acquire the provider lock themselves.
 	 * The bucket lock is therefore what protects the integrity of the
 	 * provider hash table across that call.
 	 */
 	atomic_dec_64(&fp->ftp_proc->ftpc_acount);
 	ASSERT(fp->ftp_proc->ftpc_acount < fp->ftp_proc->ftpc_rcount);
 
 	fp->ftp_retired = 1;
 	fp->ftp_marked = 1;
 	provid = fp->ftp_provid;
 	mutex_exit(&fp->ftp_mtx);
 
 	/*
 	 * The same provider cannot be invalidated twice, because the search
 	 * above skips providers that are already retired.
 	 */
 	dtrace_invalidate(provid);
 
 	mutex_exit(&bucket->ftb_mtx);
 
 	fasttrap_pid_cleanup();
 }
 
 /*
  * qsort() comparator for uint32_t values.
  *
  * Returns a negative, zero or positive value when *ap is respectively less
  * than, equal to or greater than *bp.  The values are compared directly
  * rather than subtracted: an unsigned difference converted to int has the
  * wrong sign whenever the operands differ by more than INT_MAX.
  */
 static int
 fasttrap_uint32_cmp(const void *ap, const void *bp)
 {
 	const uint32_t a = *(const uint32_t *)ap;
 	const uint32_t b = *(const uint32_t *)bp;
 
 	return ((a > b) - (a < b));
 }
 
 /*
  * qsort() comparator for uint64_t values.
  *
  * Returns a negative, zero or positive value when *ap is respectively less
  * than, equal to or greater than *bp.  The values are compared directly
  * rather than subtracted: truncating a 64-bit difference to int reports
  * "equal" for values that differ by a multiple of 2^32 and can report the
  * wrong sign for other large differences.
  */
 static int
 fasttrap_uint64_cmp(const void *ap, const void *bp)
 {
 	const uint64_t a = *(const uint64_t *)ap;
 	const uint64_t b = *(const uint64_t *)bp;
 
 	return ((a > b) - (a < b));
 }
 
 /*
  * Create the pid-provider probe(s) described by pdata.
  *
  * For DTFTP_OFFSETS one single-tracepoint probe is created per offset, named
  * after the offset in hex; offsets whose probe already exists are skipped.
  * For DTFTP_ENTRY and DTFTP_RETURN a single probe ("entry" or "return")
  * covering all offsets is created, unless it already exists.
  *
  * Note that pdata->ftps_offs is sorted in place on the entry/return path.
  *
  * Returns 0 on success, EINVAL if there are no offsets or ftps_type is
  * unknown, ESRCH if the provider cannot be looked up or created, and ENOMEM
  * if fasttrap_max would be exceeded or the sorted entry/return offsets are
  * not strictly increasing.
  */
 static int
 fasttrap_add_probe(fasttrap_probe_spec_t *pdata)
 {
 	fasttrap_provider_t *provider;
 	fasttrap_probe_t *pp;
 	fasttrap_tracepoint_t *tp;
 	char *name;
 	int i, aframes = 0, whack;
 
 	/*
 	 * There needs to be at least one desired trace point.
 	 */
 	if (pdata->ftps_noffs == 0)
 		return (EINVAL);
 
 	switch (pdata->ftps_type) {
 	case DTFTP_ENTRY:
 		name = "entry";
 		aframes = FASTTRAP_ENTRY_AFRAMES;
 		break;
 	case DTFTP_RETURN:
 		name = "return";
 		aframes = FASTTRAP_RETURN_AFRAMES;
 		break;
 	case DTFTP_OFFSETS:
 		name = NULL;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	/* On success the provider comes back with its ftp_mtx held. */
 	if ((provider = fasttrap_provider_lookup(pdata->ftps_pid,
 	    FASTTRAP_PID_NAME, &pid_attr)) == NULL)
 		return (ESRCH);
 
 	/*
 	 * Increment this reference count to indicate that a consumer is
 	 * actively adding a new probe associated with this provider. This
 	 * prevents the provider from being deleted -- we'll need to check
 	 * for pending deletions when we drop this reference count.
 	 */
 	provider->ftp_ccount++;
 	mutex_exit(&provider->ftp_mtx);
 
 	/*
 	 * Grab the creation lock to ensure consistency between calls to
 	 * dtrace_probe_lookup() and dtrace_probe_create() in the face of
 	 * other threads creating probes. We must drop the provider lock
 	 * before taking this lock to avoid a three-way deadlock with the
 	 * DTrace framework.
 	 */
 	mutex_enter(&provider->ftp_cmtx);
 
 	if (name == NULL) {
 		for (i = 0; i < pdata->ftps_noffs; i++) {
 			/* 16 hex digits plus the terminating NUL. */
 			char name_str[17];
 
 			(void) sprintf(name_str, "%llx",
 			    (unsigned long long)pdata->ftps_offs[i]);
 
 			/* Skip offsets that already have a probe. */
 			if (dtrace_probe_lookup(provider->ftp_provid,
 			    pdata->ftps_mod, pdata->ftps_func, name_str) != 0)
 				continue;
 
 			/* Reserve one tracepoint; back out if over the limit. */
 			atomic_inc_32(&fasttrap_total);
 
 			if (fasttrap_total > fasttrap_max) {
 				atomic_dec_32(&fasttrap_total);
 				goto no_mem;
 			}
 
 			pp = kmem_zalloc(sizeof (fasttrap_probe_t), KM_SLEEP);
 
 			pp->ftp_prov = provider;
 			pp->ftp_faddr = pdata->ftps_pc;
 			pp->ftp_fsize = pdata->ftps_size;
 			pp->ftp_pid = pdata->ftps_pid;
 			pp->ftp_ntps = 1;
 
 			tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t),
 			    KM_SLEEP);
 
 			tp->ftt_proc = provider->ftp_proc;
 			tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc;
 			tp->ftt_pid = pdata->ftps_pid;
 
 			pp->ftp_tps[0].fit_tp = tp;
 			pp->ftp_tps[0].fit_id.fti_probe = pp;
 			pp->ftp_tps[0].fit_id.fti_ptype = pdata->ftps_type;
 
 			pp->ftp_id = dtrace_probe_create(provider->ftp_provid,
 			    pdata->ftps_mod, pdata->ftps_func, name_str,
 			    FASTTRAP_OFFSET_AFRAMES, pp);
 		}
 
 	} else if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod,
 	    pdata->ftps_func, name) == 0) {
 		/* Reserve all tracepoints at once; back out if over the limit. */
 		atomic_add_32(&fasttrap_total, pdata->ftps_noffs);
 
 		if (fasttrap_total > fasttrap_max) {
 			atomic_add_32(&fasttrap_total, -pdata->ftps_noffs);
 			goto no_mem;
 		}
 
 		/*
 		 * Make sure all tracepoint program counter values are unique.
 		 * We later assume that each probe has exactly one tracepoint
 		 * for a given pc.
 		 */
 		qsort(pdata->ftps_offs, pdata->ftps_noffs,
 		    sizeof (uint64_t), fasttrap_uint64_cmp);
 		for (i = 1; i < pdata->ftps_noffs; i++) {
 			if (pdata->ftps_offs[i] > pdata->ftps_offs[i - 1])
 				continue;
 
 			/* Duplicate offset: undo the reservation and fail. */
 			atomic_add_32(&fasttrap_total, -pdata->ftps_noffs);
 			goto no_mem;
 		}
 
 		ASSERT(pdata->ftps_noffs > 0);
 		pp = kmem_zalloc(offsetof(fasttrap_probe_t,
 		    ftp_tps[pdata->ftps_noffs]), KM_SLEEP);
 
 		pp->ftp_prov = provider;
 		pp->ftp_faddr = pdata->ftps_pc;
 		pp->ftp_fsize = pdata->ftps_size;
 		pp->ftp_pid = pdata->ftps_pid;
 		pp->ftp_ntps = pdata->ftps_noffs;
 
 		for (i = 0; i < pdata->ftps_noffs; i++) {
 			tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t),
 			    KM_SLEEP);
 
 			tp->ftt_proc = provider->ftp_proc;
 			tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc;
 			tp->ftt_pid = pdata->ftps_pid;
 
 			pp->ftp_tps[i].fit_tp = tp;
 			pp->ftp_tps[i].fit_id.fti_probe = pp;
 			pp->ftp_tps[i].fit_id.fti_ptype = pdata->ftps_type;
 		}
 
 		pp->ftp_id = dtrace_probe_create(provider->ftp_provid,
 		    pdata->ftps_mod, pdata->ftps_func, name, aframes, pp);
 	}
 
 	mutex_exit(&provider->ftp_cmtx);
 
 	/*
 	 * We know that the provider is still valid since we incremented the
 	 * creation reference count. If someone tried to clean up this provider
 	 * while we were using it (e.g. because the process called exec(2) or
 	 * exit(2)), take note of that and try to clean it up now.
 	 */
 	mutex_enter(&provider->ftp_mtx);
 	provider->ftp_ccount--;
 	whack = provider->ftp_retired;
 	mutex_exit(&provider->ftp_mtx);
 
 	if (whack)
 		fasttrap_pid_cleanup();
 
 	return (0);
 
 no_mem:
 	/*
 	 * If we've exhausted the allowable resources, we'll try to remove
 	 * this provider to free some up. This is to cover the case where
 	 * the user has accidentally created many more probes than was
 	 * intended (e.g. pid123:::).
 	 */
 	mutex_exit(&provider->ftp_cmtx);
 	mutex_enter(&provider->ftp_mtx);
 	provider->ftp_ccount--;
 	provider->ftp_marked = 1;
 	mutex_exit(&provider->ftp_mtx);
 
 	fasttrap_pid_cleanup();
 
 	return (ENOMEM);
 }
 
 /*
  * Meta-provider "provide_pid" hook (dtms_provide_pid in fasttrap_mops).
  *
  * arg  - unused.
  * dhpv - helper provider description; its stability classes are capped at
  *        DTRACE_CLASS_ISA in place.
  * pid  - process the provider is instantiated for.
  *
  * Returns the provider with its meta-provider count (ftp_mcount) bumped and
  * its lock released, or NULL if the name is too long, collides with
  * FASTTRAP_PID_NAME, or the provider cannot be looked up or created.  Each
  * failure is reported with a newline-terminated printf() so that successive
  * messages do not run together on one log line.
  */
 /*ARGSUSED*/
 static void *
 fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid)
 {
 	fasttrap_provider_t *provider;
 
 	/*
 	 * A 32-bit unsigned integer (like a pid for example) can be
 	 * expressed in 10 or fewer decimal digits. Make sure that we'll
 	 * have enough space for the provider name.
 	 */
 	if (strlen(dhpv->dthpv_provname) + 10 >=
 	    sizeof (provider->ftp_name)) {
 		printf("failed to instantiate provider %s: "
 		    "name too long to accomodate pid\n", dhpv->dthpv_provname);
 		return (NULL);
 	}
 
 	/*
 	 * Don't let folks spoof the true pid provider.
 	 */
 	if (strcmp(dhpv->dthpv_provname, FASTTRAP_PID_NAME) == 0) {
 		printf("failed to instantiate provider %s: "
 		    "%s is an invalid name\n", dhpv->dthpv_provname,
 		    FASTTRAP_PID_NAME);
 		return (NULL);
 	}
 
 	/*
 	 * The highest stability class that fasttrap supports is ISA; cap
 	 * the stability of the new provider accordingly.
 	 */
 	if (dhpv->dthpv_pattr.dtpa_provider.dtat_class > DTRACE_CLASS_ISA)
 		dhpv->dthpv_pattr.dtpa_provider.dtat_class = DTRACE_CLASS_ISA;
 	if (dhpv->dthpv_pattr.dtpa_mod.dtat_class > DTRACE_CLASS_ISA)
 		dhpv->dthpv_pattr.dtpa_mod.dtat_class = DTRACE_CLASS_ISA;
 	if (dhpv->dthpv_pattr.dtpa_func.dtat_class > DTRACE_CLASS_ISA)
 		dhpv->dthpv_pattr.dtpa_func.dtat_class = DTRACE_CLASS_ISA;
 	if (dhpv->dthpv_pattr.dtpa_name.dtat_class > DTRACE_CLASS_ISA)
 		dhpv->dthpv_pattr.dtpa_name.dtat_class = DTRACE_CLASS_ISA;
 	if (dhpv->dthpv_pattr.dtpa_args.dtat_class > DTRACE_CLASS_ISA)
 		dhpv->dthpv_pattr.dtpa_args.dtat_class = DTRACE_CLASS_ISA;
 
 	/* On success the provider comes back with its ftp_mtx held. */
 	if ((provider = fasttrap_provider_lookup(pid, dhpv->dthpv_provname,
 	    &dhpv->dthpv_pattr)) == NULL) {
 		printf("failed to instantiate provider %s for "
 		    "process %u\n", dhpv->dthpv_provname, (uint_t)pid);
 		return (NULL);
 	}
 
 	/*
 	 * Up the meta provider count so this provider isn't removed until
 	 * the meta provider has been told to remove it.
 	 */
 	provider->ftp_mcount++;
 
 	mutex_exit(&provider->ftp_mtx);
 
 	return (provider);
 }
 
 /*
  * Meta-provider "create_probe" hook (dtms_create_probe in fasttrap_mops).
  *
  * Builds one probe whose tracepoints are the dhpb offsets followed by the
  * is-enabled offsets, and registers it with DTrace.  The function returns
  * without creating anything if either offset array contains duplicates or
  * if the fasttrap_max tracepoint limit would be exceeded.  Both offset
  * arrays are sorted in place.
  *
  * We know a few things about our context here:  we know that the probe being
  * created doesn't already exist (DTrace won't load DOF at the same address
  * twice, even if explicitly told to do so) and we know that we are
  * single-threaded with respect to the meta provider machinery. Knowing that
  * this is a new probe and that there is no way for us to race with another
  * operation on this provider allows us an important optimization: we need not
  * lookup a probe before adding it.  Saving this lookup is important because
  * this code is in the fork path for processes with USDT probes, and lookups
  * here are potentially very expensive because of long hash conflicts on
  * module, function and name (DTrace doesn't hash on provider name).
  */
 /*ARGSUSED*/
 static void
 fasttrap_meta_create_probe(void *arg, void *parg,
     dtrace_helper_probedesc_t *dhpb)
 {
 	fasttrap_provider_t *provider = parg;
 	fasttrap_probe_t *pp;
 	fasttrap_tracepoint_t *tp;
 	int i, j;
 	uint32_t ntps;
 
 	/*
 	 * Since the meta provider count is non-zero we don't have to worry
 	 * about this provider disappearing.
 	 */
 	ASSERT(provider->ftp_mcount > 0);
 
 	/*
 	 * The offsets must be unique.
 	 */
 	qsort(dhpb->dthpb_offs, dhpb->dthpb_noffs, sizeof (uint32_t),
 	    fasttrap_uint32_cmp);
 	for (i = 1; i < dhpb->dthpb_noffs; i++) {
 		if (dhpb->dthpb_base + dhpb->dthpb_offs[i] <=
 		    dhpb->dthpb_base + dhpb->dthpb_offs[i - 1])
 			return;
 	}
 
 	/* The is-enabled offsets must be unique as well. */
 	qsort(dhpb->dthpb_enoffs, dhpb->dthpb_nenoffs, sizeof (uint32_t),
 	    fasttrap_uint32_cmp);
 	for (i = 1; i < dhpb->dthpb_nenoffs; i++) {
 		if (dhpb->dthpb_base + dhpb->dthpb_enoffs[i] <=
 		    dhpb->dthpb_base + dhpb->dthpb_enoffs[i - 1])
 			return;
 	}
 
 	/*
 	 * NOTE(review): a probe with no offsets at all is only caught by the
 	 * ASSERT below -- confirm that callers can never pass one.
 	 */
 	ntps = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
 	ASSERT(ntps > 0);
 
 	/* Reserve the tracepoints; back out if that exceeds the limit. */
 	atomic_add_32(&fasttrap_total, ntps);
 
 	if (fasttrap_total > fasttrap_max) {
 		atomic_add_32(&fasttrap_total, -ntps);
 		return;
 	}
 
 	pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[ntps]), KM_SLEEP);
 
 	pp->ftp_prov = provider;
 	pp->ftp_pid = provider->ftp_pid;
 	pp->ftp_ntps = ntps;
 	pp->ftp_nargs = dhpb->dthpb_xargc;
 	pp->ftp_xtypes = dhpb->dthpb_xtypes;
 	pp->ftp_ntypes = dhpb->dthpb_ntypes;
 
 	/*
 	 * First create a tracepoint for each actual point of interest.
 	 */
 	for (i = 0; i < dhpb->dthpb_noffs; i++) {
 		tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP);
 
 		tp->ftt_proc = provider->ftp_proc;
 		tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_offs[i];
 		tp->ftt_pid = provider->ftp_pid;
 
 		pp->ftp_tps[i].fit_tp = tp;
 		pp->ftp_tps[i].fit_id.fti_probe = pp;
 #ifdef __sparc
 		pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_POST_OFFSETS;
 #else
 		pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_OFFSETS;
 #endif
 	}
 
 	/*
 	 * Then create a tracepoint for each is-enabled point.  Here i keeps
 	 * counting from where the loop above stopped (the slot in ftp_tps),
 	 * while j indexes the is-enabled offset array.
 	 */
 	for (j = 0; i < ntps; i++, j++) {
 		tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP);
 
 		tp->ftt_proc = provider->ftp_proc;
 		tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_enoffs[j];
 		tp->ftt_pid = provider->ftp_pid;
 
 		pp->ftp_tps[i].fit_tp = tp;
 		pp->ftp_tps[i].fit_id.fti_probe = pp;
 		pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_IS_ENABLED;
 	}
 
 	/*
 	 * If the arguments are shuffled around we set the argument remapping
 	 * table. Later, when the probe fires, we only remap the arguments
 	 * if the table is non-NULL.
 	 */
 	for (i = 0; i < dhpb->dthpb_xargc; i++) {
 		if (dhpb->dthpb_args[i] != i) {
 			pp->ftp_argmap = dhpb->dthpb_args;
 			break;
 		}
 	}
 
 	/*
 	 * The probe is fully constructed -- register it with DTrace.
 	 */
 	pp->ftp_id = dtrace_probe_create(provider->ftp_provid, dhpb->dthpb_mod,
 	    dhpb->dthpb_func, dhpb->dthpb_name, FASTTRAP_OFFSET_AFRAMES, pp);
 }
 
 /*
  * Meta-provider "remove_pid" hook (dtms_remove_pid in fasttrap_mops).
  *
  * Retires the provider named dhpv->dthpv_provname for pid.  The final
  * argument (mprov = 1) tells fasttrap_provider_retire() to drop one
  * meta-provider reference and to retire the provider only once that count
  * has reached zero.  arg is unused.
  */
 /*ARGSUSED*/
 static void
 fasttrap_meta_remove(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid)
 {
 	/*
 	 * Clean up the USDT provider. There may be active consumers of the
 	 * provider busy adding probes, no damage will actually befall the
 	 * provider until that count has dropped to zero. This just puts
 	 * the provider on death row.
 	 */
 	fasttrap_provider_retire(pid, dhpv->dthpv_provname, 1);
 }
 
 /*
  * Meta-provider operations vector: probe creation, per-process provider
  * instantiation and per-process provider removal.
  */
 static dtrace_mops_t fasttrap_mops = {
 	.dtms_create_probe =	fasttrap_meta_create_probe,
 	.dtms_provide_pid =	fasttrap_meta_provide,
 	.dtms_remove_pid =	fasttrap_meta_remove
 };
 
 /*
  * Device open entry point.  Every argument is unused and the open always
  * succeeds (returns 0).
  */
 /*ARGSUSED*/
 static int
 fasttrap_open(struct cdev *dev __unused, int oflags __unused,
     int devtype __unused, struct thread *td __unused)
 {
 	return (0);
 }
 
 /*
  * ioctl handler for the fasttrap device.
  *
  * Two commands are recognized:
  *
  *   FASTTRAPIOC_MAKEPROBE - arg holds a user pointer to a
  *	fasttrap_probe_spec_t.  The spec is copied in, validated and handed
  *	to fasttrap_add_probe().
  *   FASTTRAPIOC_GETINSTR - look up the tracepoint matching the queried
  *	pid/pc and copy the saved instruction back out.
  *
  * Returns 0 on success or an errno value: EAGAIN if DTrace is not attached,
  * EFAULT on a failed copy, EINVAL for an unknown command or a malformed
  * spec, ENOMEM for an oversized spec, ENOENT if no matching tracepoint
  * exists, or whatever fasttrap_add_probe() returns.
  */
 /*ARGSUSED*/
 static int
 fasttrap_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int fflag,
     struct thread *td)
 {
 	if (!dtrace_attached())
 		return (EAGAIN);
 
 	if (cmd == FASTTRAPIOC_MAKEPROBE) {
 		fasttrap_probe_spec_t *uprobe = *(fasttrap_probe_spec_t **)arg;
 		fasttrap_probe_spec_t *probe;
 		const size_t maxsize = 1024 * 1024;
 		uint64_t noffs;
 		size_t size;
 		int ret, err;
 
 		/*
 		 * Fetch just the offset count first so that the full size of
 		 * the variable-length spec can be determined.
 		 */
 		if (copyin(&uprobe->ftps_noffs, &noffs,
 		    sizeof (uprobe->ftps_noffs)))
 			return (EFAULT);
 
 		/*
 		 * Probes must have at least one tracepoint.
 		 */
 		if (noffs == 0)
 			return (EINVAL);
 
 		/*
 		 * Refuse specs larger than 1MB.  The bound is applied to the
 		 * user-supplied count before any arithmetic is done with it:
 		 * computing the size first would let a huge noffs wrap the
 		 * multiplication (or the narrowing to size_t) around to a
 		 * small value that passes the check and under-allocates the
 		 * buffer.  For counts that do not wrap this rejects exactly
 		 * the same requests as checking the computed size.
 		 * (Assumes sizeof (fasttrap_probe_spec_t) is itself below the
 		 * cap -- the structure is declared outside this file view.)
 		 */
 		if (noffs - 1 > (maxsize - sizeof (fasttrap_probe_spec_t)) /
 		    sizeof (probe->ftps_offs[0]))
 			return (ENOMEM);
 
 		size = sizeof (fasttrap_probe_spec_t) +
 		    sizeof (probe->ftps_offs[0]) * (noffs - 1);
 
 		probe = kmem_alloc(size, KM_SLEEP);
 
 		/*
 		 * Re-check the count after the full copy: userland may have
 		 * changed it between the two copyin() calls.
 		 */
 		if (copyin(uprobe, probe, size) != 0 ||
 		    probe->ftps_noffs != noffs) {
 			kmem_free(probe, size);
 			return (EFAULT);
 		}
 
 		/*
 		 * Verify that the function and module strings contain no
 		 * funny characters.
 		 *
 		 * NOTE(review): strlen() trusts userland to have
 		 * NUL-terminated both strings inside the spec -- confirm
 		 * whether a bounded length check is needed here.
 		 */
 		if (u8_validate(probe->ftps_func, strlen(probe->ftps_func),
 		    NULL, U8_VALIDATE_ENTIRE, &err) < 0) {
 			ret = EINVAL;
 			goto err;
 		}
 
 		if (u8_validate(probe->ftps_mod, strlen(probe->ftps_mod),
 		    NULL, U8_VALIDATE_ENTIRE, &err) < 0) {
 			ret = EINVAL;
 			goto err;
 		}
 
 #ifdef notyet
 		if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) {
 			proc_t *p;
 			pid_t pid = probe->ftps_pid;
 
 			mutex_enter(&pidlock);
 			/*
 			 * Report an error if the process doesn't exist
 			 * or is actively being birthed.
 			 */
 			if ((p = pfind(pid)) == NULL || p->p_stat == SIDL) {
 				mutex_exit(&pidlock);
 				/* Leave through err so the spec is freed. */
 				ret = ESRCH;
 				goto err;
 			}
 			mutex_enter(&p->p_lock);
 			mutex_exit(&pidlock);
 
 			if ((ret = priv_proc_cred_perm(cr, p, NULL,
 			    VREAD | VWRITE)) != 0) {
 				mutex_exit(&p->p_lock);
 				goto err;
 			}
 			mutex_exit(&p->p_lock);
 		}
 #endif /* notyet */
 
 		ret = fasttrap_add_probe(probe);
 err:
 		/* The kernel copy of the spec is released on every path. */
 		kmem_free(probe, size);
 
 		return (ret);
 
 	} else if (cmd == FASTTRAPIOC_GETINSTR) {
 		fasttrap_instr_query_t instr;
 		fasttrap_tracepoint_t *tp;
 		uint_t index;
 #ifdef notyet
 		int ret;
 #endif
 
 		/*
 		 * NOTE(review): the query is only copied in when illumos is
 		 * defined.  Otherwise nothing in this function fills in
 		 * instr before ftiq_pid/ftiq_pc are read below, and the
 		 * copyout() at the end still treats arg as the destination
 		 * address.  Fixing this needs the platform's ioctl argument
 		 * convention for this command, which is not visible here.
 		 */
 #ifdef illumos
 		if (copyin((void *)arg, &instr, sizeof (instr)) != 0)
 			return (EFAULT);
 #endif
 
 #ifdef notyet
 		if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) {
 			proc_t *p;
 			pid_t pid = instr.ftiq_pid;
 
 			mutex_enter(&pidlock);
 			/*
 			 * Report an error if the process doesn't exist
 			 * or is actively being birthed.
 			 */
 			if ((p = pfind(pid)) == NULL || p->p_stat == SIDL) {
 				mutex_exit(&pidlock);
 				return (ESRCH);
 			}
 			mutex_enter(&p->p_lock);
 			mutex_exit(&pidlock);
 
 			if ((ret = priv_proc_cred_perm(cr, p, NULL,
 			    VREAD)) != 0) {
 				mutex_exit(&p->p_lock);
 				return (ret);
 			}
 
 			mutex_exit(&p->p_lock);
 		}
 #endif /* notyet */
 
 		index = FASTTRAP_TPOINTS_INDEX(instr.ftiq_pid, instr.ftiq_pc);
 
 		/*
 		 * Walk the hash chain under the bucket lock looking for a
 		 * tracepoint with this pid and pc whose process still has a
 		 * non-zero ftpc_acount.
 		 */
 		mutex_enter(&fasttrap_tpoints.fth_table[index].ftb_mtx);
 		tp = fasttrap_tpoints.fth_table[index].ftb_data;
 		while (tp != NULL) {
 			if (instr.ftiq_pid == tp->ftt_pid &&
 			    instr.ftiq_pc == tp->ftt_pc &&
 			    tp->ftt_proc->ftpc_acount != 0)
 				break;
 
 			tp = tp->ftt_next;
 		}
 
 		if (tp == NULL) {
 			mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx);
 			return (ENOENT);
 		}
 
 		/* Snapshot the instruction before dropping the bucket lock. */
 		bcopy(&tp->ftt_instr, &instr.ftiq_instr,
 		    sizeof (instr.ftiq_instr));
 		mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx);
 
 		if (copyout(&instr, (void *)arg, sizeof (instr)) != 0)
 			return (EFAULT);
 
 		return (0);
 	}
 
 	return (EINVAL);
 }
 
 /*
  * Bring up the fasttrap provider: create the device node, initialize the
  * global locks, size and allocate the tracepoint, provider and process hash
  * tables, start the "ftcleanup" kernel process, install the fork/exec/exit
  * hooks and register as the "fasttrap" DTrace meta-provider.
  *
  * Returns 0 on success.  If the cleanup process cannot be created,
  * everything set up so far is torn down again and the kproc_create() error
  * is returned.
  */
 static int
 fasttrap_load(void)
 {
 	ulong_t nent;
 	int i, ret;
 
 	/* Create the /dev/dtrace/fasttrap entry. */
 	fasttrap_cdev = make_dev(&fasttrap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
 	    "dtrace/fasttrap");
 
 	mtx_init(&fasttrap_cleanup_mtx, "fasttrap clean", "dtrace", MTX_DEF);
 	mutex_init(&fasttrap_count_mtx, "fasttrap count mtx", MUTEX_DEFAULT,
 	    NULL);
 
 #ifdef illumos
 	fasttrap_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
 	    "fasttrap-max-probes", FASTTRAP_MAX_DEFAULT);
 #endif
 	fasttrap_total = 0;
 
 	/*
 	 * Conjure up the tracepoints hashtable...
 	 */
 #ifdef illumos
 	nent = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
 	    "fasttrap-hash-size", FASTTRAP_TPOINTS_DEFAULT_SIZE);
 #else
 	nent = tpoints_hash_size;
 #endif
 
 	/* Fall back to the default for a zero or unreasonably large size. */
 	if (nent == 0 || nent > 0x1000000)
 		nent = FASTTRAP_TPOINTS_DEFAULT_SIZE;
 
 	tpoints_hash_size = nent;
 
 	/*
 	 * fth_mask is derived as fth_nent - 1, so the entry count has to be
 	 * a power of two; other sizes are replaced by a power of two derived
 	 * from fasttrap_highbit().
 	 */
 	if (ISP2(nent))
 		fasttrap_tpoints.fth_nent = nent;
 	else
 		fasttrap_tpoints.fth_nent = 1 << fasttrap_highbit(nent);
 	ASSERT(fasttrap_tpoints.fth_nent > 0);
 	fasttrap_tpoints.fth_mask = fasttrap_tpoints.fth_nent - 1;
 	fasttrap_tpoints.fth_table = kmem_zalloc(fasttrap_tpoints.fth_nent *
 	    sizeof (fasttrap_bucket_t), KM_SLEEP);
 #ifndef illumos
 	for (i = 0; i < fasttrap_tpoints.fth_nent; i++)
 		mutex_init(&fasttrap_tpoints.fth_table[i].ftb_mtx,
 		    "tracepoints bucket mtx", MUTEX_DEFAULT, NULL);
 #endif
 
 	/*
 	 * ... and the providers hash table...
 	 */
 	nent = FASTTRAP_PROVIDERS_DEFAULT_SIZE;
 	if (ISP2(nent))
 		fasttrap_provs.fth_nent = nent;
 	else
 		fasttrap_provs.fth_nent = 1 << fasttrap_highbit(nent);
 	ASSERT(fasttrap_provs.fth_nent > 0);
 	fasttrap_provs.fth_mask = fasttrap_provs.fth_nent - 1;
 	fasttrap_provs.fth_table = kmem_zalloc(fasttrap_provs.fth_nent *
 	    sizeof (fasttrap_bucket_t), KM_SLEEP);
 #ifndef illumos
 	for (i = 0; i < fasttrap_provs.fth_nent; i++)
 		mutex_init(&fasttrap_provs.fth_table[i].ftb_mtx,
 		    "providers bucket mtx", MUTEX_DEFAULT, NULL);
 #endif
 
 	ret = kproc_create(fasttrap_pid_cleanup_cb, NULL,
 	    &fasttrap_cleanup_proc, 0, 0, "ftcleanup");
 	if (ret != 0) {
 		/*
 		 * Undo everything done so far.  Only the tracepoint and
 		 * provider tables exist at this point; the procs table is
 		 * not allocated until later.
 		 */
 		destroy_dev(fasttrap_cdev);
 #ifndef illumos
 		for (i = 0; i < fasttrap_provs.fth_nent; i++)
 			mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx);
 		for (i = 0; i < fasttrap_tpoints.fth_nent; i++)
 			mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx);
 #endif
 		kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent *
 		    sizeof (fasttrap_bucket_t));
 		/* The tracepoint table must be released as well. */
 		kmem_free(fasttrap_tpoints.fth_table,
 		    fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t));
 		mtx_destroy(&fasttrap_cleanup_mtx);
 		mutex_destroy(&fasttrap_count_mtx);
 		return (ret);
 	}
 
 
 	/*
 	 * ... and the procs hash table.
 	 */
 	nent = FASTTRAP_PROCS_DEFAULT_SIZE;
 	if (ISP2(nent))
 		fasttrap_procs.fth_nent = nent;
 	else
 		fasttrap_procs.fth_nent = 1 << fasttrap_highbit(nent);
 	ASSERT(fasttrap_procs.fth_nent > 0);
 	fasttrap_procs.fth_mask = fasttrap_procs.fth_nent - 1;
 	fasttrap_procs.fth_table = kmem_zalloc(fasttrap_procs.fth_nent *
 	    sizeof (fasttrap_bucket_t), KM_SLEEP);
 #ifndef illumos
 	for (i = 0; i < fasttrap_procs.fth_nent; i++)
 		mutex_init(&fasttrap_procs.fth_table[i].ftb_mtx,
 		    "processes bucket mtx", MUTEX_DEFAULT, NULL);
 
 	rm_init(&fasttrap_tp_lock, "fasttrap tracepoint");
 
 	/*
 	 * This event handler must run before kdtrace_thread_dtor() since it
 	 * accesses the thread's struct kdtrace_thread.
 	 */
 	fasttrap_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
 	    fasttrap_thread_dtor, NULL, EVENTHANDLER_PRI_FIRST);
 #endif
 
 	/*
 	 * Install our hooks into fork(2), exec(2), and exit(2).
 	 */
 	dtrace_fasttrap_fork = &fasttrap_fork;
 	dtrace_fasttrap_exit = &fasttrap_exec_exit;
 	dtrace_fasttrap_exec = &fasttrap_exec_exit;
 
 	(void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL,
 	    &fasttrap_meta_id);
 
 	return (0);
 }
 
 /*
  * Tear down the fasttrap provider.
  *
  * Returns -1, leaving the provider loaded, if the meta-provider cannot be
  * unregistered or if any managed provider fails dtrace_unregister(); in the
  * latter case the meta-provider is registered again first.  Otherwise the
  * fork/exec/exit hooks are removed, the cleanup process is asked to drain,
  * all locks and hash tables are destroyed and 0 is returned.
  */
 static int
 fasttrap_unload(void)
 {
 	int i, fail = 0;
 
 	/*
 	 * Unregister the meta-provider to make sure no new fasttrap-
 	 * managed providers come along while we're trying to close up
 	 * shop. If we fail to detach, we'll need to re-register as a
 	 * meta-provider. We can fail to unregister as a meta-provider
 	 * if providers we manage still exist.
 	 */
 	if (fasttrap_meta_id != DTRACE_METAPROVNONE &&
 	    dtrace_meta_unregister(fasttrap_meta_id) != 0)
 		return (-1);
 
 	/*
 	 * Iterate over all of our providers. If there's still a process
 	 * that corresponds to that pid, fail to detach.
 	 */
 	for (i = 0; i < fasttrap_provs.fth_nent; i++) {
 		fasttrap_provider_t **fpp, *fp;
 		fasttrap_bucket_t *bucket = &fasttrap_provs.fth_table[i];
 
 		mutex_enter(&bucket->ftb_mtx);
 		/*
 		 * fpp always points at the link that refers to fp, so a
 		 * provider can be unlinked in place while walking the chain.
 		 */
 		fpp = (fasttrap_provider_t **)&bucket->ftb_data;
 		while ((fp = *fpp) != NULL) {
 			/*
 			 * Acquire and release the lock as a simple way of
 			 * waiting for any other consumer to finish with
 			 * this provider. A thread must first acquire the
 			 * bucket lock so there's no chance of another thread
 			 * blocking on the provider's lock.
 			 */
 			mutex_enter(&fp->ftp_mtx);
 			mutex_exit(&fp->ftp_mtx);
 
 			if (dtrace_unregister(fp->ftp_provid) != 0) {
 				/* Still in use: keep it and remember that. */
 				fail = 1;
 				fpp = &fp->ftp_next;
 			} else {
 				/* Unlink from the chain, then free. */
 				*fpp = fp->ftp_next;
 				fasttrap_provider_free(fp);
 			}
 		}
 
 		mutex_exit(&bucket->ftb_mtx);
 	}
 
 	if (fail) {
 		/*
 		 * Providers that did unregister above stay freed; only the
 		 * meta-provider registration is restored.
 		 */
 		(void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL,
 		    &fasttrap_meta_id);
 
 		return (-1);
 	}
 
 	/*
 	 * Stop new processes from entering these hooks now, before the
 	 * fasttrap_cleanup thread runs.  That way all processes will hopefully
 	 * be out of these hooks before we free fasttrap_provs.fth_table
 	 */
 	ASSERT(dtrace_fasttrap_fork == &fasttrap_fork);
 	dtrace_fasttrap_fork = NULL;
 
 	ASSERT(dtrace_fasttrap_exec == &fasttrap_exec_exit);
 	dtrace_fasttrap_exec = NULL;
 
 	ASSERT(dtrace_fasttrap_exit == &fasttrap_exec_exit);
 	dtrace_fasttrap_exit = NULL;
 
 	/*
 	 * Set the drain flag and wake the cleanup thread, then sleep on the
 	 * flag's address until it wakes us back up.
 	 * NOTE(review): the sleep is not wrapped in a loop that re-checks a
 	 * condition; presumably the cleanup thread is the only waker of
 	 * this channel -- confirm against fasttrap_pid_cleanup_cb().
 	 */
 	mtx_lock(&fasttrap_cleanup_mtx);
 	fasttrap_cleanup_drain = 1;
 	/* Wait for the cleanup thread to finish up and signal us. */
 	wakeup(&fasttrap_cleanup_cv);
 	mtx_sleep(&fasttrap_cleanup_drain, &fasttrap_cleanup_mtx, 0, "ftcld",
 	    0);
 	fasttrap_cleanup_proc = NULL;
 	mtx_destroy(&fasttrap_cleanup_mtx);
 
 #ifdef DEBUG
 	mutex_enter(&fasttrap_count_mtx);
 	ASSERT(fasttrap_pid_count == 0);
 	mutex_exit(&fasttrap_count_mtx);
 #endif
 
 #ifndef illumos
 	EVENTHANDLER_DEREGISTER(thread_dtor, fasttrap_thread_dtor_tag);
 
 	/* Destroy the per-bucket locks before the tables are freed. */
 	for (i = 0; i < fasttrap_tpoints.fth_nent; i++)
 		mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx);
 	for (i = 0; i < fasttrap_provs.fth_nent; i++)
 		mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx);
 	for (i = 0; i < fasttrap_procs.fth_nent; i++)
 		mutex_destroy(&fasttrap_procs.fth_table[i].ftb_mtx);
 #endif
 	/* Free each table and zero its entry count. */
 	kmem_free(fasttrap_tpoints.fth_table,
 	    fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t));
 	fasttrap_tpoints.fth_nent = 0;
 
 	kmem_free(fasttrap_provs.fth_table,
 	    fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t));
 	fasttrap_provs.fth_nent = 0;
 
 	kmem_free(fasttrap_procs.fth_table,
 	    fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t));
 	fasttrap_procs.fth_nent = 0;
 
 #ifndef illumos
 	destroy_dev(fasttrap_cdev);
 	mutex_destroy(&fasttrap_count_mtx);
 	rm_destroy(&fasttrap_tp_lock);
 #endif
 
 	return (0);
 }
 
 /*
  * Module event handler.  Load, unload and shutdown events are acknowledged
  * without any work being done here; every other event type is refused with
  * EOPNOTSUPP.
  */
 /* ARGSUSED */
 static int
 fasttrap_modevent(module_t mod __unused, int type, void *data __unused)
 {
 	/* Events that are accepted as-is. */
 	if (type == MOD_LOAD || type == MOD_UNLOAD || type == MOD_SHUTDOWN)
 		return (0);
 
 	/* Anything else is not supported. */
 	return (EOPNOTSUPP);
 }
 
 /*
  * Setup and teardown are hooked in through SYSINIT/SYSUNINIT at
  * SI_SUB_DTRACE_PROVIDER rather than through fasttrap_modevent(), which
  * accepts MOD_LOAD and MOD_UNLOAD without doing any work.
  */
 SYSINIT(fasttrap_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_load,
     NULL);
 SYSUNINIT(fasttrap_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY,
     fasttrap_unload, NULL);
 
 /* Module declaration, version, and dependencies on dtrace and opensolaris. */
 DEV_MODULE(fasttrap, fasttrap_modevent, NULL);
 MODULE_VERSION(fasttrap, 1);
 MODULE_DEPEND(fasttrap, dtrace, 1, 1, 1);
 MODULE_DEPEND(fasttrap, opensolaris, 1, 1, 1);
Index: stable/12/sys/i386/i386/trap.c
===================================================================
--- stable/12/sys/i386/i386/trap.c	(revision 349015)
+++ stable/12/sys/i386/i386/trap.c	(revision 349016)
@@ -1,1168 +1,1189 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (C) 1994, David Greenman
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the University of Utah, and William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * 386 Trap and System call handling
  */
 
 #include "opt_clock.h"
 #include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_isa.h"
 #include "opt_kdb.h"
 #include "opt_stack.h"
 #include "opt_trap.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/pioctl.h>
 #include <sys/ptrace.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/uio.h>
 #include <sys/vmmeter.h>
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DEFINE( , , page_fault, all);
 PMC_SOFT_DEFINE( , , page_fault, read);
 PMC_SOFT_DEFINE( , , page_fault, write);
 #endif
 #include <security/audit/audit.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/intr_machdep.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #include <machine/stack.h>
 #include <machine/trap.h>
 #include <machine/tss.h>
 #include <machine/vm86.h>
 
 #ifdef POWERFAIL_NMI
 #include <sys/syslog.h>
 #include <machine/clock.h>
 #endif
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 #endif
 
 void trap(struct trapframe *frame);
 void syscall(struct trapframe *frame);
 
 static int trap_pfault(struct trapframe *, int, vm_offset_t);
 static void trap_fatal(struct trapframe *, vm_offset_t);
+#ifdef KDTRACE_HOOKS
+static bool trap_user_dtrace(struct trapframe *,
+    int (**hook)(struct trapframe *));
+#endif
 void dblfault_handler(void);
 
 extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall);
 
 #define MAX_TRAP_MSG		32
 
 /*
  * Per-trap-type attributes, looked up by trap number.
  */
 struct trap_data {
 	bool		ei;	/* value returned by trap_enable_intr() */
 	const char	*msg;	/* description returned by trap_msg() */
 };
 
 /*
  * Table of known trap types, indexed by T_* trap number.  Slots that are not
  * listed are zero-filled by the designated initializers, so their msg is
  * NULL; trap_enable_intr() and trap_msg() treat such slots as unknown.
  */
 static const struct trap_data trap_data[] = {
 	[T_PRIVINFLT] =	{ .ei = true,	.msg = "privileged instruction fault" },
 	[T_BPTFLT] =	{ .ei = false,	.msg = "breakpoint instruction fault" },
 	[T_ARITHTRAP] =	{ .ei = true,	.msg = "arithmetic trap" },
 	[T_PROTFLT] =	{ .ei = true,	.msg = "general protection fault" },
 	[T_TRCTRAP] =	{ .ei = false,	.msg = "debug exception" },
 	[T_PAGEFLT] =	{ .ei = true,	.msg = "page fault" },
 	[T_ALIGNFLT] = 	{ .ei = true,	.msg = "alignment fault" },
 	[T_DIVIDE] =	{ .ei = true,	.msg = "integer divide fault" },
 	[T_NMI] =	{ .ei = false,	.msg = "non-maskable interrupt trap" },
 	[T_OFLOW] =	{ .ei = true,	.msg = "overflow trap" },
 	[T_BOUND] =	{ .ei = true,	.msg = "FPU bounds check fault" },
 	[T_DNA] =	{ .ei = true,	.msg = "FPU device not available" },
 	[T_DOUBLEFLT] =	{ .ei = false,	.msg = "double fault" },
 	[T_FPOPFLT] =	{ .ei = true,	.msg = "FPU operand fetch fault" },
 	[T_TSSFLT] =	{ .ei = true,	.msg = "invalid TSS fault" },
 	[T_SEGNPFLT] =	{ .ei = true,	.msg = "segment not present fault" },
 	[T_STKFLT] =	{ .ei = true,	.msg = "stack fault" },
 	[T_MCHK] =	{ .ei = true,	.msg = "machine check trap" },
 	[T_XMMFLT] =	{ .ei = true,	.msg = "SIMD floating-point exception" },
 	[T_DTRACE_RET] ={ .ei = true,	.msg = "DTrace pid return trap" },
 };
 
 static bool
 trap_enable_intr(int trapno)
 {
 
 	MPASS(trapno > 0);
 	if (trapno < nitems(trap_data) && trap_data[trapno].msg != NULL)
 		return (trap_data[trapno].ei);
 	return (false);
 }
 
 /*
  * Return the human-readable description of a trap type, or "UNKNOWN" when
  * the number is outside the table or the slot has no message.
  */
 static const char *
 trap_msg(int trapno)
 {
 	static const char unkn[] = "UNKNOWN";
 
 	if (trapno < nitems(trap_data) && trap_data[trapno].msg != NULL)
 		return (trap_data[trapno].msg);
 	return (unkn);
 }
 
 /* Only built when I586_CPU is defined and NO_F00F_HACK is not. */
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 int has_f00f_bug = 0;		/* Initialized so that it can be patched. */
 #endif
 
 /*
  * machdep.prot_fault_translation: selects the signal trap() delivers for a
  * user page fault that trap_pfault() did not report as SIGSEGV.
  *   0     - autodetect: SIGSEGV/SEGV_ACCERR for FreeBSD-ABI processes with
  *           p_osrel >= P_OSREL_SIGSEGV, otherwise SIGBUS/T_PAGEFLT;
  *   1     - always SIGBUS with code T_PAGEFLT (compat mode);
  *   other - always SIGSEGV with code SEGV_ACCERR.
  */
 static int prot_fault_translation = 0;
 SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
 	&prot_fault_translation, 0, "Select signal to deliver on protection fault");
 /*
  * machdep.uprintf_signal: when non-zero, trap() prints the signal details,
  * register values and the bytes at the faulting %eip with uprintf() before
  * posting the signal.
  */
 static int uprintf_signal;
 SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW,
     &uprintf_signal, 0,
     "Print debugging information on trap signal to ctty");
 
 /*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
  * routines that prepare a suitable stack frame, and restore this
  * frame after the exception has been processed.
  *
  * Entered with interrupts disabled (asserted below).  A handful of trap
  * types are dealt with up front (NMI IPIs, debugger re-entry, T_RESERVED,
  * PMC and stack NMIs, machine checks, DTrace no-fault traps).  Everything
  * else is dispatched on whether the trap came from user mode or from the
  * kernel:
  *
  *  - user traps are translated into a signal number and code, which are
  *    posted with trapsignal() before returning through userret();
  *  - kernel traps are either recovered from by adjusting the frame and
  *    returning, or end in trap_fatal().
  */
 
 void
 trap(struct trapframe *frame)
 {
 	ksiginfo_t ksi;
 	struct thread *td;
 	struct proc *p;
 	int signo, ucode;
 	u_int type;
 	register_t addr, dr6;
 	vm_offset_t eva;
 #ifdef POWERFAIL_NMI
 	static int lastalert = 0;
 #endif
 
 	td = curthread;
 	p = td->td_proc;
 	signo = 0;
 	ucode = 0;
 	addr = 0;
 	dr6 = 0;
 
 	VM_CNT_INC(v_trap);
 	type = frame->tf_trapno;
 
 	KASSERT((read_eflags() & PSL_I) == 0,
 	    ("trap: interrupts enabled, type %d frame %p", type, frame));
 
 #ifdef SMP
 	/* Handler for NMI IPIs used for stopping CPUs. */
 	if (type == T_NMI && ipi_nmi_handler() == 0)
 		return;
 #endif /* SMP */
 
 #ifdef KDB
 	if (kdb_active) {
 		kdb_reenter();
 		return;
 	}
 #endif
 
 	if (type == T_RESERVED) {
 		trap_fatal(frame, 0);
 		return;
 	}
 
 	if (type == T_NMI) {
 #ifdef HWPMC_HOOKS
 		/*
 		 * CPU PMCs interrupt using an NMI so we check for that first.
 		 * If the HWPMC module is active, 'pmc_hook' will point to
 		 * the function to be called.  A non-zero return value from the
 		 * hook means that the NMI was consumed by it and that we can
 		 * return immediately.
 		 */
 		if (pmc_intr != NULL &&
 		    (*pmc_intr)(frame) != 0)
 			return;
 #endif
 
 #ifdef STACK
 		if (stack_nmi_handler(frame) != 0)
 			return;
 #endif
 	}
 
 	if (type == T_MCHK) {
 		mca_intr();
 		return;
 	}
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * A trap can occur while DTrace executes a probe. Before
 	 * executing the probe, DTrace blocks re-scheduling and sets
 	 * a flag in its per-cpu flags to indicate that it doesn't
 	 * want to fault. On returning from the probe, the no-fault
 	 * flag is cleared and finally re-scheduling is enabled.
 	 */
 	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
 	    dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
 		return;
 #endif
 
 	/*
 	 * We must not allow context switches until %cr2 is read.
 	 * Also, for some Cyrix CPUs, %cr2 is clobbered by interrupts.
 	 * All faults use interrupt gates, so %cr2 can be safely read
 	 * now, before optional enable of the interrupts below.
 	 */
 	if (type == T_PAGEFLT)
 		eva = rcr2();
 
 	/*
 	 * Buggy application or kernel code has disabled interrupts
 	 * and then trapped.  Enabling interrupts now is wrong, but it
 	 * is better than running with interrupts disabled until they
 	 * are accidentally enabled later.
 	 */
 	if ((frame->tf_eflags & PSL_I) == 0 && TRAPF_USERMODE(frame) &&
 	    (curpcb->pcb_flags & PCB_VM86CALL) == 0)
 		uprintf("pid %ld (%s): trap %d with interrupts disabled\n",
 		    (long)curproc->p_pid, curthread->td_name, type);
 
 	/*
 	 * Conditionally reenable interrupts.  If we hold a spin lock,
 	 * then we must not reenable interrupts.  This might be a
 	 * spurious page fault.
 	 */
 	if (trap_enable_intr(type) && td->td_md.md_spinlock_count == 0 &&
 	    frame->tf_eip != (int)cpu_switch_load_gs)
 		enable_intr();
 
         if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) {
 		/* user trap */
 
 		td->td_pticks = 0;
 		td->td_frame = frame;
 		/* Default fault address; the page fault case uses eva. */
 		addr = frame->tf_eip;
 		if (td->td_cowgen != p->p_cowgen)
 			thread_cow_update(td);
 
 		switch (type) {
 		case T_PRIVINFLT:	/* privileged instruction fault */
 			signo = SIGILL;
 			ucode = ILL_PRVOPC;
 			break;
 
 		case T_BPTFLT:		/* bpt instruction fault */
-			enable_intr();
 #ifdef KDTRACE_HOOKS
-			if (dtrace_pid_probe_ptr != NULL &&
-			    dtrace_pid_probe_ptr(frame) == 0)
+			if (trap_user_dtrace(frame, &dtrace_pid_probe_ptr))
 				return;
+#else
+			enable_intr();
 #endif
 			signo = SIGTRAP;
 			ucode = TRAP_BRKPT;
 			break;
 
 		case T_TRCTRAP:		/* debug exception */
 			enable_intr();
 			/*
 			 * Also entered from the vm86 branch of the
 			 * T_PROTFLT/T_STKFLT case below when
 			 * vm86_emulate() reports SIGTRAP.
 			 */
 user_trctrap_out:
 			signo = SIGTRAP;
 			ucode = TRAP_TRACE;
 			dr6 = rdr6();
 			if ((dr6 & DBREG_DR6_BS) != 0) {
 				PROC_LOCK(td->td_proc);
 				if ((td->td_dbgflags & TDB_STEP) != 0) {
 					td->td_frame->tf_eflags &= ~PSL_T;
 					td->td_dbgflags &= ~TDB_STEP;
 				}
 				PROC_UNLOCK(td->td_proc);
 			}
 			break;
 
 		case T_ARITHTRAP:	/* arithmetic trap */
 			ucode = npxtrap_x87();
 			if (ucode == -1)
 				return;
 			signo = SIGFPE;
 			break;
 
 		/*
 		 * The following two traps can happen in vm86 mode,
 		 * and, if so, we want to handle them specially.
 		 */
 		case T_PROTFLT:		/* general protection fault */
 		case T_STKFLT:		/* stack fault */
 			if (frame->tf_eflags & PSL_VM) {
 				signo = vm86_emulate((struct vm86frame *)frame);
 				if (signo == SIGTRAP) {
 					load_dr6(rdr6() | 0x4000);
 					goto user_trctrap_out;
 				}
 				if (signo == 0)
 					goto user;
 				break;
 			}
 			signo = SIGBUS;
 			ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
 			break;
 		case T_SEGNPFLT:	/* segment not present fault */
 			signo = SIGBUS;
 			ucode = BUS_ADRERR;
 			break;
 		case T_TSSFLT:		/* invalid TSS fault */
 			signo = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 		case T_ALIGNFLT:
 			signo = SIGBUS;
 			ucode = BUS_ADRALN;
 			break;
 		case T_DOUBLEFLT:	/* double fault */
 		default:
 			signo = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 
 		case T_PAGEFLT:		/* page fault */
 			/*
 			 * The results handled below are: -2 (f00f hack
 			 * only), -1 (return without further processing),
 			 * 0 (no signal) or a signal number.
 			 */
 			signo = trap_pfault(frame, TRUE, eva);
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 			if (signo == -2) {
 				/*
 				 * The f00f hack workaround has triggered, so
 				 * treat the fault as an illegal instruction 
 				 * (T_PRIVINFLT) instead of a page fault.
 				 */
 				type = frame->tf_trapno = T_PRIVINFLT;
 
 				/* Proceed as in that case. */
 				ucode = ILL_PRVOPC;
 				signo = SIGILL;
 				break;
 			}
 #endif
 			if (signo == -1)
 				return;
 			if (signo == 0)
 				goto user;
 
 			if (signo == SIGSEGV)
 				ucode = SEGV_MAPERR;
 			else if (prot_fault_translation == 0) {
 				/*
 				 * Autodetect.  This check also covers
 				 * the images without the ABI-tag ELF
 				 * note.
 				 */
 				if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
 				    p->p_osrel >= P_OSREL_SIGSEGV) {
 					signo = SIGSEGV;
 					ucode = SEGV_ACCERR;
 				} else {
 					signo = SIGBUS;
 					ucode = T_PAGEFLT;
 				}
 			} else if (prot_fault_translation == 1) {
 				/*
 				 * Always compat mode.
 				 */
 				signo = SIGBUS;
 				ucode = T_PAGEFLT;
 			} else {
 				/*
 				 * Always SIGSEGV mode.
 				 */
 				signo = SIGSEGV;
 				ucode = SEGV_ACCERR;
 			}
 			addr = eva;
 			break;
 
 		case T_DIVIDE:		/* integer divide fault */
 			ucode = FPE_INTDIV;
 			signo = SIGFPE;
 			break;
 
 #ifdef DEV_ISA
 		case T_NMI:
 #ifdef POWERFAIL_NMI
 #ifndef TIMER_FREQ
 #  define TIMER_FREQ 1193182
 #endif
 			if (time_second - lastalert > 10) {
 				log(LOG_WARNING, "NMI: power fail\n");
 				sysbeep(880, hz);
 				lastalert = time_second;
 			}
 			return;
 #else /* !POWERFAIL_NMI */
 			nmi_handle_intr(type, frame);
 			return;
 #endif /* POWERFAIL_NMI */
 #endif /* DEV_ISA */
 
 		case T_OFLOW:		/* integer overflow fault */
 			ucode = FPE_INTOVF;
 			signo = SIGFPE;
 			break;
 
 		case T_BOUND:		/* bounds check fault */
 			ucode = FPE_FLTSUB;
 			signo = SIGFPE;
 			break;
 
 		case T_DNA:
 			KASSERT(PCB_USER_FPU(td->td_pcb),
 			    ("kernel FPU ctx has leaked"));
 			/* transparent fault (due to context switch "late") */
 			if (npxdna())
 				return;
 			uprintf("pid %d killed due to lack of floating point\n",
 				p->p_pid);
 			signo = SIGKILL;
 			ucode = 0;
 			break;
 
 		case T_FPOPFLT:		/* FPU operand fetch fault */
 			ucode = ILL_COPROC;
 			signo = SIGILL;
 			break;
 
 		case T_XMMFLT:		/* SIMD floating-point exception */
 			ucode = npxtrap_sse();
 			if (ucode == -1)
 				return;
 			signo = SIGFPE;
 			break;
 #ifdef KDTRACE_HOOKS
 		case T_DTRACE_RET:
-			enable_intr();
-			if (dtrace_return_probe_ptr != NULL)
-				dtrace_return_probe_ptr(frame);
+			(void)trap_user_dtrace(frame, &dtrace_return_probe_ptr);
 			return;
 #endif
 		}
 	} else {
 		/* kernel trap */
 
 		KASSERT(cold || td->td_ucred != NULL,
 		    ("kernel trap doesn't have ucred"));
 		/*
 		 * Cases that recover return directly; any case that breaks
 		 * out of the switch ends in trap_fatal() below.
 		 */
 		switch (type) {
 		case T_PAGEFLT:			/* page fault */
 			(void) trap_pfault(frame, FALSE, eva);
 			return;
 
 		case T_DNA:
 			if (PCB_USER_FPU(td->td_pcb))
 				panic("Unregistered use of FPU in kernel");
 			if (npxdna())
 				return;
 			break;
 
 		case T_ARITHTRAP:	/* arithmetic trap */
 		case T_XMMFLT:		/* SIMD floating-point exception */
 		case T_FPOPFLT:		/* FPU operand fetch fault */
 			/*
 			 * XXXKIB for now disable any FPU traps in kernel
 			 * handler registration seems to be overkill
 			 */
 			trap_fatal(frame, 0);
 			return;
 
 			/*
 			 * The following two traps can happen in
 			 * vm86 mode, and, if so, we want to handle
 			 * them specially.
 			 */
 		case T_PROTFLT:		/* general protection fault */
 		case T_STKFLT:		/* stack fault */
 			if (frame->tf_eflags & PSL_VM) {
 				signo = vm86_emulate((struct vm86frame *)frame);
 				if (signo == SIGTRAP) {
 					type = T_TRCTRAP;
 					load_dr6(rdr6() | 0x4000);
 					goto kernel_trctrap;
 				}
 				if (signo != 0)
 					/*
 					 * returns to original process
 					 */
 					vm86_trap((struct vm86frame *)frame);
 				return;
 			}
 			/* FALL THROUGH */
 		case T_SEGNPFLT:	/* segment not present fault */
 			if (curpcb->pcb_flags & PCB_VM86CALL)
 				break;
 
 			/*
 			 * Invalid %fs's and %gs's can be created using
 			 * procfs or PT_SETREGS or by invalidating the
 			 * underlying LDT entry.  This causes a fault
 			 * in kernel mode when the kernel attempts to
 			 * switch contexts.  Lose the bad context
 			 * (XXX) so that we can continue, and generate
 			 * a signal.
 			 */
 			if (frame->tf_eip == (int)cpu_switch_load_gs) {
 				curpcb->pcb_gs = 0;
 #if 0				
 				PROC_LOCK(p);
 				kern_psignal(p, SIGBUS);
 				PROC_UNLOCK(p);
 #endif				
 				return;
 			}
 
 			if (td->td_intr_nesting_level != 0)
 				break;
 
 			/*
 			 * Invalid segment selectors and out of bounds
 			 * %eip's and %esp's can be set up in user mode.
 			 * This causes a fault in kernel mode when the
 			 * kernel tries to return to user mode.  We want
 			 * to get this fault so that we can fix the
 			 * problem here and not have to check all the
 			 * selectors and pointers when the user changes
 			 * them.
 			 *
 			 * N.B. Comparing to long mode, 32-bit mode
 			 * does not push %esp on the trap frame,
 			 * because iretl faulted while in ring 0.  As
 			 * the consequence, there is no need to fixup
 			 * the stack pointer for doreti_iret_fault,
 			 * the fixup and the complimentary trap() call
 			 * are executed on the main thread stack, not
 			 * on the trampoline stack.
 			 */
 			if (frame->tf_eip == (int)doreti_iret + setidt_disp) {
 				frame->tf_eip = (int)doreti_iret_fault +
 				    setidt_disp;
 				return;
 			}
 			if (type == T_STKFLT)
 				break;
 
 			/*
 			 * Faults on the segment-register pops of the return
 			 * path resume at the matching *_fault label.
 			 */
 			if (frame->tf_eip == (int)doreti_popl_ds +
 			    setidt_disp) {
 				frame->tf_eip = (int)doreti_popl_ds_fault +
 				    setidt_disp;
 				return;
 			}
 			if (frame->tf_eip == (int)doreti_popl_es +
 			    setidt_disp) {
 				frame->tf_eip = (int)doreti_popl_es_fault +
 				    setidt_disp;
 				return;
 			}
 			if (frame->tf_eip == (int)doreti_popl_fs +
 			    setidt_disp) {
 				frame->tf_eip = (int)doreti_popl_fs_fault +
 				    setidt_disp;
 				return;
 			}
 			/* Resume at the registered pcb_onfault handler. */
 			if (curpcb->pcb_onfault != NULL) {
 				frame->tf_eip = (int)curpcb->pcb_onfault;
 				return;
 			}
 			break;
 
 		case T_TSSFLT:
 			/*
 			 * PSL_NT can be set in user mode and isn't cleared
 			 * automatically when the kernel is entered.  This
 			 * causes a TSS fault when the kernel attempts to
 			 * `iret' because the TSS link is uninitialized.  We
 			 * want to get this fault so that we can fix the
 			 * problem here and not every time the kernel is
 			 * entered.
 			 */
 			if (frame->tf_eflags & PSL_NT) {
 				frame->tf_eflags &= ~PSL_NT;
 				return;
 			}
 			break;
 
 		case T_TRCTRAP:	 /* debug exception */
 kernel_trctrap:
 			/* Clear any pending debug events. */
 			dr6 = rdr6();
 			load_dr6(0);
 
 			/*
 			 * Ignore debug register exceptions due to
 			 * accesses in the user's address space, which
 			 * can happen under several conditions such as
 			 * if a user sets a watchpoint on a buffer and
 			 * then passes that buffer to a system call.
 			 * We still want to get TRCTRAPS for addresses
 			 * in kernel space because that is useful when
 			 * debugging the kernel.
 			 */
 			if (user_dbreg_trap(dr6) &&
 			   !(curpcb->pcb_flags & PCB_VM86CALL))
 				return;
 
 			/*
 			 * Malicious user code can configure a debug
 			 * register watchpoint to trap on data access
 			 * to the top of stack and then execute 'pop
 			 * %ss; int 3'.  Due to exception deferral for
 			 * 'pop %ss', the CPU will not interrupt 'int
 			 * 3' to raise the DB# exception for the debug
 			 * register but will postpone the DB# until
 			 * execution of the first instruction of the
 			 * BP# handler (in kernel mode).  Normally the
 			 * previous check would ignore DB# exceptions
 			 * for watchpoints on user addresses raised in
 			 * kernel mode.  However, some CPU errata
 			 * include cases where DB# exceptions do not
 			 * properly set bits in %dr6, e.g. Haswell
 			 * HSD23 and Skylake-X SKZ24.
 			 *
 			 * A deferred DB# can also be raised on the
 			 * first instructions of system call entry
 			 * points or single-step traps via similar use
 			 * of 'pop %ss' or 'mov xxx, %ss'.
 			 */
 			if (frame->tf_eip ==
 			    (uintptr_t)IDTVEC(int0x80_syscall) + setidt_disp ||
 			    frame->tf_eip == (uintptr_t)IDTVEC(bpt) +
 			    setidt_disp ||
 			    frame->tf_eip == (uintptr_t)IDTVEC(dbg) +
 			    setidt_disp)
 				return;
 			/*
 			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
 			 */
 		case T_BPTFLT:
 			/*
 			 * If KDB is enabled, let it handle the debugger trap.
 			 * Otherwise, debugger traps "can't happen".
 			 */
 #ifdef KDB
 			if (kdb_trap(type, dr6, frame))
 				return;
 #endif
 			break;
 
 #ifdef DEV_ISA
 		case T_NMI:
 #ifdef POWERFAIL_NMI
 			if (time_second - lastalert > 10) {
 				log(LOG_WARNING, "NMI: power fail\n");
 				sysbeep(880, hz);
 				lastalert = time_second;
 			}
 			return;
 #else /* !POWERFAIL_NMI */
 			nmi_handle_intr(type, frame);
 			return;
 #endif /* POWERFAIL_NMI */
 #endif /* DEV_ISA */
 		}
 
 		trap_fatal(frame, eva);
 		return;
 	}
 
 	/* Translate fault for emulators (e.g. Linux) */
 	if (*p->p_sysent->sv_transtrap != NULL)
 		signo = (*p->p_sysent->sv_transtrap)(signo, type);
 
 	/* Package the signal chosen above and post it to the thread. */
 	ksiginfo_init_trap(&ksi);
 	ksi.ksi_signo = signo;
 	ksi.ksi_code = ucode;
 	ksi.ksi_addr = (void *)addr;
 	ksi.ksi_trapno = type;
 	if (uprintf_signal) {
 		uprintf("pid %d comm %s: signal %d err %x code %d type %d "
 		    "addr 0x%x ss 0x%04x esp 0x%08x cs 0x%04x eip 0x%08x "
 		    "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
 		    p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type,
 		    addr, frame->tf_ss, frame->tf_esp, frame->tf_cs,
 		    frame->tf_eip,
 		    fubyte((void *)(frame->tf_eip + 0)),
 		    fubyte((void *)(frame->tf_eip + 1)),
 		    fubyte((void *)(frame->tf_eip + 2)),
 		    fubyte((void *)(frame->tf_eip + 3)),
 		    fubyte((void *)(frame->tf_eip + 4)),
 		    fubyte((void *)(frame->tf_eip + 5)),
 		    fubyte((void *)(frame->tf_eip + 6)),
 		    fubyte((void *)(frame->tf_eip + 7)));
 	}
 	KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled"));
 	trapsignal(td, &ksi);
 
 	/*
 	 * Common return-to-user path; also reached directly when a user trap
 	 * was handled without a signal to deliver.
 	 */
 user:
 	userret(td, frame);
 	KASSERT(PCB_USER_FPU(td->td_pcb),
 	    ("Return from trap with kernel FPU ctx leaked"));
 }
 
 /*
  * Handle a page fault at address 'eva'.  'usermode' is tested as a boolean
  * and selects between the process map and kernel_map below.
  *
  * Return values produced by this function:
  *	0		the fault was resolved by vm_fault(), was ignored as a
  *			spurious fault inside a "no faulting" section, or was
  *			redirected to the registered pcb_onfault handler;
  *	-1		trap_fatal() was called for the fault;
  *	-2		the Pentium F00F workaround was triggered (only when
  *			that workaround is compiled in);
  *	SIGSEGV/SIGBUS	a signal number for a user-mode fault that could not
  *			be resolved (presumably delivered by the caller).
  */
 static int
 trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
 {
 	struct thread *td;
 	struct proc *p;
 	vm_offset_t va;
 	vm_map_t map;
 	int rv;
 	vm_prot_t ftype;
 
 	td = curthread;
 	p = td->td_proc;
 
 	if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
 		/*
 		 * Due to both processor errata and lazy TLB invalidation when
 		 * access restrictions are removed from virtual pages, memory
 		 * accesses that are allowed by the physical mapping layer may
 		 * nonetheless cause one spurious page fault per virtual page.
 		 * When the thread is executing a "no faulting" section that
 		 * is bracketed by vm_fault_{disable,enable}_pagefaults(),
 		 * every page fault is treated as a spurious page fault,
 		 * unless it accesses the same virtual address as the most
 		 * recent page fault within the same "no faulting" section.
 		 */
 		if (td->td_md.md_spurflt_addr != eva ||
 		    (td->td_pflags & TDP_RESETSPUR) != 0) {
 			/*
 			 * Do nothing to the TLB.  A stale TLB entry is
 			 * flushed automatically by a page fault.
 			 */
 			td->td_md.md_spurflt_addr = eva;
 			td->td_pflags &= ~TDP_RESETSPUR;
 			return (0);
 		}
 	} else {
 		/*
 		 * If we get a page fault while in a critical section, then
 		 * it is most likely a fatal kernel page fault.  The kernel
 		 * is already going to panic trying to get a sleep lock to
 		 * do the VM lookup, so just consider it a fatal trap so the
 		 * kernel can print out a useful trap message and even get
 		 * to the debugger.
 		 *
 		 * If we get a page fault while holding a non-sleepable
 		 * lock, then it is most likely a fatal kernel page fault.
 		 * If WITNESS is enabled, then it's going to whine about
 		 * bogus LORs with various VM locks, so just skip to the
 		 * fatal trap handling directly.
 		 */
 		if (td->td_critnest != 0 ||
 		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
 		    "Kernel page fault") != 0) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}
 	}
 	/* The VM layer is handed the page-truncated address, not eva. */
 	va = trunc_page(eva);
 	if (va >= PMAP_TRM_MIN_ADDRESS) {
 		/*
 		 * Don't allow user-mode faults in kernel address space.
 		 * An exception:  if the faulting address is the invalid
 		 * instruction entry in the IDT, then the Intel Pentium
 		 * F00F bug workaround was triggered, and we need to
 		 * treat it as an illegal instruction, and not a page
 		 * fault.
 		 */
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
 			return (-2);
 #endif
 		if (usermode)
 			return (SIGSEGV);
 		trap_fatal(frame, eva);
 		return (-1);
 	} else {
 		/* User faults use the process map, kernel faults kernel_map. */
 		map = usermode ? &p->p_vmspace->vm_map : kernel_map;
 
 		/*
 		 * Kernel cannot access a user-space address directly
 		 * because user pages are not mapped.  Also, page
 		 * faults must not be caused during the interrupts.
 		 */
 		if (!usermode && td->td_intr_nesting_level != 0) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}
 	}
 
 	/*
 	 * If the trap was caused by errant bits in the PTE then panic.
 	 */
 	if (frame->tf_err & PGEX_RSV) {
 		trap_fatal(frame, eva);
 		return (-1);
 	}
 
 	/*
 	 * Derive the access type from the hardware error code: write first,
 	 * then (PAE builds only) instruction fetch, otherwise read.
 	 *
 	 * PGEX_I is defined only if the execute disable bit capability is
 	 * supported and enabled.
 	 */
 	if (frame->tf_err & PGEX_W)
 		ftype = VM_PROT_WRITE;
 #if defined(PAE) || defined(PAE_TABLES)
 	else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
 		ftype = VM_PROT_EXECUTE;
 #endif
 	else
 		ftype = VM_PROT_READ;
 
 	/* Fault in the page. */
 	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 	if (rv == KERN_SUCCESS) {
 #ifdef HWPMC_HOOKS
 		/* Only read and write faults are reported to hwpmc. */
 		if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
 			PMC_SOFT_CALL_TF( , , page_fault, all, frame);
 			if (ftype == VM_PROT_READ)
 				PMC_SOFT_CALL_TF( , , page_fault, read,
 				    frame);
 			else
 				PMC_SOFT_CALL_TF( , , page_fault, write,
 				    frame);
 		}
 #endif
 		return (0);
 	}
 	if (!usermode) {
 		/*
 		 * An unresolved kernel-mode fault is survivable only when not
 		 * nested in an interrupt and a pcb_onfault handler is set:
 		 * execution is then resumed at that handler.
 		 */
 		if (td->td_intr_nesting_level == 0 &&
 		    curpcb->pcb_onfault != NULL) {
 			frame->tf_eip = (int)curpcb->pcb_onfault;
 			return (0);
 		}
 		trap_fatal(frame, eva);
 		return (-1);
 	}
 	/* Protection failures map to SIGBUS, every other error to SIGSEGV. */
 	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
 }
 
 /*
  * Report an unrecoverable trap on the console and panic.
  *
  * Prints the trap type and mode, CPU identification (SMP kernels only),
  * page-fault details or the raw error code, the instruction, stack and
  * frame pointers, the code segment descriptor and the saved eflags.
  * When KDB is compiled in and debugger_on_trap is set, the debugger is
  * offered the trap first; if kdb_trap() reports it as handled this
  * function returns, otherwise it panics.
  *
  * frame:	trap frame of the faulting context.
  * eva:	fault virtual address; printed for T_PAGEFLT and temporarily
  *		stored in tf_err while the debugger runs.
  */
 static void
 trap_fatal(struct trapframe *frame, vm_offset_t eva)
 {
 	int code, ss, esp;
 	u_int type;
 	struct soft_segment_descriptor softseg;
 #ifdef KDB
 	bool handled;
 #endif
 
 	code = frame->tf_err;
 	type = frame->tf_trapno;
 	/* Decode the descriptor of the code segment that was executing. */
 	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
 
 	printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg(type),
 	    frame->tf_eflags & PSL_VM ? "vm86" :
 	    ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
 #ifdef SMP
 	/* two separate prints in case of a trap on an unmapped page */
 	printf("cpuid = %d; ", PCPU_GET(cpuid));
 	printf("apic id = %02x\n", PCPU_GET(apic_id));
 #endif
 	if (type == T_PAGEFLT) {
 		printf("fault virtual address	= 0x%x\n", eva);
 		printf("fault code		= %s %s%s, %s\n",
 			code & PGEX_U ? "user" : "supervisor",
 			code & PGEX_W ? "write" : "read",
 #if defined(PAE) || defined(PAE_TABLES)
 			pg_nx != 0 ?
 			(code & PGEX_I ? " instruction" : " data") :
 #endif
 			"",
 			code & PGEX_RSV ? "reserved bits in PTE" :
 			code & PGEX_P ? "protection violation" : "page not present");
 	} else {
 		printf("error code		= %#x\n", code);
 	}
 	printf("instruction pointer	= 0x%x:0x%x\n",
 	       frame->tf_cs & 0xffff, frame->tf_eip);
 	/*
 	 * Use the ss:esp saved in the frame when it has them; otherwise
 	 * report the kernel data selector and the address of the tf_esp
 	 * slot itself (presumably where the interrupted stack top is when
 	 * no stack registers were pushed -- confirm).
 	 */
 	if (TF_HAS_STACKREGS(frame)) {
 		ss = frame->tf_ss & 0xffff;
 		esp = frame->tf_esp;
 	} else {
 		ss = GSEL(GDATA_SEL, SEL_KPL);
 		esp = (int)&frame->tf_esp;
 	}
 	printf("stack pointer	        = 0x%x:0x%x\n", ss, esp);
 	printf("frame pointer	        = 0x%x:0x%x\n", ss, frame->tf_ebp);
 	printf("code segment		= base 0x%x, limit 0x%x, type 0x%x\n",
 	       softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
 	printf("			= DPL %d, pres %d, def32 %d, gran %d\n",
 	       softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
 	       softseg.ssd_gran);
 	printf("processor eflags	= ");
 	if (frame->tf_eflags & PSL_T)
 		printf("trace trap, ");
 	if (frame->tf_eflags & PSL_I)
 		printf("interrupt enabled, ");
 	if (frame->tf_eflags & PSL_NT)
 		printf("nested task, ");
 	if (frame->tf_eflags & PSL_RF)
 		printf("resume, ");
 	if (frame->tf_eflags & PSL_VM)
 		printf("vm86, ");
 	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
 	printf("current process		= %d (%s)\n",
 	    curproc->p_pid, curthread->td_name);
 
 #ifdef KDB
 	if (debugger_on_trap) {
 		kdb_why = KDB_WHY_TRAP;
 		frame->tf_err = eva;	/* smuggle fault address to ddb */
 		handled = kdb_trap(type, 0, frame);
 		frame->tf_err = code;	/* restore error code */
 		kdb_why = KDB_WHY_UNSET;
 		if (handled)
 			return;
 	}
 #endif
 	printf("trap number		= %d\n", type);
 	if (trap_msg(type) != NULL)
 		panic("%s", trap_msg(type));
 	else
 		panic("unknown/reserved trap");
 }
+
+#ifdef KDTRACE_HOOKS
+/*
+ * Invoke a userspace DTrace hook.  The hook pointer is cleared when no
+ * userspace probes are enabled, so we must synchronize with DTrace to ensure
+ * that a trapping thread is able to call the hook before it is cleared.
+ *
+ * 'hookp' is the address of the hook pointer.  It is sampled exactly once
+ * with atomic_load_ptr(); enable_intr() is then called, and the sampled hook,
+ * if non-NULL, is called with 'frame'.
+ *
+ * Returns true only when a hook was installed and it returned 0.  Returns
+ * false when no hook was installed or the hook returned non-zero.
+ *
+ * NOTE(review): presumably callers enter with interrupts disabled, so that
+ * the pointer is read before DTrace can clear it -- confirm against callers.
+ */
+static bool
+trap_user_dtrace(struct trapframe *frame, int (**hookp)(struct trapframe *))
+{
+	int (*hook)(struct trapframe *);
+
+	hook = (int (*)(struct trapframe *))atomic_load_ptr(hookp);
+	enable_intr();	/* done whether or not a hook was found */
+	if (hook != NULL)
+		return ((hook)(frame) == 0);
+	return (false);
+}
+#endif
 
 /*
  * Double fault handler. Called when a fault occurs while writing
  * a frame for a trap/exception onto the stack. This usually occurs
  * when the stack overflows (such is the case with infinite recursion,
  * for example).
  *
  * The handler gives DTrace a chance to run its double-trap hook, prints
  * the eip/esp/ebp values found in the per-CPU common TSS (plus the CPU
  * and APIC ids on SMP kernels), and then panics; it never returns
  * normally.
  *
  * XXX Note that the current PTD gets replaced by IdlePTD when the
  * task switch occurs. This means that the stack that was active at
  * the time of the double fault is not available at <kstack> unless
  * the machine was idle when the double fault occurred. The downside
  * of this is that "trace <ebp>" in ddb won't work.
  */
 void
 dblfault_handler(void)
 {
 #ifdef KDTRACE_HOOKS
 	/* Call the DTrace double-trap hook first, if one is registered. */
 	if (dtrace_doubletrap_func != NULL)
 		(*dtrace_doubletrap_func)();
 #endif
 	printf("\nFatal double fault:\n");
 	/*
 	 * Registers come from the common TSS (presumably the state saved by
 	 * the task switch mentioned above -- confirm).
 	 */
 	printf("eip = 0x%x\n", PCPU_GET(common_tssp)->tss_eip);
 	printf("esp = 0x%x\n", PCPU_GET(common_tssp)->tss_esp);
 	printf("ebp = 0x%x\n", PCPU_GET(common_tssp)->tss_ebp);
 #ifdef SMP
 	/* two separate prints in case of a trap on an unmapped page */
 	printf("cpuid = %d; ", PCPU_GET(cpuid));
 	printf("apic id = %02x\n", PCPU_GET(apic_id));
 #endif
 	panic("double fault");
 }
 
 /*
  * Decode the system call number and arguments of thread 'td' from its
  * user trap frame into td->td_sa (code, callp, narg, args).
  *
  * Returns 0 on success, EFAULT when a word needed from the user stack
  * cannot be fetched, or the error returned by copyin() when copying the
  * arguments fails.  On success td_retval[0] and td_retval[1] are preset.
  */
 int
 cpu_fetch_syscall_args(struct thread *td)
 {
 	struct proc *p;
 	struct trapframe *frame;
 	struct syscall_args *sa;
 	caddr_t params;
 	long tmp;
 	int error;
 #ifdef COMPAT_43
 	u_int32_t eip;
 	int cs;
 #endif
 
 	p = td->td_proc;
 	frame = td->td_frame;
 	sa = &td->td_sa;
 
 #ifdef COMPAT_43
 	if (__predict_false(frame->tf_cs == 7 && frame->tf_eip == 2)) {
 		/*
 		 * In lcall $7,$0 after int $0x80.  Convert the user
 		 * frame to what it would be for a direct int 0x80 instead
 		 * of lcall $7,$0, by popping the lcall return address.
 		 */
 		error = fueword32((void *)frame->tf_esp, &eip);
 		if (error == -1)
 			return (EFAULT);
 		cs = fuword16((void *)(frame->tf_esp + sizeof(u_int32_t)));
 		if (cs == -1)
 			return (EFAULT);
 
 		/*
 		 * Unwind in-kernel frame after all stack frame pieces
 		 * were successfully read.
 		 */
 		frame->tf_eip = eip;
 		frame->tf_cs = cs;
 		frame->tf_esp += 2 * sizeof(u_int32_t);
 		frame->tf_err = 7;	/* size of lcall $7,$0 */
 	}
 #endif
 
 	/*
 	 * The system call number is in %eax.  Arguments are read starting
 	 * one 32-bit word above the user stack pointer (presumably skipping
 	 * the caller's return address -- confirm).
 	 */
 	sa->code = frame->tf_eax;
 	params = (caddr_t)frame->tf_esp + sizeof(uint32_t);
 
 	/*
 	 * Handle the indirect system calls: for SYS_syscall and
 	 * SYS___syscall the real system call number is the first
 	 * argument on the user stack.
 	 */
 	if (sa->code == SYS_syscall) {
 		/*
 		 * Code is first argument, followed by actual args.
 		 */
 		error = fueword(params, &tmp);
 		if (error == -1)
 			return (EFAULT);
 		sa->code = tmp;
 		params += sizeof(uint32_t);
 	} else if (sa->code == SYS___syscall) {
 		/*
 		 * Like syscall, but code is a quad, so as to maintain
 		 * quad alignment for the rest of the arguments.
 		 */
 		error = fueword(params, &tmp);
 		if (error == -1)
 			return (EFAULT);
 		sa->code = tmp;
 		params += sizeof(quad_t);
 	}
 
 	/*
 	 * Apply the ABI's system call mask, if any, then look the number
 	 * up in the ABI's table; out-of-range numbers use entry 0.
 	 */
  	if (p->p_sysent->sv_mask)
  		sa->code &= p->p_sysent->sv_mask;
  	if (sa->code >= p->p_sysent->sv_size)
  		sa->callp = &p->p_sysent->sv_table[0];
   	else
  		sa->callp = &p->p_sysent->sv_table[sa->code];
 	sa->narg = sa->callp->sy_narg;
 
 	/* Copy in narg 32-bit argument words from the user stack. */
 	if (params != NULL && sa->narg != 0)
 		error = copyin(params, (caddr_t)sa->args,
 		    (u_int)(sa->narg * sizeof(uint32_t)));
 	else
 		error = 0;
 
 	/* Preset the return values only when the arguments were fetched. */
 	if (error == 0) {
 		td->td_retval[0] = 0;
 		td->td_retval[1] = frame->tf_edx;
 	}
 		
 	return (error);
 }
 
 #include "../../kern/subr_syscall.c"
 
 /*
  * syscall - system call request C handler.  A system call is
  * essentially treated as a trap by reusing the frame layout.
  *
  * Records 'frame' as the thread's trap frame, runs the system call via
  * syscallenter(), posts SIGTRAP if the call was entered with the trace
  * flag set, and finishes with syscallret().
  */
 void
 syscall(struct trapframe *frame)
 {
 	struct thread *td;
 	register_t orig_tf_eflags;
 	int error;
 	ksiginfo_t ksi;
 
 #ifdef DIAGNOSTIC
 	/* Must come from user mode, and not while a vm86 call is active. */
 	if (!(TRAPF_USERMODE(frame) &&
 	    (curpcb->pcb_flags & PCB_VM86CALL) == 0)) {
 		panic("syscall");
 		/* NOT REACHED */
 	}
 #endif
 	/* Save eflags as they were on entry; they are tested after the call. */
 	orig_tf_eflags = frame->tf_eflags;
 
 	td = curthread;
 	td->td_frame = frame;
 
 	error = syscallenter(td);
 
 	/*
 	 * Traced syscall.  If the trace flag was set on entry (and this is
 	 * not vm86 mode), clear it in the frame and post SIGTRAP with code
 	 * TRAP_TRACE at the frame's current eip.
 	 */
 	if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
 		frame->tf_eflags &= ~PSL_T;
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGTRAP;
 		ksi.ksi_code = TRAP_TRACE;
 		ksi.ksi_addr = (void *)frame->tf_eip;
 		trapsignal(td, &ksi);
 	}
 
 	/* Sanity checks: no kernel FPU state may leak back to user mode. */
 	KASSERT(PCB_USER_FPU(td->td_pcb),
 	    ("System call %s returning with kernel FPU ctx leaked",
 	     syscallname(td->td_proc, td->td_sa.code)));
 	KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
 	    ("System call %s returning with mangled pcb_save",
 	     syscallname(td->td_proc, td->td_sa.code)));
 
 	syscallret(td, error);
 }
Index: stable/12
===================================================================
--- stable/12	(revision 349015)
+++ stable/12	(revision 349016)

Property changes on: stable/12
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r348742