Index: stable/12/sys/amd64/amd64/trap.c =================================================================== --- stable/12/sys/amd64/amd64/trap.c (revision 349015) +++ stable/12/sys/amd64/amd64/trap.c (revision 349016) @@ -1,1210 +1,1231 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * AMD64 Trap and System call handling */ #include "opt_clock.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include #endif extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg), IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32), IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall); void __noinline trap(struct trapframe *frame); void trap_check(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); static int trap_pfault(struct trapframe *, int); static void trap_fatal(struct trapframe *, vm_offset_t); +#ifdef KDTRACE_HOOKS +static bool trap_user_dtrace(struct trapframe *, + int (**hook)(struct trapframe *)); +#endif #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "debug exception", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; static int prot_fault_translation; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Control L1D flush on return from NMI. * * Tunable can be set to the following values: * 0 - only enable flush on return from NMI if required by vmm.ko (default) * >1 - always flush on return from NMI. * * Post-boot, the sysctl indicates if flushing is currently enabled. */ int nmi_flush_l1d_sw; SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN, &nmi_flush_l1d_sw, 0, "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { ksiginfo_t ksi; struct thread *td; struct proc *p; register_t addr, dr6; int signo, ucode; u_int type; td = curthread; p = td->td_proc; signo = 0; ucode = 0; addr = 0; dr6 = 0; VM_CNT_INC(v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A non-zero return value from the handler means that * the NMI was consumed by it and we can return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(frame) != 0) return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) return; #endif } if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (TRAPF_USERMODE(frame)) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock. */ if (td->td_md.md_spinlock_count == 0) enable_intr(); } } if (TRAPF_USERMODE(frame)) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ - enable_intr(); #ifdef KDTRACE_HOOKS - if (dtrace_pid_probe_ptr != NULL && - dtrace_pid_probe_ptr(frame) == 0) + if (trap_user_dtrace(frame, &dtrace_pid_probe_ptr)) return; +#else + enable_intr(); #endif signo = SIGTRAP; ucode = TRAP_BRKPT; break; case T_TRCTRAP: /* debug exception */ enable_intr(); signo = SIGTRAP; ucode = TRAP_TRACE; dr6 = rdr6(); if ((dr6 & DBREG_DR6_BS) != 0) { PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_rflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; } PROC_UNLOCK(td->td_proc); } break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; case T_PROTFLT: /* general protection fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ /* * Emulator can take care about this trap? */ if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) return; addr = frame->tf_addr; signo = trap_pfault(frame, TRUE); if (signo == -1) return; if (signo == 0) goto userret; if (signo == SIGSEGV) { ucode = SEGV_MAPERR; } else if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { signo = SIGSEGV; ucode = SEGV_ACCERR; } else { signo = SIGBUS; ucode = T_PAGEFLT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ signo = SIGBUS; ucode = T_PAGEFLT; } else { /* * Always SIGSEGV mode. */ signo = SIGSEGV; ucode = SEGV_ACCERR; } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); return; #endif case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); return; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: - enable_intr(); - if (dtrace_return_probe_ptr != NULL) - dtrace_return_probe_ptr(frame); + (void)trap_user_dtrace(frame, &dtrace_return_probe_ptr); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); fpudna(); return; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * For now, supporting kernel handler * registration for FPU traps is overkill. */ trap_fatal(frame, 0); return; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. * * In case of PTI, the IRETQ faulted while the * kernel used the pti stack, and exception * frame records %rsp value pointing to that * stack. If we return normally to * doreti_iret_fault, the trapframe is * reconstructed on pti stack, and calltrap() * called on it as well. Due to the very * limited pti stack size, kernel does not * survive for too long. Switch to the normal * thread stack for the trap handling. * * Magic '5' is the number of qwords occupied by * the hardware trap frame. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; if ((PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3) && (frame->tf_rsp == (uintptr_t)PCPU_GET( pti_rsp0) - 5 * sizeof(register_t))) { frame->tf_rsp = PCPU_GET(rsp0) - 5 * sizeof(register_t); } return; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; return; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; return; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; return; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; return; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; return; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* debug exception */ /* Clear any pending debug events. */ dr6 = rdr6(); load_dr6(0); /* * Ignore debug register exceptions due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap(dr6)) return; /* * Malicious user code can configure a debug * register watchpoint to trap on data access * to the top of stack and then execute 'pop * %ss; int 3'. Due to exception deferral for * 'pop %ss', the CPU will not interrupt 'int * 3' to raise the DB# exception for the debug * register but will postpone the DB# until * execution of the first instruction of the * BP# handler (in kernel mode). Normally the * previous check would ignore DB# exceptions * for watchpoints on user addresses raised in * kernel mode. However, some CPU errata * include cases where DB# exceptions do not * properly set bits in %dr6, e.g. Haswell * HSD23 and Skylake-X SKZ24. * * A deferred DB# can also be raised on the * first instructions of system call entry * points or single-step traps via similar use * of 'pop %ss' or 'mov xxx, %ss'. */ if (pti) { if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall_pti) || #ifdef COMPAT_FREEBSD32 frame->tf_rip == (uintptr_t)IDTVEC(int0x80_syscall_pti) || #endif frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti)) return; } else { if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall) || #ifdef COMPAT_FREEBSD32 frame->tf_rip == (uintptr_t)IDTVEC(int0x80_syscall) || #endif frame->tf_rip == (uintptr_t)IDTVEC(bpt)) return; } if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) || /* Needed for AMD. */ frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32)) return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, dr6, frame)) return; #endif break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); return; #endif } trap_fatal(frame, 0); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), fubyte((void *)(frame->tf_rip + 3)), fubyte((void *)(frame->tf_rip + 4)), fubyte((void *)(frame->tf_rip + 5)), fubyte((void *)(frame->tf_rip + 6)), fubyte((void *)(frame->tf_rip + 7))); } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); userret: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } /* * Ensure that we ignore any DTrace-induced faults. This function cannot * be instrumented, so it cannot generate such faults itself. */ void trap_check(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, frame->tf_trapno) != 0) return; #endif trap(frame); } static bool trap_is_smap(struct trapframe *frame) { /* * A page fault on a userspace address is classified as * SMAP-induced if: * - SMAP is supported; * - kernel mode accessed present data page; * - rflags.AC was cleared. * Kernel must never access user space with rflags.AC cleared * if SMAP is enabled. */ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 && (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) == PGEX_P && (frame->tf_rflags & PSL_AC) == 0); } static bool trap_is_pti(struct trapframe *frame) { return (PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 && pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W | PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) && (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK) == (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK)); } static int trap_pfault(struct trapframe *frame, int usermode) { struct thread *td; struct proc *p; vm_map_t map; vm_offset_t va; int rv; vm_prot_t ftype; vm_offset_t eva; td = curthread; p = td->td_proc; eva = frame->tf_addr; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) return (SIGSEGV); map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a usermode address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. * * If SMAP is enabled, filter SMAP faults also, * because illegal access might occur to the mapped * user address, causing infinite loop. */ if (!usermode && (td->td_intr_nesting_level != 0 || trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * User-mode protection key violation (PKU). May happen * either from usermode or from kernel if copyin accessed * key-protected mapping. */ if ((frame->tf_err & PGEX_PK) != 0) { if (eva > VM_MAXUSER_ADDRESS) { trap_fatal(frame, eva); return (-1); } rv = KERN_PROTECTION_FAILURE; goto after_vmfault; } /* * If nx protection of the usermode portion of kernel page * tables caused trap, panic. */ if (usermode && trap_is_pti(frame)) panic("PTI: pid %d comm %s tf_err %#lx", p->p_pid, p->p_comm, frame->tf_err); /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } after_vmfault: if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss; u_int type; struct soft_segment_descriptor softseg; char *msg; #ifdef KDB bool handled; #endif code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)], &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, TRAPF_USERMODE(frame) ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s %s%s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_PK ? " prot key" : "", code & PGEX_SGX ? " SGX" : "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); ss = frame->tf_ss & 0xffff; printf("stack pointer = 0x%x:0x%lx\n", ss, frame->tf_rsp); printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_rflags & PSL_T) printf("trace trap, "); if (frame->tf_rflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) printf("nested task, "); if (frame->tf_rflags & PSL_RF) printf("resume, "); printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(type, 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } + +#ifdef KDTRACE_HOOKS +/* + * Invoke a userspace DTrace hook. The hook pointer is cleared when no + * userspace probes are enabled, so we must synchronize with DTrace to ensure + * that a trapping thread is able to call the hook before it is cleared. + */ +static bool +trap_user_dtrace(struct trapframe *frame, int (**hookp)(struct trapframe *)) +{ + int (*hook)(struct trapframe *); + + hook = (int (*)(struct trapframe *))atomic_load_ptr(hookp); + enable_intr(); + if (hook != NULL) + return ((hook)(frame) == 0); + return (false); +} +#endif /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). */ void dblfault_handler(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault\n" "rip %#lx rsp %#lx rbp %#lx\n" "rax %#lx rdx %#lx rbx %#lx\n" "rcx %#lx rsi %#lx rdi %#lx\n" "r8 %#lx r9 %#lx r10 %#lx\n" "r11 %#lx r12 %#lx r13 %#lx\n" "r14 %#lx r15 %#lx rflags %#lx\n" "cs %#lx ss %#lx ds %#hx es %#hx fs %#hx gs %#hx\n" "fsbase %#lx gsbase %#lx kgsbase %#lx\n", frame->tf_rip, frame->tf_rsp, frame->tf_rbp, frame->tf_rax, frame->tf_rdx, frame->tf_rbx, frame->tf_rcx, frame->tf_rdi, frame->tf_rsi, frame->tf_r8, frame->tf_r9, frame->tf_r10, frame->tf_r11, frame->tf_r12, frame->tf_r13, frame->tf_r14, frame->tf_r15, frame->tf_rflags, frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es, frame->tf_fs, frame->tf_gs, rdmsr(MSR_FSBASE), rdmsr(MSR_GSBASE), rdmsr(MSR_KGSBASE)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } static int __noinline cpu_fetch_syscall_args_fallback(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; register_t *argp; caddr_t params; int reg, regcnt, error; p = td->td_proc; frame = td->td_frame; reg = 0; regcnt = NARGREGS; sa->code = frame->tf_rax; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = frame->tf_rdi; reg++; regcnt--; } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!")); argp = &frame->tf_rdi; argp += reg; memcpy(sa->args, argp, sizeof(sa->args[0]) * NARGREGS); if (sa->narg > regcnt) { params = (caddr_t)frame->tf_rsp + sizeof(register_t); error = copyin(params, &sa->args[regcnt], (sa->narg - regcnt) * sizeof(sa->args[0])); if (__predict_false(error != 0)) return (error); } td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->tf_rax; if (__predict_false(sa->code == SYS_syscall || sa->code == SYS___syscall || sa->code >= p->p_sysent->sv_size)) return (cpu_fetch_syscall_args_fallback(td, sa)); sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!")); if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (__predict_false(sa->narg > NARGREGS)) return (cpu_fetch_syscall_args_fallback(td, sa)); memcpy(sa->args, &frame->tf_rdi, sizeof(sa->args[0]) * NARGREGS); td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } #include "../../kern/subr_syscall.c" static void (*syscall_ret_l1d_flush)(void); int syscall_ret_l1d_flush_mode; static void flush_l1d_hw(void) { wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D); } static void __inline amd64_syscall_ret_flush_l1d_inline(int error) { void (*p)(void); if (error != 0 && error != EEXIST && error != EAGAIN && error != EXDEV && error != ENOENT && error != ENOTCONN && error != EINPROGRESS) { p = syscall_ret_l1d_flush; if (p != NULL) p(); } } void amd64_syscall_ret_flush_l1d(int error) { amd64_syscall_ret_flush_l1d_inline(error); } void amd64_syscall_ret_flush_l1d_recalc(void) { bool l1d_hw; l1d_hw = (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0; again: switch (syscall_ret_l1d_flush_mode) { case 0: syscall_ret_l1d_flush = NULL; break; case 1: syscall_ret_l1d_flush = l1d_hw ? flush_l1d_hw : flush_l1d_sw_abi; break; case 2: syscall_ret_l1d_flush = l1d_hw ? flush_l1d_hw : NULL; break; case 3: syscall_ret_l1d_flush = flush_l1d_sw_abi; break; default: syscall_ret_l1d_flush_mode = 1; goto again; } } static int machdep_syscall_ret_flush_l1d(SYSCTL_HANDLER_ARGS) { int error, val; val = syscall_ret_l1d_flush_mode; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); syscall_ret_l1d_flush_mode = val; amd64_syscall_ret_flush_l1d_recalc(); return (0); } SYSCTL_PROC(_machdep, OID_AUTO, syscall_ret_flush_l1d, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, machdep_syscall_ret_flush_l1d, "I", "Flush L1D on syscall return with error (0 - off, 1 - on, " "2 - use hw only, 3 - use sw only"); /* * System call handler for native binaries. The trap frame is already * set up by the assembler trampoline and a pointer to it is saved in * td_frame. */ void amd64_syscall(struct thread *td, int traced) { int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!TRAPF_USERMODE(td->td_frame)) { panic("syscall"); /* NOT REACHED */ } #endif error = syscallenter(td); /* * Traced syscall. */ if (__predict_false(traced)) { td->td_frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)td->td_frame->tf_rip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_md.md_invl_gen.gen == 0, ("System call %s returning with leaked invl_gen %lu", syscallname(td->td_proc, td->td_sa.code), td->td_md.md_invl_gen.gen)); syscallret(td, error); /* * If the user-supplied value of %rip is not a canonical * address, then some CPUs will trigger a ring 0 #GP during * the sysret instruction. However, the fault handler would * execute in ring 0 with the user's %gs and %rsp which would * not be safe. Instead, use the full return path which * catches the problem safely. */ if (__predict_false(td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); amd64_syscall_ret_flush_l1d_inline(error); } Index: stable/12/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c =================================================================== --- stable/12/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c (revision 349015) +++ stable/12/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c (revision 349016) @@ -1,2678 +1,2664 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2015, Joyent, Inc. All rights reserved. */ #include #include #include #include #include #include #ifdef illumos #include #endif #include #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #include #include #ifdef illumos #include #endif #include #include #ifndef illumos #include #include #include #include #include #include #include #include #include #include #include #include #endif /* * User-Land Trap-Based Tracing * ---------------------------- * * The fasttrap provider allows DTrace consumers to instrument any user-level * instruction to gather data; this includes probes with semantic * signifigance like entry and return as well as simple offsets into the * function. While the specific techniques used are very ISA specific, the * methodology is generalizable to any architecture. * * * The General Methodology * ----------------------- * * With the primary goal of tracing every user-land instruction and the * limitation that we can't trust user space so don't want to rely on much * information there, we begin by replacing the instructions we want to trace * with trap instructions. Each instruction we overwrite is saved into a hash * table keyed by process ID and pc address. When we enter the kernel due to * this trap instruction, we need the effects of the replaced instruction to * appear to have occurred before we proceed with the user thread's * execution. * * Each user level thread is represented by a ulwp_t structure which is * always easily accessible through a register. The most basic way to produce * the effects of the instruction we replaced is to copy that instruction out * to a bit of scratch space reserved in the user thread's ulwp_t structure * (a sort of kernel-private thread local storage), set the PC to that * scratch space and single step. When we reenter the kernel after single * stepping the instruction we must then adjust the PC to point to what would * normally be the next instruction. Of course, special care must be taken * for branches and jumps, but these represent such a small fraction of any * instruction set that writing the code to emulate these in the kernel is * not too difficult. * * Return probes may require several tracepoints to trace every return site, * and, conversely, each tracepoint may activate several probes (the entry * and offset 0 probes, for example). To solve this muliplexing problem, * tracepoints contain lists of probes to activate and probes contain lists * of tracepoints to enable. If a probe is activated, it adds its ID to * existing tracepoints or creates new ones as necessary. * * Most probes are activated _before_ the instruction is executed, but return * probes are activated _after_ the effects of the last instruction of the * function are visible. Return probes must be fired _after_ we have * single-stepped the instruction whereas all other probes are fired * beforehand. * * * Lock Ordering * ------------- * * The lock ordering below -- both internally and with respect to the DTrace * framework -- is a little tricky and bears some explanation. Each provider * has a lock (ftp_mtx) that protects its members including reference counts * for enabled probes (ftp_rcount), consumers actively creating probes * (ftp_ccount) and USDT consumers (ftp_mcount); all three prevent a provider * from being freed. A provider is looked up by taking the bucket lock for the * provider hash table, and is returned with its lock held. The provider lock * may be taken in functions invoked by the DTrace framework, but may not be * held while calling functions in the DTrace framework. * * To ensure consistency over multiple calls to the DTrace framework, the * creation lock (ftp_cmtx) should be held. Naturally, the creation lock may * not be taken when holding the provider lock as that would create a cyclic * lock ordering. In situations where one would naturally take the provider * lock and then the creation lock, we instead up a reference count to prevent * the provider from disappearing, drop the provider lock, and acquire the * creation lock. * * Briefly: * bucket lock before provider lock * DTrace before provider lock * creation lock before DTrace * never hold the provider lock and creation lock simultaneously */ static d_open_t fasttrap_open; static d_ioctl_t fasttrap_ioctl; static struct cdevsw fasttrap_cdevsw = { .d_version = D_VERSION, .d_open = fasttrap_open, .d_ioctl = fasttrap_ioctl, .d_name = "fasttrap", }; static struct cdev *fasttrap_cdev; static dtrace_meta_provider_id_t fasttrap_meta_id; static struct proc *fasttrap_cleanup_proc; static struct mtx fasttrap_cleanup_mtx; static uint_t fasttrap_cleanup_work, fasttrap_cleanup_drain, fasttrap_cleanup_cv; /* * Generation count on modifications to the global tracepoint lookup table. */ static volatile uint64_t fasttrap_mod_gen; /* * When the fasttrap provider is loaded, fasttrap_max is set to either * FASTTRAP_MAX_DEFAULT, or the value for fasttrap-max-probes in the * fasttrap.conf file (Illumos), or the value provied in the loader.conf (FreeBSD). * Each time a probe is created, fasttrap_total is incremented by the number * of tracepoints that may be associated with that probe; fasttrap_total is capped * at fasttrap_max. */ #define FASTTRAP_MAX_DEFAULT 250000 static uint32_t fasttrap_max = FASTTRAP_MAX_DEFAULT; static uint32_t fasttrap_total; /* * Copyright (c) 2011, Joyent, Inc. All rights reserved. */ #define FASTTRAP_TPOINTS_DEFAULT_SIZE 0x4000 #define FASTTRAP_PROVIDERS_DEFAULT_SIZE 0x100 #define FASTTRAP_PROCS_DEFAULT_SIZE 0x100 #define FASTTRAP_PID_NAME "pid" fasttrap_hash_t fasttrap_tpoints; static fasttrap_hash_t fasttrap_provs; static fasttrap_hash_t fasttrap_procs; static uint64_t fasttrap_pid_count; /* pid ref count */ static kmutex_t fasttrap_count_mtx; /* lock on ref count */ #define FASTTRAP_ENABLE_FAIL 1 #define FASTTRAP_ENABLE_PARTIAL 2 static int fasttrap_tracepoint_enable(proc_t *, fasttrap_probe_t *, uint_t); static void fasttrap_tracepoint_disable(proc_t *, fasttrap_probe_t *, uint_t); static fasttrap_provider_t *fasttrap_provider_lookup(pid_t, const char *, const dtrace_pattr_t *); static void fasttrap_provider_retire(pid_t, const char *, int); static void fasttrap_provider_free(fasttrap_provider_t *); static fasttrap_proc_t *fasttrap_proc_lookup(pid_t); static void fasttrap_proc_release(fasttrap_proc_t *); #ifndef illumos static void fasttrap_thread_dtor(void *, struct thread *); #endif #define FASTTRAP_PROVS_INDEX(pid, name) \ ((fasttrap_hash_str(name) + (pid)) & fasttrap_provs.fth_mask) #define FASTTRAP_PROCS_INDEX(pid) ((pid) & fasttrap_procs.fth_mask) #ifndef illumos struct rmlock fasttrap_tp_lock; static eventhandler_tag fasttrap_thread_dtor_tag; #endif static unsigned long tpoints_hash_size = FASTTRAP_TPOINTS_DEFAULT_SIZE; #ifdef __FreeBSD__ SYSCTL_DECL(_kern_dtrace); SYSCTL_NODE(_kern_dtrace, OID_AUTO, fasttrap, CTLFLAG_RD, 0, "DTrace fasttrap parameters"); SYSCTL_UINT(_kern_dtrace_fasttrap, OID_AUTO, max_probes, CTLFLAG_RWTUN, &fasttrap_max, FASTTRAP_MAX_DEFAULT, "Maximum number of fasttrap probes"); SYSCTL_ULONG(_kern_dtrace_fasttrap, OID_AUTO, tpoints_hash_size, CTLFLAG_RDTUN, &tpoints_hash_size, FASTTRAP_TPOINTS_DEFAULT_SIZE, "Size of the tracepoint hash table"); #endif static int fasttrap_highbit(ulong_t i) { int h = 1; if (i == 0) return (0); #ifdef _LP64 if (i & 0xffffffff00000000ul) { h += 32; i >>= 32; } #endif if (i & 0xffff0000) { h += 16; i >>= 16; } if (i & 0xff00) { h += 8; i >>= 8; } if (i & 0xf0) { h += 4; i >>= 4; } if (i & 0xc) { h += 2; i >>= 2; } if (i & 0x2) { h += 1; } return (h); } static uint_t fasttrap_hash_str(const char *p) { unsigned int g; uint_t hval = 0; while (*p) { hval = (hval << 4) + *p++; if ((g = (hval & 0xf0000000)) != 0) hval ^= g >> 24; hval &= ~g; } return (hval); } void fasttrap_sigtrap(proc_t *p, kthread_t *t, uintptr_t pc) { ksiginfo_t ksi; ksiginfo_init(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_DTRACE; ksi.ksi_addr = (caddr_t)pc; PROC_LOCK(p); (void)tdsendsignal(p, t, SIGTRAP, &ksi); PROC_UNLOCK(p); } #ifndef illumos /* * Obtain a chunk of scratch space in the address space of the target process. */ fasttrap_scrspace_t * fasttrap_scraddr(struct thread *td, fasttrap_proc_t *fprc) { fasttrap_scrblock_t *scrblk; fasttrap_scrspace_t *scrspc; struct proc *p; vm_offset_t addr; int error, i; scrspc = NULL; if (td->t_dtrace_sscr != NULL) { /* If the thread already has scratch space, we're done. */ scrspc = (fasttrap_scrspace_t *)td->t_dtrace_sscr; return (scrspc); } p = td->td_proc; mutex_enter(&fprc->ftpc_mtx); if (LIST_EMPTY(&fprc->ftpc_fscr)) { /* * No scratch space is available, so we'll map a new scratch * space block into the traced process' address space. */ addr = 0; error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, FASTTRAP_SCRBLOCK_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) goto done; scrblk = malloc(sizeof(*scrblk), M_SOLARIS, M_WAITOK); scrblk->ftsb_addr = addr; LIST_INSERT_HEAD(&fprc->ftpc_scrblks, scrblk, ftsb_next); /* * Carve the block up into chunks and put them on the free list. */ for (i = 0; i < FASTTRAP_SCRBLOCK_SIZE / FASTTRAP_SCRSPACE_SIZE; i++) { scrspc = malloc(sizeof(*scrspc), M_SOLARIS, M_WAITOK); scrspc->ftss_addr = addr + i * FASTTRAP_SCRSPACE_SIZE; LIST_INSERT_HEAD(&fprc->ftpc_fscr, scrspc, ftss_next); } } /* * Take the first scratch chunk off the free list, put it on the * allocated list, and return its address. */ scrspc = LIST_FIRST(&fprc->ftpc_fscr); LIST_REMOVE(scrspc, ftss_next); LIST_INSERT_HEAD(&fprc->ftpc_ascr, scrspc, ftss_next); /* * This scratch space is reserved for use by td until the thread exits. */ td->t_dtrace_sscr = scrspc; done: mutex_exit(&fprc->ftpc_mtx); return (scrspc); } /* * Return any allocated per-thread scratch space chunks back to the process' * free list. */ static void fasttrap_thread_dtor(void *arg __unused, struct thread *td) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc; fasttrap_scrspace_t *scrspc; pid_t pid; if (td->t_dtrace_sscr == NULL) return; pid = td->td_proc->p_pid; bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; fprc = NULL; /* Look up the fasttrap process handle for this process. */ mutex_enter(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); break; } } if (fprc == NULL) { mutex_exit(&bucket->ftb_mtx); return; } scrspc = (fasttrap_scrspace_t *)td->t_dtrace_sscr; LIST_REMOVE(scrspc, ftss_next); LIST_INSERT_HEAD(&fprc->ftpc_fscr, scrspc, ftss_next); mutex_exit(&fprc->ftpc_mtx); } #endif /* * This function ensures that no threads are actively using the memory * associated with probes that were formerly live. */ static void fasttrap_mod_barrier(uint64_t gen) { int i; if (gen < fasttrap_mod_gen) return; fasttrap_mod_gen++; #ifdef illumos CPU_FOREACH(i) { mutex_enter(&fasttrap_cpuc_pid_lock[i]); mutex_exit(&fasttrap_cpuc_pid_lock[i]); } #else rm_wlock(&fasttrap_tp_lock); rm_wunlock(&fasttrap_tp_lock); #endif } /* * This function performs asynchronous cleanup of fasttrap providers. The * Solaris implementation of this mechanism use a timeout that's activated in * fasttrap_pid_cleanup(), but this doesn't work in FreeBSD: one may sleep while * holding the DTrace mutexes, but it is unsafe to sleep in a callout handler. * Thus we use a dedicated process to perform the cleanup when requested. */ /*ARGSUSED*/ static void fasttrap_pid_cleanup_cb(void *data) { fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; int i, later = 0, rval; mtx_lock(&fasttrap_cleanup_mtx); while (!fasttrap_cleanup_drain || later > 0) { fasttrap_cleanup_work = 0; mtx_unlock(&fasttrap_cleanup_mtx); later = 0; /* * Iterate over all the providers trying to remove the marked * ones. If a provider is marked but not retired, we just * have to take a crack at removing it -- it's no big deal if * we can't. */ for (i = 0; i < fasttrap_provs.fth_nent; i++) { bucket = &fasttrap_provs.fth_table[i]; mutex_enter(&bucket->ftb_mtx); fpp = (fasttrap_provider_t **)&bucket->ftb_data; while ((fp = *fpp) != NULL) { if (!fp->ftp_marked) { fpp = &fp->ftp_next; continue; } mutex_enter(&fp->ftp_mtx); /* * If this provider has consumers actively * creating probes (ftp_ccount) or is a USDT * provider (ftp_mcount), we can't unregister * or even condense. */ if (fp->ftp_ccount != 0 || fp->ftp_mcount != 0) { mutex_exit(&fp->ftp_mtx); fp->ftp_marked = 0; continue; } if (!fp->ftp_retired || fp->ftp_rcount != 0) fp->ftp_marked = 0; mutex_exit(&fp->ftp_mtx); /* * If we successfully unregister this * provider we can remove it from the hash * chain and free the memory. If our attempt * to unregister fails and this is a retired * provider, increment our flag to try again * pretty soon. If we've consumed more than * half of our total permitted number of * probes call dtrace_condense() to try to * clean out the unenabled probes. */ provid = fp->ftp_provid; if ((rval = dtrace_unregister(provid)) != 0) { if (fasttrap_total > fasttrap_max / 2) (void) dtrace_condense(provid); if (rval == EAGAIN) fp->ftp_marked = 1; later += fp->ftp_marked; fpp = &fp->ftp_next; } else { *fpp = fp->ftp_next; fasttrap_provider_free(fp); } } mutex_exit(&bucket->ftb_mtx); } mtx_lock(&fasttrap_cleanup_mtx); /* * If we were unable to retire a provider, try again after a * second. This situation can occur in certain circumstances * where providers cannot be unregistered even though they have * no probes enabled because of an execution of dtrace -l or * something similar. */ if (later > 0 || fasttrap_cleanup_work || fasttrap_cleanup_drain) { mtx_unlock(&fasttrap_cleanup_mtx); pause("ftclean", hz); mtx_lock(&fasttrap_cleanup_mtx); } else mtx_sleep(&fasttrap_cleanup_cv, &fasttrap_cleanup_mtx, 0, "ftcl", 0); } /* * Wake up the thread in fasttrap_unload() now that we're done. */ wakeup(&fasttrap_cleanup_drain); mtx_unlock(&fasttrap_cleanup_mtx); kthread_exit(); } /* * Activates the asynchronous cleanup mechanism. */ static void fasttrap_pid_cleanup(void) { mtx_lock(&fasttrap_cleanup_mtx); if (!fasttrap_cleanup_work) { fasttrap_cleanup_work = 1; wakeup(&fasttrap_cleanup_cv); } mtx_unlock(&fasttrap_cleanup_mtx); } /* * This is called from cfork() via dtrace_fasttrap_fork(). The child * process's address space is (roughly) a copy of the parent process's so * we have to remove all the instrumentation we had previously enabled in the * parent. */ static void fasttrap_fork(proc_t *p, proc_t *cp) { #ifndef illumos fasttrap_scrblock_t *scrblk; fasttrap_proc_t *fprc = NULL; #endif pid_t ppid = p->p_pid; int i; ASSERT(curproc == p); #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #else PROC_LOCK_ASSERT(p, MA_OWNED); #endif #ifdef illumos ASSERT(p->p_dtrace_count > 0); #else /* * This check is purposely here instead of in kern_fork.c because, * for legal resons, we cannot include the dtrace_cddl.h header * inside kern_fork.c and insert if-clause there. */ if (p->p_dtrace_count == 0 && p->p_dtrace_helpers == NULL) return; #endif ASSERT(cp->p_dtrace_count == 0); /* * This would be simpler and faster if we maintained per-process * hash tables of enabled tracepoints. It could, however, potentially * slow down execution of a tracepoint since we'd need to go * through two levels of indirection. In the future, we should * consider either maintaining per-process ancillary lists of * enabled tracepoints or hanging a pointer to a per-process hash * table of enabled tracepoints off the proc structure. */ /* * We don't have to worry about the child process disappearing * because we're in fork(). */ #ifdef illumos mtx_lock_spin(&cp->p_slock); sprlock_proc(cp); mtx_unlock_spin(&cp->p_slock); #else /* * fasttrap_tracepoint_remove() expects the child process to be * unlocked and the VM then expects curproc to be unlocked. */ _PHOLD(cp); PROC_UNLOCK(cp); PROC_UNLOCK(p); if (p->p_dtrace_count == 0) goto dup_helpers; #endif /* * Iterate over every tracepoint looking for ones that belong to the * parent process, and remove each from the child process. */ for (i = 0; i < fasttrap_tpoints.fth_nent; i++) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket = &fasttrap_tpoints.fth_table[i]; mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == ppid && tp->ftt_proc->ftpc_acount != 0) { int ret = fasttrap_tracepoint_remove(cp, tp); ASSERT(ret == 0); /* * The count of active providers can only be * decremented (i.e. to zero) during exec, * exit, and removal of a meta provider so it * should be impossible to drop the count * mid-fork. */ ASSERT(tp->ftt_proc->ftpc_acount != 0); #ifndef illumos fprc = tp->ftt_proc; #endif } } mutex_exit(&bucket->ftb_mtx); #ifndef illumos /* * Unmap any scratch space inherited from the parent's address * space. */ if (fprc != NULL) { mutex_enter(&fprc->ftpc_mtx); LIST_FOREACH(scrblk, &fprc->ftpc_scrblks, ftsb_next) { vm_map_remove(&cp->p_vmspace->vm_map, scrblk->ftsb_addr, scrblk->ftsb_addr + FASTTRAP_SCRBLOCK_SIZE); } mutex_exit(&fprc->ftpc_mtx); } #endif } #ifdef illumos mutex_enter(&cp->p_lock); sprunlock(cp); #else dup_helpers: if (p->p_dtrace_helpers != NULL) dtrace_helpers_duplicate(p, cp); PROC_LOCK(p); PROC_LOCK(cp); _PRELE(cp); #endif } /* * This is called from proc_exit() or from exec_common() if p_dtrace_probes * is set on the proc structure to indicate that there is a pid provider * associated with this process. */ static void fasttrap_exec_exit(proc_t *p) { #ifndef illumos struct thread *td; #endif #ifdef illumos ASSERT(p == curproc); #else PROC_LOCK_ASSERT(p, MA_OWNED); _PHOLD(p); /* * Since struct threads may be recycled, we cannot rely on t_dtrace_sscr * fields to be zeroed by kdtrace_thread_ctor. Thus we must zero it * ourselves when a process exits. */ FOREACH_THREAD_IN_PROC(p, td) td->t_dtrace_sscr = NULL; PROC_UNLOCK(p); #endif /* * We clean up the pid provider for this process here; user-land * static probes are handled by the meta-provider remove entry point. */ fasttrap_provider_retire(p->p_pid, FASTTRAP_PID_NAME, 0); #ifndef illumos if (p->p_dtrace_helpers) dtrace_helpers_destroy(p); PROC_LOCK(p); _PRELE(p); #endif } /*ARGSUSED*/ static void fasttrap_pid_provide(void *arg, dtrace_probedesc_t *desc) { /* * There are no "default" pid probes. */ } static int fasttrap_tracepoint_enable(proc_t *p, fasttrap_probe_t *probe, uint_t index) { fasttrap_tracepoint_t *tp, *new_tp = NULL; fasttrap_bucket_t *bucket; fasttrap_id_t *id; pid_t pid; uintptr_t pc; ASSERT(index < probe->ftp_ntps); pid = probe->ftp_pid; pc = probe->ftp_tps[index].fit_tp->ftt_pc; id = &probe->ftp_tps[index].fit_id; ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); #ifdef illumos ASSERT(!(p->p_flag & SVFORK)); #endif /* * Before we make any modifications, make sure we've imposed a barrier * on the generation in which this probe was last modified. */ fasttrap_mod_barrier(probe->ftp_gen); bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * If the tracepoint has already been enabled, just add our id to the * list of interested probes. This may be our second time through * this path in which case we'll have constructed the tracepoint we'd * like to install. If we can't find a match, and have an allocated * tracepoint ready to go, enable that one now. * * A tracepoint whose process is defunct is also considered defunct. */ again: mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { /* * Note that it's safe to access the active count on the * associated proc structure because we know that at least one * provider (this one) will still be around throughout this * operation. */ if (tp->ftt_pid != pid || tp->ftt_pc != pc || tp->ftt_proc->ftpc_acount == 0) continue; /* * Now that we've found a matching tracepoint, it would be * a decent idea to confirm that the tracepoint is still * enabled and the trap instruction hasn't been overwritten. * Since this is a little hairy, we'll punt for now. */ /* * This can't be the first interested probe. We don't have * to worry about another thread being in the midst of * deleting this tracepoint (which would be the only valid * reason for a tracepoint to have no interested probes) * since we're holding P_PR_LOCK for this process. */ ASSERT(tp->ftt_ids != NULL || tp->ftt_retids != NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: id->fti_next = tp->ftt_ids; membar_producer(); tp->ftt_ids = id; membar_producer(); break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: id->fti_next = tp->ftt_retids; membar_producer(); tp->ftt_retids = id; membar_producer(); break; default: ASSERT(0); } mutex_exit(&bucket->ftb_mtx); if (new_tp != NULL) { new_tp->ftt_ids = NULL; new_tp->ftt_retids = NULL; } return (0); } /* * If we have a good tracepoint ready to go, install it now while * we have the lock held and no one can screw with us. */ if (new_tp != NULL) { int rc = 0; new_tp->ftt_next = bucket->ftb_data; membar_producer(); bucket->ftb_data = new_tp; membar_producer(); mutex_exit(&bucket->ftb_mtx); /* * Activate the tracepoint in the ISA-specific manner. * If this fails, we need to report the failure, but * indicate that this tracepoint must still be disabled * by calling fasttrap_tracepoint_disable(). */ if (fasttrap_tracepoint_install(p, new_tp) != 0) rc = FASTTRAP_ENABLE_PARTIAL; /* * Increment the count of the number of tracepoints active in * the victim process. */ #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #endif p->p_dtrace_count++; return (rc); } mutex_exit(&bucket->ftb_mtx); /* * Initialize the tracepoint that's been preallocated with the probe. */ new_tp = probe->ftp_tps[index].fit_tp; ASSERT(new_tp->ftt_pid == pid); ASSERT(new_tp->ftt_pc == pc); ASSERT(new_tp->ftt_proc == probe->ftp_prov->ftp_proc); ASSERT(new_tp->ftt_ids == NULL); ASSERT(new_tp->ftt_retids == NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: id->fti_next = NULL; new_tp->ftt_ids = id; break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: id->fti_next = NULL; new_tp->ftt_retids = id; break; default: ASSERT(0); } #ifdef __FreeBSD__ if (SV_PROC_FLAG(p, SV_LP64)) p->p_model = DATAMODEL_LP64; else p->p_model = DATAMODEL_ILP32; #endif /* * If the ISA-dependent initialization goes to plan, go back to the * beginning and try to install this freshly made tracepoint. */ if (fasttrap_tracepoint_init(p, new_tp, pc, id->fti_ptype) == 0) goto again; new_tp->ftt_ids = NULL; new_tp->ftt_retids = NULL; return (FASTTRAP_ENABLE_FAIL); } static void fasttrap_tracepoint_disable(proc_t *p, fasttrap_probe_t *probe, uint_t index) { fasttrap_bucket_t *bucket; fasttrap_provider_t *provider = probe->ftp_prov; fasttrap_tracepoint_t **pp, *tp; fasttrap_id_t *id, **idp = NULL; pid_t pid; uintptr_t pc; ASSERT(index < probe->ftp_ntps); pid = probe->ftp_pid; pc = probe->ftp_tps[index].fit_tp->ftt_pc; id = &probe->ftp_tps[index].fit_id; ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); /* * Find the tracepoint and make sure that our id is one of the * ones registered with it. */ bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == pid && tp->ftt_pc == pc && tp->ftt_proc == provider->ftp_proc) break; } /* * If we somehow lost this tracepoint, we're in a world of hurt. */ ASSERT(tp != NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: ASSERT(tp->ftt_ids != NULL); idp = &tp->ftt_ids; break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: ASSERT(tp->ftt_retids != NULL); idp = &tp->ftt_retids; break; default: ASSERT(0); } while ((*idp)->fti_probe != probe) { idp = &(*idp)->fti_next; ASSERT(*idp != NULL); } id = *idp; *idp = id->fti_next; membar_producer(); ASSERT(id->fti_probe == probe); /* * If there are other registered enablings of this tracepoint, we're * all done, but if this was the last probe assocated with this * this tracepoint, we need to remove and free it. */ if (tp->ftt_ids != NULL || tp->ftt_retids != NULL) { /* * If the current probe's tracepoint is in use, swap it * for an unused tracepoint. */ if (tp == probe->ftp_tps[index].fit_tp) { fasttrap_probe_t *tmp_probe; fasttrap_tracepoint_t **tmp_tp; uint_t tmp_index; if (tp->ftt_ids != NULL) { tmp_probe = tp->ftt_ids->fti_probe; /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_ids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } else { tmp_probe = tp->ftt_retids->fti_probe; /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_retids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } ASSERT(*tmp_tp != NULL); ASSERT(*tmp_tp != probe->ftp_tps[index].fit_tp); ASSERT((*tmp_tp)->ftt_ids == NULL); ASSERT((*tmp_tp)->ftt_retids == NULL); probe->ftp_tps[index].fit_tp = *tmp_tp; *tmp_tp = tp; } mutex_exit(&bucket->ftb_mtx); /* * Tag the modified probe with the generation in which it was * changed. */ probe->ftp_gen = fasttrap_mod_gen; return; } mutex_exit(&bucket->ftb_mtx); /* * We can't safely remove the tracepoint from the set of active * tracepoints until we've actually removed the fasttrap instruction * from the process's text. We can, however, operate on this * tracepoint secure in the knowledge that no other thread is going to * be looking at it since we hold P_PR_LOCK on the process if it's * live or we hold the provider lock on the process if it's dead and * gone. */ /* * We only need to remove the actual instruction if we're looking * at an existing process */ if (p != NULL) { /* * If we fail to restore the instruction we need to kill * this process since it's in a completely unrecoverable * state. */ if (fasttrap_tracepoint_remove(p, tp) != 0) fasttrap_sigtrap(p, NULL, pc); /* * Decrement the count of the number of tracepoints active * in the victim process. */ #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #endif p->p_dtrace_count--; atomic_add_rel_64(&p->p_fasttrap_tp_gen, 1); } /* * Remove the probe from the hash table of active tracepoints. */ mutex_enter(&bucket->ftb_mtx); pp = (fasttrap_tracepoint_t **)&bucket->ftb_data; ASSERT(*pp != NULL); while (*pp != tp) { pp = &(*pp)->ftt_next; ASSERT(*pp != NULL); } *pp = tp->ftt_next; membar_producer(); mutex_exit(&bucket->ftb_mtx); /* * Tag the modified probe with the generation in which it was changed. */ probe->ftp_gen = fasttrap_mod_gen; } static void fasttrap_enable_callbacks(void) { /* * We don't have to play the rw lock game here because we're * providing something rather than taking something away -- * we can be sure that no threads have tried to follow this * function pointer yet. */ mutex_enter(&fasttrap_count_mtx); if (fasttrap_pid_count == 0) { ASSERT(dtrace_pid_probe_ptr == NULL); ASSERT(dtrace_return_probe_ptr == NULL); dtrace_pid_probe_ptr = &fasttrap_pid_probe; dtrace_return_probe_ptr = &fasttrap_return_probe; } ASSERT(dtrace_pid_probe_ptr == &fasttrap_pid_probe); ASSERT(dtrace_return_probe_ptr == &fasttrap_return_probe); fasttrap_pid_count++; mutex_exit(&fasttrap_count_mtx); } static void fasttrap_disable_callbacks(void) { -#ifdef illumos - ASSERT(MUTEX_HELD(&cpu_lock)); -#endif - - mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count > 0); fasttrap_pid_count--; if (fasttrap_pid_count == 0) { -#ifdef illumos - cpu_t *cur, *cpu = CPU; - - for (cur = cpu->cpu_next_onln; cur != cpu; - cur = cur->cpu_next_onln) { - rw_enter(&cur->cpu_ft_lock, RW_WRITER); - } -#endif + /* + * Synchronize with the breakpoint handler, which is careful to + * enable interrupts only after loading the hook pointer. + */ + dtrace_sync(); dtrace_pid_probe_ptr = NULL; dtrace_return_probe_ptr = NULL; -#ifdef illumos - for (cur = cpu->cpu_next_onln; cur != cpu; - cur = cur->cpu_next_onln) { - rw_exit(&cur->cpu_ft_lock); - } -#endif } mutex_exit(&fasttrap_count_mtx); } /*ARGSUSED*/ static void fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; proc_t *p = NULL; int i, rc; ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(id == probe->ftp_id); #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif /* * Increment the count of enabled probes on this probe's provider; * the provider can't go away while the probe still exists. We * must increment this even if we aren't able to properly enable * this probe. */ mutex_enter(&probe->ftp_prov->ftp_mtx); probe->ftp_prov->ftp_rcount++; mutex_exit(&probe->ftp_prov->ftp_mtx); /* * If this probe's provider is retired (meaning it was valid in a * previously exec'ed incarnation of this address space), bail out. The * provider can't go away while we're in this code path. */ if (probe->ftp_prov->ftp_retired) return; /* * If we can't find the process, it may be that we're in the context of * a fork in which the traced process is being born and we're copying * USDT probes. Otherwise, the process is gone so bail. */ #ifdef illumos if ((p = sprlock(probe->ftp_pid)) == NULL) { if ((curproc->p_flag & SFORKING) == 0) return; mutex_enter(&pidlock); p = prfind(probe->ftp_pid); if (p == NULL) { /* * So it's not that the target process is being born, * it's that it isn't there at all (and we simply * happen to be forking). Anyway, we know that the * target is definitely gone, so bail out. */ mutex_exit(&pidlock); return (0); } /* * Confirm that curproc is indeed forking the process in which * we're trying to enable probes. */ ASSERT(p->p_parent == curproc); ASSERT(p->p_stat == SIDL); mutex_enter(&p->p_lock); mutex_exit(&pidlock); sprlock_proc(p); } ASSERT(!(p->p_flag & SVFORK)); mutex_exit(&p->p_lock); #else if (pget(probe->ftp_pid, PGET_HOLD | PGET_NOTWEXIT, &p) != 0) return; #endif /* * We have to enable the trap entry point before any user threads have * the chance to execute the trap instruction we're about to place * in their process's text. */ fasttrap_enable_callbacks(); /* * Enable all the tracepoints and add this probe's id to each * tracepoint's list of active probes. */ for (i = 0; i < probe->ftp_ntps; i++) { if ((rc = fasttrap_tracepoint_enable(p, probe, i)) != 0) { /* * If enabling the tracepoint failed completely, * we don't have to disable it; if the failure * was only partial we must disable it. */ if (rc == FASTTRAP_ENABLE_FAIL) i--; else ASSERT(rc == FASTTRAP_ENABLE_PARTIAL); /* * Back up and pull out all the tracepoints we've * created so far for this probe. */ while (i >= 0) { fasttrap_tracepoint_disable(p, probe, i); i--; } #ifdef illumos mutex_enter(&p->p_lock); sprunlock(p); #else PRELE(p); #endif /* * Since we're not actually enabling this probe, * drop our reference on the trap table entry. */ fasttrap_disable_callbacks(); return; } } #ifdef illumos mutex_enter(&p->p_lock); sprunlock(p); #else PRELE(p); #endif probe->ftp_enabled = 1; } /*ARGSUSED*/ static void fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; fasttrap_provider_t *provider = probe->ftp_prov; proc_t *p; int i, whack = 0; ASSERT(id == probe->ftp_id); mutex_enter(&provider->ftp_mtx); /* * We won't be able to acquire a /proc-esque lock on the process * iff the process is dead and gone. In this case, we rely on the * provider lock as a point of mutual exclusion to prevent other * DTrace consumers from disabling this probe. */ if (pget(probe->ftp_pid, PGET_HOLD | PGET_NOTWEXIT, &p) != 0) p = NULL; /* * Disable all the associated tracepoints (for fully enabled probes). */ if (probe->ftp_enabled) { for (i = 0; i < probe->ftp_ntps; i++) { fasttrap_tracepoint_disable(p, probe, i); } } ASSERT(provider->ftp_rcount > 0); provider->ftp_rcount--; if (p != NULL) { /* * Even though we may not be able to remove it entirely, we * mark this retired provider to get a chance to remove some * of the associated probes. */ if (provider->ftp_retired && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); } else { /* * If the process is dead, we're just waiting for the * last probe to be disabled to be able to free it. */ if (provider->ftp_rcount == 0 && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); } if (whack) fasttrap_pid_cleanup(); #ifdef __FreeBSD__ if (p != NULL) PRELE(p); #endif if (!probe->ftp_enabled) return; probe->ftp_enabled = 0; #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif fasttrap_disable_callbacks(); } /*ARGSUSED*/ static void fasttrap_pid_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { fasttrap_probe_t *probe = parg; char *str; int i, ndx; desc->dtargd_native[0] = '\0'; desc->dtargd_xlate[0] = '\0'; if (probe->ftp_prov->ftp_retired != 0 || desc->dtargd_ndx >= probe->ftp_nargs) { desc->dtargd_ndx = DTRACE_ARGNONE; return; } ndx = (probe->ftp_argmap != NULL) ? probe->ftp_argmap[desc->dtargd_ndx] : desc->dtargd_ndx; str = probe->ftp_ntypes; for (i = 0; i < ndx; i++) { str += strlen(str) + 1; } ASSERT(strlen(str + 1) < sizeof (desc->dtargd_native)); (void) strcpy(desc->dtargd_native, str); if (probe->ftp_xtypes == NULL) return; str = probe->ftp_xtypes; for (i = 0; i < desc->dtargd_ndx; i++) { str += strlen(str) + 1; } ASSERT(strlen(str + 1) < sizeof (desc->dtargd_xlate)); (void) strcpy(desc->dtargd_xlate, str); } /*ARGSUSED*/ static void fasttrap_pid_destroy(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; int i; size_t size; ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(fasttrap_total >= probe->ftp_ntps); atomic_add_32(&fasttrap_total, -probe->ftp_ntps); size = offsetof(fasttrap_probe_t, ftp_tps[probe->ftp_ntps]); if (probe->ftp_gen + 1 >= fasttrap_mod_gen) fasttrap_mod_barrier(probe->ftp_gen); for (i = 0; i < probe->ftp_ntps; i++) { kmem_free(probe->ftp_tps[i].fit_tp, sizeof (fasttrap_tracepoint_t)); } kmem_free(probe, size); } static const dtrace_pattr_t pid_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, }; static dtrace_pops_t pid_pops = { .dtps_provide = fasttrap_pid_provide, .dtps_provide_module = NULL, .dtps_enable = fasttrap_pid_enable, .dtps_disable = fasttrap_pid_disable, .dtps_suspend = NULL, .dtps_resume = NULL, .dtps_getargdesc = fasttrap_pid_getargdesc, .dtps_getargval = fasttrap_pid_getarg, .dtps_usermode = NULL, .dtps_destroy = fasttrap_pid_destroy }; static dtrace_pops_t usdt_pops = { .dtps_provide = fasttrap_pid_provide, .dtps_provide_module = NULL, .dtps_enable = fasttrap_pid_enable, .dtps_disable = fasttrap_pid_disable, .dtps_suspend = NULL, .dtps_resume = NULL, .dtps_getargdesc = fasttrap_pid_getargdesc, .dtps_getargval = fasttrap_usdt_getarg, .dtps_usermode = NULL, .dtps_destroy = fasttrap_pid_destroy }; static fasttrap_proc_t * fasttrap_proc_lookup(pid_t pid) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, *new_fprc; bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); fprc->ftpc_rcount++; atomic_inc_64(&fprc->ftpc_acount); ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); mutex_exit(&fprc->ftpc_mtx); return (fprc); } } /* * Drop the bucket lock so we don't try to perform a sleeping * allocation under it. */ mutex_exit(&bucket->ftb_mtx); new_fprc = kmem_zalloc(sizeof (fasttrap_proc_t), KM_SLEEP); new_fprc->ftpc_pid = pid; new_fprc->ftpc_rcount = 1; new_fprc->ftpc_acount = 1; #ifndef illumos mutex_init(&new_fprc->ftpc_mtx, "fasttrap proc mtx", MUTEX_DEFAULT, NULL); #endif mutex_enter(&bucket->ftb_mtx); /* * Take another lap through the list to make sure a proc hasn't * been created for this pid while we weren't under the bucket lock. */ for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); fprc->ftpc_rcount++; atomic_inc_64(&fprc->ftpc_acount); ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); mutex_exit(&fprc->ftpc_mtx); kmem_free(new_fprc, sizeof (fasttrap_proc_t)); return (fprc); } } new_fprc->ftpc_next = bucket->ftb_data; bucket->ftb_data = new_fprc; mutex_exit(&bucket->ftb_mtx); return (new_fprc); } static void fasttrap_proc_release(fasttrap_proc_t *proc) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, **fprcp; pid_t pid = proc->ftpc_pid; #ifndef illumos fasttrap_scrblock_t *scrblk, *scrblktmp; fasttrap_scrspace_t *scrspc, *scrspctmp; struct proc *p; struct thread *td; #endif mutex_enter(&proc->ftpc_mtx); ASSERT(proc->ftpc_rcount != 0); ASSERT(proc->ftpc_acount <= proc->ftpc_rcount); if (--proc->ftpc_rcount != 0) { mutex_exit(&proc->ftpc_mtx); return; } #ifndef illumos /* * Free all structures used to manage per-thread scratch space. */ LIST_FOREACH_SAFE(scrblk, &proc->ftpc_scrblks, ftsb_next, scrblktmp) { LIST_REMOVE(scrblk, ftsb_next); free(scrblk, M_SOLARIS); } LIST_FOREACH_SAFE(scrspc, &proc->ftpc_fscr, ftss_next, scrspctmp) { LIST_REMOVE(scrspc, ftss_next); free(scrspc, M_SOLARIS); } LIST_FOREACH_SAFE(scrspc, &proc->ftpc_ascr, ftss_next, scrspctmp) { LIST_REMOVE(scrspc, ftss_next); free(scrspc, M_SOLARIS); } if ((p = pfind(pid)) != NULL) { FOREACH_THREAD_IN_PROC(p, td) td->t_dtrace_sscr = NULL; PROC_UNLOCK(p); } #endif mutex_exit(&proc->ftpc_mtx); /* * There should definitely be no live providers associated with this * process at this point. */ ASSERT(proc->ftpc_acount == 0); bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); fprcp = (fasttrap_proc_t **)&bucket->ftb_data; while ((fprc = *fprcp) != NULL) { if (fprc == proc) break; fprcp = &fprc->ftpc_next; } /* * Something strange has happened if we can't find the proc. */ ASSERT(fprc != NULL); *fprcp = fprc->ftpc_next; mutex_exit(&bucket->ftb_mtx); kmem_free(fprc, sizeof (fasttrap_proc_t)); } /* * Lookup a fasttrap-managed provider based on its name and associated pid. * If the pattr argument is non-NULL, this function instantiates the provider * if it doesn't exist otherwise it returns NULL. The provider is returned * with its lock held. */ static fasttrap_provider_t * fasttrap_provider_lookup(pid_t pid, const char *name, const dtrace_pattr_t *pattr) { fasttrap_provider_t *fp, *new_fp = NULL; fasttrap_bucket_t *bucket; char provname[DTRACE_PROVNAMELEN]; proc_t *p; cred_t *cred; ASSERT(strlen(name) < sizeof (fp->ftp_name)); ASSERT(pattr != NULL); bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]; mutex_enter(&bucket->ftb_mtx); /* * Take a lap through the list and return the match if we find it. */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) { mutex_enter(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); return (fp); } } /* * Drop the bucket lock so we don't try to perform a sleeping * allocation under it. */ mutex_exit(&bucket->ftb_mtx); /* * Make sure the process exists, isn't a child created as the result * of a vfork(2), and isn't a zombie (but may be in fork). */ if ((p = pfind(pid)) == NULL) return (NULL); /* * Increment p_dtrace_probes so that the process knows to inform us * when it exits or execs. fasttrap_provider_free() decrements this * when we're done with this provider. */ p->p_dtrace_probes++; /* * Grab the credentials for this process so we have * something to pass to dtrace_register(). */ PROC_LOCK_ASSERT(p, MA_OWNED); crhold(p->p_ucred); cred = p->p_ucred; PROC_UNLOCK(p); new_fp = kmem_zalloc(sizeof (fasttrap_provider_t), KM_SLEEP); new_fp->ftp_pid = pid; new_fp->ftp_proc = fasttrap_proc_lookup(pid); #ifndef illumos mutex_init(&new_fp->ftp_mtx, "provider mtx", MUTEX_DEFAULT, NULL); mutex_init(&new_fp->ftp_cmtx, "lock on creating", MUTEX_DEFAULT, NULL); #endif ASSERT(new_fp->ftp_proc != NULL); mutex_enter(&bucket->ftb_mtx); /* * Take another lap through the list to make sure a provider hasn't * been created for this pid while we weren't under the bucket lock. */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) { mutex_enter(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); fasttrap_provider_free(new_fp); crfree(cred); return (fp); } } (void) strcpy(new_fp->ftp_name, name); /* * Fail and return NULL if either the provider name is too long * or we fail to register this new provider with the DTrace * framework. Note that this is the only place we ever construct * the full provider name -- we keep it in pieces in the provider * structure. */ if (snprintf(provname, sizeof (provname), "%s%u", name, (uint_t)pid) >= sizeof (provname) || dtrace_register(provname, pattr, DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER, cred, pattr == &pid_attr ? &pid_pops : &usdt_pops, new_fp, &new_fp->ftp_provid) != 0) { mutex_exit(&bucket->ftb_mtx); fasttrap_provider_free(new_fp); crfree(cred); return (NULL); } new_fp->ftp_next = bucket->ftb_data; bucket->ftb_data = new_fp; mutex_enter(&new_fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); crfree(cred); return (new_fp); } static void fasttrap_provider_free(fasttrap_provider_t *provider) { pid_t pid = provider->ftp_pid; proc_t *p; /* * There need to be no associated enabled probes, no consumers * creating probes, and no meta providers referencing this provider. */ ASSERT(provider->ftp_rcount == 0); ASSERT(provider->ftp_ccount == 0); ASSERT(provider->ftp_mcount == 0); /* * If this provider hasn't been retired, we need to explicitly drop the * count of active providers on the associated process structure. */ if (!provider->ftp_retired) { atomic_dec_64(&provider->ftp_proc->ftpc_acount); ASSERT(provider->ftp_proc->ftpc_acount < provider->ftp_proc->ftpc_rcount); } fasttrap_proc_release(provider->ftp_proc); #ifndef illumos mutex_destroy(&provider->ftp_mtx); mutex_destroy(&provider->ftp_cmtx); #endif kmem_free(provider, sizeof (fasttrap_provider_t)); /* * Decrement p_dtrace_probes on the process whose provider we're * freeing. We don't have to worry about clobbering somone else's * modifications to it because we have locked the bucket that * corresponds to this process's hash chain in the provider hash * table. Don't sweat it if we can't find the process. */ if ((p = pfind(pid)) == NULL) { return; } p->p_dtrace_probes--; #ifndef illumos PROC_UNLOCK(p); #endif } static void fasttrap_provider_retire(pid_t pid, const char *name, int mprov) { fasttrap_provider_t *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; ASSERT(strlen(name) < sizeof (fp->ftp_name)); bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]; mutex_enter(&bucket->ftb_mtx); for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) break; } if (fp == NULL) { mutex_exit(&bucket->ftb_mtx); return; } mutex_enter(&fp->ftp_mtx); ASSERT(!mprov || fp->ftp_mcount > 0); if (mprov && --fp->ftp_mcount != 0) { mutex_exit(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); return; } /* * Mark the provider to be removed in our post-processing step, mark it * retired, and drop the active count on its proc. Marking it indicates * that we should try to remove it; setting the retired flag indicates * that we're done with this provider; dropping the active the proc * releases our hold, and when this reaches zero (as it will during * exit or exec) the proc and associated providers become defunct. * * We obviously need to take the bucket lock before the provider lock * to perform the lookup, but we need to drop the provider lock * before calling into the DTrace framework since we acquire the * provider lock in callbacks invoked from the DTrace framework. The * bucket lock therefore protects the integrity of the provider hash * table. */ atomic_dec_64(&fp->ftp_proc->ftpc_acount); ASSERT(fp->ftp_proc->ftpc_acount < fp->ftp_proc->ftpc_rcount); fp->ftp_retired = 1; fp->ftp_marked = 1; provid = fp->ftp_provid; mutex_exit(&fp->ftp_mtx); /* * We don't have to worry about invalidating the same provider twice * since fasttrap_provider_lookup() will ignore provider that have * been marked as retired. */ dtrace_invalidate(provid); mutex_exit(&bucket->ftb_mtx); fasttrap_pid_cleanup(); } static int fasttrap_uint32_cmp(const void *ap, const void *bp) { return (*(const uint32_t *)ap - *(const uint32_t *)bp); } static int fasttrap_uint64_cmp(const void *ap, const void *bp) { return (*(const uint64_t *)ap - *(const uint64_t *)bp); } static int fasttrap_add_probe(fasttrap_probe_spec_t *pdata) { fasttrap_provider_t *provider; fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; char *name; int i, aframes = 0, whack; /* * There needs to be at least one desired trace point. */ if (pdata->ftps_noffs == 0) return (EINVAL); switch (pdata->ftps_type) { case DTFTP_ENTRY: name = "entry"; aframes = FASTTRAP_ENTRY_AFRAMES; break; case DTFTP_RETURN: name = "return"; aframes = FASTTRAP_RETURN_AFRAMES; break; case DTFTP_OFFSETS: name = NULL; break; default: return (EINVAL); } if ((provider = fasttrap_provider_lookup(pdata->ftps_pid, FASTTRAP_PID_NAME, &pid_attr)) == NULL) return (ESRCH); /* * Increment this reference count to indicate that a consumer is * actively adding a new probe associated with this provider. This * prevents the provider from being deleted -- we'll need to check * for pending deletions when we drop this reference count. */ provider->ftp_ccount++; mutex_exit(&provider->ftp_mtx); /* * Grab the creation lock to ensure consistency between calls to * dtrace_probe_lookup() and dtrace_probe_create() in the face of * other threads creating probes. We must drop the provider lock * before taking this lock to avoid a three-way deadlock with the * DTrace framework. */ mutex_enter(&provider->ftp_cmtx); if (name == NULL) { for (i = 0; i < pdata->ftps_noffs; i++) { char name_str[17]; (void) sprintf(name_str, "%llx", (unsigned long long)pdata->ftps_offs[i]); if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str) != 0) continue; atomic_inc_32(&fasttrap_total); if (fasttrap_total > fasttrap_max) { atomic_dec_32(&fasttrap_total); goto no_mem; } pp = kmem_zalloc(sizeof (fasttrap_probe_t), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; pp->ftp_fsize = pdata->ftps_size; pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = 1; tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[0].fit_tp = tp; pp->ftp_tps[0].fit_id.fti_probe = pp; pp->ftp_tps[0].fit_id.fti_ptype = pdata->ftps_type; pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str, FASTTRAP_OFFSET_AFRAMES, pp); } } else if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name) == 0) { atomic_add_32(&fasttrap_total, pdata->ftps_noffs); if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); goto no_mem; } /* * Make sure all tracepoint program counter values are unique. * We later assume that each probe has exactly one tracepoint * for a given pc. */ qsort(pdata->ftps_offs, pdata->ftps_noffs, sizeof (uint64_t), fasttrap_uint64_cmp); for (i = 1; i < pdata->ftps_noffs; i++) { if (pdata->ftps_offs[i] > pdata->ftps_offs[i - 1]) continue; atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); goto no_mem; } ASSERT(pdata->ftps_noffs > 0); pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[pdata->ftps_noffs]), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; pp->ftp_fsize = pdata->ftps_size; pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = pdata->ftps_noffs; for (i = 0; i < pdata->ftps_noffs; i++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; pp->ftp_tps[i].fit_id.fti_ptype = pdata->ftps_type; } pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name, aframes, pp); } mutex_exit(&provider->ftp_cmtx); /* * We know that the provider is still valid since we incremented the * creation reference count. If someone tried to clean up this provider * while we were using it (e.g. because the process called exec(2) or * exit(2)), take note of that and try to clean it up now. */ mutex_enter(&provider->ftp_mtx); provider->ftp_ccount--; whack = provider->ftp_retired; mutex_exit(&provider->ftp_mtx); if (whack) fasttrap_pid_cleanup(); return (0); no_mem: /* * If we've exhausted the allowable resources, we'll try to remove * this provider to free some up. This is to cover the case where * the user has accidentally created many more probes than was * intended (e.g. pid123:::). */ mutex_exit(&provider->ftp_cmtx); mutex_enter(&provider->ftp_mtx); provider->ftp_ccount--; provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); fasttrap_pid_cleanup(); return (ENOMEM); } /*ARGSUSED*/ static void * fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) { fasttrap_provider_t *provider; /* * A 32-bit unsigned integer (like a pid for example) can be * expressed in 10 or fewer decimal digits. Make sure that we'll * have enough space for the provider name. */ if (strlen(dhpv->dthpv_provname) + 10 >= sizeof (provider->ftp_name)) { printf("failed to instantiate provider %s: " "name too long to accomodate pid", dhpv->dthpv_provname); return (NULL); } /* * Don't let folks spoof the true pid provider. */ if (strcmp(dhpv->dthpv_provname, FASTTRAP_PID_NAME) == 0) { printf("failed to instantiate provider %s: " "%s is an invalid name", dhpv->dthpv_provname, FASTTRAP_PID_NAME); return (NULL); } /* * The highest stability class that fasttrap supports is ISA; cap * the stability of the new provider accordingly. */ if (dhpv->dthpv_pattr.dtpa_provider.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_provider.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_mod.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_mod.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_func.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_func.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_name.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_name.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_args.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_args.dtat_class = DTRACE_CLASS_ISA; if ((provider = fasttrap_provider_lookup(pid, dhpv->dthpv_provname, &dhpv->dthpv_pattr)) == NULL) { printf("failed to instantiate provider %s for " "process %u", dhpv->dthpv_provname, (uint_t)pid); return (NULL); } /* * Up the meta provider count so this provider isn't removed until * the meta provider has been told to remove it. */ provider->ftp_mcount++; mutex_exit(&provider->ftp_mtx); return (provider); } /* * We know a few things about our context here: we know that the probe being * created doesn't already exist (DTrace won't load DOF at the same address * twice, even if explicitly told to do so) and we know that we are * single-threaded with respect to the meta provider machinery. Knowing that * this is a new probe and that there is no way for us to race with another * operation on this provider allows us an important optimization: we need not * lookup a probe before adding it. Saving this lookup is important because * this code is in the fork path for processes with USDT probes, and lookups * here are potentially very expensive because of long hash conflicts on * module, function and name (DTrace doesn't hash on provider name). */ /*ARGSUSED*/ static void fasttrap_meta_create_probe(void *arg, void *parg, dtrace_helper_probedesc_t *dhpb) { fasttrap_provider_t *provider = parg; fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; int i, j; uint32_t ntps; /* * Since the meta provider count is non-zero we don't have to worry * about this provider disappearing. */ ASSERT(provider->ftp_mcount > 0); /* * The offsets must be unique. */ qsort(dhpb->dthpb_offs, dhpb->dthpb_noffs, sizeof (uint32_t), fasttrap_uint32_cmp); for (i = 1; i < dhpb->dthpb_noffs; i++) { if (dhpb->dthpb_base + dhpb->dthpb_offs[i] <= dhpb->dthpb_base + dhpb->dthpb_offs[i - 1]) return; } qsort(dhpb->dthpb_enoffs, dhpb->dthpb_nenoffs, sizeof (uint32_t), fasttrap_uint32_cmp); for (i = 1; i < dhpb->dthpb_nenoffs; i++) { if (dhpb->dthpb_base + dhpb->dthpb_enoffs[i] <= dhpb->dthpb_base + dhpb->dthpb_enoffs[i - 1]) return; } ntps = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs; ASSERT(ntps > 0); atomic_add_32(&fasttrap_total, ntps); if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -ntps); return; } pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[ntps]), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_pid = provider->ftp_pid; pp->ftp_ntps = ntps; pp->ftp_nargs = dhpb->dthpb_xargc; pp->ftp_xtypes = dhpb->dthpb_xtypes; pp->ftp_ntypes = dhpb->dthpb_ntypes; /* * First create a tracepoint for each actual point of interest. */ for (i = 0; i < dhpb->dthpb_noffs; i++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_offs[i]; tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; #ifdef __sparc pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_POST_OFFSETS; #else pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_OFFSETS; #endif } /* * Then create a tracepoint for each is-enabled point. */ for (j = 0; i < ntps; i++, j++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_enoffs[j]; tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_IS_ENABLED; } /* * If the arguments are shuffled around we set the argument remapping * table. Later, when the probe fires, we only remap the arguments * if the table is non-NULL. */ for (i = 0; i < dhpb->dthpb_xargc; i++) { if (dhpb->dthpb_args[i] != i) { pp->ftp_argmap = dhpb->dthpb_args; break; } } /* * The probe is fully constructed -- register it with DTrace. */ pp->ftp_id = dtrace_probe_create(provider->ftp_provid, dhpb->dthpb_mod, dhpb->dthpb_func, dhpb->dthpb_name, FASTTRAP_OFFSET_AFRAMES, pp); } /*ARGSUSED*/ static void fasttrap_meta_remove(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) { /* * Clean up the USDT provider. There may be active consumers of the * provider busy adding probes, no damage will actually befall the * provider until that count has dropped to zero. This just puts * the provider on death row. */ fasttrap_provider_retire(pid, dhpv->dthpv_provname, 1); } static dtrace_mops_t fasttrap_mops = { .dtms_create_probe = fasttrap_meta_create_probe, .dtms_provide_pid = fasttrap_meta_provide, .dtms_remove_pid = fasttrap_meta_remove }; /*ARGSUSED*/ static int fasttrap_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) { return (0); } /*ARGSUSED*/ static int fasttrap_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int fflag, struct thread *td) { if (!dtrace_attached()) return (EAGAIN); if (cmd == FASTTRAPIOC_MAKEPROBE) { fasttrap_probe_spec_t *uprobe = *(fasttrap_probe_spec_t **)arg; fasttrap_probe_spec_t *probe; uint64_t noffs; size_t size; int ret, err; if (copyin(&uprobe->ftps_noffs, &noffs, sizeof (uprobe->ftps_noffs))) return (EFAULT); /* * Probes must have at least one tracepoint. */ if (noffs == 0) return (EINVAL); size = sizeof (fasttrap_probe_spec_t) + sizeof (probe->ftps_offs[0]) * (noffs - 1); if (size > 1024 * 1024) return (ENOMEM); probe = kmem_alloc(size, KM_SLEEP); if (copyin(uprobe, probe, size) != 0 || probe->ftps_noffs != noffs) { kmem_free(probe, size); return (EFAULT); } /* * Verify that the function and module strings contain no * funny characters. */ if (u8_validate(probe->ftps_func, strlen(probe->ftps_func), NULL, U8_VALIDATE_ENTIRE, &err) < 0) { ret = EINVAL; goto err; } if (u8_validate(probe->ftps_mod, strlen(probe->ftps_mod), NULL, U8_VALIDATE_ENTIRE, &err) < 0) { ret = EINVAL; goto err; } #ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = probe->ftps_pid; mutex_enter(&pidlock); /* * Report an error if the process doesn't exist * or is actively being birthed. */ if ((p = pfind(pid)) == NULL || p->p_stat == SIDL) { mutex_exit(&pidlock); return (ESRCH); } mutex_enter(&p->p_lock); mutex_exit(&pidlock); if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD | VWRITE)) != 0) { mutex_exit(&p->p_lock); return (ret); } mutex_exit(&p->p_lock); } #endif /* notyet */ ret = fasttrap_add_probe(probe); err: kmem_free(probe, size); return (ret); } else if (cmd == FASTTRAPIOC_GETINSTR) { fasttrap_instr_query_t instr; fasttrap_tracepoint_t *tp; uint_t index; #ifdef notyet int ret; #endif #ifdef illumos if (copyin((void *)arg, &instr, sizeof (instr)) != 0) return (EFAULT); #endif #ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = instr.ftiq_pid; mutex_enter(&pidlock); /* * Report an error if the process doesn't exist * or is actively being birthed. */ if ((p == pfind(pid)) == NULL || p->p_stat == SIDL) { mutex_exit(&pidlock); return (ESRCH); } mutex_enter(&p->p_lock); mutex_exit(&pidlock); if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD)) != 0) { mutex_exit(&p->p_lock); return (ret); } mutex_exit(&p->p_lock); } #endif /* notyet */ index = FASTTRAP_TPOINTS_INDEX(instr.ftiq_pid, instr.ftiq_pc); mutex_enter(&fasttrap_tpoints.fth_table[index].ftb_mtx); tp = fasttrap_tpoints.fth_table[index].ftb_data; while (tp != NULL) { if (instr.ftiq_pid == tp->ftt_pid && instr.ftiq_pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; tp = tp->ftt_next; } if (tp == NULL) { mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx); return (ENOENT); } bcopy(&tp->ftt_instr, &instr.ftiq_instr, sizeof (instr.ftiq_instr)); mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx); if (copyout(&instr, (void *)arg, sizeof (instr)) != 0) return (EFAULT); return (0); } return (EINVAL); } static int fasttrap_load(void) { ulong_t nent; int i, ret; /* Create the /dev/dtrace/fasttrap entry. */ fasttrap_cdev = make_dev(&fasttrap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/fasttrap"); mtx_init(&fasttrap_cleanup_mtx, "fasttrap clean", "dtrace", MTX_DEF); mutex_init(&fasttrap_count_mtx, "fasttrap count mtx", MUTEX_DEFAULT, NULL); #ifdef illumos fasttrap_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-max-probes", FASTTRAP_MAX_DEFAULT); #endif fasttrap_total = 0; /* * Conjure up the tracepoints hashtable... */ #ifdef illumos nent = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-hash-size", FASTTRAP_TPOINTS_DEFAULT_SIZE); #else nent = tpoints_hash_size; #endif if (nent == 0 || nent > 0x1000000) nent = FASTTRAP_TPOINTS_DEFAULT_SIZE; tpoints_hash_size = nent; if (ISP2(nent)) fasttrap_tpoints.fth_nent = nent; else fasttrap_tpoints.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_tpoints.fth_nent > 0); fasttrap_tpoints.fth_mask = fasttrap_tpoints.fth_nent - 1; fasttrap_tpoints.fth_table = kmem_zalloc(fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_init(&fasttrap_tpoints.fth_table[i].ftb_mtx, "tracepoints bucket mtx", MUTEX_DEFAULT, NULL); #endif /* * ... and the providers hash table... */ nent = FASTTRAP_PROVIDERS_DEFAULT_SIZE; if (ISP2(nent)) fasttrap_provs.fth_nent = nent; else fasttrap_provs.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_provs.fth_nent > 0); fasttrap_provs.fth_mask = fasttrap_provs.fth_nent - 1; fasttrap_provs.fth_table = kmem_zalloc(fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_init(&fasttrap_provs.fth_table[i].ftb_mtx, "providers bucket mtx", MUTEX_DEFAULT, NULL); #endif ret = kproc_create(fasttrap_pid_cleanup_cb, NULL, &fasttrap_cleanup_proc, 0, 0, "ftcleanup"); if (ret != 0) { destroy_dev(fasttrap_cdev); #ifndef illumos for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx); #endif kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t)); mtx_destroy(&fasttrap_cleanup_mtx); mutex_destroy(&fasttrap_count_mtx); return (ret); } /* * ... and the procs hash table. */ nent = FASTTRAP_PROCS_DEFAULT_SIZE; if (ISP2(nent)) fasttrap_procs.fth_nent = nent; else fasttrap_procs.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_procs.fth_nent > 0); fasttrap_procs.fth_mask = fasttrap_procs.fth_nent - 1; fasttrap_procs.fth_table = kmem_zalloc(fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_procs.fth_nent; i++) mutex_init(&fasttrap_procs.fth_table[i].ftb_mtx, "processes bucket mtx", MUTEX_DEFAULT, NULL); rm_init(&fasttrap_tp_lock, "fasttrap tracepoint"); /* * This event handler must run before kdtrace_thread_dtor() since it * accesses the thread's struct kdtrace_thread. */ fasttrap_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, fasttrap_thread_dtor, NULL, EVENTHANDLER_PRI_FIRST); #endif /* * Install our hooks into fork(2), exec(2), and exit(2). */ dtrace_fasttrap_fork = &fasttrap_fork; dtrace_fasttrap_exit = &fasttrap_exec_exit; dtrace_fasttrap_exec = &fasttrap_exec_exit; (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); return (0); } static int fasttrap_unload(void) { int i, fail = 0; /* * Unregister the meta-provider to make sure no new fasttrap- * managed providers come along while we're trying to close up * shop. If we fail to detach, we'll need to re-register as a * meta-provider. We can fail to unregister as a meta-provider * if providers we manage still exist. */ if (fasttrap_meta_id != DTRACE_METAPROVNONE && dtrace_meta_unregister(fasttrap_meta_id) != 0) return (-1); /* * Iterate over all of our providers. If there's still a process * that corresponds to that pid, fail to detach. */ for (i = 0; i < fasttrap_provs.fth_nent; i++) { fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket = &fasttrap_provs.fth_table[i]; mutex_enter(&bucket->ftb_mtx); fpp = (fasttrap_provider_t **)&bucket->ftb_data; while ((fp = *fpp) != NULL) { /* * Acquire and release the lock as a simple way of * waiting for any other consumer to finish with * this provider. A thread must first acquire the * bucket lock so there's no chance of another thread * blocking on the provider's lock. */ mutex_enter(&fp->ftp_mtx); mutex_exit(&fp->ftp_mtx); if (dtrace_unregister(fp->ftp_provid) != 0) { fail = 1; fpp = &fp->ftp_next; } else { *fpp = fp->ftp_next; fasttrap_provider_free(fp); } } mutex_exit(&bucket->ftb_mtx); } if (fail) { (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); return (-1); } /* * Stop new processes from entering these hooks now, before the * fasttrap_cleanup thread runs. That way all processes will hopefully * be out of these hooks before we free fasttrap_provs.fth_table */ ASSERT(dtrace_fasttrap_fork == &fasttrap_fork); dtrace_fasttrap_fork = NULL; ASSERT(dtrace_fasttrap_exec == &fasttrap_exec_exit); dtrace_fasttrap_exec = NULL; ASSERT(dtrace_fasttrap_exit == &fasttrap_exec_exit); dtrace_fasttrap_exit = NULL; mtx_lock(&fasttrap_cleanup_mtx); fasttrap_cleanup_drain = 1; /* Wait for the cleanup thread to finish up and signal us. */ wakeup(&fasttrap_cleanup_cv); mtx_sleep(&fasttrap_cleanup_drain, &fasttrap_cleanup_mtx, 0, "ftcld", 0); fasttrap_cleanup_proc = NULL; mtx_destroy(&fasttrap_cleanup_mtx); #ifdef DEBUG mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count == 0); mutex_exit(&fasttrap_count_mtx); #endif #ifndef illumos EVENTHANDLER_DEREGISTER(thread_dtor, fasttrap_thread_dtor_tag); for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_procs.fth_nent; i++) mutex_destroy(&fasttrap_procs.fth_table[i].ftb_mtx); #endif kmem_free(fasttrap_tpoints.fth_table, fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_tpoints.fth_nent = 0; kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_provs.fth_nent = 0; kmem_free(fasttrap_procs.fth_table, fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_procs.fth_nent = 0; #ifndef illumos destroy_dev(fasttrap_cdev); mutex_destroy(&fasttrap_count_mtx); rm_destroy(&fasttrap_tp_lock); #endif return (0); } /* ARGSUSED */ static int fasttrap_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } SYSINIT(fasttrap_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_load, NULL); SYSUNINIT(fasttrap_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_unload, NULL); DEV_MODULE(fasttrap, fasttrap_modevent, NULL); MODULE_VERSION(fasttrap, 1); MODULE_DEPEND(fasttrap, dtrace, 1, 1, 1); MODULE_DEPEND(fasttrap, opensolaris, 1, 1, 1); Index: stable/12/sys/i386/i386/trap.c =================================================================== --- stable/12/sys/i386/i386/trap.c (revision 349015) +++ stable/12/sys/i386/i386/trap.c (revision 349016) @@ -1,1168 +1,1189 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include "opt_trap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef KDTRACE_HOOKS #include #endif void trap(struct trapframe *frame); void syscall(struct trapframe *frame); static int trap_pfault(struct trapframe *, int, vm_offset_t); static void trap_fatal(struct trapframe *, vm_offset_t); +#ifdef KDTRACE_HOOKS +static bool trap_user_dtrace(struct trapframe *, + int (**hook)(struct trapframe *)); +#endif void dblfault_handler(void); extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall); #define MAX_TRAP_MSG 32 struct trap_data { bool ei; const char *msg; }; static const struct trap_data trap_data[] = { [T_PRIVINFLT] = { .ei = true, .msg = "privileged instruction fault" }, [T_BPTFLT] = { .ei = false, .msg = "breakpoint instruction fault" }, [T_ARITHTRAP] = { .ei = true, .msg = "arithmetic trap" }, [T_PROTFLT] = { .ei = true, .msg = "general protection fault" }, [T_TRCTRAP] = { .ei = false, .msg = "debug exception" }, [T_PAGEFLT] = { .ei = true, .msg = "page fault" }, [T_ALIGNFLT] = { .ei = true, .msg = "alignment fault" }, [T_DIVIDE] = { .ei = true, .msg = "integer divide fault" }, [T_NMI] = { .ei = false, .msg = "non-maskable interrupt trap" }, [T_OFLOW] = { .ei = true, .msg = "overflow trap" }, [T_BOUND] = { .ei = true, .msg = "FPU bounds check fault" }, [T_DNA] = { .ei = true, .msg = "FPU device not available" }, [T_DOUBLEFLT] = { .ei = false, .msg = "double fault" }, [T_FPOPFLT] = { .ei = true, .msg = "FPU operand fetch fault" }, [T_TSSFLT] = { .ei = true, .msg = "invalid TSS fault" }, [T_SEGNPFLT] = { .ei = true, .msg = "segment not present fault" }, [T_STKFLT] = { .ei = true, .msg = "stack fault" }, [T_MCHK] = { .ei = true, .msg = "machine check trap" }, [T_XMMFLT] = { .ei = true, .msg = "SIMD floating-point exception" }, [T_DTRACE_RET] ={ .ei = true, .msg = "DTrace pid return trap" }, }; static bool trap_enable_intr(int trapno) { MPASS(trapno > 0); if (trapno < nitems(trap_data) && trap_data[trapno].msg != NULL) return (trap_data[trapno].ei); return (false); } static const char * trap_msg(int trapno) { const char *res; static const char unkn[] = "UNKNOWN"; res = NULL; if (trapno < nitems(trap_data)) res = trap_data[trapno].msg; if (res == NULL) res = unkn; return (res); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif static int prot_fault_translation = 0; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { ksiginfo_t ksi; struct thread *td; struct proc *p; int signo, ucode; u_int type; register_t addr, dr6; vm_offset_t eva; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif td = curthread; p = td->td_proc; signo = 0; ucode = 0; addr = 0; dr6 = 0; VM_CNT_INC(v_trap); type = frame->tf_trapno; KASSERT((read_eflags() & PSL_I) == 0, ("trap: interrupts enabled, type %d frame %p", type, frame)); #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A non-zero return value from the * hook means that the NMI was consumed by it and that we can * return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(frame) != 0) return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) return; #endif } if (type == T_MCHK) { mca_intr(); return; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) return; #endif /* * We must not allow context switches until %cr2 is read. * Also, for some Cyrix CPUs, %cr2 is clobbered by interrupts. * All faults use interrupt gates, so %cr2 can be safely read * now, before optional enable of the interrupts below. */ if (type == T_PAGEFLT) eva = rcr2(); /* * Buggy application or kernel code has disabled interrupts * and then trapped. Enabling interrupts now is wrong, but it * is better than running with interrupts disabled until they * are accidentally enabled later. */ if ((frame->tf_eflags & PSL_I) == 0 && TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) uprintf("pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); /* * Conditionally reenable interrupts. If we hold a spin lock, * then we must not reenable interrupts. This might be a * spurious page fault. */ if (trap_enable_intr(type) && td->td_md.md_spinlock_count == 0 && frame->tf_eip != (int)cpu_switch_load_gs) enable_intr(); if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ - enable_intr(); #ifdef KDTRACE_HOOKS - if (dtrace_pid_probe_ptr != NULL && - dtrace_pid_probe_ptr(frame) == 0) + if (trap_user_dtrace(frame, &dtrace_pid_probe_ptr)) return; +#else + enable_intr(); #endif signo = SIGTRAP; ucode = TRAP_BRKPT; break; case T_TRCTRAP: /* debug exception */ enable_intr(); user_trctrap_out: signo = SIGTRAP; ucode = TRAP_TRACE; dr6 = rdr6(); if ((dr6 & DBREG_DR6_BS) != 0) { PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_eflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; } PROC_UNLOCK(td->td_proc); } break; case T_ARITHTRAP: /* arithmetic trap */ ucode = npxtrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; /* * The following two traps can happen in vm86 mode, * and, if so, we want to handle them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { load_dr6(rdr6() | 0x4000); goto user_trctrap_out; } if (signo == 0) goto user; break; } signo = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ signo = trap_pfault(frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (signo == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; /* Proceed as in that case. */ ucode = ILL_PRVOPC; signo = SIGILL; break; } #endif if (signo == -1) return; if (signo == 0) goto user; if (signo == SIGSEGV) ucode = SEGV_MAPERR; else if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { signo = SIGSEGV; ucode = SEGV_ACCERR; } else { signo = SIGBUS; ucode = T_PAGEFLT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ signo = SIGBUS; ucode = T_PAGEFLT; } else { /* * Always SIGSEGV mode. */ signo = SIGSEGV; ucode = SEGV_ACCERR; } addr = eva; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) return; uprintf("pid %d killed due to lack of floating point\n", p->p_pid); signo = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = npxtrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: - enable_intr(); - if (dtrace_return_probe_ptr != NULL) - dtrace_return_probe_ptr(frame); + (void)trap_user_dtrace(frame, &dtrace_return_probe_ptr); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE, eva); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); if (npxdna()) return; break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); return; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto kernel_trctrap; } if (signo != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); return; } /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (curpcb->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif return; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. * * N.B. Comparing to long mode, 32-bit mode * does not push %esp on the trap frame, * because iretl faulted while in ring 0. As * the consequence, there is no need to fixup * the stack pointer for doreti_iret_fault, * the fixup and the complimentary trap() call * are executed on the main thread stack, not * on the trampoline stack. */ if (frame->tf_eip == (int)doreti_iret + setidt_disp) { frame->tf_eip = (int)doreti_iret_fault + setidt_disp; return; } if (type == T_STKFLT) break; if (frame->tf_eip == (int)doreti_popl_ds + setidt_disp) { frame->tf_eip = (int)doreti_popl_ds_fault + setidt_disp; return; } if (frame->tf_eip == (int)doreti_popl_es + setidt_disp) { frame->tf_eip = (int)doreti_popl_es_fault + setidt_disp; return; } if (frame->tf_eip == (int)doreti_popl_fs + setidt_disp) { frame->tf_eip = (int)doreti_popl_fs_fault + setidt_disp; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* debug exception */ kernel_trctrap: /* Clear any pending debug events. */ dr6 = rdr6(); load_dr6(0); /* * Ignore debug register exceptions due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap(dr6) && !(curpcb->pcb_flags & PCB_VM86CALL)) return; /* * Malicious user code can configure a debug * register watchpoint to trap on data access * to the top of stack and then execute 'pop * %ss; int 3'. Due to exception deferral for * 'pop %ss', the CPU will not interrupt 'int * 3' to raise the DB# exception for the debug * register but will postpone the DB# until * execution of the first instruction of the * BP# handler (in kernel mode). Normally the * previous check would ignore DB# exceptions * for watchpoints on user addresses raised in * kernel mode. However, some CPU errata * include cases where DB# exceptions do not * properly set bits in %dr6, e.g. Haswell * HSD23 and Skylake-X SKZ24. * * A deferred DB# can also be raised on the * first instructions of system call entry * points or single-step traps via similar use * of 'pop %ss' or 'mov xxx, %ss'. */ if (frame->tf_eip == (uintptr_t)IDTVEC(int0x80_syscall) + setidt_disp || frame->tf_eip == (uintptr_t)IDTVEC(bpt) + setidt_disp || frame->tf_eip == (uintptr_t)IDTVEC(dbg) + setidt_disp) return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, dr6, frame)) return; #endif break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } trap_fatal(frame, eva); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %x code %d type %d " "addr 0x%x ss 0x%04x esp 0x%08x cs 0x%04x eip 0x%08x " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_ss, frame->tf_esp, frame->tf_cs, frame->tf_eip, fubyte((void *)(frame->tf_eip + 0)), fubyte((void *)(frame->tf_eip + 1)), fubyte((void *)(frame->tf_eip + 2)), fubyte((void *)(frame->tf_eip + 3)), fubyte((void *)(frame->tf_eip + 4)), fubyte((void *)(frame->tf_eip + 5)), fubyte((void *)(frame->tf_eip + 6)), fubyte((void *)(frame->tf_eip + 7))); } KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } static int trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) { struct thread *td; struct proc *p; vm_offset_t va; vm_map_t map; int rv; vm_prot_t ftype; td = curthread; p = td->td_proc; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= PMAP_TRM_MIN_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return (-2); #endif if (usermode) return (SIGSEGV); trap_fatal(frame, eva); return (-1); } else { map = usermode ? &p->p_vmspace->vm_map : kernel_map; /* * Kernel cannot access a user-space address directly * because user pages are not mapped. Also, page * faults must not be caused during the interrupts. */ if (!usermode && td->td_intr_nesting_level != 0) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; #if defined(PAE) || defined(PAE_TABLES) else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; #endif else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss, esp; u_int type; struct soft_segment_descriptor softseg; #ifdef KDB bool handled; #endif code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg(type), frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", #if defined(PAE) || defined(PAE_TABLES) pg_nx != 0 ? (code & PGEX_I ? " instruction" : " data") : #endif "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } else { printf("error code = %#x\n", code); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; frame->tf_err = eva; /* smuggle fault address to ddb */ handled = kdb_trap(type, 0, frame); frame->tf_err = code; /* restore error code */ kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif printf("trap number = %d\n", type); if (trap_msg(type) != NULL) panic("%s", trap_msg(type)); else panic("unknown/reserved trap"); } + +#ifdef KDTRACE_HOOKS +/* + * Invoke a userspace DTrace hook. The hook pointer is cleared when no + * userspace probes are enabled, so we must synchronize with DTrace to ensure + * that a trapping thread is able to call the hook before it is cleared. + */ +static bool +trap_user_dtrace(struct trapframe *frame, int (**hookp)(struct trapframe *)) +{ + int (*hook)(struct trapframe *); + + hook = (int (*)(struct trapframe *))atomic_load_ptr(hookp); + enable_intr(); + if (hook != NULL) + return ((hook)(frame) == 0); + return (false); +} +#endif /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). * * XXX Note that the current PTD gets replaced by IdlePTD when the * task switch occurs. This means that the stack that was active at * the time of the double fault is not available at unless * the machine was idle when the double fault occurred. The downside * of this is that "trace " in ddb won't work. */ void dblfault_handler(void) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault:\n"); printf("eip = 0x%x\n", PCPU_GET(common_tssp)->tss_eip); printf("esp = 0x%x\n", PCPU_GET(common_tssp)->tss_esp); printf("ebp = 0x%x\n", PCPU_GET(common_tssp)->tss_ebp); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; long tmp; int error; #ifdef COMPAT_43 u_int32_t eip; int cs; #endif p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; #ifdef COMPAT_43 if (__predict_false(frame->tf_cs == 7 && frame->tf_eip == 2)) { /* * In lcall $7,$0 after int $0x80. Convert the user * frame to what it would be for a direct int 0x80 instead * of lcall $7,$0, by popping the lcall return address. */ error = fueword32((void *)frame->tf_esp, &eip); if (error == -1) return (EFAULT); cs = fuword16((void *)(frame->tf_esp + sizeof(u_int32_t))); if (cs == -1) return (EFAULT); /* * Unwind in-kernel frame after all stack frame pieces * were successfully read. */ frame->tf_eip = eip; frame->tf_cs = cs; frame->tf_esp += 2 * sizeof(u_int32_t); frame->tf_err = 7; /* size of lcall $7,$0 */ } #endif sa->code = frame->tf_eax; params = (caddr_t)frame->tf_esp + sizeof(uint32_t); /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(uint32_t); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (params != NULL && sa->narg != 0) error = copyin(params, (caddr_t)sa->args, (u_int)(sa->narg * sizeof(uint32_t))); else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; } return (error); } #include "../../kern/subr_syscall.c" /* * syscall - system call request C handler. A system call is * essentially treated as a trap by reusing the frame layout. */ void syscall(struct trapframe *frame) { struct thread *td; register_t orig_tf_eflags; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!(TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0)) { panic("syscall"); /* NOT REACHED */ } #endif orig_tf_eflags = frame->tf_eflags; td = curthread; td->td_frame = frame; error = syscallenter(td); /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { frame->tf_eflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); syscallret(td, error); } Index: stable/12 =================================================================== --- stable/12 (revision 349015) +++ stable/12 (revision 349016) Property changes on: stable/12 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r348742