Index: stable/11/sys/amd64/amd64/trap.c =================================================================== --- stable/11/sys/amd64/amd64/trap.c (revision 327550) +++ stable/11/sys/amd64/amd64/trap.c (revision 327551) @@ -1,952 +1,947 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * AMD64 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #ifdef KDTRACE_HOOKS #include #endif void __noinline trap(struct trapframe *frame); void trap_check(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); static int trap_pfault(struct trapframe *, int); static void trap_fatal(struct trapframe *, vm_offset_t); #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; static int prot_fault_translation; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { -#ifdef KDTRACE_HOOKS - struct reg regs; -#endif ksiginfo_t ksi; struct thread *td; struct proc *p; register_t addr; #ifdef KDB register_t dr6; #endif int signo, ucode; u_int type; td = curthread; p = td->td_proc; signo = 0; ucode = 0; addr = 0; PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A non-zero return value from the handler means that * the NMI was consumed by it and we can return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) return; #endif } if (type == T_MCHK) { mca_intr(); return; } if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (TRAPF_USERMODE(frame)) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock. */ if (td->td_md.md_spinlock_count == 0) enable_intr(); } } if (TRAPF_USERMODE(frame)) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); #ifdef KDTRACE_HOOKS if (type == T_BPTFLT) { - fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && - dtrace_pid_probe_ptr(®s) == 0) + dtrace_pid_probe_ptr(frame) == 0) return; } #endif frame->tf_rflags &= ~PSL_T; signo = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; case T_PROTFLT: /* general protection fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ /* * Emulator can take care about this trap? */ if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) return; addr = frame->tf_addr; signo = trap_pfault(frame, TRUE); if (signo == -1) return; if (signo == 0) goto userret; if (signo == SIGSEGV) { ucode = SEGV_MAPERR; } else if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { signo = SIGSEGV; ucode = SEGV_ACCERR; } else { signo = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ signo = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ signo = SIGSEGV; ucode = SEGV_ACCERR; } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); return; #endif case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); return; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); - fill_frame_regs(frame, ®s); if (dtrace_return_probe_ptr != NULL) - dtrace_return_probe_ptr(®s); + dtrace_return_probe_ptr(frame); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); fpudna(); return; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * For now, supporting kernel handler * registration for FPU traps is overkill. */ trap_fatal(frame, 0); return; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; return; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; return; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; return; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; return; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; return; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; return; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* trace trap */ /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap()) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & ~0xf); return; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB /* XXX %dr6 is not quite reentrant. */ dr6 = rdr6(); load_dr6(dr6 & ~0x4000); if (kdb_trap(type, dr6, frame)) return; #endif break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); return; #endif } trap_fatal(frame, 0); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), fubyte((void *)(frame->tf_rip + 3)), fubyte((void *)(frame->tf_rip + 4)), fubyte((void *)(frame->tf_rip + 5)), fubyte((void *)(frame->tf_rip + 6)), fubyte((void *)(frame->tf_rip + 7))); } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); userret: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } /* * Ensure that we ignore any DTrace-induced faults. This function cannot * be instrumented, so it cannot generate such faults itself. */ void trap_check(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, frame->tf_trapno) != 0) return; #endif trap(frame); } static int trap_pfault(struct trapframe *frame, int usermode) { struct thread *td; struct proc *p; vm_map_t map; vm_offset_t va; int rv; vm_prot_t ftype; vm_offset_t eva; td = curthread; p = td->td_proc; eva = frame->tf_addr; rv = 0; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) goto nogo; map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a usermode address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. */ if (!usermode && (td->td_intr_nesting_level != 0 || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss; u_int type; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)], &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, TRAPF_USERMODE(frame) ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); ss = frame->tf_ss & 0xffff; printf("stack pointer = 0x%x:0x%lx\n", ss, frame->tf_rsp); printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_rflags & PSL_T) printf("trace trap, "); if (frame->tf_rflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) printf("nested task, "); if (frame->tf_rflags & PSL_RF) printf("resume, "); printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_panic || kdb_active) if (kdb_trap(type, 0, frame)) return; #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). */ void dblfault_handler(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault\n" "rip %#lx rsp %#lx rbp %#lx\n" "rax %#lx rdx %#lx rbx %#lx\n" "rcx %#lx rsi %#lx rdi %#lx\n" "r8 %#lx r9 %#lx r10 %#lx\n" "r11 %#lx r12 %#lx r13 %#lx\n" "r14 %#lx r15 %#lx rflags %#lx\n" "cs %#lx ss %#lx ds %#hx es %#hx fs %#hx gs %#hx\n" "fsbase %#lx gsbase %#lx kgsbase %#lx\n", frame->tf_rip, frame->tf_rsp, frame->tf_rbp, frame->tf_rax, frame->tf_rdx, frame->tf_rbx, frame->tf_rcx, frame->tf_rdi, frame->tf_rsi, frame->tf_r8, frame->tf_r9, frame->tf_r10, frame->tf_r11, frame->tf_r12, frame->tf_r13, frame->tf_r14, frame->tf_r15, frame->tf_rflags, frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es, frame->tf_fs, frame->tf_gs, rdmsr(MSR_FSBASE), rdmsr(MSR_GSBASE), rdmsr(MSR_KGSBASE)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; register_t *argp; struct syscall_args *sa; caddr_t params; int reg, regcnt, error; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; reg = 0; regcnt = 6; params = (caddr_t)frame->tf_rsp + sizeof(register_t); sa->code = frame->tf_rax; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = frame->tf_rdi; reg++; regcnt--; } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]), ("Too many syscall arguments!")); error = 0; argp = &frame->tf_rdi; argp += reg; bcopy(argp, sa->args, sizeof(sa->args[0]) * regcnt); if (sa->narg > regcnt) { KASSERT(params != NULL, ("copyin args with no params!")); error = copyin(params, &sa->args[regcnt], (sa->narg - regcnt) * sizeof(sa->args[0])); } if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; } return (error); } #include "../../kern/subr_syscall.c" /* * System call handler for native binaries. The trap frame is already * set up by the assembler trampoline and a pointer to it is saved in * td_frame. */ void amd64_syscall(struct thread *td, int traced) { int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!TRAPF_USERMODE(td->td_frame)) { panic("syscall"); /* NOT REACHED */ } #endif error = syscallenter(td); /* * Traced syscall. */ if (__predict_false(traced)) { td->td_frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)td->td_frame->tf_rip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_md.md_invl_gen.gen == 0, ("System call %s returning with leaked invl_gen %lu", syscallname(td->td_proc, td->td_sa.code), td->td_md.md_invl_gen.gen)); syscallret(td, error); /* * If the user-supplied value of %rip is not a canonical * address, then some CPUs will trigger a ring 0 #GP during * the sysret instruction. However, the fault handler would * execute in ring 0 with the user's %gs and %rsp which would * not be safe. Instead, use the full return path which * catches the problem safely. */ if (__predict_false(td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap_impl.h =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap_impl.h (revision 327550) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap_impl.h (revision 327551) @@ -1,235 +1,235 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _FASTTRAP_IMPL_H #define _FASTTRAP_IMPL_H #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * Fasttrap Providers, Probes and Tracepoints * * Each Solaris process can have multiple providers -- the pid provider as * well as any number of user-level statically defined tracing (USDT) * providers. Those providers are each represented by a fasttrap_provider_t. * All providers for a given process have a pointer to a shared * fasttrap_proc_t. The fasttrap_proc_t has two states: active or defunct. * When the count of active providers goes to zero it becomes defunct; a * provider drops its active count when it is removed individually or as part * of a mass removal when a process exits or performs an exec. * * Each probe is represented by a fasttrap_probe_t which has a pointer to * its associated provider as well as a list of fasttrap_id_tp_t structures * which are tuples combining a fasttrap_id_t and a fasttrap_tracepoint_t. * A fasttrap_tracepoint_t represents the actual point of instrumentation * and it contains two lists of fasttrap_id_t structures (to be fired pre- * and post-instruction emulation) that identify the probes attached to the * tracepoint. Tracepoints also have a pointer to the fasttrap_proc_t for the * process they trace which is used when looking up a tracepoint both when a * probe fires and when enabling and disabling probes. * * It's important to note that probes are preallocated with the necessary * number of tracepoints, but that tracepoints can be shared by probes and * swapped between probes. If a probe's preallocated tracepoint is enabled * (and, therefore, the associated probe is enabled), and that probe is * then disabled, ownership of that tracepoint may be exchanged for an * unused tracepoint belonging to another probe that was attached to the * enabled tracepoint. * * On FreeBSD, fasttrap providers also maintain per-thread scratch space for use * by the ISA-specific fasttrap code. The fasttrap_scrblock_t type stores the * virtual address of a page-sized memory block that is mapped into a process' * address space. Each block is carved up into chunks (fasttrap_scrspace_t) for * use by individual threads, which keep the address of their scratch space * chunk in their struct kdtrace_thread. A thread's scratch space isn't released * until it exits. */ #ifndef illumos typedef struct fasttrap_scrblock { vm_offset_t ftsb_addr; /* address of a scratch block */ LIST_ENTRY(fasttrap_scrblock) ftsb_next;/* next block in list */ } fasttrap_scrblock_t; #define FASTTRAP_SCRBLOCK_SIZE PAGE_SIZE typedef struct fasttrap_scrspace { uintptr_t ftss_addr; /* scratch space address */ LIST_ENTRY(fasttrap_scrspace) ftss_next;/* next in list */ } fasttrap_scrspace_t; #define FASTTRAP_SCRSPACE_SIZE 64 #endif typedef struct fasttrap_proc { pid_t ftpc_pid; /* process ID for this proc */ uint64_t ftpc_acount; /* count of active providers */ uint64_t ftpc_rcount; /* count of extant providers */ kmutex_t ftpc_mtx; /* lock on all but acount */ struct fasttrap_proc *ftpc_next; /* next proc in hash chain */ #ifndef illumos LIST_HEAD(, fasttrap_scrblock) ftpc_scrblks; /* mapped scratch blocks */ LIST_HEAD(, fasttrap_scrspace) ftpc_fscr; /* free scratch space */ LIST_HEAD(, fasttrap_scrspace) ftpc_ascr; /* used scratch space */ #endif } fasttrap_proc_t; typedef struct fasttrap_provider { pid_t ftp_pid; /* process ID for this prov */ char ftp_name[DTRACE_PROVNAMELEN]; /* prov name (w/o the pid) */ dtrace_provider_id_t ftp_provid; /* DTrace provider handle */ uint_t ftp_marked; /* mark for possible removal */ uint_t ftp_retired; /* mark when retired */ kmutex_t ftp_mtx; /* provider lock */ kmutex_t ftp_cmtx; /* lock on creating probes */ uint64_t ftp_rcount; /* enabled probes ref count */ uint64_t ftp_ccount; /* consumers creating probes */ uint64_t ftp_mcount; /* meta provider count */ fasttrap_proc_t *ftp_proc; /* shared proc for all provs */ struct fasttrap_provider *ftp_next; /* next prov in hash chain */ } fasttrap_provider_t; typedef struct fasttrap_id fasttrap_id_t; typedef struct fasttrap_probe fasttrap_probe_t; typedef struct fasttrap_tracepoint fasttrap_tracepoint_t; struct fasttrap_id { fasttrap_probe_t *fti_probe; /* referrring probe */ fasttrap_id_t *fti_next; /* enabled probe list on tp */ fasttrap_probe_type_t fti_ptype; /* probe type */ }; typedef struct fasttrap_id_tp { fasttrap_id_t fit_id; fasttrap_tracepoint_t *fit_tp; } fasttrap_id_tp_t; struct fasttrap_probe { dtrace_id_t ftp_id; /* DTrace probe identifier */ pid_t ftp_pid; /* pid for this probe */ fasttrap_provider_t *ftp_prov; /* this probe's provider */ uintptr_t ftp_faddr; /* associated function's addr */ size_t ftp_fsize; /* associated function's size */ uint64_t ftp_gen; /* modification generation */ uint64_t ftp_ntps; /* number of tracepoints */ uint8_t *ftp_argmap; /* native to translated args */ uint8_t ftp_nargs; /* translated argument count */ uint8_t ftp_enabled; /* is this probe enabled */ char *ftp_xtypes; /* translated types index */ char *ftp_ntypes; /* native types index */ fasttrap_id_tp_t ftp_tps[1]; /* flexible array */ }; #define FASTTRAP_ID_INDEX(id) \ ((fasttrap_id_tp_t *)(((char *)(id) - offsetof(fasttrap_id_tp_t, fit_id))) - \ &(id)->fti_probe->ftp_tps[0]) struct fasttrap_tracepoint { fasttrap_proc_t *ftt_proc; /* associated process struct */ uintptr_t ftt_pc; /* address of tracepoint */ pid_t ftt_pid; /* pid of tracepoint */ fasttrap_machtp_t ftt_mtp; /* ISA-specific portion */ fasttrap_id_t *ftt_ids; /* NULL-terminated list */ fasttrap_id_t *ftt_retids; /* NULL-terminated list */ fasttrap_tracepoint_t *ftt_next; /* link in global hash */ }; typedef struct fasttrap_bucket { kmutex_t ftb_mtx; /* bucket lock */ void *ftb_data; /* data payload */ uint8_t ftb_pad[64 - sizeof (kmutex_t) - sizeof (void *)]; } fasttrap_bucket_t; typedef struct fasttrap_hash { ulong_t fth_nent; /* power-of-2 num. of entries */ ulong_t fth_mask; /* fth_nent - 1 */ fasttrap_bucket_t *fth_table; /* array of buckets */ } fasttrap_hash_t; /* * If at some future point these assembly functions become observable by * DTrace, then these defines should become separate functions so that the * fasttrap provider doesn't trigger probes during internal operations. */ #define fasttrap_copyout copyout #define fasttrap_fuword32 fuword32 #define fasttrap_suword32 suword32 #define fasttrap_suword64 suword64 #ifdef __amd64__ #define fasttrap_fulword fuword64 #define fasttrap_sulword suword64 #else #define fasttrap_fulword fuword32 #define fasttrap_sulword suword32 #endif extern void fasttrap_sigtrap(proc_t *, kthread_t *, uintptr_t); #ifndef illumos extern fasttrap_scrspace_t *fasttrap_scraddr(struct thread *, fasttrap_proc_t *); #endif extern dtrace_id_t fasttrap_probe_id; extern fasttrap_hash_t fasttrap_tpoints; #ifndef illumos extern struct rmlock fasttrap_tp_lock; #endif #define FASTTRAP_TPOINTS_INDEX(pid, pc) \ (((pc) / sizeof (fasttrap_instr_t) + (pid)) & fasttrap_tpoints.fth_mask) /* * Must be implemented by fasttrap_isa.c */ extern int fasttrap_tracepoint_init(proc_t *, fasttrap_tracepoint_t *, uintptr_t, fasttrap_probe_type_t); extern int fasttrap_tracepoint_install(proc_t *, fasttrap_tracepoint_t *); extern int fasttrap_tracepoint_remove(proc_t *, fasttrap_tracepoint_t *); -struct reg; -extern int fasttrap_pid_probe(struct reg *); -extern int fasttrap_return_probe(struct reg *); +struct trapframe; +extern int fasttrap_pid_probe(struct trapframe *); +extern int fasttrap_return_probe(struct trapframe *); extern uint64_t fasttrap_pid_getarg(void *, dtrace_id_t, void *, int, int); extern uint64_t fasttrap_usdt_getarg(void *, dtrace_id_t, void *, int, int); #ifdef __cplusplus } #endif #endif /* _FASTTRAP_IMPL_H */ Index: stable/11/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c (revision 327550) +++ stable/11/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c (revision 327551) @@ -1,1905 +1,1910 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifdef illumos #pragma ident "%Z%%M% %I% %E% SMI" #endif #include #include #include #include #include #ifdef illumos #include #include #include #include #else #include #include #include #include #include #include #include #include #include #endif #include #ifdef illumos #include #include #else #include #endif /* illumos */ #ifdef __i386__ #define r_rax r_eax #define r_rbx r_ebx #define r_rip r_eip #define r_rflags r_eflags #define r_rsp r_esp #define r_rbp r_ebp #endif /* * Lossless User-Land Tracing on x86 * --------------------------------- * * The execution of most instructions is not dependent on the address; for * these instructions it is sufficient to copy them into the user process's * address space and execute them. To effectively single-step an instruction * in user-land, we copy out the following sequence of instructions to scratch * space in the user thread's ulwp_t structure. * * We then set the program counter (%eip or %rip) to point to this scratch * space. Once execution resumes, the original instruction is executed and * then control flow is redirected to what was originally the subsequent * instruction. If the kernel attemps to deliver a signal while single- * stepping, the signal is deferred and the program counter is moved into the * second sequence of instructions. The second sequence ends in a trap into * the kernel where the deferred signal is then properly handled and delivered. * * For instructions whose execute is position dependent, we perform simple * emulation. These instructions are limited to control transfer * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle * of %rip-relative addressing that means that almost any instruction can be * position dependent. For all the details on how we emulate generic * instructions included %rip-relative instructions, see the code in * fasttrap_pid_probe() below where we handle instructions of type * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing). */ #define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) #define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) #define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) #define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) #define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) #define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) #define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) #define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) #define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) #define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) #define FASTTRAP_REX_B(rex) ((rex) & 1) #define FASTTRAP_REX(w, r, x, b) \ (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) /* * Single-byte op-codes. */ #define FASTTRAP_PUSHL_EBP 0x55 #define FASTTRAP_JO 0x70 #define FASTTRAP_JNO 0x71 #define FASTTRAP_JB 0x72 #define FASTTRAP_JAE 0x73 #define FASTTRAP_JE 0x74 #define FASTTRAP_JNE 0x75 #define FASTTRAP_JBE 0x76 #define FASTTRAP_JA 0x77 #define FASTTRAP_JS 0x78 #define FASTTRAP_JNS 0x79 #define FASTTRAP_JP 0x7a #define FASTTRAP_JNP 0x7b #define FASTTRAP_JL 0x7c #define FASTTRAP_JGE 0x7d #define FASTTRAP_JLE 0x7e #define FASTTRAP_JG 0x7f #define FASTTRAP_NOP 0x90 #define FASTTRAP_MOV_EAX 0xb8 #define FASTTRAP_MOV_ECX 0xb9 #define FASTTRAP_RET16 0xc2 #define FASTTRAP_RET 0xc3 #define FASTTRAP_LOOPNZ 0xe0 #define FASTTRAP_LOOPZ 0xe1 #define FASTTRAP_LOOP 0xe2 #define FASTTRAP_JCXZ 0xe3 #define FASTTRAP_CALL 0xe8 #define FASTTRAP_JMP32 0xe9 #define FASTTRAP_JMP8 0xeb #define FASTTRAP_INT3 0xcc #define FASTTRAP_INT 0xcd #define FASTTRAP_2_BYTE_OP 0x0f #define FASTTRAP_GROUP5_OP 0xff /* * Two-byte op-codes (second byte only). */ #define FASTTRAP_0F_JO 0x80 #define FASTTRAP_0F_JNO 0x81 #define FASTTRAP_0F_JB 0x82 #define FASTTRAP_0F_JAE 0x83 #define FASTTRAP_0F_JE 0x84 #define FASTTRAP_0F_JNE 0x85 #define FASTTRAP_0F_JBE 0x86 #define FASTTRAP_0F_JA 0x87 #define FASTTRAP_0F_JS 0x88 #define FASTTRAP_0F_JNS 0x89 #define FASTTRAP_0F_JP 0x8a #define FASTTRAP_0F_JNP 0x8b #define FASTTRAP_0F_JL 0x8c #define FASTTRAP_0F_JGE 0x8d #define FASTTRAP_0F_JLE 0x8e #define FASTTRAP_0F_JG 0x8f #define FASTTRAP_EFLAGS_OF 0x800 #define FASTTRAP_EFLAGS_DF 0x400 #define FASTTRAP_EFLAGS_SF 0x080 #define FASTTRAP_EFLAGS_ZF 0x040 #define FASTTRAP_EFLAGS_AF 0x010 #define FASTTRAP_EFLAGS_PF 0x004 #define FASTTRAP_EFLAGS_CF 0x001 /* * Instruction prefixes. */ #define FASTTRAP_PREFIX_OPERAND 0x66 #define FASTTRAP_PREFIX_ADDRESS 0x67 #define FASTTRAP_PREFIX_CS 0x2E #define FASTTRAP_PREFIX_DS 0x3E #define FASTTRAP_PREFIX_ES 0x26 #define FASTTRAP_PREFIX_FS 0x64 #define FASTTRAP_PREFIX_GS 0x65 #define FASTTRAP_PREFIX_SS 0x36 #define FASTTRAP_PREFIX_LOCK 0xF0 #define FASTTRAP_PREFIX_REP 0xF3 #define FASTTRAP_PREFIX_REPNE 0xF2 #define FASTTRAP_NOREG 0xff /* * Map between instruction register encodings and the kernel constants which * correspond to indicies into struct regs. */ #ifdef __amd64 static const uint8_t regmap[16] = { REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, }; #else static const uint8_t regmap[8] = { EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI }; #endif static ulong_t fasttrap_getreg(struct reg *, uint_t); static uint64_t fasttrap_anarg(struct reg *rp, int function_entry, int argno) { uint64_t value = 0; int shift = function_entry ? 1 : 0; #ifdef __amd64 if (curproc->p_model == DATAMODEL_LP64) { uintptr_t *stack; /* * In 64-bit mode, the first six arguments are stored in * registers. */ if (argno < 6) switch (argno) { case 0: return (rp->r_rdi); case 1: return (rp->r_rsi); case 2: return (rp->r_rdx); case 3: return (rp->r_rcx); case 4: return (rp->r_r8); case 5: return (rp->r_r9); } stack = (uintptr_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fulword(&stack[argno - 6 + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); } else { #endif #ifdef __i386 uint32_t *stack = (uint32_t *)rp->r_esp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fuword32(&stack[argno + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); #endif #ifdef __amd64 } #endif return (value); } /*ARGSUSED*/ int fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, fasttrap_probe_type_t type) { uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; size_t len = FASTTRAP_MAX_INSTR_SIZE; size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); uint_t start = 0; int rmindex, size; uint8_t seg, rex = 0; /* * Read the instruction at the given address out of the process's * address space. We don't have to worry about a debugger * changing this instruction before we overwrite it with our trap * instruction since P_PR_LOCK is set. Since instructions can span * pages, we potentially read the instruction in two parts. If the * second part fails, we just zero out that part of the instruction. */ if (uread(p, &instr[0], first, pc) != 0) return (-1); if (len > first && uread(p, &instr[first], len - first, pc + first) != 0) { bzero(&instr[first], len - first); len = first; } /* * If the disassembly fails, then we have a malformed instruction. */ if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) return (-1); /* * Make sure the disassembler isn't completely broken. */ ASSERT(-1 <= rmindex && rmindex < size); /* * If the computed size is greater than the number of bytes read, * then it was a malformed instruction possibly because it fell on a * page boundary and the subsequent page was missing or because of * some malicious user. */ if (size > len) return (-1); tp->ftt_size = (uint8_t)size; tp->ftt_segment = FASTTRAP_SEG_NONE; /* * Find the start of the instruction's opcode by processing any * legacy prefixes. */ for (;;) { seg = 0; switch (instr[start]) { case FASTTRAP_PREFIX_SS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_GS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_FS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_ES: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_DS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_CS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_OPERAND: case FASTTRAP_PREFIX_ADDRESS: case FASTTRAP_PREFIX_LOCK: case FASTTRAP_PREFIX_REP: case FASTTRAP_PREFIX_REPNE: if (seg != 0) { /* * It's illegal for an instruction to specify * two segment prefixes -- give up on this * illegal instruction. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE) return (-1); tp->ftt_segment = seg; } start++; continue; } break; } #ifdef __amd64 /* * Identify the REX prefix on 64-bit processes. */ if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) rex = instr[start++]; #endif /* * Now that we're pretty sure that the instruction is okay, copy the * valid part to the tracepoint. */ bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); tp->ftt_type = FASTTRAP_T_COMMON; if (instr[start] == FASTTRAP_2_BYTE_OP) { switch (instr[start + 1]) { case FASTTRAP_0F_JO: case FASTTRAP_0F_JNO: case FASTTRAP_0F_JB: case FASTTRAP_0F_JAE: case FASTTRAP_0F_JE: case FASTTRAP_0F_JNE: case FASTTRAP_0F_JBE: case FASTTRAP_0F_JA: case FASTTRAP_0F_JS: case FASTTRAP_0F_JNS: case FASTTRAP_0F_JP: case FASTTRAP_0F_JNP: case FASTTRAP_0F_JL: case FASTTRAP_0F_JGE: case FASTTRAP_0F_JLE: case FASTTRAP_0F_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 2]; break; } } else if (instr[start] == FASTTRAP_GROUP5_OP) { uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); if (reg == 2 || reg == 4) { uint_t i, sz; if (reg == 2) tp->ftt_type = FASTTRAP_T_CALL; else tp->ftt_type = FASTTRAP_T_JMP; if (mod == 3) tp->ftt_code = 2; else tp->ftt_code = 1; ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * See AMD x86-64 Architecture Programmer's Manual * Volume 3, Section 1.2.7, Table 1-12, and * Appendix A.3.1, Table A-15. */ if (mod != 3 && rm == 4) { uint8_t sib = instr[start + 2]; uint_t index = FASTTRAP_SIB_INDEX(sib); uint_t base = FASTTRAP_SIB_BASE(sib); tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); tp->ftt_index = (index == 4) ? FASTTRAP_NOREG : regmap[index | (FASTTRAP_REX_X(rex) << 3)]; tp->ftt_base = (mod == 0 && base == 5) ? FASTTRAP_NOREG : regmap[base | (FASTTRAP_REX_B(rex) << 3)]; i = 3; sz = mod == 1 ? 1 : 4; } else { /* * In 64-bit mode, mod == 0 and r/m == 5 * denotes %rip-relative addressing; in 32-bit * mode, the base register isn't used. In both * modes, there is a 32-bit operand. */ if (mod == 0 && rm == 5) { #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) tp->ftt_base = REG_RIP; else #endif tp->ftt_base = FASTTRAP_NOREG; sz = 4; } else { uint8_t base = rm | (FASTTRAP_REX_B(rex) << 3); tp->ftt_base = regmap[base]; sz = mod == 1 ? 1 : mod == 2 ? 4 : 0; } tp->ftt_index = FASTTRAP_NOREG; i = 2; } if (sz == 1) { tp->ftt_dest = *(int8_t *)&instr[start + i]; } else if (sz == 4) { /* LINTED - alignment */ tp->ftt_dest = *(int32_t *)&instr[start + i]; } else { tp->ftt_dest = 0; } } } else { switch (instr[start]) { case FASTTRAP_RET: tp->ftt_type = FASTTRAP_T_RET; break; case FASTTRAP_RET16: tp->ftt_type = FASTTRAP_T_RET16; /* LINTED - alignment */ tp->ftt_dest = *(uint16_t *)&instr[start + 1]; break; case FASTTRAP_JO: case FASTTRAP_JNO: case FASTTRAP_JB: case FASTTRAP_JAE: case FASTTRAP_JE: case FASTTRAP_JNE: case FASTTRAP_JBE: case FASTTRAP_JA: case FASTTRAP_JS: case FASTTRAP_JNS: case FASTTRAP_JP: case FASTTRAP_JNP: case FASTTRAP_JL: case FASTTRAP_JGE: case FASTTRAP_JLE: case FASTTRAP_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_LOOPNZ: case FASTTRAP_LOOPZ: case FASTTRAP_LOOP: tp->ftt_type = FASTTRAP_T_LOOP; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_JCXZ: tp->ftt_type = FASTTRAP_T_JCXZ; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_CALL: tp->ftt_type = FASTTRAP_T_CALL; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; tp->ftt_code = 0; break; case FASTTRAP_JMP32: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; break; case FASTTRAP_JMP8: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_PUSHL_EBP: if (start == 0) tp->ftt_type = FASTTRAP_T_PUSHL_EBP; break; case FASTTRAP_NOP: #ifdef __amd64 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * On amd64 we have to be careful not to confuse a nop * (actually xchgl %eax, %eax) with an instruction using * the same opcode, but that does something different * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). */ if (FASTTRAP_REX_B(rex) == 0) #endif tp->ftt_type = FASTTRAP_T_NOP; break; case FASTTRAP_INT3: /* * The pid provider shares the int3 trap with debugger * breakpoints so we can't instrument them. */ ASSERT(instr[start] == FASTTRAP_INSTR); return (-1); case FASTTRAP_INT: /* * Interrupts seem like they could be traced with * no negative implications, but it's possible that * a thread could be redirected by the trap handling * code which would eventually return to the * instruction after the interrupt. If the interrupt * were in our scratch space, the subsequent * instruction might be overwritten before we return. * Accordingly we refuse to instrument any interrupt. */ return (-1); } } #ifdef __amd64 if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { /* * If the process is 64-bit and the instruction type is still * FASTTRAP_T_COMMON -- meaning we're going to copy it out an * execute it -- we need to watch for %rip-relative * addressing mode. See the portion of fasttrap_pid_probe() * below where we handle tracepoints with type * FASTTRAP_T_COMMON for how we emulate instructions that * employ %rip-relative addressing. */ if (rmindex != -1) { uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); ASSERT(rmindex > start); if (mod == 0 && rm == 5) { /* * We need to be sure to avoid other * registers used by this instruction. While * the reg field may determine the op code * rather than denoting a register, assuming * that it denotes a register is always safe. * We leave the REX field intact and use * whatever value's there for simplicity. */ if (reg != 0) { tp->ftt_ripmode = FASTTRAP_RIP_1 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 0; } else { tp->ftt_ripmode = FASTTRAP_RIP_2 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 1; } tp->ftt_modrm = tp->ftt_instr[rmindex]; tp->ftt_instr[rmindex] = FASTTRAP_MODRM(2, reg, rm); } } } #endif return (0); } int fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) { fasttrap_instr_t instr = FASTTRAP_INSTR; if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) return (-1); return (0); } int fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) { uint8_t instr; /* * Distinguish between read or write failures and a changed * instruction. */ if (uread(p, &instr, 1, tp->ftt_pc) != 0) return (0); if (instr != FASTTRAP_INSTR) return (0); if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) return (-1); return (0); } #ifdef __amd64 static uintptr_t fasttrap_fulword_noerr(const void *uaddr) { uintptr_t ret; if ((ret = fasttrap_fulword(uaddr)) != -1) return (ret); return (0); } #endif #ifdef __i386__ static uint32_t fasttrap_fuword32_noerr(const void *uaddr) { uint32_t ret; if ((ret = fasttrap_fuword32(uaddr)) != -1) return (ret); return (0); } #endif static void fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid, uintptr_t new_pc) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket; fasttrap_id_t *id; #ifdef illumos kmutex_t *pid_mtx; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; mutex_enter(pid_mtx); #else struct rm_priotracker tracker; rm_rlock(&fasttrap_tp_lock, &tracker); #endif bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * Don't sweat it if we can't find the tracepoint again; unlike * when we're in fasttrap_pid_probe(), finding the tracepoint here * is not essential to the correct execution of the process. */ if (tp == NULL) { #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif return; } for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { /* * If there's a branch that could act as a return site, we * need to trace it, and check here if the program counter is * external to the function. */ if (tp->ftt_type != FASTTRAP_T_RET && tp->ftt_type != FASTTRAP_T_RET16 && new_pc - id->fti_probe->ftp_faddr < id->fti_probe->ftp_fsize) continue; dtrace_probe(id->fti_probe->ftp_id, pc - id->fti_probe->ftp_faddr, rp->r_rax, rp->r_rbx, 0, 0); } #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif } static void fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr) { #ifdef illumos sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGSEGV; sqp->sq_info.si_code = SEGV_MAPERR; sqp->sq_info.si_addr = (caddr_t)addr; mutex_enter(&p->p_lock); sigaddqa(p, t, sqp); mutex_exit(&p->p_lock); if (t != NULL) aston(t); #else ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); ksiginfo_init(ksi); ksi->ksi_signo = SIGSEGV; ksi->ksi_code = SEGV_MAPERR; ksi->ksi_addr = (caddr_t)addr; (void) tdksignal(t, SIGSEGV, ksi); #endif } #ifdef __amd64 static void fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc, uintptr_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uintptr_t *stack = (uintptr_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; if (x < 6) argv[i] = (&rp->r_rdi)[x]; else argv[i] = fasttrap_fulword_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } #endif #ifdef __i386__ static void fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc, uint32_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uint32_t *stack = (uint32_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; argv[i] = fasttrap_fuword32_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } #endif static int fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr) { proc_t *p = curproc; #ifdef __i386__ struct segment_descriptor *desc; #else struct user_segment_descriptor *desc; #endif uint16_t sel = 0, ndx, type; uintptr_t limit; switch (tp->ftt_segment) { case FASTTRAP_SEG_CS: sel = rp->r_cs; break; case FASTTRAP_SEG_DS: sel = rp->r_ds; break; case FASTTRAP_SEG_ES: sel = rp->r_es; break; case FASTTRAP_SEG_FS: sel = rp->r_fs; break; case FASTTRAP_SEG_GS: sel = rp->r_gs; break; case FASTTRAP_SEG_SS: sel = rp->r_ss; break; } /* * Make sure the given segment register specifies a user priority * selector rather than a kernel selector. */ if (ISPL(sel) != SEL_UPL) return (-1); ndx = IDXSEL(sel); /* * Check the bounds and grab the descriptor out of the specified * descriptor table. */ if (ISLDT(sel)) { #ifdef __i386__ if (ndx > p->p_md.md_ldt->ldt_len) return (-1); desc = (struct segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #else if (ndx > max_ldt_segment) return (-1); desc = (struct user_segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #endif } else { if (ndx >= NGDT) return (-1); #ifdef __i386__ desc = &gdt[ndx].sd; #else desc = &gdt[ndx]; #endif } /* * The descriptor must have user privilege level and it must be * present in memory. */ if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1) return (-1); type = desc->sd_type; /* * If the S bit in the type field is not set, this descriptor can * only be used in system context. */ if ((type & 0x10) != 0x10) return (-1); limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1); if (tp->ftt_segment == FASTTRAP_SEG_CS) { /* * The code/data bit and readable bit must both be set. */ if ((type & 0xa) != 0xa) return (-1); if (*addr > limit) return (-1); } else { /* * The code/data bit must be clear. */ if ((type & 0x8) != 0) return (-1); /* * If the expand-down bit is clear, we just check the limit as * it would naturally be applied. Otherwise, we need to check * that the address is the range [limit + 1 .. 0xffff] or * [limit + 1 ... 0xffffffff] depending on if the default * operand size bit is set. */ if ((type & 0x4) == 0) { if (*addr > limit) return (-1); } else if (desc->sd_def32) { if (*addr < limit + 1 || 0xffff < *addr) return (-1); } else { if (*addr < limit + 1 || 0xffffffff < *addr) return (-1); } } *addr += USD_GETBASE(desc); return (0); } int -fasttrap_pid_probe(struct reg *rp) +fasttrap_pid_probe(struct trapframe *tf) { - proc_t *p = curproc; -#ifndef illumos + struct reg reg, *rp; + proc_t *p = curproc, *pp; struct rm_priotracker tracker; - proc_t *pp; -#endif - uintptr_t pc = rp->r_rip - 1; + uintptr_t pc; uintptr_t new_pc = 0; fasttrap_bucket_t *bucket; #ifdef illumos kmutex_t *pid_mtx; #endif fasttrap_tracepoint_t *tp, tp_local; pid_t pid; dtrace_icookie_t cookie; uint_t is_enabled = 0; + fill_frame_regs(tf, ®); + rp = ® + + pc = rp->r_rip - 1; + /* * It's possible that a user (in a veritable orgy of bad planning) * could redirect this thread's flow of control before it reached the * return probe fasttrap. In this case we need to kill the process * since it's in a unrecoverable state. */ if (curthread->t_dtrace_step) { ASSERT(curthread->t_dtrace_on); fasttrap_sigtrap(p, curthread, pc); return (0); } /* * Clear all user tracing flags. */ curthread->t_dtrace_ft = 0; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef __amd64 curthread->t_dtrace_regv = 0; #endif /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ #ifdef illumos while (p->p_flag & SVFORK) { p = p->p_parent; } pid = p->p_pid; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; mutex_enter(pid_mtx); #else pp = p; sx_slock(&proctree_lock); while (pp->p_vmspace == pp->p_pptr->p_vmspace) pp = pp->p_pptr; pid = pp->p_pid; sx_sunlock(&proctree_lock); pp = NULL; rm_rlock(&fasttrap_tp_lock, &tracker); #endif bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * Lookup the tracepoint that the process just hit. */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * If we couldn't find a matching tracepoint, either a tracepoint has * been inserted without using the pid ioctl interface (see * fasttrap_ioctl), or somehow we have mislaid this tracepoint. */ if (tp == NULL) { #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif return (-1); } /* * Set the program counter to the address of the traced instruction * so that it looks right in ustack() output. */ rp->r_rip = pc; if (tp->ftt_ids != NULL) { fasttrap_id_t *id; #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); } else { uintptr_t t[5]; fasttrap_usdt_args64(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } } else { #else /* __amd64 */ uintptr_t s0, s1, s2, s3, s4, s5; uint32_t *stack = (uint32_t *)rp->r_esp; /* * In 32-bit mode, all arguments are passed on the * stack. If this is a function entry probe, we need * to skip the first entry on the stack as it * represents the return address rather than a * parameter to the function. */ s0 = fasttrap_fuword32_noerr(&stack[0]); s1 = fasttrap_fuword32_noerr(&stack[1]); s2 = fasttrap_fuword32_noerr(&stack[2]); s3 = fasttrap_fuword32_noerr(&stack[3]); s4 = fasttrap_fuword32_noerr(&stack[4]); s5 = fasttrap_fuword32_noerr(&stack[5]); for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, s1, s2, s3, s4, s5); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, s0, s1, s2, s3, s4); } else { uint32_t t[5]; fasttrap_usdt_args32(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } #endif /* __amd64 */ #ifdef __amd64 } #endif } /* * We're about to do a bunch of work so we cache a local copy of * the tracepoint to emulate the instruction, and then find the * tracepoint again later if we need to light up any return probes. */ tp_local = *tp; #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif tp = &tp_local; /* * Set the program counter to appear as though the traced instruction * had completely executed. This ensures that fasttrap_getreg() will * report the expected value for REG_RIP. */ rp->r_rip = pc + tp->ftt_size; /* * If there's an is-enabled probe connected to this tracepoint it * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' * instruction that was placed there by DTrace when the binary was * linked. As this probe is, in fact, enabled, we need to stuff 1 * into %eax or %rax. Accordingly, we can bypass all the instruction * emulation logic since we know the inevitable result. It's possible * that a user could construct a scenario where the 'is-enabled' * probe was on some other instruction, but that would be a rather * exotic way to shoot oneself in the foot. */ if (is_enabled) { rp->r_rax = 1; new_pc = rp->r_rip; goto done; } /* * We emulate certain types of instructions to ensure correctness * (in the case of position dependent instructions) or optimize * common cases. The rest we have the thread execute back in user- * land. */ switch (tp->ftt_type) { case FASTTRAP_T_RET: case FASTTRAP_T_RET16: { uintptr_t dst = 0; uintptr_t addr = 0; int ret = 0; /* * We have to emulate _every_ facet of the behavior of a ret * instruction including what happens if the load from %esp * fails; in that case, we send a SIGSEGV. */ #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { ret = dst = fasttrap_fulword((void *)rp->r_rsp); addr = rp->r_rsp + sizeof (uintptr_t); } else { #endif #ifdef __i386__ uint32_t dst32; ret = dst32 = fasttrap_fuword32((void *)rp->r_esp); dst = dst32; addr = rp->r_esp + sizeof (uint32_t); #endif #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } if (tp->ftt_type == FASTTRAP_T_RET16) addr += tp->ftt_dest; rp->r_rsp = addr; new_pc = dst; break; } case FASTTRAP_T_JCC: { uint_t taken = 0; switch (tp->ftt_code) { case FASTTRAP_JO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0; break; case FASTTRAP_JNO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0; break; case FASTTRAP_JB: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0; break; case FASTTRAP_JAE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0; break; case FASTTRAP_JE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JNE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JBE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 || (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JA: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 && (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0; break; case FASTTRAP_JNS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0; break; case FASTTRAP_JP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0; break; case FASTTRAP_JNP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0; break; case FASTTRAP_JL: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JGE: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JLE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 || ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JG: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_LOOP: { uint_t taken = 0; #ifdef __amd64 greg_t cx = rp->r_rcx--; #else greg_t cx = rp->r_ecx--; #endif switch (tp->ftt_code) { case FASTTRAP_LOOPNZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && cx != 0; break; case FASTTRAP_LOOPZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 && cx != 0; break; case FASTTRAP_LOOP: taken = (cx != 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_JCXZ: { #ifdef __amd64 greg_t cx = rp->r_rcx; #else greg_t cx = rp->r_ecx; #endif if (cx == 0) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_PUSHL_EBP: { int ret = 0; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { rp->r_rsp -= sizeof (uintptr_t); ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp); } else { #endif #ifdef __i386__ rp->r_rsp -= sizeof (uint32_t); ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp); #endif #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_NOP: new_pc = pc + tp->ftt_size; break; case FASTTRAP_T_JMP: case FASTTRAP_T_CALL: if (tp->ftt_code == 0) { new_pc = tp->ftt_dest; } else { uintptr_t value, addr = tp->ftt_dest; if (tp->ftt_base != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_base); if (tp->ftt_index != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_index) << tp->ftt_scale; if (tp->ftt_code == 1) { /* * If there's a segment prefix for this * instruction, we'll need to check permissions * and bounds on the given selector, and adjust * the address accordingly. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE && fasttrap_do_seg(tp, rp, &addr) != 0) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { #endif if ((value = fasttrap_fulword((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value; #ifdef __amd64 } else { uint32_t value32; addr = (uintptr_t)(uint32_t)addr; if ((value32 = fasttrap_fuword32((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value32; } #endif } else { new_pc = addr; } } /* * If this is a call instruction, we need to push the return * address onto the stack. If this fails, we send the process * a SIGSEGV and reset the pc to emulate what would happen if * this instruction weren't traced. */ if (tp->ftt_type == FASTTRAP_T_CALL) { int ret = 0; uintptr_t addr = 0, pcps; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { addr = rp->r_rsp - sizeof (uintptr_t); pcps = pc + tp->ftt_size; ret = fasttrap_sulword((void *)addr, pcps); } else { #endif addr = rp->r_rsp - sizeof (uint32_t); pcps = (uint32_t)(pc + tp->ftt_size); ret = fasttrap_suword32((void *)addr, pcps); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } rp->r_rsp = addr; } break; case FASTTRAP_T_COMMON: { uintptr_t addr; #if defined(__amd64) uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; #else uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; #endif uint_t i = 0; #ifdef illumos klwp_t *lwp = ttolwp(curthread); /* * Compute the address of the ulwp_t and step over the * ul_self pointer. The method used to store the user-land * thread pointer is very different on 32- and 64-bit * kernels. */ #if defined(__amd64) if (p->p_model == DATAMODEL_LP64) { addr = lwp->lwp_pcb.pcb_fsbase; addr += sizeof (void *); } else { addr = lwp->lwp_pcb.pcb_gsbase; addr += sizeof (caddr32_t); } #else addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc); addr += sizeof (void *); #endif #else /* !illumos */ fasttrap_scrspace_t *scrspace; scrspace = fasttrap_scraddr(curthread, tp->ftt_proc); if (scrspace == NULL) { /* * We failed to allocate scratch space for this thread. * Try to write the original instruction back out and * reset the pc. */ if (fasttrap_copyout(tp->ftt_instr, (void *)pc, tp->ftt_size)) fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } addr = scrspace->ftss_addr; #endif /* illumos */ /* * Generic Instruction Tracing * --------------------------- * * This is the layout of the scratch space in the user-land * thread structure for our generated instructions. * * 32-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp ftt_size> 5 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 37 * * 64-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 46 * * The %pc is set to a, and curthread->t_dtrace_astpc is set * to b. If we encounter a signal on the way out of the * kernel, trap() will set %pc to curthread->t_dtrace_astpc * so that we execute the original instruction and re-enter * the kernel rather than redirecting to the next instruction. * * If there are return probes (so we know that we're going to * need to reenter the kernel after executing the original * instruction), the scratch space will just contain the * original instruction followed by an interrupt -- the same * data as at b. * * %rip-relative Addressing * ------------------------ * * There's a further complication in 64-bit mode due to %rip- * relative addressing. While this is clearly a beneficial * architectural decision for position independent code, it's * hard not to see it as a personal attack against the pid * provider since before there was a relatively small set of * instructions to emulate; with %rip-relative addressing, * almost every instruction can potentially depend on the * address at which it's executed. Rather than emulating * the broad spectrum of instructions that can now be * position dependent, we emulate jumps and others as in * 32-bit mode, and take a different tack for instructions * using %rip-relative addressing. * * For every instruction that uses the ModRM byte, the * in-kernel disassembler reports its location. We use the * ModRM byte to identify that an instruction uses * %rip-relative addressing and to see what other registers * the instruction uses. To emulate those instructions, * we modify the instruction to be %rax-relative rather than * %rip-relative (or %rcx-relative if the instruction uses * %rax; or %r8- or %r9-relative if the REX.B is present so * we don't have to rewrite the REX prefix). We then load * the value that %rip would have been into the scratch * register and generate an instruction to reset the scratch * register back to its original value. The instruction * sequence looks like this: * * 64-mode %rip-relative bytes * ------------------------ ----- * a: <= 15 * movq $, % 6 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * 52 * * We set curthread->t_dtrace_regv so that upon receiving * a signal we can reset the value of the scratch register. */ ASSERT(tp->ftt_size <= FASTTRAP_MAX_INSTR_SIZE); curthread->t_dtrace_scrpc = addr; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; #ifdef __amd64 if (tp->ftt_ripmode != 0) { greg_t *reg = NULL; ASSERT(p->p_model == DATAMODEL_LP64); ASSERT(tp->ftt_ripmode & (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); /* * If this was a %rip-relative instruction, we change * it to be either a %rax- or %rcx-relative * instruction (depending on whether those registers * are used as another operand; or %r8- or %r9- * relative depending on the value of REX.B). We then * set that register and generate a movq instruction * to reset the value. */ if (tp->ftt_ripmode & FASTTRAP_RIP_X) scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); else scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); if (tp->ftt_ripmode & FASTTRAP_RIP_1) scratch[i++] = FASTTRAP_MOV_EAX; else scratch[i++] = FASTTRAP_MOV_ECX; switch (tp->ftt_ripmode) { case FASTTRAP_RIP_1: reg = &rp->r_rax; curthread->t_dtrace_reg = REG_RAX; break; case FASTTRAP_RIP_2: reg = &rp->r_rcx; curthread->t_dtrace_reg = REG_RCX; break; case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: reg = &rp->r_r8; curthread->t_dtrace_reg = REG_R8; break; case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: reg = &rp->r_r9; curthread->t_dtrace_reg = REG_R9; break; } /* LINTED - alignment */ *(uint64_t *)&scratch[i] = *reg; curthread->t_dtrace_regv = *reg; *reg = pc + tp->ftt_size; i += sizeof (uint64_t); } #endif /* * Generate the branch instruction to what would have * normally been the subsequent instruction. In 32-bit mode, * this is just a relative branch; in 64-bit mode this is a * %rip-relative branch that loads the 64-bit pc value * immediately after the jmp instruction. */ #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { scratch[i++] = FASTTRAP_GROUP5_OP; scratch[i++] = FASTTRAP_MODRM(0, 4, 5); /* LINTED - alignment */ *(uint32_t *)&scratch[i] = 0; i += sizeof (uint32_t); /* LINTED - alignment */ *(uint64_t *)&scratch[i] = pc + tp->ftt_size; i += sizeof (uint64_t); } else { #endif #ifdef __i386__ /* * Set up the jmp to the next instruction; note that * the size of the traced instruction cancels out. */ scratch[i++] = FASTTRAP_JMP32; /* LINTED - alignment */ *(uint32_t *)&scratch[i] = pc - addr - 5; i += sizeof (uint32_t); #endif #ifdef __amd64 } #endif curthread->t_dtrace_astpc = addr + i; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; scratch[i++] = FASTTRAP_INT; scratch[i++] = T_DTRACE_RET; ASSERT(i <= sizeof (scratch)); if (fasttrap_copyout(scratch, (char *)addr, i)) { fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } if (tp->ftt_retids != NULL) { curthread->t_dtrace_step = 1; curthread->t_dtrace_ret = 1; new_pc = curthread->t_dtrace_astpc; } else { new_pc = curthread->t_dtrace_scrpc; } curthread->t_dtrace_pc = pc; curthread->t_dtrace_npc = pc + tp->ftt_size; curthread->t_dtrace_on = 1; break; } default: panic("fasttrap: mishandled an instruction"); } done: /* * If there were no return probes when we first found the tracepoint, * we should feel no obligation to honor any return probes that were * subsequently enabled -- they'll just have to wait until the next * time around. */ if (tp->ftt_retids != NULL) { /* * We need to wait until the results of the instruction are * apparent before invoking any return probes. If this * instruction was emulated we can just call * fasttrap_return_common(); if it needs to be executed, we * need to wait until the user thread returns to the kernel. */ if (tp->ftt_type != FASTTRAP_T_COMMON) { /* * Set the program counter to the address of the traced * instruction so that it looks right in ustack() * output. We had previously set it to the end of the * instruction to simplify %rip-relative addressing. */ rp->r_rip = pc; fasttrap_return_common(rp, pc, pid, new_pc); } else { ASSERT(curthread->t_dtrace_ret != 0); ASSERT(curthread->t_dtrace_pc == pc); ASSERT(curthread->t_dtrace_scrpc != 0); ASSERT(new_pc == curthread->t_dtrace_astpc); } } rp->r_rip = new_pc; #ifndef illumos PROC_LOCK(p); proc_write_regs(curthread, rp); PROC_UNLOCK(p); #endif return (0); } int -fasttrap_return_probe(struct reg *rp) +fasttrap_return_probe(struct trapframe *tf) { + struct reg reg, *rp; proc_t *p = curproc; uintptr_t pc = curthread->t_dtrace_pc; uintptr_t npc = curthread->t_dtrace_npc; + fill_frame_regs(tf, ®); + rp = ® + curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef illumos /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ while (p->p_flag & SVFORK) { p = p->p_parent; } #endif /* * We set rp->r_rip to the address of the traced instruction so * that it appears to dtrace_probe() that we're on the original - * instruction, and so that the user can't easily detect our - * complex web of lies. dtrace_return_probe() (our caller) - * will correctly set %pc after we return. + * instruction. */ rp->r_rip = pc; fasttrap_return_common(rp, pc, p->p_pid, npc); return (0); } /*ARGSUSED*/ uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 1, argno)); } /*ARGSUSED*/ uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 0, argno)); } static ulong_t fasttrap_getreg(struct reg *rp, uint_t reg) { #ifdef __amd64 switch (reg) { case REG_R15: return (rp->r_r15); case REG_R14: return (rp->r_r14); case REG_R13: return (rp->r_r13); case REG_R12: return (rp->r_r12); case REG_R11: return (rp->r_r11); case REG_R10: return (rp->r_r10); case REG_R9: return (rp->r_r9); case REG_R8: return (rp->r_r8); case REG_RDI: return (rp->r_rdi); case REG_RSI: return (rp->r_rsi); case REG_RBP: return (rp->r_rbp); case REG_RBX: return (rp->r_rbx); case REG_RDX: return (rp->r_rdx); case REG_RCX: return (rp->r_rcx); case REG_RAX: return (rp->r_rax); case REG_TRAPNO: return (rp->r_trapno); case REG_ERR: return (rp->r_err); case REG_RIP: return (rp->r_rip); case REG_CS: return (rp->r_cs); #ifdef illumos case REG_RFL: return (rp->r_rfl); #endif case REG_RSP: return (rp->r_rsp); case REG_SS: return (rp->r_ss); case REG_FS: return (rp->r_fs); case REG_GS: return (rp->r_gs); case REG_DS: return (rp->r_ds); case REG_ES: return (rp->r_es); case REG_FSBASE: return (rdmsr(MSR_FSBASE)); case REG_GSBASE: return (rdmsr(MSR_GSBASE)); } panic("dtrace: illegal register constant"); /*NOTREACHED*/ #else #define _NGREG 19 if (reg >= _NGREG) panic("dtrace: illegal register constant"); return (((greg_t *)&rp->r_gs)[reg]); #endif } Index: stable/11/sys/cddl/contrib/opensolaris/uts/powerpc/dtrace/fasttrap_isa.c =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/powerpc/dtrace/fasttrap_isa.c (revision 327550) +++ stable/11/sys/cddl/contrib/opensolaris/uts/powerpc/dtrace/fasttrap_isa.c (revision 327551) @@ -1,542 +1,548 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* Portions Copyright 2013 Justin Hibbits */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #define OP(x) ((x) >> 26) #define OPX(x) (((x) >> 2) & 0x3FF) #define OP_BO(x) (((x) & 0x03E00000) >> 21) #define OP_BI(x) (((x) & 0x001F0000) >> 16) #define OP_RS(x) (((x) & 0x03E00000) >> 21) #define OP_RA(x) (((x) & 0x001F0000) >> 16) #define OP_RB(x) (((x) & 0x0000F100) >> 11) int fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) { fasttrap_instr_t instr = FASTTRAP_INSTR; if (uwrite(p, &instr, 4, tp->ftt_pc) != 0) return (-1); return (0); } int fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) { uint32_t instr; /* * Distinguish between read or write failures and a changed * instruction. */ if (uread(p, &instr, 4, tp->ftt_pc) != 0) return (0); if (instr != FASTTRAP_INSTR) return (0); if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0) return (-1); return (0); } int fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, fasttrap_probe_type_t type) { uint32_t instr; //int32_t disp; /* * Read the instruction at the given address out of the process's * address space. We don't have to worry about a debugger * changing this instruction before we overwrite it with our trap * instruction since P_PR_LOCK is set. */ if (uread(p, &instr, 4, pc) != 0) return (-1); /* * Decode the instruction to fill in the probe flags. We can have * the process execute most instructions on its own using a pc/npc * trick, but pc-relative control transfer present a problem since * we're relocating the instruction. We emulate these instructions * in the kernel. We assume a default type and over-write that as * needed. * * pc-relative instructions must be emulated for correctness; * other instructions (which represent a large set of commonly traced * instructions) are emulated or otherwise optimized for performance. */ tp->ftt_type = FASTTRAP_T_COMMON; tp->ftt_instr = instr; switch (OP(instr)) { /* The following are invalid for trapping (invalid opcodes, tw/twi). */ case 0: case 1: case 2: case 4: case 5: case 6: case 30: case 39: case 58: case 62: case 3: /* twi */ return (-1); case 31: /* tw */ if (OPX(instr) == 4) return (-1); else if (OPX(instr) == 444 && OP_RS(instr) == OP_RA(instr) && OP_RS(instr) == OP_RB(instr)) tp->ftt_type = FASTTRAP_T_NOP; break; case 16: tp->ftt_type = FASTTRAP_T_BC; tp->ftt_dest = instr & 0x0000FFFC; /* Extract target address */ if (instr & 0x00008000) tp->ftt_dest |= 0xFFFF0000; /* Use as offset if not absolute address. */ if (!(instr & 0x02)) tp->ftt_dest += pc; tp->ftt_bo = OP_BO(instr); tp->ftt_bi = OP_BI(instr); break; case 18: tp->ftt_type = FASTTRAP_T_B; tp->ftt_dest = instr & 0x03FFFFFC; /* Extract target address */ if (instr & 0x02000000) tp->ftt_dest |= 0xFC000000; /* Use as offset if not absolute address. */ if (!(instr & 0x02)) tp->ftt_dest += pc; break; case 19: switch (OPX(instr)) { case 528: /* bcctr */ tp->ftt_type = FASTTRAP_T_BCTR; tp->ftt_bo = OP_BO(instr); tp->ftt_bi = OP_BI(instr); break; case 16: /* bclr */ tp->ftt_type = FASTTRAP_T_BCTR; tp->ftt_bo = OP_BO(instr); tp->ftt_bi = OP_BI(instr); break; }; break; case 24: if (OP_RS(instr) == OP_RA(instr) && (instr & 0x0000FFFF) == 0) tp->ftt_type = FASTTRAP_T_NOP; break; }; /* * We don't know how this tracepoint is going to be used, but in case * it's used as part of a function return probe, we need to indicate * whether it's always a return site or only potentially a return * site. If it's part of a return probe, it's always going to be a * return from that function if it's a restore instruction or if * the previous instruction was a return. If we could reliably * distinguish jump tables from return sites, this wouldn't be * necessary. */ #if 0 if (tp->ftt_type != FASTTRAP_T_RESTORE && (uread(p, &instr, 4, pc - sizeof (instr)) != 0 || !(OP(instr) == 2 && OP3(instr) == OP3_RETURN))) tp->ftt_flags |= FASTTRAP_F_RETMAYBE; #endif return (0); } static uint64_t fasttrap_anarg(struct reg *rp, int argno) { uint64_t value; proc_t *p = curproc; /* The first 8 arguments are in registers. */ if (argno < 8) return rp->fixreg[argno + 3]; /* Arguments on stack start after SP+LR (2 register slots). */ if (SV_PROC_FLAG(p, SV_ILP32)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fuword32((void *)(rp->fixreg[1] + 8 + ((argno - 8) * sizeof(uint32_t)))); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); } else { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fuword64((void *)(rp->fixreg[1] + 48 + ((argno - 8) * sizeof(uint64_t)))); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); } return value; } uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, argno)); } uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, argno)); } static void fasttrap_usdt_args(fasttrap_probe_t *probe, struct reg *rp, int argc, uintptr_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; if (x < 8) argv[i] = rp->fixreg[x]; else if (SV_PROC_FLAG(curproc, SV_ILP32)) argv[i] = fuword32((void *)(rp->fixreg[1] + 8 + (x * sizeof(uint32_t)))); else argv[i] = fuword64((void *)(rp->fixreg[1] + 48 + (x * sizeof(uint64_t)))); } for (; i < argc; i++) { argv[i] = 0; } } static void fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid, uintptr_t new_pc) { struct rm_priotracker tracker; fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket; fasttrap_id_t *id; rm_rlock(&fasttrap_tp_lock, &tracker); bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * Don't sweat it if we can't find the tracepoint again; unlike * when we're in fasttrap_pid_probe(), finding the tracepoint here * is not essential to the correct execution of the process. */ if (tp == NULL) { rm_runlock(&fasttrap_tp_lock, &tracker); return; } for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { /* * If there's a branch that could act as a return site, we * need to trace it, and check here if the program counter is * external to the function. */ /* Skip function-local branches. */ if ((new_pc - id->fti_probe->ftp_faddr) < id->fti_probe->ftp_fsize) continue; dtrace_probe(id->fti_probe->ftp_id, pc - id->fti_probe->ftp_faddr, rp->fixreg[3], rp->fixreg[4], 0, 0); } rm_runlock(&fasttrap_tp_lock, &tracker); } static int fasttrap_branch_taken(int bo, int bi, struct reg *regs) { int crzero = 0; /* Branch always? */ if ((bo & 0x14) == 0x14) return 1; /* Handle decrementing ctr */ if (!(bo & 0x04)) { --regs->ctr; crzero = (regs->ctr == 0); if (bo & 0x10) { return (!(crzero ^ (bo >> 1))); } } return (crzero | (((regs->cr >> (31 - bi)) ^ (bo >> 3)) ^ 1)); } int -fasttrap_pid_probe(struct reg *rp) +fasttrap_pid_probe(struct trapframe *frame) { + struct reg reg, *rp; struct rm_priotracker tracker; proc_t *p = curproc; - uintptr_t pc = rp->pc; + uintptr_t pc; uintptr_t new_pc = 0; fasttrap_bucket_t *bucket; fasttrap_tracepoint_t *tp, tp_local; pid_t pid; dtrace_icookie_t cookie; uint_t is_enabled = 0; + fill_regs(curthread, ®); + rp = ® + pc = rp->pc; + /* * It's possible that a user (in a veritable orgy of bad planning) * could redirect this thread's flow of control before it reached the * return probe fasttrap. In this case we need to kill the process * since it's in a unrecoverable state. */ if (curthread->t_dtrace_step) { ASSERT(curthread->t_dtrace_on); fasttrap_sigtrap(p, curthread, pc); return (0); } /* * Clear all user tracing flags. */ curthread->t_dtrace_ft = 0; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; rm_rlock(&fasttrap_tp_lock, &tracker); pid = p->p_pid; bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * Lookup the tracepoint that the process just hit. */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * If we couldn't find a matching tracepoint, either a tracepoint has * been inserted without using the pid ioctl interface (see * fasttrap_ioctl), or somehow we have mislaid this tracepoint. */ if (tp == NULL) { rm_runlock(&fasttrap_tp_lock, &tracker); return (-1); } if (tp->ftt_ids != NULL) { fasttrap_id_t *id; for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, rp->fixreg[3], rp->fixreg[4], rp->fixreg[5], rp->fixreg[6], rp->fixreg[7]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, rp->fixreg[3], rp->fixreg[4], rp->fixreg[5], rp->fixreg[6], rp->fixreg[7]); } else { uintptr_t t[5]; fasttrap_usdt_args(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } } /* * We're about to do a bunch of work so we cache a local copy of * the tracepoint to emulate the instruction, and then find the * tracepoint again later if we need to light up any return probes. */ tp_local = *tp; rm_runlock(&fasttrap_tp_lock, &tracker); tp = &tp_local; /* * If there's an is-enabled probe connected to this tracepoint it * means that there was a 'xor r3, r3, r3' * instruction that was placed there by DTrace when the binary was * linked. As this probe is, in fact, enabled, we need to stuff 1 * into R3. Accordingly, we can bypass all the instruction * emulation logic since we know the inevitable result. It's possible * that a user could construct a scenario where the 'is-enabled' * probe was on some other instruction, but that would be a rather * exotic way to shoot oneself in the foot. */ if (is_enabled) { rp->fixreg[3] = 1; new_pc = rp->pc + 4; goto done; } switch (tp->ftt_type) { case FASTTRAP_T_NOP: new_pc = rp->pc + 4; break; case FASTTRAP_T_BC: if (!fasttrap_branch_taken(tp->ftt_bo, tp->ftt_bi, rp)) break; /* FALLTHROUGH */ case FASTTRAP_T_B: if (tp->ftt_instr & 0x01) rp->lr = rp->pc + 4; new_pc = tp->ftt_dest; break; case FASTTRAP_T_BLR: case FASTTRAP_T_BCTR: if (!fasttrap_branch_taken(tp->ftt_bo, tp->ftt_bi, rp)) break; /* FALLTHROUGH */ if (tp->ftt_type == FASTTRAP_T_BCTR) new_pc = rp->ctr; else new_pc = rp->lr; if (tp->ftt_instr & 0x01) rp->lr = rp->pc + 4; break; case FASTTRAP_T_COMMON: break; }; done: /* * If there were no return probes when we first found the tracepoint, * we should feel no obligation to honor any return probes that were * subsequently enabled -- they'll just have to wait until the next * time around. */ if (tp->ftt_retids != NULL) { /* * We need to wait until the results of the instruction are * apparent before invoking any return probes. If this * instruction was emulated we can just call * fasttrap_return_common(); if it needs to be executed, we * need to wait until the user thread returns to the kernel. */ if (tp->ftt_type != FASTTRAP_T_COMMON) { fasttrap_return_common(rp, pc, pid, new_pc); } else { ASSERT(curthread->t_dtrace_ret != 0); ASSERT(curthread->t_dtrace_pc == pc); ASSERT(curthread->t_dtrace_scrpc != 0); ASSERT(new_pc == curthread->t_dtrace_astpc); } } rp->pc = new_pc; set_regs(curthread, rp); return (0); } int -fasttrap_return_probe(struct reg *rp) +fasttrap_return_probe(struct trapframe *tf) { + struct reg reg, *rp; proc_t *p = curproc; uintptr_t pc = curthread->t_dtrace_pc; uintptr_t npc = curthread->t_dtrace_npc; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; + fill_regs(curthread, ®); + rp = ® + /* * We set rp->pc to the address of the traced instruction so * that it appears to dtrace_probe() that we're on the original - * instruction, and so that the user can't easily detect our - * complex web of lies. dtrace_return_probe() (our caller) - * will correctly set %pc after we return. + * instruction. */ rp->pc = pc; fasttrap_return_common(rp, pc, p->p_pid, npc); return (0); } - Index: stable/11/sys/i386/i386/trap.c =================================================================== --- stable/11/sys/i386/i386/trap.c (revision 327550) +++ stable/11/sys/i386/i386/trap.c (revision 327551) @@ -1,1110 +1,1105 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include "opt_trap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef KDTRACE_HOOKS #include #endif void trap(struct trapframe *frame); void syscall(struct trapframe *frame); static int trap_pfault(struct trapframe *, int, vm_offset_t); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(void); extern inthand_t IDTVEC(lcall_syscall); #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; #if defined(I586_CPU) && !defined(NO_F00F_HACK) int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif static int prot_fault_translation = 0; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { -#ifdef KDTRACE_HOOKS - struct reg regs; -#endif ksiginfo_t ksi; struct thread *td; struct proc *p; #ifdef KDB register_t dr6; #endif int signo, ucode; u_int type; register_t addr; vm_offset_t eva; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif td = curthread; p = td->td_proc; signo = 0; ucode = 0; addr = 0; PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A non-zero return value from the * hook means that the NMI was consumed by it and that we can * return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) return; #endif } if (type == T_MCHK) { mca_intr(); return; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) return; #endif if ((frame->tf_eflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP && frame->tf_eip != (int)cpu_switch_load_gs) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * Page faults need interrupts disabled until later, * and we shouldn't enable interrupts while holding * a spin lock. */ if (type != T_PAGEFLT && td->td_md.md_spinlock_count == 0) enable_intr(); } } eva = 0; if (type == T_PAGEFLT) { /* * For some Cyrix CPUs, %cr2 is clobbered by * interrupts. This problem is worked around by using * an interrupt gate for the pagefault handler. We * are finally ready to read %cr2 and conditionally * reenable interrupts. If we hold a spin lock, then * we must not reenable interrupts. This might be a * spurious page fault. */ eva = rcr2(); if (td->td_md.md_spinlock_count == 0) enable_intr(); } if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); #ifdef KDTRACE_HOOKS if (type == T_BPTFLT) { - fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && - dtrace_pid_probe_ptr(®s) == 0) + dtrace_pid_probe_ptr(frame) == 0) return; } #endif user_trctrap_out: frame->tf_eflags &= ~PSL_T; signo = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = npxtrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; /* * The following two traps can happen in vm86 mode, * and, if so, we want to handle them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto user_trctrap_out; } if (signo == 0) goto user; break; } signo = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ signo = trap_pfault(frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (signo == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; /* Proceed as in that case. */ ucode = ILL_PRVOPC; signo = SIGILL; break; } #endif if (signo == -1) return; if (signo == 0) goto user; if (signo == SIGSEGV) ucode = SEGV_MAPERR; else if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { signo = SIGSEGV; ucode = SEGV_ACCERR; } else { signo = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ signo = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ signo = SIGSEGV; ucode = SEGV_ACCERR; } addr = eva; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) return; uprintf("pid %d killed due to lack of floating point\n", p->p_pid); signo = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = npxtrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); - fill_frame_regs(frame, ®s); if (dtrace_return_probe_ptr != NULL) - dtrace_return_probe_ptr(®s); + dtrace_return_probe_ptr(frame); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE, eva); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); if (npxdna()) return; break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); return; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto kernel_trctrap; } if (signo != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); return; } /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (curpcb->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif return; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_eip == (int)doreti_iret) { frame->tf_eip = (int)doreti_iret_fault; return; } if (type == T_STKFLT) break; if (frame->tf_eip == (int)doreti_popl_ds) { frame->tf_eip = (int)doreti_popl_ds_fault; return; } if (frame->tf_eip == (int)doreti_popl_es) { frame->tf_eip = (int)doreti_popl_es_fault; return; } if (frame->tf_eip == (int)doreti_popl_fs) { frame->tf_eip = (int)doreti_popl_fs_fault; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* trace trap */ kernel_trctrap: if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) { /* * We've just entered system mode via the * syscall lcall. Continue single stepping * silently until the syscall handler has * saved the flags. */ return; } if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) { /* * The syscall handler has now saved the * flags. Stop single stepping it. */ frame->tf_eflags &= ~PSL_T; return; } /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap() && !(curpcb->pcb_flags & PCB_VM86CALL)) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & ~0xf); return; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB /* XXX %dr6 is not quite reentrant. */ dr6 = rdr6(); load_dr6(dr6 & ~0x4000); if (kdb_trap(type, dr6, frame)) return; #endif break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } trap_fatal(frame, eva); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %x code %d type %d " "addr 0x%x esp 0x%08x eip 0x%08x " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_esp, frame->tf_eip, fubyte((void *)(frame->tf_eip + 0)), fubyte((void *)(frame->tf_eip + 1)), fubyte((void *)(frame->tf_eip + 2)), fubyte((void *)(frame->tf_eip + 3)), fubyte((void *)(frame->tf_eip + 4)), fubyte((void *)(frame->tf_eip + 5)), fubyte((void *)(frame->tf_eip + 6)), fubyte((void *)(frame->tf_eip + 7))); } KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } static int trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) { struct thread *td; struct proc *p; vm_offset_t va; vm_map_t map; int rv; vm_prot_t ftype; td = curthread; p = td->td_proc; rv = 0; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return (-2); #endif if (usermode) goto nogo; map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a user-space address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. */ if (!usermode && (td->td_intr_nesting_level != 0 || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; #if defined(PAE) || defined(PAE_TABLES) else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; #endif else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss, esp; u_int type; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", #if defined(PAE) || defined(PAE_TABLES) pg_nx != 0 ? (code & PGEX_I ? " instruction" : " data") : #endif "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_panic || kdb_active) { frame->tf_err = eva; /* smuggle fault address to ddb */ if (kdb_trap(type, 0, frame)) { frame->tf_err = code; /* restore error code */ return; } frame->tf_err = code; /* restore error code */ } #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). * * XXX Note that the current PTD gets replaced by IdlePTD when the * task switch occurs. This means that the stack that was active at * the time of the double fault is not available at unless * the machine was idle when the double fault occurred. The downside * of this is that "trace " in ddb won't work. */ void dblfault_handler() { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault:\n"); printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip)); printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp)); printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; long tmp; int error; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; params = (caddr_t)frame->tf_esp + sizeof(int); sa->code = frame->tf_eax; /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(int); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (params != NULL && sa->narg != 0) error = copyin(params, (caddr_t)sa->args, (u_int)(sa->narg * sizeof(int))); else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; } return (error); } #include "../../kern/subr_syscall.c" /* * syscall - system call request C handler. A system call is * essentially treated as a trap by reusing the frame layout. */ void syscall(struct trapframe *frame) { struct thread *td; register_t orig_tf_eflags; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!(TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0)) { panic("syscall"); /* NOT REACHED */ } #endif orig_tf_eflags = frame->tf_eflags; td = curthread; td->td_frame = frame; error = syscallenter(td); /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { frame->tf_eflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); syscallret(td, error); } Index: stable/11/sys/powerpc/powerpc/trap.c =================================================================== --- stable/11/sys/powerpc/powerpc/trap.c (revision 327550) +++ stable/11/sys/powerpc/powerpc/trap.c (revision 327551) @@ -1,830 +1,828 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: trap.c,v 1.58 2002/03/04 04:07:35 dbj Exp $ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Below matches setjmp.S */ #define FAULTBUF_LR 21 #define FAULTBUF_R1 1 #define FAULTBUF_R2 2 #define FAULTBUF_CR 22 #define FAULTBUF_R14 3 #define MOREARGS(sp) ((caddr_t)((uintptr_t)(sp) + \ sizeof(struct callframe) - 3*sizeof(register_t))) /* more args go here */ static void trap_fatal(struct trapframe *frame); static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user); static int trap_pfault(struct trapframe *frame, int user); static int fix_unaligned(struct thread *td, struct trapframe *frame); static int handle_onfault(struct trapframe *frame); static void syscall(struct trapframe *frame); #ifdef __powerpc64__ void handle_kernel_slb_spill(int, register_t, register_t); static int handle_user_slb_spill(pmap_t pm, vm_offset_t addr); extern int n_slbs; #endif struct powerpc_exception { u_int vector; char *name; }; #ifdef KDTRACE_HOOKS #include int (*dtrace_invop_jump_addr)(struct trapframe *); #endif static struct powerpc_exception powerpc_exceptions[] = { { EXC_CRIT, "critical input" }, { EXC_RST, "system reset" }, { EXC_MCHK, "machine check" }, { EXC_DSI, "data storage interrupt" }, { EXC_DSE, "data segment exception" }, { EXC_ISI, "instruction storage interrupt" }, { EXC_ISE, "instruction segment exception" }, { EXC_EXI, "external interrupt" }, { EXC_ALI, "alignment" }, { EXC_PGM, "program" }, { EXC_FPU, "floating-point unavailable" }, { EXC_APU, "auxiliary proc unavailable" }, { EXC_DECR, "decrementer" }, { EXC_FIT, "fixed-interval timer" }, { EXC_WDOG, "watchdog timer" }, { EXC_SC, "system call" }, { EXC_TRC, "trace" }, { EXC_FPA, "floating-point assist" }, { EXC_DEBUG, "debug" }, { EXC_PERF, "performance monitoring" }, { EXC_VEC, "altivec unavailable" }, { EXC_VSX, "vsx unavailable" }, { EXC_ITMISS, "instruction tlb miss" }, { EXC_DLMISS, "data load tlb miss" }, { EXC_DSMISS, "data store tlb miss" }, { EXC_BPT, "instruction breakpoint" }, { EXC_SMI, "system management" }, { EXC_VECAST_G4, "altivec assist" }, { EXC_THRM, "thermal management" }, { EXC_RUNMODETRC, "run mode/trace" }, { EXC_LAST, NULL } }; static const char * trapname(u_int vector) { struct powerpc_exception *pe; for (pe = powerpc_exceptions; pe->vector != EXC_LAST; pe++) { if (pe->vector == vector) return (pe->name); } return ("unknown"); } void trap(struct trapframe *frame) { struct thread *td; struct proc *p; #ifdef KDTRACE_HOOKS uint32_t inst; #endif int sig, type, user; u_int ucode; ksiginfo_t ksi; PCPU_INC(cnt.v_trap); td = curthread; p = td->td_proc; type = ucode = frame->exc; sig = 0; user = frame->srr1 & PSL_PR; CTR3(KTR_TRAP, "trap: %s type=%s (%s)", td->td_name, trapname(type), user ? "user" : "kernel"); #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * If the DTrace kernel module has registered a trap handler, * call it and if it returns non-zero, assume that it has * handled the trap and modified the trap frame so that this * function can return normally. */ if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type) != 0) return; #endif if (user) { td->td_pticks = 0; td->td_frame = frame; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); /* User Mode Traps */ switch (type) { case EXC_RUNMODETRC: case EXC_TRC: frame->srr1 &= ~PSL_SE; sig = SIGTRAP; ucode = TRAP_TRACE; break; #ifdef __powerpc64__ case EXC_ISE: case EXC_DSE: if (handle_user_slb_spill(&p->p_vmspace->vm_pmap, (type == EXC_ISE) ? frame->srr0 : frame->dar) != 0){ sig = SIGSEGV; ucode = SEGV_MAPERR; } break; #endif case EXC_DSI: case EXC_ISI: sig = trap_pfault(frame, 1); if (sig == SIGSEGV) ucode = SEGV_MAPERR; break; case EXC_SC: syscall(frame); break; case EXC_FPU: KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU, ("FPU already enabled for thread")); enable_fpu(td); break; case EXC_VEC: KASSERT((td->td_pcb->pcb_flags & PCB_VEC) != PCB_VEC, ("Altivec already enabled for thread")); enable_vec(td); break; case EXC_VSX: KASSERT((td->td_pcb->pcb_flags & PCB_VSX) != PCB_VSX, ("VSX already enabled for thread")); if (!(td->td_pcb->pcb_flags & PCB_VEC)) enable_vec(td); if (!(td->td_pcb->pcb_flags & PCB_FPU)) save_fpu(td); td->td_pcb->pcb_flags |= PCB_VSX; enable_fpu(td); break; case EXC_VECAST_E: case EXC_VECAST_G4: case EXC_VECAST_G5: /* * We get a VPU assist exception for IEEE mode * vector operations on denormalized floats. * Emulating this is a giant pain, so for now, * just switch off IEEE mode and treat them as * zero. */ save_vec(td); td->td_pcb->pcb_vec.vscr |= ALTIVEC_VSCR_NJ; enable_vec(td); break; case EXC_ALI: if (fix_unaligned(td, frame) != 0) { sig = SIGBUS; ucode = BUS_ADRALN; } else frame->srr0 += 4; break; case EXC_DEBUG: /* Single stepping */ mtspr(SPR_DBSR, mfspr(SPR_DBSR)); frame->srr1 &= ~PSL_DE; frame->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); sig = SIGTRAP; ucode = TRAP_TRACE; break; case EXC_PGM: /* Identify the trap reason */ #ifdef AIM if (frame->srr1 & EXC_PGM_TRAP) { #else if (frame->cpu.booke.esr & ESR_PTR) { #endif #ifdef KDTRACE_HOOKS inst = fuword32((const void *)frame->srr0); if (inst == 0x0FFFDDDD && dtrace_pid_probe_ptr != NULL) { - struct reg regs; - fill_regs(td, ®s); - (*dtrace_pid_probe_ptr)(®s); + (*dtrace_pid_probe_ptr)(frame); break; } #endif sig = SIGTRAP; ucode = TRAP_BRKPT; } else { sig = ppc_instr_emulate(frame, td->td_pcb); if (sig == SIGILL) { if (frame->srr1 & EXC_PGM_PRIV) ucode = ILL_PRVOPC; else if (frame->srr1 & EXC_PGM_ILLEGAL) ucode = ILL_ILLOPC; } else if (sig == SIGFPE) ucode = FPE_FLTINV; /* Punt for now, invalid operation. */ } break; case EXC_MCHK: /* * Note that this may not be recoverable for the user * process, depending on the type of machine check, * but it at least prevents the kernel from dying. */ sig = SIGBUS; ucode = BUS_OBJERR; break; default: trap_fatal(frame); } } else { /* Kernel Mode Traps */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { #ifdef KDTRACE_HOOKS case EXC_PGM: if (frame->srr1 & EXC_PGM_TRAP) { if (*(uint32_t *)frame->srr0 == EXC_DTRACE) { if (dtrace_invop_jump_addr != NULL) { dtrace_invop_jump_addr(frame); return; } } } break; #endif #ifdef __powerpc64__ case EXC_DSE: if ((frame->dar & SEGMENT_MASK) == USER_ADDR) { __asm __volatile ("slbmte %0, %1" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); return; } break; #endif case EXC_DSI: if (trap_pfault(frame, 0) == 0) return; break; case EXC_MCHK: if (handle_onfault(frame)) return; break; default: break; } trap_fatal(frame); } if (sig != 0) { if (p->p_sysent->sv_transtrap != NULL) sig = (p->p_sysent->sv_transtrap)(sig, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = (int) ucode; /* XXX, not POSIX */ /* ksi.ksi_addr = ? */ ksi.ksi_trapno = type; trapsignal(td, &ksi); } userret(td, frame); } static void trap_fatal(struct trapframe *frame) { printtrap(frame->exc, frame, 1, (frame->srr1 & PSL_PR)); #ifdef KDB if ((debugger_on_panic || kdb_active) && kdb_trap(frame->exc, 0, frame)) return; #endif panic("%s trap", trapname(frame->exc)); } static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user) { uint16_t ver; #ifdef BOOKE vm_paddr_t pa; #endif printf("\n"); printf("%s %s trap:\n", isfatal ? "fatal" : "handled", user ? "user" : "kernel"); printf("\n"); printf(" exception = 0x%x (%s)\n", vector, trapname(vector)); switch (vector) { case EXC_DSE: case EXC_DSI: case EXC_DTMISS: printf(" virtual address = 0x%" PRIxPTR "\n", frame->dar); #ifdef AIM printf(" dsisr = 0x%lx\n", (u_long)frame->cpu.aim.dsisr); #endif break; case EXC_ISE: case EXC_ISI: case EXC_ITMISS: printf(" virtual address = 0x%" PRIxPTR "\n", frame->srr0); break; case EXC_MCHK: ver = mfpvr() >> 16; #if defined(AIM) if (MPC745X_P(ver)) printf(" msssr0 = 0x%lx\n", (u_long)mfspr(SPR_MSSSR0)); #elif defined(BOOKE) pa = mfspr(SPR_MCARU); pa = (pa << 32) | (u_register_t)mfspr(SPR_MCAR); printf(" mcsr = 0x%lx\n", (u_long)mfspr(SPR_MCSR)); printf(" mcar = 0x%jx\n", (uintmax_t)pa); #endif break; } #ifdef BOOKE printf(" esr = 0x%" PRIxPTR "\n", frame->cpu.booke.esr); #endif printf(" srr0 = 0x%" PRIxPTR "\n", frame->srr0); printf(" srr1 = 0x%lx\n", (u_long)frame->srr1); printf(" lr = 0x%" PRIxPTR "\n", frame->lr); printf(" curthread = %p\n", curthread); if (curthread != NULL) printf(" pid = %d, comm = %s\n", curthread->td_proc->p_pid, curthread->td_name); printf("\n"); } /* * Handles a fatal fault when we have onfault state to recover. Returns * non-zero if there was onfault recovery state available. */ static int handle_onfault(struct trapframe *frame) { struct thread *td; jmp_buf *fb; td = curthread; fb = td->td_pcb->pcb_onfault; if (fb != NULL) { frame->srr0 = (*fb)->_jb[FAULTBUF_LR]; frame->fixreg[1] = (*fb)->_jb[FAULTBUF_R1]; frame->fixreg[2] = (*fb)->_jb[FAULTBUF_R2]; frame->fixreg[3] = 1; frame->cr = (*fb)->_jb[FAULTBUF_CR]; bcopy(&(*fb)->_jb[FAULTBUF_R14], &frame->fixreg[14], 18 * sizeof(register_t)); td->td_pcb->pcb_onfault = NULL; /* Returns twice, not thrice */ return (1); } return (0); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; size_t argsz; int error, n, i; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->fixreg[0]; params = (caddr_t)(frame->fixreg + FIRSTARG); n = NARGREG; if (sa->code == SYS_syscall) { /* * code is first argument, * followed by actual args. */ sa->code = *(register_t *) params; params += sizeof(register_t); n -= 1; } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, * so as to maintain quad alignment * for the rest of the args. */ if (SV_PROC_FLAG(p, SV_ILP32)) { params += sizeof(register_t); sa->code = *(register_t *) params; params += sizeof(register_t); n -= 2; } else { sa->code = *(register_t *) params; params += sizeof(register_t); n -= 1; } } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (SV_PROC_FLAG(p, SV_ILP32)) { argsz = sizeof(uint32_t); for (i = 0; i < n; i++) sa->args[i] = ((u_register_t *)(params))[i] & 0xffffffff; } else { argsz = sizeof(uint64_t); for (i = 0; i < n; i++) sa->args[i] = ((u_register_t *)(params))[i]; } if (sa->narg > n) error = copyin(MOREARGS(frame->fixreg[1]), sa->args + n, (sa->narg - n) * argsz); else error = 0; #ifdef __powerpc64__ if (SV_PROC_FLAG(p, SV_ILP32) && sa->narg > n) { /* Expand the size of arguments copied from the stack */ for (i = sa->narg; i >= n; i--) sa->args[i] = ((uint32_t *)(&sa->args[n]))[i-n]; } #endif if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->fixreg[FIRSTARG + 1]; } return (error); } #include "../../kern/subr_syscall.c" void syscall(struct trapframe *frame) { struct thread *td; int error; td = curthread; td->td_frame = frame; #ifdef __powerpc64__ /* * Speculatively restore last user SLB segment, which we know is * invalid already, since we are likely to do copyin()/copyout(). */ __asm __volatile ("slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #endif error = syscallenter(td); syscallret(td, error); } #ifdef __powerpc64__ /* Handle kernel SLB faults -- runs in real mode, all seat belts off */ void handle_kernel_slb_spill(int type, register_t dar, register_t srr0) { struct slb *slbcache; uint64_t slbe, slbv; uint64_t esid, addr; int i; addr = (type == EXC_ISE) ? srr0 : dar; slbcache = PCPU_GET(slb); esid = (uintptr_t)addr >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; /* See if the hardware flushed this somehow (can happen in LPARs) */ for (i = 0; i < n_slbs; i++) if (slbcache[i].slbe == (slbe | (uint64_t)i)) return; /* Not in the map, needs to actually be added */ slbv = kernel_va_to_slbv(addr); if (slbcache[USER_SLB_SLOT].slbe == 0) { for (i = 0; i < n_slbs; i++) { if (i == USER_SLB_SLOT) continue; if (!(slbcache[i].slbe & SLBE_VALID)) goto fillkernslb; } if (i == n_slbs) slbcache[USER_SLB_SLOT].slbe = 1; } /* Sacrifice a random SLB entry that is not the user entry */ i = mftb() % n_slbs; if (i == USER_SLB_SLOT) i = (i+1) % n_slbs; fillkernslb: /* Write new entry */ slbcache[i].slbv = slbv; slbcache[i].slbe = slbe | (uint64_t)i; /* Trap handler will restore from cache on exit */ } static int handle_user_slb_spill(pmap_t pm, vm_offset_t addr) { struct slb *user_entry; uint64_t esid; int i; esid = (uintptr_t)addr >> ADDR_SR_SHFT; PMAP_LOCK(pm); user_entry = user_va_to_slb_entry(pm, addr); if (user_entry == NULL) { /* allocate_vsid auto-spills it */ (void)allocate_user_vsid(pm, esid, 0); } else { /* * Check that another CPU has not already mapped this. * XXX: Per-thread SLB caches would be better. */ for (i = 0; i < pm->pm_slb_len; i++) if (pm->pm_slb[i] == user_entry) break; if (i == pm->pm_slb_len) slb_insert_user(pm, user_entry); } PMAP_UNLOCK(pm); return (0); } #endif static int trap_pfault(struct trapframe *frame, int user) { vm_offset_t eva, va; struct thread *td; struct proc *p; vm_map_t map; vm_prot_t ftype; int rv; #ifdef AIM register_t user_sr; #endif td = curthread; p = td->td_proc; if (frame->exc == EXC_ISI) { eva = frame->srr0; ftype = VM_PROT_EXECUTE; if (frame->srr1 & SRR1_ISI_PFAULT) ftype |= VM_PROT_READ; } else { eva = frame->dar; #ifdef BOOKE if (frame->cpu.booke.esr & ESR_ST) #else if (frame->cpu.aim.dsisr & DSISR_STORE) #endif ftype = VM_PROT_WRITE; else ftype = VM_PROT_READ; } if (user) { KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace NULL")); map = &p->p_vmspace->vm_map; } else { #ifdef BOOKE if (eva < VM_MAXUSER_ADDRESS) { #else if ((eva >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) { #endif map = &p->p_vmspace->vm_map; #ifdef AIM user_sr = td->td_pcb->pcb_cpu.aim.usr_segm; eva &= ADDR_PIDX | ADDR_POFF; eva |= user_sr << ADDR_SR_SHFT; #endif } else { map = kernel_map; } } va = trunc_page(eva); /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); /* * XXXDTRACE: add dtrace_doubletrap_func here? */ if (rv == KERN_SUCCESS) return (0); if (!user && handle_onfault(frame)) return (0); return (SIGSEGV); } /* * For now, this only deals with the particular unaligned access case * that gcc tends to generate. Eventually it should handle all of the * possibilities that can happen on a 32-bit PowerPC in big-endian mode. */ static int fix_unaligned(struct thread *td, struct trapframe *frame) { struct thread *fputhread; int indicator, reg; double *fpr; indicator = EXC_ALI_OPCODE_INDICATOR(frame->cpu.aim.dsisr); switch (indicator) { case EXC_ALI_LFD: case EXC_ALI_STFD: reg = EXC_ALI_RST(frame->cpu.aim.dsisr); fpr = &td->td_pcb->pcb_fpu.fpr[reg].fpr; fputhread = PCPU_GET(fputhread); /* Juggle the FPU to ensure that we've initialized * the FPRs, and that their current state is in * the PCB. */ if (fputhread != td) { if (fputhread) save_fpu(fputhread); enable_fpu(td); } save_fpu(td); if (indicator == EXC_ALI_LFD) { if (copyin((void *)frame->dar, fpr, sizeof(double)) != 0) return (-1); enable_fpu(td); } else { if (copyout(fpr, (void *)frame->dar, sizeof(double)) != 0) return (-1); } return (0); break; } return (-1); } #ifdef KDB int db_trap_glue(struct trapframe *); /* Called from trap_subr.S */ int db_trap_glue(struct trapframe *frame) { if (!(frame->srr1 & PSL_PR) && (frame->exc == EXC_TRC || frame->exc == EXC_RUNMODETRC #ifdef AIM || (frame->exc == EXC_PGM && (frame->srr1 & EXC_PGM_TRAP)) #else || (frame->exc == EXC_DEBUG) #endif || frame->exc == EXC_BPT || frame->exc == EXC_DSI)) { int type = frame->exc; /* Ignore DTrace traps. */ if (*(uint32_t *)frame->srr0 == EXC_DTRACE) return (0); #ifdef AIM if (type == EXC_PGM && (frame->srr1 & EXC_PGM_TRAP)) { #else if (frame->cpu.booke.esr & ESR_PTR) { #endif type = T_BREAKPOINT; } return (kdb_trap(type, 0, frame)); } return (0); } #endif Index: stable/11/sys/sys/dtrace_bsd.h =================================================================== --- stable/11/sys/sys/dtrace_bsd.h (revision 327550) +++ stable/11/sys/sys/dtrace_bsd.h (revision 327551) @@ -1,174 +1,173 @@ /*- * Copyright (c) 2007-2008 John Birrell (jb@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * * This file contains BSD shims for Sun's DTrace code. */ #ifndef _SYS_DTRACE_BSD_H #define _SYS_DTRACE_BSD_H /* Forward definitions: */ struct mbuf; struct trapframe; struct thread; struct vattr; struct vnode; -struct reg; int dtrace_trap(struct trapframe *, u_int); /* * The dtrace module handles traps that occur during a DTrace probe. * This type definition is used in the trap handler to provide a * hook for the dtrace module to register its handler with. */ typedef int (*dtrace_trap_func_t)(struct trapframe *, u_int); extern dtrace_trap_func_t dtrace_trap_func; /* * A hook which removes active FBT probes before executing the double fault * handler. We want to ensure that DTrace doesn't trigger another trap, which * would result in a reset. */ typedef void (*dtrace_doubletrap_func_t)(void); extern dtrace_doubletrap_func_t dtrace_doubletrap_func; /* Pid provider hooks */ -typedef int (*dtrace_pid_probe_ptr_t)(struct reg *); +typedef int (*dtrace_pid_probe_ptr_t)(struct trapframe *); extern dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr; -typedef int (*dtrace_return_probe_ptr_t)(struct reg *); +typedef int (*dtrace_return_probe_ptr_t)(struct trapframe *); extern dtrace_return_probe_ptr_t dtrace_return_probe_ptr; /* Virtual time hook function type. */ typedef void (*dtrace_vtime_switch_func_t)(struct thread *); extern int dtrace_vtime_active; extern dtrace_vtime_switch_func_t dtrace_vtime_switch_func; /* The fasttrap module hooks into the fork, exit and exit. */ typedef void (*dtrace_fork_func_t)(struct proc *, struct proc *); typedef void (*dtrace_execexit_func_t)(struct proc *); /* Global variable in kern_fork.c */ extern dtrace_fork_func_t dtrace_fasttrap_fork; /* Global variable in kern_exec.c */ extern dtrace_execexit_func_t dtrace_fasttrap_exec; /* Global variable in kern_exit.c */ extern dtrace_execexit_func_t dtrace_fasttrap_exit; /* The dtmalloc provider hooks into malloc. */ typedef void (*dtrace_malloc_probe_func_t)(u_int32_t, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); extern dtrace_malloc_probe_func_t dtrace_malloc_probe; /* dtnfsclient NFSv[34] access cache provider hooks. */ typedef void (*dtrace_nfsclient_accesscache_flush_probe_func_t)(uint32_t, struct vnode *); extern dtrace_nfsclient_accesscache_flush_probe_func_t dtrace_nfsclient_accesscache_flush_done_probe; extern dtrace_nfsclient_accesscache_flush_probe_func_t dtrace_nfscl_accesscache_flush_done_probe; typedef void (*dtrace_nfsclient_accesscache_get_probe_func_t)(uint32_t, struct vnode *, uid_t, uint32_t); extern dtrace_nfsclient_accesscache_get_probe_func_t dtrace_nfsclient_accesscache_get_hit_probe, dtrace_nfsclient_accesscache_get_miss_probe; extern dtrace_nfsclient_accesscache_get_probe_func_t dtrace_nfscl_accesscache_get_hit_probe, dtrace_nfscl_accesscache_get_miss_probe; typedef void (*dtrace_nfsclient_accesscache_load_probe_func_t)(uint32_t, struct vnode *, uid_t, uint32_t, int); extern dtrace_nfsclient_accesscache_load_probe_func_t dtrace_nfsclient_accesscache_load_done_probe; extern dtrace_nfsclient_accesscache_load_probe_func_t dtrace_nfscl_accesscache_load_done_probe; /* dtnfsclient NFSv[234] attribute cache provider hooks. */ typedef void (*dtrace_nfsclient_attrcache_flush_probe_func_t)(uint32_t, struct vnode *); extern dtrace_nfsclient_attrcache_flush_probe_func_t dtrace_nfsclient_attrcache_flush_done_probe; extern dtrace_nfsclient_attrcache_flush_probe_func_t dtrace_nfscl_attrcache_flush_done_probe; typedef void (*dtrace_nfsclient_attrcache_get_hit_probe_func_t)(uint32_t, struct vnode *, struct vattr *); extern dtrace_nfsclient_attrcache_get_hit_probe_func_t dtrace_nfsclient_attrcache_get_hit_probe; extern dtrace_nfsclient_attrcache_get_hit_probe_func_t dtrace_nfscl_attrcache_get_hit_probe; typedef void (*dtrace_nfsclient_attrcache_get_miss_probe_func_t)(uint32_t, struct vnode *); extern dtrace_nfsclient_attrcache_get_miss_probe_func_t dtrace_nfsclient_attrcache_get_miss_probe; extern dtrace_nfsclient_attrcache_get_miss_probe_func_t dtrace_nfscl_attrcache_get_miss_probe; typedef void (*dtrace_nfsclient_attrcache_load_probe_func_t)(uint32_t, struct vnode *, struct vattr *, int); extern dtrace_nfsclient_attrcache_load_probe_func_t dtrace_nfsclient_attrcache_load_done_probe; extern dtrace_nfsclient_attrcache_load_probe_func_t dtrace_nfscl_attrcache_load_done_probe; /* dtnfsclient NFSv[234] RPC provider hooks. */ typedef void (*dtrace_nfsclient_nfs23_start_probe_func_t)(uint32_t, struct vnode *, struct mbuf *, struct ucred *, int); extern dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfsclient_nfs23_start_probe; extern dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfscl_nfs234_start_probe; typedef void (*dtrace_nfsclient_nfs23_done_probe_func_t)(uint32_t, struct vnode *, struct mbuf *, struct ucred *, int, int); extern dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfsclient_nfs23_done_probe; extern dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfscl_nfs234_done_probe; /* * Functions which allow the dtrace module to check that the kernel * hooks have been compiled with sufficient space for it's private * structures. */ size_t kdtrace_proc_size(void); size_t kdtrace_thread_size(void); /* * OpenSolaris compatible time functions returning nanoseconds. * On OpenSolaris these return hrtime_t which we define as uint64_t. */ uint64_t dtrace_gethrtime(void); uint64_t dtrace_gethrestime(void); #endif /* _SYS_DTRACE_BSD_H */ Index: stable/11 =================================================================== --- stable/11 (revision 327550) +++ stable/11 (revision 327551) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r326774,326811