Index: head/sys/amd64/amd64/trap.c =================================================================== --- head/sys/amd64/amd64/trap.c (revision 305806) +++ head/sys/amd64/amd64/trap.c (revision 305807) @@ -1,979 +1,979 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * AMD64 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #ifdef KDTRACE_HOOKS #include #endif extern void __noinline trap(struct trapframe *frame); extern void trap_check(struct trapframe *frame); extern void syscall(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); static int trap_pfault(struct trapframe *, int); static void trap_fatal(struct trapframe *, vm_offset_t); #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; #ifdef KDB static int kdb_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RWTUN, &kdb_on_nmi, 0, "Go to KDB on NMI"); #endif static int panic_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RWTUN, &panic_on_nmi, 0, "Panic on NMI"); static int prot_fault_translation; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { #ifdef KDTRACE_HOOKS struct reg regs; #endif struct thread *td = curthread; struct proc *p = td->td_proc; int i = 0, ucode = 0, code; u_int type; register_t addr = 0; ksiginfo_t ksi; PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); goto out; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A non-zero return value from the handler means that * the NMI was consumed by it and we can return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) goto out; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) goto out; #endif } if (type == T_MCHK) { mca_intr(); goto out; } if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ - if (ISPL(frame->tf_cs) == SEL_UPL) + if (TRAPF_USERMODE(frame)) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock. */ if (td->td_md.md_spinlock_count == 0) enable_intr(); } } code = frame->tf_err; - if (ISPL(frame->tf_cs) == SEL_UPL) { + if (TRAPF_USERMODE(frame)) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); #ifdef KDTRACE_HOOKS if (type == T_BPTFLT) { fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(®s) == 0) goto out; } #endif frame->tf_rflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) goto userout; i = SIGFPE; break; case T_PROTFLT: /* general protection fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: i = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ /* * Emulator can take care about this trap? */ if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) goto userout; addr = frame->tf_addr; i = trap_pfault(frame, TRUE); if (i == -1) goto userout; if (i == 0) goto user; if (i == SIGSEGV) ucode = SEGV_MAPERR; else { if (prot_fault_translation == 0) { /* * Autodetect. * This check also covers the images * without the ABI-tag ELF note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { i = SIGSEGV; ucode = SEGV_ACCERR; } else { i = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ i = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ i = SIGSEGV; ucode = SEGV_ACCERR; } } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #ifdef DEV_ISA case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto userout; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); goto userout; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) goto userout; i = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); fill_frame_regs(frame, ®s); if (dtrace_return_probe_ptr != NULL && dtrace_return_probe_ptr(®s) == 0) goto out; break; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); goto out; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); fpudna(); goto out; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * For now, supporting kernel handler * registration for FPU traps is overkill. */ trap_fatal(frame, 0); goto out; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; goto out; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; goto out; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; goto out; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; goto out; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; goto out; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; goto out; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; goto out; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; goto out; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; goto out; } break; case T_TRCTRAP: /* trace trap */ /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap()) { /* * Reset breakpoint bits because the * processor doesn't */ /* XXX check upper bits here */ load_dr6(rdr6() & 0xfffffff0); goto out; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, 0, frame)) goto out; #endif break; #ifdef DEV_ISA case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto out; } else if (panic_on_nmi == 0) goto out; /* FALLTHROUGH */ #endif /* DEV_ISA */ } trap_fatal(frame, 0); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), fubyte((void *)(frame->tf_rip + 3)), fubyte((void *)(frame->tf_rip + 4)), fubyte((void *)(frame->tf_rip + 5)), fubyte((void *)(frame->tf_rip + 6)), fubyte((void *)(frame->tf_rip + 7))); } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); userout: out: return; } /* * Ensure that we ignore any DTrace-induced faults. This function cannot * be instrumented, so it cannot generate such faults itself. */ void trap_check(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, frame->tf_trapno) != 0) return; #endif trap(frame); } static int trap_pfault(frame, usermode) struct trapframe *frame; int usermode; { vm_offset_t va; vm_map_t map; int rv = 0; vm_prot_t ftype; struct thread *td = curthread; struct proc *p = td->td_proc; vm_offset_t eva = frame->tf_addr; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) goto nogo; map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a usermode address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. */ if (!usermode && (td->td_intr_nesting_level != 0 || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss; u_int type; long esp; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)], &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, - ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); + TRAPF_USERMODE(frame) ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_RSV ? " rsv" : "", code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); - if (ISPL(frame->tf_cs) == SEL_UPL) { + if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_rsp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (long)&frame->tf_rsp; } printf("stack pointer = 0x%x:0x%lx\n", ss, esp); printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_rflags & PSL_T) printf("trace trap, "); if (frame->tf_rflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) printf("nested task, "); if (frame->tf_rflags & PSL_RF) printf("resume, "); printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_panic || kdb_active) if (kdb_trap(type, 0, frame)) return; #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). */ void dblfault_handler(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault\n"); printf("rip = 0x%lx\n", frame->tf_rip); printf("rsp = 0x%lx\n", frame->tf_rsp); printf("rbp = 0x%lx\n", frame->tf_rbp); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; register_t *argp; caddr_t params; int reg, regcnt, error; p = td->td_proc; frame = td->td_frame; reg = 0; regcnt = 6; params = (caddr_t)frame->tf_rsp + sizeof(register_t); sa->code = frame->tf_rax; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = frame->tf_rdi; reg++; regcnt--; } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]), ("Too many syscall arguments!")); error = 0; argp = &frame->tf_rdi; argp += reg; bcopy(argp, sa->args, sizeof(sa->args[0]) * regcnt); if (sa->narg > regcnt) { KASSERT(params != NULL, ("copyin args with no params!")); error = copyin(params, &sa->args[regcnt], (sa->narg - regcnt) * sizeof(sa->args[0])); } if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; } return (error); } #include "../../kern/subr_syscall.c" /* * System call handler for native binaries. The trap frame is already * set up by the assembler trampoline and a pointer to it is saved in * td_frame. */ void amd64_syscall(struct thread *td, int traced) { struct syscall_args sa; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC - if (ISPL(td->td_frame->tf_cs) != SEL_UPL) { + if (!TRAPF_USERMODE(frame)) { panic("syscall"); /* NOT REACHED */ } #endif error = syscallenter(td, &sa); /* * Traced syscall. */ if (__predict_false(traced)) { td->td_frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)td->td_frame->tf_rip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returing with kernel FPU ctx leaked", syscallname(td->td_proc, sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, sa.code))); KASSERT(td->td_md.md_invl_gen.gen == 0, ("System call %s returning with leaked invl_gen %lu", syscallname(td->td_proc, sa.code), td->td_md.md_invl_gen.gen)); syscallret(td, error, &sa); /* * If the user-supplied value of %rip is not a canonical * address, then some CPUs will trigger a ring 0 #GP during * the sysret instruction. However, the fault handler would * execute in ring 0 with the user's %gs and %rsp which would * not be safe. Instead, use the full return path which * catches the problem safely. */ if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } Index: head/sys/i386/i386/db_trace.c =================================================================== --- head/sys/i386/i386/db_trace.c (revision 305806) +++ head/sys/i386/i386/db_trace.c (revision 305807) @@ -1,758 +1,757 @@ /*- * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static db_varfcn_t db_esp; static db_varfcn_t db_frame; static db_varfcn_t db_frame_seg; static db_varfcn_t db_gs; static db_varfcn_t db_ss; /* * Machine register set. */ #define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x) struct db_variable db_regs[] = { { "cs", DB_OFFSET(tf_cs), db_frame_seg }, { "ds", DB_OFFSET(tf_ds), db_frame_seg }, { "es", DB_OFFSET(tf_es), db_frame_seg }, { "fs", DB_OFFSET(tf_fs), db_frame_seg }, { "gs", NULL, db_gs }, { "ss", NULL, db_ss }, { "eax", DB_OFFSET(tf_eax), db_frame }, { "ecx", DB_OFFSET(tf_ecx), db_frame }, { "edx", DB_OFFSET(tf_edx), db_frame }, { "ebx", DB_OFFSET(tf_ebx), db_frame }, { "esp", NULL, db_esp }, { "ebp", DB_OFFSET(tf_ebp), db_frame }, { "esi", DB_OFFSET(tf_esi), db_frame }, { "edi", DB_OFFSET(tf_edi), db_frame }, { "eip", DB_OFFSET(tf_eip), db_frame }, { "efl", DB_OFFSET(tf_eflags), db_frame }, }; struct db_variable *db_eregs = db_regs + nitems(db_regs); static __inline int get_esp(struct trapframe *tf) { - return ((ISPL(tf->tf_cs) || kdb_frame->tf_eflags & PSL_VM) ? - tf->tf_esp : (intptr_t)&tf->tf_esp); + return (TF_HAS_STACKREGS(tf) ? tf->tf_esp : (intptr_t)&tf->tf_esp); } static int db_frame(struct db_variable *vp, db_expr_t *valuep, int op) { int *reg; if (kdb_frame == NULL) return (0); reg = (int *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep); if (op == DB_VAR_GET) *valuep = *reg; else *reg = *valuep; return (1); } static int db_frame_seg(struct db_variable *vp, db_expr_t *valuep, int op) { struct trapframe_vm86 *tfp; int off; uint16_t *reg; if (kdb_frame == NULL) return (0); off = (intptr_t)vp->valuep; if (kdb_frame->tf_eflags & PSL_VM) { tfp = (void *)kdb_frame; switch ((intptr_t)vp->valuep) { case (intptr_t)DB_OFFSET(tf_cs): reg = (uint16_t *)&tfp->tf_cs; break; case (intptr_t)DB_OFFSET(tf_ds): reg = (uint16_t *)&tfp->tf_vm86_ds; break; case (intptr_t)DB_OFFSET(tf_es): reg = (uint16_t *)&tfp->tf_vm86_es; break; case (intptr_t)DB_OFFSET(tf_fs): reg = (uint16_t *)&tfp->tf_vm86_fs; break; } } else reg = (uint16_t *)((uintptr_t)kdb_frame + off); if (op == DB_VAR_GET) *valuep = *reg; else *reg = *valuep; return (1); } static int db_esp(struct db_variable *vp, db_expr_t *valuep, int op) { if (kdb_frame == NULL) return (0); if (op == DB_VAR_GET) *valuep = get_esp(kdb_frame); - else if (ISPL(kdb_frame->tf_cs)) + else if (TF_HAS_STACKREGS(kdb_frame)) kdb_frame->tf_esp = *valuep; return (1); } static int db_gs(struct db_variable *vp, db_expr_t *valuep, int op) { struct trapframe_vm86 *tfp; if (kdb_frame != NULL && kdb_frame->tf_eflags & PSL_VM) { tfp = (void *)kdb_frame; if (op == DB_VAR_GET) *valuep = tfp->tf_vm86_gs; else tfp->tf_vm86_gs = *valuep; return (1); } if (op == DB_VAR_GET) *valuep = rgs(); else load_gs(*valuep); return (1); } static int db_ss(struct db_variable *vp, db_expr_t *valuep, int op) { if (kdb_frame == NULL) return (0); if (op == DB_VAR_GET) - *valuep = (ISPL(kdb_frame->tf_cs) || - kdb_frame->tf_eflags & PSL_VM) ? kdb_frame->tf_ss : rss(); - else if (ISPL(kdb_frame->tf_cs) || kdb_frame->tf_eflags & PSL_VM) + *valuep = TF_HAS_STACKREGS(kdb_frame) ? kdb_frame->tf_ss : + rss(); + else if (TF_HAS_STACKREGS(kdb_frame)) kdb_frame->tf_ss = *valuep; return (1); } #define NORMAL 0 #define TRAP 1 #define INTERRUPT 2 #define SYSCALL 3 #define DOUBLE_FAULT 4 #define TRAP_INTERRUPT 5 #define TRAP_TIMERINT 6 static void db_nextframe(struct i386_frame **, db_addr_t *, struct thread *); static int db_numargs(struct i386_frame *); static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t, void *); static void decode_syscall(int, struct thread *); static const char * watchtype_str(int type); int i386_set_watch(int watchnum, unsigned int watchaddr, int size, int access, struct dbreg *d); int i386_clr_watch(int watchnum, struct dbreg *d); /* * Figure out how many arguments were passed into the frame at "fp". */ static int db_numargs(fp) struct i386_frame *fp; { char *argp; int inst; int args; argp = (char *)db_get_value((int)&fp->f_retaddr, 4, FALSE); /* * XXX etext is wrong for LKMs. We should attempt to interpret * the instruction at the return address in all cases. This * may require better fault handling. */ if (argp < btext || argp >= etext) { args = -1; } else { retry: inst = db_get_value((int)argp, 4, FALSE); if ((inst & 0xff) == 0x59) /* popl %ecx */ args = 1; else if ((inst & 0xffff) == 0xc483) /* addl $Ibs, %esp */ args = ((inst >> 16) & 0xff) / 4; else if ((inst & 0xf8ff) == 0xc089) { /* movl %eax, %Reg */ argp += 2; goto retry; } else args = -1; } return (args); } static void db_print_stack_entry(name, narg, argnp, argp, callpc, frame) const char *name; int narg; char **argnp; int *argp; db_addr_t callpc; void *frame; { int n = narg >= 0 ? narg : 5; db_printf("%s(", name); while (n) { if (argnp) db_printf("%s=", *argnp++); db_printf("%r", db_get_value((int)argp, 4, FALSE)); argp++; if (--n != 0) db_printf(","); } if (narg < 0) db_printf(",..."); db_printf(") at "); db_printsym(callpc, DB_STGY_PROC); if (frame != NULL) db_printf("/frame 0x%r", (register_t)frame); db_printf("\n"); } static void decode_syscall(int number, struct thread *td) { struct proc *p; c_db_sym_t sym; db_expr_t diff; sy_call_t *f; const char *symname; db_printf(" (%d", number); p = (td != NULL) ? td->td_proc : NULL; if (p != NULL && 0 <= number && number < p->p_sysent->sv_size) { f = p->p_sysent->sv_table[number].sy_call; sym = db_search_symbol((db_addr_t)f, DB_STGY_ANY, &diff); if (sym != DB_SYM_NULL && diff == 0) { db_symbol_values(sym, &symname, NULL); db_printf(", %s, %s", p->p_sysent->sv_name, symname); } } db_printf(")"); } /* * Figure out the next frame up in the call stack. */ static void db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td) { struct trapframe *tf; int frame_type; int eip, esp, ebp; db_expr_t offset; c_db_sym_t sym; const char *name; eip = db_get_value((int) &(*fp)->f_retaddr, 4, FALSE); ebp = db_get_value((int) &(*fp)->f_frame, 4, FALSE); /* * Figure out frame type. We look at the address just before * the saved instruction pointer as the saved EIP is after the * call function, and if the function being called is marked as * dead (such as panic() at the end of dblfault_handler()), then * the instruction at the saved EIP will be part of a different * function (syscall() in this example) rather than the one that * actually made the call. */ frame_type = NORMAL; sym = db_search_symbol(eip - 1, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); if (name != NULL) { if (strcmp(name, "calltrap") == 0 || strcmp(name, "fork_trampoline") == 0) frame_type = TRAP; else if (strncmp(name, "Xatpic_intr", 11) == 0 || strncmp(name, "Xapic_isr", 9) == 0) frame_type = INTERRUPT; else if (strcmp(name, "Xlcall_syscall") == 0 || strcmp(name, "Xint0x80_syscall") == 0) frame_type = SYSCALL; else if (strcmp(name, "dblfault_handler") == 0) frame_type = DOUBLE_FAULT; /* XXX: These are interrupts with trap frames. */ else if (strcmp(name, "Xtimerint") == 0) frame_type = TRAP_TIMERINT; else if (strcmp(name, "Xcpustop") == 0 || strcmp(name, "Xrendezvous") == 0 || strcmp(name, "Xipi_intr_bitmap_handler") == 0) frame_type = TRAP_INTERRUPT; } /* * Normal frames need no special processing. */ if (frame_type == NORMAL) { *ip = (db_addr_t) eip; *fp = (struct i386_frame *) ebp; return; } db_print_stack_entry(name, 0, 0, 0, eip, &(*fp)->f_frame); /* * For a double fault, we have to snag the values from the * previous TSS since a double fault uses a task gate to * switch to a known good state. */ if (frame_type == DOUBLE_FAULT) { esp = PCPU_GET(common_tss.tss_esp); eip = PCPU_GET(common_tss.tss_eip); ebp = PCPU_GET(common_tss.tss_ebp); db_printf( "--- trap 0x17, eip = %#r, esp = %#r, ebp = %#r ---\n", eip, esp, ebp); *ip = (db_addr_t) eip; *fp = (struct i386_frame *) ebp; return; } /* * Point to base of trapframe which is just above the * current frame. */ if (frame_type == INTERRUPT) tf = (struct trapframe *)((int)*fp + 16); else if (frame_type == TRAP_INTERRUPT) tf = (struct trapframe *)((int)*fp + 8); else tf = (struct trapframe *)((int)*fp + 12); if (INKERNEL((int) tf)) { esp = get_esp(tf); eip = tf->tf_eip; ebp = tf->tf_ebp; switch (frame_type) { case TRAP: db_printf("--- trap %#r", tf->tf_trapno); break; case SYSCALL: db_printf("--- syscall"); decode_syscall(tf->tf_eax, td); break; case TRAP_TIMERINT: case TRAP_INTERRUPT: case INTERRUPT: db_printf("--- interrupt"); break; default: panic("The moon has moved again."); } db_printf(", eip = %#r, esp = %#r, ebp = %#r ---\n", eip, esp, ebp); } *ip = (db_addr_t) eip; *fp = (struct i386_frame *) ebp; } static int db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame, db_addr_t pc, register_t sp, int count) { struct i386_frame *actframe; #define MAXNARG 16 char *argnames[MAXNARG], **argnp = NULL; const char *name; int *argp; db_expr_t offset; c_db_sym_t sym; int instr, narg; boolean_t first; /* * If an indirect call via an invalid pointer caused a trap, * %pc contains the invalid address while the return address * of the unlucky caller has been saved by CPU on the stack * just before the trap frame. In this case, try to recover * the caller's address so that the first frame is assigned * to the right spot in the right function, for that is where * the failure actually happened. * * This trick depends on the fault address stashed in tf_err * by trap_fatal() before entering KDB. */ if (kdb_frame && pc == kdb_frame->tf_err) { /* * Find where the trap frame actually ends. * It won't contain tf_esp or tf_ss unless crossing rings. */ - if (ISPL(kdb_frame->tf_cs)) + if (TF_HAS_STACKREGS(kdb_frame)) instr = (int)(kdb_frame + 1); else instr = (int)&kdb_frame->tf_esp; pc = db_get_value(instr, 4, FALSE); } if (count == -1) count = 1024; first = TRUE; while (count-- && !db_pager_quit) { sym = db_search_symbol(pc, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); /* * Attempt to determine a (possibly fake) frame that gives * the caller's pc. It may differ from `frame' if the * current function never sets up a standard frame or hasn't * set one up yet or has just discarded one. The last two * cases can be guessed fairly reliably for code generated * by gcc. The first case is too much trouble to handle in * general because the amount of junk on the stack depends * on the pc (the special handling of "calltrap", etc. in * db_nextframe() works because the `next' pc is special). */ actframe = frame; if (first) { first = FALSE; if (sym == C_DB_SYM_NULL && sp != 0) { /* * If a symbol couldn't be found, we've probably * jumped to a bogus location, so try and use * the return address to find our caller. */ db_print_stack_entry(name, 0, 0, 0, pc, NULL); pc = db_get_value(sp, 4, FALSE); if (db_search_symbol(pc, DB_STGY_PROC, &offset) == C_DB_SYM_NULL) break; continue; } else if (tf != NULL) { instr = db_get_value(pc, 4, FALSE); if ((instr & 0xffffff) == 0x00e58955) { /* pushl %ebp; movl %esp, %ebp */ actframe = (void *)(get_esp(tf) - 4); } else if ((instr & 0xffff) == 0x0000e589) { /* movl %esp, %ebp */ actframe = (void *)get_esp(tf); if (tf->tf_ebp == 0) { /* Fake frame better. */ frame = actframe; } } else if ((instr & 0xff) == 0x000000c3) { /* ret */ actframe = (void *)(get_esp(tf) - 4); } else if (offset == 0) { /* Probably an assembler symbol. */ actframe = (void *)(get_esp(tf) - 4); } } else if (strcmp(name, "fork_trampoline") == 0) { /* * Don't try to walk back on a stack for a * process that hasn't actually been run yet. */ db_print_stack_entry(name, 0, 0, 0, pc, actframe); break; } } argp = &actframe->f_arg0; narg = MAXNARG; if (sym != NULL && db_sym_numargs(sym, &narg, argnames)) { argnp = argnames; } else { narg = db_numargs(frame); } db_print_stack_entry(name, narg, argnp, argp, pc, actframe); if (actframe != frame) { /* `frame' belongs to caller. */ pc = (db_addr_t) db_get_value((int)&actframe->f_retaddr, 4, FALSE); continue; } db_nextframe(&frame, &pc, td); if (INKERNEL((int)pc) && !INKERNEL((int) frame)) { sym = db_search_symbol(pc, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); db_print_stack_entry(name, 0, 0, 0, pc, frame); break; } if (!INKERNEL((int) frame)) { break; } } return (0); } void db_trace_self(void) { struct i386_frame *frame; db_addr_t callpc; register_t ebp; __asm __volatile("movl %%ebp,%0" : "=r" (ebp)); frame = (struct i386_frame *)ebp; callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE); frame = frame->f_frame; db_backtrace(curthread, NULL, frame, callpc, 0, -1); } int db_trace_thread(struct thread *thr, int count) { struct pcb *ctx; struct trapframe *tf; ctx = kdb_thr_ctx(thr); tf = thr == kdb_thread ? kdb_frame : NULL; return (db_backtrace(thr, tf, (struct i386_frame *)ctx->pcb_ebp, ctx->pcb_eip, ctx->pcb_esp, count)); } int i386_set_watch(watchnum, watchaddr, size, access, d) int watchnum; unsigned int watchaddr; int size; int access; struct dbreg *d; { int i, len; if (watchnum == -1) { for (i = 0; i < 4; i++) if (!DBREG_DR7_ENABLED(d->dr[7], i)) break; if (i < 4) watchnum = i; else return (-1); } switch (access) { case DBREG_DR7_EXEC: size = 1; /* size must be 1 for an execution breakpoint */ /* fall through */ case DBREG_DR7_WRONLY: case DBREG_DR7_RDWR: break; default: return (-1); } /* * we can watch a 1, 2, or 4 byte sized location */ switch (size) { case 1: len = DBREG_DR7_LEN_1; break; case 2: len = DBREG_DR7_LEN_2; break; case 4: len = DBREG_DR7_LEN_4; break; default: return (-1); } /* clear the bits we are about to affect */ d->dr[7] &= ~DBREG_DR7_MASK(watchnum); /* set drN register to the address, N=watchnum */ DBREG_DRX(d, watchnum) = watchaddr; /* enable the watchpoint */ d->dr[7] |= DBREG_DR7_SET(watchnum, len, access, DBREG_DR7_GLOBAL_ENABLE); return (watchnum); } int i386_clr_watch(watchnum, d) int watchnum; struct dbreg *d; { if (watchnum < 0 || watchnum >= 4) return (-1); d->dr[7] &= ~DBREG_DR7_MASK(watchnum); DBREG_DRX(d, watchnum) = 0; return (0); } int db_md_set_watchpoint(addr, size) db_expr_t addr; db_expr_t size; { struct dbreg d; int avail, i, wsize; fill_dbregs(NULL, &d); avail = 0; for(i = 0; i < 4; i++) { if (!DBREG_DR7_ENABLED(d.dr[7], i)) avail++; } if (avail * 4 < size) return (-1); for (i = 0; i < 4 && (size > 0); i++) { if (!DBREG_DR7_ENABLED(d.dr[7], i)) { if (size > 2) wsize = 4; else wsize = size; i386_set_watch(i, addr, wsize, DBREG_DR7_WRONLY, &d); addr += wsize; size -= wsize; } } set_dbregs(NULL, &d); return(0); } int db_md_clr_watchpoint(addr, size) db_expr_t addr; db_expr_t size; { struct dbreg d; int i; fill_dbregs(NULL, &d); for(i = 0; i < 4; i++) { if (DBREG_DR7_ENABLED(d.dr[7], i)) { if ((DBREG_DRX((&d), i) >= addr) && (DBREG_DRX((&d), i) < addr+size)) i386_clr_watch(i, &d); } } set_dbregs(NULL, &d); return(0); } static const char * watchtype_str(type) int type; { switch (type) { case DBREG_DR7_EXEC : return "execute"; break; case DBREG_DR7_RDWR : return "read/write"; break; case DBREG_DR7_WRONLY : return "write"; break; default : return "invalid"; break; } } void db_md_list_watchpoints() { struct dbreg d; int i, len, type; fill_dbregs(NULL, &d); db_printf("\nhardware watchpoints:\n"); db_printf(" watch status type len address\n"); db_printf(" ----- -------- ---------- --- ----------\n"); for (i = 0; i < 4; i++) { if (DBREG_DR7_ENABLED(d.dr[7], i)) { type = DBREG_DR7_ACCESS(d.dr[7], i); len = DBREG_DR7_LEN(d.dr[7], i); db_printf(" %-5d %-8s %10s %3d ", i, "enabled", watchtype_str(type), len + 1); db_printsym((db_addr_t)DBREG_DRX((&d), i), DB_STGY_ANY); db_printf("\n"); } else { db_printf(" %-5d disabled\n", i); } } db_printf("\ndebug register values:\n"); for (i = 0; i < 8; i++) { db_printf(" dr%d 0x%08x\n", i, DBREG_DRX((&d), i)); } db_printf("\n"); } Index: head/sys/i386/i386/trap.c =================================================================== --- head/sys/i386/i386/trap.c (revision 305806) +++ head/sys/i386/i386/trap.c (revision 305807) @@ -1,1152 +1,1152 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_npx.h" #include "opt_stack.h" #include "opt_trap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef KDTRACE_HOOKS #include #endif extern void trap(struct trapframe *frame); extern void syscall(struct trapframe *frame); static int trap_pfault(struct trapframe *, int, vm_offset_t); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(void); extern inthand_t IDTVEC(lcall_syscall); #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; #if defined(I586_CPU) && !defined(NO_F00F_HACK) int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif #ifdef KDB static int kdb_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RWTUN, &kdb_on_nmi, 0, "Go to KDB on NMI"); #endif static int panic_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RWTUN, &panic_on_nmi, 0, "Panic on NMI"); static int prot_fault_translation = 0; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { #ifdef KDTRACE_HOOKS struct reg regs; #endif struct thread *td = curthread; struct proc *p = td->td_proc; int i = 0, ucode = 0, code; u_int type; register_t addr = 0; vm_offset_t eva; ksiginfo_t ksi; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); goto out; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A non-zero return value from the * hook means that the NMI was consumed by it and that we can * return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) goto out; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) goto out; #endif } if (type == T_MCHK) { mca_intr(); goto out; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) goto out; #endif if ((frame->tf_eflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ - if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM)) + if (TRAPF_USERMODE(frame) && + (curpcb->pcb_flags & PCB_VM86CALL) == 0) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP && frame->tf_eip != (int)cpu_switch_load_gs) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * Page faults need interrupts disabled until later, * and we shouldn't enable interrupts while holding * a spin lock. */ if (type != T_PAGEFLT && td->td_md.md_spinlock_count == 0) enable_intr(); } } eva = 0; code = frame->tf_err; if (type == T_PAGEFLT) { /* * For some Cyrix CPUs, %cr2 is clobbered by * interrupts. This problem is worked around by using * an interrupt gate for the pagefault handler. We * are finally ready to read %cr2 and conditionally * reenable interrupts. If we hold a spin lock, then * we must not reenable interrupts. This might be a * spurious page fault. */ eva = rcr2(); if (td->td_md.md_spinlock_count == 0) enable_intr(); } - if ((ISPL(frame->tf_cs) == SEL_UPL) || - ((frame->tf_eflags & PSL_VM) && - !(curpcb->pcb_flags & PCB_VM86CALL))) { + if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); #ifdef KDTRACE_HOOKS if (type == T_BPTFLT) { fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(®s) == 0) goto out; } #endif user_trctrap_out: frame->tf_eflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ #ifdef DEV_NPX ucode = npxtrap_x87(); if (ucode == -1) goto userout; #else ucode = 0; #endif i = SIGFPE; break; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)frame); if (i == SIGTRAP) { type = T_TRCTRAP; goto user_trctrap_out; } if (i == 0) goto user; break; } i = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: i = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ i = trap_pfault(frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; /* Proceed as in that case. */ ucode = ILL_PRVOPC; i = SIGILL; break; } #endif if (i == -1) goto userout; if (i == 0) goto user; if (i == SIGSEGV) ucode = SEGV_MAPERR; else { if (prot_fault_translation == 0) { /* * Autodetect. * This check also covers the images * without the ABI-tag ELF note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { i = SIGSEGV; ucode = SEGV_ACCERR; } else { i = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ i = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ i = SIGSEGV; ucode = SEGV_ACCERR; } } addr = eva; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } goto userout; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto userout; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: #ifdef DEV_NPX KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) goto userout; #endif uprintf("pid %d killed due to lack of floating point\n", p->p_pid); i = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ #if defined(DEV_NPX) && !defined(CPU_DISABLE_SSE) && defined(I686_CPU) ucode = npxtrap_sse(); if (ucode == -1) goto userout; #else ucode = 0; #endif i = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); fill_frame_regs(frame, ®s); if (dtrace_return_probe_ptr != NULL && dtrace_return_probe_ptr(®s) == 0) goto out; break; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE, eva); goto out; case T_DNA: #ifdef DEV_NPX if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); if (npxdna()) goto out; #endif break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); goto out; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)frame); if (i == SIGTRAP) { type = T_TRCTRAP; goto kernel_trctrap; } if (i != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); goto out; } if (type == T_STKFLT) break; /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (curpcb->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif goto out; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_eip == (int)doreti_iret) { frame->tf_eip = (int)doreti_iret_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_ds) { frame->tf_eip = (int)doreti_popl_ds_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_es) { frame->tf_eip = (int)doreti_popl_es_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_fs) { frame->tf_eip = (int)doreti_popl_fs_fault; goto out; } if (curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; goto out; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; goto out; } break; case T_TRCTRAP: /* trace trap */ kernel_trctrap: if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) { /* * We've just entered system mode via the * syscall lcall. Continue single stepping * silently until the syscall handler has * saved the flags. */ goto out; } if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) { /* * The syscall handler has now saved the * flags. Stop single stepping it. */ frame->tf_eflags &= ~PSL_T; goto out; } /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap() && !(curpcb->pcb_flags & PCB_VM86CALL)) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & 0xfffffff0); goto out; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, 0, frame)) goto out; #endif break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } goto out; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto out; } else if (panic_on_nmi == 0) goto out; /* FALLTHROUGH */ #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } trap_fatal(frame, eva); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %x code %d type %d " "addr 0x%x esp 0x%08x eip 0x%08x " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr, frame->tf_esp, frame->tf_eip, fubyte((void *)(frame->tf_eip + 0)), fubyte((void *)(frame->tf_eip + 1)), fubyte((void *)(frame->tf_eip + 2)), fubyte((void *)(frame->tf_eip + 3)), fubyte((void *)(frame->tf_eip + 4)), fubyte((void *)(frame->tf_eip + 5)), fubyte((void *)(frame->tf_eip + 6)), fubyte((void *)(frame->tf_eip + 7))); } KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); #ifdef DEBUG if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%lx", (u_long)eva); uprintf("\n"); } #endif user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); userout: out: return; } static int trap_pfault(frame, usermode, eva) struct trapframe *frame; int usermode; vm_offset_t eva; { vm_offset_t va; vm_map_t map; int rv = 0; vm_prot_t ftype; struct thread *td = curthread; struct proc *p = td->td_proc; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return (-2); #endif if (usermode) goto nogo; map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a user-space address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. */ if (!usermode && (td->td_intr_nesting_level != 0 || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; #if defined(PAE) || defined(PAE_TABLES) else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; #endif else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss, esp; u_int type; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); - if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { + if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_panic || kdb_active) { frame->tf_err = eva; /* smuggle fault address to ddb */ if (kdb_trap(type, 0, frame)) { frame->tf_err = code; /* restore error code */ return; } frame->tf_err = code; /* restore error code */ } #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). * * XXX Note that the current PTD gets replaced by IdlePTD when the * task switch occurs. This means that the stack that was active at * the time of the double fault is not available at unless * the machine was idle when the double fault occurred. The downside * of this is that "trace " in ddb won't work. */ void dblfault_handler() { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault:\n"); printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip)); printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp)); printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; caddr_t params; long tmp; int error; p = td->td_proc; frame = td->td_frame; params = (caddr_t)frame->tf_esp + sizeof(int); sa->code = frame->tf_eax; /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(int); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (params != NULL && sa->narg != 0) error = copyin(params, (caddr_t)sa->args, (u_int)(sa->narg * sizeof(int))); else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; } return (error); } #include "../../kern/subr_syscall.c" /* * syscall - system call request C handler. A system call is * essentially treated as a trap by reusing the frame layout. */ void syscall(struct trapframe *frame) { struct thread *td; struct syscall_args sa; register_t orig_tf_eflags; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC - if (ISPL(frame->tf_cs) != SEL_UPL) { + if (!(TRAPF_USERMODE(frame) && + (curpcb->pcb_flags & PCB_VM86CALL) == 0)) { panic("syscall"); /* NOT REACHED */ } #endif orig_tf_eflags = frame->tf_eflags; td = curthread; td->td_frame = frame; error = syscallenter(td, &sa); /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { frame->tf_eflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, sa.code))); syscallret(td, error, &sa); } Index: head/sys/x86/include/frame.h =================================================================== --- head/sys/x86/include/frame.h (revision 305806) +++ head/sys/x86/include/frame.h (revision 305807) @@ -1,148 +1,158 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)frame.h 5.2 (Berkeley) 1/18/91 * $FreeBSD$ */ #ifndef _MACHINE_FRAME_H_ #define _MACHINE_FRAME_H_ 1 /* * System stack frames. */ #ifdef __i386__ /* * Exception/Trap Stack Frame */ struct trapframe { int tf_fs; int tf_es; int tf_ds; int tf_edi; int tf_esi; int tf_ebp; int tf_isp; int tf_ebx; int tf_edx; int tf_ecx; int tf_eax; int tf_trapno; /* below portion defined in 386 hardware */ int tf_err; int tf_eip; int tf_cs; int tf_eflags; - /* below only when crossing rings (e.g. user to kernel) */ + /* below only when crossing rings (user to kernel) */ int tf_esp; int tf_ss; }; /* Superset of trap frame, for traps from virtual-8086 mode */ struct trapframe_vm86 { int tf_fs; int tf_es; int tf_ds; int tf_edi; int tf_esi; int tf_ebp; int tf_isp; int tf_ebx; int tf_edx; int tf_ecx; int tf_eax; int tf_trapno; /* below portion defined in 386 hardware */ int tf_err; int tf_eip; int tf_cs; int tf_eflags; - /* below only when crossing rings (e.g. user to kernel) */ + /* below only when crossing rings (user (including vm86) to kernel) */ int tf_esp; int tf_ss; - /* below only when switching out of VM86 mode */ + /* below only when crossing from vm86 mode to kernel */ int tf_vm86_es; int tf_vm86_ds; int tf_vm86_fs; int tf_vm86_gs; }; #endif /* __i386__ */ #ifdef __amd64__ /* * Exception/Trap Stack Frame * * The ordering of this is specifically so that we can take first 6 * the syscall arguments directly from the beginning of the frame. */ struct trapframe { register_t tf_rdi; register_t tf_rsi; register_t tf_rdx; register_t tf_rcx; register_t tf_r8; register_t tf_r9; register_t tf_rax; register_t tf_rbx; register_t tf_rbp; register_t tf_r10; register_t tf_r11; register_t tf_r12; register_t tf_r13; register_t tf_r14; register_t tf_r15; uint32_t tf_trapno; uint16_t tf_fs; uint16_t tf_gs; register_t tf_addr; uint32_t tf_flags; uint16_t tf_es; uint16_t tf_ds; /* below portion defined in hardware */ register_t tf_err; register_t tf_rip; register_t tf_cs; register_t tf_rflags; + /* below only when crossing rings (user to kernel) */ register_t tf_rsp; register_t tf_ss; }; #define TF_HASSEGS 0x1 #define TF_HASBASES 0x2 #define TF_HASFPXSTATE 0x4 #endif /* __amd64__ */ + +/* + * This alias for the MI TRAPF_USERMODE() should be used when we don't + * care about user mode itself, but need to know if a frame has stack + * registers. The difference is only logical, but on i386 the logic + * for using TRAPF_USERMODE() is complicated by sometimes treating vm86 + * bioscall mode (which is a special ring 3 user mode) as kernel mode. + */ +#define TF_HAS_STACKREGS(tf) TRAPF_USERMODE(tf) #endif /* _MACHINE_FRAME_H_ */