Index: head/sys/amd64/amd64/exception.S
===================================================================
--- head/sys/amd64/amd64/exception.S	(revision 297930)
+++ head/sys/amd64/amd64/exception.S	(revision 297931)
@@ -1,928 +1,929 @@
/*-
 * Copyright (c) 1989, 1990 William F. Jolitz.
 * Copyright (c) 1990 The Regents of the University of California.
 * Copyright (c) 2007 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_atpic.h"
#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include <machine/specialreg.h>

#include "assym.s"

#ifdef KDTRACE_HOOKS
	.bss
	.globl	dtrace_invop_jump_addr
	.align	8
	.type	dtrace_invop_jump_addr,@object
	.size	dtrace_invop_jump_addr,8
dtrace_invop_jump_addr:
	.zero	8
	.globl	dtrace_invop_calltrap_addr
	.align	8
	.type	dtrace_invop_calltrap_addr,@object
	.size	dtrace_invop_calltrap_addr,8
dtrace_invop_calltrap_addr:
	.zero	8
#endif
	.text
#ifdef HWPMC_HOOKS
	ENTRY(start_exceptions)
#endif

/*****************************************************************************/
/* Trap handling                                                             */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * All traps are 'interrupt gates', SDT_SYSIGT.  An interrupt gate pushes
 * state on the stack but also disables interrupts.  This is important for
 * us for the use of the swapgs instruction.  We cannot be interrupted
 * until the GS.base value is correct.  For most traps, we automatically
 * then enable interrupts if the interrupted context had them enabled.
 * This is equivalent to the i386 port's use of SDT_SYS386TGT.
 *
 * The cpu will push a certain amount of state onto the kernel stack for
 * the current process.  See amd64/include/frame.h.
 * This includes the current RFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer.  The cpu
 * will also push an 'error' code for certain traps.  We push a dummy
 * error code for those traps where the cpu doesn't, in order to maintain
 * a consistent frame.  We also push a contrived 'trap number'.
 *
 * The CPU does not push the general registers, so we must do that, and we
 * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
 * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base for
 * kernel mode operation shortly, without changes to the selector
 * loaded.  Since superuser long mode works with any selectors loaded into
 * segment registers other than %cs, which makes them mostly unused in long
 * mode, and the kernel does not reference %fs, we leave them alone.  The
 * segment registers are reloaded on return to usermode.
 */
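/*
 * For illustration only (an editor's sketch, not part of this file): the
 * hardware-pushed portion of that frame for a 64-bit interrupt gate, as
 * offsets from %rsp once the error code (real or dummy) is in place.  The
 * names here are made up; the real code uses the TF_* offsets generated
 * into assym.s from struct trapframe.
 */
	.set	X_ERR,0			/* error code: hardware, or dummy 0 */
	.set	X_RIP,8			/* return %rip, pushed by the CPU */
	.set	X_CS,16			/* saved code segment */
	.set	X_RFLAGS,24		/* saved %rflags, incl. prior PSL_I */
	.set	X_RSP,32		/* interrupted stack pointer */
	.set	X_SS,40			/* saved stack segment */
/*
 * Everything below the error code in struct trapframe (the general
 * registers, tf_trapno, tf_addr) is filled in by software; that is what
 * the TRAP* macros and alltraps below do.
 */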
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)

/* Traps that we leave interrupts disabled for. */
#define	TRAP_NOEN(a)	\
	subq $TF_RIP,%rsp ; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	movq $0,TF_ERR(%rsp) ; \
	jmp alltraps_noen
IDTVEC(dbg)
	TRAP_NOEN(T_TRCTRAP)
IDTVEC(bpt)
	TRAP_NOEN(T_BPTFLT)
#ifdef KDTRACE_HOOKS
IDTVEC(dtrace_ret)
	TRAP_NOEN(T_DTRACE_RET)
#endif

/* Regular traps; the cpu does not supply tf_err for these. */
#define	TRAP(a)	\
	subq $TF_RIP,%rsp ; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	movq $0,TF_ERR(%rsp) ; \
	jmp alltraps
IDTVEC(div)
	TRAP(T_DIVIDE)
IDTVEC(ofl)
	TRAP(T_OFLOW)
IDTVEC(bnd)
	TRAP(T_BOUND)
IDTVEC(ill)
	TRAP(T_PRIVINFLT)
IDTVEC(dna)
	TRAP(T_DNA)
IDTVEC(fpusegm)
	TRAP(T_FPOPFLT)
IDTVEC(mchk)
	TRAP(T_MCHK)
IDTVEC(rsvd)
	TRAP(T_RESERVED)
IDTVEC(fpu)
	TRAP(T_ARITHTRAP)
IDTVEC(xmm)
	TRAP(T_XMMFLT)

/* This group of traps has tf_err already pushed by the cpu. */
#define	TRAP_ERR(a)	\
	subq $TF_ERR,%rsp ; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	jmp alltraps
IDTVEC(tss)
	TRAP_ERR(T_TSSFLT)
IDTVEC(missing)
	subq	$TF_ERR,%rsp
	movl	$T_SEGNPFLT,TF_TRAPNO(%rsp)
	jmp	prot_addrf
IDTVEC(stk)
	subq	$TF_ERR,%rsp
	movl	$T_STKFLT,TF_TRAPNO(%rsp)
	jmp	prot_addrf
IDTVEC(align)
	TRAP_ERR(T_ALIGNFLT)

	/*
	 * alltraps entry point.  Use swapgs if this is the first time in the
	 * kernel from userland.  Reenable interrupts if they were enabled
	 * before the trap.  This approximates SDT_SYS386TGT on the i386 port.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps
	.type	alltraps,@function
alltraps:
	movq	%rdi,TF_RDI(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	alltraps_testi		/* already running with kernel GS.base */
	swapgs
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
alltraps_testi:
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rdi
	sti
alltraps_pushregs_no_rdi:
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	FAKE_MCOUNT(TF_RIP(%rsp))
#ifdef KDTRACE_HOOKS
	/*
	 * DTrace Function Boundary Trace (fbt) probes are triggered
	 * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
	 * interrupt.  For all other trap types, just handle them in
	 * the usual way.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jnz	calltrap		/* ignore userland traps */
	cmpl	$T_BPTFLT,TF_TRAPNO(%rsp)
	jne	calltrap

	/* Check if there is no DTrace hook registered. */
	cmpq	$0,dtrace_invop_jump_addr
	je	calltrap

	/*
	 * Set our jump address for the jump back in the event that
	 * the breakpoint wasn't caused by DTrace at all.
	 */
	movq	$calltrap,dtrace_invop_calltrap_addr(%rip)

	/* Jump to the code hooked in by DTrace. */
	jmpq	*dtrace_invop_jump_addr
#endif
	.globl	calltrap
	.type	calltrap,@function
calltrap:
	movq	%rsp,%rdi
	call	trap_check
	MEXITCOUNT
	jmp	doreti			/* Handle any pending ASTs */
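/*
 * An editor's note on the swapgs ordering above: PCPU(member) expands to a
 * %gs-relative memory access, e.g. PCPU(CURPCB) becomes roughly
 *
 *	movq	%gs:PC_CURPCB,%rdi	(PC_CURPCB comes from assym.s)
 *
 * so it reads whatever the current GS.base points at.  That is why swapgs
 * must run before the first PCPU() reference when entering from userland.
 */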
/*
 * alltraps_noen entry point.  Unlike alltraps above, we want to
 * leave the interrupts disabled.  This corresponds to
 * SDT_SYS386IGT on the i386 port.
 */
	SUPERALIGN_TEXT
	.globl	alltraps_noen
	.type	alltraps_noen,@function
alltraps_noen:
	movq	%rdi,TF_RDI(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* already running with kernel GS.base */
	swapgs
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
1:	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	jmp	alltraps_pushregs_no_rdi

IDTVEC(dblfault)
	subq	$TF_ERR,%rsp
	movl	$T_DOUBLEFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* already running with kernel GS.base */
	swapgs
1:	movq	%rsp,%rdi
	call	dblfault_handler
2:	hlt
	jmp	2b

IDTVEC(page)
	subq	$TF_ERR,%rsp
	movl	$T_PAGEFLT,TF_TRAPNO(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* already running with kernel GS.base */
	swapgs
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
1:	movq	%cr2,%rdi		/* preserve %cr2 before .. */
	movq	%rdi,TF_ADDR(%rsp)	/* enabling interrupts. */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rdi
	sti
	jmp	alltraps_pushregs_no_rdi

	/*
	 * We have to special-case this one.  If we get a trap in doreti() at
	 * the iretq stage, we'll reenter with the wrong gs state.  We'll have
	 * to do a special swapgs in this case, even though we came from the
	 * kernel.  XXX linux has a trap handler for their equivalent of
	 * load_gs().
	 */
IDTVEC(prot)
	subq	$TF_ERR,%rsp
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
prot_addrf:
	movq	$0,TF_ADDR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
	leaq	doreti_iret(%rip),%rdi
	cmpq	%rdi,TF_RIP(%rsp)
	je	1f			/* kernel but with user gsbase!! */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	2f			/* already running with kernel GS.base */
1:	swapgs
2:	movq	PCPU(CURPCB),%rdi
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* always full iret from GPF */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rdi
	sti
	jmp	alltraps_pushregs_no_rdi
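/*
 * Context for the entry point below (editor's sketch, not part of this
 * file): SYSCALL does not go through the IDT at all.  During CPU startup
 * the kernel writes the address of this handler into MSR_LSTAR (done in C
 * elsewhere); an assembly rendering of that setup would look roughly like:
 *
 *	movl	$0xc0000082,%ecx		# MSR_LSTAR
 *	leaq	Xfast_syscall(%rip),%rax	# the IDTVEC(fast_syscall) label
 *	movq	%rax,%rdx
 *	shrq	$32,%rdx			# wrmsr takes %edx:%eax
 *	wrmsr
 */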
/*
 * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
 * and the new privilege level.  We are still running on the old user stack
 * pointer.  We have to juggle a few things around to find our stack etc.
 * swapgs gives us access to our PCPU space only.
 *
 * We do not support invoking this from a custom %cs or %ss (e.g. using
 * entries from an LDT).
 */
IDTVEC(fast_syscall)
	swapgs
	movq	%rsp,PCPU(SCRATCH_RSP)
	movq	PCPU(RSP0),%rsp
	/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
	subq	$TF_SIZE,%rsp
	/* defer TF_RSP till we have a spare register */
	movq	%r11,TF_RFLAGS(%rsp)
	movq	%rcx,TF_RIP(%rsp)	/* %rcx original value is in %r10 */
	movq	PCPU(SCRATCH_RSP),%r11	/* %r11 already saved */
	movq	%r11,TF_RSP(%rsp)	/* user stack pointer */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movq	PCPU(CURPCB),%r11
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r11)
	sti
	movq	$KUDSEL,TF_SS(%rsp)
	movq	$KUCSEL,TF_CS(%rsp)
	movq	$2,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* arg 1 */
	movq	%rsi,TF_RSI(%rsp)	/* arg 2 */
	movq	%rdx,TF_RDX(%rsp)	/* arg 3 */
	movq	%r10,TF_RCX(%rsp)	/* arg 4 */
	movq	%r8,TF_R8(%rsp)		/* arg 5 */
	movq	%r9,TF_R9(%rsp)		/* arg 6 */
	movq	%rax,TF_RAX(%rsp)	/* syscall number */
	movq	%rbx,TF_RBX(%rsp)	/* C preserved */
	movq	%rbp,TF_RBP(%rsp)	/* C preserved */
	movq	%r12,TF_R12(%rsp)	/* C preserved */
	movq	%r13,TF_R13(%rsp)	/* C preserved */
	movq	%r14,TF_R14(%rsp)	/* C preserved */
	movq	%r15,TF_R15(%rsp)	/* C preserved */
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	FAKE_MCOUNT(TF_RIP(%rsp))
	movq	PCPU(CURTHREAD),%rdi
	movq	%rsp,TD_FRAME(%rdi)
	movl	TF_RFLAGS(%rsp),%esi
	andl	$PSL_T,%esi
	call	amd64_syscall
1:	movq	PCPU(CURPCB),%rax
	/* Disable interrupts before testing PCB_FULL_IRET. */
	cli
	testl	$PCB_FULL_IRET,PCB_FLAGS(%rax)
	jnz	3f
	/* Check for and handle AST's on return to userland. */
	movq	PCPU(CURTHREAD),%rax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
	jne	2f
	/* Restore preserved registers. */
	MEXITCOUNT
	movq	TF_RDI(%rsp),%rdi	/* bonus: preserve arg 1 */
	movq	TF_RSI(%rsp),%rsi	/* bonus: preserve arg 2 */
	movq	TF_RDX(%rsp),%rdx	/* return value 2 */
	movq	TF_RAX(%rsp),%rax	/* return value 1 */
	movq	TF_RFLAGS(%rsp),%r11	/* original %rflags */
	movq	TF_RIP(%rsp),%rcx	/* original %rip */
	movq	TF_RSP(%rsp),%rsp	/* user stack pointer */
	swapgs
	sysretq
2:	/* AST scheduled. */
	sti
	movq	%rsp,%rdi
	call	ast
	jmp	1b
3:	/* Requested full context restore, use doreti for that. */
	MEXITCOUNT
	jmp	doreti

/*
 * Here for CYA insurance, in case a "syscall" instruction gets
 * issued from 32 bit compatibility mode.  MSR_CSTAR has to point
 * to *something* if EFER_SCE is enabled.
 */
IDTVEC(fast_syscall32)
	sysret
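/*
 * The register conventions unpacked above can be seen from the userland
 * side.  A minimal standalone FreeBSD/amd64 sketch (editor's example, not
 * part of this file; syscall numbers are from sys/syscall.h).  Note that
 * %rcx and %r11 do not survive SYSCALL, since the CPU uses them for the
 * saved %rip and %rflags, and that a fourth argument would travel in %r10
 * rather than %rcx:
 *
 *		.text
 *		.globl	_start
 *	_start:
 *		movl	$4,%eax			# SYS_write
 *		movl	$1,%edi			# fd: stdout
 *		leaq	msg(%rip),%rsi		# buf
 *		movl	$msglen,%edx		# nbytes
 *		syscall				# clobbers %rcx/%r11
 *		movl	$1,%eax			# SYS_exit
 *		xorl	%edi,%edi
 *		syscall
 *		.data
 *	msg:	.ascii	"hi\n"
 *		.set	msglen,.-msg
 */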
/*
 * NMI handling is special.
 *
 * First, NMIs do not respect the state of the processor's RFLAGS.IF
 * bit.  The NMI handler may be entered at any time, including when
 * the processor is in a critical section with RFLAGS.IF == 0.
 * The processor's GS.base value could be invalid on entry to the
 * handler.
 *
 * Second, the processor treats NMIs specially, blocking further NMIs
 * until an 'iretq' instruction is executed.  We thus need to execute
 * the NMI handler with interrupts disabled, to prevent a nested interrupt
 * from executing an 'iretq' instruction and inadvertently taking the
 * processor out of NMI mode.
 *
 * Third, the NMI handler runs on its own stack (tss_ist2).  The canonical
 * GS.base value for the processor is stored just above the bottom of its
 * NMI stack.  For NMIs taken from kernel mode, the current value in
 * the processor's GS.base is saved at entry to C-preserved register %r12,
 * the canonical value for GS.base is then loaded into the processor, and
 * the saved value is restored at exit time.  For NMIs taken from user mode,
 * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
 */
IDTVEC(nmi)
	subq	$TF_RIP,%rsp
	movl	$(T_NMI),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	xorl	%ebx,%ebx
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	nmi_fromuserspace
	/*
	 * We've interrupted the kernel.  Preserve GS.base in %r12.
	 */
	movl	$MSR_GSBASE,%ecx
	rdmsr
	movq	%rax,%r12
	shlq	$32,%rdx
	orq	%rdx,%r12
	/* Retrieve and load the canonical value for GS.base. */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	jmp	nmi_calltrap
nmi_fromuserspace:
	incl	%ebx
	swapgs
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
	FAKE_MCOUNT(TF_RIP(%rsp))
	movq	%rsp,%rdi
	call	trap
	MEXITCOUNT
#ifdef HWPMC_HOOKS
	/*
	 * Capture a userspace callchain if needed.
	 *
	 * - Check if the current trap was from user mode.
	 * - Check if the current thread is valid.
	 * - Check if the thread requires a user call chain to be
	 *   captured.
	 *
	 * We are still in NMI mode at this point.
	 */
	testl	%ebx,%ebx
	jz	nocallchain		/* not from userspace */
	movq	PCPU(CURTHREAD),%rax
	orq	%rax,%rax		/* curthread present? */
	jz	nocallchain
	testl	$TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
	jz	nocallchain
	/*
	 * A user callchain is to be captured, so:
	 * - Move execution to the regular kernel stack, to allow for
	 *   nested NMI interrupts.
	 * - Take the processor out of "NMI" mode by faking an "iret".
	 * - Enable interrupts, so that copyin() can work.
	 */
	movq	%rsp,%rsi		/* source stack pointer */
	movq	$TF_SIZE,%rcx
	movq	PCPU(RSP0),%rdx
	subq	%rcx,%rdx
	movq	%rdx,%rdi		/* destination stack pointer */

	shrq	$3,%rcx			/* trap frame size in long words */
	cld
	rep
	movsq				/* copy trapframe */

	movl	%ss,%eax
	pushq	%rax			/* tf_ss */
	pushq	%rdx			/* tf_rsp (on kernel stack) */
	pushfq				/* tf_rflags */
	movl	%cs,%eax
	pushq	%rax			/* tf_cs */
	pushq	$outofnmi		/* tf_rip */
	iretq
outofnmi:
	/*
	 * At this point the processor has exited NMI mode and is running
	 * with interrupts turned off on the normal kernel stack.
	 *
	 * If a pending NMI gets recognized at or after this point, it
	 * will cause a kernel callchain to be traced.
	 *
	 * We turn interrupts back on, and call the user callchain capture hook.
	 */
	movq	pmc_hook,%rax
	orq	%rax,%rax
	jz	nocallchain
	movq	PCPU(CURTHREAD),%rdi	/* thread */
	movq	$PMC_FN_USER_CALLCHAIN,%rsi /* command */
	movq	%rsp,%rdx		/* frame */
	sti
	call	*%rax
	cli
nocallchain:
#endif
	testl	%ebx,%ebx
	jnz	doreti_exit
nmi_kernelexit:
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
nmi_restoreregs:
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_RAX(%rsp),%rax
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RBP(%rsp),%rbp
	movq	TF_R10(%rsp),%r10
	movq	TF_R11(%rsp),%r11
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	addq	$TF_RIP,%rsp
	jmp	doreti_iret

ENTRY(fork_trampoline)
	movq	%r12,%rdi		/* function */
	movq	%rbx,%rsi		/* arg1 */
	movq	%rsp,%rdx		/* trapframe pointer */
	call	fork_exit
	MEXITCOUNT
	jmp	doreti			/* Handle any ASTs */

/*
 * To efficiently implement classification of trap and interrupt handlers
 * for profiling, there must be only trap handlers between the labels btrap
 * and bintr, and only interrupt handlers between the labels bintr and
 * eintr.  This is implemented (partly) by including files that contain
 * some of the handlers.  Before including the files, set up a normal asm
 * environment so that the included files don't need to know that they are
 * included.
 */

#ifdef COMPAT_FREEBSD32
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#include <amd64/ia32/ia32_exception.S>
#endif

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
MCOUNT_LABEL(bintr)

#include <amd64/amd64/apic_vector.S>

#ifdef DEV_ATPIC
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#include <amd64/amd64/atpic_vector.S>
#endif

	.text
MCOUNT_LABEL(eintr)

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
+	.globl	doreti
doreti:
	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
	/*
	 * Check if ASTs can be handled now.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
	jz	doreti_exit		/* can't handle ASTs now if not */

doreti_ast:
	/*
	 * Check for ASTs atomically with returning.  Disabling CPU
	 * interrupts provides sufficient locking even in the SMP case,
	 * since we will be informed of any new ASTs by an IPI.
	 */
	cli
	movq	PCPU(CURTHREAD),%rax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
	je	doreti_exit
	sti
	movq	%rsp,%rdi		/* pass a pointer to the trapframe */
	call	ast
	jmp	doreti_ast

	/*
	 * doreti_exit:	pop registers, iret.
	 *
	 *	The segment register pop is a special case, since it may
	 *	fault if (for example) a sigreturn specifies bad segment
	 *	registers.  The fault is handled in trap.c.
	 */
doreti_exit:
	MEXITCOUNT
	movq	PCPU(CURPCB),%r8

	/*
	 * Do not reload segment registers for kernel.
	 * Since we do not reload segment registers with sane
	 * values on kernel entry, descriptors referenced by
	 * segment registers might not be valid.  This is fatal
	 * for user mode, but is not a problem for the kernel.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	ld_regs
	testl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
	jz	ld_regs
	testl	$TF_HASSEGS,TF_FLAGS(%rsp)
	je	set_segs

do_segs:
	/* Restore %fs and fsbase */
	movw	TF_FS(%rsp),%ax
	.globl	ld_fs
ld_fs:
	movw	%ax,%fs
	cmpw	$KUF32SEL,%ax
	jne	1f
	movl	$MSR_FSBASE,%ecx
	movl	PCB_FSBASE(%r8),%eax
	movl	PCB_FSBASE+4(%r8),%edx
	.globl	ld_fsbase
ld_fsbase:
	wrmsr
1:
	/* Restore %gs and gsbase */
	movw	TF_GS(%rsp),%si
	pushfq
	cli
	movl	$MSR_GSBASE,%ecx
	/* Save current kernel %gs base into %r12d:%r13d */
	rdmsr
	movl	%eax,%r12d
	movl	%edx,%r13d
	.globl	ld_gs
ld_gs:
	movw	%si,%gs
	/* Save user %gs base into %r14d:%r15d */
	rdmsr
	movl	%eax,%r14d
	movl	%edx,%r15d
	/* Restore kernel %gs base */
	movl	%r12d,%eax
	movl	%r13d,%edx
	wrmsr
	popfq
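/*
 * An editor's note for the sequence below: swapgs atomically exchanges
 * GS.base with the value held in MSR_KGSBASE, touching no general
 * registers or flags.  Loading the user's base into MSR_KGSBASE here
 * therefore only takes effect when the exit path (ld_regs below) executes
 * its final swapgs on the way back to userland.
 */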
	/*
	 * Restore user %gs base, either from PCB if used for TLS, or
	 * from the previously saved msr read.
	 */
	movl	$MSR_KGSBASE,%ecx
	cmpw	$KUG32SEL,%si
	jne	1f
	movl	PCB_GSBASE(%r8),%eax
	movl	PCB_GSBASE+4(%r8),%edx
	jmp	ld_gsbase
1:
	movl	%r14d,%eax
	movl	%r15d,%edx
	.globl	ld_gsbase
ld_gsbase:
	wrmsr	/* May trap if non-canonical, but only for TLS. */

	.globl	ld_es
ld_es:
	movw	TF_ES(%rsp),%es
	.globl	ld_ds
ld_ds:
	movw	TF_DS(%rsp),%ds
ld_regs:
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_RAX(%rsp),%rax
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RBP(%rsp),%rbp
	movq	TF_R10(%rsp),%r10
	movq	TF_R11(%rsp),%r11
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* keep running with kernel GS.base */
	cli
	swapgs
1:
	addq	$TF_RIP,%rsp		/* skip over tf_err, tf_trapno */
	.globl	doreti_iret
doreti_iret:
	iretq

set_segs:
	movw	$KUDSEL,%ax
	movw	%ax,TF_DS(%rsp)
	movw	%ax,TF_ES(%rsp)
	movw	$KUF32SEL,TF_FS(%rsp)
	movw	$KUG32SEL,TF_GS(%rsp)
	jmp	do_segs

	/*
	 * doreti_iret_fault.  Alternative return code for
	 * the case where we get a fault in the doreti_exit code
	 * above.  trap() (amd64/amd64/trap.c) catches this specific
	 * case, sends the process a signal and continues in the
	 * corresponding place in the code below.
	 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	subq	$TF_RIP,%rsp		/* space including tf_err, tf_trapno */
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ERR(%rsp)		/* XXX should be the error code */
	movq	$0,TF_ADDR(%rsp)
	FAKE_MCOUNT(TF_RIP(%rsp))
	jmp	calltrap

	ALIGN_TEXT
	.globl	ds_load_fault
ds_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_DS(%rsp)
	jmp	doreti

	ALIGN_TEXT
	.globl	es_load_fault
es_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_ES(%rsp)
	jmp	doreti

	ALIGN_TEXT
	.globl	fs_load_fault
fs_load_fault:
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	%rsp,%rdi
	call	trap
	movw	$KUF32SEL,TF_FS(%rsp)
	jmp	doreti

	ALIGN_TEXT
	.globl	gs_load_fault
gs_load_fault:
	popfq
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUG32SEL,TF_GS(%rsp)
	jmp	doreti

	ALIGN_TEXT
	.globl	fsbase_load_fault
fsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_FSBASE(%r8)
	jmp	doreti

	ALIGN_TEXT
	.globl	gsbase_load_fault
gsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_GSBASE(%r8)
	jmp	doreti

#ifdef HWPMC_HOOKS
	ENTRY(end_exceptions)
#endif
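/*
 * Editor's note on the fs/gs base fault handlers above: the PCB base
 * values ultimately come from userland, e.g. TLS setup through the libc
 * wrapper amd64_set_fsbase(3) around sysarch(2).  A userland sketch
 * (illustrative; error handling omitted):
 *
 *		.text
 *		.globl	set_tls_base		# set_tls_base(void *base)
 *	set_tls_base:
 *		call	amd64_set_fsbase@PLT	# %rdi already holds the base
 *		ret
 *
 * A base address that the wrmsr refuses (e.g. a non-canonical one) is what
 * the fsbase_load_fault/gsbase_load_fault paths above recover from.
 */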
Index: head/sys/i386/i386/exception.s
===================================================================
--- head/sys/i386/i386/exception.s	(revision 297930)
+++ head/sys/i386/i386/exception.s	(revision 297931)
@@ -1,481 +1,482 @@
/*-
 * Copyright (c) 1989, 1990 William F. Jolitz.
 * Copyright (c) 1990 The Regents of the University of California.
 * Copyright (c) 2007 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_apic.h"
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"
#include "opt_npx.h"

#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>

#include "assym.s"

#define	SEL_RPL_MASK	0x0003
#define	GSEL_KPL	0x0020	/* GSEL(GCODE_SEL, SEL_KPL) */

#ifdef KDTRACE_HOOKS
	.bss
	.globl	dtrace_invop_jump_addr
	.align	4
	.type	dtrace_invop_jump_addr, @object
	.size	dtrace_invop_jump_addr, 4
dtrace_invop_jump_addr:
	.zero	4
	.globl	dtrace_invop_calltrap_addr
	.align	4
	.type	dtrace_invop_calltrap_addr, @object
	.size	dtrace_invop_calltrap_addr, 4
dtrace_invop_calltrap_addr:
	.zero	8
#endif
	.text
#ifdef HWPMC_HOOKS
	ENTRY(start_exceptions)
#endif

/*****************************************************************************/
/* Trap handling                                                             */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * Most traps are 'trap gates', SDT_SYS386TGT.  A trap gate pushes state on
 * the stack that mostly looks like an interrupt, but does not disable
 * interrupts.  A few of the traps we use are interrupt gates,
 * SDT_SYS386IGT, which are nearly the same thing except interrupts are
 * disabled on entry.
 *
 * The cpu will push a certain amount of state onto the kernel stack for
 * the current process.  The amount of state depends on the type of trap
 * and whether the trap crossed rings or not.  See i386/include/frame.h.
 * At the very least the current EFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer are pushed by the cpu.  The cpu
 * will also push an 'error' code for certain traps.  We push a dummy
 * error code for those traps where the cpu doesn't, in order to maintain
 * a consistent frame.  We also push a contrived 'trap number'.
 *
 * The cpu does not push the general registers, so we must do that, and we
 * must restore them prior to calling 'iret'.  The cpu adjusts the %cs and
 * %ss segment registers, but does not mess with %ds, %es, or %fs.  Thus we
 * must load them with appropriate values for supervisor mode operation.
 */
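/*
 * For illustration (editor's sketch, not part of this file): the
 * ring-crossing case described above pushes, from the post-push %esp up
 * (names are made up; the real code uses the TF_* offsets from assym.s):
 */
	.set	X_ERR,0			/* error code (hardware or dummy) */
	.set	X_EIP,4			/* return %eip */
	.set	X_CS,8			/* saved code segment */
	.set	X_EFLAGS,12		/* saved %eflags */
	.set	X_ESP,16		/* only present if rings were crossed */
	.set	X_SS,20			/* only present if rings were crossed */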
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)

#define	TRAP(a)		pushl $(a) ; jmp alltraps

IDTVEC(div)
	pushl $0; TRAP(T_DIVIDE)
IDTVEC(dbg)
	pushl $0; TRAP(T_TRCTRAP)
IDTVEC(nmi)
	pushl $0; TRAP(T_NMI)
IDTVEC(bpt)
	pushl $0; TRAP(T_BPTFLT)
IDTVEC(dtrace_ret)
	pushl $0; TRAP(T_DTRACE_RET)
IDTVEC(ofl)
	pushl $0; TRAP(T_OFLOW)
IDTVEC(bnd)
	pushl $0; TRAP(T_BOUND)
#ifndef KDTRACE_HOOKS
IDTVEC(ill)
	pushl $0; TRAP(T_PRIVINFLT)
#endif
IDTVEC(dna)
	pushl $0; TRAP(T_DNA)
IDTVEC(fpusegm)
	pushl $0; TRAP(T_FPOPFLT)
IDTVEC(tss)
	TRAP(T_TSSFLT)
IDTVEC(missing)
	TRAP(T_SEGNPFLT)
IDTVEC(stk)
	TRAP(T_STKFLT)
IDTVEC(prot)
	TRAP(T_PROTFLT)
IDTVEC(page)
	TRAP(T_PAGEFLT)
IDTVEC(mchk)
	pushl $0; TRAP(T_MCHK)
IDTVEC(rsvd)
	pushl $0; TRAP(T_RESERVED)
IDTVEC(fpu)
	pushl $0; TRAP(T_ARITHTRAP)
IDTVEC(align)
	TRAP(T_ALIGNFLT)
IDTVEC(xmm)
	pushl $0; TRAP(T_XMMFLT)

	/*
	 * All traps except ones for syscalls jump to alltraps.  If
	 * interrupts were enabled when the trap occurred, then interrupts
	 * are enabled now if the trap was through a trap gate, else
	 * disabled if the trap was through an interrupt gate.  Note that
	 * int0x80_syscall is a trap gate.  Interrupt gates are used by
	 * page faults, non-maskable interrupts, debug and breakpoint
	 * exceptions.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps
	.type	alltraps,@function
alltraps:
	pushal
	pushl	$0
	movw	%ds,(%esp)
	pushl	$0
	movw	%es,(%esp)
	pushl	$0
	movw	%fs,(%esp)
alltraps_with_regs_pushed:
	SET_KERNEL_SREGS
	cld
	FAKE_MCOUNT(TF_EIP(%esp))
calltrap:
	pushl	%esp
	call	trap
	add	$4, %esp

	/*
	 * Return via doreti to handle ASTs.
	 */
	MEXITCOUNT
	jmp	doreti

/*
 * Privileged instruction fault.
 */
#ifdef KDTRACE_HOOKS
	SUPERALIGN_TEXT
IDTVEC(ill)
	/* Check if there is no DTrace hook registered. */
	cmpl	$0,dtrace_invop_jump_addr
	je	norm_ill

	/* Check if this is a user fault. */
	cmpl	$GSEL_KPL, 4(%esp)	/* Check the code segment. */

	/* If so, just handle it as a normal trap. */
	jne	norm_ill

	/*
	 * This is a kernel instruction fault that might have been caused
	 * by a DTrace provider.
	 */
	pushal				/* Push all registers onto the stack. */

	/*
	 * Set our jump address for the jump back in the event that
	 * the exception wasn't caused by DTrace at all.
	 */
	movl	$norm_ill, dtrace_invop_calltrap_addr

	/* Jump to the code hooked in by DTrace. */
	jmpl	*dtrace_invop_jump_addr

	/*
	 * Process the instruction fault in the normal way.
	 */
norm_ill:
	pushl	$0
	TRAP(T_PRIVINFLT)
#endif
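/*
 * Both syscall entry points below are reached directly from userland.  A
 * minimal int $0x80 sketch (editor's example, not part of this file;
 * syscall numbers from sys/syscall.h): arguments go on the stack, right to
 * left, below one extra word where a libc stub's return address would
 * normally sit, and the carry flag reports failure.
 *
 *		.text
 *		.globl	_start
 *	_start:
 *		pushl	$msglen			# nbytes
 *		pushl	$msg			# buf
 *		pushl	$1			# fd: stdout
 *		movl	$4,%eax			# SYS_write
 *		pushl	%eax			# dummy return-address slot
 *		int	$0x80			# carry set on error
 *		addl	$16,%esp
 *		pushl	$0			# exit status
 *		movl	$1,%eax			# SYS_exit
 *		pushl	%eax			# dummy return-address slot
 *		int	$0x80
 *		.data
 *	msg:	.ascii	"hi\n"
 *		.set	msglen,.-msg
 *
 * The lcall $7,$0 flavor used by a.out-era binaries passes arguments the
 * same way; there the call gate supplies the extra words that
 * lcall_syscall below shuffles %eflags into.
 */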
/*
 * Call gate entry for syscalls (lcall 7,0).
 * This is used by FreeBSD 1.x a.out executables and "old" NetBSD executables.
 *
 * The intersegment call has been set up to specify one dummy parameter.
 * This leaves a place to put eflags so that the call frame can be
 * converted to a trap frame.  Note that the eflags is (semi-)bogusly
 * pushed into (what will be) tf_err and then copied later into the
 * final spot.  It has to be done this way because esp can't be just
 * temporarily altered for the pushfl - an interrupt might come in
 * and clobber the saved cs/eip.
 */
	SUPERALIGN_TEXT
IDTVEC(lcall_syscall)
	pushfl				/* save eflags */
	popl	8(%esp)			/* shuffle into tf_eflags */
	pushl	$7			/* sizeof "lcall 7,0" */
	subl	$4,%esp			/* skip over tf_trapno */
	pushal
	pushl	$0
	movw	%ds,(%esp)
	pushl	$0
	movw	%es,(%esp)
	pushl	$0
	movw	%fs,(%esp)
	SET_KERNEL_SREGS
	cld
	FAKE_MCOUNT(TF_EIP(%esp))
	pushl	%esp
	call	syscall
	add	$4, %esp
	MEXITCOUNT
	jmp	doreti

/*
 * Trap gate entry for syscalls (int 0x80).
 * This is used by FreeBSD ELF executables, "new" NetBSD executables, and all
 * Linux executables.
 *
 * Even though the name says 'int0x80', this is actually a trap gate, not an
 * interrupt gate.  Thus interrupts are enabled on entry just as they are for
 * a normal syscall.
 */
	SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
	pushl	$2			/* sizeof "int 0x80" */
	subl	$4,%esp			/* skip over tf_trapno */
	pushal
	pushl	$0
	movw	%ds,(%esp)
	pushl	$0
	movw	%es,(%esp)
	pushl	$0
	movw	%fs,(%esp)
	SET_KERNEL_SREGS
	cld
	FAKE_MCOUNT(TF_EIP(%esp))
	pushl	%esp
	call	syscall
	add	$4, %esp
	MEXITCOUNT
	jmp	doreti

ENTRY(fork_trampoline)
	pushl	%esp			/* trapframe pointer */
	pushl	%ebx			/* arg1 */
	pushl	%esi			/* function */
	call	fork_exit
	addl	$12,%esp
	/* cut from syscall */

	/*
	 * Return via doreti to handle ASTs.
	 */
	MEXITCOUNT
	jmp	doreti

/*
 * To efficiently implement classification of trap and interrupt handlers
 * for profiling, there must be only trap handlers between the labels btrap
 * and bintr, and only interrupt handlers between the labels bintr and
 * eintr.  This is implemented (partly) by including files that contain
 * some of the handlers.  Before including the files, set up a normal asm
 * environment so that the included files don't need to know that they are
 * included.
 */

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
MCOUNT_LABEL(bintr)

#ifdef DEV_ATPIC
#include <i386/i386/atpic_vector.s>
#endif

#if defined(DEV_APIC) && defined(DEV_ATPIC)
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#endif

#ifdef DEV_APIC
#include <i386/i386/apic_vector.s>
#endif

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#include <i386/i386/vm86bios.s>

	.text
MCOUNT_LABEL(eintr)

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
+	.globl	doreti
doreti:
	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
doreti_next:
	/*
	 * Check if ASTs can be handled now.  ASTs cannot be safely
	 * processed when returning from an NMI.
	 */
	cmpb	$T_NMI,TF_TRAPNO(%esp)
#ifdef HWPMC_HOOKS
	je	doreti_nmi
#else
	je	doreti_exit
#endif
	/*
	 * PSL_VM must be checked first since segment registers only
	 * have an RPL in non-VM86 mode.
	 * ASTs can not be handled now if we are in a vm86 call.
	 */
	testl	$PSL_VM,TF_EFLAGS(%esp)
	jz	doreti_notvm86
	movl	PCPU(CURPCB),%ecx
	testl	$PCB_VM86CALL,PCB_FLAGS(%ecx)
	jz	doreti_ast
	jmp	doreti_exit

doreti_notvm86:
	testb	$SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
	jz	doreti_exit		/* can't handle ASTs now if not */

doreti_ast:
	/*
	 * Check for ASTs atomically with returning.  Disabling CPU
	 * interrupts provides sufficient locking even in the SMP case,
	 * since we will be informed of any new ASTs by an IPI.
	 */
	cli
	movl	PCPU(CURTHREAD),%eax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
	je	doreti_exit
	sti
	pushl	%esp			/* pass a pointer to the trapframe */
	call	ast
	add	$4,%esp
	jmp	doreti_ast
	/*
	 * doreti_exit:	pop registers, iret.
	 *
	 *	The segment register pop is a special case, since it may
	 *	fault if (for example) a sigreturn specifies bad segment
	 *	registers.  The fault is handled in trap.c.
	 */
doreti_exit:
	MEXITCOUNT

	.globl	doreti_popl_fs
doreti_popl_fs:
	popl	%fs
	.globl	doreti_popl_es
doreti_popl_es:
	popl	%es
	.globl	doreti_popl_ds
doreti_popl_ds:
	popl	%ds
	popal
	addl	$8,%esp
	.globl	doreti_iret
doreti_iret:
	iret

	/*
	 * doreti_iret_fault and friends.  Alternative return code for
	 * the case where we get a fault in the doreti_exit code
	 * above.  trap() (i386/i386/trap.c) catches this specific
	 * case, sends the process a signal and continues in the
	 * corresponding place in the code below.
	 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	subl	$8,%esp
	pushal
	pushl	$0
	movw	%ds,(%esp)
	.globl	doreti_popl_ds_fault
doreti_popl_ds_fault:
	pushl	$0
	movw	%es,(%esp)
	.globl	doreti_popl_es_fault
doreti_popl_es_fault:
	pushl	$0
	movw	%fs,(%esp)
	.globl	doreti_popl_fs_fault
doreti_popl_fs_fault:
	sti
	movl	$0,TF_ERR(%esp)		/* XXX should be the error code */
	movl	$T_PROTFLT,TF_TRAPNO(%esp)
	jmp	alltraps_with_regs_pushed

#ifdef HWPMC_HOOKS
doreti_nmi:
	/*
	 * Since we are returning from an NMI, check if the current trap
	 * was from user mode and if so whether the current thread
	 * needs a user call chain capture.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%esp)
	jz	doreti_exit
	movl	PCPU(CURTHREAD),%eax	/* curthread present? */
	orl	%eax,%eax
	jz	doreti_exit
	testl	$TDP_CALLCHAIN,TD_PFLAGS(%eax) /* flagged for capture? */
	jz	doreti_exit
	/*
	 * Take the processor out of NMI mode by executing a fake "iret".
	 */
	pushfl
	pushl	%cs
	pushl	$outofnmi
	iret
outofnmi:
	/*
	 * Call the callchain capture hook after turning interrupts back on.
	 */
	movl	pmc_hook,%ecx
	orl	%ecx,%ecx
	jz	doreti_exit
	pushl	%esp			/* frame pointer */
	pushl	$PMC_FN_USER_CALLCHAIN	/* command */
	movl	PCPU(CURTHREAD),%eax
	pushl	%eax			/* curthread */
	sti
	call	*%ecx
	addl	$12,%esp
	jmp	doreti_ast
	ENTRY(end_exceptions)
#endif