Index: head/sys/amd64/amd64/exception.S =================================================================== --- head/sys/amd64/amd64/exception.S (revision 334519) +++ head/sys/amd64/amd64/exception.S (revision 334520) @@ -1,1300 +1,1307 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * Copyright (c) 2007-2018 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Portions of this software were developed by * Konstantin Belousov under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" #include "assym.inc" #include #include #include #include #ifdef KDTRACE_HOOKS /* Address alltraps jumps to for kernel-mode breakpoints; zero means no DTrace hook is registered. */ .bss .globl dtrace_invop_jump_addr .align 8 .type dtrace_invop_jump_addr,@object .size dtrace_invop_jump_addr,8 dtrace_invop_jump_addr: .zero 8 /* Set by alltraps to the address of calltrap before it jumps to the DTrace hook. */ .globl dtrace_invop_calltrap_addr .align 8 .type dtrace_invop_calltrap_addr,@object .size dtrace_invop_calltrap_addr,8 dtrace_invop_calltrap_addr: .zero 8 #endif .text #ifdef HWPMC_HOOKS ENTRY(start_exceptions) #endif /*****************************************************************************/ /* Trap handling */ /*****************************************************************************/ /* * Trap and fault vector routines. * * All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes * state on the stack but also disables interrupts. This is important for * us for the use of the swapgs instruction. We cannot be interrupted * until the GS.base value is correct. For most traps, we automatically * then enable interrupts if the interrupted context had them enabled. * This is equivalent to the i386 port's use of SDT_SYS386TGT. * * The cpu will push a certain amount of state onto the kernel stack for * the current process. See amd64/include/frame.h. * This includes the current RFLAGS (status register, which includes * the interrupt disable state prior to the trap), the code segment register, * and the return instruction pointer are pushed by the cpu. 
The cpu * will also push an 'error' code for certain traps. We push a dummy * error code for those traps where the cpu doesn't in order to maintain * a consistent frame. We also push a contrived 'trap number'. * * The CPU does not push the general registers, so we must do that, and we * must restore them prior to calling 'iret'. The CPU adjusts %cs and %ss * but does not mess with %ds, %es, %gs or %fs. We swap the %gs base for * the kernel mode operation shortly, without changes to the selector * loaded. Since superuser long mode works with any selectors loaded into * segment registers other than %cs, which makes them mostly unused in long * mode, and kernel does not reference %fs, leave them alone. The segment * registers are reloaded on return to the usermode. */ MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) /* Traps that we leave interrupts disabled for. */ .macro TRAP_NOEN l, trapno PTI_ENTRY \l,X\l .globl X\l .type X\l,@function X\l: subq $TF_RIP,%rsp movl $\trapno,TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) jmp alltraps_noen .endm TRAP_NOEN bpt, T_BPTFLT #ifdef KDTRACE_HOOKS TRAP_NOEN dtrace_ret, T_DTRACE_RET #endif /* Regular traps; the cpu does not supply tf_err for these. */ .macro TRAP l, trapno PTI_ENTRY \l,X\l .globl X\l .type X\l,@function X\l: subq $TF_RIP,%rsp movl $\trapno,TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) jmp alltraps .endm TRAP div, T_DIVIDE TRAP ofl, T_OFLOW TRAP bnd, T_BOUND TRAP ill, T_PRIVINFLT TRAP dna, T_DNA TRAP fpusegm, T_FPOPFLT TRAP rsvd, T_RESERVED TRAP fpu, T_ARITHTRAP TRAP xmm, T_XMMFLT /* This group of traps have tf_err already pushed by the cpu. */ .macro TRAP_ERR l, trapno PTI_ENTRY \l,X\l,has_err=1 .globl X\l .type X\l,@function X\l: subq $TF_ERR,%rsp movl $\trapno,TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) jmp alltraps .endm TRAP_ERR tss, T_TSSFLT TRAP_ERR align, T_ALIGNFLT /* * alltraps entry point. Use swapgs if this is the first time in the * kernel from userland. 
Reenable interrupts if they were enabled * before the trap. This approximates SDT_SYS386TGT on the i386 port. */ SUPERALIGN_TEXT .globl alltraps .type alltraps,@function alltraps: movq %rdi,TF_RDI(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 1f /* already running with kernel GS.base */ swapgs movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) /* from user mode: clear PCB_FULL_IRET in the current pcb */ 1: SAVE_SEGS movq %rdx,TF_RDX(%rsp) movq %rax,TF_RAX(%rsp) movq %rcx,TF_RCX(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* handle_ibrs_entry is called only for traps from user mode */ jz 2f call handle_ibrs_entry 2: testl $PSL_I,TF_RFLAGS(%rsp) /* re-enable interrupts only if the trapped context had them enabled */ jz alltraps_pushregs_no_rax sti alltraps_pushregs_no_rax: /* shared tail: the entry paths in this file that jump here have already saved %rdi, %rax, %rdx and %rcx */ movq %rsi,TF_RSI(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) movq %r11,TF_R11(%rsp) movq %r12,TF_R12(%rsp) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) movl $TF_HASSEGS,TF_FLAGS(%rsp) cld FAKE_MCOUNT(TF_RIP(%rsp)) #ifdef KDTRACE_HOOKS /* * DTrace Function Boundary Trace (fbt) probes are triggered * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint * interrupt. For all other trap types, just handle them in * the usual way. */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jnz calltrap /* ignore userland traps */ cmpl $T_BPTFLT,TF_TRAPNO(%rsp) jne calltrap /* Check if there is no DTrace hook registered. */ cmpq $0,dtrace_invop_jump_addr je calltrap /* * Set our jump address for the jump back in the event that * the breakpoint wasn't caused by DTrace at all. */ movq $calltrap,dtrace_invop_calltrap_addr(%rip) /* Jump to the code hooked in by DTrace. */ jmpq *dtrace_invop_jump_addr #endif .globl calltrap .type calltrap,@function calltrap: movq %rsp,%rdi /* pass a pointer to the trapframe */ call trap_check MEXITCOUNT jmp doreti /* Handle any pending ASTs */ /* * alltraps_noen entry point. Unlike alltraps above, we want to * leave the interrupts disabled. This corresponds to * SDT_SYS386IGT on the i386 port. 
*/ SUPERALIGN_TEXT .globl alltraps_noen .type alltraps_noen,@function alltraps_noen: movq %rdi,TF_RDI(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 1f /* already running with kernel GS.base */ swapgs movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) 1: SAVE_SEGS movq %rdx,TF_RDX(%rsp) movq %rax,TF_RAX(%rsp) movq %rcx,TF_RCX(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) jz alltraps_pushregs_no_rax call handle_ibrs_entry /* join alltraps past its sti, so interrupts stay disabled */ jmp alltraps_pushregs_no_rax /* Double fault: save the full frame, then call dblfault_handler with the trapframe pointer. */ IDTVEC(dblfault) subq $TF_ERR,%rsp movl $T_DOUBLEFLT,TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) movq %r11,TF_R11(%rsp) movq %r12,TF_R12(%rsp) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) SAVE_SEGS movl $TF_HASSEGS,TF_FLAGS(%rsp) cld testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 1f /* already running with kernel GS.base */ swapgs /* load PCPU(KCR3) into %cr3 unless it is ~0 */ 1: movq PCPU(KCR3),%rax cmpq $~0,%rax je 2f movq %rax,%cr3 2: movq %rsp,%rdi call dblfault_handler /* spin in hlt if dblfault_handler returns */ 3: hlt jmp 3b ALIGN_TEXT /* PTI entry for page faults; faults from kernel mode go straight to Xpage. */ IDTVEC(page_pti) testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp) jz Xpage swapgs pushq %rax /* record the %cr3 value seen at entry in PCPU(SAVED_UCR3) */ movq %cr3,%rax movq %rax,PCPU(SAVED_UCR3) cmpq $~0,PCPU(UCR3) jne 1f popq %rax jmp 2f 1: pushq %rdx PTI_UUENTRY has_err=1 2: subq $TF_ERR,%rsp movq %rdi,TF_RDI(%rsp) movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) jmp page_u IDTVEC(page) subq $TF_ERR,%rsp movq %rdi,TF_RDI(%rsp) /* free up GP registers */ movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ jz page_cr2 /* already running with kernel GS.base */ swapgs page_u: movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) movq PCPU(SAVED_UCR3),%rax movq %rax,PCB_SAVED_UCR3(%rdi) call handle_ibrs_entry page_cr2: movq %cr2,%rdi /* preserve %cr2 before .. */ movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */ SAVE_SEGS movl $T_PAGEFLT,TF_TRAPNO(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) jz alltraps_pushregs_no_rax sti jmp alltraps_pushregs_no_rax /* * We have to special-case this one. If we get a trap in doreti() at * the iretq stage, we'll reenter with the wrong gs state. We'll have * to do a special swapgs in this case even coming from the kernel. * XXX linux has a trap handler for their equivalent of load_gs(). * * On the stack, we have the hardware interrupt frame to return * to usermode (faulted) and another frame with error code, for * fault. For PTI, copy both frames to the main thread stack. * Handle the potential 16-byte alignment adjustment incurred * during the second fault by copying both frames independently * while unwinding the stack in between. 
*/ .macro PROTF_ENTRY name,trapno \name\()_pti_doreti: swapgs cmpq $~0,PCPU(UCR3) je 1f pushq %rax pushq %rdx movq PCPU(KCR3),%rax movq %rax,%cr3 movq PCPU(RSP0),%rax subq $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */ MOVE_STACKS (PTI_SIZE / 8) addq $PTI_SIZE,%rax movq PTI_RSP(%rsp),%rsp MOVE_STACKS (PTI_SIZE / 8 - 3) subq $PTI_SIZE,%rax movq %rax,%rsp popq %rdx popq %rax 1: swapgs jmp X\name IDTVEC(\name\()_pti) cmpq $doreti_iret,PTI_RIP-2*8(%rsp) je \name\()_pti_doreti testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */ jz X\name PTI_UENTRY has_err=1 swapgs IDTVEC(\name) subq $TF_ERR,%rsp movl $\trapno,TF_TRAPNO(%rsp) jmp prot_addrf .endm PROTF_ENTRY missing, T_SEGNPFLT PROTF_ENTRY stk, T_STKFLT PROTF_ENTRY prot, T_PROTFLT prot_addrf: movq $0,TF_ADDR(%rsp) movq %rdi,TF_RDI(%rsp) /* free up a GP register */ movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) movw %fs,TF_FS(%rsp) movw %gs,TF_GS(%rsp) leaq doreti_iret(%rip),%rdi cmpq %rdi,TF_RIP(%rsp) je 5f /* kernel but with user gsbase!! */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 6f /* already running with kernel GS.base */ testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) jz 2f cmpw $KUF32SEL,TF_FS(%rsp) jne 1f rdfsbase %rax 1: cmpw $KUG32SEL,TF_GS(%rsp) jne 2f rdgsbase %rdx 2: swapgs movq PCPU(CURPCB),%rdi testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) jz 4f cmpw $KUF32SEL,TF_FS(%rsp) jne 3f movq %rax,PCB_FSBASE(%rdi) 3: cmpw $KUG32SEL,TF_GS(%rsp) jne 4f movq %rdx,PCB_GSBASE(%rdi) 4: call handle_ibrs_entry orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ movw %es,TF_ES(%rsp) movw %ds,TF_DS(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) jz alltraps_pushregs_no_rax sti jmp alltraps_pushregs_no_rax 5: swapgs 6: movq PCPU(CURPCB),%rdi jmp 4b /* * Fast syscall entry point. We enter here with just our new %cs/%ss set, * and the new privilege level. We are still running on the old user stack * pointer. 
We have to juggle a few things around to find our stack etc. * swapgs gives us access to our PCPU space only. * * We do not support invoking this from a custom segment registers, * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT. */ SUPERALIGN_TEXT /* PTI variant: same as fast_syscall, but first loads PCPU(KCR3) into %cr3 unless PCPU(UCR3) is ~0. */ IDTVEC(fast_syscall_pti) swapgs movq %rax,PCPU(SCRATCH_RAX) /* stash %rax so it can serve as scratch */ cmpq $~0,PCPU(UCR3) je fast_syscall_common movq PCPU(KCR3),%rax movq %rax,%cr3 jmp fast_syscall_common SUPERALIGN_TEXT IDTVEC(fast_syscall) swapgs movq %rax,PCPU(SCRATCH_RAX) fast_syscall_common: /* stash the user %rsp and switch to the PCPU(RSP0) stack */ movq %rsp,PCPU(SCRATCH_RSP) movq PCPU(RSP0),%rsp /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */ subq $TF_SIZE,%rsp /* defer TF_RSP till we have a spare register */ movq %r11,TF_RFLAGS(%rsp) movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */ movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */ movq %r11,TF_RSP(%rsp) /* user stack pointer */ movq PCPU(SCRATCH_RAX),%rax + /* + * Save a few arg registers early to free them for use in + * handle_ibrs_entry(). %r10 is especially tricky. It is not an + * arg register, but it holds the arg register %rcx. Profiling + * preserves %rcx, but may clobber %r10. Profiling may also + * clobber %r11, but %r11 (original %eflags) has been saved. 
+ */ movq %rax,TF_RAX(%rsp) /* syscall number */ movq %rdx,TF_RDX(%rsp) /* arg 3 */ + movq %r10,TF_RCX(%rsp) /* arg 4 */ SAVE_SEGS call handle_ibrs_entry movq PCPU(CURPCB),%r11 andl $~PCB_FULL_IRET,PCB_FLAGS(%r11) sti movq $KUDSEL,TF_SS(%rsp) movq $KUCSEL,TF_CS(%rsp) movq $2,TF_ERR(%rsp) /* NOTE(review): presumably the length of the syscall instruction - confirm */ movq %rdi,TF_RDI(%rsp) /* arg 1 */ movq %rsi,TF_RSI(%rsp) /* arg 2 */ - movq %r10,TF_RCX(%rsp) /* arg 4 */ movq %r8,TF_R8(%rsp) /* arg 5 */ movq %r9,TF_R9(%rsp) /* arg 6 */ movq %rbx,TF_RBX(%rsp) /* C preserved */ movq %rbp,TF_RBP(%rsp) /* C preserved */ movq %r12,TF_R12(%rsp) /* C preserved */ movq %r13,TF_R13(%rsp) /* C preserved */ movq %r14,TF_R14(%rsp) /* C preserved */ movq %r15,TF_R15(%rsp) /* C preserved */ movl $TF_HASSEGS,TF_FLAGS(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) /* record the frame in curthread, then call amd64_syscall(curthread, saved rflags & PSL_T) */ movq PCPU(CURTHREAD),%rdi movq %rsp,TD_FRAME(%rdi) movl TF_RFLAGS(%rsp),%esi andl $PSL_T,%esi call amd64_syscall 1: movq PCPU(CURPCB),%rax /* Disable interrupts before testing PCB_FULL_IRET. */ cli testl $PCB_FULL_IRET,PCB_FLAGS(%rax) jnz 4f /* Check for and handle AST's on return to userland. */ movq PCPU(CURTHREAD),%rax testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) jne 3f call handle_ibrs_exit /* Restore preserved registers. */ MEXITCOUNT movq TF_RDI(%rsp),%rdi /* bonus: preserve arg 1 */ movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */ movq TF_RDX(%rsp),%rdx /* return value 2 */ movq TF_RAX(%rsp),%rax /* return value 1 */ movq TF_RFLAGS(%rsp),%r11 /* original %rflags */ movq TF_RIP(%rsp),%rcx /* original %rip */ movq TF_RSP(%rsp),%rsp /* user stack pointer */ cmpq $~0,PCPU(UCR3) je 2f movq PCPU(UCR3),%r9 movq %r9,%cr3 xorl %r9d,%r9d /* do not leave the PCPU(UCR3) value in %r9 */ 2: swapgs sysretq 3: /* AST scheduled. */ sti movq %rsp,%rdi call ast jmp 1b 4: /* Requested full context restore, use doreti for that. */ MEXITCOUNT jmp doreti /* * Here for CYA insurance, in case a "syscall" instruction gets * issued from 32 bit compatibility mode. MSR_CSTAR has to point * to *something* if EFER_SCE is enabled. 
*/ IDTVEC(fast_syscall32) sysret /* * DB# handler is very similar to the NMI handler, because 'mov/pop %ss' delays * generation of the exception until the next instruction is executed, * which might be a kernel entry. So we must execute the handler * on IST stack and be ready for non-kernel GSBASE. */ IDTVEC(dbg) subq $TF_RIP,%rsp movl $(T_TRCTRAP),TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) movq %r11,TF_R11(%rsp) movq %r12,TF_R12(%rsp) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) SAVE_SEGS movl $TF_HASSEGS,TF_FLAGS(%rsp) cld testb $SEL_RPL_MASK,TF_CS(%rsp) jnz dbg_fromuserspace /* * We've interrupted the kernel. Preserve GS.base in %r12, * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTRL in %r14d. */ movl $MSR_GSBASE,%ecx rdmsr movq %rax,%r12 shlq $32,%rdx orq %rdx,%r12 /* Retrieve and load the canonical value for GS.base. */ movq TF_SIZE(%rsp),%rdx movl %edx,%eax shrq $32,%rdx wrmsr movq %cr3,%r13 movq PCPU(KCR3),%rax cmpq $~0,%rax je 1f movq %rax,%cr3 1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) je 2f movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr movl %eax,%r14d call handle_ibrs_entry 2: FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp,%rdi call trap MEXITCOUNT /* Restore speculation control MSR, if preserved. */ testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) je 3f movl %r14d,%eax xorl %edx,%edx movl $MSR_IA32_SPEC_CTRL,%ecx wrmsr /* * Put back the preserved MSR_GSBASE value. */ 3: movl $MSR_GSBASE,%ecx movq %r12,%rdx movl %edx,%eax shrq $32,%rdx wrmsr movq %r13,%cr3 RESTORE_REGS addq $TF_RIP,%rsp jmp doreti_iret dbg_fromuserspace: /* * Switch to kernel GSBASE and kernel page table, and copy frame * from the IST stack to the normal kernel stack, since trap() * re-enables interrupts, and since we might trap on DB# while * in trap(). 
*/ swapgs movq PCPU(KCR3),%rax cmpq $~0,%rax je 1f movq %rax,%cr3 /* copy the TF_SIZE-byte frame to just below PCPU(RSP0) and continue on the copy */ 1: movq PCPU(RSP0),%rax movl $TF_SIZE,%ecx subq %rcx,%rax movq %rax,%rdi movq %rsp,%rsi rep;movsb movq %rax,%rsp call handle_ibrs_entry movq PCPU(CURPCB),%rdi orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* with FSGSBASE, save the current fs/gs bases into the pcb when the saved selectors are KUF32SEL/KUG32SEL */ testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) jz 3f cmpw $KUF32SEL,TF_FS(%rsp) jne 2f rdfsbase %rax movq %rax,PCB_FSBASE(%rdi) 2: cmpw $KUG32SEL,TF_GS(%rsp) jne 3f /* after the swapgs above the user gs base is read from MSR_KGSBASE */ movl $MSR_KGSBASE,%ecx rdmsr shlq $32,%rdx orq %rdx,%rax movq %rax,PCB_GSBASE(%rdi) 3: jmp calltrap /* * NMI handling is special. * * First, NMIs do not respect the state of the processor's RFLAGS.IF * bit. The NMI handler may be entered at any time, including when * the processor is in a critical section with RFLAGS.IF == 0. * The processor's GS.base value could be invalid on entry to the * handler. * * Second, the processor treats NMIs specially, blocking further NMIs * until an 'iretq' instruction is executed. We thus need to execute * the NMI handler with interrupts disabled, to prevent a nested interrupt * from executing an 'iretq' instruction and inadvertently taking the * processor out of NMI mode. * * Third, the NMI handler runs on its own stack (tss_ist2). The canonical * GS.base value for the processor is stored just above the bottom of its * NMI stack. For NMIs taken from kernel mode, the current value in * the processor's GS.base is saved at entry to C-preserved register %r12, * the canonical value for GS.base is then loaded into the processor, and * the saved value is restored at exit time. For NMIs taken from user mode, * the cheaper 'SWAPGS' instructions are used for swapping GS.base. 
*/ IDTVEC(nmi) subq $TF_RIP,%rsp movl $(T_NMI),TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) movq %r11,TF_R11(%rsp) movq %r12,TF_R12(%rsp) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) SAVE_SEGS movl $TF_HASSEGS,TF_FLAGS(%rsp) cld xorl %ebx,%ebx /* %ebx: 0 = NMI taken in kernel mode, 1 = taken in user mode */ testb $SEL_RPL_MASK,TF_CS(%rsp) jnz nmi_fromuserspace /* * We've interrupted the kernel. Preserve GS.base in %r12, * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTRL in %r14d. */ movl $MSR_GSBASE,%ecx rdmsr movq %rax,%r12 shlq $32,%rdx orq %rdx,%r12 /* Retrieve and load the canonical value for GS.base. */ movq TF_SIZE(%rsp),%rdx movl %edx,%eax shrq $32,%rdx wrmsr movq %cr3,%r13 movq PCPU(KCR3),%rax cmpq $~0,%rax je 1f movq %rax,%cr3 1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) je nmi_calltrap movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr movl %eax,%r14d call handle_ibrs_entry jmp nmi_calltrap nmi_fromuserspace: incl %ebx swapgs movq %cr3,%r13 movq PCPU(KCR3),%rax cmpq $~0,%rax je 1f movq %rax,%cr3 1: call handle_ibrs_entry movq PCPU(CURPCB),%rdi testq %rdi,%rdi /* skip the pcb updates when curpcb is NULL */ jz 3f orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) jz 3f cmpw $KUF32SEL,TF_FS(%rsp) jne 2f rdfsbase %rax movq %rax,PCB_FSBASE(%rdi) 2: cmpw $KUG32SEL,TF_GS(%rsp) jne 3f movl $MSR_KGSBASE,%ecx rdmsr shlq $32,%rdx orq %rdx,%rax movq %rax,PCB_GSBASE(%rdi) 3: /* Note: this label is also used by ddb and gdb: */ nmi_calltrap: FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp,%rdi call trap MEXITCOUNT #ifdef HWPMC_HOOKS /* * Capture a userspace callchain if needed. * * - Check if the current trap was from user mode. * - Check if the current thread is valid. * - Check if the thread requires a user call chain to be * captured. * * We are still in NMI mode at this point. 
*/ testl %ebx,%ebx jz nocallchain /* not from userspace */ movq PCPU(CURTHREAD),%rax orq %rax,%rax /* curthread present? */ jz nocallchain /* * Move execution to the regular kernel stack, because we * committed to return through doreti. */ movq %rsp,%rsi /* source stack pointer */ movq $TF_SIZE,%rcx movq PCPU(RSP0),%rdx subq %rcx,%rdx movq %rdx,%rdi /* destination stack pointer */ shrq $3,%rcx /* trap frame size in long words */ cld rep movsq /* copy trapframe */ movq %rdx,%rsp /* we are on the regular kstack */ testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */ jz nocallchain /* * A user callchain is to be captured, so: * - Take the processor out of "NMI" mode by faking an "iret", * to allow for nested NMI interrupts. * - Enable interrupts, so that copyin() can work. */ movl %ss,%eax pushq %rax /* tf_ss */ pushq %rdx /* tf_rsp (on kernel stack) */ pushfq /* tf_rflags */ movl %cs,%eax pushq %rax /* tf_cs */ pushq $outofnmi /* tf_rip */ iretq outofnmi: /* * At this point the processor has exited NMI mode and is running * with interrupts turned off on the normal kernel stack. * * If a pending NMI gets recognized at or after this point, it * will cause a kernel callchain to be traced. * * We turn interrupts back on, and call the user callchain capture hook. */ movq pmc_hook,%rax orq %rax,%rax jz nocallchain movq PCPU(CURTHREAD),%rdi /* thread */ movq $PMC_FN_USER_CALLCHAIN,%rsi /* command */ movq %rsp,%rdx /* frame */ sti call *%rax cli nocallchain: #endif testl %ebx,%ebx /* %ebx != 0 => return to userland */ jnz doreti_exit /* * Restore speculation control MSR, if preserved. */ testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) je 1f movl %r14d,%eax xorl %edx,%edx movl $MSR_IA32_SPEC_CTRL,%ecx wrmsr /* * Put back the preserved MSR_GSBASE value. */ 1: movl $MSR_GSBASE,%ecx movq %r12,%rdx movl %edx,%eax shrq $32,%rdx wrmsr movq %r13,%cr3 RESTORE_REGS addq $TF_RIP,%rsp jmp doreti_iret /* * MC# handling is similar to NMI. 
* * As with NMIs, machine check exceptions do not respect RFLAGS.IF and * can occur at any time with a GS.base value that does not correspond * to the privilege level in CS. * * Machine checks are not unblocked by iretq, but it is best to run * the handler with interrupts disabled since the exception may have * interrupted a critical section. * * The MC# handler runs on its own stack (tss_ist3). The canonical * GS.base value for the processor is stored just above the bottom of * its MC# stack. For exceptions taken from kernel mode, the current * value in the processor's GS.base is saved at entry to C-preserved * register %r12, the canonical value for GS.base is then loaded into * the processor, and the saved value is restored at exit time. For * exceptions taken from user mode, the cheaper 'SWAPGS' instructions * are used for swapping GS.base. */ IDTVEC(mchk) subq $TF_RIP,%rsp movl $(T_MCHK),TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) movq %r11,TF_R11(%rsp) movq %r12,TF_R12(%rsp) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) SAVE_SEGS movl $TF_HASSEGS,TF_FLAGS(%rsp) cld xorl %ebx,%ebx /* %ebx: 0 = exception taken in kernel mode, 1 = taken in user mode */ testb $SEL_RPL_MASK,TF_CS(%rsp) jnz mchk_fromuserspace /* * We've interrupted the kernel. Preserve GS.base in %r12, * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTRL in %r14d. */ movl $MSR_GSBASE,%ecx rdmsr movq %rax,%r12 shlq $32,%rdx orq %rdx,%r12 /* Retrieve and load the canonical value for GS.base. 
*/ movq TF_SIZE(%rsp),%rdx movl %edx,%eax shrq $32,%rdx wrmsr movq %cr3,%r13 movq PCPU(KCR3),%rax cmpq $~0,%rax je 1f movq %rax,%cr3 1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) je mchk_calltrap movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr movl %eax,%r14d call handle_ibrs_entry jmp mchk_calltrap mchk_fromuserspace: incl %ebx swapgs movq %cr3,%r13 movq PCPU(KCR3),%rax cmpq $~0,%rax je 1f movq %rax,%cr3 1: call handle_ibrs_entry /* Note: this label is also used by ddb and gdb: */ mchk_calltrap: FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp,%rdi call mca_intr MEXITCOUNT testl %ebx,%ebx /* %ebx != 0 => return to userland */ jnz doreti_exit /* * Restore speculation control MSR, if preserved. */ testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) je 1f movl %r14d,%eax xorl %edx,%edx movl $MSR_IA32_SPEC_CTRL,%ecx wrmsr /* * Put back the preserved MSR_GSBASE value. */ 1: movl $MSR_GSBASE,%ecx movq %r12,%rdx movl %edx,%eax shrq $32,%rdx wrmsr movq %r13,%cr3 RESTORE_REGS addq $TF_RIP,%rsp jmp doreti_iret /* Calls fork_exit(%r12, %rbx, trapframe pointer), then returns through doreti. */ ENTRY(fork_trampoline) movq %r12,%rdi /* function */ movq %rbx,%rsi /* arg1 */ movq %rsp,%rdx /* trapframe pointer */ call fork_exit MEXITCOUNT jmp doreti /* Handle any ASTs */ /* * To efficiently implement classification of trap and interrupt handlers * for profiling, there must be only trap handlers between the labels btrap * and bintr, and only interrupt handlers between the labels bintr and * eintr. This is implemented (partly) by including files that contain * some of the handlers. Before including the files, set up a normal asm * environment so that the included files don't need to know that they are * included. */ #ifdef COMPAT_FREEBSD32 .data .p2align 4 .text SUPERALIGN_TEXT #include #endif .data .p2align 4 .text SUPERALIGN_TEXT MCOUNT_LABEL(bintr) #include #ifdef DEV_ATPIC .data .p2align 4 .text SUPERALIGN_TEXT #include #endif .text MCOUNT_LABEL(eintr) /* * void doreti(struct trapframe) * * Handle return from interrupts, traps and syscalls. 
*/ .text SUPERALIGN_TEXT .type doreti,@function .globl doreti doreti: FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */ /* * Check if ASTs can be handled now. */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */ jz doreti_exit /* can't handle ASTs now if not */ doreti_ast: /* * Check for ASTs atomically with returning. Disabling CPU * interrupts provides sufficient locking even in the SMP case, * since we will be informed of any new ASTs by an IPI. */ cli movq PCPU(CURTHREAD),%rax testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) je doreti_exit /* neither flag set: leave with interrupts still disabled */ sti movq %rsp,%rdi /* pass a pointer to the trapframe */ call ast jmp doreti_ast /* re-check the flags after ast() returns */ /* * doreti_exit: pop registers, iret. * * The segment register pop is a special case, since it may * fault if (for example) a sigreturn specifies bad segment * registers. The fault is handled in trap.c. */ doreti_exit: MEXITCOUNT movq PCPU(CURPCB),%r8 /* %r8 = current pcb, used by the segment reload code that follows */ /* * Do not reload segment registers for kernel. * Since we do not reload segments registers with sane * values on kernel entry, descriptors referenced by * segments registers might be not valid. This is fatal * for user mode, but is not a problem for the kernel. 
*/ testb $SEL_RPL_MASK,TF_CS(%rsp) jz ld_regs /* returning to kernel mode: no segment reload */ testl $PCB_FULL_IRET,PCB_FLAGS(%r8) jz ld_regs /* full iret not requested */ andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) /* consume the request */ testl $TF_HASSEGS,TF_FLAGS(%rsp) je set_segs /* frame carries no saved segments: set_segs stores defaults and comes back to do_segs */ do_segs: /* Restore %fs and fsbase */ movw TF_FS(%rsp),%ax .globl ld_fs ld_fs: movw %ax,%fs cmpw $KUF32SEL,%ax jne 1f movl $MSR_FSBASE,%ecx movl PCB_FSBASE(%r8),%eax movl PCB_FSBASE+4(%r8),%edx .globl ld_fsbase ld_fsbase: wrmsr 1: /* Restore %gs and gsbase */ movw TF_GS(%rsp),%si /* interrupts stay off (rflags saved by pushfq) until the kernel %gs base is written back */ pushfq cli movl $MSR_GSBASE,%ecx /* Save current kernel %gs base into %r12d:%r13d */ rdmsr movl %eax,%r12d movl %edx,%r13d .globl ld_gs ld_gs: movw %si,%gs /* Save user %gs base into %r14d:%r15d */ rdmsr movl %eax,%r14d movl %edx,%r15d /* Restore kernel %gs base */ movl %r12d,%eax movl %r13d,%edx wrmsr popfq /* * Restore user %gs base, either from PCB if used for TLS, or * from the previously saved msr read. */ movl $MSR_KGSBASE,%ecx cmpw $KUG32SEL,%si jne 1f movl PCB_GSBASE(%r8),%eax movl PCB_GSBASE+4(%r8),%edx jmp ld_gsbase 1: movl %r14d,%eax movl %r15d,%edx .globl ld_gsbase ld_gsbase: wrmsr /* May trap if non-canonical, but only for TLS. */ .globl ld_es ld_es: movw TF_ES(%rsp),%es .globl ld_ds ld_ds: movw TF_DS(%rsp),%ds ld_regs: RESTORE_REGS testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ jz 2f /* keep running with kernel GS.base */ cli call handle_ibrs_exit_rs cmpq $~0,PCPU(UCR3) je 1f /* PCPU(UCR3) is set: copy %rax, %rdx and the iret frame into a PTI frame just below PCPU(PTI_RSP0) */ pushq %rdx movq PCPU(PTI_RSP0),%rdx subq $PTI_SIZE,%rdx movq %rax,PTI_RAX(%rdx) popq %rax movq %rax,PTI_RDX(%rdx) movq TF_RIP(%rsp),%rax movq %rax,PTI_RIP(%rdx) movq TF_CS(%rsp),%rax movq %rax,PTI_CS(%rdx) movq TF_RFLAGS(%rsp),%rax movq %rax,PTI_RFLAGS(%rdx) movq TF_RSP(%rsp),%rax movq %rax,PTI_RSP(%rdx) movq TF_SS(%rsp),%rax movq %rax,PTI_SS(%rdx) /* PCPU(UCR3) is read before swapgs; it is loaded into %cr3 only after the stack switch */ movq PCPU(UCR3),%rax swapgs movq %rdx,%rsp movq %rax,%cr3 popq %rdx popq %rax addq $8,%rsp /* skip one 8-byte slot before the iret frame (presumably the error code slot - confirm) */ jmp doreti_iret 1: swapgs 2: addq $TF_RIP,%rsp .globl doreti_iret doreti_iret: iretq /* Store the default user selectors in the trapframe, then reload from it via do_segs. */ set_segs: movw $KUDSEL,%ax movw %ax,TF_DS(%rsp) movw %ax,TF_ES(%rsp) movw $KUF32SEL,TF_FS(%rsp) movw $KUG32SEL,TF_GS(%rsp) jmp do_segs /* * doreti_iret_fault. Alternative return code for * the case where we get a fault in the doreti_exit code * above. trap() (amd64/amd64/trap.c) catches this specific * case, sends the process a signal and continues in the * corresponding place in the code below. 
*/ ALIGN_TEXT .globl doreti_iret_fault doreti_iret_fault: subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */ movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) call handle_ibrs_entry testb $SEL_RPL_MASK,TF_CS(%rsp) jz 1f sti 1: SAVE_SEGS movl $TF_HASSEGS,TF_FLAGS(%rsp) movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) movq %r11,TF_R11(%rsp) movq %r12,TF_R12(%rsp) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) movl $T_PROTFLT,TF_TRAPNO(%rsp) movq $0,TF_ERR(%rsp) /* XXX should be the error code */ movq $0,TF_ADDR(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) jmp calltrap /* The *_load_fault handlers that follow share one shape: mark the frame T_PROTFLT, call trap(), replace the offending selector or base with a default, and retry doreti. */ ALIGN_TEXT .globl ds_load_fault ds_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) /* NOTE(review): this handler tests TF_CS, while the other *_load_fault handlers test PSL_I in TF_RFLAGS - confirm this is intended */ testb $SEL_RPL_MASK,TF_CS(%rsp) jz 1f sti 1: movq %rsp,%rdi call trap movw $KUDSEL,TF_DS(%rsp) jmp doreti ALIGN_TEXT .globl es_load_fault es_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) jz 1f sti 1: movq %rsp,%rdi call trap movw $KUDSEL,TF_ES(%rsp) jmp doreti ALIGN_TEXT .globl fs_load_fault fs_load_fault: testl $PSL_I,TF_RFLAGS(%rsp) jz 1f sti 1: movl $T_PROTFLT,TF_TRAPNO(%rsp) movq %rsp,%rdi call trap movw $KUF32SEL,TF_FS(%rsp) jmp doreti ALIGN_TEXT .globl gs_load_fault gs_load_fault: popfq /* drop the rflags image that doreti pushes with pushfq before ld_gs */ movl $T_PROTFLT,TF_TRAPNO(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) jz 1f sti 1: movq %rsp,%rdi call trap movw $KUG32SEL,TF_GS(%rsp) jmp doreti ALIGN_TEXT .globl fsbase_load_fault fsbase_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) jz 1f sti 1: movq %rsp,%rdi call trap /* zero the saved fs base in curthread's pcb before retrying */ movq PCPU(CURTHREAD),%r8 movq TD_PCB(%r8),%r8 movq $0,PCB_FSBASE(%r8) jmp doreti ALIGN_TEXT .globl gsbase_load_fault gsbase_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) jz 1f sti 1: movq %rsp,%rdi call trap /* zero the saved gs base in curthread's pcb before retrying */ movq PCPU(CURTHREAD),%r8 movq TD_PCB(%r8),%r8 movq $0,PCB_GSBASE(%r8) jmp doreti #ifdef HWPMC_HOOKS ENTRY(end_exceptions) #endif Index: 
head/sys/i386/i386/exception.s =================================================================== --- head/sys/i386/i386/exception.s (revision 334519) +++ head/sys/i386/i386/exception.s (revision 334520) @@ -1,653 +1,655 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * Copyright (c) 2007, 2018 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_apic.h"
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"

#include "assym.inc"

#include
#include
#include

#ifdef KDTRACE_HOOKS
/*
 * Two pointer-sized (4-byte) slots used by the DTrace invalid-opcode
 * hook: the address to jump to, and the address to return to when the
 * exception turns out not to belong to DTrace.
 */
	.bss
	.globl	dtrace_invop_jump_addr
	.align	4
	.type	dtrace_invop_jump_addr, @object
	.size	dtrace_invop_jump_addr, 4
dtrace_invop_jump_addr:
	.zero	4
	.globl	dtrace_invop_calltrap_addr
	.align	4
	.type	dtrace_invop_calltrap_addr, @object
	.size	dtrace_invop_calltrap_addr, 4
dtrace_invop_calltrap_addr:
	.zero	4		/* matches the declared .size of 4 */
#endif
	.text
ENTRY(start_exceptions)
	.globl	tramp_idleptd
tramp_idleptd:	.long	0

/*****************************************************************************/
/* Trap handling                                                             */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * All traps are 'interrupt gates', SDT_SYS386IGT.  Interrupts are disabled
 * by hardware to not allow interrupts until code switched to the kernel
 * address space and the kernel thread stack.
 *
 * The cpu will push a certain amount of state onto the kernel stack for
 * the current process.  The amount of state depends on the type of trap
 * and whether the trap crossed rings or not.  See i386/include/frame.h.
 * At the very least the current EFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer are pushed by the cpu.  The cpu
 * will also push an 'error' code for certain traps.
We push a dummy * error code for those traps where the cpu doesn't in order to maintain * a consistent frame. We also push a contrived 'trap number'. * * The cpu does not push the general registers, we must do that, and we * must restore them prior to calling 'iret'. The cpu adjusts the %cs and * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we * must load them with appropriate values for supervisor mode operation. * * This code is not executed at the linked address, it is copied to the * trampoline area. As the consequence, all code there and in included files * must be PIC. */ MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) #define TRAP(a) pushl $(a) ; jmp alltraps IDTVEC(div) pushl $0; TRAP(T_DIVIDE) IDTVEC(bpt) pushl $0; TRAP(T_BPTFLT) IDTVEC(dtrace_ret) pushl $0; TRAP(T_DTRACE_RET) IDTVEC(ofl) pushl $0; TRAP(T_OFLOW) IDTVEC(bnd) pushl $0; TRAP(T_BOUND) #ifndef KDTRACE_HOOKS IDTVEC(ill) pushl $0; TRAP(T_PRIVINFLT) #endif IDTVEC(dna) pushl $0; TRAP(T_DNA) IDTVEC(fpusegm) pushl $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) IDTVEC(missing) pushl $T_SEGNPFLT jmp irettraps IDTVEC(stk) pushl $T_STKFLT jmp irettraps IDTVEC(prot) pushl $T_PROTFLT jmp irettraps IDTVEC(page) cmpl $PMAP_TRM_MIN_ADDRESS, TF_EIP-TF_ERR(%esp) jb 1f movl %ebx, %cr3 movl %edx, TF_EIP-TF_ERR(%esp) addl $4, %esp iret 1: pushl $T_PAGEFLT jmp alltraps IDTVEC(rsvd_pti) IDTVEC(rsvd) pushl $0; TRAP(T_RESERVED) IDTVEC(fpu) pushl $0; TRAP(T_ARITHTRAP) IDTVEC(align) TRAP(T_ALIGNFLT) IDTVEC(xmm) pushl $0; TRAP(T_XMMFLT) /* * All traps except ones for syscalls or invalid segment, * jump to alltraps. If * interrupts were enabled when the trap occurred, then interrupts * are enabled now if the trap was through a trap gate, else * disabled if the trap was through an interrupt gate. Note that * int0x80_syscall is a trap gate. Interrupt gates are used by * page faults, non-maskable interrupts, debug and breakpoint * exceptions. 
*/ SUPERALIGN_TEXT .globl alltraps .type alltraps,@function alltraps: PUSH_FRAME2 alltraps_with_regs_pushed: SET_KERNEL_SREGS cld KENTER FAKE_MCOUNT(TF_EIP(%esp)) calltrap: pushl %esp movl $trap,%eax call *%eax add $4, %esp /* * Return via doreti to handle ASTs. */ MEXITCOUNT jmp doreti .globl irettraps .type irettraps,@function irettraps: testl $PSL_VM, TF_EFLAGS-TF_TRAPNO(%esp) jnz alltraps testb $SEL_RPL_MASK, TF_CS-TF_TRAPNO(%esp) jnz alltraps /* * Kernel mode. * The special case there is the kernel mode with user %cr3 and * trampoline stack. We need to copy both current frame and the * hardware portion of the frame we tried to return to, to the * normal stack. This logic must follow the stack unwind order * in doreti. */ PUSH_FRAME2 SET_KERNEL_SREGS cld call 1f 1: popl %ebx leal (doreti_iret - 1b)(%ebx), %edx cmpl %edx, TF_EIP(%esp) jne 2f movl $(2 * TF_SZ - TF_EIP), %ecx jmp 6f 2: leal (doreti_popl_ds - 1b)(%ebx), %edx cmpl %edx, TF_EIP(%esp) jne 3f movl $(2 * TF_SZ - TF_DS), %ecx jmp 6f 3: leal (doreti_popl_es - 1b)(%ebx), %edx cmpl %edx, TF_EIP(%esp) jne 4f movl $(2 * TF_SZ - TF_ES), %ecx jmp 6f 4: leal (doreti_popl_fs - 1b)(%ebx), %edx cmpl %edx, TF_EIP(%esp) jne 5f movl $(2 * TF_SZ - TF_FS), %ecx jmp 6f /* kernel mode, normal */ 5: FAKE_MCOUNT(TF_EIP(%esp)) jmp calltrap 6: cmpl $PMAP_TRM_MIN_ADDRESS, %esp /* trampoline stack ? */ jb 5b /* if not, no need to change stacks */ movl (tramp_idleptd - 1b)(%ebx), %eax movl %eax, %cr3 movl PCPU(KESP0), %edx subl %ecx, %edx movl %edx, %edi movl %esp, %esi rep; movsb movl %edx, %esp FAKE_MCOUNT(TF_EIP(%esp)) jmp calltrap /* * Privileged instruction fault. */ #ifdef KDTRACE_HOOKS SUPERALIGN_TEXT IDTVEC(ill) /* * Check if this is a user fault. If so, just handle it as a normal * trap. */ testl $PSL_VM, 8(%esp) /* and vm86 mode. */ jnz norm_ill cmpl $GSEL_KPL, 4(%esp) /* Check the code segment */ jne norm_ill /* * Check if a DTrace hook is registered. The trampoline cannot * be instrumented. 
*/ cmpl $0, dtrace_invop_jump_addr je norm_ill /* * This is a kernel instruction fault that might have been caused * by a DTrace provider. */ pushal cld /* * Set our jump address for the jump back in the event that * the exception wasn't caused by DTrace at all. */ movl $norm_ill, dtrace_invop_calltrap_addr /* Jump to the code hooked in by DTrace. */ jmpl *dtrace_invop_jump_addr /* * Process the instruction fault in the normal way. */ norm_ill: pushl $0 pushl $T_PRIVINFLT jmp alltraps #endif /* * See comment in the handler for the kernel case T_TRCTRAP in trap.c. * The exception handler must be ready to execute with wrong %cr3. * We save original %cr3 in frame->tf_err, similarly to NMI and MCE * handlers. */ IDTVEC(dbg) pushl $0 pushl $T_TRCTRAP PUSH_FRAME2 SET_KERNEL_SREGS cld movl %cr3, %eax movl %eax, TF_ERR(%esp) call 1f 1: popl %eax movl (tramp_idleptd - 1b)(%eax), %eax movl %eax, %cr3 FAKE_MCOUNT(TF_EIP(%esp)) testl $PSL_VM, TF_EFLAGS(%esp) jnz dbg_user testb $SEL_RPL_MASK,TF_CS(%esp) jz calltrap dbg_user: NMOVE_STACKS movl $handle_ibrs_entry,%eax call *%eax pushl %esp movl $trap,%eax call *%eax add $4, %esp movl $T_RESERVED, TF_TRAPNO(%esp) MEXITCOUNT jmp doreti IDTVEC(mchk) pushl $0 pushl $T_MCHK jmp nmi_mchk_common IDTVEC(nmi) pushl $0 pushl $T_NMI nmi_mchk_common: PUSH_FRAME2 SET_KERNEL_SREGS cld /* * Save %cr3 into tf_err. There is no good place to put it. * Always reload %cr3, since we might have interrupted the * kernel entry or exit. * Do not switch to the thread kernel stack, otherwise we might * obliterate the previous context partially copied from the * trampoline stack. * Do not re-enable IBRS, there is no good place to store * previous state if we come from the kernel. */ movl %cr3, %eax movl %eax, TF_ERR(%esp) call 1f 1: popl %eax movl (tramp_idleptd - 1b)(%eax), %eax movl %eax, %cr3 FAKE_MCOUNT(TF_EIP(%esp)) jmp calltrap /* * Trap gate entry for syscalls (int 0x80). 
* This is used by FreeBSD ELF executables, "new" a.out executables, and all
 * Linux executables.
 *
 * Even though the name says 'int0x80', this is actually a trap gate, not an
 * interrupt gate.  Thus interrupts are enabled on entry just as they are for
 * a normal syscall.
 *
 * NOTE(review): the handler executes an explicit sti after MOVE_STACKS,
 * which would be redundant if interrupts were already enabled on entry --
 * confirm which gate type this vector is installed with.
 */
	SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
	pushl	$2			/* sizeof "int 0x80" */
	pushl	$0			/* tf_trapno */
	PUSH_FRAME2
	SET_KERNEL_SREGS
	cld
	MOVE_STACKS
	/* Kernel functions are reached through an absolute address in %eax. */
	movl	$handle_ibrs_entry,%eax
	call	*%eax
	sti
	FAKE_MCOUNT(TF_EIP(%esp))
	pushl	%esp			/* argument: pointer to the frame */
	movl	$syscall, %eax
	call	*%eax
	add	$4, %esp		/* drop the argument */
	MEXITCOUNT
	jmp	doreti

/*
 * Call fork_exit(function, arg1, frame) with the values found in %esi,
 * %ebx and %esp, then leave through doreti.
 */
ENTRY(fork_trampoline)
	pushl	%esp			/* trapframe pointer */
	pushl	%ebx			/* arg1 */
	pushl	%esi			/* function */
	movl	$fork_exit, %eax
	call	*%eax
	addl	$12,%esp		/* drop the three arguments */
	/* cut from syscall */

	/*
	 * Return via doreti to handle ASTs.
	 */
	MEXITCOUNT
	jmp	doreti


/*
 * To efficiently implement classification of trap and interrupt handlers
 * for profiling, there must be only trap handlers between the labels btrap
 * and bintr, and only interrupt handlers between the labels bintr and
 * eintr.  This is implemented (partly) by including files that contain
 * some of the handlers.  Before including the files, set up a normal asm
 * environment so that the included files don't need to know that they are
 * included.
 */

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
MCOUNT_LABEL(bintr)

#ifdef DEV_ATPIC
#include
#endif

#if defined(DEV_APIC) && defined(DEV_ATPIC)
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#endif

#ifdef DEV_APIC
#include
#endif

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#include

	.text
MCOUNT_LABEL(eintr)

#include

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
	.globl	doreti
doreti:
	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
doreti_next:
	/*
	 * Check if ASTs can be handled now.  ASTs cannot be safely
	 * processed when returning from an NMI.
*/ cmpb $T_NMI,TF_TRAPNO(%esp) #ifdef HWPMC_HOOKS je doreti_nmi #else je doreti_exit #endif /* * PSL_VM must be checked first since segment registers only * have an RPL in non-VM86 mode. * ASTs can not be handled now if we are in a vm86 call. */ testl $PSL_VM,TF_EFLAGS(%esp) jz doreti_notvm86 movl PCPU(CURPCB),%ecx testl $PCB_VM86CALL,PCB_FLAGS(%ecx) jz doreti_ast jmp doreti_popl_fs doreti_notvm86: testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ jz doreti_exit /* can't handle ASTs now if not */ doreti_ast: /* * Check for ASTs atomically with returning. Disabling CPU * interrupts provides sufficient locking even in the SMP case, * since we will be informed of any new ASTs by an IPI. */ cli movl PCPU(CURTHREAD),%eax testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) je doreti_exit sti pushl %esp /* pass a pointer to the trapframe */ movl $ast, %eax call *%eax add $4,%esp jmp doreti_ast /* * doreti_exit: pop registers, iret. * * The segment register pop is a special case, since it may * fault if (for example) a sigreturn specifies bad segment * registers. The fault is handled in trap.c. 
*/
doreti_exit:
	MEXITCOUNT

	/*
	 * Frames tagged T_NMI, T_MCHK or T_TRCTRAP leave through
	 * doreti_iret_nmi, which reloads %cr3 from tf_err and skips the
	 * stack copy below.
	 */
	cmpl	$T_NMI, TF_TRAPNO(%esp)
	je	doreti_iret_nmi
	cmpl	$T_MCHK, TF_TRAPNO(%esp)
	je	doreti_iret_nmi
	cmpl	$T_TRCTRAP, TF_TRAPNO(%esp)
	je	doreti_iret_nmi

	/* %ecx = byte count for the copy: TF_SZ, more for a vm86 frame. */
	movl	$TF_SZ, %ecx
	testl	$PSL_VM,TF_EFLAGS(%esp)
	jz	1f			/* PCB_VM86CALL is not set */
	addl	$VM86_STACK_SPACE, %ecx
	jmp	2f
	/* Saved %cs without RPL bits: pop the registers in place. */
1:	testl	$SEL_RPL_MASK, TF_CS(%esp)
	jz	doreti_popl_fs
2:	movl	$handle_ibrs_exit,%eax
+	pushl	%ecx			/* preserve enough call-used regs */
	call	*%eax
+	popl	%ecx
	/*
	 * Copy %ecx bytes starting at %esp to the area ending at
	 * PCPU(TRAMPSTK), switch %esp to the copy, then load %cr3 from
	 * the current PCB.
	 */
	movl	%esp, %esi
	movl	PCPU(TRAMPSTK), %edx
	subl	%ecx, %edx
	movl	%edx, %edi
	rep; movsb
	movl	%edx, %esp
	movl	PCPU(CURPCB),%eax
	movl	PCB_CR3(%eax), %eax
	movl	%eax, %cr3

	/*
	 * Each pop below carries its own global label; the code here and
	 * the comment that follows use those labels to tell which
	 * instruction faulted.
	 */
	.globl	doreti_popl_fs
doreti_popl_fs:
	popl	%fs
	.globl	doreti_popl_es
doreti_popl_es:
	popl	%es
	.globl	doreti_popl_ds
doreti_popl_ds:
	popl	%ds
	popal
	addl	$8,%esp			/* skip two 4-byte slots before iret */
	.globl	doreti_iret
doreti_iret:
	iret

	/* Reload %cr3 from the frame's tf_err slot, then pop as usual. */
doreti_iret_nmi:
	movl	TF_ERR(%esp), %eax
	movl	%eax, %cr3
	jmp	doreti_popl_fs

	/*
	 * doreti_iret_fault and friends.  Alternative return code for
	 * the case where we get a fault in the doreti_exit code
	 * above.  trap() (i386/i386/trap.c) catches this specific
	 * case, and continues in the corresponding place in the code
	 * below.
	 *
	 * If the fault occurred during return to usermode, we recreate
	 * the trap frame and call trap() to send a signal.  Otherwise
	 * the kernel was tricked into fault by attempt to restore invalid
	 * usermode segment selectors on return from nested fault or
	 * interrupt, where interrupted kernel entry code not yet loaded
	 * kernel selectors.  In the latter case, emulate iret and zero
	 * the invalid selector.
	 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	pushl	$0	/* tf_err */
	pushl	$0	/* tf_trapno XXXKIB: provide more useful value ?
*/ pushal pushl $0 movw %ds,(%esp) .globl doreti_popl_ds_fault doreti_popl_ds_fault: testb $SEL_RPL_MASK,TF_CS-TF_DS(%esp) jz doreti_popl_ds_kfault pushl $0 movw %es,(%esp) .globl doreti_popl_es_fault doreti_popl_es_fault: testb $SEL_RPL_MASK,TF_CS-TF_ES(%esp) jz doreti_popl_es_kfault pushl $0 movw %fs,(%esp) .globl doreti_popl_fs_fault doreti_popl_fs_fault: testb $SEL_RPL_MASK,TF_CS-TF_FS(%esp) jz doreti_popl_fs_kfault movl $0,TF_ERR(%esp) /* XXX should be the error code */ movl $T_PROTFLT,TF_TRAPNO(%esp) SET_KERNEL_SREGS jmp calltrap doreti_popl_ds_kfault: movl $0,(%esp) jmp doreti_popl_ds doreti_popl_es_kfault: movl $0,(%esp) jmp doreti_popl_es doreti_popl_fs_kfault: movl $0,(%esp) jmp doreti_popl_fs #ifdef HWPMC_HOOKS doreti_nmi: /* * Since we are returning from an NMI, check if the current trap * was from user mode and if so whether the current thread * needs a user call chain capture. */ testl $PSL_VM, TF_EFLAGS(%esp) jnz doreti_exit testb $SEL_RPL_MASK,TF_CS(%esp) jz doreti_exit movl PCPU(CURTHREAD),%eax /* curthread present? */ orl %eax,%eax jz doreti_exit testl $TDP_CALLCHAIN,TD_PFLAGS(%eax) /* flagged for capture? */ jz doreti_exit /* * Switch to thread stack. Reset tf_trapno to not indicate NMI, * to cause normal userspace exit. */ movl $T_RESERVED, TF_TRAPNO(%esp) NMOVE_STACKS /* * Take the processor out of NMI mode by executing a fake "iret". */ pushfl pushl %cs call 1f 1: popl %eax leal (outofnmi-1b)(%eax),%eax pushl %eax iret outofnmi: /* * Call the callchain capture hook after turning interrupts back on. 
*/ movl pmc_hook,%ecx orl %ecx,%ecx jz doreti_exit pushl %esp /* frame pointer */ pushl $PMC_FN_USER_CALLCHAIN /* command */ movl PCPU(CURTHREAD),%eax pushl %eax /* curthread */ sti call *%ecx addl $12,%esp jmp doreti_ast #endif ENTRY(end_exceptions) Index: head/sys/i386/i386/prof_machdep.c =================================================================== --- head/sys/i386/i386/prof_machdep.c (revision 334519) +++ head/sys/i386/i386/prof_machdep.c (revision 334520) @@ -1,378 +1,384 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1996 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #ifdef GUPROF #include "opt_i586_guprof.h" #include "opt_perfmon.h" #include #include #include #include #include #include #include #include #include #include #include #include #define CPUTIME_CLOCK_UNINITIALIZED 0 #define CPUTIME_CLOCK_I8254 1 #define CPUTIME_CLOCK_TSC 2 #define CPUTIME_CLOCK_I586_PMC 3 #define CPUTIME_CLOCK_I8254_SHIFT 7 int cputime_bias = 1; /* initialize for locality of reference */ static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; #if defined(PERFMON) && defined(I586_PMC_GUPROF) static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; static int cputime_clock_pmc_init; static struct gmonparam saved_gmp; #endif #if defined(I586_CPU) || defined(I686_CPU) static int cputime_prof_active; #endif #endif /* GUPROF */ #ifdef __GNUCLIKE_ASM __asm(" \n\ GM_STATE = 0 \n\ GMON_PROF_OFF = 3 \n\ \n\ .text \n\ .p2align 4,0x90 \n\ .globl __mcount \n\ .type __mcount,@function \n\ __mcount: \n\ # \n\ # Check that we are profiling. Do it early for speed. \n\ # \n\ cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ je .mcount_exit \n\ # \n\ # __mcount is the same as [.]mcount except the caller \n\ # hasn't changed the stack except to call here, so the \n\ # caller's raddr is above our raddr. \n\ # \n\ movl 4(%esp),%edx \n\ jmp .got_frompc \n\ \n\ .p2align 4,0x90 \n\ .globl .mcount \n\ .mcount: \n\ cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ je .mcount_exit \n\ # \n\ # The caller's stack frame has already been built, so \n\ # %ebp is the caller's frame pointer. The caller's \n\ # raddr is in the caller's frame following the caller's \n\ # caller's frame pointer. \n\ # \n\ movl 4(%ebp),%edx \n\ .got_frompc: \n\ # \n\ # Our raddr is the caller's pc. 
\n\ # \n\ movl (%esp),%eax \n\ \n\ pushfl \n\ pushl %eax \n\ pushl %edx \n\ cli \n\ call mcount \n\ addl $8,%esp \n\ popfl \n\ .mcount_exit: \n\ ret $0 \n\ "); + +void __mcount(void); +void (*__mcountp)(void) = __mcount; #else /* !__GNUCLIKE_ASM */ #error "this file needs to be ported to your compiler" #endif /* __GNUCLIKE_ASM */ #ifdef GUPROF /* * [.]mexitcount saves the return register(s), loads selfpc and calls * mexitcount(selfpc) to do the work. Someday it should be in a machine * dependent file together with cputime(), __mcount and [.]mcount. cputime() * can't just be put in machdep.c because it has to be compiled without -pg. */ #ifdef __GNUCLIKE_ASM __asm(" \n\ .text \n\ # \n\ # Dummy label to be seen when gprof -u hides [.]mexitcount. \n\ # \n\ .p2align 4,0x90 \n\ .globl __mexitcount \n\ .type __mexitcount,@function \n\ __mexitcount: \n\ nop \n\ \n\ GMON_PROF_HIRES = 4 \n\ \n\ .p2align 4,0x90 \n\ .globl .mexitcount \n\ .mexitcount: \n\ cmpl $GMON_PROF_HIRES,_gmonparam+GM_STATE \n\ jne .mexitcount_exit \n\ pushl %edx \n\ pushl %eax \n\ movl 8(%esp),%eax \n\ pushfl \n\ pushl %eax \n\ cli \n\ call mexitcount \n\ addl $4,%esp \n\ popfl \n\ popl %eax \n\ popl %edx \n\ .mexitcount_exit: \n\ ret $0 \n\ "); #endif /* __GNUCLIKE_ASM */ + +void __mexitcount(void); +void (*__mexitcountp)(void) = __mexitcount; /* * Return the time elapsed since the last call. The units are machine- * dependent. */ int cputime() { u_int count; int delta; #if (defined(I586_CPU) || defined(I686_CPU)) && \ defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP) u_quad_t event_count; #endif u_char high, low; static u_int prev_count; #if defined(I586_CPU) || defined(I686_CPU) if (cputime_clock == CPUTIME_CLOCK_TSC) { /* * Scale the TSC a little to make cputime()'s frequency * fit in an int, assuming that the TSC frequency fits * in a u_int. Use a fixed scale since dynamic scaling * would be slower and we can't really use the low bit * of precision. 
*/
		/*
		 * Clearing the low bit keeps every sample even, so the
		 * halved difference below loses nothing.
		 */
		count = (u_int)rdtsc() & ~1u;
		delta = (int)(count - prev_count) >> 1;
		prev_count = count;
		return (delta);
	}
#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP)
	if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
		/*
		 * XXX perfmon_read() should be inlined so that the
		 * perfmon module doesn't need to be compiled with
		 * profiling disabled and so that it is fast.
		 */
		perfmon_read(0, &event_count);
		/* Only the low 32 bits of the event counter are used. */
		count = (u_int)event_count;
		delta = (int)(count - prev_count);
		prev_count = count;
		return (delta);
	}
#endif /* PERFMON && I586_PMC_GUPROF && !SMP */
#endif /* I586_CPU || I686_CPU */

	/*
	 * Read the current value of the 8254 timer counter 0.
	 */
	/* Latch counter 0, then read it low byte first. */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
	low = inb(TIMER_CNTR0);
	high = inb(TIMER_CNTR0);
	count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;

	/*
	 * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
	 * While profiling is enabled, this routine is called at least twice
	 * per timer reset (for mcounting and mexitcounting hardclock()),
	 * so at most one reset has occurred since the last call, and one
	 * has occurred iff the current count is larger than the previous
	 * count.  This allows counter underflow to be detected faster
	 * than in microtime().
*/ delta = prev_count - count; prev_count = count; if ((int) delta <= 0) return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT)); return (delta); } static int sysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) { int clock; int error; #if defined(PERFMON) && defined(I586_PMC_GUPROF) int event; struct pmc pmc; #endif clock = cputime_clock; #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (clock == CPUTIME_CLOCK_I586_PMC) { pmc.pmc_val = cputime_clock_pmc_conf; clock += pmc.pmc_event; } #endif error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); if (error == 0 && req->newptr != NULL) { #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (clock >= CPUTIME_CLOCK_I586_PMC) { event = clock - CPUTIME_CLOCK_I586_PMC; if (event >= 256) return (EINVAL); pmc.pmc_num = 0; pmc.pmc_event = event; pmc.pmc_unit = 0; pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; pmc.pmc_mask = 0; cputime_clock_pmc_conf = pmc.pmc_val; cputime_clock = CPUTIME_CLOCK_I586_PMC; } else #endif { if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) return (EINVAL); cputime_clock = clock; } } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); /* * The start and stop routines need not be here since we turn off profiling * before calling them. They are here for convenience. 
*/ void startguprof(gp) struct gmonparam *gp; { #if defined(I586_CPU) || defined(I686_CPU) uint64_t freq; freq = atomic_load_acq_64(&tsc_freq); if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { if (freq != 0 && mp_ncpus == 1) cputime_clock = CPUTIME_CLOCK_TSC; else cputime_clock = CPUTIME_CLOCK_I8254; } if (cputime_clock == CPUTIME_CLOCK_TSC) { gp->profrate = freq >> 1; cputime_prof_active = 1; } else gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT; #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { if (perfmon_avail() && perfmon_setup(0, cputime_clock_pmc_conf) == 0) { if (perfmon_start(0) != 0) perfmon_fini(0); else { /* XXX 1 event == 1 us. */ gp->profrate = 1000000; saved_gmp = *gp; /* Zap overheads. They are invalid. */ gp->cputime_overhead = 0; gp->mcount_overhead = 0; gp->mcount_post_overhead = 0; gp->mcount_pre_overhead = 0; gp->mexitcount_overhead = 0; gp->mexitcount_post_overhead = 0; gp->mexitcount_pre_overhead = 0; cputime_clock_pmc_init = TRUE; } } } #endif /* PERFMON && I586_PMC_GUPROF */ #else /* !(I586_CPU || I686_CPU) */ if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) cputime_clock = CPUTIME_CLOCK_I8254; gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT; #endif /* I586_CPU || I686_CPU */ cputime_bias = 0; cputime(); } void stopguprof(gp) struct gmonparam *gp; { #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (cputime_clock_pmc_init) { *gp = saved_gmp; perfmon_fini(0); cputime_clock_pmc_init = FALSE; } #endif #if defined(I586_CPU) || defined(I686_CPU) if (cputime_clock == CPUTIME_CLOCK_TSC) cputime_prof_active = 0; #endif } #if defined(I586_CPU) || defined(I686_CPU) /* If the cpu frequency changed while profiling, report a warning. */ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status) { /* * If there was an error during the transition or * TSC is P-state invariant, don't do anything. 
*/ if (status != 0 || tsc_is_invariant) return; if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC) printf("warning: cpu freq changed while profiling active\n"); } EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY); #endif /* I586_CPU || I686_CPU */ #endif /* GUPROF */ Index: head/sys/i386/i386/support.s =================================================================== --- head/sys/i386/i386/support.s (revision 334519) +++ head/sys/i386/i386/support.s (revision 334520) @@ -1,475 +1,474 @@ /*- * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include "assym.inc" #define IDXSHIFT 10 .text /* * bcopy family * void bzero(void *buf, u_int len) */ ENTRY(bzero) pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret END(bzero) ENTRY(sse2_pagezero) pushl %ebx movl 8(%esp),%ecx movl %ecx,%eax addl $4096,%eax xor %ebx,%ebx jmp 1f /* * The loop takes 14 bytes. Ensure that it doesn't cross a 16-byte * cache line. 
*/ .p2align 4,0x90 1: movnti %ebx,(%ecx) movnti %ebx,4(%ecx) addl $8,%ecx cmpl %ecx,%eax jne 1b sfence popl %ebx ret END(sse2_pagezero) ENTRY(i686_pagezero) pushl %edi pushl %ebx movl 12(%esp),%edi movl $1024,%ecx ALIGN_TEXT 1: xorl %eax,%eax repe scasl jnz 2f popl %ebx popl %edi ret ALIGN_TEXT 2: incl %ecx subl $4,%edi movl %ecx,%edx cmpl $16,%ecx jge 3f movl %edi,%ebx andl $0x3f,%ebx shrl %ebx shrl %ebx movl $16,%ecx subl %ebx,%ecx 3: subl %ecx,%edx rep stosl movl %edx,%ecx testl %edx,%edx jnz 1b popl %ebx popl %edi ret END(i686_pagezero) /* fillw(pat, base, cnt) */ ENTRY(fillw) pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx rep stosw popl %edi ret END(fillw) /* * memmove(dst, src, cnt) (return dst) * bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) + movl 4(%esp),%eax + movl 8(%esp),%edx + movl %eax,8(%esp) + movl %edx,4(%esp) + MEXITCOUNT + jmp memmove +END(bcopy) + +ENTRY(memmove) pushl %ebp movl %esp,%ebp pushl %esi pushl %edi - movl 8(%ebp),%esi - movl 12(%ebp),%edi - jmp 1f -ALTENTRY(memmove) - pushl %ebp - movl %esp,%ebp - pushl %esi - pushl %edi movl 8(%ebp),%edi movl 12(%ebp),%esi 1: movl 16(%ebp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f shrl $2,%ecx /* copy by 32-bit words */ rep movsl movl 16(%ebp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %edi popl %esi movl 8(%ebp),%eax /* return dst for memmove */ popl %ebp ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi decl %edi decl %esi andl $3,%ecx /* any fractional bytes? 
*/ std rep movsb movl 16(%ebp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld movl 8(%ebp),%eax /* return dst for memmove */ popl %ebp ret -END(bcopy) +END(memmove) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%ecx movl %edi,%eax shrl $2,%ecx /* copy by 32-bit words */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %esi popl %edi ret END(memcpy) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret END(copystr) ENTRY(bcmp) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%edx movl %edx,%ecx shrl $2,%ecx repe cmpsl jne 1f movl %edx,%ecx andl $3,%ecx repe cmpsb 1: setne %al movsbl %al,%eax popl %esi popl %edi ret END(bcmp) /* * Handling of special 386 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%gs movl %eax,%ss movl $KPSEL,%eax movl %eax,%fs /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax movl $KCSEL,4(%esp) MEXITCOUNT lret END(lgdt) /* ssdtosd(*ssdp,*sdp) */ ENTRY(ssdtosd) pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll 
$16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret END(ssdtosd) /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax movl %eax,%dr7 /* disable all breakpoints first */ movl %eax,%dr0 movl %eax,%dr1 movl %eax,%dr2 movl %eax,%dr3 movl %eax,%dr6 ret END(reset_dbregs) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* restore edi */ movl 20(%eax),%edx /* get rta */ movl %edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. (Instead of panic on #gp, * return an error.) */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx rdmsr movl 8(%esp),%ecx movl %eax,(%ecx) movl %edx,4(%ecx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * Support for writing MSRs in the safe manner. (Instead of panic on #gp, * return an error.) 
*/ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx movl 8(%esp),%eax movl 12(%esp),%edx wrmsr xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl $EFAULT,%eax ret ENTRY(handle_ibrs_entry) cmpb $0,hw_ibrs_active je 1f movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr orl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax orl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx wrmsr movb $1,PCPU(IBPB_SET) /* * i386 does not implement SMEP, but the 4/4 split makes this not * that important. */ 1: ret END(handle_ibrs_entry) ENTRY(handle_ibrs_exit) cmpb $0,PCPU(IBPB_SET) je 1f - pushl %ecx movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr andl $~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax andl $~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx wrmsr - popl %ecx movb $0,PCPU(IBPB_SET) 1: ret END(handle_ibrs_exit) Index: head/sys/i386/include/asmacros.h =================================================================== --- head/sys/i386/include/asmacros.h (revision 334519) +++ head/sys/i386/include/asmacros.h (revision 334520) @@ -1,260 +1,260 @@ /* -*- mode: asm -*- */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ASMACROS_H_ #define _MACHINE_ASMACROS_H_ #include /* XXX too much duplication in various asm*.h's. */ /* * CNAME is used to manage the relationship between symbol names in C * and the equivalent assembly language names. CNAME is given a name as * it would be used in a C program. It expands to the equivalent assembly * language name. */ #define CNAME(csym) csym #define ALIGN_DATA .p2align 2 /* 4 byte alignment, zero filled */ #ifdef GPROF #define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ #else #define ALIGN_TEXT .p2align 2,0x90 /* 4-byte alignment, nop filled */ #endif #define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ #define GEN_ENTRY(name) ALIGN_TEXT; .globl CNAME(name); \ .type CNAME(name),@function; CNAME(name): #define NON_GPROF_ENTRY(name) GEN_ENTRY(name) #define NON_GPROF_RET .byte 0xc3 /* opcode for `ret' */ #define END(name) .size name, . 
- name #ifdef GPROF /* * __mcount is like [.]mcount except that doesn't require its caller to set * up a frame pointer. It must be called before pushing anything onto the * stack. gcc should eventually generate code to call __mcount in most * cases. This would make -pg in combination with -fomit-frame-pointer * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to * allow profiling before setting up the frame pointer, but this is * inadequate for good handling of special cases, e.g., -fpic works best * with profiling after the prologue. * * [.]mexitcount is a new function to support non-statistical profiling if an * accurate clock is available. For C sources, calls to it are generated * by the FreeBSD extension `-mprofiler-epilogue' to gcc. It is best to * call [.]mexitcount at the end of a function like the MEXITCOUNT macro does, * but gcc currently generates calls to it at the start of the epilogue to * avoid problems with -fpic. * * [.]mcount and __mcount may clobber the call-used registers and %ef. * [.]mexitcount may clobber %ecx and %ef. * * Cross-jumping makes non-statistical profiling timing more complicated. * It is handled in many cases by calling [.]mexitcount before jumping. It * is handled for conditional jumps using CROSSJUMP() and CROSSJUMP_LABEL(). * It is handled for some fault-handling jumps by not sharing the exit * routine. * * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to * the main entry point. Note that alt entries are counted twice. They * have to be counted as ordinary entries for gprof to get the call times * right for the ordinary entries. * * High local labels are used in macros to avoid clashes with local labels * in functions. * * Ordinary `ret' is used instead of a macro `RET' because there are a lot * of `ret's. 0xc3 is the opcode for `ret' (`#define ret ... ret' can't * be used because this file is sometimes preprocessed in traditional mode). * `ret' clobbers eflags but this doesn't matter. 
*/ #define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f #define CROSSJUMP(jtrue, label, jfalse) \ jfalse 8f; MEXITCOUNT; jmp __CONCAT(to,label); 8: #define CROSSJUMPTARGET(label) \ ALIGN_TEXT; __CONCAT(to,label): ; MCOUNT; jmp label #define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT -#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx -#define MCOUNT call __mcount +#define FAKE_MCOUNT(caller) pushl caller ; call *__mcountp ; popl %ecx +#define MCOUNT call *__mcountp #define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT #ifdef GUPROF -#define MEXITCOUNT call .mexitcount +#define MEXITCOUNT call *__mexitcountp #define ret MEXITCOUNT ; NON_GPROF_RET #else #define MEXITCOUNT #endif #else /* !GPROF */ /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align too because there may be no ALTENTRY() before it. * If there is a previous ALTENTRY() then the alignment code for ENTRY() * is empty. */ #define ALTENTRY(name) GEN_ENTRY(name) #define CROSSJUMP(jtrue, label, jfalse) jtrue label #define CROSSJUMPTARGET(label) #define ENTRY(name) GEN_ENTRY(name) #define FAKE_MCOUNT(caller) #define MCOUNT #define MCOUNT_LABEL(name) #define MEXITCOUNT #endif /* GPROF */ #ifdef LOCORE #define GSEL_KPL 0x0020 /* GSEL(GCODE_SEL, SEL_KPL) */ #define SEL_RPL_MASK 0x0003 /* * Convenience macro for declaring interrupt entry points. */ #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ .type __CONCAT(X,name),@function; __CONCAT(X,name): /* * Macros to create and destroy a trap frame. */ .macro PUSH_FRAME2 pushal pushl $0 movw %ds,(%esp) pushl $0 movw %es,(%esp) pushl $0 movw %fs,(%esp) .endm .macro PUSH_FRAME pushl $0 /* dummy error code */ pushl $0 /* dummy trap type */ PUSH_FRAME2 .endm /* * Access per-CPU data. */ #define PCPU(member) %fs:PC_ ## member #define PCPU_ADDR(member, reg) \ movl %fs:PC_PRVSPACE, reg ; \ addl $PC_ ## member, reg /* * Setup the kernel segment registers.
*/ .macro SET_KERNEL_SREGS movl $KDSEL, %eax /* reload with kernel's data segment */ movl %eax, %ds movl %eax, %es movl $KPSEL, %eax /* reload with per-CPU data segment */ movl %eax, %fs .endm .macro NMOVE_STACKS movl PCPU(KESP0), %edx movl $TF_SZ, %ecx testl $PSL_VM, TF_EFLAGS(%esp) jz .L\@.1 addl $VM86_STACK_SPACE, %ecx .L\@.1: subl %ecx, %edx movl %edx, %edi movl %esp, %esi rep; movsb movl %edx, %esp .endm .macro LOAD_KCR3 call .L\@.1 .L\@.1: popl %eax movl (tramp_idleptd - .L\@.1)(%eax), %eax movl %eax, %cr3 .endm .macro MOVE_STACKS LOAD_KCR3 NMOVE_STACKS .endm .macro KENTER testl $PSL_VM, TF_EFLAGS(%esp) jz .L\@.1 LOAD_KCR3 movl PCPU(CURPCB), %eax testl $PCB_VM86CALL, PCB_FLAGS(%eax) jnz .L\@.3 NMOVE_STACKS movl $handle_ibrs_entry,%edx call *%edx jmp .L\@.3 .L\@.1: testb $SEL_RPL_MASK, TF_CS(%esp) jz .L\@.3 .L\@.2: MOVE_STACKS movl $handle_ibrs_entry,%edx call *%edx .L\@.3: .endm #endif /* LOCORE */ #ifdef __STDC__ #define ELFNOTE(name, type, desctype, descdata...) \ .pushsection .note.name ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ .long type ; \ 1:.asciz #name ; \ 2:.align 4 ; \ 3:desctype descdata ; \ 4:.align 4 ; \ .popsection #else /* !__STDC__, i.e. -traditional */ #define ELFNOTE(name, type, desctype, descdata) \ .pushsection .note.name ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ .long type ; \ 1:.asciz "name" ; \ 2:.align 4 ; \ 3:desctype descdata ; \ 4:.align 4 ; \ .popsection #endif /* __STDC__ */ #endif /* !_MACHINE_ASMACROS_H_ */ Index: head/sys/i386/include/profile.h =================================================================== --- head/sys/i386/include/profile.h (revision 334519) +++ head/sys/i386/include/profile.h (revision 334520) @@ -1,173 +1,186 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)profile.h 8.1 (Berkeley) 6/11/93 * $FreeBSD$ */ #ifndef _MACHINE_PROFILE_H_ #define _MACHINE_PROFILE_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif #ifdef _KERNEL /* * Config generates something to tell the compiler to align functions on 16 * byte boundaries. A strict alignment is good for keeping the tables small. */ #define FUNCTION_ALIGNMENT 16 /* * The kernel uses assembler stubs instead of unportable inlines. 
* This is mainly to save a little time when profiling is not enabled, * which is the usual case for the kernel. */ #define _MCOUNT_DECL void mcount #define MCOUNT #ifdef GUPROF #define MCOUNT_DECL(s) #define MCOUNT_ENTER(s) #define MCOUNT_EXIT(s) #ifdef __GNUCLIKE_ASM #define MCOUNT_OVERHEAD(label) \ __asm __volatile("pushl %0; call __mcount; popl %%ecx" \ : \ : "i" (label) \ : "ax", "dx", "cx", "memory") #define MEXITCOUNT_OVERHEAD() \ __asm __volatile("call .mexitcount; 1:" \ : : \ : "ax", "dx", "cx", "memory") #define MEXITCOUNT_OVERHEAD_GETLABEL(labelp) \ __asm __volatile("movl $1b,%0" : "=rm" (labelp)) #else #error #endif /* !__GNUCLIKE_ASM */ #else /* !GUPROF */ #define MCOUNT_DECL(s) register_t s; #ifdef SMP extern int mcount_lock; #define MCOUNT_ENTER(s) { s = intr_disable(); \ while (!atomic_cmpset_acq_int(&mcount_lock, 0, 1)) \ /* nothing */ ; } #define MCOUNT_EXIT(s) { atomic_store_rel_int(&mcount_lock, 0); \ intr_restore(s); } #else #define MCOUNT_ENTER(s) { s = intr_disable(); } #define MCOUNT_EXIT(s) (intr_restore(s)) #endif #endif /* GUPROF */ void bintr(void); void btrap(void); void eintr(void); +#if 0 +void end_exceptions(void); +void start_exceptions(void); +#else +#include /* XXX */ +#endif void user(void); -#define MCOUNT_FROMPC_USER(pc) \ - ((pc < (uintfptr_t)VM_MAXUSER_ADDRESS) ? (uintfptr_t)user : pc) +#include /* XXX for setidt_disp */ +#define MCOUNT_DETRAMP(pc) do { \ + if ((pc) >= (uintfptr_t)start_exceptions + setidt_disp && \ + (pc) < (uintfptr_t)end_exceptions + setidt_disp) \ + (pc) -= setidt_disp; \ +} while (0) + #define MCOUNT_FROMPC_INTR(pc) \ ((pc >= (uintfptr_t)btrap && pc < (uintfptr_t)eintr) ? \ ((pc >= (uintfptr_t)bintr) ? 
(uintfptr_t)bintr : \ (uintfptr_t)btrap) : ~0U) + +#define MCOUNT_USERPC ((uintfptr_t)user) #else /* !_KERNEL */ #define FUNCTION_ALIGNMENT 4 #define _MCOUNT_DECL static __inline void _mcount #ifdef __GNUCLIKE_ASM #define MCOUNT \ void \ mcount() \ { \ uintfptr_t selfpc, frompc, ecx; \ /* \ * In gcc 4.2, ecx might be used in the caller as the arg \ * pointer if the stack realignment option is set (-mstackrealign) \ * or if the caller has the force_align_arg_pointer attribute \ * (stack realignment is ALWAYS on for main). Preserve ecx \ * here. \ */ \ __asm("" : "=c" (ecx)); \ /* \ * Find the return address for mcount, \ * and the return address for mcount's caller. \ * \ * selfpc = pc pushed by call to mcount \ */ \ __asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \ /* \ * frompc = pc pushed by call to mcount's caller. \ * The caller's stack frame has already been built, so %ebp is \ * the caller's frame pointer. The caller's raddr is in the \ * caller's frame following the caller's caller's frame pointer.\ */ \ __asm("movl (%%ebp),%0" : "=r" (frompc)); \ frompc = ((uintfptr_t *)frompc)[1]; \ _mcount(frompc, selfpc); \ __asm("" : : "c" (ecx)); \ } #else /* !__GNUCLIKE_ASM */ #define MCOUNT #endif /* __GNUCLIKE_ASM */ typedef u_int uintfptr_t; #endif /* _KERNEL */ /* * An unsigned integral type that can hold non-negative difference between * function pointers. */ typedef u_int fptrdiff_t; #ifdef _KERNEL void mcount(uintfptr_t frompc, uintfptr_t selfpc); #else /* !_KERNEL */ #include __BEGIN_DECLS #ifdef __GNUCLIKE_ASM void mcount(void) __asm(".mcount"); #endif __END_DECLS #endif /* _KERNEL */ #endif /* !_MACHINE_PROFILE_H_ */ Index: head/sys/libkern/mcount.c =================================================================== --- head/sys/libkern/mcount.c (revision 334519) +++ head/sys/libkern/mcount.c (revision 334520) @@ -1,298 +1,316 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1983, 1992, 1993 * The Regents of the University of California. 
All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #ifdef _KERNEL #ifndef GUPROF #include #endif #include #include #include #endif /* * mcount is called on entry to each function compiled with the profiling * switch set. _mcount(), which is declared in a machine-dependent way * with _MCOUNT_DECL, does the actual work and is either inlined into a * C routine or called by an assembly stub. In any case, this magic is * taken care of by the MCOUNT definition in . 
* * _mcount updates data structures that represent traversals of the * program's call graph edges. frompc and selfpc are the return * address and function address that represents the given call graph edge. * * Note: the original BSD code used the same variable (frompcindex) for * both frompcindex and frompc. Any reasonable, modern compiler will * perform this optimization. */ _MCOUNT_DECL(uintfptr_t frompc, uintfptr_t selfpc) /* _mcount; may be static, inline, etc */ { #ifdef GUPROF int delta; #endif fptrdiff_t frompci; u_short *frompcindex; struct tostruct *top, *prevtop; struct gmonparam *p; long toindex; #ifdef _KERNEL MCOUNT_DECL(s) #endif p = &_gmonparam; #ifndef GUPROF /* XXX */ /* * check that we are profiling * and that we aren't recursively invoked. */ if (p->state != GMON_PROF_ON) return; #endif #ifdef _KERNEL MCOUNT_ENTER(s); #else p->state = GMON_PROF_BUSY; #endif #ifdef _KERNEL + /* De-relocate any addresses in a (single) trampoline. */ +#ifdef MCOUNT_DETRAMP + MCOUNT_DETRAMP(frompc); + MCOUNT_DETRAMP(selfpc); +#endif /* * When we are called from an exception handler, frompc may be * a user address. Convert such frompc's to some representation * in kernel address space. */ +#ifdef MCOUNT_FROMPC_USER frompc = MCOUNT_FROMPC_USER(frompc); +#elif defined(MCOUNT_USERPC) + /* + * For separate address spaces, we can only guess that addresses + * in the range known to us are actually kernel addresses. Outside + * of this range, converting to the user address is fail-safe. + */ + if (frompc < p->lowpc || frompc - p->lowpc >= p->textsize) + frompc = MCOUNT_USERPC; #endif +#endif /* _KERNEL */ frompci = frompc - p->lowpc; if (frompci >= p->textsize) goto done; #ifdef GUPROF if (p->state == GMON_PROF_HIRES) { /* * Count the time since cputime() was previously called * against `frompc'. Compensate for overheads. * * cputime() sets its prev_count variable to the count when * it is called.
This in effect starts a counter for * the next period of execution (normally from now until * the next call to mcount() or mexitcount()). We set * cputime_bias to compensate for our own overhead. * * We use the usual sampling counters since they can be * located efficiently. 4-byte counters are usually * necessary. gprof will add up the scattered counts * just like it does for statistical profiling. All * counts are signed so that underflow in the subtractions * doesn't matter much (negative counts are normally * compensated for by larger counts elsewhere). Underflow * shouldn't occur, but may be caused by slightly wrong * calibrations or from not clearing cputime_bias. */ delta = cputime() - cputime_bias - p->mcount_pre_overhead; cputime_bias = p->mcount_post_overhead; KCOUNT(p, frompci) += delta; *p->cputime_count += p->cputime_overhead; *p->mcount_count += p->mcount_overhead; } #endif /* GUPROF */ #ifdef _KERNEL /* * When we are called from an exception handler, frompc is faked * to be for where the exception occurred. We've just solidified * the count for there. Now convert frompci to an index that * represents the kind of exception so that interruptions appear * in the call graph as calls from that index instead of calls * from all over. */ frompc = MCOUNT_FROMPC_INTR(selfpc); if ((frompc - p->lowpc) < p->textsize) frompci = frompc - p->lowpc; #endif /* * check that frompc is a reasonable pc value. * for example: signal catchers get called from the stack, * not from text space. too bad.
*/ if (frompci >= p->textsize) goto done; frompcindex = &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))]; toindex = *frompcindex; if (toindex == 0) { /* * first time traversing this arc */ toindex = ++p->tos[0].link; if (toindex >= p->tolimit) /* halt further profiling */ goto overflow; *frompcindex = toindex; top = &p->tos[toindex]; top->selfpc = selfpc; top->count = 1; top->link = 0; goto done; } top = &p->tos[toindex]; if (top->selfpc == selfpc) { /* * arc at front of chain; usual case. */ top->count++; goto done; } /* * have to go looking down chain for it. * top points to what we are looking at, * prevtop points to previous top. * we know it is not at the head of the chain. */ for (; /* goto done */; ) { if (top->link == 0) { /* * top is end of the chain and none of the chain * had top->selfpc == selfpc. * so we allocate a new tostruct * and link it to the head of the chain. */ toindex = ++p->tos[0].link; if (toindex >= p->tolimit) goto overflow; top = &p->tos[toindex]; top->selfpc = selfpc; top->count = 1; top->link = *frompcindex; *frompcindex = toindex; goto done; } /* * otherwise, check the next arc on the chain. */ prevtop = top; top = &p->tos[top->link]; if (top->selfpc == selfpc) { /* * there it is. * increment its count * move it to the head of the chain. */ top->count++; toindex = prevtop->link; prevtop->link = top->link; top->link = *frompcindex; *frompcindex = toindex; goto done; } } done: #ifdef _KERNEL MCOUNT_EXIT(s); #else p->state = GMON_PROF_ON; #endif return; overflow: p->state = GMON_PROF_ERROR; #ifdef _KERNEL MCOUNT_EXIT(s); #endif return; } /* * Actual definition of mcount function. Defined in , * which is included by .
*/ MCOUNT #ifdef GUPROF void mexitcount(uintfptr_t selfpc) { struct gmonparam *p; uintfptr_t selfpcdiff; p = &_gmonparam; +#ifdef MCOUNT_DETRAMP + MCOUNT_DETRAMP(selfpc); +#endif selfpcdiff = selfpc - (uintfptr_t)p->lowpc; if (selfpcdiff < p->textsize) { int delta; /* * Count the time since cputime() was previously called * against `selfpc'. Compensate for overheads. */ delta = cputime() - cputime_bias - p->mexitcount_pre_overhead; cputime_bias = p->mexitcount_post_overhead; KCOUNT(p, selfpcdiff) += delta; *p->cputime_count += p->cputime_overhead; *p->mexitcount_count += p->mexitcount_overhead; } } #ifndef __GNUCLIKE_ASM #error "This file uses null asms to prevent timing loops being optimized away." #endif void empty_loop() { int i; for (i = 0; i < CALIB_SCALE; i++) __asm __volatile(""); } void nullfunc() { __asm __volatile(""); } void nullfunc_loop() { int i; for (i = 0; i < CALIB_SCALE; i++) nullfunc(); } #endif /* GUPROF */