Index: stand/common/reloc_elf.c =================================================================== --- stand/common/reloc_elf.c +++ stand/common/reloc_elf.c @@ -115,6 +115,7 @@ /* XXX, definitions not available on i386. */ #define R_X86_64_64 1 #define R_X86_64_RELATIVE 8 +#define R_X86_64_IRELATIVE 37 switch (rtype) { case R_X86_64_64: /* S + A */ @@ -129,6 +130,9 @@ val = addr; *where = val; break; + case R_X86_64_IRELATIVE: + /* leave it to kernel */ + break; default: printf("\nunhandled relocation type %u\n", (u_int)rtype); return (EFTYPE); @@ -173,6 +177,7 @@ #define R_386_32 1 /* Add symbol value. */ #define R_386_GLOB_DAT 6 /* Set GOT entry to data address. */ #define R_386_RELATIVE 8 /* Add load address of shared object. */ +#define R_386_IRELATIVE 42 switch (rtype) { case R_386_RELATIVE: @@ -186,6 +191,9 @@ val = addr + addend; *where = val; break; + case R_386_IRELATIVE: + /* leave it to kernel */ + break; default: printf("\nunhandled relocation type %u\n", (u_int)rtype); return (EFTYPE); Index: sys/amd64/amd64/elf_machdep.c =================================================================== --- sys/amd64/amd64/elf_machdep.c +++ sys/amd64/amd64/elf_machdep.c @@ -175,10 +175,13 @@ *off = len; } +#define ERI_LOCAL 0x0001 +#define ERI_ONLYIFUNC 0x0002 + /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, - int type, int local, elf_lookup_fn lookup) + int type, elf_lookup_fn lookup, int flags) { Elf64_Addr *where, val; Elf32_Addr *where32, val32; @@ -218,6 +221,9 @@ panic("unknown reloc type %d\n", type); } + if (((flags & ERI_ONLYIFUNC) == 0) ^ (rtype != R_X86_64_IRELATIVE)) + return (0); + switch (rtype) { case R_X86_64_NONE: /* none */ @@ -260,7 +266,7 @@ * objects. */ printf("kldload: unexpected R_COPY relocation\n"); - return -1; + return (-1); break; case R_X86_64_GLOB_DAT: /* S */ @@ -279,12 +285,28 @@ *where = val; break; + case R_X86_64_IRELATIVE: + addr = relocbase + addend; + val = ((Elf64_Addr (*)(void))addr)(); + if (*where != val) + *where = val; + break; + default: printf("kldload: unexpected relocation type %ld\n", rtype); - return -1; + return (-1); } - return(0); + return (0); +} + +int +elf_reloc_ifunc(linker_file_t lf, Elf_Addr relocbase, const void *data, + int type, elf_lookup_fn lookup) +{ + + return (elf_reloc_internal(lf, relocbase, data, type, lookup, + ERI_ONLYIFUNC)); } int @@ -292,7 +314,7 @@ elf_lookup_fn lookup) { - return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); + return (elf_reloc_internal(lf, relocbase, data, type, lookup, 0)); } int @@ -300,7 +322,8 @@ int type, elf_lookup_fn lookup) { - return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); + return (elf_reloc_internal(lf, relocbase, data, type, lookup, + ERI_LOCAL)); } int Index: sys/amd64/amd64/exception.S =================================================================== --- sys/amd64/amd64/exception.S +++ sys/amd64/amd64/exception.S @@ -203,7 +203,9 @@ movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) movl $TF_HASSEGS,TF_FLAGS(%rsp) - cld + pushfq + andl $~(PSL_D | PSL_AC),(%rsp) + popfq FAKE_MCOUNT(TF_RIP(%rsp)) #ifdef KDTRACE_HOOKS /* @@ -284,7 +286,9 @@ movw %es,TF_ES(%rsp) movw %ds,TF_DS(%rsp) movl $TF_HASSEGS,TF_FLAGS(%rsp) - cld + pushfq + andl $~(PSL_D | PSL_AC),(%rsp) + popfq testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ jz 1f /* already running with kernel GS.base */ swapgs @@ -410,7 +414,6 @@ movq %r14,TF_R14(%rsp) /* C preserved */ movq %r15,TF_R15(%rsp) /* C preserved */ movl $TF_HASSEGS,TF_FLAGS(%rsp) - cld FAKE_MCOUNT(TF_RIP(%rsp)) movq PCPU(CURTHREAD),%rdi movq %rsp,TD_FRAME(%rdi) @@ -505,7 +508,9 @@ movw %es,TF_ES(%rsp) movw %ds,TF_DS(%rsp) movl $TF_HASSEGS,TF_FLAGS(%rsp) - cld + pushfq + andl $~(PSL_D | PSL_AC),(%rsp) + popfq xorl %ebx,%ebx testb $SEL_RPL_MASK,TF_CS(%rsp) jnz nmi_fromuserspace Index: sys/amd64/amd64/fpu.c =================================================================== --- sys/amd64/amd64/fpu.c +++ sys/amd64/amd64/fpu.c @@ -61,6 +61,7 @@ #include #include #include +#include /* * Floating point support. @@ -151,24 +152,58 @@ u_int size; } *xsave_area_desc; -void -fpusave(void *addr) +static void +fpusave_xsave(void *addr) { - if (use_xsave) - xsave((char *)addr, xsave_mask); - else - fxsave((char *)addr); + xsave((char *)addr, xsave_mask); } -void -fpurestore(void *addr) +static void +fpurestore_xrstor(void *addr) +{ + + xrstor((char *)addr, xsave_mask); +} + +static void +fpusave_fxsave(void *addr) +{ + + fxsave((char *)addr); +} + +static void +fpurestore_fxrstor(void *addr) +{ + + fxrstor((char *)addr); +} + +static void +init_xsave(void) { if (use_xsave) - xrstor((char *)addr, xsave_mask); - else - fxrstor((char *)addr); + return; + if ((cpu_feature2 & CPUID2_XSAVE) == 0) + return; + use_xsave = 1; + TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); +} + +DEFINE_IFUNC(, void, fpusave, (void *), static) +{ + + init_xsave(); + return (use_xsave ? fpusave_xsave : fpusave_fxsave); +} + +DEFINE_IFUNC(, void, fpurestore, (void *), static) +{ + + init_xsave(); + return (use_xsave ? fpurestore_xrstor : fpurestore_fxrstor); } void @@ -206,13 +241,8 @@ u_int cp[4]; uint64_t xsave_mask_user; - if ((cpu_feature2 & CPUID2_XSAVE) != 0) { - use_xsave = 1; - TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); - } if (!use_xsave) return; - cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) Index: sys/amd64/amd64/initcpu.c =================================================================== --- sys/amd64/amd64/initcpu.c +++ sys/amd64/amd64/initcpu.c @@ -215,8 +215,12 @@ * to the kernel tables. The boot loader enables the U bit in * its tables. 
*/ - if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP)) - cr4 |= CR4_SMEP; + if (!IS_BSP()) { + if (cpu_stdext_feature & CPUID_STDEXT_SMEP) + cr4 |= CR4_SMEP; + if (cpu_stdext_feature & CPUID_STDEXT_SMAP) + cr4 |= CR4_SMAP; + } load_cr4(cr4); if ((amd_feature & AMDID_NX) != 0) { msr = rdmsr(MSR_EFER) | EFER_NXE; Index: sys/amd64/amd64/machdep.c =================================================================== --- sys/amd64/amd64/machdep.c +++ sys/amd64/amd64/machdep.c @@ -1512,6 +1512,22 @@ #endif } +/* Set up the fast syscall stuff */ +void +amd64_conf_fast_syscall(void) +{ + uint64_t msr; + + msr = rdmsr(MSR_EFER) | EFER_SCE; + wrmsr(MSR_EFER, msr); + wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); + wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); + msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | + ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); + wrmsr(MSR_STAR, msr); + wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC); +} + u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { @@ -1520,7 +1536,6 @@ struct pcpu *pc; struct nmi_pcpu *np; struct xstate_hdr *xhdr; - u_int64_t msr; char *env; size_t kstack0_sz; int late_console; @@ -1538,6 +1553,8 @@ identify_cpu1(); identify_hypervisor(); + /* link_elf_ireloc(kmdp); */ + /* Init basic tunables, hz etc */ init_param1(); @@ -1663,15 +1680,7 @@ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); - /* Set up the fast syscall stuff */ - msr = rdmsr(MSR_EFER) | EFER_SCE; - wrmsr(MSR_EFER, msr); - wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); - wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); - msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | - ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); - wrmsr(MSR_STAR, msr); - wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); + amd64_conf_fast_syscall(); /* * Temporary forge some valid pointer to PCB, for exception @@ -1695,6 +1704,7 @@ cninit(); amd64_kdb_init(); } + link_elf_ireloc(kmdp); getmemsize(kmdp, physfree); init_param2(physmem); Index: sys/amd64/amd64/mp_machdep.c =================================================================== --- sys/amd64/amd64/mp_machdep.c +++ sys/amd64/amd64/mp_machdep.c @@ -190,7 +190,7 @@ { struct pcpu *pc; struct nmi_pcpu *np; - u_int64_t msr, cr0; + u_int64_t cr0; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; @@ -265,15 +265,7 @@ cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); - /* Set up the fast syscall stuff */ - msr = rdmsr(MSR_EFER) | EFER_SCE; - wrmsr(MSR_EFER, msr); - wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); - wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); - msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | - ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); - wrmsr(MSR_STAR, msr); - wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); + amd64_conf_fast_syscall(); /* signal our startup to the BSP. 
*/ mp_naps++; Index: sys/amd64/amd64/pmap.c =================================================================== --- sys/amd64/amd64/pmap.c +++ sys/amd64/amd64/pmap.c @@ -141,6 +141,7 @@ #include #include +#include #include #include #include @@ -628,6 +629,10 @@ vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); +static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, + vm_offset_t eva); +static void pmap_invalidate_cache_range_all(vm_offset_t sva, + vm_offset_t eva); static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); @@ -1000,6 +1005,7 @@ { vm_offset_t va; pt_entry_t *pte; + uint64_t cr4; int i; /* @@ -1022,11 +1028,21 @@ virtual_end = VM_MAX_KERNEL_ADDRESS; - /* XXX do %cr0 as well */ - load_cr4(rcr4() | CR4_PGE); + /* + * Enable PG_G global pages, then switch to the kernel page + * table from the bootstrap page table. After the switch, it + * is possible to enable SMEP and SMAP since PG_U bits are + * correct now. + */ + cr4 = rcr4(); + cr4 |= CR4_PGE; + load_cr4(cr4); load_cr3(KPML4phys); if (cpu_stdext_feature & CPUID_STDEXT_SMEP) - load_cr4(rcr4() | CR4_SMEP); + cr4 |= CR4_SMEP; + if (cpu_stdext_feature & CPUID_STDEXT_SMAP) + cr4 |= CR4_SMAP; + load_cr4(cr4); /* * Initialize the kernel pmap (which is statically allocated). @@ -1881,36 +1897,55 @@ pmap_invalidate_page(pmap, va); } +DEFINE_IFUNC(, void, pmap_invalidate_cache_range, + (vm_offset_t sva, vm_offset_t eva), static) +{ + + if ((cpu_feature & CPUID_SS) != 0) + return (pmap_invalidate_cache_range_selfsnoop); + if ((cpu_feature & CPUID_CLFSH) != 0) + return (pmap_force_invalidate_cache_range); + return (pmap_invalidate_cache_range_all); +} + #define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) -void -pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force) +static void +pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva) { - if (force) { - sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); - } else { - KASSERT((sva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: sva not page-aligned")); - KASSERT((eva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: eva not page-aligned")); - } + KASSERT((sva & PAGE_MASK) == 0, + ("pmap_invalidate_cache_range: sva not page-aligned")); + KASSERT((eva & PAGE_MASK) == 0, + ("pmap_invalidate_cache_range: eva not page-aligned")); +} + +void +pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) +{ - if ((cpu_feature & CPUID_SS) != 0 && !force) - ; /* If "Self Snoop" is supported and allowed, do nothing. */ - else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 && - eva - sva < PMAP_CLFLUSH_THRESHOLD) { + sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); + if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) { /* - * XXX: Some CPUs fault, hang, or trash the local APIC - * registers if we use CLFLUSH on the local APIC - * range. The local APIC is always uncached, so we - * don't need to flush for that range anyway. + * The supplied range is bigger than 2MB. + * Globally invalidate cache. */ - if (pmap_kextract(sva) == lapic_paddr) - return; + pmap_invalidate_cache(); + return; + } + + /* + * XXX: Some CPUs fault, hang, or trash the local APIC + * registers if we use CLFLUSH on the local APIC + * range. The local APIC is always uncached, so we + * don't need to flush for that range anyway. 
+ */ + if (pmap_kextract(sva) == lapic_paddr) + return; + if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) { /* - * Otherwise, do per-cache line flush. Use the sfence + * Do per-cache line flush. Use the sfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache @@ -1920,10 +1955,7 @@ for (; sva < eva; sva += cpu_clflush_line_size) clflushopt(sva); sfence(); - } else if ((cpu_feature & CPUID_CLFSH) != 0 && - eva - sva < PMAP_CLFLUSH_THRESHOLD) { - if (pmap_kextract(sva) == lapic_paddr) - return; + } else { /* * Writes are ordered by CLFLUSH on Intel CPUs. */ @@ -1933,17 +1965,17 @@ clflush(sva); if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); - } else { - - /* - * No targeted cache flush methods are supported by CPU, - * or the supplied range is bigger than 2MB. - * Globally invalidate cache. - */ - pmap_invalidate_cache(); } } +static void +pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva) +{ + + pmap_invalidate_cache_range_selfsnoop(sva, eva); + pmap_invalidate_cache(); +} + /* * Remove the specified set of pages from the data and instruction caches. * @@ -6581,7 +6613,7 @@ for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); - pmap_invalidate_cache_range(va, va + tmpsize, FALSE); + pmap_invalidate_cache_range(va, va + tmpsize); return ((void *)(va + offset)); } @@ -6940,7 +6972,7 @@ */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); - pmap_invalidate_cache_range(base, tmpva, FALSE); + pmap_invalidate_cache_range(base, tmpva); } return (error); } Index: sys/amd64/amd64/support.S =================================================================== --- sys/amd64/amd64/support.S +++ sys/amd64/amd64/support.S @@ -241,7 +241,7 @@ * copyout(from_kernel, to_user, len) * %rdi, %rsi, %rdx */ -ENTRY(copyout) +ENTRY(copyout_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyout_fault,PCB_ONFAULT(%rax) @@ -284,6 +284,55 @@ rep movsb + jmp done_copyout +END(copyout_nosmap) + +ENTRY(copyout_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rax + /* Trap entry clears PSL.AC */ + movq $copyout_fault,PCB_ONFAULT(%rax) + testq %rdx,%rdx /* anything to do? */ + jz done_copyout + + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + */ + + /* + * First, prevent address wrapping. + */ + movq %rsi,%rax + addq %rdx,%rax + jc copyout_fault +/* + * XXX STOP USING VM_MAXUSER_ADDRESS. + * It is an end address, not a max, so every time it is used correctly it + * looks like there is an off by one error, and of course it caused an off + * by one error in several places. + */ + movq $VM_MAXUSER_ADDRESS,%rcx + cmpq %rcx,%rax + ja copyout_fault + + xchgq %rdi,%rsi + /* bcopy(%rsi, %rdi, %rdx) */ + movq %rdx,%rcx + + shrq $3,%rcx + cld + stac + rep + movsq + movb %dl,%cl + andb $7,%cl + rep + movsb + clac + done_copyout: xorl %eax,%eax movq PCPU(CURPCB),%rdx @@ -304,7 +353,39 @@ * copyin(from_user, to_kernel, len) * %rdi, %rsi, %rdx */ -ENTRY(copyin) +ENTRY(copyin_nosmap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rax + movq $copyin_fault,PCB_ONFAULT(%rax) + testq %rdx,%rdx /* anything to do? 
*/ + jz done_copyin + + /* + * make sure address is valid + */ + movq %rdi,%rax + addq %rdx,%rax + jc copyin_fault + movq $VM_MAXUSER_ADDRESS,%rcx + cmpq %rcx,%rax + ja copyin_fault + + xchgq %rdi,%rsi + movq %rdx,%rcx + movb %cl,%al + shrq $3,%rcx /* copy longword-wise */ + cld + rep + movsq + movb %al,%cl + andb $7,%cl /* copy remaining bytes */ + rep + movsb + + jmp done_copyin +END(copyin_nosmap) + +ENTRY(copyin_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyin_fault,PCB_ONFAULT(%rax) @@ -325,6 +406,7 @@ movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ + stac cld rep movsq @@ -332,6 +414,7 @@ andb $7,%cl /* copy remaining bytes */ rep movsb + clac done_copyin: xorl %eax,%eax @@ -339,6 +422,7 @@ movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret +END(copyin_smap) ALIGN_TEXT copyin_fault: @@ -347,14 +431,13 @@ movq $EFAULT,%rax POP_FRAME_POINTER ret -END(copyin) /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ -ENTRY(casueword32) +ENTRY(casueword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) @@ -386,14 +469,78 @@ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret -END(casueword32) +END(casueword32_nosmap) + +ENTRY(casueword32_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%r8 + movq $fusufault,PCB_ONFAULT(%r8) + + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address is valid */ + ja fusufault + + movl %esi,%eax /* old */ + stac +#ifdef SMP + lock +#endif + cmpxchgl %ecx,(%rdi) /* new = %ecx */ + clac + + /* + * The old value is in %eax. If the store succeeded it will be the + * value we expected (old) from before the store, otherwise it will + * be the current value. Save %eax into %esi to prepare the return + * value. + */ + movl %eax,%esi + xorl %eax,%eax + movq %rax,PCB_ONFAULT(%r8) + + /* + * Access the oldp after the pcb_onfault is cleared, to correctly + * catch corrupted pointer. + */ + movl %esi,(%rdx) /* oldp = %rdx */ + POP_FRAME_POINTER + ret +END(casueword32_smap) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ -ENTRY(casueword) +ENTRY(casueword_nosmap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%r8 + movq $fusufault,PCB_ONFAULT(%r8) + + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address is valid */ + ja fusufault + + movq %rsi,%rax /* old */ +#ifdef SMP + lock +#endif + cmpxchgq %rcx,(%rdi) /* new = %rcx */ + + /* + * The old value is in %rax. If the store succeeded it will be the + * value we expected (old) from before the store, otherwise it will + * be the current value. + */ + movq %rax,%rsi + xorl %eax,%eax + movq %rax,PCB_ONFAULT(%r8) + movq %rsi,(%rdx) + POP_FRAME_POINTER + ret +END(casueword_nosmap) + +ENTRY(casueword_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) @@ -403,10 +550,12 @@ ja fusufault movq %rsi,%rax /* old */ + stac #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ + clac /* * The old value is in %rax. 
If the store succeeded it will be the @@ -419,7 +568,7 @@ movq %rsi,(%rdx) POP_FRAME_POINTER ret -END(casueword) +END(casueword_smap) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. * addr = %rdi, valp = %rsi */ -ALTENTRY(fueword64) -ENTRY(fueword) +ENTRY(fueword_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -443,10 +591,28 @@ movq %r11,(%rsi) POP_FRAME_POINTER ret -END(fueword64) -END(fueword) +END(fueword_nosmap) -ENTRY(fueword32) +ENTRY(fueword_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-8,%rax + cmpq %rax,%rdi /* verify address is valid */ + ja fusufault + + xorl %eax,%eax + stac + movq (%rdi),%r11 + clac + movq %rax,PCB_ONFAULT(%rcx) + movq %r11,(%rsi) + POP_FRAME_POINTER + ret +END(fueword_smap) + +ENTRY(fueword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -461,7 +627,26 @@ movl %r11d,(%rsi) POP_FRAME_POINTER ret -END(fueword32) +END(fueword32_nosmap) + +ENTRY(fueword32_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address is valid */ + ja fusufault + + xorl %eax,%eax + stac + movl (%rdi),%r11d + clac + movq %rax,PCB_ONFAULT(%rcx) + movl %r11d,(%rsi) + POP_FRAME_POINTER + ret +END(fueword32_smap) /* * fuswintr() and suswintr() are specialized variants of fuword16() and @@ -477,7 +662,7 @@ END(suswintr) END(fuswintr) -ENTRY(fuword16) +ENTRY(fuword16_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -490,9 +675,41 @@ movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret -END(fuword16) +END(fuword16_nosmap) + +ENTRY(fuword16_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-2,%rax + cmpq %rax,%rdi + ja fusufault + + stac + movzwl (%rdi),%eax + clac + movq $0,PCB_ONFAULT(%rcx) + POP_FRAME_POINTER + ret +END(fuword16_smap) + +ENTRY(fubyte_nosmap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-1,%rax + cmpq %rax,%rdi + ja fusufault + + movzbl (%rdi),%eax + movq $0,PCB_ONFAULT(%rcx) + POP_FRAME_POINTER + ret +END(fubyte_nosmap) -ENTRY(fubyte) +ENTRY(fubyte_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -501,13 +718,16 @@ cmpq %rax,%rdi ja fusufault + stac movzbl (%rdi),%eax + clac movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret -END(fubyte) +END(fubyte_smap) ALIGN_TEXT + /* Fault entry clears PSL.AC */ fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax @@ -521,8 +741,7 @@ * user memory. 
* addr = %rdi, value = %rsi */ -ALTENTRY(suword64) -ENTRY(suword) +ENTRY(suword_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -537,10 +756,28 @@ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret -END(suword64) -END(suword) +END(suword_nosmap) -ENTRY(suword32) +ENTRY(suword_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-8,%rax + cmpq %rax,%rdi /* verify address validity */ + ja fusufault + + stac + movq %rsi,(%rdi) + clac + xorl %eax,%eax + movq PCPU(CURPCB),%rcx + movq %rax,PCB_ONFAULT(%rcx) + POP_FRAME_POINTER + ret +END(suword_smap) + +ENTRY(suword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -555,9 +792,28 @@ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret -END(suword32) +END(suword32_nosmap) -ENTRY(suword16) +ENTRY(suword32_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address validity */ + ja fusufault + + stac + movl %esi,(%rdi) + clac + xorl %eax,%eax + movq PCPU(CURPCB),%rcx + movq %rax,PCB_ONFAULT(%rcx) + POP_FRAME_POINTER + ret +END(suword32_smap) + +ENTRY(suword16_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -572,9 +828,28 @@ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret -END(suword16) +END(suword16_nosmap) -ENTRY(subyte) +ENTRY(suword16_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-2,%rax + cmpq %rax,%rdi /* verify address validity */ + ja fusufault + + stac + movw %si,(%rdi) + clac + xorl %eax,%eax + movq PCPU(CURPCB),%rcx /* restore trashed register */ + movq %rax,PCB_ONFAULT(%rcx) + POP_FRAME_POINTER + ret +END(suword16_smap) + +ENTRY(subyte_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -590,7 +865,27 @@ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret -END(subyte) +END(subyte_nosmap) + +ENTRY(subyte_smap) + PUSH_FRAME_POINTER + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-1,%rax + cmpq %rax,%rdi /* verify address validity */ + ja fusufault + + movl %esi,%eax + stac + movb %al,(%rdi) + clac + xorl %eax,%eax + movq PCPU(CURPCB),%rcx /* restore trashed register */ + movq %rax,PCB_ONFAULT(%rcx) + POP_FRAME_POINTER + ret +END(subyte_smap) /* * copyinstr(from, to, maxlen, int *lencopied) @@ -601,7 +896,42 @@ * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. 
*/ -ENTRY(copyinstr) +ENTRY(copyinstr_nosmap) + PUSH_FRAME_POINTER + movq %rdx,%r8 /* %r8 = maxlen */ + movq %rcx,%r9 /* %r9 = *len */ + xchgq %rdi,%rsi /* %rdi = from, %rsi = to */ + movq PCPU(CURPCB),%rcx + movq $cpystrflt,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS,%rax + + /* make sure 'from' is within bounds */ + subq %rsi,%rax + jbe cpystrflt + + /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ + cmpq %rdx,%rax + jae 1f + movq %rax,%rdx + movq %rax,%r8 +1: + incq %rdx + cld + +2: + decq %rdx + jz copyinstr_toolong + + lodsb + stosb + orb %al,%al + jnz 2b + + jmp copyinstr_succ +END(copyinstr_nosmap) + +ENTRY(copyinstr_smap) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq %rcx,%r9 /* %r9 = *len */ @@ -626,26 +956,29 @@ 2: decq %rdx - jz 3f + jz copyinstr_toolong + stac lodsb stosb + clac orb %al,%al jnz 2b +copyinstr_succ: /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp cpystrflt_x -3: +copyinstr_toolong: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax cmpq %rax,%rsi jae cpystrflt -4: movq $ENAMETOOLONG,%rax jmp cpystrflt_x + /* Fault entry clears PSL.AC */ cpystrflt: movq $EFAULT,%rax @@ -661,7 +994,7 @@ 1: POP_FRAME_POINTER ret -END(copyinstr) +END(copyinstr_smap) /* * copystr(from, to, maxlen, int *lencopied) Index: sys/amd64/amd64/trap.c =================================================================== --- sys/amd64/amd64/trap.c +++ sys/amd64/amd64/trap.c @@ -594,6 +594,21 @@ trap(frame); } +static bool +trap_is_smap(struct trapframe *frame) +{ + + /* + * A page fault is classified as SMAP-induced if: + * - SMAP is supported; + * - kernel mode accessed present page; + * - rflags.AC was cleared. + */ + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 && + (frame->tf_err & (PGEX_P | PGEX_U | PGEX_RSV)) == PGEX_P && + (frame->tf_rflags & PSL_AC) == 0); +} + static int trap_pfault(struct trapframe *frame, int usermode) { @@ -671,9 +686,13 @@ * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. + * + * If SMAP is enabled, filter SMAP faults also, + * because illegal access might occur to the mapped + * user address, causing infinite loop. */ if (!usermode && (td->td_intr_nesting_level != 0 || - curpcb->pcb_onfault == NULL)) { + trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } Index: sys/amd64/amd64/vm_machdep.c =================================================================== --- sys/amd64/amd64/vm_machdep.c +++ sys/amd64/amd64/vm_machdep.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include @@ -723,3 +724,141 @@ return 1; } + +int fubyte_nosmap(volatile const void *base); +int fubyte_smap(volatile const void *base); +DEFINE_IFUNC(, int, fubyte, (volatile const void *), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + fubyte_smap : fubyte_nosmap); +} + +int fuword16_nosmap(volatile const void *base); +int fuword16_smap(volatile const void *base); +DEFINE_IFUNC(, int, fuword16, (volatile const void *), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + fuword16_smap : fuword16_nosmap); +} + +int fueword_nosmap(volatile const void *base, long *val); +int fueword_smap(volatile const void *base, long *val); +DEFINE_IFUNC(, int, fueword, (volatile const void *, long *), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 
+ fueword_smap : fueword_nosmap); +} +DEFINE_IFUNC(, int, fueword64, (volatile const void *, int64_t *), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + fueword_smap : fueword_nosmap); +} + +int fueword32_nosmap(volatile const void *base, int32_t *val); +int fueword32_smap(volatile const void *base, int32_t *val); +DEFINE_IFUNC(, int, fueword32, (volatile const void *, int32_t *), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + fueword32_smap : fueword32_nosmap); +} + +int subyte_nosmap(volatile void *base, int byte); +int subyte_smap(volatile void *base, int byte); +DEFINE_IFUNC(, int, subyte, (volatile void *, int), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + subyte_smap : subyte_nosmap); +} + +int suword16_nosmap(volatile void *base, int word); +int suword16_smap(volatile void *base, int word); +DEFINE_IFUNC(, int, suword16, (volatile void *, int), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + suword16_smap : suword16_nosmap); +} + +int suword32_nosmap(volatile void *base, int32_t word); +int suword32_smap(volatile void *base, int32_t word); +DEFINE_IFUNC(, int, suword32, (volatile void *, int32_t), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + suword32_smap : suword32_nosmap); +} + +int suword_nosmap(volatile void *base, long word); +int suword_smap(volatile void *base, long word); +DEFINE_IFUNC(, int, suword, (volatile void *, long), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + suword_smap : suword_nosmap); +} +DEFINE_IFUNC(, int, suword64, (volatile void *, int64_t), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + suword_smap : suword_nosmap); +} + +int casueword32_nosmap(volatile uint32_t *base, uint32_t oldval, + uint32_t *oldvalp, uint32_t newval); +int casueword32_smap(volatile uint32_t *base, uint32_t oldval, + uint32_t *oldvalp, uint32_t newval); +DEFINE_IFUNC(, int, casueword32, (volatile uint32_t *, uint32_t, uint32_t *, + uint32_t), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + casueword32_smap : casueword32_nosmap); +} + +int casueword_nosmap(volatile u_long *p, u_long oldval, u_long *oldvalp, + u_long newval); +int casueword_smap(volatile u_long *p, u_long oldval, u_long *oldvalp, + u_long newval); +DEFINE_IFUNC(, int, casueword, (volatile u_long *, u_long, u_long *, u_long), + static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + casueword_smap : casueword_nosmap); +} + +int copyinstr_nosmap(const void *udaddr, void *kaddr, size_t len, + size_t *lencopied); +int copyinstr_smap(const void *udaddr, void *kaddr, size_t len, + size_t *lencopied); +DEFINE_IFUNC(, int, copyinstr, (const void *, void *, size_t, size_t *), + static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + copyinstr_smap : copyinstr_nosmap); +} + +int copyin_nosmap(const void *udaddr, void *kaddr, size_t len); +int copyin_smap(const void *udaddr, void *kaddr, size_t len); +DEFINE_IFUNC(, int, copyin, (const void *, void *, size_t), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? + copyin_smap : copyin_nosmap); +} + +int copyout_nosmap(const void *kaddr, void *udaddr, size_t len); +int copyout_smap(const void *kaddr, void *udaddr, size_t len); +DEFINE_IFUNC(, int, copyout, (const void *, void *, size_t), static) +{ + + return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 
+ copyout_smap : copyout_nosmap); +} Index: sys/amd64/ia32/ia32_exception.S =================================================================== --- sys/amd64/ia32/ia32_exception.S +++ sys/amd64/ia32/ia32_exception.S @@ -67,7 +67,9 @@ movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) movl $TF_HASSEGS,TF_FLAGS(%rsp) - cld + pushfq + andl $~(PSL_D | PSL_AC),(%rsp) + popfq FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp, %rdi call ia32_syscall Index: sys/amd64/include/asmacros.h =================================================================== --- sys/amd64/include/asmacros.h +++ sys/amd64/include/asmacros.h @@ -179,7 +179,7 @@ movw %es,TF_ES(%rsp) ; \ movw %ds,TF_DS(%rsp) ; \ movl $TF_HASSEGS,TF_FLAGS(%rsp) ; \ - cld ; \ + pushfq; andl $~(PSL_D | PSL_AC),(%rsp); popfq; \ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel ? */ \ jz 2f ; /* yes, leave PCB_FULL_IRET alone */ \ movq PCPU(CURPCB),%r8 ; \ Index: sys/amd64/include/cpufunc.h =================================================================== --- sys/amd64/include/cpufunc.h +++ sys/amd64/include/cpufunc.h @@ -836,6 +836,20 @@ write_rflags(rflags); } +static __inline void +stac(void) +{ + + __asm __volatile("stac" : : : "cc"); +} + +static __inline void +clac(void) +{ + + __asm __volatile("clac" : : : "cc"); +} + enum { SGX_ECREATE = 0x0, SGX_EADD = 0x1, Index: sys/amd64/include/md_var.h =================================================================== --- sys/amd64/include/md_var.h +++ sys/amd64/include/md_var.h @@ -42,6 +42,7 @@ struct savefpu; struct sysentvec; +void amd64_conf_fast_syscall(void); void amd64_db_resume_dbreg(void); void amd64_lower_shared_page(struct sysentvec *); void amd64_syscall(struct thread *td, int traced); Index: sys/amd64/include/pmap.h =================================================================== --- sys/amd64/include/pmap.h +++ sys/amd64/include/pmap.h @@ -424,8 +424,8 @@ void pmap_invalidate_all(pmap_t); void pmap_invalidate_cache(void); void pmap_invalidate_cache_pages(vm_page_t *pages, int count); -void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, - boolean_t force); +void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); +void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num); boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); Index: sys/dev/drm2/drm_os_freebsd.c =================================================================== --- sys/dev/drm2/drm_os_freebsd.c +++ sys/dev/drm2/drm_os_freebsd.c @@ -394,8 +394,8 @@ { #if defined(__i386__) || defined(__amd64__) - pmap_invalidate_cache_range((vm_offset_t)addr, - (vm_offset_t)addr + length, TRUE); + pmap_force_invalidate_cache_range((vm_offset_t)addr, + (vm_offset_t)addr + length); #else DRM_ERROR("drm_clflush_virt_range not implemented on this architecture"); #endif Index: sys/dev/drm2/i915/intel_ringbuffer.c =================================================================== --- sys/dev/drm2/i915/intel_ringbuffer.c +++ sys/dev/drm2/i915/intel_ringbuffer.c @@ -471,8 +471,8 @@ if (pc->cpu_page == NULL) goto err_unpin; pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1); - pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page, - (vm_offset_t)pc->cpu_page + PAGE_SIZE, FALSE); + pmap_force_invalidate_cache_range((vm_offset_t)pc->cpu_page, + (vm_offset_t)pc->cpu_page + PAGE_SIZE); pc->obj = obj; ring->private = pc; @@ -1102,8 +1102,9 @@ 
} pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0], 1); - pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr, - (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE, FALSE); + pmap_force_invalidate_cache_range( + (vm_offset_t)ring->status_page.page_addr, + (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE); ring->status_page.obj = obj; memset(ring->status_page.page_addr, 0, PAGE_SIZE); Index: sys/dev/hyperv/vmbus/amd64/vmbus_vector.S =================================================================== --- sys/dev/hyperv/vmbus/amd64/vmbus_vector.S +++ sys/dev/hyperv/vmbus/amd64/vmbus_vector.S @@ -27,6 +27,7 @@ */ #include +#include #include #include "assym.s" Index: sys/i386/i386/elf_machdep.c =================================================================== --- sys/i386/i386/elf_machdep.c +++ sys/i386/i386/elf_machdep.c @@ -160,10 +160,13 @@ *off = len; } +#define ERI_LOCAL 0x0001 +#define ERI_ONLYIFUNC 0x0002 + /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, - int type, int local, elf_lookup_fn lookup) + int type, elf_lookup_fn lookup, int flags) { Elf_Addr *where; Elf_Addr addr; @@ -192,7 +195,10 @@ panic("unknown reloc type %d\n", type); } - if (local) { + if (((flags & ERI_ONLYIFUNC) == 0) ^ (rtype != R_386_IRELATIVE)) + return (0); + + if ((flags & ERI_LOCAL) != 0) { if (rtype == R_386_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) @@ -244,6 +250,12 @@ case R_386_RELATIVE: break; + case R_386_IRELATIVE: + addr = relocbase + addend; + addr = ((Elf_Addr (*)(void))addr)(); + if (*where != addr) + *where = addr; + break; default: printf("kldload: unexpected relocation type %d\n", rtype); @@ -252,12 +264,21 @@ return(0); } +int +elf_reloc_ifunc(linker_file_t lf, Elf_Addr relocbase, const void *data, + int type, elf_lookup_fn lookup) +{ + + return (elf_reloc_internal(lf, relocbase, data, type, lookup, + ERI_ONLYIFUNC)); +} + int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { - return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); + return (elf_reloc_internal(lf, relocbase, data, type, lookup, 0)); } int @@ -265,7 +286,8 @@ int type, elf_lookup_fn lookup) { - return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); + return (elf_reloc_internal(lf, relocbase, data, type, lookup, + ERI_LOCAL)); } int Index: sys/i386/i386/machdep.c =================================================================== --- sys/i386/i386/machdep.c +++ sys/i386/i386/machdep.c @@ -2137,6 +2137,7 @@ int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; struct xstate_hdr *xhdr; + caddr_t kmdp; int late_console; thread0.td_kstack = proc0kstack; @@ -2353,6 +2354,9 @@ i386_kdb_init(); } + kmdp = preload_search_by_type("elf kernel"); + link_elf_ireloc(kmdp); + vm86_initialize(); getmemsize(first); init_param2(physmem); Index: sys/i386/i386/npx.c =================================================================== --- sys/i386/i386/npx.c +++ sys/i386/i386/npx.c @@ -67,6 +67,7 @@ #include #include #include +#include #include @@ -183,7 +184,6 @@ static void fpu_clean_state(void); -static void fpusave(union savefpu *); static void fpurstor(union savefpu *); int hw_float; @@ -201,8 +201,6 @@ u_int size; } *xsave_area_desc; -static int use_xsaveopt; - static volatile u_int npx_traps_while_probing; alias_for_inthand_t probetrap; @@ -309,6 +307,69 @@ return (hw_float); } +static void 
+npxsave_xsaveopt(union savefpu *addr) +{ + + xsaveopt((char *)addr, xsave_mask); +} + +static void +fpusave_xsave(union savefpu *addr) +{ + + xsave((char *)addr, xsave_mask); +} + +static void +fpusave_fxsave(union savefpu *addr) +{ + + fxsave((char *)addr); +} + +static void +fpusave_fnsave(union savefpu *addr) +{ + + fnsave((char *)addr); +} + +static void +init_xsave(void) +{ + + if (use_xsave) + return; + if (!cpu_fxsr || (cpu_feature2 & CPUID2_XSAVE) == 0) + return; + use_xsave = 1; + TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); +} + +DEFINE_IFUNC(, void, npxsave_core, (union savefpu *), static) +{ + + init_xsave(); + if (use_xsave) + return ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0 ? + npxsave_xsaveopt : fpusave_xsave); + if (cpu_fxsr) + return (fpusave_fxsave); + return (fpusave_fnsave); +} + +DEFINE_IFUNC(, void, fpusave, (union savefpu *), static) +{ + + init_xsave(); + if (use_xsave) + return (fpusave_xsave); + if (cpu_fxsr) + return (fpusave_fxsave); + return (fpusave_fnsave); +} + /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. @@ -319,13 +380,8 @@ u_int cp[4]; uint64_t xsave_mask_user; - if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) { - use_xsave = 1; - TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); - } if (!use_xsave) return; - cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) @@ -339,14 +395,9 @@ xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; - - cpuid_count(0xd, 0x1, cp); - if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) - use_xsaveopt = 1; } /* - * Calculate the fpu save area size. */ static void @@ -852,15 +903,11 @@ * npxsave() atomically with checking fpcurthread. 
*/ void -npxsave(addr) - union savefpu *addr; +npxsave(union savefpu *addr) { stop_emulating(); - if (use_xsaveopt) - xsaveopt((char *)addr, xsave_mask); - else - fpusave(addr); + npxsave_core(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } @@ -1072,19 +1119,6 @@ return (0); } -static void -fpusave(addr) - union savefpu *addr; -{ - - if (use_xsave) - xsave((char *)addr, xsave_mask); - else if (cpu_fxsr) - fxsave(addr); - else - fnsave(addr); -} - static void npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87) { Index: sys/i386/i386/pmap.c =================================================================== --- sys/i386/i386/pmap.c +++ sys/i386/i386/pmap.c @@ -141,6 +141,7 @@ #include #include #endif +#include #include #include #include @@ -300,6 +301,10 @@ vm_page_t m, vm_prot_t prot, vm_page_t mpte); static void pmap_flush_page(vm_page_t m); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); +static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, + vm_offset_t eva); +static void pmap_invalidate_cache_range_all(vm_offset_t sva, + vm_offset_t eva); static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); @@ -1281,37 +1286,55 @@ pmap_invalidate_page(pmap, va); } +DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t), + static) +{ + + if ((cpu_feature & CPUID_SS) != 0) + return (pmap_invalidate_cache_range_selfsnoop); + if ((cpu_feature & CPUID_CLFSH) != 0) + return (pmap_force_invalidate_cache_range); + return (pmap_invalidate_cache_range_all); +} + #define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) -void -pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force) +static void +pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva) { - if (force) { - sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); - } else { - KASSERT((sva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: sva not page-aligned")); - KASSERT((eva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: eva not page-aligned")); - } + KASSERT((sva & PAGE_MASK) == 0, + ("pmap_invalidate_cache_range: sva not page-aligned")); + KASSERT((eva & PAGE_MASK) == 0, + ("pmap_invalidate_cache_range: eva not page-aligned")); +} - if ((cpu_feature & CPUID_SS) != 0 && !force) - ; /* If "Self Snoop" is supported and allowed, do nothing. */ - else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 && - eva - sva < PMAP_CLFLUSH_THRESHOLD) { -#ifdef DEV_APIC +void +pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) +{ + + sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); + if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) { /* - * XXX: Some CPUs fault, hang, or trash the local APIC - * registers if we use CLFLUSH on the local APIC - * range. The local APIC is always uncached, so we - * don't need to flush for that range anyway. + * The supplied range is bigger than 2MB. + * Globally invalidate cache. */ - if (pmap_kextract(sva) == lapic_paddr) - return; -#endif + pmap_invalidate_cache(); + return; + } + + /* + * XXX: Some CPUs fault, hang, or trash the local APIC + * registers if we use CLFLUSH on the local APIC + * range. The local APIC is always uncached, so we + * don't need to flush for that range anyway. + */ + if (pmap_kextract(sva) == lapic_paddr) + return; + + if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) { /* - * Otherwise, do per-cache line flush. Use the sfence + * Do per-cache line flush. 
Use the sfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache @@ -1321,12 +1344,7 @@ for (; sva < eva; sva += cpu_clflush_line_size) clflushopt(sva); sfence(); - } else if ((cpu_feature & CPUID_CLFSH) != 0 && - eva - sva < PMAP_CLFLUSH_THRESHOLD) { -#ifdef DEV_APIC - if (pmap_kextract(sva) == lapic_paddr) - return; -#endif + } else { /* * Writes are ordered by CLFLUSH on Intel CPUs. */ @@ -1336,17 +1354,17 @@ clflush(sva); if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); - } else { - - /* - * No targeted cache flush methods are supported by CPU, - * or the supplied range is bigger than 2MB. - * Globally invalidate cache. - */ - pmap_invalidate_cache(); } } +static void +pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva) +{ + + pmap_invalidate_cache_range_selfsnoop(sva, eva); + pmap_invalidate_cache(); +} + void pmap_invalidate_cache_pages(vm_page_t *pages, int count) { @@ -5231,7 +5249,7 @@ for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); - pmap_invalidate_cache_range(va, va + size, FALSE); + pmap_invalidate_cache_range(va, va + size); return ((void *)(va + offset)); } @@ -5469,7 +5487,7 @@ */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); - pmap_invalidate_cache_range(base, tmpva, FALSE); + pmap_invalidate_cache_range(base, tmpva); } return (0); } Index: sys/i386/i386/vm_machdep.c =================================================================== --- sys/i386/i386/vm_machdep.c +++ sys/i386/i386/vm_machdep.c @@ -796,7 +796,7 @@ * settings are recalculated. */ pmap_qenter(sf->kva, &m, 1); - pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE, FALSE); + pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE); } /* Index: sys/i386/include/pmap.h =================================================================== --- sys/i386/include/pmap.h +++ sys/i386/include/pmap.h @@ -394,8 +394,8 @@ void pmap_invalidate_all(pmap_t); void pmap_invalidate_cache(void); void pmap_invalidate_cache_pages(vm_page_t *pages, int count); -void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, - boolean_t force); +void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); +void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); void invltlb_glob(void); Index: sys/kern/link_elf.c =================================================================== --- sys/kern/link_elf.c +++ sys/kern/link_elf.c @@ -190,6 +190,9 @@ static int parse_dynamic(elf_file_t); static int relocate_file(elf_file_t); +static int relocate_file1(elf_file_t ef, int (*elf_reloc_func)( + linker_file_t lf, Elf_Addr relocbase, const void *data, + int type, elf_lookup_fn lookup)); static int link_elf_preload_parse_symbols(elf_file_t); static struct elf_set_head set_pcpu_list; @@ -1177,7 +1180,8 @@ } static int -relocate_file(elf_file_t ef) +relocate_file1(elf_file_t ef, int (*elf_reloc_func)(linker_file_t lf, + Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup)) { const Elf_Rel *rellim; const Elf_Rel *rel; @@ -1191,7 +1195,7 @@ rellim = (const Elf_Rel *) ((const char *)ef->rel + ef->relsize); while (rel < rellim) { - if (elf_reloc(&ef->lf, (Elf_Addr)ef->address, rel, + if (elf_reloc_func(&ef->lf, (Elf_Addr)ef->address, rel, ELF_RELOC_REL, elf_lookup)) { symname = symbol_name(ef, rel->r_info); printf("link_elf: symbol %s undefined\n", symname); @@ 
-1207,7 +1211,7 @@ relalim = (const Elf_Rela *) ((const char *)ef->rela + ef->relasize); while (rela < relalim) { - if (elf_reloc(&ef->lf, (Elf_Addr)ef->address, rela, + if (elf_reloc_func(&ef->lf, (Elf_Addr)ef->address, rela, ELF_RELOC_RELA, elf_lookup)) { symname = symbol_name(ef, rela->r_info); printf("link_elf: symbol %s undefined\n", @@ -1224,7 +1228,7 @@ rellim = (const Elf_Rel *) ((const char *)ef->pltrel + ef->pltrelsize); while (rel < rellim) { - if (elf_reloc(&ef->lf, (Elf_Addr)ef->address, rel, + if (elf_reloc_func(&ef->lf, (Elf_Addr)ef->address, rel, ELF_RELOC_REL, elf_lookup)) { symname = symbol_name(ef, rel->r_info); printf("link_elf: symbol %s undefined\n", @@ -1241,7 +1245,7 @@ relalim = (const Elf_Rela *) ((const char *)ef->pltrela + ef->pltrelasize); while (rela < relalim) { - if (elf_reloc(&ef->lf, (Elf_Addr)ef->address, rela, + if (elf_reloc_func(&ef->lf, (Elf_Addr)ef->address, rela, ELF_RELOC_RELA, elf_lookup)) { symname = symbol_name(ef, rela->r_info); printf("link_elf: symbol %s undefined\n", @@ -1255,6 +1259,17 @@ return (0); } +static int +relocate_file(elf_file_t ef) +{ + int e; + + e = relocate_file1(ef, elf_reloc); + if (e == 0) + e = relocate_file1(ef, elf_reloc_ifunc); + return (e); +} + /* * Hash function for symbol table lookup. Don't even think about changing * this. It is specified by the System V ABI. @@ -1312,7 +1327,8 @@ if (strcmp(name, strp) == 0) { if (symp->st_shndx != SHN_UNDEF || (symp->st_value != 0 && - ELF_ST_TYPE(symp->st_info) == STT_FUNC)) { + (ELF_ST_TYPE(symp->st_info) == STT_FUNC || + ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC))) { *sym = (c_linker_sym_t) symp; return (0); } @@ -1332,7 +1348,8 @@ if (strcmp(name, strp) == 0) { if (symp->st_shndx != SHN_UNDEF || (symp->st_value != 0 && - ELF_ST_TYPE(symp->st_info) == STT_FUNC)) { + (ELF_ST_TYPE(symp->st_info) == STT_FUNC || + ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC))) { *sym = (c_linker_sym_t) symp; return (0); } @@ -1347,12 +1364,18 @@ link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym, linker_symval_t *symval) { - elf_file_t ef = (elf_file_t) lf; - const Elf_Sym* es = (const Elf_Sym*) sym; + elf_file_t ef; + const Elf_Sym *es; + caddr_t val; + ef = (elf_file_t)lf; + es = (const Elf_Sym *)sym; if (es >= ef->symtab && es < (ef->symtab + ef->nchains)) { symval->name = ef->strtab + es->st_name; - symval->value = (caddr_t) ef->address + es->st_value; + val = (caddr_t)ef->address + es->st_value; + if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC) + val = ((caddr_t (*)(void))val)(); + symval->value = val; symval->size = es->st_size; return (0); } @@ -1360,7 +1383,10 @@ return (ENOENT); if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) { symval->name = ef->ddbstrtab + es->st_name; - symval->value = (caddr_t) ef->address + es->st_value; + val = (caddr_t)ef->address + es->st_value; + if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC) + val = ((caddr_t (*)(void))val)(); + symval->value = val; symval->size = es->st_size; return (0); } @@ -1470,7 +1496,8 @@ /* Exhaustive search */ for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { if (symp->st_value != 0 && - ELF_ST_TYPE(symp->st_info) == STT_FUNC) { + (ELF_ST_TYPE(symp->st_info) == STT_FUNC || + ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) { error = callback(ef->ddbstrtab + symp->st_name, opaque); if (error != 0) return (error); @@ -1491,7 +1518,8 @@ /* Exhaustive search */ for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { if (symp->st_value != 0 && - ELF_ST_TYPE(symp->st_info) == STT_FUNC) { 
+ (ELF_ST_TYPE(symp->st_info) == STT_FUNC || + ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) { error = link_elf_symbol_values(file, (c_linker_sym_t) symp, &symval); if (error != 0) @@ -1650,3 +1678,19 @@ return (ef->ddbstrcnt); } + +void +link_elf_ireloc(caddr_t kmdp) +{ + struct elf_file eff; + elf_file_t ef; + + ef = &eff; + bzero(ef, sizeof(*ef)); + ef->modptr = kmdp; + ef->dynamic = (Elf_Dyn *)&_DYNAMIC; + parse_dynamic(ef); + ef->address = 0; + link_elf_preload_parse_symbols(ef); + relocate_file1(ef, elf_reloc_ifunc); +} Index: sys/kern/link_elf_obj.c =================================================================== --- sys/kern/link_elf_obj.c +++ sys/kern/link_elf_obj.c @@ -1166,12 +1166,19 @@ link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym, linker_symval_t *symval) { - elf_file_t ef = (elf_file_t) lf; - const Elf_Sym *es = (const Elf_Sym*) sym; + elf_file_t ef; + const Elf_Sym *es; + caddr_t val; + ef = (elf_file_t) lf; + es = (const Elf_Sym*) sym; + val = (caddr_t)es->st_value; if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) { symval->name = ef->ddbstrtab + es->st_name; - symval->value = (caddr_t)es->st_value; + val = (caddr_t)es->st_value; + if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC) + val = ((caddr_t (*)(void))val)(); + symval->value = val; symval->size = es->st_size; return 0; } @@ -1256,7 +1263,8 @@ /* Exhaustive search */ for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { if (symp->st_value != 0 && - ELF_ST_TYPE(symp->st_info) == STT_FUNC) { + (ELF_ST_TYPE(symp->st_info) == STT_FUNC || + ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) { error = callback(ef->ddbstrtab + symp->st_name, opaque); if (error) return (error); @@ -1277,8 +1285,10 @@ /* Exhaustive search */ for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { if (symp->st_value != 0 && - ELF_ST_TYPE(symp->st_info) == STT_FUNC) { - error = link_elf_symbol_values(file, (c_linker_sym_t) symp, &symval); + (ELF_ST_TYPE(symp->st_info) == STT_FUNC || + ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) { + error = link_elf_symbol_values(file, + (c_linker_sym_t)symp, &symval); if (error) return (error); error = callback(file, i, &symval, opaque); Index: sys/sys/linker.h =================================================================== --- sys/sys/linker.h +++ sys/sys/linker.h @@ -272,11 +272,16 @@ typedef int elf_lookup_fn(linker_file_t, Elf_Size, int, Elf_Addr *); /* Support functions */ -int elf_reloc(linker_file_t _lf, Elf_Addr base, const void *_rel, int _type, elf_lookup_fn _lu); -int elf_reloc_local(linker_file_t _lf, Elf_Addr base, const void *_rel, int _type, elf_lookup_fn _lu); +int elf_reloc(linker_file_t _lf, Elf_Addr base, const void *_rel, + int _type, elf_lookup_fn _lu); +int elf_reloc_ifunc(linker_file_t _lf, Elf_Addr base, const void *_rel, + int _type, elf_lookup_fn _lu); +int elf_reloc_local(linker_file_t _lf, Elf_Addr base, const void *_rel, + int _type, elf_lookup_fn _lu); Elf_Addr elf_relocaddr(linker_file_t _lf, Elf_Addr addr); const Elf_Sym *elf_get_sym(linker_file_t _lf, Elf_Size _symidx); const char *elf_get_symname(linker_file_t _lf, Elf_Size _symidx); +void link_elf_ireloc(caddr_t kmdp); typedef struct linker_ctf { const uint8_t *ctftab; /* Decompressed CTF data. */ Index: sys/x86/include/ifunc.h =================================================================== --- /dev/null +++ sys/x86/include/ifunc.h @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2015, 2017 The FreeBSD Foundation + * All rights reserved. 
+ * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __X86_IFUNC_H +#define __X86_IFUNC_H + +#define DECLARE_LIFUNC(ret_type, name, args) \ +ret_type name args + +#define DEFINE_LIFUNC(scope, selector_qual, ret_type, name, args) \ +__asm__ (scope "\t" #name "\n" \ + "\t.type\t" #name ",@function\n" \ + #name ":\n" \ + "\tjmp *" #name "_selector\n" \ + "\t.size\t" #name ",\t. - "#name); \ +selector_qual ret_type (*name##_selector)args __used; \ +DECLARE_LIFUNC(ret_type, name, args) + +#define DEFINE_STATIC_LIFUNC(ret_type, name, args) \ + DEFINE_LIFUNC(".local", static, ret_type, name, args) + +#define DEFINE_GLOBAL_LIFUNC(ret_type, name, args) \ + DEFINE_LIFUNC(".globl", , ret_type, name, args) + +#define DEFINE_IFUNC(qual, ret_type, name, args, resolver_qual) \ + resolver_qual ret_type (*name##_resolver(void))args __used; \ + qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \ + resolver_qual ret_type (*name##_resolver(void))args + +#endif Index: sys/x86/iommu/intel_utils.c =================================================================== --- sys/x86/iommu/intel_utils.c +++ sys/x86/iommu/intel_utils.c @@ -368,8 +368,7 @@ * If DMAR does not snoop paging structures accesses, flush * CPU cache to memory. */ - pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz, - TRUE); + pmap_force_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz); } void