D13838.id42363.diff

Index: sys/amd64/amd64/copyout.c
===================================================================
--- /dev/null
+++ sys/amd64/amd64/copyout.c
@@ -0,0 +1,178 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/ifunc.h>
+
+int fubyte_nosmap(volatile const void *base);
+int fubyte_smap(volatile const void *base);
+DEFINE_IFUNC(, int, fubyte, (volatile const void *), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ fubyte_smap : fubyte_nosmap);
+}
+
+int fuword16_nosmap(volatile const void *base);
+int fuword16_smap(volatile const void *base);
+DEFINE_IFUNC(, int, fuword16, (volatile const void *), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ fuword16_smap : fuword16_nosmap);
+}
+
+int fueword_nosmap(volatile const void *base, long *val);
+int fueword_smap(volatile const void *base, long *val);
+DEFINE_IFUNC(, int, fueword, (volatile const void *, long *), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ fueword_smap : fueword_nosmap);
+}
+DEFINE_IFUNC(, int, fueword64, (volatile const void *, int64_t *), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ fueword_smap : fueword_nosmap);
+}
+
+int fueword32_nosmap(volatile const void *base, int32_t *val);
+int fueword32_smap(volatile const void *base, int32_t *val);
+DEFINE_IFUNC(, int, fueword32, (volatile const void *, int32_t *), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ fueword32_smap : fueword32_nosmap);
+}
+
+int subyte_nosmap(volatile void *base, int byte);
+int subyte_smap(volatile void *base, int byte);
+DEFINE_IFUNC(, int, subyte, (volatile void *, int), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ subyte_smap : subyte_nosmap);
+}
+
+int suword16_nosmap(volatile void *base, int word);
+int suword16_smap(volatile void *base, int word);
+DEFINE_IFUNC(, int, suword16, (volatile void *, int), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ suword16_smap : suword16_nosmap);
+}
+
+int suword32_nosmap(volatile void *base, int32_t word);
+int suword32_smap(volatile void *base, int32_t word);
+DEFINE_IFUNC(, int, suword32, (volatile void *, int32_t), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ suword32_smap : suword32_nosmap);
+}
+
+int suword_nosmap(volatile void *base, long word);
+int suword_smap(volatile void *base, long word);
+DEFINE_IFUNC(, int, suword, (volatile void *, long), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ suword_smap : suword_nosmap);
+}
+DEFINE_IFUNC(, int, suword64, (volatile void *, int64_t), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ suword_smap : suword_nosmap);
+}
+
+int casueword32_nosmap(volatile uint32_t *base, uint32_t oldval,
+ uint32_t *oldvalp, uint32_t newval);
+int casueword32_smap(volatile uint32_t *base, uint32_t oldval,
+ uint32_t *oldvalp, uint32_t newval);
+DEFINE_IFUNC(, int, casueword32, (volatile uint32_t *, uint32_t, uint32_t *,
+ uint32_t), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ casueword32_smap : casueword32_nosmap);
+}
+
+int casueword_nosmap(volatile u_long *p, u_long oldval, u_long *oldvalp,
+ u_long newval);
+int casueword_smap(volatile u_long *p, u_long oldval, u_long *oldvalp,
+ u_long newval);
+DEFINE_IFUNC(, int, casueword, (volatile u_long *, u_long, u_long *, u_long),
+ static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ casueword_smap : casueword_nosmap);
+}
+
+int copyinstr_nosmap(const void *udaddr, void *kaddr, size_t len,
+ size_t *lencopied);
+int copyinstr_smap(const void *udaddr, void *kaddr, size_t len,
+ size_t *lencopied);
+DEFINE_IFUNC(, int, copyinstr, (const void *, void *, size_t, size_t *),
+ static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ copyinstr_smap : copyinstr_nosmap);
+}
+
+int copyin_nosmap(const void *udaddr, void *kaddr, size_t len);
+int copyin_smap(const void *udaddr, void *kaddr, size_t len);
+DEFINE_IFUNC(, int, copyin, (const void *, void *, size_t), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ copyin_smap : copyin_nosmap);
+}
+
+int copyout_nosmap(const void *kaddr, void *udaddr, size_t len);
+int copyout_smap(const void *kaddr, void *udaddr, size_t len);
+DEFINE_IFUNC(, int, copyout, (const void *, void *, size_t), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+ copyout_smap : copyout_nosmap);
+}
Index: sys/amd64/amd64/exception.S
===================================================================
--- sys/amd64/amd64/exception.S
+++ sys/amd64/amd64/exception.S
@@ -43,8 +43,8 @@
#include "assym.inc"
-#include <machine/asmacros.h>
#include <machine/psl.h>
+#include <machine/asmacros.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
@@ -196,7 +196,9 @@
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
- cld
+ pushfq
+ andq $~(PSL_D | PSL_AC),(%rsp)
+ popfq
FAKE_MCOUNT(TF_RIP(%rsp))
#ifdef KDTRACE_HOOKS
/*
@@ -277,7 +279,9 @@
movq %r15,TF_R15(%rsp)
SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
- cld
+ pushfq
+ andq $~(PSL_D | PSL_AC),(%rsp)
+ popfq
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
@@ -697,7 +701,9 @@
movq %r15,TF_R15(%rsp)
SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
- cld
+ pushfq
+ andq $~(PSL_D | PSL_AC),(%rsp)
+ popfq
xorl %ebx,%ebx
testb $SEL_RPL_MASK,TF_CS(%rsp)
jnz nmi_fromuserspace
@@ -786,7 +792,9 @@
subq %rcx,%rdx
movq %rdx,%rdi /* destination stack pointer */
shrq $3,%rcx /* trap frame size in long words */
- cld
+ pushfq
+ andq $~(PSL_D | PSL_AC),(%rsp)
+ popfq
rep
movsq /* copy trapframe */
movq %rdx,%rsp /* we are on the regular kstack */
@@ -895,7 +903,9 @@
movq %r15,TF_R15(%rsp)
SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
- cld
+ pushfq
+ andq $~(PSL_D | PSL_AC),(%rsp)
+ popfq
xorl %ebx,%ebx
testb $SEL_RPL_MASK,TF_CS(%rsp)
jnz mchk_fromuserspace
Index: sys/amd64/amd64/initcpu.c
===================================================================
--- sys/amd64/amd64/initcpu.c
+++ sys/amd64/amd64/initcpu.c
@@ -215,8 +215,12 @@
* to the kernel tables. The boot loader enables the U bit in
* its tables.
*/
- if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
- cr4 |= CR4_SMEP;
+ if (!IS_BSP()) {
+ if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
+ cr4 |= CR4_SMEP;
+ if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
+ cr4 |= CR4_SMAP;
+ }
load_cr4(cr4);
if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
msr = rdmsr(MSR_EFER) | EFER_NXE;
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c
+++ sys/amd64/amd64/machdep.c
@@ -1538,7 +1538,7 @@
msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
wrmsr(MSR_STAR, msr);
- wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D);
+ wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
}
u_int64_t
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -146,6 +146,7 @@
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
+#include <x86/ifunc.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
@@ -645,6 +646,10 @@
vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
+ vm_offset_t eva);
+static void pmap_invalidate_cache_range_all(vm_offset_t sva,
+ vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
@@ -1089,6 +1094,7 @@
{
vm_offset_t va;
pt_entry_t *pte;
+ uint64_t cr4;
int i;
if (!pti)
@@ -1113,11 +1119,21 @@
virtual_end = VM_MAX_KERNEL_ADDRESS;
- /* XXX do %cr0 as well */
- load_cr4(rcr4() | CR4_PGE);
+ /*
+ * Enable PG_G global pages, then switch to the kernel page
+ * table from the bootstrap page table. After the switch, it
+ * is possible to enable SMEP and SMAP since PG_U bits are
+ * correct now.
+ */
+ cr4 = rcr4();
+ cr4 |= CR4_PGE;
+ load_cr4(cr4);
load_cr3(KPML4phys);
if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
- load_cr4(rcr4() | CR4_SMEP);
+ cr4 |= CR4_SMEP;
+ if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
+ cr4 |= CR4_SMAP;
+ load_cr4(cr4);
/*
* Initialize the kernel pmap (which is statically allocated).
@@ -2121,36 +2137,55 @@
pmap_invalidate_page(pmap, va);
}
+DEFINE_IFUNC(, void, pmap_invalidate_cache_range,
+ (vm_offset_t sva, vm_offset_t eva), static)
+{
+
+ if ((cpu_feature & CPUID_SS) != 0)
+ return (pmap_invalidate_cache_range_selfsnoop);
+ if ((cpu_feature & CPUID_CLFSH) != 0)
+ return (pmap_force_invalidate_cache_range);
+ return (pmap_invalidate_cache_range_all);
+}
+
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
-void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
+static void
+pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva)
{
- if (force) {
- sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
- } else {
- KASSERT((sva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: sva not page-aligned"));
- KASSERT((eva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: eva not page-aligned"));
- }
+ KASSERT((sva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: sva not page-aligned"));
+ KASSERT((eva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: eva not page-aligned"));
+}
+
+void
+pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
+{
- if ((cpu_feature & CPUID_SS) != 0 && !force)
- ; /* If "Self Snoop" is supported and allowed, do nothing. */
- else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+ sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
+ if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
/*
- * XXX: Some CPUs fault, hang, or trash the local APIC
- * registers if we use CLFLUSH on the local APIC
- * range. The local APIC is always uncached, so we
- * don't need to flush for that range anyway.
+ * The supplied range is bigger than 2MB.
+ * Globally invalidate cache.
*/
- if (pmap_kextract(sva) == lapic_paddr)
- return;
+ pmap_invalidate_cache();
+ return;
+ }
+
+ /*
+ * XXX: Some CPUs fault, hang, or trash the local APIC
+ * registers if we use CLFLUSH on the local APIC
+ * range. The local APIC is always uncached, so we
+ * don't need to flush for that range anyway.
+ */
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
+ if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
/*
- * Otherwise, do per-cache line flush. Use the sfence
+ * Do per-cache line flush. Use the sfence
* instruction to insure that previous stores are
* included in the write-back. The processor
* propagates flush to other processors in the cache
@@ -2160,10 +2195,7 @@
for (; sva < eva; sva += cpu_clflush_line_size)
clflushopt(sva);
sfence();
- } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
- if (pmap_kextract(sva) == lapic_paddr)
- return;
+ } else {
/*
* Writes are ordered by CLFLUSH on Intel CPUs.
*/
@@ -2173,17 +2205,17 @@
clflush(sva);
if (cpu_vendor_id != CPU_VENDOR_INTEL)
mfence();
- } else {
-
- /*
- * No targeted cache flush methods are supported by CPU,
- * or the supplied range is bigger than 2MB.
- * Globally invalidate cache.
- */
- pmap_invalidate_cache();
}
}
+static void
+pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva)
+{
+
+ pmap_invalidate_cache_range_selfsnoop(sva, eva);
+ pmap_invalidate_cache();
+}
+
/*
* Remove the specified set of pages from the data and instruction caches.
*
@@ -6858,7 +6890,7 @@
for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
- pmap_invalidate_cache_range(va, va + tmpsize, FALSE);
+ pmap_invalidate_cache_range(va, va + tmpsize);
return ((void *)(va + offset));
}
@@ -7217,7 +7249,7 @@
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
- pmap_invalidate_cache_range(base, tmpva, FALSE);
+ pmap_invalidate_cache_range(base, tmpva);
}
return (error);
}
Index: sys/amd64/amd64/support.S
===================================================================
--- sys/amd64/amd64/support.S
+++ sys/amd64/amd64/support.S
@@ -334,7 +334,7 @@
* copyout(from_kernel, to_user, len)
* %rdi, %rsi, %rdx
*/
-ENTRY(copyout)
+ENTRY(copyout_nosmap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rax
movq $copyout_fault,PCB_ONFAULT(%rax)
@@ -375,6 +375,55 @@
rep
movsb
+ jmp done_copyout
+END(copyout_nosmap)
+
+ENTRY(copyout_smap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rax
+ /* Trap entry clears PSL.AC */
+ movq $copyout_fault,PCB_ONFAULT(%rax)
+ testq %rdx,%rdx /* anything to do? */
+ jz done_copyout
+
+ /*
+ * Check explicitly for non-user addresses. If 486 write protection
+ * is being used, this check is essential because we are in kernel
+ * mode so the h/w does not provide any protection against writing
+ * kernel addresses.
+ */
+
+ /*
+ * First, prevent address wrapping.
+ */
+ movq %rsi,%rax
+ addq %rdx,%rax
+ jc copyout_fault
+/*
+ * XXX STOP USING VM_MAXUSER_ADDRESS.
+ * It is an end address, not a max, so every time it is used correctly it
+ * looks like there is an off by one error, and of course it caused an off
+ * by one error in several places.
+ */
+ movq $VM_MAXUSER_ADDRESS,%rcx
+ cmpq %rcx,%rax
+ ja copyout_fault
+
+ xchgq %rdi,%rsi
+ /* bcopy(%rsi, %rdi, %rdx) */
+ movq %rdx,%rcx
+
+ shrq $3,%rcx
+ cld
+ stac
+ rep
+ movsq
+ movb %dl,%cl
+ andb $7,%cl
+ rep
+ movsb
+ clac
+
done_copyout:
xorl %eax,%eax
movq PCPU(CURPCB),%rdx
@@ -395,7 +444,39 @@
* copyin(from_user, to_kernel, len)
* %rdi, %rsi, %rdx
*/
-ENTRY(copyin)
+ENTRY(copyin_nosmap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rax
+ movq $copyin_fault,PCB_ONFAULT(%rax)
+ testq %rdx,%rdx /* anything to do? */
+ jz done_copyin
+
+ /*
+ * make sure address is valid
+ */
+ movq %rdi,%rax
+ addq %rdx,%rax
+ jc copyin_fault
+ movq $VM_MAXUSER_ADDRESS,%rcx
+ cmpq %rcx,%rax
+ ja copyin_fault
+
+ xchgq %rdi,%rsi
+ movq %rdx,%rcx
+ movb %cl,%al
+ shrq $3,%rcx /* copy longword-wise */
+ cld
+ rep
+ movsq
+ movb %al,%cl
+ andb $7,%cl /* copy remaining bytes */
+ rep
+ movsb
+
+ jmp done_copyin
+END(copyin_nosmap)
+
+ENTRY(copyin_smap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rax
movq $copyin_fault,PCB_ONFAULT(%rax)
@@ -416,12 +497,14 @@
movq %rdx,%rcx
movb %cl,%al
shrq $3,%rcx /* copy longword-wise */
+ stac
rep
movsq
movb %al,%cl
andb $7,%cl /* copy remaining bytes */
rep
movsb
+ clac
done_copyin:
xorl %eax,%eax
@@ -429,6 +512,7 @@
movq %rax,PCB_ONFAULT(%rdx)
POP_FRAME_POINTER
ret
+END(copyin_smap)
ALIGN_TEXT
copyin_fault:
@@ -437,14 +521,47 @@
movq $EFAULT,%rax
POP_FRAME_POINTER
ret
-END(copyin)
/*
* casueword32. Compare and set user integer. Returns -1 on fault,
* 0 if access was successful. Old value is written to *oldp.
* dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
*/
-ENTRY(casueword32)
+ENTRY(casueword32_nosmap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%r8
+ movq $fusufault,PCB_ONFAULT(%r8)
+
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rdi /* verify address is valid */
+ ja fusufault
+
+ movl %esi,%eax /* old */
+#ifdef SMP
+ lock
+#endif
+ cmpxchgl %ecx,(%rdi) /* new = %ecx */
+
+ /*
+ * The old value is in %eax. If the store succeeded it will be the
+ * value we expected (old) from before the store, otherwise it will
+ * be the current value. Save %eax into %esi to prepare the return
+ * value.
+ */
+ movl %eax,%esi
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%r8)
+
+ /*
+ * Access the oldp after the pcb_onfault is cleared, to correctly
+ * catch corrupted pointer.
+ */
+ movl %esi,(%rdx) /* oldp = %rdx */
+ POP_FRAME_POINTER
+ ret
+END(casueword32_nosmap)
+
+ENTRY(casueword32_smap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%r8
movq $fusufault,PCB_ONFAULT(%r8)
@@ -454,10 +571,12 @@
ja fusufault
movl %esi,%eax /* old */
+ stac
#ifdef SMP
lock
#endif
cmpxchgl %ecx,(%rdi) /* new = %ecx */
+ clac
/*
* The old value is in %eax. If the store succeeded it will be the
@@ -476,14 +595,14 @@
movl %esi,(%rdx) /* oldp = %rdx */
POP_FRAME_POINTER
ret
-END(casueword32)
+END(casueword32_smap)
/*
* casueword. Compare and set user long. Returns -1 on fault,
* 0 if access was successful. Old value is written to *oldp.
* dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
*/
-ENTRY(casueword)
+ENTRY(casueword_nosmap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%r8
movq $fusufault,PCB_ONFAULT(%r8)
@@ -509,7 +628,37 @@
movq %rsi,(%rdx)
POP_FRAME_POINTER
ret
-END(casueword)
+END(casueword_nosmap)
+
+ENTRY(casueword_smap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%r8
+ movq $fusufault,PCB_ONFAULT(%r8)
+
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rdi /* verify address is valid */
+ ja fusufault
+
+ movq %rsi,%rax /* old */
+ stac
+#ifdef SMP
+ lock
+#endif
+ cmpxchgq %rcx,(%rdi) /* new = %rcx */
+ clac
+
+ /*
+ * The old value is in %rax. If the store succeeded it will be the
+ * value we expected (old) from before the store, otherwise it will
+ * be the current value.
+ */
+ movq %rax,%rsi
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%r8)
+ movq %rsi,(%rdx)
+ POP_FRAME_POINTER
+ ret
+END(casueword_smap)
/*
* Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
@@ -517,8 +666,24 @@
* addr = %rdi, valp = %rsi
*/
-ALTENTRY(fueword64)
-ENTRY(fueword)
+ENTRY(fueword_nosmap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-8,%rax
+ cmpq %rax,%rdi /* verify address is valid */
+ ja fusufault
+
+ xorl %eax,%eax
+ movq (%rdi),%r11
+ movq %rax,PCB_ONFAULT(%rcx)
+ movq %r11,(%rsi)
+ POP_FRAME_POINTER
+ ret
+END(fueword64_nosmap)
+
+ENTRY(fueword_smap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -528,15 +693,33 @@
ja fusufault
xorl %eax,%eax
+ stac
movq (%rdi),%r11
+ clac
movq %rax,PCB_ONFAULT(%rcx)
movq %r11,(%rsi)
POP_FRAME_POINTER
ret
-END(fueword64)
-END(fueword)
+END(fueword64_smap)
+
+ENTRY(fueword32_nosmap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rdi /* verify address is valid */
+ ja fusufault
+
+ xorl %eax,%eax
+ movl (%rdi),%r11d
+ movq %rax,PCB_ONFAULT(%rcx)
+ movl %r11d,(%rsi)
+ POP_FRAME_POINTER
+ ret
+END(fueword32_nosmap)
-ENTRY(fueword32)
+ENTRY(fueword32_smap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -546,14 +729,16 @@
ja fusufault
xorl %eax,%eax
+ stac
movl (%rdi),%r11d
+ clac
movq %rax,PCB_ONFAULT(%rcx)
movl %r11d,(%rsi)
POP_FRAME_POINTER
ret
-END(fueword32)
+END(fueword32_smap)
-ENTRY(fuword16)
+ENTRY(fuword16_nosmap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -566,9 +751,26 @@
movq $0,PCB_ONFAULT(%rcx)
POP_FRAME_POINTER
ret
-END(fuword16)
+END(fuword16_nosmap)
-ENTRY(fubyte)
+ENTRY(fuword16_smap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-2,%rax
+ cmpq %rax,%rdi
+ ja fusufault
+
+ stac
+ movzwl (%rdi),%eax
+ clac
+ movq $0,PCB_ONFAULT(%rcx)
+ POP_FRAME_POINTER
+ ret
+END(fuword16_smap)
+
+ENTRY(fubyte_nosmap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -581,9 +783,27 @@
movq $0,PCB_ONFAULT(%rcx)
POP_FRAME_POINTER
ret
-END(fubyte)
+END(fubyte_nosmap)
+
+ENTRY(fubyte_smap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-1,%rax
+ cmpq %rax,%rdi
+ ja fusufault
+
+ stac
+ movzbl (%rdi),%eax
+ clac
+ movq $0,PCB_ONFAULT(%rcx)
+ POP_FRAME_POINTER
+ ret
+END(fubyte_smap)
ALIGN_TEXT
+ /* Fault entry clears PSL.AC */
fusufault:
movq PCPU(CURPCB),%rcx
xorl %eax,%eax
@@ -597,8 +817,24 @@
* user memory.
* addr = %rdi, value = %rsi
*/
-ALTENTRY(suword64)
-ENTRY(suword)
+ENTRY(suword_nosmap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-8,%rax
+ cmpq %rax,%rdi /* verify address validity */
+ ja fusufault
+
+ movq %rsi,(%rdi)
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rcx
+ movq %rax,PCB_ONFAULT(%rcx)
+ POP_FRAME_POINTER
+ ret
+END(suword_nosmap)
+
+ENTRY(suword_smap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -607,16 +843,34 @@
cmpq %rax,%rdi /* verify address validity */
ja fusufault
+ stac
movq %rsi,(%rdi)
+ clac
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rcx
+ movq %rax,PCB_ONFAULT(%rcx)
+ POP_FRAME_POINTER
+ ret
+END(suword_smap)
+
+ENTRY(suword32_nosmap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rdi /* verify address validity */
+ ja fusufault
+
+ movl %esi,(%rdi)
xorl %eax,%eax
movq PCPU(CURPCB),%rcx
movq %rax,PCB_ONFAULT(%rcx)
POP_FRAME_POINTER
ret
-END(suword64)
-END(suword)
+END(suword32_nosmap)
-ENTRY(suword32)
+ENTRY(suword32_smap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -625,15 +879,17 @@
cmpq %rax,%rdi /* verify address validity */
ja fusufault
+ stac
movl %esi,(%rdi)
+ clac
xorl %eax,%eax
movq PCPU(CURPCB),%rcx
movq %rax,PCB_ONFAULT(%rcx)
POP_FRAME_POINTER
ret
-END(suword32)
+END(suword32_smap)
-ENTRY(suword16)
+ENTRY(suword16_nosmap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -648,9 +904,28 @@
movq %rax,PCB_ONFAULT(%rcx)
POP_FRAME_POINTER
ret
-END(suword16)
+END(suword16_nosmap)
-ENTRY(subyte)
+ENTRY(suword16_smap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-2,%rax
+ cmpq %rax,%rdi /* verify address validity */
+ ja fusufault
+
+ stac
+ movw %si,(%rdi)
+ clac
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rcx /* restore trashed register */
+ movq %rax,PCB_ONFAULT(%rcx)
+ POP_FRAME_POINTER
+ ret
+END(suword16_smap)
+
+ENTRY(subyte_nosmap)
PUSH_FRAME_POINTER
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -666,7 +941,27 @@
movq %rax,PCB_ONFAULT(%rcx)
POP_FRAME_POINTER
ret
-END(subyte)
+END(subyte_nosmap)
+
+ENTRY(subyte_smap)
+ PUSH_FRAME_POINTER
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-1,%rax
+ cmpq %rax,%rdi /* verify address validity */
+ ja fusufault
+
+ movl %esi,%eax
+ stac
+ movb %al,(%rdi)
+ clac
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rcx /* restore trashed register */
+ movq %rax,PCB_ONFAULT(%rcx)
+ POP_FRAME_POINTER
+ ret
+END(subyte_smap)
/*
* copyinstr(from, to, maxlen, int *lencopied)
@@ -677,7 +972,42 @@
* EFAULT on protection violations. If lencopied is non-zero,
* return the actual length in *lencopied.
*/
-ENTRY(copyinstr)
+ENTRY(copyinstr_nosmap)
+ PUSH_FRAME_POINTER
+ movq %rdx,%r8 /* %r8 = maxlen */
+ movq %rcx,%r9 /* %r9 = *len */
+ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */
+ movq PCPU(CURPCB),%rcx
+ movq $cpystrflt,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS,%rax
+
+ /* make sure 'from' is within bounds */
+ subq %rsi,%rax
+ jbe cpystrflt
+
+ /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
+ cmpq %rdx,%rax
+ jae 1f
+ movq %rax,%rdx
+ movq %rax,%r8
+1:
+ incq %rdx
+ cld
+
+2:
+ decq %rdx
+ jz copyinstr_toolong
+
+ lodsb
+ stosb
+ orb %al,%al
+ jnz 2b
+
+ jmp copyinstr_succ
+END(copyinstr_nosmap)
+
+ENTRY(copyinstr_smap)
PUSH_FRAME_POINTER
movq %rdx,%r8 /* %r8 = maxlen */
movq %rcx,%r9 /* %r9 = *len */
@@ -701,26 +1031,29 @@
2:
decq %rdx
- jz 3f
+ jz copyinstr_succ
+ stac
lodsb
stosb
+ clac
orb %al,%al
jnz 2b
+copyinstr_succ:
/* Success -- 0 byte reached */
decq %rdx
xorl %eax,%eax
jmp cpystrflt_x
-3:
+copyinstr_toolong:
/* rdx is zero - return ENAMETOOLONG or EFAULT */
movq $VM_MAXUSER_ADDRESS,%rax
cmpq %rax,%rsi
jae cpystrflt
-4:
movq $ENAMETOOLONG,%rax
jmp cpystrflt_x
+ /* Fault entry clears PSL.AC */
cpystrflt:
movq $EFAULT,%rax
@@ -736,7 +1069,7 @@
1:
POP_FRAME_POINTER
ret
-END(copyinstr)
+END(copyinstr_smap)
/*
* copystr(from, to, maxlen, int *lencopied)
Index: sys/amd64/amd64/trap.c
===================================================================
--- sys/amd64/amd64/trap.c
+++ sys/amd64/amd64/trap.c
@@ -662,6 +662,21 @@
trap(frame);
}
+static bool
+trap_is_smap(struct trapframe *frame)
+{
+
+ /*
+ * A page fault is classified as SMAP-induced if:
+ * - SMAP is supported;
+ * - kernel mode accessed present page;
+ * - rflags.AC was cleared.
+ */
+ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 &&
+ (frame->tf_err & (PGEX_P | PGEX_U | PGEX_RSV)) == PGEX_P &&
+ (frame->tf_rflags & PSL_AC) == 0);
+}
+
static int
trap_pfault(struct trapframe *frame, int usermode)
{
@@ -739,9 +754,13 @@
* handling routine. Since accessing the address
* without the handler is a bug, do not try to handle
* it normally, and panic immediately.
+ *
+ * If SMAP is enabled, filter SMAP faults also,
+ * because illegal access might occur to the mapped
+ * user address, causing infinite loop.
*/
if (!usermode && (td->td_intr_nesting_level != 0 ||
- curpcb->pcb_onfault == NULL)) {
+ trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) {
trap_fatal(frame, eva);
return (-1);
}
Index: sys/amd64/ia32/ia32_exception.S
===================================================================
--- sys/amd64/ia32/ia32_exception.S
+++ sys/amd64/ia32/ia32_exception.S
@@ -70,7 +70,9 @@
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
- cld
+ pushfq
+ andq $~(PSL_D | PSL_AC),(%rsp)
+ popfq
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi
call ia32_syscall
Index: sys/amd64/include/asmacros.h
===================================================================
--- sys/amd64/include/asmacros.h
+++ sys/amd64/include/asmacros.h
@@ -255,7 +255,9 @@
movq %r15,TF_R15(%rsp)
SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
- cld
+ pushfq
+ andq $~(PSL_D|PSL_AC),(%rsp)
+ popfq
testb $SEL_RPL_MASK,TF_CS(%rsp) /* come from kernel ? */
jz 1f /* yes, leave PCB_FULL_IRET alone */
movq PCPU(CURPCB),%r8
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -430,8 +430,8 @@
void pmap_invalidate_all(pmap_t);
void pmap_invalidate_cache(void);
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
-void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
- boolean_t force);
+void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
+void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
Index: sys/conf/files.amd64
===================================================================
--- sys/conf/files.amd64
+++ sys/conf/files.amd64
@@ -129,6 +129,7 @@
amd64/amd64/atomic.c standard
amd64/amd64/bios.c standard
amd64/amd64/bpf_jit_machdep.c optional bpf_jitter
+amd64/amd64/copyout.c standard
amd64/amd64/cpu_switch.S standard
amd64/amd64/db_disasm.c optional ddb
amd64/amd64/db_interface.c optional ddb
Index: sys/dev/drm2/drm_os_freebsd.c
===================================================================
--- sys/dev/drm2/drm_os_freebsd.c
+++ sys/dev/drm2/drm_os_freebsd.c
@@ -394,8 +394,8 @@
{
#if defined(__i386__) || defined(__amd64__)
- pmap_invalidate_cache_range((vm_offset_t)addr,
- (vm_offset_t)addr + length, TRUE);
+ pmap_force_invalidate_cache_range((vm_offset_t)addr,
+ (vm_offset_t)addr + length);
#else
DRM_ERROR("drm_clflush_virt_range not implemented on this architecture");
#endif
Index: sys/dev/drm2/i915/intel_ringbuffer.c
===================================================================
--- sys/dev/drm2/i915/intel_ringbuffer.c
+++ sys/dev/drm2/i915/intel_ringbuffer.c
@@ -471,8 +471,8 @@
if (pc->cpu_page == NULL)
goto err_unpin;
pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
- pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
- (vm_offset_t)pc->cpu_page + PAGE_SIZE, FALSE);
+ pmap_force_invalidate_cache_range((vm_offset_t)pc->cpu_page,
+ (vm_offset_t)pc->cpu_page + PAGE_SIZE);
pc->obj = obj;
ring->private = pc;
@@ -1102,8 +1102,9 @@
}
pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1);
- pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
- (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE, FALSE);
+ pmap_force_invalidate_cache_range(
+ (vm_offset_t)ring->status_page.page_addr,
+ (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
ring->status_page.obj = obj;
memset(ring->status_page.page_addr, 0, PAGE_SIZE);
Index: sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
===================================================================
--- sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
+++ sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
@@ -28,6 +28,7 @@
#include "assym.inc"
+#include <machine/psl.h>
#include <machine/asmacros.h>
#include <machine/specialreg.h>
Index: sys/i386/i386/npx.c
===================================================================
--- sys/i386/i386/npx.c
+++ sys/i386/i386/npx.c
@@ -67,6 +67,7 @@
#include <machine/specialreg.h>
#include <machine/segments.h>
#include <machine/ucontext.h>
+#include <x86/ifunc.h>
#include <machine/intr_machdep.h>
@@ -183,7 +184,6 @@
static void fpu_clean_state(void);
-static void fpusave(union savefpu *);
static void fpurstor(union savefpu *);
int hw_float;
@@ -201,8 +201,6 @@
u_int size;
} *xsave_area_desc;
-static int use_xsaveopt;
-
static volatile u_int npx_traps_while_probing;
alias_for_inthand_t probetrap;
@@ -309,6 +307,69 @@
return (hw_float);
}
+static void
+npxsave_xsaveopt(union savefpu *addr)
+{
+
+ xsaveopt((char *)addr, xsave_mask);
+}
+
+static void
+fpusave_xsave(union savefpu *addr)
+{
+
+ xsave((char *)addr, xsave_mask);
+}
+
+static void
+fpusave_fxsave(union savefpu *addr)
+{
+
+ fxsave((char *)addr);
+}
+
+static void
+fpusave_fnsave(union savefpu *addr)
+{
+
+ fnsave((char *)addr);
+}
+
+static void
+init_xsave(void)
+{
+
+ if (use_xsave)
+ return;
+ if (!cpu_fxsr || (cpu_feature2 & CPUID2_XSAVE) == 0)
+ return;
+ use_xsave = 1;
+ TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
+}
+
+DEFINE_IFUNC(, void, npxsave_core, (union savefpu *), static)
+{
+
+ init_xsave();
+ if (use_xsave)
+ return ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0 ?
+ npxsave_xsaveopt : fpusave_xsave);
+ if (cpu_fxsr)
+ return (fpusave_fxsave);
+ return (fpusave_fnsave);
+}
+
+DEFINE_IFUNC(, void, fpusave, (union savefpu *), static)
+{
+
+ init_xsave();
+ if (use_xsave)
+ return (fpusave_xsave);
+ if (cpu_fxsr)
+ return (fpusave_fxsave);
+ return (fpusave_fnsave);
+}
+
/*
* Enable XSAVE if supported and allowed by user.
* Calculate the xsave_mask.
@@ -319,13 +380,8 @@
u_int cp[4];
uint64_t xsave_mask_user;
- if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) {
- use_xsave = 1;
- TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
- }
if (!use_xsave)
return;
-
cpuid_count(0xd, 0x0, cp);
xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
if ((cp[0] & xsave_mask) != xsave_mask)
@@ -339,14 +395,9 @@
xsave_mask &= ~XFEATURE_AVX512;
if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX)
xsave_mask &= ~XFEATURE_MPX;
-
- cpuid_count(0xd, 0x1, cp);
- if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0)
- use_xsaveopt = 1;
}
/*
-
* Calculate the fpu save area size.
*/
static void
@@ -852,15 +903,11 @@
* npxsave() atomically with checking fpcurthread.
*/
void
-npxsave(addr)
- union savefpu *addr;
+npxsave(union savefpu *addr)
{
stop_emulating();
- if (use_xsaveopt)
- xsaveopt((char *)addr, xsave_mask);
- else
- fpusave(addr);
+ npxsave_core(addr);
start_emulating();
PCPU_SET(fpcurthread, NULL);
}
@@ -1072,19 +1119,6 @@
return (0);
}
-static void
-fpusave(addr)
- union savefpu *addr;
-{
-
- if (use_xsave)
- xsave((char *)addr, xsave_mask);
- else if (cpu_fxsr)
- fxsave(addr);
- else
- fnsave(addr);
-}
-
static void
npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87)
{
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -148,6 +148,7 @@
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#endif
+#include <x86/ifunc.h>
#include <machine/bootinfo.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
@@ -305,6 +306,10 @@
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_flush_page(vm_page_t m);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
+ vm_offset_t eva);
+static void pmap_invalidate_cache_range_all(vm_offset_t sva,
+ vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@@ -1366,37 +1371,55 @@
pmap_invalidate_page(pmap, va);
}
+DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t),
+ static)
+{
+
+ if ((cpu_feature & CPUID_SS) != 0)
+ return (pmap_invalidate_cache_range_selfsnoop);
+ if ((cpu_feature & CPUID_CLFSH) != 0)
+ return (pmap_force_invalidate_cache_range);
+ return (pmap_invalidate_cache_range_all);
+}
+
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
-void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
+static void
+pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva)
{
- if (force) {
- sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
- } else {
- KASSERT((sva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: sva not page-aligned"));
- KASSERT((eva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: eva not page-aligned"));
- }
+ KASSERT((sva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: sva not page-aligned"));
+ KASSERT((eva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: eva not page-aligned"));
+}
- if ((cpu_feature & CPUID_SS) != 0 && !force)
- ; /* If "Self Snoop" is supported and allowed, do nothing. */
- else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-#ifdef DEV_APIC
+void
+pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
+{
+
+ sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
+ if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
/*
- * XXX: Some CPUs fault, hang, or trash the local APIC
- * registers if we use CLFLUSH on the local APIC
- * range. The local APIC is always uncached, so we
- * don't need to flush for that range anyway.
+ * The supplied range is bigger than 2MB.
+ * Globally invalidate cache.
*/
- if (pmap_kextract(sva) == lapic_paddr)
- return;
-#endif
+ pmap_invalidate_cache();
+ return;
+ }
+
+ /*
+ * XXX: Some CPUs fault, hang, or trash the local APIC
+ * registers if we use CLFLUSH on the local APIC
+ * range. The local APIC is always uncached, so we
+ * don't need to flush for that range anyway.
+ */
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
+
+ if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
/*
- * Otherwise, do per-cache line flush. Use the sfence
+ * Do per-cache line flush. Use the sfence
* instruction to insure that previous stores are
* included in the write-back. The processor
* propagates flush to other processors in the cache
@@ -1406,12 +1429,7 @@
for (; sva < eva; sva += cpu_clflush_line_size)
clflushopt(sva);
sfence();
- } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-#ifdef DEV_APIC
- if (pmap_kextract(sva) == lapic_paddr)
- return;
-#endif
+ } else {
/*
* Writes are ordered by CLFLUSH on Intel CPUs.
*/
@@ -1421,17 +1439,17 @@
clflush(sva);
if (cpu_vendor_id != CPU_VENDOR_INTEL)
mfence();
- } else {
-
- /*
- * No targeted cache flush methods are supported by CPU,
- * or the supplied range is bigger than 2MB.
- * Globally invalidate cache.
- */
- pmap_invalidate_cache();
}
}
+static void
+pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva)
+{
+
+ pmap_invalidate_cache_range_selfsnoop(sva, eva);
+ pmap_invalidate_cache();
+}
+
void
pmap_invalidate_cache_pages(vm_page_t *pages, int count)
{
@@ -5177,7 +5195,7 @@
for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
- pmap_invalidate_cache_range(va, va + size, FALSE);
+ pmap_invalidate_cache_range(va, va + size);
return ((void *)(va + offset));
}
@@ -5415,7 +5433,7 @@
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
- pmap_invalidate_cache_range(base, tmpva, FALSE);
+ pmap_invalidate_cache_range(base, tmpva);
}
return (0);
}
Index: sys/i386/i386/vm_machdep.c
===================================================================
--- sys/i386/i386/vm_machdep.c
+++ sys/i386/i386/vm_machdep.c
@@ -650,7 +650,7 @@
* settings are recalculated.
*/
pmap_qenter(sf->kva, &m, 1);
- pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE, FALSE);
+ pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
}
/*
Index: sys/i386/include/pmap.h
===================================================================
--- sys/i386/include/pmap.h
+++ sys/i386/include/pmap.h
@@ -394,8 +394,8 @@
void pmap_invalidate_all(pmap_t);
void pmap_invalidate_cache(void);
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
-void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
- boolean_t force);
+void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
+void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
void *pmap_trm_alloc(size_t size, int flags);
void pmap_trm_free(void *addr, size_t size);
Index: sys/x86/iommu/intel_utils.c
===================================================================
--- sys/x86/iommu/intel_utils.c
+++ sys/x86/iommu/intel_utils.c
@@ -368,8 +368,7 @@
* If DMAR does not snoop paging structures accesses, flush
* CPU cache to memory.
*/
- pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz,
- TRUE);
+ pmap_force_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz);
}
void
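
The resolvers added in sys/amd64/amd64/copyout.c all follow the same pattern: at boot, each user-access primitive is bound once to either its _smap or _nosmap variant depending on whether CPUID_STDEXT_SMAP is set in cpu_stdext_feature. The sketch below is a minimal userland illustration of that dispatch idea only; it uses a plain function pointer and a faked feature flag instead of the kernel's DEFINE_IFUNC machinery, and the copyin_*_demo names are hypothetical.

/*
 * Illustrative sketch (not the kernel's DEFINE_IFUNC): select a copyin
 * implementation once, based on a CPU feature flag, and have every
 * caller go through the selected pointer.  The resolvers above do the
 * equivalent with cpu_stdext_feature & CPUID_STDEXT_SMAP; here the
 * flag is passed in explicitly for demonstration.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static int
copyin_nosmap_demo(const void *uaddr, void *kaddr, size_t len)
{
	/* Plain copy; the real nosmap variant needs no stac/clac. */
	memcpy(kaddr, uaddr, len);
	return (0);
}

static int
copyin_smap_demo(const void *uaddr, void *kaddr, size_t len)
{
	/* The real smap variant brackets the user access with stac/clac. */
	memcpy(kaddr, uaddr, len);
	return (0);
}

/* The "ifunc": one pointer, resolved once, used by all callers. */
static int (*copyin_demo)(const void *, void *, size_t);

static void
copyin_resolve(int cpu_has_smap)
{
	/* Mirrors the ternary in the resolver bodies above. */
	copyin_demo = cpu_has_smap ? copyin_smap_demo : copyin_nosmap_demo;
}

int
main(void)
{
	char src[] = "hello", dst[8];

	copyin_resolve(1);		/* pretend SMAP is present */
	copyin_demo(src, dst, sizeof(src));
	printf("%s via %s\n", dst,
	    copyin_demo == copyin_smap_demo ? "smap" : "nosmap");
	return (0);
}

Resolving once keeps the per-call cost at a single indirect call (with real ELF ifuncs, a call patched at load time) instead of a feature test on every copyin/copyout.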
