Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F144299225
D25483.id74271.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
12 KB
Referenced Files
None
Subscribers
None
D25483.id74271.diff
View Options
Index: sys/amd64/amd64/exception.S
===================================================================
--- sys/amd64/amd64/exception.S
+++ sys/amd64/amd64/exception.S
@@ -47,6 +47,7 @@
#include <machine/asmacros.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
+#include <machine/pmap.h>
#ifdef KDTRACE_HOOKS
.bss
@@ -607,8 +608,10 @@
cmpq $~0,PCPU(UCR3)
je 2f
movq PCPU(UCR3),%r9
+ andq PCPU(UCR3_LOAD_MASK),%r9
movq %r9,%cr3
2: xorl %r9d,%r9d
+ movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
swapgs
sysretq
@@ -1262,6 +1265,8 @@
movq TF_SS(%rsp),%rax
movq %rax,PTI_SS(%rdx)
movq PCPU(UCR3),%rax
+ andq PCPU(UCR3_LOAD_MASK),%rax
+ movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
swapgs
movq %rdx,%rsp
movq %rax,%cr3
Index: sys/amd64/amd64/genassym.c
===================================================================
--- sys/amd64/amd64/genassym.c
+++ sys/amd64/amd64/genassym.c
@@ -230,6 +230,7 @@
ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
ASSYM(PC_KCR3, offsetof(struct pcpu, pc_kcr3));
ASSYM(PC_UCR3, offsetof(struct pcpu, pc_ucr3));
+ASSYM(PC_UCR3_LOAD_MASK, offsetof(struct pcpu, pc_ucr3_load_mask));
ASSYM(PC_SAVED_UCR3, offsetof(struct pcpu, pc_saved_ucr3));
ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack));
ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c
+++ sys/amd64/amd64/machdep.c
@@ -1555,6 +1555,7 @@
PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
+ PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
}
void
Index: sys/amd64/amd64/mp_machdep.c
===================================================================
--- sys/amd64/amd64/mp_machdep.c
+++ sys/amd64/amd64/mp_machdep.c
@@ -310,6 +310,7 @@
pc->pc_fs32p = &gdt[GUFS32_SEL];
pc->pc_gs32p = &gdt[GUGS32_SEL];
pc->pc_ldt = (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL];
+ pc->pc_ucr3_load_mask = PMAP_UCR3_NOMASK;
/* See comment in pmap_bootstrap(). */
pc->pc_pcid_next = PMAP_PCID_KERN + 2;
pc->pc_pcid_gen = 1;
@@ -591,8 +592,8 @@
invpcid(&d, INVPCID_CTXGLOB);
} else {
invpcid(&d, INVPCID_CTX);
- d.pcid |= PMAP_PCID_USER_PT;
- invpcid(&d, INVPCID_CTX);
+ if (smp_tlb_pmap == PCPU_GET(curpmap))
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
}
PCPU_SET(smp_tlb_done, generation);
}
@@ -600,7 +601,6 @@
void
invltlb_pcid_handler(void)
{
- uint64_t kcr3, ucr3;
uint32_t generation, pcid;
#ifdef COUNT_XINVLTLB_HITS
@@ -621,15 +621,11 @@
* invalidation when switching to the pmap on this
* CPU.
*/
- if (PCPU_GET(curpmap) == smp_tlb_pmap) {
+ if (smp_tlb_pmap == PCPU_GET(curpmap)) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
- kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
- ucr3 = smp_tlb_pmap->pm_ucr3;
- if (ucr3 != PMAP_NO_CR3) {
- ucr3 |= PMAP_PCID_USER_PT | pcid;
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- } else
- load_cr3(kcr3);
+ load_cr3(smp_tlb_pmap->pm_cr3 | pcid);
+ if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
}
}
PCPU_SET(smp_tlb_done, generation);
@@ -650,7 +646,9 @@
generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
- if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (smp_tlb_pmap == PCPU_GET(curpmap) &&
+ smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
@@ -677,7 +675,8 @@
generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
- (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
+ (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
@@ -707,7 +706,9 @@
invlpg(addr);
addr += PAGE_SIZE;
} while (addr < addr2);
- if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (smp_tlb_pmap == PCPU_GET(curpmap) &&
+ smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
@@ -743,7 +744,8 @@
addr += PAGE_SIZE;
} while (addr < addr2);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
- (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
+ (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2520,7 +2520,16 @@
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ /*
+ * If we context-switched right after
+ * PCPU_GET(ucr3_load_mask), we could read the
+ * ~CR3_PCID_SAVE mask, which causes us to skip
+ * the code below to invalidate user pages. This
+ * is handled in pmap_activate_sw_pcid_pti() by
+ * clearing pm_gen if ucr3_load_mask is ~CR3_PCID_SAVE.
+ */
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
/*
* Because pm_pcid is recalculated on a
* context switch, we must disable switching.
@@ -2635,7 +2644,8 @@
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
critical_enter();
pcid = pmap->pm_pcids[cpuid].pm_pcid;
if (invpcid_works1) {
@@ -2736,7 +2746,7 @@
pmap_invalidate_all_pcid(pmap_t pmap, bool invpcid_works1)
{
struct invpcid_descr d;
- uint64_t kcr3, ucr3;
+ uint64_t kcr3;
uint32_t pcid;
u_int cpuid, i;
@@ -2757,20 +2767,12 @@
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
- d.pcid |= PMAP_PCID_USER_PT;
- invpcid(&d, INVPCID_CTX);
- }
} else {
kcr3 = pmap->pm_cr3 | pcid;
- ucr3 = pmap->pm_ucr3;
- if (ucr3 != PMAP_NO_CR3) {
- ucr3 |= pcid | PMAP_PCID_USER_PT;
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- } else {
- load_cr3(kcr3);
- }
+ load_cr3(kcr3);
}
+ if (pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
critical_exit();
} else
pmap->pm_pcids[cpuid].pm_gen = 0;
@@ -8814,12 +8816,23 @@
PCPU_GET(pti_rsp0) : (uintptr_t)td->td_md.md_stack_base;
}
-static void inline
-pmap_activate_sw_pcid_pti(pmap_t pmap, u_int cpuid, const bool invpcid_works1)
+static void
+pmap_activate_sw_pcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
{
- struct invpcid_descr d;
+ pmap_t old_pmap;
uint64_t cached, cr3, kcr3, ucr3;
+ KASSERT((read_rflags() & PSL_I) == 0,
+ ("PCID needs interrupts disabled in pmap_activate_sw()"));
+
+ /* See the comment in pmap_invalidate_page_pcid(). */
+ if (PCPU_GET(ucr3_load_mask) != PMAP_UCR3_NOMASK) {
+ PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
+ old_pmap = PCPU_GET(curpmap);
+ MPASS(old_pmap->pm_ucr3 != PMAP_NO_CR3);
+ old_pmap->pm_pcids[cpuid].pm_gen = 0;
+ }
+
cached = pmap_pcid_alloc_checked(pmap, cpuid);
cr3 = rcr3();
if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
@@ -8829,68 +8842,14 @@
ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
PMAP_PCID_USER_PT;
- if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) {
- /*
- * Explicitly invalidate translations cached from the
- * user page table. They are not automatically
- * flushed by reload of cr3 with the kernel page table
- * pointer above.
- *
- * Note that the if() condition is resolved statically
- * by using the function argument instead of
- * runtime-evaluated invpcid_works value.
- */
- if (invpcid_works1) {
- d.pcid = PMAP_PCID_USER_PT |
- pmap->pm_pcids[cpuid].pm_pcid;
- d.pad = 0;
- d.addr = 0;
- invpcid(&d, INVPCID_CTX);
- } else {
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- }
- }
+ if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
if (cached)
PCPU_INC(pm_save_cnt);
-}
-static void
-pmap_activate_sw_pcid_invpcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
-{
-
- pmap_activate_sw_pcid_pti(pmap, cpuid, true);
- pmap_activate_sw_pti_post(td, pmap);
-}
-
-static void
-pmap_activate_sw_pcid_noinvpcid_pti(struct thread *td, pmap_t pmap,
- u_int cpuid)
-{
- register_t rflags;
-
- /*
- * If the INVPCID instruction is not available,
- * invltlb_pcid_handler() is used to handle an invalidate_all
- * IPI, which checks for curpmap == smp_tlb_pmap. The below
- * sequence of operations has a window where %CR3 is loaded
- * with the new pmap's PML4 address, but the curpmap value has
- * not yet been updated. This causes the invltlb IPI handler,
- * which is called between the updates, to execute as a NOP,
- * which leaves stale TLB entries.
- *
- * Note that the most typical use of pmap_activate_sw(), from
- * the context switch, is immune to this race, because
- * interrupts are disabled (while the thread lock is owned),
- * and the IPI happens after curpmap is updated. Protect
- * other callers in a similar way, by disabling interrupts
- * around the %cr3 register reload and curpmap assignment.
- */
- rflags = intr_disable();
- pmap_activate_sw_pcid_pti(pmap, cpuid, false);
- intr_restore(rflags);
pmap_activate_sw_pti_post(td, pmap);
}
@@ -8900,6 +8859,9 @@
{
uint64_t cached, cr3;
+ KASSERT((read_rflags() & PSL_I) == 0,
+ ("PCID needs interrupts disabled in pmap_activate_sw()"));
+
cached = pmap_pcid_alloc_checked(pmap, cpuid);
cr3 = rcr3();
if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
@@ -8910,17 +8872,6 @@
PCPU_INC(pm_save_cnt);
}
-static void
-pmap_activate_sw_pcid_noinvpcid_nopti(struct thread *td __unused, pmap_t pmap,
- u_int cpuid)
-{
- register_t rflags;
-
- rflags = intr_disable();
- pmap_activate_sw_pcid_nopti(td, pmap, cpuid);
- intr_restore(rflags);
-}
-
static void
pmap_activate_sw_nopcid_nopti(struct thread *td __unused, pmap_t pmap,
u_int cpuid __unused)
@@ -8945,14 +8896,10 @@
u_int))
{
- if (pmap_pcid_enabled && pti && invpcid_works)
- return (pmap_activate_sw_pcid_invpcid_pti);
- else if (pmap_pcid_enabled && pti && !invpcid_works)
- return (pmap_activate_sw_pcid_noinvpcid_pti);
- else if (pmap_pcid_enabled && !pti && invpcid_works)
+ if (pmap_pcid_enabled && pti)
+ return (pmap_activate_sw_pcid_pti);
+ else if (pmap_pcid_enabled && !pti)
return (pmap_activate_sw_pcid_nopti);
- else if (pmap_pcid_enabled && !pti && !invpcid_works)
- return (pmap_activate_sw_pcid_noinvpcid_nopti);
else if (!pmap_pcid_enabled && pti)
return (pmap_activate_sw_nopcid_pti);
else /* if (!pmap_pcid_enabled && !pti) */
@@ -8989,10 +8936,26 @@
void
pmap_activate(struct thread *td)
{
-
- critical_enter();
+ /*
+ * invltlb_{invpcid,}_pcid_handler() handles an
+ * invalidate_all IPI and checks for curpmap ==
+ * smp_tlb_pmap. The sequence of operations below has a
+ * window where %CR3 is loaded with the new pmap's PML4
+ * address, but the curpmap value has not yet been updated.
+ * An invltlb IPI handler that runs between the two
+ * updates therefore executes as a NOP, which leaves
+ * stale TLB entries.
+ *
+ * Note that the most common use of pmap_activate_sw(), from
+ * a context switch, is immune to this race, because
+ * interrupts are disabled (while the thread lock is owned),
+ * so the IPI is delayed until after curpmap is updated. Protect
+ * other callers in a similar way, by disabling interrupts
+ * around the %cr3 register reload and curpmap assignment.
+ */
+ spinlock_enter();
pmap_activate_sw(td);
- critical_exit();
+ spinlock_exit();
}
void
Index: sys/amd64/include/pcpu.h
===================================================================
--- sys/amd64/include/pcpu.h
+++ sys/amd64/include/pcpu.h
@@ -94,7 +94,8 @@
u_int pc_ipi_bitmap; \
struct amd64tss pc_common_tss; \
struct user_segment_descriptor pc_gdt[NGDT]; \
- char __pad[2956] /* pad to UMA_PCPU_ALLOC_SIZE */
+ uint64_t pc_ucr3_load_mask; \
+ char __pad[2948] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -241,6 +241,7 @@
#define PMAP_PCID_USER_PT 0x800
#define PMAP_NO_CR3 (~0UL)
+#define PMAP_UCR3_NOMASK (~0UL)
#ifndef LOCORE
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Feb 8, 5:32 PM (14 h, 6 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28478994
Default Alt Text
D25483.id74271.diff (12 KB)
Attached To
Mode
D25483: amd64 pmap: microoptimize local shootdowns for PCID PTI configurations
Attached
Detach File
Event Timeline
Log In to Comment