D25483.id73818.diff

Index: sys/amd64/amd64/exception.S
===================================================================
--- sys/amd64/amd64/exception.S
+++ sys/amd64/amd64/exception.S
@@ -47,6 +47,7 @@
#include <machine/asmacros.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
+#include <machine/pmap.h>
#ifdef KDTRACE_HOOKS
.bss
@@ -607,8 +608,10 @@
cmpq $~0,PCPU(UCR3)
je 2f
movq PCPU(UCR3),%r9
+ andq PCPU(UCR3_LOAD_MASK),%r9
movq %r9,%cr3
2: xorl %r9d,%r9d
+ movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
swapgs
sysretq
@@ -1262,6 +1265,8 @@
movq TF_SS(%rsp),%rax
movq %rax,PTI_SS(%rdx)
movq PCPU(UCR3),%rax
+ andq PCPU(UCR3_LOAD_MASK),%rax
+ movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
swapgs
movq %rdx,%rsp
movq %rax,%cr3
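
[Editorial note, not part of the patch.] On both return-to-user paths (sysretq above, and the PTI trampoline used for iretq), the user %cr3 image is now ANDed with the per-CPU ucr3_load_mask before being loaded, and the mask is immediately re-armed to PMAP_UCR3_NOMASK. Since PMAP_UCR3_NOMASK is all ones, the AND is a no-op unless an invalidation handler narrowed the mask to ~CR3_PCID_SAVE; in that case bit 63 (CR3_PCID_SAVE, the "do not flush" bit) is cleared, and the mov to %cr3 flushes the TLB entries tagged with the user PCID as a side effect. A minimal C model of this return path follows; it is only a sketch, and the names pcpu_model and user_cr3_to_load are illustrative, not kernel identifiers:

#include <stdint.h>

/* Values mirrored from sys/amd64/include/pmap.h. */
#define	CR3_PCID_SAVE		(1ULL << 63)	/* "no flush" bit in %cr3 */
#define	PMAP_NO_CR3		(~0ULL)
#define	PMAP_UCR3_NOMASK	(~0ULL)

/* Illustrative stand-in for the per-CPU fields the patch adds. */
struct pcpu_model {
	uint64_t	ucr3;		/* user page-table %cr3 image */
	uint64_t	ucr3_load_mask;	/* one-shot mask, normally all ones */
};

/*
 * What the assembly above does: apply the pending mask to the user
 * %cr3 image and re-arm the mask.  When the mask is ~CR3_PCID_SAVE,
 * the returned value has bit 63 clear, so loading it into %cr3
 * flushes the TLB entries tagged with the user PCID.
 */
static uint64_t
user_cr3_to_load(struct pcpu_model *pc)
{
	uint64_t cr3;

	cr3 = pc->ucr3 & pc->ucr3_load_mask;
	pc->ucr3_load_mask = PMAP_UCR3_NOMASK;	/* consume the request */
	return (cr3);
}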
Index: sys/amd64/amd64/genassym.c
===================================================================
--- sys/amd64/amd64/genassym.c
+++ sys/amd64/amd64/genassym.c
@@ -230,6 +230,7 @@
ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
ASSYM(PC_KCR3, offsetof(struct pcpu, pc_kcr3));
ASSYM(PC_UCR3, offsetof(struct pcpu, pc_ucr3));
+ASSYM(PC_UCR3_LOAD_MASK, offsetof(struct pcpu, pc_ucr3_load_mask));
ASSYM(PC_SAVED_UCR3, offsetof(struct pcpu, pc_saved_ucr3));
ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack));
ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c
+++ sys/amd64/amd64/machdep.c
@@ -1555,6 +1555,7 @@
PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
+ PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
}
void
Index: sys/amd64/amd64/mp_machdep.c
===================================================================
--- sys/amd64/amd64/mp_machdep.c
+++ sys/amd64/amd64/mp_machdep.c
@@ -310,6 +310,7 @@
pc->pc_fs32p = &gdt[GUFS32_SEL];
pc->pc_gs32p = &gdt[GUGS32_SEL];
pc->pc_ldt = (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL];
+ pc->pc_ucr3_load_mask = PMAP_UCR3_NOMASK;
/* See comment in pmap_bootstrap(). */
pc->pc_pcid_next = PMAP_PCID_KERN + 2;
pc->pc_pcid_gen = 1;
@@ -591,8 +592,8 @@
invpcid(&d, INVPCID_CTXGLOB);
} else {
invpcid(&d, INVPCID_CTX);
- d.pcid |= PMAP_PCID_USER_PT;
- invpcid(&d, INVPCID_CTX);
+ if (smp_tlb_pmap == PCPU_GET(curpmap))
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
}
PCPU_SET(smp_tlb_done, generation);
}
@@ -625,11 +626,9 @@
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
ucr3 = smp_tlb_pmap->pm_ucr3;
- if (ucr3 != PMAP_NO_CR3) {
- ucr3 |= PMAP_PCID_USER_PT | pcid;
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- } else
- load_cr3(kcr3);
+ load_cr3(kcr3);
+ if (ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
}
}
PCPU_SET(smp_tlb_done, generation);
@@ -650,7 +649,9 @@
generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
- if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (smp_tlb_pmap == PCPU_GET(curpmap) &&
+ smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
@@ -677,7 +678,8 @@
generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
- (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
+ (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
@@ -707,7 +709,9 @@
invlpg(addr);
addr += PAGE_SIZE;
} while (addr < addr2);
- if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (smp_tlb_pmap == PCPU_GET(curpmap) &&
+ smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
@@ -743,7 +747,8 @@
addr += PAGE_SIZE;
} while (addr < addr2);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
- (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
+ (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
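
[Editorial note.] The IPI handlers no longer invalidate the user page-table PCID eagerly (the second INVPCID_CTX, or pmap_pti_pcid_invalidate()): the invalidate-all handlers instead arm ucr3_load_mask when the target pmap is the one currently active on this CPU, deferring the user-side flush to the next return to user mode. The per-page handlers additionally test ucr3_load_mask == PMAP_UCR3_NOMASK, since a targeted INVPCID/INVLPG of user translations is pointless while a full deferred flush is already pending. A sketch of the deferral, continuing the pcpu_model above (pmap_is_current and pmap_has_ucr3 stand in for the smp_tlb_pmap == PCPU_GET(curpmap) and pm_ucr3 != PMAP_NO_CR3 tests in the patch):

/*
 * Model of the new invalidate-all handler tail: the kernel-side
 * flush is immediate, the user-side flush is deferred by arming
 * the one-shot mask, and only for the currently active pmap.
 */
static void
invltlb_handler_model(struct pcpu_model *pc, int pmap_is_current,
    int pmap_has_ucr3)
{
	/* ... kernel-side flush (INVPCID_CTX or load_cr3) goes here ... */
	if (pmap_is_current && pmap_has_ucr3)
		pc->ucr3_load_mask = ~CR3_PCID_SAVE;	/* defer user flush */
}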
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2520,7 +2520,17 @@
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ /*
+ * If we context-switched right after
+ * PCPU_GET(ucr3_load_mask), we could read the
+ * ~CR3_PCID_SAVE mask, which causes us to skip
+ * the code below to invalidate user pages. This
+ * is handled in pmap_activate_sw_pcid_pti() by
+ * clearing pm_gen if ucr3_load_mask is not
+ * trivial.
+ */
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
/*
* Because pm_pcid is recalculated on a
* context switch, we must disable switching.
@@ -2635,7 +2645,8 @@
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
critical_enter();
pcid = pmap->pm_pcids[cpuid].pm_pcid;
if (invpcid_works1) {
@@ -2757,19 +2768,14 @@
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
- d.pcid |= PMAP_PCID_USER_PT;
- invpcid(&d, INVPCID_CTX);
- }
+ if (pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
} else {
kcr3 = pmap->pm_cr3 | pcid;
ucr3 = pmap->pm_ucr3;
- if (ucr3 != PMAP_NO_CR3) {
- ucr3 |= pcid | PMAP_PCID_USER_PT;
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- } else {
- load_cr3(kcr3);
- }
+ load_cr3(kcr3);
+ if (ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
}
critical_exit();
} else
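
[Editorial note.] The local (curcpu) invalidation paths get the same treatment as the IPI handlers: pmap_invalidate_page() and pmap_invalidate_range() skip the user-PT invalidation when ucr3_load_mask is already non-trivial, and pmap_invalidate_all() arms the mask instead of invalidating immediately. The shape of the new guard, again in terms of the model above:

/*
 * Targeted user-PT invalidation is only worthwhile when no full
 * deferred flush is already pending on this CPU.
 */
static int
need_user_pt_invalidation(const struct pcpu_model *pc, int pmap_has_ucr3)
{
	return (pmap_has_ucr3 && pc->ucr3_load_mask == PMAP_UCR3_NOMASK);
}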
@@ -8814,12 +8820,22 @@
PCPU_GET(pti_rsp0) : (uintptr_t)td->td_md.md_stack_base;
}
-static void inline
-pmap_activate_sw_pcid_pti(pmap_t pmap, u_int cpuid, const bool invpcid_works1)
+static void
+pmap_activate_sw_pcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
{
- struct invpcid_descr d;
+ pmap_t old_pmap;
uint64_t cached, cr3, kcr3, ucr3;
+ KASSERT((read_rflags() & PSL_I) == 0,
+ ("PCID needs interrupts disabled in pmap_activate_sw()"));
+
+ if (PCPU_GET(ucr3_load_mask) != PMAP_UCR3_NOMASK) {
+ PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
+ old_pmap = PCPU_GET(curpmap);
+ MPASS(old_pmap->pm_ucr3 != PMAP_NO_CR3);
+ old_pmap->pm_pcids[cpuid].pm_gen = 0;
+ }
+
cached = pmap_pcid_alloc_checked(pmap, cpuid);
cr3 = rcr3();
if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
@@ -8829,68 +8845,14 @@
ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
PMAP_PCID_USER_PT;
- if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) {
- /*
- * Explicitly invalidate translations cached from the
- * user page table. They are not automatically
- * flushed by reload of cr3 with the kernel page table
- * pointer above.
- *
- * Note that the if() condition is resolved statically
- * by using the function argument instead of
- * runtime-evaluated invpcid_works value.
- */
- if (invpcid_works1) {
- d.pcid = PMAP_PCID_USER_PT |
- pmap->pm_pcids[cpuid].pm_pcid;
- d.pad = 0;
- d.addr = 0;
- invpcid(&d, INVPCID_CTX);
- } else {
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- }
- }
+ if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
if (cached)
PCPU_INC(pm_save_cnt);
-}
-static void
-pmap_activate_sw_pcid_invpcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
-{
-
- pmap_activate_sw_pcid_pti(pmap, cpuid, true);
- pmap_activate_sw_pti_post(td, pmap);
-}
-
-static void
-pmap_activate_sw_pcid_noinvpcid_pti(struct thread *td, pmap_t pmap,
- u_int cpuid)
-{
- register_t rflags;
-
- /*
- * If the INVPCID instruction is not available,
- * invltlb_pcid_handler() is used to handle an invalidate_all
- * IPI, which checks for curpmap == smp_tlb_pmap. The below
- * sequence of operations has a window where %CR3 is loaded
- * with the new pmap's PML4 address, but the curpmap value has
- * not yet been updated. This causes the invltlb IPI handler,
- * which is called between the updates, to execute as a NOP,
- * which leaves stale TLB entries.
- *
- * Note that the most typical use of pmap_activate_sw(), from
- * the context switch, is immune to this race, because
- * interrupts are disabled (while the thread lock is owned),
- * and the IPI happens after curpmap is updated. Protect
- * other callers in a similar way, by disabling interrupts
- * around the %cr3 register reload and curpmap assignment.
- */
- rflags = intr_disable();
- pmap_activate_sw_pcid_pti(pmap, cpuid, false);
- intr_restore(rflags);
pmap_activate_sw_pti_post(td, pmap);
}
@@ -8900,6 +8862,9 @@
{
uint64_t cached, cr3;
+ KASSERT((read_rflags() & PSL_I) == 0,
+ ("PCID needs interrupts disabled in pmap_activate_sw()"));
+
cached = pmap_pcid_alloc_checked(pmap, cpuid);
cr3 = rcr3();
if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
@@ -8910,17 +8875,6 @@
PCPU_INC(pm_save_cnt);
}
-static void
-pmap_activate_sw_pcid_noinvpcid_nopti(struct thread *td __unused, pmap_t pmap,
- u_int cpuid)
-{
- register_t rflags;
-
- rflags = intr_disable();
- pmap_activate_sw_pcid_nopti(td, pmap, cpuid);
- intr_restore(rflags);
-}
-
static void
pmap_activate_sw_nopcid_nopti(struct thread *td __unused, pmap_t pmap,
u_int cpuid __unused)
@@ -8945,14 +8899,10 @@
u_int))
{
- if (pmap_pcid_enabled && pti && invpcid_works)
- return (pmap_activate_sw_pcid_invpcid_pti);
- else if (pmap_pcid_enabled && pti && !invpcid_works)
- return (pmap_activate_sw_pcid_noinvpcid_pti);
- else if (pmap_pcid_enabled && !pti && invpcid_works)
+ if (pmap_pcid_enabled && pti)
+ return (pmap_activate_sw_pcid_pti);
+ else if (pmap_pcid_enabled && !pti)
return (pmap_activate_sw_pcid_nopti);
- else if (pmap_pcid_enabled && !pti && !invpcid_works)
- return (pmap_activate_sw_pcid_noinvpcid_nopti);
else if (!pmap_pcid_enabled && pti)
return (pmap_activate_sw_nopcid_pti);
else /* if (!pmap_pcid_enabled && !pti) */
@@ -8989,10 +8939,26 @@
void
pmap_activate(struct thread *td)
{
-
- critical_enter();
+ /*
+ * invltlb_{invpcid,}_pcid_handler() is used to handle an
+ * invalidate_all IPI, which checks for curpmap ==
+ * smp_tlb_pmap. The below sequence of operations has a
+ * window where %CR3 is loaded with the new pmap's PML4
+ * address, but the curpmap value has not yet been updated.
+ * This causes the invltlb IPI handler, which is called
+ * between the updates, to execute as a NOP, which leaves
+ * stale TLB entries.
+ *
+ * Note that the most typical use of pmap_activate_sw(), from
+ * the context switch, is immune to this race, because
+ * interrupts are disabled (while the thread lock is owned),
+ * and the IPI happens after curpmap is updated. Protect
+ * other callers in a similar way, by disabling interrupts
+ * around the %cr3 register reload and curpmap assignment.
+ */
+ spinlock_enter();
pmap_activate_sw(td);
- critical_exit();
+ spinlock_exit();
}
void
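
[Editorial note.] Two things happen in the pmap.c restructuring. First, pmap_activate() now disables interrupts via spinlock_enter() around pmap_activate_sw(), so the separate invpcid/noinvpcid variants collapse into a single pmap_activate_sw_pcid_pti(), with KASSERTs documenting the interrupts-disabled invariant. Second, the race called out in the pmap_invalidate_page() comment is closed here: if this CPU's ucr3_load_mask is non-trivial at context-switch time, resetting it would silently drop the deferred flush owed to the outgoing pmap, so that pmap's pm_gen is zeroed to force a fresh PCID (and hence a clean TLB context) on its next activation. A sketch of that step, continuing the model (old_pm_gen stands in for old_pmap->pm_pcids[cpuid].pm_gen):

/*
 * At context switch with interrupts disabled: consume a pending
 * deferred flush and invalidate the outgoing pmap's cached PCID
 * so the flush is not lost.
 */
static void
consume_pending_mask_model(struct pcpu_model *pc, uint64_t *old_pm_gen)
{
	if (pc->ucr3_load_mask != PMAP_UCR3_NOMASK) {
		pc->ucr3_load_mask = PMAP_UCR3_NOMASK;
		*old_pm_gen = 0;	/* outgoing pmap must re-alloc PCID */
	}
}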
Index: sys/amd64/include/pcpu.h
===================================================================
--- sys/amd64/include/pcpu.h
+++ sys/amd64/include/pcpu.h
@@ -94,7 +94,8 @@
u_int pc_ipi_bitmap; \
struct amd64tss pc_common_tss; \
struct user_segment_descriptor pc_gdt[NGDT]; \
- char __pad[2956] /* pad to UMA_PCPU_ALLOC_SIZE */
+ uint64_t pc_ucr3_load_mask; \
+ char __pad[2948] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -241,6 +241,7 @@
#define PMAP_PCID_USER_PT 0x800
#define PMAP_NO_CR3 (~0UL)
+#define PMAP_UCR3_NOMASK (~0UL)
#ifndef LOCORE
