D25483.id74630.diff

Index: head/sys/amd64/amd64/exception.S
===================================================================
--- head/sys/amd64/amd64/exception.S
+++ head/sys/amd64/amd64/exception.S
@@ -47,6 +47,7 @@
#include <machine/asmacros.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
+#include <machine/pmap.h>
#ifdef KDTRACE_HOOKS
.bss
@@ -607,8 +608,10 @@
cmpq $~0,PCPU(UCR3)
je 2f
movq PCPU(UCR3),%r9
+ andq PCPU(UCR3_LOAD_MASK),%r9
movq %r9,%cr3
2: xorl %r9d,%r9d
+ movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
swapgs
sysretq
@@ -1262,6 +1265,8 @@
movq TF_SS(%rsp),%rax
movq %rax,PTI_SS(%rdx)
movq PCPU(UCR3),%rax
+ andq PCPU(UCR3_LOAD_MASK),%rax
+ movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
swapgs
movq %rdx,%rsp
movq %rax,%cr3
Index: head/sys/amd64/amd64/genassym.c
===================================================================
--- head/sys/amd64/amd64/genassym.c
+++ head/sys/amd64/amd64/genassym.c
@@ -230,6 +230,7 @@
ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
ASSYM(PC_KCR3, offsetof(struct pcpu, pc_kcr3));
ASSYM(PC_UCR3, offsetof(struct pcpu, pc_ucr3));
+ASSYM(PC_UCR3_LOAD_MASK, offsetof(struct pcpu, pc_ucr3_load_mask));
ASSYM(PC_SAVED_UCR3, offsetof(struct pcpu, pc_saved_ucr3));
ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack));
ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);
Index: head/sys/amd64/amd64/machdep.c
===================================================================
--- head/sys/amd64/amd64/machdep.c
+++ head/sys/amd64/amd64/machdep.c
@@ -1562,6 +1562,7 @@
PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
+ PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
PCPU_SET(smp_tlb_gen, 1);
}
Index: head/sys/amd64/amd64/mp_machdep.c
===================================================================
--- head/sys/amd64/amd64/mp_machdep.c
+++ head/sys/amd64/amd64/mp_machdep.c
@@ -283,6 +283,7 @@
pc->pc_fs32p = &gdt[GUFS32_SEL];
pc->pc_gs32p = &gdt[GUGS32_SEL];
pc->pc_ldt = (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL];
+ pc->pc_ucr3_load_mask = PMAP_UCR3_NOMASK;
/* See comment in pmap_bootstrap(). */
pc->pc_pcid_next = PMAP_PCID_KERN + 2;
pc->pc_pcid_gen = 1;
@@ -821,15 +822,14 @@
invpcid(&d, INVPCID_CTXGLOB);
} else {
invpcid(&d, INVPCID_CTX);
- d.pcid |= PMAP_PCID_USER_PT;
- invpcid(&d, INVPCID_CTX);
+ if (smp_tlb_pmap == PCPU_GET(curpmap))
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
}
}
static void
invltlb_pcid_handler(pmap_t smp_tlb_pmap)
{
- uint64_t kcr3, ucr3;
uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
@@ -849,15 +849,11 @@
* invalidation when switching to the pmap on this
* CPU.
*/
- if (PCPU_GET(curpmap) == smp_tlb_pmap) {
+ if (smp_tlb_pmap == PCPU_GET(curpmap)) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
- kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
- ucr3 = smp_tlb_pmap->pm_ucr3;
- if (ucr3 != PMAP_NO_CR3) {
- ucr3 |= PMAP_PCID_USER_PT | pcid;
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- } else
- load_cr3(kcr3);
+ load_cr3(smp_tlb_pmap->pm_cr3 | pcid);
+ if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
}
}
}
@@ -888,7 +884,9 @@
#endif /* COUNT_IPIS */
invlpg(smp_tlb_addr1);
- if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (smp_tlb_pmap == PCPU_GET(curpmap) &&
+ smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
@@ -912,7 +910,8 @@
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
- (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
+ (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
@@ -960,7 +959,9 @@
invlpg(addr);
addr += PAGE_SIZE;
} while (addr < addr2);
- if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (smp_tlb_pmap == PCPU_GET(curpmap) &&
+ smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
@@ -994,7 +995,8 @@
addr += PAGE_SIZE;
} while (addr < addr2);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
- (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
+ (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
Index: head/sys/amd64/amd64/pmap.c
===================================================================
--- head/sys/amd64/amd64/pmap.c
+++ head/sys/amd64/amd64/pmap.c
@@ -2520,7 +2520,16 @@
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ /*
+ * If we context-switched right after
+ * PCPU_GET(ucr3_load_mask), we could read the
+ * ~CR3_PCID_SAVE mask, which causes us to skip
+ * the code below to invalidate user pages. This
+ * is handled in pmap_activate_sw_pcid_pti() by
+ * clearing pm_gen if ucr3_load_mask is ~CR3_PCID_SAVE.
+ */
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
/*
* Because pm_pcid is recalculated on a
* context switch, we must disable switching.
@@ -2635,7 +2644,8 @@
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (pmap->pm_ucr3 != PMAP_NO_CR3 &&
+ PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
critical_enter();
pcid = pmap->pm_pcids[cpuid].pm_pcid;
if (invpcid_works1) {
@@ -2736,7 +2746,7 @@
pmap_invalidate_all_pcid(pmap_t pmap, bool invpcid_works1)
{
struct invpcid_descr d;
- uint64_t kcr3, ucr3;
+ uint64_t kcr3;
uint32_t pcid;
u_int cpuid, i;
@@ -2757,20 +2767,12 @@
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
- d.pcid |= PMAP_PCID_USER_PT;
- invpcid(&d, INVPCID_CTX);
- }
} else {
kcr3 = pmap->pm_cr3 | pcid;
- ucr3 = pmap->pm_ucr3;
- if (ucr3 != PMAP_NO_CR3) {
- ucr3 |= pcid | PMAP_PCID_USER_PT;
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- } else {
- load_cr3(kcr3);
- }
+ load_cr3(kcr3);
}
+ if (pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
critical_exit();
} else
pmap->pm_pcids[cpuid].pm_gen = 0;
@@ -8816,12 +8818,23 @@
PCPU_GET(pti_rsp0) : (uintptr_t)td->td_md.md_stack_base;
}
-static void inline
-pmap_activate_sw_pcid_pti(pmap_t pmap, u_int cpuid, const bool invpcid_works1)
+static void
+pmap_activate_sw_pcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
{
- struct invpcid_descr d;
+ pmap_t old_pmap;
uint64_t cached, cr3, kcr3, ucr3;
+ KASSERT((read_rflags() & PSL_I) == 0,
+ ("PCID needs interrupts disabled in pmap_activate_sw()"));
+
+ /* See the comment in pmap_invalidate_page_pcid(). */
+ if (PCPU_GET(ucr3_load_mask) != PMAP_UCR3_NOMASK) {
+ PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
+ old_pmap = PCPU_GET(curpmap);
+ MPASS(old_pmap->pm_ucr3 != PMAP_NO_CR3);
+ old_pmap->pm_pcids[cpuid].pm_gen = 0;
+ }
+
cached = pmap_pcid_alloc_checked(pmap, cpuid);
cr3 = rcr3();
if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
@@ -8831,77 +8844,26 @@
ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
PMAP_PCID_USER_PT;
- if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) {
- /*
- * Explicitly invalidate translations cached from the
- * user page table. They are not automatically
- * flushed by reload of cr3 with the kernel page table
- * pointer above.
- *
- * Note that the if() condition is resolved statically
- * by using the function argument instead of
- * runtime-evaluated invpcid_works value.
- */
- if (invpcid_works1) {
- d.pcid = PMAP_PCID_USER_PT |
- pmap->pm_pcids[cpuid].pm_pcid;
- d.pad = 0;
- d.addr = 0;
- invpcid(&d, INVPCID_CTX);
- } else {
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- }
- }
+ if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
if (cached)
PCPU_INC(pm_save_cnt);
-}
-static void
-pmap_activate_sw_pcid_invpcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
-{
-
- pmap_activate_sw_pcid_pti(pmap, cpuid, true);
pmap_activate_sw_pti_post(td, pmap);
}
static void
-pmap_activate_sw_pcid_noinvpcid_pti(struct thread *td, pmap_t pmap,
- u_int cpuid)
-{
- register_t rflags;
-
- /*
- * If the INVPCID instruction is not available,
- * invltlb_pcid_handler() is used to handle an invalidate_all
- * IPI, which checks for curpmap == smp_tlb_pmap. The below
- * sequence of operations has a window where %CR3 is loaded
- * with the new pmap's PML4 address, but the curpmap value has
- * not yet been updated. This causes the invltlb IPI handler,
- * which is called between the updates, to execute as a NOP,
- * which leaves stale TLB entries.
- *
- * Note that the most typical use of pmap_activate_sw(), from
- * the context switch, is immune to this race, because
- * interrupts are disabled (while the thread lock is owned),
- * and the IPI happens after curpmap is updated. Protect
- * other callers in a similar way, by disabling interrupts
- * around the %cr3 register reload and curpmap assignment.
- */
- rflags = intr_disable();
- pmap_activate_sw_pcid_pti(pmap, cpuid, false);
- intr_restore(rflags);
- pmap_activate_sw_pti_post(td, pmap);
-}
-
-static void
pmap_activate_sw_pcid_nopti(struct thread *td __unused, pmap_t pmap,
u_int cpuid)
{
uint64_t cached, cr3;
+ KASSERT((read_rflags() & PSL_I) == 0,
+ ("PCID needs interrupts disabled in pmap_activate_sw()"));
+
cached = pmap_pcid_alloc_checked(pmap, cpuid);
cr3 = rcr3();
if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
@@ -8913,17 +8875,6 @@
}
static void
-pmap_activate_sw_pcid_noinvpcid_nopti(struct thread *td __unused, pmap_t pmap,
- u_int cpuid)
-{
- register_t rflags;
-
- rflags = intr_disable();
- pmap_activate_sw_pcid_nopti(td, pmap, cpuid);
- intr_restore(rflags);
-}
-
-static void
pmap_activate_sw_nopcid_nopti(struct thread *td __unused, pmap_t pmap,
u_int cpuid __unused)
{
@@ -8947,14 +8898,10 @@
u_int))
{
- if (pmap_pcid_enabled && pti && invpcid_works)
- return (pmap_activate_sw_pcid_invpcid_pti);
- else if (pmap_pcid_enabled && pti && !invpcid_works)
- return (pmap_activate_sw_pcid_noinvpcid_pti);
- else if (pmap_pcid_enabled && !pti && invpcid_works)
+ if (pmap_pcid_enabled && pti)
+ return (pmap_activate_sw_pcid_pti);
+ else if (pmap_pcid_enabled && !pti)
return (pmap_activate_sw_pcid_nopti);
- else if (pmap_pcid_enabled && !pti && !invpcid_works)
- return (pmap_activate_sw_pcid_noinvpcid_nopti);
else if (!pmap_pcid_enabled && pti)
return (pmap_activate_sw_nopcid_pti);
else /* if (!pmap_pcid_enabled && !pti) */
@@ -8991,10 +8938,26 @@
void
pmap_activate(struct thread *td)
{
-
- critical_enter();
+ /*
+ * invltlb_{invpcid,}_pcid_handler() is used to handle an
+ * invalidate_all IPI, which checks for curpmap ==
+ * smp_tlb_pmap. The below sequence of operations has a
+ * window where %CR3 is loaded with the new pmap's PML4
+ * address, but the curpmap value has not yet been updated.
+ * This causes the invltlb IPI handler, which is called
+ * between the updates, to execute as a NOP, which leaves
+ * stale TLB entries.
+ *
+ * Note that the most common use of pmap_activate_sw(), from
+ * a context switch, is immune to this race, because
+ * interrupts are disabled (while the thread lock is owned),
+ * so the IPI is delayed until after curpmap is updated. Protect
+ * other callers in a similar way, by disabling interrupts
+ * around the %cr3 register reload and curpmap assignment.
+ */
+ spinlock_enter();
pmap_activate_sw(td);
- critical_exit();
+ spinlock_exit();
}
void
Index: head/sys/amd64/include/pcpu.h
===================================================================
--- head/sys/amd64/include/pcpu.h
+++ head/sys/amd64/include/pcpu.h
@@ -99,7 +99,8 @@
uint64_t pc_smp_tlb_addr2; \
uint32_t pc_smp_tlb_gen; \
u_int pc_smp_tlb_op; \
- char __pad[2924] /* pad to UMA_PCPU_ALLOC_SIZE */
+ uint64_t pc_ucr3_load_mask; \
+ char __pad[2916] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
Index: head/sys/amd64/include/pmap.h
===================================================================
--- head/sys/amd64/include/pmap.h
+++ head/sys/amd64/include/pmap.h
@@ -241,6 +241,7 @@
#define PMAP_PCID_USER_PT 0x800
#define PMAP_NO_CR3 (~0UL)
+#define PMAP_UCR3_NOMASK (~0UL)
#ifndef LOCORE
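
Note on the mechanism: the diff above replaces the eager INVPCID/pmap_pti_pcid_invalidate() flush of the PTI user page table with a deferred one. Local shootdown handlers now only arm the new per-CPU pc_ucr3_load_mask with ~CR3_PCID_SAVE; the return-to-user paths in exception.S AND that mask into the user %cr3 value before loading it, so the load itself (with the bit-63 "no flush" flag stripped) flushes the user PCID, after which the mask is re-armed to PMAP_UCR3_NOMASK. Below is a minimal userland C sketch of that state machine. It is illustrative only, not kernel code: struct pcpu_model, invalidate_user_pt_deferred() and return_to_user() are hypothetical stand-ins for the per-CPU data and the assembly paths in the diff.

/*
 * Minimal userland sketch of the deferred user page-table flush.
 * Constants mirror sys/amd64/include/pmap.h; everything else is a
 * stand-in for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define CR3_PCID_SAVE		(1ULL << 63)	/* "no flush" bit in %cr3 */
#define PMAP_NO_CR3		(~0ULL)
#define PMAP_UCR3_NOMASK	(~0ULL)		/* mask leaves ucr3 unchanged */

struct pcpu_model {
	uint64_t ucr3;		/* user page-table %cr3, PCID_SAVE set */
	uint64_t ucr3_load_mask;/* normally PMAP_UCR3_NOMASK */
};

/*
 * Local shootdown path: instead of an immediate INVPCID on the user
 * page-table PCID, arm the mask.  The flush is deferred to the next
 * return to user mode.
 */
static void
invalidate_user_pt_deferred(struct pcpu_model *pc)
{
	if (pc->ucr3 != PMAP_NO_CR3)
		pc->ucr3_load_mask = ~CR3_PCID_SAVE;
}

/*
 * Return-to-user path (the sysretq/iret code in exception.S): apply
 * the mask to the %cr3 value being loaded, then re-arm the mask.
 * With bit 63 clear, the CPU flushes TLB entries tagged with the
 * user PCID when %cr3 is written.
 */
static uint64_t
return_to_user(struct pcpu_model *pc)
{
	uint64_t cr3;

	cr3 = pc->ucr3 & pc->ucr3_load_mask;
	pc->ucr3_load_mask = PMAP_UCR3_NOMASK;
	return (cr3);	/* stands in for movq %rax,%cr3 */
}

int
main(void)
{
	struct pcpu_model pc = {
		.ucr3 = 0x1000 | CR3_PCID_SAVE,
		.ucr3_load_mask = PMAP_UCR3_NOMASK,
	};

	/* No pending flush: PCID_SAVE stays set, TLB is preserved. */
	printf("clean exit:   cr3=%#llx\n",
	    (unsigned long long)return_to_user(&pc));

	/* Pending flush: PCID_SAVE is stripped, so the load flushes. */
	invalidate_user_pt_deferred(&pc);
	printf("flushed exit: cr3=%#llx\n",
	    (unsigned long long)return_to_user(&pc));
	return (0);
}

The remaining race, where a context switch observes a mask armed for the previous pmap, is the one the in-diff comment in pmap_invalidate_page_pcid() describes: pmap_activate_sw_pcid_pti() clears the old pmap's pm_gen when it finds ucr3_load_mask already set to ~CR3_PCID_SAVE, forcing a full invalidation on the next activation.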
