Page MenuHomeFreeBSD

D13985.id38521.diff
No OneTemporary

D13985.id38521.diff

Index: head/sys/amd64/amd64/apic_vector.S
===================================================================
--- head/sys/amd64/amd64/apic_vector.S
+++ head/sys/amd64/amd64/apic_vector.S
@@ -184,10 +184,14 @@
call invltlb_pcid_handler
jmp invltlb_ret
- INTR_HANDLER invltlb_invpcid
+ INTR_HANDLER invltlb_invpcid_nopti
call invltlb_invpcid_handler
jmp invltlb_ret
+ INTR_HANDLER invltlb_invpcid_pti
+ call invltlb_invpcid_pti_handler
+ jmp invltlb_ret
+
/*
* Single page TLB shootdown
*/
@@ -195,11 +199,27 @@
call invlpg_handler
jmp invltlb_ret
+ INTR_HANDLER invlpg_invpcid
+ call invlpg_invpcid_handler
+ jmp invltlb_ret
+
+ INTR_HANDLER invlpg_pcid
+ call invlpg_pcid_handler
+ jmp invltlb_ret
+
/*
* Page range TLB shootdown.
*/
INTR_HANDLER invlrng
call invlrng_handler
+ jmp invltlb_ret
+
+ INTR_HANDLER invlrng_invpcid
+ call invlrng_invpcid_handler
+ jmp invltlb_ret
+
+ INTR_HANDLER invlrng_pcid
+ call invlrng_pcid_handler
jmp invltlb_ret
/*
Index: head/sys/amd64/amd64/mp_machdep.c
===================================================================
--- head/sys/amd64/amd64/mp_machdep.c
+++ head/sys/amd64/amd64/mp_machdep.c
@@ -133,20 +133,30 @@
/* Install an inter-CPU IPI for TLB invalidation */
if (pmap_pcid_enabled) {
if (invpcid_works) {
- setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_invpcid_pti) :
- IDTVEC(invltlb_invpcid), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLTLB, pti ?
+ IDTVEC(invltlb_invpcid_pti_pti) :
+ IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
+ SEL_KPL, 0);
+ setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
+ IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
+ IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
} else {
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
+ IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
+ IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
}
} else {
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
+ SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
+ SDT_SYSIGT, SEL_KPL, 0);
}
- setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
- SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
- SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for cache invalidation. */
setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
@@ -440,9 +450,43 @@
}
void
-invltlb_pcid_handler(void)
+invltlb_invpcid_pti_handler(void)
{
+ struct invpcid_descr d;
uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_gbl[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ generation = smp_tlb_generation;
+ d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
+ d.pad = 0;
+ d.addr = 0;
+ if (smp_tlb_pmap == kernel_pmap) {
+ /*
+ * This invalidation actually needs to clear kernel
+ * mappings from the TLB in the current pmap, but
+ * since we were asked for the flush in the kernel
+ * pmap, achieve it by performing a global flush.
+ */
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ invpcid(&d, INVPCID_CTX);
+ d.pcid |= PMAP_PCID_USER_PT;
+ invpcid(&d, INVPCID_CTX);
+ }
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invltlb_pcid_handler(void)
+{
+ uint64_t kcr3, ucr3;
+ uint32_t generation, pcid;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
@@ -463,9 +507,132 @@
* CPU.
*/
if (PCPU_GET(curpmap) == smp_tlb_pmap) {
- load_cr3(smp_tlb_pmap->pm_cr3 |
- smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid);
+ pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
+ kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
+ ucr3 = smp_tlb_pmap->pm_ucr3;
+ if (ucr3 != PMAP_NO_CR3) {
+ ucr3 |= PMAP_PCID_USER_PT | pcid;
+ pmap_pti_pcid_invalidate(ucr3, kcr3);
+ } else
+ load_cr3(kcr3);
}
+ }
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlpg_invpcid_handler(void)
+{
+ struct invpcid_descr d;
+ uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_pg[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ generation = smp_tlb_generation; /* Overlap with serialization */
+ invlpg(smp_tlb_addr1);
+ if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
+ d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
+ PMAP_PCID_USER_PT;
+ d.pad = 0;
+ d.addr = smp_tlb_addr1;
+ invpcid(&d, INVPCID_ADDR);
+ }
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlpg_pcid_handler(void)
+{
+ uint64_t kcr3, ucr3;
+ uint32_t generation;
+ uint32_t pcid;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_pg[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ generation = smp_tlb_generation; /* Overlap with serialization */
+ invlpg(smp_tlb_addr1);
+ if (smp_tlb_pmap == PCPU_GET(curpmap) &&
+ (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
+ pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
+ kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
+ ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+ pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
+ }
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlrng_invpcid_handler(void)
+{
+ struct invpcid_descr d;
+ vm_offset_t addr, addr2;
+ uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_rng[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ addr = smp_tlb_addr1;
+ addr2 = smp_tlb_addr2;
+ generation = smp_tlb_generation; /* Overlap with serialization */
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < addr2);
+ if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
+ d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
+ PMAP_PCID_USER_PT;
+ d.pad = 0;
+ d.addr = smp_tlb_addr1;
+ do {
+ invpcid(&d, INVPCID_ADDR);
+ d.addr += PAGE_SIZE;
+ } while (d.addr < addr2);
+ }
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlrng_pcid_handler(void)
+{
+ vm_offset_t addr, addr2;
+ uint64_t kcr3, ucr3;
+ uint32_t generation;
+ uint32_t pcid;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_rng[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ addr = smp_tlb_addr1;
+ addr2 = smp_tlb_addr2;
+ generation = smp_tlb_generation; /* Overlap with serialization */
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < addr2);
+ if (smp_tlb_pmap == PCPU_GET(curpmap) &&
+ (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
+ pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
+ kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
+ ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+ pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
}
PCPU_SET(smp_tlb_done, generation);
}
Index: head/sys/amd64/amd64/pmap.c
===================================================================
--- head/sys/amd64/amd64/pmap.c
+++ head/sys/amd64/amd64/pmap.c
@@ -1060,6 +1060,7 @@
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
kernel_pmap->pm_cr3 = KPML4phys;
+ kernel_pmap->pm_ucr3 = PMAP_NO_CR3;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
kernel_pmap->pm_flags = pmap_flags;
@@ -1097,8 +1098,6 @@
pmap_init_pat();
/* Initialize TLB Context Id. */
- if (pti)
- pmap_pcid_enabled = 0;
TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
/* Check for INVPCID support */
@@ -1576,6 +1575,9 @@
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
cpuset_t *mask;
+ struct invpcid_descr d;
+ uint64_t kcr3, ucr3;
+ uint32_t pcid;
u_int cpuid, i;
if (pmap_type_guest(pmap)) {
@@ -1592,9 +1594,32 @@
mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
- if (pmap == PCPU_GET(curpmap))
+ if (pmap == PCPU_GET(curpmap)) {
invlpg(va);
- else if (pmap_pcid_enabled)
+ if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) {
+ /*
+ * Disable context switching. pm_pcid
+ * is recalculated on a switch, which
+ * might make us use the wrong pcid below.
+ */
+ critical_enter();
+ pcid = pmap->pm_pcids[cpuid].pm_pcid;
+
+ if (invpcid_works) {
+ d.pcid = pcid | PMAP_PCID_USER_PT;
+ d.pad = 0;
+ d.addr = va;
+ invpcid(&d, INVPCID_ADDR);
+ } else {
+ kcr3 = pmap->pm_cr3 | pcid |
+ CR3_PCID_SAVE;
+ ucr3 = pmap->pm_ucr3 | pcid |
+ PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+ pmap_pti_pcid_invlpg(ucr3, kcr3, va);
+ }
+ critical_exit();
+ }
+ } else if (pmap_pcid_enabled)
pmap->pm_pcids[cpuid].pm_gen = 0;
if (pmap_pcid_enabled) {
CPU_FOREACH(i) {
@@ -1604,7 +1629,7 @@
}
mask = &pmap->pm_active;
}
- smp_masked_invlpg(*mask, va);
+ smp_masked_invlpg(*mask, va, pmap);
sched_unpin();
}
@@ -1615,7 +1640,10 @@
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
cpuset_t *mask;
+ struct invpcid_descr d;
vm_offset_t addr;
+ uint64_t kcr3, ucr3;
+ uint32_t pcid;
u_int cpuid, i;
if (eva - sva >= PMAP_INVLPG_THRESHOLD) {
@@ -1641,6 +1669,26 @@
if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
+ if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) {
+ critical_enter();
+ pcid = pmap->pm_pcids[cpuid].pm_pcid;
+ if (invpcid_works) {
+ d.pcid = pcid | PMAP_PCID_USER_PT;
+ d.pad = 0;
+ d.addr = sva;
+ for (; d.addr < eva; d.addr +=
+ PAGE_SIZE)
+ invpcid(&d, INVPCID_ADDR);
+ } else {
+ kcr3 = pmap->pm_cr3 | pcid |
+ CR3_PCID_SAVE;
+ ucr3 = pmap->pm_ucr3 | pcid |
+ PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+ pmap_pti_pcid_invlrng(ucr3, kcr3, sva,
+ eva);
+ }
+ critical_exit();
+ }
} else if (pmap_pcid_enabled) {
pmap->pm_pcids[cpuid].pm_gen = 0;
}
@@ -1652,7 +1700,7 @@
}
mask = &pmap->pm_active;
}
- smp_masked_invlpg_range(*mask, sva, eva);
+ smp_masked_invlpg_range(*mask, sva, eva, pmap);
sched_unpin();
}
@@ -1661,6 +1709,8 @@
{
cpuset_t *mask;
struct invpcid_descr d;
+ uint64_t kcr3, ucr3;
+ uint32_t pcid;
u_int cpuid, i;
if (pmap_type_guest(pmap)) {
@@ -1684,15 +1734,29 @@
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
if (pmap_pcid_enabled) {
+ critical_enter();
+ pcid = pmap->pm_pcids[cpuid].pm_pcid;
if (invpcid_works) {
- d.pcid = pmap->pm_pcids[cpuid].pm_pcid;
+ d.pcid = pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
+ if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ d.pcid |= PMAP_PCID_USER_PT;
+ invpcid(&d, INVPCID_CTX);
+ }
} else {
- load_cr3(pmap->pm_cr3 | pmap->pm_pcids
- [PCPU_GET(cpuid)].pm_pcid);
+ kcr3 = pmap->pm_cr3 | pcid;
+ ucr3 = pmap->pm_ucr3;
+ if (ucr3 != PMAP_NO_CR3) {
+ ucr3 |= pcid | PMAP_PCID_USER_PT;
+ pmap_pti_pcid_invalidate(ucr3,
+ kcr3);
+ } else {
+ load_cr3(kcr3);
+ }
}
+ critical_exit();
} else {
invltlb();
}
@@ -1797,6 +1861,9 @@
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
+ struct invpcid_descr d;
+ uint64_t kcr3, ucr3;
+ uint32_t pcid;
if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) {
pmap->pm_eptgen++;
@@ -1805,16 +1872,35 @@
KASSERT(pmap->pm_type == PT_X86,
("pmap_invalidate_range: unknown type %d", pmap->pm_type));
- if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap))
+ if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) {
invlpg(va);
- else if (pmap_pcid_enabled)
+ if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled &&
+ pmap->pm_ucr3 != PMAP_NO_CR3) {
+ critical_enter();
+ pcid = pmap->pm_pcids[0].pm_pcid;
+ if (invpcid_works) {
+ d.pcid = pcid | PMAP_PCID_USER_PT;
+ d.pad = 0;
+ d.addr = va;
+ invpcid(&d, INVPCID_ADDR);
+ } else {
+ kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
+ ucr3 = pmap->pm_ucr3 | pcid |
+ PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+ pmap_pti_pcid_invlpg(ucr3, kcr3, va);
+ }
+ critical_exit();
+ }
+ } else if (pmap_pcid_enabled)
pmap->pm_pcids[0].pm_gen = 0;
}
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
+ struct invpcid_descr d;
vm_offset_t addr;
+ uint64_t kcr3, ucr3;
if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) {
pmap->pm_eptgen++;
@@ -1826,6 +1912,25 @@
if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
+ if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled &&
+ pmap->pm_ucr3 != PMAP_NO_CR3) {
+ critical_enter();
+ if (invpcid_works) {
+ d.pcid = pmap->pm_pcids[0].pm_pcid |
+ PMAP_PCID_USER_PT;
+ d.pad = 0;
+ d.addr = sva;
+ for (; d.addr < eva; d.addr += PAGE_SIZE)
+ invpcid(&d, INVPCID_ADDR);
+ } else {
+ kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0].
+ pm_pcid | CR3_PCID_SAVE;
+ ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[0].
+ pm_pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+ pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva);
+ }
+ critical_exit();
+ }
} else if (pmap_pcid_enabled) {
pmap->pm_pcids[0].pm_gen = 0;
}
@@ -1835,6 +1940,7 @@
pmap_invalidate_all(pmap_t pmap)
{
struct invpcid_descr d;
+ uint64_t kcr3, ucr3;
if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) {
pmap->pm_eptgen++;
@@ -1852,15 +1958,26 @@
}
} else if (pmap == PCPU_GET(curpmap)) {
if (pmap_pcid_enabled) {
+ critical_enter();
if (invpcid_works) {
d.pcid = pmap->pm_pcids[0].pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
+ if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ d.pcid |= PMAP_PCID_USER_PT;
+ invpcid(&d, INVPCID_CTX);
+ }
} else {
- load_cr3(pmap->pm_cr3 | pmap->pm_pcids[0].
- pm_pcid);
+ kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0].pm_pcid;
+ if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[
+ 0].pm_pcid | PMAP_PCID_USER_PT;
+ pmap_pti_pcid_invalidate(ucr3, kcr3);
+ } else
+ load_cr3(kcr3);
}
+ critical_exit();
} else {
invltlb();
}
@@ -2398,7 +2515,8 @@
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
pmap->pm_pml4u = NULL;
pmap->pm_cr3 = KPML4phys;
- pmap->pm_ucr3 = ~0UL;
+ /* hack to keep pmap_pti_pcid_invalidate() alive */
+ pmap->pm_ucr3 = PMAP_NO_CR3;
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
@@ -2408,7 +2526,7 @@
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
pmap->pm_pcids[i].pm_gen = 0;
if (!pti)
- __pcpu[i].pc_kcr3 = ~0ul;
+ __pcpu[i].pc_kcr3 = PMAP_NO_CR3;
}
PCPU_SET(curpmap, kernel_pmap);
pmap_activate(curthread);
@@ -2472,7 +2590,8 @@
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
pmap->pm_pcids[i].pm_gen = 0;
}
- pmap->pm_cr3 = ~0l; /* initialize to an invalid value */
+ pmap->pm_cr3 = PMAP_NO_CR3; /* initialize to an invalid value */
+ pmap->pm_ucr3 = PMAP_NO_CR3;
pmap->pm_pml4u = NULL;
pmap->pm_type = pm_type;
@@ -7134,13 +7253,15 @@
CRITICAL_ASSERT(curthread);
gen = PCPU_GET(pcid_gen);
- if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN ||
- pmap->pm_pcids[cpuid].pm_gen == gen)
+ if (!pti && (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN ||
+ pmap->pm_pcids[cpuid].pm_gen == gen))
return (CR3_PCID_SAVE);
pcid_next = PCPU_GET(pcid_next);
- KASSERT(pcid_next <= PMAP_PCID_OVERMAX, ("cpu %d pcid_next %#x",
- cpuid, pcid_next));
- if (pcid_next == PMAP_PCID_OVERMAX) {
+ KASSERT((!pti && pcid_next <= PMAP_PCID_OVERMAX) ||
+ (pti && pcid_next <= PMAP_PCID_OVERMAX_KERN),
+ ("cpu %d pcid_next %#x", cpuid, pcid_next));
+ if ((!pti && pcid_next == PMAP_PCID_OVERMAX) ||
+ (pti && pcid_next == PMAP_PCID_OVERMAX_KERN)) {
new_gen = gen + 1;
if (new_gen == 0)
new_gen = 1;
@@ -7159,7 +7280,8 @@
pmap_activate_sw(struct thread *td)
{
pmap_t oldpmap, pmap;
- uint64_t cached, cr3;
+ struct invpcid_descr d;
+ uint64_t cached, cr3, kcr3, ucr3;
register_t rflags;
u_int cpuid;
@@ -7215,6 +7337,32 @@
PCPU_INC(pm_save_cnt);
}
PCPU_SET(curpmap, pmap);
+ if (pti) {
+ kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid;
+ ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
+ PMAP_PCID_USER_PT;
+
+ /*
+ * Manually invalidate translations cached
+ * from the user page table; they are not
+ * flushed by the reload of cr3 with the
+ * kernel page table pointer above.
+ */
+ if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ if (invpcid_works) {
+ d.pcid = PMAP_PCID_USER_PT |
+ pmap->pm_pcids[cpuid].pm_pcid;
+ d.pad = 0;
+ d.addr = 0;
+ invpcid(&d, INVPCID_CTX);
+ } else {
+ pmap_pti_pcid_invalidate(ucr3, kcr3);
+ }
+ }
+
+ PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
+ PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
+ }
if (!invpcid_works)
intr_restore(rflags);
} else if (cr3 != pmap->pm_cr3) {
Index: head/sys/amd64/amd64/support.S
===================================================================
--- head/sys/amd64/amd64/support.S
+++ head/sys/amd64/amd64/support.S
@@ -802,3 +802,51 @@
movl $EFAULT,%eax
POP_FRAME_POINTER
ret
+
+/*
+ * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
+ * Invalidates the address space addressed by ucr3, then returns to kcr3.
+ * Done in assembler to ensure that no other memory accesses happen while
+ * running on ucr3.
+ */
+ ALIGN_TEXT
+ENTRY(pmap_pti_pcid_invalidate)
+ pushfq
+ cli
+ movq %rdi,%cr3 /* to user page table */
+ movq %rsi,%cr3 /* back to kernel */
+ popfq
+ retq
+
+/*
+ * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
+ * Invalidates the virtual address va in the address space ucr3, then returns to kcr3.
+ */
+ ALIGN_TEXT
+ENTRY(pmap_pti_pcid_invlpg)
+ pushfq
+ cli
+ movq %rdi,%cr3 /* to user page table */
+ invlpg (%rdx)
+ movq %rsi,%cr3 /* back to kernel */
+ popfq
+ retq
+
+/*
+ * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
+ * vm_offset_t eva);
+ * Invalidates the virtual addresses in [sva, eva), one page at a time, in
+ * the address space ucr3, then returns to kcr3.
+ */
+ ALIGN_TEXT
+ENTRY(pmap_pti_pcid_invlrng)
+ pushfq
+ cli
+ movq %rdi,%cr3 /* to user page table */
+1: invlpg (%rdx)
+ addq $PAGE_SIZE,%rdx
+ cmpq %rdx,%rcx
+ ja 1b
+ movq %rsi,%cr3 /* back to kernel */
+ popfq
+ retq
Index: head/sys/amd64/include/pmap.h
===================================================================
--- head/sys/amd64/include/pmap.h
+++ head/sys/amd64/include/pmap.h
@@ -225,7 +225,11 @@
#define PMAP_PCID_NONE 0xffffffff
#define PMAP_PCID_KERN 0
#define PMAP_PCID_OVERMAX 0x1000
+#define PMAP_PCID_OVERMAX_KERN 0x800
+#define PMAP_PCID_USER_PT 0x800
+#define PMAP_NO_CR3 (~0UL)
+
#ifndef LOCORE
#include <sys/queue.h>
@@ -433,6 +437,10 @@
void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_pti_add_kva(vm_offset_t sva, vm_offset_t eva, bool exec);
void pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva);
+void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
+void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
+void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
+ vm_offset_t eva);
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */
Index: head/sys/amd64/include/smp.h
===================================================================
--- head/sys/amd64/include/smp.h
+++ head/sys/amd64/include/smp.h
@@ -28,15 +28,23 @@
/* IPI handlers */
inthand_t
- IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
- IDTVEC(invltlb_invpcid),/* TLB shootdowns - global, invpcid */
IDTVEC(justreturn), /* interrupt CPU with minimum overhead */
- IDTVEC(invltlb_pcid_pti),
- IDTVEC(invltlb_invpcid_pti),
IDTVEC(justreturn1_pti),
IDTVEC(invltlb_pti),
+ IDTVEC(invltlb_pcid_pti),
+ IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
+ IDTVEC(invltlb_invpcid_pti_pti),
+ IDTVEC(invltlb_invpcid_nopti),
IDTVEC(invlpg_pti),
+ IDTVEC(invlpg_invpcid_pti),
+ IDTVEC(invlpg_invpcid),
+ IDTVEC(invlpg_pcid_pti),
+ IDTVEC(invlpg_pcid),
IDTVEC(invlrng_pti),
+ IDTVEC(invlrng_invpcid_pti),
+ IDTVEC(invlrng_invpcid),
+ IDTVEC(invlrng_pcid_pti),
+ IDTVEC(invlrng_pcid),
IDTVEC(invlcache_pti),
IDTVEC(ipi_intr_bitmap_handler_pti),
IDTVEC(cpustop_pti),
@@ -45,6 +53,11 @@
void invltlb_pcid_handler(void);
void invltlb_invpcid_handler(void);
+void invltlb_invpcid_pti_handler(void);
+void invlpg_invpcid_handler(void);
+void invlpg_pcid_handler(void);
+void invlrng_invpcid_handler(void);
+void invlrng_pcid_handler(void);
int native_start_all_aps(void);
#endif /* !LOCORE */
Index: head/sys/i386/i386/pmap.c
===================================================================
--- head/sys/i386/i386/pmap.c
+++ head/sys/i386/i386/pmap.c
@@ -1045,7 +1045,7 @@
CPU_AND(&other_cpus, &pmap->pm_active);
mask = &other_cpus;
}
- smp_masked_invlpg(*mask, va);
+ smp_masked_invlpg(*mask, va, pmap);
sched_unpin();
}
@@ -1079,7 +1079,7 @@
CPU_AND(&other_cpus, &pmap->pm_active);
mask = &other_cpus;
}
- smp_masked_invlpg_range(*mask, sva, eva);
+ smp_masked_invlpg_range(*mask, sva, eva, pmap);
sched_unpin();
}
Index: head/sys/i386/i386/vm_machdep.c
===================================================================
--- head/sys/i386/i386/vm_machdep.c
+++ head/sys/i386/i386/vm_machdep.c
@@ -768,7 +768,7 @@
CPU_NAND(&other_cpus, &sf->cpumask);
if (!CPU_EMPTY(&other_cpus)) {
CPU_OR(&sf->cpumask, &other_cpus);
- smp_masked_invlpg(other_cpus, sf->kva);
+ smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap);
}
}
sched_unpin();
Index: head/sys/x86/include/x86_smp.h
===================================================================
--- head/sys/x86/include/x86_smp.h
+++ head/sys/x86/include/x86_smp.h
@@ -39,6 +39,7 @@
extern int cpu_cores;
extern volatile uint32_t smp_tlb_generation;
extern struct pmap *smp_tlb_pmap;
+extern vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
extern u_int xhits_gbl[];
extern u_int xhits_pg[];
extern u_int xhits_rng[];
@@ -97,9 +98,9 @@
u_int mp_bootaddress(u_int);
void set_interrupt_apic_ids(void);
void smp_cache_flush(void);
-void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
+void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap);
void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
- vm_offset_t endva);
+ vm_offset_t endva, struct pmap *pmap);
void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
void mem_range_AP_init(void);
void topo_probe(void);
Index: head/sys/x86/x86/mp_x86.c
===================================================================
--- head/sys/x86/x86/mp_x86.c
+++ head/sys/x86/x86/mp_x86.c
@@ -1506,7 +1506,7 @@
*/
/* Variables needed for SMP tlb shootdown. */
-static vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
+vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
pmap_t smp_tlb_pmap;
volatile uint32_t smp_tlb_generation;
@@ -1583,11 +1583,11 @@
}
void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap)
{
if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, NULL, addr, 0);
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
@@ -1595,11 +1595,12 @@
}
void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
+ pmap_t pmap)
{
if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, NULL,
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap,
addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;

File Metadata

Mime Type
text/plain
Expires
Sat, Dec 13, 2:43 PM (12 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
26933524
Default Alt Text
D13985.id38521.diff (23 KB)

Event Timeline