Page MenuHomeFreeBSD

D17181.id48081.diff
No OneTemporary

D17181.id48081.diff

Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c
+++ sys/amd64/amd64/machdep.c
@@ -1580,6 +1580,18 @@
* re-evaluted by the below call to finishidentcpu().
*/
identify_cpu2();
+ pti = pti_get_default();
+ /* Check for pti, ... */
+ TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
+ /* ... pcid, ... */
+ TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
+ if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
+ /* ... and for INVPCID support */
+ invpcid_works = (cpu_stdext_feature &
+ CPUID_STDEXT_INVPCID) != 0;
+ } else {
+ pmap_pcid_enabled = 0;
+ }
link_elf_ireloc(kmdp);
@@ -1645,9 +1657,6 @@
mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);
/* exceptions */
- pti = pti_get_default();
- TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
-
for (x = 0; x < NIDT; x++)
setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
SEL_KPL, 0);
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -146,6 +146,7 @@
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
+#include <x86/ifunc.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
@@ -1179,11 +1180,7 @@
pmap_init_pat();
/* Initialize TLB Context Id. */
- TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
- if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
- /* Check for INVPCID support */
- invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
- != 0;
+ if (pmap_pcid_enabled) {
for (i = 0; i < MAXCPU; i++) {
kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN;
kernel_pmap->pm_pcids[i].pm_gen = 1;
@@ -1204,8 +1201,6 @@
* during pcpu setup.
*/
load_cr4(rcr4() | CR4_PCIDE);
- } else {
- pmap_pcid_enabled = 0;
}
}
@@ -7441,17 +7436,179 @@
return (0);
}
+static uint64_t
+pmap_pcid_alloc_checked(pmap_t pmap, u_int cpuid)
+{
+ uint64_t cached;
+
+ cached = pmap_pcid_alloc(pmap, cpuid);
+ KASSERT(pmap->pm_pcids[cpuid].pm_pcid >= 0 &&
+ pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX,
+ ("pmap %p cpu %d pcid %#x", pmap, cpuid,
+ pmap->pm_pcids[cpuid].pm_pcid));
+ KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN ||
+ pmap == kernel_pmap,
+ ("non-kernel pmap pmap %p cpu %d pcid %#x",
+ pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid));
+ return (cached);
+}
+
+static void
+pmap_activate_sw_pti_post(pmap_t pmap)
+{
+
+ if (pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_GET(tssp)->tss_rsp0 = ((vm_offset_t)PCPU_PTR(pti_stack) +
+ PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful;
+}
+
+static void inline
+pmap_activate_sw_pcid_pti(pmap_t pmap, u_int cpuid, bool invpcid_works1)
+{
+ struct invpcid_descr d;
+ uint64_t cached, cr3, kcr3, ucr3;
+
+ cached = pmap_pcid_alloc_checked(pmap, cpuid);
+ cr3 = rcr3();
+ if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
+ load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid);
+ PCPU_SET(curpmap, pmap);
+ kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid;
+ ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
+ PMAP_PCID_USER_PT;
+
+ if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) {
+ /*
+ * Manually invalidate translations cached from the
+ * user page table. They are not flushed by reload of
+ * cr3 with the kernel page table pointer above.
+ *
+ * Note that the if() condition is resolved
+ * statically by using the function argument instead
+ * of runtime-evaluated invpcid_works value.
+ */
+ if (invpcid_works1) {
+ d.pcid = PMAP_PCID_USER_PT |
+ pmap->pm_pcids[cpuid].pm_pcid;
+ d.pad = 0;
+ d.addr = 0;
+ invpcid(&d, INVPCID_CTX);
+ } else {
+ pmap_pti_pcid_invalidate(ucr3, kcr3);
+ }
+ }
+
+ PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
+ PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
+ if (cached)
+ PCPU_INC(pm_save_cnt);
+}
+
+static void
+pmap_activate_sw_pcid_invpcid_pti(pmap_t pmap, u_int cpuid)
+{
+
+ pmap_activate_sw_pcid_pti(pmap, cpuid, true);
+ pmap_activate_sw_pti_post(pmap);
+}
+
+static void
+pmap_activate_sw_pcid_noinvpcid_pti(pmap_t pmap, u_int cpuid)
+{
+ register_t rflags;
+
+ /*
+ * If the INVPCID instruction is not available,
+ * invltlb_pcid_handler() is used for handle
+ * invalidate_all IPI, which checks for curpmap ==
+ * smp_tlb_pmap. Below operations sequence has a
+ * window where %CR3 is loaded with the new pmap's
+ * PML4 address, but curpmap value is not yet updated.
+ * This causes invltlb IPI handler, called between the
+ * updates, to execute as NOP, which leaves stale TLB
+ * entries.
+ *
+ * Note that the most typical use of
+ * pmap_activate_sw(), from the context switch, is
+ * immune to this race, because interrupts are
+ * disabled (while the thread lock is owned), and IPI
+ * happens after curpmap is updated. Protect other
+ * callers in a similar way, by disabling interrupts
+ * around the %cr3 register reload and curpmap
+ * assignment.
+ */
+ rflags = intr_disable();
+ pmap_activate_sw_pcid_pti(pmap, cpuid, false);
+ intr_restore(rflags);
+ pmap_activate_sw_pti_post(pmap);
+}
+
+static void
+pmap_activate_sw_pcid_nopti(pmap_t pmap, u_int cpuid)
+{
+ uint64_t cached, cr3;
+
+ cached = pmap_pcid_alloc_checked(pmap, cpuid);
+ cr3 = rcr3();
+ if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
+ load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid |
+ cached);
+ PCPU_SET(curpmap, pmap);
+ if (cached)
+ PCPU_INC(pm_save_cnt);
+}
+
+static void
+pmap_activate_sw_pcid_noinvpcid_nopti(pmap_t pmap, u_int cpuid)
+{
+ register_t rflags;
+
+ rflags = intr_disable();
+ pmap_activate_sw_pcid_nopti(pmap, cpuid);
+ intr_restore(rflags);
+}
+
+static void
+pmap_activate_sw_nopcid_nopti(pmap_t pmap, u_int cpuid __unused)
+{
+
+ load_cr3(pmap->pm_cr3);
+ PCPU_SET(curpmap, pmap);
+}
+
+static void
+pmap_activate_sw_nopcid_pti(pmap_t pmap, u_int cpuid __unused)
+{
+
+ pmap_activate_sw_nopcid_nopti(pmap, cpuid);
+ PCPU_SET(kcr3, pmap->pm_cr3);
+ PCPU_SET(ucr3, pmap->pm_ucr3);
+ pmap_activate_sw_pti_post(pmap);
+}
+
+DEFINE_IFUNC(, void, pmap_activate_sw_mode, (pmap_t, u_int), static)
+{
+
+ if (pmap_pcid_enabled && pti && invpcid_works)
+ return (pmap_activate_sw_pcid_invpcid_pti);
+ else if (pmap_pcid_enabled && pti && !invpcid_works)
+ return (pmap_activate_sw_pcid_noinvpcid_pti);
+ else if (pmap_pcid_enabled && !pti && invpcid_works)
+ return (pmap_activate_sw_pcid_nopti);
+ else if (pmap_pcid_enabled && !pti && !invpcid_works)
+ return (pmap_activate_sw_pcid_noinvpcid_nopti);
+ else if (!pmap_pcid_enabled && pti)
+ return (pmap_activate_sw_nopcid_pti);
+ else /* if (!pmap_pcid_enabled && !pti) */
+ return (pmap_activate_sw_nopcid_nopti);
+}
+
void
pmap_activate_sw(struct thread *td)
{
pmap_t oldpmap, pmap;
- struct invpcid_descr d;
- uint64_t cached, cr3, kcr3, kern_pti_cached, rsp0, ucr3;
- register_t rflags;
u_int cpuid;
- struct amd64tss *tssp;
- rflags = 0;
oldpmap = PCPU_GET(curpmap);
pmap = vmspace_pmap(td->td_proc->p_vmspace);
if (oldpmap == pmap)
@@ -7462,91 +7619,7 @@
#else
CPU_SET(cpuid, &pmap->pm_active);
#endif
- cr3 = rcr3();
- if (pmap_pcid_enabled) {
- cached = pmap_pcid_alloc(pmap, cpuid);
- KASSERT(pmap->pm_pcids[cpuid].pm_pcid >= 0 &&
- pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX,
- ("pmap %p cpu %d pcid %#x", pmap, cpuid,
- pmap->pm_pcids[cpuid].pm_pcid));
- KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN ||
- pmap == kernel_pmap,
- ("non-kernel pmap thread %p pmap %p cpu %d pcid %#x",
- td, pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid));
-
- /*
- * If the INVPCID instruction is not available,
- * invltlb_pcid_handler() is used for handle
- * invalidate_all IPI, which checks for curpmap ==
- * smp_tlb_pmap. Below operations sequence has a
- * window where %CR3 is loaded with the new pmap's
- * PML4 address, but curpmap value is not yet updated.
- * This causes invltlb IPI handler, called between the
- * updates, to execute as NOP, which leaves stale TLB
- * entries.
- *
- * Note that the most typical use of
- * pmap_activate_sw(), from the context switch, is
- * immune to this race, because interrupts are
- * disabled (while the thread lock is owned), and IPI
- * happens after curpmap is updated. Protect other
- * callers in a similar way, by disabling interrupts
- * around the %cr3 register reload and curpmap
- * assignment.
- */
- if (!invpcid_works)
- rflags = intr_disable();
-
- kern_pti_cached = pti ? 0 : cached;
- if (!kern_pti_cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) {
- load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid |
- kern_pti_cached);
- }
- PCPU_SET(curpmap, pmap);
- if (pti) {
- kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid;
- ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
- PMAP_PCID_USER_PT;
-
- if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) {
- /*
- * Manually invalidate translations cached
- * from the user page table. They are not
- * flushed by reload of cr3 with the kernel
- * page table pointer above.
- */
- if (invpcid_works) {
- d.pcid = PMAP_PCID_USER_PT |
- pmap->pm_pcids[cpuid].pm_pcid;
- d.pad = 0;
- d.addr = 0;
- invpcid(&d, INVPCID_CTX);
- } else {
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- }
- }
-
- PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
- PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
- }
- if (!invpcid_works)
- intr_restore(rflags);
- if (cached)
- PCPU_INC(pm_save_cnt);
- } else {
- load_cr3(pmap->pm_cr3);
- PCPU_SET(curpmap, pmap);
- if (pti) {
- PCPU_SET(kcr3, pmap->pm_cr3);
- PCPU_SET(ucr3, pmap->pm_ucr3);
- }
- }
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
- rsp0 = ((vm_offset_t)PCPU_PTR(pti_stack) +
- PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful;
- tssp = PCPU_GET(tssp);
- tssp->tss_rsp0 = rsp0;
- }
+ pmap_activate_sw_mode(pmap, cpuid);
#ifdef SMP
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
#else
Index: sys/amd64/amd64/trap.c
===================================================================
--- sys/amd64/amd64/trap.c
+++ sys/amd64/amd64/trap.c
@@ -705,6 +705,16 @@
PGEX_P && (frame->tf_rflags & PSL_AC) == 0);
}
+static bool
+trap_is_pti(struct trapframe *frame)
+{
+ return (PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 &&
+ pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W |
+ PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) &&
+ (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK)==
+ (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK));
+}
+
static int
trap_pfault(struct trapframe *frame, int usermode)
{
@@ -806,12 +816,8 @@
* If nx protection of the usermode portion of kernel page
* tables caused trap, panic.
*/
- if (usermode && PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 &&
- pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W |
- PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) &&
- (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK)==
- (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK))
- panic("PTI: pid %d comm %s tf_err %#lx\n", p->p_pid,
+ if (usermode && trap_is_pti(frame))
+ panic("PTI: pid %d comm %s tf_err %#lx", p->p_pid,
p->p_comm, frame->tf_err);
/*

File Metadata

Mime Type
text/plain
Expires
Mon, Jan 13, 7:05 PM (16 h, 25 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15785928
Default Alt Text
D17181.id48081.diff (11 KB)

Event Timeline