Index: sys/amd64/amd64/apic_vector.S =================================================================== --- sys/amd64/amd64/apic_vector.S +++ sys/amd64/amd64/apic_vector.S @@ -184,10 +184,14 @@ call invltlb_pcid_handler jmp invltlb_ret - INTR_HANDLER invltlb_invpcid + INTR_HANDLER invltlb_invpcid_nopti call invltlb_invpcid_handler jmp invltlb_ret + INTR_HANDLER invltlb_invpcid_pti + call invltlb_invpcid_pti_handler + jmp invltlb_ret + /* * Single page TLB shootdown */ @@ -195,6 +199,14 @@ call invlpg_handler jmp invltlb_ret + INTR_HANDLER invlpg_invpcid + call invlpg_invpcid_handler + jmp invltlb_ret + + INTR_HANDLER invlpg_pcid + call invlpg_pcid_handler + jmp invltlb_ret + /* * Page range TLB shootdown. */ @@ -202,6 +214,14 @@ call invlrng_handler jmp invltlb_ret + INTR_HANDLER invlrng_invpcid + call invlrng_invpcid_handler + jmp invltlb_ret + + INTR_HANDLER invlrng_pcid + call invlrng_pcid_handler + jmp invltlb_ret + /* * Invalidate cache. */ Index: sys/amd64/amd64/mp_machdep.c =================================================================== --- sys/amd64/amd64/mp_machdep.c +++ sys/amd64/amd64/mp_machdep.c @@ -133,20 +133,31 @@ /* Install an inter-CPU IPI for TLB invalidation */ if (pmap_pcid_enabled) { if (invpcid_works) { - setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_invpcid_pti) : - IDTVEC(invltlb_invpcid), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLTLB, pti ? + IDTVEC(invltlb_invpcid_pti_pti) : + IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) : + IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, + 0); + setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) : + IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0); } else { setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) : IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) : + IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) : + IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0); } } else { setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg), + SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng), + SDT_SYSIGT, SEL_KPL, 0); } - setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg), - SDT_SYSIGT, SEL_KPL, 0); - setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng), - SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache), @@ -440,9 +451,43 @@ } void -invltlb_pcid_handler(void) +invltlb_invpcid_pti_handler(void) { + struct invpcid_descr d; uint32_t generation; + +#ifdef COUNT_XINVLTLB_HITS + xhits_gbl[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + generation = smp_tlb_generation; + d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; + d.pad = 0; + d.addr = 0; + if (smp_tlb_pmap == kernel_pmap) { + /* + * This invalidation actually needs to clear kernel + * mappings from the TLB in the current pmap, but + * since we were asked for the flush in the kernel + * pmap, achieve it by performing global flush. + */ + invpcid(&d, INVPCID_CTXGLOB); + } else { + invpcid(&d, INVPCID_CTX); + d.pcid |= PMAP_PCID_USER_PT; + invpcid(&d, INVPCID_CTX); + } + PCPU_SET(smp_tlb_done, generation); +} + +void +invltlb_pcid_handler(void) +{ + uint64_t kcr3, ucr3; + uint32_t generation, pcid; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; @@ -463,9 +508,132 @@ * CPU. */ if (PCPU_GET(curpmap) == smp_tlb_pmap) { - load_cr3(smp_tlb_pmap->pm_cr3 | - smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid); + pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; + kcr3 = smp_tlb_pmap->pm_cr3 | pcid; + ucr3 = smp_tlb_pmap->pm_ucr3; + if (ucr3 != PMAP_NO_CR3) { + ucr3 |= PMAP_PCID_USER_PT | pcid; + pmap_pti_pcid_invalidate(ucr3, kcr3); + } else + load_cr3(kcr3); } } PCPU_SET(smp_tlb_done, generation); } + +void +invlpg_invpcid_handler(void) +{ + struct invpcid_descr d; + uint32_t generation; + +#ifdef COUNT_XINVLTLB_HITS + xhits_pg[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + generation = smp_tlb_generation; /* Overlap with serialization */ + invlpg(smp_tlb_addr1); + if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) { + d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid | + PMAP_PCID_USER_PT; + d.pad = 0; + d.addr = smp_tlb_addr1; + invpcid(&d, INVPCID_ADDR); + } + PCPU_SET(smp_tlb_done, generation); +} + +void +invlpg_pcid_handler(void) +{ + uint64_t kcr3, ucr3; + uint32_t generation; + uint32_t pcid; + +#ifdef COUNT_XINVLTLB_HITS + xhits_pg[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + generation = smp_tlb_generation; /* Overlap with serialization */ + invlpg(smp_tlb_addr1); + if (smp_tlb_pmap == PCPU_GET(curpmap) && + (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) { + pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; + kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE; + ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; + pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1); + } + PCPU_SET(smp_tlb_done, generation); +} + +void +invlrng_invpcid_handler(void) +{ + struct invpcid_descr d; + vm_offset_t addr, addr2; + uint32_t generation; + +#ifdef COUNT_XINVLTLB_HITS + xhits_rng[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + addr = smp_tlb_addr1; + addr2 = smp_tlb_addr2; + generation = smp_tlb_generation; /* Overlap with serialization */ + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < addr2); + if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) { + d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid | + PMAP_PCID_USER_PT; + d.pad = 0; + d.addr = smp_tlb_addr1; + do { + invpcid(&d, INVPCID_ADDR); + d.addr += PAGE_SIZE; + } while (d.addr < addr2); + } + PCPU_SET(smp_tlb_done, generation); +} + +void +invlrng_pcid_handler(void) +{ + vm_offset_t addr, addr2; + uint64_t kcr3, ucr3; + uint32_t generation; + uint32_t pcid; + +#ifdef COUNT_XINVLTLB_HITS + xhits_rng[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + addr = smp_tlb_addr1; + addr2 = smp_tlb_addr2; + generation = smp_tlb_generation; /* Overlap with serialization */ + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < addr2); + if (smp_tlb_pmap == PCPU_GET(curpmap) && + (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) { + pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; + kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE; + ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; + pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2); + } + PCPU_SET(smp_tlb_done, generation); +} Index: sys/amd64/amd64/pmap.c =================================================================== --- sys/amd64/amd64/pmap.c +++ sys/amd64/amd64/pmap.c @@ -1060,6 +1060,7 @@ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_cr3 = KPML4phys; + kernel_pmap->pm_ucr3 = PMAP_NO_CR3; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); kernel_pmap->pm_flags = pmap_flags; @@ -1097,8 +1098,6 @@ pmap_init_pat(); /* Initialize TLB Context Id. */ - if (pti) - pmap_pcid_enabled = 0; TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled); if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) { /* Check for INVPCID support */ @@ -1576,6 +1575,9 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t *mask; + struct invpcid_descr d; + uint64_t kcr3, ucr3; + uint32_t pcid; u_int cpuid, i; if (pmap_type_guest(pmap)) { @@ -1592,9 +1594,32 @@ mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); - if (pmap == PCPU_GET(curpmap)) + if (pmap == PCPU_GET(curpmap)) { invlpg(va); - else if (pmap_pcid_enabled) + if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) { + /* + * Disable context switching. pm_pcid + * is recalculated on switch, which + * might make us use wrong pcid below. + */ + critical_enter(); + pcid = pmap->pm_pcids[cpuid].pm_pcid; + + if (invpcid_works) { + d.pcid = pcid | PMAP_PCID_USER_PT; + d.pad = 0; + d.addr = va; + invpcid(&d, INVPCID_ADDR); + } else { + kcr3 = pmap->pm_cr3 | pcid | + CR3_PCID_SAVE; + ucr3 = pmap->pm_ucr3 | pcid | + PMAP_PCID_USER_PT | CR3_PCID_SAVE; + pmap_pti_pcid_invlpg(ucr3, kcr3, va); + } + critical_exit(); + } + } else if (pmap_pcid_enabled) pmap->pm_pcids[cpuid].pm_gen = 0; if (pmap_pcid_enabled) { CPU_FOREACH(i) { @@ -1604,7 +1629,7 @@ } mask = &pmap->pm_active; } - smp_masked_invlpg(*mask, va); + smp_masked_invlpg(*mask, va, pmap); sched_unpin(); } @@ -1615,7 +1640,10 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask; + struct invpcid_descr d; vm_offset_t addr; + uint64_t kcr3, ucr3; + uint32_t pcid; u_int cpuid, i; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { @@ -1641,6 +1669,26 @@ if (pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); + if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) { + critical_enter(); + pcid = pmap->pm_pcids[cpuid].pm_pcid; + if (invpcid_works) { + d.pcid = pcid | PMAP_PCID_USER_PT; + d.pad = 0; + d.addr = sva; + for (; d.addr < eva; d.addr += + PAGE_SIZE) + invpcid(&d, INVPCID_ADDR); + } else { + kcr3 = pmap->pm_cr3 | pcid | + CR3_PCID_SAVE; + ucr3 = pmap->pm_ucr3 | pcid | + PMAP_PCID_USER_PT | CR3_PCID_SAVE; + pmap_pti_pcid_invlrng(ucr3, kcr3, sva, + eva); + } + critical_exit(); + } } else if (pmap_pcid_enabled) { pmap->pm_pcids[cpuid].pm_gen = 0; } @@ -1652,7 +1700,7 @@ } mask = &pmap->pm_active; } - smp_masked_invlpg_range(*mask, sva, eva); + smp_masked_invlpg_range(*mask, sva, eva, pmap); sched_unpin(); } @@ -1661,6 +1709,8 @@ { cpuset_t *mask; struct invpcid_descr d; + uint64_t kcr3, ucr3; + uint32_t pcid; u_int cpuid, i; if (pmap_type_guest(pmap)) { @@ -1684,15 +1734,29 @@ cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { + critical_enter(); + pcid = pmap->pm_pcids[cpuid].pm_pcid; if (invpcid_works) { - d.pcid = pmap->pm_pcids[cpuid].pm_pcid; + d.pcid = pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); + if (pmap->pm_ucr3 != PMAP_NO_CR3) { + d.pcid |= PMAP_PCID_USER_PT; + invpcid(&d, INVPCID_CTX); + } } else { - load_cr3(pmap->pm_cr3 | pmap->pm_pcids - [PCPU_GET(cpuid)].pm_pcid); + kcr3 = pmap->pm_cr3 | pcid; + ucr3 = pmap->pm_ucr3; + if (ucr3 != PMAP_NO_CR3) { + ucr3 |= pcid | PMAP_PCID_USER_PT; + pmap_pti_pcid_invalidate(ucr3, + kcr3); + } else { + load_cr3(kcr3); + } } + critical_exit(); } else { invltlb(); } @@ -1797,6 +1861,9 @@ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { + struct invpcid_descr d; + uint64_t kcr3, ucr3; + uint32_t pcid; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; @@ -1805,16 +1872,35 @@ KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: unknown type %d", pmap->pm_type)); - if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) + if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) { invlpg(va); - else if (pmap_pcid_enabled) + if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled && + pmap->pm_ucr3 != PMAP_NO_CR3) { + critical_enter(); + pcid = pmap->pm_pcids[0].pm_pcid; + if (invpcid_works) { + d.pcid = pcid | PMAP_PCID_USER_PT; + d.pad = 0; + d.addr = va; + invpcid(&d, INVPCID_ADDR); + } else { + kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE; + ucr3 = pmap->pm_ucr3 | pcid | + PMAP_PCID_USER_PT | CR3_PCID_SAVE; + pmap_pti_pcid_invlpg(ucr3, kcr3, va); + } + critical_exit(); + } + } else if (pmap_pcid_enabled) pmap->pm_pcids[0].pm_gen = 0; } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + struct invpcid_descr d; vm_offset_t addr; + uint64_t kcr3, ucr3; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; @@ -1826,6 +1912,25 @@ if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); + if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled && + pmap->pm_ucr3 != PMAP_NO_CR3) { + critical_enter(); + if (invpcid_works) { + d.pcid = pmap->pm_pcids[0].pm_pcid | + PMAP_PCID_USER_PT; + d.pad = 0; + d.addr = sva; + for (; d.addr < eva; d.addr += PAGE_SIZE) + invpcid(&d, INVPCID_ADDR); + } else { + kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0]. + pm_pcid | CR3_PCID_SAVE; + ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[0]. + pm_pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; + pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva); + } + critical_exit(); + } } else if (pmap_pcid_enabled) { pmap->pm_pcids[0].pm_gen = 0; } @@ -1835,6 +1940,7 @@ pmap_invalidate_all(pmap_t pmap) { struct invpcid_descr d; + uint64_t kcr3, ucr3; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; @@ -1852,15 +1958,26 @@ } } else if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { + critical_enter(); if (invpcid_works) { d.pcid = pmap->pm_pcids[0].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); + if (pmap->pm_ucr3 != PMAP_NO_CR3) { + d.pcid |= PMAP_PCID_USER_PT; + invpcid(&d, INVPCID_CTX); + } } else { - load_cr3(pmap->pm_cr3 | pmap->pm_pcids[0]. - pm_pcid); + kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0].pm_pcid; + if (pmap->pm_ucr3 != PMAP_NO_CR3) { + ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[ + 0].pm_pcid | PMAP_PCID_USER_PT; + pmap_pti_pcid_invalidate(ucr3, kcr3); + } else + load_cr3(kcr3); } + critical_exit(); } else { invltlb(); } @@ -2398,7 +2515,8 @@ pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_pml4u = NULL; pmap->pm_cr3 = KPML4phys; - pmap->pm_ucr3 = ~0UL; + /* hack to keep pmap_pti_pcid_invalidate() alive */ + pmap->pm_ucr3 = PMAP_NO_CR3; pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); @@ -2408,7 +2526,7 @@ pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; if (!pti) - __pcpu[i].pc_kcr3 = ~0ul; + __pcpu[i].pc_kcr3 = PMAP_NO_CR3; } PCPU_SET(curpmap, kernel_pmap); pmap_activate(curthread); @@ -2472,7 +2590,8 @@ pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; } - pmap->pm_cr3 = ~0l; /* initialize to an invalid value */ + pmap->pm_cr3 = PMAP_NO_CR3; /* initialize to an invalid value */ + pmap->pm_ucr3 = PMAP_NO_CR3; pmap->pm_pml4u = NULL; pmap->pm_type = pm_type; @@ -7134,13 +7253,15 @@ CRITICAL_ASSERT(curthread); gen = PCPU_GET(pcid_gen); - if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN || - pmap->pm_pcids[cpuid].pm_gen == gen) + if (!pti && (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN || + pmap->pm_pcids[cpuid].pm_gen == gen)) return (CR3_PCID_SAVE); pcid_next = PCPU_GET(pcid_next); - KASSERT(pcid_next <= PMAP_PCID_OVERMAX, ("cpu %d pcid_next %#x", - cpuid, pcid_next)); - if (pcid_next == PMAP_PCID_OVERMAX) { + KASSERT((!pti && pcid_next <= PMAP_PCID_OVERMAX) || + (pti && pcid_next <= PMAP_PCID_OVERMAX_KERN), + ("cpu %d pcid_next %#x", cpuid, pcid_next)); + if ((!pti && pcid_next == PMAP_PCID_OVERMAX) || + (pti && pcid_next == PMAP_PCID_OVERMAX_KERN)) { new_gen = gen + 1; if (new_gen == 0) new_gen = 1; @@ -7159,7 +7280,8 @@ pmap_activate_sw(struct thread *td) { pmap_t oldpmap, pmap; - uint64_t cached, cr3; + struct invpcid_descr d; + uint64_t cached, cr3, kcr3, ucr3; register_t rflags; u_int cpuid; @@ -7215,6 +7337,32 @@ PCPU_INC(pm_save_cnt); } PCPU_SET(curpmap, pmap); + if (pti) { + kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid; + ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid | + PMAP_PCID_USER_PT; + + /* + * Manually invalidate translations cached + * from the user page table, which are not + * flushed by reload of cr3 with the kernel + * page table pointer above. + */ + if (pmap->pm_ucr3 != PMAP_NO_CR3) { + if (invpcid_works) { + d.pcid = PMAP_PCID_USER_PT | + pmap->pm_pcids[cpuid].pm_pcid; + d.pad = 0; + d.addr = 0; + invpcid(&d, INVPCID_CTX); + } else { + pmap_pti_pcid_invalidate(ucr3, kcr3); + } + } + + PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE); + PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE); + } if (!invpcid_works) intr_restore(rflags); } else if (cr3 != pmap->pm_cr3) { Index: sys/amd64/amd64/support.S =================================================================== --- sys/amd64/amd64/support.S +++ sys/amd64/amd64/support.S @@ -802,3 +802,51 @@ movl $EFAULT,%eax POP_FRAME_POINTER ret + +/* + * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3); + * Invalidates address space addressed by ucr3, then returns to kcr3. + * Done in assembler to ensure no other memory accesses happen while + * on ucr3. + */ + ALIGN_TEXT +ENTRY(pmap_pti_pcid_invalidate) + pushfq + cli + movq %rdi,%cr3 /* to user page table */ + movq %rsi,%cr3 /* back to kernel */ + popfq + retq + +/* + * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va); + * Invalidates virtual address va in address space ucr3, then returns to kcr3. + */ + ALIGN_TEXT +ENTRY(pmap_pti_pcid_invlpg) + pushfq + cli + movq %rdi,%cr3 /* to user page table */ + invlpg (%rdx) + movq %rsi,%cr3 /* back to kernel */ + popfq + retq + +/* + * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva, + * vm_offset_t eva); + * Invalidates virtual addresses between sva and eva in address space ucr3, + * then returns to kcr3. + */ + ALIGN_TEXT +ENTRY(pmap_pti_pcid_invlrng) + pushfq + cli + movq %rdi,%cr3 /* to user page table */ +1: invlpg (%rdx) + addq $PAGE_SIZE,%rdx + cmpq %rdx,%rcx + ja 1b + movq %rsi,%cr3 /* back to kernel */ + popfq + retq Index: sys/amd64/include/pmap.h =================================================================== --- sys/amd64/include/pmap.h +++ sys/amd64/include/pmap.h @@ -225,6 +225,10 @@ #define PMAP_PCID_NONE 0xffffffff #define PMAP_PCID_KERN 0 #define PMAP_PCID_OVERMAX 0x1000 +#define PMAP_PCID_OVERMAX_KERN 0x800 +#define PMAP_PCID_USER_PT 0x800 + +#define PMAP_NO_CR3 (~0UL) #ifndef LOCORE @@ -433,6 +437,10 @@ void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); void pmap_pti_add_kva(vm_offset_t sva, vm_offset_t eva, bool exec); void pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva); +void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3); +void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va); +void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva, + vm_offset_t eva); #endif /* _KERNEL */ /* Return various clipped indexes for a given VA */ Index: sys/amd64/include/smp.h =================================================================== --- sys/amd64/include/smp.h +++ sys/amd64/include/smp.h @@ -28,15 +28,23 @@ /* IPI handlers */ inthand_t - IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */ - IDTVEC(invltlb_invpcid),/* TLB shootdowns - global, invpcid */ IDTVEC(justreturn), /* interrupt CPU with minimum overhead */ - IDTVEC(invltlb_pcid_pti), - IDTVEC(invltlb_invpcid_pti), IDTVEC(justreturn1_pti), IDTVEC(invltlb_pti), + IDTVEC(invltlb_pcid_pti), + IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */ + IDTVEC(invltlb_invpcid_pti_pti), + IDTVEC(invltlb_invpcid_nopti), IDTVEC(invlpg_pti), + IDTVEC(invlpg_invpcid_pti), + IDTVEC(invlpg_invpcid), + IDTVEC(invlpg_pcid_pti), + IDTVEC(invlpg_pcid), IDTVEC(invlrng_pti), + IDTVEC(invlrng_invpcid_pti), + IDTVEC(invlrng_invpcid), + IDTVEC(invlrng_pcid_pti), + IDTVEC(invlrng_pcid), IDTVEC(invlcache_pti), IDTVEC(ipi_intr_bitmap_handler_pti), IDTVEC(cpustop_pti), @@ -45,6 +53,11 @@ void invltlb_pcid_handler(void); void invltlb_invpcid_handler(void); +void invltlb_invpcid_pti_handler(void); +void invlpg_invpcid_handler(void); +void invlpg_pcid_handler(void); +void invlrng_invpcid_handler(void); +void invlrng_pcid_handler(void); int native_start_all_aps(void); #endif /* !LOCORE */ Index: sys/i386/i386/pmap.c =================================================================== --- sys/i386/i386/pmap.c +++ sys/i386/i386/pmap.c @@ -1045,7 +1045,7 @@ CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } - smp_masked_invlpg(*mask, va); + smp_masked_invlpg(*mask, va, pmap); sched_unpin(); } @@ -1079,7 +1079,7 @@ CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } - smp_masked_invlpg_range(*mask, sva, eva); + smp_masked_invlpg_range(*mask, sva, eva, pmap); sched_unpin(); } Index: sys/i386/i386/vm_machdep.c =================================================================== --- sys/i386/i386/vm_machdep.c +++ sys/i386/i386/vm_machdep.c @@ -768,7 +768,7 @@ CPU_NAND(&other_cpus, &sf->cpumask); if (!CPU_EMPTY(&other_cpus)) { CPU_OR(&sf->cpumask, &other_cpus); - smp_masked_invlpg(other_cpus, sf->kva); + smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap); } } sched_unpin(); Index: sys/x86/include/x86_smp.h =================================================================== --- sys/x86/include/x86_smp.h +++ sys/x86/include/x86_smp.h @@ -39,6 +39,7 @@ extern int cpu_cores; extern volatile uint32_t smp_tlb_generation; extern struct pmap *smp_tlb_pmap; +extern vm_offset_t smp_tlb_addr1, smp_tlb_addr2; extern u_int xhits_gbl[]; extern u_int xhits_pg[]; extern u_int xhits_rng[]; @@ -97,9 +98,9 @@ u_int mp_bootaddress(u_int); void set_interrupt_apic_ids(void); void smp_cache_flush(void); -void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap); void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, - vm_offset_t endva); + vm_offset_t endva, struct pmap *pmap); void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap); void mem_range_AP_init(void); void topo_probe(void); Index: sys/x86/x86/mp_x86.c =================================================================== --- sys/x86/x86/mp_x86.c +++ sys/x86/x86/mp_x86.c @@ -1506,7 +1506,7 @@ */ /* Variables needed for SMP tlb shootdown. */ -static vm_offset_t smp_tlb_addr1, smp_tlb_addr2; +vm_offset_t smp_tlb_addr1, smp_tlb_addr2; pmap_t smp_tlb_pmap; volatile uint32_t smp_tlb_generation; @@ -1583,11 +1583,11 @@ } void -smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap) { if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, NULL, addr, 0); + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_page++; #endif @@ -1595,11 +1595,12 @@ } void -smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2, + pmap_t pmap) { if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, NULL, + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_range++;