Index: sys/amd64/amd64/mp_machdep.c
===================================================================
--- sys/amd64/amd64/mp_machdep.c
+++ sys/amd64/amd64/mp_machdep.c
@@ -638,8 +638,8 @@
  * Used by pmap to request cache or TLB invalidation on local and
  * remote processors. Mask provides the set of remote CPUs which are
  * to be signalled with the invalidation IPI. As an optimization, the
- * curcpu_cb callback is invoked on the calling CPU while waiting for
- * remote CPUs to complete the operation.
+ * curcpu_cb callback is invoked on the calling CPU in critical
+ * section while waiting for remote CPUs to complete the operation.
  *
  * The callback function is called unconditionally on the caller's
  * underlying processor, even when this processor is not set in the
@@ -662,23 +662,19 @@
 	 * It is not necessary to signal other CPUs while booting or
 	 * when in the debugger.
 	 */
-	if (kdb_active || KERNEL_PANICKED() || !smp_started) {
-		curcpu_cb(pmap, addr1, addr2);
-		return;
-	}
-
-	sched_pin();
+	if (kdb_active || KERNEL_PANICKED() || !smp_started)
+		goto local_cb;
 
 	/*
 	 * Check for other cpus.  Return if none.
 	 */
 	if (CPU_ISFULLSET(&mask)) {
 		if (mp_ncpus <= 1)
-			goto nospinexit;
+			goto local_cb;
 	} else {
 		CPU_CLR(PCPU_GET(cpuid), &mask);
 		if (CPU_EMPTY(&mask))
-			goto nospinexit;
+			goto local_cb;
 	}
 
 	/*
@@ -735,12 +731,12 @@
 		ia32_pause();
 	}
 	critical_exit();
-	sched_unpin();
 	return;
 
-nospinexit:
+local_cb:
+	critical_enter();
 	curcpu_cb(pmap, addr1, addr2);
-	sched_unpin();
+	critical_exit();
 }
 
 void
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2773,53 +2773,16 @@
 static cpuset_t
 pmap_invalidate_cpu_mask(pmap_t pmap)
 {
-
 	return (pmap == kernel_pmap ? all_cpus : pmap->pm_active);
 }
 
 static inline void
-pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va,
-    const bool invpcid_works1)
+pmap_invalidate_preipi_pcid(pmap_t pmap)
 {
-	struct invpcid_descr d;
-	uint64_t kcr3, ucr3;
-	uint32_t pcid;
 	u_int cpuid, i;
 
 	cpuid = PCPU_GET(cpuid);
-	if (pmap == PCPU_GET(curpmap)) {
-		if (pmap->pm_ucr3 != PMAP_NO_CR3 &&
-		    /*
-		     * If we context-switched right after
-		     * PCPU_GET(ucr3_load_mask), we could read the
-		     * ~CR3_PCID_SAVE mask, which causes us to skip
-		     * the code below to invalidate user pages. This
-		     * is handled in pmap_activate_sw_pcid_pti() by
-		     * clearing pm_gen if ucr3_load_mask is ~CR3_PCID_SAVE.
-		     */
-		    PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
-			/*
-			 * Because pm_pcid is recalculated on a
-			 * context switch, we must disable switching.
-			 * Otherwise, we might use a stale value
-			 * below.
-			 */
-			critical_enter();
-			pcid = pmap->pm_pcids[cpuid].pm_pcid;
-			if (invpcid_works1) {
-				d.pcid = pcid | PMAP_PCID_USER_PT;
-				d.pad = 0;
-				d.addr = va;
-				invpcid(&d, INVPCID_ADDR);
-			} else {
-				kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
-				ucr3 = pmap->pm_ucr3 | pcid |
-				    PMAP_PCID_USER_PT | CR3_PCID_SAVE;
-				pmap_pti_pcid_invlpg(ucr3, kcr3, va);
-			}
-			critical_exit();
-		}
-	} else
+	if (pmap != PCPU_GET(curpmap))
 		pmap->pm_pcids[cpuid].pm_gen = 0;
 
 	CPU_FOREACH(i) {
@@ -2841,51 +2804,97 @@
 }
 
 static void
-pmap_invalidate_page_pcid_invpcid(pmap_t pmap, vm_offset_t va)
+pmap_invalidate_preipi_nopcid(pmap_t pmap __unused)
 {
+}
 
-	pmap_invalidate_page_pcid(pmap, va, true);
+DEFINE_IFUNC(static, void, pmap_invalidate_preipi, (pmap_t))
+{
+	return (pmap_pcid_enabled ? pmap_invalidate_preipi_pcid :
+	    pmap_invalidate_preipi_nopcid);
 }
 
-static void
-pmap_invalidate_page_pcid_noinvpcid(pmap_t pmap, vm_offset_t va)
+static inline void
+pmap_invalidate_page_pcid_cb(pmap_t pmap, vm_offset_t va,
+    const bool invpcid_works1)
 {
+	struct invpcid_descr d;
+	uint64_t kcr3, ucr3;
+	uint32_t pcid;
+	u_int cpuid;
+
+	/*
+	 * If we context-switched right after
+	 * PCPU_GET(ucr3_load_mask), we could read the ~CR3_PCID_SAVE
+	 * mask, which causes us to skip the code below to invalidate
+	 * user pages. This is handled in pmap_activate_sw_pcid_pti()
+	 * by clearing pm_gen if ucr3_load_mask is ~CR3_PCID_SAVE.
+	 */
+	if (pmap->pm_ucr3 == PMAP_NO_CR3 ||
+	    PCPU_GET(ucr3_load_mask) != PMAP_UCR3_NOMASK)
+		return;
+
+	/*
+	 * Because pm_pcid is recalculated on a context switch, we
+	 * must ensure there is no switching, not just pinning.
+	 * Otherwise, we might use a stale value below.
+	 */
+	CRITICAL_ASSERT(curthread);
+	cpuid = PCPU_GET(cpuid);
 
-	pmap_invalidate_page_pcid(pmap, va, false);
+	pcid = pmap->pm_pcids[cpuid].pm_pcid;
+	if (invpcid_works1) {
+		d.pcid = pcid | PMAP_PCID_USER_PT;
+		d.pad = 0;
+		d.addr = va;
+		invpcid(&d, INVPCID_ADDR);
+	} else {
+		kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
+		ucr3 = pmap->pm_ucr3 | pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+		pmap_pti_pcid_invlpg(ucr3, kcr3, va);
+	}
+}
+
+static void
+pmap_invalidate_page_pcid_invpcid_cb(pmap_t pmap, vm_offset_t va)
+{
+	pmap_invalidate_page_pcid_cb(pmap, va, true);
 }
 
 static void
-pmap_invalidate_page_nopcid(pmap_t pmap, vm_offset_t va)
+pmap_invalidate_page_pcid_noinvpcid_cb(pmap_t pmap, vm_offset_t va)
 {
+	pmap_invalidate_page_pcid_cb(pmap, va, false);
 }
 
-DEFINE_IFUNC(static, void, pmap_invalidate_page_mode, (pmap_t, vm_offset_t))
+static void
+pmap_invalidate_page_nopcid_cb(pmap_t pmap __unused, vm_offset_t va __unused)
 {
+}
 
+DEFINE_IFUNC(static, void, pmap_invalidate_page_cb, (pmap_t, vm_offset_t))
+{
 	if (pmap_pcid_enabled)
-		return (invpcid_works ? pmap_invalidate_page_pcid_invpcid :
-		    pmap_invalidate_page_pcid_noinvpcid);
-	return (pmap_invalidate_page_nopcid);
+		return (invpcid_works ? pmap_invalidate_page_pcid_invpcid_cb :
+		    pmap_invalidate_page_pcid_noinvpcid_cb);
+	return (pmap_invalidate_page_nopcid_cb);
 }
 
 static void
 pmap_invalidate_page_curcpu_cb(pmap_t pmap, vm_offset_t va,
     vm_offset_t addr2 __unused)
 {
-
 	if (pmap == kernel_pmap) {
 		invlpg(va);
-	} else {
-		if (pmap == PCPU_GET(curpmap))
-			invlpg(va);
-		pmap_invalidate_page_mode(pmap, va);
+	} else if (pmap == PCPU_GET(curpmap)) {
+		invlpg(va);
+		pmap_invalidate_page_cb(pmap, va);
 	}
 }
 
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
-
 	if (pmap_type_guest(pmap)) {
 		pmap_invalidate_ept(pmap);
 		return;
@@ -2894,82 +2903,73 @@
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));
 
+	sched_pin();
+	pmap_invalidate_preipi(pmap);
 	smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap,
 	    pmap_invalidate_page_curcpu_cb);
+	sched_unpin();
 }
 
 /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */
 #define	PMAP_INVLPG_THRESHOLD	(4 * 1024 * PAGE_SIZE)
 
 static void
-pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+pmap_invalidate_range_pcid_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
     const bool invpcid_works1)
 {
 	struct invpcid_descr d;
 	uint64_t kcr3, ucr3;
 	uint32_t pcid;
-	u_int cpuid, i;
+	u_int cpuid;
+
+	if (pmap != PCPU_GET(curpmap) ||
+	    pmap->pm_ucr3 == PMAP_NO_CR3 ||
+	    PCPU_GET(ucr3_load_mask) != PMAP_UCR3_NOMASK)
+		return;
+	CRITICAL_ASSERT(curthread);
 
 	cpuid = PCPU_GET(cpuid);
-	if (pmap == PCPU_GET(curpmap)) {
-		if (pmap->pm_ucr3 != PMAP_NO_CR3 &&
-		    PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
-			critical_enter();
-			pcid = pmap->pm_pcids[cpuid].pm_pcid;
-			if (invpcid_works1) {
-				d.pcid = pcid | PMAP_PCID_USER_PT;
-				d.pad = 0;
-				d.addr = sva;
-				for (; d.addr < eva; d.addr += PAGE_SIZE)
-					invpcid(&d, INVPCID_ADDR);
-			} else {
-				kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
-				ucr3 = pmap->pm_ucr3 | pcid |
-				    PMAP_PCID_USER_PT | CR3_PCID_SAVE;
-				pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva);
-			}
-			critical_exit();
-		}
-	} else
-		pmap->pm_pcids[cpuid].pm_gen = 0;
-
-	CPU_FOREACH(i) {
-		if (cpuid != i)
-			pmap->pm_pcids[i].pm_gen = 0;
+
+	pcid = pmap->pm_pcids[cpuid].pm_pcid;
+	if (invpcid_works1) {
+		d.pcid = pcid | PMAP_PCID_USER_PT;
+		d.pad = 0;
+		for (d.addr = sva; d.addr < eva; d.addr += PAGE_SIZE)
+			invpcid(&d, INVPCID_ADDR);
+	} else {
+		kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
+		ucr3 = pmap->pm_ucr3 | pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+		pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva);
 	}
-	/* See the comment in pmap_invalidate_page_pcid(). */
-	atomic_thread_fence_seq_cst();
 }
 
 static void
-pmap_invalidate_range_pcid_invpcid(pmap_t pmap, vm_offset_t sva,
+pmap_invalidate_range_pcid_invpcid_cb(pmap_t pmap, vm_offset_t sva,
     vm_offset_t eva)
 {
-
-	pmap_invalidate_range_pcid(pmap, sva, eva, true);
+	pmap_invalidate_range_pcid_cb(pmap, sva, eva, true);
 }
 
 static void
-pmap_invalidate_range_pcid_noinvpcid(pmap_t pmap, vm_offset_t sva,
+pmap_invalidate_range_pcid_noinvpcid_cb(pmap_t pmap, vm_offset_t sva,
    vm_offset_t eva)
 {
-
-	pmap_invalidate_range_pcid(pmap, sva, eva, false);
+	pmap_invalidate_range_pcid_cb(pmap, sva, eva, false);
 }
 
 static void
-pmap_invalidate_range_nopcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_invalidate_range_nopcid_cb(pmap_t pmap __unused, vm_offset_t sva __unused,
+    vm_offset_t eva __unused)
 {
 }
 
-DEFINE_IFUNC(static, void, pmap_invalidate_range_mode, (pmap_t, vm_offset_t,
+DEFINE_IFUNC(static, void, pmap_invalidate_range_cb, (pmap_t, vm_offset_t,
     vm_offset_t))
 {
-
 	if (pmap_pcid_enabled)
-		return (invpcid_works ? pmap_invalidate_range_pcid_invpcid :
-		    pmap_invalidate_range_pcid_noinvpcid);
-	return (pmap_invalidate_range_nopcid);
+		return (invpcid_works ? pmap_invalidate_range_pcid_invpcid_cb :
+		    pmap_invalidate_range_pcid_noinvpcid_cb);
+	return (pmap_invalidate_range_nopcid_cb);
 }
 
 static void
@@ -2980,19 +2980,16 @@
 	if (pmap == kernel_pmap) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
-	} else {
-		if (pmap == PCPU_GET(curpmap)) {
-			for (addr = sva; addr < eva; addr += PAGE_SIZE)
-				invlpg(addr);
-		}
-		pmap_invalidate_range_mode(pmap, sva, eva);
+	} else if (pmap == PCPU_GET(curpmap)) {
+		for (addr = sva; addr < eva; addr += PAGE_SIZE)
+			invlpg(addr);
+		pmap_invalidate_range_cb(pmap, sva, eva);
 	}
 }
 
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-
 	if (eva - sva >= PMAP_INVLPG_THRESHOLD) {
 		pmap_invalidate_all(pmap);
 		return;
@@ -3006,17 +3003,20 @@
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_range: invalid type %d", pmap->pm_type));
 
+	sched_pin();
+	pmap_invalidate_preipi(pmap);
 	smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap,
 	    pmap_invalidate_range_curcpu_cb);
+	sched_unpin();
 }
 
 static inline void
-pmap_invalidate_all_pcid(pmap_t pmap, bool invpcid_works1)
+pmap_invalidate_all_pcid_cb(pmap_t pmap, bool invpcid_works1)
 {
 	struct invpcid_descr d;
 	uint64_t kcr3;
 	uint32_t pcid;
-	u_int cpuid, i;
+	u_int cpuid;
 
 	if (pmap == kernel_pmap) {
 		if (invpcid_works1) {
@@ -3025,79 +3025,64 @@
 		} else {
 			invltlb_glob();
 		}
-	} else {
+	} else if (pmap == PCPU_GET(curpmap)) {
+		CRITICAL_ASSERT(curthread);
 		cpuid = PCPU_GET(cpuid);
-		if (pmap == PCPU_GET(curpmap)) {
-			critical_enter();
-			pcid = pmap->pm_pcids[cpuid].pm_pcid;
-			if (invpcid_works1) {
-				d.pcid = pcid;
-				d.pad = 0;
-				d.addr = 0;
-				invpcid(&d, INVPCID_CTX);
-			} else {
-				kcr3 = pmap->pm_cr3 | pcid;
-				load_cr3(kcr3);
-			}
-			if (pmap->pm_ucr3 != PMAP_NO_CR3)
-				PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
-			critical_exit();
-		} else
-			pmap->pm_pcids[cpuid].pm_gen = 0;
-		CPU_FOREACH(i) {
-			if (cpuid != i)
-				pmap->pm_pcids[i].pm_gen = 0;
+
+		pcid = pmap->pm_pcids[cpuid].pm_pcid;
+		if (invpcid_works1) {
+			d.pcid = pcid;
+			d.pad = 0;
+			d.addr = 0;
+			invpcid(&d, INVPCID_CTX);
+		} else {
+			kcr3 = pmap->pm_cr3 | pcid;
+			load_cr3(kcr3);
 		}
+		if (pmap->pm_ucr3 != PMAP_NO_CR3)
+			PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
 	}
-	/* See the comment in pmap_invalidate_page_pcid(). */
-	atomic_thread_fence_seq_cst();
 }
 
 static void
-pmap_invalidate_all_pcid_invpcid(pmap_t pmap)
+pmap_invalidate_all_pcid_invpcid_cb(pmap_t pmap)
 {
-
-	pmap_invalidate_all_pcid(pmap, true);
+	pmap_invalidate_all_pcid_cb(pmap, true);
}
 
 static void
-pmap_invalidate_all_pcid_noinvpcid(pmap_t pmap)
+pmap_invalidate_all_pcid_noinvpcid_cb(pmap_t pmap)
 {
-
-	pmap_invalidate_all_pcid(pmap, false);
+	pmap_invalidate_all_pcid_cb(pmap, false);
 }
 
 static void
-pmap_invalidate_all_nopcid(pmap_t pmap)
+pmap_invalidate_all_nopcid_cb(pmap_t pmap)
 {
-
 	if (pmap == kernel_pmap)
 		invltlb_glob();
 	else if (pmap == PCPU_GET(curpmap))
 		invltlb();
 }
 
-DEFINE_IFUNC(static, void, pmap_invalidate_all_mode, (pmap_t))
+DEFINE_IFUNC(static, void, pmap_invalidate_all_cb, (pmap_t))
 {
-
 	if (pmap_pcid_enabled)
-		return (invpcid_works ? pmap_invalidate_all_pcid_invpcid :
-		    pmap_invalidate_all_pcid_noinvpcid);
-	return (pmap_invalidate_all_nopcid);
+		return (invpcid_works ? pmap_invalidate_all_pcid_invpcid_cb :
+		    pmap_invalidate_all_pcid_noinvpcid_cb);
+	return (pmap_invalidate_all_nopcid_cb);
 }
 
 static void
 pmap_invalidate_all_curcpu_cb(pmap_t pmap, vm_offset_t addr1 __unused,
     vm_offset_t addr2 __unused)
 {
-
-	pmap_invalidate_all_mode(pmap);
+	pmap_invalidate_all_cb(pmap);
 }
 
 void
 pmap_invalidate_all(pmap_t pmap)
 {
-
 	if (pmap_type_guest(pmap)) {
 		pmap_invalidate_ept(pmap);
 		return;
@@ -3106,23 +3091,26 @@
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
 
+	sched_pin();
+	pmap_invalidate_preipi(pmap);
 	smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap,
 	    pmap_invalidate_all_curcpu_cb);
+	sched_unpin();
 }
 
 static void
 pmap_invalidate_cache_curcpu_cb(pmap_t pmap __unused, vm_offset_t va __unused,
     vm_offset_t addr2 __unused)
 {
-
 	wbinvd();
 }
 
 void
 pmap_invalidate_cache(void)
 {
-
+	sched_pin();
 	smp_cache_flush(pmap_invalidate_cache_curcpu_cb);
+	sched_unpin();
 }
 
 struct pde_action {
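
Note for readers tracing the new control flow (illustrative commentary, not part of the patch): after this change every pmap_invalidate_page/range/all call follows the same three-step shape. The caller (1) pins itself with sched_pin(), (2) runs pmap_invalidate_preipi() to clear the stale per-CPU PCID generations before any IPI is sent, and (3) enters the shootdown, which invokes the local curcpu_cb callback inside a critical section -- the condition that the new CRITICAL_ASSERT(curthread) in the *_cb functions checks, replacing the old per-callback critical_enter()/critical_exit() pairs. The short standalone C model below sketches that ordering. Every name in it (shootdown_model, pm_gen, crit_nesting, and so on) is a hypothetical stand-in for illustration, not a kernel API:

	#include <stdio.h>

	#define NCPU 4

	static int crit_nesting;	/* models td_critnest */
	static unsigned pm_gen[NCPU];	/* models pm_pcids[cpu].pm_gen */

	static void critical_enter_model(void) { crit_nesting++; }
	static void critical_exit_model(void)  { crit_nesting--; }

	/* Step 2: pre-IPI bookkeeping, done before any IPI is sent. */
	static void
	invalidate_preipi_model(int curcpu)
	{
		for (int i = 0; i < NCPU; i++)
			if (i != curcpu)
				pm_gen[i] = 0;	/* force PCID re-derivation */
	}

	/* The local callback; it must observe a critical section. */
	static void
	local_cb_model(void)
	{
		if (crit_nesting == 0) {	/* models CRITICAL_ASSERT() */
			fprintf(stderr, "callback outside critical section\n");
			return;
		}
		printf("local TLB invalidation done\n");
	}

	/*
	 * Step 3: the shootdown wraps the local callback in a critical
	 * section while the remote CPUs would process their IPIs.
	 */
	static void
	shootdown_model(int curcpu, void (*curcpu_cb)(void))
	{
		invalidate_preipi_model(curcpu);
		critical_enter_model();
		curcpu_cb();		/* ...remote CPUs ack in parallel... */
		critical_exit_model();
	}

	int
	main(void)
	{
		/* Step 1 (sched_pin()) is implicit in this 1-thread model. */
		shootdown_model(0, local_cb_model);
		return (0);
	}

The model makes the point of the reordering visible: the generation clearing happens strictly before any "IPI" work, so a remote CPU cannot switch to the pmap with a stale PCID after acknowledging the invalidation, and the callback can assert a critical section rather than merely relying on pinning -- which, as the patch's own comment notes, is not enough to keep pm_pcid from being recalculated under it.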