Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2591,6 +2591,20 @@
 	return (pmap_invalidate_page_nopcid);
 }

+static void
+pmap_invalidate_page_curcpu_cb(pmap_t pmap, vm_offset_t va,
+    vm_offset_t addr2 __unused)
+{
+
+	if (pmap == kernel_pmap) {
+		invlpg(va);
+	} else {
+		if (pmap == PCPU_GET(curpmap))
+			invlpg(va);
+		pmap_invalidate_page_mode(pmap, va);
+	}
+}
+
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
@@ -2603,16 +2617,8 @@
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));

-	sched_pin();
-	if (pmap == kernel_pmap) {
-		invlpg(va);
-	} else {
-		if (pmap == PCPU_GET(curpmap))
-			invlpg(va);
-		pmap_invalidate_page_mode(pmap, va);
-	}
-	smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap);
-	sched_unpin();
+	smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap,
+	    pmap_invalidate_page_curcpu_cb);
 }

 /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */
@@ -2688,10 +2694,26 @@
 	return (pmap_invalidate_range_nopcid);
 }

+static void
+pmap_invalidate_range_curcpu_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	vm_offset_t addr;
+
+	if (pmap == kernel_pmap) {
+		for (addr = sva; addr < eva; addr += PAGE_SIZE)
+			invlpg(addr);
+	} else {
+		if (pmap == PCPU_GET(curpmap)) {
+			for (addr = sva; addr < eva; addr += PAGE_SIZE)
+				invlpg(addr);
+		}
+		pmap_invalidate_range_mode(pmap, sva, eva);
+	}
+}
+
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	vm_offset_t addr;

 	if (eva - sva >= PMAP_INVLPG_THRESHOLD) {
 		pmap_invalidate_all(pmap);
@@ -2706,19 +2728,8 @@
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_range: invalid type %d", pmap->pm_type));

-	sched_pin();
-	if (pmap == kernel_pmap) {
-		for (addr = sva; addr < eva; addr += PAGE_SIZE)
-			invlpg(addr);
-	} else {
-		if (pmap == PCPU_GET(curpmap)) {
-			for (addr = sva; addr < eva; addr += PAGE_SIZE)
-				invlpg(addr);
-		}
-		pmap_invalidate_range_mode(pmap, sva, eva);
-	}
-	smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap);
-	sched_unpin();
+	smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap,
+	    pmap_invalidate_range_curcpu_cb);
 }

 static inline void
@@ -2805,6 +2816,14 @@
 	return (pmap_invalidate_all_nopcid);
 }

+static void
+pmap_invalidate_all_curcpu_cb(pmap_t pmap, vm_offset_t addr1 __unused,
+    vm_offset_t addr2 __unused)
+{
+
+	pmap_invalidate_all_mode(pmap);
+}
+
 void
 pmap_invalidate_all(pmap_t pmap)
 {
@@ -2817,20 +2836,23 @@
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_all: invalid type %d", pmap->pm_type));

-	sched_pin();
-	pmap_invalidate_all_mode(pmap);
-	smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap);
-	sched_unpin();
+	smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap,
+	    pmap_invalidate_all_curcpu_cb);
+}
+
+static void
+pmap_invalidate_cache_curcpu_cb(pmap_t pmap __unused, vm_offset_t va __unused,
+    vm_offset_t addr2 __unused)
+{
+
+	wbinvd();
 }

 void
 pmap_invalidate_cache(void)
 {

-	sched_pin();
-	wbinvd();
-	smp_cache_flush();
-	sched_unpin();
+	smp_cache_flush(pmap_invalidate_cache_curcpu_cb);
 }

 struct pde_action {
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -1203,6 +1203,13 @@
 }

 #ifdef SMP
+
+static void
+pmap_curcpu_cb_dummy(pmap_t pmap __unused, vm_offset_t addr1 __unused,
+    vm_offset_t addr2 __unused)
+{
+}
+
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
  *
@@ -1241,7 +1248,7 @@
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invlpg(*mask, va, pmap);
+	smp_masked_invlpg(*mask, va, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }

@@ -1274,7 +1281,7 @@
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invlpg_range(*mask, sva, eva, pmap);
+	smp_masked_invlpg_range(*mask, sva, eva, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }

@@ -1297,18 +1304,21 @@
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invltlb(*mask, pmap);
+	smp_masked_invltlb(*mask, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }

 static void
-__CONCAT(PMTYPE, invalidate_cache)(void)
+pmap_invalidate_cache_curcpu_cb(pmap_t pmap __unused,
+    vm_offset_t addr1 __unused, vm_offset_t addr2 __unused)
 {
-
-	sched_pin();
 	wbinvd();
-	smp_cache_flush();
-	sched_unpin();
+}
+
+static void
+__CONCAT(PMTYPE, invalidate_cache)(void)
+{
+	smp_cache_flush(pmap_invalidate_cache_curcpu_cb);
 }

 struct pde_action {
Index: sys/i386/i386/vm_machdep.c
===================================================================
--- sys/i386/i386/vm_machdep.c
+++ sys/i386/i386/vm_machdep.c
@@ -578,6 +578,12 @@
 }

 #ifdef SMP
+static void
+sf_buf_shootdown_curcpu_cb(pmap_t pmap __unused,
+    vm_offset_t addr1 __unused, vm_offset_t addr2 __unused)
+{
+}
+
 void
 sf_buf_shootdown(struct sf_buf *sf, int flags)
 {
@@ -596,7 +602,8 @@
 		CPU_ANDNOT(&other_cpus, &sf->cpumask);
 		if (!CPU_EMPTY(&other_cpus)) {
 			CPU_OR(&sf->cpumask, &other_cpus);
-			smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap);
+			smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap,
+			    sf_buf_shootdown_curcpu_cb);
 		}
 	}
 	sched_unpin();
Index: sys/x86/include/x86_smp.h
===================================================================
--- sys/x86/include/x86_smp.h
+++ sys/x86/include/x86_smp.h
@@ -84,6 +84,9 @@
 	IDTVEC(cpususpend),	/* CPU suspends & waits to be resumed */
 	IDTVEC(rendezvous);	/* handle CPU rendezvous */

+typedef void (*smp_invl_cb_t)(struct pmap *, vm_offset_t addr1,
+    vm_offset_t addr2);
+
 /* functions in x86_mp.c */
 void	assign_cpu_ids(void);
 void	cpu_add(u_int apic_id, char boot_cpu);
@@ -103,11 +106,13 @@
 int	ipi_nmi_handler(void);
 void	ipi_selected(cpuset_t cpus, u_int ipi);
 void	set_interrupt_apic_ids(void);
-void	smp_cache_flush(void);
-void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap);
+void	smp_cache_flush(smp_invl_cb_t curcpu_cb);
+void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap,
+	    smp_invl_cb_t curcpu_cb);
 void	smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
-	    vm_offset_t endva, struct pmap *pmap);
-void	smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
+	    vm_offset_t endva, struct pmap *pmap, smp_invl_cb_t curcpu_cb);
+void	smp_masked_invltlb(cpuset_t mask, struct pmap *pmap,
+	    smp_invl_cb_t curcpu_cb);
 void	mem_range_AP_init(void);
 void	topo_probe(void);
 void	ipi_send_cpu(int cpu, u_int ipi);
Index: sys/x86/x86/mp_x86.c
===================================================================
--- sys/x86/x86/mp_x86.c
+++ sys/x86/x86/mp_x86.c
@@ -1676,9 +1676,21 @@
 #define	read_eflags() read_rflags()
 #endif

+/*
+ * Used by pmap to request invalidation of TLB or cache on local and
+ * remote processors.  Mask provides the set of remote CPUs which are
+ * to be signalled with the IPI specified by vector.  The curcpu_cb
+ * callback is invoked on the calling CPU while waiting for remote
+ * CPUs to complete the operation.
+ *
+ * The callback function is called unconditionally on the caller's
+ * underlying processor, even when this processor is not set in the
+ * mask.  So, the callback function must be prepared to handle such
+ * spurious invocations.
+ */
 static void
 smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
-    vm_offset_t addr1, vm_offset_t addr2)
+    vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
 {
 	cpuset_t other_cpus;
 	volatile uint32_t *p_cpudone;
@@ -1686,19 +1698,23 @@
 	int cpu;

 	/* It is not necessary to signal other CPUs while in the debugger. */
-	if (kdb_active || KERNEL_PANICKED())
+	if (kdb_active || KERNEL_PANICKED()) {
+		curcpu_cb(pmap, addr1, addr2);
 		return;
+	}
+
+	sched_pin();

 	/*
 	 * Check for other cpus.  Return if none.
 	 */
 	if (CPU_ISFULLSET(&mask)) {
 		if (mp_ncpus <= 1)
-			return;
+			goto nospinexit;
 	} else {
 		CPU_CLR(PCPU_GET(cpuid), &mask);
 		if (CPU_EMPTY(&mask))
-			return;
+			goto nospinexit;
 	}

 	if (!(read_eflags() & PSL_I))
@@ -1722,6 +1738,7 @@
 			ipi_send_cpu(cpu, vector);
 		}
 	}
+	curcpu_cb(pmap, addr1, addr2);
 	while ((cpu = CPU_FFS(&other_cpus)) != 0) {
 		cpu--;
 		CPU_CLR(cpu, &other_cpus);
@@ -1730,14 +1747,21 @@
 			ia32_pause();
 	}
 	mtx_unlock_spin(&smp_ipi_mtx);
+	sched_unpin();
+	return;
+
+nospinexit:
+	curcpu_cb(pmap, addr1, addr2);
+	sched_unpin();
 }

 void
-smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {

 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0,
+		    curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_global++;
 #endif
@@ -1745,11 +1769,13 @@
 }

 void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
+    smp_invl_cb_t curcpu_cb)
 {

 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0,
+		    curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_page++;
 #endif
@@ -1758,12 +1784,12 @@

 void
 smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
-    pmap_t pmap)
+    pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {

 	if (smp_started) {
 		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap,
-		    addr1, addr2);
+		    addr1, addr2, curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_range++;
 		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 #endif
@@ -1772,12 +1798,12 @@
 }

 void
-smp_cache_flush(void)
+smp_cache_flush(smp_invl_cb_t curcpu_cb)
 {

 	if (smp_started) {
 		smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL,
-		    0, 0);
+		    0, 0, curcpu_cb);
 	}
 }
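
A minimal sketch of a consumer of the reworked interface, for illustration only: the example_* names below are hypothetical and not part of this patch, and the sketch assumes only the smp_invl_cb_t typedef and the new smp_masked_invlpg() prototype added in sys/x86/include/x86_smp.h. Since smp_targeted_tlb_shootdown() now pins the thread and invokes the callback itself, the caller no longer brackets the call with sched_pin()/sched_unpin(), and the callback must tolerate running even when the local CPU is not in the mask.

/* Hypothetical example, not part of this change. */
static void
example_invlpg_curcpu_cb(pmap_t pmap __unused, vm_offset_t va,
    vm_offset_t addr2 __unused)
{

	/* Runs on the calling CPU, possibly spuriously; touch local state only. */
	invlpg(va);
}

static void
example_kva_shootdown(vm_offset_t va)
{

	/*
	 * No sched_pin()/sched_unpin() here: smp_targeted_tlb_shootdown()
	 * pins, runs the callback on the local CPU, and waits for the
	 * remote CPUs named in the mask.
	 */
	smp_masked_invlpg(all_cpus, va, kernel_pmap, example_invlpg_curcpu_cb);
}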