Index: sys/amd64/amd64/mp_machdep.c =================================================================== --- sys/amd64/amd64/mp_machdep.c +++ sys/amd64/amd64/mp_machdep.c @@ -422,7 +422,7 @@ d.addr = 0; invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB : INVPCID_CTX); - atomic_add_int(&smp_tlb_wait, 1); + PCPU_SET(smp_tlb_done, smp_tlb_generation); } void @@ -450,5 +450,5 @@ smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid); } } - atomic_add_int(&smp_tlb_wait, 1); + PCPU_SET(smp_tlb_done, smp_tlb_generation); } Index: sys/amd64/include/pcpu.h =================================================================== --- sys/amd64/include/pcpu.h +++ sys/amd64/include/pcpu.h @@ -65,7 +65,8 @@ u_int pc_vcpu_id; /* Xen vCPU ID */ \ uint32_t pc_pcid_next; \ uint32_t pc_pcid_gen; \ - char __pad[149] /* be divisor of PAGE_SIZE \ + uint64_t pc_smp_tlb_done; /* TLB op acknowledgement */ \ + char __pad[141] /* be divisor of PAGE_SIZE \ after cache alignment */ #define PC_DBREG_CMD_NONE 0 Index: sys/x86/include/x86_smp.h =================================================================== --- sys/x86/include/x86_smp.h +++ sys/x86/include/x86_smp.h @@ -35,7 +35,7 @@ extern struct mtx ap_boot_mtx; extern int cpu_logical; extern int cpu_cores; -extern volatile int smp_tlb_wait; +extern volatile uint64_t smp_tlb_generation; extern struct pmap *smp_tlb_pmap; extern u_int xhits_gbl[]; extern u_int xhits_pg[]; Index: sys/x86/x86/mp_x86.c =================================================================== --- sys/x86/x86/mp_x86.c +++ sys/x86/x86/mp_x86.c @@ -1309,7 +1309,7 @@ #endif /* COUNT_IPIS */ wbinvd(); - atomic_add_int(&smp_tlb_wait, 1); + PCPU_SET(smp_tlb_done, smp_tlb_generation); } /* @@ -1367,7 +1367,7 @@ /* Variables needed for SMP tlb shootdown. */ static vm_offset_t smp_tlb_addr1, smp_tlb_addr2; pmap_t smp_tlb_pmap; -volatile int smp_tlb_wait; +volatile uint64_t smp_tlb_generation; #ifdef __amd64__ #define read_eflags() read_rflags() @@ -1377,15 +1377,16 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2) { - int cpu, ncpu, othercpus; - - othercpus = mp_ncpus - 1; /* does not shootdown self */ + cpuset_t other_cpus; + volatile uint64_t *p_cpudone; + uint64_t generation; + int cpu; /* * Check for other cpus. Return if none. */ if (CPU_ISFULLSET(&mask)) { - if (othercpus < 1) + if (mp_ncpus <= 1) return; } else { CPU_CLR(PCPU_GET(cpuid), &mask); @@ -1399,23 +1400,28 @@ smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; smp_tlb_pmap = pmap; - smp_tlb_wait = 0; + generation = ++smp_tlb_generation; if (CPU_ISFULLSET(&mask)) { - ncpu = othercpus; ipi_all_but_self(vector); + other_cpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &other_cpus); } else { - ncpu = 0; + other_cpus = mask; while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, vector); ipi_send_cpu(cpu, vector); - ncpu++; } } - while (smp_tlb_wait < ncpu) - ia32_pause(); + while ((cpu = CPU_FFS(&other_cpus)) != 0) { + cpu--; + CPU_CLR(cpu, &other_cpus); + p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done; + while (*p_cpudone != generation) + ia32_pause(); + } mtx_unlock_spin(&smp_ipi_mtx); } @@ -1484,7 +1490,7 @@ invltlb_glob(); else invltlb(); - atomic_add_int(&smp_tlb_wait, 1); + PCPU_SET(smp_tlb_done, smp_tlb_generation); } void @@ -1498,7 +1504,7 @@ #endif /* COUNT_IPIS */ invlpg(smp_tlb_addr1); - atomic_add_int(&smp_tlb_wait, 1); + PCPU_SET(smp_tlb_done, smp_tlb_generation); } void @@ -1519,5 +1525,5 @@ addr += PAGE_SIZE; } while (addr < smp_tlb_addr2); - atomic_add_int(&smp_tlb_wait, 1); + PCPU_SET(smp_tlb_done, smp_tlb_generation); }