D25510.id74137.diff
Index: sys/amd64/amd64/apic_vector.S
===================================================================
--- sys/amd64/amd64/apic_vector.S
+++ sys/amd64/amd64/apic_vector.S
@@ -171,63 +171,13 @@
.text
SUPERALIGN_TEXT
-invltlb_ret:
- call as_lapic_eoi
- jmp ld_regs
-
- SUPERALIGN_TEXT
- INTR_HANDLER invltlb
- call invltlb_handler
- jmp invltlb_ret
-
- INTR_HANDLER invltlb_pcid
- call invltlb_pcid_handler
- jmp invltlb_ret
-
- INTR_HANDLER invltlb_invpcid_nopti
- call invltlb_invpcid_handler
- jmp invltlb_ret
-
- INTR_HANDLER invltlb_invpcid_pti
- call invltlb_invpcid_pti_handler
- jmp invltlb_ret
-
-/*
- * Single page TLB shootdown
- */
- INTR_HANDLER invlpg
- call invlpg_handler
- jmp invltlb_ret
-
- INTR_HANDLER invlpg_invpcid
- call invlpg_invpcid_handler
- jmp invltlb_ret
-
- INTR_HANDLER invlpg_pcid
- call invlpg_pcid_handler
- jmp invltlb_ret
-
-/*
- * Page range TLB shootdown.
- */
- INTR_HANDLER invlrng
- call invlrng_handler
- jmp invltlb_ret
-
- INTR_HANDLER invlrng_invpcid
- call invlrng_invpcid_handler
- jmp invltlb_ret
-
- INTR_HANDLER invlrng_pcid
- call invlrng_pcid_handler
- jmp invltlb_ret
-
/*
- * Invalidate cache.
+ * IPI shootdown handler
*/
- INTR_HANDLER invlcache
- call invlcache_handler
- jmp invltlb_ret
+ INTR_HANDLER invlop
+ call invlop_handler
+ call as_lapic_eoi
+ jmp ld_regs
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
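The hunk above collapses the dedicated per-operation shootdown vectors into the single invlop entry point: the choice of invalidation now travels as an opcode in the initiator's PCPU and is decoded in software by invlop_handler(). A minimal standalone sketch of that consolidation pattern, using hypothetical names that only illustrate the idea (this is not FreeBSD code):

#include <stdio.h>

/* Hypothetical opcodes standing in for enum invl_op_codes. */
enum op { OP_TLB = 1, OP_PG, OP_PGRNG, OP_CACHE };

/* One entry point replaces the per-operation interrupt vectors. */
static void
handle_invl_ipi(enum op op)
{
	switch (op) {
	case OP_TLB:
		puts("invalidate the whole TLB");
		break;
	case OP_PG:
		puts("invalidate a single page");
		break;
	case OP_PGRNG:
		puts("invalidate a page range");
		break;
	case OP_CACHE:
		puts("write back and invalidate caches");
		break;
	}
}

int
main(void)
{
	handle_invl_ipi(OP_PGRNG);	/* dispatches the range case */
	return (0);
}

The hardware cost of the interrupt is paid once regardless of the operation, so moving the dispatch from the IDT into a switch frees up IPI vector slots without adding overhead on the hot path.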
Index: sys/amd64/amd64/db_interface.c
===================================================================
--- sys/amd64/amd64/db_interface.c
+++ sys/amd64/amd64/db_interface.c
@@ -107,5 +107,4 @@
db_printf("gs32p = %p\n", pc->pc_gs32p);
db_printf("ldt = %p\n", pc->pc_ldt);
db_printf("tss = %p\n", pc->pc_tss);
- db_printf("tlb gen = %u\n", pc->pc_smp_tlb_done);
}
Index: sys/amd64/amd64/mp_machdep.c
===================================================================
--- sys/amd64/amd64/mp_machdep.c
+++ sys/amd64/amd64/mp_machdep.c
@@ -44,6 +44,7 @@
#ifdef GPROF
#include <sys/gmon.h>
#endif
+#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
@@ -202,36 +203,8 @@
cpu_apic_ids[i] = -1;
}
- /* Install an inter-CPU IPI for TLB invalidation */
- if (pmap_pcid_enabled) {
- if (invpcid_works) {
- setidt(IPI_INVLTLB, pti ?
- IDTVEC(invltlb_invpcid_pti_pti) :
- IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
- SEL_KPL, 0);
- setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
- IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
- IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
- } else {
- setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
- IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
- IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
- IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
- }
- } else {
- setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
- SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
- SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
- SDT_SYSIGT, SEL_KPL, 0);
- }
-
- /* Install an inter-CPU IPI for cache invalidation. */
- setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
+ /* Install an inter-CPU IPI for invalidations. */
+ setidt(IPI_INVLOP, pti ? IDTVEC(invlop_pti) : IDTVEC(invlop),
SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for all-CPU rendezvous */
@@ -314,6 +287,8 @@
pc->pc_pcid_next = PMAP_PCID_KERN + 2;
pc->pc_pcid_gen = 1;
+ pc->pc_smp_tlb_gen = 1;
+
/* Init tss */
pc->pc_common_tss = __pcpu[0].pc_common_tss;
pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
@@ -542,11 +517,246 @@
return 0; /* return FAILURE */
}
+/*
+ * Flush the TLB on other CPUs.
+ */
+
+/*
+ * Invalidation request. PCPU pc_smp_tlb_op uses u_int instead of the
+ * enum to avoid both namespace and ABI issues (with enums).
+ */
+enum invl_op_codes {
+ INVL_OP_TLB = 1,
+ INVL_OP_TLB_INVPCID = 2,
+ INVL_OP_TLB_INVPCID_PTI = 3,
+ INVL_OP_TLB_PCID = 4,
+ INVL_OP_PGRNG = 5,
+ INVL_OP_PGRNG_INVPCID = 6,
+ INVL_OP_PGRNG_PCID = 7,
+ INVL_OP_PG = 8,
+ INVL_OP_PG_INVPCID = 9,
+ INVL_OP_PG_PCID = 10,
+ INVL_OP_CACHE = 11,
+};
+static enum invl_op_codes invl_op_tlb;
+static enum invl_op_codes invl_op_pgrng;
+static enum invl_op_codes invl_op_pg;
+
+/*
+ * Scoreboard of IPI completion notifications from target to IPI initiator.
+ */
+static uint32_t *invl_scoreboard;
+
+static void
+invl_scoreboard_init(void *arg __unused)
+{
+ u_int i;
+
+ invl_scoreboard = malloc(sizeof(uint32_t) * (mp_maxid + 1) *
+ (mp_maxid + 1), M_DEVBUF, M_WAITOK);
+ for (i = 0; i < (mp_maxid + 1) * (mp_maxid + 1); i++)
+ invl_scoreboard[i] = 1;
+
+ if (pmap_pcid_enabled) {
+ if (invpcid_works) {
+ if (pti)
+ invl_op_tlb = INVL_OP_TLB_INVPCID_PTI;
+ else
+ invl_op_tlb = INVL_OP_TLB_INVPCID;
+ invl_op_pgrng = INVL_OP_PGRNG_INVPCID;
+ invl_op_pg = INVL_OP_PG_INVPCID;
+ } else {
+ invl_op_tlb = INVL_OP_TLB_PCID;
+ invl_op_pgrng = INVL_OP_PGRNG_PCID;
+ invl_op_pg = INVL_OP_PG_PCID;
+ }
+ } else {
+ invl_op_tlb = INVL_OP_TLB;
+ invl_op_pgrng = INVL_OP_PGRNG;
+ invl_op_pg = INVL_OP_PG;
+ }
+}
+SYSINIT(invl_ops, SI_SUB_SMP, SI_ORDER_FIRST, invl_scoreboard_init, NULL);
+
+static uint32_t *
+invl_scoreboard_getcpu(u_int cpu)
+{
+ return (invl_scoreboard + cpu * (mp_maxid + 1));
+}
+
+static uint32_t *
+invl_scoreboard_slot(u_int cpu)
+{
+ return (invl_scoreboard_getcpu(cpu) + PCPU_GET(cpuid));
+}
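The scoreboard declared above is an (mp_maxid + 1) by (mp_maxid + 1) matrix of uint32_t: invl_scoreboard_getcpu() returns the row owned by a target CPU, and invl_scoreboard_slot() selects the column belonging to the calling initiator, so every initiator/target pair gets a private completion word and initiators never contend on a shared flag. A small standalone sketch of the same indexing, assuming a hypothetical fixed CPU count (not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define NCPU	4			/* stands in for mp_maxid + 1 */

static uint32_t scoreboard[NCPU * NCPU];	/* slot == 0: request pending */

/* Row owned by the target CPU: one slot per possible initiator. */
static uint32_t *
row_for_target(unsigned int target)
{
	return (scoreboard + target * NCPU);
}

/* The slot a given initiator writes inside the target's row. */
static uint32_t *
slot(unsigned int target, unsigned int initiator)
{
	return (row_for_target(target) + initiator);
}

int
main(void)
{
	*slot(2, 0) = 0;	/* CPU 0 posts a request to CPU 2 */
	printf("flat index %td was cleared\n", slot(2, 0) - scoreboard);
	return (0);
}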
+
+/*
+ * Used by pmap to request invalidation of TLB or cache on local and
+ * remote processors. Mask provides the set of remote CPUs which are
+ * to be signalled with the IPI specified by vector. The curcpu_cb
+ * callback is invoked on the calling CPU while waiting for remote
+ * CPUs to complete the operation.
+ *
+ * The callback function is called unconditionally on the caller's
+ * underlying processor, even when this processor is not set in the
+ * mask. So, the callback function must be prepared to handle such
+ * spurious invocations.
+ *
+ * Interrupts must be enabled when calling the function with smp
+ * started, to avoid deadlock with other IPIs which are protected with
+ * smp_ipi_mtx at initiator side.
+ */
+static void
+smp_targeted_tlb_shootdown(cpuset_t mask, enum invl_op_codes op, pmap_t pmap,
+ vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
+{
+ cpuset_t other_cpus, mask1;
+ uint32_t generation, *p_cpudone;
+ int cpu;
+
+ /*
+ * It is not necessary to signal other CPUs while booting or
+ * when in the debugger.
+ */
+ if (kdb_active || KERNEL_PANICKED() || !smp_started) {
+ curcpu_cb(pmap, addr1, addr2);
+ return;
+ }
+
+ sched_pin();
+
+ /*
+ * Check for other cpus. Return if none.
+ */
+ if (CPU_ISFULLSET(&mask)) {
+ if (mp_ncpus <= 1)
+ goto nospinexit;
+ } else {
+ CPU_CLR(PCPU_GET(cpuid), &mask);
+ if (CPU_EMPTY(&mask))
+ goto nospinexit;
+ }
+
+#ifdef INVARIANTS
+ if (!(read_rflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+#endif
+ critical_enter();
+ PCPU_SET(smp_tlb_addr1, addr1);
+ PCPU_SET(smp_tlb_addr2, addr2);
+ PCPU_SET(smp_tlb_pmap, pmap);
+ generation = PCPU_GET(smp_tlb_gen);
+ if (++generation == 0)
+ generation = 1;
+ PCPU_SET(smp_tlb_gen, generation);
+ PCPU_SET(smp_tlb_op, op);
+ /* Fence between filling smp_tlb fields and clearing scoreboard. */
+ atomic_thread_fence_rel();
+
+ mask1 = mask;
+ while ((cpu = CPU_FFS(&mask1)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask1);
+ KASSERT(*invl_scoreboard_slot(cpu) != 0,
+ ("IPI scoreboard is zero, initiator %d target %d",
+ PCPU_GET(cpuid), cpu));
+ *invl_scoreboard_slot(cpu) = 0;
+ }
+
+ if (CPU_ISFULLSET(&mask)) {
+ ipi_all_but_self(IPI_INVLOP);
+ other_cpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+ } else {
+ other_cpus = mask;
+ while ((cpu = CPU_FFS(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d invl ipi op: %x", __func__,
+ cpu, op);
+ ipi_send_cpu(cpu, IPI_INVLOP);
+ }
+ }
+ curcpu_cb(pmap, addr1, addr2);
+ while ((cpu = CPU_FFS(&other_cpus)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &other_cpus);
+ p_cpudone = invl_scoreboard_slot(cpu);
+ while (atomic_load_int(p_cpudone) != generation)
+ ia32_pause();
+ }
+ critical_exit();
+ sched_unpin();
+ return;
+
+nospinexit:
+ curcpu_cb(pmap, addr1, addr2);
+ sched_unpin();
+}
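The function above is the initiator side of the handshake: fill the per-CPU pc_smp_tlb_* request fields, publish them with a release fence, clear one scoreboard slot per target, send the IPI, run the local callback, and finally spin until every target has stored the new generation back into its slot. Note that the generation counter skips zero on wrap, since zero marks a pending slot. A compilable model of that release/acquire handshake, reduced to one initiator and one target thread with illustrative names (a sketch of the protocol, not the kernel code; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Request fields published by the initiator (mirrors pc_smp_tlb_*). */
static unsigned long req_addr;
static unsigned int req_gen;
/* One scoreboard slot: 0 = request pending, else last completed gen. */
static atomic_uint slot = 1;

static void *
target(void *arg)
{
	unsigned int gen;

	(void)arg;
	/* IPI handler: wait until the slot is cleared to zero. */
	while (atomic_load_explicit(&slot, memory_order_acquire) != 0)
		;
	gen = req_gen;		/* ordered after the acquire load above */
	printf("target: invalidating %#lx, gen %u\n", req_addr, gen);
	/* Acknowledge by storing the generation into the slot. */
	atomic_store_explicit(&slot, gen, memory_order_release);
	return (NULL);
}

int
main(void)
{
	pthread_t t;
	unsigned int gen = 2;

	pthread_create(&t, NULL, target, NULL);
	req_addr = 0xdead000;		/* fill the request fields first */
	req_gen = gen;
	/* Release store orders the fills before the slot clear. */
	atomic_store_explicit(&slot, 0, memory_order_release);
	/* Spin until the target acknowledges with our generation. */
	while (atomic_load_explicit(&slot, memory_order_acquire) != gen)
		;
	printf("initiator: shootdown complete\n");
	pthread_join(t, NULL);
	return (0);
}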
+
void
-invltlb_invpcid_handler(void)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
+{
+ smp_targeted_tlb_shootdown(mask, invl_op_tlb, pmap, 0, 0, curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+}
+
+void
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
+ smp_invl_cb_t curcpu_cb)
+{
+ smp_targeted_tlb_shootdown(mask, invl_op_pg, pmap, addr, 0, curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+}
+
+void
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
+ pmap_t pmap, smp_invl_cb_t curcpu_cb)
+{
+ smp_targeted_tlb_shootdown(mask, invl_op_pgrng, pmap, addr1, addr2,
+ curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+}
+
+void
+smp_cache_flush(smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(all_cpus, INVL_OP_CACHE, NULL, 0, 0,
+ curcpu_cb);
+}
+
+/*
+ * Handlers for TLB related IPIs
+ */
+static void
+invltlb_handler(pmap_t smp_tlb_pmap)
+{
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_gbl[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ if (smp_tlb_pmap == kernel_pmap)
+ invltlb_glob();
+ else
+ invltlb();
+}
+
+static void
+invltlb_invpcid_handler(pmap_t smp_tlb_pmap)
{
struct invpcid_descr d;
- uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
@@ -555,20 +765,17 @@
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation;
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
INVPCID_CTX);
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invltlb_invpcid_pti_handler(void)
+static void
+invltlb_invpcid_pti_handler(pmap_t smp_tlb_pmap)
{
struct invpcid_descr d;
- uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
@@ -577,7 +784,6 @@
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation;
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
d.pad = 0;
d.addr = 0;
@@ -594,14 +800,13 @@
d.pcid |= PMAP_PCID_USER_PT;
invpcid(&d, INVPCID_CTX);
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invltlb_pcid_handler(void)
+static void
+invltlb_pcid_handler(pmap_t smp_tlb_pmap)
{
uint64_t kcr3, ucr3;
- uint32_t generation, pcid;
+ uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
@@ -610,7 +815,6 @@
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation; /* Overlap with serialization */
if (smp_tlb_pmap == kernel_pmap) {
invltlb_glob();
} else {
@@ -632,14 +836,25 @@
load_cr3(kcr3);
}
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invlpg_invpcid_handler(void)
+static void
+invlpg_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
+{
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_pg[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ invlpg(smp_tlb_addr1);
+}
+
+static void
+invlpg_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
{
struct invpcid_descr d;
- uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_pg[PCPU_GET(cpuid)]++;
@@ -648,7 +863,6 @@
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
@@ -657,14 +871,12 @@
d.addr = smp_tlb_addr1;
invpcid(&d, INVPCID_ADDR);
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invlpg_pcid_handler(void)
+static void
+invlpg_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
{
uint64_t kcr3, ucr3;
- uint32_t generation;
uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
@@ -674,7 +886,6 @@
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
(ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
@@ -683,15 +894,35 @@
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invlrng_invpcid_handler(void)
+static void
+invlrng_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
+ vm_offset_t smp_tlb_addr2)
+{
+ vm_offset_t addr, addr2;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_rng[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ addr = smp_tlb_addr1;
+ addr2 = smp_tlb_addr2;
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < addr2);
+}
+
+static void
+invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
+ vm_offset_t smp_tlb_addr2)
{
struct invpcid_descr d;
vm_offset_t addr, addr2;
- uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_rng[PCPU_GET(cpuid)]++;
@@ -702,7 +933,6 @@
addr = smp_tlb_addr1;
addr2 = smp_tlb_addr2;
- generation = smp_tlb_generation; /* Overlap with serialization */
do {
invlpg(addr);
addr += PAGE_SIZE;
@@ -717,15 +947,14 @@
d.addr += PAGE_SIZE;
} while (d.addr < addr2);
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invlrng_pcid_handler(void)
+static void
+invlrng_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
+ vm_offset_t smp_tlb_addr2)
{
vm_offset_t addr, addr2;
uint64_t kcr3, ucr3;
- uint32_t generation;
uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
@@ -737,7 +966,6 @@
addr = smp_tlb_addr1;
addr2 = smp_tlb_addr2;
- generation = smp_tlb_generation; /* Overlap with serialization */
do {
invlpg(addr);
addr += PAGE_SIZE;
@@ -749,5 +977,108 @@
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
}
- PCPU_SET(smp_tlb_done, generation);
+}
+
+static void
+invlcache_handler(void)
+{
+#ifdef COUNT_IPIS
+ (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+ wbinvd();
+}
+
+static void
+invlop_handler_one_req(enum invl_op_codes smp_tlb_op, pmap_t smp_tlb_pmap,
+ vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2)
+{
+ switch (smp_tlb_op) {
+ case INVL_OP_TLB:
+ invltlb_handler(smp_tlb_pmap);
+ break;
+ case INVL_OP_TLB_INVPCID:
+ invltlb_invpcid_handler(smp_tlb_pmap);
+ break;
+ case INVL_OP_TLB_INVPCID_PTI:
+ invltlb_invpcid_pti_handler(smp_tlb_pmap);
+ break;
+ case INVL_OP_TLB_PCID:
+ invltlb_pcid_handler(smp_tlb_pmap);
+ break;
+ case INVL_OP_PGRNG:
+ invlrng_handler(smp_tlb_pmap, smp_tlb_addr1, smp_tlb_addr2);
+ break;
+ case INVL_OP_PGRNG_INVPCID:
+ invlrng_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1,
+ smp_tlb_addr2);
+ break;
+ case INVL_OP_PGRNG_PCID:
+ invlrng_pcid_handler(smp_tlb_pmap, smp_tlb_addr1,
+ smp_tlb_addr2);
+ break;
+ case INVL_OP_PG:
+ invlpg_handler(smp_tlb_pmap, smp_tlb_addr1);
+ break;
+ case INVL_OP_PG_INVPCID:
+ invlpg_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1);
+ break;
+ case INVL_OP_PG_PCID:
+ invlpg_pcid_handler(smp_tlb_pmap, smp_tlb_addr1);
+ break;
+ case INVL_OP_CACHE:
+ invlcache_handler();
+ break;
+ default:
+ __assert_unreachable();
+ break;
+ }
+}
+
+void
+invlop_handler(void)
+{
+ struct pcpu *initiator_pc;
+ pmap_t smp_tlb_pmap;
+ vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
+ u_int initiator_cpu_id;
+ enum invl_op_codes smp_tlb_op;
+ uint32_t *scoreboard, smp_tlb_gen;
+
+ scoreboard = invl_scoreboard_getcpu(PCPU_GET(cpuid));
+ for (;;) {
+ for (initiator_cpu_id = 0; initiator_cpu_id <= mp_maxid;
+ initiator_cpu_id++) {
+ if (scoreboard[initiator_cpu_id] == 0)
+ break;
+ }
+ if (initiator_cpu_id > mp_maxid)
+ break;
+ initiator_pc = cpuid_to_pcpu[initiator_cpu_id];
+
+ /*
+ * Dual acquire fence, for release fence in
+ * smp_targeted_tlb_shootdown(), between reading zero
+ * scoreboard slot and accessing PCPU of initiator for
+ * smp_tlb_XXX values.
+ */
+ atomic_thread_fence_acq();
+ smp_tlb_pmap = initiator_pc->pc_smp_tlb_pmap;
+ smp_tlb_addr1 = initiator_pc->pc_smp_tlb_addr1;
+ smp_tlb_addr2 = initiator_pc->pc_smp_tlb_addr2;
+ smp_tlb_op = initiator_pc->pc_smp_tlb_op;
+ smp_tlb_gen = initiator_pc->pc_smp_tlb_gen;
+
+ /*
+ * Ensure that we do not make our scoreboard
+ * notification visible to the initiator until the
+ * pc_smp_tlb values are read. The dual fence is
+ * implicitly provided by the barrier in IPI send
+ * operation before APIC ICR register write.
+ */
+ atomic_thread_fence_acq();
+ atomic_store_int(&scoreboard[initiator_cpu_id], smp_tlb_gen);
+
+ invlop_handler_one_req(smp_tlb_op, smp_tlb_pmap, smp_tlb_addr1,
+ smp_tlb_addr2);
+ }
}
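invlop_handler() above is the target side: it repeatedly scans its scoreboard row for a zeroed slot, snapshots that initiator's request fields, acknowledges by writing the generation back, and then performs the invalidation; the outer loop rescans so requests from several initiators are drained in one interrupt. A single-threaded sketch of just the scan-and-ack control flow, with made-up sample data (not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define NCPU	4		/* stands in for mp_maxid + 1 */

/* This CPU's scoreboard row; slot == 0 marks a pending request. */
static uint32_t row[NCPU] = { 1, 0, 1, 0 };
/* Generation each initiator would have published in its PCPU. */
static const uint32_t req_gen[NCPU] = { 0, 7, 0, 9 };

int
main(void)
{
	unsigned int initiator;

	for (;;) {
		/* Scan the row for a pending request. */
		for (initiator = 0; initiator < NCPU; initiator++)
			if (row[initiator] == 0)
				break;
		if (initiator == NCPU)
			break;		/* nothing pending, done */
		/* Ack with the generation, then handle the request. */
		row[initiator] = req_gen[initiator];
		printf("handled request from cpu %u, gen %u\n",
		    initiator, req_gen[initiator]);
	}
	return (0);
}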
Index: sys/amd64/include/pcpu.h
===================================================================
--- sys/amd64/include/pcpu.h
+++ sys/amd64/include/pcpu.h
@@ -85,7 +85,7 @@
u_int pc_vcpu_id; /* Xen vCPU ID */ \
uint32_t pc_pcid_next; \
uint32_t pc_pcid_gen; \
- uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
+ uint32_t pc_unused; \
uint32_t pc_ibpb_set; \
void *pc_mds_buf; \
void *pc_mds_buf64; \
@@ -94,7 +94,12 @@
u_int pc_ipi_bitmap; \
struct amd64tss pc_common_tss; \
struct user_segment_descriptor pc_gdt[NGDT]; \
- char __pad[2956] /* pad to UMA_PCPU_ALLOC_SIZE */
+ void *pc_smp_tlb_pmap; \
+ uint64_t pc_smp_tlb_addr1; \
+ uint64_t pc_smp_tlb_addr2; \
+ uint32_t pc_smp_tlb_gen; \
+ u_int pc_smp_tlb_op; \
+ char __pad[2924] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
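The pad shrinks by exactly the size of the new request fields: 8 bytes for the pmap pointer, 8 + 8 for the two addresses, and 4 + 4 for the generation and opcode total 32 bytes, and 2956 - 32 = 2924, keeping struct pcpu at its fixed UMA allocation size. A compile-time sketch of that accounting, assuming the LP64 amd64 ABI (the field names are copied from the hunk, but the standalone struct is illustrative only):

#include <stdint.h>

/* The five members added to the amd64 struct pcpu, in isolation. */
struct smp_tlb_req {
	void		*pc_smp_tlb_pmap;	/* 8 bytes */
	uint64_t	pc_smp_tlb_addr1;	/* 8 bytes */
	uint64_t	pc_smp_tlb_addr2;	/* 8 bytes */
	uint32_t	pc_smp_tlb_gen;		/* 4 bytes */
	unsigned int	pc_smp_tlb_op;		/* 4 bytes */
};

/* 32 bytes of new state are paid for by shrinking the trailing pad. */
_Static_assert(sizeof(struct smp_tlb_req) == 32, "unexpected layout");
_Static_assert(2956 - sizeof(struct smp_tlb_req) == 2924, "pad arithmetic");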
Index: sys/amd64/include/smp.h
===================================================================
--- sys/amd64/include/smp.h
+++ sys/amd64/include/smp.h
@@ -29,34 +29,14 @@
inthand_t
IDTVEC(justreturn), /* interrupt CPU with minimum overhead */
IDTVEC(justreturn1_pti),
- IDTVEC(invltlb_pti),
- IDTVEC(invltlb_pcid_pti),
- IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
- IDTVEC(invltlb_invpcid_pti_pti),
- IDTVEC(invltlb_invpcid_nopti),
- IDTVEC(invlpg_pti),
- IDTVEC(invlpg_invpcid_pti),
- IDTVEC(invlpg_invpcid),
- IDTVEC(invlpg_pcid_pti),
- IDTVEC(invlpg_pcid),
- IDTVEC(invlrng_pti),
- IDTVEC(invlrng_invpcid_pti),
- IDTVEC(invlrng_invpcid),
- IDTVEC(invlrng_pcid_pti),
- IDTVEC(invlrng_pcid),
- IDTVEC(invlcache_pti),
+ IDTVEC(invlop_pti),
+ IDTVEC(invlop),
IDTVEC(ipi_intr_bitmap_handler_pti),
IDTVEC(cpustop_pti),
IDTVEC(cpususpend_pti),
IDTVEC(rendezvous_pti);
-void invltlb_pcid_handler(void);
-void invltlb_invpcid_handler(void);
-void invltlb_invpcid_pti_handler(void);
-void invlpg_invpcid_handler(void);
-void invlpg_pcid_handler(void);
-void invlrng_invpcid_handler(void);
-void invlrng_pcid_handler(void);
+void invlop_handler(void);
int native_start_all_aps(void);
void mp_bootaddress(vm_paddr_t *, unsigned int *);
Index: sys/i386/i386/mp_machdep.c
===================================================================
--- sys/i386/i386/mp_machdep.c
+++ sys/i386/i386/mp_machdep.c
@@ -467,3 +467,233 @@
}
return 0; /* return FAILURE */
}
+
+/*
+ * Flush the TLB on other CPUs.
+ */
+
+/* Variables needed for SMP tlb shootdown. */
+vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
+pmap_t smp_tlb_pmap;
+volatile uint32_t smp_tlb_generation;
+
+/*
+ * Used by pmap to request invalidation of TLB or cache on local and
+ * remote processors. Mask provides the set of remote CPUs which are
+ * to be signalled with the IPI specified by vector. The curcpu_cb
+ * callback is invoked on the calling CPU while waiting for remote
+ * CPUs to complete the operation.
+ *
+ * The callback function is called unconditionally on the caller's
+ * underlying processor, even when this processor is not set in the
+ * mask. So, the callback function must be prepared to handle such
+ * spurious invocations.
+ */
+static void
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
+ vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
+{
+ cpuset_t other_cpus;
+ volatile uint32_t *p_cpudone;
+ uint32_t generation;
+ int cpu;
+
+ /*
+ * It is not necessary to signal other CPUs while booting or
+ * when in the debugger.
+ */
+ if (kdb_active || KERNEL_PANICKED() || !smp_started) {
+ curcpu_cb(pmap, addr1, addr2);
+ return;
+ }
+
+ sched_pin();
+
+ /*
+ * Check for other cpus. Return if none.
+ */
+ if (CPU_ISFULLSET(&mask)) {
+ if (mp_ncpus <= 1)
+ goto nospinexit;
+ } else {
+ CPU_CLR(PCPU_GET(cpuid), &mask);
+ if (CPU_EMPTY(&mask))
+ goto nospinexit;
+ }
+
+ if (!(read_eflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_ipi_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_pmap = pmap;
+ generation = ++smp_tlb_generation;
+ if (CPU_ISFULLSET(&mask)) {
+ ipi_all_but_self(vector);
+ other_cpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+ } else {
+ other_cpus = mask;
+ while ((cpu = CPU_FFS(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
+ cpu, vector);
+ ipi_send_cpu(cpu, vector);
+ }
+ }
+ curcpu_cb(pmap, addr1, addr2);
+ while ((cpu = CPU_FFS(&other_cpus)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &other_cpus);
+ p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done;
+ while (*p_cpudone != generation)
+ ia32_pause();
+ }
+ mtx_unlock_spin(&smp_ipi_mtx);
+ sched_unpin();
+ return;
+
+nospinexit:
+ curcpu_cb(pmap, addr1, addr2);
+ sched_unpin();
+}
+
+void
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0, curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+}
+
+void
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
+ smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0, curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+}
+
+void
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
+ pmap_t pmap, smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2,
+ curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+}
+
+void
+smp_cache_flush(smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0,
+ curcpu_cb);
+}
+
+/*
+ * Handlers for TLB related IPIs
+ */
+void
+invltlb_handler(void)
+{
+ uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_gbl[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ /*
+ * Reading the generation here allows greater parallelism
+ * since invalidating the TLB is a serializing operation.
+ */
+ generation = smp_tlb_generation;
+ if (smp_tlb_pmap == kernel_pmap)
+ invltlb_glob();
+#ifdef __amd64__
+ else
+ invltlb();
+#endif
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlpg_handler(void)
+{
+ uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_pg[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ generation = smp_tlb_generation; /* Overlap with serialization */
+#ifdef __i386__
+ if (smp_tlb_pmap == kernel_pmap)
+#endif
+ invlpg(smp_tlb_addr1);
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlrng_handler(void)
+{
+ vm_offset_t addr, addr2;
+ uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_rng[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ addr = smp_tlb_addr1;
+ addr2 = smp_tlb_addr2;
+ generation = smp_tlb_generation; /* Overlap with serialization */
+#ifdef __i386__
+ if (smp_tlb_pmap == kernel_pmap)
+#endif
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < addr2);
+
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlcache_handler(void)
+{
+ uint32_t generation;
+
+#ifdef COUNT_IPIS
+ (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ /*
+ * Reading the generation here allows greater parallelism
+ * since wbinvd is a serializing instruction. Without the
+ * temporary, we'd wait for wbinvd to complete, then the read
+ * would execute, then the dependent write, which must then
+ * complete before return from interrupt.
+ */
+ generation = smp_tlb_generation;
+ wbinvd();
+ PCPU_SET(smp_tlb_done, generation);
+}
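The i386 copies above keep the pre-existing single-request design rather than adopting the amd64 scoreboard: one global request at a time, serialized by smp_ipi_mtx, with a shared generation counter acknowledged through per-CPU pc_smp_tlb_done. Each handler also reads the generation into a local before the serializing flush instruction, as the comments explain, so the read is not delayed behind it. A condensed model of that serialized handshake, with a plain mutex standing in for the spin mutex (illustrative names only; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* One global request at a time, serialized by the initiator lock. */
static pthread_mutex_t ipi_mtx = PTHREAD_MUTEX_INITIALIZER;
static unsigned long tlb_addr;
static atomic_uint tlb_generation;
static atomic_uint tlb_done;	/* per-CPU pc_smp_tlb_done in the kernel */

static void *
target(void *arg)
{
	unsigned int gen;

	(void)arg;
	/* IPI handler: wait for a request, read the generation first. */
	while ((gen = atomic_load(&tlb_generation)) == 0)
		;
	printf("target: invlpg %#lx\n", tlb_addr);	/* then flush */
	atomic_store(&tlb_done, gen);			/* then ack */
	return (NULL);
}

int
main(void)
{
	pthread_t t;
	unsigned int gen;

	pthread_create(&t, NULL, target, NULL);
	pthread_mutex_lock(&ipi_mtx);		/* mimics mtx_lock_spin() */
	tlb_addr = 0x1000;			/* fill the request */
	gen = atomic_fetch_add(&tlb_generation, 1) + 1;
	while (atomic_load(&tlb_done) != gen)	/* wait for the ack */
		;
	pthread_mutex_unlock(&ipi_mtx);
	pthread_join(t, NULL);
	return (0);
}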
Index: sys/i386/include/smp.h
===================================================================
--- sys/i386/include/smp.h
+++ sys/i386/include/smp.h
@@ -27,9 +27,20 @@
#include <x86/apicvar.h>
#include <machine/pcb.h>
+inthand_t
+ IDTVEC(invltlb), /* TLB shootdowns - global */
+ IDTVEC(invlpg), /* TLB shootdowns - 1 page */
+ IDTVEC(invlrng), /* TLB shootdowns - page range */
+ IDTVEC(invlcache); /* Write back and invalidate cache */
+
/* functions in mpboot.s */
void bootMP(void);
+void invltlb_handler(void);
+void invlpg_handler(void);
+void invlrng_handler(void);
+void invlcache_handler(void);
+
#endif /* !LOCORE */
#endif /* SMP */
Index: sys/x86/include/apicvar.h
===================================================================
--- sys/x86/include/apicvar.h
+++ sys/x86/include/apicvar.h
@@ -112,7 +112,8 @@
#define APIC_IPI_INTS (APIC_LOCAL_INTS + 3)
#define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */
-#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */
+#define IPI_INVLOP (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs, amd64 */
+#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs, i386 */
#define IPI_INVLPG (APIC_IPI_INTS + 2)
#define IPI_INVLRNG (APIC_IPI_INTS + 3)
#define IPI_INVLCACHE (APIC_IPI_INTS + 4)
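Both new macro names intentionally expand to the same value: amd64 now consumes only the APIC_IPI_INTS + 1 slot under the IPI_INVLOP name, while i386 keeps the full IPI_INVLTLB/INVLPG/INVLRNG/INVLCACHE set. A self-contained compile-time check of that aliasing (the real base value is elided here; a placeholder of zero is used since only the relative arithmetic matters):

/* Standalone mirror of the hunk above, with a placeholder base. */
enum { APIC_IPI_INTS = 0 };
enum {
	IPI_INVLOP = APIC_IPI_INTS + 1,		/* amd64 */
	IPI_INVLTLB = APIC_IPI_INTS + 1,	/* i386 */
};

_Static_assert(IPI_INVLOP == IPI_INVLTLB, "amd64 and i386 share one vector");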
Index: sys/x86/include/x86_smp.h
===================================================================
--- sys/x86/include/x86_smp.h
+++ sys/x86/include/x86_smp.h
@@ -75,10 +75,6 @@
/* IPI handlers */
inthand_t
- IDTVEC(invltlb), /* TLB shootdowns - global */
- IDTVEC(invlpg), /* TLB shootdowns - 1 page */
- IDTVEC(invlrng), /* TLB shootdowns - page range */
- IDTVEC(invlcache), /* Write back and invalidate cache */
IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
IDTVEC(cpustop), /* CPU stops & waits to be restarted */
IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */
@@ -94,10 +90,6 @@
void cpususpend_handler(void);
void alloc_ap_trampoline(vm_paddr_t *physmap, unsigned int *physmap_idx);
void init_secondary_tail(void);
-void invltlb_handler(void);
-void invlpg_handler(void);
-void invlrng_handler(void);
-void invlcache_handler(void);
void init_secondary(void);
void ipi_startup(int apic_id, int vector);
void ipi_all_but_self(u_int ipi);
Index: sys/x86/x86/mp_x86.c
===================================================================
--- sys/x86/x86/mp_x86.c
+++ sys/x86/x86/mp_x86.c
@@ -1593,28 +1593,6 @@
CPU_CLR_ATOMIC(cpu, &toresume_cpus);
}
-
-void
-invlcache_handler(void)
-{
- uint32_t generation;
-
-#ifdef COUNT_IPIS
- (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- /*
- * Reading the generation here allows greater parallelism
- * since wbinvd is a serializing instruction. Without the
- * temporary, we'd wait for wbinvd to complete, then the read
- * would execute, then the dependent write, which must then
- * complete before return from interrupt.
- */
- generation = smp_tlb_generation;
- wbinvd();
- PCPU_SET(smp_tlb_done, generation);
-}
-
/*
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
@@ -1662,216 +1640,3 @@
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif
-
-/*
- * Flush the TLB on other CPU's
- */
-
-/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
-pmap_t smp_tlb_pmap;
-volatile uint32_t smp_tlb_generation;
-
-#ifdef __amd64__
-#define read_eflags() read_rflags()
-#endif
-
-/*
- * Used by pmap to request invalidation of TLB or cache on local and
- * remote processors. Mask provides the set of remote CPUs which are
- * to be signalled with the IPI specified by vector. The curcpu_cb
- * callback is invoked on the calling CPU while waiting for remote
- * CPUs to complete the operation.
- *
- * The callback function is called unconditionally on the caller's
- * underlying processor, even when this processor is not set in the
- * mask. So, the callback function must be prepared to handle such
- * spurious invocations.
- */
-static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
- vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
-{
- cpuset_t other_cpus;
- volatile uint32_t *p_cpudone;
- uint32_t generation;
- int cpu;
-
- /*
- * It is not necessary to signal other CPUs while booting or
- * when in the debugger.
- */
- if (kdb_active || KERNEL_PANICKED() || !smp_started) {
- curcpu_cb(pmap, addr1, addr2);
- return;
- }
-
- sched_pin();
-
- /*
- * Check for other cpus. Return if none.
- */
- if (CPU_ISFULLSET(&mask)) {
- if (mp_ncpus <= 1)
- goto nospinexit;
- } else {
- CPU_CLR(PCPU_GET(cpuid), &mask);
- if (CPU_EMPTY(&mask))
- goto nospinexit;
- }
-
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_addr1 = addr1;
- smp_tlb_addr2 = addr2;
- smp_tlb_pmap = pmap;
- generation = ++smp_tlb_generation;
- if (CPU_ISFULLSET(&mask)) {
- ipi_all_but_self(vector);
- other_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &other_cpus);
- } else {
- other_cpus = mask;
- while ((cpu = CPU_FFS(&mask)) != 0) {
- cpu--;
- CPU_CLR(cpu, &mask);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
- cpu, vector);
- ipi_send_cpu(cpu, vector);
- }
- }
- curcpu_cb(pmap, addr1, addr2);
- while ((cpu = CPU_FFS(&other_cpus)) != 0) {
- cpu--;
- CPU_CLR(cpu, &other_cpus);
- p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done;
- while (*p_cpudone != generation)
- ia32_pause();
- }
- mtx_unlock_spin(&smp_ipi_mtx);
- sched_unpin();
- return;
-
-nospinexit:
- curcpu_cb(pmap, addr1, addr2);
- sched_unpin();
-}
-
-void
-smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
-{
-
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0, curcpu_cb);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_global++;
-#endif
-}
-
-void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
- smp_invl_cb_t curcpu_cb)
-{
-
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0, curcpu_cb);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_page++;
-#endif
-}
-
-void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
- pmap_t pmap, smp_invl_cb_t curcpu_cb)
-{
-
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2,
- curcpu_cb);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_range++;
- ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
-}
-
-void
-smp_cache_flush(smp_invl_cb_t curcpu_cb)
-{
-
- smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0,
- curcpu_cb);
-}
-
-/*
- * Handlers for TLB related IPIs
- */
-void
-invltlb_handler(void)
-{
- uint32_t generation;
-
-#ifdef COUNT_XINVLTLB_HITS
- xhits_gbl[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- /*
- * Reading the generation here allows greater parallelism
- * since invalidating the TLB is a serializing operation.
- */
- generation = smp_tlb_generation;
- if (smp_tlb_pmap == kernel_pmap)
- invltlb_glob();
-#ifdef __amd64__
- else
- invltlb();
-#endif
- PCPU_SET(smp_tlb_done, generation);
-}
-
-void
-invlpg_handler(void)
-{
- uint32_t generation;
-
-#ifdef COUNT_XINVLTLB_HITS
- xhits_pg[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- generation = smp_tlb_generation; /* Overlap with serialization */
-#ifdef __i386__
- if (smp_tlb_pmap == kernel_pmap)
-#endif
- invlpg(smp_tlb_addr1);
- PCPU_SET(smp_tlb_done, generation);
-}
-
-void
-invlrng_handler(void)
-{
- vm_offset_t addr, addr2;
- uint32_t generation;
-
-#ifdef COUNT_XINVLTLB_HITS
- xhits_rng[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- addr = smp_tlb_addr1;
- addr2 = smp_tlb_addr2;
- generation = smp_tlb_generation; /* Overlap with serialization */
-#ifdef __i386__
- if (smp_tlb_pmap == kernel_pmap)
-#endif
- do {
- invlpg(addr);
- addr += PAGE_SIZE;
- } while (addr < addr2);
-
- PCPU_SET(smp_tlb_done, generation);
-}
