D25510: amd64: allow parallel shootdown IPIs
D25510.id74137.diff (33 KB)
Index: sys/amd64/amd64/apic_vector.S
===================================================================
--- sys/amd64/amd64/apic_vector.S
+++ sys/amd64/amd64/apic_vector.S
@@ -171,63 +171,13 @@
.text
SUPERALIGN_TEXT
-invltlb_ret:
- call as_lapic_eoi
- jmp ld_regs
-
- SUPERALIGN_TEXT
- INTR_HANDLER invltlb
- call invltlb_handler
- jmp invltlb_ret
-
- INTR_HANDLER invltlb_pcid
- call invltlb_pcid_handler
- jmp invltlb_ret
-
- INTR_HANDLER invltlb_invpcid_nopti
- call invltlb_invpcid_handler
- jmp invltlb_ret
-
- INTR_HANDLER invltlb_invpcid_pti
- call invltlb_invpcid_pti_handler
- jmp invltlb_ret
-
-/*
- * Single page TLB shootdown
- */
- INTR_HANDLER invlpg
- call invlpg_handler
- jmp invltlb_ret
-
- INTR_HANDLER invlpg_invpcid
- call invlpg_invpcid_handler
- jmp invltlb_ret
-
- INTR_HANDLER invlpg_pcid
- call invlpg_pcid_handler
- jmp invltlb_ret
-
-/*
- * Page range TLB shootdown.
- */
- INTR_HANDLER invlrng
- call invlrng_handler
- jmp invltlb_ret
-
- INTR_HANDLER invlrng_invpcid
- call invlrng_invpcid_handler
- jmp invltlb_ret
-
- INTR_HANDLER invlrng_pcid
- call invlrng_pcid_handler
- jmp invltlb_ret
-
/*
- * Invalidate cache.
+ * IPI shootdown handler
*/
- INTR_HANDLER invlcache
- call invlcache_handler
- jmp invltlb_ret
+ INTR_HANDLER invlop
+ call invlop_handler
+ call as_lapic_eoi
+ jmp ld_regs
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
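[Note] The fifteen per-operation interrupt stubs collapse into a single invlop stub. In C terms the surviving stub is equivalent to the sketch below; invlop_stub is a hypothetical name for what the INTR_HANDLER macro emits, and the macro also generates the invlop_pti entry used when page-table isolation is enabled.

/* Hypothetical C rendering of the unified assembly stub above. */
void invlop_handler(void);	/* dispatches on the initiator's op code */
void as_lapic_eoi(void);	/* local APIC end-of-interrupt */

void
invlop_stub(void)
{
	invlop_handler();	/* perform the requested invalidation(s) */
	as_lapic_eoi();		/* acknowledge the interrupt */
	/* jmp ld_regs: restore registers and return from the interrupt */
}
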
Index: sys/amd64/amd64/db_interface.c
===================================================================
--- sys/amd64/amd64/db_interface.c
+++ sys/amd64/amd64/db_interface.c
@@ -107,5 +107,4 @@
db_printf("gs32p = %p\n", pc->pc_gs32p);
db_printf("ldt = %p\n", pc->pc_ldt);
db_printf("tss = %p\n", pc->pc_tss);
- db_printf("tlb gen = %u\n", pc->pc_smp_tlb_done);
}
Index: sys/amd64/amd64/mp_machdep.c
===================================================================
--- sys/amd64/amd64/mp_machdep.c
+++ sys/amd64/amd64/mp_machdep.c
@@ -44,6 +44,7 @@
#ifdef GPROF
#include <sys/gmon.h>
#endif
+#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
@@ -202,36 +203,8 @@
cpu_apic_ids[i] = -1;
}
- /* Install an inter-CPU IPI for TLB invalidation */
- if (pmap_pcid_enabled) {
- if (invpcid_works) {
- setidt(IPI_INVLTLB, pti ?
- IDTVEC(invltlb_invpcid_pti_pti) :
- IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
- SEL_KPL, 0);
- setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
- IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
- IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
- } else {
- setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
- IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
- IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
- IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
- }
- } else {
- setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
- SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
- SDT_SYSIGT, SEL_KPL, 0);
- setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
- SDT_SYSIGT, SEL_KPL, 0);
- }
-
- /* Install an inter-CPU IPI for cache invalidation. */
- setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
+ /* Install an inter-CPU IPI for invalidations. */
+ setidt(IPI_INVLOP, pti ? IDTVEC(invlop_pti) : IDTVEC(invlop),
SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for all-CPU rendezvous */
@@ -314,6 +287,8 @@
pc->pc_pcid_next = PMAP_PCID_KERN + 2;
pc->pc_pcid_gen = 1;
+ pc->pc_smp_tlb_gen = 1;
+
/* Init tss */
pc->pc_common_tss = __pcpu[0].pc_common_tss;
pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
@@ -542,11 +517,246 @@
return 0; /* return FAILURE */
}
+/*
+ * Flush the TLB on other CPUs
+ */
+
+/*
+ * Invalidation request. PCPU pc_smp_tlb_op uses u_int instead of the
+ * enum to avoid both namespace and ABI issues (with enums).
+ */
+enum invl_op_codes {
+ INVL_OP_TLB = 1,
+ INVL_OP_TLB_INVPCID = 2,
+ INVL_OP_TLB_INVPCID_PTI = 3,
+ INVL_OP_TLB_PCID = 4,
+ INVL_OP_PGRNG = 5,
+ INVL_OP_PGRNG_INVPCID = 6,
+ INVL_OP_PGRNG_PCID = 7,
+ INVL_OP_PG = 8,
+ INVL_OP_PG_INVPCID = 9,
+ INVL_OP_PG_PCID = 10,
+ INVL_OP_CACHE = 11,
+};
+static enum invl_op_codes invl_op_tlb;
+static enum invl_op_codes invl_op_pgrng;
+static enum invl_op_codes invl_op_pg;
+
+/*
+ * Scoreboard of IPI completion notifications from target to IPI initiator.
+ */
+static uint32_t *invl_scoreboard;
+
+static void
+invl_scoreboard_init(void *arg __unused)
+{
+ u_int i;
+
+ invl_scoreboard = malloc(sizeof(uint32_t) * (mp_maxid + 1) *
+ (mp_maxid + 1), M_DEVBUF, M_WAITOK);
+ for (i = 0; i < (mp_maxid + 1) * (mp_maxid + 1); i++)
+ invl_scoreboard[i] = 1;
+
+ if (pmap_pcid_enabled) {
+ if (invpcid_works) {
+ if (pti)
+ invl_op_tlb = INVL_OP_TLB_INVPCID_PTI;
+ else
+ invl_op_tlb = INVL_OP_TLB_INVPCID;
+ invl_op_pgrng = INVL_OP_PGRNG_INVPCID;
+ invl_op_pg = INVL_OP_PG_INVPCID;
+ } else {
+ invl_op_tlb = INVL_OP_TLB_PCID;
+ invl_op_pgrng = INVL_OP_PGRNG_PCID;
+ invl_op_pg = INVL_OP_PG_PCID;
+ }
+ } else {
+ invl_op_tlb = INVL_OP_TLB;
+ invl_op_pgrng = INVL_OP_PGRNG;
+ invl_op_pg = INVL_OP_PG;
+ }
+}
+SYSINIT(invl_ops, SI_SUB_SMP, SI_ORDER_FIRST, invl_scoreboard_init, NULL);
+
+static uint32_t *
+invl_scoreboard_getcpu(u_int cpu)
+{
+ return (invl_scoreboard + cpu * (mp_maxid + 1));
+}
+
+static uint32_t *
+invl_scoreboard_slot(u_int cpu)
+{
+ return (invl_scoreboard_getcpu(cpu) + PCPU_GET(cpuid));
+}
+
+/*
+ * Used by pmap to request invalidation of TLB or cache on local and
+ * remote processors. Mask provides the set of remote CPUs which are
+ * to be signalled with the invalidation IPI. The curcpu_cb
+ * callback is invoked on the calling CPU while waiting for remote
+ * CPUs to complete the operation.
+ *
+ * The callback function is called unconditionally on the caller's
+ * underlying processor, even when this processor is not set in the
+ * mask. So, the callback function must be prepared to handle such
+ * spurious invocations.
+ *
+ * Interrupts must be enabled when calling the function with smp
+ * started, to avoid deadlock with other IPIs which are protected with
+ * smp_ipi_mtx at initiator side.
+ */
+static void
+smp_targeted_tlb_shootdown(cpuset_t mask, enum invl_op_codes op, pmap_t pmap,
+ vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
+{
+ cpuset_t other_cpus, mask1;
+ uint32_t generation, *p_cpudone;
+ int cpu;
+
+ /*
+ * It is not necessary to signal other CPUs while booting or
+ * when in the debugger.
+ */
+ if (kdb_active || KERNEL_PANICKED() || !smp_started) {
+ curcpu_cb(pmap, addr1, addr2);
+ return;
+ }
+
+ sched_pin();
+
+ /*
+ * Check for other cpus. Return if none.
+ */
+ if (CPU_ISFULLSET(&mask)) {
+ if (mp_ncpus <= 1)
+ goto nospinexit;
+ } else {
+ CPU_CLR(PCPU_GET(cpuid), &mask);
+ if (CPU_EMPTY(&mask))
+ goto nospinexit;
+ }
+
+#ifdef INVARIANTS
+ if (!(read_rflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+#endif
+ critical_enter();
+ PCPU_SET(smp_tlb_addr1, addr1);
+ PCPU_SET(smp_tlb_addr2, addr2);
+ PCPU_SET(smp_tlb_pmap, pmap);
+ generation = PCPU_GET(smp_tlb_gen);
+ if (++generation == 0)
+ generation = 1;
+ PCPU_SET(smp_tlb_gen, generation);
+ PCPU_SET(smp_tlb_op, op);
+ /* Fence between filling smp_tlb fields and clearing scoreboard. */
+ atomic_thread_fence_rel();
+
+ mask1 = mask;
+ while ((cpu = CPU_FFS(&mask1)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask1);
+ KASSERT(*invl_scoreboard_slot(cpu) != 0,
+ ("IPI scoreboard is zero, initiator %d target %d",
+ PCPU_GET(cpuid), cpu));
+ *invl_scoreboard_slot(cpu) = 0;
+ }
+
+ if (CPU_ISFULLSET(&mask)) {
+ ipi_all_but_self(IPI_INVLOP);
+ other_cpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+ } else {
+ other_cpus = mask;
+ while ((cpu = CPU_FFS(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d invl ipi op: %x", __func__,
+ cpu, op);
+ ipi_send_cpu(cpu, IPI_INVLOP);
+ }
+ }
+ curcpu_cb(pmap, addr1, addr2);
+ while ((cpu = CPU_FFS(&other_cpus)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &other_cpus);
+ p_cpudone = invl_scoreboard_slot(cpu);
+ while (atomic_load_int(p_cpudone) != generation)
+ ia32_pause();
+ }
+ critical_exit();
+ sched_unpin();
+ return;
+
+nospinexit:
+ curcpu_cb(pmap, addr1, addr2);
+ sched_unpin();
+}
+
void
-invltlb_invpcid_handler(void)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
+{
+ smp_targeted_tlb_shootdown(mask, invl_op_tlb, pmap, 0, 0, curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+}
+
+void
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
+ smp_invl_cb_t curcpu_cb)
+{
+ smp_targeted_tlb_shootdown(mask, invl_op_pg, pmap, addr, 0, curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+}
+
+void
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
+ pmap_t pmap, smp_invl_cb_t curcpu_cb)
+{
+ smp_targeted_tlb_shootdown(mask, invl_op_pgrng, pmap, addr1, addr2,
+ curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+}
+
+void
+smp_cache_flush(smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(all_cpus, INVL_OP_CACHE, NULL, 0, 0,
+ curcpu_cb);
+}
+
+/*
+ * Handlers for TLB related IPIs
+ */
+static void
+invltlb_handler(pmap_t smp_tlb_pmap)
+{
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_gbl[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ if (smp_tlb_pmap == kernel_pmap)
+ invltlb_glob();
+ else
+ invltlb();
+}
+
+static void
+invltlb_invpcid_handler(pmap_t smp_tlb_pmap)
{
struct invpcid_descr d;
- uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
@@ -555,20 +765,17 @@
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation;
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
INVPCID_CTX);
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invltlb_invpcid_pti_handler(void)
+static void
+invltlb_invpcid_pti_handler(pmap_t smp_tlb_pmap)
{
struct invpcid_descr d;
- uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
@@ -577,7 +784,6 @@
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation;
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
d.pad = 0;
d.addr = 0;
@@ -594,14 +800,13 @@
d.pcid |= PMAP_PCID_USER_PT;
invpcid(&d, INVPCID_CTX);
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invltlb_pcid_handler(void)
+static void
+invltlb_pcid_handler(pmap_t smp_tlb_pmap)
{
uint64_t kcr3, ucr3;
- uint32_t generation, pcid;
+ uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
@@ -610,7 +815,6 @@
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation; /* Overlap with serialization */
if (smp_tlb_pmap == kernel_pmap) {
invltlb_glob();
} else {
@@ -632,14 +836,25 @@
load_cr3(kcr3);
}
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invlpg_invpcid_handler(void)
+static void
+invlpg_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
+{
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_pg[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ invlpg(smp_tlb_addr1);
+}
+
+static void
+invlpg_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
{
struct invpcid_descr d;
- uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_pg[PCPU_GET(cpuid)]++;
@@ -648,7 +863,6 @@
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
@@ -657,14 +871,12 @@
d.addr = smp_tlb_addr1;
invpcid(&d, INVPCID_ADDR);
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invlpg_pcid_handler(void)
+static void
+invlpg_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
{
uint64_t kcr3, ucr3;
- uint32_t generation;
uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
@@ -674,7 +886,6 @@
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
(ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
@@ -683,15 +894,35 @@
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invlrng_invpcid_handler(void)
+static void
+invlrng_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
+ vm_offset_t smp_tlb_addr2)
+{
+ vm_offset_t addr, addr2;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_rng[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ addr = smp_tlb_addr1;
+ addr2 = smp_tlb_addr2;
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < addr2);
+}
+
+static void
+invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
+ vm_offset_t smp_tlb_addr2)
{
struct invpcid_descr d;
vm_offset_t addr, addr2;
- uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_rng[PCPU_GET(cpuid)]++;
@@ -702,7 +933,6 @@
addr = smp_tlb_addr1;
addr2 = smp_tlb_addr2;
- generation = smp_tlb_generation; /* Overlap with serialization */
do {
invlpg(addr);
addr += PAGE_SIZE;
@@ -717,15 +947,14 @@
d.addr += PAGE_SIZE;
} while (d.addr < addr2);
}
- PCPU_SET(smp_tlb_done, generation);
}
-void
-invlrng_pcid_handler(void)
+static void
+invlrng_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
+ vm_offset_t smp_tlb_addr2)
{
vm_offset_t addr, addr2;
uint64_t kcr3, ucr3;
- uint32_t generation;
uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
@@ -737,7 +966,6 @@
addr = smp_tlb_addr1;
addr2 = smp_tlb_addr2;
- generation = smp_tlb_generation; /* Overlap with serialization */
do {
invlpg(addr);
addr += PAGE_SIZE;
@@ -749,5 +977,108 @@
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
}
- PCPU_SET(smp_tlb_done, generation);
+}
+
+static void
+invlcache_handler(void)
+{
+#ifdef COUNT_IPIS
+ (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+ wbinvd();
+}
+
+static void
+invlop_handler_one_req(enum invl_op_codes smp_tlb_op, pmap_t smp_tlb_pmap,
+ vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2)
+{
+ switch (smp_tlb_op) {
+ case INVL_OP_TLB:
+ invltlb_handler(smp_tlb_pmap);
+ break;
+ case INVL_OP_TLB_INVPCID:
+ invltlb_invpcid_handler(smp_tlb_pmap);
+ break;
+ case INVL_OP_TLB_INVPCID_PTI:
+ invltlb_invpcid_pti_handler(smp_tlb_pmap);
+ break;
+ case INVL_OP_TLB_PCID:
+ invltlb_pcid_handler(smp_tlb_pmap);
+ break;
+ case INVL_OP_PGRNG:
+ invlrng_handler(smp_tlb_pmap, smp_tlb_addr1, smp_tlb_addr2);
+ break;
+ case INVL_OP_PGRNG_INVPCID:
+ invlrng_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1,
+ smp_tlb_addr2);
+ break;
+ case INVL_OP_PGRNG_PCID:
+ invlrng_pcid_handler(smp_tlb_pmap, smp_tlb_addr1,
+ smp_tlb_addr2);
+ break;
+ case INVL_OP_PG:
+ invlpg_handler(smp_tlb_pmap, smp_tlb_addr1);
+ break;
+ case INVL_OP_PG_INVPCID:
+ invlpg_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1);
+ break;
+ case INVL_OP_PG_PCID:
+ invlpg_pcid_handler(smp_tlb_pmap, smp_tlb_addr1);
+ break;
+ case INVL_OP_CACHE:
+ invlcache_handler();
+ break;
+ default:
+ __assert_unreachable();
+ break;
+ }
+}
+
+void
+invlop_handler(void)
+{
+ struct pcpu *initiator_pc;
+ pmap_t smp_tlb_pmap;
+ vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
+ u_int initiator_cpu_id;
+ enum invl_op_codes smp_tlb_op;
+ uint32_t *scoreboard, smp_tlb_gen;
+
+ scoreboard = invl_scoreboard_getcpu(PCPU_GET(cpuid));
+ for (;;) {
+ for (initiator_cpu_id = 0; initiator_cpu_id <= mp_maxid;
+ initiator_cpu_id++) {
+ if (scoreboard[initiator_cpu_id] == 0)
+ break;
+ }
+ if (initiator_cpu_id > mp_maxid)
+ break;
+ initiator_pc = cpuid_to_pcpu[initiator_cpu_id];
+
+ /*
+ * Dual acquire fence, for release fence in
+ * smp_targeted_tlb_shootdown(), between reading zero
+ * scoreboard slot and accessing PCPU of initiator for
+ * smp_tlb_XXX values.
+ */
+ atomic_thread_fence_acq();
+ smp_tlb_pmap = initiator_pc->pc_smp_tlb_pmap;
+ smp_tlb_addr1 = initiator_pc->pc_smp_tlb_addr1;
+ smp_tlb_addr2 = initiator_pc->pc_smp_tlb_addr2;
+ smp_tlb_op = initiator_pc->pc_smp_tlb_op;
+ smp_tlb_gen = initiator_pc->pc_smp_tlb_gen;
+
+ /*
+ * Ensure that we do not make our scoreboard
+ * notification visible to the initiator until the
+ * pc_smp_tlb values are read. The dual fence is
+ * implicitly provided by the barrier in IPI send
+ * operation before APIC ICR register write.
+ */
+ atomic_thread_fence_acq();
+ atomic_store_int(&scoreboard[initiator_cpu_id], smp_tlb_gen);
+
+ invlop_handler_one_req(smp_tlb_op, smp_tlb_pmap, smp_tlb_addr1,
+ smp_tlb_addr2);
+ }
}
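[Note] A standalone user-space model of the scoreboard handshake above may help; this is a sketch, not the kernel code. NCPU stands in for mp_maxid + 1, struct req models the pc_smp_tlb_* PCPU fields, and initiate/handle are illustrative names. C11 atomics replace FreeBSD's atomic(9), and the release fence that the kernel gets implicitly from the barrier in the IPI send path is written out explicitly.

#include <stdatomic.h>
#include <stdint.h>

#define	NCPU	4		/* stands in for mp_maxid + 1 */

static _Atomic uint32_t scoreboard[NCPU][NCPU];	/* [target][initiator] */

static struct req {		/* models the pc_smp_tlb_* fields */
	uint32_t gen;		/* 0 is reserved: "request pending" */
	int	 op;
} request[NCPU];		/* one record per initiator */

/* Initiator: publish the request, then open the target's slot. */
static void
initiate(int self, int target, int op)
{
	uint32_t gen;

	gen = request[self].gen + 1;
	if (gen == 0)		/* skip the reserved value on wrap */
		gen = 1;
	request[self].op = op;
	request[self].gen = gen;
	/* Pairs with the first acquire fence on the target side. */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&scoreboard[target][self], 0,
	    memory_order_relaxed);
	/* ... send the IPI, run curcpu_cb locally, then wait ... */
	while (atomic_load_explicit(&scoreboard[target][self],
	    memory_order_acquire) != gen)
		;		/* ia32_pause() in the kernel */
}

/* Target: scan for zero slots, latch the request, post the generation. */
static void
handle(int self)
{
	struct req r;
	int i;

	for (i = 0; i < NCPU; i++) {
		if (atomic_load_explicit(&scoreboard[self][i],
		    memory_order_relaxed) != 0)
			continue;
		/* Request fields are valid once the zero is observed. */
		atomic_thread_fence(memory_order_acquire);
		r = request[i];
		/* Keep the reads above before the completion store,
		 * mirroring the second atomic_thread_fence_acq() above. */
		atomic_thread_fence(memory_order_acquire);
		atomic_store_explicit(&scoreboard[self][i], r.gen,
		    memory_order_relaxed);
		/* ... perform r.op; it completes before the IPI returns ... */
		(void)r.op;
	}
}

Note that, as in invlop_handler above, the completion value is posted before the invalidation is performed: once the target has latched the request, the initiator may reuse its pc_smp_tlb_* slots, while the invalidation itself still finishes before the target returns to the interrupted code.
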
Index: sys/amd64/include/pcpu.h
===================================================================
--- sys/amd64/include/pcpu.h
+++ sys/amd64/include/pcpu.h
@@ -85,7 +85,7 @@
u_int pc_vcpu_id; /* Xen vCPU ID */ \
uint32_t pc_pcid_next; \
uint32_t pc_pcid_gen; \
- uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
+ uint32_t pc_unused; \
uint32_t pc_ibpb_set; \
void *pc_mds_buf; \
void *pc_mds_buf64; \
@@ -94,7 +94,12 @@
u_int pc_ipi_bitmap; \
struct amd64tss pc_common_tss; \
struct user_segment_descriptor pc_gdt[NGDT]; \
- char __pad[2956] /* pad to UMA_PCPU_ALLOC_SIZE */
+ void *pc_smp_tlb_pmap; \
+ uint64_t pc_smp_tlb_addr1; \
+ uint64_t pc_smp_tlb_addr2; \
+ uint32_t pc_smp_tlb_gen; \
+ u_int pc_smp_tlb_op; \
+ char __pad[2924] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
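[Note] The pad arithmetic checks out: the old pc_smp_tlb_done slot is kept as pc_unused, so the members that follow keep their offsets, and the five new pc_smp_tlb_* fields (8 + 8 + 8 + 4 + 4 = 32 bytes) account exactly for the pad shrinking from 2956 to 2924. A hypothetical compile-time check, assuming LP64 amd64:

/* Hypothetical check: new fields match the pad adjustment (LP64). */
#include <assert.h>
#include <stdint.h>

static_assert(sizeof(void *) + 2 * sizeof(uint64_t) +
    sizeof(uint32_t) + sizeof(unsigned int) == 2956 - 2924,
    "new pcpu fields match the pad adjustment");
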
Index: sys/amd64/include/smp.h
===================================================================
--- sys/amd64/include/smp.h
+++ sys/amd64/include/smp.h
@@ -29,34 +29,14 @@
inthand_t
IDTVEC(justreturn), /* interrupt CPU with minimum overhead */
IDTVEC(justreturn1_pti),
- IDTVEC(invltlb_pti),
- IDTVEC(invltlb_pcid_pti),
- IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
- IDTVEC(invltlb_invpcid_pti_pti),
- IDTVEC(invltlb_invpcid_nopti),
- IDTVEC(invlpg_pti),
- IDTVEC(invlpg_invpcid_pti),
- IDTVEC(invlpg_invpcid),
- IDTVEC(invlpg_pcid_pti),
- IDTVEC(invlpg_pcid),
- IDTVEC(invlrng_pti),
- IDTVEC(invlrng_invpcid_pti),
- IDTVEC(invlrng_invpcid),
- IDTVEC(invlrng_pcid_pti),
- IDTVEC(invlrng_pcid),
- IDTVEC(invlcache_pti),
+ IDTVEC(invlop_pti),
+ IDTVEC(invlop),
IDTVEC(ipi_intr_bitmap_handler_pti),
IDTVEC(cpustop_pti),
IDTVEC(cpususpend_pti),
IDTVEC(rendezvous_pti);
-void invltlb_pcid_handler(void);
-void invltlb_invpcid_handler(void);
-void invltlb_invpcid_pti_handler(void);
-void invlpg_invpcid_handler(void);
-void invlpg_pcid_handler(void);
-void invlrng_invpcid_handler(void);
-void invlrng_pcid_handler(void);
+void invlop_handler(void);
int native_start_all_aps(void);
void mp_bootaddress(vm_paddr_t *, unsigned int *);
Index: sys/i386/i386/mp_machdep.c
===================================================================
--- sys/i386/i386/mp_machdep.c
+++ sys/i386/i386/mp_machdep.c
@@ -467,3 +467,233 @@
}
return 0; /* return FAILURE */
}
+
+/*
+ * Flush the TLB on other CPUs
+ */
+
+/* Variables needed for SMP tlb shootdown. */
+vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
+pmap_t smp_tlb_pmap;
+volatile uint32_t smp_tlb_generation;
+
+/*
+ * Used by pmap to request invalidation of TLB or cache on local and
+ * remote processors. Mask provides the set of remote CPUs which are
+ * to be signalled with the IPI specified by vector. The curcpu_cb
+ * callback is invoked on the calling CPU while waiting for remote
+ * CPUs to complete the operation.
+ *
+ * The callback function is called unconditionally on the caller's
+ * underlying processor, even when this processor is not set in the
+ * mask. So, the callback function must be prepared to handle such
+ * spurious invocations.
+ */
+static void
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
+ vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
+{
+ cpuset_t other_cpus;
+ volatile uint32_t *p_cpudone;
+ uint32_t generation;
+ int cpu;
+
+ /*
+ * It is not necessary to signal other CPUs while booting or
+ * when in the debugger.
+ */
+ if (kdb_active || KERNEL_PANICKED() || !smp_started) {
+ curcpu_cb(pmap, addr1, addr2);
+ return;
+ }
+
+ sched_pin();
+
+ /*
+ * Check for other cpus. Return if none.
+ */
+ if (CPU_ISFULLSET(&mask)) {
+ if (mp_ncpus <= 1)
+ goto nospinexit;
+ } else {
+ CPU_CLR(PCPU_GET(cpuid), &mask);
+ if (CPU_EMPTY(&mask))
+ goto nospinexit;
+ }
+
+ if (!(read_eflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_ipi_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_pmap = pmap;
+ generation = ++smp_tlb_generation;
+ if (CPU_ISFULLSET(&mask)) {
+ ipi_all_but_self(vector);
+ other_cpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+ } else {
+ other_cpus = mask;
+ while ((cpu = CPU_FFS(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
+ cpu, vector);
+ ipi_send_cpu(cpu, vector);
+ }
+ }
+ curcpu_cb(pmap, addr1, addr2);
+ while ((cpu = CPU_FFS(&other_cpus)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &other_cpus);
+ p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done;
+ while (*p_cpudone != generation)
+ ia32_pause();
+ }
+ mtx_unlock_spin(&smp_ipi_mtx);
+ sched_unpin();
+ return;
+
+nospinexit:
+ curcpu_cb(pmap, addr1, addr2);
+ sched_unpin();
+}
+
+void
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0, curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+}
+
+void
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
+ smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0, curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+}
+
+void
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
+ pmap_t pmap, smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2,
+ curcpu_cb);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+}
+
+void
+smp_cache_flush(smp_invl_cb_t curcpu_cb)
+{
+
+ smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0,
+ curcpu_cb);
+}
+
+/*
+ * Handlers for TLB related IPIs
+ */
+void
+invltlb_handler(void)
+{
+ uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_gbl[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ /*
+ * Reading the generation here allows greater parallelism
+ * since invalidating the TLB is a serializing operation.
+ */
+ generation = smp_tlb_generation;
+ if (smp_tlb_pmap == kernel_pmap)
+ invltlb_glob();
+#ifdef __amd64__
+ else
+ invltlb();
+#endif
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlpg_handler(void)
+{
+ uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_pg[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ generation = smp_tlb_generation; /* Overlap with serialization */
+#ifdef __i386__
+ if (smp_tlb_pmap == kernel_pmap)
+#endif
+ invlpg(smp_tlb_addr1);
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlrng_handler(void)
+{
+ vm_offset_t addr, addr2;
+ uint32_t generation;
+
+#ifdef COUNT_XINVLTLB_HITS
+ xhits_rng[PCPU_GET(cpuid)]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+ (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ addr = smp_tlb_addr1;
+ addr2 = smp_tlb_addr2;
+ generation = smp_tlb_generation; /* Overlap with serialization */
+#ifdef __i386__
+ if (smp_tlb_pmap == kernel_pmap)
+#endif
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < addr2);
+
+ PCPU_SET(smp_tlb_done, generation);
+}
+
+void
+invlcache_handler(void)
+{
+ uint32_t generation;
+
+#ifdef COUNT_IPIS
+ (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ /*
+ * Reading the generation here allows greater parallelism
+ * since wbinvd is a serializing instruction. Without the
+ * temporary, we'd wait for wbinvd to complete, then the read
+ * would execute, then the dependent write, which must then
+ * complete before return from interrupt.
+ */
+ generation = smp_tlb_generation;
+ wbinvd();
+ PCPU_SET(smp_tlb_done, generation);
+}
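[Note] i386 keeps the pre-existing serialized protocol, moved essentially verbatim from sys/x86/x86/mp_x86.c (removed below). The contrast with the amd64 rewrite is that every initiator funnels through one set of globals, so smp_ipi_mtx admits a single shootdown at a time. A condensed sketch of that property, reusing the declarations above; serialized_shootdown is a hypothetical name and bookkeeping paths are elided:

static void
serialized_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
    vm_offset_t addr1, vm_offset_t addr2)
{
	uint32_t gen;
	int cpu;

	mtx_lock_spin(&smp_ipi_mtx);	/* excludes all other initiators */
	smp_tlb_addr1 = addr1;		/* the one global request record */
	smp_tlb_addr2 = addr2;
	smp_tlb_pmap = pmap;
	gen = ++smp_tlb_generation;	/* one global generation counter */
	ipi_selected(mask, vector);	/* raise the IPI on every target */
	/* The curcpu_cb callback would run here, overlapping the wait. */
	while ((cpu = CPU_FFS(&mask)) != 0) {
		cpu--;
		CPU_CLR(cpu, &mask);
		while (cpuid_to_pcpu[cpu]->pc_smp_tlb_done != gen)
			ia32_pause();	/* spin until this target acks */
	}
	mtx_unlock_spin(&smp_ipi_mtx);
}
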
Index: sys/i386/include/smp.h
===================================================================
--- sys/i386/include/smp.h
+++ sys/i386/include/smp.h
@@ -27,9 +27,20 @@
#include <x86/apicvar.h>
#include <machine/pcb.h>
+inthand_t
+ IDTVEC(invltlb), /* TLB shootdowns - global */
+ IDTVEC(invlpg), /* TLB shootdowns - 1 page */
+ IDTVEC(invlrng), /* TLB shootdowns - page range */
+ IDTVEC(invlcache); /* Write back and invalidate cache */
+
/* functions in mpboot.s */
void bootMP(void);
+void invltlb_handler(void);
+void invlpg_handler(void);
+void invlrng_handler(void);
+void invlcache_handler(void);
+
#endif /* !LOCORE */
#endif /* SMP */
Index: sys/x86/include/apicvar.h
===================================================================
--- sys/x86/include/apicvar.h
+++ sys/x86/include/apicvar.h
@@ -112,7 +112,8 @@
#define APIC_IPI_INTS (APIC_LOCAL_INTS + 3)
#define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */
-#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */
+#define IPI_INVLOP (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs, amd64 */
+#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs, i386 */
#define IPI_INVLPG (APIC_IPI_INTS + 2)
#define IPI_INVLRNG (APIC_IPI_INTS + 3)
#define IPI_INVLCACHE (APIC_IPI_INTS + 4)
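[Note] No vectors are renumbered: the new amd64 name aliases the old one, so i386 can keep using IPI_INVLTLB/IPI_INVLPG/IPI_INVLRNG/IPI_INVLCACHE while amd64 multiplexes every operation over IPI_INVLOP. A hypothetical compile-time check of the aliasing:

/* Hypothetical check: both names select APIC_IPI_INTS + 1. */
_Static_assert(IPI_INVLOP == IPI_INVLTLB,
    "amd64 IPI_INVLOP aliases i386 IPI_INVLTLB");
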
Index: sys/x86/include/x86_smp.h
===================================================================
--- sys/x86/include/x86_smp.h
+++ sys/x86/include/x86_smp.h
@@ -75,10 +75,6 @@
/* IPI handlers */
inthand_t
- IDTVEC(invltlb), /* TLB shootdowns - global */
- IDTVEC(invlpg), /* TLB shootdowns - 1 page */
- IDTVEC(invlrng), /* TLB shootdowns - page range */
- IDTVEC(invlcache), /* Write back and invalidate cache */
IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
IDTVEC(cpustop), /* CPU stops & waits to be restarted */
IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */
@@ -94,10 +90,6 @@
void cpususpend_handler(void);
void alloc_ap_trampoline(vm_paddr_t *physmap, unsigned int *physmap_idx);
void init_secondary_tail(void);
-void invltlb_handler(void);
-void invlpg_handler(void);
-void invlrng_handler(void);
-void invlcache_handler(void);
void init_secondary(void);
void ipi_startup(int apic_id, int vector);
void ipi_all_but_self(u_int ipi);
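[Note] The pmap-facing entry points (smp_masked_invltlb and friends) keep their signatures, so callers are untouched. A hedged sketch of a caller, with hypothetical names; the real callers are the pmap_invalidate_* functions, and the usual machine/pmap headers are assumed:

/*
 * Hypothetical caller of the unchanged pmap-facing API; local_cb and
 * flush_range are illustrative names, not functions from this patch.
 * The callback runs on the initiating CPU even when that CPU is not
 * in the mask, so it must tolerate spurious invocations.
 */
static void
local_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va;

	/* Local invalidation, overlapped with the remote shootdowns. */
	for (va = sva; va < eva; va += PAGE_SIZE)
		invlpg(va);
}

static void
flush_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	/* all_cpus for the kernel pmap; user pmaps can pass a narrower
	 * mask derived from pm_active. */
	smp_masked_invlpg_range(all_cpus, sva, eva, pmap, local_cb);
}
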
Index: sys/x86/x86/mp_x86.c
===================================================================
--- sys/x86/x86/mp_x86.c
+++ sys/x86/x86/mp_x86.c
@@ -1593,28 +1593,6 @@
CPU_CLR_ATOMIC(cpu, &toresume_cpus);
}
-
-void
-invlcache_handler(void)
-{
- uint32_t generation;
-
-#ifdef COUNT_IPIS
- (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- /*
- * Reading the generation here allows greater parallelism
- * since wbinvd is a serializing instruction. Without the
- * temporary, we'd wait for wbinvd to complete, then the read
- * would execute, then the dependent write, which must then
- * complete before return from interrupt.
- */
- generation = smp_tlb_generation;
- wbinvd();
- PCPU_SET(smp_tlb_done, generation);
-}
-
/*
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
@@ -1662,216 +1640,3 @@
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif
-
-/*
- * Flush the TLB on other CPU's
- */
-
-/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
-pmap_t smp_tlb_pmap;
-volatile uint32_t smp_tlb_generation;
-
-#ifdef __amd64__
-#define read_eflags() read_rflags()
-#endif
-
-/*
- * Used by pmap to request invalidation of TLB or cache on local and
- * remote processors. Mask provides the set of remote CPUs which are
- * to be signalled with the IPI specified by vector. The curcpu_cb
- * callback is invoked on the calling CPU while waiting for remote
- * CPUs to complete the operation.
- *
- * The callback function is called unconditionally on the caller's
- * underlying processor, even when this processor is not set in the
- * mask. So, the callback function must be prepared to handle such
- * spurious invocations.
- */
-static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
- vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
-{
- cpuset_t other_cpus;
- volatile uint32_t *p_cpudone;
- uint32_t generation;
- int cpu;
-
- /*
- * It is not necessary to signal other CPUs while booting or
- * when in the debugger.
- */
- if (kdb_active || KERNEL_PANICKED() || !smp_started) {
- curcpu_cb(pmap, addr1, addr2);
- return;
- }
-
- sched_pin();
-
- /*
- * Check for other cpus. Return if none.
- */
- if (CPU_ISFULLSET(&mask)) {
- if (mp_ncpus <= 1)
- goto nospinexit;
- } else {
- CPU_CLR(PCPU_GET(cpuid), &mask);
- if (CPU_EMPTY(&mask))
- goto nospinexit;
- }
-
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_addr1 = addr1;
- smp_tlb_addr2 = addr2;
- smp_tlb_pmap = pmap;
- generation = ++smp_tlb_generation;
- if (CPU_ISFULLSET(&mask)) {
- ipi_all_but_self(vector);
- other_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &other_cpus);
- } else {
- other_cpus = mask;
- while ((cpu = CPU_FFS(&mask)) != 0) {
- cpu--;
- CPU_CLR(cpu, &mask);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
- cpu, vector);
- ipi_send_cpu(cpu, vector);
- }
- }
- curcpu_cb(pmap, addr1, addr2);
- while ((cpu = CPU_FFS(&other_cpus)) != 0) {
- cpu--;
- CPU_CLR(cpu, &other_cpus);
- p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done;
- while (*p_cpudone != generation)
- ia32_pause();
- }
- mtx_unlock_spin(&smp_ipi_mtx);
- sched_unpin();
- return;
-
-nospinexit:
- curcpu_cb(pmap, addr1, addr2);
- sched_unpin();
-}
-
-void
-smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
-{
-
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0, curcpu_cb);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_global++;
-#endif
-}
-
-void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
- smp_invl_cb_t curcpu_cb)
-{
-
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0, curcpu_cb);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_page++;
-#endif
-}
-
-void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
- pmap_t pmap, smp_invl_cb_t curcpu_cb)
-{
-
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2,
- curcpu_cb);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_range++;
- ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
-}
-
-void
-smp_cache_flush(smp_invl_cb_t curcpu_cb)
-{
-
- smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0,
- curcpu_cb);
-}
-
-/*
- * Handlers for TLB related IPIs
- */
-void
-invltlb_handler(void)
-{
- uint32_t generation;
-
-#ifdef COUNT_XINVLTLB_HITS
- xhits_gbl[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- /*
- * Reading the generation here allows greater parallelism
- * since invalidating the TLB is a serializing operation.
- */
- generation = smp_tlb_generation;
- if (smp_tlb_pmap == kernel_pmap)
- invltlb_glob();
-#ifdef __amd64__
- else
- invltlb();
-#endif
- PCPU_SET(smp_tlb_done, generation);
-}
-
-void
-invlpg_handler(void)
-{
- uint32_t generation;
-
-#ifdef COUNT_XINVLTLB_HITS
- xhits_pg[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- generation = smp_tlb_generation; /* Overlap with serialization */
-#ifdef __i386__
- if (smp_tlb_pmap == kernel_pmap)
-#endif
- invlpg(smp_tlb_addr1);
- PCPU_SET(smp_tlb_done, generation);
-}
-
-void
-invlrng_handler(void)
-{
- vm_offset_t addr, addr2;
- uint32_t generation;
-
-#ifdef COUNT_XINVLTLB_HITS
- xhits_rng[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- addr = smp_tlb_addr1;
- addr2 = smp_tlb_addr2;
- generation = smp_tlb_generation; /* Overlap with serialization */
-#ifdef __i386__
- if (smp_tlb_pmap == kernel_pmap)
-#endif
- do {
- invlpg(addr);
- addr += PAGE_SIZE;
- } while (addr < addr2);
-
- PCPU_SET(smp_tlb_done, generation);
-}