Index: head/sys/amd64/amd64/apic_vector.S =================================================================== --- head/sys/amd64/amd64/apic_vector.S +++ head/sys/amd64/amd64/apic_vector.S @@ -171,63 +171,13 @@ .text SUPERALIGN_TEXT -invltlb_ret: - call as_lapic_eoi - jmp ld_regs - - SUPERALIGN_TEXT - INTR_HANDLER invltlb - call invltlb_handler - jmp invltlb_ret - - INTR_HANDLER invltlb_pcid - call invltlb_pcid_handler - jmp invltlb_ret - - INTR_HANDLER invltlb_invpcid_nopti - call invltlb_invpcid_handler - jmp invltlb_ret - - INTR_HANDLER invltlb_invpcid_pti - call invltlb_invpcid_pti_handler - jmp invltlb_ret - /* - * Single page TLB shootdown + * IPI handler for cache and TLB shootdown */ - INTR_HANDLER invlpg - call invlpg_handler - jmp invltlb_ret - - INTR_HANDLER invlpg_invpcid - call invlpg_invpcid_handler - jmp invltlb_ret - - INTR_HANDLER invlpg_pcid - call invlpg_pcid_handler - jmp invltlb_ret - -/* - * Page range TLB shootdown. - */ - INTR_HANDLER invlrng - call invlrng_handler - jmp invltlb_ret - - INTR_HANDLER invlrng_invpcid - call invlrng_invpcid_handler - jmp invltlb_ret - - INTR_HANDLER invlrng_pcid - call invlrng_pcid_handler - jmp invltlb_ret - -/* - * Invalidate cache. - */ - INTR_HANDLER invlcache - call invlcache_handler - jmp invltlb_ret + INTR_HANDLER invlop + call invlop_handler + call as_lapic_eoi + jmp ld_regs /* * Handler for IPIs sent via the per-cpu IPI bitmap. Index: head/sys/amd64/amd64/db_interface.c =================================================================== --- head/sys/amd64/amd64/db_interface.c +++ head/sys/amd64/amd64/db_interface.c @@ -107,5 +107,4 @@ db_printf("gs32p = %p\n", pc->pc_gs32p); db_printf("ldt = %p\n", pc->pc_ldt); db_printf("tss = %p\n", pc->pc_tss); - db_printf("tlb gen = %u\n", pc->pc_smp_tlb_done); } Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c +++ head/sys/amd64/amd64/machdep.c @@ -1562,6 +1562,7 @@ PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); + PCPU_SET(smp_tlb_gen, 1); } void Index: head/sys/amd64/amd64/mp_machdep.c =================================================================== --- head/sys/amd64/amd64/mp_machdep.c +++ head/sys/amd64/amd64/mp_machdep.c @@ -44,6 +44,7 @@ #ifdef GPROF #include #endif +#include #include #include #include @@ -202,36 +203,8 @@ cpu_apic_ids[i] = -1; } - /* Install an inter-CPU IPI for TLB invalidation */ - if (pmap_pcid_enabled) { - if (invpcid_works) { - setidt(IPI_INVLTLB, pti ? - IDTVEC(invltlb_invpcid_pti_pti) : - IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT, - SEL_KPL, 0); - setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) : - IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0); - setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) : - IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0); - } else { - setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) : - IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0); - setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) : - IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0); - setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) : - IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0); - } - } else { - setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb), - SDT_SYSIGT, SEL_KPL, 0); - setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg), - SDT_SYSIGT, SEL_KPL, 0); - setidt(IPI_INVLRNG, pti ? 
IDTVEC(invlrng_pti) : IDTVEC(invlrng), - SDT_SYSIGT, SEL_KPL, 0); - } - - /* Install an inter-CPU IPI for cache invalidation. */ - setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache), + /* Install an inter-CPU IPI for for cache and TLB invalidations. */ + setidt(IPI_INVLOP, pti ? IDTVEC(invlop_pti) : IDTVEC(invlop), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for all-CPU rendezvous */ @@ -314,6 +287,8 @@ pc->pc_pcid_next = PMAP_PCID_KERN + 2; pc->pc_pcid_gen = 1; + pc->pc_smp_tlb_gen = 1; + /* Init tss */ pc->pc_common_tss = __pcpu[0].pc_common_tss; pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + @@ -542,11 +517,270 @@ return 0; /* return FAILURE */ } +/* + * Flush the TLB on other CPU's + */ + +/* + * Invalidation request. PCPU pc_smp_tlb_op uses u_int instead of the + * enum to avoid both namespace and ABI issues (with enums). + */ +enum invl_op_codes { + INVL_OP_TLB = 1, + INVL_OP_TLB_INVPCID = 2, + INVL_OP_TLB_INVPCID_PTI = 3, + INVL_OP_TLB_PCID = 4, + INVL_OP_PGRNG = 5, + INVL_OP_PGRNG_INVPCID = 6, + INVL_OP_PGRNG_PCID = 7, + INVL_OP_PG = 8, + INVL_OP_PG_INVPCID = 9, + INVL_OP_PG_PCID = 10, + INVL_OP_CACHE = 11, +}; + +/* + * These variables are initialized at startup to reflect how each of + * the different kinds of invalidations should be performed on the + * current machine and environment. + */ +static enum invl_op_codes invl_op_tlb; +static enum invl_op_codes invl_op_pgrng; +static enum invl_op_codes invl_op_pg; + +/* + * Scoreboard of IPI completion notifications from target to IPI initiator. + * + * Each CPU can initiate shootdown IPI independently from other CPUs. + * Initiator enters critical section, then fills its local PCPU + * shootdown info (pc_smp_tlb_ vars), then clears scoreboard generation + * at location (cpu, my_cpuid) for each target cpu. After that IPI is + * sent to all targets which scan for zeroed scoreboard generation + * words. Upon finding such word the shootdown data is read from + * corresponding cpu' pcpu, and generation is set. Meantime initiator + * loops waiting for all zeroed generations in scoreboard to update. + */ +static uint32_t *invl_scoreboard; + +static void +invl_scoreboard_init(void *arg __unused) +{ + u_int i; + + invl_scoreboard = malloc(sizeof(uint32_t) * (mp_maxid + 1) * + (mp_maxid + 1), M_DEVBUF, M_WAITOK); + for (i = 0; i < (mp_maxid + 1) * (mp_maxid + 1); i++) + invl_scoreboard[i] = 1; + + if (pmap_pcid_enabled) { + if (invpcid_works) { + if (pti) + invl_op_tlb = INVL_OP_TLB_INVPCID_PTI; + else + invl_op_tlb = INVL_OP_TLB_INVPCID; + invl_op_pgrng = INVL_OP_PGRNG_INVPCID; + invl_op_pg = INVL_OP_PG_INVPCID; + } else { + invl_op_tlb = INVL_OP_TLB_PCID; + invl_op_pgrng = INVL_OP_PGRNG_PCID; + invl_op_pg = INVL_OP_PG_PCID; + } + } else { + invl_op_tlb = INVL_OP_TLB; + invl_op_pgrng = INVL_OP_PGRNG; + invl_op_pg = INVL_OP_PG; + } +} +SYSINIT(invl_ops, SI_SUB_SMP, SI_ORDER_FIRST, invl_scoreboard_init, NULL); + +static uint32_t * +invl_scoreboard_getcpu(u_int cpu) +{ + return (invl_scoreboard + cpu * (mp_maxid + 1)); +} + +static uint32_t * +invl_scoreboard_slot(u_int cpu) +{ + return (invl_scoreboard_getcpu(cpu) + PCPU_GET(cpuid)); +} + +/* + * Used by pmap to request cache or TLB invalidation on local and + * remote processors. Mask provides the set of remote CPUs which are + * to be signalled with the invalidation IPI. As an optimization, the + * curcpu_cb callback is invoked on the calling CPU while waiting for + * remote CPUs to complete the operation. 
+ * + * The callback function is called unconditionally on the caller's + * underlying processor, even when this processor is not set in the + * mask. So, the callback function must be prepared to handle such + * spurious invocations. + * + * Interrupts must be enabled when calling the function with smp + * started, to avoid deadlock with other IPIs that are protected with + * smp_ipi_mtx spinlock at the initiator side. + */ +static void +smp_targeted_tlb_shootdown(cpuset_t mask, pmap_t pmap, vm_offset_t addr1, + vm_offset_t addr2, smp_invl_cb_t curcpu_cb, enum invl_op_codes op) +{ + cpuset_t other_cpus, mask1; + uint32_t generation, *p_cpudone; + int cpu; + + /* + * It is not necessary to signal other CPUs while booting or + * when in the debugger. + */ + if (kdb_active || KERNEL_PANICKED() || !smp_started) { + curcpu_cb(pmap, addr1, addr2); + return; + } + + sched_pin(); + + /* + * Check for other cpus. Return if none. + */ + if (CPU_ISFULLSET(&mask)) { + if (mp_ncpus <= 1) + goto nospinexit; + } else { + CPU_CLR(PCPU_GET(cpuid), &mask); + if (CPU_EMPTY(&mask)) + goto nospinexit; + } + + /* + * Initiator must have interrupts enabled, which prevents + * non-invalidation IPIs, that takes smp_ipi_mtx spinlock, + * from deadlocking with as. On the other hand, preemption + * must be disabled to pin initiator to the instance of the + * pcpu pc_smp_tlb data and scoreboard line. + */ + KASSERT((read_rflags() & PSL_I) != 0, + ("smp_targeted_tlb_shootdown: interrupts disabled")); + critical_enter(); + + PCPU_SET(smp_tlb_addr1, addr1); + PCPU_SET(smp_tlb_addr2, addr2); + PCPU_SET(smp_tlb_pmap, pmap); + generation = PCPU_GET(smp_tlb_gen); + if (++generation == 0) + generation = 1; + PCPU_SET(smp_tlb_gen, generation); + PCPU_SET(smp_tlb_op, op); + /* Fence between filling smp_tlb fields and clearing scoreboard. */ + atomic_thread_fence_rel(); + + mask1 = mask; + while ((cpu = CPU_FFS(&mask1)) != 0) { + cpu--; + CPU_CLR(cpu, &mask1); + KASSERT(*invl_scoreboard_slot(cpu) != 0, + ("IPI scoreboard is zero, initiator %d target %d", + PCPU_GET(cpuid), cpu)); + *invl_scoreboard_slot(cpu) = 0; + } + + /* + * IPI acts as a fence between writing to the scoreboard above + * (zeroing slot) and reading from it below (wait for + * acknowledge). 
+ */ + if (CPU_ISFULLSET(&mask)) { + ipi_all_but_self(IPI_INVLOP); + other_cpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &other_cpus); + } else { + other_cpus = mask; + while ((cpu = CPU_FFS(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d invl ipi op: %x", __func__, + cpu, op); + ipi_send_cpu(cpu, IPI_INVLOP); + } + } + curcpu_cb(pmap, addr1, addr2); + while ((cpu = CPU_FFS(&other_cpus)) != 0) { + cpu--; + CPU_CLR(cpu, &other_cpus); + p_cpudone = invl_scoreboard_slot(cpu); + while (atomic_load_int(p_cpudone) != generation) + ia32_pause(); + } + critical_exit(); + sched_unpin(); + return; + +nospinexit: + curcpu_cb(pmap, addr1, addr2); + sched_unpin(); +} + void -invltlb_invpcid_handler(void) +smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb) { + smp_targeted_tlb_shootdown(mask, pmap, 0, 0, curcpu_cb, invl_op_tlb); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif +} + +void +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap, + smp_invl_cb_t curcpu_cb) +{ + smp_targeted_tlb_shootdown(mask, pmap, addr, 0, curcpu_cb, invl_op_pg); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif +} + +void +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2, + pmap_t pmap, smp_invl_cb_t curcpu_cb) +{ + smp_targeted_tlb_shootdown(mask, pmap, addr1, addr2, curcpu_cb, + invl_op_pgrng); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif +} + +void +smp_cache_flush(smp_invl_cb_t curcpu_cb) +{ + smp_targeted_tlb_shootdown(all_cpus, NULL, 0, 0, curcpu_cb, + INVL_OP_CACHE); +} + +/* + * Handlers for TLB related IPIs + */ +static void +invltlb_handler(pmap_t smp_tlb_pmap) +{ +#ifdef COUNT_XINVLTLB_HITS + xhits_gbl[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + if (smp_tlb_pmap == kernel_pmap) + invltlb_glob(); + else + invltlb(); +} + +static void +invltlb_invpcid_handler(pmap_t smp_tlb_pmap) +{ struct invpcid_descr d; - uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; @@ -555,20 +789,17 @@ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - generation = smp_tlb_generation; d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, smp_tlb_pmap == kernel_pmap ? 
INVPCID_CTXGLOB : INVPCID_CTX); - PCPU_SET(smp_tlb_done, generation); } -void -invltlb_invpcid_pti_handler(void) +static void +invltlb_invpcid_pti_handler(pmap_t smp_tlb_pmap) { struct invpcid_descr d; - uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; @@ -577,7 +808,6 @@ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - generation = smp_tlb_generation; d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; d.pad = 0; d.addr = 0; @@ -594,14 +824,13 @@ d.pcid |= PMAP_PCID_USER_PT; invpcid(&d, INVPCID_CTX); } - PCPU_SET(smp_tlb_done, generation); } -void -invltlb_pcid_handler(void) +static void +invltlb_pcid_handler(pmap_t smp_tlb_pmap) { uint64_t kcr3, ucr3; - uint32_t generation, pcid; + uint32_t pcid; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; @@ -610,7 +839,6 @@ (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - generation = smp_tlb_generation; /* Overlap with serialization */ if (smp_tlb_pmap == kernel_pmap) { invltlb_glob(); } else { @@ -632,14 +860,25 @@ load_cr3(kcr3); } } - PCPU_SET(smp_tlb_done, generation); } -void -invlpg_invpcid_handler(void) +static void +invlpg_handler(vm_offset_t smp_tlb_addr1) { +#ifdef COUNT_XINVLTLB_HITS + xhits_pg[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + invlpg(smp_tlb_addr1); +} + +static void +invlpg_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1) +{ struct invpcid_descr d; - uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_pg[PCPU_GET(cpuid)]++; @@ -648,7 +887,6 @@ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - generation = smp_tlb_generation; /* Overlap with serialization */ invlpg(smp_tlb_addr1); if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) { d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid | @@ -657,14 +895,12 @@ d.addr = smp_tlb_addr1; invpcid(&d, INVPCID_ADDR); } - PCPU_SET(smp_tlb_done, generation); } -void -invlpg_pcid_handler(void) +static void +invlpg_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1) { uint64_t kcr3, ucr3; - uint32_t generation; uint32_t pcid; #ifdef COUNT_XINVLTLB_HITS @@ -674,7 +910,6 @@ (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - generation = smp_tlb_generation; /* Overlap with serialization */ invlpg(smp_tlb_addr1); if (smp_tlb_pmap == PCPU_GET(curpmap) && (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) { @@ -683,15 +918,34 @@ ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1); } - PCPU_SET(smp_tlb_done, generation); } -void -invlrng_invpcid_handler(void) +static void +invlrng_handler(vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2) { + vm_offset_t addr, addr2; + +#ifdef COUNT_XINVLTLB_HITS + xhits_rng[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + addr = smp_tlb_addr1; + addr2 = smp_tlb_addr2; + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < addr2); +} + +static void +invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1, + vm_offset_t smp_tlb_addr2) +{ struct invpcid_descr d; vm_offset_t addr, addr2; - uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_rng[PCPU_GET(cpuid)]++; @@ -702,7 +956,6 @@ addr = smp_tlb_addr1; addr2 = smp_tlb_addr2; - generation = smp_tlb_generation; /* Overlap with serialization */ do { invlpg(addr); addr += PAGE_SIZE; @@ -717,15 +970,14 @@ d.addr += 
PAGE_SIZE; } while (d.addr < addr2); } - PCPU_SET(smp_tlb_done, generation); } -void -invlrng_pcid_handler(void) +static void +invlrng_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1, + vm_offset_t smp_tlb_addr2) { vm_offset_t addr, addr2; uint64_t kcr3, ucr3; - uint32_t generation; uint32_t pcid; #ifdef COUNT_XINVLTLB_HITS @@ -737,7 +989,6 @@ addr = smp_tlb_addr1; addr2 = smp_tlb_addr2; - generation = smp_tlb_generation; /* Overlap with serialization */ do { invlpg(addr); addr += PAGE_SIZE; @@ -749,5 +1000,116 @@ ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2); } - PCPU_SET(smp_tlb_done, generation); +} + +static void +invlcache_handler(void) +{ +#ifdef COUNT_IPIS + (*ipi_invlcache_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + wbinvd(); +} + +static void +invlop_handler_one_req(enum invl_op_codes smp_tlb_op, pmap_t smp_tlb_pmap, + vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2) +{ + switch (smp_tlb_op) { + case INVL_OP_TLB: + invltlb_handler(smp_tlb_pmap); + break; + case INVL_OP_TLB_INVPCID: + invltlb_invpcid_handler(smp_tlb_pmap); + break; + case INVL_OP_TLB_INVPCID_PTI: + invltlb_invpcid_pti_handler(smp_tlb_pmap); + break; + case INVL_OP_TLB_PCID: + invltlb_pcid_handler(smp_tlb_pmap); + break; + case INVL_OP_PGRNG: + invlrng_handler(smp_tlb_addr1, smp_tlb_addr2); + break; + case INVL_OP_PGRNG_INVPCID: + invlrng_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1, + smp_tlb_addr2); + break; + case INVL_OP_PGRNG_PCID: + invlrng_pcid_handler(smp_tlb_pmap, smp_tlb_addr1, + smp_tlb_addr2); + break; + case INVL_OP_PG: + invlpg_handler(smp_tlb_addr1); + break; + case INVL_OP_PG_INVPCID: + invlpg_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1); + break; + case INVL_OP_PG_PCID: + invlpg_pcid_handler(smp_tlb_pmap, smp_tlb_addr1); + break; + case INVL_OP_CACHE: + invlcache_handler(); + break; + default: + __assert_unreachable(); + break; + } +} + +void +invlop_handler(void) +{ + struct pcpu *initiator_pc; + pmap_t smp_tlb_pmap; + vm_offset_t smp_tlb_addr1, smp_tlb_addr2; + u_int initiator_cpu_id; + enum invl_op_codes smp_tlb_op; + uint32_t *scoreboard, smp_tlb_gen; + + scoreboard = invl_scoreboard_getcpu(PCPU_GET(cpuid)); + for (;;) { + for (initiator_cpu_id = 0; initiator_cpu_id <= mp_maxid; + initiator_cpu_id++) { + if (scoreboard[initiator_cpu_id] == 0) + break; + } + if (initiator_cpu_id > mp_maxid) + break; + initiator_pc = cpuid_to_pcpu[initiator_cpu_id]; + + /* + * This acquire fence and its corresponding release + * fence in smp_targeted_tlb_shootdown(), is between + * reading zero scoreboard slot and accessing PCPU of + * initiator for pc_smp_tlb values. + */ + atomic_thread_fence_acq(); + smp_tlb_pmap = initiator_pc->pc_smp_tlb_pmap; + smp_tlb_addr1 = initiator_pc->pc_smp_tlb_addr1; + smp_tlb_addr2 = initiator_pc->pc_smp_tlb_addr2; + smp_tlb_op = initiator_pc->pc_smp_tlb_op; + smp_tlb_gen = initiator_pc->pc_smp_tlb_gen; + + /* + * Ensure that we do not make our scoreboard + * notification visible to the initiator until the + * pc_smp_tlb values are read. The corresponding + * fence is implicitly provided by the barrier in the + * IPI send operation before the APIC ICR register + * write. + * + * As an optimization, the request is acknowledged + * before the actual invalidation is performed. It is + * safe because target CPU cannot return to userspace + * before handler finishes. Only NMI can preempt the + * handler, but NMI would see the kernel handler frame + * and not touch not-invalidated user page table. 
+ */ + atomic_thread_fence_acq(); + atomic_store_int(&scoreboard[initiator_cpu_id], smp_tlb_gen); + + invlop_handler_one_req(smp_tlb_op, smp_tlb_pmap, smp_tlb_addr1, + smp_tlb_addr2); + } } Index: head/sys/amd64/include/pcpu.h =================================================================== --- head/sys/amd64/include/pcpu.h +++ head/sys/amd64/include/pcpu.h @@ -85,7 +85,7 @@ u_int pc_vcpu_id; /* Xen vCPU ID */ \ uint32_t pc_pcid_next; \ uint32_t pc_pcid_gen; \ - uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \ + uint32_t pc_unused; \ uint32_t pc_ibpb_set; \ void *pc_mds_buf; \ void *pc_mds_buf64; \ @@ -94,7 +94,12 @@ u_int pc_ipi_bitmap; \ struct amd64tss pc_common_tss; \ struct user_segment_descriptor pc_gdt[NGDT]; \ - char __pad[2956] /* pad to UMA_PCPU_ALLOC_SIZE */ + void *pc_smp_tlb_pmap; \ + uint64_t pc_smp_tlb_addr1; \ + uint64_t pc_smp_tlb_addr2; \ + uint32_t pc_smp_tlb_gen; \ + u_int pc_smp_tlb_op; \ + char __pad[2924] /* pad to UMA_PCPU_ALLOC_SIZE */ #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 Index: head/sys/amd64/include/smp.h =================================================================== --- head/sys/amd64/include/smp.h +++ head/sys/amd64/include/smp.h @@ -29,34 +29,14 @@ inthand_t IDTVEC(justreturn), /* interrupt CPU with minimum overhead */ IDTVEC(justreturn1_pti), - IDTVEC(invltlb_pti), - IDTVEC(invltlb_pcid_pti), - IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */ - IDTVEC(invltlb_invpcid_pti_pti), - IDTVEC(invltlb_invpcid_nopti), - IDTVEC(invlpg_pti), - IDTVEC(invlpg_invpcid_pti), - IDTVEC(invlpg_invpcid), - IDTVEC(invlpg_pcid_pti), - IDTVEC(invlpg_pcid), - IDTVEC(invlrng_pti), - IDTVEC(invlrng_invpcid_pti), - IDTVEC(invlrng_invpcid), - IDTVEC(invlrng_pcid_pti), - IDTVEC(invlrng_pcid), - IDTVEC(invlcache_pti), + IDTVEC(invlop_pti), + IDTVEC(invlop), IDTVEC(ipi_intr_bitmap_handler_pti), IDTVEC(cpustop_pti), IDTVEC(cpususpend_pti), IDTVEC(rendezvous_pti); -void invltlb_pcid_handler(void); -void invltlb_invpcid_handler(void); -void invltlb_invpcid_pti_handler(void); -void invlpg_invpcid_handler(void); -void invlpg_pcid_handler(void); -void invlrng_invpcid_handler(void); -void invlrng_pcid_handler(void); +void invlop_handler(void); int native_start_all_aps(void); void mp_bootaddress(vm_paddr_t *, unsigned int *); Index: head/sys/i386/i386/mp_machdep.c =================================================================== --- head/sys/i386/i386/mp_machdep.c +++ head/sys/i386/i386/mp_machdep.c @@ -54,6 +54,7 @@ #ifdef GPROF #include #endif +#include #include #include #include @@ -466,4 +467,223 @@ DELAY(1000); } return 0; /* return FAILURE */ +} + +/* + * Flush the TLB on other CPU's + */ + +/* Variables needed for SMP tlb shootdown. */ +vm_offset_t smp_tlb_addr1, smp_tlb_addr2; +pmap_t smp_tlb_pmap; +volatile uint32_t smp_tlb_generation; + +/* + * Used by pmap to request cache or TLB invalidation on local and + * remote processors. Mask provides the set of remote CPUs which are + * to be signalled with the invalidation IPI, specified by vector. As + * an optimization, the curcpu_cb callback is invoked on the calling + * CPU while waiting for remote CPUs to complete the operation. + * + * The callback function is called unconditionally on the caller's + * underlying processor, even when this processor is not set in the + * mask. So, the callback function must be prepared to handle such + * spurious invocations. 
+ */ +static void +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap, + vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb) +{ + cpuset_t other_cpus; + volatile uint32_t *p_cpudone; + uint32_t generation; + int cpu; + + /* + * It is not necessary to signal other CPUs while booting or + * when in the debugger. + */ + if (kdb_active || KERNEL_PANICKED() || !smp_started) { + curcpu_cb(pmap, addr1, addr2); + return; + } + + sched_pin(); + + /* + * Check for other cpus. Return if none. + */ + if (CPU_ISFULLSET(&mask)) { + if (mp_ncpus <= 1) + goto nospinexit; + } else { + CPU_CLR(PCPU_GET(cpuid), &mask); + if (CPU_EMPTY(&mask)) + goto nospinexit; + } + + KASSERT((read_eflags() & PSL_I) != 0, + ("smp_targeted_tlb_shootdown: interrupts disabled")); + mtx_lock_spin(&smp_ipi_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_pmap = pmap; + generation = ++smp_tlb_generation; + if (CPU_ISFULLSET(&mask)) { + ipi_all_but_self(vector); + other_cpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &other_cpus); + } else { + other_cpus = mask; + while ((cpu = CPU_FFS(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, + cpu, vector); + ipi_send_cpu(cpu, vector); + } + } + curcpu_cb(pmap, addr1, addr2); + while ((cpu = CPU_FFS(&other_cpus)) != 0) { + cpu--; + CPU_CLR(cpu, &other_cpus); + p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done; + while (*p_cpudone != generation) + ia32_pause(); + } + mtx_unlock_spin(&smp_ipi_mtx); + sched_unpin(); + return; + +nospinexit: + curcpu_cb(pmap, addr1, addr2); + sched_unpin(); +} + +void +smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb) +{ + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0, curcpu_cb); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif +} + +void +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap, + smp_invl_cb_t curcpu_cb) +{ + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0, curcpu_cb); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif +} + +void +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2, + pmap_t pmap, smp_invl_cb_t curcpu_cb) +{ + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2, + curcpu_cb); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif +} + +void +smp_cache_flush(smp_invl_cb_t curcpu_cb) +{ + smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0, + curcpu_cb); +} + +/* + * Handlers for TLB related IPIs + */ +void +invltlb_handler(void) +{ + uint32_t generation; + +#ifdef COUNT_XINVLTLB_HITS + xhits_gbl[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + /* + * Reading the generation here allows greater parallelism + * since invalidating the TLB is a serializing operation. 
+ */ + generation = smp_tlb_generation; + if (smp_tlb_pmap == kernel_pmap) + invltlb_glob(); + PCPU_SET(smp_tlb_done, generation); +} + +void +invlpg_handler(void) +{ + uint32_t generation; + +#ifdef COUNT_XINVLTLB_HITS + xhits_pg[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + generation = smp_tlb_generation; /* Overlap with serialization */ + if (smp_tlb_pmap == kernel_pmap) + invlpg(smp_tlb_addr1); + PCPU_SET(smp_tlb_done, generation); +} + +void +invlrng_handler(void) +{ + vm_offset_t addr, addr2; + uint32_t generation; + +#ifdef COUNT_XINVLTLB_HITS + xhits_rng[PCPU_GET(cpuid)]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + addr = smp_tlb_addr1; + addr2 = smp_tlb_addr2; + generation = smp_tlb_generation; /* Overlap with serialization */ + if (smp_tlb_pmap == kernel_pmap) { + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < addr2); + } + + PCPU_SET(smp_tlb_done, generation); +} + +void +invlcache_handler(void) +{ + uint32_t generation; + +#ifdef COUNT_IPIS + (*ipi_invlcache_counts[PCPU_GET(cpuid)])++; +#endif /* COUNT_IPIS */ + + /* + * Reading the generation here allows greater parallelism + * since wbinvd is a serializing instruction. Without the + * temporary, we'd wait for wbinvd to complete, then the read + * would execute, then the dependent write, which must then + * complete before return from interrupt. + */ + generation = smp_tlb_generation; + wbinvd(); + PCPU_SET(smp_tlb_done, generation); } Index: head/sys/i386/include/smp.h =================================================================== --- head/sys/i386/include/smp.h +++ head/sys/i386/include/smp.h @@ -27,8 +27,19 @@ #include #include +inthand_t + IDTVEC(invltlb), /* TLB shootdowns - global */ + IDTVEC(invlpg), /* TLB shootdowns - 1 page */ + IDTVEC(invlrng), /* TLB shootdowns - page range */ + IDTVEC(invlcache); /* Write back and invalidate cache */ + /* functions in mpboot.s */ void bootMP(void); + +void invltlb_handler(void); +void invlpg_handler(void); +void invlrng_handler(void); +void invlcache_handler(void); #endif /* !LOCORE */ #endif /* SMP */ Index: head/sys/x86/include/apicvar.h =================================================================== --- head/sys/x86/include/apicvar.h +++ head/sys/x86/include/apicvar.h @@ -112,7 +112,8 @@ #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. 
*/ -#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ +#define IPI_INVLOP (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs, amd64 */ +#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs, i386 */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) Index: head/sys/x86/include/x86_smp.h =================================================================== --- head/sys/x86/include/x86_smp.h +++ head/sys/x86/include/x86_smp.h @@ -75,10 +75,6 @@ /* IPI handlers */ inthand_t - IDTVEC(invltlb), /* TLB shootdowns - global */ - IDTVEC(invlpg), /* TLB shootdowns - 1 page */ - IDTVEC(invlrng), /* TLB shootdowns - page range */ - IDTVEC(invlcache), /* Write back and invalidate cache */ IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ IDTVEC(cpustop), /* CPU stops & waits to be restarted */ IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */ @@ -94,10 +90,6 @@ void cpususpend_handler(void); void alloc_ap_trampoline(vm_paddr_t *physmap, unsigned int *physmap_idx); void init_secondary_tail(void); -void invltlb_handler(void); -void invlpg_handler(void); -void invlrng_handler(void); -void invlcache_handler(void); void init_secondary(void); void ipi_startup(int apic_id, int vector); void ipi_all_but_self(u_int ipi); Index: head/sys/x86/x86/mp_x86.c =================================================================== --- head/sys/x86/x86/mp_x86.c +++ head/sys/x86/x86/mp_x86.c @@ -1593,28 +1593,6 @@ CPU_CLR_ATOMIC(cpu, &toresume_cpus); } - -void -invlcache_handler(void) -{ - uint32_t generation; - -#ifdef COUNT_IPIS - (*ipi_invlcache_counts[PCPU_GET(cpuid)])++; -#endif /* COUNT_IPIS */ - - /* - * Reading the generation here allows greater parallelism - * since wbinvd is a serializing instruction. Without the - * temporary, we'd wait for wbinvd to complete, then the read - * would execute, then the dependent write, which must then - * complete before return from interrupt. - */ - generation = smp_tlb_generation; - wbinvd(); - PCPU_SET(smp_tlb_done, generation); -} - /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. @@ -1662,216 +1640,3 @@ } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); #endif - -/* - * Flush the TLB on other CPU's - */ - -/* Variables needed for SMP tlb shootdown. */ -vm_offset_t smp_tlb_addr1, smp_tlb_addr2; -pmap_t smp_tlb_pmap; -volatile uint32_t smp_tlb_generation; - -#ifdef __amd64__ -#define read_eflags() read_rflags() -#endif - -/* - * Used by pmap to request invalidation of TLB or cache on local and - * remote processors. Mask provides the set of remote CPUs which are - * to be signalled with the IPI specified by vector. The curcpu_cb - * callback is invoked on the calling CPU while waiting for remote - * CPUs to complete the operation. - * - * The callback function is called unconditionally on the caller's - * underlying processor, even when this processor is not set in the - * mask. So, the callback function must be prepared to handle such - * spurious invocations. - */ -static void -smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap, - vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb) -{ - cpuset_t other_cpus; - volatile uint32_t *p_cpudone; - uint32_t generation; - int cpu; - - /* - * It is not necessary to signal other CPUs while booting or - * when in the debugger. 
- */ - if (kdb_active || KERNEL_PANICKED() || !smp_started) { - curcpu_cb(pmap, addr1, addr2); - return; - } - - sched_pin(); - - /* - * Check for other cpus. Return if none. - */ - if (CPU_ISFULLSET(&mask)) { - if (mp_ncpus <= 1) - goto nospinexit; - } else { - CPU_CLR(PCPU_GET(cpuid), &mask); - if (CPU_EMPTY(&mask)) - goto nospinexit; - } - - if (!(read_eflags() & PSL_I)) - panic("%s: interrupts disabled", __func__); - mtx_lock_spin(&smp_ipi_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_pmap = pmap; - generation = ++smp_tlb_generation; - if (CPU_ISFULLSET(&mask)) { - ipi_all_but_self(vector); - other_cpus = all_cpus; - CPU_CLR(PCPU_GET(cpuid), &other_cpus); - } else { - other_cpus = mask; - while ((cpu = CPU_FFS(&mask)) != 0) { - cpu--; - CPU_CLR(cpu, &mask); - CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, - cpu, vector); - ipi_send_cpu(cpu, vector); - } - } - curcpu_cb(pmap, addr1, addr2); - while ((cpu = CPU_FFS(&other_cpus)) != 0) { - cpu--; - CPU_CLR(cpu, &other_cpus); - p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done; - while (*p_cpudone != generation) - ia32_pause(); - } - mtx_unlock_spin(&smp_ipi_mtx); - sched_unpin(); - return; - -nospinexit: - curcpu_cb(pmap, addr1, addr2); - sched_unpin(); -} - -void -smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb) -{ - - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0, curcpu_cb); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif -} - -void -smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap, - smp_invl_cb_t curcpu_cb) -{ - - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0, curcpu_cb); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif -} - -void -smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2, - pmap_t pmap, smp_invl_cb_t curcpu_cb) -{ - - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2, - curcpu_cb); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif -} - -void -smp_cache_flush(smp_invl_cb_t curcpu_cb) -{ - - smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0, - curcpu_cb); -} - -/* - * Handlers for TLB related IPIs - */ -void -invltlb_handler(void) -{ - uint32_t generation; - -#ifdef COUNT_XINVLTLB_HITS - xhits_gbl[PCPU_GET(cpuid)]++; -#endif /* COUNT_XINVLTLB_HITS */ -#ifdef COUNT_IPIS - (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; -#endif /* COUNT_IPIS */ - - /* - * Reading the generation here allows greater parallelism - * since invalidating the TLB is a serializing operation. 
- */ - generation = smp_tlb_generation; - if (smp_tlb_pmap == kernel_pmap) - invltlb_glob(); -#ifdef __amd64__ - else - invltlb(); -#endif - PCPU_SET(smp_tlb_done, generation); -} - -void -invlpg_handler(void) -{ - uint32_t generation; - -#ifdef COUNT_XINVLTLB_HITS - xhits_pg[PCPU_GET(cpuid)]++; -#endif /* COUNT_XINVLTLB_HITS */ -#ifdef COUNT_IPIS - (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; -#endif /* COUNT_IPIS */ - - generation = smp_tlb_generation; /* Overlap with serialization */ -#ifdef __i386__ - if (smp_tlb_pmap == kernel_pmap) -#endif - invlpg(smp_tlb_addr1); - PCPU_SET(smp_tlb_done, generation); -} - -void -invlrng_handler(void) -{ - vm_offset_t addr, addr2; - uint32_t generation; - -#ifdef COUNT_XINVLTLB_HITS - xhits_rng[PCPU_GET(cpuid)]++; -#endif /* COUNT_XINVLTLB_HITS */ -#ifdef COUNT_IPIS - (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; -#endif /* COUNT_IPIS */ - - addr = smp_tlb_addr1; - addr2 = smp_tlb_addr2; - generation = smp_tlb_generation; /* Overlap with serialization */ -#ifdef __i386__ - if (smp_tlb_pmap == kernel_pmap) -#endif - do { - invlpg(addr); - addr += PAGE_SIZE; - } while (addr < addr2); - - PCPU_SET(smp_tlb_done, generation); -} Index: head/sys/x86/xen/xen_apic.c =================================================================== --- head/sys/x86/xen/xen_apic.c +++ head/sys/x86/xen/xen_apic.c @@ -65,10 +65,14 @@ /*--------------------------- Forward Declarations ---------------------------*/ #ifdef SMP static driver_filter_t xen_smp_rendezvous_action; +#ifdef __amd64__ +static driver_filter_t xen_invlop; +#else static driver_filter_t xen_invltlb; static driver_filter_t xen_invlpg; static driver_filter_t xen_invlrng; static driver_filter_t xen_invlcache; +#endif static driver_filter_t xen_ipi_bitmap_handler; static driver_filter_t xen_cpustop_handler; static driver_filter_t xen_cpususpend_handler; @@ -88,10 +92,14 @@ static struct xen_ipi_handler xen_ipis[] = { [IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" }, +#ifdef __amd64__ + [IPI_TO_IDX(IPI_INVLOP)] = { xen_invlop, "itlb"}, +#else [IPI_TO_IDX(IPI_INVLTLB)] = { xen_invltlb, "itlb"}, [IPI_TO_IDX(IPI_INVLPG)] = { xen_invlpg, "ipg" }, [IPI_TO_IDX(IPI_INVLRNG)] = { xen_invlrng, "irg" }, [IPI_TO_IDX(IPI_INVLCACHE)] = { xen_invlcache, "ic" }, +#endif [IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler, "b" }, [IPI_TO_IDX(IPI_STOP)] = { xen_cpustop_handler, "st" }, [IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" }, @@ -454,73 +462,26 @@ return (FILTER_HANDLED); } -static int -xen_invltlb(void *arg) -{ - - invltlb_handler(); - return (FILTER_HANDLED); -} - #ifdef __amd64__ static int -xen_invltlb_invpcid(void *arg) +xen_invlop(void *arg) { - invltlb_invpcid_handler(); + invlop_handler(); return (FILTER_HANDLED); } -static int -xen_invltlb_pcid(void *arg) -{ +#else /* __i386__ */ - invltlb_pcid_handler(); - return (FILTER_HANDLED); -} - static int -xen_invltlb_invpcid_pti(void *arg) +xen_invltlb(void *arg) { - invltlb_invpcid_pti_handler(); + invltlb_handler(); return (FILTER_HANDLED); } static int -xen_invlpg_invpcid_handler(void *arg) -{ - - invlpg_invpcid_handler(); - return (FILTER_HANDLED); -} - -static int -xen_invlpg_pcid_handler(void *arg) -{ - - invlpg_pcid_handler(); - return (FILTER_HANDLED); -} - -static int -xen_invlrng_invpcid_handler(void *arg) -{ - - invlrng_invpcid_handler(); - return (FILTER_HANDLED); -} - -static int -xen_invlrng_pcid_handler(void *arg) -{ - - invlrng_pcid_handler(); - return (FILTER_HANDLED); -} -#endif - -static int xen_invlpg(void *arg) { @@ 
-543,6 +504,7 @@ invlcache_handler(); return (FILTER_HANDLED); } +#endif /* __amd64__ */ static int xen_cpustop_handler(void *arg) @@ -598,22 +560,6 @@ if (!xen_vector_callback_enabled) return; -#ifdef __amd64__ - if (pmap_pcid_enabled) { - if (pti) - xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = - invpcid_works ? xen_invltlb_invpcid_pti : - xen_invltlb_pcid; - else - xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = - invpcid_works ? xen_invltlb_invpcid : - xen_invltlb_pcid; - xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = invpcid_works ? - xen_invlpg_invpcid_handler : xen_invlpg_pcid_handler; - xen_ipis[IPI_TO_IDX(IPI_INVLRNG)].filter = invpcid_works ? - xen_invlrng_invpcid_handler : xen_invlrng_pcid_handler; - } -#endif CPU_FOREACH(i) xen_cpu_ipi_init(i);
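
Notes on the mechanism introduced above (editorial sketches, not part of the patch):

The initiator/target handshake implemented by the new amd64 code (the pc_smp_tlb_* PCPU fields plus the invl_scoreboard array) can be summarized with a small user-space model. This is only an illustrative sketch under simplifying assumptions: NCPU, struct invl_req, scoreboard_init(), initiate() and handle() are made-up names standing in for the PCPU request fields, smp_targeted_tlb_shootdown() and invlop_handler(); a single target is shown instead of a CPU mask, and the IPI send, local curcpu_cb call and actual invalidation are reduced to comments.

#include <stdatomic.h>
#include <stdint.h>

#define NCPU 4				/* assumption: fixed CPU count for the model */

struct invl_req {			/* models the pc_smp_tlb_* PCPU fields */
	void     *pmap;
	uintptr_t addr1, addr2;
	int       op;
	uint32_t  gen;			/* models pc_smp_tlb_gen */
};

static struct invl_req req[NCPU];
/* scoreboard[target][initiator]: nonzero = idle/acknowledged, zero = pending */
static _Atomic uint32_t scoreboard[NCPU][NCPU];

static void
scoreboard_init(void)
{
	/* Like invl_scoreboard_init(): every slot starts out "acknowledged". */
	for (int t = 0; t < NCPU; t++)
		for (int i = 0; i < NCPU; i++)
			atomic_store_explicit(&scoreboard[t][i], 1,
			    memory_order_relaxed);
}

/* Initiator side, roughly what smp_targeted_tlb_shootdown() does. */
static void
initiate(int me, int target, void *pmap, uintptr_t a1, uintptr_t a2, int op)
{
	uint32_t gen;

	gen = req[me].gen + 1;
	if (gen == 0)			/* generation 0 is reserved: it marks a pending slot */
		gen = 1;
	req[me] = (struct invl_req){ pmap, a1, a2, op, gen };
	atomic_thread_fence(memory_order_release);	/* publish request before clearing slot */
	atomic_store_explicit(&scoreboard[target][me], 0,
	    memory_order_relaxed);
	/* ipi_send_cpu(target, IPI_INVLOP) would be sent here, */
	/* and curcpu_cb() would run here, overlapping the wait below. */
	while (atomic_load_explicit(&scoreboard[target][me],
	    memory_order_acquire) != gen)
		;				/* ia32_pause() in the kernel */
}

/* Target side, roughly what invlop_handler() does for one pending request. */
static void
handle(int me)
{
	for (int init = 0; init < NCPU; init++) {
		if (atomic_load_explicit(&scoreboard[me][init],
		    memory_order_relaxed) != 0)
			continue;
		atomic_thread_fence(memory_order_acquire);	/* pair with initiator's fence */
		struct invl_req r = req[init];			/* snapshot the request */
		/*
		 * Acknowledge before invalidating: the target cannot return
		 * to user mode before this handler finishes, so the early
		 * store is safe and shortens the initiator's spin.
		 */
		atomic_store_explicit(&scoreboard[me][init], r.gen,
		    memory_order_release);
		/* dispatch on r.op here, as invlop_handler_one_req() does */
		(void)r;
	}
}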
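
The scoreboard itself is a flat (mp_maxid + 1) x (mp_maxid + 1) array indexed first by target CPU and then by initiator CPU, as invl_scoreboard_getcpu() and invl_scoreboard_slot() show. A quick worked example, assuming four CPUs (mp_maxid == 3): an initiator running on CPU 2 that targets CPU 1 clears the word

	uint32_t *slot = invl_scoreboard + 1 * (3 + 1) + 2;	/* target 1's row, initiator 2's column */

which is exactly the entry that CPU 1's invlop_handler() finds at index 2 of its own row; CPU 2 then spins on that same word until CPU 1 stores the request generation back into it.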
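
The pcpu.h hunk is self-consistent: pc_smp_tlb_done is renamed to pc_unused with the same uint32_t width, and the five new fields add sizeof(void *) + 2 * sizeof(uint64_t) + sizeof(uint32_t) + sizeof(u_int) = 8 + 16 + 4 + 4 = 32 bytes on LP64, which matches the shrink of the trailing pad from 2956 to 2924 bytes needed to keep struct pcpu at UMA_PCPU_ALLOC_SIZE.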
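
Finally, a hypothetical caller sketch showing how pmap code can drive the new interface. This is not taken from the patch: pmap_invalidate_all_cb is an assumed name, and only the smp_masked_invltlb() signature and the smp_invl_cb_t callback shape (pmap, addr1, addr2) come from the diff above.

/*
 * Hypothetical local-shootdown callback.  It must tolerate being invoked
 * even when the current CPU is not in the target mask (see the comment on
 * spurious invocations above).
 */
static void
pmap_invalidate_all_cb(pmap_t pmap, vm_offset_t addr1 __unused,
    vm_offset_t addr2 __unused)
{
	if (pmap == kernel_pmap)
		invltlb_glob();
	else
		invltlb();
}

	/* ... somewhere in the pmap invalidation path ... */
	smp_masked_invltlb(pmap->pm_active, pmap, pmap_invalidate_all_cb);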