Index: sys/kern/kern_clock.c
===================================================================
--- sys/kern/kern_clock.c
+++ sys/kern/kern_clock.c
@@ -508,6 +508,7 @@
 			if (i > 0 && i <= newticks)
 				watchdog_fire();
 		}
+		intr_event_handle(delay_intr_event, NULL);
 	}
 	if (curcpu == CPU_FIRST())
 		cpu_tick_calibration();
Index: sys/kern/kern_intr.c
===================================================================
--- sys/kern/kern_intr.c
+++ sys/kern/kern_intr.c
@@ -60,6 +60,7 @@
 #include <machine/atomic.h>
 #include <machine/cpu.h>
 #include <machine/md_var.h>
+#include <machine/smp.h>
 #include <machine/stdarg.h>
 #ifdef DDB
 #include <ddb/ddb.h>
@@ -85,6 +86,7 @@
 	uintptr_t event;
 };
 
+struct intr_event *delay_intr_event;
 struct intr_event *tty_intr_event;
 void *vm_ih;
 struct proc *intrproc;
@@ -1018,7 +1020,7 @@
     void *arg, int pri, enum intr_type flags, void **cookiep)
 {
 	struct intr_event *ie;
-	int error;
+	int error = 0;
 
 	if (flags & INTR_ENTROPY)
 		return (EINVAL);
@@ -1036,8 +1038,10 @@
 		if (eventp != NULL)
 			*eventp = ie;
 	}
-	error = intr_event_add_handler(ie, name, NULL, handler, arg,
-	    PI_SWI(pri), flags, cookiep);
+	if (handler != NULL) {
+		error = intr_event_add_handler(ie, name, NULL, handler, arg,
+		    PI_SWI(pri), flags, cookiep);
+	}
 	return (error);
 }
 
@@ -1055,9 +1059,11 @@
 	CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name,
 	    ih->ih_need);
 
-	entropy.event = (uintptr_t)ih;
-	entropy.td = curthread;
-	random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI);
+	if ((flags & SWI_FROMNMI) == 0) {
+		entropy.event = (uintptr_t)ih;
+		entropy.td = curthread;
+		random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI);
+	}
 
 	/*
 	 * Set ih_need for this handler so that if the ithread is already
@@ -1066,7 +1072,16 @@
 	 */
 	ih->ih_need = 1;
 
-	if (!(flags & SWI_DELAY)) {
+	if (flags & SWI_DELAY)
+		return;
+
+	if (flags & SWI_FROMNMI) {
+#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
+		KASSERT(ie == delay_intr_event,
+		    ("SWI_FROMNMI used not with delay_intr_event"));
+		ipi_self(IPI_SWI);
+#endif
+	} else {
 		VM_CNT_INC(v_soft);
 		error = intr_event_schedule_thread(ie);
 		KASSERT(error == 0, ("stray software interrupt"));
@@ -1346,6 +1361,8 @@
 	CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
 		if ((ih->ih_flags & IH_SUSP) != 0)
 			continue;
+		if ((ie->ie_flags & IE_SOFT) != 0 && ih->ih_need == 0)
+			continue;
 		if (ih->ih_filter == NULL) {
 			thread = true;
 			continue;
@@ -1570,6 +1587,9 @@
 start_softintr(void *dummy)
 {
 
+	if (swi_add(&delay_intr_event, "delay", NULL, NULL, SWI_CLOCK,
+	    INTR_MPSAFE, NULL))
+		panic("died while creating delayed swi ithread");
 	if (swi_add(NULL, "vm", swi_vm, NULL, SWI_VM, INTR_MPSAFE, &vm_ih))
 		panic("died while creating vm swi ithread");
 }
Index: sys/sys/interrupt.h
===================================================================
--- sys/sys/interrupt.h
+++ sys/sys/interrupt.h
@@ -133,7 +133,8 @@
 #define	IE_SOFT		0x000001	/* Software interrupt. */
 #define	IE_ADDING_THREAD 0x000004	/* Currently building an ithread. */
 
-/* Flags to pass to sched_swi. */
+/* Flags to pass to swi_sched. */
+#define	SWI_FROMNMI	0x1
 #define	SWI_DELAY	0x2
 
 /*
@@ -151,6 +152,7 @@
 
 struct proc;
 
+extern struct intr_event *delay_intr_event;
 extern struct intr_event *tty_intr_event;
 extern void *vm_ih;
 
Index: sys/x86/include/apicvar.h
===================================================================
--- sys/x86/include/apicvar.h
+++ sys/x86/include/apicvar.h
@@ -125,7 +125,8 @@
 #define	IPI_PREEMPT	1
 #define	IPI_HARDCLOCK	2
 #define	IPI_TRACE	3	/* Collect stack trace. */
-#define	IPI_BITMAP_LAST	IPI_TRACE
+#define	IPI_SWI		4	/* Run delay_intr_event. */
+#define	IPI_BITMAP_LAST	IPI_SWI
 #define	IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
 #define	IPI_STOP	(APIC_IPI_INTS + 6)	/* Stop CPU until restarted. */
Index: sys/x86/include/x86_smp.h
===================================================================
--- sys/x86/include/x86_smp.h
+++ sys/x86/include/x86_smp.h
@@ -97,6 +97,7 @@
 void	ipi_cpu(int cpu, u_int ipi);
 int	ipi_nmi_handler(void);
 void	ipi_selected(cpuset_t cpus, u_int ipi);
+void	ipi_self(u_int ipi);
 void	set_interrupt_apic_ids(void);
 void	smp_cache_flush(smp_invl_cb_t curcpu_cb);
 void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap,
@@ -107,6 +108,5 @@
     smp_invl_cb_t curcpu_cb);
 void	mem_range_AP_init(void);
 void	topo_probe(void);
-void	ipi_send_cpu(int cpu, u_int ipi);
 
 #endif
Index: sys/x86/x86/local_apic.c
===================================================================
--- sys/x86/x86/local_apic.c
+++ sys/x86/x86/local_apic.c
@@ -254,22 +254,6 @@
 
 #ifdef SMP
 static uint64_t
-lapic_read_icr(void)
-{
-	uint64_t v;
-	uint32_t vhi, vlo;
-
-	if (x2apic_mode) {
-		v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO);
-	} else {
-		vhi = lapic_read32(LAPIC_ICR_HI);
-		vlo = lapic_read32(LAPIC_ICR_LO);
-		v = ((uint64_t)vhi << 32) | vlo;
-	}
-	return (v);
-}
-
-static uint64_t
 lapic_read_icr_lo(void)
 {
 
@@ -279,6 +263,7 @@
 static void
 lapic_write_icr(uint32_t vhi, uint32_t vlo)
 {
+	register_t saveintr;
 	uint64_t v;
 
 	if (x2apic_mode) {
@@ -286,10 +271,24 @@
 		v = ((uint64_t)vhi << 32) | vlo;
 		mfence();
 		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
 	} else {
+		saveintr = intr_disable();
 		lapic_write32(LAPIC_ICR_HI, vhi);
 		lapic_write32(LAPIC_ICR_LO, vlo);
+		intr_restore(saveintr);
 	}
 }
+
+static void
+lapic_write_icr_lo(uint32_t vlo)
+{
+
+	if (x2apic_mode) {
+		mfence();
+		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo);
+	} else {
+		lapic_write32(LAPIC_ICR_LO, vlo);
+	}
+}
 #endif /* SMP */
 
 static void
@@ -1991,9 +1990,7 @@
 static void
 native_lapic_ipi_raw(register_t icrlo, u_int dest)
 {
-	uint64_t icr;
-	uint32_t vhi, vlo;
-	register_t saveintr;
+	uint32_t icrhi;
 
 	/* XXX: Need more sanity checking of icrlo? */
 	KASSERT(x2apic_mode || lapic_map != NULL,
@@ -2004,35 +2001,15 @@
 	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
 	    ("%s: reserved bits set in ICR LO register", __func__));
 
-	/* Set destination in ICR HI register if it is being used. */
-	if (!x2apic_mode) {
-		saveintr = intr_disable();
-		icr = lapic_read_icr();
-	}
-
 	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
-		if (x2apic_mode) {
-			vhi = dest;
-		} else {
-			vhi = icr >> 32;
-			vhi &= ~APIC_ID_MASK;
-			vhi |= dest << APIC_ID_SHIFT;
-		}
+		if (x2apic_mode)
+			icrhi = dest;
+		else
+			icrhi = dest << APIC_ID_SHIFT;
+		lapic_write_icr(icrhi, icrlo);
 	} else {
-		vhi = 0;
+		lapic_write_icr_lo(icrlo);
 	}
-
-	/* Program the contents of the IPI and dispatch it. */
-	if (x2apic_mode) {
-		vlo = icrlo;
-	} else {
-		vlo = icr;
-		vlo &= APIC_ICRLO_RESV_MASK;
-		vlo |= icrlo;
-	}
-	lapic_write_icr(vhi, vlo);
-	if (!x2apic_mode)
-		intr_restore(saveintr);
 }
 
 #define BEFORE_SPIN	50000
Index: sys/x86/x86/mp_x86.c
===================================================================
--- sys/x86/x86/mp_x86.c
+++ sys/x86/x86/mp_x86.c
@@ -47,6 +47,7 @@
 #ifdef GPROF
 #include <sys/gmon.h>
 #endif
+#include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
@@ -109,6 +110,7 @@
 u_long *ipi_invlcache_counts[MAXCPU];
 u_long *ipi_rendezvous_counts[MAXCPU];
 static u_long *ipi_hardclock_counts[MAXCPU];
+static u_long *ipi_swi_counts[MAXCPU];
 #endif
 
 /* Default cpu_ops implementation. */
@@ -1233,32 +1235,39 @@
 		DELAY(200);		/* wait ~200uS */
 }
 
+static bool
+ipi_bitmap_set(int cpu, u_int ipi)
+{
+	u_int bitmap, old, new;
+	u_int *cpu_bitmap;
+
+	bitmap = 1 << ipi;
+	cpu_bitmap = &cpuid_to_pcpu[cpu]->pc_ipi_bitmap;
+	old = *cpu_bitmap;
+	for (;;) {
+		if ((old & bitmap) != 0)
+			break;
+		new = old | bitmap;
+		if (atomic_fcmpset_int(cpu_bitmap, &old, new))
+			break;
+	}
+	return (old != 0);
+}
+
 /*
  * Send an IPI to specified CPU handling the bitmap logic.
  */
-void
+static void
 ipi_send_cpu(int cpu, u_int ipi)
 {
-	u_int bitmap, old, new;
-	u_int *cpu_bitmap;
 
 	KASSERT((u_int)cpu < MAXCPU && cpu_apic_ids[cpu] != -1,
 	    ("IPI to non-existent CPU %d", cpu));
 
 	if (IPI_IS_BITMAPED(ipi)) {
-		bitmap = 1 << ipi;
-		ipi = IPI_BITMAP_VECTOR;
-		cpu_bitmap = &cpuid_to_pcpu[cpu]->pc_ipi_bitmap;
-		old = *cpu_bitmap;
-		for (;;) {
-			if ((old & bitmap) == bitmap)
-				break;
-			new = old | bitmap;
-			if (atomic_fcmpset_int(cpu_bitmap, &old, new))
-				break;
-		}
-		if (old)
+		if (ipi_bitmap_set(cpu, ipi))
 			return;
+		ipi = IPI_BITMAP_VECTOR;
 	}
 	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 }
@@ -1314,6 +1323,12 @@
 		td->td_intr_nesting_level--;
 	if (ipi_bitmap & (1 << IPI_HARDCLOCK))
 		critical_exit();
+	if (ipi_bitmap & (1 << IPI_SWI)) {
+#ifdef COUNT_IPIS
+		(*ipi_swi_counts[cpu])++;
+#endif
+		intr_event_handle(delay_intr_event, &frame);
+	}
 }
 
 /*
@@ -1366,24 +1381,53 @@
 ipi_all_but_self(u_int ipi)
 {
 	cpuset_t other_cpus;
+	int cpu, c;
 
-	other_cpus = all_cpus;
-	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+	/*
+	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
+	 * of help in order to understand what is the source.
+	 * Set the mask of receiving CPUs for this purpose.
+	 */
+	if (ipi == IPI_STOP_HARD) {
+		other_cpus = all_cpus;
+		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+		CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &other_cpus);
+	}
+
+	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	if (IPI_IS_BITMAPED(ipi)) {
-		ipi_selected(other_cpus, ipi);
-		return;
+		cpu = PCPU_GET(cpuid);
+		CPU_FOREACH(c) {
+			if (c != cpu)
+				ipi_bitmap_set(c, ipi);
+		}
+		ipi = IPI_BITMAP_VECTOR;
 	}
+	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+}
+
+/*
+ * send an IPI to myself
+ */
+void
+ipi_self(u_int ipi)
+{
 
 	/*
 	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
 	 * of help in order to understand what is the source.
	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &other_cpus);
+		CPU_SET_ATOMIC(PCPU_GET(cpuid), &ipi_stop_nmi_pending);
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
-	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+	if (IPI_IS_BITMAPED(ipi)) {
+		if (ipi_bitmap_set(PCPU_GET(cpuid), ipi))
+			return;
+		ipi = IPI_BITMAP_VECTOR;
+	}
+	lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
 }
 
 int
@@ -1636,7 +1680,9 @@
 		intrcnt_add(buf, &ipi_rendezvous_counts[i]);
 		snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
 		intrcnt_add(buf, &ipi_hardclock_counts[i]);
-	}
+		snprintf(buf, sizeof(buf), "cpu%d:swi", i);
+		intrcnt_add(buf, &ipi_swi_counts[i]);
+	}
 }
 SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
 #endif
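
Usage sketch (not part of the patch): a subsystem that needs work done on behalf of an NMI handler would first register an ordinary handler on delay_intr_event, then call swi_sched() with SWI_FROMNMI from the NMI itself. swi_sched() in that mode only sets ih_need and sends IPI_SWI to the local CPU, so it takes no locks and is NMI-safe; the handler runs later, either from the IPI_SWI branch in ipi_bitmap_handler() or from the hardclock poll added in kern_clock.c. The nmi_work_* names below are hypothetical, for illustration only.

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/interrupt.h>

	static void *nmi_work_cookie;	/* hypothetical swi handler cookie */

	/* Runs in ithread context, well after the NMI has returned. */
	static void
	nmi_work_handler(void *arg __unused)
	{

		printf("deferred NMI work\n");
	}

	static void
	nmi_work_init(void)
	{

		/* Attach to the "delay" event created in start_softintr(). */
		if (intr_event_add_handler(delay_intr_event, "nmi_work",
		    NULL, nmi_work_handler, NULL, PI_SWI(SWI_CLOCK),
		    INTR_MPSAFE, &nmi_work_cookie) != 0)
			panic("cannot register nmi_work handler");
	}

	/* Callable from an NMI handler: no locks, no wakeups, just a self-IPI. */
	static void
	nmi_work_schedule(void)
	{

		swi_sched(nmi_work_cookie, SWI_FROMNMI);
	}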