Index: head/sys/amd64/include/intr_machdep.h =================================================================== --- head/sys/amd64/include/intr_machdep.h (revision 322587) +++ head/sys/amd64/include/intr_machdep.h (revision 322588) @@ -1,195 +1,196 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. * IRQ values from 256 to 767 are used by MSI. When running under the Xen * Hypervisor, IRQ values from 768 to 4863 are available for binding to * event channel events. We leave 255 unused to avoid confusion since 255 is * used in PCI to indicate an invalid IRQ. */ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 #ifdef XENHVM #include #include #define NUM_EVTCHN_INTS NR_EVENT_CHANNELS #define FIRST_EVTCHN_INT \ (FIRST_MSI_INT + NUM_MSI_INTS) #define LAST_EVTCHN_INT \ (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) #else #define NUM_EVTCHN_INTS 0 #endif #define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 8 counters for each CPU for IPI counters for SMP. */ #ifdef SMP #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 8) * MAXCPU) #else #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif #ifndef LOCORE typedef void inthand_t(void); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *, bool suspend_cancelled); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); void (*pic_reprogram_pin)(struct intsrc *); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; + u_int is_cpu; }; struct trapframe; /* * The following data structure holds per-cpu data, and is placed just * above the top of the space used for the NMI stack. */ struct nmi_pcpu { register_t np_pcpu; register_t __padding; /* pad to 16 bytes */ }; #ifdef SMP extern cpuset_t intr_cpus; #endif extern struct mtx icu_lock; extern int elcr_found; #ifdef SMP extern int msix_disable_migration; #endif #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. */ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(void); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(bool suspend_cancelled); void intr_suspend(void); void intr_reprogram(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int *irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__MACHINE_INTR_MACHDEP_H__ */ Index: head/sys/i386/include/intr_machdep.h =================================================================== --- head/sys/i386/include/intr_machdep.h (revision 322587) +++ head/sys/i386/include/intr_machdep.h (revision 322588) @@ -1,185 +1,186 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. * IRQ values from 256 to 767 are used by MSI. When running under the Xen * Hypervisor, IRQ values from 768 to 4863 are available for binding to * event channel events. We leave 255 unused to avoid confusion since 255 is * used in PCI to indicate an invalid IRQ. */ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 #ifdef XENHVM #include #include #define NUM_EVTCHN_INTS NR_EVENT_CHANNELS #define FIRST_EVTCHN_INT \ (FIRST_MSI_INT + NUM_MSI_INTS) #define LAST_EVTCHN_INT \ (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) #else /* !XENHVM */ #define NUM_EVTCHN_INTS 0 #endif #define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 9 counters for each CPU for IPI counters for SMP. */ #ifdef SMP #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 9) * MAXCPU) #else #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif #ifndef LOCORE typedef void inthand_t(void); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *, bool suspend_cancelled); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); void (*pic_reprogram_pin)(struct intsrc *); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; + u_int is_cpu; }; struct trapframe; #ifdef SMP extern cpuset_t intr_cpus; #endif extern struct mtx icu_lock; extern int elcr_found; #ifdef SMP extern int msix_disable_migration; #endif #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. */ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(void); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(bool suspend_cancelled); void intr_suspend(void); void intr_reprogram(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int* irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__MACHINE_INTR_MACHDEP_H__ */ Index: head/sys/x86/x86/intr_machdep.c =================================================================== --- head/sys/x86/x86/intr_machdep.c (revision 322587) +++ head/sys/x86/x86/intr_machdep.c (revision 322588) @@ -1,605 +1,737 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Machine dependent interrupt code for x86. For x86, we have to * deal with different PICs. Thus, we use the passed in vector to lookup * an interrupt source associated with that vector. The interrupt source * describes which PIC the source belongs to and includes methods to handle * that source. */ #include "opt_atpic.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include +#include +#include #include #include +#include #include #include +#include #include #include #include #include #ifdef DDB #include #endif #ifndef DEV_ATPIC #include #include #include #include #include #endif #define MAX_STRAY_LOG 5 typedef void (*mask_fn)(void *); static int intrcnt_index; static struct intsrc *interrupt_sources[NUM_IO_INTS]; +static struct intsrc *interrupt_sorted[NUM_IO_INTS]; +CTASSERT(sizeof(interrupt_sources) == sizeof(interrupt_sorted)); +static int intrbalance; +SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RW, &intrbalance, 0, + "Interrupt auto-balance interval (seconds). Zero disables."); +static struct timeout_task intrbalance_task; static struct sx intrsrc_lock; static struct mtx intrpic_lock; static struct mtx intrcnt_lock; static TAILQ_HEAD(pics_head, pic) pics; #if defined(SMP) && !defined(EARLY_AP_STARTUP) static int assign_cpu; #endif u_long intrcnt[INTRCNT_COUNT]; char intrnames[INTRCNT_COUNT * (MAXCOMLEN + 1)]; size_t sintrcnt = sizeof(intrcnt); size_t sintrnames = sizeof(intrnames); static int intr_assign_cpu(void *arg, int cpu); static void intr_disable_src(void *arg); static void intr_init(void *__dummy); static int intr_pic_registered(struct pic *pic); static void intrcnt_setname(const char *name, int index); static void intrcnt_updatename(struct intsrc *is); static void intrcnt_register(struct intsrc *is); static int intr_pic_registered(struct pic *pic) { struct pic *p; TAILQ_FOREACH(p, &pics, pics) { if (p == pic) return (1); } return (0); } /* * Register a new interrupt controller (PIC). This is to support suspend * and resume where we suspend/resume controllers rather than individual * sources. This also allows controllers with no active sources (such as * 8259As in a system using the APICs) to participate in suspend and resume. */ int intr_register_pic(struct pic *pic) { int error; mtx_lock(&intrpic_lock); if (intr_pic_registered(pic)) error = EBUSY; else { TAILQ_INSERT_TAIL(&pics, pic, pics); error = 0; } mtx_unlock(&intrpic_lock); return (error); } /* * Register a new interrupt source with the global interrupt system. * The global interrupts need to be disabled when this function is * called. */ int intr_register_source(struct intsrc *isrc) { int error, vector; KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC")); vector = isrc->is_pic->pic_vector(isrc); if (interrupt_sources[vector] != NULL) return (EEXIST); error = intr_event_create(&isrc->is_event, isrc, 0, vector, intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source, (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:", vector); if (error) return (error); sx_xlock(&intrsrc_lock); if (interrupt_sources[vector] != NULL) { sx_xunlock(&intrsrc_lock); intr_event_destroy(isrc->is_event); return (EEXIST); } intrcnt_register(isrc); interrupt_sources[vector] = isrc; isrc->is_handlers = 0; sx_xunlock(&intrsrc_lock); return (0); } struct intsrc * intr_lookup_source(int vector) { return (interrupt_sources[vector]); } int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_add_handler(isrc->is_event, name, filter, handler, arg, intr_priority(flags), flags, cookiep); if (error == 0) { sx_xlock(&intrsrc_lock); intrcnt_updatename(isrc); isrc->is_handlers++; if (isrc->is_handlers == 1) { isrc->is_pic->pic_enable_intr(isrc); isrc->is_pic->pic_enable_source(isrc); } sx_xunlock(&intrsrc_lock); } return (error); } int intr_remove_handler(void *cookie) { struct intsrc *isrc; int error; isrc = intr_handler_source(cookie); error = intr_event_remove_handler(cookie); if (error == 0) { sx_xlock(&intrsrc_lock); isrc->is_handlers--; if (isrc->is_handlers == 0) { isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI); isrc->is_pic->pic_disable_intr(isrc); } intrcnt_updatename(isrc); sx_xunlock(&intrsrc_lock); } return (error); } int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (isrc->is_pic->pic_config_intr(isrc, trig, pol)); } static void intr_disable_src(void *arg) { struct intsrc *isrc; isrc = arg; isrc->is_pic->pic_disable_source(isrc, PIC_EOI); } void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) { struct intr_event *ie; int vector; /* * We count software interrupts when we process them. The * code here follows previous practice, but there's an * argument for counting hardware interrupts when they're * processed too. */ (*isrc->is_count)++; VM_CNT_INC(v_intr); ie = isrc->is_event; /* * XXX: We assume that IRQ 0 is only used for the ISA timer * device (clk). */ vector = isrc->is_pic->pic_vector(isrc); if (vector == 0) clkintr_pending = 1; /* * For stray interrupts, mask and EOI the source, bump the * stray count, and log the condition. */ if (intr_event_handle(ie, frame) != 0) { isrc->is_pic->pic_disable_source(isrc, PIC_EOI); (*isrc->is_straycount)++; if (*isrc->is_straycount < MAX_STRAY_LOG) log(LOG_ERR, "stray irq%d\n", vector); else if (*isrc->is_straycount == MAX_STRAY_LOG) log(LOG_CRIT, "too many stray irq %d's: not logging anymore\n", vector); } } void intr_resume(bool suspend_cancelled) { struct pic *pic; #ifndef DEV_ATPIC atpic_reset(); #endif mtx_lock(&intrpic_lock); TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) pic->pic_resume(pic, suspend_cancelled); } mtx_unlock(&intrpic_lock); } void intr_suspend(void) { struct pic *pic; mtx_lock(&intrpic_lock); TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) { if (pic->pic_suspend != NULL) pic->pic_suspend(pic); } mtx_unlock(&intrpic_lock); } static int intr_assign_cpu(void *arg, int cpu) { #ifdef SMP struct intsrc *isrc; int error; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); /* Nothing to do if there is only a single CPU. */ if (mp_ncpus > 1 && cpu != NOCPU) { #else /* * Don't do anything during early boot. We will pick up the * assignment once the APs are started. */ if (assign_cpu && cpu != NOCPU) { #endif isrc = arg; sx_xlock(&intrsrc_lock); error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); + if (error == 0) + isrc->is_cpu = cpu; sx_xunlock(&intrsrc_lock); } else error = 0; return (error); #else return (EOPNOTSUPP); #endif } static void intrcnt_setname(const char *name, int index) { snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } static void intrcnt_updatename(struct intsrc *is) { intrcnt_setname(is->is_event->ie_fullname, is->is_index); } static void intrcnt_register(struct intsrc *is) { char straystr[MAXCOMLEN + 1]; KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__)); mtx_lock_spin(&intrcnt_lock); is->is_index = intrcnt_index; intrcnt_index += 2; snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", is->is_pic->pic_vector(is)); intrcnt_updatename(is); is->is_count = &intrcnt[is->is_index]; intrcnt_setname(straystr, is->is_index + 1); is->is_straycount = &intrcnt[is->is_index + 1]; mtx_unlock_spin(&intrcnt_lock); } void intrcnt_add(const char *name, u_long **countp) { mtx_lock_spin(&intrcnt_lock); *countp = &intrcnt[intrcnt_index]; intrcnt_setname(name, intrcnt_index); intrcnt_index++; mtx_unlock_spin(&intrcnt_lock); } static void intr_init(void *dummy __unused) { intrcnt_setname("???", 0); intrcnt_index = 1; TAILQ_INIT(&pics); mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF); sx_init(&intrsrc_lock, "intrsrc"); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); static void intr_init_final(void *dummy __unused) { /* * Enable interrupts on the BSP after all of the interrupt * controllers are initialized. Device interrupts are still * disabled in the interrupt controllers until interrupt * handlers are registered. Interrupts are enabled on each AP * after their first context switch. */ enable_intr(); } SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL); #ifndef DEV_ATPIC /* Initialize the two 8259A's to a known-good shutdown state. */ void atpic_reset(void) { outb(IO_ICU1, ICW1_RESET | ICW1_IC4); outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS); outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID)); outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE); outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU1, OCW3_SEL | OCW3_RR); outb(IO_ICU2, ICW1_RESET | ICW1_IC4); outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8); outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID); outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE); outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU2, OCW3_SEL | OCW3_RR); } #endif /* Add a description to an active interrupt handler. */ int intr_describe(u_int vector, void *ih, const char *descr) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_describe_handler(isrc->is_event, ih, descr); if (error) return (error); intrcnt_updatename(isrc); return (0); } void intr_reprogram(void) { struct intsrc *is; int v; sx_xlock(&intrsrc_lock); for (v = 0; v < NUM_IO_INTS; v++) { is = interrupt_sources[v]; if (is == NULL) continue; if (is->is_pic->pic_reprogram_pin != NULL) is->is_pic->pic_reprogram_pin(is); } sx_xunlock(&intrsrc_lock); } #ifdef DDB /* * Dump data about interrupt handlers */ DB_SHOW_COMMAND(irqs, db_show_irqs) { struct intsrc **isrc; int i, verbose; if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; isrc = interrupt_sources; for (i = 0; i < NUM_IO_INTS && !db_pager_quit; i++, isrc++) if (*isrc != NULL) db_dump_intr_event((*isrc)->is_event, verbose); } #endif #ifdef SMP /* * Support for balancing interrupt sources across CPUs. For now we just * allocate CPUs round-robin. */ cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1); static int current_cpu; /* * Return the CPU that the next interrupt source should use. For now * this just returns the next local APIC according to round-robin. */ u_int intr_next_cpu(void) { u_int apic_id; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); if (mp_ncpus == 1) return (PCPU_GET(apic_id)); #else /* Leave all interrupts on the BSP during boot. */ if (!assign_cpu) return (PCPU_GET(apic_id)); #endif mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu]; do { current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } /* Attempt to bind the specified IRQ to the specified CPU. */ int intr_bind(u_int vector, u_char cpu) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (intr_event_bind(isrc->is_event, cpu)); } /* * Add a CPU to our mask of valid CPUs that can be destinations of * interrupts. */ void intr_add_cpu(u_int cpu) { if (cpu >= MAXCPU) panic("%s: Invalid CPU ID", __func__); if (bootverbose) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); CPU_SET(cpu, &intr_cpus); } #ifndef EARLY_AP_STARTUP /* * Distribute all the interrupt sources among the available CPUs once the * AP's have been launched. */ static void intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; + u_int cpu; int i; /* Don't bother on UP. */ if (mp_ncpus == 1) return; /* Round-robin assign a CPU to each enabled source. */ sx_xlock(&intrsrc_lock); assign_cpu = 1; for (i = 0; i < NUM_IO_INTS; i++) { isrc = interrupt_sources[i]; if (isrc != NULL && isrc->is_handlers > 0) { /* * If this event is already bound to a CPU, * then assign the source to that CPU instead * of picking one via round-robin. Note that * this is careful to only advance the * round-robin if the CPU assignment succeeds. */ - if (isrc->is_event->ie_cpu != NOCPU) - (void)isrc->is_pic->pic_assign_cpu(isrc, - cpu_apic_ids[isrc->is_event->ie_cpu]); - else if (isrc->is_pic->pic_assign_cpu(isrc, - cpu_apic_ids[current_cpu]) == 0) - (void)intr_next_cpu(); - + cpu = isrc->is_event->ie_cpu; + if (cpu == NOCPU) + cpu = current_cpu; + if (isrc->is_pic->pic_assign_cpu(isrc, + cpu_apic_ids[cpu]) == 0) { + isrc->is_cpu = cpu; + if (isrc->is_event->ie_cpu == NOCPU) + intr_next_cpu(); + } } } sx_xunlock(&intrsrc_lock); } SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL); #endif + +/* + * TODO: Export this information in a non-MD fashion, integrate with vmstat -i. + */ +static int +sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) +{ + struct sbuf sbuf; + struct intsrc *isrc; + int error; + int i; + + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + + sbuf_new_for_sysctl(&sbuf, NULL, 128, req); + sx_slock(&intrsrc_lock); + for (i = 0; i < NUM_IO_INTS; i++) { + isrc = interrupt_sources[i]; + if (isrc == NULL) + continue; + sbuf_printf(&sbuf, "%s:%d @%d: %ld\n", + isrc->is_event->ie_fullname, + isrc->is_index, + isrc->is_cpu, + *isrc->is_count); + } + + sx_sunlock(&intrsrc_lock); + error = sbuf_finish(&sbuf); + sbuf_delete(&sbuf); + return (error); +} +SYSCTL_PROC(_hw, OID_AUTO, intrs, CTLTYPE_STRING | CTLFLAG_RW, + 0, 0, sysctl_hw_intrs, "A", "interrupt:number @cpu: count"); + +/* + * Compare two, possibly NULL, entries in the interrupt source array + * by load. + */ +static int +intrcmp(const void *one, const void *two) +{ + const struct intsrc *i1, *i2; + + i1 = *(const struct intsrc * const *)one; + i2 = *(const struct intsrc * const *)two; + if (i1 != NULL && i2 != NULL) + return (*i1->is_count - *i2->is_count); + if (i1 != NULL) + return (1); + if (i2 != NULL) + return (-1); + return (0); +} + +/* + * Balance IRQs across available CPUs according to load. + */ +static void +intr_balance(void *dummy __unused, int pending __unused) +{ + struct intsrc *isrc; + int interval; + u_int cpu; + int i; + + interval = intrbalance; + if (interval == 0) + goto out; + + /* + * Sort interrupts according to count. + */ + sx_xlock(&intrsrc_lock); + memcpy(interrupt_sorted, interrupt_sources, sizeof(interrupt_sorted)); + qsort(interrupt_sorted, NUM_IO_INTS, sizeof(interrupt_sorted[0]), + intrcmp); + + /* + * Restart the scan from the same location to avoid moving in the + * common case. + */ + current_cpu = 0; + + /* + * Assign round-robin from most loaded to least. + */ + for (i = NUM_IO_INTS - 1; i >= 0; i--) { + isrc = interrupt_sorted[i]; + if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) + continue; + cpu = current_cpu; + intr_next_cpu(); + if (isrc->is_cpu != cpu && + isrc->is_pic->pic_assign_cpu(isrc, + cpu_apic_ids[cpu]) == 0) + isrc->is_cpu = cpu; + } + sx_xunlock(&intrsrc_lock); +out: + taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, + interval ? hz * interval : hz * 60); + +} + +static void +intr_balance_init(void *dummy __unused) +{ + + TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance, + NULL); + taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz); +} +SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL); + #else /* * Always route interrupts to the current processor in the UP case. */ u_int intr_next_cpu(void) { return (PCPU_GET(apic_id)); } #endif