Index: stable/11/sys/x86/acpica/madt.c =================================================================== --- stable/11/sys/x86/acpica/madt.c (revision 306627) +++ stable/11/sys/x86/acpica/madt.c (revision 306628) @@ -1,690 +1,704 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* These two arrays are indexed by APIC IDs. */ static struct { void *io_apic; UINT32 io_vector; } *ioapics; static struct lapic_info { u_int la_enabled; u_int la_acpi_id; } lapics[MAX_APIC_ID + 1]; int madt_found_sci_override; static ACPI_TABLE_MADT *madt; static vm_paddr_t madt_physaddr; static vm_offset_t madt_length; static MALLOC_DEFINE(M_MADT, "madt_table", "ACPI MADT Table Items"); static enum intr_polarity interrupt_polarity(UINT16 IntiFlags, UINT8 Source); static enum intr_trigger interrupt_trigger(UINT16 IntiFlags, UINT8 Source); static int madt_find_cpu(u_int acpi_id, u_int *apic_id); static int madt_find_interrupt(int intr, void **apic, u_int *pin); static void madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg); static void madt_parse_interrupt_override( ACPI_MADT_INTERRUPT_OVERRIDE *intr); static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused); static void madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi); static void madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi); static int madt_probe(void); static int madt_probe_cpus(void); static void madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg __unused); static void madt_register(void *dummy); static int madt_setup_local(void); static int madt_setup_io(void); static void madt_walk_table(acpi_subtable_handler *handler, void *arg); static struct apic_enumerator madt_enumerator = { "MADT", madt_probe, madt_probe_cpus, madt_setup_local, madt_setup_io }; /* * Look for an ACPI Multiple APIC Description Table ("APIC") */ static int madt_probe(void) { madt_physaddr = acpi_find_table(ACPI_SIG_MADT); if (madt_physaddr == 0) return (ENXIO); return (-50); } /* * Run through the MP table enumerating CPUs. */ static int madt_probe_cpus(void) { madt = acpi_map_table(madt_physaddr, ACPI_SIG_MADT); madt_length = madt->Header.Length; KASSERT(madt != NULL, ("Unable to re-map MADT")); madt_walk_table(madt_probe_cpus_handler, NULL); acpi_unmap_table(madt); madt = NULL; return (0); } /* * Initialize the local APIC on the BSP. */ static int madt_setup_local(void) { ACPI_TABLE_DMAR *dmartbl; vm_paddr_t dmartbl_physaddr; const char *reason; char *hw_vendor; u_int p[4]; + int user_x2apic; + bool bios_x2apic; madt = pmap_mapbios(madt_physaddr, madt_length); if ((cpu_feature2 & CPUID2_X2APIC) != 0) { - x2apic_mode = 1; reason = NULL; /* * Automatically detect several configurations where * x2APIC mode is known to cause troubles. User can * override the setting with hw.x2apic_enable tunable. */ dmartbl_physaddr = acpi_find_table(ACPI_SIG_DMAR); if (dmartbl_physaddr != 0) { dmartbl = acpi_map_table(dmartbl_physaddr, ACPI_SIG_DMAR); - if ((dmartbl->Flags & ACPI_DMAR_X2APIC_OPT_OUT) != 0) { - x2apic_mode = 0; + if ((dmartbl->Flags & ACPI_DMAR_X2APIC_OPT_OUT) != 0) reason = "by DMAR table"; - } acpi_unmap_table(dmartbl); } if (vm_guest == VM_GUEST_VMWARE) { vmware_hvcall(VMW_HVCMD_GETVCPU_INFO, p); if ((p[0] & VMW_VCPUINFO_VCPU_RESERVED) != 0 || - (p[0] & VMW_VCPUINFO_LEGACY_X2APIC) == 0) { - x2apic_mode = 0; - reason = "inside VMWare without intr redirection"; - } + (p[0] & VMW_VCPUINFO_LEGACY_X2APIC) == 0) + reason = + "inside VMWare without intr redirection"; } else if (vm_guest == VM_GUEST_XEN) { - x2apic_mode = 0; reason = "due to running under XEN"; } else if (vm_guest == VM_GUEST_NO && CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) == 0x2a) { hw_vendor = kern_getenv("smbios.planar.maker"); /* * It seems that some Lenovo and ASUS * SandyBridge-based notebook BIOSes have a * bug which prevents booting AP in x2APIC * mode. Since the only way to detect mobile * CPU is to check northbridge pci id, which * cannot be done that early, disable x2APIC * for all Lenovo and ASUS SandyBridge * machines. */ if (hw_vendor != NULL) { if (!strcmp(hw_vendor, "LENOVO") || !strcmp(hw_vendor, "ASUSTeK Computer Inc.")) { - x2apic_mode = 0; reason = "for a suspected SandyBridge BIOS bug"; } freeenv(hw_vendor); } } - TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_mode); - if (!x2apic_mode && reason != NULL && bootverbose) + bios_x2apic = lapic_is_x2apic(); + if (reason != NULL && bios_x2apic) { + if (bootverbose) + printf("x2APIC should be disabled %s but " + "already enabled by BIOS; enabling.\n", + reason); + reason = NULL; + } + if (reason == NULL) + x2apic_mode = 1; + else if (bootverbose) printf("x2APIC available but disabled %s\n", reason); + user_x2apic = x2apic_mode; + TUNABLE_INT_FETCH("hw.x2apic_enable", &user_x2apic); + if (user_x2apic != x2apic_mode) { + if (bios_x2apic && !user_x2apic) + printf("x2APIC disabled by tunable and " + "enabled by BIOS; ignoring tunable."); + else + x2apic_mode = user_x2apic; + } } lapic_init(madt->Address); printf("ACPI APIC Table: <%.*s %.*s>\n", (int)sizeof(madt->Header.OemId), madt->Header.OemId, (int)sizeof(madt->Header.OemTableId), madt->Header.OemTableId); /* * We ignore 64-bit local APIC override entries. Should we * perhaps emit a warning here if we find one? */ return (0); } /* * Enumerate I/O APICs and setup interrupt sources. */ static int madt_setup_io(void) { void *ioapic; u_int pin; int i; /* Try to initialize ACPI so that we can access the FADT. */ i = acpi_Startup(); if (ACPI_FAILURE(i)) { printf("MADT: ACPI Startup failed with %s\n", AcpiFormatException(i)); printf("Try disabling either ACPI or apic support.\n"); panic("Using MADT but ACPI doesn't work"); } ioapics = malloc(sizeof(*ioapics) * (MAX_APIC_ID + 1), M_MADT, M_WAITOK | M_ZERO); /* First, we run through adding I/O APIC's. */ madt_walk_table(madt_parse_apics, NULL); /* Second, we run through the table tweaking interrupt sources. */ madt_walk_table(madt_parse_ints, NULL); /* * If there was not an explicit override entry for the SCI, * force it to use level trigger and active-low polarity. */ if (!madt_found_sci_override) { if (madt_find_interrupt(AcpiGbl_FADT.SciInterrupt, &ioapic, &pin) != 0) printf("MADT: Could not find APIC for SCI IRQ %u\n", AcpiGbl_FADT.SciInterrupt); else { printf( "MADT: Forcing active-low polarity and level trigger for SCI\n"); ioapic_set_polarity(ioapic, pin, INTR_POLARITY_LOW); ioapic_set_triggermode(ioapic, pin, INTR_TRIGGER_LEVEL); } } /* Third, we register all the I/O APIC's. */ for (i = 0; i <= MAX_APIC_ID; i++) if (ioapics[i].io_apic != NULL) ioapic_register(ioapics[i].io_apic); /* Finally, we throw the switch to enable the I/O APIC's. */ acpi_SetDefaultIntrModel(ACPI_INTR_APIC); free(ioapics, M_MADT); ioapics = NULL; return (0); } static void madt_register(void *dummy __unused) { apic_register_enumerator(&madt_enumerator); } SYSINIT(madt_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, madt_register, NULL); /* * Call the handler routine for each entry in the MADT table. */ static void madt_walk_table(acpi_subtable_handler *handler, void *arg) { acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, handler, arg); } static void madt_add_cpu(u_int acpi_id, u_int apic_id, u_int flags) { struct lapic_info *la; /* * The MADT does not include a BSP flag, so we have to let the * MP code figure out which CPU is the BSP on its own. */ if (bootverbose) printf("MADT: Found CPU APIC ID %u ACPI ID %u: %s\n", apic_id, acpi_id, flags & ACPI_MADT_ENABLED ? "enabled" : "disabled"); if (!(flags & ACPI_MADT_ENABLED)) return; if (apic_id > MAX_APIC_ID) { printf("MADT: Ignoring local APIC ID %u (too high)\n", apic_id); return; } la = &lapics[apic_id]; KASSERT(la->la_enabled == 0, ("Duplicate local APIC ID %u", apic_id)); la->la_enabled = 1; la->la_acpi_id = acpi_id; lapic_create(apic_id, 0); } static void madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_LOCAL_APIC *proc; ACPI_MADT_LOCAL_X2APIC *x2apic; switch (entry->Type) { case ACPI_MADT_TYPE_LOCAL_APIC: proc = (ACPI_MADT_LOCAL_APIC *)entry; madt_add_cpu(proc->ProcessorId, proc->Id, proc->LapicFlags); break; case ACPI_MADT_TYPE_LOCAL_X2APIC: x2apic = (ACPI_MADT_LOCAL_X2APIC *)entry; madt_add_cpu(x2apic->Uid, x2apic->LocalApicId, x2apic->LapicFlags); break; } } /* * Add an I/O APIC from an entry in the table. */ static void madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { ACPI_MADT_IO_APIC *apic; switch (entry->Type) { case ACPI_MADT_TYPE_IO_APIC: apic = (ACPI_MADT_IO_APIC *)entry; if (bootverbose) printf( "MADT: Found IO APIC ID %u, Interrupt %u at %p\n", apic->Id, apic->GlobalIrqBase, (void *)(uintptr_t)apic->Address); if (apic->Id > MAX_APIC_ID) panic("%s: I/O APIC ID %u too high", __func__, apic->Id); if (ioapics[apic->Id].io_apic != NULL) panic("%s: Double APIC ID %u", __func__, apic->Id); if (apic->GlobalIrqBase >= FIRST_MSI_INT) { printf("MADT: Ignoring bogus I/O APIC ID %u", apic->Id); break; } ioapics[apic->Id].io_apic = ioapic_create(apic->Address, apic->Id, apic->GlobalIrqBase); ioapics[apic->Id].io_vector = apic->GlobalIrqBase; break; default: break; } } /* * Determine properties of an interrupt source. Note that for ACPI these * functions are only used for ISA interrupts, so we assume ISA bus values * (Active Hi, Edge Triggered) for conforming values except for the ACPI * SCI for which we use Active Lo, Level Triggered. */ static enum intr_polarity interrupt_polarity(UINT16 IntiFlags, UINT8 Source) { switch (IntiFlags & ACPI_MADT_POLARITY_MASK) { default: printf("WARNING: Bogus Interrupt Polarity. Assume CONFORMS\n"); /* FALLTHROUGH*/ case ACPI_MADT_POLARITY_CONFORMS: if (Source == AcpiGbl_FADT.SciInterrupt) return (INTR_POLARITY_LOW); else return (INTR_POLARITY_HIGH); case ACPI_MADT_POLARITY_ACTIVE_HIGH: return (INTR_POLARITY_HIGH); case ACPI_MADT_POLARITY_ACTIVE_LOW: return (INTR_POLARITY_LOW); } } static enum intr_trigger interrupt_trigger(UINT16 IntiFlags, UINT8 Source) { switch (IntiFlags & ACPI_MADT_TRIGGER_MASK) { default: printf("WARNING: Bogus Interrupt Trigger Mode. Assume CONFORMS.\n"); /*FALLTHROUGH*/ case ACPI_MADT_TRIGGER_CONFORMS: if (Source == AcpiGbl_FADT.SciInterrupt) return (INTR_TRIGGER_LEVEL); else return (INTR_TRIGGER_EDGE); case ACPI_MADT_TRIGGER_EDGE: return (INTR_TRIGGER_EDGE); case ACPI_MADT_TRIGGER_LEVEL: return (INTR_TRIGGER_LEVEL); } } /* * Find the local APIC ID associated with a given ACPI Processor ID. */ static int madt_find_cpu(u_int acpi_id, u_int *apic_id) { int i; for (i = 0; i <= MAX_APIC_ID; i++) { if (!lapics[i].la_enabled) continue; if (lapics[i].la_acpi_id != acpi_id) continue; *apic_id = i; return (0); } return (ENOENT); } /* * Find the IO APIC and pin on that APIC associated with a given global * interrupt. */ static int madt_find_interrupt(int intr, void **apic, u_int *pin) { int i, best; best = -1; for (i = 0; i <= MAX_APIC_ID; i++) { if (ioapics[i].io_apic == NULL || ioapics[i].io_vector > intr) continue; if (best == -1 || ioapics[best].io_vector < ioapics[i].io_vector) best = i; } if (best == -1) return (ENOENT); *apic = ioapics[best].io_apic; *pin = intr - ioapics[best].io_vector; if (*pin > 32) printf("WARNING: Found intpin of %u for vector %d\n", *pin, intr); return (0); } void madt_parse_interrupt_values(void *entry, enum intr_trigger *trig, enum intr_polarity *pol) { ACPI_MADT_INTERRUPT_OVERRIDE *intr; char buf[64]; intr = entry; if (bootverbose) printf("MADT: Interrupt override: source %u, irq %u\n", intr->SourceIrq, intr->GlobalIrq); KASSERT(intr->Bus == 0, ("bus for interrupt overrides must be zero")); /* * Lookup the appropriate trigger and polarity modes for this * entry. */ *trig = interrupt_trigger(intr->IntiFlags, intr->SourceIrq); *pol = interrupt_polarity(intr->IntiFlags, intr->SourceIrq); /* * If the SCI is identity mapped but has edge trigger and * active-hi polarity or the force_sci_lo tunable is set, * force it to use level/lo. */ if (intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) { madt_found_sci_override = 1; if (getenv_string("hw.acpi.sci.trigger", buf, sizeof(buf))) { if (tolower(buf[0]) == 'e') *trig = INTR_TRIGGER_EDGE; else if (tolower(buf[0]) == 'l') *trig = INTR_TRIGGER_LEVEL; else panic( "Invalid trigger %s: must be 'edge' or 'level'", buf); printf("MADT: Forcing SCI to %s trigger\n", *trig == INTR_TRIGGER_EDGE ? "edge" : "level"); } if (getenv_string("hw.acpi.sci.polarity", buf, sizeof(buf))) { if (tolower(buf[0]) == 'h') *pol = INTR_POLARITY_HIGH; else if (tolower(buf[0]) == 'l') *pol = INTR_POLARITY_LOW; else panic( "Invalid polarity %s: must be 'high' or 'low'", buf); printf("MADT: Forcing SCI to active %s polarity\n", *pol == INTR_POLARITY_HIGH ? "high" : "low"); } } } /* * Parse an interrupt source override for an ISA interrupt. */ static void madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr) { void *new_ioapic, *old_ioapic; u_int new_pin, old_pin; enum intr_trigger trig; enum intr_polarity pol; if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 && intr->GlobalIrq == 2) { if (bootverbose) printf("MADT: Skipping timer override\n"); return; } if (madt_find_interrupt(intr->GlobalIrq, &new_ioapic, &new_pin) != 0) { printf("MADT: Could not find APIC for vector %u (IRQ %u)\n", intr->GlobalIrq, intr->SourceIrq); return; } madt_parse_interrupt_values(intr, &trig, &pol); /* Remap the IRQ if it is mapped to a different interrupt vector. */ if (intr->SourceIrq != intr->GlobalIrq) { /* * If the SCI is remapped to a non-ISA global interrupt, * then override the vector we use to setup and allocate * the interrupt. */ if (intr->GlobalIrq > 15 && intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) acpi_OverrideInterruptLevel(intr->GlobalIrq); else ioapic_remap_vector(new_ioapic, new_pin, intr->SourceIrq); if (madt_find_interrupt(intr->SourceIrq, &old_ioapic, &old_pin) != 0) printf("MADT: Could not find APIC for source IRQ %u\n", intr->SourceIrq); else if (ioapic_get_vector(old_ioapic, old_pin) == intr->SourceIrq) ioapic_disable_pin(old_ioapic, old_pin); } /* Program the polarity and trigger mode. */ ioapic_set_triggermode(new_ioapic, new_pin, trig); ioapic_set_polarity(new_ioapic, new_pin, pol); } /* * Parse an entry for an NMI routed to an IO APIC. */ static void madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi) { void *ioapic; u_int pin; if (madt_find_interrupt(nmi->GlobalIrq, &ioapic, &pin) != 0) { printf("MADT: Could not find APIC for vector %u\n", nmi->GlobalIrq); return; } ioapic_set_nmi(ioapic, pin); if (!(nmi->IntiFlags & ACPI_MADT_TRIGGER_CONFORMS)) ioapic_set_triggermode(ioapic, pin, interrupt_trigger(nmi->IntiFlags, 0)); if (!(nmi->IntiFlags & ACPI_MADT_POLARITY_CONFORMS)) ioapic_set_polarity(ioapic, pin, interrupt_polarity(nmi->IntiFlags, 0)); } /* * Parse an entry for an NMI routed to a local APIC LVT pin. */ static void madt_handle_local_nmi(u_int acpi_id, UINT8 Lint, UINT16 IntiFlags) { u_int apic_id, pin; if (acpi_id == 0xffffffff) apic_id = APIC_ID_ALL; else if (madt_find_cpu(acpi_id, &apic_id) != 0) { if (bootverbose) printf("MADT: Ignoring local NMI routed to " "ACPI CPU %u\n", acpi_id); return; } if (Lint == 0) pin = APIC_LVT_LINT0; else pin = APIC_LVT_LINT1; lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); if (!(IntiFlags & ACPI_MADT_TRIGGER_CONFORMS)) lapic_set_lvt_triggermode(apic_id, pin, interrupt_trigger(IntiFlags, 0)); if (!(IntiFlags & ACPI_MADT_POLARITY_CONFORMS)) lapic_set_lvt_polarity(apic_id, pin, interrupt_polarity(IntiFlags, 0)); } static void madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi) { madt_handle_local_nmi(nmi->ProcessorId == 0xff ? 0xffffffff : nmi->ProcessorId, nmi->Lint, nmi->IntiFlags); } static void madt_parse_local_x2apic_nmi(ACPI_MADT_LOCAL_X2APIC_NMI *nmi) { madt_handle_local_nmi(nmi->Uid, nmi->Lint, nmi->IntiFlags); } /* * Parse interrupt entries. */ static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { switch (entry->Type) { case ACPI_MADT_TYPE_INTERRUPT_OVERRIDE: madt_parse_interrupt_override( (ACPI_MADT_INTERRUPT_OVERRIDE *)entry); break; case ACPI_MADT_TYPE_NMI_SOURCE: madt_parse_nmi((ACPI_MADT_NMI_SOURCE *)entry); break; case ACPI_MADT_TYPE_LOCAL_APIC_NMI: madt_parse_local_nmi((ACPI_MADT_LOCAL_APIC_NMI *)entry); break; case ACPI_MADT_TYPE_LOCAL_X2APIC_NMI: madt_parse_local_x2apic_nmi( (ACPI_MADT_LOCAL_X2APIC_NMI *)entry); break; } } /* * Setup per-CPU ACPI IDs. */ static void madt_set_ids(void *dummy) { struct lapic_info *la; struct pcpu *pc; u_int i; if (madt == NULL) return; CPU_FOREACH(i) { pc = pcpu_find(i); KASSERT(pc != NULL, ("no pcpu data for CPU %u", i)); la = &lapics[pc->pc_apic_id]; if (!la->la_enabled) panic("APIC: CPU with APIC ID %u is not enabled", pc->pc_apic_id); pc->pc_acpi_id = la->la_acpi_id; if (bootverbose) printf("APIC: CPU %u has ACPI ID %u\n", i, la->la_acpi_id); } } SYSINIT(madt_set_ids, SI_SUB_CPU, SI_ORDER_MIDDLE, madt_set_ids, NULL); Index: stable/11/sys/x86/include/apicvar.h =================================================================== --- stable/11/sys/x86/include/apicvar.h (revision 306627) +++ stable/11/sys/x86/include/apicvar.h (revision 306628) @@ -1,467 +1,475 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_APICVAR_H_ #define _X86_APICVAR_H_ /* * Local && I/O APIC variable definitions. */ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define MAX_APIC_ID 0xfe #define APIC_ID_ALL 0xff /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5) /* IPIs handled by IPI_BITMAP_VECTOR */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 7) /* Suspend CPU until restarted. */ #ifdef __i386__ #define IPI_LAZYPMAP (APIC_IPI_INTS + 8) /* Lazy pmap release. */ #define IPI_DYN_FIRST (APIC_IPI_INTS + 9) #else #define IPI_DYN_FIRST (APIC_IPI_INTS + 8) #endif #define IPI_DYN_LAST (253) /* IPIs allocated at runtime */ /* * IPI_STOP_HARD does not need to occupy a slot in the IPI vector space since * it is delivered using an NMI anyways. */ #define IPI_NMI_FIRST 254 #define IPI_TRACE 254 /* Interrupt for tracing. */ #define IPI_STOP_HARD 255 /* Stop CPU with a NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (Does not use the APIC's interrupt fifo) */ #define APIC_SPURIOUS_INT 255 #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI #define IRQ_EXTINT (NUM_IO_INTS + 1) #define IRQ_NMI (NUM_IO_INTS + 2) #define IRQ_SMI (NUM_IO_INTS + 3) #define IRQ_DISABLED (NUM_IO_INTS + 4) /* * An APIC enumerator is a psuedo bus driver that enumerates APIC's including * CPU's and I/O APIC's. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint); extern vm_paddr_t lapic_paddr; extern int apic_cpuids[]; void apic_register_enumerator(struct apic_enumerator *enumerator); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); /* * Struct containing pointers to APIC functions whose * implementation is run time selectable. */ struct apic_ops { void (*create)(u_int, int); void (*init)(vm_paddr_t); void (*xapic_mode)(void); + bool (*is_x2apic)(void); void (*setup)(int); void (*dump)(const char *); void (*disable)(void); void (*eoi)(void); int (*id)(void); int (*intr_pending)(u_int); void (*set_logical_id)(u_int, u_int, u_int); u_int (*cpuid)(u_int); /* Vectors */ u_int (*alloc_vector)(u_int, u_int); u_int (*alloc_vectors)(u_int, u_int *, u_int, u_int); void (*enable_vector)(u_int, u_int); void (*disable_vector)(u_int, u_int); void (*free_vector)(u_int, u_int, u_int); /* PMC */ int (*enable_pmc)(void); void (*disable_pmc)(void); void (*reenable_pmc)(void); /* CMC */ void (*enable_cmc)(void); /* IPI */ void (*ipi_raw)(register_t, u_int); void (*ipi_vectored)(u_int, int); int (*ipi_wait)(int); int (*ipi_alloc)(inthand_t *ipifunc); void (*ipi_free)(int vector); /* LVT */ int (*set_lvt_mask)(u_int, u_int, u_char); int (*set_lvt_mode)(u_int, u_int, u_int32_t); int (*set_lvt_polarity)(u_int, u_int, enum intr_polarity); int (*set_lvt_triggermode)(u_int, u_int, enum intr_trigger); }; extern struct apic_ops apic_ops; static inline void lapic_create(u_int apic_id, int boot_cpu) { apic_ops.create(apic_id, boot_cpu); } static inline void lapic_init(vm_paddr_t addr) { apic_ops.init(addr); } static inline void lapic_xapic_mode(void) { apic_ops.xapic_mode(); +} + +static inline bool +lapic_is_x2apic(void) +{ + + return (apic_ops.is_x2apic()); } static inline void lapic_setup(int boot) { apic_ops.setup(boot); } static inline void lapic_dump(const char *str) { apic_ops.dump(str); } static inline void lapic_disable(void) { apic_ops.disable(); } static inline void lapic_eoi(void) { apic_ops.eoi(); } static inline int lapic_id(void) { return (apic_ops.id()); } static inline int lapic_intr_pending(u_int vector) { return (apic_ops.intr_pending(vector)); } /* XXX: UNUSED */ static inline void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { apic_ops.set_logical_id(apic_id, cluster, cluster_id); } static inline u_int apic_cpuid(u_int apic_id) { return (apic_ops.cpuid(apic_id)); } static inline u_int apic_alloc_vector(u_int apic_id, u_int irq) { return (apic_ops.alloc_vector(apic_id, irq)); } static inline u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { return (apic_ops.alloc_vectors(apic_id, irqs, count, align)); } static inline void apic_enable_vector(u_int apic_id, u_int vector) { apic_ops.enable_vector(apic_id, vector); } static inline void apic_disable_vector(u_int apic_id, u_int vector) { apic_ops.disable_vector(apic_id, vector); } static inline void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { apic_ops.free_vector(apic_id, vector, irq); } static inline int lapic_enable_pmc(void) { return (apic_ops.enable_pmc()); } static inline void lapic_disable_pmc(void) { apic_ops.disable_pmc(); } static inline void lapic_reenable_pmc(void) { apic_ops.reenable_pmc(); } static inline void lapic_enable_cmc(void) { apic_ops.enable_cmc(); } static inline void lapic_ipi_raw(register_t icrlo, u_int dest) { apic_ops.ipi_raw(icrlo, dest); } static inline void lapic_ipi_vectored(u_int vector, int dest) { apic_ops.ipi_vectored(vector, dest); } static inline int lapic_ipi_wait(int delay) { return (apic_ops.ipi_wait(delay)); } static inline int lapic_ipi_alloc(inthand_t *ipifunc) { return (apic_ops.ipi_alloc(ipifunc)); } static inline void lapic_ipi_free(int vector) { return (apic_ops.ipi_free(vector)); } static inline int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked) { return (apic_ops.set_lvt_mask(apic_id, lvt, masked)); } static inline int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) { return (apic_ops.set_lvt_mode(apic_id, lvt, mode)); } static inline int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (apic_ops.set_lvt_polarity(apic_id, lvt, pol)); } static inline int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (apic_ops.set_lvt_triggermode(apic_id, lvt, trigger)); } void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); extern int x2apic_mode; extern int lapic_eoi_suppression; #ifdef _SYS_SYSCTL_H_ SYSCTL_DECL(_hw_apic); #endif #endif /* !LOCORE */ #endif /* _X86_APICVAR_H_ */ Index: stable/11/sys/x86/x86/local_apic.c =================================================================== --- stable/11/sys/x86/x86/local_apic.c (revision 306627) +++ stable/11/sys/x86/x86/local_apic.c (revision 306628) @@ -1,1987 +1,1998 @@ /*- * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Local APIC support on Pentium and later processors. */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define SDT_APICT SDT_SYSIGT #define GSEL_APIC 0 #else #define SDT_APIC SDT_SYS386IGT #define SDT_APICT SDT_SYS386TGT #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) #endif /* Sanity checks on IDT vectors. */ CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* Magic IRQ values for the timer and syscalls. */ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) #define IRQ_EVTCHN (NUM_IO_INTS + 4) enum lat_timer_mode { LAT_MODE_UNDEF = 0, LAT_MODE_PERIODIC = 1, LAT_MODE_ONESHOT = 2, LAT_MODE_DEADLINE = 3, }; /* * Support for local APICs. Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts * from I/O devices and then forward them on to the local APICs. * * Local APICs can also send interrupts to each other thus providing the * mechanism for IPIs. */ struct lvt { u_int lvt_edgetrigger:1; u_int lvt_activehi:1; u_int lvt_masked:1; u_int lvt_active:1; u_int lvt_mode:16; u_int lvt_vector:8; }; struct lapic { struct lvt la_lvts[APIC_LVT_MAX + 1]; u_int la_id:8; u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; u_long *la_timer_count; uint64_t la_timer_period; enum lat_timer_mode la_timer_mode; uint32_t lvt_timer_base; uint32_t lvt_timer_last; /* Include IDT_SYSCALL to make indexing easier. */ int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static lapics[MAX_APIC_ID + 1]; /* Global defaults for local APIC LVT entries. */ static struct lvt lvts[APIC_LVT_MAX + 1] = { { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ }; static inthand_t *ioint_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1), /* 32 - 63 */ IDTVEC(apic_isr2), /* 64 - 95 */ IDTVEC(apic_isr3), /* 96 - 127 */ IDTVEC(apic_isr4), /* 128 - 159 */ IDTVEC(apic_isr5), /* 160 - 191 */ IDTVEC(apic_isr6), /* 192 - 223 */ IDTVEC(apic_isr7), /* 224 - 255 */ }; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; extern inthand_t IDTVEC(rsvd); volatile char *lapic_map; vm_paddr_t lapic_paddr; int x2apic_mode; int lapic_eoi_suppression; static int lapic_timer_tsc_deadline; static u_long lapic_timer_divisor, count_freq; static struct eventtimer lapic_et; #ifdef SMP static uint64_t lapic_ipi_wait_mult; #endif SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, &lapic_eoi_suppression, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD, &lapic_timer_tsc_deadline, 0, ""); static uint32_t lapic_read32(enum LAPIC_REGISTERS reg) { uint32_t res; if (x2apic_mode) { res = rdmsr32(MSR_APIC_000 + reg); } else { res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); } return (res); } static void lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { mfence(); wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } static void lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } #ifdef SMP static uint64_t lapic_read_icr(void) { uint64_t v; uint32_t vhi, vlo; if (x2apic_mode) { v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO); } else { vhi = lapic_read32(LAPIC_ICR_HI); vlo = lapic_read32(LAPIC_ICR_LO); v = ((uint64_t)vhi << 32) | vlo; } return (v); } static uint64_t lapic_read_icr_lo(void) { return (lapic_read32(LAPIC_ICR_LO)); } static void lapic_write_icr(uint32_t vhi, uint32_t vlo) { uint64_t v; if (x2apic_mode) { v = ((uint64_t)vhi << 32) | vlo; mfence(); wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v); } else { lapic_write32(LAPIC_ICR_HI, vhi); lapic_write32(LAPIC_ICR_LO, vlo); } } #endif /* SMP */ static void native_lapic_enable_x2apic(void) { uint64_t apic_base; apic_base = rdmsr(MSR_APICBASE); apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; wrmsr(MSR_APICBASE, apic_base); } +static bool +native_lapic_is_x2apic(void) +{ + uint64_t apic_base; + + apic_base = rdmsr(MSR_APICBASE); + return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) == + (APICBASE_X2APIC | APICBASE_ENABLED)); +} + static void lapic_enable(void); static void lapic_resume(struct pic *pic, bool suspend_cancelled); static void lapic_timer_oneshot(struct lapic *); static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t); static void lapic_timer_periodic(struct lapic *); static void lapic_timer_deadline(struct lapic *); static void lapic_timer_stop(struct lapic *); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period); static int lapic_et_stop(struct eventtimer *et); static u_int apic_idt_to_irq(u_int apic_id, u_int vector); static void lapic_set_tpr(u_int vector); struct pic lapic_pic = { .pic_resume = lapic_resume }; /* Forward declarations for apic_ops */ static void native_lapic_create(u_int apic_id, int boot_cpu); static void native_lapic_init(vm_paddr_t addr); static void native_lapic_xapic_mode(void); static void native_lapic_setup(int boot); static void native_lapic_dump(const char *str); static void native_lapic_disable(void); static void native_lapic_eoi(void); static int native_lapic_id(void); static int native_lapic_intr_pending(u_int vector); static u_int native_apic_cpuid(u_int apic_id); static u_int native_apic_alloc_vector(u_int apic_id, u_int irq); static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); static void native_apic_disable_vector(u_int apic_id, u_int vector); static void native_apic_enable_vector(u_int apic_id, u_int vector); static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq); static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); static int native_lapic_enable_pmc(void); static void native_lapic_disable_pmc(void); static void native_lapic_reenable_pmc(void); static void native_lapic_enable_cmc(void); static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt, uint32_t mode); static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); #ifdef SMP static void native_lapic_ipi_raw(register_t icrlo, u_int dest); static void native_lapic_ipi_vectored(u_int vector, int dest); static int native_lapic_ipi_wait(int delay); #endif /* SMP */ static int native_lapic_ipi_alloc(inthand_t *ipifunc); static void native_lapic_ipi_free(int vector); struct apic_ops apic_ops = { .create = native_lapic_create, .init = native_lapic_init, .xapic_mode = native_lapic_xapic_mode, + .is_x2apic = native_lapic_is_x2apic, .setup = native_lapic_setup, .dump = native_lapic_dump, .disable = native_lapic_disable, .eoi = native_lapic_eoi, .id = native_lapic_id, .intr_pending = native_lapic_intr_pending, .set_logical_id = native_lapic_set_logical_id, .cpuid = native_apic_cpuid, .alloc_vector = native_apic_alloc_vector, .alloc_vectors = native_apic_alloc_vectors, .enable_vector = native_apic_enable_vector, .disable_vector = native_apic_disable_vector, .free_vector = native_apic_free_vector, .enable_pmc = native_lapic_enable_pmc, .disable_pmc = native_lapic_disable_pmc, .reenable_pmc = native_lapic_reenable_pmc, .enable_cmc = native_lapic_enable_cmc, #ifdef SMP .ipi_raw = native_lapic_ipi_raw, .ipi_vectored = native_lapic_ipi_vectored, .ipi_wait = native_lapic_ipi_wait, #endif .ipi_alloc = native_lapic_ipi_alloc, .ipi_free = native_lapic_ipi_free, .set_lvt_mask = native_lapic_set_lvt_mask, .set_lvt_mode = native_lapic_set_lvt_mode, .set_lvt_polarity = native_lapic_set_lvt_polarity, .set_lvt_triggermode = native_lapic_set_lvt_triggermode, }; static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value) { struct lvt *lvt; KASSERT(pin <= APIC_LVT_MAX, ("%s: pin %u out of range", __func__, pin)); if (la->la_lvts[pin].lvt_active) lvt = &la->la_lvts[pin]; else lvt = &lvts[pin]; value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | APIC_LVT_VECTOR); if (lvt->lvt_edgetrigger == 0) value |= APIC_LVT_TM; if (lvt->lvt_activehi == 0) value |= APIC_LVT_IIPP_INTALO; if (lvt->lvt_masked) value |= APIC_LVT_M; value |= lvt->lvt_mode; switch (lvt->lvt_mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: if (!lvt->lvt_edgetrigger && bootverbose) { printf("lapic%u: Forcing LINT%u to edge trigger\n", la->la_id, pin); value |= APIC_LVT_TM; } /* Use a vector of 0. */ break; case APIC_LVT_DM_FIXED: value |= lvt->lvt_vector; break; default: panic("bad APIC LVT delivery mode: %#x\n", value); } return (value); } /* * Map the local APIC and setup necessary interrupt vectors. */ static void native_lapic_init(vm_paddr_t addr) { #ifdef SMP uint64_t r, r1, r2, rx; #endif uint32_t ver; u_int regs[4]; int i, arat; /* * Enable x2APIC mode if possible. Map the local APIC * registers page. * * Keep the LAPIC registers page mapped uncached for x2APIC * mode too, to have direct map page attribute set to * uncached. This is needed to work around CPU errata present * on all Intel processors. */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic_paddr = addr; lapic_map = pmap_mapdev(addr, PAGE_SIZE); if (x2apic_mode) { native_lapic_enable_x2apic(); lapic_map = NULL; } /* Setup the spurious interrupt handler. */ setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Local APIC error interrupt. */ setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC); /* XXX: Thermal interrupt */ /* Local APIC CMCI. */ setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC); if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { arat = 0; /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { do_cpuid(0x06, regs); if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; lapic_et.et_quality -= 200; } else if ((cpu_feature & CPUID_TSC) != 0 && (cpu_feature2 & CPUID2_TSCDLT) != 0 && tsc_is_invariant && tsc_freq != 0) { lapic_timer_tsc_deadline = 1; TUNABLE_INT_FETCH("hw.lapic_tsc_deadline", &lapic_timer_tsc_deadline); } lapic_et.et_frequency = 0; /* We don't know frequency yet, so trying to guess. */ lapic_et.et_min_period = 0x00001000LL; lapic_et.et_max_period = SBT_1S; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; et_register(&lapic_et); } /* * Set lapic_eoi_suppression after lapic_enable(), to not * enable suppression in the hardware prematurely. Note that * we by default enable suppression even when system only has * one IO-APIC, since EOI is broadcasted to all APIC agents, * including CPUs, otherwise. * * It seems that at least some KVM versions report * EOI_SUPPRESSION bit, but auto-EOI does not work. */ ver = lapic_read32(LAPIC_VERSION); if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { lapic_eoi_suppression = 1; if (vm_guest == VM_GUEST_KVM) { if (bootverbose) printf( "KVM -- disabling lapic eoi suppression\n"); lapic_eoi_suppression = 0; } TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", &lapic_eoi_suppression); } #ifdef SMP #define LOOPS 100000 /* * Calibrate the busy loop waiting for IPI ack in xAPIC mode. * lapic_ipi_wait_mult contains the number of iterations which * approximately delay execution for 1 microsecond (the * argument to native_lapic_ipi_wait() is in microseconds). * * We assume that TSC is present and already measured. * Possible TSC frequency jumps are irrelevant to the * calibration loop below, the CPU clock management code is * not yet started, and we do not enter sleep states. */ KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0, ("TSC not initialized")); if (!x2apic_mode) { r = rdtsc(); for (rx = 0; rx < LOOPS; rx++) { (void)lapic_read_icr_lo(); ia32_pause(); } r = rdtsc() - r; r1 = tsc_freq * LOOPS; r2 = r * 1000000; lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1; if (bootverbose) { printf("LAPIC: ipi_wait() us multiplier %ju (r %ju " "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult, (uintmax_t)r, (uintmax_t)tsc_freq); } } #undef LOOPS #endif /* SMP */ } /* * Create a local APIC instance. */ static void native_lapic_create(u_int apic_id, int boot_cpu) { int i; if (apic_id > MAX_APIC_ID) { printf("APIC: Ignoring local APIC with ID %d\n", apic_id); if (boot_cpu) panic("Can't ignore BSP"); return; } KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", apic_id)); /* * Assume no local LVT overrides and a cluster of 0 and * intra-cluster ID of 0. */ lapics[apic_id].la_present = 1; lapics[apic_id].la_id = apic_id; for (i = 0; i <= APIC_LVT_MAX; i++) { lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) lapics[apic_id].la_ioint_irqs[i] = -1; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif #ifdef XENHVM lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; #endif #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } /* * Dump contents of local APIC registers */ static void native_lapic_dump(const char* str) { uint32_t maxlvt; maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", lapic_read32(LAPIC_ID), lapic_read32(LAPIC_VERSION), lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); if ((cpu_feature2 & CPUID2_X2APIC) != 0) printf(" x2APIC: %d", x2apic_mode); printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), lapic_read32(LAPIC_LVT_ERROR)); if (maxlvt >= APIC_LVT_PMC) printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); printf("\n"); if (maxlvt >= APIC_LVT_CMCI) printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); } static void native_lapic_xapic_mode(void) { register_t saveintr; saveintr = intr_disable(); if (x2apic_mode) native_lapic_enable_x2apic(); intr_restore(saveintr); } static void native_lapic_setup(int boot) { struct lapic *la; uint32_t maxlvt; register_t saveintr; char buf[MAXCOMLEN + 1]; saveintr = intr_disable(); la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); /* Setup spurious vector and enable the local APIC. */ lapic_enable(); /* Program LINT[01] LVT entries. */ lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, lapic_read32(LAPIC_LVT_LINT0))); lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, lapic_read32(LAPIC_LVT_LINT1))); /* Program the PMC LVT entry if present. */ if (maxlvt >= APIC_LVT_PMC) { lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, LAPIC_LVT_PCINT)); } /* Program timer LVT and setup handler. */ la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, lapic_read32(LAPIC_LVT_TIMER)); la->lvt_timer_last = la->lvt_timer_base; lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); if (boot) { snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); intrcnt_add(buf, &la->la_timer_count); } /* Setup the timer if configured. */ if (la->la_timer_mode != LAT_MODE_UNDEF) { KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", lapic_id())); switch (la->la_timer_mode) { case LAT_MODE_PERIODIC: lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_periodic(la); break; case LAT_MODE_ONESHOT: lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(la); break; case LAT_MODE_DEADLINE: lapic_timer_deadline(la); break; default: panic("corrupted la_timer_mode %p %d", la, la->la_timer_mode); } } /* Program error LVT and clear any existing errors. */ lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, lapic_read32(LAPIC_LVT_ERROR))); lapic_write32(LAPIC_ESR, 0); /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ if (maxlvt >= APIC_LVT_CMCI) { lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, lapic_read32(LAPIC_LVT_CMCI))); } intr_restore(saveintr); } static void native_lapic_reenable_pmc(void) { #ifdef HWPMC_HOOKS uint32_t value; value = lapic_read32(LAPIC_LVT_PCINT); value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_PCINT, value); #endif } #ifdef HWPMC_HOOKS static void lapic_update_pmc(void *dummy) { struct lapic *la; la = &lapics[lapic_id()]; lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, lapic_read32(LAPIC_LVT_PCINT))); } #endif static int native_lapic_enable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return (0); /* Fail if the PMC LVT is not present. */ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return (0); lvts[APIC_LVT_PMC].lvt_masked = 0; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #else #ifdef SMP /* * If hwpmc was loaded at boot time then the APs may not be * started yet. In that case, don't forward the request to * them as they will program the lvt when they start. */ if (smp_started) smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); else #endif lapic_update_pmc(NULL); #endif return (1); #else return (0); #endif } static void native_lapic_disable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return; /* Fail if the PMC LVT is not present. */ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return; lvts[APIC_LVT_PMC].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); #endif smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #endif } static void lapic_calibrate_initcount(struct eventtimer *et, struct lapic *la) { u_long value; /* Start off with a divisor of 2 (power on reset default). */ lapic_timer_divisor = 2; /* Try to calibrate the local APIC timer. */ do { lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT); DELAY(1000000); value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER); if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; } while (lapic_timer_divisor <= 128); if (lapic_timer_divisor > 128) panic("lapic: Divisor too big"); if (bootverbose) { printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); } count_freq = value; } static void lapic_calibrate_deadline(struct eventtimer *et, struct lapic *la __unused) { if (bootverbose) { printf("lapic: deadline tsc mode, Frequency %ju Hz\n", (uintmax_t)tsc_freq); } } static void lapic_change_mode(struct eventtimer *et, struct lapic *la, enum lat_timer_mode newmode) { if (la->la_timer_mode == newmode) return; switch (newmode) { case LAT_MODE_PERIODIC: lapic_timer_set_divisor(lapic_timer_divisor); et->et_frequency = count_freq; break; case LAT_MODE_DEADLINE: et->et_frequency = tsc_freq; break; case LAT_MODE_ONESHOT: lapic_timer_set_divisor(lapic_timer_divisor); et->et_frequency = count_freq; break; default: panic("lapic_change_mode %d", newmode); } la->la_timer_mode = newmode; et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; } static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lapic *la; la = &lapics[PCPU_GET(apic_id)]; if (et->et_frequency == 0) { lapic_calibrate_initcount(et, la); if (lapic_timer_tsc_deadline) lapic_calibrate_deadline(et, la); } if (period != 0) { lapic_change_mode(et, la, LAT_MODE_PERIODIC); la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32; lapic_timer_periodic(la); } else if (lapic_timer_tsc_deadline) { lapic_change_mode(et, la, LAT_MODE_DEADLINE); la->la_timer_period = (et->et_frequency * first) >> 32; lapic_timer_deadline(la); } else { lapic_change_mode(et, la, LAT_MODE_ONESHOT); la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32; lapic_timer_oneshot(la); } return (0); } static int lapic_et_stop(struct eventtimer *et) { struct lapic *la; la = &lapics[PCPU_GET(apic_id)]; lapic_timer_stop(la); la->la_timer_mode = LAT_MODE_UNDEF; return (0); } static void native_lapic_disable(void) { uint32_t value; /* Software disable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~APIC_SVR_SWEN; lapic_write32(LAPIC_SVR, value); } static void lapic_enable(void) { uint32_t value; /* Program the spurious vector to enable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT; if (lapic_eoi_suppression) value |= APIC_SVR_EOI_SUPPRESSION; lapic_write32(LAPIC_SVR, value); } /* Reset the local APIC on the BSP during resume. */ static void lapic_resume(struct pic *pic, bool suspend_cancelled) { lapic_setup(0); } static int native_lapic_id(void) { uint32_t v; KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped")); v = lapic_read32(LAPIC_ID); if (!x2apic_mode) v >>= APIC_ID_SHIFT; return (v); } static int native_lapic_intr_pending(u_int vector) { uint32_t irr; /* * The IRR registers are an array of registers each of which * only describes 32 interrupts in the low 32 bits. Thus, we * divide the vector by 32 to get the register index. * Finally, we modulus the vector by 32 to determine the * individual bit to test. */ irr = lapic_read32(LAPIC_IRR0 + vector / 32); return (irr & 1 << (vector % 32)); } static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { struct lapic *la; KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", __func__, apic_id)); KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", __func__, cluster)); KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, ("%s: intra cluster id %u too big", __func__, cluster_id)); la = &lapics[apic_id]; la->la_cluster = cluster; la->la_cluster_id = cluster_id; } static int native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) { if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_masked = masked; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_masked = masked; lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked"); return (0); } static int native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) { struct lvt *lvt; if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvt = &lvts[pin]; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lvt = &lapics[apic_id].la_lvts[pin]; lvt->lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } lvt->lvt_mode = mode; switch (mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: lvt->lvt_edgetrigger = 1; lvt->lvt_activehi = 1; if (mode == APIC_LVT_DM_EXTINT) lvt->lvt_masked = 1; else lvt->lvt_masked = 0; break; default: panic("Unsupported delivery mode: 0x%x\n", mode); } if (bootverbose) { printf(" Routing "); switch (mode) { case APIC_LVT_DM_NMI: printf("NMI"); break; case APIC_LVT_DM_SMI: printf("SMI"); break; case APIC_LVT_DM_INIT: printf("INIT"); break; case APIC_LVT_DM_EXTINT: printf("ExtINT"); break; } printf(" -> LINT%u\n", pin); } return (0); } static int native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) { if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_active = 1; lapics[apic_id].la_lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u polarity: %s\n", pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) { if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u trigger: %s\n", pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Adjust the TPR of the current CPU so that it blocks all interrupts below * the passed in vector. */ static void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR lapic_write32(LAPIC_TPR, vector); #else uint32_t tpr; tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; tpr |= vector; lapic_write32(LAPIC_TPR, tpr); #endif } static void native_lapic_eoi(void) { lapic_write32_nofence(LAPIC_EOI, 0); } void lapic_handle_intr(int vector, struct trapframe *frame) { struct intsrc *isrc; isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), vector)); intr_execute_handlers(isrc, frame); } void lapic_handle_timer(struct trapframe *frame) { struct lapic *la; struct trapframe *oldframe; struct thread *td; /* Send EOI first thing. */ lapic_eoi(); #if defined(SMP) && !defined(SCHED_ULE) /* * Don't do any accounting for the disabled HTT cores, since it * will provide misleading numbers for the userland. * * No locking is necessary here, since even if we lose the race * when hlt_cpus_mask changes it is not a big deal, really. * * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask * and unlike other schedulers it actually schedules threads to * those CPUs. */ if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif /* Look up our local APIC structure for the tick counters. */ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } critical_exit(); } static void lapic_timer_set_divisor(u_int divisor) { KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), ("lapic: invalid divisor %u", divisor)); lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); } static void lapic_timer_oneshot(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_ONE_SHOT; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); } static void lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) { uint32_t value; value = la->lvt_timer_base; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, count); } static void lapic_timer_periodic(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_PERIODIC; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); } static void lapic_timer_deadline(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_TSCDLT; if (value != la->lvt_timer_last) { la->lvt_timer_last = value; lapic_write32_nofence(LAPIC_LVT_TIMER, value); if (!x2apic_mode) mfence(); } wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); } static void lapic_timer_stop(struct lapic *la) { uint32_t value; if (la->la_timer_mode == LAT_MODE_DEADLINE) { wrmsr(MSR_TSC_DEADLINE, 0); mfence(); } else { value = la->lvt_timer_base; value &= ~APIC_LVTT_TM; value |= APIC_LVT_M; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); } } void lapic_handle_cmc(void) { lapic_eoi(); cmc_intr(); } /* * Called from the mca_init() to activate the CMC interrupt if this CPU is * responsible for monitoring any MC banks for CMC events. Since mca_init() * is called prior to lapic_setup() during boot, this just needs to unmask * this CPU's LVT_CMCI entry. */ static void native_lapic_enable_cmc(void) { u_int apic_id; #ifdef DEV_ATPIC if (!x2apic_mode && lapic_map == NULL) return; #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; if (bootverbose) printf("lapic%u: CMCI unmasked\n", apic_id); } void lapic_handle_error(void) { uint32_t esr; /* * Read the contents of the error status register. Write to * the register first before reading from it to force the APIC * to update its value to indicate any errors that have * occurred since the previous write to the register. */ lapic_write32(LAPIC_ESR, 0); esr = lapic_read32(LAPIC_ESR); printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); } static u_int native_apic_cpuid(u_int apic_id) { #ifdef SMP return apic_cpuids[apic_id]; #else return 0; #endif } /* Request a free IDT vector to be used by the specified IRQ. */ static u_int native_apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple * algorithm to find the first free vector. */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { if (lapics[apic_id].la_ioint_irqs[vector] != -1) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); return (0); } /* * Request 'count' free contiguous IDT vectors to be used by 'count' * IRQs. 'count' must be a power of two and the vectors will be * aligned on a boundary of 'align'. If the request cannot be * satisfied, 0 is returned. */ static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; KASSERT(powerof2(count), ("bad count")); KASSERT(powerof2(align), ("bad align")); KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif /* * Search for 'count' free vectors. As with apic_alloc_vector(), * this just uses a simple first fit algorithm. */ run = 0; first = 0; mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ if (lapics[apic_id].la_ioint_irqs[vector] != -1) { run = 0; first = 0; continue; } /* Start a new run if run == 0 and vector is aligned. */ if (run == 0) { if ((vector & (align - 1)) != 0) continue; first = vector; } run++; /* Keep looping if the run isn't long enough yet. */ if (run < count) continue; /* Found a run, assign IRQs and return the first vector. */ for (vector = 0; vector < count; vector++) lapics[apic_id].la_ioint_irqs[first + vector] = irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); return (0); } /* * Enable a vector for a particular apic_id. Since all lapics share idt * entries and ioint_handlers this enables the vector on all lapics. lapics * which do not have the vector configured would report spurious interrupts * should it fire. */ static void native_apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } static void native_apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet /* * We can not currently clear the idt entry because other cpus * may have a valid vector at this offset. */ setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); #endif } /* Release an APIC vector when it's no longer in use. */ static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) { struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif /* * Bind us to the cpu that owned the vector before freeing it so * we don't lose an interrupt delivery race. */ td = curthread; if (!rebooting) { thread_lock(td); if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); thread_unlock(td); } mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); sched_unbind(td); thread_unlock(td); } } /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ static u_int apic_idt_to_irq(u_int apic_id, u_int vector) { int irq; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; return (irq); } #ifdef DDB /* * Dump data about APIC IDT vector mappings. */ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) verbose = 2; else if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { if (lapics[apic_id].la_present == 0) continue; db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == -1 || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) continue; #endif #ifdef XENHVM if (irq == IRQ_EVTCHN) continue; #endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); else if (irq < NUM_IO_INTS) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); else db_dump_intr_event(isrc->is_event, verbose == 2); } else db_printf("IRQ %u ???\n", irq); } } } static void dump_mask(const char *prefix, uint32_t v, int base) { int i, first; first = 1; for (i = 0; i < 32; i++) if (v & (1 << i)) { if (first) { db_printf("%s:", prefix); first = 0; } db_printf(" %02x", base + i); } if (!first) db_printf("\n"); } /* Show info from the lapic regs for this CPU. */ DB_SHOW_COMMAND(lapic, db_show_lapic) { uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); v = lapic_read32(LAPIC_VERSION); db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); v = lapic_read32(LAPIC_SVR); db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? "enabled" : "disabled"); db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); #define dump_field(prefix, regn, index) \ dump_mask(__XSTRING(prefix ## index), \ lapic_read32(LAPIC_ ## regn ## index), \ index * 32) db_printf("In-service Interrupts:\n"); dump_field(isr, ISR, 0); dump_field(isr, ISR, 1); dump_field(isr, ISR, 2); dump_field(isr, ISR, 3); dump_field(isr, ISR, 4); dump_field(isr, ISR, 5); dump_field(isr, ISR, 6); dump_field(isr, ISR, 7); db_printf("TMR Interrupts:\n"); dump_field(tmr, TMR, 0); dump_field(tmr, TMR, 1); dump_field(tmr, TMR, 2); dump_field(tmr, TMR, 3); dump_field(tmr, TMR, 4); dump_field(tmr, TMR, 5); dump_field(tmr, TMR, 6); dump_field(tmr, TMR, 7); db_printf("IRR Interrupts:\n"); dump_field(irr, IRR, 0); dump_field(irr, IRR, 1); dump_field(irr, IRR, 2); dump_field(irr, IRR, 3); dump_field(irr, IRR, 4); dump_field(irr, IRR, 5); dump_field(irr, IRR, 6); dump_field(irr, IRR, 7); #undef dump_field } #endif /* * APIC probing support code. This includes code to manage enumerators. */ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; void apic_register_enumerator(struct apic_enumerator *enumerator) { #ifdef INVARIANTS struct apic_enumerator *apic_enum; SLIST_FOREACH(apic_enum, &enumerators, apic_next) { if (apic_enum == enumerator) panic("%s: Duplicate register of %s", __func__, enumerator->apic_name); } #endif SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); } /* * We have to look for CPU's very, very early because certain subsystems * want to know how many CPU's we have extremely early on in the boot * process. */ static void apic_init(void *dummy __unused) { struct apic_enumerator *enumerator; int retval, best; /* We only support built in local APICs. */ if (!(cpu_feature & CPUID_APIC)) return; /* Don't probe if APIC mode is disabled. */ if (resource_disabled("apic", 0)) return; /* Probe all the enumerators to find the best match. */ best_enum = NULL; best = 0; SLIST_FOREACH(enumerator, &enumerators, apic_next) { retval = enumerator->apic_probe(); if (retval > 0) continue; if (best_enum == NULL || best < retval) { best_enum = enumerator; best = retval; } } if (best_enum == NULL) { if (bootverbose) printf("APIC: Could not find any APICs.\n"); #ifndef DEV_ATPIC panic("running without device atpic requires a local APIC"); #endif return; } if (bootverbose) printf("APIC: Using the %s enumerator.\n", best_enum->apic_name); #ifdef I686_CPU /* * To work around an errata, we disable the local APIC on some * CPUs during early startup. We need to turn the local APIC back * on on such CPUs now. */ ppro_reenable_apic(); #endif /* Probe the CPU's in the system. */ retval = best_enum->apic_probe_cpus(); if (retval != 0) printf("%s: Failed to probe CPUs: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); /* * Setup the local APIC. We have to do this prior to starting up the APs * in the SMP case. */ static void apic_setup_local(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* Initialize the local APIC. */ retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); /* * Setup the I/O APICs. */ static void apic_setup_io(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* * Local APIC must be registered before other PICs and pseudo PICs * for proper suspend/resume order. */ intr_register_pic(&lapic_pic); retval = best_enum->apic_setup_io(); if (retval != 0) printf("%s: Failed to setup I/O APICs: returned %d\n", best_enum->apic_name, retval); /* * Finish setting up the local APIC on the BSP once we know * how to properly program the LINT pins. In particular, this * enables the EOI suppression mode, if LAPIC support it and * user did not disabled the mode. */ lapic_setup(1); if (bootverbose) lapic_dump("BSP"); /* Enable the MSI "pic". */ init_ops.msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); #ifdef SMP /* * Inter Processor Interrupt functions. The lapic_ipi_*() functions are * private to the MD code. The public interface for the rest of the * kernel is defined in mp_machdep.c. */ /* * Wait delay microseconds for IPI to be sent. If delay is -1, we * wait forever. */ static int native_lapic_ipi_wait(int delay) { uint64_t rx; /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ if (x2apic_mode) return (1); for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); ia32_pause(); } return (0); } static void native_lapic_ipi_raw(register_t icrlo, u_int dest) { uint64_t icr; uint32_t vhi, vlo; register_t saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(x2apic_mode || lapic_map != NULL, ("%s called too early", __func__)); KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used. */ if (!x2apic_mode) { saveintr = intr_disable(); icr = lapic_read_icr(); } if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { if (x2apic_mode) { vhi = dest; } else { vhi = icr >> 32; vhi &= ~APIC_ID_MASK; vhi |= dest << APIC_ID_SHIFT; } } else { vhi = 0; } /* Program the contents of the IPI and dispatch it. */ if (x2apic_mode) { vlo = icrlo; } else { vlo = icr; vlo &= APIC_ICRLO_RESV_MASK; vlo |= icrlo; } lapic_write_icr(vhi, vlo); if (!x2apic_mode) intr_restore(saveintr); } #define BEFORE_SPIN 50000 #ifdef DETECT_DEADLOCK #define AFTER_SPIN 50 #endif static void native_lapic_ipi_vectored(u_int vector, int dest) { register_t icrlo, destfield; KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; /* * NMI IPIs are just fake vectors used to send a NMI. Use special rules * regarding NMIs if passed, otherwise specify the vector. */ if (vector >= IPI_NMI_FIRST) icrlo |= APIC_DELMODE_NMI; else icrlo |= vector | APIC_DELMODE_FIXED; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: icrlo |= APIC_DEST_SELF; break; case APIC_IPI_DEST_ALL: icrlo |= APIC_DEST_ALLISELF; break; case APIC_IPI_DEST_OTHERS: icrlo |= APIC_DEST_ALLESELF; break; default: KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid destination 0x%x", __func__, dest)); destfield = dest; } /* Wait for an earlier IPI to finish. */ if (!lapic_ipi_wait(BEFORE_SPIN)) { if (panicstr != NULL) return; else panic("APIC: Previous IPI is stuck"); } lapic_ipi_raw(icrlo, destfield); #ifdef DETECT_DEADLOCK /* Wait for IPI to be delivered. */ if (!lapic_ipi_wait(AFTER_SPIN)) { #ifdef needsattention /* * XXX FIXME: * * The above function waits for the message to actually be * delivered. It breaks out after an arbitrary timeout * since the message should eventually be delivered (at * least in theory) and that if it wasn't we would catch * the failure with the check above when the next IPI is * sent. * * We could skip this wait entirely, EXCEPT it probably * protects us from other routines that assume that the * message was delivered and acted upon when this function * returns. */ printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until mesage is sent without a timeout. */ while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } #endif /* DETECT_DEADLOCK */ } #endif /* SMP */ /* * Since the IDT is shared by all CPUs the IPI slot update needs to be globally * visible. * * Consider the case where an IPI is generated immediately after allocation: * vector = lapic_ipi_alloc(ipifunc); * ipi_selected(other_cpus, vector); * * In xAPIC mode a write to ICR_LO has serializing semantics because the * APIC page is mapped as an uncached region. In x2APIC mode there is an * explicit 'mfence' before the ICR MSR is written. Therefore in both cases * the IDT slot update is globally visible before the IPI is delivered. */ static int native_lapic_ipi_alloc(inthand_t *ipifunc) { struct gate_descriptor *ip; long func; int idx, vector; KASSERT(ipifunc != &IDTVEC(rsvd), ("invalid ipifunc %p", ipifunc)); vector = -1; mtx_lock_spin(&icu_lock); for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { ip = &idt[idx]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; if (func == (uintptr_t)&IDTVEC(rsvd)) { vector = idx; setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); break; } } mtx_unlock_spin(&icu_lock); return (vector); } static void native_lapic_ipi_free(int vector) { struct gate_descriptor *ip; long func; KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, ("%s: invalid vector %d", __func__, vector)); mtx_lock_spin(&icu_lock); ip = &idt[vector]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; KASSERT(func != (uintptr_t)&IDTVEC(rsvd), ("invalid idtfunc %#lx", func)); setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); mtx_unlock_spin(&icu_lock); } Index: stable/11/sys/x86/xen/xen_apic.c =================================================================== --- stable/11/sys/x86/xen/xen_apic.c (revision 306627) +++ stable/11/sys/x86/xen/xen_apic.c (revision 306628) @@ -1,541 +1,549 @@ /* * Copyright (c) 2014 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*--------------------------------- Macros -----------------------------------*/ #define XEN_APIC_UNSUPPORTED \ panic("%s: not available in Xen PV port.", __func__) /*--------------------------- Forward Declarations ---------------------------*/ #ifdef SMP static driver_filter_t xen_smp_rendezvous_action; static driver_filter_t xen_invltlb; static driver_filter_t xen_invlpg; static driver_filter_t xen_invlrng; static driver_filter_t xen_invlcache; static driver_filter_t xen_ipi_bitmap_handler; static driver_filter_t xen_cpustop_handler; static driver_filter_t xen_cpususpend_handler; static driver_filter_t xen_cpustophard_handler; #endif /*---------------------------------- Macros ----------------------------------*/ #define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) /*--------------------------------- Xen IPIs ---------------------------------*/ #ifdef SMP struct xen_ipi_handler { driver_filter_t *filter; const char *description; }; static struct xen_ipi_handler xen_ipis[] = { [IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" }, [IPI_TO_IDX(IPI_INVLTLB)] = { xen_invltlb, "itlb"}, [IPI_TO_IDX(IPI_INVLPG)] = { xen_invlpg, "ipg" }, [IPI_TO_IDX(IPI_INVLRNG)] = { xen_invlrng, "irg" }, [IPI_TO_IDX(IPI_INVLCACHE)] = { xen_invlcache, "ic" }, [IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler, "b" }, [IPI_TO_IDX(IPI_STOP)] = { xen_cpustop_handler, "st" }, [IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" }, [IPI_TO_IDX(IPI_STOP_HARD)] = { xen_cpustophard_handler, "sth" }, }; #endif /*------------------------------- Per-CPU Data -------------------------------*/ #ifdef SMP DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); #endif /*------------------------------- Xen PV APIC --------------------------------*/ static void xen_pv_lapic_create(u_int apic_id, int boot_cpu) { #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } static void xen_pv_lapic_init(vm_paddr_t addr) { } static void xen_pv_lapic_setup(int boot) { } static void xen_pv_lapic_dump(const char *str) { printf("cpu%d %s XEN PV LAPIC\n", PCPU_GET(cpuid), str); } static void xen_pv_lapic_disable(void) { } +static bool +xen_pv_lapic_is_x2apic(void) +{ + + return (false); +} + static void xen_pv_lapic_eoi(void) { XEN_APIC_UNSUPPORTED; } static int xen_pv_lapic_id(void) { return (PCPU_GET(apic_id)); } static int xen_pv_lapic_intr_pending(u_int vector) { XEN_APIC_UNSUPPORTED; return (0); } static u_int xen_pv_apic_cpuid(u_int apic_id) { #ifdef SMP return (apic_cpuids[apic_id]); #else return (0); #endif } static u_int xen_pv_apic_alloc_vector(u_int apic_id, u_int irq) { XEN_APIC_UNSUPPORTED; return (0); } static u_int xen_pv_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { XEN_APIC_UNSUPPORTED; return (0); } static void xen_pv_apic_disable_vector(u_int apic_id, u_int vector) { XEN_APIC_UNSUPPORTED; } static void xen_pv_apic_enable_vector(u_int apic_id, u_int vector) { XEN_APIC_UNSUPPORTED; } static void xen_pv_apic_free_vector(u_int apic_id, u_int vector, u_int irq) { XEN_APIC_UNSUPPORTED; } static void xen_pv_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { XEN_APIC_UNSUPPORTED; } static int xen_pv_lapic_enable_pmc(void) { XEN_APIC_UNSUPPORTED; return (0); } static void xen_pv_lapic_disable_pmc(void) { XEN_APIC_UNSUPPORTED; } static void xen_pv_lapic_reenable_pmc(void) { XEN_APIC_UNSUPPORTED; } static void xen_pv_lapic_enable_cmc(void) { } #ifdef SMP static void xen_pv_lapic_ipi_raw(register_t icrlo, u_int dest) { XEN_APIC_UNSUPPORTED; } static void xen_pv_lapic_ipi_vectored(u_int vector, int dest) { xen_intr_handle_t *ipi_handle; int ipi_idx, to_cpu, self; ipi_idx = IPI_TO_IDX(vector); if (ipi_idx >= nitems(xen_ipis)) panic("IPI out of range"); switch(dest) { case APIC_IPI_DEST_SELF: ipi_handle = DPCPU_GET(ipi_handle); xen_intr_signal(ipi_handle[ipi_idx]); break; case APIC_IPI_DEST_ALL: CPU_FOREACH(to_cpu) { ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); xen_intr_signal(ipi_handle[ipi_idx]); } break; case APIC_IPI_DEST_OTHERS: self = PCPU_GET(cpuid); CPU_FOREACH(to_cpu) { if (to_cpu != self) { ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); xen_intr_signal(ipi_handle[ipi_idx]); } } break; default: to_cpu = apic_cpuid(dest); ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); xen_intr_signal(ipi_handle[ipi_idx]); break; } } static int xen_pv_lapic_ipi_wait(int delay) { XEN_APIC_UNSUPPORTED; return (0); } #endif /* SMP */ static int xen_pv_lapic_ipi_alloc(inthand_t *ipifunc) { XEN_APIC_UNSUPPORTED; return (-1); } static void xen_pv_lapic_ipi_free(int vector) { XEN_APIC_UNSUPPORTED; } static int xen_pv_lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked) { XEN_APIC_UNSUPPORTED; return (0); } static int xen_pv_lapic_set_lvt_mode(u_int apic_id, u_int lvt, uint32_t mode) { XEN_APIC_UNSUPPORTED; return (0); } static int xen_pv_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { XEN_APIC_UNSUPPORTED; return (0); } static int xen_pv_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { XEN_APIC_UNSUPPORTED; return (0); } /* Xen apic_ops implementation */ struct apic_ops xen_apic_ops = { .create = xen_pv_lapic_create, .init = xen_pv_lapic_init, .xapic_mode = xen_pv_lapic_disable, + .is_x2apic = xen_pv_lapic_is_x2apic, .setup = xen_pv_lapic_setup, .dump = xen_pv_lapic_dump, .disable = xen_pv_lapic_disable, .eoi = xen_pv_lapic_eoi, .id = xen_pv_lapic_id, .intr_pending = xen_pv_lapic_intr_pending, .set_logical_id = xen_pv_lapic_set_logical_id, .cpuid = xen_pv_apic_cpuid, .alloc_vector = xen_pv_apic_alloc_vector, .alloc_vectors = xen_pv_apic_alloc_vectors, .enable_vector = xen_pv_apic_enable_vector, .disable_vector = xen_pv_apic_disable_vector, .free_vector = xen_pv_apic_free_vector, .enable_pmc = xen_pv_lapic_enable_pmc, .disable_pmc = xen_pv_lapic_disable_pmc, .reenable_pmc = xen_pv_lapic_reenable_pmc, .enable_cmc = xen_pv_lapic_enable_cmc, #ifdef SMP .ipi_raw = xen_pv_lapic_ipi_raw, .ipi_vectored = xen_pv_lapic_ipi_vectored, .ipi_wait = xen_pv_lapic_ipi_wait, #endif .ipi_alloc = xen_pv_lapic_ipi_alloc, .ipi_free = xen_pv_lapic_ipi_free, .set_lvt_mask = xen_pv_lapic_set_lvt_mask, .set_lvt_mode = xen_pv_lapic_set_lvt_mode, .set_lvt_polarity = xen_pv_lapic_set_lvt_polarity, .set_lvt_triggermode = xen_pv_lapic_set_lvt_triggermode, }; #ifdef SMP /*---------------------------- XEN PV IPI Handlers ---------------------------*/ /* * These are C clones of the ASM functions found in apic_vector. */ static int xen_ipi_bitmap_handler(void *arg) { struct trapframe *frame; frame = arg; ipi_bitmap_handler(*frame); return (FILTER_HANDLED); } static int xen_smp_rendezvous_action(void *arg) { #ifdef COUNT_IPIS (*ipi_rendezvous_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ smp_rendezvous_action(); return (FILTER_HANDLED); } static int xen_invltlb(void *arg) { invltlb_handler(); return (FILTER_HANDLED); } #ifdef __amd64__ static int xen_invltlb_invpcid(void *arg) { invltlb_invpcid_handler(); return (FILTER_HANDLED); } static int xen_invltlb_pcid(void *arg) { invltlb_pcid_handler(); return (FILTER_HANDLED); } #endif static int xen_invlpg(void *arg) { invlpg_handler(); return (FILTER_HANDLED); } static int xen_invlrng(void *arg) { invlrng_handler(); return (FILTER_HANDLED); } static int xen_invlcache(void *arg) { invlcache_handler(); return (FILTER_HANDLED); } static int xen_cpustop_handler(void *arg) { cpustop_handler(); return (FILTER_HANDLED); } static int xen_cpususpend_handler(void *arg) { cpususpend_handler(); return (FILTER_HANDLED); } static int xen_cpustophard_handler(void *arg) { ipi_nmi_handler(); return (FILTER_HANDLED); } /*----------------------------- XEN PV IPI setup -----------------------------*/ /* * Those functions are provided outside of the Xen PV APIC implementation * so PVHVM guests can also use PV IPIs without having an actual Xen PV APIC, * because on PVHVM there's an emulated LAPIC provided by Xen. */ static void xen_cpu_ipi_init(int cpu) { xen_intr_handle_t *ipi_handle; const struct xen_ipi_handler *ipi; device_t dev; int idx, rc; ipi_handle = DPCPU_ID_GET(cpu, ipi_handle); dev = pcpu_find(cpu)->pc_device; KASSERT((dev != NULL), ("NULL pcpu device_t")); for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) { if (ipi->filter == NULL) { ipi_handle[idx] = NULL; continue; } rc = xen_intr_alloc_and_bind_ipi(dev, cpu, ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]); if (rc != 0) panic("Unable to allocate a XEN IPI port"); xen_intr_describe(ipi_handle[idx], "%s", ipi->description); } } static void xen_setup_cpus(void) { int i; if (!xen_vector_callback_enabled) return; #ifdef __amd64__ if (pmap_pcid_enabled) { xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = invpcid_works ? xen_invltlb_invpcid : xen_invltlb_pcid; } #endif CPU_FOREACH(i) xen_cpu_ipi_init(i); /* Set the xen pv ipi ops to replace the native ones */ if (xen_hvm_domain()) apic_ops.ipi_vectored = xen_pv_lapic_ipi_vectored; } /* We need to setup IPIs before APs are started */ SYSINIT(xen_setup_cpus, SI_SUB_SMP-1, SI_ORDER_FIRST, xen_setup_cpus, NULL); #endif /* SMP */ Index: stable/11 =================================================================== --- stable/11 (revision 306627) +++ stable/11 (revision 306628) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r305978