Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 (revision 145079) +++ head/sys/conf/options.i386 (revision 145080) @@ -1,163 +1,162 @@ # $FreeBSD$ # Options specific to the i386 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h BROKEN_KEYBOARD_RESET opt_reset.h DISABLE_PG_G opt_pmap.h DISABLE_PSE opt_pmap.h I586_PMC_GUPROF opt_i586_guprof.h MAXMEM MPTABLE_FORCE_HTT -NO_MIXED_MODE PERFMON PMAP_SHPGPERPROC opt_pmap.h POWERFAIL_NMI opt_trap.h PPC_DEBUG opt_ppc.h PPC_PROBE_CHIPSET opt_ppc.h MP_WATCHDOG opt_mp_watchdog.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h IBCS2 opt_dontuse.h COMPAT_LINUX opt_dontuse.h COMPAT_SVR4 opt_dontuse.h DEBUG_SVR4 opt_svr4.h NDISAPI opt_dontuse.h PECOFF_DEBUG opt_pecoff.h PECOFF_SUPPORT opt_dontuse.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h # Physical address extensions and support for >4G ram. As above. PAE opt_global.h CLK_CALIBRATION_LOOP opt_clock.h CLK_USE_I8254_CALIBRATION opt_clock.h TIMER_FREQ opt_clock.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_* CPU_DISABLE_SSE opt_cpu.h CPU_ELAN opt_cpu.h CPU_ELAN_PPS opt_cpu.h CPU_ELAN_XTAL opt_cpu.h CPU_ENABLE_LONGRUN opt_cpu.h CPU_ENABLE_SSE opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_GEODE opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SOEKRIS opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h NO_F00F_HACK opt_cpu.h NO_MEMORY_HOLE opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. I486_CPU opt_global.h I586_CPU opt_global.h I686_CPU opt_global.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA VESA_DEBUG opt_vesa.h PSM_DEBUG opt_psm.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h ATKBD_DFLT_KEYMAP opt_atkbd.h # pcvt(4) has a bunch of options FAT_CURSOR opt_pcvt.h PCVT_123GENERIC opt_pcvt.h PCVT_24LINESDEF opt_pcvt.h PCVT_CTRL_ALT_DEL opt_pcvt.h PCVT_GREENSAVER opt_pcvt.h PCVT_INHIBIT_NUMLOCK opt_pcvt.h PCVT_META_ESC opt_pcvt.h PCVT_NO_LED_UPDATE opt_pcvt.h PCVT_NSCREENS opt_pcvt.h PCVT_NULLCHARS opt_pcvt.h PCVT_PRETTYSCRNS opt_pcvt.h PCVT_SCANSET opt_pcvt.h PCVT_SCREENSAVER opt_pcvt.h PCVT_SETCOLOR opt_pcvt.h PCVT_SHOWKEYS opt_pcvt.h PCVT_SLOW_INTERRUPT opt_pcvt.h PCVT_SYSBEEPF opt_pcvt.h PCVT_UPDATEFAST opt_pcvt.h PCVT_UPDATESLOW opt_pcvt.h PCVT_USEKBDSEC opt_pcvt.h PCVT_VT220KEYB opt_pcvt.h XSERVER opt_pcvt.h # Video spigot SPIGOT_UNSECURE opt_spigot.h # Enables NETGRAPH support for Cronyx adapters NETGRAPH_CRONYX opt_ng_cronyx.h # ------------------------------- # isdn4bsd: passive ISA cards # ------------------------------- TEL_S0_8 opt_i4b.h TEL_S0_16 opt_i4b.h TEL_S0_16_3 opt_i4b.h AVM_A1 opt_i4b.h USR_STI opt_i4b.h ITKIX1 opt_i4b.h ELSA_PCC16 opt_i4b.h # ------------------------------- # isdn4bsd: passive ISA PnP cards # ------------------------------- CRTX_S0_P opt_i4b.h DRN_NGO opt_i4b.h TEL_S0_16_3_P opt_i4b.h SEDLBAUER opt_i4b.h DYNALINK opt_i4b.h ASUSCOM_IPAC opt_i4b.h ELSA_QS1ISA opt_i4b.h SIEMENS_ISURF2 opt_i4b.h EICON_DIVA opt_i4b.h COMPAQ_M610 opt_i4b.h # ------------------------------- # isdn4bsd: passive PCI cards # ------------------------------- ELSA_QS1PCI opt_i4b.h # ------------------------------- # isdn4bsd: misc options # ------------------------------- # temporary workaround for SMP machines I4B_SMP_WORKAROUND opt_i4b.h # enable VJ compression code for ipr i/f IPR_VJ opt_i4b.h IPR_LOG opt_i4b.h # Device options DEV_APIC opt_apic.h DEV_NPX opt_npx.h ASR_COMPAT opt_asr.h Index: head/sys/i386/acpica/madt.c =================================================================== --- head/sys/i386/acpica/madt.c (revision 145079) +++ head/sys/i386/acpica/madt.c (revision 145080) @@ -1,782 +1,780 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi.h" #include #include #include #define NIOAPICS 32 /* Max number of I/O APICs */ #define NLAPICS 32 /* Max number of local APICs */ typedef void madt_entry_handler(APIC_HEADER *entry, void *arg); /* These two arrays are indexed by APIC IDs. */ struct ioapic_info { void *io_apic; UINT32 io_vector; } ioapics[NIOAPICS]; struct lapic_info { u_int la_enabled:1; u_int la_acpi_id:8; } lapics[NLAPICS]; static int madt_found_sci_override; static MULTIPLE_APIC_TABLE *madt; static vm_paddr_t madt_physaddr; static vm_offset_t madt_length; MALLOC_DEFINE(M_MADT, "MADT Table", "ACPI MADT Table Items"); static enum intr_polarity interrupt_polarity(UINT16 Polarity, UINT8 Source); static enum intr_trigger interrupt_trigger(UINT16 TriggerMode, UINT8 Source); static int madt_find_cpu(u_int acpi_id, u_int *apic_id); static int madt_find_interrupt(int intr, void **apic, u_int *pin); static void *madt_map(vm_paddr_t pa, int offset, vm_offset_t length); static void *madt_map_table(vm_paddr_t pa, int offset, const char *sig); static void madt_parse_apics(APIC_HEADER *entry, void *arg); static void madt_parse_interrupt_override(MADT_INTERRUPT_OVERRIDE *intr); static void madt_parse_ints(APIC_HEADER *entry, void *arg __unused); static void madt_parse_local_nmi(MADT_LOCAL_APIC_NMI *nmi); static void madt_parse_nmi(MADT_NMI_SOURCE *nmi); static int madt_probe(void); static int madt_probe_cpus(void); static void madt_probe_cpus_handler(APIC_HEADER *entry, void *arg __unused); static int madt_probe_table(vm_paddr_t address); static void madt_register(void *dummy); static int madt_setup_local(void); static int madt_setup_io(void); static void madt_unmap(void *data, vm_offset_t length); static void madt_unmap_table(void *table); static void madt_walk_table(madt_entry_handler *handler, void *arg); static struct apic_enumerator madt_enumerator = { "MADT", madt_probe, madt_probe_cpus, madt_setup_local, madt_setup_io }; /* * Code to abuse the crashdump map to map in the tables for the early * probe. We cheat and make the following assumptions about how we * use this KVA: page 0 is used to map in the first page of each table * found via the RSDT or XSDT and pages 1 to n are used to map in the * RSDT or XSDT. The offset is in pages; the length is in bytes. */ static void * madt_map(vm_paddr_t pa, int offset, vm_offset_t length) { vm_offset_t va, off; void *data; off = pa & PAGE_MASK; length = roundup(length + off, PAGE_SIZE); pa = pa & PG_FRAME; va = (vm_offset_t)pmap_kenter_temporary(pa, offset) + (offset * PAGE_SIZE); data = (void *)(va + off); length -= PAGE_SIZE; while (length > 0) { va += PAGE_SIZE; pa += PAGE_SIZE; length -= PAGE_SIZE; pmap_kenter(va, pa); invlpg(va); } return (data); } static void madt_unmap(void *data, vm_offset_t length) { vm_offset_t va, off; va = (vm_offset_t)data; off = va & PAGE_MASK; length = roundup(length + off, PAGE_SIZE); va &= ~PAGE_MASK; while (length > 0) { pmap_kremove(va); invlpg(va); va += PAGE_SIZE; length -= PAGE_SIZE; } } static void * madt_map_table(vm_paddr_t pa, int offset, const char *sig) { ACPI_TABLE_HEADER *header; vm_offset_t length; void *table; header = madt_map(pa, offset, sizeof(ACPI_TABLE_HEADER)); if (strncmp(header->Signature, sig, 4) != 0) { madt_unmap(header, sizeof(ACPI_TABLE_HEADER)); return (NULL); } length = header->Length; madt_unmap(header, sizeof(ACPI_TABLE_HEADER)); table = madt_map(pa, offset, length); if (ACPI_FAILURE(AcpiTbVerifyTableChecksum(table))) { if (bootverbose) printf("MADT: Failed checksum for table %s\n", sig); madt_unmap(table, length); return (NULL); } return (table); } static void madt_unmap_table(void *table) { ACPI_TABLE_HEADER *header; header = (ACPI_TABLE_HEADER *)table; madt_unmap(table, header->Length); } /* * Look for an ACPI Multiple APIC Description Table ("APIC") */ static int madt_probe(void) { ACPI_POINTER rsdp_ptr; RSDP_DESCRIPTOR *rsdp; RSDT_DESCRIPTOR *rsdt; XSDT_DESCRIPTOR *xsdt; int i, count; if (resource_disabled("acpi", 0)) return (ENXIO); /* * Map in the RSDP. Since ACPI uses AcpiOsMapMemory() which in turn * calls pmap_mapdev() to find the RSDP, we assume that we can use * pmap_mapdev() to map the RSDP. */ if (AcpiOsGetRootPointer(ACPI_LOGICAL_ADDRESSING, &rsdp_ptr) != AE_OK) return (ENXIO); #ifdef __i386__ KASSERT(rsdp_ptr.Pointer.Physical < KERNLOAD, ("RSDP too high")); #endif rsdp = pmap_mapdev(rsdp_ptr.Pointer.Physical, sizeof(RSDP_DESCRIPTOR)); if (rsdp == NULL) { if (bootverbose) printf("MADT: Failed to map RSDP\n"); return (ENXIO); } /* * For ACPI < 2.0, use the RSDT. For ACPI >= 2.0, use the XSDT. * We map the XSDT and RSDT at page 1 in the crashdump area. * Page 0 is used to map in the headers of candidate ACPI tables. */ if (rsdp->Revision >= 2) { /* * AcpiOsGetRootPointer only verifies the checksum for * the version 1.0 portion of the RSDP. Version 2.0 has * an additional checksum that we verify first. */ if (AcpiTbChecksum(rsdp, ACPI_RSDP_XCHECKSUM_LENGTH) != 0) { if (bootverbose) printf("MADT: RSDP failed extended checksum\n"); return (ENXIO); } xsdt = madt_map_table(rsdp->XsdtPhysicalAddress, 1, XSDT_SIG); if (xsdt == NULL) { if (bootverbose) printf("MADT: Failed to map XSDT\n"); return (ENXIO); } count = (xsdt->Length - sizeof(ACPI_TABLE_HEADER)) / sizeof(UINT64); for (i = 0; i < count; i++) if (madt_probe_table(xsdt->TableOffsetEntry[i])) break; madt_unmap_table(xsdt); } else { rsdt = madt_map_table(rsdp->RsdtPhysicalAddress, 1, RSDT_SIG); if (rsdt == NULL) { if (bootverbose) printf("MADT: Failed to map RSDT\n"); return (ENXIO); } count = (rsdt->Length - sizeof(ACPI_TABLE_HEADER)) / sizeof(UINT32); for (i = 0; i < count; i++) if (madt_probe_table(rsdt->TableOffsetEntry[i])) break; madt_unmap_table(rsdt); } pmap_unmapdev((vm_offset_t)rsdp, sizeof(RSDP_DESCRIPTOR)); if (madt_physaddr == 0) { if (bootverbose) printf("MADT: No MADT table found\n"); return (ENXIO); } if (bootverbose) printf("MADT: Found table at 0x%jx\n", (uintmax_t)madt_physaddr); /* * Verify that we can map the full table and that its checksum is * correct, etc. */ madt = madt_map_table(madt_physaddr, 0, APIC_SIG); if (madt == NULL) return (ENXIO); madt_unmap_table(madt); madt = NULL; return (0); } /* * See if a given ACPI table is the MADT. */ static int madt_probe_table(vm_paddr_t address) { ACPI_TABLE_HEADER *table; table = madt_map(address, 0, sizeof(ACPI_TABLE_HEADER)); if (table == NULL) { if (bootverbose) printf("MADT: Failed to map table at 0x%jx\n", (uintmax_t)address); return (0); } if (bootverbose) printf("Table '%.4s' at 0x%jx\n", table->Signature, (uintmax_t)address); if (strncmp(table->Signature, APIC_SIG, 4) != 0) { madt_unmap(table, sizeof(ACPI_TABLE_HEADER)); return (0); } madt_physaddr = address; madt_length = table->Length; madt_unmap(table, sizeof(ACPI_TABLE_HEADER)); return (1); } /* * Run through the MP table enumerating CPUs. */ static int madt_probe_cpus(void) { madt = madt_map_table(madt_physaddr, 0, APIC_SIG); KASSERT(madt != NULL, ("Unable to re-map MADT")); madt_walk_table(madt_probe_cpus_handler, NULL); madt_unmap_table(madt); madt = NULL; return (0); } /* * Initialize the local APIC on the BSP. */ static int madt_setup_local(void) { madt = pmap_mapdev(madt_physaddr, madt_length); lapic_init((uintptr_t)madt->LocalApicAddress); printf("ACPI APIC Table: <%.*s %.*s>\n", (int)sizeof(madt->OemId), madt->OemId, (int)sizeof(madt->OemTableId), madt->OemTableId); /* * We ignore 64-bit local APIC override entries. Should we * perhaps emit a warning here if we find one? */ return (0); } /* * Enumerate I/O APICs and setup interrupt sources. */ static int madt_setup_io(void) { void *ioapic; u_int pin; int i; /* Try to initialize ACPI so that we can access the FADT. */ i = acpi_Startup(); if (ACPI_FAILURE(i)) { printf("MADT: ACPI Startup failed with %s\n", AcpiFormatException(i)); printf("Try disabling either ACPI or apic support.\n"); panic("Using MADT but ACPI doesn't work"); } /* First, we run through adding I/O APIC's. */ - if (madt->PCATCompat && !(acpi_quirks & ACPI_Q_MADT_IRQ0)) - ioapic_enable_mixed_mode(); madt_walk_table(madt_parse_apics, NULL); /* Second, we run through the table tweaking interrupt sources. */ madt_walk_table(madt_parse_ints, NULL); /* * If there was not an explicit override entry for the SCI, * force it to use level trigger and active-low polarity. */ if (!madt_found_sci_override) { if (madt_find_interrupt(AcpiGbl_FADT->SciInt, &ioapic, &pin) != 0) printf("MADT: Could not find APIC for SCI IRQ %d\n", AcpiGbl_FADT->SciInt); else { printf( "MADT: Forcing active-low polarity and level trigger for SCI\n"); ioapic_set_polarity(ioapic, pin, INTR_POLARITY_LOW); ioapic_set_triggermode(ioapic, pin, INTR_TRIGGER_LEVEL); } } /* Third, we register all the I/O APIC's. */ for (i = 0; i < NIOAPICS; i++) if (ioapics[i].io_apic != NULL) ioapic_register(ioapics[i].io_apic); /* Finally, we throw the switch to enable the I/O APIC's. */ acpi_SetDefaultIntrModel(ACPI_INTR_APIC); return (0); } static void madt_register(void *dummy __unused) { apic_register_enumerator(&madt_enumerator); } SYSINIT(madt_register, SI_SUB_CPU - 1, SI_ORDER_FIRST, madt_register, NULL) /* * Call the handler routine for each entry in the MADT table. */ static void madt_walk_table(madt_entry_handler *handler, void *arg) { APIC_HEADER *entry; u_char *p, *end; end = (u_char *)(madt) + madt->Length; for (p = (u_char *)(madt + 1); p < end; ) { entry = (APIC_HEADER *)p; handler(entry, arg); p += entry->Length; } } static void madt_probe_cpus_handler(APIC_HEADER *entry, void *arg) { MADT_PROCESSOR_APIC *proc; struct lapic_info *la; switch (entry->Type) { case APIC_PROCESSOR: /* * The MADT does not include a BSP flag, so we have to * let the MP code figure out which CPU is the BSP on * its own. */ proc = (MADT_PROCESSOR_APIC *)entry; if (bootverbose) printf("MADT: Found CPU APIC ID %d ACPI ID %d: %s\n", proc->LocalApicId, proc->ProcessorId, proc->ProcessorEnabled ? "enabled" : "disabled"); if (!proc->ProcessorEnabled) break; if (proc->LocalApicId >= NLAPICS) panic("%s: CPU ID %d too high", __func__, proc->LocalApicId); la = &lapics[proc->LocalApicId]; KASSERT(la->la_enabled == 0, ("Duplicate local APIC ID %d", proc->LocalApicId)); la->la_enabled = 1; la->la_acpi_id = proc->ProcessorId; lapic_create(proc->LocalApicId, 0); break; } } /* * Add an I/O APIC from an entry in the table. */ static void madt_parse_apics(APIC_HEADER *entry, void *arg __unused) { MADT_IO_APIC *apic; switch (entry->Type) { case APIC_IO: apic = (MADT_IO_APIC *)entry; if (bootverbose) printf("MADT: Found IO APIC ID %d, Interrupt %d at %p\n", apic->IoApicId, apic->Interrupt, (void *)(uintptr_t)apic->Address); if (apic->IoApicId >= NIOAPICS) panic("%s: I/O APIC ID %d too high", __func__, apic->IoApicId); if (ioapics[apic->IoApicId].io_apic != NULL) panic("%s: Double APIC ID %d", __func__, apic->IoApicId); ioapics[apic->IoApicId].io_apic = ioapic_create( (uintptr_t)apic->Address, apic->IoApicId, apic->Interrupt); ioapics[apic->IoApicId].io_vector = apic->Interrupt; break; default: break; } } /* * Determine properties of an interrupt source. Note that for ACPI these * functions are only used for ISA interrupts, so we assume ISA bus values * (Active Hi, Edge Triggered) for conforming values except for the ACPI * SCI for which we use Active Lo, Level Triggered. */ static enum intr_polarity interrupt_polarity(UINT16 Polarity, UINT8 Source) { switch (Polarity) { case POLARITY_CONFORMS: if (Source == AcpiGbl_FADT->SciInt) return (INTR_POLARITY_LOW); else return (INTR_POLARITY_HIGH); case POLARITY_ACTIVE_HIGH: return (INTR_POLARITY_HIGH); case POLARITY_ACTIVE_LOW: return (INTR_POLARITY_LOW); default: panic("Bogus Interrupt Polarity"); } } static enum intr_trigger interrupt_trigger(UINT16 TriggerMode, UINT8 Source) { switch (TriggerMode) { case TRIGGER_CONFORMS: if (Source == AcpiGbl_FADT->SciInt) return (INTR_TRIGGER_LEVEL); else return (INTR_TRIGGER_EDGE); case TRIGGER_EDGE: return (INTR_TRIGGER_EDGE); case TRIGGER_LEVEL: return (INTR_TRIGGER_LEVEL); default: panic("Bogus Interrupt Trigger Mode"); } } /* * Find the local APIC ID associated with a given ACPI Processor ID. */ static int madt_find_cpu(u_int acpi_id, u_int *apic_id) { int i; for (i = 0; i < NLAPICS; i++) { if (!lapics[i].la_enabled) continue; if (lapics[i].la_acpi_id != acpi_id) continue; *apic_id = i; return (0); } return (ENOENT); } /* * Find the IO APIC and pin on that APIC associated with a given global * interrupt. */ static int madt_find_interrupt(int intr, void **apic, u_int *pin) { int i, best; best = -1; for (i = 0; i < NIOAPICS; i++) { if (ioapics[i].io_apic == NULL || ioapics[i].io_vector > intr) continue; if (best == -1 || ioapics[best].io_vector < ioapics[i].io_vector) best = i; } if (best == -1) return (ENOENT); *apic = ioapics[best].io_apic; *pin = intr - ioapics[best].io_vector; if (*pin > 32) printf("WARNING: Found intpin of %u for vector %d\n", *pin, intr); return (0); } /* * Parse an interrupt source override for an ISA interrupt. */ static void madt_parse_interrupt_override(MADT_INTERRUPT_OVERRIDE *intr) { void *new_ioapic, *old_ioapic; u_int new_pin, old_pin; enum intr_trigger trig; enum intr_polarity pol; char buf[64]; if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->Source == 0 && intr->Interrupt == 2) { if (bootverbose) printf("MADT: Skipping timer override\n"); return; } if (bootverbose) printf("MADT: Interrupt override: source %u, irq %u\n", intr->Source, intr->Interrupt); KASSERT(intr->Bus == 0, ("bus for interrupt overrides must be zero")); if (madt_find_interrupt(intr->Interrupt, &new_ioapic, &new_pin) != 0) { printf("MADT: Could not find APIC for vector %d (IRQ %d)\n", intr->Interrupt, intr->Source); return; } /* * Lookup the appropriate trigger and polarity modes for this * entry. */ trig = interrupt_trigger(intr->TriggerMode, intr->Source); pol = interrupt_polarity(intr->Polarity, intr->Source); /* * If the SCI is identity mapped but has edge trigger and * active-hi polarity or the force_sci_lo tunable is set, * force it to use level/lo. */ if (intr->Source == AcpiGbl_FADT->SciInt) { madt_found_sci_override = 1; if (getenv_string("hw.acpi.sci.trigger", buf, sizeof(buf))) { if (tolower(buf[0]) == 'e') trig = INTR_TRIGGER_EDGE; else if (tolower(buf[0]) == 'l') trig = INTR_TRIGGER_LEVEL; else panic( "Invalid trigger %s: must be 'edge' or 'level'", buf); printf("MADT: Forcing SCI to %s trigger\n", trig == INTR_TRIGGER_EDGE ? "edge" : "level"); } if (getenv_string("hw.acpi.sci.polarity", buf, sizeof(buf))) { if (tolower(buf[0]) == 'h') pol = INTR_POLARITY_HIGH; else if (tolower(buf[0]) == 'l') pol = INTR_POLARITY_LOW; else panic( "Invalid polarity %s: must be 'high' or 'low'", buf); printf("MADT: Forcing SCI to active %s polarity\n", pol == INTR_POLARITY_HIGH ? "high" : "low"); } } /* Remap the IRQ if it is mapped to a different interrupt vector. */ if (intr->Source != intr->Interrupt) { /* * If the SCI is remapped to a non-ISA global interrupt, * then override the vector we use to setup and allocate * the interrupt. */ if (intr->Interrupt > 15 && intr->Source == AcpiGbl_FADT->SciInt) acpi_OverrideInterruptLevel(intr->Interrupt); else ioapic_remap_vector(new_ioapic, new_pin, intr->Source); if (madt_find_interrupt(intr->Source, &old_ioapic, &old_pin) != 0) printf("MADT: Could not find APIC for source IRQ %d\n", intr->Source); else if (ioapic_get_vector(old_ioapic, old_pin) == intr->Source) ioapic_disable_pin(old_ioapic, old_pin); } /* Program the polarity and trigger mode. */ ioapic_set_triggermode(new_ioapic, new_pin, trig); ioapic_set_polarity(new_ioapic, new_pin, pol); } /* * Parse an entry for an NMI routed to an IO APIC. */ static void madt_parse_nmi(MADT_NMI_SOURCE *nmi) { void *ioapic; u_int pin; if (madt_find_interrupt(nmi->Interrupt, &ioapic, &pin) != 0) { printf("MADT: Could not find APIC for vector %d\n", nmi->Interrupt); return; } ioapic_set_nmi(ioapic, pin); if (nmi->TriggerMode != TRIGGER_CONFORMS) ioapic_set_triggermode(ioapic, pin, interrupt_trigger(nmi->TriggerMode, 0)); if (nmi->Polarity != TRIGGER_CONFORMS) ioapic_set_polarity(ioapic, pin, interrupt_polarity(nmi->Polarity, 0)); } /* * Parse an entry for an NMI routed to a local APIC LVT pin. */ static void madt_parse_local_nmi(MADT_LOCAL_APIC_NMI *nmi) { u_int apic_id, pin; if (nmi->ProcessorId == 0xff) apic_id = APIC_ID_ALL; else if (madt_find_cpu(nmi->ProcessorId, &apic_id) != 0) { if (bootverbose) printf("MADT: Ignoring local NMI routed to ACPI CPU %u\n", nmi->ProcessorId); return; } if (nmi->Lint == 0) pin = LVT_LINT0; else pin = LVT_LINT1; lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); if (nmi->TriggerMode != TRIGGER_CONFORMS) lapic_set_lvt_triggermode(apic_id, pin, interrupt_trigger(nmi->TriggerMode, 0)); if (nmi->Polarity != POLARITY_CONFORMS) lapic_set_lvt_polarity(apic_id, pin, interrupt_polarity(nmi->Polarity, 0)); } /* * Parse interrupt entries. */ static void madt_parse_ints(APIC_HEADER *entry, void *arg __unused) { switch (entry->Type) { case APIC_XRUPT_OVERRIDE: madt_parse_interrupt_override( (MADT_INTERRUPT_OVERRIDE *)entry); break; case APIC_NMI: madt_parse_nmi((MADT_NMI_SOURCE *)entry); break; case APIC_LOCAL_NMI: madt_parse_local_nmi((MADT_LOCAL_APIC_NMI *)entry); break; } } /* * Setup per-CPU ACPI IDs. */ static void madt_set_ids(void *dummy) { struct lapic_info *la; struct pcpu *pc; u_int i; if (madt == NULL) return; for (i = 0; i <= mp_maxid; i++) { if (CPU_ABSENT(i)) continue; pc = pcpu_find(i); KASSERT(pc != NULL, ("no pcpu data for CPU %d", i)); la = &lapics[pc->pc_apic_id]; if (!la->la_enabled) panic("APIC: CPU with APIC ID %u is not enabled", pc->pc_apic_id); pc->pc_acpi_id = la->la_acpi_id; if (bootverbose) printf("APIC: CPU %u has ACPI ID %u\n", i, la->la_acpi_id); } } SYSINIT(madt_set_ids, SI_SUB_CPU, SI_ORDER_ANY, madt_set_ids, NULL) Index: head/sys/i386/i386/io_apic.c =================================================================== --- head/sys/i386/i386/io_apic.c (revision 145079) +++ head/sys/i386/i386/io_apic.c (revision 145080) @@ -1,860 +1,779 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" -#include "opt_no_mixed_mode.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IOAPIC_ISA_INTS 16 #define IOAPIC_MEM_REGION 32 #define IOAPIC_REDTBL_LO(i) (IOAPIC_REDTBL + (i) * 2) #define IOAPIC_REDTBL_HI(i) (IOAPIC_REDTBL_LO(i) + 1) #define VECTOR_EXTINT 252 #define VECTOR_NMI 253 #define VECTOR_SMI 254 #define VECTOR_DISABLED 255 #define DEST_NONE -1 -#define DEST_EXTINT -2 #define TODO printf("%s: not implemented!\n", __func__) static MALLOC_DEFINE(M_IOAPIC, "I/O APIC", "I/O APIC structures"); /* * New interrupt support code.. * * XXX: we really should have the interrupt cookie passed up from new-bus * just be a int pin, and not map 1:1 to interrupt vector number but should * use INTR_TYPE_FOO to set priority bands for device classes and do all the * magic remapping of intpin to vector in here. For now we just cheat as on * ia64 and map intpin X to vector NRSVIDT + X. Note that we assume that the * first IO APIC has ISA interrupts on pins 1-15. Not sure how you are * really supposed to figure out which IO APIC in a system with multiple IO * APIC's actually has the ISA interrupts routed to it. As far as interrupt * pin numbers, we use the ACPI System Interrupt number model where each * IO APIC has a contiguous chunk of the System Interrupt address space. */ -/* - * Direct the ExtINT pin on the first I/O APIC to a logical cluster of - * CPUs rather than a physical destination of just the BSP. - * - * Note: This is disabled by default as test systems seem to croak with it - * enabled. -#define ENABLE_EXTINT_LOGICAL_DESTINATION - */ - struct ioapic_intsrc { struct intsrc io_intsrc; u_int io_intpin:8; u_int io_vector:8; u_int io_activehi:1; u_int io_edgetrigger:1; u_int io_masked:1; int io_dest:5; int io_bus:4; }; struct ioapic { struct pic io_pic; u_int io_id:8; /* logical ID */ u_int io_apic_id:4; u_int io_intbase:8; /* System Interrupt base */ u_int io_numintr:8; volatile ioapic_t *io_addr; /* XXX: should use bus_space */ STAILQ_ENTRY(ioapic) io_next; struct ioapic_intsrc io_pins[0]; }; static u_int ioapic_read(volatile ioapic_t *apic, int reg); static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val); static const char *ioapic_bus_string(int bus_type); static void ioapic_print_vector(struct ioapic_intsrc *intpin); static void ioapic_enable_source(struct intsrc *isrc); static void ioapic_disable_source(struct intsrc *isrc, int eoi); static void ioapic_eoi_source(struct intsrc *isrc); static void ioapic_enable_intr(struct intsrc *isrc); static int ioapic_vector(struct intsrc *isrc); static int ioapic_source_pending(struct intsrc *isrc); static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static void ioapic_suspend(struct intsrc *isrc); static void ioapic_resume(struct intsrc *isrc); static void ioapic_program_destination(struct ioapic_intsrc *intpin); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); -static void ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source, ioapic_eoi_source, ioapic_enable_intr, ioapic_vector, ioapic_source_pending, ioapic_suspend, ioapic_resume, ioapic_config_intr }; static int bsp_id, current_cluster, logical_clusters, next_ioapic_base; -static u_int mixed_mode_enabled, next_id, program_logical_dest; -#ifdef NO_MIXED_MODE -static int mixed_mode_active = 0; -#else -static int mixed_mode_active = 1; -#endif -TUNABLE_INT("hw.apic.mixed_mode", &mixed_mode_active); +static u_int next_id, program_logical_dest; static __inline void _ioapic_eoi_source(struct intsrc *isrc) { lapic_eoi(); } static u_int ioapic_read(volatile ioapic_t *apic, int reg) { mtx_assert(&icu_lock, MA_OWNED); apic->ioregsel = reg; return (apic->iowin); } static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val) { mtx_assert(&icu_lock, MA_OWNED); apic->ioregsel = reg; apic->iowin = val; } static const char * ioapic_bus_string(int bus_type) { switch (bus_type) { case APIC_BUS_ISA: return ("ISA"); case APIC_BUS_EISA: return ("EISA"); case APIC_BUS_PCI: return ("PCI"); default: return ("unknown"); } } static void ioapic_print_vector(struct ioapic_intsrc *intpin) { switch (intpin->io_vector) { case VECTOR_DISABLED: printf("disabled"); break; case VECTOR_EXTINT: printf("ExtINT"); break; case VECTOR_NMI: printf("NMI"); break; case VECTOR_SMI: printf("SMI"); break; default: printf("%s IRQ %u", ioapic_bus_string(intpin->io_bus), intpin->io_vector); } } static void ioapic_enable_source(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; uint32_t flags; mtx_lock_spin(&icu_lock); if (intpin->io_masked) { flags = ioapic_read(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin)); flags &= ~(IOART_INTMASK); ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), flags); intpin->io_masked = 0; } mtx_unlock_spin(&icu_lock); } static void ioapic_disable_source(struct intsrc *isrc, int eoi) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; uint32_t flags; mtx_lock_spin(&icu_lock); if (!intpin->io_masked && !intpin->io_edgetrigger) { flags = ioapic_read(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin)); flags |= IOART_INTMSET; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), flags); intpin->io_masked = 1; } if (eoi == PIC_EOI) _ioapic_eoi_source(isrc); mtx_unlock_spin(&icu_lock); } static void ioapic_eoi_source(struct intsrc *isrc) { _ioapic_eoi_source(isrc); } /* * Completely program an intpin based on the data in its interrupt source * structure. */ static void ioapic_program_intpin(struct ioapic_intsrc *intpin) { struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; uint32_t low, high, value; - /* - * For pins routed via mixed mode or disabled, just ensure that - * they are masked. - */ - if (intpin->io_dest == DEST_EXTINT || - intpin->io_vector == VECTOR_DISABLED) { + /* For disabled pins, just ensure that they are masked. */ + if (intpin->io_vector == VECTOR_DISABLED) { low = ioapic_read(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin)); if ((low & IOART_INTMASK) == IOART_INTMCLR) ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low | IOART_INTMSET); return; } /* Set the destination. */ if (intpin->io_dest == DEST_NONE) { low = IOART_DESTPHY; high = bsp_id << APIC_ID_SHIFT; } else { low = IOART_DESTLOG; high = (intpin->io_dest << APIC_ID_CLUSTER_SHIFT | APIC_ID_CLUSTER_ID) << APIC_ID_SHIFT; } /* Program the rest of the low word. */ if (intpin->io_edgetrigger) low |= IOART_TRGREDG; else low |= IOART_TRGRLVL; if (intpin->io_activehi) low |= IOART_INTAHI; else low |= IOART_INTALO; if (intpin->io_masked) low |= IOART_INTMSET; switch (intpin->io_vector) { case VECTOR_EXTINT: KASSERT(intpin->io_edgetrigger, ("EXTINT not edge triggered")); low |= IOART_DELEXINT; break; case VECTOR_NMI: KASSERT(intpin->io_edgetrigger, ("NMI not edge triggered")); low |= IOART_DELNMI; break; case VECTOR_SMI: KASSERT(intpin->io_edgetrigger, ("SMI not edge triggered")); low |= IOART_DELSMI; break; default: low |= IOART_DELLOPRI | apic_irq_to_idt(intpin->io_vector); } /* Write the values to the APIC. */ mtx_lock_spin(&icu_lock); ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); value = ioapic_read(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin)); value &= ~IOART_DEST; value |= high; ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), value); mtx_unlock_spin(&icu_lock); } /* * Program an individual intpin's logical destination. */ static void ioapic_program_destination(struct ioapic_intsrc *intpin) { struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; KASSERT(intpin->io_dest != DEST_NONE, ("intpin not assigned to a cluster")); - KASSERT(intpin->io_dest != DEST_EXTINT, - ("intpin routed via ExtINT")); if (bootverbose) { printf("ioapic%u: routing intpin %u (", io->io_id, intpin->io_intpin); ioapic_print_vector(intpin); printf(") to cluster %u\n", intpin->io_dest); } ioapic_program_intpin(intpin); } static void ioapic_assign_cluster(struct ioapic_intsrc *intpin) { /* * Assign this intpin to a logical APIC cluster in a * round-robin fashion. We don't actually use the logical * destination for this intpin until after all the CPU's * have been started so that we don't end up with interrupts * that don't go anywhere. Another alternative might be to * start up the CPU's earlier so that they can handle interrupts * sooner. */ intpin->io_dest = current_cluster; current_cluster++; if (current_cluster >= logical_clusters) current_cluster = 0; if (program_logical_dest) ioapic_program_destination(intpin); } static void ioapic_enable_intr(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; - KASSERT(intpin->io_dest != DEST_EXTINT, - ("ExtINT pin trying to use ioapic enable_intr method")); if (intpin->io_dest == DEST_NONE) { ioapic_assign_cluster(intpin); lapic_enable_intr(intpin->io_vector); } } static int ioapic_vector(struct intsrc *isrc) { struct ioapic_intsrc *pin; pin = (struct ioapic_intsrc *)isrc; return (pin->io_vector); } static int ioapic_source_pending(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; return (lapic_intr_pending(intpin->io_vector)); } static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; int changed; KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM), ("%s: Conforming trigger or polarity\n", __func__)); /* * EISA interrupts always use active high polarity, so don't allow * them to be set to active low. * * XXX: Should we write to the ELCR if the trigger mode changes for * an EISA IRQ or an ISA IRQ with the ELCR present? */ if (intpin->io_bus == APIC_BUS_EISA) pol = INTR_POLARITY_HIGH; changed = 0; if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) { if (bootverbose) printf("ioapic%u: Changing trigger for pin %u to %s\n", io->io_id, intpin->io_intpin, trig == INTR_TRIGGER_EDGE ? "edge" : "level"); intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE); changed++; } if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) { if (bootverbose) printf("ioapic%u: Changing polarity for pin %u to %s\n", io->io_id, intpin->io_intpin, pol == INTR_POLARITY_HIGH ? "high" : "low"); intpin->io_activehi = (pol == INTR_POLARITY_HIGH); changed++; } if (changed) ioapic_program_intpin(intpin); return (0); } static void ioapic_suspend(struct intsrc *isrc) { TODO; } static void ioapic_resume(struct intsrc *isrc) { ioapic_program_intpin((struct ioapic_intsrc *)isrc); } /* - * APIC enumerators call this function to indicate that the 8259A AT PICs - * are available and that mixed mode can be used. - */ -void -ioapic_enable_mixed_mode(void) -{ - - mixed_mode_enabled = 1; -} - -/* * Allocate and return a logical cluster ID. Note that the first time * this is called, it returns cluster 0. ioapic_enable_intr() treats * the two cases of logical_clusters == 0 and logical_clusters == 1 the * same: one cluster of ID 0 exists. The logical_clusters == 0 case is * for UP kernels, which should never call this function. */ int ioapic_next_logical_cluster(void) { if (logical_clusters >= APIC_MAX_CLUSTER) panic("WARNING: Local APIC cluster IDs exhausted!"); return (logical_clusters++); } /* * Create a plain I/O APIC object. */ void * ioapic_create(uintptr_t addr, int32_t apic_id, int intbase) { struct ioapic *io; struct ioapic_intsrc *intpin; volatile ioapic_t *apic; u_int numintr, i; uint32_t value; /* Map the register window so we can access the device. */ apic = (ioapic_t *)pmap_mapdev(addr, IOAPIC_MEM_REGION); mtx_lock_spin(&icu_lock); value = ioapic_read(apic, IOAPIC_VER); mtx_unlock_spin(&icu_lock); /* If it's version register doesn't seem to work, punt. */ if (value == 0xffffff) { pmap_unmapdev((vm_offset_t)apic, IOAPIC_MEM_REGION); return (NULL); } /* Determine the number of vectors and set the APIC ID. */ numintr = ((value & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) + 1; io = malloc(sizeof(struct ioapic) + numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK); io->io_pic = ioapic_template; mtx_lock_spin(&icu_lock); io->io_id = next_id++; io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; if (apic_id != -1 && io->io_apic_id != apic_id) { ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); mtx_unlock_spin(&icu_lock); io->io_apic_id = apic_id; printf("ioapic%u: Changing APIC ID to %d\n", io->io_id, apic_id); } else mtx_unlock_spin(&icu_lock); if (intbase == -1) { intbase = next_ioapic_base; printf("ioapic%u: Assuming intbase of %d\n", io->io_id, intbase); } else if (intbase != next_ioapic_base) printf("ioapic%u: WARNING: intbase %d != expected base %d\n", io->io_id, intbase, next_ioapic_base); io->io_intbase = intbase; next_ioapic_base = intbase + numintr; io->io_numintr = numintr; io->io_addr = apic; /* * Initialize pins. Start off with interrupts disabled. Default * to active-hi and edge-triggered for ISA interrupts and active-lo * and level-triggered for all others. */ bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr); mtx_lock_spin(&icu_lock); for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) { intpin->io_intsrc.is_pic = (struct pic *)io; intpin->io_intpin = i; intpin->io_vector = intbase + i; /* - * Assume that pin 0 on the first I/O APIC is an ExtINT pin - * if mixed mode is enabled and an ISA interrupt if not. + * Assume that pin 0 on the first I/O APIC is an ExtINT pin. * Assume that pins 1-15 are ISA interrupts and that all * other pins are PCI interrupts. */ - if (intpin->io_vector == 0 && mixed_mode_enabled) + if (intpin->io_vector == 0) ioapic_set_extint(io, i); else if (intpin->io_vector < IOAPIC_ISA_INTS) { intpin->io_bus = APIC_BUS_ISA; intpin->io_activehi = 1; intpin->io_edgetrigger = 1; intpin->io_masked = 1; } else { intpin->io_bus = APIC_BUS_PCI; intpin->io_activehi = 0; intpin->io_edgetrigger = 0; intpin->io_masked = 1; } /* * Route interrupts to the BSP by default using physical * addressing. Vectored interrupts get readdressed using * logical IDs to CPU clusters when they are enabled. */ intpin->io_dest = DEST_NONE; if (bootverbose && intpin->io_vector != VECTOR_DISABLED) { printf("ioapic%u: intpin %d -> ", io->io_id, i); ioapic_print_vector(intpin); printf(" (%s, %s)\n", intpin->io_edgetrigger ? "edge" : "level", intpin->io_activehi ? "high" : "low"); } value = ioapic_read(apic, IOAPIC_REDTBL_LO(i)); ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET); } mtx_unlock_spin(&icu_lock); return (io); } int ioapic_get_vector(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (-1); return (io->io_pins[pin].io_vector); } int ioapic_disable_pin(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_vector == VECTOR_DISABLED) return (EINVAL); io->io_pins[pin].io_vector = VECTOR_DISABLED; if (bootverbose) printf("ioapic%u: intpin %d disabled\n", io->io_id, pin); return (0); } int ioapic_remap_vector(void *cookie, u_int pin, int vector) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || vector < 0) return (EINVAL); if (io->io_pins[pin].io_vector >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_vector = vector; if (bootverbose) printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id, vector, pin); return (0); } int ioapic_set_bus(void *cookie, u_int pin, int bus_type) { struct ioapic *io; if (bus_type < 0 || bus_type > APIC_BUS_MAX) return (EINVAL); io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_vector >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_bus = bus_type; if (bootverbose) printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin, ioapic_bus_string(bus_type)); return (0); } int ioapic_set_nmi(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_vector == VECTOR_NMI) return (0); if (io->io_pins[pin].io_vector >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_vector = VECTOR_NMI; io->io_pins[pin].io_masked = 0; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing NMI -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_smi(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_vector == VECTOR_SMI) return (0); if (io->io_pins[pin].io_vector >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_vector = VECTOR_SMI; io->io_pins[pin].io_masked = 0; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing SMI -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_extint(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_vector == VECTOR_EXTINT) return (0); if (io->io_pins[pin].io_vector >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_vector = VECTOR_EXTINT; - - /* Enable this pin if mixed mode is available and active. */ - if (mixed_mode_enabled && mixed_mode_active) - io->io_pins[pin].io_masked = 0; - else - io->io_pins[pin].io_masked = 1; + io->io_pins[pin].io_masked = 1; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing external 8259A's -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (io->io_pins[pin].io_vector >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (io->io_pins[pin].io_vector >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Register a complete I/O APIC object with the interrupt subsystem. */ void ioapic_register(void *cookie) { struct ioapic_intsrc *pin; struct ioapic *io; volatile ioapic_t *apic; uint32_t flags; int i; io = (struct ioapic *)cookie; apic = io->io_addr; mtx_lock_spin(&icu_lock); flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION; STAILQ_INSERT_TAIL(&ioapic_list, io, io_next); mtx_unlock_spin(&icu_lock); printf("ioapic%u irqs %u-%u on motherboard\n", io->io_id, flags >> 4, flags & 0xf, io->io_intbase, io->io_intbase + io->io_numintr - 1); bsp_id = PCPU_GET(apic_id); for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { /* * Finish initializing the pins by programming the vectors * and delivery mode. */ if (pin->io_vector == VECTOR_DISABLED) continue; ioapic_program_intpin(pin); if (pin->io_vector >= NUM_IO_INTS) continue; - /* - * Route IRQ0 via the 8259A using mixed mode if mixed mode - * is available and turned on. - */ - if (pin->io_vector == 0 && mixed_mode_active && - mixed_mode_enabled) - ioapic_setup_mixed_mode(pin); - else - intr_register_source(&pin->io_intsrc); + intr_register_source(&pin->io_intsrc); } } /* * Program all the intpins to use logical destinations once the AP's * have been launched. */ static void ioapic_set_logical_destinations(void *arg __unused) { struct ioapic *io; int i; program_logical_dest = 1; STAILQ_FOREACH(io, &ioapic_list, io_next) for (i = 0; i < io->io_numintr; i++) - if (io->io_pins[i].io_dest != DEST_NONE && - io->io_pins[i].io_dest != DEST_EXTINT) + if (io->io_pins[i].io_dest != DEST_NONE) ioapic_program_destination(&io->io_pins[i]); } SYSINIT(ioapic_destinations, SI_SUB_SMP, SI_ORDER_SECOND, ioapic_set_logical_destinations, NULL) - -/* - * Support for mixed-mode interrupt sources. These sources route an ISA - * IRQ through the 8259A's via the ExtINT on pin 0 of the I/O APIC that - * routes the ISA interrupts. We just ignore the intpins that use this - * mode and allow the atpic driver to register its interrupt source for - * that IRQ instead. - */ - -static void -ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin) -{ - struct ioapic_intsrc *extint; - struct ioapic *io; - - /* - * Mark the associated I/O APIC intpin as being delivered via - * ExtINT and enable the ExtINT pin on the I/O APIC if needed. - */ - intpin->io_dest = DEST_EXTINT; - io = (struct ioapic *)intpin->io_intsrc.is_pic; - extint = &io->io_pins[0]; - if (extint->io_vector != VECTOR_EXTINT) - panic("Can't find ExtINT pin to route through!"); -#ifdef ENABLE_EXTINT_LOGICAL_DESTINATION - if (extint->io_dest == DEST_NONE) - ioapic_assign_cluster(extint); -#endif -} Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 145079) +++ head/sys/i386/i386/machdep.c (revision 145080) @@ -1,3008 +1,3003 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #include #endif #ifdef DEV_ISA #include #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #if defined(CPU_DISABLE_SSE) #undef CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif int _udatasel, _ucodesel; u_int basemem; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif long Maxmem = 0; long realmem = 0; vm_paddr_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; #ifndef SMP static struct pcpu __pcpu; #endif struct mtx icu_lock; struct mem_range_softc mem_range_softc; static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, sig, mask, code); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Build siginfo_t for SA thread */ void cpu_thread_siginfo(int sig, u_long code, siginfo_t *si) { struct proc *p; struct thread *td; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); bzero(si, sizeof(*si)); si->si_signo = sig; si->si_code = code; si->si_addr = (void *)td->td_frame->tf_err; /* XXXKSE fill other fields */ } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(td, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(td, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(td->td_sigmask, scp->sc_mask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(td, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); trapsignal(td, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(td, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(td, SIGBUS, T_PROTFLT); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { register_t reg; uint64_t tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if (!tsc_present) return (EOPNOTSUPP); /* If we're booting, trust the rate calibrated moments ago. */ if (cold) { *rate = tsc_freq; return (0); } #ifdef SMP /* Schedule ourselves on the indicated cpu. */ mtx_lock_spin(&sched_lock); sched_bind(curthread, cpu_id); mtx_unlock_spin(&sched_lock); #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); #ifdef SMP mtx_lock_spin(&sched_lock); sched_unbind(curthread); mtx_unlock_spin(&sched_lock); #endif /* * Calculate the difference in readings, convert to Mhz, and * subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); static void cpu_idle_default(void) { /* * we must absolutely guarentee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(void) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) enable_intr(); else (*cpu_idle_hook)(); } } /* Other subsystems (e.g., ACPI) can hook this later. */ void (*cpu_idle_hook)(void) = cpu_idle_default; /* * Clear registers on exec */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUFS_SEL 2 %fs Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUGS_SEL 3 %gs Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 4 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 5 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUCODE_SEL 6 Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUDATA_SEL 7 Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 10 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GNDIS_SEL 18 NDIS Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx, quit; uintptr_t func; ip = idt; db_setup_paging(db_simple_pager, &quit, db_lines_per_page); for (idx = 0, quit = 0; idx < NIDT; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, physmap_idx, pa_indx; int hasbrokenint12; u_long physmem_tunable; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; struct bios_smap *smap; quad_t dcons_addr, dcons_size; hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Some newer BIOSes has broken INT 12H implementation which cause * kernel panic immediately. In this case, we need to scan SMAP * with INT 15:E820 first, then determine base memory size. */ if (hasbrokenint12) { goto int15e820; } /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; int15e820: /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. */ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); /* * get memory map with INT 15:E820 */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != 0x01) continue; if (smap->length == 0) continue; #ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); continue; } #endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); continue; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; continue; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; } while (vmf.vmf_ebx != 0); /* * Perform "base memory" related probes & setup based on SMAP */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* * XXX this function is horribly organized and has to the same * things that it does above here. */ if (basemem == 0) basemem = 640; if (basemem > 640) { printf( "Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * Let vm86 scribble on pages between basemem and * ISA_HOLE_START, as above. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; int *ptr = (int *)CADDR1; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) continue; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, off, x; struct pcpu *pc; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup(&proc0, &ksegrp0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); #ifdef SMP pc = &SMP_prvspace[0].pcpu; #else pc = &__pcpu; #endif gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&clock_lock, "clk", NULL, MTX_SPIN); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA elcr_probe(); atpic_startup(); #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ #ifdef PAE thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_flags = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL) static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = td->td_pcb; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_eflags = tp->tf_eflags; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. */ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; u_int32_t mask1, mask2; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; i++, mask1 <<= 2, mask2 <<= 2) if ((dbregs->dr[7] & mask1) == mask2) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (suser(td) != 0) { if (dbregs->dr[7] & 0x3) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i=0; i /* * Provide stub functions so that the MADT APIC enumerator in the acpi * kernel module will link against a kernel without 'device apic'. * * XXX - This is a gross hack. */ void apic_register_enumerator(struct apic_enumerator *enumerator) { } void * ioapic_create(uintptr_t addr, int32_t id, int intbase) { return (NULL); } int ioapic_disable_pin(void *cookie, u_int pin) { return (ENXIO); } -void -ioapic_enable_mixed_mode(void) -{ -} - int ioapic_get_vector(void *cookie, u_int pin) { return (-1); } void ioapic_register(void *cookie) { } int ioapic_remap_vector(void *cookie, u_int pin, int vector) { return (ENXIO); } int ioapic_set_extint(void *cookie, u_int pin) { return (ENXIO); } int ioapic_set_nmi(void *cookie, u_int pin) { return (ENXIO); } int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) { return (ENXIO); } int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) { return (ENXIO); } void lapic_create(u_int apic_id, int boot_cpu) { } void lapic_init(uintptr_t addr) { } int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) { return (ENXIO); } int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (ENXIO); } int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (ENXIO); } #endif #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called from the debugger. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* KDB */ Index: head/sys/i386/i386/mptable.c =================================================================== --- head/sys/i386/i386/mptable.c (revision 145079) +++ head/sys/i386/i386/mptable.c (revision 145080) @@ -1,1048 +1,1047 @@ /*- * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_mptable_force_htt.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* string defined by the Intel MP Spec as identifying the MP table */ #define MP_SIG 0x5f504d5f /* _MP_ */ #define NAPICID 32 /* Max number of APIC's */ #ifdef PC98 #define BIOS_BASE (0xe8000) #define BIOS_SIZE (0x18000) #else #define BIOS_BASE (0xf0000) #define BIOS_SIZE (0x10000) #endif #define BIOS_COUNT (BIOS_SIZE/4) typedef void mptable_entry_handler(u_char *entry, void *arg); static basetable_entry basetable_entry_types[] = { {0, 20, "Processor"}, {1, 8, "Bus"}, {2, 8, "I/O APIC"}, {3, 8, "I/O INT"}, {4, 8, "Local INT"} }; typedef struct BUSDATA { u_char bus_id; enum busTypes bus_type; } bus_datum; typedef struct INTDATA { u_char int_type; u_short int_flags; u_char src_bus_id; u_char src_bus_irq; u_char dst_apic_id; u_char dst_apic_int; u_char int_vector; } io_int, local_int; typedef struct BUSTYPENAME { u_char type; char name[7]; } bus_type_name; /* From MP spec v1.4, table 4-8. */ static bus_type_name bus_type_table[] = { {UNKNOWN_BUSTYPE, "CBUS "}, {UNKNOWN_BUSTYPE, "CBUSII"}, {EISA, "EISA "}, {UNKNOWN_BUSTYPE, "FUTURE"}, {UNKNOWN_BUSTYPE, "INTERN"}, {ISA, "ISA "}, {UNKNOWN_BUSTYPE, "MBI "}, {UNKNOWN_BUSTYPE, "MBII "}, {MCA, "MCA "}, {UNKNOWN_BUSTYPE, "MPI "}, {UNKNOWN_BUSTYPE, "MPSA "}, {UNKNOWN_BUSTYPE, "NUBUS "}, {PCI, "PCI "}, {UNKNOWN_BUSTYPE, "PCMCIA"}, {UNKNOWN_BUSTYPE, "TC "}, {UNKNOWN_BUSTYPE, "VL "}, {UNKNOWN_BUSTYPE, "VME "}, {UNKNOWN_BUSTYPE, "XPRESS"} }; /* From MP spec v1.4, table 5-1. */ static int default_data[7][5] = { /* nbus, id0, type0, id1, type1 */ {1, 0, ISA, 255, NOBUS}, {1, 0, EISA, 255, NOBUS}, {1, 0, EISA, 255, NOBUS}, {1, 0, MCA, 255, NOBUS}, {2, 0, ISA, 1, PCI}, {2, 0, EISA, 1, PCI}, {2, 0, MCA, 1, PCI} }; struct pci_probe_table_args { u_char bus; u_char found; }; struct pci_route_interrupt_args { u_char bus; /* Source bus. */ u_char irq; /* Source slot:pin. */ int vector; /* Return value. */ }; static mpfps_t mpfps; static mpcth_t mpct; static void *ioapics[NAPICID]; static bus_datum *busses; static int mptable_nioapics, mptable_nbusses, mptable_maxbusid; static int pci0 = -1; static MALLOC_DEFINE(M_MPTABLE, "MP Table", "MP Table Items"); static enum intr_polarity conforming_polarity(u_char src_bus, u_char src_bus_irq); static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq); static enum intr_polarity intentry_polarity(int_entry_ptr intr); static enum intr_trigger intentry_trigger(int_entry_ptr intr); static int lookup_bus_type(char *name); static void mptable_count_items(void); static void mptable_count_items_handler(u_char *entry, void *arg); #ifdef MPTABLE_FORCE_HTT static void mptable_hyperthread_fixup(u_int id_mask); #endif static void mptable_parse_apics_and_busses(void); static void mptable_parse_apics_and_busses_handler(u_char *entry, void *arg); static void mptable_parse_default_config_ints(void); static void mptable_parse_ints(void); static void mptable_parse_ints_handler(u_char *entry, void *arg); static void mptable_parse_io_int(int_entry_ptr intr); static void mptable_parse_local_int(int_entry_ptr intr); static void mptable_pci_probe_table_handler(u_char *entry, void *arg); static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg); static void mptable_pci_setup(void); static int mptable_probe(void); static int mptable_probe_cpus(void); static void mptable_probe_cpus_handler(u_char *entry, void *arg __unused); static void mptable_register(void *dummy); static int mptable_setup_local(void); static int mptable_setup_io(void); static void mptable_walk_table(mptable_entry_handler *handler, void *arg); static int search_for_sig(u_int32_t target, int count); static struct apic_enumerator mptable_enumerator = { "MPTable", mptable_probe, mptable_probe_cpus, mptable_setup_local, mptable_setup_io }; /* * look for the MP spec signature */ static int search_for_sig(u_int32_t target, int count) { int x; u_int32_t *addr = (u_int32_t *) (KERNBASE + target); for (x = 0; x < count; x += 4) if (addr[x] == MP_SIG) /* make array index a byte index */ return (target + (x * sizeof(u_int32_t))); return (-1); } static int lookup_bus_type(char *name) { int x; for (x = 0; x < MAX_BUSTYPE; ++x) if (strncmp(bus_type_table[x].name, name, 6) == 0) return (bus_type_table[x].type); return (UNKNOWN_BUSTYPE); } /* * Look for an Intel MP spec table (ie, SMP capable hardware). */ static int mptable_probe(void) { int x; u_long segment; u_int32_t target; /* see if EBDA exists */ if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { /* search first 1K of EBDA */ target = (u_int32_t) (segment << 4); if ((x = search_for_sig(target, 1024 / 4)) >= 0) goto found; } else { /* last 1K of base memory, effective 'top of base' passed in */ target = (u_int32_t) ((basemem * 1024) - 0x400); if ((x = search_for_sig(target, 1024 / 4)) >= 0) goto found; } /* search the BIOS */ target = (u_int32_t) BIOS_BASE; if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) goto found; /* nothing found */ return (ENXIO); found: mpfps = (mpfps_t)(KERNBASE + x); /* Map in the configuration table if it exists. */ if (mpfps->config_type != 0) { if (bootverbose) printf( "MP Table version 1.%d found using Default Configuration %d\n", mpfps->spec_rev, mpfps->config_type); if (mpfps->config_type != 5 && mpfps->config_type != 6) { printf( "MP Table Default Configuration %d is unsupported\n", mpfps->config_type); return (ENXIO); } mpct = NULL; } else { if ((uintptr_t)mpfps->pap >= 1024 * 1024) { printf("%s: Unable to map MP Configuration Table\n", __func__); return (ENXIO); } mpct = (mpcth_t)(KERNBASE + (uintptr_t)mpfps->pap); if (mpct->base_table_length + (uintptr_t)mpfps->pap >= 1024 * 1024) { printf("%s: Unable to map end of MP Config Table\n", __func__); return (ENXIO); } if (mpct->signature[0] != 'P' || mpct->signature[1] != 'C' || mpct->signature[2] != 'M' || mpct->signature[3] != 'P') { printf("%s: MP Config Table has bad signature: %c%c%c%c\n", __func__, mpct->signature[0], mpct->signature[1], mpct->signature[2], mpct->signature[3]); return (ENXIO); } if (bootverbose) printf( "MP Configuration Table version 1.%d found at %p\n", mpct->spec_rev, mpct); } return (-100); } /* * Run through the MP table enumerating CPUs. */ static int mptable_probe_cpus(void) { u_int cpu_mask; /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { lapic_create(0, 1); lapic_create(1, 0); } else { cpu_mask = 0; mptable_walk_table(mptable_probe_cpus_handler, &cpu_mask); #ifdef MPTABLE_FORCE_HTT mptable_hyperthread_fixup(cpu_mask); #endif } return (0); } /* * Initialize the local APIC on the BSP. */ static int mptable_setup_local(void) { /* Is this a pre-defined config? */ printf("MPTable: <"); if (mpfps->config_type != 0) { lapic_init(DEFAULT_APIC_BASE); printf("Default Configuration %d", mpfps->config_type); } else { lapic_init((uintptr_t)mpct->apic_address); printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id, (int)sizeof(mpct->product_id), mpct->product_id); } printf(">\n"); return (0); } /* * Run through the MP table enumerating I/O APICs. */ static int mptable_setup_io(void) { int i; u_char byte; /* First, we count individual items and allocate arrays. */ mptable_count_items(); busses = malloc((mptable_maxbusid + 1) * sizeof(bus_datum), M_MPTABLE, M_WAITOK); for (i = 0; i <= mptable_maxbusid; i++) busses[i].bus_type = NOBUS; /* Second, we run through adding I/O APIC's and busses. */ - ioapic_enable_mixed_mode(); mptable_parse_apics_and_busses(); /* Third, we run through the table tweaking interrupt sources. */ mptable_parse_ints(); /* Fourth, we register all the I/O APIC's. */ for (i = 0; i < NAPICID; i++) if (ioapics[i] != NULL) ioapic_register(ioapics[i]); /* Fifth, we setup data structures to handle PCI interrupt routing. */ mptable_pci_setup(); /* Finally, we throw the switch to enable the I/O APIC's. */ if (mpfps->mpfb2 & MPFB2_IMCR_PRESENT) { outb(0x22, 0x70); /* select IMCR */ byte = inb(0x23); /* current contents */ byte |= 0x01; /* mask external INTR */ outb(0x23, byte); /* disconnect 8259s/NMI */ } return (0); } static void mptable_register(void *dummy __unused) { apic_register_enumerator(&mptable_enumerator); } SYSINIT(mptable_register, SI_SUB_CPU - 1, SI_ORDER_FIRST, mptable_register, NULL) /* * Call the handler routine for each entry in the MP config table. */ static void mptable_walk_table(mptable_entry_handler *handler, void *arg) { u_int i; u_char *entry; entry = (u_char *)(mpct + 1); for (i = 0; i < mpct->entry_count; i++) { switch (*entry) { case MPCT_ENTRY_PROCESSOR: case MPCT_ENTRY_IOAPIC: case MPCT_ENTRY_BUS: case MPCT_ENTRY_INT: case MPCT_ENTRY_LOCAL_INT: break; default: panic("%s: Unknown MP Config Entry %d\n", __func__, (int)*entry); } handler(entry, arg); entry += basetable_entry_types[*entry].length; } } static void mptable_probe_cpus_handler(u_char *entry, void *arg) { proc_entry_ptr proc; u_int *cpu_mask; switch (*entry) { case MPCT_ENTRY_PROCESSOR: proc = (proc_entry_ptr)entry; if (proc->cpu_flags & PROCENTRY_FLAG_EN) { lapic_create(proc->apic_id, proc->cpu_flags & PROCENTRY_FLAG_BP); cpu_mask = (u_int *)arg; *cpu_mask |= (1 << proc->apic_id); } break; } } static void mptable_count_items_handler(u_char *entry, void *arg __unused) { io_apic_entry_ptr apic; bus_entry_ptr bus; switch (*entry) { case MPCT_ENTRY_BUS: bus = (bus_entry_ptr)entry; mptable_nbusses++; if (bus->bus_id > mptable_maxbusid) mptable_maxbusid = bus->bus_id; break; case MPCT_ENTRY_IOAPIC: apic = (io_apic_entry_ptr)entry; if (apic->apic_flags & IOAPICENTRY_FLAG_EN) mptable_nioapics++; break; } } /* * Count items in the table. */ static void mptable_count_items(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { mptable_nioapics = 1; switch (mpfps->config_type) { case 1: case 2: case 3: case 4: mptable_nbusses = 1; break; case 5: case 6: case 7: mptable_nbusses = 2; break; default: panic("Unknown pre-defined MP Table config type %d", mpfps->config_type); } mptable_maxbusid = mptable_nbusses - 1; } else mptable_walk_table(mptable_count_items_handler, NULL); } /* * Add a bus or I/O APIC from an entry in the table. */ static void mptable_parse_apics_and_busses_handler(u_char *entry, void *arg __unused) { io_apic_entry_ptr apic; bus_entry_ptr bus; enum busTypes bus_type; int i; switch (*entry) { case MPCT_ENTRY_BUS: bus = (bus_entry_ptr)entry; bus_type = lookup_bus_type(bus->bus_type); if (bus_type == UNKNOWN_BUSTYPE) { printf("MPTable: Unknown bus %d type \"", bus->bus_id); for (i = 0; i < 6; i++) printf("%c", bus->bus_type[i]); printf("\"\n"); } busses[bus->bus_id].bus_id = bus->bus_id; busses[bus->bus_id].bus_type = bus_type; break; case MPCT_ENTRY_IOAPIC: apic = (io_apic_entry_ptr)entry; if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN)) break; if (apic->apic_id >= NAPICID) panic("%s: I/O APIC ID %d too high", __func__, apic->apic_id); if (ioapics[apic->apic_id] != NULL) panic("%s: Double APIC ID %d", __func__, apic->apic_id); ioapics[apic->apic_id] = ioapic_create( (uintptr_t)apic->apic_address, apic->apic_id, -1); break; default: break; } } /* * Enumerate I/O APIC's and busses. */ static void mptable_parse_apics_and_busses(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { ioapics[2] = ioapic_create(DEFAULT_IO_APIC_BASE, 2, 0); busses[0].bus_id = 0; busses[0].bus_type = default_data[mpfps->config_type - 1][2]; if (mptable_nbusses > 1) { busses[1].bus_id = 1; busses[1].bus_type = default_data[mpfps->config_type - 1][4]; } } else mptable_walk_table(mptable_parse_apics_and_busses_handler, NULL); } /* * Determine conforming polarity for a given bus type. */ static enum intr_polarity conforming_polarity(u_char src_bus, u_char src_bus_irq) { KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); switch (busses[src_bus].bus_type) { case ISA: case EISA: return (INTR_POLARITY_HIGH); case PCI: return (INTR_POLARITY_LOW); default: panic("%s: unknown bus type %d", __func__, busses[src_bus].bus_type); } } /* * Determine conforming trigger for a given bus type. */ static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq) { KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); switch (busses[src_bus].bus_type) { case ISA: #ifndef PC98 if (elcr_found) return (elcr_read_trigger(src_bus_irq)); else #endif return (INTR_TRIGGER_EDGE); case PCI: return (INTR_TRIGGER_LEVEL); #ifndef PC98 case EISA: KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq)); KASSERT(elcr_found, ("Missing ELCR")); return (elcr_read_trigger(src_bus_irq)); #endif default: panic("%s: unknown bus type %d", __func__, busses[src_bus].bus_type); } } static enum intr_polarity intentry_polarity(int_entry_ptr intr) { switch (intr->int_flags & INTENTRY_FLAGS_POLARITY) { case INTENTRY_FLAGS_POLARITY_CONFORM: return (conforming_polarity(intr->src_bus_id, intr->src_bus_irq)); case INTENTRY_FLAGS_POLARITY_ACTIVEHI: return (INTR_POLARITY_HIGH); case INTENTRY_FLAGS_POLARITY_ACTIVELO: return (INTR_POLARITY_LOW); default: panic("Bogus interrupt flags"); } } static enum intr_trigger intentry_trigger(int_entry_ptr intr) { switch (intr->int_flags & INTENTRY_FLAGS_TRIGGER) { case INTENTRY_FLAGS_TRIGGER_CONFORM: return (conforming_trigger(intr->src_bus_id, intr->src_bus_irq)); case INTENTRY_FLAGS_TRIGGER_EDGE: return (INTR_TRIGGER_EDGE); case INTENTRY_FLAGS_TRIGGER_LEVEL: return (INTR_TRIGGER_LEVEL); default: panic("Bogus interrupt flags"); } } /* * Parse an interrupt entry for an I/O interrupt routed to a pin on an I/O APIC. */ static void mptable_parse_io_int(int_entry_ptr intr) { void *ioapic; u_int pin, apic_id; apic_id = intr->dst_apic_id; if (intr->dst_apic_id == 0xff) { /* * An APIC ID of 0xff means that the interrupt is connected * to the specified pin on all I/O APICs in the system. If * there is only one I/O APIC, then use that APIC to route * the interrupts. If there is more than one I/O APIC, then * punt. */ if (mptable_nioapics == 1) { apic_id = 0; while (ioapics[apic_id] == NULL) apic_id++; } else { printf( "MPTable: Ignoring global interrupt entry for pin %d\n", intr->dst_apic_int); return; } } if (apic_id >= NAPICID) { printf("MPTable: Ignoring interrupt entry for ioapic%d\n", intr->dst_apic_id); return; } ioapic = ioapics[apic_id]; if (ioapic == NULL) { printf( "MPTable: Ignoring interrupt entry for missing ioapic%d\n", apic_id); return; } pin = intr->dst_apic_int; switch (intr->int_type) { case INTENTRY_TYPE_INT: switch (busses[intr->src_bus_id].bus_type) { case NOBUS: panic("interrupt from missing bus"); case ISA: case EISA: if (busses[intr->src_bus_id].bus_type == ISA) ioapic_set_bus(ioapic, pin, APIC_BUS_ISA); else ioapic_set_bus(ioapic, pin, APIC_BUS_EISA); if (intr->src_bus_irq == pin) break; ioapic_remap_vector(ioapic, pin, intr->src_bus_irq); if (ioapic_get_vector(ioapic, intr->src_bus_irq) == intr->src_bus_irq) ioapic_disable_pin(ioapic, intr->src_bus_irq); break; case PCI: ioapic_set_bus(ioapic, pin, APIC_BUS_PCI); break; default: ioapic_set_bus(ioapic, pin, APIC_BUS_UNKNOWN); break; } break; case INTENTRY_TYPE_NMI: ioapic_set_nmi(ioapic, pin); break; case INTENTRY_TYPE_SMI: ioapic_set_smi(ioapic, pin); break; case INTENTRY_TYPE_EXTINT: ioapic_set_extint(ioapic, pin); break; default: panic("%s: invalid interrupt entry type %d\n", __func__, intr->int_type); } if (intr->int_type == INTENTRY_TYPE_INT || (intr->int_flags & INTENTRY_FLAGS_TRIGGER) != INTENTRY_FLAGS_TRIGGER_CONFORM) ioapic_set_triggermode(ioapic, pin, intentry_trigger(intr)); if (intr->int_type == INTENTRY_TYPE_INT || (intr->int_flags & INTENTRY_FLAGS_POLARITY) != INTENTRY_FLAGS_POLARITY_CONFORM) ioapic_set_polarity(ioapic, pin, intentry_polarity(intr)); } /* * Parse an interrupt entry for a local APIC LVT pin. */ static void mptable_parse_local_int(int_entry_ptr intr) { u_int apic_id, pin; if (intr->dst_apic_id == 0xff) apic_id = APIC_ID_ALL; else apic_id = intr->dst_apic_id; if (intr->dst_apic_int == 0) pin = LVT_LINT0; else pin = LVT_LINT1; switch (intr->int_type) { case INTENTRY_TYPE_INT: #if 1 printf( "MPTable: Ignoring vectored local interrupt for LINTIN%d vector %d\n", intr->dst_apic_int, intr->src_bus_irq); return; #else lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_FIXED); break; #endif case INTENTRY_TYPE_NMI: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); break; case INTENTRY_TYPE_SMI: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_SMI); break; case INTENTRY_TYPE_EXTINT: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_EXTINT); break; default: panic("%s: invalid interrupt entry type %d\n", __func__, intr->int_type); } if ((intr->int_flags & INTENTRY_FLAGS_TRIGGER) != INTENTRY_FLAGS_TRIGGER_CONFORM) lapic_set_lvt_triggermode(apic_id, pin, intentry_trigger(intr)); if ((intr->int_flags & INTENTRY_FLAGS_POLARITY) != INTENTRY_FLAGS_POLARITY_CONFORM) lapic_set_lvt_polarity(apic_id, pin, intentry_polarity(intr)); } /* * Parse interrupt entries. */ static void mptable_parse_ints_handler(u_char *entry, void *arg __unused) { int_entry_ptr intr; intr = (int_entry_ptr)entry; switch (*entry) { case MPCT_ENTRY_INT: mptable_parse_io_int(intr); break; case MPCT_ENTRY_LOCAL_INT: mptable_parse_local_int(intr); break; } } /* * Configure interrupt pins for a default configuration. For details see * Table 5-2 in Section 5 of the MP Table specification. */ static void mptable_parse_default_config_ints(void) { struct INTENTRY entry; int pin; /* * All default configs route IRQs from bus 0 to the first 16 pins * of the first I/O APIC with an APIC ID of 2. */ entry.type = MPCT_ENTRY_INT; entry.int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | INTENTRY_FLAGS_TRIGGER_CONFORM; entry.src_bus_id = 0; entry.dst_apic_id = 2; /* Run through all 16 pins. */ for (pin = 0; pin < 16; pin++) { entry.dst_apic_int = pin; switch (pin) { case 0: /* Pin 0 is an ExtINT pin. */ entry.int_type = INTENTRY_TYPE_EXTINT; break; case 2: /* IRQ 0 is routed to pin 2. */ entry.int_type = INTENTRY_TYPE_INT; entry.src_bus_irq = 0; break; default: /* All other pins are identity mapped. */ entry.int_type = INTENTRY_TYPE_INT; entry.src_bus_irq = pin; break; } mptable_parse_io_int(&entry); } /* Certain configs disable certain pins. */ if (mpfps->config_type == 7) ioapic_disable_pin(ioapics[2], 0); if (mpfps->config_type == 2) { ioapic_disable_pin(ioapics[2], 2); ioapic_disable_pin(ioapics[2], 13); } } /* * Configure the interrupt pins */ static void mptable_parse_ints(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { /* Configure LINT pins. */ lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT0, APIC_LVT_DM_EXTINT); lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT1, APIC_LVT_DM_NMI); /* Configure I/O APIC pins. */ mptable_parse_default_config_ints(); } else mptable_walk_table(mptable_parse_ints_handler, NULL); } #ifdef MPTABLE_FORCE_HTT /* * Perform a hyperthreading "fix-up" to enumerate any logical CPU's * that aren't already listed in the table. * * XXX: We assume that all of the physical CPUs in the * system have the same number of logical CPUs. * * XXX: We assume that APIC ID's are allocated such that * the APIC ID's for a physical processor are aligned * with the number of logical CPU's in the processor. */ static void mptable_hyperthread_fixup(u_int id_mask) { u_int i, id, logical_cpus; /* Nothing to do if there is no HTT support. */ if ((cpu_feature & CPUID_HTT) == 0) return; logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; if (logical_cpus <= 1) return; /* * For each APIC ID of a CPU that is set in the mask, * scan the other candidate APIC ID's for this * physical processor. If any of those ID's are * already in the table, then kill the fixup. */ for (id = 0; id < NAPICID; id++) { if ((id_mask & 1 << id) == 0) continue; /* First, make sure we are on a logical_cpus boundary. */ if (id % logical_cpus != 0) return; for (i = id + 1; i < id + logical_cpus; i++) if ((id_mask & 1 << i) != 0) return; } /* * Ok, the ID's checked out, so perform the fixup by * adding the logical CPUs. */ while ((id = ffs(id_mask)) != 0) { id--; for (i = id + 1; i < id + logical_cpus; i++) { if (bootverbose) printf( "MPTable: Adding logical CPU %d from main CPU %d\n", i, id); lapic_create(i, 0); } id_mask &= ~(1 << id); } } #endif /* MPTABLE_FORCE_HTT */ /* * Support code for routing PCI interrupts using the MP Table. */ static void mptable_pci_setup(void) { int i; /* * Find the first pci bus and call it 0. Panic if pci0 is not * bus zero and there are multiple PCI busses. */ for (i = 0; i <= mptable_maxbusid; i++) if (busses[i].bus_type == PCI) { if (pci0 == -1) pci0 = i; else if (pci0 != 0) panic( "MPTable contains multiple PCI busses but no PCI bus 0"); } } static void mptable_pci_probe_table_handler(u_char *entry, void *arg) { struct pci_probe_table_args *args; int_entry_ptr intr; if (*entry != MPCT_ENTRY_INT) return; intr = (int_entry_ptr)entry; args = (struct pci_probe_table_args *)arg; KASSERT(args->bus <= mptable_maxbusid, ("bus %d is too big", args->bus)); KASSERT(busses[args->bus].bus_type == PCI, ("probing for non-PCI bus")); if (intr->src_bus_id == args->bus) args->found = 1; } int mptable_pci_probe_table(int bus) { struct pci_probe_table_args args; if (bus < 0) return (EINVAL); if (mpct == NULL || pci0 == -1 || pci0 + bus > mptable_maxbusid) return (ENXIO); if (busses[pci0 + bus].bus_type != PCI) return (ENXIO); args.bus = pci0 + bus; args.found = 0; mptable_walk_table(mptable_pci_probe_table_handler, &args); if (args.found == 0) return (ENXIO); return (0); } static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg) { struct pci_route_interrupt_args *args; int_entry_ptr intr; int vector; if (*entry != MPCT_ENTRY_INT) return; intr = (int_entry_ptr)entry; args = (struct pci_route_interrupt_args *)arg; if (intr->src_bus_id != args->bus || intr->src_bus_irq != args->irq) return; /* Make sure the APIC maps to a known APIC. */ KASSERT(ioapics[intr->dst_apic_id] != NULL, ("No I/O APIC %d to route interrupt to", intr->dst_apic_id)); /* * Look up the vector for this APIC / pin combination. If we * have previously matched an entry for this PCI IRQ but it * has the same vector as this entry, just return. Otherwise, * we use the vector for this APIC / pin combination. */ vector = ioapic_get_vector(ioapics[intr->dst_apic_id], intr->dst_apic_int); if (args->vector == vector) return; KASSERT(args->vector == -1, ("Multiple IRQs for PCI interrupt %d.%d.INT%c: %d and %d\n", args->bus, args->irq >> 2, 'A' + (args->irq & 0x3), args->vector, vector)); args->vector = vector; } int mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin) { struct pci_route_interrupt_args args; int slot; /* Like ACPI, pin numbers are 0-3, not 1-4. */ pin--; KASSERT(pci0 != -1, ("do not know how to route PCI interrupts")); args.bus = pci_get_bus(dev) + pci0; slot = pci_get_slot(dev); /* * PCI interrupt entries in the MP Table encode both the slot and * pin into the IRQ with the pin being the two least significant * bits, the slot being the next five bits, and the most significant * bit being reserved. */ args.irq = slot << 2 | pin; args.vector = -1; mptable_walk_table(mptable_pci_route_interrupt_handler, &args); if (args.vector < 0) { device_printf(pcib, "unable to route slot %d INT%c\n", slot, 'A' + pin); return (PCI_INVALID_IRQ); } if (bootverbose) device_printf(pcib, "slot %d INT%c routed to irq %d\n", slot, 'A' + pin, args.vector); return (args.vector); } Index: head/sys/i386/include/apicvar.h =================================================================== --- head/sys/i386/include/apicvar.h (revision 145079) +++ head/sys/i386/include/apicvar.h (revision 145080) @@ -1,216 +1,215 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_APICVAR_H_ #define _MACHINE_APICVAR_H_ /* * Local && I/O APIC variable definitions. */ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define APIC_ID_ALL 0xff /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. * * Right now IPI_STOP used by kdb shares the interrupt priority class with * the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock. * Eventually IPI_STOP should use NMI IPIs - this would eliminate this and * other deadlocks caused by IPI_STOP. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 2) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_LAZYPMAP (APIC_IPI_INTS + 4) /* Lazy pmap release. */ /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5) /* IPIs handled by IPI_BITMAPED_VECTOR (XXX ups is there a better place?) */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_BITMAP_LAST IPI_AST #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (Does not use the APIC's interrupt fifo) */ #define APIC_SPURIOUS_INT 255 #define LVT_LINT0 0 #define LVT_LINT1 1 #define LVT_TIMER 2 #define LVT_ERROR 3 #define LVT_PMC 4 #define LVT_THERMAL 5 #define LVT_MAX LVT_THERMAL #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI /* * An APIC enumerator is a psuedo bus driver that enumerates APIC's including * CPU's and I/O APIC's. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint); u_int apic_irq_to_idt(u_int irq); u_int apic_idt_to_irq(u_int vector); void apic_register_enumerator(struct apic_enumerator *enumerator); void *ioapic_create(uintptr_t addr, int32_t id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); -void ioapic_enable_mixed_mode(void); int ioapic_get_vector(void *cookie, u_int pin); int ioapic_next_logical_cluster(void); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); void lapic_create(u_int apic_id, int boot_cpu); void lapic_disable(void); void lapic_dump(const char *str); void lapic_enable_intr(u_int vector); void lapic_eoi(void); int lapic_id(void); void lapic_init(uintptr_t addr); int lapic_intr_pending(u_int vector); void lapic_ipi_raw(register_t icrlo, u_int dest); void lapic_ipi_vectored(u_int vector, int dest); int lapic_ipi_wait(int delay); void lapic_handle_intr(struct intrframe frame); void lapic_handle_timer(struct clockframe frame); void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode); int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(void); int lapic_setup_clock(void); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */