Index: head/sys/x86/acpica/OsdEnvironment.c
===================================================================
--- head/sys/x86/acpica/OsdEnvironment.c	(revision 326262)
+++ head/sys/x86/acpica/OsdEnvironment.c	(revision 326263)
@@ -1,102 +1,104 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2000,2001 Michael Smith
 * Copyright (c) 2000 BSDi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include

static u_long acpi_root_phys;

SYSCTL_ULONG(_machdep, OID_AUTO, acpi_root, CTLFLAG_RD, &acpi_root_phys, 0,
    "The physical address of the RSDP");

ACPI_STATUS
AcpiOsInitialize(void)
{

	return (AE_OK);
}

ACPI_STATUS
AcpiOsTerminate(void)
{

	return (AE_OK);
}

static u_long
acpi_get_root_from_loader(void)
{
	long acpi_root;

	if (TUNABLE_ULONG_FETCH("acpi.rsdp", &acpi_root))
		return (acpi_root);

	/*
	 * The hints mechanism is unreliable (it fails if anybody ever
	 * compiled in hints to the kernel).  It has been replaced
	 * by the tunable method, but is used here as a fallback to
	 * retain maximum compatibility between old loaders and new
	 * kernels.  It can be removed after 11.0R.
	 */
	if (resource_long_value("acpi", 0, "rsdp", &acpi_root) == 0)
		return (acpi_root);

	return (0);
}

static u_long
acpi_get_root_from_memory(void)
{
	ACPI_PHYSICAL_ADDRESS acpi_root;

	if (ACPI_SUCCESS(AcpiFindRootPointer(&acpi_root)))
		return (acpi_root);

	return (0);
}

ACPI_PHYSICAL_ADDRESS
AcpiOsGetRootPointer(void)
{

	if (acpi_root_phys == 0) {
		acpi_root_phys = acpi_get_root_from_loader();
		if (acpi_root_phys == 0)
			acpi_root_phys = acpi_get_root_from_memory();
	}

	return (acpi_root_phys);
}
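AcpiOsGetRootPointer() above is a memoized, multi-source probe: return the cached value, else prefer the loader-supplied tunable, else scan memory. A minimal standalone C sketch of the same pattern; fetch_from_loader() and scan_low_memory() are hypothetical stand-ins for the kernel's TUNABLE_ULONG_FETCH() and AcpiFindRootPointer(), and the address constant is made up:

#include <stdio.h>

/* Hypothetical stand-ins; not kernel APIs. */
static unsigned long fetch_from_loader(void) { return 0; }      /* no tunable set */
static unsigned long scan_low_memory(void)  { return 0xf6a10; } /* pretend RSDP */

static unsigned long root_phys;	/* memoized, like acpi_root_phys */

static unsigned long
get_root_pointer(void)
{
	if (root_phys == 0) {
		root_phys = fetch_from_loader();	/* prefer loader value */
		if (root_phys == 0)
			root_phys = scan_low_memory();	/* fall back to scan */
	}
	return (root_phys);
}

int
main(void)
{
	printf("RSDP at %#lx\n", get_root_pointer());
	return (0);
}

Memoizing into a file-scope variable is what lets the sysctl above report the address later without repeating the scan.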
Index: head/sys/x86/acpica/acpi_apm.c
===================================================================
--- head/sys/x86/acpica/acpi_apm.c	(revision 326262)
+++ head/sys/x86/acpica/acpi_apm.c	(revision 326263)
@@ -1,452 +1,454 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2001 Mitsuru IWASAKI
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * APM driver emulation
 */

#define	APM_UNKNOWN	0xff

static int apm_active;

static MALLOC_DEFINE(M_APMDEV, "apmdev", "APM device emulation");

static d_open_t apmopen;
static d_write_t apmwrite;
static d_ioctl_t apmioctl;
static d_poll_t apmpoll;
static d_kqfilter_t apmkqfilter;
static void apmreadfiltdetach(struct knote *kn);
static int apmreadfilt(struct knote *kn, long hint);
static struct filterops apm_readfiltops = {
	.f_isfd = 1,
	.f_detach = apmreadfiltdetach,
	.f_event = apmreadfilt,
};

static struct cdevsw apm_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	apmopen,
	.d_write =	apmwrite,
	.d_ioctl =	apmioctl,
	.d_poll =	apmpoll,
	.d_name =	"apm",
	.d_kqfilter =	apmkqfilter
};

static int
acpi_capm_convert_battstate(struct acpi_battinfo *battp)
{
	int state;

	state = APM_UNKNOWN;

	if (battp->state & ACPI_BATT_STAT_DISCHARG) {
		if (battp->cap >= 50)
			state = 0;	/* high */
		else
			state = 1;	/* low */
	}
	if (battp->state & ACPI_BATT_STAT_CRITICAL)
		state = 2;		/* critical */
	if (battp->state & ACPI_BATT_STAT_CHARGING)
		state = 3;		/* charging */

	/* If still unknown, determine it based on the battery capacity. */
	if (state == APM_UNKNOWN) {
		if (battp->cap >= 50)
			state = 0;	/* high */
		else
			state = 1;	/* low */
	}

	return (state);
}

static int
acpi_capm_convert_battflags(struct acpi_battinfo *battp)
{
	int flags;

	flags = 0;

	if (battp->cap >= 50)
		flags |= APM_BATT_HIGH;
	else {
		if (battp->state & ACPI_BATT_STAT_CRITICAL)
			flags |= APM_BATT_CRITICAL;
		else
			flags |= APM_BATT_LOW;
	}
	if (battp->state & ACPI_BATT_STAT_CHARGING)
		flags |= APM_BATT_CHARGING;
	if (battp->state == ACPI_BATT_STAT_NOT_PRESENT)
		flags = APM_BATT_NOT_PRESENT;

	return (flags);
}

static int
acpi_capm_get_info(apm_info_t aip)
{
	int	acline;
	struct	acpi_battinfo batt;

	aip->ai_infoversion = 1;
	aip->ai_major = 1;
	aip->ai_minor = 2;
	aip->ai_status = apm_active;
	aip->ai_capabilities = 0xff00;	/* unknown */

	if (acpi_acad_get_acline(&acline))
		aip->ai_acline = APM_UNKNOWN;	/* unknown */
	else
		aip->ai_acline = acline;	/* on/off */

	if (acpi_battery_get_battinfo(NULL, &batt) != 0) {
		aip->ai_batt_stat = APM_UNKNOWN;
		aip->ai_batt_life = APM_UNKNOWN;
		aip->ai_batt_time = -1;		/* unknown */
		aip->ai_batteries = ~0U;	/* unknown */
	} else {
		aip->ai_batt_stat = acpi_capm_convert_battstate(&batt);
		aip->ai_batt_life = batt.cap;
		aip->ai_batt_time = (batt.min == -1) ? -1 : batt.min * 60;
		aip->ai_batteries = acpi_battery_get_units();
	}

	return (0);
}

static int
acpi_capm_get_pwstatus(apm_pwstatus_t app)
{
	device_t dev;
	int	acline, unit, error;
	struct	acpi_battinfo batt;

	if (app->ap_device != PMDV_ALLDEV &&
	    (app->ap_device < PMDV_BATT0 || app->ap_device > PMDV_BATT_ALL))
		return (1);

	if (app->ap_device == PMDV_ALLDEV)
		error = acpi_battery_get_battinfo(NULL, &batt);
	else {
		unit = app->ap_device - PMDV_BATT0;
		dev = devclass_get_device(devclass_find("battery"), unit);
		if (dev != NULL)
			error = acpi_battery_get_battinfo(dev, &batt);
		else
			error = ENXIO;
	}
	if (error)
		return (1);

	app->ap_batt_stat = acpi_capm_convert_battstate(&batt);
	app->ap_batt_flag = acpi_capm_convert_battflags(&batt);
	app->ap_batt_life = batt.cap;
	app->ap_batt_time = (batt.min == -1) ? -1 : batt.min * 60;

	if (acpi_acad_get_acline(&acline))
		app->ap_acline = APM_UNKNOWN;
	else
		app->ap_acline = acline;	/* on/off */

	return (0);
}

/* Create a struct for tracking per-device suspend notification. */
static struct apm_clone_data *
apm_create_clone(struct cdev *dev, struct acpi_softc *acpi_sc)
{
	struct apm_clone_data *clone;

	clone = malloc(sizeof(*clone), M_APMDEV, M_WAITOK);
	clone->cdev = dev;
	clone->acpi_sc = acpi_sc;
	clone->notify_status = APM_EV_NONE;
	bzero(&clone->sel_read, sizeof(clone->sel_read));
	knlist_init_mtx(&clone->sel_read.si_note, &acpi_mutex);

	/*
	 * The acpi device is always managed by devd(8) and is considered
	 * writable (i.e., ack is required to allow suspend to proceed.)
	 */
	if (strcmp("acpi", devtoname(dev)) == 0)
		clone->flags = ACPI_EVF_DEVD | ACPI_EVF_WRITE;
	else
		clone->flags = ACPI_EVF_NONE;

	ACPI_LOCK(acpi);
	STAILQ_INSERT_TAIL(&acpi_sc->apm_cdevs, clone, entries);
	ACPI_UNLOCK(acpi);
	return (clone);
}

static void
apmdtor(void *data)
{
	struct	apm_clone_data *clone;
	struct	acpi_softc *acpi_sc;

	clone = data;
	acpi_sc = clone->acpi_sc;

	/* We are about to lose a reference so check if suspend should occur */
	if (acpi_sc->acpi_next_sstate != 0 &&
	    clone->notify_status != APM_EV_ACKED)
		acpi_AckSleepState(clone, 0);

	/* Remove this clone's data from the list and free it. */
	ACPI_LOCK(acpi);
	STAILQ_REMOVE(&acpi_sc->apm_cdevs, clone, apm_clone_data, entries);
	seldrain(&clone->sel_read);
	knlist_destroy(&clone->sel_read.si_note);
	ACPI_UNLOCK(acpi);
	free(clone, M_APMDEV);
}

static int
apmopen(struct cdev *dev, int flag, int fmt, struct thread *td)
{
	struct	acpi_softc *acpi_sc;
	struct	apm_clone_data *clone;

	acpi_sc = devclass_get_softc(devclass_find("acpi"), 0);
	clone = apm_create_clone(dev, acpi_sc);
	devfs_set_cdevpriv(clone, apmdtor);
	/* If the device is opened for write, record that. */
	if ((flag & FWRITE) != 0)
		clone->flags |= ACPI_EVF_WRITE;

	return (0);
}

static int
apmioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
    struct thread *td)
{
	int	error;
	struct	apm_clone_data *clone;
	struct	acpi_softc *acpi_sc;
	struct	apm_info info;
	struct	apm_event_info *ev_info;
	apm_info_old_t aiop;

	error = 0;
	devfs_get_cdevpriv((void **)&clone);
	acpi_sc = clone->acpi_sc;

	switch (cmd) {
	case APMIO_SUSPEND:
		if ((flag & FWRITE) == 0)
			return (EPERM);
		if (acpi_sc->acpi_next_sstate == 0) {
			if (acpi_sc->acpi_suspend_sx != ACPI_STATE_S5) {
				error = acpi_ReqSleepState(acpi_sc,
				    acpi_sc->acpi_suspend_sx);
			} else {
				printf(
				    "power off via apm suspend not supported\n");
				error = ENXIO;
			}
		} else
			error = acpi_AckSleepState(clone, 0);
		break;
	case APMIO_STANDBY:
		if ((flag & FWRITE) == 0)
			return (EPERM);
		if (acpi_sc->acpi_next_sstate == 0) {
			if (acpi_sc->acpi_standby_sx != ACPI_STATE_S5) {
				error = acpi_ReqSleepState(acpi_sc,
				    acpi_sc->acpi_standby_sx);
			} else {
				printf(
				    "power off via apm standby not supported\n");
				error = ENXIO;
			}
		} else
			error = acpi_AckSleepState(clone, 0);
		break;
	case APMIO_NEXTEVENT:
		printf("apm nextevent start\n");
		ACPI_LOCK(acpi);
		if (acpi_sc->acpi_next_sstate != 0 &&
		    clone->notify_status == APM_EV_NONE) {
			ev_info = (struct apm_event_info *)addr;
			if (acpi_sc->acpi_next_sstate <= ACPI_STATE_S3)
				ev_info->type = PMEV_STANDBYREQ;
			else
				ev_info->type = PMEV_SUSPENDREQ;
			ev_info->index = 0;
			clone->notify_status = APM_EV_NOTIFIED;
			printf("apm event returning %d\n", ev_info->type);
		} else
			error = EAGAIN;
		ACPI_UNLOCK(acpi);
		break;
	case APMIO_GETINFO_OLD:
		if (acpi_capm_get_info(&info))
			error = ENXIO;
		aiop = (apm_info_old_t)addr;
		aiop->ai_major = info.ai_major;
		aiop->ai_minor = info.ai_minor;
		aiop->ai_acline = info.ai_acline;
		aiop->ai_batt_stat = info.ai_batt_stat;
		aiop->ai_batt_life = info.ai_batt_life;
		aiop->ai_status = info.ai_status;
		break;
	case APMIO_GETINFO:
		if (acpi_capm_get_info((apm_info_t)addr))
			error = ENXIO;
		break;
	case APMIO_GETPWSTATUS:
		if (acpi_capm_get_pwstatus((apm_pwstatus_t)addr))
			error = ENXIO;
		break;
	case APMIO_ENABLE:
		if ((flag & FWRITE) == 0)
			return (EPERM);
		apm_active = 1;
		break;
	case APMIO_DISABLE:
		if ((flag & FWRITE) == 0)
			return (EPERM);
		apm_active = 0;
		break;
	case APMIO_HALTCPU:
		break;
	case APMIO_NOTHALTCPU:
		break;
	case APMIO_DISPLAY:
		if ((flag & FWRITE) == 0)
			return (EPERM);
		break;
	case APMIO_BIOS:
		if ((flag & FWRITE) == 0)
			return (EPERM);
		bzero(addr, sizeof(struct apm_bios_arg));
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static int
apmwrite(struct cdev *dev, struct uio *uio, int ioflag)
{

	return (uio->uio_resid);
}

static int
apmpoll(struct cdev *dev, int events, struct thread *td)
{
	struct	apm_clone_data *clone;
	int revents;

	revents = 0;
	devfs_get_cdevpriv((void **)&clone);
	ACPI_LOCK(acpi);
	if (clone->acpi_sc->acpi_next_sstate)
		revents |= events & (POLLIN | POLLRDNORM);
	else
		selrecord(td, &clone->sel_read);
	ACPI_UNLOCK(acpi);
	return (revents);
}

static int
apmkqfilter(struct cdev *dev, struct knote *kn)
{
	struct	apm_clone_data *clone;

	devfs_get_cdevpriv((void **)&clone);
	ACPI_LOCK(acpi);
	kn->kn_hook = clone;
	kn->kn_fop = &apm_readfiltops;
	knlist_add(&clone->sel_read.si_note, kn, 0);
	ACPI_UNLOCK(acpi);
	return (0);
}

static void
apmreadfiltdetach(struct knote *kn)
{
	struct	apm_clone_data *clone;

	ACPI_LOCK(acpi);
	clone = kn->kn_hook;
	knlist_remove(&clone->sel_read.si_note, kn, 0);
	ACPI_UNLOCK(acpi);
}

static int
apmreadfilt(struct knote *kn, long hint)
{
	struct	apm_clone_data *clone;
	int	sleeping;

	ACPI_LOCK(acpi);
	clone = kn->kn_hook;
	sleeping = clone->acpi_sc->acpi_next_sstate ? 1 : 0;
	ACPI_UNLOCK(acpi);
	return (sleeping);
}

void
acpi_apm_init(struct acpi_softc *sc)
{

	/* Create a clone for /dev/acpi also. */
	STAILQ_INIT(&sc->apm_cdevs);
	sc->acpi_clone = apm_create_clone(sc->acpi_dev_t, sc);

	make_dev(&apm_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0660, "apmctl");
	make_dev(&apm_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0664, "apm");
}
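acpi_capm_convert_battstate() above maps ACPI battery status bits onto the APM state codes (0 high, 1 low, 2 critical, 3 charging, 0xff unknown), with charging taking precedence over critical and an idle battery classified by remaining capacity. A minimal standalone C sketch of the same decision order; the BATT_* flag values here are made-up stand-ins, not the kernel's ACPI_BATT_STAT_* constants:

#include <stdio.h>

/* Stand-in flag values for illustration only. */
#define	BATT_DISCHARGING	0x1
#define	BATT_CHARGING		0x2
#define	BATT_CRITICAL		0x4
#define	STATE_UNKNOWN		0xff

static int
batt_state(int flags, int cap_pct)
{
	int state = STATE_UNKNOWN;

	if (flags & BATT_DISCHARGING)
		state = (cap_pct >= 50) ? 0 : 1;	/* high : low */
	if (flags & BATT_CRITICAL)
		state = 2;				/* critical overrides low */
	if (flags & BATT_CHARGING)
		state = 3;				/* charging overrides all */
	if (state == STATE_UNKNOWN)			/* idle: classify by capacity */
		state = (cap_pct >= 50) ? 0 : 1;
	return (state);
}

int
main(void)
{
	/* Prints "0 2 1": discharging-high, critical, idle-low. */
	printf("%d %d %d\n", batt_state(BATT_DISCHARGING, 80),
	    batt_state(BATT_CRITICAL, 10), batt_state(0, 30));
	return (0);
}

Writing the checks as successive unconditional overrides, as the driver does, keeps the precedence explicit without nested conditionals.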
Index: head/sys/x86/acpica/acpi_wakeup.c
===================================================================
--- head/sys/x86/acpica/acpi_wakeup.c	(revision 326262)
+++ head/sys/x86/acpica/acpi_wakeup.c	(revision 326263)
@@ -1,427 +1,429 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2001 Takanori Watanabe
 * Copyright (c) 2001-2012 Mitsuru IWASAKI
 * Copyright (c) 2003 Peter Wemm
 * Copyright (c) 2008-2012 Jung-uk Kim
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#ifdef DEV_APIC
#include
#include
#endif
#ifdef SMP
#include
#include
#endif

#include
#include

#include "acpi_wakecode.h"
#include "acpi_wakedata.h"

/* Make sure the code is less than a page and leave room for the stack. */
CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024);

extern int		acpi_resume_beep;
extern int		acpi_reset_video;

#ifdef SMP
extern struct susppcb	**susppcbs;
static cpuset_t		suspcpus;
#else
static struct susppcb	**susppcbs;
#endif

static void	*acpi_alloc_wakeup_handler(void **);
static void	acpi_stop_beep(void *);

#ifdef SMP
static int	acpi_wakeup_ap(struct acpi_softc *, int);
static void	acpi_wakeup_cpus(struct acpi_softc *);
#endif

#ifdef __amd64__
#define	ACPI_WAKEPAGES	4
#else
#define	ACPI_WAKEPAGES	1
#endif

#define	WAKECODE_FIXUP(offset, type, val)	do {	\
	type	*addr;					\
	addr = (type *)(sc->acpi_wakeaddr + (offset));	\
	*addr = val;					\
} while (0)

static void
acpi_stop_beep(void *arg)
{

	if (acpi_resume_beep != 0)
		timer_spkr_release();
}

#ifdef SMP
static int
acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
{
	struct pcb *pcb;
	int		vector = (sc->acpi_wakephys >> 12) & 0xff;
	int		apic_id = cpu_apic_ids[cpu];
	int		ms;

	pcb = &susppcbs[cpu]->sp_pcb;
	WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb);
	WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit);
	WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base);

	ipi_startup(apic_id, vector);

	/* Wait up to 5 seconds for it to resume. */
	for (ms = 0; ms < 5000; ms++) {
		if (!CPU_ISSET(cpu, &suspended_cpus))
			return (1);	/* return SUCCESS */
		DELAY(1000);
	}
	return (0);		/* return FAILURE */
}

#define	WARMBOOT_TARGET		0
#define	WARMBOOT_OFF		(KERNBASE + 0x0467)
#define	WARMBOOT_SEG		(KERNBASE + 0x0469)

#define	CMOS_REG		(0x70)
#define	CMOS_DATA		(0x71)
#define	BIOS_RESET		(0x0f)
#define	BIOS_WARM		(0x0a)

static void
acpi_wakeup_cpus(struct acpi_softc *sc)
{
	uint32_t	mpbioswarmvec;
	int		cpu;
	u_char		mpbiosreason;

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF);
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);

	/* setup a vector to our boot code */
	*((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET;
	*((volatile u_short *)WARMBOOT_SEG) = sc->acpi_wakephys >> 4;
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
	/* Wake up each AP. */
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		if (!CPU_ISSET(cpu, &suspcpus))
			continue;
		if (acpi_wakeup_ap(sc, cpu) == 0) {
			/* restore the warmstart vector */
			*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
			panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)",
			    cpu, cpu_apic_ids[cpu]);
		}
	}

	/* restore the warmstart vector */
	*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;

	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);
}
#endif

int
acpi_sleep_machdep(struct acpi_softc *sc, int state)
{
	ACPI_STATUS	status;
	struct pcb	*pcb;

	if (sc->acpi_wakeaddr == 0ul)
		return (-1);	/* couldn't alloc wake memory */

#ifdef SMP
	suspcpus = all_cpus;
	CPU_CLR(PCPU_GET(cpuid), &suspcpus);
#endif

	if (acpi_resume_beep != 0)
		timer_spkr_acquire();

	AcpiSetFirmwareWakingVector(sc->acpi_wakephys, 0);

	intr_suspend();

	pcb = &susppcbs[0]->sp_pcb;
	if (savectx(pcb)) {
#ifdef __amd64__
		fpususpend(susppcbs[0]->sp_fpususpend);
#else
		npxsuspend(susppcbs[0]->sp_fpususpend);
#endif
#ifdef SMP
		if (!CPU_EMPTY(&suspcpus) && suspend_cpus(suspcpus) == 0) {
			device_printf(sc->acpi_dev, "Failed to suspend APs\n");
			return (0);	/* couldn't sleep */
		}
#endif

		WAKECODE_FIXUP(resume_beep, uint8_t, (acpi_resume_beep != 0));
		WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0));

#ifdef __amd64__
		WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER) &
		    ~(EFER_LMA));
#else
		WAKECODE_FIXUP(wakeup_cr4, register_t, pcb->pcb_cr4);
#endif
		WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb);
		WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit);
		WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base);

		/* Call ACPICA to enter the desired sleep state */
		if (state == ACPI_STATE_S4 && sc->acpi_s4bios)
			status = AcpiEnterSleepStateS4bios();
		else
			status = AcpiEnterSleepState(state);
		if (ACPI_FAILURE(status)) {
			device_printf(sc->acpi_dev,
			    "AcpiEnterSleepState failed - %s\n",
			    AcpiFormatException(status));
			return (0);	/* couldn't sleep */
		}

		for (;;)
			ia32_pause();
	} else {
#ifdef __amd64__
		fpuresume(susppcbs[0]->sp_fpususpend);
#else
		npxresume(susppcbs[0]->sp_fpususpend);
#endif
	}

	return (1);	/* wakeup successfully */
}

int
acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result,
    int intr_enabled)
{

	if (sleep_result == -1)
		return (sleep_result);

	if (!intr_enabled) {
		/* Wakeup MD procedures in interrupt disabled context */
		if (sleep_result == 1) {
			pmap_init_pat();
			initializecpu();
			PCPU_SET(switchtime, 0);
			PCPU_SET(switchticks, ticks);
#ifdef DEV_APIC
			lapic_xapic_mode();
#endif
#ifdef SMP
			if (!CPU_EMPTY(&suspcpus))
				acpi_wakeup_cpus(sc);
#endif
		}

#ifdef SMP
		if (!CPU_EMPTY(&suspcpus))
			restart_cpus(suspcpus);
#endif
		mca_resume();
#ifdef __amd64__
		if (vmm_resume_p != NULL)
			vmm_resume_p();
#endif
		intr_resume(/*suspend_cancelled*/false);

		AcpiSetFirmwareWakingVector(0, 0);
	} else {
		/* Wakeup MD procedures in interrupt enabled context */
		if (sleep_result == 1 && mem_range_softc.mr_op != NULL &&
		    mem_range_softc.mr_op->reinit != NULL)
			mem_range_softc.mr_op->reinit(&mem_range_softc);
	}

	return (sleep_result);
}

static void *
acpi_alloc_wakeup_handler(void *wakepages[ACPI_WAKEPAGES])
{
	int	i;

	memset(wakepages, 0, ACPI_WAKEPAGES * sizeof(*wakepages));

	/*
	 * Specify the region for our wakeup code.  We want it in the low 1 MB
	 * region, excluding real mode IVT (0-0x3ff), BDA (0x400-0x4ff), EBDA
	 * (less than 128KB, below 0xa0000, must be excluded by SMAP and DSDT),
	 * and ROM area (0xa0000 and above).  The temporary page tables must be
	 * page-aligned.
	 */
	for (i = 0; i < ACPI_WAKEPAGES; i++) {
		wakepages[i] = contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT,
		    0x500, 0xa0000, PAGE_SIZE, 0ul);
		if (wakepages[i] == NULL) {
			printf("%s: can't alloc wake memory\n", __func__);
			goto freepages;
		}
	}
	if (EVENTHANDLER_REGISTER(power_resume, acpi_stop_beep, NULL,
	    EVENTHANDLER_PRI_LAST) == NULL) {
		printf("%s: can't register event handler\n", __func__);
		goto freepages;
	}
	susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK);
	for (i = 0; i < mp_ncpus; i++) {
		susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK);
		susppcbs[i]->sp_fpususpend = alloc_fpusave(M_WAITOK);
	}

	return (wakepages);

freepages:
	for (i = 0; i < ACPI_WAKEPAGES; i++)
		if (wakepages[i] != NULL)
			contigfree(wakepages[i], PAGE_SIZE, M_DEVBUF);

	return (NULL);
}

void
acpi_install_wakeup_handler(struct acpi_softc *sc)
{
	static void	*wakeaddr;
	void		*wakepages[ACPI_WAKEPAGES];
#ifdef __amd64__
	uint64_t	*pt4, *pt3, *pt2;
	vm_paddr_t	pt4pa, pt3pa, pt2pa;
	int		i;
#endif

	if (wakeaddr != NULL)
		return;

	if (acpi_alloc_wakeup_handler(wakepages) == NULL)
		return;

	wakeaddr = wakepages[0];
	sc->acpi_wakeaddr = (vm_offset_t)wakeaddr;
	sc->acpi_wakephys = vtophys(wakeaddr);

#ifdef __amd64__
	pt4 = wakepages[1];
	pt3 = wakepages[2];
	pt2 = wakepages[3];
	pt4pa = vtophys(pt4);
	pt3pa = vtophys(pt3);
	pt2pa = vtophys(pt2);
#endif

	bcopy(wakecode, (void *)sc->acpi_wakeaddr, sizeof(wakecode));

	/* Patch GDT base address, ljmp targets. */
	WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t,
	    sc->acpi_wakephys + bootgdt);
	WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t,
	    sc->acpi_wakephys + wakeup_32);
#ifdef __amd64__
	WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t,
	    sc->acpi_wakephys + wakeup_64);
	WAKECODE_FIXUP(wakeup_pagetables, uint32_t, pt4pa);
#endif

	/* Save pointers to some global data. */
	WAKECODE_FIXUP(wakeup_ret, void *, resumectx);
#ifndef __amd64__
#if defined(PAE) || defined(PAE_TABLES)
	WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdpt));
#else
	WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdir));
#endif

#else /* __amd64__ */
	/* Create the initial 1GB replicated page tables */
	for (i = 0; i < 512; i++) {
		/*
		 * Each slot of the level 4 pages points
		 * to the same level 3 page
		 */
		pt4[i] = (uint64_t)pt3pa;
		pt4[i] |= PG_V | PG_RW | PG_U;

		/*
		 * Each slot of the level 3 pages points
		 * to the same level 2 page
		 */
		pt3[i] = (uint64_t)pt2pa;
		pt3[i] |= PG_V | PG_RW | PG_U;

		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
		pt2[i] = i * (2 * 1024 * 1024);
		pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
	}
#endif /* !__amd64__ */

	if (bootverbose)
		device_printf(sc->acpi_dev, "wakeup code va %#jx pa %#jx\n",
		    (uintmax_t)sc->acpi_wakeaddr, (uintmax_t)sc->acpi_wakephys);
}
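The "1GB replicated page tables" built at the end of acpi_install_wakeup_handler() are three pages: every PML4 slot points at the same PDP page, every PDP slot points at the same PD page, and the PD maps 0-1GB with 2MB superpages, so any virtual address resolves into the identity-mapped low gigabyte during early resume. A minimal standalone C sketch of the same construction; the PG_* values match the usual x86 PTE bit encodings, and the two "physical addresses" are made up for the example:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define	PG_V	0x001	/* valid */
#define	PG_RW	0x002	/* writable */
#define	PG_U	0x004	/* user-accessible */
#define	PG_PS	0x080	/* 2MB page at the PD level */

int
main(void)
{
	uint64_t *pt4 = calloc(512, sizeof(uint64_t));
	uint64_t *pt3 = calloc(512, sizeof(uint64_t));
	uint64_t *pt2 = calloc(512, sizeof(uint64_t));
	uint64_t pt3pa = 0x2000, pt2pa = 0x3000;	/* pretend phys addrs */
	int i;

	if (pt4 == NULL || pt3 == NULL || pt2 == NULL)
		return (1);
	for (i = 0; i < 512; i++) {
		pt4[i] = pt3pa | PG_V | PG_RW | PG_U;	/* every slot -> same PDP */
		pt3[i] = pt2pa | PG_V | PG_RW | PG_U;	/* every slot -> same PD */
		/* PD entry i maps the i-th 2MB chunk of the low 1GB. */
		pt2[i] = (uint64_t)i * (2 * 1024 * 1024) | PG_V | PG_RW |
		    PG_PS | PG_U;
	}
	printf("pt2[5] maps %#jx\n", (uintmax_t)(pt2[5] & ~0xfffULL));
	free(pt4);
	free(pt3);
	free(pt2);
	return (0);
}

Replicating one PDP and one PD into every slot is what lets the real-mode trampoline run before the kernel's proper page tables are reloaded.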
Index: head/sys/x86/acpica/madt.c
===================================================================
--- head/sys/x86/acpica/madt.c	(revision 326262)
+++ head/sys/x86/acpica/madt.c	(revision 326263)
@@ -1,764 +1,766 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2003 John Baldwin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* These two arrays are indexed by APIC IDs. */
static struct {
	void *io_apic;
	UINT32 io_vector;
} *ioapics;

static struct lapic_info {
	u_int la_enabled;
	u_int la_acpi_id;
} *lapics;

int madt_found_sci_override;
static ACPI_TABLE_MADT *madt;
static vm_paddr_t madt_physaddr;
static vm_offset_t madt_length;

static MALLOC_DEFINE(M_MADT, "madt_table", "ACPI MADT Table Items");

static enum intr_polarity interrupt_polarity(UINT16 IntiFlags, UINT8 Source);
static enum intr_trigger interrupt_trigger(UINT16 IntiFlags, UINT8 Source);
static int	madt_find_cpu(u_int acpi_id, u_int *apic_id);
static int	madt_find_interrupt(int intr, void **apic, u_int *pin);
static void	madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg);
static void	madt_parse_interrupt_override(
		    ACPI_MADT_INTERRUPT_OVERRIDE *intr);
static void	madt_parse_ints(ACPI_SUBTABLE_HEADER *entry,
		    void *arg __unused);
static void	madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi);
static void	madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi);
static int	madt_probe(void);
static int	madt_probe_cpus(void);
static void	madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry,
		    void *arg __unused);
static void	madt_setup_cpus_handler(ACPI_SUBTABLE_HEADER *entry,
		    void *arg __unused);
static void	madt_register(void *dummy);
static int	madt_setup_local(void);
static int	madt_setup_io(void);
static void	madt_walk_table(acpi_subtable_handler *handler, void *arg);

static struct apic_enumerator madt_enumerator = {
	"MADT",
	madt_probe,
	madt_probe_cpus,
	madt_setup_local,
	madt_setup_io
};

/*
 * Look for an ACPI Multiple APIC Description Table ("APIC")
 */
static int
madt_probe(void)
{

	madt_physaddr = acpi_find_table(ACPI_SIG_MADT);
	if (madt_physaddr == 0)
		return (ENXIO);
	return (-50);
}

/*
 * Run through the MP table enumerating CPUs.
 */
static int
madt_probe_cpus(void)
{

	madt = acpi_map_table(madt_physaddr, ACPI_SIG_MADT);
	madt_length = madt->Header.Length;
	KASSERT(madt != NULL, ("Unable to re-map MADT"));
	madt_walk_table(madt_probe_cpus_handler, NULL);
	acpi_unmap_table(madt);
	madt = NULL;
	return (0);
}

/*
 * Initialize the local APIC on the BSP.
 */
static int
madt_setup_local(void)
{
	ACPI_TABLE_DMAR *dmartbl;
	vm_paddr_t dmartbl_physaddr;
	const char *reason;
	char *hw_vendor;
	u_int p[4];
	int user_x2apic;
	bool bios_x2apic;

	if ((cpu_feature2 & CPUID2_X2APIC) != 0) {
		reason = NULL;

		/*
		 * Automatically detect several configurations where
		 * x2APIC mode is known to cause troubles.  User can
		 * override the setting with hw.x2apic_enable tunable.
		 */
		dmartbl_physaddr = acpi_find_table(ACPI_SIG_DMAR);
		if (dmartbl_physaddr != 0) {
			dmartbl = acpi_map_table(dmartbl_physaddr,
			    ACPI_SIG_DMAR);
			if ((dmartbl->Flags & ACPI_DMAR_X2APIC_OPT_OUT) != 0)
				reason = "by DMAR table";
			acpi_unmap_table(dmartbl);
		}
		if (vm_guest == VM_GUEST_VMWARE) {
			vmware_hvcall(VMW_HVCMD_GETVCPU_INFO, p);
			if ((p[0] & VMW_VCPUINFO_VCPU_RESERVED) != 0 ||
			    (p[0] & VMW_VCPUINFO_LEGACY_X2APIC) == 0)
				reason =
				    "inside VMWare without intr redirection";
		} else if (vm_guest == VM_GUEST_XEN) {
			reason = "due to running under XEN";
		} else if (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) == 0x2a) {
			hw_vendor = kern_getenv("smbios.planar.maker");
			/*
			 * It seems that some Lenovo and ASUS
			 * SandyBridge-based notebook BIOSes have a
			 * bug which prevents booting AP in x2APIC
			 * mode.  Since the only way to detect mobile
			 * CPU is to check northbridge pci id, which
			 * cannot be done that early, disable x2APIC
			 * for all Lenovo and ASUS SandyBridge
			 * machines.
			 */
			if (hw_vendor != NULL) {
				if (!strcmp(hw_vendor, "LENOVO") ||
				    !strcmp(hw_vendor,
				    "ASUSTeK Computer Inc.")) {
					reason =
				    "for a suspected SandyBridge BIOS bug";
				}
				freeenv(hw_vendor);
			}
		}
		bios_x2apic = lapic_is_x2apic();
		if (reason != NULL && bios_x2apic) {
			if (bootverbose)
				printf("x2APIC should be disabled %s but "
				    "already enabled by BIOS; enabling.\n",
				    reason);
			reason = NULL;
		}
		if (reason == NULL)
			x2apic_mode = 1;
		else if (bootverbose)
			printf("x2APIC available but disabled %s\n", reason);
		user_x2apic = x2apic_mode;
		TUNABLE_INT_FETCH("hw.x2apic_enable", &user_x2apic);
		if (user_x2apic != x2apic_mode) {
			if (bios_x2apic && !user_x2apic)
				printf("x2APIC disabled by tunable and "
				    "enabled by BIOS; ignoring tunable.");
			else
				x2apic_mode = user_x2apic;
		}
	}

	/*
	 * Truncate max_apic_id if not in x2APIC mode.  Some structures
	 * will already be allocated with the previous max_apic_id, but
	 * at least we can prevent wasting more memory elsewhere.
	 */
	if (!x2apic_mode)
		max_apic_id = min(max_apic_id, xAPIC_MAX_APIC_ID);

	madt = pmap_mapbios(madt_physaddr, madt_length);
	lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_MADT,
	    M_WAITOK | M_ZERO);
	madt_walk_table(madt_setup_cpus_handler, NULL);

	lapic_init(madt->Address);
	printf("ACPI APIC Table: <%.*s %.*s>\n",
	    (int)sizeof(madt->Header.OemId), madt->Header.OemId,
	    (int)sizeof(madt->Header.OemTableId), madt->Header.OemTableId);

	/*
	 * We ignore 64-bit local APIC override entries.  Should we
	 * perhaps emit a warning here if we find one?
	 */
	return (0);
}

/*
 * Enumerate I/O APICs and setup interrupt sources.
 */
static int
madt_setup_io(void)
{
	void *ioapic;
	u_int pin;
	int i;

	KASSERT(lapics != NULL, ("local APICs not initialized"));

	/* Try to initialize ACPI so that we can access the FADT. */
	i = acpi_Startup();
	if (ACPI_FAILURE(i)) {
		printf("MADT: ACPI Startup failed with %s\n",
		    AcpiFormatException(i));
		printf("Try disabling either ACPI or apic support.\n");
		panic("Using MADT but ACPI doesn't work");
	}

	ioapics = malloc(sizeof(*ioapics) * (IOAPIC_MAX_ID + 1), M_MADT,
	    M_WAITOK | M_ZERO);

	/* First, we run through adding I/O APIC's. */
	madt_walk_table(madt_parse_apics, NULL);

	/* Second, we run through the table tweaking interrupt sources. */
	madt_walk_table(madt_parse_ints, NULL);

	/*
	 * If there was not an explicit override entry for the SCI,
	 * force it to use level trigger and active-low polarity.
	 */
	if (!madt_found_sci_override) {
		if (madt_find_interrupt(AcpiGbl_FADT.SciInterrupt,
		    &ioapic, &pin) != 0)
			printf("MADT: Could not find APIC for SCI IRQ %u\n",
			    AcpiGbl_FADT.SciInterrupt);
		else {
			printf(
	"MADT: Forcing active-low polarity and level trigger for SCI\n");
			ioapic_set_polarity(ioapic, pin, INTR_POLARITY_LOW);
			ioapic_set_triggermode(ioapic, pin,
			    INTR_TRIGGER_LEVEL);
		}
	}

	/* Third, we register all the I/O APIC's. */
	for (i = 0; i <= IOAPIC_MAX_ID; i++)
		if (ioapics[i].io_apic != NULL)
			ioapic_register(ioapics[i].io_apic);

	/* Finally, we throw the switch to enable the I/O APIC's. */
	acpi_SetDefaultIntrModel(ACPI_INTR_APIC);

	free(ioapics, M_MADT);
	ioapics = NULL;

	/* NB: this is the last use of the lapics array. */
	free(lapics, M_MADT);
	lapics = NULL;

	return (0);
}

static void
madt_register(void *dummy __unused)
{

	apic_register_enumerator(&madt_enumerator);
}
SYSINIT(madt_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST,
    madt_register, NULL);

/*
 * Call the handler routine for each entry in the MADT table.
 */
static void
madt_walk_table(acpi_subtable_handler *handler, void *arg)
{

	acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
	    handler, arg);
}

static void
madt_parse_cpu(unsigned int apic_id, unsigned int flags)
{

	if (!(flags & ACPI_MADT_ENABLED) ||
#ifdef SMP
	    mp_ncpus == MAXCPU ||
#endif
	    apic_id > MAX_APIC_ID)
		return;

#ifdef SMP
	mp_ncpus++;
	mp_maxid = mp_ncpus - 1;
#endif
	max_apic_id = max(apic_id, max_apic_id);
}

static void
madt_add_cpu(u_int acpi_id, u_int apic_id, u_int flags)
{
	struct lapic_info *la;

	/*
	 * The MADT does not include a BSP flag, so we have to let the
	 * MP code figure out which CPU is the BSP on its own.
	 */
	if (bootverbose)
		printf("MADT: Found CPU APIC ID %u ACPI ID %u: %s\n",
		    apic_id, acpi_id, flags & ACPI_MADT_ENABLED ?
		    "enabled" : "disabled");
	if (!(flags & ACPI_MADT_ENABLED))
		return;
	if (apic_id > max_apic_id) {
		printf("MADT: Ignoring local APIC ID %u (too high)\n",
		    apic_id);
		return;
	}

	la = &lapics[apic_id];
	KASSERT(la->la_enabled == 0,
	    ("Duplicate local APIC ID %u", apic_id));
	la->la_enabled = 1;
	la->la_acpi_id = acpi_id;
	lapic_create(apic_id, 0);
}

static void
madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
{
	ACPI_MADT_LOCAL_APIC *proc;
	ACPI_MADT_LOCAL_X2APIC *x2apic;

	switch (entry->Type) {
	case ACPI_MADT_TYPE_LOCAL_APIC:
		proc = (ACPI_MADT_LOCAL_APIC *)entry;
		madt_parse_cpu(proc->Id, proc->LapicFlags);
		break;
	case ACPI_MADT_TYPE_LOCAL_X2APIC:
		x2apic = (ACPI_MADT_LOCAL_X2APIC *)entry;
		madt_parse_cpu(x2apic->LocalApicId, x2apic->LapicFlags);
		break;
	}
}

static void
madt_setup_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
{
	ACPI_MADT_LOCAL_APIC *proc;
	ACPI_MADT_LOCAL_X2APIC *x2apic;

	switch (entry->Type) {
	case ACPI_MADT_TYPE_LOCAL_APIC:
		proc = (ACPI_MADT_LOCAL_APIC *)entry;
		madt_add_cpu(proc->ProcessorId, proc->Id, proc->LapicFlags);
		break;
	case ACPI_MADT_TYPE_LOCAL_X2APIC:
		x2apic = (ACPI_MADT_LOCAL_X2APIC *)entry;
		madt_add_cpu(x2apic->Uid, x2apic->LocalApicId,
		    x2apic->LapicFlags);
		break;
	}
}

/*
 * Add an I/O APIC from an entry in the table.
 */
static void
madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg __unused)
{
	ACPI_MADT_IO_APIC *apic;

	switch (entry->Type) {
	case ACPI_MADT_TYPE_IO_APIC:
		apic = (ACPI_MADT_IO_APIC *)entry;
		if (bootverbose)
			printf(
			    "MADT: Found IO APIC ID %u, Interrupt %u at %p\n",
			    apic->Id, apic->GlobalIrqBase,
			    (void *)(uintptr_t)apic->Address);
		if (apic->Id > IOAPIC_MAX_ID)
			panic("%s: I/O APIC ID %u too high", __func__,
			    apic->Id);
		if (ioapics[apic->Id].io_apic != NULL)
			panic("%s: Double APIC ID %u", __func__, apic->Id);
		if (apic->GlobalIrqBase >= FIRST_MSI_INT) {
			printf("MADT: Ignoring bogus I/O APIC ID %u",
			    apic->Id);
			break;
		}
		ioapics[apic->Id].io_apic = ioapic_create(apic->Address,
		    apic->Id, apic->GlobalIrqBase);
		ioapics[apic->Id].io_vector = apic->GlobalIrqBase;
		break;
	default:
		break;
	}
}

/*
 * Determine properties of an interrupt source.  Note that for ACPI these
 * functions are only used for ISA interrupts, so we assume ISA bus values
 * (Active Hi, Edge Triggered) for conforming values except for the ACPI
 * SCI for which we use Active Lo, Level Triggered.
 */
static enum intr_polarity
interrupt_polarity(UINT16 IntiFlags, UINT8 Source)
{

	switch (IntiFlags & ACPI_MADT_POLARITY_MASK) {
	default:
		printf("WARNING: Bogus Interrupt Polarity. Assume CONFORMS\n");
		/* FALLTHROUGH*/
	case ACPI_MADT_POLARITY_CONFORMS:
		if (Source == AcpiGbl_FADT.SciInterrupt)
			return (INTR_POLARITY_LOW);
		else
			return (INTR_POLARITY_HIGH);
	case ACPI_MADT_POLARITY_ACTIVE_HIGH:
		return (INTR_POLARITY_HIGH);
	case ACPI_MADT_POLARITY_ACTIVE_LOW:
		return (INTR_POLARITY_LOW);
	}
}

static enum intr_trigger
interrupt_trigger(UINT16 IntiFlags, UINT8 Source)
{

	switch (IntiFlags & ACPI_MADT_TRIGGER_MASK) {
	default:
		printf("WARNING: Bogus Interrupt Trigger Mode. "
		    "Assume CONFORMS.\n");
		/*FALLTHROUGH*/
	case ACPI_MADT_TRIGGER_CONFORMS:
		if (Source == AcpiGbl_FADT.SciInterrupt)
			return (INTR_TRIGGER_LEVEL);
		else
			return (INTR_TRIGGER_EDGE);
	case ACPI_MADT_TRIGGER_EDGE:
		return (INTR_TRIGGER_EDGE);
	case ACPI_MADT_TRIGGER_LEVEL:
		return (INTR_TRIGGER_LEVEL);
	}
}

/*
 * Find the local APIC ID associated with a given ACPI Processor ID.
 */
static int
madt_find_cpu(u_int acpi_id, u_int *apic_id)
{
	int i;

	for (i = 0; i <= max_apic_id; i++) {
		if (!lapics[i].la_enabled)
			continue;
		if (lapics[i].la_acpi_id != acpi_id)
			continue;
		*apic_id = i;
		return (0);
	}
	return (ENOENT);
}

/*
 * Find the IO APIC and pin on that APIC associated with a given global
 * interrupt.
 */
static int
madt_find_interrupt(int intr, void **apic, u_int *pin)
{
	int i, best;

	best = -1;
	for (i = 0; i <= IOAPIC_MAX_ID; i++) {
		if (ioapics[i].io_apic == NULL ||
		    ioapics[i].io_vector > intr)
			continue;
		if (best == -1 ||
		    ioapics[best].io_vector < ioapics[i].io_vector)
			best = i;
	}
	if (best == -1)
		return (ENOENT);
	*apic = ioapics[best].io_apic;
	*pin = intr - ioapics[best].io_vector;
	if (*pin > 32)
		printf("WARNING: Found intpin of %u for vector %d\n", *pin,
		    intr);
	return (0);
}

void
madt_parse_interrupt_values(void *entry,
    enum intr_trigger *trig, enum intr_polarity *pol)
{
	ACPI_MADT_INTERRUPT_OVERRIDE *intr;
	char buf[64];

	intr = entry;

	if (bootverbose)
		printf("MADT: Interrupt override: source %u, irq %u\n",
		    intr->SourceIrq, intr->GlobalIrq);
	KASSERT(intr->Bus == 0, ("bus for interrupt overrides must be zero"));

	/*
	 * Lookup the appropriate trigger and polarity modes for this
	 * entry.
	 */
	*trig = interrupt_trigger(intr->IntiFlags, intr->SourceIrq);
	*pol = interrupt_polarity(intr->IntiFlags, intr->SourceIrq);

	/*
	 * If the SCI is identity mapped but has edge trigger and
	 * active-hi polarity or the force_sci_lo tunable is set,
	 * force it to use level/lo.
	 */
	if (intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) {
		madt_found_sci_override = 1;
		if (getenv_string("hw.acpi.sci.trigger", buf, sizeof(buf))) {
			if (tolower(buf[0]) == 'e')
				*trig = INTR_TRIGGER_EDGE;
			else if (tolower(buf[0]) == 'l')
				*trig = INTR_TRIGGER_LEVEL;
			else
				panic(
				"Invalid trigger %s: must be 'edge' or 'level'",
				    buf);
			printf("MADT: Forcing SCI to %s trigger\n",
			    *trig == INTR_TRIGGER_EDGE ? "edge" : "level");
		}
		if (getenv_string("hw.acpi.sci.polarity", buf, sizeof(buf))) {
			if (tolower(buf[0]) == 'h')
				*pol = INTR_POLARITY_HIGH;
			else if (tolower(buf[0]) == 'l')
				*pol = INTR_POLARITY_LOW;
			else
				panic(
				"Invalid polarity %s: must be 'high' or 'low'",
				    buf);
			printf("MADT: Forcing SCI to active %s polarity\n",
			    *pol == INTR_POLARITY_HIGH ? "high" : "low");
		}
	}
}

/*
 * Parse an interrupt source override for an ISA interrupt.
 */
static void
madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr)
{
	void *new_ioapic, *old_ioapic;
	u_int new_pin, old_pin;
	enum intr_trigger trig;
	enum intr_polarity pol;

	if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 &&
	    intr->GlobalIrq == 2) {
		if (bootverbose)
			printf("MADT: Skipping timer override\n");
		return;
	}

	if (madt_find_interrupt(intr->GlobalIrq, &new_ioapic,
	    &new_pin) != 0) {
		printf("MADT: Could not find APIC for vector %u (IRQ %u)\n",
		    intr->GlobalIrq, intr->SourceIrq);
		return;
	}

	madt_parse_interrupt_values(intr, &trig, &pol);

	/* Remap the IRQ if it is mapped to a different interrupt vector. */
	if (intr->SourceIrq != intr->GlobalIrq) {
		/*
		 * If the SCI is remapped to a non-ISA global interrupt,
		 * then override the vector we use to setup and allocate
		 * the interrupt.
		 */
		if (intr->GlobalIrq > 15 &&
		    intr->SourceIrq == AcpiGbl_FADT.SciInterrupt)
			acpi_OverrideInterruptLevel(intr->GlobalIrq);
		else
			ioapic_remap_vector(new_ioapic, new_pin,
			    intr->SourceIrq);
		if (madt_find_interrupt(intr->SourceIrq, &old_ioapic,
		    &old_pin) != 0)
			printf("MADT: Could not find APIC for source IRQ %u\n",
			    intr->SourceIrq);
		else if (ioapic_get_vector(old_ioapic, old_pin) ==
		    intr->SourceIrq)
			ioapic_disable_pin(old_ioapic, old_pin);
	}

	/* Program the polarity and trigger mode. */
	ioapic_set_triggermode(new_ioapic, new_pin, trig);
	ioapic_set_polarity(new_ioapic, new_pin, pol);
}

/*
 * Parse an entry for an NMI routed to an IO APIC.
 */
static void
madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi)
{
	void *ioapic;
	u_int pin;

	if (madt_find_interrupt(nmi->GlobalIrq, &ioapic, &pin) != 0) {
		printf("MADT: Could not find APIC for vector %u\n",
		    nmi->GlobalIrq);
		return;
	}

	ioapic_set_nmi(ioapic, pin);
	if (!(nmi->IntiFlags & ACPI_MADT_TRIGGER_CONFORMS))
		ioapic_set_triggermode(ioapic, pin,
		    interrupt_trigger(nmi->IntiFlags, 0));
	if (!(nmi->IntiFlags & ACPI_MADT_POLARITY_CONFORMS))
		ioapic_set_polarity(ioapic, pin,
		    interrupt_polarity(nmi->IntiFlags, 0));
}

/*
 * Parse an entry for an NMI routed to a local APIC LVT pin.
 */
static void
madt_handle_local_nmi(u_int acpi_id, UINT8 Lint, UINT16 IntiFlags)
{
	u_int apic_id, pin;

	if (acpi_id == 0xffffffff)
		apic_id = APIC_ID_ALL;
	else if (madt_find_cpu(acpi_id, &apic_id) != 0) {
		if (bootverbose)
			printf("MADT: Ignoring local NMI routed to "
			    "ACPI CPU %u\n", acpi_id);
		return;
	}
	if (Lint == 0)
		pin = APIC_LVT_LINT0;
	else
		pin = APIC_LVT_LINT1;
	lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI);
	if (!(IntiFlags & ACPI_MADT_TRIGGER_CONFORMS))
		lapic_set_lvt_triggermode(apic_id, pin,
		    interrupt_trigger(IntiFlags, 0));
	if (!(IntiFlags & ACPI_MADT_POLARITY_CONFORMS))
		lapic_set_lvt_polarity(apic_id, pin,
		    interrupt_polarity(IntiFlags, 0));
}

static void
madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi)
{

	madt_handle_local_nmi(nmi->ProcessorId == 0xff ? 0xffffffff :
	    nmi->ProcessorId, nmi->Lint, nmi->IntiFlags);
}

static void
madt_parse_local_x2apic_nmi(ACPI_MADT_LOCAL_X2APIC_NMI *nmi)
{

	madt_handle_local_nmi(nmi->Uid, nmi->Lint, nmi->IntiFlags);
}

/*
 * Parse interrupt entries.
 */
static void
madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused)
{

	switch (entry->Type) {
	case ACPI_MADT_TYPE_INTERRUPT_OVERRIDE:
		madt_parse_interrupt_override(
		    (ACPI_MADT_INTERRUPT_OVERRIDE *)entry);
		break;
	case ACPI_MADT_TYPE_NMI_SOURCE:
		madt_parse_nmi((ACPI_MADT_NMI_SOURCE *)entry);
		break;
	case ACPI_MADT_TYPE_LOCAL_APIC_NMI:
		madt_parse_local_nmi((ACPI_MADT_LOCAL_APIC_NMI *)entry);
		break;
	case ACPI_MADT_TYPE_LOCAL_X2APIC_NMI:
		madt_parse_local_x2apic_nmi(
		    (ACPI_MADT_LOCAL_X2APIC_NMI *)entry);
		break;
	}
}

/*
 * Setup per-CPU ACPI IDs.
 */
static void
madt_set_ids(void *dummy)
{
	struct lapic_info *la;
	struct pcpu *pc;
	u_int i;

	if (madt == NULL)
		return;

	KASSERT(lapics != NULL, ("local APICs not initialized"));

	CPU_FOREACH(i) {
		pc = pcpu_find(i);
		KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
		la = &lapics[pc->pc_apic_id];
		if (!la->la_enabled)
			panic("APIC: CPU with APIC ID %u is not enabled",
			    pc->pc_apic_id);
		pc->pc_acpi_id = la->la_acpi_id;
		if (bootverbose)
			printf("APIC: CPU %u has ACPI ID %u\n", i,
			    la->la_acpi_id);
	}
}
SYSINIT(madt_set_ids, SI_SUB_CPU, SI_ORDER_MIDDLE, madt_set_ids, NULL);
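The interrupt_polarity()/interrupt_trigger() pair above encodes the "conforms" rule: a CONFORMS value means ISA bus defaults (active-high, edge-triggered), except for the ACPI SCI, which conforms to active-low, level-triggered. A minimal standalone C sketch of the polarity half of that rule; the POL_* encodings and the SCI IRQ number are illustrative stand-ins, not the ACPICA constants:

#include <stdio.h>

/* Stand-in encodings for the MADT INTI polarity field. */
#define	POL_MASK	0x3
#define	POL_CONFORMS	0
#define	POL_HIGH	1
#define	POL_LOW		3

static int sci_irq = 9;	/* assumed SCI for the example */

static const char *
polarity(int flags, int src)
{
	switch (flags & POL_MASK) {
	case POL_HIGH:
		return ("high");
	case POL_LOW:
		return ("low");
	case POL_CONFORMS:
	default:	/* treat reserved values as CONFORMS, as the driver does */
		/* ISA default is active-high; the SCI conforms to active-low. */
		return ((src == sci_irq) ? "low" : "high");
	}
}

int
main(void)
{
	/* Prints "irq1: high, sci: low". */
	printf("irq1: %s, sci: %s\n", polarity(POL_CONFORMS, 1),
	    polarity(POL_CONFORMS, sci_irq));
	return (0);
}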
Index: head/sys/x86/acpica/srat.c
===================================================================
--- head/sys/x86/acpica/srat.c	(revision 326262)
+++ head/sys/x86/acpica/srat.c	(revision 326263)
@@ -1,572 +1,574 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2010 Hudson River Trading LLC
 * Written by: John H. Baldwin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include "opt_vm.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#if MAXMEMDOM > 1
static struct cpu_info {
	int enabled:1;
	int has_memory:1;
	int domain;
} *cpus;

struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
int num_mem;

static ACPI_TABLE_SRAT *srat;
static vm_paddr_t srat_physaddr;

static int domain_pxm[MAXMEMDOM];
static int ndomain;

static ACPI_TABLE_SLIT *slit;
static vm_paddr_t slit_physaddr;
static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];

static void	srat_walk_table(acpi_subtable_handler *handler, void *arg);

/*
 * SLIT parsing.
 */

static void
slit_parse_table(ACPI_TABLE_SLIT *s)
{
	int i, j;
	int i_domain, j_domain;
	int offset = 0;
	uint8_t e;

	/*
	 * This maps the SLIT data into the VM-domain centric view.
	 * There may be sparse entries in the PXM namespace, so
	 * remap them to a VM-domain ID and if it doesn't exist,
	 * skip it.
	 *
	 * It should result in a packed 2d array of VM-domain
	 * locality information entries.
	 */

	if (bootverbose)
		printf("SLIT.Localities: %d\n", (int) s->LocalityCount);
	for (i = 0; i < s->LocalityCount; i++) {
		i_domain = acpi_map_pxm_to_vm_domainid(i);
		if (i_domain < 0)
			continue;

		if (bootverbose)
			printf("%d: ", i);
		for (j = 0; j < s->LocalityCount; j++) {
			j_domain = acpi_map_pxm_to_vm_domainid(j);
			if (j_domain < 0)
				continue;
			e = s->Entry[i * s->LocalityCount + j];
			if (bootverbose)
				printf("%d ", (int) e);
			/* 255 == "no locality information" */
			if (e == 255)
				vm_locality_table[offset] = -1;
			else
				vm_locality_table[offset] = e;
			offset++;
		}
		if (bootverbose)
			printf("\n");
	}
}

/*
 * Look for an ACPI System Locality Distance Information Table ("SLIT")
 */
static int
parse_slit(void)
{

	if (resource_disabled("slit", 0)) {
		return (-1);
	}

	slit_physaddr = acpi_find_table(ACPI_SIG_SLIT);
	if (slit_physaddr == 0) {
		return (-1);
	}

	/*
	 * Make a pass over the table to populate the cpus[] and
	 * mem_info[] tables.
	 */
	slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT);
	slit_parse_table(slit);
	acpi_unmap_table(slit);
	slit = NULL;

#ifdef VM_NUMA_ALLOC
	/* Tell the VM about it! */
	mem_locality = vm_locality_table;
#endif
	return (0);
}

/*
 * SRAT parsing.
 */

/*
 * Returns true if a memory range overlaps with at least one range in
 * phys_avail[].
 */
static int
overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end)
{
	int i;

	for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) {
		if (phys_avail[i + 1] <= start)
			continue;
		if (phys_avail[i] < end)
			return (1);
		break;
	}
	return (0);
}

static void
srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
{
	ACPI_SRAT_CPU_AFFINITY *cpu;
	ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
	ACPI_SRAT_MEM_AFFINITY *mem;
	int domain, i, slot;

	switch (entry->Type) {
	case ACPI_SRAT_TYPE_CPU_AFFINITY:
		cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
		domain = cpu->ProximityDomainLo |
		    cpu->ProximityDomainHi[0] << 8 |
		    cpu->ProximityDomainHi[1] << 16 |
		    cpu->ProximityDomainHi[2] << 24;
		if (bootverbose)
			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
			    cpu->ApicId, domain,
			    (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
			    "enabled" : "disabled");
		if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
			break;
		if (cpu->ApicId > max_apic_id) {
			printf("SRAT: Ignoring local APIC ID %u (too high)\n",
			    cpu->ApicId);
			break;
		}

		if (cpus[cpu->ApicId].enabled) {
			printf("SRAT: Duplicate local APIC ID %u\n",
			    cpu->ApicId);
			*(int *)arg = ENXIO;
			break;
		}
		cpus[cpu->ApicId].domain = domain;
		cpus[cpu->ApicId].enabled = 1;
		break;
	case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
		x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
		if (bootverbose)
			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
			    x2apic->ApicId, x2apic->ProximityDomain,
			    (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
			    "enabled" : "disabled");
		if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
			break;
		if (x2apic->ApicId > max_apic_id) {
			printf("SRAT: Ignoring local APIC ID %u (too high)\n",
			    x2apic->ApicId);
			break;
		}

		KASSERT(!cpus[x2apic->ApicId].enabled,
		    ("Duplicate local APIC ID %u", x2apic->ApicId));
		cpus[x2apic->ApicId].domain = x2apic->ProximityDomain;
		cpus[x2apic->ApicId].enabled = 1;
		break;
	case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
		mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
		if (bootverbose)
			printf(
		    "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n",
			    mem->ProximityDomain, (uintmax_t)mem->BaseAddress,
			    (uintmax_t)mem->Length,
			    (mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
			    "enabled" : "disabled");
		if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
			break;
		if (!overlaps_phys_avail(mem->BaseAddress,
		    mem->BaseAddress + mem->Length)) {
			printf("SRAT: Ignoring memory at addr 0x%jx\n",
			    (uintmax_t)mem->BaseAddress);
			break;
		}
		if (num_mem == VM_PHYSSEG_MAX) {
			printf("SRAT: Too many memory regions\n");
			*(int *)arg = ENXIO;
			break;
		}
		slot = num_mem;
		for (i = 0; i < num_mem; i++) {
			if (mem_info[i].end <= mem->BaseAddress)
				continue;
			if (mem_info[i].start <
			    (mem->BaseAddress + mem->Length)) {
				printf("SRAT: Overlapping memory entries\n");
				*(int *)arg = ENXIO;
				return;
			}
			slot = i;
		}
		for (i = num_mem; i > slot; i--)
			mem_info[i] = mem_info[i - 1];
		mem_info[slot].start = mem->BaseAddress;
		mem_info[slot].end = mem->BaseAddress + mem->Length;
		mem_info[slot].domain = mem->ProximityDomain;
		num_mem++;
		break;
	}
}

/*
 * Ensure each memory domain has at least one CPU and that each CPU
 * has at least one memory domain.
 */
static int
check_domains(void)
{
	int found, i, j;

	for (i = 0; i < num_mem; i++) {
		found = 0;
		for (j = 0; j <= max_apic_id; j++)
			if (cpus[j].enabled &&
			    cpus[j].domain == mem_info[i].domain) {
				cpus[j].has_memory = 1;
				found++;
			}
		if (!found) {
			printf("SRAT: No CPU found for memory domain %d\n",
			    mem_info[i].domain);
			return (ENXIO);
		}
	}
	for (i = 0; i <= max_apic_id; i++)
		if (cpus[i].enabled && !cpus[i].has_memory) {
			printf("SRAT: No memory found for CPU %d\n", i);
			return (ENXIO);
		}
	return (0);
}

/*
 * Check that the SRAT memory regions cover all of the regions in
 * phys_avail[].
 */
static int
check_phys_avail(void)
{
	vm_paddr_t address;
	int i, j;

	/* j is the current offset into phys_avail[]. */
	address = phys_avail[0];
	j = 0;
	for (i = 0; i < num_mem; i++) {
		/*
		 * Consume as many phys_avail[] entries as fit in this
		 * region.
		 */
		while (address >= mem_info[i].start &&
		    address <= mem_info[i].end) {
			/*
			 * If we cover the rest of this phys_avail[] entry,
			 * advance to the next entry.
			 */
			if (phys_avail[j + 1] <= mem_info[i].end) {
				j += 2;
				if (phys_avail[j] == 0 &&
				    phys_avail[j + 1] == 0) {
					return (0);
				}
				address = phys_avail[j];
			} else
				address = mem_info[i].end + 1;
		}
	}
	printf("SRAT: No memory region found for 0x%jx - 0x%jx\n",
	    (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]);
	return (ENXIO);
}

/*
 * Renumber the memory domains to be compact and zero-based if not
 * already.  Returns an error if there are too many domains.
 */
static int
renumber_domains(void)
{
	int i, j, slot;

	/* Enumerate all the domains. */
	ndomain = 0;
	for (i = 0; i < num_mem; i++) {
		/* See if this domain is already known. */
		for (j = 0; j < ndomain; j++) {
			if (domain_pxm[j] >= mem_info[i].domain)
				break;
		}
		if (j < ndomain && domain_pxm[j] == mem_info[i].domain)
			continue;

		if (ndomain >= MAXMEMDOM) {
			ndomain = 1;
			printf("SRAT: Too many memory domains\n");
			return (EFBIG);
		}

		/* Insert the new domain at slot 'j'. */
		slot = j;
		for (j = ndomain; j > slot; j--)
			domain_pxm[j] = domain_pxm[j - 1];
		domain_pxm[slot] = mem_info[i].domain;
		ndomain++;
	}

	/* Renumber each domain to its index in the sorted 'domain_pxm' list. */
	for (i = 0; i < ndomain; i++) {
		/*
		 * If the domain is already the right value, no need
		 * to renumber.
		 */
		if (domain_pxm[i] == i)
			continue;

		/* Walk the cpu[] and mem_info[] arrays to renumber. */
		for (j = 0; j < num_mem; j++)
			if (mem_info[j].domain == domain_pxm[i])
				mem_info[j].domain = i;
		for (j = 0; j <= max_apic_id; j++)
			if (cpus[j].enabled && cpus[j].domain == domain_pxm[i])
				cpus[j].domain = i;
	}

	return (0);
}

/*
 * Look for an ACPI System Resource Affinity Table ("SRAT")
 */
static int
parse_srat(void)
{
	unsigned int idx, size;
	vm_paddr_t addr;
	int error;

	if (resource_disabled("srat", 0))
		return (-1);

	srat_physaddr = acpi_find_table(ACPI_SIG_SRAT);
	if (srat_physaddr == 0)
		return (-1);

	/*
	 * Allocate data structure:
	 *
	 * Find the last physical memory region and steal some memory from
	 * it. This is done because at this point in the boot process
	 * malloc is still not usable.
	 */
	for (idx = 0; phys_avail[idx + 1] != 0; idx += 2);
	KASSERT(idx != 0, ("phys_avail is empty!"));
	idx -= 2;

	size = sizeof(*cpus) * (max_apic_id + 1);
	addr = trunc_page(phys_avail[idx + 1] - size);
	KASSERT(addr >= phys_avail[idx],
	    ("Not enough memory for SRAT table items"));
	phys_avail[idx + 1] = addr - 1;

	/*
	 * We cannot rely on PHYS_TO_DMAP because this code is also used in
	 * i386, so use pmap_mapbios to map the memory, this will end up using
	 * the default memory attribute (WB), and the DMAP when available.
	 */
	cpus = (struct cpu_info *)pmap_mapbios(addr, size);
	bzero(cpus, size);

	/*
	 * Make a pass over the table to populate the cpus[] and
	 * mem_info[] tables.
	 */
	srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT);
	error = 0;
	srat_walk_table(srat_parse_entry, &error);
	acpi_unmap_table(srat);
	srat = NULL;
	if (error || check_domains() != 0 || check_phys_avail() != 0 ||
	    renumber_domains() != 0) {
		srat_physaddr = 0;
		return (-1);
	}

#ifdef VM_NUMA_ALLOC
	/* Point vm_phys at our memory affinity table. */
	vm_ndomains = ndomain;
	mem_affinity = mem_info;
#endif

	return (0);
}

static void
init_mem_locality(void)
{
	int i;

	/*
	 * For now, assume -1 == "no locality information for
	 * this pairing.
	 */
	for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++)
		vm_locality_table[i] = -1;
}

static void
parse_acpi_tables(void *dummy)
{

	if (parse_srat() < 0)
		return;
	init_mem_locality();
	(void) parse_slit();
}
SYSINIT(parse_acpi_tables, SI_SUB_VM - 1, SI_ORDER_FIRST, parse_acpi_tables,
    NULL);

static void
srat_walk_table(acpi_subtable_handler *handler, void *arg)
{

	acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length,
	    handler, arg);
}

/*
 * Setup per-CPU domain IDs.
 */
static void
srat_set_cpus(void *dummy)
{
	struct cpu_info *cpu;
	struct pcpu *pc;
	u_int i;

	if (srat_physaddr == 0)
		return;
	for (i = 0; i < MAXCPU; i++) {
		if (CPU_ABSENT(i))
			continue;
		pc = pcpu_find(i);
		KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
		cpu = &cpus[pc->pc_apic_id];
		if (!cpu->enabled)
			panic("SRAT: CPU with APIC ID %u is not known",
			    pc->pc_apic_id);
		pc->pc_domain = cpu->domain;
		CPU_SET(i, &cpuset_domain[cpu->domain]);
		if (bootverbose)
			printf("SRAT: CPU %u has memory domain %d\n", i,
			    cpu->domain);
	}

	/* Last usage of the cpus array, unmap it. */
	pmap_unmapbios((vm_offset_t)cpus, sizeof(*cpus) * (max_apic_id + 1));
	cpus = NULL;
}
SYSINIT(srat_set_cpus, SI_SUB_CPU, SI_ORDER_ANY, srat_set_cpus, NULL);

/*
 * Map a _PXM value to a VM domain ID.
 *
 * Returns the domain ID, or -1 if no domain ID was found.
 */
int
acpi_map_pxm_to_vm_domainid(int pxm)
{
	int i;

	for (i = 0; i < ndomain; i++) {
		if (domain_pxm[i] == pxm)
			return (i);
	}

	return (-1);
}

#else /* MAXMEMDOM == 1 */

int
acpi_map_pxm_to_vm_domainid(int pxm)
{

	return (-1);
}

#endif /* MAXMEMDOM > 1 */
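renumber_domains() above compacts the firmware's sparse _PXM proximity-domain values into dense, zero-based VM domain IDs by keeping domain_pxm[] sorted and unique; a domain's compact ID is then simply its index. A minimal standalone C sketch of that sorted-insert step, with made-up PXM inputs and a smaller MAXDOM bound:

#include <stdio.h>

#define	MAXDOM	8

static int domain_pxm[MAXDOM];
static int ndomain;

/* Insert a proximity-domain value, keeping domain_pxm[] sorted and unique. */
static void
add_pxm(int pxm)
{
	int j, slot;

	for (j = 0; j < ndomain; j++)
		if (domain_pxm[j] >= pxm)
			break;
	if (j < ndomain && domain_pxm[j] == pxm)
		return;			/* already known */
	if (ndomain == MAXDOM)
		return;			/* table full; real code reports EFBIG */
	slot = j;
	for (j = ndomain; j > slot; j--)
		domain_pxm[j] = domain_pxm[j - 1];
	domain_pxm[slot] = pxm;
	ndomain++;
}

int
main(void)
{
	int i, in[] = { 16, 2, 16, 7 };

	for (i = 0; i < 4; i++)
		add_pxm(in[i]);
	/* PXM -> compact ID is the index: 2 -> 0, 7 -> 1, 16 -> 2. */
	for (i = 0; i < ndomain; i++)
		printf("domain %d = PXM %d\n", i, domain_pxm[i]);
	return (0);
}

Sorting during insertion is what makes the later renumbering pass a straight index walk, and it is the same table acpi_map_pxm_to_vm_domainid() searches linearly at runtime.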
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System Management BIOS Reference Specification, v2.4 Final * http://www.dmtf.org/standards/published_documents/DSP0134.pdf */ struct smbios_softc { device_t dev; struct resource * res; int rid; struct smbios_eps * eps; }; #define RES2EPS(res) ((struct smbios_eps *)rman_get_virtual(res)) #define ADDR2EPS(addr) ((struct smbios_eps *)BIOS_PADDRTOVADDR(addr)) static devclass_t smbios_devclass; static void smbios_identify (driver_t *, device_t); static int smbios_probe (device_t); static int smbios_attach (device_t); static int smbios_detach (device_t); static int smbios_modevent (module_t, int, void *); static int smbios_cksum (struct smbios_eps *); static void smbios_identify (driver_t *driver, device_t parent) { device_t child; u_int32_t addr; int length; int rid; if (!device_is_alive(parent)) return; addr = bios_sigsearch(SMBIOS_START, SMBIOS_SIG, SMBIOS_LEN, SMBIOS_STEP, SMBIOS_OFF); if (addr != 0) { rid = 0; length = ADDR2EPS(addr)->length; if (length != 0x1f) { u_int8_t major, minor; major = ADDR2EPS(addr)->major_version; minor = ADDR2EPS(addr)->minor_version; /* SMBIOS v2.1 implementation might use 0x1e. 
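A candidate entry point found by bios_sigsearch() is only trusted once smbios_cksum(), defined at the bottom of this file, accepts it: all 'length' bytes of the structure are summed into a uint8_t, and the specification defines the checksum field so that a valid table sums to zero. A minimal user-space sketch of that rule (buffer contents hypothetical):

#include <stdint.h>
#include <stdio.h>

/* A valid SMBIOS entry point sums to 0 modulo 256. */
static int
eps_cksum_ok(const uint8_t *eps, size_t len)
{
	uint8_t sum = 0;
	size_t i;

	for (i = 0; i < len; i++)
		sum += eps[i];
	return (sum == 0);
}

int
main(void)
{
	uint8_t buf[4] = { 0x5f, 0x53, 0x4d, 0 };	/* Hypothetical. */

	/* Pick the last byte so the whole buffer sums to zero. */
	buf[3] = (uint8_t)-(buf[0] + buf[1] + buf[2]);
	printf("checksum ok: %d\n", eps_cksum_ok(buf, sizeof(buf)));
	return (0);
}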
*/ if (length == 0x1e && major == 2 && minor == 1) length = 0x1f; else return; } child = BUS_ADD_CHILD(parent, 5, "smbios", -1); device_set_driver(child, driver); bus_set_resource(child, SYS_RES_MEMORY, rid, addr, length); device_set_desc(child, "System Management BIOS"); } return; } static int smbios_probe (device_t dev) { struct resource *res; int rid; int error; error = 0; rid = 0; res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (res == NULL) { device_printf(dev, "Unable to allocate memory resource.\n"); error = ENOMEM; goto bad; } if (smbios_cksum(RES2EPS(res))) { device_printf(dev, "SMBIOS checksum failed.\n"); error = ENXIO; goto bad; } bad: if (res) bus_release_resource(dev, SYS_RES_MEMORY, rid, res); return (error); } static int smbios_attach (device_t dev) { struct smbios_softc *sc; int error; sc = device_get_softc(dev); error = 0; sc->dev = dev; sc->rid = 0; sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->rid, RF_ACTIVE); if (sc->res == NULL) { device_printf(dev, "Unable to allocate memory resource.\n"); error = ENOMEM; goto bad; } sc->eps = RES2EPS(sc->res); device_printf(dev, "Version: %u.%u", sc->eps->major_version, sc->eps->minor_version); if (bcd2bin(sc->eps->BCD_revision)) printf(", BCD Revision: %u.%u", bcd2bin(sc->eps->BCD_revision >> 4), bcd2bin(sc->eps->BCD_revision & 0x0f)); printf("\n"); return (0); bad: if (sc->res) bus_release_resource(dev, SYS_RES_MEMORY, sc->rid, sc->res); return (error); } static int smbios_detach (device_t dev) { struct smbios_softc *sc; sc = device_get_softc(dev); if (sc->res) bus_release_resource(dev, SYS_RES_MEMORY, sc->rid, sc->res); return (0); } static int smbios_modevent (mod, what, arg) module_t mod; int what; void * arg; { device_t * devs; int count; int i; switch (what) { case MOD_LOAD: break; case MOD_UNLOAD: devclass_get_devices(smbios_devclass, &devs, &count); for (i = 0; i < count; i++) { device_delete_child(device_get_parent(devs[i]), devs[i]); } free(devs, M_TEMP); break; default: break; } return (0); } static device_method_t smbios_methods[] = { /* Device interface */ DEVMETHOD(device_identify, smbios_identify), DEVMETHOD(device_probe, smbios_probe), DEVMETHOD(device_attach, smbios_attach), DEVMETHOD(device_detach, smbios_detach), { 0, 0 } }; static driver_t smbios_driver = { "smbios", smbios_methods, sizeof(struct smbios_softc), }; DRIVER_MODULE(smbios, nexus, smbios_driver, smbios_devclass, smbios_modevent, 0); MODULE_VERSION(smbios, 1); static int smbios_cksum (struct smbios_eps *e) { u_int8_t *ptr; u_int8_t cksum; int i; ptr = (u_int8_t *)e; cksum = 0; for (i = 0; i < e->length; i++) { cksum += ptr[i]; } return (cksum); } Index: head/sys/x86/bios/vpd.c =================================================================== --- head/sys/x86/bios/vpd.c (revision 326262) +++ head/sys/x86/bios/vpd.c (revision 326263) @@ -1,297 +1,299 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 Matthew N. Dodd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * VPD decoder for IBM systems (ThinkPads) * http://www-1.ibm.com/support/docview.wss?uid=psg1MIGR-45120 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Vital Product Data */ struct vpd { u_int16_t Header; /* 0x55AA */ u_int8_t Signature[3]; /* Always 'VPD' */ u_int8_t Length; /* Structure Length */ u_int8_t Reserved[7]; /* Reserved */ u_int8_t BuildID[9]; /* BIOS Build ID */ u_int8_t BoxSerial[7]; /* Box Serial Number */ u_int8_t PlanarSerial[11]; /* Motherboard Serial Number */ u_int8_t MachType[7]; /* Machine Type/Model */ u_int8_t Checksum; /* Checksum */ } __packed; struct vpd_softc { device_t dev; struct resource * res; int rid; struct vpd * vpd; struct sysctl_ctx_list ctx; char BuildID[10]; char BoxSerial[8]; char PlanarSerial[12]; char MachineType[5]; char MachineModel[4]; }; #define VPD_START 0xf0000 #define VPD_STEP 0x10 #define VPD_OFF 2 #define VPD_LEN 3 #define VPD_SIG "VPD" #define RES2VPD(res) ((struct vpd *)rman_get_virtual(res)) #define ADDR2VPD(addr) ((struct vpd *)BIOS_PADDRTOVADDR(addr)) static devclass_t vpd_devclass; static void vpd_identify (driver_t *, device_t); static int vpd_probe (device_t); static int vpd_attach (device_t); static int vpd_detach (device_t); static int vpd_modevent (module_t, int, void *); static int vpd_cksum (struct vpd *); static SYSCTL_NODE(_hw, OID_AUTO, vpd, CTLFLAG_RD, NULL, NULL); static SYSCTL_NODE(_hw_vpd, OID_AUTO, machine, CTLFLAG_RD, NULL, NULL); static SYSCTL_NODE(_hw_vpd_machine, OID_AUTO, type, CTLFLAG_RD, NULL, NULL); static SYSCTL_NODE(_hw_vpd_machine, OID_AUTO, model, CTLFLAG_RD, NULL, NULL); static SYSCTL_NODE(_hw_vpd, OID_AUTO, build_id, CTLFLAG_RD, NULL, NULL); static SYSCTL_NODE(_hw_vpd, OID_AUTO, serial, CTLFLAG_RD, NULL, NULL); static SYSCTL_NODE(_hw_vpd_serial, OID_AUTO, box, CTLFLAG_RD, NULL, NULL); static SYSCTL_NODE(_hw_vpd_serial, OID_AUTO, planar, CTLFLAG_RD, NULL, NULL); static void vpd_identify (driver_t *driver, device_t parent) { device_t child; u_int32_t addr; int length; int rid; if (!device_is_alive(parent)) return; addr = bios_sigsearch(VPD_START, VPD_SIG, VPD_LEN, VPD_STEP, VPD_OFF); if (addr != 0) { rid = 0; length = ADDR2VPD(addr)->Length; child = BUS_ADD_CHILD(parent, 5, "vpd", -1); device_set_driver(child, driver); bus_set_resource(child, SYS_RES_MEMORY, rid, addr, length); device_set_desc(child, "Vital Product Data Area"); } return; } static int vpd_probe (device_t dev) { struct resource *res; int rid; int error; error = 0; rid = 0; res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (res == NULL) { device_printf(dev, "Unable to allocate memory resource.\n"); error = ENOMEM; goto bad; } if
(vpd_cksum(RES2VPD(res))) device_printf(dev, "VPD checksum failed. BIOS update may be required.\n"); bad: if (res) bus_release_resource(dev, SYS_RES_MEMORY, rid, res); return (error); } static int vpd_attach (device_t dev) { struct vpd_softc *sc; char unit[4]; int error; sc = device_get_softc(dev); error = 0; sc->dev = dev; sc->rid = 0; sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->rid, RF_ACTIVE); if (sc->res == NULL) { device_printf(dev, "Unable to allocate memory resource.\n"); error = ENOMEM; goto bad; } sc->vpd = RES2VPD(sc->res); snprintf(unit, sizeof(unit), "%d", device_get_unit(sc->dev)); snprintf(sc->MachineType, 5, "%.4s", sc->vpd->MachType); snprintf(sc->MachineModel, 4, "%.3s", sc->vpd->MachType+4); snprintf(sc->BuildID, 10, "%.9s", sc->vpd->BuildID); snprintf(sc->BoxSerial, 8, "%.7s", sc->vpd->BoxSerial); snprintf(sc->PlanarSerial, 12, "%.11s", sc->vpd->PlanarSerial); sysctl_ctx_init(&sc->ctx); SYSCTL_ADD_STRING(&sc->ctx, SYSCTL_STATIC_CHILDREN(_hw_vpd_machine_type), OID_AUTO, unit, CTLFLAG_RD|CTLFLAG_DYN, sc->MachineType, 0, NULL); SYSCTL_ADD_STRING(&sc->ctx, SYSCTL_STATIC_CHILDREN(_hw_vpd_machine_model), OID_AUTO, unit, CTLFLAG_RD|CTLFLAG_DYN, sc->MachineModel, 0, NULL); SYSCTL_ADD_STRING(&sc->ctx, SYSCTL_STATIC_CHILDREN(_hw_vpd_build_id), OID_AUTO, unit, CTLFLAG_RD|CTLFLAG_DYN, sc->BuildID, 0, NULL); SYSCTL_ADD_STRING(&sc->ctx, SYSCTL_STATIC_CHILDREN(_hw_vpd_serial_box), OID_AUTO, unit, CTLFLAG_RD|CTLFLAG_DYN, sc->BoxSerial, 0, NULL); SYSCTL_ADD_STRING(&sc->ctx, SYSCTL_STATIC_CHILDREN(_hw_vpd_serial_planar), OID_AUTO, unit, CTLFLAG_RD|CTLFLAG_DYN, sc->PlanarSerial, 0, NULL); device_printf(dev, "Machine Type: %.4s, Model: %.3s, Build ID: %.9s\n", sc->MachineType, sc->MachineModel, sc->BuildID); device_printf(dev, "Box Serial: %.7s, Planar Serial: %.11s\n", sc->BoxSerial, sc->PlanarSerial); return (0); bad: if (sc->res) bus_release_resource(dev, SYS_RES_MEMORY, sc->rid, sc->res); return (error); } static int vpd_detach (device_t dev) { struct vpd_softc *sc; sc = device_get_softc(dev); if (sc->res) bus_release_resource(dev, SYS_RES_MEMORY, sc->rid, sc->res); sysctl_ctx_free(&sc->ctx); return (0); } static int vpd_modevent (mod, what, arg) module_t mod; int what; void * arg; { device_t * devs; int count; int i; switch (what) { case MOD_LOAD: break; case MOD_UNLOAD: devclass_get_devices(vpd_devclass, &devs, &count); for (i = 0; i < count; i++) { device_delete_child(device_get_parent(devs[i]), devs[i]); } break; default: break; } return (0); } static device_method_t vpd_methods[] = { /* Device interface */ DEVMETHOD(device_identify, vpd_identify), DEVMETHOD(device_probe, vpd_probe), DEVMETHOD(device_attach, vpd_attach), DEVMETHOD(device_detach, vpd_detach), { 0, 0 } }; static driver_t vpd_driver = { "vpd", vpd_methods, sizeof(struct vpd_softc), }; DRIVER_MODULE(vpd, nexus, vpd_driver, vpd_devclass, vpd_modevent, 0); MODULE_VERSION(vpd, 1); /* * Perform a checksum over the VPD structure, starting with * the BuildID. 
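Before the checksum routine itself, note how vpd_attach() above converts the fixed-width, non-NUL-terminated BIOS fields into C strings using nothing but snprintf() precision specifiers, e.g. splitting the 7-byte MachType field into a 4-character type and a 3-character model. A stand-alone sketch of that technique (field contents hypothetical):

#include <stdio.h>

int
main(void)
{
	/* Hypothetical 7-byte MachType field: 4-char type, 3-char model. */
	const char machtype[7] = { '2', '3', '7', '3', 'H', 'U', '6' };
	char type[5], model[4];

	/* "%.Ns" reads at most N bytes, so no NUL terminator is needed. */
	snprintf(type, sizeof(type), "%.4s", machtype);
	snprintf(model, sizeof(model), "%.3s", machtype + 4);
	printf("Machine Type: %s, Model: %s\n", type, model);
	return (0);
}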
(Jean Delvare ) */ static int vpd_cksum (struct vpd *v) { u_int8_t *ptr; u_int8_t cksum; int i; ptr = (u_int8_t *)v; cksum = 0; for (i = offsetof(struct vpd, BuildID); i < v->Length ; i++) cksum += ptr[i]; return (cksum); } Index: head/sys/x86/cpufreq/est.c =================================================================== --- head/sys/x86/cpufreq/est.c (revision 326262) +++ head/sys/x86/cpufreq/est.c (revision 326263) @@ -1,1401 +1,1403 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2004 Colin Percival * Copyright (c) 2005 Nate Lawson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #include #include #include #include #include #include #include "acpi_if.h" /* Status/control registers (from the IA-32 System Programming Guide). */ #define MSR_PERF_STATUS 0x198 #define MSR_PERF_CTL 0x199 /* Register and bit for enabling SpeedStep. */ #define MSR_MISC_ENABLE 0x1a0 #define MSR_SS_ENABLE (1<<16) /* Frequency and MSR control values. */ typedef struct { uint16_t freq; uint16_t volts; uint16_t id16; int power; } freq_info; /* Identifying characteristics of a processor and supported frequencies. */ typedef struct { const u_int vendor_id; uint32_t id32; freq_info *freqtab; } cpu_info; struct est_softc { device_t dev; int acpi_settings; int msr_settings; freq_info *freq_list; }; /* Convert MHz and mV into IDs for passing to the MSR. */ #define ID16(MHz, mV, bus_clk) \ (((MHz / bus_clk) << 8) | ((mV ? mV - 700 : 0) >> 4)) #define ID32(MHz_hi, mV_hi, MHz_lo, mV_lo, bus_clk) \ ((ID16(MHz_lo, mV_lo, bus_clk) << 16) | (ID16(MHz_hi, mV_hi, bus_clk))) /* Format for storing IDs in our table. 
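The ID16()/ID32() macros above pack an operating point into the 16-bit format used by the PERF_CTL/PERF_STATUS MSRs: the bus-clock multiplier goes in bits 15:8 and the voltage ID, (mV - 700) >> 4, in bits 7:0. A quick user-space check of the encoding, using the top and bottom entries of the PM17_130 table defined below:

#include <stdint.h>
#include <stdio.h>

#define ID16(MHz, mV, bus_clk) \
	(((MHz / bus_clk) << 8) | ((mV ? mV - 700 : 0) >> 4))

int
main(void)
{
	/* 1700 MHz @ 1484 mV, 100 MHz bus: (17 << 8) | (784 >> 4). */
	uint16_t hi = ID16(1700, 1484, 100);
	/* 600 MHz @ 956 mV: (6 << 8) | (256 >> 4). */
	uint16_t lo = ID16(600, 956, 100);

	printf("hi = 0x%04x, lo = 0x%04x\n", hi, lo);	/* 0x1131, 0x0610 */
	return (0);
}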
*/ #define FREQ_INFO_PWR(MHz, mV, bus_clk, mW) \ { MHz, mV, ID16(MHz, mV, bus_clk), mW } #define FREQ_INFO(MHz, mV, bus_clk) \ FREQ_INFO_PWR(MHz, mV, bus_clk, CPUFREQ_VAL_UNKNOWN) #define INTEL(tab, zhi, vhi, zlo, vlo, bus_clk) \ { CPU_VENDOR_INTEL, ID32(zhi, vhi, zlo, vlo, bus_clk), tab } #define CENTAUR(tab, zhi, vhi, zlo, vlo, bus_clk) \ { CPU_VENDOR_CENTAUR, ID32(zhi, vhi, zlo, vlo, bus_clk), tab } static int msr_info_enabled = 0; TUNABLE_INT("hw.est.msr_info", &msr_info_enabled); static int strict = -1; TUNABLE_INT("hw.est.strict", &strict); /* Default bus clock value for Centrino processors. */ #define INTEL_BUS_CLK 100 /* XXX Update this if new CPUs have more settings. */ #define EST_MAX_SETTINGS 10 CTASSERT(EST_MAX_SETTINGS <= MAX_SETTINGS); /* Estimate in microseconds of latency for performing a transition. */ #define EST_TRANS_LAT 1000 /* * Frequency (MHz) and voltage (mV) settings. * * Dothan processors have multiple VID#s with different settings for * each VID#. Since we can't uniquely identify this info * without undisclosed methods from Intel, we can't support newer * processors with this table method. If ACPI Px states are supported, * we get info from them. * * Data from the "Intel Pentium M Processor Datasheet", * Order Number 252612-003, Table 5. */ static freq_info PM17_130[] = { /* 130nm 1.70GHz Pentium M */ FREQ_INFO(1700, 1484, INTEL_BUS_CLK), FREQ_INFO(1400, 1308, INTEL_BUS_CLK), FREQ_INFO(1200, 1228, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1004, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM16_130[] = { /* 130nm 1.60GHz Pentium M */ FREQ_INFO(1600, 1484, INTEL_BUS_CLK), FREQ_INFO(1400, 1420, INTEL_BUS_CLK), FREQ_INFO(1200, 1276, INTEL_BUS_CLK), FREQ_INFO(1000, 1164, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM15_130[] = { /* 130nm 1.50GHz Pentium M */ FREQ_INFO(1500, 1484, INTEL_BUS_CLK), FREQ_INFO(1400, 1452, INTEL_BUS_CLK), FREQ_INFO(1200, 1356, INTEL_BUS_CLK), FREQ_INFO(1000, 1228, INTEL_BUS_CLK), FREQ_INFO( 800, 1116, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM14_130[] = { /* 130nm 1.40GHz Pentium M */ FREQ_INFO(1400, 1484, INTEL_BUS_CLK), FREQ_INFO(1200, 1436, INTEL_BUS_CLK), FREQ_INFO(1000, 1308, INTEL_BUS_CLK), FREQ_INFO( 800, 1180, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM13_130[] = { /* 130nm 1.30GHz Pentium M */ FREQ_INFO(1300, 1388, INTEL_BUS_CLK), FREQ_INFO(1200, 1356, INTEL_BUS_CLK), FREQ_INFO(1000, 1292, INTEL_BUS_CLK), FREQ_INFO( 800, 1260, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM13_LV_130[] = { /* 130nm 1.30GHz Low Voltage Pentium M */ FREQ_INFO(1300, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1100, 1100, INTEL_BUS_CLK), FREQ_INFO(1000, 1020, INTEL_BUS_CLK), FREQ_INFO( 900, 1004, INTEL_BUS_CLK), FREQ_INFO( 800, 988, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM12_LV_130[] = { /* 130 nm 1.20GHz Low Voltage Pentium M */ FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1100, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 900, 1020, INTEL_BUS_CLK), FREQ_INFO( 800, 1004, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM11_LV_130[] = { /* 130 nm 1.10GHz Low Voltage Pentium M */ 
FREQ_INFO(1100, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1164, INTEL_BUS_CLK), FREQ_INFO( 900, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1020, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM11_ULV_130[] = { /* 130 nm 1.10GHz Ultra Low Voltage Pentium M */ FREQ_INFO(1100, 1004, INTEL_BUS_CLK), FREQ_INFO(1000, 988, INTEL_BUS_CLK), FREQ_INFO( 900, 972, INTEL_BUS_CLK), FREQ_INFO( 800, 956, INTEL_BUS_CLK), FREQ_INFO( 600, 844, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM10_ULV_130[] = { /* 130 nm 1.00GHz Ultra Low Voltage Pentium M */ FREQ_INFO(1000, 1004, INTEL_BUS_CLK), FREQ_INFO( 900, 988, INTEL_BUS_CLK), FREQ_INFO( 800, 972, INTEL_BUS_CLK), FREQ_INFO( 600, 844, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; /* * Data from "Intel Pentium M Processor on 90nm Process with * 2-MB L2 Cache Datasheet", Order Number 302189-008, Table 5. */ static freq_info PM_765A_90[] = { /* 90 nm 2.10GHz Pentium M, VID #A */ FREQ_INFO(2100, 1340, INTEL_BUS_CLK), FREQ_INFO(1800, 1276, INTEL_BUS_CLK), FREQ_INFO(1600, 1228, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_765B_90[] = { /* 90 nm 2.10GHz Pentium M, VID #B */ FREQ_INFO(2100, 1324, INTEL_BUS_CLK), FREQ_INFO(1800, 1260, INTEL_BUS_CLK), FREQ_INFO(1600, 1212, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_765C_90[] = { /* 90 nm 2.10GHz Pentium M, VID #C */ FREQ_INFO(2100, 1308, INTEL_BUS_CLK), FREQ_INFO(1800, 1244, INTEL_BUS_CLK), FREQ_INFO(1600, 1212, INTEL_BUS_CLK), FREQ_INFO(1400, 1164, INTEL_BUS_CLK), FREQ_INFO(1200, 1116, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_765E_90[] = { /* 90 nm 2.10GHz Pentium M, VID #E */ FREQ_INFO(2100, 1356, INTEL_BUS_CLK), FREQ_INFO(1800, 1292, INTEL_BUS_CLK), FREQ_INFO(1600, 1244, INTEL_BUS_CLK), FREQ_INFO(1400, 1196, INTEL_BUS_CLK), FREQ_INFO(1200, 1148, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_755A_90[] = { /* 90 nm 2.00GHz Pentium M, VID #A */ FREQ_INFO(2000, 1340, INTEL_BUS_CLK), FREQ_INFO(1800, 1292, INTEL_BUS_CLK), FREQ_INFO(1600, 1244, INTEL_BUS_CLK), FREQ_INFO(1400, 1196, INTEL_BUS_CLK), FREQ_INFO(1200, 1148, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_755B_90[] = { /* 90 nm 2.00GHz Pentium M, VID #B */ FREQ_INFO(2000, 1324, INTEL_BUS_CLK), FREQ_INFO(1800, 1276, INTEL_BUS_CLK), FREQ_INFO(1600, 1228, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_755C_90[] = { /* 90 nm 2.00GHz Pentium M, VID #C */ FREQ_INFO(2000, 1308, INTEL_BUS_CLK), FREQ_INFO(1800, 1276, INTEL_BUS_CLK), FREQ_INFO(1600, 1228, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), 
FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_755D_90[] = { /* 90 nm 2.00GHz Pentium M, VID #D */ FREQ_INFO(2000, 1276, INTEL_BUS_CLK), FREQ_INFO(1800, 1244, INTEL_BUS_CLK), FREQ_INFO(1600, 1196, INTEL_BUS_CLK), FREQ_INFO(1400, 1164, INTEL_BUS_CLK), FREQ_INFO(1200, 1116, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_745A_90[] = { /* 90 nm 1.80GHz Pentium M, VID #A */ FREQ_INFO(1800, 1340, INTEL_BUS_CLK), FREQ_INFO(1600, 1292, INTEL_BUS_CLK), FREQ_INFO(1400, 1228, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_745B_90[] = { /* 90 nm 1.80GHz Pentium M, VID #B */ FREQ_INFO(1800, 1324, INTEL_BUS_CLK), FREQ_INFO(1600, 1276, INTEL_BUS_CLK), FREQ_INFO(1400, 1212, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_745C_90[] = { /* 90 nm 1.80GHz Pentium M, VID #C */ FREQ_INFO(1800, 1308, INTEL_BUS_CLK), FREQ_INFO(1600, 1260, INTEL_BUS_CLK), FREQ_INFO(1400, 1212, INTEL_BUS_CLK), FREQ_INFO(1200, 1148, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_745D_90[] = { /* 90 nm 1.80GHz Pentium M, VID #D */ FREQ_INFO(1800, 1276, INTEL_BUS_CLK), FREQ_INFO(1600, 1228, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_735A_90[] = { /* 90 nm 1.70GHz Pentium M, VID #A */ FREQ_INFO(1700, 1340, INTEL_BUS_CLK), FREQ_INFO(1400, 1244, INTEL_BUS_CLK), FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_735B_90[] = { /* 90 nm 1.70GHz Pentium M, VID #B */ FREQ_INFO(1700, 1324, INTEL_BUS_CLK), FREQ_INFO(1400, 1244, INTEL_BUS_CLK), FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_735C_90[] = { /* 90 nm 1.70GHz Pentium M, VID #C */ FREQ_INFO(1700, 1308, INTEL_BUS_CLK), FREQ_INFO(1400, 1228, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_735D_90[] = { /* 90 nm 1.70GHz Pentium M, VID #D */ FREQ_INFO(1700, 1276, INTEL_BUS_CLK), FREQ_INFO(1400, 1212, INTEL_BUS_CLK), FREQ_INFO(1200, 1148, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_725A_90[] = { /* 90 nm 1.60GHz Pentium M, VID #A */ FREQ_INFO(1600, 1340, INTEL_BUS_CLK), FREQ_INFO(1400, 1276, INTEL_BUS_CLK), FREQ_INFO(1200, 1212, INTEL_BUS_CLK), FREQ_INFO(1000, 1132, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, 
INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_725B_90[] = { /* 90 nm 1.60GHz Pentium M, VID #B */ FREQ_INFO(1600, 1324, INTEL_BUS_CLK), FREQ_INFO(1400, 1260, INTEL_BUS_CLK), FREQ_INFO(1200, 1196, INTEL_BUS_CLK), FREQ_INFO(1000, 1132, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_725C_90[] = { /* 90 nm 1.60GHz Pentium M, VID #C */ FREQ_INFO(1600, 1308, INTEL_BUS_CLK), FREQ_INFO(1400, 1244, INTEL_BUS_CLK), FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_725D_90[] = { /* 90 nm 1.60GHz Pentium M, VID #D */ FREQ_INFO(1600, 1276, INTEL_BUS_CLK), FREQ_INFO(1400, 1228, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_715A_90[] = { /* 90 nm 1.50GHz Pentium M, VID #A */ FREQ_INFO(1500, 1340, INTEL_BUS_CLK), FREQ_INFO(1200, 1228, INTEL_BUS_CLK), FREQ_INFO(1000, 1148, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_715B_90[] = { /* 90 nm 1.50GHz Pentium M, VID #B */ FREQ_INFO(1500, 1324, INTEL_BUS_CLK), FREQ_INFO(1200, 1212, INTEL_BUS_CLK), FREQ_INFO(1000, 1148, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_715C_90[] = { /* 90 nm 1.50GHz Pentium M, VID #C */ FREQ_INFO(1500, 1308, INTEL_BUS_CLK), FREQ_INFO(1200, 1212, INTEL_BUS_CLK), FREQ_INFO(1000, 1132, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_715D_90[] = { /* 90 nm 1.50GHz Pentium M, VID #D */ FREQ_INFO(1500, 1276, INTEL_BUS_CLK), FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_778_90[] = { /* 90 nm 1.60GHz Low Voltage Pentium M */ FREQ_INFO(1600, 1116, INTEL_BUS_CLK), FREQ_INFO(1500, 1116, INTEL_BUS_CLK), FREQ_INFO(1400, 1100, INTEL_BUS_CLK), FREQ_INFO(1300, 1084, INTEL_BUS_CLK), FREQ_INFO(1200, 1068, INTEL_BUS_CLK), FREQ_INFO(1100, 1052, INTEL_BUS_CLK), FREQ_INFO(1000, 1052, INTEL_BUS_CLK), FREQ_INFO( 900, 1036, INTEL_BUS_CLK), FREQ_INFO( 800, 1020, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_758_90[] = { /* 90 nm 1.50GHz Low Voltage Pentium M */ FREQ_INFO(1500, 1116, INTEL_BUS_CLK), FREQ_INFO(1400, 1116, INTEL_BUS_CLK), FREQ_INFO(1300, 1100, INTEL_BUS_CLK), FREQ_INFO(1200, 1084, INTEL_BUS_CLK), FREQ_INFO(1100, 1068, INTEL_BUS_CLK), FREQ_INFO(1000, 1052, INTEL_BUS_CLK), FREQ_INFO( 900, 1036, INTEL_BUS_CLK), FREQ_INFO( 800, 1020, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_738_90[] = { /* 90 nm 1.40GHz Low Voltage Pentium M */ FREQ_INFO(1400, 1116, INTEL_BUS_CLK), FREQ_INFO(1300, 1116, INTEL_BUS_CLK), FREQ_INFO(1200, 1100, INTEL_BUS_CLK), FREQ_INFO(1100, 1068, INTEL_BUS_CLK), FREQ_INFO(1000, 1052, INTEL_BUS_CLK), FREQ_INFO( 900, 1036, INTEL_BUS_CLK), FREQ_INFO( 800, 1020, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_773G_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID 
#G */ FREQ_INFO(1300, 956, INTEL_BUS_CLK), FREQ_INFO(1200, 940, INTEL_BUS_CLK), FREQ_INFO(1100, 924, INTEL_BUS_CLK), FREQ_INFO(1000, 908, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773H_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #H */ FREQ_INFO(1300, 940, INTEL_BUS_CLK), FREQ_INFO(1200, 924, INTEL_BUS_CLK), FREQ_INFO(1100, 908, INTEL_BUS_CLK), FREQ_INFO(1000, 892, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773I_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #I */ FREQ_INFO(1300, 924, INTEL_BUS_CLK), FREQ_INFO(1200, 908, INTEL_BUS_CLK), FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773J_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #J */ FREQ_INFO(1300, 908, INTEL_BUS_CLK), FREQ_INFO(1200, 908, INTEL_BUS_CLK), FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773K_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #K */ FREQ_INFO(1300, 892, INTEL_BUS_CLK), FREQ_INFO(1200, 892, INTEL_BUS_CLK), FREQ_INFO(1100, 876, INTEL_BUS_CLK), FREQ_INFO(1000, 860, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773L_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #L */ FREQ_INFO(1300, 876, INTEL_BUS_CLK), FREQ_INFO(1200, 876, INTEL_BUS_CLK), FREQ_INFO(1100, 860, INTEL_BUS_CLK), FREQ_INFO(1000, 860, INTEL_BUS_CLK), FREQ_INFO( 900, 844, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753G_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #G */ FREQ_INFO(1200, 956, INTEL_BUS_CLK), FREQ_INFO(1100, 940, INTEL_BUS_CLK), FREQ_INFO(1000, 908, INTEL_BUS_CLK), FREQ_INFO( 900, 892, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753H_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #H */ FREQ_INFO(1200, 940, INTEL_BUS_CLK), FREQ_INFO(1100, 924, INTEL_BUS_CLK), FREQ_INFO(1000, 908, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753I_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #I */ FREQ_INFO(1200, 924, INTEL_BUS_CLK), FREQ_INFO(1100, 908, INTEL_BUS_CLK), FREQ_INFO(1000, 892, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753J_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #J */ FREQ_INFO(1200, 908, INTEL_BUS_CLK), FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753K_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #K */ FREQ_INFO(1200, 892, INTEL_BUS_CLK), FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, 
INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753L_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #L */ FREQ_INFO(1200, 876, INTEL_BUS_CLK), FREQ_INFO(1100, 876, INTEL_BUS_CLK), FREQ_INFO(1000, 860, INTEL_BUS_CLK), FREQ_INFO( 900, 844, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JG_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #G */ FREQ_INFO(1100, 956, INTEL_BUS_CLK), FREQ_INFO(1000, 940, INTEL_BUS_CLK), FREQ_INFO( 900, 908, INTEL_BUS_CLK), FREQ_INFO( 800, 876, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JH_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #H */ FREQ_INFO(1100, 940, INTEL_BUS_CLK), FREQ_INFO(1000, 924, INTEL_BUS_CLK), FREQ_INFO( 900, 892, INTEL_BUS_CLK), FREQ_INFO( 800, 876, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JI_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #I */ FREQ_INFO(1100, 924, INTEL_BUS_CLK), FREQ_INFO(1000, 908, INTEL_BUS_CLK), FREQ_INFO( 900, 892, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JJ_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #J */ FREQ_INFO(1100, 908, INTEL_BUS_CLK), FREQ_INFO(1000, 892, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JK_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #K */ FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JL_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #L */ FREQ_INFO(1100, 876, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M */ FREQ_INFO(1100, 940, INTEL_BUS_CLK), FREQ_INFO(1000, 924, INTEL_BUS_CLK), FREQ_INFO( 900, 892, INTEL_BUS_CLK), FREQ_INFO( 800, 876, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; static freq_info PM_723_90[] = { /* 90 nm 1.00GHz Ultra Low Voltage Pentium M */ FREQ_INFO(1000, 940, INTEL_BUS_CLK), FREQ_INFO( 900, 908, INTEL_BUS_CLK), FREQ_INFO( 800, 876, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), FREQ_INFO( 0, 0, 1), }; /* * VIA C7-M 500 MHz FSB, 400 MHz FSB, and ULV variants. * Data from the "VIA C7-M Processor BIOS Writer's Guide (v2.17)" datasheet. 
*/ static freq_info C7M_795[] = { /* 2.00GHz Centaur C7-M 533 Mhz FSB */ FREQ_INFO_PWR(2000, 1148, 133, 20000), FREQ_INFO_PWR(1867, 1132, 133, 18000), FREQ_INFO_PWR(1600, 1100, 133, 15000), FREQ_INFO_PWR(1467, 1052, 133, 13000), FREQ_INFO_PWR(1200, 1004, 133, 10000), FREQ_INFO_PWR( 800, 844, 133, 7000), FREQ_INFO_PWR( 667, 844, 133, 6000), FREQ_INFO_PWR( 533, 844, 133, 5000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_785[] = { /* 1.80GHz Centaur C7-M 533 Mhz FSB */ FREQ_INFO_PWR(1867, 1148, 133, 18000), FREQ_INFO_PWR(1600, 1100, 133, 15000), FREQ_INFO_PWR(1467, 1052, 133, 13000), FREQ_INFO_PWR(1200, 1004, 133, 10000), FREQ_INFO_PWR( 800, 844, 133, 7000), FREQ_INFO_PWR( 667, 844, 133, 6000), FREQ_INFO_PWR( 533, 844, 133, 5000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_765[] = { /* 1.60GHz Centaur C7-M 533 Mhz FSB */ FREQ_INFO_PWR(1600, 1084, 133, 15000), FREQ_INFO_PWR(1467, 1052, 133, 13000), FREQ_INFO_PWR(1200, 1004, 133, 10000), FREQ_INFO_PWR( 800, 844, 133, 7000), FREQ_INFO_PWR( 667, 844, 133, 6000), FREQ_INFO_PWR( 533, 844, 133, 5000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_794[] = { /* 2.00GHz Centaur C7-M 400 Mhz FSB */ FREQ_INFO_PWR(2000, 1148, 100, 20000), FREQ_INFO_PWR(1800, 1132, 100, 18000), FREQ_INFO_PWR(1600, 1100, 100, 15000), FREQ_INFO_PWR(1400, 1052, 100, 13000), FREQ_INFO_PWR(1000, 1004, 100, 10000), FREQ_INFO_PWR( 800, 844, 100, 7000), FREQ_INFO_PWR( 600, 844, 100, 6000), FREQ_INFO_PWR( 400, 844, 100, 5000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_784[] = { /* 1.80GHz Centaur C7-M 400 Mhz FSB */ FREQ_INFO_PWR(1800, 1148, 100, 18000), FREQ_INFO_PWR(1600, 1100, 100, 15000), FREQ_INFO_PWR(1400, 1052, 100, 13000), FREQ_INFO_PWR(1000, 1004, 100, 10000), FREQ_INFO_PWR( 800, 844, 100, 7000), FREQ_INFO_PWR( 600, 844, 100, 6000), FREQ_INFO_PWR( 400, 844, 100, 5000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_764[] = { /* 1.60GHz Centaur C7-M 400 Mhz FSB */ FREQ_INFO_PWR(1600, 1084, 100, 15000), FREQ_INFO_PWR(1400, 1052, 100, 13000), FREQ_INFO_PWR(1000, 1004, 100, 10000), FREQ_INFO_PWR( 800, 844, 100, 7000), FREQ_INFO_PWR( 600, 844, 100, 6000), FREQ_INFO_PWR( 400, 844, 100, 5000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_754[] = { /* 1.50GHz Centaur C7-M 400 Mhz FSB */ FREQ_INFO_PWR(1500, 1004, 100, 12000), FREQ_INFO_PWR(1400, 988, 100, 11000), FREQ_INFO_PWR(1000, 940, 100, 9000), FREQ_INFO_PWR( 800, 844, 100, 7000), FREQ_INFO_PWR( 600, 844, 100, 6000), FREQ_INFO_PWR( 400, 844, 100, 5000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_771[] = { /* 1.20GHz Centaur C7-M 400 Mhz FSB */ FREQ_INFO_PWR(1200, 860, 100, 7000), FREQ_INFO_PWR(1000, 860, 100, 6000), FREQ_INFO_PWR( 800, 844, 100, 5500), FREQ_INFO_PWR( 600, 844, 100, 5000), FREQ_INFO_PWR( 400, 844, 100, 4000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_775_ULV[] = { /* 1.50GHz Centaur C7-M ULV */ FREQ_INFO_PWR(1500, 956, 100, 7500), FREQ_INFO_PWR(1400, 940, 100, 6000), FREQ_INFO_PWR(1000, 860, 100, 5000), FREQ_INFO_PWR( 800, 828, 100, 2800), FREQ_INFO_PWR( 600, 796, 100, 2500), FREQ_INFO_PWR( 400, 796, 100, 2000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_772_ULV[] = { /* 1.20GHz Centaur C7-M ULV */ FREQ_INFO_PWR(1200, 844, 100, 5000), FREQ_INFO_PWR(1000, 844, 100, 4000), FREQ_INFO_PWR( 800, 828, 100, 2800), FREQ_INFO_PWR( 600, 796, 100, 2500), FREQ_INFO_PWR( 400, 796, 100, 2000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_779_ULV[] = { /* 1.00GHz Centaur C7-M ULV */ FREQ_INFO_PWR(1000, 796, 100, 3500), FREQ_INFO_PWR( 800, 796, 100, 2800), FREQ_INFO_PWR( 600, 796, 100, 2500), FREQ_INFO_PWR( 400, 796, 100, 
2000), FREQ_INFO(0, 0, 1), }; static freq_info C7M_770_ULV[] = { /* 1.00GHz Centaur C7-M ULV */ FREQ_INFO_PWR(1000, 844, 100, 5000), FREQ_INFO_PWR( 800, 796, 100, 2800), FREQ_INFO_PWR( 600, 796, 100, 2500), FREQ_INFO_PWR( 400, 796, 100, 2000), FREQ_INFO(0, 0, 1), }; static cpu_info ESTprocs[] = { INTEL(PM17_130, 1700, 1484, 600, 956, INTEL_BUS_CLK), INTEL(PM16_130, 1600, 1484, 600, 956, INTEL_BUS_CLK), INTEL(PM15_130, 1500, 1484, 600, 956, INTEL_BUS_CLK), INTEL(PM14_130, 1400, 1484, 600, 956, INTEL_BUS_CLK), INTEL(PM13_130, 1300, 1388, 600, 956, INTEL_BUS_CLK), INTEL(PM13_LV_130, 1300, 1180, 600, 956, INTEL_BUS_CLK), INTEL(PM12_LV_130, 1200, 1180, 600, 956, INTEL_BUS_CLK), INTEL(PM11_LV_130, 1100, 1180, 600, 956, INTEL_BUS_CLK), INTEL(PM11_ULV_130, 1100, 1004, 600, 844, INTEL_BUS_CLK), INTEL(PM10_ULV_130, 1000, 1004, 600, 844, INTEL_BUS_CLK), INTEL(PM_765A_90, 2100, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_765B_90, 2100, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_765C_90, 2100, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_765E_90, 2100, 1356, 600, 988, INTEL_BUS_CLK), INTEL(PM_755A_90, 2000, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_755B_90, 2000, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_755C_90, 2000, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_755D_90, 2000, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_745A_90, 1800, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_745B_90, 1800, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_745C_90, 1800, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_745D_90, 1800, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_735A_90, 1700, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_735B_90, 1700, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_735C_90, 1700, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_735D_90, 1700, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_725A_90, 1600, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_725B_90, 1600, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_725C_90, 1600, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_725D_90, 1600, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_715A_90, 1500, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_715B_90, 1500, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_715C_90, 1500, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_715D_90, 1500, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_778_90, 1600, 1116, 600, 988, INTEL_BUS_CLK), INTEL(PM_758_90, 1500, 1116, 600, 988, INTEL_BUS_CLK), INTEL(PM_738_90, 1400, 1116, 600, 988, INTEL_BUS_CLK), INTEL(PM_773G_90, 1300, 956, 600, 812, INTEL_BUS_CLK), INTEL(PM_773H_90, 1300, 940, 600, 812, INTEL_BUS_CLK), INTEL(PM_773I_90, 1300, 924, 600, 812, INTEL_BUS_CLK), INTEL(PM_773J_90, 1300, 908, 600, 812, INTEL_BUS_CLK), INTEL(PM_773K_90, 1300, 892, 600, 812, INTEL_BUS_CLK), INTEL(PM_773L_90, 1300, 876, 600, 812, INTEL_BUS_CLK), INTEL(PM_753G_90, 1200, 956, 600, 812, INTEL_BUS_CLK), INTEL(PM_753H_90, 1200, 940, 600, 812, INTEL_BUS_CLK), INTEL(PM_753I_90, 1200, 924, 600, 812, INTEL_BUS_CLK), INTEL(PM_753J_90, 1200, 908, 600, 812, INTEL_BUS_CLK), INTEL(PM_753K_90, 1200, 892, 600, 812, INTEL_BUS_CLK), INTEL(PM_753L_90, 1200, 876, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JG_90, 1100, 956, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JH_90, 1100, 940, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JI_90, 1100, 924, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JJ_90, 1100, 908, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JK_90, 1100, 892, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JL_90, 1100, 876, 600, 812, INTEL_BUS_CLK), INTEL(PM_733_90, 1100, 940, 600, 812, INTEL_BUS_CLK), INTEL(PM_723_90, 1000, 940, 600, 812, INTEL_BUS_CLK), CENTAUR(C7M_795, 2000, 1148, 533, 844, 133), CENTAUR(C7M_794, 2000, 1148, 400, 844, 
100), CENTAUR(C7M_785, 1867, 1148, 533, 844, 133), CENTAUR(C7M_784, 1800, 1148, 400, 844, 100), CENTAUR(C7M_765, 1600, 1084, 533, 844, 133), CENTAUR(C7M_764, 1600, 1084, 400, 844, 100), CENTAUR(C7M_754, 1500, 1004, 400, 844, 100), CENTAUR(C7M_775_ULV, 1500, 956, 400, 796, 100), CENTAUR(C7M_771, 1200, 860, 400, 844, 100), CENTAUR(C7M_772_ULV, 1200, 844, 400, 796, 100), CENTAUR(C7M_779_ULV, 1000, 796, 400, 796, 100), CENTAUR(C7M_770_ULV, 1000, 844, 400, 796, 100), { 0, 0, NULL }, }; static void est_identify(driver_t *driver, device_t parent); static int est_features(driver_t *driver, u_int *features); static int est_probe(device_t parent); static int est_attach(device_t parent); static int est_detach(device_t parent); static int est_get_info(device_t dev); static int est_acpi_info(device_t dev, freq_info **freqs); static int est_table_info(device_t dev, uint64_t msr, freq_info **freqs); static int est_msr_info(device_t dev, uint64_t msr, freq_info **freqs); static freq_info *est_get_current(freq_info *freq_list); static int est_settings(device_t dev, struct cf_setting *sets, int *count); static int est_set(device_t dev, const struct cf_setting *set); static int est_get(device_t dev, struct cf_setting *set); static int est_type(device_t dev, int *type); static int est_set_id16(device_t dev, uint16_t id16, int need_check); static void est_get_id16(uint16_t *id16_p); static device_method_t est_methods[] = { /* Device interface */ DEVMETHOD(device_identify, est_identify), DEVMETHOD(device_probe, est_probe), DEVMETHOD(device_attach, est_attach), DEVMETHOD(device_detach, est_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, est_set), DEVMETHOD(cpufreq_drv_get, est_get), DEVMETHOD(cpufreq_drv_type, est_type), DEVMETHOD(cpufreq_drv_settings, est_settings), /* ACPI interface */ DEVMETHOD(acpi_get_features, est_features), {0, 0} }; static driver_t est_driver = { "est", est_methods, sizeof(struct est_softc), }; static devclass_t est_devclass; DRIVER_MODULE(est, cpu, est_driver, est_devclass, 0, 0); static int est_features(driver_t *driver, u_int *features) { /* * Notify the ACPI CPU that we support direct access to MSRs. * XXX C1 "I/O then Halt" seems necessary for some broken BIOS. */ *features = ACPI_CAP_PERF_MSRS | ACPI_CAP_C1_IO_HALT; return (0); } static void est_identify(driver_t *driver, device_t parent) { device_t child; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "est", -1) != NULL) return; /* Check that CPUID is supported and the vendor is Intel.*/ if (cpu_high == 0 || (cpu_vendor_id != CPU_VENDOR_INTEL && cpu_vendor_id != CPU_VENDOR_CENTAUR)) return; /* * Check if the CPU supports EST. */ if (!(cpu_feature2 & CPUID2_EST)) return; /* * We add a child for each CPU since settings must be performed * on each CPU in the SMP case. */ child = BUS_ADD_CHILD(parent, 10, "est", -1); if (child == NULL) device_printf(parent, "add est child failed\n"); } static int est_probe(device_t dev) { device_t perf_dev; uint64_t msr; int error, type; if (resource_disabled("est", 0)) return (ENXIO); /* * If the ACPI perf driver has attached and is not just offering * info, let it manage things. */ perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); if (perf_dev && device_is_attached(perf_dev)) { error = CPUFREQ_DRV_TYPE(perf_dev, &type); if (error == 0 && (type & CPUFREQ_FLAG_INFO_ONLY) == 0) return (ENXIO); } /* Attempt to enable SpeedStep if not currently enabled. 
*/ msr = rdmsr(MSR_MISC_ENABLE); if ((msr & MSR_SS_ENABLE) == 0) { wrmsr(MSR_MISC_ENABLE, msr | MSR_SS_ENABLE); if (bootverbose) device_printf(dev, "enabling SpeedStep\n"); /* Check if the enable failed. */ msr = rdmsr(MSR_MISC_ENABLE); if ((msr & MSR_SS_ENABLE) == 0) { device_printf(dev, "failed to enable SpeedStep\n"); return (ENXIO); } } device_set_desc(dev, "Enhanced SpeedStep Frequency Control"); return (0); } static int est_attach(device_t dev) { struct est_softc *sc; sc = device_get_softc(dev); sc->dev = dev; /* On SMP systems we can't guarantee independent freq settings. */ if (strict == -1 && mp_ncpus > 1) strict = 0; /* Check CPU for supported settings. */ if (est_get_info(dev)) return (ENXIO); cpufreq_register(dev); return (0); } static int est_detach(device_t dev) { struct est_softc *sc; int error; error = cpufreq_unregister(dev); if (error) return (error); sc = device_get_softc(dev); if (sc->acpi_settings || sc->msr_settings) free(sc->freq_list, M_DEVBUF); return (0); } /* * Probe for supported CPU settings. First, check our static table of * settings. If no match, try using the ones offered by acpi_perf * (i.e., _PSS). We use ACPI second because some systems (IBM R/T40 * series) export both legacy SMM IO-based access and direct MSR access * but the direct access specifies invalid values for _PSS. */ static int est_get_info(device_t dev) { struct est_softc *sc; uint64_t msr; int error; sc = device_get_softc(dev); msr = rdmsr(MSR_PERF_STATUS); error = est_table_info(dev, msr, &sc->freq_list); if (error) error = est_acpi_info(dev, &sc->freq_list); if (error) error = est_msr_info(dev, msr, &sc->freq_list); if (error) { printf( "est: CPU supports Enhanced SpeedStep, but is not recognized.\n" "est: cpu_vendor %s, msr %0jx\n", cpu_vendor, msr); return (ENXIO); } return (0); } static int est_acpi_info(device_t dev, freq_info **freqs) { struct est_softc *sc; struct cf_setting *sets; freq_info *table; device_t perf_dev; int count, error, i, j; uint16_t saved_id16; perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); if (perf_dev == NULL || !device_is_attached(perf_dev)) return (ENXIO); /* Fetch settings from acpi_perf. */ sc = device_get_softc(dev); table = NULL; sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT); if (sets == NULL) return (ENOMEM); count = MAX_SETTINGS; error = CPUFREQ_DRV_SETTINGS(perf_dev, sets, &count); if (error) goto out; /* Parse settings into our local table format. */ table = malloc((count + 1) * sizeof(freq_info), M_DEVBUF, M_NOWAIT); if (table == NULL) { error = ENOMEM; goto out; } est_get_id16(&saved_id16); for (i = 0, j = 0; i < count; i++) { /* * Confirm id16 value is correct. */ if (sets[i].freq > 0) { error = est_set_id16(dev, sets[i].spec[0], strict); if (error != 0) { if (bootverbose) device_printf(dev, "Invalid freq %u, " "ignored.\n", sets[i].freq); continue; } table[j].freq = sets[i].freq; table[j].volts = sets[i].volts; table[j].id16 = sets[i].spec[0]; table[j].power = sets[i].power; ++j; } } /* Restore the saved setting. */ est_set_id16(dev, saved_id16, 0); /* Mark end of table with a terminator. */ bzero(&table[j], sizeof(freq_info)); sc->acpi_settings = TRUE; *freqs = table; error = 0; out: if (sets) free(sets, M_TEMP); if (error && table) free(table, M_DEVBUF); return (error); } static int est_table_info(device_t dev, uint64_t msr, freq_info **freqs) { cpu_info *p; uint32_t id; /* Find a table which matches (vendor, id32).
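The id32 compared here is the upper half of MSR_PERF_STATUS; as est_msr_info() below shows, bits 47:32 carry the id16 of the highest operating point and bits 63:48 the id16 of the lowest. Decoding an id16 is just ID16() in reverse: the frequency is (id >> 8) * bus_clk and the voltage is ((id & 0xff) << 4) + 700 mV. A small user-space sketch (status value hypothetical, reusing the 0x1131/0x0610 pair from the encoding example):

#include <stdint.h>
#include <stdio.h>

static void
decode_id16(uint16_t id, int bus_clk)
{
	int freq = (id >> 8) * bus_clk;
	int volts = (id & 0xff) ? ((id & 0xff) << 4) + 700 : 0;

	printf("id 0x%04x -> %d MHz @ %d mV\n", id, freq, volts);
}

int
main(void)
{
	/* Hypothetical status image: low id16 in 63:48, high in 47:32. */
	uint64_t msr = ((uint64_t)0x0610 << 48) | ((uint64_t)0x1131 << 32);

	decode_id16((uint16_t)(msr >> 32), 100);	/* 1700 MHz @ 1484 mV */
	decode_id16((uint16_t)(msr >> 48), 100);	/* 600 MHz @ 956 mV */
	return (0);
}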
*/ id = msr >> 32; for (p = ESTprocs; p->id32 != 0; p++) { if (p->vendor_id == cpu_vendor_id && p->id32 == id) break; } if (p->id32 == 0) return (EOPNOTSUPP); /* Make sure the current setpoint is valid. */ if (est_get_current(p->freqtab) == NULL) { device_printf(dev, "current setting not found in table\n"); return (EOPNOTSUPP); } *freqs = p->freqtab; return (0); } static int bus_speed_ok(int bus) { switch (bus) { case 100: case 133: case 333: return (1); default: return (0); } } /* * Flesh out a simple rate table containing the high and low frequencies * based on the current clock speed and the upper 32 bits of the MSR. */ static int est_msr_info(device_t dev, uint64_t msr, freq_info **freqs) { struct est_softc *sc; freq_info *fp; int bus, freq, volts; uint16_t id; if (!msr_info_enabled) return (EOPNOTSUPP); /* Figure out the bus clock. */ freq = atomic_load_acq_64(&tsc_freq) / 1000000; id = msr >> 32; bus = freq / (id >> 8); device_printf(dev, "Guessed bus clock (high) of %d MHz\n", bus); if (!bus_speed_ok(bus)) { /* We may be running on the low frequency. */ id = msr >> 48; bus = freq / (id >> 8); device_printf(dev, "Guessed bus clock (low) of %d MHz\n", bus); if (!bus_speed_ok(bus)) return (EOPNOTSUPP); /* Calculate high frequency. */ id = msr >> 32; freq = ((id >> 8) & 0xff) * bus; } /* Fill out a new freq table containing just the high and low freqs. */ sc = device_get_softc(dev); fp = malloc(sizeof(freq_info) * 3, M_DEVBUF, M_WAITOK | M_ZERO); /* First, the high frequency. */ volts = id & 0xff; if (volts != 0) { volts <<= 4; volts += 700; } fp[0].freq = freq; fp[0].volts = volts; fp[0].id16 = id; fp[0].power = CPUFREQ_VAL_UNKNOWN; device_printf(dev, "Guessed high setting of %d MHz @ %d mV\n", freq, volts); /* Second, the low frequency. */ id = msr >> 48; freq = ((id >> 8) & 0xff) * bus; volts = id & 0xff; if (volts != 0) { volts <<= 4; volts += 700; } fp[1].freq = freq; fp[1].volts = volts; fp[1].id16 = id; fp[1].power = CPUFREQ_VAL_UNKNOWN; device_printf(dev, "Guessed low setting of %d MHz @ %d mV\n", freq, volts); /* Table is already terminated due to M_ZERO. */ sc->msr_settings = TRUE; *freqs = fp; return (0); } static void est_get_id16(uint16_t *id16_p) { *id16_p = rdmsr(MSR_PERF_STATUS) & 0xffff; } static int est_set_id16(device_t dev, uint16_t id16, int need_check) { uint64_t msr; uint16_t new_id16; int ret = 0; /* Read the current register, mask out the old, set the new id. */ msr = rdmsr(MSR_PERF_CTL); msr = (msr & ~0xffff) | id16; wrmsr(MSR_PERF_CTL, msr); if (need_check) { /* Wait a short while and read the new status. */ DELAY(EST_TRANS_LAT); est_get_id16(&new_id16); if (new_id16 != id16) { if (bootverbose) device_printf(dev, "Invalid id16 (set, cur) " "= (%u, %u)\n", id16, new_id16); ret = ENXIO; } } return (ret); } static freq_info * est_get_current(freq_info *freq_list) { freq_info *f; int i; uint16_t id16; /* * Try a few times to get a valid value. Sometimes, if the CPU * is in the middle of an asynchronous transition (e.g., P4TCC), * we get a temporary invalid result.
*/ for (i = 0; i < 5; i++) { est_get_id16(&id16); for (f = freq_list; f->id16 != 0; f++) { if (f->id16 == id16) return (f); } DELAY(100); } return (NULL); } static int est_settings(device_t dev, struct cf_setting *sets, int *count) { struct est_softc *sc; freq_info *f; int i; sc = device_get_softc(dev); if (*count < EST_MAX_SETTINGS) return (E2BIG); i = 0; for (f = sc->freq_list; f->freq != 0; f++, i++) { sets[i].freq = f->freq; sets[i].volts = f->volts; sets[i].power = f->power; sets[i].lat = EST_TRANS_LAT; sets[i].dev = dev; } *count = i; return (0); } static int est_set(device_t dev, const struct cf_setting *set) { struct est_softc *sc; freq_info *f; /* Find the setting matching the requested one. */ sc = device_get_softc(dev); for (f = sc->freq_list; f->freq != 0; f++) { if (f->freq == set->freq) break; } if (f->freq == 0) return (EINVAL); /* Read the current register, mask out the old, set the new id. */ est_set_id16(dev, f->id16, 0); return (0); } static int est_get(device_t dev, struct cf_setting *set) { struct est_softc *sc; freq_info *f; sc = device_get_softc(dev); f = est_get_current(sc->freq_list); if (f == NULL) return (ENXIO); set->freq = f->freq; set->volts = f->volts; set->power = f->power; set->lat = EST_TRANS_LAT; set->dev = dev; return (0); } static int est_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } Index: head/sys/x86/cpufreq/hwpstate.c =================================================================== --- head/sys/x86/cpufreq/hwpstate.c (revision 326262) +++ head/sys/x86/cpufreq/hwpstate.c (revision 326263) @@ -1,527 +1,529 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2005 Nate Lawson * Copyright (c) 2004 Colin Percival * Copyright (c) 2004-2005 Bruno Durcot * Copyright (c) 2004 FUKUDA Nobuhiko * Copyright (c) 2009 Michael Reifenberger * Copyright (c) 2009 Norikatsu Shigemura * Copyright (c) 2008-2009 Gen Otsuji * * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c * in various parts. The authors of these files are Nate Lawson, * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko. * This code contains patches by Michael Reifenberger and Norikatsu Shigemura. * Thank you. * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * For more info: * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 10h Processors * 31116 Rev 3.20 February 04, 2009 * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 11h Processors * 41256 Rev 3.00 - July 07, 2008 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "cpufreq_if.h" #define MSR_AMD_10H_11H_LIMIT 0xc0010061 #define MSR_AMD_10H_11H_CONTROL 0xc0010062 #define MSR_AMD_10H_11H_STATUS 0xc0010063 #define MSR_AMD_10H_11H_CONFIG 0xc0010064 #define AMD_10H_11H_MAX_STATES 16 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */ #define AMD_10H_11H_GET_PSTATE_MAX_VAL(msr) (((msr) >> 4) & 0x7) #define AMD_10H_11H_GET_PSTATE_LIMIT(msr) (((msr)) & 0x7) /* for MSR_AMD_10H_11H_CONFIG 10h:C001_0064:68 / 11h:C001_0064:6B */ #define AMD_10H_11H_CUR_VID(msr) (((msr) >> 9) & 0x7F) #define AMD_10H_11H_CUR_DID(msr) (((msr) >> 6) & 0x07) #define AMD_10H_11H_CUR_FID(msr) ((msr) & 0x3F) #define AMD_17H_CUR_VID(msr) (((msr) >> 14) & 0xFF) #define AMD_17H_CUR_DID(msr) (((msr) >> 8) & 0x3F) #define AMD_17H_CUR_FID(msr) ((msr) & 0xFF) #define HWPSTATE_DEBUG(dev, msg...) \ do{ \ if(hwpstate_verbose) \ device_printf(dev, msg); \ }while(0) struct hwpstate_setting { int freq; /* CPU clock in Mhz or 100ths of a percent. */ int volts; /* Voltage in mV. */ int power; /* Power consumed in mW. */ int lat; /* Transition latency in us. */ int pstate_id; /* P-State id */ }; struct hwpstate_softc { device_t dev; struct hwpstate_setting hwpstate_settings[AMD_10H_11H_MAX_STATES]; int cfnum; }; static void hwpstate_identify(driver_t *driver, device_t parent); static int hwpstate_probe(device_t dev); static int hwpstate_attach(device_t dev); static int hwpstate_detach(device_t dev); static int hwpstate_set(device_t dev, const struct cf_setting *cf); static int hwpstate_get(device_t dev, struct cf_setting *cf); static int hwpstate_settings(device_t dev, struct cf_setting *sets, int *count); static int hwpstate_type(device_t dev, int *type); static int hwpstate_shutdown(device_t dev); static int hwpstate_features(driver_t *driver, u_int *features); static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev); static int hwpstate_get_info_from_msr(device_t dev); static int hwpstate_goto_pstate(device_t dev, int pstate_id); static int hwpstate_verbose = 0; SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RWTUN, &hwpstate_verbose, 0, "Debug hwpstate"); static device_method_t hwpstate_methods[] = { /* Device interface */ DEVMETHOD(device_identify, hwpstate_identify), DEVMETHOD(device_probe, hwpstate_probe), DEVMETHOD(device_attach, hwpstate_attach), DEVMETHOD(device_detach, hwpstate_detach), DEVMETHOD(device_shutdown, hwpstate_shutdown), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, hwpstate_set), DEVMETHOD(cpufreq_drv_get, hwpstate_get), DEVMETHOD(cpufreq_drv_settings, hwpstate_settings), DEVMETHOD(cpufreq_drv_type, hwpstate_type), /* ACPI interface */ DEVMETHOD(acpi_get_features, hwpstate_features), {0, 0} }; static devclass_t hwpstate_devclass; static driver_t hwpstate_driver = { "hwpstate", hwpstate_methods, sizeof(struct hwpstate_softc), }; DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0); /* * Go to Px-state on all cpus considering the limit. 
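The limit considered below comes from MSR_AMD_10H_11H_LIMIT: bits 2:0 (the BKDG's CurPstateLimit) name the lowest-numbered, i.e. fastest, P-state currently allowed, so hwpstate_goto_pstate() raises any request to at least that value, and bits 6:4 (PstateMaxVal) give the highest valid P-state index. A small sketch of the field extraction and the clamp (MSR value hypothetical):

#include <stdint.h>
#include <stdio.h>

#define GET_PSTATE_MAX_VAL(msr)	(((msr) >> 4) & 0x7)
#define GET_PSTATE_LIMIT(msr)	((msr) & 0x7)

int
main(void)
{
	uint64_t msr = (4 << 4) | 2;	/* Hypothetical: max P4, limit P2. */
	int limit = (int)GET_PSTATE_LIMIT(msr);
	int request = 0;		/* Ask for the fastest state, P0. */
	int id = request < limit ? limit : request;

	printf("max val P%d, limit P%d, request P%d -> go to P%d\n",
	    (int)GET_PSTATE_MAX_VAL(msr), limit, request, id);
	return (0);
}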
*/ static int hwpstate_goto_pstate(device_t dev, int pstate) { sbintime_t sbt; int i; uint64_t msr; int j; int limit; int id = pstate; int error; /* get the current pstate limit */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr); if (limit > id) id = limit; /* * We are going to the same Px-state on all cpus. * Probably should take _PSD into account. */ error = 0; CPU_FOREACH(i) { /* Bind to each cpu. */ thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, PCPU_GET(cpuid)); /* Go To Px-state */ wrmsr(MSR_AMD_10H_11H_CONTROL, id); } CPU_FOREACH(i) { /* Bind to each cpu. */ thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); /* wait loop (100*100 usec is enough ?) */ for (j = 0; j < 100; j++){ /* get the result. not assure msr=id */ msr = rdmsr(MSR_AMD_10H_11H_STATUS); if (msr == id) break; sbt = SBT_1MS / 10; tsleep_sbt(dev, PZERO, "pstate_goto", sbt, sbt >> tc_precexp, 0); } HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n", (int)msr, PCPU_GET(cpuid)); if (msr != id) { HWPSTATE_DEBUG(dev, "error: loop is not enough.\n"); error = ENXIO; } } thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (error); } static int hwpstate_set(device_t dev, const struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting *set; int i; if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) if (CPUFREQ_CMP(cf->freq, set[i].freq)) break; if (i == sc->cfnum) return (EINVAL); return (hwpstate_goto_pstate(dev, set[i].pstate_id)); } static int hwpstate_get(device_t dev, struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting set; uint64_t msr; sc = device_get_softc(dev); if (cf == NULL) return (EINVAL); msr = rdmsr(MSR_AMD_10H_11H_STATUS); if(msr >= sc->cfnum) return (EINVAL); set = sc->hwpstate_settings[msr]; cf->freq = set.freq; cf->volts = set.volts; cf->power = set.power; cf->lat = set.lat; cf->dev = dev; return (0); } static int hwpstate_settings(device_t dev, struct cf_setting *sets, int *count) { struct hwpstate_softc *sc; struct hwpstate_setting set; int i; if (sets == NULL || count == NULL) return (EINVAL); sc = device_get_softc(dev); if (*count < sc->cfnum) return (E2BIG); for (i = 0; i < sc->cfnum; i++, sets++) { set = sc->hwpstate_settings[i]; sets->freq = set.freq; sets->volts = set.volts; sets->power = set.power; sets->lat = set.lat; sets->dev = dev; } *count = sc->cfnum; return (0); } static int hwpstate_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } static void hwpstate_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "hwpstate", -1) != NULL) return; if (cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) return; /* * Check if hardware pstate enable bit is set. */ if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) { HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n"); return; } if (resource_disabled("hwpstate", 0)) return; if (BUS_ADD_CHILD(parent, 10, "hwpstate", -1) == NULL) device_printf(parent, "hwpstate: add child failed\n"); } static int hwpstate_probe(device_t dev) { struct hwpstate_softc *sc; device_t perf_dev; uint64_t msr; int error, type; /* * Only hwpstate0. * It goes well with acpi_throttle. 
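hwpstate_goto_pstate() above writes the control MSR on every CPU and then polls the status MSR up to 100 times, sleeping roughly 100us between reads, before declaring the transition failed with ENXIO. The same bounded-poll skeleton in isolation, where read_status and pause are hypothetical callbacks standing in for the rdmsr() and tsleep_sbt() calls:

#include <stdbool.h>

/*
 * Bounded poll: retry read_status() until it reports 'want' or the
 * retry budget is exhausted.
 */
static bool
wait_for_state(int want, int (*read_status)(void), void (*pause)(void),
    int tries)
{
	while (tries-- > 0) {
		if (read_status() == want)
			return (true);
		pause();
	}
	return (false);
}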
*/ if (device_get_unit(dev) != 0) return (ENXIO); sc = device_get_softc(dev); sc->dev = dev; /* * Check if acpi_perf has INFO only flag. */ perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); error = TRUE; if (perf_dev && device_is_attached(perf_dev)) { error = CPUFREQ_DRV_TYPE(perf_dev, &type); if (error == 0) { if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) { /* * If acpi_perf doesn't have INFO_ONLY flag, * it will take care of pstate transitions. */ HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n"); return (ENXIO); } else { /* * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW) * we can get _PSS info from acpi_perf * without going into ACPI. */ HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n"); error = hwpstate_get_info_from_acpi_perf(dev, perf_dev); } } } if (error == 0) { /* * Now we get _PSS info from acpi_perf without error. * Let's check it. */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) { HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)" " count mismatch\n", (intmax_t)msr, sc->cfnum); error = TRUE; } } /* * If we cannot get info from acpi_perf, * Let's get info from MSRs. */ if (error) error = hwpstate_get_info_from_msr(dev); if (error) return (error); device_set_desc(dev, "Cool`n'Quiet 2.0"); return (0); } static int hwpstate_attach(device_t dev) { return (cpufreq_register(dev)); } static int hwpstate_get_info_from_msr(device_t dev) { struct hwpstate_softc *sc; struct hwpstate_setting *hwpstate_set; uint64_t msr; int family, i, fid, did; family = CPUID_TO_FAMILY(cpu_id); sc = device_get_softc(dev); /* Get pstate count */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr); hwpstate_set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) { msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i); if ((msr & ((uint64_t)1 << 63)) == 0) { HWPSTATE_DEBUG(dev, "msr is not valid.\n"); return (ENXIO); } did = AMD_10H_11H_CUR_DID(msr); fid = AMD_10H_11H_CUR_FID(msr); /* Convert fid/did to frequency. */ switch(family) { case 0x11: hwpstate_set[i].freq = (100 * (fid + 0x08)) >> did; break; case 0x10: case 0x12: case 0x15: case 0x16: hwpstate_set[i].freq = (100 * (fid + 0x10)) >> did; break; case 0x17: did = AMD_17H_CUR_DID(msr); if (did == 0) { HWPSTATE_DEBUG(dev, "unexpected did: 0\n"); did = 1; } fid = AMD_17H_CUR_FID(msr); hwpstate_set[i].freq = (200 * fid) / did; break; default: HWPSTATE_DEBUG(dev, "get_info_from_msr: AMD family" " 0x%02x CPUs are not supported yet\n", family); return (ENXIO); } hwpstate_set[i].pstate_id = i; /* There was volts calculation, but deleted it. */ hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN; hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN; hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN; } return (0); } static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev) { struct hwpstate_softc *sc; struct cf_setting *perf_set; struct hwpstate_setting *hwpstate_set; int count, error, i; perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT); if (perf_set == NULL) { HWPSTATE_DEBUG(dev, "nomem\n"); return (ENOMEM); } /* * Fetch settings from acpi_perf. * Now it is attached, and has info only flag. 
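A worked check of the family-0x10h conversion in hwpstate_get_info_from_msr() above, freq = (100 * (fid + 0x10)) >> did: fid 0x0c with did 0 gives 100 * 0x1c = 2800 MHz, and the same fid with did 1 halves that to 1400 MHz. As a tiny compile-and-run sketch (fam10h_freq is a hypothetical helper name):

#include <assert.h>

/* Family 10h/12h/15h/16h core frequency, per the formula above. */
static int
fam10h_freq(int fid, int did)
{
	return ((100 * (fid + 0x10)) >> did);
}

int
main(void)
{
	assert(fam10h_freq(0x0c, 0) == 2800);	/* P0, full speed */
	assert(fam10h_freq(0x0c, 1) == 1400);	/* same FID, divisor 2 */
	return (0);
}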
*/ count = MAX_SETTINGS; error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count); if (error) { HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n"); goto out; } sc = device_get_softc(dev); sc->cfnum = count; hwpstate_set = sc->hwpstate_settings; for (i = 0; i < count; i++) { if (i == perf_set[i].spec[0]) { hwpstate_set[i].pstate_id = i; hwpstate_set[i].freq = perf_set[i].freq; hwpstate_set[i].volts = perf_set[i].volts; hwpstate_set[i].power = perf_set[i].power; hwpstate_set[i].lat = perf_set[i].lat; } else { HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n"); error = ENXIO; goto out; } } out: if (perf_set) free(perf_set, M_TEMP); return (error); } static int hwpstate_detach(device_t dev) { hwpstate_goto_pstate(dev, 0); return (cpufreq_unregister(dev)); } static int hwpstate_shutdown(device_t dev) { /* hwpstate_goto_pstate(dev, 0); */ return (0); } static int hwpstate_features(driver_t *driver, u_int *features) { /* Notify the ACPI CPU that we support direct access to MSRs */ *features = ACPI_CAP_PERF_MSRS; return (0); } Index: head/sys/x86/cpufreq/p4tcc.c =================================================================== --- head/sys/x86/cpufreq/p4tcc.c (revision 326262) +++ head/sys/x86/cpufreq/p4tcc.c (revision 326263) @@ -1,347 +1,349 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2005 Nate Lawson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Throttle clock frequency by using the thermal control circuit. This * operates independently of SpeedStep and ACPI throttling and is supported * on Pentium 4 and later models (feature TM). * * Reference: Intel Developer's manual v.3 #245472-012 * * The original version of this driver was written by Ted Unangst for * OpenBSD and imported by Maxim Sobolev. It was rewritten by Nate Lawson * for use with the cpufreq framework. 
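hwpstate_get_info_from_acpi_perf() accepts the acpi_perf table only if each entry's spec[0], the P-state id recovered from _PSS, equals its own index; any mismatch aborts with ENXIO and the driver falls back to reading the MSRs directly. That validation reduced to its essentials (pss_table_consistent is a hypothetical name):

/* Reject tables whose i-th entry does not claim P-state id i. */
static int
pss_table_consistent(const int *pstate_ids, int count)
{
	int i;

	for (i = 0; i < count; i++)
		if (pstate_ids[i] != i)
			return (0);
	return (1);
}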
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #include #include #include "acpi_if.h" struct p4tcc_softc { device_t dev; int set_count; int lowest_val; int auto_mode; }; #define TCC_NUM_SETTINGS 8 #define TCC_ENABLE_ONDEMAND (1<<4) #define TCC_REG_OFFSET 1 #define TCC_SPEED_PERCENT(x) ((10000 * (x)) / TCC_NUM_SETTINGS) static int p4tcc_features(driver_t *driver, u_int *features); static void p4tcc_identify(driver_t *driver, device_t parent); static int p4tcc_probe(device_t dev); static int p4tcc_attach(device_t dev); static int p4tcc_detach(device_t dev); static int p4tcc_settings(device_t dev, struct cf_setting *sets, int *count); static int p4tcc_set(device_t dev, const struct cf_setting *set); static int p4tcc_get(device_t dev, struct cf_setting *set); static int p4tcc_type(device_t dev, int *type); static device_method_t p4tcc_methods[] = { /* Device interface */ DEVMETHOD(device_identify, p4tcc_identify), DEVMETHOD(device_probe, p4tcc_probe), DEVMETHOD(device_attach, p4tcc_attach), DEVMETHOD(device_detach, p4tcc_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, p4tcc_set), DEVMETHOD(cpufreq_drv_get, p4tcc_get), DEVMETHOD(cpufreq_drv_type, p4tcc_type), DEVMETHOD(cpufreq_drv_settings, p4tcc_settings), /* ACPI interface */ DEVMETHOD(acpi_get_features, p4tcc_features), {0, 0} }; static driver_t p4tcc_driver = { "p4tcc", p4tcc_methods, sizeof(struct p4tcc_softc), }; static devclass_t p4tcc_devclass; DRIVER_MODULE(p4tcc, cpu, p4tcc_driver, p4tcc_devclass, 0, 0); static int p4tcc_features(driver_t *driver, u_int *features) { /* Notify the ACPI CPU that we support direct access to MSRs */ *features = ACPI_CAP_THR_MSRS; return (0); } static void p4tcc_identify(driver_t *driver, device_t parent) { if ((cpu_feature & (CPUID_ACPI | CPUID_TM)) != (CPUID_ACPI | CPUID_TM)) return; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "p4tcc", -1) != NULL) return; /* * We attach a p4tcc child for every CPU since settings need to * be performed on every CPU in the SMP case. See section 13.15.3 * of the IA32 Intel Architecture Software Developer's Manual, * Volume 3, for more info. */ if (BUS_ADD_CHILD(parent, 10, "p4tcc", -1) == NULL) device_printf(parent, "add p4tcc child failed\n"); } static int p4tcc_probe(device_t dev) { if (resource_disabled("p4tcc", 0)) return (ENXIO); device_set_desc(dev, "CPU Frequency Thermal Control"); return (0); } static int p4tcc_attach(device_t dev) { struct p4tcc_softc *sc; struct cf_setting set; sc = device_get_softc(dev); sc->dev = dev; sc->set_count = TCC_NUM_SETTINGS; /* * On boot, the TCC is usually in Automatic mode where reading the * current performance level is likely to produce bogus results. * We record that state here and don't trust the contents of the * status MSR until we've set it ourselves. */ sc->auto_mode = TRUE; /* * XXX: After a cursory glance at various Intel specification * XXX: updates it seems like these tests for errata is bogus. * XXX: As far as I can tell, the failure mode is benign, in * XXX: that cpus with no errata will have their bottom two * XXX: STPCLK# rates disabled, so rather than waste more time * XXX: hunting down intel docs, just document it and punt. /phk */ switch (cpu_id & 0xff) { case 0x22: case 0x24: case 0x25: case 0x27: case 0x29: /* * These CPU models hang when set to 12.5%. * See Errata O50, P44, and Z21. 
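TCC_SPEED_PERCENT() expresses each of the eight duty-cycle steps in hundredths of a percent: step 8 is 10000 (100.00%), step 4 is 5000 (50.00%), and step 1 is 1250 (12.50%), which is exactly the level the errata handling below disables on affected models. A quick standalone check:

#include <assert.h>

#define TCC_NUM_SETTINGS	8
#define TCC_SPEED_PERCENT(x)	((10000 * (x)) / TCC_NUM_SETTINGS)

int
main(void)
{
	assert(TCC_SPEED_PERCENT(8) == 10000);	/* 100.00% */
	assert(TCC_SPEED_PERCENT(4) == 5000);	/* 50.00% */
	assert(TCC_SPEED_PERCENT(1) == 1250);	/* 12.50% */
	return (0);
}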
*/ sc->set_count -= 1; break; case 0x07: /* errata N44 and P18 */ case 0x0a: case 0x12: case 0x13: case 0x62: /* Pentium D B1: errata AA21 */ case 0x64: /* Pentium D C1: errata AA21 */ case 0x65: /* Pentium D D0: errata AA21 */ /* * These CPU models hang when set to 12.5% or 25%. * See Errata N44, P18l and AA21. */ sc->set_count -= 2; break; } sc->lowest_val = TCC_NUM_SETTINGS - sc->set_count + 1; /* * Before we finish attach, switch to 100%. It's possible the BIOS * set us to a lower rate. The user can override this after boot. */ set.freq = 10000; p4tcc_set(dev, &set); cpufreq_register(dev); return (0); } static int p4tcc_detach(device_t dev) { struct cf_setting set; int error; error = cpufreq_unregister(dev); if (error) return (error); /* * Before we finish detach, switch to Automatic mode. */ set.freq = 10000; p4tcc_set(dev, &set); return(0); } static int p4tcc_settings(device_t dev, struct cf_setting *sets, int *count) { struct p4tcc_softc *sc; int i, val; sc = device_get_softc(dev); if (sets == NULL || count == NULL) return (EINVAL); if (*count < sc->set_count) return (E2BIG); /* Return a list of valid settings for this driver. */ memset(sets, CPUFREQ_VAL_UNKNOWN, sizeof(*sets) * sc->set_count); val = TCC_NUM_SETTINGS; for (i = 0; i < sc->set_count; i++, val--) { sets[i].freq = TCC_SPEED_PERCENT(val); sets[i].dev = dev; } *count = sc->set_count; return (0); } static int p4tcc_set(device_t dev, const struct cf_setting *set) { struct p4tcc_softc *sc; uint64_t mask, msr; int val; if (set == NULL) return (EINVAL); sc = device_get_softc(dev); /* * Validate requested state converts to a setting that is an integer * from [sc->lowest_val .. TCC_NUM_SETTINGS]. */ val = set->freq * TCC_NUM_SETTINGS / 10000; if (val * 10000 != set->freq * TCC_NUM_SETTINGS || val < sc->lowest_val || val > TCC_NUM_SETTINGS) return (EINVAL); /* * Read the current register and mask off the old setting and * On-Demand bit. If the new val is < 100%, set it and the On-Demand * bit, otherwise just return to Automatic mode. */ msr = rdmsr(MSR_THERM_CONTROL); mask = (TCC_NUM_SETTINGS - 1) << TCC_REG_OFFSET; msr &= ~(mask | TCC_ENABLE_ONDEMAND); if (val < TCC_NUM_SETTINGS) msr |= (val << TCC_REG_OFFSET) | TCC_ENABLE_ONDEMAND; wrmsr(MSR_THERM_CONTROL, msr); /* * Record whether we're now in Automatic or On-Demand mode. We have * to cache this since there is no reliable way to check if TCC is in * Automatic mode (i.e., at 100% or possibly 50%). Reading bit 4 of * the ACPI Thermal Monitor Control Register produces 0 no matter * what the current mode. */ if (msr & TCC_ENABLE_ONDEMAND) sc->auto_mode = FALSE; else sc->auto_mode = TRUE; return (0); } static int p4tcc_get(device_t dev, struct cf_setting *set) { struct p4tcc_softc *sc; uint64_t msr; int val; if (set == NULL) return (EINVAL); sc = device_get_softc(dev); /* * Read the current register and extract the current setting. If * in automatic mode, assume we're at TCC_NUM_SETTINGS (100%). * * XXX This is not completely reliable since at high temperatures * the CPU may be automatically throttling to 50% but it's the best * we can do. 
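p4tcc_set() above accepts a frequency only if it converts exactly back to one of the eight steps: for freq = 7500 (75.00%), val = 7500 * 8 / 10000 = 6 and 6 * 10000 == 7500 * 8, so it passes; freq = 3000 truncates to val = 2 and fails the round-trip test, returning EINVAL. The check restated as a small helper (tcc_step_for_freq is a hypothetical name):

/* Return the TCC step for 'freq' (100ths of a percent), or -1. */
static int
tcc_step_for_freq(int freq)
{
	int val = freq * 8 / 10000;

	if (val * 10000 != freq * 8)
		return (-1);		/* not an exact step */
	return (val);
}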
*/ if (!sc->auto_mode) { msr = rdmsr(MSR_THERM_CONTROL); val = (msr >> TCC_REG_OFFSET) & (TCC_NUM_SETTINGS - 1); } else val = TCC_NUM_SETTINGS; memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set)); set->freq = TCC_SPEED_PERCENT(val); set->dev = dev; return (0); } static int p4tcc_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_RELATIVE; return (0); } Index: head/sys/x86/cpufreq/powernow.c =================================================================== --- head/sys/x86/cpufreq/powernow.c (revision 326262) +++ head/sys/x86/cpufreq/powernow.c (revision 326263) @@ -1,970 +1,972 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2004-2005 Bruno Ducrot * Copyright (c) 2004 FUKUDA Nobuhiko * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Many thanks to Nate Lawson for his helpful comments on this driver and * to Jung-uk Kim for testing. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #define PN7_TYPE 0 #define PN8_TYPE 1 /* Flags for some hardware bugs. */ #define A0_ERRATA 0x1 /* Bugs for the rev. A0 of Athlon (K7): * Interrupts must be disabled and no half * multipliers are allowed */ #define PENDING_STUCK 0x2 /* With some buggy chipset and some newer AMD64 * processor (Rev. G?): * the pending bit from the msr FIDVID_STATUS * is set forever. No workaround :( */ /* Legacy configuration via BIOS table PSB. */ #define PSB_START 0 #define PSB_STEP 0x10 #define PSB_SIG "AMDK7PNOW!" 
#define PSB_LEN 10 #define PSB_OFF 0 struct psb_header { char signature[10]; uint8_t version; uint8_t flags; uint16_t settlingtime; uint8_t res1; uint8_t numpst; } __packed; struct pst_header { uint32_t cpuid; uint8_t fsb; uint8_t maxfid; uint8_t startvid; uint8_t numpstates; } __packed; /* * MSRs and bits used by Powernow technology */ #define MSR_AMDK7_FIDVID_CTL 0xc0010041 #define MSR_AMDK7_FIDVID_STATUS 0xc0010042 /* Bitfields used by K7 */ #define PN7_CTR_FID(x) ((x) & 0x1f) #define PN7_CTR_VID(x) (((x) & 0x1f) << 8) #define PN7_CTR_FIDC 0x00010000 #define PN7_CTR_VIDC 0x00020000 #define PN7_CTR_FIDCHRATIO 0x00100000 #define PN7_CTR_SGTC(x) (((uint64_t)(x) & 0x000fffff) << 32) #define PN7_STA_CFID(x) ((x) & 0x1f) #define PN7_STA_SFID(x) (((x) >> 8) & 0x1f) #define PN7_STA_MFID(x) (((x) >> 16) & 0x1f) #define PN7_STA_CVID(x) (((x) >> 32) & 0x1f) #define PN7_STA_SVID(x) (((x) >> 40) & 0x1f) #define PN7_STA_MVID(x) (((x) >> 48) & 0x1f) /* ACPI ctr_val status register to powernow k7 configuration */ #define ACPI_PN7_CTRL_TO_FID(x) ((x) & 0x1f) #define ACPI_PN7_CTRL_TO_VID(x) (((x) >> 5) & 0x1f) #define ACPI_PN7_CTRL_TO_SGTC(x) (((x) >> 10) & 0xffff) /* Bitfields used by K8 */ #define PN8_CTR_FID(x) ((x) & 0x3f) #define PN8_CTR_VID(x) (((x) & 0x1f) << 8) #define PN8_CTR_PENDING(x) (((x) & 1) << 32) #define PN8_STA_CFID(x) ((x) & 0x3f) #define PN8_STA_SFID(x) (((x) >> 8) & 0x3f) #define PN8_STA_MFID(x) (((x) >> 16) & 0x3f) #define PN8_STA_PENDING(x) (((x) >> 31) & 0x01) #define PN8_STA_CVID(x) (((x) >> 32) & 0x1f) #define PN8_STA_SVID(x) (((x) >> 40) & 0x1f) #define PN8_STA_MVID(x) (((x) >> 48) & 0x1f) /* Reserved1 to powernow k8 configuration */ #define PN8_PSB_TO_RVO(x) ((x) & 0x03) #define PN8_PSB_TO_IRT(x) (((x) >> 2) & 0x03) #define PN8_PSB_TO_MVS(x) (((x) >> 4) & 0x03) #define PN8_PSB_TO_BATT(x) (((x) >> 6) & 0x03) /* ACPI ctr_val status register to powernow k8 configuration */ #define ACPI_PN8_CTRL_TO_FID(x) ((x) & 0x3f) #define ACPI_PN8_CTRL_TO_VID(x) (((x) >> 6) & 0x1f) #define ACPI_PN8_CTRL_TO_VST(x) (((x) >> 11) & 0x1f) #define ACPI_PN8_CTRL_TO_MVS(x) (((x) >> 18) & 0x03) #define ACPI_PN8_CTRL_TO_PLL(x) (((x) >> 20) & 0x7f) #define ACPI_PN8_CTRL_TO_RVO(x) (((x) >> 28) & 0x03) #define ACPI_PN8_CTRL_TO_IRT(x) (((x) >> 30) & 0x03) #define WRITE_FIDVID(fid, vid, ctrl) \ wrmsr(MSR_AMDK7_FIDVID_CTL, \ (((ctrl) << 32) | (1ULL << 16) | ((vid) << 8) | (fid))) #define COUNT_OFF_IRT(irt) DELAY(10 * (1 << (irt))) #define COUNT_OFF_VST(vst) DELAY(20 * (vst)) #define FID_TO_VCO_FID(fid) \ (((fid) < 8) ? (8 + ((fid) << 1)) : (fid)) /* * Divide each value by 10 to get the processor multiplier. * Some of those tables are the same as the Linux powernow-k7 * implementation by Dave Jones. */ static int pn7_fid_to_mult[32] = { 110, 115, 120, 125, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 30, 190, 40, 200, 130, 135, 140, 210, 150, 225, 160, 165, 170, 180, 0, 0, }; static int pn8_fid_to_mult[64] = { 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 160, 165, 170, 175, 180, 185, 190, 195, 200, 205, 210, 215, 220, 225, 230, 235, 240, 245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, 305, 310, 315, 320, 325, 330, 335, 340, 345, 350, 355, }; /* * Units are in mV. 
*/ /* Mobile VRM (K7) */ static int pn7_mobile_vid_to_volts[] = { 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, 1075, 1050, 1025, 1000, 975, 950, 925, 0, }; /* Desktop VRM (K7) */ static int pn7_desktop_vid_to_volts[] = { 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, 1075, 1050, 1025, 1000, 975, 950, 925, 0, }; /* Desktop and Mobile VRM (K8) */ static int pn8_vid_to_volts[] = { 1550, 1525, 1500, 1475, 1450, 1425, 1400, 1375, 1350, 1325, 1300, 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, 1075, 1050, 1025, 1000, 975, 950, 925, 900, 875, 850, 825, 800, 0, }; #define POWERNOW_MAX_STATES 16 struct powernow_state { int freq; int power; int fid; int vid; }; struct pn_softc { device_t dev; int pn_type; struct powernow_state powernow_states[POWERNOW_MAX_STATES]; u_int fsb; u_int sgtc; u_int vst; u_int mvs; u_int pll; u_int rvo; u_int irt; int low; int powernow_max_states; u_int powernow_state; u_int errata; int *vid_to_volts; }; /* * Offsets in struct cf_setting array for private values given by * acpi_perf driver. */ #define PX_SPEC_CONTROL 0 #define PX_SPEC_STATUS 1 static void pn_identify(driver_t *driver, device_t parent); static int pn_probe(device_t dev); static int pn_attach(device_t dev); static int pn_detach(device_t dev); static int pn_set(device_t dev, const struct cf_setting *cf); static int pn_get(device_t dev, struct cf_setting *cf); static int pn_settings(device_t dev, struct cf_setting *sets, int *count); static int pn_type(device_t dev, int *type); static device_method_t pn_methods[] = { /* Device interface */ DEVMETHOD(device_identify, pn_identify), DEVMETHOD(device_probe, pn_probe), DEVMETHOD(device_attach, pn_attach), DEVMETHOD(device_detach, pn_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, pn_set), DEVMETHOD(cpufreq_drv_get, pn_get), DEVMETHOD(cpufreq_drv_settings, pn_settings), DEVMETHOD(cpufreq_drv_type, pn_type), {0, 0} }; static devclass_t pn_devclass; static driver_t pn_driver = { "powernow", pn_methods, sizeof(struct pn_softc), }; DRIVER_MODULE(powernow, cpu, pn_driver, pn_devclass, 0, 0); static int pn7_setfidvid(struct pn_softc *sc, int fid, int vid) { int cfid, cvid; uint64_t status, ctl; status = rdmsr(MSR_AMDK7_FIDVID_STATUS); cfid = PN7_STA_CFID(status); cvid = PN7_STA_CVID(status); /* We're already at the requested level. */ if (fid == cfid && vid == cvid) return (0); ctl = rdmsr(MSR_AMDK7_FIDVID_CTL) & PN7_CTR_FIDCHRATIO; ctl |= PN7_CTR_FID(fid); ctl |= PN7_CTR_VID(vid); ctl |= PN7_CTR_SGTC(sc->sgtc); if (sc->errata & A0_ERRATA) disable_intr(); if (pn7_fid_to_mult[fid] < pn7_fid_to_mult[cfid]) { wrmsr(MSR_AMDK7_FIDVID_CTL, ctl | PN7_CTR_FIDC); if (vid != cvid) wrmsr(MSR_AMDK7_FIDVID_CTL, ctl | PN7_CTR_VIDC); } else { wrmsr(MSR_AMDK7_FIDVID_CTL, ctl | PN7_CTR_VIDC); if (fid != cfid) wrmsr(MSR_AMDK7_FIDVID_CTL, ctl | PN7_CTR_FIDC); } if (sc->errata & A0_ERRATA) enable_intr(); return (0); } static int pn8_read_pending_wait(uint64_t *status) { int i = 10000; do *status = rdmsr(MSR_AMDK7_FIDVID_STATUS); while (PN8_STA_PENDING(*status) && --i); return (i == 0 ? ENXIO : 0); } static int pn8_write_fidvid(u_int fid, u_int vid, uint64_t ctrl, uint64_t *status) { int i = 100; do WRITE_FIDVID(fid, vid, ctrl); while (pn8_read_pending_wait(status) && --i); return (i == 0 ? 
ENXIO : 0); } static int pn8_setfidvid(struct pn_softc *sc, int fid, int vid) { uint64_t status; int cfid, cvid; int rvo; int rv; u_int val; rv = pn8_read_pending_wait(&status); if (rv) return (rv); cfid = PN8_STA_CFID(status); cvid = PN8_STA_CVID(status); if (fid == cfid && vid == cvid) return (0); /* * Phase 1: Raise core voltage to requested VID if frequency is * going up. */ while (cvid > vid) { val = cvid - (1 << sc->mvs); rv = pn8_write_fidvid(cfid, (val > 0) ? val : 0, 1ULL, &status); if (rv) { sc->errata |= PENDING_STUCK; return (rv); } cvid = PN8_STA_CVID(status); COUNT_OFF_VST(sc->vst); } /* ... then raise to voltage + RVO (if required) */ for (rvo = sc->rvo; rvo > 0 && cvid > 0; --rvo) { /* XXX It's not clear from spec if we have to do that * in 0.25 step or in MVS. Therefore do it as it's done * under Linux */ rv = pn8_write_fidvid(cfid, cvid - 1, 1ULL, &status); if (rv) { sc->errata |= PENDING_STUCK; return (rv); } cvid = PN8_STA_CVID(status); COUNT_OFF_VST(sc->vst); } /* Phase 2: change to requested core frequency */ if (cfid != fid) { u_int vco_fid, vco_cfid, fid_delta; vco_fid = FID_TO_VCO_FID(fid); vco_cfid = FID_TO_VCO_FID(cfid); while (abs(vco_fid - vco_cfid) > 2) { fid_delta = (vco_cfid & 1) ? 1 : 2; if (fid > cfid) { if (cfid > 7) val = cfid + fid_delta; else val = FID_TO_VCO_FID(cfid) + fid_delta; } else val = cfid - fid_delta; rv = pn8_write_fidvid(val, cvid, sc->pll * (uint64_t) sc->fsb, &status); if (rv) { sc->errata |= PENDING_STUCK; return (rv); } cfid = PN8_STA_CFID(status); COUNT_OFF_IRT(sc->irt); vco_cfid = FID_TO_VCO_FID(cfid); } rv = pn8_write_fidvid(fid, cvid, sc->pll * (uint64_t) sc->fsb, &status); if (rv) { sc->errata |= PENDING_STUCK; return (rv); } cfid = PN8_STA_CFID(status); COUNT_OFF_IRT(sc->irt); } /* Phase 3: change to requested voltage */ if (cvid != vid) { rv = pn8_write_fidvid(cfid, vid, 1ULL, &status); cvid = PN8_STA_CVID(status); COUNT_OFF_VST(sc->vst); } /* Check if transition failed. 
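Phase 2 above never moves the VCO frequency ID by more than two at a time: FID_TO_VCO_FID() remaps low FIDs onto the VCO scale ((fid < 8) ? 8 + (fid << 1) : fid), so fid 2 becomes VCO fid 12 while fid 10 stays 10, and the loop steps cfid until the two VCO values are within 2 of each other. A worked check of the mapping:

#include <assert.h>

#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))

int
main(void)
{
	assert(FID_TO_VCO_FID(2) == 12);	/* low FIDs are remapped */
	assert(FID_TO_VCO_FID(7) == 22);
	assert(FID_TO_VCO_FID(10) == 10);	/* FIDs >= 8 map to themselves */
	return (0);
}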
*/ if (cfid != fid || cvid != vid) rv = ENXIO; return (rv); } static int pn_set(device_t dev, const struct cf_setting *cf) { struct pn_softc *sc; int fid, vid; int i; int rv; if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); if (sc->errata & PENDING_STUCK) return (ENXIO); for (i = 0; i < sc->powernow_max_states; ++i) if (CPUFREQ_CMP(sc->powernow_states[i].freq / 1000, cf->freq)) break; fid = sc->powernow_states[i].fid; vid = sc->powernow_states[i].vid; rv = ENODEV; switch (sc->pn_type) { case PN7_TYPE: rv = pn7_setfidvid(sc, fid, vid); break; case PN8_TYPE: rv = pn8_setfidvid(sc, fid, vid); break; } return (rv); } static int pn_get(device_t dev, struct cf_setting *cf) { struct pn_softc *sc; u_int cfid = 0, cvid = 0; int i; uint64_t status; if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); if (sc->errata & PENDING_STUCK) return (ENXIO); status = rdmsr(MSR_AMDK7_FIDVID_STATUS); switch (sc->pn_type) { case PN7_TYPE: cfid = PN7_STA_CFID(status); cvid = PN7_STA_CVID(status); break; case PN8_TYPE: cfid = PN8_STA_CFID(status); cvid = PN8_STA_CVID(status); break; } for (i = 0; i < sc->powernow_max_states; ++i) if (cfid == sc->powernow_states[i].fid && cvid == sc->powernow_states[i].vid) break; if (i < sc->powernow_max_states) { cf->freq = sc->powernow_states[i].freq / 1000; cf->power = sc->powernow_states[i].power; cf->lat = 200; cf->volts = sc->vid_to_volts[cvid]; cf->dev = dev; } else { memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf)); cf->dev = NULL; } return (0); } static int pn_settings(device_t dev, struct cf_setting *sets, int *count) { struct pn_softc *sc; int i; if (sets == NULL|| count == NULL) return (EINVAL); sc = device_get_softc(dev); if (*count < sc->powernow_max_states) return (E2BIG); for (i = 0; i < sc->powernow_max_states; ++i) { sets[i].freq = sc->powernow_states[i].freq / 1000; sets[i].power = sc->powernow_states[i].power; sets[i].lat = 200; sets[i].volts = sc->vid_to_volts[sc->powernow_states[i].vid]; sets[i].dev = dev; } *count = sc->powernow_max_states; return (0); } static int pn_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } /* * Given a set of pair of fid/vid, and number of performance states, * compute powernow_states via an insertion sort. */ static int decode_pst(struct pn_softc *sc, uint8_t *p, int npstates) { int i, j, n; struct powernow_state state; for (i = 0; i < POWERNOW_MAX_STATES; ++i) sc->powernow_states[i].freq = CPUFREQ_VAL_UNKNOWN; for (n = 0, i = 0; i < npstates; ++i) { state.fid = *p++; state.vid = *p++; state.power = CPUFREQ_VAL_UNKNOWN; switch (sc->pn_type) { case PN7_TYPE: state.freq = 100 * pn7_fid_to_mult[state.fid] * sc->fsb; if ((sc->errata & A0_ERRATA) && (pn7_fid_to_mult[state.fid] % 10) == 5) continue; break; case PN8_TYPE: state.freq = 100 * pn8_fid_to_mult[state.fid] * sc->fsb; break; } j = n; while (j > 0 && sc->powernow_states[j - 1].freq < state.freq) { memcpy(&sc->powernow_states[j], &sc->powernow_states[j - 1], sizeof(struct powernow_state)); --j; } memcpy(&sc->powernow_states[j], &state, sizeof(struct powernow_state)); ++n; } /* * Fix powernow_max_states, if errata a0 give us less states * than expected. 
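decode_pst() above keeps powernow_states ordered by descending frequency with a textbook insertion sort, shifting lower-frequency entries right as each new state arrives. The same loop over bare integers (insert_desc is a hypothetical name; 'a' must have room for n + 1 entries):

/* Insert 'v' into 'a', already sorted descending with n valid entries. */
static void
insert_desc(int *a, int n, int v)
{
	int j = n;

	while (j > 0 && a[j - 1] < v) {
		a[j] = a[j - 1];
		j--;
	}
	a[j] = v;
}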
*/ sc->powernow_max_states = n; if (bootverbose) for (i = 0; i < sc->powernow_max_states; ++i) { int fid = sc->powernow_states[i].fid; int vid = sc->powernow_states[i].vid; printf("powernow: %2i %8dkHz FID %02x VID %02x\n", i, sc->powernow_states[i].freq, fid, vid); } return (0); } static int cpuid_is_k7(u_int cpuid) { switch (cpuid) { case 0x760: case 0x761: case 0x762: case 0x770: case 0x771: case 0x780: case 0x781: case 0x7a0: return (TRUE); } return (FALSE); } static int pn_decode_pst(device_t dev) { int maxpst; struct pn_softc *sc; u_int cpuid, maxfid, startvid; u_long sig; struct psb_header *psb; uint8_t *p; u_int regs[4]; uint64_t status; sc = device_get_softc(dev); do_cpuid(0x80000001, regs); cpuid = regs[0]; if ((cpuid & 0xfff) == 0x760) sc->errata |= A0_ERRATA; status = rdmsr(MSR_AMDK7_FIDVID_STATUS); switch (sc->pn_type) { case PN7_TYPE: maxfid = PN7_STA_MFID(status); startvid = PN7_STA_SVID(status); break; case PN8_TYPE: maxfid = PN8_STA_MFID(status); /* * we should actually use a variable named 'maxvid' if K8, * but why introducing a new variable for that? */ startvid = PN8_STA_MVID(status); break; default: return (ENODEV); } if (bootverbose) { device_printf(dev, "STATUS: 0x%jx\n", status); device_printf(dev, "STATUS: maxfid: 0x%02x\n", maxfid); device_printf(dev, "STATUS: %s: 0x%02x\n", sc->pn_type == PN7_TYPE ? "startvid" : "maxvid", startvid); } sig = bios_sigsearch(PSB_START, PSB_SIG, PSB_LEN, PSB_STEP, PSB_OFF); if (sig) { struct pst_header *pst; psb = (struct psb_header*)(uintptr_t)BIOS_PADDRTOVADDR(sig); switch (psb->version) { default: return (ENODEV); case 0x14: /* * We can't be picky about numpst since at least * some systems have a value of 1 and some have 2. * We trust that cpuid_is_k7() will be better at * catching that we're on a K8 anyway. 
*/ if (sc->pn_type != PN8_TYPE) return (EINVAL); sc->vst = psb->settlingtime; sc->rvo = PN8_PSB_TO_RVO(psb->res1); sc->irt = PN8_PSB_TO_IRT(psb->res1); sc->mvs = PN8_PSB_TO_MVS(psb->res1); sc->low = PN8_PSB_TO_BATT(psb->res1); if (bootverbose) { device_printf(dev, "PSB: VST: %d\n", psb->settlingtime); device_printf(dev, "PSB: RVO %x IRT %d " "MVS %d BATT %d\n", sc->rvo, sc->irt, sc->mvs, sc->low); } break; case 0x12: if (sc->pn_type != PN7_TYPE) return (EINVAL); sc->sgtc = psb->settlingtime * sc->fsb; if (sc->sgtc < 100 * sc->fsb) sc->sgtc = 100 * sc->fsb; break; } p = ((uint8_t *) psb) + sizeof(struct psb_header); pst = (struct pst_header*) p; maxpst = 200; do { struct pst_header *pst = (struct pst_header*) p; if (cpuid == pst->cpuid && maxfid == pst->maxfid && startvid == pst->startvid) { sc->powernow_max_states = pst->numpstates; switch (sc->pn_type) { case PN7_TYPE: if (abs(sc->fsb - pst->fsb) > 5) continue; break; case PN8_TYPE: break; } return (decode_pst(sc, p + sizeof(struct pst_header), sc->powernow_max_states)); } p += sizeof(struct pst_header) + (2 * pst->numpstates); } while (cpuid_is_k7(pst->cpuid) && maxpst--); device_printf(dev, "no match for extended cpuid %.3x\n", cpuid); } return (ENODEV); } static int pn_decode_acpi(device_t dev, device_t perf_dev) { int i, j, n; uint64_t status; uint32_t ctrl; u_int cpuid; u_int regs[4]; struct pn_softc *sc; struct powernow_state state; struct cf_setting sets[POWERNOW_MAX_STATES]; int count = POWERNOW_MAX_STATES; int type; int rv; if (perf_dev == NULL) return (ENXIO); rv = CPUFREQ_DRV_SETTINGS(perf_dev, sets, &count); if (rv) return (ENXIO); rv = CPUFREQ_DRV_TYPE(perf_dev, &type); if (rv || (type & CPUFREQ_FLAG_INFO_ONLY) == 0) return (ENXIO); sc = device_get_softc(dev); do_cpuid(0x80000001, regs); cpuid = regs[0]; if ((cpuid & 0xfff) == 0x760) sc->errata |= A0_ERRATA; ctrl = 0; sc->sgtc = 0; for (n = 0, i = 0; i < count; ++i) { ctrl = sets[i].spec[PX_SPEC_CONTROL]; switch (sc->pn_type) { case PN7_TYPE: state.fid = ACPI_PN7_CTRL_TO_FID(ctrl); state.vid = ACPI_PN7_CTRL_TO_VID(ctrl); if ((sc->errata & A0_ERRATA) && (pn7_fid_to_mult[state.fid] % 10) == 5) continue; break; case PN8_TYPE: state.fid = ACPI_PN8_CTRL_TO_FID(ctrl); state.vid = ACPI_PN8_CTRL_TO_VID(ctrl); break; } state.freq = sets[i].freq * 1000; state.power = sets[i].power; j = n; while (j > 0 && sc->powernow_states[j - 1].freq < state.freq) { memcpy(&sc->powernow_states[j], &sc->powernow_states[j - 1], sizeof(struct powernow_state)); --j; } memcpy(&sc->powernow_states[j], &state, sizeof(struct powernow_state)); ++n; } sc->powernow_max_states = n; state = sc->powernow_states[0]; status = rdmsr(MSR_AMDK7_FIDVID_STATUS); switch (sc->pn_type) { case PN7_TYPE: sc->sgtc = ACPI_PN7_CTRL_TO_SGTC(ctrl); /* * XXX Some bios forget the max frequency! * This maybe indicates we have the wrong tables. Therefore, * don't implement a quirk, but fallback to BIOS legacy * tables instead. */ if (PN7_STA_MFID(status) != state.fid) { device_printf(dev, "ACPI MAX frequency not found\n"); return (EINVAL); } sc->fsb = state.freq / 100 / pn7_fid_to_mult[state.fid]; break; case PN8_TYPE: sc->vst = ACPI_PN8_CTRL_TO_VST(ctrl), sc->mvs = ACPI_PN8_CTRL_TO_MVS(ctrl), sc->pll = ACPI_PN8_CTRL_TO_PLL(ctrl), sc->rvo = ACPI_PN8_CTRL_TO_RVO(ctrl), sc->irt = ACPI_PN8_CTRL_TO_IRT(ctrl); sc->low = 0; /* XXX */ /* * powernow k8 supports only one low frequency. 
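pn_decode_acpi() below unpacks each _PSS control dword with the ACPI_PN8_CTRL_TO_* shift/mask macros defined earlier: FID in bits 5:0, VID in bits 10:6, VST in bits 15:11, and so on. A standalone decode of one made-up control value (the sample dword is arbitrary):

#include <stdint.h>
#include <stdio.h>

#define CTRL_TO_FID(x)	((x) & 0x3f)
#define CTRL_TO_VID(x)	(((x) >> 6) & 0x1f)
#define CTRL_TO_VST(x)	(((x) >> 11) & 0x1f)

int
main(void)
{
	uint32_t ctrl = 0x0000290aU;	/* arbitrary sample value */

	/* Prints fid=10 vid=4 vst=5 for the sample value. */
	printf("fid=%u vid=%u vst=%u\n", CTRL_TO_FID(ctrl),
	    CTRL_TO_VID(ctrl), CTRL_TO_VST(ctrl));
	return (0);
}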
*/ if (sc->powernow_max_states >= 2 && (sc->powernow_states[sc->powernow_max_states - 2].fid < 8)) return (EINVAL); sc->fsb = state.freq / 100 / pn8_fid_to_mult[state.fid]; break; } return (0); } static void pn_identify(driver_t *driver, device_t parent) { if ((amd_pminfo & AMDPM_FID) == 0 || (amd_pminfo & AMDPM_VID) == 0) return; switch (cpu_id & 0xf00) { case 0x600: case 0xf00: break; default: return; } if (device_find_child(parent, "powernow", -1) != NULL) return; if (BUS_ADD_CHILD(parent, 10, "powernow", -1) == NULL) device_printf(parent, "powernow: add child failed\n"); } static int pn_probe(device_t dev) { struct pn_softc *sc; uint64_t status; uint64_t rate; struct pcpu *pc; u_int sfid, mfid, cfid; sc = device_get_softc(dev); sc->errata = 0; status = rdmsr(MSR_AMDK7_FIDVID_STATUS); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENODEV); cpu_est_clockrate(pc->pc_cpuid, &rate); switch (cpu_id & 0xf00) { case 0x600: sfid = PN7_STA_SFID(status); mfid = PN7_STA_MFID(status); cfid = PN7_STA_CFID(status); sc->pn_type = PN7_TYPE; sc->fsb = rate / 100000 / pn7_fid_to_mult[cfid]; /* * If start FID is different to max FID, then it is a * mobile processor. If not, it is a low powered desktop * processor. */ if (PN7_STA_SFID(status) != PN7_STA_MFID(status)) { sc->vid_to_volts = pn7_mobile_vid_to_volts; device_set_desc(dev, "PowerNow! K7"); } else { sc->vid_to_volts = pn7_desktop_vid_to_volts; device_set_desc(dev, "Cool`n'Quiet K7"); } break; case 0xf00: sfid = PN8_STA_SFID(status); mfid = PN8_STA_MFID(status); cfid = PN8_STA_CFID(status); sc->pn_type = PN8_TYPE; sc->vid_to_volts = pn8_vid_to_volts; sc->fsb = rate / 100000 / pn8_fid_to_mult[cfid]; if (PN8_STA_SFID(status) != PN8_STA_MFID(status)) device_set_desc(dev, "PowerNow! K8"); else device_set_desc(dev, "Cool`n'Quiet K8"); break; default: return (ENODEV); } return (0); } static int pn_attach(device_t dev) { int rv; device_t child; child = device_find_child(device_get_parent(dev), "acpi_perf", -1); if (child) { rv = pn_decode_acpi(dev, child); if (rv) rv = pn_decode_pst(dev); } else rv = pn_decode_pst(dev); if (rv != 0) return (ENXIO); cpufreq_register(dev); return (0); } static int pn_detach(device_t dev) { return (cpufreq_unregister(dev)); } Index: head/sys/x86/cpufreq/smist.c =================================================================== --- head/sys/x86/cpufreq/smist.c (revision 326262) +++ head/sys/x86/cpufreq/smist.c (revision 326263) @@ -1,514 +1,516 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2005 Bruno Ducrot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * This driver is based upon information found by examining speedstep-0.5 * from Marc Lehman, which includes all the reverse engineering effort of * Malik Martin (function 1 and 2 of the GSI). * * The correct way for the OS to take ownership from the BIOS was found by * Hiroshi Miura (function 0 of the GSI). * * Finally, the int 15h call interface was (partially) documented by Intel. * * Many thanks to Jon Noack for testing and debugging this driver. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #if 0 #define DPRINT(dev, x...) device_printf(dev, x) #else #define DPRINT(dev, x...) #endif struct smist_softc { device_t dev; int smi_cmd; int smi_data; int command; int flags; struct cf_setting sets[2]; /* Only two settings. */ }; static char smist_magic[] = "Copyright (c) 1999 Intel Corporation"; static void smist_identify(driver_t *driver, device_t parent); static int smist_probe(device_t dev); static int smist_attach(device_t dev); static int smist_detach(device_t dev); static int smist_settings(device_t dev, struct cf_setting *sets, int *count); static int smist_set(device_t dev, const struct cf_setting *set); static int smist_get(device_t dev, struct cf_setting *set); static int smist_type(device_t dev, int *type); static device_method_t smist_methods[] = { /* Device interface */ DEVMETHOD(device_identify, smist_identify), DEVMETHOD(device_probe, smist_probe), DEVMETHOD(device_attach, smist_attach), DEVMETHOD(device_detach, smist_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, smist_set), DEVMETHOD(cpufreq_drv_get, smist_get), DEVMETHOD(cpufreq_drv_type, smist_type), DEVMETHOD(cpufreq_drv_settings, smist_settings), {0, 0} }; static driver_t smist_driver = { "smist", smist_methods, sizeof(struct smist_softc) }; static devclass_t smist_devclass; DRIVER_MODULE(smist, cpu, smist_driver, smist_devclass, 0, 0); struct piix4_pci_device { uint16_t vendor; uint16_t device; char *desc; }; static struct piix4_pci_device piix4_pci_devices[] = { {0x8086, 0x7113, "Intel PIIX4 ISA bridge"}, {0x8086, 0x719b, "Intel PIIX4 ISA bridge (embedded in MX440 chipset)"}, {0, 0, NULL}, }; #define SET_OWNERSHIP 0 #define GET_STATE 1 #define SET_STATE 2 static int int15_gsic_call(int *sig, int *smi_cmd, int *command, int *smi_data, int *flags) { struct vm86frame vmf; bzero(&vmf, sizeof(vmf)); vmf.vmf_eax = 0x0000E980; /* IST support */ vmf.vmf_edx = 0x47534943; /* 'GSIC' in ASCII */ vm86_intcall(0x15, &vmf); if (vmf.vmf_eax == 0x47534943) { *sig = vmf.vmf_eax; *smi_cmd = vmf.vmf_ebx & 0xff; *command = (vmf.vmf_ebx >> 16) & 0xff; *smi_data = vmf.vmf_ecx; *flags = vmf.vmf_edx; } else { *sig = -1; *smi_cmd = -1; *command = -1; *smi_data = -1; *flags = -1; } return (0); } /* Temporary structure to hold mapped page and status. */ struct set_ownership_data { int smi_cmd; int command; int result; void *buf; }; /* Perform actual SMI call to enable SpeedStep. 
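On success, int15_gsic_call() below receives 'GSIC' back in %eax and unpacks %ebx into two fields: the SMI command port in bits 7:0 and the GSI command number in bits 23:16. That unpacking stated as a tiny helper (gsic_unpack_ebx is a hypothetical name mirroring the layout the driver assumes):

#include <stdint.h>

/* Split the GSIC %ebx result into SMI port (7:0) and command (23:16). */
static void
gsic_unpack_ebx(uint32_t ebx, int *smi_cmd, int *command)
{
	*smi_cmd = ebx & 0xff;
	*command = (ebx >> 16) & 0xff;
}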
*/ static void set_ownership_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct set_ownership_data *data; data = arg; if (error) { data->result = error; return; } /* Copy in the magic string and send it by writing to the SMI port. */ strlcpy(data->buf, smist_magic, PAGE_SIZE); __asm __volatile( "movl $-1, %%edi\n\t" "out %%al, (%%dx)\n" : "=D" (data->result) : "a" (data->command), "b" (0), "c" (0), "d" (data->smi_cmd), "S" ((uint32_t)segs[0].ds_addr) ); } static int set_ownership(device_t dev) { struct smist_softc *sc; struct set_ownership_data cb_data; bus_dma_tag_t tag; bus_dmamap_t map; /* * Specify the region to store the magic string. Since its address is * passed to the BIOS in a 32-bit register, we have to make sure it is * located in a physical page below 4 GB (i.e., for PAE.) */ sc = device_get_softc(dev); if (bus_dma_tag_create(/*parent*/ NULL, /*alignment*/ PAGE_SIZE, /*no boundary*/ 0, /*lowaddr*/ BUS_SPACE_MAXADDR_32BIT, /*highaddr*/ BUS_SPACE_MAXADDR, NULL, NULL, /*maxsize*/ PAGE_SIZE, /*segments*/ 1, /*maxsegsize*/ PAGE_SIZE, 0, busdma_lock_mutex, &Giant, &tag) != 0) { device_printf(dev, "can't create mem tag\n"); return (ENXIO); } if (bus_dmamem_alloc(tag, &cb_data.buf, BUS_DMA_NOWAIT, &map) != 0) { bus_dma_tag_destroy(tag); device_printf(dev, "can't alloc mapped mem\n"); return (ENXIO); } /* Load the physical page map and take ownership in the callback. */ cb_data.smi_cmd = sc->smi_cmd; cb_data.command = sc->command; if (bus_dmamap_load(tag, map, cb_data.buf, PAGE_SIZE, set_ownership_cb, &cb_data, BUS_DMA_NOWAIT) != 0) { bus_dmamem_free(tag, cb_data.buf, map); bus_dma_tag_destroy(tag); device_printf(dev, "can't load mem\n"); return (ENXIO); } DPRINT(dev, "taking ownership over BIOS return %d\n", cb_data.result); bus_dmamap_unload(tag, map); bus_dmamem_free(tag, cb_data.buf, map); bus_dma_tag_destroy(tag); return (cb_data.result ? 
ENXIO : 0); } static int getset_state(struct smist_softc *sc, int *state, int function) { int new_state; int result; int eax; if (!sc) return (ENXIO); if (function != GET_STATE && function != SET_STATE) return (EINVAL); DPRINT(sc->dev, "calling GSI\n"); __asm __volatile( "movl $-1, %%edi\n\t" "out %%al, (%%dx)\n" : "=a" (eax), "=b" (new_state), "=D" (result) : "a" (sc->command), "b" (function), "c" (*state), "d" (sc->smi_cmd) ); DPRINT(sc->dev, "GSI returned: eax %.8x ebx %.8x edi %.8x\n", eax, new_state, result); *state = new_state & 1; switch (function) { case GET_STATE: if (eax) return (ENXIO); break; case SET_STATE: if (result) return (ENXIO); break; } return (0); } static void smist_identify(driver_t *driver, device_t parent) { struct piix4_pci_device *id; device_t piix4 = NULL; if (resource_disabled("ichst", 0)) return; /* Check for a supported processor */ if (cpu_vendor_id != CPU_VENDOR_INTEL) return; switch (cpu_id & 0xff0) { case 0x680: /* Pentium III [coppermine] */ case 0x6a0: /* Pentium III [Tualatin] */ break; default: return; } /* Check for a supported PCI-ISA bridge */ for (id = piix4_pci_devices; id->desc != NULL; ++id) { if ((piix4 = pci_find_device(id->vendor, id->device)) != NULL) break; } if (!piix4) return; if (bootverbose) printf("smist: found supported isa bridge %s\n", id->desc); if (device_find_child(parent, "smist", -1) != NULL) return; if (BUS_ADD_CHILD(parent, 30, "smist", -1) == NULL) device_printf(parent, "smist: add child failed\n"); } static int smist_probe(device_t dev) { struct smist_softc *sc; device_t ichss_dev, perf_dev; int sig, smi_cmd, command, smi_data, flags; int type; int rv; if (resource_disabled("smist", 0)) return (ENXIO); sc = device_get_softc(dev); /* * If the ACPI perf or ICH SpeedStep drivers have attached and not * just offering info, let them manage things. */ perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); if (perf_dev && device_is_attached(perf_dev)) { rv = CPUFREQ_DRV_TYPE(perf_dev, &type); if (rv == 0 && (type & CPUFREQ_FLAG_INFO_ONLY) == 0) return (ENXIO); } ichss_dev = device_find_child(device_get_parent(dev), "ichss", -1); if (ichss_dev && device_is_attached(ichss_dev)) return (ENXIO); int15_gsic_call(&sig, &smi_cmd, &command, &smi_data, &flags); if (bootverbose) device_printf(dev, "sig %.8x smi_cmd %.4x command %.2x " "smi_data %.4x flags %.8x\n", sig, smi_cmd, command, smi_data, flags); if (sig != -1) { sc->smi_cmd = smi_cmd; sc->smi_data = smi_data; /* * Sometimes int 15h 'GSIC' returns 0x80 for command, when * it is actually 0x82. The Windows driver will overwrite * this value given by the registry. */ if (command == 0x80) { device_printf(dev, "GSIC returned cmd 0x80, should be 0x82\n"); command = 0x82; } sc->command = (sig & 0xffffff00) | (command & 0xff); sc->flags = flags; } else { /* Give some default values */ sc->smi_cmd = 0xb2; sc->smi_data = 0xb3; sc->command = 0x47534982; sc->flags = 0; } device_set_desc(dev, "SpeedStep SMI"); return (-1500); } static int smist_attach(device_t dev) { struct smist_softc *sc; sc = device_get_softc(dev); sc->dev = dev; /* If we can't take ownership over BIOS, then bail out */ if (set_ownership(dev) != 0) return (ENXIO); /* Setup some defaults for our exported settings. 
*/ sc->sets[0].freq = CPUFREQ_VAL_UNKNOWN; sc->sets[0].volts = CPUFREQ_VAL_UNKNOWN; sc->sets[0].power = CPUFREQ_VAL_UNKNOWN; sc->sets[0].lat = 1000; sc->sets[0].dev = dev; sc->sets[1] = sc->sets[0]; cpufreq_register(dev); return (0); } static int smist_detach(device_t dev) { return (cpufreq_unregister(dev)); } static int smist_settings(device_t dev, struct cf_setting *sets, int *count) { struct smist_softc *sc; struct cf_setting set; int first, i; if (sets == NULL || count == NULL) return (EINVAL); if (*count < 2) { *count = 2; return (E2BIG); } sc = device_get_softc(dev); /* * Estimate frequencies for both levels, temporarily switching to * the other one if we haven't calibrated it yet. */ for (i = 0; i < 2; i++) { if (sc->sets[i].freq == CPUFREQ_VAL_UNKNOWN) { first = (i == 0) ? 1 : 0; smist_set(dev, &sc->sets[i]); smist_get(dev, &set); smist_set(dev, &sc->sets[first]); } } bcopy(sc->sets, sets, sizeof(sc->sets)); *count = 2; return (0); } static int smist_set(device_t dev, const struct cf_setting *set) { struct smist_softc *sc; int rv, state, req_state, try; /* Look up appropriate bit value based on frequency. */ sc = device_get_softc(dev); if (CPUFREQ_CMP(set->freq, sc->sets[0].freq)) req_state = 0; else if (CPUFREQ_CMP(set->freq, sc->sets[1].freq)) req_state = 1; else return (EINVAL); DPRINT(dev, "requested setting %d\n", req_state); rv = getset_state(sc, &state, GET_STATE); if (state == req_state) return (0); try = 3; do { rv = getset_state(sc, &req_state, SET_STATE); /* Sleep for 200 microseconds. This value is just a guess. */ if (rv) DELAY(200); } while (rv && --try); DPRINT(dev, "set_state return %d, tried %d times\n", rv, 4 - try); return (rv); } static int smist_get(device_t dev, struct cf_setting *set) { struct smist_softc *sc; uint64_t rate; int state; int rv; sc = device_get_softc(dev); rv = getset_state(sc, &state, GET_STATE); if (rv != 0) return (rv); /* If we haven't changed settings yet, estimate the current value. */ if (sc->sets[state].freq == CPUFREQ_VAL_UNKNOWN) { cpu_est_clockrate(0, &rate); sc->sets[state].freq = rate / 1000000; DPRINT(dev, "get calibrated new rate of %d\n", sc->sets[state].freq); } *set = sc->sets[state]; return (0); } static int smist_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } Index: head/sys/x86/include/_inttypes.h =================================================================== --- head/sys/x86/include/_inttypes.h (revision 326262) +++ head/sys/x86/include/_inttypes.h (revision 326263) @@ -1,221 +1,223 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Klaus Klein. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * From: $NetBSD: int_fmtio.h,v 1.2 2001/04/26 16:25:21 kleink Exp $ * $FreeBSD$ */ #ifndef _MACHINE_INTTYPES_H_ #define _MACHINE_INTTYPES_H_ /* * Macros for format specifiers. */ #ifdef __LP64__ #define __PRI64 "l" #define __PRIptr "l" #else #define __PRI64 "ll" #define __PRIptr #endif /* fprintf(3) macros for signed integers. */ #define PRId8 "d" /* int8_t */ #define PRId16 "d" /* int16_t */ #define PRId32 "d" /* int32_t */ #define PRId64 __PRI64"d" /* int64_t */ #define PRIdLEAST8 "d" /* int_least8_t */ #define PRIdLEAST16 "d" /* int_least16_t */ #define PRIdLEAST32 "d" /* int_least32_t */ #define PRIdLEAST64 __PRI64"d" /* int_least64_t */ #define PRIdFAST8 "d" /* int_fast8_t */ #define PRIdFAST16 "d" /* int_fast16_t */ #define PRIdFAST32 "d" /* int_fast32_t */ #define PRIdFAST64 __PRI64"d" /* int_fast64_t */ #define PRIdMAX "jd" /* intmax_t */ #define PRIdPTR __PRIptr"d" /* intptr_t */ #define PRIi8 "i" /* int8_t */ #define PRIi16 "i" /* int16_t */ #define PRIi32 "i" /* int32_t */ #define PRIi64 __PRI64"i" /* int64_t */ #define PRIiLEAST8 "i" /* int_least8_t */ #define PRIiLEAST16 "i" /* int_least16_t */ #define PRIiLEAST32 "i" /* int_least32_t */ #define PRIiLEAST64 __PRI64"i" /* int_least64_t */ #define PRIiFAST8 "i" /* int_fast8_t */ #define PRIiFAST16 "i" /* int_fast16_t */ #define PRIiFAST32 "i" /* int_fast32_t */ #define PRIiFAST64 __PRI64"i" /* int_fast64_t */ #define PRIiMAX "ji" /* intmax_t */ #define PRIiPTR __PRIptr"i" /* intptr_t */ /* fprintf(3) macros for unsigned integers. 
*/ #define PRIo8 "o" /* uint8_t */ #define PRIo16 "o" /* uint16_t */ #define PRIo32 "o" /* uint32_t */ #define PRIo64 __PRI64"o" /* uint64_t */ #define PRIoLEAST8 "o" /* uint_least8_t */ #define PRIoLEAST16 "o" /* uint_least16_t */ #define PRIoLEAST32 "o" /* uint_least32_t */ #define PRIoLEAST64 __PRI64"o" /* uint_least64_t */ #define PRIoFAST8 "o" /* uint_fast8_t */ #define PRIoFAST16 "o" /* uint_fast16_t */ #define PRIoFAST32 "o" /* uint_fast32_t */ #define PRIoFAST64 __PRI64"o" /* uint_fast64_t */ #define PRIoMAX "jo" /* uintmax_t */ #define PRIoPTR __PRIptr"o" /* uintptr_t */ #define PRIu8 "u" /* uint8_t */ #define PRIu16 "u" /* uint16_t */ #define PRIu32 "u" /* uint32_t */ #define PRIu64 __PRI64"u" /* uint64_t */ #define PRIuLEAST8 "u" /* uint_least8_t */ #define PRIuLEAST16 "u" /* uint_least16_t */ #define PRIuLEAST32 "u" /* uint_least32_t */ #define PRIuLEAST64 __PRI64"u" /* uint_least64_t */ #define PRIuFAST8 "u" /* uint_fast8_t */ #define PRIuFAST16 "u" /* uint_fast16_t */ #define PRIuFAST32 "u" /* uint_fast32_t */ #define PRIuFAST64 __PRI64"u" /* uint_fast64_t */ #define PRIuMAX "ju" /* uintmax_t */ #define PRIuPTR __PRIptr"u" /* uintptr_t */ #define PRIx8 "x" /* uint8_t */ #define PRIx16 "x" /* uint16_t */ #define PRIx32 "x" /* uint32_t */ #define PRIx64 __PRI64"x" /* uint64_t */ #define PRIxLEAST8 "x" /* uint_least8_t */ #define PRIxLEAST16 "x" /* uint_least16_t */ #define PRIxLEAST32 "x" /* uint_least32_t */ #define PRIxLEAST64 __PRI64"x" /* uint_least64_t */ #define PRIxFAST8 "x" /* uint_fast8_t */ #define PRIxFAST16 "x" /* uint_fast16_t */ #define PRIxFAST32 "x" /* uint_fast32_t */ #define PRIxFAST64 __PRI64"x" /* uint_fast64_t */ #define PRIxMAX "jx" /* uintmax_t */ #define PRIxPTR __PRIptr"x" /* uintptr_t */ #define PRIX8 "X" /* uint8_t */ #define PRIX16 "X" /* uint16_t */ #define PRIX32 "X" /* uint32_t */ #define PRIX64 __PRI64"X" /* uint64_t */ #define PRIXLEAST8 "X" /* uint_least8_t */ #define PRIXLEAST16 "X" /* uint_least16_t */ #define PRIXLEAST32 "X" /* uint_least32_t */ #define PRIXLEAST64 __PRI64"X" /* uint_least64_t */ #define PRIXFAST8 "X" /* uint_fast8_t */ #define PRIXFAST16 "X" /* uint_fast16_t */ #define PRIXFAST32 "X" /* uint_fast32_t */ #define PRIXFAST64 __PRI64"X" /* uint_fast64_t */ #define PRIXMAX "jX" /* uintmax_t */ #define PRIXPTR __PRIptr"X" /* uintptr_t */ /* fscanf(3) macros for signed integers. 
*/ #define SCNd8 "hhd" /* int8_t */ #define SCNd16 "hd" /* int16_t */ #define SCNd32 "d" /* int32_t */ #define SCNd64 __PRI64"d" /* int64_t */ #define SCNdLEAST8 "hhd" /* int_least8_t */ #define SCNdLEAST16 "hd" /* int_least16_t */ #define SCNdLEAST32 "d" /* int_least32_t */ #define SCNdLEAST64 __PRI64"d" /* int_least64_t */ #define SCNdFAST8 "d" /* int_fast8_t */ #define SCNdFAST16 "d" /* int_fast16_t */ #define SCNdFAST32 "d" /* int_fast32_t */ #define SCNdFAST64 __PRI64"d" /* int_fast64_t */ #define SCNdMAX "jd" /* intmax_t */ #define SCNdPTR __PRIptr"d" /* intptr_t */ #define SCNi8 "hhi" /* int8_t */ #define SCNi16 "hi" /* int16_t */ #define SCNi32 "i" /* int32_t */ #define SCNi64 __PRI64"i" /* int64_t */ #define SCNiLEAST8 "hhi" /* int_least8_t */ #define SCNiLEAST16 "hi" /* int_least16_t */ #define SCNiLEAST32 "i" /* int_least32_t */ #define SCNiLEAST64 __PRI64"i" /* int_least64_t */ #define SCNiFAST8 "i" /* int_fast8_t */ #define SCNiFAST16 "i" /* int_fast16_t */ #define SCNiFAST32 "i" /* int_fast32_t */ #define SCNiFAST64 __PRI64"i" /* int_fast64_t */ #define SCNiMAX "ji" /* intmax_t */ #define SCNiPTR __PRIptr"i" /* intptr_t */ /* fscanf(3) macros for unsigned integers. */ #define SCNo8 "hho" /* uint8_t */ #define SCNo16 "ho" /* uint16_t */ #define SCNo32 "o" /* uint32_t */ #define SCNo64 __PRI64"o" /* uint64_t */ #define SCNoLEAST8 "hho" /* uint_least8_t */ #define SCNoLEAST16 "ho" /* uint_least16_t */ #define SCNoLEAST32 "o" /* uint_least32_t */ #define SCNoLEAST64 __PRI64"o" /* uint_least64_t */ #define SCNoFAST8 "o" /* uint_fast8_t */ #define SCNoFAST16 "o" /* uint_fast16_t */ #define SCNoFAST32 "o" /* uint_fast32_t */ #define SCNoFAST64 __PRI64"o" /* uint_fast64_t */ #define SCNoMAX "jo" /* uintmax_t */ #define SCNoPTR __PRIptr"o" /* uintptr_t */ #define SCNu8 "hhu" /* uint8_t */ #define SCNu16 "hu" /* uint16_t */ #define SCNu32 "u" /* uint32_t */ #define SCNu64 __PRI64"u" /* uint64_t */ #define SCNuLEAST8 "hhu" /* uint_least8_t */ #define SCNuLEAST16 "hu" /* uint_least16_t */ #define SCNuLEAST32 "u" /* uint_least32_t */ #define SCNuLEAST64 __PRI64"u" /* uint_least64_t */ #define SCNuFAST8 "u" /* uint_fast8_t */ #define SCNuFAST16 "u" /* uint_fast16_t */ #define SCNuFAST32 "u" /* uint_fast32_t */ #define SCNuFAST64 __PRI64"u" /* uint_fast64_t */ #define SCNuMAX "ju" /* uintmax_t */ #define SCNuPTR __PRIptr"u" /* uintptr_t */ #define SCNx8 "hhx" /* uint8_t */ #define SCNx16 "hx" /* uint16_t */ #define SCNx32 "x" /* uint32_t */ #define SCNx64 __PRI64"x" /* uint64_t */ #define SCNxLEAST8 "hhx" /* uint_least8_t */ #define SCNxLEAST16 "hx" /* uint_least16_t */ #define SCNxLEAST32 "x" /* uint_least32_t */ #define SCNxLEAST64 __PRI64"x" /* uint_least64_t */ #define SCNxFAST8 "x" /* uint_fast8_t */ #define SCNxFAST16 "x" /* uint_fast16_t */ #define SCNxFAST32 "x" /* uint_fast32_t */ #define SCNxFAST64 __PRI64"x" /* uint_fast64_t */ #define SCNxMAX "jx" /* uintmax_t */ #define SCNxPTR __PRIptr"x" /* uintptr_t */ #endif /* !_MACHINE_INTTYPES_H_ */ Index: head/sys/x86/include/_stdint.h =================================================================== --- head/sys/x86/include/_stdint.h (revision 326262) +++ head/sys/x86/include/_stdint.h (revision 326263) @@ -1,191 +1,193 @@ /*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 2001, 2002 Mike Barcroft * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Klaus Klein. 
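Everything in _inttypes.h above comes down to string-literal concatenation: __PRI64 expands to "l" on LP64 and "ll" on ILP32, so a format like "%" PRId64 always carries the right length modifier for int64_t without #ifdefs at each call site. Typical usage of the printf- and scanf-side macros, as a minimal userland sketch:

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	int64_t v = 0;
	uintptr_t p;

	sscanf("-42", "%" SCNd64, &v);		/* scan an int64_t */
	p = (uintptr_t)&v;
	printf("v=%" PRId64 " p=0x%" PRIxPTR "\n", v, p);
	return (0);
}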
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE__STDINT_H_ #define _MACHINE__STDINT_H_ #include #if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) #define INT8_C(c) (c) #define INT16_C(c) (c) #define INT32_C(c) (c) #define UINT8_C(c) (c) #define UINT16_C(c) (c) #define UINT32_C(c) (c ## U) #ifdef __LP64__ #define INT64_C(c) (c ## L) #define UINT64_C(c) (c ## UL) #else #define INT64_C(c) (c ## LL) #define UINT64_C(c) (c ## ULL) #endif #define INTMAX_C(c) INT64_C(c) #define UINTMAX_C(c) UINT64_C(c) #endif /* !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) */ #if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) /* * ISO/IEC 9899:1999 * 7.18.2.1 Limits of exact-width integer types */ #define INT8_MIN (-0x7f-1) #define INT16_MIN (-0x7fff-1) #define INT32_MIN (-0x7fffffff-1) #define INT8_MAX 0x7f #define INT16_MAX 0x7fff #define INT32_MAX 0x7fffffff #define UINT8_MAX 0xff #define UINT16_MAX 0xffff #define UINT32_MAX 0xffffffffU #ifdef __LP64__ #define INT64_MIN (-0x7fffffffffffffff-1) #define INT64_MAX 0x7fffffffffffffff #define UINT64_MAX 0xffffffffffffffff #else #define INT64_MIN (-0x7fffffffffffffffLL-1) #define INT64_MAX 0x7fffffffffffffffLL #define UINT64_MAX 0xffffffffffffffffULL #endif /* * ISO/IEC 9899:1999 * 7.18.2.2 Limits of minimum-width integer types */ /* Minimum values of minimum-width signed integer types. */ #define INT_LEAST8_MIN INT8_MIN #define INT_LEAST16_MIN INT16_MIN #define INT_LEAST32_MIN INT32_MIN #define INT_LEAST64_MIN INT64_MIN /* Maximum values of minimum-width signed integer types. */ #define INT_LEAST8_MAX INT8_MAX #define INT_LEAST16_MAX INT16_MAX #define INT_LEAST32_MAX INT32_MAX #define INT_LEAST64_MAX INT64_MAX /* Maximum values of minimum-width unsigned integer types. 
*/ #define UINT_LEAST8_MAX UINT8_MAX #define UINT_LEAST16_MAX UINT16_MAX #define UINT_LEAST32_MAX UINT32_MAX #define UINT_LEAST64_MAX UINT64_MAX /* * ISO/IEC 9899:1999 * 7.18.2.3 Limits of fastest minimum-width integer types */ /* Minimum values of fastest minimum-width signed integer types. */ #define INT_FAST8_MIN INT32_MIN #define INT_FAST16_MIN INT32_MIN #define INT_FAST32_MIN INT32_MIN #define INT_FAST64_MIN INT64_MIN /* Maximum values of fastest minimum-width signed integer types. */ #define INT_FAST8_MAX INT32_MAX #define INT_FAST16_MAX INT32_MAX #define INT_FAST32_MAX INT32_MAX #define INT_FAST64_MAX INT64_MAX /* Maximum values of fastest minimum-width unsigned integer types. */ #define UINT_FAST8_MAX UINT32_MAX #define UINT_FAST16_MAX UINT32_MAX #define UINT_FAST32_MAX UINT32_MAX #define UINT_FAST64_MAX UINT64_MAX /* * ISO/IEC 9899:1999 * 7.18.2.4 Limits of integer types capable of holding object pointers */ #ifdef __LP64__ #define INTPTR_MIN INT64_MIN #define INTPTR_MAX INT64_MAX #define UINTPTR_MAX UINT64_MAX #else #define INTPTR_MIN INT32_MIN #define INTPTR_MAX INT32_MAX #define UINTPTR_MAX UINT32_MAX #endif /* * ISO/IEC 9899:1999 * 7.18.2.5 Limits of greatest-width integer types */ #define INTMAX_MIN INT64_MIN #define INTMAX_MAX INT64_MAX #define UINTMAX_MAX UINT64_MAX /* * ISO/IEC 9899:1999 * 7.18.3 Limits of other integer types */ #ifdef __LP64__ /* Limits of ptrdiff_t. */ #define PTRDIFF_MIN INT64_MIN #define PTRDIFF_MAX INT64_MAX /* Limits of sig_atomic_t. */ #define SIG_ATOMIC_MIN __LONG_MIN #define SIG_ATOMIC_MAX __LONG_MAX /* Limit of size_t. */ #define SIZE_MAX UINT64_MAX #else #define PTRDIFF_MIN INT32_MIN #define PTRDIFF_MAX INT32_MAX #define SIG_ATOMIC_MIN INT32_MIN #define SIG_ATOMIC_MAX INT32_MAX #define SIZE_MAX UINT32_MAX #endif /* Limits of wint_t. */ #define WINT_MIN INT32_MIN #define WINT_MAX INT32_MAX #endif /* !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) */ #endif /* !_MACHINE__STDINT_H_ */ Index: head/sys/x86/include/acpica_machdep.h =================================================================== --- head/sys/x86/include/acpica_machdep.h (revision 326262) +++ head/sys/x86/include/acpica_machdep.h (revision 326263) @@ -1,88 +1,90 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2002 Mitsuru IWASAKI * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
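To make the __LP64__ conditionals in the _stdint.h hunk concrete: the constant macros pick the integer suffix (UL vs. ULL) and the pointer-sized limits track the ABI. A small illustrative program, not part of the diff:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        /* UINT64_C() appends UL on LP64 targets and ULL on ILP32 ones. */
        uint64_t big = UINT64_C(0xffffffffffffffff);

        /* SIZE_MAX and UINTPTR_MAX follow the pointer width of the ABI. */
        printf("SIZE_MAX    = %ju\n", (uintmax_t)SIZE_MAX);
        printf("UINTPTR_MAX = %ju\n", (uintmax_t)UINTPTR_MAX);
        printf("UINT64_MAX  = %ju\n", (uintmax_t)big);
        return (0);
}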
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /****************************************************************************** * * Name: acpica_machdep.h - arch-specific defines, etc. * $Revision$ * *****************************************************************************/ #ifndef __ACPICA_MACHDEP_H__ #define __ACPICA_MACHDEP_H__ #ifdef _KERNEL /* * Calling conventions: * * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) * ACPI_EXTERNAL_XFACE - External ACPI interfaces * ACPI_INTERNAL_XFACE - Internal ACPI interfaces * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces */ #define ACPI_SYSTEM_XFACE #define ACPI_EXTERNAL_XFACE #define ACPI_INTERNAL_XFACE #define ACPI_INTERNAL_VAR_XFACE /* Asm macros */ #define ACPI_ASM_MACROS #define BREAKPOINT3 #define ACPI_DISABLE_IRQS() disable_intr() #define ACPI_ENABLE_IRQS() enable_intr() #define ACPI_FLUSH_CPU_CACHE() wbinvd() /* Section 5.2.10.1: global lock acquire/release functions */ int acpi_acquire_global_lock(volatile uint32_t *); int acpi_release_global_lock(volatile uint32_t *); #define ACPI_ACQUIRE_GLOBAL_LOCK(GLptr, Acq) do { \ (Acq) = acpi_acquire_global_lock(&((GLptr)->GlobalLock)); \ } while (0) #define ACPI_RELEASE_GLOBAL_LOCK(GLptr, Acq) do { \ (Acq) = acpi_release_global_lock(&((GLptr)->GlobalLock)); \ } while (0) enum intr_trigger; enum intr_polarity; void acpi_SetDefaultIntrModel(int model); void acpi_cpu_c1(void); void acpi_cpu_idle_mwait(uint32_t mwait_hint); void *acpi_map_table(vm_paddr_t pa, const char *sig); void acpi_unmap_table(void *table); vm_paddr_t acpi_find_table(const char *sig); void madt_parse_interrupt_values(void *entry, enum intr_trigger *trig, enum intr_polarity *pol); extern int madt_found_sci_override; #endif /* _KERNEL */ #endif /* __ACPICA_MACHDEP_H__ */ Index: head/sys/x86/include/apicreg.h =================================================================== --- head/sys/x86/include/apicreg.h (revision 326262) +++ head/sys/x86/include/apicreg.h (revision 326263) @@ -1,548 +1,550 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1996, by Peter Wemm and Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
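The ACPI_ACQUIRE_GLOBAL_LOCK/ACPI_RELEASE_GLOBAL_LOCK wrappers in the acpica_machdep.h hunk above are invoked by ACPICA against the GlobalLock word of the FACS. A hedged sketch of the calling pattern, using a stand-in struct since the real FACS layout lives in ACPICA headers:

/* Hypothetical stand-in for the FACS; only the lock word matters here. */
struct gl_holder {
        volatile uint32_t GlobalLock;
};

static void
global_lock_sketch(struct gl_holder *facs)
{
        int acquired, pending;

        /* 'acquired' becomes nonzero once the lock is owned by the OS. */
        ACPI_ACQUIRE_GLOBAL_LOCK(facs, acquired);
        if (acquired) {
                /* ... access hardware shared with the firmware ... */

                /* 'pending' is nonzero if the firmware wants the lock. */
                ACPI_RELEASE_GLOBAL_LOCK(facs, pending);
        }
}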
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_APICREG_H_ #define _X86_APICREG_H_ /* * Local && I/O APIC definitions. */ /* * Pentium P54C+ Built-in APIC * (Advanced programmable Interrupt Controller) * * Base Address of Built-in APIC in memory location * is 0xfee00000. * * Map of APIC Registers: * * Offset (hex) Description Read/Write state * 000 Reserved * 010 Reserved * 020 ID Local APIC ID R/W * 030 VER Local APIC Version R * 040 Reserved * 050 Reserved * 060 Reserved * 070 Reserved * 080 Task Priority Register R/W * 090 Arbitration Priority Register R * 0A0 Processor Priority Register R * 0B0 EOI Register W * 0C0 RRR Remote read R * 0D0 Logical Destination R/W * 0E0 Destination Format Register 0..27 R; 28..31 R/W * 0F0 SVR Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W * 100 ISR 000-031 R * 110 ISR 032-063 R * 120 ISR 064-095 R * 130 ISR 095-128 R * 140 ISR 128-159 R * 150 ISR 160-191 R * 160 ISR 192-223 R * 170 ISR 224-255 R * 180 TMR 000-031 R * 190 TMR 032-063 R * 1A0 TMR 064-095 R * 1B0 TMR 095-128 R * 1C0 TMR 128-159 R * 1D0 TMR 160-191 R * 1E0 TMR 192-223 R * 1F0 TMR 224-255 R * 200 IRR 000-031 R * 210 IRR 032-063 R * 220 IRR 064-095 R * 230 IRR 095-128 R * 240 IRR 128-159 R * 250 IRR 160-191 R * 260 IRR 192-223 R * 270 IRR 224-255 R * 280 Error Status Register R * 290 Reserved * 2A0 Reserved * 2B0 Reserved * 2C0 Reserved * 2D0 Reserved * 2E0 Reserved * 2F0 Local Vector Table (CMCI) R/W * 300 ICR_LOW Interrupt Command Reg. (0-31) R/W * 310 ICR_HI Interrupt Command Reg. (32-63) R/W * 320 Local Vector Table (Timer) R/W * 330 Local Vector Table (Thermal) R/W (PIV+) * 340 Local Vector Table (Performance) R/W (P6+) * 350 LVT1 Local Vector Table (LINT0) R/W * 360 LVT2 Local Vector Table (LINT1) R/W * 370 LVT3 Local Vector Table (ERROR) R/W * 380 Initial Count Reg. for Timer R/W * 390 Current Count of Timer R * 3A0 Reserved * 3B0 Reserved * 3C0 Reserved * 3D0 Reserved * 3E0 Timer Divide Configuration Reg. R/W * 3F0 Reserved */ /****************************************************************************** * global defines, etc. 
*/ /****************************************************************************** * LOCAL APIC structure */ #ifndef LOCORE #include #define PAD3 int : 32; int : 32; int : 32 #define PAD4 int : 32; int : 32; int : 32; int : 32 struct LAPIC { /* reserved */ PAD4; /* reserved */ PAD4; u_int32_t id; PAD3; u_int32_t version; PAD3; /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; u_int32_t tpr; PAD3; u_int32_t apr; PAD3; u_int32_t ppr; PAD3; u_int32_t eoi; PAD3; /* reserved */ PAD4; u_int32_t ldr; PAD3; u_int32_t dfr; PAD3; u_int32_t svr; PAD3; u_int32_t isr0; PAD3; u_int32_t isr1; PAD3; u_int32_t isr2; PAD3; u_int32_t isr3; PAD3; u_int32_t isr4; PAD3; u_int32_t isr5; PAD3; u_int32_t isr6; PAD3; u_int32_t isr7; PAD3; u_int32_t tmr0; PAD3; u_int32_t tmr1; PAD3; u_int32_t tmr2; PAD3; u_int32_t tmr3; PAD3; u_int32_t tmr4; PAD3; u_int32_t tmr5; PAD3; u_int32_t tmr6; PAD3; u_int32_t tmr7; PAD3; u_int32_t irr0; PAD3; u_int32_t irr1; PAD3; u_int32_t irr2; PAD3; u_int32_t irr3; PAD3; u_int32_t irr4; PAD3; u_int32_t irr5; PAD3; u_int32_t irr6; PAD3; u_int32_t irr7; PAD3; u_int32_t esr; PAD3; /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; u_int32_t lvt_cmci; PAD3; u_int32_t icr_lo; PAD3; u_int32_t icr_hi; PAD3; u_int32_t lvt_timer; PAD3; u_int32_t lvt_thermal; PAD3; u_int32_t lvt_pcint; PAD3; u_int32_t lvt_lint0; PAD3; u_int32_t lvt_lint1; PAD3; u_int32_t lvt_error; PAD3; u_int32_t icr_timer; PAD3; u_int32_t ccr_timer; PAD3; /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; u_int32_t dcr_timer; PAD3; /* reserved */ PAD4; }; typedef struct LAPIC lapic_t; enum LAPIC_REGISTERS { LAPIC_ID = 0x2, LAPIC_VERSION = 0x3, LAPIC_TPR = 0x8, LAPIC_APR = 0x9, LAPIC_PPR = 0xa, LAPIC_EOI = 0xb, LAPIC_LDR = 0xd, LAPIC_DFR = 0xe, /* Not in x2APIC */ LAPIC_SVR = 0xf, LAPIC_ISR0 = 0x10, LAPIC_ISR1 = 0x11, LAPIC_ISR2 = 0x12, LAPIC_ISR3 = 0x13, LAPIC_ISR4 = 0x14, LAPIC_ISR5 = 0x15, LAPIC_ISR6 = 0x16, LAPIC_ISR7 = 0x17, LAPIC_TMR0 = 0x18, LAPIC_TMR1 = 0x19, LAPIC_TMR2 = 0x1a, LAPIC_TMR3 = 0x1b, LAPIC_TMR4 = 0x1c, LAPIC_TMR5 = 0x1d, LAPIC_TMR6 = 0x1e, LAPIC_TMR7 = 0x1f, LAPIC_IRR0 = 0x20, LAPIC_IRR1 = 0x21, LAPIC_IRR2 = 0x22, LAPIC_IRR3 = 0x23, LAPIC_IRR4 = 0x24, LAPIC_IRR5 = 0x25, LAPIC_IRR6 = 0x26, LAPIC_IRR7 = 0x27, LAPIC_ESR = 0x28, LAPIC_LVT_CMCI = 0x2f, LAPIC_ICR_LO = 0x30, LAPIC_ICR_HI = 0x31, /* Not in x2APIC */ LAPIC_LVT_TIMER = 0x32, LAPIC_LVT_THERMAL = 0x33, LAPIC_LVT_PCINT = 0x34, LAPIC_LVT_LINT0 = 0x35, LAPIC_LVT_LINT1 = 0x36, LAPIC_LVT_ERROR = 0x37, LAPIC_ICR_TIMER = 0x38, LAPIC_CCR_TIMER = 0x39, LAPIC_DCR_TIMER = 0x3e, LAPIC_SELF_IPI = 0x3f, /* Only in x2APIC */ LAPIC_EXT_FEATURES = 0x40, /* AMD */ LAPIC_EXT_CTRL = 0x41, /* AMD */ LAPIC_EXT_SEOI = 0x42, /* AMD */ LAPIC_EXT_IER0 = 0x48, /* AMD */ LAPIC_EXT_IER1 = 0x49, /* AMD */ LAPIC_EXT_IER2 = 0x4a, /* AMD */ LAPIC_EXT_IER3 = 0x4b, /* AMD */ LAPIC_EXT_IER4 = 0x4c, /* AMD */ LAPIC_EXT_IER5 = 0x4d, /* AMD */ LAPIC_EXT_IER6 = 0x4e, /* AMD */ LAPIC_EXT_IER7 = 0x4f, /* AMD */ LAPIC_EXT_LVT0 = 0x50, /* AMD */ LAPIC_EXT_LVT1 = 0x51, /* AMD */ LAPIC_EXT_LVT2 = 0x52, /* AMD */ LAPIC_EXT_LVT3 = 0x53, /* AMD */ }; #define LAPIC_MEM_MUL 0x10 /* * Although some registers are available on AMD processors only, * it's not a big waste to reserve them on all platforms. * However, we need to watch out for this space being assigned for * non-APIC purposes in the future processor models. 
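A worked example for the apicreg.h hunk above: the enum LAPIC_REGISTERS values are xAPIC register indices, and multiplying by LAPIC_MEM_MUL recovers the MMIO byte offsets from the register map earlier in the file. An illustrative helper, not in the diff:

/* Hypothetical helper: xAPIC register index -> MMIO byte offset. */
static __inline u_int
lapic_mem_offset(enum LAPIC_REGISTERS reg)
{
        return (reg * LAPIC_MEM_MUL);
}

/*
 * Checks against the register map:
 *   lapic_mem_offset(LAPIC_ID)     == 0x2  * 0x10 == 0x020
 *   lapic_mem_offset(LAPIC_EOI)    == 0xb  * 0x10 == 0x0b0
 *   lapic_mem_offset(LAPIC_ICR_LO) == 0x30 * 0x10 == 0x300
 */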
*/ #define LAPIC_MEM_REGION ((LAPIC_EXT_LVT3 + 1) * LAPIC_MEM_MUL) /****************************************************************************** * I/O APIC structure */ struct IOAPIC { u_int32_t ioregsel; PAD3; u_int32_t iowin; PAD3; }; typedef struct IOAPIC ioapic_t; #undef PAD4 #undef PAD3 #endif /* !LOCORE */ /****************************************************************************** * various code 'logical' values */ /****************************************************************************** * LOCAL APIC defines */ /* default physical locations of LOCAL (CPU) APICs */ #define DEFAULT_APIC_BASE 0xfee00000 /* constants relating to APIC ID registers */ #define APIC_ID_MASK 0xff000000 #define APIC_ID_SHIFT 24 #define APIC_ID_CLUSTER 0xf0 #define APIC_ID_CLUSTER_ID 0x0f #define APIC_MAX_CLUSTER 0xe #define APIC_MAX_INTRACLUSTER_ID 3 #define APIC_ID_CLUSTER_SHIFT 4 /* fields in VER */ #define APIC_VER_VERSION 0x000000ff #define APIC_VER_MAXLVT 0x00ff0000 #define MAXLVTSHIFT 16 #define APIC_VER_EOI_SUPPRESSION 0x01000000 #define APIC_VER_AMD_EXT_SPACE 0x80000000 /* fields in LDR */ #define APIC_LDR_RESERVED 0x00ffffff /* fields in DFR */ #define APIC_DFR_RESERVED 0x0fffffff #define APIC_DFR_MODEL_MASK 0xf0000000 #define APIC_DFR_MODEL_FLAT 0xf0000000 #define APIC_DFR_MODEL_CLUSTER 0x00000000 /* fields in SVR */ #define APIC_SVR_VECTOR 0x000000ff #define APIC_SVR_VEC_PROG 0x000000f0 #define APIC_SVR_VEC_FIX 0x0000000f #define APIC_SVR_ENABLE 0x00000100 # define APIC_SVR_SWDIS 0x00000000 # define APIC_SVR_SWEN 0x00000100 #define APIC_SVR_FOCUS 0x00000200 # define APIC_SVR_FEN 0x00000000 # define APIC_SVR_FDIS 0x00000200 #define APIC_SVR_EOI_SUPPRESSION 0x00001000 /* fields in TPR */ #define APIC_TPR_PRIO 0x000000ff # define APIC_TPR_INT 0x000000f0 # define APIC_TPR_SUB 0x0000000f /* fields in ESR */ #define APIC_ESR_SEND_CS_ERROR 0x00000001 #define APIC_ESR_RECEIVE_CS_ERROR 0x00000002 #define APIC_ESR_SEND_ACCEPT 0x00000004 #define APIC_ESR_RECEIVE_ACCEPT 0x00000008 #define APIC_ESR_SEND_ILLEGAL_VECTOR 0x00000020 #define APIC_ESR_RECEIVE_ILLEGAL_VECTOR 0x00000040 #define APIC_ESR_ILLEGAL_REGISTER 0x00000080 /* fields in ICR_LOW */ #define APIC_VECTOR_MASK 0x000000ff #define APIC_DELMODE_MASK 0x00000700 # define APIC_DELMODE_FIXED 0x00000000 # define APIC_DELMODE_LOWPRIO 0x00000100 # define APIC_DELMODE_SMI 0x00000200 # define APIC_DELMODE_RR 0x00000300 # define APIC_DELMODE_NMI 0x00000400 # define APIC_DELMODE_INIT 0x00000500 # define APIC_DELMODE_STARTUP 0x00000600 # define APIC_DELMODE_RESV 0x00000700 #define APIC_DESTMODE_MASK 0x00000800 # define APIC_DESTMODE_PHY 0x00000000 # define APIC_DESTMODE_LOG 0x00000800 #define APIC_DELSTAT_MASK 0x00001000 # define APIC_DELSTAT_IDLE 0x00000000 # define APIC_DELSTAT_PEND 0x00001000 #define APIC_RESV1_MASK 0x00002000 #define APIC_LEVEL_MASK 0x00004000 # define APIC_LEVEL_DEASSERT 0x00000000 # define APIC_LEVEL_ASSERT 0x00004000 #define APIC_TRIGMOD_MASK 0x00008000 # define APIC_TRIGMOD_EDGE 0x00000000 # define APIC_TRIGMOD_LEVEL 0x00008000 #define APIC_RRSTAT_MASK 0x00030000 # define APIC_RRSTAT_INVALID 0x00000000 # define APIC_RRSTAT_INPROG 0x00010000 # define APIC_RRSTAT_VALID 0x00020000 # define APIC_RRSTAT_RESV 0x00030000 #define APIC_DEST_MASK 0x000c0000 # define APIC_DEST_DESTFLD 0x00000000 # define APIC_DEST_SELF 0x00040000 # define APIC_DEST_ALLISELF 0x00080000 # define APIC_DEST_ALLESELF 0x000c0000 #define APIC_RESV2_MASK 0xfff00000 #define APIC_ICRLO_RESV_MASK (APIC_RESV1_MASK | APIC_RESV2_MASK) /* fields in LVT1/2 */ #define 
APIC_LVT_VECTOR 0x000000ff #define APIC_LVT_DM 0x00000700 # define APIC_LVT_DM_FIXED 0x00000000 # define APIC_LVT_DM_SMI 0x00000200 # define APIC_LVT_DM_NMI 0x00000400 # define APIC_LVT_DM_INIT 0x00000500 # define APIC_LVT_DM_EXTINT 0x00000700 #define APIC_LVT_DS 0x00001000 #define APIC_LVT_IIPP 0x00002000 #define APIC_LVT_IIPP_INTALO 0x00002000 #define APIC_LVT_IIPP_INTAHI 0x00000000 #define APIC_LVT_RIRR 0x00004000 #define APIC_LVT_TM 0x00008000 #define APIC_LVT_M 0x00010000 /* fields in LVT Timer */ #define APIC_LVTT_VECTOR 0x000000ff #define APIC_LVTT_DS 0x00001000 #define APIC_LVTT_M 0x00010000 #define APIC_LVTT_TM 0x00060000 # define APIC_LVTT_TM_ONE_SHOT 0x00000000 # define APIC_LVTT_TM_PERIODIC 0x00020000 # define APIC_LVTT_TM_TSCDLT 0x00040000 # define APIC_LVTT_TM_RSRV 0x00060000 /* APIC timer current count */ #define APIC_TIMER_MAX_COUNT 0xffffffff /* fields in TDCR */ #define APIC_TDCR_2 0x00 #define APIC_TDCR_4 0x01 #define APIC_TDCR_8 0x02 #define APIC_TDCR_16 0x03 #define APIC_TDCR_32 0x08 #define APIC_TDCR_64 0x09 #define APIC_TDCR_128 0x0a #define APIC_TDCR_1 0x0b /* Constants related to AMD Extended APIC Features Register */ #define APIC_EXTF_ELVT_MASK 0x00ff0000 #define APIC_EXTF_ELVT_SHIFT 16 #define APIC_EXTF_EXTID_CAP 0x00000004 #define APIC_EXTF_SEIO_CAP 0x00000002 #define APIC_EXTF_IER_CAP 0x00000001 /* LVT table indices */ #define APIC_LVT_LINT0 0 #define APIC_LVT_LINT1 1 #define APIC_LVT_TIMER 2 #define APIC_LVT_ERROR 3 #define APIC_LVT_PMC 4 #define APIC_LVT_THERMAL 5 #define APIC_LVT_CMCI 6 #define APIC_LVT_MAX APIC_LVT_CMCI /* AMD extended LVT constants, seem to be assigned by fiat */ #define APIC_ELVT_IBS 0 /* Instruction based sampling */ #define APIC_ELVT_MCA 1 /* MCE thresholding */ #define APIC_ELVT_DEI 2 /* Deferred error interrupt */ #define APIC_ELVT_SBI 3 /* Sideband interface */ #define APIC_ELVT_MAX APIC_ELVT_SBI /****************************************************************************** * I/O APIC defines */ /* default physical locations of an IO APIC */ #define DEFAULT_IO_APIC_BASE 0xfec00000 /* window register offset */ #define IOAPIC_WINDOW 0x10 #define IOAPIC_EOIR 0x40 #define IOAPIC_WND_SIZE 0x50 /* indexes into IO APIC */ #define IOAPIC_ID 0x00 #define IOAPIC_VER 0x01 #define IOAPIC_ARB 0x02 #define IOAPIC_REDTBL 0x10 #define IOAPIC_REDTBL0 IOAPIC_REDTBL #define IOAPIC_REDTBL1 (IOAPIC_REDTBL+0x02) #define IOAPIC_REDTBL2 (IOAPIC_REDTBL+0x04) #define IOAPIC_REDTBL3 (IOAPIC_REDTBL+0x06) #define IOAPIC_REDTBL4 (IOAPIC_REDTBL+0x08) #define IOAPIC_REDTBL5 (IOAPIC_REDTBL+0x0a) #define IOAPIC_REDTBL6 (IOAPIC_REDTBL+0x0c) #define IOAPIC_REDTBL7 (IOAPIC_REDTBL+0x0e) #define IOAPIC_REDTBL8 (IOAPIC_REDTBL+0x10) #define IOAPIC_REDTBL9 (IOAPIC_REDTBL+0x12) #define IOAPIC_REDTBL10 (IOAPIC_REDTBL+0x14) #define IOAPIC_REDTBL11 (IOAPIC_REDTBL+0x16) #define IOAPIC_REDTBL12 (IOAPIC_REDTBL+0x18) #define IOAPIC_REDTBL13 (IOAPIC_REDTBL+0x1a) #define IOAPIC_REDTBL14 (IOAPIC_REDTBL+0x1c) #define IOAPIC_REDTBL15 (IOAPIC_REDTBL+0x1e) #define IOAPIC_REDTBL16 (IOAPIC_REDTBL+0x20) #define IOAPIC_REDTBL17 (IOAPIC_REDTBL+0x22) #define IOAPIC_REDTBL18 (IOAPIC_REDTBL+0x24) #define IOAPIC_REDTBL19 (IOAPIC_REDTBL+0x26) #define IOAPIC_REDTBL20 (IOAPIC_REDTBL+0x28) #define IOAPIC_REDTBL21 (IOAPIC_REDTBL+0x2a) #define IOAPIC_REDTBL22 (IOAPIC_REDTBL+0x2c) #define IOAPIC_REDTBL23 (IOAPIC_REDTBL+0x2e) /* fields in VER */ #define IOART_VER_VERSION 0x000000ff #define IOART_VER_MAXREDIR 0x00ff0000 #define MAXREDIRSHIFT 16 /* * fields in the IO APIC's redirection table entries */ 
#define IOART_DEST APIC_ID_MASK /* broadcast addr: all APICs */ #define IOART_RESV 0x00fe0000 /* reserved */ #define IOART_INTMASK 0x00010000 /* R/W: INTerrupt mask */ # define IOART_INTMCLR 0x00000000 /* clear, allow INTs */ # define IOART_INTMSET 0x00010000 /* set, inhibit INTs */ #define IOART_TRGRMOD 0x00008000 /* R/W: trigger mode */ # define IOART_TRGREDG 0x00000000 /* edge */ # define IOART_TRGRLVL 0x00008000 /* level */ #define IOART_REM_IRR 0x00004000 /* RO: remote IRR */ #define IOART_INTPOL 0x00002000 /* R/W: INT input pin polarity */ # define IOART_INTAHI 0x00000000 /* active high */ # define IOART_INTALO 0x00002000 /* active low */ #define IOART_DELIVS 0x00001000 /* RO: delivery status */ #define IOART_DESTMOD 0x00000800 /* R/W: destination mode */ # define IOART_DESTPHY 0x00000000 /* physical */ # define IOART_DESTLOG 0x00000800 /* logical */ #define IOART_DELMOD 0x00000700 /* R/W: delivery mode */ # define IOART_DELFIXED 0x00000000 /* fixed */ # define IOART_DELLOPRI 0x00000100 /* lowest priority */ # define IOART_DELSMI 0x00000200 /* System Management INT */ # define IOART_DELRSV1 0x00000300 /* reserved */ # define IOART_DELNMI 0x00000400 /* NMI signal */ # define IOART_DELINIT 0x00000500 /* INIT signal */ # define IOART_DELRSV2 0x00000600 /* reserved */ # define IOART_DELEXINT 0x00000700 /* External INTerrupt */ #define IOART_INTVEC 0x000000ff /* R/W: INTerrupt vector field */ #endif /* _X86_APICREG_H_ */ Index: head/sys/x86/include/apicvar.h =================================================================== --- head/sys/x86/include/apicvar.h (revision 326262) +++ head/sys/x86/include/apicvar.h (revision 326263) @@ -1,491 +1,493 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_APICVAR_H_ #define _X86_APICVAR_H_ /* * Local && I/O APIC variable definitions. 
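To show how the IOART_* fields above compose, here is an illustrative fragment building the low word of an I/O APIC redirection entry (real code goes through the ioapic_* helpers declared in apicvar.h below; the name ioart_low is made up):

static __inline uint32_t
ioart_low(u_int vector)
{

        /* Fixed delivery, physical destination, edge, active-high, unmasked. */
        return (IOART_DELFIXED | IOART_DESTPHY | IOART_TRGREDG |
            IOART_INTAHI | IOART_INTMCLR | (vector & IOART_INTVEC));
}

/*
 * Each pin occupies two 32-bit registers, which is why the IOAPIC_REDTBLn
 * indices above advance by 2: pin n's low word sits at IOAPIC_REDTBL + 2 * n.
 */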
*/ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define xAPIC_MAX_APIC_ID 0xfe #define xAPIC_ID_ALL 0xff #define MAX_APIC_ID 0x200 #define APIC_ID_ALL 0xffffffff #define IOAPIC_MAX_ID xAPIC_MAX_APIC_ID /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5) /* IPIs handled by IPI_BITMAP_VECTOR */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 7) /* Suspend CPU until restarted. */ #ifdef __i386__ #define IPI_LAZYPMAP (APIC_IPI_INTS + 8) /* Lazy pmap release. 
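The fifo warning above applies per priority class, i.e. per high nibble of the vector number. An illustrative macro (not in the header) making the class explicit:

#define APIC_PRIO_CLASS(vec)    ((vec) >> 4)    /* high 4 bits of vector */

/*
 * All the IPI vectors defined above land in class 15, e.g.
 *   APIC_PRIO_CLASS(IPI_RENDEZVOUS) == APIC_PRIO_CLASS(243) == 15,
 * while APIC_TIMER_INT sits in class 14, matching the layout diagram.
 */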
*/ #define IPI_DYN_FIRST (APIC_IPI_INTS + 9) #else #define IPI_DYN_FIRST (APIC_IPI_INTS + 8) #endif #define IPI_DYN_LAST (253) /* IPIs allocated at runtime */ /* * IPI_STOP_HARD does not need to occupy a slot in the IPI vector space since * it is delivered using an NMI anyway. */ #define IPI_NMI_FIRST 254 #define IPI_TRACE 254 /* Interrupt for tracing. */ #define IPI_STOP_HARD 255 /* Stop CPU with an NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (Does not use the APIC's interrupt fifo) */ #define APIC_SPURIOUS_INT 255 #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI #define IRQ_EXTINT (NUM_IO_INTS + 1) #define IRQ_NMI (NUM_IO_INTS + 2) #define IRQ_SMI (NUM_IO_INTS + 3) #define IRQ_DISABLED (NUM_IO_INTS + 4) /* * An APIC enumerator is a pseudo bus driver that enumerates APIC's including * CPU's and I/O APIC's. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint); extern vm_paddr_t lapic_paddr; extern int *apic_cpuids; void apic_register_enumerator(struct apic_enumerator *enumerator); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); /* * Struct containing pointers to APIC functions whose * implementation is run time selectable.
*/ struct apic_ops { void (*create)(u_int, int); void (*init)(vm_paddr_t); void (*xapic_mode)(void); bool (*is_x2apic)(void); void (*setup)(int); void (*dump)(const char *); void (*disable)(void); void (*eoi)(void); int (*id)(void); int (*intr_pending)(u_int); void (*set_logical_id)(u_int, u_int, u_int); u_int (*cpuid)(u_int); /* Vectors */ u_int (*alloc_vector)(u_int, u_int); u_int (*alloc_vectors)(u_int, u_int *, u_int, u_int); void (*enable_vector)(u_int, u_int); void (*disable_vector)(u_int, u_int); void (*free_vector)(u_int, u_int, u_int); /* PMC */ int (*enable_pmc)(void); void (*disable_pmc)(void); void (*reenable_pmc)(void); /* CMC */ void (*enable_cmc)(void); /* AMD ELVT */ int (*enable_mca_elvt)(void); /* IPI */ void (*ipi_raw)(register_t, u_int); void (*ipi_vectored)(u_int, int); int (*ipi_wait)(int); int (*ipi_alloc)(inthand_t *ipifunc); void (*ipi_free)(int vector); /* LVT */ int (*set_lvt_mask)(u_int, u_int, u_char); int (*set_lvt_mode)(u_int, u_int, u_int32_t); int (*set_lvt_polarity)(u_int, u_int, enum intr_polarity); int (*set_lvt_triggermode)(u_int, u_int, enum intr_trigger); }; extern struct apic_ops apic_ops; static inline void lapic_create(u_int apic_id, int boot_cpu) { apic_ops.create(apic_id, boot_cpu); } static inline void lapic_init(vm_paddr_t addr) { apic_ops.init(addr); } static inline void lapic_xapic_mode(void) { apic_ops.xapic_mode(); } static inline bool lapic_is_x2apic(void) { return (apic_ops.is_x2apic()); } static inline void lapic_setup(int boot) { apic_ops.setup(boot); } static inline void lapic_dump(const char *str) { apic_ops.dump(str); } static inline void lapic_disable(void) { apic_ops.disable(); } static inline void lapic_eoi(void) { apic_ops.eoi(); } static inline int lapic_id(void) { return (apic_ops.id()); } static inline int lapic_intr_pending(u_int vector) { return (apic_ops.intr_pending(vector)); } /* XXX: UNUSED */ static inline void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { apic_ops.set_logical_id(apic_id, cluster, cluster_id); } static inline u_int apic_cpuid(u_int apic_id) { return (apic_ops.cpuid(apic_id)); } static inline u_int apic_alloc_vector(u_int apic_id, u_int irq) { return (apic_ops.alloc_vector(apic_id, irq)); } static inline u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { return (apic_ops.alloc_vectors(apic_id, irqs, count, align)); } static inline void apic_enable_vector(u_int apic_id, u_int vector) { apic_ops.enable_vector(apic_id, vector); } static inline void apic_disable_vector(u_int apic_id, u_int vector) { apic_ops.disable_vector(apic_id, vector); } static inline void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { apic_ops.free_vector(apic_id, vector, irq); } static inline int lapic_enable_pmc(void) { return (apic_ops.enable_pmc()); } static inline void lapic_disable_pmc(void) { apic_ops.disable_pmc(); } static inline void lapic_reenable_pmc(void) { apic_ops.reenable_pmc(); } static inline void lapic_enable_cmc(void) { apic_ops.enable_cmc(); } static inline int lapic_enable_mca_elvt(void) { return (apic_ops.enable_mca_elvt()); } static inline void lapic_ipi_raw(register_t icrlo, u_int dest) { apic_ops.ipi_raw(icrlo, dest); } static inline void lapic_ipi_vectored(u_int vector, int dest) { apic_ops.ipi_vectored(vector, dest); } static inline int lapic_ipi_wait(int delay) { return (apic_ops.ipi_wait(delay)); } static inline int lapic_ipi_alloc(inthand_t *ipifunc) { return (apic_ops.ipi_alloc(ipifunc)); } static inline void lapic_ipi_free(int vector) { 
return (apic_ops.ipi_free(vector)); } static inline int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked) { return (apic_ops.set_lvt_mask(apic_id, lvt, masked)); } static inline int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) { return (apic_ops.set_lvt_mode(apic_id, lvt, mode)); } static inline int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (apic_ops.set_lvt_polarity(apic_id, lvt, pol)); } static inline int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (apic_ops.set_lvt_triggermode(apic_id, lvt, trigger)); } void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); int ioapic_get_rid(u_int apic_id, uint16_t *ridp); extern int x2apic_mode; extern int lapic_eoi_suppression; #ifdef _SYS_SYSCTL_H_ SYSCTL_DECL(_hw_apic); #endif #endif /* !LOCORE */ #endif /* _X86_APICVAR_H_ */ Index: head/sys/x86/include/bus.h =================================================================== --- head/sys/x86/include/bus.h (revision 326262) +++ head/sys/x86/include/bus.h (revision 326263) @@ -1,1102 +1,1104 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause-NetBSD + * * Copyright (c) KATO Takenori, 1999. * * All rights reserved. Unpublished rights reserved under the copyright * laws of Japan. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _X86_BUS_H_ #define _X86_BUS_H_ #include #include #ifndef __GNUCLIKE_ASM #error "no assembler code for your compiler" #endif /* * Values for the x86 bus space tag, not to be used directly by MI code. */ #define X86_BUS_SPACE_IO 0 /* space is i/o space */ #define X86_BUS_SPACE_MEM 1 /* space is mem space */ #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF #if defined(__amd64__) || defined(PAE) #define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL #else #define BUS_SPACE_MAXADDR 0xFFFFFFFF #endif #define BUS_SPACE_INVALID_DATA (~0) #define BUS_SPACE_UNRESTRICTED (~0) /* * Map a region of device bus space into CPU virtual address space. 
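The BUS_SPACE_MAXADDR constants just defined are what drivers feed to bus_dma_tag_create(9) to bound DMA addresses. A hypothetical fragment (the function name and parameters are invented for illustration):

#include <sys/param.h>
#include <sys/bus.h>
#include <machine/bus.h>

static int
foo_dma_setup(device_t dev, bus_dma_tag_t *tagp)
{

        /* Bound DMA below 4 GB for a device with 32-bit addressing. */
        return (bus_dma_tag_create(
            bus_get_dma_tag(dev),       /* inherit from the parent bus */
            4, 0,                       /* alignment, boundary */
            BUS_SPACE_MAXADDR_32BIT,    /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            4096, 1, 4096,              /* maxsize, nsegments, maxsegsz */
            0, NULL, NULL,              /* flags, lockfunc, lockarg */
            tagp));
}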
*/ int bus_space_map(bus_space_tag_t tag, bus_addr_t addr, bus_size_t size, int flags, bus_space_handle_t *bshp); /* * Unmap a region of device bus space. */ void bus_space_unmap(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t size); /* * Get a new handle for a subregion of an already-mapped area of bus space. */ static __inline int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp); static __inline int bus_space_subregion(bus_space_tag_t t __unused, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size __unused, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } /* * Allocate a region of memory that is accessible to devices in bus space. */ int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t align, bus_size_t boundary, int flags, bus_addr_t *addrp, bus_space_handle_t *bshp); /* * Free a region of bus space accessible memory. */ static __inline void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_free(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } /* * Read a 1, 2, 4, or 8 byte quantity from bus space * described by tag/handle/offset. */ static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); #ifdef __amd64__ static __inline uint64_t bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); #endif static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { if (tag == X86_BUS_SPACE_IO) return (inb(handle + offset)); return (*(volatile u_int8_t *)(handle + offset)); } static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { if (tag == X86_BUS_SPACE_IO) return (inw(handle + offset)); return (*(volatile u_int16_t *)(handle + offset)); } static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { if (tag == X86_BUS_SPACE_IO) return (inl(handle + offset)); return (*(volatile u_int32_t *)(handle + offset)); } #ifdef __amd64__ static __inline uint64_t bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { if (tag == X86_BUS_SPACE_IO) /* No 8 byte IO space access on x86 */ return (BUS_SPACE_INVALID_DATA); return (*(volatile uint64_t *)(handle + offset)); } #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle/offset and copy into buffer provided. 
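A usage sketch for the read accessors above, as they typically appear in a driver; foo_softc and the 0x10 register offset are invented, while rman_get_bustag()/rman_get_bushandle() are the standard way to recover the tag/handle pair from an allocated resource:

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <machine/bus.h>

struct foo_softc {
        struct resource *res;   /* from bus_alloc_resource_any() */
};

static uint32_t
foo_read_status(struct foo_softc *sc)
{
        bus_space_tag_t bst = rman_get_bustag(sc->res);
        bus_space_handle_t bsh = rman_get_bushandle(sc->res);

        /* Works for both port I/O and memory-mapped resources. */
        return (bus_space_read_4(bst, bsh, 0x10));
}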
*/ static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) insb(bsh + offset, addr, count); else { #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: movb (%2),%%al \n\ stosb \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } } static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) insw(bsh + offset, addr, count); else { #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: movw (%2),%%ax \n\ stosw \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } } static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) insl(bsh + offset, addr, count); else { #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: movl (%2),%%eax \n\ stosl \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } } #if 0 /* Cause a link error for bus_space_read_multi_8 */ #define bus_space_read_multi_8 !!! bus_space_read_multi_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle and starting at `offset' and copy into * buffer provided. 
*/ static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: inb %w2,%%al \n\ stosb \n\ incl %2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } else { bus_space_handle_t _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } } static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: inw %w2,%%ax \n\ stosw \n\ addl $2,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } else { bus_space_handle_t _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } } static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: inl %w2,%%eax \n\ stosl \n\ addl $4,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } else { bus_space_handle_t _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } } #if 0 /* Cause a link error for bus_space_read_region_8 */ #define bus_space_read_region_8 !!! bus_space_read_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `value' to bus space * described by tag/handle/offset. 
*/ static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value); static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value); static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value); #ifdef __amd64__ static __inline void bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, uint64_t value); #endif static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value) { if (tag == X86_BUS_SPACE_IO) outb(bsh + offset, value); else *(volatile u_int8_t *)(bsh + offset) = value; } static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value) { if (tag == X86_BUS_SPACE_IO) outw(bsh + offset, value); else *(volatile u_int16_t *)(bsh + offset) = value; } static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value) { if (tag == X86_BUS_SPACE_IO) outl(bsh + offset, value); else *(volatile u_int32_t *)(bsh + offset) = value; } #ifdef __amd64__ static __inline void bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, uint64_t value) { if (tag == X86_BUS_SPACE_IO) /* No 8 byte IO space access on x86 */ return; else *(volatile uint64_t *)(bsh + offset) = value; } #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer * provided to bus space described by tag/handle/offset. */ static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) outsb(bsh + offset, addr, count); else { #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ movb %%al,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } } static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) outsw(bsh + offset, addr, count); else { #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ movw %%ax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } } static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) outsl(bsh + offset, addr, count); else { #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ movl %%eax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } } #if 0 /* Cause a link error for bus_space_write_multi_8 */ #define bus_space_write_multi_8(t, h, o, a, c) \ !!! bus_space_write_multi_8 unimplemented !!! 
#endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer provided * to bus space described by tag/handle starting at `offset'. */ static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ outb %%al,%w0 \n\ incl %0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } else { bus_space_handle_t _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } } static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ outw %%ax,%w0 \n\ addl $2,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } else { bus_space_handle_t _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } } static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ outl %%eax,%w0 \n\ addl $4,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } else { bus_space_handle_t _port_ = bsh + offset; #ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } } #if 0 /* Cause a link error for bus_space_write_region_8 */ #define bus_space_write_region_8 \ !!! bus_space_write_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle/offset `count' times. 
*/ static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) while (count--) outb(addr, value); else while (count--) *(volatile u_int8_t *)(addr) = value; } static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) while (count--) outw(addr, value); else while (count--) *(volatile u_int16_t *)(addr) = value; } static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) while (count--) outl(addr, value); else while (count--) *(volatile u_int32_t *)(addr) = value; } #if 0 /* Cause a link error for bus_space_set_multi_8 */ #define bus_space_set_multi_8 !!! bus_space_set_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle starting at `offset'. */ static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) for (; count != 0; count--, addr++) outb(addr, value); else for (; count != 0; count--, addr++) *(volatile u_int8_t *)(addr) = value; } static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) for (; count != 0; count--, addr += 2) outw(addr, value); else for (; count != 0; count--, addr += 2) *(volatile u_int16_t *)(addr) = value; } static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) for (; count != 0; count--, addr += 4) outl(addr, value); else for (; count != 0; count--, addr += 4) *(volatile u_int32_t *)(addr) = value; } #if 0 /* Cause a link error for bus_space_set_region_8 */ #define bus_space_set_region_8 !!! bus_space_set_region_8 unimplemented !!! #endif /* * Copy `count' 1, 2, 4, or 8 byte values from bus space starting * at tag/bsh1/off1 to bus space starting at tag/bsh2/off2. 
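The practical difference between the two families implemented above: set_multi_* writes the same location `count' times (a FIFO-style port), while set_region_* advances the address each iteration (a buffer). A hypothetical fragment with invented offsets:

#define FOO_FIFO        0x00    /* write-only FIFO register (invented) */
#define FOO_BUF         0x100   /* 64 words of device RAM (invented) */

static void
foo_fill(bus_space_tag_t bst, bus_space_handle_t bsh)
{

        /* Push 16 identical words into the FIFO at a single offset. */
        bus_space_set_multi_4(bst, bsh, FOO_FIFO, 0xdeadc0de, 16);

        /* Zero 64 consecutive words starting at FOO_BUF. */
        bus_space_set_region_4(bst, bsh, FOO_BUF, 0, 64);
}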
*/ static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; if (tag == X86_BUS_SPACE_IO) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) outb(addr2, inb(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } } static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; if (tag == X86_BUS_SPACE_IO) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } } static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; if (tag == X86_BUS_SPACE_IO) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } } } #if 0 /* Cause a link error for bus_space_copy_8 */ #define bus_space_copy_region_8 !!! bus_space_copy_region_8 unimplemented !!! 
#endif /* * Bus read/write barrier methods. * * void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh, * bus_size_t offset, bus_size_t len, int flags); * * * Note that BUS_SPACE_BARRIER_WRITE doesn't do anything other than * prevent reordering by the compiler; all Intel x86 processors currently * retire operations outside the CPU in program order. */ #define BUS_SPACE_BARRIER_READ 0x01 /* force read barrier */ #define BUS_SPACE_BARRIER_WRITE 0x02 /* force write barrier */ static __inline void bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused, bus_size_t offset __unused, bus_size_t len __unused, int flags) { #ifdef __GNUCLIKE_ASM if (flags & BUS_SPACE_BARRIER_READ) #ifdef __amd64__ __asm __volatile("lock; addl $0,0(%%rsp)" : : : "memory"); #else __asm __volatile("lock; addl $0,0(%%esp)" : : : "memory"); #endif else __compiler_membar(); #endif } #ifdef BUS_SPACE_NO_LEGACY #undef inb #undef outb #define inb(a) compiler_error #define inw(a) compiler_error #define inl(a) compiler_error #define outb(a, b) compiler_error #define outw(a, b) compiler_error #define outl(a, b) compiler_error #endif #include /* * Stream accesses are the same as normal accesses on x86; there are no * supported bus systems with an endianness different from the host one. */ #define bus_space_read_stream_1(t, h, o) bus_space_read_1((t), (h), (o)) #define bus_space_read_stream_2(t, h, o) bus_space_read_2((t), (h), (o)) #define bus_space_read_stream_4(t, h, o) bus_space_read_4((t), (h), (o)) #define bus_space_read_multi_stream_1(t, h, o, a, c) \ bus_space_read_multi_1((t), (h), (o), (a), (c)) #define bus_space_read_multi_stream_2(t, h, o, a, c) \ bus_space_read_multi_2((t), (h), (o), (a), (c)) #define bus_space_read_multi_stream_4(t, h, o, a, c) \ bus_space_read_multi_4((t), (h), (o), (a), (c)) #define bus_space_write_stream_1(t, h, o, v) \ bus_space_write_1((t), (h), (o), (v)) #define bus_space_write_stream_2(t, h, o, v) \ bus_space_write_2((t), (h), (o), (v)) #define bus_space_write_stream_4(t, h, o, v) \ bus_space_write_4((t), (h), (o), (v)) #define bus_space_write_multi_stream_1(t, h, o, a, c) \ bus_space_write_multi_1((t), (h), (o), (a), (c)) #define bus_space_write_multi_stream_2(t, h, o, a, c) \ bus_space_write_multi_2((t), (h), (o), (a), (c)) #define bus_space_write_multi_stream_4(t, h, o, a, c) \ bus_space_write_multi_4((t), (h), (o), (a), (c)) #define bus_space_set_multi_stream_1(t, h, o, v, c) \ bus_space_set_multi_1((t), (h), (o), (v), (c)) #define bus_space_set_multi_stream_2(t, h, o, v, c) \ bus_space_set_multi_2((t), (h), (o), (v), (c)) #define bus_space_set_multi_stream_4(t, h, o, v, c) \ bus_space_set_multi_4((t), (h), (o), (v), (c)) #define bus_space_read_region_stream_1(t, h, o, a, c) \ bus_space_read_region_1((t), (h), (o), (a), (c)) #define bus_space_read_region_stream_2(t, h, o, a, c) \ bus_space_read_region_2((t), (h), (o), (a), (c)) #define bus_space_read_region_stream_4(t, h, o, a, c) \ bus_space_read_region_4((t), (h), (o), (a), (c)) #define bus_space_write_region_stream_1(t, h, o, a, c) \ bus_space_write_region_1((t), (h), (o), (a), (c)) #define bus_space_write_region_stream_2(t, h, o, a, c) \ bus_space_write_region_2((t), (h), (o), (a), (c)) #define bus_space_write_region_stream_4(t, h, o, a, c) \ bus_space_write_region_4((t), (h), (o), (a), (c)) #define bus_space_set_region_stream_1(t, h, o, v, c) \ bus_space_set_region_1((t), (h), (o), (v), (c)) #define bus_space_set_region_stream_2(t, h, o, v, c) \ bus_space_set_region_2((t), (h), (o), (v), (c))
#define bus_space_set_region_stream_4(t, h, o, v, c) \ bus_space_set_region_4((t), (h), (o), (v), (c)) #define bus_space_copy_region_stream_1(t, h1, o1, h2, o2, c) \ bus_space_copy_region_1((t), (h1), (o1), (h2), (o2), (c)) #define bus_space_copy_region_stream_2(t, h1, o1, h2, o2, c) \ bus_space_copy_region_2((t), (h1), (o1), (h2), (o2), (c)) #define bus_space_copy_region_stream_4(t, h1, o1, h2, o2, c) \ bus_space_copy_region_4((t), (h1), (o1), (h2), (o2), (c)) #endif /* _X86_BUS_H_ */ Index: head/sys/x86/include/busdma_impl.h =================================================================== --- head/sys/x86/include/busdma_impl.h (revision 326262) +++ head/sys/x86/include/busdma_impl.h (revision 326263) @@ -1,96 +1,98 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __X86_BUSDMA_IMPL_H #define __X86_BUSDMA_IMPL_H struct bus_dma_tag_common { struct bus_dma_impl *impl; struct bus_dma_tag_common *parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; bus_dma_lock_t *lockfunc; void *lockfuncarg; int ref_count; }; struct bus_dma_impl { int (*tag_create)(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat); int (*tag_destroy)(bus_dma_tag_t dmat); int (*map_create)(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp); int (*map_destroy)(bus_dma_tag_t dmat, bus_dmamap_t map); int (*mem_alloc)(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp); void (*mem_free)(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map); int (*load_ma)(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp); int (*load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp); int (*load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp); void (*map_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg); bus_dma_segment_t *(*map_complete)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error); void (*map_unload)(bus_dma_tag_t dmat, bus_dmamap_t map); void (*map_sync)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op); }; void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op); int bus_dma_run_filter(struct bus_dma_tag_common *dmat, bus_addr_t paddr); int common_bus_dma_tag_create(struct bus_dma_tag_common *parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat); extern struct bus_dma_impl bus_dma_bounce_impl; #endif Index: head/sys/x86/include/elf.h =================================================================== --- head/sys/x86/include/elf.h (revision 326262) +++ head/sys/x86/include/elf.h (revision 326263) @@ -1,221 +1,223 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1996-1997 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ELF_H_ #define _MACHINE_ELF_H_ 1 #if defined(__i386__) || defined(_MACHINE_ELF_WANT_32BIT) /* * ELF definitions for the i386 architecture. */ #include /* Definitions common to all 32 bit architectures. */ #if defined(__ELF_WORD_SIZE) && __ELF_WORD_SIZE == 64 #include /* Definitions common to all 64 bit architectures. */ #endif #ifndef __ELF_WORD_SIZE #define __ELF_WORD_SIZE 32 /* Used by */ #endif #include #define ELF_ARCH EM_386 #define ELF_MACHINE_OK(x) ((x) == EM_386 || (x) == EM_486) /* * Auxiliary vector entries for passing information to the interpreter. * * The i386 supplement to the SVR4 ABI specification names this "auxv_t", * but POSIX lays claim to all symbols ending with "_t". */ typedef struct { /* Auxiliary vector entry on initial stack */ int a_type; /* Entry type. */ union { long a_val; /* Integer value. */ void *a_ptr; /* Address. */ void (*a_fcn)(void); /* Function pointer (not used). */ } a_un; } Elf32_Auxinfo; #if __ELF_WORD_SIZE == 64 /* Fake for amd64 loader support */ typedef struct { int fake; } Elf64_Auxinfo; #endif __ElfType(Auxinfo); /* Values for a_type. */ #define AT_NULL 0 /* Terminates the vector. */ #define AT_IGNORE 1 /* Ignored entry. */ #define AT_EXECFD 2 /* File descriptor of program to load. */ #define AT_PHDR 3 /* Program header of program already loaded. */ #define AT_PHENT 4 /* Size of each program header entry. */ #define AT_PHNUM 5 /* Number of program header entries. */ #define AT_PAGESZ 6 /* Page size in bytes. */ #define AT_BASE 7 /* Interpreter's base address. */ #define AT_FLAGS 8 /* Flags (unused for i386). */ #define AT_ENTRY 9 /* Where interpreter should transfer control. */ #define AT_NOTELF 10 /* Program is not ELF ?? */ #define AT_UID 11 /* Real uid. */ #define AT_EUID 12 /* Effective uid. */ #define AT_GID 13 /* Real gid. */ #define AT_EGID 14 /* Effective gid. */ #define AT_EXECPATH 15 /* Path to the executable. */ #define AT_CANARY 16 /* Canary for SSP. */ #define AT_CANARYLEN 17 /* Length of the canary. */ #define AT_OSRELDATE 18 /* OSRELDATE. */ #define AT_NCPUS 19 /* Number of CPUs. */ #define AT_PAGESIZES 20 /* Pagesizes. */ #define AT_PAGESIZESLEN 21 /* Number of pagesizes. */ #define AT_TIMEKEEP 22 /* Pointer to timehands. */ #define AT_STACKPROT 23 /* Initial stack protection. */ #define AT_EHDRFLAGS 24 /* e_flags field from elf hdr */ #define AT_HWCAP 25 /* CPU feature flags. */ #define AT_HWCAP2 26 /* CPU feature flags 2. */ #define AT_COUNT 27 /* Count of defined aux entry types. */ /* * Relocation types. */ #define R_386_COUNT 38 /* Count of defined relocation types. */ /* Define "machine" characteristics */ #define ELF_TARG_CLASS ELFCLASS32 #define ELF_TARG_DATA ELFDATA2LSB #define ELF_TARG_MACH EM_386 #define ELF_TARG_VER 1 #define ET_DYN_LOAD_ADDR 0x01001000 #elif defined(__amd64__) /* * ELF definitions for the AMD64 architecture. */ #ifndef __ELF_WORD_SIZE #define __ELF_WORD_SIZE 64 /* Used by */ #endif #include /* Definitions common to all 32 bit architectures. 
*/ #include /* Definitions common to all 64 bit architectures. */ #include #define ELF_ARCH EM_X86_64 #define ELF_ARCH32 EM_386 #define ELF_MACHINE_OK(x) ((x) == EM_X86_64) /* * Auxiliary vector entries for passing information to the interpreter. * * The i386 supplement to the SVR4 ABI specification names this "auxv_t", * but POSIX lays claim to all symbols ending with "_t". */ typedef struct { /* Auxiliary vector entry on initial stack */ int a_type; /* Entry type. */ union { int a_val; /* Integer value. */ } a_un; } Elf32_Auxinfo; typedef struct { /* Auxiliary vector entry on initial stack */ long a_type; /* Entry type. */ union { long a_val; /* Integer value. */ void *a_ptr; /* Address. */ void (*a_fcn)(void); /* Function pointer (not used). */ } a_un; } Elf64_Auxinfo; __ElfType(Auxinfo); /* Values for a_type. */ #define AT_NULL 0 /* Terminates the vector. */ #define AT_IGNORE 1 /* Ignored entry. */ #define AT_EXECFD 2 /* File descriptor of program to load. */ #define AT_PHDR 3 /* Program header of program already loaded. */ #define AT_PHENT 4 /* Size of each program header entry. */ #define AT_PHNUM 5 /* Number of program header entries. */ #define AT_PAGESZ 6 /* Page size in bytes. */ #define AT_BASE 7 /* Interpreter's base address. */ #define AT_FLAGS 8 /* Flags (unused for i386). */ #define AT_ENTRY 9 /* Where interpreter should transfer control. */ #define AT_NOTELF 10 /* Program is not ELF ?? */ #define AT_UID 11 /* Real uid. */ #define AT_EUID 12 /* Effective uid. */ #define AT_GID 13 /* Real gid. */ #define AT_EGID 14 /* Effective gid. */ #define AT_EXECPATH 15 /* Path to the executable. */ #define AT_CANARY 16 /* Canary for SSP */ #define AT_CANARYLEN 17 /* Length of the canary. */ #define AT_OSRELDATE 18 /* OSRELDATE. */ #define AT_NCPUS 19 /* Number of CPUs. */ #define AT_PAGESIZES 20 /* Pagesizes. */ #define AT_PAGESIZESLEN 21 /* Number of pagesizes. */ #define AT_TIMEKEEP 22 /* Pointer to timehands. */ #define AT_STACKPROT 23 /* Initial stack protection. */ #define AT_EHDRFLAGS 24 /* e_flags field from elf hdr */ #define AT_HWCAP 25 /* CPU feature flags. */ #define AT_HWCAP2 26 /* CPU feature flags 2. */ #define AT_COUNT 27 /* Count of defined aux entry types. */ /* * Relocation types. */ #define R_X86_64_COUNT 24 /* Count of defined relocation types. */ /* Define "machine" characteristics */ #if __ELF_WORD_SIZE == 32 #define ELF_TARG_CLASS ELFCLASS32 #else #define ELF_TARG_CLASS ELFCLASS64 #endif #define ELF_TARG_DATA ELFDATA2LSB #define ELF_TARG_MACH EM_X86_64 #define ELF_TARG_VER 1 #if __ELF_WORD_SIZE == 32 #define ET_DYN_LOAD_ADDR 0x01001000 #else #define ET_DYN_LOAD_ADDR 0x01021000 #endif #endif /* __i386__, __amd64__ */ #endif /* !_MACHINE_ELF_H_ */ Index: head/sys/x86/include/fdt.h =================================================================== --- head/sys/x86/include/fdt.h (revision 326262) +++ head/sys/x86/include/fdt.h (revision 326263) @@ -1,36 +1,38 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 Juniper Networks, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_FDT_H_ #define _MACHINE_FDT_H_ __BEGIN_DECLS int x86_init_fdt(void); __END_DECLS #endif /* _MACHINE_FDT_H_ */ Index: head/sys/x86/include/init.h =================================================================== --- head/sys/x86/include/init.h (revision 326262) +++ head/sys/x86/include/init.h (revision 326263) @@ -1,58 +1,60 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __X86_INIT_H__ #define __X86_INIT_H__ /* * Struct containing pointers to init functions whose * implementation is run time selectable. Selection can be made, * for example, based on detection of a BIOS variant or * hypervisor environment.
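 *
 * As a sketch of the intended use (the hook names here are hypothetical,
 * not part of this header), a hypervisor port could install its own
 * implementations at its entry point, before the generic code runs:
 *
 *	init_ops.parse_preload_data = xen_parse_preload_data;
 *	init_ops.early_delay = xen_delay;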
*/ struct init_ops { caddr_t (*parse_preload_data)(u_int64_t); void (*early_clock_source_init)(void); void (*early_delay)(int); void (*parse_memmap)(caddr_t, vm_paddr_t *, int *); u_int (*mp_bootaddress)(u_int); int (*start_all_aps)(void); void (*msi_init)(void); }; extern struct init_ops init_ops; /* Knob to disable acpi_cpu devices */ extern bool acpi_cpu_disabled; /* Knob to disable acpi_hpet device */ extern bool acpi_hpet_disabled; /* Knob to disable acpi_timer device */ extern bool acpi_timer_disabled; #endif /* __X86_INIT_H__ */ Index: head/sys/x86/include/legacyvar.h =================================================================== --- head/sys/x86/include/legacyvar.h (revision 326262) +++ head/sys/x86/include/legacyvar.h (revision 326263) @@ -1,71 +1,73 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _X86_LEGACYVAR_H_ #define _X86_LEGACYVAR_H_ enum legacy_device_ivars { LEGACY_IVAR_PCIDOMAIN, LEGACY_IVAR_PCIBUS, LEGACY_IVAR_PCISLOT, LEGACY_IVAR_PCIFUNC }; #define LEGACY_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(legacy, var, LEGACY, ivar, type) LEGACY_ACCESSOR(pcidomain, PCIDOMAIN, uint32_t) LEGACY_ACCESSOR(pcibus, PCIBUS, uint32_t) LEGACY_ACCESSOR(pcislot, PCISLOT, int) LEGACY_ACCESSOR(pcifunc, PCIFUNC, int) #undef LEGACY_ACCESSOR int legacy_pcib_maxslots(device_t dev); uint32_t legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int bytes); int legacy_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result); void legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t data, int bytes); int legacy_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value); struct resource *legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); int legacy_pcib_adjust_resource(device_t dev, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end); int legacy_pcib_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r); int legacy_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs); int legacy_pcib_alloc_msix(device_t pcib, device_t dev, int *irq); int legacy_pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data); #endif /* !_X86_LEGACYVAR_H_ */ Index: head/sys/x86/include/mca.h =================================================================== --- head/sys/x86/include/mca.h (revision 326262) +++ head/sys/x86/include/mca.h (revision 326263) @@ -1,56 +1,58 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __X86_MCA_H__ #define __X86_MCA_H__ struct mca_record { uint64_t mr_status; uint64_t mr_addr; uint64_t mr_misc; uint64_t mr_tsc; int mr_apic_id; int mr_bank; uint64_t mr_mcg_cap; uint64_t mr_mcg_status; int mr_cpu_id; int mr_cpu_vendor_id; int mr_cpu; }; #ifdef _KERNEL void cmc_intr(void); void mca_init(void); void mca_intr(void); void mca_resume(void); #endif #endif /* !__X86_MCA_H__ */ Index: head/sys/x86/include/mptable.h =================================================================== --- head/sys/x86/include/mptable.h (revision 326262) +++ head/sys/x86/include/mptable.h (revision 326263) @@ -1,204 +1,206 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __MACHINE_MPTABLE_H__ #define __MACHINE_MPTABLE_H__ enum busTypes { NOBUS = 0, CBUS = 1, CBUSII = 2, EISA = 3, ISA = 6, MCA = 9, PCI = 13, XPRESS = 18, MAX_BUSTYPE = 18, UNKNOWN_BUSTYPE = 0xff }; /* MP Floating Pointer Structure */ typedef struct MPFPS { uint8_t signature[4]; uint32_t pap; uint8_t length; uint8_t spec_rev; uint8_t checksum; uint8_t config_type; uint8_t mpfb2; uint8_t mpfb3; uint8_t mpfb4; uint8_t mpfb5; } __packed *mpfps_t; #define MPFB2_IMCR_PRESENT 0x80 #define MPFB2_MUL_CLK_SRCS 0x40 /* MP Configuration Table Header */ typedef struct MPCTH { uint8_t signature[4]; uint16_t base_table_length; uint8_t spec_rev; uint8_t checksum; uint8_t oem_id[8]; uint8_t product_id[12]; uint32_t oem_table_pointer; uint16_t oem_table_size; uint16_t entry_count; uint32_t apic_address; uint16_t extended_table_length; uint8_t extended_table_checksum; uint8_t reserved; } __packed *mpcth_t; /* Base table entries */ #define MPCT_ENTRY_PROCESSOR 0 #define MPCT_ENTRY_BUS 1 #define MPCT_ENTRY_IOAPIC 2 #define MPCT_ENTRY_INT 3 #define MPCT_ENTRY_LOCAL_INT 4 typedef struct PROCENTRY { uint8_t type; uint8_t apic_id; uint8_t apic_version; uint8_t cpu_flags; uint32_t cpu_signature; uint32_t feature_flags; uint32_t reserved1; uint32_t reserved2; } __packed *proc_entry_ptr; #define PROCENTRY_FLAG_EN 0x01 #define PROCENTRY_FLAG_BP 0x02 typedef struct BUSENTRY { uint8_t type; uint8_t bus_id; uint8_t bus_type[6]; } __packed *bus_entry_ptr; typedef struct IOAPICENTRY { uint8_t type; uint8_t apic_id; uint8_t apic_version; uint8_t apic_flags; uint32_t apic_address; } __packed *io_apic_entry_ptr; #define IOAPICENTRY_FLAG_EN 0x01 typedef struct INTENTRY { uint8_t type; uint8_t int_type; uint16_t int_flags; uint8_t src_bus_id; uint8_t src_bus_irq; uint8_t dst_apic_id; uint8_t dst_apic_int; } __packed *int_entry_ptr; #define INTENTRY_TYPE_INT 0 #define INTENTRY_TYPE_NMI 1 #define INTENTRY_TYPE_SMI 2 #define INTENTRY_TYPE_EXTINT 3 #define INTENTRY_FLAGS_POLARITY 0x3 #define INTENTRY_FLAGS_POLARITY_CONFORM 0x0 #define INTENTRY_FLAGS_POLARITY_ACTIVEHI 0x1 #define INTENTRY_FLAGS_POLARITY_ACTIVELO 0x3 #define INTENTRY_FLAGS_TRIGGER 0xc #define INTENTRY_FLAGS_TRIGGER_CONFORM 0x0 #define INTENTRY_FLAGS_TRIGGER_EDGE 0x4 #define INTENTRY_FLAGS_TRIGGER_LEVEL 0xc /* Extended table entries */ typedef struct EXTENTRY { uint8_t type; uint8_t length; } __packed *ext_entry_ptr; #define MPCT_EXTENTRY_SAS 0x80 #define MPCT_EXTENTRY_BHD 0x81 #define MPCT_EXTENTRY_CBASM 0x82 typedef struct SASENTRY { uint8_t type; uint8_t length; uint8_t bus_id; uint8_t address_type; uint64_t address_base; uint64_t address_length; } __packed *sas_entry_ptr; #define SASENTRY_TYPE_IO 0 #define SASENTRY_TYPE_MEMORY 1 #define SASENTRY_TYPE_PREFETCH 2 typedef struct BHDENTRY { uint8_t type; uint8_t length; uint8_t bus_id; uint8_t bus_info; uint8_t parent_bus; uint8_t reserved[3]; } __packed *bhd_entry_ptr; #define BHDENTRY_INFO_SUBTRACTIVE_DECODE 0x1 typedef struct CBASMENTRY { uint8_t type; uint8_t length; uint8_t bus_id; uint8_t address_mod; uint32_t predefined_range; } __packed *cbasm_entry_ptr; #define CBASMENTRY_ADDRESS_MOD_ADD 0x0 #define CBASMENTRY_ADDRESS_MOD_SUBTRACT 0x1 #define CBASMENTRY_RANGE_ISA_IO 0 #define CBASMENTRY_RANGE_VGA_IO 1 #ifdef _KERNEL struct mptable_hostb_softc { #ifdef NEW_PCIB struct pcib_host_resources sc_host_res; int sc_decodes_vga_io; int sc_decodes_isa_io; #endif }; #ifdef NEW_PCIB void mptable_pci_host_res_init(device_t pcib); #endif int mptable_pci_probe_table(int bus); int 
mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin); #endif #endif /* !__MACHINE_MPTABLE_H__ */ Index: head/sys/x86/include/ofw_machdep.h =================================================================== --- head/sys/x86/include/ofw_machdep.h (revision 326262) +++ head/sys/x86/include/ofw_machdep.h (revision 326263) @@ -1,42 +1,44 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 Juniper Networks, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_OFW_MACHDEP_H_ #define _MACHINE_OFW_MACHDEP_H_ #include #include typedef uint32_t cell_t; struct mem_region { vm_offset_t mr_start; vm_size_t mr_size; }; #endif /* _MACHINE_OFW_MACHDEP_H_ */ Index: head/sys/x86/include/pci_cfgreg.h =================================================================== --- head/sys/x86/include/pci_cfgreg.h (revision 326262) +++ head/sys/x86/include/pci_cfgreg.h (revision 326263) @@ -1,60 +1,62 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1997, Stefan Esser * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ * */ #ifndef __X86_PCI_CFGREG_H__ #define __X86_PCI_CFGREG_H__ #define CONF1_ADDR_PORT 0x0cf8 #define CONF1_DATA_PORT 0x0cfc #define CONF1_ENABLE 0x80000000ul #define CONF1_ENABLE_CHK 0x80000000ul #define CONF1_ENABLE_MSK 0x7f000000ul #define CONF1_ENABLE_CHK1 0xff000001ul #define CONF1_ENABLE_MSK1 0x80000001ul #define CONF1_ENABLE_RES1 0x80000000ul #define CONF2_ENABLE_PORT 0x0cf8 #define CONF2_FORWARD_PORT 0x0cfa #define CONF2_ENABLE_CHK 0x0e #define CONF2_ENABLE_RES 0x0e rman_res_t hostb_alloc_start(int type, rman_res_t start, rman_res_t end, rman_res_t count); int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus); int pci_cfgregopen(void); u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes); void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes); #ifdef __HAVE_PIR void pci_pir_open(void); int pci_pir_probe(int bus, int require_parse); int pci_pir_route_interrupt(int bus, int device, int func, int pin); #endif #endif /* !__X86_PCI_CFGREG_H__ */ Index: head/sys/x86/include/sigframe.h =================================================================== --- head/sys/x86/include/sigframe.h (revision 326262) +++ head/sys/x86/include/sigframe.h (revision 326263) @@ -1,72 +1,74 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 1999 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_SIGFRAME_H_ #define _X86_SIGFRAME_H_ /* * Signal frames, arguments passed to application signal handlers. */ #ifdef __i386__ struct sigframe { /* * The first four members may be used by applications. * * NOTE: The 4th argument is undocumented, ill commented * on and seems to be somewhat BSD "standard". Handlers * installed with sigvec may be using it. 
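 *
 * As an illustration (a hypothetical handler, not part of this header),
 * such a legacy handler would have the signature
 *
 *	void handler(int sig, int code, struct sigcontext *scp, char *addr);
 *
 * with `addr' delivered through the undocumented fourth member below.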
*/ register_t sf_signum; register_t sf_siginfo; /* code or pointer to sf_si */ register_t sf_ucontext; /* points to sf_uc */ register_t sf_addr; /* undocumented 4th arg */ union { __siginfohandler_t *sf_action; __sighandler_t *sf_handler; } sf_ahu; ucontext_t sf_uc; /* = *sf_ucontext */ siginfo_t sf_si; /* = *sf_siginfo (SA_SIGINFO case) */ }; #endif /* __i386__ */ #ifdef __amd64__ struct sigframe { union { __siginfohandler_t *sf_action; __sighandler_t *sf_handler; } sf_ahu; ucontext_t sf_uc; /* = *sf_ucontext */ siginfo_t sf_si; /* = *sf_siginfo (SA_SIGINFO case) */ }; #endif /* __amd64__ */ #endif /* _X86_SIGFRAME_H_ */ Index: head/sys/x86/include/ucontext.h =================================================================== --- head/sys/x86/include/ucontext.h (revision 326262) +++ head/sys/x86/include/ucontext.h (revision 326263) @@ -1,165 +1,167 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 2003 Peter Wemm * Copyright (c) 1999 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_UCONTEXT_H_ #define _X86_UCONTEXT_H_ #ifdef __i386__ /* Keep _MC_* values similar to amd64 */ #define _MC_HASSEGS 0x1 #define _MC_HASBASES 0x2 #define _MC_HASFPXSTATE 0x4 #define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES | _MC_HASFPXSTATE) typedef struct __mcontext { /* * The definition of mcontext_t must match the layout of * struct sigcontext after the sc_mask member. This is so * that we can support sigcontext and ucontext_t at the same * time. */ __register_t mc_onstack; /* XXX - sigcontext compat. 
*/ __register_t mc_gs; /* machine state (struct trapframe) */ __register_t mc_fs; __register_t mc_es; __register_t mc_ds; __register_t mc_edi; __register_t mc_esi; __register_t mc_ebp; __register_t mc_isp; __register_t mc_ebx; __register_t mc_edx; __register_t mc_ecx; __register_t mc_eax; __register_t mc_trapno; __register_t mc_err; __register_t mc_eip; __register_t mc_cs; __register_t mc_eflags; __register_t mc_esp; __register_t mc_ss; int mc_len; /* sizeof(mcontext_t) */ #define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */ #define _MC_FPFMT_387 0x10001 #define _MC_FPFMT_XMM 0x10002 int mc_fpformat; #define _MC_FPOWNED_NONE 0x20000 /* FP state not used */ #define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ #define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ int mc_ownedfp; __register_t mc_flags; /* * See for the internals of mc_fpstate[]. */ int mc_fpstate[128] __aligned(16); __register_t mc_fsbase; __register_t mc_gsbase; __register_t mc_xfpustate; __register_t mc_xfpustate_len; int mc_spare2[4]; } mcontext_t; #endif /* __i386__ */ #ifdef __amd64__ /* * mc_trapno bits. Shall be in sync with TF_XXX. */ #define _MC_HASSEGS 0x1 #define _MC_HASBASES 0x2 #define _MC_HASFPXSTATE 0x4 #define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES | _MC_HASFPXSTATE) typedef struct __mcontext { /* * The definition of mcontext_t must match the layout of * struct sigcontext after the sc_mask member. This is so * that we can support sigcontext and ucontext_t at the same * time. */ __register_t mc_onstack; /* XXX - sigcontext compat. */ __register_t mc_rdi; /* machine state (struct trapframe) */ __register_t mc_rsi; __register_t mc_rdx; __register_t mc_rcx; __register_t mc_r8; __register_t mc_r9; __register_t mc_rax; __register_t mc_rbx; __register_t mc_rbp; __register_t mc_r10; __register_t mc_r11; __register_t mc_r12; __register_t mc_r13; __register_t mc_r14; __register_t mc_r15; __uint32_t mc_trapno; __uint16_t mc_fs; __uint16_t mc_gs; __register_t mc_addr; __uint32_t mc_flags; __uint16_t mc_es; __uint16_t mc_ds; __register_t mc_err; __register_t mc_rip; __register_t mc_cs; __register_t mc_rflags; __register_t mc_rsp; __register_t mc_ss; long mc_len; /* sizeof(mcontext_t) */ #define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */ #define _MC_FPFMT_XMM 0x10002 long mc_fpformat; #define _MC_FPOWNED_NONE 0x20000 /* FP state not used */ #define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ #define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ long mc_ownedfp; /* * See for the internals of mc_fpstate[]. */ long mc_fpstate[64] __aligned(16); __register_t mc_fsbase; __register_t mc_gsbase; __register_t mc_xfpustate; __register_t mc_xfpustate_len; long mc_spare[4]; } mcontext_t; #endif /* __amd64__ */ #endif /* !_X86_UCONTEXT_H_ */ Index: head/sys/x86/include/vdso.h =================================================================== --- head/sys/x86/include/vdso.h (revision 326262) +++ head/sys/x86/include/vdso.h (revision 326263) @@ -1,51 +1,53 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright 2012 Konstantin Belousov . * Copyright 2016 The FreeBSD Foundation. * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_VDSO_H #define _X86_VDSO_H #define VDSO_TIMEHANDS_MD \ uint32_t th_x86_shift; \ uint32_t th_x86_hpet_idx; \ uint32_t th_res[6]; #define VDSO_TH_ALGO_X86_TSC VDSO_TH_ALGO_1 #define VDSO_TH_ALGO_X86_HPET VDSO_TH_ALGO_2 #define VDSO_TH_ALGO_X86_HVTSC VDSO_TH_ALGO_3 /* Hyper-V ref. TSC */ #ifdef _KERNEL #ifdef COMPAT_FREEBSD32 #define VDSO_TIMEHANDS_MD32 VDSO_TIMEHANDS_MD #endif #endif #endif Index: head/sys/x86/iommu/busdma_dmar.c =================================================================== --- head/sys/x86/iommu/busdma_dmar.c (revision 326262) +++ head/sys/x86/iommu/busdma_dmar.c (revision 326263) @@ -1,912 +1,914 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * busdma_dmar.c, the implementation of the busdma(9) interface using * DMAR units from Intel VT-d. */ static bool dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func) { char str[128], *env; int default_bounce; bool ret; static const char bounce_str[] = "bounce"; static const char dmar_str[] = "dmar"; default_bounce = 0; env = kern_getenv("hw.busdma.default"); if (env != NULL) { if (strcmp(env, bounce_str) == 0) default_bounce = 1; else if (strcmp(env, dmar_str) == 0) default_bounce = 0; freeenv(env); } snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d", domain, bus, slot, func); env = kern_getenv(str); if (env == NULL) return (default_bounce != 0); if (strcmp(env, bounce_str) == 0) ret = true; else if (strcmp(env, dmar_str) == 0) ret = false; else ret = default_bounce != 0; freeenv(env); return (ret); } /* * Given original device, find the requester ID that will be seen by * the DMAR unit and used for page table lookup. PCI bridges may take * ownership of transactions from downstream devices, so it may not be * the same as the BSF of the target device. In those cases, all * devices downstream of the bridge must share a single mapping * domain, and must collectively be assigned to use either DMAR or * bounce mapping. */ device_t dmar_get_requester(device_t dev, uint16_t *rid) { devclass_t pci_class; device_t l, pci, pcib, pcip, pcibp, requester; int cap_offset; uint16_t pcie_flags; bool bridge_is_pcie; pci_class = devclass_find("pci"); l = requester = dev; *rid = pci_get_rid(dev); /* * Walk the bridge hierarchy from the target device to the * host port to find the translating bridge nearest the DMAR * unit. */ for (;;) { pci = device_get_parent(l); KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent " "for %s", device_get_name(dev), device_get_name(l))); KASSERT(device_get_devclass(pci) == pci_class, ("dmar_get_requester(%s): non-pci parent %s for %s", device_get_name(dev), device_get_name(pci), device_get_name(l))); pcib = device_get_parent(pci); KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge " "for %s", device_get_name(dev), device_get_name(pci))); /* * The parent of our "bridge" isn't another PCI bus, * so pcib isn't a PCI->PCI bridge but rather a host * port, and the requester ID won't be translated * further. */ pcip = device_get_parent(pcib); if (device_get_devclass(pcip) != pci_class) break; pcibp = device_get_parent(pcip); if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) { /* * Do not stop the loop even if the target * device is PCIe, because it is possible (but * unlikely) to have a PCI->PCIe bridge * somewhere in the hierarchy. */ l = pcib; } else { /* * Device is not PCIe, it cannot be seen as a * requester by DMAR unit. Check whether the * bridge is PCIe. */ bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS, &cap_offset) == 0; requester = pcib; /* * Check for a buggy PCIe/PCI bridge that * doesn't report the express capability. If * the bridge above it is express but isn't a * PCI bridge, then we know pcib is actually a * PCIe/PCI bridge. 
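 *
 * (Concretely: the test below reads PCIER_FLAGS from pcibp; a port type
 * other than PCIEM_TYPE_PCI_BRIDGE there means pcibp itself is not the
 * PCIe->PCI bridge, so pcib must be one even though it hides its
 * express capability.)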
*/ if (!bridge_is_pcie && pci_find_cap(pcibp, PCIY_EXPRESS, &cap_offset) == 0) { pcie_flags = pci_read_config(pcibp, cap_offset + PCIER_FLAGS, 2); if ((pcie_flags & PCIEM_FLAGS_TYPE) != PCIEM_TYPE_PCI_BRIDGE) bridge_is_pcie = true; } if (bridge_is_pcie) { /* * The current device is not PCIe, but * the bridge above it is. This is a * PCIe->PCI bridge. Assume that the * requester ID will be the secondary * bus number with slot and function * set to zero. * * XXX: Doesn't handle the case where * the bridge is PCIe->PCI-X, and the * bridge will only take ownership of * requests in some cases. We should * provide context entries with the * same page tables for taken and * non-taken transactions. */ *rid = PCI_RID(pci_get_bus(l), 0, 0); l = pcibp; } else { /* * Neither the device nor the bridge * above it are PCIe. This is a * conventional PCI->PCI bridge, which * will use the bridge's BSF as the * requester ID. */ *rid = pci_get_rid(pcib); l = pcib; } } } return (requester); } struct dmar_ctx * dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr) { device_t requester; struct dmar_ctx *ctx; bool disabled; uint16_t rid; requester = dmar_get_requester(dev, &rid); /* * If the user requested the IOMMU disabled for the device, we * cannot disable the DMAR, due to possibility of other * devices on the same DMAR still requiring translation. * Instead provide the identity mapping for the device * context. */ disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester), pci_get_bus(requester), pci_get_slot(requester), pci_get_function(requester)); ctx = dmar_get_ctx_for_dev(dmar, requester, rid, disabled, rmrr); if (ctx == NULL) return (NULL); if (disabled) { /* * Keep the first reference on context, release the * later refs. */ DMAR_LOCK(dmar); if ((ctx->flags & DMAR_CTX_DISABLED) == 0) { ctx->flags |= DMAR_CTX_DISABLED; DMAR_UNLOCK(dmar); } else { dmar_free_ctx_locked(dmar, ctx); } ctx = NULL; } return (ctx); } bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child) { struct dmar_unit *dmar; struct dmar_ctx *ctx; bus_dma_tag_t res; dmar = dmar_find(child); /* Not in scope of any DMAR ? */ if (dmar == NULL) return (NULL); if (!dmar->dma_enabled) return (NULL); dmar_quirks_pre_use(dmar); dmar_instantiate_rmrr_ctxs(dmar); ctx = dmar_instantiate_ctx(dmar, child, false); res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag; return (res); } static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map"); static void dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map); static int dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { struct bus_dma_tag_dmar *newtag, *oldtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof(struct bus_dma_tag_dmar), (void **)&newtag); if (error != 0) goto out; oldtag = (struct bus_dma_tag_dmar *)parent; newtag->common.impl = &bus_dma_dmar_impl; newtag->ctx = oldtag->ctx; newtag->owner = oldtag->owner; *dmat = (bus_dma_tag_t)newtag; out: CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? 
newtag->common.flags : 0), error); return (error); } static int dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1) { struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent; int error; error = 0; dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (struct bus_dma_tag_dmar *)dmat->common.parent; if (atomic_fetchadd_int(&dmat->common.ref_count, -1) == 1) { if (dmat == &dmat->ctx->ctx_tag) dmar_free_ctx(dmat->ctx); free(dmat->segments, M_DMAR_DMAMAP); free(dmat, M_DEVBUF); dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } static int dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); tag = (struct bus_dma_tag_dmar *)dmat; map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO); if (map == NULL) { *mapp = NULL; return (ENOMEM); } if (tag->segments == NULL) { tag->segments = malloc(sizeof(bus_dma_segment_t) * tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT); if (tag->segments == NULL) { free(map, M_DMAR_DMAMAP); *mapp = NULL; return (ENOMEM); } } TAILQ_INIT(&map->map_entries); map->tag = tag; map->locked = true; map->cansleep = false; tag->map_count++; *mapp = (bus_dmamap_t)map; return (0); } static int dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; struct dmar_domain *domain; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; if (map != NULL) { domain = tag->ctx->domain; DMAR_DOMAIN_LOCK(domain); if (!TAILQ_EMPTY(&map->map_entries)) { DMAR_DOMAIN_UNLOCK(domain); return (EBUSY); } DMAR_DOMAIN_UNLOCK(domain); free(map, M_DMAR_DMAMAP); } tag->map_count--; return (0); } static int dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; int error, mflags; vm_memattr_t attr; error = dmar_bus_dmamap_create(dmat, flags, mapp); if (error != 0) return (error); mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK; mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0; attr = (flags & BUS_DMA_NOCACHE) != 0 ? 
VM_MEMATTR_UNCACHEABLE : VM_MEMATTR_DEFAULT; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)*mapp; if (tag->common.maxsize < PAGE_SIZE && tag->common.alignment <= tag->common.maxsize && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags); map->flags |= BUS_DMAMAP_DMAR_MALLOC; } else { *vaddr = (void *)kmem_alloc_attr(kernel_arena, tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR, attr); map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC; } if (*vaddr == NULL) { dmar_bus_dmamap_destroy(dmat, *mapp); *mapp = NULL; return (ENOMEM); } return (0); } static void dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) { free(vaddr, M_DEVBUF); map->flags &= ~BUS_DMAMAP_DMAR_MALLOC; } else { KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0, ("dmar_bus_dmamem_free for non alloced map %p", map)); kmem_free(kernel_arena, (vm_offset_t)vaddr, tag->common.maxsize); map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC; } dmar_bus_dmamap_destroy(dmat, map1); } static int dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag, struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp, struct dmar_map_entries_tailq *unroll_list) { struct dmar_ctx *ctx; struct dmar_domain *domain; struct dmar_map_entry *entry; dmar_gaddr_t size; bus_size_t buflen1; int error, idx, gas_flags, seg; KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset)); if (segs == NULL) segs = tag->segments; ctx = tag->ctx; domain = ctx->domain; seg = *segp; error = 0; idx = 0; while (buflen > 0) { seg++; if (seg >= tag->common.nsegments) { error = EFBIG; break; } buflen1 = buflen > tag->common.maxsegsz ? tag->common.maxsegsz : buflen; size = round_page(offset + buflen1); /* * (Too) optimistically allow split if there is more * than one segment left. */ gas_flags = map->cansleep ? 
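/* Descriptive note: DMAR_GM_CANWAIT lets the guest-address-space allocator sleep when the map itself may sleep. */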
DMAR_GM_CANWAIT : 0; if (seg + 1 < tag->common.nsegments) gas_flags |= DMAR_GM_CANSPLIT; error = dmar_gas_map(domain, &tag->common, size, offset, DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE, gas_flags, ma + idx, &entry); if (error != 0) break; if ((gas_flags & DMAR_GM_CANSPLIT) != 0) { KASSERT(size >= entry->end - entry->start, ("split increased entry size %jx %jx %jx", (uintmax_t)size, (uintmax_t)entry->start, (uintmax_t)entry->end)); size = entry->end - entry->start; if (buflen1 > size) buflen1 = size; } else { KASSERT(entry->end - entry->start == size, ("no split allowed %jx %jx %jx", (uintmax_t)size, (uintmax_t)entry->start, (uintmax_t)entry->end)); } if (offset + buflen1 > size) buflen1 = size - offset; if (buflen1 > tag->common.maxsegsz) buflen1 = tag->common.maxsegsz; KASSERT(((entry->start + offset) & (tag->common.alignment - 1)) == 0, ("alignment failed: ctx %p start 0x%jx offset %x " "align 0x%jx", ctx, (uintmax_t)entry->start, offset, (uintmax_t)tag->common.alignment)); KASSERT(entry->end <= tag->common.lowaddr || entry->start >= tag->common.highaddr, ("entry placement failed: ctx %p start 0x%jx end 0x%jx " "lowaddr 0x%jx highaddr 0x%jx", ctx, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)tag->common.lowaddr, (uintmax_t)tag->common.highaddr)); KASSERT(dmar_test_boundary(entry->start + offset, buflen1, tag->common.boundary), ("boundary failed: ctx %p start 0x%jx end 0x%jx " "boundary 0x%jx", ctx, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)tag->common.boundary)); KASSERT(buflen1 <= tag->common.maxsegsz, ("segment too large: ctx %p start 0x%jx end 0x%jx " "buflen1 0x%jx maxsegsz 0x%jx", ctx, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz)); DMAR_DOMAIN_LOCK(domain); TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); entry->flags |= DMAR_MAP_ENTRY_MAP; DMAR_DOMAIN_UNLOCK(domain); TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link); segs[seg].ds_addr = entry->start + offset; segs[seg].ds_len = buflen1; idx += OFF_TO_IDX(trunc_page(offset + buflen1)); offset += buflen1; offset &= DMAR_PAGE_MASK; buflen -= buflen1; } if (error == 0) *segp = seg; return (error); } static int dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag, struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct dmar_ctx *ctx; struct dmar_domain *domain; struct dmar_map_entry *entry, *entry1; struct dmar_map_entries_tailq unroll_list; int error; ctx = tag->ctx; domain = ctx->domain; atomic_add_long(&ctx->loads, 1); TAILQ_INIT(&unroll_list); error = dmar_bus_dmamap_load_something1(tag, map, ma, offset, buflen, flags, segs, segp, &unroll_list); if (error != 0) { /* * The busdma interface does not allow us to report * partial buffer load, so unfortunately we have to * revert all work done. */ DMAR_DOMAIN_LOCK(domain); TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link, entry1) { /* * No entries other than what we have created * during the failed run might have been * inserted there in between, since we own ctx * pglock. 
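* Moving the failed entries to the domain unload queue below, instead of freeing them here, lets the unload taskqueue do the unmapping, which may need to sleep.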
*/ TAILQ_REMOVE(&map->map_entries, entry, dmamap_link); TAILQ_REMOVE(&unroll_list, entry, unroll_link); TAILQ_INSERT_TAIL(&domain->unload_entries, entry, dmamap_link); } DMAR_DOMAIN_UNLOCK(domain); taskqueue_enqueue(domain->dmar->delayed_taskqueue, &domain->unload_task); } if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 && !map->cansleep) error = EINPROGRESS; if (error == EINPROGRESS) dmar_bus_schedule_dmamap(domain->dmar, map); return (error); } static int dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen, flags, segs, segp)); } static int dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; vm_page_t *ma; vm_paddr_t pstart, pend; int error, i, ma_cnt, offset; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; pstart = trunc_page(buf); pend = round_page(buf + buflen); offset = buf & PAGE_MASK; ma_cnt = OFF_TO_IDX(pend - pstart); ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ? M_WAITOK : M_NOWAIT); if (ma == NULL) return (ENOMEM); for (i = 0; i < ma_cnt; i++) ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE); error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen, flags, segs, segp); free(ma, M_DEVBUF); return (error); } static int dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; vm_page_t *ma, fma; vm_paddr_t pstart, pend, paddr; int error, i, ma_cnt, offset; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; pstart = trunc_page((vm_offset_t)buf); pend = round_page((vm_offset_t)buf + buflen); offset = (vm_offset_t)buf & PAGE_MASK; ma_cnt = OFF_TO_IDX(pend - pstart); ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ? M_WAITOK : M_NOWAIT); if (ma == NULL) return (ENOMEM); if (dumping) { /* * If dumping, do not attempt to call * PHYS_TO_VM_PAGE() at all. It may return non-NULL * but the vm_page returned might not be initialized, * e.g. for the kernel itself. */ KASSERT(pmap == kernel_pmap, ("non-kernel address write")); fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF, M_ZERO | (map->cansleep ? 
M_WAITOK : M_NOWAIT)); if (fma == NULL) { free(ma, M_DEVBUF); return (ENOMEM); } for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) { paddr = pmap_kextract(pstart); vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT); ma[i] = &fma[i]; } } else { fma = NULL; for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) { if (pmap == kernel_pmap) paddr = pmap_kextract(pstart); else paddr = pmap_extract(pmap, pstart); ma[i] = PHYS_TO_VM_PAGE(paddr); KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr, ("PHYS_TO_VM_PAGE failed %jx %jx m %p", (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]), ma[i])); } } error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen, flags, segs, segp); free(ma, M_DEVBUF); free(fma, M_DEVBUF); return (error); } static void dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { struct bus_dmamap_dmar *map; if (map1 == NULL) return; map = (struct bus_dmamap_dmar *)map1; map->mem = *mem; map->tag = (struct bus_dma_tag_dmar *)dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1, bus_dma_segment_t *segs, int nsegs, int error) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; if (!map->locked) { KASSERT(map->cansleep, ("map not locked and not sleepable context %p", map)); /* * We are called from the delayed context. Relock the * driver. */ (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK); map->locked = true; } if (segs == NULL) segs = tag->segments; return (segs); } /* * The limitations of the busdma KPI force the dmar to perform the actual * unload, consisting of the unmapping of the map entries from the page * tables, from the delayed context on i386, since page table page mapping * might require a sleep to be successful. The unfortunate * consequence is that the DMA requests can be served some time after * the bus_dmamap_unload() call has returned. * * On amd64, we assume that sf allocation cannot fail. 
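* (There the sf_buf mapping of a page table page comes from the direct map and is always available, so the unload can run from any context.)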
*/ static void dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; struct dmar_ctx *ctx; struct dmar_domain *domain; #if defined(__amd64__) struct dmar_map_entries_tailq entries; #endif tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; ctx = tag->ctx; domain = ctx->domain; atomic_add_long(&ctx->unloads, 1); #if defined(__i386__) DMAR_DOMAIN_LOCK(domain); TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link); DMAR_DOMAIN_UNLOCK(domain); taskqueue_enqueue(domain->dmar->delayed_taskqueue, &domain->unload_task); #else /* defined(__amd64__) */ TAILQ_INIT(&entries); DMAR_DOMAIN_LOCK(domain); TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link); DMAR_DOMAIN_UNLOCK(domain); THREAD_NO_SLEEPING(); dmar_domain_unload(domain, &entries, false); THREAD_SLEEPING_OK(); KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx)); #endif } static void dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { } struct bus_dma_impl bus_dma_dmar_impl = { .tag_create = dmar_bus_dma_tag_create, .tag_destroy = dmar_bus_dma_tag_destroy, .map_create = dmar_bus_dmamap_create, .map_destroy = dmar_bus_dmamap_destroy, .mem_alloc = dmar_bus_dmamem_alloc, .mem_free = dmar_bus_dmamem_free, .load_phys = dmar_bus_dmamap_load_phys, .load_buffer = dmar_bus_dmamap_load_buffer, .load_ma = dmar_bus_dmamap_load_ma, .map_waitok = dmar_bus_dmamap_waitok, .map_complete = dmar_bus_dmamap_complete, .map_unload = dmar_bus_dmamap_unload, .map_sync = dmar_bus_dmamap_sync }; static void dmar_bus_task_dmamap(void *arg, int pending) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; struct dmar_unit *unit; unit = arg; DMAR_LOCK(unit); while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) { TAILQ_REMOVE(&unit->delayed_maps, map, delay_link); DMAR_UNLOCK(unit); tag = map->tag; map->cansleep = true; map->locked = false; bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); map->cansleep = false; if (map->locked) { (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_UNLOCK); } else map->locked = true; map->cansleep = false; DMAR_LOCK(unit); } DMAR_UNLOCK(unit); } static void dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map) { map->locked = false; DMAR_LOCK(unit); TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link); DMAR_UNLOCK(unit); taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task); } int dmar_init_busdma(struct dmar_unit *unit) { unit->dma_enabled = 1; TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled); TAILQ_INIT(&unit->delayed_maps); TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit); unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK, taskqueue_thread_enqueue, &unit->delayed_taskqueue); taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK, "dmar%d busdma taskq", unit->unit); return (0); } void dmar_fini_busdma(struct dmar_unit *unit) { if (unit->delayed_taskqueue == NULL) return; taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task); taskqueue_free(unit->delayed_taskqueue); unit->delayed_taskqueue = NULL; } Index: head/sys/x86/iommu/busdma_dmar.h =================================================================== --- head/sys/x86/iommu/busdma_dmar.h (revision 326262) +++ head/sys/x86/iommu/busdma_dmar.h (revision 326263) @@ -1,65 +1,67 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The 
FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __X86_IOMMU_BUSDMA_DMAR_H #define __X86_IOMMU_BUSDMA_DMAR_H struct dmar_map_entry; TAILQ_HEAD(dmar_map_entries_tailq, dmar_map_entry); struct bus_dma_tag_dmar { struct bus_dma_tag_common common; struct dmar_ctx *ctx; device_t owner; int map_count; bus_dma_segment_t *segments; }; struct bus_dmamap_dmar { struct bus_dma_tag_dmar *tag; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; struct dmar_map_entries_tailq map_entries; TAILQ_ENTRY(bus_dmamap_dmar) delay_link; bool locked; bool cansleep; int flags; }; #define BUS_DMAMAP_DMAR_MALLOC 0x0001 #define BUS_DMAMAP_DMAR_KMEM_ALLOC 0x0002 extern struct bus_dma_impl bus_dma_dmar_impl; bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child); #endif Index: head/sys/x86/iommu/intel_ctx.c =================================================================== --- head/sys/x86/iommu/intel_ctx.c (revision 326262) +++ head/sys/x86/iommu/intel_ctx.c (revision 326263) @@ -1,783 +1,785 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context"); static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain"); static void dmar_domain_unload_task(void *arg, int pending); static void dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain); static void dmar_domain_destroy(struct dmar_domain *domain); static void dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus) { struct sf_buf *sf; dmar_root_entry_t *re; vm_page_t ctxm; /* * Allocated context page must be linked. */ ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC); if (ctxm != NULL) return; /* * Page not present, allocate and link. Note that other * thread might execute this sequence in parallel. This * should be safe, because the context entries written by both * threads are equal. */ TD_PREP_PINNED_ASSERT; ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO | DMAR_PGF_WAITOK); re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf); re += bus; dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK & VM_PAGE_TO_PHYS(ctxm))); dmar_flush_root_to_ram(dmar, re); dmar_unmap_pgtbl(sf); TD_PINNED_ASSERT; } static dmar_ctx_entry_t * dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp) { dmar_ctx_entry_t *ctxp; ctxp = dmar_map_pgtbl(ctx->domain->dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->rid), DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp); ctxp += ctx->rid & 0xff; return (ctxp); } static void ctx_tag_init(struct dmar_ctx *ctx, device_t dev) { bus_addr_t maxaddr; maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR); ctx->ctx_tag.common.ref_count = 1; /* Prevent free */ ctx->ctx_tag.common.impl = &bus_dma_dmar_impl; ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY; ctx->ctx_tag.common.lowaddr = maxaddr; ctx->ctx_tag.common.highaddr = maxaddr; ctx->ctx_tag.common.maxsize = maxaddr; ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED; ctx->ctx_tag.common.maxsegsz = maxaddr; ctx->ctx_tag.ctx = ctx; ctx->ctx_tag.owner = dev; } static void ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move) { struct dmar_unit *unit; struct dmar_domain *domain; vm_page_t ctx_root; domain = ctx->domain; unit = domain->dmar; KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0), ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx", unit->unit, pci_get_bus(ctx->ctx_tag.owner), pci_get_slot(ctx->ctx_tag.owner), pci_get_function(ctx->ctx_tag.owner), ctxp->ctx1, ctxp->ctx2)); /* * For update due to move, the store is not atomic. It is * possible that DMAR read upper doubleword, while low * doubleword is not yet updated. The domain id is stored in * the upper doubleword, while the table pointer in the lower. 
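* A racing hardware table walk could thus briefly observe the new domain id together with the old address-space root.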
* * There is no good solution, for the same reason it is wrong * to clear the P bit in the ctx entry for update. */ dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) | domain->awlvl); if ((domain->flags & DMAR_DOMAIN_IDMAP) != 0 && (unit->hw_ecap & DMAR_ECAP_PT) != 0) { KASSERT(domain->pgtbl_obj == NULL, ("ctx %p non-null pgtbl_obj", ctx)); dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P); } else { ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0, DMAR_PGF_NOALLOC); dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR | (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) | DMAR_CTX1_P); } dmar_flush_ctx_to_ram(unit, ctxp); } static int dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force) { int error; /* * If dmar declares Caching Mode as Set, follow 11.5 "Caching * Mode Consideration" and do the (global) invalidation of the * negative TLB entries. */ if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force) return (0); if (dmar->qi_enabled) { dmar_qi_invalidate_ctx_glob_locked(dmar); if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force) dmar_qi_invalidate_iotlb_glob_locked(dmar); return (0); } error = dmar_inv_ctx_glob(dmar); if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)) error = dmar_inv_iotlb_glob(dmar); return (error); } static int domain_init_rmrr(struct dmar_domain *domain, device_t dev) { struct dmar_map_entries_tailq rmrr_entries; struct dmar_map_entry *entry, *entry1; vm_page_t *ma; dmar_gaddr_t start, end; vm_pindex_t size, i; int error, error1; error = 0; TAILQ_INIT(&rmrr_entries); dmar_dev_parse_rmrr(domain, dev, &rmrr_entries); TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) { /* * The VT-d specification requires that the start of an * RMRR entry is 4k-aligned. Buggy BIOSes put * anything into the start and end fields. Truncate * and round as necessary. * * We also allow overlapping RMRR entries, see * dmar_gas_alloc_region(). */ start = entry->start; end = entry->end; entry->start = trunc_page(start); entry->end = round_page(end); if (entry->start == entry->end) { /* Workaround for some AMI (?) BIOSes */ if (bootverbose) { device_printf(dev, "BIOS bug: dmar%d RMRR " "region (%jx, %jx) corrected\n", domain->dmar->unit, start, end); } entry->end += DMAR_PAGE_SIZE * 0x20; } size = OFF_TO_IDX(entry->end - entry->start); ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK); for (i = 0; i < size; i++) { ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i, VM_MEMATTR_DEFAULT); } error1 = dmar_gas_map_region(domain, entry, DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE, DMAR_GM_CANWAIT, ma); /* * Non-failed RMRR entries are owned by the context rb * tree. Get rid of the failed entry, but do not stop * the loop. The rest of the parsed RMRR entries are * loaded and removed on context destruction. 
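* A failed entry was never linked into the rb tree, so it can simply be freed here without an unmap.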
*/ if (error1 == 0 && entry->end != entry->start) { DMAR_LOCK(domain->dmar); domain->refs++; /* XXXKIB prevent free */ domain->flags |= DMAR_DOMAIN_RMRR; DMAR_UNLOCK(domain->dmar); } else { if (error1 != 0) { device_printf(dev, "dmar%d failed to map RMRR region (%jx, %jx) %d\n", domain->dmar->unit, start, end, error1); error = error1; } TAILQ_REMOVE(&rmrr_entries, entry, unroll_link); dmar_gas_free_entry(domain, entry); } for (i = 0; i < size; i++) vm_page_putfake(ma[i]); free(ma, M_TEMP); } return (error); } static struct dmar_domain * dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped) { struct dmar_domain *domain; int error, id, mgaw; id = alloc_unr(dmar->domids); if (id == -1) return (NULL); domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO); domain->domain = id; LIST_INIT(&domain->contexts); RB_INIT(&domain->rb_root); TAILQ_INIT(&domain->unload_entries); TASK_INIT(&domain->unload_task, 0, dmar_domain_unload_task, domain); mtx_init(&domain->lock, "dmardom", NULL, MTX_DEF); domain->dmar = dmar; /* * For now, use the maximal usable physical address of the * installed memory to calculate the mgaw on id_mapped domain. * It is useful for the identity mapping, and less so for the * virtualized bus address space. */ domain->end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR; mgaw = dmar_maxaddr2mgaw(dmar, domain->end, !id_mapped); error = domain_set_agaw(domain, mgaw); if (error != 0) goto fail; if (!id_mapped) /* Use all supported address space for remapping. */ domain->end = 1ULL << (domain->agaw - 1); dmar_gas_init_domain(domain); if (id_mapped) { if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) { domain->pgtbl_obj = domain_get_idmap_pgtbl(domain, domain->end); } domain->flags |= DMAR_DOMAIN_IDMAP; } else { error = domain_alloc_pgtbl(domain); if (error != 0) goto fail; /* Disable local apic region access */ error = dmar_gas_reserve_region(domain, 0xfee00000, 0xfeefffff + 1); if (error != 0) goto fail; } return (domain); fail: dmar_domain_destroy(domain); return (NULL); } static struct dmar_ctx * dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid) { struct dmar_ctx *ctx; ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO); ctx->domain = domain; ctx->rid = rid; ctx->refs = 1; return (ctx); } static void dmar_ctx_link(struct dmar_ctx *ctx) { struct dmar_domain *domain; domain = ctx->domain; DMAR_ASSERT_LOCKED(domain->dmar); KASSERT(domain->refs >= domain->ctx_cnt, ("dom %p ref underflow %d %d", domain, domain->refs, domain->ctx_cnt)); domain->refs++; domain->ctx_cnt++; LIST_INSERT_HEAD(&domain->contexts, ctx, link); } static void dmar_ctx_unlink(struct dmar_ctx *ctx) { struct dmar_domain *domain; domain = ctx->domain; DMAR_ASSERT_LOCKED(domain->dmar); KASSERT(domain->refs > 0, ("domain %p ctx dtr refs %d", domain, domain->refs)); KASSERT(domain->ctx_cnt >= domain->refs, ("domain %p ctx dtr refs %d ctx_cnt %d", domain, domain->refs, domain->ctx_cnt)); domain->refs--; domain->ctx_cnt--; LIST_REMOVE(ctx, link); } static void dmar_domain_destroy(struct dmar_domain *domain) { KASSERT(TAILQ_EMPTY(&domain->unload_entries), ("unfinished unloads %p", domain)); KASSERT(LIST_EMPTY(&domain->contexts), ("destroying dom %p with contexts", domain)); KASSERT(domain->ctx_cnt == 0, ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt)); KASSERT(domain->refs == 0, ("destroying dom %p with refs %d", domain, domain->refs)); if ((domain->flags & DMAR_DOMAIN_GAS_INITED) != 0) { DMAR_DOMAIN_LOCK(domain); dmar_gas_fini_domain(domain); DMAR_DOMAIN_UNLOCK(domain); } if ((domain->flags 
& DMAR_DOMAIN_PGTBL_INITED) != 0) { if (domain->pgtbl_obj != NULL) DMAR_DOMAIN_PGLOCK(domain); domain_free_pgtbl(domain); } mtx_destroy(&domain->lock); free_unr(domain->dmar->domids, domain->domain); free(domain, M_DMAR_DOMAIN); } struct dmar_ctx * dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init) { struct dmar_domain *domain, *domain1; struct dmar_ctx *ctx, *ctx1; dmar_ctx_entry_t *ctxp; struct sf_buf *sf; int bus, slot, func, error; bool enable; bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); enable = false; TD_PREP_PINNED_ASSERT; DMAR_LOCK(dmar); ctx = dmar_find_ctx_locked(dmar, rid); error = 0; if (ctx == NULL) { /* * Perform the allocations which require sleep or have * higher chance to succeed if the sleep is allowed. */ DMAR_UNLOCK(dmar); dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid)); domain1 = dmar_domain_alloc(dmar, id_mapped); if (domain1 == NULL) { TD_PINNED_ASSERT; return (NULL); } if (!id_mapped) { error = domain_init_rmrr(domain1, dev); if (error != 0) { dmar_domain_destroy(domain1); TD_PINNED_ASSERT; return (NULL); } } ctx1 = dmar_ctx_alloc(domain1, rid); ctxp = dmar_map_ctx_entry(ctx1, &sf); DMAR_LOCK(dmar); /* * Recheck the contexts, other thread might have * already allocated needed one. */ ctx = dmar_find_ctx_locked(dmar, rid); if (ctx == NULL) { domain = domain1; ctx = ctx1; dmar_ctx_link(ctx); ctx->ctx_tag.owner = dev; ctx_tag_init(ctx, dev); /* * This is the first activated context for the * DMAR unit. Enable the translation after * everything is set up. */ if (LIST_EMPTY(&dmar->domains)) enable = true; LIST_INSERT_HEAD(&dmar->domains, domain, link); ctx_id_entry_init(ctx, ctxp, false); device_printf(dev, "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d " "agaw %d %s-mapped\n", dmar->unit, dmar->segment, bus, slot, func, rid, domain->domain, domain->mgaw, domain->agaw, id_mapped ? "id" : "re"); dmar_unmap_pgtbl(sf); } else { dmar_unmap_pgtbl(sf); dmar_domain_destroy(domain1); /* Nothing needs to be done to destroy ctx1. */ free(ctx1, M_DMAR_CTX); domain = ctx->domain; ctx->refs++; /* tag referenced us */ } } else { domain = ctx->domain; ctx->refs++; /* tag referenced us */ } error = dmar_flush_for_ctx_entry(dmar, enable); if (error != 0) { dmar_free_ctx_locked(dmar, ctx); TD_PINNED_ASSERT; return (NULL); } /* * The dmar lock was potentially dropped between check for the * empty context list and now. Recheck the state of GCMD_TE * to avoid unneeded command. */ if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) { error = dmar_enable_translation(dmar); if (error != 0) { dmar_free_ctx_locked(dmar, ctx); TD_PINNED_ASSERT; return (NULL); } } DMAR_UNLOCK(dmar); TD_PINNED_ASSERT; return (ctx); } int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx) { struct dmar_unit *dmar; struct dmar_domain *old_domain; dmar_ctx_entry_t *ctxp; struct sf_buf *sf; int error; dmar = domain->dmar; old_domain = ctx->domain; if (domain == old_domain) return (0); KASSERT(old_domain->dmar == dmar, ("domain %p %u moving between dmars %u %u", domain, domain->domain, old_domain->dmar->unit, domain->dmar->unit)); TD_PREP_PINNED_ASSERT; ctxp = dmar_map_ctx_entry(ctx, &sf); DMAR_LOCK(dmar); dmar_ctx_unlink(ctx); ctx->domain = domain; dmar_ctx_link(ctx); ctx_id_entry_init(ctx, ctxp, true); dmar_unmap_pgtbl(sf); error = dmar_flush_for_ctx_entry(dmar, true); /* If flush failed, rolling back would not work as well. 
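* The context entry already points at the new domain, and undoing the move would need the same flush that just failed.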
*/ printf("dmar%d rid %x domain %d->%d %s-mapped\n", dmar->unit, ctx->rid, old_domain->domain, domain->domain, (domain->flags & DMAR_DOMAIN_IDMAP) != 0 ? "id" : "re"); dmar_unref_domain_locked(dmar, old_domain); TD_PINNED_ASSERT; return (error); } static void dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain) { DMAR_ASSERT_LOCKED(dmar); KASSERT(domain->refs >= 1, ("dmar %d domain %p refs %u", dmar->unit, domain, domain->refs)); KASSERT(domain->refs > domain->ctx_cnt, ("dmar %d domain %p refs %d ctx_cnt %d", dmar->unit, domain, domain->refs, domain->ctx_cnt)); if (domain->refs > 1) { domain->refs--; DMAR_UNLOCK(dmar); return; } KASSERT((domain->flags & DMAR_DOMAIN_RMRR) == 0, ("lost ref on RMRR domain %p", domain)); LIST_REMOVE(domain, link); DMAR_UNLOCK(dmar); taskqueue_drain(dmar->delayed_taskqueue, &domain->unload_task); dmar_domain_destroy(domain); } void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx) { struct sf_buf *sf; dmar_ctx_entry_t *ctxp; struct dmar_domain *domain; DMAR_ASSERT_LOCKED(dmar); KASSERT(ctx->refs >= 1, ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs)); /* * If our reference is not last, only the dereference should * be performed. */ if (ctx->refs > 1) { ctx->refs--; DMAR_UNLOCK(dmar); return; } KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0, ("lost ref on disabled ctx %p", ctx)); /* * Otherwise, the context entry must be cleared before the * page table is destroyed. The mapping of the context * entries page could require sleep, unlock the dmar. */ DMAR_UNLOCK(dmar); TD_PREP_PINNED_ASSERT; ctxp = dmar_map_ctx_entry(ctx, &sf); DMAR_LOCK(dmar); KASSERT(ctx->refs >= 1, ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs)); /* * Other thread might have referenced the context, in which * case again only the dereference should be performed. */ if (ctx->refs > 1) { ctx->refs--; DMAR_UNLOCK(dmar); dmar_unmap_pgtbl(sf); TD_PINNED_ASSERT; return; } KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0, ("lost ref on disabled ctx %p", ctx)); /* * Clear the context pointer and flush the caches. * XXXKIB: cannot do this if any RMRR entries are still present. */ dmar_pte_clear(&ctxp->ctx1); ctxp->ctx2 = 0; dmar_flush_ctx_to_ram(dmar, ctxp); dmar_inv_ctx_glob(dmar); if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) { if (dmar->qi_enabled) dmar_qi_invalidate_iotlb_glob_locked(dmar); else dmar_inv_iotlb_glob(dmar); } dmar_unmap_pgtbl(sf); domain = ctx->domain; dmar_ctx_unlink(ctx); free(ctx, M_DMAR_CTX); dmar_unref_domain_locked(dmar, domain); TD_PINNED_ASSERT; } void dmar_free_ctx(struct dmar_ctx *ctx) { struct dmar_unit *dmar; dmar = ctx->domain->dmar; DMAR_LOCK(dmar); dmar_free_ctx_locked(dmar, ctx); } /* * Returns with the domain locked. 
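* The caller must already hold the dmar lock; the lookup below neither drops nor reacquires it.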
*/ struct dmar_ctx * dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid) { struct dmar_domain *domain; struct dmar_ctx *ctx; DMAR_ASSERT_LOCKED(dmar); LIST_FOREACH(domain, &dmar->domains, link) { LIST_FOREACH(ctx, &domain->contexts, link) { if (ctx->rid == rid) return (ctx); } } return (NULL); } void dmar_domain_free_entry(struct dmar_map_entry *entry, bool free) { struct dmar_domain *domain; domain = entry->domain; DMAR_DOMAIN_LOCK(domain); if ((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0) dmar_gas_free_region(domain, entry); else dmar_gas_free_space(domain, entry); DMAR_DOMAIN_UNLOCK(domain); if (free) dmar_gas_free_entry(domain, entry); else entry->flags = 0; } void dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free) { struct dmar_unit *unit; unit = entry->domain->dmar; if (unit->qi_enabled) { DMAR_LOCK(unit); dmar_qi_invalidate_locked(entry->domain, entry->start, entry->end - entry->start, &entry->gseq, true); if (!free) entry->flags |= DMAR_MAP_ENTRY_QI_NF; TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link); DMAR_UNLOCK(unit); } else { domain_flush_iotlb_sync(entry->domain, entry->start, entry->end - entry->start); dmar_domain_free_entry(entry, free); } } static bool dmar_domain_unload_emit_wait(struct dmar_domain *domain, struct dmar_map_entry *entry) { if (TAILQ_NEXT(entry, dmamap_link) == NULL) return (true); return (domain->batch_no++ % dmar_batch_coalesce == 0); } void dmar_domain_unload(struct dmar_domain *domain, struct dmar_map_entries_tailq *entries, bool cansleep) { struct dmar_unit *unit; struct dmar_map_entry *entry, *entry1; int error; unit = domain->dmar; TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0, ("not mapped entry %p %p", domain, entry)); error = domain_unmap_buf(domain, entry->start, entry->end - entry->start, cansleep ? DMAR_PGF_WAITOK : 0); KASSERT(error == 0, ("unmap %p error %d", domain, error)); if (!unit->qi_enabled) { domain_flush_iotlb_sync(domain, entry->start, entry->end - entry->start); TAILQ_REMOVE(entries, entry, dmamap_link); dmar_domain_free_entry(entry, true); } } if (TAILQ_EMPTY(entries)) return; KASSERT(unit->qi_enabled, ("loaded entry left")); DMAR_LOCK(unit); TAILQ_FOREACH(entry, entries, dmamap_link) { dmar_qi_invalidate_locked(domain, entry->start, entry->end - entry->start, &entry->gseq, dmar_domain_unload_emit_wait(domain, entry)); } TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link); DMAR_UNLOCK(unit); } static void dmar_domain_unload_task(void *arg, int pending) { struct dmar_domain *domain; struct dmar_map_entries_tailq entries; domain = arg; TAILQ_INIT(&entries); for (;;) { DMAR_DOMAIN_LOCK(domain); TAILQ_SWAP(&domain->unload_entries, &entries, dmar_map_entry, dmamap_link); DMAR_DOMAIN_UNLOCK(domain); if (TAILQ_EMPTY(&entries)) break; dmar_domain_unload(domain, &entries, true); } } Index: head/sys/x86/iommu/intel_dmar.h =================================================================== --- head/sys/x86/iommu/intel_dmar.h (revision 326262) +++ head/sys/x86/iommu/intel_dmar.h (revision 326263) @@ -1,555 +1,557 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __X86_IOMMU_INTEL_DMAR_H #define __X86_IOMMU_INTEL_DMAR_H /* Host or physical memory address, after translation. */ typedef uint64_t dmar_haddr_t; /* Guest or bus address, before translation. */ typedef uint64_t dmar_gaddr_t; struct dmar_qi_genseq { u_int gen; uint32_t seq; }; struct dmar_map_entry { dmar_gaddr_t start; dmar_gaddr_t end; dmar_gaddr_t free_after; /* Free space after the entry */ dmar_gaddr_t free_down; /* Max free space below the current R/B tree node */ u_int flags; TAILQ_ENTRY(dmar_map_entry) dmamap_link; /* Link for dmamap entries */ RB_ENTRY(dmar_map_entry) rb_entry; /* Links for domain entries */ TAILQ_ENTRY(dmar_map_entry) unroll_link; /* Link for unroll after dmamap_load failure */ struct dmar_domain *domain; struct dmar_qi_genseq gseq; }; RB_HEAD(dmar_gas_entries_tree, dmar_map_entry); RB_PROTOTYPE(dmar_gas_entries_tree, dmar_map_entry, rb_entry, dmar_gas_cmp_entries); #define DMAR_MAP_ENTRY_PLACE 0x0001 /* Fake entry */ #define DMAR_MAP_ENTRY_RMRR 0x0002 /* Permanent, not linked by dmamap_link */ #define DMAR_MAP_ENTRY_MAP 0x0004 /* Busdma created, linked by dmamap_link */ #define DMAR_MAP_ENTRY_UNMAPPED 0x0010 /* No backing pages */ #define DMAR_MAP_ENTRY_QI_NF 0x0020 /* qi task, do not free entry */ #define DMAR_MAP_ENTRY_READ 0x1000 /* Read permitted */ #define DMAR_MAP_ENTRY_WRITE 0x2000 /* Write permitted */ #define DMAR_MAP_ENTRY_SNOOP 0x4000 /* Snoop */ #define DMAR_MAP_ENTRY_TM 0x8000 /* Transient */ /* * Locking annotations: * (u) - Protected by dmar unit lock * (d) - Protected by domain lock * (c) - Immutable after initialization */ /* * The domain abstraction. Most non-constant members of the domain * are protected by owning dmar unit lock, not by the domain lock. * Most important, the dmar lock protects the contexts list. * * The domain lock protects the address map for the domain, and list * of unload entries delayed. * * Page tables pages and pages content is protected by the vm object * lock pgtbl_obj, which contains the page tables pages. 
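* For example, linking a context onto the (u) contexts list is done as DMAR_LOCK(dmar); dmar_ctx_link(ctx); DMAR_UNLOCK(dmar); while the (d) rb_root address map is only modified under DMAR_DOMAIN_LOCK(dom).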
*/ struct dmar_domain { int domain; /* (c) DID, written in context entry */ int mgaw; /* (c) Real max address width */ int agaw; /* (c) Adjusted guest address width */ int pglvl; /* (c) The pagelevel */ int awlvl; /* (c) The pagelevel as the bitmask, to set in context entry */ dmar_gaddr_t end; /* (c) Highest address + 1 in the guest AS */ u_int ctx_cnt; /* (u) Number of contexts owned */ u_int refs; /* (u) Refs, including ctx */ struct dmar_unit *dmar; /* (c) */ struct mtx lock; /* (c) */ LIST_ENTRY(dmar_domain) link; /* (u) Member in the dmar list */ LIST_HEAD(, dmar_ctx) contexts; /* (u) */ vm_object_t pgtbl_obj; /* (c) Page table pages */ u_int flags; /* (u) */ u_int entries_cnt; /* (d) */ struct dmar_gas_entries_tree rb_root; /* (d) */ struct dmar_map_entries_tailq unload_entries; /* (d) Entries to unload */ struct dmar_map_entry *first_place, *last_place; /* (d) */ struct task unload_task; /* (c) */ u_int batch_no; }; struct dmar_ctx { struct bus_dma_tag_dmar ctx_tag; /* (c) Root tag */ uint16_t rid; /* (c) pci RID */ uint64_t last_fault_rec[2]; /* Last fault reported */ struct dmar_domain *domain; /* (c) */ LIST_ENTRY(dmar_ctx) link; /* (u) Member in the domain list */ u_int refs; /* (u) References from tags */ u_int flags; /* (u) */ u_long loads; /* atomic updates, for stat only */ u_long unloads; /* same */ }; #define DMAR_DOMAIN_GAS_INITED 0x0001 #define DMAR_DOMAIN_PGTBL_INITED 0x0002 #define DMAR_DOMAIN_IDMAP 0x0010 /* Domain uses identity page table */ #define DMAR_DOMAIN_RMRR 0x0020 /* Domain contains RMRR entry, cannot be turned off */ /* struct dmar_ctx flags */ #define DMAR_CTX_FAULTED 0x0001 /* Fault was reported, last_fault_rec is valid */ #define DMAR_CTX_DISABLED 0x0002 /* Device is disabled, the ephemeral reference is kept to prevent context destruction */ #define DMAR_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_ASSERT_PGLOCKED(dom) \ VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj) #define DMAR_DOMAIN_LOCK(dom) mtx_lock(&(dom)->lock) #define DMAR_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->lock) #define DMAR_DOMAIN_ASSERT_LOCKED(dom) mtx_assert(&(dom)->lock, MA_OWNED) struct dmar_msi_data { int irq; int irq_rid; struct resource *irq_res; void *intr_handle; int (*handler)(void *); int msi_data_reg; int msi_addr_reg; int msi_uaddr_reg; void (*enable_intr)(struct dmar_unit *); void (*disable_intr)(struct dmar_unit *); const char *name; }; #define DMAR_INTR_FAULT 0 #define DMAR_INTR_QI 1 #define DMAR_INTR_TOTAL 2 struct dmar_unit { device_t dev; int unit; uint16_t segment; uint64_t base; /* Resources */ int reg_rid; struct resource *regs; struct dmar_msi_data intrs[DMAR_INTR_TOTAL]; /* Hardware registers cache */ uint32_t hw_ver; uint64_t hw_cap; uint64_t hw_ecap; uint32_t hw_gcmd; /* Data for being a dmar */ struct mtx lock; LIST_HEAD(, dmar_domain) domains; struct unrhdr *domids; vm_object_t ctx_obj; u_int barrier_flags; /* Fault handler data */ struct mtx fault_lock; uint64_t *fault_log; int fault_log_head; int fault_log_tail; int fault_log_size; struct task fault_task; struct taskqueue *fault_taskqueue; /* QI */ int qi_enabled; vm_offset_t inv_queue; vm_size_t inv_queue_size; uint32_t inv_queue_avail; uint32_t inv_queue_tail; volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait descr completion */ uint64_t inv_waitd_seq_hw_phys; uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */ 
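/* A waiter matches on the (gen, seq) pair of struct dmar_qi_genseq, so a wrap of the sequence number is disambiguated by the generation bump below. */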
u_int inv_waitd_gen; /* seq number generation AKA seq overflows */ u_int inv_seq_waiters; /* count of waiters for seq */ u_int inv_queue_full; /* informational counter */ /* IR */ int ir_enabled; vm_paddr_t irt_phys; dmar_irte_t *irt; u_int irte_cnt; vmem_t *irtids; /* Delayed freeing of map entries queue processing */ struct dmar_map_entries_tailq tlb_flush_entries; struct task qi_task; struct taskqueue *qi_taskqueue; /* Busdma delayed map load */ struct task dmamap_load_task; TAILQ_HEAD(, bus_dmamap_dmar) delayed_maps; struct taskqueue *delayed_taskqueue; int dma_enabled; }; #define DMAR_LOCK(dmar) mtx_lock(&(dmar)->lock) #define DMAR_UNLOCK(dmar) mtx_unlock(&(dmar)->lock) #define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->lock, MA_OWNED) #define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock) #define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock) #define DMAR_FAULT_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->fault_lock, MA_OWNED) #define DMAR_IS_COHERENT(dmar) (((dmar)->hw_ecap & DMAR_ECAP_C) != 0) #define DMAR_HAS_QI(dmar) (((dmar)->hw_ecap & DMAR_ECAP_QI) != 0) #define DMAR_X2APIC(dmar) \ (x2apic_mode && ((dmar)->hw_ecap & DMAR_ECAP_EIM) != 0) /* Barrier ids */ #define DMAR_BARRIER_RMRR 0 #define DMAR_BARRIER_USEQ 1 struct dmar_unit *dmar_find(device_t dev); struct dmar_unit *dmar_find_hpet(device_t dev, uint16_t *rid); struct dmar_unit *dmar_find_ioapic(u_int apic_id, uint16_t *rid); u_int dmar_nd2mask(u_int nd); bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl); int domain_set_agaw(struct dmar_domain *domain, int mgaw); int dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less); vm_pindex_t pglvl_max_pages(int pglvl); int domain_is_sp_lvl(struct dmar_domain *domain, int lvl); dmar_gaddr_t pglvl_page_size(int total_pglvl, int lvl); dmar_gaddr_t domain_page_size(struct dmar_domain *domain, int lvl); int calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size, dmar_gaddr_t *isizep); struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags); void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags); void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, struct sf_buf **sf); void dmar_unmap_pgtbl(struct sf_buf *sf); int dmar_load_root_entry_ptr(struct dmar_unit *unit); int dmar_inv_ctx_glob(struct dmar_unit *unit); int dmar_inv_iotlb_glob(struct dmar_unit *unit); int dmar_flush_write_bufs(struct dmar_unit *unit); void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst); void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst); void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst); int dmar_enable_translation(struct dmar_unit *unit); int dmar_disable_translation(struct dmar_unit *unit); int dmar_load_irt_ptr(struct dmar_unit *unit); int dmar_enable_ir(struct dmar_unit *unit); int dmar_disable_ir(struct dmar_unit *unit); bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id); void dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id); uint64_t dmar_get_timeout(void); void dmar_update_timeout(uint64_t newval); int dmar_fault_intr(void *arg); void dmar_enable_fault_intr(struct dmar_unit *unit); void dmar_disable_fault_intr(struct dmar_unit *unit); int dmar_init_fault_log(struct dmar_unit *unit); void dmar_fini_fault_log(struct dmar_unit *unit); int dmar_qi_intr(void *arg); void dmar_enable_qi_intr(struct dmar_unit *unit); void dmar_disable_qi_intr(struct dmar_unit *unit); int dmar_init_qi(struct dmar_unit *unit); void 
dmar_fini_qi(struct dmar_unit *unit); void dmar_qi_invalidate_locked(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t size, struct dmar_qi_genseq *psec, bool emit_wait); void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit); void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit); void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit); void dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt); vm_object_t domain_get_idmap_pgtbl(struct dmar_domain *domain, dmar_gaddr_t maxaddr); void put_idmap_pgtbl(vm_object_t obj); int domain_map_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags); int domain_unmap_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, int flags); void domain_flush_iotlb_sync(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size); int domain_alloc_pgtbl(struct dmar_domain *domain); void domain_free_pgtbl(struct dmar_domain *domain); struct dmar_ctx *dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr); struct dmar_ctx *dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init); int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx); void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx); void dmar_free_ctx(struct dmar_ctx *ctx); struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid); void dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free); void dmar_domain_unload(struct dmar_domain *domain, struct dmar_map_entries_tailq *entries, bool cansleep); void dmar_domain_free_entry(struct dmar_map_entry *entry, bool free); int dmar_init_busdma(struct dmar_unit *unit); void dmar_fini_busdma(struct dmar_unit *unit); device_t dmar_get_requester(device_t dev, uint16_t *rid); void dmar_gas_init_domain(struct dmar_domain *domain); void dmar_gas_fini_domain(struct dmar_domain *domain); struct dmar_map_entry *dmar_gas_alloc_entry(struct dmar_domain *domain, u_int flags); void dmar_gas_free_entry(struct dmar_domain *domain, struct dmar_map_entry *entry); void dmar_gas_free_space(struct dmar_domain *domain, struct dmar_map_entry *entry); int dmar_gas_map(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct dmar_map_entry **res); void dmar_gas_free_region(struct dmar_domain *domain, struct dmar_map_entry *entry); int dmar_gas_map_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma); int dmar_gas_reserve_region(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t end); void dmar_dev_parse_rmrr(struct dmar_domain *domain, device_t dev, struct dmar_map_entries_tailq *rmrr_entries); int dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar); void dmar_quirks_post_ident(struct dmar_unit *dmar); void dmar_quirks_pre_use(struct dmar_unit *dmar); int dmar_init_irt(struct dmar_unit *unit); void dmar_fini_irt(struct dmar_unit *unit); #define DMAR_GM_CANWAIT 0x0001 #define DMAR_GM_CANSPLIT 0x0002 #define DMAR_PGF_WAITOK 0x0001 #define DMAR_PGF_ZERO 0x0002 #define DMAR_PGF_ALLOC 0x0004 #define DMAR_PGF_NOALLOC 0x0008 #define DMAR_PGF_OBJL 0x0010 extern dmar_haddr_t dmar_high; extern int haw; extern int dmar_tbl_pagecnt; extern int dmar_match_verbose; extern int dmar_batch_coalesce; extern int dmar_check_free; static inline uint32_t dmar_read4(const struct dmar_unit *unit, int 
reg) { return (bus_read_4(unit->regs, reg)); } static inline uint64_t dmar_read8(const struct dmar_unit *unit, int reg) { #ifdef __i386__ uint32_t high, low; low = bus_read_4(unit->regs, reg); high = bus_read_4(unit->regs, reg + 4); return (low | ((uint64_t)high << 32)); #else return (bus_read_8(unit->regs, reg)); #endif } static inline void dmar_write4(const struct dmar_unit *unit, int reg, uint32_t val) { KASSERT(reg != DMAR_GCMD_REG || (val & DMAR_GCMD_TE) == (unit->hw_gcmd & DMAR_GCMD_TE), ("dmar%d clearing TE 0x%08x 0x%08x", unit->unit, unit->hw_gcmd, val)); bus_write_4(unit->regs, reg, val); } static inline void dmar_write8(const struct dmar_unit *unit, int reg, uint64_t val) { KASSERT(reg != DMAR_GCMD_REG, ("8byte GCMD write")); #ifdef __i386__ uint32_t high, low; low = val; high = val >> 32; bus_write_4(unit->regs, reg, low); bus_write_4(unit->regs, reg + 4, high); #else bus_write_8(unit->regs, reg, val); #endif } /* * dmar_pte_store and dmar_pte_clear ensure that on i386, 32bit writes * are issued in the correct order. For store, the lower word, * containing the P or R and W bits, is set only after the high word * is written. For clear, the P bit is cleared first, then the high * word is cleared. * * dmar_pte_update updates the pte. For amd64, the update is atomic. * For i386, it first disables the entry by clearing the word * containing the P bit, and then defer to dmar_pte_store. The locked * cmpxchg8b is probably available on any machine having DMAR support, * but interrupt translation table may be mapped uncached. */ static inline void dmar_pte_store1(volatile uint64_t *dst, uint64_t val) { #ifdef __i386__ volatile uint32_t *p; uint32_t hi, lo; hi = val >> 32; lo = val; p = (volatile uint32_t *)dst; *(p + 1) = hi; *p = lo; #else *dst = val; #endif } static inline void dmar_pte_store(volatile uint64_t *dst, uint64_t val) { KASSERT(*dst == 0, ("used pte %p oldval %jx newval %jx", dst, (uintmax_t)*dst, (uintmax_t)val)); dmar_pte_store1(dst, val); } static inline void dmar_pte_update(volatile uint64_t *dst, uint64_t val) { #ifdef __i386__ volatile uint32_t *p; p = (volatile uint32_t *)dst; *p = 0; #endif dmar_pte_store1(dst, val); } static inline void dmar_pte_clear(volatile uint64_t *dst) { #ifdef __i386__ volatile uint32_t *p; p = (volatile uint32_t *)dst; *p = 0; *(p + 1) = 0; #else *dst = 0; #endif } static inline bool dmar_test_boundary(dmar_gaddr_t start, dmar_gaddr_t size, dmar_gaddr_t boundary) { if (boundary == 0) return (true); return (start + size <= ((start + boundary) & ~(boundary - 1))); } extern struct timespec dmar_hw_timeout; #define DMAR_WAIT_UNTIL(cond) \ { \ struct timespec last, curr; \ bool forever; \ \ if (dmar_hw_timeout.tv_sec == 0 && \ dmar_hw_timeout.tv_nsec == 0) { \ forever = true; \ } else { \ forever = false; \ nanouptime(&curr); \ last = curr; \ timespecadd(&last, &dmar_hw_timeout); \ } \ for (;;) { \ if (cond) { \ error = 0; \ break; \ } \ nanouptime(&curr); \ if (!forever && timespeccmp(&last, &curr, <)) { \ error = ETIMEDOUT; \ break; \ } \ cpu_spinwait(); \ } \ } #ifdef INVARIANTS #define TD_PREP_PINNED_ASSERT \ int old_td_pinned; \ old_td_pinned = curthread->td_pinned #define TD_PINNED_ASSERT \ KASSERT(curthread->td_pinned == old_td_pinned, \ ("pin count leak: %d %d %s:%d", curthread->td_pinned, \ old_td_pinned, __FILE__, __LINE__)) #else #define TD_PREP_PINNED_ASSERT #define TD_PINNED_ASSERT #endif #endif Index: head/sys/x86/iommu/intel_drv.c =================================================================== --- head/sys/x86/iommu/intel_drv.c 
(revision 326262) +++ head/sys/x86/iommu/intel_drv.c (revision 326263) @@ -1,1311 +1,1313 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #if defined(__amd64__) #define DEV_APIC #else #include "opt_apic.h" #endif #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include "pcib_if.h" #include #include #include #endif #define DMAR_FAULT_IRQ_RID 0 #define DMAR_QI_IRQ_RID 1 #define DMAR_REG_RID 2 static devclass_t dmar_devclass; static device_t *dmar_devs; static int dmar_devcnt; typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *); static void dmar_iterate_tbl(dmar_iter_t iter, void *arg) { ACPI_TABLE_DMAR *dmartbl; ACPI_DMAR_HEADER *dmarh; char *ptr, *ptrend; ACPI_STATUS status; status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl); if (ACPI_FAILURE(status)) return; ptr = (char *)dmartbl + sizeof(*dmartbl); ptrend = (char *)dmartbl + dmartbl->Header.Length; for (;;) { if (ptr >= ptrend) break; dmarh = (ACPI_DMAR_HEADER *)ptr; if (dmarh->Length <= 0) { printf("dmar_identify: corrupted DMAR table, l %d\n", dmarh->Length); break; } ptr += dmarh->Length; if (!iter(dmarh, arg)) break; } AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl); } struct find_iter_args { int i; ACPI_DMAR_HARDWARE_UNIT *res; }; static int dmar_find_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { struct find_iter_args *fia; if (dmarh->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT) return (1); fia = arg; if (fia->i == 0) { fia->res = (ACPI_DMAR_HARDWARE_UNIT *)dmarh; return (0); } fia->i--; return (1); } static ACPI_DMAR_HARDWARE_UNIT * dmar_find_by_index(int idx) { struct find_iter_args fia; fia.i = idx; fia.res = NULL; dmar_iterate_tbl(dmar_find_iter, &fia); return (fia.res); } static int dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_UNIT) 
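/* Count only DRHD (hardware unit) entries; other DMAR structure types are ignored here. */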
dmar_devcnt++; return (1); } static int dmar_enable = 0; static void dmar_identify(driver_t *driver, device_t parent) { ACPI_TABLE_DMAR *dmartbl; ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_STATUS status; int haw, i, error; if (acpi_disabled("dmar")) return; TUNABLE_INT_FETCH("hw.dmar.enable", &dmar_enable); if (!dmar_enable) return; #ifdef INVARIANTS TUNABLE_INT_FETCH("hw.dmar.check_free", &dmar_check_free); #endif TUNABLE_INT_FETCH("hw.dmar.match_verbose", &dmar_match_verbose); status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl); if (ACPI_FAILURE(status)) return; haw = dmartbl->Width + 1; if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR) dmar_high = BUS_SPACE_MAXADDR; else dmar_high = 1ULL << (haw + 1); if (bootverbose) { printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width, (unsigned)dmartbl->Flags, "\020\001INTR_REMAP\002X2APIC_OPT_OUT"); } AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl); dmar_iterate_tbl(dmar_count_iter, NULL); if (dmar_devcnt == 0) return; dmar_devs = malloc(sizeof(device_t) * dmar_devcnt, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < dmar_devcnt; i++) { dmarh = dmar_find_by_index(i); if (dmarh == NULL) { printf("dmar_identify: cannot find HWUNIT %d\n", i); continue; } dmar_devs[i] = BUS_ADD_CHILD(parent, 1, "dmar", i); if (dmar_devs[i] == NULL) { printf("dmar_identify: cannot create instance %d\n", i); continue; } error = bus_set_resource(dmar_devs[i], SYS_RES_MEMORY, DMAR_REG_RID, dmarh->Address, PAGE_SIZE); if (error != 0) { printf( "dmar%d: unable to alloc register window at 0x%08jx: error %d\n", i, (uintmax_t)dmarh->Address, error); device_delete_child(parent, dmar_devs[i]); dmar_devs[i] = NULL; } } } static int dmar_probe(device_t dev) { if (acpi_get_handle(dev) != NULL) return (ENXIO); device_set_desc(dev, "DMA remap"); return (BUS_PROBE_NOWILDCARD); } static void dmar_release_intr(device_t dev, struct dmar_unit *unit, int idx) { struct dmar_msi_data *dmd; dmd = &unit->intrs[idx]; if (dmd->irq == -1) return; bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)), dev, dmd->irq); dmd->irq = -1; } static void dmar_release_resources(device_t dev, struct dmar_unit *unit) { int i; dmar_fini_busdma(unit); dmar_fini_irt(unit); dmar_fini_qi(unit); dmar_fini_fault_log(unit); for (i = 0; i < DMAR_INTR_TOTAL; i++) dmar_release_intr(dev, unit, i); if (unit->regs != NULL) { bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid, unit->regs); bus_release_resource(dev, SYS_RES_MEMORY, unit->reg_rid, unit->regs); unit->regs = NULL; } if (unit->domids != NULL) { delete_unrhdr(unit->domids); unit->domids = NULL; } if (unit->ctx_obj != NULL) { vm_object_deallocate(unit->ctx_obj); unit->ctx_obj = NULL; } } static int dmar_alloc_irq(device_t dev, struct dmar_unit *unit, int idx) { device_t pcib; struct dmar_msi_data *dmd; uint64_t msi_addr; uint32_t msi_data; int error; dmd = &unit->intrs[idx]; pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */ error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq); if (error != 0) { device_printf(dev, "cannot allocate %s interrupt, %d\n", dmd->name, error); goto err1; } error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq, 1); if (error != 0) { device_printf(dev, "cannot set %s interrupt resource, %d\n", dmd->name, error); goto err2; } dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &dmd->irq_rid, RF_ACTIVE); if (dmd->irq_res == NULL) {
device_printf(dev, "cannot allocate resource for %s interrupt\n", dmd->name); error = ENXIO; goto err3; } error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC, dmd->handler, NULL, unit, &dmd->intr_handle); if (error != 0) { device_printf(dev, "cannot setup %s interrupt, %d\n", dmd->name, error); goto err4; } bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name); error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data); if (error != 0) { device_printf(dev, "cannot map %s interrupt, %d\n", dmd->name, error); goto err5; } dmar_write4(unit, dmd->msi_data_reg, msi_data); dmar_write4(unit, dmd->msi_addr_reg, msi_addr); /* Only for xAPIC mode */ dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32); return (0); err5: bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); err4: bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); err3: bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); err2: PCIB_RELEASE_MSIX(pcib, dev, dmd->irq); dmd->irq = -1; err1: return (error); } #ifdef DEV_APIC static int dmar_remap_intr(device_t dev, device_t child, u_int irq) { struct dmar_unit *unit; struct dmar_msi_data *dmd; uint64_t msi_addr; uint32_t msi_data; int i, error; unit = device_get_softc(dev); for (i = 0; i < DMAR_INTR_TOTAL; i++) { dmd = &unit->intrs[i]; if (irq == dmd->irq) { error = PCIB_MAP_MSI(device_get_parent( device_get_parent(dev)), dev, irq, &msi_addr, &msi_data); if (error != 0) return (error); DMAR_LOCK(unit); (dmd->disable_intr)(unit); dmar_write4(unit, dmd->msi_data_reg, msi_data); dmar_write4(unit, dmd->msi_addr_reg, msi_addr); dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32); (dmd->enable_intr)(unit); DMAR_UNLOCK(unit); return (0); } } return (ENOENT); } #endif static void dmar_print_caps(device_t dev, struct dmar_unit *unit, ACPI_DMAR_HARDWARE_UNIT *dmaru) { uint32_t caphi, ecaphi; device_printf(dev, "regs@0x%08jx, ver=%d.%d, seg=%d, flags=<%b>\n", (uintmax_t)dmaru->Address, DMAR_MAJOR_VER(unit->hw_ver), DMAR_MINOR_VER(unit->hw_ver), dmaru->Segment, dmaru->Flags, "\020\001INCLUDE_ALL_PCI"); caphi = unit->hw_cap >> 32; device_printf(dev, "cap=%b,", (u_int)unit->hw_cap, "\020\004AFL\005WBF\006PLMR\007PHMR\010CM\027ZLR\030ISOCH"); printf("%b, ", caphi, "\020\010PSI\027DWD\030DRD\031FL1GP\034PSI"); printf("ndoms=%d, sagaw=%d, mgaw=%d, fro=%d, nfr=%d, superp=%d", DMAR_CAP_ND(unit->hw_cap), DMAR_CAP_SAGAW(unit->hw_cap), DMAR_CAP_MGAW(unit->hw_cap), DMAR_CAP_FRO(unit->hw_cap), DMAR_CAP_NFR(unit->hw_cap), DMAR_CAP_SPS(unit->hw_cap)); if ((unit->hw_cap & DMAR_CAP_PSI) != 0) printf(", mamv=%d", DMAR_CAP_MAMV(unit->hw_cap)); printf("\n"); ecaphi = unit->hw_ecap >> 32; device_printf(dev, "ecap=%b,", (u_int)unit->hw_ecap, "\020\001C\002QI\003DI\004IR\005EIM\007PT\010SC\031ECS\032MTS" "\033NEST\034DIS\035PASID\036PRS\037ERS\040SRS"); printf("%b, ", ecaphi, "\020\002NWFS\003EAFS"); printf("mhmw=%d, iro=%d\n", DMAR_ECAP_MHMV(unit->hw_ecap), DMAR_ECAP_IRO(unit->hw_ecap)); } static int dmar_attach(device_t dev) { struct dmar_unit *unit; ACPI_DMAR_HARDWARE_UNIT *dmaru; uint64_t timeout; int i, error; unit = device_get_softc(dev); unit->dev = dev; unit->unit = device_get_unit(dev); dmaru = dmar_find_by_index(unit->unit); if (dmaru == NULL) return (EINVAL); unit->segment = dmaru->Segment; unit->base = dmaru->Address; unit->reg_rid = DMAR_REG_RID; unit->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &unit->reg_rid, RF_ACTIVE); if (unit->regs == NULL) { device_printf(dev, "cannot allocate register window\n"); return (ENOMEM); } unit->hw_ver = 
dmar_read4(unit, DMAR_VER_REG); unit->hw_cap = dmar_read8(unit, DMAR_CAP_REG); unit->hw_ecap = dmar_read8(unit, DMAR_ECAP_REG); if (bootverbose) dmar_print_caps(dev, unit, dmaru); dmar_quirks_post_ident(unit); timeout = dmar_get_timeout(); TUNABLE_UINT64_FETCH("hw.dmar.timeout", &timeout); dmar_update_timeout(timeout); for (i = 0; i < DMAR_INTR_TOTAL; i++) unit->intrs[i].irq = -1; unit->intrs[DMAR_INTR_FAULT].name = "fault"; unit->intrs[DMAR_INTR_FAULT].irq_rid = DMAR_FAULT_IRQ_RID; unit->intrs[DMAR_INTR_FAULT].handler = dmar_fault_intr; unit->intrs[DMAR_INTR_FAULT].msi_data_reg = DMAR_FEDATA_REG; unit->intrs[DMAR_INTR_FAULT].msi_addr_reg = DMAR_FEADDR_REG; unit->intrs[DMAR_INTR_FAULT].msi_uaddr_reg = DMAR_FEUADDR_REG; unit->intrs[DMAR_INTR_FAULT].enable_intr = dmar_enable_fault_intr; unit->intrs[DMAR_INTR_FAULT].disable_intr = dmar_disable_fault_intr; error = dmar_alloc_irq(dev, unit, DMAR_INTR_FAULT); if (error != 0) { dmar_release_resources(dev, unit); return (error); } if (DMAR_HAS_QI(unit)) { unit->intrs[DMAR_INTR_QI].name = "qi"; unit->intrs[DMAR_INTR_QI].irq_rid = DMAR_QI_IRQ_RID; unit->intrs[DMAR_INTR_QI].handler = dmar_qi_intr; unit->intrs[DMAR_INTR_QI].msi_data_reg = DMAR_IEDATA_REG; unit->intrs[DMAR_INTR_QI].msi_addr_reg = DMAR_IEADDR_REG; unit->intrs[DMAR_INTR_QI].msi_uaddr_reg = DMAR_IEUADDR_REG; unit->intrs[DMAR_INTR_QI].enable_intr = dmar_enable_qi_intr; unit->intrs[DMAR_INTR_QI].disable_intr = dmar_disable_qi_intr; error = dmar_alloc_irq(dev, unit, DMAR_INTR_QI); if (error != 0) { dmar_release_resources(dev, unit); return (error); } } mtx_init(&unit->lock, "dmarhw", NULL, MTX_DEF); unit->domids = new_unrhdr(0, dmar_nd2mask(DMAR_CAP_ND(unit->hw_cap)), &unit->lock); LIST_INIT(&unit->domains); /* * 9.2 "Context Entry": * When Caching Mode (CM) field is reported as Set, the * domain-id value of zero is architecturally reserved. * Software must not use domain-id value of zero * when CM is Set. */ if ((unit->hw_cap & DMAR_CAP_CM) != 0) alloc_unr_specific(unit->domids, 0); unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 + DMAR_CTX_CNT), 0, 0, NULL); /* * Allocate and load the root entry table pointer. Enable the * address translation after the required invalidations are * done. 
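 *
 * The load follows the usual DMAR global-command pattern: write the
 * register, set the command bit in DMAR_GCMD_REG, then spin on
 * DMAR_GSTS_REG until the hardware acknowledges.  A minimal sketch,
 * assuming root_phys holds the physical address of page 0 of
 * ctx_obj (the in-tree helper may differ in detail, and
 * DMAR_WAIT_UNTIL(), defined in intel_dmar.h above, needs a local
 * error variable):
 *
 *	dmar_write8(unit, DMAR_RTADDR_REG, root_phys);
 *	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
 *	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) &
 *	    DMAR_GSTS_RTPS) != 0));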
*/ dmar_pgalloc(unit->ctx_obj, 0, DMAR_PGF_WAITOK | DMAR_PGF_ZERO); DMAR_LOCK(unit); error = dmar_load_root_entry_ptr(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } error = dmar_inv_ctx_glob(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) { error = dmar_inv_iotlb_glob(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } } DMAR_UNLOCK(unit); error = dmar_init_fault_log(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_qi(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_irt(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_busdma(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } #ifdef NOTYET DMAR_LOCK(unit); error = dmar_enable_translation(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } DMAR_UNLOCK(unit); #endif return (0); } static int dmar_detach(device_t dev) { return (EBUSY); } static int dmar_suspend(device_t dev) { return (0); } static int dmar_resume(device_t dev) { /* XXXKIB */ return (0); } static device_method_t dmar_methods[] = { DEVMETHOD(device_identify, dmar_identify), DEVMETHOD(device_probe, dmar_probe), DEVMETHOD(device_attach, dmar_attach), DEVMETHOD(device_detach, dmar_detach), DEVMETHOD(device_suspend, dmar_suspend), DEVMETHOD(device_resume, dmar_resume), #ifdef DEV_APIC DEVMETHOD(bus_remap_intr, dmar_remap_intr), #endif DEVMETHOD_END }; static driver_t dmar_driver = { "dmar", dmar_methods, sizeof(struct dmar_unit), }; DRIVER_MODULE(dmar, acpi, dmar_driver, dmar_devclass, 0, 0); MODULE_DEPEND(dmar, acpi, 1, 1, 1); static void dmar_print_path(device_t dev, const char *banner, int busno, int depth, const ACPI_DMAR_PCI_PATH *path) { int i; device_printf(dev, "%s [%d, ", banner, busno); for (i = 0; i < depth; i++) { if (i != 0) printf(", "); printf("(%d, %d)", path[i].Device, path[i].Function); } printf("]\n"); } static int dmar_dev_depth(device_t child) { devclass_t pci_class; device_t bus, pcib; int depth; pci_class = devclass_find("pci"); for (depth = 1; ; depth++) { bus = device_get_parent(child); pcib = device_get_parent(bus); if (device_get_devclass(device_get_parent(pcib)) != pci_class) return (depth); child = pcib; } } static void dmar_dev_path(device_t child, int *busno, ACPI_DMAR_PCI_PATH *path, int depth) { devclass_t pci_class; device_t bus, pcib; pci_class = devclass_find("pci"); for (depth--; depth != -1; depth--) { path[depth].Device = pci_get_slot(child); path[depth].Function = pci_get_function(child); bus = device_get_parent(child); pcib = device_get_parent(bus); if (device_get_devclass(device_get_parent(pcib)) != pci_class) { /* reached a host bridge */ *busno = pcib_get_bus(bus); return; } child = pcib; } panic("wrong depth"); } static int dmar_match_pathes(int busno1, const ACPI_DMAR_PCI_PATH *path1, int depth1, int busno2, const ACPI_DMAR_PCI_PATH *path2, int depth2, enum AcpiDmarScopeType scope_type) { int i, depth; if (busno1 != busno2) return (0); if (scope_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && depth1 != depth2) return (0); depth = depth1; if (depth2 < depth) depth = depth2; for (i = 0; i < depth; i++) { if (path1[i].Device != path2[i].Device || path1[i].Function != path2[i].Function) return (0); } return (1); } static int 
dmar_match_devscope(ACPI_DMAR_DEVICE_SCOPE *devscope, device_t dev, int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len) { ACPI_DMAR_PCI_PATH *path; int path_len; if (devscope->Length < sizeof(*devscope)) { printf("dmar_find: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT && devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_BRIDGE) return (0); path_len = devscope->Length - sizeof(*devscope); if (path_len % 2 != 0) { printf("dmar_find_bsf: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } path_len /= 2; path = (ACPI_DMAR_PCI_PATH *)(devscope + 1); if (path_len == 0) { printf("dmar_find: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } if (dmar_match_verbose) dmar_print_path(dev, "DMAR", devscope->Bus, path_len, path); return (dmar_match_pathes(devscope->Bus, path, path_len, dev_busno, dev_path, dev_path_len, devscope->EntryType)); } struct dmar_unit * dmar_find(device_t dev) { device_t dmar_dev; ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_DMAR_DEVICE_SCOPE *devscope; char *ptr, *ptrend; int i, match, dev_domain, dev_busno, dev_path_len; dmar_dev = NULL; dev_domain = pci_get_domain(dev); dev_path_len = dmar_dev_depth(dev); ACPI_DMAR_PCI_PATH dev_path[dev_path_len]; dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len); if (dmar_match_verbose) dmar_print_path(dev, "PCI", dev_busno, dev_path_len, dev_path); for (i = 0; i < dmar_devcnt; i++) { if (dmar_devs[i] == NULL) continue; dmarh = dmar_find_by_index(i); if (dmarh == NULL) continue; if (dmarh->Segment != dev_domain) continue; if ((dmarh->Flags & ACPI_DMAR_INCLUDE_ALL) != 0) { dmar_dev = dmar_devs[i]; if (dmar_match_verbose) { device_printf(dev, "pci%d:%d:%d:%d matched dmar%d INCLUDE_ALL\n", dev_domain, pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), ((struct dmar_unit *)device_get_softc( dmar_dev))->unit); } goto found; } ptr = (char *)dmarh + sizeof(*dmarh); ptrend = (char *)dmarh + dmarh->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; if (dmar_match_verbose) { device_printf(dev, "pci%d:%d:%d:%d matching dmar%d\n", dev_domain, pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), ((struct dmar_unit *)device_get_softc( dmar_devs[i]))->unit); } match = dmar_match_devscope(devscope, dev, dev_busno, dev_path, dev_path_len); if (dmar_match_verbose) { if (match == -1) printf("table error\n"); else if (match == 0) printf("not matched\n"); else printf("matched\n"); } if (match == -1) return (NULL); else if (match == 1) { dmar_dev = dmar_devs[i]; goto found; } } } return (NULL); found: return (device_get_softc(dmar_dev)); } static struct dmar_unit * dmar_find_nonpci(u_int id, u_int entry_type, uint16_t *rid) { device_t dmar_dev; struct dmar_unit *unit; ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_DMAR_DEVICE_SCOPE *devscope; ACPI_DMAR_PCI_PATH *path; char *ptr, *ptrend; #ifdef DEV_APIC int error; #endif int i; for (i = 0; i < dmar_devcnt; i++) { dmar_dev = dmar_devs[i]; if (dmar_dev == NULL) continue; unit = (struct dmar_unit *)device_get_softc(dmar_dev); dmarh = dmar_find_by_index(i); if (dmarh == NULL) continue; ptr = (char *)dmarh + sizeof(*dmarh); ptrend = (char *)dmarh + dmarh->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; if (devscope->EntryType != entry_type) continue; if (devscope->EnumerationId != id) continue; #ifdef DEV_APIC if (entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) { 
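/*
 * Interrupt remapping validates the source-id of each remappable
 * interrupt request, and an IOAPIC emits its MSI writes with a PCI
 * requester ID.  Prefer the RID already discovered by the APIC
 * code over recomputing it from the DMAR device-scope path below.
 */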
error = ioapic_get_rid(id, rid); /* * If our IOAPIC has PCI bindings then * use the PCI device rid. */ if (error == 0) return (unit); } #endif if (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE) == 2) { if (rid != NULL) { path = (ACPI_DMAR_PCI_PATH *) (devscope + 1); *rid = PCI_RID(devscope->Bus, path->Device, path->Function); } return (unit); } printf( "dmar_find_nonpci: id %d type %d path length != 2\n", id, entry_type); break; } } return (NULL); } struct dmar_unit * dmar_find_hpet(device_t dev, uint16_t *rid) { return (dmar_find_nonpci(hpet_get_uid(dev), ACPI_DMAR_SCOPE_TYPE_HPET, rid)); } struct dmar_unit * dmar_find_ioapic(u_int apic_id, uint16_t *rid) { return (dmar_find_nonpci(apic_id, ACPI_DMAR_SCOPE_TYPE_IOAPIC, rid)); } struct rmrr_iter_args { struct dmar_domain *domain; device_t dev; int dev_domain; int dev_busno; ACPI_DMAR_PCI_PATH *dev_path; int dev_path_len; struct dmar_map_entries_tailq *rmrr_entries; }; static int dmar_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { struct rmrr_iter_args *ria; ACPI_DMAR_RESERVED_MEMORY *resmem; ACPI_DMAR_DEVICE_SCOPE *devscope; struct dmar_map_entry *entry; char *ptr, *ptrend; int match; if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY) return (1); ria = arg; resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh; if (dmar_match_verbose) { printf("RMRR [%jx,%jx] segment %d\n", (uintmax_t)resmem->BaseAddress, (uintmax_t)resmem->EndAddress, resmem->Segment); } if (resmem->Segment != ria->dev_domain) return (1); ptr = (char *)resmem + sizeof(*resmem); ptrend = (char *)resmem + resmem->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; match = dmar_match_devscope(devscope, ria->dev, ria->dev_busno, ria->dev_path, ria->dev_path_len); if (match == 1) { if (dmar_match_verbose) printf("matched\n"); entry = dmar_gas_alloc_entry(ria->domain, DMAR_PGF_WAITOK); entry->start = resmem->BaseAddress; /* The RMRR entry end address is inclusive. 
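 * For example, a BIOS RMRR of [0xbf800000, 0xbfffffff] covers
 * 0x800000 bytes: the byte count is EndAddress - BaseAddress + 1,
 * and the map entry below stores the inclusive limit unchanged.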
*/ entry->end = resmem->EndAddress; TAILQ_INSERT_TAIL(ria->rmrr_entries, entry, unroll_link); } else if (dmar_match_verbose) { printf("not matched, err %d\n", match); } } return (1); } void dmar_dev_parse_rmrr(struct dmar_domain *domain, device_t dev, struct dmar_map_entries_tailq *rmrr_entries) { struct rmrr_iter_args ria; ria.dev_domain = pci_get_domain(dev); ria.dev_path_len = dmar_dev_depth(dev); ACPI_DMAR_PCI_PATH dev_path[ria.dev_path_len]; dmar_dev_path(dev, &ria.dev_busno, dev_path, ria.dev_path_len); if (dmar_match_verbose) { device_printf(dev, "parsing RMRR entries for "); dmar_print_path(dev, "PCI", ria.dev_busno, ria.dev_path_len, dev_path); } ria.domain = domain; ria.dev = dev; ria.dev_path = dev_path; ria.rmrr_entries = rmrr_entries; dmar_iterate_tbl(dmar_rmrr_iter, &ria); } struct inst_rmrr_iter_args { struct dmar_unit *dmar; }; static device_t dmar_path_dev(int segment, int path_len, int busno, const ACPI_DMAR_PCI_PATH *path) { devclass_t pci_class; device_t bus, pcib, dev; int i; pci_class = devclass_find("pci"); dev = NULL; for (i = 0; i < path_len; i++, path++) { dev = pci_find_dbsf(segment, busno, path->Device, path->Function); if (dev == NULL) break; if (i != path_len - 1) { bus = device_get_parent(dev); pcib = device_get_parent(bus); if (device_get_devclass(device_get_parent(pcib)) != pci_class) return (NULL); } busno = pcib_get_bus(dev); } return (dev); } static int dmar_inst_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { const ACPI_DMAR_RESERVED_MEMORY *resmem; const ACPI_DMAR_DEVICE_SCOPE *devscope; struct inst_rmrr_iter_args *iria; const char *ptr, *ptrend; struct dmar_unit *dev_dmar; device_t dev; if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY) return (1); iria = arg; resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh; if (resmem->Segment != iria->dmar->segment) return (1); if (dmar_match_verbose) { printf("dmar%d: RMRR [%jx,%jx]\n", iria->dmar->unit, (uintmax_t)resmem->BaseAddress, (uintmax_t)resmem->EndAddress); } ptr = (const char *)resmem + sizeof(*resmem); ptrend = (const char *)resmem + resmem->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (const ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; /* XXXKIB bridge */ if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT) continue; if (dmar_match_verbose) { dmar_print_path(iria->dmar->dev, "RMRR scope", devscope->Bus, (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2, (const ACPI_DMAR_PCI_PATH *)(devscope + 1)); } dev = dmar_path_dev(resmem->Segment, (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2, devscope->Bus, (const ACPI_DMAR_PCI_PATH *)(devscope + 1)); if (dev == NULL) { if (dmar_match_verbose) printf("null dev\n"); continue; } dev_dmar = dmar_find(dev); if (dev_dmar != iria->dmar) { if (dmar_match_verbose) { printf("dmar%d matched, skipping\n", dev_dmar->unit); } continue; } if (dmar_match_verbose) printf("matched, instantiating RMRR context\n"); dmar_instantiate_ctx(iria->dmar, dev, true); } return (1); } /* * Pre-create all contexts for the DMAR which have RMRR entries. 
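 * Translation can only be enabled once every RMRR region is mapped:
 * firmware-owned devices (e.g. a USB controller emulating a legacy
 * keyboard) keep DMAing into those regions, and turning translation
 * on first would make that traffic fault.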
*/ int dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar) { struct inst_rmrr_iter_args iria; int error; if (!dmar_barrier_enter(dmar, DMAR_BARRIER_RMRR)) return (0); error = 0; iria.dmar = dmar; if (dmar_match_verbose) printf("dmar%d: instantiating RMRR contexts\n", dmar->unit); dmar_iterate_tbl(dmar_inst_rmrr_iter, &iria); DMAR_LOCK(dmar); if (!LIST_EMPTY(&dmar->domains)) { KASSERT((dmar->hw_gcmd & DMAR_GCMD_TE) == 0, ("dmar%d: RMRR not handled but translation is already enabled", dmar->unit)); error = dmar_enable_translation(dmar); } dmar_barrier_exit(dmar, DMAR_BARRIER_RMRR); return (error); } #ifdef DDB #include #include static void dmar_print_domain_entry(const struct dmar_map_entry *entry) { struct dmar_map_entry *l, *r; db_printf( " start %jx end %jx free_after %jx free_down %jx flags %x ", entry->start, entry->end, entry->free_after, entry->free_down, entry->flags); db_printf("left "); l = RB_LEFT(entry, rb_entry); if (l == NULL) db_printf("NULL "); else db_printf("%jx ", l->start); db_printf("right "); r = RB_RIGHT(entry, rb_entry); if (r == NULL) db_printf("NULL"); else db_printf("%jx", r->start); db_printf("\n"); } static void dmar_print_ctx(struct dmar_ctx *ctx) { db_printf( " @%p pci%d:%d:%d refs %d flags %x loads %lu unloads %lu\n", ctx, pci_get_bus(ctx->ctx_tag.owner), pci_get_slot(ctx->ctx_tag.owner), pci_get_function(ctx->ctx_tag.owner), ctx->refs, ctx->flags, ctx->loads, ctx->unloads); } static void dmar_print_domain(struct dmar_domain *domain, bool show_mappings) { struct dmar_map_entry *entry; struct dmar_ctx *ctx; db_printf( " @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n" " ctx_cnt %d flags %x pgobj %p map_ents %u\n", domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl, (uintmax_t)domain->end, domain->refs, domain->ctx_cnt, domain->flags, domain->pgtbl_obj, domain->entries_cnt); if (!LIST_EMPTY(&domain->contexts)) { db_printf(" Contexts:\n"); LIST_FOREACH(ctx, &domain->contexts, link) dmar_print_ctx(ctx); } if (!show_mappings) return; db_printf(" mapped:\n"); RB_FOREACH(entry, dmar_gas_entries_tree, &domain->rb_root) { dmar_print_domain_entry(entry); if (db_pager_quit) break; } if (db_pager_quit) return; db_printf(" unloading:\n"); TAILQ_FOREACH(entry, &domain->unload_entries, dmamap_link) { dmar_print_domain_entry(entry); if (db_pager_quit) break; } } DB_FUNC(dmar_domain, db_dmar_print_domain, db_show_table, CS_OWN, NULL) { struct dmar_unit *unit; struct dmar_domain *domain; struct dmar_ctx *ctx; bool show_mappings, valid; int pci_domain, bus, device, function, i, t; db_expr_t radix; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tSLASH) { t = db_read_token(); if (t != tIDENT) { db_printf("Bad modifier\n"); db_radix = radix; db_skip_to_eol(); return; } show_mappings = strchr(db_tok_string, 'm') != NULL; t = db_read_token(); } else { show_mappings = false; } if (t == tNUMBER) { pci_domain = db_tok_number; t = db_read_token(); if (t == tNUMBER) { bus = db_tok_number; t = db_read_token(); if (t == tNUMBER) { device = db_tok_number; t = db_read_token(); if (t == tNUMBER) { function = db_tok_number; valid = true; } } } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show dmar_domain [/m] " " \n"); return; } for (i = 0; i < dmar_devcnt; i++) { unit = device_get_softc(dmar_devs[i]); LIST_FOREACH(domain, &unit->domains, link) { LIST_FOREACH(ctx, &domain->contexts, link) { if (pci_domain == unit->segment && bus == pci_get_bus(ctx->ctx_tag.owner) && device == pci_get_slot(ctx->ctx_tag.owner) && function 
== pci_get_function(ctx->ctx_tag.owner)) { dmar_print_domain(domain, show_mappings); goto out; } } } } out:; } static void dmar_print_one(int idx, bool show_domains, bool show_mappings) { struct dmar_unit *unit; struct dmar_domain *domain; int i, frir; unit = device_get_softc(dmar_devs[idx]); db_printf("dmar%d at %p, root at 0x%jx, ver 0x%x\n", unit->unit, unit, dmar_read8(unit, DMAR_RTADDR_REG), dmar_read4(unit, DMAR_VER_REG)); db_printf("cap 0x%jx ecap 0x%jx gsts 0x%x fsts 0x%x fectl 0x%x\n", (uintmax_t)dmar_read8(unit, DMAR_CAP_REG), (uintmax_t)dmar_read8(unit, DMAR_ECAP_REG), dmar_read4(unit, DMAR_GSTS_REG), dmar_read4(unit, DMAR_FSTS_REG), dmar_read4(unit, DMAR_FECTL_REG)); if (unit->ir_enabled) { db_printf("ir is enabled; IRT @%p phys 0x%jx maxcnt %d\n", unit->irt, (uintmax_t)unit->irt_phys, unit->irte_cnt); } db_printf("fed 0x%x fea 0x%x feua 0x%x\n", dmar_read4(unit, DMAR_FEDATA_REG), dmar_read4(unit, DMAR_FEADDR_REG), dmar_read4(unit, DMAR_FEUADDR_REG)); db_printf("primary fault log:\n"); for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) { frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16; db_printf(" %d at 0x%x: %jx %jx\n", i, frir, (uintmax_t)dmar_read8(unit, frir), (uintmax_t)dmar_read8(unit, frir + 8)); } if (DMAR_HAS_QI(unit)) { db_printf("ied 0x%x iea 0x%x ieua 0x%x\n", dmar_read4(unit, DMAR_IEDATA_REG), dmar_read4(unit, DMAR_IEADDR_REG), dmar_read4(unit, DMAR_IEUADDR_REG)); if (unit->qi_enabled) { db_printf("qi is enabled: queue @0x%jx (IQA 0x%jx) " "size 0x%jx\n" " head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n" " hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n", (uintmax_t)unit->inv_queue, (uintmax_t)dmar_read8(unit, DMAR_IQA_REG), (uintmax_t)unit->inv_queue_size, dmar_read4(unit, DMAR_IQH_REG), dmar_read4(unit, DMAR_IQT_REG), unit->inv_queue_avail, dmar_read4(unit, DMAR_ICS_REG), dmar_read4(unit, DMAR_IECTL_REG), unit->inv_waitd_seq_hw, &unit->inv_waitd_seq_hw, (uintmax_t)unit->inv_waitd_seq_hw_phys, unit->inv_waitd_seq, unit->inv_waitd_gen); } else { db_printf("qi is disabled\n"); } } if (show_domains) { db_printf("domains:\n"); LIST_FOREACH(domain, &unit->domains, link) { dmar_print_domain(domain, show_mappings); if (db_pager_quit) break; } } } DB_SHOW_COMMAND(dmar, db_dmar_print) { bool show_domains, show_mappings; show_domains = strchr(modif, 'd') != NULL; show_mappings = strchr(modif, 'm') != NULL; if (!have_addr) { db_printf("usage: show dmar [/d] [/m] index\n"); return; } dmar_print_one((int)addr, show_domains, show_mappings); } DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars) { int i; bool show_domains, show_mappings; show_domains = strchr(modif, 'd') != NULL; show_mappings = strchr(modif, 'm') != NULL; for (i = 0; i < dmar_devcnt; i++) { dmar_print_one(i, show_domains, show_mappings); if (db_pager_quit) break; } } #endif Index: head/sys/x86/iommu/intel_fault.c =================================================================== --- head/sys/x86/iommu/intel_fault.c (revision 326262) +++ head/sys/x86/iommu/intel_fault.c (revision 326263) @@ -1,328 +1,330 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Fault interrupt handling for DMARs. If advanced fault logging is * not implemented by hardware, the code emulates it. The fast interrupt * handler flushes the fault registers into a circular buffer at * unit->fault_log, and schedules a task. * * The fast handler is used since faults usually come in bursts, and * the number of fault log registers is limited, e.g. down to one for * 5400 MCH. We are trying to reduce the latency for clearing the fault * register file. The task is usually long-running, since printf() is * slow, but this is not problematic because bursts are rare. * * For the same reason, each translation unit task is executed in its * own thread. * * XXXKIB It seems there is no hardware available which implements * advanced fault logging, so the code to handle AFL is not written.
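 *
 * The log itself is a flat uint64_t array used as a ring of two-word
 * records: slot i holds fault_rec[0] and slot i + 1 holds
 * fault_rec[1], so head and tail advance by two at a time (see
 * dmar_fault_next() below).  head == tail means the ring is empty;
 * when advancing head would hit tail the ring is full and the new
 * record is dropped.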
*/ static int dmar_fault_next(struct dmar_unit *unit, int faultp) { faultp += 2; if (faultp == unit->fault_log_size) faultp = 0; return (faultp); } static void dmar_fault_intr_clear(struct dmar_unit *unit, uint32_t fsts) { uint32_t clear; clear = 0; if ((fsts & DMAR_FSTS_ITE) != 0) { printf("DMAR%d: Invalidation timed out\n", unit->unit); clear |= DMAR_FSTS_ITE; } if ((fsts & DMAR_FSTS_ICE) != 0) { printf("DMAR%d: Invalidation completion error\n", unit->unit); clear |= DMAR_FSTS_ICE; } if ((fsts & DMAR_FSTS_IQE) != 0) { printf("DMAR%d: Invalidation queue error\n", unit->unit); clear |= DMAR_FSTS_IQE; } if ((fsts & DMAR_FSTS_APF) != 0) { printf("DMAR%d: Advanced pending fault\n", unit->unit); clear |= DMAR_FSTS_APF; } if ((fsts & DMAR_FSTS_AFO) != 0) { printf("DMAR%d: Advanced fault overflow\n", unit->unit); clear |= DMAR_FSTS_AFO; } if (clear != 0) dmar_write4(unit, DMAR_FSTS_REG, clear); } int dmar_fault_intr(void *arg) { struct dmar_unit *unit; uint64_t fault_rec[2]; uint32_t fsts; int fri, frir, faultp; bool enqueue; unit = arg; enqueue = false; fsts = dmar_read4(unit, DMAR_FSTS_REG); dmar_fault_intr_clear(unit, fsts); if ((fsts & DMAR_FSTS_PPF) == 0) goto done; fri = DMAR_FSTS_FRI(fsts); for (;;) { frir = (DMAR_CAP_FRO(unit->hw_cap) + fri) * 16; fault_rec[1] = dmar_read8(unit, frir + 8); if ((fault_rec[1] & DMAR_FRCD2_F) == 0) break; fault_rec[0] = dmar_read8(unit, frir); dmar_write4(unit, frir + 12, DMAR_FRCD2_F32); DMAR_FAULT_LOCK(unit); faultp = unit->fault_log_head; if (dmar_fault_next(unit, faultp) == unit->fault_log_tail) { /* XXXKIB log overflow */ } else { unit->fault_log[faultp] = fault_rec[0]; unit->fault_log[faultp + 1] = fault_rec[1]; unit->fault_log_head = dmar_fault_next(unit, faultp); enqueue = true; } DMAR_FAULT_UNLOCK(unit); fri += 1; if (fri >= DMAR_CAP_NFR(unit->hw_cap)) fri = 0; } done: /* * On SandyBridge, due to errata BJ124, IvyBridge errata * BV100, and Haswell errata HSD40, "Spurious Intel VT-d * Interrupts May Occur When the PFO Bit is Set". Handle the * cases by clearing the overflow bit even if no fault is * reported. * * On IvyBridge, errata BV30 states that clearing the * DMAR_FRCD2_F bit in the fault register causes a spurious * interrupt. Do nothing. * */ if ((fsts & DMAR_FSTS_PFO) != 0) { printf("DMAR%d: Fault Overflow\n", unit->unit); dmar_write4(unit, DMAR_FSTS_REG, DMAR_FSTS_PFO); } if (enqueue) { taskqueue_enqueue(unit->fault_taskqueue, &unit->fault_task); } return (FILTER_HANDLED); } static void dmar_fault_task(void *arg, int pending __unused) { struct dmar_unit *unit; struct dmar_ctx *ctx; uint64_t fault_rec[2]; int sid, bus, slot, func, faultp; unit = arg; DMAR_FAULT_LOCK(unit); for (;;) { faultp = unit->fault_log_tail; if (faultp == unit->fault_log_head) break; fault_rec[0] = unit->fault_log[faultp]; fault_rec[1] = unit->fault_log[faultp + 1]; unit->fault_log_tail = dmar_fault_next(unit, faultp); DMAR_FAULT_UNLOCK(unit); sid = DMAR_FRCD2_SID(fault_rec[1]); printf("DMAR%d: ", unit->unit); DMAR_LOCK(unit); ctx = dmar_find_ctx_locked(unit, sid); if (ctx == NULL) { printf("<unknown dev>:"); /* * Note that the slot and function will not be correct * if ARI is in use, but without a ctx entry we have * no way of knowing whether ARI is in use or not.
*/ bus = PCI_RID2BUS(sid); slot = PCI_RID2SLOT(sid); func = PCI_RID2FUNC(sid); } else { ctx->flags |= DMAR_CTX_FAULTED; ctx->last_fault_rec[0] = fault_rec[0]; ctx->last_fault_rec[1] = fault_rec[1]; device_print_prettyname(ctx->ctx_tag.owner); bus = pci_get_bus(ctx->ctx_tag.owner); slot = pci_get_slot(ctx->ctx_tag.owner); func = pci_get_function(ctx->ctx_tag.owner); } DMAR_UNLOCK(unit); printf( "pci%d:%d:%d sid %x fault acc %x adt 0x%x reason 0x%x " "addr %jx\n", bus, slot, func, sid, DMAR_FRCD2_T(fault_rec[1]), DMAR_FRCD2_AT(fault_rec[1]), DMAR_FRCD2_FR(fault_rec[1]), (uintmax_t)fault_rec[0]); DMAR_FAULT_LOCK(unit); } DMAR_FAULT_UNLOCK(unit); } static void dmar_clear_faults(struct dmar_unit *unit) { uint32_t frec, frir, fsts; int i; for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) { frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16; frec = dmar_read4(unit, frir + 12); if ((frec & DMAR_FRCD2_F32) == 0) continue; dmar_write4(unit, frir + 12, DMAR_FRCD2_F32); } fsts = dmar_read4(unit, DMAR_FSTS_REG); dmar_write4(unit, DMAR_FSTS_REG, fsts); } int dmar_init_fault_log(struct dmar_unit *unit) { mtx_init(&unit->fault_lock, "dmarflt", NULL, MTX_SPIN); unit->fault_log_size = 256; /* 128 fault log entries */ TUNABLE_INT_FETCH("hw.dmar.fault_log_size", &unit->fault_log_size); if (unit->fault_log_size % 2 != 0) panic("hw.dmar_fault_log_size must be even"); unit->fault_log = malloc(sizeof(uint64_t) * unit->fault_log_size, M_DEVBUF, M_WAITOK | M_ZERO); TASK_INIT(&unit->fault_task, 0, dmar_fault_task, unit); unit->fault_taskqueue = taskqueue_create_fast("dmarff", M_WAITOK, taskqueue_thread_enqueue, &unit->fault_taskqueue); taskqueue_start_threads(&unit->fault_taskqueue, 1, PI_AV, "dmar%d fault taskq", unit->unit); DMAR_LOCK(unit); dmar_disable_fault_intr(unit); dmar_clear_faults(unit); dmar_enable_fault_intr(unit); DMAR_UNLOCK(unit); return (0); } void dmar_fini_fault_log(struct dmar_unit *unit) { DMAR_LOCK(unit); dmar_disable_fault_intr(unit); DMAR_UNLOCK(unit); if (unit->fault_taskqueue == NULL) return; taskqueue_drain(unit->fault_taskqueue, &unit->fault_task); taskqueue_free(unit->fault_taskqueue); unit->fault_taskqueue = NULL; mtx_destroy(&unit->fault_lock); free(unit->fault_log, M_DEVBUF); unit->fault_log = NULL; unit->fault_log_head = unit->fault_log_tail = 0; } void dmar_enable_fault_intr(struct dmar_unit *unit) { uint32_t fectl; DMAR_ASSERT_LOCKED(unit); fectl = dmar_read4(unit, DMAR_FECTL_REG); fectl &= ~DMAR_FECTL_IM; dmar_write4(unit, DMAR_FECTL_REG, fectl); } void dmar_disable_fault_intr(struct dmar_unit *unit) { uint32_t fectl; DMAR_ASSERT_LOCKED(unit); fectl = dmar_read4(unit, DMAR_FECTL_REG); dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM); } Index: head/sys/x86/iommu/intel_gas.c =================================================================== --- head/sys/x86/iommu/intel_gas.c (revision 326262) +++ head/sys/x86/iommu/intel_gas.c (revision 326263) @@ -1,739 +1,741 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define RB_AUGMENT(entry) dmar_gas_augment_entry(entry) #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Guest Address Space management. */ static uma_zone_t dmar_map_entry_zone; static void intel_gas_init(void) { dmar_map_entry_zone = uma_zcreate("DMAR_MAP_ENTRY", sizeof(struct dmar_map_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL); struct dmar_map_entry * dmar_gas_alloc_entry(struct dmar_domain *domain, u_int flags) { struct dmar_map_entry *res; KASSERT((flags & ~(DMAR_PGF_WAITOK)) == 0, ("unsupported flags %x", flags)); res = uma_zalloc(dmar_map_entry_zone, ((flags & DMAR_PGF_WAITOK) != 0 ? 
M_WAITOK : M_NOWAIT) | M_ZERO); if (res != NULL) { res->domain = domain; atomic_add_int(&domain->entries_cnt, 1); } return (res); } void dmar_gas_free_entry(struct dmar_domain *domain, struct dmar_map_entry *entry) { KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); atomic_subtract_int(&domain->entries_cnt, 1); uma_zfree(dmar_map_entry_zone, entry); } static int dmar_gas_cmp_entries(struct dmar_map_entry *a, struct dmar_map_entry *b) { /* Last entry have zero size, so <= */ KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)", a, (uintmax_t)a->start, (uintmax_t)a->end)); KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)", b, (uintmax_t)b->start, (uintmax_t)b->end)); KASSERT(a->end <= b->start || b->end <= a->start || a->end == a->start || b->end == b->start, ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)", a, (uintmax_t)a->start, (uintmax_t)a->end, b, (uintmax_t)b->start, (uintmax_t)b->end)); if (a->end < b->end) return (-1); else if (b->end < a->end) return (1); return (0); } static void dmar_gas_augment_entry(struct dmar_map_entry *entry) { struct dmar_map_entry *l, *r; for (; entry != NULL; entry = RB_PARENT(entry, rb_entry)) { l = RB_LEFT(entry, rb_entry); r = RB_RIGHT(entry, rb_entry); if (l == NULL && r == NULL) { entry->free_down = entry->free_after; } else if (l == NULL && r != NULL) { entry->free_down = MAX(entry->free_after, r->free_down); } else if (/*l != NULL && */ r == NULL) { entry->free_down = MAX(entry->free_after, l->free_down); } else /* if (l != NULL && r != NULL) */ { entry->free_down = MAX(entry->free_after, l->free_down); entry->free_down = MAX(entry->free_down, r->free_down); } } } RB_GENERATE(dmar_gas_entries_tree, dmar_map_entry, rb_entry, dmar_gas_cmp_entries); static void dmar_gas_fix_free(struct dmar_domain *domain, struct dmar_map_entry *entry) { struct dmar_map_entry *next; next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); entry->free_after = (next != NULL ? 
next->start : domain->end) - entry->end; dmar_gas_augment_entry(entry); } #ifdef INVARIANTS static void dmar_gas_check_free(struct dmar_domain *domain) { struct dmar_map_entry *entry, *next, *l, *r; dmar_gaddr_t v; RB_FOREACH(entry, dmar_gas_entries_tree, &domain->rb_root) { KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); if (next == NULL) { MPASS(entry->free_after == domain->end - entry->end); } else { MPASS(entry->free_after == next->start - entry->end); MPASS(entry->end <= next->start); } l = RB_LEFT(entry, rb_entry); r = RB_RIGHT(entry, rb_entry); if (l == NULL && r == NULL) { MPASS(entry->free_down == entry->free_after); } else if (l == NULL && r != NULL) { MPASS(entry->free_down == MAX(entry->free_after, r->free_down)); } else if (r == NULL) { MPASS(entry->free_down == MAX(entry->free_after, l->free_down)); } else { v = MAX(entry->free_after, l->free_down); v = MAX(v, r->free_down); MPASS(entry->free_down == v); } } } #endif static bool dmar_gas_rb_insert(struct dmar_domain *domain, struct dmar_map_entry *entry) { struct dmar_map_entry *prev, *found; found = RB_INSERT(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_fix_free(domain, entry); prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); if (prev != NULL) dmar_gas_fix_free(domain, prev); return (found == NULL); } static void dmar_gas_rb_remove(struct dmar_domain *domain, struct dmar_map_entry *entry) { struct dmar_map_entry *prev; prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); if (prev != NULL) dmar_gas_fix_free(domain, prev); } void dmar_gas_init_domain(struct dmar_domain *domain) { struct dmar_map_entry *begin, *end; begin = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); end = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); DMAR_DOMAIN_LOCK(domain); KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain)); KASSERT(RB_EMPTY(&domain->rb_root), ("non-empty entries %p", domain)); begin->start = 0; begin->end = DMAR_PAGE_SIZE; begin->free_after = domain->end - begin->end; begin->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED; dmar_gas_rb_insert(domain, begin); end->start = domain->end; end->end = domain->end; end->free_after = 0; end->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED; dmar_gas_rb_insert(domain, end); domain->first_place = begin; domain->last_place = end; domain->flags |= DMAR_DOMAIN_GAS_INITED; DMAR_DOMAIN_UNLOCK(domain); } void dmar_gas_fini_domain(struct dmar_domain *domain) { struct dmar_map_entry *entry, *entry1; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT(domain->entries_cnt == 2, ("domain still in use %p", domain)); entry = RB_MIN(dmar_gas_entries_tree, &domain->rb_root); KASSERT(entry->start == 0, ("start entry start %p", domain)); KASSERT(entry->end == DMAR_PAGE_SIZE, ("start entry end %p", domain)); KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE, ("start entry flags %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_free_entry(domain, entry); entry = RB_MAX(dmar_gas_entries_tree, &domain->rb_root); KASSERT(entry->start == domain->end, ("end entry start %p", domain)); KASSERT(entry->end == domain->end, ("end entry end %p", domain)); KASSERT(entry->free_after == 0, ("end entry free_after %p", domain)); KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE, ("end entry flags %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root,
entry); dmar_gas_free_entry(domain, entry); RB_FOREACH_SAFE(entry, dmar_gas_entries_tree, &domain->rb_root, entry1) { KASSERT((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0, ("non-RMRR entry left %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_free_entry(domain, entry); } } struct dmar_gas_match_args { struct dmar_domain *domain; dmar_gaddr_t size; int offset; const struct bus_dma_tag_common *common; u_int gas_flags; struct dmar_map_entry *entry; }; static bool dmar_gas_match_one(struct dmar_gas_match_args *a, struct dmar_map_entry *prev, dmar_gaddr_t end) { dmar_gaddr_t bs, start; if (a->entry->start + a->size > end) return (false); /* DMAR_PAGE_SIZE to create gap after new entry. */ if (a->entry->start < prev->end + DMAR_PAGE_SIZE || a->entry->start + a->size + a->offset + DMAR_PAGE_SIZE > prev->end + prev->free_after) return (false); /* No boundary crossing. */ if (dmar_test_boundary(a->entry->start + a->offset, a->size, a->common->boundary)) return (true); /* * The start + offset to start + offset + size region crosses * the boundary. Check if there is enough space after the * next boundary after the prev->end. */ bs = rounddown2(a->entry->start + a->offset + a->common->boundary, a->common->boundary); start = roundup2(bs, a->common->alignment); /* DMAR_PAGE_SIZE to create gap after new entry. */ if (start + a->offset + a->size + DMAR_PAGE_SIZE <= prev->end + prev->free_after && start + a->offset + a->size <= end && dmar_test_boundary(start + a->offset, a->size, a->common->boundary)) { a->entry->start = start; return (true); } /* * Not enough space to align at the requested boundary, or * boundary is smaller than the size, but allowed to split. * We already checked that start + size does not overlap end. * * XXXKIB. It is possible that bs is exactly at the start of * the next entry, then we do not have gap. Ignore for now. */ if ((a->gas_flags & DMAR_GM_CANSPLIT) != 0) { a->size = bs - a->entry->start; return (true); } return (false); } static void dmar_gas_match_insert(struct dmar_gas_match_args *a, struct dmar_map_entry *prev) { struct dmar_map_entry *next; bool found; /* * The prev->end is always aligned on the page size, which * causes page alignment for the entry->start too. The size * is checked to be multiple of the page size. * * The page sized gap is created between consequent * allocations to ensure that out-of-bounds accesses fault. 
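 *
 * E.g. with prev->end == 0x3000 and a 0x2000 byte request, the
 * match above picked start >= 0x4000, so the new entry is
 * [0x4000, 0x6000) and the page at [0x3000, 0x4000) stays
 * unmapped: a device overrunning prev faults instead of silently
 * scribbling on the new allocation.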
*/ a->entry->end = a->entry->start + a->size; next = RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, prev); KASSERT(next->start >= a->entry->end && next->start - a->entry->start >= a->size && prev->end <= a->entry->end, ("dmar_gas_match_insert hole failed %p prev (%jx, %jx) " "free_after %jx next (%jx, %jx) entry (%jx, %jx)", a->domain, (uintmax_t)prev->start, (uintmax_t)prev->end, (uintmax_t)prev->free_after, (uintmax_t)next->start, (uintmax_t)next->end, (uintmax_t)a->entry->start, (uintmax_t)a->entry->end)); prev->free_after = a->entry->start - prev->end; a->entry->free_after = next->start - a->entry->end; found = dmar_gas_rb_insert(a->domain, a->entry); KASSERT(found, ("found dup %p start %jx size %jx", a->domain, (uintmax_t)a->entry->start, (uintmax_t)a->size)); a->entry->flags = DMAR_MAP_ENTRY_MAP; KASSERT(RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, a->entry) == prev, ("entry %p prev %p inserted prev %p", a->entry, prev, RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, a->entry))); KASSERT(RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, a->entry) == next, ("entry %p next %p inserted next %p", a->entry, next, RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, a->entry))); } static int dmar_gas_lowermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *prev) { struct dmar_map_entry *l; int ret; if (prev->end < a->common->lowaddr) { a->entry->start = roundup2(prev->end + DMAR_PAGE_SIZE, a->common->alignment); if (dmar_gas_match_one(a, prev, a->common->lowaddr)) { dmar_gas_match_insert(a, prev); return (0); } } if (prev->free_down < a->size + a->offset + DMAR_PAGE_SIZE) return (ENOMEM); l = RB_LEFT(prev, rb_entry); if (l != NULL) { ret = dmar_gas_lowermatch(a, l); if (ret == 0) return (0); } l = RB_RIGHT(prev, rb_entry); if (l != NULL) return (dmar_gas_lowermatch(a, l)); return (ENOMEM); } static int dmar_gas_uppermatch(struct dmar_gas_match_args *a) { struct dmar_map_entry *next, *prev, find_entry; find_entry.start = a->common->highaddr; next = RB_NFIND(dmar_gas_entries_tree, &a->domain->rb_root, &find_entry); if (next == NULL) return (ENOMEM); prev = RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, next); KASSERT(prev != NULL, ("no prev %p %jx", a->domain, (uintmax_t)find_entry.start)); for (;;) { a->entry->start = prev->start + DMAR_PAGE_SIZE; if (a->entry->start < a->common->highaddr) a->entry->start = a->common->highaddr; a->entry->start = roundup2(a->entry->start, a->common->alignment); if (dmar_gas_match_one(a, prev, a->domain->end)) { dmar_gas_match_insert(a, prev); return (0); } /* * XXXKIB. This falls back to linear iteration over * the free space in the high region. But high * regions are almost unused, the code should be * enough to cover the case, although in the * non-optimal way. */ prev = next; next = RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, prev); KASSERT(next != NULL, ("no next %p %jx", a->domain, (uintmax_t)find_entry.start)); if (next->end >= a->domain->end) return (ENOMEM); } } static int dmar_gas_find_space(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int flags, struct dmar_map_entry *entry) { struct dmar_gas_match_args a; int error; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT(entry->flags == 0, ("dirty entry %p %p", domain, entry)); KASSERT((size & DMAR_PAGE_MASK) == 0, ("size %jx", (uintmax_t)size)); a.domain = domain; a.size = size; a.offset = offset; a.common = common; a.gas_flags = flags; a.entry = entry; /* Handle lower region. 
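 * The tag describes a usable low range [0, lowaddr] and a high
 * range above highaddr, with the window in between excluded.  The
 * lower walk relies on the free_down augmentation: a subtree whose
 * largest gap cannot fit size + offset plus the trailing guard
 * page is skipped wholesale.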
*/ if (common->lowaddr > 0) { error = dmar_gas_lowermatch(&a, RB_ROOT(&domain->rb_root)); if (error == 0) return (0); KASSERT(error == ENOMEM, ("error %d from dmar_gas_lowermatch", error)); } /* Handle upper region. */ if (common->highaddr >= domain->end) return (ENOMEM); error = dmar_gas_uppermatch(&a); KASSERT(error == ENOMEM, ("error %d from dmar_gas_uppermatch", error)); return (error); } static int dmar_gas_alloc_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int flags) { struct dmar_map_entry *next, *prev; bool found; DMAR_DOMAIN_ASSERT_LOCKED(domain); if ((entry->start & DMAR_PAGE_MASK) != 0 || (entry->end & DMAR_PAGE_MASK) != 0) return (EINVAL); if (entry->start >= entry->end) return (EINVAL); if (entry->end >= domain->end) return (EINVAL); next = RB_NFIND(dmar_gas_entries_tree, &domain->rb_root, entry); KASSERT(next != NULL, ("next must be non-null %p %jx", domain, (uintmax_t)entry->start)); prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, next); /* prev could be NULL */ /* * Adapt to broken BIOSes which specify overlapping RMRR * entries. * * XXXKIB: this does not handle a case when prev or next * entries are completely covered by the current one, which * extends both ways. */ if (prev != NULL && prev->end > entry->start && (prev->flags & DMAR_MAP_ENTRY_PLACE) == 0) { if ((prev->flags & DMAR_MAP_ENTRY_RMRR) == 0) return (EBUSY); entry->start = prev->end; } if (next != NULL && next->start < entry->end && (next->flags & DMAR_MAP_ENTRY_PLACE) == 0) { if ((next->flags & DMAR_MAP_ENTRY_RMRR) == 0) return (EBUSY); entry->end = next->start; } if (entry->end == entry->start) return (0); if (prev != NULL && prev->end > entry->start) { /* This assumes that prev is the placeholder entry. */ dmar_gas_rb_remove(domain, prev); prev = NULL; } if (next != NULL && next->start < entry->end) { dmar_gas_rb_remove(domain, next); next = NULL; } found = dmar_gas_rb_insert(domain, entry); KASSERT(found, ("found RMRR dup %p start %jx end %jx", domain, (uintmax_t)entry->start, (uintmax_t)entry->end)); entry->flags = DMAR_MAP_ENTRY_RMRR; #ifdef INVARIANTS struct dmar_map_entry *ip, *in; ip = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); in = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); KASSERT(prev == NULL || ip == prev, ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)", entry, entry->start, entry->end, prev, prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end, ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end)); KASSERT(next == NULL || in == next, ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)", entry, entry->start, entry->end, next, next == NULL ? 0 : next->start, next == NULL ? 0 : next->end, in, in == NULL ? 0 : in->start, in == NULL ? 
0 : in->end)); #endif return (0); } void dmar_gas_free_space(struct dmar_domain *domain, struct dmar_map_entry *entry) { DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR | DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_MAP, ("permanent entry %p %p", domain, entry)); dmar_gas_rb_remove(domain, entry); entry->flags &= ~DMAR_MAP_ENTRY_MAP; #ifdef INVARIANTS if (dmar_check_free) dmar_gas_check_free(domain); #endif } void dmar_gas_free_region(struct dmar_domain *domain, struct dmar_map_entry *entry) { struct dmar_map_entry *next, *prev; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR | DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_RMRR, ("non-RMRR entry %p %p", domain, entry)); prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_rb_remove(domain, entry); entry->flags &= ~DMAR_MAP_ENTRY_RMRR; if (prev == NULL) dmar_gas_rb_insert(domain, domain->first_place); if (next == NULL) dmar_gas_rb_insert(domain, domain->last_place); } int dmar_gas_map(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct dmar_map_entry **res) { struct dmar_map_entry *entry; int error; KASSERT((flags & ~(DMAR_GM_CANWAIT | DMAR_GM_CANSPLIT)) == 0, ("invalid flags 0x%x", flags)); entry = dmar_gas_alloc_entry(domain, (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); if (entry == NULL) return (ENOMEM); DMAR_DOMAIN_LOCK(domain); error = dmar_gas_find_space(domain, common, size, offset, flags, entry); if (error == ENOMEM) { DMAR_DOMAIN_UNLOCK(domain); dmar_gas_free_entry(domain, entry); return (error); } #ifdef INVARIANTS if (dmar_check_free) dmar_gas_check_free(domain); #endif KASSERT(error == 0, ("unexpected error %d from dmar_gas_find_entry", error)); KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx", (uintmax_t)entry->end, (uintmax_t)domain->end)); entry->flags |= eflags; DMAR_DOMAIN_UNLOCK(domain); error = domain_map_buf(domain, entry->start, entry->end - entry->start, ma, ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) | ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) | ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) | ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0), (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); if (error == ENOMEM) { dmar_domain_unload_entry(entry, true); return (error); } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); *res = entry; return (0); } int dmar_gas_map_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma) { dmar_gaddr_t start; int error; KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain, entry, entry->flags)); KASSERT((flags & ~(DMAR_GM_CANWAIT)) == 0, ("invalid flags 0x%x", flags)); start = entry->start; DMAR_DOMAIN_LOCK(domain); error = dmar_gas_alloc_region(domain, entry, flags); if (error != 0) { DMAR_DOMAIN_UNLOCK(domain); return (error); } entry->flags |= eflags; DMAR_DOMAIN_UNLOCK(domain); if (entry->end == entry->start) return (0); error = domain_map_buf(domain, entry->start, entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start), ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) | ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) | ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) | ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? 
DMAR_PTE_TM : 0), (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); if (error == ENOMEM) { dmar_domain_unload_entry(entry, false); return (error); } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); return (0); } int dmar_gas_reserve_region(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t end) { struct dmar_map_entry *entry; int error; entry = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); entry->start = start; entry->end = end; DMAR_DOMAIN_LOCK(domain); error = dmar_gas_alloc_region(domain, entry, DMAR_GM_CANWAIT); if (error == 0) entry->flags |= DMAR_MAP_ENTRY_UNMAPPED; DMAR_DOMAIN_UNLOCK(domain); if (error != 0) dmar_gas_free_entry(domain, entry); return (error); } Index: head/sys/x86/iommu/intel_idpgtbl.c =================================================================== --- head/sys/x86/iommu/intel_idpgtbl.c (revision 326262) +++ head/sys/x86/iommu/intel_idpgtbl.c (revision 326263) @@ -1,800 +1,802 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int domain_unmap_buf_locked(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, int flags); /* * The cache of the identity mapping page tables for the DMARs. Using * the cache saves significant amount of memory for page tables by * reusing the page tables, since usually DMARs are identical and have * the same capabilities. Still, cache records the information needed * to match DMAR capabilities and page table format, to correctly * handle different DMARs. 
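* For instance, two units which both support 4-level tables and the * same superpage sizes can share one identity page table object; the * effective lookup key is the (maxaddr, pglvl, leaf) triple which is * checked in domain_get_idmap_pgtbl() below.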
*/ struct idpgtbl { dmar_gaddr_t maxaddr; /* Page table covers the guest address range [0..maxaddr) */ int pglvl; /* Total page table levels ignoring superpages */ int leaf; /* The last materialized page table level, it is non-zero if superpages are supported */ vm_object_t pgtbl_obj; /* The page table pages */ LIST_ENTRY(idpgtbl) link; }; static struct sx idpgtbl_lock; SX_SYSINIT(idpgtbl, &idpgtbl_lock, "idpgtbl"); static LIST_HEAD(, idpgtbl) idpgtbls = LIST_HEAD_INITIALIZER(idpgtbls); static MALLOC_DEFINE(M_DMAR_IDPGTBL, "dmar_idpgtbl", "Intel DMAR Identity mappings cache elements"); /* * Build the next level of the page tables for the identity mapping. * - lvl is the level to build; * - idx is the index of the page table page in the pgtbl_obj, which is * being allocated and filled now; * - addr is the starting address in the bus address space which is * mapped by the page table page. */ static void domain_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx, dmar_gaddr_t addr) { vm_page_t m1; dmar_pte_t *pte; struct sf_buf *sf; dmar_gaddr_t f, pg_sz; vm_pindex_t base; int i; VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj); if (addr >= tbl->maxaddr) return; (void)dmar_pgalloc(tbl->pgtbl_obj, idx, DMAR_PGF_OBJL | DMAR_PGF_WAITOK | DMAR_PGF_ZERO); base = idx * DMAR_NPTEPG + 1; /* Index of the first child page of idx */ pg_sz = pglvl_page_size(tbl->pglvl, lvl); if (lvl != tbl->leaf) { for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) domain_idmap_nextlvl(tbl, lvl + 1, base + i, f); } VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, DMAR_PGF_WAITOK, &sf); if (lvl == tbl->leaf) { for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { if (f >= tbl->maxaddr) break; pte[i].pte = (DMAR_PTE_ADDR_MASK & f) | DMAR_PTE_R | DMAR_PTE_W; } } else { for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { if (f >= tbl->maxaddr) break; m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i, DMAR_PGF_NOALLOC); KASSERT(m1 != NULL, ("lost page table page")); pte[i].pte = (DMAR_PTE_ADDR_MASK & VM_PAGE_TO_PHYS(m1)) | DMAR_PTE_R | DMAR_PTE_W; } } /* domain_get_idmap_pgtbl flushes CPU cache if needed. */ dmar_unmap_pgtbl(sf); VM_OBJECT_WLOCK(tbl->pgtbl_obj); } /* * Find a ready and compatible identity-mapping page table in the * cache. If not found, populate the identity-mapping page table for * the context, up to the maxaddr. The maxaddr byte is allowed to be * left unmapped, which is aligned with the definition of Maxmem as the * highest usable physical address + 1. If superpages are used, the * maxaddr is typically mapped. */ vm_object_t domain_get_idmap_pgtbl(struct dmar_domain *domain, dmar_gaddr_t maxaddr) { struct dmar_unit *unit; struct idpgtbl *tbl; vm_object_t res; vm_page_t m; int leaf, i; leaf = 0; /* silence gcc */ /* * First, determine where to stop the paging structures. */ for (i = 0; i < domain->pglvl; i++) { if (i == domain->pglvl - 1 || domain_is_sp_lvl(domain, i)) { leaf = i; break; } } /* * Search the cache for a compatible page table. A qualified * page table must map up to maxaddr, its level must be * supported by the DMAR and its leaf must be equal to the * calculated value. The latter restriction could be lifted * but I believe it is currently impossible to have any * deviations for existing hardware.
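* As an illustration, with a 4-level page table and 2MB superpages * supported (DMAR_CAP_SPS_2M), the loop above selects leaf == 2, so * the level 2 page table pages are the last ones materialized and * each of their ptes maps a 2MB block; without superpage support the * leaf is pglvl - 1 and the leaf ptes map 4KB pages.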
*/ sx_slock(&idpgtbl_lock); LIST_FOREACH(tbl, &idpgtbls, link) { if (tbl->maxaddr >= maxaddr && dmar_pglvl_supported(domain->dmar, tbl->pglvl) && tbl->leaf == leaf) { res = tbl->pgtbl_obj; vm_object_reference(res); sx_sunlock(&idpgtbl_lock); domain->pglvl = tbl->pglvl; /* XXXKIB ? */ goto end; } } /* * Not found in cache, relock the cache into exclusive mode to * be able to add element, and recheck cache again after the * relock. */ sx_sunlock(&idpgtbl_lock); sx_xlock(&idpgtbl_lock); LIST_FOREACH(tbl, &idpgtbls, link) { if (tbl->maxaddr >= maxaddr && dmar_pglvl_supported(domain->dmar, tbl->pglvl) && tbl->leaf == leaf) { res = tbl->pgtbl_obj; vm_object_reference(res); sx_xunlock(&idpgtbl_lock); domain->pglvl = tbl->pglvl; /* XXXKIB ? */ return (res); } } /* * Still not found, create new page table. */ tbl = malloc(sizeof(*tbl), M_DMAR_IDPGTBL, M_WAITOK); tbl->pglvl = domain->pglvl; tbl->leaf = leaf; tbl->maxaddr = maxaddr; tbl->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(pglvl_max_pages(tbl->pglvl)), 0, 0, NULL); VM_OBJECT_WLOCK(tbl->pgtbl_obj); domain_idmap_nextlvl(tbl, 0, 0, 0); VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); LIST_INSERT_HEAD(&idpgtbls, tbl, link); res = tbl->pgtbl_obj; vm_object_reference(res); sx_xunlock(&idpgtbl_lock); end: /* * Table was found or created. * * If DMAR does not snoop paging structures accesses, flush * CPU cache to memory. Note that dmar_unmap_pgtbl() coherent * argument was possibly invalid at the time of the identity * page table creation, since DMAR which was passed at the * time of creation could be coherent, while current DMAR is * not. * * If DMAR cannot look into the chipset write buffer, flush it * as well. */ unit = domain->dmar; if (!DMAR_IS_COHERENT(unit)) { VM_OBJECT_WLOCK(res); for (m = vm_page_lookup(res, 0); m != NULL; m = vm_page_next(m)) pmap_invalidate_cache_pages(&m, 1); VM_OBJECT_WUNLOCK(res); } if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { DMAR_LOCK(unit); dmar_flush_write_bufs(unit); DMAR_UNLOCK(unit); } return (res); } /* * Return a reference to the identity mapping page table to the cache. */ void put_idmap_pgtbl(vm_object_t obj) { struct idpgtbl *tbl, *tbl1; vm_object_t rmobj; sx_slock(&idpgtbl_lock); KASSERT(obj->ref_count >= 2, ("lost cache reference")); vm_object_deallocate(obj); /* * Cache always owns one last reference on the page table object. * If there is an additional reference, object must stay. */ if (obj->ref_count > 1) { sx_sunlock(&idpgtbl_lock); return; } /* * Cache reference is the last, remove cache element and free * page table object, returning the page table pages to the * system. */ sx_sunlock(&idpgtbl_lock); sx_xlock(&idpgtbl_lock); LIST_FOREACH_SAFE(tbl, &idpgtbls, link, tbl1) { rmobj = tbl->pgtbl_obj; if (rmobj->ref_count == 1) { LIST_REMOVE(tbl, link); atomic_subtract_int(&dmar_tbl_pagecnt, rmobj->resident_page_count); vm_object_deallocate(rmobj); free(tbl, M_DMAR_IDPGTBL); } } sx_xunlock(&idpgtbl_lock); } /* * The core routines to map and unmap host pages at the given guest * address. Support superpages. */ /* * Index of the pte for the guest address base in the page table at * the level lvl. */ static int domain_pgtbl_pte_off(struct dmar_domain *domain, dmar_gaddr_t base, int lvl) { base >>= DMAR_PAGE_SHIFT + (domain->pglvl - lvl - 1) * DMAR_NPTEPGSHIFT; return (base & DMAR_PTEMASK); } /* * Returns the page index of the page table page in the page table * object, which maps the given address base at the page table level * lvl. 
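* The numbering is the same as in domain_idmap_nextlvl(): the root * page has pindex 0, and the children of the page at pindex idx * occupy pindexes idx * DMAR_NPTEPG + 1 .. idx * DMAR_NPTEPG + * DMAR_NPTEPG. With the 4KB pages used here DMAR_NPTEPG is 512, so * e.g. for lvl == 1 the result is domain_pgtbl_pte_off(domain, base, * 0) + 1.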
*/ static vm_pindex_t domain_pgtbl_get_pindex(struct dmar_domain *domain, dmar_gaddr_t base, int lvl) { vm_pindex_t idx, pidx; int i; KASSERT(lvl >= 0 && lvl < domain->pglvl, ("wrong lvl %p %d", domain, lvl)); for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) { idx = domain_pgtbl_pte_off(domain, base, i) + pidx * DMAR_NPTEPG + 1; } return (idx); } static dmar_pte_t * domain_pgtbl_map_pte(struct dmar_domain *domain, dmar_gaddr_t base, int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf) { vm_page_t m; struct sf_buf *sfp; dmar_pte_t *pte, *ptep; vm_pindex_t idx, idx1; DMAR_DOMAIN_ASSERT_PGLOCKED(domain); KASSERT((flags & DMAR_PGF_OBJL) != 0, ("lost PGF_OBJL")); idx = domain_pgtbl_get_pindex(domain, base, lvl); if (*sf != NULL && idx == *idxp) { pte = (dmar_pte_t *)sf_buf_kva(*sf); } else { if (*sf != NULL) dmar_unmap_pgtbl(*sf); *idxp = idx; retry: pte = dmar_map_pgtbl(domain->pgtbl_obj, idx, flags, sf); if (pte == NULL) { KASSERT(lvl > 0, ("lost root page table page %p", domain)); /* * Page table page does not exist, allocate * it and create a pte in the preceding page level * to reference the allocated page table page. */ m = dmar_pgalloc(domain->pgtbl_obj, idx, flags | DMAR_PGF_ZERO); if (m == NULL) return (NULL); /* * Prevent potential free while pgtbl_obj is * unlocked in the recursive call to * domain_pgtbl_map_pte(), if another thread did a * pte write and clean while the lock was * dropped. */ m->wire_count++; sfp = NULL; ptep = domain_pgtbl_map_pte(domain, base, lvl - 1, flags, &idx1, &sfp); if (ptep == NULL) { KASSERT(m->pindex != 0, ("losing root page %p", domain)); m->wire_count--; dmar_pgfree(domain->pgtbl_obj, m->pindex, flags); return (NULL); } dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W | VM_PAGE_TO_PHYS(m)); dmar_flush_pte_to_ram(domain->dmar, ptep); sf_buf_page(sfp)->wire_count += 1; m->wire_count--; dmar_unmap_pgtbl(sfp); /* Only executed once. */ goto retry; } } pte += domain_pgtbl_pte_off(domain, base, lvl); return (pte); } static int domain_map_buf_locked(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags) { dmar_pte_t *pte; struct sf_buf *sf; dmar_gaddr_t pg_sz, base1, size1; vm_pindex_t pi, c, idx, run_sz; int lvl; bool superpage; DMAR_DOMAIN_ASSERT_PGLOCKED(domain); base1 = base; size1 = size; flags |= DMAR_PGF_OBJL; TD_PREP_PINNED_ASSERT; for (sf = NULL, pi = 0; size > 0; base += pg_sz, size -= pg_sz, pi += run_sz) { for (lvl = 0, c = 0, superpage = false;; lvl++) { pg_sz = domain_page_size(domain, lvl); run_sz = pg_sz >> DMAR_PAGE_SHIFT; if (lvl == domain->pglvl - 1) break; /* * Check if the current base is suitable for the * superpage mapping. First, verify the level. */ if (!domain_is_sp_lvl(domain, lvl)) continue; /* * Next, look at the size of the mapping and * alignment of both guest and host addresses. */ if (size < pg_sz || (base & (pg_sz - 1)) != 0 || (VM_PAGE_TO_PHYS(ma[pi]) & (pg_sz - 1)) != 0) continue; /* All passed, check host pages contiguity.
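* A superpage pte maps one physically contiguous host range, so the * run_sz small pages backing it must be adjacent: e.g. a 2MB * superpage needs 512 consecutive 4KB host pages. The loop below * verifies this once per run.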
*/ if (c == 0) { for (c = 1; c < run_sz; c++) { if (VM_PAGE_TO_PHYS(ma[pi + c]) != VM_PAGE_TO_PHYS(ma[pi + c - 1]) + PAGE_SIZE) break; } } if (c >= run_sz) { superpage = true; break; } } KASSERT(size >= pg_sz, ("mapping loop overflow %p %jx %jx %jx", domain, (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); KASSERT(pg_sz > 0, ("pg_sz 0 lvl %d", lvl)); pte = domain_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf); if (pte == NULL) { KASSERT((flags & DMAR_PGF_WAITOK) == 0, ("failed waitable pte alloc %p", domain)); if (sf != NULL) dmar_unmap_pgtbl(sf); domain_unmap_buf_locked(domain, base1, base - base1, flags); TD_PINNED_ASSERT; return (ENOMEM); } dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags | (superpage ? DMAR_PTE_SP : 0)); dmar_flush_pte_to_ram(domain->dmar, pte); sf_buf_page(sf)->wire_count += 1; } if (sf != NULL) dmar_unmap_pgtbl(sf); TD_PINNED_ASSERT; return (0); } int domain_map_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags) { struct dmar_unit *unit; int error; unit = domain->dmar; KASSERT((domain->flags & DMAR_DOMAIN_IDMAP) == 0, ("modifying idmap pagetable domain %p", domain)); KASSERT((base & DMAR_PAGE_MASK) == 0, ("non-aligned base %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT((size & DMAR_PAGE_MASK) == 0, ("non-aligned size %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT(base < (1ULL << domain->agaw), ("base too high %p %jx %jx agaw %d", domain, (uintmax_t)base, (uintmax_t)size, domain->agaw)); KASSERT(base + size < (1ULL << domain->agaw), ("end too high %p %jx %jx agaw %d", domain, (uintmax_t)base, (uintmax_t)size, domain->agaw)); KASSERT(base + size > base, ("size overflow %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT((pflags & (DMAR_PTE_R | DMAR_PTE_W)) != 0, ("neither read nor write %jx", (uintmax_t)pflags)); KASSERT((pflags & ~(DMAR_PTE_R | DMAR_PTE_W | DMAR_PTE_SNP | DMAR_PTE_TM)) == 0, ("invalid pte flags %jx", (uintmax_t)pflags)); KASSERT((pflags & DMAR_PTE_SNP) == 0 || (unit->hw_ecap & DMAR_ECAP_SC) != 0, ("PTE_SNP for dmar without snoop control %p %jx", domain, (uintmax_t)pflags)); KASSERT((pflags & DMAR_PTE_TM) == 0 || (unit->hw_ecap & DMAR_ECAP_DI) != 0, ("PTE_TM for dmar without DIOTLB %p %jx", domain, (uintmax_t)pflags)); KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); DMAR_DOMAIN_PGLOCK(domain); error = domain_map_buf_locked(domain, base, size, ma, pflags, flags); DMAR_DOMAIN_PGUNLOCK(domain); if (error != 0) return (error); if ((unit->hw_cap & DMAR_CAP_CM) != 0) domain_flush_iotlb_sync(domain, base, size); else if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { /* See 11.1 Write Buffer Flushing. 
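* Units with DMAR_CAP_RWBF may buffer writes to memory-resident * structures, so the write buffer is flushed here to make the newly * written ptes visible to the hardware.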
*/ DMAR_LOCK(unit); dmar_flush_write_bufs(unit); DMAR_UNLOCK(unit); } return (0); } static void domain_unmap_clear_pte(struct dmar_domain *domain, dmar_gaddr_t base, int lvl, int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf); static void domain_free_pgtbl_pde(struct dmar_domain *domain, dmar_gaddr_t base, int lvl, int flags) { struct sf_buf *sf; dmar_pte_t *pde; vm_pindex_t idx; sf = NULL; pde = domain_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf); domain_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, true); } static void domain_unmap_clear_pte(struct dmar_domain *domain, dmar_gaddr_t base, int lvl, int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf) { vm_page_t m; dmar_pte_clear(&pte->pte); dmar_flush_pte_to_ram(domain->dmar, pte); m = sf_buf_page(*sf); if (free_sf) { dmar_unmap_pgtbl(*sf); *sf = NULL; } m->wire_count--; if (m->wire_count != 0) return; KASSERT(lvl != 0, ("lost reference (lvl) on root pg domain %p base %jx lvl %d", domain, (uintmax_t)base, lvl)); KASSERT(m->pindex != 0, ("lost reference (idx) on root pg domain %p base %jx lvl %d", domain, (uintmax_t)base, lvl)); dmar_pgfree(domain->pgtbl_obj, m->pindex, flags); domain_free_pgtbl_pde(domain, base, lvl - 1, flags); } /* * Assumes that the unmap is never partial. */ static int domain_unmap_buf_locked(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, int flags) { dmar_pte_t *pte; struct sf_buf *sf; vm_pindex_t idx; dmar_gaddr_t pg_sz; int lvl; DMAR_DOMAIN_ASSERT_PGLOCKED(domain); if (size == 0) return (0); KASSERT((domain->flags & DMAR_DOMAIN_IDMAP) == 0, ("modifying idmap pagetable domain %p", domain)); KASSERT((base & DMAR_PAGE_MASK) == 0, ("non-aligned base %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT((size & DMAR_PAGE_MASK) == 0, ("non-aligned size %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT(base < (1ULL << domain->agaw), ("base too high %p %jx %jx agaw %d", domain, (uintmax_t)base, (uintmax_t)size, domain->agaw)); KASSERT(base + size < (1ULL << domain->agaw), ("end too high %p %jx %jx agaw %d", domain, (uintmax_t)base, (uintmax_t)size, domain->agaw)); KASSERT(base + size > base, ("size overflow %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); pg_sz = 0; /* silence gcc */ flags |= DMAR_PGF_OBJL; TD_PREP_PINNED_ASSERT; for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) { for (lvl = 0; lvl < domain->pglvl; lvl++) { if (lvl != domain->pglvl - 1 && !domain_is_sp_lvl(domain, lvl)) continue; pg_sz = domain_page_size(domain, lvl); if (pg_sz > size) continue; pte = domain_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf); KASSERT(pte != NULL, ("sleeping or page missed %p %jx %d 0x%x", domain, (uintmax_t)base, lvl, flags)); if ((pte->pte & DMAR_PTE_SP) != 0 || lvl == domain->pglvl - 1) { domain_unmap_clear_pte(domain, base, lvl, flags, pte, &sf, false); break; } } KASSERT(size >= pg_sz, ("unmapping loop overflow %p %jx %jx %jx", domain, (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); } if (sf != NULL) dmar_unmap_pgtbl(sf); /* * See 11.1 Write Buffer Flushing for an explanation why RWBF * can be ignored here.
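* (Contrast with domain_map_buf() above, which does flush the write * buffer on DMAR_CAP_RWBF units after new ptes are written.)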
*/ TD_PINNED_ASSERT; return (0); } int domain_unmap_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, int flags) { int error; DMAR_DOMAIN_PGLOCK(domain); error = domain_unmap_buf_locked(domain, base, size, flags); DMAR_DOMAIN_PGUNLOCK(domain); return (error); } int domain_alloc_pgtbl(struct dmar_domain *domain) { vm_page_t m; KASSERT(domain->pgtbl_obj == NULL, ("already initialized %p", domain)); domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL); DMAR_DOMAIN_PGLOCK(domain); m = dmar_pgalloc(domain->pgtbl_obj, 0, DMAR_PGF_WAITOK | DMAR_PGF_ZERO | DMAR_PGF_OBJL); /* No implicit free of the top level page table page. */ m->wire_count = 1; DMAR_DOMAIN_PGUNLOCK(domain); DMAR_LOCK(domain->dmar); domain->flags |= DMAR_DOMAIN_PGTBL_INITED; DMAR_UNLOCK(domain->dmar); return (0); } void domain_free_pgtbl(struct dmar_domain *domain) { vm_object_t obj; vm_page_t m; obj = domain->pgtbl_obj; if (obj == NULL) { KASSERT((domain->dmar->hw_ecap & DMAR_ECAP_PT) != 0 && (domain->flags & DMAR_DOMAIN_IDMAP) != 0, ("lost pagetable object domain %p", domain)); return; } DMAR_DOMAIN_ASSERT_PGLOCKED(domain); domain->pgtbl_obj = NULL; if ((domain->flags & DMAR_DOMAIN_IDMAP) != 0) { put_idmap_pgtbl(obj); domain->flags &= ~DMAR_DOMAIN_IDMAP; return; } /* Obliterate wire_counts */ VM_OBJECT_ASSERT_WLOCKED(obj); for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) m->wire_count = 0; VM_OBJECT_WUNLOCK(obj); vm_object_deallocate(obj); } static inline uint64_t domain_wait_iotlb_flush(struct dmar_unit *unit, uint64_t wt, int iro) { uint64_t iotlbr; dmar_write8(unit, iro + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT | DMAR_IOTLB_DR | DMAR_IOTLB_DW | wt); for (;;) { iotlbr = dmar_read8(unit, iro + DMAR_IOTLB_REG_OFF); if ((iotlbr & DMAR_IOTLB_IVT) == 0) break; cpu_spinwait(); } return (iotlbr); } void domain_flush_iotlb_sync(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size) { struct dmar_unit *unit; dmar_gaddr_t isize; uint64_t iotlbr; int am, iro; unit = domain->dmar; KASSERT(!unit->qi_enabled, ("dmar%d: sync iotlb flush call", unit->unit)); iro = DMAR_ECAP_IRO(unit->hw_ecap) * 16; DMAR_LOCK(unit); if ((unit->hw_cap & DMAR_CAP_PSI) == 0 || size > 2 * 1024 * 1024) { iotlbr = domain_wait_iotlb_flush(unit, DMAR_IOTLB_IIRG_DOM | DMAR_IOTLB_DID(domain->domain), iro); KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != DMAR_IOTLB_IAIG_INVLD, ("dmar%d: invalidation failed %jx", unit->unit, (uintmax_t)iotlbr)); } else { for (; size > 0; base += isize, size -= isize) { am = calc_am(unit, base, size, &isize); dmar_write8(unit, iro, base | am); iotlbr = domain_wait_iotlb_flush(unit, DMAR_IOTLB_IIRG_PAGE | DMAR_IOTLB_DID(domain->domain), iro); KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != DMAR_IOTLB_IAIG_INVLD, ("dmar%d: PSI invalidation failed " "iotlbr 0x%jx base 0x%jx size 0x%jx am %d", unit->unit, (uintmax_t)iotlbr, (uintmax_t)base, (uintmax_t)size, am)); /* * Any non-page granularity covers whole guest * address space for the domain. */ if ((iotlbr & DMAR_IOTLB_IAIG_MASK) != DMAR_IOTLB_IAIG_PAGE) break; } } DMAR_UNLOCK(unit); } Index: head/sys/x86/iommu/intel_qi.c =================================================================== --- head/sys/x86/iommu/intel_qi.c (revision 326262) +++ head/sys/x86/iommu/intel_qi.c (revision 326263) @@ -1,472 +1,474 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. 
* * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static bool dmar_qi_seq_processed(const struct dmar_unit *unit, const struct dmar_qi_genseq *pseq) { return (pseq->gen < unit->inv_waitd_gen || (pseq->gen == unit->inv_waitd_gen && pseq->seq <= unit->inv_waitd_seq_hw)); } static int dmar_enable_qi(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); unit->hw_gcmd |= DMAR_GCMD_QIE; dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) != 0)); return (error); } static int dmar_disable_qi(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); unit->hw_gcmd &= ~DMAR_GCMD_QIE; dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) == 0)); return (error); } static void dmar_qi_advance_tail(struct dmar_unit *unit) { DMAR_ASSERT_LOCKED(unit); dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail); } static void dmar_qi_ensure(struct dmar_unit *unit, int descr_count) { uint32_t head; int bytes; DMAR_ASSERT_LOCKED(unit); bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT; for (;;) { if (bytes <= unit->inv_queue_avail) break; /* refill */ head = dmar_read4(unit, DMAR_IQH_REG); head &= DMAR_IQH_MASK; unit->inv_queue_avail = head - unit->inv_queue_tail - DMAR_IQ_DESCR_SZ; if (head <= unit->inv_queue_tail) unit->inv_queue_avail += unit->inv_queue_size; if (bytes <= unit->inv_queue_avail) break; /* * No space in the queue, do busy wait. Hardware must * make progress. But first advance the tail to * inform the descriptor streamer about entries we * might have already filled, otherwise they could * clog the whole queue.
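* The refill above treats the queue as a ring: avail is head - tail * minus one descriptor slot, wrapped by inv_queue_size when head <= * tail. One descriptor is kept permanently free (see dmar_init_qi()) * so that head == tail unambiguously means an empty queue.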
*/ dmar_qi_advance_tail(unit); unit->inv_queue_full++; cpu_spinwait(); } unit->inv_queue_avail -= bytes; } static void dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2) { DMAR_ASSERT_LOCKED(unit); *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1; unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, (uintmax_t)unit->inv_queue_size)); unit->inv_queue_tail &= unit->inv_queue_size - 1; *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2; unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, (uintmax_t)unit->inv_queue_size)); unit->inv_queue_tail &= unit->inv_queue_size - 1; } static void dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr, bool memw, bool fence) { DMAR_ASSERT_LOCKED(unit); dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID | (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) | (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) | (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) | (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0), memw ? unit->inv_waitd_seq_hw_phys : 0); } static void dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct dmar_qi_genseq *pseq, bool emit_wait) { struct dmar_qi_genseq gsec; uint32_t seq; KASSERT(pseq != NULL, ("wait descriptor with no place for seq")); DMAR_ASSERT_LOCKED(unit); if (unit->inv_waitd_seq == 0xffffffff) { gsec.gen = unit->inv_waitd_gen; gsec.seq = unit->inv_waitd_seq; dmar_qi_ensure(unit, 1); dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false); dmar_qi_advance_tail(unit); while (!dmar_qi_seq_processed(unit, &gsec)) cpu_spinwait(); unit->inv_waitd_gen++; unit->inv_waitd_seq = 1; } seq = unit->inv_waitd_seq++; pseq->gen = unit->inv_waitd_gen; pseq->seq = seq; if (emit_wait) { dmar_qi_ensure(unit, 1); dmar_qi_emit_wait_descr(unit, seq, true, true, false); } } static void dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct dmar_qi_genseq *gseq, bool nowait) { DMAR_ASSERT_LOCKED(unit); unit->inv_seq_waiters++; while (!dmar_qi_seq_processed(unit, gseq)) { if (cold || nowait) { cpu_spinwait(); } else { msleep(&unit->inv_seq_waiters, &unit->lock, 0, "dmarse", hz); } } unit->inv_seq_waiters--; } void dmar_qi_invalidate_locked(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, struct dmar_qi_genseq *pseq, bool emit_wait) { struct dmar_unit *unit; dmar_gaddr_t isize; int am; unit = domain->dmar; DMAR_ASSERT_LOCKED(unit); for (; size > 0; base += isize, size -= isize) { am = calc_am(unit, base, size, &isize); dmar_qi_ensure(unit, 1); dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR | DMAR_IQ_DESCR_IOTLB_DID(domain->domain), base | am); } dmar_qi_emit_wait_seq(unit, pseq, emit_wait); dmar_qi_advance_tail(unit); } void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit) { struct dmar_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); dmar_qi_ensure(unit, 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); dmar_qi_wait_for_seq(unit, &gseq, false); } void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit) { struct dmar_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); dmar_qi_ensure(unit, 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB | DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0); dmar_qi_emit_wait_seq(unit, &gseq, true); 
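/* * The sequence used by all the global invalidations here: queue the * invalidation descriptor together with a wait descriptor, publish * both by writing the tail register, then wait until the wait * descriptor's status write marks the sequence as processed. */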
dmar_qi_advance_tail(unit); dmar_qi_wait_for_seq(unit, &gseq, false); } void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit) { struct dmar_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); dmar_qi_ensure(unit, 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); dmar_qi_wait_for_seq(unit, &gseq, false); } void dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt) { struct dmar_qi_genseq gseq; u_int c, l; DMAR_ASSERT_LOCKED(unit); KASSERT(start < unit->irte_cnt && start < start + cnt && start + cnt <= unit->irte_cnt, ("inv iec overflow %d %d %d", unit->irte_cnt, start, cnt)); for (; cnt > 0; cnt -= c, start += c) { l = ffs(start | cnt) - 1; c = 1 << l; dmar_qi_ensure(unit, 1); dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV | DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) | DMAR_IQ_DESCR_IEC_IM(l), 0); } dmar_qi_ensure(unit, 1); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); /* * The caller of the function, in particular, * dmar_ir_program_irte(), may be called from the context * where the sleeping is forbidden (in fact, the * intr_table_lock mutex may be held, locked from * intr_shuffle_irqs()). Wait for the invalidation completion * using the busy wait. * * The impact on the interrupt input setup code is small, the * expected overhead is comparable with the chipset register * read. It is more harmful for the parallel DMA operations, * since we own the dmar unit lock until whole invalidation * queue is processed, which includes requests possibly issued * before our request. */ dmar_qi_wait_for_seq(unit, &gseq, true); } int dmar_qi_intr(void *arg) { struct dmar_unit *unit; unit = arg; KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", unit->unit)); taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task); return (FILTER_HANDLED); } static void dmar_qi_task(void *arg, int pending __unused) { struct dmar_unit *unit; struct dmar_map_entry *entry; uint32_t ics; unit = arg; DMAR_LOCK(unit); for (;;) { entry = TAILQ_FIRST(&unit->tlb_flush_entries); if (entry == NULL) break; if (!dmar_qi_seq_processed(unit, &entry->gseq)) break; TAILQ_REMOVE(&unit->tlb_flush_entries, entry, dmamap_link); DMAR_UNLOCK(unit); dmar_domain_free_entry(entry, (entry->flags & DMAR_MAP_ENTRY_QI_NF) == 0); DMAR_LOCK(unit); } ics = dmar_read4(unit, DMAR_ICS_REG); if ((ics & DMAR_ICS_IWC) != 0) { ics = DMAR_ICS_IWC; dmar_write4(unit, DMAR_ICS_REG, ics); } if (unit->inv_seq_waiters > 0) wakeup(&unit->inv_seq_waiters); DMAR_UNLOCK(unit); } int dmar_init_qi(struct dmar_unit *unit) { uint64_t iqa; uint32_t ics; int qi_sz; if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0) return (0); unit->qi_enabled = 1; TUNABLE_INT_FETCH("hw.dmar.qi", &unit->qi_enabled); if (!unit->qi_enabled) return (0); TAILQ_INIT(&unit->tlb_flush_entries); TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit); unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK, taskqueue_thread_enqueue, &unit->qi_taskqueue); taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV, "dmar%d qi taskq", unit->unit); unit->inv_waitd_gen = 0; unit->inv_waitd_seq = 1; qi_sz = DMAR_IQA_QS_DEF; TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz); if (qi_sz > DMAR_IQA_QS_MAX) qi_sz = DMAR_IQA_QS_MAX; unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE; /* Reserve one descriptor to prevent wraparound. */ unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ; /* The invalidation queue reads by DMARs are always coherent. 
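* Hence the queue below may be allocated with VM_MEMATTR_DEFAULT even * on units which are otherwise not coherent (DMAR_ECAP_C clear); the * queue base only needs to be physically contiguous and page-aligned.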
*/ unit->inv_queue = kmem_alloc_contig(kernel_arena, unit->inv_queue_size, M_WAITOK | M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); unit->inv_waitd_seq_hw_phys = pmap_kextract( (vm_offset_t)&unit->inv_waitd_seq_hw); DMAR_LOCK(unit); dmar_write8(unit, DMAR_IQT_REG, 0); iqa = pmap_kextract(unit->inv_queue); iqa |= qi_sz; dmar_write8(unit, DMAR_IQA_REG, iqa); dmar_enable_qi(unit); ics = dmar_read4(unit, DMAR_ICS_REG); if ((ics & DMAR_ICS_IWC) != 0) { ics = DMAR_ICS_IWC; dmar_write4(unit, DMAR_ICS_REG, ics); } dmar_enable_qi_intr(unit); DMAR_UNLOCK(unit); return (0); } void dmar_fini_qi(struct dmar_unit *unit) { struct dmar_qi_genseq gseq; if (!unit->qi_enabled) return; taskqueue_drain(unit->qi_taskqueue, &unit->qi_task); taskqueue_free(unit->qi_taskqueue); unit->qi_taskqueue = NULL; DMAR_LOCK(unit); /* quiesce */ dmar_qi_ensure(unit, 1); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); dmar_qi_wait_for_seq(unit, &gseq, false); /* only after the quiesce, disable the queue */ dmar_disable_qi_intr(unit); dmar_disable_qi(unit); KASSERT(unit->inv_seq_waiters == 0, ("dmar%d: waiters on disabled queue", unit->unit)); DMAR_UNLOCK(unit); kmem_free(kernel_arena, unit->inv_queue, unit->inv_queue_size); unit->inv_queue = 0; unit->inv_queue_size = 0; unit->qi_enabled = 0; } void dmar_enable_qi_intr(struct dmar_unit *unit) { uint32_t iectl; DMAR_ASSERT_LOCKED(unit); KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit)); iectl = dmar_read4(unit, DMAR_IECTL_REG); iectl &= ~DMAR_IECTL_IM; dmar_write4(unit, DMAR_IECTL_REG, iectl); } void dmar_disable_qi_intr(struct dmar_unit *unit) { uint32_t iectl; DMAR_ASSERT_LOCKED(unit); KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit)); iectl = dmar_read4(unit, DMAR_IECTL_REG); dmar_write4(unit, DMAR_IECTL_REG, iectl | DMAR_IECTL_IM); } Index: head/sys/x86/iommu/intel_quirks.c =================================================================== --- head/sys/x86/iommu/intel_quirks.c (revision 326262) +++ head/sys/x86/iommu/intel_quirks.c (revision 326263) @@ -1,238 +1,240 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013, 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef void (*dmar_quirk_cpu_fun)(struct dmar_unit *); struct intel_dmar_quirk_cpu { u_int ext_family; u_int ext_model; u_int family_code; u_int model; u_int stepping; dmar_quirk_cpu_fun quirk; const char *descr; }; typedef void (*dmar_quirk_nb_fun)(struct dmar_unit *, device_t nb); struct intel_dmar_quirk_nb { u_int dev_id; u_int rev_no; dmar_quirk_nb_fun quirk; const char *descr; }; #define QUIRK_NB_ALL_REV 0xffffffff static void dmar_match_quirks(struct dmar_unit *dmar, const struct intel_dmar_quirk_nb *nb_quirks, int nb_quirks_len, const struct intel_dmar_quirk_cpu *cpu_quirks, int cpu_quirks_len) { device_t nb; const struct intel_dmar_quirk_nb *nb_quirk; const struct intel_dmar_quirk_cpu *cpu_quirk; u_int p[4]; u_int dev_id, rev_no; u_int ext_family, ext_model, family_code, model, stepping; int i; if (nb_quirks != NULL) { nb = pci_find_bsf(0, 0, 0); if (nb != NULL) { dev_id = pci_get_device(nb); rev_no = pci_get_revid(nb); for (i = 0; i < nb_quirks_len; i++) { nb_quirk = &nb_quirks[i]; if (nb_quirk->dev_id == dev_id && (nb_quirk->rev_no == rev_no || nb_quirk->rev_no == QUIRK_NB_ALL_REV)) { if (bootverbose) { device_printf(dmar->dev, "NB IOMMU quirk %s\n", nb_quirk->descr); } nb_quirk->quirk(dmar, nb); } } } else { device_printf(dmar->dev, "cannot find northbridge\n"); } } if (cpu_quirks != NULL) { do_cpuid(1, p); ext_family = (p[0] & CPUID_EXT_FAMILY) >> 20; ext_model = (p[0] & CPUID_EXT_MODEL) >> 16; family_code = (p[0] & CPUID_FAMILY) >> 8; model = (p[0] & CPUID_MODEL) >> 4; stepping = p[0] & CPUID_STEPPING; for (i = 0; i < cpu_quirks_len; i++) { cpu_quirk = &cpu_quirks[i]; if (cpu_quirk->ext_family == ext_family && cpu_quirk->ext_model == ext_model && cpu_quirk->family_code == family_code && cpu_quirk->model == model && (cpu_quirk->stepping == -1 || cpu_quirk->stepping == stepping)) { if (bootverbose) { device_printf(dmar->dev, "CPU IOMMU quirk %s\n", cpu_quirk->descr); } cpu_quirk->quirk(dmar); } } } } static void nb_5400_no_low_high_prot_mem(struct dmar_unit *unit, device_t nb __unused) { unit->hw_cap &= ~(DMAR_CAP_PHMR | DMAR_CAP_PLMR); } static void nb_no_ir(struct dmar_unit *unit, device_t nb __unused) { unit->hw_ecap &= ~(DMAR_ECAP_IR | DMAR_ECAP_EIM); } static void nb_5500_no_ir_rev13(struct dmar_unit *unit, device_t nb) { u_int rev_no; rev_no = pci_get_revid(nb); if (rev_no <= 0x13) nb_no_ir(unit, nb); } static const struct intel_dmar_quirk_nb pre_use_nb[] = { { .dev_id = 0x4001, .rev_no = 0x20, .quirk = nb_5400_no_low_high_prot_mem, .descr = "5400 E23" /* no low/high protected memory */ }, { .dev_id = 0x4003, .rev_no = 0x20, .quirk = nb_5400_no_low_high_prot_mem, .descr = "5400 E23" /* no low/high protected memory */ }, { .dev_id = 0x3403, .rev_no = QUIRK_NB_ALL_REV, 
.quirk = nb_5500_no_ir_rev13, .descr = "5500 E47, E53" /* interrupt remapping does not work */ }, { .dev_id = 0x3405, .rev_no = QUIRK_NB_ALL_REV, .quirk = nb_5500_no_ir_rev13, .descr = "5500 E47, E53" /* interrupt remapping does not work */ }, { .dev_id = 0x3405, .rev_no = 0x22, .quirk = nb_no_ir, .descr = "5500 E47, E53" /* interrupt remapping does not work */ }, { .dev_id = 0x3406, .rev_no = QUIRK_NB_ALL_REV, .quirk = nb_5500_no_ir_rev13, .descr = "5500 E47, E53" /* interrupt remapping does not work */ }, }; static void cpu_e5_am9(struct dmar_unit *unit) { unit->hw_cap &= ~(0x3fULL << 48); unit->hw_cap |= (9ULL << 48); } static const struct intel_dmar_quirk_cpu post_ident_cpu[] = { { .ext_family = 0, .ext_model = 2, .family_code = 6, .model = 13, .stepping = 6, .quirk = cpu_e5_am9, .descr = "E5 BT176" /* AM should be at most 9 */ }, }; void dmar_quirks_pre_use(struct dmar_unit *dmar) { if (!dmar_barrier_enter(dmar, DMAR_BARRIER_USEQ)) return; DMAR_LOCK(dmar); dmar_match_quirks(dmar, pre_use_nb, nitems(pre_use_nb), NULL, 0); dmar_barrier_exit(dmar, DMAR_BARRIER_USEQ); } void dmar_quirks_post_ident(struct dmar_unit *dmar) { dmar_match_quirks(dmar, NULL, 0, post_ident_cpu, nitems(post_ident_cpu)); } Index: head/sys/x86/iommu/intel_reg.h =================================================================== --- head/sys/x86/iommu/intel_reg.h (revision 326262) +++ head/sys/x86/iommu/intel_reg.h (revision 326263) @@ -1,411 +1,413 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __X86_IOMMU_INTEL_REG_H #define __X86_IOMMU_INTEL_REG_H #define DMAR_PAGE_SIZE PAGE_SIZE #define DMAR_PAGE_MASK (DMAR_PAGE_SIZE - 1) #define DMAR_PAGE_SHIFT PAGE_SHIFT #define DMAR_NPTEPG (DMAR_PAGE_SIZE / sizeof(dmar_pte_t)) #define DMAR_NPTEPGSHIFT 9 #define DMAR_PTEMASK (DMAR_NPTEPG - 1) typedef struct dmar_root_entry { uint64_t r1; uint64_t r2; } dmar_root_entry_t; #define DMAR_ROOT_R1_P 1 /* Present */ #define DMAR_ROOT_R1_CTP_MASK 0xfffffffffffff000 /* Mask for Context-Entry Table Pointer */ #define DMAR_CTX_CNT (DMAR_PAGE_SIZE / sizeof(dmar_root_entry_t)) typedef struct dmar_ctx_entry { uint64_t ctx1; uint64_t ctx2; } dmar_ctx_entry_t; #define DMAR_CTX1_P 1 /* Present */ #define DMAR_CTX1_FPD 2 /* Fault Processing Disable */ /* Translation Type: */ #define DMAR_CTX1_T_UNTR 0 /* only Untranslated */ #define DMAR_CTX1_T_TR 4 /* both Untranslated and Translated */ #define DMAR_CTX1_T_PASS 8 /* Pass-Through */ #define DMAR_CTX1_ASR_MASK 0xfffffffffffff000 /* Mask for the Address Space Root */ #define DMAR_CTX2_AW_2LVL 0 /* 2-level page tables */ #define DMAR_CTX2_AW_3LVL 1 /* 3-level page tables */ #define DMAR_CTX2_AW_4LVL 2 /* 4-level page tables */ #define DMAR_CTX2_AW_5LVL 3 /* 5-level page tables */ #define DMAR_CTX2_AW_6LVL 4 /* 6-level page tables */ #define DMAR_CTX2_DID_MASK 0xffff0 #define DMAR_CTX2_DID(x) ((x) << 8) /* Domain Identifier */ #define DMAR_CTX2_GET_DID(ctx2) (((ctx2) & DMAR_CTX2_DID_MASK) >> 8) typedef struct dmar_pte { uint64_t pte; } dmar_pte_t; #define DMAR_PTE_R 1 /* Read */ #define DMAR_PTE_W (1 << 1) /* Write */ #define DMAR_PTE_SP (1 << 7) /* Super Page */ #define DMAR_PTE_SNP (1 << 11) /* Snoop Behaviour */ #define DMAR_PTE_ADDR_MASK 0xffffffffff000 /* Address Mask */ #define DMAR_PTE_TM (1ULL << 62) /* Transient Mapping */ typedef struct dmar_irte { uint64_t irte1; uint64_t irte2; } dmar_irte_t; /* Source Validation Type */ #define DMAR_IRTE2_SVT_NONE (0ULL << (82 - 64)) #define DMAR_IRTE2_SVT_RID (1ULL << (82 - 64)) #define DMAR_IRTE2_SVT_BUS (2ULL << (82 - 64)) /* Source-id Qualifier */ #define DMAR_IRTE2_SQ_RID (0ULL << (80 - 64)) #define DMAR_IRTE2_SQ_RID_N2 (1ULL << (80 - 64)) #define DMAR_IRTE2_SQ_RID_N21 (2ULL << (80 - 64)) #define DMAR_IRTE2_SQ_RID_N210 (3ULL << (80 - 64)) /* Source Identifier */ #define DMAR_IRTE2_SID_RID(x) ((uint64_t)(x)) #define DMAR_IRTE2_SID_BUS(start, end) ((((uint64_t)(start)) << 8) | (end)) /* Destination Id */ #define DMAR_IRTE1_DST_xAPIC(x) (((uint64_t)(x)) << 40) #define DMAR_IRTE1_DST_x2APIC(x) (((uint64_t)(x)) << 32) /* Vector */ #define DMAR_IRTE1_V(x) (((uint64_t)x) << 16) #define DMAR_IRTE1_IM_POSTED (1ULL << 15) /* Posted */ /* Delivery Mode */ #define DMAR_IRTE1_DLM_FM (0ULL << 5) #define DMAR_IRTE1_DLM_LP (1ULL << 5) #define DMAR_IRTE1_DLM_SMI (2ULL << 5) #define DMAR_IRTE1_DLM_NMI (4ULL << 5) #define DMAR_IRTE1_DLM_INIT (5ULL << 5) #define DMAR_IRTE1_DLM_ExtINT (7ULL << 5) /* Trigger Mode */ #define DMAR_IRTE1_TM_EDGE (0ULL << 4) #define DMAR_IRTE1_TM_LEVEL (1ULL << 4) /* Redirection Hint */ #define DMAR_IRTE1_RH_DIRECT (0ULL << 3) #define DMAR_IRTE1_RH_SELECT (1ULL << 3) /* Destination Mode */ #define DMAR_IRTE1_DM_PHYSICAL (0ULL << 2) #define DMAR_IRTE1_DM_LOGICAL (1ULL << 2) #define DMAR_IRTE1_FPD (1ULL << 1) /* Fault Processing Disable */ #define DMAR_IRTE1_P (1ULL) /* Present */ /* Version register */ #define DMAR_VER_REG 0 #define DMAR_MAJOR_VER(x) (((x) >> 4) & 0xf) #define DMAR_MINOR_VER(x) ((x) & 0xf) /* Capabilities register */ #define DMAR_CAP_REG 0x8 #define DMAR_CAP_PI 
(1ULL << 59) /* Posted Interrupts */ #define DMAR_CAP_FL1GP (1ULL << 56) /* First Level 1GByte Page */ #define DMAR_CAP_DRD (1ULL << 55) /* DMA Read Draining */ #define DMAR_CAP_DWD (1ULL << 54) /* DMA Write Draining */ #define DMAR_CAP_MAMV(x) ((u_int)(((x) >> 48) & 0x3f)) /* Maximum Address Mask */ #define DMAR_CAP_NFR(x) ((u_int)(((x) >> 40) & 0xff) + 1) /* Num of Fault-recording regs */ #define DMAR_CAP_PSI (1ULL << 39) /* Page Selective Invalidation */ #define DMAR_CAP_SPS(x) ((u_int)(((x) >> 34) & 0xf)) /* Super-Page Support */ #define DMAR_CAP_SPS_2M 0x1 #define DMAR_CAP_SPS_1G 0x2 #define DMAR_CAP_SPS_512G 0x4 #define DMAR_CAP_SPS_1T 0x8 #define DMAR_CAP_FRO(x) ((u_int)(((x) >> 24) & 0x1ff)) /* Fault-recording reg offset */ #define DMAR_CAP_ISOCH (1 << 23) /* Isochrony */ #define DMAR_CAP_ZLR (1 << 22) /* Zero-length reads */ #define DMAR_CAP_MGAW(x) ((u_int)(((x) >> 16) & 0x3f)) /* Max Guest Address Width */ #define DMAR_CAP_SAGAW(x) ((u_int)(((x) >> 8) & 0x1f)) /* Adjusted Guest Address Width */ #define DMAR_CAP_SAGAW_2LVL 0x01 #define DMAR_CAP_SAGAW_3LVL 0x02 #define DMAR_CAP_SAGAW_4LVL 0x04 #define DMAR_CAP_SAGAW_5LVL 0x08 #define DMAR_CAP_SAGAW_6LVL 0x10 #define DMAR_CAP_CM (1 << 7) /* Caching mode */ #define DMAR_CAP_PHMR (1 << 6) /* Protected High-mem Region */ #define DMAR_CAP_PLMR (1 << 5) /* Protected Low-mem Region */ #define DMAR_CAP_RWBF (1 << 4) /* Required Write-Buffer Flushing */ #define DMAR_CAP_AFL (1 << 3) /* Advanced Fault Logging */ #define DMAR_CAP_ND(x) ((u_int)((x) & 0x3)) /* Number of domains */ /* Extended Capabilities register */ #define DMAR_ECAP_REG 0x10 #define DMAR_ECAP_PSS(x) (((x) >> 35) & 0xf) /* PASID Size Supported */ #define DMAR_ECAP_EAFS (1ULL << 34) /* Extended Accessed Flag */ #define DMAR_ECAP_NWFS (1ULL << 33) /* No Write Flag */ #define DMAR_ECAP_SRS (1ULL << 31) /* Supervisor Request */ #define DMAR_ECAP_ERS (1ULL << 30) /* Execute Request */ #define DMAR_ECAP_PRS (1ULL << 29) /* Page Request */ #define DMAR_ECAP_PASID (1ULL << 28) /* Process Address Space Id */ #define DMAR_ECAP_DIS (1ULL << 27) /* Deferred Invalidate */ #define DMAR_ECAP_NEST (1ULL << 26) /* Nested Translation */ #define DMAR_ECAP_MTS (1ULL << 25) /* Memory Type */ #define DMAR_ECAP_ECS (1ULL << 24) /* Extended Context */ #define DMAR_ECAP_MHMV(x) ((u_int)(((x) >> 20) & 0xf)) /* Maximum Handle Mask Value */ #define DMAR_ECAP_IRO(x) ((u_int)(((x) >> 8) & 0x3ff)) /* IOTLB Register Offset */ #define DMAR_ECAP_SC (1 << 7) /* Snoop Control */ #define DMAR_ECAP_PT (1 << 6) /* Pass Through */ #define DMAR_ECAP_EIM (1 << 4) /* Extended Interrupt Mode (x2APIC) */ #define DMAR_ECAP_IR (1 << 3) /* Interrupt Remapping */ #define DMAR_ECAP_DI (1 << 2) /* Device IOTLB */ #define DMAR_ECAP_QI (1 << 1) /* Queued Invalidation */ #define DMAR_ECAP_C (1 << 0) /* Coherency */ /* Global Command register */ #define DMAR_GCMD_REG 0x18 #define DMAR_GCMD_TE (1U << 31) /* Translation Enable */ #define DMAR_GCMD_SRTP (1 << 30) /* Set Root Table Pointer */ #define DMAR_GCMD_SFL (1 << 29) /* Set Fault Log */ #define DMAR_GCMD_EAFL (1 << 28) /* Enable Advanced Fault Logging */ #define DMAR_GCMD_WBF (1 << 27) /* Write Buffer Flush */ #define DMAR_GCMD_QIE (1 << 26) /* Queued Invalidation Enable */ #define DMAR_GCMD_IRE (1 << 25) /* Interrupt Remapping Enable */ #define DMAR_GCMD_SIRTP (1 << 24) /* Set Interrupt Remap Table Pointer */ #define DMAR_GCMD_CFI (1 << 23) /* Compatibility Format Interrupt */ /* Global Status register */ #define DMAR_GSTS_REG 0x1c #define DMAR_GSTS_TES (1U << 31) /* 
Translation Enable Status */ #define DMAR_GSTS_RTPS (1 << 30) /* Root Table Pointer Status */ #define DMAR_GSTS_FLS (1 << 29) /* Fault Log Status */ #define DMAR_GSTS_AFLS (1 << 28) /* Advanced Fault Logging Status */ #define DMAR_GSTS_WBFS (1 << 27) /* Write Buffer Flush Status */ #define DMAR_GSTS_QIES (1 << 26) /* Queued Invalidation Enable Status */ #define DMAR_GSTS_IRES (1 << 25) /* Interrupt Remapping Enable Status */ #define DMAR_GSTS_IRTPS (1 << 24) /* Interrupt Remapping Table Pointer Status */ #define DMAR_GSTS_CFIS (1 << 23) /* Compatibility Format Interrupt Status */ /* Root-Entry Table Address register */ #define DMAR_RTADDR_REG 0x20 #define DMAR_RTADDR_RTT (1 << 11) /* Root Table Type */ #define DMAR_RTADDR_RTA_MASK 0xfffffffffffff000 /* Context Command register */ #define DMAR_CCMD_REG 0x28 #define DMAR_CCMD_ICC (1ULL << 63) /* Invalidate Context-Cache */ #define DMAR_CCMD_ICC32 (1U << 31) #define DMAR_CCMD_CIRG_MASK (0x3ULL << 61) /* Context Invalidation Request Granularity */ #define DMAR_CCMD_CIRG_GLOB (0x1ULL << 61) /* Global */ #define DMAR_CCMD_CIRG_DOM (0x2ULL << 61) /* Domain */ #define DMAR_CCMD_CIRG_DEV (0x3ULL << 61) /* Device */ #define DMAR_CCMD_CAIG(x) (((x) >> 59) & 0x3) /* Context Actual Invalidation Granularity */ #define DMAR_CCMD_CAIG_GLOB 0x1 /* Global */ #define DMAR_CCMD_CAIG_DOM 0x2 /* Domain */ #define DMAR_CCMD_CAIG_DEV 0x3 /* Device */ #define DMAR_CCMD_FM (0x3ULL << 32) /* Function Mask */ #define DMAR_CCMD_SID(x) (((x) & 0xffff) << 16) /* Source-ID */ #define DMAR_CCMD_DID(x) ((x) & 0xffff) /* Domain-ID */ /* Invalidate Address register */ #define DMAR_IVA_REG_OFF 0 #define DMAR_IVA_IH (1 << 6) /* Invalidation Hint */ #define DMAR_IVA_AM(x) ((x) & 0x1f) /* Address Mask */ #define DMAR_IVA_ADDR(x) ((x) & ~0xfffULL) /* Address */ /* IOTLB Invalidate register */ #define DMAR_IOTLB_REG_OFF 0x8 #define DMAR_IOTLB_IVT (1ULL << 63) /* Invalidate IOTLB */ #define DMAR_IOTLB_IVT32 (1U << 31) #define DMAR_IOTLB_IIRG_MASK (0x3ULL << 60) /* Invalidation Request Granularity */ #define DMAR_IOTLB_IIRG_GLB (0x1ULL << 60) /* Global */ #define DMAR_IOTLB_IIRG_DOM (0x2ULL << 60) /* Domain-selective */ #define DMAR_IOTLB_IIRG_PAGE (0x3ULL << 60) /* Page-selective */ #define DMAR_IOTLB_IAIG_MASK (0x3ULL << 57) /* Actual Invalidation Granularity */ #define DMAR_IOTLB_IAIG_INVLD 0 /* Hw detected error */ #define DMAR_IOTLB_IAIG_GLB (0x1ULL << 57) /* Global */ #define DMAR_IOTLB_IAIG_DOM (0x2ULL << 57) /* Domain-selective */ #define DMAR_IOTLB_IAIG_PAGE (0x3ULL << 57) /* Page-selective */ #define DMAR_IOTLB_DR (0x1ULL << 49) /* Drain Reads */ #define DMAR_IOTLB_DW (0x1ULL << 48) /* Drain Writes */ #define DMAR_IOTLB_DID(x) (((uint64_t)(x) & 0xffff) << 32) /* Domain Id */ /* Fault Status register */ #define DMAR_FSTS_REG 0x34 #define DMAR_FSTS_FRI(x) (((x) >> 8) & 0xff) /* Fault Record Index */ #define DMAR_FSTS_ITE (1 << 6) /* Invalidation Time-out */ #define DMAR_FSTS_ICE (1 << 5) /* Invalidation Completion */ #define DMAR_FSTS_IQE (1 << 4) /* Invalidation Queue */ #define DMAR_FSTS_APF (1 << 3) /* Advanced Pending Fault */ #define DMAR_FSTS_AFO (1 << 2) /* Advanced Fault Overflow */ #define DMAR_FSTS_PPF (1 << 1) /* Primary Pending Fault */ #define DMAR_FSTS_PFO 1 /* Fault Overflow */ /* Fault Event Control register */ #define DMAR_FECTL_REG 0x38 #define DMAR_FECTL_IM (1U << 31) /* Interrupt Mask */ #define DMAR_FECTL_IP (1 << 30) /* Interrupt Pending */ /* Fault Event Data register */ #define DMAR_FEDATA_REG 0x3c /* Fault Event Address register */ #define
DMAR_FEADDR_REG 0x40 /* Fault Event Upper Address register */ #define DMAR_FEUADDR_REG 0x44 /* Advanced Fault Log register */ #define DMAR_AFLOG_REG 0x58 /* Fault Recording Register, also usable for Advanced Fault Log records */ #define DMAR_FRCD2_F (1ULL << 63) /* Fault */ #define DMAR_FRCD2_F32 (1U << 31) #define DMAR_FRCD2_T(x) ((int)((x >> 62) & 1)) /* Type */ #define DMAR_FRCD2_T_W 0 /* Write request */ #define DMAR_FRCD2_T_R 1 /* Read or AtomicOp */ #define DMAR_FRCD2_AT(x) ((int)((x >> 60) & 0x3)) /* Address Type */ #define DMAR_FRCD2_FR(x) ((int)((x >> 32) & 0xff)) /* Fault Reason */ #define DMAR_FRCD2_SID(x) ((int)(x & 0xffff)) /* Source Identifier */ #define DMAR_FRCD1_FI_MASK 0xffffffffff000 /* Fault Info, Address Mask */ /* Protected Memory Enable register */ #define DMAR_PMEN_REG 0x64 #define DMAR_PMEN_EPM (1U << 31) /* Enable Protected Memory */ #define DMAR_PMEN_PRS 1 /* Protected Region Status */ /* Protected Low-Memory Base register */ #define DMAR_PLMBASE_REG 0x68 /* Protected Low-Memory Limit register */ #define DMAR_PLMLIMIT_REG 0x6c /* Protected High-Memory Base register */ #define DMAR_PHMBASE_REG 0x70 /* Protected High-Memory Limit register */ #define DMAR_PHMLIMIT_REG 0x78 /* Queued Invalidation Descriptors */ #define DMAR_IQ_DESCR_SZ_SHIFT 4 /* Shift for descriptor count to ring offset */ #define DMAR_IQ_DESCR_SZ (1 << DMAR_IQ_DESCR_SZ_SHIFT) /* Descriptor size */ /* Context-cache Invalidate Descriptor */ #define DMAR_IQ_DESCR_CTX_INV 0x1 #define DMAR_IQ_DESCR_CTX_GLOB (0x1 << 4) /* Granularity: Global */ #define DMAR_IQ_DESCR_CTX_DOM (0x2 << 4) /* Granularity: Domain */ #define DMAR_IQ_DESCR_CTX_DEV (0x3 << 4) /* Granularity: Device */ #define DMAR_IQ_DESCR_CTX_DID(x) (((uint32_t)(x)) << 16) /* Domain Id */ #define DMAR_IQ_DESCR_CTX_SRC(x) (((uint64_t)(x)) << 32) /* Source Id */ #define DMAR_IQ_DESCR_CTX_FM(x) (((uint64_t)(x)) << 48) /* Function Mask */ /* IOTLB Invalidate Descriptor */ #define DMAR_IQ_DESCR_IOTLB_INV 0x2 #define DMAR_IQ_DESCR_IOTLB_GLOB (0x1 << 4) /* Granularity: Global */ #define DMAR_IQ_DESCR_IOTLB_DOM (0x2 << 4) /* Granularity: Domain */ #define DMAR_IQ_DESCR_IOTLB_PAGE (0x3 << 4) /* Granularity: Page */ #define DMAR_IQ_DESCR_IOTLB_DW (1 << 6) /* Drain Writes */ #define DMAR_IQ_DESCR_IOTLB_DR (1 << 7) /* Drain Reads */ #define DMAR_IQ_DESCR_IOTLB_DID(x) (((uint32_t)(x)) << 16) /* Domain Id */ /* Device-TLB Invalidate Descriptor */ #define DMAR_IQ_DESCR_DTLB_INV 0x3 /* Invalidate Interrupt Entry Cache */ #define DMAR_IQ_DESCR_IEC_INV 0x4 #define DMAR_IQ_DESCR_IEC_IDX (1 << 4) /* Index-Selective Invalidation */ #define DMAR_IQ_DESCR_IEC_IIDX(x) (((uint64_t)x) << 32) /* Interrupt Index */ #define DMAR_IQ_DESCR_IEC_IM(x) ((x) << 27) /* Index Mask */ /* Invalidation Wait Descriptor */ #define DMAR_IQ_DESCR_WAIT_ID 0x5 #define DMAR_IQ_DESCR_WAIT_IF (1 << 4) /* Interrupt Flag */ #define DMAR_IQ_DESCR_WAIT_SW (1 << 5) /* Status Write */ #define DMAR_IQ_DESCR_WAIT_FN (1 << 6) /* Fence */ #define DMAR_IQ_DESCR_WAIT_SD(x) (((uint64_t)(x)) << 32) /* Status Data */ /* Extended IOTLB Invalidate Descriptor */ #define DMAR_IQ_DESCR_EIOTLB_INV 0x6 /* PASID-Cache Invalidate Descriptor */ #define DMAR_IQ_DESCR_PASIDC_INV 0x7 /* Extended Device-TLB Invalidate Descriptor */ #define DMAR_IQ_DESCR_EDTLB_INV 0x8 /* Invalidation Queue Head register */ #define DMAR_IQH_REG 0x80 #define DMAR_IQH_MASK 0x7fff0 /* Next cmd index mask */ /* Invalidation Queue Tail register */ #define DMAR_IQT_REG 0x88 #define DMAR_IQT_MASK 0x7fff0 /* Invalidation Queue Address register */
#define DMAR_IQA_REG 0x90 #define DMAR_IQA_IQA_MASK 0xfffffffffffff000 /* Invalidation Queue Base Address mask */ #define DMAR_IQA_QS_MASK 0x7 /* Queue Size in pages */ #define DMAR_IQA_QS_MAX 0x7 /* Max Queue size */ #define DMAR_IQA_QS_DEF 3 /* Invalidation Completion Status register */ #define DMAR_ICS_REG 0x9c #define DMAR_ICS_IWC 1 /* Invalidation Wait Descriptor Complete */ /* Invalidation Event Control register */ #define DMAR_IECTL_REG 0xa0 #define DMAR_IECTL_IM (1U << 31) /* Interrupt Mask */ #define DMAR_IECTL_IP (1 << 30) /* Interrupt Pending */ /* Invalidation Event Data register */ #define DMAR_IEDATA_REG 0xa4 /* Invalidation Event Address register */ #define DMAR_IEADDR_REG 0xa8 /* Invalidation Event Upper Address register */ #define DMAR_IEUADDR_REG 0xac /* Interrupt Remapping Table Address register */ #define DMAR_IRTA_REG 0xb8 #define DMAR_IRTA_EIME (1 << 11) /* Extended Interrupt Mode Enable */ #define DMAR_IRTA_S_MASK 0xf /* Size Mask */ #endif Index: head/sys/x86/iommu/intel_utils.c =================================================================== --- head/sys/x86/iommu/intel_utils.c (revision 326262) +++ head/sys/x86/iommu/intel_utils.c (revision 326263) @@ -1,676 +1,678 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include u_int dmar_nd2mask(u_int nd) { static const u_int masks[] = { 0x000f, /* nd == 0 */ 0x002f, /* nd == 1 */ 0x00ff, /* nd == 2 */ 0x02ff, /* nd == 3 */ 0x0fff, /* nd == 4 */ 0x2fff, /* nd == 5 */ 0xffff, /* nd == 6 */ 0x0000, /* nd == 7 reserved */ }; KASSERT(nd <= 6, ("number of domains %d", nd)); return (masks[nd]); } static const struct sagaw_bits_tag { int agaw; int cap; int awlvl; int pglvl; } sagaw_bits[] = { {.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL, .pglvl = 2}, {.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL, .pglvl = 3}, {.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL, .pglvl = 4}, {.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL, .pglvl = 5}, {.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL, .pglvl = 6} }; bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl) { int i; for (i = 0; i < nitems(sagaw_bits); i++) { if (sagaw_bits[i].pglvl != pglvl) continue; if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0) return (true); } return (false); } int domain_set_agaw(struct dmar_domain *domain, int mgaw) { int sagaw, i; domain->mgaw = mgaw; sagaw = DMAR_CAP_SAGAW(domain->dmar->hw_cap); for (i = 0; i < nitems(sagaw_bits); i++) { if (sagaw_bits[i].agaw >= mgaw) { domain->agaw = sagaw_bits[i].agaw; domain->pglvl = sagaw_bits[i].pglvl; domain->awlvl = sagaw_bits[i].awlvl; return (0); } } device_printf(domain->dmar->dev, "context request mgaw %d: no agaw found, sagaw %x\n", mgaw, sagaw); return (EINVAL); } /* * Find the best-fit mgaw for the given maxaddr: * - if allow_less is false, must find sagaw which maps all requested * addresses (used by identity mappings); * - if allow_less is true, and no supported sagaw can map all requested * address space, accept the biggest sagaw, whatever it is. */ int dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less) { int i; for (i = 0; i < nitems(sagaw_bits); i++) { if ((1ULL << sagaw_bits[i].agaw) >= maxaddr && (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0) break; } if (allow_less && i == nitems(sagaw_bits)) { do { i--; } while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) == 0); } if (i < nitems(sagaw_bits)) return (sagaw_bits[i].agaw); KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d", (uintmax_t) maxaddr, allow_less)); return (-1); } /* * Calculate the total number of page table pages needed to map the * whole bus address space on the context with the selected agaw. */ vm_pindex_t pglvl_max_pages(int pglvl) { vm_pindex_t res; int i; for (res = 0, i = pglvl; i > 0; i--) { res *= DMAR_NPTEPG; res++; } return (res); } /* * Return true if the page table level lvl supports superpages for * the domain.
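pglvl_max_pages() above evaluates the recurrence res = res * DMAR_NPTEPG + 1, i.e. the geometric series 1 + 512 + 512^2 + ... for a fully populated page-table tree. A standalone check of that arithmetic, assuming DMAR_NPTEPG is 512 (a 4 KB page of 8-byte PTEs):

/*
 * Standalone check of the pglvl_max_pages() recurrence, under the
 * assumption NPTEPG == 512.  The result is the page count of a fully
 * populated tree: one root page plus 512 second-level pages, etc.
 */
#include <stdint.h>
#include <stdio.h>

#define NPTEPG	512

static uint64_t
max_pages(int pglvl)
{
	uint64_t res = 0;
	int i;

	for (i = pglvl; i > 0; i--) {
		res *= NPTEPG;
		res++;
	}
	return (res);
}

int
main(void)
{
	printf("%llu\n", (unsigned long long)max_pages(3)); /* 262657 */
	printf("%llu\n", (unsigned long long)max_pages(4)); /* 134480385 */
	return (0);
}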
*/ int domain_is_sp_lvl(struct dmar_domain *domain, int lvl) { int alvl, cap_sps; static const int sagaw_sp[] = { DMAR_CAP_SPS_2M, DMAR_CAP_SPS_1G, DMAR_CAP_SPS_512G, DMAR_CAP_SPS_1T }; alvl = domain->pglvl - lvl - 1; cap_sps = DMAR_CAP_SPS(domain->dmar->hw_cap); return (alvl < nitems(sagaw_sp) && (sagaw_sp[alvl] & cap_sps) != 0); } dmar_gaddr_t pglvl_page_size(int total_pglvl, int lvl) { int rlvl; static const dmar_gaddr_t pg_sz[] = { (dmar_gaddr_t)DMAR_PAGE_SIZE, (dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT, (dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT), (dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT), (dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT), (dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT) }; KASSERT(lvl >= 0 && lvl < total_pglvl, ("total %d lvl %d", total_pglvl, lvl)); rlvl = total_pglvl - lvl - 1; KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl)); return (pg_sz[rlvl]); } dmar_gaddr_t domain_page_size(struct dmar_domain *domain, int lvl) { return (pglvl_page_size(domain->pglvl, lvl)); } int calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size, dmar_gaddr_t *isizep) { dmar_gaddr_t isize; int am; for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) { isize = 1ULL << (am + DMAR_PAGE_SHIFT); if ((base & (isize - 1)) == 0 && size >= isize) break; if (am == 0) break; } *isizep = isize; return (am); } dmar_haddr_t dmar_high; int haw; int dmar_tbl_pagecnt; vm_page_t dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags) { vm_page_t m; int zeroed, aflags; zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0; aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | ((flags & DMAR_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL : VM_ALLOC_NOWAIT); for (;;) { if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WLOCK(obj); m = vm_page_lookup(obj, idx); if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) { if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WUNLOCK(obj); break; } m = vm_page_alloc_contig(obj, idx, aflags, 1, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WUNLOCK(obj); if (m != NULL) { if (zeroed && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); atomic_add_int(&dmar_tbl_pagecnt, 1); break; } if ((flags & DMAR_PGF_WAITOK) == 0) break; } return (m); } void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags) { vm_page_t m; if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WLOCK(obj); m = vm_page_lookup(obj, idx); if (m != NULL) { vm_page_free(m); atomic_subtract_int(&dmar_tbl_pagecnt, 1); } if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WUNLOCK(obj); } void * dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, struct sf_buf **sf) { vm_page_t m; bool allocated; if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WLOCK(obj); m = vm_page_lookup(obj, idx); if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) { m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL); allocated = true; } else allocated = false; if (m == NULL) { if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WUNLOCK(obj); return (NULL); } /* Sleepable allocations cannot fail. */ if ((flags & DMAR_PGF_WAITOK) != 0) VM_OBJECT_WUNLOCK(obj); sched_pin(); *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK) == 0 ? 
SFB_NOWAIT : 0)); if (*sf == NULL) { sched_unpin(); if (allocated) { VM_OBJECT_ASSERT_WLOCKED(obj); dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL); } if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WUNLOCK(obj); return (NULL); } if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) VM_OBJECT_WLOCK(obj); else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0) VM_OBJECT_WUNLOCK(obj); return ((void *)sf_buf_kva(*sf)); } void dmar_unmap_pgtbl(struct sf_buf *sf) { sf_buf_free(sf); sched_unpin(); } static void dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz) { if (DMAR_IS_COHERENT(unit)) return; /* * If DMAR does not snoop paging structures accesses, flush * CPU cache to memory. */ pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz, TRUE); } void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst) { dmar_flush_transl_to_ram(unit, dst, sizeof(*dst)); } void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst) { dmar_flush_transl_to_ram(unit, dst, sizeof(*dst)); } void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst) { dmar_flush_transl_to_ram(unit, dst, sizeof(*dst)); } /* * Load the root entry pointer into the hardware, busily waiting for * the completion. */ int dmar_load_root_entry_ptr(struct dmar_unit *unit) { vm_page_t root_entry; int error; /* * Access to the GCMD register must be serialized while the * command is submitted. */ DMAR_ASSERT_LOCKED(unit); VM_OBJECT_RLOCK(unit->ctx_obj); root_entry = vm_page_lookup(unit->ctx_obj, 0); VM_OBJECT_RUNLOCK(unit->ctx_obj); dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry)); dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) != 0)); return (error); } /* * Globally invalidate the context entries cache, busily waiting for * the completion. */ int dmar_inv_ctx_glob(struct dmar_unit *unit) { int error; /* * Access to the CCMD register must be serialized while the * command is submitted. */ DMAR_ASSERT_LOCKED(unit); KASSERT(!unit->qi_enabled, ("QI enabled")); /* * The DMAR_CCMD_ICC bit in the upper dword should be written * after the low dword write is completed. Amd64 * dmar_write8() does not have this issue, i386 dmar_write8() * writes the upper dword last. */ dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) == 0)); return (error); } /* * Globally invalidate the IOTLB, busily waiting for the completion. */ int dmar_inv_iotlb_glob(struct dmar_unit *unit) { int error, reg; DMAR_ASSERT_LOCKED(unit); KASSERT(!unit->qi_enabled, ("QI enabled")); reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap); /* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */ dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT | DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW); DMAR_WAIT_UNTIL(((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) & DMAR_IOTLB_IVT32) == 0)); return (error); } /* * Flush the chipset write buffers. See 11.1 "Write Buffer Flushing" * in the architecture specification. */ int dmar_flush_write_bufs(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); /* * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported. 
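The register-based command routines here (dmar_load_root_entry_ptr(), dmar_inv_ctx_glob(), dmar_inv_iotlb_glob(), dmar_flush_write_bufs()) all poll a status bit through DMAR_WAIT_UNTIL and return the error it produces. That macro is defined elsewhere in the tree and is not part of this diff; a plausible sketch of such a bounded busy-wait, assuming it charges against the dmar_hw_timeout budget defined later in this file, is:

/*
 * Sketch only: the real DMAR_WAIT_UNTIL lives elsewhere in the tree.
 * The assumed pattern is to poll the condition until it holds or the
 * configured nanosecond budget (dmar_get_timeout(), below) runs out,
 * leaving ETIMEDOUT in error on failure.
 */
#define WAIT_UNTIL_SKETCH(cond, error) do {				\
	sbintime_t deadline;						\
									\
	deadline = getsbinuptime() + nstosbt(dmar_get_timeout());	\
	for (error = ETIMEDOUT;;) {					\
		if (cond) {						\
			error = 0;					\
			break;						\
		}							\
		if (getsbinuptime() > deadline)				\
			break;						\
		cpu_spinwait();						\
	}								\
} while (0)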
*/ KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0, ("dmar%d: no RWBF", unit->unit)); dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) != 0)); return (error); } int dmar_enable_translation(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); unit->hw_gcmd |= DMAR_GCMD_TE; dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0)); return (error); } int dmar_disable_translation(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); unit->hw_gcmd &= ~DMAR_GCMD_TE; dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0)); return (error); } int dmar_load_irt_ptr(struct dmar_unit *unit) { uint64_t irta, s; int error; DMAR_ASSERT_LOCKED(unit); irta = unit->irt_phys; if (DMAR_X2APIC(unit)) irta |= DMAR_IRTA_EIME; s = fls(unit->irte_cnt) - 2; KASSERT(unit->irte_cnt >= 2 && s <= DMAR_IRTA_S_MASK && powerof2(unit->irte_cnt), ("IRTA_REG_S overflow %x", unit->irte_cnt)); irta |= s; dmar_write8(unit, DMAR_IRTA_REG, irta); dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SIRTP); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRTPS) != 0)); return (error); } int dmar_enable_ir(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); unit->hw_gcmd |= DMAR_GCMD_IRE; unit->hw_gcmd &= ~DMAR_GCMD_CFI; dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRES) != 0)); return (error); } int dmar_disable_ir(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); unit->hw_gcmd &= ~DMAR_GCMD_IRE; dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRES) == 0)); return (error); } #define BARRIER_F \ u_int f_done, f_inproc, f_wakeup; \ \ f_done = 1 << (barrier_id * 3); \ f_inproc = 1 << (barrier_id * 3 + 1); \ f_wakeup = 1 << (barrier_id * 3 + 2) bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id) { BARRIER_F; DMAR_LOCK(dmar); if ((dmar->barrier_flags & f_done) != 0) { DMAR_UNLOCK(dmar); return (false); } if ((dmar->barrier_flags & f_inproc) != 0) { while ((dmar->barrier_flags & f_inproc) != 0) { dmar->barrier_flags |= f_wakeup; msleep(&dmar->barrier_flags, &dmar->lock, 0, "dmarb", 0); } KASSERT((dmar->barrier_flags & f_done) != 0, ("dmar%d barrier %d missing done", dmar->unit, barrier_id)); DMAR_UNLOCK(dmar); return (false); } dmar->barrier_flags |= f_inproc; DMAR_UNLOCK(dmar); return (true); } void dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id) { BARRIER_F; DMAR_ASSERT_LOCKED(dmar); KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc, ("dmar%d barrier %d missed entry", dmar->unit, barrier_id)); dmar->barrier_flags |= f_done; if ((dmar->barrier_flags & f_wakeup) != 0) wakeup(&dmar->barrier_flags); dmar->barrier_flags &= ~(f_inproc | f_wakeup); DMAR_UNLOCK(dmar); } int dmar_match_verbose; int dmar_batch_coalesce = 100; struct timespec dmar_hw_timeout = { .tv_sec = 0, .tv_nsec = 1000000 }; static const uint64_t d = 1000000000; void dmar_update_timeout(uint64_t newval) { /* XXXKIB not atomic */ dmar_hw_timeout.tv_sec = newval / d; dmar_hw_timeout.tv_nsec = newval % d; } uint64_t dmar_get_timeout(void) { return ((uint64_t)dmar_hw_timeout.tv_sec * d + dmar_hw_timeout.tv_nsec); } static int dmar_timeout_sysctl(SYSCTL_HANDLER_ARGS) { uint64_t val; int error; val = dmar_get_timeout(); error = 
sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); dmar_update_timeout(val); return (error); } static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL, ""); SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD, &dmar_tbl_pagecnt, 0, "Count of pages used for DMAR pagetables"); SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RWTUN, &dmar_match_verbose, 0, "Verbose matching of the PCI devices to DMAR paths"); SYSCTL_INT(_hw_dmar, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN, &dmar_batch_coalesce, 0, "Number of qi batches between interrupt"); SYSCTL_PROC(_hw_dmar, OID_AUTO, timeout, CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, dmar_timeout_sysctl, "QU", "Timeout for command wait, in nanoseconds"); #ifdef INVARIANTS int dmar_check_free; SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RWTUN, &dmar_check_free, 0, "Check the GPA RBtree for free_down and free_after validity"); #endif Index: head/sys/x86/isa/atpic.c =================================================================== --- head/sys/x86/isa/atpic.c (revision 326262) +++ head/sys/x86/isa/atpic.c (revision 326263) @@ -1,591 +1,593 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * PIC driver for the 8259A Master and Slave PICs in PC/AT machines. 
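The hw.dmar.timeout handler above converts between the single nanosecond count exposed to the user and the struct timespec split kept in dmar_hw_timeout. From userland the OID can be inspected and tuned with sysctlbyname(3); a minimal sketch:

/*
 * Userland sketch: read and update the hw.dmar.timeout OID declared
 * above via sysctlbyname(3).  Error handling is abbreviated; the 2 ms
 * value is an arbitrary example.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t ns;
	size_t len = sizeof(ns);

	if (sysctlbyname("hw.dmar.timeout", &ns, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("command wait timeout: %ju ns\n", (uintmax_t)ns);

	ns = 2000000;	/* 2 ms */
	if (sysctlbyname("hw.dmar.timeout", NULL, NULL, &ns,
	    sizeof(ns)) == -1) {
		perror("sysctlbyname(set)");
		return (1);
	}
	return (0);
}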
*/ #include __FBSDID("$FreeBSD$"); #include "opt_auto_eoi.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #define SDT_ATPIC SDT_SYSIGT #define GSEL_ATPIC 0 #else #define SDT_ATPIC SDT_SYS386IGT #define GSEL_ATPIC GSEL(GCODE_SEL, SEL_KPL) #endif #define MASTER 0 #define SLAVE 1 #define NUM_ISA_IRQS 16 static void atpic_init(void *dummy); unsigned int imen; /* XXX */ inthand_t IDTVEC(atpic_intr0), IDTVEC(atpic_intr1), IDTVEC(atpic_intr2), IDTVEC(atpic_intr3), IDTVEC(atpic_intr4), IDTVEC(atpic_intr5), IDTVEC(atpic_intr6), IDTVEC(atpic_intr7), IDTVEC(atpic_intr8), IDTVEC(atpic_intr9), IDTVEC(atpic_intr10), IDTVEC(atpic_intr11), IDTVEC(atpic_intr12), IDTVEC(atpic_intr13), IDTVEC(atpic_intr14), IDTVEC(atpic_intr15); #define IRQ(ap, ai) ((ap)->at_irqbase + (ai)->at_irq) #define ATPIC(io, base, eoi, imenptr) \ { { atpic_enable_source, atpic_disable_source, (eoi), \ atpic_enable_intr, atpic_disable_intr, atpic_vector, \ atpic_source_pending, NULL, atpic_resume, atpic_config_intr,\ atpic_assign_cpu }, (io), (base), IDT_IO_INTS + (base), \ (imenptr) } #define INTSRC(irq) \ { { &atpics[(irq) / 8].at_pic }, IDTVEC(atpic_intr ## irq ), \ (irq) % 8 } struct atpic { struct pic at_pic; int at_ioaddr; int at_irqbase; uint8_t at_intbase; uint8_t *at_imen; }; struct atpic_intsrc { struct intsrc at_intsrc; inthand_t *at_intr; int at_irq; /* Relative to PIC base. */ enum intr_trigger at_trigger; u_long at_count; u_long at_straycount; }; static void atpic_enable_source(struct intsrc *isrc); static void atpic_disable_source(struct intsrc *isrc, int eoi); static void atpic_eoi_master(struct intsrc *isrc); static void atpic_eoi_slave(struct intsrc *isrc); static void atpic_enable_intr(struct intsrc *isrc); static void atpic_disable_intr(struct intsrc *isrc); static int atpic_vector(struct intsrc *isrc); static void atpic_resume(struct pic *pic, bool suspend_cancelled); static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void i8259_init(struct atpic *pic, int slave); static struct atpic atpics[] = { ATPIC(IO_ICU1, 0, atpic_eoi_master, (uint8_t *)&imen), ATPIC(IO_ICU2, 8, atpic_eoi_slave, ((uint8_t *)&imen) + 1) }; static struct atpic_intsrc atintrs[] = { INTSRC(0), INTSRC(1), INTSRC(2), INTSRC(3), INTSRC(4), INTSRC(5), INTSRC(6), INTSRC(7), INTSRC(8), INTSRC(9), INTSRC(10), INTSRC(11), INTSRC(12), INTSRC(13), INTSRC(14), INTSRC(15), }; CTASSERT(nitems(atintrs) == NUM_ISA_IRQS); static __inline void _atpic_eoi_master(struct intsrc *isrc) { KASSERT(isrc->is_pic == &atpics[MASTER].at_pic, ("%s: mismatched pic", __func__)); #ifndef AUTO_EOI_1 outb(atpics[MASTER].at_ioaddr, OCW2_EOI); #endif } /* * The data sheet says no auto-EOI on slave, but it sometimes works. * So, if AUTO_EOI_2 is enabled, we use it. 
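The INTSRC() initializer above routes IRQ n to atpics[n / 8] with bit position n % 8 inside that PIC's mask byte, while the two at_imen pointers alias the low (master) and high (slave) bytes of the 16-bit imen word. A small illustration (IMEN_MASK() comes from icu.h and is assumed here to select the per-source bit):

/*
 * Illustration of the IRQ-to-PIC routing encoded by INTSRC() above.
 * IRQ 9, for example, belongs to the slave PIC (9 / 8 == 1) at bit 1
 * (9 % 8) of its IMR; masking it sets bit 9 of the 16-bit imen word,
 * which is bit 1 of the byte the slave's at_imen points into.
 */
static void
irq_to_imen_bit(u_int irq, u_int *pic, u_int *bit)
{
	*pic = irq / 8;		/* 0 = master, 1 = slave */
	*bit = irq % 8;		/* bit within that PIC's IMR byte */
}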
*/ static __inline void _atpic_eoi_slave(struct intsrc *isrc) { KASSERT(isrc->is_pic == &atpics[SLAVE].at_pic, ("%s: mismatched pic", __func__)); #ifndef AUTO_EOI_2 outb(atpics[SLAVE].at_ioaddr, OCW2_EOI); #ifndef AUTO_EOI_1 outb(atpics[MASTER].at_ioaddr, OCW2_EOI); #endif #endif } static void atpic_enable_source(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; spinlock_enter(); if (*ap->at_imen & IMEN_MASK(ai)) { *ap->at_imen &= ~IMEN_MASK(ai); outb(ap->at_ioaddr + ICU_IMR_OFFSET, *ap->at_imen); } spinlock_exit(); } static void atpic_disable_source(struct intsrc *isrc, int eoi) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; spinlock_enter(); if (ai->at_trigger != INTR_TRIGGER_EDGE) { *ap->at_imen |= IMEN_MASK(ai); outb(ap->at_ioaddr + ICU_IMR_OFFSET, *ap->at_imen); } /* * Take care to call these functions directly instead of through * a function pointer. All of the referenced variables should * still be hot in the cache. */ if (eoi == PIC_EOI) { if (isrc->is_pic == &atpics[MASTER].at_pic) _atpic_eoi_master(isrc); else _atpic_eoi_slave(isrc); } spinlock_exit(); } static void atpic_eoi_master(struct intsrc *isrc) { #ifndef AUTO_EOI_1 spinlock_enter(); _atpic_eoi_master(isrc); spinlock_exit(); #endif } static void atpic_eoi_slave(struct intsrc *isrc) { #ifndef AUTO_EOI_2 spinlock_enter(); _atpic_eoi_slave(isrc); spinlock_exit(); #endif } static void atpic_enable_intr(struct intsrc *isrc) { } static void atpic_disable_intr(struct intsrc *isrc) { } static int atpic_vector(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; return (IRQ(ap, ai)); } static int atpic_source_pending(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; return (inb(ap->at_ioaddr) & IMEN_MASK(ai)); } static void atpic_resume(struct pic *pic, bool suspend_cancelled) { struct atpic *ap = (struct atpic *)pic; i8259_init(ap, ap == &atpics[SLAVE]); if (ap == &atpics[SLAVE] && elcr_found) elcr_resume(); } static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; u_int vector; /* Map conforming values to edge/hi and sanity check the values. */ if (trig == INTR_TRIGGER_CONFORM) trig = INTR_TRIGGER_EDGE; if (pol == INTR_POLARITY_CONFORM) pol = INTR_POLARITY_HIGH; vector = atpic_vector(isrc); if ((trig == INTR_TRIGGER_EDGE && pol == INTR_POLARITY_LOW) || (trig == INTR_TRIGGER_LEVEL && pol == INTR_POLARITY_HIGH)) { printf( "atpic: Mismatched config for IRQ%u: trigger %s, polarity %s\n", vector, trig == INTR_TRIGGER_EDGE ? "edge" : "level", pol == INTR_POLARITY_HIGH ? "high" : "low"); return (EINVAL); } /* If there is no change, just return. */ if (ai->at_trigger == trig) return (0); /* * Certain IRQs can never be level/lo, so don't try to set them * that way if asked. At least some ELCR registers ignore setting * these bits as well. */ if ((vector == 0 || vector == 1 || vector == 2 || vector == 13) && trig == INTR_TRIGGER_LEVEL) { if (bootverbose) printf( "atpic: Ignoring invalid level/low configuration for IRQ%u\n", vector); return (EINVAL); } if (!elcr_found) { if (bootverbose) printf("atpic: No ELCR to configure IRQ%u as %s\n", vector, trig == INTR_TRIGGER_EDGE ? 
"edge/high" : "level/low"); return (ENXIO); } if (bootverbose) printf("atpic: Programming IRQ%u as %s\n", vector, trig == INTR_TRIGGER_EDGE ? "edge/high" : "level/low"); spinlock_enter(); elcr_write_trigger(atpic_vector(isrc), trig); ai->at_trigger = trig; spinlock_exit(); return (0); } static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id) { /* * 8259A's are only used in UP in which case all interrupts always * go to the sole CPU and this function shouldn't even be called. */ panic("%s: bad cookie", __func__); } static void i8259_init(struct atpic *pic, int slave) { int imr_addr; /* Reset the PIC and program with next four bytes. */ spinlock_enter(); outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4); imr_addr = pic->at_ioaddr + ICU_IMR_OFFSET; /* Start vector. */ outb(imr_addr, pic->at_intbase); /* * Setup slave links. For the master pic, indicate what line * the slave is configured on. For the slave indicate * which line on the master we are connected to. */ if (slave) outb(imr_addr, ICU_SLAVEID); else outb(imr_addr, IRQ_MASK(ICU_SLAVEID)); /* Set mode. */ if (slave) outb(imr_addr, SLAVE_MODE); else outb(imr_addr, MASTER_MODE); /* Set interrupt enable mask. */ outb(imr_addr, *pic->at_imen); /* Reset is finished, default to IRR on read. */ outb(pic->at_ioaddr, OCW3_SEL | OCW3_RR); /* OCW2_L1 sets priority order to 3-7, 0-2 (com2 first). */ if (!slave) outb(pic->at_ioaddr, OCW2_R | OCW2_SL | OCW2_L1); spinlock_exit(); } void atpic_startup(void) { struct atpic_intsrc *ai; int i; /* Start off with all interrupts disabled. */ imen = 0xffff; i8259_init(&atpics[MASTER], 0); i8259_init(&atpics[SLAVE], 1); atpic_enable_source((struct intsrc *)&atintrs[ICU_SLAVEID]); /* Install low-level interrupt handlers for all of our IRQs. */ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { if (i == ICU_SLAVEID) continue; ai->at_intsrc.is_count = &ai->at_count; ai->at_intsrc.is_straycount = &ai->at_straycount; setidt(((struct atpic *)ai->at_intsrc.is_pic)->at_intbase + ai->at_irq, ai->at_intr, SDT_ATPIC, SEL_KPL, GSEL_ATPIC); } /* * Look for an ELCR. If we find one, update the trigger modes. * If we don't find one, assume that IRQs 0, 1, 2, and 13 are * edge triggered and that everything else is level triggered. * We only use the trigger information to reprogram the ELCR if * we have one and as an optimization to avoid masking edge * triggered interrupts. For the case that we don't have an ELCR, * it doesn't hurt to mask an edge triggered interrupt, so we * assume level trigger for any interrupt that we aren't sure is * edge triggered. */ if (elcr_found) { for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) ai->at_trigger = elcr_read_trigger(i); } else { for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) switch (i) { case 0: case 1: case 2: case 8: case 13: ai->at_trigger = INTR_TRIGGER_EDGE; break; default: ai->at_trigger = INTR_TRIGGER_LEVEL; break; } } } static void atpic_init(void *dummy __unused) { struct atpic_intsrc *ai; int i; /* * Register our PICs, even if we aren't going to use any of their * pins so that they are suspended and resumed. */ if (intr_register_pic(&atpics[0].at_pic) != 0 || intr_register_pic(&atpics[1].at_pic) != 0) panic("Unable to register ATPICs"); /* * If any of the ISA IRQs have an interrupt source already, then * assume that the APICs are being used and don't register any * of our interrupt sources. This makes sure we don't accidentally * use mixed mode. 
The "accidental" use could otherwise occur on * machines that route the ACPI SCI interrupt to a different ISA * IRQ (at least one machine routes it to IRQ 13) thus disabling * that APIC ISA routing and allowing the ATPIC source for that IRQ * to leak through. We used to depend on this feature for routing * IRQ0 via mixed mode, but now we don't use mixed mode at all. */ for (i = 0; i < NUM_ISA_IRQS; i++) if (intr_lookup_source(i) != NULL) return; /* Loop through all interrupt sources and add them. */ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { if (i == ICU_SLAVEID) continue; intr_register_source(&ai->at_intsrc); } } SYSINIT(atpic_init, SI_SUB_INTR, SI_ORDER_FOURTH, atpic_init, NULL); void atpic_handle_intr(u_int vector, struct trapframe *frame) { struct intsrc *isrc; KASSERT(vector < NUM_ISA_IRQS, ("unknown int %u\n", vector)); isrc = &atintrs[vector].at_intsrc; /* * If we don't have an event, see if this is a spurious * interrupt. */ if (isrc->is_event == NULL && (vector == 7 || vector == 15)) { int port, isr; /* * Read the ISR register to see if IRQ 7/15 is really * pending. Reset read register back to IRR when done. */ port = ((struct atpic *)isrc->is_pic)->at_ioaddr; spinlock_enter(); outb(port, OCW3_SEL | OCW3_RR | OCW3_RIS); isr = inb(port); outb(port, OCW3_SEL | OCW3_RR); spinlock_exit(); if ((isr & IRQ_MASK(7)) == 0) return; } intr_execute_handlers(isrc, frame); } #ifdef DEV_ISA /* * Bus attachment for the ISA PIC. */ static struct isa_pnp_id atpic_ids[] = { { 0x0000d041 /* PNP0000 */, "AT interrupt controller" }, { 0 } }; static int atpic_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, atpic_ids); if (result <= 0) device_quiet(dev); return (result); } /* * We might be granted IRQ 2, as this is typically consumed by chaining * between the two PIC components. If we're using the APIC, however, * this may not be the case, and as such we should free the resource. * (XXX untested) * * The generic ISA attachment code will handle allocating any other resources * that we don't explicitly claim here. */ static int atpic_attach(device_t dev) { struct resource *res; int rid; /* Try to allocate our IRQ and then free it. */ rid = 0; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, 0); if (res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, res); return (0); } static device_method_t atpic_methods[] = { /* Device interface */ DEVMETHOD(device_probe, atpic_probe), DEVMETHOD(device_attach, atpic_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t atpic_driver = { "atpic", atpic_methods, 1, /* no softc */ }; static devclass_t atpic_devclass; DRIVER_MODULE(atpic, isa, atpic_driver, atpic_devclass, 0, 0); DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); /* * Return a bitmap of the current interrupt requests. This is 8259-specific * and is only suitable for use at probe time.
*/ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } #endif /* DEV_ISA */ Index: head/sys/x86/isa/atrtc.c =================================================================== --- head/sys/x86/isa/atrtc.c (revision 326262) +++ head/sys/x86/isa/atrtc.c (revision 326263) @@ -1,430 +1,432 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2008 Poul-Henning Kamp * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #endif #include #include "clock_if.h" /* * clock_lock protects low-level access to individual hardware registers. * atrtc_time_lock protects the entire sequence of accessing multiple registers * to read or write the date and time. 
*/ #define RTC_LOCK do { if (!kdb_active) mtx_lock_spin(&clock_lock); } while (0) #define RTC_UNLOCK do { if (!kdb_active) mtx_unlock_spin(&clock_lock); } while (0) struct mtx atrtc_time_lock; MTX_SYSINIT(atrtc_lock_init, &atrtc_time_lock, "atrtc", MTX_DEF); int atrtcclock_disable = 0; static int rtc_reg = -1; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR; /* * RTC support routines */ int rtcin(int reg) { u_char val; RTC_LOCK; if (rtc_reg != reg) { inb(0x84); outb(IO_RTC, reg); rtc_reg = reg; inb(0x84); } val = inb(IO_RTC + 1); RTC_UNLOCK; return (val); } void writertc(int reg, u_char val) { RTC_LOCK; if (rtc_reg != reg) { inb(0x84); outb(IO_RTC, reg); rtc_reg = reg; inb(0x84); } outb(IO_RTC + 1, val); inb(0x84); RTC_UNLOCK; } static __inline int readrtc(int port) { int readval; readval = rtcin(port); if (readval >= 0 && (readval & 0xf) < 0xa && (readval & 0xf0) < 0xa0) return (bcd2bin(readval)); return (0); } static void atrtc_start(void) { writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); } static void atrtc_rate(unsigned rate) { rtc_statusa = RTCSA_DIVIDER | rate; writertc(RTC_STATUSA, rtc_statusa); } static void atrtc_enable_intr(void) { rtc_statusb |= RTCSB_PINTR; writertc(RTC_STATUSB, rtc_statusb); rtcin(RTC_INTR); } static void atrtc_disable_intr(void) { rtc_statusb &= ~RTCSB_PINTR; writertc(RTC_STATUSB, rtc_statusb); rtcin(RTC_INTR); } void atrtc_restore(void) { /* Restore all of the RTC's "status" (actually, control) registers. */ rtcin(RTC_STATUSA); /* dummy to get rtc_reg set */ writertc(RTC_STATUSB, RTCSB_24HR); writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, rtc_statusb); rtcin(RTC_INTR); } static void atrtc_set(struct timespec *ts) { struct clocktime ct; clock_ts_to_ct(ts, &ct); mtx_lock(&atrtc_time_lock); /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); writertc(RTC_SEC, bin2bcd(ct.sec)); /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(ct.min)); /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(ct.hour)); /* Write back Hours */ writertc(RTC_WDAY, ct.dow + 1); /* Write back Weekday */ writertc(RTC_DAY, bin2bcd(ct.day)); /* Write back Day */ writertc(RTC_MONTH, bin2bcd(ct.mon)); /* Write back Month */ writertc(RTC_YEAR, bin2bcd(ct.year % 100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(ct.year / 100)); /* ... and Century */ #endif /* Re-enable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); rtcin(RTC_INTR); mtx_unlock(&atrtc_time_lock); } /********************************************************************** * RTC driver for subr_rtc */ struct atrtc_softc { int port_rid, intr_rid; struct resource *port_res; struct resource *intr_res; void *intr_handler; struct eventtimer et; }; static int rtc_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { atrtc_rate(max(fls(period + (period >> 1)) - 17, 1)); atrtc_enable_intr(); return (0); } static int rtc_stop(struct eventtimer *et) { atrtc_disable_intr(); return (0); } /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. 
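rtc_start() above turns a requested event-timer period into an MC146818 rate value; the chip fires at 32768 >> (rate - 1) Hz, and the period + (period >> 1) term rounds the request to the nearest power-of-two divisor (hence ET_FLAGS_POW2DIV). Worked through for a 128 Hz request, assuming sbintime_t counts one second as 1ULL << 32:

/*
 * Worked example of the divisor math in rtc_start() above, for a
 * requested 128 Hz period:
 *
 *   period            = (1ULL << 32) / 128 = 1 << 25
 *   period + period/2 = 3 << 24   (rounds to the nearest power of 2)
 *   fls(3 << 24)      = 26
 *   rate              = max(26 - 17, 1) = 9
 *   frequency         = 32768 >> (9 - 1) = 128 interrupts per second
 */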
* * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static int rtc_intr(void *arg) { struct atrtc_softc *sc = (struct atrtc_softc *)arg; int flag = 0; while (rtcin(RTC_INTR) & RTCIR_PERIOD) { flag = 1; if (sc->et.et_active) sc->et.et_event_cb(&sc->et, sc->et.et_arg); } return(flag ? FILTER_HANDLED : FILTER_STRAY); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id atrtc_ids[] = { { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int atrtc_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, atrtc_ids); /* ENOENT means no PnP-ID, device is hinted. */ if (result == ENOENT) { device_set_desc(dev, "AT realtime clock"); return (BUS_PROBE_LOW_PRIORITY); } return (result); } static int atrtc_attach(device_t dev) { struct atrtc_softc *sc; rman_res_t s; int i; sc = device_get_softc(dev); sc->port_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &sc->port_rid, IO_RTC, IO_RTC + 1, 2, RF_ACTIVE); if (sc->port_res == NULL) device_printf(dev, "Warning: Couldn't map I/O.\n"); atrtc_start(); clock_register(dev, 1000000); bzero(&sc->et, sizeof(struct eventtimer)); if (!atrtcclock_disable && (resource_int_value(device_get_name(dev), device_get_unit(dev), "clock", &i) != 0 || i != 0)) { sc->intr_rid = 0; while (bus_get_resource(dev, SYS_RES_IRQ, sc->intr_rid, &s, NULL) == 0 && s != 8) sc->intr_rid++; sc->intr_res = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->intr_rid, 8, 8, 1, RF_ACTIVE); if (sc->intr_res == NULL) { device_printf(dev, "Can't map interrupt.\n"); return (0); } else if ((bus_setup_intr(dev, sc->intr_res, INTR_TYPE_CLK, rtc_intr, NULL, sc, &sc->intr_handler))) { device_printf(dev, "Can't setup interrupt.\n"); return (0); } else { /* Bind IRQ to BSP to avoid live migration. */ bus_bind_intr(dev, sc->intr_res, 0); } sc->et.et_name = "RTC"; sc->et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_POW2DIV; sc->et.et_quality = 0; sc->et.et_frequency = 32768; sc->et.et_min_period = 0x00080000; sc->et.et_max_period = 0x80000000; sc->et.et_start = rtc_start; sc->et.et_stop = rtc_stop; sc->et.et_priv = dev; et_register(&sc->et); } return(0); } static int atrtc_resume(device_t dev) { atrtc_restore(); return(0); } static int atrtc_settime(device_t dev __unused, struct timespec *ts) { atrtc_set(ts); return (0); } static int atrtc_gettime(device_t dev, struct timespec *ts) { struct clocktime ct; /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) { device_printf(dev, "WARNING: Battery failure indication\n"); return (EINVAL); } /* * wait for time update to complete * If RTCSA_TUP is zero, we have at least 244us before next update. * This is fast enough on most hardware, but a refinement would be * to make sure that no more than 240us pass after we start reading, * and try again if so. 
*/ mtx_lock(&atrtc_time_lock); while (rtcin(RTC_STATUSA) & RTCSA_TUP) continue; critical_enter(); ct.nsec = 0; ct.sec = readrtc(RTC_SEC); ct.min = readrtc(RTC_MIN); ct.hour = readrtc(RTC_HRS); ct.day = readrtc(RTC_DAY); ct.dow = readrtc(RTC_WDAY) - 1; ct.mon = readrtc(RTC_MONTH); ct.year = readrtc(RTC_YEAR); #ifdef USE_RTC_CENTURY ct.year += readrtc(RTC_CENTURY) * 100; #else ct.year += (ct.year < 80 ? 2000 : 1900); #endif critical_exit(); mtx_unlock(&atrtc_time_lock); /* Set dow = -1 because some clocks don't set it correctly. */ ct.dow = -1; return (clock_ct_to_ts(&ct, ts)); } static device_method_t atrtc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, atrtc_probe), DEVMETHOD(device_attach, atrtc_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, atrtc_resume), /* clock interface */ DEVMETHOD(clock_gettime, atrtc_gettime), DEVMETHOD(clock_settime, atrtc_settime), { 0, 0 } }; static driver_t atrtc_driver = { "atrtc", atrtc_methods, sizeof(struct atrtc_softc), }; static devclass_t atrtc_devclass; DRIVER_MODULE(atrtc, isa, atrtc_driver, atrtc_devclass, 0, 0); DRIVER_MODULE(atrtc, acpi, atrtc_driver, atrtc_devclass, 0, 0); #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ Index: head/sys/x86/isa/elcr.c =================================================================== --- head/sys/x86/isa/elcr.c (revision 326262) +++ head/sys/x86/isa/elcr.c (revision 326263) @@ -1,136 +1,138 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2004 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * The ELCR is a register that controls the trigger mode and polarity of * EISA and ISA interrupts. In FreeBSD 3.x and 4.x, the ELCR was only * consulted for determining the appropriate trigger mode of EISA * interrupts when using an APIC. 
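atrtc_gettime() above reads BCD registers through readrtc() and, when USE_RTC_CENTURY is not compiled in, windows the two-digit year so values below 80 land in 2000-2079 and the rest in 1900-1999. A standalone sketch of that arithmetic (bcd2bin_example mirrors what the kernel's bcd2bin() computes):

/*
 * Sketch of the BCD and century handling in atrtc_gettime() above.
 * A raw RTC year byte of 0x17 passes the nibble checks in readrtc()
 * (both nibbles < 0xa), decodes to 17, and is windowed to 2017.
 */
#include <stdio.h>

static int
bcd2bin_example(int bcd)
{
	return ((bcd >> 4) * 10 + (bcd & 0xf));	/* 0x17 -> 17 */
}

int
main(void)
{
	int year = bcd2bin_example(0x17);

	year += (year < 80 ? 2000 : 1900);	/* -> 2017 */
	printf("%d\n", year);
	return (0);
}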
However, it seems that almost all * systems that include PCI also include an ELCR that manages the ISA * IRQs 0 through 15. Thus, we check for the presence of an ELCR on * every machine by checking to see if the values found at bootup are * sane. Note that the polarity of ISA and EISA IRQs is linked to the * trigger mode. All edge triggered IRQs use active-hi polarity, and * all level triggered interrupts use active-lo polarity. * * The format of the ELCR is simple: it is a 16-bit bitmap where bit 0 * controls IRQ 0, bit 1 controls IRQ 1, etc. If the bit is zero, the * associated IRQ is edge triggered. If the bit is one, the IRQ is * level triggered. */ #include #include #include #include #define ELCR_PORT 0x4d0 #define ELCR_MASK(irq) (1 << (irq)) static int elcr_status; int elcr_found; /* * Check to see if we have what looks like a valid ELCR. We do this by * verifying that IRQs 0, 1, 2, 8, and 13 are all edge triggered. */ int elcr_probe(void) { int i; elcr_status = inb(ELCR_PORT) | inb(ELCR_PORT + 1) << 8; if ((elcr_status & (ELCR_MASK(0) | ELCR_MASK(1) | ELCR_MASK(2) | ELCR_MASK(8) | ELCR_MASK(13))) != 0) return (ENXIO); if (bootverbose) { printf("ELCR Found. ISA IRQs programmed as:\n"); for (i = 0; i < 16; i++) printf(" %2d", i); printf("\n"); for (i = 0; i < 16; i++) if (elcr_status & ELCR_MASK(i)) printf(" L"); else printf(" E"); printf("\n"); } if (resource_disabled("elcr", 0)) return (ENXIO); elcr_found = 1; return (0); } /* * Return the trigger mode of the given IRQ: INTR_TRIGGER_LEVEL or * INTR_TRIGGER_EDGE. */ enum intr_trigger elcr_read_trigger(u_int irq) { KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); KASSERT(irq <= 15, ("%s: invalid IRQ %u", __func__, irq)); if (elcr_status & ELCR_MASK(irq)) return (INTR_TRIGGER_LEVEL); else return (INTR_TRIGGER_EDGE); } /* * Set the trigger mode of the specified IRQ to INTR_TRIGGER_EDGE or * INTR_TRIGGER_LEVEL and update the hardware. */ void elcr_write_trigger(u_int irq, enum intr_trigger trigger) { int new_status; KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); KASSERT(irq <= 15, ("%s: invalid IRQ %u", __func__, irq)); if (trigger == INTR_TRIGGER_LEVEL) new_status = elcr_status | ELCR_MASK(irq); else new_status = elcr_status & ~ELCR_MASK(irq); if (new_status == elcr_status) return; elcr_status = new_status; if (irq >= 8) outb(ELCR_PORT + 1, elcr_status >> 8); else outb(ELCR_PORT, elcr_status & 0xff); } void elcr_resume(void) { KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); outb(ELCR_PORT, elcr_status & 0xff); outb(ELCR_PORT + 1, elcr_status >> 8); } Index: head/sys/x86/isa/isa.c =================================================================== --- head/sys/x86/isa/isa.c (revision 326262) +++ head/sys/x86/isa/isa.c (revision 326263) @@ -1,150 +1,152 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND MIT + * * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /*- * Modifications for Intel architecture by Garrett A. Wollman. * Copyright 1998 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include void isa_init(device_t dev) { } /* * This implementation simply passes the request up to the parent * bus, which in our case is the special i386 nexus, substituting any * configured values if the caller defaulted. We can get away with * this because there is no special mapping for ISA resources on an Intel * platform. When porting this code to another architecture, it may be * necessary to interpose a mapping layer here. */ struct resource * isa_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { /* * Consider adding a resource definition. 
*/ int passthrough = (device_get_parent(child) != bus); int isdefault = RMAN_IS_DEFAULT_RANGE(start, end); struct isa_device* idev = DEVTOISA(child); struct resource_list *rl = &idev->id_resources; struct resource_list_entry *rle; if (!passthrough && !isdefault) { rle = resource_list_find(rl, type, *rid); if (!rle) { if (*rid < 0) return 0; switch (type) { case SYS_RES_IRQ: if (*rid >= ISA_NIRQ) return 0; break; case SYS_RES_DRQ: if (*rid >= ISA_NDRQ) return 0; break; case SYS_RES_MEMORY: if (*rid >= ISA_NMEM) return 0; break; case SYS_RES_IOPORT: if (*rid >= ISA_NPORT) return 0; break; default: return 0; } resource_list_add(rl, type, *rid, start, end, count); } } return resource_list_alloc(rl, bus, child, type, rid, start, end, count, flags); } int isa_release_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { struct isa_device* idev = DEVTOISA(child); struct resource_list *rl = &idev->id_resources; return resource_list_release(rl, bus, child, type, rid, r); } /* * On this platform, isa can also attach to the legacy bus. */ DRIVER_MODULE(isa, legacy, isa_driver, isa_devclass, 0, 0); /* * Attach the ISA bus to the xenpv bus in order to get syscons. */ DRIVER_MODULE(isa, xenpv, isa_driver, isa_devclass, 0, 0); Index: head/sys/x86/isa/orm.c =================================================================== --- head/sys/x86/isa/orm.c (revision 326262) +++ head/sys/x86/isa/orm.c (revision 326263) @@ -1,188 +1,190 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000 Nikolai Saoukh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); /* * Driver to take care of holes in ISA I/O memory occupied * by option rom(s) */ #include #include #include #include #include #include #include #include #include #include #include #define IOMEM_START 0x0a0000 #define IOMEM_STEP 0x000800 #define IOMEM_END 0x100000 #define ORM_ID 0x00004d3e static struct isa_pnp_id orm_ids[] = { { ORM_ID, NULL }, /* ORM0000 */ { 0, NULL }, }; #define MAX_ROMS 32 struct orm_softc { int rnum; int rid[MAX_ROMS]; struct resource *res[MAX_ROMS]; }; static int orm_probe(device_t dev) { return (ISA_PNP_PROBE(device_get_parent(dev), dev, orm_ids)); } static int orm_attach(device_t dev) { return (0); } static void orm_identify(driver_t* driver, device_t parent) { bus_space_handle_t bh; bus_space_tag_t bt; device_t child; u_int32_t chunk = IOMEM_START; struct resource *res; int rid; u_int32_t rom_size; struct orm_softc *sc; u_int8_t buf[3]; if (resource_disabled("orm", 0)) return; child = BUS_ADD_CHILD(parent, ISA_ORDER_SENSITIVE, "orm", -1); device_set_driver(child, driver); isa_set_logicalid(child, ORM_ID); isa_set_vendorid(child, ORM_ID); sc = device_get_softc(child); sc->rnum = 0; while (sc->rnum < MAX_ROMS && chunk < IOMEM_END) { bus_set_resource(child, SYS_RES_MEMORY, sc->rnum, chunk, IOMEM_STEP); rid = sc->rnum; res = bus_alloc_resource_any(child, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (res == NULL) { bus_delete_resource(child, SYS_RES_MEMORY, sc->rnum); chunk += IOMEM_STEP; continue; } bt = rman_get_bustag(res); bh = rman_get_bushandle(res); bus_space_read_region_1(bt, bh, 0, buf, sizeof(buf)); /* * We need to release and delete the resource since we're * changing its size, or the rom isn't there. There * is a checksum field in the ROM to prevent false * positives. However, some common hardware (IBM thinkpads) * neglects to put a valid checksum in the ROM, so we do * not double check the checksum here. On the ISA bus * areas that have no hardware read back as 0xff, so the * tests to see if we have 0x55 followed by 0xaa are * generally sufficient. 
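The scan in orm_identify() above accepts a window only when it starts with the 0x55 0xaa option-ROM signature and carries a sane length byte: the length is expressed in 512-byte units, so rom_size = buf[2] << 9, and the (buf[2] & 0x03) test additionally requires the image to be a multiple of 2 KiB. A standalone sketch with a hypothetical header:

/*
 * Sketch of the ROM header check used by orm_identify() above.
 * buf[] stands in for the first three bytes of a candidate window:
 * the 0x55 0xaa signature followed by the length in 512-byte units.
 * The header values here are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint8_t buf[3] = { 0x55, 0xaa, 0x10 };	/* hypothetical header */
	uint32_t rom_size;

	if (buf[0] != 0x55 || buf[1] != 0xaa || (buf[2] & 0x03) != 0)
		return (1);			/* not a usable ROM */
	rom_size = buf[2] << 9;			/* 0x10 * 512 = 8 KiB */
	printf("%u bytes\n", rom_size);
	return (0);
}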
*/ bus_release_resource(child, SYS_RES_MEMORY, rid, res); bus_delete_resource(child, SYS_RES_MEMORY, sc->rnum); if (buf[0] != 0x55 || buf[1] != 0xAA || (buf[2] & 0x03) != 0) { chunk += IOMEM_STEP; continue; } rom_size = buf[2] << 9; bus_set_resource(child, SYS_RES_MEMORY, sc->rnum, chunk, rom_size); rid = sc->rnum; res = bus_alloc_resource_any(child, SYS_RES_MEMORY, &rid, 0); if (res == NULL) { bus_delete_resource(child, SYS_RES_MEMORY, sc->rnum); chunk += IOMEM_STEP; continue; } sc->rid[sc->rnum] = rid; sc->res[sc->rnum] = res; sc->rnum++; chunk += rom_size; } if (sc->rnum == 0) device_delete_child(parent, child); else if (sc->rnum == 1) device_set_desc(child, "ISA Option ROM"); else device_set_desc(child, "ISA Option ROMs"); } static int orm_detach(device_t dev) { int i; struct orm_softc *sc = device_get_softc(dev); for (i = 0; i < sc->rnum; i++) bus_release_resource(dev, SYS_RES_MEMORY, sc->rid[i], sc->res[i]); return (0); } static device_method_t orm_methods[] = { /* Device interface */ DEVMETHOD(device_identify, orm_identify), DEVMETHOD(device_probe, orm_probe), DEVMETHOD(device_attach, orm_attach), DEVMETHOD(device_detach, orm_detach), { 0, 0 } }; static driver_t orm_driver = { "orm", orm_methods, sizeof (struct orm_softc) }; static devclass_t orm_devclass; DRIVER_MODULE(orm, isa, orm_driver, orm_devclass, 0, 0); Index: head/sys/x86/pci/pci_bus.c =================================================================== --- head/sys/x86/pci/pci_bus.c (revision 326262) +++ head/sys/x86/pci/pci_bus.c (revision 326263) @@ -1,765 +1,767 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1997, Stefan Esser * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CPU_ELAN #include #endif #include #include #include #include "pcib_if.h" int legacy_pcib_maxslots(device_t dev) { return 31; } /* read configuration space register */ uint32_t legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int bytes) { return(pci_cfgregread(bus, slot, func, reg, bytes)); } /* write configuration space register */ void legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t data, int bytes) { pci_cfgregwrite(bus, slot, func, reg, data, bytes); } /* route interrupt */ static int legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin) { #ifdef __HAVE_PIR return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), pin)); #else /* No routing possible */ return (PCI_INVALID_IRQ); #endif } /* Pass MSI requests up to the nexus. */ int legacy_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { device_t bus; bus = device_get_parent(pcib); return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount, irqs)); } int legacy_pcib_alloc_msix(device_t pcib, device_t dev, int *irq) { device_t bus; bus = device_get_parent(pcib); return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq)); } int legacy_pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { device_t bus, hostb; int error, func, slot; bus = device_get_parent(pcib); error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data); if (error) return (error); slot = legacy_get_pcislot(pcib); func = legacy_get_pcifunc(pcib); if (slot == -1 || func == -1) return (0); hostb = pci_find_bsf(0, slot, func); KASSERT(hostb != NULL, ("%s: missing hostb for 0:%d:%d", __func__, slot, func)); pci_ht_map_msi(hostb, *addr); return (0); } static const char * legacy_pcib_is_host_bridge(int bus, int slot, int func, uint32_t id, uint8_t class, uint8_t subclass, uint8_t *busnum) { #ifdef __i386__ const char *s = NULL; static uint8_t pxb[4]; /* hack for 450nx */ *busnum = 0; switch (id) { case 0x12258086: s = "Intel 824?? host to PCI bridge"; /* XXX This is a guess */ /* *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x41, 1); */ *busnum = bus; break; case 0x71208086: s = "Intel 82810 (i810 GMCH) Host To Hub bridge"; break; case 0x71228086: s = "Intel 82810-DC100 (i810-DC100 GMCH) Host To Hub bridge"; break; case 0x71248086: s = "Intel 82810E (i810E GMCH) Host To Hub bridge"; break; case 0x11308086: s = "Intel 82815 (i815 GMCH) Host To Hub bridge"; break; case 0x71808086: s = "Intel 82443LX (440 LX) host to PCI bridge"; break; case 0x71908086: s = "Intel 82443BX (440 BX) host to PCI bridge"; break; case 0x71928086: s = "Intel 82443BX host to PCI bridge (AGP disabled)"; break; case 0x71948086: s = "Intel 82443MX host to PCI bridge"; break; case 0x71a08086: s = "Intel 82443GX host to PCI bridge"; break; case 0x71a18086: s = "Intel 82443GX host to AGP bridge"; break; case 0x71a28086: s = "Intel 82443GX host to PCI bridge (AGP disabled)"; break; case 0x84c48086: s = "Intel 82454KX/GX (Orion) host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x4a, 1); break; case 0x84ca8086: /* * For the 450nx chipset, there is a whole bundle of * things pretending to be host bridges. The MIOC will * be seen first and isn't really a pci bridge (the * actual buses are attached to the PXB's). 
We need to * read the registers of the MIOC to figure out the * bus numbers for the PXB channels. * * Since the MIOC doesn't have a pci bus attached, we * pretend it wasn't there. */ pxb[0] = legacy_pcib_read_config(0, bus, slot, func, 0xd0, 1); /* BUSNO[0] */ pxb[1] = legacy_pcib_read_config(0, bus, slot, func, 0xd1, 1) + 1; /* SUBA[0]+1 */ pxb[2] = legacy_pcib_read_config(0, bus, slot, func, 0xd3, 1); /* BUSNO[1] */ pxb[3] = legacy_pcib_read_config(0, bus, slot, func, 0xd4, 1) + 1; /* SUBA[1]+1 */ return NULL; case 0x84cb8086: switch (slot) { case 0x12: s = "Intel 82454NX PXB#0, Bus#A"; *busnum = pxb[0]; break; case 0x13: s = "Intel 82454NX PXB#0, Bus#B"; *busnum = pxb[1]; break; case 0x14: s = "Intel 82454NX PXB#1, Bus#A"; *busnum = pxb[2]; break; case 0x15: s = "Intel 82454NX PXB#1, Bus#B"; *busnum = pxb[3]; break; } break; case 0x1A308086: s = "Intel 82845 Host to PCI bridge"; break; /* AMD -- vendor 0x1022 */ case 0x30001022: s = "AMD Elan SC520 host to PCI bridge"; #ifdef CPU_ELAN init_AMD_Elan_sc520(); #else printf( "*** WARNING: missing CPU_ELAN -- timekeeping may be wrong\n"); #endif break; case 0x70061022: s = "AMD-751 host to PCI bridge"; break; case 0x700e1022: s = "AMD-761 host to PCI bridge"; break; /* SiS -- vendor 0x1039 */ case 0x04961039: s = "SiS 85c496"; break; case 0x04061039: s = "SiS 85c501"; break; case 0x06011039: s = "SiS 85c601"; break; case 0x55911039: s = "SiS 5591 host to PCI bridge"; break; case 0x00011039: s = "SiS 5591 host to AGP bridge"; break; /* VLSI -- vendor 0x1004 */ case 0x00051004: s = "VLSI 82C592 Host to PCI bridge"; break; /* XXX Here is MVP3, I got the datasheet but NO M/B to test it */ /* totally. Please let me know if anything wrong. -F */ /* XXX need info on the MVP3 -- any takers? */ case 0x05981106: s = "VIA 82C598MVP (Apollo MVP3) host bridge"; break; /* AcerLabs -- vendor 0x10b9 */ /* Funny : The datasheet told me vendor id is "10b8",sub-vendor */ /* id is '10b9" but the register always shows "10b9". 
-Foxfair */ case 0x154110b9: s = "AcerLabs M1541 (Aladdin-V) PCI host bridge"; break; /* OPTi -- vendor 0x1045 */ case 0xc7011045: s = "OPTi 82C700 host to PCI bridge"; break; case 0xc8221045: s = "OPTi 82C822 host to PCI Bridge"; break; /* ServerWorks -- vendor 0x1166 */ case 0x00051166: s = "ServerWorks NB6536 2.0HE host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00061166: /* FALLTHROUGH */ case 0x00081166: /* FALLTHROUGH */ case 0x02011166: /* FALLTHROUGH */ case 0x010f1014: /* IBM re-badged ServerWorks chipset */ s = "ServerWorks host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00091166: s = "ServerWorks NB6635 3.0LE host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00101166: s = "ServerWorks CIOB30 host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00111166: /* FALLTHROUGH */ case 0x03021014: /* IBM re-badged ServerWorks chipset */ s = "ServerWorks CMIC-HE host to PCI-X bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; /* XXX unknown chipset, but working */ case 0x00171166: /* FALLTHROUGH */ case 0x01011166: case 0x01101166: case 0x02251166: s = "ServerWorks host to PCI bridge(unknown chipset)"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; /* Compaq/HP -- vendor 0x0e11 */ case 0x60100e11: s = "Compaq/HP Model 6010 HotPlug PCI Bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0xc8, 1); break; /* Integrated Micro Solutions -- vendor 0x10e0 */ case 0x884910e0: s = "Integrated Micro Solutions VL Bridge"; break; default: if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST) s = "Host to PCI bridge"; break; } return s; #else const char *s = NULL; *busnum = 0; if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST) s = "Host to PCI bridge"; return s; #endif } /* * Scan the first pci bus for host-pci bridges and add pcib instances * to the nexus for each bridge. */ static void legacy_pcib_identify(driver_t *driver, device_t parent) { int bus, slot, func; uint8_t hdrtype; int found = 0; int pcifunchigh; int found824xx = 0; int found_orion = 0; device_t child; devclass_t pci_devclass; if (pci_cfgregopen() == 0) return; /* * Check to see if we haven't already had a PCI bus added * via some other means. If we have, bail since otherwise * we're going to end up duplicating it. */ if ((pci_devclass = devclass_find("pci")) && devclass_get_device(pci_devclass, 0)) return; bus = 0; retry: for (slot = 0; slot <= PCI_SLOTMAX; slot++) { func = 0; hdrtype = legacy_pcib_read_config(0, bus, slot, func, PCIR_HDRTYPE, 1); /* * When enumerating bus devices, the standard says that * one should check the header type and ignore the slots whose * header types that the software doesn't know about. We use * this to filter out devices. */ if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) continue; if ((hdrtype & PCIM_MFDEV) && (!found_orion || hdrtype != 0xff)) pcifunchigh = PCI_FUNCMAX; else pcifunchigh = 0; for (func = 0; func <= pcifunchigh; func++) { /* * Read the IDs and class from the device. 
*/ uint32_t id; uint8_t class, subclass, busnum; const char *s; device_t *devs; int ndevs, i; id = legacy_pcib_read_config(0, bus, slot, func, PCIR_DEVVENDOR, 4); if (id == -1) continue; class = legacy_pcib_read_config(0, bus, slot, func, PCIR_CLASS, 1); subclass = legacy_pcib_read_config(0, bus, slot, func, PCIR_SUBCLASS, 1); s = legacy_pcib_is_host_bridge(bus, slot, func, id, class, subclass, &busnum); if (s == NULL) continue; /* * Check to see if the physical bus has already * been seen. Eg: hybrid 32 and 64 bit host * bridges to the same logical bus. */ if (device_get_children(parent, &devs, &ndevs) == 0) { for (i = 0; s != NULL && i < ndevs; i++) { if (strcmp(device_get_name(devs[i]), "pcib") != 0) continue; if (legacy_get_pcibus(devs[i]) == busnum) s = NULL; } free(devs, M_TEMP); } if (s == NULL) continue; /* * Add at priority 100 to make sure we * go after any motherboard resources */ child = BUS_ADD_CHILD(parent, 100, "pcib", busnum); device_set_desc(child, s); legacy_set_pcibus(child, busnum); legacy_set_pcislot(child, slot); legacy_set_pcifunc(child, func); found = 1; if (id == 0x12258086) found824xx = 1; if (id == 0x84c48086) found_orion = 1; } } if (found824xx && bus == 0) { bus++; goto retry; } /* * Make sure we add at least one bridge since some old * hardware doesn't actually have a host-pci bridge device. * Note that pci_cfgregopen() thinks we have PCI devices.. */ if (!found) { if (bootverbose) printf( "legacy_pcib_identify: no bridge found, adding pcib0 anyway\n"); child = BUS_ADD_CHILD(parent, 100, "pcib", 0); legacy_set_pcibus(child, 0); } } static int legacy_pcib_probe(device_t dev) { if (pci_cfgregopen() == 0) return ENXIO; return -100; } static int legacy_pcib_attach(device_t dev) { #ifdef __HAVE_PIR device_t pir; #endif int bus; bus = pcib_get_bus(dev); #ifdef __HAVE_PIR /* * Look for a PCI BIOS interrupt routing table as that will be * our method of routing interrupts if we have one. */ if (pci_pir_probe(bus, 0)) { pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0); if (pir != NULL) device_probe_and_attach(pir); } #endif device_add_child(dev, "pci", -1); return bus_generic_attach(dev); } int legacy_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { switch (which) { case PCIB_IVAR_DOMAIN: *result = 0; return 0; case PCIB_IVAR_BUS: *result = legacy_get_pcibus(dev); return 0; } return ENOENT; } int legacy_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { switch (which) { case PCIB_IVAR_DOMAIN: return EINVAL; case PCIB_IVAR_BUS: legacy_set_pcibus(dev, value); return 0; } return ENOENT; } /* * Helper routine for x86 Host-PCI bridge driver resource allocation. * This is used to adjust the start address of wildcard allocation * requests to avoid low addresses that are known to be problematic. * * If no memory preference is given, use upper 32MB slot most BIOSes * use for their memory window. This is typically only used on older * laptops that don't have PCI buses behind a PCI bridge, so assuming * > 32MB is likely OK. * * However, this can cause problems for other chipsets, so we make * this tunable by hw.pci.host_mem_start. 
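The clamp described in the comment above only applies to wildcard requests, where the caller leaves the address range fully open. A hedged sketch of such a request from a driver; example_wildcard_alloc() is hypothetical:

static struct resource *
example_wildcard_alloc(device_t dev)
{
	int rid = 0;

	/* start 0 and end ~0 span the whole address space. */
	return (bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
	    0ul, ~0ul, PAGE_SIZE, RF_ACTIVE));
}

Because start + count - 1 != end here, hostb_alloc_start() below raises the start to host_mem_start (or to 0x1000 for port ranges) before handing the request to the generic allocator.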
*/ SYSCTL_DECL(_hw_pci); static unsigned long host_mem_start = 0x80000000; SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start, 0, "Limit the host bridge memory to being above this address."); rman_res_t hostb_alloc_start(int type, rman_res_t start, rman_res_t end, rman_res_t count) { if (start + count - 1 != end) { if (type == SYS_RES_MEMORY && start < host_mem_start) start = host_mem_start; if (type == SYS_RES_IOPORT && start < 0x1000) start = 0x1000; } return (start); } struct resource * legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { #if defined(NEW_PCIB) && defined(PCI_RES_BUS) if (type == PCI_RES_BUS) return (pci_domain_alloc_bus(0, child, rid, start, end, count, flags)); #endif start = hostb_alloc_start(type, start, end, count); return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } #if defined(NEW_PCIB) && defined(PCI_RES_BUS) int legacy_pcib_adjust_resource(device_t dev, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end) { if (type == PCI_RES_BUS) return (pci_domain_adjust_bus(0, child, r, start, end)); return (bus_generic_adjust_resource(dev, child, type, r, start, end)); } int legacy_pcib_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { if (type == PCI_RES_BUS) return (pci_domain_release_bus(0, child, rid, r)); return (bus_generic_release_resource(dev, child, type, rid, r)); } #endif static device_method_t legacy_pcib_methods[] = { /* Device interface */ DEVMETHOD(device_identify, legacy_pcib_identify), DEVMETHOD(device_probe, legacy_pcib_probe), DEVMETHOD(device_attach, legacy_pcib_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, legacy_pcib_read_ivar), DEVMETHOD(bus_write_ivar, legacy_pcib_write_ivar), DEVMETHOD(bus_alloc_resource, legacy_pcib_alloc_resource), #if defined(NEW_PCIB) && defined(PCI_RES_BUS) DEVMETHOD(bus_adjust_resource, legacy_pcib_adjust_resource), DEVMETHOD(bus_release_resource, legacy_pcib_release_resource), #else DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), #endif DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots), DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt), DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, legacy_pcib_map_msi), DEVMETHOD(pcib_request_feature, pcib_request_feature_allow), DEVMETHOD_END }; static devclass_t hostb_devclass; DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1); DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0); /* * Install placeholder to claim the resources owned by the * PCI bus interface. 
This could be used to extract the * config space registers in the extreme case where the PnP * ID is available and the PCI BIOS isn't, but for now we just * eat the PnP ID and do nothing else. * * XXX we should silence this probe, as it will generally confuse * people. */ static struct isa_pnp_id pcibus_pnp_ids[] = { { 0x030ad041 /* PNP0A03 */, "PCI Bus" }, { 0x080ad041 /* PNP0A08 */, "PCIe Bus" }, { 0 } }; static int pcibus_pnp_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, pcibus_pnp_ids)) <= 0) device_quiet(dev); return(result); } static int pcibus_pnp_attach(device_t dev) { return(0); } static device_method_t pcibus_pnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcibus_pnp_probe), DEVMETHOD(device_attach, pcibus_pnp_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static devclass_t pcibus_pnp_devclass; DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1); DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0); #ifdef __HAVE_PIR /* * Provide a PCI-PCI bridge driver for PCI buses behind PCI-PCI bridges * that appear in the PCIBIOS Interrupt Routing Table to use the routing * table for interrupt routing when possible. */ static int pcibios_pcib_probe(device_t bus); static device_method_t pcibios_pcib_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcibios_pcib_probe), /* pcib interface */ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt), {0, 0} }; static devclass_t pcib_devclass; DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods, sizeof(struct pcib_softc), pcib_driver); DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0); static int pcibios_pcib_probe(device_t dev) { int bus; if ((pci_get_class(dev) != PCIC_BRIDGE) || (pci_get_subclass(dev) != PCIS_BRIDGE_PCI)) return (ENXIO); bus = pci_read_config(dev, PCIR_SECBUS_1, 1); if (bus == 0) return (ENXIO); if (!pci_pir_probe(bus, 1)) return (ENXIO); device_set_desc(dev, "PCIBIOS PCI-PCI bridge"); return (-2000); } #endif Index: head/sys/x86/pci/qpi.c =================================================================== --- head/sys/x86/pci/qpi.c (revision 326262) +++ head/sys/x86/pci/qpi.c (revision 326263) @@ -1,316 +1,318 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This driver provides a pseudo-bus to enumerate the PCI buses * present on a system using a QPI chipset. It creates a qpi0 bus that * is a child of nexus0 and then creates Host-PCI bridges as a * child of that. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" struct qpi_device { int qd_pcibus; }; static MALLOC_DEFINE(M_QPI, "qpidrv", "qpi system device"); static void qpi_identify(driver_t *driver, device_t parent) { int do_qpi; /* Check CPUID to ensure this is an i7 CPU of some sort. */ if (cpu_vendor_id != CPU_VENDOR_INTEL || CPUID_TO_FAMILY(cpu_id) != 0x6) return; /* Only discover buses with configuration devices if allowed by user */ do_qpi = 0; TUNABLE_INT_FETCH("hw.attach_intel_csr_pci", &do_qpi); if (!do_qpi) return; /* PCI config register access is required. */ if (pci_cfgregopen() == 0) return; /* Add a qpi bus device. */ if (BUS_ADD_CHILD(parent, 20, "qpi", -1) == NULL) panic("Failed to add qpi bus"); } static int qpi_probe(device_t dev) { device_set_desc(dev, "QPI system bus"); return (BUS_PROBE_SPECIFIC); } /* * Look for a PCI bus with the specified bus address. If one is found, * add a pcib device and return 0. Otherwise, return an error code. */ static int qpi_probe_pcib(device_t dev, int bus) { struct qpi_device *qdev; device_t child; uint32_t devid; int s; /* * If a PCI bus already exists for this bus number, then * fail. */ if (pci_find_bsf(bus, 0, 0) != NULL) return (EEXIST); /* * Attempt to read the device id for every slot, function 0 on * the bus. If all read values are 0xffffffff this means that * the bus is not present. */ for (s = 0; s <= PCI_SLOTMAX; s++) { devid = pci_cfgregread(bus, s, 0, PCIR_DEVVENDOR, 4); if (devid != 0xffffffff) break; } if (devid == 0xffffffff) return (ENOENT); if ((devid & 0xffff) != 0x8086) { if (bootverbose) device_printf(dev, "Device at pci%d.%d.0 has non-Intel vendor 0x%x\n", bus, s, devid & 0xffff); return (ENXIO); } child = BUS_ADD_CHILD(dev, 0, "pcib", -1); if (child == NULL) panic("%s: failed to add pci bus %d", device_get_nameunit(dev), bus); qdev = malloc(sizeof(struct qpi_device), M_QPI, M_WAITOK); qdev->qd_pcibus = bus; device_set_ivars(child, qdev); return (0); } static int qpi_attach(device_t dev) { int bus; /* * Each processor socket has a dedicated PCI bus, sometimes * not enumerated by ACPI. Probe all unattached buses from 0 * to 255. 
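A note on the all-ones test used by qpi_probe_pcib() above: configuration reads that target an absent function terminate without a responder and return all ones, and 0xffff is not a valid vendor ID, so 0xffffffff reliably means "nothing there". A minimal sketch of the same idea; pci_function_present() is a hypothetical helper:

static int
pci_function_present(int bus, int slot)
{
	uint32_t devid;

	/* An absent function reads back as 0xffffffff. */
	devid = pci_cfgregread(bus, slot, 0, PCIR_DEVVENDOR, 4);
	return (devid != 0xffffffff);
}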
*/ for (bus = PCI_BUSMAX; bus >= 0; bus--) qpi_probe_pcib(dev, bus); return (bus_generic_attach(dev)); } static int qpi_print_child(device_t bus, device_t child) { struct qpi_device *qdev; int retval = 0; qdev = device_get_ivars(child); retval += bus_print_child_header(bus, child); if (qdev->qd_pcibus != -1) retval += printf(" pcibus %d", qdev->qd_pcibus); retval += bus_print_child_footer(bus, child); return (retval); } static int qpi_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct qpi_device *qdev; qdev = device_get_ivars(child); switch (which) { case PCIB_IVAR_BUS: *result = qdev->qd_pcibus; break; default: return (ENOENT); } return (0); } static device_method_t qpi_methods[] = { /* Device interface */ DEVMETHOD(device_identify, qpi_identify), DEVMETHOD(device_probe, qpi_probe), DEVMETHOD(device_attach, qpi_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_print_child, qpi_print_child), DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_read_ivar, qpi_read_ivar), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), { 0, 0 } }; static devclass_t qpi_devclass; DEFINE_CLASS_0(qpi, qpi_driver, qpi_methods, 0); DRIVER_MODULE(qpi, nexus, qpi_driver, qpi_devclass, 0, 0); static int qpi_pcib_probe(device_t dev) { device_set_desc(dev, "QPI Host-PCI bridge"); return (BUS_PROBE_SPECIFIC); } static int qpi_pcib_attach(device_t dev) { device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } static int qpi_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { switch (which) { case PCIB_IVAR_DOMAIN: *result = 0; return (0); case PCIB_IVAR_BUS: *result = pcib_get_bus(dev); return (0); default: return (ENOENT); } } #if defined(NEW_PCIB) && defined(PCI_RES_BUS) static struct resource * qpi_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { if (type == PCI_RES_BUS) return (pci_domain_alloc_bus(0, child, rid, start, end, count, flags)); return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } #endif static int qpi_pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { device_t bus; bus = device_get_parent(pcib); return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data)); } static device_method_t qpi_pcib_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qpi_pcib_probe), DEVMETHOD(device_attach, qpi_pcib_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, qpi_pcib_read_ivar), #if defined(NEW_PCIB) && defined(PCI_RES_BUS) DEVMETHOD(bus_alloc_resource, qpi_pcib_alloc_resource), DEVMETHOD(bus_adjust_resource, legacy_pcib_adjust_resource), DEVMETHOD(bus_release_resource, legacy_pcib_release_resource), #else DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), #endif DEVMETHOD(bus_activate_resource, 
bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, pcib_maxslots), DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, qpi_pcib_map_msi), DEVMETHOD_END }; static devclass_t qpi_pcib_devclass; DEFINE_CLASS_0(pcib, qpi_pcib_driver, qpi_pcib_methods, 0); DRIVER_MODULE(pcib, qpi, qpi_pcib_driver, qpi_pcib_devclass, 0, 0); Index: head/sys/x86/x86/busdma_bounce.c =================================================================== --- head/sys/x86/x86/busdma_bounce.c (revision 326262) +++ head/sys/x86/x86/busdma_bounce.c (revision 326263) @@ -1,1283 +1,1285 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define MAX_BPAGES 512 #else #define MAX_BPAGES 8192 #endif enum { BUS_DMA_COULD_BOUNCE = 0x01, BUS_DMA_MIN_ALLOC_COMP = 0x02, BUS_DMA_KMEM_ALLOC = 0x04, }; struct bounce_zone; struct bus_dma_tag { struct bus_dma_tag_common common; int map_count; int bounce_flags; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_offset_t dataoffs; /* page offset of client data */ vm_page_t datapage[2]; /* physical page(s) of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static struct bus_dmamap nobounce_dmamap; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr1, bus_addr_t addr2, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); /* * Allocate a device specific dma_tag. */ static int bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? 
&parent->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof (struct bus_dma_tag), (void **)&newtag); if (error != 0) return (error); newtag->common.impl = &bus_dma_bounce_impl; newtag->map_count = 0; newtag->segments = NULL; if (parent != NULL && ((newtag->common.filter != NULL) || ((parent->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0))) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || newtag->common.alignment > 1) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; } else error = 0; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), error); return (error); } static int bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy, parent; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (bus_dma_tag_t)dmat->common.parent; atomic_subtract_int(&dmat->common.ref_count, 1); if (dmat->common.ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bounce_zone *bz; int error, maxpages, pages; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) return (error); } bz = dmat->bounce_zone; *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, M_NOWAIT | M_ZERO); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. 
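As a concrete illustration of when the BUS_DMA_COULD_BOUNCE path above is taken: a tag whose lowaddr cannot cover all of physical memory, or whose alignment exceeds 1, is marked for bouncing. A hedged sketch of a driver-side tag for a hypothetical device that can only address 32 bits; the names and sizes are made up:

static int
example_create_tag(device_t dev, bus_dma_tag_t *tagp)
{

	/*
	 * On a machine with RAM above 4 GiB, this lowaddr is below
	 * ptoa(Maxmem), so the tag is marked BUS_DMA_COULD_BOUNCE.
	 * The NULL lockfunc is only safe if loads are never deferred
	 * (see bus_dma_dflt_lock in busdma_machdep.c below).
	 */
	return (bus_dma_tag_create(bus_get_dma_tag(dev),
	    4, 0,			/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    65536, 1, 65536,		/* maxsize, nsegments, maxsegsz */
	    0, NULL, NULL, tagp));
}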
*/ if (dmat->common.alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->common.lowaddr)); if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { pages = MAX(atop(dmat->common.maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) { dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; } } else error = 0; } bz->map_count++; } else { *mapp = NULL; } if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (map != NULL && map != &nobounce_dmamap) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; free(map, M_DEVBUF); } dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints listed in the dma tag. * A dmamap for use with dmamap_load is also allocated. */ static int bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { vm_memattr_t attr; int mflags; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; /* If we succeed, no mapping/bouncing will be required */ *mapp = NULL; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else attr = VM_MEMATTR_DEFAULT; /* * Allocate the buffer from the malloc(9) allocator if... * - It's small enough to fit into a single power of two sized bucket. * - The alignment is less than or equal to the maximum size. * - The low address requirement is fulfilled. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. * * NOTE: The (dmat->common.alignment <= dmat->common.maxsize) check * below is just a quick hack. The exact alignment guarantees * of malloc(9) need to be nailed down, and the code below * should be rewritten to take that into account. * * In the meantime warn the user if malloc gets it wrong.
*/ if ((dmat->common.maxsize <= PAGE_SIZE) && (dmat->common.alignment <= dmat->common.maxsize) && dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags); } else if (dmat->common.nsegments >= howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) && dmat->common.alignment <= PAGE_SIZE && (dmat->common.boundary % PAGE_SIZE) == 0) { /* Page-based multi-segment allocations allowed */ *vaddr = (void *)kmem_alloc_attr(kernel_arena, dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } else { *vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ? dmat->common.alignment : 1ul, dmat->common.boundary, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, 0); return (0); } /* * Free a piece of memory and its allocated dmamap, which was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static void bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { /* * dmamem does not need to be bounced, so the map should be * NULL and the BUS_DMA_KMEM_ALLOC flag cleared if malloc() * was used and set if kmem_alloc_contig() was used. */ if (map != NULL) panic("bus_dmamem_free: Invalid map freed\n"); if ((dmat->bounce_flags & BUS_DMA_KMEM_ALLOC) == 0) free(vaddr, M_DEVBUF); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->common.maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->common.maxsegsz); if (bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; bus_size_t sg_len; if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment);
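The matching driver-side calls for the allocator and free routine above, as a hedged sketch; memory obtained from bus_dmamem_alloc() must go back through bus_dmamem_free(), never free(9):

static int
example_dmamem(bus_dma_tag_t tag)
{
	void *vaddr;
	bus_dmamap_t map;

	if (bus_dmamem_alloc(tag, &vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO,
	    &map) != 0)
		return (ENOMEM);
	/* ... program the device with the buffer ... */
	bus_dmamem_free(tag, vaddr, map);
	return (0);
}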
map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, int ma_offs, bus_size_t buflen, int flags) { bus_size_t sg_len, max_sgsize; int page_index; vm_paddr_t paddr; if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ page_index = 0; while (buflen > 0) { paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; sg_len = PAGE_SIZE - ma_offs; max_sgsize = MIN(buflen, dmat->common.maxsegsz); sg_len = MIN(sg_len, max_sgsize); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); sg_len = MIN(sg_len, max_sgsize); KASSERT((sg_len & (dmat->common.alignment - 1)) == 0, ("Segment size is not aligned")); map->pagesneeded++; } if (((ma_offs + sg_len) & ~PAGE_MASK) != 0) page_index++; ma_offs = (ma_offs + sg_len) & PAGE_MASK; KASSERT(buflen >= sg_len, ("Segment length overruns original buffer")); buflen -= sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->common.boundary - 1); if (dmat->common.boundary > 0) { baddr = (curaddr + dmat->common.boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && (dmat->common.boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrance, and the ending segment on exit.
*/ static int bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; int error; if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->common.maxsegsz); if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, 0, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ static int bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize, max_sgsize; bus_addr_t curaddr; vm_offset_t kvaddr, vaddr; int error; if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } vaddr = (vm_offset_t)buf; while (buflen > 0) { /* * Get the physical address for this segment. */ if (pmap == kernel_pmap) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 0, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static int bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t buflen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { vm_paddr_t paddr, next_paddr; int error, page_index; bus_size_t sgsize, max_sgsize; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * If we have to keep the offset of each page this function * is not suitable, switch back to bus_dmamap_load_ma_triv * which is going to do the right thing in this case.
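These loaders sit underneath the public bus_dmamap_load() entry point, which reports the resulting segment list through a callback. A hedged sketch of the usual driver pattern; example_cb(), example_load(), and the single-segment assumption are illustrative only:

static void
example_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{

	if (error == 0)
		*(bus_addr_t *)arg = segs[0].ds_addr;
}

static int
example_load(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	bus_addr_t physaddr;

	/*
	 * With BUS_DMA_NOWAIT the callback runs before this call
	 * returns or the load fails outright, so a stack variable
	 * for the result is safe here.
	 */
	return (bus_dmamap_load(tag, map, buf, PAGE_SIZE, example_cb,
	    &physaddr, BUS_DMA_NOWAIT));
}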
*/ error = bus_dmamap_load_ma_triv(dmat, map, ma, buflen, ma_offs, flags, segs, segp); return (error); } if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_ma(dmat, map, ma, ma_offs, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } page_index = 0; while (buflen > 0) { /* * Compute the segment size, and adjust counts. */ paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - ma_offs; if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, paddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); KASSERT((sgsize & (dmat->common.alignment - 1)) == 0, ("Segment size is not aligned")); /* * Check if two pages of the user provided buffer * are used. */ if ((ma_offs + sgsize) > PAGE_SIZE) next_paddr = VM_PAGE_TO_PHYS(ma[page_index + 1]); else next_paddr = 0; paddr = add_bounce_page(dmat, map, 0, paddr, next_paddr, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, paddr, sgsize, segs, segp); if (sgsize == 0) break; KASSERT(buflen >= sgsize, ("Segment length overruns original buffer")); buflen -= sgsize; if (((ma_offs + sgsize) & ~PAGE_MASK) != 0) page_index++; ma_offs = (ma_offs + sgsize) & PAGE_MASK; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static void bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if (map == NULL) return; map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. */ static void bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; if (map == NULL) return; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } } static void bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; vm_offset_t datavaddr, tempvaddr; bus_size_t datacount1, datacount2; if (map == NULL || (bpage = STAILQ_FIRST(&map->bpages)) == NULL) return; /* * Handle data bouncing. We might also want to add support for * invalidating the caches on broken hardware. */ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->common.flags, op); if ((op & BUS_DMASYNC_PREWRITE) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; datacount1 = bpage->datacount; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage[0]); datavaddr = tempvaddr | bpage->dataoffs; datacount1 = min(PAGE_SIZE - bpage->dataoffs, datacount1); } bcopy((void *)datavaddr, (void *)bpage->vaddr, datacount1); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if (bpage->datapage[1] == 0) { KASSERT(datacount1 == bpage->datacount, ("Mismatch between data size and provided memory space")); goto next_w; } /* * We are dealing with an unmapped buffer that expands * over two pages. 
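The two copy directions implemented below pair up with the driver-visible sync operations: PREWRITE pushes client data into the bounce pages before the device sees the buffer, and POSTREAD pulls device-written data back out. A hedged sketch of the pairing:

static void
example_sync(bus_dma_tag_t tag, bus_dmamap_t map)
{

	/* CPU filled the buffer; copy it toward the device. */
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
	/* ... start the transfer and wait for completion ... */
	/* Device wrote the buffer; copy it back for the CPU. */
	bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD);
}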
*/ datavaddr = pmap_quick_enter_page(bpage->datapage[1]); datacount2 = bpage->datacount - datacount1; bcopy((void *)datavaddr, (void *)(bpage->vaddr + datacount1), datacount2); pmap_quick_remove_page(datavaddr); next_w: bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } if ((op & BUS_DMASYNC_POSTREAD) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; datacount1 = bpage->datacount; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage[0]); datavaddr = tempvaddr | bpage->dataoffs; datacount1 = min(PAGE_SIZE - bpage->dataoffs, datacount1); } bcopy((void *)bpage->vaddr, (void *)datavaddr, datacount1); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if (bpage->datapage[1] == 0) { KASSERT(datacount1 == bpage->datacount, ("Mismatch between data size and provided memory space")); goto next_r; } /* * We are dealing with an unmapped buffer that expands * over two pages. */ datavaddr = pmap_quick_enter_page(bpage->datapage[1]); datacount2 = bpage->datacount - datacount1; bcopy((void *)(bpage->vaddr + datacount1), (void *)datavaddr, datacount2); pmap_quick_remove_page(datavaddr); next_r: bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->common.alignment <= bz->alignment) && (dmat->common.lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->common.lowaddr; bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? 
*/ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr1, bus_addr_t addr2, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL && map != &nobounce_dmamap, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. 
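What "preserving the page offset" means for BUS_DMA_KEEP_PG_OFFSET: the bounce address must carry the same low-order bits as the client address, which the code below achieves by OR-ing the offset into the page-aligned bounce addresses. A minimal sketch of the arithmetic, assuming the usual PAGE_MASK definition; the function name is hypothetical:

static bus_addr_t
example_keep_offset(bus_addr_t bounce_base, bus_addr_t client)
{

	/*
	 * bounce_base is page aligned; OR in the client's offset so,
	 * e.g., client 0x12345678 bounces to ...678 within the page.
	 */
	return (bounce_base | (client & PAGE_MASK));
}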
*/ bpage->vaddr |= addr1 & PAGE_MASK; bpage->busaddr |= addr1 & PAGE_MASK; KASSERT(addr2 == 0, ("Trying to bounce multiple pages with BUS_DMA_KEEP_PG_OFFSET")); } bpage->datavaddr = vaddr; bpage->datapage[0] = PHYS_TO_VM_PAGE(addr1); KASSERT((addr2 & PAGE_MASK) == 0, ("Second page is not aligned")); bpage->datapage[1] = PHYS_TO_VM_PAGE(addr2); bpage->dataoffs = addr1 & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } struct bus_dma_impl bus_dma_bounce_impl = { .tag_create = bounce_bus_dma_tag_create, .tag_destroy = bounce_bus_dma_tag_destroy, .map_create = bounce_bus_dmamap_create, .map_destroy = bounce_bus_dmamap_destroy, .mem_alloc = bounce_bus_dmamem_alloc, .mem_free = bounce_bus_dmamem_free, .load_phys = bounce_bus_dmamap_load_phys, .load_buffer = bounce_bus_dmamap_load_buffer, .load_ma = bounce_bus_dmamap_load_ma, .map_waitok = bounce_bus_dmamap_waitok, .map_complete = bounce_bus_dmamap_complete, .map_unload = bounce_bus_dmamap_unload, .map_sync = bounce_bus_dmamap_sync }; Index: head/sys/x86/x86/busdma_machdep.c =================================================================== --- head/sys/x86/x86/busdma_machdep.c (revision 326262) +++ head/sys/x86/x86/busdma_machdep.c (revision 326263) @@ -1,223 +1,225 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1997, 1998 Justin T. Gibbs. * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be deferred. * XXX Should have a way to identify which driver is responsible here. */ void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op) { panic("driver error: busdma dflt_lock called"); } /* * Return true if a match is made. * * To find a match, walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match; if there is no filter callback then assume a match.
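 * Worked example (illustrative values, not from this file): for a tag with lowaddr == 0xffffff (a 16 MB ISA-style limit) and alignment == 2, paddr 0x1000001 violates both constraints, so this function reports a match and the page is bounced unless a filter callback returns 0 for it.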
*/ int bus_dma_run_filter(struct bus_dma_tag_common *tc, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > tc->lowaddr && paddr <= tc->highaddr) || ((paddr & (tc->alignment - 1)) != 0)) && (tc->filter == NULL || (*tc->filter)(tc->filterarg, paddr) != 0)) retval = 1; tc = tc->parent; } while (retval == 0 && tc != NULL); return (retval); } int common_bus_dma_tag_create(struct bus_dma_tag_common *parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat) { void *newtag; struct bus_dma_tag_common *common; KASSERT(sz >= sizeof(struct bus_dma_tag_common), ("sz")); /* Basic sanity checking */ if (boundary != 0 && boundary < maxsegsz) maxsegsz = boundary; if (maxsegsz == 0) return (EINVAL); /* Return a NULL tag on failure */ *dmat = NULL; newtag = malloc(sz, M_DEVBUF, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, ENOMEM); return (ENOMEM); } common = newtag; common->impl = &bus_dma_bounce_impl; common->parent = parent; common->alignment = alignment; common->boundary = boundary; common->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); common->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); common->filter = filter; common->filterarg = filterarg; common->maxsize = maxsize; common->nsegments = nsegments; common->maxsegsz = maxsegsz; common->flags = flags; common->ref_count = 1; /* Count ourself */ if (lockfunc != NULL) { common->lockfunc = lockfunc; common->lockfuncarg = lockfuncarg; } else { common->lockfunc = bus_dma_dflt_lock; common->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { common->impl = parent->impl; common->lowaddr = MIN(parent->lowaddr, common->lowaddr); common->highaddr = MAX(parent->highaddr, common->highaddr); if (common->boundary == 0) common->boundary = parent->boundary; else if (parent->boundary != 0) { common->boundary = MIN(parent->boundary, common->boundary); } if (common->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ common->filter = parent->filter; common->filterarg = parent->filterarg; common->parent = parent->parent; } atomic_add_int(&parent->ref_count, 1); } *dmat = common; return (0); } /* * Allocate a device specific dma_tag. 
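 * A hypothetical caller (illustrative 'dev'/'sc' names, not from this
 * file) creating a tag for a device limited to 32-bit DMA addresses:
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
 *	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    DFLTPHYS, 1, DFLTPHYS, 0, busdma_lock_mutex, &sc->sc_mtx,
 *	    &sc->sc_dmat);
 *
 * A lowaddr of BUS_SPACE_MAXADDR_32BIT marks pages above 4 GB as
 * bounce candidates; alignment 1 and boundary 0 add no constraints.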
*/ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { struct bus_dma_tag_common *tc; int error; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); if (parent == NULL) { error = bus_dma_bounce_impl.tag_create(parent, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } else { tc = (struct bus_dma_tag_common *)parent; error = tc->impl->tag_create(parent, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->tag_destroy(dmat)); } Index: head/sys/x86/x86/dump_machdep.c =================================================================== --- head/sys/x86/x86/dump_machdep.c (revision 326262) +++ head/sys/x86/x86/dump_machdep.c (revision 326263) @@ -1,54 +1,56 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2002 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_watchdog.h" #include #include #include #include #include #include #include int do_minidump = 1; SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RWTUN, &do_minidump, 0, "Enable mini crash dumps"); void dumpsys_map_chunk(vm_paddr_t pa, size_t chunk, void **va) { int i; vm_paddr_t a; for (i = 0; i < chunk; i++) { a = pa + i * PAGE_SIZE; *va = pmap_kenter_temporary(trunc_page(a), i); } } Index: head/sys/x86/x86/fdt_machdep.c =================================================================== --- head/sys/x86/x86/fdt_machdep.c (revision 326262) +++ head/sys/x86/x86/fdt_machdep.c (revision 326263) @@ -1,77 +1,79 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013 Juniper Networks, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include int x86_init_fdt(void) { void *dtbp, *mdp; int error; if (OF_install(OFW_FDT, 0) == FALSE) { error = ENXIO; goto out; } mdp = preload_search_by_type("elf kernel"); if (mdp == NULL) mdp = preload_search_by_type("elf32 kernel"); dtbp = MD_FETCH(mdp, MODINFOMD_DTBP, void *); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == NULL) dtbp = &fdt_static_dtb; #endif if (dtbp == NULL) { error = ENOENT; goto out; } error = OF_init(dtbp) ? ENXIO : 0; out: return (error); } Index: head/sys/x86/x86/intr_machdep.c =================================================================== --- head/sys/x86/x86/intr_machdep.c (revision 326262) +++ head/sys/x86/x86/intr_machdep.c (revision 326263) @@ -1,739 +1,741 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Machine dependent interrupt code for x86. For x86, we have to * deal with different PICs. 
Thus, we use the passed in vector to lookup * an interrupt source associated with that vector. The interrupt source * describes which PIC the source belongs to and includes methods to handle * that source. */ #include "opt_atpic.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifndef DEV_ATPIC #include #include #include #include #include #endif #define MAX_STRAY_LOG 5 typedef void (*mask_fn)(void *); static int intrcnt_index; static struct intsrc *interrupt_sources[NUM_IO_INTS]; #ifdef SMP static struct intsrc *interrupt_sorted[NUM_IO_INTS]; CTASSERT(sizeof(interrupt_sources) == sizeof(interrupt_sorted)); static int intrbalance; SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RW, &intrbalance, 0, "Interrupt auto-balance interval (seconds). Zero disables."); static struct timeout_task intrbalance_task; #endif static struct sx intrsrc_lock; static struct mtx intrpic_lock; static struct mtx intrcnt_lock; static TAILQ_HEAD(pics_head, pic) pics; #if defined(SMP) && !defined(EARLY_AP_STARTUP) static int assign_cpu; #endif u_long intrcnt[INTRCNT_COUNT]; char intrnames[INTRCNT_COUNT * (MAXCOMLEN + 1)]; size_t sintrcnt = sizeof(intrcnt); size_t sintrnames = sizeof(intrnames); static int intr_assign_cpu(void *arg, int cpu); static void intr_disable_src(void *arg); static void intr_init(void *__dummy); static int intr_pic_registered(struct pic *pic); static void intrcnt_setname(const char *name, int index); static void intrcnt_updatename(struct intsrc *is); static void intrcnt_register(struct intsrc *is); static int intr_pic_registered(struct pic *pic) { struct pic *p; TAILQ_FOREACH(p, &pics, pics) { if (p == pic) return (1); } return (0); } /* * Register a new interrupt controller (PIC). This is to support suspend * and resume where we suspend/resume controllers rather than individual * sources. This also allows controllers with no active sources (such as * 8259As in a system using the APICs) to participate in suspend and resume. */ int intr_register_pic(struct pic *pic) { int error; mtx_lock(&intrpic_lock); if (intr_pic_registered(pic)) error = EBUSY; else { TAILQ_INSERT_TAIL(&pics, pic, pics); error = 0; } mtx_unlock(&intrpic_lock); return (error); } /* * Register a new interrupt source with the global interrupt system. * The global interrupts need to be disabled when this function is * called. 
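 * (Editor's gloss on the code below: the vector's slot in interrupt_sources[] is checked both before intr_event_create() and again under intrsrc_lock, so a racing second registration of the same vector fails with EEXIST instead of clobbering the first.)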
*/ int intr_register_source(struct intsrc *isrc) { int error, vector; KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC")); vector = isrc->is_pic->pic_vector(isrc); if (interrupt_sources[vector] != NULL) return (EEXIST); error = intr_event_create(&isrc->is_event, isrc, 0, vector, intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source, (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:", vector); if (error) return (error); sx_xlock(&intrsrc_lock); if (interrupt_sources[vector] != NULL) { sx_xunlock(&intrsrc_lock); intr_event_destroy(isrc->is_event); return (EEXIST); } intrcnt_register(isrc); interrupt_sources[vector] = isrc; isrc->is_handlers = 0; sx_xunlock(&intrsrc_lock); return (0); } struct intsrc * intr_lookup_source(int vector) { return (interrupt_sources[vector]); } int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_add_handler(isrc->is_event, name, filter, handler, arg, intr_priority(flags), flags, cookiep); if (error == 0) { sx_xlock(&intrsrc_lock); intrcnt_updatename(isrc); isrc->is_handlers++; if (isrc->is_handlers == 1) { isrc->is_pic->pic_enable_intr(isrc); isrc->is_pic->pic_enable_source(isrc); } sx_xunlock(&intrsrc_lock); } return (error); } int intr_remove_handler(void *cookie) { struct intsrc *isrc; int error; isrc = intr_handler_source(cookie); error = intr_event_remove_handler(cookie); if (error == 0) { sx_xlock(&intrsrc_lock); isrc->is_handlers--; if (isrc->is_handlers == 0) { isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI); isrc->is_pic->pic_disable_intr(isrc); } intrcnt_updatename(isrc); sx_xunlock(&intrsrc_lock); } return (error); } int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (isrc->is_pic->pic_config_intr(isrc, trig, pol)); } static void intr_disable_src(void *arg) { struct intsrc *isrc; isrc = arg; isrc->is_pic->pic_disable_source(isrc, PIC_EOI); } void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) { struct intr_event *ie; int vector; /* * We count software interrupts when we process them. The * code here follows previous practice, but there's an * argument for counting hardware interrupts when they're * processed too. */ (*isrc->is_count)++; VM_CNT_INC(v_intr); ie = isrc->is_event; /* * XXX: We assume that IRQ 0 is only used for the ISA timer * device (clk). */ vector = isrc->is_pic->pic_vector(isrc); if (vector == 0) clkintr_pending = 1; /* * For stray interrupts, mask and EOI the source, bump the * stray count, and log the condition. 
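 * (Since the count is bumped before the comparison below, strays 1 through MAX_STRAY_LOG - 1 are logged at LOG_ERR, stray number MAX_STRAY_LOG emits one final LOG_CRIT line, and later strays are only counted.)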
*/ if (intr_event_handle(ie, frame) != 0) { isrc->is_pic->pic_disable_source(isrc, PIC_EOI); (*isrc->is_straycount)++; if (*isrc->is_straycount < MAX_STRAY_LOG) log(LOG_ERR, "stray irq%d\n", vector); else if (*isrc->is_straycount == MAX_STRAY_LOG) log(LOG_CRIT, "too many stray irq %d's: not logging anymore\n", vector); } } void intr_resume(bool suspend_cancelled) { struct pic *pic; #ifndef DEV_ATPIC atpic_reset(); #endif mtx_lock(&intrpic_lock); TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) pic->pic_resume(pic, suspend_cancelled); } mtx_unlock(&intrpic_lock); } void intr_suspend(void) { struct pic *pic; mtx_lock(&intrpic_lock); TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) { if (pic->pic_suspend != NULL) pic->pic_suspend(pic); } mtx_unlock(&intrpic_lock); } static int intr_assign_cpu(void *arg, int cpu) { #ifdef SMP struct intsrc *isrc; int error; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); /* Nothing to do if there is only a single CPU. */ if (mp_ncpus > 1 && cpu != NOCPU) { #else /* * Don't do anything during early boot. We will pick up the * assignment once the APs are started. */ if (assign_cpu && cpu != NOCPU) { #endif isrc = arg; sx_xlock(&intrsrc_lock); error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); if (error == 0) isrc->is_cpu = cpu; sx_xunlock(&intrsrc_lock); } else error = 0; return (error); #else return (EOPNOTSUPP); #endif } static void intrcnt_setname(const char *name, int index) { snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } static void intrcnt_updatename(struct intsrc *is) { intrcnt_setname(is->is_event->ie_fullname, is->is_index); } static void intrcnt_register(struct intsrc *is) { char straystr[MAXCOMLEN + 1]; KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__)); mtx_lock_spin(&intrcnt_lock); is->is_index = intrcnt_index; intrcnt_index += 2; snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", is->is_pic->pic_vector(is)); intrcnt_updatename(is); is->is_count = &intrcnt[is->is_index]; intrcnt_setname(straystr, is->is_index + 1); is->is_straycount = &intrcnt[is->is_index + 1]; mtx_unlock_spin(&intrcnt_lock); } void intrcnt_add(const char *name, u_long **countp) { mtx_lock_spin(&intrcnt_lock); *countp = &intrcnt[intrcnt_index]; intrcnt_setname(name, intrcnt_index); intrcnt_index++; mtx_unlock_spin(&intrcnt_lock); } static void intr_init(void *dummy __unused) { intrcnt_setname("???", 0); intrcnt_index = 1; TAILQ_INIT(&pics); mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF); sx_init(&intrsrc_lock, "intrsrc"); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); static void intr_init_final(void *dummy __unused) { /* * Enable interrupts on the BSP after all of the interrupt * controllers are initialized. Device interrupts are still * disabled in the interrupt controllers until interrupt * handlers are registered. Interrupts are enabled on each AP * after their first context switch. */ enable_intr(); } SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL); #ifndef DEV_ATPIC /* Initialize the two 8259A's to a known-good shutdown state. 
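 * The writes below follow the standard ICW1-ICW4 handshake: base vector IDT_IO_INTS on the master and IDT_IO_INTS + 8 on the slave, cascade wired through ICU_SLAVEID, all pins left masked (IMR 0xff), and OCW3 set to read the IRR.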
*/ void atpic_reset(void) { outb(IO_ICU1, ICW1_RESET | ICW1_IC4); outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS); outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID)); outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE); outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU1, OCW3_SEL | OCW3_RR); outb(IO_ICU2, ICW1_RESET | ICW1_IC4); outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8); outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID); outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE); outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU2, OCW3_SEL | OCW3_RR); } #endif /* Add a description to an active interrupt handler. */ int intr_describe(u_int vector, void *ih, const char *descr) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_describe_handler(isrc->is_event, ih, descr); if (error) return (error); intrcnt_updatename(isrc); return (0); } void intr_reprogram(void) { struct intsrc *is; int v; sx_xlock(&intrsrc_lock); for (v = 0; v < NUM_IO_INTS; v++) { is = interrupt_sources[v]; if (is == NULL) continue; if (is->is_pic->pic_reprogram_pin != NULL) is->is_pic->pic_reprogram_pin(is); } sx_xunlock(&intrsrc_lock); } #ifdef DDB /* * Dump data about interrupt handlers */ DB_SHOW_COMMAND(irqs, db_show_irqs) { struct intsrc **isrc; int i, verbose; if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; isrc = interrupt_sources; for (i = 0; i < NUM_IO_INTS && !db_pager_quit; i++, isrc++) if (*isrc != NULL) db_dump_intr_event((*isrc)->is_event, verbose); } #endif #ifdef SMP /* * Support for balancing interrupt sources across CPUs. For now we just * allocate CPUs round-robin. */ cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1); static int current_cpu; /* * Return the CPU that the next interrupt source should use. For now * this just returns the next local APIC according to round-robin. */ u_int intr_next_cpu(void) { u_int apic_id; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); if (mp_ncpus == 1) return (PCPU_GET(apic_id)); #else /* Leave all interrupts on the BSP during boot. */ if (!assign_cpu) return (PCPU_GET(apic_id)); #endif mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu]; do { current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } /* Attempt to bind the specified IRQ to the specified CPU. */ int intr_bind(u_int vector, u_char cpu) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (intr_event_bind(isrc->is_event, cpu)); } /* * Add a CPU to our mask of valid CPUs that can be destinations of * interrupts. */ void intr_add_cpu(u_int cpu) { if (cpu >= MAXCPU) panic("%s: Invalid CPU ID", __func__); if (bootverbose) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); CPU_SET(cpu, &intr_cpus); } #ifndef EARLY_AP_STARTUP /* * Distribute all the interrupt sources among the available CPUs once the * AP's have been launched. */ static void intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; u_int cpu; int i; /* Don't bother on UP. */ if (mp_ncpus == 1) return; /* Round-robin assign a CPU to each enabled source. */ sx_xlock(&intrsrc_lock); assign_cpu = 1; for (i = 0; i < NUM_IO_INTS; i++) { isrc = interrupt_sources[i]; if (isrc != NULL && isrc->is_handlers > 0) { /* * If this event is already bound to a CPU, * then assign the source to that CPU instead * of picking one via round-robin. 
Note that * this is careful to only advance the * round-robin if the CPU assignment succeeds. */ cpu = isrc->is_event->ie_cpu; if (cpu == NOCPU) cpu = current_cpu; if (isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]) == 0) { isrc->is_cpu = cpu; if (isrc->is_event->ie_cpu == NOCPU) intr_next_cpu(); } } } sx_xunlock(&intrsrc_lock); } SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL); #endif /* * TODO: Export this information in a non-MD fashion, integrate with vmstat -i. */ static int sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct intsrc *isrc; int error; int i; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sx_slock(&intrsrc_lock); for (i = 0; i < NUM_IO_INTS; i++) { isrc = interrupt_sources[i]; if (isrc == NULL) continue; sbuf_printf(&sbuf, "%s:%d @%d: %ld\n", isrc->is_event->ie_fullname, isrc->is_index, isrc->is_cpu, *isrc->is_count); } sx_sunlock(&intrsrc_lock); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } SYSCTL_PROC(_hw, OID_AUTO, intrs, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_hw_intrs, "A", "interrupt:number @cpu: count"); /* * Compare two, possibly NULL, entries in the interrupt source array * by load. */ static int intrcmp(const void *one, const void *two) { const struct intsrc *i1, *i2; i1 = *(const struct intsrc * const *)one; i2 = *(const struct intsrc * const *)two; if (i1 != NULL && i2 != NULL) return (*i1->is_count - *i2->is_count); if (i1 != NULL) return (1); if (i2 != NULL) return (-1); return (0); } /* * Balance IRQs across available CPUs according to load. */ static void intr_balance(void *dummy __unused, int pending __unused) { struct intsrc *isrc; int interval; u_int cpu; int i; interval = intrbalance; if (interval == 0) goto out; /* * Sort interrupts according to count. */ sx_xlock(&intrsrc_lock); memcpy(interrupt_sorted, interrupt_sources, sizeof(interrupt_sorted)); qsort(interrupt_sorted, NUM_IO_INTS, sizeof(interrupt_sorted[0]), intrcmp); /* * Restart the scan from the same location to avoid moving in the * common case. */ current_cpu = 0; /* * Assign round-robin from most loaded to least. */ for (i = NUM_IO_INTS - 1; i >= 0; i--) { isrc = interrupt_sorted[i]; if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) continue; cpu = current_cpu; intr_next_cpu(); if (isrc->is_cpu != cpu && isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]) == 0) isrc->is_cpu = cpu; } sx_xunlock(&intrsrc_lock); out: taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, interval ? hz * interval : hz * 60); } static void intr_balance_init(void *dummy __unused) { TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance, NULL); taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz); } SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL); #else /* * Always route interrupts to the current processor in the UP case. */ u_int intr_next_cpu(void) { return (PCPU_GET(apic_id)); } #endif Index: head/sys/x86/x86/io_apic.c =================================================================== --- head/sys/x86/x86/io_apic.c (revision 326262) +++ head/sys/x86/x86/io_apic.c (revision 326263) @@ -1,1231 +1,1233 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IOAPIC_ISA_INTS 16 #define IOAPIC_MEM_REGION 32 #define IOAPIC_REDTBL_LO(i) (IOAPIC_REDTBL + (i) * 2) #define IOAPIC_REDTBL_HI(i) (IOAPIC_REDTBL_LO(i) + 1) static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures"); /* * I/O APIC interrupt source driver. Each pin is assigned an IRQ cookie * as laid out in the ACPI System Interrupt number model where each I/O * APIC has a contiguous chunk of the System Interrupt address space. * We assume that IRQs 1 - 15 behave like ISA IRQs and that all other * IRQs behave as PCI IRQs by default. We also assume that the pin for * IRQ 0 is actually an ExtINT pin. The apic enumerators override the * configuration of individual pins as indicated by their tables. * * Documentation for the I/O APIC: "82093AA I/O Advanced Programmable * Interrupt Controller (IOAPIC)", May 1996, Intel Corp. 
* ftp://download.intel.com/design/chipsets/datashts/29056601.pdf */ struct ioapic_intsrc { struct intsrc io_intsrc; u_int io_irq; u_int io_intpin:8; u_int io_vector:8; u_int io_cpu; u_int io_activehi:1; u_int io_edgetrigger:1; u_int io_masked:1; int io_bus:4; uint32_t io_lowreg; u_int io_remap_cookie; }; struct ioapic { struct pic io_pic; u_int io_id:8; /* logical ID */ u_int io_apic_id:4; u_int io_intbase:8; /* System Interrupt base */ u_int io_numintr:8; u_int io_haseoi:1; volatile ioapic_t *io_addr; /* XXX: should use bus_space */ vm_paddr_t io_paddr; STAILQ_ENTRY(ioapic) io_next; device_t pci_dev; /* matched pci device, if found */ struct resource *pci_wnd; /* BAR 0, should be same or alias to io_paddr */ struct ioapic_intsrc io_pins[0]; }; static u_int ioapic_read(volatile ioapic_t *apic, int reg); static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val); static const char *ioapic_bus_string(int bus_type); static void ioapic_print_irq(struct ioapic_intsrc *intpin); static void ioapic_enable_source(struct intsrc *isrc); static void ioapic_disable_source(struct intsrc *isrc, int eoi); static void ioapic_eoi_source(struct intsrc *isrc); static void ioapic_enable_intr(struct intsrc *isrc); static void ioapic_disable_intr(struct intsrc *isrc); static int ioapic_vector(struct intsrc *isrc); static int ioapic_source_pending(struct intsrc *isrc); static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static void ioapic_resume(struct pic *pic, bool suspend_cancelled); static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); static void ioapic_reprogram_intpin(struct intsrc *isrc); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); struct pic ioapic_template = { .pic_enable_source = ioapic_enable_source, .pic_disable_source = ioapic_disable_source, .pic_eoi_source = ioapic_eoi_source, .pic_enable_intr = ioapic_enable_intr, .pic_disable_intr = ioapic_disable_intr, .pic_vector = ioapic_vector, .pic_source_pending = ioapic_source_pending, .pic_suspend = NULL, .pic_resume = ioapic_resume, .pic_config_intr = ioapic_config_intr, .pic_assign_cpu = ioapic_assign_cpu, .pic_reprogram_pin = ioapic_reprogram_intpin, }; static int next_ioapic_base; static u_int next_id; static int enable_extint; SYSCTL_INT(_hw_apic, OID_AUTO, enable_extint, CTLFLAG_RDTUN, &enable_extint, 0, "Enable the ExtINT pin in the first I/O APIC"); static void _ioapic_eoi_source(struct intsrc *isrc, int locked) { struct ioapic_intsrc *src; struct ioapic *io; volatile uint32_t *apic_eoi; uint32_t low1; lapic_eoi(); if (!lapic_eoi_suppression) return; src = (struct ioapic_intsrc *)isrc; if (src->io_edgetrigger) return; io = (struct ioapic *)isrc->is_pic; /* * Handle targeted EOI for level-triggered pins, if broadcast * EOI suppression is supported by LAPICs. */ if (io->io_haseoi) { /* * If the IOAPIC has an EOI register, simply write the vector * number into the reg. */ apic_eoi = (volatile uint32_t *)((volatile char *) io->io_addr + IOAPIC_EOIR); *apic_eoi = src->io_vector; } else { /* * Otherwise, if the IO-APIC is too old to provide an EOIR, * do what Intel did for the Linux kernel: temporarily * switch the pin to edge-trigger and back, masking * the pin during the trick.
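 * (Concretely, the code below writes io_lowreg with IOART_TRGREDG | IOART_INTMSET, which clears the stuck remote-IRR state, and then writes the saved io_lowreg back, returning the pin to its programmed level-triggered setup.)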
*/ if (!locked) mtx_lock_spin(&icu_lock); low1 = src->io_lowreg; low1 &= ~IOART_TRGRLVL; low1 |= IOART_TRGREDG | IOART_INTMSET; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(src->io_intpin), low1); ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(src->io_intpin), src->io_lowreg); if (!locked) mtx_unlock_spin(&icu_lock); } } static u_int ioapic_read(volatile ioapic_t *apic, int reg) { mtx_assert(&icu_lock, MA_OWNED); apic->ioregsel = reg; return (apic->iowin); } static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val) { mtx_assert(&icu_lock, MA_OWNED); apic->ioregsel = reg; apic->iowin = val; } static const char * ioapic_bus_string(int bus_type) { switch (bus_type) { case APIC_BUS_ISA: return ("ISA"); case APIC_BUS_EISA: return ("EISA"); case APIC_BUS_PCI: return ("PCI"); default: return ("unknown"); } } static void ioapic_print_irq(struct ioapic_intsrc *intpin) { switch (intpin->io_irq) { case IRQ_DISABLED: printf("disabled"); break; case IRQ_EXTINT: printf("ExtINT"); break; case IRQ_NMI: printf("NMI"); break; case IRQ_SMI: printf("SMI"); break; default: printf("%s IRQ %u", ioapic_bus_string(intpin->io_bus), intpin->io_irq); } } static void ioapic_enable_source(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; uint32_t flags; mtx_lock_spin(&icu_lock); if (intpin->io_masked) { flags = intpin->io_lowreg & ~IOART_INTMASK; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), flags); intpin->io_masked = 0; } mtx_unlock_spin(&icu_lock); } static void ioapic_disable_source(struct intsrc *isrc, int eoi) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; uint32_t flags; mtx_lock_spin(&icu_lock); if (!intpin->io_masked && !intpin->io_edgetrigger) { flags = intpin->io_lowreg | IOART_INTMSET; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), flags); intpin->io_masked = 1; } if (eoi == PIC_EOI) _ioapic_eoi_source(isrc, 1); mtx_unlock_spin(&icu_lock); } static void ioapic_eoi_source(struct intsrc *isrc) { _ioapic_eoi_source(isrc, 0); } /* * Completely program an intpin based on the data in its interrupt source * structure. */ static void ioapic_program_intpin(struct ioapic_intsrc *intpin) { struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; uint32_t low, high, value; #ifdef ACPI_DMAR int error; #endif /* * If a pin is completely invalid or if it is valid but hasn't * been enabled yet, just ensure that the pin is masked. */ mtx_assert(&icu_lock, MA_OWNED); if (intpin->io_irq == IRQ_DISABLED || (intpin->io_irq < NUM_IO_INTS && intpin->io_vector == 0)) { low = ioapic_read(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin)); if ((low & IOART_INTMASK) == IOART_INTMCLR) ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low | IOART_INTMSET); #ifdef ACPI_DMAR mtx_unlock_spin(&icu_lock); iommu_unmap_ioapic_intr(io->io_apic_id, &intpin->io_remap_cookie); mtx_lock_spin(&icu_lock); #endif return; } #ifdef ACPI_DMAR mtx_unlock_spin(&icu_lock); error = iommu_map_ioapic_intr(io->io_apic_id, intpin->io_cpu, intpin->io_vector, intpin->io_edgetrigger, intpin->io_activehi, intpin->io_irq, &intpin->io_remap_cookie, &high, &low); mtx_lock_spin(&icu_lock); if (error == 0) { ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), high); intpin->io_lowreg = low; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); return; } else if (error != EOPNOTSUPP) { return; } #endif /* Set the destination. 
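 * Physical destination mode is used here: the high word of the redirection entry carries the target local APIC ID in its top byte (hence APIC_ID_SHIFT), while the delivery mode and vector are assembled into the low word below.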
*/ low = IOART_DESTPHY; high = intpin->io_cpu << APIC_ID_SHIFT; /* Program the rest of the low word. */ if (intpin->io_edgetrigger) low |= IOART_TRGREDG; else low |= IOART_TRGRLVL; if (intpin->io_activehi) low |= IOART_INTAHI; else low |= IOART_INTALO; if (intpin->io_masked) low |= IOART_INTMSET; switch (intpin->io_irq) { case IRQ_EXTINT: KASSERT(intpin->io_edgetrigger, ("ExtINT not edge triggered")); low |= IOART_DELEXINT; break; case IRQ_NMI: KASSERT(intpin->io_edgetrigger, ("NMI not edge triggered")); low |= IOART_DELNMI; break; case IRQ_SMI: KASSERT(intpin->io_edgetrigger, ("SMI not edge triggered")); low |= IOART_DELSMI; break; default: KASSERT(intpin->io_vector != 0, ("No vector for IRQ %u", intpin->io_irq)); low |= IOART_DELFIXED | intpin->io_vector; } /* Write the values to the APIC. */ value = ioapic_read(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin)); value &= ~IOART_DEST; value |= high; ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), value); intpin->io_lowreg = low; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); } static void ioapic_reprogram_intpin(struct intsrc *isrc) { mtx_lock_spin(&icu_lock); ioapic_program_intpin((struct ioapic_intsrc *)isrc); mtx_unlock_spin(&icu_lock); } static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; u_int old_vector, new_vector; u_int old_id; /* * On Hyper-V: * - Stick to the first cpu for all I/O APIC pins. * - And don't allow destination cpu changes. */ if (vm_guest == VM_GUEST_HV) { if (intpin->io_vector) return (EINVAL); else apic_id = 0; } /* * keep 1st core as the destination for NMI */ if (intpin->io_irq == IRQ_NMI) apic_id = 0; /* * Set us up to free the old irq. */ old_vector = intpin->io_vector; old_id = intpin->io_cpu; if (old_vector && apic_id == old_id) return (0); /* * Allocate an APIC vector for this interrupt pin. Once * we have a vector we program the interrupt pin. */ new_vector = apic_alloc_vector(apic_id, intpin->io_irq); if (new_vector == 0) return (ENOSPC); /* * Mask the old intpin if it is enabled while it is migrated. * * At least some level-triggered interrupts seem to need the * extra DELAY() to avoid being stuck in a non-EOI'd state. */ mtx_lock_spin(&icu_lock); if (!intpin->io_masked && !intpin->io_edgetrigger) { ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), intpin->io_lowreg | IOART_INTMSET); mtx_unlock_spin(&icu_lock); DELAY(100); mtx_lock_spin(&icu_lock); } intpin->io_cpu = apic_id; intpin->io_vector = new_vector; if (isrc->is_handlers > 0) apic_enable_vector(intpin->io_cpu, intpin->io_vector); if (bootverbose) { printf("ioapic%u: routing intpin %u (", io->io_id, intpin->io_intpin); ioapic_print_irq(intpin); printf(") to lapic %u vector %u\n", intpin->io_cpu, intpin->io_vector); } ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); /* * Free the old vector after the new one is established. This is done * to prevent races where we could miss an interrupt. 
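 * (The new vector was allocated, programmed into the pin, and enabled above, so an interrupt arriving mid-migration is delivered on one vector or the other; only then is the old vector torn down below.)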
*/ if (old_vector) { if (isrc->is_handlers > 0) apic_disable_vector(old_id, old_vector); apic_free_vector(old_id, old_vector, intpin->io_irq); } return (0); } static void ioapic_enable_intr(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0) panic("Couldn't find an APIC vector for IRQ %d", intpin->io_irq); apic_enable_vector(intpin->io_cpu, intpin->io_vector); } static void ioapic_disable_intr(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; u_int vector; if (intpin->io_vector != 0) { /* Mask this interrupt pin and free its APIC vector. */ vector = intpin->io_vector; apic_disable_vector(intpin->io_cpu, vector); mtx_lock_spin(&icu_lock); intpin->io_masked = 1; intpin->io_vector = 0; ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); apic_free_vector(intpin->io_cpu, vector, intpin->io_irq); } } static int ioapic_vector(struct intsrc *isrc) { struct ioapic_intsrc *pin; pin = (struct ioapic_intsrc *)isrc; return (pin->io_irq); } static int ioapic_source_pending(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) return 0; return (lapic_intr_pending(intpin->io_vector)); } static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; int changed; KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM), ("%s: Conforming trigger or polarity\n", __func__)); /* * EISA interrupts always use active high polarity, so don't allow * them to be set to active low. * * XXX: Should we write to the ELCR if the trigger mode changes for * an EISA IRQ or an ISA IRQ with the ELCR present? */ mtx_lock_spin(&icu_lock); if (intpin->io_bus == APIC_BUS_EISA) pol = INTR_POLARITY_HIGH; changed = 0; if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) { if (bootverbose) printf("ioapic%u: Changing trigger for pin %u to %s\n", io->io_id, intpin->io_intpin, trig == INTR_TRIGGER_EDGE ? "edge" : "level"); intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE); changed++; } if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) { if (bootverbose) printf("ioapic%u: Changing polarity for pin %u to %s\n", io->io_id, intpin->io_intpin, pol == INTR_POLARITY_HIGH ? "high" : "low"); intpin->io_activehi = (pol == INTR_POLARITY_HIGH); changed++; } if (changed) ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); return (0); } static void ioapic_resume(struct pic *pic, bool suspend_cancelled) { struct ioapic *io = (struct ioapic *)pic; int i; mtx_lock_spin(&icu_lock); for (i = 0; i < io->io_numintr; i++) ioapic_program_intpin(&io->io_pins[i]); mtx_unlock_spin(&icu_lock); } /* * Create a plain I/O APIC object. */ void * ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) { struct ioapic *io; struct ioapic_intsrc *intpin; volatile ioapic_t *apic; u_int numintr, i; uint32_t value; /* Map the register window so we can access the device. */ apic = pmap_mapdev(addr, IOAPIC_MEM_REGION); mtx_lock_spin(&icu_lock); value = ioapic_read(apic, IOAPIC_VER); mtx_unlock_spin(&icu_lock); /* If its version register doesn't seem to work, punt. */ if (value == 0xffffffff) { pmap_unmapdev((vm_offset_t)apic, IOAPIC_MEM_REGION); return (NULL); } /* Determine the number of vectors and set the APIC ID.
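 * The MAXREDIR field of the version register holds the index of the last redirection entry, so the pin count computed below is that value plus one (a typical discrete IOAPIC reports 0x17, i.e. 24 pins).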
*/ numintr = ((value & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) + 1; io = malloc(sizeof(struct ioapic) + numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK); io->io_pic = ioapic_template; io->pci_dev = NULL; io->pci_wnd = NULL; mtx_lock_spin(&icu_lock); io->io_id = next_id++; io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; if (apic_id != -1 && io->io_apic_id != apic_id) { ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); mtx_unlock_spin(&icu_lock); io->io_apic_id = apic_id; printf("ioapic%u: Changing APIC ID to %d\n", io->io_id, apic_id); } else mtx_unlock_spin(&icu_lock); if (intbase == -1) { intbase = next_ioapic_base; printf("ioapic%u: Assuming intbase of %d\n", io->io_id, intbase); } else if (intbase != next_ioapic_base && bootverbose) printf("ioapic%u: WARNING: intbase %d != expected base %d\n", io->io_id, intbase, next_ioapic_base); io->io_intbase = intbase; next_ioapic_base = intbase + numintr; io->io_numintr = numintr; io->io_addr = apic; io->io_paddr = addr; if (bootverbose) { printf("ioapic%u: ver 0x%02x maxredir 0x%02x\n", io->io_id, (value & IOART_VER_VERSION), (value & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT); } /* * The summary information about IO-APIC versions is taken from * the Linux kernel source: * 0Xh 82489DX * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant * 2Xh I/O(x)APIC which is PCI 2.2 Compliant * 30h-FFh Reserved * IO-APICs with version >= 0x20 have working EOIR register. */ io->io_haseoi = (value & IOART_VER_VERSION) >= 0x20; /* * Initialize pins. Start off with interrupts disabled. Default * to active-hi and edge-triggered for ISA interrupts and active-lo * and level-triggered for all others. */ bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr); mtx_lock_spin(&icu_lock); for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) { intpin->io_intsrc.is_pic = (struct pic *)io; intpin->io_intpin = i; intpin->io_irq = intbase + i; /* * Assume that pin 0 on the first I/O APIC is an ExtINT pin. * Assume that pins 1-15 are ISA interrupts and that all * other pins are PCI interrupts. */ if (intpin->io_irq == 0) ioapic_set_extint(io, i); else if (intpin->io_irq < IOAPIC_ISA_INTS) { intpin->io_bus = APIC_BUS_ISA; intpin->io_activehi = 1; intpin->io_edgetrigger = 1; intpin->io_masked = 1; } else { intpin->io_bus = APIC_BUS_PCI; intpin->io_activehi = 0; intpin->io_edgetrigger = 0; intpin->io_masked = 1; } /* * Route interrupts to the BSP by default. Interrupts may * be routed to other CPUs later after they are enabled. 
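 * (io_vector is still zero for every pin at this point, so the pins are merely masked here; an APIC vector is only allocated later, via ioapic_assign_cpu(), once a handler is attached and the source is enabled.)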
*/ intpin->io_cpu = PCPU_GET(apic_id); value = ioapic_read(apic, IOAPIC_REDTBL_LO(i)); ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET); #ifdef ACPI_DMAR /* dummy, but sets cookie */ mtx_unlock_spin(&icu_lock); iommu_map_ioapic_intr(io->io_apic_id, intpin->io_cpu, intpin->io_vector, intpin->io_edgetrigger, intpin->io_activehi, intpin->io_irq, &intpin->io_remap_cookie, NULL, NULL); mtx_lock_spin(&icu_lock); #endif } mtx_unlock_spin(&icu_lock); return (io); } int ioapic_get_vector(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (-1); return (io->io_pins[pin].io_irq); } int ioapic_disable_pin(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_DISABLED) return (EINVAL); io->io_pins[pin].io_irq = IRQ_DISABLED; if (bootverbose) printf("ioapic%u: intpin %d disabled\n", io->io_id, pin); return (0); } int ioapic_remap_vector(void *cookie, u_int pin, int vector) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || vector < 0) return (EINVAL); if (io->io_pins[pin].io_irq >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_irq = vector; if (bootverbose) printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id, vector, pin); return (0); } int ioapic_set_bus(void *cookie, u_int pin, int bus_type) { struct ioapic *io; if (bus_type < 0 || bus_type > APIC_BUS_MAX) return (EINVAL); io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq >= NUM_IO_INTS) return (EINVAL); if (io->io_pins[pin].io_bus == bus_type) return (0); io->io_pins[pin].io_bus = bus_type; if (bootverbose) printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin, ioapic_bus_string(bus_type)); return (0); } int ioapic_set_nmi(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_NMI) return (0); if (io->io_pins[pin].io_irq >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_NMI; io->io_pins[pin].io_masked = 0; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing NMI -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_smi(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_SMI) return (0); if (io->io_pins[pin].io_irq >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_SMI; io->io_pins[pin].io_masked = 0; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing SMI -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_extint(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_EXTINT) return (0); if (io->io_pins[pin].io_irq >= NUM_IO_INTS) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_EXTINT; if (enable_extint) io->io_pins[pin].io_masked = 0; else io->io_pins[pin].io_masked = 1; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing external 8259A's -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_polarity(void *cookie, u_int pin, enum 
intr_polarity pol) { struct ioapic *io; int activehi; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (io->io_pins[pin].io_irq >= NUM_IO_INTS) return (EINVAL); activehi = (pol == INTR_POLARITY_HIGH); if (io->io_pins[pin].io_activehi == activehi) return (0); io->io_pins[pin].io_activehi = activehi; if (bootverbose) printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) { struct ioapic *io; int edgetrigger; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (io->io_pins[pin].io_irq >= NUM_IO_INTS) return (EINVAL); edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (io->io_pins[pin].io_edgetrigger == edgetrigger) return (0); io->io_pins[pin].io_edgetrigger = edgetrigger; if (bootverbose) printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Register a complete I/O APIC object with the interrupt subsystem. */ void ioapic_register(void *cookie) { struct ioapic_intsrc *pin; struct ioapic *io; volatile ioapic_t *apic; uint32_t flags; int i; io = (struct ioapic *)cookie; apic = io->io_addr; mtx_lock_spin(&icu_lock); flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION; STAILQ_INSERT_TAIL(&ioapic_list, io, io_next); mtx_unlock_spin(&icu_lock); printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n", io->io_id, flags >> 4, flags & 0xf, io->io_intbase, io->io_intbase + io->io_numintr - 1); /* * Reprogram pins to handle special case pins (such as NMI and * SMI) and register valid pins as interrupt sources. */ intr_register_pic(&io->io_pic); for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { ioapic_reprogram_intpin(&pin->io_intsrc); if (pin->io_irq < NUM_IO_INTS) intr_register_source(&pin->io_intsrc); } } /* A simple new-bus driver to consume PCI I/O APIC devices. */ static int ioapic_pci_probe(device_t dev) { if (pci_get_class(dev) == PCIC_BASEPERIPH && pci_get_subclass(dev) == PCIS_BASEPERIPH_PIC) { switch (pci_get_progif(dev)) { case PCIP_BASEPERIPH_PIC_IO_APIC: device_set_desc(dev, "IO APIC"); break; case PCIP_BASEPERIPH_PIC_IOX_APIC: device_set_desc(dev, "IO(x) APIC"); break; default: return (ENXIO); } device_quiet(dev); return (-10000); } return (ENXIO); } static int ioapic_pci_attach(device_t dev) { struct resource *res; volatile ioapic_t *apic; struct ioapic *io; int rid; u_int apic_id; /* * Try to match the enumerated ioapic. Match BAR start * against io_paddr. Because the PCI window may not be the * same as the MADT-reported io window, but only an alias, also read the * APIC ID from the mapped BAR and match against it.
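 * The matching below therefore runs in two passes: first compare the BAR's start address with each enumerated io_paddr, and only if that fails, compare the APIC ID read through the BAR.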
*/ rid = PCIR_BAR(0); res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE | RF_SHAREABLE); if (res == NULL) { if (bootverbose) device_printf(dev, "cannot activate BAR0\n"); return (ENXIO); } apic = (volatile ioapic_t *)rman_get_virtual(res); if (rman_get_size(res) < IOAPIC_WND_SIZE) { if (bootverbose) device_printf(dev, "BAR0 too small (%jd) for IOAPIC window\n", (uintmax_t)rman_get_size(res)); goto fail; } mtx_lock_spin(&icu_lock); apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; /* First match by io window address */ STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_paddr == (vm_paddr_t)rman_get_start(res)) goto found; } /* Then by apic id */ STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_apic_id == apic_id) goto found; } mtx_unlock_spin(&icu_lock); if (bootverbose) device_printf(dev, "cannot match pci bar apic id %d against MADT\n", apic_id); fail: bus_release_resource(dev, SYS_RES_MEMORY, rid, res); return (ENXIO); found: KASSERT(io->pci_dev == NULL, ("ioapic %d pci_dev not NULL", io->io_id)); KASSERT(io->pci_wnd == NULL, ("ioapic %d pci_wnd not NULL", io->io_id)); io->pci_dev = dev; io->pci_wnd = res; if (bootverbose && (io->io_paddr != (vm_paddr_t)rman_get_start(res) || io->io_apic_id != apic_id)) { device_printf(dev, "pci%d:%d:%d:%d pci BAR0@%jx id %d " "MADT id %d paddr@%jx\n", pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), (uintmax_t)rman_get_start(res), apic_id, io->io_apic_id, (uintmax_t)io->io_paddr); } mtx_unlock_spin(&icu_lock); return (0); } static device_method_t ioapic_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ioapic_pci_probe), DEVMETHOD(device_attach, ioapic_pci_attach), { 0, 0 } }; DEFINE_CLASS_0(ioapic, ioapic_pci_driver, ioapic_pci_methods, 0); static devclass_t ioapic_devclass; DRIVER_MODULE(ioapic, pci, ioapic_pci_driver, ioapic_devclass, 0, 0); int ioapic_get_rid(u_int apic_id, uint16_t *ridp) { struct ioapic *io; uintptr_t rid; int error; mtx_lock_spin(&icu_lock); STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_apic_id == apic_id) break; } mtx_unlock_spin(&icu_lock); if (io == NULL || io->pci_dev == NULL) return (EINVAL); error = pci_get_id(io->pci_dev, PCI_ID_RID, &rid); if (error != 0) return (error); *ridp = rid; return (0); } /* * A new-bus driver to consume the memory resources associated with * the APICs in the system. On some systems ACPI or PnPBIOS system * resource devices may already claim these resources. To keep from * breaking those devices, we attach ourself to the nexus device after * legacy0 and acpi0 and ignore any allocation failures. */ static void apic_identify(driver_t *driver, device_t parent) { /* * Add at order 12. acpi0 is probed at order 10 and legacy0 * is probed at order 11. */ if (lapic_paddr != 0) BUS_ADD_CHILD(parent, 12, "apic", 0); } static int apic_probe(device_t dev) { device_set_desc(dev, "APIC resources"); device_quiet(dev); return (0); } static void apic_add_resource(device_t dev, int rid, vm_paddr_t base, size_t length) { int error; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, base, length); if (error) panic("apic_add_resource: resource %d failed set with %d", rid, error); bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_SHAREABLE); } static int apic_attach(device_t dev) { struct ioapic *io; int i; /* Reserve the local APIC. 
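 * Resource id 0 covers the local APIC window; the loop below then claims one rid per discovered I/O APIC, starting at 1.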
*/ apic_add_resource(dev, 0, lapic_paddr, LAPIC_MEM_REGION); i = 1; STAILQ_FOREACH(io, &ioapic_list, io_next) { apic_add_resource(dev, i, io->io_paddr, IOAPIC_MEM_REGION); i++; } return (0); } static device_method_t apic_methods[] = { /* Device interface */ DEVMETHOD(device_identify, apic_identify), DEVMETHOD(device_probe, apic_probe), DEVMETHOD(device_attach, apic_attach), { 0, 0 } }; DEFINE_CLASS_0(apic, apic_driver, apic_methods, 0); static devclass_t apic_devclass; DRIVER_MODULE(apic, nexus, apic_driver, apic_devclass, 0, 0); #include "opt_ddb.h" #ifdef DDB #include static const char * ioapic_delivery_mode(uint32_t mode) { switch (mode) { case IOART_DELFIXED: return ("fixed"); case IOART_DELLOPRI: return ("lowestpri"); case IOART_DELSMI: return ("SMI"); case IOART_DELRSV1: return ("rsrvd1"); case IOART_DELNMI: return ("NMI"); case IOART_DELINIT: return ("INIT"); case IOART_DELRSV2: return ("rsrvd2"); case IOART_DELEXINT: return ("ExtINT"); default: return (""); } } static u_int db_ioapic_read(volatile ioapic_t *apic, int reg) { apic->ioregsel = reg; return (apic->iowin); } static void db_show_ioapic_one(volatile ioapic_t *io_addr) { uint32_t r, lo, hi; int mre, i; r = db_ioapic_read(io_addr, IOAPIC_VER); mre = (r & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT; db_printf("Id 0x%08x Ver 0x%02x MRE %d\n", db_ioapic_read(io_addr, IOAPIC_ID), r & IOART_VER_VERSION, mre); for (i = 0; i < mre; i++) { lo = db_ioapic_read(io_addr, IOAPIC_REDTBL_LO(i)); hi = db_ioapic_read(io_addr, IOAPIC_REDTBL_HI(i)); db_printf(" pin %d Dest %s/%x %smasked Trig %s RemoteIRR %d " "Polarity %s Status %s DeliveryMode %s Vec %d\n", i, (lo & IOART_DESTMOD) == IOART_DESTLOG ? "log" : "phy", (hi & IOART_DEST) >> 24, (lo & IOART_INTMASK) == IOART_INTMSET ? "" : "not", (lo & IOART_TRGRMOD) == IOART_TRGRLVL ? "lvl" : "edge", (lo & IOART_REM_IRR) == IOART_REM_IRR ? 1 : 0, (lo & IOART_INTPOL) == IOART_INTALO ? "low" : "high", (lo & IOART_DELIVS) == IOART_DELIVS ? "pend" : "idle", ioapic_delivery_mode(lo & IOART_DELMOD), (lo & IOART_INTVEC)); } } DB_SHOW_COMMAND(ioapic, db_show_ioapic) { struct ioapic *ioapic; int idx, i; if (!have_addr) { db_printf("usage: show ioapic index\n"); return; } idx = (int)addr; i = 0; STAILQ_FOREACH(ioapic, &ioapic_list, io_next) { if (idx == i) { db_show_ioapic_one(ioapic->io_addr); break; } i++; } } DB_SHOW_ALL_COMMAND(ioapics, db_show_all_ioapics) { struct ioapic *ioapic; STAILQ_FOREACH(ioapic, &ioapic_list, io_next) db_show_ioapic_one(ioapic->io_addr); } #endif Index: head/sys/x86/x86/local_apic.c =================================================================== --- head/sys/x86/x86/local_apic.c (revision 326262) +++ head/sys/x86/x86/local_apic.c (revision 326263) @@ -1,2128 +1,2130 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Local APIC support on Pentium and later processors. */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define SDT_APICT SDT_SYSIGT #define GSEL_APIC 0 #else #define SDT_APIC SDT_SYS386IGT #define SDT_APICT SDT_SYS386TGT #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) #endif static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items"); /* Sanity checks on IDT vectors. */ CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* Magic IRQ values for the timer and syscalls. */ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) #define IRQ_EVTCHN (NUM_IO_INTS + 4) enum lat_timer_mode { LAT_MODE_UNDEF = 0, LAT_MODE_PERIODIC = 1, LAT_MODE_ONESHOT = 2, LAT_MODE_DEADLINE = 3, }; /* * Support for local APICs. Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts * from I/O devices and then forward them on to the local APICs. * * Local APICs can also send interrupts to each other thus providing the * mechanism for IPIs. */ struct lvt { u_int lvt_edgetrigger:1; u_int lvt_activehi:1; u_int lvt_masked:1; u_int lvt_active:1; u_int lvt_mode:16; u_int lvt_vector:8; }; struct lapic { struct lvt la_lvts[APIC_LVT_MAX + 1]; struct lvt la_elvts[APIC_ELVT_MAX + 1];; u_int la_id:8; u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; u_long *la_timer_count; uint64_t la_timer_period; enum lat_timer_mode la_timer_mode; uint32_t lvt_timer_base; uint32_t lvt_timer_last; /* Include IDT_SYSCALL to make indexing easier. */ int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static *lapics; /* Global defaults for local APIC LVT entries. */ static struct lvt lvts[APIC_LVT_MAX + 1] = { { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ }; /* Global defaults for AMD local APIC ELVT entries. 
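 * Field order matches struct lvt above: each entry is edge-triggered,
 * active-high, masked, and inactive until a CPU activates it, with
 * fixed delivery mode.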
*/ static struct lvt elvts[APIC_ELVT_MAX + 1] = { { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, }; static inthand_t *ioint_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1), /* 32 - 63 */ IDTVEC(apic_isr2), /* 64 - 95 */ IDTVEC(apic_isr3), /* 96 - 127 */ IDTVEC(apic_isr4), /* 128 - 159 */ IDTVEC(apic_isr5), /* 160 - 191 */ IDTVEC(apic_isr6), /* 192 - 223 */ IDTVEC(apic_isr7), /* 224 - 255 */ }; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; extern inthand_t IDTVEC(rsvd); volatile char *lapic_map; vm_paddr_t lapic_paddr; int x2apic_mode; int lapic_eoi_suppression; static int lapic_timer_tsc_deadline; static u_long lapic_timer_divisor, count_freq; static struct eventtimer lapic_et; #ifdef SMP static uint64_t lapic_ipi_wait_mult; #endif unsigned int max_apic_id; SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, &lapic_eoi_suppression, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD, &lapic_timer_tsc_deadline, 0, ""); static uint32_t lapic_read32(enum LAPIC_REGISTERS reg) { uint32_t res; if (x2apic_mode) { res = rdmsr32(MSR_APIC_000 + reg); } else { res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); } return (res); } static void lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { mfence(); wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } static void lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } #ifdef SMP static uint64_t lapic_read_icr(void) { uint64_t v; uint32_t vhi, vlo; if (x2apic_mode) { v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO); } else { vhi = lapic_read32(LAPIC_ICR_HI); vlo = lapic_read32(LAPIC_ICR_LO); v = ((uint64_t)vhi << 32) | vlo; } return (v); } static uint64_t lapic_read_icr_lo(void) { return (lapic_read32(LAPIC_ICR_LO)); } static void lapic_write_icr(uint32_t vhi, uint32_t vlo) { uint64_t v; if (x2apic_mode) { v = ((uint64_t)vhi << 32) | vlo; mfence(); wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v); } else { lapic_write32(LAPIC_ICR_HI, vhi); lapic_write32(LAPIC_ICR_LO, vlo); } } #endif /* SMP */ static void native_lapic_enable_x2apic(void) { uint64_t apic_base; apic_base = rdmsr(MSR_APICBASE); apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; wrmsr(MSR_APICBASE, apic_base); } static bool native_lapic_is_x2apic(void) { uint64_t apic_base; apic_base = rdmsr(MSR_APICBASE); return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) == (APICBASE_X2APIC | APICBASE_ENABLED)); } static void lapic_enable(void); static void lapic_resume(struct pic *pic, bool suspend_cancelled); static void lapic_timer_oneshot(struct lapic *); static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t); static void lapic_timer_periodic(struct lapic *); static void lapic_timer_deadline(struct lapic *); static void lapic_timer_stop(struct lapic *); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period); static int lapic_et_stop(struct 
eventtimer *et); static u_int apic_idt_to_irq(u_int apic_id, u_int vector); static void lapic_set_tpr(u_int vector); struct pic lapic_pic = { .pic_resume = lapic_resume }; /* Forward declarations for apic_ops */ static void native_lapic_create(u_int apic_id, int boot_cpu); static void native_lapic_init(vm_paddr_t addr); static void native_lapic_xapic_mode(void); static void native_lapic_setup(int boot); static void native_lapic_dump(const char *str); static void native_lapic_disable(void); static void native_lapic_eoi(void); static int native_lapic_id(void); static int native_lapic_intr_pending(u_int vector); static u_int native_apic_cpuid(u_int apic_id); static u_int native_apic_alloc_vector(u_int apic_id, u_int irq); static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); static void native_apic_disable_vector(u_int apic_id, u_int vector); static void native_apic_enable_vector(u_int apic_id, u_int vector); static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq); static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); static int native_lapic_enable_pmc(void); static void native_lapic_disable_pmc(void); static void native_lapic_reenable_pmc(void); static void native_lapic_enable_cmc(void); static int native_lapic_enable_mca_elvt(void); static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt, uint32_t mode); static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); #ifdef SMP static void native_lapic_ipi_raw(register_t icrlo, u_int dest); static void native_lapic_ipi_vectored(u_int vector, int dest); static int native_lapic_ipi_wait(int delay); #endif /* SMP */ static int native_lapic_ipi_alloc(inthand_t *ipifunc); static void native_lapic_ipi_free(int vector); struct apic_ops apic_ops = { .create = native_lapic_create, .init = native_lapic_init, .xapic_mode = native_lapic_xapic_mode, .is_x2apic = native_lapic_is_x2apic, .setup = native_lapic_setup, .dump = native_lapic_dump, .disable = native_lapic_disable, .eoi = native_lapic_eoi, .id = native_lapic_id, .intr_pending = native_lapic_intr_pending, .set_logical_id = native_lapic_set_logical_id, .cpuid = native_apic_cpuid, .alloc_vector = native_apic_alloc_vector, .alloc_vectors = native_apic_alloc_vectors, .enable_vector = native_apic_enable_vector, .disable_vector = native_apic_disable_vector, .free_vector = native_apic_free_vector, .enable_pmc = native_lapic_enable_pmc, .disable_pmc = native_lapic_disable_pmc, .reenable_pmc = native_lapic_reenable_pmc, .enable_cmc = native_lapic_enable_cmc, .enable_mca_elvt = native_lapic_enable_mca_elvt, #ifdef SMP .ipi_raw = native_lapic_ipi_raw, .ipi_vectored = native_lapic_ipi_vectored, .ipi_wait = native_lapic_ipi_wait, #endif .ipi_alloc = native_lapic_ipi_alloc, .ipi_free = native_lapic_ipi_free, .set_lvt_mask = native_lapic_set_lvt_mask, .set_lvt_mode = native_lapic_set_lvt_mode, .set_lvt_polarity = native_lapic_set_lvt_polarity, .set_lvt_triggermode = native_lapic_set_lvt_triggermode, }; static uint32_t lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value) { value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | APIC_LVT_VECTOR); if (lvt->lvt_edgetrigger == 0) value |= APIC_LVT_TM; if (lvt->lvt_activehi == 0) value |= APIC_LVT_IIPP_INTALO; if (lvt->lvt_masked) 
value |= APIC_LVT_M; value |= lvt->lvt_mode; switch (lvt->lvt_mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: if (!lvt->lvt_edgetrigger && bootverbose) { printf("lapic%u: Forcing LINT%u to edge trigger\n", la->la_id, pin); value &= ~APIC_LVT_TM; } /* Use a vector of 0. */ break; case APIC_LVT_DM_FIXED: value |= lvt->lvt_vector; break; default: panic("bad APIC LVT delivery mode: %#x\n", value); } return (value); } static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value) { struct lvt *lvt; KASSERT(pin <= APIC_LVT_MAX, ("%s: pin %u out of range", __func__, pin)); if (la->la_lvts[pin].lvt_active) lvt = &la->la_lvts[pin]; else lvt = &lvts[pin]; return (lvt_mode_impl(la, lvt, pin, value)); } static uint32_t elvt_mode(struct lapic *la, u_int idx, uint32_t value) { struct lvt *elvt; KASSERT(idx <= APIC_ELVT_MAX, ("%s: idx %u out of range", __func__, idx)); elvt = &la->la_elvts[idx]; KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx)); KASSERT(elvt->lvt_edgetrigger, ("%s: ELVT%u is not edge triggered", __func__, idx)); KASSERT(elvt->lvt_activehi, ("%s: ELVT%u is not active high", __func__, idx)); return (lvt_mode_impl(la, elvt, idx, value)); } /* * Map the local APIC and setup necessary interrupt vectors. */ static void native_lapic_init(vm_paddr_t addr) { #ifdef SMP uint64_t r, r1, r2, rx; #endif uint32_t ver; u_int regs[4]; int i, arat; /* * Enable x2APIC mode if possible. Map the local APIC * registers page. * * Keep the LAPIC registers page mapped uncached for x2APIC * mode too, to have direct map page attribute set to * uncached. This is needed to work around CPU errata present * on all Intel processors. */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic_paddr = addr; lapic_map = pmap_mapdev(addr, PAGE_SIZE); if (x2apic_mode) { native_lapic_enable_x2apic(); lapic_map = NULL; } /* Setup the spurious interrupt handler. */ setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Local APIC error interrupt. */ setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC); /* XXX: Thermal interrupt */ /* Local APIC CMCI. */ setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC); if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { arat = 0; /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { do_cpuid(0x06, regs); if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; lapic_et.et_quality = 100; } if ((cpu_feature & CPUID_TSC) != 0 && (cpu_feature2 & CPUID2_TSCDLT) != 0 && tsc_is_invariant && tsc_freq != 0) { lapic_timer_tsc_deadline = 1; TUNABLE_INT_FETCH("hw.lapic_tsc_deadline", &lapic_timer_tsc_deadline); } lapic_et.et_frequency = 0; /* We don't know frequency yet, so trying to guess. 
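 * The real frequency is measured on first use: lapic_et_start() calls
 * lapic_calibrate_initcount(), which counts timer ticks across a
 * DELAY(1000000) window.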
*/ lapic_et.et_min_period = 0x00001000LL; lapic_et.et_max_period = SBT_1S; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; et_register(&lapic_et); } /* * Set lapic_eoi_suppression after lapic_enable(), to not * enable suppression in the hardware prematurely. Note that * we by default enable suppression even when system only has * one IO-APIC, since EOI is broadcasted to all APIC agents, * including CPUs, otherwise. * * It seems that at least some KVM versions report * EOI_SUPPRESSION bit, but auto-EOI does not work. */ ver = lapic_read32(LAPIC_VERSION); if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { lapic_eoi_suppression = 1; if (vm_guest == VM_GUEST_KVM) { if (bootverbose) printf( "KVM -- disabling lapic eoi suppression\n"); lapic_eoi_suppression = 0; } TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", &lapic_eoi_suppression); } #ifdef SMP #define LOOPS 100000 /* * Calibrate the busy loop waiting for IPI ack in xAPIC mode. * lapic_ipi_wait_mult contains the number of iterations which * approximately delay execution for 1 microsecond (the * argument to native_lapic_ipi_wait() is in microseconds). * * We assume that TSC is present and already measured. * Possible TSC frequency jumps are irrelevant to the * calibration loop below, the CPU clock management code is * not yet started, and we do not enter sleep states. */ KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0, ("TSC not initialized")); if (!x2apic_mode) { r = rdtsc(); for (rx = 0; rx < LOOPS; rx++) { (void)lapic_read_icr_lo(); ia32_pause(); } r = rdtsc() - r; r1 = tsc_freq * LOOPS; r2 = r * 1000000; lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1; if (bootverbose) { printf("LAPIC: ipi_wait() us multiplier %ju (r %ju " "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult, (uintmax_t)r, (uintmax_t)tsc_freq); } } #undef LOOPS #endif /* SMP */ } /* * Create a local APIC instance. */ static void native_lapic_create(u_int apic_id, int boot_cpu) { int i; if (apic_id > max_apic_id) { printf("APIC: Ignoring local APIC with ID %d\n", apic_id); if (boot_cpu) panic("Can't ignore BSP"); return; } KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", apic_id)); /* * Assume no local LVT overrides and a cluster of 0 and * intra-cluster ID of 0. 
*/ lapics[apic_id].la_present = 1; lapics[apic_id].la_id = apic_id; for (i = 0; i <= APIC_LVT_MAX; i++) { lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } for (i = 0; i <= APIC_ELVT_MAX; i++) { lapics[apic_id].la_elvts[i] = elvts[i]; lapics[apic_id].la_elvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) lapics[apic_id].la_ioint_irqs[i] = -1; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif #ifdef XENHVM lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; #endif #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } static inline uint32_t amd_read_ext_features(void) { uint32_t version; if (cpu_vendor_id != CPU_VENDOR_AMD) return (0); version = lapic_read32(LAPIC_VERSION); if ((version & APIC_VER_AMD_EXT_SPACE) != 0) return (lapic_read32(LAPIC_EXT_FEATURES)); else return (0); } static inline uint32_t amd_read_elvt_count(void) { uint32_t extf; uint32_t count; extf = amd_read_ext_features(); count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; count = min(count, APIC_ELVT_MAX + 1); return (count); } /* * Dump contents of local APIC registers */ static void native_lapic_dump(const char* str) { uint32_t version; uint32_t maxlvt; uint32_t extf; int elvt_count; int i; version = lapic_read32(LAPIC_VERSION); maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", lapic_read32(LAPIC_ID), version, lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); if ((cpu_feature2 & CPUID2_X2APIC) != 0) printf(" x2APIC: %d", x2apic_mode); printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), lapic_read32(LAPIC_LVT_ERROR)); if (maxlvt >= APIC_LVT_PMC) printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); printf("\n"); if (maxlvt >= APIC_LVT_CMCI) printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); extf = amd_read_ext_features(); if (extf != 0) { printf(" AMD ext features: 0x%08x\n", extf); elvt_count = amd_read_elvt_count(); for (i = 0; i < elvt_count; i++) printf(" AMD elvt%d: 0x%08x\n", i, lapic_read32(LAPIC_EXT_LVT0 + i)); } } static void native_lapic_xapic_mode(void) { register_t saveintr; saveintr = intr_disable(); if (x2apic_mode) native_lapic_enable_x2apic(); intr_restore(saveintr); } static void native_lapic_setup(int boot) { struct lapic *la; uint32_t version; uint32_t maxlvt; register_t saveintr; char buf[MAXCOMLEN + 1]; int elvt_count; int i; saveintr = intr_disable(); la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); version = lapic_read32(LAPIC_VERSION); maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); /* Setup spurious vector and enable the local APIC. */ lapic_enable(); /* Program LINT[01] LVT entries. */ lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, lapic_read32(LAPIC_LVT_LINT0))); lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, lapic_read32(LAPIC_LVT_LINT1))); /* Program the PMC LVT entry if present. 
*/ if (maxlvt >= APIC_LVT_PMC) { lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, LAPIC_LVT_PCINT)); } /* Program timer LVT and setup handler. */ la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, lapic_read32(LAPIC_LVT_TIMER)); la->lvt_timer_last = la->lvt_timer_base; lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); if (boot) { snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); intrcnt_add(buf, &la->la_timer_count); } /* Setup the timer if configured. */ if (la->la_timer_mode != LAT_MODE_UNDEF) { KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", lapic_id())); switch (la->la_timer_mode) { case LAT_MODE_PERIODIC: lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_periodic(la); break; case LAT_MODE_ONESHOT: lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(la); break; case LAT_MODE_DEADLINE: lapic_timer_deadline(la); break; default: panic("corrupted la_timer_mode %p %d", la, la->la_timer_mode); } } /* Program error LVT and clear any existing errors. */ lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, lapic_read32(LAPIC_LVT_ERROR))); lapic_write32(LAPIC_ESR, 0); /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ if (maxlvt >= APIC_LVT_CMCI) { lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, lapic_read32(LAPIC_LVT_CMCI))); } elvt_count = amd_read_elvt_count(); for (i = 0; i < elvt_count; i++) { if (la->la_elvts[i].lvt_active) lapic_write32(LAPIC_EXT_LVT0 + i, elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i))); } intr_restore(saveintr); } static void native_lapic_reenable_pmc(void) { #ifdef HWPMC_HOOKS uint32_t value; value = lapic_read32(LAPIC_LVT_PCINT); value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_PCINT, value); #endif } #ifdef HWPMC_HOOKS static void lapic_update_pmc(void *dummy) { struct lapic *la; la = &lapics[lapic_id()]; lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, lapic_read32(LAPIC_LVT_PCINT))); } #endif static int native_lapic_enable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return (0); /* Fail if the PMC LVT is not present. */ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return (0); lvts[APIC_LVT_PMC].lvt_masked = 0; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #else #ifdef SMP /* * If hwpmc was loaded at boot time then the APs may not be * started yet. In that case, don't forward the request to * them as they will program the lvt when they start. */ if (smp_started) smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); else #endif lapic_update_pmc(NULL); #endif return (1); #else return (0); #endif } static void native_lapic_disable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return; /* Fail if the PMC LVT is not present. */ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return; lvts[APIC_LVT_PMC].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); #endif smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #endif } static void lapic_calibrate_initcount(struct eventtimer *et, struct lapic *la) { u_long value; /* Start off with a divisor of 2 (power on reset default). 
*/ lapic_timer_divisor = 2; /* Try to calibrate the local APIC timer. */ do { lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT); DELAY(1000000); value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER); if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; } while (lapic_timer_divisor <= 128); if (lapic_timer_divisor > 128) panic("lapic: Divisor too big"); if (bootverbose) { printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); } count_freq = value; } static void lapic_calibrate_deadline(struct eventtimer *et, struct lapic *la __unused) { if (bootverbose) { printf("lapic: deadline tsc mode, Frequency %ju Hz\n", (uintmax_t)tsc_freq); } } static void lapic_change_mode(struct eventtimer *et, struct lapic *la, enum lat_timer_mode newmode) { if (la->la_timer_mode == newmode) return; switch (newmode) { case LAT_MODE_PERIODIC: lapic_timer_set_divisor(lapic_timer_divisor); et->et_frequency = count_freq; break; case LAT_MODE_DEADLINE: et->et_frequency = tsc_freq; break; case LAT_MODE_ONESHOT: lapic_timer_set_divisor(lapic_timer_divisor); et->et_frequency = count_freq; break; default: panic("lapic_change_mode %d", newmode); } la->la_timer_mode = newmode; et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; } static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lapic *la; la = &lapics[PCPU_GET(apic_id)]; if (et->et_frequency == 0) { lapic_calibrate_initcount(et, la); if (lapic_timer_tsc_deadline) lapic_calibrate_deadline(et, la); } if (period != 0) { lapic_change_mode(et, la, LAT_MODE_PERIODIC); la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32; lapic_timer_periodic(la); } else if (lapic_timer_tsc_deadline) { lapic_change_mode(et, la, LAT_MODE_DEADLINE); la->la_timer_period = (et->et_frequency * first) >> 32; lapic_timer_deadline(la); } else { lapic_change_mode(et, la, LAT_MODE_ONESHOT); la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32; lapic_timer_oneshot(la); } return (0); } static int lapic_et_stop(struct eventtimer *et) { struct lapic *la; la = &lapics[PCPU_GET(apic_id)]; lapic_timer_stop(la); la->la_timer_mode = LAT_MODE_UNDEF; return (0); } static void native_lapic_disable(void) { uint32_t value; /* Software disable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~APIC_SVR_SWEN; lapic_write32(LAPIC_SVR, value); } static void lapic_enable(void) { uint32_t value; /* Program the spurious vector to enable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT; if (lapic_eoi_suppression) value |= APIC_SVR_EOI_SUPPRESSION; lapic_write32(LAPIC_SVR, value); } /* Reset the local APIC on the BSP during resume. */ static void lapic_resume(struct pic *pic, bool suspend_cancelled) { lapic_setup(0); } static int native_lapic_id(void) { uint32_t v; KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped")); v = lapic_read32(LAPIC_ID); if (!x2apic_mode) v >>= APIC_ID_SHIFT; return (v); } static int native_lapic_intr_pending(u_int vector) { uint32_t irr; /* * The IRR registers are an array of registers each of which * only describes 32 interrupts in the low 32 bits. Thus, we * divide the vector by 32 to get the register index. * Finally, we modulus the vector by 32 to determine the * individual bit to test. 
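 * For example, vector 74 maps to register LAPIC_IRR0 + 2, bit 10.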
*/ irr = lapic_read32(LAPIC_IRR0 + vector / 32); return (irr & 1 << (vector % 32)); } static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { struct lapic *la; KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", __func__, apic_id)); KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", __func__, cluster)); KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, ("%s: intra cluster id %u too big", __func__, cluster_id)); la = &lapics[apic_id]; la->la_cluster = cluster; la->la_cluster_id = cluster_id; } static int native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) { if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_masked = masked; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_masked = masked; lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked"); return (0); } static int native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) { struct lvt *lvt; if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvt = &lvts[pin]; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lvt = &lapics[apic_id].la_lvts[pin]; lvt->lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } lvt->lvt_mode = mode; switch (mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: lvt->lvt_edgetrigger = 1; lvt->lvt_activehi = 1; if (mode == APIC_LVT_DM_EXTINT) lvt->lvt_masked = 1; else lvt->lvt_masked = 0; break; default: panic("Unsupported delivery mode: 0x%x\n", mode); } if (bootverbose) { printf(" Routing "); switch (mode) { case APIC_LVT_DM_NMI: printf("NMI"); break; case APIC_LVT_DM_SMI: printf("SMI"); break; case APIC_LVT_DM_INIT: printf("INIT"); break; case APIC_LVT_DM_EXTINT: printf("ExtINT"); break; } printf(" -> LINT%u\n", pin); } return (0); } static int native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) { if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_active = 1; lapics[apic_id].la_lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u polarity: %s\n", pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) { if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u trigger: %s\n", pin, trigger == INTR_TRIGGER_EDGE ? 
"edge" : "level"); return (0); } /* * Adjust the TPR of the current CPU so that it blocks all interrupts below * the passed in vector. */ static void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR lapic_write32(LAPIC_TPR, vector); #else uint32_t tpr; tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; tpr |= vector; lapic_write32(LAPIC_TPR, tpr); #endif } static void native_lapic_eoi(void) { lapic_write32_nofence(LAPIC_EOI, 0); } void lapic_handle_intr(int vector, struct trapframe *frame) { struct intsrc *isrc; isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), vector)); intr_execute_handlers(isrc, frame); } void lapic_handle_timer(struct trapframe *frame) { struct lapic *la; struct trapframe *oldframe; struct thread *td; /* Send EOI first thing. */ lapic_eoi(); #if defined(SMP) && !defined(SCHED_ULE) /* * Don't do any accounting for the disabled HTT cores, since it * will provide misleading numbers for the userland. * * No locking is necessary here, since even if we lose the race * when hlt_cpus_mask changes it is not a big deal, really. * * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask * and unlike other schedulers it actually schedules threads to * those CPUs. */ if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif /* Look up our local APIC structure for the tick counters. */ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } critical_exit(); } static void lapic_timer_set_divisor(u_int divisor) { KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), ("lapic: invalid divisor %u", divisor)); lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); } static void lapic_timer_oneshot(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_ONE_SHOT; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); } static void lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) { uint32_t value; value = la->lvt_timer_base; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, count); } static void lapic_timer_periodic(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_PERIODIC; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); } static void lapic_timer_deadline(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_TSCDLT; if (value != la->lvt_timer_last) { la->lvt_timer_last = value; lapic_write32_nofence(LAPIC_LVT_TIMER, value); if (!x2apic_mode) mfence(); } wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); } static void lapic_timer_stop(struct lapic *la) { uint32_t value; if (la->la_timer_mode == LAT_MODE_DEADLINE) { wrmsr(MSR_TSC_DEADLINE, 0); mfence(); } else { value = la->lvt_timer_base; value &= ~APIC_LVTT_TM; value |= APIC_LVT_M; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); } } void lapic_handle_cmc(void) { lapic_eoi(); cmc_intr(); 
} /* * Called from the mca_init() to activate the CMC interrupt if this CPU is * responsible for monitoring any MC banks for CMC events. Since mca_init() * is called prior to lapic_setup() during boot, this just needs to unmask * this CPU's LVT_CMCI entry. */ static void native_lapic_enable_cmc(void) { u_int apic_id; #ifdef DEV_ATPIC if (!x2apic_mode && lapic_map == NULL) return; #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; if (bootverbose) printf("lapic%u: CMCI unmasked\n", apic_id); } static int native_lapic_enable_mca_elvt(void) { u_int apic_id; uint32_t value; int elvt_count; #ifdef DEV_ATPIC if (lapic_map == NULL) return (-1); #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); elvt_count = amd_read_elvt_count(); if (elvt_count <= APIC_ELVT_MCA) return (-1); value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA); if ((value & APIC_LVT_M) == 0) { if (bootverbose) printf("AMD MCE Thresholding Extended LVT is already active\n"); return (APIC_ELVT_MCA); } lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0; lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1; if (bootverbose) printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id); return (APIC_ELVT_MCA); } void lapic_handle_error(void) { uint32_t esr; /* * Read the contents of the error status register. Write to * the register first before reading from it to force the APIC * to update its value to indicate any errors that have * occurred since the previous write to the register. */ lapic_write32(LAPIC_ESR, 0); esr = lapic_read32(LAPIC_ESR); printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); } static u_int native_apic_cpuid(u_int apic_id) { #ifdef SMP return apic_cpuids[apic_id]; #else return 0; #endif } /* Request a free IDT vector to be used by the specified IRQ. */ static u_int native_apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple * algorithm to find the first free vector. */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { if (lapics[apic_id].la_ioint_irqs[vector] != -1) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); return (0); } /* * Request 'count' free contiguous IDT vectors to be used by 'count' * IRQs. 'count' must be a power of two and the vectors will be * aligned on a boundary of 'align'. If the request cannot be * satisfied, 0 is returned. */ static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; KASSERT(powerof2(count), ("bad count")); KASSERT(powerof2(align), ("bad align")); KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif /* * Search for 'count' free vectors. As with apic_alloc_vector(), * this just uses a simple first fit algorithm. */ run = 0; first = 0; mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. 
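 * A new run may only start at a vector with (vector & (align - 1)) == 0,
 * which is what guarantees the block returned below is aligned as
 * requested.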
*/ if (lapics[apic_id].la_ioint_irqs[vector] != -1) { run = 0; first = 0; continue; } /* Start a new run if run == 0 and vector is aligned. */ if (run == 0) { if ((vector & (align - 1)) != 0) continue; first = vector; } run++; /* Keep looping if the run isn't long enough yet. */ if (run < count) continue; /* Found a run, assign IRQs and return the first vector. */ for (vector = 0; vector < count; vector++) lapics[apic_id].la_ioint_irqs[first + vector] = irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); return (0); } /* * Enable a vector for a particular apic_id. Since all lapics share idt * entries and ioint_handlers this enables the vector on all lapics. lapics * which do not have the vector configured would report spurious interrupts * should it fire. */ static void native_apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } static void native_apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet /* * We can not currently clear the idt entry because other cpus * may have a valid vector at this offset. */ setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); #endif } /* Release an APIC vector when it's no longer in use. */ static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) { struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif /* * Bind us to the cpu that owned the vector before freeing it so * we don't lose an interrupt delivery race. */ td = curthread; if (!rebooting) { thread_lock(td); if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); thread_unlock(td); } mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); sched_unbind(td); thread_unlock(td); } } /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ static u_int apic_idt_to_irq(u_int apic_id, u_int vector) { int irq; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; return (irq); } #ifdef DDB /* * Dump data about APIC IDT vector mappings. 
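 * Invoked from ddb as "show apic"; the "v" and "vv" modifiers select
 * increasingly verbose dumps of each bound interrupt event.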
*/ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) verbose = 2; else if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; for (apic_id = 0; apic_id <= max_apic_id; apic_id++) { if (lapics[apic_id].la_present == 0) continue; db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == -1 || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) continue; #endif #ifdef XENHVM if (irq == IRQ_EVTCHN) continue; #endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); else if (irq < NUM_IO_INTS) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); else db_dump_intr_event(isrc->is_event, verbose == 2); } else db_printf("IRQ %u ???\n", irq); } } } static void dump_mask(const char *prefix, uint32_t v, int base) { int i, first; first = 1; for (i = 0; i < 32; i++) if (v & (1 << i)) { if (first) { db_printf("%s:", prefix); first = 0; } db_printf(" %02x", base + i); } if (!first) db_printf("\n"); } /* Show info from the lapic regs for this CPU. */ DB_SHOW_COMMAND(lapic, db_show_lapic) { uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); v = lapic_read32(LAPIC_VERSION); db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); v = lapic_read32(LAPIC_SVR); db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? "enabled" : "disabled"); db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); #define dump_field(prefix, regn, index) \ dump_mask(__XSTRING(prefix ## index), \ lapic_read32(LAPIC_ ## regn ## index), \ index * 32) db_printf("In-service Interrupts:\n"); dump_field(isr, ISR, 0); dump_field(isr, ISR, 1); dump_field(isr, ISR, 2); dump_field(isr, ISR, 3); dump_field(isr, ISR, 4); dump_field(isr, ISR, 5); dump_field(isr, ISR, 6); dump_field(isr, ISR, 7); db_printf("TMR Interrupts:\n"); dump_field(tmr, TMR, 0); dump_field(tmr, TMR, 1); dump_field(tmr, TMR, 2); dump_field(tmr, TMR, 3); dump_field(tmr, TMR, 4); dump_field(tmr, TMR, 5); dump_field(tmr, TMR, 6); dump_field(tmr, TMR, 7); db_printf("IRR Interrupts:\n"); dump_field(irr, IRR, 0); dump_field(irr, IRR, 1); dump_field(irr, IRR, 2); dump_field(irr, IRR, 3); dump_field(irr, IRR, 4); dump_field(irr, IRR, 5); dump_field(irr, IRR, 6); dump_field(irr, IRR, 7); #undef dump_field } #endif /* * APIC probing support code. This includes code to manage enumerators. */ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; void apic_register_enumerator(struct apic_enumerator *enumerator) { #ifdef INVARIANTS struct apic_enumerator *apic_enum; SLIST_FOREACH(apic_enum, &enumerators, apic_next) { if (apic_enum == enumerator) panic("%s: Duplicate register of %s", __func__, enumerator->apic_name); } #endif SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); } /* * We have to look for CPU's very, very early because certain subsystems * want to know how many CPU's we have extremely early on in the boot * process. */ static void apic_init(void *dummy __unused) { struct apic_enumerator *enumerator; int retval, best; /* We only support built in local APICs. */ if (!(cpu_feature & CPUID_APIC)) return; /* Don't probe if APIC mode is disabled. 
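 * (resource_disabled() honors the hint.apic.0.disabled loader tunable.)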
*/ if (resource_disabled("apic", 0)) return; /* Probe all the enumerators to find the best match. */ best_enum = NULL; best = 0; SLIST_FOREACH(enumerator, &enumerators, apic_next) { retval = enumerator->apic_probe(); if (retval > 0) continue; if (best_enum == NULL || best < retval) { best_enum = enumerator; best = retval; } } if (best_enum == NULL) { if (bootverbose) printf("APIC: Could not find any APICs.\n"); #ifndef DEV_ATPIC panic("running without device atpic requires a local APIC"); #endif return; } if (bootverbose) printf("APIC: Using the %s enumerator.\n", best_enum->apic_name); #ifdef I686_CPU /* * To work around an errata, we disable the local APIC on some * CPUs during early startup. We need to turn the local APIC back * on on such CPUs now. */ ppro_reenable_apic(); #endif /* Probe the CPU's in the system. */ retval = best_enum->apic_probe_cpus(); if (retval != 0) printf("%s: Failed to probe CPUs: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); /* * Setup the local APIC. We have to do this prior to starting up the APs * in the SMP case. */ static void apic_setup_local(void *dummy __unused) { int retval; if (best_enum == NULL) return; lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, M_WAITOK | M_ZERO); /* Initialize the local APIC. */ retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); /* * Setup the I/O APICs. */ static void apic_setup_io(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* * Local APIC must be registered before other PICs and pseudo PICs * for proper suspend/resume order. */ intr_register_pic(&lapic_pic); retval = best_enum->apic_setup_io(); if (retval != 0) printf("%s: Failed to setup I/O APICs: returned %d\n", best_enum->apic_name, retval); /* * Finish setting up the local APIC on the BSP once we know * how to properly program the LINT pins. In particular, this * enables the EOI suppression mode, if LAPIC support it and * user did not disabled the mode. */ lapic_setup(1); if (bootverbose) lapic_dump("BSP"); /* Enable the MSI "pic". */ init_ops.msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); #ifdef SMP /* * Inter Processor Interrupt functions. The lapic_ipi_*() functions are * private to the MD code. The public interface for the rest of the * kernel is defined in mp_machdep.c. */ /* * Wait delay microseconds for IPI to be sent. If delay is -1, we * wait forever. */ static int native_lapic_ipi_wait(int delay) { uint64_t rx; /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ if (x2apic_mode) return (1); for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); ia32_pause(); } return (0); } static void native_lapic_ipi_raw(register_t icrlo, u_int dest) { uint64_t icr; uint32_t vhi, vlo; register_t saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(x2apic_mode || lapic_map != NULL, ("%s called too early", __func__)); KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used. 
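 * In xAPIC mode the 64-bit ICR is split across two 32-bit registers and
 * the write to ICR_LO is what dispatches the IPI, so ICR_HI must be
 * written first; in x2APIC mode a single wrmsr carries both halves
 * (see lapic_write_icr()).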
*/ if (!x2apic_mode) { saveintr = intr_disable(); icr = lapic_read_icr(); } if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { if (x2apic_mode) { vhi = dest; } else { vhi = icr >> 32; vhi &= ~APIC_ID_MASK; vhi |= dest << APIC_ID_SHIFT; } } else { vhi = 0; } /* Program the contents of the IPI and dispatch it. */ if (x2apic_mode) { vlo = icrlo; } else { vlo = icr; vlo &= APIC_ICRLO_RESV_MASK; vlo |= icrlo; } lapic_write_icr(vhi, vlo); if (!x2apic_mode) intr_restore(saveintr); } #define BEFORE_SPIN 50000 #ifdef DETECT_DEADLOCK #define AFTER_SPIN 50 #endif static void native_lapic_ipi_vectored(u_int vector, int dest) { register_t icrlo, destfield; KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; /* * NMI IPIs are just fake vectors used to send a NMI. Use special rules * regarding NMIs if passed, otherwise specify the vector. */ if (vector >= IPI_NMI_FIRST) icrlo |= APIC_DELMODE_NMI; else icrlo |= vector | APIC_DELMODE_FIXED; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: icrlo |= APIC_DEST_SELF; break; case APIC_IPI_DEST_ALL: icrlo |= APIC_DEST_ALLISELF; break; case APIC_IPI_DEST_OTHERS: icrlo |= APIC_DEST_ALLESELF; break; default: KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid destination 0x%x", __func__, dest)); destfield = dest; } /* Wait for an earlier IPI to finish. */ if (!lapic_ipi_wait(BEFORE_SPIN)) { if (panicstr != NULL) return; else panic("APIC: Previous IPI is stuck"); } lapic_ipi_raw(icrlo, destfield); #ifdef DETECT_DEADLOCK /* Wait for IPI to be delivered. */ if (!lapic_ipi_wait(AFTER_SPIN)) { #ifdef needsattention /* * XXX FIXME: * * The above function waits for the message to actually be * delivered. It breaks out after an arbitrary timeout * since the message should eventually be delivered (at * least in theory) and that if it wasn't we would catch * the failure with the check above when the next IPI is * sent. * * We could skip this wait entirely, EXCEPT it probably * protects us from other routines that assume that the * message was delivered and acted upon when this function * returns. */ printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until mesage is sent without a timeout. */ while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } #endif /* DETECT_DEADLOCK */ } #endif /* SMP */ /* * Since the IDT is shared by all CPUs the IPI slot update needs to be globally * visible. * * Consider the case where an IPI is generated immediately after allocation: * vector = lapic_ipi_alloc(ipifunc); * ipi_selected(other_cpus, vector); * * In xAPIC mode a write to ICR_LO has serializing semantics because the * APIC page is mapped as an uncached region. In x2APIC mode there is an * explicit 'mfence' before the ICR MSR is written. Therefore in both cases * the IDT slot update is globally visible before the IPI is delivered. 
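 * The allocator below scans the dynamic IPI range
 * (IPI_DYN_FIRST..IPI_DYN_LAST) under icu_lock for an IDT slot still
 * pointing at IDTVEC(rsvd) and claims it; -1 is returned when the
 * range is exhausted.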
*/ static int native_lapic_ipi_alloc(inthand_t *ipifunc) { struct gate_descriptor *ip; long func; int idx, vector; KASSERT(ipifunc != &IDTVEC(rsvd), ("invalid ipifunc %p", ipifunc)); vector = -1; mtx_lock_spin(&icu_lock); for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { ip = &idt[idx]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; if (func == (uintptr_t)&IDTVEC(rsvd)) { vector = idx; setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); break; } } mtx_unlock_spin(&icu_lock); return (vector); } static void native_lapic_ipi_free(int vector) { struct gate_descriptor *ip; long func; KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, ("%s: invalid vector %d", __func__, vector)); mtx_lock_spin(&icu_lock); ip = &idt[vector]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; KASSERT(func != (uintptr_t)&IDTVEC(rsvd), ("invalid idtfunc %#lx", func)); setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); mtx_unlock_spin(&icu_lock); } Index: head/sys/x86/x86/mca.c =================================================================== --- head/sys/x86/x86/mca.c (revision 326262) +++ head/sys/x86/x86/mca.c (revision 326263) @@ -1,1265 +1,1267 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Support for x86 machine check architecture. */ #include __FBSDID("$FreeBSD$"); #ifdef __amd64__ #define DEV_APIC #else #include "opt_apic.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Modes for mca_scan() */ enum scan_mode { POLLED, MCE, CMCI, }; #ifdef DEV_APIC /* * State maintained for each monitored MCx bank to control the * corrected machine check interrupt threshold. */ struct cmc_state { int max_threshold; time_t last_intr; }; struct amd_et_state { int cur_threshold; time_t last_intr; }; #endif struct mca_internal { struct mca_record rec; int logged; STAILQ_ENTRY(mca_internal) link; }; static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); static volatile int mca_count; /* Number of records stored. */ static int mca_banks; /* Number of per-CPU register banks. 
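 * (the bank count is taken from the Count field of MSR_MCG_CAP during
 * setup).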
*/ static SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture"); static int mca_enabled = 1; SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0, "Administrative toggle for machine check support"); static int amd10h_L1TP = 1; SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0, "Administrative toggle for logging of level one TLB parity (L1TP) errors"); static int intel6h_HSD131; SYSCTL_INT(_hw_mca, OID_AUTO, intel6h_HSD131, CTLFLAG_RDTUN, &intel6h_HSD131, 0, "Administrative toggle for logging of spurious corrected errors"); int workaround_erratum383; SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTUN, &workaround_erratum383, 0, "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?"); static STAILQ_HEAD(, mca_internal) mca_freelist; static int mca_freecount; static STAILQ_HEAD(, mca_internal) mca_records; static struct callout mca_timer; static int mca_ticks = 3600; /* Check hourly by default. */ static struct taskqueue *mca_tq; static struct task mca_refill_task, mca_scan_task; static struct mtx mca_lock; #ifdef DEV_APIC static struct cmc_state **cmc_state; /* Indexed by cpuid, bank. */ static struct amd_et_state **amd_et_state; /* Indexed by cpuid, bank. */ static int cmc_throttle = 60; /* Time in seconds to throttle CMCI. */ static int amd_elvt = -1; static inline bool amd_thresholding_supported(void) { if (cpu_vendor_id != CPU_VENDOR_AMD) return (false); /* * The RASCap register is wholly reserved in families 0x10-0x15 (through model 1F). * * It begins to be documented in family 0x15 model 30 and family 0x16, * but neither of these families documents the ScalableMca bit, which * supposedly defines the presence of this feature on family 0x17. */ if (CPUID_TO_FAMILY(cpu_id) >= 0x10 && CPUID_TO_FAMILY(cpu_id) <= 0x16) return (true); if (CPUID_TO_FAMILY(cpu_id) >= 0x17) return ((amd_rascap & AMDRAS_SCALABLE_MCA) != 0); return (false); } #endif static inline bool cmci_supported(uint64_t mcg_cap) { /* * MCG_CAP_CMCI_P bit is reserved in AMD documentation. Until * it is defined, do not use it to check for CMCI support. 
*/ if (cpu_vendor_id != CPU_VENDOR_INTEL) return (false); return ((mcg_cap & MCG_CAP_CMCI_P) != 0); } static int sysctl_positive_int(SYSCTL_HANDLER_ARGS) { int error, value; value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); if (value <= 0) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_mca_records(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct mca_record record; struct mca_internal *rec; int i; if (namelen != 1) return (EINVAL); if (name[0] < 0 || name[0] >= mca_count) return (EINVAL); mtx_lock_spin(&mca_lock); if (name[0] >= mca_count) { mtx_unlock_spin(&mca_lock); return (EINVAL); } i = 0; STAILQ_FOREACH(rec, &mca_records, link) { if (i == name[0]) { record = rec->rec; break; } i++; } mtx_unlock_spin(&mca_lock); return (SYSCTL_OUT(req, &record, sizeof(record))); } static const char * mca_error_ttype(uint16_t mca_error) { switch ((mca_error & 0x000c) >> 2) { case 0: return ("I"); case 1: return ("D"); case 2: return ("G"); } return ("?"); } static const char * mca_error_level(uint16_t mca_error) { switch (mca_error & 0x0003) { case 0: return ("L0"); case 1: return ("L1"); case 2: return ("L2"); case 3: return ("LG"); } return ("L?"); } static const char * mca_error_request(uint16_t mca_error) { switch ((mca_error & 0x00f0) >> 4) { case 0x0: return ("ERR"); case 0x1: return ("RD"); case 0x2: return ("WR"); case 0x3: return ("DRD"); case 0x4: return ("DWR"); case 0x5: return ("IRD"); case 0x6: return ("PREFETCH"); case 0x7: return ("EVICT"); case 0x8: return ("SNOOP"); } return ("???"); } static const char * mca_error_mmtype(uint16_t mca_error) { switch ((mca_error & 0x70) >> 4) { case 0x0: return ("GEN"); case 0x1: return ("RD"); case 0x2: return ("WR"); case 0x3: return ("AC"); case 0x4: return ("MS"); } return ("???"); } static int mca_mute(const struct mca_record *rec) { /* * Skip spurious corrected parity errors generated by Intel Haswell- * and Broadwell-based CPUs (see HSD131, HSM142, HSW131 and BDM48 * erratum respectively), unless reporting is enabled. * Note that these errors also have been observed with the D0-stepping * of Haswell, while at least initially the CPU specification updates * suggested only the C0-stepping to be affected. Similarly, Celeron * 2955U with a CPU ID of 0x45 apparently are also concerned with the * same problem, with HSM142 only referring to 0x3c and 0x46. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && CPUID_TO_FAMILY(cpu_id) == 0x6 && (CPUID_TO_MODEL(cpu_id) == 0x3c || /* HSD131, HSM142, HSW131 */ CPUID_TO_MODEL(cpu_id) == 0x3d || /* BDM48 */ CPUID_TO_MODEL(cpu_id) == 0x45 || CPUID_TO_MODEL(cpu_id) == 0x46) && /* HSM142 */ rec->mr_bank == 0 && (rec->mr_status & 0xa0000000ffffffff) == 0x80000000000f0005 && !intel6h_HSD131) return (1); return (0); } /* Dump details about a single machine check. 
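 * Records matching mca_mute() above (known-spurious corrected parity
 * errors on certain Haswell/Broadwell parts) are suppressed unless
 * hw.mca.intel6h_HSD131 is set.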
*/ static void mca_log(const struct mca_record *rec) { uint16_t mca_error; if (mca_mute(rec)) return; printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, (long long)rec->mr_status); printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, rec->mr_cpu_id, rec->mr_apic_id); printf("MCA: CPU %d ", rec->mr_cpu); if (rec->mr_status & MC_STATUS_UC) printf("UNCOR "); else { printf("COR "); if (cmci_supported(rec->mr_mcg_cap)) printf("(%lld) ", ((long long)rec->mr_status & MC_STATUS_COR_COUNT) >> 38); } if (rec->mr_status & MC_STATUS_PCC) printf("PCC "); if (rec->mr_status & MC_STATUS_OVER) printf("OVER "); mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; switch (mca_error) { /* Simple error codes. */ case 0x0000: printf("no error"); break; case 0x0001: printf("unclassified error"); break; case 0x0002: printf("ucode ROM parity error"); break; case 0x0003: printf("external error"); break; case 0x0004: printf("FRC error"); break; case 0x0005: printf("internal parity error"); break; case 0x0400: printf("internal timer error"); break; default: if ((mca_error & 0xfc00) == 0x0400) { printf("internal error %x", mca_error & 0x03ff); break; } /* Compound error codes. */ /* Memory hierarchy error. */ if ((mca_error & 0xeffc) == 0x000c) { printf("%s memory error", mca_error_level(mca_error)); break; } /* TLB error. */ if ((mca_error & 0xeff0) == 0x0010) { printf("%sTLB %s error", mca_error_ttype(mca_error), mca_error_level(mca_error)); break; } /* Memory controller error. */ if ((mca_error & 0xef80) == 0x0080) { printf("%s channel ", mca_error_mmtype(mca_error)); if ((mca_error & 0x000f) != 0x000f) printf("%d", mca_error & 0x000f); else printf("??"); printf(" memory error"); break; } /* Cache error. */ if ((mca_error & 0xef00) == 0x0100) { printf("%sCACHE %s %s error", mca_error_ttype(mca_error), mca_error_level(mca_error), mca_error_request(mca_error)); break; } /* Bus and/or Interconnect error. */ if ((mca_error & 0xe800) == 0x0800) { printf("BUS%s ", mca_error_level(mca_error)); switch ((mca_error & 0x0600) >> 9) { case 0: printf("Source"); break; case 1: printf("Responder"); break; case 2: printf("Observer"); break; default: printf("???"); break; } printf(" %s ", mca_error_request(mca_error)); switch ((mca_error & 0x000c) >> 2) { case 0: printf("Memory"); break; case 2: printf("I/O"); break; case 3: printf("Other"); break; default: printf("???"); break; } if (mca_error & 0x0100) printf(" timed out"); break; } printf("unknown error %x", mca_error); break; } printf("\n"); if (rec->mr_status & MC_STATUS_ADDRV) printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); if (rec->mr_status & MC_STATUS_MISCV) printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc); } static int mca_check_status(int bank, struct mca_record *rec) { uint64_t status; u_int p[4]; status = rdmsr(MSR_MC_STATUS(bank)); if (!(status & MC_STATUS_VAL)) return (0); /* Save exception information. */ rec->mr_status = status; rec->mr_bank = bank; rec->mr_addr = 0; if (status & MC_STATUS_ADDRV) rec->mr_addr = rdmsr(MSR_MC_ADDR(bank)); rec->mr_misc = 0; if (status & MC_STATUS_MISCV) rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); rec->mr_tsc = rdtsc(); rec->mr_apic_id = PCPU_GET(apic_id); rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP); rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS); rec->mr_cpu_id = cpu_id; rec->mr_cpu_vendor_id = cpu_vendor_id; rec->mr_cpu = PCPU_GET(cpuid); /* * Clear machine check. 
Don't do this for uncorrectable * errors so that the BIOS can see them. */ if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { wrmsr(MSR_MC_STATUS(bank), 0); do_cpuid(0, p); } return (1); } static void mca_fill_freelist(void) { struct mca_internal *rec; int desired; /* * Ensure we have at least one record for each bank and one * record per CPU. */ desired = imax(mp_ncpus, mca_banks); mtx_lock_spin(&mca_lock); while (mca_freecount < desired) { mtx_unlock_spin(&mca_lock); rec = malloc(sizeof(*rec), M_MCA, M_WAITOK); mtx_lock_spin(&mca_lock); STAILQ_INSERT_TAIL(&mca_freelist, rec, link); mca_freecount++; } mtx_unlock_spin(&mca_lock); } static void mca_refill(void *context, int pending) { mca_fill_freelist(); } static void mca_record_entry(enum scan_mode mode, const struct mca_record *record) { struct mca_internal *rec; if (mode == POLLED) { rec = malloc(sizeof(*rec), M_MCA, M_WAITOK); mtx_lock_spin(&mca_lock); } else { mtx_lock_spin(&mca_lock); rec = STAILQ_FIRST(&mca_freelist); if (rec == NULL) { printf("MCA: Unable to allocate space for an event.\n"); mca_log(record); mtx_unlock_spin(&mca_lock); return; } STAILQ_REMOVE_HEAD(&mca_freelist, link); mca_freecount--; } rec->rec = *record; rec->logged = 0; STAILQ_INSERT_TAIL(&mca_records, rec, link); mca_count++; mtx_unlock_spin(&mca_lock); if (mode == CMCI && !cold) taskqueue_enqueue(mca_tq, &mca_refill_task); } #ifdef DEV_APIC /* * Update the interrupt threshold for a CMCI. The strategy is to use * a low trigger that interrupts as soon as the first event occurs. * However, if a steady stream of events arrives, the threshold is * increased until the interrupts are throttled to once every * cmc_throttle seconds or handled by the periodic scan. If a periodic * scan finds that the threshold is too high, it is lowered. */ static int update_threshold(enum scan_mode mode, int valid, int last_intr, int count, int cur_threshold, int max_threshold) { u_int delta; int limit; delta = (u_int)(time_uptime - last_intr); limit = cur_threshold; /* * If an interrupt was received less than cmc_throttle seconds * since the previous interrupt and the count from the current * event is greater than or equal to the current threshold, * double the threshold up to the max. */ if (mode == CMCI && valid) { if (delta < cmc_throttle && count >= limit && limit < max_threshold) { limit = min(limit << 1, max_threshold); } return (limit); } /* * When the banks are polled, check to see if the threshold * should be lowered. */ if (mode != POLLED) return (limit); /* If a CMCI occurred recently, do nothing for now. */ if (delta < cmc_throttle) return (limit); /* * Compute a new limit based on the average rate of events per * cmc_throttle seconds since the last interrupt. */ if (valid) { limit = count * cmc_throttle / delta; if (limit <= 0) limit = 1; else if (limit > max_threshold) limit = max_threshold; } else { limit = 1; } return (limit); } static void cmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec) { struct cmc_state *cc; uint64_t ctl; int cur_threshold, new_threshold; int count; /* Fetch the current limit for this bank.
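 *
 * A hedged illustration of the update_threshold() policy above, with
 * hypothetical numbers and cmc_throttle = 60: a CMCI storm doubles
 * the threshold 1 -> 2 -> 4 -> ... up to max_threshold, while a
 * later poll that finds count = 120 events after delta = 300 seconds
 * lowers it again:
 *
 *	limit = count * cmc_throttle / delta;	// 120 * 60 / 300 = 24
 *
 * i.e. about one interrupt per cmc_throttle interval at that rate.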
*/ cc = &cmc_state[PCPU_GET(cpuid)][bank]; ctl = rdmsr(MSR_MC_CTL2(bank)); count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38; cur_threshold = ctl & MC_CTL2_THRESHOLD; new_threshold = update_threshold(mode, valid, cc->last_intr, count, cur_threshold, cc->max_threshold); if (mode == CMCI && valid) cc->last_intr = time_uptime; if (new_threshold != cur_threshold) { ctl &= ~MC_CTL2_THRESHOLD; ctl |= new_threshold; wrmsr(MSR_MC_CTL2(bank), ctl); } } static void amd_thresholding_update(enum scan_mode mode, int bank, int valid) { struct amd_et_state *cc; uint64_t misc; int new_threshold; int count; cc = &amd_et_state[PCPU_GET(cpuid)][bank]; misc = rdmsr(MSR_MC_MISC(bank)); count = (misc & MC_MISC_AMD_CNT_MASK) >> MC_MISC_AMD_CNT_SHIFT; count = count - (MC_MISC_AMD_CNT_MAX - cc->cur_threshold); new_threshold = update_threshold(mode, valid, cc->last_intr, count, cc->cur_threshold, MC_MISC_AMD_CNT_MAX); cc->cur_threshold = new_threshold; misc &= ~MC_MISC_AMD_CNT_MASK; misc |= (uint64_t)(MC_MISC_AMD_CNT_MAX - cc->cur_threshold) << MC_MISC_AMD_CNT_SHIFT; misc &= ~MC_MISC_AMD_OVERFLOW; wrmsr(MSR_MC_MISC(bank), misc); if (mode == CMCI && valid) cc->last_intr = time_uptime; } #endif /* * This scans all the machine check banks of the current CPU to see if * there are any machine checks. Any non-recoverable errors are * reported immediately via mca_log(). The current thread must be * pinned when this is called. The 'mode' parameter indicates if we * are being called from the MC exception handler, the CMCI handler, * or the periodic poller. In the MC exception case this function * returns true if the system is restartable. Otherwise, it returns a * count of the number of valid MC records found. */ static int mca_scan(enum scan_mode mode, int *recoverablep) { struct mca_record rec; uint64_t mcg_cap, ucmask; int count, i, recoverable, valid; count = 0; recoverable = 1; ucmask = MC_STATUS_UC | MC_STATUS_PCC; /* When handling a MCE#, treat the OVER flag as non-restartable. */ if (mode == MCE) ucmask |= MC_STATUS_OVER; mcg_cap = rdmsr(MSR_MCG_CAP); for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { #ifdef DEV_APIC /* * For a CMCI, only check banks this CPU is * responsible for. */ if (mode == CMCI && !(PCPU_GET(cmci_mask) & 1 << i)) continue; #endif valid = mca_check_status(i, &rec); if (valid) { count++; if (rec.mr_status & ucmask) { recoverable = 0; mtx_lock_spin(&mca_lock); mca_log(&rec); mtx_unlock_spin(&mca_lock); } mca_record_entry(mode, &rec); } #ifdef DEV_APIC /* * If this is a bank this CPU monitors via CMCI, * update the threshold. */ if (PCPU_GET(cmci_mask) & 1 << i) { if (cmc_state != NULL) cmci_update(mode, i, valid, &rec); else amd_thresholding_update(mode, i, valid); } #endif } if (mode == POLLED) mca_fill_freelist(); if (recoverablep != NULL) *recoverablep = recoverable; return (count); } /* * Scan the machine check banks on all CPUs by binding to each CPU in * turn. If any of the CPUs contained new machine check records, log * them to the console. 
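 *
 * mca_scan() requires the calling thread to be pinned because the MC
 * bank MSRs are per-CPU state; the loop below satisfies that with the
 * usual bind-then-scan idiom (minimal sketch):
 *
 *	thread_lock(td);
 *	CPU_FOREACH(cpu) {
 *		sched_bind(td, cpu);	// now running on 'cpu'
 *		thread_unlock(td);	// binding persists while unlocked
 *		// ... rdmsr() this CPU's banks via mca_scan() ...
 *		thread_lock(td);
 *		sched_unbind(td);
 *	}
 *	thread_unlock(td);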
*/ static void mca_scan_cpus(void *context, int pending) { struct mca_internal *mca; struct thread *td; int count, cpu; mca_fill_freelist(); td = curthread; count = 0; thread_lock(td); CPU_FOREACH(cpu) { sched_bind(td, cpu); thread_unlock(td); count += mca_scan(POLLED, NULL); thread_lock(td); sched_unbind(td); } thread_unlock(td); if (count != 0) { mtx_lock_spin(&mca_lock); STAILQ_FOREACH(mca, &mca_records, link) { if (!mca->logged) { mca->logged = 1; mca_log(&mca->rec); } } mtx_unlock_spin(&mca_lock); } } static void mca_periodic_scan(void *arg) { taskqueue_enqueue(mca_tq, &mca_scan_task); callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); } static int sysctl_mca_scan(SYSCTL_HANDLER_ARGS) { int error, i; i = 0; error = sysctl_handle_int(oidp, &i, 0, req); if (error) return (error); if (i) taskqueue_enqueue(mca_tq, &mca_scan_task); return (0); } static void mca_createtq(void *dummy) { if (mca_banks <= 0) return; mca_tq = taskqueue_create_fast("mca", M_WAITOK, taskqueue_thread_enqueue, &mca_tq); taskqueue_start_threads(&mca_tq, 1, PI_SWI(SWI_TQ), "mca taskq"); /* CMCIs during boot may have claimed items from the freelist. */ mca_fill_freelist(); } SYSINIT(mca_createtq, SI_SUB_CONFIGURE, SI_ORDER_ANY, mca_createtq, NULL); static void mca_startup(void *dummy) { if (mca_banks <= 0) return; callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); } #ifdef EARLY_AP_STARTUP SYSINIT(mca_startup, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, mca_startup, NULL); #else SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL); #endif #ifdef DEV_APIC static void cmci_setup(void) { int i; cmc_state = malloc((mp_maxid + 1) * sizeof(struct cmc_state *), M_MCA, M_WAITOK); for (i = 0; i <= mp_maxid; i++) cmc_state[i] = malloc(sizeof(struct cmc_state) * mca_banks, M_MCA, M_WAITOK | M_ZERO); SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &cmc_throttle, 0, sysctl_positive_int, "I", "Interval in seconds to throttle corrected MC interrupts"); } static void amd_thresholding_setup(void) { int i; amd_et_state = malloc((mp_maxid + 1) * sizeof(struct amd_et_state *), M_MCA, M_WAITOK); for (i = 0; i <= mp_maxid; i++) amd_et_state[i] = malloc(sizeof(struct amd_et_state) * mca_banks, M_MCA, M_WAITOK | M_ZERO); SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &cmc_throttle, 0, sysctl_positive_int, "I", "Interval in seconds to throttle corrected MC interrupts"); } #endif static void mca_setup(uint64_t mcg_cap) { /* * On AMD Family 10h processors, unless logging of level one TLB * parity (L1TP) errors is disabled, enable the recommended workaround * for Erratum 383. 
*/ if (cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP) workaround_erratum383 = 1; mca_banks = mcg_cap & MCG_CAP_COUNT; mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); STAILQ_INIT(&mca_records); TASK_INIT(&mca_scan_task, 0, mca_scan_cpus, NULL); callout_init(&mca_timer, 1); STAILQ_INIT(&mca_freelist); TASK_INIT(&mca_refill_task, 0, mca_refill, NULL); mca_fill_freelist(); SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "count", CTLFLAG_RD, (int *)(uintptr_t)&mca_count, 0, "Record count"); SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 0, sysctl_positive_int, "I", "Periodic interval in seconds to scan for machine checks"); SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records"); SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_mca_scan, "I", "Force an immediate scan for machine checks"); #ifdef DEV_APIC if (cmci_supported(mcg_cap)) cmci_setup(); else if (amd_thresholding_supported()) amd_thresholding_setup(); #endif } #ifdef DEV_APIC /* * See if we should monitor CMCI for this bank. If CMCI_EN is already * set in MC_CTL2, then another CPU is responsible for this bank, so * ignore it. If CMCI_EN returns zero after being set, then this bank * does not support CMCI_EN. If this CPU sets CMCI_EN, then it should * now monitor this bank. */ static void cmci_monitor(int i) { struct cmc_state *cc; uint64_t ctl; KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid))); ctl = rdmsr(MSR_MC_CTL2(i)); if (ctl & MC_CTL2_CMCI_EN) /* Already monitored by another CPU. */ return; /* Set the threshold to one event for now. */ ctl &= ~MC_CTL2_THRESHOLD; ctl |= MC_CTL2_CMCI_EN | 1; wrmsr(MSR_MC_CTL2(i), ctl); ctl = rdmsr(MSR_MC_CTL2(i)); if (!(ctl & MC_CTL2_CMCI_EN)) /* This bank does not support CMCI. */ return; cc = &cmc_state[PCPU_GET(cpuid)][i]; /* Determine maximum threshold. */ ctl &= ~MC_CTL2_THRESHOLD; ctl |= 0x7fff; wrmsr(MSR_MC_CTL2(i), ctl); ctl = rdmsr(MSR_MC_CTL2(i)); cc->max_threshold = ctl & MC_CTL2_THRESHOLD; /* Start off with a threshold of 1. */ ctl &= ~MC_CTL2_THRESHOLD; ctl |= 1; wrmsr(MSR_MC_CTL2(i), ctl); /* Mark this bank as monitored. */ PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << i); } /* * For resume, reset the threshold for any banks we monitor back to * one and throw away the timestamp of the last interrupt. */ static void cmci_resume(int i) { struct cmc_state *cc; uint64_t ctl; KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid))); /* Ignore banks not monitored by this CPU. */ if (!(PCPU_GET(cmci_mask) & 1 << i)) return; cc = &cmc_state[PCPU_GET(cpuid)][i]; cc->last_intr = 0; ctl = rdmsr(MSR_MC_CTL2(i)); ctl &= ~MC_CTL2_THRESHOLD; ctl |= MC_CTL2_CMCI_EN | 1; wrmsr(MSR_MC_CTL2(i), ctl); } /* * Apply an AMD ET configuration to the corresponding MSR. 
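 *
 * The counter field counts up and reports on overflow, so a threshold
 * is applied by presetting the field to CNT_MAX minus the threshold.
 * A hedged example, assuming the 12-bit counter
 * (MC_MISC_AMD_CNT_MAX == 0xfff) that the macros imply:
 *
 *	// cur_threshold = 1: preset to 0xffe, one more error overflows
 *	misc &= ~MC_MISC_AMD_CNT_MASK;
 *	misc |= (uint64_t)(MC_MISC_AMD_CNT_MAX - 1) << MC_MISC_AMD_CNT_SHIFT;
 *
 * amd_thresholding_update() above inverts the same relation to
 * recover the number of errors seen since the counter was preset.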
*/ static void amd_thresholding_start(struct amd_et_state *cc, int bank) { uint64_t misc; KASSERT(amd_elvt >= 0, ("ELVT offset is not set")); misc = rdmsr(MSR_MC_MISC(bank)); misc &= ~MC_MISC_AMD_INT_MASK; misc |= MC_MISC_AMD_INT_LVT; misc &= ~MC_MISC_AMD_LVT_MASK; misc |= (uint64_t)amd_elvt << MC_MISC_AMD_LVT_SHIFT; misc &= ~MC_MISC_AMD_CNT_MASK; misc |= (uint64_t)(MC_MISC_AMD_CNT_MAX - cc->cur_threshold) << MC_MISC_AMD_CNT_SHIFT; misc &= ~MC_MISC_AMD_OVERFLOW; misc |= MC_MISC_AMD_CNTEN; wrmsr(MSR_MC_MISC(bank), misc); } static void amd_thresholding_monitor(int i) { struct amd_et_state *cc; uint64_t misc; /* * Kludge: On 10h, banks after 4 are not thresholding but also may have * bogus Valid bits. Skip them. This is definitely fixed in 15h, but * I have not investigated whether it is fixed in earlier models. */ if (CPUID_TO_FAMILY(cpu_id) < 0x15 && i >= 5) return; /* The counter must be valid and present. */ misc = rdmsr(MSR_MC_MISC(i)); if ((misc & (MC_MISC_AMD_VAL | MC_MISC_AMD_CNTP)) != (MC_MISC_AMD_VAL | MC_MISC_AMD_CNTP)) return; /* The register should not be locked. */ if ((misc & MC_MISC_AMD_LOCK) != 0) { if (bootverbose) printf("%s: 0x%jx: Bank %d: locked\n", __func__, (uintmax_t)misc, i); return; } /* * If counter is enabled then either the firmware or another CPU * has already claimed it. */ if ((misc & MC_MISC_AMD_CNTEN) != 0) { if (bootverbose) printf("%s: 0x%jx: Bank %d: already enabled\n", __func__, (uintmax_t)misc, i); return; } /* * Configure an Extended Interrupt LVT register for reporting * counter overflows if that feature is supported and the first * extended register is available. */ amd_elvt = lapic_enable_mca_elvt(); if (amd_elvt < 0) { printf("%s: Bank %d: lapic enable mca elvt failed: %d\n", __func__, i, amd_elvt); return; } /* Re-use Intel CMC support infrastructure. */ if (bootverbose) printf("%s: Starting AMD thresholding on bank %d\n", __func__, i); cc = &amd_et_state[PCPU_GET(cpuid)][i]; cc->cur_threshold = 1; amd_thresholding_start(cc, i); /* Mark this bank as monitored. */ PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << i); } static void amd_thresholding_resume(int i) { struct amd_et_state *cc; KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid))); /* Ignore banks not monitored by this CPU. */ if (!(PCPU_GET(cmci_mask) & 1 << i)) return; cc = &amd_et_state[PCPU_GET(cpuid)][i]; cc->last_intr = 0; cc->cur_threshold = 1; amd_thresholding_start(cc, i); } #endif /* * Initializes per-CPU machine check registers and enables corrected * machine check interrupts. */ static void _mca_init(int boot) { uint64_t mcg_cap; uint64_t ctl, mask; int i, skip, family; family = CPUID_TO_FAMILY(cpu_id); /* MCE is required. */ if (!mca_enabled || !(cpu_feature & CPUID_MCE)) return; if (cpu_feature & CPUID_MCA) { if (boot) PCPU_SET(cmci_mask, 0); mcg_cap = rdmsr(MSR_MCG_CAP); if (mcg_cap & MCG_CAP_CTL_P) /* Enable MCA features. */ wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); if (PCPU_GET(cpuid) == 0 && boot) mca_setup(mcg_cap); /* * Disable logging of level one TLB parity (L1TP) errors by * the data cache as an alternative workaround for AMD Family * 10h Erratum 383. Unlike the recommended workaround, there * is no performance penalty to this workaround. However, * L1TP errors will go unreported. */ if (cpu_vendor_id == CPU_VENDOR_AMD && family == 0x10 && !amd10h_L1TP) { mask = rdmsr(MSR_MC0_CTL_MASK); if ((mask & (1UL << 5)) == 0) wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5)); } for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { /* By default enable logging of all errors. 
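 *
 * For reference, a hedged sketch of the per-bank MSR numbering the
 * MSR_MC_*() macros are assumed to follow (the architectural layout
 * per the Intel SDM: four registers per bank, starting at 0x400):
 *
 *	#define MSR_MC_CTL(x)		(0x400 + (x) * 4)
 *	#define MSR_MC_STATUS(x)	(0x401 + (x) * 4)
 *	#define MSR_MC_ADDR(x)		(0x402 + (x) * 4)
 *	#define MSR_MC_MISC(x)		(0x403 + (x) * 4)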
*/ ctl = 0xffffffffffffffffUL; skip = 0; if (cpu_vendor_id == CPU_VENDOR_INTEL) { /* * For P6 models before Nehalem MC0_CTL is * always enabled and reserved. */ if (i == 0 && family == 0x6 && CPUID_TO_MODEL(cpu_id) < 0x1a) skip = 1; } else if (cpu_vendor_id == CPU_VENDOR_AMD) { /* BKDG for Family 10h: unset GartTblWkEn. */ if (i == MC_AMDNB_BANK && family >= 0xf) ctl &= ~(1UL << 10); } if (!skip) wrmsr(MSR_MC_CTL(i), ctl); #ifdef DEV_APIC if (cmci_supported(mcg_cap)) { if (boot) cmci_monitor(i); else cmci_resume(i); } else if (amd_thresholding_supported()) { if (boot) amd_thresholding_monitor(i); else amd_thresholding_resume(i); } #endif /* Clear all errors. */ wrmsr(MSR_MC_STATUS(i), 0); } #ifdef DEV_APIC if (!amd_thresholding_supported() && PCPU_GET(cmci_mask) != 0 && boot) lapic_enable_cmc(); #endif } load_cr4(rcr4() | CR4_MCE); } /* Must be executed on each CPU during boot. */ void mca_init(void) { _mca_init(1); } /* Must be executed on each CPU during resume. */ void mca_resume(void) { _mca_init(0); } /* * The machine check registers for the BSP cannot be initialized until * the local APIC is initialized. This happens at SI_SUB_CPU, * SI_ORDER_SECOND. */ static void mca_init_bsp(void *arg __unused) { mca_init(); } SYSINIT(mca_init_bsp, SI_SUB_CPU, SI_ORDER_ANY, mca_init_bsp, NULL); /* Called when a machine check exception fires. */ void mca_intr(void) { uint64_t mcg_status; int recoverable, count; if (!(cpu_feature & CPUID_MCA)) { /* * Just print the values of the old Pentium registers * and panic. */ printf("MC Type: 0x%jx Address: 0x%jx\n", (uintmax_t)rdmsr(MSR_P5_MC_TYPE), (uintmax_t)rdmsr(MSR_P5_MC_ADDR)); panic("Machine check"); } /* Scan the banks and check for any non-recoverable errors. */ count = mca_scan(MCE, &recoverable); mcg_status = rdmsr(MSR_MCG_STATUS); if (!(mcg_status & MCG_STATUS_RIPV)) recoverable = 0; if (!recoverable) { /* * Only panic if the error was detected local to this CPU. * Some errors will assert a machine check on all CPUs, but * only certain CPUs will find a valid bank to log. */ while (count == 0) cpu_spinwait(); panic("Unrecoverable machine check exception"); } /* Clear MCIP. */ wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); } #ifdef DEV_APIC /* Called for a CMCI (correctable machine check interrupt). */ void cmc_intr(void) { struct mca_internal *mca; int count; /* * Serialize MCA bank scanning to prevent collisions from * sibling threads. */ count = mca_scan(CMCI, NULL); /* If we found anything, log them to the console. */ if (count != 0) { mtx_lock_spin(&mca_lock); STAILQ_FOREACH(mca, &mca_records, link) { if (!mca->logged) { mca->logged = 1; mca_log(&mca->rec); } } mtx_unlock_spin(&mca_lock); } } #endif Index: head/sys/x86/x86/mptable.c =================================================================== --- head/sys/x86/x86/mptable.c (revision 326262) +++ head/sys/x86/x86/mptable.c (revision 326263) @@ -1,1258 +1,1260 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_mptable_force_htt.h" #include #include #include #include #include #include #include #ifdef NEW_PCIB #include #endif #include #include #include #include #ifdef NEW_PCIB #include #endif #include #include #include #include #include #include #ifdef NEW_PCIB #include #endif #include /* string defined by the Intel MP Spec as identifying the MP table */ #define MP_SIG 0x5f504d5f /* _MP_ */ #ifdef __amd64__ #define MAX_LAPIC_ID 63 /* Max local APIC ID for HTT fixup */ #else #define MAX_LAPIC_ID 31 /* Max local APIC ID for HTT fixup */ #endif #define BIOS_BASE (0xf0000) #define BIOS_SIZE (0x10000) #define BIOS_COUNT (BIOS_SIZE/4) typedef void mptable_entry_handler(u_char *entry, void *arg); typedef void mptable_extended_entry_handler(ext_entry_ptr entry, void *arg); /* descriptions of MP table entries */ typedef struct BASETABLE_ENTRY { uint8_t type; uint8_t length; uint8_t name[16]; } basetable_entry; static basetable_entry basetable_entry_types[] = { {0, 20, "Processor"}, {1, 8, "Bus"}, {2, 8, "I/O APIC"}, {3, 8, "I/O INT"}, {4, 8, "Local INT"} }; typedef struct BUSDATA { u_char bus_id; enum busTypes bus_type; } bus_datum; typedef struct INTDATA { u_char int_type; u_short int_flags; u_char src_bus_id; u_char src_bus_irq; u_char dst_apic_id; u_char dst_apic_int; u_char int_vector; } io_int, local_int; typedef struct BUSTYPENAME { u_char type; char name[7]; } bus_type_name; /* From MP spec v1.4, table 4-8. */ static bus_type_name bus_type_table[] = { {UNKNOWN_BUSTYPE, "CBUS "}, {UNKNOWN_BUSTYPE, "CBUSII"}, {EISA, "EISA "}, {UNKNOWN_BUSTYPE, "FUTURE"}, {UNKNOWN_BUSTYPE, "INTERN"}, {ISA, "ISA "}, {UNKNOWN_BUSTYPE, "MBI "}, {UNKNOWN_BUSTYPE, "MBII "}, {MCA, "MCA "}, {UNKNOWN_BUSTYPE, "MPI "}, {UNKNOWN_BUSTYPE, "MPSA "}, {UNKNOWN_BUSTYPE, "NUBUS "}, {PCI, "PCI "}, {UNKNOWN_BUSTYPE, "PCMCIA"}, {UNKNOWN_BUSTYPE, "TC "}, {UNKNOWN_BUSTYPE, "VL "}, {UNKNOWN_BUSTYPE, "VME "}, {UNKNOWN_BUSTYPE, "XPRESS"} }; /* From MP spec v1.4, table 5-1. */ static int default_data[7][5] = { /* nbus, id0, type0, id1, type1 */ {1, 0, ISA, 255, NOBUS}, {1, 0, EISA, 255, NOBUS}, {1, 0, EISA, 255, NOBUS}, {1, 0, MCA, 255, NOBUS}, {2, 0, ISA, 1, PCI}, {2, 0, EISA, 1, PCI}, {2, 0, MCA, 1, PCI} }; struct pci_probe_table_args { u_char bus; u_char found; }; struct pci_route_interrupt_args { u_char bus; /* Source bus. */ u_char irq; /* Source slot:pin. */ int vector; /* Return value. 
*/ }; static mpfps_t mpfps; static mpcth_t mpct; static ext_entry_ptr mpet; static void *ioapics[IOAPIC_MAX_ID + 1]; static bus_datum *busses; static int mptable_nioapics, mptable_nbusses, mptable_maxbusid; static int pci0 = -1; static MALLOC_DEFINE(M_MPTABLE, "mptable", "MP Table Items"); static enum intr_polarity conforming_polarity(u_char src_bus, u_char src_bus_irq); static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq); static enum intr_polarity intentry_polarity(int_entry_ptr intr); static enum intr_trigger intentry_trigger(int_entry_ptr intr); static int lookup_bus_type(char *name); static void mptable_count_items(void); static void mptable_count_items_handler(u_char *entry, void *arg); #ifdef MPTABLE_FORCE_HTT static void mptable_hyperthread_fixup(u_int id_mask); #endif static void mptable_parse_apics_and_busses(void); static void mptable_parse_apics_and_busses_handler(u_char *entry, void *arg); static void mptable_parse_default_config_ints(void); static void mptable_parse_ints(void); static void mptable_parse_ints_handler(u_char *entry, void *arg); static void mptable_parse_io_int(int_entry_ptr intr); static void mptable_parse_local_int(int_entry_ptr intr); static void mptable_pci_probe_table_handler(u_char *entry, void *arg); static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg); static void mptable_pci_setup(void); static int mptable_probe(void); static int mptable_probe_cpus(void); static void mptable_probe_cpus_handler(u_char *entry, void *arg __unused); static void mptable_setup_cpus_handler(u_char *entry, void *arg __unused); static void mptable_register(void *dummy); static int mptable_setup_local(void); static int mptable_setup_io(void); #ifdef NEW_PCIB static void mptable_walk_extended_table( mptable_extended_entry_handler *handler, void *arg); #endif static void mptable_walk_table(mptable_entry_handler *handler, void *arg); static int search_for_sig(u_int32_t target, int count); static struct apic_enumerator mptable_enumerator = { "MPTable", mptable_probe, mptable_probe_cpus, mptable_setup_local, mptable_setup_io }; /* * look for the MP spec signature */ static int search_for_sig(u_int32_t target, int count) { int x; u_int32_t *addr = (u_int32_t *) (KERNBASE + target); for (x = 0; x < count; x += 4) if (addr[x] == MP_SIG) /* make array index a byte index */ return (target + (x * sizeof(u_int32_t))); return (-1); } static int lookup_bus_type(char *name) { int x; for (x = 0; x < MAX_BUSTYPE; ++x) if (strncmp(bus_type_table[x].name, name, 6) == 0) return (bus_type_table[x].type); return (UNKNOWN_BUSTYPE); } /* * Look for an Intel MP spec table (ie, SMP capable hardware). */ static int mptable_probe(void) { int x; u_long segment; u_int32_t target; /* see if EBDA exists */ if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { /* search first 1K of EBDA */ target = (u_int32_t) (segment << 4); if ((x = search_for_sig(target, 1024 / 4)) >= 0) goto found; } else { /* last 1K of base memory, effective 'top of base' passed in */ target = (u_int32_t) ((basemem * 1024) - 0x400); if ((x = search_for_sig(target, 1024 / 4)) >= 0) goto found; } /* search the BIOS */ target = (u_int32_t) BIOS_BASE; if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) goto found; /* nothing found */ return (ENXIO); found: mpfps = (mpfps_t)(KERNBASE + x); /* Map in the configuration table if it exists. 
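 *
 * (A note on the signature matched by search_for_sig() above: the
 * ASCII bytes '_', 'M', 'P', '_' read back as a little-endian
 * u_int32_t are exactly the MP_SIG constant.)
 *
 *	// 0x5f 0x4d 0x50 0x5f in memory -> 0x5f504d5f as a dword
 *	uint32_t sig = '_' | 'M' << 8 | 'P' << 16 | (uint32_t)'_' << 24;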
*/ if (mpfps->config_type != 0) { if (bootverbose) printf( "MP Table version 1.%d found using Default Configuration %d\n", mpfps->spec_rev, mpfps->config_type); if (mpfps->config_type != 5 && mpfps->config_type != 6) { printf( "MP Table Default Configuration %d is unsupported\n", mpfps->config_type); return (ENXIO); } mpct = NULL; } else { if ((uintptr_t)mpfps->pap >= 1024 * 1024) { printf("%s: Unable to map MP Configuration Table\n", __func__); return (ENXIO); } mpct = (mpcth_t)(KERNBASE + (uintptr_t)mpfps->pap); if (mpct->base_table_length + (uintptr_t)mpfps->pap >= 1024 * 1024) { printf("%s: Unable to map end of MP Config Table\n", __func__); return (ENXIO); } if (mpct->extended_table_length != 0 && mpct->extended_table_length + mpct->base_table_length + (uintptr_t)mpfps->pap < 1024 * 1024) mpet = (ext_entry_ptr)((char *)mpct + mpct->base_table_length); if (mpct->signature[0] != 'P' || mpct->signature[1] != 'C' || mpct->signature[2] != 'M' || mpct->signature[3] != 'P') { printf("%s: MP Config Table has bad signature: %c%c%c%c\n", __func__, mpct->signature[0], mpct->signature[1], mpct->signature[2], mpct->signature[3]); return (ENXIO); } if (bootverbose) printf( "MP Configuration Table version 1.%d found at %p\n", mpct->spec_rev, mpct); } return (-100); } /* * Run through the MP table enumerating CPUs. */ static int mptable_probe_cpus(void) { u_int cpu_mask; /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { #ifdef SMP mp_ncpus = 2; mp_maxid = 1; #endif max_apic_id = 1; } else { mptable_walk_table(mptable_probe_cpus_handler, &cpu_mask); } return (0); } /* * Initialize the local APIC on the BSP. */ static int mptable_setup_local(void) { vm_paddr_t addr; u_int cpu_mask; /* Is this a pre-defined config? */ printf("MPTable: <"); if (mpfps->config_type != 0) { lapic_create(0, 1); lapic_create(1, 0); addr = DEFAULT_APIC_BASE; printf("Default Configuration %d", mpfps->config_type); } else { cpu_mask = 0; mptable_walk_table(mptable_setup_cpus_handler, &cpu_mask); #ifdef MPTABLE_FORCE_HTT mptable_hyperthread_fixup(cpu_mask); #endif addr = mpct->apic_address; printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id, (int)sizeof(mpct->product_id), mpct->product_id); } printf(">\n"); lapic_init(addr); return (0); } /* * Run through the MP table enumerating I/O APICs. */ static int mptable_setup_io(void) { int i; u_char byte; /* First, we count individual items and allocate arrays. */ mptable_count_items(); busses = malloc((mptable_maxbusid + 1) * sizeof(bus_datum), M_MPTABLE, M_WAITOK); for (i = 0; i <= mptable_maxbusid; i++) busses[i].bus_type = NOBUS; /* Second, we run through adding I/O APIC's and buses. */ mptable_parse_apics_and_busses(); /* Third, we run through the table tweaking interrupt sources. */ mptable_parse_ints(); /* Fourth, we register all the I/O APIC's. */ for (i = 0; i <= IOAPIC_MAX_ID; i++) if (ioapics[i] != NULL) ioapic_register(ioapics[i]); /* Fifth, we setup data structures to handle PCI interrupt routing. */ mptable_pci_setup(); /* Finally, we throw the switch to enable the I/O APIC's. 
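 *
 * The "switch" is the IMCR described in the MP specification: writing
 * 0x70 to port 0x22 selects the IMCR at port 0x23, and setting bit 0
 * there reroutes INTR/NMI from the 8259 PIC to the local APIC
 * (symmetric I/O mode). A minimal sketch of the sequence performed
 * below:
 *
 *	outb(0x22, 0x70);		// select IMCR
 *	outb(0x23, inb(0x23) | 0x01);	// leave PIC mode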
*/ if (mpfps->mpfb2 & MPFB2_IMCR_PRESENT) { outb(0x22, 0x70); /* select IMCR */ byte = inb(0x23); /* current contents */ byte |= 0x01; /* mask external INTR */ outb(0x23, byte); /* disconnect 8259s/NMI */ } return (0); } static void mptable_register(void *dummy __unused) { apic_register_enumerator(&mptable_enumerator); } SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register, NULL); /* * Call the handler routine for each entry in the MP config base table. */ static void mptable_walk_table(mptable_entry_handler *handler, void *arg) { u_int i; u_char *entry; entry = (u_char *)(mpct + 1); for (i = 0; i < mpct->entry_count; i++) { switch (*entry) { case MPCT_ENTRY_PROCESSOR: case MPCT_ENTRY_IOAPIC: case MPCT_ENTRY_BUS: case MPCT_ENTRY_INT: case MPCT_ENTRY_LOCAL_INT: break; default: panic("%s: Unknown MP Config Entry %d\n", __func__, (int)*entry); } handler(entry, arg); entry += basetable_entry_types[*entry].length; } } #ifdef NEW_PCIB /* * Call the handler routine for each entry in the MP config extended * table. */ static void mptable_walk_extended_table(mptable_extended_entry_handler *handler, void *arg) { ext_entry_ptr end, entry; if (mpet == NULL) return; entry = mpet; end = (ext_entry_ptr)((char *)mpet + mpct->extended_table_length); while (entry < end) { handler(entry, arg); entry = (ext_entry_ptr)((char *)entry + entry->length); } } #endif static void mptable_probe_cpus_handler(u_char *entry, void *arg) { proc_entry_ptr proc; switch (*entry) { case MPCT_ENTRY_PROCESSOR: proc = (proc_entry_ptr)entry; if (proc->cpu_flags & PROCENTRY_FLAG_EN && proc->apic_id < MAX_LAPIC_ID && mp_ncpus < MAXCPU) { #ifdef SMP mp_ncpus++; mp_maxid = mp_ncpus - 1; #endif max_apic_id = max(max_apic_id, proc->apic_id); } break; } } static void mptable_setup_cpus_handler(u_char *entry, void *arg) { proc_entry_ptr proc; u_int *cpu_mask; switch (*entry) { case MPCT_ENTRY_PROCESSOR: proc = (proc_entry_ptr)entry; if (proc->cpu_flags & PROCENTRY_FLAG_EN) { lapic_create(proc->apic_id, proc->cpu_flags & PROCENTRY_FLAG_BP); if (proc->apic_id < MAX_LAPIC_ID) { cpu_mask = (u_int *)arg; *cpu_mask |= (1ul << proc->apic_id); } } break; } } static void mptable_count_items_handler(u_char *entry, void *arg __unused) { io_apic_entry_ptr apic; bus_entry_ptr bus; switch (*entry) { case MPCT_ENTRY_BUS: bus = (bus_entry_ptr)entry; mptable_nbusses++; if (bus->bus_id > mptable_maxbusid) mptable_maxbusid = bus->bus_id; break; case MPCT_ENTRY_IOAPIC: apic = (io_apic_entry_ptr)entry; if (apic->apic_flags & IOAPICENTRY_FLAG_EN) mptable_nioapics++; break; } } /* * Count items in the table. */ static void mptable_count_items(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { mptable_nioapics = 1; switch (mpfps->config_type) { case 1: case 2: case 3: case 4: mptable_nbusses = 1; break; case 5: case 6: case 7: mptable_nbusses = 2; break; default: panic("Unknown pre-defined MP Table config type %d", mpfps->config_type); } mptable_maxbusid = mptable_nbusses - 1; } else mptable_walk_table(mptable_count_items_handler, NULL); } /* * Add a bus or I/O APIC from an entry in the table. 
*/ static void mptable_parse_apics_and_busses_handler(u_char *entry, void *arg __unused) { io_apic_entry_ptr apic; bus_entry_ptr bus; enum busTypes bus_type; int i; switch (*entry) { case MPCT_ENTRY_BUS: bus = (bus_entry_ptr)entry; bus_type = lookup_bus_type(bus->bus_type); if (bus_type == UNKNOWN_BUSTYPE) { printf("MPTable: Unknown bus %d type \"", bus->bus_id); for (i = 0; i < 6; i++) printf("%c", bus->bus_type[i]); printf("\"\n"); } busses[bus->bus_id].bus_id = bus->bus_id; busses[bus->bus_id].bus_type = bus_type; break; case MPCT_ENTRY_IOAPIC: apic = (io_apic_entry_ptr)entry; if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN)) break; if (apic->apic_id > IOAPIC_MAX_ID) panic("%s: I/O APIC ID %d too high", __func__, apic->apic_id); if (ioapics[apic->apic_id] != NULL) panic("%s: Double APIC ID %d", __func__, apic->apic_id); ioapics[apic->apic_id] = ioapic_create(apic->apic_address, apic->apic_id, -1); break; default: break; } } /* * Enumerate I/O APIC's and buses. */ static void mptable_parse_apics_and_busses(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { ioapics[2] = ioapic_create(DEFAULT_IO_APIC_BASE, 2, 0); busses[0].bus_id = 0; busses[0].bus_type = default_data[mpfps->config_type - 1][2]; if (mptable_nbusses > 1) { busses[1].bus_id = 1; busses[1].bus_type = default_data[mpfps->config_type - 1][4]; } } else mptable_walk_table(mptable_parse_apics_and_busses_handler, NULL); } /* * Determine conforming polarity for a given bus type. */ static enum intr_polarity conforming_polarity(u_char src_bus, u_char src_bus_irq) { KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); switch (busses[src_bus].bus_type) { case ISA: case EISA: return (INTR_POLARITY_HIGH); case PCI: return (INTR_POLARITY_LOW); default: panic("%s: unknown bus type %d", __func__, busses[src_bus].bus_type); } } /* * Determine conforming trigger for a given bus type. */ static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq) { KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); switch (busses[src_bus].bus_type) { case ISA: if (elcr_found) return (elcr_read_trigger(src_bus_irq)); else return (INTR_TRIGGER_EDGE); case PCI: return (INTR_TRIGGER_LEVEL); case EISA: KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq)); KASSERT(elcr_found, ("Missing ELCR")); return (elcr_read_trigger(src_bus_irq)); default: panic("%s: unknown bus type %d", __func__, busses[src_bus].bus_type); } } static enum intr_polarity intentry_polarity(int_entry_ptr intr) { switch (intr->int_flags & INTENTRY_FLAGS_POLARITY) { case INTENTRY_FLAGS_POLARITY_CONFORM: return (conforming_polarity(intr->src_bus_id, intr->src_bus_irq)); case INTENTRY_FLAGS_POLARITY_ACTIVEHI: return (INTR_POLARITY_HIGH); case INTENTRY_FLAGS_POLARITY_ACTIVELO: return (INTR_POLARITY_LOW); default: panic("Bogus interrupt flags"); } } static enum intr_trigger intentry_trigger(int_entry_ptr intr) { switch (intr->int_flags & INTENTRY_FLAGS_TRIGGER) { case INTENTRY_FLAGS_TRIGGER_CONFORM: return (conforming_trigger(intr->src_bus_id, intr->src_bus_irq)); case INTENTRY_FLAGS_TRIGGER_EDGE: return (INTR_TRIGGER_EDGE); case INTENTRY_FLAGS_TRIGGER_LEVEL: return (INTR_TRIGGER_LEVEL); default: panic("Bogus interrupt flags"); } } /* * Parse an interrupt entry for an I/O interrupt routed to a pin on an I/O APIC. 
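 *
 * For entries marked "conforming", conforming_polarity() and
 * conforming_trigger() above resolve the per-bus defaults roughly as
 * follows:
 *
 *	bus	polarity	trigger
 *	ISA	active-high	edge (ELCR value if an ELCR is present)
 *	EISA	active-high	per ELCR
 *	PCI	active-low	level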
*/ static void mptable_parse_io_int(int_entry_ptr intr) { void *ioapic; u_int pin, apic_id; apic_id = intr->dst_apic_id; if (intr->dst_apic_id == 0xff) { /* * An APIC ID of 0xff means that the interrupt is connected * to the specified pin on all I/O APICs in the system. If * there is only one I/O APIC, then use that APIC to route * the interrupts. If there is more than one I/O APIC, then * punt. */ if (mptable_nioapics == 1) { apic_id = 0; while (ioapics[apic_id] == NULL) apic_id++; } else { printf( "MPTable: Ignoring global interrupt entry for pin %d\n", intr->dst_apic_int); return; } } if (apic_id > IOAPIC_MAX_ID) { printf("MPTable: Ignoring interrupt entry for ioapic%d\n", intr->dst_apic_id); return; } ioapic = ioapics[apic_id]; if (ioapic == NULL) { printf( "MPTable: Ignoring interrupt entry for missing ioapic%d\n", apic_id); return; } pin = intr->dst_apic_int; switch (intr->int_type) { case INTENTRY_TYPE_INT: switch (busses[intr->src_bus_id].bus_type) { case NOBUS: panic("interrupt from missing bus"); case ISA: case EISA: if (busses[intr->src_bus_id].bus_type == ISA) ioapic_set_bus(ioapic, pin, APIC_BUS_ISA); else ioapic_set_bus(ioapic, pin, APIC_BUS_EISA); if (intr->src_bus_irq == pin) break; ioapic_remap_vector(ioapic, pin, intr->src_bus_irq); if (ioapic_get_vector(ioapic, intr->src_bus_irq) == intr->src_bus_irq) ioapic_disable_pin(ioapic, intr->src_bus_irq); break; case PCI: ioapic_set_bus(ioapic, pin, APIC_BUS_PCI); break; default: ioapic_set_bus(ioapic, pin, APIC_BUS_UNKNOWN); break; } break; case INTENTRY_TYPE_NMI: ioapic_set_nmi(ioapic, pin); break; case INTENTRY_TYPE_SMI: ioapic_set_smi(ioapic, pin); break; case INTENTRY_TYPE_EXTINT: ioapic_set_extint(ioapic, pin); break; default: panic("%s: invalid interrupt entry type %d\n", __func__, intr->int_type); } if (intr->int_type == INTENTRY_TYPE_INT || (intr->int_flags & INTENTRY_FLAGS_TRIGGER) != INTENTRY_FLAGS_TRIGGER_CONFORM) ioapic_set_triggermode(ioapic, pin, intentry_trigger(intr)); if (intr->int_type == INTENTRY_TYPE_INT || (intr->int_flags & INTENTRY_FLAGS_POLARITY) != INTENTRY_FLAGS_POLARITY_CONFORM) ioapic_set_polarity(ioapic, pin, intentry_polarity(intr)); } /* * Parse an interrupt entry for a local APIC LVT pin. */ static void mptable_parse_local_int(int_entry_ptr intr) { u_int apic_id, pin; if (intr->dst_apic_id == 0xff) apic_id = APIC_ID_ALL; else apic_id = intr->dst_apic_id; if (intr->dst_apic_int == 0) pin = APIC_LVT_LINT0; else pin = APIC_LVT_LINT1; switch (intr->int_type) { case INTENTRY_TYPE_INT: #if 1 printf( "MPTable: Ignoring vectored local interrupt for LINTIN%d vector %d\n", intr->dst_apic_int, intr->src_bus_irq); return; #else lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_FIXED); break; #endif case INTENTRY_TYPE_NMI: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); break; case INTENTRY_TYPE_SMI: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_SMI); break; case INTENTRY_TYPE_EXTINT: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_EXTINT); break; default: panic("%s: invalid interrupt entry type %d\n", __func__, intr->int_type); } if ((intr->int_flags & INTENTRY_FLAGS_TRIGGER) != INTENTRY_FLAGS_TRIGGER_CONFORM) lapic_set_lvt_triggermode(apic_id, pin, intentry_trigger(intr)); if ((intr->int_flags & INTENTRY_FLAGS_POLARITY) != INTENTRY_FLAGS_POLARITY_CONFORM) lapic_set_lvt_polarity(apic_id, pin, intentry_polarity(intr)); } /* * Parse interrupt entries. 
*/ static void mptable_parse_ints_handler(u_char *entry, void *arg __unused) { int_entry_ptr intr; intr = (int_entry_ptr)entry; switch (*entry) { case MPCT_ENTRY_INT: mptable_parse_io_int(intr); break; case MPCT_ENTRY_LOCAL_INT: mptable_parse_local_int(intr); break; } } /* * Configure interrupt pins for a default configuration. For details see * Table 5-2 in Section 5 of the MP Table specification. */ static void mptable_parse_default_config_ints(void) { struct INTENTRY entry; int pin; /* * All default configs route IRQs from bus 0 to the first 16 pins * of the first I/O APIC with an APIC ID of 2. */ entry.type = MPCT_ENTRY_INT; entry.int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | INTENTRY_FLAGS_TRIGGER_CONFORM; entry.src_bus_id = 0; entry.dst_apic_id = 2; /* Run through all 16 pins. */ for (pin = 0; pin < 16; pin++) { entry.dst_apic_int = pin; switch (pin) { case 0: /* Pin 0 is an ExtINT pin. */ entry.int_type = INTENTRY_TYPE_EXTINT; break; case 2: /* IRQ 0 is routed to pin 2. */ entry.int_type = INTENTRY_TYPE_INT; entry.src_bus_irq = 0; break; default: /* All other pins are identity mapped. */ entry.int_type = INTENTRY_TYPE_INT; entry.src_bus_irq = pin; break; } mptable_parse_io_int(&entry); } /* Certain configs disable certain pins. */ if (mpfps->config_type == 7) ioapic_disable_pin(ioapics[2], 0); if (mpfps->config_type == 2) { ioapic_disable_pin(ioapics[2], 2); ioapic_disable_pin(ioapics[2], 13); } } /* * Configure the interrupt pins */ static void mptable_parse_ints(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { /* Configure LINT pins. */ lapic_set_lvt_mode(APIC_ID_ALL, APIC_LVT_LINT0, APIC_LVT_DM_EXTINT); lapic_set_lvt_mode(APIC_ID_ALL, APIC_LVT_LINT1, APIC_LVT_DM_NMI); /* Configure I/O APIC pins. */ mptable_parse_default_config_ints(); } else mptable_walk_table(mptable_parse_ints_handler, NULL); } #ifdef MPTABLE_FORCE_HTT /* * Perform a hyperthreading "fix-up" to enumerate any logical CPU's * that aren't already listed in the table. * * XXX: We assume that all of the physical CPUs in the * system have the same number of logical CPUs. * * XXX: We assume that APIC ID's are allocated such that * the APIC ID's for a physical processor are aligned * with the number of logical CPU's in the processor. */ static void mptable_hyperthread_fixup(u_int id_mask) { u_int i, id, logical_cpus; /* Nothing to do if there is no HTT support. */ if ((cpu_feature & CPUID_HTT) == 0) return; logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; if (logical_cpus <= 1) return; /* * For each APIC ID of a CPU that is set in the mask, * scan the other candidate APIC ID's for this * physical processor. If any of those ID's are * already in the table, then kill the fixup. */ for (id = 0; id <= MAX_LAPIC_ID; id++) { if ((id_mask & 1 << id) == 0) continue; /* First, make sure we are on a logical_cpus boundary. */ if (id % logical_cpus != 0) return; for (i = id + 1; i < id + logical_cpus; i++) if ((id_mask & 1 << i) != 0) return; } /* * Ok, the ID's checked out, so perform the fixup by * adding the logical CPUs. */ while ((id = ffs(id_mask)) != 0) { id--; for (i = id + 1; i < id + logical_cpus; i++) { if (bootverbose) printf( "MPTable: Adding logical CPU %d from main CPU %d\n", i, id); lapic_create(i, 0); } id_mask &= ~(1 << id); } } #endif /* MPTABLE_FORCE_HTT */ /* * Support code for routing PCI interrupts using the MP Table. */ static void mptable_pci_setup(void) { int i; /* * Find the first pci bus and call it 0. Panic if pci0 is not * bus zero and there are multiple PCI buses. 
*/ for (i = 0; i <= mptable_maxbusid; i++) if (busses[i].bus_type == PCI) { if (pci0 == -1) pci0 = i; else if (pci0 != 0) panic( "MPTable contains multiple PCI buses but no PCI bus 0"); } } static void mptable_pci_probe_table_handler(u_char *entry, void *arg) { struct pci_probe_table_args *args; int_entry_ptr intr; if (*entry != MPCT_ENTRY_INT) return; intr = (int_entry_ptr)entry; args = (struct pci_probe_table_args *)arg; KASSERT(args->bus <= mptable_maxbusid, ("bus %d is too big", args->bus)); KASSERT(busses[args->bus].bus_type == PCI, ("probing for non-PCI bus")); if (intr->src_bus_id == args->bus) args->found = 1; } int mptable_pci_probe_table(int bus) { struct pci_probe_table_args args; if (bus < 0) return (EINVAL); if (mpct == NULL || pci0 == -1 || pci0 + bus > mptable_maxbusid) return (ENXIO); if (busses[pci0 + bus].bus_type != PCI) return (ENXIO); args.bus = pci0 + bus; args.found = 0; mptable_walk_table(mptable_pci_probe_table_handler, &args); if (args.found == 0) return (ENXIO); return (0); } static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg) { struct pci_route_interrupt_args *args; int_entry_ptr intr; int vector; if (*entry != MPCT_ENTRY_INT) return; intr = (int_entry_ptr)entry; args = (struct pci_route_interrupt_args *)arg; if (intr->src_bus_id != args->bus || intr->src_bus_irq != args->irq) return; /* Make sure the APIC maps to a known APIC. */ KASSERT(ioapics[intr->dst_apic_id] != NULL, ("No I/O APIC %d to route interrupt to", intr->dst_apic_id)); /* * Look up the vector for this APIC / pin combination. If we * have previously matched an entry for this PCI IRQ but it * has the same vector as this entry, just return. Otherwise, * we use the vector for this APIC / pin combination. */ vector = ioapic_get_vector(ioapics[intr->dst_apic_id], intr->dst_apic_int); if (args->vector == vector) return; KASSERT(args->vector == -1, ("Multiple IRQs for PCI interrupt %d.%d.INT%c: %d and %d\n", args->bus, args->irq >> 2, 'A' + (args->irq & 0x3), args->vector, vector)); args->vector = vector; } int mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin) { struct pci_route_interrupt_args args; int slot; /* Like ACPI, pin numbers are 0-3, not 1-4. */ pin--; KASSERT(pci0 != -1, ("do not know how to route PCI interrupts")); args.bus = pci_get_bus(dev) + pci0; slot = pci_get_slot(dev); /* * PCI interrupt entries in the MP Table encode both the slot and * pin into the IRQ with the pin being the two least significant * bits, the slot being the next five bits, and the most significant * bit being reserved. */ args.irq = slot << 2 | pin; args.vector = -1; mptable_walk_table(mptable_pci_route_interrupt_handler, &args); if (args.vector < 0) { device_printf(pcib, "unable to route slot %d INT%c\n", slot, 'A' + pin); return (PCI_INVALID_IRQ); } if (bootverbose) device_printf(pcib, "slot %d INT%c routed to irq %d\n", slot, 'A' + pin, args.vector); return (args.vector); } #ifdef NEW_PCIB struct host_res_args { struct mptable_hostb_softc *sc; device_t dev; u_char bus; }; /* * Initialize a Host-PCI bridge so it can restrict resource allocation * requests to the resources it actually decodes according to MP * config table extended entries. 
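 *
 * (A worked example of the slot:pin IRQ encoding used by
 * mptable_pci_route_interrupt() above, with hypothetical values:
 * slot 3, pin INTB, which is 1 after the pin is decremented.)
 *
 *	irq = 3 << 2 | 1;	// == 13
 *	// decoding back: slot == 13 >> 2 == 3, 'A' + (13 & 0x3) == 'B'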
*/ static void mptable_host_res_handler(ext_entry_ptr entry, void *arg) { struct host_res_args *args; cbasm_entry_ptr cbasm; sas_entry_ptr sas; const char *name; uint64_t start, end; int error, *flagp, flags, type; args = arg; switch (entry->type) { case MPCT_EXTENTRY_SAS: sas = (sas_entry_ptr)entry; if (sas->bus_id != args->bus) break; switch (sas->address_type) { case SASENTRY_TYPE_IO: type = SYS_RES_IOPORT; flags = 0; break; case SASENTRY_TYPE_MEMORY: type = SYS_RES_MEMORY; flags = 0; break; case SASENTRY_TYPE_PREFETCH: type = SYS_RES_MEMORY; flags = RF_PREFETCHABLE; break; default: printf( "MPTable: Unknown system address space type for bus %u: %d\n", sas->bus_id, sas->address_type); return; } start = sas->address_base; end = sas->address_base + sas->address_length - 1; #ifdef __i386__ if (start > ULONG_MAX) { device_printf(args->dev, "Ignoring %d range above 4GB (%#jx-%#jx)\n", type, (uintmax_t)start, (uintmax_t)end); break; } if (end > ULONG_MAX) { device_printf(args->dev, "Truncating end of %d range above 4GB (%#jx-%#jx)\n", type, (uintmax_t)start, (uintmax_t)end); end = ULONG_MAX; } #endif error = pcib_host_res_decodes(&args->sc->sc_host_res, type, start, end, flags); if (error) panic("Failed to manage %d range (%#jx-%#jx): %d", type, (uintmax_t)start, (uintmax_t)end, error); break; case MPCT_EXTENTRY_CBASM: cbasm = (cbasm_entry_ptr)entry; if (cbasm->bus_id != args->bus) break; switch (cbasm->predefined_range) { case CBASMENTRY_RANGE_ISA_IO: flagp = &args->sc->sc_decodes_isa_io; name = "ISA I/O"; break; case CBASMENTRY_RANGE_VGA_IO: flagp = &args->sc->sc_decodes_vga_io; name = "VGA I/O"; break; default: printf( "MPTable: Unknown compatibility address space range for bus %u: %d\n", cbasm->bus_id, cbasm->predefined_range); return; } if (*flagp != 0) printf( "MPTable: Duplicate compatibility %s range for bus %u\n", name, cbasm->bus_id); switch (cbasm->address_mod) { case CBASMENTRY_ADDRESS_MOD_ADD: *flagp = 1; if (bootverbose) device_printf(args->dev, "decoding %s ports\n", name); break; case CBASMENTRY_ADDRESS_MOD_SUBTRACT: *flagp = -1; if (bootverbose) device_printf(args->dev, "not decoding %s ports\n", name); break; default: printf( "MPTable: Unknown compatibility address space modifier: %u\n", cbasm->address_mod); break; } break; } } void mptable_pci_host_res_init(device_t pcib) { struct host_res_args args; KASSERT(pci0 != -1, ("do not know how to map PCI bus IDs")); args.bus = pci_get_bus(pcib) + pci0; args.dev = pcib; args.sc = device_get_softc(pcib); if (pcib_host_res_init(pcib, &args.sc->sc_host_res) != 0) panic("failed to init hostb resources"); mptable_walk_extended_table(mptable_host_res_handler, &args); } #endif Index: head/sys/x86/x86/mptable_pci.c =================================================================== --- head/sys/x86/x86/mptable_pci.c (revision 326262) +++ head/sys/x86/x86/mptable_pci.c (revision 326263) @@ -1,240 +1,242 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Host to PCI and PCI to PCI bridge drivers that use the MP Table to route * interrupts from PCI devices to I/O APICs. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" /* Host to PCI bridge driver. */ static int mptable_hostb_probe(device_t dev) { if (pci_cfgregopen() == 0) return (ENXIO); if (mptable_pci_probe_table(pcib_get_bus(dev)) != 0) return (ENXIO); device_set_desc(dev, "MPTable Host-PCI bridge"); return (0); } static int mptable_hostb_attach(device_t dev) { #ifdef NEW_PCIB mptable_pci_host_res_init(dev); #endif device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } #ifdef NEW_PCIB static int mptable_is_isa_range(rman_res_t start, rman_res_t end) { if (end >= 0x10000) return (0); if ((start & 0xfc00) != (end & 0xfc00)) return (0); start &= ~0xfc00; end &= ~0xfc00; return (start >= 0x100 && end <= 0x3ff); } static int mptable_is_vga_range(rman_res_t start, rman_res_t end) { if (end >= 0x10000) return (0); if ((start & 0xfc00) != (end & 0xfc00)) return (0); start &= ~0xfc00; end &= ~0xfc00; return (pci_is_vga_ioport_range(start, end)); } static struct resource * mptable_hostb_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct mptable_hostb_softc *sc; #ifdef PCI_RES_BUS if (type == PCI_RES_BUS) return (pci_domain_alloc_bus(0, child, rid, start, end, count, flags)); #endif sc = device_get_softc(dev); if (type == SYS_RES_IOPORT && start + count - 1 == end) { if (mptable_is_isa_range(start, end)) { switch (sc->sc_decodes_isa_io) { case -1: return (NULL); case 1: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); default: break; } } if (mptable_is_vga_range(start, end)) { switch (sc->sc_decodes_vga_io) { case -1: return (NULL); case 1: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); default: break; } } } start = hostb_alloc_start(type, start, end, count); return (pcib_host_res_alloc(&sc->sc_host_res, child, type, rid, start, end, count, flags)); } static int mptable_hostb_adjust_resource(device_t dev, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end) { struct mptable_hostb_softc *sc; #ifdef PCI_RES_BUS if (type == PCI_RES_BUS) return (pci_domain_adjust_bus(0, child, r, start, end)); #endif sc = device_get_softc(dev); return (pcib_host_res_adjust(&sc->sc_host_res, child, type, r, start, end)); } #endif static device_method_t mptable_hostb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mptable_hostb_probe), DEVMETHOD(device_attach, mptable_hostb_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), 
DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, legacy_pcib_read_ivar), DEVMETHOD(bus_write_ivar, legacy_pcib_write_ivar), #ifdef NEW_PCIB DEVMETHOD(bus_alloc_resource, mptable_hostb_alloc_resource), DEVMETHOD(bus_adjust_resource, mptable_hostb_adjust_resource), #else DEVMETHOD(bus_alloc_resource, legacy_pcib_alloc_resource), DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), #endif #if defined(NEW_PCIB) && defined(PCI_RES_BUS) DEVMETHOD(bus_release_resource, legacy_pcib_release_resource), #else DEVMETHOD(bus_release_resource, bus_generic_release_resource), #endif DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots), DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, legacy_pcib_map_msi), DEVMETHOD_END }; static devclass_t hostb_devclass; DEFINE_CLASS_0(pcib, mptable_hostb_driver, mptable_hostb_methods, sizeof(struct mptable_hostb_softc)); DRIVER_MODULE(mptable_pcib, legacy, mptable_hostb_driver, hostb_devclass, 0, 0); /* PCI to PCI bridge driver. */ static int mptable_pcib_probe(device_t dev) { int bus; if ((pci_get_class(dev) != PCIC_BRIDGE) || (pci_get_subclass(dev) != PCIS_BRIDGE_PCI)) return (ENXIO); bus = pci_read_config(dev, PCIR_SECBUS_1, 1); if (bus == 0) return (ENXIO); if (mptable_pci_probe_table(bus) != 0) return (ENXIO); device_set_desc(dev, "MPTable PCI-PCI bridge"); return (-1000); } static device_method_t mptable_pcib_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mptable_pcib_probe), /* pcib interface */ DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), {0, 0} }; static devclass_t pcib_devclass; DEFINE_CLASS_1(pcib, mptable_pcib_driver, mptable_pcib_pci_methods, sizeof(struct pcib_softc), pcib_driver); DRIVER_MODULE(mptable_pcib, pci, mptable_pcib_driver, pcib_devclass, 0, 0); Index: head/sys/x86/x86/tsc.c =================================================================== --- head/sys/x86/x86/tsc.c (revision 326262) +++ head/sys/x86/x86/tsc.c (revision 326263) @@ -1,759 +1,761 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1998-2003 Poul-Henning Kamp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_clock.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" uint64_t tsc_freq; int tsc_is_invariant; int tsc_perf_stat; static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag; SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN, &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant"); #ifdef SMP int smp_tsc; SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, "Indicates whether the TSC is safe to use in SMP mode"); int smp_tsc_adjust = 0; SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN, &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP"); #endif static int tsc_shift = 1; SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN, &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency"); static int tsc_disabled; SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0, "Disable x86 Time Stamp Counter"); static int tsc_skip_calibration; SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN, &tsc_skip_calibration, 0, "Disable TSC frequency calibration"); static void tsc_freq_changed(void *arg, const struct cf_level *level, int status); static void tsc_freq_changing(void *arg, const struct cf_level *level, int *status); static unsigned tsc_get_timecount(struct timecounter *tc); static inline unsigned tsc_get_timecount_low(struct timecounter *tc); static unsigned tsc_get_timecount_lfence(struct timecounter *tc); static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc); static unsigned tsc_get_timecount_mfence(struct timecounter *tc); static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc); static void tsc_levels_changed(void *arg, int unit); static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc); #ifdef COMPAT_FREEBSD32 static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32, struct timecounter *tc); #endif static struct timecounter tsc_timecounter = { .tc_get_timecount = tsc_get_timecount, .tc_counter_mask = ~0u, .tc_name = "TSC", .tc_quality = 800, /* adjusted in code */ .tc_fill_vdso_timehands = x86_tsc_vdso_timehands, #ifdef COMPAT_FREEBSD32 .tc_fill_vdso_timehands32 = x86_tsc_vdso_timehands32, #endif }; static void tsc_freq_vmware(void) { u_int regs[4]; if (hv_high >= 0x40000010) { do_cpuid(0x40000010, regs); tsc_freq = regs[0] * 1000; } else { vmware_hvcall(VMW_HVCMD_GETHZ, regs); if (regs[1] != UINT_MAX) tsc_freq = regs[0] | ((uint64_t)regs[1] << 32); } tsc_is_invariant = 1; } static void tsc_freq_intel(void) { char brand[48]; u_int regs[4]; uint64_t freq; char *p; u_int i; /* * Intel Processor Identification and the CPUID Instruction * Application Note 485. 
* http://www.intel.com/assets/pdf/appnote/241618.pdf */ if (cpu_exthigh >= 0x80000004) { p = brand; for (i = 0x80000002; i < 0x80000005; i++) { do_cpuid(i, regs); memcpy(p, regs, sizeof(regs)); p += sizeof(regs); } p = NULL; for (i = 0; i < sizeof(brand) - 1; i++) if (brand[i] == 'H' && brand[i + 1] == 'z') p = brand + i; if (p != NULL) { p -= 5; switch (p[4]) { case 'M': i = 1; break; case 'G': i = 1000; break; case 'T': i = 1000000; break; default: return; } #define C2D(c) ((c) - '0') if (p[1] == '.') { freq = C2D(p[0]) * 1000; freq += C2D(p[2]) * 100; freq += C2D(p[3]) * 10; freq *= i * 1000; } else { freq = C2D(p[0]) * 1000; freq += C2D(p[1]) * 100; freq += C2D(p[2]) * 10; freq += C2D(p[3]); freq *= i * 1000000; } #undef C2D tsc_freq = freq; } } }
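The parse above recovers the frequency Intel encodes at the end of the CPUID brand string, e.g. "2.40GHz" or "2400MHz". A standalone user-space sketch of the same digit-and-suffix handling (hypothetical helper, not part of this change):

        #include <stdint.h>
        #include <string.h>

        static uint64_t
        brand_string_hz(const char *brand)
        {
                const char *p;
                uint64_t freq, mhz;

                p = strstr(brand, "Hz");        /* unit suffix: MHz/GHz/THz */
                if (p == NULL || p - brand < 5)
                        return (0);
                switch (p[-1]) {
                case 'M': mhz = 1; break;
                case 'G': mhz = 1000; break;
                case 'T': mhz = 1000000; break;
                default:
                        return (0);
                }
                p -= 5;                         /* now at "d.dd" or "dddd" */
                if (p[1] == '.') {              /* "2.40GHz" -> 2400 * 10^6 */
                        freq = (p[0] - '0') * 1000 + (p[2] - '0') * 100 +
                            (p[3] - '0') * 10;
                        return (freq * mhz * 1000);
                }
                freq = (p[0] - '0') * 1000 + (p[1] - '0') * 100 +
                    (p[2] - '0') * 10 + (p[3] - '0');   /* "2400MHz" */
                return (freq * mhz * 1000000);
        }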
static void probe_tsc_freq(void) { u_int regs[4]; uint64_t tsc1, tsc2; if (cpu_high >= 6) { do_cpuid(6, regs); if ((regs[2] & CPUID_PERF_STAT) != 0) { /* * XXX Some emulators expose host CPUID without actual * support for these MSRs. We must test whether they * really work. */ wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); DELAY(10); if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0) tsc_perf_stat = 1; } } if (vm_guest == VM_GUEST_VMWARE) { tsc_freq_vmware(); return; } switch (cpu_vendor_id) { case CPU_VENDOR_AMD: if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || (vm_guest == VM_GUEST_NO && CPUID_TO_FAMILY(cpu_id) >= 0x10)) tsc_is_invariant = 1; if (cpu_feature & CPUID_SSE2) { tsc_timecounter.tc_get_timecount = tsc_get_timecount_mfence; } break; case CPU_VENDOR_INTEL: if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || (vm_guest == VM_GUEST_NO && ((CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xe) || (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x3)))) tsc_is_invariant = 1; if (cpu_feature & CPUID_SSE2) { tsc_timecounter.tc_get_timecount = tsc_get_timecount_lfence; } break; case CPU_VENDOR_CENTAUR: if (vm_guest == VM_GUEST_NO && CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xf && (rdmsr(0x1203) & 0x100000000ULL) == 0) tsc_is_invariant = 1; if (cpu_feature & CPUID_SSE2) { tsc_timecounter.tc_get_timecount = tsc_get_timecount_lfence; } break; } if (tsc_skip_calibration) { if (cpu_vendor_id == CPU_VENDOR_INTEL) tsc_freq_intel(); return; } if (bootverbose) printf("Calibrating TSC clock ... "); tsc1 = rdtsc(); DELAY(1000000); tsc2 = rdtsc(); tsc_freq = tsc2 - tsc1; if (bootverbose) printf("TSC clock: %ju Hz\n", (uintmax_t)tsc_freq); } void init_TSC(void) { if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) return; #ifdef __i386__ /* The TSC is known to be broken on certain CPUs. */ switch (cpu_vendor_id) { case CPU_VENDOR_AMD: switch (cpu_id & 0xFF0) { case 0x500: /* K5 Model 0 */ return; } break; case CPU_VENDOR_CENTAUR: switch (cpu_id & 0xff0) { case 0x540: /* * http://www.centtech.com/c6_data_sheet.pdf * * I-12 RDTSC may return incoherent values in EDX:EAX * I-13 RDTSC hangs when certain event counters are used */ return; } break; case CPU_VENDOR_NSC: switch (cpu_id & 0xff0) { case 0x540: if ((cpu_id & CPUID_STEPPING) == 0) return; break; } break; } #endif probe_tsc_freq(); /* * Inform CPU accounting about our boot-time clock rate. This will * be updated if someone loads a cpufreq driver after boot that * discovers a new max frequency. */ if (tsc_freq != 0) set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant); if (tsc_is_invariant) return; /* Register to find out about changes in CPU frequency. */ tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change, tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST); tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST); tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed, tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY); } #ifdef SMP /* * RDTSC is not a serializing instruction, and does not drain the * instruction stream, so we need to drain the stream before executing * it. It could be fixed by use of RDTSCP, except the instruction is * not available everywhere. * * Use CPUID for draining in the boot-time SMP consistency test. The * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel * and VIA) when SSE2 is present, and nothing on older machines which * also do not issue RDTSC prematurely. There, testing for SSE2 and * vendor is too cumbersome, and we learn about TSC presence from CPUID. * * Do not use do_cpuid(), since we do not need CPUID results, which * have to be written into memory with do_cpuid(). */ #define TSC_READ(x) \ static void \ tsc_read_##x(void *arg) \ { \ uint64_t *tsc = arg; \ u_int cpu = PCPU_GET(cpuid); \ \ __asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx"); \ tsc[cpu * 3 + x] = rdtsc(); \ } TSC_READ(0) TSC_READ(1) TSC_READ(2) #undef TSC_READ #define N 1000 static void comp_smp_tsc(void *arg) { uint64_t *tsc; int64_t d1, d2; u_int cpu = PCPU_GET(cpuid); u_int i, j, size; size = (mp_maxid + 1) * 3; for (i = 0, tsc = arg; i < N; i++, tsc += size) CPU_FOREACH(j) { if (j == cpu) continue; d1 = tsc[cpu * 3 + 1] - tsc[j * 3]; d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1]; if (d1 <= 0 || d2 <= 0) { smp_tsc = 0; return; } } } static void adj_smp_tsc(void *arg) { uint64_t *tsc; int64_t d, min, max; u_int cpu = PCPU_GET(cpuid); u_int first, i, size; first = CPU_FIRST(); if (cpu == first) return; min = INT64_MIN; max = INT64_MAX; size = (mp_maxid + 1) * 3; for (i = 0, tsc = arg; i < N; i++, tsc += size) { d = tsc[first * 3] - tsc[cpu * 3 + 1]; if (d > min) min = d; d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2]; if (d > min) min = d; d = tsc[first * 3 + 1] - tsc[cpu * 3]; if (d < max) max = d; d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1]; if (d < max) max = d; } if (min > max) return; d = min / 2 + max / 2; __asm __volatile ( "movl $0x10, %%ecx\n\t" "rdmsr\n\t" "addl %%edi, %%eax\n\t" "adcl %%esi, %%edx\n\t" "wrmsr\n" : /* No output */ : "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32)) : "ax", "cx", "dx", "cc" ); }
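The interval arithmetic in adj_smp_tsc() is easier to follow in isolation: each rendezvous round yields ordering constraints that bound the BSP-to-AP TSC offset from below and above, the bounds are intersected across all rounds, and the midpoint is added to the AP's TSC through MSR 0x10 (IA32_TIME_STAMP_COUNTER). A sketch with illustrative names only (bsp[k]/ap[k] hold the three per-round rdtsc() samples t0, t1, t2; rounds, delta and apply_tsc_delta are assumptions of this sketch):

        int64_t lo = INT64_MIN, hi = INT64_MAX, delta;
        u_int k;

        for (k = 0; k < rounds; k++) {
                /* Every t0 precedes every t1, and every t1 every t2, so: */
                lo = MAX(lo, bsp[k].t0 - ap[k].t1);     /* offset is at least this */
                lo = MAX(lo, bsp[k].t1 - ap[k].t2);
                hi = MIN(hi, bsp[k].t1 - ap[k].t0);     /* and at most this */
                hi = MIN(hi, bsp[k].t2 - ap[k].t1);
        }
        if (lo <= hi) {
                delta = lo / 2 + hi / 2;        /* midpoint of the interval */
                apply_tsc_delta(delta);         /* the rdmsr/wrmsr asm above */
        }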
static int test_tsc(void) { uint64_t *data, *tsc; u_int i, size, adj; if ((!smp_tsc && !tsc_is_invariant) || vm_guest) return (-100); size = (mp_maxid + 1) * 3; data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK); adj = 0; retry: for (i = 0, tsc = data; i < N; i++, tsc += size) smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc); smp_tsc = 1; /* XXX */ smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc, smp_no_rendezvous_barrier, data); if (!smp_tsc && adj < smp_tsc_adjust) { adj++; smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc, smp_no_rendezvous_barrier, data); goto retry; } free(data, M_TEMP); if (bootverbose) printf("SMP: %sed TSC synchronization test%s\n", smp_tsc ? "pass" : "fail", adj > 0 ? " after adjustment" : ""); if (smp_tsc && tsc_is_invariant) { switch (cpu_vendor_id) { case CPU_VENDOR_AMD: /* * Starting with Family 15h processors, TSC clock * source is in the north bridge. Check whether * we have a single-socket/multi-core platform. * XXX Need more work for complex cases. */ if (CPUID_TO_FAMILY(cpu_id) < 0x15 || (amd_feature2 & AMDID2_CMP) == 0 || smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1) break; return (1000); case CPU_VENDOR_INTEL: /* * XXX Assume Intel platforms have synchronized TSCs. */ return (1000); } return (800); } return (-100); } #undef N #else /* * The function is not called, it is provided to avoid linking failure * on a uniprocessor kernel. */ static int test_tsc(void) { return (0); } #endif /* SMP */ static void init_TSC_tc(void) { uint64_t max_freq; int shift; if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) return; /* * Limit timecounter frequency to fit in an int and prevent it from * overflowing too fast. */ max_freq = UINT_MAX; /* * We cannot use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fool's pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 loses too, of course, * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ if (power_pm_get_type() == POWER_PM_TYPE_APM) { tsc_timecounter.tc_quality = -1000; if (bootverbose) printf("TSC timecounter disabled: APM enabled.\n"); goto init; } /* * Intel CPUs without a C-state invariant TSC can stop the TSC * in either C2 or C3. Disable use of C2 and C3 while using * the TSC as the timecounter. The timecounter can be changed * to enable C2 and C3. * * Note that the TSC is used as the cputicker for computing * thread runtime regardless of the timecounter setting, so * using an alternate timecounter and enabling C2 or C3 can * result in incorrect runtimes for kernel idle threads (but not * for any non-idle threads). */ if (cpu_vendor_id == CPU_VENDOR_INTEL && (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) { tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP; if (bootverbose) printf("TSC timecounter disables C2 and C3.\n"); } /* * We cannot use the TSC in SMP mode unless the TSCs on all CPUs * are synchronized. If the user is sure that the system has * synchronized TSCs, set kern.timecounter.smp_tsc tunable to a * non-zero value. The TSC seems unreliable in virtualized SMP * environments, so it is set to a negative quality in those cases. */ if (mp_ncpus > 1) tsc_timecounter.tc_quality = test_tsc(); else if (tsc_is_invariant) tsc_timecounter.tc_quality = 1000; max_freq >>= tsc_shift; init: for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++) ; if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) { if (cpu_vendor_id == CPU_VENDOR_AMD) { tsc_timecounter.tc_get_timecount = shift > 0 ? tsc_get_timecount_low_mfence : tsc_get_timecount_mfence; } else { tsc_timecounter.tc_get_timecount = shift > 0 ? tsc_get_timecount_low_lfence : tsc_get_timecount_lfence; } } else { tsc_timecounter.tc_get_timecount = shift > 0 ? tsc_get_timecount_low : tsc_get_timecount; } if (shift > 0) { tsc_timecounter.tc_name = "TSC-low"; if (bootverbose) printf("TSC timecounter discards lower %d bit(s)\n", shift); } if (tsc_freq != 0) { tsc_timecounter.tc_frequency = tsc_freq >> shift; tsc_timecounter.tc_priv = (void *)(intptr_t)shift; tc_init(&tsc_timecounter); } } SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL); /* * When cpufreq levels change, find out about the (new) max frequency. We * use this to update CPU accounting in case it got a lower estimate at boot.
*/ static void tsc_levels_changed(void *arg, int unit) { device_t cf_dev; struct cf_level *levels; int count, error; uint64_t max_freq; /* Only use values from the first CPU, assuming all are equal. */ if (unit != 0) return; /* Find the appropriate cpufreq device instance. */ cf_dev = devclass_get_device(devclass_find("cpufreq"), unit); if (cf_dev == NULL) { printf("tsc_levels_changed() called but no cpufreq device?\n"); return; } /* Get settings from the device and find the max frequency. */ count = 64; levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); if (levels == NULL) return; error = CPUFREQ_LEVELS(cf_dev, levels, &count); if (error == 0 && count != 0) { max_freq = (uint64_t)levels[0].total_set.freq * 1000000; set_cputicker(rdtsc, max_freq, 1); } else printf("tsc_levels_changed: no max freq found\n"); free(levels, M_TEMP); } /* * If the TSC timecounter is in use, veto the pending change. It may be * possible in the future to handle a dynamically-changing timecounter rate. */ static void tsc_freq_changing(void *arg, const struct cf_level *level, int *status) { if (*status != 0 || timecounter != &tsc_timecounter) return; printf("timecounter TSC must not be in use when " "changing frequencies; change denied\n"); *status = EBUSY; } /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status) { uint64_t freq; /* If there was an error during the transition, don't do anything. */ if (tsc_disabled || status != 0) return; /* Total setting for this level gives the new frequency in MHz. */ freq = (uint64_t)level->total_set.freq * 1000000; atomic_store_rel_64(&tsc_freq, freq); tsc_timecounter.tc_frequency = freq >> (int)(intptr_t)tsc_timecounter.tc_priv; } static int sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; uint64_t freq; freq = atomic_load_acq_64(&tsc_freq); if (freq == 0) return (EOPNOTSUPP); error = sysctl_handle_64(oidp, &freq, 0, req); if (error == 0 && req->newptr != NULL) { atomic_store_rel_64(&tsc_freq, freq); atomic_store_rel_64(&tsc_timecounter.tc_frequency, freq >> (int)(intptr_t)tsc_timecounter.tc_priv); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW, 0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency"); static u_int tsc_get_timecount(struct timecounter *tc __unused) { return (rdtsc32()); } static inline u_int tsc_get_timecount_low(struct timecounter *tc) { uint32_t rv; __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx"); return (rv); } static u_int tsc_get_timecount_lfence(struct timecounter *tc __unused) { lfence(); return (rdtsc32()); } static u_int tsc_get_timecount_low_lfence(struct timecounter *tc) { lfence(); return (tsc_get_timecount_low(tc)); } static u_int tsc_get_timecount_mfence(struct timecounter *tc __unused) { mfence(); return (rdtsc32()); } static u_int tsc_get_timecount_low_mfence(struct timecounter *tc) { mfence(); return (tsc_get_timecount_low(tc)); } static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC; vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv; vdso_th->th_x86_hpet_idx = 0xffffffff; bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); return (1); } #ifdef COMPAT_FREEBSD32 static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32, struct timecounter *tc) { vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC; vdso_th32->th_x86_shift = 
(int)(intptr_t)tc->tc_priv; vdso_th32->th_x86_hpet_idx = 0xffffffff; bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res)); return (1); } #endif Index: head/sys/x86/xen/hvm.c =================================================================== --- head/sys/x86/xen/hvm.c (revision 326262) +++ head/sys/x86/xen/hvm.c (revision 326263) @@ -1,425 +1,427 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2008, 2013 Citrix Systems, Inc. * Copyright (c) 2012 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*--------------------------- Forward Declarations ---------------------------*/ static void xen_hvm_cpu_init(void); /*-------------------------------- Local Types -------------------------------*/ enum xen_hvm_init_type { XEN_HVM_INIT_COLD, XEN_HVM_INIT_CANCELLED_SUSPEND, XEN_HVM_INIT_RESUME }; /*-------------------------------- Global Data -------------------------------*/ enum xen_domain_type xen_domain_type = XEN_NATIVE; #ifdef SMP struct cpu_ops xen_hvm_cpu_ops = { .cpu_init = xen_hvm_cpu_init, .cpu_resume = xen_hvm_cpu_init }; #endif static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); /** * If non-zero, the hypervisor has been configured to use a direct * IDT event callback for interrupt injection. 
*/ int xen_vector_callback_enabled; /*------------------------------- Per-CPU Data -------------------------------*/ DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); DPCPU_DEFINE(struct vcpu_info *, vcpu_info); /*------------------ Hypervisor Access Shared Memory Regions -----------------*/ shared_info_t *HYPERVISOR_shared_info; start_info_t *HYPERVISOR_start_info; /*------------------------------ Sysctl tunables -----------------------------*/ int xen_disable_pv_disks = 0; int xen_disable_pv_nics = 0; TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks); TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics); /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ static uint32_t xen_hvm_cpuid_base(void) { uint32_t base, regs[4]; for (base = 0x40000000; base < 0x40010000; base += 0x100) { do_cpuid(base, regs); if (!memcmp("XenVMMXenVMM", &regs[1], 12) && (regs[0] - base) >= 2) return (base); } return (0); } /* * Allocate and fill in the hypercall page. */ static int xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type init_type) { uint32_t base, regs[4]; int i; if (xen_pv_domain()) { /* hypercall page is already set in the PV case */ return (0); } base = xen_hvm_cpuid_base(); if (base == 0) return (ENXIO); if (init_type == XEN_HVM_INIT_COLD) { int major, minor; do_cpuid(base + 1, regs); major = regs[0] >> 16; minor = regs[0] & 0xffff; printf("XEN: Hypervisor version %d.%d detected.\n", major, minor); #ifdef SMP if (((major < 4) || (major == 4 && minor <= 5)) && msix_disable_migration == -1) { /* * Xen hypervisors prior to 4.6.0 do not properly * handle updates to enabled MSI-X table entries, * so disable MSI-X interrupt migration in that * case. */ if (bootverbose) printf( "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n" "Set machdep.msix_disable_migration=0 to forcefully enable it.\n"); msix_disable_migration = 1; } #endif } /* * Find the hypercall pages. */ do_cpuid(base + 2, regs); for (i = 0; i < regs[0]; i++) wrmsr(regs[1], vtophys(&hypercall_page + i * PAGE_SIZE) + i); return (0); }
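Condensed, the CPUID protocol driven by xen_hvm_init_hypercall_stubs() above is: leaf base+1 returns the hypervisor version in EAX (major in the high 16 bits), and leaf base+2 returns the number of hypercall pages in EAX and the install MSR in EBX; each MSR write takes the physical address of one page with the page index in the low bits. A sketch of just that flow, omitting the error and quirk handling shown above:

        uint32_t base, regs[4];
        int i;

        base = xen_hvm_cpuid_base();            /* 0 when not running on Xen */
        do_cpuid(base + 1, regs);               /* EAX = major.minor version */
        do_cpuid(base + 2, regs);               /* EAX = page count, EBX = MSR */
        for (i = 0; i < regs[0]; i++)
                wrmsr(regs[1], vtophys(&hypercall_page + i * PAGE_SIZE) + i);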
static void xen_hvm_init_shared_info_page(void) { struct xen_add_to_physmap xatp; if (xen_pv_domain()) { /* * Already set up in the PV case, shared_info is passed inside * of the start_info struct at start of day. */ return; } if (HYPERVISOR_shared_info == NULL) { HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT); if (HYPERVISOR_shared_info == NULL) panic("Unable to allocate Xen shared info page"); } xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed"); } /* * Tell the hypervisor how to contact us for event channel callbacks. */ void xen_hvm_set_callback(device_t dev) { struct xen_hvm_param xhp; int irq; if (xen_vector_callback_enabled) return; xhp.domid = DOMID_SELF; xhp.index = HVM_PARAM_CALLBACK_IRQ; if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { int error; xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); if (error == 0) { xen_vector_callback_enabled = 1; return; } printf("Xen HVM callback vector registration failed (%d). " "Falling back to emulated device interrupt\n", error); } xen_vector_callback_enabled = 0; if (dev == NULL) { /* * Called from early boot or resume. * xenpci will invoke us again later. */ return; } irq = pci_get_irq(dev); if (irq < 16) { xhp.value = HVM_CALLBACK_GSI(irq); } else { u_int slot; u_int pin; slot = pci_get_slot(dev); pin = pci_get_intpin(dev) - 1; xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); } if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) panic("Can't set evtchn callback"); } #define XEN_MAGIC_IOPORT 0x10 enum { XMI_MAGIC = 0x49d2, XMI_UNPLUG_IDE_DISKS = 0x01, XMI_UNPLUG_NICS = 0x02, XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 }; static void xen_hvm_disable_emulated_devices(void) { u_short disable_devs = 0; if (xen_pv_domain()) { /* * No emulated devices in the PV case, so no need to unplug * anything. */ if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0) printf("PV devices cannot be disabled in PV guests\n"); return; } if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) return; if (xen_disable_pv_disks == 0) { if (bootverbose) printf("XEN: disabling emulated disks\n"); disable_devs |= XMI_UNPLUG_IDE_DISKS; } if (xen_disable_pv_nics == 0) { if (bootverbose) printf("XEN: disabling emulated nics\n"); disable_devs |= XMI_UNPLUG_NICS; } if (disable_devs != 0) outw(XEN_MAGIC_IOPORT, disable_devs); } static void xen_hvm_init(enum xen_hvm_init_type init_type) { int error; int i; if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) return; error = xen_hvm_init_hypercall_stubs(init_type); switch (init_type) { case XEN_HVM_INIT_COLD: if (error != 0) return; /* * If xen_domain_type is not set at this point * it means we are inside a (PV)HVM guest, because * for PVH the guest type is set much earlier * (see hammer_time_xen). */ if (!xen_domain()) { xen_domain_type = XEN_HVM_DOMAIN; vm_guest = VM_GUEST_XEN; } setup_xen_features(); #ifdef SMP cpu_ops = xen_hvm_cpu_ops; #endif break; case XEN_HVM_INIT_RESUME: if (error != 0) panic("Unable to init Xen hypercall stubs on resume"); /* Clear stale vcpu_info. */ CPU_FOREACH(i) DPCPU_ID_SET(i, vcpu_info, NULL); break; default: panic("Unsupported HVM initialization type"); } xen_vector_callback_enabled = 0; xen_hvm_set_callback(NULL); /* * On (PV)HVM domains we need to request the hypervisor to * fill the shared info page; for PVH guests the shared_info page * is passed inside the start_info struct and is already set, so these * functions are no-ops. */ xen_hvm_init_shared_info_page(); xen_hvm_disable_emulated_devices(); } void xen_hvm_suspend(void) { } void xen_hvm_resume(bool suspend_cancelled) { xen_hvm_init(suspend_cancelled ? XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); /* Register vcpu_info area for CPU#0. */ xen_hvm_cpu_init(); } static void xen_hvm_sysinit(void *arg __unused) { xen_hvm_init(XEN_HVM_INIT_COLD); } static void xen_set_vcpu_id(void) { struct pcpu *pc; int i; if (!xen_hvm_domain()) return; /* Set vcpu_id to acpi_id */ CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_vcpu_id = pc->pc_acpi_id; if (bootverbose) printf("XEN: CPU %u has VCPU ID %u\n", i, pc->pc_vcpu_id); } }
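The vcpu_info registration performed below boils down to handing the hypervisor the page frame and the intra-page offset of the per-CPU structure; the fallback path simply points at the legacy slot in the shared-info page. Equivalently (sketch only; on FreeBSD PAGE_MASK is PAGE_SIZE - 1):

        vm_paddr_t pa = vtophys(vcpu_info);

        info.mfn = pa >> PAGE_SHIFT;    /* page frame holding vcpu_info */
        info.offset = pa & PAGE_MASK;   /* same as pa - trunc_page(pa) */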
static void xen_hvm_cpu_init(void) { struct vcpu_register_vcpu_info info; struct vcpu_info *vcpu_info; int cpu, rc; if (!xen_domain()) return; if (DPCPU_GET(vcpu_info) != NULL) { /* * vcpu_info is already set. We're resuming * from a failed migration and our pre-suspend * configuration is still valid. */ return; } vcpu_info = DPCPU_PTR(vcpu_local_info); cpu = PCPU_GET(vcpu_id); info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); if (rc != 0) DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]); else DPCPU_SET(vcpu_info, vcpu_info); } SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); SYSINIT(xen_set_vcpu_id, SI_SUB_CPU, SI_ORDER_ANY, xen_set_vcpu_id, NULL); Index: head/sys/x86/xen/pv.c =================================================================== --- head/sys/x86/xen/pv.c (revision 326262) +++ head/sys/x86/xen/pv.c (revision 326263) @@ -1,439 +1,441 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2013 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* Native initial function */ extern u_int64_t hammer_time(u_int64_t, u_int64_t); /* Xen initial function */ uint64_t hammer_time_xen(start_info_t *, uint64_t); #define MAX_E820_ENTRIES 128 /*--------------------------- Forward Declarations ---------------------------*/ static caddr_t xen_pv_parse_preload_data(u_int64_t); static void xen_pv_parse_memmap(caddr_t, vm_paddr_t *, int *); #ifdef SMP static int xen_pv_start_all_aps(void); #endif /*---------------------------- Extern Declarations ---------------------------*/ #ifdef SMP /* Variables used by amd64 mp_machdep to start APs */ extern char *doublefault_stack; extern char *nmi_stack; #endif /* * Placed by the linker at the end of the bss section, which is the last * section loaded by Xen before loading the symtab and strtab. */
extern uint32_t end; /*-------------------------------- Global Data -------------------------------*/ /* Xen init_ops implementation. */ struct init_ops xen_init_ops = { .parse_preload_data = xen_pv_parse_preload_data, .early_clock_source_init = xen_clock_init, .early_delay = xen_delay, .parse_memmap = xen_pv_parse_memmap, #ifdef SMP .start_all_aps = xen_pv_start_all_aps, #endif .msi_init = xen_msi_init, }; static struct bios_smap xen_smap[MAX_E820_ENTRIES]; /*-------------------------------- Xen PV init -------------------------------*/ /* * First function called by the Xen PVH boot sequence. * * Set some Xen global variables and prepare the environment so it is * as similar as possible to what native FreeBSD init function expects. */ uint64_t hammer_time_xen(start_info_t *si, uint64_t xenstack) { uint64_t physfree; uint64_t *PT4 = (u_int64_t *)xenstack; uint64_t *PT3 = (u_int64_t *)(xenstack + PAGE_SIZE); uint64_t *PT2 = (u_int64_t *)(xenstack + 2 * PAGE_SIZE); int i; xen_domain_type = XEN_PV_DOMAIN; vm_guest = VM_GUEST_XEN; if ((si == NULL) || (xenstack == 0)) { xc_printf("ERROR: invalid start_info or xen stack, halting\n"); HYPERVISOR_shutdown(SHUTDOWN_crash); } xc_printf("FreeBSD PVH running on %s\n", si->magic); /* We use 3 pages of xen stack for the boot pagetables */ physfree = xenstack + 3 * PAGE_SIZE - KERNBASE; /* Set up Xen global variables */ HYPERVISOR_start_info = si; HYPERVISOR_shared_info = (shared_info_t *)(si->shared_info + KERNBASE); /* * Set up some misc global variables for Xen devices * * XXX: Devices that need these specific variables should * be rewritten to fetch this info by themselves from the * start_info page. */ xen_store = (struct xenstore_domain_interface *) (ptoa(si->store_mfn) + KERNBASE); console_page = (char *)(ptoa(si->console.domU.mfn) + KERNBASE); /* * Use the stack Xen gives us to build the page tables * as native FreeBSD expects to find them (created * by the boot trampoline). */ for (i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); i++) { /* * Each slot of the level 4 pages points * to the same level 3 page */ PT4[i] = ((uint64_t)&PT3[0]) - KERNBASE; PT4[i] |= PG_V | PG_RW | PG_U; /* * Each slot of the level 3 pages points * to the same level 2 page */ PT3[i] = ((uint64_t)&PT2[0]) - KERNBASE; PT3[i] |= PG_V | PG_RW | PG_U; /* * The level 2 page slots are mapped with * 2MB pages for 1GB. */ PT2[i] = i * (2 * 1024 * 1024); PT2[i] |= PG_V | PG_RW | PG_PS | PG_U; } load_cr3(((uint64_t)&PT4[0]) - KERNBASE); /* Set the hooks for early functions that diverge from bare metal */ init_ops = xen_init_ops; apic_ops = xen_apic_ops; /* Now we can jump into the native init function */ return (hammer_time(0, physfree)); }
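The arithmetic behind the tables built in hammer_time_xen() is worth spelling out: with 4 KB pages there are 512 eight-byte slots per table, and the 512 L2 slots each map a 2 MB superpage, which is exactly 1 GB. Since every L3 slot points at that one L2 page and every L4 slot at that one L3 page, the same 1 GB mapping repeats at every 1 GB and 512 GB boundary, covering both the low identity range and KERNBASE. A compile-time check of the claim (C11 syntax; the kernel itself would use CTASSERT):

        #define NSLOTS  (PAGE_SIZE / sizeof(uint64_t))  /* 512 with 4 KB pages */
        _Static_assert(NSLOTS * 2 * 1024 * 1024 == 1ULL << 30,
            "512 2MB superpages map exactly 1GB");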
/*-------------------------------- PV specific -------------------------------*/ #ifdef SMP static bool start_xen_ap(int cpu) { struct vcpu_guest_context *ctxt; int ms, cpus = mp_naps; const size_t stacksize = kstack_pages * PAGE_SIZE; /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, stacksize, M_WAITOK | M_ZERO); doublefault_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 8; bootAP = cpu; ctxt = malloc(sizeof(*ctxt), M_TEMP, M_WAITOK | M_ZERO); ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.rip = (unsigned long) init_secondary; ctxt->user_regs.rsp = (unsigned long) bootSTK; /* Set the AP to use the same page tables */ ctxt->ctrlreg[3] = KPML4phys; if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) panic("unable to initialize AP#%d", cpu); free(ctxt, M_TEMP); /* Launch the vCPU */ if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) panic("unable to start AP#%d", cpu); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return (true); DELAY(1000); } return (false); } static int xen_pv_start_all_aps(void) { int cpu; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); for (cpu = 1; cpu < mp_ncpus; cpu++) { /* attempt to start the Application Processor */ if (!start_xen_ap(cpu)) panic("AP #%d failed to start!", cpu); CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } return (mp_naps); } #endif /* SMP */ /* * Functions to convert the "extra" parameters passed by Xen * into FreeBSD boot options. */ static void xen_pv_set_env(void) { char *cmd_line_next, *cmd_line; size_t env_size; cmd_line = HYPERVISOR_start_info->cmd_line; env_size = sizeof(HYPERVISOR_start_info->cmd_line); /* Skip leading spaces */ for (; isspace(*cmd_line) && (env_size != 0); cmd_line++) env_size--; /* Replace ',' with '\0' */ for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;) ; init_static_kenv(cmd_line, 0); } static void xen_pv_set_boothowto(void) { int i; char *env; /* get equivalents from the environment */ for (i = 0; howto_names[i].ev != NULL; i++) { if ((env = kern_getenv(howto_names[i].ev)) != NULL) { boothowto |= howto_names[i].mask; freeenv(env); } } }
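xen_pv_set_env() above converts Xen's comma-separated command line into the NUL-separated name=value block that init_static_kenv() expects simply by letting strsep() overwrite each ','. A user-space illustration with made-up variable names:

        #include <stdio.h>
        #include <string.h>

        int
        main(void)
        {
                char buf[] = "boot_verbose=YES,vfs.root.mountfrom=ufs:/dev/ada0p2";
                char *next, *var;

                /* Each ',' becomes '\0', exactly as in the kernel loop. */
                for (next = buf; (var = strsep(&next, ",")) != NULL;)
                        printf("%s\n", var);
                return (0);
        }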
#ifdef DDB /* * The way Xen loads the symtab is different from the native boot loader, * because it's tailored for NetBSD. So we have to adapt and use the same * method as NetBSD. Portions of the code below have been picked from NetBSD: * sys/kern/kern_ksyms.c CVS Revision 1.71. */ static void xen_pv_parse_symtab(void) { Elf_Ehdr *ehdr; Elf_Shdr *shdr; vm_offset_t sym_end; uint32_t size; int i, j; size = end; sym_end = HYPERVISOR_start_info->mod_start != 0 ? HYPERVISOR_start_info->mod_start : HYPERVISOR_start_info->mfn_list; /* * Make sure the size is sane; sym_end is just an upper boundary, but * it at least allows us to fail earlier. */ if ((vm_offset_t)&end + size > sym_end) { xc_printf("Unable to load ELF symtab: size mismatch\n"); return; } ehdr = (Elf_Ehdr *)(&end + 1); if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || ehdr->e_version > 1) { xc_printf("Unable to load ELF symtab: invalid symbol table\n"); return; } shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); /* Find the symbol table and the corresponding string table. */ for (i = 1; i < ehdr->e_shnum; i++) { if (shdr[i].sh_type != SHT_SYMTAB) continue; if (shdr[i].sh_offset == 0) continue; ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset); ksymtab_size = shdr[i].sh_size; j = shdr[i].sh_link; if (shdr[j].sh_offset == 0) continue; /* Can this happen? */ kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset); break; } if (ksymtab == 0 || kstrtab == 0) { xc_printf( "Unable to load ELF symtab: could not find symtab or strtab\n"); return; } } #endif static caddr_t xen_pv_parse_preload_data(u_int64_t modulep) { caddr_t kmdp; vm_ooffset_t off; vm_paddr_t metadata; char *envp; if (HYPERVISOR_start_info->mod_start != 0) { preload_metadata = (caddr_t)(HYPERVISOR_start_info->mod_start); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); KASSERT(kmdp != NULL, ("unable to find kernel")); /* * Xen has relocated the metadata and the modules, * so we need to recalculate its position. This is * done by saving the original modulep address and * then calculating the offset with mod_start, * which contains the relocated modulep address. */ metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t); off = HYPERVISOR_start_info->mod_start - metadata; preload_bootstrap_relocate(off); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); if (envp != NULL) envp += off; init_static_kenv(envp, 0); } else { /* Parse the extra boot information given by Xen */ xen_pv_set_env(); xen_pv_set_boothowto(); kmdp = NULL; } #ifdef DDB xen_pv_parse_symtab(); #endif return (kmdp); } static void xen_pv_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) { struct xen_memory_map memmap; u_int32_t size; int rc; /* Fetch the E820 map from Xen */ memmap.nr_entries = MAX_E820_ENTRIES; set_xen_guest_handle(memmap.buffer, xen_smap); rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); if (rc) panic("unable to fetch Xen E820 memory map"); size = memmap.nr_entries * sizeof(xen_smap[0]); bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); } Index: head/sys/x86/xen/pvcpu_enum.c =================================================================== --- head/sys/x86/xen/pvcpu_enum.c (revision 326262) +++ head/sys/x86/xen/pvcpu_enum.c (revision 326263) @@ -1,265 +1,267 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 John Baldwin * Copyright (c) 2013 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int xenpv_probe(void); static int xenpv_probe_cpus(void); static int xenpv_setup_local(void); static int xenpv_setup_io(void); static ACPI_TABLE_MADT *madt; static vm_paddr_t madt_physaddr; static vm_offset_t madt_length; static struct apic_enumerator xenpv_enumerator = { "Xen PV", xenpv_probe, xenpv_probe_cpus, xenpv_setup_local, xenpv_setup_io }; /*--------------------- Helper functions to parse MADT -----------------------*/ /* * Parse an interrupt source override for an ISA interrupt. */ static void madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr) { enum intr_trigger trig; enum intr_polarity pol; int ret; if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 && intr->GlobalIrq == 2) { if (bootverbose) printf("MADT: Skipping timer override\n"); return; } madt_parse_interrupt_values(intr, &trig, &pol); /* Remap the IRQ if it is mapped to a different interrupt vector. */ if (intr->SourceIrq != intr->GlobalIrq && intr->GlobalIrq > 15 && intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) /* * If the SCI is remapped to a non-ISA global interrupt, * then override the vector we use to setup. */ acpi_OverrideInterruptLevel(intr->GlobalIrq); /* Register the IRQ with the polarity and trigger mode found. */ ret = xen_register_pirq(intr->GlobalIrq, trig, pol); if (ret != 0) panic("Unable to register interrupt override"); } /* * Call the handler routine for each entry in the MADT table. */ static void madt_walk_table(acpi_subtable_handler *handler, void *arg) { acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, handler, arg); } /* * Parse interrupt entries. 
*/ static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { if (entry->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) madt_parse_interrupt_override( (ACPI_MADT_INTERRUPT_OVERRIDE *)entry); } /*---------------------------- Xen PV enumerator -----------------------------*/ /* * This enumerator will only be registered on PVH */ static int xenpv_probe(void) { return (0); } /* * Test each possible vCPU in order to find the number of vCPUs */ static int xenpv_probe_cpus(void) { #ifdef SMP int i, ret; for (i = 0; i < MAXCPU && (i * 2) < MAX_APIC_ID; i++) { ret = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); mp_ncpus = min(mp_ncpus + 1, MAXCPU); } mp_maxid = mp_ncpus - 1; max_apic_id = mp_ncpus * 2; #endif return (0); } /* * Initialize the vCPU id of the BSP */ static int xenpv_setup_local(void) { #ifdef SMP int i, ret; for (i = 0; i < MAXCPU && (i * 2) < MAX_APIC_ID; i++) { ret = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (ret >= 0) lapic_create((i * 2), (i == 0)); } #endif PCPU_SET(vcpu_id, 0); lapic_init(0); return (0); } /* * On PVH guests there's no IO APIC */ static int xenpv_setup_io(void) { if (xen_initial_domain()) { int i, ret; /* Map MADT */ madt_physaddr = acpi_find_table(ACPI_SIG_MADT); madt = acpi_map_table(madt_physaddr, ACPI_SIG_MADT); madt_length = madt->Header.Length; /* Try to initialize ACPI so that we can access the FADT. */ i = acpi_Startup(); if (ACPI_FAILURE(i)) { printf("MADT: ACPI Startup failed with %s\n", AcpiFormatException(i)); printf("Try disabling either ACPI or apic support.\n"); panic("Using MADT but ACPI doesn't work"); } /* Run through the table to see if there are any overrides. */ madt_walk_table(madt_parse_ints, NULL); /* * If there was not an explicit override entry for the SCI, * force it to use level trigger and active-low polarity. */ if (!madt_found_sci_override) { printf( "MADT: Forcing active-low polarity and level trigger for SCI\n"); ret = xen_register_pirq(AcpiGbl_FADT.SciInterrupt, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); if (ret != 0) panic("Unable to register SCI IRQ"); } /* Register legacy ISA IRQs */ for (i = 1; i < 16; i++) { if (intr_lookup_source(i) != NULL) continue; ret = xen_register_pirq(i, INTR_TRIGGER_EDGE, INTR_POLARITY_LOW); if (ret != 0 && bootverbose) printf("Unable to register legacy IRQ#%d: %d\n", i, ret); } acpi_SetDefaultIntrModel(ACPI_INTR_APIC); } return (0); } static void xenpv_register(void *dummy __unused) { if (xen_pv_domain()) { apic_register_enumerator(&xenpv_enumerator); } } SYSINIT(xenpv_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, xenpv_register, NULL); /* * Setup per-CPU vCPU IDs */ static void xenpv_set_ids(void *dummy) { struct pcpu *pc; int i; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_vcpu_id = i; } } SYSINIT(xenpv_set_ids, SI_SUB_CPU, SI_ORDER_MIDDLE, xenpv_set_ids, NULL);
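Note how xenpv_set_ids() here and xen_set_vcpu_id() in hvm.c earlier in this change establish two different conventions: HVM guests use the ACPI processor ID as the vCPU ID, while PV(H) guests use the plain FreeBSD CPU index. A hypothetical self-check that should hold once the SI_SUB_CPU SYSINITs have run (sketch only, not part of this change):

        struct pcpu *pc;
        int i;

        CPU_FOREACH(i) {
                pc = pcpu_find(i);
                KASSERT(pc->pc_vcpu_id ==
                    (xen_hvm_domain() ? pc->pc_acpi_id : (uint32_t)i),
                    ("CPU %d: unexpected vCPU ID %u", i, pc->pc_vcpu_id));
        }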