diff --git a/sys/dev/acpica/acpi_perf.c b/sys/dev/acpica/acpi_perf.c index df0fa9a29b6e..5134bbaaa4dc 100644 --- a/sys/dev/acpica/acpi_perf.c +++ b/sys/dev/acpica/acpi_perf.c @@ -1,595 +1,596 @@ /*- * Copyright (c) 2003-2005 Nate Lawson (SDG) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" /* * Support for ACPI processor performance states (Px) according to * section 8.3.3 of the ACPI 2.0c specification. */ struct acpi_px { uint32_t core_freq; uint32_t power; uint32_t trans_lat; uint32_t bm_lat; uint32_t ctrl_val; uint32_t sts_val; }; /* Offsets in struct cf_setting array for storing driver-specific values. */ #define PX_SPEC_CONTROL 0 #define PX_SPEC_STATUS 1 #define MAX_PX_STATES 16 struct acpi_perf_softc { device_t dev; ACPI_HANDLE handle; struct resource *perf_ctrl; /* Set new performance state. */ int perf_ctrl_type; /* Resource type for perf_ctrl. */ struct resource *perf_status; /* Check that transition succeeded. */ int perf_sts_type; /* Resource type for perf_status. */ struct acpi_px *px_states; /* ACPI perf states. */ uint32_t px_count; /* Total number of perf states. */ uint32_t px_max_avail; /* Lowest index state available. */ int px_curr_state; /* Active state index. */ int px_rid; int info_only; /* Can we set new states? */ }; #define PX_GET_REG(reg) \ (bus_space_read_4(rman_get_bustag((reg)), \ rman_get_bushandle((reg)), 0)) #define PX_SET_REG(reg, val) \ (bus_space_write_4(rman_get_bustag((reg)), \ rman_get_bushandle((reg)), 0, (val))) #define ACPI_NOTIFY_PERF_STATES 0x80 /* _PSS changed. 
*/ static void acpi_perf_identify(driver_t *driver, device_t parent); static int acpi_perf_probe(device_t dev); static int acpi_perf_attach(device_t dev); static int acpi_perf_detach(device_t dev); static int acpi_perf_evaluate(device_t dev); static int acpi_px_to_set(device_t dev, struct acpi_px *px, struct cf_setting *set); static void acpi_px_available(struct acpi_perf_softc *sc); static void acpi_px_startup(void *arg); static void acpi_px_notify(ACPI_HANDLE h, UINT32 notify, void *context); static int acpi_px_settings(device_t dev, struct cf_setting *sets, int *count); static int acpi_px_set(device_t dev, const struct cf_setting *set); static int acpi_px_get(device_t dev, struct cf_setting *set); static int acpi_px_type(device_t dev, int *type); static device_method_t acpi_perf_methods[] = { /* Device interface */ DEVMETHOD(device_identify, acpi_perf_identify), DEVMETHOD(device_probe, acpi_perf_probe), DEVMETHOD(device_attach, acpi_perf_attach), DEVMETHOD(device_detach, acpi_perf_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, acpi_px_set), DEVMETHOD(cpufreq_drv_get, acpi_px_get), DEVMETHOD(cpufreq_drv_type, acpi_px_type), DEVMETHOD(cpufreq_drv_settings, acpi_px_settings), DEVMETHOD_END }; static driver_t acpi_perf_driver = { "acpi_perf", acpi_perf_methods, sizeof(struct acpi_perf_softc), }; static devclass_t acpi_perf_devclass; DRIVER_MODULE(acpi_perf, cpu, acpi_perf_driver, acpi_perf_devclass, 0, 0); MODULE_DEPEND(acpi_perf, acpi, 1, 1, 1); static MALLOC_DEFINE(M_ACPIPERF, "acpi_perf", "ACPI Performance states"); static void acpi_perf_identify(driver_t *driver, device_t parent) { ACPI_HANDLE handle; device_t dev; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "acpi_perf", -1) != NULL) return; /* Get the handle for the Processor object and check for perf states. */ handle = acpi_get_handle(parent); if (handle == NULL) return; if (ACPI_FAILURE(AcpiEvaluateObject(handle, "_PSS", NULL, NULL))) return; /* * Add a child to every CPU that has the right methods. In future * versions of the ACPI spec, CPUs can have different settings. * We probe this child now so that other devices that depend * on it (i.e., for info about supported states) will see it. */ - if ((dev = BUS_ADD_CHILD(parent, 0, "acpi_perf", -1)) != NULL) + if ((dev = BUS_ADD_CHILD(parent, 0, "acpi_perf", + device_get_unit(parent))) != NULL) device_probe_and_attach(dev); else device_printf(parent, "add acpi_perf child failed\n"); } static int acpi_perf_probe(device_t dev) { ACPI_HANDLE handle; ACPI_OBJECT *pkg; struct resource *res; ACPI_BUFFER buf; int error, rid, type; if (resource_disabled("acpi_perf", 0)) return (ENXIO); /* * Check the performance state registers. If they are of type * "functional fixed hardware", we attach quietly since we will * only be providing information on settings to other drivers. 
*/ error = ENXIO; handle = acpi_get_handle(dev); buf.Pointer = NULL; buf.Length = ACPI_ALLOCATE_BUFFER; if (ACPI_FAILURE(AcpiEvaluateObject(handle, "_PCT", NULL, &buf))) return (error); pkg = (ACPI_OBJECT *)buf.Pointer; if (ACPI_PKG_VALID(pkg, 2)) { rid = 0; error = acpi_PkgGas(dev, pkg, 0, &type, &rid, &res, 0); switch (error) { case 0: bus_release_resource(dev, type, rid, res); bus_delete_resource(dev, type, rid); device_set_desc(dev, "ACPI CPU Frequency Control"); break; case EOPNOTSUPP: device_quiet(dev); error = 0; break; } } AcpiOsFree(buf.Pointer); return (error); } static int acpi_perf_attach(device_t dev) { struct acpi_perf_softc *sc; sc = device_get_softc(dev); sc->dev = dev; sc->handle = acpi_get_handle(dev); sc->px_max_avail = 0; sc->px_curr_state = CPUFREQ_VAL_UNKNOWN; if (acpi_perf_evaluate(dev) != 0) return (ENXIO); AcpiOsExecute(OSL_NOTIFY_HANDLER, acpi_px_startup, NULL); if (!sc->info_only) cpufreq_register(dev); return (0); } static int acpi_perf_detach(device_t dev) { /* TODO: teardown registers, remove notify handler. */ return (ENXIO); } /* Probe and setup any valid performance states (Px). */ static int acpi_perf_evaluate(device_t dev) { struct acpi_perf_softc *sc; ACPI_BUFFER buf; ACPI_OBJECT *pkg, *res; ACPI_STATUS status; int count, error, i, j; static int once = 1; uint32_t *p; /* Get the control values and parameters for each state. */ error = ENXIO; sc = device_get_softc(dev); buf.Pointer = NULL; buf.Length = ACPI_ALLOCATE_BUFFER; status = AcpiEvaluateObject(sc->handle, "_PSS", NULL, &buf); if (ACPI_FAILURE(status)) return (ENXIO); pkg = (ACPI_OBJECT *)buf.Pointer; if (!ACPI_PKG_VALID(pkg, 1)) { device_printf(dev, "invalid top level _PSS package\n"); goto out; } sc->px_count = pkg->Package.Count; sc->px_states = malloc(sc->px_count * sizeof(struct acpi_px), M_ACPIPERF, M_WAITOK | M_ZERO); /* * Each state is a package of {CoreFreq, Power, TransitionLatency, * BusMasterLatency, ControlVal, StatusVal}, sorted from highest * performance to lowest. */ count = 0; for (i = 0; i < sc->px_count; i++) { res = &pkg->Package.Elements[i]; if (!ACPI_PKG_VALID(res, 6)) { if (once) { once = 0; device_printf(dev, "invalid _PSS package\n"); } continue; } /* Parse the rest of the package into the struct. */ p = &sc->px_states[count].core_freq; for (j = 0; j < 6; j++, p++) acpi_PkgInt32(res, j, p); /* * Check for some impossible frequencies that some systems * use to indicate they don't actually support this Px state. */ if (sc->px_states[count].core_freq == 0 || sc->px_states[count].core_freq == 9999 || sc->px_states[count].core_freq == 0x9999 || sc->px_states[count].core_freq >= 0xffff) continue; /* Check for duplicate entries */ if (count > 0 && sc->px_states[count - 1].core_freq == sc->px_states[count].core_freq) continue; count++; } sc->px_count = count; /* No valid Px state found so give up. */ if (count == 0) goto out; AcpiOsFree(buf.Pointer); /* Get the control and status registers (one of each). */ buf.Pointer = NULL; buf.Length = ACPI_ALLOCATE_BUFFER; status = AcpiEvaluateObject(sc->handle, "_PCT", NULL, &buf); if (ACPI_FAILURE(status)) goto out; /* Check the package of two registers, each a Buffer in GAS format. */ pkg = (ACPI_OBJECT *)buf.Pointer; if (!ACPI_PKG_VALID(pkg, 2)) { device_printf(dev, "invalid perf register package\n"); goto out; } error = acpi_PkgGas(sc->dev, pkg, 0, &sc->perf_ctrl_type, &sc->px_rid, &sc->perf_ctrl, 0); if (error) { /* * If the register is of type FFixedHW, we can only return * info, we can't get or set new settings. 
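The frequency sanity filter in acpi_perf_evaluate() above is plain integer logic and can be exercised in isolation. A minimal userland sketch of the same checks, using hypothetical sample frequencies rather than a real _PSS package:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the driver's "impossible frequency" checks (values in MHz). */
static bool
px_freq_is_bogus(uint32_t freq)
{
	return (freq == 0 || freq == 9999 || freq == 0x9999 ||
	    freq >= 0xffff);
}

int
main(void)
{
	uint32_t sample[] = { 2400, 2400, 1800, 9999, 0 };	/* hypothetical */
	uint32_t prev = 0;
	int count = 0;

	for (int i = 0; i < 5; i++) {
		if (px_freq_is_bogus(sample[i]))
			continue;
		if (count > 0 && prev == sample[i])	/* drop duplicates */
			continue;
		prev = sample[i];
		count++;
	}
	printf("%d valid Px states\n", count);	/* prints "2": 2400, 1800 */
	return (0);
}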
*/ if (error == EOPNOTSUPP) { sc->info_only = TRUE; error = 0; } else device_printf(dev, "failed in PERF_CTL attach\n"); goto out; } sc->px_rid++; error = acpi_PkgGas(sc->dev, pkg, 1, &sc->perf_sts_type, &sc->px_rid, &sc->perf_status, 0); if (error) { if (error == EOPNOTSUPP) { sc->info_only = TRUE; error = 0; } else device_printf(dev, "failed in PERF_STATUS attach\n"); goto out; } sc->px_rid++; /* Get our current limit and register for notifies. */ acpi_px_available(sc); AcpiInstallNotifyHandler(sc->handle, ACPI_DEVICE_NOTIFY, acpi_px_notify, sc); error = 0; out: if (error) { if (sc->px_states) { free(sc->px_states, M_ACPIPERF); sc->px_states = NULL; } if (sc->perf_ctrl) { bus_release_resource(sc->dev, sc->perf_ctrl_type, 0, sc->perf_ctrl); bus_delete_resource(sc->dev, sc->perf_ctrl_type, 0); sc->perf_ctrl = NULL; } if (sc->perf_status) { bus_release_resource(sc->dev, sc->perf_sts_type, 1, sc->perf_status); bus_delete_resource(sc->dev, sc->perf_sts_type, 1); sc->perf_status = NULL; } sc->px_rid = 0; sc->px_count = 0; } if (buf.Pointer) AcpiOsFree(buf.Pointer); return (error); } static void acpi_px_startup(void *arg) { /* Signal to the platform that we are taking over CPU control. */ if (AcpiGbl_FADT.PstateControl == 0) return; ACPI_LOCK(acpi); AcpiOsWritePort(AcpiGbl_FADT.SmiCommand, AcpiGbl_FADT.PstateControl, 8); ACPI_UNLOCK(acpi); } static void acpi_px_notify(ACPI_HANDLE h, UINT32 notify, void *context) { struct acpi_perf_softc *sc; sc = context; if (notify != ACPI_NOTIFY_PERF_STATES) return; acpi_px_available(sc); /* TODO: Implement notification when frequency changes. */ } /* * Find the highest currently-supported performance state. * This can be called at runtime (e.g., due to a docking event) at * the request of a Notify on the processor object. */ static void acpi_px_available(struct acpi_perf_softc *sc) { ACPI_STATUS status; struct cf_setting set; status = acpi_GetInteger(sc->handle, "_PPC", &sc->px_max_avail); /* If the old state is too high, set current state to the new max. */ if (ACPI_SUCCESS(status)) { if (sc->px_curr_state != CPUFREQ_VAL_UNKNOWN && sc->px_curr_state > sc->px_max_avail) { acpi_px_to_set(sc->dev, &sc->px_states[sc->px_max_avail], &set); acpi_px_set(sc->dev, &set); } } else sc->px_max_avail = 0; } static int acpi_px_to_set(device_t dev, struct acpi_px *px, struct cf_setting *set) { if (px == NULL || set == NULL) return (EINVAL); set->freq = px->core_freq; set->power = px->power; /* XXX Include BM latency too? */ set->lat = px->trans_lat; set->volts = CPUFREQ_VAL_UNKNOWN; set->dev = dev; set->spec[PX_SPEC_CONTROL] = px->ctrl_val; set->spec[PX_SPEC_STATUS] = px->sts_val; return (0); } static int acpi_px_settings(device_t dev, struct cf_setting *sets, int *count) { struct acpi_perf_softc *sc; int x, y; sc = device_get_softc(dev); if (sets == NULL || count == NULL) return (EINVAL); if (*count < sc->px_count - sc->px_max_avail) return (E2BIG); /* Return a list of settings that are currently valid. */ y = 0; for (x = sc->px_max_avail; x < sc->px_count; x++, y++) acpi_px_to_set(dev, &sc->px_states[x], &sets[y]); *count = sc->px_count - sc->px_max_avail; return (0); } static int acpi_px_set(device_t dev, const struct cf_setting *set) { struct acpi_perf_softc *sc; int i, status, sts_val, tries; if (set == NULL) return (EINVAL); sc = device_get_softc(dev); /* If we can't set new states, return immediately. */ if (sc->info_only) return (ENXIO); /* Look up appropriate state, based on frequency. 
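acpi_px_settings() above exports only the window of states the platform currently allows: entries at index _PPC (px_max_avail) and slower. A standalone sketch of that windowing, with hypothetical counts:

#include <stdio.h>

int
main(void)
{
	int px_count = 5;	/* states parsed from _PSS */
	int px_max_avail = 2;	/* _PPC: index of fastest allowed state */

	for (int i = px_max_avail; i < px_count; i++)
		printf("exporting Px%d\n", i);
	printf("%d of %d settings visible\n", px_count - px_max_avail,
	    px_count);	/* "3 of 5" */
	return (0);
}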
*/ for (i = sc->px_max_avail; i < sc->px_count; i++) { if (CPUFREQ_CMP(set->freq, sc->px_states[i].core_freq)) break; } if (i == sc->px_count) return (EINVAL); /* Write the appropriate value to the register. */ PX_SET_REG(sc->perf_ctrl, sc->px_states[i].ctrl_val); /* * Try for up to 10 ms to verify the desired state was selected. * This is longer than the standard says (1 ms) but in some modes, * systems may take longer to respond. */ sts_val = sc->px_states[i].sts_val; for (tries = 0; tries < 1000; tries++) { status = PX_GET_REG(sc->perf_status); /* * If we match the status or the desired status is 8 bits * and matches the relevant bits, assume we succeeded. It * appears some systems (IBM R32) expect byte-wide access * even though the standard says the register is 32-bit. */ if (status == sts_val || ((sts_val & ~0xff) == 0 && (status & 0xff) == sts_val)) break; DELAY(10); } if (tries == 1000) { device_printf(dev, "Px transition to %d failed\n", sc->px_states[i].core_freq); return (ENXIO); } sc->px_curr_state = i; return (0); } static int acpi_px_get(device_t dev, struct cf_setting *set) { struct acpi_perf_softc *sc; uint64_t rate; int i; struct pcpu *pc; if (set == NULL) return (EINVAL); sc = device_get_softc(dev); /* If we can't get new states, return immediately. */ if (sc->info_only) return (ENXIO); /* If we've set the rate before, use the cached value. */ if (sc->px_curr_state != CPUFREQ_VAL_UNKNOWN) { acpi_px_to_set(dev, &sc->px_states[sc->px_curr_state], set); return (0); } /* Otherwise, estimate and try to match against our settings. */ pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); cpu_est_clockrate(pc->pc_cpuid, &rate); rate /= 1000000; for (i = 0; i < sc->px_count; i++) { if (CPUFREQ_CMP(sc->px_states[i].core_freq, rate)) { sc->px_curr_state = i; acpi_px_to_set(dev, &sc->px_states[i], set); break; } } /* No match, give up. */ if (i == sc->px_count) { sc->px_curr_state = CPUFREQ_VAL_UNKNOWN; set->freq = CPUFREQ_VAL_UNKNOWN; } return (0); } static int acpi_px_type(device_t dev, int *type) { struct acpi_perf_softc *sc; if (type == NULL) return (EINVAL); sc = device_get_softc(dev); *type = CPUFREQ_TYPE_ABSOLUTE; if (sc->info_only) *type |= CPUFREQ_FLAG_INFO_ONLY; return (0); } diff --git a/sys/dev/acpica/acpi_throttle.c b/sys/dev/acpica/acpi_throttle.c index 883b5d34de09..a9cc2d77ecf9 100644 --- a/sys/dev/acpica/acpi_throttle.c +++ b/sys/dev/acpica/acpi_throttle.c @@ -1,443 +1,444 @@ /*- * Copyright (c) 2003-2005 Nate Lawson (SDG) * Copyright (c) 2001 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" /* * Throttling provides relative frequency control. It involves modulating * the clock so that the CPU is active for only a fraction of the normal * clock cycle. It does not change voltage and so is less efficient than * other mechanisms. Since it is relative, it can be used in addition to * absolute cpufreq drivers. We support the ACPI 2.0 specification. */ struct acpi_throttle_softc { device_t cpu_dev; ACPI_HANDLE cpu_handle; uint32_t cpu_p_blk; /* ACPI P_BLK location */ uint32_t cpu_p_blk_len; /* P_BLK length (must be 6). */ struct resource *cpu_p_cnt; /* Throttling control register */ int cpu_p_type; /* Resource type for cpu_p_cnt. */ uint32_t cpu_thr_state; /* Current throttle setting. */ }; #define THR_GET_REG(reg) \ (bus_space_read_4(rman_get_bustag((reg)), \ rman_get_bushandle((reg)), 0)) #define THR_SET_REG(reg, val) \ (bus_space_write_4(rman_get_bustag((reg)), \ rman_get_bushandle((reg)), 0, (val))) /* * Speeds are stored in counts, from 1 to CPU_MAX_SPEED, and * reported to the user in hundredths of a percent. */ #define CPU_MAX_SPEED (1 << cpu_duty_width) #define CPU_SPEED_PERCENT(x) ((10000 * (x)) / CPU_MAX_SPEED) #define CPU_SPEED_PRINTABLE(x) (CPU_SPEED_PERCENT(x) / 10), \ (CPU_SPEED_PERCENT(x) % 10) #define CPU_P_CNT_THT_EN (1<<4) #define CPU_QUIRK_NO_THROTTLE (1<<1) /* Throttling is not usable. */ #define PCI_VENDOR_INTEL 0x8086 #define PCI_DEVICE_82371AB_3 0x7113 /* PIIX4 chipset for quirks. */ #define PCI_REVISION_A_STEP 0 #define PCI_REVISION_B_STEP 1 static uint32_t cpu_duty_offset; /* Offset in P_CNT of throttle val. */ static uint32_t cpu_duty_width; /* Bit width of throttle value. */ static int thr_rid; /* Driver-wide resource id. */ static int thr_quirks; /* Indicate any hardware bugs. 
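As a worked example of the duty-cycle arithmetic in the macros above, a minimal sketch assuming a hypothetical FADT duty width of 3 bits (so CPU_MAX_SPEED is 8); CPU_SPEED_PERCENT yields hundredths of a percent:

#include <stdio.h>

int
main(void)
{
	unsigned cpu_duty_width = 3;		/* hypothetical FADT value */
	unsigned max = 1u << cpu_duty_width;	/* CPU_MAX_SPEED == 8 */

	for (unsigned speed = 1; speed <= max; speed++) {
		unsigned pct = 10000 * speed / max;	/* CPU_SPEED_PERCENT */
		printf("count %u -> %u.%02u%%\n", speed, pct / 100,
		    pct % 100);	/* 1 -> 12.50%, ..., 8 -> 100.00% */
	}
	return (0);
}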
*/ static void acpi_throttle_identify(driver_t *driver, device_t parent); static int acpi_throttle_probe(device_t dev); static int acpi_throttle_attach(device_t dev); static int acpi_throttle_evaluate(struct acpi_throttle_softc *sc); static void acpi_throttle_quirks(struct acpi_throttle_softc *sc); static int acpi_thr_settings(device_t dev, struct cf_setting *sets, int *count); static int acpi_thr_set(device_t dev, const struct cf_setting *set); static int acpi_thr_get(device_t dev, struct cf_setting *set); static int acpi_thr_type(device_t dev, int *type); static device_method_t acpi_throttle_methods[] = { /* Device interface */ DEVMETHOD(device_identify, acpi_throttle_identify), DEVMETHOD(device_probe, acpi_throttle_probe), DEVMETHOD(device_attach, acpi_throttle_attach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, acpi_thr_set), DEVMETHOD(cpufreq_drv_get, acpi_thr_get), DEVMETHOD(cpufreq_drv_type, acpi_thr_type), DEVMETHOD(cpufreq_drv_settings, acpi_thr_settings), DEVMETHOD_END }; static driver_t acpi_throttle_driver = { "acpi_throttle", acpi_throttle_methods, sizeof(struct acpi_throttle_softc), }; static devclass_t acpi_throttle_devclass; DRIVER_MODULE(acpi_throttle, cpu, acpi_throttle_driver, acpi_throttle_devclass, 0, 0); static void acpi_throttle_identify(driver_t *driver, device_t parent) { ACPI_BUFFER buf; ACPI_HANDLE handle; ACPI_OBJECT *obj; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "acpi_throttle", -1)) return; /* Check for a valid duty width and parent CPU type. */ handle = acpi_get_handle(parent); if (handle == NULL) return; if (AcpiGbl_FADT.DutyWidth == 0 || acpi_get_type(parent) != ACPI_TYPE_PROCESSOR) return; /* * Add a child if there's a non-NULL P_BLK and correct length, or * if the _PTC method is present. */ buf.Pointer = NULL; buf.Length = ACPI_ALLOCATE_BUFFER; if (ACPI_FAILURE(AcpiEvaluateObject(handle, NULL, NULL, &buf))) return; obj = (ACPI_OBJECT *)buf.Pointer; if ((obj->Processor.PblkAddress && obj->Processor.PblkLength >= 4) || ACPI_SUCCESS(AcpiEvaluateObject(handle, "_PTC", NULL, NULL))) { - if (BUS_ADD_CHILD(parent, 0, "acpi_throttle", -1) == NULL) + if (BUS_ADD_CHILD(parent, 0, "acpi_throttle", + device_get_unit(parent)) == NULL) device_printf(parent, "add throttle child failed\n"); } AcpiOsFree(obj); } static int acpi_throttle_probe(device_t dev) { if (resource_disabled("acpi_throttle", 0)) return (ENXIO); /* * On i386 platforms at least, ACPI throttling is accomplished by * the chipset modulating the STPCLK# pin based on the duty cycle. * Since p4tcc uses the same mechanism (but internal to the CPU), * we disable acpi_throttle when p4tcc is also present. 
*/ if (device_find_child(device_get_parent(dev), "p4tcc", -1) && !resource_disabled("p4tcc", 0)) return (ENXIO); device_set_desc(dev, "ACPI CPU Throttling"); return (0); } static int acpi_throttle_attach(device_t dev) { struct acpi_throttle_softc *sc; struct cf_setting set; ACPI_BUFFER buf; ACPI_OBJECT *obj; ACPI_STATUS status; int error; sc = device_get_softc(dev); sc->cpu_dev = dev; sc->cpu_handle = acpi_get_handle(dev); buf.Pointer = NULL; buf.Length = ACPI_ALLOCATE_BUFFER; status = AcpiEvaluateObject(sc->cpu_handle, NULL, NULL, &buf); if (ACPI_FAILURE(status)) { device_printf(dev, "attach failed to get Processor obj - %s\n", AcpiFormatException(status)); return (ENXIO); } obj = (ACPI_OBJECT *)buf.Pointer; sc->cpu_p_blk = obj->Processor.PblkAddress; sc->cpu_p_blk_len = obj->Processor.PblkLength; AcpiOsFree(obj); /* If this is the first device probed, check for quirks. */ if (device_get_unit(dev) == 0) acpi_throttle_quirks(sc); /* Attempt to attach the actual throttling register. */ error = acpi_throttle_evaluate(sc); if (error) return (error); /* * Set our initial frequency to the highest since some systems * seem to boot with this at the lowest setting. */ set.freq = 10000; acpi_thr_set(dev, &set); /* Everything went ok, register with cpufreq(4). */ cpufreq_register(dev); return (0); } static int acpi_throttle_evaluate(struct acpi_throttle_softc *sc) { uint32_t duty_end; ACPI_BUFFER buf; ACPI_OBJECT obj; ACPI_GENERIC_ADDRESS gas; ACPI_STATUS status; /* Get throttling parameters from the FADT. 0 means not supported. */ if (device_get_unit(sc->cpu_dev) == 0) { cpu_duty_offset = AcpiGbl_FADT.DutyOffset; cpu_duty_width = AcpiGbl_FADT.DutyWidth; } if (cpu_duty_width == 0 || (thr_quirks & CPU_QUIRK_NO_THROTTLE) != 0) return (ENXIO); /* Validate the duty offset/width. */ duty_end = cpu_duty_offset + cpu_duty_width - 1; if (duty_end > 31) { device_printf(sc->cpu_dev, "CLK_VAL field overflows P_CNT register\n"); return (ENXIO); } if (cpu_duty_offset <= 4 && duty_end >= 4) { device_printf(sc->cpu_dev, "CLK_VAL field overlaps THT_EN bit\n"); return (ENXIO); } /* * If not present, fall back to using the processor's P_BLK to find * the P_CNT register. * * Note that some systems seem to duplicate the P_BLK pointer * across multiple CPUs, so not getting the resource is not fatal. */ buf.Pointer = &obj; buf.Length = sizeof(obj); status = AcpiEvaluateObject(sc->cpu_handle, "_PTC", NULL, &buf); if (ACPI_SUCCESS(status)) { if (obj.Buffer.Length < sizeof(ACPI_GENERIC_ADDRESS) + 3) { device_printf(sc->cpu_dev, "_PTC buffer too small\n"); return (ENXIO); } memcpy(&gas, obj.Buffer.Pointer + 3, sizeof(gas)); acpi_bus_alloc_gas(sc->cpu_dev, &sc->cpu_p_type, &thr_rid, &gas, &sc->cpu_p_cnt, 0); if (sc->cpu_p_cnt != NULL && bootverbose) { device_printf(sc->cpu_dev, "P_CNT from _PTC %#jx\n", gas.Address); } } /* If _PTC not present or other failure, try the P_BLK. */ if (sc->cpu_p_cnt == NULL) { /* * The spec says P_BLK must be 6 bytes long. However, some * systems use it to indicate a fractional set of features * present so we take anything >= 4. 
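The duty offset/width validation in acpi_throttle_evaluate() above reduces to two range checks on the 32-bit P_CNT register. A self-contained sketch of the same predicate, with hypothetical offsets:

#include <stdbool.h>
#include <stdio.h>

#define THT_EN_BIT	4	/* CPU_P_CNT_THT_EN is bit 4 of P_CNT */

/* Mirrors the two sanity checks acpi_throttle_evaluate() applies. */
static bool
duty_field_ok(unsigned offset, unsigned width)
{
	unsigned end = offset + width - 1;

	if (end > 31)
		return (false);	/* CLK_VAL overflows the 32-bit P_CNT */
	if (offset <= THT_EN_BIT && end >= THT_EN_BIT)
		return (false);	/* CLK_VAL overlaps the THT_EN bit */
	return (true);
}

int
main(void)
{
	printf("%d\n", duty_field_ok(1, 3));	/* 1: bits 1-3, below THT_EN */
	printf("%d\n", duty_field_ok(1, 4));	/* 0: bits 1-4 overlap THT_EN */
	printf("%d\n", duty_field_ok(29, 4));	/* 0: bits 29-32 overflow */
	return (0);
}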
*/ if (sc->cpu_p_blk_len < 4) return (ENXIO); gas.Address = sc->cpu_p_blk; gas.SpaceId = ACPI_ADR_SPACE_SYSTEM_IO; gas.BitWidth = 32; acpi_bus_alloc_gas(sc->cpu_dev, &sc->cpu_p_type, &thr_rid, &gas, &sc->cpu_p_cnt, 0); if (sc->cpu_p_cnt != NULL) { if (bootverbose) device_printf(sc->cpu_dev, "P_CNT from P_BLK %#x\n", sc->cpu_p_blk); } else { device_printf(sc->cpu_dev, "failed to attach P_CNT\n"); return (ENXIO); } } thr_rid++; return (0); } static void acpi_throttle_quirks(struct acpi_throttle_softc *sc) { #ifdef __i386__ device_t acpi_dev; /* Look for various quirks of the PIIX4 part. */ acpi_dev = pci_find_device(PCI_VENDOR_INTEL, PCI_DEVICE_82371AB_3); if (acpi_dev) { switch (pci_get_revid(acpi_dev)) { /* * Disable throttling control on PIIX4 A and B-step. * See specification changes #13 ("Manual Throttle Duty Cycle") * and #14 ("Enabling and Disabling Manual Throttle"), plus * erratum #5 ("STPCLK# Deassertion Time") from the January * 2002 PIIX4 specification update. Note that few (if any) * mobile systems ever used this part. */ case PCI_REVISION_A_STEP: case PCI_REVISION_B_STEP: thr_quirks |= CPU_QUIRK_NO_THROTTLE; break; default: break; } } #endif } static int acpi_thr_settings(device_t dev, struct cf_setting *sets, int *count) { int i, speed; if (sets == NULL || count == NULL) return (EINVAL); if (*count < CPU_MAX_SPEED) return (E2BIG); /* Return a list of valid settings for this driver. */ memset(sets, CPUFREQ_VAL_UNKNOWN, sizeof(*sets) * CPU_MAX_SPEED); for (i = 0, speed = CPU_MAX_SPEED; speed != 0; i++, speed--) { sets[i].freq = CPU_SPEED_PERCENT(speed); sets[i].dev = dev; } *count = CPU_MAX_SPEED; return (0); } static int acpi_thr_set(device_t dev, const struct cf_setting *set) { struct acpi_throttle_softc *sc; uint32_t clk_val, p_cnt, speed; if (set == NULL) return (EINVAL); sc = device_get_softc(dev); /* * Validate requested state converts to a duty cycle that is an * integer from [1 .. CPU_MAX_SPEED]. */ speed = set->freq * CPU_MAX_SPEED / 10000; if (speed * 10000 != set->freq * CPU_MAX_SPEED || speed < 1 || speed > CPU_MAX_SPEED) return (EINVAL); /* If we're at this setting, don't bother applying it again. */ if (speed == sc->cpu_thr_state) return (0); /* Get the current P_CNT value and disable throttling */ p_cnt = THR_GET_REG(sc->cpu_p_cnt); p_cnt &= ~CPU_P_CNT_THT_EN; THR_SET_REG(sc->cpu_p_cnt, p_cnt); /* If we're at maximum speed, that's all */ if (speed < CPU_MAX_SPEED) { /* Mask the old CLK_VAL off and OR in the new value */ clk_val = (CPU_MAX_SPEED - 1) << cpu_duty_offset; p_cnt &= ~clk_val; p_cnt |= (speed << cpu_duty_offset); /* Write the new P_CNT value and then enable throttling */ THR_SET_REG(sc->cpu_p_cnt, p_cnt); p_cnt |= CPU_P_CNT_THT_EN; THR_SET_REG(sc->cpu_p_cnt, p_cnt); } sc->cpu_thr_state = speed; return (0); } static int acpi_thr_get(device_t dev, struct cf_setting *set) { struct acpi_throttle_softc *sc; uint32_t p_cnt, clk_val; if (set == NULL) return (EINVAL); sc = device_get_softc(dev); /* Get the current throttling setting from P_CNT. 
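acpi_thr_set() above accepts a requested frequency only if it converts exactly to an integral duty count in [1, CPU_MAX_SPEED]. A minimal sketch of that round-trip check, using hypothetical inputs:

#include <stdbool.h>
#include <stdio.h>

/*
 * Mirrors acpi_thr_set()'s check that a requested frequency (in
 * hundredths of a percent) maps to a whole duty-cycle count.
 */
static bool
freq_to_speed(unsigned freq, unsigned max_speed, unsigned *speed)
{
	*speed = freq * max_speed / 10000;
	return (*speed * 10000 == freq * max_speed &&
	    *speed >= 1 && *speed <= max_speed);
}

int
main(void)
{
	unsigned speed;

	/* With an 8-count duty cycle, 62.50% is exact, 60.00% is not. */
	printf("%d\n", freq_to_speed(6250, 8, &speed));	/* 1, speed == 5 */
	printf("%d\n", freq_to_speed(6000, 8, &speed));	/* 0 */
	return (0);
}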
*/ p_cnt = THR_GET_REG(sc->cpu_p_cnt); clk_val = (p_cnt >> cpu_duty_offset) & (CPU_MAX_SPEED - 1); sc->cpu_thr_state = clk_val; memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set)); set->freq = CPU_SPEED_PERCENT(clk_val); set->dev = dev; return (0); } static int acpi_thr_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_RELATIVE; return (0); } diff --git a/sys/dev/amdtemp/amdtemp.c b/sys/dev/amdtemp/amdtemp.c index 21227a47c513..c1d696d070a5 100644 --- a/sys/dev/amdtemp/amdtemp.c +++ b/sys/dev/amdtemp/amdtemp.c @@ -1,850 +1,851 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008, 2009 Rui Paulo * Copyright (c) 2009 Norikatsu Shigemura * Copyright (c) 2009-2012 Jung-uk Kim * All rights reserved. * Copyright (c) 2017-2020 Conrad Meyer . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Driver for the AMD CPU on-die thermal sensors. * Initially based on the k8temp Linux driver. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include typedef enum { CORE0_SENSOR0, CORE0_SENSOR1, CORE1_SENSOR0, CORE1_SENSOR1, CORE0, CORE1, CCD1, CCD_BASE = CCD1, CCD2, CCD3, CCD4, CCD5, CCD6, CCD7, CCD8, CCD_MAX = CCD8, NUM_CCDS = CCD_MAX - CCD_BASE + 1, } amdsensor_t; struct amdtemp_softc { int sc_ncores; int sc_ntemps; int sc_flags; #define AMDTEMP_FLAG_CS_SWAP 0x01 /* ThermSenseCoreSel is inverted. */ #define AMDTEMP_FLAG_CT_10BIT 0x02 /* CurTmp is 10-bit wide. */ #define AMDTEMP_FLAG_ALT_OFFSET 0x04 /* CurTmp starts at -28C. */ int32_t sc_offset; int32_t (*sc_gettemp)(device_t, amdsensor_t); struct sysctl_oid *sc_sysctl_cpu[MAXCPU]; struct intr_config_hook sc_ich; device_t sc_smn; }; /* * N.B. The numbers in macro names below are significant and represent CPU * family and model numbers. Do not make up fictitious family or model numbers * when adding support for new devices. 
*/ #define VENDORID_AMD 0x1022 #define DEVICEID_AMD_MISC0F 0x1103 #define DEVICEID_AMD_MISC10 0x1203 #define DEVICEID_AMD_MISC11 0x1303 #define DEVICEID_AMD_MISC14 0x1703 #define DEVICEID_AMD_MISC15 0x1603 #define DEVICEID_AMD_MISC15_M10H 0x1403 #define DEVICEID_AMD_MISC15_M30H 0x141d #define DEVICEID_AMD_MISC15_M60H_ROOT 0x1576 #define DEVICEID_AMD_MISC16 0x1533 #define DEVICEID_AMD_MISC16_M30H 0x1583 #define DEVICEID_AMD_HOSTB17H_ROOT 0x1450 #define DEVICEID_AMD_HOSTB17H_M10H_ROOT 0x15d0 #define DEVICEID_AMD_HOSTB17H_M30H_ROOT 0x1480 /* Also M70H, F19H M00H/M20H */ #define DEVICEID_AMD_HOSTB17H_M60H_ROOT 0x1630 static const struct amdtemp_product { uint16_t amdtemp_vendorid; uint16_t amdtemp_deviceid; /* * 0xFC register is only valid on the D18F3 PCI device; SMN temp * drivers do not attach to that device. */ bool amdtemp_has_cpuid; } amdtemp_products[] = { { VENDORID_AMD, DEVICEID_AMD_MISC0F, true }, { VENDORID_AMD, DEVICEID_AMD_MISC10, true }, { VENDORID_AMD, DEVICEID_AMD_MISC11, true }, { VENDORID_AMD, DEVICEID_AMD_MISC14, true }, { VENDORID_AMD, DEVICEID_AMD_MISC15, true }, { VENDORID_AMD, DEVICEID_AMD_MISC15_M10H, true }, { VENDORID_AMD, DEVICEID_AMD_MISC15_M30H, true }, { VENDORID_AMD, DEVICEID_AMD_MISC15_M60H_ROOT, false }, { VENDORID_AMD, DEVICEID_AMD_MISC16, true }, { VENDORID_AMD, DEVICEID_AMD_MISC16_M30H, true }, { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_ROOT, false }, { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M10H_ROOT, false }, { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M30H_ROOT, false }, { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M60H_ROOT, false }, }; /* * Reported Temperature Control Register, family 0Fh-15h (some models), 16h. */ #define AMDTEMP_REPTMP_CTRL 0xa4 #define AMDTEMP_REPTMP10H_CURTMP_MASK 0x7ff #define AMDTEMP_REPTMP10H_CURTMP_SHIFT 21 #define AMDTEMP_REPTMP10H_TJSEL_MASK 0x3 #define AMDTEMP_REPTMP10H_TJSEL_SHIFT 16 /* * Reported Temperature, Family 15h, M60+ * * Same register bit definitions as other Family 15h CPUs, but access is * indirect via SMN, like Family 17h. */ #define AMDTEMP_15H_M60H_REPTMP_CTRL 0xd8200ca4 /* * Reported Temperature, Family 17h * * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register * provide the current temp. bit 19, when clear, means the temp is reported in * a range 0.."225C" (probable typo for 255C), and when set changes the range * to -49..206C. */ #define AMDTEMP_17H_CUR_TMP 0x59800 #define AMDTEMP_17H_CUR_TMP_RANGE_SEL (1u << 19) /* * The following register set was discovered experimentally by Ondrej Čerman * and collaborators, but is not (yet) documented in a PPR/OSRR (other than * the M70H PPR SMN memory map showing [0x59800, +0x314] as allocated to * SMU::THM). It seems plausible and the Linux sensor folks have adopted it. */ #define AMDTEMP_17H_CCD_TMP_BASE 0x59954 #define AMDTEMP_17H_CCD_TMP_VALID (1u << 11) /* * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius). */ #define AMDTEMP_CURTMP_RANGE_ADJUST 490 /* * Thermaltrip Status Register (Family 0Fh only) */ #define AMDTEMP_THERMTP_STAT 0xe4 #define AMDTEMP_TTSR_SELCORE 0x04 #define AMDTEMP_TTSR_SELSENSOR 0x40 /* * DRAM Configuration High Register */ #define AMDTEMP_DRAM_CONF_HIGH 0x94 /* Function 2 */ #define AMDTEMP_DRAM_MODE_DDR3 0x0100 /* * CPU Family/Model Register */ #define AMDTEMP_CPUID 0xfc /* * Device methods. 
*/ static void amdtemp_identify(driver_t *driver, device_t parent); static int amdtemp_probe(device_t dev); static int amdtemp_attach(device_t dev); static void amdtemp_intrhook(void *arg); static int amdtemp_detach(device_t dev); static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor); static void amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model); static void amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model); static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS); static device_method_t amdtemp_methods[] = { /* Device interface */ DEVMETHOD(device_identify, amdtemp_identify), DEVMETHOD(device_probe, amdtemp_probe), DEVMETHOD(device_attach, amdtemp_attach), DEVMETHOD(device_detach, amdtemp_detach), DEVMETHOD_END }; static driver_t amdtemp_driver = { "amdtemp", amdtemp_methods, sizeof(struct amdtemp_softc), }; static devclass_t amdtemp_devclass; DRIVER_MODULE(amdtemp, hostb, amdtemp_driver, amdtemp_devclass, NULL, NULL); MODULE_VERSION(amdtemp, 1); MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1); MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products, nitems(amdtemp_products)); static bool amdtemp_match(device_t dev, const struct amdtemp_product **product_out) { int i; uint16_t vendor, devid; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); for (i = 0; i < nitems(amdtemp_products); i++) { if (vendor == amdtemp_products[i].amdtemp_vendorid && devid == amdtemp_products[i].amdtemp_deviceid) { if (product_out != NULL) *product_out = &amdtemp_products[i]; return (true); } } return (false); } static void amdtemp_identify(driver_t *driver, device_t parent) { device_t child; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "amdtemp", -1) != NULL) return; if (amdtemp_match(parent, NULL)) { - child = device_add_child(parent, "amdtemp", -1); + child = device_add_child(parent, "amdtemp", + device_get_unit(parent)); if (child == NULL) device_printf(parent, "add amdtemp child failed\n"); } } static int amdtemp_probe(device_t dev) { uint32_t family, model; if (resource_disabled("amdtemp", 0)) return (ENXIO); if (!amdtemp_match(device_get_parent(dev), NULL)) return (ENXIO); family = CPUID_TO_FAMILY(cpu_id); model = CPUID_TO_MODEL(cpu_id); switch (family) { case 0x0f: if ((model == 0x04 && (cpu_id & CPUID_STEPPING) == 0) || (model == 0x05 && (cpu_id & CPUID_STEPPING) <= 1)) return (ENXIO); break; case 0x10: case 0x11: case 0x12: case 0x14: case 0x15: case 0x16: case 0x17: case 0x19: break; default: return (ENXIO); } device_set_desc(dev, "AMD CPU On-Die Thermal Sensors"); return (BUS_PROBE_GENERIC); } static int amdtemp_attach(device_t dev) { char tn[32]; u_int regs[4]; const struct amdtemp_product *product; struct amdtemp_softc *sc; struct sysctl_ctx_list *sysctlctx; struct sysctl_oid *sysctlnode; uint32_t cpuid, family, model; u_int bid; int erratum319, unit; bool needsmn; sc = device_get_softc(dev); erratum319 = 0; needsmn = false; if (!amdtemp_match(device_get_parent(dev), &product)) return (ENXIO); cpuid = cpu_id; family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); /* * This checks for the byzantine condition of running a heterogenous * revision multi-socket system where the attach thread is potentially * probing a remote socket's PCI device. 
* * Currently, such scenarios are unsupported on models using the SMN * (because on those models, amdtemp(4) attaches to a different PCI * device than the one that contains AMDTEMP_CPUID). * * The ancient 0x0F family of devices only supports this register from * models 40h+. */ if (product->amdtemp_has_cpuid && (family > 0x0f || (family == 0x0f && model >= 0x40))) { cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID, 4); family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); } switch (family) { case 0x0f: /* * Thermaltrip Status Register * * - ThermSenseCoreSel * * Revision F & G: 0 - Core1, 1 - Core0 * Other: 0 - Core0, 1 - Core1 * * - CurTmp * * Revision G: bits 23-14 * Other: bits 23-16 * * XXX According to the BKDG, CurTmp, ThermSenseSel and * ThermSenseCoreSel bits were introduced in Revision F * but CurTmp seems working fine as early as Revision C. * However, it is not clear whether ThermSenseSel and/or * ThermSenseCoreSel work in undocumented cases as well. * In fact, the Linux driver suggests it may not work but * we just assume it does until we find otherwise. * * XXX According to Linux, CurTmp starts at -28C on * Socket AM2 Revision G processors, which is not * documented anywhere. */ if (model >= 0x40) sc->sc_flags |= AMDTEMP_FLAG_CS_SWAP; if (model >= 0x60 && model != 0xc1) { do_cpuid(0x80000001, regs); bid = (regs[1] >> 9) & 0x1f; switch (model) { case 0x68: /* Socket S1g1 */ case 0x6c: case 0x7c: break; case 0x6b: /* Socket AM2 and ASB1 (2 cores) */ if (bid != 0x0b && bid != 0x0c) sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; break; case 0x6f: /* Socket AM2 and ASB1 (1 core) */ case 0x7f: if (bid != 0x07 && bid != 0x09 && bid != 0x0c) sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; break; default: sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; } sc->sc_flags |= AMDTEMP_FLAG_CT_10BIT; } /* * There are two sensors per core. */ sc->sc_ntemps = 2; sc->sc_gettemp = amdtemp_gettemp0f; break; case 0x10: /* * Erratum 319 Inaccurate Temperature Measurement * * http://support.amd.com/us/Processor_TechDocs/41322.pdf */ do_cpuid(0x80000001, regs); switch ((regs[1] >> 28) & 0xf) { case 0: /* Socket F */ erratum319 = 1; break; case 1: /* Socket AM2+ or AM3 */ if ((pci_cfgregread(pci_get_bus(dev), pci_get_slot(dev), 2, AMDTEMP_DRAM_CONF_HIGH, 2) & AMDTEMP_DRAM_MODE_DDR3) != 0 || model > 0x04 || (model == 0x04 && (cpuid & CPUID_STEPPING) >= 3)) break; /* XXX 00100F42h (RB-C2) exists in both formats. */ erratum319 = 1; break; } /* FALLTHROUGH */ case 0x11: case 0x12: case 0x14: case 0x15: case 0x16: sc->sc_ntemps = 1; /* * Some later (60h+) models of family 15h use a similar SMN * network as family 17h. (However, the register index differs * from 17h and the decoding matches other 10h-15h models, * which differ from 17h.) */ if (family == 0x15 && model >= 0x60) { sc->sc_gettemp = amdtemp_gettemp15hm60h; needsmn = true; } else sc->sc_gettemp = amdtemp_gettemp; break; case 0x17: case 0x19: sc->sc_ntemps = 1; sc->sc_gettemp = amdtemp_gettemp17h; needsmn = true; break; default: device_printf(dev, "Bogus family 0x%x\n", family); return (ENXIO); } if (needsmn) { sc->sc_smn = device_find_child( device_get_parent(dev), "amdsmn", -1); if (sc->sc_smn == NULL) { if (bootverbose) device_printf(dev, "No SMN device found\n"); return (ENXIO); } } /* Find number of cores per package. */ sc->sc_ncores = (amd_feature2 & AMDID2_CMP) != 0 ? 
(cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1;
	if (sc->sc_ncores > MAXCPU)
		return (ENXIO);

	if (erratum319)
		device_printf(dev,
		    "Erratum 319: temperature measurement may be inaccurate\n");
	if (bootverbose)
		device_printf(dev, "Found %d cores and %d sensors.\n",
		    sc->sc_ncores,
		    sc->sc_ntemps > 1 ? sc->sc_ntemps * sc->sc_ncores : 1);

	/*
	 * dev.amdtemp.N tree.
	 */
	unit = device_get_unit(dev);
	snprintf(tn, sizeof(tn), "dev.amdtemp.%d.sensor_offset", unit);
	TUNABLE_INT_FETCH(tn, &sc->sc_offset);

	sysctlctx = device_get_sysctl_ctx(dev);
	SYSCTL_ADD_INT(sysctlctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "sensor_offset", CTLFLAG_RW, &sc->sc_offset, 0,
	    "Temperature sensor offset");
	sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "core0", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Core 0");

	SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode),
	    OID_AUTO, "sensor0",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
	    dev, CORE0_SENSOR0, amdtemp_sysctl, "IK",
	    "Core 0 / Sensor 0 temperature");

	if (family == 0x17)
		amdtemp_probe_ccd_sensors17h(dev, model);
	else if (family == 0x19)
		amdtemp_probe_ccd_sensors19h(dev, model);
	else if (sc->sc_ntemps > 1) {
		SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode),
		    OID_AUTO, "sensor1",
		    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
		    dev, CORE0_SENSOR1, amdtemp_sysctl, "IK",
		    "Core 0 / Sensor 1 temperature");
		if (sc->sc_ncores > 1) {
			sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
			    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			    OID_AUTO, "core1", CTLFLAG_RD | CTLFLAG_MPSAFE,
			    0, "Core 1");
			SYSCTL_ADD_PROC(sysctlctx,
			    SYSCTL_CHILDREN(sysctlnode),
			    OID_AUTO, "sensor0",
			    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
			    dev, CORE1_SENSOR0, amdtemp_sysctl, "IK",
			    "Core 1 / Sensor 0 temperature");
			SYSCTL_ADD_PROC(sysctlctx,
			    SYSCTL_CHILDREN(sysctlnode),
			    OID_AUTO, "sensor1",
			    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
			    dev, CORE1_SENSOR1, amdtemp_sysctl, "IK",
			    "Core 1 / Sensor 1 temperature");
		}
	}

	/*
	 * Try to create dev.cpu sysctl entries and set up the intrhook
	 * function.  This is needed because the cpu driver may be loaded
	 * late on boot, after us.
	 */
	amdtemp_intrhook(dev);
	sc->sc_ich.ich_func = amdtemp_intrhook;
	sc->sc_ich.ich_arg = dev;
	if (config_intrhook_establish(&sc->sc_ich) != 0) {
		device_printf(dev, "config_intrhook_establish failed!\n");
		return (ENXIO);
	}

	return (0);
}

void
amdtemp_intrhook(void *arg)
{
	struct amdtemp_softc *sc;
	struct sysctl_ctx_list *sysctlctx;
	device_t dev = (device_t)arg;
	device_t acpi, cpu, nexus;
	amdsensor_t sensor;
	int i;

	sc = device_get_softc(dev);

	/*
	 * dev.cpu.N.temperature.
	 */
	nexus = device_find_child(root_bus, "nexus", 0);
	acpi = device_find_child(nexus, "acpi", 0);

	for (i = 0; i < sc->sc_ncores; i++) {
		if (sc->sc_sysctl_cpu[i] != NULL)
			continue;
		cpu = device_find_child(acpi, "cpu",
		    device_get_unit(dev) * sc->sc_ncores + i);
		if (cpu != NULL) {
			sysctlctx = device_get_sysctl_ctx(cpu);

			sensor = sc->sc_ntemps > 1 ?
			    (i == 0 ? CORE0 : CORE1) : CORE0_SENSOR0;
			sc->sc_sysctl_cpu[i] = SYSCTL_ADD_PROC(sysctlctx,
			    SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)),
			    OID_AUTO, "temperature",
			    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
			    dev, sensor, amdtemp_sysctl, "IK",
			    "Current temperature");
		}
	}
	if (sc->sc_ich.ich_arg != NULL)
		config_intrhook_disestablish(&sc->sc_ich);
}

int
amdtemp_detach(device_t dev)
{
	struct amdtemp_softc *sc = device_get_softc(dev);
	int i;

	for (i = 0; i < sc->sc_ncores; i++)
		if (sc->sc_sysctl_cpu[i] != NULL)
			sysctl_remove_oid(sc->sc_sysctl_cpu[i], 1, 0);

	/* NewBus removes the dev.amdtemp.N tree by itself.
*/ return (0); } static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS) { device_t dev = (device_t)arg1; struct amdtemp_softc *sc = device_get_softc(dev); amdsensor_t sensor = (amdsensor_t)arg2; int32_t auxtemp[2], temp; int error; switch (sensor) { case CORE0: auxtemp[0] = sc->sc_gettemp(dev, CORE0_SENSOR0); auxtemp[1] = sc->sc_gettemp(dev, CORE0_SENSOR1); temp = imax(auxtemp[0], auxtemp[1]); break; case CORE1: auxtemp[0] = sc->sc_gettemp(dev, CORE1_SENSOR0); auxtemp[1] = sc->sc_gettemp(dev, CORE1_SENSOR1); temp = imax(auxtemp[0], auxtemp[1]); break; default: temp = sc->sc_gettemp(dev, sensor); break; } error = sysctl_handle_int(oidp, &temp, 0, req); return (error); } #define AMDTEMP_ZERO_C_TO_K 2731 static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t mask, offset, temp; /* Set Sensor/Core selector. */ temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 1); temp &= ~(AMDTEMP_TTSR_SELCORE | AMDTEMP_TTSR_SELSENSOR); switch (sensor) { case CORE0_SENSOR1: temp |= AMDTEMP_TTSR_SELSENSOR; /* FALLTHROUGH */ case CORE0_SENSOR0: case CORE0: if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) != 0) temp |= AMDTEMP_TTSR_SELCORE; break; case CORE1_SENSOR1: temp |= AMDTEMP_TTSR_SELSENSOR; /* FALLTHROUGH */ case CORE1_SENSOR0: case CORE1: if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) == 0) temp |= AMDTEMP_TTSR_SELCORE; break; default: __assert_unreachable(); } pci_write_config(dev, AMDTEMP_THERMTP_STAT, temp, 1); mask = (sc->sc_flags & AMDTEMP_FLAG_CT_10BIT) != 0 ? 0x3ff : 0x3fc; offset = (sc->sc_flags & AMDTEMP_FLAG_ALT_OFFSET) != 0 ? 28 : 49; temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 4); temp = ((temp >> 14) & mask) * 5 / 2; temp += AMDTEMP_ZERO_C_TO_K + (sc->sc_offset - offset) * 10; return (temp); } static uint32_t amdtemp_decode_fam10h_to_17h(int32_t sc_offset, uint32_t val, bool minus49) { uint32_t temp; /* Convert raw register subfield units (0.125C) to units of 0.1C. */ temp = (val & AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4; if (minus49) temp -= AMDTEMP_CURTMP_RANGE_ADJUST; temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10; return (temp); } static uint32_t amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val) { bool minus49; /* * On Family 15h and higher, if CurTmpTjSel is 11b, the range is * adjusted down by 49.0 degrees Celsius. (This adjustment is not * documented in BKDGs prior to family 15h model 00h.) 
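The decode in amdtemp_decode_fam10h_to_17h() above is fixed-point arithmetic: raw CurTmp units of 0.125 C are rescaled to 0.1 C, optionally shifted down by the 49.0 C range adjust, then rebased to deci-Kelvin. A standalone sketch with a hypothetical raw reading:

#include <stdint.h>
#include <stdio.h>

#define CURTMP_MASK	0x7ff
#define RANGE_ADJUST	490	/* 49.0 C, in 0.1-degree units */
#define ZERO_C_TO_K	2731

/* Same arithmetic as amdtemp_decode_fam10h_to_17h(), returning 0.1 K. */
static uint32_t
decode_curtmp(uint32_t raw, int minus49, int32_t offset)
{
	uint32_t temp = (raw & CURTMP_MASK) * 5 / 4;	/* 0.125 C -> 0.1 C */

	if (minus49)
		temp -= RANGE_ADJUST;
	return (temp + ZERO_C_TO_K + offset * 10);
}

int
main(void)
{
	/* Hypothetical raw CurTmp of 400 (50.0 C), normal range, no offset. */
	printf("%u deci-Kelvin\n",
	    (unsigned)decode_curtmp(400, 0, 0));	/* 3231 == 50.0 C */
	return (0);
}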
*/ minus49 = (CPUID_TO_FAMILY(cpu_id) >= 0x15 && ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) & AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3); return (amdtemp_decode_fam10h_to_17h(sc_offset, val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49)); } static uint32_t amdtemp_decode_fam17h_tctl(int32_t sc_offset, uint32_t val) { bool minus49; minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0); return (amdtemp_decode_fam10h_to_17h(sc_offset, val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49)); } static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t temp; temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4); return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp)); } static int32_t amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t val; int error; error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val); KASSERT(error == 0, ("amdsmn_read")); return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val)); } static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t val; int error; switch (sensor) { case CORE0_SENSOR0: /* Tctl */ error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val); KASSERT(error == 0, ("amdsmn_read")); return (amdtemp_decode_fam17h_tctl(sc->sc_offset, val)); case CCD_BASE ... CCD_MAX: /* Tccd */ error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CCD_TMP_BASE + (((int)sensor - CCD_BASE) * sizeof(val)), &val); KASSERT(error == 0, ("amdsmn_read2")); KASSERT((val & AMDTEMP_17H_CCD_TMP_VALID) != 0, ("sensor %d: not valid", (int)sensor)); return (amdtemp_decode_fam10h_to_17h(sc->sc_offset, val, true)); default: __assert_unreachable(); } } static void amdtemp_probe_ccd_sensors(device_t dev, uint32_t maxreg) { char sensor_name[16], sensor_descr[32]; struct amdtemp_softc *sc; uint32_t i, val; int error; sc = device_get_softc(dev); for (i = 0; i < maxreg; i++) { error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CCD_TMP_BASE + (i * sizeof(val)), &val); if (error != 0) continue; if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0) continue; snprintf(sensor_name, sizeof(sensor_name), "ccd%u", i); snprintf(sensor_descr, sizeof(sensor_descr), "CCD %u temperature (Tccd%u)", i, i); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, sensor_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr); } } static void amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model) { uint32_t maxreg; switch (model) { case 0x00 ... 0x2f: /* Zen1, Zen+ */ maxreg = 4; break; case 0x30 ... 0x3f: /* Zen2 TR (Castle Peak)/EPYC (Rome) */ case 0x60 ... 0x7f: /* Zen2 Ryzen (Renoir APU, Matisse) */ case 0x90 ... 0x9f: /* Zen2 Ryzen (Van Gogh APU) */ maxreg = 8; _Static_assert((int)NUM_CCDS >= 8, ""); break; default: device_printf(dev, "Unrecognized Family 17h Model: %02xh\n", model); return; } amdtemp_probe_ccd_sensors(dev, maxreg); } static void amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model) { uint32_t maxreg; switch (model) { case 0x00 ... 0x0f: /* Zen3 EPYC "Milan" */ case 0x20 ... 
0x2f: /* Zen3 Ryzen "Vermeer" */ maxreg = 8; _Static_assert((int)NUM_CCDS >= 8, ""); break; default: device_printf(dev, "Unrecognized Family 19h Model: %02xh\n", model); return; } amdtemp_probe_ccd_sensors(dev, maxreg); } diff --git a/sys/dev/coretemp/coretemp.c b/sys/dev/coretemp/coretemp.c index bdc71b284ac7..251fb08b4c11 100644 --- a/sys/dev/coretemp/coretemp.c +++ b/sys/dev/coretemp/coretemp.c @@ -1,443 +1,443 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007, 2008 Rui Paulo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Device driver for Intel's On Die thermal sensor via MSR. * First introduced in Intel's Core line of processors. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* for curthread */ #include #include #include #include #include #include #include #define TZ_ZEROC 2731 #define THERM_STATUS_LOG 0x02 #define THERM_STATUS 0x01 #define THERM_STATUS_TEMP_SHIFT 16 #define THERM_STATUS_TEMP_MASK 0x7f #define THERM_STATUS_RES_SHIFT 27 #define THERM_STATUS_RES_MASK 0x0f #define THERM_STATUS_VALID_SHIFT 31 #define THERM_STATUS_VALID_MASK 0x01 struct coretemp_softc { device_t sc_dev; int sc_tjmax; unsigned int sc_throttle_log; }; /* * Device methods. 
*/ static void coretemp_identify(driver_t *driver, device_t parent); static int coretemp_probe(device_t dev); static int coretemp_attach(device_t dev); static int coretemp_detach(device_t dev); static uint64_t coretemp_get_thermal_msr(int cpu); static void coretemp_clear_thermal_msr(int cpu); static int coretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS); static int coretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS); static device_method_t coretemp_methods[] = { /* Device interface */ DEVMETHOD(device_identify, coretemp_identify), DEVMETHOD(device_probe, coretemp_probe), DEVMETHOD(device_attach, coretemp_attach), DEVMETHOD(device_detach, coretemp_detach), DEVMETHOD_END }; static driver_t coretemp_driver = { "coretemp", coretemp_methods, sizeof(struct coretemp_softc), }; enum therm_info { CORETEMP_TEMP, CORETEMP_DELTA, CORETEMP_RESOLUTION, CORETEMP_TJMAX, }; static devclass_t coretemp_devclass; DRIVER_MODULE(coretemp, cpu, coretemp_driver, coretemp_devclass, NULL, NULL); static void coretemp_identify(driver_t *driver, device_t parent) { device_t child; u_int regs[4]; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "coretemp", -1) != NULL) return; /* Check that CPUID 0x06 is supported and the vendor is Intel.*/ if (cpu_high < 6 || cpu_vendor_id != CPU_VENDOR_INTEL) return; /* * CPUID 0x06 returns 1 if the processor has on-die thermal * sensors. EBX[0:3] contains the number of sensors. */ do_cpuid(0x06, regs); if ((regs[0] & 0x1) != 1) return; /* * We add a child for each CPU since settings must be performed * on each CPU in the SMP case. */ - child = device_add_child(parent, "coretemp", -1); + child = device_add_child(parent, "coretemp", device_get_unit(parent)); if (child == NULL) device_printf(parent, "add coretemp child failed\n"); } static int coretemp_probe(device_t dev) { if (resource_disabled("coretemp", 0)) return (ENXIO); device_set_desc(dev, "CPU On-Die Thermal Sensors"); if (!bootverbose && device_get_unit(dev) != 0) device_quiet(dev); return (BUS_PROBE_GENERIC); } static int coretemp_attach(device_t dev) { struct coretemp_softc *sc = device_get_softc(dev); device_t pdev; uint64_t msr; int cpu_model, cpu_stepping; int ret, tjtarget; struct sysctl_oid *oid; struct sysctl_ctx_list *ctx; sc->sc_dev = dev; pdev = device_get_parent(dev); cpu_model = CPUID_TO_MODEL(cpu_id); cpu_stepping = CPUID_TO_STEPPING(cpu_id); /* * Some CPUs, namely the PIII, don't have thermal sensors, but * report them when the CPUID check is performed in * coretemp_identify(). This leads to a later GPF when the sensor * is queried via a MSR, so we stop here. */ if (cpu_model < 0xe) return (ENXIO); #if 0 /* * XXXrpaulo: I have this CPU model and when it returns from C3 * coretemp continues to function properly. */ /* * Check for errata AE18. * "Processor Digital Thermal Sensor (DTS) Readout stops * updating upon returning from C3/C4 state." * * Adapted from the Linux coretemp driver. */ if (cpu_model == 0xe && cpu_stepping < 0xc) { msr = rdmsr(MSR_BIOS_SIGN); msr = msr >> 32; if (msr < 0x39) { device_printf(dev, "not supported (Intel errata " "AE18), try updating your BIOS\n"); return (ENXIO); } } #endif /* * Use 100C as the initial value. */ sc->sc_tjmax = 100; if ((cpu_model == 0xf && cpu_stepping >= 2) || cpu_model == 0xe) { /* * On some Core 2 CPUs, there's an undocumented MSR that * can tell us if Tj(max) is 100 or 85. * * The if-clause for CPUs having the MSR_IA32_EXT_CONFIG was adapted * from the Linux coretemp driver. 
*/ msr = rdmsr(MSR_IA32_EXT_CONFIG); if (msr & (1 << 30)) sc->sc_tjmax = 85; } else if (cpu_model == 0x17) { switch (cpu_stepping) { case 0x6: /* Mobile Core 2 Duo */ sc->sc_tjmax = 105; break; default: /* Unknown stepping */ break; } } else if (cpu_model == 0x1c) { switch (cpu_stepping) { case 0xa: /* 45nm Atom D400, N400 and D500 series */ sc->sc_tjmax = 100; break; default: sc->sc_tjmax = 90; break; } } else { /* * Attempt to get Tj(max) from MSR IA32_TEMPERATURE_TARGET. * * This method is described in Intel white paper "CPU * Monitoring With DTS/PECI". (#322683) */ ret = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &msr); if (ret == 0) { tjtarget = (msr >> 16) & 0xff; /* * On earlier generation of processors, the value * obtained from IA32_TEMPERATURE_TARGET register is * an offset that needs to be summed with a model * specific base. It is however not clear what * these numbers are, with the publicly available * documents from Intel. * * For now, we consider [70, 110]C range, as * described in #322683, as "reasonable" and accept * these values whenever the MSR is available for * read, regardless the CPU model. */ if (tjtarget >= 70 && tjtarget <= 110) sc->sc_tjmax = tjtarget; else device_printf(dev, "Tj(target) value %d " "does not seem right.\n", tjtarget); } else device_printf(dev, "Can not get Tj(target) " "from your CPU, using 100C.\n"); } if (bootverbose) device_printf(dev, "Setting TjMax=%d\n", sc->sc_tjmax); ctx = device_get_sysctl_ctx(dev); oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)), OID_AUTO, "coretemp", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Per-CPU thermal information"); /* * Add the MIBs to dev.cpu.N and dev.cpu.N.coretemp. */ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)), OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_TEMP, coretemp_get_val_sysctl, "IK", "Current temperature"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "delta", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_DELTA, coretemp_get_val_sysctl, "I", "Delta between TCC activation and current temperature"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "resolution", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_RESOLUTION, coretemp_get_val_sysctl, "I", "Resolution of CPU thermal sensor"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "tjmax", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_TJMAX, coretemp_get_val_sysctl, "IK", "TCC activation temperature"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "throttle_log", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, 0, coretemp_throttle_log_sysctl, "I", "Set to 1 if the thermal sensor has tripped"); return (0); } static int coretemp_detach(device_t dev) { return (0); } struct coretemp_args { u_int msr; uint64_t val; }; /* * The digital temperature reading is located at bit 16 * of MSR_THERM_STATUS. * * There is a bit on that MSR that indicates whether the * temperature is valid or not. * * The temperature is computed by subtracting the temperature * reading by Tj(max). 
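The comment above fully determines the decode: take bits 16-22 of MSR_THERM_STATUS, trust them only if the valid bit (31) is set, and subtract from Tj(max). A standalone sketch with a hypothetical MSR value 35 C below a Tj(max) of 100 C, yielding the deci-Kelvin value the sysctl reports:

#include <stdint.h>
#include <stdio.h>

#define TZ_ZEROC	2731
#define TEMP_SHIFT	16
#define TEMP_MASK	0x7f
#define VALID_SHIFT	31

/* Same decode as coretemp_get_val_sysctl()'s CORETEMP_TEMP case. */
static int
decode_therm_status(uint64_t msr, int tjmax)
{
	if (((msr >> VALID_SHIFT) & 0x1) != 1)
		return (-1);			/* reading not valid */
	int delta = (msr >> TEMP_SHIFT) & TEMP_MASK;
	return ((tjmax - delta) * 10 + TZ_ZEROC);	/* deci-Kelvin */
}

int
main(void)
{
	uint64_t msr = (1ull << VALID_SHIFT) | (35ull << TEMP_SHIFT);

	printf("%d\n", decode_therm_status(msr, 100));	/* 3381 == 65.0 C */
	return (0);
}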
*/ static uint64_t coretemp_get_thermal_msr(int cpu) { uint64_t res; x86_msr_op(MSR_THERM_STATUS, MSR_OP_RENDEZVOUS_ONE | MSR_OP_READ | MSR_OP_CPUID(cpu), 0, &res); return (res); } static void coretemp_clear_thermal_msr(int cpu) { x86_msr_op(MSR_THERM_STATUS, MSR_OP_RENDEZVOUS_ONE | MSR_OP_WRITE | MSR_OP_CPUID(cpu), 0, NULL); } static int coretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS) { device_t dev; uint64_t msr; int val, tmp; struct coretemp_softc *sc; enum therm_info type; char stemp[16]; dev = (device_t) arg1; msr = coretemp_get_thermal_msr(device_get_unit(dev)); sc = device_get_softc(dev); type = arg2; if (((msr >> THERM_STATUS_VALID_SHIFT) & THERM_STATUS_VALID_MASK) != 1) { val = -1; } else { switch (type) { case CORETEMP_TEMP: tmp = (msr >> THERM_STATUS_TEMP_SHIFT) & THERM_STATUS_TEMP_MASK; val = (sc->sc_tjmax - tmp) * 10 + TZ_ZEROC; break; case CORETEMP_DELTA: val = (msr >> THERM_STATUS_TEMP_SHIFT) & THERM_STATUS_TEMP_MASK; break; case CORETEMP_RESOLUTION: val = (msr >> THERM_STATUS_RES_SHIFT) & THERM_STATUS_RES_MASK; break; case CORETEMP_TJMAX: val = sc->sc_tjmax * 10 + TZ_ZEROC; break; } } if (msr & THERM_STATUS_LOG) { coretemp_clear_thermal_msr(device_get_unit(dev)); sc->sc_throttle_log = 1; /* * Check for Critical Temperature Status and Critical * Temperature Log. It doesn't really matter if the * current temperature is invalid because the "Critical * Temperature Log" bit will tell us if the Critical * Temperature has * been reached in past. It's not * directly related to the current temperature. * * If we reach a critical level, allow devctl(4) * to catch this and shutdown the system. */ if (msr & THERM_STATUS) { tmp = (msr >> THERM_STATUS_TEMP_SHIFT) & THERM_STATUS_TEMP_MASK; tmp = (sc->sc_tjmax - tmp) * 10 + TZ_ZEROC; device_printf(dev, "critical temperature detected, " "suggest system shutdown\n"); snprintf(stemp, sizeof(stemp), "%d", tmp); devctl_notify("coretemp", "Thermal", stemp, "notify=0xcc"); } } return (sysctl_handle_int(oidp, &val, 0, req)); } static int coretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS) { device_t dev; uint64_t msr; int error, val; struct coretemp_softc *sc; dev = (device_t) arg1; msr = coretemp_get_thermal_msr(device_get_unit(dev)); sc = device_get_softc(dev); if (msr & THERM_STATUS_LOG) { coretemp_clear_thermal_msr(device_get_unit(dev)); sc->sc_throttle_log = 1; } val = sc->sc_throttle_log; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return (error); else if (val != 0) return (EINVAL); coretemp_clear_thermal_msr(device_get_unit(dev)); sc->sc_throttle_log = 0; return (0); } diff --git a/sys/dev/cpufreq/cpufreq_dt.c b/sys/dev/cpufreq/cpufreq_dt.c index 3beffeb7063a..4ab021a97d31 100644 --- a/sys/dev/cpufreq/cpufreq_dt.c +++ b/sys/dev/cpufreq/cpufreq_dt.c @@ -1,573 +1,574 @@ /*- * Copyright (c) 2018 Emmanuel Vadot * Copyright (c) 2016 Jared McNeill * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Generic DT based cpufreq driver */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #if 0 #define DPRINTF(dev, msg...) device_printf(dev, "cpufreq_dt: " msg); #else #define DPRINTF(dev, msg...) #endif enum opp_version { OPP_V1 = 1, OPP_V2, }; struct cpufreq_dt_opp { uint64_t freq; uint32_t uvolt_target; uint32_t uvolt_min; uint32_t uvolt_max; uint32_t uamps; uint32_t clk_latency; bool turbo_mode; bool opp_suspend; }; struct cpufreq_dt_softc { device_t dev; clk_t clk; regulator_t reg; struct cpufreq_dt_opp *opp; ssize_t nopp; int cpu; cpuset_t cpus; }; static void cpufreq_dt_notify(device_t dev, uint64_t freq) { struct cpufreq_dt_softc *sc; struct pcpu *pc; int cpu; sc = device_get_softc(dev); CPU_FOREACH(cpu) { if (CPU_ISSET(cpu, &sc->cpus)) { pc = pcpu_find(cpu); pc->pc_clock = freq; } } } static const struct cpufreq_dt_opp * cpufreq_dt_find_opp(device_t dev, uint64_t freq) { struct cpufreq_dt_softc *sc; ssize_t n; sc = device_get_softc(dev); DPRINTF(dev, "Looking for freq %ju\n", freq); for (n = 0; n < sc->nopp; n++) if (CPUFREQ_CMP(sc->opp[n].freq, freq)) return (&sc->opp[n]); DPRINTF(dev, "Couldn't find one\n"); return (NULL); } static void cpufreq_dt_opp_to_setting(device_t dev, const struct cpufreq_dt_opp *opp, struct cf_setting *set) { struct cpufreq_dt_softc *sc; sc = device_get_softc(dev); memset(set, 0, sizeof(*set)); set->freq = opp->freq / 1000000; set->volts = opp->uvolt_target / 1000; set->power = CPUFREQ_VAL_UNKNOWN; set->lat = opp->clk_latency; set->dev = dev; } static int cpufreq_dt_get(device_t dev, struct cf_setting *set) { struct cpufreq_dt_softc *sc; const struct cpufreq_dt_opp *opp; uint64_t freq; sc = device_get_softc(dev); DPRINTF(dev, "cpufreq_dt_get\n"); if (clk_get_freq(sc->clk, &freq) != 0) return (ENXIO); opp = cpufreq_dt_find_opp(dev, freq); if (opp == NULL) { device_printf(dev, "Can't find the current freq in opp\n"); return (ENOENT); } cpufreq_dt_opp_to_setting(dev, opp, set); DPRINTF(dev, "Current freq %dMhz\n", set->freq); return (0); } static int cpufreq_dt_set(device_t dev, const struct cf_setting *set) { struct cpufreq_dt_softc *sc; const struct cpufreq_dt_opp *opp, *copp; uint64_t freq; int uvolt, error; sc = device_get_softc(dev); DPRINTF(dev, "Working on cpu %d\n", sc->cpu); DPRINTF(dev, "We have %d cpu on this dev\n", CPU_COUNT(&sc->cpus)); if (!CPU_ISSET(sc->cpu, &sc->cpus)) { DPRINTF(dev, "Not for this CPU\n"); return (0); } if (clk_get_freq(sc->clk, &freq) != 0) { device_printf(dev, "Can't get current clk freq\n"); return (ENXIO); } /* Try to get current valtage by using regulator first. 
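 * The voltage read here determines the ordering of the transition
 * below: when raising the frequency, the supply voltage is increased
 * before the clock is changed; when lowering it, the voltage is only
 * dropped afterwards, so the core is never clocked faster than its
 * current voltage supports.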
 */
	/*
	 * Look up the OPP entry for the current frequency up front; it is
	 * also used below to back out a failed transition.
	 */
	copp = cpufreq_dt_find_opp(sc->dev, freq);
	error = regulator_get_voltage(sc->reg, &uvolt);
	if (error != 0) {
		/*
		 * Fall back to the OPP table.  This is not fully reliable:
		 * the actual processor frequency may not appear in the
		 * table, since the PLL's frequency granularity can differ
		 * from that of the OPP table.
		 */
		if (copp == NULL) {
			device_printf(dev,
			    "Can't find the current freq in opp\n");
			return (ENOENT);
		}
		uvolt = copp->uvolt_target;
	}
	opp = cpufreq_dt_find_opp(sc->dev, set->freq * 1000000);
	if (opp == NULL) {
		device_printf(dev, "Couldn't find an opp for this freq\n");
		return (EINVAL);
	}
	DPRINTF(sc->dev, "Current freq %ju, uvolt: %d\n", freq, uvolt);
	DPRINTF(sc->dev, "Target freq %ju, uvolt: %d\n",
	    opp->freq, opp->uvolt_target);

	if (uvolt < opp->uvolt_target) {
		DPRINTF(dev, "Changing regulator from %u to %u\n",
		    uvolt, opp->uvolt_target);
		error = regulator_set_voltage(sc->reg,
		    opp->uvolt_min,
		    opp->uvolt_max);
		if (error != 0) {
			DPRINTF(dev, "Failed, backout\n");
			return (ENXIO);
		}
	}

	DPRINTF(dev, "Setting clk to %ju\n", opp->freq);
	error = clk_set_freq(sc->clk, opp->freq, CLK_SET_ROUND_DOWN);
	if (error != 0) {
		DPRINTF(dev, "Failed, backout\n");
		/*
		 * Restore the previous voltage (best effort); copp may be
		 * NULL if the current frequency was not in the OPP table.
		 */
		if (copp != NULL)
			(void)regulator_set_voltage(sc->reg,
			    copp->uvolt_min, copp->uvolt_max);
		return (ENXIO);
	}

	if (uvolt > opp->uvolt_target) {
		DPRINTF(dev, "Changing regulator from %u to %u\n",
		    uvolt, opp->uvolt_target);
		error = regulator_set_voltage(sc->reg,
		    opp->uvolt_min,
		    opp->uvolt_max);
		if (error != 0) {
			DPRINTF(dev, "Failed to switch regulator to %d\n",
			    opp->uvolt_target);
			/* Restore the previous CPU frequency (best effort) */
			if (copp != NULL)
				(void)clk_set_freq(sc->clk, copp->freq, 0);
			return (ENXIO);
		}
	}

	if (clk_get_freq(sc->clk, &freq) == 0)
		cpufreq_dt_notify(dev, freq);

	return (0);
}

static int
cpufreq_dt_type(device_t dev, int *type)
{
	if (type == NULL)
		return (EINVAL);

	*type = CPUFREQ_TYPE_ABSOLUTE;
	return (0);
}

static int
cpufreq_dt_settings(device_t dev, struct cf_setting *sets, int *count)
{
	struct cpufreq_dt_softc *sc;
	ssize_t n;

	DPRINTF(dev, "cpufreq_dt_settings\n");
	if (sets == NULL || count == NULL)
		return (EINVAL);

	sc = device_get_softc(dev);

	if (*count < sc->nopp) {
		*count = (int)sc->nopp;
		return (E2BIG);
	}

	for (n = 0; n < sc->nopp; n++)
		cpufreq_dt_opp_to_setting(dev, &sc->opp[n], &sets[n]);

	*count = (int)sc->nopp;

	return (0);
}

static void
cpufreq_dt_identify(driver_t *driver, device_t parent)
{
	phandle_t node;

	/* Properties must be listed under node /cpus/cpu@0 */
	node = ofw_bus_get_node(parent);

	/* The cpu@0 node must have the following properties */
	if (!OF_hasprop(node, "clocks") ||
	    (!OF_hasprop(node, "cpu-supply") &&
	    !OF_hasprop(node, "cpu0-supply")))
		return;

	if (!OF_hasprop(node, "operating-points") &&
	    !OF_hasprop(node, "operating-points-v2"))
		return;

	if (device_find_child(parent, "cpufreq_dt", -1) != NULL)
		return;

-	if (BUS_ADD_CHILD(parent, 0, "cpufreq_dt", -1) == NULL)
+	if (BUS_ADD_CHILD(parent, 0, "cpufreq_dt", device_get_unit(parent))
+	    == NULL)
		device_printf(parent, "add cpufreq_dt child failed\n");
}

static int
cpufreq_dt_probe(device_t dev)
{
	phandle_t node;

	node = ofw_bus_get_node(device_get_parent(dev));

	if (!OF_hasprop(node, "clocks") ||
	    (!OF_hasprop(node, "cpu-supply") &&
	    !OF_hasprop(node, "cpu0-supply")))
		return (ENXIO);

	if (!OF_hasprop(node, "operating-points") &&
	    !OF_hasprop(node, "operating-points-v2"))
		return (ENXIO);

	device_set_desc(dev, "Generic cpufreq driver");
	return (BUS_PROBE_GENERIC);
}

static int
cpufreq_dt_oppv1_parse(struct cpufreq_dt_softc *sc, phandle_t node)
{
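	/*
	 * For reference, an operating-points (v1) property is a flat array
	 * of <frequency-kHz voltage-uV> pairs in the device tree, along the
	 * lines of (values illustrative, not from a real board):
	 *
	 *	operating-points = <
	 *		1008000 1400000
	 *		 816000 1100000
	 *		 408000 1000000
	 *	>;
	 *
	 * which is why the frequency below is scaled by 1000 to Hz and the
	 * second cell is used for all three microvolt fields.
	 */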
uint32_t *opp, lat; ssize_t n; sc->nopp = OF_getencprop_alloc_multi(node, "operating-points", sizeof(uint32_t) * 2, (void **)&opp); if (sc->nopp == -1) return (ENXIO); if (OF_getencprop(node, "clock-latency", &lat, sizeof(lat)) == -1) lat = CPUFREQ_VAL_UNKNOWN; sc->opp = malloc(sizeof(*sc->opp) * sc->nopp, M_DEVBUF, M_WAITOK); for (n = 0; n < sc->nopp; n++) { sc->opp[n].freq = opp[n * 2 + 0] * 1000; sc->opp[n].uvolt_min = opp[n * 2 + 1]; sc->opp[n].uvolt_max = sc->opp[n].uvolt_min; sc->opp[n].uvolt_target = sc->opp[n].uvolt_min; sc->opp[n].clk_latency = lat; if (bootverbose) device_printf(sc->dev, "%ju.%03ju MHz, %u uV\n", sc->opp[n].freq / 1000000, sc->opp[n].freq % 1000000, sc->opp[n].uvolt_target); } free(opp, M_OFWPROP); return (0); } static int cpufreq_dt_oppv2_parse(struct cpufreq_dt_softc *sc, phandle_t node) { phandle_t opp, opp_table, opp_xref; pcell_t cell[2]; uint32_t *volts, lat; int nvolt, i; if (OF_getencprop(node, "operating-points-v2", &opp_xref, sizeof(opp_xref)) == -1) { device_printf(sc->dev, "Cannot get xref to oppv2 table\n"); return (ENXIO); } opp_table = OF_node_from_xref(opp_xref); if (opp_table == opp_xref) return (ENXIO); if (!OF_hasprop(opp_table, "opp-shared")) { device_printf(sc->dev, "Only opp-shared is supported\n"); return (ENXIO); } for (opp = OF_child(opp_table); opp > 0; opp = OF_peer(opp)) sc->nopp += 1; sc->opp = malloc(sizeof(*sc->opp) * sc->nopp, M_DEVBUF, M_WAITOK); for (i = 0, opp_table = OF_child(opp_table); opp_table > 0; opp_table = OF_peer(opp_table), i++) { /* opp-hz is a required property */ if (OF_getencprop(opp_table, "opp-hz", cell, sizeof(cell)) == -1) continue; sc->opp[i].freq = cell[0]; sc->opp[i].freq <<= 32; sc->opp[i].freq |= cell[1]; if (OF_getencprop(opp_table, "clock-latency", &lat, sizeof(lat)) == -1) sc->opp[i].clk_latency = CPUFREQ_VAL_UNKNOWN; else sc->opp[i].clk_latency = (int)lat; if (OF_hasprop(opp_table, "turbo-mode")) sc->opp[i].turbo_mode = true; if (OF_hasprop(opp_table, "opp-suspend")) sc->opp[i].opp_suspend = true; nvolt = OF_getencprop_alloc_multi(opp_table, "opp-microvolt", sizeof(*volts), (void **)&volts); if (nvolt == 1) { sc->opp[i].uvolt_target = volts[0]; sc->opp[i].uvolt_min = volts[0]; sc->opp[i].uvolt_max = volts[0]; } else if (nvolt == 3) { sc->opp[i].uvolt_target = volts[0]; sc->opp[i].uvolt_min = volts[1]; sc->opp[i].uvolt_max = volts[2]; } else { device_printf(sc->dev, "Wrong count of opp-microvolt property\n"); OF_prop_free(volts); free(sc->opp, M_DEVBUF); return (ENXIO); } OF_prop_free(volts); if (bootverbose) device_printf(sc->dev, "%ju.%03ju Mhz (%u uV)\n", sc->opp[i].freq / 1000000, sc->opp[i].freq % 1000000, sc->opp[i].uvolt_target); } return (0); } static int cpufreq_dt_attach(device_t dev) { struct cpufreq_dt_softc *sc; phandle_t node; phandle_t cnode, opp, copp; int cpu; uint64_t freq; int rv = 0; char device_type[16]; enum opp_version version; sc = device_get_softc(dev); sc->dev = dev; node = ofw_bus_get_node(device_get_parent(dev)); sc->cpu = device_get_unit(device_get_parent(dev)); DPRINTF(dev, "cpu=%d\n", sc->cpu); if (sc->cpu >= mp_ncpus) { device_printf(dev, "Not attaching as cpu is not present\n"); return (ENXIO); } if (regulator_get_by_ofw_property(dev, node, "cpu-supply", &sc->reg) != 0) { if (regulator_get_by_ofw_property(dev, node, "cpu0-supply", &sc->reg) != 0) { device_printf(dev, "no regulator for %s\n", ofw_bus_get_name(device_get_parent(dev))); return (ENXIO); } } if (clk_get_by_ofw_index(dev, node, 0, &sc->clk) != 0) { device_printf(dev, "no clock for %s\n", 
ofw_bus_get_name(device_get_parent(dev))); regulator_release(sc->reg); return (ENXIO); } if (OF_hasprop(node, "operating-points")) { version = OPP_V1; rv = cpufreq_dt_oppv1_parse(sc, node); if (rv != 0) { device_printf(dev, "Failed to parse opp-v1 table\n"); return (rv); } OF_getencprop(node, "operating-points", &opp, sizeof(opp)); } else { version = OPP_V2; rv = cpufreq_dt_oppv2_parse(sc, node); if (rv != 0) { device_printf(dev, "Failed to parse opp-v2 table\n"); return (rv); } OF_getencprop(node, "operating-points-v2", &opp, sizeof(opp)); } /* * Find all CPUs that share the same opp table */ CPU_ZERO(&sc->cpus); cnode = OF_parent(node); for (cpu = 0, cnode = OF_child(cnode); cnode > 0; cnode = OF_peer(cnode)) { if (OF_getprop(cnode, "device_type", device_type, sizeof(device_type)) <= 0) continue; if (strcmp(device_type, "cpu") != 0) continue; if (cpu == sc->cpu) { DPRINTF(dev, "Skipping our cpu\n"); CPU_SET(cpu, &sc->cpus); cpu++; continue; } DPRINTF(dev, "Testing CPU %d\n", cpu); copp = -1; if (version == OPP_V1) OF_getencprop(cnode, "operating-points", &copp, sizeof(copp)); else if (version == OPP_V2) OF_getencprop(cnode, "operating-points-v2", &copp, sizeof(copp)); if (opp == copp) { DPRINTF(dev, "CPU %d is using the same opp as this one (%d)\n", cpu, sc->cpu); CPU_SET(cpu, &sc->cpus); } cpu++; } if (clk_get_freq(sc->clk, &freq) == 0) cpufreq_dt_notify(dev, freq); cpufreq_register(dev); return (0); } static device_method_t cpufreq_dt_methods[] = { /* Device interface */ DEVMETHOD(device_identify, cpufreq_dt_identify), DEVMETHOD(device_probe, cpufreq_dt_probe), DEVMETHOD(device_attach, cpufreq_dt_attach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_get, cpufreq_dt_get), DEVMETHOD(cpufreq_drv_set, cpufreq_dt_set), DEVMETHOD(cpufreq_drv_type, cpufreq_dt_type), DEVMETHOD(cpufreq_drv_settings, cpufreq_dt_settings), DEVMETHOD_END }; static driver_t cpufreq_dt_driver = { "cpufreq_dt", cpufreq_dt_methods, sizeof(struct cpufreq_dt_softc), }; static devclass_t cpufreq_dt_devclass; DRIVER_MODULE(cpufreq_dt, cpu, cpufreq_dt_driver, cpufreq_dt_devclass, 0, 0); MODULE_VERSION(cpufreq_dt, 1); diff --git a/sys/kern/kern_cpu.c b/sys/kern/kern_cpu.c index 2f44405aef37..380849974a35 100644 --- a/sys/kern/kern_cpu.c +++ b/sys/kern/kern_cpu.c @@ -1,1152 +1,1152 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2007 Nate Lawson (SDG) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" /* * Common CPU frequency glue code. Drivers for specific hardware can * attach this interface to allow users to get/set the CPU frequency. */ /* * Number of levels we can handle. Levels are synthesized from settings * so for M settings and N drivers, there may be M*N levels. */ #define CF_MAX_LEVELS 256 struct cf_saved_freq { struct cf_level level; int priority; SLIST_ENTRY(cf_saved_freq) link; }; struct cpufreq_softc { struct sx lock; struct cf_level curr_level; int curr_priority; SLIST_HEAD(, cf_saved_freq) saved_freq; struct cf_level_lst all_levels; int all_count; int max_mhz; device_t dev; device_t cf_drv_dev; struct sysctl_ctx_list sysctl_ctx; struct task startup_task; struct cf_level *levels_buf; }; struct cf_setting_array { struct cf_setting sets[MAX_SETTINGS]; int count; TAILQ_ENTRY(cf_setting_array) link; }; TAILQ_HEAD(cf_setting_lst, cf_setting_array); #define CF_MTX_INIT(x) sx_init((x), "cpufreq lock") #define CF_MTX_LOCK(x) sx_xlock((x)) #define CF_MTX_UNLOCK(x) sx_xunlock((x)) #define CF_MTX_ASSERT(x) sx_assert((x), SX_XLOCKED) #define CF_DEBUG(msg...) 
do { \ if (cf_verbose) \ printf("cpufreq: " msg); \ } while (0) static int cpufreq_attach(device_t dev); static void cpufreq_startup_task(void *ctx, int pending); static int cpufreq_detach(device_t dev); static int cf_set_method(device_t dev, const struct cf_level *level, int priority); static int cf_get_method(device_t dev, struct cf_level *level); static int cf_levels_method(device_t dev, struct cf_level *levels, int *count); static int cpufreq_insert_abs(struct cpufreq_softc *sc, struct cf_setting *sets, int count); static int cpufreq_expand_set(struct cpufreq_softc *sc, struct cf_setting_array *set_arr); static struct cf_level *cpufreq_dup_set(struct cpufreq_softc *sc, struct cf_level *dup, struct cf_setting *set); static int cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS); static int cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS); static int cpufreq_settings_sysctl(SYSCTL_HANDLER_ARGS); static device_method_t cpufreq_methods[] = { DEVMETHOD(device_probe, bus_generic_probe), DEVMETHOD(device_attach, cpufreq_attach), DEVMETHOD(device_detach, cpufreq_detach), DEVMETHOD(cpufreq_set, cf_set_method), DEVMETHOD(cpufreq_get, cf_get_method), DEVMETHOD(cpufreq_levels, cf_levels_method), {0, 0} }; static driver_t cpufreq_driver = { "cpufreq", cpufreq_methods, sizeof(struct cpufreq_softc) }; static devclass_t cpufreq_dc; DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0); static int cf_lowest_freq; static int cf_verbose; static SYSCTL_NODE(_debug, OID_AUTO, cpufreq, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "cpufreq debugging"); SYSCTL_INT(_debug_cpufreq, OID_AUTO, lowest, CTLFLAG_RWTUN, &cf_lowest_freq, 1, "Don't provide levels below this frequency."); SYSCTL_INT(_debug_cpufreq, OID_AUTO, verbose, CTLFLAG_RWTUN, &cf_verbose, 1, "Print verbose debugging messages"); /* * This is called as the result of a hardware specific frequency control driver * calling cpufreq_register. It provides a general interface for system wide * frequency controls and operates on a per cpu basis. */ static int cpufreq_attach(device_t dev) { struct cpufreq_softc *sc; struct pcpu *pc; device_t parent; uint64_t rate; CF_DEBUG("initializing %s\n", device_get_nameunit(dev)); sc = device_get_softc(dev); parent = device_get_parent(dev); sc->dev = dev; sysctl_ctx_init(&sc->sysctl_ctx); TAILQ_INIT(&sc->all_levels); CF_MTX_INIT(&sc->lock); sc->curr_level.total_set.freq = CPUFREQ_VAL_UNKNOWN; SLIST_INIT(&sc->saved_freq); /* Try to get nominal CPU freq to use it as maximum later if needed */ sc->max_mhz = cpu_get_nominal_mhz(dev); /* If that fails, try to measure the current rate */ if (sc->max_mhz <= 0) { CF_DEBUG("Unable to obtain nominal frequency.\n"); pc = cpu_get_pcpu(dev); if (cpu_est_clockrate(pc->pc_cpuid, &rate) == 0) sc->max_mhz = rate / 1000000; else sc->max_mhz = CPUFREQ_VAL_UNKNOWN; } CF_DEBUG("initializing one-time data for %s\n", device_get_nameunit(dev)); sc->levels_buf = malloc(CF_MAX_LEVELS * sizeof(*sc->levels_buf), M_DEVBUF, M_WAITOK); SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(parent)), OID_AUTO, "freq", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, cpufreq_curr_sysctl, "I", "Current CPU frequency"); SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(parent)), OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, cpufreq_levels_sysctl, "A", "CPU frequency levels"); /* * Queue a one-shot broadcast that levels have changed. * It will run once the system has completed booting. 
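 * (TASK_INIT() plus taskqueue_enqueue() on taskqueue_thread is the
 * standard taskqueue(9) deferred-work pattern: the callback runs later
 * in a kernel thread context instead of here in attach.)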
*/ TASK_INIT(&sc->startup_task, 0, cpufreq_startup_task, dev); taskqueue_enqueue(taskqueue_thread, &sc->startup_task); return (0); } /* Handle any work to be done for all drivers that attached during boot. */ static void cpufreq_startup_task(void *ctx, int pending) { cpufreq_settings_changed((device_t)ctx); } static int cpufreq_detach(device_t dev) { struct cpufreq_softc *sc; struct cf_saved_freq *saved_freq; CF_DEBUG("shutdown %s\n", device_get_nameunit(dev)); sc = device_get_softc(dev); sysctl_ctx_free(&sc->sysctl_ctx); while ((saved_freq = SLIST_FIRST(&sc->saved_freq)) != NULL) { SLIST_REMOVE_HEAD(&sc->saved_freq, link); free(saved_freq, M_TEMP); } free(sc->levels_buf, M_DEVBUF); return (0); } static int cf_set_method(device_t dev, const struct cf_level *level, int priority) { struct cpufreq_softc *sc; const struct cf_setting *set; struct cf_saved_freq *saved_freq, *curr_freq; struct pcpu *pc; int error, i; u_char pri; sc = device_get_softc(dev); error = 0; set = NULL; saved_freq = NULL; /* We are going to change levels so notify the pre-change handler. */ EVENTHANDLER_INVOKE(cpufreq_pre_change, level, &error); if (error != 0) { EVENTHANDLER_INVOKE(cpufreq_post_change, level, error); return (error); } CF_MTX_LOCK(&sc->lock); #ifdef SMP #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); #else /* * If still booting and secondary CPUs not started yet, don't allow * changing the frequency until they're online. This is because we * can't switch to them using sched_bind() and thus we'd only be * switching the main CPU. XXXTODO: Need to think more about how to * handle having different CPUs at different frequencies. */ if (mp_ncpus > 1 && !smp_started) { device_printf(dev, "rejecting change, SMP not started yet\n"); error = ENXIO; goto out; } #endif #endif /* SMP */ /* * If the requested level has a lower priority, don't allow * the new level right now. */ if (priority < sc->curr_priority) { CF_DEBUG("ignoring, curr prio %d less than %d\n", priority, sc->curr_priority); error = EPERM; goto out; } /* * If the caller didn't specify a level and one is saved, prepare to * restore the saved level. If none has been saved, return an error. */ if (level == NULL) { saved_freq = SLIST_FIRST(&sc->saved_freq); if (saved_freq == NULL) { CF_DEBUG("NULL level, no saved level\n"); error = ENXIO; goto out; } level = &saved_freq->level; priority = saved_freq->priority; CF_DEBUG("restoring saved level, freq %d prio %d\n", level->total_set.freq, priority); } /* Reject levels that are below our specified threshold. */ if (level->total_set.freq < cf_lowest_freq) { CF_DEBUG("rejecting freq %d, less than %d limit\n", level->total_set.freq, cf_lowest_freq); error = EINVAL; goto out; } /* If already at this level, just return. */ if (sc->curr_level.total_set.freq == level->total_set.freq) { CF_DEBUG("skipping freq %d, same as current level %d\n", level->total_set.freq, sc->curr_level.total_set.freq); goto skip; } /* First, set the absolute frequency via its driver. */ set = &level->abs_set; if (set->dev) { if (!device_is_attached(set->dev)) { error = ENXIO; goto out; } /* Bind to the target CPU before switching. */ pc = cpu_get_pcpu(set->dev); /* Skip settings if CPU is not started. 
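 * The thread_lock()/sched_prio()/sched_bind() sequence that follows
 * pins the current thread to the target CPU at PRI_MIN so that the
 * driver's CPUFREQ_DRV_SET() method (typically an MSR write) executes
 * on that CPU; the binding and priority are undone right after the
 * call returns.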
*/ if (pc == NULL) { error = 0; goto out; } thread_lock(curthread); pri = curthread->td_priority; sched_prio(curthread, PRI_MIN); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); CF_DEBUG("setting abs freq %d on %s (cpu %d)\n", set->freq, device_get_nameunit(set->dev), PCPU_GET(cpuid)); error = CPUFREQ_DRV_SET(set->dev, set); thread_lock(curthread); sched_unbind(curthread); sched_prio(curthread, pri); thread_unlock(curthread); if (error) { goto out; } } /* Next, set any/all relative frequencies via their drivers. */ for (i = 0; i < level->rel_count; i++) { set = &level->rel_set[i]; if (!device_is_attached(set->dev)) { error = ENXIO; goto out; } /* Bind to the target CPU before switching. */ pc = cpu_get_pcpu(set->dev); thread_lock(curthread); pri = curthread->td_priority; sched_prio(curthread, PRI_MIN); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); CF_DEBUG("setting rel freq %d on %s (cpu %d)\n", set->freq, device_get_nameunit(set->dev), PCPU_GET(cpuid)); error = CPUFREQ_DRV_SET(set->dev, set); thread_lock(curthread); sched_unbind(curthread); sched_prio(curthread, pri); thread_unlock(curthread); if (error) { /* XXX Back out any successful setting? */ goto out; } } skip: /* * Before recording the current level, check if we're going to a * higher priority. If so, save the previous level and priority. */ if (sc->curr_level.total_set.freq != CPUFREQ_VAL_UNKNOWN && priority > sc->curr_priority) { CF_DEBUG("saving level, freq %d prio %d\n", sc->curr_level.total_set.freq, sc->curr_priority); curr_freq = malloc(sizeof(*curr_freq), M_TEMP, M_NOWAIT); if (curr_freq == NULL) { error = ENOMEM; goto out; } curr_freq->level = sc->curr_level; curr_freq->priority = sc->curr_priority; SLIST_INSERT_HEAD(&sc->saved_freq, curr_freq, link); } sc->curr_level = *level; sc->curr_priority = priority; /* If we were restoring a saved state, reset it to "unused". */ if (saved_freq != NULL) { CF_DEBUG("resetting saved level\n"); sc->curr_level.total_set.freq = CPUFREQ_VAL_UNKNOWN; SLIST_REMOVE_HEAD(&sc->saved_freq, link); free(saved_freq, M_TEMP); } out: CF_MTX_UNLOCK(&sc->lock); /* * We changed levels (or attempted to) so notify the post-change * handler of new frequency or error. */ EVENTHANDLER_INVOKE(cpufreq_post_change, level, error); if (error && set) device_printf(set->dev, "set freq failed, err %d\n", error); return (error); } static int cpufreq_get_frequency(device_t dev) { struct cf_setting set; if (CPUFREQ_DRV_GET(dev, &set) != 0) return (-1); return (set.freq); } /* Returns the index into *levels with the match */ static int cpufreq_get_level(device_t dev, struct cf_level *levels, int count) { int i, freq; if ((freq = cpufreq_get_frequency(dev)) < 0) return (-1); for (i = 0; i < count; i++) if (freq == levels[i].total_set.freq) return (i); return (-1); } /* * Used by the cpufreq core, this function will populate *level with the current * frequency as either determined by a cached value sc->curr_level, or in the * case the lower level driver has set the CPUFREQ_FLAG_UNCACHED flag, it will * obtain the frequency from the driver itself. */ static int cf_get_method(device_t dev, struct cf_level *level) { struct cpufreq_softc *sc; struct cf_level *levels; struct cf_setting *curr_set; struct pcpu *pc; int bdiff, count, diff, error, i, type; uint64_t rate; sc = device_get_softc(dev); error = 0; levels = NULL; /* * If we already know the current frequency, and the driver didn't ask * for uncached usage, we're done. 
*/ CF_MTX_LOCK(&sc->lock); curr_set = &sc->curr_level.total_set; error = CPUFREQ_DRV_TYPE(sc->cf_drv_dev, &type); if (error == 0 && (type & CPUFREQ_FLAG_UNCACHED)) { struct cf_setting set; /* * If the driver wants to always report back the real frequency, * first try the driver and if that fails, fall back to * estimating. */ if (CPUFREQ_DRV_GET(sc->cf_drv_dev, &set) == 0) { sc->curr_level.total_set = set; CF_DEBUG("get returning immediate freq %d\n", curr_set->freq); goto out; } } else if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) { CF_DEBUG("get returning known freq %d\n", curr_set->freq); error = 0; goto out; } CF_MTX_UNLOCK(&sc->lock); /* * We need to figure out the current level. Loop through every * driver, getting the current setting. Then, attempt to get a best * match of settings against each level. */ count = CF_MAX_LEVELS; levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); if (levels == NULL) return (ENOMEM); error = CPUFREQ_LEVELS(sc->dev, levels, &count); if (error) { if (error == E2BIG) printf("cpufreq: need to increase CF_MAX_LEVELS\n"); free(levels, M_TEMP); return (error); } /* * Reacquire the lock and search for the given level. * * XXX Note: this is not quite right since we really need to go * through each level and compare both absolute and relative * settings for each driver in the system before making a match. * The estimation code below catches this case though. */ CF_MTX_LOCK(&sc->lock); i = cpufreq_get_level(sc->cf_drv_dev, levels, count); if (i >= 0) sc->curr_level = levels[i]; else CF_DEBUG("Couldn't find supported level for %s\n", device_get_nameunit(sc->cf_drv_dev)); if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) { CF_DEBUG("get matched freq %d from drivers\n", curr_set->freq); goto out; } /* * We couldn't find an exact match, so attempt to estimate and then * match against a level. */ pc = cpu_get_pcpu(dev); if (pc == NULL) { error = ENXIO; goto out; } cpu_est_clockrate(pc->pc_cpuid, &rate); rate /= 1000000; bdiff = 1 << 30; for (i = 0; i < count; i++) { diff = abs(levels[i].total_set.freq - rate); if (diff < bdiff) { bdiff = diff; sc->curr_level = levels[i]; } } CF_DEBUG("get estimated freq %d\n", curr_set->freq); out: if (error == 0) *level = sc->curr_level; CF_MTX_UNLOCK(&sc->lock); if (levels) free(levels, M_TEMP); return (error); } /* * Either directly obtain settings from the cpufreq driver, or build a list of * relative settings to be integrated later against an absolute max. */ static int cpufreq_add_levels(device_t cf_dev, struct cf_setting_lst *rel_sets) { struct cf_setting_array *set_arr; struct cf_setting *sets; device_t dev; struct cpufreq_softc *sc; int type, set_count, error; sc = device_get_softc(cf_dev); dev = sc->cf_drv_dev; /* Skip devices that aren't ready. */ if (!device_is_attached(cf_dev)) return (0); /* * Get settings, skipping drivers that offer no settings or * provide settings for informational purposes only. */ error = CPUFREQ_DRV_TYPE(dev, &type); if (error != 0 || (type & CPUFREQ_FLAG_INFO_ONLY)) { if (error == 0) { CF_DEBUG("skipping info-only driver %s\n", device_get_nameunit(cf_dev)); } return (error); } sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT); if (sets == NULL) return (ENOMEM); set_count = MAX_SETTINGS; error = CPUFREQ_DRV_SETTINGS(dev, sets, &set_count); if (error != 0 || set_count == 0) goto out; /* Add the settings to our absolute/relative lists. 
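 * Absolute settings carry a frequency in MHz; relative settings are
 * percentages scaled by 100 (10000 means 100%), which
 * cpufreq_expand_set() later applies against each absolute level.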
*/ switch (type & CPUFREQ_TYPE_MASK) { case CPUFREQ_TYPE_ABSOLUTE: error = cpufreq_insert_abs(sc, sets, set_count); break; case CPUFREQ_TYPE_RELATIVE: CF_DEBUG("adding %d relative settings\n", set_count); set_arr = malloc(sizeof(*set_arr), M_TEMP, M_NOWAIT); if (set_arr == NULL) { error = ENOMEM; goto out; } bcopy(sets, set_arr->sets, set_count * sizeof(*sets)); set_arr->count = set_count; TAILQ_INSERT_TAIL(rel_sets, set_arr, link); break; default: error = EINVAL; } out: free(sets, M_TEMP); return (error); } static int cf_levels_method(device_t dev, struct cf_level *levels, int *count) { struct cf_setting_array *set_arr; struct cf_setting_lst rel_sets; struct cpufreq_softc *sc; struct cf_level *lev; struct pcpu *pc; int error, i; uint64_t rate; if (levels == NULL || count == NULL) return (EINVAL); TAILQ_INIT(&rel_sets); sc = device_get_softc(dev); CF_MTX_LOCK(&sc->lock); error = cpufreq_add_levels(sc->dev, &rel_sets); if (error) goto out; /* * If there are no absolute levels, create a fake one at 100%. We * then cache the clockrate for later use as our base frequency. */ if (TAILQ_EMPTY(&sc->all_levels)) { struct cf_setting set; CF_DEBUG("No absolute levels returned by driver\n"); if (sc->max_mhz == CPUFREQ_VAL_UNKNOWN) { sc->max_mhz = cpu_get_nominal_mhz(dev); /* * If the CPU can't report a rate for 100%, hope * the CPU is running at its nominal rate right now, * and use that instead. */ if (sc->max_mhz <= 0) { pc = cpu_get_pcpu(dev); cpu_est_clockrate(pc->pc_cpuid, &rate); sc->max_mhz = rate / 1000000; } } memset(&set, CPUFREQ_VAL_UNKNOWN, sizeof(set)); set.freq = sc->max_mhz; set.dev = NULL; error = cpufreq_insert_abs(sc, &set, 1); if (error) goto out; } /* Create a combined list of absolute + relative levels. */ TAILQ_FOREACH(set_arr, &rel_sets, link) cpufreq_expand_set(sc, set_arr); /* If the caller doesn't have enough space, return the actual count. */ if (sc->all_count > *count) { *count = sc->all_count; error = E2BIG; goto out; } /* Finally, output the list of levels. */ i = 0; TAILQ_FOREACH(lev, &sc->all_levels, link) { /* Skip levels that have a frequency that is too low. */ if (lev->total_set.freq < cf_lowest_freq) { sc->all_count--; continue; } levels[i] = *lev; i++; } *count = sc->all_count; error = 0; out: /* Clear all levels since we regenerate them each time. */ while ((lev = TAILQ_FIRST(&sc->all_levels)) != NULL) { TAILQ_REMOVE(&sc->all_levels, lev, link); free(lev, M_TEMP); } sc->all_count = 0; CF_MTX_UNLOCK(&sc->lock); while ((set_arr = TAILQ_FIRST(&rel_sets)) != NULL) { TAILQ_REMOVE(&rel_sets, set_arr, link); free(set_arr, M_TEMP); } return (error); } /* * Create levels for an array of absolute settings and insert them in * sorted order in the specified list. 
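 * The list ends up ordered from highest to lowest frequency, so the
 * first level handed back to callers is always the fastest setting.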
*/ static int cpufreq_insert_abs(struct cpufreq_softc *sc, struct cf_setting *sets, int count) { struct cf_level_lst *list; struct cf_level *level, *search; int i, inserted; CF_MTX_ASSERT(&sc->lock); list = &sc->all_levels; for (i = 0; i < count; i++) { level = malloc(sizeof(*level), M_TEMP, M_NOWAIT | M_ZERO); if (level == NULL) return (ENOMEM); level->abs_set = sets[i]; level->total_set = sets[i]; level->total_set.dev = NULL; sc->all_count++; inserted = 0; if (TAILQ_EMPTY(list)) { CF_DEBUG("adding abs setting %d at head\n", sets[i].freq); TAILQ_INSERT_HEAD(list, level, link); continue; } TAILQ_FOREACH_REVERSE(search, list, cf_level_lst, link) if (sets[i].freq <= search->total_set.freq) { CF_DEBUG("adding abs setting %d after %d\n", sets[i].freq, search->total_set.freq); TAILQ_INSERT_AFTER(list, search, level, link); inserted = 1; break; } if (inserted == 0) { TAILQ_FOREACH(search, list, link) if (sets[i].freq >= search->total_set.freq) { CF_DEBUG("adding abs setting %d before %d\n", sets[i].freq, search->total_set.freq); TAILQ_INSERT_BEFORE(search, level, link); break; } } } return (0); } /* * Expand a group of relative settings, creating derived levels from them. */ static int cpufreq_expand_set(struct cpufreq_softc *sc, struct cf_setting_array *set_arr) { struct cf_level *fill, *search; struct cf_setting *set; int i; CF_MTX_ASSERT(&sc->lock); /* * Walk the set of all existing levels in reverse. This is so we * create derived states from the lowest absolute settings first * and discard duplicates created from higher absolute settings. * For instance, a level of 50 Mhz derived from 100 Mhz + 50% is * preferable to 200 Mhz + 25% because absolute settings are more * efficient since they often change the voltage as well. */ TAILQ_FOREACH_REVERSE(search, &sc->all_levels, cf_level_lst, link) { /* Add each setting to the level, duplicating if necessary. */ for (i = 0; i < set_arr->count; i++) { set = &set_arr->sets[i]; /* * If this setting is less than 100%, split the level * into two and add this setting to the new level. */ fill = search; if (set->freq < 10000) { fill = cpufreq_dup_set(sc, search, set); /* * The new level was a duplicate of an existing * level or its absolute setting is too high * so we freed it. For example, we discard a * derived level of 1000 MHz/25% if a level * of 500 MHz/100% already exists. */ if (fill == NULL) break; } /* Add this setting to the existing or new level. */ KASSERT(fill->rel_count < MAX_SETTINGS, ("cpufreq: too many relative drivers (%d)", MAX_SETTINGS)); fill->rel_set[fill->rel_count] = *set; fill->rel_count++; CF_DEBUG( "expand set added rel setting %d%% to %d level\n", set->freq / 100, fill->total_set.freq); } } return (0); } static struct cf_level * cpufreq_dup_set(struct cpufreq_softc *sc, struct cf_level *dup, struct cf_setting *set) { struct cf_level_lst *list; struct cf_level *fill, *itr; struct cf_setting *fill_set, *itr_set; int i; CF_MTX_ASSERT(&sc->lock); /* * Create a new level, copy it from the old one, and update the * total frequency and power by the percentage specified in the * relative setting. 
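 * For example, duplicating an 800 MHz absolute level against a 75%
 * relative setting (set->freq == 7500) yields a derived level with
 * total_set.freq = 800 * 7500 / 10000 = 600 MHz.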
*/ fill = malloc(sizeof(*fill), M_TEMP, M_NOWAIT); if (fill == NULL) return (NULL); *fill = *dup; fill_set = &fill->total_set; fill_set->freq = ((uint64_t)fill_set->freq * set->freq) / 10000; if (fill_set->power != CPUFREQ_VAL_UNKNOWN) { fill_set->power = ((uint64_t)fill_set->power * set->freq) / 10000; } if (set->lat != CPUFREQ_VAL_UNKNOWN) { if (fill_set->lat != CPUFREQ_VAL_UNKNOWN) fill_set->lat += set->lat; else fill_set->lat = set->lat; } CF_DEBUG("dup set considering derived setting %d\n", fill_set->freq); /* * If we copied an old level that we already modified (say, at 100%), * we need to remove that setting before adding this one. Since we * process each setting array in order, we know any settings for this * driver will be found at the end. */ for (i = fill->rel_count; i != 0; i--) { if (fill->rel_set[i - 1].dev != set->dev) break; CF_DEBUG("removed last relative driver: %s\n", device_get_nameunit(set->dev)); fill->rel_count--; } /* * Insert the new level in sorted order. If it is a duplicate of an * existing level (1) or has an absolute setting higher than the * existing level (2), do not add it. We can do this since any such * level is guaranteed use less power. For example (1), a level with * one absolute setting of 800 Mhz uses less power than one composed * of an absolute setting of 1600 Mhz and a relative setting at 50%. * Also for example (2), a level of 800 Mhz/75% is preferable to * 1600 Mhz/25% even though the latter has a lower total frequency. */ list = &sc->all_levels; KASSERT(!TAILQ_EMPTY(list), ("all levels list empty in dup set")); TAILQ_FOREACH_REVERSE(itr, list, cf_level_lst, link) { itr_set = &itr->total_set; if (CPUFREQ_CMP(fill_set->freq, itr_set->freq)) { CF_DEBUG("dup set rejecting %d (dupe)\n", fill_set->freq); itr = NULL; break; } else if (fill_set->freq < itr_set->freq) { if (fill->abs_set.freq <= itr->abs_set.freq) { CF_DEBUG( "dup done, inserting new level %d after %d\n", fill_set->freq, itr_set->freq); TAILQ_INSERT_AFTER(list, itr, fill, link); sc->all_count++; } else { CF_DEBUG("dup set rejecting %d (abs too big)\n", fill_set->freq); itr = NULL; } break; } } /* We didn't find a good place for this new level so free it. */ if (itr == NULL) { CF_DEBUG("dup set freeing new level %d (not optimal)\n", fill_set->freq); free(fill, M_TEMP); fill = NULL; } return (fill); } static int cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS) { struct cpufreq_softc *sc; struct cf_level *levels; int best, count, diff, bdiff, devcount, error, freq, i, n; device_t *devs; devs = NULL; sc = oidp->oid_arg1; levels = sc->levels_buf; error = CPUFREQ_GET(sc->dev, &levels[0]); if (error) goto out; freq = levels[0].total_set.freq; error = sysctl_handle_int(oidp, &freq, 0, req); if (error != 0 || req->newptr == NULL) goto out; /* * While we only call cpufreq_get() on one device (assuming all * CPUs have equal levels), we call cpufreq_set() on all CPUs. * This is needed for some MP systems. 
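 * This handler is what services a userland request such as
 * "sysctl dev.cpu.0.freq=1200": the requested value is matched to the
 * closest supported level by the best-difference scan below.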
*/ error = devclass_get_devices(cpufreq_dc, &devs, &devcount); if (error) goto out; for (n = 0; n < devcount; n++) { count = CF_MAX_LEVELS; error = CPUFREQ_LEVELS(devs[n], levels, &count); if (error) { if (error == E2BIG) printf( "cpufreq: need to increase CF_MAX_LEVELS\n"); break; } best = 0; bdiff = 1 << 30; for (i = 0; i < count; i++) { diff = abs(levels[i].total_set.freq - freq); if (diff < bdiff) { bdiff = diff; best = i; } } error = CPUFREQ_SET(devs[n], &levels[best], CPUFREQ_PRIO_USER); } out: if (devs) free(devs, M_TEMP); return (error); } static int cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS) { struct cpufreq_softc *sc; struct cf_level *levels; struct cf_setting *set; struct sbuf sb; int count, error, i; sc = oidp->oid_arg1; sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND); /* Get settings from the device and generate the output string. */ count = CF_MAX_LEVELS; levels = sc->levels_buf; if (levels == NULL) { sbuf_delete(&sb); return (ENOMEM); } error = CPUFREQ_LEVELS(sc->dev, levels, &count); if (error) { if (error == E2BIG) printf("cpufreq: need to increase CF_MAX_LEVELS\n"); goto out; } if (count) { for (i = 0; i < count; i++) { set = &levels[i].total_set; sbuf_printf(&sb, "%d/%d ", set->freq, set->power); } } else sbuf_cpy(&sb, "0"); sbuf_trim(&sb); sbuf_finish(&sb); error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); out: sbuf_delete(&sb); return (error); } static int cpufreq_settings_sysctl(SYSCTL_HANDLER_ARGS) { device_t dev; struct cf_setting *sets; struct sbuf sb; int error, i, set_count; dev = oidp->oid_arg1; sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND); /* Get settings from the device and generate the output string. */ set_count = MAX_SETTINGS; sets = malloc(set_count * sizeof(*sets), M_TEMP, M_NOWAIT); if (sets == NULL) { sbuf_delete(&sb); return (ENOMEM); } error = CPUFREQ_DRV_SETTINGS(dev, sets, &set_count); if (error) goto out; if (set_count) { for (i = 0; i < set_count; i++) sbuf_printf(&sb, "%d/%d ", sets[i].freq, sets[i].power); } else sbuf_cpy(&sb, "0"); sbuf_trim(&sb); sbuf_finish(&sb); error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); out: free(sets, M_TEMP); sbuf_delete(&sb); return (error); } static void cpufreq_add_freq_driver_sysctl(device_t cf_dev) { struct cpufreq_softc *sc; sc = device_get_softc(cf_dev); SYSCTL_ADD_CONST_STRING(&sc->sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(cf_dev)), OID_AUTO, "freq_driver", CTLFLAG_RD, device_get_nameunit(sc->cf_drv_dev), "cpufreq driver used by this cpu"); } int cpufreq_register(device_t dev) { struct cpufreq_softc *sc; device_t cf_dev, cpu_dev; int error; /* Add a sysctl to get each driver's settings separately. */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "freq_settings", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, dev, 0, cpufreq_settings_sysctl, "A", "CPU frequency driver settings"); /* * Add only one cpufreq device to each CPU. Currently, all CPUs * must offer the same levels and be switched at the same time. */ cpu_dev = device_get_parent(dev); if ((cf_dev = device_find_child(cpu_dev, "cpufreq", -1))) { sc = device_get_softc(cf_dev); sc->max_mhz = CPUFREQ_VAL_UNKNOWN; MPASS(sc->cf_drv_dev != NULL); return (0); } /* Add the child device and possibly sysctls. 
*/ - cf_dev = BUS_ADD_CHILD(cpu_dev, 0, "cpufreq", -1); + cf_dev = BUS_ADD_CHILD(cpu_dev, 0, "cpufreq", device_get_unit(cpu_dev)); if (cf_dev == NULL) return (ENOMEM); device_quiet(cf_dev); error = device_probe_and_attach(cf_dev); if (error) return (error); sc = device_get_softc(cf_dev); sc->cf_drv_dev = dev; cpufreq_add_freq_driver_sysctl(cf_dev); return (error); } int cpufreq_unregister(device_t dev) { device_t cf_dev; struct cpufreq_softc *sc; /* * If this is the last cpufreq child device, remove the control * device as well. We identify cpufreq children by calling a method * they support. */ cf_dev = device_find_child(device_get_parent(dev), "cpufreq", -1); if (cf_dev == NULL) { device_printf(dev, "warning: cpufreq_unregister called with no cpufreq device active\n"); return (0); } sc = device_get_softc(cf_dev); MPASS(sc->cf_drv_dev == dev); device_delete_child(device_get_parent(cf_dev), cf_dev); return (0); } int cpufreq_settings_changed(device_t dev) { EVENTHANDLER_INVOKE(cpufreq_levels_changed, device_get_unit(device_get_parent(dev))); return (0); } diff --git a/sys/x86/cpufreq/est.c b/sys/x86/cpufreq/est.c index 258a9c493d0f..cdca2fdfec5f 100644 --- a/sys/x86/cpufreq/est.c +++ b/sys/x86/cpufreq/est.c @@ -1,1368 +1,1368 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Colin Percival * Copyright (c) 2005 Nate Lawson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #include #include #include #include #include #include #include "acpi_if.h" #include /* Status/control registers (from the IA-32 System Programming Guide). */ #define MSR_PERF_STATUS 0x198 #define MSR_PERF_CTL 0x199 /* Register and bit for enabling SpeedStep. */ #define MSR_MISC_ENABLE 0x1a0 #define MSR_SS_ENABLE (1<<16) /* Frequency and MSR control values. */ typedef struct { uint16_t freq; uint16_t volts; uint16_t id16; int power; } freq_info; /* Identifying characteristics of a processor and supported frequencies. */ typedef struct { const u_int vendor_id; uint32_t id32; freq_info *freqtab; size_t tablen; } cpu_info; struct est_softc { device_t dev; int acpi_settings; int msr_settings; freq_info *freq_list; size_t flist_len; }; /* Convert MHz and mV into IDs for passing to the MSR. 
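 *
 * Worked example, using the first entry of the PM17_130 table below:
 * ID16(1700, 1484, 100) = ((1700 / 100) << 8) | ((1484 - 700) >> 4)
 *                       = (17 << 8) | (784 >> 4)
 *                       = 0x1100 | 0x31 = 0x1131,
 * i.e. the bus ratio in the high byte and the encoded VID in the low.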
*/ #define ID16(MHz, mV, bus_clk) \ (((MHz / bus_clk) << 8) | ((mV ? mV - 700 : 0) >> 4)) #define ID32(MHz_hi, mV_hi, MHz_lo, mV_lo, bus_clk) \ ((ID16(MHz_lo, mV_lo, bus_clk) << 16) | (ID16(MHz_hi, mV_hi, bus_clk))) /* Format for storing IDs in our table. */ #define FREQ_INFO_PWR(MHz, mV, bus_clk, mW) \ { MHz, mV, ID16(MHz, mV, bus_clk), mW } #define FREQ_INFO(MHz, mV, bus_clk) \ FREQ_INFO_PWR(MHz, mV, bus_clk, CPUFREQ_VAL_UNKNOWN) #define INTEL(tab, zhi, vhi, zlo, vlo, bus_clk) \ { CPU_VENDOR_INTEL, ID32(zhi, vhi, zlo, vlo, bus_clk), tab, nitems(tab) } #define CENTAUR(tab, zhi, vhi, zlo, vlo, bus_clk) \ { CPU_VENDOR_CENTAUR, ID32(zhi, vhi, zlo, vlo, bus_clk), tab, nitems(tab) } static int msr_info_enabled = 0; TUNABLE_INT("hw.est.msr_info", &msr_info_enabled); static int strict = -1; TUNABLE_INT("hw.est.strict", &strict); /* Default bus clock value for Centrino processors. */ #define INTEL_BUS_CLK 100 /* XXX Update this if new CPUs have more settings. */ #define EST_MAX_SETTINGS 10 CTASSERT(EST_MAX_SETTINGS <= MAX_SETTINGS); /* Estimate in microseconds of latency for performing a transition. */ #define EST_TRANS_LAT 1000 /* * Frequency (MHz) and voltage (mV) settings. * * Dothan processors have multiple VID#s with different settings for * each VID#. Since we can't uniquely identify this info * without undisclosed methods from Intel, we can't support newer * processors with this table method. If ACPI Px states are supported, * we get info from them. * * Data from the "Intel Pentium M Processor Datasheet", * Order Number 252612-003, Table 5. */ static freq_info PM17_130[] = { /* 130nm 1.70GHz Pentium M */ FREQ_INFO(1700, 1484, INTEL_BUS_CLK), FREQ_INFO(1400, 1308, INTEL_BUS_CLK), FREQ_INFO(1200, 1228, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1004, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), }; static freq_info PM16_130[] = { /* 130nm 1.60GHz Pentium M */ FREQ_INFO(1600, 1484, INTEL_BUS_CLK), FREQ_INFO(1400, 1420, INTEL_BUS_CLK), FREQ_INFO(1200, 1276, INTEL_BUS_CLK), FREQ_INFO(1000, 1164, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), }; static freq_info PM15_130[] = { /* 130nm 1.50GHz Pentium M */ FREQ_INFO(1500, 1484, INTEL_BUS_CLK), FREQ_INFO(1400, 1452, INTEL_BUS_CLK), FREQ_INFO(1200, 1356, INTEL_BUS_CLK), FREQ_INFO(1000, 1228, INTEL_BUS_CLK), FREQ_INFO( 800, 1116, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), }; static freq_info PM14_130[] = { /* 130nm 1.40GHz Pentium M */ FREQ_INFO(1400, 1484, INTEL_BUS_CLK), FREQ_INFO(1200, 1436, INTEL_BUS_CLK), FREQ_INFO(1000, 1308, INTEL_BUS_CLK), FREQ_INFO( 800, 1180, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), }; static freq_info PM13_130[] = { /* 130nm 1.30GHz Pentium M */ FREQ_INFO(1300, 1388, INTEL_BUS_CLK), FREQ_INFO(1200, 1356, INTEL_BUS_CLK), FREQ_INFO(1000, 1292, INTEL_BUS_CLK), FREQ_INFO( 800, 1260, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), }; static freq_info PM13_LV_130[] = { /* 130nm 1.30GHz Low Voltage Pentium M */ FREQ_INFO(1300, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1100, 1100, INTEL_BUS_CLK), FREQ_INFO(1000, 1020, INTEL_BUS_CLK), FREQ_INFO( 900, 1004, INTEL_BUS_CLK), FREQ_INFO( 800, 988, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), }; static freq_info PM12_LV_130[] = { /* 130 nm 1.20GHz Low Voltage Pentium M */ FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1100, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 900, 1020, INTEL_BUS_CLK), FREQ_INFO( 800, 1004, 
INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), }; static freq_info PM11_LV_130[] = { /* 130 nm 1.10GHz Low Voltage Pentium M */ FREQ_INFO(1100, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1164, INTEL_BUS_CLK), FREQ_INFO( 900, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1020, INTEL_BUS_CLK), FREQ_INFO( 600, 956, INTEL_BUS_CLK), }; static freq_info PM11_ULV_130[] = { /* 130 nm 1.10GHz Ultra Low Voltage Pentium M */ FREQ_INFO(1100, 1004, INTEL_BUS_CLK), FREQ_INFO(1000, 988, INTEL_BUS_CLK), FREQ_INFO( 900, 972, INTEL_BUS_CLK), FREQ_INFO( 800, 956, INTEL_BUS_CLK), FREQ_INFO( 600, 844, INTEL_BUS_CLK), }; static freq_info PM10_ULV_130[] = { /* 130 nm 1.00GHz Ultra Low Voltage Pentium M */ FREQ_INFO(1000, 1004, INTEL_BUS_CLK), FREQ_INFO( 900, 988, INTEL_BUS_CLK), FREQ_INFO( 800, 972, INTEL_BUS_CLK), FREQ_INFO( 600, 844, INTEL_BUS_CLK), }; /* * Data from "Intel Pentium M Processor on 90nm Process with * 2-MB L2 Cache Datasheet", Order Number 302189-008, Table 5. */ static freq_info PM_765A_90[] = { /* 90 nm 2.10GHz Pentium M, VID #A */ FREQ_INFO(2100, 1340, INTEL_BUS_CLK), FREQ_INFO(1800, 1276, INTEL_BUS_CLK), FREQ_INFO(1600, 1228, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_765B_90[] = { /* 90 nm 2.10GHz Pentium M, VID #B */ FREQ_INFO(2100, 1324, INTEL_BUS_CLK), FREQ_INFO(1800, 1260, INTEL_BUS_CLK), FREQ_INFO(1600, 1212, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_765C_90[] = { /* 90 nm 2.10GHz Pentium M, VID #C */ FREQ_INFO(2100, 1308, INTEL_BUS_CLK), FREQ_INFO(1800, 1244, INTEL_BUS_CLK), FREQ_INFO(1600, 1212, INTEL_BUS_CLK), FREQ_INFO(1400, 1164, INTEL_BUS_CLK), FREQ_INFO(1200, 1116, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_765E_90[] = { /* 90 nm 2.10GHz Pentium M, VID #E */ FREQ_INFO(2100, 1356, INTEL_BUS_CLK), FREQ_INFO(1800, 1292, INTEL_BUS_CLK), FREQ_INFO(1600, 1244, INTEL_BUS_CLK), FREQ_INFO(1400, 1196, INTEL_BUS_CLK), FREQ_INFO(1200, 1148, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_755A_90[] = { /* 90 nm 2.00GHz Pentium M, VID #A */ FREQ_INFO(2000, 1340, INTEL_BUS_CLK), FREQ_INFO(1800, 1292, INTEL_BUS_CLK), FREQ_INFO(1600, 1244, INTEL_BUS_CLK), FREQ_INFO(1400, 1196, INTEL_BUS_CLK), FREQ_INFO(1200, 1148, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_755B_90[] = { /* 90 nm 2.00GHz Pentium M, VID #B */ FREQ_INFO(2000, 1324, INTEL_BUS_CLK), FREQ_INFO(1800, 1276, INTEL_BUS_CLK), FREQ_INFO(1600, 1228, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_755C_90[] = { /* 90 nm 2.00GHz Pentium M, VID #C */ FREQ_INFO(2000, 1308, INTEL_BUS_CLK), FREQ_INFO(1800, 1276, INTEL_BUS_CLK), FREQ_INFO(1600, 1228, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, 
INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_755D_90[] = { /* 90 nm 2.00GHz Pentium M, VID #D */ FREQ_INFO(2000, 1276, INTEL_BUS_CLK), FREQ_INFO(1800, 1244, INTEL_BUS_CLK), FREQ_INFO(1600, 1196, INTEL_BUS_CLK), FREQ_INFO(1400, 1164, INTEL_BUS_CLK), FREQ_INFO(1200, 1116, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_745A_90[] = { /* 90 nm 1.80GHz Pentium M, VID #A */ FREQ_INFO(1800, 1340, INTEL_BUS_CLK), FREQ_INFO(1600, 1292, INTEL_BUS_CLK), FREQ_INFO(1400, 1228, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_745B_90[] = { /* 90 nm 1.80GHz Pentium M, VID #B */ FREQ_INFO(1800, 1324, INTEL_BUS_CLK), FREQ_INFO(1600, 1276, INTEL_BUS_CLK), FREQ_INFO(1400, 1212, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_745C_90[] = { /* 90 nm 1.80GHz Pentium M, VID #C */ FREQ_INFO(1800, 1308, INTEL_BUS_CLK), FREQ_INFO(1600, 1260, INTEL_BUS_CLK), FREQ_INFO(1400, 1212, INTEL_BUS_CLK), FREQ_INFO(1200, 1148, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_745D_90[] = { /* 90 nm 1.80GHz Pentium M, VID #D */ FREQ_INFO(1800, 1276, INTEL_BUS_CLK), FREQ_INFO(1600, 1228, INTEL_BUS_CLK), FREQ_INFO(1400, 1180, INTEL_BUS_CLK), FREQ_INFO(1200, 1132, INTEL_BUS_CLK), FREQ_INFO(1000, 1084, INTEL_BUS_CLK), FREQ_INFO( 800, 1036, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_735A_90[] = { /* 90 nm 1.70GHz Pentium M, VID #A */ FREQ_INFO(1700, 1340, INTEL_BUS_CLK), FREQ_INFO(1400, 1244, INTEL_BUS_CLK), FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_735B_90[] = { /* 90 nm 1.70GHz Pentium M, VID #B */ FREQ_INFO(1700, 1324, INTEL_BUS_CLK), FREQ_INFO(1400, 1244, INTEL_BUS_CLK), FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_735C_90[] = { /* 90 nm 1.70GHz Pentium M, VID #C */ FREQ_INFO(1700, 1308, INTEL_BUS_CLK), FREQ_INFO(1400, 1228, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_735D_90[] = { /* 90 nm 1.70GHz Pentium M, VID #D */ FREQ_INFO(1700, 1276, INTEL_BUS_CLK), FREQ_INFO(1400, 1212, INTEL_BUS_CLK), FREQ_INFO(1200, 1148, INTEL_BUS_CLK), FREQ_INFO(1000, 1100, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_725A_90[] = { /* 90 nm 1.60GHz Pentium M, VID #A */ FREQ_INFO(1600, 1340, INTEL_BUS_CLK), FREQ_INFO(1400, 1276, INTEL_BUS_CLK), FREQ_INFO(1200, 1212, INTEL_BUS_CLK), FREQ_INFO(1000, 1132, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_725B_90[] = { /* 90 nm 1.60GHz Pentium M, VID #B */ FREQ_INFO(1600, 1324, INTEL_BUS_CLK), FREQ_INFO(1400, 1260, INTEL_BUS_CLK), FREQ_INFO(1200, 1196, INTEL_BUS_CLK), FREQ_INFO(1000, 1132, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, 
INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_725C_90[] = { /* 90 nm 1.60GHz Pentium M, VID #C */ FREQ_INFO(1600, 1308, INTEL_BUS_CLK), FREQ_INFO(1400, 1244, INTEL_BUS_CLK), FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_725D_90[] = { /* 90 nm 1.60GHz Pentium M, VID #D */ FREQ_INFO(1600, 1276, INTEL_BUS_CLK), FREQ_INFO(1400, 1228, INTEL_BUS_CLK), FREQ_INFO(1200, 1164, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_715A_90[] = { /* 90 nm 1.50GHz Pentium M, VID #A */ FREQ_INFO(1500, 1340, INTEL_BUS_CLK), FREQ_INFO(1200, 1228, INTEL_BUS_CLK), FREQ_INFO(1000, 1148, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_715B_90[] = { /* 90 nm 1.50GHz Pentium M, VID #B */ FREQ_INFO(1500, 1324, INTEL_BUS_CLK), FREQ_INFO(1200, 1212, INTEL_BUS_CLK), FREQ_INFO(1000, 1148, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_715C_90[] = { /* 90 nm 1.50GHz Pentium M, VID #C */ FREQ_INFO(1500, 1308, INTEL_BUS_CLK), FREQ_INFO(1200, 1212, INTEL_BUS_CLK), FREQ_INFO(1000, 1132, INTEL_BUS_CLK), FREQ_INFO( 800, 1068, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_715D_90[] = { /* 90 nm 1.50GHz Pentium M, VID #D */ FREQ_INFO(1500, 1276, INTEL_BUS_CLK), FREQ_INFO(1200, 1180, INTEL_BUS_CLK), FREQ_INFO(1000, 1116, INTEL_BUS_CLK), FREQ_INFO( 800, 1052, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_778_90[] = { /* 90 nm 1.60GHz Low Voltage Pentium M */ FREQ_INFO(1600, 1116, INTEL_BUS_CLK), FREQ_INFO(1500, 1116, INTEL_BUS_CLK), FREQ_INFO(1400, 1100, INTEL_BUS_CLK), FREQ_INFO(1300, 1084, INTEL_BUS_CLK), FREQ_INFO(1200, 1068, INTEL_BUS_CLK), FREQ_INFO(1100, 1052, INTEL_BUS_CLK), FREQ_INFO(1000, 1052, INTEL_BUS_CLK), FREQ_INFO( 900, 1036, INTEL_BUS_CLK), FREQ_INFO( 800, 1020, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_758_90[] = { /* 90 nm 1.50GHz Low Voltage Pentium M */ FREQ_INFO(1500, 1116, INTEL_BUS_CLK), FREQ_INFO(1400, 1116, INTEL_BUS_CLK), FREQ_INFO(1300, 1100, INTEL_BUS_CLK), FREQ_INFO(1200, 1084, INTEL_BUS_CLK), FREQ_INFO(1100, 1068, INTEL_BUS_CLK), FREQ_INFO(1000, 1052, INTEL_BUS_CLK), FREQ_INFO( 900, 1036, INTEL_BUS_CLK), FREQ_INFO( 800, 1020, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_738_90[] = { /* 90 nm 1.40GHz Low Voltage Pentium M */ FREQ_INFO(1400, 1116, INTEL_BUS_CLK), FREQ_INFO(1300, 1116, INTEL_BUS_CLK), FREQ_INFO(1200, 1100, INTEL_BUS_CLK), FREQ_INFO(1100, 1068, INTEL_BUS_CLK), FREQ_INFO(1000, 1052, INTEL_BUS_CLK), FREQ_INFO( 900, 1036, INTEL_BUS_CLK), FREQ_INFO( 800, 1020, INTEL_BUS_CLK), FREQ_INFO( 600, 988, INTEL_BUS_CLK), }; static freq_info PM_773G_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #G */ FREQ_INFO(1300, 956, INTEL_BUS_CLK), FREQ_INFO(1200, 940, INTEL_BUS_CLK), FREQ_INFO(1100, 924, INTEL_BUS_CLK), FREQ_INFO(1000, 908, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773H_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #H */ FREQ_INFO(1300, 940, INTEL_BUS_CLK), FREQ_INFO(1200, 924, INTEL_BUS_CLK), FREQ_INFO(1100, 908, INTEL_BUS_CLK), FREQ_INFO(1000, 892, 
INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773I_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #I */ FREQ_INFO(1300, 924, INTEL_BUS_CLK), FREQ_INFO(1200, 908, INTEL_BUS_CLK), FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773J_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #J */ FREQ_INFO(1300, 908, INTEL_BUS_CLK), FREQ_INFO(1200, 908, INTEL_BUS_CLK), FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773K_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #K */ FREQ_INFO(1300, 892, INTEL_BUS_CLK), FREQ_INFO(1200, 892, INTEL_BUS_CLK), FREQ_INFO(1100, 876, INTEL_BUS_CLK), FREQ_INFO(1000, 860, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_773L_90[] = { /* 90 nm 1.30GHz Ultra Low Voltage Pentium M, VID #L */ FREQ_INFO(1300, 876, INTEL_BUS_CLK), FREQ_INFO(1200, 876, INTEL_BUS_CLK), FREQ_INFO(1100, 860, INTEL_BUS_CLK), FREQ_INFO(1000, 860, INTEL_BUS_CLK), FREQ_INFO( 900, 844, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753G_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #G */ FREQ_INFO(1200, 956, INTEL_BUS_CLK), FREQ_INFO(1100, 940, INTEL_BUS_CLK), FREQ_INFO(1000, 908, INTEL_BUS_CLK), FREQ_INFO( 900, 892, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753H_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #H */ FREQ_INFO(1200, 940, INTEL_BUS_CLK), FREQ_INFO(1100, 924, INTEL_BUS_CLK), FREQ_INFO(1000, 908, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753I_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #I */ FREQ_INFO(1200, 924, INTEL_BUS_CLK), FREQ_INFO(1100, 908, INTEL_BUS_CLK), FREQ_INFO(1000, 892, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753J_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #J */ FREQ_INFO(1200, 908, INTEL_BUS_CLK), FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753K_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #K */ FREQ_INFO(1200, 892, INTEL_BUS_CLK), FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_753L_90[] = { /* 90 nm 1.20GHz Ultra Low Voltage Pentium M, VID #L */ FREQ_INFO(1200, 876, INTEL_BUS_CLK), FREQ_INFO(1100, 876, INTEL_BUS_CLK), FREQ_INFO(1000, 860, INTEL_BUS_CLK), FREQ_INFO( 900, 844, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JG_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #G */ FREQ_INFO(1100, 956, 
INTEL_BUS_CLK), FREQ_INFO(1000, 940, INTEL_BUS_CLK), FREQ_INFO( 900, 908, INTEL_BUS_CLK), FREQ_INFO( 800, 876, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JH_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #H */ FREQ_INFO(1100, 940, INTEL_BUS_CLK), FREQ_INFO(1000, 924, INTEL_BUS_CLK), FREQ_INFO( 900, 892, INTEL_BUS_CLK), FREQ_INFO( 800, 876, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JI_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #I */ FREQ_INFO(1100, 924, INTEL_BUS_CLK), FREQ_INFO(1000, 908, INTEL_BUS_CLK), FREQ_INFO( 900, 892, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JJ_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #J */ FREQ_INFO(1100, 908, INTEL_BUS_CLK), FREQ_INFO(1000, 892, INTEL_BUS_CLK), FREQ_INFO( 900, 876, INTEL_BUS_CLK), FREQ_INFO( 800, 860, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JK_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #K */ FREQ_INFO(1100, 892, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733JL_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M, VID #L */ FREQ_INFO(1100, 876, INTEL_BUS_CLK), FREQ_INFO(1000, 876, INTEL_BUS_CLK), FREQ_INFO( 900, 860, INTEL_BUS_CLK), FREQ_INFO( 800, 844, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_733_90[] = { /* 90 nm 1.10GHz Ultra Low Voltage Pentium M */ FREQ_INFO(1100, 940, INTEL_BUS_CLK), FREQ_INFO(1000, 924, INTEL_BUS_CLK), FREQ_INFO( 900, 892, INTEL_BUS_CLK), FREQ_INFO( 800, 876, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; static freq_info PM_723_90[] = { /* 90 nm 1.00GHz Ultra Low Voltage Pentium M */ FREQ_INFO(1000, 940, INTEL_BUS_CLK), FREQ_INFO( 900, 908, INTEL_BUS_CLK), FREQ_INFO( 800, 876, INTEL_BUS_CLK), FREQ_INFO( 600, 812, INTEL_BUS_CLK), }; /* * VIA C7-M 533 MHz FSB, 400 MHz FSB, and ULV variants. * Data from the "VIA C7-M Processor BIOS Writer's Guide (v2.17)" datasheet.
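* Each FREQ_INFO_PWR() entry below appears to carry (core frequency in MHz, * voltage in mV, bus clock in MHz, power in mW); the FREQ_INFO() tables * above carry the same first three fields without a power figure.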
*/ static freq_info C7M_795[] = { /* 2.00GHz Centaur C7-M 533 MHz FSB */ FREQ_INFO_PWR(2000, 1148, 133, 20000), FREQ_INFO_PWR(1867, 1132, 133, 18000), FREQ_INFO_PWR(1600, 1100, 133, 15000), FREQ_INFO_PWR(1467, 1052, 133, 13000), FREQ_INFO_PWR(1200, 1004, 133, 10000), FREQ_INFO_PWR( 800, 844, 133, 7000), FREQ_INFO_PWR( 667, 844, 133, 6000), FREQ_INFO_PWR( 533, 844, 133, 5000), }; static freq_info C7M_785[] = { /* 1.80GHz Centaur C7-M 533 MHz FSB */ FREQ_INFO_PWR(1867, 1148, 133, 18000), FREQ_INFO_PWR(1600, 1100, 133, 15000), FREQ_INFO_PWR(1467, 1052, 133, 13000), FREQ_INFO_PWR(1200, 1004, 133, 10000), FREQ_INFO_PWR( 800, 844, 133, 7000), FREQ_INFO_PWR( 667, 844, 133, 6000), FREQ_INFO_PWR( 533, 844, 133, 5000), }; static freq_info C7M_765[] = { /* 1.60GHz Centaur C7-M 533 MHz FSB */ FREQ_INFO_PWR(1600, 1084, 133, 15000), FREQ_INFO_PWR(1467, 1052, 133, 13000), FREQ_INFO_PWR(1200, 1004, 133, 10000), FREQ_INFO_PWR( 800, 844, 133, 7000), FREQ_INFO_PWR( 667, 844, 133, 6000), FREQ_INFO_PWR( 533, 844, 133, 5000), }; static freq_info C7M_794[] = { /* 2.00GHz Centaur C7-M 400 MHz FSB */ FREQ_INFO_PWR(2000, 1148, 100, 20000), FREQ_INFO_PWR(1800, 1132, 100, 18000), FREQ_INFO_PWR(1600, 1100, 100, 15000), FREQ_INFO_PWR(1400, 1052, 100, 13000), FREQ_INFO_PWR(1000, 1004, 100, 10000), FREQ_INFO_PWR( 800, 844, 100, 7000), FREQ_INFO_PWR( 600, 844, 100, 6000), FREQ_INFO_PWR( 400, 844, 100, 5000), }; static freq_info C7M_784[] = { /* 1.80GHz Centaur C7-M 400 MHz FSB */ FREQ_INFO_PWR(1800, 1148, 100, 18000), FREQ_INFO_PWR(1600, 1100, 100, 15000), FREQ_INFO_PWR(1400, 1052, 100, 13000), FREQ_INFO_PWR(1000, 1004, 100, 10000), FREQ_INFO_PWR( 800, 844, 100, 7000), FREQ_INFO_PWR( 600, 844, 100, 6000), FREQ_INFO_PWR( 400, 844, 100, 5000), }; static freq_info C7M_764[] = { /* 1.60GHz Centaur C7-M 400 MHz FSB */ FREQ_INFO_PWR(1600, 1084, 100, 15000), FREQ_INFO_PWR(1400, 1052, 100, 13000), FREQ_INFO_PWR(1000, 1004, 100, 10000), FREQ_INFO_PWR( 800, 844, 100, 7000), FREQ_INFO_PWR( 600, 844, 100, 6000), FREQ_INFO_PWR( 400, 844, 100, 5000), }; static freq_info C7M_754[] = { /* 1.50GHz Centaur C7-M 400 MHz FSB */ FREQ_INFO_PWR(1500, 1004, 100, 12000), FREQ_INFO_PWR(1400, 988, 100, 11000), FREQ_INFO_PWR(1000, 940, 100, 9000), FREQ_INFO_PWR( 800, 844, 100, 7000), FREQ_INFO_PWR( 600, 844, 100, 6000), FREQ_INFO_PWR( 400, 844, 100, 5000), }; static freq_info C7M_771[] = { /* 1.20GHz Centaur C7-M 400 MHz FSB */ FREQ_INFO_PWR(1200, 860, 100, 7000), FREQ_INFO_PWR(1000, 860, 100, 6000), FREQ_INFO_PWR( 800, 844, 100, 5500), FREQ_INFO_PWR( 600, 844, 100, 5000), FREQ_INFO_PWR( 400, 844, 100, 4000), }; static freq_info C7M_775_ULV[] = { /* 1.50GHz Centaur C7-M ULV */ FREQ_INFO_PWR(1500, 956, 100, 7500), FREQ_INFO_PWR(1400, 940, 100, 6000), FREQ_INFO_PWR(1000, 860, 100, 5000), FREQ_INFO_PWR( 800, 828, 100, 2800), FREQ_INFO_PWR( 600, 796, 100, 2500), FREQ_INFO_PWR( 400, 796, 100, 2000), }; static freq_info C7M_772_ULV[] = { /* 1.20GHz Centaur C7-M ULV */ FREQ_INFO_PWR(1200, 844, 100, 5000), FREQ_INFO_PWR(1000, 844, 100, 4000), FREQ_INFO_PWR( 800, 828, 100, 2800), FREQ_INFO_PWR( 600, 796, 100, 2500), FREQ_INFO_PWR( 400, 796, 100, 2000), }; static freq_info C7M_779_ULV[] = { /* 1.00GHz Centaur C7-M ULV */ FREQ_INFO_PWR(1000, 796, 100, 3500), FREQ_INFO_PWR( 800, 796, 100, 2800), FREQ_INFO_PWR( 600, 796, 100, 2500), FREQ_INFO_PWR( 400, 796, 100, 2000), }; static freq_info C7M_770_ULV[] = { /* 1.00GHz Centaur C7-M ULV */ FREQ_INFO_PWR(1000, 844, 100, 5000), FREQ_INFO_PWR( 800, 796, 100, 2800), FREQ_INFO_PWR( 600, 796, 100, 2500), FREQ_INFO_PWR( 
400, 796, 100, 2000), }; static cpu_info ESTprocs[] = { INTEL(PM17_130, 1700, 1484, 600, 956, INTEL_BUS_CLK), INTEL(PM16_130, 1600, 1484, 600, 956, INTEL_BUS_CLK), INTEL(PM15_130, 1500, 1484, 600, 956, INTEL_BUS_CLK), INTEL(PM14_130, 1400, 1484, 600, 956, INTEL_BUS_CLK), INTEL(PM13_130, 1300, 1388, 600, 956, INTEL_BUS_CLK), INTEL(PM13_LV_130, 1300, 1180, 600, 956, INTEL_BUS_CLK), INTEL(PM12_LV_130, 1200, 1180, 600, 956, INTEL_BUS_CLK), INTEL(PM11_LV_130, 1100, 1180, 600, 956, INTEL_BUS_CLK), INTEL(PM11_ULV_130, 1100, 1004, 600, 844, INTEL_BUS_CLK), INTEL(PM10_ULV_130, 1000, 1004, 600, 844, INTEL_BUS_CLK), INTEL(PM_765A_90, 2100, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_765B_90, 2100, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_765C_90, 2100, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_765E_90, 2100, 1356, 600, 988, INTEL_BUS_CLK), INTEL(PM_755A_90, 2000, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_755B_90, 2000, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_755C_90, 2000, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_755D_90, 2000, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_745A_90, 1800, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_745B_90, 1800, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_745C_90, 1800, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_745D_90, 1800, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_735A_90, 1700, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_735B_90, 1700, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_735C_90, 1700, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_735D_90, 1700, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_725A_90, 1600, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_725B_90, 1600, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_725C_90, 1600, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_725D_90, 1600, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_715A_90, 1500, 1340, 600, 988, INTEL_BUS_CLK), INTEL(PM_715B_90, 1500, 1324, 600, 988, INTEL_BUS_CLK), INTEL(PM_715C_90, 1500, 1308, 600, 988, INTEL_BUS_CLK), INTEL(PM_715D_90, 1500, 1276, 600, 988, INTEL_BUS_CLK), INTEL(PM_778_90, 1600, 1116, 600, 988, INTEL_BUS_CLK), INTEL(PM_758_90, 1500, 1116, 600, 988, INTEL_BUS_CLK), INTEL(PM_738_90, 1400, 1116, 600, 988, INTEL_BUS_CLK), INTEL(PM_773G_90, 1300, 956, 600, 812, INTEL_BUS_CLK), INTEL(PM_773H_90, 1300, 940, 600, 812, INTEL_BUS_CLK), INTEL(PM_773I_90, 1300, 924, 600, 812, INTEL_BUS_CLK), INTEL(PM_773J_90, 1300, 908, 600, 812, INTEL_BUS_CLK), INTEL(PM_773K_90, 1300, 892, 600, 812, INTEL_BUS_CLK), INTEL(PM_773L_90, 1300, 876, 600, 812, INTEL_BUS_CLK), INTEL(PM_753G_90, 1200, 956, 600, 812, INTEL_BUS_CLK), INTEL(PM_753H_90, 1200, 940, 600, 812, INTEL_BUS_CLK), INTEL(PM_753I_90, 1200, 924, 600, 812, INTEL_BUS_CLK), INTEL(PM_753J_90, 1200, 908, 600, 812, INTEL_BUS_CLK), INTEL(PM_753K_90, 1200, 892, 600, 812, INTEL_BUS_CLK), INTEL(PM_753L_90, 1200, 876, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JG_90, 1100, 956, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JH_90, 1100, 940, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JI_90, 1100, 924, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JJ_90, 1100, 908, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JK_90, 1100, 892, 600, 812, INTEL_BUS_CLK), INTEL(PM_733JL_90, 1100, 876, 600, 812, INTEL_BUS_CLK), INTEL(PM_733_90, 1100, 940, 600, 812, INTEL_BUS_CLK), INTEL(PM_723_90, 1000, 940, 600, 812, INTEL_BUS_CLK), CENTAUR(C7M_795, 2000, 1148, 533, 844, 133), CENTAUR(C7M_794, 2000, 1148, 400, 844, 100), CENTAUR(C7M_785, 1867, 1148, 533, 844, 133), CENTAUR(C7M_784, 1800, 1148, 400, 844, 100), CENTAUR(C7M_765, 1600, 1084, 533, 844, 133), CENTAUR(C7M_764, 1600, 1084, 400, 844, 100), CENTAUR(C7M_754, 1500, 1004, 400, 844, 100), 
CENTAUR(C7M_775_ULV, 1500, 956, 400, 796, 100), CENTAUR(C7M_771, 1200, 860, 400, 844, 100), CENTAUR(C7M_772_ULV, 1200, 844, 400, 796, 100), CENTAUR(C7M_779_ULV, 1000, 796, 400, 796, 100), CENTAUR(C7M_770_ULV, 1000, 844, 400, 796, 100), { 0, 0, NULL }, }; static void est_identify(driver_t *driver, device_t parent); static int est_features(driver_t *driver, u_int *features); static int est_probe(device_t parent); static int est_attach(device_t parent); static int est_detach(device_t parent); static int est_get_info(device_t dev); static int est_acpi_info(device_t dev, freq_info **freqs, size_t *freqslen); static int est_table_info(device_t dev, uint64_t msr, freq_info **freqs, size_t *freqslen); static int est_msr_info(device_t dev, uint64_t msr, freq_info **freqs, size_t *freqslen); static freq_info *est_get_current(freq_info *freq_list, size_t tablen); static int est_settings(device_t dev, struct cf_setting *sets, int *count); static int est_set(device_t dev, const struct cf_setting *set); static int est_get(device_t dev, struct cf_setting *set); static int est_type(device_t dev, int *type); static int est_set_id16(device_t dev, uint16_t id16, int need_check); static void est_get_id16(uint16_t *id16_p); static device_method_t est_methods[] = { /* Device interface */ DEVMETHOD(device_identify, est_identify), DEVMETHOD(device_probe, est_probe), DEVMETHOD(device_attach, est_attach), DEVMETHOD(device_detach, est_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, est_set), DEVMETHOD(cpufreq_drv_get, est_get), DEVMETHOD(cpufreq_drv_type, est_type), DEVMETHOD(cpufreq_drv_settings, est_settings), /* ACPI interface */ DEVMETHOD(acpi_get_features, est_features), {0, 0} }; static driver_t est_driver = { "est", est_methods, sizeof(struct est_softc), }; static devclass_t est_devclass; DRIVER_MODULE(est, cpu, est_driver, est_devclass, 0, 0); MODULE_DEPEND(est, hwpstate_intel, 1, 1, 1); static int est_features(driver_t *driver, u_int *features) { /* * Notify the ACPI CPU that we support direct access to MSRs. * XXX C1 "I/O then Halt" seems necessary for some broken BIOS. */ *features = ACPI_CAP_PERF_MSRS | ACPI_CAP_C1_IO_HALT; return (0); } static void est_identify(driver_t *driver, device_t parent) { device_t child; /* * Defer to hwpstate if it is present. This priority logic * should be replaced with normal newbus probing in the * future. */ intel_hwpstate_identify(NULL, parent); if (device_find_child(parent, "hwpstate_intel", -1) != NULL) return; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "est", -1) != NULL) return; /* Check that CPUID is supported and the vendor is Intel or Centaur. */ if (cpu_high == 0 || (cpu_vendor_id != CPU_VENDOR_INTEL && cpu_vendor_id != CPU_VENDOR_CENTAUR)) return; /* * Check if the CPU supports EST. */ if (!(cpu_feature2 & CPUID2_EST)) return; /* * We add a child for each CPU since settings must be performed * on each CPU in the SMP case. */ - child = BUS_ADD_CHILD(parent, 10, "est", -1); + child = BUS_ADD_CHILD(parent, 10, "est", device_get_unit(parent)); if (child == NULL) device_printf(parent, "add est child failed\n"); } static int est_probe(device_t dev) { device_t perf_dev; uint64_t msr; int error, type; if (resource_disabled("est", 0)) return (ENXIO); /* * If the ACPI perf driver has attached and is not just offering * info, let it manage things.
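* acpi_perf reports CPUFREQ_FLAG_INFO_ONLY when it cannot perform * transitions itself (FFixedHW _PCT); only in that case do we keep probing.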
*/ perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); if (perf_dev && device_is_attached(perf_dev)) { error = CPUFREQ_DRV_TYPE(perf_dev, &type); if (error == 0 && (type & CPUFREQ_FLAG_INFO_ONLY) == 0) return (ENXIO); } /* Attempt to enable SpeedStep if not currently enabled. */ msr = rdmsr(MSR_MISC_ENABLE); if ((msr & MSR_SS_ENABLE) == 0) { wrmsr(MSR_MISC_ENABLE, msr | MSR_SS_ENABLE); if (bootverbose) device_printf(dev, "enabling SpeedStep\n"); /* Check if the enable failed. */ msr = rdmsr(MSR_MISC_ENABLE); if ((msr & MSR_SS_ENABLE) == 0) { device_printf(dev, "failed to enable SpeedStep\n"); return (ENXIO); } } device_set_desc(dev, "Enhanced SpeedStep Frequency Control"); return (0); } static int est_attach(device_t dev) { struct est_softc *sc; sc = device_get_softc(dev); sc->dev = dev; /* On SMP systems we can't guarantee independent frequency settings. */ if (strict == -1 && mp_ncpus > 1) strict = 0; /* Check CPU for supported settings. */ if (est_get_info(dev)) return (ENXIO); cpufreq_register(dev); return (0); } static int est_detach(device_t dev) { struct est_softc *sc; int error; error = cpufreq_unregister(dev); if (error) return (error); sc = device_get_softc(dev); if (sc->acpi_settings || sc->msr_settings) free(sc->freq_list, M_DEVBUF); return (0); } /* * Probe for supported CPU settings. First, check our static table of * settings. If no match, try using the ones offered by acpi_perf * (i.e., _PSS). We use ACPI second because some systems (IBM R/T40 * series) export both legacy SMM IO-based access and direct MSR access * but the direct access specifies invalid values for _PSS. */ static int est_get_info(device_t dev) { struct est_softc *sc; uint64_t msr; int error; sc = device_get_softc(dev); msr = rdmsr(MSR_PERF_STATUS); error = est_table_info(dev, msr, &sc->freq_list, &sc->flist_len); if (error) error = est_acpi_info(dev, &sc->freq_list, &sc->flist_len); if (error) error = est_msr_info(dev, msr, &sc->freq_list, &sc->flist_len); if (error) { printf( "est: CPU supports Enhanced SpeedStep, but is not recognized.\n" "est: cpu_vendor %s, msr %0jx\n", cpu_vendor, msr); return (ENXIO); } return (0); } static int est_acpi_info(device_t dev, freq_info **freqs, size_t *freqslen) { struct est_softc *sc; struct cf_setting *sets; freq_info *table; device_t perf_dev; int count, error, i, j; uint16_t saved_id16; perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); if (perf_dev == NULL || !device_is_attached(perf_dev)) return (ENXIO); /* Fetch settings from acpi_perf. */ sc = device_get_softc(dev); table = NULL; sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT); if (sets == NULL) return (ENOMEM); count = MAX_SETTINGS; error = CPUFREQ_DRV_SETTINGS(perf_dev, sets, &count); if (error) goto out; /* Parse settings into our local table format. */ table = malloc(count * sizeof(*table), M_DEVBUF, M_NOWAIT); if (table == NULL) { error = ENOMEM; goto out; } est_get_id16(&saved_id16); for (i = 0, j = 0; i < count; i++) { /* * Confirm id16 value is correct.
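* Each ACPI-supplied setting is trial-programmed via est_set_id16(); * in strict mode an id16 that does not read back is logged under * bootverbose and skipped instead of being added to the table.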
*/ if (sets[i].freq > 0) { error = est_set_id16(dev, sets[i].spec[0], strict); if (error != 0) { if (bootverbose) device_printf(dev, "Invalid freq %u, " "ignored.\n", sets[i].freq); continue; } table[j].freq = sets[i].freq; table[j].volts = sets[i].volts; table[j].id16 = sets[i].spec[0]; table[j].power = sets[i].power; ++j; } } /* restore saved setting */ est_set_id16(dev, saved_id16, 0); sc->acpi_settings = TRUE; *freqs = table; *freqslen = j; error = 0; out: if (sets) free(sets, M_TEMP); if (error && table) free(table, M_DEVBUF); return (error); } static int est_table_info(device_t dev, uint64_t msr, freq_info **freqs, size_t *freqslen) { cpu_info *p; uint32_t id; /* Find a table which matches (vendor, id32). */ id = msr >> 32; for (p = ESTprocs; p->id32 != 0; p++) { if (p->vendor_id == cpu_vendor_id && p->id32 == id) break; } if (p->id32 == 0) return (EOPNOTSUPP); /* Make sure the current setpoint is valid. */ if (est_get_current(p->freqtab, p->tablen) == NULL) { device_printf(dev, "current setting not found in table\n"); return (EOPNOTSUPP); } *freqs = p->freqtab; *freqslen = p->tablen; return (0); } static int bus_speed_ok(int bus) { switch (bus) { case 100: case 133: case 333: return (1); default: return (0); } } /* * Flesh out a simple rate table containing the high and low frequencies * based on the current clock speed and the upper 32 bits of the MSR. */ static int est_msr_info(device_t dev, uint64_t msr, freq_info **freqs, size_t *freqslen) { struct est_softc *sc; freq_info *fp; int bus, freq, volts; uint16_t id; if (!msr_info_enabled) return (EOPNOTSUPP); /* Figure out the bus clock. */ freq = atomic_load_acq_64(&tsc_freq) / 1000000; id = msr >> 32; bus = freq / (id >> 8); device_printf(dev, "Guessed bus clock (high) of %d MHz\n", bus); if (!bus_speed_ok(bus)) { /* We may be running on the low frequency. */ id = msr >> 48; bus = freq / (id >> 8); device_printf(dev, "Guessed bus clock (low) of %d MHz\n", bus); if (!bus_speed_ok(bus)) return (EOPNOTSUPP); /* Calculate high frequency. */ id = msr >> 32; freq = ((id >> 8) & 0xff) * bus; } /* Fill out a new freq table containing just the high and low freqs. */ sc = device_get_softc(dev); fp = malloc(sizeof(freq_info) * 2, M_DEVBUF, M_WAITOK | M_ZERO); /* First, the high frequency. */ volts = id & 0xff; if (volts != 0) { volts <<= 4; volts += 700; } fp[0].freq = freq; fp[0].volts = volts; fp[0].id16 = id; fp[0].power = CPUFREQ_VAL_UNKNOWN; device_printf(dev, "Guessed high setting of %d MHz @ %d mV\n", freq, volts); /* Second, the low frequency. */ id = msr >> 48; freq = ((id >> 8) & 0xff) * bus; volts = id & 0xff; if (volts != 0) { volts <<= 4; volts += 700; } fp[1].freq = freq; fp[1].volts = volts; fp[1].id16 = id; fp[1].power = CPUFREQ_VAL_UNKNOWN; device_printf(dev, "Guessed low setting of %d MHz @ %d mV\n", freq, volts); /* Table is already terminated due to M_ZERO. */ sc->msr_settings = TRUE; *freqs = fp; *freqslen = 2; return (0); } static void est_get_id16(uint16_t *id16_p) { *id16_p = rdmsr(MSR_PERF_STATUS) & 0xffff; } static int est_set_id16(device_t dev, uint16_t id16, int need_check) { uint64_t msr; uint16_t new_id16; int ret = 0; /* Read the current register, mask out the old, set the new id. */ msr = rdmsr(MSR_PERF_CTL); msr = (msr & ~0xffff) | id16; wrmsr(MSR_PERF_CTL, msr); if (need_check) { /* Wait a short while and read the new status.
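* EST_TRANS_LAT is the assumed transition latency; if the status MSR * does not then reflect the id16 we just wrote, report ENXIO.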
*/ DELAY(EST_TRANS_LAT); est_get_id16(&new_id16); if (new_id16 != id16) { if (bootverbose) device_printf(dev, "Invalid id16 (set, cur) " "= (%u, %u)\n", id16, new_id16); ret = ENXIO; } } return (ret); } static freq_info * est_get_current(freq_info *freq_list, size_t tablen) { freq_info *f; int i; uint16_t id16; /* * Try a few times to get a valid value. Sometimes, if the CPU * is in the middle of an asynchronous transition (i.e., P4TCC), * we get a temporary invalid result. */ for (i = 0; i < 5; i++) { est_get_id16(&id16); for (f = freq_list; f < freq_list + tablen; f++) { if (f->id16 == id16) return (f); } DELAY(100); } return (NULL); } static int est_settings(device_t dev, struct cf_setting *sets, int *count) { struct est_softc *sc; freq_info *f; int i; sc = device_get_softc(dev); if (*count < EST_MAX_SETTINGS) return (E2BIG); i = 0; for (f = sc->freq_list; f < sc->freq_list + sc->flist_len; f++, i++) { sets[i].freq = f->freq; sets[i].volts = f->volts; sets[i].power = f->power; sets[i].lat = EST_TRANS_LAT; sets[i].dev = dev; } *count = i; return (0); } static int est_set(device_t dev, const struct cf_setting *set) { struct est_softc *sc; freq_info *f; /* Find the setting matching the requested one. */ sc = device_get_softc(dev); for (f = sc->freq_list; f < sc->freq_list + sc->flist_len; f++) { if (f->freq == set->freq) break; } if (f->freq == 0) return (EINVAL); /* Read the current register, mask out the old, set the new id. */ est_set_id16(dev, f->id16, 0); return (0); } static int est_get(device_t dev, struct cf_setting *set) { struct est_softc *sc; freq_info *f; sc = device_get_softc(dev); f = est_get_current(sc->freq_list, sc->flist_len); if (f == NULL) return (ENXIO); set->freq = f->freq; set->volts = f->volts; set->power = f->power; set->lat = EST_TRANS_LAT; set->dev = dev; return (0); } static int est_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } diff --git a/sys/x86/cpufreq/hwpstate_amd.c b/sys/x86/cpufreq/hwpstate_amd.c index ee67da43de53..3193cbd908cc 100644 --- a/sys/x86/cpufreq/hwpstate_amd.c +++ b/sys/x86/cpufreq/hwpstate_amd.c @@ -1,554 +1,555 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Nate Lawson * Copyright (c) 2004 Colin Percival * Copyright (c) 2004-2005 Bruno Durcot * Copyright (c) 2004 FUKUDA Nobuhiko * Copyright (c) 2009 Michael Reifenberger * Copyright (c) 2009 Norikatsu Shigemura * Copyright (c) 2008-2009 Gen Otsuji * * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c * in various parts. The authors of these files are Nate Lawson, * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko. * This code contains patches by Michael Reifenberger and Norikatsu Shigemura. * Thank you. * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * For more info: * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 10h Processors * 31116 Rev 3.20 February 04, 2009 * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 11h Processors * 41256 Rev 3.00 - July 07, 2008 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "cpufreq_if.h" #define MSR_AMD_10H_11H_LIMIT 0xc0010061 #define MSR_AMD_10H_11H_CONTROL 0xc0010062 #define MSR_AMD_10H_11H_STATUS 0xc0010063 #define MSR_AMD_10H_11H_CONFIG 0xc0010064 #define AMD_10H_11H_MAX_STATES 16 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */ #define AMD_10H_11H_GET_PSTATE_MAX_VAL(msr) (((msr) >> 4) & 0x7) #define AMD_10H_11H_GET_PSTATE_LIMIT(msr) (((msr)) & 0x7) /* for MSR_AMD_10H_11H_CONFIG 10h:C001_0064:68 / 11h:C001_0064:6B */ #define AMD_10H_11H_CUR_VID(msr) (((msr) >> 9) & 0x7F) #define AMD_10H_11H_CUR_DID(msr) (((msr) >> 6) & 0x07) #define AMD_10H_11H_CUR_FID(msr) ((msr) & 0x3F) #define AMD_17H_CUR_VID(msr) (((msr) >> 14) & 0xFF) #define AMD_17H_CUR_DID(msr) (((msr) >> 8) & 0x3F) #define AMD_17H_CUR_FID(msr) ((msr) & 0xFF) #define HWPSTATE_DEBUG(dev, msg...) \ do { \ if (hwpstate_verbose) \ device_printf(dev, msg); \ } while (0) struct hwpstate_setting { int freq; /* CPU clock in Mhz or 100ths of a percent. */ int volts; /* Voltage in mV. */ int power; /* Power consumed in mW. */ int lat; /* Transition latency in us. 
*/ int pstate_id; /* P-State id */ }; struct hwpstate_softc { device_t dev; struct hwpstate_setting hwpstate_settings[AMD_10H_11H_MAX_STATES]; int cfnum; }; static void hwpstate_identify(driver_t *driver, device_t parent); static int hwpstate_probe(device_t dev); static int hwpstate_attach(device_t dev); static int hwpstate_detach(device_t dev); static int hwpstate_set(device_t dev, const struct cf_setting *cf); static int hwpstate_get(device_t dev, struct cf_setting *cf); static int hwpstate_settings(device_t dev, struct cf_setting *sets, int *count); static int hwpstate_type(device_t dev, int *type); static int hwpstate_shutdown(device_t dev); static int hwpstate_features(driver_t *driver, u_int *features); static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev); static int hwpstate_get_info_from_msr(device_t dev); static int hwpstate_goto_pstate(device_t dev, int pstate_id); static int hwpstate_verbose; SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RWTUN, &hwpstate_verbose, 0, "Debug hwpstate"); static int hwpstate_verify; SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN, &hwpstate_verify, 0, "Verify P-state after setting"); static bool hwpstate_pstate_limit; SYSCTL_BOOL(_debug, OID_AUTO, hwpstate_pstate_limit, CTLFLAG_RWTUN, &hwpstate_pstate_limit, 0, "If enabled (1), limit administrative control of P-states to the value in " "CurPstateLimit"); static device_method_t hwpstate_methods[] = { /* Device interface */ DEVMETHOD(device_identify, hwpstate_identify), DEVMETHOD(device_probe, hwpstate_probe), DEVMETHOD(device_attach, hwpstate_attach), DEVMETHOD(device_detach, hwpstate_detach), DEVMETHOD(device_shutdown, hwpstate_shutdown), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, hwpstate_set), DEVMETHOD(cpufreq_drv_get, hwpstate_get), DEVMETHOD(cpufreq_drv_settings, hwpstate_settings), DEVMETHOD(cpufreq_drv_type, hwpstate_type), /* ACPI interface */ DEVMETHOD(acpi_get_features, hwpstate_features), {0, 0} }; static devclass_t hwpstate_devclass; static driver_t hwpstate_driver = { "hwpstate", hwpstate_methods, sizeof(struct hwpstate_softc), }; DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0); /* * Go to Px-state on all cpus, considering the limit register (if so * configured). */ static int hwpstate_goto_pstate(device_t dev, int id) { sbintime_t sbt; uint64_t msr; int cpu, i, j, limit; if (hwpstate_pstate_limit) { /* get the current pstate limit */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr); if (limit > id) { HWPSTATE_DEBUG(dev, "Restricting requested P%d to P%d " "due to HW limit\n", id, limit); id = limit; } } cpu = curcpu; HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu); /* Go To Px-state */ wrmsr(MSR_AMD_10H_11H_CONTROL, id); /* * We are going to the same Px-state on all cpus. * Probably should take _PSD into account. */ CPU_FOREACH(i) { if (i == cpu) continue; /* Bind to each cpu. */ thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i); /* Go To Px-state */ wrmsr(MSR_AMD_10H_11H_CONTROL, id); } /* * Verify whether each core is in the requested P-state. */ if (hwpstate_verify) { CPU_FOREACH(i) { thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); /* wait loop (100*100 usec is enough ?) */ for (j = 0; j < 100; j++) { /* get the result. 
not assure msr=id */ msr = rdmsr(MSR_AMD_10H_11H_STATUS); if (msr == id) break; sbt = SBT_1MS / 10; tsleep_sbt(dev, PZERO, "pstate_goto", sbt, sbt >> tc_precexp, 0); } HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n", (int)msr, i); if (msr != id) { HWPSTATE_DEBUG(dev, "error: P-state transition did not complete.\n"); return (ENXIO); } } } return (0); } static int hwpstate_set(device_t dev, const struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting *set; int i; if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) if (CPUFREQ_CMP(cf->freq, set[i].freq)) break; if (i == sc->cfnum) return (EINVAL); return (hwpstate_goto_pstate(dev, set[i].pstate_id)); } static int hwpstate_get(device_t dev, struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting set; uint64_t msr; sc = device_get_softc(dev); if (cf == NULL) return (EINVAL); msr = rdmsr(MSR_AMD_10H_11H_STATUS); if (msr >= sc->cfnum) return (EINVAL); set = sc->hwpstate_settings[msr]; cf->freq = set.freq; cf->volts = set.volts; cf->power = set.power; cf->lat = set.lat; cf->dev = dev; return (0); } static int hwpstate_settings(device_t dev, struct cf_setting *sets, int *count) { struct hwpstate_softc *sc; struct hwpstate_setting set; int i; if (sets == NULL || count == NULL) return (EINVAL); sc = device_get_softc(dev); if (*count < sc->cfnum) return (E2BIG); for (i = 0; i < sc->cfnum; i++, sets++) { set = sc->hwpstate_settings[i]; sets->freq = set.freq; sets->volts = set.volts; sets->power = set.power; sets->lat = set.lat; sets->dev = dev; } *count = sc->cfnum; return (0); } static int hwpstate_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } static void hwpstate_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "hwpstate", -1) != NULL) return; if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) && cpu_vendor_id != CPU_VENDOR_HYGON) return; /* * Check if hardware pstate enable bit is set. */ if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) { HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n"); return; } if (resource_disabled("hwpstate", 0)) return; - if (BUS_ADD_CHILD(parent, 10, "hwpstate", -1) == NULL) + if (BUS_ADD_CHILD(parent, 10, "hwpstate", device_get_unit(parent)) + == NULL) device_printf(parent, "hwpstate: add child failed\n"); } static int hwpstate_probe(device_t dev) { struct hwpstate_softc *sc; device_t perf_dev; uint64_t msr; int error, type; /* * Attach to hwpstate0 only; it coexists with acpi_throttle. */ if (device_get_unit(dev) != 0) return (ENXIO); sc = device_get_softc(dev); sc->dev = dev; /* * Check if acpi_perf has the INFO_ONLY flag. */ perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); error = TRUE; if (perf_dev && device_is_attached(perf_dev)) { error = CPUFREQ_DRV_TYPE(perf_dev, &type); if (error == 0) { if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) { /* * If acpi_perf doesn't have the INFO_ONLY flag, * it will take care of pstate transitions. */ HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n"); return (ENXIO); } else { /* * If acpi_perf has the INFO_ONLY flag (_PCT has FFixedHW), * we can get _PSS info from acpi_perf * without going into ACPI. */ HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n"); error = hwpstate_get_info_from_acpi_perf(dev, perf_dev); } } } if (error == 0) { /* * We got the _PSS info from acpi_perf without error. * Sanity-check it.
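* The _PSS state count must equal PstateMaxVal + 1 from the limit MSR; * on a mismatch we discard the ACPI data and fall back to reading the * P-state MSRs directly.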
*/ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) { HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)" " count mismatch\n", (intmax_t)msr, sc->cfnum); error = TRUE; } } /* * If we cannot get info from acpi_perf, * get it from the MSRs instead. */ if (error) error = hwpstate_get_info_from_msr(dev); if (error) return (error); device_set_desc(dev, "Cool`n'Quiet 2.0"); return (0); } static int hwpstate_attach(device_t dev) { return (cpufreq_register(dev)); } static int hwpstate_get_info_from_msr(device_t dev) { struct hwpstate_softc *sc; struct hwpstate_setting *hwpstate_set; uint64_t msr; int family, i, fid, did; family = CPUID_TO_FAMILY(cpu_id); sc = device_get_softc(dev); /* Get the P-state count. */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr); hwpstate_set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) { msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i); if ((msr & ((uint64_t)1 << 63)) == 0) { HWPSTATE_DEBUG(dev, "msr is not valid.\n"); return (ENXIO); } did = AMD_10H_11H_CUR_DID(msr); fid = AMD_10H_11H_CUR_FID(msr); /* Convert fid/did to frequency. */ switch (family) { case 0x11: hwpstate_set[i].freq = (100 * (fid + 0x08)) >> did; break; case 0x10: case 0x12: case 0x15: case 0x16: hwpstate_set[i].freq = (100 * (fid + 0x10)) >> did; break; case 0x17: case 0x18: did = AMD_17H_CUR_DID(msr); if (did == 0) { HWPSTATE_DEBUG(dev, "unexpected did: 0\n"); did = 1; } fid = AMD_17H_CUR_FID(msr); hwpstate_set[i].freq = (200 * fid) / did; break; default: HWPSTATE_DEBUG(dev, "get_info_from_msr: %s family" " 0x%02x CPUs are not supported yet\n", cpu_vendor_id == CPU_VENDOR_HYGON ? "Hygon" : "AMD", family); return (ENXIO); } hwpstate_set[i].pstate_id = i; /* The voltage calculation was removed; report it as unknown. */ hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN; hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN; hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN; } return (0); } static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev) { struct hwpstate_softc *sc; struct cf_setting *perf_set; struct hwpstate_setting *hwpstate_set; int count, error, i; perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT); if (perf_set == NULL) { HWPSTATE_DEBUG(dev, "nomem\n"); return (ENOMEM); } /* * Fetch settings from acpi_perf. * At this point it is attached and advertises the INFO_ONLY flag.
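* That flag means acpi_perf only exports the _PSS data; the actual * P-state transitions remain this driver's job.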
*/ count = MAX_SETTINGS; error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count); if (error) { HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n"); goto out; } sc = device_get_softc(dev); sc->cfnum = count; hwpstate_set = sc->hwpstate_settings; for (i = 0; i < count; i++) { if (i == perf_set[i].spec[0]) { hwpstate_set[i].pstate_id = i; hwpstate_set[i].freq = perf_set[i].freq; hwpstate_set[i].volts = perf_set[i].volts; hwpstate_set[i].power = perf_set[i].power; hwpstate_set[i].lat = perf_set[i].lat; } else { HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n"); error = ENXIO; goto out; } } out: if (perf_set) free(perf_set, M_TEMP); return (error); } static int hwpstate_detach(device_t dev) { hwpstate_goto_pstate(dev, 0); return (cpufreq_unregister(dev)); } static int hwpstate_shutdown(device_t dev) { /* hwpstate_goto_pstate(dev, 0); */ return (0); } static int hwpstate_features(driver_t *driver, u_int *features) { /* Notify the ACPI CPU that we support direct access to MSRs */ *features = ACPI_CAP_PERF_MSRS; return (0); } diff --git a/sys/x86/cpufreq/hwpstate_intel.c b/sys/x86/cpufreq/hwpstate_intel.c index e3d17aa0bd1c..1ed1056024c1 100644 --- a/sys/x86/cpufreq/hwpstate_intel.c +++ b/sys/x86/cpufreq/hwpstate_intel.c @@ -1,640 +1,638 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "cpufreq_if.h" extern uint64_t tsc_freq; static int intel_hwpstate_probe(device_t dev); static int intel_hwpstate_attach(device_t dev); static int intel_hwpstate_detach(device_t dev); static int intel_hwpstate_suspend(device_t dev); static int intel_hwpstate_resume(device_t dev); static int intel_hwpstate_get(device_t dev, struct cf_setting *cf); static int intel_hwpstate_type(device_t dev, int *type); static device_method_t intel_hwpstate_methods[] = { /* Device interface */ DEVMETHOD(device_identify, intel_hwpstate_identify), DEVMETHOD(device_probe, intel_hwpstate_probe), DEVMETHOD(device_attach, intel_hwpstate_attach), DEVMETHOD(device_detach, intel_hwpstate_detach), DEVMETHOD(device_suspend, intel_hwpstate_suspend), DEVMETHOD(device_resume, intel_hwpstate_resume), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_get, intel_hwpstate_get), DEVMETHOD(cpufreq_drv_type, intel_hwpstate_type), DEVMETHOD_END }; struct hwp_softc { device_t dev; bool hwp_notifications; bool hwp_activity_window; bool hwp_pref_ctrl; bool hwp_pkg_ctrl; bool hwp_pkg_ctrl_en; bool hwp_perf_bias; bool hwp_perf_bias_cached; uint64_t req; /* Cached copy of HWP_REQUEST */ uint64_t hwp_energy_perf_bias; /* Cache PERF_BIAS */ uint8_t high; uint8_t guaranteed; uint8_t efficient; uint8_t low; }; static devclass_t hwpstate_intel_devclass; static driver_t hwpstate_intel_driver = { "hwpstate_intel", intel_hwpstate_methods, sizeof(struct hwp_softc), }; DRIVER_MODULE(hwpstate_intel, cpu, hwpstate_intel_driver, hwpstate_intel_devclass, NULL, NULL); MODULE_VERSION(hwpstate_intel, 1); static bool hwpstate_pkg_ctrl_enable = true; SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN, &hwpstate_pkg_ctrl_enable, 0, "Set 1 (default) to enable package-level control, 0 to disable"); static int intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS) { device_t dev; struct pcpu *pc; struct sbuf *sb; struct hwp_softc *sc; uint64_t data, data2; int ret; sc = (struct hwp_softc *)arg1; dev = sc->dev; pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_putc(sb, '\n'); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); rdmsr_safe(MSR_IA32_PM_ENABLE, &data); sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid, ((data & 1) ? 
"En" : "Dis")); if (data == 0) { ret = 0; goto out; } rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data); sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff); sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", (data >> 8) & 0xff); sbuf_printf(sb, "\tEfficient Performance: %03ju\n", (data >> 16) & 0xff); sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff); rdmsr_safe(MSR_IA32_HWP_REQUEST, &data); data2 = 0; if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL)) rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2); sbuf_putc(sb, '\n'); #define pkg_print(x, name, offset) do { \ if (!sc->hwp_pkg_ctrl || (data & x) != 0) \ sbuf_printf(sb, "\t%s: %03u\n", name, \ (unsigned)(data >> offset) & 0xff); \ else \ sbuf_printf(sb, "\t%s: %03u\n", name, \ (unsigned)(data2 >> offset) & 0xff); \ } while (0) pkg_print(IA32_HWP_REQUEST_EPP_VALID, "Requested Efficiency Performance Preference", 24); pkg_print(IA32_HWP_REQUEST_DESIRED_VALID, "Requested Desired Performance", 16); pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID, "Requested Maximum Performance", 8); pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID, "Requested Minimum Performance", 0); #undef pkg_print sbuf_putc(sb, '\n'); out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); ret = sbuf_finish(sb); if (ret == 0) ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); sbuf_delete(sb); return (ret); } static inline int percent_to_raw(int x) { MPASS(x <= 100 && x >= 0); return (0xff * x / 100); } /* * Given x * 10 in [0, 1000], round to the integer nearest x. * * This allows round-tripping nice human readable numbers through this * interface. Otherwise, user-provided percentages such as 25, 50, 75 get * rounded down to 24, 49, and 74, which is a bit ugly. */ static inline int round10(int xtimes10) { return ((xtimes10 + 5) / 10); } static inline int raw_to_percent(int x) { MPASS(x <= 0xff && x >= 0); return (round10(x * 1000 / 0xff)); } /* Range of MSR_IA32_ENERGY_PERF_BIAS is more limited: 0-0xf. */ static inline int percent_to_raw_perf_bias(int x) { /* * Round up so that raw values present as nice round human numbers and * also round-trip to the same raw value. */ MPASS(x <= 100 && x >= 0); return (((0xf * x) + 50) / 100); } static inline int raw_to_percent_perf_bias(int x) { /* Rounding to nice human numbers despite a step interval of 6.67%. */ MPASS(x <= 0xf && x >= 0); return (((x * 20) / 0xf) * 5); } static int sysctl_epp_select(SYSCTL_HANDLER_ARGS) { struct hwp_softc *sc; device_t dev; struct pcpu *pc; uint64_t epb; uint32_t val; int ret; dev = oidp->oid_arg1; sc = device_get_softc(dev); if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias) return (ENODEV); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); if (sc->hwp_pref_ctrl) { val = (sc->req & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24; val = raw_to_percent(val); } else { /* * If cpuid indicates EPP is not supported, the HWP controller * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM §14.4.4). * This register is per-core (but not HT). 
*/ if (!sc->hwp_perf_bias_cached) { ret = rdmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, &epb); if (ret) goto out; sc->hwp_energy_perf_bias = epb; sc->hwp_perf_bias_cached = true; } val = sc->hwp_energy_perf_bias & IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK; val = raw_to_percent_perf_bias(val); } MPASS(val >= 0 && val <= 100); ret = sysctl_handle_int(oidp, &val, 0, req); if (ret || req->newptr == NULL) goto out; if (val > 100) { ret = EINVAL; goto out; } if (sc->hwp_pref_ctrl) { val = percent_to_raw(val); sc->req = ((sc->req & ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) | (val << 24u)); if (sc->hwp_pkg_ctrl_en) ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); else ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); } else { val = percent_to_raw_perf_bias(val); MPASS((val & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) == 0); sc->hwp_energy_perf_bias = ((sc->hwp_energy_perf_bias & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) | val); ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, sc->hwp_energy_perf_bias); } out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (ret); } void intel_hwpstate_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "hwpstate_intel", -1) != NULL) return; if (cpu_vendor_id != CPU_VENDOR_INTEL) return; if (resource_disabled("hwpstate_intel", 0)) return; /* * Intel SDM 14.4.1 (HWP Programming Interfaces): * Availability of HWP baseline resource and capability, * CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new * architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES, * IA32_HWP_REQUEST, IA32_HWP_STATUS. */ if ((cpu_power_eax & CPUTPM1_HWP) == 0) return; - if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", -1) == NULL) - return; - - if (bootverbose) - device_printf(parent, "hwpstate registered\n"); + if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", device_get_unit(parent)) + == NULL) + device_printf(parent, "hwpstate_intel: add child failed\n"); } static int intel_hwpstate_probe(device_t dev) { device_set_desc(dev, "Intel Speed Shift"); return (BUS_PROBE_NOWILDCARD); } static int set_autonomous_hwp(struct hwp_softc *sc) { struct pcpu *pc; device_t dev; uint64_t caps; int ret; dev = sc->dev; pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); /* XXX: Many MSRs aren't readable until feature is enabled */ ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1); if (ret) { /* * This is actually a package-level MSR, and only the first * write is not ignored. So it is harmless to enable it across * all devices, and this allows us not to care especially in * which order cores (and packages) are probed. This error * condition should not happen given we gate on the HWP CPUID * feature flag, if the Intel SDM is correct. */ device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n", pc->pc_cpuid, ret); goto out; } ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req); if (ret) { device_printf(dev, "Failed to read HWP request MSR for cpu%d (%d)\n", pc->pc_cpuid, ret); goto out; } ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps); if (ret) { device_printf(dev, "Failed to read HWP capabilities MSR for cpu%d (%d)\n", pc->pc_cpuid, ret); goto out; } /* * High and low are static; "guaranteed" is dynamic; and efficient is * also dynamic. 
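* We snapshot all four fields once here; the dynamic values are not * re-read later, so any runtime change would only be visible through a * fresh read of IA32_HWP_CAPABILITIES.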
*/ sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps); sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps); sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps); sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps); /* hardware autonomous selection determines the performance target */ sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE; /* enable HW dynamic selection of window size */ sc->req &= ~IA32_HWP_ACTIVITY_WINDOW; /* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */ sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE; sc->req |= sc->low; /* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */ sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE; sc->req |= sc->high << 8; /* If supported, request package-level control for this CPU. */ if (sc->hwp_pkg_ctrl_en) ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req | IA32_HWP_REQUEST_PACKAGE_CONTROL); else ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); if (ret) { device_printf(dev, "Failed to setup%s autonomous HWP for cpu%d\n", sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid); goto out; } /* If supported, write the PKG-wide control MSR. */ if (sc->hwp_pkg_ctrl_en) { /* * "The structure of the IA32_HWP_REQUEST_PKG MSR * (package-level) is identical to the IA32_HWP_REQUEST MSR * with the exception of the Package Control field, which does * not exist." (Intel SDM §14.4.4) */ ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); if (ret) { device_printf(dev, "Failed to set autonomous HWP for package\n"); } } out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (ret); } static int intel_hwpstate_attach(device_t dev) { struct hwp_softc *sc; int ret; sc = device_get_softc(dev); sc->dev = dev; /* eax */ if (cpu_power_eax & CPUTPM1_HWP_NOTIFICATION) sc->hwp_notifications = true; if (cpu_power_eax & CPUTPM1_HWP_ACTIVITY_WINDOW) sc->hwp_activity_window = true; if (cpu_power_eax & CPUTPM1_HWP_PERF_PREF) sc->hwp_pref_ctrl = true; if (cpu_power_eax & CPUTPM1_HWP_PKG) sc->hwp_pkg_ctrl = true; /* Allow administrators to disable pkg-level control. 
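* The decision comes from the machdep.hwpstate_pkg_ctrl tunable declared * above; it is CTLFLAG_RDTUN, so it can only be changed at boot.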
*/ sc->hwp_pkg_ctrl_en = (sc->hwp_pkg_ctrl && hwpstate_pkg_ctrl_enable); /* ecx */ if (cpu_power_ecx & CPUID_PERF_BIAS) sc->hwp_perf_bias = true; ret = set_autonomous_hwp(sc); if (ret) return (ret); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev), CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT, sc, 0, intel_hwp_dump_sysctl_handler, "A", ""); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, dev, 0, sysctl_epp_select, "I", "Efficiency/Performance Preference " "(range from 0, most performant, through 100, most efficient)"); return (cpufreq_register(dev)); } static int intel_hwpstate_detach(device_t dev) { return (cpufreq_unregister(dev)); } static int intel_hwpstate_get(device_t dev, struct cf_setting *set) { struct pcpu *pc; uint64_t rate; int ret; if (set == NULL) return (EINVAL); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set)); set->dev = dev; ret = cpu_est_clockrate(pc->pc_cpuid, &rate); if (ret == 0) set->freq = rate / 1000000; set->volts = CPUFREQ_VAL_UNKNOWN; set->power = CPUFREQ_VAL_UNKNOWN; set->lat = CPUFREQ_VAL_UNKNOWN; return (0); } static int intel_hwpstate_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED; return (0); } static int intel_hwpstate_suspend(device_t dev) { return (0); } /* * Redo a subset of set_autonomous_hwp on resume; untested. Without this, * testers observed that on resume MSR_IA32_HWP_REQUEST was bogus. */ static int intel_hwpstate_resume(device_t dev) { struct hwp_softc *sc; struct pcpu *pc; int ret; sc = device_get_softc(dev); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1); if (ret) { device_printf(dev, "Failed to enable HWP for cpu%d after suspend (%d)\n", pc->pc_cpuid, ret); goto out; } if (sc->hwp_pkg_ctrl_en) ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req | IA32_HWP_REQUEST_PACKAGE_CONTROL); else ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); if (ret) { device_printf(dev, "Failed to set%s autonomous HWP for cpu%d after suspend\n", sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid); goto out; } if (sc->hwp_pkg_ctrl_en) { ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); if (ret) { device_printf(dev, "Failed to set autonomous HWP for package after " "suspend\n"); goto out; } } if (!sc->hwp_pref_ctrl && sc->hwp_perf_bias_cached) { ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, sc->hwp_energy_perf_bias); if (ret) { device_printf(dev, "Failed to set energy perf bias for cpu%d after " "suspend\n", pc->pc_cpuid); } } out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (ret); } diff --git a/sys/x86/cpufreq/p4tcc.c b/sys/x86/cpufreq/p4tcc.c index 89bfef3f6930..4370da81cf06 100644 --- a/sys/x86/cpufreq/p4tcc.c +++ b/sys/x86/cpufreq/p4tcc.c @@ -1,348 +1,349 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Nate Lawson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Throttle clock frequency by using the thermal control circuit. This * operates independently of SpeedStep and ACPI throttling and is supported * on Pentium 4 and later models (feature TM). * * Reference: Intel Developer's manual v.3 #245472-012 * * The original version of this driver was written by Ted Unangst for * OpenBSD and imported by Maxim Sobolev. It was rewritten by Nate Lawson * for use with the cpufreq framework. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #include #include #include "acpi_if.h" struct p4tcc_softc { device_t dev; int set_count; int lowest_val; int auto_mode; }; #define TCC_NUM_SETTINGS 8 #define TCC_ENABLE_ONDEMAND (1<<4) #define TCC_REG_OFFSET 1 #define TCC_SPEED_PERCENT(x) ((10000 * (x)) / TCC_NUM_SETTINGS) static int p4tcc_features(driver_t *driver, u_int *features); static void p4tcc_identify(driver_t *driver, device_t parent); static int p4tcc_probe(device_t dev); static int p4tcc_attach(device_t dev); static int p4tcc_detach(device_t dev); static int p4tcc_settings(device_t dev, struct cf_setting *sets, int *count); static int p4tcc_set(device_t dev, const struct cf_setting *set); static int p4tcc_get(device_t dev, struct cf_setting *set); static int p4tcc_type(device_t dev, int *type); static device_method_t p4tcc_methods[] = { /* Device interface */ DEVMETHOD(device_identify, p4tcc_identify), DEVMETHOD(device_probe, p4tcc_probe), DEVMETHOD(device_attach, p4tcc_attach), DEVMETHOD(device_detach, p4tcc_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, p4tcc_set), DEVMETHOD(cpufreq_drv_get, p4tcc_get), DEVMETHOD(cpufreq_drv_type, p4tcc_type), DEVMETHOD(cpufreq_drv_settings, p4tcc_settings), /* ACPI interface */ DEVMETHOD(acpi_get_features, p4tcc_features), {0, 0} }; static driver_t p4tcc_driver = { "p4tcc", p4tcc_methods, sizeof(struct p4tcc_softc), }; static devclass_t p4tcc_devclass; DRIVER_MODULE(p4tcc, cpu, p4tcc_driver, p4tcc_devclass, 0, 0); static int p4tcc_features(driver_t *driver, u_int *features) { /* Notify the ACPI CPU that we support direct access to MSRs */ *features = ACPI_CAP_THR_MSRS; return (0); } static void p4tcc_identify(driver_t *driver, device_t parent) { if ((cpu_feature & (CPUID_ACPI | CPUID_TM)) != (CPUID_ACPI | CPUID_TM)) return; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "p4tcc", -1) != NULL) return; /* * We attach a p4tcc child for every CPU since settings need to * be performed on every CPU in the SMP case. 
See section 13.15.3 * of the IA32 Intel Architecture Software Developer's Manual, * Volume 3, for more info. */ - if (BUS_ADD_CHILD(parent, 10, "p4tcc", -1) == NULL) + if (BUS_ADD_CHILD(parent, 10, "p4tcc", device_get_unit(parent)) + == NULL) device_printf(parent, "add p4tcc child failed\n"); } static int p4tcc_probe(device_t dev) { if (resource_disabled("p4tcc", 0)) return (ENXIO); device_set_desc(dev, "CPU Frequency Thermal Control"); return (0); } static int p4tcc_attach(device_t dev) { struct p4tcc_softc *sc; struct cf_setting set; sc = device_get_softc(dev); sc->dev = dev; sc->set_count = TCC_NUM_SETTINGS; /* * On boot, the TCC is usually in Automatic mode where reading the * current performance level is likely to produce bogus results. * We record that state here and don't trust the contents of the * status MSR until we've set it ourselves. */ sc->auto_mode = TRUE; /* * XXX: After a cursory glance at various Intel specification * XXX: updates it seems like these tests for errata are bogus. * XXX: As far as I can tell, the failure mode is benign, in * XXX: that cpus with no errata will have their bottom two * XXX: STPCLK# rates disabled, so rather than waste more time * XXX: hunting down intel docs, just document it and punt. /phk */ switch (cpu_id & 0xff) { case 0x22: case 0x24: case 0x25: case 0x27: case 0x29: /* * These CPU models hang when set to 12.5%. * See Errata O50, P44, and Z21. */ sc->set_count -= 1; break; case 0x07: /* errata N44 and P18 */ case 0x0a: case 0x12: case 0x13: case 0x62: /* Pentium D B1: errata AA21 */ case 0x64: /* Pentium D C1: errata AA21 */ case 0x65: /* Pentium D D0: errata AA21 */ /* * These CPU models hang when set to 12.5% or 25%. * See Errata N44, P18, and AA21. */ sc->set_count -= 2; break; } sc->lowest_val = TCC_NUM_SETTINGS - sc->set_count + 1; /* * Before we finish attach, switch to 100%. It's possible the BIOS * set us to a lower rate. The user can override this after boot. */ set.freq = 10000; p4tcc_set(dev, &set); cpufreq_register(dev); return (0); } static int p4tcc_detach(device_t dev) { struct cf_setting set; int error; error = cpufreq_unregister(dev); if (error) return (error); /* * Before we finish detach, switch back to Automatic mode. */ set.freq = 10000; p4tcc_set(dev, &set); return (0); } static int p4tcc_settings(device_t dev, struct cf_setting *sets, int *count) { struct p4tcc_softc *sc; int i, val; sc = device_get_softc(dev); if (sets == NULL || count == NULL) return (EINVAL); if (*count < sc->set_count) return (E2BIG); /* Return a list of valid settings for this driver. */ memset(sets, CPUFREQ_VAL_UNKNOWN, sizeof(*sets) * sc->set_count); val = TCC_NUM_SETTINGS; for (i = 0; i < sc->set_count; i++, val--) { sets[i].freq = TCC_SPEED_PERCENT(val); sets[i].dev = dev; } *count = sc->set_count; return (0); } static int p4tcc_set(device_t dev, const struct cf_setting *set) { struct p4tcc_softc *sc; uint64_t mask, msr; int val; if (set == NULL) return (EINVAL); sc = device_get_softc(dev); /* * Validate that the requested state converts to a setting that is an * integer in [sc->lowest_val .. TCC_NUM_SETTINGS]. */ val = set->freq * TCC_NUM_SETTINGS / 10000; if (val * 10000 != set->freq * TCC_NUM_SETTINGS || val < sc->lowest_val || val > TCC_NUM_SETTINGS) return (EINVAL); /* * Read the current register and mask off the old setting and * On-Demand bit. If the new val is < 100%, set it and the On-Demand * bit, otherwise just return to Automatic mode.
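 *
 * A worked example of the encoding, assuming the layout implied by
 * the macros above (3-bit duty-cycle field at TCC_REG_OFFSET, i.e.
 * bits 3:1, and the On-Demand enable at bit 4): a request for
 * set->freq == 7500 (75%) gives
 *
 *	val = 7500 * 8 / 10000;			// == 6, i.e. 6/8 duty cycle
 *	msr &= ~0x1e;				// clear bits 4:1
 *	msr |= (6 << 1) | TCC_ENABLE_ONDEMAND;	// duty 6, On-Demand on
 *
 * while set->freq == 10000 gives val == TCC_NUM_SETTINGS, so neither
 * field is set and the CPU drops back to Automatic mode.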
*/ msr = rdmsr(MSR_THERM_CONTROL); mask = (TCC_NUM_SETTINGS - 1) << TCC_REG_OFFSET; msr &= ~(mask | TCC_ENABLE_ONDEMAND); if (val < TCC_NUM_SETTINGS) msr |= (val << TCC_REG_OFFSET) | TCC_ENABLE_ONDEMAND; wrmsr(MSR_THERM_CONTROL, msr); /* * Record whether we're now in Automatic or On-Demand mode. We have * to cache this since there is no reliable way to check if TCC is in * Automatic mode (i.e., at 100% or possibly 50%). Reading bit 4 of * the ACPI Thermal Monitor Control Register produces 0 no matter * what the current mode. */ if (msr & TCC_ENABLE_ONDEMAND) sc->auto_mode = FALSE; else sc->auto_mode = TRUE; return (0); } static int p4tcc_get(device_t dev, struct cf_setting *set) { struct p4tcc_softc *sc; uint64_t msr; int val; if (set == NULL) return (EINVAL); sc = device_get_softc(dev); /* * Read the current register and extract the current setting. If * in automatic mode, assume we're at TCC_NUM_SETTINGS (100%). * * XXX This is not completely reliable since at high temperatures * the CPU may be automatically throttling to 50% but it's the best * we can do. */ if (!sc->auto_mode) { msr = rdmsr(MSR_THERM_CONTROL); val = (msr >> TCC_REG_OFFSET) & (TCC_NUM_SETTINGS - 1); } else val = TCC_NUM_SETTINGS; memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set)); set->freq = TCC_SPEED_PERCENT(val); set->dev = dev; return (0); } static int p4tcc_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_RELATIVE; return (0); } diff --git a/sys/x86/cpufreq/powernow.c b/sys/x86/cpufreq/powernow.c index da5598557387..977798ab2f77 100644 --- a/sys/x86/cpufreq/powernow.c +++ b/sys/x86/cpufreq/powernow.c @@ -1,969 +1,970 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Bruno Ducrot * Copyright (c) 2004 FUKUDA Nobuhiko * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Many thanks to Nate Lawson for his helpful comments on this driver and * to Jung-uk Kim for testing. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #define PN7_TYPE 0 #define PN8_TYPE 1 /* Flags for some hardware bugs. */ #define A0_ERRATA 0x1 /* Bugs for the rev. 
A0 of Athlon (K7): * Interrupts must be disabled and no half * multipliers are allowed */ #define PENDING_STUCK 0x2 /* With some buggy chipsets and some newer AMD64 * processors (Rev. G?), the pending bit in the * FIDVID_STATUS MSR stays set forever. * No workaround :( */ /* Legacy configuration via BIOS table PSB. */ #define PSB_START 0 #define PSB_STEP 0x10 #define PSB_SIG "AMDK7PNOW!" #define PSB_LEN 10 #define PSB_OFF 0 struct psb_header { char signature[10]; uint8_t version; uint8_t flags; uint16_t settlingtime; uint8_t res1; uint8_t numpst; } __packed; struct pst_header { uint32_t cpuid; uint8_t fsb; uint8_t maxfid; uint8_t startvid; uint8_t numpstates; } __packed; /* * MSRs and bits used by Powernow technology */ #define MSR_AMDK7_FIDVID_CTL 0xc0010041 #define MSR_AMDK7_FIDVID_STATUS 0xc0010042 /* Bitfields used by K7 */ #define PN7_CTR_FID(x) ((x) & 0x1f) #define PN7_CTR_VID(x) (((x) & 0x1f) << 8) #define PN7_CTR_FIDC 0x00010000 #define PN7_CTR_VIDC 0x00020000 #define PN7_CTR_FIDCHRATIO 0x00100000 #define PN7_CTR_SGTC(x) (((uint64_t)(x) & 0x000fffff) << 32) #define PN7_STA_CFID(x) ((x) & 0x1f) #define PN7_STA_SFID(x) (((x) >> 8) & 0x1f) #define PN7_STA_MFID(x) (((x) >> 16) & 0x1f) #define PN7_STA_CVID(x) (((x) >> 32) & 0x1f) #define PN7_STA_SVID(x) (((x) >> 40) & 0x1f) #define PN7_STA_MVID(x) (((x) >> 48) & 0x1f) /* ACPI ctr_val status register to powernow k7 configuration */ #define ACPI_PN7_CTRL_TO_FID(x) ((x) & 0x1f) #define ACPI_PN7_CTRL_TO_VID(x) (((x) >> 5) & 0x1f) #define ACPI_PN7_CTRL_TO_SGTC(x) (((x) >> 10) & 0xffff) /* Bitfields used by K8 */ #define PN8_CTR_FID(x) ((x) & 0x3f) #define PN8_CTR_VID(x) (((x) & 0x1f) << 8) #define PN8_CTR_PENDING(x) (((x) & 1) << 32) #define PN8_STA_CFID(x) ((x) & 0x3f) #define PN8_STA_SFID(x) (((x) >> 8) & 0x3f) #define PN8_STA_MFID(x) (((x) >> 16) & 0x3f) #define PN8_STA_PENDING(x) (((x) >> 31) & 0x01) #define PN8_STA_CVID(x) (((x) >> 32) & 0x1f) #define PN8_STA_SVID(x) (((x) >> 40) & 0x1f) #define PN8_STA_MVID(x) (((x) >> 48) & 0x1f) /* Reserved1 to powernow k8 configuration */ #define PN8_PSB_TO_RVO(x) ((x) & 0x03) #define PN8_PSB_TO_IRT(x) (((x) >> 2) & 0x03) #define PN8_PSB_TO_MVS(x) (((x) >> 4) & 0x03) #define PN8_PSB_TO_BATT(x) (((x) >> 6) & 0x03) /* ACPI ctr_val status register to powernow k8 configuration */ #define ACPI_PN8_CTRL_TO_FID(x) ((x) & 0x3f) #define ACPI_PN8_CTRL_TO_VID(x) (((x) >> 6) & 0x1f) #define ACPI_PN8_CTRL_TO_VST(x) (((x) >> 11) & 0x1f) #define ACPI_PN8_CTRL_TO_MVS(x) (((x) >> 18) & 0x03) #define ACPI_PN8_CTRL_TO_PLL(x) (((x) >> 20) & 0x7f) #define ACPI_PN8_CTRL_TO_RVO(x) (((x) >> 28) & 0x03) #define ACPI_PN8_CTRL_TO_IRT(x) (((x) >> 30) & 0x03) #define WRITE_FIDVID(fid, vid, ctrl) \ wrmsr(MSR_AMDK7_FIDVID_CTL, \ (((ctrl) << 32) | (1ULL << 16) | ((vid) << 8) | (fid))) #define COUNT_OFF_IRT(irt) DELAY(10 * (1 << (irt))) #define COUNT_OFF_VST(vst) DELAY(20 * (vst)) #define FID_TO_VCO_FID(fid) \ (((fid) < 8) ? (8 + ((fid) << 1)) : (fid)) /* * Divide each value by 10 to get the processor multiplier. * Some of these tables are the same as those in the Linux * powernow-k7 implementation by Dave Jones.
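 *
 * A hedged example of how these tables are used (see decode_pst() and
 * pn_probe() below): frequencies are computed in kHz as
 *
 *	freq_khz = 100 * fid_to_mult[fid] * fsb_mhz;
 *
 * so a K7 FID whose table entry is 110 (an 11.0x multiplier) on a
 * 100 MHz FSB yields 100 * 110 * 100 = 1100000 kHz, i.e. 1.1 GHz.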
*/ static int pn7_fid_to_mult[32] = { 110, 115, 120, 125, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 30, 190, 40, 200, 130, 135, 140, 210, 150, 225, 160, 165, 170, 180, 0, 0, }; static int pn8_fid_to_mult[64] = { 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 160, 165, 170, 175, 180, 185, 190, 195, 200, 205, 210, 215, 220, 225, 230, 235, 240, 245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, 305, 310, 315, 320, 325, 330, 335, 340, 345, 350, 355, }; /* * Units are in mV. */ /* Mobile VRM (K7) */ static int pn7_mobile_vid_to_volts[] = { 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, 1075, 1050, 1025, 1000, 975, 950, 925, 0, }; /* Desktop VRM (K7) */ static int pn7_desktop_vid_to_volts[] = { 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, 1075, 1050, 1025, 1000, 975, 950, 925, 0, }; /* Desktop and Mobile VRM (K8) */ static int pn8_vid_to_volts[] = { 1550, 1525, 1500, 1475, 1450, 1425, 1400, 1375, 1350, 1325, 1300, 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, 1075, 1050, 1025, 1000, 975, 950, 925, 900, 875, 850, 825, 800, 0, }; #define POWERNOW_MAX_STATES 16 struct powernow_state { int freq; int power; int fid; int vid; }; struct pn_softc { device_t dev; int pn_type; struct powernow_state powernow_states[POWERNOW_MAX_STATES]; u_int fsb; u_int sgtc; u_int vst; u_int mvs; u_int pll; u_int rvo; u_int irt; int low; int powernow_max_states; u_int powernow_state; u_int errata; int *vid_to_volts; }; /* * Offsets in struct cf_setting array for private values given by * acpi_perf driver. */ #define PX_SPEC_CONTROL 0 #define PX_SPEC_STATUS 1 static void pn_identify(driver_t *driver, device_t parent); static int pn_probe(device_t dev); static int pn_attach(device_t dev); static int pn_detach(device_t dev); static int pn_set(device_t dev, const struct cf_setting *cf); static int pn_get(device_t dev, struct cf_setting *cf); static int pn_settings(device_t dev, struct cf_setting *sets, int *count); static int pn_type(device_t dev, int *type); static device_method_t pn_methods[] = { /* Device interface */ DEVMETHOD(device_identify, pn_identify), DEVMETHOD(device_probe, pn_probe), DEVMETHOD(device_attach, pn_attach), DEVMETHOD(device_detach, pn_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, pn_set), DEVMETHOD(cpufreq_drv_get, pn_get), DEVMETHOD(cpufreq_drv_settings, pn_settings), DEVMETHOD(cpufreq_drv_type, pn_type), {0, 0} }; static devclass_t pn_devclass; static driver_t pn_driver = { "powernow", pn_methods, sizeof(struct pn_softc), }; DRIVER_MODULE(powernow, cpu, pn_driver, pn_devclass, 0, 0); static int pn7_setfidvid(struct pn_softc *sc, int fid, int vid) { int cfid, cvid; uint64_t status, ctl; status = rdmsr(MSR_AMDK7_FIDVID_STATUS); cfid = PN7_STA_CFID(status); cvid = PN7_STA_CVID(status); /* We're already at the requested level. 
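 *
 * Aside: a sketch of what the WRITE_FIDVID() macro above expands to,
 * for orientation (the bit-16 "initiate change" reading is inferred
 * from the macro and the PN7_CTR_FIDC definition, not restated from
 * AMD's documentation):
 *
 *	wrmsr(MSR_AMDK7_FIDVID_CTL,
 *	    ((uint64_t)ctrl << 32) |	// SGTC (K7) / PLL count (K8)
 *	    (1ULL << 16) |		// initiate the FID/VID change
 *	    (vid << 8) | fid);
 *
 * This K7 path does not use the macro; it sets PN7_CTR_FIDC and
 * PN7_CTR_VIDC individually below, in an order that depends on
 * whether the frequency is being raised or lowered.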
*/ if (fid == cfid && vid == cvid) return (0); ctl = rdmsr(MSR_AMDK7_FIDVID_CTL) & PN7_CTR_FIDCHRATIO; ctl |= PN7_CTR_FID(fid); ctl |= PN7_CTR_VID(vid); ctl |= PN7_CTR_SGTC(sc->sgtc); if (sc->errata & A0_ERRATA) disable_intr(); if (pn7_fid_to_mult[fid] < pn7_fid_to_mult[cfid]) { wrmsr(MSR_AMDK7_FIDVID_CTL, ctl | PN7_CTR_FIDC); if (vid != cvid) wrmsr(MSR_AMDK7_FIDVID_CTL, ctl | PN7_CTR_VIDC); } else { wrmsr(MSR_AMDK7_FIDVID_CTL, ctl | PN7_CTR_VIDC); if (fid != cfid) wrmsr(MSR_AMDK7_FIDVID_CTL, ctl | PN7_CTR_FIDC); } if (sc->errata & A0_ERRATA) enable_intr(); return (0); } static int pn8_read_pending_wait(uint64_t *status) { int i = 10000; do *status = rdmsr(MSR_AMDK7_FIDVID_STATUS); while (PN8_STA_PENDING(*status) && --i); return (i == 0 ? ENXIO : 0); } static int pn8_write_fidvid(u_int fid, u_int vid, uint64_t ctrl, uint64_t *status) { int i = 100; do WRITE_FIDVID(fid, vid, ctrl); while (pn8_read_pending_wait(status) && --i); return (i == 0 ? ENXIO : 0); } static int pn8_setfidvid(struct pn_softc *sc, int fid, int vid) { uint64_t status; int cfid, cvid; int rvo; int rv; u_int val; rv = pn8_read_pending_wait(&status); if (rv) return (rv); cfid = PN8_STA_CFID(status); cvid = PN8_STA_CVID(status); if (fid == cfid && vid == cvid) return (0); /* * Phase 1: Raise core voltage to requested VID if frequency is * going up. */ while (cvid > vid) { val = cvid - (1 << sc->mvs); rv = pn8_write_fidvid(cfid, (val > 0) ? val : 0, 1ULL, &status); if (rv) { sc->errata |= PENDING_STUCK; return (rv); } cvid = PN8_STA_CVID(status); COUNT_OFF_VST(sc->vst); } /* ... then raise to voltage + RVO (if required) */ for (rvo = sc->rvo; rvo > 0 && cvid > 0; --rvo) { /* XXX It's not clear from spec if we have to do that * in 0.25 step or in MVS. Therefore do it as it's done * under Linux */ rv = pn8_write_fidvid(cfid, cvid - 1, 1ULL, &status); if (rv) { sc->errata |= PENDING_STUCK; return (rv); } cvid = PN8_STA_CVID(status); COUNT_OFF_VST(sc->vst); } /* Phase 2: change to requested core frequency */ if (cfid != fid) { u_int vco_fid, vco_cfid, fid_delta; vco_fid = FID_TO_VCO_FID(fid); vco_cfid = FID_TO_VCO_FID(cfid); while (abs(vco_fid - vco_cfid) > 2) { fid_delta = (vco_cfid & 1) ? 1 : 2; if (fid > cfid) { if (cfid > 7) val = cfid + fid_delta; else val = FID_TO_VCO_FID(cfid) + fid_delta; } else val = cfid - fid_delta; rv = pn8_write_fidvid(val, cvid, sc->pll * (uint64_t) sc->fsb, &status); if (rv) { sc->errata |= PENDING_STUCK; return (rv); } cfid = PN8_STA_CFID(status); COUNT_OFF_IRT(sc->irt); vco_cfid = FID_TO_VCO_FID(cfid); } rv = pn8_write_fidvid(fid, cvid, sc->pll * (uint64_t) sc->fsb, &status); if (rv) { sc->errata |= PENDING_STUCK; return (rv); } cfid = PN8_STA_CFID(status); COUNT_OFF_IRT(sc->irt); } /* Phase 3: change to requested voltage */ if (cvid != vid) { rv = pn8_write_fidvid(cfid, vid, 1ULL, &status); cvid = PN8_STA_CVID(status); COUNT_OFF_VST(sc->vst); } /* Check if transition failed. 
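 *
 * Aside on phase 2 above: the frequency is stepped so that the VCO
 * FID never moves by more than two per MSR write.  FID_TO_VCO_FID()
 * maps low FIDs onto that scale; two worked examples:
 *
 *	FID_TO_VCO_FID(4) == 8 + (4 << 1) == 16	// FIDs below 8
 *	FID_TO_VCO_FID(10) == 10		// FIDs 8 and above
 *
 * so a large transition walks through intermediate FIDs until the
 * two VCO FIDs are within 2 of each other, then jumps to the target.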
*/ if (cfid != fid || cvid != vid) rv = ENXIO; return (rv); } static int pn_set(device_t dev, const struct cf_setting *cf) { struct pn_softc *sc; int fid, vid; int i; int rv; if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); if (sc->errata & PENDING_STUCK) return (ENXIO); for (i = 0; i < sc->powernow_max_states; ++i) if (CPUFREQ_CMP(sc->powernow_states[i].freq / 1000, cf->freq)) break; fid = sc->powernow_states[i].fid; vid = sc->powernow_states[i].vid; rv = ENODEV; switch (sc->pn_type) { case PN7_TYPE: rv = pn7_setfidvid(sc, fid, vid); break; case PN8_TYPE: rv = pn8_setfidvid(sc, fid, vid); break; } return (rv); } static int pn_get(device_t dev, struct cf_setting *cf) { struct pn_softc *sc; u_int cfid = 0, cvid = 0; int i; uint64_t status; if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); if (sc->errata & PENDING_STUCK) return (ENXIO); status = rdmsr(MSR_AMDK7_FIDVID_STATUS); switch (sc->pn_type) { case PN7_TYPE: cfid = PN7_STA_CFID(status); cvid = PN7_STA_CVID(status); break; case PN8_TYPE: cfid = PN8_STA_CFID(status); cvid = PN8_STA_CVID(status); break; } for (i = 0; i < sc->powernow_max_states; ++i) if (cfid == sc->powernow_states[i].fid && cvid == sc->powernow_states[i].vid) break; if (i < sc->powernow_max_states) { cf->freq = sc->powernow_states[i].freq / 1000; cf->power = sc->powernow_states[i].power; cf->lat = 200; cf->volts = sc->vid_to_volts[cvid]; cf->dev = dev; } else { memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf)); cf->dev = NULL; } return (0); } static int pn_settings(device_t dev, struct cf_setting *sets, int *count) { struct pn_softc *sc; int i; if (sets == NULL || count == NULL) return (EINVAL); sc = device_get_softc(dev); if (*count < sc->powernow_max_states) return (E2BIG); for (i = 0; i < sc->powernow_max_states; ++i) { sets[i].freq = sc->powernow_states[i].freq / 1000; sets[i].power = sc->powernow_states[i].power; sets[i].lat = 200; sets[i].volts = sc->vid_to_volts[sc->powernow_states[i].vid]; sets[i].dev = dev; } *count = sc->powernow_max_states; return (0); } static int pn_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } /* * Given a set of FID/VID pairs and the number of performance states, * compute powernow_states via an insertion sort. */ static int decode_pst(struct pn_softc *sc, uint8_t *p, int npstates) { int i, j, n; struct powernow_state state; for (i = 0; i < POWERNOW_MAX_STATES; ++i) sc->powernow_states[i].freq = CPUFREQ_VAL_UNKNOWN; for (n = 0, i = 0; i < npstates; ++i) { state.fid = *p++; state.vid = *p++; state.power = CPUFREQ_VAL_UNKNOWN; switch (sc->pn_type) { case PN7_TYPE: state.freq = 100 * pn7_fid_to_mult[state.fid] * sc->fsb; if ((sc->errata & A0_ERRATA) && (pn7_fid_to_mult[state.fid] % 10) == 5) continue; break; case PN8_TYPE: state.freq = 100 * pn8_fid_to_mult[state.fid] * sc->fsb; break; } j = n; while (j > 0 && sc->powernow_states[j - 1].freq < state.freq) { memcpy(&sc->powernow_states[j], &sc->powernow_states[j - 1], sizeof(struct powernow_state)); --j; } memcpy(&sc->powernow_states[j], &state, sizeof(struct powernow_state)); ++n; } /* * Fix up powernow_max_states in case errata A0 gave us fewer states * than expected.
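 *
 * The loop above is a plain insertion sort into descending frequency
 * order.  A minimal stand-alone sketch of the same idea, on ints
 * (illustrative only, not code from this driver):
 *
 *	void
 *	sort_desc(int *a, int n)
 *	{
 *		for (int i = 1; i < n; i++) {
 *			int v = a[i], j = i;
 *			while (j > 0 && a[j - 1] < v) {
 *				a[j] = a[j - 1];
 *				j--;
 *			}
 *			a[j] = v;
 *		}
 *	}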
*/ sc->powernow_max_states = n; if (bootverbose) for (i = 0; i < sc->powernow_max_states; ++i) { int fid = sc->powernow_states[i].fid; int vid = sc->powernow_states[i].vid; printf("powernow: %2i %8dkHz FID %02x VID %02x\n", i, sc->powernow_states[i].freq, fid, vid); } return (0); } static int cpuid_is_k7(u_int cpuid) { switch (cpuid) { case 0x760: case 0x761: case 0x762: case 0x770: case 0x771: case 0x780: case 0x781: case 0x7a0: return (TRUE); } return (FALSE); } static int pn_decode_pst(device_t dev) { int maxpst; struct pn_softc *sc; u_int cpuid, maxfid, startvid; u_long sig; struct psb_header *psb; uint8_t *p; u_int regs[4]; uint64_t status; sc = device_get_softc(dev); do_cpuid(0x80000001, regs); cpuid = regs[0]; if ((cpuid & 0xfff) == 0x760) sc->errata |= A0_ERRATA; status = rdmsr(MSR_AMDK7_FIDVID_STATUS); switch (sc->pn_type) { case PN7_TYPE: maxfid = PN7_STA_MFID(status); startvid = PN7_STA_SVID(status); break; case PN8_TYPE: maxfid = PN8_STA_MFID(status); /* * We should actually use a variable named 'maxvid' on K8, * but why introduce a new variable for that? */ startvid = PN8_STA_MVID(status); break; default: return (ENODEV); } if (bootverbose) { device_printf(dev, "STATUS: 0x%jx\n", status); device_printf(dev, "STATUS: maxfid: 0x%02x\n", maxfid); device_printf(dev, "STATUS: %s: 0x%02x\n", sc->pn_type == PN7_TYPE ? "startvid" : "maxvid", startvid); } sig = bios_sigsearch(PSB_START, PSB_SIG, PSB_LEN, PSB_STEP, PSB_OFF); if (sig) { struct pst_header *pst; psb = (struct psb_header*)(uintptr_t)BIOS_PADDRTOVADDR(sig); switch (psb->version) { default: return (ENODEV); case 0x14: /* * We can't be picky about numpst since at least * some systems have a value of 1 and some have 2. * We trust that cpuid_is_k7() will be better at * catching that we're on a K8 anyway.
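 *
 * For orientation, the PSB table walked below is laid out in BIOS
 * memory as follows (per the struct definitions near the top of this
 * file; the sketch is illustrative, not a new declaration):
 *
 *	struct psb_header	// "AMDK7PNOW!" signature, version, numpst, ...
 *	struct pst_header	// cpuid, fsb, maxfid, startvid, numpstates
 *	uint8_t fid, vid;	// numpstates pairs of (fid, vid) bytes
 *	...			// next pst_header, and so on
 *
 * which is why the scan advances p by sizeof(struct pst_header) plus
 * 2 * numpstates after each non-matching PST.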
*/ if (sc->pn_type != PN8_TYPE) return (EINVAL); sc->vst = psb->settlingtime; sc->rvo = PN8_PSB_TO_RVO(psb->res1); sc->irt = PN8_PSB_TO_IRT(psb->res1); sc->mvs = PN8_PSB_TO_MVS(psb->res1); sc->low = PN8_PSB_TO_BATT(psb->res1); if (bootverbose) { device_printf(dev, "PSB: VST: %d\n", psb->settlingtime); device_printf(dev, "PSB: RVO %x IRT %d " "MVS %d BATT %d\n", sc->rvo, sc->irt, sc->mvs, sc->low); } break; case 0x12: if (sc->pn_type != PN7_TYPE) return (EINVAL); sc->sgtc = psb->settlingtime * sc->fsb; if (sc->sgtc < 100 * sc->fsb) sc->sgtc = 100 * sc->fsb; break; } p = ((uint8_t *) psb) + sizeof(struct psb_header); pst = (struct pst_header*) p; maxpst = 200; do { struct pst_header *pst = (struct pst_header*) p; if (cpuid == pst->cpuid && maxfid == pst->maxfid && startvid == pst->startvid) { sc->powernow_max_states = pst->numpstates; switch (sc->pn_type) { case PN7_TYPE: if (abs(sc->fsb - pst->fsb) > 5) continue; break; case PN8_TYPE: break; } return (decode_pst(sc, p + sizeof(struct pst_header), sc->powernow_max_states)); } p += sizeof(struct pst_header) + (2 * pst->numpstates); } while (cpuid_is_k7(pst->cpuid) && maxpst--); device_printf(dev, "no match for extended cpuid %.3x\n", cpuid); } return (ENODEV); } static int pn_decode_acpi(device_t dev, device_t perf_dev) { int i, j, n; uint64_t status; uint32_t ctrl; u_int cpuid; u_int regs[4]; struct pn_softc *sc; struct powernow_state state; struct cf_setting sets[POWERNOW_MAX_STATES]; int count = POWERNOW_MAX_STATES; int type; int rv; if (perf_dev == NULL) return (ENXIO); rv = CPUFREQ_DRV_SETTINGS(perf_dev, sets, &count); if (rv) return (ENXIO); rv = CPUFREQ_DRV_TYPE(perf_dev, &type); if (rv || (type & CPUFREQ_FLAG_INFO_ONLY) == 0) return (ENXIO); sc = device_get_softc(dev); do_cpuid(0x80000001, regs); cpuid = regs[0]; if ((cpuid & 0xfff) == 0x760) sc->errata |= A0_ERRATA; ctrl = 0; sc->sgtc = 0; for (n = 0, i = 0; i < count; ++i) { ctrl = sets[i].spec[PX_SPEC_CONTROL]; switch (sc->pn_type) { case PN7_TYPE: state.fid = ACPI_PN7_CTRL_TO_FID(ctrl); state.vid = ACPI_PN7_CTRL_TO_VID(ctrl); if ((sc->errata & A0_ERRATA) && (pn7_fid_to_mult[state.fid] % 10) == 5) continue; break; case PN8_TYPE: state.fid = ACPI_PN8_CTRL_TO_FID(ctrl); state.vid = ACPI_PN8_CTRL_TO_VID(ctrl); break; } state.freq = sets[i].freq * 1000; state.power = sets[i].power; j = n; while (j > 0 && sc->powernow_states[j - 1].freq < state.freq) { memcpy(&sc->powernow_states[j], &sc->powernow_states[j - 1], sizeof(struct powernow_state)); --j; } memcpy(&sc->powernow_states[j], &state, sizeof(struct powernow_state)); ++n; } sc->powernow_max_states = n; state = sc->powernow_states[0]; status = rdmsr(MSR_AMDK7_FIDVID_STATUS); switch (sc->pn_type) { case PN7_TYPE: sc->sgtc = ACPI_PN7_CTRL_TO_SGTC(ctrl); /* * XXX Some BIOSes forget the max frequency! * This may indicate that we have the wrong tables. Therefore, * don't implement a quirk, but fall back to the legacy BIOS * tables instead. */ if (PN7_STA_MFID(status) != state.fid) { device_printf(dev, "ACPI MAX frequency not found\n"); return (EINVAL); } sc->fsb = state.freq / 100 / pn7_fid_to_mult[state.fid]; break; case PN8_TYPE: sc->vst = ACPI_PN8_CTRL_TO_VST(ctrl), sc->mvs = ACPI_PN8_CTRL_TO_MVS(ctrl), sc->pll = ACPI_PN8_CTRL_TO_PLL(ctrl), sc->rvo = ACPI_PN8_CTRL_TO_RVO(ctrl), sc->irt = ACPI_PN8_CTRL_TO_IRT(ctrl); sc->low = 0; /* XXX */ /* * PowerNow! K8 supports only one low frequency.
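 *
 * For reference, the ACPI_PN8_CTRL_TO_* extractions above decode the
 * _PSS control value as follows (bit positions follow directly from
 * the shift/mask macros near the top of this file; the field names
 * are the usual PowerNow! K8 expansions and are stated here as an
 * assumption):
 *
 *	bits  5:0	FID
 *	bits 10:6	VID
 *	bits 15:11	VST, voltage stabilization time
 *	bits 19:18	MVS, maximum voltage step
 *	bits 26:20	PLL lock time
 *	bits 29:28	RVO, ramp voltage offset
 *	bits 31:30	IRT, isochronous relief time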
*/ if (sc->powernow_max_states >= 2 && (sc->powernow_states[sc->powernow_max_states - 2].fid < 8)) return (EINVAL); sc->fsb = state.freq / 100 / pn8_fid_to_mult[state.fid]; break; } return (0); } static void pn_identify(driver_t *driver, device_t parent) { if ((amd_pminfo & AMDPM_FID) == 0 || (amd_pminfo & AMDPM_VID) == 0) return; switch (cpu_id & 0xf00) { case 0x600: case 0xf00: break; default: return; } if (device_find_child(parent, "powernow", -1) != NULL) return; - if (BUS_ADD_CHILD(parent, 10, "powernow", -1) == NULL) + if (BUS_ADD_CHILD(parent, 10, "powernow", device_get_unit(parent)) + == NULL) device_printf(parent, "powernow: add child failed\n"); } static int pn_probe(device_t dev) { struct pn_softc *sc; uint64_t status; uint64_t rate; struct pcpu *pc; u_int sfid, mfid, cfid; sc = device_get_softc(dev); sc->errata = 0; status = rdmsr(MSR_AMDK7_FIDVID_STATUS); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENODEV); cpu_est_clockrate(pc->pc_cpuid, &rate); switch (cpu_id & 0xf00) { case 0x600: sfid = PN7_STA_SFID(status); mfid = PN7_STA_MFID(status); cfid = PN7_STA_CFID(status); sc->pn_type = PN7_TYPE; sc->fsb = rate / 100000 / pn7_fid_to_mult[cfid]; /* * If start FID is different to max FID, then it is a * mobile processor. If not, it is a low powered desktop * processor. */ if (PN7_STA_SFID(status) != PN7_STA_MFID(status)) { sc->vid_to_volts = pn7_mobile_vid_to_volts; device_set_desc(dev, "PowerNow! K7"); } else { sc->vid_to_volts = pn7_desktop_vid_to_volts; device_set_desc(dev, "Cool`n'Quiet K7"); } break; case 0xf00: sfid = PN8_STA_SFID(status); mfid = PN8_STA_MFID(status); cfid = PN8_STA_CFID(status); sc->pn_type = PN8_TYPE; sc->vid_to_volts = pn8_vid_to_volts; sc->fsb = rate / 100000 / pn8_fid_to_mult[cfid]; if (PN8_STA_SFID(status) != PN8_STA_MFID(status)) device_set_desc(dev, "PowerNow! K8"); else device_set_desc(dev, "Cool`n'Quiet K8"); break; default: return (ENODEV); } return (0); } static int pn_attach(device_t dev) { int rv; device_t child; child = device_find_child(device_get_parent(dev), "acpi_perf", -1); if (child) { rv = pn_decode_acpi(dev, child); if (rv) rv = pn_decode_pst(dev); } else rv = pn_decode_pst(dev); if (rv != 0) return (ENXIO); cpufreq_register(dev); return (0); } static int pn_detach(device_t dev) { return (cpufreq_unregister(dev)); } diff --git a/sys/x86/cpufreq/smist.c b/sys/x86/cpufreq/smist.c index 309b725d31ad..129ec2d4884b 100644 --- a/sys/x86/cpufreq/smist.c +++ b/sys/x86/cpufreq/smist.c @@ -1,515 +1,516 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Bruno Ducrot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * This driver is based upon information found by examining speedstep-0.5 * from Marc Lehman, which includes all the reverse engineering effort of * Malik Martin (function 1 and 2 of the GSI). * * The correct way for the OS to take ownership from the BIOS was found by * Hiroshi Miura (function 0 of the GSI). * * Finally, the int 15h call interface was (partially) documented by Intel. * * Many thanks to Jon Noack for testing and debugging this driver. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpufreq_if.h" #if 0 #define DPRINT(dev, x...) device_printf(dev, x) #else #define DPRINT(dev, x...) #endif struct smist_softc { device_t dev; int smi_cmd; int smi_data; int command; int flags; struct cf_setting sets[2]; /* Only two settings. */ }; static char smist_magic[] = "Copyright (c) 1999 Intel Corporation"; static void smist_identify(driver_t *driver, device_t parent); static int smist_probe(device_t dev); static int smist_attach(device_t dev); static int smist_detach(device_t dev); static int smist_settings(device_t dev, struct cf_setting *sets, int *count); static int smist_set(device_t dev, const struct cf_setting *set); static int smist_get(device_t dev, struct cf_setting *set); static int smist_type(device_t dev, int *type); static device_method_t smist_methods[] = { /* Device interface */ DEVMETHOD(device_identify, smist_identify), DEVMETHOD(device_probe, smist_probe), DEVMETHOD(device_attach, smist_attach), DEVMETHOD(device_detach, smist_detach), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, smist_set), DEVMETHOD(cpufreq_drv_get, smist_get), DEVMETHOD(cpufreq_drv_type, smist_type), DEVMETHOD(cpufreq_drv_settings, smist_settings), {0, 0} }; static driver_t smist_driver = { "smist", smist_methods, sizeof(struct smist_softc) }; static devclass_t smist_devclass; DRIVER_MODULE(smist, cpu, smist_driver, smist_devclass, 0, 0); struct piix4_pci_device { uint16_t vendor; uint16_t device; char *desc; }; static struct piix4_pci_device piix4_pci_devices[] = { {0x8086, 0x7113, "Intel PIIX4 ISA bridge"}, {0x8086, 0x719b, "Intel PIIX4 ISA bridge (embedded in MX440 chipset)"}, {0, 0, NULL}, }; #define SET_OWNERSHIP 0 #define GET_STATE 1 #define SET_STATE 2 static int int15_gsic_call(int *sig, int *smi_cmd, int *command, int *smi_data, int *flags) { struct vm86frame vmf; bzero(&vmf, sizeof(vmf)); vmf.vmf_eax = 0x0000E980; /* IST support */ vmf.vmf_edx = 0x47534943; /* 'GSIC' in ASCII */ vm86_intcall(0x15, &vmf); if (vmf.vmf_eax == 0x47534943) { *sig = vmf.vmf_eax; *smi_cmd = vmf.vmf_ebx & 0xff; *command = (vmf.vmf_ebx >> 16) & 0xff; *smi_data = vmf.vmf_ecx; *flags = vmf.vmf_edx; } else { *sig = -1; *smi_cmd = -1; *command = -1; *smi_data = -1; *flags = -1; } return (0); } /* Temporary structure to hold mapped page and status. */ struct set_ownership_data { int smi_cmd; int command; int result; void *buf; }; /* Perform actual SMI call to enable SpeedStep. 
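 *
 * A hedged note on the calling convention used here and in
 * getset_state() below, as read off the inline assembly rather than
 * from any Intel document: the BIOS SMI handler is entered by an OUT
 * to the SMI command port with the GSI command in EAX, a function
 * code in EBX (0 = take ownership, 1 = get state, 2 = set state) and
 * an argument in ECX; EDI is preloaded with -1 and carries the
 * result back.  Roughly:
 *
 *	eax = sc->command;	// e.g. 0x47534982: 'GSI' signature + cmd byte
 *	ebx = function;		// SET_OWNERSHIP / GET_STATE / SET_STATE
 *	ecx = argument;		// requested state for SET_STATE
 *	edx = sc->smi_cmd;	// SMI command port, typically 0xb2
 *	outb(edx, eax & 0xff);	// traps into the BIOS
 *
 * For SET_OWNERSHIP, ESI additionally holds the 32-bit physical
 * address of the page containing the magic string.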
*/ static void set_ownership_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct set_ownership_data *data; data = arg; if (error) { data->result = error; return; } /* Copy in the magic string and send it by writing to the SMI port. */ strlcpy(data->buf, smist_magic, PAGE_SIZE); __asm __volatile( "movl $-1, %%edi\n\t" "out %%al, (%%dx)\n" : "=D" (data->result) : "a" (data->command), "b" (0), "c" (0), "d" (data->smi_cmd), "S" ((uint32_t)segs[0].ds_addr) ); } static int set_ownership(device_t dev) { struct smist_softc *sc; struct set_ownership_data cb_data; bus_dma_tag_t tag; bus_dmamap_t map; /* * Specify the region to store the magic string. Since its address is * passed to the BIOS in a 32-bit register, we have to make sure it is * located in a physical page below 4 GB (i.e., for PAE.) */ sc = device_get_softc(dev); if (bus_dma_tag_create(/*parent*/ NULL, /*alignment*/ PAGE_SIZE, /*no boundary*/ 0, /*lowaddr*/ BUS_SPACE_MAXADDR_32BIT, /*highaddr*/ BUS_SPACE_MAXADDR, NULL, NULL, /*maxsize*/ PAGE_SIZE, /*segments*/ 1, /*maxsegsize*/ PAGE_SIZE, 0, NULL, NULL, &tag) != 0) { device_printf(dev, "can't create mem tag\n"); return (ENXIO); } if (bus_dmamem_alloc(tag, &cb_data.buf, BUS_DMA_NOWAIT, &map) != 0) { bus_dma_tag_destroy(tag); device_printf(dev, "can't alloc mapped mem\n"); return (ENXIO); } /* Load the physical page map and take ownership in the callback. */ cb_data.smi_cmd = sc->smi_cmd; cb_data.command = sc->command; if (bus_dmamap_load(tag, map, cb_data.buf, PAGE_SIZE, set_ownership_cb, &cb_data, BUS_DMA_NOWAIT) != 0) { bus_dmamem_free(tag, cb_data.buf, map); bus_dma_tag_destroy(tag); device_printf(dev, "can't load mem\n"); return (ENXIO); } DPRINT(dev, "taking ownership over BIOS return %d\n", cb_data.result); bus_dmamap_unload(tag, map); bus_dmamem_free(tag, cb_data.buf, map); bus_dma_tag_destroy(tag); return (cb_data.result ? 
ENXIO : 0); } static int getset_state(struct smist_softc *sc, int *state, int function) { int new_state; int result; int eax; if (!sc) return (ENXIO); if (function != GET_STATE && function != SET_STATE) return (EINVAL); DPRINT(sc->dev, "calling GSI\n"); __asm __volatile( "movl $-1, %%edi\n\t" "out %%al, (%%dx)\n" : "=a" (eax), "=b" (new_state), "=D" (result) : "a" (sc->command), "b" (function), "c" (*state), "d" (sc->smi_cmd) ); DPRINT(sc->dev, "GSI returned: eax %.8x ebx %.8x edi %.8x\n", eax, new_state, result); *state = new_state & 1; switch (function) { case GET_STATE: if (eax) return (ENXIO); break; case SET_STATE: if (result) return (ENXIO); break; } return (0); } static void smist_identify(driver_t *driver, device_t parent) { struct piix4_pci_device *id; device_t piix4 = NULL; if (resource_disabled("ichst", 0)) return; /* Check for a supported processor */ if (cpu_vendor_id != CPU_VENDOR_INTEL) return; switch (cpu_id & 0xff0) { case 0x680: /* Pentium III [coppermine] */ case 0x6a0: /* Pentium III [Tualatin] */ break; default: return; } /* Check for a supported PCI-ISA bridge */ for (id = piix4_pci_devices; id->desc != NULL; ++id) { if ((piix4 = pci_find_device(id->vendor, id->device)) != NULL) break; } if (!piix4) return; if (bootverbose) printf("smist: found supported isa bridge %s\n", id->desc); if (device_find_child(parent, "smist", -1) != NULL) return; - if (BUS_ADD_CHILD(parent, 30, "smist", -1) == NULL) + if (BUS_ADD_CHILD(parent, 30, "smist", device_get_unit(parent)) + == NULL) device_printf(parent, "smist: add child failed\n"); } static int smist_probe(device_t dev) { struct smist_softc *sc; device_t ichss_dev, perf_dev; int sig, smi_cmd, command, smi_data, flags; int type; int rv; if (resource_disabled("smist", 0)) return (ENXIO); sc = device_get_softc(dev); /* * If the ACPI perf or ICH SpeedStep drivers have attached and not * just offering info, let them manage things. */ perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); if (perf_dev && device_is_attached(perf_dev)) { rv = CPUFREQ_DRV_TYPE(perf_dev, &type); if (rv == 0 && (type & CPUFREQ_FLAG_INFO_ONLY) == 0) return (ENXIO); } ichss_dev = device_find_child(device_get_parent(dev), "ichss", -1); if (ichss_dev && device_is_attached(ichss_dev)) return (ENXIO); int15_gsic_call(&sig, &smi_cmd, &command, &smi_data, &flags); if (bootverbose) device_printf(dev, "sig %.8x smi_cmd %.4x command %.2x " "smi_data %.4x flags %.8x\n", sig, smi_cmd, command, smi_data, flags); if (sig != -1) { sc->smi_cmd = smi_cmd; sc->smi_data = smi_data; /* * Sometimes int 15h 'GSIC' returns 0x80 for command, when * it is actually 0x82. The Windows driver will overwrite * this value given by the registry. */ if (command == 0x80) { device_printf(dev, "GSIC returned cmd 0x80, should be 0x82\n"); command = 0x82; } sc->command = (sig & 0xffffff00) | (command & 0xff); sc->flags = flags; } else { /* Give some default values */ sc->smi_cmd = 0xb2; sc->smi_data = 0xb3; sc->command = 0x47534982; sc->flags = 0; } device_set_desc(dev, "SpeedStep SMI"); return (-1500); } static int smist_attach(device_t dev) { struct smist_softc *sc; sc = device_get_softc(dev); sc->dev = dev; /* If we can't take ownership over BIOS, then bail out */ if (set_ownership(dev) != 0) return (ENXIO); /* Setup some defaults for our exported settings. 
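 *
 * (Editor's sketch, inferred from the code below: the two cf_setting
 * slots are indexed by the GSI state bit returned by getset_state().
 * Their frequencies start as CPUFREQ_VAL_UNKNOWN and are calibrated
 * lazily; the first smist_get() in a given state does roughly
 *
 *	cpu_est_clockrate(0, &rate);
 *	sc->sets[state].freq = rate / 1000000;	// Hz -> MHz
 *
 * and smist_settings() briefly switches into any state not yet
 * visited so that both entries get filled in.)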
*/ sc->sets[0].freq = CPUFREQ_VAL_UNKNOWN; sc->sets[0].volts = CPUFREQ_VAL_UNKNOWN; sc->sets[0].power = CPUFREQ_VAL_UNKNOWN; sc->sets[0].lat = 1000; sc->sets[0].dev = dev; sc->sets[1] = sc->sets[0]; cpufreq_register(dev); return (0); } static int smist_detach(device_t dev) { return (cpufreq_unregister(dev)); } static int smist_settings(device_t dev, struct cf_setting *sets, int *count) { struct smist_softc *sc; struct cf_setting set; int first, i; if (sets == NULL || count == NULL) return (EINVAL); if (*count < 2) { *count = 2; return (E2BIG); } sc = device_get_softc(dev); /* * Estimate frequencies for both levels, temporarily switching to * the other one if we haven't calibrated it yet. */ for (i = 0; i < 2; i++) { if (sc->sets[i].freq == CPUFREQ_VAL_UNKNOWN) { first = (i == 0) ? 1 : 0; smist_set(dev, &sc->sets[i]); smist_get(dev, &set); smist_set(dev, &sc->sets[first]); } } bcopy(sc->sets, sets, sizeof(sc->sets)); *count = 2; return (0); } static int smist_set(device_t dev, const struct cf_setting *set) { struct smist_softc *sc; int rv, state, req_state, try; /* Look up appropriate bit value based on frequency. */ sc = device_get_softc(dev); if (CPUFREQ_CMP(set->freq, sc->sets[0].freq)) req_state = 0; else if (CPUFREQ_CMP(set->freq, sc->sets[1].freq)) req_state = 1; else return (EINVAL); DPRINT(dev, "requested setting %d\n", req_state); rv = getset_state(sc, &state, GET_STATE); if (state == req_state) return (0); try = 3; do { rv = getset_state(sc, &req_state, SET_STATE); /* Sleep for 200 microseconds. This value is just a guess. */ if (rv) DELAY(200); } while (rv && --try); DPRINT(dev, "set_state return %d, tried %d times\n", rv, 4 - try); return (rv); } static int smist_get(device_t dev, struct cf_setting *set) { struct smist_softc *sc; uint64_t rate; int state; int rv; sc = device_get_softc(dev); rv = getset_state(sc, &state, GET_STATE); if (rv != 0) return (rv); /* If we haven't changed settings yet, estimate the current value. */ if (sc->sets[state].freq == CPUFREQ_VAL_UNKNOWN) { cpu_est_clockrate(0, &rate); sc->sets[state].freq = rate / 1000000; DPRINT(dev, "get calibrated new rate of %d\n", sc->sets[state].freq); } *set = sc->sets[state]; return (0); } static int smist_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); }