Index: head/share/man/man4/hwpstate_intel.4
===================================================================
--- head/share/man/man4/hwpstate_intel.4	(revision 357378)
+++ head/share/man/man4/hwpstate_intel.4	(revision 357379)
@@ -1,89 +1,97 @@
 .\"
 .\" Copyright (c) 2019 Intel Corporation
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 22, 2020
+.Dd February 1, 2020
 .Dt HWPSTATE_INTEL 4
 .Os
 .Sh NAME
 .Nm hwpstate_intel
 .Nd Intel Speed Shift Technology driver
 .Sh SYNOPSIS
 To compile this driver into your kernel
 place the following line in your kernel
 configuration file:
 .Bd -ragged -offset indent
 .Cd "device cpufreq"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 driver provides support for hardware-controlled performance states on
 Intel platforms, also known as Intel Speed Shift Technology.
 .Sh LOADER TUNABLES
 .Bl -tag -width indent
 .It Va hint.hwpstate_intel.0.disabled
 Can be used to disable
 .Nm ,
 allowing other compatible drivers to manage performance states, like
 .Xr est 4 .
-.Pq default 0
+Defaults to
+.Dq 0
+(enabled).
+.It Va machdep.hwpstate_pkg_ctrl
+Selects between package-level control (the default) and per-core control.
+.Dq 1
+selects package-level control and
+.Dq 0
+selects core-level control.
 .El
 .Sh SYSCTL VARIABLES
 The following
 .Xr sysctl 8
 values are available:
 .Bl -tag -width indent
 .It Va dev.hwpstate_intel.%d.%desc
 Describes the attached driver.
 .It dev.hwpstate_intel.0.%desc: Intel Speed Shift
 .It Va dev.hwpstate_intel.%d.%driver
 Driver in use, always hwpstate_intel.
 .It dev.hwpstate_intel.0.%driver: hwpstate_intel
 .It Va dev.hwpstate_intel.%d.%parent
 The cpu that is exposing these frequencies, for example
 .Va cpu0 .
 .It dev.hwpstate_intel.0.%parent: cpu0
 .It Va dev.hwpstate_intel.%d.epp
 Energy/Performance Preference.
 Valid values range from 0 to 100.
 Setting this field conveys a hint to the hardware regarding a preference
 towards performance (at value 0), energy efficiency (at value 100), or
 somewhere in between.
 .It dev.hwpstate_intel.0.epp: 0
 .El
 .Sh COMPATIBILITY
 .Nm
 is only found on supported Intel CPUs.
 .Sh SEE ALSO
 .Xr cpufreq 4
 .Rs
 .%T "Intel 64 and IA-32 Architectures Software Developer Manuals"
 .%U "http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html"
 .Re
 .Sh AUTHORS
 This manual page was written by
 .An D Scott Phillips Aq Mt scottph@FreeBSD.org .
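The epp knob documented above is a plain integer sysctl, so besides
sysctl(8) it can be driven programmatically.  A minimal userland sketch
using sysctlbyname(3), assuming unit 0; the program and its error
handling are illustrative only, not part of this change:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <err.h>
    #include <stdio.h>

    int
    main(void)
    {
            int epp;
            size_t len = sizeof(epp);

            /* Read the current preference (0..100). */
            if (sysctlbyname("dev.hwpstate_intel.0.epp", &epp, &len,
                NULL, 0) == -1)
                    err(1, "read epp");
            printf("current epp: %d\n", epp);

            epp = 50;       /* halfway between performance and efficiency */
            if (sysctlbyname("dev.hwpstate_intel.0.epp", NULL, NULL,
                &epp, sizeof(epp)) == -1)
                    err(1, "write epp");
            return (0);
    }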
Index: head/sys/x86/cpufreq/hwpstate_intel.c
===================================================================
--- head/sys/x86/cpufreq/hwpstate_intel.c	(revision 357378)
+++ head/sys/x86/cpufreq/hwpstate_intel.c	(revision 357379)
@@ -1,574 +1,617 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2018 Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted providing that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */

 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");

 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/pcpu.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>

 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/cputypes.h>
 #include <machine/specialreg.h>

 #include <contrib/dev/acpica/include/acpi.h>

 #include <dev/acpica/acpivar.h>

 #include <x86/cpufreq/hwpstate_intel_internal.h>

 #include "acpi_if.h"
 #include "cpufreq_if.h"

 extern uint64_t	tsc_freq;

 static int	intel_hwpstate_probe(device_t dev);
 static int	intel_hwpstate_attach(device_t dev);
 static int	intel_hwpstate_detach(device_t dev);
 static int	intel_hwpstate_suspend(device_t dev);
 static int	intel_hwpstate_resume(device_t dev);

 static int	intel_hwpstate_get(device_t dev, struct cf_setting *cf);
 static int	intel_hwpstate_type(device_t dev, int *type);

 static device_method_t intel_hwpstate_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	intel_hwpstate_identify),
 	DEVMETHOD(device_probe,		intel_hwpstate_probe),
 	DEVMETHOD(device_attach,	intel_hwpstate_attach),
 	DEVMETHOD(device_detach,	intel_hwpstate_detach),
 	DEVMETHOD(device_suspend,	intel_hwpstate_suspend),
 	DEVMETHOD(device_resume,	intel_hwpstate_resume),

 	/* cpufreq interface */
 	DEVMETHOD(cpufreq_drv_get,	intel_hwpstate_get),
 	DEVMETHOD(cpufreq_drv_type,	intel_hwpstate_type),

 	DEVMETHOD_END
 };

 struct hwp_softc {
 	device_t	dev;
 	bool		hwp_notifications;
 	bool		hwp_activity_window;
 	bool		hwp_pref_ctrl;
 	bool		hwp_pkg_ctrl;
+	bool		hwp_pkg_ctrl_en;
 	bool		hwp_perf_bias;

 	uint64_t	req; /* Cached copy of last request */

 	uint8_t		high;
 	uint8_t		guaranteed;
 	uint8_t		efficient;
 	uint8_t		low;
 };

 static devclass_t hwpstate_intel_devclass;
 static driver_t hwpstate_intel_driver = {
 	"hwpstate_intel",
 	intel_hwpstate_methods,
 	sizeof(struct hwp_softc),
 };

 DRIVER_MODULE(hwpstate_intel, cpu, hwpstate_intel_driver,
     hwpstate_intel_devclass, NULL, NULL);
 MODULE_VERSION(hwpstate_intel, 1);

+static bool hwpstate_pkg_ctrl_enable = true;
+SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN,
+    &hwpstate_pkg_ctrl_enable, 0,
+    "Set 1 (default) to enable package-level control, 0 to disable");
+
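/*
 * Reader's note, not part of the patch: the 0/8/16/24 shifts used by the
 * dump handler and the epp handler below follow the IA32_HWP_REQUEST
 * layout from the Intel SDM:
 *
 *	bits  7:0	Minimum_Performance
 *	bits 15:8	Maximum_Performance
 *	bits 23:16	Desired_Performance
 *	bits 31:24	Energy_Performance_Preference (EPP)
 *	bits 41:32	Activity_Window
 *	bit  42		Package_Control (absent in IA32_HWP_REQUEST_PKG)
 */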
"En" : "Dis")); if (data == 0) { ret = 0; goto out; } rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data); sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff); sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", (data >> 8) & 0xff); sbuf_printf(sb, "\tEfficient Performance: %03ju\n", (data >> 16) & 0xff); sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff); rdmsr_safe(MSR_IA32_HWP_REQUEST, &data); data2 = 0; if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL)) rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2); sbuf_putc(sb, '\n'); #define pkg_print(x, name, offset) do { \ if (!sc->hwp_pkg_ctrl || (data & x) != 0) \ sbuf_printf(sb, "\t%s: %03u\n", name, \ (unsigned)(data >> offset) & 0xff); \ else \ sbuf_printf(sb, "\t%s: %03u\n", name, \ (unsigned)(data2 >> offset) & 0xff); \ } while (0) pkg_print(IA32_HWP_REQUEST_EPP_VALID, "Requested Efficiency Performance Preference", 24); pkg_print(IA32_HWP_REQUEST_DESIRED_VALID, "Requested Desired Performance", 16); pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID, "Requested Maximum Performance", 8); pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID, "Requested Minimum Performance", 0); #undef pkg_print sbuf_putc(sb, '\n'); out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); ret = sbuf_finish(sb); if (ret == 0) ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); sbuf_delete(sb); return (ret); } static inline int percent_to_raw(int x) { MPASS(x <= 100 && x >= 0); return (0xff * x / 100); } /* * Given x * 10 in [0, 1000], round to the integer nearest x. * * This allows round-tripping nice human readable numbers through this * interface. Otherwise, user-provided percentages such as 25, 50, 75 get * rounded down to 24, 49, and 74, which is a bit ugly. */ static inline int round10(int xtimes10) { return ((xtimes10 + 5) / 10); } static inline int raw_to_percent(int x) { MPASS(x <= 0xff && x >= 0); return (round10(x * 1000 / 0xff)); } /* Range of MSR_IA32_ENERGY_PERF_BIAS is more limited: 0-0xf. */ static inline int percent_to_raw_perf_bias(int x) { /* * Round up so that raw values present as nice round human numbers and * also round-trip to the same raw value. */ MPASS(x <= 100 && x >= 0); return (((0xf * x) + 50) / 100); } static inline int raw_to_percent_perf_bias(int x) { /* Rounding to nice human numbers despite a step interval of 6.67%. */ MPASS(x <= 0xf && x >= 0); return (((x * 20) / 0xf) * 5); } static int sysctl_epp_select(SYSCTL_HANDLER_ARGS) { struct hwp_softc *sc; device_t dev; struct pcpu *pc; uint64_t requested; uint32_t val; int ret; dev = oidp->oid_arg1; sc = device_get_softc(dev); if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias) return (ENODEV); pc = cpu_get_pcpu(dev); if (pc == NULL) return (ENXIO); thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); thread_unlock(curthread); if (sc->hwp_pref_ctrl) { - ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &requested); + if (sc->hwp_pkg_ctrl_en) + ret = rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &requested); + else + ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &requested); if (ret) goto out; val = (requested & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24; val = raw_to_percent(val); } else { /* * If cpuid indicates EPP is not supported, the HWP controller * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM §14.4.4). * This register is per-core (but not HT). 
 static int
 sysctl_epp_select(SYSCTL_HANDLER_ARGS)
 {
 	struct hwp_softc *sc;
 	device_t dev;
 	struct pcpu *pc;
 	uint64_t requested;
 	uint32_t val;
 	int ret;

 	dev = oidp->oid_arg1;
 	sc = device_get_softc(dev);
 	if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias)
 		return (ENODEV);

 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);

 	thread_lock(curthread);
 	sched_bind(curthread, pc->pc_cpuid);
 	thread_unlock(curthread);

 	if (sc->hwp_pref_ctrl) {
-		ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &requested);
+		if (sc->hwp_pkg_ctrl_en)
+			ret = rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &requested);
+		else
+			ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &requested);
 		if (ret)
 			goto out;
 		val = (requested & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24;
 		val = raw_to_percent(val);
 	} else {
 		/*
 		 * If cpuid indicates EPP is not supported, the HWP controller
 		 * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM §14.4.4).
 		 * This register is per-core (but not HT).
 		 */
 		ret = rdmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, &requested);
 		if (ret)
 			goto out;
 		val = requested & IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK;
 		val = raw_to_percent_perf_bias(val);
 	}

 	MPASS(val >= 0 && val <= 100);

 	ret = sysctl_handle_int(oidp, &val, 0, req);
 	if (ret || req->newptr == NULL)
 		goto out;

 	if (val > 100) {
 		ret = EINVAL;
 		goto out;
 	}

 	if (sc->hwp_pref_ctrl) {
 		val = percent_to_raw(val);

 		requested &= ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE;
-		requested |= val << 24;
+		requested |= val << 24u;

-		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, requested);
+		if (sc->hwp_pkg_ctrl_en)
+			ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, requested);
+		else
+			ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, requested);
 	} else {
 		requested = percent_to_raw_perf_bias(val);
 		MPASS((requested & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) == 0);

 		ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, requested);
 	}

 out:
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);

 	return (ret);
 }

 void
 intel_hwpstate_identify(driver_t *driver, device_t parent)
 {
 	if (device_find_child(parent, "hwpstate_intel", -1) != NULL)
 		return;

 	if (cpu_vendor_id != CPU_VENDOR_INTEL)
 		return;

 	if (resource_disabled("hwpstate_intel", 0))
 		return;

 	/*
 	 * Intel SDM 14.4.1 (HWP Programming Interfaces):
 	 *   Availability of HWP baseline resource and capability,
 	 *   CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new
 	 *   architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES,
 	 *   IA32_HWP_REQUEST, IA32_HWP_STATUS.
 	 */
 	if ((cpu_power_eax & CPUTPM1_HWP) == 0)
 		return;

 	if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", -1) == NULL)
 		return;

 	if (bootverbose)
 		device_printf(parent, "hwpstate registered\n");
 }

 static int
 intel_hwpstate_probe(device_t dev)
 {

 	device_set_desc(dev, "Intel Speed Shift");
 	return (BUS_PROBE_NOWILDCARD);
 }
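/*
 * The CPUID gate used by intel_hwpstate_identify() above can also be
 * reproduced from userland; a sketch using the compiler-provided
 * <cpuid.h>, where has_hwp() is an invented name and not driver code.
 * The kernel instead caches leaf 06H EAX in cpu_power_eax and tests
 * CPUTPM1_HWP, which is this same bit 7.
 */
#if 0
#include <cpuid.h>

static int
has_hwp(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 06H, subleaf 0: thermal and power management. */
	if (__get_cpuid_count(0x6, 0, &eax, &ebx, &ecx, &edx) == 0)
		return (0);
	return ((eax & (1u << 7)) != 0);
}
#endif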
-/* FIXME: Need to support PKG variant */
 static int
 set_autonomous_hwp(struct hwp_softc *sc)
 {
 	struct pcpu *pc;
 	device_t dev;
 	uint64_t caps;
 	int ret;

 	dev = sc->dev;

 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);

 	thread_lock(curthread);
 	sched_bind(curthread, pc->pc_cpuid);
 	thread_unlock(curthread);

 	/* XXX: Many MSRs aren't readable until feature is enabled */
 	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
 	if (ret) {
 		/*
 		 * This is actually a package-level MSR, and only the first
 		 * write is not ignored.  So it is harmless to enable it across
 		 * all devices, and this allows us not to care especially in
 		 * which order cores (and packages) are probed.  This error
 		 * condition should not happen given we gate on the HWP CPUID
 		 * feature flag, if the Intel SDM is correct.
 		 */
 		device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n",
 		    pc->pc_cpuid, ret);
 		goto out;
 	}

 	ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req);
 	if (ret) {
 		device_printf(dev,
 		    "Failed to read HWP request MSR for cpu%d (%d)\n",
 		    pc->pc_cpuid, ret);
 		goto out;
 	}

 	ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps);
 	if (ret) {
 		device_printf(dev,
 		    "Failed to read HWP capabilities MSR for cpu%d (%d)\n",
 		    pc->pc_cpuid, ret);
 		goto out;
 	}

 	/*
 	 * High and low are static; "guaranteed" is dynamic; and efficient is
 	 * also dynamic.
 	 */
 	sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps);
 	sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps);
 	sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps);
 	sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps);

 	/* hardware autonomous selection determines the performance target */
 	sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE;

 	/* enable HW dynamic selection of window size */
 	sc->req &= ~IA32_HWP_ACTIVITY_WINDOW;

 	/* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */
 	sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE;
 	sc->req |= sc->low;

 	/* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */
 	sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE;
 	sc->req |= sc->high << 8;

-	ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
+	/* If supported, request package-level control for this CPU. */
+	if (sc->hwp_pkg_ctrl_en)
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
+		    IA32_HWP_REQUEST_PACKAGE_CONTROL);
+	else
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
 	if (ret) {
 		device_printf(dev,
-		    "Failed to setup autonomous HWP for cpu%d\n",
-		    pc->pc_cpuid);
+		    "Failed to setup%s autonomous HWP for cpu%d\n",
+		    sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
+		goto out;
 	}

+	/* If supported, write the PKG-wide control MSR. */
+	if (sc->hwp_pkg_ctrl_en) {
+		/*
+		 * "The structure of the IA32_HWP_REQUEST_PKG MSR
+		 * (package-level) is identical to the IA32_HWP_REQUEST MSR
+		 * with the exception of the Package Control field, which does
+		 * not exist." (Intel SDM §14.4.4)
+		 */
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
+		if (ret)
+			device_printf(dev,
+			    "Failed to set autonomous HWP for package\n");
+	}
+
 out:
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);

 	return (ret);
 }
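/*
 * Worked example, with invented capability values rather than anything
 * from this change: if IA32_HWP_CAPABILITIES reported Lowest = 0x08 and
 * Highest = 0x2a, the low 16 bits of sc->req assembled above become
 * 0x2a08 (maximum 0x2a in bits 15:8, minimum 0x08 in bits 7:0), while
 * the EPP byte keeps whatever the initial read returned.  The per-CPU
 * write then also sets IA32_HWP_REQUEST_PACKAGE_CONTROL (bit 42), which
 * makes the core defer to the package-wide IA32_HWP_REQUEST_PKG value
 * written immediately afterwards.
 */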
 static int
 intel_hwpstate_attach(device_t dev)
 {
 	struct hwp_softc *sc;
 	int ret;

 	sc = device_get_softc(dev);
 	sc->dev = dev;

 	/* eax */
 	if (cpu_power_eax & CPUTPM1_HWP_NOTIFICATION)
 		sc->hwp_notifications = true;
 	if (cpu_power_eax & CPUTPM1_HWP_ACTIVITY_WINDOW)
 		sc->hwp_activity_window = true;
 	if (cpu_power_eax & CPUTPM1_HWP_PERF_PREF)
 		sc->hwp_pref_ctrl = true;
 	if (cpu_power_eax & CPUTPM1_HWP_PKG)
 		sc->hwp_pkg_ctrl = true;

+	/* Allow administrators to disable pkg-level control. */
+	sc->hwp_pkg_ctrl_en = (sc->hwp_pkg_ctrl && hwpstate_pkg_ctrl_enable);
+
 	/* ecx */
 	if (cpu_power_ecx & CPUID_PERF_BIAS)
 		sc->hwp_perf_bias = true;

 	ret = set_autonomous_hwp(sc);
 	if (ret)
 		return (ret);

 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev),
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, sc, 0,
 	    intel_hwp_dump_sysctl_handler, "A", "");

 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
 	    "epp", CTLTYPE_INT | CTLFLAG_RWTUN, dev, 0,
 	    sysctl_epp_select, "I",
 	    "Efficiency/Performance Preference "
 	    "(range from 0, most performant, through 100, most efficient)");

 	return (cpufreq_register(dev));
 }

 static int
 intel_hwpstate_detach(device_t dev)
 {

 	return (cpufreq_unregister(dev));
 }

 static int
 intel_hwpstate_get(device_t dev, struct cf_setting *set)
 {
 	struct pcpu *pc;
 	uint64_t rate;
 	int ret;

 	if (set == NULL)
 		return (EINVAL);

 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);

 	memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set));
 	set->dev = dev;

 	ret = cpu_est_clockrate(pc->pc_cpuid, &rate);
 	if (ret == 0)
 		set->freq = rate / 1000000;

 	set->volts = CPUFREQ_VAL_UNKNOWN;
 	set->power = CPUFREQ_VAL_UNKNOWN;
 	set->lat = CPUFREQ_VAL_UNKNOWN;

 	return (0);
 }

 static int
 intel_hwpstate_type(device_t dev, int *type)
 {
 	if (type == NULL)
 		return (EINVAL);
 	*type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY |
 	    CPUFREQ_FLAG_UNCACHED;

 	return (0);
 }

 static int
 intel_hwpstate_suspend(device_t dev)
 {
 	return (0);
 }

 /*
  * Redo a subset of set_autonomous_hwp on resume; untested.  Without this,
  * testers observed that on resume MSR_IA32_HWP_REQUEST was bogus.
  */
 static int
 intel_hwpstate_resume(device_t dev)
 {
 	struct hwp_softc *sc;
 	struct pcpu *pc;
 	int ret;

 	sc = device_get_softc(dev);

 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);

 	thread_lock(curthread);
 	sched_bind(curthread, pc->pc_cpuid);
 	thread_unlock(curthread);

 	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
 	if (ret) {
 		device_printf(dev,
 		    "Failed to enable HWP for cpu%d after suspend (%d)\n",
 		    pc->pc_cpuid, ret);
 		goto out;
 	}

-	ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
+	if (sc->hwp_pkg_ctrl_en)
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
+		    IA32_HWP_REQUEST_PACKAGE_CONTROL);
+	else
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
 	if (ret) {
 		device_printf(dev,
-		    "Failed to setup autonomous HWP for cpu%d after suspend\n",
-		    pc->pc_cpuid);
+		    "Failed to set%s autonomous HWP for cpu%d after suspend\n",
+		    sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
+		goto out;
+	}
+	if (sc->hwp_pkg_ctrl_en) {
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
+		if (ret)
+			device_printf(dev,
+			    "Failed to set autonomous HWP for package after suspend\n");
 	}

 out:
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);

 	return (ret);
 }
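The dump handler registered in intel_hwpstate_attach() exposes a string
sysctl under debug.<nameunit>; for unit 0 that would be
debug.hwpstate_intel0 (flagged CTLFLAG_SKIP, so it is hidden from a plain
sysctl -a listing).  A short userland sketch that fetches it, using the
usual two-call sysctlbyname(3) pattern for variable-length values; the
program is illustrative only, not part of this change:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <err.h>
    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
            char *buf;
            size_t len;

            /* First call: learn the required buffer size. */
            if (sysctlbyname("debug.hwpstate_intel0", NULL, &len,
                NULL, 0) == -1)
                    err(1, "size");
            if ((buf = malloc(len)) == NULL)
                    err(1, "malloc");
            /* Second call: fetch the NUL-terminated dump. */
            if (sysctlbyname("debug.hwpstate_intel0", buf, &len,
                NULL, 0) == -1)
                    err(1, "read");
            fputs(buf, stdout);
            free(buf);
            return (0);
    }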