Changeset View
Standalone View
sys/x86/cpufreq/hwpstate_intel.c
- This file was added.
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |||||
* | |||||
* Copyright (c) 2018 Intel Corporation | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted providing that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR | |||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | |||||
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||||
* POSSIBILITY OF SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include <sys/types.h> | |||||
#include <sys/sbuf.h> | |||||
#include <sys/module.h> | |||||
#include <sys/systm.h> | |||||
#include <sys/errno.h> | |||||
#include <sys/param.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/bus.h> | |||||
#include <sys/cpu.h> | |||||
#include <sys/smp.h> | |||||
#include <sys/proc.h> | |||||
#include <sys/sched.h> | |||||
#include <machine/cpu.h> | |||||
#include <machine/md_var.h> | |||||
#include <machine/cputypes.h> | |||||
#include <machine/specialreg.h> | |||||
#include <contrib/dev/acpica/include/acpi.h> | |||||
#include <dev/acpica/acpivar.h> | |||||
#include "acpi_if.h" | |||||
#include "cpufreq_if.h" | |||||
kib: The header is already included at line 43 above. Is the second time needed ? | |||||
extern uint64_t tsc_freq; | |||||
Done Inline ActionsIf you export the value with SYSCTL_UINT, then the type of the var better be uint. Or use SYSCTL_U32. kib: If you export the value with SYSCTL_UINT, then the type of the var better be uint. Or use… | |||||
bool intel_speed_shift = true; | |||||
cemUnsubmitted Done Inline ActionsGiven the core kernel refers to this variable (ACPI), this driver cannot be compiled out (even as a module). I mean, I've said before and will repeat that I think this mechanism should die anyway, but — as-is, it creates some additional limitations. (E.g., the "device cpufreq" line in the new manual page is entirely moot since an amd64 kernel without the cpufreq device won't link.) cem: Given the core kernel refers to this variable (ACPI), this driver cannot be compiled out (even… | |||||
SYSCTL_BOOL(_machdep, OID_AUTO, intel_speed_shift, CTLFLAG_RDTUN, &intel_speed_shift, | |||||
Done Inline ActionsThis doesn't match the extern uint32_t declarations of the variable. Style(9) nit: spaces around = operator. cem: This doesn't match the `extern uint32_t` declarations of the variable.
Style(9) nit: spaces… | |||||
0, "Enable Intel Speed Shift (HWP)"); | |||||
static void intel_hwpstate_identify(driver_t *driver, device_t parent); | |||||
static int intel_hwpstate_probe(device_t dev); | |||||
static int intel_hwpstate_attach(device_t dev); | |||||
static int intel_hwpstate_detach(device_t dev); | |||||
static int intel_hwpstate_get(device_t dev, struct cf_setting *cf); | |||||
static int intel_hwpstate_type(device_t dev, int *type); | |||||
Done Inline ActionsI'd add a comment referring to est_identify, as well as explaining why we do this funky thing for now. And IMO, the preferred commented-out code style is #if 0 / foo / #endif, but I'm not sure if that's just personal preference or something we actually follow consistently in the tree. cem: I'd add a comment referring to est_identify, as well as explaining why we do this funky thing… | |||||
static device_method_t intel_hwpstate_methods[] = { | |||||
/* Device interface */ | |||||
DEVMETHOD(device_identify, intel_hwpstate_identify), | |||||
DEVMETHOD(device_probe, intel_hwpstate_probe), | |||||
DEVMETHOD(device_attach, intel_hwpstate_attach), | |||||
DEVMETHOD(device_detach, intel_hwpstate_detach), | |||||
/* cpufreq interface */ | |||||
DEVMETHOD(cpufreq_drv_get, intel_hwpstate_get), | |||||
DEVMETHOD(cpufreq_drv_type, intel_hwpstate_type), | |||||
DEVMETHOD_END | |||||
}; | |||||
Done Inline ActionsDEVMETHOD_END is canonical. I don't feel strongly about it, though. cem: `DEVMETHOD_END` is canonical. I don't feel strongly about it, though. | |||||
struct hwp_softc { | |||||
device_t dev; | |||||
bool hwp_notifications; | |||||
bool hwp_activity_window; | |||||
bool hwp_pref_ctrl; | |||||
bool hwp_pkg_ctrl; | |||||
uint64_t req; /* Cached copy of last request */ | |||||
uint8_t high; | |||||
uint8_t guaranteed; | |||||
uint8_t efficient; | |||||
uint8_t low; | |||||
}; | |||||
static devclass_t hwpstate_intel_devclass; | |||||
static driver_t hwpstate_intel_driver = { | |||||
"hwpstate_intel", | |||||
intel_hwpstate_methods, | |||||
sizeof(struct hwp_softc), | |||||
}; | |||||
Not Done Inline ActionsMinor style nit: we prefer NULL for null pointer values. cem: Minor style nit: we prefer `NULL` for null pointer values. | |||||
/* | |||||
* NB: This must run before the est and acpi_perf module!!!! | |||||
Done Inline ActionsThis comment is now mildly stale. cem: This comment is now mildly stale. | |||||
Done Inline Actionswe shouldn't be dependent on the ordering now, so i've taken away the order and the comment. scottph: we shouldn't be dependent on the ordering now, so i've taken away the order and the comment. | |||||
Not Done Inline Actionsperfect cem: perfect | |||||
* | |||||
* If a user opts in to hwp, but the CPU doesn't support it, we need to find that | |||||
* out before est loads or else we won't be able to use est as a backup. | |||||
*/ | |||||
DRIVER_MODULE_ORDERED(hwpstate_intel, cpu, hwpstate_intel_driver, | |||||
hwpstate_intel_devclass, 0, 0, SI_ORDER_FIRST); | |||||
Done Inline ActionsThe canonical way to do something like this would be with a DEVICE_PROBE(9) implementation. There is a manual page and also some documentation in kern/device_if.m. All drivers that identify themselves as candidates for a given devclass are PROBEd to determine which driver should be attached. It is possible for multiple drivers to successfully PROBE a given device; the priority is used to select the best driver for a given device. For Intel CPUs, you would have est probe a lower priority and hwpstate_intel probe a higher priority (if available), on the same device or pseudo-device. cem: The canonical way to do something like this would be with a `DEVICE_PROBE(9)` implementation. | |||||
Done Inline ActionsI do agree what's there now is a kludge, however, the way that the device tree looks today is that cpufreq is a child of cpu, and the controller driver (in this case hwpstate_intel) is also a child of cpu. Each controller driver has its own devclass. So that leaves a few options. The most obvious are:
I am/was in favor of #2, however it was a pretty large change and the quick and dirty way (which jhb@ recommended) seemed the most expedient to get the feature landed. bwidawsk: I do agree what's there now is a kludge, however, the way that the device tree looks today is… | |||||
Not Done Inline ActionsYeah, that's fair. #2 seems like a reasonable approach to me. I don't know enough about the drawbacks of #1 to dismiss it. cem: Yeah, that's fair. #2 seems like a reasonable approach to me. I don't know enough about the… | |||||
static int | |||||
intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
device_t dev; | |||||
struct pcpu *pc; | |||||
struct sbuf *sb; | |||||
struct hwp_softc *sc; | |||||
uint64_t data, data2; | |||||
int ret; | |||||
Not Done Inline ActionsYou can save a lot of vertical space by oring the conditions instead of using separate ifs. kib: You can save a lot of vertical space by oring the conditions instead of using separate ifs. | |||||
Done Inline ActionsI personally like to have things broken out individually so it makes things very obvious. However, I'm fine to change it if generally, people condense things. Can you please advise? bwidawsk: I personally like to have things broken out individually so it makes things very obvious. | |||||
Done Inline ActionsStyle(9) only uses vertical space before multi line comment and after the block which was started by that comment. kib: Style(9) only uses vertical space before multi line comment and after the block which was… | |||||
sc = (struct hwp_softc *)arg1; | |||||
dev = sc->dev; | |||||
pc = cpu_get_pcpu(dev); | |||||
if (pc == NULL) | |||||
return (ENXIO); | |||||
sb = sbuf_new_for_sysctl(NULL, NULL, 1024, req); | |||||
sbuf_putc(sb, '\n'); | |||||
thread_lock(curthread); | |||||
sched_bind(curthread, pc->pc_cpuid); | |||||
thread_unlock(curthread); | |||||
rdmsr_safe(MSR_IA32_PM_ENABLE, &data); | |||||
sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid, | |||||
((data & 1) ? "En" : "Dis")); | |||||
Done Inline ActionsI think this call can sleep and lock some VM locks as well. It is not good to lock while the thread is bound. How hard is to restructure the code to read all MSRs first, then unbind, then do printfs ? kib: I think this call can sleep and lock some VM locks as well. It is not good to lock while the… | |||||
Done Inline ActionsIn practice, I don't think this will ever sleep or allocate in the locked section; none of the formats are large enough to exceed the pre-allocated 1024 byte buffer. In that case, however, it would make sense not to have an automatic drain function and just explicitly SYSCTL_OUT at the end. cem: In practice, I don't think this will ever sleep or allocate in the locked section; none of the… | |||||
Done Inline ActionsSo I can punt until later? bwidawsk: So I can punt until later? | |||||
Done Inline ActionsNo, you should still initialize sb differently so it doesn't have a drain function. sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL); cem: No, you should still initialize `sb` differently so it doesn't have a drain function.
`sb =… | |||||
Done Inline ActionsThis one is still open and easy to fix. cem: This one is still open and easy to fix. | |||||
Done Inline ActionsSorry missed this one in reading through the old comments. I think it's fixed properly now. scottph: Sorry missed this one in reading through the old comments. I think it's fixed properly now. | |||||
Not Done Inline ActionsLooks good to me, thanks! Alternatively you could just read the 1-4 64-bit MSRs in the CPU-bound section, storing them on the stack, and use sbuf printing only after unbinding the CPU. That'd avoid malloc/free and also avoid possibly truncating output. I'm not requesting any change, it's just another valid approach. cem: Looks good to me, thanks!
Alternatively you could just read the 1-4 64-bit MSRs in the CPU… | |||||
Done Inline ActionsThat seems like a reasonable idea but I left it as-is for now. scottph: That seems like a reasonable idea but I left it as-is for now. | |||||
Not Done Inline Actionsworks for me. cem: works for me. | |||||
if (data == 0) { | |||||
Done Inline ActionsPlease add a symbol for the bit name. kib: Please add a symbol for the bit name. | |||||
ret = 0; | |||||
goto out; | |||||
} | |||||
rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data); | |||||
sbuf_printf(sb, "\tHighest Performance: %03lu\n", data & 0xff); | |||||
sbuf_printf(sb, "\tGuaranteed Performance: %03lu\n", (data >> 8) & 0xff); | |||||
sbuf_printf(sb, "\tEfficient Performance: %03lu\n", (data >> 16) & 0xff); | |||||
sbuf_printf(sb, "\tLowest Performance: %03lu\n", (data >> 24) & 0xff); | |||||
rdmsr_safe(MSR_IA32_HWP_REQUEST, &data); | |||||
if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL)) { | |||||
rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2); | |||||
} | |||||
sbuf_putc(sb, '\n'); | |||||
#define pkg_print(x, name, offset) do { \ | |||||
if (!sc->hwp_pkg_ctrl || (data & x) != 0) \ | |||||
sbuf_printf(sb, "\t%s: %03lu\n", name, (data >> offset) & 0xff);\ | |||||
else \ | |||||
Done Inline ActionsStyle: put all declarations at the start of the function. Sorry. kib: Style: put all declarations at the start of the function. Sorry. | |||||
sbuf_printf(sb, "\t%s: %03lu\n", name, (data2 >> offset) & 0xff);\ | |||||
} while (0) | |||||
pkg_print(IA32_HWP_REQUEST_EPP_VALID, | |||||
"Requested Efficiency Performance Preference", 24); | |||||
pkg_print(IA32_HWP_REQUEST_DESIRED_VALID, | |||||
"Requested Desired Performance", 16); | |||||
pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID, | |||||
"Requested Maximum Performance", 8); | |||||
pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID, | |||||
"Requested Minimum Performance", 0); | |||||
#undef pkg_print | |||||
sbuf_putc(sb, '\n'); | |||||
out: | |||||
thread_lock(curthread); | |||||
sched_unbind(curthread); | |||||
thread_unlock(curthread); | |||||
ret = sbuf_finish(sb); | |||||
if (ret == 0) | |||||
ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); | |||||
Done Inline ActionsIn here, after finish and before delete, you would add: if (ret == 0) ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); cem: In here, after `finish` and before `delete`, you would add:
```
if (ret == 0)
ret =… | |||||
cemUnsubmitted Done Inline ActionsThis is good but you missed the sbuf_new change that goes along with it above. cem: This is good but you missed the `sbuf_new` change that goes along with it above. | |||||
sbuf_delete(sb); | |||||
return (ret); | |||||
} | |||||
Done Inline ActionsToo many blank lines. kib: Too many blank lines. | |||||
Done Inline Actionsinline keyword is probably gratuitous (below as well). The function is small enough (certainly for non-DEBUG builds) to be inlined automatically, and even if it is not, these routines are only invoked from an (administrative) sysctl anyway — there isn't going to be any significant performance impact either way. It is fine to leave it if you prefer, though. cem: `inline` keyword is probably gratuitous (below as well).
The function is small enough… | |||||
static inline int | |||||
Done Inline ActionsI'd avoid double underscore names here too cem: I'd avoid double underscore names here too | |||||
percent_to_raw(int x) | |||||
{ | |||||
MPASS(x <= 100 && x >= 0); | |||||
return (0xff * x / 100); | |||||
} | |||||
static inline int | |||||
raw_to_percent(int x) | |||||
{ | |||||
MPASS(x <= 0xff && x >= 0); | |||||
return (x * 100 / 0xff); | |||||
} | |||||
static int | |||||
sysctl_epp_select(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
device_t dev; | |||||
struct pcpu *pc; | |||||
uint64_t requested; | |||||
uint32_t val; | |||||
int ret; | |||||
dev = oidp->oid_arg1; | |||||
pc = cpu_get_pcpu(dev); | |||||
if (pc == NULL) | |||||
return (ENXIO); | |||||
Done Inline ActionsCan we get symbolic names for the bits ? kib: Can we get symbolic names for the bits ? | |||||
thread_lock(curthread); | |||||
sched_bind(curthread, pc->pc_cpuid); | |||||
thread_unlock(curthread); | |||||
rdmsr_safe(MSR_IA32_HWP_REQUEST, &requested); | |||||
val = (requested & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24; | |||||
val = raw_to_percent(val); | |||||
MPASS(val >= 0 && val <= 100); | |||||
ret = sysctl_handle_int(oidp, &val, 0, req); | |||||
if (ret || req->newptr == NULL) | |||||
Done Inline ActionsThe compiler will probably complain about the val < 0 check since val is unsigned. if (val > 100) ... alone is sufficient. cem: The compiler will probably complain about the `val < 0` check since `val` is unsigned. `if… | |||||
goto out; | |||||
if (val < 0) | |||||
Done Inline ActionsDon't you leak the binding after the end of the loop ? kib: Don't you leak the binding after the end of the loop ? | |||||
val = 0; | |||||
Not Done Inline ActionsIt is enough to do it only once, after the loop. In fact, very careful code might save cpu binding, if any, before doing this, and restore it after the loop. See e.g. sys/dev/cpuctl for example. I do not think it is warranted there. kib: It is enough to do it only once, after the loop.
In fact, very careful code might save cpu… | |||||
Done Inline ActionsUsually I'd reject invalid values rather than clamping them to the valid range, unless it makes sense that the valid range may grow or shrink in the future; here it doesn't (IMO). cem: Usually I'd reject invalid values rather than clamping them to the valid range, unless it makes… | |||||
Done Inline ActionsThe valid range can indeed change from platform to platform. Of course a user of this sysctl could clamp themselves, but it seemed easier to provide the service for them to keep the API a little more stable. Marking as done, but feel free to complain again and I will adjust. bwidawsk: The valid range can indeed change from platform to platform. Of course a user of this sysctl… | |||||
Done Inline ActionsThe logical percent range can change to something other than 0-100%? I'm confused. cem: The logical percent range can change to something other than 0-100%? I'm confused. | |||||
if (val > 100) | |||||
val = 100; | |||||
cemUnsubmitted Done Inline ActionsI'd still encourage just rejecting invalid percentages at the sysctl boundary. For this use, negative and >100 percentages do not make sense. cem: I'd still encourage just rejecting invalid percentages at the sysctl boundary. For this use… | |||||
val = percent_to_raw(val); | |||||
requested &= ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE; | |||||
requested |= val << 24; | |||||
Done Inline ActionsYou need a blank line after '{' if there is no locals. kib: You need a blank line after '{' if there is no locals. | |||||
wrmsr_safe(MSR_IA32_HWP_REQUEST, requested); | |||||
out: | |||||
thread_lock(curthread); | |||||
sched_unbind(curthread); | |||||
thread_unlock(curthread); | |||||
return (ret); | |||||
} | |||||
static void | |||||
intel_hwpstate_identify(driver_t *driver, device_t parent) | |||||
{ | |||||
uint32_t regs[4]; | |||||
if (!intel_speed_shift) | |||||
return; | |||||
if (device_find_child(parent, "hwpstate_intel", -1) != NULL) { | |||||
Done Inline ActionsIsn't this backwards? If we already attached intel hwpstate, we're using intel_speed_shift and it should remain true. cem: Isn't this backwards? If we already attached intel hwpstate, we're using `intel_speed_shift`… | |||||
intel_speed_shift = false; | |||||
return; | |||||
} | |||||
if (cpu_vendor_id != CPU_VENDOR_INTEL) { | |||||
intel_speed_shift = false; | |||||
return; | |||||
} | |||||
if (resource_disabled("hwpstate_intel", 0)) { | |||||
intel_speed_shift = false; | |||||
return; | |||||
} | |||||
/* | |||||
* Intel SDM 14.4.1 (HWP Programming Interfaces): | |||||
* The CPUID instruction allows software to discover the presence of | |||||
* HWP support in an Intel processor. Specifically, execute CPUID | |||||
* instruction with EAX=06H as input will return 5 bit flags covering | |||||
* the following aspects in bits 7 through 11 of CPUID.06H:EAX. | |||||
*/ | |||||
Not Done Inline ActionsFWIW, you can just check the flag in the cached cpu_power_eax instead of cpuid(6) + regs[0] after rS353712. cpu_power_foo represents the cpuid 6 leaf. (The name isn't especially creative; sorry.) Fine to leave as-is, especially if you want to MFC this. cem: FWIW, you can just check the flag in the cached `cpu_power_eax` instead of `cpuid(6)` + `regs… | |||||
Done Inline ActionsI'd prefer to do that as a follow on change for ease of mfc. scottph: I'd prefer to do that as a follow on change for ease of mfc. | |||||
Not Done Inline Actionsworks for me. cem: works for me. | |||||
if (cpu_high < 6) | |||||
goto out; | |||||
/* | |||||
* Intel SDM 14.4.1 (HWP Programming Interfaces): | |||||
* Availability of HWP baseline resource and capability, | |||||
* CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new | |||||
Done Inline ActionsThis is probably not necessary, or should be conditional on bootverbose. Because the device isn't marked "quiet," the description set in intel_hwpstate_probe is automatically printed at boot time when the device is attached. E.g., hwpstate0: <Cool`n'Quiet 2.0> on cpu0 Where hwpstate is the driver, and "Cool`n'Quiet 2.0" is the string from device_set_desc() below. cem: This is probably not necessary, or should be conditional on `bootverbose`.
Because the… | |||||
* architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES, | |||||
* IA32_HWP_REQUEST, IA32_HWP_STATUS. | |||||
*/ | |||||
Done Inline Actionswhat does this number represent? cem: what does this number represent? | |||||
Done Inline ActionsIt's a mask of features supported by the hardware. I don't yet do anything with the features anyway, so I will drop it. bwidawsk: It's a mask of features supported by the hardware. I don't yet do anything with the features… | |||||
do_cpuid(6, regs); | |||||
if ((regs[0] & CPUTPM1_HWP) == 0) | |||||
goto out; | |||||
Done Inline ActionsBUS_PROBE_NOWILDCARD might be more appropriate for now. I don't think the current value will break anything, though, so feel free to leave as-is. (If/when est and intel_hwpstate are separated and probe for the same newbus pseudo-device, BUS_PROBE_DEFAULT for this one and BUS_PROBE_LOW_PRIORITY for est might make sense.) cem: `BUS_PROBE_NOWILDCARD` might be more appropriate for now. I don't think the current value will… | |||||
if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", -1) == NULL) | |||||
goto out; | |||||
device_printf(parent, "hwpstate registered"); | |||||
return; | |||||
out: | |||||
device_printf(parent, "Speed Shift unavailable. Falling back to est\n"); | |||||
intel_speed_shift = false; | |||||
} | |||||
static int | |||||
intel_hwpstate_probe(device_t dev) | |||||
{ | |||||
device_t perf_dev; | |||||
Done Inline ActionsI'm not sure this check is robust; I don't understand why we would bail if an INFO_ONLY driver is also attached under the same parent? But not if a !INFO_ONLY driver is attached? The condition in est is the opposite — *allow* an INFO_ONLY sibling, but bail if there are existing !INFO_ONLY siblings. cem: I'm not sure this check is robust; I don't understand why we would bail if an INFO_ONLY driver… | |||||
int ret, type; | |||||
/* | |||||
* It is currently impossible for conflicting cpufreq driver to be loaded at | |||||
* this point since it's protected by the boolean intel_speed_shift. | |||||
* However, if at some point the knobs are made a bit more robust to | |||||
Done Inline Actionsstyle(9) nit: space before =. Ditto below cem: style(9) nit: space before `=`. Ditto below | |||||
* control cpufreq, or, at some point INFO_ONLY drivers are permitted, | |||||
* this should make sure things work properly. | |||||
* | |||||
* IOW: This is a no-op for now. | |||||
*/ | |||||
perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); | |||||
if (perf_dev && device_is_attached(perf_dev)) { | |||||
ret= CPUFREQ_DRV_TYPE(perf_dev, &type); | |||||
if (ret == 0) { | |||||
if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) { | |||||
device_printf(dev, "Avoiding acpi_perf\n"); | |||||
return (ENXIO); | |||||
} | |||||
} | |||||
} | |||||
perf_dev = device_find_child(device_get_parent(dev), "est", -1); | |||||
if (perf_dev && device_is_attached(perf_dev)) { | |||||
ret= CPUFREQ_DRV_TYPE(perf_dev, &type); | |||||
if (ret == 0) { | |||||
if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) { | |||||
device_printf(dev, "Avoiding EST\n"); | |||||
return (ENXIO); | |||||
} | |||||
} | |||||
} | |||||
device_set_desc(dev, "Intel Speed Shift"); | |||||
return (BUS_PROBE_DEFAULT); | |||||
} | |||||
/* FIXME: Need to support PKG variant */ | |||||
static int | |||||
set_autonomous_hwp(struct hwp_softc *sc) | |||||
{ | |||||
struct pcpu *pc; | |||||
device_t dev; | |||||
uint64_t caps; | |||||
int ret; | |||||
dev = sc->dev; | |||||
pc = cpu_get_pcpu(dev); | |||||
if (pc == NULL) | |||||
return (ENXIO); | |||||
thread_lock(curthread); | |||||
sched_bind(curthread, pc->pc_cpuid); | |||||
thread_unlock(curthread); | |||||
/* XXX: Many MSRs aren't readable until feature is enabled */ | |||||
ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1); | |||||
if (ret) { | |||||
device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n", | |||||
pc->pc_cpuid, ret); | |||||
goto out; | |||||
} | |||||
ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req); | |||||
if (ret) | |||||
return (ret); | |||||
Not Done Inline ActionsDitto other comment about cpuid cem: Ditto other comment about cpuid | |||||
ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps); | |||||
if (ret) | |||||
return (ret); | |||||
sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps); | |||||
sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps); | |||||
sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps); | |||||
sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps); | |||||
/* hardware autonomous selection determines the performance target */ | |||||
sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE; | |||||
/* enable HW dynamic selection of window size */ | |||||
sc->req &= ~IA32_HWP_ACTIVITY_WINDOW; | |||||
/* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */ | |||||
sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE; | |||||
sc->req |= sc->low; | |||||
/* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */ | |||||
sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE; | |||||
sc->req |= sc->high << 8; | |||||
ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); | |||||
if (ret) { | |||||
device_printf(dev, | |||||
"Failed to setup autonomous HWP for cpu%d (file a bug)\n", | |||||
pc->pc_cpuid); | |||||
} | |||||
out: | |||||
thread_lock(curthread); | |||||
sched_unbind(curthread); | |||||
thread_unlock(curthread); | |||||
return (ret); | |||||
} | |||||
static int | |||||
intel_hwpstate_attach(device_t dev) | |||||
{ | |||||
struct hwp_softc *sc; | |||||
uint32_t regs[4]; | |||||
int ret; | |||||
KASSERT(device_find_child(device_get_parent(dev), "est", -1) == NULL, | |||||
("EST driver already loaded")); | |||||
KASSERT(device_find_child(device_get_parent(dev), "acpi_perf", -1) == NULL, | |||||
("ACPI driver already loaded")); | |||||
sc = device_get_softc(dev); | |||||
sc->dev = dev; | |||||
do_cpuid(6, regs); | |||||
if (regs[0] & CPUTPM1_HWP_NOTIFICATION) | |||||
sc->hwp_notifications = true; | |||||
if (regs[0] & CPUTPM1_HWP_ACTIVITY_WINDOW) | |||||
sc->hwp_activity_window = true; | |||||
if (regs[0] & CPUTPM1_HWP_PERF_PREF) | |||||
sc->hwp_pref_ctrl = true; | |||||
if (regs[0] & CPUTPM1_HWP_PKG) | |||||
Done Inline Actionsit doesn't make sense to me to size buf to 128 but hardcode 18 here. usually i'd suggest snprintf(foo, sizeof(foo), ...) and then hardcode foo[N], if anything. either way, there may be no need for this buf? you could just use device_get_nameunit. it would be "debug.intel_hwpstateN" instead of "debug.intel_hwp_debugN", but arguably that is better anyway. I'm not sure that placing debug sysctls under "debug." instead of just using CTLFLAG_SKIP and placing them in the normal device_get_sysctl_tree location is canonical; it may be. cem: it doesn't make sense to me to size `buf` to 128 but hardcode 18 here.
usually i'd suggest… | |||||
sc->hwp_pkg_ctrl = true; | |||||
ret = set_autonomous_hwp(sc); | |||||
if (ret) | |||||
return (ret); | |||||
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), | |||||
SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev), | |||||
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, | |||||
sc, 0, intel_hwp_dump_sysctl_handler, "A", ""); | |||||
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), | |||||
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, | |||||
"epp", CTLTYPE_INT | CTLFLAG_RWTUN, dev, sizeof(dev), | |||||
sysctl_epp_select, "I", | |||||
"Efficiency/Performance Preference (0-100)"); | |||||
return (cpufreq_register(dev)); | |||||
} | |||||
static int | |||||
intel_hwpstate_detach(device_t dev) | |||||
{ | |||||
return (cpufreq_unregister(dev)); | |||||
} | |||||
static int | |||||
intel_hwpstate_get(device_t dev, struct cf_setting *set) | |||||
{ | |||||
struct pcpu *pc; | |||||
uint64_t rate; | |||||
int ret; | |||||
if (set == NULL) | |||||
return (EINVAL); | |||||
pc = cpu_get_pcpu(dev); | |||||
if (pc == NULL) | |||||
return (ENXIO); | |||||
memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set)); | |||||
Done Inline ActionsIs this a leftover debugging print? It doesn't seem like something that should go to console on every invocation of the GET method. cem: Is this a leftover debugging print? It doesn't seem like something that should go to console… | |||||
set->dev = dev; | |||||
ret = cpu_est_clockrate(pc->pc_cpuid, &rate); | |||||
if (ret == 0) | |||||
set->freq = rate / 1000000; | |||||
set->volts = CPUFREQ_VAL_UNKNOWN; | |||||
set->power = CPUFREQ_VAL_UNKNOWN; | |||||
set->lat = CPUFREQ_VAL_UNKNOWN; | |||||
return (0); | |||||
} | |||||
static int | |||||
intel_hwpstate_type(device_t dev, int *type) | |||||
{ | |||||
if (type == NULL) | |||||
return (EINVAL); | |||||
*type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED; | |||||
return (0); | |||||
} |
The header is already included at line 43 above. Is the second time needed ?