Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F145784957
D18028.id53817.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
30 KB
Referenced Files
None
Subscribers
None
D18028.id53817.diff
View Options
Index: sys/conf/files.amd64
===================================================================
--- sys/conf/files.amd64
+++ sys/conf/files.amd64
@@ -707,7 +707,8 @@
x86/bios/vpd.c optional vpd
x86/cpufreq/powernow.c optional cpufreq
x86/cpufreq/est.c optional cpufreq
-x86/cpufreq/hwpstate.c optional cpufreq
+x86/cpufreq/hwpstate_amd.c optional cpufreq
+x86/cpufreq/hwpstate_intel.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci
x86/iommu/intel_ctx.c optional acpi acpi_dmar pci
Index: sys/conf/files.i386
===================================================================
--- sys/conf/files.i386
+++ sys/conf/files.i386
@@ -558,7 +558,8 @@
x86/bios/smbios.c optional smbios
x86/bios/vpd.c optional vpd
x86/cpufreq/est.c optional cpufreq
-x86/cpufreq/hwpstate.c optional cpufreq
+x86/cpufreq/hwpstate_amd.c optional cpufreq
+x86/cpufreq/hwpstate_intel.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/cpufreq/powernow.c optional cpufreq
x86/cpufreq/smist.c optional cpufreq
Index: sys/dev/acpica/acpi_perf.c
===================================================================
--- sys/dev/acpica/acpi_perf.c
+++ sys/dev/acpica/acpi_perf.c
@@ -50,6 +50,8 @@
#include "cpufreq_if.h"
+/*
+ * Defined as a bool in x86/cpufreq/hwpstate_intel.c; the declaration must
+ * use the same type or reads cover bytes beyond the object.
+ */
+extern bool intel_speed_shift;
+
/*
* Support for ACPI processor performance states (Px) according to
* section 8.3.3 of the ACPI 2.0c specification.
@@ -144,6 +146,9 @@
ACPI_HANDLE handle;
device_t dev;
+ if (intel_speed_shift == true)
+ return;
+
/* Make sure we're not being doubly invoked. */
if (device_find_child(parent, "acpi_perf", -1) != NULL)
return;
Index: sys/kern/kern_cpu.c
===================================================================
--- sys/kern/kern_cpu.c
+++ sys/kern/kern_cpu.c
@@ -76,6 +76,7 @@
int all_count;
int max_mhz;
device_t dev;
+ device_t freq_dev;
struct sysctl_ctx_list sysctl_ctx;
struct task startup_task;
struct cf_level *levels_buf;
@@ -142,6 +143,11 @@
SYSCTL_INT(_debug_cpufreq, OID_AUTO, verbose, CTLFLAG_RWTUN, &cf_verbose, 1,
"Print verbose debugging messages");
+/*
+ * This is called as the result of a hardware specific frequency control driver
+ * calling cpufreq_register. It provides a general interface for system wide
+ * frequency controls and operates on a per-CPU basis.
+ */
static int
cpufreq_attach(device_t dev)
{
@@ -149,7 +155,6 @@
struct pcpu *pc;
device_t parent;
uint64_t rate;
- int numdevs;
CF_DEBUG("initializing %s\n", device_get_nameunit(dev));
sc = device_get_softc(dev);
@@ -164,6 +169,7 @@
sc->max_mhz = cpu_get_nominal_mhz(dev);
/* If that fails, try to measure the current rate */
if (sc->max_mhz <= 0) {
+ CF_DEBUG("Unable to obtain nominal frequency.\n");
pc = cpu_get_pcpu(dev);
if (cpu_est_clockrate(pc->pc_cpuid, &rate) == 0)
sc->max_mhz = rate / 1000000;
@@ -171,15 +177,6 @@
sc->max_mhz = CPUFREQ_VAL_UNKNOWN;
}
- /*
- * Only initialize one set of sysctls for all CPUs. In the future,
- * if multiple CPUs can have different settings, we can move these
- * sysctls to be under every CPU instead of just the first one.
- */
- numdevs = devclass_get_count(cpufreq_dc);
- if (numdevs > 1)
- return (0);
-
CF_DEBUG("initializing one-time data for %s\n",
device_get_nameunit(dev));
sc->levels_buf = malloc(CF_MAX_LEVELS * sizeof(*sc->levels_buf),
@@ -216,7 +213,6 @@
{
struct cpufreq_softc *sc;
struct cf_saved_freq *saved_freq;
- int numdevs;
CF_DEBUG("shutdown %s\n", device_get_nameunit(dev));
sc = device_get_softc(dev);
@@ -227,12 +223,7 @@
free(saved_freq, M_TEMP);
}
- /* Only clean up these resources when the last device is detaching. */
- numdevs = devclass_get_count(cpufreq_dc);
- if (numdevs == 1) {
- CF_DEBUG("final shutdown for %s\n", device_get_nameunit(dev));
- free(sc->levels_buf, M_DEVBUF);
- }
+ free(sc->levels_buf, M_DEVBUF);
return (0);
}
@@ -421,28 +412,80 @@
return (error);
}
+/*
+ * Query the driver's current setting and hand back the frequency it reports,
+ * or -1 when the driver's get method fails.
+ */
+static int
+__get_frequency(device_t dev)
+{
+	struct cf_setting cur;
+	int error;
+
+	error = CPUFREQ_DRV_GET(dev, &cur);
+	return (error != 0 ? -1 : cur.freq);
+}
+
+/*
+ * Return the index of the entry in levels[] whose total frequency equals the
+ * frequency currently reported by the driver dev.
+ *
+ * On failure a negated errno is returned: -ENXIO if dev is not attached,
+ * -ENODEV if the driver cannot report a frequency, and -ENOENT if none of the
+ * count levels matches.
+ */
+static int
+__get_level(device_t dev, struct cf_level *levels, int count)
+{
+	int freq, i;
+
+	if (!device_is_attached(dev))
+		return (-ENXIO);
+
+	/* Keep the value read from the driver; it is what we match against. */
+	freq = __get_frequency(dev);
+	if (freq < 0)
+		return (-ENODEV);
+
+	for (i = 0; i < count; i++) {
+		if (freq == levels[i].total_set.freq)
+			return (i);
+	}
+
+	return (-ENOENT);
+}
+
+/*
+ * Used by the cpufreq core, this function will populate *level with the current
+ * frequency as either determined by a cached value sc->curr_level, or in the
+ * case the lower level driver has set the CPUFREQ_FLAG_UNCACHED flag, it will
+ * obtain the frequency from the driver itself.
+ */
static int
cf_get_method(device_t dev, struct cf_level *level)
{
struct cpufreq_softc *sc;
struct cf_level *levels;
- struct cf_setting *curr_set, set;
+ struct cf_setting *curr_set;
struct pcpu *pc;
- device_t *devs;
- int bdiff, count, diff, error, i, n, numdevs;
+ int bdiff, count, diff, error, i, type;
uint64_t rate;
sc = device_get_softc(dev);
error = 0;
levels = NULL;
- /* If we already know the current frequency, we're done. */
+ /*
+ * If we already know the current frequency, and the driver didn't ask
+ * for uncached usage, we're done.
+ */
CF_MTX_LOCK(&sc->lock);
curr_set = &sc->curr_level.total_set;
- if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) {
+ error = CPUFREQ_DRV_TYPE(sc->freq_dev, &type);
+ if ((curr_set->freq != CPUFREQ_VAL_UNKNOWN) &&
+ (error == 0 && ((type & CPUFREQ_FLAG_UNCACHED) == 0))) {
CF_DEBUG("get returning known freq %d\n", curr_set->freq);
goto out;
}
+
+ /* If the driver wants to always report back the real frequency, first
+ * try the driver and if that fails, fall back to estimating.
+ */
+ if (error == 0 && (type & CPUFREQ_FLAG_UNCACHED)) {
+ struct cf_setting set;
+ if (CPUFREQ_DRV_GET(sc->freq_dev, &set) != 0)
+ goto estimate;
+ sc->curr_level.total_set = set;
+ CF_DEBUG("get returning immediate freq %d\n", curr_set->freq);
+ goto out;
+ }
+
CF_MTX_UNLOCK(&sc->lock);
/*
@@ -454,18 +497,13 @@
levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
if (levels == NULL)
return (ENOMEM);
- error = CPUFREQ_LEVELS(sc->dev, levels, &count);
+ error = CPUFREQ_LEVELS(dev, levels, &count);
if (error) {
if (error == E2BIG)
printf("cpufreq: need to increase CF_MAX_LEVELS\n");
free(levels, M_TEMP);
return (error);
}
- error = device_get_children(device_get_parent(dev), &devs, &numdevs);
- if (error) {
- free(levels, M_TEMP);
- return (error);
- }
/*
* Reacquire the lock and search for the given level.
@@ -476,24 +514,19 @@
* The estimation code below catches this case though.
*/
CF_MTX_LOCK(&sc->lock);
- for (n = 0; n < numdevs && curr_set->freq == CPUFREQ_VAL_UNKNOWN; n++) {
- if (!device_is_attached(devs[n]))
- continue;
- if (CPUFREQ_DRV_GET(devs[n], &set) != 0)
- continue;
- for (i = 0; i < count; i++) {
- if (set.freq == levels[i].total_set.freq) {
- sc->curr_level = levels[i];
- break;
- }
- }
- }
- free(devs, M_TEMP);
+ i = __get_level(sc->freq_dev, levels, count);
+ if (i >= 0)
+ sc->curr_level = levels[i];
+ else
+ CF_DEBUG("Couldn't find supported level for %s\n",
+ device_get_nameunit(sc->freq_dev));
+
if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) {
CF_DEBUG("get matched freq %d from drivers\n", curr_set->freq);
goto out;
}
+estimate:
/*
* We couldn't find an exact match, so attempt to estimate and then
* match against a level.
@@ -525,6 +558,66 @@
return (error);
}
+/*
+ * Collect the settings offered by the frequency driver dev.  Absolute
+ * settings are inserted directly into the level list of the cpufreq device
+ * that sits next to the driver; relative settings are appended to rel_sets
+ * so the caller can integrate them against an absolute maximum later.
+ *
+ * Returns 0 when the driver is not attached, is info-only, or offers no
+ * settings.  Otherwise returns the error from the driver's type/settings
+ * method or from cpufreq_insert_abs(), ENXIO if no cpufreq device exists,
+ * ENOMEM on allocation failure, or EINVAL for an unknown driver type.
+ */
+static int
+__add_levels(device_t dev, struct cf_setting_lst *rel_sets)
+{
+	struct cf_setting_array *set_arr;
+	struct cf_setting *sets;
+	struct cpufreq_softc *sc;
+	device_t cf_dev;
+	int type, set_count, error;
+
+	/* Skip devices that aren't ready. */
+	if (!device_is_attached(dev))
+		return (0);
+
+	/*
+	 * dev is the hardware driver, so its softc is driver private.  The
+	 * level lists live in the softc of the sibling "cpufreq" device.
+	 */
+	cf_dev = device_find_child(device_get_parent(dev), "cpufreq", -1);
+	if (cf_dev == NULL)
+		return (ENXIO);
+	sc = device_get_softc(cf_dev);
+
+	/*
+	 * Get settings, skipping drivers that offer no settings or
+	 * provide settings for informational purposes only.
+	 */
+	error = CPUFREQ_DRV_TYPE(dev, &type);
+	if (error || (type & CPUFREQ_FLAG_INFO_ONLY)) {
+		if (error == 0) {
+			CF_DEBUG("skipping info-only driver %s\n",
+			    device_get_nameunit(dev));
+		}
+		return (error);
+	}
+
+	/*
+	 * Use a per-call buffer.  A function-static array would be shared by
+	 * every cpufreq instance, and each instance only holds its own lock.
+	 */
+	sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
+	if (sets == NULL)
+		return (ENOMEM);
+
+	set_count = MAX_SETTINGS;
+	error = CPUFREQ_DRV_SETTINGS(dev, sets, &set_count);
+	if (error || set_count == 0)
+		goto out;
+
+	/* Add the settings to our absolute/relative lists. */
+	switch (type & CPUFREQ_TYPE_MASK) {
+	case CPUFREQ_TYPE_ABSOLUTE:
+		error = cpufreq_insert_abs(sc, sets, set_count);
+		break;
+	case CPUFREQ_TYPE_RELATIVE:
+		CF_DEBUG("adding %d relative settings\n", set_count);
+		set_arr = malloc(sizeof(*set_arr), M_TEMP, M_NOWAIT);
+		if (set_arr == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		bcopy(sets, set_arr->sets, set_count * sizeof(*sets));
+		set_arr->count = set_count;
+		TAILQ_INSERT_TAIL(rel_sets, set_arr, link);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+out:
+	free(sets, M_TEMP);
+	return (error);
+}
+
static int
cf_levels_method(device_t dev, struct cf_level *levels, int *count)
{
@@ -532,10 +625,8 @@
struct cf_setting_lst rel_sets;
struct cpufreq_softc *sc;
struct cf_level *lev;
- struct cf_setting *sets;
struct pcpu *pc;
- device_t *devs;
- int error, i, numdevs, set_count, type;
+ int error, i;
uint64_t rate;
if (levels == NULL || count == NULL)
@@ -543,67 +634,22 @@
TAILQ_INIT(&rel_sets);
sc = device_get_softc(dev);
- error = device_get_children(device_get_parent(dev), &devs, &numdevs);
- if (error)
- return (error);
- sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
- if (sets == NULL) {
- free(devs, M_TEMP);
- return (ENOMEM);
- }
/* Get settings from all cpufreq drivers. */
CF_MTX_LOCK(&sc->lock);
- for (i = 0; i < numdevs; i++) {
- /* Skip devices that aren't ready. */
- if (!device_is_attached(devs[i]))
- continue;
-
- /*
- * Get settings, skipping drivers that offer no settings or
- * provide settings for informational purposes only.
- */
- error = CPUFREQ_DRV_TYPE(devs[i], &type);
- if (error || (type & CPUFREQ_FLAG_INFO_ONLY)) {
- if (error == 0) {
- CF_DEBUG("skipping info-only driver %s\n",
- device_get_nameunit(devs[i]));
- }
- continue;
- }
- set_count = MAX_SETTINGS;
- error = CPUFREQ_DRV_SETTINGS(devs[i], sets, &set_count);
- if (error || set_count == 0)
- continue;
-
- /* Add the settings to our absolute/relative lists. */
- switch (type & CPUFREQ_TYPE_MASK) {
- case CPUFREQ_TYPE_ABSOLUTE:
- error = cpufreq_insert_abs(sc, sets, set_count);
- break;
- case CPUFREQ_TYPE_RELATIVE:
- CF_DEBUG("adding %d relative settings\n", set_count);
- set_arr = malloc(sizeof(*set_arr), M_TEMP, M_NOWAIT);
- if (set_arr == NULL) {
- error = ENOMEM;
- goto out;
- }
- bcopy(sets, set_arr->sets, set_count * sizeof(*sets));
- set_arr->count = set_count;
- TAILQ_INSERT_TAIL(&rel_sets, set_arr, link);
- break;
- default:
- error = EINVAL;
- }
- if (error)
- goto out;
- }
+ error = __add_levels(sc->freq_dev, &rel_sets);
+ if (error)
+ goto out;
/*
* If there are no absolute levels, create a fake one at 100%. We
* then cache the clockrate for later use as our base frequency.
*/
if (TAILQ_EMPTY(&sc->all_levels)) {
+ struct cf_setting set;
+
+ CF_DEBUG("No absolute levels returned by driver\n");
+
if (sc->max_mhz == CPUFREQ_VAL_UNKNOWN) {
sc->max_mhz = cpu_get_nominal_mhz(dev);
/*
@@ -617,10 +663,10 @@
sc->max_mhz = rate / 1000000;
}
}
- memset(&sets[0], CPUFREQ_VAL_UNKNOWN, sizeof(*sets));
- sets[0].freq = sc->max_mhz;
- sets[0].dev = NULL;
- error = cpufreq_insert_abs(sc, sets, 1);
+ memset(&set, CPUFREQ_VAL_UNKNOWN, sizeof(set));
+ set.freq = sc->max_mhz;
+ set.dev = NULL;
+ error = cpufreq_insert_abs(sc, &set, 1);
if (error)
goto out;
}
@@ -665,8 +711,6 @@
TAILQ_REMOVE(&rel_sets, set_arr, link);
free(set_arr, M_TEMP);
}
- free(devs, M_TEMP);
- free(sets, M_TEMP);
return (error);
}
@@ -1016,6 +1060,7 @@
{
struct cpufreq_softc *sc;
device_t cf_dev, cpu_dev;
+ int ret;
/* Add a sysctl to get each driver's settings separately. */
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
@@ -1031,6 +1076,7 @@
if ((cf_dev = device_find_child(cpu_dev, "cpufreq", -1))) {
sc = device_get_softc(cf_dev);
sc->max_mhz = CPUFREQ_VAL_UNKNOWN;
+ sc->freq_dev = dev;
return (0);
}
@@ -1040,40 +1086,37 @@
return (ENOMEM);
device_quiet(cf_dev);
- return (device_probe_and_attach(cf_dev));
+ ret = device_probe_and_attach(cf_dev);
+ if (ret)
+ return (ret);
+
+ sc = device_get_softc(cf_dev);
+ sc->freq_dev = dev;
+ return (ret);
}
int
cpufreq_unregister(device_t dev)
{
- device_t cf_dev, *devs;
- int cfcount, devcount, error, i, type;
+ device_t cf_dev;
+ struct cpufreq_softc *sc;
/*
* If this is the last cpufreq child device, remove the control
* device as well. We identify cpufreq children by calling a method
* they support.
*/
- error = device_get_children(device_get_parent(dev), &devs, &devcount);
- if (error)
- return (error);
+
cf_dev = device_find_child(device_get_parent(dev), "cpufreq", -1);
if (cf_dev == NULL) {
device_printf(dev,
"warning: cpufreq_unregister called with no cpufreq device active\n");
- free(devs, M_TEMP);
return (0);
}
- cfcount = 0;
- for (i = 0; i < devcount; i++) {
- if (!device_is_attached(devs[i]))
- continue;
- if (CPUFREQ_DRV_TYPE(devs[i], &type) == 0)
- cfcount++;
- }
- if (cfcount <= 1)
- device_delete_child(device_get_parent(cf_dev), cf_dev);
- free(devs, M_TEMP);
+
+ sc = device_get_softc(cf_dev);
+ MPASS(sc->freq_dev == dev);
+ device_delete_child(device_get_parent(cf_dev), cf_dev);
return (0);
}
Index: sys/modules/cpufreq/Makefile
===================================================================
--- sys/modules/cpufreq/Makefile
+++ sys/modules/cpufreq/Makefile
@@ -11,7 +11,7 @@
.PATH: ${SRCTOP}/sys/x86/cpufreq
SRCS+= acpi_if.h opt_acpi.h
-SRCS+= est.c hwpstate.c p4tcc.c powernow.c
+SRCS+= est.c hwpstate_amd.c p4tcc.c powernow.c hwpstate_intel.c
.endif
.if ${MACHINE} == "i386"
Index: sys/sys/cpu.h
===================================================================
--- sys/sys/cpu.h
+++ sys/sys/cpu.h
@@ -120,11 +120,16 @@
* information about settings but rely on another machine-dependent driver
* for actually performing the frequency transition (e.g., ACPI performance
* states of type "functional fixed hardware.")
+ *
+ * The "uncached" flag tells the cpufreq core to obtain the real, instantaneous
+ * frequency from the driver through CPUFREQ_DRV_GET instead of returning its
+ * cached level.  It is probably a bug not to combine this with "info only".
*/
#define CPUFREQ_TYPE_MASK 0xffff
#define CPUFREQ_TYPE_RELATIVE (1<<0)
#define CPUFREQ_TYPE_ABSOLUTE (1<<1)
#define CPUFREQ_FLAG_INFO_ONLY (1<<16)
+#define CPUFREQ_FLAG_UNCACHED (1<<17)
/*
* When setting a level, the caller indicates the priority of this request.
Index: sys/x86/cpufreq/est.c
===================================================================
--- sys/x86/cpufreq/est.c
+++ sys/x86/cpufreq/est.c
@@ -50,6 +50,8 @@
#include <dev/acpica/acpivar.h>
#include "acpi_if.h"
+/*
+ * Defined as a bool in x86/cpufreq/hwpstate_intel.c; the declaration must
+ * use the same type or reads cover bytes beyond the object.
+ */
+extern bool intel_speed_shift;
+
/* Status/control registers (from the IA-32 System Programming Guide). */
#define MSR_PERF_STATUS 0x198
#define MSR_PERF_CTL 0x199
@@ -916,6 +918,10 @@
{
device_t child;
+ /* If the Intel driver is handling this */
+ if (intel_speed_shift == true)
+ return;
+
/* Make sure we're not being doubly invoked. */
if (device_find_child(parent, "est", -1) != NULL)
return;
Index: sys/x86/cpufreq/hwpstate_amd.c
===================================================================
--- sys/x86/cpufreq/hwpstate_amd.c
+++ sys/x86/cpufreq/hwpstate_amd.c
@@ -124,11 +124,11 @@
static int hwpstate_goto_pstate(device_t dev, int pstate_id);
static int hwpstate_verbose;
-SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RWTUN,
+SYSCTL_INT(_debug, OID_AUTO, hwpstate_amd_verbose, CTLFLAG_RWTUN,
&hwpstate_verbose, 0, "Debug hwpstate");
static int hwpstate_verify;
-SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN,
+SYSCTL_INT(_debug, OID_AUTO, hwpstate_amd_verify, CTLFLAG_RWTUN,
&hwpstate_verify, 0, "Verify P-state after setting");
static device_method_t hwpstate_methods[] = {
@@ -151,14 +151,14 @@
{0, 0}
};
-static devclass_t hwpstate_devclass;
-static driver_t hwpstate_driver = {
- "hwpstate",
+static devclass_t hwpstate_amd_devclass;
+static driver_t hwpstate_amd_driver = {
+ "hwpstate_amd",
hwpstate_methods,
sizeof(struct hwpstate_softc),
};
-DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0);
+DRIVER_MODULE(hwpstate_amd, cpu, hwpstate_amd_driver, hwpstate_amd_devclass, 0, 0);
/*
* Go to Px-state on all cpus considering the limit.
@@ -312,7 +312,7 @@
hwpstate_identify(driver_t *driver, device_t parent)
{
- if (device_find_child(parent, "hwpstate", -1) != NULL)
+ if (device_find_child(parent, "hwpstate_amd", -1) != NULL)
return;
if (cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10)
@@ -326,10 +326,10 @@
return;
}
- if (resource_disabled("hwpstate", 0))
+ if (resource_disabled("hwpstate_amd", 0))
return;
- if (BUS_ADD_CHILD(parent, 10, "hwpstate", -1) == NULL)
+ if (BUS_ADD_CHILD(parent, 10, "hwpstate_amd", -1) == NULL)
device_printf(parent, "hwpstate: add child failed\n");
}
Index: sys/x86/cpufreq/hwpstate_intel.c
===================================================================
--- /dev/null
+++ sys/x86/cpufreq/hwpstate_intel.c
@@ -0,0 +1,505 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/sbuf.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/smp.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/cputypes.h>
+#include <machine/specialreg.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+
+#include <dev/acpica/acpivar.h>
+
+#include "acpi_if.h"
+#include "cpufreq_if.h"
+
+extern uint64_t tsc_freq;
+
+/*
+ * Global switch for the Speed Shift (HWP) driver, exported as the tunable
+ * machdep.intel_speed_shift.  It defaults to on and is cleared by
+ * intel_hwpstate_identify() when HWP turns out to be unavailable.  The est
+ * and acpi_perf identify routines return early while it is set.
+ */
+bool intel_speed_shift=true;
+SYSCTL_BOOL(_machdep, OID_AUTO, intel_speed_shift, CTLFLAG_RDTUN, &intel_speed_shift,
+ 0, "Enable Intel Speed Shift (HWP)");
+
+static void intel_hwpstate_identify(driver_t *driver, device_t parent);
+static int intel_hwpstate_probe(device_t dev);
+static int intel_hwpstate_attach(device_t dev);
+static int intel_hwpstate_detach(device_t dev);
+
+static int intel_hwpstate_get(device_t dev, struct cf_setting *cf);
+static int intel_hwpstate_type(device_t dev, int *type);
+
+/*
+ * Method table for the hwpstate_intel driver.  Besides the usual newbus
+ * entry points it implements only the "get" and "type" cpufreq driver
+ * methods; no set or settings method is registered here.
+ */
+static device_method_t intel_hwpstate_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, intel_hwpstate_identify),
+ DEVMETHOD(device_probe, intel_hwpstate_probe),
+ DEVMETHOD(device_attach, intel_hwpstate_attach),
+ DEVMETHOD(device_detach, intel_hwpstate_detach),
+
+ /* cpufreq interface */
+ DEVMETHOD(cpufreq_drv_get, intel_hwpstate_get),
+ DEVMETHOD(cpufreq_drv_type, intel_hwpstate_type),
+
+ {0, 0}
+};
+
+/* Per-device softc for the hwpstate_intel driver. */
+struct hwp_softc {
+ /* This driver's device; recorded in intel_hwpstate_attach(). */
+ device_t dev;
+ /* HWP feature bits from CPUID.06H:EAX, recorded at attach time. */
+ bool hwp_notifications;
+ bool hwp_activity_window;
+ bool hwp_pref_ctrl;
+ bool hwp_pkg_ctrl;
+
+ uint64_t req; /* Cached copy of last request */
+
+ /* Performance levels decoded from IA32_HWP_CAPABILITIES. */
+ uint8_t high;
+ uint8_t guaranteed;
+ uint8_t efficient;
+ uint8_t low;
+};
+
+/* Driver description: name, method table and softc size. */
+static devclass_t hwpstate_intel_devclass;
+static driver_t hwpstate_intel_driver = {
+ "hwpstate_intel",
+ intel_hwpstate_methods,
+ sizeof(struct hwp_softc),
+};
+
+/*
+ * NB: This must run before the est and acpi_perf modules.
+ *
+ * If a user opts in to HWP, but the CPU doesn't support it, we need to find
+ * that out before est loads or else we won't be able to use est as a backup.
+ * That is why the module is registered with SI_ORDER_FIRST.
+ */
+DRIVER_MODULE_ORDERED(hwpstate_intel, cpu, hwpstate_intel_driver,
+ hwpstate_intel_devclass, 0, 0, SI_ORDER_FIRST);
+
+/*
+ * Sysctl handler that renders the HWP state of the CPU behind this device as
+ * text: whether HWP is enabled, the four IA32_HWP_CAPABILITIES performance
+ * levels, and the fields of the current HWP request.
+ *
+ * arg1 is the driver's struct hwp_softc.  The calling thread is bound to the
+ * target CPU while the MSRs are read and unbound again before returning.
+ * Returns ENXIO if the device has no pcpu, otherwise the result of
+ * sbuf_finish().
+ */
+static int
+intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev;
+	struct pcpu *pc;
+	struct sbuf *sb;
+	struct hwp_softc *sc;
+	uint64_t data, data2;
+	bool use_pkg;
+	int ret;
+
+	sc = (struct hwp_softc *)arg1;
+	dev = sc->dev;
+
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	sb = sbuf_new_for_sysctl(NULL, NULL, 1024, req);
+	sbuf_putc(sb, '\n');
+	thread_lock(curthread);
+	sched_bind(curthread, pc->pc_cpuid);
+	thread_unlock(curthread);
+
+	/*
+	 * The rdmsr_safe() results are not checked below, so pre-zero the
+	 * outputs to avoid printing stack garbage if a read fails.
+	 */
+	data = 0;
+	rdmsr_safe(MSR_IA32_PM_ENABLE, &data);
+	sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid,
+	    ((data & 1) ? "En" : "Dis"));
+
+	/* Nothing more to report unless the enable bit (bit 0) is set. */
+	if ((data & 1) == 0)
+		goto out;
+
+	data = 0;
+	rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data);
+	sbuf_printf(sb, "\tHighest Performance: %03u\n",
+	    (unsigned)(data & 0xff));
+	sbuf_printf(sb, "\tGuaranteed Performance: %03u\n",
+	    (unsigned)((data >> 8) & 0xff));
+	sbuf_printf(sb, "\tEfficient Performance: %03u\n",
+	    (unsigned)((data >> 16) & 0xff));
+	sbuf_printf(sb, "\tLowest Performance: %03u\n",
+	    (unsigned)((data >> 24) & 0xff));
+
+	data = 0;
+	rdmsr_safe(MSR_IA32_HWP_REQUEST, &data);
+
+	/*
+	 * The package-wide request is only consulted when package control is
+	 * both supported and switched on in the per-thread request.
+	 */
+	data2 = 0;
+	use_pkg = sc->hwp_pkg_ctrl &&
+	    (data & IA32_HWP_REQUEST_PACKAGE_CONTROL) != 0;
+	if (use_pkg)
+		rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2);
+
+	sbuf_putc(sb, '\n');
+
+	/*
+	 * Print one request field.  The per-thread value is used unless
+	 * package control is in effect and the per-thread request does not
+	 * mark the field as valid (bit x).
+	 */
+#define pkg_print(x, name, offset) do {					\
+	if (!use_pkg || (data & (x)) != 0)				\
+		sbuf_printf(sb, "\t%s: %03u\n", name,			\
+		    (unsigned)((data >> (offset)) & 0xff));		\
+	else								\
+		sbuf_printf(sb, "\t%s: %03u\n", name,			\
+		    (unsigned)((data2 >> (offset)) & 0xff));		\
+} while (0)
+
+	pkg_print(IA32_HWP_REQUEST_EPP_VALID,
+	    "Requested Efficiency Performance Preference", 24);
+	pkg_print(IA32_HWP_REQUEST_DESIRED_VALID,
+	    "Requested Desired Performance", 16);
+	pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID,
+	    "Requested Maximum Performance", 8);
+	pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID,
+	    "Requested Minimum Performance", 0);
+#undef pkg_print
+
+	sbuf_putc(sb, '\n');
+
+out:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	ret = sbuf_finish(sb);
+	sbuf_delete(sb);
+
+	return (ret);
+}
+
+/* Scale a percentage in [0, 100] onto the raw range [0, 0xff]. */
+static inline int
+__percent_to_raw(int x)
+{
+	int scaled;
+
+	MPASS(x >= 0 && x <= 100);
+	scaled = 0xff * x;
+	return (scaled / 100);
+}
+
+/* Scale a raw value in [0, 0xff] onto a percentage in [0, 100]. */
+static inline int
+__raw_to_percent(int x)
+{
+	int scaled;
+
+	MPASS(x >= 0 && x <= 0xff);
+	scaled = x * 100;
+	return (scaled / 0xff);
+}
+
+/*
+ * Sysctl handler for the per-device "epp" knob.  It reads or updates the
+ * energy/performance preference field of IA32_HWP_REQUEST (the byte at bit
+ * 24) on the CPU behind the device, expressed as a percentage.
+ *
+ * Written values outside 0-100 are clamped to that range.  The calling
+ * thread is bound to the target CPU for the MSR accesses.  Returns ENXIO if
+ * the device has no pcpu, otherwise the error from the MSR access or from
+ * sysctl_handle_int(), or 0 on success.
+ */
+static int
+sysctl_epp_select(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev;
+	struct pcpu *pc;
+	uint64_t requested;
+	int val;	/* signed so that negative input can clamp to 0 */
+	int ret;
+
+	dev = oidp->oid_arg1;
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	thread_lock(curthread);
+	sched_bind(curthread, pc->pc_cpuid);
+	thread_unlock(curthread);
+
+	/* Do not report or modify a value we failed to read. */
+	ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &requested);
+	if (ret)
+		goto out;
+
+	val = (int)((requested &
+	    IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24);
+	val = __raw_to_percent(val);
+
+	MPASS(val >= 0 && val <= 100);
+
+	ret = sysctl_handle_int(oidp, &val, 0, req);
+	if (ret || req->newptr == NULL)
+		goto out;
+
+	if (val < 0)
+		val = 0;
+	if (val > 100)
+		val = 100;
+
+	val = __percent_to_raw(val);
+
+	requested &= ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE;
+	/* Widen before shifting; 0xff << 24 does not fit in a signed int. */
+	requested |= (uint64_t)val << 24;
+
+	ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, requested);
+
+out:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	return (ret);
+}
+
+/*
+ * Identify method: add a "hwpstate_intel" child to the given cpu device when
+ * the processor advertises HWP in CPUID.06H:EAX.
+ *
+ * The est and acpi_perf identify routines stay out of the way for as long as
+ * intel_speed_shift is true, so this function clears the flag on every path
+ * on which this driver will not be driving the CPU.
+ */
+static void
+intel_hwpstate_identify(driver_t *driver, device_t parent)
+{
+	uint32_t regs[4];
+
+	if (intel_speed_shift == false)
+		return;
+
+	if (device_find_child(parent, "hwpstate_intel", -1) != NULL)
+		return;
+
+	/*
+	 * Not an Intel CPU, or disabled by hint: release the other cpufreq
+	 * drivers quietly instead of leaving them locked out.
+	 */
+	if (cpu_vendor_id != CPU_VENDOR_INTEL ||
+	    resource_disabled("hwpstate_intel", 0)) {
+		intel_speed_shift = false;
+		return;
+	}
+
+	/*
+	 * Intel SDM 14.4.1 (HWP Programming Interfaces):
+	 *   The CPUID instruction allows software to discover the presence of
+	 *   HWP support in an Intel processor. Specifically, execute CPUID
+	 *   instruction with EAX=06H as input will return 5 bit flags covering
+	 *   the following aspects in bits 7 through 11 of CPUID.06H:EAX.
+	 */
+	if (cpu_high < 6)
+		goto out;
+
+	/*
+	 * Intel SDM 14.4.1 (HWP Programming Interfaces):
+	 *   Availability of HWP baseline resource and capability,
+	 *   CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new
+	 *   architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES,
+	 *   IA32_HWP_REQUEST, IA32_HWP_STATUS.
+	 */
+	do_cpuid(6, regs);
+	if ((regs[0] & CPUTPM1_HWP) == 0)
+		goto out;
+
+	if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", -1) == NULL)
+		goto out;
+
+	device_printf(parent, "hwpstate registered (0x%x)\n",
+	    (regs[0] >> 7) & 0x1f);
+	return;
+
+out:
+	device_printf(parent, "Speed Shift unavailable. Falling back to est\n");
+	intel_speed_shift = false;
+}
+
+/*
+ * Report whether the sibling device called name (under the same parent as
+ * dev) is attached and describes itself as an info-only cpufreq driver.
+ */
+static bool
+hwp_sibling_is_info_only(device_t dev, const char *name)
+{
+	device_t sibling;
+	int type;
+
+	sibling = device_find_child(device_get_parent(dev), name, -1);
+	if (sibling == NULL || !device_is_attached(sibling))
+		return (false);
+	if (CPUFREQ_DRV_TYPE(sibling, &type) != 0)
+		return (false);
+	return ((type & CPUFREQ_FLAG_INFO_ONLY) != 0);
+}
+
+/*
+ * Probe method.  Declines (ENXIO) when an attached acpi_perf or est sibling
+ * reports itself as info-only; otherwise sets the description and accepts
+ * the device with default priority.
+ *
+ * Conflicting cpufreq drivers cannot currently be loaded at this point
+ * because they are gated by intel_speed_shift, so the sibling checks are a
+ * no-op for now.  They are kept so that things continue to work if the knobs
+ * controlling cpufreq become more flexible or INFO_ONLY drivers are
+ * permitted alongside this one.
+ */
+static int
+intel_hwpstate_probe(device_t dev)
+{
+
+	if (hwp_sibling_is_info_only(dev, "acpi_perf")) {
+		device_printf(dev, "Avoiding acpi_perf\n");
+		return (ENXIO);
+	}
+
+	if (hwp_sibling_is_info_only(dev, "est")) {
+		device_printf(dev, "Avoiding EST\n");
+		return (ENXIO);
+	}
+
+	device_set_desc(dev, "Intel Speed Shift");
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Enable HWP on the CPU behind sc->dev and program IA32_HWP_REQUEST for
+ * autonomous operation: no desired-performance value, hardware-selected
+ * activity window, and minimum/maximum set to the lowest/highest levels read
+ * from IA32_HWP_CAPABILITIES.  The capability levels and the request value
+ * are cached in sc.
+ *
+ * The calling thread is bound to the target CPU for the MSR accesses and is
+ * unbound again on every path once the binding has been taken.  Returns 0 on
+ * success, ENXIO if the device has no pcpu, or the error from the failing
+ * MSR access.
+ *
+ * FIXME: Need to support PKG variant
+ */
+static int
+set_autonomous_hwp(struct hwp_softc *sc)
+{
+	struct pcpu *pc;
+	device_t dev;
+	uint64_t caps;
+	int ret;
+
+	dev = sc->dev;
+
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	thread_lock(curthread);
+	sched_bind(curthread, pc->pc_cpuid);
+	thread_unlock(curthread);
+
+	/* XXX: Many MSRs aren't readable until feature is enabled */
+	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
+	if (ret) {
+		device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
+
+	/* Leave through "out" so the thread does not stay bound to the CPU. */
+	ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req);
+	if (ret)
+		goto out;
+
+	ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps);
+	if (ret)
+		goto out;
+
+	sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps);
+	sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps);
+	sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps);
+	sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps);
+
+	/* hardware autonomous selection determines the performance target */
+	sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE;
+
+	/* enable HW dynamic selection of window size */
+	sc->req &= ~IA32_HWP_ACTIVITY_WINDOW;
+
+	/* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */
+	sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE;
+	sc->req |= sc->low;
+
+	/* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */
+	sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE;
+	sc->req |= sc->high << 8;
+
+	ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
+	if (ret) {
+		device_printf(dev,
+		    "Failed to setup autonomous HWP for cpu%d (file a bug)\n",
+		    pc->pc_cpuid);
+	}
+
+out:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	return (ret);
+}
+
+/*
+ * Attach method: record the HWP feature bits from CPUID.06H:EAX in the
+ * softc, switch the CPU to autonomous HWP, publish the per-unit debug sysctl
+ * and the "epp" sysctl, and register the device with the cpufreq framework.
+ *
+ * Returns the error from set_autonomous_hwp() if that fails, otherwise the
+ * result of cpufreq_register().
+ */
+static int
+intel_hwpstate_attach(device_t dev)
+{
+	struct hwp_softc *sc;
+	uint32_t regs[4];
+	char buf[128];
+	int ret;
+
+	KASSERT(device_find_child(device_get_parent(dev), "est", -1) == NULL,
+	    ("EST driver already loaded"));
+
+	KASSERT(device_find_child(device_get_parent(dev), "acpi_perf", -1) == NULL,
+	    ("ACPI driver already loaded"));
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+
+	do_cpuid(6, regs);
+	if (regs[0] & CPUTPM1_HWP_NOTIFICATION)
+		sc->hwp_notifications = true;
+	if (regs[0] & CPUTPM1_HWP_ACTIVITY_WINDOW)
+		sc->hwp_activity_window = true;
+	if (regs[0] & CPUTPM1_HWP_PERF_PREF)
+		sc->hwp_pref_ctrl = true;
+	if (regs[0] & CPUTPM1_HWP_PKG)
+		sc->hwp_pkg_ctrl = true;
+
+	ret = set_autonomous_hwp(sc);
+	if (ret)
+		return (ret);
+
+	/* Bound by the real buffer size so large unit numbers are not cut. */
+	snprintf(buf, sizeof(buf), "intel_hwp_debug%d", device_get_unit(dev));
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+	    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, buf,
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP,
+	    sc, 0, intel_hwp_dump_sysctl_handler, "A", "");
+
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
+	    "epp", CTLTYPE_INT | CTLFLAG_RWTUN, dev, sizeof(dev),
+	    sysctl_epp_select, "I",
+	    "Efficiency/Performance Preference (0-100)");
+
+	return (cpufreq_register(dev));
+}
+
+/* Detach method: drop this driver's registration with cpufreq. */
+static int
+intel_hwpstate_detach(device_t dev)
+{
+	int error;
+
+	error = cpufreq_unregister(dev);
+	return (error);
+}
+
+/*
+ * cpufreq "get" method: fill *set with the current clock rate of the CPU
+ * behind dev, as estimated by cpu_est_clockrate() and divided down by
+ * 1000000.  All other fields are reported as CPUFREQ_VAL_UNKNOWN, and so is
+ * the frequency when the estimate fails.
+ *
+ * Returns EINVAL if set is NULL, ENXIO if the device has no pcpu, and 0
+ * otherwise (including when the estimate failed).
+ */
+static int
+intel_hwpstate_get(device_t dev, struct cf_setting *set)
+{
+	struct pcpu *pc;
+	uint64_t rate;
+	int ret;
+
+	if (set == NULL)
+		return (EINVAL);
+
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set));
+	set->dev = dev;
+
+	/*
+	 * No console output here: this runs on every frequency query, so a
+	 * printf would flood the message buffer.
+	 */
+	ret = cpu_est_clockrate(pc->pc_cpuid, &rate);
+	if (ret == 0)
+		set->freq = rate / 1000000;
+
+	set->volts = CPUFREQ_VAL_UNKNOWN;
+	set->power = CPUFREQ_VAL_UNKNOWN;
+	set->lat = CPUFREQ_VAL_UNKNOWN;
+
+	return (0);
+}
+
+/*
+ * cpufreq "type" method: describe this driver as absolute, info-only and
+ * uncached.  Returns EINVAL when type is NULL, 0 otherwise.
+ */
+static int
+intel_hwpstate_type(device_t dev, int *type)
+{
+	int flags;
+
+	if (type == NULL)
+		return (EINVAL);
+
+	flags = CPUFREQ_TYPE_ABSOLUTE;
+	flags |= CPUFREQ_FLAG_INFO_ONLY;
+	flags |= CPUFREQ_FLAG_UNCACHED;
+	*type = flags;
+
+	return (0);
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Feb 25, 11:22 AM (1 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28987152
Default Alt Text
D18028.id53817.diff (30 KB)
Attached To
Mode
D18028: Add support for Intel Speed Shift
Attached
Detach File
Event Timeline
Log In to Comment