diff --git a/sys/x86/cpufreq/hwpstate_amd.c b/sys/x86/cpufreq/hwpstate_amd.c
--- a/sys/x86/cpufreq/hwpstate_amd.c
+++ b/sys/x86/cpufreq/hwpstate_amd.c
@@ -8,6 +8,7 @@
  * Copyright (c) 2009 Michael Reifenberger
  * Copyright (c) 2009 Norikatsu Shigemura
  * Copyright (c) 2008-2009 Gen Otsuji
+ * Copyright (c) 2025 ShengYi Hung
  *
  * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c
  * in various parts. The authors of these files are Nate Lawson,
@@ -55,6 +56,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -74,6 +76,15 @@
 #define	MSR_AMD_10H_11H_STATUS		0xc0010063
 #define	MSR_AMD_10H_11H_CONFIG		0xc0010064
 
+/* AMD Collaborative Processor Performance Control (CPPC) MSRs. */
+#define	MSR_AMD_CPPC_CAPS_1		0xc00102b0
+#define	MSR_AMD_CPPC_ENABLE		0xc00102b1
+#define	MSR_AMD_CPPC_CAPS_2		0xc00102b2
+#define	MSR_AMD_CPPC_REQUEST		0xc00102b3
+#define	MSR_AMD_CPPC_STATUS		0xc00102b4
+
+#define	MSR_AMD_PWR_ACC			0xc001007a
+#define	MSR_AMD_PWR_ACC_MX		0xc001007b
+
 #define	AMD_10H_11H_MAX_STATES		16
 
 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */
@@ -92,6 +103,16 @@
 
 #define	AMD_1AH_CUR_FID(msr)		((msr) & 0xFFF)
 
+/* Performance capability fields of MSR_AMD_CPPC_CAPS_1 (parenthesized args). */
+#define	AMD_CPPC_CAPS_1_LOW_PERF(msr)		(((msr) >> 0) & 0xFF)
+#define	AMD_CPPC_CAPS_1_LOW_NONLIN_PERF(msr)	(((msr) >> 8) & 0xFF)
+#define	AMD_CPPC_CAPS_1_NOMINAL_PERF(msr)	(((msr) >> 16) & 0xFF)
+#define	AMD_CPPC_CAPS_1_HIGH_PERF(msr)		(((msr) >> 24) & 0xFF)
+
+#define	AMD_CPPC_REQUEST_ENERGY_PERF_BITS	0xff000000
+#define	AMD_CPPC_REQUEST_DES_PERF_BITS		0x00ff0000
+#define	AMD_CPPC_REQUEST_MIN_PERF_BITS		0x0000ff00
+#define	AMD_CPPC_REQUEST_MAX_PERF_BITS		0x000000ff
+
 #define	HWPSTATE_DEBUG(dev, msg...) \
 	do { \
 		if (hwpstate_verbose) \
@@ -106,10 +127,27 @@
 	int	pstate_id;	/* P-State id */
 };
 
+/* Cached CPPC performance capabilities read from MSR_AMD_CPPC_CAPS_1. */
+struct hwpstate_cppc_setting {
+	uint8_t	high;
+	uint8_t	guaranteed;
+	uint8_t	efficient;
+	uint8_t	low;
+};
+
+enum hwpstate_flags {
+	PSTATE_CPPC = 1,
+};
+
 struct hwpstate_softc {
 	device_t		dev;
-	struct hwpstate_setting	hwpstate_settings[AMD_10H_11H_MAX_STATES];
+	/* Legacy P-state table and CPPC caps are mutually exclusive. */
+	union {
+		struct hwpstate_setting
+		    hwpstate_settings[AMD_10H_11H_MAX_STATES];
+		struct hwpstate_cppc_setting cppc_settings;
+	};
 	int			cfnum;
+	uint32_t		flags;
+	uint64_t		req;	/* last value written to CPPC_REQUEST */
 };
 
 static void	hwpstate_identify(driver_t *driver, device_t parent);
@@ -140,6 +178,11 @@
     "If enabled (1), limit administrative control of P-states to the value in "
     "CurPstateLimit");
 
+static bool hwpstate_pkg_ctrl_enable = true;
+SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN,
+    &hwpstate_pkg_ctrl_enable, 0,
+    "Set 1 (default) to enable package-level control, 0 to disable");
+
 static device_method_t hwpstate_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify, hwpstate_identify),
@@ -159,6 +202,135 @@
 	{0, 0}
 };
 
+/*
+ * debug.hwpstateN handler: dump the CPPC enable, capability and request
+ * registers of this device's CPU into an sbuf.  Binds to the target CPU
+ * for the MSR reads.
+ */
+static int
+amdhwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev;
+	struct pcpu *pc;
+	struct sbuf *sb;
+	struct hwpstate_softc *sc;
+	uint64_t data;
+	int ret;
+
+	sc = (struct hwpstate_softc *)arg1;
+	dev = sc->dev;
+
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
+	sbuf_putc(sb, '\n');
+	thread_lock(curthread);
+	sched_bind(curthread, pc->pc_cpuid);
+	thread_unlock(curthread);
+
+	/* Treat an unreadable enable MSR as "disabled"; don't read junk. */
+	if (rdmsr_safe(MSR_AMD_CPPC_ENABLE, &data) != 0)
+		data = 0;
+	sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid,
+	    ((data & 1) ? "En" : "Dis"));
+
+	if ((data & 1) == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &data);
+	sbuf_printf(sb, "\tHighest Performance: %03ju\n",
+	    (uintmax_t)AMD_CPPC_CAPS_1_HIGH_PERF(data));
+	sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n",
+	    (uintmax_t)AMD_CPPC_CAPS_1_NOMINAL_PERF(data));
+	sbuf_printf(sb, "\tEfficient Performance: %03ju\n",
+	    (uintmax_t)AMD_CPPC_CAPS_1_LOW_NONLIN_PERF(data));
+	sbuf_printf(sb, "\tLowest Performance: %03ju\n",
+	    (uintmax_t)AMD_CPPC_CAPS_1_LOW_PERF(data));
+	sbuf_putc(sb, '\n');
+
+	rdmsr_safe(MSR_AMD_CPPC_REQUEST, &data);
+
+#define	pkg_print(name, offset)						\
+	do {								\
+		sbuf_printf(sb, "\t%s: %03u\n", name,			\
+		    (unsigned)(data >> offset) & 0xff);			\
+	} while (0)
+
+	pkg_print("Requested Efficiency Performance Preference", 24);
+	pkg_print("Requested Desired Performance", 16);
+	pkg_print("Requested Maximum Performance", 8);
+	pkg_print("Requested Minimum Performance", 0);
+#undef pkg_print
+
+	sbuf_putc(sb, '\n');
+
+out:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	ret = sbuf_finish(sb);
+	if (ret == 0)
+		ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
+	sbuf_delete(sb);
+
+	return (ret);
+}
+
+/*
+ * Sysctl handler for the per-device "epp" knob: 0 = most performant,
+ * 100 = most efficient.  Scales to/from the 8-bit EPP field of
+ * MSR_AMD_CPPC_REQUEST.  Early exits return directly; we only fall
+ * through to "end" (which unbinds) after sched_bind().
+ */
+static int
+sysctl_epp_select(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev;
+	struct hwpstate_softc *sc;
+	struct pcpu *pc;
+	uint32_t val;
+	int ret = 0;
+	uint64_t r;
+
+	dev = oidp->oid_arg1;
+	sc = device_get_softc(dev);
+
+	if (!(sc->flags & PSTATE_CPPC))
+		return (ENODEV);
+
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	val = ((sc->req & AMD_CPPC_REQUEST_ENERGY_PERF_BITS) >>
+	    (ffsll(AMD_CPPC_REQUEST_ENERGY_PERF_BITS) - 1)) *
+	    100 / 0xFF;
+	ret = sysctl_handle_int(oidp, &val, 0, req);
+	if (ret || req->newptr == NULL)
+		return (ret);
+	if (val > 100)
+		return (EINVAL);
+	val = (val * 0xFF) / 100;
+
+	thread_lock(curthread);
+	sched_bind(curthread, pc->pc_cpuid);
+	thread_unlock(curthread);
+
+	ret = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &sc->req);
+	if (ret)
+		goto end;
+
+	r = sc->req;
+	r = r & ~AMD_CPPC_REQUEST_ENERGY_PERF_BITS;
+	r |= (val & 0xFFULL) << (ffsll(AMD_CPPC_REQUEST_ENERGY_PERF_BITS) - 1);
+	if (r == sc->req)
+		goto end;
+	ret = wrmsr_safe(MSR_AMD_CPPC_REQUEST, r);
+	if (ret)
+		goto end;
+	sc->req = r;
+
+end:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	return (ret);
+}
+
 static driver_t hwpstate_driver = {
 	"hwpstate",
 	hwpstate_methods,
@@ -269,6 +442,8 @@
 	if (cf == NULL)
 		return (EINVAL);
 	sc = device_get_softc(dev);
+	if (sc->flags & PSTATE_CPPC)
+		return (EOPNOTSUPP);
 	set = sc->hwpstate_settings;
 	for (i = 0; i < sc->cfnum; i++)
 		if (CPUFREQ_CMP(cf->freq, set[i].freq))
@@ -284,21 +459,38 @@
 {
 	struct hwpstate_softc *sc;
 	struct hwpstate_setting set;
+	struct pcpu *pc;
 	uint64_t msr;
+	uint64_t rate;
+	int ret;
 
 	sc = device_get_softc(dev);
 	if (cf == NULL)
 		return (EINVAL);
-	msr = rdmsr(MSR_AMD_10H_11H_STATUS);
-	if (msr >= sc->cfnum)
-		return (EINVAL);
-	set = sc->hwpstate_settings[msr];
-	cf->freq = set.freq;
-	cf->volts = set.volts;
-	cf->power = set.power;
-	cf->lat = set.lat;
-	cf->dev = dev;
+	if (sc->flags & PSTATE_CPPC) {
+		pc = cpu_get_pcpu(dev);
+		if (pc == NULL)
+			return (ENXIO);
+
+		/* Hardware picks the state; report the measured clock only. */
+		memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf));
+		cf->dev = dev;
+		if ((ret = cpu_est_clockrate(pc->pc_cpuid, &rate)))
+			return (ret);
+		cf->freq = rate / 1000000;
+	} else {
+		msr = rdmsr(MSR_AMD_10H_11H_STATUS);
+		if (msr >= sc->cfnum)
+			return (EINVAL);
+		set = sc->hwpstate_settings[msr];
+
+		cf->freq = set.freq;
+		cf->volts = set.volts;
+		cf->power = set.power;
+		cf->lat = set.lat;
+		cf->dev = dev;
+	}
+
 	return (0);
 }
@@ -312,6 +504,9 @@
 	if (sets == NULL || count == NULL)
 		return (EINVAL);
 	sc = device_get_softc(dev);
+	if (sc->flags & PSTATE_CPPC)
+		return (EOPNOTSUPP);
+
 	if (*count < sc->cfnum)
 		return (E2BIG);
 	for (i = 0; i < sc->cfnum; i++, sets++) {
@@ -330,11 +525,16 @@
 static int
 hwpstate_type(device_t dev, int *type)
 {
+	struct hwpstate_softc *sc;
 
 	if (type == NULL)
 		return (EINVAL);
+	sc = device_get_softc(dev);
 
 	*type = CPUFREQ_TYPE_ABSOLUTE;
+	*type |= (sc->flags & PSTATE_CPPC) ?
+	    CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED :
+	    0;
 	return (0);
 }
@@ -365,6 +565,79 @@
 		device_printf(parent, "hwpstate: add child failed\n");
 }
 
+/*
+ * Enable CPPC on this device's CPU and put it in autonomous mode:
+ * desired-perf = 0, EPP = balanced, min/low and max/high taken from
+ * the capability register.  Binds to the target CPU for MSR access.
+ */
+static int
+amd_set_autonomous_hwp(struct hwpstate_softc *sc)
+{
+	struct pcpu *pc;
+	device_t dev;
+	uint64_t caps;
+	int ret;
+
+	dev = sc->dev;
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL)
+		return (ENXIO);
+
+	thread_lock(curthread);
+	sched_bind(curthread, pc->pc_cpuid);
+	thread_unlock(curthread);
+
+	ret = wrmsr_safe(MSR_AMD_CPPC_ENABLE, 1);
+	if (ret) {
+		device_printf(dev, "Failed to enable cppc for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
+
+	ret = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &sc->req);
+	if (ret) {
+		device_printf(dev,
+		    "Failed to read CPPC request MSR for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
+
+	ret = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &caps);
+	if (ret) {
+		device_printf(dev,
+		    "Failed to read HWP capabilities MSR for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
+	sc->cppc_settings.high = AMD_CPPC_CAPS_1_HIGH_PERF(caps);
+	sc->cppc_settings.guaranteed = AMD_CPPC_CAPS_1_NOMINAL_PERF(caps);
+	sc->cppc_settings.efficient = AMD_CPPC_CAPS_1_LOW_NONLIN_PERF(caps);
+	sc->cppc_settings.low = AMD_CPPC_CAPS_1_LOW_PERF(caps);
+
+	/* Enable autonomous mode by setting desired performance to 0. */
+	sc->req &= ~AMD_CPPC_REQUEST_DES_PERF_BITS;
+#define	SET_VALUES(val, BITS)						\
+	sc->req = (sc->req & ~(BITS)) |					\
+	    (((uint32_t)(val) & 0xFF) << (ffsll((BITS)) - 1))
+	/*
+	 * 0x80 is the "balanced" EPP midpoint; Intel HWP uses the same
+	 * default.
+	 */
+	SET_VALUES(0x80U, AMD_CPPC_REQUEST_ENERGY_PERF_BITS);
+	SET_VALUES(sc->cppc_settings.low, AMD_CPPC_REQUEST_MIN_PERF_BITS);
+	SET_VALUES(sc->cppc_settings.high, AMD_CPPC_REQUEST_MAX_PERF_BITS);
+#undef SET_VALUES
+
+	ret = wrmsr_safe(MSR_AMD_CPPC_REQUEST, sc->req);
+	if (ret) {
+		device_printf(dev,
+		    "Failed to setup autonomous HWP for cpu%d\n",
+		    pc->pc_cpuid);
+		goto out;
+	}
+out:
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+
+	return (ret);
+}
+
 static int
 hwpstate_probe(device_t dev)
 {
@@ -373,15 +646,23 @@
 	uint64_t msr;
 	int error, type;
 
-	/*
-	 * Only hwpstate0.
-	 * It goes well with acpi_throttle.
-	 */
-	if (device_get_unit(dev) != 0)
-		return (ENXIO);
-
 	sc = device_get_softc(dev);
+
+	if (amd_extended_feature_extensions & AMDFEID_CPPC) {
+		sc->flags |= PSTATE_CPPC;
+	} else {
+		/*
+		 * No CPPC support; fall back to ACPI or legacy hwpstate,
+		 * so only hwpstate0 attaches.
+		 */
+		if (device_get_unit(dev) != 0)
+			return (ENXIO);
+	}
+
 	sc->dev = dev;
+	device_set_desc(dev, "Cool`n'Quiet 2.0");
+	if (sc->flags & PSTATE_CPPC)
+		return (BUS_PROBE_NOWILDCARD);
 
 	/*
 	 * Check if acpi_perf has INFO only flag.
@@ -433,14 +714,32 @@
 	if (error)
 		return (error);
 
-	device_set_desc(dev, "Cool`n'Quiet 2.0");
-
 	return (0);
 }
 
 static int
 hwpstate_attach(device_t dev)
 {
+	struct hwpstate_softc *sc;
+	int res;
 
+	sc = device_get_softc(dev);
+	if (sc->flags & PSTATE_CPPC) {
+		if ((res = amd_set_autonomous_hwp(sc)) != 0)
+			return (res);
+		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+		    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO,
+		    device_get_nameunit(dev),
+		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
+		    sc, 0, amdhwp_dump_sysctl_handler, "A", "");
+
+		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
+		    "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev, 0,
+		    sysctl_epp_select, "I",
+		    "Efficiency/Performance Preference "
+		    "(range from 0, most performant, through 100, most efficient)");
+	}
 	return (cpufreq_register(dev));
 }
@@ -584,8 +883,11 @@
 static int
 hwpstate_detach(device_t dev)
 {
+	struct hwpstate_softc *sc;
 
-	hwpstate_goto_pstate(dev, 0);
+	sc = device_get_softc(dev);
+	if (!(sc->flags & PSTATE_CPPC))
+		hwpstate_goto_pstate(dev, 0);
 	return (cpufreq_unregister(dev));
 }
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -418,6 +418,7 @@
 #define	AMDPM_HW_PSTATE		0x00000080
 #define	AMDPM_TSC_INVARIANT	0x00000100
 #define	AMDPM_CPB		0x00000200
+#define	AMDPM_PWR_REPORT	0x00001000
 
 /*
  * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions)