diff --git a/sys/x86/cpufreq/hwpstate_amd.c b/sys/x86/cpufreq/hwpstate_amd.c --- a/sys/x86/cpufreq/hwpstate_amd.c +++ b/sys/x86/cpufreq/hwpstate_amd.c @@ -51,21 +51,22 @@ #include #include #include -#include #include -#include +#include #include -#include +#include +#include #include +#include -#include #include +#include #include -#include - #include +#include + #include "acpi_if.h" #include "cpufreq_if.h" @@ -74,6 +75,15 @@ #define MSR_AMD_10H_11H_STATUS 0xc0010063 #define MSR_AMD_10H_11H_CONFIG 0xc0010064 +#define MSR_AMD_CPPC_CAPS_1 0xc00102b0 +#define MSR_AMD_CPPC_ENABLE 0xc00102b1 +#define MSR_AMD_CPPC_CAPS_2 0xc00102b2 +#define MSR_AMD_CPPC_REQUEST 0xc00102b3 +#define MSR_AMD_CPPC_STATUS 0xc00102b4 + +#define MSR_AMD_PWR_ACC 0xc001007a +#define MSR_AMD_PWR_ACC_MX 0xc001007b + #define AMD_10H_11H_MAX_STATES 16 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */ @@ -92,6 +102,16 @@ #define AMD_1AH_CUR_FID(msr) ((msr) & 0xFFF) +#define AMD_CPPC_LOW_PERF(msr) ((msr >> 0) & 0xFF) +#define AMD_CPPC_LOW_NONLIN_PERF(msr) ((msr >> 8) & 0xFF) +#define AMD_CPPC_NOMINAL_PERF(msr) ((msr >> 16) & 0xFF) +#define AMD_CPPC_HIGH_PERF(msr) ((msr >> 24) & 0xFF) + +#define AMD_CPPC_ENERGY_PERF_BITS 0xff000000 +#define AMD_CPPC_DES_PERF_BITS 0x00ff0000 +#define AMD_CPPC_MIN_PERF_BITS 0x0000ff00 +#define AMD_CPPC_MAX_PERF_BITS 0x000000ff + #define HWPSTATE_DEBUG(dev, msg...) 
\ do { \ if (hwpstate_verbose) \ @@ -106,10 +126,29 @@ int pstate_id; /* P-State id */ }; +struct hwpstate_cppc_setting { + uint8_t high; + uint8_t guaranteed; + uint8_t efficient; + uint8_t low; +}; + +enum hwpstate_flags { + PSTATE_CPPC = 1, + PSTATE_CPPC_PWR_CALC = 2, + PSTATE_CPPC_PKG_CTRL = 4, +}; + struct hwpstate_softc { device_t dev; - struct hwpstate_setting hwpstate_settings[AMD_10H_11H_MAX_STATES]; + union { + struct hwpstate_setting + hwpstate_settings[AMD_10H_11H_MAX_STATES]; + struct hwpstate_cppc_setting cppc_settings; + }; int cfnum; + uint32_t flags; + uint64_t req; }; static void hwpstate_identify(driver_t *driver, device_t parent); @@ -140,6 +179,11 @@ "If enabled (1), limit administrative control of P-states to the value in " "CurPstateLimit"); +static bool hwpstate_pkg_ctrl_enable = true; +SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN, + &hwpstate_pkg_ctrl_enable, 0, + "Set 1 (default) to enable package-level control, 0 to disable"); + static device_method_t hwpstate_methods[] = { /* Device interface */ DEVMETHOD(device_identify, hwpstate_identify), @@ -159,6 +203,140 @@ {0, 0} }; +static int +amdhwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + device_t dev; + struct pcpu *pc; + struct sbuf *sb; + struct hwpstate_softc *sc; + uint64_t data, data2; + int ret; + + sc = (struct hwpstate_softc *)arg1; + dev = sc->dev; + + pc = cpu_get_pcpu(dev); + if (pc == NULL) + return (ENXIO); + + sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL); + sbuf_putc(sb, '\n'); + thread_lock(curthread); + sched_bind(curthread, pc->pc_cpuid); + thread_unlock(curthread); + + rdmsr_safe(MSR_AMD_CPPC_ENABLE, &data); + sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid, + ((data & 1) ? 
"En" : "Dis")); + + if (data == 0) { + ret = 0; + goto out; + } + + rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &data); + sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff); + sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", + (data >> 8) & 0xff); + sbuf_printf(sb, "\tEfficient Performance: %03ju\n", + (data >> 16) & 0xff); + sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff); + + rdmsr_safe(MSR_AMD_CPPC_REQUEST, &data); + data2 = 0; + if (sc->flags & PSTATE_CPPC_PKG_CTRL) + rdmsr_safe(MSR_AMD_CPPC_REQUEST, &data2); + + sbuf_putc(sb, '\n'); + +#define pkg_print(name, offset) \ + do { \ + if (!(sc->flags & PSTATE_CPPC_PKG_CTRL)) \ + sbuf_printf(sb, "\t%s: %03u\n", name, \ + (unsigned)(data >> offset) & 0xff); \ + else \ + sbuf_printf(sb, "\t%s: %03u\n", name, \ + (unsigned)(data2 >> offset) & 0xff); \ + } while (0) + + pkg_print("Requested Efficiency Performance Preference", 24); + pkg_print("Requested Desired Performance", 16); + pkg_print("Requested Maximum Performance", 8); + pkg_print("Requested Minimum Performance", 0); +#undef pkg_print + + sbuf_putc(sb, '\n'); + +out: + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); + + ret = sbuf_finish(sb); + if (ret == 0) + ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); + sbuf_delete(sb); + + return (ret); +} + +static int +sysctl_epp_select(SYSCTL_HANDLER_ARGS) +{ + device_t dev; + struct hwpstate_softc *sc; + struct pcpu *pc; + uint32_t val; + int ret = 0; + uint64_t r; + + dev = oidp->oid_arg1; + sc = device_get_softc(dev); + + if (!(sc->flags & PSTATE_CPPC_PKG_CTRL)) + return (ENODEV); + + pc = cpu_get_pcpu(dev); + if (pc == NULL) + return (ENXIO); + + ret = sysctl_handle_int(oidp, &val, 0, req); + if (ret || req->newptr == NULL) + goto end; + + thread_lock(curthread); + sched_bind(curthread, pc->pc_cpuid); + thread_unlock(curthread); + + ret = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &r); + if (ret) + goto end; + val = ((r >> 24) & 0xFFULL) * 100 / 0xff; + + if (val 
> 100) { + ret = EINVAL; + goto end; + } + val = (val * 0xFF) / 100; + r = sc->req; + r = r & ~AMD_CPPC_ENERGY_PERF_BITS; + r |= (val & 0xFFULL) << 24; + if (r == sc->req) + goto end; + ret = wrmsr_safe(MSR_AMD_CPPC_REQUEST, r); + if (ret) + goto end; + sc->req = r; + +end: + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); + + return (ret); +} + static driver_t hwpstate_driver = { "hwpstate", hwpstate_methods, @@ -269,6 +447,8 @@ if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); + if (sc->flags & PSTATE_CPPC) + return (EOPNOTSUPP); set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) if (CPUFREQ_CMP(cf->freq, set[i].freq)) @@ -279,26 +459,79 @@ return (hwpstate_goto_pstate(dev, set[i].pstate_id)); } +static int +hwpstate_calc_power(struct hwpstate_softc *sc) +{ + device_t dev; + struct pcpu *pc; + uint64_t jx, tx, jy, ty, jmax, jdelta; + register_t reg; + + dev = sc->dev; + pc = cpu_get_pcpu(dev); + + thread_lock(curthread); + sched_bind(curthread, pc->pc_cpuid); + thread_unlock(curthread); + + reg = intr_disable(); + jx = rdmsr(MSR_AMD_PWR_ACC); + tx = rdtsc(); + DELAY(1000); + jy = rdmsr(MSR_AMD_PWR_ACC); + ty = rdtsc(); + jmax = rdmsr(MSR_AMD_PWR_ACC_MX); + intr_restore(reg); + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); + + if (jy < jx) + jdelta = jy + jmax - jx; + else + jdelta = jy - jx; + + return (amd_pwrsamplerate * jdelta / (ty - tx)); +} + static int hwpstate_get(device_t dev, struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting set; + struct pcpu *pc; uint64_t msr; + uint64_t rate; + int ret; sc = device_get_softc(dev); if (cf == NULL) return (EINVAL); - msr = rdmsr(MSR_AMD_10H_11H_STATUS); - if (msr >= sc->cfnum) - return (EINVAL); - set = sc->hwpstate_settings[msr]; - cf->freq = set.freq; - cf->volts = set.volts; - cf->power = set.power; - cf->lat = set.lat; - cf->dev = dev; + if (sc->flags & PSTATE_CPPC) { + pc = cpu_get_pcpu(dev); + if (pc == 
NULL) + return (ENXIO); + + memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf)); + cf->dev = dev; + if ((ret = cpu_est_clockrate(pc->pc_cpuid, &rate))) + return (ret); + cf->freq = rate / 1000000; + if (sc->flags & PSTATE_CPPC_PWR_CALC) + cf->power = hwpstate_calc_power(sc); + } else { + msr = rdmsr(MSR_AMD_10H_11H_STATUS); + if (msr >= sc->cfnum) + return (EINVAL); + set = sc->hwpstate_settings[msr]; + cf->freq = set.freq; + cf->volts = set.volts; + cf->power = set.power; + cf->lat = set.lat; + cf->dev = dev; + } + return (0); } @@ -312,6 +545,9 @@ if (sets == NULL || count == NULL) return (EINVAL); sc = device_get_softc(dev); + if (sc->flags & PSTATE_CPPC) + return (EOPNOTSUPP); + if (*count < sc->cfnum) return (E2BIG); for (i = 0; i < sc->cfnum; i++, sets++) { @@ -330,11 +566,16 @@ static int hwpstate_type(device_t dev, int *type) { + struct hwpstate_softc *sc; if (type == NULL) return (EINVAL); + sc = device_get_softc(dev); *type = CPUFREQ_TYPE_ABSOLUTE; + *type |= sc->flags & PSTATE_CPPC ? 
+ CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED : + 0; return (0); } @@ -365,6 +606,84 @@ device_printf(parent, "hwpstate: add child failed\n"); } +static int +amd_set_autonomous_hwp(struct hwpstate_softc *sc) +{ + struct pcpu *pc; + device_t dev; + uint64_t caps; + int ret; + + dev = sc->dev; + pc = cpu_get_pcpu(dev); + if (pc == NULL) + return (ENXIO); + + thread_lock(curthread); + sched_bind(curthread, pc->pc_cpuid); + thread_unlock(curthread); + + ret = wrmsr_safe(MSR_AMD_CPPC_ENABLE, 1); + if (ret) { + device_printf(dev, "Failed to enable cppc for cpu%d (%d)\n", + pc->pc_cpuid, ret); + goto out; + } + + ret = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &sc->req); + if (ret) { + device_printf(dev, + "Failed to read CPPC request MSR for cpu%d (%d)\n", + pc->pc_cpuid, ret); + goto out; + } + + ret = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &caps); + if (ret) { + device_printf(dev, + "Failed to read HWP capabilities MSR for cpu%d (%d)\n", + pc->pc_cpuid, ret); + goto out; + } + sc->cppc_settings.high = AMD_CPPC_HIGH_PERF(caps); + sc->cppc_settings.guaranteed = AMD_CPPC_NOMINAL_PERF(caps); + sc->cppc_settings.efficient = AMD_CPPC_LOW_NONLIN_PERF(caps); + sc->cppc_settings.low = AMD_CPPC_LOW_PERF(caps); + + /* enable autonomous mode by setting desired performance to 0 */ + sc->req &= ~AMD_CPPC_DES_PERF_BITS; + + /* + * Default EPP to 0x80, the balanced mode; this matches the + * default Intel uses for its equivalent EPP field. + */ + sc->req &= ~AMD_CPPC_ENERGY_PERF_BITS; + sc->req |= 0x80U << 24; + + sc->req &= ~AMD_CPPC_MIN_PERF_BITS; + sc->req |= sc->cppc_settings.low << 8; + + sc->req &= ~AMD_CPPC_MAX_PERF_BITS; + sc->req |= sc->cppc_settings.high; + + ret = wrmsr_safe(MSR_AMD_CPPC_REQUEST, sc->req); + if (ret) { + device_printf(dev, + "Failed to setup autonomous HWP for cpu%d\n", + pc->pc_cpuid); + goto out; + } +out: + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); + + if (!ret) + device_set_desc(dev, "Cool`n'Quiet 2.0"); + + return (ret ? 
ret : BUS_PROBE_NOWILDCARD); +} + static int hwpstate_probe(device_t dev) { @@ -373,15 +692,28 @@ uint64_t msr; int error, type; - /* - * Only hwpstate0. - * It goes well with acpi_throttle. - */ - if (device_get_unit(dev) != 0) - return (ENXIO); - sc = device_get_softc(dev); + + if (amd_extended_feature_extensions & AMDFEID_CPPC) { + sc->flags |= PSTATE_CPPC; + } else { + /* + * No CPPC support; fall back to ACPI or hwpstate, so + * only hwpstate0 is created. + */ + if (device_get_unit(dev) != 0) + return (ENXIO); + } + + if (amd_pminfo & AMDPM_PWR_REPORT) + sc->flags |= PSTATE_CPPC_PWR_CALC; + + if (hwpstate_pkg_ctrl_enable) + sc->flags |= PSTATE_CPPC_PKG_CTRL; + sc->dev = dev; + if (sc->flags & PSTATE_CPPC) + return (amd_set_autonomous_hwp(sc)); /* * Check if acpi_perf has INFO only flag. @@ -439,7 +771,23 @@ static int hwpstate_attach(device_t dev) { + struct hwpstate_softc *sc; + sc = device_get_softc(dev); + if (sc->flags & PSTATE_CPPC) { + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, + device_get_nameunit(dev), + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, + sc, 0, amdhwp_dump_sysctl_handler, "A", ""); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, + "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev, 0, + sysctl_epp_select, "I", + "Efficiency/Performance Preference " + "(range from 0, most performant, through 100, most efficient)"); + } return (cpufreq_register(dev)); } @@ -583,8 +931,11 @@ static int hwpstate_detach(device_t dev) { + struct hwpstate_softc *sc; - hwpstate_goto_pstate(dev, 0); + sc = device_get_softc(dev); + if (!(sc->flags & PSTATE_CPPC)) + hwpstate_goto_pstate(dev, 0); return (cpufreq_unregister(dev)); } diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -418,6 +418,7 @@ #define AMDPM_HW_PSTATE 0x00000080 #define 
AMDPM_TSC_INVARIANT 0x00000100 #define AMDPM_CPB 0x00000200 +#define AMDPM_PWR_REPORT 0x00001000 /* * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions) diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h --- a/sys/x86/include/x86_var.h +++ b/sys/x86/include/x86_var.h @@ -43,6 +43,7 @@ extern u_int amd_feature2; extern u_int amd_rascap; extern u_int amd_pminfo; +extern u_int amd_pwrsamplerate; extern u_int amd_extended_feature_extensions; extern u_int via_feature_rng; extern u_int via_feature_xcrypt; diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -98,6 +98,7 @@ u_int amd_feature2; /* AMD feature flags */ u_int amd_rascap; /* AMD RAS capabilities */ u_int amd_pminfo; /* AMD advanced power management info */ +u_int amd_pwrsamplerate; u_int amd_extended_feature_extensions; u_int via_feature_rng; /* VIA RNG features */ u_int via_feature_xcrypt; /* VIA ACE features */ @@ -1690,6 +1691,7 @@ if (cpu_exthigh >= 0x80000007) { do_cpuid(0x80000007, regs); amd_rascap = regs[1]; + amd_pwrsamplerate = regs[2]; amd_pminfo = regs[3]; } if (cpu_exthigh >= 0x80000008) {