diff --git a/sbin/nvmecontrol/nvmecontrol.8 b/sbin/nvmecontrol/nvmecontrol.8 --- a/sbin/nvmecontrol/nvmecontrol.8 +++ b/sbin/nvmecontrol/nvmecontrol.8 @@ -172,7 +172,10 @@ .Aq Ar device-id .Nm .Ic power +.Op Fl a Ar apst_state +.Op Fl d Ar apst_data .Op Fl l +.Op Fl m Ar apst_limit .Op Fl p Ar power_state .Op Fl w Ar workload_hint .Nm @@ -481,8 +484,24 @@ .Ss power Manage the power modes of the NVMe controller. .Bl -tag -width 6n +.It Fl a Ar state +Enable (1) or disable (0) Autonomous Power State Transition (APST). +When enabled, the controller will autonomously switch power states +according to configuration data. +.It Fl d Ar target:idle[,target:idle ...] +Set the APST data. +Each target:idle pair corresponds to a specific available power +state starting from the lowest, and defines the target state to +transition to, as well as the idle time in milliseconds to wait +before that transition. +Setting both parameters to zero disables switching from that state. .It Fl l List all supported power modes. +.It Fl m Ar limit +Configure APST to use all available non-operational power states +with total latency below the specified +.Ar limit +(in milliseconds). .It Fl p Ar mode Set the power mode to .Ar mode . 
diff --git a/sbin/nvmecontrol/power.c b/sbin/nvmecontrol/power.c
--- a/sbin/nvmecontrol/power.c
+++ b/sbin/nvmecontrol/power.c
@@ -27,6 +27,7 @@
 #include <sys/param.h>
 #include <sys/ioccom.h>
 
+#include <assert.h>
 #include <ctype.h>
 #include <err.h>
 #include <fcntl.h>
@@ -43,17 +44,23 @@
 _Static_assert(sizeof(struct nvme_power_state) == 256 / NBBY,
 	       "nvme_power_state size wrong");
 
-#define POWER_NONE 0xffffffffu
+#define NONE 0xffffffffu
 
 static struct options {
 	bool		list;
+	uint32_t	apst;
+	uint32_t	apst_limit;
 	uint32_t	power;
 	uint32_t	workload;
+	const char	*apst_data;
 	const char	*dev;
 } opt = {
 	.list = false,
-	.power = POWER_NONE,
+	.apst = NONE,
+	.apst_limit = NONE,
+	.power = NONE,
 	.workload = 0,
+	.apst_data = NULL,
 	.dev = NULL,
 };
 
@@ -120,10 +127,173 @@
 		errx(EX_IOERR, "set feature power mgmt request returned error");
 }
 
+enum feat_opc { GET, SET };
+
+/*
+ * Issue a Get Features or Set Features passthrough command for the APST
+ * feature.  "enable" is sent in cdw11 and "data"/"size" describe the
+ * transfer buffer (may be NULL/0).  Exits on ioctl failure or an error
+ * completion; otherwise returns completion dword 0.
+ */
+static int
+power_apst_cmd(int fd, enum feat_opc opc, bool enable, uint64_t *data,
+    int size)
+{
+	struct nvme_pt_command pt;
+
+	memset(&pt, 0, sizeof(pt));
+	pt.cmd.opc = (opc == SET) ?
+	    NVME_OPC_SET_FEATURES : NVME_OPC_GET_FEATURES;
+	pt.cmd.cdw10 = htole32(NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION);
+	pt.cmd.cdw11 = htole32(enable);
+	pt.buf = data;
+	pt.len = size;
+	pt.is_read = (opc == GET) ? 1 : 0;
+
+	if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) == -1)
+		err(EX_IOERR, "APST %s command failed",
+		    (opc == SET) ? "set" : "get");
+
+	if (nvme_completion_is_error(&pt.cpl))
+		errx(EX_IOERR, "APST %s command returned error",
+		    (opc == SET) ? "set" : "get");
+
+	return (pt.cpl.cdw0);
+}
+
+/*
+ * Fill "data" with an APST table derived from the controller's power
+ * state descriptors.  Walking from state npss down to state 1, entry
+ * i - 1 targets state i when that state is non-operational and its
+ * entry + exit latency divided by 1000 does not exceed "limit"; when
+ * state i is operational, entry i - 1 is copied from entry i.
+ */
 static void
-power_show(int fd)
+power_apst_data_generate(struct nvme_controller_data *cdata,
+    uint64_t *data, int num, int limit)
+{
+	uint64_t itpt;
+	int i, latency;
+
+	assert(cdata->npss < num);
+
+	for (i = cdata->npss; i > 0; --i) {
+		if (!NVMEV(NVME_PWR_ST_NOPS,
+		    cdata->power_state[i].mps_nops)) {
+			data[i - 1] = data[i];
+			continue;
+		}
+
+		latency = (cdata->power_state[i].enlat +
+		    cdata->power_state[i].exlat) / 1000;
+		if (latency > limit)
+			continue;
+
+		/*
+		 * Wait 50x the latency before each transition.  The entry
+		 * is assembled in 64 bits because shifting a 24-bit ITPT
+		 * left by 8 would overflow a signed int.
+		 */
+		itpt = MIN(latency * 50, (1 << 24) - 1);
+		data[i - 1] = htole64(itpt << 8 | (uint64_t)i << 3);
+	}
+}
+
+/*
+ * Parse a list of "ITPS:ITPT" pairs separated by commas or spaces into
+ * at most "num" APST table entries.  Exits with EX_USAGE on malformed
+ * or out-of-range input.
+ */
+static void
+power_apst_data_parse(uint64_t *data, int num, const char *dstr)
+{
+	int i, itps, itpt;
+	char *buf, *str, *token;
+
+	/* strsep() advances str, so keep the original pointer for free(). */
+	buf = str = strdup(dstr);
+	if (buf == NULL)
+		err(EX_OSERR, "strdup");
+
+	for (i = 0; (token = strsep(&str, " ,")) != NULL && i < num; ++i) {
+		if (sscanf(token, "%i:%i", &itps, &itpt) != 2)
+			errx(EX_USAGE, "cannot parse provided configuration");
+
+		if (itps < 0 || itps >= 1 << 5)
+			errx(EX_USAGE, "invalid ITPS=%d (must be 0..%d)",
+			    itps, (1 << 5) - 1);
+		if (itpt < 0 || itpt >= 1 << 24)
+			errx(EX_USAGE, "invalid ITPT=%d (must be 0..%d)",
+			    itpt, (1 << 24) - 1);
+
+		/* Widen first: itpt << 8 can overflow a signed int. */
+		data[i] = htole64((uint64_t)itpt << 8 | (uint64_t)itps << 3);
+	}
+
+	free(buf);
+}
+
+/*
+ * Print the APST enable state followed by the table entries, omitting
+ * trailing all-zero entries (entry 0 is always printed).
+ */
+static void
+power_apst_show(uint64_t *data, int num, bool enabled)
+{
+	uint32_t entry;
+	int i;
+
+	while (num > 0 && data[num - 1] == 0)
+		--num;
+
+	printf("APST %s\n", enabled ? "enabled" : "disabled");
+	printf("\n # ITPS ITPT Hex\n");
+	printf("-- ---- ------ --------\n");
+	for (i = 0; i < num || i == 0; ++i) {
+		/* Unsigned so that a large ITPT is not printed as negative. */
+		entry = (uint32_t)le64toh(data[i]);
+		printf("%2d: %4u %4ums %#8x\n",
+		    i, (entry & 0xF8) >> 3, entry >> 8, entry);
+	}
+}
+
+/*
+ * Update the APST feature.  When "enable" is NONE the current setting is
+ * read back from the controller.  The table comes from "dstr" if given,
+ * else is generated from "limit" if given, else the controller's current
+ * table is read and written back unchanged.
+ */
+static void
+power_apst(int fd, struct nvme_controller_data *cdata,
+    uint32_t enable, const char *dstr, uint32_t limit)
+{
+	uint64_t data[32];
+
+	if (cdata->apsta == 0)
+		errx(EX_UNAVAILABLE, "Not supported by the controller");
+
+	/* Keep only bit 0 (the enable flag) of the returned dword. */
+	if (enable == NONE)
+		enable = power_apst_cmd(fd, GET, 0, NULL, 0) & 1;
+
+	memset(&data, 0, sizeof(data));
+
+	if (dstr != NULL)
+		power_apst_data_parse(data, nitems(data), dstr);
+	else if (limit != NONE)
+		power_apst_data_generate(cdata, data, nitems(data), limit);
+	else
+		power_apst_cmd(fd, GET, 0, data, sizeof(data));
+
+	power_apst_cmd(fd, SET, enable, data, sizeof(data));
+}
+
+static void
+power_show(int fd, struct nvme_controller_data *cdata)
 {
 	struct nvme_pt_command	pt;
+	uint64_t data[32];
+	int status;
 
 	memset(&pt, 0, sizeof(pt));
 	pt.cmd.opc = NVME_OPC_GET_FEATURES;
@@ -137,6 +307,11 @@
 
 	printf("Current Power State is %d\n", pt.cpl.cdw0 & 0x1F);
 	printf("Current Workload Hint is %d\n", pt.cpl.cdw0 >> 5);
+
+	if (cdata->apsta != 0) {
+		status = power_apst_cmd(fd, GET, 0, data, sizeof(data));
+		power_apst_show(data, nitems(data), status & 1);
+	}
 }
 
 static void
@@ -150,7 +325,7 @@
 	if (arg_parse(argc, argv, f))
 		return;
 
-	if (opt.list && opt.power != POWER_NONE) {
+	if (opt.list && opt.power != NONE) {
 		fprintf(stderr, "Can't set power and list power states\n");
 		arg_help(argc, argv, f);
 	}
@@ -163,18 +338,27 @@
 	}
 	free(path);
 
+	if (opt.power != NONE) {
+		power_set(fd, opt.power, opt.workload, 0);
+		goto out;
+	}
+
+	if (read_controller_data(fd, &cdata))
+		errx(EX_IOERR, "Identify request failed");
+
 	if (opt.list) {
-		if (read_controller_data(fd, &cdata))
-			errx(EX_IOERR, "Identify request failed");
 		power_list(&cdata);
 		goto out;
 	}
 
-	if (opt.power != POWER_NONE) {
-		power_set(fd, opt.power, opt.workload, 0);
+	if (opt.apst != NONE || opt.apst_limit != NONE ||
+	    opt.apst_data != NULL) {
+		power_apst(fd, &cdata, opt.apst,
+		    opt.apst_data, opt.apst_limit);
 		goto out;
 	}
-	power_show(fd);
+
+	power_show(fd, &cdata);
 
 out:
 	close(fd);
@@ -183,8 +367,14 @@
 
 static const struct opts power_opts[] = {
 #define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc }
+	OPT("apst", 'a', arg_uint32, opt, apst,
+	    "Enable or disable APST"),
+	OPT("data", 'd', arg_string, opt, apst_data,
+	    "Set the APST configuration"),
 	OPT("list", 'l', arg_none, opt, list,
 	    "List the valid power states"),
+	OPT("limit", 'm', arg_uint32, opt, apst_limit,
+	    "Set the APST latency limit"),
 	OPT("power", 'p', arg_uint32, opt, power,
 	    "Set the power state"),
 	OPT("workload", 'w', arg_uint32, opt, workload,